# Repository browser page header (not part of the Ruby source):
# Statistics | Revision:
#
# root / trunk / lib / coderay / scanners / ruby / patterns.rb
#
# History | View | Annotate | Download (7 KB)

1
# encoding: utf-8
2
module CodeRay
3
module Scanners
4
5
  module Ruby::Patterns  # :nodoc: all
6
7
    # Ruby keywords. IDENT_KIND registers every entry with the kind :reserved.
    #
    # The list must contain words only: anything placed between the brackets
    # of a %w literal becomes an element of the array.
    RESERVED_WORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]
    # Names that IDENT_KIND registers with the kind :pre_constant: the
    # pseudo variables (nil, true, false, self), predefined global constants
    # and the __FILE__ / __LINE__ / __ENCODING__ keywords.
    #
    # See http://murfy.de/ruby-constants.
    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF ENV
      FALSE TRUE NIL
      STDERR STDIN STDOUT
      TOPLEVEL_BINDING
      RUBY_COPYRIGHT RUBY_DESCRIPTION RUBY_ENGINE RUBY_PATCHLEVEL
      RUBY_PLATFORM RUBY_RELEASE_DATE RUBY_REVISION RUBY_VERSION
      __FILE__ __LINE__ __ENCODING__
    ]
    # Word list built from RESERVED_WORDS (kind :reserved) and
    # PREDEFINED_CONSTANTS (kind :pre_constant).
    # NOTE(review): :ident is presumably what WordList returns for words that
    # were never added - confirm against WordList.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # Word list associating definition keywords with a scanner state symbol.
    # NOTE(review): :initial is presumably the value for every other word -
    # confirm against WordList.
    KEYWORD_NEW_STATE = WordList.new(:initial).
      add(%w[ def ], :def_expected).
      add(%w[ undef ], :undef_expected).
      add(%w[ alias ], :alias_expected).
      add(%w[ class module ], :module_expected)
    # An identifier. The probe on the left checks whether [[:alpha:]] matches
    # a non-ASCII letter on the running Ruby; if it does, the POSIX bracket
    # form is used, otherwise the \w based fallback.
    IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/

    # An identifier with an optional trailing ? or !.
    METHOD_NAME = / #{IDENT} [?!]? /ox
    # Operators that may be used as method names.
    METHOD_NAME_OPERATOR = /
      \*\*?           # multiplication and power
      | [-+~]@?       # plus, minus, tilde with and without at sign
      | [\/%&|^`]     # division, modulo or format strings, and, or, xor, system
      | \[\]=?        # array getter and setter
      | << | >>       # append or shift left, shift right
      | <=?>? | >=?   # comparison, rocket operator
      | ===? | =~     # simple equality, case equality, match
      | ![~=@]?       # negation with and without at sign, not-equal and not-match
    /ox
    # Suffix of a method name: ? or !, or a = that does not start one of the
    # operators =~, => or == (a = followed by => is still accepted).
    METHOD_SUFFIX = / (?: [?!] | = (?![~>]|=(?!>)) ) /x
    # Identifier with optional suffix (including setter names), or an operator.
    METHOD_NAME_EX = / #{IDENT} #{METHOD_SUFFIX}? | #{METHOD_NAME_OPERATOR} /ox
    # Like METHOD_NAME_EX, but without the = suffix.
    METHOD_AFTER_DOT = / #{IDENT} [?!]? | #{METHOD_NAME_OPERATOR} /ox
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    # Instance or class variable.
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    # $name, numbered globals, the punctuation globals and the $-x forms.
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    # Any variable that starts with a sigil ($, @ or @@).
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
    # A plain identifier, an instance or class variable, or a global variable.
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
    # Maps a quote character to a literal type. Backtick and slash are listed
    # explicitly; every other key yields the default, :string.
    QUOTE_TO_TYPE = {
      '`' => :shell,
      '/'=> :regexp,
    }
    QUOTE_TO_TYPE.default = :string

    # Any run (possibly empty) of the letters m, o, u, s, e, n, i, x.
    REGEXP_MODIFIERS = /[mousenix]*/
    # A single regexp metacharacter.
    REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
    # Digit runs with single underscores between them.
    DECIMAL = /\d+(?:_\d+)*/
    # Octal: leading 0, optionally followed by o, O or one underscore.
    OCTAL = /0(?:[oO]|_)?[0-7]+(?:_[0-7]+)*/
    # Ruby treats the radix letter case-insensitively, so 0X and 0B are
    # accepted as well as 0x and 0b.
    HEXADECIMAL = /0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0[bB][01]+(?:_[01]+)*/

    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
    # The empty group () is the only capture: it participates in the match
    # only when a float suffix was consumed, so group 1 is non-nil for floats
    # and nil for integers. Do not add capture groups before it.
    FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
    # Radix-prefixed integers (including the explicit decimal prefix 0d) are
    # tried first when the literal starts with 0; everything else is a plain
    # integer or float.
    NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | 0[dD] #{DECIMAL} ) | #{FLOAT_OR_INT} ) /ox
    # Start of a symbol literal: a colon followed by a method name, a
    # sigil-prefixed variable, or the opening quote of a quoted symbol
    # (the quoted body itself is not part of this pattern).
    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
      | #{PREFIX_VARIABLE}
      | ['"]
      )
    /ox
    METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
    # What may follow a backslash: a named escape letter, up to three octal
    # digits, x plus one or two hex digits, or any single character (also a
    # newline, because of the m flag) - or nothing at all.
    SIMPLE_ESCAPE = /
        [abefnrstv]
      |  [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .?
    /mx

    # Control and meta escapes: M-, C- or c, optionally chained with further
    # backslash-prefixed M-, C- or c parts, then an optional target that is a
    # plain character or another backslash escape.
    CONTROL_META_ESCAPE = /
      (?: M-|C-|c )
      (?: \\ (?: M-|C-|c ) )*
      (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
    /mox

    # Everything that may follow a backslash (the backslash is not included).
    ESCAPE = /
      #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
    /mox

    # A character literal: a question mark followed by a single character
    # that is neither whitespace nor a backslash, or by a backslash escape.
    CHARACTER = /
      \?
      (?:
        [^\s\\]
      | \\ #{ESCAPE}
      )
    /mox
    # Opening of a heredoc: << with an optional dash, followed either by a
    # bare delimiter made of letters, digits and underscores, or by a quoted
    # delimiter. Captures:
    #   1 - the dash, if present
    #   2 - bare delimiter
    #   3 - quote character of a quoted delimiter (double, single, backtick
    #       or slash)
    #   4 - text of the quoted delimiter
    #
    # NOTE: This is not completely correct, but
    # nobody needs heredoc delimiters ending with \n.
    HEREDOC_OPEN = /
      << (-)?              # $1 = float
      (?:
        ( [A-Za-z_0-9]+ )  # $2 = delim
      |
        ( ["'`\/] )        # $3 = quote, type
        ( [^\n]*? ) \3     # $4 = delim
      )
    /mx
    # An embedded documentation block: from =begin (which must not be
    # followed directly by a non-space character) up to and including the
    # rest of the line that starts with =end, or up to the end of the input
    # when the block is never closed.
    RUBYDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    # The data section: from __END__ at the end of a line up to the end of
    # the input, or up to (not including) a line starting with #CODE.
    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx

    RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
    # Checks for a valid value to follow. This enables
    # value_expected in method calls without parentheses.
    #
    # Matches a run of horizontal whitespace followed by one of:
    # - a percent sign or slash followed by a character that is neither
    #   whitespace nor an equals sign,
    # - a heredoc opener,
    # - a sign directly followed by a digit,
    # - a character literal.
    VALUE_FOLLOWS = /
      (?>[ \t\f\v]+)
      (?:
        [%\/][^\s=]
      | <<-?\S
      | [-+] \d
      | #{CHARACTER}
      )
    /ox
    # Word list of keywords after which a value is expected. Compared with
    # RESERVED_WORDS it leaves out def, module, class, alias, undef, BEGIN
    # and END.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
      and end in or unless begin
      defined? ensure redo super until
      break do next rescue then
      when case else for retry
      while elsif if not return
      yield
    ])
    # Start of a percent literal. Group 1 is the type letter (q, Q, w, W, x,
    # s or r) or the empty string when the delimiter follows the percent sign
    # directly; group 2 is the non-alphanumeric delimiter.
    FANCY_START = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

    # Percent-literal type letter => [kind, flag]. The flag is handed to
    # StringState as its "interpreted" value.
    FancyStringType = {
      'q' => [:string, false],
      'Q' => [:string, true],
      'r' => [:regexp, true],
      's' => [:symbol, false],
      'x' => [:shell, true]
    }
    # w shares the entry of q; W and the bare form (empty letter) share Q's.
    FancyStringType['w'] = FancyStringType['q']
    FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
    # State for a string-like literal that is being scanned. A Struct with
    # the members type, interpreted, delim, heredoc, opening_paren,
    # paren_depth, pattern and next_state.
    class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
      :opening_paren, :paren_depth, :pattern, :next_state

      # Opening bracket => closing bracket, for delimiters that come in pairs.
      CLOSING_PAREN = Hash[ *%w[
        ( )
        [ ]
        < >
        { }
      ] ]

      CLOSING_PAREN.each { |k,v| k.freeze; v.freeze }  # debug, if I try to change it with <<
      # Closing bracket => opening bracket.
      OPENING_PAREN = CLOSING_PAREN.invert

      # Cache of lookahead patterns for non-heredoc literals, keyed by
      # [delim, interpreted]. Each pattern matches (with zero width) in front
      # of the delimiter, its closing counterpart if it is a bracket, or a
      # backslash; for interpreted literals whose delimiter is not '#' it also
      # matches in front of '#' followed by '{', '$' or '@'. The interpreted
      # values :regexp_symbols and :words add the regexp metacharacters and
      # whitespace, respectively.
      STRING_PATTERN = Hash.new do |h, k|
        delim, interpreted = *k
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        if closing_paren = CLOSING_PAREN[delim]
          delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION  # JRuby fix
          delim_pattern << Regexp.escape(closing_paren)
        end
        # The backslash always belongs to the character class; it is only
        # skipped when the delimiter already is a backslash.
        delim_pattern << '\\\\' unless delim == '\\'

        # nil (interpolated as an empty string) for every other value.
        special_escapes =
          case interpreted
          when :regexp_symbols
            '| ' + REGEXP_SYMBOLS.source
          when :words
            '| \s'
          end

        h[k] =
          if interpreted and not delim == '#'
            / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
          else
            / (?= [#{delim_pattern}] #{special_escapes} ) /mx
          end
      end

      # Cache of lookahead patterns for heredoc bodies, keyed by
      # [delim, interpreted, indented]. Each pattern matches in front of the
      # newline that precedes the terminator line (which may be indented with
      # spaces and tabs when indented is set), or in front of a backslash;
      # for interpreted heredocs also in front of '#' followed by '{', '$' or
      # '@'. Group 1 is set (to the empty string) only when the terminator
      # was found.
      HEREDOC_PATTERN = Hash.new do |h, k|
        delim, interpreted, indented = *k
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
        h[k] =
          if interpreted
            / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
          else
            / (?= #{delim_pattern}() | \\ ) /mx
          end
      end

      # kind        - stored as the +type+ member.
      # interpreted - stored as is; also selects the pattern variant.
      # delim       - delimiter string. For an opening bracket the stored
      #               +delim+ is the closing bracket, +opening_paren+ is the
      #               opening one and +paren_depth+ starts at 1. For heredocs
      #               the stored +delim+ is nil.
      # heredoc     - false for ordinary literals; any true value selects a
      #               heredoc pattern, and :indented allows the terminator
      #               to be indented.
      #
      # +next_state+ always starts as :initial.
      def initialize kind, interpreted, delim, heredoc = false
        if heredoc
          pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
          delim = nil
        else
          pattern = STRING_PATTERN[ [delim, interpreted] ]
          if closing_paren = CLOSING_PAREN[delim]
            opening_paren = delim
            delim = closing_paren
            paren_depth = 1
          end
        end
        super kind, interpreted, delim, heredoc, opening_paren, paren_depth, pattern, :initial
      end
    end unless defined? StringState
  end
240
241
end
242
end