root / trunk / lib / coderay / scanners / ruby / patterns.rb
History | View | Annotate | Download (7 KB)
# encoding: utf-8
module CodeRay
module Scanners

  # Word lists, regular expressions and string-state bookkeeping used when
  # scanning Ruby source. Everything here is a constant (or the StringState
  # helper class); nothing is executed besides building those constants.
  module Ruby::Patterns  # :nodoc: all

    # Words that IDENT_KIND classifies as :reserved.
    RESERVED_WORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]

    # Words that IDENT_KIND classifies as :pre_constant.
    # See http://murfy.de/ruby-constants.
    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF ENV
      FALSE TRUE NIL
      STDERR STDIN STDOUT
      TOPLEVEL_BINDING
      RUBY_COPYRIGHT RUBY_DESCRIPTION RUBY_ENGINE RUBY_PATCHLEVEL
      RUBY_PLATFORM RUBY_RELEASE_DATE RUBY_REVISION RUBY_VERSION
      __FILE__ __LINE__ __ENCODING__
    ]

    # Identifier -> token kind lookup, constructed with :ident
    # (presumably the value returned for unlisted words; WordList is
    # defined elsewhere -- confirm there).
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # Keyword -> follow-up state lookup, constructed with :initial
    # (presumably the fallback for every other word -- confirm in WordList).
    KEYWORD_NEW_STATE = WordList.new(:initial).
      add(%w[ def ], :def_expected).
      add(%w[ undef ], :undef_expected).
      add(%w[ alias ], :alias_expected).
      add(%w[ class module ], :module_expected)

    # Identifier pattern. The POSIX bracket form is used only when the
    # running regexp engine matches 'ä' with [[:alpha:]]; otherwise the
    # \w-based form is used.
    IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/

    # Method names: plain identifiers with optional ?/! suffix, operator
    # method names, and the combinations accepted in various positions.
    METHOD_NAME = / #{IDENT} [?!]? /ox
    METHOD_NAME_OPERATOR = /
      \*\*?           # multiplication and power
      | [-+~]@?       # plus, minus, tilde with and without at sign
      | [\/%&|^`]     # division, modulo or format strings, and, or, xor, system
      | \[\]=?        # array getter and setter
      | << | >>       # append or shift left, shift right
      | <=?>? | >=?   # comparison, rocket operator
      | ===? | =~     # simple equality, case equality, match
      | ![~=@]?       # negation with and without at sign, not-equal and not-match
    /ox
    # A trailing "=" counts as a suffix only when it does not begin
    # "=~", "=>" or "==" (while "==>" is still accepted).
    METHOD_SUFFIX = / (?: [?!] | = (?![~>]|=(?!>)) ) /x
    METHOD_NAME_EX = / #{IDENT} #{METHOD_SUFFIX}? | #{METHOD_NAME_OPERATOR} /ox
    METHOD_AFTER_DOT = / #{IDENT} [?!]? | #{METHOD_NAME_OPERATOR} /ox

    # Variables with sigils (@, @@, $) and the unions built from them.
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

    # Opening quote character -> string type; any quote not listed
    # here yields :string through the hash default.
    QUOTE_TO_TYPE = {
      '`' => :shell,
      '/'=> :regexp,
    }
    QUOTE_TO_TYPE.default = :string

    REGEXP_MODIFIERS = /[mousenix]*/
    REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/

    # Integer literals; digit groups may be separated by single underscores.
    DECIMAL = /\d+(?:_\d+)*/
    OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
    HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0b[01]+(?:_[01]+)*/

    # Floating point parts. FLOAT_OR_INT contains an empty capture group
    # that participates in the match only when a float suffix was matched.
    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
    FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
    # Prefixed forms are tried first (only when the literal starts with 0),
    # then the decimal integer / float form.
    NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox

    # A colon followed by a method name, a prefixed variable, or an
    # opening quote character.
    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
      | #{PREFIX_VARIABLE}
      | ['"]
      )
    /ox
    METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox

    # What may follow a backslash: a letter escape, up to three octal
    # digits, \x with one or two hex digits, or any single character
    # (possibly none).
    SIMPLE_ESCAPE = /
        [abefnrstv]
      | [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .?
    /mx

    # Control / meta escapes (M-, C-, c), optionally chained with further
    # backslash-prefixed control / meta prefixes.
    CONTROL_META_ESCAPE = /
      (?: M-|C-|c )
      (?: \\ (?: M-|C-|c ) )*
      (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
    /mox

    ESCAPE = /
      #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
    /mox

    # Character literal: "?" followed by one non-space, non-backslash
    # character or by a backslash escape.
    CHARACTER = /
      \?
      (?:
        [^\s\\]
      | \\ #{ESCAPE}
      )
    /mox

    # NOTE: This is not completely correct, but
    # nobody needs heredoc delimiters ending with \n.
    HEREDOC_OPEN = /
      << (-)?              # $1 = float
      (?:
        ( [A-Za-z_0-9]+ )  # $2 = delim
      |
        ( ["'`\/] )        # $3 = quote, type
        ( [^\n]*? ) \3     # $4 = delim
      )
    /mx

    # An "=begin" section up to the line starting with "=end" (including
    # the rest of that line), or up to the end of input.
    RUBYDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    # Everything from "__END__" to the end of input, or up to (but not
    # including) a line that starts with "#CODE".
    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx

    RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

    # Checks for a valid value to follow. This enables
    # value_expected in method calls without parentheses.
    VALUE_FOLLOWS = /
      (?>[ \t\f\v]+)
      (?:
        [%\/][^\s=]
      | <<-?\S
      | [-+] \d
      | #{CHARACTER}
      )
    /ox
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
      and end in or unless begin
      defined? ensure redo super until
      break do next rescue then
      when case else for retry
      while elsif if not return
      yield
    ])

    # Start of a %-literal: $1 is the (possibly empty) type letter,
    # $2 the delimiter character.
    FANCY_START = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

    # %-literal type letter -> [string type, interpreted flag].
    # 'w' shares the entry of 'q'; 'W' and the empty letter share 'Q'.
    FancyStringType = {
      'q' => [:string, false],
      'Q' => [:string, true],
      'r' => [:regexp, true],
      's' => [:symbol, false],
      'x' => [:shell, true]
    }
    FancyStringType['w'] = FancyStringType['q']
    FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']

    # State record for a string-like literal that is currently being
    # scanned. The whole definition is skipped if StringState is already
    # defined (see the modifier on the closing +end+).
    class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
      :opening_paren, :paren_depth, :pattern, :next_state

      # Opening bracket -> matching closing bracket.
      CLOSING_PAREN = Hash[ *%w[
        ( )
        [ ]
        < >
        { }
      ] ]

      CLOSING_PAREN.each { |k,v| k.freeze; v.freeze }  # debug, if I try to change it with <<
      OPENING_PAREN = CLOSING_PAREN.invert

      # Lazily built cache of lookahead patterns, keyed by
      # [delim, interpreted]. Each pattern matches (zero-width) in front of
      # a character that needs attention inside the string: the delimiter,
      # its closing bracket if it has one, a backslash (unless the
      # delimiter itself is a backslash), the start of an interpolation
      # ("#" followed by "{", "$" or "@") when +interpreted+ is truthy and
      # the delimiter is not "#", plus the extra alternatives selected by
      # +interpreted+ (:regexp_symbols or :words).
      STRING_PATTERN = Hash.new do |h, k|
        delim, interpreted = *k
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        if closing_paren = CLOSING_PAREN[delim]
          delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION  # JRuby fix
          delim_pattern << Regexp.escape(closing_paren)
        end
        delim_pattern << '\\\\' unless delim == '\\'

        # nil (interpolated as an empty string) for any other value.
        special_escapes =
          case interpreted
          when :regexp_symbols
            '| ' + REGEXP_SYMBOLS.source
          when :words
            '| \s'
          end

        h[k] =
          if interpreted and not delim == '#'
            / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
          else
            / (?= [#{delim_pattern}] #{special_escapes} ) /mx
          end
      end

      # Lazily built cache of lookahead patterns for heredoc bodies, keyed
      # by [delim, interpreted, indented]. The terminator is a newline,
      # optional leading blanks/tabs when +indented+, then the delimiter at
      # the end of a line. The delimiter is embedded as its own Regexp so
      # that the surrounding /x flag does not affect whitespace inside it.
      HEREDOC_PATTERN = Hash.new do |h, k|
        delim, interpreted, indented = *k
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
        h[k] =
          if interpreted
            / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
          else
            / (?= #{delim_pattern}() | \\ ) /mx
          end
      end

      # kind::        stored as the +type+ member.
      # interpreted:: stored as is and used as part of the pattern cache key.
      # delim::       the opening delimiter. For heredocs it is only used to
      #               build the pattern and the stored +delim+ is nil; for
      #               bracket delimiters the stored +delim+ is the closing
      #               bracket, +opening_paren+ is the opening one and
      #               +paren_depth+ starts at 1.
      # heredoc::     false for ordinary strings; any truthy value selects
      #               HEREDOC_PATTERN, and :indented additionally allows
      #               leading blanks before the terminator.
      #
      # +next_state+ always starts as :initial.
      def initialize kind, interpreted, delim, heredoc = false
        if heredoc
          pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
          delim = nil
        else
          pattern = STRING_PATTERN[ [delim, interpreted] ]
          if closing_paren = CLOSING_PAREN[delim]
            opening_paren = delim
            delim = closing_paren
            paren_depth = 1
          end
        end
        # opening_paren / paren_depth are nil unless the bracket branch ran.
        super kind, interpreted, delim, heredoc, opening_paren, paren_depth, pattern, :initial
      end
    end unless defined? StringState

  end

end
end
|