Added syntax highlighting for repository files (using CodeRay).

Supported languages: c, ruby, rhtml, yaml, html, xml.

git-svn-id: http://redmine.rubyforge.org/svn/trunk@644 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
Jean-Philippe Lang
2007-08-15 20:20:18 +00:00
parent a5849ee044
commit 889d50089d
53 changed files with 5813 additions and 13 deletions

View File

@@ -0,0 +1,15 @@
module CodeRay
  module Scanners

    # Language name mappings handed to the scanner registry.
    # NOTE(review): presumably each key is an alias that resolves to the
    # scanner named by its value -- confirm against the plugin host.
    map(
      :cpp    => :c,
      :plain  => :plaintext,
      :pascal => :delphi,
      :irb    => :ruby,
      :xml    => :html,
      :xhtml  => :nitro_xhtml,
      :nitro  => :nitro_xhtml
    )

    # NOTE(review): presumably the scanner used when no other one applies.
    default(:plain)

  end
end

View File

@@ -0,0 +1,165 @@
module CodeRay
  module Scanners

    # Scanner for C source code.
    #
    # Produces [text, kind] tokens. String literals are bracketed by
    # [:open, :string] and [:close, :string] marker tokens.
    class C < Scanner

      register_for :c

      include Streamable

      RESERVED_WORDS = [
        'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
        'for', 'goto', 'if', 'return', 'switch', 'while',
        'struct', 'union', 'enum', 'typedef',
        'static', 'register', 'auto', 'extern',
        'sizeof',
        'volatile', 'const',  # C89
        'inline', 'restrict', # C99
      ]

      PREDEFINED_TYPES = [
        'int', 'long', 'short', 'char', 'void',
        'signed', 'unsigned', 'float', 'double',
        'bool', 'complex',  # C99
      ]

      PREDEFINED_CONSTANTS = [
        'EOF', 'NULL',
        'true', 'false',  # C99
      ]

      # Maps an identifier to its token kind; unknown words are :ident.
      IDENT_KIND = WordList.new(:ident).
        add(RESERVED_WORDS, :reserved).
        add(PREDEFINED_TYPES, :pre_type).
        add(PREDEFINED_CONSTANTS, :pre_constant)

      # What may follow a backslash: the simple escapes (including \a and
      # \?), a line continuation, a hex escape or an octal escape.
      ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

      # Tokenizes the rest of the input, appending to +tokens+, which is
      # also the return value. +options+ is not used here.
      #
      # States:
      # :initial::          ordinary code
      # :string::           inside a double quoted string literal
      # :include_expected:: directly after an #include directive
      def scan_tokens tokens, options

        state = :initial

        until eos?

          kind = nil
          match = nil

          case state

          when :initial

            if scan(/ \s+ | \\\n /x)
              kind = :space

            elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              kind = :comment

            elsif match = scan(/ \# \s* if \s* 0 /x)
              # An "#if 0" section is shown as a comment up to the next
              # #elif / #else / #endif line (or the end of the input).
              match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
              kind = :comment

            elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
              kind = :operator

            elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
              kind = IDENT_KIND[match]
              # A plain identifier followed by a single colon is a label.
              if kind == :ident and check(/:(?!:)/)
                match << scan(/:/)
                kind = :label
              end

            elsif match = scan(/L?"/)
              tokens << [:open, :string]
              if match[0] == ?L
                tokens << ['L', :modifier]
                match = '"'
              end
              state = :string
              kind = :delimiter

            elsif scan(/#\s*(\w*)/)
              kind = :preprocessor  # FIXME multiline preprocs
              state = :include_expected if self[1] == 'include'

            elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
              kind = :char

            elsif scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex

            # The digit runs below are atomic. Without that, the lookahead
            # failing on "12.5" made the engine backtrack to "1" and emit a
            # bogus one-digit integer (likewise "0129" became octal "01").
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct

            elsif scan(/(?>\d+)(?![.eEfF])/)
              kind = :integer

            # Complete forms come first; the bare "digits plus optional
            # dot or f suffix" form is only the fallback.
            elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+\.?[fF]?/)
              kind = :float

            else
              getch
              kind = :error

            end

          when :string
            if scan(/[^\\\n"]+/)
              kind = :content
            elsif scan(/"/)
              tokens << ['"', :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              kind = :char
            elsif scan(/ \\ | $ /x)
              # Unknown escape or unterminated line: leave the string.
              tokens << [:close, :string]
              kind = :error
              state = :initial
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
            end

          when :include_expected
            if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
              kind = :include
              state = :initial

            elsif match = scan(/\s+/)
              kind = :space
              state = :initial if match.index ?\n

            else
              getch
              kind = :error

            end

          else
            raise_inspect 'Unknown state', tokens

          end

          # Branches that did not capture the text themselves use the
          # scanner's last match.
          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

        end

        # Close a string that is still open at the end of the input.
        if state == :string
          tokens << [:close, :string]
        end

        tokens
      end

    end

  end
end

View File

@@ -0,0 +1,60 @@
module CodeRay
  module Scanners

    # = Debug Scanner
    #
    # Reads a textual token notation: "kind(text)" is a text token,
    # "kind<" opens a token group and ">" closes the most recently opened
    # one.
    class Debug < Scanner

      include Streamable
      register_for :debug

    protected

      # Appends the tokens described by the input to +tokens+ and returns
      # +tokens+. +options+ is not used here.
      def scan_tokens tokens, options

        open_kinds = []  # kinds of the groups that are currently open

        until eos?

          if scan(/\s+/)
            tokens << [matched, :space]
            next
          end

          token_text = nil

          if scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \) /x)
            token_kind = self[1].to_sym
            # Drop the backslash from escaped characters in the payload.
            token_text = self[2].gsub(/\\(.)/, '\1')

          elsif scan(/ (\w+) < /x)
            token_kind = self[1].to_sym
            open_kinds << token_kind
            token_text = :open

          elsif scan(/ > /x)
            token_kind = open_kinds.pop
            token_text = :close

          else
            token_kind = :error
            getch

          end

          token_text = matched unless token_text

          if $DEBUG && !token_kind
            message = 'Error token %p in line %d' % [[token_text, token_kind], line]
            raise_inspect message, tokens
          end
          raise_inspect 'Empty token', tokens unless token_text

          tokens << [token_text, token_kind]

        end

        tokens
      end

    end

  end
end

View File

@@ -0,0 +1,149 @@
module CodeRay
  module Scanners

    # Scanner for Delphi / Object Pascal source code.
    #
    # Produces [text, kind] tokens. Quoted strings and one-character
    # literals are bracketed by :open / :close marker tokens.
    class Delphi < Scanner

      register_for :delphi

      RESERVED_WORDS = [
        'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
        'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
        'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
        'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
        'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
        'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
        'procedure', 'program', 'property', 'raise', 'record', 'repeat',
        'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
        'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
        'xor', 'on'
      ]

      DIRECTIVES = [
        'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
        'contains', 'deprecated', 'dispid', 'dynamic', 'export',
        'external', 'far', 'forward', 'implements', 'local',
        'near', 'nodefault', 'on', 'overload', 'override',
        'package', 'pascal', 'platform', 'private', 'protected', 'public',
        'published', 'read', 'readonly', 'register', 'reintroduce',
        'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
        'virtual', 'write', 'writeonly'
      ]

      # Token kind of an identifier; unknown words are :ident. The second
      # constructor argument is the one the original named "caching".
      IDENT_KIND = CaseIgnoringWordList.new(:ident, true).
        add(RESERVED_WORDS, :reserved).
        add(DIRECTIVES, :directive)

      # Tokens after which the next word is always treated as a plain
      # identifier, even if it is spelled like a keyword.
      NAME_FOLLOWS = CaseIgnoringWordList.new(false, true).
        add(%w(procedure function .))

    private

      # Tokenizes the rest of the input, appending to +tokens+, which is
      # also the return value. +options+ is not used here.
      #
      # States: :initial (ordinary code) and :string (inside a quoted
      # string).
      def scan_tokens tokens, options

        state = :initial
        last_token = ''

        until eos?

          kind = nil
          match = nil

          if state == :initial

            if scan(/ \s+ /x)
              tokens << [matched, :space]
              next

            elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
              tokens << [matched, :preprocessor]
              next

            elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
              tokens << [matched, :comment]
              next

            elsif match = scan(/ <[>=]? | >=? | :=? | [-+=*\/;,@\^|\(\)\[\]] | \.\. /x)
              kind = :operator

            elsif match = scan(/\./)
              kind = :operator
              if last_token == 'end'
                # Emit the dot without recording it as last_token.
                tokens << [match, kind]
                next
              end

            elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
              kind = NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match]

            elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
              # A quoted single character (or a doubled quote).
              tokens << [:open, :char]
              tokens << ["'", :delimiter]
              tokens << [self[1], :content]
              tokens << ["'", :delimiter]
              tokens << [:close, :char]
              next

            elsif match = scan(/ ' /x)
              tokens << [:open, :string]
              state = :string
              kind = :delimiter

            elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
              kind = :char

            elsif scan(/ \$ [0-9A-Fa-f]+ /x)
              kind = :hex

            # The digit run is atomic. Without that, the lookahead failing
            # on "12.5" made the engine backtrack to "1" and emit a bogus
            # one-digit integer ahead of the float.
            elsif scan(/ (?> \d+ ) (?![eE]|\.[^.]) /x)
              kind = :integer

            elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
              kind = :float

            else
              kind = :error
              getch

            end

          elsif state == :string
            if scan(/[^\n']+/)
              kind = :content
            elsif scan(/''/)
              kind = :char
            elsif scan(/'/)
              tokens << ["'", :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            elsif scan(/\n/)
              # A line break ends an unterminated string.
              tokens << [:close, :string]
              kind = :error
              state = :initial
            else
              # raise_inspect, not raise: raise rejects the tokens Array as
              # its second argument.
              raise_inspect "else case \' reached; %p not handled." % peek(1), tokens, state
            end

          else
            raise_inspect 'else-case reached', tokens, state

          end

          # Branches that did not capture the text themselves use the
          # scanner's last match.
          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          last_token = match
          tokens << [match, kind]

        end

        tokens
      end

    end

  end
end

View File

@@ -0,0 +1,177 @@
module CodeRay
module Scanners
# HTML Scanner
#
# Tokenizes HTML markup into [text, kind] tokens. Quoted attribute values
# are bracketed by [:open, :string] and [:close, :string] marker tokens.
#
# $Id$
class HTML < Scanner
include Streamable
register_for :html
# Characters allowed in an attribute name; unquoted attribute values use
# the same pattern.
ATTR_NAME = /[\w.:-]+/
ATTR_VALUE_UNQUOTED = ATTR_NAME
# End of a tag: ">" or "/>".
TAG_END = /\/?>/
HEX = /[0-9a-fA-F]/
# Character reference: &name; or decimal &#123; or hexadecimal &#x1F;
ENTITY = /
&
(?:
\w+
|
\#
(?:
\d+
|
x#{HEX}+
)
)
;
/ox
# Pattern for the literal content of a quoted attribute value, keyed by
# the quote character that opened it.
PLAIN_STRING_CONTENT = {
"'" => /[^&'>\n]+/,
'"' => /[^&">\n]+/,
}
# Resets the scanner (via super) and returns the state machine to
# :initial.
def reset
super
@state = :initial
end
private
# Initializes the state that scan_tokens restores on entry.
def setup
@state = :initial
@plain_string_content = nil
end
# Tokenizes the rest of the input, appending to +tokens+, which is also
# the return value.
#
# When options[:keep_state] is set, the final state and the active string
# content pattern are stored in instance variables, and the next call
# starts from them.
#
# States: :initial, :attribute, :attribute_equal, :attribute_value,
# :attribute_value_string.
def scan_tokens tokens, options
state = @state
plain_string_content = @plain_string_content
until eos?
kind = nil
match = nil
# Whitespace is consumed first, whatever the current state is.
if scan(/\s+/m)
kind = :space
else
case state
when :initial
if scan(/<!--.*?-->/m)
kind = :comment
elsif scan(/<!DOCTYPE.*?>/m)
kind = :preprocessor
elsif scan(/<\?xml.*?\?>/m)
kind = :preprocessor
elsif scan(/<\?.*?\?>|<%.*?%>/m)
kind = :comment
elsif scan(/<\/[-\w_.:]*>/m)
kind = :tag
elsif match = scan(/<[-\w_.:]+>?/m)
kind = :tag
# An opening tag that was not closed right away has attributes to read.
state = :attribute unless match[-1] == ?>
elsif scan(/[^<>&]+/)
kind = :plain
elsif scan(/#{ENTITY}/ox)
kind = :entity
elsif scan(/[<>&]/)
kind = :error
else
raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
end
when :attribute
if scan(/#{TAG_END}/)
kind = :tag
state = :initial
elsif scan(/#{ATTR_NAME}/o)
kind = :attribute_name
state = :attribute_equal
else
kind = :error
getch
end
when :attribute_equal
if scan(/=/)
kind = :operator
state = :attribute_value
elsif scan(/#{ATTR_NAME}/o)
# Another name without "=": the previous attribute had no value.
kind = :attribute_name
elsif scan(/#{TAG_END}/o)
kind = :tag
state = :initial
elsif scan(/./)
kind = :error
state = :attribute
end
when :attribute_value
if scan(/#{ATTR_VALUE_UNQUOTED}/o)
kind = :attribute_value
state = :attribute
elsif match = scan(/["']/)
tokens << [:open, :string]
state = :attribute_value_string
# Pick the content pattern that stops at this quote character.
plain_string_content = PLAIN_STRING_CONTENT[match]
kind = :delimiter
elsif scan(/#{TAG_END}/o)
kind = :tag
state = :initial
else
kind = :error
getch
end
when :attribute_value_string
if scan(plain_string_content)
kind = :content
elsif scan(/['"]/)
tokens << [matched, :delimiter]
tokens << [:close, :string]
state = :attribute
next
elsif scan(/#{ENTITY}/ox)
kind = :entity
elsif scan(/&/)
# A lone ampersand that is not part of an entity.
kind = :content
elsif scan(/[\n>]/)
# The value was not terminated: close the string and flag the character.
tokens << [:close, :string]
kind = :error
state = :initial
end
else
raise_inspect 'Unknown state: %p' % [state], tokens
end
end
# Branches that did not capture the text themselves use the last match.
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens, state
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
if options[:keep_state]
@state = state
@plain_string_content = plain_string_content
end
tokens
end
end
end
end

View File

@@ -0,0 +1,133 @@
module CodeRay
  module Scanners

    load :html
    load :ruby

    # Nitro XHTML Scanner
    #
    # Splits a template into markup, handed to an HTML scanner, and
    # embedded Ruby code, handed to a Ruby scanner. Embedded code is
    # bracketed by [:open, :inline] and [:close, :inline] marker tokens.
    #
    # $Id$
    class NitroXHTML < Scanner

      include Streamable
      register_for :nitro_xhtml

      # A code block in one of three notations: <?r ... ?>,
      # <ruby> ... </ruby> or <% ... %>. The closing tag is optional so
      # that an unterminated block still matches.
      NITRO_RUBY_BLOCK = /
        <\?r
        (?>
          [^\?]*
          (?> \?(?!>) [^\?]* )*
        )
        (?: \?> )?
      |
        <ruby>
        (?>
          [^<]*
          (?> <(?!\/ruby>) [^<]* )*
        )
        (?: <\/ruby> )?
      |
        <%
        (?>
          [^%]*
          (?> %(?!>) [^%]* )*
        )
        (?: %> )?
      /mx

      # An inline value: "#" followed by a delimited expression. The
      # closing delimiter is optional.
      NITRO_VALUE_BLOCK = /
        \#
        (?:
          \{
          [^{}]*
          (?>
            \{ [^}]* \}
            (?> [^{}]* )
          )*
          \}?
        | \| [^|]* \|?
        | \( [^)]* \)?
        | \[ [^\]]* \]?
        | \\ [^\\]* \\?
        )
      /x

      # %name; or %#123;
      # The x flag is required: without it the line breaks and spaces of
      # this literal are part of the pattern and it cannot match.
      NITRO_ENTITY = /
        % (?: \#\d+ | \w+ ) ;
      /x

      # Lookahead target: the start of anything that is not plain markup.
      START_OF_RUBY = /
        (?=[<\#%])
        < (?: \?r | % | ruby> )
      | \# [{(|]
      | % (?: \#\d+ | \w+ ) ;
      /x

      # Closing delimiter for a value block opener; characters not listed
      # close with themselves.
      CLOSING_PAREN = Hash.new do |h, p|
        h[p] = p
      end.update( {
        '(' => ')',
        '[' => ']',
        '{' => '}',
      } )

      # Closing tag belonging to each NITRO_RUBY_BLOCK opening tag.
      RUBY_BLOCK_END_TAGS = {
        '<?r'    => '?>',
        '<ruby>' => '</ruby>',
        '<%'     => '%>',
      }

    private

      # Creates the two nested scanners. Both are given this scanner's
      # token list.
      def setup
        @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
        @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
      end

      def reset_instance
        super
        @html_scanner.reset
      end

      # Tokenizes the rest of the input, appending to +tokens+, which is
      # also the return value. +options+ is not used here.
      def scan_tokens tokens, options

        until eos?

          if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
            # Plain markup up to the next embedded construct or the end.
            @html_scanner.tokenize match

          elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
            start_tag = match[0,2]
            delimiter = CLOSING_PAREN[start_tag[1,1]]
            # The length check keeps a bare "#|" or "#\" from counting its
            # opening character as the closing one too.
            end_tag = (match.size > start_tag.size && match[-1,1] == delimiter) ? delimiter : ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = match[start_tag.size .. -1 - end_tag.size]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
            # Take the tags from the matched text instead of assuming the
            # <?r ... ?> notation.
            start_tag = match[/\A(?:<\?r|<ruby>|<%)/]
            rest = match[start_tag.size .. -1]
            closer = RUBY_BLOCK_END_TAGS[start_tag]
            end_tag = rest[-closer.size, closer.size] == closer ? closer : ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = rest[0, rest.size - end_tag.size]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          elsif entity = scan(/#{NITRO_ENTITY}/o)
            tokens << [entity, :entity]

          elsif scan(/%/)
            tokens << [matched, :error]

          else
            raise_inspect 'else-case reached!', tokens

          end

        end

        tokens

      end

    end

  end
end

View File

@@ -0,0 +1,18 @@
module CodeRay
  module Scanners

    # Scanner for plain text: the whole input becomes one :plain token.
    class Plaintext < Scanner

      register_for :plaintext, :plain

      include Streamable

      # Reads everything up to the end of the input and appends it to
      # +tokens+ as a single [text, :plain] token. An empty string is used
      # when nothing could be read. +options+ is not used here.
      def scan_tokens tokens, options
        remainder = scan_until(/\z/)
        remainder = '' unless remainder
        tokens << [remainder, :plain]
      end

    end

  end
end

View File

@@ -0,0 +1,73 @@
module CodeRay
  module Scanners

    load :html
    load :ruby

    # RHTML Scanner
    #
    # Splits an ERB template into markup, handed to an HTML scanner, and
    # <% ... %> code blocks, handed to a Ruby scanner. Code blocks are
    # bracketed by [:open, :inline] and [:close, :inline] marker tokens.
    #
    # $Id$
    class RHTML < Scanner

      include Streamable
      register_for :rhtml

      # An ERB block: "<%", "<%=" or "<%-" up to an optional "%>" or "-%>".
      # "<%%" is excluded by the lookahead.
      ERB_RUBY_BLOCK = /
        <%(?!%)[=-]?
        (?>
          [^\-%]*    # normal*
          (?>        # special
            (?: %(?!>) | -(?!%>) )
            [^\-%]*  # normal*
          )*
        )
        (?: -?%> )?
      /x

      START_OF_ERB = /
        <%(?!%)
      /x

    private

      # Creates the two nested scanners. Both are given this scanner's
      # token list.
      def setup
        @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
        @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
      end

      def reset_instance
        super
        @html_scanner.reset
      end

      # Tokenizes the rest of the input, appending to +tokens+, which is
      # also the return value. +options+ is not used here.
      def scan_tokens tokens, options

        until eos?

          if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
            # Plain markup up to the next ERB block or the end.
            @html_scanner.tokenize match

          elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
            start_tag = match[/\A<%[-=]?/]
            # Look for the closing tag only in what follows the start tag,
            # and accept only a complete "%>" / "-%>". A stray trailing
            # "-", "%" or ">" of an unterminated block is code, and the end
            # tag can never overlap the start tag (as in "<%-%>").
            rest = match[start_tag.size .. -1]
            end_tag = rest[/-?%>\z/] || ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = rest[0, rest.size - end_tag.size]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          else
            raise_inspect 'else-case reached!', tokens

          end

        end

        tokens

      end

    end

  end
end

View File

@@ -0,0 +1,368 @@
module CodeRay
  module Scanners

    # This scanner is really complex, since Ruby _is_ a complex language!
    #
    # It tries to highlight 100% of all common code,
    # and 90% of strange codes.
    #
    # It is optimized for HTML highlighting, and is not very useful for
    # parsing or pretty printing.
    #
    # For now, I think it's better than the scanners in VIM or Syntax, or
    # any highlighter I was able to find, except Caleb's RubyLexer.
    #
    # I hope it's also better than the rdoc/irb lexer.
    class Ruby < Scanner

      include Streamable

      register_for :ruby
      file_extension 'rb'

      helper :patterns

    private

      # Tokenizes the rest of the input, appending to +tokens+, which is
      # also the return value. +options+ is not used here.
      #
      # +state+ is either a Patterns::StringState (inside a string like
      # literal) or one of the symbols :initial, :def_expected,
      # :undef_expected, :undef_comma_expected, :module_expected.
      #
      # +value_expected+ and +last_token_dot+ are flags about the previous
      # token. A branch sets them to :set to keep them for the next token;
      # any other value is turned into false after the token is emitted.
      def scan_tokens tokens, options
        last_token_dot = false
        value_expected = true
        heredocs = nil          # heredocs opened on the current line
        last_state = nil        # string state to resume after "#$x" / "#@x"
        state = :initial
        depth = nil             # brace depth inside the current #{...}
        inline_block_stack = [] # saved [state, depth, heredocs] per #{...}

        patterns = Patterns  # avoid constant lookup

        until eos?
          match = nil
          kind = nil

          if state.instance_of? patterns::StringState
# {{{
            match = scan_until(state.pattern) || scan_until(/\z/)
            tokens << [match, :content] unless match.empty?
            break if eos?

            if state.heredoc and self[1]  # end of heredoc
              match = getch.to_s
              match << scan_until(/$/) unless eos?
              tokens << [match, :delimiter]
              tokens << [:close, state.type]
              state = state.next_state
              next
            end

            case match = getch

            when state.delim
              if state.paren
                state.paren_depth -= 1
                if state.paren_depth > 0
                  tokens << [match, :nesting_delimiter]
                  next
                end
              end
              tokens << [match, :delimiter]
              if state.type == :regexp and not eos?
                modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
                tokens << [modifiers, :modifier] unless modifiers.empty?
              end
              tokens << [:close, state.type]
              value_expected = false
              state = state.next_state

            when '\\'
              if state.interpreted
                if esc = scan(/ #{patterns::ESCAPE} /ox)
                  tokens << [match + esc, :char]
                else
                  tokens << [match, :error]
                end
              else
                case m = getch
                when state.delim, '\\'
                  tokens << [match + m, :char]
                when nil
                  tokens << [match, :error]
                else
                  tokens << [match + m, :content]
                end
              end

            when '#'
              case peek(1)
              when '{'
                inline_block_stack << [state, depth, heredocs]
                value_expected = true
                state = :initial
                depth = 1
                tokens << [:open, :inline]
                tokens << [match + getch, :inline_delimiter]
              when '$', '@'
                tokens << [match, :escape]
                last_state = state  # scan one token as normal code, then return here
                state = :initial
              else
                raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
              end

            when state.paren
              state.paren_depth += 1
              tokens << [match, :nesting_delimiter]

            when /#{patterns::REGEXP_SYMBOLS}/ox
              tokens << [match, :function]

            else
              raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

            end
            next
# }}}
          else
# {{{
            if match = scan(/[ \t\f]+/)
              kind = :space
              match << scan(/\s*/) unless eos? or heredocs
              tokens << [match, kind]
              next

            elsif match = scan(/\\?\n/)
              kind = :space
              if match == "\n"
                value_expected = true  # FIXME not quite true
                state = :initial if state == :undef_comma_expected
              end
              if heredocs
                unscan  # heredoc scanning needs \n at start
                state = heredocs.shift
                tokens << [:open, state.type]
                heredocs = nil if heredocs.empty?
                next
              else
                match << scan(/\s*/) unless eos?
              end
              tokens << [match, kind]
              next

            elsif match = scan(/\#.*/) or
              ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
                kind = :comment
                value_expected = true
                tokens << [match, kind]
                next

            elsif state == :initial

              # IDENTS #
              if match = scan(/#{patterns::METHOD_NAME}/o)
                if last_token_dot
                  kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
                else
                  kind = patterns::IDENT_KIND[match]
                  if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                    kind = :constant
                  elsif kind == :reserved
                    state = patterns::DEF_NEW_STATE[match]
                  end
                end
                ## experimental!
                value_expected = :set if
                  patterns::REGEXP_ALLOWED[match] or check(/#{patterns::VALUE_FOLLOWS}/o)

              elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
                kind = :ident
                value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

              # OPERATORS #
              elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
                if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                  value_expected = :set
                end
                # self[1] is only set for "." and "::".
                last_token_dot = :set if self[1]
                kind = :operator
                unless inline_block_stack.empty?
                  case match
                  when '{'
                    depth += 1
                  when '}'
                    depth -= 1
                    if depth == 0  # closing brace of inline block reached
                      state, depth, heredocs = inline_block_stack.pop
                      tokens << [match, :inline_delimiter]
                      kind = :inline
                      match = :close
                    end
                  end
                end

              elsif match = scan(/ ['"] /mx)
                tokens << [:open, :string]
                kind = :delimiter
                state = patterns::StringState.new :string, match == '"', match  # important for streaming

              elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
                kind = :instance_variable

              elsif value_expected and match = scan(/\//)
                tokens << [:open, :regexp]
                kind = :delimiter
                interpreted = true
                state = patterns::StringState.new :regexp, interpreted, match

              elsif match = scan(/#{patterns::NUMERIC}/o)
                kind = if self[1] then :float else :integer end

              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                else
                  kind = :symbol
                end

              elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
                value_expected = :set
                kind = :operator

              elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
                indented = self[1] == '-'
                quote = self[3]
                delim = self[quote ? 4 : 2]
                kind = patterns::QUOTE_TO_TYPE[quote]
                tokens << [:open, kind]
                tokens << [match, :delimiter]
                match = :close
                heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
                heredocs ||= []  # create heredocs if empty
                heredocs << heredoc

              elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
                # The fallback block takes the missing key as |k| so that
                # its message can be built; it used to reference an
                # undefined local.
                kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                  raise_inspect 'Unknown fancy string: %%%p' % k, tokens
                end
                tokens << [:open, kind]
                state = patterns::StringState.new kind, interpreted, self[2]
                kind = :delimiter

              elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
                kind = :integer

              elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
                value_expected = :set
                kind = :operator

              elsif match = scan(/`/)
                if last_token_dot
                  kind = :operator
                else
                  tokens << [:open, :shell]
                  kind = :delimiter
                  state = patterns::StringState.new :shell, true, match
                end

              elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
                kind = :global_variable

              elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
                kind = :class_variable

              else
                kind = :error
                match = getch

              end

            elsif state == :def_expected
              state = :initial
              if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                kind = :method
              else
                next
              end

            elsif state == :undef_expected
              state = :undef_comma_expected
              if match = scan(/#{patterns::METHOD_NAME_EX}/o)
                kind = :method
              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                  state.next_state = :undef_comma_expected
                else
                  kind = :symbol
                end
              else
                state = :initial
                next
              end

            elsif state == :undef_comma_expected
              if match = scan(/,/)
                kind = :operator
                state = :undef_expected
              else
                state = :initial
                next
              end

            elsif state == :module_expected
              if match = scan(/<</)
                kind = :operator
              else
                state = :initial
                if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                  kind = :class
                else
                  next
                end
              end

            end
# }}}

            # Only a flag set to :set by this token survives.
            value_expected = value_expected == :set
            last_token_dot = last_token_dot == :set

            if $DEBUG and not kind
              raise_inspect 'Error token %p in line %d' %
                [[match, kind], line], tokens, state
            end
            raise_inspect 'Empty token', tokens unless match

            tokens << [match, kind]

            # Resume the string that was left for a "#$x" / "#@x" variable.
            if last_state
              state = last_state
              last_state = nil
            end
          end
        end

        # Close everything that is still open at the end of the input.
        inline_block_stack << [state] if state.is_a? patterns::StringState
        until inline_block_stack.empty?
          this_block = inline_block_stack.pop
          # Entries saved for a #{...} block have more than one element.
          tokens << [:close, :inline] if this_block.size > 1
          state = this_block.first
          tokens << [:close, state.type]
        end

        tokens
      end

    end

  end
end
# vim:fdm=marker

View File

@@ -0,0 +1,230 @@
module CodeRay
module Scanners
# Word lists and regular expressions used by the Ruby scanner, plus the
# StringState struct that describes a string like literal being scanned.
module Ruby::Patterns # :nodoc:
RESERVED_WORDS = %w[
and def end in or unless begin
defined? ensure module redo super until
BEGIN break do next rescue then
when END case else for retry
while alias class elsif if not return
undef yield
]
DEF_KEYWORDS = %w[ def ]
UNDEF_KEYWORDS = %w[ undef ]
MODULE_KEYWORDS = %w[class module]
# Scanner state to enter after a keyword; :initial for any other word.
DEF_NEW_STATE = WordList.new(:initial).
add(DEF_KEYWORDS, :def_expected).
add(UNDEF_KEYWORDS, :undef_expected).
add(MODULE_KEYWORDS, :module_expected)
IDENTS_ALLOWING_REGEXP = %w[
and or not while until unless if then elsif when sub sub! gsub gsub!
scan slice slice! split
]
# :set for the words listed above, false for everything else.
REGEXP_ALLOWED = WordList.new(false).
add(IDENTS_ALLOWING_REGEXP, :set)
PREDEFINED_CONSTANTS = %w[
nil true false self
DATA ARGV ARGF __FILE__ __LINE__
]
# Token kind of an identifier; unknown words are :ident.
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant)
IDENT = /[a-z_][\w_]*/i
METHOD_NAME = / #{IDENT} [?!]? /ox
METHOD_NAME_OPERATOR = /
\*\*? # multiplication and power
| [-+]@? # plus, minus
| [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
| \[\]=? # array getter and setter
| << | >> # append or shift left, shift right
| <=?>? | >=? # comparison, rocket operator
| ===? # simple equality and case equality
/ox
# Method name as it may appear after "def": an identifier with an optional
# "?", "!" or "=" suffix, or an operator.
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
# Token kind for a quote character; any other quote means :string.
QUOTE_TO_TYPE = {
'`' => :shell,
'/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string
REGEXP_MODIFIERS = /[mixounse]*/
REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0b[01]+(?:_[01]+)*/
EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group () only takes part in the match when a float suffix is
# present.
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
# A symbol literal, or just the ":" and opening quote of a quoted symbol.
SYMBOL = /
:
(?:
#{METHOD_NAME_EX}
| #{PREFIX_VARIABLE}
| ['"]
)
/ox
# TODO investigate \M, \c and \C escape sequences
# (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
# assert_equal(225, ?\M-a)
# assert_equal(129, ?\M-\C-a)
# What may follow a backslash in an interpreted string.
ESCAPE = /
[abefnrstv]
| M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
| [0-7]{1,3}
| x[0-9A-Fa-f]{1,2}
| .
/mx
# Character literal: "?" followed by one character or an escape sequence.
CHARACTER = /
\?
(?:
[^\s\\]
| \\ #{ESCAPE}
)
/mx
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
HEREDOC_OPEN = /
<< (-)? # $1 = float
(?:
( [A-Za-z_0-9]+ ) # $2 = delim
|
( ["'`\/] ) # $3 = quote, type
( [^\n]*? ) \3 # $4 = delim
)
/mx
# An =begin ... =end documentation block.
RUBYDOC = /
=begin (?!\S)
.*?
(?: \Z | ^=end (?!\S) [^\n]* )
/mx
# Everything from __END__ to the end of the input, or up to a line that
# starts with "#CODE".
DATA = /
__END__$
.*?
(?: \Z | (?=^\#CODE) )
/mx
# Checks for a valid value to follow. This enables
# fancy_allowed in method calls.
VALUE_FOLLOWS = /
\s+
(?:
[%\/][^\s=]
|
<<-?\S
|
#{CHARACTER}
)
/x
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
# FIXME: \s and = are only a workaround, they are still allowed
# as delimiters.
FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
# Maps the letter of a %-literal to [token kind, interpreted?]. The empty
# string stands for a %-literal without a letter.
FancyStringType = {
'q' => [:string, false],
'Q' => [:string, true],
'r' => [:regexp, true],
's' => [:symbol, false],
'x' => [:shell, true]
}
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
# Describes the string like literal that is currently being scanned.
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
:paren, :paren_depth, :pattern, :next_state
CLOSING_PAREN = Hash[ *%w[
( )
[ ]
< >
{ }
] ]
CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
OPENING_PAREN = CLOSING_PAREN.invert
# Lazily built lookahead patterns, keyed by [delim, interpreted]. Each one
# finds the next position that needs special handling inside the literal.
STRING_PATTERN = Hash.new { |h, k|
delim, interpreted = *k
delim_pattern = Regexp.escape(delim.dup)
# For bracket delimiters the matching closing bracket is special as well.
if closing_paren = CLOSING_PAREN[delim]
delim_pattern << Regexp.escape(closing_paren)
end
special_escapes =
case interpreted
when :regexp_symbols
'| ' + REGEXP_SYMBOLS.source
when :words
'| \s'
end
h[k] =
if interpreted and not delim == '#'
/ (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
else
/ (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
end
}
# Lazily built lookahead patterns for heredoc bodies, keyed by
# [delim, interpreted, indented].
HEREDOC_PATTERN = Hash.new { |h, k|
delim, interpreted, indented = *k
delim_pattern = Regexp.escape(delim.dup)
delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
h[k] =
if interpreted
/ (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
else
/ (?= #{delim_pattern}() | \\ ) /mx
end
}
# kind::        token kind of the literal, stored as +type+
# interpreted:: whether escapes and interpolation are processed
# delim::       opening delimiter, or the heredoc terminator
# heredoc::     false, or :indented / :linestart for heredocs
def initialize kind, interpreted, delim, heredoc = false
if heredoc
pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
delim = nil
else
pattern = STRING_PATTERN[ [delim, interpreted] ]
# For a bracket delimiter, +delim+ becomes the closing bracket and +paren+
# the opening one; nesting is counted from 1.
if paren = CLOSING_PAREN[delim]
delim, paren = paren, delim
paren_depth = 1
end
end
super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
end
end unless defined? StringState
end
end
end

View File

@@ -0,0 +1,142 @@
module CodeRay
module Scanners
# Scheme scanner for CodeRay (by closure).
# Thanks to murphy for putting CodeRay into public.
class Scheme < Scanner
register_for :scheme
file_extension :scm
CORE_FORMS = %w[
lambda let let* letrec syntax-case define-syntax let-syntax
letrec-syntax begin define quote if or and cond case do delay
quasiquote set! cons force call-with-current-continuation call/cc
]
# Token kind of an identifier: :reserved for the core forms, :ident
# otherwise.
IDENT_KIND = CaseIgnoringWordList.new(:ident).
add(CORE_FORMS, :reserved)
#IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
#IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
#IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./
# Numeric literals are composed bottom-up from the patterns below, once
# per radix (10, 16, 8 and 2).
DIGIT = /\d/
DIGIT10 = DIGIT
DIGIT16 = /[0-9a-f]/i
DIGIT8 = /[0-7]/
DIGIT2 = /[01]/
RADIX16 = /\#x/i
RADIX8 = /\#o/i
RADIX2 = /\#b/i
RADIX10 = /\#d/i
EXACTNESS = /#i|#e/i
SIGN = /[\+-]?/
EXP_MARK = /[esfdl]/i
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
SUFFIX = /#{EXP}?/
# The radix marker is optional only for base 10.
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
# Unsigned integers: digits, optionally followed by "#" characters.
UINT10 = /#{DIGIT10}+#*/
UINT16 = /#{DIGIT16}+#*/
UINT8 = /#{DIGIT8}+#*/
UINT2 = /#{DIGIT2}+#*/
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
# Unsigned reals: a fraction "a/b", a decimal (base 10 only) or an
# unsigned integer.
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
REAL10 = /#{SIGN}#{UREAL10}/
REAL16 = /#{SIGN}#{UREAL16}/
REAL8 = /#{SIGN}#{UREAL8}/
REAL2 = /#{SIGN}#{UREAL2}/
IMAG10 = /i|#{UREAL10}i/
IMAG16 = /i|#{UREAL16}i/
IMAG8 = /i|#{UREAL8}i/
IMAG2 = /i|#{UREAL2}i/
# Complex numbers: "real@real", "real+imag", "real-imag", a signed
# imaginary part on its own, or a plain real.
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
NUM16 = /#{PREFIX16}#{COMPLEX16}/
NUM8 = /#{PREFIX8}#{COMPLEX8}/
NUM2 = /#{PREFIX2}#{COMPLEX2}/
# Any numeric literal, in any of the four radixes.
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
private
# Tokenizes the rest of the input, appending to +tokens+, which is also
# the return value. +options+ is not used here.
#
# States: :initial (ordinary code) and :string (inside a double quoted
# string).
def scan_tokens tokens,options
state = :initial
ident_kind = IDENT_KIND
until eos?
kind = match = nil
case state
when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(/['\(\[\)\]]|#\(/)
kind = :operator_fat
elsif scan(/;.*/)
kind = :comment
elsif scan(/#\\(?:newline|space|.?)/)
kind = :char
elsif scan(/#[ft]/)
kind = :pre_constant
elsif scan(/#{IDENTIFIER}/o)
kind = ident_kind[matched]
elsif scan(/\./)
kind = :operator
elsif scan(/"/)
tokens << [:open, :string]
state = :string
tokens << ['"', :delimiter]
next
# Every numeric literal is reported with the kind :integer.
elsif scan(/#{NUM}/o) and not matched.empty?
kind = :integer
elsif getch
kind = :error
end
when :string
if scan(/[^"\\]+/) or scan(/\\.?/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
next
else
raise_inspect "else case \" reached; %p not handled." % peek(1),
tokens, state
end
else
raise "else case reached"
end
# No branch captures the text itself, so take the scanner's last match.
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens, state unless match
tokens << [match, kind]
end # until eos
# Close a string that is still open at the end of the input.
if state == :string
tokens << [:close, :string]
end
tokens
end #scan_tokens
end #class
end #module scanners
end #module coderay

View File

@@ -0,0 +1,18 @@
module CodeRay
  module Scanners

    load(:html)

    # XML Scanner
    #
    # $Id$
    #
    # For now this adds nothing to Scanners::HTML; it only registers the
    # inherited scanner under the name :xml.
    class XML < HTML

      register_for(:xml)

    end

  end
end