mirror of
https://github.com/redmine/redmine.git
synced 2025-11-08 22:36:02 +01:00
Added syntax highlightment for repository files (using CodeRay).
Supported languages: c, ruby, rhtml, yaml, html, xml. git-svn-id: http://redmine.rubyforge.org/svn/trunk@644 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
383
vendor/plugins/coderay-0.7.6.227/lib/coderay/tokens.rb
vendored
Normal file
383
vendor/plugins/coderay-0.7.6.227/lib/coderay/tokens.rb
vendored
Normal file
@@ -0,0 +1,383 @@
|
||||
module CodeRay
|
||||
|
||||
# = Tokens
|
||||
#
|
||||
# The Tokens class represents a list of tokens returnd from
|
||||
# a Scanner.
|
||||
#
|
||||
# A token is not a special object, just a two-element Array
|
||||
# consisting of
|
||||
# * the _token_ _kind_ (a Symbol representing the type of the token)
|
||||
# * the _token_ _text_ (the original source of the token in a String)
|
||||
#
|
||||
# A token looks like this:
|
||||
#
|
||||
# [:comment, '# It looks like this']
|
||||
# [:float, '3.1415926']
|
||||
# [:error, '<27><><EFBFBD>']
|
||||
#
|
||||
# Some scanners also yield some kind of sub-tokens, represented by special
|
||||
# token texts, namely :open and :close .
|
||||
#
|
||||
# The Ruby scanner, for example, splits "a string" into:
|
||||
#
|
||||
# [
|
||||
# [:open, :string],
|
||||
# [:delimiter, '"'],
|
||||
# [:content, 'a string'],
|
||||
# [:delimiter, '"'],
|
||||
# [:close, :string]
|
||||
# ]
|
||||
#
|
||||
# Tokens is also the interface between Scanners and Encoders:
|
||||
# The input is split and saved into a Tokens object. The Encoder
|
||||
# then builds the output from this object.
|
||||
#
|
||||
# Thus, the syntax below becomes clear:
|
||||
#
|
||||
# CodeRay.scan('price = 2.59', :ruby).html
|
||||
# # the Tokens object is here -------^
|
||||
#
|
||||
# See how small it is? ;)
|
||||
#
|
||||
# Tokens gives you the power to handle pre-scanned code very easily:
|
||||
# You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
|
||||
# that you put in your DB.
|
||||
#
|
||||
# Tokens' subclass TokenStream allows streaming to save memory.
|
||||
class Tokens < Array
|
||||
|
||||
class << self
|
||||
|
||||
# Convert the token to a string.
|
||||
#
|
||||
# This format is used by Encoders.Tokens.
|
||||
# It can be reverted using read_token.
|
||||
def write_token text, type
|
||||
if text.is_a? String
|
||||
"#{type}\t#{escape(text)}\n"
|
||||
else
|
||||
":#{text}\t#{type}\t\n"
|
||||
end
|
||||
end
|
||||
|
||||
# Read a token from the string.
|
||||
#
|
||||
# Inversion of write_token.
|
||||
#
|
||||
# TODO Test this!
|
||||
def read_token token
|
||||
type, text = token.split("\t", 2)
|
||||
if type[0] == ?:
|
||||
[text.to_sym, type[1..-1].to_sym]
|
||||
else
|
||||
[type.to_sym, unescape(text)]
|
||||
end
|
||||
end
|
||||
|
||||
# Escapes a string for use in write_token.
|
||||
def escape text
|
||||
text.gsub(/[\n\\]/, '\\\\\&')
|
||||
end
|
||||
|
||||
# Unescapes a string created by escape.
|
||||
def unescape text
|
||||
text.gsub(/\\[\n\\]/) { |m| m[1,1] }
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
# Whether the object is a TokenStream.
|
||||
#
|
||||
# Returns false.
|
||||
def stream?
|
||||
false
|
||||
end
|
||||
|
||||
# Iterates over all tokens.
|
||||
#
|
||||
# If a filter is given, only tokens of that kind are yielded.
|
||||
def each kind_filter = nil, &block
|
||||
unless kind_filter
|
||||
super(&block)
|
||||
else
|
||||
super() do |text, kind|
|
||||
next unless kind == kind_filter
|
||||
yield text, kind
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Iterates over all text tokens.
|
||||
# Range tokens like [:open, :string] are left out.
|
||||
#
|
||||
# Example:
|
||||
# tokens.each_text_token { |text, kind| text.replace html_escape(text) }
|
||||
def each_text_token
|
||||
each do |text, kind|
|
||||
next unless text.is_a? ::String
|
||||
yield text, kind
|
||||
end
|
||||
end
|
||||
|
||||
# Encode the tokens using encoder.
|
||||
#
|
||||
# encoder can be
|
||||
# * a symbol like :html oder :statistic
|
||||
# * an Encoder class
|
||||
# * an Encoder object
|
||||
#
|
||||
# options are passed to the encoder.
|
||||
def encode encoder, options = {}
|
||||
unless encoder.is_a? Encoders::Encoder
|
||||
unless encoder.is_a? Class
|
||||
encoder_class = Encoders[encoder]
|
||||
end
|
||||
encoder = encoder_class.new options
|
||||
end
|
||||
encoder.encode_tokens self, options
|
||||
end
|
||||
|
||||
|
||||
# Turn into a string using Encoders::Text.
|
||||
#
|
||||
# +options+ are passed to the encoder if given.
|
||||
def to_s options = {}
|
||||
encode :text, options
|
||||
end
|
||||
|
||||
|
||||
# Redirects unknown methods to encoder calls.
|
||||
#
|
||||
# For example, if you call +tokens.html+, the HTML encoder
|
||||
# is used to highlight the tokens.
|
||||
def method_missing meth, options = {}
|
||||
Encoders[meth].new(options).encode_tokens self
|
||||
end
|
||||
|
||||
# Returns the tokens compressed by joining consecutive
|
||||
# tokens of the same kind.
|
||||
#
|
||||
# This can not be undone, but should yield the same output
|
||||
# in most Encoders. It basically makes the output smaller.
|
||||
#
|
||||
# Combined with dump, it saves space for the cost of time.
|
||||
#
|
||||
# If the scanner is written carefully, this is not required -
|
||||
# for example, consecutive //-comment lines could already be
|
||||
# joined in one comment token by the Scanner.
|
||||
def optimize
|
||||
print ' Tokens#optimize: before: %d - ' % size if $DEBUG
|
||||
last_kind = last_text = nil
|
||||
new = self.class.new
|
||||
for text, kind in self
|
||||
if text.is_a? String
|
||||
if kind == last_kind
|
||||
last_text << text
|
||||
else
|
||||
new << [last_text, last_kind] if last_kind
|
||||
last_text = text
|
||||
last_kind = kind
|
||||
end
|
||||
else
|
||||
new << [last_text, last_kind] if last_kind
|
||||
last_kind = last_text = nil
|
||||
new << [text, kind]
|
||||
end
|
||||
end
|
||||
new << [last_text, last_kind] if last_kind
|
||||
print 'after: %d (%d saved = %2.0f%%)' %
|
||||
[new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
|
||||
new
|
||||
end
|
||||
|
||||
# Compact the object itself; see optimize.
|
||||
def optimize!
|
||||
replace optimize
|
||||
end
|
||||
|
||||
# Ensure that all :open tokens have a correspondent :close one.
|
||||
#
|
||||
# TODO: Test this!
|
||||
def fix
|
||||
# Check token nesting using a stack of kinds.
|
||||
opened = []
|
||||
for token, kind in self
|
||||
if token == :open
|
||||
opened.push kind
|
||||
elsif token == :close
|
||||
expected = opened.pop
|
||||
if kind != expected
|
||||
# Unexpected :close; decide what to do based on the kind:
|
||||
# - token was opened earlier: also close tokens in between
|
||||
# - token was never opened: delete the :close (skip with next)
|
||||
next unless opened.rindex expected
|
||||
tokens << [:close, kind] until (kind = opened.pop) == expected
|
||||
end
|
||||
end
|
||||
tokens << [token, kind]
|
||||
end
|
||||
# Close remaining opened tokens
|
||||
tokens << [:close, kind] while kind = opened.pop
|
||||
tokens
|
||||
end
|
||||
|
||||
def fix!
|
||||
replace fix
|
||||
end
|
||||
|
||||
# Makes sure that:
|
||||
# - newlines are single tokens
|
||||
# (which means all other token are single-line)
|
||||
# - there are no open tokens at the end the line
|
||||
#
|
||||
# This makes it simple for encoders that work line-oriented,
|
||||
# like HTML with list-style numeration.
|
||||
def split_into_lines
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def split_into_lines!
|
||||
replace split_into_lines
|
||||
end
|
||||
|
||||
# Dumps the object into a String that can be saved
|
||||
# in files or databases.
|
||||
#
|
||||
# The dump is created with Marshal.dump;
|
||||
# In addition, it is gzipped using GZip.gzip.
|
||||
#
|
||||
# The returned String object includes Undumping
|
||||
# so it has an #undump method. See Tokens.load.
|
||||
#
|
||||
# You can configure the level of compression,
|
||||
# but the default value 7 should be what you want
|
||||
# in most cases as it is a good compromise between
|
||||
# speed and compression rate.
|
||||
#
|
||||
# See GZip module.
|
||||
def dump gzip_level = 7
|
||||
require 'coderay/helpers/gzip_simple'
|
||||
dump = Marshal.dump self
|
||||
dump = dump.gzip gzip_level
|
||||
dump.extend Undumping
|
||||
end
|
||||
|
||||
# The total size of the tokens.
|
||||
# Should be equal to the input size before
|
||||
# scanning.
|
||||
def text_size
|
||||
size = 0
|
||||
each_text_token do |t, k|
|
||||
size + t.size
|
||||
end
|
||||
size
|
||||
end
|
||||
|
||||
# The total size of the tokens.
|
||||
# Should be equal to the input size before
|
||||
# scanning.
|
||||
def text
|
||||
map { |t, k| t if t.is_a? ::String }.join
|
||||
end
|
||||
|
||||
# Include this module to give an object an #undump
|
||||
# method.
|
||||
#
|
||||
# The string returned by Tokens.dump includes Undumping.
|
||||
module Undumping
|
||||
# Calls Tokens.load with itself.
|
||||
def undump
|
||||
Tokens.load self
|
||||
end
|
||||
end
|
||||
|
||||
# Undump the object using Marshal.load, then
|
||||
# unzip it using GZip.gunzip.
|
||||
#
|
||||
# The result is commonly a Tokens object, but
|
||||
# this is not guaranteed.
|
||||
def Tokens.load dump
|
||||
require 'coderay/helpers/gzip_simple'
|
||||
dump = dump.gunzip
|
||||
@dump = Marshal.load dump
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
# = TokenStream
|
||||
#
|
||||
# The TokenStream class is a fake Array without elements.
|
||||
#
|
||||
# It redirects the method << to a block given at creation.
|
||||
#
|
||||
# This allows scanners and Encoders to use streaming (no
|
||||
# tokens are saved, the input is highlighted the same time it
|
||||
# is scanned) with the same code.
|
||||
#
|
||||
# See CodeRay.encode_stream and CodeRay.scan_stream
|
||||
class TokenStream < Tokens
|
||||
|
||||
# Whether the object is a TokenStream.
|
||||
#
|
||||
# Returns true.
|
||||
def stream?
|
||||
true
|
||||
end
|
||||
|
||||
# The Array is empty, but size counts the tokens given by <<.
|
||||
attr_reader :size
|
||||
|
||||
# Creates a new TokenStream that calls +block+ whenever
|
||||
# its << method is called.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# require 'coderay'
|
||||
#
|
||||
# token_stream = CodeRay::TokenStream.new do |kind, text|
|
||||
# puts 'kind: %s, text size: %d.' % [kind, text.size]
|
||||
# end
|
||||
#
|
||||
# token_stream << [:regexp, '/\d+/']
|
||||
# #-> kind: rexpexp, text size: 5.
|
||||
#
|
||||
def initialize &block
|
||||
raise ArgumentError, 'Block expected for streaming.' unless block
|
||||
@callback = block
|
||||
@size = 0
|
||||
end
|
||||
|
||||
# Calls +block+ with +token+ and increments size.
|
||||
#
|
||||
# Returns self.
|
||||
def << token
|
||||
@callback.call token
|
||||
@size += 1
|
||||
self
|
||||
end
|
||||
|
||||
# This method is not implemented due to speed reasons. Use Tokens.
|
||||
def text_size
|
||||
raise NotImplementedError,
|
||||
'This method is not implemented due to speed reasons.'
|
||||
end
|
||||
|
||||
# A TokenStream cannot be dumped. Use Tokens.
|
||||
def dump
|
||||
raise NotImplementedError, 'A TokenStream cannot be dumped.'
|
||||
end
|
||||
|
||||
# A TokenStream cannot be optimized. Use Tokens.
|
||||
def optimize
|
||||
raise NotImplementedError, 'A TokenStream cannot be optimized.'
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
# Token name abbreviations
|
||||
require 'coderay/token_classes'
|
||||
|
||||
end
|
||||
Reference in New Issue
Block a user