mirror of
https://github.com/redmine/redmine.git
synced 2025-11-12 08:16:03 +01:00
Support multiple multi-word phrases in the search engine (#38446).
Patch by Go MAEDA (@maeda).
git-svn-id: https://svn.redmine.org/redmine/trunk@22886 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
@@ -135,7 +135,11 @@ module Redmine
|
|||||||
def tokens
|
def tokens
|
||||||
# extract tokens from the question
|
# extract tokens from the question
|
||||||
# eg. hello "bye bye" => ["hello", "bye bye"]
|
# eg. hello "bye bye" => ["hello", "bye bye"]
|
||||||
tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')}
|
tokens = @question.scan(/"[^"]+"|[^\p{Zs}]+/).map do |token|
|
||||||
|
# Remove quotes from quoted tokens, strip surrounding whitespace
|
||||||
|
# e.g. "\" foo bar \"" => "foo bar"
|
||||||
|
token.gsub(/\A"\p{Zs}*|\p{Zs}*"\Z/, '')
|
||||||
|
end
|
||||||
# tokens must be at least 2 characters long
|
# tokens must be at least 2 characters long
|
||||||
# but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
|
# but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
|
||||||
# no more than 5 tokens to search for
|
# no more than 5 tokens to search for
|
||||||
|
|||||||
@@ -30,4 +30,9 @@ class Redmine::Search::Tokenize < ActiveSupport::TestCase
|
|||||||
value = "全角\u3000スペース"
|
value = "全角\u3000スペース"
|
||||||
assert_equal %w[全角 スペース], Redmine::Search::Tokenizer.new(value).tokens
|
assert_equal %w[全角 スペース], Redmine::Search::Tokenizer.new(value).tokens
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_tokenize_should_support_multiple_phrases
|
||||||
|
value = '"phrase one" "phrase two"'
|
||||||
|
assert_equal ["phrase one", "phrase two"], Redmine::Search::Tokenizer.new(value).tokens
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user