Support multiple multi-word phrases in the search engine (#38446).

Patch by Go MAEDA (@maeda).


git-svn-id: https://svn.redmine.org/redmine/trunk@22886 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
Go MAEDA
2024-06-19 14:27:20 +00:00
parent 07307140b9
commit 5a96997f6a
2 changed files with 10 additions and 1 deletion

View File

@@ -135,7 +135,11 @@ module Redmine
def tokens
# extract tokens from the question
# e.g. hello "bye bye" => ["hello", "bye bye"]
tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')}
tokens = @question.scan(/"[^"]+"|[^\p{Zs}]+/).map do |token|
# Remove quotes from quoted tokens, strip surrounding whitespace
# e.g. "\" foo bar \"" => "foo bar"
token.gsub(/\A"\p{Zs}*|\p{Zs}*"\Z/, '')
end
# tokens must be at least 2 characters long
# but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
# no more than 5 tokens to search for

View File

@@ -30,4 +30,9 @@ class Redmine::Search::Tokenize < ActiveSupport::TestCase
value = "全角\u3000スペース"
assert_equal %w[全角 スペース], Redmine::Search::Tokenizer.new(value).tokens
end
# A query made of two quoted phrases must yield two tokens, one per phrase,
# each without its surrounding double quotes.
def test_tokenize_should_support_multiple_phrases
  tokenizer = Redmine::Search::Tokenizer.new('"phrase one" "phrase two"')
  expected = ["phrase one", "phrase two"]
  assert_equal expected, tokenizer.tokens
end
end