mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 10:56:10 +01:00 
			
		
		
		
	Support elastic search for code search (#10273)
* Support elastic search for code search * Finished elastic search implementation and add some tests * Enable test on drone and added docs * Add new fields to elastic search * Fix bug * remove unused changes * Use indexer alias to keep the gitea indexer version * Improve codes * Some code improvements * The real indexer name changed to xxx.v1 Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
		| @@ -209,6 +209,7 @@ steps: | ||||
|       TAGS: bindata | ||||
|       TEST_LDAP: 1 | ||||
|       USE_REPO_TEST_DIR: 1 | ||||
|       TEST_INDEXER_CODE_ES_URL: "http://elastic:changeme@elasticsearch:9200" | ||||
|     depends_on: | ||||
|       - build | ||||
|  | ||||
|   | ||||
| @@ -428,7 +428,15 @@ STARTUP_TIMEOUT=30s | ||||
|  | ||||
| ; repo indexer by default disabled, since it uses a lot of disk space | ||||
| REPO_INDEXER_ENABLED = false | ||||
| ; Code search engine type, could be `bleve` or `elasticsearch`. | ||||
| REPO_INDEXER_TYPE = bleve | ||||
| ; Index file used for code search. | ||||
| REPO_INDEXER_PATH = indexers/repos.bleve | ||||
| ; Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch, e.g. http://elastic:changeme@localhost:9200 | ||||
| REPO_INDEXER_CONN_STR =  | ||||
| ; Code indexer name, available when `REPO_INDEXER_TYPE` is elasticsearch | ||||
| REPO_INDEXER_NAME = gitea_codes | ||||
|  | ||||
| UPDATE_BUFFER_LEN = 20 | ||||
| MAX_FILE_SIZE = 1048576 | ||||
| ; A comma separated list of glob patterns (see https://github.com/gobwas/glob) to include | ||||
|   | ||||
| @@ -270,7 +270,11 @@ relation to port exhaustion. | ||||
| - `ISSUE_INDEXER_QUEUE_BATCH_NUMBER`: **20**: Batch queue number. | ||||
|  | ||||
| - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). | ||||
| - `REPO_INDEXER_TYPE`: **bleve**: Code search engine type, could be `bleve` or `elasticsearch`. | ||||
| - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. | ||||
| - `REPO_INDEXER_CONN_STR`: ****: Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch, e.g. http://elastic:changeme@localhost:9200 | ||||
| - `REPO_INDEXER_NAME`: **gitea_codes**: Code indexer name, available when `REPO_INDEXER_TYPE` is elasticsearch | ||||
|  | ||||
| - `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files. | ||||
| - `REPO_INDEXER_EXCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **exclude** from the index. Files that match this list will not be indexed, even if they match in `REPO_INDEXER_INCLUDE`. | ||||
| - `REPO_INDEXER_EXCLUDE_VENDORED`: **true**: Exclude vendored files from index. | ||||
|   | ||||
| @@ -98,8 +98,12 @@ menu: | ||||
| - `ISSUE_INDEXER_QUEUE_CONN_STR`: **addrs=127.0.0.1:6379 db=0**: 当 `ISSUE_INDEXER_QUEUE_TYPE` 为 `redis` 时,保存Redis队列的连接字符串。 | ||||
| - `ISSUE_INDEXER_QUEUE_BATCH_NUMBER`: **20**: 队列处理中批量提交数量。 | ||||
|  | ||||
| - `REPO_INDEXER_ENABLED`: **false**: 是否启用代码搜索(启用后会占用比较大的磁盘空间)。 | ||||
| - `REPO_INDEXER_ENABLED`: **false**: 是否启用代码搜索(启用后会占用比较大的磁盘空间,如果是bleve可能需要占用约6倍存储空间)。 | ||||
| - `REPO_INDEXER_TYPE`: **bleve**: 代码搜索引擎类型,可以为 `bleve` 或者 `elasticsearch`。 | ||||
| - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: 用于代码搜索的索引文件路径。 | ||||
| - `REPO_INDEXER_CONN_STR`: ****: 代码搜索引擎连接字符串,当 `REPO_INDEXER_TYPE` 为 `elasticsearch` 时有效。例如: http://elastic:changeme@localhost:9200 | ||||
| - `REPO_INDEXER_NAME`: **gitea_codes**: 代码搜索引擎的名字,当 `REPO_INDEXER_TYPE` 为 `elasticsearch` 时有效。 | ||||
|  | ||||
| - `UPDATE_BUFFER_LEN`: **20**: 代码索引请求的缓冲区长度。 | ||||
| - `MAX_FILE_SIZE`: **1048576**: 进行解析的源代码文件的最大长度,小于该值时才会索引。 | ||||
|  | ||||
|   | ||||
| @@ -58,10 +58,10 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // openIndexer open the index at the specified path, checking for metadata | ||||
| // openBleveIndexer open the index at the specified path, checking for metadata | ||||
| // updates and bleve version updates.  If index needs to be created (or | ||||
| // re-created), returns (nil, nil) | ||||
| func openIndexer(path string, latestVersion int) (bleve.Index, error) { | ||||
| func openBleveIndexer(path string, latestVersion int) (bleve.Index, error) { | ||||
| 	_, err := os.Stat(path) | ||||
| 	if err != nil && os.IsNotExist(err) { | ||||
| 		return nil, nil | ||||
| @@ -104,54 +104,14 @@ func (d *RepoIndexerData) Type() string { | ||||
| 	return repoIndexerDocType | ||||
| } | ||||
|  | ||||
| func addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | ||||
| 	// Ignore vendored files in code search | ||||
| 	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | ||||
| 		return nil | ||||
| 	} | ||||
| 	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | ||||
| 		RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | ||||
| 		return fmt.Errorf("Misformatted git cat-file output: %v", err) | ||||
| 	} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | ||||
| 		return addDelete(update.Filename, repo, batch) | ||||
| 	} | ||||
|  | ||||
| 	fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). | ||||
| 		RunInDirBytes(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} else if !base.IsTextFile(fileContents) { | ||||
| 		// FIXME: UTF-16 files will probably fail here | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	id := filenameIndexerID(repo.ID, update.Filename) | ||||
| 	return batch.Index(id, &RepoIndexerData{ | ||||
| 		RepoID:    repo.ID, | ||||
| 		CommitID:  commitSha, | ||||
| 		Content:   string(charset.ToUTF8DropErrors(fileContents)), | ||||
| 		Language:  analyze.GetCodeLanguage(update.Filename, fileContents), | ||||
| 		UpdatedAt: time.Now().UTC(), | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func addDelete(filename string, repo *models.Repository, batch rupture.FlushingBatch) error { | ||||
| 	id := filenameIndexerID(repo.ID, filename) | ||||
| 	return batch.Delete(id) | ||||
| } | ||||
|  | ||||
| const ( | ||||
| 	repoIndexerAnalyzer      = "repoIndexerAnalyzer" | ||||
| 	repoIndexerDocType       = "repoIndexerDocType" | ||||
| 	repoIndexerLatestVersion = 5 | ||||
| ) | ||||
|  | ||||
| // createRepoIndexer create a repo indexer if one does not already exist | ||||
| func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) { | ||||
| // createBleveIndexer create a bleve repo indexer if one does not already exist | ||||
| func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) { | ||||
| 	docMapping := bleve.NewDocumentMapping() | ||||
| 	numericFieldMapping := bleve.NewNumericFieldMapping() | ||||
| 	numericFieldMapping.IncludeInAll = false | ||||
| @@ -199,18 +159,6 @@ func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) { | ||||
| 	return indexer, nil | ||||
| } | ||||
|  | ||||
| func filenameIndexerID(repoID int64, filename string) string { | ||||
| 	return indexerID(repoID) + "_" + filename | ||||
| } | ||||
|  | ||||
| func filenameOfIndexerID(indexerID string) string { | ||||
| 	index := strings.IndexByte(indexerID, '_') | ||||
| 	if index == -1 { | ||||
| 		log.Error("Unexpected ID in repo indexer: %s", indexerID) | ||||
| 	} | ||||
| 	return indexerID[index+1:] | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	_ Indexer = &BleveIndexer{} | ||||
| ) | ||||
| @@ -230,10 +178,51 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { | ||||
| 	return indexer, created, err | ||||
| } | ||||
|  | ||||
// addUpdate indexes (or re-indexes) a single changed file of the repository
// into the flushing batch. Vendored files and non-text files are skipped;
// files larger than MaxIndexerFileSize are removed from the index instead,
// so previously indexed content for that path does not go stale.
func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
	// Ignore vendored files in code search
	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
		return nil
	}

	// Ask git for the blob size first so oversized files are never read
	// into memory at all.
	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
		RunInDir(repo.RepoPath())
	if err != nil {
		return err
	}
	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
		return fmt.Errorf("Misformatted git cat-file output: %v", err)
	} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
		// Too large to index: drop any existing index entry for this path.
		return b.addDelete(update.Filename, repo, batch)
	}

	fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha).
		RunInDirBytes(repo.RepoPath())
	if err != nil {
		return err
	} else if !base.IsTextFile(fileContents) {
		// FIXME: UTF-16 files will probably fail here
		return nil
	}

	id := filenameIndexerID(repo.ID, update.Filename)
	return batch.Index(id, &RepoIndexerData{
		RepoID:    repo.ID,
		CommitID:  commitSha,
		Content:   string(charset.ToUTF8DropErrors(fileContents)),
		Language:  analyze.GetCodeLanguage(update.Filename, fileContents),
		UpdatedAt: time.Now().UTC(),
	})
}
|  | ||||
| func (b *BleveIndexer) addDelete(filename string, repo *models.Repository, batch rupture.FlushingBatch) error { | ||||
| 	id := filenameIndexerID(repo.ID, filename) | ||||
| 	return batch.Delete(id) | ||||
| } | ||||
|  | ||||
| // init init the indexer | ||||
| func (b *BleveIndexer) init() (bool, error) { | ||||
| 	var err error | ||||
| 	b.indexer, err = openIndexer(b.indexDir, repoIndexerLatestVersion) | ||||
| 	b.indexer, err = openBleveIndexer(b.indexDir, repoIndexerLatestVersion) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| @@ -241,7 +230,7 @@ func (b *BleveIndexer) init() (bool, error) { | ||||
| 		return false, nil | ||||
| 	} | ||||
|  | ||||
| 	b.indexer, err = createRepoIndexer(b.indexDir, repoIndexerLatestVersion) | ||||
| 	b.indexer, err = createBleveIndexer(b.indexDir, repoIndexerLatestVersion) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| @@ -262,38 +251,19 @@ func (b *BleveIndexer) Close() { | ||||
| } | ||||
|  | ||||
| // Index indexes the data | ||||
| func (b *BleveIndexer) Index(repoID int64) error { | ||||
| 	repo, err := models.GetRepositoryByID(repoID) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	sha, err := getDefaultBranchSha(repo) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	changes, err := getRepoChanges(repo, sha) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} else if changes == nil { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| func (b *BleveIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error { | ||||
| 	batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) | ||||
| 	for _, update := range changes.Updates { | ||||
| 		if err := addUpdate(sha, update, repo, batch); err != nil { | ||||
| 		if err := b.addUpdate(sha, update, repo, batch); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	for _, filename := range changes.RemovedFilenames { | ||||
| 		if err := addDelete(filename, repo, batch); err != nil { | ||||
| 		if err := b.addDelete(filename, repo, batch); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if err = batch.Flush(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha) | ||||
| 	return batch.Flush() | ||||
| } | ||||
|  | ||||
| // Delete deletes indexes by ids | ||||
|   | ||||
| @@ -6,21 +6,15 @@ package code | ||||
|  | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"path/filepath" | ||||
| 	"testing" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| 	"code.gitea.io/gitea/modules/util" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
| func TestMain(m *testing.M) { | ||||
| 	models.MainTest(m, filepath.Join("..", "..", "..")) | ||||
| } | ||||
|  | ||||
| func TestIndexAndSearch(t *testing.T) { | ||||
| func TestBleveIndexAndSearch(t *testing.T) { | ||||
| 	models.PrepareTestEnv(t) | ||||
|  | ||||
| 	dir, err := ioutil.TempDir("", "bleve.index") | ||||
| @@ -31,10 +25,9 @@ func TestIndexAndSearch(t *testing.T) { | ||||
| 	} | ||||
| 	defer util.RemoveAll(dir) | ||||
|  | ||||
| 	setting.Indexer.RepoIndexerEnabled = true | ||||
| 	idx, _, err := NewBleveIndexer(dir) | ||||
| 	if err != nil { | ||||
| 		assert.Fail(t, "Unable to create indexer Error: %v", err) | ||||
| 		assert.Fail(t, "Unable to create bleve indexer Error: %v", err) | ||||
| 		if idx != nil { | ||||
| 			idx.Close() | ||||
| 		} | ||||
| @@ -42,45 +35,5 @@ func TestIndexAndSearch(t *testing.T) { | ||||
| 	} | ||||
| 	defer idx.Close() | ||||
|  | ||||
| 	err = idx.Index(1) | ||||
| 	assert.NoError(t, err) | ||||
|  | ||||
| 	var ( | ||||
| 		keywords = []struct { | ||||
| 			Keyword string | ||||
| 			IDs     []int64 | ||||
| 			Langs   int | ||||
| 		}{ | ||||
| 			{ | ||||
| 				Keyword: "Description", | ||||
| 				IDs:     []int64{1}, | ||||
| 				Langs:   1, | ||||
| 			}, | ||||
| 			{ | ||||
| 				Keyword: "repo1", | ||||
| 				IDs:     []int64{1}, | ||||
| 				Langs:   1, | ||||
| 			}, | ||||
| 			{ | ||||
| 				Keyword: "non-exist", | ||||
| 				IDs:     []int64{}, | ||||
| 				Langs:   0, | ||||
| 			}, | ||||
| 		} | ||||
| 	) | ||||
|  | ||||
| 	for _, kw := range keywords { | ||||
| 		total, res, langs, err := idx.Search(nil, "", kw.Keyword, 1, 10) | ||||
| 		assert.NoError(t, err) | ||||
| 		assert.EqualValues(t, len(kw.IDs), total) | ||||
|  | ||||
| 		assert.NotNil(t, langs) | ||||
| 		assert.Len(t, langs, kw.Langs) | ||||
|  | ||||
| 		var ids = make([]int64, 0, len(res)) | ||||
| 		for _, hit := range res { | ||||
| 			ids = append(ids, hit.RepoID) | ||||
| 		} | ||||
| 		assert.EqualValues(t, kw.IDs, ids) | ||||
| 	} | ||||
| 	testIndexer("beleve", t, idx) | ||||
| } | ||||
|   | ||||
							
								
								
									
										385
									
								
								modules/indexer/code/elastic_search.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										385
									
								
								modules/indexer/code/elastic_search.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,385 @@ | ||||
| // Copyright 2020 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package code | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
| 	"code.gitea.io/gitea/modules/analyze" | ||||
| 	"code.gitea.io/gitea/modules/base" | ||||
| 	"code.gitea.io/gitea/modules/charset" | ||||
| 	"code.gitea.io/gitea/modules/git" | ||||
| 	"code.gitea.io/gitea/modules/log" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| 	"code.gitea.io/gitea/modules/timeutil" | ||||
|  | ||||
| 	"github.com/go-enry/go-enry/v2" | ||||
| 	"github.com/olivere/elastic/v7" | ||||
| ) | ||||
|  | ||||
const (
	// esRepoIndexerLatestVersion is appended to the alias name to form the
	// real (versioned) index name; bump it when the index mapping changes.
	esRepoIndexerLatestVersion = 1
)

var (
	// Compile-time check that ElasticSearchIndexer satisfies Indexer.
	_ Indexer = &ElasticSearchIndexer{}
)
|  | ||||
// ElasticSearchIndexer implements Indexer interface
type ElasticSearchIndexer struct {
	client           *elastic.Client // underlying elasticsearch client
	indexerAliasName string          // stable alias that points at the current versioned index
}
|  | ||||
| type elasticLogger struct { | ||||
| 	*log.Logger | ||||
| } | ||||
|  | ||||
| func (l elasticLogger) Printf(format string, args ...interface{}) { | ||||
| 	_ = l.Logger.Log(2, l.Logger.GetLevel(), format, args...) | ||||
| } | ||||
|  | ||||
| // NewElasticSearchIndexer creates a new elasticsearch indexer | ||||
| func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, bool, error) { | ||||
| 	opts := []elastic.ClientOptionFunc{ | ||||
| 		elastic.SetURL(url), | ||||
| 		elastic.SetSniff(false), | ||||
| 		elastic.SetHealthcheckInterval(10 * time.Second), | ||||
| 		elastic.SetGzip(false), | ||||
| 	} | ||||
|  | ||||
| 	logger := elasticLogger{log.GetLogger(log.DEFAULT)} | ||||
|  | ||||
| 	if logger.GetLevel() == log.TRACE || logger.GetLevel() == log.DEBUG { | ||||
| 		opts = append(opts, elastic.SetTraceLog(logger)) | ||||
| 	} else if logger.GetLevel() == log.ERROR || logger.GetLevel() == log.CRITICAL || logger.GetLevel() == log.FATAL { | ||||
| 		opts = append(opts, elastic.SetErrorLog(logger)) | ||||
| 	} else if logger.GetLevel() == log.INFO || logger.GetLevel() == log.WARN { | ||||
| 		opts = append(opts, elastic.SetInfoLog(logger)) | ||||
| 	} | ||||
|  | ||||
| 	client, err := elastic.NewClient(opts...) | ||||
| 	if err != nil { | ||||
| 		return nil, false, err | ||||
| 	} | ||||
|  | ||||
| 	indexer := &ElasticSearchIndexer{ | ||||
| 		client:           client, | ||||
| 		indexerAliasName: indexerName, | ||||
| 	} | ||||
| 	exists, err := indexer.init() | ||||
|  | ||||
| 	return indexer, !exists, err | ||||
| } | ||||
|  | ||||
const (
	// defaultMapping is the index mapping used when creating a new
	// elasticsearch index: one document per source file, carrying its
	// repository ID, content, commit, detected language and update time.
	defaultMapping = `{
		"mappings": {
			"properties": {
				"repo_id": {
					"type": "long",
					"index": true
				},
				"content": {
					"type": "text",
					"index": true
				},
				"commit_id": {
					"type": "keyword",
					"index": true
				},
				"language": {
					"type": "keyword",
					"index": true
				},
				"updated_at": {
					"type": "long",
					"index": true
				}
			}
		}
	}`
)
|  | ||||
| func (b *ElasticSearchIndexer) realIndexerName() string { | ||||
| 	return fmt.Sprintf("%s.v%d", b.indexerAliasName, esRepoIndexerLatestVersion) | ||||
| } | ||||
|  | ||||
| // Init will initialize the indexer | ||||
| func (b *ElasticSearchIndexer) init() (bool, error) { | ||||
| 	ctx := context.Background() | ||||
| 	exists, err := b.client.IndexExists(b.realIndexerName()).Do(ctx) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 	if !exists { | ||||
| 		var mapping = defaultMapping | ||||
|  | ||||
| 		createIndex, err := b.client.CreateIndex(b.realIndexerName()).BodyString(mapping).Do(ctx) | ||||
| 		if err != nil { | ||||
| 			return false, err | ||||
| 		} | ||||
| 		if !createIndex.Acknowledged { | ||||
| 			return false, fmt.Errorf("create index %s with %s failed", b.realIndexerName(), mapping) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// check version | ||||
| 	r, err := b.client.Aliases().Do(ctx) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
|  | ||||
| 	realIndexerNames := r.IndicesByAlias(b.indexerAliasName) | ||||
| 	if len(realIndexerNames) < 1 { | ||||
| 		res, err := b.client.Alias(). | ||||
| 			Add(b.realIndexerName(), b.indexerAliasName). | ||||
| 			Do(ctx) | ||||
| 		if err != nil { | ||||
| 			return false, err | ||||
| 		} | ||||
| 		if !res.Acknowledged { | ||||
| 			return false, fmt.Errorf("") | ||||
| 		} | ||||
| 	} else if len(realIndexerNames) >= 1 && realIndexerNames[0] < b.realIndexerName() { | ||||
| 		log.Warn("Found older gitea indexer named %s, but we will create a new one %s and keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", | ||||
| 			realIndexerNames[0], b.realIndexerName()) | ||||
| 		res, err := b.client.Alias(). | ||||
| 			Remove(realIndexerNames[0], b.indexerAliasName). | ||||
| 			Add(b.realIndexerName(), b.indexerAliasName). | ||||
| 			Do(ctx) | ||||
| 		if err != nil { | ||||
| 			return false, err | ||||
| 		} | ||||
| 		if !res.Acknowledged { | ||||
| 			return false, fmt.Errorf("") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return exists, nil | ||||
| } | ||||
|  | ||||
| func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | ||||
| 	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | ||||
| 		RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | ||||
| 		return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) | ||||
| 	} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | ||||
| 		return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil | ||||
| 	} | ||||
|  | ||||
| 	fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). | ||||
| 		RunInDirBytes(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} else if !base.IsTextFile(fileContents) { | ||||
| 		// FIXME: UTF-16 files will probably fail here | ||||
| 		return nil, nil | ||||
| 	} | ||||
|  | ||||
| 	id := filenameIndexerID(repo.ID, update.Filename) | ||||
|  | ||||
| 	return []elastic.BulkableRequest{ | ||||
| 		elastic.NewBulkIndexRequest(). | ||||
| 			Index(b.indexerAliasName). | ||||
| 			Id(id). | ||||
| 			Doc(map[string]interface{}{ | ||||
| 				"repo_id":    repo.ID, | ||||
| 				"content":    string(charset.ToUTF8DropErrors(fileContents)), | ||||
| 				"commit_id":  sha, | ||||
| 				"language":   analyze.GetCodeLanguage(update.Filename, fileContents), | ||||
| 				"updated_at": timeutil.TimeStampNow(), | ||||
| 			}), | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
| func (b *ElasticSearchIndexer) addDelete(filename string, repo *models.Repository) elastic.BulkableRequest { | ||||
| 	id := filenameIndexerID(repo.ID, filename) | ||||
| 	return elastic.NewBulkDeleteRequest(). | ||||
| 		Index(b.indexerAliasName). | ||||
| 		Id(id) | ||||
| } | ||||
|  | ||||
| // Index will save the index data | ||||
| func (b *ElasticSearchIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error { | ||||
| 	reqs := make([]elastic.BulkableRequest, 0) | ||||
| 	for _, update := range changes.Updates { | ||||
| 		updateReqs, err := b.addUpdate(sha, update, repo) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if len(updateReqs) > 0 { | ||||
| 			reqs = append(reqs, updateReqs...) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	for _, filename := range changes.RemovedFilenames { | ||||
| 		reqs = append(reqs, b.addDelete(filename, repo)) | ||||
| 	} | ||||
|  | ||||
| 	if len(reqs) > 0 { | ||||
| 		_, err := b.client.Bulk(). | ||||
| 			Index(b.indexerAliasName). | ||||
| 			Add(reqs...). | ||||
| 			Do(context.Background()) | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Delete deletes indexes by ids | ||||
| func (b *ElasticSearchIndexer) Delete(repoID int64) error { | ||||
| 	_, err := b.client.DeleteByQuery(b.indexerAliasName). | ||||
| 		Query(elastic.NewTermsQuery("repo_id", repoID)). | ||||
| 		Do(context.Background()) | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) { | ||||
| 	hits := make([]*SearchResult, 0, pageSize) | ||||
| 	for _, hit := range searchResult.Hits.Hits { | ||||
| 		// FIXME: There is no way to get the position the keyword on the content currently on the same request. | ||||
| 		// So we get it from content, this may made the query slower. See | ||||
| 		// https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291 | ||||
| 		var startIndex, endIndex int = -1, -1 | ||||
| 		c, ok := hit.Highlight["content"] | ||||
| 		if ok && len(c) > 0 { | ||||
| 			var subStr = make([]rune, 0, len(kw)) | ||||
| 			startIndex = strings.IndexFunc(c[0], func(r rune) bool { | ||||
| 				if len(subStr) >= len(kw) { | ||||
| 					subStr = subStr[1:] | ||||
| 				} | ||||
| 				subStr = append(subStr, r) | ||||
| 				return strings.EqualFold(kw, string(subStr)) | ||||
| 			}) | ||||
| 			if startIndex > -1 { | ||||
| 				endIndex = startIndex + len(kw) | ||||
| 			} else { | ||||
| 				panic(fmt.Sprintf("1===%#v", hit.Highlight)) | ||||
| 			} | ||||
| 		} else { | ||||
| 			panic(fmt.Sprintf("2===%#v", hit.Highlight)) | ||||
| 		} | ||||
|  | ||||
| 		repoID, fileName := parseIndexerID(hit.Id) | ||||
| 		var res = make(map[string]interface{}) | ||||
| 		if err := json.Unmarshal(hit.Source, &res); err != nil { | ||||
| 			return 0, nil, nil, err | ||||
| 		} | ||||
|  | ||||
| 		language := res["language"].(string) | ||||
|  | ||||
| 		hits = append(hits, &SearchResult{ | ||||
| 			RepoID:      repoID, | ||||
| 			Filename:    fileName, | ||||
| 			CommitID:    res["commit_id"].(string), | ||||
| 			Content:     res["content"].(string), | ||||
| 			UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)), | ||||
| 			Language:    language, | ||||
| 			StartIndex:  startIndex, | ||||
| 			EndIndex:    endIndex, | ||||
| 			Color:       enry.GetColor(language), | ||||
| 		}) | ||||
| 	} | ||||
|  | ||||
| 	return searchResult.TotalHits(), hits, extractAggs(searchResult), nil | ||||
| } | ||||
|  | ||||
| func extractAggs(searchResult *elastic.SearchResult) []*SearchResultLanguages { | ||||
| 	var searchResultLanguages []*SearchResultLanguages | ||||
| 	agg, found := searchResult.Aggregations.Terms("language") | ||||
| 	if found { | ||||
| 		searchResultLanguages = make([]*SearchResultLanguages, 0, 10) | ||||
|  | ||||
| 		for _, bucket := range agg.Buckets { | ||||
| 			searchResultLanguages = append(searchResultLanguages, &SearchResultLanguages{ | ||||
| 				Language: bucket.Key.(string), | ||||
| 				Color:    enry.GetColor(bucket.Key.(string)), | ||||
| 				Count:    int(bucket.DocCount), | ||||
| 			}) | ||||
| 		} | ||||
| 	} | ||||
| 	return searchResultLanguages | ||||
| } | ||||
|  | ||||
// Search searches for codes and language stats by given conditions.
// page is 1-based; repoIDs restricts the search when non-empty; when
// language is set, a separate unfiltered query is still run so the
// language aggregation covers all languages, not only the selected one.
func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
	kwQuery := elastic.NewMultiMatchQuery(keyword, "content")
	query := elastic.NewBoolQuery()
	query = query.Must(kwQuery)
	if len(repoIDs) > 0 {
		var repoStrs = make([]interface{}, 0, len(repoIDs))
		for _, repoID := range repoIDs {
			repoStrs = append(repoStrs, repoID)
		}
		repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
		query = query.Must(repoQuery)
	}

	var (
		start int
		// The highlighter wraps matches in <em> tags, so that is the string
		// convertResult must look for in the highlighted fragments.
		kw          = "<em>" + keyword + "</em>"
		aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
	)

	if page > 0 {
		start = (page - 1) * pageSize
	}

	if len(language) == 0 {
		searchResult, err := b.client.Search().
			Index(b.indexerAliasName).
			Aggregation("language", aggregation).
			Query(query).
			Highlight(elastic.NewHighlight().Field("content")).
			Sort("repo_id", true).
			From(start).Size(pageSize).
			Do(context.Background())
		if err != nil {
			return 0, nil, nil, err
		}

		return convertResult(searchResult, kw, pageSize)
	}

	langQuery := elastic.NewMatchQuery("language", language)
	countResult, err := b.client.Search().
		Index(b.indexerAliasName).
		Aggregation("language", aggregation).
		Query(query).
		Size(0). // We only need the stats information
		Do(context.Background())
	if err != nil {
		return 0, nil, nil, err
	}

	query = query.Must(langQuery)
	searchResult, err := b.client.Search().
		Index(b.indexerAliasName).
		Query(query).
		Highlight(elastic.NewHighlight().Field("content")).
		Sort("repo_id", true).
		From(start).Size(pageSize).
		Do(context.Background())
	if err != nil {
		return 0, nil, nil, err
	}

	// Hits come from the language-filtered query; the aggregation comes
	// from the unfiltered count query above.
	total, hits, _, err := convertResult(searchResult, kw, pageSize)

	return total, hits, extractAggs(countResult), err
}
|  | ||||
// Close implements the Indexer interface; the elasticsearch client needs no
// explicit shutdown, so this is a no-op.
func (b *ElasticSearchIndexer) Close() {}
							
								
								
									
										36
									
								
								modules/indexer/code/elastic_search_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								modules/indexer/code/elastic_search_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| // Copyright 2020 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package code | ||||
|  | ||||
| import ( | ||||
| 	"os" | ||||
| 	"testing" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
| func TestESIndexAndSearch(t *testing.T) { | ||||
| 	models.PrepareTestEnv(t) | ||||
|  | ||||
| 	u := os.Getenv("TEST_INDEXER_CODE_ES_URL") | ||||
| 	if u == "" { | ||||
| 		t.SkipNow() | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	indexer, _, err := NewElasticSearchIndexer(u, "gitea_codes") | ||||
| 	if err != nil { | ||||
| 		assert.Fail(t, "Unable to create ES indexer Error: %v", err) | ||||
| 		if indexer != nil { | ||||
| 			indexer.Close() | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 	defer indexer.Close() | ||||
|  | ||||
| 	testIndexer("elastic_search", t, indexer) | ||||
| } | ||||
| @@ -7,8 +7,11 @@ package code | ||||
| import ( | ||||
| 	"context" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
| 	"code.gitea.io/gitea/modules/graceful" | ||||
| 	"code.gitea.io/gitea/modules/log" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| @@ -37,12 +40,33 @@ type SearchResultLanguages struct { | ||||
|  | ||||
| // Indexer defines an interface to indexer issues contents | ||||
| type Indexer interface { | ||||
| 	Index(repoID int64) error | ||||
| 	Index(repo *models.Repository, sha string, changes *repoChanges) error | ||||
| 	Delete(repoID int64) error | ||||
| 	Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) | ||||
| 	Close() | ||||
| } | ||||
|  | ||||
| func filenameIndexerID(repoID int64, filename string) string { | ||||
| 	return indexerID(repoID) + "_" + filename | ||||
| } | ||||
|  | ||||
| func parseIndexerID(indexerID string) (int64, string) { | ||||
| 	index := strings.IndexByte(indexerID, '_') | ||||
| 	if index == -1 { | ||||
| 		log.Error("Unexpected ID in repo indexer: %s", indexerID) | ||||
| 	} | ||||
| 	repoID, _ := strconv.ParseInt(indexerID[:index], 10, 64) | ||||
| 	return repoID, indexerID[index+1:] | ||||
| } | ||||
|  | ||||
| func filenameOfIndexerID(indexerID string) string { | ||||
| 	index := strings.IndexByte(indexerID, '_') | ||||
| 	if index == -1 { | ||||
| 		log.Error("Unexpected ID in repo indexer: %s", indexerID) | ||||
| 	} | ||||
| 	return indexerID[index+1:] | ||||
| } | ||||
|  | ||||
| // Init initialize the repo indexer | ||||
| func Init() { | ||||
| 	if !setting.Indexer.RepoIndexerEnabled { | ||||
| @@ -63,33 +87,61 @@ func Init() { | ||||
| 	waitChannel := make(chan time.Duration) | ||||
| 	go func() { | ||||
| 		start := time.Now() | ||||
| 		var ( | ||||
| 			rIndexer Indexer | ||||
| 			populate bool | ||||
| 			err      error | ||||
| 		) | ||||
| 		switch setting.Indexer.RepoType { | ||||
| 		case "bleve": | ||||
| 			log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath) | ||||
| 			defer func() { | ||||
| 				if err := recover(); err != nil { | ||||
| 					log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2)) | ||||
| 					log.Error("The indexer files are likely corrupted and may need to be deleted") | ||||
| 				log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.RepoPath) | ||||
| 				cancel() | ||||
| 				indexer.Close() | ||||
| 				close(waitChannel) | ||||
| 				log.Fatal("PID: %d Unable to initialize the Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) | ||||
| 					log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath) | ||||
| 				} | ||||
| 			}() | ||||
| 		bleveIndexer, created, err := NewBleveIndexer(setting.Indexer.RepoPath) | ||||
|  | ||||
| 			rIndexer, populate, err = NewBleveIndexer(setting.Indexer.RepoPath) | ||||
| 			if err != nil { | ||||
| 			if bleveIndexer != nil { | ||||
| 				bleveIndexer.Close() | ||||
| 				if rIndexer != nil { | ||||
| 					rIndexer.Close() | ||||
| 				} | ||||
| 				cancel() | ||||
| 				indexer.Close() | ||||
| 				close(waitChannel) | ||||
| 			log.Fatal("PID: %d Unable to initialize the Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) | ||||
| 				log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) | ||||
| 			} | ||||
| 		indexer.set(bleveIndexer) | ||||
| 		case "elasticsearch": | ||||
| 			log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoConnStr) | ||||
| 			defer func() { | ||||
| 				if err := recover(); err != nil { | ||||
| 					log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2)) | ||||
| 					log.Error("The indexer files are likely corrupted and may need to be deleted") | ||||
| 					log.Error("You can completely remove the \"%s\" index to make Gitea recreate the indexes", setting.Indexer.RepoConnStr) | ||||
| 				} | ||||
| 			}() | ||||
|  | ||||
| 			rIndexer, populate, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) | ||||
| 			if err != nil { | ||||
| 				if rIndexer != nil { | ||||
| 					rIndexer.Close() | ||||
| 				} | ||||
| 				cancel() | ||||
| 				indexer.Close() | ||||
| 				close(waitChannel) | ||||
| 				log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) | ||||
| 			} | ||||
| 		default: | ||||
| 			log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType) | ||||
| 		} | ||||
|  | ||||
| 		indexer.set(rIndexer) | ||||
|  | ||||
| 		go processRepoIndexerOperationQueue(indexer) | ||||
|  | ||||
| 		if created { | ||||
| 		if populate { | ||||
| 			go populateRepoIndexer() | ||||
| 		} | ||||
| 		select { | ||||
|   | ||||
							
								
								
									
										83
									
								
								modules/indexer/code/indexer_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								modules/indexer/code/indexer_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| // Copyright 2020 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package code | ||||
|  | ||||
| import ( | ||||
| 	"path/filepath" | ||||
| 	"testing" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
| func TestMain(m *testing.M) { | ||||
| 	models.MainTest(m, filepath.Join("..", "..", "..")) | ||||
| } | ||||
|  | ||||
| func testIndexer(name string, t *testing.T, indexer Indexer) { | ||||
| 	t.Run(name, func(t *testing.T) { | ||||
| 		var repoID int64 = 1 | ||||
| 		err := index(indexer, repoID) | ||||
| 		assert.NoError(t, err) | ||||
| 		var ( | ||||
| 			keywords = []struct { | ||||
| 				RepoIDs []int64 | ||||
| 				Keyword string | ||||
| 				IDs     []int64 | ||||
| 				Langs   int | ||||
| 			}{ | ||||
| 				{ | ||||
| 					RepoIDs: nil, | ||||
| 					Keyword: "Description", | ||||
| 					IDs:     []int64{repoID}, | ||||
| 					Langs:   1, | ||||
| 				}, | ||||
| 				{ | ||||
| 					RepoIDs: []int64{2}, | ||||
| 					Keyword: "Description", | ||||
| 					IDs:     []int64{}, | ||||
| 					Langs:   0, | ||||
| 				}, | ||||
| 				{ | ||||
| 					RepoIDs: nil, | ||||
| 					Keyword: "repo1", | ||||
| 					IDs:     []int64{repoID}, | ||||
| 					Langs:   1, | ||||
| 				}, | ||||
| 				{ | ||||
| 					RepoIDs: []int64{2}, | ||||
| 					Keyword: "repo1", | ||||
| 					IDs:     []int64{}, | ||||
| 					Langs:   0, | ||||
| 				}, | ||||
| 				{ | ||||
| 					RepoIDs: nil, | ||||
| 					Keyword: "non-exist", | ||||
| 					IDs:     []int64{}, | ||||
| 					Langs:   0, | ||||
| 				}, | ||||
| 			} | ||||
| 		) | ||||
|  | ||||
| 		for _, kw := range keywords { | ||||
| 			t.Run(kw.Keyword, func(t *testing.T) { | ||||
| 				total, res, langs, err := indexer.Search(kw.RepoIDs, "", kw.Keyword, 1, 10) | ||||
| 				assert.NoError(t, err) | ||||
| 				assert.EqualValues(t, len(kw.IDs), total) | ||||
| 				assert.EqualValues(t, kw.Langs, len(langs)) | ||||
|  | ||||
| 				var ids = make([]int64, 0, len(res)) | ||||
| 				for _, hit := range res { | ||||
| 					ids = append(ids, hit.RepoID) | ||||
| 					assert.EqualValues(t, "# repo1\n\nDescription for repo1", hit.Content) | ||||
| 				} | ||||
| 				assert.EqualValues(t, kw.IDs, ids) | ||||
| 			}) | ||||
| 		} | ||||
|  | ||||
| 		assert.NoError(t, indexer.Delete(repoID)) | ||||
| 	}) | ||||
| } | ||||
| @@ -10,7 +10,6 @@ import ( | ||||
| 	"code.gitea.io/gitea/models" | ||||
| 	"code.gitea.io/gitea/modules/graceful" | ||||
| 	"code.gitea.io/gitea/modules/log" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| ) | ||||
|  | ||||
| type repoIndexerOperation struct { | ||||
| @@ -25,6 +24,30 @@ func initQueue(queueLength int) { | ||||
| 	repoIndexerOperationQueue = make(chan repoIndexerOperation, queueLength) | ||||
| } | ||||
|  | ||||
| func index(indexer Indexer, repoID int64) error { | ||||
| 	repo, err := models.GetRepositoryByID(repoID) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	sha, err := getDefaultBranchSha(repo) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	changes, err := getRepoChanges(repo, sha) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} else if changes == nil { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	if err := indexer.Index(repo, sha, changes); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha) | ||||
| } | ||||
|  | ||||
| func processRepoIndexerOperationQueue(indexer Indexer) { | ||||
| 	for { | ||||
| 		select { | ||||
| @@ -35,7 +58,7 @@ func processRepoIndexerOperationQueue(indexer Indexer) { | ||||
| 					log.Error("indexer.Delete: %v", err) | ||||
| 				} | ||||
| 			} else { | ||||
| 				if err = indexer.Index(op.repoID); err != nil { | ||||
| 				if err = index(indexer, op.repoID); err != nil { | ||||
| 					log.Error("indexer.Index: %v", err) | ||||
| 				} | ||||
| 			} | ||||
| @@ -60,9 +83,6 @@ func UpdateRepoIndexer(repo *models.Repository, watchers ...chan<- error) { | ||||
| } | ||||
|  | ||||
| func addOperationToQueue(op repoIndexerOperation) { | ||||
| 	if !setting.Indexer.RepoIndexerEnabled { | ||||
| 		return | ||||
| 	} | ||||
| 	select { | ||||
| 	case repoIndexerOperationQueue <- op: | ||||
| 		break | ||||
|   | ||||
| @@ -7,6 +7,8 @@ package code | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"sync" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| @@ -55,12 +57,12 @@ func (w *wrappedIndexer) get() (Indexer, error) { | ||||
| 	return w.internal, nil | ||||
| } | ||||
|  | ||||
| func (w *wrappedIndexer) Index(repoID int64) error { | ||||
| func (w *wrappedIndexer) Index(repo *models.Repository, sha string, changes *repoChanges) error { | ||||
| 	indexer, err := w.get() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return indexer.Index(repoID) | ||||
| 	return indexer.Index(repo, sha, changes) | ||||
| } | ||||
|  | ||||
| func (w *wrappedIndexer) Delete(repoID int64) error { | ||||
|   | ||||
| @@ -36,7 +36,10 @@ var ( | ||||
| 		StartupTimeout        time.Duration | ||||
|  | ||||
| 		RepoIndexerEnabled bool | ||||
| 		RepoType           string | ||||
| 		RepoPath           string | ||||
| 		RepoConnStr        string | ||||
| 		RepoIndexerName    string | ||||
| 		UpdateQueueLength  int | ||||
| 		MaxIndexerFileSize int64 | ||||
| 		IncludePatterns    []glob.Glob | ||||
| @@ -52,6 +55,11 @@ var ( | ||||
| 		IssueQueueConnStr:     "", | ||||
| 		IssueQueueBatchNumber: 20, | ||||
|  | ||||
| 		RepoIndexerEnabled: false, | ||||
| 		RepoType:           "bleve", | ||||
| 		RepoPath:           "indexers/repos.bleve", | ||||
| 		RepoConnStr:        "", | ||||
| 		RepoIndexerName:    "gitea_codes", | ||||
| 		MaxIndexerFileSize: 1024 * 1024, | ||||
| 		ExcludeVendored:    true, | ||||
| 	} | ||||
| @@ -73,10 +81,14 @@ func newIndexerService() { | ||||
| 	Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) | ||||
|  | ||||
| 	Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false) | ||||
| 	Indexer.RepoType = sec.Key("REPO_INDEXER_TYPE").MustString("bleve") | ||||
| 	Indexer.RepoPath = sec.Key("REPO_INDEXER_PATH").MustString(path.Join(AppDataPath, "indexers/repos.bleve")) | ||||
| 	if !filepath.IsAbs(Indexer.RepoPath) { | ||||
| 		Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) | ||||
| 	} | ||||
| 	Indexer.RepoConnStr = sec.Key("REPO_INDEXER_CONN_STR").MustString("") | ||||
| 	Indexer.RepoIndexerName = sec.Key("REPO_INDEXER_NAME").MustString("gitea_codes") | ||||
|  | ||||
| 	Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString("")) | ||||
| 	Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString("")) | ||||
| 	Indexer.ExcludeVendored = sec.Key("REPO_INDEXER_EXCLUDE_VENDORED").MustBool(true) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user