One index per type and parallel indexing (#1781)

Before this change, the search used a single index that distinguished types (repositories, users, etc.) by a field (_type).
However, it turned out that this can lead to problems, in particular if different types have the same field but use different analyzers for that field. The following links describe further problems of a combined index:

    https://www.elastic.co/blog/index-vs-type
    https://www.elastic.co/guide/en/elasticsearch/reference/6.0/removal-of-types.html

With this change, every type gets its own index, and the SearchEngine gets an API to modify multiple indices at once, for example to remove all documents related to a specific repository from all indices.

The search uses another new API, the central work queue, to coordinate the indexing.
The central work queue coordinates long-running or resource-intensive tasks. It can run tasks in parallel, but can also run tasks that target the same resources in sequence. The queue is also persistent and can restore queued tasks after a restart.

Co-authored-by: Konstantin Schaper <konstantin.schaper@cloudogu.com>
This commit is contained in:
Sebastian Sdorra
2021-08-25 15:40:11 +02:00
committed by GitHub
parent 44f25d6b15
commit 0a26741ebd
72 changed files with 4536 additions and 1420 deletions

View File

@@ -30,8 +30,10 @@ import sonia.scm.plugin.Extension;
import sonia.scm.search.HandlerEventIndexSyncer;
import sonia.scm.search.Id;
import sonia.scm.search.Index;
import sonia.scm.search.IndexLogStore;
import sonia.scm.search.Indexer;
import sonia.scm.search.SearchEngine;
import sonia.scm.search.SerializableIndexTask;
import javax.inject.Inject;
import javax.inject.Singleton;
@@ -43,12 +45,10 @@ public class UserIndexer implements Indexer<User> {
@VisibleForTesting
static final int VERSION = 1;
private final UserManager userManager;
private final SearchEngine searchEngine;
@Inject
public UserIndexer(UserManager userManager, SearchEngine searchEngine) {
this.userManager = userManager;
public UserIndexer(SearchEngine searchEngine) {
this.searchEngine = searchEngine;
}
@@ -62,47 +62,46 @@ public class UserIndexer implements Indexer<User> {
return VERSION;
}
@Subscribe(async = false)
public void handleEvent(UserEvent event) {
new HandlerEventIndexSyncer<>(this).handleEvent(event);
@Override
public Class<? extends ReIndexAllTask<User>> getReIndexAllTask() {
return ReIndexAll.class;
}
@Override
public Updater<User> open() {
return new UserIndexUpdater(userManager, searchEngine.forType(User.class).getOrCreate());
public SerializableIndexTask<User> createStoreTask(User user) {
return index -> store(index, user);
}
public static class UserIndexUpdater implements Updater<User> {
@Override
public SerializableIndexTask<User> createDeleteTask(User item) {
return index -> index.delete().byId(Id.of(item));
}
@Subscribe(async = false)
public void handleEvent(UserEvent event) {
new HandlerEventIndexSyncer<>(searchEngine, this).handleEvent(event);
}
private static void store(Index<User> index, User user) {
index.store(Id.of(user), UserPermissions.read(user).asShiroString(), user);
}
public static class ReIndexAll extends ReIndexAllTask<User> {
private final UserManager userManager;
private final Index<User> index;
private UserIndexUpdater(UserManager userManager, Index<User> index) {
@Inject
public ReIndexAll(IndexLogStore logStore, UserManager userManager) {
super(logStore, User.class, VERSION);
this.userManager = userManager;
this.index = index;
}
@Override
public void store(User user) {
index.store(Id.of(user), UserPermissions.read(user).asShiroString(), user);
}
@Override
public void delete(User user) {
index.delete().byType().byId(Id.of(user));
}
@Override
public void reIndexAll() {
index.delete().byType().all();
public void update(Index<User> index) {
index.delete().all();
for (User user : userManager.getAll()) {
store(user);
store(index, user);
}
}
@Override
public void close() {
index.close();
}
}
}