lexer impl WIP + test

This commit is contained in:
zadam
2020-05-17 19:43:37 +02:00
parent 32eaafd024
commit e77e0ce675
9 changed files with 384 additions and 211 deletions

View File

@@ -1,7 +1,7 @@
const repository = require('./repository');
const sql = require('./sql');
const log = require('./log');
const parseFilters = require('./parse_filters');
const parseFilters = require('./search/parse_filters.js');
const buildSearchQuery = require('./build_search_query');
const noteCacheService = require('./note_cache/note_cache.js');

View File

@@ -0,0 +1,15 @@
"use strict";
/**
 * Search expression that negates another expression: it yields the notes of
 * the input set that the wrapped expression does NOT return.
 */
class NotExp {
    /**
     * @param subExpression - expression to negate; must provide
     *                        `execute(noteSet, searchContext)`
     */
    constructor(subExpression) {
        this.subExpression = subExpression;
    }

    /**
     * Runs the wrapped expression against `noteSet` and subtracts its result
     * from `noteSet`.
     *
     * @param noteSet - input set; must provide `minus(otherSet)`
     * @param searchContext - passed through unchanged to the wrapped expression
     * @returns whatever `noteSet.minus(...)` returns for the wrapped expression's result
     */
    execute(noteSet, searchContext) {
        const { subExpression: negated } = this;
        const excludedNotes = negated.execute(noteSet, searchContext);

        return noteSet.minus(excludedNotes);
    }
}
module.exports = NotExp;

View File

@@ -0,0 +1,94 @@
/**
 * Splits a search string into two token lists.
 *
 * The string starts in "fulltext" mode, where words are separated by spaces and
 * collected into `fulltextTokens`. The first unquoted, unescaped `#` or `@`
 * switches to "expression" mode for the rest of the string; from then on tokens
 * go to `expressionTokens` and runs of operator characters (`= * > < !`) are
 * split off into tokens of their own. The `#` / `@` markers themselves are not
 * stored in any token.
 *
 * Further rules:
 *  - A backslash makes the following character a literal word character
 *    (a trailing backslash is kept as-is).
 *  - `"`, `'` and `` ` `` open a quoted token when they appear at the start of a
 *    word or directly after an operator; everything up to the matching quote
 *    (spaces, markers, other quote kinds) becomes one token. An unterminated
 *    quote runs to the end of the string. An empty quoted string yields no token.
 *  - A quote in the middle of a word is an ordinary character (d'Artagnan).
 *
 * @param {string} str - search string to tokenize
 * @returns {{fulltextTokens: string[], expressionTokens: string[]}}
 */
function lexer(str) {
    const QUOTE_CHARS = ['"', "'", '`'];
    const OPERATOR_CHARS = ['=', '*', '>', '<', '!'];

    const fulltextTokens = [];
    const expressionTokens = [];

    // false while outside quotes, otherwise the quote character that opened the token
    let quotes = false;
    let fulltextEnded = false;
    let currentWord = '';
    // true exactly while currentWord is a (non-empty) operator token being built
    let symbol = false;

    function isSymbol(chr) {
        return OPERATOR_CHARS.includes(chr);
    }

    // Flushes the pending word (if any) into the list of the current mode.
    function finishWord() {
        // reset even when there is nothing to flush, so the flag can never
        // outlive the operator token it describes
        symbol = false;

        if (currentWord === '') {
            return;
        }

        if (fulltextEnded) {
            expressionTokens.push(currentWord);
        } else {
            fulltextTokens.push(currentWord);
        }

        currentWord = '';
    }

    for (let i = 0; i < str.length; i++) {
        const chr = str[i];

        if (chr === '\\') {
            // an escaped character is always a literal word character, so it
            // must not be glued onto a pending operator token
            if (symbol) {
                finishWord();
            }

            if ((i + 1) < str.length) {
                i++;
                currentWord += str[i];
            } else {
                currentWord += chr;
            }

            continue;
        }

        if (QUOTE_CHARS.includes(chr)) {
            if (!quotes) {
                if (currentWord === '' || symbol) {
                    // start of a word, or directly after an operator (#label="some value"):
                    // flush the operator and open a quoted token
                    finishWord();
                    quotes = chr;
                } else {
                    // quote inside a word does not have special meaning and does not break word
                    // e.g. d'Artagnan is kept as a single token
                    currentWord += chr;
                }
            } else if (quotes === chr) {
                quotes = false;
                finishWord();
            } else {
                // it's a quote but within other kind of quotes so it's valid as a literal character
                currentWord += chr;
            }

            continue;
        }

        if (!quotes) {
            if (chr === '#' || chr === '@') {
                // flush first so a pending fulltext word stays in fulltextTokens
                // and is not merged with the attribute name that follows
                finishWord();
                fulltextEnded = true;
                continue;
            }

            if (chr === ' ') {
                finishWord();
                continue;
            }

            if (fulltextEnded && symbol !== isSymbol(chr)) {
                // boundary between an operator and a word (in either direction)
                finishWord();
                currentWord = chr;
                symbol = isSymbol(chr);
                continue;
            }
        }

        currentWord += chr;
    }

    finishWord();

    return {
        fulltextTokens,
        expressionTokens
    };
}
module.exports = lexer;

View File

@@ -21,6 +21,18 @@ class NoteSet {
mergeIn(anotherNoteSet) {
this.notes = this.notes.concat(anotherNoteSet.arr);
}
minus(anotherNoteSet) {
const newNoteSet = new NoteSet();
for (const note of this.notes) {
if (!anotherNoteSet.hasNoteId(note.noteId)) {
newNoteSet.add(note);
}
}
return newNoteSet;
}
}
module.exports = NoteSet;

View File

@@ -1,4 +1,9 @@
const dayjs = require("dayjs");
const AndExp = require('./expressions/and');
const OrExp = require('./expressions/or');
const NotExp = require('./expressions/not');
const NoteCacheFulltextExp = require('./expressions/note_cache_fulltext');
const NoteContentFulltextExp = require('./expressions/note_content_fulltext');
// Matches one attribute filter inside a search string. In order, the pattern accepts:
//   1. an optional leading "AND" / "OR" keyword followed by whitespace,
//   2. a literal "@",
//   3. an optional "!" (presumably negation of the filter - confirm against the code using the match),
//   4. a name: either a run of Unicode letters, numbers and "_" or a double-quoted string,
//   5. optionally an operator (=, !=, <, <=, >, >=, or *=, =*, *=* each with an optional "!" prefix)
//      followed by a value: either an unquoted run without whitespace, "=", "*" or '"',
//      or a double-quoted string.
// Flags: i = case-insensitive, u = required for the \p{...} classes, g = global.
// Because of the g flag the regex object is stateful: exec()/test() continue from lastIndex.
const filterRegex = /(\b(AND|OR)\s+)?@(!?)([\p{L}\p{Number}_]+|"[^"]+")\s*((=|!=|<|<=|>|>=|!?\*=|!?=\*|!?\*=\*)\s*([^\s=*"]+|"[^"]+"))?/igu;
// Matches a whole string that is one of the keywords NOW, TODAY, WEEK, MONTH or YEAR
// (case-insensitive), optionally followed by spaces and a signed integer offset
// such as "+3" or "- 10" (spaces are allowed around the sign).
const smartValueRegex = /^(NOW|TODAY|WEEK|MONTH|YEAR) *([+\-] *\d+)?$/i;