mirror of
				https://github.com/zadam/trilium.git
				synced 2025-11-03 20:06:08 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			378 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
			
		
		
	
	
			378 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
import type { Element, Position, Writer } from "ckeditor5";
 | 
						|
import type { Node, Editor } from "ckeditor5";
 | 
						|
import { Plugin } from "ckeditor5";
 | 
						|
 | 
						|
interface SpanStackEntry {
 | 
						|
    className: string;
 | 
						|
    posStart: Position;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * This code is an adaptation of https://github.com/antoniotejada/Trilium-SyntaxHighlightWidget with additional improvements, such as:
 | 
						|
 *
 | 
						|
 *  - support for selecting the language manually;
 | 
						|
 *  - support for determining the language automatically, if a special language is selected ("Auto-detected");
 | 
						|
 *  - limit for highlighting.
 | 
						|
 */
 | 
						|
 | 
						|
export default class SyntaxHighlighting extends Plugin {
 | 
						|
 | 
						|
    private config!: EditorConfig;
 | 
						|
    private hljs!: HighlightJs;
 | 
						|
 | 
						|
    async init() {
 | 
						|
        const config = this.editor.config.get("syntaxHighlighting") as EditorConfig | null;
 | 
						|
        if (!config || !config.enabled) {
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        this.config = config;
 | 
						|
        this.hljs = await config.loadHighlightJs();
 | 
						|
 | 
						|
        this.initTextEditor(this.editor);
 | 
						|
    }
 | 
						|
 | 
						|
    initTextEditor(textEditor: Editor) {
 | 
						|
        log("initTextEditor");
 | 
						|
 | 
						|
        const document = textEditor.model.document;
 | 
						|
 | 
						|
        // Create a conversion from model to view that converts
 | 
						|
        // hljs:hljsClassName:uniqueId into a span with hljsClassName
 | 
						|
        // See the list of hljs class names at
 | 
						|
        // https://github.com/highlightjs/highlight.js/blob/6b8c831f00c4e87ecd2189ebbd0bb3bbdde66c02/docs/css-classes-reference.rst
 | 
						|
 | 
						|
        textEditor.conversion.for("editingDowncast").markerToHighlight({
 | 
						|
            model: "hljs",
 | 
						|
            view: ({ markerName }) => {
 | 
						|
                dbg("markerName " + markerName);
 | 
						|
                // markerName has the pattern addMarker:cssClassName:uniqueId
 | 
						|
                const [, cssClassName, id] = markerName.split(":");
 | 
						|
 | 
						|
                // The original code at
 | 
						|
                // https://github.com/ckeditor/ckeditor5/blob/master/packages/ckeditor5-find-and-replace/src/findandreplaceediting.js
 | 
						|
                // has this comment
 | 
						|
                //      Marker removal from the view has a bug:
 | 
						|
                //      https://github.com/ckeditor/ckeditor5/issues/7499
 | 
						|
                //      A minimal option is to return a new object for each converted marker...
 | 
						|
                return {
 | 
						|
                    name: "span",
 | 
						|
                    classes: [cssClassName],
 | 
						|
                    attributes: {
 | 
						|
                        // ...however, adding a unique attribute should be future-proof..
 | 
						|
                        "data-syntax-result": id
 | 
						|
                    }
 | 
						|
                };
 | 
						|
            }
 | 
						|
        });
 | 
						|
 | 
						|
        // XXX This is done at BalloonEditor.create time, so it assumes this
 | 
						|
        //     document is always attached to this textEditor, empirically that
 | 
						|
        //     seems to be the case even with two splits showing the same note,
 | 
						|
        //     it's not clear if CKEditor5 has apis to attach and detach
 | 
						|
        //     documents around
 | 
						|
        document.registerPostFixer((writer) => {
 | 
						|
            log("postFixer");
 | 
						|
            // Postfixers are a simpler way of tracking changes than onchange
 | 
						|
            // See
 | 
						|
            // https://github.com/ckeditor/ckeditor5/blob/b53d2a4b49679b072f4ae781ac094e7e831cfb14/packages/ckeditor5-block-quote/src/blockquoteediting.js#L54
 | 
						|
            const changes = document.differ.getChanges();
 | 
						|
            let dirtyCodeBlocks = new Set<Element>();
 | 
						|
 | 
						|
            function lookForCodeBlocks(node: Element | Node) {
 | 
						|
                if (!("getChildren" in node)) {
 | 
						|
                    return;
 | 
						|
                }
 | 
						|
 | 
						|
                for (const child of node.getChildren()) {
 | 
						|
                    if (child.is("element", "paragraph")) {
 | 
						|
                        continue;
 | 
						|
                    }
 | 
						|
 | 
						|
                    if (child.is("element", "codeBlock")) {
 | 
						|
                        dirtyCodeBlocks.add(child);
 | 
						|
                    } else if ((child as Element).childCount > 0) {
 | 
						|
                        lookForCodeBlocks(child);
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            for (const change of changes) {
 | 
						|
                dbg("change " + JSON.stringify(change));
 | 
						|
 | 
						|
                if ("name" in change && change.name !== "paragraph" && change.name !== "codeBlock" && change?.position?.nodeAfter && (change.position.nodeAfter as Element).childCount > 0) {
 | 
						|
                    /*
 | 
						|
                     * We need to look for code blocks recursively, as they can be placed within a <div> due to
 | 
						|
                     * general HTML support or normally underneath other elements such as tables, blockquotes, etc.
 | 
						|
                     */
 | 
						|
                    lookForCodeBlocks(change.position.nodeAfter);
 | 
						|
                } else if (change.type == "insert" && change.name == "codeBlock") {
 | 
						|
                    // A new code block was inserted
 | 
						|
                    const codeBlock = change.position?.nodeAfter;
 | 
						|
                    // Even if it's a new codeblock, it needs dirtying in case
 | 
						|
                    // it already has children, like when pasting one or more
 | 
						|
                    // full codeblocks, undoing a delete, changing the language,
 | 
						|
                    // etc (the postfixer won't get later changes for those).
 | 
						|
                    if (codeBlock) {
 | 
						|
                        log("dirtying inserted codeBlock " + JSON.stringify(codeBlock.toJSON()));
 | 
						|
                        dirtyCodeBlocks.add(codeBlock as Element);
 | 
						|
                    }
 | 
						|
                } else if (change.type == "remove" && change.name == "codeBlock" && change.position) {
 | 
						|
                    // An existing codeblock was removed, do nothing. Note the
 | 
						|
                    // node is no longer in the editor so the codeblock cannot
 | 
						|
                    // be inspected here. No need to dirty the codeblock since
 | 
						|
                    // it has been removed
 | 
						|
                    log("removing codeBlock at path " + JSON.stringify(change.position.toJSON()));
 | 
						|
                } else if ((change.type == "remove" || change.type == "insert") && change?.position?.parent.is("element", "codeBlock")) {
 | 
						|
                    // Text was added or removed from the codeblock, force a
 | 
						|
                    // highlight
 | 
						|
                    const codeBlock = change.position.parent;
 | 
						|
                    log("dirtying codeBlock " + JSON.stringify(codeBlock.toJSON()));
 | 
						|
                    dirtyCodeBlocks.add(codeBlock);
 | 
						|
                }
 | 
						|
            }
 | 
						|
            for (let codeBlock of dirtyCodeBlocks) {
 | 
						|
                this.highlightCodeBlock(codeBlock, writer);
 | 
						|
            }
 | 
						|
            // Adding markers doesn't modify the document data so no need for
 | 
						|
            // postfixers to run again
 | 
						|
            return false;
 | 
						|
        });
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * This implements highlighting via ephemeral markers (not stored in the
 | 
						|
     * document).
 | 
						|
     *
 | 
						|
     * XXX Another option would be to use formatting markers, which would have
 | 
						|
     *     the benefit of making it work for readonly notes. On the flip side,
 | 
						|
     *     the formatting would be stored with the note and it would need a
 | 
						|
     *     way to remove that formatting when editing back the note.
 | 
						|
     */
 | 
						|
    highlightCodeBlock(codeBlock: Element, writer: Writer) {
 | 
						|
        log("highlighting codeblock " + JSON.stringify(codeBlock.toJSON()));
 | 
						|
        const model = codeBlock.root.document?.model;
 | 
						|
        if (!model) {
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        // Can't invoke addMarker with an already existing marker name,
 | 
						|
        // clear all highlight markers first. Marker names follow the
 | 
						|
        // pattern hljs:cssClassName:uniqueId, eg hljs:hljs-comment:1
 | 
						|
        const codeBlockRange = model.createRangeIn(codeBlock);
 | 
						|
        for (const marker of model.markers.getMarkersIntersectingRange(codeBlockRange)) {
 | 
						|
            dbg("removing marker " + marker.name);
 | 
						|
            writer.removeMarker(marker.name);
 | 
						|
        }
 | 
						|
 | 
						|
        // Don't highlight if plaintext (note this needs to remove the markers
 | 
						|
        // above first, in case this was a switch from non plaintext to
 | 
						|
        // plaintext)
 | 
						|
        const mimeType = codeBlock.getAttribute("language") as string;
 | 
						|
        if (mimeType == "text-plain") {
 | 
						|
            // XXX There's actually a plaintext language that could be used
 | 
						|
            //     if you wanted the non-highlight formatting of
 | 
						|
            //     highlight.js css applied, see
 | 
						|
            //     https://github.com/highlightjs/highlight.js/issues/700
 | 
						|
            log("not highlighting plaintext codeblock");
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        // Don't highlight if the code is too big, as the typing performance will be highly degraded.
 | 
						|
        if (codeBlock.childCount >= HIGHLIGHT_MAX_BLOCK_COUNT) {
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        // highlight.js needs the full text without HTML tags, eg for the
 | 
						|
        // text
 | 
						|
        // #include <stdio.h>
 | 
						|
        // the highlighted html is
 | 
						|
        // <span class="hljs-meta">#<span class="hljs-keyword">include</span> <span class="hljs-string"><stdio.h></span></span>
 | 
						|
        // But CKEditor codeblocks have <br> instead of \n
 | 
						|
 | 
						|
        // Do a two pass algorithm:
 | 
						|
        // - First pass collect the codeblock children text, change <br> to
 | 
						|
        //   \n
 | 
						|
        // - invoke highlight.js on the collected text generating html
 | 
						|
        // - Second pass parse the highlighted html spans and match each
 | 
						|
        //   char to the CodeBlock text. Issue addMarker CKEditor calls for
 | 
						|
        //   each span
 | 
						|
 | 
						|
        // XXX This is brittle and assumes how highlight.js generates html
 | 
						|
        //     (blanks, which characters escapes, etc), a better approach
 | 
						|
        //     would be to use highlight.js beta api TreeTokenizer?
 | 
						|
 | 
						|
        // Collect all the text nodes to pass to the highlighter Text is
 | 
						|
        // direct children of the codeBlock
 | 
						|
        let text = "";
 | 
						|
        for (let i = 0; i < codeBlock.childCount; ++i) {
 | 
						|
            let child = codeBlock.getChild(i);
 | 
						|
            if (!child) {
 | 
						|
                continue;
 | 
						|
            }
 | 
						|
 | 
						|
            // We only expect text and br elements here
 | 
						|
            if (child.is("$text")) {
 | 
						|
                dbg("child text " + child.data);
 | 
						|
                text += child.data;
 | 
						|
            } else if (child.is("element") && child.name == "softBreak") {
 | 
						|
                dbg("softBreak");
 | 
						|
                text += "\n";
 | 
						|
            } else {
 | 
						|
                warn("Unkown child " + JSON.stringify(child.toJSON()));
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        let highlightRes;
 | 
						|
        if (mimeType === this.config.defaultMimeType) {
 | 
						|
            highlightRes = this.hljs.highlightAuto(text);
 | 
						|
        } else {
 | 
						|
            highlightRes = this.hljs.highlight(text, { language: mimeType });
 | 
						|
        }
 | 
						|
 | 
						|
        if (!highlightRes) {
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        dbg("text\n" + text);
 | 
						|
        dbg("html\n" + highlightRes.value);
 | 
						|
 | 
						|
        let iHtml = 0;
 | 
						|
        let html = highlightRes.value;
 | 
						|
        let spanStack: SpanStackEntry[] = [];
 | 
						|
        let iChild = -1;
 | 
						|
        let childText = "";
 | 
						|
        let child: Node | null = null;
 | 
						|
        let iChildText = 0;
 | 
						|
 | 
						|
        while (iHtml < html.length) {
 | 
						|
            // Advance the text index and fetch a new child if necessary
 | 
						|
            if (iChildText >= childText.length) {
 | 
						|
                iChild++;
 | 
						|
                if (iChild < codeBlock.childCount) {
 | 
						|
                    dbg("Fetching child " + iChild);
 | 
						|
                    child = codeBlock.getChild(iChild);
 | 
						|
                    if (!child) {
 | 
						|
                        continue;
 | 
						|
                    }
 | 
						|
 | 
						|
                    if (child.is("$text")) {
 | 
						|
                        dbg("child text " + child.data);
 | 
						|
                        childText = child.data;
 | 
						|
                        iChildText = 0;
 | 
						|
                    } else if (child.is("element", "softBreak")) {
 | 
						|
                        dbg("softBreak");
 | 
						|
                        iChildText = 0;
 | 
						|
                        childText = "\n";
 | 
						|
                    } else {
 | 
						|
                        warn("child unknown!!!");
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    // Don't bail if beyond the last children, since there's
 | 
						|
                    // still html text, it must be a closing span tag that
 | 
						|
                    // needs to be dealt with below
 | 
						|
                    childText = "";
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            // This parsing is made slightly simpler and faster by only
 | 
						|
            // expecting <span> and </span> tags in the highlighted html
 | 
						|
            if (html[iHtml] == "<" && html[iHtml + 1] != "/") {
 | 
						|
                // new span, note they can be nested eg C preprocessor lines
 | 
						|
                // are inside a hljs-meta span, hljs-title function names
 | 
						|
                // inside a hljs-function span, etc
 | 
						|
                let iStartQuot = html.indexOf('"', iHtml + 1);
 | 
						|
                let iEndQuot = html.indexOf('"', iStartQuot + 1);
 | 
						|
                let className = html.slice(iStartQuot + 1, iEndQuot);
 | 
						|
                // XXX highlight js uses scope for Python "title function_",
 | 
						|
                //     etc for now just use the first style only
 | 
						|
                // See https://highlightjs.readthedocs.io/en/latest/css-classes-reference.html#a-note-on-scopes-with-sub-scopes
 | 
						|
                let iBlank = className.indexOf(" ");
 | 
						|
                if (iBlank > 0) {
 | 
						|
                    className = className.slice(0, iBlank);
 | 
						|
                }
 | 
						|
                dbg("Found span start " + className);
 | 
						|
 | 
						|
                iHtml = html.indexOf(">", iHtml) + 1;
 | 
						|
 | 
						|
                // push the span
 | 
						|
                let posStart = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText);
 | 
						|
                spanStack.push({ className: className, posStart: posStart });
 | 
						|
            } else if (html[iHtml] == "<" && html[iHtml + 1] == "/") {
 | 
						|
                // Done with this span, pop the span and mark the range
 | 
						|
                iHtml = html.indexOf(">", iHtml + 1) + 1;
 | 
						|
 | 
						|
                let stackTop = spanStack.pop();
 | 
						|
                let posStart = stackTop?.posStart;
 | 
						|
                let className = stackTop?.className;
 | 
						|
                let posEnd = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText);
 | 
						|
                if (posStart) {
 | 
						|
                    let range = writer.createRange(posStart, posEnd);
 | 
						|
                    let markerName = "hljs:" + className + ":" + markerCounter;
 | 
						|
                    // Use an incrementing number for the uniqueId, random of
 | 
						|
                    // 10000000 is known to cause collisions with a few
 | 
						|
                    // codeblocks of 10s of lines on real notes (each line is
 | 
						|
                    // one or more marker).
 | 
						|
                    // Wrap-around for good measure so all numbers are positive
 | 
						|
                    // XXX Another option is to catch the exception and retry or
 | 
						|
                    //     go through the markers and get the largest + 1
 | 
						|
                    markerCounter = (markerCounter + 1) & 0xffffff;
 | 
						|
                    dbg("Found span end " + className);
 | 
						|
                    dbg("Adding marker " + markerName + ": " + JSON.stringify(range.toJSON()));
 | 
						|
                    writer.addMarker(markerName, { range: range, usingOperation: false });
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                // Text, we should also have text in the children
 | 
						|
                assert(iChild < codeBlock.childCount && iChildText < childText.length, "Found text in html with no corresponding child text!!!!");
 | 
						|
                if (html[iHtml] == "&") {
 | 
						|
                    // highlight.js only encodes
 | 
						|
                    // .replace(/&/g, '&')
 | 
						|
                    // .replace(/</g, '<')
 | 
						|
                    // .replace(/>/g, '>')
 | 
						|
                    // .replace(/"/g, '"')
 | 
						|
                    // .replace(/'/g, ''');
 | 
						|
                    // see https://github.com/highlightjs/highlight.js/blob/7addd66c19036eccd7c602af61f1ed84d215c77d/src/lib/utils.js#L5
 | 
						|
                    let iAmpEnd = html.indexOf(";", iHtml);
 | 
						|
                    dbg(html.slice(iHtml, iAmpEnd));
 | 
						|
                    iHtml = iAmpEnd + 1;
 | 
						|
                } else {
 | 
						|
                    // regular text
 | 
						|
                    dbg(html[iHtml]);
 | 
						|
                    iHtml++;
 | 
						|
                }
 | 
						|
                iChildText++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
const HIGHLIGHT_MAX_BLOCK_COUNT = 500;
 | 
						|
 | 
						|
const tag = "SyntaxHighlightWidget";
 | 
						|
const debugLevels = ["error", "warn", "info", "log", "debug"];
 | 
						|
const debugLevel = debugLevels.indexOf("warn");
 | 
						|
 | 
						|
let warn = function (...args: unknown[]) {};
 | 
						|
if (debugLevel >= debugLevels.indexOf("warn")) {
 | 
						|
    warn = console.warn.bind(console, tag + ": ");
 | 
						|
}
 | 
						|
 | 
						|
let log = function (...args: unknown[]) {};
 | 
						|
if (debugLevel >= debugLevels.indexOf("log")) {
 | 
						|
    log = console.log.bind(console, tag + ": ");
 | 
						|
}
 | 
						|
 | 
						|
let dbg = function (...args: unknown[]) {};
 | 
						|
if (debugLevel >= debugLevels.indexOf("debug")) {
 | 
						|
    dbg = console.debug.bind(console, tag + ": ");
 | 
						|
}
 | 
						|
 | 
						|
function assert(e: boolean, msg?: string) {
 | 
						|
    console.assert(e, tag + ": " + msg);
 | 
						|
}
 | 
						|
 | 
						|
// TODO: Should this be scoped to note?
 | 
						|
let markerCounter = 0;
 |