feat: prefer HTML title tag over filename during import

When importing HTML files, extract and use the title from the <title> tag if available, falling back to the filename only when no title tag is found. This improves handling of titles with special characters that can't be represented in filenames.
2025-12-16 05:09:54 +01:00 · 2024-11-16 09:06:58 -07:00
parent 3ff75b14e9
commit 47c05b2c6d
2 changed files with 16 additions and 7 deletions
--- a/src/services/import/single.ts
+++ b/src/services/import/single.ts
@@ -149,15 +149,18 @@ function importMarkdown(taskContext: TaskContext, file: File, parentNote: BNote)
 }

 function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
-    const title = utils.getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces);
    let content = file.buffer.toString("utf-8");
-
+    
    if (taskContext?.data?.safeImport) {
        content = htmlSanitizer.sanitize(content);
    }
-
+    
+    // Try to get title from HTML first, fall back to filename
+    const htmlTitle = importUtils.extractHtmlTitle(content);
+    const title = htmlTitle || utils.getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces);
+    
    content = importUtils.handleH1(content, title);
-
+    
    const {note} = noteService.createNewNote({
        parentNoteId: parentNote.noteId,
        title,
@@ -166,9 +169,9 @@ function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
        mime: 'text/html',
        isProtected: parentNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
    });
-
+    
    taskContext.increaseProgressCount();
-
+    
    return note;
 }

--- a/src/services/import/utils.ts
+++ b/src/services/import/utils.ts
@@ -11,6 +11,12 @@ function handleH1(content: string, title: string) {
    return content;
 }

+function extractHtmlTitle(content: string): string | null { // text of the first <title>…</title> pair (text without "<"), whitespace-normalized; null when absent or blank; entities are not decoded
+    const titleMatch = /<title(?:\s[^>]*)?>([^<]*)<\/title\s*>/i.exec(content); // "\s" after the tag name keeps tags such as <title-x> from matching
+    return titleMatch?.[1].replace(/[ \t\n\f\r]+/g, " ").trim() || null; // collapse line-wrapped whitespace to single spaces; an empty result becomes null, not ""
+}
+
 export default {
-    handleH1
+    handleH1,
+    extractHtmlTitle
 };