feat(import/zip): support UTF-16 LE with BOM (closes #1241)

This commit is contained in:
Elian Doran
2025-02-22 01:37:02 +02:00
parent c925ae5f15
commit bedc61c3d0
5 changed files with 73 additions and 47 deletions

View File

@@ -1,5 +1,7 @@
"use strict";
import chardet from "chardet";
import stripBom from "strip-bom";
import crypto from "crypto";
import { generator } from "rand-token";
import unescape from "unescape";
@@ -330,6 +332,36 @@ function compareVersions(v1: string, v2: string): number {
return 0;
}
/**
 * Normalizes a value that may be a string, a buffer or `null` into a string.
 *
 * - Falsy values (`null`, as well as an empty string) yield an empty string.
 * - Strings are returned as-is, without any transformation.
 * - Buffers are passed to `chardet.detect` and decoded according to the result:
 *   - `"UTF-16LE"`: decoded as UTF-16 LE and the result is passed through `stripBom`.
 *   - `"UTF-8"` or any other result: decoded as UTF-8. `stripBom` is not applied on this path.
 *
 * @param data the string or buffer to process.
 * @returns the decoded content of the buffer, or the same string if it's already a string.
 */
export function processStringOrBuffer(data: string | Buffer | null): string {
    if (!data) {
        return "";
    }

    if (!Buffer.isBuffer(data)) {
        return data;
    }

    // No logging here: this runs once per decoded buffer, so a debug print would flood the output.
    const detectedEncoding = chardet.detect(data);
    switch (detectedEncoding) {
        case "UTF-16LE":
            return stripBom(data.toString("utf-16le"));
        case "UTF-8":
        default:
            // Unrecognized (or undetected) encodings fall back to UTF-8.
            return data.toString("utf-8");
    }
}
export default {
compareVersions,
crash,