diff --git a/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj b/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj index 2c8a07a..3462992 100644 --- a/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj +++ b/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj @@ -75,6 +75,7 @@ + diff --git a/source/ChanSort.Loader.SamsungJ/DbSerializer.cs b/source/ChanSort.Loader.SamsungJ/DbSerializer.cs index b40bc2b..26dd6c7 100644 --- a/source/ChanSort.Loader.SamsungJ/DbSerializer.cs +++ b/source/ChanSort.Loader.SamsungJ/DbSerializer.cs @@ -4,13 +4,14 @@ using System.Data; using System.Data.SQLite; using System.IO; using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using ChanSort.Api; namespace ChanSort.Loader.SamsungJ { /// - /// Loader for Samsung J/K/M/N/R/Q series .zip files (2015 - 2019+) + /// Loader for Samsung J/K/M/N/R/Q series .zip files (2015 - 2020) /// class DbSerializer : SerializerBase { @@ -218,11 +219,15 @@ namespace ChanSort.Loader.SamsungJ try { cmd.CommandText = "select provId, cast(provName as blob) from PROV"; + var prevEncoding = this.encoding; + this.encoding = Encoding.BigEndianUnicode; // while Sat and Service names might be utf16 binary data inside an utf8 envelope, the providers are always plain utf16 using (var r = cmd.ExecuteReader()) { while (r.Read()) dict.Add(r.GetInt64(0), ReadUtf16(r, 1)); } + + this.encoding = prevEncoding; } catch { @@ -392,7 +397,7 @@ namespace ChanSort.Loader.SamsungJ return null; byte[] nameBytes = new byte[200]; int nameLen = (int)r.GetBytes(fieldIndex, 0, nameBytes, 0, nameBytes.Length); - this.encoding ??= AutoDetectUtf16Endian(nameBytes, nameLen); + this.encoding ??= AutoDetectUtf16Encoding(nameBytes, nameLen); if (this.encoding == null) return string.Empty; @@ -401,24 +406,35 @@ namespace ChanSort.Loader.SamsungJ #endregion #region AutoDetectUtf16Endian() - private Encoding AutoDetectUtf16Endian(byte[] nameBytes, int nameLen) + private 
Encoding AutoDetectUtf16Encoding(byte[] nameBytes, int nameLen) { if (this.DefaultEncoding is UnicodeEncoding) return this.DefaultEncoding; int evenBytesZero = 0; int oddBytesZero = 0; + int bytesAbove128 = 0; for (int i = 0; i < nameLen; i += 2) { if (nameBytes[i] == 0) ++evenBytesZero; + if (nameBytes[i] >= 128) + ++bytesAbove128; if (nameBytes[i + 1] == 0) ++oddBytesZero; + if (nameBytes[i + 1] >= 128) + ++bytesAbove128; } if (evenBytesZero + oddBytesZero == nameLen) return null; + if (bytesAbove128 + 1 >= nameLen) + { + //this.Features.ChannelNameEdit = ChannelNameEditMode.None; // unclear if the encoder produces byte sequences that the TV can decode again + return new Utf16InsideUtf8EnvelopeEncoding(); + } + return evenBytesZero >= oddBytesZero ? Encoding.BigEndianUnicode : Encoding.Unicode; } @@ -482,45 +498,44 @@ namespace ChanSort.Loader.SamsungJ #region SaveChannelList() private void SaveChannelList(ChannelList channelList, string dbPath) { - using (var conn = new SQLiteConnection("Data Source=" + dbPath)) + using var conn = new SQLiteConnection("Data Source=" + dbPath); + conn.Open(); + using var cmdUpdateSrv = PrepareUpdateCommand(conn); + using var cmdDeleteSrv = PrepareDeleteCommand(conn, (channelList.SignalSource & SignalSource.Digital) != 0); + using var cmdInsertFav = PrepareInsertFavCommand(conn); + using var cmdUpdateFav = PrepareUpdateFavCommand(conn); + using var cmdDeleteFav = PrepareDeleteFavCommand(conn); + using (var trans = conn.BeginTransaction()) { - conn.Open(); - using (var cmdUpdateSrv = PrepareUpdateCommand(conn)) - using (var cmdDeleteSrv = PrepareDeleteCommand(conn, (channelList.SignalSource & SignalSource.Digital) != 0)) - using (var cmdInsertFav = PrepareInsertFavCommand(conn)) - using (var cmdUpdateFav = PrepareUpdateFavCommand(conn)) - using (var cmdDeleteFav = PrepareDeleteFavCommand(conn)) - { - using (var trans = conn.BeginTransaction()) - { - Editor.SequentializeFavPos(channelList, 5); - this.WriteChannels(cmdUpdateSrv, 
cmdDeleteSrv, cmdInsertFav, cmdUpdateFav, cmdDeleteFav, channelList); - trans.Commit(); - } - this.RepairCorruptedDatabaseImage(cmdUpdateSrv); - } + Editor.SequentializeFavPos(channelList, 5); + this.WriteChannels(cmdUpdateSrv, cmdDeleteSrv, cmdInsertFav, cmdUpdateFav, cmdDeleteFav, channelList); + trans.Commit(); } + this.RepairCorruptedDatabaseImage(cmdUpdateSrv); } #endregion #region Prepare*Command() - private static SQLiteCommand PrepareUpdateCommand(SQLiteConnection conn) + private SQLiteCommand PrepareUpdateCommand(SQLiteConnection conn) { + var canUpdateNames = this.Features.ChannelNameEdit != ChannelNameEditMode.None; var cmd = conn.CreateCommand(); - cmd.CommandText = "update SRV set major=@nr, lockMode=@lock, hideGuide=@hidden, hidden=@hidden, numSel=@numsel, srvName=cast(@srvname as varchar) where srvId=@id"; + var updateSrvName = canUpdateNames ? ", srvName=cast(@srvname as varchar)" : ""; + cmd.CommandText = "update SRV set major=@nr, lockMode=@lock, hideGuide=@hidden, hidden=@hidden, numSel=@numsel" + updateSrvName + " where srvId=@id"; cmd.Parameters.Add(new SQLiteParameter("@id", DbType.Int64)); cmd.Parameters.Add(new SQLiteParameter("@nr", DbType.Int32)); cmd.Parameters.Add(new SQLiteParameter("@lock", DbType.Boolean)); cmd.Parameters.Add(new SQLiteParameter("@hidden", DbType.Boolean)); cmd.Parameters.Add(new SQLiteParameter("@numsel", DbType.Boolean)); - cmd.Parameters.Add(new SQLiteParameter("@srvname", DbType.Binary)); + if (canUpdateNames) + cmd.Parameters.Add(new SQLiteParameter("@srvname", DbType.Binary)); cmd.Prepare(); return cmd; } - private static SQLiteCommand PrepareDeleteCommand(SQLiteConnection conn, bool digital) + private SQLiteCommand PrepareDeleteCommand(SQLiteConnection conn, bool digital) { var cmd = conn.CreateCommand(); var sql = new StringBuilder(); @@ -536,7 +551,7 @@ namespace ChanSort.Loader.SamsungJ return cmd; } - private static SQLiteCommand PrepareInsertFavCommand(SQLiteConnection conn) + private SQLiteCommand 
PrepareInsertFavCommand(SQLiteConnection conn) { var cmd = conn.CreateCommand(); cmd.CommandText = "insert into SRV_FAV (srvId, fav, pos) values (@id, @fav, @pos)"; @@ -547,7 +562,7 @@ namespace ChanSort.Loader.SamsungJ return cmd; } - private static SQLiteCommand PrepareUpdateFavCommand(SQLiteConnection conn) + private SQLiteCommand PrepareUpdateFavCommand(SQLiteConnection conn) { var cmd = conn.CreateCommand(); cmd.CommandText = "update SRV_FAV set pos=@pos where srvId=@id and fav=@fav"; @@ -557,7 +572,7 @@ namespace ChanSort.Loader.SamsungJ cmd.Prepare(); return cmd; } - private static SQLiteCommand PrepareDeleteFavCommand(SQLiteConnection conn) + private SQLiteCommand PrepareDeleteFavCommand(SQLiteConnection conn) { var cmd = conn.CreateCommand(); cmd.CommandText = "delete from SRV_FAV where srvId=@id and fav=@fav"; @@ -573,7 +588,7 @@ namespace ChanSort.Loader.SamsungJ private void WriteChannels(SQLiteCommand cmdUpdateSrv, SQLiteCommand cmdDeleteSrv, SQLiteCommand cmdInsertFav, SQLiteCommand cmdUpdateFav, SQLiteCommand cmdDeleteFav, ChannelList channelList, bool analog = false) { - + bool canUpdateNames = this.Features.ChannelNameEdit != ChannelNameEditMode.None; foreach (ChannelInfo channelInfo in channelList.Channels.ToList()) { var channel = channelInfo as DbChannel; @@ -595,7 +610,8 @@ namespace ChanSort.Loader.SamsungJ cmdUpdateSrv.Parameters["@lock"].Value = channel.Lock; cmdUpdateSrv.Parameters["@hidden"].Value = channel.Hidden; cmdUpdateSrv.Parameters["@numsel"].Value = !channel.Skip; - cmdUpdateSrv.Parameters["@srvname"].Value = channel.Name == null ? (object)DBNull.Value : encoding.GetBytes(channel.Name); + if (canUpdateNames) + cmdUpdateSrv.Parameters["@srvname"].Value = channel.Name == null ? 
(object)DBNull.Value : encoding.GetBytes(channel.Name); cmdUpdateSrv.ExecuteNonQuery(); // update favorites diff --git a/source/ChanSort.Loader.SamsungJ/Utf16InsideUtf8EnvelopeEncoding.cs b/source/ChanSort.Loader.SamsungJ/Utf16InsideUtf8EnvelopeEncoding.cs new file mode 100644 index 0000000..1929719 --- /dev/null +++ b/source/ChanSort.Loader.SamsungJ/Utf16InsideUtf8EnvelopeEncoding.cs @@ -0,0 +1,101 @@ +using System.IO; +using System.Text; + +namespace ChanSort.Loader.SamsungJ +{ + // Samsung 1242 format does not store UTF16 characters directly, but instead wraps 16 data bits inside a UTF-8 lead + continuation byte sequence. + // A 3 byte UTF-8 sequence is used to encode 16 bits of utf-16 big endian input: 1110aaaa 10bbbbcc 10ccdddd represents the 16bit big endian integer ccccddddaaaabbbb, e.g. 0xE4, 0x84, 0x80 => 0x00, 0x41 => "A" in UTF-16 BE + // The Samsung encoder seems to create some illegal UTF-8 sequences at the end of the string as a result of padding and operating on 32bit inputs (2 characters) with big-endianness, which + // this decoder has to take care of. 0xFFFD can appear both in the raw input bytes (0xFF, 0xFD) as well as already encoded into UTF-8 wrappings (0xEF,0xBF,0xBD) + + // This implementation here decodes the UTF-8 byte sequence into UTF-16 Little Endian for the sake of simplicity: aaaa=4, bbbb=1, cccc=0, dddd=0 => 0xE4, 0x84, 0x80 => 0x41, 0x00 => "A" in UTF-16 LE. + // The encoder here operates on 16bit characters and not 32bit 2-characters, so there is no need for padding and no invalid UTF-8 sequences.
/// <summary>
/// Encoding that wraps every 16-bit UTF-16 code unit inside a 3-byte UTF-8-style sequence
/// <c>1110aaaa 10bbbbcc 10ccdddd</c>, where <c>aaaabbbb</c> is the low byte and <c>ccccdddd</c> the high byte
/// of the code unit. Example: 0xE4, 0x84, 0x80 decodes to U+0041 ("A").
/// </summary>
/// <remarks>
/// The decoder is deliberately lenient: it also accepts 1-byte and 2-byte sequences (each carrying a single
/// payload byte), skips bytes above 0xF7, drops a single trailing 0x00 byte and drops the sequence
/// 0xEF, 0xBF, 0xBD (the UTF-8 form of U+FFFD).
/// The encoder always emits exactly 3 bytes per UTF-16 code unit.
/// </remarks>
public class Utf16InsideUtf8EnvelopeEncoding : Encoding
{
  /// <summary>Returns the maximum number of bytes needed to encode <paramref name="charCount"/> chars (always 3 per char).</summary>
  public override int GetMaxByteCount(int charCount)
  {
    return charCount * 3;
  }

  /// <summary>Returns the exact number of bytes produced for <paramref name="count"/> chars (always 3 per char).</summary>
  public override int GetByteCount(char[] chars, int index, int count)
  {
    return count * 3;
  }

  /// <summary>
  /// Encodes <paramref name="charCount"/> chars starting at <paramref name="charIndex"/> into
  /// <paramref name="bytes"/> starting at <paramref name="byteIndex"/>.
  /// </summary>
  /// <returns>The number of bytes written, i.e. <c>charCount * 3</c>.</returns>
  /// <exception cref="System.ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
  /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="byteIndex"/> lies outside <paramref name="bytes"/>.</exception>
  /// <exception cref="System.ArgumentException"><paramref name="bytes"/> is too small to hold the result.</exception>
  public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
  {
    if (bytes == null)
      throw new System.ArgumentNullException(nameof(bytes));
    if (byteIndex < 0 || byteIndex > bytes.Length)
      throw new System.ArgumentOutOfRangeException(nameof(byteIndex));

    // Unicode.GetBytes validates chars/charIndex/charCount and yields 2 little-endian bytes per char
    var utf16Le = Unicode.GetBytes(chars, charIndex, charCount);
    int required = charCount * 3;

    // fail before writing anything instead of running off the end of the destination halfway through
    if (bytes.Length - byteIndex < required)
      throw new System.ArgumentException("The destination array is too small for the encoded bytes.", nameof(bytes));

    int o = byteIndex;
    for (int i = 0; i < utf16Le.Length; i += 2, o += 3)
    {
      var lo = utf16Le[i + 0]; // aaaabbbb
      var hi = utf16Le[i + 1]; // ccccdddd
      bytes[o + 0] = (byte) (0xE0 + (lo >> 4));                       // 1110aaaa
      bytes[o + 1] = (byte) (0x80 + ((lo & 0x0F) << 2) + (hi >> 6));  // 10bbbbcc
      bytes[o + 2] = (byte) (0x80 + (hi & 0x3F));                     // 10ccdddd
    }

    return required;
  }

  /// <summary>
  /// Returns the maximum number of chars that <paramref name="byteCount"/> bytes can decode to.
  /// A 1-byte sequence already produces one char, so the upper bound is one char per byte.
  /// </summary>
  public override int GetMaxCharCount(int byteCount)
  {
    return byteCount;
  }

  /// <summary>Returns the exact number of chars that decoding the given byte range produces.</summary>
  /// <exception cref="System.ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="System.ArgumentOutOfRangeException">The range lies outside <paramref name="bytes"/>.</exception>
  public override int GetCharCount(byte[] bytes, int index, int count)
  {
    ValidateByteRange(bytes, index, count);
    // counting pass only: no output buffer; every decoded unit is 2 little-endian bytes
    return DecodeToUtf16Le(bytes, index, count, null) / 2;
  }

  /// <summary>
  /// Decodes the given byte range into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
  /// </summary>
  /// <returns>The number of chars written.</returns>
  /// <exception cref="System.ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
  /// <exception cref="System.ArgumentOutOfRangeException">The byte range lies outside <paramref name="bytes"/>.</exception>
  /// <exception cref="System.ArgumentException"><paramref name="chars"/> is too small for the decoded text.</exception>
  public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
  {
    ValidateByteRange(bytes, byteIndex, byteCount);

    // every consumed input byte yields at most 2 output bytes
    var utf16Le = new byte[byteCount * 2];
    int length = DecodeToUtf16Le(bytes, byteIndex, byteCount, utf16Le);
    return Unicode.GetChars(utf16Le, 0, length, chars, charIndex);
  }

  /// <summary>Throws when <paramref name="index"/>/<paramref name="count"/> do not denote a range inside <paramref name="bytes"/>.</summary>
  private static void ValidateByteRange(byte[] bytes, int index, int count)
  {
    if (bytes == null)
      throw new System.ArgumentNullException(nameof(bytes));
    if (index < 0 || count < 0 || bytes.Length - index < count)
      throw new System.ArgumentOutOfRangeException(nameof(index), "index and count must denote a range inside the byte array.");
  }

  /// <summary>
  /// Unwraps the envelope into UTF-16 little-endian bytes.
  /// When <paramref name="utf16Le"/> is null nothing is stored and only the output length is computed.
  /// </summary>
  /// <returns>The number of UTF-16 LE bytes produced (always even).</returns>
  private static int DecodeToUtf16Le(byte[] bytes, int byteIndex, int byteCount, byte[] utf16Le)
  {
    int written = 0;
    int end = byteIndex + byteCount;
    for (int i = byteIndex; i < end; i++)
    {
      int b0 = bytes[i];
      if (b0 == 0 && i == end - 1) // a single trailing 0x00 byte terminates the text
        break;
      if (b0 > 0xF7) // not a valid lead byte; a raw 0xFF, 0xFD pair can appear at the end of the data
        continue;

      if (b0 >= 0xE0) // 3-byte envelope carrying 2 payload bytes
      {
        // continuation bytes beyond the requested range are treated as 0x00 instead of being read
        int b1 = i + 1 < end ? bytes[i + 1] : 0;
        int b2 = i + 2 < end ? bytes[i + 2] : 0;
        if ((b2 & 0xC0) != 0x80) // no valid 2nd continuation byte: only 1110aaaa 10bbbbcc is present
        {
          b2 = 0;
          --i; // consume one byte less, the 3rd byte starts the next sequence
        }

        int ch1 = ((b0 & 0x0F) << 4) | ((b1 & 0x3C) >> 2);
        int ch2 = ((b1 & 0x03) << 6) | (b2 & 0x3F);
        if (ch1 != 0xFF || ch2 != 0xFD) // this pair results from 0xEF, 0xBF, 0xBD (UTF-8 form of U+FFFD) and is dropped
          written = Append(utf16Le, written, (byte) ch1, (byte) ch2);
        i += 2;
      }
      else if (b0 >= 0xC0) // 2-byte envelope carrying 1 payload byte: 110xaaaa 10bbbbcc => aaaabbbb
      {
        int b1 = i + 1 < end ? bytes[i + 1] : 0;
        int ch = ((b0 & 0x0F) << 4) | ((b1 & 0x3C) >> 2);
        written = Append(utf16Le, written, (byte) ch, 0);
        i++;
      }
      else if (b0 < 0x80) // plain 1-byte value
      {
        written = Append(utf16Le, written, (byte) b0, 0);
      }
      // 0x80..0xBF: stray continuation byte, skipped
    }

    return written;
  }

  /// <summary>Stores one little-endian code unit (if a buffer is given) and returns the new output length.</summary>
  private static int Append(byte[] utf16Le, int offset, byte low, byte high)
  {
    if (utf16Le != null)
    {
      utf16Le[offset] = low;
      utf16Le[offset + 1] = high;
    }
    return offset + 2;
  }
}
= client.DownloadString(UpdateUrl); } finally { diff --git a/source/changelog.md b/source/changelog.md index f426bb4..534af6b 100644 --- a/source/changelog.md +++ b/source/changelog.md @@ -1,6 +1,11 @@ ChanSort Change Log =================== +2020-07-13 +- Samsung 1242 format: channel names were displayed as Chinese characters instead of Latin letters + (Names are not stored as characters in this format, but instead each 16-bit UTF-16 code unit is encoded as "payload" + inside a 3-byte UTF-8 sequence) + 2020-07-12 - added UTF-16 Big Endian and Little Endian options to character set menu - Samsung .zip loader: auto-detect UTF-16 endianness and allow to change encoding after loading to UTF-16 LE/BE