diff --git a/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj b/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj
index 2c8a07a..3462992 100644
--- a/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj
+++ b/source/ChanSort.Loader.SamsungJ/ChanSort.Loader.SamsungJ.csproj
@@ -75,6 +75,7 @@
+
diff --git a/source/ChanSort.Loader.SamsungJ/DbSerializer.cs b/source/ChanSort.Loader.SamsungJ/DbSerializer.cs
index b40bc2b..26dd6c7 100644
--- a/source/ChanSort.Loader.SamsungJ/DbSerializer.cs
+++ b/source/ChanSort.Loader.SamsungJ/DbSerializer.cs
@@ -4,13 +4,14 @@ using System.Data;
using System.Data.SQLite;
using System.IO;
using System.Linq;
+using System.Runtime.CompilerServices;
using System.Text;
using ChanSort.Api;
namespace ChanSort.Loader.SamsungJ
{
///
- /// Loader for Samsung J/K/M/N/R/Q series .zip files (2015 - 2019+)
+ /// Loader for Samsung J/K/M/N/R/Q series .zip files (2015 - 2020)
///
class DbSerializer : SerializerBase
{
@@ -218,11 +219,22 @@ namespace ChanSort.Loader.SamsungJ
try
{
cmd.CommandText = "select provId, cast(provName as blob) from PROV";
+ // While Sat and Service names might be UTF-16 binary data inside a UTF-8 envelope, the providers are always plain UTF-16 (big endian), so that encoding is forced while the provider names are read.
+ var prevEncoding = this.encoding;
+ this.encoding = Encoding.BigEndianUnicode;
+ try
+ {
using (var r = cmd.ExecuteReader())
{
while (r.Read())
dict.Add(r.GetInt64(0), ReadUtf16(r, 1));
}
+ }
+ finally
+ {
+ // Restore the previous (possibly still undetected = null) encoding even if reading throws; otherwise the surrounding catch would leave the forced encoding in place for everything that is read later.
+ this.encoding = prevEncoding;
+ }
}
catch
{
@@ -392,7 +397,7 @@ namespace ChanSort.Loader.SamsungJ
return null;
byte[] nameBytes = new byte[200];
int nameLen = (int)r.GetBytes(fieldIndex, 0, nameBytes, 0, nameBytes.Length);
- this.encoding ??= AutoDetectUtf16Endian(nameBytes, nameLen);
+ this.encoding ??= AutoDetectUtf16Encoding(nameBytes, nameLen);
if (this.encoding == null)
return string.Empty;
@@ -401,24 +406,35 @@ namespace ChanSort.Loader.SamsungJ
#endregion
#region AutoDetectUtf16Endian()
- private Encoding AutoDetectUtf16Endian(byte[] nameBytes, int nameLen)
+ private Encoding AutoDetectUtf16Encoding(byte[] nameBytes, int nameLen)
{
if (this.DefaultEncoding is UnicodeEncoding)
return this.DefaultEncoding;
int evenBytesZero = 0;
int oddBytesZero = 0;
+ int bytesAbove128 = 0;
for (int i = 0; i < nameLen; i += 2)
{
if (nameBytes[i] == 0)
++evenBytesZero;
+ if (nameBytes[i] >= 128)
+ ++bytesAbove128;
if (nameBytes[i + 1] == 0)
++oddBytesZero;
+ if (nameBytes[i + 1] >= 128)
+ ++bytesAbove128;
}
if (evenBytesZero + oddBytesZero == nameLen)
return null;
+ if (bytesAbove128 + 1 >= nameLen)
+ {
+ //this.Features.ChannelNameEdit = ChannelNameEditMode.None; // unclear if the encoder produces byte sequences that the TV can decode again
+ return new Utf16InsideUtf8EnvelopeEncoding();
+ }
+
return evenBytesZero >= oddBytesZero ? Encoding.BigEndianUnicode : Encoding.Unicode;
}
@@ -482,45 +498,44 @@ namespace ChanSort.Loader.SamsungJ
#region SaveChannelList()
private void SaveChannelList(ChannelList channelList, string dbPath)
{
- using (var conn = new SQLiteConnection("Data Source=" + dbPath))
+ using var conn = new SQLiteConnection("Data Source=" + dbPath);
+ conn.Open();
+ using var cmdUpdateSrv = PrepareUpdateCommand(conn);
+ using var cmdDeleteSrv = PrepareDeleteCommand(conn, (channelList.SignalSource & SignalSource.Digital) != 0);
+ using var cmdInsertFav = PrepareInsertFavCommand(conn);
+ using var cmdUpdateFav = PrepareUpdateFavCommand(conn);
+ using var cmdDeleteFav = PrepareDeleteFavCommand(conn);
+ using (var trans = conn.BeginTransaction())
{
- conn.Open();
- using (var cmdUpdateSrv = PrepareUpdateCommand(conn))
- using (var cmdDeleteSrv = PrepareDeleteCommand(conn, (channelList.SignalSource & SignalSource.Digital) != 0))
- using (var cmdInsertFav = PrepareInsertFavCommand(conn))
- using (var cmdUpdateFav = PrepareUpdateFavCommand(conn))
- using (var cmdDeleteFav = PrepareDeleteFavCommand(conn))
- {
- using (var trans = conn.BeginTransaction())
- {
- Editor.SequentializeFavPos(channelList, 5);
- this.WriteChannels(cmdUpdateSrv, cmdDeleteSrv, cmdInsertFav, cmdUpdateFav, cmdDeleteFav, channelList);
- trans.Commit();
- }
- this.RepairCorruptedDatabaseImage(cmdUpdateSrv);
- }
+ Editor.SequentializeFavPos(channelList, 5);
+ this.WriteChannels(cmdUpdateSrv, cmdDeleteSrv, cmdInsertFav, cmdUpdateFav, cmdDeleteFav, channelList);
+ trans.Commit();
}
+ this.RepairCorruptedDatabaseImage(cmdUpdateSrv);
}
#endregion
#region Prepare*Command()
- private static SQLiteCommand PrepareUpdateCommand(SQLiteConnection conn)
+ private SQLiteCommand PrepareUpdateCommand(SQLiteConnection conn)
{
+ var canUpdateNames = this.Features.ChannelNameEdit != ChannelNameEditMode.None;
var cmd = conn.CreateCommand();
- cmd.CommandText = "update SRV set major=@nr, lockMode=@lock, hideGuide=@hidden, hidden=@hidden, numSel=@numsel, srvName=cast(@srvname as varchar) where srvId=@id";
+ var updateSrvName = canUpdateNames ? ", srvName=cast(@srvname as varchar)" : "";
+ cmd.CommandText = "update SRV set major=@nr, lockMode=@lock, hideGuide=@hidden, hidden=@hidden, numSel=@numsel" + updateSrvName + " where srvId=@id";
cmd.Parameters.Add(new SQLiteParameter("@id", DbType.Int64));
cmd.Parameters.Add(new SQLiteParameter("@nr", DbType.Int32));
cmd.Parameters.Add(new SQLiteParameter("@lock", DbType.Boolean));
cmd.Parameters.Add(new SQLiteParameter("@hidden", DbType.Boolean));
cmd.Parameters.Add(new SQLiteParameter("@numsel", DbType.Boolean));
- cmd.Parameters.Add(new SQLiteParameter("@srvname", DbType.Binary));
+ if (canUpdateNames)
+ cmd.Parameters.Add(new SQLiteParameter("@srvname", DbType.Binary));
cmd.Prepare();
return cmd;
}
- private static SQLiteCommand PrepareDeleteCommand(SQLiteConnection conn, bool digital)
+ private SQLiteCommand PrepareDeleteCommand(SQLiteConnection conn, bool digital)
{
var cmd = conn.CreateCommand();
var sql = new StringBuilder();
@@ -536,7 +551,7 @@ namespace ChanSort.Loader.SamsungJ
return cmd;
}
- private static SQLiteCommand PrepareInsertFavCommand(SQLiteConnection conn)
+ private SQLiteCommand PrepareInsertFavCommand(SQLiteConnection conn)
{
var cmd = conn.CreateCommand();
cmd.CommandText = "insert into SRV_FAV (srvId, fav, pos) values (@id, @fav, @pos)";
@@ -547,7 +562,7 @@ namespace ChanSort.Loader.SamsungJ
return cmd;
}
- private static SQLiteCommand PrepareUpdateFavCommand(SQLiteConnection conn)
+ private SQLiteCommand PrepareUpdateFavCommand(SQLiteConnection conn)
{
var cmd = conn.CreateCommand();
cmd.CommandText = "update SRV_FAV set pos=@pos where srvId=@id and fav=@fav";
@@ -557,7 +572,7 @@ namespace ChanSort.Loader.SamsungJ
cmd.Prepare();
return cmd;
}
- private static SQLiteCommand PrepareDeleteFavCommand(SQLiteConnection conn)
+ private SQLiteCommand PrepareDeleteFavCommand(SQLiteConnection conn)
{
var cmd = conn.CreateCommand();
cmd.CommandText = "delete from SRV_FAV where srvId=@id and fav=@fav";
@@ -573,7 +588,7 @@ namespace ChanSort.Loader.SamsungJ
private void WriteChannels(SQLiteCommand cmdUpdateSrv, SQLiteCommand cmdDeleteSrv, SQLiteCommand cmdInsertFav, SQLiteCommand cmdUpdateFav, SQLiteCommand cmdDeleteFav,
ChannelList channelList, bool analog = false)
{
-
+ bool canUpdateNames = this.Features.ChannelNameEdit != ChannelNameEditMode.None;
foreach (ChannelInfo channelInfo in channelList.Channels.ToList())
{
var channel = channelInfo as DbChannel;
@@ -595,7 +610,8 @@ namespace ChanSort.Loader.SamsungJ
cmdUpdateSrv.Parameters["@lock"].Value = channel.Lock;
cmdUpdateSrv.Parameters["@hidden"].Value = channel.Hidden;
cmdUpdateSrv.Parameters["@numsel"].Value = !channel.Skip;
- cmdUpdateSrv.Parameters["@srvname"].Value = channel.Name == null ? (object)DBNull.Value : encoding.GetBytes(channel.Name);
+ if (canUpdateNames)
+ cmdUpdateSrv.Parameters["@srvname"].Value = channel.Name == null ? (object)DBNull.Value : encoding.GetBytes(channel.Name);
cmdUpdateSrv.ExecuteNonQuery();
// update favorites
diff --git a/source/ChanSort.Loader.SamsungJ/Utf16InsideUtf8EnvelopeEncoding.cs b/source/ChanSort.Loader.SamsungJ/Utf16InsideUtf8EnvelopeEncoding.cs
new file mode 100644
index 0000000..1929719
--- /dev/null
+++ b/source/ChanSort.Loader.SamsungJ/Utf16InsideUtf8EnvelopeEncoding.cs
@@ -0,0 +1,125 @@
+using System.IO;
+using System.Text;
+
+namespace ChanSort.Loader.SamsungJ
+{
+  // Samsung 1242 format does not store UTF16 characters directly, but instead wraps 16 data bits inside a UTF-8 lead + continuation byte sequence.
+  // A 3 byte UTF-8 sequence is used to encode 16 bits of utf-16 big endian input: 1110aaaa 10bbbbcc 10ccdddd represents the 16bit big endian integer ccccddddaaaabbbb, i.e. 0xE4, 0x84, 0x80 => 0x00, 0x41 => "A" in UTF-16 BE
+  // The Samsung encoder seems to create some illegal UTF-8 sequences at the end of the string as a result of padding and operating on 32bit inputs (2 characters) with big-endianness, which
+  // this decoder has to take care of. 0xFFFD can appear both in the raw input bytes (0xFF, 0xFD) as well as already encoded into UTF-8 wrappings (0xEF,0xBF,0xBD)
+
+  // This implementation here decodes the UTF-8 byte sequence into UTF-16 Little Endian for the sake of simplicity: aaaa=4, bbbb=1, cccc=0, dddd=0 => 0xE4, 0x84, 0x80 => 0x41, 0x00 => "A" in UTF-16 LE.
+  // The encoder here operates on 16bit characters and not 32bit 2-characters, so there is no need for padding and no invalid UTF-8 sequences.
+
+  public class Utf16InsideUtf8EnvelopeEncoding : Encoding
+  {
+    /// <summary>Upper bound for the encoded size: every UTF-16 code unit becomes one 3 byte sequence.</summary>
+    public override int GetMaxByteCount(int charCount)
+    {
+      return charCount * 3;
+    }
+
+    /// <summary>Exact encoded size: 3 bytes per UTF-16 code unit.</summary>
+    public override int GetByteCount(char[] chars, int index, int count)
+    {
+      return count * 3;
+    }
+
+    /// <summary>
+    /// Wraps each UTF-16 code unit of the input into a 3 byte sequence 1110aaaa 10bbbbcc 10ccdddd,
+    /// where aaaabbbb is the low byte and ccccdddd the high byte of the code unit.
+    /// </summary>
+    /// <returns>the number of bytes written to <paramref name="bytes"/></returns>
+    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+    {
+      var utf16Le = Unicode.GetBytes(chars, charIndex, charCount);
+      int o = byteIndex;
+      for (int i = 0; i < utf16Le.Length; i += 2, o += 3)
+      {
+        var b0 = utf16Le[i + 0];
+        var b1 = utf16Le[i + 1];
+        bytes[o + 0] = (byte) (0xE0 + (b0 >> 4));
+        bytes[o + 1] = (byte) (0x80 + ((b0 & 0x0F) << 2) + (b1 >> 6));
+        bytes[o + 2] = (byte) (0x80 + (b1 & 0x3F));
+      }
+
+      return o - byteIndex;
+    }
+
+    /// <summary>Upper bound for the decoded size: bytes below 0x80 are passed through as one character each.</summary>
+    public override int GetMaxCharCount(int byteCount)
+    {
+      return byteCount;
+    }
+
+    /// <summary>
+    /// Exact number of characters that GetChars produces for the same input. The input has to be decoded for this,
+    /// because 1, 2 and 3 byte sequences can be mixed and padding / U+FFFD sequences are dropped.
+    /// </summary>
+    public override int GetCharCount(byte[] bytes, int index, int count)
+    {
+      var utf16Le = UnwrapToUtf16Le(bytes, index, count);
+      return Unicode.GetCharCount(utf16Le, 0, utf16Le.Length);
+    }
+
+    /// <summary>Decodes the envelope bytes of the given range into characters.</summary>
+    /// <returns>the number of characters written to <paramref name="chars"/></returns>
+    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
+    {
+      var utf16Le = UnwrapToUtf16Le(bytes, byteIndex, byteCount);
+      return Unicode.GetChars(utf16Le, 0, utf16Le.Length, chars, charIndex);
+    }
+
+    /// <summary>
+    /// Removes the UTF-8 envelope and returns the payload as UTF-16 little endian bytes.
+    /// Bytes beyond the given range are never read; a sequence truncated at the end of the range is completed with 0x00.
+    /// </summary>
+    private static byte[] UnwrapToUtf16Le(byte[] bytes, int byteIndex, int byteCount)
+    {
+      using MemoryStream ms = new MemoryStream(40);
+      int c = byteIndex + byteCount;
+      for (int i = byteIndex; i < c; i++)
+      {
+        int b0 = bytes[i];
+        if (b0 == 0 && i == c - 1) // satellite names end with a single trailing 0x00 byte
+          break;
+        if (b0 > 0xF7) // invalid UTF-8 lead byte. (0xFF, 0xFD) = 0xFFFD in BigEndian can appear unencoded at the end of the byte stream, likely as a padding
+          continue;
+        if (b0 >= 0xE0) // 3-byte UTF envelope for 2 input bytes
+        {
+          int b1 = i + 1 < c ? bytes[i + 1] : (byte) 0;
+          int b2 = i + 2 < c ? bytes[i + 2] : (byte) 0;
+          if ((b2 & 0xC0) != 0x80) // invalid 2nd UTF-8 continuation byte; only a single byte is encoded as 1110aaaa 10bbbbcc => aaaabbbb
+          {
+            b2 = 0;
+            --i; // the 3rd byte does not belong to this sequence, so only 2 input bytes are consumed
+          }
+          int ch1 = ((b0 & 0x0F) << 4) | ((b1 & 0x3C) >> 2);
+          int ch2 = ((b1 & 0x03) << 6) | (b2 & 0x3F);
+          if (ch1 != 0xFF || ch2 != 0xFD) // ignore UTF-16 "replacement character" U-0xFFFD
+          {
+            ms.WriteByte((byte) ch1);
+            ms.WriteByte((byte) ch2);
+          }
+          i += 2;
+        }
+        else if (b0 >= 0xC0) // 2-byte UTF envelope for 1 input byte as 110xaaaa 10bbbbcc => aaaabbbb
+        {
+          int b1 = i + 1 < c ? bytes[i + 1] : (byte) 0;
+          int ch = ((b0 & 0x0F) << 4) | ((b1 & 0x3C) >> 2);
+          ms.WriteByte((byte) ch);
+          ms.WriteByte(0);
+          i++;
+        }
+        else if (b0 < 0x80) // 1-byte UTF envelope for 1 input byte < 0x80
+        {
+          ms.WriteByte((byte) b0);
+          ms.WriteByte(0);
+        }
+        // 0x80 - 0xBF: continuation byte without a preceding lead byte, skipped
+      }
+
+      return ms.ToArray();
+    }
+  }
+}
diff --git a/source/ChanSort/UpdateCheck.cs b/source/ChanSort/UpdateCheck.cs
index b2ab5f4..b28a558 100644
--- a/source/ChanSort/UpdateCheck.cs
+++ b/source/ChanSort/UpdateCheck.cs
@@ -1,6 +1,4 @@
-using System;
-using System.Net;
-using System.Net.Security;
+using System.Net;
using System.Threading;
using ChanSort.Ui.Properties;
using DevExpress.XtraEditors;
@@ -40,11 +38,9 @@ namespace ChanSort.Ui
//Change SSL checks so that all checks pass
//ServicePointManager.ServerCertificateValidationCallback = delegate { return true; };
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
- using (WebClient client = new WebClient())
- {
- client.Proxy = null; // prevent a 1min wait/timeout by a .NET bug
- response = client.DownloadString(UpdateUrl);
- }
+ using WebClient client = new WebClient();
+ client.Proxy = null; // prevent a 1min wait/timeout by a .NET bug
+ response = client.DownloadString(UpdateUrl);
}
finally
{
diff --git a/source/changelog.md b/source/changelog.md
index f426bb4..534af6b 100644
--- a/source/changelog.md
+++ b/source/changelog.md
@@ -1,6 +1,11 @@
ChanSort Change Log
===================
+2020-07-13
+- Samsung 1242 format: channel names were displayed as Chinese characters instead of Latin letters
+ (Names are not stored as plain characters in this format; instead, each 16 bit UTF-16 code unit is encoded as "payload"
+ inside a 3 byte UTF-8 sequence)
+
2020-07-12
- added UTF-16 Big Endian and Little Endian options to character set menu
- Samsung .zip loader: auto-detect UTF-16 endianness and allow to change encoding after loading to UTF-16 LE/BE