Skip to content

Commit 3930f3a

Browse files
authored
Merge pull request #116 from Maxteabag/fix/issue-90-thai-unicode-support
fix: Thai language (TIS-620) Unicode support
2 parents f3a1358 + e7dae46 commit 3930f3a

File tree

8 files changed

+641
-1
lines changed

8 files changed

+641
-1
lines changed

infra/docker/docker-compose.test.yml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,48 @@ services:
5151
tmpfs:
5252
- /var/lib/mysql
5353

54+
# MySQL with TIS-620 charset (Thai) - for charset testing
55+
mysql-tis620:
56+
image: mysql:8.0
57+
container_name: sqlit-test-mysql-tis620
58+
command: --character-set-server=tis620 --collation-server=tis620_thai_ci
59+
environment:
60+
MYSQL_ROOT_PASSWORD: "TestPassword123!"
61+
MYSQL_USER: "testuser"
62+
MYSQL_PASSWORD: "TestPassword123!"
63+
MYSQL_DATABASE: "test_sqlit"
64+
ports:
65+
- "${MYSQL_TIS620_PORT:-3308}:3306"
66+
healthcheck:
67+
test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "testuser", "-pTestPassword123!"]
68+
interval: 5s
69+
timeout: 5s
70+
retries: 10
71+
start_period: 30s
72+
tmpfs:
73+
- /var/lib/mysql
74+
75+
# MySQL with Latin1 charset - for charset testing
76+
mysql-latin1:
77+
image: mysql:8.0
78+
container_name: sqlit-test-mysql-latin1
79+
command: --character-set-server=latin1 --collation-server=latin1_swedish_ci
80+
environment:
81+
MYSQL_ROOT_PASSWORD: "TestPassword123!"
82+
MYSQL_USER: "testuser"
83+
MYSQL_PASSWORD: "TestPassword123!"
84+
MYSQL_DATABASE: "test_sqlit"
85+
ports:
86+
- "${MYSQL_LATIN1_PORT:-3309}:3306"
87+
healthcheck:
88+
test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "testuser", "-pTestPassword123!"]
89+
interval: 5s
90+
timeout: 5s
91+
retries: 10
92+
start_period: 30s
93+
tmpfs:
94+
- /var/lib/mysql
95+
5496
clickhouse:
5597
image: clickhouse/clickhouse-server:latest
5698
container_name: sqlit-test-clickhouse

sqlit/domains/connections/providers/mariadb/adapter.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@ def connect(self, config: ConnectionConfig) -> Any:
101101
connect_args["ssl_verify_identity"] = tls_mode_verifies_hostname(tls_mode)
102102

103103
conn = mariadb_any.connect(**connect_args)
104+
105+
# Note: The MariaDB Python connector only supports UTF-8 family charsets.
106+
# Legacy charsets like TIS-620 or Latin1 are not supported. For databases
107+
# using legacy charsets, use the MySQL provider with PyMySQL instead.
108+
104109
self._supports_sequences = self._detect_sequences_support(conn)
105110
return conn
106111

sqlit/domains/connections/providers/mysql/adapter.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,24 @@ def connect(self, config: ConnectionConfig) -> Any:
110110
ssl_params["check_hostname"] = tls_mode_verifies_hostname(tls_mode)
111111
connect_args["ssl"] = ssl_params
112112

113-
return pymysql.connect(**connect_args)
113+
conn = pymysql.connect(**connect_args)
114+
115+
# Auto-sync charset with server to handle legacy encodings (e.g., TIS-620, Latin1).
116+
# This ensures data is read correctly when the database uses a non-UTF-8 charset.
117+
try:
118+
cursor = conn.cursor()
119+
cursor.execute("SELECT @@character_set_database")
120+
row = cursor.fetchone()
121+
if row:
122+
server_charset = row[0]
123+
# Only switch if server uses a different charset than our default (utf8mb4)
124+
if server_charset and server_charset.lower() != "utf8mb4":
125+
# Use set_charset() which both sends SET NAMES AND updates
126+
# PyMySQL's internal encoding for proper byte decoding
127+
conn.set_charset(server_charset)
128+
cursor.close()
129+
except Exception:
130+
# If charset sync fails, continue with default - better than failing completely
131+
pass
132+
133+
return conn

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from tests.fixtures.mariadb import *
1515
from tests.fixtures.mssql import *
1616
from tests.fixtures.mysql import *
17+
from tests.fixtures.mysql_charset import *
1718
from tests.fixtures.oracle import *
1819
from tests.fixtures.oracle_legacy import *
1920
from tests.fixtures.presto import *

tests/fixtures/data.duckdb

12 KB
Binary file not shown.

tests/fixtures/data.duckdb.wal

859 Bytes
Binary file not shown.

tests/fixtures/mysql_charset.py

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
"""MySQL charset fixtures for testing legacy encodings (TIS-620, Latin1, etc.)."""
2+
3+
from __future__ import annotations
4+
5+
import os
6+
import time
7+
8+
import pytest
9+
10+
from tests.fixtures.utils import cleanup_connection, is_port_open, run_cli
11+
12+
# TIS-620 (Thai) MySQL
13+
MYSQL_TIS620_HOST = os.environ.get("MYSQL_TIS620_HOST", "localhost")
14+
MYSQL_TIS620_PORT = int(os.environ.get("MYSQL_TIS620_PORT", "3308"))
15+
16+
# Latin1 MySQL
17+
MYSQL_LATIN1_HOST = os.environ.get("MYSQL_LATIN1_HOST", "localhost")
18+
MYSQL_LATIN1_PORT = int(os.environ.get("MYSQL_LATIN1_PORT", "3309"))
19+
20+
# Common credentials (same as other MySQL containers)
21+
MYSQL_CHARSET_USER = os.environ.get("MYSQL_CHARSET_USER", "root")
22+
MYSQL_CHARSET_PASSWORD = os.environ.get("MYSQL_CHARSET_PASSWORD", "TestPassword123!")
23+
MYSQL_CHARSET_DATABASE = os.environ.get("MYSQL_CHARSET_DATABASE", "test_sqlit")
24+
25+
26+
def mysql_tis620_available() -> bool:
    """Return the result of probing the TIS-620 MySQL host/port with ``is_port_open``."""
    host, port = MYSQL_TIS620_HOST, MYSQL_TIS620_PORT
    return is_port_open(host, port)
29+
30+
31+
def mysql_latin1_available() -> bool:
    """Return the result of probing the Latin1 MySQL host/port with ``is_port_open``."""
    host, port = MYSQL_LATIN1_HOST, MYSQL_LATIN1_PORT
    return is_port_open(host, port)
34+
35+
36+
@pytest.fixture(scope="session")
def mysql_tis620_server_ready() -> bool:
    """Session fixture: True when the TIS-620 MySQL port probe succeeds, else False."""
    if mysql_tis620_available():
        # Short pause after a successful probe before reporting the server as ready.
        time.sleep(1)
        return True
    return False
43+
44+
45+
@pytest.fixture(scope="session")
def mysql_latin1_server_ready() -> bool:
    """Session fixture: True when the Latin1 MySQL port probe succeeds, else False."""
    if mysql_latin1_available():
        # Short pause after a successful probe before reporting the server as ready.
        time.sleep(1)
        return True
    return False
52+
53+
54+
@pytest.fixture(scope="function")
def mysql_tis620_db(mysql_tis620_server_ready: bool) -> str:
    """Set up the ``charset_test`` table with Thai rows on the TIS-620 MySQL server.

    The test is skipped when ``mysql_tis620_server_ready`` is falsy, when
    PyMySQL cannot be imported, or when the setup statements fail.

    Yields:
        The database name (``MYSQL_CHARSET_DATABASE``). After the test, the
        ``charset_test`` table is dropped on a best-effort basis.
    """
    if not mysql_tis620_server_ready:
        pytest.skip("MySQL TIS-620 is not available")

    try:
        import pymysql
    except ImportError:
        pytest.skip("PyMySQL is not installed")

    def _connect():
        # Connect with tis620 charset to match the server charset.
        return pymysql.connect(
            host=MYSQL_TIS620_HOST,
            port=MYSQL_TIS620_PORT,
            database=MYSQL_CHARSET_DATABASE,
            user=MYSQL_CHARSET_USER,
            password=MYSQL_CHARSET_PASSWORD,
            connect_timeout=10,
            charset="tis620",
        )

    try:
        conn = _connect()
        try:
            cursor = conn.cursor()

            # Create table and insert Thai data
            cursor.execute("DROP TABLE IF EXISTS charset_test")
            cursor.execute(
                "CREATE TABLE charset_test (id INT PRIMARY KEY, content TEXT) "
                "CHARACTER SET tis620 COLLATE tis620_thai_ci"
            )

            cursor.execute("INSERT INTO charset_test VALUES (1, 'สวัสดีครับ')")
            cursor.execute("INSERT INTO charset_test VALUES (2, 'ภาษาไทย')")
            cursor.execute("INSERT INTO charset_test VALUES (3, 'กรุงเทพมหานคร')")

            conn.commit()
        finally:
            # Always release the connection, even when a statement above fails.
            conn.close()
    except Exception as e:
        pytest.skip(f"Failed to setup MySQL TIS-620 database: {e}")

    yield MYSQL_CHARSET_DATABASE

    # Cleanup is best-effort: a failure here must not fail the test itself.
    try:
        conn = _connect()
        try:
            cursor = conn.cursor()
            cursor.execute("DROP TABLE IF EXISTS charset_test")
            conn.commit()
        finally:
            conn.close()
    except Exception:
        pass
114+
115+
116+
@pytest.fixture(scope="function")
def mysql_latin1_db(mysql_latin1_server_ready: bool) -> str:
    """Set up the ``charset_test`` table with accented Latin rows on the Latin1 MySQL server.

    The test is skipped when ``mysql_latin1_server_ready`` is falsy, when
    PyMySQL cannot be imported, or when the setup statements fail.

    Yields:
        The database name (``MYSQL_CHARSET_DATABASE``). After the test, the
        ``charset_test`` table is dropped on a best-effort basis.
    """
    if not mysql_latin1_server_ready:
        pytest.skip("MySQL Latin1 is not available")

    try:
        import pymysql
    except ImportError:
        pytest.skip("PyMySQL is not installed")

    def _connect():
        # Connect with latin1 charset to match the server charset.
        return pymysql.connect(
            host=MYSQL_LATIN1_HOST,
            port=MYSQL_LATIN1_PORT,
            database=MYSQL_CHARSET_DATABASE,
            user=MYSQL_CHARSET_USER,
            password=MYSQL_CHARSET_PASSWORD,
            connect_timeout=10,
            charset="latin1",
        )

    try:
        conn = _connect()
        try:
            cursor = conn.cursor()

            # Create table and insert Latin1 data
            cursor.execute("DROP TABLE IF EXISTS charset_test")
            cursor.execute(
                "CREATE TABLE charset_test (id INT PRIMARY KEY, content TEXT) "
                "CHARACTER SET latin1 COLLATE latin1_swedish_ci"
            )

            cursor.execute("INSERT INTO charset_test VALUES (1, 'café')")
            cursor.execute("INSERT INTO charset_test VALUES (2, 'naïve')")
            cursor.execute("INSERT INTO charset_test VALUES (3, 'Müller')")
            cursor.execute("INSERT INTO charset_test VALUES (4, 'señor')")

            conn.commit()
        finally:
            # Always release the connection, even when a statement above fails.
            conn.close()
    except Exception as e:
        pytest.skip(f"Failed to setup MySQL Latin1 database: {e}")

    yield MYSQL_CHARSET_DATABASE

    # Cleanup is best-effort: a failure here must not fail the test itself.
    try:
        conn = _connect()
        try:
            cursor = conn.cursor()
            cursor.execute("DROP TABLE IF EXISTS charset_test")
            conn.commit()
        finally:
            conn.close()
    except Exception:
        pass
177+
178+
179+
@pytest.fixture(scope="function")
def mysql_tis620_connection(mysql_tis620_db: str) -> str:
    """Register a sqlit CLI connection to the TIS-620 MySQL server and yield its name.

    The name embeds the current process id. ``cleanup_connection`` is called
    for that name both before the connection is added and after the test.
    """
    connection_name = f"test_mysql_tis620_{os.getpid()}"

    # Clear out any connection with this name before adding a new one.
    cleanup_connection(connection_name)

    cli_args = [
        "connections",
        "add",
        "mysql",
        "--name",
        connection_name,
        "--server",
        MYSQL_TIS620_HOST,
        "--port",
        str(MYSQL_TIS620_PORT),
        "--database",
        mysql_tis620_db,
        "--username",
        MYSQL_CHARSET_USER,
        "--password",
        MYSQL_CHARSET_PASSWORD,
    ]
    run_cli(*cli_args)

    yield connection_name

    cleanup_connection(connection_name)
207+
208+
209+
@pytest.fixture(scope="function")
def mysql_latin1_connection(mysql_latin1_db: str) -> str:
    """Register a sqlit CLI connection to the Latin1 MySQL server and yield its name.

    The name embeds the current process id. ``cleanup_connection`` is called
    for that name both before the connection is added and after the test.
    """
    connection_name = f"test_mysql_latin1_{os.getpid()}"

    # Clear out any connection with this name before adding a new one.
    cleanup_connection(connection_name)

    cli_args = [
        "connections",
        "add",
        "mysql",
        "--name",
        connection_name,
        "--server",
        MYSQL_LATIN1_HOST,
        "--port",
        str(MYSQL_LATIN1_PORT),
        "--database",
        mysql_latin1_db,
        "--username",
        MYSQL_CHARSET_USER,
        "--password",
        MYSQL_CHARSET_PASSWORD,
    ]
    run_cli(*cli_args)

    yield connection_name

    cleanup_connection(connection_name)
237+
238+
239+
__all__ = [
240+
"MYSQL_CHARSET_DATABASE",
241+
"MYSQL_CHARSET_PASSWORD",
242+
"MYSQL_CHARSET_USER",
243+
"MYSQL_LATIN1_HOST",
244+
"MYSQL_LATIN1_PORT",
245+
"MYSQL_TIS620_HOST",
246+
"MYSQL_TIS620_PORT",
247+
"mysql_latin1_available",
248+
"mysql_latin1_connection",
249+
"mysql_latin1_db",
250+
"mysql_latin1_server_ready",
251+
"mysql_tis620_available",
252+
"mysql_tis620_connection",
253+
"mysql_tis620_db",
254+
"mysql_tis620_server_ready",
255+
]

0 commit comments

Comments
 (0)