Skip to content

Commit e7a3562

Browse files
committed
refactor: improve character conversion to use unidecode for better ASCII representation
1 parent a3e170e commit e7a3562

File tree

1 file changed: +9 -23 lines changed

src/rda_python_common/PgLOG.py

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
 import socket
 import shutil
 import traceback
+from unidecode import unidecode
 
 # define some constants for logging actions
 MSGLOG = (0x00001) # logging message
@@ -1579,33 +1580,18 @@ def check_process_host(hosts, chost = None, mflag = None, pinfo = None, logact =
    return ret
 
 #
-# convert special characters
+# convert special foreign characters into ascii characters
 #
 def convert_chars(name, default = 'X'):
 
    if not name: return default
-   if re.match(r'^[a-zA-Z0-9]+$', name): return name # no need convert
-
-   z = ord('z')
-   newchrs = ochrs = ''
-   for i in range(len(name)):
-      ch = name[i]
-      if re.match(r'^[a-zA-Z0-9]$', ch):
-         newchrs += ch
-      elif (ch == ' ' or ch == '_') and newchrs:
-         newchrs += '_'
-      elif ord(ch) > z and ochrs != None:
-         if not ochrs:
-            ochrs = None
-            with open(PGLOG['DSSHOME'] + "/lib/ExtChrs.txt", "r") as CHR:
-               ochrs = CHR.readline()
-               nchrs = CHR.readline()
-         if ochrs is None: continue
-         idx = ochrs.find(ch)
-         if idx >= 0: newchrs += nchrs[idx]
-
-   if newchrs:
-      return newchrs
+   if re.match(r'^[a-zA-Z0-9]+$', name): return name # conversion not needed
+
+   decoded_name = unidecode(name).strip()
+   cleaned_name = re.sub(r'[^a-zA-Z0-9]', '', decoded_name)
+
+   if cleaned_name:
+      return cleaned_name
    else:
       return default
 
0 commit comments

Comments
 (0)