Skip to content

Commit f97de9b

Browse files
committed
extract name encoding/decoding to separate function, like in easyh5
1 parent b660fc1 commit f97de9b

File tree

7 files changed

+159
-189
lines changed

7 files changed

+159
-189
lines changed

decodevarname.m

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
function newname = decodevarname(name,varargin)
2+
%
3+
% newname = decodevarname(name)
4+
%
5+
% Decode a hex-encoded variable name (from encodevarname) and restore
6+
% its original form
7+
%
8+
% This function is sensitive to the default charset
9+
% settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
10+
% to set the encoding to UTF-8 before calling this function.
11+
%
12+
% author: Qianqian Fang (q.fang <at> neu.edu)
13+
%
14+
% input:
15+
% name: a string output from encodevarname, which converts a leading
16+
% non-letter character into "x0xHH_" and any other character outside [0-9A-Za-z_] into "_0xHH_"
17+
% format, where hex key HH stores the ascii (or Unicode) value
18+
% of the character.
19+
%
20+
% output:
21+
% newname: the restored original string
22+
%
23+
% example:
24+
% decodevarname('x0x5F_a') % returns _a
25+
% decodevarname('a_') % returns a_ as it is a valid variable name
26+
% decodevarname('x0xE58F98__0xE9878F_') % returns '变量'
27+
%
28+
% this file is part of EasyH5 Toolbox: https://github.com/fangq/easyh5
29+
%
30+
% License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/easyh5 for details
31+
%
32+
33+
isunpack=jsonopt('UnpackHex',1,varargin{:});
34+
newname=name;
35+
if(isempty(regexp(name,'0x([0-9a-fA-F]+)_','once')))
36+
return
37+
end
38+
if(isunpack)
39+
if(exist('native2unicode','builtin'))
40+
h2u=@hex2unicode;
41+
newname=regexprep(name,'(^x|_){1}0x([0-9a-fA-F]+)_','${h2u($2)}');
42+
else
43+
pos=regexp(name,'(^x|_){1}0x([0-9a-fA-F]+)_','start');
44+
pend=regexp(name,'(^x|_){1}0x([0-9a-fA-F]+)_','end');
45+
if(isempty(pos))
46+
return;
47+
end
48+
str0=name;
49+
pos0=[0 pend(:)' length(name)];
50+
newname='';
51+
for i=1:length(pos)
52+
newname=[newname str0(pos0(i)+1:pos(i)-1) char(hex2dec(str0(pos(i)+3:pend(i)-1)))];
53+
end
54+
if(pos(end)~=length(name))
55+
newname=[newname str0(pos0(end-1)+1:pos0(end))];
56+
end
57+
end
58+
end
59+
60+
%--------------------------------------------------------------------------
61+
function str=hex2unicode(hexstr)
62+
val=hex2dec(hexstr);
63+
id=histc(val,[0 2^8 2^16 2^32 2^64]);
64+
type={'uint8','uint16','uint32','uint64'};
65+
bytes=typecast(cast(val,type{id~=0}),'uint8');
66+
str=native2unicode(fliplr(bytes(:,1:find(bytes,1,'last'))));

encodevarname.m

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
function str = encodevarname(str,varargin)
2+
%
3+
% newname = encodevarname(name)
4+
%
5+
% Encode an invalid variable name using a hex-format for bi-directional
6+
% conversions.
7+
%
8+
% This function is sensitive to the default charset
9+
% settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
10+
% to set the encoding to UTF-8 before calling this function.
11+
%
12+
% author: Qianqian Fang (q.fang <at> neu.edu)
13+
%
14+
% input:
15+
% name: a string, can be either a valid or invalid variable name
16+
%
17+
% output:
18+
% newname: a valid variable name by converting a leading non-letter
19+
% character into "x0xHH_" and any other character outside [0-9A-Za-z_] into "_0xHH_"
20+
% format, where HH is the ascii (or Unicode) value of the
21+
% character.
22+
%
23+
% note that a MATLAB variable or field name cannot be longer than 63
24+
% characters, i.e. the maximum length returned by namelengthmax; if
25+
% one uses the output of this function as a struct or variable
26+
% name, the name will be truncated at 63. Please consider using
27+
% the name as a containers.Map key, which does not have such
28+
% limit.
29+
%
30+
% example:
31+
% encodevarname('_a') % returns x0x5F_a
32+
% encodevarname('a_') % returns a_ as it is a valid variable name
33+
% encodevarname('变量') % returns 'x0xE58F98__0xE9878F_'
34+
%
35+
% this file is part of EasyH5 Toolbox: https://github.com/fangq/easyh5
36+
%
37+
% License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/easyh5 for details
38+
%
39+
40+
if(~isempty(regexp(str,'^[^A-Za-z]','once')))
41+
if(exist('unicode2native','builtin'))
42+
str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
43+
else
44+
str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end));
45+
end
46+
end
47+
if(isvarname(str))
48+
return;
49+
end
50+
if(exist('unicode2native','builtin'))
51+
str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
52+
else
53+
cpos=regexp(str,'[^0-9A-Za-z_]');
54+
if(isempty(cpos))
55+
return;
56+
end
57+
str0=str;
58+
pos0=[0 cpos(:)' length(str)];
59+
str='';
60+
for i=1:length(cpos)
61+
str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)];
62+
end
63+
if(cpos(end)~=length(str))
64+
str=[str str0(pos0(end-1)+1:pos0(end))];
65+
end
66+
end
67+
end

loadjson.m

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@
413413
end
414414
pos=parse_char(inputstr, pos, ':');
415415
[val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
416-
object.(valid_field(str,varargin{:}))=val;
416+
object.(encodevarname(str,varargin{:}))=val;
417417
[cc,pos]=next_char(inputstr,pos);
418418
if cc == '}'
419419
break;
@@ -442,41 +442,6 @@
442442

443443
%%-------------------------------------------------------------------------
444444

445-
function str = valid_field(str,varargin)
446-
% From MATLAB doc: field names must begin with a letter, which may be
447-
% followed by any combination of letters, digits, and underscores.
448-
% Invalid characters will be converted to underscores, and the prefix
449-
% "x0x[Hex code]_" will be added if the first character is not a letter.
450-
if(~isempty(regexp(str,'^[^A-Za-z]','once')))
451-
if(~isoctavemesh && str(1)+0 > 255)
452-
str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
453-
else
454-
str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end));
455-
end
456-
end
457-
if(isvarname(str))
458-
return;
459-
end
460-
if(~isoctavemesh)
461-
str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
462-
else
463-
cpos=regexp(str,'[^0-9A-Za-z_]');
464-
if(isempty(cpos))
465-
return;
466-
end
467-
str0=str;
468-
pos0=[0 cpos(:)' length(str)];
469-
str='';
470-
for i=1:length(cpos)
471-
str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)];
472-
end
473-
if(cpos(end)~=length(str))
474-
str=[str str0(pos0(end-1)+1:pos0(end))];
475-
end
476-
end
477-
end
478-
%%-------------------------------------------------------------------------
479-
480445
function newpos=skip_whitespace(pos, inputstr)
481446
newpos=pos;
482447
while newpos <= length(inputstr) && isspace(inputstr(newpos))

loadmsgpack.m

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -227,42 +227,6 @@
227227
out = struct();
228228
for n=1:len
229229
[key, idx] = parse(bytes, idx);
230-
[out.(valid_field(char(key))), idx] = parse(bytes, idx);
231-
end
232-
end
233-
234-
function str = valid_field(str,varargin)
235-
% From MATLAB doc: field names must begin with a letter, which may be
236-
% followed by any combination of letters, digits, and underscores.
237-
% Invalid characters will be converted to underscores, and the prefix
238-
% "x0x[Hex code]_" will be added if the first character is not a letter.
239-
isoct=exist('OCTAVE_VERSION','builtin');
240-
cpos=regexp(str,'^[^A-Za-z]','once');
241-
if(~isempty(cpos))
242-
if(~isoct)
243-
str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
244-
else
245-
str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
246-
end
247-
end
248-
if(isempty(regexp(str,'[^0-9A-Za-z_]', 'once' )))
249-
return;
250-
end
251-
if(~isoct)
252-
str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
253-
else
254-
cpos=regexp(str,'[^0-9A-Za-z_]');
255-
if(isempty(cpos))
256-
return;
257-
end
258-
str0=str;
259-
pos0=[0 cpos(:)' length(str)];
260-
str='';
261-
for i=1:length(cpos)
262-
str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i)))];
263-
end
264-
if(cpos(end)~=length(str))
265-
str=[str str0(pos0(end-1)+1:pos0(end))];
266-
end
230+
[out.(encodevarname(char(key))), idx] = parse(bytes, idx);
267231
end
268232
end

loadubjson.m

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@
330330
end
331331
[val, pos] = parse_value(inputstr, pos, varargin{:});
332332
num=num+1;
333-
object.(valid_field(str,varargin{:}))=val;
333+
object.(encodevarname(str,varargin{:}))=val;
334334
[cc, pos]=next_char(inputstr,pos);
335335
if cc == '}' || (count>=0 && num>=count)
336336
break;
@@ -357,38 +357,3 @@
357357
error_pos('unsupported type at position %d',inputstr, pos);
358358
end
359359
end
360-
%%-------------------------------------------------------------------------
361-
362-
function str = valid_field(str,varargin)
363-
% From MATLAB doc: field names must begin with a letter, which may be
364-
% followed by any combination of letters, digits, and underscores.
365-
% Invalid characters will be converted to underscores, and the prefix
366-
% "x0x[Hex code]_" will be added if the first character is not a letter.
367-
if(~isempty(regexp(str,'^[^A-Za-z]','once')))
368-
if(~isoctavemesh && str(1)+0 > 255)
369-
str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
370-
else
371-
str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
372-
end
373-
end
374-
if(isvarname(str))
375-
return;
376-
end
377-
if(~isoctavemesh)
378-
str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
379-
else
380-
cpos=regexp(str,'[^0-9A-Za-z_]');
381-
if(isempty(cpos))
382-
return;
383-
end
384-
str0=str;
385-
pos0=[0 cpos(:)' length(str)];
386-
str='';
387-
for i=1:length(cpos)
388-
str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i)))];
389-
end
390-
if(cpos(end)~=length(str))
391-
str=[str str0(pos0(end-1)+1:pos0(end))];
392-
end
393-
end
394-
end

0 commit comments

Comments
 (0)