Skip to content

Commit 760a578

Browse files
authored
Merge pull request #9560 from keymanapp/feat/developer/9515-drop-u1234-epic-ldml
2 parents e09d42f + a55454b commit 760a578

File tree

14 files changed

+776
-1007
lines changed

14 files changed

+776
-1007
lines changed

developer/src/kmc-kmn/src/compiler/compiler.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,17 @@ export class KmnCompiler implements UnicodeSetParser {
421421
return Module.kmcmp_testSentry();
422422
}
423423

424+
/**
 * Rewrite brace-style `\u{…}` escapes in `pattern` into fixed-width escapes:
 *  - 1 to 4 hex digits become `\uXXXX`, zero-padded on the left to 4 digits
 *  - 5 or 6 hex digits become `\UXXXXXXXX`, zero-padded on the left to 8 digits
 *
 * Hex digits are copied as written (their case is not changed). Any text that
 * is not a `\u{…}` escape with 1 to 6 hex digits is returned unchanged.
 *
 * NOTE(review): the match does not look at what precedes the backslash, so an
 * escaped backslash followed by `u{41}` is rewritten as well — confirm that is
 * acceptable for the patterns passed in.
 * NOTE(review): braces holding more than 6 digits, or anything other than hex
 * digits (e.g. spaces), are left as-is for the downstream parser to handle.
 *
 * @param pattern text possibly containing `\u{…}` escapes
 * @returns the pattern with those escapes rewritten
 */
425+
public static fixNewPattern(pattern: string) : string {
426+
pattern = pattern.replaceAll(/\\u\{([0-9a-fA-F]{6})\}/g, `\\U00$1`); // 6 digits: \U + "00" + digits
427+
pattern = pattern.replaceAll(/\\u\{([0-9a-fA-F]{5})\}/g, `\\U000$1`); // 5 digits: \U + "000" + digits
428+
pattern = pattern.replaceAll(/\\u\{([0-9a-fA-F]{4})\}/g, `\\u$1`); // 4 digits: just drop the braces
429+
pattern = pattern.replaceAll(/\\u\{([0-9a-fA-F]{3})\}/g, `\\u0$1`); // 3 digits: pad with one zero
430+
pattern = pattern.replaceAll(/\\u\{([0-9a-fA-F]{2})\}/g, `\\u00$1`); // 2 digits: pad with two zeros
431+
pattern = pattern.replaceAll(/\\u\{([0-9a-fA-F]{1})\}/g, `\\u000$1`); // 1 digit: pad with three zeros
432+
return pattern;
433+
}
434+
424435
/**
425436
*
426437
* @param pattern UnicodeSet pattern such as `[a-z]`
@@ -435,6 +446,8 @@ export class KmnCompiler implements UnicodeSetParser {
435446

436447
// TODO-LDML: Catch OOM
437448
const buf = this.wasmExports.malloc(rangeCount * 2 * Module.HEAPU32.BYTES_PER_ELEMENT);
449+
// rewrite \u{1234}-style escapes into \u1234 / \U0001F640 form before parsing
450+
pattern = KmnCompiler.fixNewPattern(pattern);
438451
/** If <= 0: return code. If positive: range count */
439452
const rc = Module.kmcmp_parseUnicodeSet(pattern, buf, rangeCount * 2);
440453
if (rc >= 0) {
@@ -459,6 +472,8 @@ export class KmnCompiler implements UnicodeSetParser {
459472
/* c8 ignore next 2 */
460473
return null;
461474
}
475+
// rewrite \u{1234}-style escapes into \u1234 / \U0001F640 form before parsing
476+
pattern = KmnCompiler.fixNewPattern(pattern);
462477
// call with rangeCount = 0 to invoke in 'preflight' mode.
463478
const rc = Module.kmcmp_parseUnicodeSet(pattern, 0, 0);
464479
if (rc >= 0) {

developer/src/kmc-kmn/test/test-wasm-uset.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,22 @@ import { CompilerMessages } from '../src/compiler/messages.js';
66
import { compilerErrorFormatCode } from '@keymanapp/common-types';
77

88
describe('Compiler UnicodeSet function', function() {
9+
it('should fixup "short" \\u{} escapes', function () {
10+
assert.equal(KmnCompiler.fixNewPattern(`\\u{A}`), `\\u000A`); // 1 digit: U+000A (line feed)
11+
assert.equal(KmnCompiler.fixNewPattern(`\\u{22}`), `\\u0022`); // 2 digits: U+0022 (")
12+
assert.equal(KmnCompiler.fixNewPattern(`\\u{ead}`), `\\u0ead`); // 3 digits: U+0EAD; digit case is kept as written
13+
});
14+
it('should fixup \\u1234 format escapes', function() {
15+
assert.equal(KmnCompiler.fixNewPattern(`\\u{1234}`), `\\u1234`); // 4 digits: braces are simply dropped
16+
assert.equal(KmnCompiler.fixNewPattern(`\\u1234`), `\\u1234`); // already in target form: unchanged
17+
assert.equal(KmnCompiler.fixNewPattern(`[\\u{1234}-\\u{5678}]`), `[\\u1234-\\u5678]`); // every escape in the pattern is rewritten
18+
assert.equal(KmnCompiler.fixNewPattern(`something else`), `something else`); // no escapes: unchanged
19+
});
20+
it('should fixup supplemental \\u format escapes', function() {
21+
assert.equal(KmnCompiler.fixNewPattern(`\\u{1F640}`), `\\U0001F640`); // 5 digits: \U form, padded to 8 digits
22+
assert.equal(KmnCompiler.fixNewPattern(`\\u{10FFFD}`),`\\U0010FFFD`); // 6 digits: \U form, padded to 8 digits
23+
});
24+
925
it('should start', async function() {
1026
const compiler = new KmnCompiler();
1127
const callbacks = new TestCompilerCallbacks();
@@ -51,6 +67,33 @@ describe('Compiler UnicodeSet function', function() {
5167
assert.deepEqual(callbacks.messages, []);
5268
assert.equal(len2, set.length);
5369
});
70+
it('should compile an even more complex uset', async function() {
71+
const compiler = new KmnCompiler();
72+
const callbacks = new TestCompilerCallbacks();
73+
assert(await compiler.init(callbacks));
74+
assert(compiler.verifyInitialized());
75+
76+
const pat = "[\\u{10FFFD}\\u{2019}\\u{22}\\u{a}\\u{ead}\\u{1F640}]"; // mixes 1- to 6-digit \u{} escapes, deliberately not in code point order
77+
const set = compiler.parseUnicodeSet(pat, 23);
78+
79+
assert.equal(set.length, 6);
80+
// every range must be a single code point (range start equals range end)
81+
for (let i = 0; i<set.length; i++) {
82+
assert.equal(set.ranges[i][0], set.ranges[i][1], `Range ${i} should be a single char`);
83+
}
84+
// check each value: ranges are expected in ascending code point order, not in pattern order
85+
assert.equal(set.ranges[0][0], 0x000A);
86+
assert.equal(set.ranges[1][0], 0x0022);
87+
assert.equal(set.ranges[2][0], 0x0EAD);
88+
assert.equal(set.ranges[3][0], 0x2019);
89+
assert.equal(set.ranges[4][0], 0x1F640);
90+
assert.equal(set.ranges[5][0], 0x10FFFD);
91+
assert.deepEqual(callbacks.messages, []);
92+
callbacks.clear();
93+
const len2 = compiler.sizeUnicodeSet(pat); // size-only call must agree with the parsed range count
94+
assert.deepEqual(callbacks.messages, []);
95+
assert.equal(len2, set.length);
96+
});
5497
it('should fail in various ways', async function() {
5598
const compiler = new KmnCompiler();
5699
const callbacks = new TestCompilerCallbacks();

developer/src/kmc-ldml/src/compiler/messages.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,6 @@ export class CompilerMessages {
9090

9191
static Error_InvalidHardware = (o:{form: string}) => m(this.ERROR_InvalidHardware,
9292
`layers has invalid value form=${o.form}`);
93-
/**
94-
* Note: may not hit this due to XML validation.
95-
*/
9693
static ERROR_InvalidHardware = SevError | 0x0013;
9794

9895
static Error_InvalidModifier = (o:{layer: string, modifier: string}) => m(this.ERROR_InvalidModifier,

developer/src/kmc-ldml/test/test-layr.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { assert } from 'chai';
33
import { LayrCompiler } from '../src/compiler/layr.js';
44
import { CompilerMessages } from '../src/compiler/messages.js';
55
import { compilerTestCallbacks, loadSectionFixture, testCompilationCases } from './helpers/index.js';
6-
import { KMXPlus, CommonTypesMessages } from '@keymanapp/common-types';
6+
import { KMXPlus } from '@keymanapp/common-types';
77
import { constants } from '@keymanapp/ldml-keyboard-constants';
88

99
import Layr = KMXPlus.Layr;
@@ -106,11 +106,9 @@ describe('layr', function () {
106106
},
107107
{
108108
subpath: 'sections/layr/invalid-invalid-form.xml',
109-
errors: [CommonTypesMessages.Error_SchemaValidationError({
110-
instancePath: '/keyboard/layers/0/form',
111-
keyword: 'enum',
112-
message: 'must be equal to one of the allowed values',
113-
params: `allowedValues="touch,us,iso,jis,abnt2"`}),],
109+
errors: [CompilerMessages.Error_InvalidHardware({
110+
form: 'holographic',
111+
}),],
114112
},
115113
{
116114
// missing layer element

resources/standards-data/ldml-keyboards/techpreview/3.0/fr-t-k0-azerty.xml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
<!-- Note: displays is only used for keycap presentation -->
4444
<!-- this example is not required for this keyboard as we use the spacing
4545
modifiers -->
46-
<display to="\u0300" display="${grave}" /> <!-- display combining grave as modifier letter grave ˋ -->
46+
<display to="\u{0300}" display="${grave}" /> <!-- display combining grave as modifier letter grave ˋ -->
4747
<!-- Note: We discussed why the existing displayMap was used for display.
4848
rather than adding something closer to the key layout. 1. This way we can
4949
tell the renderer what to do. Could be double diacritics, spacing issues,
@@ -89,19 +89,19 @@
8989
<!-- test key -->
9090
<key id="a" flicks="a" to="a" longPress="à â á ä ã å ā" />
9191
<flicks id="a">
92-
<flick directions="nw" to="\u1234" />
93-
<flick directions="nw se" to="\uFFFF" />
94-
<flick directions="e" to="\uFFF0" />
92+
<flick directions="nw" to="\u{1234}" />
93+
<flick directions="nw se" to="\u{FFFF}" />
94+
<flick directions="e" to="\u{FFF0}" />
9595
</flicks>
9696

9797
<!-- test key -->
9898
<key id="A" flicks="b" to="A" longPress="À Á Ä Ã Å Ā" />
9999

100100
<!-- test flick -->
101101
<flicks id="b">
102-
<flick directions="nw" to="\u4567" />
103-
<flick directions="nw se" to="\uFFFF" />
104-
<flick directions="e" to="\uFFF0" />
102+
<flick directions="nw" to="\u{4567}" />
103+
<flick directions="nw se" to="\u{FFFF}" />
104+
<flick directions="e" to="\u{FFF0}" />
105105
</flicks>
106106

107107
<!-- TODO: all additional maps, hardware and touch -->
@@ -203,9 +203,9 @@
203203
<transformGroup>
204204
<!-- this is a reorder group -->
205205
<!-- nod-Lana partial example -->
206-
<reorder from="\u1A60" order="127" />
207-
<reorder from="\u1A6B" order="42" />
208-
<reorder from="[\u1A75-\u1A79]" order="55" />
206+
<reorder from="\u{1A60}" order="127" />
207+
<reorder from="\u{1A6B}" order="42" />
208+
<reorder from="[\u{1A75}-\u{1A79}]" order="55" />
209209
<!-- ... partial example ... -->
210210
</transformGroup>
211211
</transforms>
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"sha": "44903d0867e42df37f17cf21d28938044eb1edc0",
3-
"description": "release-44-m1-75-g44903d0867",
4-
"date": "Thu, 17 Aug 2023 19:14:19 +0000"
2+
"sha": "61b74a36de8329daed152005133a699ae7f2012b",
3+
"description": "release-44-alpha2-5-g61b74a36de",
4+
"date": "Thu, 14 Sep 2023 07:38:47 +0000"
55
}

resources/standards-data/ldml-keyboards/techpreview/dtd/ldmlKeyboard.dtd

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ The CLDR Keyboard Subcommittee is currently developing major changes to the CLDR
1010
Please view the subcommittee page for the most recent information.
1111
<https://cldr.unicode.org/index/keyboard-workgroup> -->
1212

13-
<!ELEMENT keyboard ( import*, locales?, version?, info?, names, settings?, vkeys?, displays?, keys?, layers*, variables?, transforms*, special* ) >
13+
<!ELEMENT keyboard ( import*, locales?, version?, info?, names, settings?, vkeys?, displays?, keys?, forms?, layers*, variables?, transforms*, special* ) >
1414
<!--@TECHPREVIEW-->
1515
<!ATTLIST keyboard locale CDATA #REQUIRED >
1616
<!--@MATCH:validity/bcp47-wellformed-->
@@ -161,9 +161,24 @@ Please view the subcommittee page for the most recent information.
161161
<!--@VALUE-->
162162
<!--@ALLOWS_UESC-->
163163

164+
<!ELEMENT forms ( import*, form*, special* ) >
165+
<!--@TECHPREVIEW-->
166+
167+
<!ELEMENT form ( scanCodes+, special* ) >
168+
<!--@TECHPREVIEW-->
169+
<!ATTLIST form id NMTOKEN #IMPLIED >
170+
<!--@MATCH:any-->
171+
172+
<!ELEMENT scanCodes EMPTY >
173+
<!--@TECHPREVIEW-->
174+
<!ATTLIST scanCodes codes NMTOKENS #REQUIRED >
175+
<!--@MATCH:regex/[0-9a-fA-F]{2}( [0-9a-fA-F]{2})*-->
176+
<!--@VALUE-->
177+
164178
<!ELEMENT layers ( import*, layer*, special* ) >
165179
<!--@TECHPREVIEW-->
166-
<!ATTLIST layers form (touch | us | iso | jis | abnt2) #REQUIRED >
180+
<!ATTLIST layers form NMTOKEN #REQUIRED >
181+
<!--@MATCH:any-->
167182
<!ATTLIST layers minDeviceWidth CDATA #IMPLIED >
168183
<!--@MATCH:range/1~999-->
169184

0 commit comments

Comments
 (0)