@@ -41,11 +41,10 @@ U_CDECL_END
4141U_NAMESPACE_BEGIN
4242
4343RBBISymbolTable::RBBISymbolTable (RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
44- : fRules(rules), fRuleScanner(rs), ffffString( static_cast < char16_t >( 0xffff ))
44+ : fRules(rules), fRuleScanner(rs)
4545{
4646 fHashTable = nullptr ;
47- fCachedSetLookup = nullptr ;
48-
47+
4948 fHashTable = uhash_open (uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr , &status);
5049 // uhash_open checks status
5150 if (U_FAILURE (status)) {
@@ -71,60 +70,47 @@ RBBISymbolTable::~RBBISymbolTable()
7170//
7271const UnicodeString *RBBISymbolTable::lookup (const UnicodeString& s) const
7372{
74- RBBISymbolTableEntry *el;
75- RBBINode *varRefNode;
76- RBBINode *exprNode;
77- RBBINode *usetNode;
78- const UnicodeString *retString;
79- RBBISymbolTable *This = const_cast <RBBISymbolTable*>(this ); // cast off const
80-
81- el = static_cast <RBBISymbolTableEntry*>(uhash_get (fHashTable , &s));
73+ const RBBISymbolTableEntry* const el =
74+ static_cast <const RBBISymbolTableEntry*>(uhash_get (fHashTable , &s)); // Look up s in fHashTable
8275 if (el == nullptr ) {
8376 return nullptr ; // No entry was found for s
8477 }
78+ const RBBINode& exprNode = *el->val ->fLeftChild ; // Root node of expression for variable
79+ // Return the original source string for the expression.
80+ // Note that for set-valued variables used in UnicodeSet expressions, this source string would be
81+ // rejected by the UnicodeSet parser if it itself contains variable references. For instance, with
82+ // $CaseIgnorable = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019];
83+ // $Cased = [[:Upper_Case:][:Lower_Case:][:Lt:] - $CaseIgnorable];
84+ // If lookupSet were not overridden, when parsing the right-hand side of
85+ // $NotCased = [[^ $Cased] - $CaseIgnorable];
86+ // there would be a call to lookup("Cased") which would return
87+ // "[[:Upper_Case:][:Lower_Case:][:Lt:]-$CaseIgnorable]". This contains a variable, which is
88+ // disallowed by the UnicodeSet parser inside a variable expansion.
89+ // However, set-valued variables are pre-parsed, and returned by lookupSet instead, so this call
90+ // to lookup() never happens; instead, lookupSet("CaseIgnorable") is called when computing
91+ // $Cased and returns the non-null value of $CaseIgnorable, and then when computing $NotCased,
92+ // lookupSet("Cased") returns the value computed for $Cased.
93+ return &exprNode.fText ;
94+ }
8595
86- varRefNode = el->val ;
87- exprNode = varRefNode->fLeftChild ; // Root node of expression for variable
88- if (exprNode->fType == RBBINode::setRef) {
89- // The $variable refers to a single UnicodeSet
90- // return the ffffString, which will subsequently be interpreted as a
91- // stand-in character for the set by RBBISymbolTable::lookupMatcher()
92- usetNode = exprNode->fLeftChild ;
93- This->fCachedSetLookup = usetNode->fInputSet ;
94- retString = &ffffString;
96+ const UnicodeSet* RBBISymbolTable::lookupSet (const UnicodeString& s) const { // Returns the UnicodeSet for the variable named s when its expression is a single set, otherwise nullptr
97+ const RBBISymbolTableEntry* const el = static_cast <const RBBISymbolTableEntry*>(uhash_get (fHashTable , &s)); // Look up s in fHashTable
98+ if (el == nullptr ) {
99+ return nullptr ; // No entry was found for s
95100 }
96- else
97- {
98- // The variable refers to something other than just a set.
99- // return the original source string for the expression
100- retString = &exprNode->fText ;
101- This->fCachedSetLookup = nullptr ;
101+ const RBBINode& exprNode = *el->val ->fLeftChild ; // Root node of expression for variable
102+ if (exprNode.fType == RBBINode::setRef) {
103+ return exprNode.fLeftChild ->fInputSet ; // The $variable refers to a single UnicodeSet: return that set
104+ } else {
105+ return nullptr ; // The variable refers to something other than just a set
102106 }
103- return retString;
104107}
105108
106109
107110
108- //
109- // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
110- // interface maps a single stand-in character to a
111- // pointer to a Unicode Set. The Unicode Set code uses this
112- // mechanism to get all references to the same $variable
113- // name to refer to a single common Unicode Set instance.
114- //
115- // This implementation cheats a little, and does not maintain a map of stand-in chars
116- // to sets. Instead, it takes advantage of the fact that the UnicodeSet
117- // constructor will always call this function right after calling lookup(),
118- // and we just need to remember what set to return between these two calls.
119- const UnicodeFunctor *RBBISymbolTable::lookupMatcher (UChar32 ch) const
120- {
121- UnicodeSet *retVal = nullptr ;
122- RBBISymbolTable *This = const_cast <RBBISymbolTable*>(this ); // cast off const
123- if (ch == 0xffff ) {
124- retVal = fCachedSetLookup ;
125- This->fCachedSetLookup = nullptr ;
126- }
127- return retVal;
111+ // RBBISymbolTable::lookupMatcher  No longer used, see ICU-23297. Ignores its argument and always returns nullptr; set-valued variables are returned by lookupSet() instead.
112+ const UnicodeFunctor* RBBISymbolTable::lookupMatcher (UChar32 /* ch*/ ) const {
113+ return nullptr ;
128114}
129115
130116//
0 commit comments