1+ package org .unicode .utilities ;
2+ import java .awt .Font ;
3+ import java .awt .Graphics2D ;
4+ import java .awt .Image ;
5+ import java .awt .Point ;
6+ import java .awt .Rectangle ;
7+ import java .awt .geom .Rectangle2D ;
8+ import java .awt .image .BufferedImage ;
9+ import java .util .ArrayList ;
10+ import java .util .List ;
11+
12+ import org .unicode .text .utility .Utility ;
13+
14+ import com .ibm .icu .text .UnicodeSet ;
15+ import com .ibm .icu .util .Output ;
16+
17+ public class FastSymbolMatcher {
18+
19+ // --- Tuning Parameters ---
20+ private static final double MAX_ASPECT_RATIO_DIFF = 0.5 ; // Fail if width/height is too different
21+ private static final double MAX_DENSITY_DIFF = 0.25 ; // Fail if ink thickness is too different
22+
23+ public static void main (String [] args ) {
24+ UnicodeSet chars = new UnicodeSet (args [0 ]);
25+ System .out .println (chars .size () + "\t " + chars );
26+
27+ List <SymbolProfile > profiles = new ArrayList <>();
28+ Output <Rectangle2D > visualBounds = new Output <>();
29+ String fontName = "Noto Sans" ;
30+ int fontSize = 144 ;
31+ Font font = new Font (fontName , Font .PLAIN , fontSize );
32+
33+ int count = 0 ;
34+
35+ for (int cp : chars .codePoints ()) {
36+ if ((count % 10000 ) == 0 ) {
37+ System .out .println (count + "\t " + Utility .hex (cp ));
38+ }
39+ ++count ;
40+ String character = Character .toString (cp );
41+
42+ BufferedImage image =
43+ GlyphRenderer .createGlyphBitmap (
44+ font , character , visualBounds , fontSize * 2 , fontSize * 3 / 2 );
45+ profiles .add (new SymbolProfile (cp , image ));
46+ }
47+ compare (profiles );
48+ }
49+
50+ public static void compare (List <SymbolProfile > profiles ) {
51+
52+ // 2. PRE-PROCESSING (The "N" Step)
53+ // Convert heavy images into lightweight Profiles ONCE.
54+ // This is O(N) complexity.
55+
56+ System .out .println ("Generating comparisons" );
57+ // 3. COMPARISON (The "N^2" Step)
58+ // Compare every symbol against every other symbol
59+ for (int i = 0 ; i < profiles .size (); i ++) {
60+ System .out .print ("\t " + Character .toString (profiles .get (i ).cp ));
61+ }
62+ System .out .println ();
63+ for (int i = 0 ; i < profiles .size (); i ++) {
64+ SymbolProfile p1 = profiles .get (i );
65+ System .out .print (Character .toString (p1 .cp ));
66+ for (int j = 0 ; j < i ; j ++) {
67+ SymbolProfile p2 = profiles .get (j );
68+
69+ // --- THE QUICK TEST ---
70+ if (shouldFailFast (p1 , p2 )) {
71+ // Skip expensive logic completely
72+ //System.out.println("Quick Mismatch: " + GlyphRenderer.charInfo(p2.cp));
73+ System .out .print ("\t " );
74+ continue ;
75+ }
76+
77+ // --- EXPENSIVE TEST ---
78+ // Only runs if the symbols are roughly similar shapes
79+ double score = detailedCompare (p1 , p2 );
80+ System .out .print ("\t " + (int )(score * 100 ));
81+ }
82+ System .out .println ();
83+ }
84+ }
85+
86+ /**
87+ * The "Gatekeeper" method.
88+ * Returns TRUE if the symbols are so different we shouldn't bother comparing pixels.
89+ */
90+ private static boolean shouldFailFast (SymbolProfile p1 , SymbolProfile p2 ) {
91+ // Test 1: Aspect Ratio (Is one tall and thin, and the other short and wide?)
92+ // e.g., prevents comparing "l" with "w"
93+ if (Math .abs (p1 .aspectRatio - p2 .aspectRatio ) > MAX_ASPECT_RATIO_DIFF ) {
94+ return true ;
95+ }
96+
97+ // Test 2: Ink Density (Is one heavy/filled and the other light/empty?)
98+ // e.g., prevents comparing "." with "M"
99+ if (Math .abs (p1 .inkDensity - p2 .inkDensity ) > MAX_DENSITY_DIFF ) {
100+ return true ;
101+ }
102+
103+ return false ;
104+ }
105+
106+ /**
107+ * The detailed pixel comparison (same logic as before, but using cached thumbnails).
108+ */
109+ private static double detailedCompare (SymbolProfile p1 , SymbolProfile p2 ) {
110+ // 1. Calculate Weights
111+ double visualSim = getVisualSimilarity (p1 .thumbnail , p2 .thumbnail );
112+
113+ // Size Penalty (using cached bounds)
114+ double areaA = p1 .bounds .width * p1 .bounds .height ;
115+ double areaB = p2 .bounds .width * p2 .bounds .height ;
116+ double sizeSim = 1.0 - (Math .abs (areaA - areaB ) / Math .max (areaA , areaB ));
117+
118+ // Position Penalty (using cached centers)
119+ double dist = p1 .center .distance (p2 .center );
120+ double maxDist = Math .sqrt (Math .pow (1000 , 2 ) + Math .pow (1000 , 2 )); // Mock max canvas size
121+ double posSim = 1.0 - (dist / maxDist );
122+ if (posSim < 0 ) posSim = 0 ;
123+
124+ return (visualSim * 0.6 ) + (sizeSim * 0.2 ) + (posSim * 0.2 );
125+ }
126+
127+ private static double getVisualSimilarity (BufferedImage imgA , BufferedImage imgB ) {
128+ long diff = 0 ;
129+ int w = imgA .getWidth ();
130+ int h = imgA .getHeight ();
131+ for (int y = 0 ; y < h ; y ++) {
132+ for (int x = 0 ; x < w ; x ++) {
133+ // Quick grayscale diff
134+ int rgbA = imgA .getRGB (x , y ) & 0xFF ; // Blue channel proxy for gray
135+ int rgbB = imgB .getRGB (x , y ) & 0xFF ;
136+ diff += Math .abs (rgbA - rgbB );
137+ }
138+ }
139+ return 1.0 - (diff / (w * h * 255.0 ));
140+ }
141+
142+ // --- INNER CLASS FOR PRE-CALCULATED DATA ---
143+ static class SymbolProfile {
144+ int cp ;
145+ Rectangle bounds ;
146+ Point center ;
147+ double aspectRatio ;
148+ double inkDensity ;
149+ BufferedImage thumbnail ; // Small 32x32 cached version
150+
151+ public SymbolProfile (int cp , BufferedImage raw ) {
152+ this .cp = cp ;
153+ // 1. Expensive Scan (Done ONLY ONCE per image)
154+ this .bounds = getBounds (raw );
155+
156+ if (this .bounds == null ) {
157+ // Handle empty image
158+ this .bounds = new Rectangle (0 ,0 ,1 ,1 );
159+ this .aspectRatio = 0 ;
160+ this .inkDensity = 0 ;
161+ this .thumbnail = new BufferedImage (32 , 32 , BufferedImage .TYPE_INT_ARGB );
162+ return ;
163+ }
164+
165+ this .center = new Point ((int )bounds .getCenterX (), (int )bounds .getCenterY ());
166+
167+ // 2. Pre-calculate Heuristics
168+ this .aspectRatio = (double ) bounds .width / bounds .height ;
169+
170+ // 3. Create Cached Thumbnail
171+ BufferedImage cropped = raw .getSubimage (bounds .x , bounds .y , bounds .width , bounds .height );
172+ this .thumbnail = resize (cropped , 32 , 32 );
173+
174+ // 4. Calculate Density (on the small thumbnail to save time)
175+ this .inkDensity = calculateDensity (this .thumbnail );
176+ }
177+
178+ private double calculateDensity (BufferedImage img ) {
179+ long totalPixels = img .getWidth () * img .getHeight ();
180+ long filledPixels = 0 ;
181+ for (int y =0 ; y <img .getHeight (); y ++) {
182+ for (int x =0 ; x <img .getWidth (); x ++) {
183+ int alpha = (img .getRGB (x , y ) >> 24 ) & 0xff ;
184+ if (alpha > 0 ) filledPixels ++; // Assuming transparent background
185+ // If white background, check brightness < 200
186+ }
187+ }
188+ return (double ) filledPixels / totalPixels ;
189+ }
190+
191+ private Rectangle getBounds (BufferedImage img ) {
192+ int minX = img .getWidth (), minY = img .getHeight (), maxX = -1 , maxY = -1 ;
193+ boolean found = false ;
194+ for (int y = 0 ; y < img .getHeight (); y ++) {
195+ int xSum = 0 ;
196+ for (int x = 0 ; x < img .getWidth (); x ++) {
197+ int alpha = (img .getRGB (x , y ) >> 24 ) & 0xff ;
198+ xSum += alpha ;
199+ if (alpha != 0 ) { // Assuming transparent background
200+ if (x < minX ) minX = x ;
201+ if (x > maxX ) maxX = x ;
202+ if (y < minY ) minY = y ;
203+ if (y > maxY ) maxY = y ;
204+ found = true ;
205+ }
206+ }
207+ }
208+ return found ? new Rectangle (minX , minY , maxX - minX + 1 , maxY - minY + 1 ) : null ;
209+ }
210+
211+ private BufferedImage resize (BufferedImage img , int w , int h ) {
212+ Image tmp = img .getScaledInstance (w , h , Image .SCALE_SMOOTH );
213+ BufferedImage dimg = new BufferedImage (w , h , BufferedImage .TYPE_INT_ARGB );
214+ Graphics2D g2d = dimg .createGraphics ();
215+ g2d .drawImage (tmp , 0 , 0 , null );
216+ g2d .dispose ();
217+ return dimg ;
218+ }
219+ }
220+ }
0 commit comments