|
| 1 | +import java.lang.Character.* |
| 2 | +import java.util.* |
| 3 | +import java.util.Collections.* |
| 4 | + |
/**
 * A node of a trie that maps sequences of typed characters to one output character.
 *
 * A node *is* its outgoing-edge map (key character -> child node, delegated to [map]).
 * Terminal nodes are [Leaf] instances and the single trie root is the companion object [Root].
 * Key sequences are registered with the `..` operator overloads declared below, which are
 * member extensions and therefore only usable with a [Node] in scope (e.g. inside `with(node)`).
 *
 * Note: [hashCode] is derived from the mutable [id] while `equals` stays identity based for
 * non-leaf nodes, so a node must not sit in a hash-based collection while its [id] is changed.
 *
 * @property id identifier used when rendering this node; `Char.MIN_VALUE` means "not assigned yet"
 *              (see [recursivelySetIDs]).
 * @param map backing storage for the outgoing edges.
 */
open class Node(var id: Char = Char.MIN_VALUE, map: MutableMap<Char, Node> = mutableMapOf()) :
    MutableMap<Char, Node> by map {

    /** Runs [f] with this node as receiver and returns this node, enabling `node { "ab"..'x' }`. */
    operator fun invoke(f: Node.() -> Unit): Node = apply(f)

    /** The numeric value of [id]. */
    override fun hashCode() = id.code

    /**
     * Renders the node as a Graphviz identifier: the four hex digits of [id], wrapped in double
     * quotes unless they start with a letter or consist of digits only.
     */
    override fun toString(): String {
        val str = hex
        val usableUnquoted = str[0].isLetter() || str.all { it.isDigit() }
        return if (usableUnquoted) str else "\"$str\""
    }

    /** The leaf reached by always following the first edge, or `null` if that path ends without one. */
    val leaf: Leaf?
        get() {
            var node: Node? = this
            while (node != null && node !is Leaf) node = node.values.firstOrNull()
            return node as? Leaf
        }

    /**
     * Assigns an [id] to every reachable node whose id is still `Char.MIN_VALUE`.
     *
     * A node takes the key character of the edge it is first reached through when that character
     * is still free, otherwise the last element of [ids]. Assigned ids are removed from [ids].
     *
     * @param ids pool of free identifiers; defaults to U+0001..U+FFFF without this node's own id.
     * @throws NoSuchElementException if [ids] runs empty.
     */
    open fun recursivelySetIDs(ids: MutableSet<Char> = (('\u0001'..'\uFFFF') - id).toMutableSet()) {
        for ((char, node) in this) {
            if (node.id != Char.MIN_VALUE) continue
            node.id = if (char in ids) char else ids.last()
            ids -= node.id
            node.recursivelySetIDs(ids)
        }
    }

    /**
     * Emits one Graphviz edge statement per outgoing edge of this node and, recursively, of every
     * child that is not yet in [visited].
     *
     * @param visited nodes whose edges were already emitted; updated in place.
     */
    open fun toGraphvizString(visited: MutableSet<Node> = mutableSetOf()): String {
        val self = this
        // buildString avoids re-copying the accumulated text for every edge.
        return buildString {
            for ((char, node) in self) {
                append("$self->$node[label=${char.toGraphvizId()}]\n")
                if (visited.add(node)) append(node.toGraphvizString(visited))
            }
        }
    }

    /**
     * Emits a `DEADKEY` section for this node (one `key target` line per edge, with a trailing `@`
     * when the target is neither a [Leaf] nor [Root]) followed by the sections of every child that
     * is not yet in [visited].
     *
     * @param visited nodes whose section was already emitted; updated in place.
     */
    open fun toDeadTableString(visited: MutableSet<Node> = mutableSetOf()): String {
        val self = this
        return buildString {
            append("DEADKEY ").append(self.hex).append('\n')
            for ((char, node) in self) {
                append(char.hex).append(' ').append(node.hex)
                if (node !is Leaf && node !is Root) append('@')
                append('\n')
            }
            append('\n')
            for (node in self.values) {
                if (visited.add(node)) append(node.toDeadTableString(visited))
            }
        }
    }

    /**
     * Maps every permutation of the receiver's characters to [char].
     *
     * The receiver is permuted in place but restored to its original order before returning.
     *
     * @return the node this operator was invoked on.
     * @throws IllegalArgumentException if the receiver is empty.
     * @throws IllegalStateException if a permutation collides with an existing mapping.
     */
    infix operator fun StringBuilder.rangeTo(char: Char): Node {
        val keys = this
        fun swap(a: Int, b: Int) {
            val tmp = keys[a]
            keys[a] = keys[b]
            keys[b] = tmp
        }
        // Classic backtracking: positions before `start` are fixed, every remaining character
        // takes a turn at `start`, and the swap is undone afterwards.
        fun permute(start: Int) {
            if (start >= keys.length - 1) {
                keys.toString()..char // String receiver selects the CharSequence overload
                return
            }
            for (j in start until keys.length) {
                swap(start, j)
                permute(start + 1)
                swap(start, j)
            }
        }
        permute(0)
        return this@Node
    }

    /**
     * Maps the exact key sequence of the receiver to [char], creating intermediate nodes as needed.
     *
     * @return the node this operator was invoked on.
     * @throws IllegalArgumentException if the receiver is empty.
     * @throws IllegalStateException if a prefix of the sequence already ends in a leaf, or the
     *         sequence is already mapped to something other than [char].
     */
    infix operator fun CharSequence.rangeTo(char: Char): Node {
        require(isNotEmpty()) { "cannot map an empty key sequence to $char" }
        var node = this@Node
        for (i in 0 until length - 1) {
            if (node is Leaf) throw IllegalStateException("$this -> $char attempted to overwrite ${substring(0, i)} -> $node")
            node = node.getOrPut(this[i]) { Node() }
        }
        val leaf = Leaf(char)
        val removed = node.put(this[length - 1], leaf)
        if (removed != null && leaf != removed) throw IllegalStateException("$this -> $char attempted to overwrite $this -> ${removed.leaf}")
        return this@Node
    }

    /**
     * Maps the receiver case-insensitively: every upper/lower-case spelling of the prefix is
     * accepted, and the case of the final key selects `chars.first` (upper) or `chars.second`
     * (lower). Nothing is done when both target characters are already mapped somewhere.
     *
     * @return the node this operator was invoked on.
     * @throws IllegalArgumentException if the receiver is empty.
     * @throws IllegalStateException if every spelling of a prefix already ends in a leaf, or a
     *         final key is already mapped to something else.
     */
    infix operator fun CharSequence.rangeTo(chars: Pair<Char, Char>): Node {
        if (leaves.get(chars.first.code) && leaves.get(chars.second.code)) return this@Node
        require(isNotEmpty()) { "cannot map an empty key sequence to ${chars.first}" }
        var nodes: Set<Node> = setOf(this@Node)
        for (i in 0 until length - 1) {
            val next = mutableSetOf<Node>()
            val shared = Node() // one fresh child shared by both case variants
            for (node in nodes) {
                if (node is Leaf) continue
                next += node.getOrPut(this[i].uppercaseChar()) { shared }
                next += node.getOrPut(this[i].lowercaseChar()) { shared }
            }
            if (next.isEmpty()) throw IllegalStateException("$this -> ${chars.first} attempted to overwrite ${substring(0, i)} -> ${nodes.first()}")
            nodes = next
        }
        val upperLeaf = Leaf(chars.first)
        val lowerLeaf = if (chars.first == chars.second) upperLeaf else Leaf(chars.second)
        val upperKey = this[length - 1].uppercaseChar()
        val lowerKey = this[length - 1].lowercaseChar()
        var placed = false
        for (node in nodes) {
            if (node is Leaf) continue
            placed = true
            val removed1 = node.put(upperKey, upperLeaf)
            val removed2 = node.put(lowerKey, lowerLeaf)
            if (removed1 != null && upperLeaf != removed1) throw IllegalStateException("$this -> $upperLeaf attempted to overwrite $this -> $removed1")
            if (removed2 != null && lowerLeaf != removed2) throw IllegalStateException("$this -> $lowerLeaf attempted to overwrite $this -> $removed2")
        }
        // Every candidate parent was a leaf, i.e. the prefix itself is already a complete mapping.
        if (!placed) throw IllegalStateException("$this -> ${chars.first} attempted to overwrite ${substring(0, length - 1)} -> ${nodes.firstOrNull()}")
        return this@Node
    }

    /**
     * Terminal node producing [char]. Its edge map is the immutable empty map, its [id] starts out
     * as [char], and creating one marks [char] in [leaves].
     */
    class Leaf(val char: Char) : Node(char, Collections.emptyMap<Char, Node>()) {
        init {
            leaves.set(char.code)
        }

        /** The character itself, double-quoted (and escaped) unless it is a letter or digit. */
        override fun toString() = char.toGraphvizId()

        /** Leaves are equal when they produce the same character. */
        override fun equals(other: Any?) = other is Leaf && char == other.char

        /** Kept consistent with [equals] even if [id] is reassigned. */
        override fun hashCode() = char.code

        override fun toGraphvizString(visited: MutableSet<Node>) = ""
        override fun toDeadTableString(visited: MutableSet<Node>) = ""

        /** Always fails: a leaf cannot have children. */
        override fun put(key: Char, value: Node): Node? = try {
            super.put(key, value)
        } catch (e: UnsupportedOperationException) {
            throw IllegalStateException("$key -> $value attempted to overwrite $this", e)
        }
    }

    /**
     * The trie root, identified by '⎄'. Also hosts the helper extensions used to build the trie
     * and the [main] entry point that fills it and prints it.
     */
    companion object Root : Node('⎄') {
        override fun toString() = "⎄"
        override fun toGraphvizString(visited: MutableSet<Node>) = "digraph{\n" + super.toGraphvizString(visited) + '}'
        override fun toDeadTableString(visited: MutableSet<Node>) = "DEADTABLE\n\n" + super.toDeadTableString(visited) + "ENDDEADTABLE"

        /** One bit per UTF-16 code unit; set once a [Leaf] for that character has been created. */
        val leaves = BitSet(1 + Char.MAX_VALUE.code)

        init {
            leaves.set('⎄'.code) // never generate a mapping that produces the root symbol itself
        }

        /** The characters [main] considers as mapping targets. */
        val CHARS = ('\u00FF'..'\u33FF') + ('\uA500'..'\uABFF') + ('\uF900'..'\uFFFF')

        /** [hashCode] formatted as at least four upper-case hex digits. */
        val Any.hex: String get() = String.format("%04X", hashCode())

        /** Unicode character name, or "" when `Character.getName` knows none. */
        val Char.name get() = Character.getName(code) ?: ""

        /** Canonical decomposition (NFD) of this character. */
        val Char.nfd get() = java.text.Normalizer.normalize(toString(), java.text.Normalizer.Form.NFD)

        /** Compatibility decomposition (NFKD) of this character. */
        val Char.nfkd get() = java.text.Normalizer.normalize(toString(), java.text.Normalizer.Form.NFKD)

        /** `upper + lower` builds the pair consumed by the case-insensitive `..` overload. */
        operator infix fun Char.plus(c: Char) = this to c

        /** `+c` is shorthand for the pair `c + c`. */
        operator fun Char.unaryPlus() = this + this

        /** `+"ab"` copies the sequence into a StringBuilder, selecting the all-permutations `..` overload. */
        operator fun CharSequence.unaryPlus() = StringBuilder(this)

        /** Graphviz spelling of a single character: bare if letter/digit, otherwise quoted and escaped. */
        private fun Char.toGraphvizId(): String =
            if (isLetterOrDigit()) toString()
            else "\"" + toString().replace("\\", "\\\\").replace("\"", "\\\"") + "\""

        /** True when every character lies in ' '..'~'. */
        fun CharSequence.isPrintableASCII(): Boolean = all { it in ' '..'~' }

        /** True when at least one character is a letter or digit. */
        fun CharSequence.hasLetterOrDigit(): Boolean = any { it.isLetterOrDigit() }

        /** True when every character is a letter or digit. */
        fun CharSequence.isLettersAndDigits(): Boolean = all { it.isLetterOrDigit() }

        /** Copy of the receiver with every character replaced by its [Char.sub]. */
        val CharSequence.sub: StringBuilder
            get() {
                val out = StringBuilder(length)
                for (c in this) out.append(c.sub)
                return out
            }

        /** ASCII key standing in for a combining mark (or similar); any other character is returned unchanged. */
        val Char.sub
            get() = when (this) {
                '̈' -> '"' // diaeresis
                '̄' -> '-' // macron
                // breve
                '̨' -> ',' // ogonek
                '́' -> '\'' // acute accent
                '̂' -> '^' // circumflex accent
                '̇' -> '*' // dot above
                // caron
                '̧' -> ',' // cedilla
                '̃' -> '~' // tilde
                '·' -> '.' // middle dot
                '̋' -> '\'' // double acute
                '̊' -> '0' // ring above
                // horn
                '̀' -> '`' // grave
                '̏' -> '`' // double grave
                // inverted breve
                // comma below

                //'̥'->'0'//ring below
                '̣' -> '.' // dot below
                '̱' -> '_' // macron below
                '̭' -> '^' // circumflex accent below
                //'̰'->'~'//tilde below
                // breve below
                '̤' -> ':' // diaeresis below
                // hook above
                '̓' -> ',' // comma above
                '̔' -> ',' // reversed comma above

                '̳' -> '=' // double low line

                '̅' -> '-' // overline
                '⁄' -> '/' // fraction slash
                '̸' -> '/' // long solidus overlay
                else -> this
            }

        /**
         * Strips generic words from a Unicode character name. The removals are applied strictly in
         * the order written, each at most once, so the order (and the repeated " CROSS") matters.
         */
        fun String.removeAffixes() = this
            .removeSuffix(" CROSS")
            .removeSuffix(" LATIN")
            .removeSuffix(" WHITE") // shadowed white latin cross
            .removeSuffix(" SYRIAC")
            .removeSuffix(" SOURCE")
            .removeSuffix(" CONSTANT")
            .removeSuffix(" ORNAMENT")
            .removeSuffix(" SYMBOL")
            .removeSuffix(" SIGN")
            .removeSuffix(" MARK")
            .removeSuffix(" ACCENT")
            .removeSuffix(" LINE") // property line, wavy line, centre line symbol
            .removeSuffix(" SUIT") // playing cards
            .removeSuffix(" CROSS")
            .removeSuffix(" COPRODUCT") // amalgamation or coproduct
            .removeSuffix(" PRODUCT") // wreath product
            .removeSuffix(" PROOF") // end of proof
            .removeSuffix(" ANGLE") // right angle
            .removeSuffix(" WAVE") // sine wave
            .removeSuffix(" OR") // amalgamation or coproduct
            .removeSuffix(" OF") // element of
            .removeSuffix(" TO") // identical to
            .removeSuffix(" NOTES")
            .removeSuffix(" NOTE")
            .removePrefix("CROSS ")

            .removePrefix("OF ")
            .removePrefix("BEAMED ")
            .removePrefix("MUSIC ")
            .removePrefix("SYMBOL ")
            .removePrefix("FOR ") // for all
            .removePrefix("THERE ") // there exists
            .removePrefix("APL ")
            .removePrefix("FUNCTIONAL ")
            .removePrefix("IDEOGRAPHIC ")
            .removePrefix("TELEGRAPH ")
            .removePrefix("LOGICAL ")
            .removePrefix("CHESS ")
            .removePrefix("DEGREE ") // celsius fahrenheit

        /**
         * Fills [Root] with generated and hand-written mappings, assigns node ids and prints the
         * trie as a Graphviz digraph on standard output.
         *
         * The registration order matters: an earlier mapping wins, and a later conflicting one
         * aborts with [IllegalStateException].
         */
        @JvmStatic fun main(vararg args: String) {
            for (c in CHARS - 'Ȩ' - 'ȩ') { // map accented characters
                val decomp = c.nfd
                if (decomp.length < 2) continue
                val sub = decomp.sub
                if (!sub.isPrintableASCII() || !sub.hasLetterOrDigit()) continue
                //if(sub.length < 3) println("$sub -> $c") else println(sub.reversed().toString()+" -> $c")
                if (sub.length < 3) +sub..c else {
                    //sub.reversed()..c
                    "" + sub[1] + sub[2] + sub[0]..c // TODO: ADD SUPPORT FOR SWITCHING ACCENT ORDER
                }
            }
            for (c in "㍷㎜㎝㎞") c.nfkd + '1'..c
            for (c in CHARS - ('Ⅰ'..'ⅿ') - ('⒑'..'⒛') - "ffſt㍷㎜㎝㎞㏂…⩶︙︰".asIterable()) { // map ligatures
                val s = c.nfkd
                if (s.length > 1 && s.isPrintableASCII()) s..c
            }
            "am"..'㏂'
            "ft"..'ſt'
            +"2f"..'ff'
            "SP"..'␠'
            "BEL"..'␇'
            "<3"..'❤'
            "3>"..'❥'
            "(:"..'☻'
            ":)"..'☺'
            ":("..'☹'
            "):"..'☹'

            "/0"..'∅'

            "+-"..'±'
            "-+"..'∓'
            +".+"..'∔'
            "/\\"..'∧'
            "\\/"..'∨'

            "||"..'∥'
            "/||"..'∦'
            //"::"..'∷'
            //"-."..'∸'
            //"-:"..'∹'
            ":-:"..'∺'
            //+":~"..'∻'

            "~~"..'≈'
            "/~~"..'≉'

            +".="..'≐'
            //"..="..'≑'

            ":="..'≔'
            "=:"..'≕'
            "=o"..'≖'
            "o="..'≗'

            +"*="..'≛'

            +"d="..'≝'
            +"m="..'≞'
            +"?="..'≟'
            +"/="..'≠'
            +"-="..'≡'
            "/-="..'≢'
            //"=="..'≣'
            "<-"..'≤'
            ">-"..'≥'
            "<="..'≦'
            ">="..'≧'
            "</="..'≨'
            ">/="..'≩'
            "<<"..'≪'
            ">>"..'≫'
            +")("..'≬'

            "/<"..'≮'
            "/>"..'≯'
            "/-<"..'≰'
            "/->"..'≱'
            +"<~"..'≲'
            +">~"..'≳'
            "/~<"..'≴'
            "/~>"..'≵'
            "<>"..'≶'
            "><"..'≷'

            // The target must be a Char literal: with a String the expression silently becomes a
            // discarded stdlib ClosedRange<String> and no mapping is registered.
            "{-"..'≼'
            "}-"..'≽'
            "{~"..'≾'
            "}~"..'≿'

            "0+"..'⊕'
            "0-"..'⊖'
            "0x"..'⊗'
            "0X"..'⊗'
            "0/"..'⊘'
            "0."..'⊙'
            "0o"..'⊚'
            "0O"..'⊚'
            "00"..'⊚'
            "0*"..'⊛'
            "0="..'⊜'
            "0_"..'⊝'
            "[+"..'⊞'
            "[-"..'⊟'
            "[x"..'⊠'
            "[X"..'⊠'
            "[."..'⊡'

            "<|"..'⊲'
            "|>"..'⊳'
            "=<|"..'⊴'
            "=|>"..'⊵'

            "|X"..'⋉'
            "|x"..'⋉'
            "X|"..'⋊'
            "x|"..'⋊'

            "=||"..'⋕'
            +"<."..'⋖'
            +".>"..'⋗'

            "-<"..'⋜'
            "->"..'⋝'

            "SUM"..+'∑'
            "PROD"..+'∏'
            "COPROD"..+'∐'
            "SQRT"..+'√'
            "CBRT"..+'∛'
            "FORT"..+'∜'
            "1S"..+'∫'
            "2S"..+'∬'
            "3S"..+'∭'
            "4S"..+'⨌'
            "1DS"..+'∮'
            "2DS"..+'∯'
            "3DS"..+'∰'
            "CS"..+'∱'
            "C1DS"..+'∲'
            "A1DS"..+'∳'

            "EIGHTH"..'♫' + '♪'
            "MAN"..'⛂' + '⛀'
            "MEN"..'⛃' + '⛁'
            "BULLET"..'•' + '◦'
            "BOWTIE"..'⧓' + '⋈'
            "LOZENGE"..'⧫' + '◊'
            "RHOMBUS"..'◆' + '◇'
            "SNOWMAN"..'⛇' + '☃'
            "SPARKLE"..'❈' + '❇'
            "SPARKLY"..+'✨'
            "TIME"..+'⌛'

            // Map single-word character names (after affix stripping), case-insensitively.
            for (c in CHARS.asReversed() - '⅌' - '⍴' - '₠' - '∷' - '⏥') {
                val name = c.name.removeAffixes()
                if (name.isNotEmpty() && ' ' !in name && '-' !in name && c.nfkd.length <= 1) name..(+c)
            }

            // Pair "BLACK x" with "WHITE x": upper case yields the black symbol, lower case the
            // white one. The white names are indexed once (first occurrence wins, matching a
            // front-to-back scan) instead of rescanning all characters for every black symbol.
            val whiteByName = LinkedHashMap<String, Char>()
            for (w in CHARS - '◇') {
                val name = w.name
                if (!name.startsWith("WHITE ")) continue
                val wname = name.substring(6).removeAffixes()
                if (wname !in whiteByName) whiteByName[wname] = w
            }
            for (b in CHARS - '◆') {
                val name = b.name
                if (!name.startsWith("BLACK ")) continue
                val bname = name.substring(6).removeAffixes()
                if (' ' in bname) continue
                val w = whiteByName[bname] ?: continue
                bname..b + w
            }
            recursivelySetIDs()
            print(toGraphvizString())
            //print(toDeadTableString())
        }
    }
}
| 396 | +//fun main(vararg args:String){Node.Root.main(*args)} |
0 commit comments