Skip to content

Commit 9063a06

Browse files
committed
Add optional interpolation search mode to Unicode Explorer
Adds a checkbox (unchecked by default) that switches Phase 2 from standard binary search to interpolation search. When enabled, the algorithm estimates the target's position proportionally between the known boundary codepoints rather than always picking the midpoint, which often reaches the result in fewer HTTP fetches. Includes a linked footnote explaining the technique. Session: https://claude.ai/code/session_01JAGszwm5ArgDnxaeRkQhs5
1 parent 698a281 commit 9063a06

File tree

1 file changed

+68
-2
lines changed

1 file changed

+68
-2
lines changed

unicode-binary-search.html

Lines changed: 68 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,47 @@
268268

269269
.loading-msg.visible { display: block; }
270270

271+
/* Options row */
272+
.options-row {
273+
display: flex;
274+
align-items: center;
275+
gap: 0.5rem;
276+
margin-bottom: 1.5rem;
277+
font-size: 0.875rem;
278+
color: var(--text-secondary);
279+
}
280+
281+
.options-row label {
282+
display: flex;
283+
align-items: center;
284+
gap: 0.4rem;
285+
cursor: pointer;
286+
}
287+
288+
.options-row input[type="checkbox"] {
289+
accent-color: var(--accent);
290+
}
291+
292+
.options-row a {
293+
color: var(--accent);
294+
text-decoration: none;
295+
}
296+
297+
.options-row a:hover { text-decoration: underline; }
298+
299+
/* Footnotes */
300+
.footnotes {
301+
margin-top: 1.5rem;
302+
padding-top: 1rem;
303+
border-top: 1px solid var(--border);
304+
font-size: 0.8rem;
305+
color: var(--text-muted);
306+
line-height: 1.6;
307+
}
308+
309+
.footnotes a { color: var(--accent); text-decoration: none; }
310+
.footnotes a:hover { text-decoration: underline; }
311+
271312
@media (max-width: 600px) {
272313
.container { padding: 1rem 0.75rem; }
273314
.search-row { flex-direction: column; }
@@ -291,6 +332,10 @@ <h1>Unicode Explorer</h1>
291332
<button id="searchBtn" class="search-btn">Search</button>
292333
</div>
293334

335+
<div class="options-row">
336+
<label><input type="checkbox" id="interpolationCheck"> Use interpolation search<sup><a href="#fn-interpolation">1</a></sup></label>
337+
</div>
338+
294339
<div id="resultCard" class="result-card">
295340
<div id="resultChar" class="result-char"></div>
296341
<div class="result-info">
@@ -336,6 +381,10 @@ <h1>Unicode Explorer</h1>
336381
<div id="loadingMsg" class="loading-msg">Loading metadata&hellip;</div>
337382
<div id="summary" class="summary"></div>
338383
</div>
384+
385+
<div class="footnotes">
386+
<p id="fn-interpolation"><sup>1</sup> Standard binary search always picks the middle record, ignoring the actual values. Interpolation search makes a smarter guess based on where the target codepoint falls proportionally between the known boundary values &mdash; like opening a phone book near the back for &ldquo;W&rdquo; rather than always opening to the middle. Within each signpost interval the codepoints are roughly evenly distributed, so the first guess is often very close, reducing the number of HTTP fetches needed.</p>
387+
</div>
339388
</div>
340389

341390
<script>
@@ -360,6 +409,7 @@ <h1>Unicode Explorer</h1>
360409
const emptyState = document.getElementById('emptyState');
361410
const loadingMsg = document.getElementById('loadingMsg');
362411
const summaryEl = document.getElementById('summary');
412+
const interpolationCheck = document.getElementById('interpolationCheck');
363413

364414
// Category full names
365415
const CATEGORIES = {
@@ -493,8 +543,11 @@ <h1>Unicode Explorer</h1>
493543
clearResults();
494544

495545
const { recordWidth, totalRecords, signposts, totalBytes } = meta;
546+
const useInterpolation = interpolationCheck.checked;
496547
let lo = 0;
497548
let hi = totalRecords - 1;
549+
let loCP = 0;
550+
let hiCP = 0x10FFFF;
498551
let step = 0;
499552
let fetchCount = 0;
500553
let bytesTransferred = 0;
@@ -534,17 +587,28 @@ <h1>Unicode Explorer</h1>
534587

535588
if (sp.cp < targetCP) {
536589
lo = sp.idx;
590+
loCP = sp.cp;
537591
} else {
538592
hi = sp.idx;
593+
hiCP = sp.cp;
539594
break; // signposts are sorted, no need to continue
540595
}
541596
}
542597

543-
// Phase 2: Binary search via Range requests
598+
// Phase 2: Binary search (or interpolation search) via Range requests
544599
let found = false;
545600
while (lo <= hi) {
546601
step++;
547-
const mid = Math.floor((lo + hi) / 2);
602+
let mid;
603+
if (useInterpolation && hiCP > loCP) {
604+
// Estimate position proportionally between known boundary codepoints
605+
const fraction = (targetCP - loCP) / (hiCP - loCP);
606+
mid = lo + Math.floor(fraction * (hi - lo));
607+
// Clamp to valid range
608+
mid = Math.max(lo, Math.min(hi, mid));
609+
} else {
610+
mid = Math.floor((lo + hi) / 2);
611+
}
548612
const byteStart = mid * recordWidth;
549613
const byteEnd = byteStart + recordWidth - 1;
550614
const rangeHeader = 'bytes=' + byteStart + '-' + byteEnd;
@@ -568,8 +632,10 @@ <h1>Unicode Explorer</h1>
568632

569633
if (record.cp < targetCP) {
570634
lo = mid + 1;
635+
loCP = record.cp;
571636
} else {
572637
hi = mid - 1;
638+
hiCP = record.cp;
573639
}
574640
}
575641

0 commit comments

Comments
 (0)