Add color coding for flashcard suitability (#119)

mreichhoff · web-flow · commit 9c538ed5a41a · 2025-11-16T11:37:46.000-05:00
This change adds a heuristic for estimating how good a flashcard a
given sentence is likely to be. Sentences where:
* most of the words are already in your flashcard set, and
* the words not yet being studied are high-frequency words
usually make better flashcards: they are closer to comprehensible
input, and the new words are high value. This heuristic can be refined
over time.

Another TODO is to let users define their own priority based on frequency
ranks. For now, words with a frequency rank from 1 through 10,000 count as
high priority. One can also imagine a different graph color coding mechanism
that emphasizes high priority words.
diff --git a/public/css/hanzi-graph.css b/public/css/hanzi-graph.css
@@ -711,6 +711,21 @@ https://developer.mozilla.org/en-US/docs/Web/API/File_API/Using_files_from_web_a
     user-select: none;
 }
 
+.suitability-max {
+    background-color: #00ab3f; /* green: every unknown word is high priority */
+    color: black;
+}
+
+.suitability-high {
+    background-color: #90d5ff; /* light blue: all but one unknown word is high priority */
+    color: black;
+}
+
+.suitability-medium {
+    background-color: #ffc300; /* amber: at least one unknown word is high priority */
+    color: black;
+}
+
 .definition .tags {
     margin-top: 8px;
 }
diff --git a/public/js/modules/data-layer.js b/public/js/modules/data-layer.js
@@ -232,15 +232,14 @@ let findOtherCards = function (seeking, currentKey) {
     return candidates;
 };
 
-function countWordsWithoutCards(tokenizedSentence) {
-    let count = 0;
-    const uniqueWords = new Set(tokenizedSentence);
-    for (const word of uniqueWords) {
+function getWordsWithoutCards(tokenizedSentence) {
+    const uniqueWords = new Set();
+    for (const word of tokenizedSentence) {
         if (!studyListWords.has(word)) {
-            count++;
+            uniqueWords.add(word);
         }
     }
-    return count;
+    return uniqueWords;
 }
 
 function initVocabSets() {
@@ -646,4 +645,4 @@ async function analyzeImage(base64ImageContents) {
     return result;
 }
 
-export { writeExploreState, readExploreState, writeOptionState, readOptionState, registerCallback, saveStudyList, addCard, inStudyList, countWordsWithoutCards, getStudyList, isFlashCardUser, removeFromStudyList, findOtherCards, updateCard, recordEvent, getStudyResults, explainChineseSentence, translateEnglish, analyzeImage, generateChineseSentences, analyzeCollocation, isAiEligible, hasCardWithWord, initialize, studyResult, dataTypes, cardTypes }
+export { writeExploreState, readExploreState, writeOptionState, readOptionState, registerCallback, saveStudyList, addCard, inStudyList, getWordsWithoutCards, getStudyList, isFlashCardUser, removeFromStudyList, findOtherCards, updateCard, recordEvent, getStudyResults, explainChineseSentence, translateEnglish, analyzeImage, generateChineseSentences, analyzeCollocation, isAiEligible, hasCardWithWord, initialize, studyResult, dataTypes, cardTypes }
diff --git a/public/js/modules/explore.js b/public/js/modules/explore.js
@@ -1,4 +1,4 @@
-import { writeExploreState, addCard, inStudyList, isFlashCardUser, explainChineseSentence, generateChineseSentences, isAiEligible, countWordsWithoutCards, hasCardWithWord, registerCallback, dataTypes } from "./data-layer.js";
+import { writeExploreState, addCard, inStudyList, isFlashCardUser, explainChineseSentence, generateChineseSentences, isAiEligible, getWordsWithoutCards, hasCardWithWord, registerCallback, dataTypes } from "./data-layer.js";
 import { hanziBox, notFoundElement, walkThrough, examplesList, createLoadingDots } from "./dom.js";
 import { getActiveGraph, getPartition } from "./options.js";
 import { renderCoverageGraph } from "./coverage-graph"
@@ -544,23 +544,58 @@ let setupExampleElements = function (word, examples, exampleList, defaultSource)
         // if the user doesn't use HanziGraph for flashcards, render nothing.
         // otherwise, let them know how many new words are in the sentence to aid the choice of whether to make a flashcard
         const words = examples[i].zh.filter(x => x in wordSet);
-        const unknownWordCount = countWordsWithoutCards(words);
+        const unknownWords = getWordsWithoutCards(words);
         const unknownWordTag = document.createElement('span');
-        unknownWordTag.innerHTML = getUnknownWordHtml(unknownWordCount);
+        unknownWordTag.innerHTML = getUnknownWordHtml(unknownWords);
         unknownWordTag.classList.add('tag', 'nowrap');
         missingWordElements.push({ unknownWordTag, words });
         tagContainer.appendChild(unknownWordTag);
         exampleHolder.appendChild(tagContainer);
         exampleList.appendChild(exampleHolder);
     }
 };
-function getUnknownWordHtml(unknownWordCount) {
+// Rates how suitable a sentence is as a flashcard, given the set of its words that have no card yet.
+// Returns 'max', 'high', 'medium' or 'low'; getUnknownWordHtml appends it to the `suitability-` class prefix.
+function getSuitabilityClass(unknownWordSet) {
+    // TODO: make these rank cutoffs configurable, and set up a priority color coding graph style
+    const minFreqRank = 0;
+    const maxFreqRank = 10000;
+    const totalUnknownWords = unknownWordSet.size;
+    let highPriorityWordCount = 0;
+    for (const unknownWord of unknownWordSet) {
+        // we assume upstream filtering of words being in the wordset before added to unknownWordSet
+        const rank = wordSet[unknownWord];
+        // ranks in wordSet start from 1 (1st most common, not 0th): min is exclusive, max is inclusive
+        if (rank <= maxFreqRank && rank > minFreqRank) {
+            highPriorityWordCount++;
+        }
+    }
+    // heuristic: want no more than 3 unknown words in a sentence, at least one of them high priority.
+    // this should also probably include a minimum percentage for totalUnknownWords / allWords
+    // but for now just use minimum counts
+    if (totalUnknownWords <= 3 && highPriorityWordCount > 0) {
+        // every unknown word is high priority...great flash card
+        if (highPriorityWordCount === totalUnknownWords) {
+            return 'max';
+        }
+        // all but one of the unknown words are high priority
+        if (highPriorityWordCount === (totalUnknownWords - 1)) {
+            return 'high';
+        }
+        // at least one unknown word is high priority
+        return 'medium';
+    }
+    // more than 3 unknown words, or none of them is high priority
+    return 'low';
+}
+function getUnknownWordHtml(unknownWords) {
+    const unknownWordCount = unknownWords.size;
     if (!isFlashCardUser()) {
         return '';
     }
     return unknownWordCount === 0 ?
         `<span class="deemphasized">✅ No unknown words</span>` :
-        `<span class="deemphasized">No flashcards: <b>${unknownWordCount} word${unknownWordCount !== 1 ? 's' : ''}</b></span>`;
+        `<span class="deemphasized">No flashcards: <b class="${`suitability-${getSuitabilityClass(unknownWords)}`}">${unknownWordCount} word${unknownWordCount !== 1 ? 's' : ''}</b></span>`;
 }
 
 // expects callers to ensure augmentation is available
@@ -1178,8 +1213,8 @@ let initialize = function () {
     fetchStats();
     registerCallback(dataTypes.studyList, function () {
         for (const item of missingWordElements) {
-            const unknownWordCount = countWordsWithoutCards(item.words);
-            item.unknownWordTag.innerHTML = getUnknownWordHtml(unknownWordCount);
+            const unknownWords = getWordsWithoutCards(item.words);
+            item.unknownWordTag.innerHTML = getUnknownWordHtml(unknownWords);
         }
         for (const item of hasCardsElements) {
             item.cardTag.innerHTML = addFlashCardDefinitionTag(item.word);

Original file line number	Diff line number	Diff line change
`@@ -232,15 +232,14 @@ let findOtherCards = function (seeking, currentKey) {`
`232`	`232`	`return candidates;`
`233`	`233`	`};`
`234`	`234`
`235`		`-function countWordsWithoutCards(tokenizedSentence) {`
`236`		`- let count = 0;`
`237`		`- const uniqueWords = new Set(tokenizedSentence);`
`238`		`- for (const word of uniqueWords) {`
	`235`	`+function getWordsWithoutCards(tokenizedSentence) {`
	`236`	`+ const uniqueWords = new Set();`
	`237`	`+ for (const word of tokenizedSentence) {`
`239`	`238`	`if (!studyListWords.has(word)) {`
`240`		`- count++;`
	`239`	`+ uniqueWords.add(word);`
`241`	`240`	`}`
`242`	`241`	`}`
`243`		`- return count;`
	`242`	`+ return uniqueWords;`
`244`	`243`	`}`
`245`	`244`
`246`	`245`	`function initVocabSets() {`
`@@ -646,4 +645,4 @@ async function analyzeImage(base64ImageContents) {`
`646`	`645`	`return result;`
`647`	`646`	`}`
`648`	`647`
`649`		`-export { writeExploreState, readExploreState, writeOptionState, readOptionState, registerCallback, saveStudyList, addCard, inStudyList, countWordsWithoutCards, getStudyList, isFlashCardUser, removeFromStudyList, findOtherCards, updateCard, recordEvent, getStudyResults, explainChineseSentence, translateEnglish, analyzeImage, generateChineseSentences, analyzeCollocation, isAiEligible, hasCardWithWord, initialize, studyResult, dataTypes, cardTypes }`
	`648`	`+export { writeExploreState, readExploreState, writeOptionState, readOptionState, registerCallback, saveStudyList, addCard, inStudyList, getWordsWithoutCards, getStudyList, isFlashCardUser, removeFromStudyList, findOtherCards, updateCard, recordEvent, getStudyResults, explainChineseSentence, translateEnglish, analyzeImage, generateChineseSentences, analyzeCollocation, isAiEligible, hasCardWithWord, initialize, studyResult, dataTypes, cardTypes }`