Skip to content

Commit 9c538ed

Browse files
authored
Add color coding for flashcard suitability (#119)
This change adds a heuristic for estimating how good a flashcard a given sentence is likely to make. Sentences in which (1) most of the words are already in your flashcard set and (2) the words not yet being studied are high-frequency are usually better flashcards: they are closer to comprehensible input, and the new words they introduce are high value. This heuristic can be refined over time. Another TODO is to let users define their own priority based on frequency ranks; for now, the frequency-rank range 0–10k is used. One can also imagine a different graph color-coding mechanism that emphasizes high-priority words.
1 parent 047cf7c commit 9c538ed

File tree

3 files changed

+63
-14
lines changed

3 files changed

+63
-14
lines changed

public/css/hanzi-graph.css

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,21 @@ https://developer.mozilla.org/en-US/docs/Web/API/File_API/Using_files_from_web_a
711711
user-select: none;
712712
}
713713

714+
/*
 * Flashcard-suitability highlights. explore.js builds the class name as
 * `suitability-<level>` and puts it on the <b> element holding the
 * unknown-word count. Only max/high/medium get a rule in this hunk; no
 * `.suitability-low` rule appears here.
 */
/* max: every unknown word in the sentence is high priority */
.suitability-max {
715+
background-color: #00ab3f;
716+
color: black;
717+
}
718+
719+
/* high: all but one of the unknown words are high priority */
.suitability-high {
720+
background-color: #90D5FF;
721+
color: black;
722+
}
723+
724+
/* medium: at least one unknown word is high priority */
.suitability-medium {
725+
background-color: #ffc300;
726+
color: black;
727+
}
728+
714729
.definition .tags {
715730
margin-top: 8px;
716731
}

public/js/modules/data-layer.js

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -232,15 +232,14 @@ let findOtherCards = function (seeking, currentKey) {
232232
return candidates;
233233
};
234234

235-
function countWordsWithoutCards(tokenizedSentence) {
236-
let count = 0;
237-
const uniqueWords = new Set(tokenizedSentence);
238-
for (const word of uniqueWords) {
235+
/**
 * Collects the distinct words of a tokenized sentence that are not in
 * `studyListWords`.
 *
 * @param {Iterable<string>} tokenizedSentence - the sentence's words; duplicates are allowed.
 * @returns {Set<string>} the words for which `studyListWords.has(word)` is false.
 *     Callers that only need a count can read `.size`.
 */
function getWordsWithoutCards(tokenizedSentence) {
236+
const uniqueWords = new Set();
237+
for (const word of tokenizedSentence) {
239238
if (!studyListWords.has(word)) {
240-
count++;
239+
uniqueWords.add(word);
241240
}
242241
}
243-
return count;
242+
return uniqueWords;
244243
}
245244

246245
function initVocabSets() {
@@ -646,4 +645,4 @@ async function analyzeImage(base64ImageContents) {
646645
return result;
647646
}
648647

649-
export { writeExploreState, readExploreState, writeOptionState, readOptionState, registerCallback, saveStudyList, addCard, inStudyList, countWordsWithoutCards, getStudyList, isFlashCardUser, removeFromStudyList, findOtherCards, updateCard, recordEvent, getStudyResults, explainChineseSentence, translateEnglish, analyzeImage, generateChineseSentences, analyzeCollocation, isAiEligible, hasCardWithWord, initialize, studyResult, dataTypes, cardTypes }
648+
export { writeExploreState, readExploreState, writeOptionState, readOptionState, registerCallback, saveStudyList, addCard, inStudyList, getWordsWithoutCards, getStudyList, isFlashCardUser, removeFromStudyList, findOtherCards, updateCard, recordEvent, getStudyResults, explainChineseSentence, translateEnglish, analyzeImage, generateChineseSentences, analyzeCollocation, isAiEligible, hasCardWithWord, initialize, studyResult, dataTypes, cardTypes }

public/js/modules/explore.js

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { writeExploreState, addCard, inStudyList, isFlashCardUser, explainChineseSentence, generateChineseSentences, isAiEligible, countWordsWithoutCards, hasCardWithWord, registerCallback, dataTypes } from "./data-layer.js";
1+
import { writeExploreState, addCard, inStudyList, isFlashCardUser, explainChineseSentence, generateChineseSentences, isAiEligible, getWordsWithoutCards, hasCardWithWord, registerCallback, dataTypes } from "./data-layer.js";
22
import { hanziBox, notFoundElement, walkThrough, examplesList, createLoadingDots } from "./dom.js";
33
import { getActiveGraph, getPartition } from "./options.js";
44
import { renderCoverageGraph } from "./coverage-graph"
@@ -544,23 +544,58 @@ let setupExampleElements = function (word, examples, exampleList, defaultSource)
544544
// if the user doesn't use HanziGraph for flashcards, render nothing.
545545
// otherwise, let them know how many new words are in the sentence to aid the choice of whether to make a flashcard
546546
const words = examples[i].zh.filter(x => x in wordSet);
547-
const unknownWordCount = countWordsWithoutCards(words);
547+
const unknownWords = getWordsWithoutCards(words);
548548
const unknownWordTag = document.createElement('span');
549-
unknownWordTag.innerHTML = getUnknownWordHtml(unknownWordCount);
549+
unknownWordTag.innerHTML = getUnknownWordHtml(unknownWords);
550550
unknownWordTag.classList.add('tag', 'nowrap');
551551
missingWordElements.push({ unknownWordTag, words });
552552
tagContainer.appendChild(unknownWordTag);
553553
exampleHolder.appendChild(tagContainer);
554554
exampleList.appendChild(exampleHolder);
555555
}
556556
};
557-
function getUnknownWordHtml(unknownWordCount) {
557+
/**
 * Heuristically rates how suitable a sentence is to be made into a flashcard,
 * based on the words in it that do not have flashcards yet.
 *
 * The idea is that one wants only a few unknown words in a sentence at a time,
 * and that at least one of them should be 'high priority' (i.e., its frequency
 * rank in `wordSet` falls in the configured range). This should probably also
 * take the ratio of unknown words to all words into account, but for now only
 * counts are used.
 *
 * @param {Set<string>} unknownWordSet - words in the sentence without flashcards.
 *     We assume upstream filtering so that every word is a key of `wordSet`;
 *     a word with no rank is simply never counted as high priority.
 * @param {object} [cutoffs] - optional overrides; the defaults match the
 *     previously hard-coded values.
 * @param {number} [cutoffs.minFreqRank=0] - exclusive lower bound on rank.
 * @param {number} [cutoffs.maxFreqRank=10000] - inclusive upper bound on rank.
 * @param {number} [cutoffs.maxUnknownWords=3] - most unknown words a sentence
 *     may have and still be rated above 'low'.
 * @returns {'max'|'high'|'medium'|'low'} the suffix for a `suitability-*` CSS class.
 */
function getSuitabilityClass(unknownWordSet, { minFreqRank = 0, maxFreqRank = 10000, maxUnknownWords = 3 } = {}) {
    // TODO: let users configure these cutoffs, and set up a priority color coding graph style
    const totalUnknownWords = unknownWordSet.size;
    let highPriorityWordCount = 0;
    for (const unknownWord of unknownWordSet) {
        const rank = wordSet[unknownWord];
        // ranks in wordSet start from 1 for ease of rendering (i.e., 1st most common instead of 0th),
        // so the min check is exclusive and the max check is inclusive.
        // An undefined rank fails both comparisons and is therefore not counted.
        if (rank > minFreqRank && rank <= maxFreqRank) {
            highPriorityWordCount++;
        }
    }
    // too many unknown words, or none of them worth prioritizing
    if (totalUnknownWords > maxUnknownWords || highPriorityWordCount === 0) {
        return 'low';
    }
    const lowPriorityWordCount = totalUnknownWords - highPriorityWordCount;
    // every unknown word is high priority...great flash card
    if (lowPriorityWordCount === 0) {
        return 'max';
    }
    // all but one of the unknown words are high priority
    if (lowPriorityWordCount === 1) {
        return 'high';
    }
    // at least one unknown word is high priority
    return 'medium';
}
591+
/**
 * Builds the inner HTML for the tag that tells the user how many words in an
 * example sentence do not have flashcards yet.
 *
 * @param {Set<string>} unknownWords - the words without flashcards, as returned
 *     by getWordsWithoutCards; only its `size` and (via getSuitabilityClass)
 *     its members are read.
 * @returns {string} an empty string when isFlashCardUser() is false; otherwise
 *     a "No unknown words" span when the set is empty, or a span with the
 *     pluralized count whose <b> carries a `suitability-*` class.
 */
function getUnknownWordHtml(unknownWords) {
592+
const unknownWordCount = unknownWords.size;
558593
// if the user doesn't use flashcards, render nothing
if (!isFlashCardUser()) {
559594
return '';
560595
}
561596
return unknownWordCount === 0 ?
562597
`<span class="deemphasized">✅ No unknown words</span>` :
563-
`<span class="deemphasized">No flashcards: <b>${unknownWordCount} word${unknownWordCount !== 1 ? 's' : ''}</b></span>`;
598+
`<span class="deemphasized">No flashcards: <b class="${`suitability-${getSuitabilityClass(unknownWords)}`}">${unknownWordCount} word${unknownWordCount !== 1 ? 's' : ''}</b></span>`;
564599
}
565600

566601
// expects callers to ensure augmentation is available
@@ -1178,8 +1213,8 @@ let initialize = function () {
11781213
fetchStats();
11791214
registerCallback(dataTypes.studyList, function () {
11801215
for (const item of missingWordElements) {
1181-
const unknownWordCount = countWordsWithoutCards(item.words);
1182-
item.unknownWordTag.innerHTML = getUnknownWordHtml(unknownWordCount);
1216+
const unknownWords = getWordsWithoutCards(item.words);
1217+
item.unknownWordTag.innerHTML = getUnknownWordHtml(unknownWords);
11831218
}
11841219
for (const item of hasCardsElements) {
11851220
item.cardTag.innerHTML = addFlashCardDefinitionTag(item.word);

0 commit comments

Comments
 (0)