Skip to content

Commit 65dcdec

Browse files
committed
fix: redo fuzzy search again with more improvements
1 parent 9b0699e commit 65dcdec

File tree

5 files changed

+150
-102
lines changed

5 files changed

+150
-102
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ import { search, getItemById } from '@castdrian/kdapi';
4444
const results = search('stayc', { // Supports Korean, Japanese, Chinese characters
4545
type: 'all', // 'idol' | 'group' | 'all'
4646
limit: 10, // Max number of results
47-
threshold: 0.4 // Match quality threshold (0-1)
47+
threshold: 0.3 // Match quality threshold (0-1); lower is stricter, 0 requires an exact match
4848
});
4949

5050
// The search is performed across multiple fields:

bun.lockb

-44 Bytes
Binary file not shown.

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@castdrian/kdapi",
3-
"version": "0.3.2",
3+
"version": "0.3.3",
44
"type": "module",
55
"description": "kpop idol and group profiles dataset generator",
66
"main": "dist/index.js",
@@ -32,10 +32,10 @@
3232
},
3333
"dependencies": {
3434
"@biomejs/biome": "^1.9.4",
35-
"@m31coding/fuzzy-search": "^1.0.2",
3635
"cheerio": "^1.0.0",
3736
"commander": "^13.1.0",
3837
"date-fns": "^4.1.0",
38+
"fuse.js": "^7.1.0",
3939
"undici": "^7.8.0",
4040
"uuid": "^11.1.0",
4141
"zod": "^3.22.4"

src/index.ts

Lines changed: 139 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { Idol, Group, DataSet, GroupsData, IdolsData } from "@src/types";
22
import groups from "@root/data/groups.json" assert { type: "json" };
33
import idols from "@root/data/idols.json" assert { type: "json" };
4-
import * as fuzzySearchLib from "@m31coding/fuzzy-search";
4+
import Fuse, { type IFuseOptions } from "fuse.js";
55

66
const dataset: DataSet = {
77
femaleIdols: (idols as IdolsData).femaleIdols,
@@ -11,61 +11,52 @@ const dataset: DataSet = {
1111
coedGroups: (groups as GroupsData).coedGroups,
1212
};
1313

14-
// Configure searcher to handle all character types (for Korean, Japanese, Chinese characters)
15-
const config = fuzzySearchLib.Config.createDefaultConfig();
16-
config.normalizerConfig.allowCharacter = (_c: string) => true;
17-
18-
// Create separate searchers for idols and groups for better performance
19-
const idolSearcher = fuzzySearchLib.SearcherFactory.createSearcher<
20-
Idol,
21-
string
22-
>(config);
23-
const groupSearcher = fuzzySearchLib.SearcherFactory.createSearcher<
24-
Group,
25-
string
26-
>(config);
27-
28-
// Index the idols and groups
29-
idolSearcher.indexEntities(
30-
[...dataset.femaleIdols, ...dataset.maleIdols],
31-
(entity) => entity.id,
32-
(entity) =>
33-
[
34-
entity.names.stage,
35-
entity.names.full,
36-
entity.names.native,
37-
entity.names.korean,
38-
entity.names.japanese,
39-
entity.names.chinese,
40-
// Add group names to search terms
41-
...(entity.groups?.flatMap((g) => [
42-
g.name, // Group name by itself
43-
`${entity.names.stage} ${g.name}`, // "Name Group"
44-
`${g.name} ${entity.names.stage}`, // "Group Name"
45-
]) || []),
46-
].filter((name): name is string => name !== null),
47-
);
14+
const groupSearchOptions: IFuseOptions<Group> = {
15+
keys: [
16+
{ name: "groupInfo.names.stage", weight: 2 },
17+
{ name: "groupInfo.names.korean", weight: 2 },
18+
{ name: "groupInfo.names.japanese", weight: 1.5 },
19+
{ name: "groupInfo.names.chinese", weight: 1.5 },
20+
{ name: "groupInfo.fandomName", weight: 0.7 },
21+
{ name: "company.current", weight: 0.3 },
22+
{ name: "memberHistory.currentMembers.name", weight: 1 },
23+
],
24+
includeScore: true,
25+
threshold: 0.3,
26+
ignoreLocation: true,
27+
minMatchCharLength: 2,
28+
};
29+
30+
const idolSearchOptions: IFuseOptions<Idol> = {
31+
keys: [
32+
{ name: "names.stage", weight: 2 },
33+
{ name: "names.full", weight: 2 },
34+
{ name: "names.native", weight: 2 },
35+
{ name: "names.korean", weight: 2 },
36+
{ name: "names.japanese", weight: 1.5 },
37+
{ name: "names.chinese", weight: 1.5 },
38+
{
39+
name: "groups.name",
40+
weight: 1,
41+
},
42+
],
43+
includeScore: true,
44+
threshold: 0.3,
45+
ignoreLocation: true,
46+
minMatchCharLength: 2,
47+
};
4848

49-
groupSearcher.indexEntities(
49+
// Initialize searchers
50+
const groupSearcher = new Fuse(
5051
[...dataset.girlGroups, ...dataset.boyGroups, ...dataset.coedGroups],
51-
(entity) => entity.id,
52-
(entity) =>
53-
[
54-
entity.groupInfo?.names?.stage,
55-
entity.groupInfo?.names?.korean,
56-
entity.groupInfo?.names?.japanese,
57-
entity.groupInfo?.names?.chinese,
58-
entity.groupInfo?.fandomName,
59-
entity.company?.current,
60-
...(entity.company?.history ?? []).map((h) => h.name),
61-
...(entity.memberHistory?.currentMembers ?? []).map((m) => m.name),
62-
...(entity.memberHistory?.formerMembers ?? []).map((m) => m.name),
63-
].filter((name): name is string => name !== null),
52+
groupSearchOptions,
53+
);
54+
55+
const idolSearcher = new Fuse(
56+
[...dataset.femaleIdols, ...dataset.maleIdols],
57+
idolSearchOptions,
6458
);
6559

66-
/**
67-
* Search across both idols and groups with improved accuracy
68-
*/
6960
export function search(
7061
query: string,
7162
options: {
@@ -74,47 +65,118 @@ export function search(
7465
threshold?: number;
7566
} = {},
7667
) {
77-
const { type = "all", limit = 10, threshold = 0.4 } = options;
68+
const { type = "all", limit = 10, threshold } = options;
7869
const results: { item: Idol | Group; type: "idol" | "group" }[] = [];
7970

80-
const searchQuery = new fuzzySearchLib.Query(query, limit, threshold);
71+
// Split query into words for better matching
72+
const words = query.toLowerCase().trim().split(/\s+/);
73+
const hasMultipleWords = words.length > 1;
8174

82-
if (type === "all" || type === "idol") {
83-
const idolResults = idolSearcher.getMatches(searchQuery);
84-
results.push(
85-
...idolResults.matches.map((match) => ({
86-
item: match.entity,
87-
type: "idol" as const,
88-
})),
89-
);
90-
}
75+
// If threshold is provided, create new searchers with updated options
76+
const groupSearcherInstance =
77+
threshold !== undefined
78+
? new Fuse(
79+
[...dataset.girlGroups, ...dataset.boyGroups, ...dataset.coedGroups],
80+
{
81+
...groupSearchOptions,
82+
threshold,
83+
},
84+
)
85+
: groupSearcher;
9186

92-
if (type === "all" || type === "group") {
93-
const groupResults = groupSearcher.getMatches(searchQuery);
94-
results.push(
95-
...groupResults.matches.map((match) => ({
96-
item: match.entity,
97-
type: "group" as const,
98-
})),
99-
);
87+
const idolSearcherInstance =
88+
threshold !== undefined
89+
? new Fuse([...dataset.femaleIdols, ...dataset.maleIdols], {
90+
...idolSearchOptions,
91+
threshold,
92+
})
93+
: idolSearcher;
94+
95+
if (hasMultipleWords) {
96+
const [firstWord, ...restWords] = words;
97+
if (!firstWord) return [];
98+
const restWordsStr = restWords.join(" ");
99+
100+
// Search for idols matching the first word
101+
const potentialIdols = idolSearcherInstance.search(firstWord);
102+
103+
// Add matches where idol belongs to the specified group
104+
for (const idolResult of potentialIdols) {
105+
const idol = idolResult.item;
106+
if (idol.groups?.some((g) => g.name.toLowerCase() === restWordsStr)) {
107+
results.push({
108+
item: idol,
109+
type: "idol",
110+
});
111+
}
112+
}
113+
114+
// Try reverse order (group first, then idol name)
115+
const reversePotentialIdols = idolSearcherInstance.search(restWordsStr);
116+
for (const idolResult of reversePotentialIdols) {
117+
const idol = idolResult.item;
118+
if (idol.groups?.some((g) => g.name.toLowerCase() === firstWord)) {
119+
results.push({
120+
item: idol,
121+
type: "idol",
122+
});
123+
}
124+
}
125+
126+
// If no exact group-name matches were found, fall back to a substring match on the group name
127+
if (results.length === 0) {
128+
for (const idolResult of potentialIdols) {
129+
const idol = idolResult.item;
130+
if (
131+
idol.groups?.some((g) => g.name.toLowerCase().includes(restWordsStr))
132+
) {
133+
results.push({
134+
item: idol,
135+
type: "idol",
136+
});
137+
}
138+
}
139+
}
140+
} else {
141+
if (type === "all" || type === "idol") {
142+
const idolResults = idolSearcherInstance.search(query);
143+
results.push(
144+
...idolResults.map((result) => ({
145+
item: result.item,
146+
type: "idol" as const,
147+
})),
148+
);
149+
}
150+
151+
if (type === "all" || type === "group") {
152+
const groupResults = groupSearcherInstance.search(query);
153+
results.push(
154+
...groupResults.map((result) => ({
155+
item: result.item,
156+
type: "group" as const,
157+
})),
158+
);
159+
}
100160
}
101161

102-
return results;
162+
// Remove duplicates
163+
const uniqueResults = results.filter(
164+
(result, index, self) =>
165+
index === self.findIndex((r) => r.item.id === result.item.id),
166+
);
167+
168+
return uniqueResults.slice(0, limit);
103169
}
104170

105-
/**
106-
* Get a specific idol or group by ID
107-
*/
171+
export type { Idol, Group };
108172
export function getItemById(
109173
id: string,
110174
): { item: Idol | Group; type: "idol" | "group" } | null {
111-
// Search idols
112175
const idol = [...dataset.femaleIdols, ...dataset.maleIdols].find(
113176
(i) => i.id === id,
114177
);
115178
if (idol) return { item: idol, type: "idol" };
116179

117-
// Search groups
118180
const group = [
119181
...dataset.girlGroups,
120182
...dataset.boyGroups,
@@ -124,17 +186,3 @@ export function getItemById(
124186

125187
return null;
126188
}
127-
128-
// Export types for external use
129-
export type {
130-
Idol,
131-
DataSet,
132-
GroupType,
133-
Company,
134-
SocialMedia,
135-
GroupNames,
136-
GroupInfo,
137-
MemberHistory,
138-
Group,
139-
GroupsData,
140-
} from "@src/types";

test/search.test.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@ import { describe, expect, test } from "bun:test";
22
import { type Idol, type Group, search } from "@src/index";
33

44
describe("Fuzzy Search", () => {
5-
const testJapaneseName = "ステイシー";
6-
75
test("should handle null company data", () => {
86
const results = search("company");
97
expect(results).toBeDefined();
10-
// Should not throw any errors
118
});
129

1310
test("should search by idol name", () => {
1411
const results = search("sumin");
12+
console.log("Results for 'sumin':", JSON.stringify(results, null, 2));
1513
expect(results.length).toBeGreaterThan(0);
1614
expect(results.some((r) => r.type === "idol")).toBe(true);
1715
});
@@ -23,7 +21,9 @@ describe("Fuzzy Search", () => {
2321
results.some(
2422
(r) =>
2523
r.type === "idol" &&
26-
(r.item as Idol).groups?.some((g) => g.name.toLowerCase().includes("fromis")),
24+
(r.item as Idol).groups?.some((g) =>
25+
g.name.toLowerCase().includes("stayc"),
26+
),
2727
),
2828
).toBe(true);
2929
});
@@ -40,15 +40,15 @@ describe("Fuzzy Search", () => {
4040
});
4141

4242
test("should handle Japanese characters", () => {
43-
const results = search(testJapaneseName);
43+
const results = search("ステイシー");
4444
expect(results.length).toBeGreaterThan(0);
4545
expect(
4646
results.some(
4747
(r) =>
4848
r.type === "group" &&
4949
(r.item as Group).groupInfo?.names?.stage
5050
?.toLowerCase()
51-
.includes("twice"),
51+
.includes("stayc"),
5252
),
5353
).toBe(true);
5454
});
@@ -59,10 +59,10 @@ describe("Fuzzy Search", () => {
5959
});
6060

6161
test("should filter by type", () => {
62-
const idolResults = search("jin", { type: "idol" });
62+
const idolResults = search("sumin", { type: "idol" });
6363
expect(idolResults.every((r) => r.type === "idol")).toBe(true);
6464

65-
const groupResults = search("bts", { type: "group" });
65+
const groupResults = search("stayc", { type: "group" });
6666
expect(groupResults.every((r) => r.type === "group")).toBe(true);
6767
});
6868
});

0 commit comments

Comments
 (0)