Skip to content

Commit 1095cae

Browse files
authored
Migrate MongoDB GUIDs from subtype 3 to 4 (#4179)
1 parent 1dff940 commit 1095cae

File tree

8 files changed

+240
-4
lines changed

8 files changed

+240
-4
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ scripts/*.js
3535
!scripts/frontendScripts.js
3636
!scripts/jestTest.js
3737
!scripts/setupMongo.js
38+
database/*.js
3839
*.log
3940
*-debug.log*
4041
*-error.log*

Backend/Models/Sense.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public class Sense
1616
/// </summary>
1717
[Required]
1818
[BsonElement("guid")]
19-
[BsonGuidRepresentation(GuidRepresentation.CSharpLegacy)]
19+
[BsonGuidRepresentation(GuidRepresentation.Standard)]
2020
#pragma warning disable CA1720
2121
public Guid Guid { get; set; }
2222
#pragma warning restore CA1720

Backend/Models/UserEdit.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public UserEdit Clone()
3535
public class UserEditStepWrapper
3636
{
3737
[Required]
38-
[BsonGuidRepresentation(GuidRepresentation.CSharpLegacy)]
38+
[BsonGuidRepresentation(GuidRepresentation.Standard)]
3939
public Guid EditGuid { get; set; }
4040

4141
[Required]
@@ -57,7 +57,7 @@ public class Edit
5757
{
5858
[Required]
5959
[BsonElement("guid")]
60-
[BsonGuidRepresentation(GuidRepresentation.CSharpLegacy)]
60+
[BsonGuidRepresentation(GuidRepresentation.Standard)]
6161
#pragma warning disable CA1720
6262
public Guid Guid { get; set; } = Guid.NewGuid();
6363
#pragma warning restore CA1720

Backend/Models/Word.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public class Word
2020
/// </summary>
2121
[Required]
2222
[BsonElement("guid")]
23-
[BsonGuidRepresentation(GuidRepresentation.CSharpLegacy)]
23+
[BsonGuidRepresentation(GuidRepresentation.Standard)]
2424
#pragma warning disable CA1720
2525
public Guid Guid { get; set; }
2626
#pragma warning restore CA1720
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
// Migration script: convert MongoDB GUID fields from BinData subtype 3 (CSharpLegacy)
2+
// to BinData subtype 4 (Standard/RFC 4122).
3+
//
4+
// Usage:
5+
// npx tsc --target es2020 database/migrate-guids-to-subtype4.ts
6+
// mongosh CombineDatabase database/migrate-guids-to-subtype4.js
7+
//
8+
// Type-check only (no output):
9+
// npx tsc --project database/tsconfig.json --noEmit
10+
//
11+
// Background:
12+
// The C# MongoDB driver previously encoded System.Guid values using BinData subtype 3
13+
// with a CSharpLegacy byte order (little-endian for the first three components).
14+
// BinData subtype 4 uses a stable RFC 4122 byte order across all drivers, making
15+
// it easier to search by GUID in mongosh and other tools.
16+
//
17+
// CSharpLegacy byte order for UUID aabbccdd-eeff-gghh-iijj-kkllmmnnoopp:
18+
// stored as [dd,cc,bb,aa, ff,ee, hh,gg, ii,jj,kk,ll,mm,nn,oo,pp]
19+
// Standard (subtype 4) byte order for the same UUID:
20+
// stored as [aa,bb,cc,dd, ee,ff, gg,hh, ii,jj,kk,ll,mm,nn,oo,pp]
21+
22+
// Type declarations for mongosh globals.
23+
// All `declare` statements are erased by tsc and produce no JS output.
24+
//
25+
// The inline BsonBinary interface matches the bson@^7 Binary API:
26+
// sub_type is a number property; toString("hex") returns the hex string.
27+
/** A document as this script sees it: field names mapped to untyped values. */
type MongoDoc = Record<string, unknown>;

/** The only parts of a BSON Binary value that this script reads. */
interface BsonBinary {
  /** Numeric binary subtype. */
  sub_type: number;
  /** Render the binary payload as a string in the requested encoding. */
  toString(encoding: "hex" | "base64" | "utf8" | "utf-8"): string;
}

/** Result of `find`; only callback-style iteration is used here. */
interface MongoCursor {
  forEach(callback: (doc: MongoDoc) => void): void;
}

/** The collection operations this script calls. */
interface MongoCollection {
  find(query: MongoDoc): MongoCursor;
  updateOne(filter: MongoDoc, update: MongoDoc): void;
}

/** The database operations this script calls. */
interface MongoDB {
  getCollection(name: string): MongoCollection;
  getCollectionNames(): string[];
}

// Globals supplied by the shell at run time; `declare` emits no JavaScript.
declare const db: MongoDB;
declare function print(msg: string): void;
declare function UUID(hexstr?: string): BsonBinary;
50+
51+
/**
 * Convert a BinData subtype 3 (CSharpLegacy) GUID to BinData subtype 4 (Standard).
 *
 * @param bin - Any field value read from a document.
 * @returns The value built by `UUID(...)` from the re-ordered bytes, or null
 *   when `bin` is not an object with `sub_type === 3` whose hex rendering is
 *   exactly 32 hexadecimal digits (16 bytes).
 */
function csharpGuidToStandard(bin: unknown): ReturnType<typeof UUID> | null {
  if (bin === null || typeof bin !== "object") {
    return null;
  }
  const binary = bin as Partial<BsonBinary>;
  if (binary.sub_type !== 3 || typeof binary.toString !== "function") {
    return null;
  }

  // Validate the rendering strictly. Splitting with /../g would accept
  // non-hex characters and silently drop a trailing odd character, letting a
  // malformed value reach UUID() outside of any try/catch.
  const hex: unknown = binary.toString("hex");
  if (typeof hex !== "string" || !/^[0-9a-fA-F]{32}$/.test(hex)) {
    return null;
  }

  // Byte-swap the first three components (4 + 2 + 2 bytes) from little-endian
  // to big-endian; the remaining 8 bytes are already in big-endian order.
  const timeLow =
    hex.slice(6, 8) + hex.slice(4, 6) + hex.slice(2, 4) + hex.slice(0, 2);
  const timeMid = hex.slice(10, 12) + hex.slice(8, 10);
  const timeHigh = hex.slice(14, 16) + hex.slice(12, 14);
  const clockSeq = hex.slice(16, 20);
  const node = hex.slice(20);

  return UUID([timeLow, timeMid, timeHigh, clockSeq, node].join("-"));
}
79+
80+
// Running totals for the summary printed once every collection is processed.
// The first two only advance after updateOne returns without throwing, so a
// failed write is never reported as a conversion.
let totalGuidsConverted = 0;
let totalDocumentsUpdated = 0;
let totalDocumentsFailed = 0;

/** `$set` payload: dotted field path mapped to the replacement GUID value. */
type GuidUpdate = Record<string, ReturnType<typeof UUID>>;

/**
 * Add a `$set` entry for every element of `doc[arrayField]` whose `guid`
 * converts via csharpGuidToStandard. Elements that are not objects, and
 * guids that do not convert, are left out.
 *
 * @param doc - The document being inspected.
 * @param arrayField - Name of the array field (e.g. "senses", "edits").
 * @param update - Payload to add `<arrayField>.<index>.guid` entries to.
 */
function collectArrayGuidUpdates(
  doc: MongoDoc,
  arrayField: string,
  update: GuidUpdate
): void {
  const items = doc[arrayField];
  if (!Array.isArray(items)) {
    return;
  }
  items.forEach((item: unknown, i: number) => {
    if (item === null || typeof item !== "object") {
      return;
    }
    const converted = csharpGuidToStandard((item as MongoDoc)["guid"]);
    if (converted !== null) {
      update[`${arrayField}.${i}.guid`] = converted;
    }
  });
}

/**
 * Write `update` to the document with the same `_id` and record the outcome
 * in the running totals. Does nothing when `update` is empty. A throwing
 * updateOne is logged and counted as a failure; the run continues.
 *
 * @param coll - Collection that owns the document.
 * @param doc - The document the payload was computed from.
 * @param update - `$set` payload of converted GUIDs.
 */
function applyGuidUpdate(
  coll: MongoCollection,
  doc: MongoDoc,
  update: GuidUpdate
): void {
  const guidCount = Object.keys(update).length;
  if (guidCount === 0) {
    return;
  }
  try {
    coll.updateOne({ _id: doc["_id"] }, { $set: update });
    totalGuidsConverted += guidCount;
    totalDocumentsUpdated++;
  } catch (e) {
    totalDocumentsFailed++;
    print(`Error updating document ${doc["_id"]}: ${e}`);
  }
}

// ── WordsCollection and FrontierCollection ──────────────────────────────────
//
// Each Word document has:
//   - guid           (BinData, top-level)
//   - senses[].guid  (BinData, per-element in the senses array)

for (const collName of ["WordsCollection", "FrontierCollection"]) {
  const coll = db.getCollection(collName);

  // Match on either GUID location so a document is not skipped just because
  // its top-level guid is absent; the conversion function handles subtype
  // checking.
  coll
    .find({
      $or: [
        { guid: { $type: "binData" } },
        { "senses.guid": { $type: "binData" } },
      ],
    })
    .forEach((doc) => {
      const update: GuidUpdate = {};

      // Convert top-level guid.
      const newGuid = csharpGuidToStandard(doc["guid"]);
      if (newGuid !== null) {
        update["guid"] = newGuid;
      }

      // Convert each sense's guid.
      collectArrayGuidUpdates(doc, "senses", update);

      applyGuidUpdate(coll, doc, update);
    });

  print(`${collName}: done`);
}

// ── UserEditsCollection ──────────────────────────────────────────────────────
//
// Each UserEdit document has:
//   - edits[].guid  (BinData, per-element in the edits array)

const userEditsColl = db.getCollection("UserEditsCollection");

// Find all UserEdits that have binData guid fields (the conversion function
// handles subtype checking).
userEditsColl.find({ "edits.guid": { $type: "binData" } }).forEach((doc) => {
  const update: GuidUpdate = {};
  collectArrayGuidUpdates(doc, "edits", update);
  applyGuidUpdate(userEditsColl, doc, update);
});

print("UserEditsCollection: done");

print(
  `Migration complete. ${totalGuidsConverted} GUID(s) converted in ${totalDocumentsUpdated} document(s).`
);
if (totalDocumentsFailed > 0) {
  // Make partial failure visible in the summary, not only in per-document logs.
  print(
    `WARNING: ${totalDocumentsFailed} document(s) failed to update; re-run the migration after resolving the errors above.`
  );
}
169+
170+
// ── Final verification scan ─────────────────────────────────────────────────
171+
//
172+
// Recursively scan every collection/document/field and count objects with a
173+
// sub_type property that is not 4.
174+
175+
/**
 * Count, anywhere inside `root`, the non-array objects that have a `sub_type`
 * property whose value is not 4.
 *
 * Arrays and plain objects are walked through. An object that is counted is
 * not searched further, while an object whose `sub_type` is 4 still has its
 * property values searched. Primitives, null and undefined contribute 0.
 *
 * @param root - Any value (typically a whole document).
 * @returns The number of matching objects.
 */
function countObjectsWithSubtypeNot4(root: unknown): number {
  let matches = 0;
  // Explicit work-list in place of recursion; visiting order does not affect
  // the total.
  const pending: unknown[] = [root];

  while (pending.length > 0) {
    const node = pending.pop();
    if (node === null || node === undefined || typeof node !== "object") {
      continue;
    }

    if (Array.isArray(node)) {
      for (const item of node) {
        pending.push(item);
      }
      continue;
    }

    if ("sub_type" in node && (node as { sub_type: unknown }).sub_type !== 4) {
      matches += 1;
      continue;
    }

    for (const child of Object.values(node as Record<string, unknown>)) {
      pending.push(child);
    }
  }

  return matches;
}
196+
197+
// Total across all collections of the per-document counts reported by
// countObjectsWithSubtypeNot4.
let totalNonSubtype4Objects = 0;

db.getCollectionNames().forEach((collName) => {
  print(
    `Scanning collection ${collName} for objects found with sub_type !== 4...`
  );

  const everyDocument = db.getCollection(collName).find({});
  everyDocument.forEach((doc) => {
    const subcount = countObjectsWithSubtypeNot4(doc);
    totalNonSubtype4Objects += subcount;
    // Only documents with at least one hit get their own line.
    if (subcount > 0) {
      print(`* doc ${doc["_id"]}: ${subcount} objects with sub_type !== 4`);
    }
  });
});

print(`Final scan: ${totalNonSubtype4Objects} objects found.`);

database/tsconfig.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"extends": "../tsconfig.json",
3+
"compilerOptions": {
4+
"lib": ["esnext"],
5+
"isolatedModules": false,
6+
"noEmit": true
7+
},
8+
"include": ["."]
9+
}

package-lock.json

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
"@types/validator": "~13.15.10",
110110
"babel-jest": "~30.2.0",
111111
"babel-plugin-transform-import-meta": "~2.3.3",
112+
"bson": "^7.2.0",
112113
"buffer": "~6.0.3",
113114
"css-mediaquery": "~0.1.2",
114115
"dotenv": "~16.6.1",

0 commit comments

Comments
 (0)