
Commit 9b3e7a0

Dop 3203 (#703)

* DOP-3203: Add upsert functionality, asset upserts
* fixup
* DOP-3203: Stub functions, Stub tests, finalized merge query + logic
* DOP-3203: Reorder logic
* Improve the typing
* fixup
* stub toctreeorder
* fixup
* fixup
* fixup
* fixup
* fixup
* Use project for node options, not name
* fixup
* DOP-3203: Invoke the merging logic
* DOP-3203: Coalesce feedback
* DOP-3203: Rename associations to associated_products
* DOP-3203: Null safety w/ default empty objects in insertions
* DOP-3203: Unset _id in new shared metadata documents
* DOP-3203: Address feedback further
* fixup
* fixup
* fixup
* fixup

1 parent 7f546f5 · commit 9b3e7a0

File tree

10 files changed (+333, -6 lines)

modules/README.md

Lines changed: 2 additions & 0 deletions

@@ -1,3 +1,5 @@
 # Overview

 This directory contains various build-time modules for use in makefiles or for use as published modules utilized in other CI/CD routines.
+Each entry in this directory should NOT be coupled to another module or to code within the autobuilder.
+It is the express intent of the maintainers that each entry be standalone, runnable or exportable as needed, with dependencies kept within its own directory.

modules/persistence/README.md

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-# Snooty Transport Module
+# Snooty Persistence Module

 As part of integrating Snooty Parser with the unified Snooty toolchain, this module holds exclusive responsibility over transporting and persisting AST and metadata output artifacts from the parser into datastores.

modules/persistence/index.ts

Lines changed: 11 additions & 2 deletions

@@ -5,8 +5,10 @@ dotenv.config();
 import AdmZip from 'adm-zip';
 import minimist from 'minimist';
 import * as mongodb from 'mongodb';
+import { teardown as closeDBConnection } from './src/services/connector';
 import { insertPages } from './src/services/pages';
-import { insertMetadata } from './src/services/metadata';
+import { insertMetadata, insertUmbrellaMetadata } from './src/services/metadata';
+import { upsertAssets } from './src/services/assets';

 interface ModuleArgs {
   path: string;
@@ -29,10 +31,17 @@ const app = async (path: string) => {
     // atomic buildId for all artifacts read by this module - fundamental assumption
     // that only one build will be used per run of this module.
     const buildId = new mongodb.ObjectId();
-    await Promise.all([insertPages(buildId, zip), insertMetadata(buildId, zip)]);
+    await Promise.all([
+      insertPages(buildId, zip),
+      insertMetadata(buildId, zip),
+      upsertAssets(zip),
+      insertUmbrellaMetadata(buildId, zip),
+    ]);
+    await closeDBConnection();
     process.exit(0);
   } catch (error) {
     console.error(`Persistence Module encountered a terminal error: ${error}`);
+    await closeDBConnection();
     throw error;
   }
 };
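Both paths above close the shared MongoDB client before the process ends. For comparison, a minimal sketch of the same flow using try/finally, assuming the same imports as the committed file; this is an illustration of the teardown pattern, not the committed code:

const app = async (path: string) => {
  try {
    const zip = new AdmZip(path);
    const buildId = new mongodb.ObjectId();
    await Promise.all([
      insertPages(buildId, zip),
      insertMetadata(buildId, zip),
      upsertAssets(zip),
      insertUmbrellaMetadata(buildId, zip),
    ]);
  } finally {
    // finally runs on success and on error, so the client always closes
    await closeDBConnection();
  }
};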
modules/persistence/src/services/assets/index.ts

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+import AdmZip from 'adm-zip';
+import { upsert } from '../connector';
+
+const COLLECTION_NAME = 'documents';
+
+// Service responsible for upsertion of any image or blob assets.
+
+const assetsFromZip = (zip: AdmZip) => {
+  const assets = zip.getEntries();
+  return assets
+    .filter((entry) => entry.entryName?.startsWith('assets/'))
+    .map((entry) => ({ _id: entry.entryName.replace('assets/', ''), data: entry.getData() }));
+};
+
+export const upsertAssets = async (zip: AdmZip) => {
+  try {
+    const assets = assetsFromZip(zip);
+    return Promise.all(assets.map((asset) => upsert(asset, COLLECTION_NAME, asset._id)));
+  } catch (error) {
+    console.error(`Error at upsertion time for ${COLLECTION_NAME}: ${error}`);
+    throw error;
+  }
+};
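A short usage sketch for the new assets service, assuming a parser output bundle at a hypothetical path 'bundle.zip'; each document's _id is the zip entry name with the 'assets/' prefix stripped:

import AdmZip from 'adm-zip';
import { upsertAssets } from './src/services/assets';

const run = async () => {
  const zip = new AdmZip('bundle.zip'); // hypothetical bundle path
  // Upserts every assets/* entry in the bundle into the 'documents' collection.
  await upsertAssets(zip);
};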

modules/persistence/src/services/connector/index.ts

Lines changed: 31 additions & 2 deletions

@@ -1,14 +1,30 @@
+// Service that holds responsibility for initializing and exposing mdb interfaces.
+// Also exports helper functions for common operations (insert, upsert one by _id, etc.)
+// When adding helpers here, ask yourself if the helper will be used by more than one service.
+// If not, the helper should be implemented in that service, not here.
 import * as mongodb from 'mongodb';
 import { ObjectId, Db } from 'mongodb';
+import { db as poolDb } from './pool';

 // We should only ever have one client active at a time.
 const atlasURL = `mongodb+srv://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}/?retryWrites=true&w=majority`;
 const client = new mongodb.MongoClient(atlasURL);
+
+export const teardown = async () => {
+  await client.close();
+};
+
+// Initialize and export our pool connection.
+// Try to limit access to pool as much as possible - we mostly want it for just repo_branches.
+export const pool = async () => {
+  return poolDb(client);
+};
+
 // cached db object, so we can handle initial connection process once if uninitialized
 let dbInstance: Db;
 // Handles memoization of db object, and initial connection logic if needs to be initialized
-const db = async () => {
+export const db = async () => {
   if (!dbInstance) {
     try {
       await client.connect();
@@ -33,3 +49,16 @@ export const insert = async (docs: any[], collection: string, buildId: ObjectId)
     throw error;
   }
 };
+
+// Upsert wrapper, requires an _id field.
+export const upsert = async (payload: any, collection: string, _id: string | ObjectId) => {
+  const upsertSession = await db();
+  try {
+    const query = { _id };
+    const update = { $set: payload };
+    const options = { upsert: true };
+    return upsertSession.collection(collection).updateOne(query, update, options);
+  } catch (error) {
+    console.error(`Error at upsertion time for ${collection}: ${error}`);
+    throw error;
+  }
+};
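A hedged usage sketch of the new upsert helper; the payload and values here are hypothetical, and the call must run in an async context:

import { upsert } from './src/services/connector';

const example = async () => {
  const logo = { _id: 'logo.png', data: Buffer.from('fake-bytes') }; // hypothetical asset payload
  // Matches on _id and inserts the document if absent, otherwise overwrites the given fields:
  // collection.updateOne({ _id }, { $set: payload }, { upsert: true })
  await upsert(logo, 'documents', logo._id);
};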
modules/persistence/src/services/connector/pool.ts

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+import { MongoClient, Db } from 'mongodb';
+
+// cached db object, so we can handle initial connection process once if uninitialized
+let dbInstance: Db;
+// Handles memoization of db object, and initial connection logic if needs to be initialized
+export const db = async (client: MongoClient) => {
+  if (!dbInstance) {
+    try {
+      await client.connect();
+      dbInstance = client.db(process.env.POOL_DB_NAME);
+    } catch (error) {
+      console.error(`Error at db client connection: ${error}`);
+      throw error;
+    }
+  }
+  return dbInstance;
+};
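Services reach the pool database through the connector's pool() wrapper rather than this module directly. A brief sketch, with a hypothetical project name:

import { pool } from './src/services/connector';

const lookup = async () => {
  const poolDb = await pool();
  // repos_branches is the collection pool access is mostly intended for
  return poolDb.collection('repos_branches').findOne({ project: 'docs-atlas-cli' }); // hypothetical project
};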
modules/persistence/src/services/metadata/ToC/index.ts

Lines changed: 77 additions & 0 deletions

@@ -0,0 +1,77 @@
+import { SharedMetadata, AssociatedProduct } from '../associated_products';
+
+export interface ToC {
+  title: string;
+  slug: string;
+  children: ToC[];
+  options?: {
+    [key: string]: any;
+  };
+  [key: string]: any;
+}
+
+type project = string;
+type branchName = string;
+type branch = { [key: branchName]: ToC };
+
+export interface ToCInsertions {
+  [key: project]: branch;
+}
+
+export interface TocOrderInsertions {
+  [key: string]: {
+    [key: string]: any[];
+  };
+}
+
+const isInsertionCandidateNode = (node: ToC, associated_products: AssociatedProduct[] = []) => {
+  const nodeInProducts = node.options?.project && associated_products.find((p) => p.name === node.options?.project);
+  const nodeHasNoChildren = !node.children || node.children.length === 0;
+  return nodeHasNoChildren && nodeInProducts;
+};
+
+const mergeNode = (node: ToC, tocs: ToCInsertions) => {
+  // Options might be undefined, so safely cast to {} if nullish
+  node.options = node.options ?? {};
+
+  const project = tocs[node?.options?.project];
+  const branches = Object.keys(project);
+  node.options.versions = branches;
+  node.children = branches.map((branch) => {
+    const child = project[branch];
+    const options = {
+      ...child.options,
+      version: branch,
+    };
+    child.options = options;
+    return child;
+  });
+  return node;
+};
+
+const mergeTocTreeOrder = (metadata: SharedMetadata, node, insertions: TocOrderInsertions) => {
+  const insertion = insertions[metadata.project]?.[metadata.branch] || [];
+  const index = metadata.toctreeOrder.indexOf(node.options?.project);
+  // splice mutates toctreeOrder in place and returns the removed elements (none here),
+  // so return the mutated array itself for the caller to assign.
+  metadata.toctreeOrder.splice(index, 0, ...insertion);
+  return metadata.toctreeOrder;
+};
+
+// BFS through the toctree from the metadata entry provided as an arg
+// and insert matching tocInsertion entries + tocOrders
+export const traverseAndMerge = (
+  metadata: SharedMetadata,
+  tocInsertions: ToCInsertions,
+  tocOrderInsertions: TocOrderInsertions
+) => {
+  const { toctree, associated_products } = metadata;
+
+  let queue = [toctree];
+  while (queue?.length) {
+    let next = queue.shift();
+    if (next && isInsertionCandidateNode(next, associated_products)) {
+      next = mergeNode(next, tocInsertions);
+      metadata.toctreeOrder = mergeTocTreeOrder(metadata, next, tocOrderInsertions);
+    }
+    if (next?.children) queue = [...queue, ...next.children];
+  }
+  return metadata;
+};
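To illustrate traverseAndMerge: a childless umbrella node whose options.project names an associated product gains one child per branch found in tocInsertions, each tagged with options.version. A small hedged example; all project, branch, and title names below are hypothetical, and the import path is inferred from the imports above:

import { traverseAndMerge } from './src/services/metadata/ToC';

const metadata = {
  project: 'cloud-docs', // hypothetical umbrella project
  branch: 'master',
  associated_products: [{ name: 'atlas-cli', versions: ['v1.0'] }],
  toctree: {
    title: 'Docs',
    slug: '/',
    children: [{ title: 'Atlas CLI', slug: 'atlas-cli', children: [], options: { project: 'atlas-cli' } }],
  },
  toctreeOrder: ['atlas-cli'],
};

const tocInsertions = {
  'atlas-cli': { 'v1.0': { title: 'Atlas CLI', slug: '/', children: [] } },
};

// After the call, the 'Atlas CLI' node has options.versions = ['v1.0'] and a
// single child for branch 'v1.0' whose options.version === 'v1.0'.
const merged = traverseAndMerge(metadata as any, tocInsertions, {});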
modules/persistence/src/services/metadata/associated_products/index.ts

Lines changed: 160 additions & 0 deletions

@@ -0,0 +1,160 @@
+import { AggregationCursor } from 'mongodb';
+import { pool, db } from '../../connector';
+import { ToC, ToCInsertions, TocOrderInsertions, traverseAndMerge } from '../ToC';
+
+export interface AssociatedProduct {
+  name: string;
+  versions: string[];
+}
+
+export interface SharedMetadata {
+  project: string;
+  branch: string;
+  associated_products?: AssociatedProduct[];
+  toctree: ToC;
+  toctreeOrder: any[];
+  [key: string]: any;
+}
+
+// TODO: move the branch/repobranch interfaces into their own file, or into a separate abstraction?
+interface BranchEntry {
+  name: string;
+  gitBranchName: string;
+  [key: string]: any;
+}
+
+export interface ReposBranchesDocument {
+  repoName: string;
+  project: string;
+  branches: BranchEntry[];
+  [key: string]: any;
+}
+
+// Queries pool*.repos_branches for any entries for the given project and branch from a metadata entry.
+const getRepoBranchesEntry = async (project, branch) => {
+  const db = await pool();
+  return db.collection('repos_branches').findOne({ branches: { $elemMatch: { gitBranchName: branch } }, project });
+};
+
+// Queries pool*.repos_branches for all entries for associated_products in a shared metadata entry
+const getAllAssociatedRepoBranchesEntries = async (metadata: SharedMetadata) => {
+  const { associated_products } = metadata;
+  if (!associated_products || !associated_products.length) return [];
+  const associatedProductNames = associated_products.map((a) => a.name);
+  const db = await pool();
+  const entries = await db
+    .collection('repos_branches')
+    .find({ project: { $in: associatedProductNames } })
+    .toArray();
+  return entries as unknown as ReposBranchesDocument[];
+};
+
+const mapRepoBranches = (repoBranches: ReposBranchesDocument[]) =>
+  Object.fromEntries(
+    repoBranches.map((entry) => {
+      const branches = Object.fromEntries(entry.branches.map((branch) => [branch.gitBranchName, branch]));
+      return [entry.project, branches];
+    })
+  );
+
+const hasAssociations = (metadata) => !!metadata.associated_products?.length;
+
+const umbrellaMetadataEntry = async (metadata): Promise<SharedMetadata> => {
+  try {
+    const snooty = await db();
+    const entry = await snooty
+      .collection('metadata')
+      .find({
+        'associated_products.name': metadata.project,
+      })
+      .sort({ build_id: -1 })
+      .limit(1)
+      .toArray();
+    return entry[0] as unknown as SharedMetadata;
+  } catch (error) {
+    console.log(`Error at time of querying for umbrella metadata entry: ${error}`);
+    throw error;
+  }
+};
+
+// Convert our cursor from the shared ToC aggregation query into a series of ToC objects
+const shapeToCsCursor = async (
+  tocCursor: AggregationCursor,
+  repoBranchesMap
+): Promise<{ tocInsertions: ToCInsertions; tocOrderInsertions: TocOrderInsertions }> => {
+  const tocInsertions = {};
+  const tocOrderInsertions = {};
+
+  await tocCursor.forEach((doc) => {
+    // Initialize to empty object if we haven't already, for a given project.
+    if (!tocInsertions[doc._id.project]) tocInsertions[doc._id.project] = {};
+    if (!tocOrderInsertions[doc._id.project]) tocOrderInsertions[doc._id.project] = {};
+
+    // TODO: If we want staging builds with embedded versions, it needs to be added here
+    if (repoBranchesMap?.[doc._id.project]?.[doc._id.branch]) {
+      tocInsertions[doc._id.project][doc._id.branch] = doc.most_recent.toctree;
+      tocOrderInsertions[doc._id.project][doc._id.branch] = doc.most_recent.toctreeOrder;
+    }
+  });
+
+  return { tocInsertions, tocOrderInsertions };
+};
+
+const getAssociatedProducts = async (umbrellaMetadata) => {
+  try {
+    const { associated_products } = umbrellaMetadata;
+    const associatedProductNames = associated_products.map((a) => a.name);
+    const snooty = await db();
+
+    // This query matches on projects in associated_products for our given metadata that have a build_id and that do not have merged ToCs,
+    // then groups per branch and per project from those matches,
+    // and gets the most recent doc entry (by build_id), with the toctree and toctreeOrder fields.
+    const tocs = snooty.collection('metadata').aggregate([
+      {
+        $match: { project: { $in: associatedProductNames }, build_id: { $exists: true }, is_merged_toc: { $ne: true } },
+      },
+      {
+        $group: {
+          _id: { project: '$project', branch: '$branch' },
+          most_recent: {
+            $max: {
+              build_id: '$build_id',
+              toctree: '$toctree',
+              toctreeOrder: '$toctreeOrder',
+            },
+          },
+        },
+      },
+    ]);
+    return tocs;
+  } catch (error) {
+    console.log(`Error at time of aggregating existing associated product metadata entries: ${error}`);
+    throw error;
+  }
+};
+
+export const mergeAssociatedToCs = async (metadata) => {
+  const { project, branch } = metadata;
+  const umbrellaMetadata = hasAssociations(metadata) ? metadata : await umbrellaMetadataEntry(metadata);
+
+  // Short circuit execution here if there's no umbrella product metadata found
+  if (!umbrellaMetadata) return;
+  // Short circuit execution if the project branch is NOT in repo branches.
+  // If we want to embed with staging builds, then this needs to be turned off
+  // or converted so that local metadata ToC is added to tocInsertions
+  const isStagingBranch = !(await getRepoBranchesEntry(project, branch));
+  if (isStagingBranch) return;
+
+  const repoBranchesEntries = await getAllAssociatedRepoBranchesEntries(umbrellaMetadata);
+  const repoBranchesMap = mapRepoBranches(repoBranchesEntries);
+  const tocsCursor = await getAssociatedProducts(umbrellaMetadata);
+  const { tocInsertions, tocOrderInsertions } = await shapeToCsCursor(tocsCursor, repoBranchesMap);
+  const mergedMetadataEntry = traverseAndMerge(umbrellaMetadata, tocInsertions, tocOrderInsertions);
+
+  // Remove the _id and treat the entry as a brand new document.
+  delete mergedMetadataEntry._id;
+  // Add a flag to denote that the entry contains a merged ToC.
+  mergedMetadataEntry.is_merged_toc = true;
+
+  return mergedMetadataEntry;
+};
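One non-obvious piece of the aggregation above: $max over an embedded document compares documents field by field in order, so placing build_id first means the group entry with the highest (most recent) build_id wins, carrying its toctree and toctreeOrder along. A hedged sketch of one document the $group stage emits, with hypothetical values:

import { ObjectId } from 'mongodb';

// Hypothetical shape of one document emitted by the $group stage:
const groupedDoc = {
  _id: { project: 'atlas-cli', branch: 'master' }, // hypothetical names
  most_recent: {
    build_id: new ObjectId(), // highest build_id seen for this project/branch group
    toctree: { title: 'Atlas CLI', slug: '/', children: [] },
    toctreeOrder: ['install', 'reference'], // hypothetical slugs
  },
};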
