Skip to content

Commit 8aefd6c

Browse files
authored
📦 NEW: Add local embed support for git-sync memory (#43)
* 📦 NEW: Add embed support for git-sync memory * 👌 IMPROVE: update hash in the end of deploy * 👌 IMPROVE: improve flow of embed
1 parent 3a9f5f4 commit 8aefd6c

File tree

9 files changed

+196
-40
lines changed

9 files changed

+196
-40
lines changed

packages/baseai/src/deploy/index.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ import color from 'picocolors';
1919
import type { MemoryI } from 'types/memory';
2020
import type { Pipe, PipeOld } from 'types/pipe';
2121
import { getStoredAuth } from './../auth/index';
22-
import { handleGitSyncMemories } from '@/utils/memory/git-sync/handle-git-sync-memories';
22+
import {
23+
handleGitSyncMemories,
24+
updateDeployedCommitHash
25+
} from '@/utils/memory/git-sync/handle-git-sync-memories';
2326
import { handleSingleDocDeploy } from './document';
2427

2528
export interface Account {
@@ -615,6 +618,10 @@ export async function upsertMemory({
615618
// Upload documents
616619
const { name } = (await createResponse.json()) as MemoryI;
617620
await uploadDocumentsToMemory({ documents, name, account });
621+
622+
if (isGitSync) {
623+
await updateDeployedCommitHash(memory.name);
624+
}
618625
} catch (error) {
619626
dlog('Error in createNewMemory:', error);
620627
throw error;

packages/baseai/src/memory/embed.ts

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
import { heading } from '@/utils/heading';
22
import { checkMemoryExists } from '@/utils/memory/check-memory-exist';
33
import { generateEmbeddings } from '@/utils/memory/generate-embeddings';
4+
import {
5+
handleGitSyncMemories,
6+
updateEmbeddedCommitHash
7+
} from '@/utils/memory/git-sync/handle-git-sync-memories';
48
import { validateMemoryName } from '@/utils/memory/lib';
9+
import loadMemoryConfig from '@/utils/memory/load-memory-config';
510
import { loadMemoryFiles } from '@/utils/memory/load-memory-files';
611
import * as p from '@clack/prompts';
712
import color from 'picocolors';
@@ -39,22 +44,45 @@ export async function embedMemory({
3944

4045
// 2- Load memory data.
4146
s.start('Processing memory docs...');
42-
const memoryFiles = await loadMemoryFiles(memoryName);
47+
let memoryFiles = await loadMemoryFiles(memoryName);
4348

4449
if (memoryFiles.length === 0) {
4550
p.cancel(`No valid documents found in memory '${memoryName}'.`);
4651
process.exit(1);
4752
}
4853

49-
// 3- Generate embeddings.
54+
// 3- Get memory config.
55+
const memoryConfig = await loadMemoryConfig(memoryName);
56+
57+
let filesToEmbed: string[] = [];
58+
59+
if (memoryConfig?.useGitRepo) {
60+
filesToEmbed = await handleGitSyncMemories({
61+
memoryName: memoryName,
62+
config: memoryConfig
63+
});
64+
65+
// Filter memory files to embed
66+
memoryFiles = memoryFiles.filter(doc =>
67+
filesToEmbed.includes(doc.name)
68+
);
69+
}
70+
71+
// 4- Generate embeddings.
5072
s.message('Generating embeddings...');
73+
const shouldOverwrite = memoryConfig?.useGitRepo ? true : overwrite;
5174
const result = await generateEmbeddings({
5275
memoryFiles,
5376
memoryName,
54-
overwrite: overwrite || false,
77+
overwrite: shouldOverwrite || false,
5578
useLocalEmbeddings
5679
});
5780

81+
if (memoryConfig?.useGitRepo) {
82+
p.log.success('Synced memory files with git repository.');
83+
await updateEmbeddedCommitHash(memoryName);
84+
}
85+
5886
s.stop(result);
5987
} catch (error: any) {
6088
s.stop(`Stopped!`);

packages/baseai/src/utils/memory/generate-embeddings.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,13 @@ function generateEmbeddingSummary({
231231

232232
p.log.info(summary);
233233
}
234+
235+
export async function listLocalEmbeddedMemoryDocuments({
236+
memoryName
237+
}: {
238+
memoryName: string;
239+
}) {
240+
const memoryDb = await loadDb(memoryName);
241+
const documents = Object.keys(memoryDb.data.documents);
242+
return documents;
243+
}

packages/baseai/src/utils/memory/git-sync/get-changed-files-between-commits.ts

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,23 @@
11
import { execSync } from 'child_process';
2-
import path from 'path';
32

43
/**
5-
* Get changed files between two Git commits
6-
* @param {Object} options - The options for the function
7-
* @param {string} options.oldCommit - The old already deployed commit
8-
* @param {string} options.latestCommit - The latest commit to deploy (default: 'HEAD')
9-
* @param {string} options.repoPath - The path to the Git repository (default: process.cwd())
10-
* @param {string[]} options.extensions - Array of file extensions to filter (default: all files)
11-
* @param {boolean} options.includeUntracked - Whether to include untracked files (default: false)
12-
* @returns {Promise<string[]>} - Array of changed file paths
4+
* Retrieves a list of files that have changed between two Git commits within a specified directory.
5+
*
6+
* @param {Object} options - The options for the function.
7+
* @param {string} options.oldCommit - The old commit reference to compare from.
8+
* @param {string} [options.latestCommit='HEAD'] - The latest commit reference to compare to. Defaults to 'HEAD'.
9+
* @param {string} options.dirToTrack - The directory to track for changes.
10+
* @returns {Promise<string[]>} A promise that resolves to an array of changed file paths.
11+
* @throws Will throw an error if the Git command fails or if the oldCommit is an empty string.
1312
*/
1413
export async function getChangedFilesBetweenCommits({
1514
oldCommit,
1615
latestCommit = 'HEAD',
17-
dirToTrack,
18-
extensions = []
16+
dirToTrack
1917
}: {
2018
oldCommit: string;
2119
latestCommit: string;
2220
dirToTrack: string;
23-
extensions: string[];
2421
}): Promise<string[]> {
2522
try {
2623
// Validate inputs

packages/baseai/src/utils/memory/git-sync/handle-git-sync-memories.ts

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { getChangedFilesBetweenCommits } from './get-changed-files-between-commi
55
import type { MemoryConfigI } from 'types/memory';
66
import { listMemoryDocuments, type Account } from '@/deploy';
77
import { loadMemoryFilesFromCustomDir } from '../load-memory-files';
8+
import { listLocalEmbeddedMemoryDocuments } from '../generate-embeddings';
9+
import { saveEmbeddedCommitHashInMemoryConfig } from './save-embedded-commit-in-config';
810

911
export async function handleGitSyncMemories({
1012
memoryName,
@@ -13,7 +15,7 @@ export async function handleGitSyncMemories({
1315
}: {
1416
memoryName: string;
1517
config: MemoryConfigI;
16-
account: Account;
18+
account?: Account; // Undefined for local embed
1719
}): Promise<string[]> {
1820
// Check for uncommitted changes
1921
try {
@@ -30,13 +32,19 @@ export async function handleGitSyncMemories({
3032

3133
let filesToDeploy: string[] = [];
3234

35+
const isEmbed = !account;
36+
3337
// Step 1:
3438
// Fetch the uploaded documents and compare with the local documents
3539
// Handles new files that are not in the prodDocs due to extension and path updates
36-
const prodDocs = await listMemoryDocuments({
37-
account,
38-
memoryName
39-
});
40+
const prodDocs = !isEmbed
41+
? await listMemoryDocuments({
42+
account,
43+
memoryName
44+
})
45+
: await listLocalEmbeddedMemoryDocuments({
46+
memoryName
47+
}); // For local embed, the locally embedded docs are the prod equivalent
4048

4149
const allFilesWithContent = await loadMemoryFilesFromCustomDir({
4250
memoryName,
@@ -53,7 +61,11 @@ export async function handleGitSyncMemories({
5361
// Step 2.1:
5462
// If there's no deployedCommitHash, user is deploying for the first time
5563
// Deploy all files in the directory
56-
if (!config.deployedCommitHash) {
64+
const lastHashUsed = isEmbed
65+
? config.embeddedCommitHash
66+
: config.deployedCommitHash;
67+
68+
if (!lastHashUsed) {
5769
filesToDeploy = allFiles;
5870
p.log.info(
5971
`Found no previous deployed commit. Deploying all ${filesToDeploy.length} files in memory "${memoryName}":`
@@ -62,10 +74,9 @@ export async function handleGitSyncMemories({
6274
// Step 2.2: Otherwise, get changed files between commits
6375
else {
6476
filesToDeploy = await getChangedFilesBetweenCommits({
65-
oldCommit: config.deployedCommitHash,
77+
oldCommit: lastHashUsed,
6678
latestCommit: 'HEAD',
67-
dirToTrack: config.dirToTrack,
68-
extensions: config.extToTrack
79+
dirToTrack: config.dirToTrack
6980
});
7081

7182
if (filesToDeploy.length > 0) {
@@ -76,9 +87,16 @@ export async function handleGitSyncMemories({
7687
// Print the changed file names TODO: Remove because it may clutter the terminal?
7788
filesToDeploy.forEach(file => p.log.message(file));
7889
} else {
79-
p.log.info(
80-
`No changes detected for memory "${memoryName}" since last deployment.`
81-
);
90+
const isEmbed = !account;
91+
if (isEmbed) {
92+
p.log.info(
93+
`No changes detected for memory "${memoryName}" since last embedding.`
94+
);
95+
} else {
96+
p.log.info(
97+
`No changes detected for memory "${memoryName}" since last deployment.`
98+
);
99+
}
82100
}
83101
}
84102

@@ -90,14 +108,21 @@ export async function handleGitSyncMemories({
90108
return [];
91109
}
92110

93-
// Step 4
94-
// Update deployedCommitHash in memory config
95-
// TODO: Should we update the deployedCommitHash after deploying?
111+
return filesToDeploy;
112+
}
113+
114+
export async function updateDeployedCommitHash(memoryName: string) {
96115
const currentCommitHash = execSync('git rev-parse HEAD').toString().trim();
97116
await saveDeployedCommitHashInMemoryConfig({
98117
memoryName,
99118
deployedCommitHash: currentCommitHash
100119
});
120+
}
101121

102-
return filesToDeploy;
122+
export async function updateEmbeddedCommitHash(memoryName: string) {
123+
const currentCommitHash = execSync('git rev-parse HEAD').toString().trim();
124+
await saveEmbeddedCommitHashInMemoryConfig({
125+
memoryName,
126+
embeddedCommitHash: currentCommitHash
127+
});
103128
}
packages/baseai/src/utils/memory/git-sync/save-embedded-commit-in-config.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import fs from 'fs/promises';
2+
import * as p from '@clack/prompts';
3+
import path from 'path';
4+
5+
export async function saveEmbeddedCommitHashInMemoryConfig({
6+
memoryName,
7+
embeddedCommitHash
8+
}: {
9+
memoryName: string;
10+
embeddedCommitHash: string;
11+
}): Promise<void> {
12+
try {
13+
const memoryDir = path.join(
14+
process.cwd(),
15+
'baseai',
16+
'memory',
17+
memoryName
18+
);
19+
const indexFilePath = path.join(memoryDir, 'index.ts');
20+
let fileContents = await fs.readFile(indexFilePath, 'utf-8');
21+
22+
// Check if the embeddedCommitHash already exists in the config
23+
if (fileContents.includes('embeddedCommitHash:')) {
24+
// Update the existing embeddedCommitHash
25+
fileContents = fileContents.replace(
26+
/embeddedCommitHash:\s*['"].*['"]/,
27+
`embeddedCommitHash: '${embeddedCommitHash}'`
28+
);
29+
} else {
30+
// Add the embeddedCommitHash to the config
31+
fileContents = fileContents.replace(
32+
/config:\s*{/,
33+
`config: {\n embeddedCommitHash: '${embeddedCommitHash}',`
34+
);
35+
}
36+
37+
// Write the updated contents back to the file
38+
await fs.writeFile(indexFilePath, fileContents, 'utf-8');
39+
40+
p.log.success(`Updated embeddedCommitHash for memory '${memoryName}'.`);
41+
} catch (error) {
42+
console.error(`Error saving latest commit hash: ${error}`);
43+
throw error;
44+
}
45+
}

packages/baseai/src/utils/memory/load-memory-config.ts

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import fs from 'fs/promises';
22
import path from 'path';
33
import * as p from '@clack/prompts';
4-
import type { MemoryConfigI } from 'types/memory';
4+
import { memoryConfigSchema, type MemoryConfigI } from 'types/memory';
55

66
function parsePathJoin(joinArgs: string): string {
77
// Remove any quotes, split by comma, and trim each argument
@@ -24,6 +24,12 @@ function parseConfig(configString: string): MemoryConfigI {
2424
/dirToTrack:(?:path\.(?:posix\.)?join\((.*?)\)|['"](.+?)['"])/
2525
);
2626
const extToTrackMatch = cleanConfig.match(/extToTrack:(\[.*?\])/);
27+
const deployedCommitHashMatch = cleanConfig.match(
28+
/deployedCommitHash:['"](.+?)['"]/
29+
);
30+
const embeddedCommitHashMatch = cleanConfig.match(
31+
/embeddedCommitHash:['"](.+?)['"]/
32+
);
2733

2834
if (!useGitRepoMatch || !dirToTrackMatch || !extToTrackMatch) {
2935
throw new Error('Unable to parse config structure');
@@ -34,12 +40,34 @@ function parseConfig(configString: string): MemoryConfigI {
3440
? dirToTrackMatch[2]
3541
: parsePathJoin(dirToTrackMatch[1]);
3642
const extToTrack = JSON.parse(extToTrackMatch[1].replace(/'/g, '"'));
43+
const deployedCommitHash = deployedCommitHashMatch
44+
? deployedCommitHashMatch[1]
45+
: undefined;
46+
const embeddedCommitHash = embeddedCommitHashMatch
47+
? embeddedCommitHashMatch[1]
48+
: undefined;
3749

38-
return {
50+
const config: MemoryConfigI = {
3951
useGitRepo,
4052
dirToTrack,
4153
extToTrack
4254
};
55+
56+
if (deployedCommitHash) {
57+
config.deployedCommitHash = deployedCommitHash;
58+
}
59+
60+
if (embeddedCommitHash) {
61+
config.embeddedCommitHash = embeddedCommitHash;
62+
}
63+
64+
// Validate the parsed config against the schema
65+
const result = memoryConfigSchema.safeParse(config);
66+
if (!result.success) {
67+
throw new Error(`Invalid config: ${result.error.message}`);
68+
}
69+
70+
return config;
4371
}
4472

4573
export default async function loadMemoryConfig(
@@ -73,14 +101,28 @@ export default async function loadMemoryConfig(
73101
try {
74102
const config = parseConfig(configMatch[1]);
75103
return config;
76-
} catch (parseError) {
77-
p.cancel(`Unable to read config in '${memoryName}/index.ts'.`);
104+
} catch (error) {
105+
if (error instanceof Error) {
106+
p.cancel(
107+
`Unable to read config in '${memoryName}/index.ts': ${error.message}`
108+
);
109+
} else {
110+
p.cancel(
111+
`Unable to read config in '${memoryName}/index.ts': Unknown error occurred`
112+
);
113+
}
78114
process.exit(1);
79115
}
80116
} catch (error) {
81-
p.cancel(
82-
`Memory '${memoryName}' does not exist or could not be loaded.`
83-
);
117+
if (error instanceof Error) {
118+
p.cancel(
119+
`Memory '${memoryName}' does not exist or could not be loaded: ${error.message}`
120+
);
121+
} else {
122+
p.cancel(
123+
`Memory '${memoryName}' does not exist or could not be loaded: Unknown error occurred`
124+
);
125+
}
84126
process.exit(1);
85127
}
86128
}

packages/baseai/types/memory.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ export const memoryConfigSchema = z.object({
4242
)
4343
.min(1, 'At least one file extension must be specified')
4444
]),
45-
deployedCommitHash: z.string().optional()
45+
deployedCommitHash: z.string().optional(),
46+
embeddedCommitHash: z.string().optional()
4647
});
4748

4849
export type MemoryConfigI = z.infer<typeof memoryConfigSchema>;

0 commit comments

Comments
(0)