Skip to content

Commit 71f8ead

Browse files
committed
Initial commit
0 parents  commit 71f8ead

File tree

6 files changed

+373
-0
lines changed

6 files changed

+373
-0
lines changed

.github/workflows/archive.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Archives the Gemini conversations linked from README.md into conversations/
# and commits the result back to the repository.
name: Archive Gemini Conversations

on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  archive-gemini-conversations:
    runs-on: ubuntu-latest
    permissions:
      # Required so the job can push the archive commit.
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 23

      - name: Install pnpm
        uses: pnpm/action-setup@v3
        with:
          version: 10

      - name: Install dependencies
        run: |
          pnpm install
          # The archive script only launches Chromium, so skip the other browsers.
          pnpm exec playwright install --with-deps chromium

      - name: Archive Gemini conversations
        id: archive
        run: pnpm start

      - name: Commit changes
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Quote the pathspec so git (not the shell) expands it: an unquoted glob
          # only expands to files that still exist, so removed archives would never
          # be staged. --all stages additions, modifications and deletions.
          git add --all -- 'conversations/*.html' || true
          # Nothing staged means nothing to commit.
          git diff --cached --quiet && exit 0
          # The commit message file must be written by the archive step.
          git commit -F .git/commitmsg
          git push

.gitignore

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Logs
2+
logs
3+
*.log
4+
npm-debug.log*
5+
yarn-debug.log*
6+
yarn-error.log*
7+
lerna-debug.log*
8+
9+
# Diagnostic reports (https://nodejs.org/api/report.html)
10+
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
11+
12+
# Runtime data
13+
pids
14+
*.pid
15+
*.seed
16+
*.pid.lock
17+
18+
# Directory for instrumented libs generated by jscoverage/JSCover
19+
lib-cov
20+
21+
# Coverage directory used by tools like istanbul
22+
coverage
23+
*.lcov
24+
25+
# nyc test coverage
26+
.nyc_output
27+
28+
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
29+
.grunt
30+
31+
# Bower dependency directory (https://bower.io/)
32+
bower_components
33+
34+
# node-waf configuration
35+
.lock-wscript
36+
37+
# Compiled binary addons (https://nodejs.org/api/addons.html)
38+
build/Release
39+
40+
# Dependency directories
41+
node_modules/
42+
jspm_packages/
43+
44+
# Snowpack dependency directory (https://snowpack.dev/)
45+
web_modules/
46+
47+
# TypeScript cache
48+
*.tsbuildinfo
49+
50+
# Optional npm cache directory
51+
.npm
52+
53+
# Optional eslint cache
54+
.eslintcache
55+
56+
# Optional stylelint cache
57+
.stylelintcache
58+
59+
# Optional REPL history
60+
.node_repl_history
61+
62+
# Output of 'npm pack'
63+
*.tgz
64+
65+
# Yarn Integrity file
66+
.yarn-integrity
67+
68+
# dotenv environment variable files
69+
.env
70+
.env.*
71+
!.env.example
72+
73+
# parcel-bundler cache (https://parceljs.org/)
74+
.cache
75+
.parcel-cache
76+
77+
# Next.js build output
78+
.next
79+
out
80+
81+
# Nuxt.js build / generate output
82+
.nuxt
83+
dist
84+
85+
# Gatsby files
86+
.cache/
87+
# Uncomment the `public` line below if your project uses Gatsby and not Next.js
88+
# https://nextjs.org/blog/next-9-1#public-directory-support
89+
# public
90+
91+
# vuepress build output
92+
.vuepress/dist
93+
94+
# vuepress v2.x temp and cache directory
95+
.temp
96+
.cache
97+
98+
# Sveltekit cache directory
99+
.svelte-kit/
100+
101+
# vitepress build output
102+
**/.vitepress/dist
103+
104+
# vitepress cache directory
105+
**/.vitepress/cache
106+
107+
# Docusaurus cache and generated files
108+
.docusaurus
109+
110+
# Serverless directories
111+
.serverless/
112+
113+
# FuseBox cache
114+
.fusebox/
115+
116+
# DynamoDB Local files
117+
.dynamodb/
118+
119+
# Firebase cache directory
120+
.firebase/
121+
122+
# TernJS port file
123+
.tern-port
124+
125+
# Stores VSCode versions used for testing VSCode extensions
126+
.vscode-test
127+
128+
# yarn v3
129+
.pnp.*
130+
.yarn/*
131+
!.yarn/patches
132+
!.yarn/plugins
133+
!.yarn/releases
134+
!.yarn/sdks
135+
!.yarn/versions
136+
137+
# Vite logs files
138+
vite.config.js.timestamp-*
139+
vite.config.ts.timestamp-*

package.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"name": "gemini-archive",
3+
"version": "1.0.0",
4+
"type": "module",
5+
"scripts": {
6+
"start": "node --import @swc-node/register/esm-register src/archive.ts"
7+
},
8+
"dependencies": {
9+
"@swc-node/register": "^1.10.10",
10+
"async": "^3.2.6",
11+
"playwright": "^1.54.2",
12+
"single-file-cli": "^2.0.75"
13+
},
14+
"devDependencies": {
15+
"@types/async": "^3.2.25",
16+
"@types/node": "^24.2.1",
17+
"typescript": "^5.9.2"
18+
}
19+
}

src/archive.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import { mkdir, writeFile } from "fs/promises"
2+
import { join } from "path"
3+
4+
import { queue } from "async"
5+
import { type Browser, chromium } from "playwright"
6+
7+
import { ARCHIVE_DIR } from "./constants.js"
8+
import {
9+
archiveConversation,
10+
buildCommitMessage,
11+
deleteFile,
12+
getArchivedConversations,
13+
getGeminiIdsFromReadme
14+
} from "./util.js"
15+
16+
// Entry point: synchronise the archive directory with the Gemini share links
// listed in README.md, then write a commit message describing the changes.
await mkdir(ARCHIVE_DIR, { recursive: true })

const listedIds = await getGeminiIdsFromReadme("README.md")
const archivedMap = await getArchivedConversations(ARCHIVE_DIR)
const archivedIds = new Set(archivedMap.keys())

const newIds = [...listedIds].filter(id => !archivedIds.has(id))
const staleIds = [...archivedIds].filter(id => !listedIds.has(id))

// Delete archives whose conversation is no longer listed in the README.
const deleted: string[] = []
for (const id of staleIds) {
  const fileName = archivedMap.get(id)
  if (fileName === undefined) continue
  if (await deleteFile(join(ARCHIVE_DIR, fileName))) deleted.push(id)
}

// Archive new conversations, at most 10 at a time.
const added: string[] = []
const errored: string[] = []

// Only pay for a browser launch when there is something to archive.
if (newIds.length > 0) {
  const browser: Browser = await chromium.launch({ args: ["--disable-web-security"] })

  try {
    // The worker is deliberately a plain (non-async) function: async does not
    // pass a completion callback to native `async` workers, so mixing `async`
    // with `callback()` is unreliable. Here the callback is always provided
    // and always invoked exactly once, whether archiving succeeded or failed.
    const archiveQueue = queue<string>((id, callback) => {
      void archiveConversation(browser, id)
        .then(
          () => {
            added.push(id)
          },
          () => {
            // archiveConversation already logged the failure details.
            errored.push(id)
          }
        )
        .finally(() => callback())
    }, 10)

    archiveQueue.push(newIds)
    await archiveQueue.drain()
  } finally {
    await browser.close()
  }
}

if (errored.length > 0) {
  // Failures are reported but do not fail the run, so successful archives still get committed.
  console.warn(`Failed to archive ${errored.length} conversation(s): ${errored.join(", ")}`)
}

// Compose the commit message and hand it to the workflow, which commits with
// `git commit -F .git/commitmsg` — the file name must match that path exactly.
const commitMessage = buildCommitMessage(added, deleted)
await writeFile(".git/commitmsg", commitMessage, "utf8")

src/constants.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// @ts-expect-error
2+
import { getHookScriptSource, getScriptSource, getZipScriptSource } from "single-file-cli/lib/single-file-script.js"
3+
4+
/** Directory, relative to the working directory, that holds the archived HTML files. */
export const ARCHIVE_DIR = "./conversations"

// Source returned by single-file-cli's getScriptSource, with a suffix that
// exposes `singlefile` on `window` so later page code can call it.
const singleFileSource = await getScriptSource({})

/** Script source with the `window.singlefile` assignment appended. */
export const SCRIPT = `${singleFileSource}; window.singlefile = singlefile`

/** Source returned by single-file-cli's getHookScriptSource. */
export const HOOK_SCRIPT = await getHookScriptSource()

/** Source returned by single-file-cli's getZipScriptSource. */
export const ZIP_SCRIPT = await getZipScriptSource()

src/util.ts

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import { readFile, readdir, unlink, writeFile } from "fs/promises"
2+
import { join } from "path"
3+
4+
import type { Browser } from "playwright"
5+
6+
import { ARCHIVE_DIR, HOOK_SCRIPT, SCRIPT, ZIP_SCRIPT } from "./constants.js"
7+
8+
/**
 * Collects the IDs of all Gemini share links found in a README file.
 *
 * Both `https://gemini.google.com/share/<id>` and `https://g.co/gemini/share/<id>`
 * are recognised. The ID stops at the first character that is not a letter,
 * digit, `_` or `-`, so surrounding Markdown or punctuation (`)`, `>`, `.`, `,`)
 * and query strings are not captured as part of the ID.
 *
 * @param readmePath Path of the file to scan.
 * @returns The set of distinct conversation IDs, in order of first appearance.
 * @throws If the file cannot be read.
 */
export async function getGeminiIdsFromReadme(readmePath: string): Promise<Set<string>> {
  const readme = await readFile(readmePath, "utf8")
  const shareLinkPattern = /https:\/\/(?:gemini\.google\.com|g\.co\/gemini)\/share\/(?<id>[\w-]+)/g

  const ids = new Set<string>()
  for (const match of readme.matchAll(shareLinkPattern)) {
    const id = match.groups?.id
    if (id) ids.add(id)
  }
  return ids
}
16+
17+
/**
 * Indexes the archive directory by conversation ID.
 *
 * Only files named `<id> - <title>.html` are considered, where `<id>` contains
 * no hyphen and `<title>` is non-empty; everything else is ignored.
 *
 * @param archiveDir Directory to list.
 * @returns Map from conversation ID to the matching file name (not the full path).
 */
export async function getArchivedConversations(archiveDir: string): Promise<Map<string, string>> {
  const archiveNamePattern = /^(?<id>[^-]+) - .+\.html$/
  const entries = await readdir(archiveDir)

  return entries.reduce((byId, fileName) => {
    const found = archiveNamePattern.exec(fileName)
    if (found !== null) byId.set(found.groups?.["id"] as string, fileName)
    return byId
  }, new Map<string, string>())
}
27+
28+
/**
 * Best-effort file removal.
 *
 * @param filePath Path of the file to delete.
 * @returns `true` when the file was removed, `false` when removal failed for any reason.
 */
export async function deleteFile(filePath: string) {
  return unlink(filePath).then(
    () => true,
    () => false
  )
}
36+
37+
/** Turns a page title into a non-empty fragment that is safe to use in a file name. */
function toFileNameTitle(rawTitle: string): string {
  // Remove illegal filename chars
  const cleaned = rawTitle.substring(0, 100).replace(/[\\/:*?"<>|\n]/g, "")
  // An empty title would yield "<id> - .html", which getArchivedConversations
  // does not recognise, so the conversation would be re-archived on every run.
  return cleaned.trim() === "" ? "Untitled" : cleaned
}

/** Removes scripts and every @font-face rule the archived page does not need. */
function stripUnneededAssets(html: string): string {
  // KaTeX fonts are only worth keeping when the page actually renders math.
  const usesKatex = html.includes(`class="katex"`)

  return html
    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>\s*/gi, "")
    .replace(/@font-face\s*{\s*[^}]+font-family:\s*codicon[^}]+}/gi, "")
    .replace(/@font-face\s*\{[^}]*\}/g, (fontFaceRule: string) => {
      const fontFamily = fontFaceRule
        .match(/font-family:\s*(?<quote>['"]?)(?<fontFamily>[^'"]+)\k<quote>;/)
        ?.groups?.fontFamily?.trim()

      if (fontFamily === "Google Symbols") return fontFaceRule
      if (usesKatex && fontFamily?.startsWith("KaTeX")) return fontFaceRule

      return ""
    })
}

/**
 * Saves one shared Gemini conversation as a single self-contained HTML file
 * named `<id> - <title>.html` inside ARCHIVE_DIR.
 *
 * @param browser Browser used to open the share page.
 * @param id Share ID of the conversation (the last path segment of the share URL).
 * @throws Rethrows any failure (navigation, timeout, capture, write) after logging it.
 */
export async function archiveConversation(browser: Browser, id: string) {
  const url = `https://gemini.google.com/share/${id}`
  let page

  try {
    page = await browser.newPage({ bypassCSP: true })
    // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/lib/cdp-client.js#L235-L243
    await page.addInitScript({ content: HOOK_SCRIPT })
    await page.addInitScript({ content: SCRIPT })

    await page.goto(url)
    await page.waitForSelector("message-content", { timeout: 20000 })
    // Fixed extra delay after the first message appears, before interacting with the page.
    await page.waitForTimeout(3000)

    // Click every visible element whose text contains "Show"
    const showButtons = await page.getByText("Show").all()
    for (const btn of showButtons) {
      if (await btn.isVisible()) await btn.click()
    }

    // Tolerate a missing title element instead of crashing on a null dereference.
    const rawTitle: string = await page.evaluate(() => document.querySelector("h1 > strong")?.textContent ?? "")

    // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/single-file-cli-api.js#L258
    // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/lib/cdp-client.js#L332
    // https://github.com/gildas-lormeau/single-file-core/blob/212a657/single-file.js#L125
    // @ts-expect-error
    const pageData = await page.evaluate(async options => await singlefile.getPageData(options), {
      zipScript: ZIP_SCRIPT
    })

    const filepath = join(ARCHIVE_DIR, `${id} - ${toFileNameTitle(rawTitle)}.html`)
    await writeFile(filepath, stripUnneededAssets(pageData.content))
  } catch (err) {
    console.error(`Failed to archive ${id}: ${(err as Error).message}`)
    throw err
  } finally {
    // Close the page exactly once, whether archiving succeeded or failed.
    if (page) await page.close()
  }
}
96+
97+
/**
 * Builds the commit message for an archive run.
 *
 * The subject line is always present and followed by a newline; a line listing
 * the added IDs and a line listing the deleted IDs are appended only when the
 * respective list is non-empty.
 *
 * @param added IDs of conversations archived in this run.
 * @param deleted IDs of conversations whose archive was removed in this run.
 * @returns The full commit message text.
 */
export function buildCommitMessage(added: string[], deleted: string[]): string {
  // The empty second entry keeps the newline after the subject line.
  const lines = ["chore: Automatic conversation archive", ""]

  if (added.length > 0) lines.push(`Added conversations: ${added.join(", ")}`)
  if (deleted.length > 0) lines.push(`Deleted conversations: ${deleted.join(", ")}`)

  return lines.join("\n")
}

0 commit comments

Comments
 (0)