Skip to content

Commit 315361c

Browse files
authored
Add TMiR 2025-02 transcript, add script to automate transcript processing (#341)
* AI gen transcript processing script Thanks Claude, seems to work * Add TMiR 2025-02 transcript
1 parent 8911c2c commit 315361c

File tree

4 files changed

+624
-2
lines changed

4 files changed

+624
-2
lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
"lint": "next lint",
1212
"test": "next lint && prettier --check .",
1313
"prettier": "prettier --write .",
14-
"install-if-package-changed": "git diff-tree -r --name-only --no-commit-id ORIG_HEAD HEAD | grep --quiet yarn.lock && yarn install || exit 0"
14+
"install-if-package-changed": "git diff-tree -r --name-only --no-commit-id ORIG_HEAD HEAD | grep --quiet yarn.lock && yarn install || exit 0",
15+
"process-tmir": "node --experimental-strip-types scripts/process-tmir.ts"
1516
},
1617
"husky": {
1718
"hooks": {

scripts/process-tmir.ts

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Transcript Converter
5+
*
6+
* Transforms raw transcript markdown into a publishing-ready format with
7+
* a table of contents and properly formatted timestamps.
8+
*
9+
* Usage:
10+
* cat | npm run --silent process-tmir | pbcopy
* (--silent keeps npm's own script banner out of the piped output)
11+
*/
12+
13+
// Read the whole transcript from stdin, then write the processed result to
// stdout once the input stream ends.
//
// Decoding as UTF-8 at the stream level keeps multi-byte characters (curly
// quotes, emoji, accented names) intact even when they straddle a chunk
// boundary. Without it each chunk is a Buffer that gets stringified on its
// own when appended, which corrupts any character split across two chunks.
process.stdin.setEncoding("utf8");

let rawTranscript = "";
process.stdin.on("data", (chunk: string) => {
  rawTranscript += chunk;
});

process.stdin.on("end", () => {
  process.stdout.write(processTranscript(rawTranscript));
});
23+
24+
/**
 * Converts a raw transcript into its publishing-ready form: a table of
 * contents built from the `## ` section headers, followed by the transcript
 * with timestamps removed from headers and moved to the end of content lines.
 *
 * @param input - The raw transcript markdown.
 * @returns The table of contents, a blank line, and the processed transcript;
 *   just the processed transcript when no timestamped section header exists.
 */
function processTranscript(input: string): string {
  // Accept both LF and CRLF input. A stray "\r" would otherwise stay glued to
  // the end of every line and end up in front of relocated timestamps and
  // inside table-of-contents titles.
  const lines = input.split(/\r?\n/);

  const tableOfContents = generateTableOfContents(extractSectionHeaders(lines));

  // Section headers lose their timestamp; every other line has its timestamp
  // (if any) moved to the end.
  const processedLines = lines.map((line) =>
    line.startsWith("## ")
      ? processSectionHeader(line)
      : processContentLine(line),
  );

  const body = processedLines.join("\n");

  // Without any section headers there is no table of contents to separate
  // from the body, so avoid emitting two leading blank lines.
  return tableOfContents ? `${tableOfContents}\n\n${body}` : body;
}
46+
47+
/** One timestamped `## ` section heading, as listed in the table of contents. */
interface SectionHeader {
  /** Timestamp text without the surrounding brackets, e.g. `00:12:34`. */
  timestamp: string;
  /** Heading text with the `## [timestamp] ` prefix removed; markdown links are kept. */
  title: string;
  /** Slug derived from the title, used as the `#fragment` of the table-of-contents link. */
  anchorLink: string;
}

/**
 * Collects every `## ` heading that carries a `[hh:mm:ss]` timestamp.
 *
 * Headings without a timestamp are skipped, so they never appear in the
 * table of contents.
 *
 * @param lines - The transcript, one entry per line.
 * @returns The headers in document order.
 */
function extractSectionHeaders(lines: string[]): SectionHeader[] {
  const headers: SectionHeader[] = [];

  for (const line of lines) {
    if (!line.startsWith("## ")) continue;

    const timestampMatch = line.match(/\[(\d{2}:\d{2}:\d{2})\]/);
    if (!timestampMatch) continue;

    const timestamp = timestampMatch[1];

    // Everything after the "## [hh:mm:ss] " prefix. Trimming keeps trailing
    // whitespace out of the title and, more importantly, out of the slug,
    // where it would turn into a dangling "-".
    const title = line.replace(/## \[\d{2}:\d{2}:\d{2}\] /, "").trim();

    // Slug source: markdown links [text](url) contribute only their text.
    const plainTitle = title.replace(/\[(.*?)\]\(.*?\)/g, "$1");

    // Lowercase, drop everything except word characters, whitespace and
    // hyphens (this removes colons and other punctuation), then turn each
    // whitespace run into a single hyphen.
    const anchorLink = plainTitle
      .toLowerCase()
      .replace(/[^\w\s-]/g, "")
      .replace(/\s+/g, "-");

    headers.push({ timestamp, title, anchorLink });
  }

  return headers;
}
91+
92+
/**
 * Renders the table of contents as a markdown bullet list.
 *
 * Each entry has the form `- [[hh:mm:ss](#anchor)] Title`, i.e. the timestamp
 * is the link text and the title follows as plain markdown.
 *
 * @param headers - Section headers in the order they should be listed.
 * @returns One line per header joined with `\n`; an empty string for no headers.
 */
function generateTableOfContents(headers: SectionHeader[]): string {
  return headers
    .map(
      ({ timestamp, anchorLink, title }) =>
        `- [[${timestamp}](#${anchorLink})] ${title}`,
    )
    .join("\n");
}
100+
101+
/**
 * Removes the first `[hh:mm:ss]` timestamp from a section header line.
 *
 * The spaces or tabs that separated the timestamp from the rest of the
 * heading are removed with it, so the timestamp is stripped whether it was
 * followed by one space, several, or none, and a timestamp at the very end
 * of the line leaves no trailing whitespace behind.
 *
 * @param line - A header line, e.g. `## [00:12:34] Conference news`.
 * @returns The header without its timestamp; the line unchanged if it has none.
 */
function processSectionHeader(line: string): string {
  const timestampMatch = line.match(/\[\d{2}:\d{2}:\d{2}\]/);
  if (!timestampMatch || timestampMatch.index === undefined) return line;

  const before = line.slice(0, timestampMatch.index);
  const after = line
    .slice(timestampMatch.index + timestampMatch[0].length)
    .replace(/^[ \t]+/, "");

  // Timestamp was the last thing on the line: also drop the separator that
  // preceded it.
  return after === "" ? before.replace(/[ \t]+$/, "") : before + after;
}
109+
110+
/**
 * Moves the first `[hh:mm:ss]` timestamp of a content line to the end of it.
 *
 * `**Carl:** [00:00:00] Hello there.` becomes
 * `**Carl:** Hello there. [00:00:00]`.
 *
 * Lines without a timestamp are returned unchanged, and so are lines whose
 * timestamp is already the last thing on the line; running the function on
 * its own output therefore does not duplicate timestamps.
 *
 * @param line - A single transcript line that is not a section header.
 * @returns The line with its first timestamp relocated to the end.
 */
function processContentLine(line: string): string {
  // Brackets that are not timestamps (e.g. markdown links) simply fail to match.
  const timestampMatch = line.match(/\[\d{2}:\d{2}:\d{2}\]/);
  if (!timestampMatch || timestampMatch.index === undefined) return line;

  const timestamp = timestampMatch[0];
  const before = line.slice(0, timestampMatch.index);
  const after = line.slice(timestampMatch.index + timestamp.length);

  // Nothing but whitespace follows: the timestamp already sits at the end.
  if (after.trim() === "") return line;

  // Drop the single separator space that followed the timestamp, if present,
  // so the surrounding text closes up without a doubled space.
  const rest = after.startsWith(" ") ? after.slice(1) : after;

  return `${before}${rest} ${timestamp}`;
}

0 commit comments

Comments
 (0)