Skip to content

Commit 36ce651

Browse files
committed
data
1 parent 3c9fb81 commit 36ce651

File tree

2 files changed

+171
-1
lines changed

2 files changed

+171
-1
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ qighuerojg/
1515

1616
accounts.db
1717

18-
*.local.*
18+
*.local.*
19+
20+
data/*.zip

scripts/fetchTweetsAndReplies.ts

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env bun
2+
3+
/**
4+
* Fetch all tweets and replies for a Twitter user via SocialAPI and save as JSONL.
5+
*
6+
* Usage:
7+
* bun scripts/fetchTweetsAndReplies.ts --user-id 44196397 \
8+
* --out ./data/elonmusk.tweets-and-replies.jsonl
9+
*
10+
* Env:
11+
* SOCIALAPI_KEY=...
12+
*/
13+
14+
import { createWriteStream, existsSync, mkdirSync, rmSync } from "node:fs";
15+
import { dirname } from "node:path";
16+
17+
/**
 * A single tweet object as it appears in a timeline response.
 *
 * Only the fields this script reads are spelled out; every other property is
 * accepted through the index signature and treated as opaque.
 */
type SocialApiTweet = {
  /** Tweet id as a number or string; consulted when `id_str` is absent. */
  id?: string | number;
  /** Tweet id as a string; preferred over `id` when both are present. */
  id_str?: string;
  tweet_created_at?: string;
  full_text?: string | null;
  text?: string | null;
  /** Any additional fields are kept as-is and never inspected. */
  [key: string]: unknown;
};
25+
26+
/**
 * One page of a timeline: the tweets on the page plus an optional cursor
 * pointing at the following page.
 */
type TimelineResponse = {
  /** Tweets contained in this page. */
  tweets: Array<SocialApiTweet>;
  /** Cursor for the next page; a missing, null or empty value means there is none. */
  next_cursor?: string | null;
};
30+
31+
/**
 * Look up the value of a command-line option in `process.argv`.
 *
 * Both `--flag value` and `--flag=value` spellings are accepted; the first
 * argument that matches wins.
 *
 * @param flag - Option name including its leading dashes, e.g. `--out`.
 * @param fallback - Value returned when the option is absent, or when it is
 *   the last argument and therefore has no value after it.
 * @returns The option's value, or `fallback` when none was supplied.
 */
function getArg(flag: string, fallback?: string): string | undefined {
  const inlinePrefix = `${flag}=`;
  const argv = process.argv;
  const idx = argv.findIndex((a) => a === flag || a.startsWith(inlinePrefix));
  if (idx === -1) return fallback;

  const matched = argv[idx];
  // Decide by *how* the argument matched rather than by searching for "=":
  // anything other than the bare flag must be the `--flag=value` form.
  if (matched !== undefined && matched !== flag) {
    return matched.slice(inlinePrefix.length);
  }

  // `--flag value` form. When the flag is the final argument there is no
  // value to read, so use the fallback instead of returning undefined.
  return argv[idx + 1] ?? fallback;
}
40+
41+
/**
 * Convert a value to its string form, mapping `null` and `undefined` to
 * `undefined` so callers can tell "no value" apart from a real string.
 *
 * @param value - Any value.
 * @returns `String(value)`, or `undefined` when `value` is null or undefined.
 */
function asString(value: unknown): string | undefined {
  const isMissing = value === null || value === undefined;
  return isMissing ? undefined : String(value);
}
45+
46+
/**
 * Fetch one page of a user's timeline from SocialAPI.
 *
 * @param apiKey - Sent as a bearer token in the `Authorization` header.
 * @param userId - User id; URL-encoded into the request path.
 * @param kind - Which timeline endpoint to call.
 * @param cursor - Cursor of the page to fetch; omit (or pass an empty string)
 *   to request the first page.
 * @returns The parsed response, guaranteed to carry a `tweets` array.
 * @throws Error when the HTTP status is not 2xx, or when the body does not
 *   contain a `tweets` array. Also propagates whatever `fetch` / `res.json()`
 *   throw on network or JSON parse failures.
 */
async function fetchPage(
  apiKey: string,
  userId: string,
  kind: "tweets" | "tweets-and-replies",
  cursor?: string
): Promise<TimelineResponse> {
  const baseUrl = `https://api.socialapi.me/twitter/user/${encodeURIComponent(userId)}/${kind}`;
  const url = cursor
    ? `${baseUrl}?cursor=${encodeURIComponent(cursor)}`
    : baseUrl;

  const res = await fetch(url, {
    headers: {
      Authorization: `Bearer ${apiKey}`,
      Accept: "application/json",
    },
  });

  if (!res.ok) {
    // Reading the error body is best-effort: if it fails, the status line is
    // still reported instead of being masked by the read error.
    const body = await res.text().catch(() => "");
    const detail = body ? `: ${body}` : "";
    throw new Error(
      `Request failed: ${res.status} ${res.statusText}${detail}`
    );
  }

  // Keep the payload as `unknown` until its shape has been checked.
  const json: unknown = await res.json();
  const tweets =
    typeof json === "object" && json !== null
      ? (json as { tweets?: unknown }).tweets
      : undefined;
  if (!Array.isArray(tweets)) {
    const preview = JSON.stringify(json);
    // Only add the ellipsis when the preview was actually cut short.
    const shown =
      preview.length > 500 ? `${preview.slice(0, 500)}...` : preview;
    throw new Error(`Unexpected response shape: ${shown}`);
  }
  return json as TimelineResponse;
}
79+
80+
/**
 * CLI entry point: page through a user's timeline and append every tweet not
 * yet seen in this run to a JSONL file (one JSON object per line).
 *
 * Options (read via `getArg`):
 *   --user-id <id>       required
 *   --out <path>         default `./data/<user-id>.tweets-and-replies.jsonl`
 *   --cursor <cursor>    start from this cursor instead of the first page
 *   --kind <kind>        `tweets` or `tweets-and-replies` (default)
 *   --max-pages <n>      stop after n pages; 0 (default) means no limit
 *   --overwrite          delete the output file first; also accepted as
 *                        `--overwrite true|1` / `--overwrite=true|1`
 *
 * The API key is read from the `SOCIALAPI_KEY` environment variable.
 * Exits the process with status 1 on invalid or missing configuration and on
 * write-stream errors; fetch errors are rethrown after the output is flushed.
 */
async function main(): Promise<void> {
  const apiKey = process.env.SOCIALAPI_KEY;
  const userId = getArg("--user-id");
  const outPath =
    getArg("--out") ?? `./data/${userId ?? "unknown"}.tweets-and-replies.jsonl`;
  const startCursor = getArg("--cursor");
  const kind = getArg("--kind", "tweets-and-replies") ?? "tweets-and-replies";
  const maxPagesRaw = getArg("--max-pages", "0") ?? "0";
  const maxPages = Number(maxPagesRaw); // 0 = no limit

  // A bare `--overwrite` (last argument, or directly followed by another
  // option) counts as enabled; an explicit value must be "true" or "1".
  const overwriteIdx = process.argv.indexOf("--overwrite");
  const afterOverwrite =
    overwriteIdx === -1 ? undefined : process.argv[overwriteIdx + 1];
  const bareOverwrite =
    overwriteIdx !== -1 &&
    (afterOverwrite === undefined || afterOverwrite.startsWith("--"));
  const overwriteValue = getArg("--overwrite");
  const overwrite =
    bareOverwrite || overwriteValue === "true" || overwriteValue === "1";

  if (!apiKey) {
    console.error("Missing API key. Provide via SOCIALAPI_KEY env.");
    process.exit(1);
  }
  if (!userId) {
    console.error(
      "Missing --user-id. Example: bun scripts/fetchTweetsAndReplies.ts --user-id 44196397"
    );
    process.exit(1);
  }
  // Validate at runtime instead of trusting a type assertion: an unknown kind
  // would otherwise be spliced straight into the request path.
  if (kind !== "tweets" && kind !== "tweets-and-replies") {
    console.error(
      `Invalid --kind '${kind}'. Expected 'tweets' or 'tweets-and-replies'.`
    );
    process.exit(1);
  }
  // A non-numeric value becomes NaN, which would silently mean "no limit".
  if (!Number.isInteger(maxPages) || maxPages < 0) {
    console.error(
      `Invalid --max-pages '${maxPagesRaw}'. Expected a non-negative integer (0 = no limit).`
    );
    process.exit(1);
  }

  const dir = dirname(outPath);
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  if (overwrite && existsSync(outPath)) {
    rmSync(outPath);
  }

  console.log(`Fetching '${kind}' for user_id=${userId} -> ${outPath}`);

  // Append mode: without --overwrite an existing file is extended, which is
  // what makes resuming with --cursor possible.
  const writeStream = createWriteStream(outPath, { flags: "a" });
  writeStream.on("error", (err) => {
    console.error("Write stream error:", err);
    process.exit(1);
  });

  // Ids written during this run only; ids already in the file are not loaded.
  const seenTweetIds = new Set<string>();
  let page = 0;
  let cursor: string | undefined = startCursor;
  let totalFetched = 0;
  let totalWritten = 0;

  try {
    while (true) {
      page += 1;
      const response = await fetchPage(apiKey, userId, kind, cursor);

      const tweets = response.tweets ?? [];
      totalFetched += tweets.length;

      const lines: string[] = [];
      for (const tweet of tweets) {
        const id = asString(tweet.id_str ?? tweet.id);
        // Skip tweets without an id and ids already written in this run.
        if (!id || seenTweetIds.has(id)) continue;
        seenTweetIds.add(id);
        lines.push(JSON.stringify(tweet) + "\n");
      }
      const wroteThisPage = lines.length;
      // One write per page; honour backpressure before fetching more.
      if (wroteThisPage > 0 && !writeStream.write(lines.join(""))) {
        await new Promise<void>((resolve) => {
          writeStream.once("drain", () => resolve());
        });
      }
      totalWritten += wroteThisPage;

      // Empty-string and null cursors both mean "no further page".
      const nextCursor = response.next_cursor || undefined;
      const hasNext = nextCursor !== undefined;
      console.log(
        `page=${page} fetched=${tweets.length} wrote=${wroteThisPage} next_cursor=${hasNext}`
      );

      if (!hasNext) break;
      if (maxPages > 0 && page >= maxPages) {
        console.log(
          `Reached --max-pages=${maxPages}. Resume with --cursor ${nextCursor}`
        );
        break;
      }
      if (nextCursor === cursor) {
        // The same cursor would fetch the same page again, forever.
        console.error(
          `next_cursor did not advance after page=${page}; stopping to avoid an endless loop.`
        );
        break;
      }

      cursor = nextCursor;
    }
  } catch (err) {
    // `cursor` is the one used for the failed request, so retrying with it
    // re-fetches exactly the page that was not written.
    if (cursor !== undefined) {
      console.error(`Failed on page=${page}. Resume with --cursor ${cursor}`);
    }
    throw err;
  } finally {
    // Wait for the flush: the caller may call process.exit() right after this
    // function settles, which would discard still-buffered output.
    await new Promise<void>((resolve) => {
      writeStream.end(() => resolve());
    });
  }

  console.log(
    `Done. pages=${page} fetched=${totalFetched} unique_written=${totalWritten} file=${outPath}`
  );
}
164+
165+
// Script entry point: run main() and, if it rejects, print the error and
// terminate with a non-zero exit status.
void main().then(undefined, (failure: unknown) => {
  console.error(failure);
  process.exit(1);
});

0 commit comments

Comments
 (0)