Skip to content

Commit 8f7c976

Browse files
committed
redpanda on uptime
1 parent db99c1e commit 8f7c976

File tree

5 files changed

+262
-121
lines changed

5 files changed

+262
-121
lines changed

apps/uptime/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"@opentelemetry/sdk-trace-node": "^2.2.0",
1616
"@opentelemetry/semantic-conventions": "^1.38.0",
1717
"@upstash/qstash": "^2.8.4",
18-
"elysia": "^1.4.18"
18+
"elysia": "^1.4.18",
19+
"kafkajs": "^2.2.4"
1920
}
2021
}

apps/uptime/src/actions.ts

Lines changed: 96 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { createHash } from "node:crypto";
22
import { connect } from "node:tls";
3-
import { chQuery } from "@databuddy/db";
3+
// import { chQuery } from "@databuddy/db";
44
import { websiteService } from "@databuddy/services/websites";
55
import { captureError, record } from "./lib/tracing";
66
import type { ActionResult, UptimeData } from "./types";
@@ -35,11 +35,11 @@ type FetchFailure = {
3535
error: string;
3636
};
3737

38-
type Heartbeat = {
39-
status: number;
40-
retries: number;
41-
streak: number;
42-
};
38+
// type Heartbeat = {
39+
// status: number;
40+
// retries: number;
41+
// streak: number;
42+
// };
4343

4444
export function lookupWebsite(
4545
id: string
@@ -237,80 +237,86 @@ function getProbeMetadata(): Promise<{ ip: string; region: string }> {
237237
});
238238
}
239239

240-
function getLastHeartbeat(siteId: string): Promise<Heartbeat | null> {
241-
return record("uptime.get_last_heartbeat", async () => {
242-
try {
243-
const rows = await chQuery<{
244-
status: number;
245-
retries: number;
246-
failure_streak: number;
247-
}>(
248-
`
249-
SELECT status, retries, failure_streak
250-
FROM uptime.uptime_monitor
251-
WHERE site_id = {siteId:String}
252-
ORDER BY timestamp DESC
253-
LIMIT 1
254-
`,
255-
{ siteId }
256-
);
257-
258-
if (!rows || rows.length === 0) {
259-
return null;
260-
}
261-
262-
return {
263-
status: rows[0].status,
264-
retries: rows[0].retries,
265-
streak: rows[0].failure_streak,
266-
};
267-
} catch (error) {
268-
console.error("Failed to fetch last heartbeat:", error);
269-
return null;
270-
}
271-
});
272-
}
240+
// function getLastHeartbeat(siteId: string): Promise<Heartbeat | null> {
241+
// return record("uptime.get_last_heartbeat", async () => {
242+
// try {
243+
// const rows = await chQuery<{
244+
// status: number;
245+
// retries: number;
246+
// failure_streak: number;
247+
// }>(
248+
// `
249+
// SELECT status, retries, failure_streak
250+
// FROM uptime.uptime_monitor
251+
// WHERE site_id = {siteId:String}
252+
// ORDER BY timestamp DESC
253+
// LIMIT 1
254+
// `,
255+
// { siteId }
256+
// );
257+
258+
// if (!rows || rows.length === 0) {
259+
// return null;
260+
// }
261+
262+
// return {
263+
// status: rows[0].status,
264+
// retries: rows[0].retries,
265+
// streak: rows[0].failure_streak,
266+
// };
267+
// } catch (error) {
268+
// console.error("Failed to fetch last heartbeat:", error);
269+
// return null;
270+
// }
271+
// });
272+
// }
273273

274274
// the retry logic - this prevents false alarms when a site has a temporary hiccup
275-
function calculateStatus(
276-
isUp: boolean,
277-
last: Heartbeat | null,
278-
maxRetries: number
279-
): { status: number; retries: number; streak: number } {
275+
// function calculateStatus(
276+
// isUp: boolean,
277+
// last: Heartbeat | null,
278+
// maxRetries: number
279+
// ): { status: number; retries: number; streak: number } {
280+
// const { UP, DOWN } = MonitorStatus;
281+
// // const { UP, DOWN, PENDING } = MonitorStatus;
282+
283+
// // first time checking this site
284+
// if (!last) {
285+
// // if (!isUp && maxRetries > 0) {
286+
// // return { status: PENDING, retries: 1, streak: 0 };
287+
// // }
288+
// return { status: isUp ? UP : DOWN, retries: 0, streak: isUp ? 0 : 1 };
289+
// }
290+
291+
// // site was up, now it's down
292+
// if (last.status === UP && !isUp) {
293+
// // if (maxRetries > 0 && last.retries < maxRetries) {
294+
// // return {
295+
// // status: PENDING,
296+
// // retries: last.retries + 1,
297+
// // streak: last.streak,
298+
// // };
299+
// // }
300+
// return { status: DOWN, retries: 0, streak: last.streak + 1 };
301+
// }
302+
303+
// // still pending, still down
304+
// // if (last.status === PENDING && !isUp && last.retries < maxRetries) {
305+
// // return { status: PENDING, retries: last.retries + 1, streak: last.streak };
306+
// // }
307+
308+
// // confirmed down or recovered
309+
// if (!isUp) {
310+
// return { status: DOWN, retries: 0, streak: last.streak + 1 };
311+
// }
312+
313+
// return { status: UP, retries: 0, streak: 0 };
314+
// }
315+
316+
// simplified status calculation - just UP or DOWN based on current check
317+
function calculateStatus(isUp: boolean): { status: number; retries: number; streak: number } {
280318
const { UP, DOWN } = MonitorStatus;
281-
// const { UP, DOWN, PENDING } = MonitorStatus;
282-
283-
// first time checking this site
284-
if (!last) {
285-
// if (!isUp && maxRetries > 0) {
286-
// return { status: PENDING, retries: 1, streak: 0 };
287-
// }
288-
return { status: isUp ? UP : DOWN, retries: 0, streak: isUp ? 0 : 1 };
289-
}
290-
291-
// site was up, now it's down
292-
if (last.status === UP && !isUp) {
293-
// if (maxRetries > 0 && last.retries < maxRetries) {
294-
// return {
295-
// status: PENDING,
296-
// retries: last.retries + 1,
297-
// streak: last.streak,
298-
// };
299-
// }
300-
return { status: DOWN, retries: 0, streak: last.streak + 1 };
301-
}
302-
303-
// still pending, still down
304-
// if (last.status === PENDING && !isUp && last.retries < maxRetries) {
305-
// return { status: PENDING, retries: last.retries + 1, streak: last.streak };
306-
// }
307-
308-
// confirmed down or recovered
309-
if (!isUp) {
310-
return { status: DOWN, retries: 0, streak: last.streak + 1 };
311-
}
312-
313-
return { status: UP, retries: 0, streak: 0 };
319+
return { status: isUp ? UP : DOWN, retries: 0, streak: 0 };
314320
}
315321

316322
export function checkUptime(
@@ -325,17 +331,22 @@ export function checkUptime(
325331
const timestamp = Date.now();
326332

327333
// gather all the data we need in parallel
328-
const [pingResult, lastBeat, probe] = await Promise.all([
334+
const [pingResult, probe] = await Promise.all([
329335
pingWebsite(normalizedUrl),
330-
getLastHeartbeat(siteId),
331336
getProbeMetadata(),
332337
]);
333-
334-
const { status, retries, streak } = calculateStatus(
335-
pingResult.ok,
336-
lastBeat,
337-
maxRetries
338-
);
338+
// const [pingResult, lastBeat, probe] = await Promise.all([
339+
// pingWebsite(normalizedUrl),
340+
// getLastHeartbeat(siteId),
341+
// getProbeMetadata(),
342+
// ]);
343+
344+
const { status, retries, streak } = calculateStatus(pingResult.ok);
345+
// const { status, retries, streak } = calculateStatus(
346+
// pingResult.ok,
347+
// lastBeat,
348+
// maxRetries
349+
// );
339350

340351
// site is down - minimal data
341352
if (!pingResult.ok) {

apps/uptime/src/index.ts

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import { clickHouse, formatClickhouseDate } from "@databuddy/db";
21
import { Receiver } from "@upstash/qstash";
32
import Elysia from "elysia";
43
import { checkUptime, lookupWebsite } from "./actions";
4+
import { sendUptimeEvent } from "./lib/producer";
55
import {
66
captureError,
77
endRequestSpan,
@@ -125,43 +125,15 @@ const app = new Elysia()
125125

126126
const { data } = result;
127127

128-
// TO-DO: migrate this to use redpanda & vector instead of clickhouse.
128+
// Send event to Redpanda for ingestion via Vector
129129
try {
130-
await clickHouse.insert({
131-
table: "uptime.uptime_monitor",
132-
values: [
133-
{
134-
site_id: data.site_id,
135-
url: data.url,
136-
timestamp: formatClickhouseDate(new Date(data.timestamp)),
137-
status: data.status,
138-
http_code: data.http_code,
139-
ttfb_ms: data.ttfb_ms,
140-
total_ms: data.total_ms,
141-
attempt: data.attempt,
142-
retries: data.retries,
143-
failure_streak: data.failure_streak,
144-
response_bytes: data.response_bytes,
145-
content_hash: data.content_hash,
146-
redirect_count: data.redirect_count,
147-
probe_region: data.probe_region,
148-
probe_ip: data.probe_ip,
149-
ssl_expiry: data.ssl_expiry
150-
? formatClickhouseDate(new Date(data.ssl_expiry))
151-
: null,
152-
ssl_valid: data.ssl_valid,
153-
env: data.env,
154-
check_type: data.check_type,
155-
user_agent: data.user_agent,
156-
error: data.error,
157-
},
158-
],
159-
format: "JSONEachRow",
160-
});
130+
await sendUptimeEvent(data, data.site_id);
161131
} catch (error) {
162-
console.error("Failed to store uptime data in ClickHouse:", error);
163-
// continue execution even if clickhouse insert fails
132+
console.error("Failed to send uptime event to Redpanda:", error);
133+
captureError(error);
134+
// continue execution even if redpanda send fails
164135
}
136+
165137
return new Response("Uptime check complete", { status: 200 });
166138
} catch (error) {
167139
captureError(error);

0 commit comments

Comments
 (0)