Skip to content

Commit 9f134d2

Browse files
committed
Refactor Slurm API data fetching to use fetchSlurmData for improved error handling and logging
1 parent 49860a0 commit 9f134d2

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

app/api/prometheus/ipmi/route.ts

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ import { PrometheusQueryResponse } from "@/types/types";
22
import { NextResponse } from "next/server";
33
import { PrometheusDriver } from "prometheus-query";
44
import { env } from "process";
5+
import { fetchSlurmData } from "@/lib/slurm-api";
56

67
export const revalidate = 0;
78
const PROMETHEUS_URL = env.PROMETHEUS_URL;
@@ -38,26 +39,26 @@ async function getClusterNodes(): Promise<string[]> {
3839
}
3940

4041
try {
41-
// Fetch node information from Slurm API
42-
const baseURL = env.NEXT_PUBLIC_BASE_URL || "http://localhost:3000";
43-
const response = await fetch(`${baseURL}/api/slurm/nodes`);
42+
// Fetch node information directly from Slurm API
43+
const { data, error } = await fetchSlurmData('/nodes');
4444

45-
if (!response.ok) {
46-
throw new Error(`Failed to fetch nodes: ${response.statusText}`);
45+
if (error) {
46+
console.error(`Failed to fetch nodes from Slurm: ${error}`);
47+
return clusterNodesCache.nodes; // Return stale cache if available
4748
}
4849

49-
const data = await response.json();
50-
51-
if (!data.nodes || !Array.isArray(data.nodes)) {
50+
if (!data?.nodes || !Array.isArray(data.nodes)) {
5251
console.warn("Invalid nodes data format from Slurm API");
53-
return [];
52+
return clusterNodesCache.nodes; // Return stale cache if available
5453
}
5554

5655
// Extract node names
5756
const nodeNames = data.nodes
5857
.map((node: any) => node.name || null)
5958
.filter(Boolean);
6059

60+
console.log(`Fetched ${nodeNames.length} nodes from Slurm API`);
61+
6162
// Update cache
6263
clusterNodesCache = {
6364
timestamp: now,
@@ -67,7 +68,7 @@ async function getClusterNodes(): Promise<string[]> {
6768
return nodeNames;
6869
} catch (error) {
6970
console.error("Error fetching cluster nodes:", error);
70-
return [];
71+
return clusterNodesCache.nodes; // Return stale cache if available
7172
}
7273
}
7374

app/api/reporting/gpu/route.ts

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import { NextResponse } from "next/server";
33
import { PrometheusDriver } from "prometheus-query";
44
import { env } from "process";
5+
import { fetchSlurmData } from "@/lib/slurm-api";
56

67
const PROMETHEUS_URL = env.PROMETHEUS_URL;
78
const STALE_JOB_THRESHOLD_SECONDS = 30; // Consider a job stale if no metrics in the last 30 seconds
@@ -119,14 +120,14 @@ const checkJobFreshness = async (jobId: string): Promise<boolean> => {
119120
// Get the actual running jobs from Slurm
120121
const getRunningJobsFromSlurm = async (): Promise<Set<string>> => {
121122
try {
122-
const baseURL = env.NEXT_PUBLIC_BASE_URL || "http://localhost:3000";
123-
const response = await fetch(`${baseURL}/api/slurm/jobs`);
123+
// Fetch job information directly from Slurm API
124+
const { data, error } = await fetchSlurmData('/jobs');
124125

125-
if (!response.ok) {
126-
throw new Error(`Failed to fetch jobs: ${response.statusText}`);
126+
if (error) {
127+
console.error(`Failed to fetch jobs from Slurm: ${error}`);
128+
return new Set<string>();
127129
}
128130

129-
const data = await response.json();
130131
const runningJobs = new Set<string>();
131132

132133
if (data && data.jobs && Array.isArray(data.jobs)) {
@@ -138,6 +139,7 @@ const getRunningJobsFromSlurm = async (): Promise<Set<string>> => {
138139
});
139140
}
140141

142+
console.log(`Fetched ${runningJobs.size} running jobs from Slurm API`);
141143
return runningJobs;
142144
} catch (error) {
143145
console.error("Error fetching running jobs from Slurm:", error);

0 commit comments

Comments
 (0)