diff --git a/torchci/clickhouse_queries/oss_ci_util/oss_ci_list_util_metadata_info/query.sql b/torchci/clickhouse_queries/oss_ci_util/oss_ci_list_util_metadata_info/query.sql index 2b873c528c..948206cb64 100644 --- a/torchci/clickhouse_queries/oss_ci_util/oss_ci_list_util_metadata_info/query.sql +++ b/torchci/clickhouse_queries/oss_ci_util/oss_ci_list_util_metadata_info/query.sql @@ -5,8 +5,7 @@ SELECT job_name, run_attempt, repo -FROM - misc.oss_ci_utilization_metadata +FROM misc.oss_ci_utilization_metadata WHERE - workflow_id = { workflowId: UInt64} - AND repo = {repo: String } + workflow_id IN {workflowIds: Array(UInt64)} + AND repo IN {repos: Array(String)} diff --git a/torchci/components/additionalTestInfo/TestInfo.tsx b/torchci/components/additionalTestInfo/TestInfo.tsx index 4eef142c06..3dea63fe70 100644 --- a/torchci/components/additionalTestInfo/TestInfo.tsx +++ b/torchci/components/additionalTestInfo/TestInfo.tsx @@ -41,6 +41,10 @@ export function isPending(jobs: JobData[]) { return jobs.some((job) => IsJobInProgress(job.conclusion)); } +export function isPendingJob(job: JobData) { + return IsJobInProgress(job.conclusion); +} + export function RecursiveDetailsSummary({ info, level, diff --git a/torchci/components/commit/WorkflowBox.tsx b/torchci/components/commit/WorkflowBox.tsx index 5ed9135185..51c8ad6895 100644 --- a/torchci/components/commit/WorkflowBox.tsx +++ b/torchci/components/commit/WorkflowBox.tsx @@ -1,5 +1,5 @@ import { Button, Stack, styled, Tooltip, Typography } from "@mui/material"; -import { isPending, TestInfo } from "components/additionalTestInfo/TestInfo"; +import { isPending, isPendingJob, TestInfo } from "components/additionalTestInfo/TestInfo"; import styles from "components/commit/commit.module.css"; import LogViewer, { SearchLogViewer } from "components/common/log/LogViewer"; import { durationDisplay } from "components/common/TimeUtils"; @@ -44,14 +44,12 @@ const JobButton = styled(Button)({ }); function WorkflowJobSummary({ job, 
- utilMetadata, artifacts, artifactsToShow, setArtifactsToShow, unstableIssues, }: { job: JobData; - utilMetadata?: UtilizationMetadataInfo[]; artifacts?: Artifact[]; artifactsToShow: Set; setArtifactsToShow: any; @@ -97,18 +95,13 @@ function WorkflowJobSummary({ ); } - if (utilMetadata && utilMetadata.length > 0) { - if (utilMetadata.length > 1) { - console.log( - `Multiple util metadata found for job ${job.id}, currently only showing the first one` - ); - } - const m = utilMetadata[0]; + if (job.id && !isPendingJob(job)) { + const m = job; subInfo.push( <> job.workflowId)); const [artifactsToShow, setArtifactsToShow] = useState(new Set()); const groupedArtifacts = groupArtifacts(jobs, artifacts); @@ -305,11 +294,6 @@ export default function WorkflowBox({
( - `/api/list_utilization_metadata_info/${workflowId}`, + `/api/list_utilization_metadata_info/workflows? parameters={repo:"pytorch/pytorch", workflow_ids:${workflowIds}}`, fetcher ); diff --git a/torchci/components/utilization/WorkflowUtilizationPage/WorkflowUtilizationPage.tsx b/torchci/components/utilization/WorkflowUtilizationPage/WorkflowUtilizationPage.tsx index 2a9e34c914..62b54b6b14 100644 --- a/torchci/components/utilization/WorkflowUtilizationPage/WorkflowUtilizationPage.tsx +++ b/torchci/components/utilization/WorkflowUtilizationPage/WorkflowUtilizationPage.tsx @@ -138,7 +138,7 @@ function useUtilMetadata(workflowId: string | undefined): { metaError: any; } { const { data, error } = useSWR( - `/api/list_utilization_metadata_info/${workflowId}?includes_stats=true`, + `/api/list_utilization_metadata_info/workflow/${workflowId}?includes_stats=true`, fetcher, { refreshInterval: 20 * 60 * 1000, // refresh every 20 minuts diff --git a/torchci/lib/utilization/fetchListUtilizationMetadataInfo.ts b/torchci/lib/utilization/fetchListUtilizationMetadataInfo.ts index 2feac8c8b1..a929a006b8 100644 --- a/torchci/lib/utilization/fetchListUtilizationMetadataInfo.ts +++ b/torchci/lib/utilization/fetchListUtilizationMetadataInfo.ts @@ -12,7 +12,7 @@ const LIST_UTIL_METADATA_INFO_QUERY_FOLDER_NAME = const LIST_UTIL_METADATA_WITH_STATS_QUERY = "oss_ci_util/oss_ci_list_util_stats"; -export default async function fetchListUtilizationMetadataInfo( +export async function fetchListUtilizationMetadataInfoForSingleWorkflow( params: ListUtilizationMetadataInfoParams ): Promise { let meta_resp = null; @@ -29,8 +29,8 @@ export default async function fetchListUtilizationMetadataInfo( `[api][list_utilization_metadata_info][${params.workflow_id}]list util metadata without runtime aggregated stats` ); meta_resp = await listUtilizationMetadataInfo( - params.workflow_id, - params.repo + [params.workflow_id], + params.repo? 
[params.repo] : [UTILIZATION_DEFAULT_REPO] ); } @@ -45,15 +45,15 @@ export default async function fetchListUtilizationMetadataInfo( }; } -async function listUtilizationMetadataInfo( - workflow_id: string, - repo: string = UTILIZATION_DEFAULT_REPO +export async function listUtilizationMetadataInfo( + workflow_ids: string[], + repos: string[] = [UTILIZATION_DEFAULT_REPO] ) { const response = await queryClickhouseSaved( LIST_UTIL_METADATA_INFO_QUERY_FOLDER_NAME, { - workflowId: workflow_id, - repo: repo, + workflowIds: workflow_ids, + repo: repos, } ); return response; @@ -79,6 +79,7 @@ async function listUtilizationMetadataWithStats( } return res; } + function toMetadata(metadata: any) { const data: UtilizationMetadataInfo = { workflow_id: metadata.workflow_id, @@ -90,6 +91,7 @@ function toMetadata(metadata: any) { }; return data; } + function toUtilizationStats(metadata: any) { const stats: UtilizationAggreStats = { cpu_max: metadata.cpu_max, diff --git a/torchci/lib/utilization/types.ts b/torchci/lib/utilization/types.ts index fb227d2b38..42c3a86d27 100644 --- a/torchci/lib/utilization/types.ts +++ b/torchci/lib/utilization/types.ts @@ -7,6 +7,11 @@ export const EMPTY_LIST_UTILIZATION_METADATA_INFO_API_RESPONSE: ListUtilizationM metadata_list: [], }; + + export const EMPTY_LIST_WORKFLOWS_UTILIZATION_METADATA_INFO_API_RESPONSE: ListWorkflowsUtilizationMetadataInfoAPIResponse = + { + metadata_map: {}, + }; export interface UtilizationParams { workflow_id: string; job_id: string; @@ -94,6 +99,13 @@ export interface UtilizationAggreStats { gpu_allocated_memory_p90?: number; } + +export interface ListWorkflowsUtilizationMetadataInfoAPIResponse { + metadata_map:{ + [key: number]: UtilizationMetadataInfo[]; + } +} + /** * The response of the API call to list utilization metadata info. * @param metadata_list The list of utilization metadata info. 
diff --git a/torchci/pages/api/list_utilization_metadata_info/[workflowId].ts b/torchci/pages/api/list_utilization_metadata_info/workflow/[workflowId].ts similarity index 84% rename from torchci/pages/api/list_utilization_metadata_info/[workflowId].ts rename to torchci/pages/api/list_utilization_metadata_info/workflow/[workflowId].ts index 1bdde0af32..c0e4a0cbd8 100644 --- a/torchci/pages/api/list_utilization_metadata_info/[workflowId].ts +++ b/torchci/pages/api/list_utilization_metadata_info/workflow/[workflowId].ts @@ -1,5 +1,5 @@ import { getErrorMessage } from "lib/error_utils"; -import fetchListUtilizationMetadataInfo from "lib/utilization/fetchListUtilizationMetadataInfo"; +import { fetchListUtilizationMetadataInfoForSingleWorkflow } from "lib/utilization/fetchListUtilizationMetadataInfo"; import { EMPTY_LIST_UTILIZATION_METADATA_INFO_API_RESPONSE, ListUtilizationMetadataInfoParams, @@ -26,7 +26,7 @@ export default async function handler( }; try { - const resp = await fetchListUtilizationMetadataInfo(params); + const resp = await fetchListUtilizationMetadataInfoForSingleWorkflow(params); if (!resp) { return res .status(200) diff --git a/torchci/pages/api/list_utilization_metadata_info/workflows.ts b/torchci/pages/api/list_utilization_metadata_info/workflows.ts new file mode 100644 index 0000000000..7a4a302970 --- /dev/null +++ b/torchci/pages/api/list_utilization_metadata_info/workflows.ts @@ -0,0 +1,85 @@ +import { readApiGetParams } from "lib/benchmark/api_helper/backend/common/utils"; +import { getErrorMessage } from "lib/error_utils"; +import { listUtilizationMetadataInfo } from "lib/utilization/fetchListUtilizationMetadataInfo"; +import { EMPTY_LIST_WORKFLOWS_UTILIZATION_METADATA_INFO_API_RESPONSE } from "lib/utilization/types"; +import { NextApiRequest, NextApiResponse } from "next"; + +/** + * API Route: /api/list_utilization_metadata_info/workflows + * List utilization metadata info for the given workflows of a repo. 
+ * Responds with 400 when `repo` or `workflow_ids` is missing or empty. + * + * Supported Methods: + * - GET : Pass parameters via query string + * Example: + * /api/list_utilization_metadata_info/workflows?parameters={repo:"pytorch/pytorch", workflow_ids:["f1234567890"]} + * - POST : Pass parameters in JSON body + * Example: + * { + * + * repo: string, + * workflow_ids: Array, + * } + **/ +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "GET" && req.method !== "POST") { + res.setHeader("Allow", "GET, POST"); + return res.status(405).json({ error: "Only GET and POST allowed" }); + } + const params = readApiGetParams(req); + console.log("[API]list_utilization_metadata_info/workflows, received request:", params); + + // validate params + if ( + !params || + !params.query_params || + Object.keys(params).length == 0) { + return res.status(400).json({ error: "Missing parameters" }); + } + + if (!params.workflow_ids || params.workflow_ids.length == 0 || !params.repo) { + return res.status(400).json({ error: "Missing required parameters" }); + } + + try { + const resp = await fetchListUtilizationMetadataInfoForWorkflows(params); + if (!resp) { + return res + .status(200) + .json(EMPTY_LIST_WORKFLOWS_UTILIZATION_METADATA_INFO_API_RESPONSE); + } + return res.status(200).json(resp); + } catch (error) { + const err_msg = getErrorMessage(error); + console.error("[API]list_utilization_metadata_info/workflows, error: ", err_msg) + return res.status(500).json({ error: err_msg }); + } +} + + + +export async function fetchListUtilizationMetadataInfoForWorkflows( + params: any +): Promise { + let workflowIds = [] + if (params.workflow_id){ + workflowIds = [params.workflow_id] + } else if (params.workflow_ids){ + workflowIds = params.workflow_ids + } + let repos = [params.repo] + const meta_resp = await listUtilizationMetadataInfo( + workflowIds, + repos + ); + + if (!meta_resp || meta_resp.length == 0) { + return 
EMPTY_LIST_WORKFLOWS_UTILIZATION_METADATA_INFO_API_RESPONSE; + } + return { + metadata_map: + }; +} diff --git a/torchci/pages/utilization/[workflowId]/[jobId]/[attempt]/[[...page]].tsx b/torchci/pages/utilization/[workflowId]/[jobId]/[attempt]/[[...page]].tsx index 81b57c0e16..f25a07d3ea 100644 --- a/torchci/pages/utilization/[workflowId]/[jobId]/[attempt]/[[...page]].tsx +++ b/torchci/pages/utilization/[workflowId]/[jobId]/[attempt]/[[...page]].tsx @@ -9,8 +9,10 @@ const JobUtilization = () => { const router = useRouter(); const { workflowId, jobId, attempt } = router.query; + let shouldFetch = workflowId && jobId; + let { data, error } = useSWRImmutable( - `/api/utilization/${workflowId}/${jobId}/${attempt}`, + shouldFetch ? `/api/utilization/${workflowId}/${jobId}/${attempt}` : null, fetcherHandleError, { errorRetryCount: 3,