diff --git a/helm-frontend/src/components/Landing/CallCenterLanding.tsx b/helm-frontend/src/components/Landing/CallCenterLanding.tsx deleted file mode 100644 index f00d5c8116..0000000000 --- a/helm-frontend/src/components/Landing/CallCenterLanding.tsx +++ /dev/null @@ -1,58 +0,0 @@ -import MiniLeaderboard from "@/components/MiniLeaderboard"; -import { Link } from "react-router-dom"; - -export default function CallCenterLanding() { - return ( -
- LLMs show great potential for call center applications, yet - there is a lack of domain-specific and ecologically valid - evaluations in this domain. To address this, we introduce the{" "} - HELM Call Center leaderboard. - The HELM Call Center leaderboard evaluates leading LLMs on a - summarization task over a dataset of real helpdesk call transcripts - provided by Accenture. The quality of the summaries is evaluated - using LLM-as-judge with an ensemble of 3 models. We hope that this - leaderboard provides some initial insights into the potential of - LLMs in this domain. -
-- This leaderboard was produced through research collaboration with{" "} - - Accenture - - , and was funded by the{" "} - - HAI Corporate Affiliate Program - - . -
-
- - Large language models (LLMs) show great potential for call center - applications, yet there is a lack of domain-specific and - ecologically valid evaluations in this domain. To address this, we - introduce the{" "} - - HELM Call Transcript Summarization - {" "} - leaderboard, which evaluates leading LLMs on a summarization task - over a dataset of real call transcripts provided by Accenture. -
-- This dataset consists of 162 transcribed calls to an internal - corporate IT helpdesk. The calls were transcribed using an automatic - speech recognition (ASR) model. Transcription errors were - deliberately left uncorrected to reflect the nature of real-life - transcripts. The transcripts were anonymized using a semi-automated - process with human verification. -
-- To evaluate the LLMs, summaries of the transcripts were generated - using 17 LLMs. The quality of the generated summaries was then - evaluated using LLM-as-judge with an ensemble of 3 models. -
-- As with all HELM leaderboards, this leaderboard provides full - transparency into all LLM requests and responses, and the results - are reproducible using the HELM open source framework. We hope that - this leaderboard offers initial insights into the potential of LLMs - for this task. -
-- This leaderboard was produced through research collaboration with{" "} - - Accenture - - , and was funded by the{" "} - - HAI Corporate Affiliate Program - - . -
-