#include <catch2/catch_test_macros.hpp>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <nlohmann/json.hpp>
#include <string>

#include "openai/OpenAIClient.h"
#include "openai/OpenAITypes.h"

// `json` is used for the schema below; OpenAITypes.h may already expose this alias.
using json = nlohmann::json;
using namespace std::chrono;

// Models that accept the `reasoning` request parameter.
static bool isReasoningModel(OpenAI::Model model) {
    return model == OpenAI::Model::GPT_5 || model == OpenAI::Model::GPT_5_Mini ||
           model == OpenAI::Model::GPT_5_Nano || model == OpenAI::Model::O3 ||
           model == OpenAI::Model::O3_Mini || model == OpenAI::Model::O1 ||
           model == OpenAI::Model::O1_Mini || model == OpenAI::Model::O1_Preview ||
           model == OpenAI::Model::O1_Pro || model == OpenAI::Model::O4_Mini;
}

// Responses-capable models that cannot produce JSON-schema output in this bench.
static bool isExcludedModel(const std::string& modelName) {
    return modelName == "gpt-image-1" || modelName == "computer-use-preview";
}

// Not referenced in this test; marked to avoid -Wunused-function warnings.
[[maybe_unused]] static bool isGpt5Family(OpenAI::Model model) {
    return model == OpenAI::Model::GPT_5 || model == OpenAI::Model::GPT_5_Mini ||
           model == OpenAI::Model::GPT_5_Nano;
}

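// How to run (sketch; the test binary name depends on your build setup):
//
//   export OPENAI_API_KEY=sk-...
//   LLMCPP_RUN_BENCHMARKS=1 ./llmcpp_tests "[benchmark]"
//
// Each model prints one CSV row:
//   model,elapsed_ms,status,input_tokens,output_tokens,total_tokens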
TEST_CASE("OpenAI model benchmarks (structured outputs)", "[openai][integration][benchmark]") {
    const char* runBenchEnv = std::getenv("LLMCPP_RUN_BENCHMARKS");
    if (!runBenchEnv || std::string(runBenchEnv) != "1") {
        SUCCEED("Benchmarks skipped. Set LLMCPP_RUN_BENCHMARKS=1 to enable.");
        return;
    }

    const char* apiKey = std::getenv("OPENAI_API_KEY");
    REQUIRE(apiKey != nullptr);

    OpenAIClient client(apiKey);

    // Minimal strict schema: structured outputs require every property to be listed
    // in `required` and `additionalProperties` to be false.
    json schema = {{"type", "object"},
                   {"properties", {{"answer", {{"type", "string"}}}}},
                   {"required", json::array({"answer"})},
                   {"additionalProperties", false}};
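    // A schema-conforming response looks like: {"answer": "OK"}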

    // Prompt whose answer trivially fits the schema's single "answer" field.
    auto input = OpenAI::ResponsesInput::fromText(
        "Return a JSON object that conforms to the provided schema with answer set to 'OK'.");

    // Iterate over every Responses-capable model.
    for (const auto& modelName : OpenAI::RESPONSES_MODELS) {
        if (isExcludedModel(modelName)) {
            std::cout << "[BENCH] skipping model=" << modelName
                      << " (not supported for JSON schema bench)" << std::endl;
            continue;
        }
        DYNAMIC_SECTION("Benchmark model: " << modelName) {
            OpenAI::ResponsesRequest req;
            req.model = modelName;
            req.input = input;
            req.text = OpenAI::TextOutputConfig("bench_schema", schema, true);
            // Leave max_output_tokens unset so the server default applies to every model.
            auto modelEnum = OpenAI::modelFromString(modelName);

            // Reasoning-capable models accept an effort setting; use "low" to keep runs fast.
            if (isReasoningModel(modelEnum)) {
                req.reasoning = json{{"effort", "low"}};
            }

            const auto start = steady_clock::now();
            auto response = client.sendResponsesRequest(req);
            const auto end = steady_clock::now();

            const auto elapsedMs = duration_cast<milliseconds>(end - start).count();
            const bool ok = response.isCompleted() && !response.hasError();
            const int inTok = response.usage.inputTokens;
            const int outTok = response.usage.outputTokens;
            const int totalTok = inTok + outTok;
            // CSV row: model,elapsed_ms,status,input_tokens,output_tokens,total_tokens
            std::cout << modelName << "," << elapsedMs << "," << (ok ? "ok" : "fail")
                      << "," << inTok << "," << outTok << "," << totalTok << std::endl;

            // Sanity check: we should at least get a response object back; don't
            // assert full success, which would make the benchmark flaky.
            REQUIRE(!response.id.empty());
        }
    }
}