Merge pull request #45 from lucaromagnoli/ci/security-workflow

lucaromagnoli · web-flow · commit 9505901cf9f8 · 2025-08-08T22:11:30.000+01:00
CI/security workflow
diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
@@ -0,0 +1,90 @@
+name: Security  # secret-file guard, CodeQL (C/C++) and gitleaks jobs
+
+on:
+  push:
+    branches: ["**"]  # pushes to any branch
+    paths-ignore:  # pushes that only touch these paths do not start the workflow
+      - '**.md'
+      - 'docs/**'
+      - 'LICENSE'
+  schedule:
+    - cron: '0 3 * * 1'  # Weekly, Mondays at 03:00 UTC
+  workflow_dispatch:  # manual runs
+
+permissions:  # token scopes shared by all jobs in this workflow
+  contents: read
+  security-events: write  # needed to upload results to code scanning
+  actions: read
+  pull-requests: read
+
+jobs:
+  secret-file-guard:
+    name: Secret file guard
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Fail if private key-like files present
+        run: |
+          echo "Scanning workspace for private key material (*.pem, *.p12, *.pfx, *.key)..."
+          # One definition of the file-name test, shared by the check and the listing.
+          key_like() { find . -type f \( -name "*.pem" -o -name "*.p12" -o -name "*.pfx" -o -name "*.key" \) "$@"; }
+          key_like | grep -q . || { echo "✅ No private key files detected."; exit 0; }
+          echo "❌ Private key-like files found in workspace."
+          key_like -print
+          exit 1
+
+  codeql:  # builds the project with CMake between the CodeQL init and analyze steps
+    name: CodeQL (C/C++)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Initialize CodeQL  # runs before the build steps below
+        uses: github/codeql-action/init@v3
+        with:
+          languages: cpp
+
+      - name: Configure build  # Release configuration with LLMCPP_BUILD_TESTS=OFF, Ninja generator, output in ./build
+        run: |
+          cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLMCPP_BUILD_TESTS=OFF
+
+      - name: Build
+        run: cmake --build build --parallel
+
+      - name: Perform CodeQL Analysis  # NOTE(review): presumably also uploads the results to code scanning - confirm
+        uses: github/codeql-action/analyze@v3
+
+  gitleaks:  # scans the repository with a pinned gitleaks release and uploads the report as SARIF
+    name: Gitleaks (Secret scanning)
+    continue-on-error: ${{ github.ref != 'refs/heads/main' }}  # a failing scan only fails the workflow run on main
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # fetch the full history rather than only the latest commit
+      - name: Install gitleaks
+        run: |
+          set -euo pipefail
+          VERSION=8.24.3  # pinned release; used for both the tag and the archive name
+          curl -fsSL -o gitleaks.tgz "https://github.com/gitleaks/gitleaks/releases/download/v${VERSION}/gitleaks_${VERSION}_linux_x64.tar.gz"  # -f: stop on HTTP errors instead of saving the error page as the archive
+          tar -xzf gitleaks.tgz gitleaks
+          sudo mv gitleaks /usr/local/bin/gitleaks
+          gitleaks version
+      - name: Run gitleaks (generate SARIF)
+        env:  # NOTE(review): the token looks unused by the CLI call below - confirm before removing
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          gitleaks detect --source . --redact --report-format sarif --report-path gitleaks.sarif
+      - name: Upload SARIF to code scanning
+        uses: github/codeql-action/upload-sarif@v3
+        if: always() && hashFiles('gitleaks.sarif') != ''  # upload even if the scan step failed, provided a report was written
+        with:
+          sarif_file: gitleaks.sarif
+
+  # Dependency Review is PR-only by design; omit to avoid duplicate PR checks
+
+
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -34,6 +34,7 @@ set(INTEGRATION_TEST_SOURCES
 add_executable(llmcpp_tests
     ${UNIT_TEST_SOURCES}
     ${INTEGRATION_TEST_SOURCES}
+    bench/benchmark_core.cpp
 )
 
 # Link against the library and test framework
diff --git a/tests/bench/benchmark_core.cpp b/tests/bench/benchmark_core.cpp
@@ -0,0 +1,75 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/benchmark/catch_benchmark.hpp>
+
+#include "openai/OpenAITypes.h"
+
+using namespace OpenAI;
+
+// Builds the fixed ResponsesRequest that the serialization benchmark measures.
+static ResponsesRequest makeRequest() {
+    // Conversation: one system message followed by one user message.
+    InputMessage systemMsg;
+    systemMsg.role = InputMessage::Role::System;
+    systemMsg.content = "You are helpful.";
+
+    InputMessage userMsg;
+    userMsg.role = InputMessage::Role::User;
+    userMsg.content = "Hello";
+
+    std::vector<InputMessage> conversation;
+    conversation.push_back(systemMsg);
+    conversation.push_back(userMsg);
+
+    // Output schema: an object with a single required string property.
+    json answerSchema = json::parse(R"({
+        "type":"object",
+        "properties":{"answer":{"type":"string"}},
+        "required":["answer"]
+    })");
+
+    ResponsesRequest request;
+    request.model = "gpt-4o";
+    request.instructions = "Answer briefly.";
+    request.maxOutputTokens = 128;
+    request.toolChoice = ToolChoiceMode::Auto;
+    request.text = TextOutputConfig("answer_schema", answerSchema, true);
+    request.input = ResponsesInput::fromContentList(conversation);
+    return request;
+}
+
+TEST_CASE("Benchmark: ResponsesRequest serialization", "[benchmark]") {
+    auto request = makeRequest();  // built once, outside the measured body
+    BENCHMARK("toJson serialize") {
+        return request.toJson();
+    };
+}
+
+TEST_CASE("Benchmark: ResponsesResponse parsing", "[benchmark]") {
+    // Parsed once up front so that only fromJson() sits inside the measured body.
+    auto payload = json::parse(R"({
+        "id": "resp_123",
+        "object": "response",
+        "created_at": 0,
+        "status": "completed",
+        "model": "gpt-4o-mini-2024-07-18",
+        "usage": {"input_tokens": 10, "output_tokens": 5},
+        "output": [
+          {"type":"message","id":"msg_1","role":"assistant","content":[{"type":"output_text","text":"Hi"}]}
+        ]
+    })");
+
+    BENCHMARK("fromJson parse") {
+        return ResponsesResponse::fromJson(payload);
+    };
+}
+
+TEST_CASE("Benchmark: Model enum conversions", "[benchmark]") {
+    BENCHMARK("modelToString") {  // Model enumerator -> string
+        return toString(Model::GPT_4o);
+    };
+    BENCHMARK("stringToModel") {  // model name -> Model
+        return modelFromString("gpt-4o");
+    };
+}
+
+
diff --git a/tests/integration/test_benchmarks.cpp b/tests/integration/test_benchmarks.cpp
@@ -0,0 +1,77 @@
+#include <catch2/catch_test_macros.hpp>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include "openai/OpenAIClient.h"
+#include "openai/OpenAITypes.h"
+
+using namespace std::chrono;
+
+// Returns true for the models that get a reduced reasoning effort in the benchmark below
+// (the GPT-5 family and the o1/o3/o4 series).
+static bool isReasoningModel(OpenAI::Model model) {
+    return model == OpenAI::Model::GPT_5 || model == OpenAI::Model::GPT_5_Mini ||
+           model == OpenAI::Model::GPT_5_Nano || model == OpenAI::Model::O3 ||
+           model == OpenAI::Model::O3_Mini || model == OpenAI::Model::O1 ||
+           model == OpenAI::Model::O1_Mini || model == OpenAI::Model::O1_Preview ||
+           model == OpenAI::Model::O1_Pro || model == OpenAI::Model::O4_Mini;
+}
+
+// Opt-in latency benchmark: sends one small structured-output request to every entry of
+// OpenAI::RESPONSES_MODELS and prints the elapsed wall-clock time per model.
+//
+// Skipped unless LLMCPP_RUN_BENCHMARKS=1; when enabled, OPENAI_API_KEY must be set.
+TEST_CASE("OpenAI model benchmarks (structured outputs)", "[openai][integration][benchmark]") {
+    const char* runBenchEnv = std::getenv("LLMCPP_RUN_BENCHMARKS");
+    if (!runBenchEnv || std::string(runBenchEnv) != "1") {
+        SUCCEED("Benchmarks skipped. Set LLMCPP_RUN_BENCHMARKS=1 to enable.");
+        return;
+    }
+
+    const char* apiKey = std::getenv("OPENAI_API_KEY");
+    REQUIRE(apiKey != nullptr);
+
+    OpenAI::OpenAIClient client(apiKey);
+
+    // Minimal structured output schema
+    json schema = {{"type", "object"},
+                   {"properties", {{"answer", {{"type", "string"}}}}},
+                   {"required", json::array({"answer"})},
+                   {"additionalProperties", false}};
+
+    // Simple input
+    auto input = OpenAI::ResponsesInput::fromText("Reply with the word OK.");
+
+    // Iterate through response-capable models
+    for (const auto& modelName : OpenAI::RESPONSES_MODELS) {
+        DYNAMIC_SECTION("Benchmark model: " << modelName) {
+            OpenAI::ResponsesRequest req;
+            req.model = modelName;
+            req.input = input;
+            req.text = OpenAI::TextOutputConfig("bench_schema", schema, true);
+            req.maxOutputTokens = 16;
+
+            // Tweak reasoning parameters when appropriate
+            auto modelEnum = OpenAI::modelFromString(modelName);
+            if (isReasoningModel(modelEnum)) {
+                req.reasoning = json{{"effort", "low"}};
+            }
+
+            // Only the sendResponsesRequest() call sits between the two clock reads.
+            const auto start = steady_clock::now();
+            auto response = client.sendResponsesRequest(req);
+            const auto end = steady_clock::now();
+
+            const auto elapsedMs = duration_cast<milliseconds>(end - start).count();
+            std::cout << "[BENCH] model=" << modelName << ", ms=" << elapsedMs
+                      << ", success=" << (response.isCompleted() && !response.hasError())
+                      << std::endl;
+
+            // Sanity: we should at least get a response object back; don't assert success to avoid
+            // flakes
+            REQUIRE(!response.id.empty());
+        }
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -34,6 +34,7 @@ set(INTEGRATION_TEST_SOURCES`
`34`	`34`	`add_executable(llmcpp_tests`
`35`	`35`	`${UNIT_TEST_SOURCES}`
`36`	`36`	`${INTEGRATION_TEST_SOURCES}`
	`37`	`+ bench/benchmark_core.cpp`
`37`	`38`	`)`
`38`	`39`
`39`	`40`	`# Link against the library and test framework`