Giskard-AI · kevinmessiaen · Dec 18, 2025 · Dec 23, 2025 · Dec 23, 2025 · Dec 23, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
     rev: v1.5.0
     hooks:
       - id: detect-secrets
-        args: ["--baseline", ".secrets.baseline"]
+        args: ["--baseline", ".secrets.baseline", "--exclude-secrets", "your-api-key"]
         exclude: |
           (?x)^(
             .*\.lock$|

diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.12
+3.13
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,29 +6,26 @@ authors = [
     {name = "Giskard Team", email = "hello@giskard.ai"}
 ]
 readme = "README.md"
-requires-python = ">=3.10,<4.0"
-dependencies = []
+requires-python = ">=3.13,<4.0"
+dependencies = [
+    "giskard-core @ git+ssh://git@github.com/Giskard-AI/giskard-oss.git@feature/giskard-v3#subdirectory=libs/giskard-core",
+    "giskard-agents @ git+ssh://git@github.com/Giskard-AI/giskard-oss.git@feature/giskard-v3#subdirectory=libs/giskard-agents",
+    "giskard-checks @ git+ssh://git@github.com/Giskard-AI/giskard-oss.git@feature/giskard-v3#subdirectory=libs/giskard-checks",
+]
 
 [dependency-groups]
 dev = [
     "sphinxawesome-theme==5.3.2; python_version>='3.12'",
     "myst-parser==4.0.1; python_version>='3.12'",
-    "notebook==7.4.7",
-    "nbsphinx==0.9.7; python_version>='3.12'",
     "sphinx-click==6.1.0; python_version>='3.12'",
     "sphinx-autobuild==2025.8.25; python_version>='3.12'",
     "sphinx-autodoc-typehints==2.3.0; python_version>='3.12'",
     "sphinx-design==0.6.1; python_version>='3.12'",
     "sphinx-tabs>=3.4.7; python_version>='3.12'",
     "sphinxext-opengraph[social_cards]>=0.12.0; python_version>='3.12'",
     "sphinx-notfound-page>=1.1.0; python_version>='3.12'",
-    "pandoc>=2.4",
     "sphinxcontrib-mermaid>=0.9.0; python_version>='3.12'",
-    "giskard[llm]==2.18.0; python_version>='3.10' and python_version<'3.13'",
     "pyarrow<21.0.1; python_version>='3.12'",
-    "ragas>=0.3.7,<=0.3.7",
-    "ipywidgets>=8.1.7",
-    "torch>=2.8.0",
     "sphinx-autobuild>=2024.10.3",
     "giskard-hub>=2.1.0",
     "sphinxext-rediraffe"
@@ -39,12 +36,3 @@ dev = [
 Homepage = "https://github.com/Giskard-AI/giskard-hub"
 Repository = "https://github.com/Giskard-AI/giskard-hub"
 Documentation = "https://docs.giskard.ai/"
-
-[[tool.uv.index]]
-name = "pytorch_cpu"
-url = "https://download.pytorch.org/whl/cpu"
-explicit = true
-
-[tool.uv.sources]
-# Use CPU-only PyTorch for non-macOS systems, default PyPI for macOS
-torch = { index = "pytorch_cpu", marker = "platform_system != 'Darwin'" }
diff --git a/source/_static/custom.css b/source/_static/custom.css
@@ -27,6 +27,7 @@
   --sidebar-heading-color: #0f1729;
   --non-selected-color: rgba(15, 23, 41, 0.6);
   --link-color: inherit;
+  --border: 0 0% 100% / 0.10;
 }
 
 .dark {
@@ -420,6 +421,14 @@ header nav a:not(.text-foreground):hover {
   color: rgba(198, 255, 255, 0.8) !important;
 }
 
+#left-sidebar a.current {
+  border: none;
+}
+
+#left-sidebar ul ul:is(.dark *)::before {
+  background-color: hsl(var(--border));
+}
+
 /* Recently selected navbar item styling */
 header nav a.recently-selected,
 html[data-content_root="./"] header nav a.recently-selected,

diff --git a/source/_static/images/oss/checks/quickstart-simple_example_result.png b/source/_static/images/oss/checks/quickstart-simple_example_result.png
diff --git a/source/_static/images/oss/checks/quickstart-structured_interactions.png b/source/_static/images/oss/checks/quickstart-structured_interactions.png
diff --git a/source/_templates/sidebars/sidebar_oss_checks.html b/source/_templates/sidebars/sidebar_oss_checks.html
@@ -0,0 +1,3 @@
+<nav class="table w-full min-w-full my-6 lg:my-8">
+  {{ toctree_from_doc('oss/checks/index', collapse=False, maxdepth=20, includehidden=True, titles_only=False) }}
+</nav>
diff --git a/source/conf.py b/source/conf.py
@@ -70,7 +70,6 @@ def update_sidebar_templates():
 
 extensions = [
     "myst_parser",
-    "nbsphinx",
     "sphinx_design",
     "sphinx.ext.todo",
     "sphinx.ext.napoleon",
@@ -126,28 +125,20 @@ def update_sidebar_templates():
 html_js_files = ["custom.js"]
 html_favicon = "_static/favicon.ico"
 
+html_sidebars = {
+    "oss/checks/**": [
+        "sidebar_main_nav_links.html",
+        "sidebars/sidebar_oss_checks.html",
+    ],
+}
+
 # Do not execute the notebooks when building the docs
 docs_version = os.getenv("READTHEDOCS_VERSION", "latest")
 if docs_version == "latest" or docs_version == "stable":
     branch = "main"
 else:
     branch = docs_version.replace("-", "/")
 branch = "main"
-
-# -- Options for nbsphinx ----------------------------------------------------
-nbsphinx_execute = "never"
-# fmt: off
-nbsphinx_prolog = """
-.. raw:: html
-
-    <div class="open-in-colab__wrapper">
-    <a href="https://colab.research.google.com/github/Giskard-AI/giskard-hub/blob/""" + branch + """/script-docs/{{ env.doc2path(env.docname, base=None) }}" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" style="display: inline; margin: 0" alt="Open In Colab"/></a>
-    <a href="https://github.com/Giskard-AI/giskard-hub/tree/""" + branch + """/script-docs/{{ env.doc2path(env.docname, base=None) }}" target="_blank"><img src="https://img.shields.io/badge/github-view%20source-black.svg" style="display: inline; margin: 0" alt="View Notebook on GitHub"/></a>
-    </div>
-"""
-# fmt: on
-
-
 theme_options = ThemeOptions(
     show_prev_next=True,
     show_scrolltop=True,
@@ -158,7 +149,7 @@ def update_sidebar_templates():
         "Overview": "/index",
         "Hub UI": "/hub/ui/index",
         "Hub SDK": "/hub/sdk/index",
-        "Open Source": "/oss/sdk/index",
+        "Checks": "/oss/checks/index",
     },
 )
 html_theme_options = asdict(theme_options)
@@ -193,6 +184,49 @@ def update_sidebar_templates():
 ogp_image = "https://docs.giskard.ai/_static/open-graph-image.png"
 
 
+# Sphinx setup hook: exposes a toctree_from_doc() helper to HTML templates so a sidebar can render the toctree of a chosen document
+def setup(app):
+    def html_page_context(app, pagename, templatename, context, doctree):
+        def toctree_from_doc(docname, **kwargs):
+            """Render toctree starting from a specific document"""
+            from sphinx.environment.adapters.toctree import TocTree
+            from sphinx import addnodes
+            source_doctree = app.env.get_doctree(docname)  # doctree of the document whose toctrees are rendered
+            toctrees = list(source_doctree.findall(addnodes.toctree))  # every toctree node found in that document
+
+            if not toctrees:  # source document declares no toctree: render nothing
+                return ""
+
+            toctree_adapter = TocTree(app.env)
+            resolved = [
+                toctree_adapter.resolve(
+                    pagename,  # Use current page context, not the toctree source
+                    app.builder,
+                    toctree,
+                    prune=False,
+                    maxdepth=kwargs.get("maxdepth", -1),  # each option falls back to its default when the caller omits it
+                    titles_only=kwargs.get("titles_only", False),
+                    collapse=kwargs.get("collapse", False),
+                    includehidden=kwargs.get("includehidden", False),
+                )
+                for toctree in toctrees
+            ]
+
+            resolved = [r for r in resolved if r is not None]  # drop toctrees for which resolve() returned None
+            if not resolved:
+                return ""
+
+            result = resolved[0]  # merge target: the first resolved toctree
+            for toctree in resolved[1:]:
+                result.extend(toctree.children)  # append the entries of every further toctree to the first one
+
+            return app.builder.render_partial(result)["fragment"]  # only the "fragment" entry of the rendered parts is returned
+
+        context["toctree_from_doc"] = toctree_from_doc  # make the helper callable from templates via the page context
+
+    app.connect("html-page-context", html_page_context)  # register the handler for the html-page-context event
+
+
+
 # make github links resolve
 def linkcode_resolve(domain, info):
     if domain != "py":

diff --git a/source/index.rst b/source/index.rst
@@ -46,7 +46,6 @@ Giskard Hub
    Ready to unlock the full potential of enterprise-grade AI testing? Try **Giskard Hub** with a free trial and discover advanced team collaboration, continuous red teaming, and enterprise security features.
 
    :doc:`Start your free enterprise trial </start/enterprise-trial>` and see how Giskard Hub can transform your AI testing workflow.
-
 Open source
 -----------
 
@@ -74,7 +73,6 @@ The library provides a set of tools for testing and evaluating LLMs, including:
    **⚖️ Unsure about the difference between Open Source and Hub?**
 
    Check out our :doc:`/start/comparison` guide to learn more about the different features.
-
 Open research
 -------------
 
@@ -107,8 +105,6 @@ Some work has been funded by the `the European Commission <https://commission.eu
 .. tip::
 
    Are you interested in supporting our research? Check out our `Open Collective funding page for Phare <https://opencollective.com/phare-llm-benchmark>`_.
-
-
 .. include:: toctree.rst
 .. include:: toctree_hub_ui.rst
 .. include:: toctree_hub_sdk.rst

diff --git a/source/oss/checks/ai-testing/core-concepts.rst b/source/oss/checks/ai-testing/core-concepts.rst
@@ -0,0 +1,179 @@
+=============
+Core Concepts
+=============
+
+Understanding the key concepts in Giskard Checks will help you write effective tests for your AI applications.
+
+
+Overview
+--------
+
+Giskard Checks is built around a few core primitives that work together:
+
+* **Interaction**: A single turn of data exchange (inputs and outputs)
+* **InteractionSpec**: A specification for generating interactions dynamically
+* **Trace**: An immutable snapshot of all interactions in a scenario
+* **Check**: A validation that runs on a trace and returns a result
+* **Scenario**: A list of steps (interactions and checks) executed sequentially
+
+At runtime, the flow looks like this:
+
+1. A Scenario is created with a sequence of steps.
+
+2. For each step in order:
+
+   a. Each InteractionSpec is resolved into a concrete Interaction.
+   b. The Interaction is appended to the Trace.
+   c. Checks run against the current Trace.
+
+3. Results are returned as a ScenarioResult.
+
+Interaction
+-----------
+
+An ``Interaction`` represents a single turn of data exchange with the system under test.
+Interactions are computed at execution time by resolving ``InteractionSpec`` objects, and each resolved interaction is appended to the trace.
+
+**Properties:**
+
+* ``inputs``: The input to your system (string, dict, Pydantic model, etc.)
+* ``outputs``: The output from your system (any serializable type)
+* ``metadata``: Optional dictionary for additional context (timings, model info, etc.)
+
+Interactions are **immutable**, as they represent something that has already happened.
+
+
+InteractionSpec
+---------------
+
+An ``InteractionSpec`` describes *how* to generate an interaction and serves as a building block when defining a scenario.
+When you call ``.interact(...)`` in the fluent API, it adds an ``InteractionSpec`` to the scenario sequence.
+Inputs and outputs can be static values or dynamic callables, and you can mix both.
+
+.. code-block:: python
+
+   from giskard.checks import InteractionSpec
+   from openai import OpenAI
+   import random
+
+   def generate_random_question() -> str:
+       return f"What is 2 + {random.randint(0, 10)}?"
+
+   def generate_answer(inputs: str) -> str:
+       client = OpenAI()
+       response = client.chat.completions.create(
+           model="gpt-5-mini",
+           messages=[{"role": "user", "content": inputs}],
+       )
+       return response.choices[0].message.content
+
+   spec = InteractionSpec(
+       inputs=generate_random_question,
+       outputs=generate_answer,
+       metadata={
+           "category": "math",
+           "difficulty": "easy"
+       }
+   )
+
+Specs are resolved into interactions during scenario execution. Dynamic specs are especially useful in multi-turn scenarios, where inputs and outputs are generated based on previous interactions. See :doc:`multi-turn` for practical examples.
+
+Trace
+-----
+
+A ``Trace`` is an immutable snapshot of all data exchanged with the system under test. In its simplest form, it is a list of interactions.
+
+.. code-block:: python
+
+   from giskard.checks import Trace, Interaction
+
+   trace = Trace(interactions=[
+       Interaction(inputs="Hello", outputs="Hi there!"),
+       Interaction(inputs="How are you?", outputs="I'm doing well, thanks!")
+   ])
+
+Traces are typically created during scenario execution by resolving each ``InteractionSpec`` into a frozen interaction.
+
+
+Checks
+------
+
+A ``Check`` validates something about a trace and returns a ``CheckResult``. Giskard Checks ships with a library of built-in checks, and you can also create your own.
+
+When referencing values in a trace, use JSONPath expressions that start with ``trace.``. The ``last`` property is a shortcut for ``interactions[-1]`` and can be used in both JSONPath keys and Python code.
+
+.. code-block:: python
+
+   from giskard.checks import Groundedness, Trace
+
+   check = Groundedness(
+        answer_key="trace.last.outputs",
+        context="Giskard Checks is a testing framework for AI systems."
+   )
+
+
+Scenario
+--------
+
+A ``Scenario`` is a list of steps (interactions and checks) that are executed sequentially with a shared trace. Scenarios work for both single-turn and multi-turn tests.
+
+.. code-block:: python
+
+   from giskard.checks import scenario
+
+   test_scenario = (
+       scenario("test_with_checks")
+       .interact(inputs="test input", outputs="test output")
+       .check(check1)
+       .check(check2)
+   )
+
+   result = await test_scenario.run()
+
+.. note::
+   The ``run()`` method is asynchronous. When running in a script, use ``asyncio.run()``:
+
+   .. code-block:: python
+
+      import asyncio
+
+      async def main():
+          result = await test_scenario.run()
+          return result
+
+      result = asyncio.run(main())
+
+   In async contexts (like pytest with ``@pytest.mark.asyncio``), you can use ``await`` directly.
+
+Running the scenario returns a result object that contains the results of the checks.
+
+
+Fluent API Mapping
+------------------
+
+The fluent API is the preferred user-facing entry point and maps directly to the core primitives above:
+
+* ``scenario(name)`` creates a ``Scenario`` builder.
+* ``.interact(...)`` adds an ``InteractionSpec`` to the scenario sequence.
+* ``.check(...)`` adds a ``Check`` to the scenario sequence.
+* ``.run()`` resolves specs to interactions, builds the ``Trace``, runs checks, and returns a ``ScenarioResult``.
+
+For example, we can test a simple conversation flow with two turns:
+
+.. code-block:: python
+
+   from giskard.checks import scenario, Conformity
+
+   test_scenario = (
+       scenario("conversation_flow")
+       .interact(inputs="Hello", outputs=generate_answer)
+       .check(Conformity(key="trace.last.outputs", rule="response should be a friendly greeting"))
+       .interact(inputs="Who invented HTML?", outputs=generate_answer)
+       .check(Conformity(key="trace.last.outputs", rule="response should mention Tim Berners-Lee as the inventor of HTML"))
+   )
+
+   # Run with asyncio.run() if in a script
+   import asyncio
+   result = await test_scenario.run()  # or: result = asyncio.run(test_scenario.run())
+
+For a practical introduction to the fluent API, see :doc:`quickstart`.