GitHubSecurityLab · kevinbackhouse · Oct 31, 2025 · Oct 27, 2025 · Oct 28, 2025 · Oct 28, 2025
@@ -11,10 +11,11 @@ permissions:
 
 jobs:
   test:
-    name: Run Tests
-    runs-on: ubuntu-latest
+    name: Run Tests ${{ matrix.python-version }} on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
     strategy:
       matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
         python-version: ['3.11', '3.13'] # the version used in the Codespace + the latest one supported by PyO3.
       fail-fast: false  # Continue testing other version(s) if one fails
 
@@ -28,17 +29,14 @@ jobs:
         python-version: ${{ matrix.python-version }}
         cache: 'pip'
 
-    - name: Install dependencies
-      run: |
-        python -m venv .venv
-        source .venv/bin/activate
-        python -m pip install --upgrade pip
-        python -m pip install -r requirements.txt
-        python -m pip install -r requirements-test.txt
 
-    - name: Run tests with pytest
+    - name: Install Hatch
+      run: pip install --upgrade hatch
+
+    - name: Run static analysis
       run: |
-        source .venv/bin/activate
-        pytest tests/ -v
-      env:
-        PYTHONPATH: ${{ github.workspace }}
+        # hatch fmt --check
+        echo linter errors will be fixed in a separate PR
+
+    - name: Run tests
+      run: hatch test --python ${{ matrix.python-version }} --cover --randomize --parallel --retries 2 --retry-delay 1
@@ -40,7 +40,7 @@ jobs:
         run: |
           python -m venv .venv
           source .venv/bin/activate
-          python -m pip install -r requirements.txt
+          python -m pip install hatch
 
       - name: Run tests
         env:
@@ -49,19 +49,20 @@ jobs:
 
         run: |
           source .venv/bin/activate
-          python main.py -p personalities.assistant 'explain modems to me please'
-          python main.py -p personalities.c_auditer 'explain modems to me please'
-          python main.py -p personalities.examples.echo 'explain modems to me please'
-          python main.py -t taskflows.CVE-2023-2283.CVE-2023-2283
-          python main.py -t taskflows.examples.echo
-          python main.py -t taskflows.examples.example
-          python main.py -t taskflows.examples.example_globals
-          python main.py -t taskflows.examples.example_inputs
-          python main.py -t taskflows.examples.example_large_list_result_iter
-          python main.py -t taskflows.examples.example_repeat_prompt
-          python main.py -t taskflows.examples.example_repeat_prompt_async
-          python main.py -t taskflows.examples.example_repeat_prompt_dictionary
-          python main.py -t taskflows.examples.example_reusable_prompt
-          python main.py -t taskflows.examples.example_reusable_taskflows
-          python main.py -t taskflows.examples.example_triage_taskflow
-          python main.py -t taskflows.examples.single_step_taskflow
+          hatch build
+          hatch run main -p seclab_taskflow_agent.personalities.assistant 'explain modems to me please'
+          hatch run main -p seclab_taskflow_agent.personalities.c_auditer 'explain modems to me please'
+          hatch run main -p examples.personalities.echo 'explain modems to me please'
+          hatch run main -t examples.taskflows.CVE-2023-2283
+          hatch run main -t examples.taskflows.echo
+          hatch run main -t examples.taskflows.example
+          hatch run main -t examples.taskflows.example_globals
+          hatch run main -t examples.taskflows.example_inputs
+          hatch run main -t examples.taskflows.example_large_list_result_iter
+          hatch run main -t examples.taskflows.example_repeat_prompt
+          hatch run main -t examples.taskflows.example_repeat_prompt_async
+          hatch run main -t examples.taskflows.example_repeat_prompt_dictionary
+          hatch run main -t examples.taskflows.example_reusable_prompt
+          hatch run main -t examples.taskflows.example_reusable_taskflows
+          hatch run main -t examples.taskflows.example_triage_taskflow
+          hatch run main -t examples.taskflows.single_step_taskflow
@@ -57,7 +57,7 @@ The task should include the `filekey` in its list of `agents`:
 ```yaml
   - task:
       agents:
-        - personalities.assistant
+        - seclab_taskflow_agent.personalities.assistant
   ...
 ```
 
@@ -108,7 +108,7 @@ Example:
 
 ### Running templated tasks in a loop
 
-Often we may want to iterate through the same tasks with different inputs. For example, we may want to do fetch all the functions from a code base and then analyze each of the function. This can be done using two consecutive task and with the help of the `repeat_prompt` field. 
+Often we may want to iterate through the same tasks with different inputs. For example, we may want to fetch all the functions from a code base and then analyze each of the functions. This can be done using two consecutive tasks with the help of the `repeat_prompt` field.
 
 ```yaml
   - task:
@@ -124,7 +124,7 @@ Often we may want to iterate through the same tasks with different inputs. For e
       The function has name {{ RESULT_name }} and body {{ RESULT_body }} analyze the function.
 ```
 
-In the above, the first task fetches functions in the code base and create a json list object, with each entry having a `name` and `body` field. In the next task, `repeat_prompt` is set to true, meaning that a task is created for each individual object in the list and the object fields are referenced in the templated prompt using `{{ RESULT_<fieldname> }}`. In other words, `{{ RESULT_name }}` in the prompt is replaced with the value of the `name` field of the object etc. For example, if the list of functions fetched from the first task is:
+In the above, the first task fetches functions in the code base and creates a JSON list object, with each entry having a `name` and a `body` field. In the next task, `repeat_prompt` is set to true, meaning that a task is created for each individual object in the list, and the object fields are referenced in the templated prompt using `{{ RESULT_<fieldname> }}`. In other words, `{{ RESULT_name }}` in the prompt is replaced with the value of the `name` field of the object, and likewise for the other fields. For example, if the list of functions fetched from the first task is:
 
 ```javascript
 [{'name' : foo, 'body' : foo(){return 1;}}, {'name' : bar, 'body' : bar(a) {return a + 1;}}]
@@ -189,7 +189,7 @@ An optional limit can be set to limit the number of asynchronous tasks via `asyn
       The function has name {{ RESULT_name }} and body {{ RESULT_body }} analyze the function.
 ```
 
-Both `async` and `async_limit` have no effect when use outside of a `repeat_prompt`.
+Both `async` and `async_limit` have no effect when used outside of a `repeat_prompt`.
 
 At the moment, we do not support nested `repeat_prompt`. So the following is not allowed:
 
@@ -207,7 +207,7 @@ At the moment, we do not support nested `repeat_prompt`. So the following is not
 
 #### Shell Tasks
 
-Tasks can be entirely shell based through the run directive. This simply runs a shell command and pass the result directly to the next task. It is used for creating iterable results for `repeat_prompt`.
+Tasks can be entirely shell-based through the run directive. This simply runs a shell command and passes the result directly to the next task. It can be used for creating iterable results for `repeat_prompt`.
 
 For example:
 
@@ -255,15 +255,15 @@ Toolboxes are MCP server configurations. They can be defined at the Agent level
   - task:
       ...
       toolboxes:
-        - toolboxes.codeql
+        - seclab_taskflow_agent.toolboxes.codeql
 ```
 
 If no `toolboxes` field is specified, then the `toolboxes` defined in the `personality` of the `agent` are used:
 
 ```yaml
    - task:
       agents:
-        - personalities.c_auditer
+        - seclab_taskflow_agent.personalities.c_auditer
       user_prompt: |
         List all the files in the codeql database `some/codeql/db`.      
    - task:
@@ -276,15 +276,15 @@ Note that when `toolboxes` is defined for a task, it *overwrites* the `toolboxes
 ```yaml
    - task:
       agents:
-        - personalities.c_auditer
+        - seclab_taskflow_agent.personalities.c_auditer
       user_prompt: |
         List all the files in the codeql database `some/codeql/db`.      
       toolboxes:
-        - toolboxes.echo
+        - seclab_taskflow_agent.toolboxes.echo
 
 ```
 
-For this task, the `agent` `personalities.c_auditer` will have access to the `toolboxes.echo` tool.
+For this task, the `agent` `seclab_taskflow_agent.personalities.c_auditer` will have access to the `seclab_taskflow_agent.toolboxes.echo` tool.
 
 ### Headless Runs
 
@@ -380,11 +380,9 @@ Then the `task` that uses it effectively becomes:
         - some_toolboxes
 ```
 
-which all settings inherited from `single_step_taskflow` while `model` is overwritten.
-
 Any `taskflow` that contains only a single step can be used as a reusable taskflow.
 
-A reusable taskflow can also have templated prompt that takes inputs from its user. This is specified with the `inputs` field from the user.
+A reusable taskflow can also have a templated prompt that takes inputs from its user. This is specified with the `inputs` field from the user.
 
 ```yaml
   - task:
@@ -413,7 +411,7 @@ In this case, the template parameter `{{ INPUTS_fruit }}` is replaced by the val
 
 ### Reusable Prompts
 
-Reusable prompts are defined in files of `filetype` `prompts`. These are like macros that gets replaced when a templated parameter of the form `{{ PROMPTS_<filekey> }}` is encountered.
+Reusable prompts are defined in files of `filetype` `prompts`. These are like macros that get replaced when a templated parameter of the form `{{ PROMPTS_<filekey> }}` is encountered.
 
 Tasks can incorporate templated prompts which are then replaced by the actual prompt. For example:
 
@@ -422,13 +420,13 @@ Example:
 ```yaml
   - task:
       agents:
-        - fruit_expert
+        - examples.personalities.fruit_expert
       user_prompt: |
         Tell me more about apples.
-
-        {{ PROMPTS_prompts.examples.example_prompt }}
+        
+        {{ PROMPTS_examples.prompts.example_prompt }}
 ```
-and `prompts.examples.example_prompt` is the following:
+and `examples.prompts.example_prompt` is the following:
 
 ```yaml
 seclab-taskflow-agent:
@@ -444,7 +442,7 @@ Then the actual task becomes:
 ```yaml
   - task:
       agents:
-        - fruit_expert
+        - examples.personalities.fruit_expert
       user_prompt: |
         Tell me more about apples.
 
@@ -453,15 +451,14 @@ Then the actual task becomes:
 
 ### Model config
 
-LLM models can be configured in a taskflow by setting the `model_config` field to the `filekey` of a file of `filetype` `model_config` :
+LLM models can be configured in a taskflow by setting the `model_config` field to the `filekey` of a file of `filetype` `model_config`:
 
 ```yaml
 seclab-taskflow-agent:
   version: 1
   filetype: taskflow
 
-model_config: configs.model_config
-
+model_config: examples.model_configs.model_config
 ```
 
 The variables defined in the `model_config` file can then be used throughout the taskflow, e.g.
@@ -471,7 +468,7 @@ seclab-taskflow-agent:
   version: 1
   filetype: model_config
 models:
-   gpt_latest: gpt-5
+  gpt_latest: gpt-5
 ```
 
 When `gpt_latest` is used in the taskflow to specify a model, the value `gpt-5` is used:
@@ -481,9 +478,9 @@ When `gpt_latest` is used in the taskflow to specify a model, the value `gpt-5`
       model: gpt_latest
       must_complete: false
       agents:
-        - personalities.c_auditer
+        - seclab_taskflow_agent.personalities.c_auditer
       user_prompt: |
 
 ```
 
-This provides a easy way to update model versions in a taskflow.
+This provides an easy way to update model versions in a taskflow.
@@ -0,0 +1,8 @@
+seclab-taskflow-agent:
+  version: 1
+  filetype: model_config
+models:
+  sonnet_default: claude-sonnet-4
+  sonnet_latest: claude-sonnet-4.5
+  gpt_default: gpt-4.1
+  gpt_latest: gpt-5
@@ -1,3 +1,4 @@
+# personalities define the system prompt level directives for this Agent
 seclab-taskflow-agent:
   version: 1
   filetype: personality
@@ -8,6 +9,6 @@ personality: |
 task: |
   Echo user inputs using the echo tools.
 
+# personality toolboxes map to mcp servers made available to this Agent
 toolboxes:
-  - toolboxes.echo
-
+  - seclab_taskflow_agent.toolboxes.echo
@@ -2,23 +2,23 @@ seclab-taskflow-agent:
   version: 1
   filetype: taskflow
 
-model_config: configs.model_config
+model_config: examples.model_configs.model_config
 
 taskflow:
   - task:
       must_complete: true
       headless: true
       agents:
-        - personalities.assistant
+        - seclab_taskflow_agent.personalities.assistant
       user_prompt: |
         Clear the memory cache.
       toolboxes:
-        - toolboxes.memcache
+        - seclab_taskflow_agent.toolboxes.memcache
   - task:
       model: gpt_latest
       must_complete: false
       agents:
-        - personalities.c_auditer
+        - seclab_taskflow_agent.personalities.c_auditer
       user_prompt: |
         You are auditing code using the `libssh-mirror/libssh-codeql`
         CodeQL database.
@@ -78,15 +78,15 @@ taskflow:
         6. Make small and concise single line notes while you work. Update the
         existing value for `notes` in memory as you work.
       toolboxes:
-        - toolboxes.codeql
-        - toolboxes.memcache
+        - seclab_taskflow_agent.toolboxes.codeql
+        - seclab_taskflow_agent.toolboxes.memcache
   - task:
       must_complete: true
       agents:
-        - personalities.c_auditer
+        - seclab_taskflow_agent.personalities.c_auditer
       user_prompt: |
         Fetch your audit notes from memory using the `notes`
         key. Do not perform any additional security review, only show me your
         notes.
       toolboxes:
-        - toolboxes.memcache
+        - seclab_taskflow_agent.toolboxes.memcache
@@ -8,13 +8,13 @@ taskflow:
       max_steps: 5
       must_complete: true
       agents:
-        - personalities.examples.echo
+        - examples.personalities.echo
       user_prompt: |
         Hello
   - task:
       must_complete: true
       agents:
-        - personalities.examples.echo
+        - examples.personalities.echo
       user_prompt: |
         Goodbye
       env:

@@ -12,8 +12,8 @@ taskflow:
       must_complete: true
       # taskflows can set a primary (first entry) and handoff (additional entries) agent
       agents:
-        - personalities.c_auditer
-        - personalities.examples.fruit_expert
+        - seclab_taskflow_agent.personalities.c_auditer
+        - examples.personalities.fruit_expert
       user_prompt: |
         Store an example vulnerable C program that uses `strcpy` in the
         `vulnerable_c_example` memory key and explain why `strcpy`
@@ -34,13 +34,13 @@ taskflow:
       # this normally only has the memcache toolbox, but we extend it here with
       # the CodeQL toolbox
       toolboxes:
-        - toolboxes.memcache
-        - toolboxes.codeql
+        - seclab_taskflow_agent.toolboxes.memcache
+        - seclab_taskflow_agent.toolboxes.codeql
   - task:
       must_complete: true
       model: gpt-4.1
       agents:
-        - personalities.c_auditer
+        - seclab_taskflow_agent.personalities.c_auditer
       user_prompt: |
         Retrieve C code for security review from the `vulnerable_c_example`
         memory key and perform a review.
@@ -50,7 +50,7 @@ taskflow:
         MEMCACHE_STATE_DIR: "example_taskflow/"
         MEMCACHE_BACKEND: "dictionary_file"
       toolboxes:
-        - toolboxes.memcache
+        - seclab_taskflow_agent.toolboxes.memcache
       # headless mode does not prompt for tool call confirmations configured for a server
       # note: this will auto-allow, if you want control over potentially dangerous
       # tool calls, then you should NOT run a task in headless mode (default: false)
@@ -63,6 +63,6 @@ taskflow:
   - task:
       repeat_prompt: true
       agents:
-        - personalities.assistant
+        - seclab_taskflow_agent.personalities.assistant
       user_prompt: |
         What kind of fruit is {{ RESULT }}?