Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
47d2c4e
add example response as comments
paulz Mar 8, 2025
f6526e7
fix: mypy missing types
paulz Mar 8, 2025
2b80b06
remove: unused actions
paulz Mar 8, 2025
a5beed7
fix: format pyproject.toml for consistency
paulz Mar 8, 2025
2a10097
fix: update test names for clarity in test_reporter.py
paulz Mar 9, 2025
b4a9dd3
feat: add statistical analysis functionality for test results
paulz Mar 10, 2025
cd50a2f
feat: enhance statistical analysis with new tests and confidence inte…
paulz Mar 10, 2025
3939c07
feat: add failure rate bar graph test with error margins visualization
paulz Mar 10, 2025
53bfa96
feat: update failure rate bar graph and enhance statistical analysis …
paulz Mar 10, 2025
4593174
feat: enhance failure rate bar graph with error margins and update st…
paulz Mar 10, 2025
087cc97
feat: add CSV export functionality for statistical analysis results
paulz Mar 10, 2025
ad66f95
feat: improve CSV export function by ensuring consistent newline char…
paulz Mar 10, 2025
5278428
feat: update CSV row representation and headers to include type hints
paulz Mar 10, 2025
32ea225
feat: adjust CSV writer to use MacOS-style newlines for consistency
paulz Mar 10, 2025
f9edf75
fix: PARENT_FOLDER_IDS usage for Google Drive upload script
paulz Mar 10, 2025
0abdfda
refactor: define FILENAME as env
paulz Mar 10, 2025
e2029c4
feat: enable caching for uv installation in GitHub Actions workflow
paulz Mar 10, 2025
30a2174
refactor: update module import style and adjust command execution in …
paulz Mar 10, 2025
92197b0
Fix statistical reporting; limit rounds to 1 for experiments
tkersey Mar 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions .github/actions/commit-visualizations/action.yml

This file was deleted.

58 changes: 0 additions & 58 deletions .github/actions/setup-python-poetry/action.yml

This file was deleted.

26 changes: 18 additions & 8 deletions .github/workflows/cat-test-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ jobs:

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
prune-cache: false

- name: "Set up Python"
uses: actions/setup-python@v5
Expand All @@ -39,6 +42,8 @@ jobs:
id: set-number-of-runs
run: |
ROUNDS=${{ inputs.rounds || 10 }}
[[ $GITHUB_REF_NAME == ci-experiment* ]] && ROUNDS=1

echo "::notice::Starting $ROUNDS runs"
echo "number_of_runs=$ROUNDS" >> "$GITHUB_OUTPUT"
echo "CAT_AI_SAMPLE_SIZE=$ROUNDS" >> $GITHUB_ENV
Expand All @@ -58,19 +63,24 @@ jobs:
# -H "Authorization: AWS minioadmin:minioadmin" \
# http://localhost:9000/yourbucket/yourfile.zip

- name: Show number of test failures
- name: Show CAT AI Statistical Report
if: always()
run: |
FAILURES=$(find examples/team_recommender/tests/test_runs -type f -name "fail-*" | wc -l)
uv run python src/cat_ai/reporter.py $FAILURES $CAT_AI_SAMPLE_SIZE >> $GITHUB_STEP_SUMMARY
FOLDER=examples/team_recommender/tests/test_runs
FAILURE_COUNT=$(find "$FOLDER" -type f -name "fail-*" | wc -l)
PYTHONPATH=src uv run python -m cat_ai.reporter \
"$FAILURE_COUNT" \
"$CAT_AI_SAMPLE_SIZE" \
>> "$GITHUB_STEP_SUMMARY"

- name: Upload artifacts to Google Drive
if: always()
- name: Upload main artifacts to Google Drive
if: always() && github.ref == 'refs/heads/main'
run: |
zip -r test-output-${{ github.run_number }}.zip examples/team_recommender/tests/test_runs
uv run python src/cat_ai/publish_to_gdrive.py test-output-${{ github.run_number }}.zip
zip -r "$FILENAME" examples/team_recommender/tests/test_runs
uv run python src/cat_ai/publish_to_gdrive.py "$FILENAME"
env:
GOOGLE_DRIVE_TEST_OUTPUT_FOLDER_ID: ${{ vars.GOOGLE_DRIVE_TEST_OUTPUT_FOLDER_ID }}
PARENT_FOLDER_IDS: ${{ vars.GOOGLE_DRIVE_TEST_OUTPUT_FOLDER_ID }}
FILENAME: test-output-${{ github.run_number }}.zip

- name: Upload artifacts
uses: actions/upload-artifact@v4
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ jobs:

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
prune-cache: false

- name: "Set up Python"
uses: actions/setup-python@v5
Expand Down
25 changes: 25 additions & 0 deletions .run/Template Python tests.run.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<component name="ProjectRunConfigurationManager">
<configuration default="true" type="tests" factoryName="py.test">
<module name="continuous-alignment-testing" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<option name="SDK_HOME" value="" />
<option name="SDK_NAME" value="uv (continuous-alignment-testing)" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<EXTENSION ID="com.fapiko.jetbrains.plugins.better_direnv.runconfigs.PycharmRunConfigurationExtension">
<option name="DIRENV_ENABLED" value="false" />
<option name="DIRENV_TRUSTED" value="false" />
</EXTENSION>
<option name="_new_keywords" value="&quot;&quot;" />
<option name="_new_parameters" value="&quot;&quot;" />
<option name="_new_additionalArguments" value="&quot;--snapshot-update&quot;" />
<option name="_new_target" value="&quot;&quot;" />
<option name="_new_targetType" value="&quot;PATH&quot;" />
<method v="2" />
</configuration>
</component>
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def test_response_shows_developer_names():
)
response = completion.choices[0].message.content
print(response)
# For the iOS Native project starting on June 3rd, the best developers based on the given list would be:
#
# 1. Sam Thomas - Specializes in Swift and Objective-C, and is available for the project.
# 2. Drew Anderson - Specializes in Swift but will be on vacation from June 1st to June 10th, so they are not available when the project starts.
#
# Therefore, Sam Thomas is the most suitable developer for this project.
assert "Sam Thomas" in response
assert "Drew Anderson" in response, "Surprisingly Drew Anderson is on vacation but still in the response"

Expand Down Expand Up @@ -63,6 +69,29 @@ def test_llm_will_hallucinate_given_no_data():
)
response = completion.choices[0].message.content
print(response)
# Here is the list of developers with their skills and availability:
#
# 1. Sarah Johnson
# - Skills: iOS Native, Mobile Development
# - Availability: Available starting May 1st
#
# 2. Alex Kim
# - Skills: iOS Native, iPhone Development, Video Processing
# - Availability: Available starting June 10th
#
# 3. Jamie Smith
# - Skills: iOS Native, Mobile UI Design
# - Availability: Available starting May 20th
#
# Based on the project requirements and availability, the best developer for this mobile iOS project for the telecom company would be:
#
# 1. Sarah Johnson
# - Skills: iOS Native, Mobile Development
# - Availability: Available starting May 1st
#
# 2. Jamie Smith
# - Skills: iOS Native, Mobile UI Design
# - Availability: Available starting May 20th
assert "Sam Thomas" not in response, "LLM obviously could not get our expected developer and will hallucinate"
assert "Drew Anderson" not in response, "Response will contain made up names"
assert len(response.split('\n')) > 5, "response contains list of made up developers in multiple lines"
39 changes: 21 additions & 18 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,40 @@ name = "cat-ai"
version = "0.0.5-alpha"
description = "Python client for running CAT tests in a Python codebase"
authors = [
{ name = "Mike Gehard", email = "[email protected]" },
{ name = "Randy Lutcavich", email = "[email protected]" },
{ name = "Erik Luetkehans", email = "[email protected]" },
{ name = "Paul Zabelin", email = "[email protected]" },
{ name = "Tim Kersey", email = "[email protected]" },
{ name = "Michael Harris", email = "[email protected]" },
{ name = "Mike Gehard", email = "[email protected]" },
{ name = "Randy Lutcavich", email = "[email protected]" },
{ name = "Erik Luetkehans", email = "[email protected]" },
{ name = "Paul Zabelin", email = "[email protected]" },
{ name = "Tim Kersey", email = "[email protected]" },
{ name = "Michael Harris", email = "[email protected]" },
]
requires-python = "~=3.13"
readme = "README.md"
dependencies = [
# this small library should be kept independent
# consider adding dependencies to one of the dependency groups
# this small library should be kept independent
# consider adding dependencies to one of the dependency groups
]
packages = [{ include = "cat_ai", from = "src" }]
license = "MIT"

[dependency-groups]
test = [
"pytest>=8.3.4,<9",
"pytest-asyncio>=0.21.0,<0.22",
"mypy>=1.8.0,<2",
"black>=24.2.0,<25",
"matplotlib>=3.10.1",
"pytest>=8.3.4,<9",
"pytest-asyncio>=0.21.0,<0.22",
"mypy>=1.8.0,<2",
"black>=24.2.0,<25",
"pytest-snapshot>=0.9.0",
]
examples = ["openai>=1.63.2,<2", "python-dotenv>=1.0.1,<2"]
dev = [
"sphinx>=8.1.3,<9",
"sphinx-rtd-theme>=3.0.2,<4",
"sphinx-markdown-builder>=0.6.8,<0.7",
"notebook>=7.3.2",
"pydrive2>=1.21.3,<2",
"pydantic>=2.10.6,<3",
"sphinx>=8.1.3,<9",
"sphinx-rtd-theme>=3.0.2,<4",
"sphinx-markdown-builder>=0.6.8,<0.7",
"notebook>=7.3.2",
"pydrive2>=1.21.3,<2",
"pydantic>=2.10.6,<3",
"ruff>=0.9.10",
]

[tool.uv]
Expand Down
3 changes: 2 additions & 1 deletion src/cat_ai/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .reporter import Reporter
from .runner import Runner
from .validator import Validator
from .statistical_analysis import StatisticalAnalysis, analyse_sample_from_test

__all__ = ["Reporter", "Runner", "Validator"]
__all__ = ["Reporter", "Runner", "Validator", "StatisticalAnalysis", "analyse_sample_from_test"]
12 changes: 10 additions & 2 deletions src/cat_ai/publish_to_gdrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ def login_with_service_account(credentials_path: str) -> GoogleAuth:
return gauth


PARENT_FOLDER_IDS = "PARENT_FOLDER_IDS"

if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python publish_to_gdrive.py <file_path>")
print(f"{PARENT_FOLDER_IDS} - comma-separated list of google folder IDs")
sys.exit(1)

file_path = sys.argv[1]
Expand All @@ -43,8 +46,13 @@ def login_with_service_account(credentials_path: str) -> GoogleAuth:
drive = GoogleDrive(google_auth)

file_name = os.path.basename(file_path)
PARENT_FOLDER_ID = os.environ.get("GOOGLE_DRIVE_TEST_OUTPUT_FOLDER_ID")
gfile = drive.CreateFile({"title": file_name, "parents": [{"id": PARENT_FOLDER_ID}]})
parent_ids = os.environ.get(PARENT_FOLDER_IDS)
if not parent_ids:
print(f"Error: {PARENT_FOLDER_IDS} environment variable is not set.")
sys.exit(2)
parents = [{"id": pid.strip()} for pid in (parent_ids.split(","))]
gfile = drive.CreateFile({"title": file_name, "parents": parents})

gfile.SetContentFile(file_path)
gfile.Upload()

Expand Down
Loading
Loading