Skip to content

Commit 453f6c7

Browse files
2026 update
1 parent 685f8cb commit 453f6c7

File tree

20 files changed

+1628
-895
lines changed

20 files changed

+1628
-895
lines changed

.cursor/rules/python.mdc

Lines changed: 401 additions & 0 deletions
Large diffs are not rendered by default.

.github/workflows/ci-checks.yml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,31 +28,32 @@ jobs:
2828

2929
# --- Python toolchain (uv) ---
3030
- name: Setup uv
31-
uses: astral-sh/setup-uv@v6
31+
uses: astral-sh/setup-uv@v7
3232
with:
3333
enable-cache: true
3434

3535
- name: Sync dependencies and check uv lockfile is up to date
3636
run: uv sync --dev --locked
3737

38-
# --- Quality: lint, format, type-checking, unused deps ---
38+
# --- Dependency Checks ---
39+
- name: Check for unused dependencies (Deptry)
40+
run: uv run --locked deptry .
41+
continue-on-error: true
42+
43+
# --- Quality: lint, format, type-checking ---
44+
- name: Pyproject formatting
45+
run: uv run pyproject-fmt pyproject.toml
46+
3947
- name: Ruff format
4048
run: uv run ruff format . --check --diff
4149

4250
- name: Ruff check
4351
run: uv run ruff check . --diff
4452

45-
- name: Type checking (Pyrefly)
46-
run: uv run pyrefly check .
47-
4853
- name: Type checking (Ty)
4954
run: uv run ty check .
50-
continue-on-error: true
51-
52-
- name: Unused dependencies (Deptry)
53-
run: uv run deptry .
5455

5556
# --- Tests ---
5657
- name: Unit tests (PyTest)
5758
run: |
58-
uv run pytest -v --tb=short -n auto
59+
uv run pytest -v --tb=short

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
.venv
32
__pycache__/
43
.ruff_cache/
@@ -8,4 +7,4 @@ __pycache__/
87
.cache
98
.wandb
109
.env
11-
data.csv
10+
.vscode

.pre-commit-config.yaml

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
repos:
2+
- repo: local
3+
hooks:
4+
- id: pyproject-fmt
5+
name: Pyproject Formatting
6+
language: system
7+
entry: |
8+
bash -c '
9+
uv run pyproject-fmt pyproject.toml
10+
exit_code=$?
11+
if [ $exit_code -ne 0 ]; then
12+
echo -e "\n❌ ERROR: Pyproject formatting failed."
13+
exit 1
14+
fi
15+
# Auto-stage if modified
16+
if ! git diff --quiet pyproject.toml 2>/dev/null; then
17+
git add pyproject.toml
18+
echo "✅ Auto-staged pyproject.toml formatting changes"
19+
fi
20+
'
21+
files: ^(pyproject\.toml)$
22+
pass_filenames: false
23+
24+
- id: uv-lock
25+
name: Dependency Locking
26+
language: system
27+
entry: |
28+
bash -c '
29+
locks=$(git ls-files --cached --others --exclude-standard | grep -E "(^|/)uv\.lock$" || true)
30+
# grep -c already prints 0 when nothing matches (but exits 1); only neutralize the exit status.
count=$(echo -n "$locks" | grep -c . || true)
31+
if [ "$count" -gt 1 ]; then
32+
echo -e "\n❌ ERROR: Found $count uv.lock files (expected 1)."
33+
echo "$locks"
34+
echo -e "👉 FIX: Remove extra uv.lock files. Only the root uv.lock should exist.\n"
35+
exit 1
36+
fi
37+
uv lock -q
38+
exit_code=$?
39+
if [ $exit_code -ne 0 ]; then
40+
echo -e "\n❌ ERROR: uv lock failed."
41+
exit 1
42+
fi
43+
# Auto-stage if modified
44+
if ! git diff --quiet uv.lock 2>/dev/null; then
45+
git add uv.lock
46+
echo "✅ Auto-staged uv.lock changes"
47+
fi
48+
'
49+
files: ^(pyproject\.toml|uv\.lock)$
50+
pass_filenames: false
51+
52+
- id: ruff-check
53+
name: Python Linting
54+
language: system
55+
entry: |
56+
bash -c '
57+
# Get list of staged Python files before running ruff
58+
staged_py=$(git diff --cached --name-only --diff-filter=ACM | grep -E "\.py$" || true)
59+
60+
uv run ruff check --fix
61+
exit_code=$?
62+
if [ $exit_code -ne 0 ]; then
63+
echo -e "\n❌ ERROR: Ruff found linting issues that could not be auto-fixed."
64+
echo -e "👉 FIX: Run \"uv run ruff check\" to see the errors and fix them manually.\n"
65+
exit 1
66+
fi
67+
68+
# Auto-stage any modified Python files that were originally staged
69+
if [ -n "$staged_py" ]; then
70+
# Read one path per line so staged filenames containing spaces are not word-split.
printf "%s\n" "$staged_py" | while IFS= read -r f; do
71+
if [ -f "$f" ] && ! git diff --quiet "$f" 2>/dev/null; then
72+
git add "$f"
73+
echo "✅ Auto-staged ruff fixes in $f"
74+
fi
75+
done
76+
fi
77+
'
78+
types: [python]
79+
pass_filenames: false
80+
81+
- id: ruff-format
82+
name: Python Formatting
83+
language: system
84+
entry: |
85+
bash -c '
86+
# Get list of staged Python files before running ruff format
87+
staged_py=$(git diff --cached --name-only --diff-filter=ACM | grep -E "\.py$" || true)
88+
89+
uv run ruff format
90+
exit_code=$?
91+
if [ $exit_code -ne 0 ]; then
92+
echo -e "\n❌ ERROR: Ruff formatting failed."
93+
exit 1
94+
fi
95+
96+
# Auto-stage any modified Python files that were originally staged
97+
if [ -n "$staged_py" ]; then
98+
# Read one path per line so staged filenames containing spaces are not word-split.
printf "%s\n" "$staged_py" | while IFS= read -r f; do
99+
if [ -f "$f" ] && ! git diff --quiet "$f" 2>/dev/null; then
100+
git add "$f"
101+
echo "✅ Auto-staged ruff format changes in $f"
102+
fi
103+
done
104+
fi
105+
'
106+
types: [python]
107+
pass_filenames: false
108+
109+
- id: ty
110+
name: Static type checking
111+
language: system
112+
entry: |
113+
bash -c '
114+
uv run ty check -q
115+
exit_code=$?
116+
if [ $exit_code -ne 0 ]; then
117+
echo -e "\n❌ ERROR: Type checking failed."
118+
echo -e "👉 FIX: Run \"uv run ty check\" to see the errors and fix them.\n"
119+
exit 1
120+
fi
121+
'
122+
types: [python]
123+
pass_filenames: false
124+
125+
- id: sync-codestyle
126+
name: Sync STYLEGUIDE.md to agent rule files
127+
language: system
128+
entry: |
129+
bash -c '
130+
SRC="STYLEGUIDE.md"
131+
CURSOR_DST=".cursor/rules/python.mdc"
132+
133+
# Read source body
134+
SRC_BODY=$(cat "$SRC")
135+
136+
# Write Cursor file with frontmatter
137+
{
138+
echo "---"
139+
echo "globs: \"*.py\""
140+
echo "alwaysApply: false"
141+
echo "---"
142+
echo "$SRC_BODY"
143+
} > "$CURSOR_DST"
144+
145+
# Auto-stage if changed
146+
if ! git diff --quiet "$CURSOR_DST" 2>/dev/null; then
147+
git add "$CURSOR_DST"
148+
echo "✅ Auto-staged $CURSOR_DST"
149+
fi
150+
'
151+
files: ^STYLEGUIDE\.md$
152+
pass_filenames: false

AGENTS.md

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
# AGENTS.md
2+
3+
## Documentation
4+
5+
- `CONTRIBUTING.md`: Contains the rules for contributing to the project.
6+
- `STYLEGUIDE.md`: Contains the code style guide. It is mandatory for all Python contributions.
7+
8+
## `ai-data` - Common Library
9+
10+
### Core Components
11+
12+
`src/ai_data` is the core Python package for shared constants, paths, and models:
13+
14+
- `cache`: disk-based response caching with `@cached_method` decorator.
15+
- `clients`: singleton API clients with rate limiting (WizClient, MikaClient).
16+
- `credentials`: singleton environment configuration classes for external services.
17+
- `llm_utils`: LLM client abstractions (`ModelGateway`, `LlmConfig`, model enums).
18+
- `paths`: shared project paths.
19+
- `types`: shared data models.
20+
- `utils`: shared async utilities (JSON, file helpers).
21+
22+
### Paths
23+
24+
- Use `AI_DATA_ROOT` for the project root.
25+
- See `paths.py` for other paths.
26+
27+
### Types
28+
29+
`Struct` is the base model (Pydantic) with stricter rules:
30+
31+
- Forbids extra fields.
32+
- Validates defaults and assignments.
33+
- Strips whitespace from string fields.
34+
- Uses enum values for enum fields.
35+
- Provides a deterministic SHA256 `signature` for change detection.
36+
37+
### Credentials
38+
39+
`GlobalCredentials` loads from `.env`, environment variables, or device authentication.
40+
41+
```python
42+
from ai_data.credentials.base import GlobalCredentials
43+
44+
class MyConfig(GlobalCredentials):
45+
api_key: str = Field(validation_alias="MY_API_KEY")
46+
47+
MyConfig() # loads from .env then environment
48+
MyConfig(sources=[".env"]) # loads from .env only
49+
MyConfig(sources=["environment"]) # loads from os.environ only
50+
MyConfig(sources=["device"]) # loads from device authentication
51+
MyConfig().api_key
52+
```
53+
54+
**Available credentials:**
55+
56+
- `VertexCredentials`: `VERTEX_AI_PROJECT`, `VERTEX_AI_LOCATION`, `GOOGLE_VERTEX_AI_SA` (service account JSON)
57+
- `WizCredentials`: Wiz API credentials
58+
59+
`WizCredentials` selects its environment-variable prefix via `WizEnv`:
60+
61+
- `WizEnv.WIZ`: `WIZ_*`
62+
- `WizEnv.WIZ_DEMO`: `WIZ_DEMO_*`
63+
- `WizEnv.WIZ_DEMO_ADVANCED`: `WIZ_DEMO_ADVANCED_*`
64+
- `WizEnv.WIZ_TEST`: `WIZ_TEST_*`
65+
66+
### Cache
67+
68+
Disk-based response caching using `@cached_method` decorator for async methods.
69+
70+
```python
71+
from ai_data.cache import cached_method
72+
73+
class MyClient:
74+
@cached_method(namespace_arg="model", ttl="1h")
75+
async def fetch(self, model: str, query: str) -> dict:
76+
return await self._api_call(model, query)
77+
78+
# Normal call (uses cache)
79+
result = await client.fetch("gpt4", "query")
80+
81+
# Force refresh (skips cache read, overwrites cache)
82+
result = await client.fetch("gpt4", "query", use_cache=False)
83+
```
84+
85+
TTL units: `s` (seconds), `m` (minutes), `h` (hours), `d` (days), `w` (weeks).
86+
87+
### Utils
88+
89+
`ai_data.utils` provides async file helpers:
90+
91+
```python
92+
from ai_data.utils import load_json, save_json, read_file, write_file
93+
94+
# JSON
95+
data = await load_json("config.json")
96+
await save_json("out.json", data)
97+
98+
# Text files
99+
content = await read_file("file.txt")
100+
await write_file("file.txt", content)
101+
```
102+
103+
### ModelGateway
104+
105+
User-facing LLM client with flexible API, structured output, and caching.
106+
107+
```python
108+
from ai_data import ModelGateway, ModelName
109+
from pydantic import BaseModel
110+
111+
gateway = ModelGateway()
112+
113+
# Plain text completion
114+
result = await gateway.completion("What is 2+2?")
115+
print(result.content) # "4"
116+
117+
# Structured output
118+
class Answer(BaseModel):
119+
value: int
120+
explanation: str
121+
122+
result = await gateway.completion("What is 2+2?", output_type=Answer)
123+
print(result.content.value) # 4
124+
125+
# With specific model
126+
result = await gateway.completion("Hello", model=ModelName.HAIKU_4_5)
127+
128+
# Batch completions
129+
results = await gateway.batch_completion(["Q1", "Q2"], output_type=Answer)
130+
```
131+
132+
### WizClient
133+
134+
Singleton Wiz API client with rate limiting and async support.
135+
136+
```python
137+
from ai_data import WizClient
138+
from ai_data.clients.wiz import GqlQuery
139+
140+
client = WizClient()
141+
142+
# Single query with GqlQuery dataclass
143+
query = GqlQuery(query="query { viewer { id } }", variables={})
144+
result = await WizClient.gql_query(query)
145+
146+
# Batch queries (concurrent, rate-limited)
147+
results = await WizClient.batch_gql_query([
148+
GqlQuery(query=query1, variables=vars1),
149+
GqlQuery(query=query2, variables=vars2),
150+
])
151+
152+
WizClient.override(max_concurrency=2) # Replaces the existing singleton
153+
WizClient.clear() # Deletes the global singleton instance
154+
```
155+
156+
### MikaClient
157+
158+
Extends `WizClient` with Mika AI Assistant methods.
159+
160+
```python
161+
from ai_data import MikaClient
162+
163+
MikaClient() # Creates singleton
164+
response = await MikaClient.query_mika("What are my critical issues?")
165+
results = await MikaClient.batch_query_mika([
166+
"What are my critical issues?",
167+
"Show me recent vulnerabilities",
168+
])
169+
```

0 commit comments

Comments
 (0)