Skip to content

Commit 0a80f0c

Browse files
Phase 1: Core Analyzer Stabilization & Testing
- Enhanced Python parser with nested async function support and parent scope tracking
- Added Go parser support for generic type constraints (Go 1.18+) and struct tags
- Improved Java parser with record class support, nested annotations, and lambda filtering
- Added comprehensive test suite with 100+ real-world code samples
- Implemented error recovery mechanism for partial AST parsing on syntax errors
- Created performance benchmarks targeting 1000 LOC in < 500ms
- Built ground truth validation dataset with > 95% accuracy requirement
- Added GitHub Actions CI workflow with coverage reporting
- Updated AST models with new fields for enhanced language support
- Created analyzer development documentation and test configuration

Co-authored-by: openhands <[email protected]>
1 parent e7a324e commit 0a80f0c

20 files changed

+3843
-21
lines changed
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
name: Analyzer Test Suite
2+
3+
on:
4+
push:
5+
branches: [ main, master, feat/round7-phase1-analyzer-stabilization ]
6+
pull_request:
7+
branches: [ main, master ]
8+
9+
jobs:
10+
test:
11+
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
# Versions are quoted: an unquoted 3.10 is parsed as the float 3.1 and requests the wrong interpreter.
python-version: ["3.10", "3.11", "3.12"]
15+
16+
steps:
17+
- uses: actions/checkout@v4
18+
19+
- name: Set up Python ${{ matrix.python-version }}
20+
uses: actions/setup-python@v4
21+
with:
22+
python-version: ${{ matrix.python-version }}
23+
24+
- name: Install Poetry
25+
uses: snok/install-poetry@v1
26+
with:
27+
version: latest
28+
virtualenvs-create: true
29+
virtualenvs-in-project: true
30+
31+
- name: Load cached venv
32+
id: cached-poetry-dependencies
33+
uses: actions/cache@v3
34+
with:
35+
path: .venv
36+
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
37+
38+
- name: Install dependencies
39+
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
40+
run: poetry install --no-interaction --no-root
41+
42+
- name: Install project
43+
run: poetry install --no-interaction
44+
45+
- name: Run analyzer unit tests with coverage
46+
run: |
47+
poetry run pytest tests/unit/analyzers/ \
48+
--cov=codesage/analyzers \
49+
--cov-report=xml \
50+
--cov-report=html \
51+
--cov-report=term-missing \
52+
--cov-fail-under=95 \
53+
-v
54+
55+
- name: Run performance tests
56+
run: |
57+
poetry run pytest tests/performance/ \
58+
--benchmark-only \
--benchmark-autosave \
59+
--benchmark-sort=mean \
60+
--benchmark-min-rounds=3 \
61+
-v
62+
63+
- name: Run ground truth validation
64+
run: |
65+
poetry run pytest tests/unit/analyzers/test_ground_truth_validation.py \
66+
-v
67+
68+
- name: Upload coverage reports to Codecov
69+
uses: codecov/codecov-action@v3
70+
with:
71+
file: ./coverage.xml
72+
flags: analyzers
73+
name: codecov-umbrella
74+
fail_ci_if_error: true
75+
76+
- name: Upload coverage HTML report
77+
# upload-artifact v3 has been retired by GitHub; v4 is the supported major version.
uses: actions/upload-artifact@v4
78+
if: always()
79+
with:
80+
name: coverage-report-${{ matrix.python-version }}
81+
path: htmlcov/
82+
83+
- name: Upload benchmark results
84+
# upload-artifact v3 has been retired by GitHub; v4 is the supported major version.
uses: actions/upload-artifact@v4
85+
if: always()
86+
with:
87+
name: benchmark-results-${{ matrix.python-version }}
88+
path: .benchmarks/
# .benchmarks/ is a dot-directory; upload-artifact skips hidden files unless this is set.
include-hidden-files: true
89+
90+
integration-test:
91+
runs-on: ubuntu-latest
92+
needs: test
93+
94+
steps:
95+
- uses: actions/checkout@v4
96+
97+
- name: Set up Python 3.11
98+
uses: actions/setup-python@v4
99+
with:
100+
# Quoted so the version stays a string rather than a YAML float.
python-version: "3.11"
101+
102+
- name: Install Poetry
103+
uses: snok/install-poetry@v1
104+
with:
105+
version: latest
106+
virtualenvs-create: true
107+
virtualenvs-in-project: true
108+
109+
- name: Install dependencies
110+
run: poetry install --no-interaction
111+
112+
- name: Run integration tests
113+
run: |
114+
poetry run pytest tests/integration/ \
115+
-k "analyzer or parser" \
116+
--tb=short \
117+
-v
118+
119+
- name: Test analyzer CLI commands
120+
run: |
121+
# Test Python analysis
122+
echo "def test_function(): pass" > test_file.py
123+
poetry run codesage analyze test_file.py --format json
124+
125+
# Test Go analysis (if Go files exist)
126+
# A quoted "*.go" inside [ -f ] is tested as a literal filename; compgen -G expands the glob instead.
if compgen -G "*.go" > /dev/null; then
127+
poetry run codesage analyze *.go --format json
128+
fi
129+
130+
# Test Java analysis (if Java files exist)
131+
# A quoted "*.java" inside [ -f ] is tested as a literal filename; compgen -G expands the glob instead.
if compgen -G "*.java" > /dev/null; then
132+
poetry run codesage analyze *.java --format json
133+
fi
134+
135+
quality-check:
136+
runs-on: ubuntu-latest
137+
needs: test
138+
139+
steps:
140+
- uses: actions/checkout@v4
141+
142+
- name: Set up Python 3.11
143+
uses: actions/setup-python@v4
144+
with:
145+
# Quoted so the version stays a string rather than a YAML float.
python-version: "3.11"
146+
147+
- name: Install Poetry
148+
uses: snok/install-poetry@v1
149+
with:
150+
version: latest
151+
virtualenvs-create: true
152+
virtualenvs-in-project: true
153+
154+
- name: Install dependencies
155+
run: poetry install --no-interaction
156+
157+
- name: Run code quality checks
158+
run: |
159+
# Check analyzer code quality
160+
poetry run ruff check codesage/analyzers/
161+
poetry run black --check codesage/analyzers/
162+
163+
# Check test code quality
164+
poetry run ruff check tests/unit/analyzers/
165+
poetry run black --check tests/unit/analyzers/
166+
167+
- name: Check analyzer performance benchmarks
168+
run: |
169+
# Run performance tests and check they meet requirements
170+
poetry run pytest tests/performance/ \
171+
--benchmark-only \
172+
--benchmark-json=benchmark_results.json
173+
174+
# Verify performance requirements are met
175+
python -c "
176+
import json
177+
with open('benchmark_results.json') as f:
178+
data = json.load(f)
179+
180+
for benchmark in data['benchmarks']:
181+
if '1000_loc' in benchmark['name']:
182+
mean_time = benchmark['stats']['mean']
183+
assert mean_time < 0.5, f'Performance requirement failed: {benchmark[\"name\"]} took {mean_time:.3f}s > 0.5s'
184+
print(f'✓ {benchmark[\"name\"]}: {mean_time:.3f}s')
185+
"
186+
187+
documentation:
188+
runs-on: ubuntu-latest
189+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
190+
191+
steps:
192+
- uses: actions/checkout@v4
193+
194+
- name: Set up Python 3.11
195+
uses: actions/setup-python@v4
196+
with:
197+
# Quoted so the version stays a string rather than a YAML float.
python-version: "3.11"
198+
199+
- name: Install Poetry
200+
uses: snok/install-poetry@v1
201+
with:
202+
version: latest
203+
virtualenvs-create: true
204+
virtualenvs-in-project: true
205+
206+
- name: Install dependencies
207+
run: poetry install --no-interaction
208+
209+
- name: Generate test report
210+
run: |
211+
# Generate comprehensive test report
212+
poetry run pytest tests/unit/analyzers/ \
213+
--cov=codesage/analyzers \
214+
--cov-report=html \
215+
--html=docs/phase1-test-report.html \
216+
--self-contained-html \
217+
-v
218+
219+
- name: Upload test report
220+
# upload-artifact v3 has been retired by GitHub; v4 is the supported major version.
uses: actions/upload-artifact@v4
221+
with:
222+
name: phase1-test-report
223+
path: docs/phase1-test-report.html

codesage/analyzers/ast_models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class VariableNode(ASTNode):
1616
kind: str = "global" # global, local, field (for structs)
1717
type_name: Optional[str] = None # For typed variables (Go)
1818
is_exported: bool = False
19+
struct_tag: Optional[str] = None # For Go struct tags
1920

2021
class FunctionNode(ASTNode):
2122
name: str
@@ -29,13 +30,21 @@ class FunctionNode(ASTNode):
2930
cyclomatic_complexity: int = 1
3031
cognitive_complexity: int = 0
3132
is_exported: bool = False
33+
parent_scope: Optional[str] = None # For nested functions
34+
type_parameters: List[dict] = Field(default_factory=list) # For generic functions
35+
throws_clause: List[str] = Field(default_factory=list) # For Java throws
36+
is_synchronized: bool = False # For Java synchronized methods
37+
is_static: bool = False # For static methods
38+
is_record_constructor: bool = False # For Java record constructors
39+
record_components: List[str] = Field(default_factory=list) # For Java record components
3240

3341
class ClassNode(ASTNode):
3442
name: str
3543
methods: List[FunctionNode] = Field(default_factory=list)
3644
fields: List[VariableNode] = Field(default_factory=list) # For structs
3745
base_classes: List[str] = Field(default_factory=list)
3846
is_exported: bool = False
47+
type_parameters: List[dict] = Field(default_factory=list) # For generic classes/structs
3948

4049
class ImportNode(ASTNode):
4150
path: str

codesage/analyzers/go_parser.py

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,17 +144,37 @@ def _build_function_node(self, node):
144144
if result_node:
145145
return_type = self._text(result_node)
146146

147+
# Extract type parameters for generics (Go 1.18+)
148+
type_parameters = []
147149
is_generic = type_params_node is not None
150+
if type_params_node:
151+
for param in type_params_node.children:
152+
if param.type == "type_parameter_declaration":
153+
param_name = None
154+
param_constraint = None
155+
for child in param.children:
156+
if child.type == "type_identifier":
157+
param_name = self._text(child)
158+
elif child.type in ("type_constraint", "type_term"):
159+
param_constraint = self._text(child)
160+
161+
if param_name:
162+
type_parameters.append({
163+
'name': param_name,
164+
'constraint': param_constraint or 'any'
165+
})
148166

149167
# Determine exported
150168
func_name = self._text(name_node) if name_node else ''
151169
is_exported = func_name[0].isupper() if func_name else False
152170

153-
decorators = ["generic"] if is_generic else []
171+
decorators = []
172+
if is_generic:
173+
decorators.append("generic")
154174
if is_exported:
155175
decorators.append("exported")
156176

157-
return FunctionNode(
177+
func = FunctionNode(
158178
node_type="function",
159179
name=func_name,
160180
params=params,
@@ -166,6 +186,11 @@ def _build_function_node(self, node):
166186
is_async=False,
167187
decorators=decorators
168188
)
189+
190+
# Add type parameters as custom attribute
191+
func.type_parameters = type_parameters
192+
193+
return func
169194

170195
# ----------------------------------------------------------------------
171196
# Struct extraction
@@ -196,6 +221,26 @@ def extract_structs(self) -> List[ClassNode]:
196221
def _build_struct_node(self, type_spec_node) -> ClassNode:
197222
name_node = type_spec_node.child_by_field_name("name")
198223
struct_type = type_spec_node.child_by_field_name("type")
224+
225+
# Check for type parameters (generics)
226+
type_params_node = type_spec_node.child_by_field_name("type_parameters")
227+
type_parameters = []
228+
if type_params_node:
229+
for param in type_params_node.children:
230+
if param.type == "type_parameter_declaration":
231+
param_name = None
232+
param_constraint = None
233+
for child in param.children:
234+
if child.type == "type_identifier":
235+
param_name = self._text(child)
236+
elif child.type in ("type_constraint", "type_term"):
237+
param_constraint = self._text(child)
238+
239+
if param_name:
240+
type_parameters.append({
241+
'name': param_name,
242+
'constraint': param_constraint or 'any'
243+
})
199244

200245
fields = []
201246
if struct_type:
@@ -210,6 +255,12 @@ def _build_struct_node(self, type_spec_node) -> ClassNode:
210255
if child.type == "field_declaration":
211256
type_node = child.child_by_field_name("type")
212257
type_str = self._text(type_node) if type_node else None
258+
259+
# Extract struct tags (e.g., `json:"name"`)
260+
tag_str = None
261+
tag_node = child.child_by_field_name("tag")
262+
if tag_node:
263+
tag_str = self._text(tag_node)
213264

214265
names = []
215266
for sub in child.children:
@@ -224,31 +275,42 @@ def _build_struct_node(self, type_spec_node) -> ClassNode:
224275
field_name_for_exported = type_str.lstrip("*")
225276
is_exported_field = field_name_for_exported[0].isupper() if field_name_for_exported else False
226277

227-
fields.append(VariableNode(
278+
field = VariableNode(
228279
node_type="variable",
229280
name=type_str,
230281
type_name=type_str,
231282
kind="embedded_field",
232283
is_exported=is_exported_field
233-
))
284+
)
285+
if tag_str:
286+
field.struct_tag = tag_str
287+
fields.append(field)
234288
else:
235289
for n in names:
236290
is_exported_field = n[0].isupper() if n else False
237-
fields.append(VariableNode(
291+
field = VariableNode(
238292
node_type="variable",
239293
name=n,
240294
type_name=type_str,
241295
kind="field",
242296
is_exported=is_exported_field
243-
))
297+
)
298+
if tag_str:
299+
field.struct_tag = tag_str
300+
fields.append(field)
244301

245-
return ClassNode(
302+
struct = ClassNode(
246303
node_type="struct",
247304
name=self._text(name_node) if name_node else "<anonymous>",
248305
fields=fields,
249306
methods=[],
250307
base_classes=[]
251308
)
309+
310+
# Add type parameters as custom attribute
311+
struct.type_parameters = type_parameters
312+
313+
return struct
252314

253315
# ----------------------------------------------------------------------
254316
# Interface extraction

0 commit comments

Comments
 (0)