Skip to content

Commit 4516edb

Browse files
Merge remote-tracking branch 'origin/main'
2 parents 97fe35b + c628732 commit 4516edb

17 files changed

+143173
-8
lines changed

data-analysis/01_extract_source.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,24 @@
22
from pathlib import Path
33
from typing import Any, Dict, List
44

5+
from bson import ObjectId
56
from pymongo import MongoClient
67

78
from common import ROOT, coerce_bool, coerce_float, coerce_int, load_config, parse_args, parse_json_field, write_csv, write_json
89

910

11+
def convert_objectid_to_str(obj: Any) -> Any:
    """Return ``obj`` with every MongoDB ObjectId replaced by its string form.

    Dicts and lists are walked recursively and rebuilt as new containers;
    dict keys are kept as-is and only the values are converted. Any other
    value is handed back unchanged.
    """
    # A bare ObjectId collapses to its str() representation.
    if isinstance(obj, ObjectId):
        return str(obj)
    # Containers are rebuilt so nested ObjectIds are converted as well.
    if isinstance(obj, dict):
        return {name: convert_objectid_to_str(inner) for name, inner in obj.items()}
    if isinstance(obj, list):
        return [convert_objectid_to_str(element) for element in obj]
    return obj
21+
22+
1023
def flatten_match_row(entry: Dict[str, Any]) -> Dict[str, Any]:
1124
metadata = entry.get('metadata') or {}
1225
return {
@@ -16,26 +29,26 @@ def flatten_match_row(entry: Dict[str, Any]) -> Dict[str, Any]:
1629
'robotPosition': metadata.get('robotPosition', ''),
1730
'robotAbsent': coerce_bool(entry.get('robotAbsent', False)),
1831
'autoStartingPosition': entry.get('autoStartingPosition'),
19-
'autoPathJson': json.dumps(entry.get('autoPath') or {}, separators=(',', ':')),
20-
'shootTimeBySegmentJson': json.dumps(entry.get('shootTimeBySegment') or {}, separators=(',', ':')),
21-
'passTimeBySegmentJson': json.dumps(entry.get('passTimeBySegment') or {}, separators=(',', ':')),
22-
'actionTimelineJson': json.dumps(entry.get('actionTimeline') or {}, separators=(',', ':')),
32+
'autoPathJson': json.dumps(convert_objectid_to_str(entry.get('autoPath') or {}), separators=(',', ':')),
33+
'shootTimeBySegmentJson': json.dumps(convert_objectid_to_str(entry.get('shootTimeBySegment') or {}), separators=(',', ':')),
34+
'passTimeBySegmentJson': json.dumps(convert_objectid_to_str(entry.get('passTimeBySegment') or {}), separators=(',', ':')),
35+
'actionTimelineJson': json.dumps(convert_objectid_to_str(entry.get('actionTimeline') or {}), separators=(',', ':')),
2336
'ballsPerSecondUsed': coerce_float(entry.get('ballsPerSecondUsed', 0)),
2437
'autoFuelScored': coerce_float(entry.get('autoFuelScored', 0)),
25-
'teleFuelBySegmentJson': json.dumps(entry.get('teleFuelBySegment') or {}, separators=(',', ':')),
38+
'teleFuelBySegmentJson': json.dumps(convert_objectid_to_str(entry.get('teleFuelBySegment') or {}), separators=(',', ':')),
2639
'teleTower': entry.get('teleTower', 'None'),
2740
'breakdown': entry.get('breakdown', 'None'),
2841
'driverQuality': entry.get('driverQuality', 'ok'),
2942
'defenseProvided': entry.get('defenseProvided', 'None'),
3043
'defenseReceived': coerce_bool(entry.get('defenseReceived', False)),
31-
'foulsJson': json.dumps(entry.get('fouls') or {}, separators=(',', ':')),
32-
'breaksJson': json.dumps(entry.get('breaks') or {}, separators=(',', ':')),
44+
'foulsJson': json.dumps(convert_objectid_to_str(entry.get('fouls') or {}), separators=(',', ':')),
45+
'breaksJson': json.dumps(convert_objectid_to_str(entry.get('breaks') or {}), separators=(',', ':')),
3346
'freeText': entry.get('freeText', ''),
3447
}
3548

3649

3750
def flatten_pit_row(entry: Dict[str, Any]) -> Dict[str, Any]:
38-
intake = parse_json_field(entry.get('intakeSources'), {})
51+
intake = parse_json_field(convert_objectid_to_str(entry.get('intakeSources')), {})
3952
if not isinstance(intake, dict):
4053
intake = {}
4154

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"startedAt": "2026-03-25T03:42:24.475532Z",
3+
"finishedAt": "2026-03-25T03:42:26.426745Z",
4+
"configPath": "C:\\GitKrakenStuff\\ScoutingApp2026\\data-analysis\\pipeline_config.json",
5+
"sourceMode": "mongo",
6+
"runStage07": false,
7+
"executedStages": [
8+
"01_extract_source.py",
9+
"02_clean_normalize.py",
10+
"03_feature_engineering.py",
11+
"04_team_aggregation.py",
12+
"05_picklist_scores.py",
13+
"06_export_app_payloads.py"
14+
]
15+
}

data-analysis/output/01_match_raw.csv

Lines changed: 211 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
batteryCount,drivebase,intakeDepot,intakeFloorNeutral,intakeOutpostCorral,maxFuelStorageEstimate,notes,preferredScoringSpot,scoringMethod,scouterName,teamNumber,towerCapabilityClaimed
2+
1,tank,True,False,False,1,Worst robot at event fr fr,nearHub,other,Phong,4201,level3
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"stage": "01_extract_source",
3+
"sourceMode": "mongo",
4+
"counts": {
5+
"match": 210,
6+
"pit": 1
7+
},
8+
"configPath": "C:\\GitKrakenStuff\\ScoutingApp2026\\data-analysis\\pipeline_config.json"
9+
}

data-analysis/output/02_match_clean.csv

Lines changed: 211 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
batteryCount,drivebase,intakeDepot,intakeFloorNeutral,intakeOutpostCorral,maxFuelStorageEstimate,notes,preferredScoringSpot,scoringMethod,scouterName,teamNumber,towerCapabilityClaimed
2+
1,tank,True,False,False,1.0,Worst robot at event fr fr,nearHub,other,Phong,4201,level3
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dataset,rowNumber,severity,issues

0 commit comments

Comments
 (0)