Skip to content

Commit e690ea6

Browse files
committed
Initial commit: ML Microstructure Signals project
Complete machine learning pipeline for microstructure signal prediction:
- Order book data processing and feature extraction
- Multiple ML models (LogReg, RF, LightGBM, LSTM, Transformer)
- Backtesting framework with execution simulation
- Streamlit dashboard for visualization
- Comprehensive test suite and CI/CD pipeline
- LaTeX research report template
- Hydra configuration management
- Complete educational documentation
0 parents  commit e690ea6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+7249
-0
lines changed

.github/workflows/ci.yml

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# Workflow header: display name and the events that start a run.
name: CI/CD Pipeline

# Runs on pushes to main/develop and on pull requests that target main.
on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main

jobs:
10+
# Static checks: ruff (lint), black (format check) and mypy (type check).
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # setup-python v5 runs on a supported Node runtime; v4 targets the
    # deprecated Node 16 runtime.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    # Installs the package in editable mode together with its "dev" extra,
    # which is expected to provide ruff, black and mypy.
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"

    - name: Run ruff
      run: ruff check .

    - name: Run black
      run: black --check .

    - name: Run mypy
      run: mypy ml_microstructure/
33+
34+
# Unit tests with coverage, run once per Python version in the matrix.
test:
  runs-on: ubuntu-latest
  strategy:
    # Let every matrix entry finish so a failure on one Python version
    # does not cancel (and hide the result of) the other.
    fail-fast: false
    matrix:
      python-version: ['3.11', '3.12']

  steps:
    - uses: actions/checkout@v4

    # setup-python v5 runs on a supported Node runtime; v4 targets the
    # deprecated Node 16 runtime.
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"

    # Writes coverage.xml (consumed by the upload step below) and prints a
    # per-line "missing" report to the job log.
    - name: Run tests
      run: pytest --cov=ml_microstructure --cov-report=xml --cov-report=term-missing

    # codecov-action v4 replaces the Node 16 based v3 and uses the plural
    # `files` input. It uploads with a repository token.
    # NOTE(review): requires a CODECOV_TOKEN repository secret; if it is
    # missing the upload fails, but fail_ci_if_error keeps the job green.
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.xml
        flags: unittests
        name: codecov-umbrella
        fail_ci_if_error: false
63+
64+
# Integration checks, run only after lint and test have succeeded:
# pytest's "integration" marker plus three inline smoke scripts that
# exercise data generation, feature extraction and model training.
integration-test:
  runs-on: ubuntu-latest
  needs: [lint, test]

  steps:
    - uses: actions/checkout@v4

    # setup-python v5 runs on a supported Node runtime; v4 targets the
    # deprecated Node 16 runtime.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"

    - name: Run integration tests
      run: pytest tests/ -m integration

    # The inline scripts below use `shell: python`, so the step body is run
    # as a Python script file instead of being passed through
    # `python -c "..."`. This removes the shell-quoting layer around the code.
    - name: Test synthetic data generation
      shell: python
      run: |
        from ml_microstructure.data import SyntheticLOBGenerator

        generator = SyntheticLOBGenerator(duration_seconds=10)
        data = generator.generate_data()
        print(f'Generated {len(data)} synthetic snapshots')

    - name: Test feature extraction
      shell: python
      run: |
        from ml_microstructure.data import SyntheticLOBGenerator, OrderBookProcessor
        from ml_microstructure.features import FeaturePipeline

        generator = SyntheticLOBGenerator(duration_seconds=10)
        snapshots = generator.generate_data()
        processor = OrderBookProcessor()
        df = processor.process_snapshots(snapshots)
        pipeline = FeaturePipeline()
        features = pipeline.extract_features(df)
        print(f'Extracted {len(features.columns)} features')

    # End-to-end smoke test: generate data, extract features, label, then
    # fit a LightGBM model and predict on the training rows.
    # The unused `pandas` / `numpy` imports of the original script are gone.
    - name: Test model training
      shell: python
      run: |
        from ml_microstructure.data import SyntheticLOBGenerator, OrderBookProcessor
        from ml_microstructure.features import FeaturePipeline
        from ml_microstructure.models import ModelFactory, ModelConfig
        from ml_microstructure.utils.labeling import LabelGenerator

        # Generate data
        generator = SyntheticLOBGenerator(duration_seconds=10)
        snapshots = generator.generate_data()
        processor = OrderBookProcessor()
        df = processor.process_snapshots(snapshots)

        # Extract features
        pipeline = FeaturePipeline()
        df_features = pipeline.extract_features(df)

        # Generate labels
        label_generator = LabelGenerator()
        labels = label_generator.generate_labels(df_features)
        df_labeled = df_features.copy()
        df_labeled['label'] = labels
        df_labeled = df_labeled.dropna()

        # Prepare features: every column except the timestamp and the label
        feature_cols = [col for col in df_labeled.columns if col not in ['timestamp', 'label']]
        X = df_labeled[feature_cols]
        y = df_labeled['label']

        # Train model
        config = ModelConfig(model_type='lightgbm')
        model = ModelFactory.create_model(config)
        model.fit(X, y)

        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)

        print(f'Trained model on {len(X)} samples')
        print(f'Predictions shape: {predictions.shape}')
        print(f'Probabilities shape: {probabilities.shape}')
152+
153+
# Builds the distribution with `python -m build`, validates its metadata
# with twine and publishes dist/ as a workflow artifact. Runs only after
# lint, test and integration-test have succeeded.
build:
  runs-on: ubuntu-latest
  needs: [lint, test, integration-test]

  steps:
    - uses: actions/checkout@v4

    # setup-python v5 runs on a supported Node runtime; v4 targets the
    # deprecated Node 16 runtime.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install build dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build twine

    - name: Build package
      run: python -m build

    - name: Check package
      run: twine check dist/*

    # upload-artifact v3 has been retired and workflows using it fail on
    # GitHub-hosted runners; v4 is the supported replacement. Only one job
    # uploads the "dist" name, so v4's unique-artifact-name rule is met.
    - name: Upload build artifacts
      uses: actions/upload-artifact@v4
      with:
        name: dist
        path: dist/
181+
182+
# Security checks: known-vulnerability audit of installed packages (safety)
# and static analysis of the package source (bandit). Has no `needs`, so it
# runs in parallel with the other jobs.
security:
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4

    # setup-python v5 runs on a supported Node runtime; v4 targets the
    # deprecated Node 16 runtime.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    # Install the project before the scanners. Without it the environment
    # holds only safety and bandit, so `safety check` never sees the
    # project's own dependencies.
    # NOTE(review): assumes `safety check` with no arguments audits the
    # packages installed in the active environment - confirm for the
    # pinned safety version.
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e .
        pip install safety bandit

    - name: Run safety check
      run: safety check

    - name: Run bandit security check
      run: bandit -r ml_microstructure/
203+
204+
# Post-install sanity checks: the package imports and exposes __version__,
# and the CLI entry modules respond to --help. Runs after lint and test.
documentation:
  runs-on: ubuntu-latest
  needs: [lint, test]

  steps:
    - uses: actions/checkout@v4

    # setup-python v5 runs on a supported Node runtime; v4 targets the
    # deprecated Node 16 runtime.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"

    # Fails the step if the package cannot be imported or lacks __version__.
    - name: Check documentation
      run: |
        python -c "
        import ml_microstructure
        print('Package imports successfully')
        print(f'Version: {ml_microstructure.__version__}')
        "

    # Best-effort smoke check: a failing --help never fails the job, but is
    # raised as a warning annotation naming the module instead of a plain
    # log line that is easy to miss.
    - name: Test CLI commands
      run: |
        for module in \
          ml_microstructure.pipeline.train \
          ml_microstructure.pipeline.predict \
          ml_microstructure.pipeline.evaluate \
          ml_microstructure.backtest.run
        do
          python -m "$module" --help \
            || echo "::warning::CLI help not available for $module"
        done

0 commit comments

Comments
 (0)