Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI: run the test suite on every push to the py3-port branch and on all PRs.
name: CI

on:
  push:
    branches: [py3-port]  # or - py3-port
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # Test against every supported CPython minor version.
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install test deps
        run: |
          python -m pip install --upgrade pip
          pip install -e '.[dev]'  # pulls pytest + tox (from setup.py extras)

      - name: Run pytest
        run: python -m pytest -q
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Changelog
=========
:Version: 2.0.0 of 2025-08-02

Support for Python 3.8+
Added calibrate_tim2_day function to batch.py

:Version: 1.0.2 of 2017-09-29

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Installation
Dependencies (automatically installed)
--------------------------------------

- Python 2.7
- Python 3.8+
- NumPy
- SciPy
- Pandas
Expand Down
5 changes: 4 additions & 1 deletion priceprop/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from propagator import *
from .propagator import *

__version__ = "2.0.0"


def __reload_submodules__():
    """Re-import the package's submodules in place (for interactive sessions)."""
    # `reload` was a builtin in Python 2 only; in Python 3 it lives in
    # importlib, so the original call raised NameError after the py3 port.
    from importlib import reload
    from . import propagator
    reload(propagator)
153 changes: 137 additions & 16 deletions priceprop/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import matplotlib.pyplot as plt
import scorr
import propagator as prop
from . import propagator as prop

# Helpers
# ============================================================================
Expand All @@ -12,7 +12,15 @@ def complete_data_columns(
tt,
split_dates=True
):
"""Fill in columns necessary for models"""
"""
Fill in columns necessary for models

Args:
tt: pandas.DataFrame
The trades DataFrame
split_dates: bool
Whether to split trades into two samples based on date
"""

if not 'sc' in tt:
tt['sc'] = tt['sign'] * tt['change']
Expand All @@ -31,7 +39,18 @@ def complete_data_columns(


def get_trade_split(tt, split_by):
"Get dict of sample key-mask pairs for splitting trades into groups."
"""
Get dict of sample key-mask pairs for splitting trades into groups.

Args:
tt: pandas.DataFrame
The trades DataFrame
split_by: str
The column to split trades by
Returns:
dict
A dictionary with sample keys and mask values
"""
if split_by:
samples = tt[split_by].unique()
masks = {m: (tt[split_by] == m) for m in samples}
Expand All @@ -41,10 +60,19 @@ def get_trade_split(tt, split_by):
return masks

def shift(x, n, val=np.nan):
"""Shift array, pad with fixed value.
"""
Shift array, pad with fixed value.

Example: Convert r_1 = p(t+1) - p(t) to causal return p(t) - p(t-1)
without losing timesteps with pad(r_1, 1).
Args:
x: array-like
The array to shift
n: int
The number of steps to shift
val: float
The value to pad with
Returns:
"""

if n == 0:
Expand All @@ -59,6 +87,47 @@ def shift(x, n, val=np.nan):
res[:n] = x[n:]
return res


def calibrate_tim2_day(r, eps, maxlag=180, norm='corr'):
    """
    High-level shortcut: given raw return series ``r`` and order-flow
    series ``eps`` (same length), compute sparse PC/NPC streams, the six
    full-lag correlations, and call :pyfunc:`propagator.calibrate_tim2`.

    Args:
        r: array-like
            The return series.
        eps: array-like
            The order-flow (sign) series, same length as ``r``.
        maxlag: int
            Maximum lag passed to the kernel calibration.
        norm: str
            Normalization passed to the scorr correlation routines.
    Returns:
        tuple
            ``(G_npc, G_pc)`` -- the non-price-changing and
            price-changing propagator kernels.
    """
    # np and scorr are already imported at module level; the original
    # re-imported both locally (on one line), shadowing the module imports.

    # PC/NPC masks (price-changing = mid-price move with matching sign)
    mask_pc = (r != 0) & (np.sign(r) == np.sign(eps))
    mask_npc = ~mask_pc
    eps_pc_full = np.where(mask_pc, eps, 0.0)
    eps_npc_full = np.where(mask_npc, eps, 0.0)

    # auto- and cross-correlations of the two flow components
    nncorr = scorr.acorr(eps_npc_full, norm=norm)
    cccorr = scorr.acorr(eps_pc_full, norm=norm)
    cncorr = scorr.xcorr(eps_pc_full, eps_npc_full, norm=norm)
    ncncorr = scorr.xcorr(eps_npc_full, eps_pc_full, norm=norm)
    # response of returns to each flow component.
    # NOTE(review): `Sln` is computed from the price-changing stream and
    # `Slc` from the non-price-changing one -- the names look swapped;
    # confirm against propagator.calibrate_tim2's expected argument order.
    Sln = scorr.xcorr(r, eps_pc_full, norm=norm)
    Slc = scorr.xcorr(r, eps_npc_full, norm=norm)

    G_npc, G_pc = prop.calibrate_tim2(
        nncorr, cccorr, cncorr, ncncorr, Sln, Slc, maxlag=maxlag
    )

    return G_npc, G_pc


# Analyse Trades
# ============================================================================

Expand All @@ -68,9 +137,23 @@ def calibrate_models(
group=False,
models = ['cim','tim1','tim2','hdim2','hdim2_x2']
):
"""Return dict with correlations, kernels, and responses.
"""
Return dict with correlations, kernels, and responses.
Calculate sign & price change correlations, response functions,
and fitted propagator kernels for trades in DataFrame.

Args:
tt: pandas.DataFrame
The trades DataFrame
nfft: str
The nfft to calculate the response of
group: bool
Whether to group trades by date
models: list
The models to calibrate
Returns:
dict
A dictionary with the results
"""

# store results
Expand All @@ -81,6 +164,18 @@ def calibrate_models(

# apply intra-day mask
tt = tt[mask0]

# check if we have enough data after filtering
if len(tt) < 100: # minimum data size for correlation analysis
raise ValueError(f"Insufficient data for calibration: {len(tt)} rows (minimum 100 required)")

# check that all required columns have data
required_cols = ['change', 'sign', 'r1']
for col in required_cols:
if col not in tt.columns:
raise ValueError(f"Missing required column: {col}")
if tt[col].isnull().all():
raise ValueError(f"Column {col} contains only null values")

# get same optimal nfft used by pna functions
nfft_opt, events_required = scorr.get_nfft(nfft, tt.groupby('date')['r1'])
Expand Down Expand Up @@ -158,7 +253,7 @@ def calibrate_models(
# triple cross correlations
if 'hdim2' in models:
res['ccccorr'] = scorr.x3corr_grouped_df(
tt, ['change', 'sc', 'sc'], nfft=nfft, **kwargs
tt, ['change', 'sc', 'sc'], nfft=nfft, pad=maxlag, **kwargs
)[0]
res['nnccorr'] = scorr.x3corr_grouped_df(
tt, ['change', 'sn', 'sn'], nfft=nfft, **kwargs
Expand Down Expand Up @@ -245,11 +340,33 @@ def calc_models(
models = ['cim','tim1','tim2','hdim2','hdim2_x2'],
smooth_kernel = True
):
"""Add propagator(-like) models to trades.
"""
Add propagator(-like) models to trades.

Pass a dict: calibration is added to dict, not returned.
Pass trades DataFrame directly: calibration is returned as DataFrame(s)

Args:
dbc: dict
A dictionary with the trades DataFrame
nfft: str
The nfft to calculate the response of
group: bool
Whether to group trades by date
calibrate: bool
Whether to calibrate the models
split_by: str
The column to split trades by
rshift: int
The number of steps to shift the return
models: list
The models to run
smooth_kernel: bool
Whether to smooth the kernel
Returns:
dict
A dictionary with the results

See also: calibrate_models, aggregate_impact.add_models_to_trades
"""
# normalise inputs (dict / df)
Expand All @@ -273,23 +390,27 @@ def calc_models(

# get masks for different samples (groups of events, normally days)
masks = get_trade_split(tt, split_by)
samples = masks.keys()
samples = list(masks.keys())

for i in range(len(samples)):
# get calibration for a sample
## get a sample name and the corresponding mask
s = samples[i-1]
s = samples[i]
m = masks[s]

## calculate now or rely on existing calibration?
if calibrate:
cal = calibrate_models(
tt.loc[m], nfft=nfft, group=group, models=models
)
if 'cal' in dbc:
dbc['cal'][s] = cal
else:
dbc['cal'] = {s: cal}
try:
cal = calibrate_models(
tt.loc[m], nfft=nfft, group=group, models=models
)
if 'cal' in dbc:
dbc['cal'][s] = cal
else:
dbc['cal'] = {s: cal}
except ValueError as e:
print(f"Warning: Skipping sample '{s}' due to insufficient data: {e}")
continue # skip this sample and move to the next one
else:
cal = dbc['cal'][s]

Expand Down
Loading