Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI: run the test suite on every push to the py3-port branch and on all PRs.
name: CI

on:
  push:
    branches: [py3-port]  # or - py3-port
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # Test against every supported CPython minor version.
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install test deps
        run: |
          python -m pip install --upgrade pip
          pip install -e '.[dev]'  # pulls pytest + tox (from setup.py extras)

      - name: Run pytest
        run: python -m pytest -q
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Changelog
=========
:Version: 2.0.0 of 2025-08-02

Support for Python 3.8+
Added calibrate_tim2_day function to batch.py

:Version: 1.0.2 of 2017-09-29

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Installation
Dependencies (automatically installed)
--------------------------------------

- Python 2.7
- Python 3.8+
- NumPy
- SciPy
- Pandas
Expand Down
5 changes: 4 additions & 1 deletion priceprop/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from propagator import *
from .propagator import *

__version__ = "2.0.0"


def __reload_submodules__():
    """Re-import the package's submodules in place (for interactive sessions)."""
    # `reload` was a builtin in Python 2 only; in Python 3 it lives in
    # importlib, so the original call raised NameError after the py3 port.
    from importlib import reload
    from . import propagator
    reload(propagator)
153 changes: 137 additions & 16 deletions priceprop/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import matplotlib.pyplot as plt
import scorr
import propagator as prop
from . import propagator as prop

# Helpers
# ============================================================================
Expand All @@ -12,7 +12,15 @@ def complete_data_columns(
tt,
split_dates=True
):
"""Fill in columns necessary for models"""
"""
Fill in columns necessary for models

Args:
tt: pandas.DataFrame
The trades DataFrame
split_dates: bool
Whether to split trades into two samples based on date
"""

if not 'sc' in tt:
tt['sc'] = tt['sign'] * tt['change']
Expand All @@ -31,7 +39,18 @@ def complete_data_columns(


def get_trade_split(tt, split_by):
"Get dict of sample key-mask pairs for splitting trades into groups."
"""
Get dict of sample key-mask pairs for splitting trades into groups.

Args:
tt: pandas.DataFrame
The trades DataFrame
split_by: str
The column to split trades by
Returns:
dict
A dictionary with sample keys and mask values
"""
if split_by:
samples = tt[split_by].unique()
masks = {m: (tt[split_by] == m) for m in samples}
Expand All @@ -41,10 +60,19 @@ def get_trade_split(tt, split_by):
return masks

def shift(x, n, val=np.nan):
"""Shift array, pad with fixed value.
"""
Shift array, pad with fixed value.

Example: Convert r_1 = p(t+1) - p(t) to causal return p(t) - p(t-1)
without losing timesteps with pad(r_1, 1).
Args:
x: array-like
The array to shift
n: int
The number of steps to shift
val: float
The value to pad with
Returns:
"""

if n == 0:
Expand All @@ -59,6 +87,47 @@ def shift(x, n, val=np.nan):
res[:n] = x[n:]
return res


def calibrate_tim2_day(r, eps, maxlag=180, norm='corr'):
    """
    High-level shortcut: given raw return series ``r`` and order-flow
    series ``eps`` (same length), compute sparse PC/NPC streams, the six
    full-lag correlations, and call :pyfunc:`propagator.calibrate_tim2`.

    Args:
        r: array-like
            The return series.
        eps: array-like
            The order-flow (sign) series, same length as ``r``.
        maxlag: int
            Maximum lag passed to the kernel calibration.
        norm: str
            Normalization passed to the scorr correlation routines.
    Returns:
        tuple
            ``(G_npc, G_pc)`` -- the non-price-changing and
            price-changing propagator kernels.
    """
    # np and scorr are already imported at module level; the original
    # re-imported both locally (on one line), shadowing the module imports.

    # PC/NPC masks (price-changing = mid-price move with matching sign)
    mask_pc = (r != 0) & (np.sign(r) == np.sign(eps))
    mask_npc = ~mask_pc
    eps_pc_full = np.where(mask_pc, eps, 0.0)
    eps_npc_full = np.where(mask_npc, eps, 0.0)

    # auto- and cross-correlations of the two flow components
    nncorr = scorr.acorr(eps_npc_full, norm=norm)
    cccorr = scorr.acorr(eps_pc_full, norm=norm)
    cncorr = scorr.xcorr(eps_pc_full, eps_npc_full, norm=norm)
    ncncorr = scorr.xcorr(eps_npc_full, eps_pc_full, norm=norm)
    # response of returns to each flow component.
    # NOTE(review): `Sln` is computed from the price-changing stream and
    # `Slc` from the non-price-changing one -- the names look swapped;
    # confirm against propagator.calibrate_tim2's expected argument order.
    Sln = scorr.xcorr(r, eps_pc_full, norm=norm)
    Slc = scorr.xcorr(r, eps_npc_full, norm=norm)

    G_npc, G_pc = prop.calibrate_tim2(
        nncorr, cccorr, cncorr, ncncorr, Sln, Slc, maxlag=maxlag
    )

    return G_npc, G_pc


# Analyse Trades
# ============================================================================

Expand All @@ -68,9 +137,23 @@ def calibrate_models(
group=False,
models = ['cim','tim1','tim2','hdim2','hdim2_x2']
):
"""Return dict with correlations, kernels, and responses.
"""
Return dict with correlations, kernels, and responses.
Calculate sign & price change correlations, response functions,
and fitted propagator kernels for trades in DataFrame.

Args:
tt: pandas.DataFrame
The trades DataFrame
nfft: str
The nfft to calculate the response of
group: bool
Whether to group trades by date
models: list
The models to calibrate
Returns:
dict
A dictionary with the results
"""

# store results
Expand All @@ -81,6 +164,18 @@ def calibrate_models(

# apply intra-day mask
tt = tt[mask0]

# check if we have enough data after filtering
if len(tt) < 100: # minimum data size for correlation analysis
raise ValueError(f"Insufficient data for calibration: {len(tt)} rows (minimum 100 required)")

# check that all required columns have data
required_cols = ['change', 'sign', 'r1']
for col in required_cols:
if col not in tt.columns:
raise ValueError(f"Missing required column: {col}")
if tt[col].isnull().all():
raise ValueError(f"Column {col} contains only null values")

# get same optimal nfft used by pna functions
nfft_opt, events_required = scorr.get_nfft(nfft, tt.groupby('date')['r1'])
Expand Down Expand Up @@ -158,7 +253,7 @@ def calibrate_models(
# triple cross correlations
if 'hdim2' in models:
res['ccccorr'] = scorr.x3corr_grouped_df(
tt, ['change', 'sc', 'sc'], nfft=nfft, **kwargs
tt, ['change', 'sc', 'sc'], nfft=nfft, pad=maxlag, **kwargs
)[0]
res['nnccorr'] = scorr.x3corr_grouped_df(
tt, ['change', 'sn', 'sn'], nfft=nfft, **kwargs
Expand Down Expand Up @@ -245,11 +340,33 @@ def calc_models(
models = ['cim','tim1','tim2','hdim2','hdim2_x2'],
smooth_kernel = True
):
"""Add propagator(-like) models to trades.
"""
Add propagator(-like) models to trades.

Pass a dict: calibration is added to dict, not returned.
Pass trades DataFrame directly: calibration is returned as DataFrame(s)

Args:
dbc: dict
A dictionary with the trades DataFrame
nfft: str
The nfft to calculate the response of
group: bool
Whether to group trades by date
calibrate: bool
Whether to calibrate the models
split_by: str
The column to split trades by
rshift: int
The number of steps to shift the return
models: list
The models to run
smooth_kernel: bool
Whether to smooth the kernel
Returns:
dict
A dictionary with the results

See also: calibrate_models, aggregate_impact.add_models_to_trades
"""
# normalise inputs (dict / df)
Expand All @@ -273,23 +390,27 @@ def calc_models(

# get masks for different samples (groups of events, normally days)
masks = get_trade_split(tt, split_by)
samples = masks.keys()
samples = list(masks.keys())

for i in range(len(samples)):
# get calibration for a sample
## get a sample name and the corresponding mask
s = samples[i-1]
s = samples[i]
m = masks[s]

## calculate now or rely on existing calibration?
if calibrate:
cal = calibrate_models(
tt.loc[m], nfft=nfft, group=group, models=models
)
if 'cal' in dbc:
dbc['cal'][s] = cal
else:
dbc['cal'] = {s: cal}
try:
cal = calibrate_models(
tt.loc[m], nfft=nfft, group=group, models=models
)
if 'cal' in dbc:
dbc['cal'][s] = cal
else:
dbc['cal'] = {s: cal}
except ValueError as e:
print(f"Warning: Skipping sample '{s}' due to insufficient data: {e}")
continue # skip this sample and move to the next one
else:
cal = dbc['cal'][s]

Expand Down
Loading