From 40834d9f6b3daff918126573b2b93ac3d826fa3d Mon Sep 17 00:00:00 2001
From: tommoral
Date: Thu, 30 May 2024 11:18:37 +0200
Subject: [PATCH 1/4] ENH expect pd.DataFrame as input of FaDIn.fit

---
 fadin/solver.py      |  8 ++++++--
 fadin/utils/utils.py | 46 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/fadin/solver.py b/fadin/solver.py
index 18a3a7f..8839e96 100644
--- a/fadin/solver.py
+++ b/fadin/solver.py
@@ -252,8 +252,12 @@ def fit(self, events, end_time):
 
         Parameters
         ----------
-        events : list of array of size number of timestamps,
-            list size is self.n_dim.
+        events : pd.DataFrame
+            The events used to infer the parameters of the Hawkes process.
+            They should be given as a pd.DataFrame with the columns:
+            - `'time'` or index: the time of the event.
+            - `'type'`: the type of the event.
+            - `'mark'` (optional): the mark of the event.
 
         end_time : int
             The end time of the Hawkes process.
diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py
index 36e9270..eff85a2 100644
--- a/fadin/utils/utils.py
+++ b/fadin/utils/utils.py
@@ -68,16 +68,46 @@ def check_params(list_params, number_params):
 
 def projected_grid(events, grid_step, n_grid):
     """Project the events on the defined grid.
+
+    Parameters
+    ----------
+    events : pd.DataFrame
+        The events used to infer the parameters of the Hawkes process.
+        They should be given as a pd.DataFrame with the columns:
+        - `'time'` or index: the time of the event.
+        - `'type'`: the type of the event.
+        - `'mark'` (optional): the mark of the event.
+
+    grid_step : float
+        The step size of the grid.
+
+    n_grid : int
+        The number of grid points.
+
+    Returns
+    -------
+    events_grid : torch.Tensor
+        The events projected on the grid.
""" - n_dim = len(events) - # size_discret = int(1 / grid_step) + # compute time of the event on the grid + events['time_g'] = (events['time'] / grid_step).round().astype(int) + + # Compute sum of the marks, or number of events at each time in the grid + if 'mark' in events.columns: + events = events.groupby(['type', 'time_g'])['mark'].sum() + else: + events = events.groupby(['type', 'time_g']).count() + + # Make sure the resulting DataFrame has the right columns + events.name = "mark_sum" + events = events.reset_index() + + # Initialize the grid and fill it with events + n_dim = events['type'].nunique() events_grid = torch.zeros(n_dim, n_grid) - for i in range(n_dim): - ei_torch = torch.tensor(events[i]) - temp = torch.round(ei_torch / grid_step).long() # * grid_step - # temp2 = torch.round(temp * size_discret) - idx, data = np.unique(temp, return_counts=True) - events_grid[i, idx] += torch.tensor(data) + + i, idx = events['type'].values, events['time_g'].values + events_grid[i, idx] += torch.tensor(events["mark_sum"]) return events_grid From 9e4f8183fdf65503b48e48a482614c9ea60ee1bd Mon Sep 17 00:00:00 2001 From: tommoral Date: Thu, 30 May 2024 11:37:26 +0200 Subject: [PATCH 2/4] FIX generate events as pandas --- fadin/utils/utils.py | 2 +- fadin/utils/utils_simu.py | 132 +++++++++----------------------------- 2 files changed, 33 insertions(+), 101 deletions(-) diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index eff85a2..2976794 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -96,7 +96,7 @@ def projected_grid(events, grid_step, n_grid): if 'mark' in events.columns: events = events.groupby(['type', 'time_g'])['mark'].sum() else: - events = events.groupby(['type', 'time_g']).count() + events = events.groupby(['type', 'time_g']).size() # Make sure the resulting DataFrame has the right columns events.name = "mark_sum" diff --git a/fadin/utils/utils_simu.py b/fadin/utils/utils_simu.py index 25dd64b..ce38644 100644 --- a/fadin/utils/utils_simu.py +++ b/fadin/utils/utils_simu.py @@ -1,16 +1,30 @@ import numpy as np -from scipy.optimize import minimize_scalar -from scipy.interpolate import interp1d -from scipy.integrate import simps -import scipy.stats as stats +import pandas as pd + +import scipy +from scipy import stats def find_max(intensity_function, duration): """Find the maximum intensity of a function.""" - res = minimize_scalar(lambda x: -intensity_function(x), bounds=(0, duration)) + res = scipy.optimize.minimize_scalar( + lambda x: -intensity_function(x), bounds=(0, duration) + ) return -res.fun +def to_pandas(events): + + return pd.DataFrame({ + 'time': np.concatenate(events), + 'type': np.concatenate([[i] * len(events[i]) for i in range(n_dim)]) + }) + + +def from_pandas(events): + return [events.query("type == @i") for i in events['type'].unique()] + + def check_random_state(seed): """Turn seed into a np.random.RandomState instance. If seed is None, return the RandomState singleton used by np.random. 
@@ -84,7 +98,7 @@ def __init__(self, custom_density, params=dict(), kernel_length=10): # function to normalise the pdf over chosen domain def normalisation(self, x): - return simps(self.pdf(x), x) + return scipy.integrate.simps(self.pdf(x), x) def create_cdf_ppf(self): # define normalization support with the given kernel length @@ -98,8 +112,10 @@ def create_cdf_ppf(self): # make sure cdf bounded on [0,1] my_cdf = my_cdf / my_cdf[-1] # create cdf and ppf - func_cdf = interp1d(discrete_time, my_cdf) - func_ppf = interp1d(my_cdf, discrete_time, fill_value='extrapolate') + func_cdf = scipy.interpolate.interp1d(discrete_time, my_cdf) + func_ppf = scipy.interpolate.interp1d( + my_cdf, discrete_time, fill_value='extrapolate' + ) return func_cdf, func_ppf # pdf function for averaged normals @@ -151,7 +167,7 @@ def simu_poisson(end_time, intensity, upper_bound=None, random_state=None): assert isinstance(intensity, (int, float)) n_events = rng.poisson(lam=intensity*end_time, size=1) events = np.sort(rng.uniform(0, end_time, size=n_events)) - return events + return to_pandas(events) if upper_bound is None: upper_bound = find_max(intensity, end_time) @@ -164,7 +180,7 @@ def simu_poisson(end_time, intensity, upper_bound=None, random_state=None): # ogata's thinning algorithm accepted = intensity(ev_x) > ev_y events = np.sort(ev_x[accepted]) - return events + return to_pandas(events) def simu_multi_poisson(end_time, intensity, upper_bound=None, random_state=None): @@ -206,7 +222,7 @@ def simu_multi_poisson(end_time, intensity, upper_bound=None, random_state=None) for i in range(n_dim): evs = np.sort(rng.uniform(0, end_time, size=n_events[i])) events.append(evs) - return events + return to_pandas(events) if upper_bound is None: upper_bound = np.zeros(n_dim) @@ -221,7 +237,7 @@ def simu_multi_poisson(end_time, intensity, upper_bound=None, random_state=None) events.append(np.sort(ev_xi[accepted_i])) - return events + return to_pandas(events) def simu_hawkes_cluster(end_time, baseline, alpha, kernel, @@ -282,7 +298,7 @@ def simu_hawkes_cluster(end_time, baseline, alpha, kernel, upper_bound=upper_bound, random_state=random_state) gen = dict(gen0=immigrants) - events = immigrants.copy() + events = from_pandas(immigrants) it = 0 while len(gen[f'gen{it}']): @@ -317,7 +333,8 @@ def simu_hawkes_cluster(end_time, baseline, alpha, kernel, it += 1 - return events + + return to_pandas(events) def simu_hawkes_thinning(end_time, baseline, alpha, kernel, @@ -402,89 +419,4 @@ def simu_hawkes_thinning(end_time, baseline, alpha, kernel, del events[i][-1] events[i] = np.array(events[i]) - return events - - -# def simu_hawkes(end_time, baseline, alpha, kernel, upper_bound=None, -# random_state=None): -# """ Simulate a Hawkes process following an immigration-birth procedure. -# Edge effects may be reduced according to the second references below. - -# References: - -# Møller, J., & Rasmussen, J. G. (2006). Approximate simulation of Hawkes processes. -# Methodology and Computing in Applied Probability, 8, 53-64. - -# Møller, J., & Rasmussen, J. G. (2005). Perfect simulation of Hawkes processes. -# Advances in applied probability, 37(3), 629-646. - -# Parameters -# ---------- -# end_time : int | float -# Duration of the Poisson process. - -# baseline : float -# Baseline parameter of the Hawkes process. - -# alpha : float -# Weight parameter associated to the kernel function. - -# kernel: str -# The choice of the kernel for the simulation. -# Kernel available are probability distribution from scipy.stats module. 
- -# upper_bound : int, float or None, default=None -# Upper bound of the baseline function. If None, -# the maximum of the baseline is taken onto a finite discrete grid. - -# random_state : int, RandomState instance or None, default=None -# Set the numpy seed to 'random_state'. - -# Returns -# ------- -# events : array -# The timestamps of the point process' events. -# """ -# if random_state is None: -# np.random.seed(0) -# else: -# np.random.seed(random_state) - -# # Simulate events from baseline -# immigrants = simu_poisson(end_time, intensity=baseline, upper_bound=upper_bound) - -# # initialize -# gen = dict(gen0=immigrants) -# ev = [immigrants] -# sample_from_kernel = getattr(scipy.stats, kernel) # dic = [4, 3] - -# # generate offsprings -# it = 0 -# while len(gen[f'gen{it}']): -# Ci = gen[f'gen{it}'] - -# # number of descendents of an event follow a Poisson law of parameter alpha -# # the expected number of event induced by an event is then alpha - -# Di = np.random.poisson(lam=alpha, size=len(Ci)) - -# # sample interval range according to the given pdf (kernel of the parameter) -# Ci = np.repeat(Ci, repeats=Di) -# n = Di.sum() -# Ei = sample_from_kernel.rvs(size=n) # * dic -# Fi = Ci + Ei - - -# if len(Fi) > 0: -# gen[f'gen{it+1}'] = np.hstack(Fi) -# ev.append(np.hstack(Fi)) -# else: -# break -# it += 1 - -# # Add immigrants and sort -# events = np.hstack(ev) -# valid_events = events < end_time -# events = np.sort(events[valid_events]) - -# return events + return to_pandas(events) From 8035e9fe5eab857c2eabacf4595185858952393d Mon Sep 17 00:00:00 2001 From: tommoral Date: Thu, 30 May 2024 14:39:09 +0200 Subject: [PATCH 3/4] FIX tests --- fadin/utils/utils_simu.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fadin/utils/utils_simu.py b/fadin/utils/utils_simu.py index ce38644..f4f051e 100644 --- a/fadin/utils/utils_simu.py +++ b/fadin/utils/utils_simu.py @@ -17,12 +17,18 @@ def to_pandas(events): return pd.DataFrame({ 'time': np.concatenate(events), - 'type': np.concatenate([[i] * len(events[i]) for i in range(n_dim)]) + 'type': np.concatenate([ + [i] * len(evi) for i, evi in enumerate(events) + ]) }) def from_pandas(events): - return [events.query("type == @i") for i in events['type'].unique()] + assert 'mark' not in events.columns + return [ + events.query("type == @i")['time'].values + for i in events['type'].unique() + ] def check_random_state(seed): @@ -297,8 +303,9 @@ def simu_hawkes_cluster(end_time, baseline, alpha, kernel, immigrants = simu_multi_poisson(end_time, baseline, upper_bound=upper_bound, random_state=random_state) + immigrants = from_pandas(immigrants) gen = dict(gen0=immigrants) - events = from_pandas(immigrants) + events = immigrants.copy() it = 0 while len(gen[f'gen{it}']): @@ -333,7 +340,6 @@ def simu_hawkes_cluster(end_time, baseline, alpha, kernel, it += 1 - return to_pandas(events) @@ -363,8 +369,8 @@ def simu_hawkes_thinning(end_time, baseline, alpha, kernel, kernel: str The choice of the kernel for the simulation. Kernel available are probability distribution from scipy.stats module. - Note that this function will automatically convert the scipy kernel to a - finite support kernel of size 'kernel_length'. + Note that this function will automatically convert the scipy kernel to + a finite support kernel of size 'kernel_length'. kernel_length: int Length of kernels in the Hawkes process. 
From 44c5acd370140d0008e6d06bfe1906a35b3d8557 Mon Sep 17 00:00:00 2001 From: tommoral Date: Thu, 30 May 2024 14:44:58 +0200 Subject: [PATCH 4/4] FIX add missing deps --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e6f5458..89c028d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ "torch>=1.11", "scipy>=1.8.0", "numba>=0.55.2", - "matplotlib>=3.7.2" + "matplotlib>=3.7.2", + "pandas>=2.0" ]
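As a sanity check on the patch 3 helpers, the round trip below stacks per-dimension timestamp arrays into the new DataFrame format and recovers them. `to_pandas` and `from_pandas` are copied from the diff above; the simulated timestamps are made-up data:

    import numpy as np
    import pandas as pd

    def to_pandas(events):
        # Stack the per-dimension timestamp arrays into a single
        # DataFrame with 'time' and 'type' columns (patch 3 version).
        return pd.DataFrame({
            'time': np.concatenate(events),
            'type': np.concatenate([
                [i] * len(evi) for i, evi in enumerate(events)
            ])
        })

    def from_pandas(events):
        # Inverse transform: one array of timestamps per event type.
        assert 'mark' not in events.columns
        return [
            events.query("type == @i")['time'].values
            for i in events['type'].unique()
        ]

    rng = np.random.RandomState(0)
    raw = [np.sort(rng.uniform(0, 10, size=5)),
           np.sort(rng.uniform(0, 10, size=3))]
    recovered = from_pandas(to_pandas(raw))
    assert all(np.allclose(a, b) for a, b in zip(raw, recovered))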