1- from lifelines import AalenJohansenFitter
1+ # from lifelines import AalenJohansenFitter
22import pandas as pd
33import numpy as np
44import polars as pl
@@ -15,93 +15,93 @@ def _enum_dataframe(column_name: str, values: Sequence[str]) -> pl.DataFrame:
1515 return pl .DataFrame ({column_name : pl .Series (values , dtype = enum_dtype )})
1616
1717
18- def extract_aj_estimate (data_to_adjust , fixed_time_horizons ):
19- """
20- Python implementation of the R extract_aj_estimate function for Aalen-Johansen estimation.
21-
22- Parameters:
23- data_to_adjust (pd.DataFrame): DataFrame containing survival data
24- fixed_time_horizons (list or float): Time points at which to evaluate the survival
25-
26- Returns:
27- pd.DataFrame: DataFrame with Aalen-Johansen estimates
28- """
18+ # def extract_aj_estimate(data_to_adjust, fixed_time_horizons):
19+ # """
20+ # Python implementation of the R extract_aj_estimate function for Aalen-Johansen estimation.
21+
22+ # Parameters:
23+ # data_to_adjust (pd.DataFrame): DataFrame containing survival data
24+ # fixed_time_horizons (list or float): Time points at which to evaluate the survival
25+
26+ # Returns:
27+ # pd.DataFrame: DataFrame with Aalen-Johansen estimates
28+ # """
29+
30+ # # Ensure fixed_time_horizons is a list
31+ # if not isinstance(fixed_time_horizons, list):
32+ # fixed_time_horizons = [fixed_time_horizons]
33+
34+ # # Create a categorical version of reals for stratification
35+ # data = data_to_adjust.copy()
36+ # data["reals_cat"] = pd.Categorical(
37+ # data["reals_labels"],
38+ # categories=[
39+ # "real_negatives",
40+ # "real_positives",
41+ # "real_competing",
42+ # "real_censored",
43+ # ],
44+ # ordered=True,
45+ # )
2946
30- # Ensure fixed_time_horizons is a list
31- if not isinstance (fixed_time_horizons , list ):
32- fixed_time_horizons = [fixed_time_horizons ]
47+ # # Get unique strata values
48+ # strata_values = data["strata"].unique()
3349
34- # Create a categorical version of reals for stratification
35- data = data_to_adjust .copy ()
36- data ["reals_cat" ] = pd .Categorical (
37- data ["reals_labels" ],
38- categories = [
39- "real_negatives" ,
40- "real_positives" ,
41- "real_competing" ,
42- "real_censored" ,
43- ],
44- ordered = True ,
45- )
50+ # event_map = {
51+ # "real_negatives": 0, # Treat as censored
52+ # "real_positives": 1, # Event of interest
53+ # "real_competing": 2, # Competing risk
54+ # "real_censored": 0, # Censored
55+ # }
4656
47- # Get unique strata values
48- strata_values = data ["strata" ].unique ()
57+ # data["event_code"] = data["reals_labels"].map(event_map)
4958
50- event_map = {
51- "real_negatives" : 0 , # Treat as censored
52- "real_positives" : 1 , # Event of interest
53- "real_competing" : 2 , # Competing risk
54- "real_censored" : 0 , # Censored
55- }
59+ # # Initialize result dataframes
60+ # results = []
5661
57- data ["event_code" ] = data ["reals_labels" ].map (event_map )
62+ # # For each stratum, fit Aalen-Johansen model
63+ # for stratum in strata_values:
64+ # # Filter data for current stratum
65+ # stratum_data = data.loc[data["strata"] == stratum]
5866
59- # Initialize result dataframes
60- results = []
67+ # # Initialize Aalen-Johansen fitter
68+ # ajf = AalenJohansenFitter()
69+ # ajf_competing = AalenJohansenFitter()
6170
62- # For each stratum, fit Aalen-Johansen model
63- for stratum in strata_values :
64- # Filter data for current stratum
65- stratum_data = data .loc [data ["strata" ] == stratum ]
71+ # # Fit the model
72+ # ajf.fit(stratum_data["times"], stratum_data["event_code"], event_of_interest=1)
6673
67- # Initialize Aalen-Johansen fitter
68- ajf = AalenJohansenFitter ()
69- ajf_competing = AalenJohansenFitter ( )
74+ # ajf_competing.fit(
75+ # stratum_data["times"], stratum_data["event_code"], event_of_interest=2
76+ # )
7077
71- # Fit the model
72- ajf .fit (stratum_data ["times" ], stratum_data ["event_code" ], event_of_interest = 1 )
78+ # # Calculate cumulative incidence at fixed time horizons
79+ # for t in fixed_time_horizons:
80+ # n = len(stratum_data)
81+ # real_positives_est = ajf.predict(t)
82+ # real_competing_est = ajf_competing.predict(t)
83+ # real_negatives_est = 1 - real_positives_est - real_competing_est
7384
74- ajf_competing .fit (
75- stratum_data ["times" ], stratum_data ["event_code" ], event_of_interest = 2
76- )
85+ # states = ["real_negatives", "real_positives", "real_competing"]
86+ # estimates = [real_negatives_est, real_positives_est, real_competing_est]
7787
78- # Calculate cumulative incidence at fixed time horizons
79- for t in fixed_time_horizons :
80- n = len (stratum_data )
81- real_positives_est = ajf .predict (t )
82- real_competing_est = ajf_competing .predict (t )
83- real_negatives_est = 1 - real_positives_est - real_competing_est
84-
85- states = ["real_negatives" , "real_positives" , "real_competing" ]
86- estimates = [real_negatives_est , real_positives_est , real_competing_est ]
87-
88- for state , estimate in zip (states , estimates ):
89- results .append (
90- {
91- "strata" : stratum ,
92- "reals" : state ,
93- "fixed_time_horizon" : t ,
94- "reals_estimate" : estimate * n ,
95- }
96- )
88+ # for state, estimate in zip(states, estimates):
89+ # results.append(
90+ # {
91+ # "strata": stratum,
92+ # "reals": state,
93+ # "fixed_time_horizon": t,
94+ # "reals_estimate": estimate * n,
95+ # }
96+ # )
9797
98- # Convert to DataFrame
99- result_df = pd .DataFrame (results )
98+ # # Convert to DataFrame
99+ # result_df = pd.DataFrame(results)
100100
101- # Convert strata to categorical if needed
102- result_df ["strata" ] = pd .Categorical (result_df ["strata" ])
101+ # # Convert strata to categorical if needed
102+ # result_df["strata"] = pd.Categorical(result_df["strata"])
103103
104- return result_df
104+ # return result_df
105105
106106
107107def add_cutoff_strata (data : pl .DataFrame , by : float , stratified_by ) -> pl .DataFrame :
0 commit comments