@@ -98,46 +98,7 @@ def lrtp_restrict_vp_to_only_terminal_stop_ids(polars_df: pl.DataFrame) -> pl.Da
9898 )
9999 return polars_df
100100
def temporary_lrtp_assign_new_trip_ids(polars_df: pl.DataFrame, threshold_sec: int = 60 * 15) -> pl.DataFrame:
    """
    Give temporary trip ids to records that share a trip_id across an improbably long gap.

    THIS IS NOT PERMANENT - THIS SHOULD BE REMOVED WHEN THE TRIP_IDS ARE PROPERLY POPULATED UPSTREAM

    Steps, all evaluated per ("vehicle.trip.trip_id", "vehicle.trip.start_date") group
    after sorting the frame by "feed_timestamp":

    1. "mdiff"  - difference between consecutive "feed_timestamp" values.
    2. "new_id" - running count (cast to string) of rows whose "mdiff" is not
       below ``threshold_sec``.
    3. Rows whose "new_id" is not "1" have their trip id replaced with
       ``<trip_id>_LAMP<new_id>``; all other rows keep their original trip id.

    Args:
        polars_df: frame containing "feed_timestamp", "vehicle.trip.trip_id" and
            "vehicle.trip.start_date".
        threshold_sec: gap at or above which a new id is started. The comparison is
            made directly against "mdiff" - assumes "feed_timestamp" is numeric
            seconds; TODO confirm.

    Returns:
        The frame sorted by "feed_timestamp" with "vehicle.trip.trip_id" rewritten.
        The helper columns "mdiff" and "new_id" are still present in the result;
        only "vehicle.trip.trip_id1" is dropped.
    """
    trip_key = ("vehicle.trip.trip_id", "vehicle.trip.start_date")

    # gap between consecutive observations of the same trip
    gap_expr = pl.col("feed_timestamp").diff().alias("mdiff").over(*trip_key)

    # 0 while the gap stays under the threshold, 1 otherwise; the running total
    # numbers each stretch of observations within the trip.
    # NOTE(review): the first row of each group has a null "mdiff" - it looks like
    # that falls through to otherwise(1), making fill_null a no-op; confirm.
    segment_expr = (
        pl.when(pl.col("mdiff") < threshold_sec)
        .then(0)
        .otherwise(1)
        .fill_null(0)
        .cum_sum()
        .cast(pl.String)
        .alias("new_id")
        .over(*trip_key)
    )

    # candidate replacement id; left null for rows whose "new_id" is "1"
    suffixed_trip_id = pl.concat_str(
        [pl.col("vehicle.trip.trip_id"), pl.lit("_LAMP"), pl.col("new_id")],
        ignore_nulls=True,
    )
    candidate_expr = pl.when(pl.col("new_id") != "1").then(suffixed_trip_id).alias("vehicle.trip.trip_id1")

    # prefer the candidate id, fall back to the original trip id
    resolved_expr = pl.coalesce("vehicle.trip.trip_id1", "vehicle.trip.trip_id").alias("vehicle.trip.trip_id")

    # each step reads a column produced by the previous one, so apply them one at a time
    ordered = polars_df.sort("feed_timestamp")
    with_gaps = ordered.with_columns(gap_expr)
    with_segments = with_gaps.with_columns(segment_expr)
    with_candidates = with_segments.with_columns(candidate_expr)
    relabelled = with_candidates.with_columns(resolved_expr)

    return relabelled.drop("vehicle.trip.trip_id1")
136-
137101 polars_df = lrtp_restrict_vp_to_only_terminal_stop_ids (polars_df )
138- # after we have filtered to only terminal stop_ids, check that each trip_id is consistent with its timestamps, and
139- # assign new trip IDs where it is not
140- polars_df = temporary_lrtp_assign_new_trip_ids (polars_df )
141102 polars_df = apply_gtfs_rt_vehicle_positions_timezone_conversions (polars_df )
142103 valid = LightRailTerminalVehiclePositions .validate (polars_df )
143104
0 commit comments