Skip to content

Commit 6493a3d

Browse files
authored
fix(tableau): re-remove temporary trip IDs (#673)
* Re-remove temporary trip IDs
* Bump versions for devgreen LRTP datasets
1 parent 2a956ae commit 6493a3d

File tree

2 files changed

+2
-39
lines changed

2 files changed

+2
-39
lines changed

src/lamp_py/runtime_utils/remote_files.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,11 +221,13 @@ def s3_uri(self) -> str:
221221
tableau_devgreen_rt_vehicle_positions_lightrail_60_day = S3Location(
222222
bucket=S3_ARCHIVE,
223223
prefix=os.path.join(TABLEAU, "devgreen-gtfs-rt", "LAMP_DEVGREEN_RT_VehiclePositions_LR_60_day.parquet"),
224+
version="1.1.0",
224225
)
225226
# light rail output file - to be converted to .hyper
226227
tableau_devgreen_rt_trip_updates_lightrail_60_day = S3Location(
227228
bucket=S3_ARCHIVE,
228229
prefix=os.path.join(TABLEAU, "devgreen-gtfs-rt", "LAMP_DEVGREEN_RT_TripUpdates_LR_60_day.parquet"),
230+
version="1.1.0",
229231
)
230232

231233
# dataframely errors

src/lamp_py/tableau/conversions/convert_gtfs_rt_vehicle_position.py

Lines changed: 0 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -98,46 +98,7 @@ def lrtp_restrict_vp_to_only_terminal_stop_ids(polars_df: pl.DataFrame) -> pl.Da
9898
)
9999
return polars_df
100100

101-
def temporary_lrtp_assign_new_trip_ids(polars_df: pl.DataFrame, threshold_sec: int = 60 * 15) -> pl.DataFrame:
102-
"""
103-
Function to apply temporary trip ids to trips that have the same trip_id assigned for improbable stop durations at a single station
104-
105-
THIS IS NOT PERMANENT - THIS SHOULD BE REMOVED WHEN THE TRIP_IDS ARE PROPERLY POPULATED UPSTREAM
106-
107-
"""
108-
polars_df = (
109-
polars_df.sort("feed_timestamp")
110-
.with_columns(
111-
pl.col("feed_timestamp").diff().alias("mdiff").over("vehicle.trip.trip_id", "vehicle.trip.start_date")
112-
)
113-
.with_columns(
114-
pl.when(pl.col("mdiff") < threshold_sec)
115-
.then(0)
116-
.otherwise(1)
117-
.fill_null(0)
118-
.cum_sum()
119-
.cast(pl.String)
120-
.alias("new_id")
121-
.over("vehicle.trip.trip_id", "vehicle.trip.start_date")
122-
)
123-
.with_columns(
124-
pl.when(pl.col("new_id").ne("1"))
125-
.then(
126-
pl.concat_str(
127-
[pl.col("vehicle.trip.trip_id"), pl.lit("_LAMP"), pl.col("new_id")], ignore_nulls=True
128-
)
129-
)
130-
.alias("vehicle.trip.trip_id1")
131-
)
132-
.with_columns(pl.coalesce("vehicle.trip.trip_id1", "vehicle.trip.trip_id").alias("vehicle.trip.trip_id"))
133-
.drop("vehicle.trip.trip_id1")
134-
)
135-
return polars_df
136-
137101
polars_df = lrtp_restrict_vp_to_only_terminal_stop_ids(polars_df)
138-
# after we have filtered to only terminal stop_ids, then check that the trip_id vs timestamps make sense, and
139-
# assign new trip IDs if it doesn't
140-
polars_df = temporary_lrtp_assign_new_trip_ids(polars_df)
141102
polars_df = apply_gtfs_rt_vehicle_positions_timezone_conversions(polars_df)
142103
valid = LightRailTerminalVehiclePositions.validate(polars_df)
143104

0 commit comments

Comments
 (0)