-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgtfstools.Rmd
More file actions
337 lines (259 loc) · 10 KB
/
gtfstools.Rmd
File metadata and controls
337 lines (259 loc) · 10 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
---
title: "gtfstools"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
### Reading and manipulating GTFS files with data.table
```{r}
library(gtfstools)
path <- system.file("extdata/spo_gtfs.zip", package = "gtfstools")
gtfs <- read_gtfs(path) # transform to object, data.table
names(gtfs)
head(gtfs$trips)
```
```{r}
# Using data.table to manipulate the dataset
original_headway <- gtfs$frequencies$headway_secs # save original data
head(gtfs$frequencies, 3)
gtfs$frequencies[, headway_secs := headway_secs + 100] # update headway + 100s
# gtfs$frequencies <- gtfs$frequencies$headway_secs + 100
head(gtfs$frequencies, 3)
# restores the original headway
gtfs$frequencies[, headway_secs := original_headway]
head(gtfs$frequencies, 3)
```
```{r}
# write modified GTFS objects to GTFS txt files again
export_path <- tempfile("new_gtfs", fileext = ".zip") # points to the path where gtfs should be written to
write_gtfs(gtfs, path = export_path) # write gtfs to path
zip::zip_list(export_path)[, c("filename", "compressed_size", "timestamp")] # list files within feed
```
### Calculating trip speed
```{r}
# calculating the speeds of all trips (km/h)
speeds <- get_trip_speed(gtfs)
head(speeds)
nrow(speeds)
```
```{r}
# calculates the speeds of two specific trips (km/h)
speeds <- get_trip_speed(gtfs, trip_id = c("CPTM L07-0", "2002-10-0"))
speeds
```
```{r}
# calculate the length of all trips (km)
lengths <- get_trip_length(gtfs, file = "shapes")
head(lengths)
```
```{r}
# calculates the duration of all trips (minute)
durations <- get_trip_duration(gtfs)
head(durations)
```
### Combining and filtering feeds
#### Combine / merge GTFS objects that have same data table from 2 gtfs
```{r}
# Remove duplication of record
gtfs$agency
gtfs$calendar
no_dups_gtfs <- remove_duplicates(gtfs)
no_dups_gtfs$agency
no_dups_gtfs$calendar
```
```{r}
# merge 2 feeds file: bus feed and rail feed for one aree
poa_path <- system.file("extdata/poa_gtfs.zip", package = "gtfstools") # reads Porto Alegre's GTFS
poa_gtfs <- read_gtfs(poa_path)
poa_gtfs$agency
# combines Porto Alegre's and São Paulo's GTFS objects
combined_gtfs <- merge_gtfs(no_dups_gtfs, poa_gtfs)
# check result
combined_gtfs$agency
```
#### Filtering by identifiers
```{r}
# checks pre-filter object size
utils::object.size(gtfs)
```
```{r}
# select column with data.table
head(gtfs$trips[, .(trip_id, trip_headsign, shape_id)])
# keeps entries related to the two specified ids
filtered_gtfs <- filter_by_trip_id(
gtfs,
trip_id = c("CPTM L07-0", "CPTM L07-1")
)
# checks post-filter object size
utils::object.size(filtered_gtfs)
# check data after filter
head(filtered_gtfs$trips[, .(trip_id, trip_headsign, shape_id)])
unique(filtered_gtfs)
unique(filtered_gtfs$shapes$shape_id)
```
#### Filtering by day of the week and time of the day
```{r}
# keeps services that operate on both saturday AND sunday
head(no_dups_gtfs$calendar)
filtered_gtfs <- filter_by_weekday(
no_dups_gtfs,
weekday = c("saturday", "sunday"),
combine = "and"
)
filtered_gtfs$calendar[, c("service_id", "sunday", "saturday")]
# keeps services that operate on both saturday OR sunday
filtered_gtfs <- filter_by_weekday(
no_dups_gtfs,
weekday = c("saturday", "sunday"),
combine = "or"
)
filtered_gtfs$calendar[, c("service_id", "sunday", "saturday")]
```
```{r}
# filter by time of the day => apply for "frequencies"
filtered_gtfs <- filter_by_time_of_day(gtfs, from = "05:00:00", to = "06:00:00")
head(filtered_gtfs$frequencies)
```
#### Filtering using a spatial extent
```{r}
# Ex: SPTran's Feed, shape = 68962
library(ggplot2)
# creates a polygon with the bounding box of shape 68962
# note: shape of GTFS presents a path that vehicle follows.
shape_68962 <- convert_shapes_to_sf(gtfs, shape_id = "68962") # convert shape to a simple feature spatial linestring object representing the route's geometry.
bbox <- sf::st_bbox(shape_68962) # bbox is now a numeric vector containing the min/max coordinates that frame the shape (xmin: Minimum longitude (left), ymin: Minimum latitude (bottom), xmax: Maximum longitude (right), ymax: Maximum latitude (top))
bbox_geometry <- sf::st_as_sfc(bbox) # a polygon. While st_bbox() gives a basic numerical extent, converting it into an sfc polygon makes it more flexible for spatial analysis
# creates a geometry with all the shapes described in the gtfs
all_shapes <- convert_shapes_to_sf(gtfs)
ggplot() +
geom_sf(data = all_shapes) +
geom_sf(data = bbox_geometry, fill = NA, color = "red") +
theme_minimal()
```
```{r}
# filter_by_sf() keeps all entries related to trips that INTERSECT with the specified polygon bbox (bbox was calculated above shape = 68962)
filtered_gtfs <- filter_by_sf(gtfs, bbox) # This function filters the GTFS dataset (gtfs) to include only elements intersect the bounding box (bbox).
# An intersection occurs when:
## A shape (route line) passes through the bbox (even partially).
## A shape starts or ends inside the bbox.
## A shape completely fits inside the bbox.
## A shape only touches the edge of the bbox.
filtered_shapes <- convert_shapes_to_sf(filtered_gtfs)
ggplot() +
geom_sf(data = filtered_shapes) +
geom_sf(data = bbox_geometry, fill = NA, color = "red") +
theme_minimal()
```
```{r}
# try with spatial_operation = sf::st_contains
filtered_gtfs <- filter_by_sf(gtfs, bbox, spatial_operation = sf::st_contains)
filtered_shapes <- convert_shapes_to_sf(filtered_gtfs)
ggplot() +
geom_sf(data = filtered_shapes) +
geom_sf(data = bbox_geometry, fill = NA, color = "red") +
theme_minimal()
```
### Validating GTFS data
```{r}
tmpdir <- tempdir()
validator_path <- download_validator(tmpdir)
validator_path
```
```{r, eval=FALSE}
output_dir <- tempfile("gtfs_validation")
validate_gtfs(
path,
output_path = output_dir,
validator_path = validator_path
)
list.files(output_dir)
```
### workflow example: spatial visualization of headways
1. How are the times between vehicles operating the same route (the headways) spatially distributed in SPTrans’ GTFS?
2. Or Time differences between consecutive trips serving the same stop along the same route
```{r}
# Scope of analysis: services operating during the morning peak, between 7am and 9am, on a typical tuesday
gtfs <- read_gtfs(path)
library(dplyr)
# filters the GTFS
filtered_gtfs <- gtfs |>
remove_duplicates() |>
filter_by_weekday("tuesday") |>
filter_by_time_of_day(from = "07:00:00", to = "09:00:00")
# checking the result
filtered_gtfs$frequencies[trip_id == "2105-10-0"] # check time of the date
filtered_gtfs$calendar
```
Data shows that each trip is associated to more than one headway, as shown above (one entry for the 7am to 7:59am interval and another for the 8am to 8:59am interval). To solve this, average headway from 7am to 9am is calculated.
Calculate the average headways weighted by the time duration of each headway:
- Step 1: multiply each headway by the size of the time interval during which it is valid. To calculate the time intervals,first use the convert_time_to_seconds() function to calculate the start and end time of the time interval in seconds and then subtract the latter by the former.
- Step 2: sum these multiplication results for each trip
- Step 3: and then divide this amount by the total time interval (two hours, in our case)
```{r}
filtered_gtfs <- convert_time_to_seconds(filtered_gtfs)
filtered_gtfs$frequencies
# check how the results look like for a particular trip id
filtered_gtfs$frequencies[trip_id == "2105-10-0"]
filtered_gtfs$frequencies[, time_interval := end_time_secs - start_time_secs]
average_headway <- filtered_gtfs$frequencies[,
.(average_headway = weighted.mean(x = headway_secs, w = time_interval)),
by = trip_id
]
average_headway[trip_id == "2105-10-0"]
head(average_headway)
```
```{r, eval=FALSE}
# calculate step by step (alternative option)
library(data.table)
# Sample data
df <- data.table(
trip_id = c("2105-10-0", "2105-10-0"),
start_time = c("07:00:00", "08:00:00"),
end_time = c("07:59:00", "08:59:00"),
headway_secs = c(900, 1200)
)
# Function to convert time to seconds since midnight
convert_time_to_seconds <- function(time_str) {
hms <- as.numeric(strsplit(time_str, ":")[[1]])
return(hms[1] * 3600 + hms[2] * 60 + hms[3])
}
# Convert start and end times to seconds
df[, start_seconds := sapply(start_time, convert_time_to_seconds)]
df[, end_seconds := sapply(end_time, convert_time_to_seconds)]
# Calculate time interval duration
df[, time_interval := end_seconds - start_seconds]
# Compute weighted average headway
weighted_avg_headway <- df[, sum(headway_secs * time_interval) / sum(time_interval)]
# Print the result
print(weighted_avg_headway)
```
The importance of weighted mean is: If we used a simple mean, we’d assume both headways are equally relevant. But if one headway applies for a longer period, it should have more influence on the final result. This method gives a more accurate reflection of what passengers experience.
Next, visualisation of the headways (time between veicles)
```{r}
# generate each trip geometry and join this data to the average headways
selected_trips <- average_headway$trip_id # char vector contain trip id
geometries <- get_trip_geometry(
filtered_gtfs, # gtfs data contain files, e.g. trip, shape, calendar, frequencies, stops,...
trip_id = selected_trips,
file = "shapes"
)
head(geometries)
# Note: get_trip_geometry() function returns the spatial geometries of the trips in the feed. This function allows us to specify which trips we want to generate the geometries of, so we are only going to apply the procedure to the trips present in the average headways table:
```
```{r}
# join the average headway data to the geometries and then configure the map
geoms_with_headways <- merge(
geometries,
average_headway,
by = "trip_id"
)
```
```{r}
# the color and line width of each trip geometry varies with its headway
ggplot(geoms_with_headways) +
geom_sf(aes(color = average_headway, size = average_headway), alpha = 0.8) +
scale_color_gradient(high = "#132B43", low = "#56B1F7") +
labs(color = "Average headway", size = "Average headway") +
theme_minimal()
```