Skip to content

Commit e2eb492

Browse files
committed
Added New York City's Taxi and Limousine Commission High Volume For-Hire Vehicle trip support
1 parent 3b7e864 commit e2eb492

File tree

4 files changed

+259
-0
lines changed

4 files changed

+259
-0
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env ruby

# Example: reading New York City TLC high volume for-hire vehicle trip data
# for January 2022, first as a whole Arrow table and then record by record.

require "datasets-parquet"

trips = Datasets::TLC::HighVolumeFHVTrip.new(year: 2022, month: 1)

# Table view of the whole month. Sample output:
p trips.to_arrow
#<Arrow::Table:0x13f920640 ptr=0x13f180160>
# hvfhs_license_num dispatching_base_num originating_base_num request_datetime on_scene_datetime pickup_datetime dropoff_datetime PULocationID DOLocationID trip_miles trip_time base_passenger_fare tolls bcf sales_tax congestion_surcharge airport_fee tips driver_pay shared_request_flag shared_match_flag access_a_ride_flag wav_request_flag wav_match_flag
# 0 HV0003 B03404 B03404 2022-01-01T09:05:31+09:00 2022-01-01T09:05:40+09:00 2022-01-01T09:07:24+09:00 2022-01-01T09:18:28+09:00 170 161 1.180000 664 24.900000 0.000000 0.750000 2.210000 2.750000 0.000000 0.000000 23.030000 N N N N
# 1 HV0003 B03404 B03404 2022-01-01T09:19:27+09:00 2022-01-01T09:22:08+09:00 2022-01-01T09:22:32+09:00 2022-01-01T09:30:12+09:00 237 161 0.820000 460 11.970000 0.000000 0.360000 1.060000 2.750000 0.000000 0.000000 12.320000 N N N N
# ...

# Record view: every attribute of each trip, in column order. The flag
# columns are read through their "?" predicate readers.
trips.each do |trip|
  attributes = [
    trip.hvfhs_license_num,
    trip.dispatching_base_num,
    trip.originating_base_num,
    trip.request_datetime,
    trip.on_scene_datetime,
    trip.pickup_datetime,
    trip.dropoff_datetime,
    trip.pu_locationID,
    trip.do_locationID,
    trip.trip_miles,
    trip.trip_time,
    trip.base_passenger_fare,
    trip.tolls,
    trip.bcf,
    trip.sales_tax,
    trip.congestion_surcharge,
    trip.airport_fee,
    trip.tips,
    trip.driver_pay,
    trip.shared_request_flag?,
    trip.shared_match_flag?,
    trip.access_a_ride_flag?,
    trip.wav_request_flag?,
    trip.wav_match_flag?,
  ]
  p attributes
end
# [:uber, "B03404", "B03404", 2022-01-01 09:05:31 +0900, 2022-01-01 09:05:40 +0900, 2022-01-01 09:07:24 +0900, 2022-01-01 09:18:28 +0900, 170, 161, 1.18, 664, 24.9, 0.0, 0.75, 2.21, 2.75, 0.0, 0.0, 23.03, false, false, false, false, false]
# [:uber, "B03404", "B03404", 2022-01-01 09:19:27 +0900, 2022-01-01 09:22:08 +0900, 2022-01-01 09:22:32 +0900, 2022-01-01 09:30:12 +0900, 237, 161, 0.82, 460, 11.97, 0.0, 0.36, 1.06, 2.75, 0.0, 0.0, 12.32, false, false, false, false, false]
# [:uber, "B03404", "B03404", 2022-01-01 09:43:53 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 10:07:32 +0900, 237, 161, 1.18, 595, 29.82, 0.0, 0.89, 2.65, 2.75, 0.0, 0.0, 23.3, false, false, false, false, false]
# ...

lib/datasets-parquet.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@
55

66
require_relative "datasets-parquet/tlc/fhv-trip"
77
require_relative "datasets-parquet/tlc/green-taxi-trip"
8+
require_relative "datasets-parquet/tlc/high-volume-fhv-trip"
89
require_relative "datasets-parquet/tlc/yellow-taxi-trip"
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
module Datasets
2+
module TLC
3+
class HighVolumeFHVTrip < Dataset
4+
# One trip of the high volume for-hire vehicle data set.
#
# Members are positional and are filled from the values given to
# Record.new, in order. Every value is stored through the member's writer,
# so the writers overridden below normalize the raw values:
#
# * hvfhs_license_num: a known license code ("HV0002" .. "HV0005") is
#   stored as a company symbol; any other value is stored unchanged.
# * the five *_flag members: stored as true when the raw value is "Y" and
#   false otherwise. Each flag is also readable through a "?" predicate
#   (for example shared_request_flag?).
class Record < Struct.new(:hvfhs_license_num,
                          :dispatching_base_num,
                          :originating_base_num,
                          :request_datetime,
                          :on_scene_datetime,
                          :pickup_datetime,
                          :dropoff_datetime,
                          :pu_locationID,
                          :do_locationID,
                          :trip_miles,
                          :trip_time,
                          :base_passenger_fare,
                          :tolls,
                          :bcf,
                          :sales_tax,
                          :congestion_surcharge,
                          :airport_fee,
                          :tips,
                          :driver_pay,
                          :shared_request_flag,
                          :shared_match_flag,
                          :access_a_ride_flag,
                          :wav_request_flag,
                          :wav_match_flag)
  # Known license codes and the company symbol each one is stored as.
  HVFHS_LICENSE_NAMES = {
    "HV0002" => :juno,
    "HV0003" => :uber,
    "HV0004" => :via,
    "HV0005" => :lyft,
  }.freeze

  # Members that hold "Y"/other in the raw data and true/false here.
  FLAG_MEMBERS = %i[
    shared_request_flag
    shared_match_flag
    access_a_ride_flag
    wav_request_flag
    wav_match_flag
  ].freeze

  FLAG_MEMBERS.each do |flag|
    # Predicate reader, e.g. wav_match_flag?.
    alias_method :"#{flag}?", flag

    # Writer that stores true only for the raw value "Y". super needs its
    # argument spelled out because implicit argument passing is not
    # supported inside define_method.
    define_method(:"#{flag}=") do |value|
      super(value == "Y")
    end
  end

  # Builds a record from positional raw values.
  #
  # Values are assigned through the writers (rather than Struct's own
  # initializer) so that the normalization above is applied. Missing
  # trailing values are assigned as nil; values beyond the last member are
  # ignored.
  def initialize(*values)
    super()
    members.zip(values) do |member, value|
      __send__("#{member}=", value)
    end
  end

  # Stores the company symbol for a known license code. An unrecognized
  # value is kept as given instead of being silently dropped to nil, so the
  # information in the source row is not lost.
  def hvfhs_license_num=(hvfhs_license_num)
    super(HVFHS_LICENSE_NAMES.fetch(hvfhs_license_num, hvfhs_license_num))
  end
end
74+
75+
# Sets up the dataset for one month of trip records.
#
# year:  year of the monthly file to use (defaults to the current year).
# month: month of the monthly file to use (defaults to the current month).
#
# Fills in the dataset metadata (id, name, URL and license) and remembers
# the requested year and month; to_arrow uses them to pick the file.
def initialize(year: Date.today.year, month: Date.today.month)
  super()
  @year = year
  @month = month

  terms_of_use = {
    name: "NYC Open Data Terms of Use",
    url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
  }
  @metadata.id = "nyc-taxi-and-limousine-commission-high-volume-for-hire-vehicle-trip"
  @metadata.name = "New York city Taxi and Limousine Commission: high volume for hire vehicle trip record dataset"
  @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
  @metadata.licenses = [terms_of_use]
end
89+
90+
# Returns the month's trips as loaded by Arrow::Table.load.
#
# The Parquet file name is derived from the configured year and month
# (zero padded, e.g. "fhvhv_tripdata_2022-01.parquet"). The file is passed
# to download with its path under cache_dir_path and its URL, then loaded
# from that path.
def to_arrow
  file_name = format("fhvhv_tripdata_%04d-%02d.parquet", @year, @month)
  local_path = cache_dir_path + file_name
  download(local_path,
           "https://d37ci6vzurychx.cloudfront.net/trip-data/#{file_name}")
  Arrow::Table.load(local_path)
end
97+
98+
# Yields every trip as a Record, built from the raw rows of to_arrow.
#
# Without a block, returns an enumerator over the same records.
def each
  return to_enum(__method__) unless block_given?

  to_arrow.raw_records.each do |row|
    yield(Record.new(*row))
  end
end
106+
end
107+
end
108+
end
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Tests Datasets::TLC::HighVolumeFHVTrip against the January 2022 data.
class TLCHighVolumeFHVTripTest < Test::Unit::TestCase
  # Forces TZ to UTC for the duration of each test (the expected timestamps
  # below are written with a +00:00 offset) and creates the dataset under
  # test. The previous TZ value is kept so teardown can restore it.
  def setup
    @default_timezone_env = ENV["TZ"]
    ENV["TZ"] = "UTC"
    @dataset = Datasets::TLC::HighVolumeFHVTrip.new(year: 2022, month: 1)
  end

  # Restores the TZ value saved by setup.
  def teardown
    ENV["TZ"] = @default_timezone_env
  end

  # The string form of the table: header, first ten rows, an ellipsis and
  # the last ten rows.
  test("#to_arrow") do
    assert_equal(<<~TABLE, @dataset.to_arrow.to_s)
      \thvfhs_license_num\tdispatching_base_num\toriginating_base_num\t request_datetime\t on_scene_datetime\t pickup_datetime\t dropoff_datetime\tPULocationID\tDOLocationID\ttrip_miles\ttrip_time\tbase_passenger_fare\t tolls\t bcf\t sales_tax\tcongestion_surcharge\tairport_fee\t tips\tdriver_pay\tshared_request_flag\tshared_match_flag\taccess_a_ride_flag\twav_request_flag\twav_match_flag
      0\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:05:31+00:00\t2022-01-01T00:05:40+00:00\t2022-01-01T00:07:24+00:00\t2022-01-01T00:18:28+00:00\t 170\t 161\t 1.180000\t 664\t 24.900000\t 0.000000\t 0.750000\t 2.210000\t 2.750000\t 0.000000\t 0.000000\t 23.030000\tN \tN \t \tN \tN
      1\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:19:27+00:00\t2022-01-01T00:22:08+00:00\t2022-01-01T00:22:32+00:00\t2022-01-01T00:30:12+00:00\t 237\t 161\t 0.820000\t 460\t 11.970000\t 0.000000\t 0.360000\t 1.060000\t 2.750000\t 0.000000\t 0.000000\t 12.320000\tN \tN \t \tN \tN
      2\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:43:53+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T01:07:32+00:00\t 237\t 161\t 1.180000\t 595\t 29.820000\t 0.000000\t 0.890000\t 2.650000\t 2.750000\t 0.000000\t 0.000000\t 23.300000\tN \tN \t \tN \tN
      3\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:15:36+00:00\t2022-01-01T00:17:08+00:00\t2022-01-01T00:18:02+00:00\t2022-01-01T00:23:05+00:00\t 262\t 229\t 1.650000\t 303\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 2.750000\t 0.000000\t 0.000000\t 6.300000\tN \tN \t \tN \tN
      4\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:25:45+00:00\t2022-01-01T00:26:01+00:00\t2022-01-01T00:28:01+00:00\t2022-01-01T00:35:42+00:00\t 229\t 141\t 1.650000\t 461\t 9.440000\t 0.000000\t 0.280000\t 0.840000\t 2.750000\t 0.000000\t 0.000000\t 7.440000\tN \tN \t \tN \tN
      5\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:34:44+00:00\t2022-01-01T00:36:52+00:00\t2022-01-01T00:38:50+00:00\t2022-01-01T00:51:32+00:00\t 263\t 79\t 4.510000\t 762\t 17.670000\t 0.000000\t 0.530000\t 1.570000\t 2.750000\t 0.000000\t 0.000000\t 12.250000\tN \tN \t \tN \tN
      6\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:47:51+00:00\t2022-01-01T00:52:00+00:00\t2022-01-01T00:53:25+00:00\t2022-01-01T01:08:56+00:00\t 113\t 140\t 3.680000\t 931\t 16.680000\t 0.000000\t 0.500000\t 1.480000\t 2.750000\t 0.000000\t 0.000000\t 12.750000\tN \tN \t \tN \tN
      7\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:06:21+00:00\t2022-01-01T00:06:58+00:00\t2022-01-01T00:08:58+00:00\t2022-01-01T00:23:01+00:00\t 151\t 75\t 2.770000\t 843\t 14.410000\t 0.000000\t 0.430000\t 1.280000\t 0.000000\t 0.000000\t 4.000000\t 11.470000\tN \tN \t \tN \tN
      8\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:27:54+00:00\t2022-01-01T00:30:26+00:00\t2022-01-01T00:32:25+00:00\t2022-01-01T00:44:15+00:00\t 263\t 229\t 2.040000\t 710\t 10.640000\t 0.000000\t 0.320000\t 0.940000\t 2.750000\t 0.000000\t 0.000000\t 9.550000\tN \tN \t \tN \tN
      9\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:44:59+00:00\t2022-01-01T00:48:23+00:00\t2022-01-01T00:50:23+00:00\t2022-01-01T01:15:30+00:00\t 237\t 169\t 8.790000\t 1507\t 107.560000\t 0.000000\t 0.830000\t 2.450000\t 2.750000\t 0.000000\t 0.000000\t 23.670000\tN \tN \t \tN \tN
      ...
      14751581\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:36+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:33:23+00:00\t 163\t 244\t 7.570000\t 858\t 18.460000\t 0.000000\t 0.550000\t 1.640000\t 2.750000\t 0.000000\t 0.000000\t 15.870000\tN \tN \t \tN \tN
      14751582\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:34+00:00\t2022-01-31T23:34:20+00:00\t2022-01-31T23:36:02+00:00\t2022-01-31T23:50:15+00:00\t 244\t 47\t 3.050000\t 853\t 16.230000\t 0.000000\t 0.490000\t 1.440000\t 0.000000\t 0.000000\t 0.000000\t 10.850000\tN \tN \t \tN \tN
      14751583\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:57:18+00:00\t2022-01-31T23:07:52+00:00\t2022-01-31T23:09:52+00:00\t2022-01-31T23:19:46+00:00\t 86\t 86\t 2.050000\t 594\t 9.630000\t 0.000000\t 0.290000\t 0.850000\t 0.000000\t 0.000000\t 0.000000\t 8.510000\tN \tN \t \tN \tN
      14751584\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:23:00+00:00\t2022-01-31T23:24:44+00:00\t2022-01-31T23:26:37+00:00\t2022-01-31T23:34:37+00:00\t 86\t 117\t 1.300000\t 480\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 6.730000\tN \tN \t \tN \tN
      14751585\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:19+00:00\t2022-01-31T23:40:56+00:00\t2022-01-31T23:41:58+00:00\t2022-01-31T23:47:44+00:00\t 86\t 86\t 1.530000\t 346\t 7.190000\t 0.000000\t 0.220000\t 0.640000\t 0.000000\t 0.000000\t 0.000000\t 6.680000\tN \tN \t \tN \tN
      14751586\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:22:16+00:00\t2022-01-31T23:26:04+00:00\t2022-01-31T23:27:20+00:00\t2022-01-31T23:40:46+00:00\t 77\t 71\t 2.590000\t 806\t 14.280000\t 0.000000\t 0.430000\t 1.270000\t 0.000000\t 0.000000\t 0.000000\t 9.900000\tN \tN \t \tN \tN
      14751587\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:42:30+00:00\t2022-01-31T23:45:08+00:00\t2022-01-31T23:45:46+00:00\t2022-01-31T23:59:44+00:00\t 72\t 72\t 1.560000\t 838\t 10.420000\t 0.000000\t 0.310000\t 0.920000\t 0.000000\t 0.000000\t 0.000000\t 9.030000\tN \tN \t \tN \tN
      14751588\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:56:50+00:00\t2022-01-31T23:03:17+00:00\t2022-01-31T23:03:25+00:00\t2022-01-31T23:17:17+00:00\t 136\t 20\t 1.230000\t 832\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 8.730000\tN \tN \t \tN \tN
      14751589\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:07+00:00\t2022-01-31T23:19:25+00:00\t2022-01-31T23:20:26+00:00\t2022-01-31T23:30:26+00:00\t 20\t 136\t 1.690000\t 600\t 9.320000\t 0.000000\t 0.280000\t 0.830000\t 0.000000\t 0.000000\t 0.000000\t 7.300000\tN \tN \t \tN \tN
      14751590\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:24+00:00\t2022-01-31T23:36:13+00:00\t2022-01-31T23:38:13+00:00\t2022-02-01T00:07:24+00:00\t 136\t 82\t 14.700000\t 1751\t 27.340000\t 6.550000\t 1.020000\t 3.010000\t 0.000000\t 0.000000\t 0.000000\t 31.280000\tN \tN \t \tN \tN
    TABLE
  end

  # Record count plus the first and last record as hashes. Always omitted:
  # materializing every record of the month is too expensive to run.
  test("#each") do
    omit("Skip test of HighVolumeFHVTrip#each because the size of data is too huge to execute.")
    records = @dataset.each.to_a

    assert_equal([
                   # Rows are indexed from 0 and the last row printed by
                   # #to_arrow has index 14751590, so there are 14751591
                   # records.
                   14751591,
                   {
                     hvfhs_license_num: :uber,
                     dispatching_base_num: "B03404",
                     originating_base_num: "B03404",
                     request_datetime: Time.parse("2022-01-01 00:05:31 +0000"),
                     on_scene_datetime: Time.parse("2022-01-01 00:05:40 +0000"),
                     pickup_datetime: Time.parse("2022-01-01 00:07:24 +0000"),
                     dropoff_datetime: Time.parse("2022-01-01 00:18:28 +0000"),
                     pu_locationID: 170,
                     do_locationID: 161,
                     trip_miles: 1.18,
                     trip_time: 664,
                     base_passenger_fare: 24.9,
                     tolls: 0.0,
                     bcf: 0.75,
                     sales_tax: 2.21,
                     congestion_surcharge: 2.75,
                     airport_fee: 0.0,
                     tips: 0.0,
                     driver_pay: 23.03,
                     shared_request_flag: false,
                     shared_match_flag: false,
                     access_a_ride_flag: false,
                     wav_request_flag: false,
                     wav_match_flag: false
                   },
                   {
                     hvfhs_license_num: :uber,
                     dispatching_base_num: "B03404",
                     originating_base_num: "B03404",
                     request_datetime: Time.parse("2022-01-31 23:33:24 +00:00"),
                     on_scene_datetime: Time.parse("2022-01-31 23:36:13 +00:00"),
                     pickup_datetime: Time.parse("2022-01-31 23:38:13 +00:00"),
                     dropoff_datetime: Time.parse("2022-02-01 00:07:24 +00:00"),
                     pu_locationID: 136,
                     do_locationID: 82,
                     trip_miles: 14.7,
                     trip_time: 1751,
                     base_passenger_fare: 27.34,
                     tolls: 6.55,
                     bcf: 1.02,
                     sales_tax: 3.01,
                     congestion_surcharge: 0.0,
                     airport_fee: 0.0,
                     tips: 0.0,
                     driver_pay: 31.28,
                     shared_request_flag: false,
                     shared_match_flag: false,
                     access_a_ride_flag: false,
                     wav_request_flag: false,
                     wav_match_flag: false
                   }
                 ],
                 [
                   records.size,
                   records.first.to_h,
                   records.last.to_h,
                 ])
  end
end

0 commit comments

Comments
 (0)