Skip to content

Commit 7b4ebe8

Browse files
authored
Add New York City's Taxi and Limousine Commission green taxi trip support (#4)
## Related Issue

- #1

## What I did

- Added support for New York City's Taxi and Limousine Commission `green taxi trip` records
- Added a simple unit test

### Details

- [Green Trips Data Dictionary](https://www1.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_green.pdf)
  - The dictionary does not explain `ehail_fee` or `congestion_surcharge`; see these pages instead:
    - ehail_fee: https://www1.nyc.gov/site/tlc/businesses/e-hail-providers.page
    - congestion_surcharge: https://www1.nyc.gov/site/tlc/about/congestion-surcharge.page
- ref: https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page

## What I didn't do

- Add the following datasets related to the NYC Taxi and Limousine Commission:
  - FHV Trips Data
  - High Volume FHV Trips Data

## What I checked

- Ran the tlc-green-taxi-trip example

```console
% ruby example/tlc-green-taxi-trip.rb
[:veri_fone_inc, 2022-01-01 09:14:21 +0900, 2022-01-01 09:15:33 +0900, false, :standard_rate, 42, 42, 1.0, 0.44, 3.5, 0.5, 0.5, 0.0, 0.0, nil, 0.3, 4.8, :cash, :street_hail, 0.0]
[:creative_mobile_technologies, 2022-01-01 09:20:55 +0900, 2022-01-01 09:29:38 +0900, false, :standard_rate, 116, 41, 1.0, 2.1, 9.5, 0.5, 0.5, 0.0, 0.0, nil, 0.3, 10.8, :cash, :street_hail, 0.0]
```
1 parent 463b0dc commit 7b4ebe8

File tree

5 files changed

+254
-44
lines changed

5 files changed

+254
-44
lines changed

example/tlc-green-taxi-trip.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/env ruby

require "datasets-parquet"

# Prints every field of each green taxi trip recorded for January 2022,
# one array per trip, in the same order as the Record members.
columns = %i[
  vendor
  lpep_pickup_datetime
  lpep_dropoff_datetime
  store_and_fwd?
  rate_code
  pu_location_id
  do_location_id
  passenger_count
  trip_distance
  fare_amount
  extra
  mta_tax
  tip_amount
  tolls_amount
  ehail_fee
  improvement_surcharge
  total_amount
  payment
  trip
  congestion_surcharge
]

DatasetsParquet::TLC::GreenTaxiTrip.new(year: 2022, month: 1).each do |trip|
  p columns.map { |column| trip.public_send(column) }
end
# Sample output:
# [:veri_fone_inc, 2022-01-01 09:14:21 +0900, 2022-01-01 09:15:33 +0900, false, :standard_rate, 42, 42, 1.0, 0.44, 3.5, 0.5, 0.5, 0.0, 0.0, nil, 0.3, 4.8, :cash, :street_hail, 0.0]
# [:creative_mobile_technologies, 2022-01-01 09:20:55 +0900, 2022-01-01 09:29:38 +0900, false, :standard_rate, 116, 41, 1.0, 2.1, 9.5, 0.5, 0.5, 0.0, 0.0, nil, 0.3, 10.8, :cash, :street_hail, 0.0]

lib/datasets-parquet.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33

44
require_relative "datasets-parquet/version"
55

6+
require_relative "datasets-parquet/tlc/green-taxi-trip"
67
require_relative "datasets-parquet/tlc/yellow-taxi-trip"
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
module DatasetsParquet
2+
module TLC
3+
class GreenTaxiTrip < Datasets::Dataset
4+
# One green taxi trip row, with the numeric/flag codes of the raw data
# decoded into symbols and booleans.
#
# The class inherits from the Struct (rather than using the block form of
# Struct.new) so that the custom setters below can call `super` to reach the
# generated Struct writers.
class Record < Struct.new(:vendor,
                          :lpep_pickup_datetime,
                          :lpep_dropoff_datetime,
                          :store_and_fwd,
                          :rate_code,
                          :pu_location_id,
                          :do_location_id,
                          :passenger_count,
                          :trip_distance,
                          :fare_amount,
                          :extra,
                          :mta_tax,
                          :tip_amount,
                          :tolls_amount,
                          :ehail_fee,
                          :improvement_surcharge,
                          :total_amount,
                          :payment,
                          :trip,
                          :congestion_surcharge)
  alias_method :store_and_fwd?, :store_and_fwd

  # Builds a record from raw column values given in member order.
  #
  # Every value is routed through the matching writer so that the decoding
  # setters below are applied; missing trailing values are treated as nil.
  def initialize(*values)
    super()
    members.zip(values) do |member, value|
      __send__("#{member}=", value)
    end
  end

  # Decodes the vendor code: 1 and 2 are the two providers known here.
  # nil and any other code are stored as nil instead of being mislabelled
  # as :veri_fone_inc.
  def vendor=(vendor)
    decoded =
      case vendor
      when 1 then :creative_mobile_technologies
      when 2 then :veri_fone_inc
      end
    super(decoded)
  end

  # Decodes the rate code (1.0..6.0); nil or unknown codes are stored as nil.
  def rate_code=(rate_code)
    decoded =
      case rate_code
      when 1.0 then :standard_rate
      when 2.0 then :jfk
      when 3.0 then :newark
      # NOTE(review): capitalised unlike the other symbols; kept as-is
      # because callers may already compare against this exact value.
      when 4.0 then :Nassau_or_westchester
      when 5.0 then :negotiated_fare
      when 6.0 then :group_ride
      end
    super(decoded)
  end

  # Stores true only for the 'Y' flag; anything else (including nil) is false.
  def store_and_fwd=(store_and_fwd)
    super(store_and_fwd == 'Y')
  end

  # Decodes the payment type code (1..6); nil or unknown codes are stored as nil.
  def payment=(payment)
    decoded =
      case payment
      when 1 then :credit_card
      when 2 then :cash
      when 3 then :no_charge
      when 4 then :dispute
      when 5 then :unknown
      when 6 then :voided_trip
      end
    super(decoded)
  end

  # Decodes the trip type code (1.0 or 2.0); nil or unknown codes are stored as nil.
  def trip=(trip)
    decoded =
      case trip
      when 1.0 then :street_hail
      when 2.0 then :dispatch
      end
    super(decoded)
  end
end
84+
85+
# Sets up the dataset for one month of green taxi trip records.
#
# year:  calendar year of the data file (defaults to the current year).
# month: calendar month of the data file (defaults to the current month).
#
# NOTE(review): @metadata is presumably created by the superclass
# initializer called via super() - confirm against Datasets::Dataset.
def initialize(year: Date.today.year, month: Date.today.month)
  super()
  @year = year
  @month = month

  terms_of_use = {
    name: "NYC Open Data Terms of Use",
    url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
  }
  @metadata.id = "nyc-taxi-and-limousine-commission-green-taxi-trip"
  @metadata.name = "New York city Taxi and Limousine Commission: green taxi trip record dataset"
  @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
  @metadata.licenses = [terms_of_use]
end
99+
100+
# Yields one Record per raw row of the loaded data.
#
# Without a block, returns an Enumerator over the same records.
def each
  unless block_given?
    return to_enum(__method__)
  end

  open_data.raw_records.each { |columns| yield(Record.new(*columns)) }
end
108+
109+
private
110+
# Downloads the parquet file for @year/@month into the cache directory and
# loads it with Arrow::Table.load, returning whatever that call returns.
#
# NOTE(review): cache_dir_path and download are defined outside this
# block (presumably by the Datasets::Dataset superclass) - confirm.
def open_data
  file_name = format("green_tripdata_%04d-%02d.parquet", @year, @month)
  local_path = cache_dir_path + file_name
  download(local_path, "https://d37ci6vzurychx.cloudfront.net/trip-data/#{file_name}")
  Arrow::Table.load(local_path)
end
117+
end
118+
end
119+
end

test/test-tlc-green-taxi-trip.rb

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Checks the record count and the decoded first/last records of the
# January 2022 green taxi trip data.
class TLCGreenTaxiTripTest < Test::Unit::TestCase
  test("each") do
    records = DatasetsParquet::TLC::GreenTaxiTrip.new(year: 2022, month: 1).each.to_a

    first_trip = {
      vendor: :veri_fone_inc,
      lpep_pickup_datetime: Time.parse('2022-01-01 09:14:21 +0900'),
      lpep_dropoff_datetime: Time.parse('2022-01-01 09:15:33 +0900'),
      store_and_fwd: false,
      rate_code: :standard_rate,
      pu_location_id: 42,
      do_location_id: 42,
      passenger_count: 1.0,
      trip_distance: 0.44,
      fare_amount: 3.5,
      extra: 0.5,
      mta_tax: 0.5,
      tip_amount: 0.0,
      tolls_amount: 0.0,
      ehail_fee: nil,
      improvement_surcharge: 0.3,
      total_amount: 4.8,
      payment: :cash,
      trip: :street_hail,
      congestion_surcharge: 0.0,
    }
    # The last row of the file has several columns missing (nil).
    last_trip = {
      vendor: :veri_fone_inc,
      lpep_pickup_datetime: Time.parse('2022-02-01 08:52:00 +0900'),
      lpep_dropoff_datetime: Time.parse('2022-02-01 09:26:00 +0900'),
      store_and_fwd: false,
      rate_code: nil,
      pu_location_id: 225,
      do_location_id: 179,
      passenger_count: nil,
      trip_distance: 9.6,
      fare_amount: 32.18,
      extra: 0.0,
      mta_tax: 0.0,
      tip_amount: 7.23,
      tolls_amount: 10.0,
      ehail_fee: nil,
      improvement_surcharge: 0.3,
      total_amount: 49.71,
      payment: nil,
      trip: nil,
      congestion_surcharge: nil,
    }

    assert_equal([62495, first_trip, last_trip],
                 [records.size, records.first.to_h, records.last.to_h])
  end
end

test/test-tlc-yellow-taxi-trip.rb

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,57 @@
11
class TLCYellowTaxiTripTest < Test::Unit::TestCase
2-
def record(*args)
3-
DatasetsParquet::TLC::YellowTaxiTrip::Record.new(*args)
4-
end
5-
62
test("each") do
73
dataset = DatasetsParquet::TLC::YellowTaxiTrip.new(year: 2022, month: 1)
84
records = dataset.each.to_a
95

106
assert_equal([
117
2463931,
12-
record(1,
13-
Time.parse('2022-01-01 09:35:40 +0900'),
14-
Time.parse('2022-01-01 09:53:29 +0900'),
15-
2.0,
16-
3.8,
17-
1.0,
18-
'N',
19-
142,
20-
236,
21-
1,
22-
14.5,
23-
3.0,
24-
0.5,
25-
3.65,
26-
0.0,
27-
0.3,
28-
21.95,
29-
2.5,
30-
0.0),
31-
record(2,
32-
Time.parse('2022-02-01 08:46:00 +0900'),
33-
Time.parse('2022-02-01 09:13:00 +0900'),
34-
nil,
35-
8.94,
36-
nil,
37-
nil,
38-
186,
39-
181,
40-
nil,
41-
25.48,
42-
0.0,
43-
0.5,
44-
6.28,
45-
0.0,
46-
0.3,
47-
35.06,
48-
nil,
49-
nil)
8+
{
9+
vendor: :creative_mobile_technologies,
10+
tpep_pickup_datetime: Time.parse('2022-01-01 09:35:40 +0900'),
11+
tpep_dropoff_datetime: Time.parse('2022-01-01 09:53:29 +0900'),
12+
passenger_count: 2.0,
13+
trip_distance: 3.8,
14+
rate_code: :standard_rate,
15+
store_and_fwd: false,
16+
pu_location_id: 142,
17+
do_location_id: 236,
18+
payment: :credit_card,
19+
fare_amount: 14.5,
20+
extra: 3.0,
21+
mta_tax: 0.5,
22+
tip_amount: 3.65,
23+
tolls_amount: 0.0,
24+
improvement_surcharge: 0.3,
25+
total_amount: 21.95,
26+
congestion_surcharge: 2.5,
27+
airport_fee: 0.0,
28+
},
29+
{
30+
vendor: :veri_fone_inc,
31+
tpep_pickup_datetime: Time.parse('2022-02-01 08:46:00 +0900'),
32+
tpep_dropoff_datetime: Time.parse('2022-02-01 09:13:00 +0900'),
33+
passenger_count: nil,
34+
trip_distance: 8.94,
35+
rate_code: nil,
36+
store_and_fwd: false,
37+
pu_location_id: 186,
38+
do_location_id: 181,
39+
payment: nil,
40+
fare_amount: 25.48,
41+
extra: 0.0,
42+
mta_tax: 0.5,
43+
tip_amount: 6.28,
44+
tolls_amount: 0.0,
45+
improvement_surcharge: 0.3,
46+
total_amount: 35.06,
47+
congestion_surcharge: nil,
48+
airport_fee: nil
49+
},
5050
],
5151
[
5252
records.size,
53-
records.first,
54-
records.last,
53+
records.first.to_h,
54+
records.last.to_h,
5555
])
5656
end
5757
end

0 commit comments

Comments
 (0)