Skip to content

Commit 4018e84

Browse files
committed
Added New York City's Taxi and Limousine Commission for-hire vehicle trip support
1 parent 9633a78 commit 4018e84

File tree

3 files changed

+128
-0
lines changed

3 files changed

+128
-0
lines changed

lib/datasets-parquet.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33

44
require_relative "datasets-parquet/version"
55

6+
require_relative "datasets-parquet/tlc/fhv-trip"
67
require_relative "datasets-parquet/tlc/green-taxi-trip"
78
require_relative "datasets-parquet/tlc/yellow-taxi-trip"
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
module Datasets
  module TLC
    # New York City Taxi and Limousine Commission (TLC) for-hire vehicle
    # (FHV) trip records for a single year/month, loaded from a Parquet file.
    class FHVTrip < Dataset
      # One trip row. Values are assigned to the members in declaration
      # order; +sr_flag+ is always stored as +true+ or +false+.
      class Record < Struct.new(:dispatching_base_num,
                                :pickup_datetime,
                                :dropoff_datetime,
                                :pu_location_id,
                                :do_location_id,
                                :sr_flag,
                                :affiliated_base_number)
        alias_method :sr_flag?, :sr_flag

        # Builds a record from positional +values+.
        #
        # Every member is written through its setter (not through the
        # Struct constructor) so that overridden writers such as
        # +sr_flag=+ are applied. Members without a corresponding value
        # receive +nil+; values beyond the last member are ignored.
        def initialize(*values)
          super()
          members.each_with_index do |name, position|
            __send__("#{name}=", values[position])
          end
        end

        # Stores the flag as a strict boolean: +nil+ and +false+ become
        # +false+, anything else becomes +true+.
        def sr_flag=(sr_flag)
          super(sr_flag ? true : false)
        end
      end

      # @param year [Integer] year of the trip data file (defaults to the
      #   current year)
      # @param month [Integer] month of the trip data file (defaults to the
      #   current month)
      def initialize(year: Date.today.year, month: Date.today.month)
        super()
        @year = year
        @month = month
        @metadata.id = "nyc-taxi-and-limousine-commission-for-hire-vehicle-trip"
        @metadata.name = "New York city Taxi and Limousine Commission: for hire vehicle trip record dataset"
        @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
        @metadata.licenses = [
          {
            name: "NYC Open Data Terms of Use",
            url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
          },
        ]
      end

      # Fetches the Parquet file for the configured year/month into the
      # cache directory and loads it.
      #
      # NOTE(review): caching/skip-if-present behavior is up to the
      # inherited +download+ helper, which is not visible here.
      #
      # @return [Arrow::Table] the table loaded from the cached file
      def to_arrow
        file_name = format("fhv_tripdata_%04d-%02d.parquet", @year, @month)
        local_path = cache_dir_path + file_name
        remote_url = "https://d37ci6vzurychx.cloudfront.net/trip-data/#{file_name}"
        download(local_path, remote_url)
        Arrow::Table.load(local_path)
      end

      # Yields each row of the table as a {Record}.
      #
      # @yieldparam record [Record]
      # @return [Enumerator] when no block is given
      def each
        return enum_for(__method__) unless block_given?

        to_arrow.raw_records.each { |row| yield(Record.new(*row)) }
      end
    end
  end
end

test/test-tlc-fhv-trip.rb

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Tests for Datasets::TLC::FHVTrip against the January 2022 data file.
class TLCFHVTripTest < Test::Unit::TestCase
  # Pins the process time zone to UTC so the timestamps in the expected
  # output below are rendered with a +00:00 offset.
  def setup
    @original_tz = ENV['TZ']
    ENV['TZ'] = 'UTC'
    @dataset = Datasets::TLC::FHVTrip.new(year: 2022, month: 1)
  end

  # Restores whatever TZ value was in effect before the test ran.
  def teardown
    ENV['TZ'] = @original_tz
  end

  test("#to_arrow") do
    expected = <<~TABLE
      \tdispatching_base_num\t pickup_datetime\t dropOff_datetime\tPUlocationID\tDOlocationID\tSR_Flag\tAffiliated_base_number
      0\tB00009 \t2022-01-01T00:31:00+00:00\t2022-01-01T01:05:00+00:00\t (null)\t (null)\t (null)\tB00009
      1\tB00009 \t2022-01-01T00:37:00+00:00\t2022-01-01T01:05:00+00:00\t (null)\t (null)\t (null)\tB00009
      2\tB00037 \t2022-01-01T00:56:37+00:00\t2022-01-01T01:06:11+00:00\t (null)\t 85.000000\t (null)\tB00037
      3\tB00037 \t2022-01-01T00:19:54+00:00\t2022-01-01T00:30:47+00:00\t (null)\t 85.000000\t (null)\tB00037
      4\tB00037 \t2022-01-01T00:41:49+00:00\t2022-01-01T00:52:16+00:00\t (null)\t 188.000000\t (null)\tB00037
      5\tB00037 \t2022-01-01T00:21:32+00:00\t2022-01-01T00:35:06+00:00\t (null)\t 61.000000\t (null)\tB00037
      6\tB00037 \t2022-01-01T00:51:19+00:00\t2022-01-01T01:08:06+00:00\t (null)\t 76.000000\t (null)\tB00037
      7\tB00111 \t2022-01-01T00:30:00+00:00\t2022-01-01T01:41:00+00:00\t (null)\t (null)\t (null)\tB03406
      8\tB00112 \t2022-01-01T00:31:30+00:00\t2022-01-01T01:10:06+00:00\t (null)\t 67.000000\t (null)\tB00112
      9\tB00112 \t2022-01-01T00:12:26+00:00\t2022-01-01T00:37:22+00:00\t (null)\t 155.000000\t (null)\tB00112
      ...
      1143681\tB03380 \t2022-01-31T23:39:32+00:00\t2022-01-31T23:47:43+00:00\t 246.000000\t 158.000000\t (null)\tB03380
      1143682\tB03380 \t2022-01-31T23:52:52+00:00\t2022-02-01T00:03:14+00:00\t 158.000000\t 107.000000\t (null)\tB03380
      1143683\tB03380 \t2022-01-31T23:24:44+00:00\t2022-01-31T23:35:46+00:00\t 231.000000\t 4.000000\t (null)\tB03380
      1143684\tB03380 \t2022-01-31T23:21:35+00:00\t2022-01-31T23:32:16+00:00\t 229.000000\t 48.000000\t (null)\tB03380
      1143685\tB03380 \t2022-01-31T23:02:50+00:00\t2022-01-31T23:20:07+00:00\t 142.000000\t 113.000000\t (null)\tB03380
      1143686\tB03380 \t2022-01-31T23:22:41+00:00\t2022-01-31T23:26:39+00:00\t 234.000000\t 107.000000\t (null)\tB03380
      1143687\tB03380 \t2022-01-31T23:42:42+00:00\t2022-01-31T23:52:58+00:00\t 114.000000\t 148.000000\t (null)\tB03380
      1143688\tB03380 \t2022-01-31T23:07:13+00:00\t2022-01-31T23:13:40+00:00\t 90.000000\t 113.000000\t (null)\tB03380
      1143689\tB03380 \t2022-01-31T23:16:14+00:00\t2022-01-31T23:31:03+00:00\t 113.000000\t 140.000000\t (null)\tB03380
      1143690\tB03381 \t2022-01-31T23:47:42+00:00\t2022-02-01T00:15:03+00:00\t (null)\t 122.000000\t (null)\tB03404
    TABLE

    assert_equal(expected, @dataset.to_arrow.to_s)
  end

  test("#each") do
    trips = @dataset.each.to_a

    # Expected contents of the first and last rows of the month.
    first_trip = {
      dispatching_base_num: 'B00009',
      pickup_datetime: Time.parse('2022-01-01 00:31:00 +0000'),
      dropoff_datetime: Time.parse('2022-01-01 01:05:00 +0000'),
      pu_location_id: nil,
      do_location_id: nil,
      sr_flag: false,
      affiliated_base_number: 'B00009'
    }
    last_trip = {
      dispatching_base_num: 'B03381',
      pickup_datetime: Time.parse('2022-01-31 23:47:42 +0000'),
      dropoff_datetime: Time.parse('2022-02-01 00:15:03 +0000'),
      pu_location_id: nil,
      do_location_id: 122.0,
      sr_flag: false,
      affiliated_base_number: 'B03404'
    }

    assert_equal([1143691, first_trip, last_trip],
                 [trips.size, trips.first.to_h, trips.last.to_h])
  end
end

0 commit comments

Comments
 (0)