|
| 1 | +import csv |
| 2 | +import datetime |
| 3 | +import random |
| 4 | + |
# Fixed seed: every run draws the same pseudo-random sequence, so the
# generated file is reproducible from run to run.
random.seed(10)
| 6 | + |
| 7 | + |
def generate_pu():
    """Return a pseudo-random pickup datetime inside calendar year 2021.

    The instant is drawn with a single ``random.random()`` call between
    2021-01-01 00:00:00 and 2021-12-31 23:59:59, then truncated to whole
    seconds.

    Returns:
        A naive ``datetime.datetime`` with ``microsecond == 0``.
    """
    year_start = datetime.datetime(2021, 1, 1)
    year_end = datetime.datetime(2021, 12, 31, 23, 59, 59)
    span = year_end - year_start
    # Scale the whole span by a fraction in [0.0, 1.0) and offset from the start.
    moment = year_start + random.random() * span
    return moment.replace(microsecond=0)
| 17 | + |
| 18 | + |
def generate_do(pick_up):
    """Return a drop-off time a pseudo-random interval after ``pick_up``.

    The interval is 10 to 120 whole minutes plus 0 to 59 seconds (both
    bounds inclusive), so the result is always later than ``pick_up``.

    Args:
        pick_up: The pickup ``datetime.datetime`` to offset from.

    Returns:
        ``pick_up`` shifted forward by the drawn interval.
    """
    # Draw minutes first, then seconds, matching the consumption order of
    # the random stream.
    ride_minutes = random.randint(10, 120)
    ride_seconds = random.randint(0, 59)
    trip_length = datetime.timedelta(minutes=ride_minutes, seconds=ride_seconds)
    return pick_up + trip_length
| 24 | + |
| 25 | + |
def generate_choice(*options):
    """Return one of the positional arguments, picked pseudo-randomly.

    Args:
        *options: Candidate values; at least one must be supplied.

    Returns:
        One element of ``options`` as selected by ``random.choice``.

    Raises:
        IndexError: If called with no arguments (nothing to choose from).
    """
    return random.choice(options)
| 28 | + |
| 29 | + |
def _generate_ride():
    """Build one synthetic trip as a list of values in header column order."""
    # The order of the random draws below is deliberate: keeping it stable
    # means a fixed seed keeps producing the same values for these columns.
    fare_amount = round(random.uniform(5.0, 100.0), 2)
    extra = generate_choice(0, 0.5, 3)
    mta_tax = generate_choice(0, 0.5)
    tip_amount = round(random.uniform(0.0, 20.0), 2)
    tolls_amount = generate_choice(0, 6.12)
    improvement_surcharge = 0.3
    congestion_surcharge = 2.5
    # Constant (no random draw) so adding this column does not shift the
    # random stream feeding the other columns.
    airport_fee = 0
    total_amount = round(
        fare_amount
        + extra
        + mta_tax
        + tip_amount
        + tolls_amount
        + improvement_surcharge
        + congestion_surcharge,
        2,
    )

    pick_up = generate_pu()

    return [
        random.randint(0, 6),  # VendorID
        pick_up,  # tpep_pickup_datetime
        generate_do(pick_up),  # tpep_dropoff_datetime
        random.randint(0, 5),  # passenger_count
        random.randint(0, 15),  # trip_distance
        random.randint(0, 6),  # RatecodeID
        generate_choice("Y", "N"),  # store_and_fwd_flag
        random.randint(1, 265),  # PULocationID
        random.randint(1, 265),  # DOLocationID
        random.randint(0, 5),  # payment_type
        fare_amount,  # fare_amount
        extra,  # extra
        mta_tax,  # mta_tax
        tip_amount,  # tip_amount
        tolls_amount,  # tolls_amount
        improvement_surcharge,  # improvement_surcharge
        total_amount,  # total_amount
        congestion_surcharge,  # congestion_surcharge
        airport_fee,  # airport_fee
    ]


def generate_file(path="2021_Yellow_Taxi_Trip_Data.csv", rows=170_000_000):
    """Write a CSV file of synthetic taxi-trip rows.

    The file starts with a 19-column header row followed by ``rows`` data
    rows. Every data row carries one value per header column, including
    the trailing ``airport_fee`` column.

    Args:
        path: Destination file; created or overwritten.
        rows: Number of data rows to write after the header.
    """
    header = (
        "VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,"
        "trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,"
        "payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,"
        "improvement_surcharge,total_amount,congestion_surcharge,airport_fee"
    ).split(",")

    # newline="" lets the csv module control line endings itself.
    with open(path, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for _ in range(rows):
            writer.writerow(_generate_ride())
| 85 | + |
| 86 | + |
if __name__ == "__main__":
    # Generate the data file only when executed as a script, so importing
    # this module does not trigger the (very large) write.
    generate_file()
0 commit comments