-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplit_files.py
More file actions
41 lines (31 loc) · 1.21 KB
/
split_files.py
File metadata and controls
41 lines (31 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import csv
import os
def split_csv(input_file, output_dir, rows_per_file):
    """Split a CSV file into numbered parts that each repeat the header row.

    The first row of ``input_file`` is treated as the header. The remaining
    rows are streamed, in a single pass, into ``part_1.csv``, ``part_2.csv``,
    ... inside ``output_dir``. Every part starts with the header and holds at
    most ``rows_per_file`` data rows.

    A header-only input still produces ``part_1.csv`` containing just the
    header. A completely empty input produces no files.

    Args:
        input_file: Path of the CSV file to split.
        output_dir: Directory that receives the part files; it is created if
            it does not exist yet.
        rows_per_file: Maximum number of data rows per part; must be >= 1.

    Raises:
        ValueError: If ``rows_per_file`` is less than 1.
    """
    # A non-positive chunk size can never consume the input, so the loop
    # below would create part files forever; reject it before touching disk.
    if rows_per_file < 1:
        raise ValueError(
            f'rows_per_file must be at least 1, got {rows_per_file!r}'
        )

    # newline='' lets the csv module do its own newline handling, so line
    # breaks embedded in quoted fields survive the round trip unchanged.
    with open(input_file, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)

        header = next(reader, None)
        if header is None:
            # Empty input: there is no header to repeat, so nothing to write.
            return

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # One-row lookahead: csv.reader only ever yields lists, so None is a
        # safe end-of-input marker. It tells us whether another part is
        # needed without a separate counting pass over the file.
        pending_row = next(reader, None)
        file_count = 1

        while True:
            output_file = os.path.join(output_dir, f'part_{file_count}.csv')
            with open(output_file, 'w', newline='') as output:
                writer = csv.writer(output)
                writer.writerow(header)

                rows_written = 0
                while pending_row is not None and rows_written < rows_per_file:
                    writer.writerow(pending_row)
                    rows_written += 1
                    pending_row = next(reader, None)

            if pending_row is None:  # No more rows to write
                break

            file_count += 1
# Example usage: split HPC.csv into parts of at most 100,000 data rows each,
# written to the current directory. The call is guarded so that importing
# this module to reuse split_csv does not start a split as a side effect.
input_filename = 'HPC.csv'
output_directory = './'
rows_num_per_file = 100000

if __name__ == '__main__':
    split_csv(input_filename, output_directory, rows_num_per_file)