1
1
import re
2
+ from pathlib import Path
3
+
2
4
import numpy as np
3
5
import pandas as pd
4
- from pathlib import Path
5
6
from pydantic import validate_arguments
6
7
7
-
8
8
# Matches text that has at least one character followed by a non-empty
# bracketed part, e.g. "Västerås central [Vå]".
pattern = re.compile(r".+\[.+\]")

# Matches text that begins with the literal token "CMAST" when used with
# ``.match`` (e.g. "CMAST 281-2B").
pattern2 = re.compile(r"CMAST")
10
10
11
# Column labels applied positionally to a parsed kmm2 table.
# Labels of the form "N?" are presumably placeholders for fields whose
# meaning has not been identified yet -- TODO confirm.
expected_columns = [
    "code",
    "centimeter",
    "track_section",
    "kilometer",
    "meter",
    "track_lane",
    "1?",
    "2?",
    "3?",
    "4?",
    "sweref99_tm_x",
    "sweref99_tm_y",
    "contact_wire_material",
    "rail_model",
    "sliper_model",
    "between_stations",
    "5?",
    "6?",
    "7?",
    "8?",
    "max_speed",
    "datetime",
    "bearing",
    "linear_coordinate",
]

# Dtypes to enforce for the columns whose type is known; columns not listed
# here keep whatever dtype the CSV parser inferred.
# NOTE(review): float32 carries roughly 7 significant decimal digits --
# confirm that is enough precision for the sweref99_tm_* coordinates.
expected_dtypes = {
    "centimeter": np.int64,
    "track_section": str,
    "kilometer": np.int32,
    "meter": np.int32,
    "track_lane": str,
    "sweref99_tm_x": np.float32,
    "sweref99_tm_y": np.float32,
}
46
+
11
47
12
48
@validate_arguments
13
49
def read_kmm2 (path : Path ):
@@ -19,49 +55,45 @@ def read_kmm2(path: Path):
19
55
]
20
56
21
57
try :
22
- return pd .read_csv (
23
- path ,
24
- skiprows = [0 ] + skiprows ,
25
- delimiter = "\t " ,
26
- encoding = "latin1" ,
27
- names = [
28
- "code" ,
29
- "centimeter" ,
30
- "track_section" ,
31
- "kilometer" ,
32
- "meter" ,
33
- "track_lane" ,
34
- "1?" ,
35
- "2?" ,
36
- "3?" ,
37
- "4?" ,
38
- "sweref99_tm_x" ,
39
- "sweref99_tm_y" ,
40
- "contact_wire_material" ,
41
- "rail_model" ,
42
- "sliper_model" ,
43
- "between_stations" ,
44
- "5?" ,
45
- "6?" ,
46
- "7?" ,
47
- "8?" ,
48
- "max_speed" ,
49
- ],
50
- dtype = dict (
51
- centimeter = np .int64 ,
52
- track_section = str ,
53
- kilometer = np .int32 ,
54
- meter = np .int32 ,
55
- track_lane = str ,
56
- sweref99_tm_x = np .float32 ,
57
- sweref99_tm_y = np .float32 ,
58
- ),
59
- low_memory = False ,
60
- )
58
+ try :
59
+ df = pd .read_csv (
60
+ path ,
61
+ skiprows = [0 ] + skiprows ,
62
+ delimiter = "\t " ,
63
+ encoding = "latin1" ,
64
+ low_memory = False ,
65
+ )
66
+ except pd .errors .EmptyDataError :
67
+ return pd .DataFrame (columns = expected_columns )
68
+ else :
69
+ return with_column_names (df )
61
70
except Exception as e :
62
71
raise ValueError ("Unable to parse kmm2 file, invalid csv." ) from e
63
72
64
73
74
def with_column_names(df, columns=None, dtypes=None):
    """Label the columns of *df* positionally and coerce the known dtypes.

    Args:
        df: Frame whose columns are to be relabelled by position. Its
            ``columns`` attribute is overwritten in place.
        columns: Ordered labels to apply. Defaults to the module-level
            ``expected_columns``.
        dtypes: Mapping of column label to dtype. Defaults to the
            module-level ``expected_dtypes``. Entries for columns that are
            not present after relabelling are ignored.

    Returns:
        A new DataFrame carrying the assigned labels with every known
        column cast to its expected dtype.

    Raises:
        Whatever ``DataFrame.astype`` raises when a value cannot be cast
        to the requested dtype.
    """
    # Copy so that extending the list below never mutates the caller's list
    # (or the module-level default).
    names = list(expected_columns if columns is None else columns)
    wanted_dtypes = expected_dtypes if dtypes is None else dtypes

    surplus = len(df.columns) - len(names)
    if surplus > 0:
        # More columns than labels: continue the "N?" placeholder numbering
        # after the highest number already in use, so the generated labels
        # can never collide with an existing one (e.g. a second "8?").
        used = [
            int(name[:-1])
            for name in names
            if name.endswith("?") and name[:-1].isdigit()
        ]
        first_free = max(used, default=0) + 1
        names += [f"{i}?" for i in range(first_free, first_free + surplus)]
    elif surplus < 0:
        # Fewer columns than labels: keep only the leading labels.
        names = names[:surplus]

    df.columns = names

    # ``astype`` returns a new frame rather than converting in place, so its
    # result must be the value handed back to the caller.
    # NOTE(review): depending on the pandas version, casting to ``str`` may
    # turn missing values into the literal text "nan" -- confirm this is
    # acceptable for the string-typed columns.
    return df.astype(
        {
            column: dtype
            for column, dtype in wanted_dtypes.items()
            if column in df.columns
        }
    )
91
+
92
+
65
93
def test_patterns():
    """Check that each module-level regex matches a representative sample."""
    station_with_code = pattern.match("Västerås central [Vå]")
    assert station_with_code is not None

    cmast_entry = pattern2.match("CMAST 281-2B")
    assert cmast_entry is not None
96
+
97
+
98
def test_extra_columns():
    """Smoke test: reading the extra-columns fixture must not raise."""
    fixture = Path("tests/extra_columns.kmm2")
    read_kmm2(fixture)
0 commit comments