Skip to content

Commit a901665

Browse files
FelixAbrahamsson
authored and committed
fix: kmm files with extra columns
1 parent 8cc1518 commit a901665

File tree

2 files changed

+86
-41
lines changed

2 files changed

+86
-41
lines changed

kmm/positions/read_kmm2.py

Lines changed: 73 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,49 @@
11
import re
2+
from pathlib import Path
3+
24
import numpy as np
35
import pandas as pd
4-
from pathlib import Path
56
from pydantic import validate_arguments
67

7-
88
# Matches text followed by a bracketed part, e.g. "Västerås central [Vå]".
pattern = re.compile(r".+\[.+\]")
# Matches text that begins with the literal "CMAST", e.g. "CMAST 281-2B".
pattern2 = re.compile(r"CMAST")

# Column names of a kmm2 position row, in file order. Names of the form
# "<n>?" are placeholders for columns whose meaning is not known.
expected_columns = [
    "code", "centimeter", "track_section", "kilometer", "meter", "track_lane",
    "1?", "2?", "3?", "4?",
    "sweref99_tm_x", "sweref99_tm_y",
    "contact_wire_material", "rail_model", "sliper_model", "between_stations",
    "5?", "6?", "7?", "8?",
    "max_speed", "datetime", "bearing", "linear_coordinate",
]

# Target dtype for the columns that have a known type; columns not listed
# here have no dtype requested.
expected_dtypes = {
    "centimeter": np.int64,
    "track_section": str,
    "kilometer": np.int32,
    "meter": np.int32,
    "track_lane": str,
    "sweref99_tm_x": np.float32,
    "sweref99_tm_y": np.float32,
}
46+
1147

1248
@validate_arguments
1349
def read_kmm2(path: Path):
@@ -19,49 +55,45 @@ def read_kmm2(path: Path):
1955
]
2056

2157
try:
22-
return pd.read_csv(
23-
path,
24-
skiprows=[0] + skiprows,
25-
delimiter="\t",
26-
encoding="latin1",
27-
names=[
28-
"code",
29-
"centimeter",
30-
"track_section",
31-
"kilometer",
32-
"meter",
33-
"track_lane",
34-
"1?",
35-
"2?",
36-
"3?",
37-
"4?",
38-
"sweref99_tm_x",
39-
"sweref99_tm_y",
40-
"contact_wire_material",
41-
"rail_model",
42-
"sliper_model",
43-
"between_stations",
44-
"5?",
45-
"6?",
46-
"7?",
47-
"8?",
48-
"max_speed",
49-
],
50-
dtype=dict(
51-
centimeter=np.int64,
52-
track_section=str,
53-
kilometer=np.int32,
54-
meter=np.int32,
55-
track_lane=str,
56-
sweref99_tm_x=np.float32,
57-
sweref99_tm_y=np.float32,
58-
),
59-
low_memory=False,
60-
)
58+
try:
59+
df = pd.read_csv(
60+
path,
61+
skiprows=[0] + skiprows,
62+
delimiter="\t",
63+
encoding="latin1",
64+
low_memory=False,
65+
)
66+
except pd.errors.EmptyDataError:
67+
return pd.DataFrame(columns=expected_columns)
68+
else:
69+
return with_column_names(df)
6170
except Exception as e:
6271
raise ValueError("Unable to parse kmm2 file, invalid csv.") from e
6372

6473

74+
def with_column_names(df):
    """Name the columns of a parsed kmm2 frame and apply the expected dtypes.

    Columns are named by position from ``expected_columns``. A frame with
    fewer columns receives only the leading names. A frame with more columns
    receives numbered placeholder names for the surplus ones, continuing
    after the last placeholder already present in ``expected_columns`` so
    that every column label stays unique.

    Args:
        df: Frame whose columns are in kmm2 file order. Its column labels
            are replaced in place.

    Returns:
        A new DataFrame carrying the assigned labels in which every column
        listed in ``expected_dtypes`` that is present has been cast to that
        dtype.

    Raises:
        Any error raised by ``DataFrame.astype`` when a column cannot be
        cast propagates to the caller.
    """
    column_count = len(df.columns)
    surplus = max(0, column_count - len(expected_columns))

    # Placeholders in expected_columns are numbered "1?", "2?", ...; start
    # after them so surplus columns do not collide with an existing label.
    first_free = 1 + sum(name.endswith("?") for name in expected_columns)
    placeholders = [f"{i}?" for i in range(first_free, first_free + surplus)]

    # Slicing also covers the short-frame case: only the leading names are
    # used when the frame has fewer columns than expected.
    df.columns = (expected_columns + placeholders)[:column_count]

    # NOTE(review): expected_dtypes requests float32 for the sweref99
    # columns; values of the magnitude seen in the fixture (e.g. 6225000.4)
    # may lose sub-metre precision at that width - confirm this is intended.
    present_dtypes = {
        column: dtype
        for column, dtype in expected_dtypes.items()
        if column in df.columns
    }
    # astype returns a new frame; the cast only takes effect if that result
    # is the value handed back.
    return df.astype(present_dtypes)
91+
92+
6593
def test_patterns():
    """Check that each module-level regex matches a representative sample."""
    samples = (
        (pattern, "Västerås central [Vå]"),
        (pattern2, "CMAST 281-2B"),
    )
    for regex, text in samples:
        assert regex.match(text)
96+
97+
98+
def test_extra_columns():
    """Smoke test: reading the extra-columns fixture must not raise."""
    fixture = Path("tests/extra_columns.kmm2")
    read_kmm2(fixture)

tests/extra_columns.kmm2

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
VER Nav2020 1.06 (RTK) NAV = 20230223_999999_2011TE
2+
POS 59350972 909A 533 852 N 3 48 0 -332 6225000.4 423417.0 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21904286 N 19 21904286
3+
POS 59351072 909A 533 853 N 3 48 174 -138 6224999.4 423416.7 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21906348 N 19 21904286
4+
POS 59351172 909A 533 854 N 3 48 303 -110 6224998.5 423416.3 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21907536 N 19 21904286
5+
POS 59351272 909A 533 855 N 3 48 396 -65 6224997.5 423416.0 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21908444 N 19 21904286
6+
POS 59351372 909A 533 856 N 3 48 478 -71 6224996.6 423415.7 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21909197 N 19 21904286
7+
POS 59351473 909A 533 857 N 3 48 544 -72 6224995.7 423415.4 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21909858 N 19 21904286
8+
POS 59351573 909A 533 858 N 3 48 606 -59 6224994.7 423415.1 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21910452 N 19 21904286
9+
POS 59351673 909A 533 859 N 3 48 665 -62 6224993.7 423414.8 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21910993 N 19 21904286
10+
POS 59351773 909A 533 860 N 3 48 714 -68 6224992.8 423414.5 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21911497 N 19 21904286
11+
POS 59351873 909A 533 861 N 3 48 762 -47 6224991.8 423414.2 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21911969 N 19 21904286
12+
POS 59351973 909A 533 862 N 3 48 812 -49 6224990.9 423413.8 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21912412 N 19 21904286
13+
POS 59352074 909A 533 863 N 3 48 853 -32 6224990.0 423413.5 10 ? UIC60 B Hm 405 Hm 461 0 160/180/180/200 >< N 19 21912834 N 19 21904286

0 commit comments

Comments
 (0)