Skip to content

Commit 8be0eda

Browse files
committed
Initial task complete. All data from test input files being written to CSV files.
1 parent c4875a8 commit 8be0eda

17 files changed

+2983
-43
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
Under development. README to be populated later.
2+
3+
Reference materials:
4+
https://www.trimble.com/OEM_ReceiverHelp/V4.44/en/NMEA-0183messages_MessageOverview.html

nmea_parser.py

Lines changed: 174 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
import pynmea2
22
import argparse
3+
import sys
34
import os
45
from datetime import datetime
56
import pandas as pd
7+
from collections import namedtuple
8+
import re
9+
import functools
10+
print = functools.partial(print, flush=True)
611

712
def parse_and_validate_args():
813

@@ -25,18 +30,18 @@ def read_file(file):
2530

2631
sentences = []
2732

28-
for line in file.readlines():
33+
for line_idx, line in enumerate(file.readlines()):
2934
try:
3035
sentence = pynmea2.parse(line)
3136
sentences.append(sentence)
3237
except pynmea2.ParseError as e:
33-
print('Parse error: {}'.format(e))
38+
print(f'Parse error on line {line_idx+1}: {e}')
3439
continue
3540

3641
return sentences
3742

3843

39-
def sort_sentences(sentences):
44+
def categorize_sentences(sentences):
4045

4146
# Make a list of all sentence types
4247
sentence_types = []
@@ -61,14 +66,26 @@ def datetime_stamp_sentences(sentences, cycle_start='RMC'):
6166

6267
datetime_stamped_sentences = []
6368

69+
# Set all datetimes to NaT until the first cycle-start sentence supplies a real timestamp
70+
date_time = pd.NaT # NaT -> 'Not a Time', essentially a NaN/Null value
71+
6472
for sentence in sentences:
6573

6674
# TODO: Search through cycle to find sentence(s) containing date and/or time
6775

6876
# If sentence is the first in a cycle, get the timestamp
6977
if sentence.sentence_type == cycle_start:
70-
time = sentence.timestamp
71-
date = sentence.datestamp
78+
79+
if hasattr(sentence, 'timestamp'):
80+
time = sentence.timestamp
81+
else:
82+
time = None
83+
84+
if hasattr(sentence, 'datestamp'):
85+
date = sentence.datestamp
86+
else:
87+
date = None
88+
7289
if date and time:
7390
date_time = datetime.combine(date, time)
7491
else: # Sentence does not contain date and time
@@ -80,49 +97,118 @@ def datetime_stamp_sentences(sentences, cycle_start='RMC'):
8097
return datetime_stamped_sentences
8198

8299

83-
# Merge sentences that have the same type and that are (assumed to be)
84-
# part of the same cycle (e.g., msg_num 1 or 2 out of num_messages 2)
85-
def merge_sentence_groups(sentence_sets):
100+
# Get list of groups (lists) of sentences that should be merged together
101+
# Sentences to be merged are those that have the same type and that are (assumed to be)
102+
# part of the same cycle (e.g., msg_num 1 or 2 out of num_messages 2) and where the date and time match
103+
# Assumes sentences that should be merged are listed together but in no particular order
104+
# Currently only supporting merging GSV sentences
105+
def get_merge_groups(sentence_sets):
86106

87-
for sentence_set in sentence_sets:
88-
for dts_sentence in sentence_set:
107+
merge_group_lists = [[[]] for idx in range(len(sentence_sets))]
89108

90-
print(dts_sentence)
109+
for set_idx, sentence_set in enumerate(sentence_sets):
110+
for sentence_idx, dts_sentence in enumerate(sentence_set):
91111

92-
return sentences
112+
if hasattr(dts_sentence.sentence, 'num_messages') and (int(dts_sentence.sentence.num_messages) > 1) \
113+
and (dts_sentence.sentence.sentence_type == 'GSV'): # If sentence needs to be merged with another sentence
93114

115+
# Check whether current sentence should be merged with current group or if new group should be started
116+
for sentence_in_group_idx, _ in enumerate(merge_group_lists[set_idx][-1]):
94117

95-
def sentences_to_dataframes(sentences):
118+
if dts_sentence.sentence.num_messages != sentence_sets[set_idx][merge_group_lists[set_idx][-1][sentence_in_group_idx]].sentence.num_messages \
119+
or dts_sentence.sentence.msg_num == sentence_sets[set_idx][merge_group_lists[set_idx][-1][sentence_in_group_idx]].sentence.msg_num \
120+
or dts_sentence.date_time is not sentence_sets[set_idx][merge_group_lists[set_idx][-1][sentence_in_group_idx]].date_time:
96121

97-
return sentences
122+
merge_group_lists[set_idx].append([]) # Start new merge group
123+
break # Break is necessary here so that we don't append more than one []
98124

125+
merge_group_lists[set_idx][-1].append(sentence_idx)
99126

100-
def dfs_to_csv(sentence_dfs):
127+
return merge_group_lists
101128

102-
pass
103129

130+
def merge_groups(sentence_sets, merge_group_lists):
104131

105-
def get_sentence_type(sentence):
132+
for set_idx, set_merge_group_list in enumerate(merge_group_lists):
133+
if any(set_merge_group_list): # If there are merge groups
134+
for merge_group in set_merge_group_list:
106135

107-
return sentence.talker + sentence.sentence_type
136+
if len(merge_group) > 1: # There will be some empty/singleton groups, but we only want to merge groups containing more than one sentence
137+
merge_group_sentences = [sentence_sets[set_idx][sen_idx] for sen_idx in merge_group ]
138+
merged_sentence = MergedSentence_GSV(merge_group_sentences)
139+
sentence_sets[set_idx].append(merged_sentence)
108140

141+
return sentence_sets
109142

110-
def print_sentences(sentences):
111143

112-
for sentence in sentences:
113-
print(type(sentence))
114-
print(sentence)
115-
print(type(repr(sentence)))
116-
print(repr(sentence))
117-
print(type(sentence.data))
118-
print(sentence.data)
119-
print()
120-
print(sentence.fields)
121-
print()
122-
print(sentence.__dict__)
123-
print()
124-
print()
125-
print()
144+
def sentences_to_dataframes(sentence_sets):
145+
146+
dfs = []
147+
148+
for set_idx, sentence_set in enumerate(sentence_sets):
149+
150+
if sentence_sets[set_idx][0].sentence.sentence_type == 'GSV':
151+
fields = expand_GSV_fields(sentence_sets[set_idx][0].sentence.fields)
152+
else:
153+
fields = sentence_sets[set_idx][0].sentence.fields
154+
columns = [column_tuple[1] for column_tuple in fields]
155+
156+
# Add columns for data fields missing from class
157+
# TODO: Fork pynmea2 module to correct
158+
if sentence_sets[set_idx][0].sentence.sentence_type == 'RMC':
159+
columns.append('mode')
160+
161+
columns.insert(0, 'datetime')
162+
columns.insert(0, 'talker')
163+
columns.insert(0, 'sentence_type')
164+
165+
df = pd.DataFrame(columns=columns)
166+
167+
for dts_sentence in sentence_set:
168+
169+
row_data = dts_sentence.sentence.data.copy()
170+
171+
if isinstance(dts_sentence.date_time, pd._libs.tslibs.nattype.NaTType):
172+
date_time = ''
173+
else:
174+
date_time = dts_sentence.date_time
175+
row_data.insert(0, date_time)
176+
row_data.insert(0, dts_sentence.sentence.talker)
177+
row_data.insert(0, dts_sentence.sentence.sentence_type)
178+
179+
# Single GSV sentences have data for 4 SVs and merged GSV sentences have data for 12 SVs, so fill single GSV sentences with NaNs for SV 5-12 data
180+
if dts_sentence.sentence.sentence_type == 'GSV':
181+
placeholders = [''] * (len(columns) - len(row_data))
182+
row_data = row_data + placeholders
183+
184+
df.loc[len(df)] = row_data # Append data as new row in dataframe
185+
186+
dfs.append(df)
187+
188+
# pd.set_option('display.max_rows', None)
189+
# pd.set_option('display.max_columns', None)
190+
# pd.set_option('display.width', 180)
191+
192+
return dfs
193+
194+
195+
def dfs_to_csv(sentence_dfs, input_file_path, verbose=False):
196+
197+
input_file_name = os.path.basename(input_file_path)
198+
input_file_name = os.path.splitext(input_file_name)[0]
199+
200+
for df_idx, df in enumerate(sentence_dfs):
201+
filename = f"{input_file_name}_{df['talker'][0]}{df['sentence_type'][0]}.csv"
202+
df.to_csv(filename, index=False) # Save to cwd
203+
204+
if verbose:
205+
if df_idx is 0: # If this is the first df
206+
print("data written to:")
207+
print(" " + filename)
208+
209+
def get_sentence_type(sentence):
210+
211+
return sentence.talker + sentence.sentence_type
126212

127213

128214
class DateTimeStampedSentence:
@@ -140,20 +226,65 @@ def __str__(self):
140226
return str(self.sentence) + ' ' + str(self.date_time)
141227

142228

143-
if __name__ == '__main__':
229+
class MergedSentence_GSV:
144230

145-
args = parse_and_validate_args()
231+
def __init__(self, merge_group):
232+
233+
self.date_time = merge_group[0].date_time
234+
235+
Sentence = namedtuple('sentence', 'talker sentence_type fields data')
236+
237+
talker = merge_group[0].sentence.talker
238+
sentence_type = merge_group[0].sentence.sentence_type
239+
240+
# Add fields for SVs 5-12. 12 SVs seems to be a common number of maximum supported SVs for GNSS devices
241+
fields = expand_GSV_fields(merge_group[0].sentence.fields)
242+
243+
# Merge SV data from sentences after the first with the data from the first sentences
244+
data = merge_group[0].sentence.data
245+
data[1] = '-1' # msg_num doesn't apply to a merged sentence, so set it to the sentinel value -1
246+
for dts_sentence in merge_group[1:]:
247+
data = data + dts_sentence.sentence.data[3:]
146248

249+
self.sentence = Sentence(talker, sentence_type, fields, data)
250+
251+
252+
# Add fields for SVs 5-12. 12 SVs seems to be a common number of maximum supported SVs for GNSS devices
253+
def expand_GSV_fields(fields):
254+
255+
fields = list(fields) # Make mutable
256+
fields_to_duplicate = [field for field in fields if field[0].endswith('4')] # Original GSV sentence supports 0-4 SVs, so copy and change fields for SV 4
257+
for SV_idx in range(4, 12+1):
258+
new_fields = [(re.sub(r'4$', str(SV_idx), field[0]), re.sub(r'4$', str(SV_idx), field[1])) for field in fields_to_duplicate]
259+
fields = fields + new_fields
260+
fields = tuple(fields) # Return to original immutable tuple state
261+
262+
return fields
263+
264+
265+
def main():
266+
267+
print("\nReading in data... ", end="")
268+
args = parse_and_validate_args()
147269
file = open_file(args.filepath)
148270
sentences = read_file(file)
149-
# print_sentences(sentences)
150-
datetime_stamped_sentences = datetime_stamp_sentences(sentences, 'RMC') # Cycle starts with 'RMC' sentence
271+
print("done.")
272+
273+
print("\nProcessing data... ", end="")
274+
dts_sentences = datetime_stamp_sentences(sentences, 'RMC') # Cycle starts with 'RMC' sentence
275+
# 'dts' -> 'datetime stamped'
276+
sentence_sets = categorize_sentences(dts_sentences)
277+
merge_group_lists = get_merge_groups(sentence_sets)
278+
sentence_sets = merge_groups(sentence_sets, merge_group_lists)
279+
sentence_dfs = sentences_to_dataframes(sentence_sets)
280+
print("done.")
151281

152-
dts_sentences = datetime_stamped_sentences # Alias/rename variable
153-
del datetime_stamped_sentences
282+
print("\nWriting data to CSVs... ", end="")
283+
dfs_to_csv(sentence_dfs, args.filepath, verbose=True)
284+
print("done.")
154285

155-
sentence_sets = sort_sentences(dts_sentences)
156-
sentence_sets = merge_sentence_groups(sentence_sets)
157-
sentence_dfs = sentences_to_dataframes(sentences)
158-
dfs_to_csv(sentence_dfs)
286+
print("\nAll done. Exiting.\n\n")
287+
288+
if __name__ == '__main__':
159289

290+
main()

0 commit comments

Comments
 (0)