|
1 | 1 | # Checks the integrity of u-blox binary files |
2 | 2 |
|
3 | 3 | # Written by: Paul Clark |
4 | | -# Last update: May 10th, 2023 |
| 4 | +# Last update: May 12th, 2025 |
5 | 5 |
|
6 | 6 | # Reads a UBX file and checks the integrity of UBX, NMEA and RTCM data |
7 | | -# Will rewind and re-sync if an error is found |
8 | | -# Will create a repaired file if desired |
9 | | -# Will print any GNTXT messages if desired |
| 7 | + |
| 8 | +# How it works: |
| 9 | +# |
| 10 | +# Each byte from the UBX input file fi is processed according to the ubx_nmea_state state machine |
| 11 | +# |
| 12 | +# For UBX messages: |
| 13 | +# Sync Char 1: 0xB5 |
| 14 | +# Sync Char 2: 0x62 |
| 15 | +# Class byte |
| 16 | +# ID byte |
| 17 | +# Length: two bytes, little endian |
| 18 | +# Payload: length bytes |
| 19 | +# Checksum: two bytes |
| 20 | +# E.g.: |
| 21 | +# RXM_RAWX is class 0x02 ID 0x15 |
| 22 | +# RXM_SFRBF is class 0x02 ID 0x13 |
| 23 | +# TIM_TM2 is class 0x0d ID 0x03 |
| 24 | +# NAV_POSLLH is class 0x01 ID 0x02 |
| 25 | +# NAV_PVT is class 0x01 ID 0x07 |
| 26 | +# NAV_STATUS is class 0x01 ID 0x03 |
| 27 | +# Sync is lost when: |
| 28 | +# 0x62 does not follow 0xB5 |
| 29 | +# The checksum fails |
| 30 | +# |
| 31 | +# For NMEA messages: |
| 32 | +# Starts with a '$' |
| 33 | +# The next five characters indicate the message type (stored in nmea_char_1 to nmea_char_5) |
| 34 | +# Message fields are comma-separated |
| 35 | +# Followed by an '*' |
| 36 | +# Then a two character checksum (the logical exclusive-OR of all characters between the $ and the * as ASCII hex) |
| 37 | +# Ends with CR LF |
| 38 | +# Sync is lost when: |
| 39 | +# The message length is excessive |
| 40 | +# The checksum fails |
| 41 | +# CR does not follow the checksum |
| 42 | +# LF does not follow CR |
| 43 | +# |
| 44 | +# For RTCM messages: |
| 45 | +# Byte0 is 0xD3 |
| 46 | +# Byte1 contains 6 unused bits plus the 2 MS bits of the message length |
| 47 | +# Byte2 contains the remainder of the message length |
| 48 | +# Byte3 contains the first 8 bits of the message type |
| 49 | +# Byte4 contains the last 4 bits of the message type and (optionally) the first 4 bits of the sub type |
| 50 | +# Byte5 contains (optionally) the last 8 bits of the sub type |
| 51 | +# Payload |
| 52 | +# Checksum: three bytes CRC-24Q (calculated from Byte0 to the end of the payload, with seed 0) |
| 53 | +# Sync is lost when: |
| 54 | +# The checksum fails |
| 55 | +# |
| 56 | +# This code will: |
| 57 | +# Rewind and re-sync if an error is found (sync is lost) |
| 58 | +# Create a repaired file if desired |
| 59 | +# Print any GNTXT messages if desired |
| 60 | +# |
| 61 | +# If sync is lost: |
| 62 | +# The ubx_nmea_state is set initially to sync_lost |
| 63 | +# sync_lost_at records the file byte at which sync was lost |
| 64 | +# resync_in_progress is set to True |
| 65 | +# The code attempts to resync - searching for the next valid message |
| 66 | +# Sync is re-established the next time a valid message is found. resync_in_progress is set to False |
| 67 | +# |
| 68 | +# Rewind: |
| 69 | +# If (e.g.) a UBX payload byte is dropped by the logging software, |
| 70 | +# the checksum bytes become misaligned and the checksum fails. |
| 71 | +# We do not know how many bytes were dropped... |
| 72 | +# If we attempt to re-sync immediately after the checksum failure |
| 73 | +# - without rewinding - the next valid message will also be discarded |
| 74 | +# as the first byte(s) of that message will already have been processed |
| 75 | +# when the checksum failure is detected. |
| 76 | +# To avoid this, rewind_to stores the position of the last known valid data. |
| 77 | +# E.g. rewind_to stores the position of the UBX length MSB byte - the byte |
| 78 | +# before the start of the payload. If a UBX payload byte is dropped and the |
| 79 | +# checksum fails, the code will rewind to that byte and attempt to re-sync |
| 80 | +# from there. The valid message following the erroneous one is then processed |
| 81 | +# correctly. |
| 82 | +# rewind_in_progress is set to True during a rewind and cleared when the next |
| 83 | +# valid message is processed. rewind_in_progress prevents the code from |
| 84 | +# rewinding more than once. The code will not rewind again until |
| 85 | +# rewind_in_progress is cleared. |
| 86 | +# |
| 87 | +# Repair: |
| 88 | +# This code can repair the file, copying only valid CRC-checked messages to the |
| 89 | +# repair file fo. When sync is lost and a rewind occurs, we need to rewind the |
| 90 | +# repair file to the start of the erroneous message, and overwrite it with |
| 91 | +# subsequent valid data. |
| 92 | +# rewind_repair_file_to contains the position of the end of the last valid |
| 93 | +# message written to the repair file. The repair file is rewound to here |
| 94 | +# during a rewind and re-sync. |
| 95 | +# The repair file is rewound and truncated before being closed, to discard any |
| 96 | +# possible partial message already copied to the file. |
| 97 | + |
10 | 98 |
|
11 | 99 | # SparkFun code, firmware, and software is released under the MIT License (http://opensource.org/licenses/MIT) |
12 | 100 | # |
|
32 | 120 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
33 | 121 | # SOFTWARE. |
34 | 122 |
|
| 123 | + |
35 | 124 | import sys |
36 | 125 | import os |
37 | 126 |
|
38 | | -class UBXIntegrityChecker(): |
| 127 | +class UBX_Integrity_Checker(): |
39 | 128 |
|
40 | | - def __init__(self, ubxFile, repairFile = None, printGNTXT = False, maxRewinds = 100): |
| 129 | + def __init__(self, ubxFile:str = None, repairFile:str = None, printGNTXT:bool = False, maxRewinds:int = 100) -> None: # Store the checker settings; every argument is optional and can be changed later via the set* methods |
41 | 130 | self.filename = ubxFile # Path to the UBX file to check. May be None here, but checkIntegrity raises if it is still None |
42 | 131 | self.repairFilename = repairFile # Path to the repair file (optional) |
43 | 132 | self.printGNTXT = printGNTXT # Display any GNTXT messages found |
44 | 133 | self.max_rewinds = maxRewinds # Abort after this many rewinds |
45 | 134 |
|
| 135 | + def setFilename(self, ubxFile:str) -> None: # Set the path of the UBX file to be checked |
| 136 | + self.filename = ubxFile |
| 137 | + |
| 138 | + def setRepairFilename(self, repairFile:str) -> None: # Set the path of the repair file |
| 139 | + self.repairFilename = repairFile |
| 140 | + |
| 141 | + def setPrintGNTXT(self, printGNTXT:bool) -> None: # Enable or disable the display of any GNTXT messages found |
| 142 | + self.printGNTXT = printGNTXT |
| 143 | + |
| 144 | + def setMaxRewinds(self, maxRewinds:int) -> None: # Set the maximum number of file rewinds (checking aborts after this many) |
| 145 | + self.max_rewinds = maxRewinds |
| 146 | + |
46 | 147 | # Add byte to checksums sum1 and sum2 |
47 | 148 | def csum(self, byte, sum1, sum2): |
48 | 149 | sum1 = sum1 + byte |
@@ -73,6 +174,9 @@ def checkIntegrity(self): |
73 | 174 | print('UBX Integrity Checker') |
74 | 175 | print() |
75 | 176 |
|
| 177 | + if self.filename is None: |
| 178 | + raise Exception('Invalid file!') |
| 179 | + |
76 | 180 | print('Processing',self.filename) |
77 | 181 | print() |
78 | 182 | filesize = os.path.getsize(self.filename) # Record the file size |
@@ -190,43 +294,7 @@ def checkIntegrity(self): |
190 | 294 | fo.write(fileBytes) |
191 | 295 | repaired_file_bytes = repaired_file_bytes + 1 |
192 | 296 |
|
193 | | - # Process data bytes according to ubx_nmea_state |
194 | | - # For UBX messages: |
195 | | - # Sync Char 1: 0xB5 |
196 | | - # Sync Char 2: 0x62 |
197 | | - # Class byte |
198 | | - # ID byte |
199 | | - # Length: two bytes, little endian |
200 | | - # Payload: length bytes |
201 | | - # Checksum: two bytes |
202 | | - # |
203 | | - # For NMEA messages: |
204 | | - # Starts with a '$' |
205 | | - # The next five characters indicate the message type (stored in nmea_char_1 to nmea_char_5) |
206 | | - # Message fields are comma-separated |
207 | | - # Followed by an '*' |
208 | | - # Then a two character checksum (the logical exclusive-OR of all characters between the $ and the * as ASCII hex) |
209 | | - # Ends with CR LF |
210 | | - # Only allow a new file to be opened when a complete packet has been processed and ubx_nmea_state has returned to "looking_for_B5_dollar_D3" |
211 | | - # Or when a data error is detected (sync_lost) |
212 | | - # |
213 | | - # For RTCM messages: |
214 | | - # Byte0 is 0xD3 |
215 | | - # Byte1 contains 6 unused bits plus the 2 MS bits of the message length |
216 | | - # Byte2 contains the remainder of the message length |
217 | | - # Byte3 contains the first 8 bits of the message type |
218 | | - # Byte4 contains the last 4 bits of the message type and (optionally) the first 4 bits of the sub type |
219 | | - # Byte5 contains (optionally) the last 8 bits of the sub type |
220 | | - # Payload |
221 | | - # Checksum: three bytes CRC-24Q (calculated from Byte0 to the end of the payload, with seed 0) |
222 | | - |
223 | | - # RXM_RAWX is class 0x02 ID 0x15 |
224 | | - # RXM_SFRBF is class 0x02 ID 0x13 |
225 | | - # TIM_TM2 is class 0x0d ID 0x03 |
226 | | - # NAV_POSLLH is class 0x01 ID 0x02 |
227 | | - # NAV_PVT is class 0x01 ID 0x07 |
228 | | - # NAV-STATUS is class 0x01 ID 0x03 |
229 | | - |
| 297 | + # Process each byte through the state machine |
230 | 298 | if (ubx_nmea_state == looking_for_B5_dollar_D3) or (ubx_nmea_state == sync_lost): |
231 | 299 | if (c == 0xB5): # Have we found Sync Char 1 (0xB5) if we were expecting one? |
232 | 300 | if (ubx_nmea_state == sync_lost): |
@@ -339,10 +407,11 @@ def checkIntegrity(self): |
339 | 407 | fo.seek(rewind_repair_file_to) # Rewind the repaired file |
340 | 408 | repaired_file_bytes = rewind_repair_file_to |
341 | 409 | fi.seek(message_start_byte) # Copy the valid message into the repair file |
342 | | - repaired_bytes_to_write = processed - message_start_byte |
| 410 | + repaired_bytes_to_write = 1 + processed - message_start_byte |
343 | 411 | fileBytes = fi.read(repaired_bytes_to_write) |
344 | 412 | fo.write(fileBytes) |
345 | 413 | repaired_file_bytes = repaired_file_bytes + repaired_bytes_to_write |
| 414 | + rewind_repair_file_to = repaired_file_bytes |
346 | 415 | else: |
347 | 416 | if (fo): |
348 | 417 | rewind_repair_file_to = repaired_file_bytes # Rewind repair file to here if sync is lost |
@@ -374,9 +443,10 @@ def checkIntegrity(self): |
374 | 443 | nmea_char_4 = c |
375 | 444 | else: # ubx_length == 5 |
376 | 445 | nmea_char_5 = c |
377 | | - message_type = chr(nmea_char_1) + chr(nmea_char_2) + chr(nmea_char_3) + chr(nmea_char_4) + chr(nmea_char_5) # Record the message type |
378 | | - if (message_type == "PUBX,"): # Remove the comma from PUBX |
379 | | - message_type = "PUBX" |
| 446 | + if (nmea_char_5 == ','): # Check for a 4-character message type (e.g. PUBX) |
| 447 | + message_type = chr(nmea_char_1) + chr(nmea_char_2) + chr(nmea_char_3) + chr(nmea_char_4) |
| 448 | + else: |
| 449 | + message_type = chr(nmea_char_1) + chr(nmea_char_2) + chr(nmea_char_3) + chr(nmea_char_4) + chr(nmea_char_5) # Record the message type |
380 | 450 | if (message_type != "GNTXT"): # Reset nmea_string if this is not GNTXT |
381 | 451 | nmea_string = None |
382 | 452 | # Now check if this is an '*' |
@@ -470,10 +540,11 @@ def checkIntegrity(self): |
470 | 540 | fo.seek(rewind_repair_file_to) # Rewind the repaired file |
471 | 541 | repaired_file_bytes = rewind_repair_file_to |
472 | 542 | fi.seek(message_start_byte) # Copy the valid message into the repair file |
473 | | - repaired_bytes_to_write = processed - message_start_byte |
| 543 | + repaired_bytes_to_write = 1 + processed - message_start_byte |
474 | 544 | fileBytes = fi.read(repaired_bytes_to_write) |
475 | 545 | fo.write(fileBytes) |
476 | 546 | repaired_file_bytes = repaired_file_bytes + repaired_bytes_to_write |
| 547 | + rewind_repair_file_to = repaired_file_bytes |
477 | 548 | else: |
478 | 549 | if (fo): |
479 | 550 | rewind_repair_file_to = repaired_file_bytes # Rewind repair file to here if sync is lost |
@@ -562,10 +633,11 @@ def checkIntegrity(self): |
562 | 633 | fo.seek(rewind_repair_file_to) # Rewind the repaired file |
563 | 634 | repaired_file_bytes = rewind_repair_file_to |
564 | 635 | fi.seek(message_start_byte) # Copy the valid message into the repair file |
565 | | - repaired_bytes_to_write = processed - message_start_byte |
| 636 | + repaired_bytes_to_write = 1 + processed - message_start_byte |
566 | 637 | fileBytes = fi.read(repaired_bytes_to_write) |
567 | 638 | fo.write(fileBytes) |
568 | 639 | repaired_file_bytes = repaired_file_bytes + repaired_bytes_to_write |
| 640 | + rewind_repair_file_to = repaired_file_bytes |
569 | 641 | else: |
570 | 642 | if (fo): |
571 | 643 | rewind_repair_file_to = repaired_file_bytes # Rewind repair file to here if sync is lost |
@@ -596,6 +668,8 @@ def checkIntegrity(self): |
596 | 668 | fi.close() # Close the file |
597 | 669 |
|
598 | 670 | if (fo): |
| 671 | + fo.seek(rewind_repair_file_to) # Discard any partial message at the very end of the repair file |
| 672 | + fo.truncate() |
599 | 673 | fo.close() |
600 | 674 |
|
601 | 675 | # Print the file statistics |
@@ -627,12 +701,12 @@ def checkIntegrity(self): |
627 | 701 | import argparse |
628 | 702 |
|
629 | 703 | parser = argparse.ArgumentParser(description='SparkFun UBX Integrity Checker') |
630 | | - parser.add_argument('ubxFile', metavar='ubxFile', help='The path to the UBX file') |
631 | | - parser.add_argument('-r', '--repairFile', required=False, default=None, help='The path to the repair file') |
| 704 | + parser.add_argument('ubxFile', metavar='ubxFile', type=str, help='The path to the UBX file') |
| 705 | + parser.add_argument('-r', '--repairFile', required=False, type=str, default=None, help='The path to the repair file') |
632 | 706 | parser.add_argument('--GNTXT', default=False, action='store_true', help='Display any GNTXT messages found') |
633 | | - parser.add_argument('-rw', '--rewinds', required=False, type=int, default=100, help='The maximum number of file rewinds when repairing') |
| 707 | + parser.add_argument('-rw', '--rewinds', required=False, type=int, default=100, help='The maximum number of file rewinds') |
634 | 708 | args = parser.parse_args() |
635 | 709 |
|
636 | | - checker = UBXIntegrityChecker(args.ubxFile, args.repairFile, args.GNTXT, args.rewinds) |
| 710 | + checker = UBX_Integrity_Checker(args.ubxFile, args.repairFile, args.GNTXT, args.rewinds) |
637 | 711 |
|
638 | 712 | checker.checkIntegrity() |
0 commit comments