Skip to content

Commit d6f921a

Browse files
authored
Consolidate Data products ground scripts (#278)
* Refactor data products CLI
* Start refactoring some things into common.py
* More refactoring, add new_parser.py
* Add dictionary loaders for data products and data templates
* Clean up
* Fix error with variable-length types
* Add default output path generation for DataProductParser
* Reformat
* Clean up and remove legacy code
* Refactor dictionary loading to a static method for improved configuration management
* Test code cleanup
* Refactor test utilities and cleanup logic for improved test isolation
* Move globals_cleanup to src tree
* Dump all record info rather than just ID
* Rename DataProductParser to DataProductDecoder and add tests
* Rename parse to decode and fix a few things
* Rework enum REP_TYPE to be IntegerType instead of string
* Fix REP_TYPE remnants
* Fix optional typing usage
* Fix first round of review comments and XML loader
* Remove PROC_TYPE_HARD_ZERO from enumeratedConstants in dictionary.json
* Other batch of review comments
* Remove unused imports from decoder.py
1 parent 28356be commit d6f921a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1899
-1351
lines changed

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ Repository = "https://github.com/fprime-community/fprime-gds"
6262
[project.scripts]
6363
fprime-cli = "fprime_gds.executables.fprime_cli:main"
6464
fprime-seqgen = "fprime_gds.common.tools.seqgen:main"
65-
fprime-dp-write = "fprime_gds.executables.data_product_writer:main"
66-
fprime-dp-validate = "fprime_gds.executables.data_product_validator:main"
65+
fprime-dp = "fprime_gds.executables.data_products:main"
6766
fprime-gds = "fprime_gds.executables.run_deployment:main"
6867
fprime-prm-write = "fprime_gds.common.tools.params:main"
6968
fprime-merge-dictionary = "fprime_gds.executables.dictionary_merge:main"

src/fprime_gds/common/dp/__init__.py

Whitespace-only changes.

src/fprime_gds/common/dp/common.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""
2+
Common utilities and constants for Data Product processing.
3+
4+
This module contains shared functionality between the Data Product Decoder
5+
and Validator, including:
6+
- Checksum configuration and CRC32 utilities
7+
- Header field definitions
8+
- Binary format constants
9+
10+
@author: thomas-bc
11+
"""
12+
13+
from binascii import crc32
14+
15+
from fprime_gds.common.models.serialize.serializable_type import SerializableType
16+
from fprime_gds.common.utils.config_manager import ConfigManager
17+
from fprime_gds.common.models.serialize.numerical_types import U8Type, U32Type
18+
from fprime_gds.common.models.serialize.array_type import ArrayType
19+
from fprime_gds.common.models.serialize.time_type import TimeType
20+
21+
22+
23+
# ==============================================================================
# Binary Format Constants
# ==============================================================================

# Data product binary files are deserialized big endian (struct format '>')
BIG_ENDIAN = ">"
29+
30+
# ==============================================================================
31+
# Checksum Configuration
32+
# ==============================================================================
33+
34+
class ChecksumConfig:
    """Configuration for CRC32 checksum validation.

    These values are technically configurable by F Prime end users,
    but are treated as constants here. Future work could parameterize these.
    """

    # --- Configurable values ---
    CHECKSUM_TOKEN_TYPE = U32Type   # serialized type of the on-wire checksum token
    CHECKSUM_INIT = 0               # initial CRC accumulator value
    CHECKSUM_XOR_OUT = 0xFFFFFFFF   # mask applied to keep the result 32-bit unsigned

    # --- Derived from the token type above ---
    CHECKSUM_LEN = CHECKSUM_TOKEN_TYPE.getSize()
    CHECKSUM_STRUCT = CHECKSUM_TOKEN_TYPE.get_serialize_format()
47+
48+
49+
50+
def calculate_crc32(data: bytes, init_value: int = ChecksumConfig.CHECKSUM_INIT) -> int:
    """Calculate CRC32 checksum for given data.

    Used by both decoder (for accumulating CRC during read) and validator
    (for validating checksums).

    Args:
        data: Bytes to calculate checksum for
        init_value: Initial CRC value (default: 0)

    Returns:
        Calculated CRC32 checksum as 32-bit unsigned integer
    """
    running = crc32(data, init_value)
    # Mask so the result is always a 32-bit unsigned value, regardless of platform
    return running & ChecksumConfig.CHECKSUM_XOR_OUT
64+
65+
66+
# ==============================================================================
67+
# Data Product Header Type
68+
# ==============================================================================
69+
70+
def get_dp_header_type() -> type[SerializableType]:
    """Returns a dictionary-configured DataProduct header serializable type.

    As defined per https://fprime.jpl.nasa.gov/latest/Fw/Dp/docs/sdd
    Ideally this should be part of the dictionary, but it is not currently.

    Returns:
        A SerializableType subclass describing the data product header layout.
    """
    # The type is constructed lazily, on every call, rather than statically at
    # module scope: the ConfigManager must be initialized (dictionary loaded)
    # before the configured types below can be resolved.
    # NOTE: there is no caching here; callers that need the type repeatedly
    # should hold on to the returned class.
    config = ConfigManager()  # resolve once and reuse for every field lookup
    return SerializableType.construct_type(
        "DataProductHeaderType",
        [
            ("PacketDescriptor", config.get_type("FwPacketDescriptorType"), "{}", "The F Prime packet descriptor"),
            ("Id", config.get_type("FwDpIdType"), "{}", "The container ID"),
            ("Priority", config.get_type("FwDpPriorityType"), "{}", "The container priority"),
            ("Time", TimeType, "{}", "Fw.Time object"),
            ("ProcTypes", config.get_type("Fw.DpCfg.ProcType").REP_TYPE, "{}", "Processing types bit mask"),
            ("UserData", ArrayType.construct_type("UserData", U8Type, config.get_constant("Fw.DpCfg.CONTAINER_USER_DATA_SIZE"), "{}"), "{}", "User-configurable data"),
            ("DpState", config.get_type("Fw.DpState"), "{}", "Data product state"),
            ("DataSize", config.get_type("FwSizeStoreType"), "{}", "Size of data payload in bytes"),
            ("Checksum", U32Type, "{}", "Header checksum"),
        ],
    )
91+
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
"""
2+
Data Product Decoder using ConfigManager
3+
4+
This module provides a ConfigManager-based decoder for F Prime Data Product files.
5+
Unlike the original implementation which uses Pydantic models and JSON parsing,
6+
this implementation queries type information directly from ConfigManager.
7+
8+
Key differences from the original implementation:
9+
- Uses ConfigManager.get_type() and get_constant() instead of Pydantic models
10+
- No JSON dictionary parsing - assumes ConfigManager is already loaded
11+
- Simplified type resolution through ConfigManager
12+
13+
@author: thomas-bc
14+
@date: January 2026
15+
"""
16+
17+
import json
18+
import sys
19+
from pathlib import Path
20+
from typing import Dict, List, Any, Optional
21+
import dataclasses
22+
23+
from fprime_gds.common.dp.common import (
24+
ChecksumConfig,
25+
calculate_crc32,
26+
get_dp_header_type,
27+
)
28+
from fprime_gds.common.models.dictionaries import Dictionaries
29+
from fprime_gds.common.utils.config_manager import ConfigManager
30+
from fprime_gds.common.templates.dp_record_template import DpRecordTemplate
31+
32+
# ==============================================================================
33+
# Custom Exceptions
34+
# ==============================================================================
35+
36+
class DataProductError(Exception):
    """Base exception for all data product decoding failures.

    Specific failure modes (CRC mismatch, unknown record ID, ...) subclass
    this so callers can catch the whole family with a single except clause.
    """
39+
40+
41+
class CRCError(DataProductError):
    """Raised when CRC checksum validation fails."""

    def __init__(self, section: str, expected: int, calculated: int):
        # Keep the raw values around so callers can report/compare them
        self.section = section
        self.expected = expected
        self.calculated = calculated
        message = f"CRC mismatch in {section}: expected {expected:#x}, got {calculated:#x}"
        super().__init__(message)
51+
52+
53+
class RecordNotFoundError(DataProductError):
    """Raised when a record ID is not found in the dictionary."""

    def __init__(self, record_id: int):
        # Preserve the offending ID for programmatic handling
        self.record_id = record_id
        message = f"Record ID {record_id} not found in dictionary"
        super().__init__(message)
59+
60+
# ==============================================================================
61+
# Data Product Decoder (ConfigManager-based)
62+
# ==============================================================================
63+
64+
class DataProductDecoder:
    """Decoder for F Prime Data Product binary files.

    This decoder reads binary data product files and converts them to human-readable format.

    This currently only supports a JSON representation of the data product.

    Data Product Structure:
    1. Header (variable size based on configuration)
        - See common.py: get_dp_header_type()

    2. Data Records (repeated until DataSize bytes consumed)
        - Record metadata (id, type, etc.)
        - Record data (type depends on record definition)

    3. Data Hash (CRC32 of all record data)

    Assumptions:
    - ConfigManager is already loaded with dictionary information
    - dictionaries property (see constructor) is loaded with data product dictionary info
    - both these assumptions can be resolved by loading dictionaries (see executables/data_products.py)
    """

    def __init__(self, dictionaries: "Dictionaries", binary_file_path: str, output_json_path: Optional[str] = None):
        """Initialize the decoder.

        Args:
            dictionaries: Dictionaries object containing dictionary information
            binary_file_path: Path to the binary data product file (.fdp)
            output_json_path: Optional path for output JSON file
                (defaults to <binary_file>.json)
        """
        self.dictionaries = dictionaries
        self.binary_file_path = binary_file_path
        if output_json_path is None:
            # Default output path: same location and stem as the input, .json extension
            self.output_json_path = str(Path(binary_file_path).with_suffix('.json'))
        else:
            self.output_json_path = output_json_path

    def decode_header(self, file_handle):
        """Decode the data product header.

        Args:
            file_handle: file handle to the data product binary

        Returns:
            Deserialized header object (instance of the type returned by
            get_dp_header_type()); use .to_jsonable() for a dict view.

        Raises:
            CRCError: If header checksum validation fails
        """
        header = get_dp_header_type()()
        header_size = header.getMaxSize()
        header_bin_data = file_handle.read(header_size)
        header.deserialize(header_bin_data, 0)

        # Compute hash on header (from beginning until we hit the checksum)
        computed_hash = calculate_crc32(header_bin_data[:header_size - ChecksumConfig.CHECKSUM_LEN])

        # Validate hash; convert to jsonable once rather than per-access
        stored_hash = header.to_jsonable()["Checksum"]["value"]
        if stored_hash != computed_hash:
            raise CRCError("Header", stored_hash, computed_hash)

        return header

    def decode_record(self, file_handle, record_id: int) -> Dict[str, Any]:
        """Decode a single data record. file_handle is expected to be positioned at beginning of data
        and will be moved to end of data after decoding.

        Note: Dp records are retrieved through the dictionaries member, which is expected to have been
        loaded with dictionary information.

        Args:
            file_handle: file handle for binary dp - assuming it is positioned at beginning of data
            record_id: ID of the record to decode

        Returns:
            Dictionary containing record data

        Raises:
            RecordNotFoundError: If record ID not found
        """
        # Look up the record definition in the loaded dictionaries
        record_template: "DpRecordTemplate" = self.dictionaries.dp_record_id.get(record_id)

        if record_template is None:
            raise RecordNotFoundError(record_id)

        # Record object to return: dump all template info, not just the ID
        record: dict = {'Record': dataclasses.asdict(record_template)}

        # Get the record type
        record_type = record_template.get_type()

        def read_element(element_type):
            """Inner function of decode_record.
            Read a single element from file_handle, handling variable-length types.
            """
            element_instance = element_type()
            # Save start position and read MaxSize into a buffer
            start_pos = file_handle.tell()
            max_size = element_instance.getMaxSize()
            buffer = file_handle.read(max_size)
            # Deserialize from buffer
            element_instance.deserialize(buffer, 0)
            # If actual deserialized size is different than what was read, seek to correct position
            actual_size = element_instance.getSize()
            if actual_size != max_size:
                # Seek to true end of element that was just read
                file_handle.seek(start_pos + actual_size)
            return element_instance

        # Decode based on whether it's an array or scalar
        if record_template.get_is_array():
            # For array records, read the array size first
            array_size_type = ConfigManager().get_type("FwSizeStoreType")()
            array_size_data = file_handle.read(array_size_type.getSize())
            array_size_type.deserialize(array_size_data, 0)
            array_size = array_size_type.val

            record['Size'] = array_size
            record['Data'] = []

            # Read each array element
            for _ in range(array_size):
                element_instance = read_element(record_type)
                record['Data'].append(element_instance.to_jsonable())
        else:
            # For scalar records, read the single value
            element_instance = read_element(record_type)
            record['Data'] = element_instance.to_jsonable()

        return record

    def decode(self) -> Dict[str, Any]:
        """Decode the entire data product file.

        Returns:
            Dict with two keys: "Header" (jsonable header fields) and
            "Records" (list of decoded record dicts)

        Raises:
            FileNotFoundError: If binary file doesn't exist
            CRCError: If checksum validation fails
            DataProductError: For other decoding errors
        """
        results = {"Header": None, "Records": []}

        with open(self.binary_file_path, 'rb') as f:
            ##################
            # Decode header  #
            ##################
            header_obj = self.decode_header(f)
            header_json = header_obj.to_jsonable()
            results["Header"] = header_json

            ######################
            # Decode all records #
            ######################
            data_size = header_json['DataSize']["value"]
            position_at_start = f.tell()
            # Loop-invariant: resolve the record-ID type once, not per iteration
            record_id_type = ConfigManager().get_type("FwDpIdType")
            while (f.tell() - position_at_start) < data_size:
                # Read record ID
                record_id_bin = f.read(record_id_type.getSize())
                record_id_obj = record_id_type()
                record_id_obj.deserialize(record_id_bin, 0)
                record_id = record_id_obj.val

                # Decode the record
                record = self.decode_record(f, record_id)
                results["Records"].append(record)

            #####################
            # Validate checksum #
            #####################
            # 1) Records must have consumed exactly DataSize bytes; raise rather
            #    than assert so the check survives `python -O`
            consumed = f.tell() - position_at_start
            if consumed != data_size:
                raise DataProductError(
                    f"Record data consumed {consumed} bytes, expected {data_size}"
                )
            # 2) Retrieve checksum stored in the data product file
            dp_crc_bin = f.read(ChecksumConfig.CHECKSUM_LEN)
            dp_crc = ChecksumConfig.CHECKSUM_TOKEN_TYPE()
            dp_crc.deserialize(dp_crc_bin, 0)
            # 3) Compute checksum of data
            f.seek(position_at_start)
            data_to_crc = f.read(data_size)
            computed_crc = calculate_crc32(data_to_crc)
            # 4) Compare computed and stored checksums
            if computed_crc != dp_crc.val:
                raise CRCError("Data", dp_crc.val, computed_crc)

        return results

    def process(self):
        """Main processing: decode binary file and write JSON output.

        Exits the process with status 1 on known decoding errors; re-raises
        anything unexpected after printing it to stderr.
        """
        try:
            print(f"Decoding {self.binary_file_path}...")
            data = self.decode()
            with open(self.output_json_path, 'w') as f:
                json.dump(data, f, indent=2, default=str)
            print("Decoding complete!")

        except DataProductError as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            print(f"Unexpected error: {e}", file=sys.stderr)
            raise
270+

0 commit comments

Comments
 (0)