Skip to content

Commit a44fa1f

Browse files
authored
fix: Update corrupted file (#120)
* fix: Add a test * fix: Add a debug test for CAWG * fix: Fix corrupted file * fix: Cleaning up * fix: Fix non-threaded tests * fix: unrandomize test * fix: Improve test sensitivity * fix: Improve comment
1 parent a8cf604 commit a44fa1f

File tree

3 files changed

+96
-52
lines changed

3 files changed

+96
-52
lines changed
51.3 KB
Loading

tests/test_unit_tests.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,30 @@ def test_read_all_files_using_extension(self):
212212
except Exception as e:
213213
self.fail(f"Failed to read metadata from {filename}: {str(e)}")
214214

215+
def test_read_cawg_data_file(self):
    """Test reading C2PA metadata from the C_with_CAWG_data.jpg file.

    Opens the fixture from the "files-for-reading-tests" directory under
    ``self.data_dir``, reads its manifest store as JSON through ``Reader``,
    and checks that the store exposes an active manifest that is neither
    null nor empty.
    """
    file_path = os.path.join(self.data_dir, "files-for-reading-tests", "C_with_CAWG_data.jpg")

    with open(file_path, "rb") as file:
        reader = Reader("image/jpeg", file)
        json_data = reader.json()
        self.assertIsInstance(json_data, str)

        # Parse the JSON and verify specific fields
        manifest_data = json.loads(json_data)

        # Verify basic manifest structure
        self.assertIn("manifests", manifest_data)
        self.assertIn("active_manifest", manifest_data)

        # Get the active manifest; check the id is present first so a
        # missing entry is reported as a test failure, not a KeyError.
        active_manifest_id = manifest_data["active_manifest"]
        self.assertIn(active_manifest_id, manifest_data["manifests"])
        active_manifest = manifest_data["manifests"][active_manifest_id]

        # Verify manifest is not null or empty. unittest assertions are used
        # instead of bare ``assert`` so the checks still run under ``python -O``.
        self.assertIsNotNone(active_manifest, "Active manifest should not be null")
        self.assertGreater(len(active_manifest), 0, "Active manifest should not be empty")
238+
215239

216240
class TestBuilder(unittest.TestCase):
217241
def setUp(self):

tests/test_unit_tests_threaded.py

Lines changed: 72 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,57 +2096,40 @@ def add_ingredient(ingredient_json, thread_id):
20962096
builder.close()
20972097

20982098
def test_builder_sign_with_multiple_ingredient_random_many_threads(self):
2099-
"""Test Builder class operations with 10 threads, each adding 3 random ingredients and signing a random file."""
2099+
"""Test Builder class operations with 12 threads, each adding 3 specific ingredients and signing a file."""
21002100
# Number of threads to use in the test
2101-
# We are pushing it here, as we want to test with thread count one to two orders of magnitude
2102-
# higher than "usual" max numbers of cores on (server) machines may be.
2103-
21042101
TOTAL_THREADS_USED = 12
21052102

2106-
# Get list of files from files-for-reading-tests directory
2107-
reading_dir = os.path.join(self.data_dir, "files-for-reading-tests")
2108-
2109-
# Filter for JPG and PNG files only
2110-
all_files = [
2111-
f for f in os.listdir(reading_dir)
2112-
if os.path.isfile(os.path.join(reading_dir, f))
2113-
and os.path.splitext(f)[1].lower() in {'.jpg', '.jpeg', '.png'}
2103+
# Define the specific files to use as ingredients
2104+
# Those files should be valid to use as ingredients
2105+
ingredient_files = [
2106+
os.path.join(self.data_dir, "A_thumbnail.jpg"),
2107+
os.path.join(self.data_dir, "C.jpg"),
2108+
os.path.join(self.data_dir, "cloud.jpg")
21142109
]
21152110

2116-
# Ensure we have enough files
2117-
self.assertGreaterEqual(
2118-
len(all_files),
2119-
3,
2120-
"Need at least 3 JPG/PNG files for testing")
2121-
21222111
# Thread synchronization
21232112
thread_results = {}
21242113
completed_threads = 0
2114+
thread_lock = threading.Lock() # Lock for thread-safe access to shared data
21252115

21262116
def thread_work(thread_id):
21272117
nonlocal completed_threads
21282118
try:
21292119
# Create a new builder for this thread
21302120
builder = Builder.from_json(self.manifestDefinition)
21312121

2132-
# Select 3 random files for ingredients
2133-
# Use thread_id as seed for reproducibility
2134-
random.seed(thread_id)
2135-
ingredient_files = random.sample(all_files, 3)
2136-
21372122
# Add each ingredient
2138-
for i, file_name in enumerate(ingredient_files, 1):
2139-
file_path = os.path.join(reading_dir, file_name)
2123+
for i, file_path in enumerate(ingredient_files, 1):
21402124
ingredient_json = json.dumps({
2141-
"title": f"Thread {thread_id} Ingredient {i} - {file_name}"
2125+
"title": f"Thread {thread_id} Ingredient {i} - {os.path.basename(file_path)}"
21422126
})
21432127

21442128
with open(file_path, 'rb') as f:
21452129
builder.add_ingredient(ingredient_json, "image/jpeg", f)
21462130

2147-
# Select a random file for signing
2148-
sign_file = random.choice(all_files)
2149-
sign_file_path = os.path.join(reading_dir, sign_file)
2131+
# Use A.jpg as the file to sign
2132+
sign_file_path = os.path.join(self.data_dir, "A.jpg")
21502133

21512134
# Sign the file
21522135
with open(sign_file_path, "rb") as file:
@@ -2168,11 +2151,13 @@ def thread_work(thread_id):
21682151
manifest_data = json.loads(json_data)
21692152

21702153
# Store results for verification
2171-
thread_results[thread_id] = {
2172-
'manifest': manifest_data,
2173-
'ingredient_files': ingredient_files,
2174-
'sign_file': sign_file
2175-
}
2154+
with thread_lock:
2155+
thread_results[thread_id] = {
2156+
'manifest': manifest_data,
2157+
'ingredient_files': [os.path.basename(f) for f in ingredient_files],
2158+
'sign_file': os.path.basename(sign_file_path),
2159+
'manifest_hash': hash(json.dumps(manifest_data, sort_keys=True)) # Add hash for comparison
2160+
}
21762161

21772162
# Clean up streams
21782163
output.close()
@@ -2181,11 +2166,13 @@ def thread_work(thread_id):
21812166
builder.close()
21822167

21832168
except Exception as e:
2184-
thread_results[thread_id] = {
2185-
'error': str(e)
2186-
}
2169+
with thread_lock:
2170+
thread_results[thread_id] = {
2171+
'error': str(e)
2172+
}
21872173
finally:
2188-
completed_threads += 1
2174+
with thread_lock:
2175+
completed_threads += 1
21892176

21902177
# Create and start threads
21912178
threads = []
@@ -2199,26 +2186,31 @@ def thread_work(thread_id):
21992186
thread.join()
22002187

22012188
# Verify all threads completed
2202-
self.assertEqual(completed_threads, TOTAL_THREADS_USED, f"All {
2203-
TOTAL_THREADS_USED} threads should have completed")
2189+
self.assertEqual(completed_threads, TOTAL_THREADS_USED, f"All {TOTAL_THREADS_USED} threads should have completed")
22042190
self.assertEqual(
22052191
len(thread_results),
22062192
TOTAL_THREADS_USED,
22072193
f"Should have results from all {TOTAL_THREADS_USED} threads")
22082194

2195+
# Collect all manifest hashes for comparison
2196+
manifest_hashes = set()
2197+
thread_manifest_data = {}
2198+
22092199
# Verify results for each thread
22102200
for thread_id in range(1, TOTAL_THREADS_USED + 1):
22112201
result = thread_results[thread_id]
22122202

22132203
# Check if thread encountered an error
22142204
if 'error' in result:
2215-
self.fail(
2216-
f"Thread {thread_id} failed with error: {
2217-
result['error']}")
2205+
self.fail(f"Thread {thread_id} failed with error: {result['error']}")
22182206

22192207
manifest_data = result['manifest']
22202208
ingredient_files = result['ingredient_files']
2221-
sign_file = result['sign_file']
2209+
manifest_hash = result['manifest_hash']
2210+
2211+
# Store manifest data for cross-thread comparison
2212+
thread_manifest_data[thread_id] = manifest_data
2213+
manifest_hashes.add(manifest_hash)
22222214

22232215
# Verify active manifest exists
22242216
self.assertIn("active_manifest", manifest_data)
@@ -2234,16 +2226,44 @@ def thread_work(thread_id):
22342226
self.assertIsInstance(active_manifest["ingredients"], list)
22352227
self.assertEqual(len(active_manifest["ingredients"]), 3)
22362228

2237-
# Verify all ingredients exist with correct thread ID and file
2238-
# names
2239-
ingredient_titles = [ing["title"]
2240-
for ing in active_manifest["ingredients"]]
2229+
# Verify all ingredients exist with correct thread ID and file names
2230+
ingredient_titles = [ing["title"] for ing in active_manifest["ingredients"]]
22412231
for i, file_name in enumerate(ingredient_files, 1):
2242-
expected_title = f"Thread {
2243-
thread_id} Ingredient {i} - {file_name}"
2244-
self.assertIn(expected_title, ingredient_titles, f"Thread {
2245-
thread_id} should have ingredient with title {expected_title}")
2232+
expected_title = f"Thread {thread_id} Ingredient {i} - {file_name}"
2233+
self.assertIn(expected_title, ingredient_titles, f"Thread {thread_id} should have ingredient with title {expected_title}")
2234+
2235+
# Verify no cross-thread contamination in ingredient titles
2236+
for other_thread_id in range(1, TOTAL_THREADS_USED + 1):
2237+
if other_thread_id != thread_id:
2238+
for title in ingredient_titles:
2239+
# Check for exact thread ID pattern to avoid false positives
2240+
self.assertNotIn(
2241+
f"Thread {other_thread_id} Ingredient",
2242+
title,
2243+
f"Thread {thread_id}'s manifest contains ingredient data from thread {other_thread_id}")
2244+
2245+
# Verify all manifests are unique (no data scrambling between threads)
2246+
self.assertEqual(
2247+
len(manifest_hashes),
2248+
TOTAL_THREADS_USED,
2249+
"Each thread should have a unique manifest (no data scrambling)")
22462250

2251+
# Additional verification: Compare manifest structures between threads
2252+
for thread_id in range(1, TOTAL_THREADS_USED + 1):
2253+
current_manifest = thread_manifest_data[thread_id]
2254+
2255+
# Verify manifest structure is consistent
2256+
self.assertIn("active_manifest", current_manifest)
2257+
self.assertIn("manifests", current_manifest)
2258+
2259+
# Verify no cross-thread contamination in manifest data
2260+
for other_thread_id in range(1, TOTAL_THREADS_USED + 1):
2261+
if other_thread_id != thread_id:
2262+
other_manifest = thread_manifest_data[other_thread_id]
2263+
self.assertNotEqual(
2264+
current_manifest["active_manifest"],
2265+
other_manifest["active_manifest"],
2266+
f"Thread {thread_id} and {other_thread_id} share the same active manifest ID")
22472267

22482268
if __name__ == '__main__':
22492269
unittest.main()

0 commit comments

Comments
 (0)