@@ -314,17 +314,10 @@ def is_solution(self, problem: dict[str, Any], solution: dict[str, bytes] | Any)
314314 Verify the provided gzip compression solution.
315315
316316 Checks:
317- 1. The solution format is valid (dict with 'compressed_data' as bytes).
318- 2. Decompressing the solution's data yields the original plaintext.
319- 3. The length of the compressed data in the solution is at most
320- machine epsilon larger than the length produced by self.solve().
321-
322- Args:
323- problem (dict): The problem dictionary.
324- solution (dict): The proposed solution dictionary with 'compressed_data'.
325-
326- Returns:
327- bool: True if the solution is valid and meets the criteria.
317+ 1. The solution format is valid (dict with 'compressed_data' as bytes-like).
318+ 2. Decompressing the solution yields the original plaintext.
319+ 3. The compressed length does not exceed ceil(1.001 * reference length), where the
320+ reference is produced by gzip.compress(..., compresslevel=9, mtime=0).
328321 """
329322 if not isinstance (solution , dict ) or "compressed_data" not in solution :
330323 logging .error (
@@ -337,12 +330,15 @@ def is_solution(self, problem: dict[str, Any], solution: dict[str, bytes] | Any)
337330 logging .error ("Solution 'compressed_data' is not bytes." )
338331 return False
339332
333+ # Canonicalize: prevents bytes subclasses from spoofing __len__ (and similar).
334+ compressed_data = bytes (compressed_data )
335+
340336 original_plaintext = problem .get ("plaintext" )
341337 if original_plaintext is None :
342338 logging .error ("Problem dictionary missing 'plaintext'. Cannot verify." )
343- return False # Cannot verify without original data
339+ return False
344340
345- # 1. Check if decompression yields the original input
341+ # 1) Check decompression matches original
346342 try :
347343 decompressed_data = gzip .decompress (compressed_data )
348344 except Exception as e :
@@ -351,11 +347,9 @@ def is_solution(self, problem: dict[str, Any], solution: dict[str, bytes] | Any)
351347
352348 if decompressed_data != original_plaintext :
353349 logging .error ("Decompressed data does not match original plaintext." )
354- # Log lengths for debugging
355350 logging .debug (
356351 f"Original length: { len (original_plaintext )} , Decompressed length: { len (decompressed_data )} "
357352 )
358- # Log first/last few bytes if lengths match but content differs
359353 if len (decompressed_data ) == len (original_plaintext ):
360354 logging .debug (
361355 f"Original start: { original_plaintext [:50 ]} , Decompressed start: { decompressed_data [:50 ]} "
@@ -365,35 +359,22 @@ def is_solution(self, problem: dict[str, Any], solution: dict[str, bytes] | Any)
365359 )
366360 return False
367361
368- # 2. Check if the compressed size is close to the reference solution size
369- # Generate reference solution using the same compression settings.
362+ # 2) Size constraint vs reference
370363 try :
371- # reference_solution = self.solve(problem) # Use direct compression here to avoid recursion if solve changes
372364 reference_compressed_data = gzip .compress (original_plaintext , compresslevel = 9 , mtime = 0 )
373365 except Exception as e :
374366 logging .error (f"Failed to generate reference solution in is_solution: { e } " )
375- # Cannot verify size constraint if reference generation fails
376367 return False
377368
378369 solution_len = len (compressed_data )
379370 reference_len = len (reference_compressed_data )
380-
381- # Allow solution length to be at most 0.1% larger than reference length.
382- # Calculate the maximum allowed length (reference + 0.1%)
383- # Use math.ceil to allow the integer length to reach the ceiling of the limit.
384371 max_allowed_len = math .ceil (reference_len * 1.001 )
385372
386- # Calculate compression ratios for logging
387- # original_len = len(original_plaintext)
388- # Avoid division by zero if original_plaintext is empty
389- # ref_ratio = (reference_len / original_len) if original_len > 0 else float('inf')
390- # sol_ratio = (solution_len / original_len) if original_len > 0 else float('inf')
391-
392373 if solution_len > max_allowed_len :
393374 logging .error (
394- f"Compressed data length ({ solution_len } ) is more than 0.1% larger than reference length ({ reference_len } ). Max allowed: { max_allowed_len } ."
375+ f"Compressed data length ({ solution_len } ) is more than 0.1% larger than reference "
376+ f"length ({ reference_len } ). Max allowed: { max_allowed_len } ."
395377 )
396378 return False
397379
398- # All checks passed
399380 return True
0 commit comments