Skip to content

Commit 2a9a4e8

Browse files
committed
Progress toward analyze_entropy
o Create defaults for signal_entropy symbols, threshold (incomplete)
1 parent fdf45ac commit 2a9a4e8

File tree

2 files changed

+223
-51
lines changed

2 files changed

+223
-51
lines changed

slip39/recovery/entropy.py

Lines changed: 128 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,26 @@ def entropy_bin_dfts( entropy_bin, offset, symbols, stride, cancel_dc=True ):
277277
return dfts
278278

279279

280+
def int_decode( c, stride=8 ):
    """Return a decoded, human-readable view of the integer datum 'c', padded to 'stride' columns.

    The symbol is rendered according to the number of bits ('stride') it was read with:

    - 6 bits: the matching character of the URL-safe base-64 alphabet (A-Z, a-z, 0-9, '-', '_')
    - 5 bits: the matching character of the base-32 alphabet (A-Z, 2-7)
    - 1 to 4 bits: a single lower-case hexadecimal digit
    - up to 8 bits (other than the above), when 'c' is printable ASCII (32 <= c < 127): that character
    - anything else: zero-padded lower-case hexadecimal, one digit per 4 bits of stride (rounded up)

    The result is left-justified in a field 'stride' characters wide, so each decoded symbol lines
    up beneath the 'stride' binary digits it was derived from.

    Raises IndexError if 'c' does not fit within the alphabet selected for a 1 to 6 bit stride.

    """
    if stride == 6:
        text = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'[c]
    elif stride == 5:
        text = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'[c]
    elif 4 >= stride >= 1:
        text = '0123456789abcdef'[c]
    elif stride <= 8 and 32 <= c < 127:
        text = chr( c )
    else:
        # Not printable (or wider than a byte): show the value in hex.  The 'x' presentation type
        # is required; without it the zero-padded value is rendered in decimal, contradicting the
        # "base-16" label the caller attaches to this output.
        text = f"{c:0{(stride+3)//4}x}"
    return f"{text:<{stride}}"
292+
293+
280294
def signal_entropy(
281295
entropy: bytes,
282296
stride: int = 8, # bits per symbol
283-
symbols: int = 8, # symbols per DFT
297+
symbols: int = None, # symbols per DFT; default to ~128 bits and a power of 2, but < length
284298
overlap: bool = True, # sweep across n-1 bits for symbol start
285-
threshold: float = 100 / 100, # Default: signal must stand 100% above noise floor
299+
threshold: Optional[float] = 300 / 100, # Default: signal must stand >300% above noise floor
286300
middle: Optional[Callable[List[float],float]] = None, # default to simple average; or eg. statistics.median
287301
ignore_dc: bool = False, # For eg. character data, we know there'll be a big DC component
288302
) -> Optional[Tuple[float, int]]:
@@ -305,6 +319,18 @@ def signal_entropy(
305319
entropy_bin = ''.join( f"{int(h,16):0>4b}" for h in entropy_hex )
306320
length = len( entropy_bin )
307321

322+
if symbols is None:
323+
symbols = 8
324+
while ( symbols * 2 ) * stride <= min( 128, length-1 ):
325+
symbols *= 2
326+
if threshold is None:
327+
try:
328+
threshold = signal_entropy.signal_limits[length][overlap].get( stride )
329+
except Exception:
330+
pass
331+
assert threshold and 0 < threshold, \
332+
f"A small +'ve ratio threshold of Signal energy (0,...) is required for {length}-bit entropy w/ {stride}-bit symbols eg. 300%, not {threshold=!r}"
333+
308334
dc = 0+0j
309335
strongest = None
310336
mags_all = []
@@ -336,6 +362,10 @@ def signal_entropy(
336362
snr_dB = into_dB( snr )
337363
if strongest and snr_dB <= strongest.dB:
338364
continue
365+
if snr_dB < 0:
366+
strongest = Signal( dB=snr_dB, stride=stride, symbols=symbols, offset=offset, indices=[], details='' )
367+
continue
368+
339369
# Find the strongest signal frequency bin, and then compute the indices of the symbols.  The
340370
# max frequency (last) bin indicates some pattern sensed in every second symbol (the
341371
# Nyquist rate, sampled at 2x the max frequency detectable). The min frequency (first)
@@ -368,43 +398,38 @@ def signal_entropy(
368398
for b in range( max_beat * stride ):
369399
indices.append( beat + b )
370400
'''
371-
# Draw the signal area of interest over 'symbols' of the 'stride'-bit symbols beginning at bit 'offset'. If the symbols are
372-
# ASCII, show that otherwise displays the 2-nibble hex value.
401+
# Draw the signal area of interest over 'symbols' of the 'stride'-bit symbols beginning at bit 'offset'.
373402
details = '\n'
374-
details += f"{entropy_bin}\n"
375-
details += ''.join(
403+
offclip = 0
404+
offpref = ''
405+
if offset > 8:
406+
details += f"...x{offset:<3}>{entropy_bin[offset:]}\n"
407+
offclip = offset
408+
offpref = ' ' * 8
409+
else:
410+
details += f"{entropy_bin}\n"
411+
offpref = ' ' * offset
412+
details += offpref + ''.join(
376413
( '^' if i in indices else '-_'[( i // stride ) % 2] ) if ( offset <= i < ( offset + symbols * stride )) else ' '
377-
for i in range( length )
414+
for i in range( offset, length )
378415
) + '\n'
379416

380-
# Output the decimal (if possible) and decoded view of the data
381-
def bin_decode( c, stride=8 ):
382-
if stride == 6:
383-
return f"{'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'[c]:<{stride}}"
384-
if stride == 5:
385-
return f"{'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'[c]:<{stride}}"
386-
if 4 >= stride >= 1:
387-
return f"{'0123456789abcdef'[c]:<{stride}}"
388-
if stride <= 8 and 32 <= c < 127:
389-
return f"{chr(c):<{stride}}"
390-
hex = f"{c:0{(stride+3)//4}}"
391-
return f"{hex:<{stride}}"
392417
if stride >= 4:
393-
details += ' ' * offset + ''.join(
418+
details += offpref + ''.join(
394419
f"{c:<{stride}}"
395420
for c in entropy_bin_ints( entropy_bin, offset=offset, symbols=symbols, stride=stride )
396421
) + ' decimal\n'
397-
details += ' ' * offset + ''.join(
398-
bin_decode( c, stride=stride )
422+
details += offpref + ''.join(
423+
int_decode( c, stride=stride )
399424
for c in entropy_bin_ints( entropy_bin, offset=offset, symbols=symbols, stride=stride )
400425
) + f" base-{2**(4 if stride >= 7 else stride)}" + ("/ASCII" if stride >= 7 else "") + " decoding\n"
401426

402427
# Select the one or two highest energy harmonics, and scale the waveform (which is
403-
# denominated in stride-bit chunks) to the maximum extent that will fit within the binary
404-
# version of the entropy. So, if the 8-bit signal values are being recovered, we can scale
405-
# the waveform by 8x. Also, scale the amplitude by the (removed) DC component; this
406-
# counteracts the reduction in dynamic range inherent to typical ASCII values (eg. the
407-
# digits consume 0-9 are only 8% of the full ASCII range).
428+
# denominated in stride-bit chunks) to the extent needed to cover the binary version of the
429+
# entropy (then decimate to fit exactly). So, if the 8-bit signal values are being
430+
# recovered, we can scale the waveform by 8x. Also, scale the amplitude by the (removed) DC
431+
# component; this counteracts the reduction in dynamic range inherent to typical ASCII
432+
# values (eg. the digits 0-9 are only 8% of the full ASCII range).
408433
dfts_rec = [0+0j] * len( dfts)
409434
harmonic = []
410435
for max_b, max_i in sorted( ( (b, i) for i,b in enumerate( mags ) ), reverse=True ):
@@ -438,16 +463,53 @@ def bin_decode( c, stride=8 ):
438463
#print( f" - {i=:3} --> {o=:3} ==> {sigs[o]:7}" )
439464
pos += signal_draw( sigs[o], pos=True )
440465
neg += signal_draw( sigs[o], neg=True )
441-
harmonic_dBs = [ f"{ordinal(h)} {into_dB(mags[h]/avg(mags)):.1f}dB" for h in harmonic ]
442-
details += f"{' ' * offset}{pos} {len(harmonic)} harmonics: {', '.join( harmonic_dBs )}\n"
443-
details += f"{' ' * offset}{neg} - every {', '.join( map( str, harmonic_freq ))} symbols\n"
466+
harmonic_dBs = [ f"{ordinal(h) if h else 'DC'} {into_dB(mags[h]/avg(mags)):.1f}dB" for h in harmonic ]
467+
details += offpref + f"{pos} {len(harmonic)} harmonics: {', '.join( harmonic_dBs )}\n"
468+
details += offpref + f"{neg} - " + f"{'DC offset and ' if 0 in harmonic else '' }" + f"every {', '.join( map( str, harmonic_freq ))} symbols\n"
444469

445470
strongest = Signal( dB=snr_dB, stride=stride, symbols=symbols, offset=offset, indices=indices, details=details )
446-
#print( f"strongest signal: {strongest}" )
447471

448472
mags_avgs = [sum(col)/len(mags_all) for col in zip(*mags_all)]
449473
#print( f"avgs: {' '.join( f'{m:{stride*2}.1f}' for m in mags_avgs )}: {sum(mags_avgs):7.2f}" )
450474
return strongest
475+
signal_entropy.signal_limits = {
476+
128: {
477+
False: {
478+
3: 3.6290539642295268,
479+
4: 3.507324239128957,
480+
5: 4.0,
481+
6: 4.0,
482+
7: 4.0,
483+
8: 4.0,
484+
},
485+
True: {
486+
3: 3.980779955578968,
487+
4: 3.757892208378965,
488+
5: 4.0,
489+
6: 4.0,
490+
7: 4.0,
491+
8: 4.0,
492+
}
493+
},
494+
256: {
495+
False: {
496+
3: 4.110541587940857,
497+
4: 3.552135847422123,
498+
5: 4.0,
499+
6: 4.0,
500+
7: 4.0,
501+
8: 4.0,
502+
},
503+
True: {
504+
3: 4.432511412288454,
505+
4: 4.096532008409936,
506+
5: 4.0,
507+
6: 4.0,
508+
7: 4.0,
509+
8: 4.0,
510+
}
511+
}
512+
}
451513

452514

453515
def shannon_entropy(
@@ -485,7 +547,7 @@ def shannon_entropy(
485547
except Exception:
486548
pass
487549
assert threshold and 0 < threshold < 1, \
488-
f"A small +'ve ratio threshold of Shannon entropy deficit > 0 is required for {length}-bit entropy w/ {stride}-bit symbols eg. 10%, not {threshold=!r}"
550+
f"A small +'ve ratio threshold of Shannon entropy deficit (0, 1] is required for {length}-bit entropy w/ {stride}-bit symbols eg. 10%, not {threshold=!r}"
489551

490552
# Find all the unique n-bit symbols in the entropy at the desired offset(s)
491553
strongest = None
@@ -615,11 +677,28 @@ def shannon_entropy(
615677
def analyze_entropy(
616678
entropy: bytes,
617679
strides: Optional[Union[int,Tuple[int,int]]] = None, # If only a specific stride/s makes sense, eg. for ASCII symbols
618-
symbols: int = 16,
619680
overlap: bool = True,
620681
ignore_dc: bool = False,
621682
) -> Optional[str]:
622-
"""Analyzes the provided entropy. If patterns are found, reports the findings."""
683+
"""Analyzes the provided entropy. If patterns are found, reports the findings; the peak Signal
684+
and the aggregate report: (Signal, "...").
685+
686+
Seek strong Signals or weak Shannon Entropy, across a number of different interpretations (bit
687+
strides and overlaps) of the entropy data. We do not know where poor entropy may hide...
688+
689+
Since the probability of a signal being found is:
690+
691+
P( A or B or ... or Z ) = P(A) + P(B) + ... + P(Z) - P(A and B and ... Z )
692+
= P( A or B or ... or Z ) = P(A) + P(B) + ... + P(Z) - ( P(A) * P(B) * ... P(Z) )
693+
694+
since we set the {signal,shannon}_entropy threshold values at ~ <1%, we know the P(A and B...)
695+
term is very small; the probability we'll find an entropy failure is basically the sum of the
696+
individual probabilities of each test failing.
697+
698+
So, if we try 6 of each analysis, that's 12 * 1% =~= 12%. We want a total failure of about 1%,
699+
so we must target a 0.1% failure on good entropy for each test.
700+
701+
"""
623702
if strides is None:
624703
strides = (3, 9)
625704
else:
@@ -628,11 +707,11 @@ def analyze_entropy(
628707
except TypeError:
629708
strides = (int(strides), int(strides)+1)
630709

631-
# For signal analysis, we limit ourself to symbols-sized groups of stride-bit symbols
710+
# For signal analysis, we use the default <= ~128-bit groups of stride-bit symbols
632711
signals = sorted(
633712
(
634713
s for s in (
635-
signal_entropy( entropy, stride=stride, symbols=symbols, overlap=overlap, ignore_dc=ignore_dc )
714+
signal_entropy( entropy, stride=stride, overlap=overlap, ignore_dc=ignore_dc )
636715
for stride in range( *strides )
637716
)
638717
if s.dB >= 0
@@ -654,17 +733,22 @@ def analyze_entropy(
654733

655734
result = None
656735
if signals or shannons:
657-
result = ''
736+
report = ''
658737
if signals:
659-
result += f"Signal analysis indicates {len(signals)} non-random energy pattern in"
660-
result += f" {', '.join( map( str, (s.stride for s in signals))) }-bit symbols:\n"
661-
result += '\n'.join( s.details for s in signals )
738+
report += f"Signal analysis indicates {len(signals)} non-random energy pattern in"
739+
report += f" {', '.join( map( str, (s.stride for s in signals))) }-bit symbols:\n"
740+
for s in signals:
741+
report += f"\n - A {s.dB:.1f}dB Signal artifact at bit {s.offset:3} in {s.symbols:2} x {s.stride}-bit symbols:\n"
742+
report += f"{s.details}\n"
662743
if signals and shannons:
663-
result += '\n'
744+
report += '\n'
664745
if shannons:
665-
result += f"Shannon entropy indicates {len(shannons)} unexpectedly large numbers of some"
666-
result += f" {', '.join( map( str, (s.stride for s in shannons))) }-bit symbols:\n"
667-
result += '\n'.join( s.details for s in shannons )
746+
report += f"Shannon entropy indicates {len(shannons)} unexpectedly non-random distribution of some"
747+
report += f" {', '.join( map( str, (s.stride for s in shannons))) }-bit symbols:\n"
748+
for s in shannons:
749+
report += f"\n - A {s.dB:.1f}dB Shannon entropy deficit over {s.stride}-bit symbols:\n"
750+
report += f"{s.details}\n"
751+
result = report
668752

669753
return result
670754

0 commit comments

Comments
 (0)