Skip to content

Commit 224b8a2

Browse files
committed
delta encoder
1 parent d2c0b6a commit 224b8a2

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

delta_encoder.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
from copy import copy
2+
import zlib
3+
import sys
4+
5+
sys.set_int_max_str_digits(10000)
6+
7+
def readfile(filename):
    """Return the complete contents of *filename* as a bytes object.

    The file is opened in binary mode and closed again before returning.
    """
    with open(filename, mode="rb") as stream:
        contents = stream.read()
    return contents
10+
11+
12+
from copy import copy
13+
14+
def encode_m1(data):
    """Collect the positions in *data* whose value is below zero.

    Returns a list of zero-based indices in ascending order; the list is
    empty when no value is negative.
    """
    negative_positions = []
    for position, value in enumerate(data):
        if value < 0:
            negative_positions.append(position)
    return negative_positions
17+
18+
def apply_m1_delta(delta, m1):
    """Return a copy of *delta* with the entries at the indices in *m1* negated.

    The work is done on a shallow copy, so a list of numbers passed as
    *delta* is not modified. An index that appears several times in *m1*
    is negated once per occurrence.
    """
    flipped = copy(delta)
    for position in m1:
        flipped[position] *= -1
    return flipped
24+
25+
def delta_encode(data):
    """Split *data* into delta magnitudes plus the positions of negative deltas.

    The first delta is the first element itself; every later delta is the
    difference between an element and its predecessor.

    Returns:
        A pair ``(abs_deltas, m1)``: the absolute values of the deltas and
        the indices of the deltas that were negative. Empty input yields
        ``([], [])``.
    """
    if not data:
        return [], []

    signed = [data[0]] + [data[k] - data[k - 1] for k in range(1, len(data))]
    negative_positions = encode_m1(signed)
    magnitudes = list(map(abs, signed))
    return magnitudes, negative_positions
35+
36+
def delta_decode(abs_deltas, m1):
    """Rebuild the original values from delta magnitudes and sign positions.

    The deltas at the indices listed in *m1* are negated first, then a
    running sum over the signed deltas gives the decoded values.
    Empty *abs_deltas* decodes to an empty list.
    """
    if not abs_deltas:
        return []

    signed = apply_m1_delta(abs_deltas, m1)
    running = signed[0]
    data = [running]
    for step in signed[1:]:
        running = running + step
        data.append(running)
    return data
45+
46+
from copy import copy
47+
48+
def encode_m1(data):
    """Return the ascending zero-based indices of the negative items in *data*."""
    # NOTE: this re-defines, with the same behavior, the encode_m1 that
    # already appears earlier in this file.
    indices = []
    for index, item in enumerate(data):
        if item < 0:
            indices.append(index)
    return indices
51+
52+
def apply_m1_delta(delta, m1):
    """Negate the entries of *delta* selected by *m1* and return the result.

    A shallow copy of *delta* is modified and returned. Each index in *m1*
    triggers one sign inversion, so repeated indices cancel out in pairs.
    """
    # NOTE: this re-defines, with the same behavior, the apply_m1_delta that
    # already appears earlier in this file.
    result = copy(delta)
    for index in m1:
        result[index] *= -1
    return result
58+
59+
def to_integer(digits, base):
    """Fold *digits* (most significant first) into one integer in *base*.

    An empty sequence yields 0. Leading zero digits do not change the
    result, so the digit count must be kept separately to decode it.
    """
    accumulator = 0
    for digit in digits:
        accumulator *= base
        accumulator += digit
    return accumulator
65+
66+
def from_integer(value, base, length):
    """Expand *value* into exactly *length* digits in *base*, most significant first.

    Digits are peeled off from the least significant end and written into
    the result from the back. Positions beyond the magnitude of *value*
    are filled with 0, and digits that do not fit in *length* positions
    are dropped.
    """
    digits = [0] * length
    for slot in range(length - 1, -1, -1):
        value, digits[slot] = divmod(value, base)
    return digits
73+
74+
def delta_encode_to_integers(data):
    """Delta-encode *data* and pack the result into two integers.

    The first delta is ``data[0]``; each later delta is the difference
    between an element and its predecessor. Magnitudes and signs are packed
    separately, most significant digit first:

    * ``encoded_delta`` holds the absolute deltas as digits in ``base_delta``
      (largest magnitude + 1).
    * ``encoded_m1`` holds one sign flag per delta (1 = negative) as digits
      in ``base_m1`` (always 2 for non-empty input).

    Both integers carry exactly ``length`` digits. Storing a flag for every
    position, rather than a variable-length list of negative indices, lets
    the decoder recover the signs without knowing how many deltas were
    negative, and keeps a negative first element representable.

    Returns:
        ``(encoded_delta, encoded_m1, base_delta, base_m1, length)``.
        Empty input yields the placeholder ``(0, 0, 1, 1, 0)``, which
        ``delta_decode_from_integers`` turns back into ``[]``.
    """
    if not data:
        return 0, 0, 1, 1, 0  # placeholder values; length 0 decodes to []

    deltas = [data[0]]
    deltas.extend(current - previous for previous, current in zip(data, data[1:]))

    base_delta = max(abs(delta) for delta in deltas) + 1
    base_m1 = 2  # one binary sign flag per delta

    encoded_delta = 0
    encoded_m1 = 0
    for delta in deltas:
        encoded_delta = encoded_delta * base_delta + abs(delta)
        encoded_m1 = encoded_m1 * base_m1 + (1 if delta < 0 else 0)

    return encoded_delta, encoded_m1, base_delta, base_m1, len(data)


def delta_decode_from_integers(encoded_delta, encoded_m1, base_delta, base_m1, length):
    """Rebuild the original values from the output of delta_encode_to_integers.

    Args:
        encoded_delta: absolute deltas packed as *length* digits in *base_delta*.
        encoded_m1: sign flags packed as *length* digits in *base_m1*; a
            non-zero digit marks the delta at that position as negative.
        base_delta: radix used for ``encoded_delta``.
        base_m1: radix used for ``encoded_m1``.
        length: number of encoded elements.

    Returns:
        The decoded values as a list of ints; ``[]`` when *length* is 0
        (which is what the encoder reports for empty input).
    """
    if length <= 0:
        return []

    # Digits come out least significant first, so fill from the back.
    signed = [0] * length
    for position in range(length - 1, -1, -1):
        encoded_delta, magnitude = divmod(encoded_delta, base_delta)
        encoded_m1, negative = divmod(encoded_m1, base_m1)
        signed[position] = -magnitude if negative else magnitude

    # A running sum over the signed deltas restores the original sequence.
    data = []
    running = 0
    for delta in signed:
        running += delta
        data.append(running)
    return data
101+
102+
103+
104+
# Encode the file named by the first command-line argument, then decode the
# result again. `decoded` is not used further in the visible code.
source_path = sys.argv[1]
data = readfile(source_path)

packed = delta_encode_to_integers(data)
encoded_delta, encoded_m1, base_d, base_m1, length = packed
decoded = delta_decode_from_integers(
    encoded_delta, encoded_m1, base_d, base_m1, length
)
108+
109+
def int_to_bytes(n):
    """Serialize the non-negative integer *n* as big-endian bytes.

    The result uses the fewest bytes that hold *n*, with a minimum of one
    byte so that 0 becomes ``b"\\x00"`` rather than an empty string.
    """
    byte_count = (n.bit_length() + 7) // 8
    if byte_count == 0:
        byte_count = 1
    return n.to_bytes(byte_count, byteorder='big')
112+
113+
def bytes_to_int(b):
    """Interpret the byte string *b* as an unsigned big-endian integer."""
    return int.from_bytes(b, byteorder='big')
115+
116+
def write_encoded_to_file(filename, encoded_delta, encoded_m1):
    """Write both encoded integers to *filename* as length-prefixed bytes.

    File layout, all big-endian:

    1. 2-byte length of the delta payload
    2. 2-byte length of the m1 payload
    3. delta payload (``int_to_bytes(encoded_delta)``)
    4. m1 payload (``int_to_bytes(encoded_m1)``)

    The whole record is assembled and validated before the file is opened,
    so a payload that cannot be represented never truncates or partially
    overwrites an existing file.

    Raises:
        OverflowError: if either payload is longer than 65535 bytes (the
            most a 2-byte length field can describe), or if either integer
            is negative.
    """
    delta_bytes = int_to_bytes(encoded_delta)
    m1_bytes = int_to_bytes(encoded_m1)

    max_payload = 0xFFFF  # largest value a 2-byte length field can hold
    for label, payload in (("delta", delta_bytes), ("m1", m1_bytes)):
        if len(payload) > max_payload:
            raise OverflowError(
                f"{label} payload is {len(payload)} bytes; "
                f"the 2-byte length field allows at most {max_payload}"
            )

    # Lengths go first so a reader can split the two payloads again.
    record = b"".join((
        len(delta_bytes).to_bytes(2, 'big'),
        len(m1_bytes).to_bytes(2, 'big'),
        delta_bytes,
        m1_bytes,
    ))

    with open(filename, 'wb') as f:
        f.write(record)
126+
127+
128+
129+
130+
# Store both encoded integers in "delta.bin" in the current working directory.
write_encoded_to_file(
    filename="delta.bin",
    encoded_delta=encoded_delta,
    encoded_m1=encoded_m1,
)
131+

0 commit comments

Comments
 (0)