Skip to content

Commit 455e627

Browse files
committed
1.1.0: Added autosaving
Enables autosaving to disk at a user-defined time interval. The file content can be read using the new function "recover". With each autosave, an automatic backup file (filename + ".tmp") is created, which is used during recovery if the main file is broken.
1 parent f882a54 commit 455e627

File tree

5 files changed

+192
-12
lines changed

5 files changed

+192
-12
lines changed

example.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import faultguard
22
import numpy as np
3+
import os
4+
import time
35

46
def launch(faultguard_data, args):
57
"""
@@ -16,6 +18,7 @@ def launch(faultguard_data, args):
1618

1719
# Some dummy important data manipulation
1820
for i in range(10):
21+
time.sleep(1)
1922
important_data_1[i%3] = i
2023
important_data_2 += str(i)
2124
print("important_data_1:", important_data_1)
@@ -29,7 +32,7 @@ def launch(faultguard_data, args):
2932
if i == 7:
3033
import ctypes
3134
ctypes.string_at(0)
32-
35+
3336
def rescue(faultguard_data, exit_code, args):
3437
"""
3538
Demo rescue handler
@@ -40,6 +43,9 @@ def rescue(faultguard_data, exit_code, args):
4043
"""
4144
print("Fault occured. Exit code: {}. Rescued data:".format(exit_code))
4245

46+
recover(faultguard_data)
47+
48+
def recover(faultguard_data):
4349
# Check if fault occurs before data was initialized
4450
if "important_data_1" not in faultguard_data or "important_data_2" not in faultguard_data:
4551
return
@@ -51,9 +57,17 @@ def rescue(faultguard_data, exit_code, args):
5157
# You might need to assign the class here by important_data_1.__class__ = ...
5258
print("important_data_1:", important_data_1)
5359
print("important_data_2:", important_data_2)
54-
55-
def main():
56-
faultguard.start(launch, rescue, args=("Hello", "World"))
60+
61+
def main(use_autosave=True):
    """
    Run the demo through faultguard, optionally with autosaving enabled.

    With autosaving, leftovers of a previous run are recovered and removed
    first, because faultguard.start refuses to run if the autosave file
    already exists.

    :param use_autosave: Whether to autosave the faultguard dictionary to "test.tmp.xz".
    """
    if not use_autosave:
        faultguard.start(launch, rescue, args=("Hello", "World"))
        return

    autosave_file = "test.tmp.xz"
    backup_file = autosave_file + ".tmp"

    # Also recover when only the backup file survived: faultguard.recover falls
    # back to the backup if the main file cannot be read, and faultguard.start
    # would otherwise delete the backup without it ever being looked at.
    if os.path.isfile(autosave_file) or os.path.isfile(backup_file):
        print("Autosave exists:")
        faultguard.recover(recover, autosave_file)
        for leftover in (autosave_file, backup_file):
            if os.path.isfile(leftover):
                os.remove(leftover)

    faultguard.start(launch, rescue, args=("Hello", "World"), autosave_interval=3, autosave_file=autosave_file)
5771

5872
if __name__ == "__main__":
5973
main()

faultguard.py

Lines changed: 90 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,81 @@ def wrapped_launch(launch, managed_dict, signal_handlers, args):
3232
# Attach signal handlers
3333
for sig in signal_handlers:
3434
signal.signal(sig, signal_handlers[sig])
35-
35+
3636
faultguard_data = FaultguardDict(managed_dict)
3737
if args is None:
3838
launch(faultguard_data)
3939
else:
4040
launch(faultguard_data, args)
4141

42-
def start(launch, rescue, args = None):
42+
def recover(rescue, autosave_file=None):
    """
    Load the faultguard data dictionary from an autosave file and pass it to a rescue function.

    The main autosave file is tried first. If it cannot be read for any reason,
    the backup file (autosave file name + ".tmp") is tried instead.

    :param rescue: The method to call with the recovered faultguard data dictionary.
    :param autosave_file: The file to recover the data from. Must be given.
    :returns: Whether the main (0) or the backup file (1) was used for recovery
    :raises TypeError: If no autosave file is given.
    :raises RuntimeError: If the main file cannot be read and no backup file exists.
    """
    # Compression library
    import lzma
    import os

    if autosave_file is None:
        # Fail early with a clear message instead of a confusing error from
        # lzma followed by a failing 'None + ".tmp"' concatenation.
        raise TypeError("The argument 'autosave_file' is not defined")

    # NOTE: pickle.load can execute arbitrary code contained in the file.
    # Only recover autosave files that come from a trusted source.
    try:
        with lzma.open(autosave_file, "r") as f:
            faultguard_data = FaultguardDict(pickle.load(f))
    except Exception as e:
        # Deliberately broad: a missing, truncated or corrupted main file must
        # all lead to the backup file being tried.
        print("The following issue occured during recovery:", e)
        main_error = e
    else:
        # Called outside of the try body so errors raised by the rescue
        # function itself are not mistaken for a broken autosave file.
        rescue(faultguard_data)
        return 0

    # Built only on the fallback path; os.fspath additionally allows
    # path-like objects for which a plain '+ ".tmp"' would fail.
    backup_file = os.fspath(autosave_file) + ".tmp"

    if not os.path.isfile(backup_file):
        # Keep the original failure attached for easier debugging
        raise RuntimeError("Recovery unsuccessful.") from main_error

    print("Switching to try recovery of backup file")

    with lzma.open(backup_file, "r") as f:
        faultguard_data = FaultguardDict(pickle.load(f))

    rescue(faultguard_data)
    return 1
76+
77+
def start(launch, rescue, args=None, autosave_interval=None, autosave_file=None):
4378
"""
4479
Start application through faultguard.
4580
4681
Launch and rescue have access to the same dictionary. Each entry in this dictionary is stored as serialized data using the python internal 'pickle' method. The "launch" method runs in a seperate process so a fault in that process should not affect the data stored in the dictionary.
4782
83+
If the autosave parameters are set, the dictionary is compressed and saved in the specified time interval to the specified path. Throws an error if the autosave file already exists. After successful exit of the monitored application, the autosave file is deleted.
84+
4885
:param launch: The applications main method. Accepts faultguard data dictionary as first and args (if not None) as second parameter.
4986
:param rescue: The method to call on a fault. Accepts faultguard data dictionary as first and args (if not None) as second parameter.
5087
:param args: Data passed to launch and rescue methods.
88+
:param autosave_interval: Time in seconds between each autosave of the `faultguard` dictionary.
89+
:param autosave_file: Path to file to use for autosaves.
5190
:returns: The applications exit code.
5291
"""
5392

93+
# Ensure valid parameters
94+
if autosave_interval is not None or autosave_file is not None:
95+
if autosave_interval is None or autosave_file is None:
96+
raise TypeError("Only one of the arguments 'autosave_interval' and 'autosave_file' is defined")
97+
98+
import os
99+
100+
if os.path.isfile(autosave_file):
101+
raise RuntimeError("The given autosave file already exists")
102+
103+
with open(autosave_file, "w") as f:
104+
if not f.writable():
105+
raise RuntimeError("The given autosave file is not writable")
106+
107+
if os.path.isfile(autosave_file + ".tmp"):
108+
os.remove(autosave_file + ".tmp")
109+
54110
# Detach signal handlers from faultguard process
55111
# Ensures faultguard does not interfere with signals like SIGINT
56112
orig_handlers = {}
@@ -71,17 +127,46 @@ def start(launch, rescue, args = None):
71127

72128
# Run process
73129
p.start()
74-
p.join()
130+
131+
if autosave_interval is None:
132+
p.join()
133+
else:
134+
# Compression library
135+
import lzma
136+
137+
while p.is_alive():
138+
# Wait for next autosave
139+
p.join(autosave_interval)
140+
141+
# Autosave
142+
if os.path.isfile(autosave_file + ".tmp"):
143+
os.remove(autosave_file + ".tmp")
144+
os.rename(autosave_file, autosave_file + ".tmp")
145+
146+
with lzma.open(autosave_file, "w") as f:
147+
pickle.dump(dict(managed_dict), f)
148+
149+
# Close Manager process
150+
# If this is not done and the faultguard process is terminated, the Manager process
151+
# would keep running.
152+
if p.exitcode != 0:
153+
faultguard_data = FaultguardDict(dict(managed_dict))
154+
manager.shutdown()
75155

76156
# Re-attach signal handlers
77157
for sig in orig_handlers:
78158
signal.signal(sig, orig_handlers[sig])
79159

80160
if p.exitcode != 0:
81-
faultguard_data = FaultguardDict(managed_dict)
82161
if args is None:
83162
rescue(faultguard_data, p.exitcode)
84163
else:
85164
rescue(faultguard_data, p.exitcode, args)
86-
165+
166+
if autosave_interval is not None and os.path.isfile(autosave_file):
167+
# Remove autosave file
168+
os.remove(autosave_file)
169+
if os.path.isfile(autosave_file + ".tmp"):
170+
os.remove(autosave_file + ".tmp")
171+
87172
return p.exitcode

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name='faultguard',
9-
version='1.0.3.post1',
9+
version='1.1.0',
1010
py_modules=['faultguard'],
1111
url='https://github.com/2xB/faultguard',
1212
license='BSD 3-Clause License',

test_autosave.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from multiprocessing import Process
2+
import multiprocessing
3+
import faultguard
4+
import numpy as np
5+
import time
6+
import sys
7+
import os
8+
9+
def launch(faultguard_data, args):
    """
    Demo software main method

    Creates two pieces of demo data, modifies each once, stores both in the
    faultguard dictionary and then sleeps for four seconds.

    :param faultguard_data: Faultguard data dictionary
    :param args: Data passed from faultguard.start. The first two entries are joined into the demo string.
    """
    print("Launching demo")
    print(multiprocessing.current_process())

    # Some important data
    numbers = np.array([1, 2, 3])
    first_word, second_word = args[0], args[1]
    text = first_word + " " + second_word

    # Some dummy important data manipulation (a single step)
    step = 0
    numbers[step % 3] = step
    text = text + str(step)
    print("important_data_1:", numbers)
    print("important_data_2:", text)

    # Sending important data to faultguard process
    faultguard_data["important_data_1"] = numbers
    faultguard_data["important_data_2"] = text

    # Keep the process alive for a while after the data was sent
    time.sleep(4)
34+
35+
def rescue(faultguard_data, exit_code, args):
    """
    Rescue handler that always fails.

    :param faultguard_data: Faultguard data dictionary (unused)
    :param exit_code: Exit code of the monitored process (unused)
    :param args: Data passed from faultguard.start (unused)
    :raises RuntimeError: Always.
    """
    message = "Rescue handler was triggered unexpectedly..."
    raise RuntimeError(message)
37+
38+
def run_test():
    """
    Start the demo through faultguard with autosaving enabled.

    Uses an autosave interval of 1 and the autosave file "test.tmp.xz".
    """
    autosave_settings = {"autosave_interval": 1, "autosave_file": "test.tmp.xz"}
    faultguard.start(launch, rescue, args=("Hello", "World"), **autosave_settings)
41+
42+
def recover(faultguard_data):
    """
    Print the recovered demo data and check it against the expected values.

    :param faultguard_data: Faultguard data dictionary holding "important_data_1" and "important_data_2".
    :raises AssertionError: If the recovered data differs from the expected values.
    """
    numbers = faultguard_data["important_data_1"]
    text = faultguard_data["important_data_2"]

    # You might need to assign the class here by numbers.__class__ = ...
    print("important_data_1:", numbers)
    print("important_data_2:", text)

    numbers_match = np.all(numbers == [0, 2, 3])
    assert numbers_match
    assert text == "Hello World0"
51+
52+
def test_main():
    """
    Check recovery from the main autosave file and from the backup file.

    While the demo is running through faultguard in a separate process, its
    autosave file is moved aside (presumably so that it is not removed when
    faultguard finishes -- confirm against faultguard.start). Afterwards
    recovery is checked twice: once from the intact main file (expected
    result 0) and once with a broken main file so the backup file is used
    (expected result 1).
    """
    autosave_file = "test.tmp.xz"
    moved_aside_file = autosave_file + ".backup"
    backup_file = autosave_file + ".tmp"

    def remove_test_files():
        for path in (autosave_file, moved_aside_file, backup_file):
            if os.path.isfile(path):
                os.remove(path)

    # Prepare test environment, including leftovers of an earlier aborted run
    remove_test_files()

    p = Process(target=run_test)

    # Run process
    p.start()
    try:
        p.join(2)
        # os.replace overwrites an existing target on every platform, whereas
        # os.rename fails on Windows if the target already exists.
        os.replace(autosave_file, moved_aside_file)
        p.join()

        os.replace(moved_aside_file, autosave_file)
        assert faultguard.recover(recover, autosave_file) == 0

        # Turn the valid autosave into the backup and break the main file
        os.replace(autosave_file, backup_file)
        with open(autosave_file, "w") as f:
            f.write("Test")
        assert faultguard.recover(recover, autosave_file) == 1
    finally:
        # Make sure the child is done before its files are removed
        p.join()
        remove_test_files()

test_example.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import faultguard
22
import numpy as np
3+
import os
34

45
def launch(faultguard_data, args):
56
"""
@@ -42,7 +43,7 @@ def rescue(faultguard_data, exit_code, args):
4243

4344
# Check if fault occurs before data was initialized
4445
if "important_data_1" not in faultguard_data or "important_data_2" not in faultguard_data:
45-
return
46+
raise RuntimeError("Faultguard dict content missing")
4647

4748
# Restore data
4849
important_data_1 = faultguard_data["important_data_1"]
@@ -54,5 +55,14 @@ def rescue(faultguard_data, exit_code, args):
5455
assert np.all(important_data_1 == [6, 7, 5])
5556
assert important_data_2 == "Hello World01234567"
5657

57-
def test_main():
58+
def test_main_autosave():
    """
    Run the demo with autosaving and check that no autosave files remain afterwards.
    """
    autosave_file = "test2.tmp.xz"

    # Start from a clean state
    if os.path.isfile(autosave_file):
        os.remove(autosave_file)

    faultguard.start(launch, rescue, args=("Hello", "World"), autosave_interval=3, autosave_file=autosave_file)

    # Neither the autosave file nor its backup may be left behind
    for leftover in (autosave_file, autosave_file + ".tmp"):
        assert not os.path.isfile(leftover)
66+
67+
def test_main_no_autosave():
    """
    Run the demo through faultguard without autosaving.
    """
    demo_args = ("Hello", "World")
    faultguard.start(launch, rescue, args=demo_args)

0 commit comments

Comments
 (0)