Skip to content

Commit 73cc854

Browse files
shohamd4, mrkbac, vram0gh2, Fizzadar, sengo4hd
authored
api+cli: add retry global arguments
Co-authored-by: Marko Bausch <[email protected]> Co-authored-by: vram0gh2 <[email protected]> Co-authored-by: Nick Mills-Barrett <[email protected]> Co-authored-by: Samuel Enguehard <[email protected]> Co-authored-by: Thomas Dendale <[email protected]>
1 parent 5a3c924 commit 73cc854

File tree

12 files changed

+712
-59
lines changed

12 files changed

+712
-59
lines changed

docs/cli.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,21 @@ By default pyinfra only prints high level information (this host connected, this
4343
+ `-vv`: as above plus print shell input to the remote host
4444
+ `-vvv`: as above plus print shell output from the remote host
4545

46+
### Retry Options
47+
48+
pyinfra supports automatic retry of failed operations via CLI options:
49+
50+
+ `--retry N`: Retry failed operations up to N times (default: 0)
51+
+ `--retry-delay N`: Wait N seconds between retry attempts (default: 5)
52+
53+
```sh
54+
# Retry failed operations up to 3 times with default 5 second delay
55+
pyinfra inventory.py deploy.py --retry 3
56+
57+
# Retry with custom delay
58+
pyinfra inventory.py deploy.py --retry 2 --retry-delay 10
59+
```
60+
4661

4762
## Inventory
4863

docs/faq.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,32 @@ Use the LINK ``files.file``, ``files.directory`` or ``files.link`` operations to
4949
group="pyinfra",
5050
mode=644,
5151
)
52+
53+
How do I handle unreliable operations or network issues?
54+
--------------------------------------------------------
55+
56+
Use the `retry behavior arguments <arguments.html#retry-behavior>`_ to automatically retry failed operations. This is especially useful for network operations or services that may be temporarily unavailable:
57+
58+
.. code:: python
59+
60+
# Retry a network operation up to 3 times
61+
server.shell(
62+
name="Download file with retries",
63+
commands=["wget https://example.com/file.zip"],
64+
_retries=3,
65+
_retry_delay=5, # wait 5 seconds between retries
66+
)
67+
68+
# Use custom retry logic for specific error conditions
69+
def should_retry_download(output_data):
70+
# Retry only on temporary network errors, not permanent failures
71+
stderr_text = " ".join(output_data["stderr_lines"]).lower()
72+
temporary_errors = ["timeout", "connection refused", "temporary failure"]
73+
return any(error in stderr_text for error in temporary_errors)
74+
75+
server.shell(
76+
name="Download with smart retry logic",
77+
commands=["wget https://example.com/large-file.zip"],
78+
_retries=3,
79+
_retry_until=should_retry_download,
80+
)

docs/using-operations.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,45 @@ Global arguments are covered in detail here: :doc:`arguments`. There is a set of
4949
_sudo_user="pyinfra",
5050
)
5151
52+
Retry Functionality
53+
-------------------
54+
55+
Operations can be configured to retry automatically on failure using retry arguments:
56+
57+
.. code:: python
58+
59+
from pyinfra.operations import server
60+
61+
# Retry a flaky command up to 3 times with default 5 second delay
62+
server.shell(
63+
name="Download file with retries",
64+
commands=["curl -o /tmp/file.tar.gz https://example.com/file.tar.gz"],
65+
_retries=3,
66+
)
67+
68+
# Retry with custom delay between attempts
69+
server.shell(
70+
name="Check service status with retries",
71+
commands=["systemctl is-active myservice"],
72+
_retries=2,
73+
_retry_delay=10, # 10 second delay between retries
74+
)
75+
76+
# Use custom retry condition to control when to retry
77+
def retry_on_network_error(output_data):
78+
# Retry if stderr contains network-related errors
79+
for line in output_data["stderr_lines"]:
80+
if any(keyword in line.lower() for keyword in ["network", "timeout", "connection"]):
81+
return True
82+
return False
83+
84+
server.shell(
85+
name="Network operation with conditional retry",
86+
commands=["wget https://example.com/large-file.zip"],
87+
_retries=5,
88+
_retry_until=retry_on_network_error,
89+
)
90+
5291
5392
The ``host`` Object
5493
-------------------

pyinfra/api/arguments.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ class ConnectorArguments(TypedDict, total=False):
7272
_get_pty: bool
7373
_stdin: Union[str, Iterable[str]]
7474

75+
# Retry arguments
76+
_retries: int
77+
_retry_delay: Union[int, float]
78+
_retry_until: Optional[Callable[[dict], bool]]
79+
7580

7681
def generate_env(config: "Config", value: dict) -> dict:
7782
env = config.ENV.copy()
@@ -232,11 +237,28 @@ def all_global_arguments() -> List[tuple[str, Type]]:
232237
return list(get_type_hints(AllArguments).items())
233238

234239

240+
# Create a dictionary for retry arguments
241+
retry_argument_meta: dict[str, ArgumentMeta] = {
242+
"_retries": ArgumentMeta(
243+
"Number of times to retry failed operations.",
244+
default=lambda config: config.RETRY,
245+
),
246+
"_retry_delay": ArgumentMeta(
247+
"Delay in seconds between retry attempts.",
248+
default=lambda config: config.RETRY_DELAY,
249+
),
250+
"_retry_until": ArgumentMeta(
251+
"Callable taking output data that returns True to continue retrying.",
252+
default=lambda config: None,
253+
),
254+
}
255+
235256
all_argument_meta: dict[str, ArgumentMeta] = {
236257
**auth_argument_meta,
237258
**shell_argument_meta,
238259
**meta_argument_meta,
239260
**execution_argument_meta,
261+
**retry_argument_meta, # Add retry arguments
240262
}
241263

242264
EXECUTION_KWARG_KEYS = list(ExecutionArguments.__annotations__.keys())
@@ -286,6 +308,45 @@ def all_global_arguments() -> List[tuple[str, Type]]:
286308
),
287309
"Operation meta & callbacks": (meta_argument_meta, "", ""),
288310
"Execution strategy": (execution_argument_meta, "", ""),
311+
"Retry behavior": (
312+
retry_argument_meta,
313+
"""
314+
Retry arguments allow you to automatically retry operations that fail. You can specify
315+
how many times to retry, the delay between retries, and optionally a condition
316+
function to determine when to stop retrying.
317+
""",
318+
"""
319+
.. code:: python
320+
321+
# Retry a command up to 3 times with the default 5 second delay
322+
server.shell(
323+
name="Run flaky command with retries",
324+
commands=["flaky_command"],
325+
_retries=3,
326+
)
327+
# Retry with a custom delay
328+
server.shell(
329+
name="Run flaky command with custom delay",
330+
commands=["flaky_command"],
331+
_retries=2,
332+
_retry_delay=10, # 10 second delay between retries
333+
)
334+
# Retry with a custom condition
335+
def retry_on_specific_error(output_data):
336+
# Retry if stderr contains "temporary failure"
337+
for line in output_data["stderr_lines"]:
338+
if "temporary failure" in line.lower():
339+
return True
340+
return False
341+
342+
server.shell(
343+
name="Run command with conditional retry",
344+
commands=["flaky_command"],
345+
_retries=5,
346+
_retry_until=retry_on_specific_error,
347+
)
348+
""",
349+
),
289350
}
290351

291352

pyinfra/api/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ class ConfigDefaults:
5353
IGNORE_ERRORS: bool = False
5454
# Shell to use to execute commands
5555
SHELL: str = "sh"
56+
# Number of times to retry failed operations
57+
RETRY: int = 0
58+
# Delay in seconds between retry attempts
59+
RETRY_DELAY: int = 5
5660

5761

5862
config_defaults = {key: value for key, value in ConfigDefaults.__dict__.items() if key.isupper()}

pyinfra/api/operation.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ class OperationMeta:
4747
_commands: Optional[list[Any]] = None
4848
_maybe_is_change: Optional[bool] = None
4949
_success: Optional[bool] = None
50+
_retry_attempts: int = 0
51+
_max_retries: int = 0
52+
_retry_succeeded: Optional[bool] = None
5053

5154
def __init__(self, hash, is_change: Optional[bool]):
5255
self._hash = hash
@@ -59,9 +62,17 @@ def __repr__(self) -> str:
5962
"""
6063

6164
if self._commands is not None:
65+
retry_info = ""
66+
if self._retry_attempts > 0:
67+
retry_result = "succeeded" if self._retry_succeeded else "failed"
68+
retry_info = (
69+
f", retries={self._retry_attempts}/{self._max_retries} ({retry_result})"
70+
)
71+
6272
return (
6373
"OperationMeta(executed=True, "
64-
f"success={self.did_succeed()}, hash={self._hash}, commands={len(self._commands)})"
74+
f"success={self.did_succeed()}, hash={self._hash}, "
75+
f"commands={len(self._commands)}{retry_info})"
6576
)
6677
return (
6778
"OperationMeta(executed=False, "
@@ -74,12 +85,20 @@ def set_complete(
7485
success: bool,
7586
commands: list[Any],
7687
combined_output: "CommandOutput",
88+
retry_attempts: int = 0,
89+
max_retries: int = 0,
7790
) -> None:
7891
if self.is_complete():
7992
raise RuntimeError("Cannot complete an already complete operation")
8093
self._success = success
8194
self._commands = commands
8295
self._combined_output = combined_output
96+
self._retry_attempts = retry_attempts
97+
self._max_retries = max_retries
98+
99+
# Determine if operation succeeded after retries
100+
if retry_attempts > 0:
101+
self._retry_succeeded = success
83102

84103
def is_complete(self) -> bool:
85104
return self._success is not None
@@ -150,6 +169,40 @@ def stdout(self) -> str:
150169
def stderr(self) -> str:
151170
return "\n".join(self.stderr_lines)
152171

172+
@property
173+
def retry_attempts(self) -> int:
174+
return self._retry_attempts
175+
176+
@property
177+
def max_retries(self) -> int:
178+
return self._max_retries
179+
180+
@property
181+
def was_retried(self) -> bool:
182+
"""
183+
Returns whether this operation was retried at least once.
184+
"""
185+
return self._retry_attempts > 0
186+
187+
@property
188+
def retry_succeeded(self) -> Optional[bool]:
189+
"""
190+
Returns whether this operation succeeded after retries.
191+
Returns None if the operation was not retried.
192+
"""
193+
return self._retry_succeeded
194+
195+
def get_retry_info(self) -> dict[str, Any]:
196+
"""
197+
Returns a dictionary with all retry-related information.
198+
"""
199+
return {
200+
"retry_attempts": self._retry_attempts,
201+
"max_retries": self._max_retries,
202+
"was_retried": self.was_retried,
203+
"retry_succeeded": self._retry_succeeded,
204+
}
205+
153206

154207
def add_op(state: State, op_func, *args, **kwargs):
155208
"""

0 commit comments

Comments
 (0)