Skip to content

Commit 1bf7214

Browse files
mprycclaude
andcommitted
Add --request-timeout flag with TCP dial support for all commands
This commit adds a --request-timeout flag (following kubectl conventions) to all OADP CLI commands, with full support for cluster unreachability: - Add --request-timeout flag to nonadmin backup logs and describe commands (takes precedence over OADP_CLI_REQUEST_TIMEOUT env var) - Add timeoutFactory wrapper that applies dial timeout to all Velero commands by overriding all client-creating methods (KubeClient, DynamicClient, DiscoveryClient, KubebuilderClient, KubebuilderWatchClient) - Add renameTimeoutFlag to rename Velero's --timeout to --request-timeout for consistent kubectl-style CLI experience - Set custom net.Dialer with timeout to handle TCP dial timeouts - Add context-based timeout handling with proper cancellation detection - Add FormatDownloadRequestTimeoutError for helpful timeout diagnostics - Add tests for timeout functionality (global timeout, config application, dialer timeout behavior) The timeout now applies to both HTTP requests and TCP connection attempts across all commands, ensuring the CLI times out quickly when the cluster is unreachable instead of waiting for the default ~30s TCP timeout. Co-Authored-By: Claude Opus 4.5 <[email protected]> Signed-off-by: Michal Pryc <[email protected]>
1 parent 2dc20a5 commit 1bf7214

File tree

7 files changed

+684
-59
lines changed

7 files changed

+684
-59
lines changed

cmd/non-admin/backup/describe.go

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,35 +18,52 @@ import (
1818
)
1919

2020
func NewDescribeCommand(f client.Factory, use string) *cobra.Command {
21+
var requestTimeout time.Duration
22+
2123
c := &cobra.Command{
2224
Use: use + " NAME",
2325
Short: "Describe a non-admin backup",
2426
Args: cobra.ExactArgs(1),
2527
RunE: func(cmd *cobra.Command, args []string) error {
2628
backupName := args[0]
2729

30+
// Get effective timeout (flag takes precedence over env var)
31+
effectiveTimeout := shared.GetHTTPTimeoutWithOverride(requestTimeout)
32+
33+
// Create context with the effective timeout
34+
ctx, cancel := context.WithTimeout(context.Background(), effectiveTimeout)
35+
defer cancel()
36+
2837
// Get the current namespace from kubectl context
2938
userNamespace, err := shared.GetCurrentNamespace()
3039
if err != nil {
3140
return fmt.Errorf("failed to determine current namespace: %w", err)
3241
}
3342

34-
// Create client with required scheme types
43+
// Create client with required scheme types and timeout
3544
kbClient, err := shared.NewClientWithScheme(f, shared.ClientOptions{
3645
IncludeNonAdminTypes: true,
3746
IncludeVeleroTypes: true,
3847
IncludeCoreTypes: true,
48+
Timeout: effectiveTimeout,
3949
})
4050
if err != nil {
4151
return err
4252
}
4353

4454
// Get the specific backup
4555
var nab nacv1alpha1.NonAdminBackup
46-
if err := kbClient.Get(context.Background(), kbclient.ObjectKey{
56+
if err := kbClient.Get(ctx, kbclient.ObjectKey{
4757
Namespace: userNamespace,
4858
Name: backupName,
4959
}, &nab); err != nil {
60+
// Check for context cancellation
61+
if ctx.Err() == context.DeadlineExceeded {
62+
return fmt.Errorf("timed out after %v getting NonAdminBackup %q", effectiveTimeout, backupName)
63+
}
64+
if ctx.Err() == context.Canceled {
65+
return fmt.Errorf("operation cancelled: %w", ctx.Err())
66+
}
5067
return fmt.Errorf("NonAdminBackup %q not found in namespace %q: %w", backupName, userNamespace, err)
5168
}
5269

@@ -55,9 +72,12 @@ func NewDescribeCommand(f client.Factory, use string) *cobra.Command {
5572

5673
return nil
5774
},
58-
Example: ` kubectl oadp nonadmin backup describe my-backup`,
75+
Example: ` kubectl oadp nonadmin backup describe my-backup
76+
kubectl oadp nonadmin backup describe my-backup --request-timeout=30m`,
5977
}
6078

79+
c.Flags().DurationVar(&requestTimeout, "request-timeout", 0, fmt.Sprintf("The length of time to wait before giving up on a single server request (e.g., 30s, 5m, 1h). Overrides %s env var. Default: %v", shared.TimeoutEnvVar, shared.DefaultHTTPTimeout))
80+
6181
output.BindFlags(c.Flags())
6282
output.ClearOutputFlagDefault(c)
6383

@@ -349,9 +369,16 @@ func colorizePhase(phase string) string {
349369
}
350370

351371
// NonAdminDescribeBackup mirrors Velero's output.DescribeBackup functionality
352-
// but works within non-admin RBAC boundaries using NonAdminDownloadRequest
353-
func NonAdminDescribeBackup(cmd *cobra.Command, kbClient kbclient.Client, nab *nacv1alpha1.NonAdminBackup, userNamespace string) error {
354-
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
372+
// but works within non-admin RBAC boundaries using NonAdminDownloadRequest.
373+
// The timeout parameter controls how long to wait for download requests to complete.
374+
// If timeout is 0, DefaultOperationTimeout is used.
375+
func NonAdminDescribeBackup(cmd *cobra.Command, kbClient kbclient.Client, nab *nacv1alpha1.NonAdminBackup, userNamespace string, timeout time.Duration) error {
376+
// Use provided timeout or fall back to default
377+
if timeout == 0 {
378+
timeout = shared.DefaultOperationTimeout
379+
}
380+
381+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
355382
defer cancel()
356383

357384
// Print basic backup information
@@ -401,39 +428,43 @@ func NonAdminDescribeBackup(cmd *cobra.Command, kbClient kbclient.Client, nab *n
401428

402429
// Get backup results using NonAdminDownloadRequest (most important data)
403430
if results, err := shared.ProcessDownloadRequest(ctx, kbClient, shared.DownloadRequestOptions{
404-
BackupName: veleroBackupName,
405-
DataType: "BackupResults",
406-
Namespace: userNamespace,
431+
BackupName: veleroBackupName,
432+
DataType: "BackupResults",
433+
Namespace: userNamespace,
434+
HTTPTimeout: timeout,
407435
}); err == nil {
408436
fmt.Fprintf(cmd.OutOrStdout(), "\nBackup Results:\n")
409437
fmt.Fprintf(cmd.OutOrStdout(), "%s", indent(results, " "))
410438
}
411439

412440
// Get backup details using NonAdminDownloadRequest for BackupResourceList
413441
if resourceList, err := shared.ProcessDownloadRequest(ctx, kbClient, shared.DownloadRequestOptions{
414-
BackupName: veleroBackupName,
415-
DataType: "BackupResourceList",
416-
Namespace: userNamespace,
442+
BackupName: veleroBackupName,
443+
DataType: "BackupResourceList",
444+
Namespace: userNamespace,
445+
HTTPTimeout: timeout,
417446
}); err == nil {
418447
fmt.Fprintf(cmd.OutOrStdout(), "\nBackup Resource List:\n")
419448
fmt.Fprintf(cmd.OutOrStdout(), "%s", indent(resourceList, " "))
420449
}
421450

422451
// Get backup volume info using NonAdminDownloadRequest
423452
if volumeInfo, err := shared.ProcessDownloadRequest(ctx, kbClient, shared.DownloadRequestOptions{
424-
BackupName: veleroBackupName,
425-
DataType: "BackupVolumeInfos",
426-
Namespace: userNamespace,
453+
BackupName: veleroBackupName,
454+
DataType: "BackupVolumeInfos",
455+
Namespace: userNamespace,
456+
HTTPTimeout: timeout,
427457
}); err == nil {
428458
fmt.Fprintf(cmd.OutOrStdout(), "\nBackup Volume Info:\n")
429459
fmt.Fprintf(cmd.OutOrStdout(), "%s", indent(volumeInfo, " "))
430460
}
431461

432462
// Get backup item operations using NonAdminDownloadRequest
433463
if itemOps, err := shared.ProcessDownloadRequest(ctx, kbClient, shared.DownloadRequestOptions{
434-
BackupName: veleroBackupName,
435-
DataType: "BackupItemOperations",
436-
Namespace: userNamespace,
464+
BackupName: veleroBackupName,
465+
DataType: "BackupItemOperations",
466+
Namespace: userNamespace,
467+
HTTPTimeout: timeout,
437468
}); err == nil {
438469
fmt.Fprintf(cmd.OutOrStdout(), "\nBackup Item Operations:\n")
439470
fmt.Fprintf(cmd.OutOrStdout(), "%s", indent(itemOps, " "))

cmd/non-admin/backup/logs.go

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ limitations under the License.
1919
import (
2020
"context"
2121
"fmt"
22+
"net"
2223
"time"
2324

2425
"github.com/migtools/oadp-cli/cmd/shared"
@@ -31,12 +32,18 @@ import (
3132
)
3233

3334
func NewLogsCommand(f client.Factory, use string) *cobra.Command {
34-
return &cobra.Command{
35+
var requestTimeout time.Duration
36+
37+
c := &cobra.Command{
3538
Use: use + " NAME",
3639
Short: "Show logs for a non-admin backup",
3740
Args: cobra.ExactArgs(1),
3841
RunE: func(cmd *cobra.Command, args []string) error {
39-
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
42+
// Get effective timeout (flag takes precedence over env var)
43+
effectiveTimeout := shared.GetHTTPTimeoutWithOverride(requestTimeout)
44+
45+
// Create context with the effective timeout for the entire operation
46+
ctx, cancel := context.WithTimeout(context.Background(), effectiveTimeout)
4047
defer cancel()
4148

4249
// Get the current namespace from kubectl context
@@ -59,6 +66,18 @@ func NewLogsCommand(f client.Factory, use string) *cobra.Command {
5966
if err != nil {
6067
return fmt.Errorf("failed to get rest config: %w", err)
6168
}
69+
// Set timeout on REST config to prevent hanging when cluster is unreachable
70+
restConfig.Timeout = effectiveTimeout
71+
72+
// Set a custom dial function with timeout to ensure TCP connection attempts
73+
// also respect the timeout (the default TCP dial timeout is ~30s)
74+
dialer := &net.Dialer{
75+
Timeout: effectiveTimeout,
76+
}
77+
restConfig.Dial = func(ctx context.Context, network, address string) (net.Conn, error) {
78+
return dialer.DialContext(ctx, network, address)
79+
}
80+
6281
kbClient, err := kbclient.New(restConfig, kbclient.Options{Scheme: scheme})
6382
if err != nil {
6483
return fmt.Errorf("failed to create controller-runtime client: %w", err)
@@ -97,26 +116,34 @@ func NewLogsCommand(f client.Factory, use string) *cobra.Command {
97116
_ = kbClient.Delete(deleteCtx, req)
98117
}()
99118

100-
fmt.Fprintf(cmd.OutOrStdout(), "Waiting for backup logs to be processed...\n")
119+
fmt.Fprintf(cmd.OutOrStdout(), "Waiting for backup logs to be processed (timeout: %v)...\n", effectiveTimeout)
101120

102-
// Wait for the download request to be processed using shared utility
103-
// Note: We create a custom waiting implementation here to provide user feedback
104-
timeout := time.After(120 * time.Second)
105-
tick := time.Tick(2 * time.Second)
121+
// Wait for the download request to be processed
122+
ticker := time.NewTicker(2 * time.Second)
123+
defer ticker.Stop()
106124

107125
var signedURL string
108126
Loop:
109127
for {
110128
select {
111-
case <-timeout:
112-
return fmt.Errorf("timed out waiting for NonAdminDownloadRequest to be processed")
113-
case <-tick:
129+
case <-ctx.Done():
130+
// Check if context was cancelled due to timeout or other reason
131+
if ctx.Err() == context.DeadlineExceeded {
132+
return shared.FormatDownloadRequestTimeoutError(kbClient, req, effectiveTimeout)
133+
}
134+
// Context cancelled for other reason (e.g., user interruption)
135+
return fmt.Errorf("operation cancelled: %w", ctx.Err())
136+
case <-ticker.C:
114137
fmt.Fprintf(cmd.OutOrStdout(), ".")
115138
var updated nacv1alpha1.NonAdminDownloadRequest
116139
if err := kbClient.Get(ctx, kbclient.ObjectKey{
117140
Namespace: req.Namespace,
118141
Name: req.Name,
119142
}, &updated); err != nil {
143+
// If context expired during Get, handle it in next iteration
144+
if ctx.Err() != nil {
145+
continue
146+
}
120147
return fmt.Errorf("failed to get NonAdminDownloadRequest: %w", err)
121148
}
122149

@@ -141,12 +168,18 @@ func NewLogsCommand(f client.Factory, use string) *cobra.Command {
141168
}
142169

143170
// Use the shared StreamDownloadContent function to download and stream logs
144-
if err := shared.StreamDownloadContent(signedURL, cmd.OutOrStdout()); err != nil {
171+
// Note: We use the same effective timeout for the HTTP download
172+
if err := shared.StreamDownloadContentWithTimeout(signedURL, cmd.OutOrStdout(), effectiveTimeout); err != nil {
145173
return fmt.Errorf("failed to download and stream logs: %w", err)
146174
}
147175

148176
return nil
149177
},
150-
Example: ` kubectl oadp nonadmin backup logs my-backup`,
178+
Example: ` kubectl oadp nonadmin backup logs my-backup
179+
kubectl oadp nonadmin backup logs my-backup --request-timeout=30m`,
151180
}
181+
182+
c.Flags().DurationVar(&requestTimeout, "request-timeout", 0, fmt.Sprintf("The length of time to wait before giving up on a single server request (e.g., 30s, 5m, 1h). Overrides %s env var. Default: %v", shared.TimeoutEnvVar, shared.DefaultHTTPTimeout))
183+
184+
return c
152185
}

0 commit comments

Comments
 (0)