Skip to content

Commit 3cfdd48

Browse files
authored
fix(pure): Make Pure FlashArray HTTP client timeout configurable (#5551)
**Make Pure FlashArray HTTP client timeout configurable** **Problem:** During migrations of VMs with many disks, simultaneous `CopyVolume` requests to Pure FlashArray were timing out, leaving PVCs stuck in `Pending`. In one observed case, 15 disks were migrated but only 7 reached `Bound` status — the remaining 8 populator pods failed with: ``` failed to copy VMDK using VVol storage API: copy operation failed: Pure FlashArray CopyVolume failed: failed to send copy volume request: Post "https://<array>/api/2.46/volumes?overwrite=true": context deadline exceeded (Client.Timeout exceeded while awaiting headers) ``` The root cause is that the HTTP client timeout was hardcoded to 30 seconds with no way to extend it, making it impossible to accommodate slower or heavily-loaded arrays. **Changes:** - `NewRestClient` now accepts an `httpTimeoutSeconds int` parameter instead of a hardcoded value. A value of `<= 0` falls back to the 30s default. - `NewFlashArrayClonner` threads the parameter through to `NewRestClient`. - A `--storage-http-timeout-seconds` CLI flag is added to the `vsphere-xcopy-volume-populator` binary; its default is taken from the `STORAGE_HTTP_TIMEOUT_SECONDS` environment variable, and an empty or invalid value falls back to 30 seconds. **How to configure:** Pass `--storage-http-timeout-seconds=<value>` to the populator binary, or set `STORAGE_HTTP_TIMEOUT_SECONDS` in its environment. Full operator-side wiring (CRD field → `VSphereXcopyPluginConfig` → `VSphereXcopyVolumePopulatorSpec` → populator-controller pod args) is a follow-up. **Default behaviour is unchanged** — the timeout remains 30 seconds unless explicitly overridden. --------- Signed-off-by: Michael Jons <Michael.Jons@tre.se>
1 parent 1794d0e commit 3cfdd48

File tree

5 files changed

+18
-6
lines changed

5 files changed

+18
-6
lines changed

cmd/vsphere-copy-offload-populator/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ to have a secret with the following fields:
159159
| STORAGE_PASSWORD | string | y* | |
160160
| STORAGE_TOKEN | string | n** | |
161161
| STORAGE_SKIP_SSL_VERIFICATION | true/false | n | false |
162+
| STORAGE_HTTP_TIMEOUT_SECONDS | integer | n | 30 |
162163

163164
\* For most storage vendors, `STORAGE_USERNAME` and `STORAGE_PASSWORD` are required. Pure FlashArray is an exception - see below.
164165

cmd/vsphere-copy-offload-populator/internal/pure/flashArray.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ printf "px_%s" $(oc get storagecluster -A -o=jsonpath='{.items[0].status.cluster
3737
// Authentication is mutually exclusive:
3838
// - If apiToken is provided (non-empty), it will be used for authentication (username/password ignored)
3939
// - If apiToken is empty, username and password will be used for authentication
40-
func NewFlashArrayClonner(hostname, username, password, apiToken string, skipSSLVerification bool, clusterPrefix string) (FlashArrayClonner, error) {
40+
func NewFlashArrayClonner(hostname, username, password, apiToken string, skipSSLVerification bool, clusterPrefix string, httpTimeoutSeconds int) (FlashArrayClonner, error) {
4141
if clusterPrefix == "" {
4242
return FlashArrayClonner{}, errors.New(helpMessage)
4343
}
4444

4545
// Create the REST client for all operations
46-
restClient, err := NewRestClient(hostname, username, password, apiToken, skipSSLVerification)
46+
restClient, err := NewRestClient(hostname, username, password, apiToken, skipSSLVerification, httpTimeoutSeconds)
4747
if err != nil {
4848
return FlashArrayClonner{}, fmt.Errorf("failed to create REST client: %w", err)
4949
}

cmd/vsphere-copy-offload-populator/internal/pure/flashArray_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ func TestAuthenticationMethods(t *testing.T) {
310310
hostname := strings.TrimPrefix(server.URL, "https://")
311311

312312
// Create REST client with test parameters
313-
client, err := NewRestClient(hostname, tc.username, tc.password, tc.token, true)
313+
client, err := NewRestClient(hostname, tc.username, tc.password, tc.token, true, 30)
314314

315315
if tc.expectError {
316316
if err == nil {

cmd/vsphere-copy-offload-populator/internal/pure/rest_client.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,11 @@ type HostConnectionRequest struct {
119119
// NewRestClient creates a new REST client for Pure FlashArray
120120
// If apiToken is provided (non-empty), it will be used directly, skipping username/password authentication
121121
// If apiToken is empty, username and password will be used to obtain an API token
122-
func NewRestClient(hostname, username, password, apiToken string, skipSSLVerify bool) (*RestClient, error) {
122+
// httpTimeoutSeconds controls the HTTP client timeout; any value <= 0 falls back to the default of 30 seconds
123+
func NewRestClient(hostname, username, password, apiToken string, skipSSLVerify bool, httpTimeoutSeconds int) (*RestClient, error) {
124+
if httpTimeoutSeconds <= 0 {
125+
httpTimeoutSeconds = 30
126+
}
123127
// Create base transport with TLS config
124128
baseTransport := &http.Transport{
125129
TLSClientConfig: &tls.Config{
@@ -135,7 +139,7 @@ func NewRestClient(hostname, username, password, apiToken string, skipSSLVerify
135139
client := &RestClient{
136140
hostname: hostname,
137141
httpClient: &http.Client{
138-
Timeout: 30 * time.Second,
142+
Timeout: time.Duration(httpTimeoutSeconds) * time.Second,
139143
Transport: transport,
140144
},
141145
}

cmd/vsphere-copy-offload-populator/vsphere-copy-offload-populator.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/http"
1010
"os"
1111
"path"
12+
"strconv"
1213
"strings"
1314

1415
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -59,6 +60,7 @@ var (
5960
vspherePassword string
6061
esxiCloneMethod string
6162
sshTimeoutSeconds int
63+
storageAPITimeoutSeconds string
6264

6365
// kube args
6466
httpEndpoint string
@@ -104,8 +106,12 @@ func main() {
104106
}
105107
storageApi = &sm
106108
case forklift.StorageVendorProductPureFlashArray:
109+
apiTimeout, err := strconv.Atoi(storageAPITimeoutSeconds)
110+
if err != nil && storageAPITimeoutSeconds != "" {
111+
klog.Warningf("invalid value %q for storage-http-timeout-seconds, using default (30s): %v", storageAPITimeoutSeconds, err)
112+
}
107113
sm, err := pure.NewFlashArrayClonner(
108-
storageHostname, storageUsername, storagePassword, storageToken, storageSkipSSLVerification == "true", os.Getenv(pure.ClusterPrefixEnv))
114+
storageHostname, storageUsername, storagePassword, storageToken, storageSkipSSLVerification == "true", os.Getenv(pure.ClusterPrefixEnv), apiTimeout)
109115
if err != nil {
110116
klog.Fatalf("failed to initialize Pure FlashArray clonner with %s", err)
111117
}
@@ -306,6 +312,7 @@ func handleArgs() {
306312
flag.StringVar(&vspherePassword, "vsphere-password", os.Getenv("GOVMOMI_PASSWORD"), "vSphere's API password")
307313
flag.StringVar(&esxiCloneMethod, "esxi-clone-method", os.Getenv("ESXI_CLONE_METHOD"), "ESXi clone method: 'vib' (default) or 'ssh'")
308314
flag.IntVar(&sshTimeoutSeconds, "ssh-timeout-seconds", 30, "SSH timeout in seconds for ESXi operations (default: 30)")
315+
flag.StringVar(&storageAPITimeoutSeconds, "storage-http-timeout-seconds", os.Getenv("STORAGE_HTTP_TIMEOUT_SECONDS"), "HTTP client timeout in seconds for storage API requests (default: 30)")
309316
flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig. Only required if out-of-cluster.")
310317
flag.StringVar(&masterURL, "master", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.")
311318
// Metrics args

0 commit comments

Comments
 (0)