Skip to content

Commit 19a3d2f

Browse files
swift: Retry connecting to OpenStack
The function IsSwiftEnabled probes a connection to Swift to determine whether CIRO should fall back to using a persistent volume claim as a storage backend. This function runs on the OpenStack platform to provide an alternative solution for when Swift is not available. Before this patch, any failure to communicate with Swift would make CIRO fall back to using a PVC. However, a failure to probing Swift could come from a temporary failure to reach the OpenStack API. With this patch, when an issue with the connection to the OpenStack identity service is not allowing CIRO to probe Swift, the probing reports an error instead of directly falling back to Swift. As a consequence, after this patch, temporary failures in early steps of the connection to Swift do not make CIRO fall back to using persistent volume claims when object storage might be available in OpenStack.
1 parent 115d07e commit 19a3d2f

File tree

3 files changed

+81
-30
lines changed

3 files changed

+81
-30
lines changed

pkg/storage/storage.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,11 @@ func GetPlatformStorage(listers *regopclient.StorageListers) (imageregistryv1.Im
202202
cfg.IBMCOS = &imageregistryv1.ImageRegistryConfigStorageIBMCOS{}
203203
replicas = 2
204204
case configapiv1.OpenStackPlatformType:
205-
if swift.IsSwiftEnabled(listers) {
205+
swiftEnabled, err := swift.IsSwiftEnabled(listers)
206+
if err != nil {
207+
return cfg, 0, err
208+
}
209+
if swiftEnabled {
206210
cfg.Swift = &imageregistryv1.ImageRegistryConfigStorageSwift{}
207211
replicas = 2
208212
break

pkg/storage/swift/swift.go

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package swift
33
import (
44
"crypto/tls"
55
"crypto/x509"
6+
"errors"
67
"fmt"
78
"net/http"
89
"reflect"
@@ -19,7 +20,7 @@ import (
1920
yamlv2 "gopkg.in/yaml.v2"
2021

2122
corev1 "k8s.io/api/core/v1"
22-
"k8s.io/apimachinery/pkg/api/errors"
23+
apimachineryerrors "k8s.io/apimachinery/pkg/api/errors"
2324
k8sutilerrors "k8s.io/apimachinery/pkg/util/errors"
2425
"k8s.io/klog/v2"
2526

@@ -63,19 +64,26 @@ func replaceEmpty(a string, b string) string {
6364
}
6465

6566
// IsSwiftEnabled checks if Swift service is available for OpenStack platform
66-
func IsSwiftEnabled(listers *regopclient.StorageListers) bool {
67+
func IsSwiftEnabled(listers *regopclient.StorageListers) (bool, error) {
6768
driver := NewDriver(&imageregistryv1.ImageRegistryConfigStorageSwift{}, listers)
6869
conn, err := driver.getSwiftClient()
6970
if err != nil {
70-
klog.Errorf("swift storage inaccessible: %v", err)
71-
return false
71+
if errors.As(err, &ErrContainerEndpointNotFound{}) {
72+
klog.Errorf("error connecting to Swift: %v", err)
73+
return false, nil
74+
}
75+
klog.Errorf("error connecting to OpenStack: %v", err)
76+
return false, err
7277
}
78+
7379
// Try to list containers to make sure the user has required permissions to do that
74-
if _, err = containers.List(conn, containers.ListOpts{}).AllPages(); err != nil {
80+
if err := containers.List(conn, containers.ListOpts{}).EachPage(func(_ pagination.Page) (bool, error) {
81+
return false, nil
82+
}); err != nil {
7583
klog.Errorf("error listing swift containers: %v", err)
76-
return false
84+
return false, nil
7785
}
78-
return true
86+
return true, nil
7987
}
8088

8189
// GetConfig reads credentials
@@ -84,7 +92,7 @@ func GetConfig(listers *regopclient.StorageListers) (*Swift, error) {
8492

8593
// Look for a user defined secret to get the Swift credentials
8694
sec, err := listers.Secrets.Get(defaults.ImageRegistryPrivateConfigurationUser)
87-
if err != nil && errors.IsNotFound(err) {
95+
if err != nil && apimachineryerrors.IsNotFound(err) {
8896
// If no user defined credentials were provided, then try to find them in the secret,
8997
// created by cloud-credential-operator.
9098
sec, err = listers.Secrets.Get(defaults.CloudCredentialsName)
@@ -103,7 +111,7 @@ func GetConfig(listers *regopclient.StorageListers) (*Swift, error) {
103111
var cloudName string
104112
cloudInfra, err := util.GetInfrastructure(listers)
105113
if err != nil {
106-
if !errors.IsNotFound(err) {
114+
if !apimachineryerrors.IsNotFound(err) {
107115
return nil, fmt.Errorf("failed to get cluster infrastructure info: %v", err)
108116
}
109117
}
@@ -173,7 +181,7 @@ func GetConfig(listers *regopclient.StorageListers) (*Swift, error) {
173181
// system trust bundle should be used instead.
174182
func (d *driver) CABundle() (string, bool, error) {
175183
cm, err := d.Listers.OpenShiftConfig.Get("cloud-provider-config")
176-
if errors.IsNotFound(err) {
184+
if apimachineryerrors.IsNotFound(err) {
177185
return "", true, nil
178186
}
179187
if err != nil {
@@ -186,6 +194,15 @@ func (d *driver) CABundle() (string, bool, error) {
186194
return caBundle, false, nil
187195
}
188196

197+
type ErrContainerEndpointNotFound struct {
198+
wrapped error
199+
}
200+
201+
func (err ErrContainerEndpointNotFound) Unwrap() error { return err.wrapped }
202+
func (err ErrContainerEndpointNotFound) Error() string {
203+
return fmt.Sprintf("container endpoint not found in the OpenStack catalog: %v", err.wrapped)
204+
}
205+
189206
// getSwiftClient returns a client that allows to interact with the OpenStack Swift service
190207
func (d *driver) getSwiftClient() (*gophercloud.ServiceClient, error) {
191208
cfg, err := GetConfig(d.Listers)
@@ -215,18 +232,18 @@ func (d *driver) getSwiftClient() (*gophercloud.ServiceClient, error) {
215232

216233
provider, err := openstack.NewClient(opts.IdentityEndpoint)
217234
if err != nil {
218-
return nil, fmt.Errorf("Create new provider client failed: %v", err)
235+
return nil, fmt.Errorf("failed to create a new OpenStack provider client: %w", err)
219236
}
220237

221238
cert, _, err := d.CABundle()
222239
if err != nil {
223-
return nil, fmt.Errorf("Failed to get cloud provider CA certificate: %v", err)
240+
return nil, fmt.Errorf("failed to get cloud provider CA certificate: %w", err)
224241
}
225242

226243
if cert != "" {
227244
certPool, err := x509.SystemCertPool()
228245
if err != nil {
229-
return nil, fmt.Errorf("Create system cert pool failed: %v", err)
246+
return nil, fmt.Errorf("failed to read the system cert pool: %w", err)
230247
}
231248
certPool.AppendCertsFromPEM([]byte(cert))
232249
client := http.Client{
@@ -241,24 +258,42 @@ func (d *driver) getSwiftClient() (*gophercloud.ServiceClient, error) {
241258

242259
err = openstack.Authenticate(provider, *opts)
243260
if err != nil {
244-
return nil, fmt.Errorf("Failed to authenticate provider client: %v", err)
261+
return nil, fmt.Errorf("failed to authenticate against OpenStack: %w", err)
245262
}
246263

247264
endpointOpts := gophercloud.EndpointOpts{
248265
Region: regionName,
249266
Name: "swift",
250267
}
251268

252-
var client *gophercloud.ServiceClient
253-
client, err = openstack.NewContainerV1(provider, endpointOpts)
254-
if _, ok := err.(*gophercloud.ErrEndpointNotFound); ok {
255-
endpointOpts.Type = "object-store"
256-
client, err = openstack.NewContainerV1(provider, endpointOpts)
257-
if err != nil {
258-
return nil, err
269+
client, err := openstack.NewContainerV1(provider, endpointOpts)
270+
if err != nil {
271+
if _, ok := err.(*gophercloud.ErrEndpointNotFound); ok {
272+
// In gophercloud the default endpoint type for
273+
// containers is "container". However, some OpenStack
274+
// clouds are deployed with a single endpoint type for
275+
// all Swift entities - "object-store".
276+
//
277+
// If a "container" endpoint is not found, then try
278+
// "object-store".
279+
endpointOpts.Type = "object-store"
280+
281+
var errOnAlternativeEndpoint error
282+
client, errOnAlternativeEndpoint = openstack.NewContainerV1(provider, endpointOpts)
283+
if errOnAlternativeEndpoint != nil {
284+
if _, ok := errOnAlternativeEndpoint.(*gophercloud.ErrEndpointNotFound); ok {
285+
// If none of the endpoints is found, then
286+
// return the error we got on the default one
287+
// so that we limit confusion on the error
288+
// trace.
289+
return nil, ErrContainerEndpointNotFound{err}
290+
} else {
291+
return nil, fmt.Errorf("failed to get the object storage alternative endpoint: %w", errOnAlternativeEndpoint)
292+
}
293+
}
294+
return client, nil
259295
}
260-
} else if err != nil {
261-
return nil, err
296+
return nil, fmt.Errorf("failed to get the object storage endpoint: %w", err)
262297
}
263298

264299
return client, nil

pkg/storage/swift/swift_test.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package swift
22

33
import (
44
"encoding/json"
5+
"errors"
56
"fmt"
67
"net/http"
78
"reflect"
@@ -910,7 +911,9 @@ func TestSwiftIsAvailable(t *testing.T) {
910911
Infrastructures: fakeInfrastructureLister(cloudName),
911912
OpenShiftConfig: MockConfigMapNamespaceLister{},
912913
}
913-
th.AssertEquals(t, true, IsSwiftEnabled(listers))
914+
isSwiftEnabled, err := IsSwiftEnabled(listers)
915+
th.AssertNoErr(t, err)
916+
th.AssertEquals(t, true, isSwiftEnabled)
914917
}
915918

916919
func TestSwiftIsNotAvailable(t *testing.T) {
@@ -953,16 +956,21 @@ func TestSwiftIsNotAvailable(t *testing.T) {
953956

954957
_, err := d.getSwiftClient()
955958
// if Swift endpoint is not registered, getSwiftClient should return *ErrEndpointNotFound
956-
_, ok := err.(*gophercloud.ErrEndpointNotFound)
957-
th.AssertEquals(t, true, ok)
959+
gophercloudNotFound := new(gophercloud.ErrEndpointNotFound)
960+
th.AssertEquals(t, true, errors.As(err, &gophercloudNotFound))
961+
962+
// ...which should be reported as the sentinel error type ErrContainerEndpointNotFound
963+
th.AssertEquals(t, true, errors.As(err, &ErrContainerEndpointNotFound{}))
958964

959965
// IsSwiftEnabled should return false in this case
960966
listers := &regopclient.StorageListers{
961967
Secrets: MockIPISecretNamespaceLister{},
962968
Infrastructures: fakeInfrastructureLister(cloudName),
963969
OpenShiftConfig: MockConfigMapNamespaceLister{},
964970
}
965-
th.AssertEquals(t, false, IsSwiftEnabled(listers))
971+
isSwiftEnabled, err := IsSwiftEnabled(listers)
972+
th.AssertNoErr(t, err)
973+
th.AssertEquals(t, false, isSwiftEnabled)
966974
}
967975

968976
func TestNoPermissionsKeystone(t *testing.T) {
@@ -1012,7 +1020,9 @@ func TestNoPermissionsKeystone(t *testing.T) {
10121020
Infrastructures: fakeInfrastructureLister(cloudName),
10131021
OpenShiftConfig: MockConfigMapNamespaceLister{},
10141022
}
1015-
th.AssertEquals(t, false, IsSwiftEnabled(listers))
1023+
isSwiftEnabled, err := IsSwiftEnabled(listers)
1024+
th.AssertNoErr(t, err)
1025+
th.AssertEquals(t, false, isSwiftEnabled)
10161026
}
10171027

10181028
func TestNoPermissionsSwauth(t *testing.T) {
@@ -1062,7 +1072,9 @@ func TestNoPermissionsSwauth(t *testing.T) {
10621072
Infrastructures: fakeInfrastructureLister(cloudName),
10631073
OpenShiftConfig: MockConfigMapNamespaceLister{},
10641074
}
1065-
th.AssertEquals(t, false, IsSwiftEnabled(listers))
1075+
isSwiftEnabled, err := IsSwiftEnabled(listers)
1076+
th.AssertNoErr(t, err)
1077+
th.AssertEquals(t, false, isSwiftEnabled)
10661078
}
10671079

10681080
func TestConfigStatusUpdate(t *testing.T) {

0 commit comments

Comments
 (0)