Skip to content

Commit 5b28549

Browse files
toolkit: enable downloads using Azure CLI credentials (microsoft#14494)
Signed-off-by: Manuel Huber <[email protected]> Co-authored-by: Pawel Winogrodzki <[email protected]>
1 parent 36d88eb commit 5b28549

File tree

6 files changed

+411
-5
lines changed

6 files changed

+411
-5
lines changed

toolkit/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ PACKAGE_BUILD_LIST ?=
2323
PACKAGE_REBUILD_LIST ?=
2424
##help:var:PACKAGE_IGNORE_LIST:<spec_list>=List of space-separated spec folders to ignore during the build. Must not overlap with "PACKAGE_REBUILD_LIST", may overlap with "PACKAGE_BUILD_LIST". Example: PACKAGE_IGNORE_LIST="zlib".
2525
PACKAGE_IGNORE_LIST ?=
26+
##help:var:SOURCE_AUTH_MODE:<mode>=Mode to use for downloading source files for SRPM packing. Valid options: anonymous, azurecli (as defined in the srpmpacker code base).
27+
SOURCE_AUTH_MODE ?=
2628
##help:var:SRPM_PACK_LIST:<spec_list>=List of space-separated spec folders inside "SPECS_DIR" to analyze for the build. If empty, all items inside the "SPECS_DIR" will be analyzed. Example: SRPM_PACK_LIST="kernel go which".
2729
SRPM_PACK_LIST ?=
2830
##help:var:TEST_RUN_LIST:<spec_list>=List of space-separated spec folders to consider for package tests. Specs from the listed folders MUST contain the "%check" section. If empty, all testable items from "SRPM_PACK_LIST" will be considered. Will not re-test previously built packages. Example: TEST_RUN_LIST="libguestfs zlib".

toolkit/docs/building/building.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,15 +370,22 @@ Daily build packages are available via `DAILY_BUILD_ID`. Use `DAILY_BUILD_ID=lkg
370370

371371
### Authentication
372372

373-
If supplying custom endpoints for source/SRPM/package servers, accessing these resources may require keys and certificates. The keys and certificates can be set using:
373+
If supplying custom endpoints for source/SRPM/package servers, accessing these resources may require authentication.
374+
Keys and certificates for TLS based authentication can be set using:
374375

375376
```bash
376377
sudo make image CONFIG_FILE="./imageconfigs/core-efi.json" CA_CERT=/path/to/rootca.crt TLS_CERT=/path/to/user.crt TLS_KEY=/path/to/user.key
377378
```
378379

380+
For SRPM packing (i.e., for retrieving package sources), an Azure CLI login can be used to access Azure Blob Storage accounts that require authentication and do not support anonymous access:
381+
```bash
382+
sudo make build-packages SOURCE_AUTH_MODE="azurecli"
383+
```
384+
Using this mode requires running `az login` with your managed identity ID beforehand.
385+
379386
## Building Everything From Scratch
380387

381-
**NOTE: Source files must be made available for all packages. They can be placed manually in the corresponding SPEC/\* folders, `SOURCE_URL=<YOUR_SOURCE_SERVER>` may be provided, or DOWNLOAD_SRPMS=y may be used to use pre-packages sources. Core Azure Linux source packages are available at `SOURCE_URL=https://azurelinuxsrcstorage.blob.core.windows.net/sources/core`**
388+
**NOTE: Source files must be made available for all packages. They can be placed manually in the corresponding SPEC/\* folders, `SOURCE_URL=<YOUR_SOURCE_SERVER>` may be provided, or DOWNLOAD_SRPMS=y may be used to use pre-packaged sources. Core Azure Linux source packages are available at `SOURCE_URL=https://azurelinuxsrcstorage.blob.core.windows.net/sources/core` and support anonymous access.**
382389

383390
The build system can operate without using pre-built components if desired. There are several variables which enable/disable build components and sources of data. They are listed here along with their default values:
384391

@@ -840,6 +847,8 @@ To reproduce an ISO build, run the same make invocation as before, but set:
840847
| CA_CERT | | CA cert to access the above resources, in addition to the system certificate store
841848
| TLS_CERT | | TLS cert to access the above resources
842849
| TLS_KEY | | TLS key to access the above resources
850+
| SOURCE_AUTH_MODE | |
851+
Authentication mode for downloading source files for SRPM packing. Valid options: anonymous, azurecli (as defined in the srpmpacker code base). The azurecli option enables Azure CLI based authentication for accessing Azure Blob Storages which do not allow for public access. The default method is anonymous access using HTTP GET.
843852

844853
---
845854

toolkit/scripts/srpm_pack.mk

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ $(STATUS_FLAGS_DIR)/build_srpms.flag: $(chroot_worker) $(local_specs) $(local_sp
8282
--dir=$(SPECS_DIR) \
8383
--output-dir=$(BUILD_SRPMS_DIR) \
8484
--source-url=$(SOURCE_URL) \
85+
$(if $(SOURCE_AUTH_MODE),--source-auth-mode=$(SOURCE_AUTH_MODE)) \
8586
--dist-tag=$(DIST_TAG) \
8687
--ca-cert=$(CA_CERT) \
8788
--tls-cert=$(TLS_CERT) \
@@ -108,6 +109,7 @@ $(STATUS_FLAGS_DIR)/build_toolchain_srpms.flag: $(toolchain_files) $(go-srpmpack
108109
--dir=$(SPECS_DIR) \
109110
--output-dir=$(BUILD_SRPMS_DIR) \
110111
--source-url=$(SOURCE_URL) \
112+
$(if $(SOURCE_AUTH_MODE),--source-auth-mode=$(SOURCE_AUTH_MODE)) \
111113
--dist-tag=$(DIST_TAG) \
112114
--ca-cert=$(CA_CERT) \
113115
--tls-cert=$(TLS_CERT) \

toolkit/tools/internal/azureblobstorage/azureblobstorage.go

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@ import (
77
"context"
88
"errors"
99
"fmt"
10+
"net/url"
1011
"os"
12+
"regexp"
13+
"strings"
1114
"time"
1215

1316
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
1417
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
1518
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
1619
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
20+
"github.com/microsoft/azurelinux/toolkit/tools/internal/retry"
1721
)
1822

1923
const (
@@ -22,6 +26,60 @@ const (
2226
AzureCLIAccess = 2
2327
)
2428

29+
// AzureSDK404ErrorPattern is the substring looked for in the text of an Azure SDK error to
// recognize a 404-like condition. For a missing blob the SDK error text looks like this
// (a similar message is returned for a missing storage container):
//
//	RESPONSE 404: 404 The specified blob does not exist.
//	ERROR CODE: BlobNotFound
const AzureSDK404ErrorPattern = "RESPONSE 404"
34+
var (
35+
// Every valid blob URL will be of the form: <storage_account>.blob.core.windows.net/<container>/<blob_name>
36+
// With <blob_name> being optional.
37+
//
38+
// For:
39+
// https://mystorageaccount.blob.core.windows.net/mycontainer/my/blob/name
40+
//
41+
// We'd get:
42+
// - storage account: mystorageaccount
43+
// - container: mycontainer
44+
// - blob name: my/blob/name
45+
blobStorageURLRegex = regexp.MustCompile(`^([^.]+)\.blob\.core\.windows\.net/([^/]+)(?:/([^?#]+))?`)
46+
)
47+
48+
const (
49+
blobStorageURLMatchSubString = iota
50+
blobStorageURLStorageName
51+
blobStorageURLContainerName
52+
blobStorageURLBlobName
53+
blobStorageURLMaxMatchLen
54+
)
55+
56+
// ParseAzureBlobStorageURL parses an Azure Blob Storage URL and extracts storage account, container, and optionally blob information.
57+
func ParseAzureBlobStorageURL(urlStr string) (storageAccountName, containerName, blobName string, err error) {
58+
parsedURL, err := url.Parse(urlStr)
59+
if err != nil {
60+
return "", "", "", fmt.Errorf("failed to parse URL (%s):\n%w", urlStr, err)
61+
}
62+
63+
if parsedURL.Scheme == "" {
64+
return "", "", "", fmt.Errorf("URL (%s) is not a valid Azure Blob Storage URL - must start with a scheme", urlStr)
65+
}
66+
67+
matches := blobStorageURLRegex.FindStringSubmatch(parsedURL.Host + parsedURL.Path)
68+
if len(matches) < blobStorageURLBlobName {
69+
return "", "", "", fmt.Errorf("URL (%s) is not a valid Azure Blob Storage URL"+
70+
" (expected: <scheme>://<storage_account>.blob.core.windows.net/<container>/<optional_blob_name>)", urlStr)
71+
}
72+
73+
storageAccountName = matches[blobStorageURLStorageName]
74+
containerName = matches[blobStorageURLContainerName]
75+
76+
if len(matches) > blobStorageURLBlobName {
77+
blobName = matches[blobStorageURLBlobName]
78+
}
79+
80+
return storageAccountName, containerName, blobName, nil
81+
}
82+
2583
type AzureBlobStorage struct {
2684
theClient *azblob.Client
2785
}
@@ -148,3 +206,88 @@ func Create(tenantId string, userName string, password string, storageAccount st
148206

149207
return nil, errors.New("unknown authentication type")
150208
}
209+
210+
// CreateFromURL creates an AzureBlobStorage client from a storage account URL
211+
func CreateFromURL(storageAccountURL string) (abs *AzureBlobStorage, err error) {
212+
// Parse the URL to extract storage account information
213+
storageAccountName, _, _, parseErr := ParseAzureBlobStorageURL(storageAccountURL)
214+
if parseErr != nil {
215+
return nil, fmt.Errorf("failed to parse storage account URL:\n%w", parseErr)
216+
}
217+
218+
abs, err = Create("", "", "", storageAccountName, AzureCLIAccess)
219+
if err != nil {
220+
return nil, fmt.Errorf("failed to create Azure Blob Storage client:\n%w", err)
221+
}
222+
223+
return abs, nil
224+
}
225+
226+
// DownloadFileWithRetry downloads a file from an Azure Blob Storage using the Azure SDK for Go with retry logic
227+
// ctx: The context to use for the download. Use context.Background() if no other context is available.
228+
// azureBlobStorage: The Azure Blob Storage client.
229+
// srcUrl: The full Azure Blob Storage URL including container and blob path.
230+
// dstFile: The local file to save the download to.
231+
// timeout: The maximum duration for the download operation, use 0 for no timeout.
232+
// returns: wasCancelled: true if the download was cancelled via the context, false otherwise.
233+
// returns: err: An error if the download failed (including being cancelled), nil otherwise.
234+
func DownloadFileWithRetry(
235+
ctx context.Context,
236+
azureBlobStorage *AzureBlobStorage,
237+
srcUrl, dstFile string,
238+
timeout time.Duration,
239+
) (wasCancelled bool, err error) {
240+
var closeCtx context.CancelFunc
241+
242+
if ctx == nil {
243+
return false, fmt.Errorf("context is nil")
244+
}
245+
246+
if timeout < 0 {
247+
return false, fmt.Errorf("invalid timeout: %s", timeout)
248+
}
249+
250+
if timeout == 0 {
251+
ctx, closeCtx = context.WithCancel(ctx)
252+
} else {
253+
ctx, closeCtx = context.WithTimeout(ctx, timeout)
254+
}
255+
defer closeCtx()
256+
257+
// Parse the URL to get container and blob names
258+
_, containerName, blobName, parseErr := ParseAzureBlobStorageURL(srcUrl)
259+
if parseErr != nil {
260+
return false, fmt.Errorf("failed to parse source URL:\n%w", parseErr)
261+
}
262+
263+
logger.Log.Infof("Attempting Azure SDK download for blob (%s/%s)", containerName, blobName)
264+
265+
retryNum := 1
266+
errorWas404 := false
267+
wasCancelled, err = retry.RunWithDefaultDownloadBackoff(ctx, func() error {
268+
netErr := azureBlobStorage.Download(ctx, containerName, blobName, dstFile)
269+
if netErr != nil {
270+
// Check if the error is a 404-like condition (blob or container not found)
271+
if strings.Contains(netErr.Error(), AzureSDK404ErrorPattern) {
272+
logger.Log.Warnf("Attempt %d/%d: failed to download (%s/%s) with error: (%s)", retryNum, retry.DefaultDownloadRetryAttempts, containerName, blobName, netErr)
273+
logger.Log.Warnf("This error is likely unrecoverable, will not retry")
274+
errorWas404 = true
275+
closeCtx()
276+
} else {
277+
logger.Log.Infof("Attempt %d/%d: failed to download (%s/%s) with error: (%s)", retryNum, retry.DefaultDownloadRetryAttempts, containerName, blobName, netErr)
278+
}
279+
}
280+
retryNum++
281+
return netErr
282+
})
283+
284+
// If the error was a 404-like error, we should not consider the download as cancelled
285+
if errorWas404 {
286+
wasCancelled = false
287+
}
288+
289+
if err != nil {
290+
err = fmt.Errorf("failed to download (%s/%s) to (%s):\n%w", containerName, blobName, dstFile, err)
291+
}
292+
return
293+
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
package azureblobstorage
5+
6+
import (
7+
"os"
8+
"testing"
9+
10+
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
11+
)
12+
13+
func TestMain(m *testing.M) {
14+
logger.InitStderrLog()
15+
os.Exit(m.Run())
16+
}
17+
18+
func TestParseAzureBlobStorageURL(t *testing.T) {
19+
tests := []struct {
20+
name string
21+
urlStr string
22+
wantStorageAccountName string
23+
wantContainerName string
24+
wantBlobName string
25+
wantErr bool
26+
}{
27+
{
28+
name: "Valid URL with container only",
29+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer",
30+
wantStorageAccountName: "mystorageaccount",
31+
wantContainerName: "mycontainer",
32+
wantBlobName: "",
33+
wantErr: false,
34+
},
35+
{
36+
name: "Valid URL with container and blob",
37+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob.txt",
38+
wantStorageAccountName: "mystorageaccount",
39+
wantContainerName: "mycontainer",
40+
wantBlobName: "myblob.txt",
41+
wantErr: false,
42+
},
43+
{
44+
name: "Valid URL with container and blob in folder",
45+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer/folder/myblob.txt",
46+
wantStorageAccountName: "mystorageaccount",
47+
wantContainerName: "mycontainer",
48+
wantBlobName: "folder/myblob.txt",
49+
wantErr: false,
50+
},
51+
{
52+
name: "Valid URL with deep folder hierarchy",
53+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer/folder1/folder2/myfile-1.2.3.tar.gz",
54+
wantStorageAccountName: "mystorageaccount",
55+
wantContainerName: "mycontainer",
56+
wantBlobName: "folder1/folder2/myfile-1.2.3.tar.gz",
57+
wantErr: false,
58+
},
59+
{
60+
name: "Valid URL with trailing slash",
61+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer/",
62+
wantStorageAccountName: "mystorageaccount",
63+
wantContainerName: "mycontainer",
64+
wantBlobName: "",
65+
wantErr: false,
66+
},
67+
{
68+
name: "Invalid URL - not Azure Blob Storage format",
69+
urlStr: "https://example.com/container/blob",
70+
wantErr: true,
71+
},
72+
{
73+
name: "Invalid URL - missing storage account",
74+
urlStr: "https://blob.core.windows.net/container/blob",
75+
wantErr: true,
76+
},
77+
{
78+
name: "Invalid URL - malformed hostname",
79+
urlStr: "https://invalid.hostname/container/blob",
80+
wantErr: true,
81+
},
82+
{
83+
name: "Invalid URL - missing container",
84+
urlStr: "https://mystorageaccount.blob.core.windows.net/",
85+
wantErr: true,
86+
},
87+
{
88+
name: "Invalid URL - empty path",
89+
urlStr: "https://mystorageaccount.blob.core.windows.net",
90+
wantErr: true,
91+
},
92+
{
93+
name: "Invalid URL - unparseable",
94+
urlStr: "not-a-url",
95+
wantErr: true,
96+
},
97+
{
98+
name: "Invalid URL - scheme missing",
99+
urlStr: "mystorageaccount.blob.core.windows.net/container",
100+
wantErr: true,
101+
},
102+
{
103+
name: "Valid URL with query parameters",
104+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer/myblob.txt?sv=2021-06-08",
105+
wantStorageAccountName: "mystorageaccount",
106+
wantContainerName: "mycontainer",
107+
wantBlobName: "myblob.txt",
108+
wantErr: false,
109+
},
110+
{
111+
name: "Valid URL with special characters in blob name",
112+
urlStr: "https://mystorageaccount.blob.core.windows.net/mycontainer/my-blob_file.2023.txt",
113+
wantStorageAccountName: "mystorageaccount",
114+
wantContainerName: "mycontainer",
115+
wantBlobName: "my-blob_file.2023.txt",
116+
wantErr: false,
117+
},
118+
}
119+
120+
for _, tt := range tests {
121+
t.Run(tt.name, func(t *testing.T) {
122+
gotStorageAccountName, gotContainerName, gotBlobName, err := ParseAzureBlobStorageURL(tt.urlStr)
123+
if (err != nil) != tt.wantErr {
124+
t.Errorf("ParseAzureBlobStorageURL() error = %v, wantErr %v", err, tt.wantErr)
125+
return
126+
}
127+
if !tt.wantErr {
128+
if gotStorageAccountName != tt.wantStorageAccountName {
129+
t.Errorf("ParseAzureBlobStorageURL() gotStorageAccountName = %v, want %v", gotStorageAccountName, tt.wantStorageAccountName)
130+
}
131+
if gotContainerName != tt.wantContainerName {
132+
t.Errorf("ParseAzureBlobStorageURL() gotContainerName = %v, want %v", gotContainerName, tt.wantContainerName)
133+
}
134+
if gotBlobName != tt.wantBlobName {
135+
t.Errorf("ParseAzureBlobStorageURL() gotBlobName = %v, want %v", gotBlobName, tt.wantBlobName)
136+
}
137+
}
138+
})
139+
}
140+
}

0 commit comments

Comments
 (0)