From a83d1d5b5fdd0aa0fac8e924e1a216b78afc5ab6 Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 10:36:06 +1000 Subject: [PATCH 01/10] chore: add release notes --- docs/4.6.1-release-notes.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/4.6.1-release-notes.md diff --git a/docs/4.6.1-release-notes.md b/docs/4.6.1-release-notes.md new file mode 100644 index 0000000..af34e97 --- /dev/null +++ b/docs/4.6.1-release-notes.md @@ -0,0 +1,2 @@ +### Features +- Additional information output to help diagnose common Azure issues From f512eaec0a81eca07fcc27cbd5b3023f4da2051a Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 11:41:54 +1000 Subject: [PATCH 02/10] added logging --- src/Connector.DataLake.Common/Connector/DataLakeClient.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs index 6bca5f7..0f85491 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.IO; using System.Threading.Tasks; +using Serilog; namespace CluedIn.Connector.DataLake.Common.Connector { @@ -150,6 +151,8 @@ private async Task GetDirectoryClientAsync( string subDirectory, bool ensureExists) { + Log.Logger.Information("GetDirectoryClientAsync - '{RootDirectoryPath}', '{SubDirectory}', '{EnsureExists}'", configuration.RootDirectoryPath, subDirectory, ensureExists); + var directory = configuration.RootDirectoryPath; var directoryClient = fileSystemClient.GetDirectoryClient(directory); if (string.IsNullOrWhiteSpace(subDirectory)) @@ -161,6 +164,7 @@ private async Task GetDirectoryClientAsync( if (ensureExists && !await directoryClient.ExistsAsync()) { + Log.Logger.Information("GetDirectoryClientAsync - Calling CreateDirectoryAsync '{Path}'", directoryClient.Path); directoryClient = await
fileSystemClient.CreateDirectoryAsync(directoryClient.Path); } From 3f8ad7b29bf646080aeadf78cf610b2ac42a2b4d Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 12:31:15 +1000 Subject: [PATCH 03/10] recursive folder creation --- .../Connector/DataLakeClient.cs | 74 ++++++++++++++++--- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs index 0f85491..59be649 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs @@ -1,10 +1,11 @@ -using Azure.Storage.Files.DataLake; -using Azure.Storage.Files.DataLake.Models; -using CluedIn.Core.Connectors; using System; using System.Collections.Generic; using System.IO; using System.Threading.Tasks; +using Azure; +using Azure.Storage.Files.DataLake; +using Azure.Storage.Files.DataLake.Models; +using CluedIn.Core.Connectors; using Serilog; namespace CluedIn.Connector.DataLake.Common.Connector @@ -151,22 +152,71 @@ private async Task GetDirectoryClientAsync( string subDirectory, bool ensureExists) { - Log.Logger.Information("GetDirectoryClientAsync - '{RootDirectoryPath}', '{SubDirectory}', '{EnsureExists}'", configuration.RootDirectoryPath, subDirectory, ensureExists); - var directory = configuration.RootDirectoryPath; var directoryClient = fileSystemClient.GetDirectoryClient(directory); - if (string.IsNullOrWhiteSpace(subDirectory)) + + // Log the initial directory calculation + Log.Logger.Information("Requested directory path relative to file system: {RootDirectory} with subdirectory {SubDirectory}.", + directory, subDirectory); + + if (!string.IsNullOrWhiteSpace(subDirectory)) { - return directoryClient; + directoryClient = directoryClient.GetSubDirectoryClient(subDirectory); } - directoryClient = directoryClient.GetSubDirectoryClient(subDirectory); - - if (ensureExists && !await 
directoryClient.ExistsAsync()) + // --- START OF REPLACEMENT LOGIC --- + if (ensureExists) { - Log.Logger.Information("GetDirectoryClientAsync - Calling CreateDirectoryAsync '{Path}'", directoryClient.Path); - directoryClient = await fileSystemClient.CreateDirectoryAsync(directoryClient.Path); + var fullRelativePath = directoryClient.Path; + + Log.Logger.Information("Starting recursive path creation for full path: {FullPath}", fullRelativePath); + + if (!string.IsNullOrWhiteSpace(fullRelativePath)) + { + var pathSegments = fullRelativePath.Split('/'); + var currentPath = ""; + + foreach (var segment in pathSegments) + { + if (string.IsNullOrWhiteSpace(segment)) + continue; + + currentPath = string.IsNullOrEmpty(currentPath) ? segment : $"{currentPath}/{segment}"; + + // Get a client for the CURRENT segment (A, then A/B, then A/B/C) + var segmentClient = fileSystemClient.GetDirectoryClient(currentPath); + + Log.Logger.Information("Checking/creating directory segment: {CurrentPath}", currentPath); + + var response = await segmentClient.CreateIfNotExistsAsync(); + var statusCode = response.GetRawResponse().Status; + + // Log the outcome based on the status code + if (statusCode == 201) + { + Log.Logger.Information("Successfully CREATED directory segment: {CurrentPath}", currentPath); + } + else if (statusCode == 409 || statusCode == 200) + { + Log.Logger.Information("Directory segment already exists: {CurrentPath}", currentPath); + } + else + { + // For any unexpected status code, log the failure and throw. + Log.Logger.Error("Failed to ensure directory segment exists. Path: {CurrentPath}, Status Code: {StatusCode}", + currentPath, statusCode); + + throw new RequestFailedException(response.GetRawResponse()); + } + } + } + + Log.Logger.Information("Recursive path creation complete. Full path is guaranteed to exist."); } + // --- END OF REPLACEMENT LOGIC --- + + // Final check for debugging purposes—this URL is where the OpenWriteAsync will target. 
+ Log.Logger.Information("Returning directory client for URI: {DirectoryUri}", directoryClient.Uri); return directoryClient; } From bfed797e59d08ca451f75f495309573b355348b1 Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 12:51:27 +1000 Subject: [PATCH 04/10] . --- .../Connector/DataLakeClient.cs | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs index 59be649..8015dec 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs @@ -152,6 +152,7 @@ private async Task GetDirectoryClientAsync( string subDirectory, bool ensureExists) { + // ... (Initialization and logging remain the same) var directory = configuration.RootDirectoryPath; var directoryClient = fileSystemClient.GetDirectoryClient(directory); @@ -168,7 +169,6 @@ private async Task GetDirectoryClientAsync( if (ensureExists) { var fullRelativePath = directoryClient.Path; - Log.Logger.Information("Starting recursive path creation for full path: {FullPath}", fullRelativePath); if (!string.IsNullOrWhiteSpace(fullRelativePath)) @@ -182,42 +182,50 @@ private async Task GetDirectoryClientAsync( continue; currentPath = string.IsNullOrEmpty(currentPath) ? 
segment : $"{currentPath}/{segment}"; - - // Get a client for the CURRENT segment (A, then A/B, then A/B/C) var segmentClient = fileSystemClient.GetDirectoryClient(currentPath); Log.Logger.Information("Checking/creating directory segment: {CurrentPath}", currentPath); var response = await segmentClient.CreateIfNotExistsAsync(); - var statusCode = response.GetRawResponse().Status; + var rawResponse = response.GetRawResponse(); + var statusCode = rawResponse.Status; - // Log the outcome based on the status code if (statusCode == 201) { Log.Logger.Information("Successfully CREATED directory segment: {CurrentPath}", currentPath); } - else if (statusCode == 409 || statusCode == 200) + else if (statusCode == 409) // Most common "already exists" for this operation + { + Log.Logger.Information("Directory segment already exists (409 Conflict): {CurrentPath}", currentPath); + } + else if (statusCode == 400 && + rawResponse.Headers.TryGetValue("x-ms-error-code", out var errorCode) && + errorCode == "OperationNotAllowedOnThePath") { - Log.Logger.Information("Directory segment already exists: {CurrentPath}", currentPath); + // This specifically indicates the directory *is* there, but the operation + // (which includes setting default properties) failed because of an existing state. + Log.Logger.Information("OperationNotAllowedOnThePath: Path exists or is immutable: {CurrentPath}", currentPath); + } + else if (statusCode == 200) // Less common "already exists" (as discussed) + { + Log.Logger.Information("Directory segment already exists (200 OK): {CurrentPath}", currentPath); } else { - // For any unexpected status code, log the failure and throw. + // All other unexpected failures (Auth, Internal Server Error, etc.) Log.Logger.Error("Failed to ensure directory segment exists. 
Path: {CurrentPath}, Status Code: {StatusCode}", currentPath, statusCode); - throw new RequestFailedException(response.GetRawResponse()); + // Use RequestFailedException(rawResponse) to capture the full error details + throw new RequestFailedException(rawResponse); } } } - Log.Logger.Information("Recursive path creation complete. Full path is guaranteed to exist."); } // --- END OF REPLACEMENT LOGIC --- - // Final check for debugging purposes—this URL is where the OpenWriteAsync will target. Log.Logger.Information("Returning directory client for URI: {DirectoryUri}", directoryClient.Uri); - return directoryClient; } From 4a6d7cdb67de487a70bb6c017699cb9f29a1ec06 Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 14:38:01 +1000 Subject: [PATCH 05/10] . --- .../Connector/DataLakeClient.cs | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs index 8015dec..8699c68 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs @@ -186,7 +186,29 @@ private async Task GetDirectoryClientAsync( Log.Logger.Information("Checking/creating directory segment: {CurrentPath}", currentPath); - var response = await segmentClient.CreateIfNotExistsAsync(); + Response response; + try + { + response = await segmentClient.CreateIfNotExistsAsync(); + } + catch(RequestFailedException ex) + { + if (ex.ErrorCode == "OperationNotAllowedOnThePath") + { + continue; + } + + throw; + } + + if (response == null) + { + // weird behaviour where null is being returned for + // - XXXXXX.MountedRelationalDatabase/Files + // - XXXXXX.MountedRelationalDatabase/Files/LandingZone + continue; + } + var rawResponse = response.GetRawResponse(); var statusCode = rawResponse.Status; From 1f247b55580a20468c9e757fe3e7d3b226002726 Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: 
Wed, 22 Oct 2025 15:19:18 +1000 Subject: [PATCH 06/10] . --- .../Connector/DataLakeExportEntitiesJobBase.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs index 6b5f0e7..cbcb077 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs @@ -178,11 +178,12 @@ public override async Task DoRunAsync(ExecutionContext context, IDataLakeJobArgs } context.Log.LogInformation( - "Begin writing to file '{OutputFileName}' using data at {DataTime} and {TemporaryOutputFileName} ({TemporaryFileClientUri}).", + "Begin writing to file '{OutputFileName}' using data at {DataTime} and {TemporaryOutputFileName} ({TemporaryFileClientUri}) IsOverwriteEnabled={IsOverwriteEnabled}.", outputFileName, asOfTime, temporaryOutputFileName, - temporaryFileClient.Uri); + temporaryFileClient.Uri, + configuration.IsOverwriteEnabled); var totalRows = await writeFileContentsAsync(); if (configuration.IsDeltaMode && totalRows == 0 && !GetIsEmptyFileAllowed(exportJobData)) From a3ed3b1858b9d4a73022aa4aee470bd2f7117f4e Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 15:22:43 +1000 Subject: [PATCH 07/10] . 
--- src/Connector.DataLake.Common/Connector/DataLakeClient.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs index 8699c68..0442f37 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeClient.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeClient.cs @@ -203,6 +203,7 @@ private async Task GetDirectoryClientAsync( if (response == null) { + Log.Logger.Information("Null response received for: {CurrentPath}", currentPath); // weird behaviour where null is being returned for // - XXXXXX.MountedRelationalDatabase/Files // - XXXXXX.MountedRelationalDatabase/Files/LandingZone @@ -243,7 +244,7 @@ private async Task GetDirectoryClientAsync( } } } - Log.Logger.Information("Recursive path creation complete. Full path is guaranteed to exist."); + Log.Logger.Information("Recursive path creation complete. Full path is guaranteed to exist. {FullPath}", fullRelativePath); } // --- END OF REPLACEMENT LOGIC --- From 639998f316f21af46089b22686b15e4c899f123d Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 15:32:09 +1000 Subject: [PATCH 08/10] . 
--- .../Connector/DataLakeExportEntitiesJobBase.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs index cbcb077..0ebdc03 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs @@ -228,6 +228,7 @@ async Task writeFileContentsAsync() { var fieldNamesToUse = await GetFieldNamesAsync(context, exportJobData, configuration, fieldNames); var sqlDataWriter = GetSqlDataWriter(outputFormat); + await directoryClient.CreateIfNotExistsAsync(); await using var outputStream = await temporaryFileClient.OpenWriteAsync(configuration.IsOverwriteEnabled); using var bufferedStream = new DataLakeBufferedWriteStream(outputStream); return await sqlDataWriter?.WriteAsync(context, configuration, bufferedStream, fieldNamesToUse, IsInitialExport, reader); From 1c2ca88c346828ddcc24c3abe0a50c6bc8894d87 Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 15:49:39 +1000 Subject: [PATCH 09/10] . 
--- .../DataLakeExportEntitiesJobBase.cs | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs index 0ebdc03..9baa74f 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs @@ -6,7 +6,7 @@ using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Transactions; - +using Azure; using Azure.Storage.Files.DataLake; using CluedIn.Connector.DataLake.Common.Connector.SqlDataWriter; @@ -229,9 +229,28 @@ async Task writeFileContentsAsync() var fieldNamesToUse = await GetFieldNamesAsync(context, exportJobData, configuration, fieldNames); var sqlDataWriter = GetSqlDataWriter(outputFormat); await directoryClient.CreateIfNotExistsAsync(); - await using var outputStream = await temporaryFileClient.OpenWriteAsync(configuration.IsOverwriteEnabled); - using var bufferedStream = new DataLakeBufferedWriteStream(outputStream); - return await sqlDataWriter?.WriteAsync(context, configuration, bufferedStream, fieldNamesToUse, IsInitialExport, reader); + + try + { + await using var outputStream = await temporaryFileClient.OpenWriteAsync(configuration.IsOverwriteEnabled); + using var bufferedStream = new DataLakeBufferedWriteStream(outputStream); + return await sqlDataWriter?.WriteAsync(context, configuration, bufferedStream, fieldNamesToUse, IsInitialExport, reader); + } + catch (RequestFailedException e) + { + context.Log.LogInformation("Request failed"); + if (e.Status == 404) + { + context.Log.LogInformation("Request failed 404 - creating file"); + + await temporaryFileClient.CreateIfNotExistsAsync(); + + await using var outputStream = await temporaryFileClient.OpenWriteAsync(configuration.IsOverwriteEnabled); + using var bufferedStream = new 
DataLakeBufferedWriteStream(outputStream); + return await sqlDataWriter?.WriteAsync(context, configuration, bufferedStream, fieldNamesToUse, IsInitialExport, reader); + } + throw new ApplicationException($"Failed to write to {temporaryFileClient.Uri}", e); + } } async Task setFilePropertiesAsync() From 8f3450f933c688d77da7e6b50c2d4c8c8cc02b2a Mon Sep 17 00:00:00 2001 From: Andrew Hardy Date: Wed, 22 Oct 2025 15:52:40 +1000 Subject: [PATCH 10/10] . --- .../Connector/DataLakeExportEntitiesJobBase.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs index 9baa74f..6901a0f 100644 --- a/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs +++ b/src/Connector.DataLake.Common/Connector/DataLakeExportEntitiesJobBase.cs @@ -243,7 +243,7 @@ async Task writeFileContentsAsync() { context.Log.LogInformation("Request failed 404 - creating file"); - await temporaryFileClient.CreateIfNotExistsAsync(); + await temporaryFileClient.CreateAsync(); await using var outputStream = await temporaryFileClient.OpenWriteAsync(configuration.IsOverwriteEnabled); using var bufferedStream = new DataLakeBufferedWriteStream(outputStream);