Skip to content

Commit f03fb8d

Browse files
Merge branch 'release/4.4.4'
2 parents 7fb6f93 + e81495f commit f03fb8d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+2200
-324
lines changed

CluedIn.Connector.AzureDataLake.sln

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,13 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connector.DataLake.Common",
5353
EndProject
5454
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connector.OneLake", "src\Connector.OneLake\Connector.OneLake.csproj", "{1AA8B845-9762-47DD-B0E4-3B2C20C7486A}"
5555
EndProject
56-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Connector.AzureDatabricks", "src\Connector.AzureDatabricks\Connector.AzureDatabricks.csproj", "{B0A8FAB9-8809-492F-A2B1-C141CE53A724}"
56+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connector.AzureDatabricks", "src\Connector.AzureDatabricks\Connector.AzureDatabricks.csproj", "{B0A8FAB9-8809-492F-A2B1-C141CE53A724}"
5757
EndProject
58-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Connector.SynapseDataEngineering", "src\Connector.SynapseDataEngineering\Connector.SynapseDataEngineering.csproj", "{61607CFA-7A18-4DD6-9512-83B620B288EB}"
58+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connector.SynapseDataEngineering", "src\Connector.SynapseDataEngineering\Connector.SynapseDataEngineering.csproj", "{61607CFA-7A18-4DD6-9512-83B620B288EB}"
5959
EndProject
60-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Connector.AzureAIStudio", "src\Connector.AzureAIStudio\Connector.AzureAIStudio.csproj", "{5F8EDA0E-5F95-4A7A-A7F3-217210674FDE}"
60+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connector.AzureAIStudio", "src\Connector.AzureAIStudio\Connector.AzureAIStudio.csproj", "{5F8EDA0E-5F95-4A7A-A7F3-217210674FDE}"
61+
EndProject
62+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connector.FabricOpenMirroring", "src\Connector.FabricOpenMirroring\Connector.FabricOpenMirroring.csproj", "{B6B64EFA-397F-467F-984E-B7FE73ED92A4}"
6163
EndProject
6264
Global
6365
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -97,6 +99,10 @@ Global
9799
{5F8EDA0E-5F95-4A7A-A7F3-217210674FDE}.Debug|Any CPU.Build.0 = Debug|Any CPU
98100
{5F8EDA0E-5F95-4A7A-A7F3-217210674FDE}.Release|Any CPU.ActiveCfg = Release|Any CPU
99101
{5F8EDA0E-5F95-4A7A-A7F3-217210674FDE}.Release|Any CPU.Build.0 = Release|Any CPU
102+
{B6B64EFA-397F-467F-984E-B7FE73ED92A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
103+
{B6B64EFA-397F-467F-984E-B7FE73ED92A4}.Debug|Any CPU.Build.0 = Debug|Any CPU
104+
{B6B64EFA-397F-467F-984E-B7FE73ED92A4}.Release|Any CPU.ActiveCfg = Release|Any CPU
105+
{B6B64EFA-397F-467F-984E-B7FE73ED92A4}.Release|Any CPU.Build.0 = Release|Any CPU
100106
EndGlobalSection
101107
GlobalSection(SolutionProperties) = preSolution
102108
HideSolutionNode = FALSE
@@ -116,6 +122,7 @@ Global
116122
{B0A8FAB9-8809-492F-A2B1-C141CE53A724} = {5256D9B9-8A1D-480D-A8F0-1A69AFA59B31}
117123
{61607CFA-7A18-4DD6-9512-83B620B288EB} = {5256D9B9-8A1D-480D-A8F0-1A69AFA59B31}
118124
{5F8EDA0E-5F95-4A7A-A7F3-217210674FDE} = {5256D9B9-8A1D-480D-A8F0-1A69AFA59B31}
125+
{B6B64EFA-397F-467F-984E-B7FE73ED92A4} = {5256D9B9-8A1D-480D-A8F0-1A69AFA59B31}
119126
EndGlobalSection
120127
GlobalSection(ExtensibilityGlobals) = postSolution
121128
SolutionGuid = {E6A866CB-824C-4271-8EA6-053B7FC4B134}

Directory.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project>
22
<PropertyGroup Label="Compilation Metadata">
33
<TargetFramework>net6.0</TargetFramework>
4-
<LangVersion>11.0</LangVersion>
4+
<LangVersion>12.0</LangVersion>
55
</PropertyGroup>
66

77
<PropertyGroup Label="Assembly Metadata">

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,25 @@ We're different because we use enhanced data management techniques like [Graph](
88
To learn more about CluedIn, [contact the team](https://www.cluedin.com/discovery-call) today.
99

1010
[https://www.cluedin.com](https://www.cluedin.com)
11+
12+
13+
## Development
14+
15+
### Parquet File Output
16+
17+
Microsoft support for Parquet format varies across products:
18+
1. GUID. One Lake and Open Mirroring doesn't support GUID type. It has to be serialized as a string
19+
1. Array of Strings.
20+
1. One Lake will not have an error but preview and SQL Analytics endpoint doesn't work
21+
1. Open Mirroring will have an error. Thus it cannot be added to the export
22+
1. For One Lake and Azure Data Lake, extra columns are added to facilitate usecases where the values are needed. It is in the form of X_String
23+
1. Characters in columns. One Lake and Open Mirroring only support alphanumeric and space. No other characters are supported, including dot for vocabulary keys.
24+
Thus they have to be escaped.
25+
26+
The table below shows what the connectors will output.
27+
28+
| Item | Array of String | JSON String | GUID as String | Escape Vocabulary Keys |
29+
|:---------------:|:---------------:|:-----------:|:--------------:|:----------------------:|
30+
| Azure Data Lake | YES | YES | OPTIONAL | OPTIONAL |
31+
| One Lake | YES | YES | YES | YES |
32+
| Open Mirroring | NO | YES | YES | YES |

azure-pipelines.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ resources:
2929
ref: ${{ parameters.pipelineTemplateRef }}
3030

3131
pool:
32-
vmImage: 'ubuntu-latest'
32+
vmImage: 'ubuntu-22.04'
3333

3434
variables:
3535
- name: testFolderPath
@@ -60,4 +60,5 @@ steps:
6060
deleteIntegrationEnvironmentScriptFilePath: './build/integration-test.ps1'
6161
deleteIntegrationEnvironmentScriptArguments: '-Action TearDown'
6262
publishCodeCoverage: true
63-
useGitVersionDotNetTool: true
63+
useGitVersionDotNetTool: true
64+
publishToDevFeed: true

build/integration-test.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ function Run-Setup() {
5353
Write-Host "##[command]docker run -d -e `"ACCEPT_EULA=Y`" --name $containerName -p `":1433`" -e `"MSSQL_SA_PASSWORD=$($databasePassword)`" $sqlServerImage"
5454
docker run -d -e "ACCEPT_EULA=Y" --name $containerName -p ":1433" -e "MSSQL_SA_PASSWORD=$($databasePassword)" $sqlServerImage
5555
$port = ((docker inspect $containerName | convertfrom-json).NetworkSettings.Ports."1433/tcp" | Where-Object { $_.HostIp -eq '0.0.0.0'}).HostPort
56-
$connectionString = "Data Source=$($databaseHost),$($port);Initial Catalog=$($databaseName);User Id=$($databaseUser);Password=$($databasePassword);connection timeout=0;Max Pool Size=200;Pooling=True"
56+
$connectionString = "Data Source=$($databaseHost),$($port);Initial Catalog=$($databaseName);User Id=$($databaseUser);Password=$($databasePassword);connection timeout=0;Max Pool Size=200;Pooling=True;Encrypt=false"
5757
$connectionStringEncoded = [Convert]::ToBase64String([char[]]$connectionString)
5858
Set-Variable "ADL2_STREAMCACHE" $connectionStringEncoded
5959
Write-Host "##[command]docker logs -f $($containerName)"

docs/4.4.4-release-notes.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Features
2+
- Adds ability to load to Lakehouse table after exporting it in OneLake Connector
3+
- Adds Fabric Open Mirroring Connector

src/Connector.AzureDataLake/AzureDataLakeConstants.cs

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37,35 +37,63 @@ private static AuthMethods GetAzureDataLakeAuthMethods(ApplicationContext applic
3737
{
3838
new ()
3939
{
40-
name = AccountName,
41-
displayName = AccountName,
42-
type = "input",
43-
isRequired = true
40+
Name = AccountName,
41+
DisplayName = AccountName,
42+
Type = "input",
43+
IsRequired = true,
44+
//ValidationRules = new List<Dictionary<string, string>>()
45+
//{
46+
// new() {
47+
// { "regex", "\\s" },
48+
// { "message", "Spaces are not allowed" }
49+
// }
50+
//},
4451
},
4552
new ()
4653
{
47-
name = AccountKey,
48-
displayName = AccountKey,
49-
type = "password",
50-
isRequired = true
54+
Name = AccountKey,
55+
DisplayName = AccountKey,
56+
Type = "password",
57+
IsRequired = true,
58+
//ValidationRules = new List<Dictionary<string, string>>()
59+
//{
60+
// new() {
61+
// { "regex", "\\s" },
62+
// { "message", "Spaces are not allowed" }
63+
// }
64+
//},
5165
},
5266
new ()
5367
{
54-
name = FileSystemName,
55-
displayName = FileSystemName,
56-
type = "input",
57-
isRequired = true
68+
Name = FileSystemName,
69+
DisplayName = FileSystemName,
70+
Type = "input",
71+
IsRequired = true,
72+
//ValidationRules = new List<Dictionary<string, string>>()
73+
//{
74+
// new() {
75+
// { "regex", "\\s" },
76+
// { "message", "Spaces are not allowed" }
77+
// }
78+
//},
5879
},
5980
new ()
6081
{
61-
name = DirectoryName,
62-
displayName = DirectoryName,
63-
type = "input",
64-
isRequired = true
82+
Name = DirectoryName,
83+
DisplayName = DirectoryName,
84+
Type = "input",
85+
IsRequired = true,
86+
//ValidationRules = new List<Dictionary<string, string>>()
87+
//{
88+
// new() {
89+
// { "regex", "\\s" },
90+
// { "message", "Spaces are not allowed" }
91+
// }
92+
//},
6593
},
6694
};
6795

68-
controls.AddRange(GetAuthMethods(applicationContext));
96+
controls.AddRange(GetAuthMethods(applicationContext, isArrayColumnOptionEnabled: true));
6997
controls.Add(
7098
new()
7199
{
@@ -81,9 +109,8 @@ Replaces characters in the column names that are not in this list ('a-z', 'A-Z',
81109
{
82110
new ControlDisplayDependency
83111
{
84-
Name = OutputFormat,
85-
Operator = ControlDependencyOperator.Equals,
86-
Value = OutputFormats.Parquet.ToLowerInvariant(),
112+
Name = IsStreamCacheEnabled,
113+
Operator = ControlDependencyOperator.Exists,
87114
UnfulfilledAction = ControlDependencyUnfulfilledAction.Hidden,
88115
},
89116
},
@@ -115,7 +142,7 @@ Write Guid values as string instead of byte array.
115142

116143
return new AuthMethods
117144
{
118-
token = controls
145+
Token = controls
119146
};
120147
}
121148
}

src/Connector.DataLake.Common/Connector/DataLakeClient.cs

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,27 @@ namespace CluedIn.Connector.DataLake.Common.Connector
1010
{
1111
public abstract class DataLakeClient : IDataLakeClient
1212
{
13-
public async Task<DataLakeDirectoryClient> EnsureDataLakeDirectoryExist(IDataLakeJobData configuration)
13+
public Task<DataLakeDirectoryClient> EnsureDataLakeDirectoryExist(IDataLakeJobData configuration)
1414
{
15-
var fileSystemClient = await GetFileSystemClientAsync(configuration);
16-
var directory = GetDirectory(configuration);
17-
var directoryClient = fileSystemClient.GetDirectoryClient(directory);
18-
if (!await directoryClient.ExistsAsync())
19-
{
20-
directoryClient = await fileSystemClient.CreateDirectoryAsync(directory);
21-
}
15+
return EnsureDataLakeDirectoryExist(configuration, string.Empty);
16+
}
17+
18+
public async Task<DataLakeDirectoryClient> EnsureDataLakeDirectoryExist(IDataLakeJobData configuration, string subDirectory)
19+
{
20+
var fileSystemClient = await GetFileSystemClientAsync(configuration, ensureExists: true);
21+
var directoryClient = await GetDirectoryClientAsync(configuration, fileSystemClient, subDirectory, ensureExists: true);
2222

2323
return directoryClient;
2424
}
2525

26+
public async Task DeleteDirectory(IDataLakeJobData configuration, string subDirectory)
27+
{
28+
var fileSystemClient = await GetFileSystemClientAsync(configuration, ensureExists: true);
29+
var directoryClient = await GetDirectoryClientAsync(configuration, fileSystemClient, subDirectory, ensureExists: true);
30+
31+
await directoryClient.DeleteIfExistsAsync();
32+
}
33+
2634
public async Task SaveData(IDataLakeJobData configuration, string content, string fileName, string contentType)
2735
{
2836
using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(content));
@@ -69,19 +77,20 @@ protected static TJobData CastJobData<TJobData>(IDataLakeJobData jobData) where
6977
return castedJobData;
7078
}
7179

72-
public async Task<bool> FileInPathExists(IDataLakeJobData configuration, string fileName)
80+
public Task<bool> FileInPathExists(IDataLakeJobData configuration, string fileName)
7381
{
74-
var serviceClient = GetDataLakeServiceClient(configuration);
75-
var fileSystemName = GetFileSystemName(configuration);
76-
var fileSystemClient = serviceClient.GetFileSystemClient(fileSystemName);
82+
return FileInPathExists(configuration, fileName, string.Empty);
83+
}
7784

85+
public async Task<bool> FileInPathExists(IDataLakeJobData configuration, string fileName, string subDirectory)
86+
{
87+
var fileSystemClient = await GetFileSystemClientAsync(configuration, ensureExists: false);
7888
if (!await fileSystemClient.ExistsAsync())
7989
{
8090
return false;
8191
}
8292

83-
var directory = GetDirectory(configuration);
84-
var directoryClient = fileSystemClient.GetDirectoryClient(directory);
93+
var directoryClient = await GetDirectoryClientAsync(configuration, fileSystemClient, subDirectory, ensureExists: false);
8594
if (!await directoryClient.ExistsAsync())
8695
{
8796
return false;
@@ -91,19 +100,38 @@ public async Task<bool> FileInPathExists(IDataLakeJobData configuration, string
91100
return await dataLakeFileClient.ExistsAsync();
92101
}
93102

94-
public async Task<PathProperties> GetFilePathProperties(IDataLakeJobData configuration, string fileName)
103+
public Task<bool> DirectoryExists(IDataLakeJobData configuration)
95104
{
96-
var serviceClient = GetDataLakeServiceClient(configuration);
97-
var fileSystemName = GetFileSystemName(configuration);
98-
var fileSystemClient = serviceClient.GetFileSystemClient(fileSystemName);
105+
return DirectoryExists(configuration, string.Empty);
106+
}
107+
108+
public async Task<bool> DirectoryExists(IDataLakeJobData configuration, string subDirectory)
109+
{
110+
var fileSystemClient = await GetFileSystemClientAsync(configuration, ensureExists: false);
111+
if (!await fileSystemClient.ExistsAsync())
112+
{
113+
return false;
114+
}
115+
116+
var directoryClient = await GetDirectoryClientAsync(configuration, fileSystemClient, subDirectory, ensureExists: false);
117+
return await directoryClient.ExistsAsync();
118+
}
119+
120+
public Task<PathProperties> GetFilePathProperties(IDataLakeJobData configuration, string fileName)
121+
{
122+
return GetFilePathProperties(configuration, fileName, string.Empty);
123+
}
124+
125+
public async Task<PathProperties> GetFilePathProperties(IDataLakeJobData configuration, string fileName, string subDirectory)
126+
{
127+
var fileSystemClient = await GetFileSystemClientAsync(configuration, ensureExists: false);
99128

100129
if (!await fileSystemClient.ExistsAsync())
101130
{
102131
return null;
103132
}
104133

105-
var directory = GetDirectory(configuration);
106-
var directoryClient = fileSystemClient.GetDirectoryClient(directory);
134+
var directoryClient = await GetDirectoryClientAsync(configuration, fileSystemClient, subDirectory, ensureExists: false);
107135
if (!await directoryClient.ExistsAsync())
108136
{
109137
return null;
@@ -118,13 +146,37 @@ public async Task<PathProperties> GetFilePathProperties(IDataLakeJobData configu
118146
return await dataLakeFileClient.GetPropertiesAsync();
119147
}
120148

149+
private async Task<DataLakeDirectoryClient> GetDirectoryClientAsync(
150+
IDataLakeJobData configuration,
151+
DataLakeFileSystemClient fileSystemClient,
152+
string subDirectory,
153+
bool ensureExists)
154+
{
155+
var directory = GetDirectory(configuration);
156+
var directoryClient = fileSystemClient.GetDirectoryClient(directory);
157+
if (string.IsNullOrWhiteSpace(subDirectory))
158+
{
159+
return directoryClient;
160+
}
161+
162+
directoryClient = directoryClient.GetSubDirectoryClient(subDirectory);
163+
164+
if (ensureExists && !await directoryClient.ExistsAsync())
165+
{
166+
directoryClient = await fileSystemClient.CreateDirectoryAsync(directoryClient.Path);
167+
}
168+
169+
return directoryClient;
170+
}
171+
121172
private async Task<DataLakeFileSystemClient> GetFileSystemClientAsync(
122-
IDataLakeJobData configuration)
173+
IDataLakeJobData configuration,
174+
bool ensureExists)
123175
{
124176
var dataLakeServiceClient = GetDataLakeServiceClient(configuration);
125177
var fileSystemName = GetFileSystemName(configuration);
126178
var dataLakeFileSystemClient = dataLakeServiceClient.GetFileSystemClient(fileSystemName);
127-
if (!await dataLakeFileSystemClient.ExistsAsync())
179+
if (ensureExists && !await dataLakeFileSystemClient.ExistsAsync())
128180
{
129181
dataLakeFileSystemClient = await dataLakeServiceClient.CreateFileSystemAsync(fileSystemName);
130182
}

0 commit comments

Comments
 (0)