Skip to content

Commit d426842

Browse files
author
Niall Langley
committed
Refactored code with bug fixes and added more tests
1 parent 8c1c27f commit d426842

File tree

8 files changed

+149
-37
lines changed

8 files changed

+149
-37
lines changed

DataPipelineTools.Functions/DataLake/DataLakeConfigFactory.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,11 @@ private IEnumerable<Filter<DataLakeItem>> ParseFilters(HttpRequest req)
8282
var filters = req.Query.Keys
8383
.Where(k => k.StartsWith("filter[") && k.EndsWith("]"))
8484
// Clean up the column name by removing the filter[...] parts
85-
.Select(f => f[7..^1])
86-
.SelectMany(k => req.Query[k].Select(v => FilterFactory<DataLakeItem>.Create(k, v, _logger)))
85+
//.Select(f => f[7..^1])
86+
.SelectMany(k => req.Query[k].Select(v => FilterFactory<DataLakeItem>.Create(k[7..^1], v, _logger)))
8787
.Where(f => f != null);
8888

89-
return filters;
89+
return filters.ToArray();
9090
}
9191

9292
private dynamic GetRequestData(HttpRequest req)

DataPipelineTools.Tests/DataLake/DataLakeServiceTests/CheckPathAsyncTests.cs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -179,16 +179,5 @@ public void Given_FilePathWithIncorrectCase_Should_ThrowWhenMultipleFilesMatch()
179179
var exception = Assert.CatchAsync(() => Sut.CheckPathAsync(testPath, false));
180180
Assert.That(exception, Is.TypeOf(typeof(Exception)));
181181
}
182-
183-
184-
//[Test]
185-
//public void Given__Should_()
186-
//{
187-
// var testPath = "some/invalid/path";
188-
// var resultPath = Sut.CheckPathAsync(testPath, true).Result;
189-
190-
// Assert.That(null, Is.EqualTo(resultPath));
191-
//}
192-
193182
}
194183
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Moq;
5+
using Newtonsoft.Json;
6+
using Newtonsoft.Json.Linq;
7+
using NUnit.Framework;
8+
using SqlCollaborative.Azure.DataPipelineTools.DataLake;
9+
using SqlCollaborative.Azure.DataPipelineTools.DataLake.Model;
10+
11+
namespace DataPipelineTools.Tests.DataLake.DataLakeServiceTests
{
    /// <summary>
    /// Tests for <c>DataLakeService.GetItemsAsync</c>.
    /// The mock logger, the mock file system client and <c>SetupConsoleLogging</c> are not declared here;
    /// presumably they are inherited from <c>DataLakeTestBase</c> (confirm against the base class).
    /// </summary>
    [TestFixture]
    public class GetItemsAsyncTests : DataLakeTestBase
    {
        /// <summary>The service under test, built once per fixture from the mocked file system client.</summary>
        protected readonly DataLakeService Sut;

        /// <summary>Returns a fresh, default <see cref="DataLakeConfig"/> on every access.</summary>
        private DataLakeConfig DatalakeConfig => new DataLakeConfig();

        public GetItemsAsyncTests()
        {
            // Use the factory to inject the mock logger to get the mock client...
            var factory = new DataLakeServiceFactory(MockLogger.Object);
            Sut = factory.CreateDataLakeService(MockFileSystemClient.Object);
        }

        [SetUp]
        public void Setup()
        {
            // Reset the logger for each test, and add a setup to the moq to write log entries to the console so they are captured
            // as additional output in the test result
            MockLogger.Reset();
            SetupConsoleLogging();
        }

        [Test]
        public void Given_ValidDirectoryPath_Should_ReturnContents()
        {
            var itemsConfig = new DataLakeGetItemsConfig
            {
                Directory = "raw/api/feb"
            };

            var result = Sut.GetItemsAsync(DatalakeConfig, itemsConfig).Result;

            Assert.That(result.Count, Is.EqualTo(2));
        }

        [Test]
        public void Given_DirectoryPathWithIncorrectCase_Should_ReturnContentsForCorrectedPath()
        {
            var itemsConfig = new DataLakeGetItemsConfig
            {
                Directory = "raw/aPi/feb"
            };

            var result = Sut.GetItemsAsync(DatalakeConfig, itemsConfig).Result;

            Assert.That(result.ContainsKey("fileCount"), Is.True);
            Assert.That(result.ContainsKey("files"), Is.True);

            // Deserialise the whole payload once and assert on the typed values. Comparing the list itself
            // to 2 could never succeed, and a JProperty's ToString() includes the property name, so it is
            // not a JSON array that can be deserialised into a list.
            var response = result.ToObject<GetItemsResponse>();
            Assert.That(response.fileCount, Is.EqualTo(2));
            Assert.That(response.files.Count, Is.EqualTo(2));

            // TODO: also assert on the individual item paths once the expected listing is confirmed.
        }
    }

    /// <summary>
    /// Typed view of the JSON payload returned by <c>GetItemsAsync</c>, used only to deserialise results in tests.
    /// Property names are lower camel case, presumably to mirror the JSON keys ("fileCount" and "files"
    /// are the keys the tests check for).
    /// </summary>
    public class GetItemsResponse
    {
        public int fileCount { get; set; }
        public string correctedFilePath { get; set; }
        public List<DataLakeItem> files { get; set; }
    }
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Name,IsDirectory,ContentLength,LastModified
2+
raw/database/jan/extract_1.csv,FALSE,10,02/01/2021 13:00
3+
raw/database/feb/extract_2.csv,FALSE,20,03/01/2021 13:00
4+
raw/database/feb/EXTRACT_2.csv,FALSE,25,03/01/2021 13:00
5+
raw/api/jan/delta_extract_1.json,FALSE,10,01/01/2021 14:00
6+
raw/api/jan/delta_extract_2.json,FALSE,20,02/01/2021 14:00
7+
raw/api/feb/delta_extract_3.json,FALSE,30,03/01/2021 14:00
8+
raw/API/jan/delta_extract_1.json,FALSE,10,01/01/2021 15:00

DataPipelineTools.Tests/DataPipelineTools.Tests.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
<None Update="DataLake\DataLakeServiceTests\CheckPathAsyncTests_Data_PathItem.csv">
2323
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
2424
</None>
25+
<None Update="DataLake\DataLakeServiceTests\GetItemsAsyncTests_Data_PathItem.csv">
26+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
27+
</None>
2528
</ItemGroup>
2629

2730
</Project>

DataPipelineTools/Common/FilterFactory.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ public static Filter<T> Create(string columnName, string filter, ILogger log)
9292
DateTime dateTimeVal;
9393
isValueParseable = DateTime.TryParse(val, out dateTimeVal);
9494
break;
95+
case nameof(DateTimeOffset):
96+
DateTimeOffset dateTimeOffsetVal;
97+
isValueParseable = DateTimeOffset.TryParse(val, out dateTimeOffsetVal);
98+
break;
9599
}
96100

97101
var parseError = isValueParseable ? null : $"The filter '{val}' cannot be applied to the property '{columnName}' as it cannot be cast to a '{propertyType}'";

DataPipelineTools/DataLake/DataLakeService.cs

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -47,37 +47,51 @@ public async Task<string> CheckPathAsync(string path, bool isDirectory)
4747

4848
// If the directory does not exist, we find it
4949
string validDirectory = null;
50+
IEnumerable<string> validDirectories = null;
5051
if (!await _client.GetDirectoryClient(path).ExistsAsync())
5152
{
52-
var directoryParts = directoryPath.Split('/');
53-
foreach (var directoryPart in directoryParts)
54-
{
55-
var searchItem = directoryPart;
56-
var validPaths = MatchPathItemsCaseInsensitive(validDirectory, searchItem, true);
57-
58-
if (validPaths.Count == 0)
59-
return null;
60-
else if (validPaths.Count > 1)
61-
throw new Exception("Multiple paths matched with case insensitive compare.");
62-
63-
validDirectory = validPaths[0];
64-
}
53+
var validPaths = MatchPaths(null, true,directoryPath.Split('/')).ToList();
54+
if (validPaths.Count == 0)
55+
return null;
56+
if (validPaths.Count > 1 && isDirectory)
57+
throw new Exception("Multiple directories matched with case insensitive compare.");
58+
59+
validDirectory = validPaths[0];
60+
validDirectories = validPaths;
6561
}
6662

63+
6764
if (isDirectory)
6865
return validDirectory;
6966

7067
// Now check if the file exists using the corrected directory, and if not find a match...
71-
var testFilePath = $"{validDirectory ?? ""}/{filename}".TrimStart('/');
72-
if (_client.GetFileClient(testFilePath).Exists())
73-
return testFilePath;
68+
var files = validDirectories.SelectMany(x => MatchPaths(x, false, filename)).ToList();
7469

75-
var files = MatchPathItemsCaseInsensitive(validDirectory, filename, false);
7670
if (files.Count > 1)
77-
throw new Exception("Multiple paths matched with case insensitive compare.");
71+
throw new Exception("Multiple files matched with case insensitive compare.");
7872
return files.FirstOrDefault();
7973
}
8074

75+
/// <summary>
/// Resolves a sequence of path segments beneath <paramref name="basePath"/>, matching each segment
/// case-insensitively via <c>MatchPathItemsCaseInsensitive</c>, and returns every path that matches.
/// </summary>
/// <param name="basePath">The path to search beneath; callers pass <c>null</c> to start from the root.</param>
/// <param name="directoriesOnly">
/// Whether the final segment must be a directory. Every segment before the last is always matched as a
/// directory, since only a directory can contain further segments.
/// </param>
/// <param name="directoryParts">The remaining path segments to resolve, in order.</param>
/// <returns>
/// All matching paths; a single-element sequence containing <paramref name="basePath"/> when there are no
/// segments left; an empty sequence when <paramref name="directoryParts"/> is <c>null</c> or nothing matches.
/// </returns>
private IEnumerable<string> MatchPaths(string basePath, bool directoriesOnly, params string[] directoryParts)
{
    // Never hand back null for a collection: callers immediately enumerate the result.
    if (directoryParts == null)
        return Enumerable.Empty<string>();

    if (directoryParts.Length == 0)
        return new[] { basePath };

    // Only the last segment may be a file; anything with children beneath it has to be a directory.
    var isLastPart = directoryParts.Length == 1;
    var matchedItems = MatchPathItemsCaseInsensitive(basePath, directoryParts[0], directoriesOnly || !isLastPart);
    var remainingParts = directoryParts.Skip(1).ToArray();

    var matchedPaths = new List<string>();
    foreach (var matchedItem in matchedItems)
        matchedPaths.AddRange(MatchPaths(matchedItem, directoriesOnly, remainingParts));

    return matchedPaths;
}
94+
8195
private IList<string> MatchPathItemsCaseInsensitive(string basePath, string searchItem, bool isDirectory)
8296
{
8397
var paths = _client.GetPaths(basePath).ToList();
@@ -94,7 +108,7 @@ await CheckPathAsync(getItemsConfig.Directory, true) :
94108
getItemsConfig.Directory;
95109

96110
if (!_client.GetDirectoryClient(directory).Exists())
97-
throw new DirectoryNotFoundException("Directory '{directory} could not be found'");
111+
// The closing quote belongs directly after the directory name, not at the end of the message.
throw new DirectoryNotFoundException($"Directory '{directory}' could not be found");
98112

99113
var paths = _client
100114
.GetPaths(path: directory ?? string.Empty, recursive: getItemsConfig.Recursive)

DataPipelineTools/DataLake/Model/DataLakeGetItemsConfig.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ namespace SqlCollaborative.Azure.DataPipelineTools.DataLake.Model
66
public class DataLakeGetItemsConfig
77
{
88
public string Directory { get; set; }
9-
public bool IgnoreDirectoryCase { get; set; }
10-
public bool Recursive { get; set; }
9+
public bool IgnoreDirectoryCase { get; set; } = true;
10+
public bool Recursive { get; set; } = true;
1111
public string OrderByColumn { get; set; }
1212
public bool OrderByDescending { get; set; }
13-
public int Limit { get; set; }
14-
public IEnumerable<Filter<DataLakeItem>> Filters { get; set; }
13+
public int Limit { get; set; } = 0;
14+
public IEnumerable<Filter<DataLakeItem>> Filters { get; set; } = new Filter<DataLakeItem>[0];
1515
}
1616
}

0 commit comments

Comments
 (0)