Skip to content

Commit 828910e

Browse files
author
Niall Langley
committed
Fixed a bug where an incorrectly cased directory in a file path that matched two or more directories caused an exception to be thrown, even if only a single file within those directories matched. Now all case-insensitively matched directories are searched when checking a file path, and an error is only thrown if more than one file matches.
1 parent d426842 commit 828910e

File tree

6 files changed

+33
-47
lines changed

6 files changed

+33
-47
lines changed

DataPipelineTools.Tests/Common/FilterFactoryTests/CreateTests.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ public void Given_ValueDoesNotCastToNamedColumnType_Should_LogWarningOnce(string
129129
{nameof(TestPoco.Int64Prop), "42"},
130130
{nameof(TestPoco.DoubleProp), "42.1"},
131131
{nameof(TestPoco.DecimalProp), "42.1"},
132-
{ nameof(TestPoco.DateTimeProp), "2021-01-01T12:00:00"}
132+
{nameof(TestPoco.DateTimeProp), "2021-01-01T12:00:00"},
133+
{nameof(TestPoco.DateTimeOffsetProp), "2021-01-01T12:00:00"}
133134
};
134135
private static readonly string[] SimpleFilterTypes = {"eq", "ne", "lt", "gt", "le", "ge" };
135136

DataPipelineTools.Tests/DataLake/DataLakeServiceTests/CheckPathAsyncTests.cs

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,14 @@ public void Given_ValidDirectoryPath_Should_ReturnDirectoryPath()
3636
Assert.That(resultPath, Is.EqualTo(testPath));
3737
}
3838

39-
[Test]
40-
public void Given_DirectoryPathWithIncorrectCase_Should_ReturnCorrectedDirectoryPath()
39+
[TestCase("raw/database/JAN")]
40+
[TestCase("raw/DataBase/jan")]
41+
[TestCase("RAW/database/jan")]
42+
public void Given_DirectoryPathWithIncorrectCase_Should_ReturnCorrectedDirectoryPath(string testPath)
4143
{
42-
var testPath = "raw/DATABASE";
4344
var resultPath = Sut.CheckPathAsync(testPath, true).Result;
4445

45-
Assert.That(resultPath, Is.EqualTo("raw/database"));
46+
Assert.That(resultPath, Is.EqualTo("raw/database/jan"));
4647
}
4748

4849
[Test]
@@ -64,10 +65,11 @@ public void Given_FilePathWithIncorrectFilenameCase_Should_ReturnCorrectedFilePa
6465
}
6566

6667

67-
[Test]
68-
public void Given_FilePathWithIncorrectDirectoryCase_Should_ReturnCorrectedFilePath()
68+
[TestCase("raw/database/JAN/extract_1.csv")]
69+
[TestCase("raw/DataBase/jan/extract_1.csv")]
70+
[TestCase("RAW/database/jan/extract_1.csv")]
71+
public void Given_FilePathWithIncorrectDirectoryCase_Should_ReturnCorrectedFilePath(string testPath)
6972
{
70-
var testPath = "raw/database/JAN/extract_1.csv";
7173
var resultPath = Sut.CheckPathAsync(testPath, false).Result;
7274

7375
Assert.That(resultPath, Is.EqualTo("raw/database/jan/extract_1.csv"));
@@ -109,11 +111,10 @@ public void Given_InvalidFilePath_Should_ReturnNull()
109111
Assert.That(resultPath, Is.EqualTo(null));
110112
}
111113

112-
[Test]
113-
public void Given_DirectoryPathWithIncorrectCase_When_MatchesMultiplePaths_Should_Throw()
114+
[TestCase("raw/aPi")]
115+
[TestCase("RAW/api")]
116+
public void Given_DirectoryPathWithIncorrectCase_When_MatchesMultiplePaths_Should_Throw(string testPath)
114117
{
115-
var testPath = "raw/aPi";
116-
117118
Assert.CatchAsync(() => Sut.CheckPathAsync(testPath, true));
118119
}
119120

@@ -153,29 +154,23 @@ public void Given_ForwardSlashPath_Should_Return_EmptyString()
153154
Assert.That(resultPath, Is.EqualTo(string.Empty));
154155
}
155156

156-
[Test]
157-
public void Given_DirectoryPathWithIncorrectCase_Should_ThrowWhenMultipleDirectoriesMatch()
157+
[TestCase("RaW/api/jan")]
158+
[TestCase("raw/ApI/jan")]
159+
[TestCase("raw/api/JaN")]
160+
public void Given_DirectoryPathWithIncorrectCase_Should_ThrowWhenMultipleDirectoriesMatch(string testPath)
158161
{
159-
var testPath = "RaW/api/jan";
160-
161162
var exception = Assert.CatchAsync(() => Sut.CheckPathAsync(testPath, true));
162163
Assert.That(exception, Is.TypeOf(typeof(Exception)));
163164
}
164165

165-
[Test]
166-
public void Given_FilePathWithIncorrectCase_Should_ThrowWhenMultipleDirectoriesMatch()
166+
[TestCase("RaW/api/jan/delta_extract_1.json")]
167+
[TestCase("raw/ApI/jan/delta_extract_1.json")]
168+
[TestCase("raw/api/JaN/delta_extract_1.json")]
169+
[TestCase("raw/api/jan/delta_EXTRACT_1.json")]
170+
[TestCase("raw/DataBase/feb/extract_2.csv")]
171+
[TestCase("raw/database/feb/Extract_2.csv")]
172+
public void Given_PathWithIncorrectCase_Should_ThrowWhenMultipleFilesMatch(string testPath)
167173
{
168-
var testPath = "RaW/api/jan/delta_extract_1.json";
169-
170-
var exception = Assert.CatchAsync(() => Sut.CheckPathAsync(testPath, false));
171-
Assert.That(exception, Is.TypeOf(typeof(Exception)));
172-
}
173-
174-
[Test]
175-
public void Given_FilePathWithIncorrectCase_Should_ThrowWhenMultipleFilesMatch()
176-
{
177-
var testPath = "raw/database/feb/Extract_2.csv";
178-
179174
var exception = Assert.CatchAsync(() => Sut.CheckPathAsync(testPath, false));
180175
Assert.That(exception, Is.TypeOf(typeof(Exception)));
181176
}

DataPipelineTools.Tests/DataLake/DataLakeServiceTests/GetItemsAsyncTests.cs

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,24 +54,11 @@ public void Given_DirectoryPathWithIncorrectCase_Should_ReturnContentsForCorrect
5454
Directory = "raw/aPi/feb"
5555
};
5656

57+
var result = Sut.GetItemsAsync(DatalakeConfig, itemsConfig).Result.ToObject<GetItemsResponse>();
5758

58-
var r = Sut.CheckPathAsync("raw/Api/jan/delta_extract_1.json", false).Result;
59-
60-
var result = Sut.GetItemsAsync(DatalakeConfig, itemsConfig).Result;
61-
62-
Assert.That(result.ContainsKey("fileCount"), Is.True);
63-
Assert.That(result.ContainsKey("files"), Is.True);
64-
65-
Assert.That( (int)result.Property("fileCount").Value, Is.EqualTo(2) );
66-
67-
var s = result.ToObject<GetItemsResponse>();
68-
var responseObject = JsonConvert.DeserializeObject<GetItemsResponse>(result.ToString());
69-
70-
var files = result.Property("files");
71-
var itemsInfo = JsonConvert.DeserializeObject<List<DataLakeItem>>(files.ToString());
72-
Assert.That(itemsInfo, Is.EqualTo(2));
73-
//Assert.That(itemsInfo.Count(x => x.FullPath == "raw/api/feb"), Is.True);
74-
//Assert.That(itemsInfo.Count(x => x.FullPath == "raw/api/feb/delta_extract_3.json"), Is.True);
59+
Assert.That(result.fileCount, Is.EqualTo(1));
60+
Assert.That(result.files.Count, Is.EqualTo(1));
61+
Assert.That(result.files.Count(x => x.FullPath == "raw/api/feb/delta_extract_3.json"), Is.EqualTo(1));
7562
}
7663

7764
//[Test]

DataPipelineTools.Tests/DataLake/DataLakeTestBase.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ protected Mock<DataLakeFileSystemClient> BuildMockDataLakeFileSystemClient()
5454
var pathLength = 1 + (path?.Length ?? 0);
5555
var items = TestData
5656
// Include all files starting with the test path, or root paths if the test path is null
57-
.Where(x => x.Name.StartsWith(path ?? string.Empty))// || (path == null && !x.Name.Contains('/')))
57+
.Where(x => x.Name.StartsWith(path ?? string.Empty) && x.Name != path)
5858
// Still include them if the recursive flag is set, otherwise check if the relative path after the search path contains
5959
// a directory separator to exclude sub dirs
6060
.Where(x => recursive || !x.Name.Substring(pathLength > x.Name.Length ? x.Name.Length : pathLength).Contains('/'))

DataPipelineTools.Tests/TestPoco.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public class TestPoco
1212
public decimal DecimalProp { get; set; }
1313
public double DoubleProp { get; set; }
1414
public DateTime DateTimeProp { get; set; }
15+
public DateTimeOffset DateTimeOffsetProp { get; set; }
1516
public object ObjectProp { get; set; }
1617
}
1718
}

DataPipelineTools/DataLake/DataLakeService.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ private IList<string> MatchPathItemsCaseInsensitive(string basePath, string sear
103103

104104
public async Task<JObject> GetItemsAsync(DataLakeConfig dataLakeConfig, DataLakeGetItemsConfig getItemsConfig)
105105
{
106+
// Check the directory exists. If multiple directories match (i.e. different casing), it will throw an error, as we don't know
107+
// which one we wanted the files from.
106108
var directory = getItemsConfig.IgnoreDirectoryCase ?
107109
await CheckPathAsync(getItemsConfig.Directory, true) :
108110
getItemsConfig.Directory;

0 commit comments

Comments
 (0)