Skip to content

Commit 80eb0e0

Browse files
authored
Merge pull request #55 from JesseMckinzie/directory_capturing
Add directory capturing to filepattern
2 parents bb73de9 + c0922e6 commit 80eb0e0

File tree

7 files changed

+207
-10
lines changed

7 files changed

+207
-10
lines changed

docs/source/Examples.rst

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,45 @@ the output will be
365365
366366
['r', 'c']
367367
368+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
369+
Capturing directory names
370+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
371+
372+
Directory names can also be captured when the `recursive` option is set to `True`. In this case,
373+
the directory name can be treated the same as a variable in the `filepattern`. For example, if a directory has the
374+
structure:
375+
376+
.. code-block:: bash
377+
378+
data
379+
DAPI
380+
img_r001_c001.tif
381+
TXREAD
382+
img_r001_c001.tif
383+
GFP
384+
img_r001_c001.tif
385+
386+
Then the following `filepattern` will capture the directory names.
387+
388+
.. code-block:: python
389+
390+
path = '/path/to/data'
391+
392+
filepattern = '/{directory:c+}/img_r{r:ddd}_c{c:ddd}.tif'
393+
394+
files = fp.FilePattern(path, filepattern, recursive=True)
395+
396+
for file in files():
397+
print(file)
398+
399+
The output will be:
400+
401+
.. code-block:: bash
402+
403+
({'c': 1, 'directory': 'DAPI', 'r': 1}, ['/path/to/data/DAPI/img_r001_c001.tif'])
404+
({'c': 1, 'directory': 'GFP', 'r': 1}, ['/path/to/data/GFP/img_r001_c001.tif'])
405+
({'c': 1, 'directory': 'TXREAD', 'r': 1}, ['/path/to/data/TXREAD/img_r001_c001.tif'])
406+
368407
369408
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
370409
Pydantic models as return values
@@ -384,7 +423,7 @@ are dynamically created at runtime, allowing the fields of the model to be the v
384423
385424
pattern = "img_r{r:ddd}_c{c:ddd}_{channel:c+}.tif"
386425
387-
files = fp.FilePattern(filepath, pattern)
426+
files = fp.FilePattern(filepath, pattern, recursive=True)
388427
389428
for file in files(pydantic_output=True):
390429
print(file.r)

src/filepattern/cpp/internal/filepattern.cpp

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,21 @@ FilePatternObject::FilePatternObject(const string& path, const string& file_patt
2222

2323
} else {
2424

25-
this->setJustPath(false);
26-
this->setPath(path); // store path to target directory
27-
this->setFilePattern(file_pattern); // cast input string to regex
2825
this->recursive_ = recursive; // Iterate over subdirectories
2926

27+
// check if filepattern contains directory capturing
28+
if (file_pattern.find('/') != std::string::npos || file_pattern.find('\\') != std::string::npos) {
29+
this->setCaptureDirectoryNames(true);
30+
this->recursive_ = true; // need to be recursive to capture directory names
31+
32+
this->setFilePattern(s::escapeForwardSlashes(file_pattern));
33+
} else {
34+
this->setFilePattern(file_pattern); // cast input string to regex
35+
}
36+
37+
this->setJustPath(false);
38+
this->setPath(path); // store path to target directory
39+
3040
try {
3141
if(recursive){
3242
this->recursive_iterator_ = fs::recursive_directory_iterator(this->getPath());
@@ -86,29 +96,48 @@ void FilePatternObject::matchFilesOneDir(){
8696
}
8797

8898
void FilePatternObject::matchFilesMultDir(){
89-
// Iterate over every file in directory
99+
90100
regex pattern_regex = regex(this->getRegexFilePattern());
101+
91102
Tuple tup;
92103
smatch sm;
93104
string file, file_path;
105+
106+
bool is_pushed = false;
107+
94108
// Iterate over directories and subdirectories
95109
for (const auto& entry : this->recursive_iterator_) {
110+
96111
file_path = entry.path().string();
97-
replace(file_path.begin(), file_path.end(), '\\', '/');
98-
if(this->getJustPath()){
112+
113+
replace(file_path.begin(), file_path.end(), '\\', '/'); // escape slashes for regex
114+
115+
if(this->getJustPath() || this->captureDirectoryNames()){
99116
file = s::eraseSubStr(file_path, this->getPath());
100117
} else {
101118
file = s::getBaseName(file_path);
102119
}
103120

104121
if(regex_match(file, sm, pattern_regex)){
105-
if(this->getJustPath()) tup = getVariableMap(file_path, sm);
106-
else tup = getVariableMapMultDir(file_path, sm);
122+
123+
if(this->getJustPath() || this->captureDirectoryNames()) {
124+
tup = getVariableMap(file_path, sm);
125+
} else {
126+
tup = getVariableMapMultDir(file_path, sm);
127+
}
128+
107129
if(get<0>(tup).size() > 0){
108-
this->valid_files_.push_back(tup); // write to txt file
130+
this->valid_files_.push_back(tup);
131+
is_pushed = true;
132+
} else {
133+
is_pushed = false;
109134
}
110135
}
111136
}
137+
138+
if (!is_pushed && get<1>(tup).size() > 0) {
139+
this->valid_files_.push_back(tup);
140+
}
112141
}
113142

114143
void FilePatternObject::matchFiles() {

src/filepattern/cpp/pattern.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,14 @@ bool Pattern::getJustPath(){
760760
return this->just_path_;
761761
}
762762

763+
bool Pattern::captureDirectoryNames() {
764+
return this->capture_directory_names_;
765+
}
766+
767+
void Pattern::setCaptureDirectoryNames(bool capture) {
768+
this->capture_directory_names_ = capture;
769+
}
770+
763771
bool Pattern::getSuppressWarnings(){
764772
return this->suppress_warnings_;
765773
}

src/filepattern/cpp/pattern.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class Pattern : public PatternObject {
3030
//std::string path_;
3131
bool just_path_;
3232
bool suppress_warnings_;
33+
bool capture_directory_names_;
3334

3435
std::string VARIABLES_;
3536

@@ -49,6 +50,7 @@ class Pattern : public PatternObject {
4950
std::string getPath();
5051
bool getJustPath();
5152
bool getSuppressWarnings();
53+
void setCaptureDirectoryNames(bool capture);
5254

5355

5456
/**
@@ -236,4 +238,6 @@ class Pattern : public PatternObject {
236238
*/
237239
std::vector<std::string> getTmpDirs();
238240

241+
bool captureDirectoryNames();
242+
239243
};

src/filepattern/cpp/util/util.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ using Tuple = std::tuple<Map, std::vector<std::string>>;
4949
using Tuple = std::tuple<Map, std::vector<fs::path>>;
5050
#endif
5151

52+
5253
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
5354
static const std::string SLASH = "\\";
5455
#else
@@ -62,6 +63,20 @@ static const std::string SLASH = "/";
6263
*/
6364
namespace s {
6465

66+
/**
 * @brief Escape the forward slashes of a filepattern so the regex matches them literally.
 *
 * Every '/' that is not already escaped is emitted as a backslash followed by '/'.
 * A slash preceded by an unpaired backslash is already escaped and is copied
 * unchanged; escaping it again would produce two backslashes followed by '/', which a
 * regex reads as a literal backslash and then a slash. The function is therefore
 * idempotent.
 *
 * @param input Pattern text that may contain '/' directory separators.
 * @return Copy of input in which every previously unescaped '/' is escaped.
 */
inline std::string escapeForwardSlashes(const std::string& input) {
    std::string result;
    result.reserve(input.size()); // output is at least as long as the input

    // True while the previous character is a backslash that is not itself escaped.
    bool pending_escape = false;

    for (const char ch : input) {
        if (ch == '/' && !pending_escape) {
            result += "\\/";
        } else {
            result += ch;
        }
        // Two consecutive backslashes form an escaped backslash and cancel out.
        pending_escape = (ch == '\\') && !pending_escape;
    }

    return result;
}
79+
6580
inline std::string escape_regex_characters(const std::string& str) {
6681

6782
const std::unordered_set<char> escape_chars = {'*', '?', '^', '$', '(', ')', '[', ']', '|', '\\'};

tests/test_filepattern.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,10 +269,81 @@ def test_named_group_direcotry(self):
269269
basename = os.path.basename(mapping[1][0])
270270
for filepath in mapping[1]:
271271
assert basename == os.path.basename(filepath)
272+
273+
def test_recursive_directory_fp(self):
    """Capture the sub-directory name with a ``{directory:c+}`` filepattern variable.

    The matches are compared, in order, against
    ``fp_data.test_recursive_directory_fp``: the ``r``, ``c`` and ``directory``
    values must be equal, and the first path of each match must have the same basename.
    """
    expected = fp_data.test_recursive_directory_fp

    path = self.root_directory + '/test_data/recursive_data'
    filepattern = '/{directory:c+}/img_r{r:ddd}_c{c:ddd}.tif'

    files = fp.FilePattern(path, filepattern, recursive=True)
    found = [match for match in files()]

    # test that same number of files are returned
    assert len(found) == len(expected)

    # test that each variable and path are equal for each file in list
    for want, got in zip(expected, found):
        print(got)
        for key in ("r", "c", "directory"):
            assert want[0][key] == got[0][key]
        assert str(os.path.basename(want[1][0])) == os.path.basename(got[1][0])
296+
297+
def test_recursive_directory_regex_fp(self):
    """Capture the sub-directory name with a named regex group in the filepattern.

    The matches are compared, in order, against
    ``fp_data.test_recursive_directory_fp``: the ``r``, ``c`` and ``directory``
    values must be equal, and the first path of each match must have the same basename.
    """
    expected = fp_data.test_recursive_directory_fp

    path = self.root_directory + '/test_data/recursive_data'
    filepattern = '/(?P<directory>[a-zA-Z]+)/img_r{r:ddd}_c{c:ddd}.tif'

    files = fp.FilePattern(path, filepattern, recursive=True)
    found = [match for match in files()]

    # test that same number of files are returned
    assert len(found) == len(expected)

    # test that each variable and path are equal for each file in list
    for want, got in zip(expected, found):
        print(got)
        for key in ("r", "c", "directory"):
            assert want[0][key] == got[0][key]
        assert str(os.path.basename(want[1][0])) == os.path.basename(got[1][0])
273320

274321

275322

323+
def test_recursive_multi_directory_regex_fp(self):
    """Capture a directory name that sits below a wildcard (``.*``) path segment.

    The search starts at ``test_data`` instead of ``test_data/recursive_data``.
    The matches are compared, in order, against
    ``fp_data.test_recursive_directory_fp``: the ``r``, ``c`` and ``directory``
    values must be equal, and the first path of each match must have the same basename.
    """
    expected = fp_data.test_recursive_directory_fp

    path = self.root_directory + '/test_data'
    filepattern = '/.*/{directory:c+}/img_r{r:ddd}_c{c:ddd}.tif'

    files = fp.FilePattern(path, filepattern, recursive=True)
    found = [match for match in files()]

    # test that same number of files are returned
    assert len(found) == len(expected)

    # test that each variable and path are equal for each file in list
    for want, got in zip(expected, found):
        print(got)
        for key in ("r", "c", "directory"):
            assert want[0][key] == got[0][key]
        assert str(os.path.basename(want[1][0])) == os.path.basename(got[1][0])
346+
276347
# Todo: These tests need new data to be added after replacing the old version of filepattern.
277348
"""
278349
def test_group_by_multi(self):

tests/test_filepattern_data.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,3 +616,34 @@
616616
({'c': 2, 'dir': 'TXREAD', 'r': 2},
617617
['test_data/recursive_data/TXREAD/img_r002_c002.tif'])
618618
]
619+
620+
621+
test_recursive_directory_fp = [
622+
({'c': 0, 'directory': 'DAPI', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r000_c000.tif']),
623+
({'c': 1, 'directory': 'DAPI', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r000_c001.tif']),
624+
({'c': 2, 'directory': 'DAPI', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r000_c002.tif']),
625+
({'c': 0, 'directory': 'DAPI', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r001_c000.tif']),
626+
({'c': 1, 'directory': 'DAPI', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r001_c001.tif']),
627+
({'c': 2, 'directory': 'DAPI', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r001_c002.tif']),
628+
({'c': 0, 'directory': 'DAPI', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r002_c000.tif']),
629+
({'c': 1, 'directory': 'DAPI', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r002_c001.tif']),
630+
({'c': 2, 'directory': 'DAPI', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/DAPI/img_r002_c002.tif']),
631+
({'c': 0, 'directory': 'GFP', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r000_c000.tif']),
632+
({'c': 1, 'directory': 'GFP', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r000_c001.tif']),
633+
({'c': 2, 'directory': 'GFP', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r000_c002.tif']),
634+
({'c': 0, 'directory': 'GFP', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r001_c000.tif']),
635+
({'c': 1, 'directory': 'GFP', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r001_c001.tif']),
636+
({'c': 2, 'directory': 'GFP', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r001_c002.tif']),
637+
({'c': 0, 'directory': 'GFP', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r002_c000.tif']),
638+
({'c': 1, 'directory': 'GFP', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r002_c001.tif']),
639+
({'c': 2, 'directory': 'GFP', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/GFP/img_r002_c002.tif']),
640+
({'c': 0, 'directory': 'TXREAD', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r000_c000.tif']),
641+
({'c': 1, 'directory': 'TXREAD', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r000_c001.tif']),
642+
({'c': 2, 'directory': 'TXREAD', 'r': 0}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r000_c002.tif']),
643+
({'c': 0, 'directory': 'TXREAD', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r001_c000.tif']),
644+
({'c': 1, 'directory': 'TXREAD', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r001_c001.tif']),
645+
({'c': 2, 'directory': 'TXREAD', 'r': 1}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r001_c002.tif']),
646+
({'c': 0, 'directory': 'TXREAD', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r002_c000.tif']),
647+
({'c': 1, 'directory': 'TXREAD', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r002_c001.tif']),
648+
({'c': 2, 'directory': 'TXREAD', 'r': 2}, ['/Users/jmckinzie/Documents/GitHub/filepattern-1/tests/test_data/recursive_data/TXREAD/img_r002_c002.tif']),
649+
]

0 commit comments

Comments
 (0)