Skip to content

Commit 456af71

Browse files
authored
Merge pull request #82 from JesseMckinzie/array_input_v2
Add list of strings as an input to the FilePattern constructor
2 parents a16a1bf + d71aee4 commit 456af71

File tree

15 files changed

+402
-22
lines changed

15 files changed

+402
-22
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ option(RUN_GTEST "Downloads google unit test API and runs google test scripts to
1414

1515

1616
if(BUILD_PYTHON_LIB)
17-
find_package(pybind11 CONFIG REQUIRED)
17+
find_package(pybind11 CONFIG REQUIRED)
1818
pybind11_add_module(backend
1919
src/filepattern/cpp/bindings.cpp
2020
)

src/filepattern/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from .filepattern import FilePattern
55

6-
from .functions import infer_pattern, get_regex
6+
from .functions import infer_pattern, get_regex, get_variables
77

88
__all__ = ["FilePattern", "infer_pattern", "get_regex"]
99
from . import _version

src/filepattern/cpp/bindings.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <pybind11/operators.h>
44
#include <pybind11/complex.h>
55
#include <pybind11/stl/filesystem.h>
6+
#include <string>
67

78
#include "pattern_object.hpp"
89

@@ -17,7 +18,15 @@ namespace py = pybind11;
1718
PYBIND11_MODULE(backend, m){
1819

1920
py::class_<FilePattern>(m, "FilePattern")
20-
.def(py::init<const std::string &, const std::string &, const std::string&, bool, bool>())
21+
.def(py::init<const std::string&,
22+
const std::string&,
23+
const std::string&,
24+
bool,
25+
bool>())
26+
.def(py::init<const std::vector<std::string>&,
27+
const std::string&,
28+
bool,
29+
bool>())
2130
.def("getMatching", &FilePattern::getMatching)
2231
.def("getOccurrences", &FilePattern::getOccurrences)
2332
.def("getUniqueValues", &FilePattern::getUniqueValues)
@@ -34,6 +43,7 @@ PYBIND11_MODULE(backend, m){
3443
.def("setGroupStr", py::overload_cast<std::string&>(&FilePattern::setGroup))
3544
.def("length", &FilePattern::length)
3645
.def_static("getRegex", &FilePattern::getRegex)
46+
.def_static("getVariablesFromPattern", &FilePattern::getVariablesFromPattern)
3747
.def_static("inferPattern", py::overload_cast<const std::string&, std::string&, const std::string&>(&FilePattern::inferPattern))
3848
.def_static("inferPattern", py::overload_cast<std::vector<std::string>&, std::string&>(&FilePattern::inferPattern))
3949
.def("isGrouped", [](FilePattern &v){

src/filepattern/cpp/include/filepattern.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,16 @@ using Tuple = std::tuple<Map, std::vector<std::string>>;
3232
using Tuple = std::tuple<Map, std::vector<fs::path>>;
3333
#endif
3434

35+
3536
class PatternObject; // forward declaration
3637
class FILEPATTERN_EXPORT FilePattern {
3738

3839
public:
3940

4041
FilePattern(const std::string& path, const std::string& filePattern="", const std::string& block_size="", bool recursive=false, bool suppressWarnings=false);
4142

43+
FilePattern(const std::vector<std::string>& file_array, const std::string& filePattern, bool recursive=false, bool suppressWarnings=false);
44+
4245
~FilePattern();
4346

4447
std::vector<Tuple> getMatchingByMap (Map& variables);
@@ -101,10 +104,12 @@ class FILEPATTERN_EXPORT FilePattern {
101104

102105
static std::string getRegex(std::string filepattern, bool suppress_warnings);
103106

107+
static std::vector<std::string> getVariablesFromPattern(std::string& filepattern, bool supress_warnings);
108+
104109
std::string getPattern();
105110
void setPattern(std::string& pattern);
106111
std::string getPath();
107112
const std::unique_ptr<PatternObject>& getPatternObject() const;
108-
private:
113+
private:
109114
std::unique_ptr<PatternObject> fp_;
110115
};

src/filepattern/cpp/interface/filepattern.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#include "../include/filepattern.h"
2+
#include "../pattern.hpp"
23
#include "filepattern_factory.h"
34

5+
#include <tuple>
6+
47
FilePattern::FilePattern(const std::string& path, const std::string& filePattern, const std::string& block_size, bool recursive, bool suppressWarnings) {
58

69
FilePatternFactory fpf = FilePatternFactory();
@@ -14,6 +17,17 @@ FilePattern::FilePattern(const std::string& path, const std::string& filePattern
1417
}
1518

1619
}
20+
21+
/**
 * @brief Build a FilePattern from an in-memory list of file names/paths.
 *
 * The factory is asked for the pattern object that handles a list of strings,
 * and the resulting object is flagged as non-external.
 *
 * @param file_array       File names or paths to match against the pattern.
 * @param filePattern      Pattern each entry is compared to.
 * @param recursive        Not referenced by this constructor.
 * @param suppressWarnings Forwarded to the pattern object.
 */
FilePattern::FilePattern(const std::vector<std::string>& file_array, const std::string& filePattern, bool recursive, bool suppressWarnings) {
    FilePatternFactory factory;

    fp_ = std::unique_ptr<PatternObject>(factory.getObject(file_array, filePattern, suppressWarnings));
    fp_->external = false;
}
30+
1731
/**
 * @brief Destroy the FilePattern, releasing the owned pattern object.
 */
FilePattern::~FilePattern() {
    // Explicitly drop the owned PatternObject.
    fp_.reset();
}
@@ -120,6 +134,7 @@ std::string FilePattern::inferPattern(const std::string& path, std::string& vari
120134

121135
// create dummy object to avoid the need for static methods in virtual class
122136
std::unique_ptr<PatternObject> fp;
137+
123138
if (block_size == "") {
124139
fp = std::unique_ptr<PatternObject>(fpf.getObject(path, "", block_size, false, true));
125140
} else {
@@ -189,3 +204,7 @@ std::pair<std::vector<std::pair<std::string, Types>> , std::vector<Tuple>> FileP
189204
std::string FilePattern::getRegex(std::string filepattern, bool suppress_warnings) {
190205
return std::get<0>(Pattern::getRegex(filepattern, suppress_warnings));
191206
}
207+
208+
std::vector<std::string> FilePattern::getVariablesFromPattern(std::string& filepattern, bool supress_warnings) {
209+
return std::get<1>(Pattern::getRegex(filepattern, supress_warnings));
210+
}

src/filepattern/cpp/interface/filepattern_factory.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,28 @@
1010
#include "../util/vector_parser.hpp"
1111

1212

13-
1413
class FilePatternFactory {
1514

1615
public:
1716

1817
FilePatternFactory() {}
1918

20-
std::unique_ptr<PatternObject> getObject(const std::string& path, const std::string& file_pattern, const std::string& block_size, bool recursive, bool suppressWarnings) {
19+
std::unique_ptr<PatternObject> getObject(
20+
const std::vector<std::string>& file_array,
21+
const std::string& file_pattern,
22+
bool suppressWarnings) {
23+
24+
return std::make_unique<ArrayPattern>(file_array, file_pattern, suppressWarnings);
25+
26+
}
27+
28+
std::unique_ptr<PatternObject> getObject(
29+
const std::string& path,
30+
const std::string& file_pattern,
31+
const std::string& block_size,
32+
bool recursive,
33+
bool suppressWarnings) {
34+
2135
if (block_size == "") {
2236
if(fs::is_regular_file(path)) {
2337
std::ifstream infile(path);

src/filepattern/cpp/internal/filepattern.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,51 @@ void FilePatternObject::matchFiles() {
143143
this->matchFilesOneDir();
144144
}
145145
}
146+
147+
/**
 * @brief Construct an ArrayPattern and immediately match and sort the given files.
 *
 * @param file_array        File names or paths to match against the pattern.
 * @param pattern           Filepattern to match with.
 * @param suppress_warnings Stored via setSuppressWarnings before any parsing happens.
 */
ArrayPattern::ArrayPattern(const std::vector<std::string>& file_array, const std::string& pattern, bool suppress_warnings) {
    setSuppressWarnings(suppress_warnings);

    // Store the raw pattern; the regex form starts empty and is filled in by matchFiles().
    setFilePattern(pattern);
    setRegexFilePattern("");

    matchFiles(file_array);
    sortFiles();
}
158+
159+
void ArrayPattern::matchFiles(const std::vector<std::string>& file_array) {
160+
161+
filePatternToRegex(); // Get regex of filepattern
162+
163+
Map mapping;
164+
std::vector<std::string> parsed_regex;
165+
166+
std::string s;
167+
std::string file, file_path;
168+
Tuple member;
169+
// Iterate over every file in directory
170+
171+
// check if bracket expression was not properly parsed
172+
if (this->getRegexFilePattern().find('{') != std::string::npos || this->getRegexFilePattern().find('}') != std::string::npos) {
173+
auto start = this->getRegexFilePattern().find('{');
174+
auto end = this->getRegexFilePattern().find('}');
175+
auto length = end - start;
176+
throw std::invalid_argument("Invalid pattern found in bracket expressions in filepattern: \"" + this->getRegexFilePattern().substr(start, length+1) + "\"");
177+
}
178+
179+
std::regex pattern_regex = std::regex(this->getRegexFilePattern());
180+
std::smatch sm;
181+
182+
// iterate over file array and add valid files
183+
for (const auto& path : file_array) {
184+
// Get the current file
185+
auto file_path = path;
186+
std::replace(file_path.begin(), file_path.end(), '\\', '/');
187+
file = s::getBaseName(file_path);
188+
189+
if(regex_match(file, sm, pattern_regex)){
190+
this->valid_files_.push_back(getVariableMap(file_path, sm)); // write to txt file
191+
}
192+
}
193+
};

src/filepattern/cpp/internal/filepattern.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,10 @@ class FilePatternObject : public InternalPattern {
6060
void matchFilesMultDir();
6161

6262
};
63+
64+
/**
 * @brief Pattern object that matches a caller-supplied list of file names/paths
 *        against a filepattern.
 */
class ArrayPattern : public InternalPattern {
public:
    /**
     * @brief Store the pattern, then match and sort the entries of file_array.
     *
     * @param file_array        File names or paths to match.
     * @param pattern           Filepattern to match with.
     * @param suppress_warnings True to suppress warnings.
     */
    ArrayPattern(const std::vector<std::string>& file_array,
                 const std::string& pattern,
                 bool suppress_warnings);

    /**
     * @brief Match the basename of each entry in file_array against the pattern.
     *
     * @param file_array File names or paths to test.
     */
    void matchFiles(const std::vector<std::string>& file_array);
};

src/filepattern/cpp/util/sort.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class ExternalMergeSort {
7373
std::string tmpdir_; // Create a directory to store temporary files
7474
std::string sort_variable_; // Variable to sort by if std_map
7575
std::string block_size_str_; // String input of blocksize
76-
int map_size_; // Number of variables in the map
76+
long unsigned int map_size_; // Number of variables in the map
7777
Map temp_map_;
7878

7979
/**

src/filepattern/filepattern.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Dict, List, Tuple, Union, Set, Any
44
import os
55
from .pydantic_filepattern import create_pydantic_fp, get_pydantic_fp
6+
from pathlib import Path
67

78

89
class PatternObject:
@@ -383,7 +384,7 @@ class FilePattern(PatternObject):
383384

384385
def __init__(
385386
self,
386-
path: str,
387+
input: Union[str, Path, list] ="",
387388
pattern: str = "",
388389
block_size: str = "",
389390
recursive: bool = False,
@@ -400,16 +401,20 @@ def __init__(
400401
the names of the channel subdirectories will be captured for each file.
401402
402403
Args:
403-
path: Path to directory or text file
404+
input: Path to directory or text file or a list of strings to be matched to the filepattern
404405
pattern: Pattern to compare each filename to
405406
block_size: Maximum amount of RAM to consume at once. Defaults to "".
406407
recursive: Iterate over subdirectories. Defaults to False.
407408
suppress_warnings: True to suppress warning printed to console. Defaults to False.
408409
"""
409410

410-
path = str(path) # change path type to string to support pathlib paths
411-
412-
self._file_pattern = backend.FilePattern(path, pattern, block_size, recursive, suppress_warnings)
411+
if (isinstance(input, list)):
412+
self._file_pattern = backend.FilePattern(input, pattern, recursive, suppress_warnings)
413+
elif (isinstance(input, str) or isinstance(input, Path)):
414+
input = str(input) # change path type to string to support pathlib paths
415+
self._file_pattern = backend.FilePattern(input, pattern, block_size, recursive, suppress_warnings)
416+
else:
417+
raise TypeError("Error: input type must either be a string/path to a file or directory or a list of strings")
413418

414419
super().__init__(self._file_pattern, block_size)
415420

0 commit comments

Comments
 (0)