Skip to content

Commit 21e20d2

Browse files
authored
Merge pull request #85 from JesseMckinzie/sort_flag
Add flag to avoid sorting captured files
2 parents fbbe090 + 3829f3b commit 21e20d2

26 files changed

+160
-84
lines changed

.github/workflows/build_and_test_mac.yml

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,17 +12,18 @@ env:
1212
jobs:
1313
Build_and_Run_GTest:
1414
name: Build and Run GoogleTest
15-
runs-on: macos-12
15+
runs-on: macos-13
1616
defaults:
1717
run:
1818
shell: bash -l {0}
1919

2020
steps:
2121
- uses: actions/checkout@v3
2222

23-
- uses: conda-incubator/setup-miniconda@v2
23+
- uses: conda-incubator/setup-miniconda@v3
2424
with:
2525
activate-environment: anaconda-client-env
26+
miniconda-version: "latest"
2627
python-version: 3.8
2728
channels: conda-forge
2829
auto-activate-base: false
@@ -45,18 +46,19 @@ jobs:
4546

4647
Build_and_Run_PyTest:
4748
name: Build and Run PyTest
48-
runs-on: macos-12
49+
runs-on: macos-13
4950
defaults:
5051
run:
5152
shell: bash -l {0}
5253

5354
steps:
5455
- uses: actions/checkout@v3
5556

56-
- uses: conda-incubator/setup-miniconda@v2
57+
- uses: conda-incubator/setup-miniconda@v3
5758
with:
5859
activate-environment: anaconda-client-env
5960
python-version: 3.8
61+
miniconda-version: "latest"
6062
channels: conda-forge
6163
auto-activate-base: false
6264

.github/workflows/build_wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
MACOSX_DEPLOYMENT_TARGET: "10.15"
1313
strategy:
1414
matrix:
15-
os: [ubuntu-20.04, macos-12, windows-latest]
15+
os: [ubuntu-20.04, macos-13, windows-latest]
1616
cibw_archs: ["auto64"]
1717
cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*"]
1818

.github/workflows/publish_pypi.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
MACOSX_DEPLOYMENT_TARGET: "10.15"
1515
strategy:
1616
matrix:
17-
os: [ubuntu-20.04, macos-12, windows-latest]
17+
os: [ubuntu-20.04, macos-13, windows-latest]
1818
cibw_archs: ["auto64"]
1919
cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*"]
2020

src/filepattern/cpp/bindings.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ PYBIND11_MODULE(backend, m){
2222
const std::string&,
2323
const std::string&,
2424
bool,
25+
bool,
2526
bool>())
2627
.def(py::init<const std::vector<std::string>&,
2728
const std::string&,
2829
bool,
30+
bool,
2931
bool>())
3032
.def("getMatching", &FilePattern::getMatching)
3133
.def("getOccurrences", &FilePattern::getOccurrences)

src/filepattern/cpp/external/external_filepattern.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
using namespace std;
44

5-
ExternalFilePattern::ExternalFilePattern(const string& path, const string& filePattern, const string& block_size, bool recursive, bool suppressWarnings):
5+
ExternalFilePattern::ExternalFilePattern(const string& path, const string& filePattern, const string& block_size, bool recursive, bool suppressWarnings, bool sorted):
66
ExternalPattern(path, block_size, recursive) {
77

88
this->setSuppressWarnings(suppressWarnings);
@@ -23,13 +23,17 @@ ExternalPattern(path, block_size, recursive) {
2323
this->setFirstCall(true); // first call to next() has not occurred
2424

2525
this->matchFiles(); // match files to pattern
26-
27-
ExternalMergeSort sort = ExternalMergeSort(std_map,
28-
this->getValidFilesPath(),
29-
this->getValidFilesPath(),
30-
this->stream_.getBlockSizeStr(),
31-
"",
32-
this->stream_.map_size_);
26+
27+
this->setIsSorted(sorted);
28+
29+
if (isSorted()) {
30+
ExternalMergeSort sort = ExternalMergeSort(std_map,
31+
this->getValidFilesPath(),
32+
this->getValidFilesPath(),
33+
this->stream_.getBlockSizeStr(),
34+
"",
35+
this->stream_.map_size_);
36+
}
3337

3438
this->group_stream_.open(this->stream_.getValidFilesPath());
3539
this->infile_.open(this->getValidFilesPath()); // open temp file for the valid files

src/filepattern/cpp/external/external_filepattern.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class ExternalFilePattern : public ExternalPattern {
3333
* @param recursive Iterate over all subdirectories if true
3434
*/
3535

36-
ExternalFilePattern(const std::string& path, const std::string& file_pattern, const std::string& block_size="50 MB", bool recursive=false, bool suppress_warnings=false);
36+
ExternalFilePattern(const std::string& path, const std::string& file_pattern, const std::string& block_size="50 MB", bool recursive=false, bool suppress_warnings=false, bool sorted=true);
3737

3838
ExternalFilePattern(){}
3939

src/filepattern/cpp/external/external_pattern.cpp

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,12 @@ void ExternalPattern::groupByHelper(){
185185
grouped_variables.clear();
186186
for(auto& g: vec.first) grouped_variables.push_back(g);
187187
// Sort the matched files by the group_by parameter
188-
sort(vec.second.begin(), vec.second.end(), [&group_by = as_const(group_by)](Tuple& p1, Tuple& p2){
189-
return get<0>(p1)[group_by] < get<0>(p2)[group_by];
190-
});
188+
189+
if (isSorted()) {
190+
sort(vec.second.begin(), vec.second.end(), [&group_by = as_const(group_by)](Tuple& p1, Tuple& p2){
191+
return get<0>(p1)[group_by] < get<0>(p2)[group_by];
192+
});
193+
}
191194

192195
Types current_value = get<0>(vec.second[0])[group_by]; // get the value of variable
193196
vector<Tuple> empty_vec;
@@ -207,9 +210,12 @@ void ExternalPattern::groupByHelper(){
207210

208211
grouped_variables.push_back(make_pair(group_by, current_value));
209212
temp_group.push_back(make_pair(grouped_variables, temp_vec));
210-
sort(temp_group[group_ptr].second.begin(), temp_group[group_ptr].second.end(), [](Tuple& m1, Tuple& m2){
211-
return get<1>(m1)[0] < get<1>(m2)[0];
212-
});
213+
214+
if (isSorted()) {
215+
sort(temp_group[group_ptr].second.begin(), temp_group[group_ptr].second.end(), [](Tuple& m1, Tuple& m2){
216+
return get<1>(m1)[0] < get<1>(m2)[0];
217+
});
218+
}
213219
temp_vec.clear();
214220

215221
if (i < vec.second.size()){
@@ -290,10 +296,12 @@ void ExternalPattern::nextGroup(){
290296
} else {
291297

292298
// update variable value and end loop on variable value change
293-
// sort block by basename
294-
sort(this->current_group_[0].second.begin(), this->current_group_[0].second.end(), [](Tuple& m1, Tuple& m2){
295-
return get<1>(m1)[0] < get<1>(m2)[0];
296-
});
299+
// sort block by basename
300+
if (isSorted()) {
301+
sort(this->current_group_[0].second.begin(), this->current_group_[0].second.end(), [](Tuple& m1, Tuple& m2){
302+
return get<1>(m1)[0] < get<1>(m2)[0];
303+
});
304+
}
297305
this->current_value_ = get<0>(this->temp_)[this->group_[0]];
298306
value_added = false;
299307

@@ -302,9 +310,12 @@ void ExternalPattern::nextGroup(){
302310
};
303311
}
304312
}
305-
sort(this->current_group_[0].second.begin(), this->current_group_[0].second.end(), [](Tuple& m1, Tuple& m2){
306-
return get<1>(m1)[0] < get<1>(m2)[0];
307-
});
313+
314+
if (isSorted()) {
315+
sort(this->current_group_[0].second.begin(), this->current_group_[0].second.end(), [](Tuple& m1, Tuple& m2){
316+
return get<1>(m1)[0] < get<1>(m2)[0];
317+
});
318+
}
308319
this->groupByHelper();
309320
}
310321

@@ -334,12 +345,15 @@ void ExternalPattern::groupBy(vector<string>& group_by) {
334345
// sort valid files externally
335346
string path = this->stream_.getValidFilesPath();
336347
this->tmp_directories_.push_back(path);
337-
ExternalMergeSort sort = ExternalMergeSort(std_map,
338-
path,
339-
path,
340-
this->stream_.getBlockSizeStr(),
341-
group_by[0],
342-
this->stream_.map_size_);
348+
349+
if (isSorted()) {
350+
ExternalMergeSort sort = ExternalMergeSort(std_map,
351+
path,
352+
path,
353+
this->stream_.getBlockSizeStr(),
354+
group_by[0],
355+
this->stream_.map_size_);
356+
}
343357
}
344358

345359
string ExternalPattern::externalOutPutName(){

src/filepattern/cpp/external/external_stringpattern.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
using namespace std;
44

5-
ExternalStringPattern::ExternalStringPattern(const string& path, const string& file_pattern, const string& block_size, bool suppress_warnings):
5+
ExternalStringPattern::ExternalStringPattern(const string& path, const string& file_pattern, const string& block_size, bool suppress_warnings, bool sorted):
66
ExternalPattern(path, block_size, false) {
77
this->setSuppressWarnings(suppress_warnings);
88
this->setPath(path); // store path to target directory
@@ -23,6 +23,8 @@ ExternalPattern(path, block_size, false) {
2323

2424
this->setFirstCall(true); // first call to next() has not occurred
2525

26+
this->setIsSorted(sorted);
27+
2628
this->matchFiles(); // match files to pattern
2729
this->group_stream_.open(this->stream_.getValidFilesPath());
2830
this->infile_.open(this->getValidFilesPath()); // open temp file for the valid files

src/filepattern/cpp/external/external_stringpattern.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class ExternalStringPattern : public ExternalPattern {
3434
* @param recursive Iterate over all subdirectories if true
3535
*/
3636

37-
ExternalStringPattern(const std::string& path, const std::string& file_pattern, const std::string& block_size="50 MB", bool suppress_warnings=false);
37+
ExternalStringPattern(const std::string& path, const std::string& file_pattern, const std::string& block_size="50 MB", bool suppress_warnings=false, bool sorted=true);
3838

3939
~ExternalStringPattern();
4040

src/filepattern/cpp/external/external_vectorpattern.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ const std::regex ExternalVectorPattern::STITCH_REGEX_ = std::regex("(corr): (.*)
66
const std::vector<std::regex> ExternalVectorPattern::STITCH_REGEX_VECTOR_ = {std::regex("(corr):\\s*(.*?);"), std::regex("(position):\\s*\\((.*?),\\s*(.*?)\\);"), std::regex("(grid):\\s*\\((.*),\\s*(.*)\\);")};
77
const std::vector<std::string> ExternalVectorPattern::STITCH_VARIABLES_ = {"correlation","posX","posY","gridX","gridY"}; // stitching vector variables
88

9-
ExternalVectorPattern::ExternalVectorPattern(const string& path, const string& file_pattern, const string& block_size, bool suppress_warnings):
9+
ExternalVectorPattern::ExternalVectorPattern(const string& path, const string& file_pattern, const string& block_size, bool suppress_warnings, bool sorted):
1010
ExternalPattern(path, block_size, false){
1111
this->setSuppressWarnings(suppress_warnings);
1212
this->path_ = path; // store path to target directory
@@ -26,6 +26,7 @@ ExternalPattern(path, block_size, false){
2626
this->setFirstCall(true); // first call to next() has not occurred
2727

2828
this->matchFiles();
29+
this->setIsSorted(sorted);
2930

3031
this->group_stream_.open(this->stream_.getValidFilesPath());
3132
this->infile_.open(this->getValidFilesPath()); // open temp file for the valid files

0 commit comments

Comments
 (0)