Skip to content

Commit ccbb583

Browse files
authored
Merge pull request #67 from JesseMckinzie/iterator_bug_fix
Iterator bug fixes
2 parents b7a50e3 + 44c6bbd commit ccbb583

File tree

5 files changed

+412
-7
lines changed

5 files changed

+412
-7
lines changed

src/filepattern/cpp/internal/internal_pattern.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "internal_pattern.hpp"
2+
#include "../util/alphanum.hpp"
23

34
using namespace std;
45

@@ -216,8 +217,14 @@ void InternalPattern::sortFiles(){
216217

217218
if (this->valid_files_.size() == 0) return;
218219

219-
sort(this->valid_files_.begin(), this->valid_files_.end(), [](Tuple& m1, Tuple& m2){
220-
return get<1>(m1)[0] < get<1>(m2)[0];
220+
doj::alphanum_less<std::string> comparator; // alphanum comparison algorithm for strings with numeric and alphabetic chars
221+
sort(this->valid_files_.begin(), this->valid_files_.end(), [comparator](Tuple& m1, Tuple& m2){
222+
223+
#ifdef JAVA_BINDING
224+
return comparator(get<1>(m1)[0], get<1>(m2)[0]);
225+
#else
226+
return comparator(get<1>(m1)[0].u8string(), get<1>(m2)[0].u8string());
227+
#endif
221228
});
222229
}
223230

Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
#ifndef ALPHANUM__HPP
2+
#define ALPHANUM__HPP
3+
4+
/*
5+
from https://github.com/readium/readium-sdk/tree/master on 5/8/2024
6+
7+
The Alphanum Algorithm is an improved sorting algorithm for strings
8+
containing numbers. Instead of sorting numbers in ASCII order like a
9+
standard sort, this algorithm sorts numbers in numeric order.
10+
11+
The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
12+
13+
This implementation is Copyright (c) 2008 Dirk Jagdmann <[email protected]>.
14+
It is a cleanroom implementation of the algorithm and not derived by
15+
other's works. In contrast to the versions written by Dave Koelle this
16+
source code is distributed with the libpng/zlib license.
17+
18+
This software is provided 'as-is', without any express or implied
19+
warranty. In no event will the authors be held liable for any damages
20+
arising from the use of this software.
21+
22+
Permission is granted to anyone to use this software for any purpose,
23+
including commercial applications, and to alter it and redistribute it
24+
freely, subject to the following restrictions:
25+
26+
1. The origin of this software must not be misrepresented; you
27+
must not claim that you wrote the original software. If you use
28+
this software in a product, an acknowledgment in the product
29+
documentation would be appreciated but is not required.
30+
31+
2. Altered source versions must be plainly marked as such, and
32+
must not be misrepresented as being the original software.
33+
34+
3. This notice may not be removed or altered from any source
35+
distribution. */
36+
37+
/* $Header: /code/doj/alphanum.hpp,v 1.3 2008/01/28 23:06:47 doj Exp $ */
38+
39+
#include <cassert>
40+
#include <functional>
41+
#include <string>
42+
#include <sstream>
43+
44+
#ifdef ALPHANUM_LOCALE
45+
#include <cctype>
46+
#endif
47+
48+
#ifdef DOJDEBUG
49+
#include <iostream>
50+
#include <typeinfo>
51+
#endif
52+
53+
// TODO: make comparison with hexadecimal numbers. Extend the alphanum_comp() function by traits to choose between decimal and hexadecimal.
54+
55+
namespace doj
{

  // NOTE: this is an altered version of the upstream alphanum.hpp:
  //  - every function defined here is `inline` (the helpers no longer live in
  //    an anonymous namespace), so the header can be included from more than
  //    one translation unit without multiple-definition link errors;
  //  - numeric runs are compared digit by digit, so arbitrarily long numbers
  //    neither overflow nor get truncated when the result is returned as int;
  //  - non-digit characters are compared as unsigned char, giving the same
  //    order regardless of whether plain char is signed on the platform.

  // Helper functions used internally. alphanum_impl() uses no C++ specific
  // features in its interface, so callers holding plain C strings may call it
  // directly.

  // if you want to honour the locale settings for detecting digit
  // characters, you should define ALPHANUM_LOCALE
#ifdef ALPHANUM_LOCALE
  /** wrapper function for ::isdigit()
      @param c character to classify
      @return true if c is a digit character according to isdigit()
  */
  inline bool alphanum_isdigit(int c)
  {
    // isdigit() is only defined for values representable as unsigned char
    // (or EOF); a negative plain char must be converted first.
    return isdigit(static_cast<unsigned char>(c)) != 0;
  }
#else
  /** this function does not consider the current locale and only
      works with ASCII digits.
      @param c character to classify
      @return true if c is a digit character
  */
  inline bool alphanum_isdigit(const char c)
  {
    return c>='0' && c<='9';
  }
#endif

  /**
     compare l and r with strcmp() semantics, but using
     the "Alphanum Algorithm". This function is designed to read
     through the l and r strings only one time, for
     maximum performance. It does not allocate memory for
     substrings. When ALPHANUM_LOCALE is defined it uses the C-library
     functions isdigit() and strtoul() to recognise and parse numbers;
     otherwise it uses its own digit handling which only works with ASCII
     digit characters, but has no limit on the length of a number.

     @param l NULL-terminated C-style string
     @param r NULL-terminated C-style string
     @return negative if l<r, 0 if l equals r, positive if l>r
  */
  inline int alphanum_impl(const char *l, const char *r)
  {
    // STRING: comparing character by character; NUMBER: both cursors sit on
    // the first digit of a numeric run.
    enum class scan_mode { STRING, NUMBER };
    scan_mode mode=scan_mode::STRING;

    while(*l && *r)
      {
        if(mode == scan_mode::STRING)
          {
            while(*l && *r)
              {
                const char l_char=*l, r_char=*r;
                // check if these are digit characters
                const bool l_digit=alphanum_isdigit(l_char), r_digit=alphanum_isdigit(r_char);
                // if both characters are digits, we continue in NUMBER mode
                if(l_digit && r_digit)
                  {
                    mode=scan_mode::NUMBER;
                    break;
                  }
                // if only the left character is a digit, we have a result
                if(l_digit) return -1;
                // if only the right character is a digit, we have a result
                if(r_digit) return +1;
                // compare as unsigned char (like strcmp) so the order does not
                // depend on the signedness of plain char
                const int diff=static_cast<unsigned char>(l_char) - static_cast<unsigned char>(r_char);
                // if they differ we have a result
                if(diff != 0) return diff;
                // otherwise process the next characters
                ++l;
                ++r;
              }
          }
        else // mode==NUMBER
          {
#ifdef ALPHANUM_LOCALE
            // NOTE(review): strtoul() is declared in <cstdlib>, which this
            // header does not include itself -- confirm it is available.
            // get the left number
            char *end;
            const unsigned long l_int=strtoul(l, &end, 0);
            l=end;

            // get the right number
            const unsigned long r_int=strtoul(r, &end, 0);
            r=end;

            // compare instead of subtracting: a difference of two unsigned
            // long values does not fit the int return type
            if(l_int != r_int)
              return l_int < r_int ? -1 : +1;
#else
            // leading zeros do not change the numeric value
            while(*l == '0') ++l;
            while(*r == '0') ++r;

            // find the end of both digit runs ('\0' is not a digit)
            const char *l_end=l;
            while(alphanum_isdigit(*l_end)) ++l_end;
            const char *r_end=r;
            while(alphanum_isdigit(*r_end)) ++r_end;

            // without leading zeros the longer run is the larger number
            const auto l_len=l_end - l;
            const auto r_len=r_end - r;
            if(l_len != r_len)
              return l_len < r_len ? -1 : +1;

            // same length: the first differing digit decides
            for(; l != l_end; ++l, ++r)
              {
                if(*l != *r)
                  return *l < *r ? -1 : +1;
              }
            // both cursors are now just past their (equal) numbers
#endif

            // the numbers are equal, process the next substring in STRING mode
            mode=scan_mode::STRING;
          }
      }

    // at least one string is exhausted; the shorter one sorts first
    if(*r) return -1;
    if(*l) return +1;
    return 0;
  }

  /**
     Compare left and right with the same semantics as strcmp(), but with the
     "Alphanum Algorithm" which produces more human-friendly
     results. The classes lT and rT must implement "std::ostream
     operator<< (std::ostream&, const Ty&)".

     @return negative if left<right, 0 if left==right, positive if left>right.
  */
  template <typename lT, typename rT>
  int alphanum_comp(const lT& left, const rT& right)
  {
    // render both operands to text, then compare the text
    std::ostringstream l; l << left;
    std::ostringstream r; r << right;
    const std::string l_str=l.str();
    const std::string r_str=r.str();
    return alphanum_impl(l_str.c_str(), r_str.c_str());
  }

  /**
     Compare l and r with the same semantics as strcmp(), but with
     the "Alphanum Algorithm" which produces more human-friendly
     results.

     @return negative if l<r, 0 if l==r, positive if l>r.
  */
  template <>
  inline int alphanum_comp<std::string>(const std::string& l, const std::string& r)
  {
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<std::string,std::string> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l.c_str(), r.c_str());
  }

  ////////////////////////////////////////////////////////////////////////////

  // now follow a lot of overloaded alphanum_comp() functions to get a
  // direct call to alphanum_impl() upon the various combinations of c
  // and c++ strings. They are `inline` because they are defined in a header.

  /**
     Compare l and r with the same semantics as strcmp(), but with
     the "Alphanum Algorithm" which produces more human-friendly
     results. Pointer arguments must not be null.

     @return negative if l<r, 0 if l==r, positive if l>r.
  */
  inline int alphanum_comp(char* l, char* r)
  {
    assert(l);
    assert(r);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<char*,char*> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l, r);
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(const char* l, const char* r)
  {
    assert(l);
    assert(r);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<const char*,const char*> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l, r);
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(char* l, const char* r)
  {
    assert(l);
    assert(r);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<char*,const char*> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l, r);
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(const char* l, char* r)
  {
    assert(l);
    assert(r);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<const char*,char*> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l, r);
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(const std::string& l, char* r)
  {
    assert(r);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<std::string,char*> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l.c_str(), r);
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(char* l, const std::string& r)
  {
    assert(l);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<char*,std::string> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l, r.c_str());
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(const std::string& l, const char* r)
  {
    assert(r);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<std::string,const char*> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l.c_str(), r);
  }

  /** @copydoc alphanum_comp(char*, char*) */
  inline int alphanum_comp(const char* l, const std::string& r)
  {
    assert(l);
#ifdef DOJDEBUG
    std::clog << "alphanum_comp<const char*,std::string> " << l << "," << r << std::endl;
#endif
    return alphanum_impl(l, r.c_str());
  }

  ////////////////////////////////////////////////////////////////////////////

  /**
     Minimal base that only provides the three member typedefs
     first_argument_type, second_argument_type and result_type.
  */
  template<class Arg1, class Arg2, class Result>
  struct binary_function
  {
    using first_argument_type = Arg1;
    using second_argument_type = Arg2;
    using result_type = Result;
  };

  /**
     Functor class to compare two objects with the "Alphanum
     Algorithm". If the objects are no std::string, they must
     implement "std::ostream operator<< (std::ostream&, const Ty&)".
  */
  template<class Ty>
  struct alphanum_less : public binary_function<Ty, Ty, bool>
  {
    /** @return true if left sorts strictly before right */
    bool operator()(const Ty& left, const Ty& right) const
    {
      return alphanum_comp(left, right) < 0;
    }
  };

}
323+
324+
325+
#endif

0 commit comments

Comments
 (0)