Skip to content

Commit 178998d

Browse files
committed
fix re2 spm build
1 parent a655bde commit 178998d

22 files changed

+4435
-0
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// Copyright 2016 The RE2 Authors. All Rights Reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#ifndef RE2_BITMAP256_H_
6+
#define RE2_BITMAP256_H_
7+
8+
#ifdef _MSC_VER
9+
#include <intrin.h>
10+
#endif
11+
#include <stdint.h>
12+
#include <string.h>
13+
14+
#include "util/util.h"
15+
#include "util/logging.h"
16+
17+
namespace re2 {
18+
19+
class Bitmap256 {
20+
public:
21+
Bitmap256() {
22+
Clear();
23+
}
24+
25+
// Clears all of the bits.
26+
void Clear() {
27+
memset(words_, 0, sizeof words_);
28+
}
29+
30+
// Tests the bit with index c.
31+
bool Test(int c) const {
32+
DCHECK_GE(c, 0);
33+
DCHECK_LE(c, 255);
34+
35+
return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
36+
}
37+
38+
// Sets the bit with index c.
39+
void Set(int c) {
40+
DCHECK_GE(c, 0);
41+
DCHECK_LE(c, 255);
42+
43+
words_[c / 64] |= (uint64_t{1} << (c % 64));
44+
}
45+
46+
// Finds the next non-zero bit with index >= c.
47+
// Returns -1 if no such bit exists.
48+
int FindNextSetBit(int c) const;
49+
50+
private:
51+
// Finds the least significant non-zero bit in n.
52+
static int FindLSBSet(uint64_t n) {
53+
DCHECK_NE(n, 0);
54+
#if defined(__GNUC__)
55+
return __builtin_ctzll(n);
56+
#elif defined(_MSC_VER) && defined(_M_X64)
57+
unsigned long c;
58+
_BitScanForward64(&c, n);
59+
return static_cast<int>(c);
60+
#elif defined(_MSC_VER) && defined(_M_IX86)
61+
unsigned long c;
62+
if (static_cast<uint32_t>(n) != 0) {
63+
_BitScanForward(&c, static_cast<uint32_t>(n));
64+
return static_cast<int>(c);
65+
} else {
66+
_BitScanForward(&c, static_cast<uint32_t>(n >> 32));
67+
return static_cast<int>(c) + 32;
68+
}
69+
#else
70+
int c = 63;
71+
for (int shift = 1 << 5; shift != 0; shift >>= 1) {
72+
uint64_t word = n << shift;
73+
if (word != 0) {
74+
n = word;
75+
c -= shift;
76+
}
77+
}
78+
return c;
79+
#endif
80+
}
81+
82+
uint64_t words_[4];
83+
};
84+
85+
int Bitmap256::FindNextSetBit(int c) const {
86+
DCHECK_GE(c, 0);
87+
DCHECK_LE(c, 255);
88+
89+
// Check the word that contains the bit. Mask out any lower bits.
90+
int i = c / 64;
91+
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
92+
if (word != 0)
93+
return (i * 64) + FindLSBSet(word);
94+
95+
// Check any following words.
96+
i++;
97+
switch (i) {
98+
case 1:
99+
if (words_[1] != 0)
100+
return (1 * 64) + FindLSBSet(words_[1]);
101+
FALLTHROUGH_INTENDED;
102+
case 2:
103+
if (words_[2] != 0)
104+
return (2 * 64) + FindLSBSet(words_[2]);
105+
FALLTHROUGH_INTENDED;
106+
case 3:
107+
if (words_[3] != 0)
108+
return (3 * 64) + FindLSBSet(words_[3]);
109+
FALLTHROUGH_INTENDED;
110+
default:
111+
return -1;
112+
}
113+
}
114+
115+
} // namespace re2
116+
117+
#endif // RE2_BITMAP256_H_
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Copyright 2009 The RE2 Authors. All Rights Reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#ifndef RE2_FILTERED_RE2_H_
6+
#define RE2_FILTERED_RE2_H_
7+
8+
// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
9+
// It provides a prefilter mechanism that helps in cutting down the
10+
// number of regexps that need to be actually searched.
11+
//
12+
// By design, it does not include a string matching engine. This is to
13+
// allow the user of the class to use their favorite string matching
14+
// engine. The overall flow is: Add all the regexps using Add, then
15+
// Compile the FilteredRE2. Compile returns strings that need to be
16+
// matched. Note that the returned strings are lowercased and distinct.
17+
// For applying regexps to a search text, the caller does the string
18+
// matching using the returned strings. When doing the string match,
19+
// note that the caller has to do that in a case-insensitive way or
20+
// on a lowercased version of the search text. Then call FirstMatch
21+
// or AllMatches with a vector of indices of strings that were found
22+
// in the text to get the actual regexp matches.
23+
24+
#include <memory>
25+
#include <string>
26+
#include <vector>
27+
28+
#include "re2/re2.h"
29+
30+
namespace re2 {
31+
32+
class PrefilterTree;
33+
34+
class FilteredRE2 {
35+
public:
36+
FilteredRE2();
37+
explicit FilteredRE2(int min_atom_len);
38+
~FilteredRE2();
39+
40+
// Not copyable.
41+
FilteredRE2(const FilteredRE2&) = delete;
42+
FilteredRE2& operator=(const FilteredRE2&) = delete;
43+
// Movable.
44+
FilteredRE2(FilteredRE2&& other);
45+
FilteredRE2& operator=(FilteredRE2&& other);
46+
47+
// Uses RE2 constructor to create a RE2 object (re). Returns
48+
// re->error_code(). If error_code is other than NoError, then re is
49+
// deleted and not added to re2_vec_.
50+
RE2::ErrorCode Add(const StringPiece& pattern,
51+
const RE2::Options& options,
52+
int* id);
53+
54+
// Prepares the regexps added by Add for filtering. Returns a set
55+
// of strings that the caller should check for in candidate texts.
56+
// The returned strings are lowercased and distinct. When doing
57+
// string matching, it should be performed in a case-insensitive
58+
// way or the search text should be lowercased first. Call after
59+
// all Add calls are done.
60+
void Compile(std::vector<std::string>* strings_to_match);
61+
62+
// Returns the index of the first matching regexp.
63+
// Returns -1 on no match. Can be called prior to Compile.
64+
// Does not do any filtering: simply tries to Match the
65+
// regexps in a loop.
66+
int SlowFirstMatch(const StringPiece& text) const;
67+
68+
// Returns the index of the first matching regexp.
69+
// Returns -1 on no match. Compile has to be called before
70+
// calling this.
71+
int FirstMatch(const StringPiece& text,
72+
const std::vector<int>& atoms) const;
73+
74+
// Returns the indices of all matching regexps, after first clearing
75+
// matched_regexps.
76+
bool AllMatches(const StringPiece& text,
77+
const std::vector<int>& atoms,
78+
std::vector<int>* matching_regexps) const;
79+
80+
// Returns the indices of all potentially matching regexps after first
81+
// clearing potential_regexps.
82+
// A regexp is potentially matching if it passes the filter.
83+
// If a regexp passes the filter it may still not match.
84+
// A regexp that does not pass the filter is guaranteed to not match.
85+
void AllPotentials(const std::vector<int>& atoms,
86+
std::vector<int>* potential_regexps) const;
87+
88+
// The number of regexps added.
89+
int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
90+
91+
// Get the individual RE2 objects.
92+
const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
93+
94+
private:
95+
// Print prefilter.
96+
void PrintPrefilter(int regexpid);
97+
98+
// Useful for testing and debugging.
99+
void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
100+
std::vector<int>* passed_regexps);
101+
102+
// All the regexps in the FilteredRE2.
103+
std::vector<RE2*> re2_vec_;
104+
105+
// Has the FilteredRE2 been compiled using Compile()
106+
bool compiled_;
107+
108+
// An AND-OR tree of string atoms used for filtering regexps.
109+
std::unique_ptr<PrefilterTree> prefilter_tree_;
110+
};
111+
112+
} // namespace re2
113+
114+
#endif // RE2_FILTERED_RE2_H_
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright 2018 The RE2 Authors. All Rights Reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#ifndef RE2_POD_ARRAY_H_
6+
#define RE2_POD_ARRAY_H_
7+
8+
#include <memory>
9+
#include <type_traits>
10+
11+
namespace re2 {
12+
13+
// A fixed-length, heap-allocated array of POD elements. The storage is
// obtained from std::allocator<T> and released automatically when the
// PODArray is destroyed. Ownership lives in a std::unique_ptr, so the
// array can be moved but not copied.
template <typename T>
class PODArray {
 public:
  static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
                "T must be POD");

  // Constructs an empty array: data() is null and size() is 0.
  PODArray()
      : ptr_() {}

  // Allocates storage for len elements. The elements are left
  // uninitialized.
  explicit PODArray(int len)
      : ptr_(std::allocator<T>().allocate(len), Deleter(len)) {}

  // Returns a pointer to the first element, or null if no storage is
  // owned.
  T* data() const {
    return ptr_.get();
  }

  // Returns the number of elements. The length is stored in the deleter,
  // which is left untouched when the array is moved from; checking the
  // pointer makes an array that owns no storage report 0 instead of a
  // stale length alongside a null data().
  int size() const {
    return ptr_.get() != nullptr ? ptr_.get_deleter().len_ : 0;
  }

  // Returns a reference to the element at pos. No bounds checking is
  // performed.
  T& operator[](int pos) const {
    return ptr_[pos];
  }

 private:
  // Deleter for ptr_. It remembers the allocation length because
  // std::allocator<T>::deallocate needs it.
  struct Deleter {
    Deleter()
        : len_(0) {}
    explicit Deleter(int len)
        : len_(len) {}

    void operator()(T* ptr) const {
      std::allocator<T>().deallocate(ptr, len_);
    }

    int len_;
  };

  std::unique_ptr<T[], Deleter> ptr_;
};
52+
53+
} // namespace re2
54+
55+
#endif // RE2_POD_ARRAY_H_
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright 2009 The RE2 Authors. All Rights Reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#ifndef RE2_PREFILTER_H_
6+
#define RE2_PREFILTER_H_
7+
8+
// Prefilter is the class used to extract string guards from regexps.
9+
// Rather than using Prefilter class directly, use FilteredRE2.
10+
// See filtered_re2.h
11+
12+
#include <set>
13+
#include <string>
14+
#include <vector>
15+
16+
#include "util/util.h"
17+
#include "util/logging.h"
18+
19+
namespace re2 {
20+
21+
class RE2;
22+
23+
class Regexp;
24+
25+
class Prefilter {
26+
// Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
27+
public:
28+
enum Op {
29+
ALL = 0, // Everything matches
30+
NONE, // Nothing matches
31+
ATOM, // The string atom() must match
32+
AND, // All in subs() must match
33+
OR, // One of subs() must match
34+
};
35+
36+
explicit Prefilter(Op op);
37+
~Prefilter();
38+
39+
Op op() { return op_; }
40+
const std::string& atom() const { return atom_; }
41+
void set_unique_id(int id) { unique_id_ = id; }
42+
int unique_id() const { return unique_id_; }
43+
44+
// The children of the Prefilter node.
45+
std::vector<Prefilter*>* subs() {
46+
DCHECK(op_ == AND || op_ == OR);
47+
return subs_;
48+
}
49+
50+
// Set the children vector. Prefilter takes ownership of subs and
51+
// subs_ will be deleted when Prefilter is deleted.
52+
void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
53+
54+
// Given a RE2, return a Prefilter. The caller takes ownership of
55+
// the Prefilter and should deallocate it. Returns NULL if Prefilter
56+
// cannot be formed.
57+
static Prefilter* FromRE2(const RE2* re2);
58+
59+
// Returns a readable debug string of the prefilter.
60+
std::string DebugString() const;
61+
62+
private:
63+
class Info;
64+
65+
// Combines two prefilters together to create an AND. The passed
66+
// Prefilters will be part of the returned Prefilter or deleted.
67+
static Prefilter* And(Prefilter* a, Prefilter* b);
68+
69+
// Combines two prefilters together to create an OR. The passed
70+
// Prefilters will be part of the returned Prefilter or deleted.
71+
static Prefilter* Or(Prefilter* a, Prefilter* b);
72+
73+
// Generalized And/Or
74+
static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
75+
76+
static Prefilter* FromRegexp(Regexp* a);
77+
78+
static Prefilter* FromString(const std::string& str);
79+
80+
static Prefilter* OrStrings(std::set<std::string>* ss);
81+
82+
static Info* BuildInfo(Regexp* re);
83+
84+
Prefilter* Simplify();
85+
86+
// Kind of Prefilter.
87+
Op op_;
88+
89+
// Sub-matches for AND or OR Prefilter.
90+
std::vector<Prefilter*>* subs_;
91+
92+
// Actual string to match in leaf node.
93+
std::string atom_;
94+
95+
// If different prefilters have the same string atom, or if they are
96+
// structurally the same (e.g., OR of same atom strings) they are
97+
// considered the same unique nodes. This is the id for each unique
98+
// node. This field is populated with a unique id for every node,
99+
// and -1 for duplicate nodes.
100+
int unique_id_;
101+
102+
Prefilter(const Prefilter&) = delete;
103+
Prefilter& operator=(const Prefilter&) = delete;
104+
};
105+
106+
} // namespace re2
107+
108+
#endif // RE2_PREFILTER_H_

0 commit comments

Comments
 (0)