Skip to content

Commit 5de06bf

Browse files
authored
Merge pull request Tencent#882 from StilesCrisis/lookaheadparser
Lookahead Parser
2 parents 2df32fb + 8da89f5 commit 5de06bf

File tree

2 files changed

+343
-0
lines changed

2 files changed

+343
-0
lines changed

example/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(EXAMPLES
1010
filterkey
1111
filterkeydom
1212
jsonx
13+
lookaheadparser
1314
messagereader
1415
parsebyparts
1516
pretty
Lines changed: 342 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
#include "rapidjson/reader.h"
2+
#include "rapidjson/document.h"
3+
#include <iostream>
4+
5+
// This example demonstrates JSON token-by-token parsing with an API that is
6+
// more direct; you don't need to design your logic around a handler object and
7+
// callbacks. Instead, you retrieve values from the JSON stream by calling
8+
// GetInt(), GetDouble(), GetString() and GetBool(), traverse into structures
9+
// by calling EnterObject() and EnterArray(), and skip over unwanted data by
10+
// calling SkipValue(). When you know your JSON's structure, this can be quite
11+
// convenient.
12+
//
13+
// If you aren't sure of what's next in the JSON data, you can use PeekType() and
14+
// PeekValue() to look ahead to the next object before reading it.
15+
//
16+
// If you call the wrong retrieval method--e.g. GetInt when the next JSON token is
17+
// not an int, EnterObject or EnterArray when there isn't actually an object or array
18+
// to read--the stream parsing will end immediately and no more data will be delivered.
19+
//
20+
// After calling EnterObject, you retrieve keys via NextObjectKey() and values via
21+
// the normal getters. When NextObjectKey() returns null, you have exited the
22+
// object, or you can call ExitObject() to skip to the end of the object
23+
// immediately. If you fetch the entire object (i.e. NextObjectKey() returned null),
24+
// you should not call ExitObject().
25+
//
26+
// After calling EnterArray(), you must alternate between calling NextArrayValue()
27+
// to see if the array has more data, and then retrieving values via the normal
28+
// getters. You can call ExitArray() to skip to the end of the array immediately.
29+
// If you fetch the entire array (i.e. NextArrayValue() returned false),
30+
// you should not call ExitArray().
31+
//
32+
// This parser uses in-situ strings, so the JSON buffer will be altered during the
33+
// parse.
34+
35+
using namespace rapidjson;
36+
37+
38+
class LookaheadParserHandler {
39+
public:
40+
bool Null() { st_ = kHasValue; v_.SetNull(); return true; }
41+
bool Bool(bool b) { st_ = kHasValue; v_.SetBool(b); return true; }
42+
bool Int(int i) { st_ = kHasValue; v_.SetInt(i); return true; }
43+
bool Uint(unsigned u) { st_ = kHasValue; v_.SetUint(u); return true; }
44+
bool Int64(int64_t i) { st_ = kHasValue; v_.SetInt64(i); return true; }
45+
bool Uint64(uint64_t u) { st_ = kHasValue; v_.SetUint64(u); return true; }
46+
bool Double(double d) { st_ = kHasValue; v_.SetDouble(d); return true; }
47+
bool RawNumber(const char*, SizeType, bool) { return false; }
48+
bool String(const char* str, SizeType length, bool) { st_ = kHasValue; v_.SetString(str, length); return true; }
49+
bool StartObject() { st_ = kEnteringObject; return true; }
50+
bool Key(const char* str, SizeType length, bool) { st_ = kHasKey; v_.SetString(str, length); return true; }
51+
bool EndObject(SizeType) { st_ = kExitingObject; return true; }
52+
bool StartArray() { st_ = kEnteringArray; return true; }
53+
bool EndArray(SizeType) { st_ = kExitingArray; return true; }
54+
55+
protected:
56+
LookaheadParserHandler(char* str);
57+
void ParseNext();
58+
59+
protected:
60+
enum LookaheadParsingState {
61+
kError,
62+
kHasValue,
63+
kHasKey,
64+
kEnteringObject,
65+
kExitingObject,
66+
kEnteringArray,
67+
kExitingArray
68+
};
69+
70+
Value v_;
71+
LookaheadParsingState st_;
72+
Reader r_;
73+
InsituStringStream ss_;
74+
75+
static const int parseFlags = kParseDefaultFlags | kParseInsituFlag;
76+
};
77+
78+
// Binds the handler to the JSON buffer `str` and primes the lookahead by
// parsing the first token.
//
// st_ starts out as kError so that it always holds a defined value: if the
// very first parse step fails before any handler callback runs (for example
// on an empty buffer), no callback overwrites it and the parser correctly
// reports itself as invalid instead of reading an uninitialised state.
LookaheadParserHandler::LookaheadParserHandler(char* str) : st_(kError), ss_(str) {
    r_.IterativeParseInit();
    ParseNext();
}
82+
83+
void LookaheadParserHandler::ParseNext() {
84+
if (r_.HasParseError()) {
85+
st_ = kError;
86+
return;
87+
}
88+
89+
r_.IterativeParseNext<parseFlags>(ss_, *this);
90+
}
91+
92+
class LookaheadParser : protected LookaheadParserHandler {
93+
public:
94+
LookaheadParser(char* str) : LookaheadParserHandler(str) {}
95+
96+
void EnterObject();
97+
void EnterArray();
98+
void ExitObject();
99+
void ExitArray();
100+
const char* NextObjectKey();
101+
bool NextArrayValue();
102+
int GetInt();
103+
double GetDouble();
104+
const char* GetString();
105+
bool GetBool();
106+
void GetNull();
107+
108+
void SkipValue();
109+
Value* PeekValue();
110+
int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array)
111+
112+
bool IsValid() { return st_ != kError; }
113+
};
114+
115+
void LookaheadParser::EnterObject() {
116+
if (st_ != kEnteringObject) {
117+
st_ = kError;
118+
return;
119+
}
120+
121+
ParseNext();
122+
}
123+
124+
void LookaheadParser::EnterArray() {
125+
if (st_ != kEnteringArray) {
126+
st_ = kError;
127+
return;
128+
}
129+
130+
ParseNext();
131+
}
132+
133+
void LookaheadParser::ExitObject() {
134+
while (NextObjectKey()) {
135+
SkipValue();
136+
}
137+
}
138+
139+
void LookaheadParser::ExitArray() {
140+
while (NextArrayValue()) {
141+
SkipValue();
142+
}
143+
}
144+
145+
const char* LookaheadParser::NextObjectKey() {
146+
if (st_ == kExitingObject) {
147+
ParseNext();
148+
return 0;
149+
}
150+
151+
if (st_ != kHasKey || !v_.IsString()) {
152+
st_ = kError;
153+
return 0;
154+
}
155+
156+
const char* result = v_.GetString();
157+
ParseNext();
158+
return result;
159+
}
160+
161+
bool LookaheadParser::NextArrayValue() {
162+
if (st_ == kExitingArray) {
163+
ParseNext();
164+
return false;
165+
}
166+
167+
return true;
168+
}
169+
170+
int LookaheadParser::GetInt() {
171+
if (st_ != kHasValue || !v_.IsInt()) {
172+
st_ = kError;
173+
return 0;
174+
}
175+
176+
int result = v_.GetInt();
177+
ParseNext();
178+
return result;
179+
}
180+
181+
double LookaheadParser::GetDouble() {
182+
if (st_ != kHasValue || !v_.IsNumber()) {
183+
st_ = kError;
184+
return 0.;
185+
}
186+
187+
double result = v_.GetDouble();
188+
ParseNext();
189+
return result;
190+
}
191+
192+
bool LookaheadParser::GetBool() {
193+
if (st_ != kHasValue || !v_.IsBool()) {
194+
st_ = kError;
195+
return false;
196+
}
197+
198+
bool result = v_.GetBool();
199+
ParseNext();
200+
return result;
201+
}
202+
203+
void LookaheadParser::GetNull() {
204+
if (st_ != kHasValue || !v_.IsNull()) {
205+
st_ = kError;
206+
return;
207+
}
208+
209+
ParseNext();
210+
}
211+
212+
const char* LookaheadParser::GetString() {
213+
if (st_ != kHasValue || !v_.IsString()) {
214+
st_ = kError;
215+
return 0;
216+
}
217+
218+
const char* result = v_.GetString();
219+
ParseNext();
220+
return result;
221+
}
222+
223+
void LookaheadParser::SkipValue() {
224+
int depth = 0;
225+
do {
226+
switch (st_) {
227+
case kEnteringArray:
228+
case kEnteringObject:
229+
++depth;
230+
break;
231+
232+
case kExitingArray:
233+
case kExitingObject:
234+
--depth;
235+
break;
236+
237+
case kError:
238+
return;
239+
240+
case kHasKey:
241+
case kHasValue:
242+
break;
243+
}
244+
ParseNext();
245+
}
246+
while (depth > 0);
247+
}
248+
249+
// Returns a pointer to the pending scalar or key without consuming it, or
// null when the pending token carries no value (structure start/end, error).
Value* LookaheadParser::PeekValue() {
    const bool hasPayload = (st_ == kHasValue || st_ == kHasKey);
    return hasPayload ? &v_ : 0;
}
256+
257+
int LookaheadParser::PeekType() {
258+
switch (st_) {
259+
case kHasValue:
260+
case kHasKey:
261+
return v_.GetType();
262+
263+
case kEnteringArray:
264+
return kArrayType;
265+
266+
case kEnteringObject:
267+
return kObjectType;
268+
269+
case kExitingArray:
270+
case kExitingObject:
271+
case kError:
272+
default:
273+
return -1;
274+
}
275+
}
276+
277+
//-------------------------------------------------------------------------
278+
279+
int main() {
280+
using namespace std;
281+
282+
char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null,"
283+
"\"i\":123, \"pi\": 3.1416, \"a\":[-1, 2, 3, 4, \"array\", []], \"skipArrays\":[1, 2, [[[3]]]], "
284+
"\"skipObject\":{ \"i\":0, \"t\":true, \"n\":null, \"d\":123.45 }, "
285+
"\"skipNested\":[[[[{\"\":0}, {\"\":[-9.87]}]]], [], []], "
286+
"\"skipString\":\"zzz\", \"reachedEnd\":null, \"t\":true }";
287+
288+
LookaheadParser r(json);
289+
290+
RAPIDJSON_ASSERT(r.PeekType() == kObjectType);
291+
292+
r.EnterObject();
293+
while (const char* key = r.NextObjectKey()) {
294+
if (0 == strcmp(key, "hello")) {
295+
RAPIDJSON_ASSERT(r.PeekType() == kStringType);
296+
cout << key << ":" << r.GetString() << endl;
297+
}
298+
else if (0 == strcmp(key, "t") || 0 == strcmp(key, "f")) {
299+
RAPIDJSON_ASSERT(r.PeekType() == kTrueType || r.PeekType() == kFalseType);
300+
cout << key << ":" << r.GetBool() << endl;
301+
continue;
302+
}
303+
else if (0 == strcmp(key, "n")) {
304+
RAPIDJSON_ASSERT(r.PeekType() == kNullType);
305+
r.GetNull();
306+
cout << key << endl;
307+
continue;
308+
}
309+
else if (0 == strcmp(key, "pi")) {
310+
RAPIDJSON_ASSERT(r.PeekType() == kNumberType);
311+
cout << key << ":" << r.GetDouble() << endl;
312+
continue;
313+
}
314+
else if (0 == strcmp(key, "a")) {
315+
RAPIDJSON_ASSERT(r.PeekType() == kArrayType);
316+
317+
r.EnterArray();
318+
319+
cout << key << ":[ ";
320+
while (r.NextArrayValue()) {
321+
if (r.PeekType() == kNumberType) {
322+
cout << r.GetDouble() << " ";
323+
}
324+
else if (r.PeekType() == kStringType) {
325+
cout << r.GetString() << " ";
326+
}
327+
else {
328+
r.ExitArray();
329+
break;
330+
}
331+
}
332+
333+
cout << "]" << endl;
334+
}
335+
else {
336+
cout << key << ":skipped" << endl;
337+
r.SkipValue();
338+
}
339+
}
340+
341+
return 0;
342+
}

0 commit comments

Comments
 (0)