Skip to content

Commit 82295b1

Browse files
authored
Merge pull request Tencent#842 from StilesCrisis/token-by-token-parsing
Token-by-token pull parsing
2 parents a677b85 + ecf3d64 commit 82295b1

File tree

6 files changed

+348
-93
lines changed

6 files changed

+348
-93
lines changed

example/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ set(EXAMPLES
1818
serialize
1919
simpledom
2020
simplereader
21+
simplepullreader
2122
simplewriter
2223
tutorial)
2324

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#include "rapidjson/reader.h"
2+
#include <iostream>
3+
#include <sstream>
4+
5+
using namespace rapidjson;
6+
using namespace std;
7+
8+
// Converts any value that supports operator<< on an output stream into a std::string.
// If you can require C++11, you could use std::to_string here
// (for the arithmetic types only).
//
// The argument is taken by const reference so that heavyweight values such as
// std::string are not copied just to be printed.
template <typename T> std::string stringify(const T& x) {
    std::ostringstream ss; // output-only stream: nothing is ever read back through it
    ss << x;
    return ss.str();
}
14+
15+
struct MyHandler {
16+
const char* type;
17+
std::string data;
18+
19+
bool Null() { type = "Null"; data.clear(); return true; }
20+
bool Bool(bool b) { type = "Bool:"; data = b? "true": "false"; return true; }
21+
bool Int(int i) { type = "Int:"; data = stringify(i); return true; }
22+
bool Uint(unsigned u) { type = "Uint:"; data = stringify(u); return true; }
23+
bool Int64(int64_t i) { type = "Int64:"; data = stringify(i); return true; }
24+
bool Uint64(uint64_t u) { type = "Uint64:"; data = stringify(u); return true; }
25+
bool Double(double d) { type = "Double:"; data = stringify(d); return true; }
26+
bool RawNumber(const char* str, SizeType length, bool) { type = "Number:"; data = std::string(str, length); return true; }
27+
bool String(const char* str, SizeType length, bool) { type = "String:"; data = std::string(str, length); return true; }
28+
bool StartObject() { type = "StartObject"; data.clear(); return true; }
29+
bool Key(const char* str, SizeType length, bool) { type = "Key:"; data = std::string(str, length); return true; }
30+
bool EndObject(SizeType memberCount) { type = "EndObject:"; data = stringify(memberCount); return true; }
31+
bool StartArray() { type = "StartArray"; data.clear(); return true; }
32+
bool EndArray(SizeType elementCount) { type = "EndArray:"; data = stringify(elementCount); return true; }
33+
};
34+
35+
int main() {
36+
const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } ";
37+
38+
MyHandler handler;
39+
Reader reader;
40+
StringStream ss(json);
41+
reader.IterativeParseInit();
42+
while (!reader.IterativeParseComplete()) {
43+
reader.IterativeParseNext<kParseDefaultFlags>(ss, handler);
44+
cout << handler.type << handler.data << endl;
45+
}
46+
47+
return 0;
48+
}

include/rapidjson/reader.h

Lines changed: 153 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,83 @@ class GenericReader {
513513
return Parse<kParseDefaultFlags>(is, handler);
514514
}
515515

516+
//! Initialize JSON text token-by-token parsing
517+
/*!
518+
*/
519+
void IterativeParseInit() {
520+
parseResult_.Clear();
521+
state_ = IterativeParsingStartState;
522+
}
523+
524+
//! Parse one token from JSON text
525+
/*! \tparam InputStream Type of input stream, implementing Stream concept
526+
\tparam Handler Type of handler, implementing Handler concept.
527+
\param is Input stream to be parsed.
528+
\param handler The handler to receive events.
529+
\return Whether the parsing is successful.
530+
*/
531+
template <unsigned parseFlags, typename InputStream, typename Handler>
532+
bool IterativeParseNext(InputStream& is, Handler& handler) {
533+
while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
534+
SkipWhitespaceAndComments<parseFlags>(is);
535+
536+
Token t = Tokenize(is.Peek());
537+
IterativeParsingState n = Predict(state_, t);
538+
IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
539+
540+
// If we've finished or hit an error...
541+
if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
542+
// Report errors.
543+
if (d == IterativeParsingErrorState) {
544+
HandleError(state_, is);
545+
return false;
546+
}
547+
548+
// Transition to the finish state.
549+
RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
550+
state_ = d;
551+
552+
// If StopWhenDone is not set...
553+
if (!(parseFlags & kParseStopWhenDoneFlag)) {
554+
// ... and extra non-whitespace data is found...
555+
SkipWhitespaceAndComments<parseFlags>(is);
556+
if (is.Peek() != '\0') {
557+
// ... this is considered an error.
558+
HandleError(state_, is);
559+
return false;
560+
}
561+
}
562+
563+
// Success! We are done!
564+
return true;
565+
}
566+
567+
// Transition to the new state.
568+
state_ = d;
569+
570+
// If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
571+
if (!IsIterativeParsingDelimiterState(n))
572+
return true;
573+
}
574+
575+
// We reached the end of file.
576+
stack_.Clear();
577+
578+
if (state_ != IterativeParsingFinishState) {
579+
HandleError(state_, is);
580+
return false;
581+
}
582+
583+
return true;
584+
}
585+
586+
//! Check if token-by-token parsing JSON text is complete
587+
/*! \return Whether the JSON has been fully decoded.
588+
*/
589+
RAPIDJSON_FORCEINLINE bool IterativeParseComplete() {
590+
return IsIterativeParsingCompleteState(state_);
591+
}
592+
516593
//! Whether a parse error has occurred in the last parsing.
517594
bool HasParseError() const { return parseResult_.IsError(); }
518595

@@ -1402,30 +1479,32 @@ class GenericReader {
14021479

14031480
// States
14041481
enum IterativeParsingState {
1405-
IterativeParsingStartState = 0,
1406-
IterativeParsingFinishState,
1407-
IterativeParsingErrorState,
1482+
IterativeParsingFinishState = 0, // sink states at top
1483+
IterativeParsingErrorState, // sink states at top
1484+
IterativeParsingStartState,
14081485

14091486
// Object states
14101487
IterativeParsingObjectInitialState,
14111488
IterativeParsingMemberKeyState,
1412-
IterativeParsingKeyValueDelimiterState,
14131489
IterativeParsingMemberValueState,
1414-
IterativeParsingMemberDelimiterState,
14151490
IterativeParsingObjectFinishState,
14161491

14171492
// Array states
14181493
IterativeParsingArrayInitialState,
14191494
IterativeParsingElementState,
1420-
IterativeParsingElementDelimiterState,
14211495
IterativeParsingArrayFinishState,
14221496

14231497
// Single value state
1424-
IterativeParsingValueState
1498+
IterativeParsingValueState,
1499+
1500+
// Delimiter states (at bottom)
1501+
IterativeParsingElementDelimiterState,
1502+
IterativeParsingMemberDelimiterState,
1503+
IterativeParsingKeyValueDelimiterState,
1504+
1505+
cIterativeParsingStateCount
14251506
};
14261507

1427-
enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
1428-
14291508
// Tokens
14301509
enum Token {
14311510
LeftBracketToken = 0,
@@ -1476,6 +1555,18 @@ class GenericReader {
14761555
RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
14771556
// current state x one lookahead token -> new state
14781557
static const char G[cIterativeParsingStateCount][kTokenCount] = {
1558+
// Finish(sink state)
1559+
{
1560+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1561+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1562+
IterativeParsingErrorState
1563+
},
1564+
// Error(sink state)
1565+
{
1566+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1567+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1568+
IterativeParsingErrorState
1569+
},
14791570
// Start
14801571
{
14811572
IterativeParsingArrayInitialState, // Left bracket
@@ -1490,18 +1581,6 @@ class GenericReader {
14901581
IterativeParsingValueState, // Null
14911582
IterativeParsingValueState // Number
14921583
},
1493-
// Finish(sink state)
1494-
{
1495-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1496-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1497-
IterativeParsingErrorState
1498-
},
1499-
// Error(sink state)
1500-
{
1501-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1502-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1503-
IterativeParsingErrorState
1504-
},
15051584
// ObjectInitial
15061585
{
15071586
IterativeParsingErrorState, // Left bracket
@@ -1530,20 +1609,6 @@ class GenericReader {
15301609
IterativeParsingErrorState, // Null
15311610
IterativeParsingErrorState // Number
15321611
},
1533-
// KeyValueDelimiter
1534-
{
1535-
IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1536-
IterativeParsingErrorState, // Right bracket
1537-
IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1538-
IterativeParsingErrorState, // Right curly bracket
1539-
IterativeParsingErrorState, // Comma
1540-
IterativeParsingErrorState, // Colon
1541-
IterativeParsingMemberValueState, // String
1542-
IterativeParsingMemberValueState, // False
1543-
IterativeParsingMemberValueState, // True
1544-
IterativeParsingMemberValueState, // Null
1545-
IterativeParsingMemberValueState // Number
1546-
},
15471612
// MemberValue
15481613
{
15491614
IterativeParsingErrorState, // Left bracket
@@ -1558,20 +1623,6 @@ class GenericReader {
15581623
IterativeParsingErrorState, // Null
15591624
IterativeParsingErrorState // Number
15601625
},
1561-
// MemberDelimiter
1562-
{
1563-
IterativeParsingErrorState, // Left bracket
1564-
IterativeParsingErrorState, // Right bracket
1565-
IterativeParsingErrorState, // Left curly bracket
1566-
IterativeParsingObjectFinishState, // Right curly bracket
1567-
IterativeParsingErrorState, // Comma
1568-
IterativeParsingErrorState, // Colon
1569-
IterativeParsingMemberKeyState, // String
1570-
IterativeParsingErrorState, // False
1571-
IterativeParsingErrorState, // True
1572-
IterativeParsingErrorState, // Null
1573-
IterativeParsingErrorState // Number
1574-
},
15751626
// ObjectFinish(sink state)
15761627
{
15771628
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
@@ -1606,6 +1657,18 @@ class GenericReader {
16061657
IterativeParsingErrorState, // Null
16071658
IterativeParsingErrorState // Number
16081659
},
1660+
// ArrayFinish(sink state)
1661+
{
1662+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1663+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1664+
IterativeParsingErrorState
1665+
},
1666+
// Single Value (sink state)
1667+
{
1668+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1669+
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1670+
IterativeParsingErrorState
1671+
},
16091672
// ElementDelimiter
16101673
{
16111674
IterativeParsingArrayInitialState, // Left bracket(push Element state)
@@ -1620,18 +1683,34 @@ class GenericReader {
16201683
IterativeParsingElementState, // Null
16211684
IterativeParsingElementState // Number
16221685
},
1623-
// ArrayFinish(sink state)
1686+
// MemberDelimiter
16241687
{
1625-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1626-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1627-
IterativeParsingErrorState
1688+
IterativeParsingErrorState, // Left bracket
1689+
IterativeParsingErrorState, // Right bracket
1690+
IterativeParsingErrorState, // Left curly bracket
1691+
IterativeParsingObjectFinishState, // Right curly bracket
1692+
IterativeParsingErrorState, // Comma
1693+
IterativeParsingErrorState, // Colon
1694+
IterativeParsingMemberKeyState, // String
1695+
IterativeParsingErrorState, // False
1696+
IterativeParsingErrorState, // True
1697+
IterativeParsingErrorState, // Null
1698+
IterativeParsingErrorState // Number
16281699
},
1629-
// Single Value (sink state)
1700+
// KeyValueDelimiter
16301701
{
1631-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1632-
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1633-
IterativeParsingErrorState
1634-
}
1702+
IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1703+
IterativeParsingErrorState, // Right bracket
1704+
IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1705+
IterativeParsingErrorState, // Right curly bracket
1706+
IterativeParsingErrorState, // Comma
1707+
IterativeParsingErrorState, // Colon
1708+
IterativeParsingMemberValueState, // String
1709+
IterativeParsingMemberValueState, // False
1710+
IterativeParsingMemberValueState, // True
1711+
IterativeParsingMemberValueState, // Null
1712+
IterativeParsingMemberValueState // Number
1713+
},
16351714
}; // End of G
16361715

16371716
return static_cast<IterativeParsingState>(G[state][token]);
@@ -1812,44 +1891,53 @@ class GenericReader {
18121891
}
18131892
}
18141893

1894+
//! Whether \c s is one of the delimiter states.
/*! Relies on the delimiter states being declared last in IterativeParsingState,
    starting with IterativeParsingElementDelimiterState.
    Const-qualified because it reads no reader state, so const members may call it.
*/
RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
    return s >= IterativeParsingElementDelimiterState;
}
1897+
1898+
//! Whether \c s is a terminal (sink) state, i.e. finish or error.
/*! Relies on IterativeParsingFinishState and IterativeParsingErrorState being the
    first two enumerators of IterativeParsingState.
    Const-qualified because it reads no reader state, so const members may call it.
*/
RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
    return s <= IterativeParsingErrorState;
}
1901+
18151902
template <unsigned parseFlags, typename InputStream, typename Handler>
18161903
ParseResult IterativeParse(InputStream& is, Handler& handler) {
18171904
parseResult_.Clear();
18181905
ClearStackOnExit scope(*this);
18191906
IterativeParsingState state = IterativeParsingStartState;
1820-
1907+
18211908
SkipWhitespaceAndComments<parseFlags>(is);
18221909
RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
18231910
while (is.Peek() != '\0') {
18241911
Token t = Tokenize(is.Peek());
18251912
IterativeParsingState n = Predict(state, t);
18261913
IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1827-
1914+
18281915
if (d == IterativeParsingErrorState) {
18291916
HandleError(state, is);
18301917
break;
18311918
}
1832-
1919+
18331920
state = d;
1834-
1921+
18351922
// Do not further consume streams if a root JSON has been parsed.
18361923
if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
18371924
break;
1838-
1925+
18391926
SkipWhitespaceAndComments<parseFlags>(is);
18401927
RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
18411928
}
1842-
1929+
18431930
// Handle the end of file.
18441931
if (state != IterativeParsingFinishState)
18451932
HandleError(state, is);
1846-
1933+
18471934
return parseResult_;
18481935
}
18491936

18501937
static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
18511938
internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
18521939
ParseResult parseResult_;
1940+
IterativeParsingState state_;
18531941
}; // class GenericReader
18541942

18551943
//! Reader with UTF8 encoding and default allocator.

0 commit comments

Comments
 (0)