diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6740ba8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + container_image: ["fedora:latest", "ubuntu:latest"] + compiler: [g++, clang++] + optflag: ["-O0", "-O2"] + memcheck: [off, asan, valgrind] + container: + image: ${{ matrix.container_image }} + + steps: + - uses: actions/checkout@v1 + - name: install dependencies (Fedora) + if: ${{ matrix.container_image == 'fedora:latest' }} + run: dnf -y install clang gcc-c++ valgrind libasan libubsan jq + - name: install dependencies (Ubuntu) + if: ${{ matrix.container_image == 'ubuntu:latest' }} + run: | + apt-get -y update + apt-get -y install bash clang g++ valgrind libasan5 libubsan1 jq + - name: build and test + shell: bash + run: | + export CXX=${{ matrix.compiler }} + export CXXFLAGS="-std=gnu++14 -g -pthread -lpthread ${{ matrix.optflag }} -Wall -Wextra" + export TESTFLAG="" + if [[ "${{ matrix.memcheck }}" = "asan" ]]; then + export CXXFLAGS="${CXXFLAGS} -fsanitize=address,undefined -fno-omit-frame-pointer" + if [[ "${{ matrix.compiler }}" = "g++" ]]; then + export CXXFLAGS="${CXXFLAGS} -fno-sanitize-recover" + else + export CXXFLAGS="${CXXFLAGS} -fno-sanitize-recover=all" + fi + elif [[ "${{ matrix.memcheck }}" = "valgrind" ]]; then + export TESTFLAG="--valgrind" + fi + ${{ matrix.compiler }} -o jtc $CXXFLAGS jtc.cpp + python3 run_tests.py -v $TESTFLAG User\ Guide.md Walk-path\ tutorial.md README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cf1497e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +jtc +file.json diff --git a/README.md b/README.md index 1c98df9..9212bf9 100644 --- a/README.md +++ b/README.md @@ -335,7 +335,7 @@ Enabling too many debugs might be overwhelming, though one specific case many wo a failing JSON: ```bash bash $ :263): 
expected_json_value ``` If JSON is big, it's desirable to locate the parsing failure point. Passing just one `-d` lets you easily spot the parsing failure point and its locus: @@ -343,9 +343,9 @@ parsing failure point and its locus: bash $ -.exception_locus_(), ... }| ],| "children": [,],| "spouse": null| },| {| ... -.exception_spot_(), -------------------------------------------->| (offset: 967) -jtc json parsing exception (:967): expected_json_value +.exception_locus_(), ... "age": 31,| "children": [,],| "phoneNumbers": [| ... +.exception_spot_(), --------------------------------------->| (offset: 263) +jtc json parsing exception (:263): expected_json_value bash $ ``` @@ -442,7 +442,7 @@ bash $ <<<$case3 jtc -w'l:v[-1]' -rT'[{{$a}},{{$b}}]' [ "Patrick", "Lynch" ] [ "Alice", "Price" ] [ "Rebecca", "Hernandez" ] - +# #jq: bash $ <<<$case3 jq -c 'if type == "array" then .[] else . end | [.Name, .Surname]' [null,null] @@ -455,7 +455,7 @@ bash $ case2='[{"Surname":"Lynch", "gender":"male", "age":29},{"Name":"Alice", " bash $ bash $ <<<$case2 jtc -w'l:v[-1]' -rT'[{{$a}},{{$b}}]' [ "Alice", "Price" ] - +# #jq: bash $ <<<$case2 jq -c 'if type == "array" then .[] else . end | [.Name, .Surname]' [null,"Lynch"] @@ -528,7 +528,7 @@ _**`updating JSON recursively by label:`**_ | _**`updating JSON recursively by l **_Comparison of `jtc` to `jtc` (single-threaded to multi-threaded parsing performance):_** -```bash +```bash SKIP bash $ unset TIMEFORMAT bash $ bash $ # concurrent (multi-threaded) parsing: diff --git a/User Guide.md b/User Guide.md index c927d51..6cb42f3 100644 --- a/User Guide.md +++ b/User Guide.md @@ -196,7 +196,7 @@ bash $ ``` option `-t` controls the indentation of the pretty-printing format (default is 3 white spaces): ```bash -bash $ :1214): unexpected_end_of_line +bash $ :1215): unexpected_end_of_line bash $ ``` and though the message lets us know that there's a problem with the input JSON, it is not very informative with regard to the whereabouts of the problem. 
To visualize the spot where the problem is, as well as its locus pass a single debug option (`-d`): ```bash -bash $ -.exception_locus_(), ...e": 80206,| "state": "CO,| "street address": "6213... -.exception_spot_(), --------------------------------------->| (offset: 1214) -jtc json parsing exception (:1214): unexpected_end_of_line +.exception_locus_(), ...e": 80206,| "state": "CO,| "street address": "621... +.exception_spot_(), --------------------------------------->| (offset: 1215) +jtc json parsing exception (:1215): unexpected_end_of_line bash $ ``` the vertical pipe symbol `|` in the debug showing JSON locus replaces new lines, thus it becomes easy to spot the problem. @@ -665,7 +664,7 @@ bash $ L' -ddd .display_opts(), option set[0]: -w'<>L' -d -d -d (internally imposed: ) .init_inputs(), reading json from ..ss_init_(), initializing mode: buffered_cin -..ss_init_(), buffer (from ) size after initialization: 1674 +..ss_init_(), buffer (from ) size after initialization: 1675 ..run_decomposed_optsets(), pass for set[0] ...parse(), finished parsing json ..demux_opt(), option: '-w', hits: 1 @@ -677,7 +676,7 @@ bash $ L' -ddd ...parse_lexemes_(), walked string: <>L ...parse_lexemes_(), parsing here: -->| ...parse_suffix_(), search type sfx: Label_RE_search -..main(), exception raised by: file: './lib/Json.hpp', func: 'parse_suffix_()', line: 3573 +..main(), exception raised by: file: 'lib/Json.hpp', func: 'parse_suffix_()', line: 3590 jtc json exception: walk_empty_lexeme bash $ ``` @@ -1400,55 +1399,55 @@ bash $ ``` >Note: such grouping is only possible with labeled values (obviously), it won't be possible to group array elements that easily, > e.g., let's break array into pairs: ->```bash ->bash $ array='[0,1,2,3,4,5,6,7,8,9]' ->bash $ <<<$array jtc -w[::2] -w[1::2] -j -tc ->[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ->bash $ ->``` +```bash +bash $ array='[0,1,2,3,4,5,6,7,8,9]' +bash $ <<<$array jtc -w[::2] -w[1::2] -j -tc +[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] +bash $ +``` > 
it won't work even if we try relating walks: ->```bash ->bash $ <<<$array jtc -w[::2] -w'[::2]k[-1]>I[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ->bash $ ->``` +```bash +bash $ <<<$array jtc -w[::2] -w'[::2]k[-1]>IThus grouping here should be achieved differently. One way is to use only a single walk collecting required elements of the group into >the namespaces and then using template interpolating the latter: ->```bash ->bash $ <<<$array jtc -w'[::2]kv[-1]>I[ -> [ 0, 1 ], -> [ 2, 3 ], -> [ 4, 5 ], -> [ 6, 7 ], -> [ 8, 9 ] ->] ->bash $ ->``` +```bash +bash $ <<<$array jtc -w'[::2]kv[-1]>IAnother way is to transofrm the walks into objects assigning labels from the first walk's index: ->```bash ->bash $ <<<$array jtc -w'[::2]k' -w[1::2] -T'{"{I}":{{}}}' -ll / -tc ->{ -> "0": [ 0, 1 ], -> "2": [ 2, 3 ], -> "4": [ 4, 5 ], -> "6": [ 6, 7 ], -> "8": [ 8, 9 ] ->} ->bash $ ->``` +```bash +bash $ <<<$array jtc -w'[::2]k' -w[1::2] -T'{"{I}":{{}}}' -ll / -tc +{ + "0": [ 0, 1 ], + "2": [ 2, 3 ], + "4": [ 4, 5 ], + "6": [ 6, 7 ], + "8": [ 8, 9 ] +} +bash $ +``` >and then re-walk dropping labels and encapsulating into the outer array: ->```bash ->bash $ <<<$array jtc -w'[::2]k' -w[1::2] -T'{"{I}":{{}}}' -ll / -jw[:] -tc ->[ -> [ 0, 1 ], -> [ 2, 3 ], -> [ 4, 5 ], -> [ 6, 7 ], -> [ 8, 9 ] ->] ->bash $ - +```bash +bash $ <<<$array jtc -w'[::2]k' -w[1::2] -T'{"{I}":{{}}}' -ll / -jw[:] -tc +[ + [ 0, 1 ], + [ 2, 3 ], + [ 4, 5 ], + [ 6, 7 ], + [ 8, 9 ] +] +bash $ +``` #### Aggregating walks the walks results that have labels also could be aggregated (per label), option `-nn` facilitates the ask: @@ -1529,9 +1528,9 @@ Sometimes, when displaying outputs wrapped into an object, it's desirable to ext (i.e., reach inside the object and use inner label rather than outer). This become especially handy when dealing with templates. 
Let's consider the following query: -Say, the ask here is to extract all names of all the people from `ab.json` and group them with newly crafted record indicating if a person +Say, the task here is to extract all names of all the people from `ab.json` and group them with newly crafted record indicating if a person has children or not, like this: -```bash +```json [ { "name": "John", "has children": "yes" }, ... @@ -1650,15 +1649,15 @@ bash $ > all the above examples just illustrate capabilities of the options for instructional purpose. Practically, the same ask would be easier to achieve using just a single walk: ->```bash ->bash $ l:v[-1][children]f[0]v' -T'{"name":{{N}}, "has children": {{C}}}' -jtc ->[ -> { "has children": "yes", "name": "John" }, -> { "has children": "no", "name": "Ivan" }, -> { "has children": "yes", "name": "Jane" } ->] ->bash $ ->``` +```bash +bash $ l:v[-1][children]f[0]v' -T'{"name":{{N}}, "has children": {{C}}}' -jtc +[ + { "has children": "yes", "name": "John" }, + { "has children": "no", "name": "Ivan" }, + { "has children": "yes", "name": "Jane" } +] +bash $ +``` #### Succinct walk-path syntax @@ -1691,8 +1690,7 @@ bash $ l:' -y'l:' -jlnn a syntactical sugar and do not apply any walk-path parsing or validation, instead they just reconcile into respective `-w` options created internally, then the latter get processed. Thus, it's even possible to write it with what seems at first to be a broken syntax: ```bash -bash $ l:' -y']l:' -jlnn -... +bash $ l:' -y']l:' -jlnn > /dev/null ``` However, if a reinstatement of the options results in a valid walk-path - that's all that matters. 
@@ -1756,7 +1754,7 @@ bash $ <<<$jsn jtc -w[:] -x4 4 8 bash $ - ``` +``` To display _every 4th walk starting from 3rd one_, use this notation: ```bash bash $ <<<$jsn jtc -w[:] -x4/2 @@ -2085,11 +2083,11 @@ bash $ <<<$jsn jtc -w'l:' -u'l:<(.+)[ +*]+(.+)>R[-1][args]' -T'[{}, [ { "Func": "x + y", - "args": [ 123, "x", "y" ] + "args": [ 123, "x +", "y" ] }, { "Func": "a * b", - "args": [ "a", "b" ] + "args": [ "a *", "b" ] } ] bash $ @@ -2133,10 +2131,10 @@ applied onto each value of the iterable one by one). By default, for such kind of interpolations (stringifying iterables) the enumeration separator used is held in the namespace `$#` (default value `", "`), which means, it could be altered by a user: -```json +```bash bash $ <<<'[1,2,3,4,5]' jtc -w'<$#:\t>v' -qqT'"good for TSV conversion:\n{}"' good for TSV conversion: -1 2 3 4 5 +1 2 3 4 5 bash $ ``` @@ -2237,11 +2235,11 @@ _to play safe with the templates, always surround them with single quotes (to do here's an example how to join path tokens using a custom separator: ```bash bash $ v' -qqT'{{$path}}' -Directory 0 address state +Directory 0 address state bash $ ``` Equally, the same could be achived with the `$PATH` token: -```bash +```bash SKIP FIXME bash $ v' -qqT'"{$PATH}"' Directory 0 address state bash $ @@ -2311,14 +2309,14 @@ bash $ > The example above is shown for instructive purpose. Probably the easier (and more efficient) way achieving the same result > would be this one: ->```bash ->bash $ l:v[-1][children]' -T'{"{N}": "{}"}' -jjll / -pw'<>' ->{ -> "Jane": "Robert, Lila", -> "John": "Olivia" ->} ->bash $ ->``` +```bash +bash $ l:v[-1][children]' -T'{"{N}": "{}"}' -jjll / -pw'<>' +{ + "Jane": "Robert, Lila", + "John": "Olivia" +} +bash $ +``` #### Iterables auto tokens @@ -2817,11 +2815,11 @@ By default `jtc` expects the input from `stdin`. 
If the standalone argument(s) ` file (ignoring `stdin`), see below: ```bash # show content of the file: -bash $ cat file.json +bash $ echo '[ "JSON", "in", "file" ]' | tee file.json [ "JSON", "in", "file" ] bash $ # both input sources present: stdin and file -bash $ <<<'[ "", "JSON" ]' jtc file.json +bash $ echo '[ "", "JSON" ]' | jtc file.json [ "JSON", "in", @@ -2846,7 +2844,7 @@ format of the file. #### Forcing input read from `stdin` The bare hyphen (`-`) overrides file _input_ and ensures that the input is read from the `stdin`: ```bash - bash $ <<<'[ "", "JSON" ]' jtc -f - file.json +bash $ <<<'[ "", "JSON" ]' jtc -f - file.json bash $ cat file.json [ "", @@ -2874,10 +2872,26 @@ bash $ Of course there's a bit more succinct syntax: ```bash bash $ :[-1]' -y':[-1]' -p / -w'l:' -ltc +"phone": [ + { "number": "112-555-1234", "type": "mobile" }, + { "number": "113-123-2368", "type": "mobile" } +] +"phone": [ + { "number": "223-283-0372", "type": "mobile" } +] +"phone": [] ``` or, using even a single walk-path: ```bash bash $ R:[-1]' -p / -w'l:' -ltc +"phone": [ + { "number": "112-555-1234", "type": "mobile" }, + { "number": "113-123-2368", "type": "mobile" } +] +"phone": [ + { "number": "223-283-0372", "type": "mobile" } +] +"phone": [] ``` Another use-case example: remove all the JSON elements _except_ walked ones, while preserving original JSON structure - that's @@ -2987,6 +3001,7 @@ argument w.r.t. any trailing characters is _relaxed_ - it attempts parsing what if insert/update occurs from a _file_ and such file caters multiple JSONS (a.k.a. 
_stream of JSONs_), then the stream of JSON is automatically converted into array of JSONs: ```bash +bash $ cp inserting_updating.json file.json bash $ cat file.json [ "first", "JSON" ] { "second": "JSON" } @@ -3194,9 +3209,8 @@ The labels can be updated with any atomic value (and not with the iterable value ```bash bash $ <>k' -u true / -w'' -l "true": "John" -bash $ l<>k' -u '[true]' / -w'[0][0]' -tc +bash $ l<>k' -u '[true]' / -w'[0][0]' -tc > /dev/null error: label could be updated only with JSON atomic value -... ``` @@ -3517,10 +3531,9 @@ bash $ ``` Otherwise (JSONs are different) a non-zero code is returned: ```bash -bash $ <<<'[1,2,3]' jtc -c'[2,3]' -lr +bash $ <<<'[1,2,3]' jtc -c'[2,3]' -lr; echo $? "json_1": [ 1, 2, 3 ] "json_2": [ 2, 3 ] -bash $ echo $? 4 bash $ ``` @@ -3566,11 +3579,11 @@ bash $ ``` > NOTE: _usage of '<>k' is only restricted to JSON elements which have labels/indices. JSON `root` does not have any of those, thus attempting to print a label of the root always results in the exception:_ ->```bash ->bash $ k' ->jtc json exception: walk_root_has_no_label ->bash $ ->``` +```bash +bash $ k' +jtc json exception: walk_root_has_no_label +bash $ +``` ## Processing input JSONs @@ -3635,7 +3648,7 @@ bash $ Couple options allow altering the behavior and process all the input JSONs: Option `-a` instructs to process each of the input JSONS: -```bash +```bash SKIP FIXME bash $ <<<'[ "1st json" ] { "2nd": "json" } "3rd json"' jtc -ar [ "1st json" ] { "2nd": "json" } @@ -3643,7 +3656,7 @@ bash $ <<<'[ "1st json" ] { "2nd": "json" } "3rd json"' jtc -ar bash $ ``` \- respected processing (of all given options) will occur for all of the input JSONs: -```bash +```bash SKIP FIXME bash $ <<<'[ "1st json" ] { "2nd": "json" } "3rd json"' jtc -a -w'R' "1st json" "json" @@ -3668,20 +3681,21 @@ bash $ > The exception locus is only shown up to a violating point and not past it because of `streamed_cin` type of read. 
If the same > stream of JSONs was in the file, then the read type would be `buffered_file` and then the entire locus would be shown: ->```bash ->bash $ jtc -ad file.json ->.display_opts(), option set[0]: -a -d 'file.json' (internally imposed: ) ->.init_inputs(), reading json from file-arguments: ->.init_inputs(), file argument: file.json ->.write_json(), outputting json to ->[ -> "1st json" ->] ->.exception_locus_(), { "2nd": json" } "3rd json" ->.exception_spot_(), --------->| (offset: 9) ->jtc json parsing exception (file.json:9): expected_json_value ->bash $ ->``` +```bash +bash $ echo '["1st json"] { "2nd": json" } "3rd json"' > file.json +bash $ jtc -ad file.json +.display_opts(), option set[0]: -a -d 'file.json' (internally imposed: ) +.init_inputs(), reading json from file-arguments: +.init_inputs(), file argument: file.json +.write_json(), outputting json to +[ + "1st json" +] +.exception_locus_(), { "2nd": json" } "3rd json" +.exception_spot_(), --------->| (offset: 9) +jtc json parsing exception (file.json:9): expected_json_value +bash $ +``` Another option triggering all JSONs processing (from any number of sources) is `-J` - that option tells that aggregation of all JSONs is required and thus assumes option `-a` implicitly (no need giving both): @@ -3729,7 +3743,9 @@ bash $ jtc -w'[0][:][name]' -aj ab.json ab.json "Jane" ] bash $ -# process all input JSONs and wrap them into an array: +``` +process all input JSONs and wrap them into an array: +```bash bash $ jtc -w'[0][:][name]' -J ab.json ab.json [ "John", @@ -3740,7 +3756,9 @@ bash $ jtc -w'[0][:][name]' -J ab.json ab.json "Jane" ] bash $ -# process and wrap each input JSON into an array and then wrap all the processed into a super array: +``` +process and wrap each input JSON into an array and then wrap all the processed into a super array: +```bash bash $ jtc -w'[0][:][name]' -Jj ab.json ab.json [ [ @@ -3783,34 +3801,32 @@ a network-based streaming) We can see the difference in the parsing when 
debugging `jtc`: \- in a _buffered read_ mode, the debug will show the _parsing point_ with the data following behind it: ```bash -bash $ &1 | head -9 .display_opts(), option set[0]: -d -d -d -d -d -d (internally imposed: ) .init_inputs(), reading json from ..ss_init_(), initializing mode: buffered_cin -..ss_init_(), buffer (from ) size after initialization: 1674 +..ss_init_(), buffer (from ) size after initialization: 1675 ..run_decomposed_optsets(), pass for set[0] -......parse_(), parsing point ->{| "Directory": [| {| "address": {| ... -......parse_(), parsing point ->"Directory": [| {| "address": {| "... -......parse_(), parsing point ->[| {| "address": {| "city": "New Y... -......parse_(), parsing point ->{| "address": {| "city": "New York",| ... -... +......parse_(), parsing point ->{| "Directory": [| {| "address": {| "... +......parse_(), parsing point ->"Directory": [| {| "address": {| "city":... +......parse_(), parsing point ->[| {| "address": {| "city": "New York",|... +......parse_(), parsing point ->{| "address": {| "city": "New York",| ... ``` \- in a _streamed read_ mode, the _parsing point_ would point to the last read character from the ``: ```bash -bash $ &1 | head -12 .display_opts(), option set[0]: -d -d -d -d -d -d -a (internally imposed: ) .init_inputs(), reading json from ..ss_init_(), initializing mode: streamed_cin ..ss_init_(), buffer (stream) size after initialization: 1 ..run_decomposed_optsets(), pass for set[0] ......parse_(), {<- parsing point -......parse_(), {| "<- parsing point -......parse_(), {| "Directory": [<- parsing point -......parse_(), {| "Directory": [| {<- parsing point -......parse_(), {| "Directory": [| {| "<- parsing point -......parse_(), {| "Directory": [| {| "address": {<- parsing point -......parse_(), ..."Directory": [| {| "address": {| "<- parsing point -... 
+......parse_(), {| "<- parsing point +......parse_(), {| "Directory": [<- parsing point +......parse_(), {| "Directory": [| {<- parsing point +......parse_(), {| "Directory": [| {| "<- parsing point +......parse_(), {| "Directory": [| {| "address": {<- parsing point +......parse_(), {| "Directory": [| {| "address": {| "<- parsing point ``` Here's an example of how _streamed read_ works in `jtc`: @@ -3856,7 +3872,7 @@ too many of very tiny JSONs, then such processing might be even slower (due to t To disable multithreaded parsing and revert to a single-threaded mode use option `-a` (in the initial option set). Compare: -```bash +```bash SKIP bash $ # multithreaded input file parsing bash $ /usr/bin/time jtc -J / -zz big.json big.json 30000033 @@ -3917,6 +3933,7 @@ If `file.json` contains multiple JSONs (a.k.a. _stream of JSONs_) and predicated from the file will be processed first in `option-set1` then all (walk) ouputputs are passed to the input of `option-set2` and again all JSONs will be processed in `option-set2` (predicated `option-set2` caters `-a`) and so on and so forth: ```bash +bash $ cp stream.json file.json bash $ jtc -ar file.json { "1": "first JSON" } { "2": "second JSON" } @@ -3940,7 +3957,7 @@ could be virtually endless), for the same reason the behavior of option `-J` is an endless stream of JSONs). Thus, if neither of option-sets caters `-J` option, then the result of the operations should be identical (it might not be identical if there was namespace dependency in the walks - due to difference in processing it might result in a discrepancy of the results): -```bash +```bash SKIP FIXME bash $ R[-1]' -u'[{{$1}}, {{$2}}];' { "1": [ "first", "JSON" ] } { "2": [ "second", "JSON" ] } @@ -4132,7 +4149,7 @@ it's just a reverse action. Counting any number of properties is JSON could be done using external `wc` unix utility. 
E.g., let's count all `number`s in `ab.json`: ```bash bash $ l:' | wc -l - 6 +6 bash $ ``` diff --git a/Walk-path tutorial.md b/Walk-path tutorial.md index 75a4cad..d24852f 100644 --- a/Walk-path tutorial.md +++ b/Walk-path tutorial.md @@ -223,15 +223,14 @@ bash $ <<<$jsn jtc -w[4] -tc { "number three": 3 } ] bash $ - +# # dig the second level: bash $ <<<$jsn jtc -w[4][2] -tc { "number three": 3 } - +# # dig the 3rd level: bash $ <<<$jsn jtc -w[4][2][number three] error: could not open file 'three]' -jtc json parsing exception (three]:0): unexpected_end_of_string bash $ ``` \- why? @@ -428,23 +427,23 @@ bash $ ``` >Note how `jtc` _interleaves_ the walks: it puts relevant walks in a good (relevant) order, rather than dumping results of the first walk and then of the second. If one prefers seeing the latter behavior, option `-n` will do the trick, compare: ->```bash ->bash $ <<<$jsn jtc -w[:] -w[:][:] -tc -n ->"abc" ->false ->null ->{ "pi": 3.14 } ->[ -> 1, -> "two", -> { "number three": 3 } ->] ->3.14 ->1 ->"two" ->{ "number three": 3 } ->bash $ ->``` +```bash +bash $ <<<$jsn jtc -w[:] -w[:][:] -tc -n +"abc" +false +null +{ "pi": 3.14 } +[ + 1, + "two", + { "number three": 3 } +] +3.14 +1 +"two" +{ "number three": 3 } +bash $ +``` ## ##### Alternative range notation @@ -576,7 +575,7 @@ but internally, the path towards this JSON element would be built as: ```bash bash $ <<<$jsn jtc -w'<3>d' -dddd 2>&1 | grep "built path vector" ....walk_(), built path vector: [00000004]->[00000002]->[number three] -....walk_(), finished walking: with built path vector: [00000004]->[00000002]->[number three] +....walk_(), finished walking with built path vector: [00000004]->[00000002]->[number three] bash $ ``` i.e. it still would be `[4][2][number three]`. That's why `jtc` is known to be a _**`walk-path`**_ based utility. @@ -594,36 +593,36 @@ Thus in order to select either of parents, we just need to pick a respective ind - `[-3]` wil address the _JSON root_ itself. 
>_Note_: `[-0]` will address the value `3` itself, so there's no much of a point to use such addressing, while indices greater _root's (in that example are `[-4]`, `[-5]`, etc will keep addressing the JSON root)_. Take a look: ->```bash -># addressing an immediate parent: ->bash $ <<<$jsn jtc -w'[4][2][number three][-1]' -tc ->{ "number three": 3 } ->bash $ -> -># addressing a parent of a parent: ->bash $ <<<$jsn jtc -w'[4][2][number three][-2]' -tc ->[ -> 1, -> "two", -> { "number three": 3 } ->] ->bash $ -> -># addressing the next parent (which happens to be the root): ->bash $ <<<$jsn jtc -w'[4][2][number three][-3]' -tc ->[ -> "abc", -> false, -> null, -> { "pi": 3.14 }, -> [ -> 1, -> "two", -> { "number three": 3 } -> ] ->] ->bash $ ->``` +```bash +# addressing an immediate parent: +bash $ <<<$jsn jtc -w'[4][2][number three][-1]' -tc +{ "number three": 3 } +bash $ + +# addressing a parent of a parent: +bash $ <<<$jsn jtc -w'[4][2][number three][-2]' -tc +[ + 1, + "two", + { "number three": 3 } +] +bash $ + +# addressing the next parent (which happens to be the root): +bash $ <<<$jsn jtc -w'[4][2][number three][-3]' -tc +[ + "abc", + false, + null, + { "pi": 3.14 }, + [ + 1, + "two", + { "number three": 3 } + ] +] +bash $ +``` ## #### Offsetting path from the root @@ -721,25 +720,19 @@ The lexeme might be empty or hold the `namespace` where matched value will be st Examples: - Find an exact string value: ```bash -bash $ <<<$JSN jtc -w'' -``` -```json +bash $ <<<$jsn jtc -w'' "two" ``` - Find a string value matching _RE_: ```bash -bash $ <<<$JSN jtc -w'<^t>R' -``` -```json +bash $ <<<$jsn jtc -w'<^t>R' "two" ``` - Find the first _JSON string_ value: ```bash -bash $ <<<$JSN jtc -w'<>P' -``` -```json +bash $ <<<$jsn jtc -w'<>P' "abc" ``` @@ -767,8 +760,6 @@ let's work with this JSON: ```bash bash $ JSS='["one", "two", ["three", "four", {"5 to 7": [ "five", "six", "seven"], "second 1": "one" } ] ]' bash $ <<<$JSS jtc -``` -```json [ "one", "two", @@ -790,8 +781,6 
@@ bash $ <<<$JSS jtc - among all _JSON strings_ find those from 2nd till 5th inclusive: ```bash bash $ <<<$JSS jtc -w'<>P1:5' -``` -```json "two" "three" "four" @@ -806,8 +795,6 @@ So, let's repeat the last example, but now using quantifier indices references i - among all _JSON strings_ find those from 2nd till 5th inclusive: ```bash bash $ <<<$JSS jtc -w'vv <>P{Start}:{End}' -``` -```json "two" "three" "four" @@ -818,8 +805,6 @@ bash $ <<<$JSS jtc -w'vv <>P{Start}:{End}' - find all the string occurrences where letter `e` is present: ```bash bash $ <<<$JSS jtc -w'R:' -``` -```json "one" "three" "five" @@ -830,8 +815,6 @@ bash $ <<<$JSS jtc -w'R:' - find all the occurrences of string `"one"`: ```bash bash $ <<<$JSS jtc -w':' -``` -```json "one" "one" ``` @@ -845,8 +828,6 @@ only among immediate children of a current _iterable_. the JSON's root in the example is an _array_, so if we apply a non-recursive search on the root's array, only one match will be found: ```bash bash $ <<<$JSS jtc -w'>one<:' -``` -```json "one" ``` @@ -858,8 +839,6 @@ The recursive search always begins from checking the currently selected (walked) even onto atomic types and match those: ```bash bash $ <<<$JSS jtc -w'[0]' -``` -```json "one" ``` @@ -875,16 +854,12 @@ bash $ <<<$JSS jtc -w'[0]' The lexeme might be empty or hold the `namespace` where matched value will be preserved (upon a match) ```bash -bash $ <<<$JSN jtc -w'<[13]>D1:' -``` -```json +bash $ <<<$jsn jtc -w'<[13]>D1:' 1 3 ``` ```bash -bash $ <<<$JSN jtc -w'<3.14>d:' -``` -```json +bash $ <<<$jsn jtc -w'<3.14>d:' 3.14 ``` @@ -899,9 +874,7 @@ in the `namespace` shall it be present in the lexeme but rather a spelled boolean value will be matched ```bash -bash $ <<<$JSN jtc -w'<>b:' -``` -```json +bash $ <<<$jsn jtc -w'<>b:' false ``` @@ -923,9 +896,7 @@ The others are: All of those lexemes can stay empty, or hold the _namespace_ that will be filled upon a successful match. 
```bash -bash $ <<<$JSN jtc -rw'<>c:' -``` -```json +bash $ <<<$jsn jtc -rw'<>c:' [ "abc", false, null, { "pi": 3.14 }, [ 1, "two", { "number three": 3 } ] ] { "pi": 3.14 } [ 1, "two", { "number three": 3 } ] @@ -937,9 +908,7 @@ bash $ <<<$JSN jtc -rw'<>c:' ### Arbitrary Json searches lexeme with the suffix `j` can match any arbitrary JSON value: ```bash -bash $ <<<$JSN jtc -w'<{ "pi":3.14 }>j' -``` -```json +bash $ <<<$jsn jtc -w'<{ "pi":3.14 }>j' { "pi": 3.14 } @@ -947,9 +916,7 @@ bash $ <<<$JSN jtc -w'<{ "pi":3.14 }>j' Even more, the parameter in the `j` lexeme can be a _templated JSON_: ```bash -bash $ <<<$JSN jtc -w'[4][2][0] v [^0] <{"pi": {Nr3}.14}>j [pi]' -``` -```json +bash $ <<<$jsn jtc -w'[4][2][0] v [^0] <{"pi": {Nr3}.14}>j [pi]' 3.14 ``` @@ -967,9 +934,7 @@ Obviously the `j` lexeme cannot be empty or result in an empty lexeme after temp ## There's another search lexeme suffix - `s` - that one will find a JSON pointed by a _namespace_: ```bash -bash $ <<<$JSN jtc -w'v s' -``` -```json +bash $ <<<$jsn jtc -w'v s' { "pi": 3.14 } @@ -988,8 +953,6 @@ lexemes search for original or duplicate entries of any JSONs, not necessarily a ```bash bash $ JSD='{"Orig 1": 1, "Orig 2": "two", "list": [ "three", { "dup 1": 1, "dup 2": "two", "second dup 1": 1 } ]}' bash $ <<<$JSD jtc -``` -```json { "Orig 1": 1, "Orig 2": "two", @@ -1007,8 +970,6 @@ bash $ <<<$JSD jtc Let's see _all_ the original elements in the above JSON: ```bash bash $ <<<$JSD jtc -lrw'q:' -``` -```json { "Orig 1": 1, "Orig 2": "two", "list": [ "three", { "dup 1": 1, "dup 2": "two", "second dup 1": 1 } ] } "Orig 1": 1 "Orig 2": "two" @@ -1021,8 +982,6 @@ As you can see there were listed _all_ first seen JSON values (including the roo Now, let's list _all_ the duplicates: ```bash bash $ <<<$JSD jtc -lrw'Q:' -``` -```json "dup 1": 1 "dup 2": "two" "second dup 1": 1 @@ -1047,8 +1006,6 @@ First two variants should not require much of a clarification, let's work with t ```bash bash $ JSL='{"One": 1, "obj": 
{ "One": true, "Two": 2, "": 3 }, "45": "forty-five"}' bash $ <<<$JSL jtc -``` -```json { "45": "forty-five", "One": 1, @@ -1061,8 +1018,6 @@ bash $ <<<$JSL jtc ``` ```bash bash $ <<<$JSL jtc -rlw'<[oO]>L:' -``` -```json "One": 1 "obj": { "": 3, "One": true, "Two": 2 } "One": true @@ -1070,8 +1025,6 @@ bash $ <<<$JSL jtc -rlw'<[oO]>L:' ``` ```bash bash $ <<<$JSL jtc -rlw'l:' -``` -```json "One": 1 "One": true ```` @@ -1082,14 +1035,10 @@ or _JSON numeric_, in the latter case, it's automatically converted to a _string a numerical value: ```bash bash $ <<<$JSL jtc -lrw'v t' -``` -```json "45": "forty-five" ``` ```bash bash $ <<<$JSL jtc -lrw'v t' -``` -```json "45": "forty-five" ``` All other _JSON types_ in the `NS` will be ignored, such search will always return _false_. @@ -1111,8 +1060,6 @@ value `"forty-five"` via literal subscript, but using `>..45..v >lblv [obj]>idxOneOneOneOneq:' -``` -```json 1 true ``` @@ -1240,8 +1171,6 @@ the second will do `$2` and so on. Plus, the entire match will populate the name That way it's possible to extract any part(s) from the found JSON values for a later re-use. ```bash bash $ <<<$JSL jtc -w'<(.*)[oO](.*)>L:' -T'{ "sub-group 1":{{$1}}, "sub-group 2":{{$2}}, "entire match":{{$0}} }' -``` -```json { "entire match": "One", "sub-group 1": "", @@ -1304,9 +1233,7 @@ The directive `v` preserves currently walked value in the _namespace_ `NS`. are capable of doing the same on their own, but for others, as well as for the subscripts, it's still a useful feature. 
```bash -bash $ <<<$JSN jtc -``` -```json +bash $ <<<$jsn jtc [ "abc", false, @@ -1324,17 +1251,13 @@ bash $ <<<$JSN jtc ] ``` ```bash -bash $ <<<$JSN jtc -w'[4][0]v[-1]>Idxv[-1]>Idxv' -``` -```json +bash $ <<<$jsn jtc -w'[4][0]v' 1 ``` \- memorized a currently walked JSON in the namespace `Idx` (which is the _JSON numeric_ `1`) ## ```bash -bash $ <<<$JSN jtc -w'[4][0]v[-1]' -``` -```json +bash $ <<<$jsn jtc -w'[4][0]v[-1]' [ 1, "two", @@ -1376,9 +1293,7 @@ bash $ <<<$JSN jtc -w'[4][0]v[-1]' \- stepped one level up (towards the root) from the last walked JSON ## ```bash -bash $ <<<$JSN jtc -w'[4][0]v[-1]>Idxv[-1]>Idxjk' -T'{idx}' -``` -```json +bash $ <<<$jsn jtc -w'<{"pi":3.14}>jk' -T'{idx}' 3 ``` @@ -1422,15 +1333,11 @@ If the lexeme is _**empty**_ (`<>k`) _AND_ is the last one in the walk-path, the in the namespace, but instead re-interprets the label as the JSON value. That way it become possible to rewrite labels in update (`-u`) operations, or re-use it in template interpolation. ```bash -bash $ <<<$JSN jtc -w'<{"pi":3.14}>j<>k' -``` -```json +bash $ <<<$jsn jtc -w'<{"pi":3.14}>j<>k' 3 ``` ```bash -bash $ <<<$JSN jtc -w'<{"pi":3.14}>j<>k' -T'{"idx": {{}}}' -r -``` -```json +bash $ <<<$jsn jtc -w'<{"pi":3.14}>j<>k' -T'{"idx": {{}}}' -r { "idx": 3 } ``` The described effect occurs only if the empty `<>k` lexeme appears the last in the walk-path, if the lexeme appears somewhere in the @@ -1443,29 +1350,22 @@ The directive `z` allows erasing the namespace `NS`. Mostly, this would be r _[walk branching](https://github.com/ldn-softdev/jtc/blob/master/Walk-path%20tutorial.md#walk-branching)_. 
For example, let's replace all even numbers in the array with their negative values: -```bash +```bash SKIP FIXME bash $ <<<$'[1,2,3,4,5,6,7,8,9]' jtc -w'z[:]<>f<[02468]$>D:v' -T'-{Num}' -jr -``` -```json [ 1, -2, 3, -4, 5, -6, 7, -8, 9 ] ``` If the walk began w/o initial lexeme erasing namespace `Num`, then the whole attempt would fail: ```bash bash $ <<<$'[1,2,3,4,5,6,7,8,9]' jtc -w'[:]<>f<[02468]$>D:v' -T'-{Num}' -jr -``` -```json [ 1, -2, -2, -4, -4, -6, -6, -8, -8 ] - ``` Of course, knowing _how [Regex lexemes](https://github.com/ldn-softdev/jtc/blob/master/Walk-path%20tutorial.md#regex-searches) work_, it's possible to rewrite the walk-path in a bit more succinct way: -```bash +```bash SKIP FIXME bash $ <<<$'[1,2,3,4,5,6,7,8,9]' jtc -w'<$0>z[:]<>f<[02468]$>D:' -T'-{$0}' -jr -``` -```json [ 1, -2, 3, -4, 5, -6, 7, -8, 9 ] ``` diff --git a/ab.json b/ab.json new file mode 100644 index 0000000..d54f6df --- /dev/null +++ b/ab.json @@ -0,0 +1,75 @@ +{ + "Directory": [ + { + "address": { + "city": "New York", + "postal code": 10012, + "state": "NY", + "street address": "599 Lafayette St" + }, + "age": 25, + "children": [ + "Olivia" + ], + "name": "John", + "phone": [ + { + "number": "112-555-1234", + "type": "mobile" + }, + { + "number": "113-123-2368", + "type": "mobile" + } + ], + "spouse": "Martha" + }, + { + "address": { + "city": "Seattle", + "postal code": 98104, + "state": "WA", + "street address": "5423 Madison St" + }, + "age": 31, + "children": [], + "name": "Ivan", + "phone": [ + { + "number": "273-923-6483", + "type": "home" + }, + { + "number": "223-283-0372", + "type": "mobile" + } + ], + "spouse": null + }, + { + "address": { + "city": "Denver", + "postal code": 80206, + "state": "CO", + "street address": "6213 E Colfax Ave" + }, + "age": 25, + "children": [ + "Robert", + "Lila" + ], + "name": "Jane", + "phone": [ + { + "number": "358-303-0373", + "type": "office" + }, + { + "number": "333-638-0238", + "type": "home" + } + ], + "spouse": "Chuck" + } + 
] +} diff --git a/ab_eol.json b/ab_eol.json new file mode 100644 index 0000000..db2b8ef --- /dev/null +++ b/ab_eol.json @@ -0,0 +1,75 @@ +{ + "Directory": [ + { + "address": { + "city": "New York", + "postal code": 10012, + "state": "NY", + "street address": "599 Lafayette St" + }, + "age": 25, + "children": [ + "Olivia" + ], + "name": "John", + "phone": [ + { + "number": "112-555-1234", + "type": "mobile" + }, + { + "number": "113-123-2368", + "type": "mobile" + } + ], + "spouse": "Martha" + }, + { + "address": { + "city": "Seattle", + "postal code": 98104, + "state": "WA", + "street address": "5423 Madison St" + }, + "age": 31, + "children": [], + "name": "Ivan", + "phone": [ + { + "number": "273-923-6483", + "type": "home" + }, + { + "number": "223-283-0372", + "type": "mobile" + } + ], + "spouse": null + }, + { + "address": { + "city": "Denver", + "postal code": 80206, + "state": "CO, + "street address": "6213 E Colfax Ave" + }, + "age": 25, + "children": [ + "Robert", + "Lila" + ], + "name": "Jane", + "phone": [ + { + "number": "358-303-0373", + "type": "office" + }, + { + "number": "333-638-0238", + "type": "home" + } + ], + "spouse": "Chuck" + } + ] +} diff --git a/addressbook-sample.json b/addressbook-sample.json new file mode 100644 index 0000000..0019d72 --- /dev/null +++ b/addressbook-sample.json @@ -0,0 +1,54 @@ +{ + "AddressBook": [ + [ + { + "Name": "Ivan", + "address": { + "city": "Seattle", + "postal code": 98104, + "state": "WA", + "street address": "5423 Madison St" + }, + "age": 31, + "children": [,], + "phoneNumbers": [ + { "number": "573 923-6483", "type": "home" }, + { "number": "523 283-0372", "type": "mobile" } + ], + "spouse": null + }, + { + "Name": "Jane", + "address": { + "city": "Denver", + "postal code": 80206, + "state": "CO", + "street address": "6213 E Colfax Ave" + }, + "age": 25, + "children": ["Robert", "Lila"], + "phoneNumbers": [ + { "number": "358 303-0373", "type": "office" }, + { "number": "333 638-0238", "type": "home" } + 
], + "spouse": "Chuck" + }, + { + "Name": "John", + "address": { + "city": "New York", + "postal code": 10012, + "state": "NY", + "street address": "599 Lafayette St" + }, + "age": 25, + "children": ["Olivia"], + "phoneNumbers": [ + { "number": "212 555-1234", "type": "mobile" }, + { "number": "213 123-2368", "type": "mobile" } + ], + "spouse": "Martha" + } + ] + ] +} diff --git a/id.json b/id.json new file mode 100644 index 0000000..c30c5f3 --- /dev/null +++ b/id.json @@ -0,0 +1,5 @@ +[ + { "id": 3, "title": "Smoke on the Water" }, + { "id": 1, "title": "The Winner Takes It All" }, + { "id": 2, "title": "The Show Must Go On" } +] diff --git a/ill.json b/ill.json new file mode 100644 index 0000000..5247015 --- /dev/null +++ b/ill.json @@ -0,0 +1,4 @@ +{ + "label": "first entry", + "label": "second entry" +} diff --git a/inserting_updating.json b/inserting_updating.json new file mode 100644 index 0000000..59e9b8e --- /dev/null +++ b/inserting_updating.json @@ -0,0 +1,3 @@ +[ "first", "JSON" ] +{ "second": "JSON" } +"third JSON" diff --git a/lib/extensions.hpp b/lib/extensions.hpp index 83eacda..c9af22f 100644 --- a/lib/extensions.hpp +++ b/lib/extensions.hpp @@ -146,8 +146,12 @@ __funcname__, __filename__, __line__}; } +constexpr const char * drop_leading_dot_slash(const char * path) { + return path[0] == '.' && path[1] == '/' ? 
path + 2 : path; +} + // for in-place throw parameter -#define EXP(TROW_REASON) __exp__(TROW_REASON, __func__, __FILE__, __LINE__) +#define EXP(TROW_REASON) __exp__(TROW_REASON, __func__, drop_leading_dot_slash(__FILE__), __LINE__) diff --git a/main.json b/main.json new file mode 100644 index 0000000..f364b0c --- /dev/null +++ b/main.json @@ -0,0 +1,5 @@ +[ + { "name": "Abba", "rec": 1, "songs": [] }, + { "name": "Deep Purple", "rec": 3, "songs": [] }, + { "name": "Queen", "rec": 2, "songs": [] } +] diff --git a/run_tests.py b/run_tests.py new file mode 100644 index 0000000..97a1534 --- /dev/null +++ b/run_tests.py @@ -0,0 +1,128 @@ +#!/usr/bin/python3 +from dataclasses import dataclass +from typing import List +import subprocess +import os +import unittest + + +ENV = os.environ.copy() +ENV["PATH"] = f".:{ENV['PATH']}" + + +@dataclass +class CodeBlock(unittest.TestCase): + command: str + output: str + + maxDiff = None + + def check(self, verbosity: int = 0, valgrind: bool = False) -> None: + cmd = self.command if not valgrind else "valgrind -q " + self.command + if verbosity > 0: + print(f"command: {cmd}") + if verbosity > 1: + print(f"output: {self.output}") + + if self.command.find("=") != -1 and len(self.command.split("=")) == 2: + var, val = self.command.split("=") + ENV[var] = val.strip("'") + return + + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True, + env=ENV, + executable="/usr/bin/bash", + ) + stdout, stderr = proc.communicate() + self.assertMultiLineEqual(str(stdout, encoding="utf-8"), self.output) + + def __str__(self): + return f"CodeBlock: cmd='{self.command}'" + + +def extract_markdown_code_blocks(fname: str) -> List[List[str]]: + block_started: bool = False + block_lines: List[str] = [] + blocks: List[List[str]] = [] + + with open(fname, "r") as md_file: + for line in md_file: + block_border = line[0:3] == "```" + + if block_border: + if block_started: + block_started = False + 
blocks.append(block_lines) + block_lines = [] + else: + if ( + line[3:].find("bash") != -1 + and line[3:].find("SKIP") == -1 + ): + block_started = True + + if block_started and not block_border: + block_lines.append(line) + + return blocks + + +def extract_code_blocks(lines: List[str]) -> List[CodeBlock]: + output: List[str] = [] + command: str = "" + code_blocks = [] + + cmd_on_line: bool = False + cmd_pushed: bool = False + + for i, line in enumerate(lines): + cmd_on_line = line[0:6] == "bash $" + + if cmd_on_line: + # if command == "" and line[6:] != "": + # command = line[6:] + # command = command.strip() + + if command != "": + code_blocks.append(CodeBlock(command, "".join(output))) + cmd_pushed = True + + output = [] + command = line[6:] + command = command.strip() + cmd_pushed = False + + elif line[0] != "#": + output.append(line) + # special handling for lines that just set an environment variable + # e.g.: + # bash $ jsn="foo" + # bash $ <<<$jsn jtc + # here the first line has no output, but must be added nevertheless + # if output or (lines[i-1][0:6] == "bash $" and i > 0): + + if not cmd_pushed: + code_blocks.append(CodeBlock(command, "".join(output))) + + return (block for block in code_blocks if block.command != "") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="jtc test runner") + parser.add_argument("md_files", nargs="+", type=str) + parser.add_argument("-v", "--verbose", action="count", default=0) + parser.add_argument("--valgrind", action="store_true") + args = parser.parse_args() + + for md_file in args.md_files: + for blocks in extract_markdown_code_blocks(md_file): + # print(blocks) + for block in extract_code_blocks(blocks): + # print(block) + block.check(verbosity=args.verbose, valgrind=args.valgrind) diff --git a/stream.json b/stream.json new file mode 100644 index 0000000..3da37b0 --- /dev/null +++ b/stream.json @@ -0,0 +1,3 @@ +{ "1": "first JSON" } +{ "2": "second JSON" } +{ "3": "third 
JSON" }