Skip to content

Commit ad8d9c5

Browse files
authored
Merge pull request github#12269 from github/tausbn/ql-add-json-extraction
QL: Add JSON (+C/L) extraction
2 parents aac65b0 + b5ebd1a commit ad8d9c5

File tree

8 files changed

+286
-0
lines changed

8 files changed

+286
-0
lines changed

ql/Cargo.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ql/autobuilder/src/main.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@ fn main() -> std::io::Result<()> {
1818
.arg("--include-extension=.ql")
1919
.arg("--include-extension=.qll")
2020
.arg("--include-extension=.dbscheme")
21+
.arg("--include-extension=.json")
22+
.arg("--include-extension=.jsonc")
23+
.arg("--include-extension=.jsonl")
2124
.arg("--include=**/qlpack.yml")
2225
.arg("--include=deprecated.blame")
2326
.arg("--size-limit=5m")

ql/extractor/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", re
1414
tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"}
1515
tree-sitter-ql-yaml = {git = "https://github.com/erik-krogh/tree-sitter-ql.git", rev = "cf704bf3671e1ae148e173464fb65a4d2bbf5f99"}
1616
tree-sitter-blame = {path = "../buramu/tree-sitter-blame"}
17+
tree-sitter-json = {git = "https://github.com/tausbn/tree-sitter-json.git", rev = "471ceac44d127e609afa349cf0a59370791fe8b3"}
1718
clap = "2.33"
1819
tracing = "0.1"
1920
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }

ql/extractor/src/main.rs

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,11 +88,13 @@ fn main() -> std::io::Result<()> {
8888
let dbscheme = tree_sitter_ql_dbscheme::language();
8989
let yaml = tree_sitter_ql_yaml::language();
9090
let blame = tree_sitter_blame::language();
91+
let json = tree_sitter_json::language();
9192
let schema = node_types::read_node_types_str("ql", tree_sitter_ql::NODE_TYPES)?;
9293
let dbscheme_schema =
9394
node_types::read_node_types_str("dbscheme", tree_sitter_ql_dbscheme::NODE_TYPES)?;
9495
let yaml_schema = node_types::read_node_types_str("yaml", tree_sitter_ql_yaml::NODE_TYPES)?;
9596
let blame_schema = node_types::read_node_types_str("blame", tree_sitter_blame::NODE_TYPES)?;
97+
let json_schema = node_types::read_node_types_str("json", tree_sitter_json::NODE_TYPES)?;
9698

9799
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
98100
let lines = lines?;
@@ -134,6 +136,19 @@ fn main() -> std::io::Result<()> {
134136
&source,
135137
&code_ranges,
136138
)?
139+
} else if line.ends_with(".json")
140+
|| line.ends_with(".jsonl")
141+
|| line.ends_with(".jsonc")
142+
{
143+
extractor::extract(
144+
json,
145+
"json",
146+
&json_schema,
147+
&mut trap_writer,
148+
&path,
149+
&source,
150+
&code_ranges,
151+
)?
137152
} else if line.ends_with(".blame") {
138153
extractor::extract(
139154
blame,

ql/generator/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,3 +15,4 @@ tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", re
1515
tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"}
1616
tree-sitter-ql-yaml = {git = "https://github.com/erik-krogh/tree-sitter-ql.git", rev = "cf704bf3671e1ae148e173464fb65a4d2bbf5f99"}
1717
tree-sitter-blame = {path = "../buramu/tree-sitter-blame"}
18+
tree-sitter-json = { git = "https://github.com/tausbn/tree-sitter-json.git", rev = "471ceac44d127e609afa349cf0a59370791fe8b3"}

ql/generator/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -581,6 +581,10 @@ fn main() -> std::io::Result<()> {
581581
name: "Blame".to_owned(),
582582
node_types: tree_sitter_blame::NODE_TYPES,
583583
},
584+
Language {
585+
name: "JSON".to_owned(),
586+
node_types: tree_sitter_json::NODE_TYPES,
587+
},
584588
];
585589
let mut dbscheme_writer = LineWriter::new(File::create(dbscheme_path)?);
586590
write!(

ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll

Lines changed: 159 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1856,3 +1856,162 @@ module Blame {
18561856
final override string getAPrimaryQlClass() { result = "Number" }
18571857
}
18581858
}
1859+
1860+
module JSON {
1861+
/** The base class for all AST nodes */
1862+
class AstNode extends @json_ast_node {
1863+
/** Gets a string representation of this element. */
1864+
string toString() { result = this.getAPrimaryQlClass() }
1865+
1866+
/** Gets the location of this element. */
1867+
final L::Location getLocation() { json_ast_node_info(this, _, _, result) }
1868+
1869+
/** Gets the parent of this element. */
1870+
final AstNode getParent() { json_ast_node_info(this, result, _, _) }
1871+
1872+
/** Gets the index of this node among the children of its parent. */
1873+
final int getParentIndex() { json_ast_node_info(this, _, result, _) }
1874+
1875+
/** Gets a field or child node of this node. */
1876+
AstNode getAFieldOrChild() { none() }
1877+
1878+
/** Gets the name of the primary QL class for this element. */
1879+
string getAPrimaryQlClass() { result = "???" }
1880+
1881+
/** Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. */
1882+
string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
1883+
}
1884+
1885+
/** A token. */
1886+
class Token extends @json_token, AstNode {
1887+
/** Gets the value of this token. */
1888+
final string getValue() { json_tokeninfo(this, _, result) }
1889+
1890+
/** Gets a string representation of this element. */
1891+
final override string toString() { result = this.getValue() }
1892+
1893+
/** Gets the name of the primary QL class for this element. */
1894+
override string getAPrimaryQlClass() { result = "Token" }
1895+
}
1896+
1897+
/** A reserved word. */
1898+
class ReservedWord extends @json_reserved_word, Token {
1899+
/** Gets the name of the primary QL class for this element. */
1900+
final override string getAPrimaryQlClass() { result = "ReservedWord" }
1901+
}
1902+
1903+
/** A class representing `array` nodes. */
1904+
class Array extends @json_array, AstNode {
1905+
/** Gets the name of the primary QL class for this element. */
1906+
final override string getAPrimaryQlClass() { result = "Array" }
1907+
1908+
/** Gets the `i`th child of this node. */
1909+
final Value getChild(int i) { json_array_child(this, i, result) }
1910+
1911+
/** Gets a field or child node of this node. */
1912+
final override AstNode getAFieldOrChild() { json_array_child(this, _, result) }
1913+
}
1914+
1915+
/** A class representing `comment` tokens. */
1916+
class Comment extends @json_token_comment, Token {
1917+
/** Gets the name of the primary QL class for this element. */
1918+
final override string getAPrimaryQlClass() { result = "Comment" }
1919+
}
1920+
1921+
/** A class representing `document` nodes. */
1922+
class Document extends @json_document, AstNode {
1923+
/** Gets the name of the primary QL class for this element. */
1924+
final override string getAPrimaryQlClass() { result = "Document" }
1925+
1926+
/** Gets the `i`th child of this node. */
1927+
final Value getChild(int i) { json_document_child(this, i, result) }
1928+
1929+
/** Gets a field or child node of this node. */
1930+
final override AstNode getAFieldOrChild() { json_document_child(this, _, result) }
1931+
}
1932+
1933+
/** A class representing `escape_sequence` tokens. */
1934+
class EscapeSequence extends @json_token_escape_sequence, Token {
1935+
/** Gets the name of the primary QL class for this element. */
1936+
final override string getAPrimaryQlClass() { result = "EscapeSequence" }
1937+
}
1938+
1939+
/** A class representing `false` tokens. */
1940+
class False extends @json_token_false, Token {
1941+
/** Gets the name of the primary QL class for this element. */
1942+
final override string getAPrimaryQlClass() { result = "False" }
1943+
}
1944+
1945+
/** A class representing `null` tokens. */
1946+
class Null extends @json_token_null, Token {
1947+
/** Gets the name of the primary QL class for this element. */
1948+
final override string getAPrimaryQlClass() { result = "Null" }
1949+
}
1950+
1951+
/** A class representing `number` tokens. */
1952+
class Number extends @json_token_number, Token {
1953+
/** Gets the name of the primary QL class for this element. */
1954+
final override string getAPrimaryQlClass() { result = "Number" }
1955+
}
1956+
1957+
/** A class representing `object` nodes. */
1958+
class Object extends @json_object, AstNode {
1959+
/** Gets the name of the primary QL class for this element. */
1960+
final override string getAPrimaryQlClass() { result = "Object" }
1961+
1962+
/** Gets the `i`th child of this node. */
1963+
final Pair getChild(int i) { json_object_child(this, i, result) }
1964+
1965+
/** Gets a field or child node of this node. */
1966+
final override AstNode getAFieldOrChild() { json_object_child(this, _, result) }
1967+
}
1968+
1969+
/** A class representing `pair` nodes, i.e. a single `key: value` entry that appears as a child of an `Object`. */
1970+
class Pair extends @json_pair, AstNode {
1971+
/** Gets the name of the primary QL class for this element, which is always `"Pair"`. */
1972+
final override string getAPrimaryQlClass() { result = "Pair" }
1973+
1974+
/**
 * Gets the node corresponding to the field `key`.
 *
 * Although the result is typed as `AstNode`, the database schema restricts keys to
 * `@json_pair_key_type`, that is, a `String` node or a `Number` token.
 */
1975+
final AstNode getKey() { json_pair_def(this, result, _) }
1976+
1977+
/** Gets the node corresponding to the field `value`. */
1978+
final Value getValue() { json_pair_def(this, _, result) }
1979+
1980+
/** Gets a field or child node of this node, that is, either its key or its value. */
1981+
final override AstNode getAFieldOrChild() {
1982+
json_pair_def(this, result, _) or json_pair_def(this, _, result)
1983+
}
1984+
}
1985+
1986+
/** A class representing `string` nodes. */
1987+
class String extends @json_string__, AstNode {
1988+
/** Gets the name of the primary QL class for this element. */
1989+
final override string getAPrimaryQlClass() { result = "String" }
1990+
1991+
/** Gets the child of this node. */
1992+
final StringContent getChild() { json_string_child(this, result) }
1993+
1994+
/** Gets a field or child node of this node. */
1995+
final override AstNode getAFieldOrChild() { json_string_child(this, result) }
1996+
}
1997+
1998+
/** A class representing `string_content` nodes. */
1999+
class StringContent extends @json_string_content, AstNode {
2000+
/** Gets the name of the primary QL class for this element. */
2001+
final override string getAPrimaryQlClass() { result = "StringContent" }
2002+
2003+
/** Gets the `i`th child of this node. */
2004+
final EscapeSequence getChild(int i) { json_string_content_child(this, i, result) }
2005+
2006+
/** Gets a field or child node of this node. */
2007+
final override AstNode getAFieldOrChild() { json_string_content_child(this, _, result) }
2008+
}
2009+
2010+
/** A class representing `true` tokens. */
2011+
class True extends @json_token_true, Token {
2012+
/** Gets the name of the primary QL class for this element. */
2013+
final override string getAPrimaryQlClass() { result = "True" }
2014+
}
2015+
2016+
/**
 * A class representing `value` nodes: any node belonging to the `@json_value` union
 * (an array, an object, a string, a number, or one of the `true`, `false` and `null` tokens).
 */
class Value extends @json_value, AstNode { }
2017+
}

ql/ql/src/ql.dbscheme

Lines changed: 92 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1249,3 +1249,95 @@ blame_ast_node_info(
12491249
int loc: @location ref
12501250
);
12511251

1252+
#keyset[json_array, index]
1253+
json_array_child(
1254+
int json_array: @json_array ref,
1255+
int index: int ref,
1256+
unique int child: @json_value ref
1257+
);
1258+
1259+
json_array_def(
1260+
unique int id: @json_array
1261+
);
1262+
1263+
#keyset[json_document, index]
1264+
json_document_child(
1265+
int json_document: @json_document ref,
1266+
int index: int ref,
1267+
unique int child: @json_value ref
1268+
);
1269+
1270+
json_document_def(
1271+
unique int id: @json_document
1272+
);
1273+
1274+
#keyset[json_object, index]
1275+
json_object_child(
1276+
int json_object: @json_object ref,
1277+
int index: int ref,
1278+
unique int child: @json_pair ref
1279+
);
1280+
1281+
json_object_def(
1282+
unique int id: @json_object
1283+
);
1284+
1285+
@json_pair_key_type = @json_string__ | @json_token_number
1286+
1287+
json_pair_def(
1288+
unique int id: @json_pair,
1289+
int key__: @json_pair_key_type ref,
1290+
int value: @json_value ref
1291+
);
1292+
1293+
json_string_child(
1294+
unique int json_string__: @json_string__ ref,
1295+
unique int child: @json_string_content ref
1296+
);
1297+
1298+
json_string_def(
1299+
unique int id: @json_string__
1300+
);
1301+
1302+
#keyset[json_string_content, index]
1303+
json_string_content_child(
1304+
int json_string_content: @json_string_content ref,
1305+
int index: int ref,
1306+
unique int child: @json_token_escape_sequence ref
1307+
);
1308+
1309+
json_string_content_def(
1310+
unique int id: @json_string_content
1311+
);
1312+
1313+
@json_value = @json_array | @json_object | @json_string__ | @json_token_false | @json_token_null | @json_token_number | @json_token_true
1314+
1315+
json_tokeninfo(
1316+
unique int id: @json_token,
1317+
int kind: int ref,
1318+
string value: string ref
1319+
);
1320+
1321+
case @json_token.kind of
1322+
0 = @json_reserved_word
1323+
| 1 = @json_token_comment
1324+
| 2 = @json_token_escape_sequence
1325+
| 3 = @json_token_false
1326+
| 4 = @json_token_null
1327+
| 5 = @json_token_number
1328+
| 6 = @json_token_true
1329+
;
1330+
1331+
1332+
@json_ast_node = @json_array | @json_document | @json_object | @json_pair | @json_string__ | @json_string_content | @json_token
1333+
1334+
@json_ast_node_parent = @file | @json_ast_node
1335+
1336+
#keyset[parent, parent_index]
1337+
json_ast_node_info(
1338+
unique int node: @json_ast_node ref,
1339+
int parent: @json_ast_node_parent ref,
1340+
int parent_index: int ref,
1341+
int loc: @location ref
1342+
);
1343+

0 commit comments

Comments
 (0)