Skip to content

Commit 1dc288f

Browse files
committed
Introduce C parser library
Add a shared library with a C API that provides access to the syntactic parser with callbacks for the inference of raw syntax nodes. This is primarily intended to be used by SwiftSyntax to speed up source code parsing for it.
1 parent aba7fa1 commit 1dc288f

File tree

18 files changed

+866
-0
lines changed

18 files changed

+866
-0
lines changed
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
//===--- SwiftSyntaxParser.h - C API for Swift Syntax Parsing -----*- C -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
//
13+
// This C API is primarily intended to serve as the Swift parsing component
14+
// of SwiftSyntax (https://github.com/apple/swift-syntax).
15+
//
16+
//===----------------------------------------------------------------------===//
17+
18+
#ifndef SWIFT_C_SYNTAX_PARSER_H
19+
#define SWIFT_C_SYNTAX_PARSER_H
20+
21+
#include <stdbool.h>
22+
#include <stddef.h>
23+
#include <stdint.h>
24+
25+
/// The version constants for the SwiftSyntaxParser C API.
26+
/// SWIFTPARSE_VERSION_MINOR should increase when there are API additions.
27+
/// SWIFTPARSE_VERSION_MAJOR is intended for "major" source/ABI breaking changes.
28+
#define SWIFTPARSE_VERSION_MAJOR 0
29+
#define SWIFTPARSE_VERSION_MINOR 1
30+
31+
#define SWIFTPARSE_VERSION_ENCODE(major, minor) ( \
32+
((major) * 10000) \
33+
+ ((minor) * 1))
34+
35+
#define SWIFTPARSE_VERSION SWIFTPARSE_VERSION_ENCODE( \
36+
SWIFTPARSE_VERSION_MAJOR, \
37+
SWIFTPARSE_VERSION_MINOR )
38+
39+
#define SWIFTPARSE_VERSION_STRINGIZE_(major, minor) \
40+
#major"."#minor
41+
#define SWIFTPARSE_VERSION_STRINGIZE(major, minor) \
42+
SWIFTPARSE_VERSION_STRINGIZE_(major, minor)
43+
44+
#define SWIFTPARSE_VERSION_STRING SWIFTPARSE_VERSION_STRINGIZE( \
45+
SWIFTPARSE_VERSION_MAJOR, \
46+
SWIFTPARSE_VERSION_MINOR)
47+
48+
#ifdef __cplusplus
49+
# define SWIFTPARSE_BEGIN_DECLS extern "C" {
50+
# define SWIFTPARSE_END_DECLS }
51+
#else
52+
# define SWIFTPARSE_BEGIN_DECLS
53+
# define SWIFTPARSE_END_DECLS
54+
#endif
55+
56+
#ifndef SWIFTPARSE_PUBLIC
57+
# ifdef _WIN32
58+
# ifdef SwiftSyntaxParser_EXPORTS
59+
# define SWIFTPARSE_PUBLIC __declspec(dllexport)
60+
# else
61+
# define SWIFTPARSE_PUBLIC __declspec(dllimport)
62+
# endif
63+
# else
64+
# define SWIFTPARSE_PUBLIC
65+
# endif
66+
#endif
67+
68+
#ifndef __has_feature
69+
# define __has_feature(x) 0
70+
#endif
71+
72+
#if !__has_feature(blocks)
73+
# error -fblocks is a requirement to use this library
74+
#endif
75+
76+
SWIFTPARSE_BEGIN_DECLS
77+
78+
//=== Syntax Data Types ---------------------------------------------------===//
79+
80+
/// Offset+length in UTF8 bytes.
81+
typedef struct {
82+
uint32_t offset;
83+
uint32_t length;
84+
} swiftparse_range_t;
85+
86+
typedef uint8_t swiftparse_trivia_kind_t;
87+
typedef uint8_t swiftparse_token_kind_t;
88+
typedef uint16_t swiftparse_syntax_kind_t;
89+
90+
/// This is for the client to provide an opaque pointer that the parser will
91+
/// associate with a syntax node.
92+
typedef void *swiftparse_client_node_t;
93+
94+
typedef struct {
95+
/// The length in source this trivia piece occupies, in UTF8 bytes.
96+
uint32_t length;
97+
swiftparse_trivia_kind_t kind;
98+
} swiftparse_trivia_piece_t;
99+
100+
typedef struct {
101+
const swiftparse_trivia_piece_t *leading_trivia;
102+
const swiftparse_trivia_piece_t *trailing_trivia;
103+
uint16_t leading_trivia_count;
104+
uint16_t trailing_trivia_count;
105+
swiftparse_token_kind_t kind;
106+
} swiftparse_token_data_t;
107+
108+
typedef struct {
109+
const swiftparse_client_node_t *nodes;
110+
uint32_t nodes_count;
111+
} swiftparse_layout_data_t;
112+
113+
typedef struct {
114+
union {
115+
swiftparse_token_data_t token_data;
116+
swiftparse_layout_data_t layout_data;
117+
};
118+
/// Represents the range for the node. For a token node the range includes
119+
/// the trivia associated with it.
120+
swiftparse_range_t range;
121+
/// The syntax kind. A value of '0' means this is a token node.
122+
swiftparse_syntax_kind_t kind;
123+
bool present;
124+
} swiftparse_syntax_node_t;
125+
126+
//=== Parser Functions ----------------------------------------------------===//
127+
128+
typedef void *swiftparse_parser_t;
129+
typedef void *swiftparse_alloc_t;
130+
131+
SWIFTPARSE_PUBLIC swiftparse_parser_t
132+
swiftparse_parser_create(void);
133+
134+
SWIFTPARSE_PUBLIC void
135+
swiftparse_parser_dispose(swiftparse_parser_t);
136+
137+
/// Invoked by the parser when a syntax node is parsed. The client should
138+
/// return a pointer to associate with that particular node.
139+
typedef swiftparse_client_node_t
140+
(^swiftparse_node_handler_t)(const swiftparse_syntax_node_t *);
141+
142+
/// Set the \c swiftparse_node_handler_t block to be used by the parser.
143+
///
144+
/// It is required to set a \c swiftparse_node_handler_t block before any calls
145+
/// to \c swiftparse_parse_string. \c swiftparse_parser_set_node_handler can be
146+
/// called multiple times to change the block before subsequent parses.
147+
SWIFTPARSE_PUBLIC void
148+
swiftparse_parser_set_node_handler(swiftparse_parser_t,
149+
swiftparse_node_handler_t);
150+
151+
typedef struct {
152+
/// Length of the source region in UTF8 bytes that the parser should skip.
153+
/// If it is set to 0 it indicates that the parser should continue parsing
154+
/// and the \c node object is ignored.
155+
size_t length;
156+
/// Node to associate for the skipped source region. It will be ignored if
157+
/// \c length is 0.
158+
swiftparse_client_node_t node;
159+
} swiftparse_lookup_result_t;
160+
161+
/// Invoked by the parser at certain points to query whether a source region
162+
/// should be skipped. See \c swiftparse_lookup_result_t.
163+
typedef swiftparse_lookup_result_t
164+
(^swiftparse_node_lookup_t)(size_t offset, swiftparse_syntax_kind_t);
165+
166+
/// Set the \c swiftparse_node_lookup_t block to be used by the parser.
167+
///
168+
/// It is not required to set a \c swiftparse_node_lookup_t block before calling
169+
/// \c swiftparse_parse_string. Not setting a \c swiftparse_node_lookup_t block
170+
/// has the same semantics as never skipping any source regions.
171+
/// \c swiftparse_parser_set_node_lookup can be called multiple times to change the block
172+
/// before subsequent parses.
173+
SWIFTPARSE_PUBLIC void
174+
swiftparse_parser_set_node_lookup(swiftparse_parser_t,
175+
swiftparse_node_lookup_t);
176+
177+
/// Parse the provided \c source and invoke the callback that was set via
178+
/// \c swiftparse_parser_set_node_handler as each syntax node is parsed.
179+
///
180+
/// Syntax nodes are provided in a depth-first, source order. For example,
181+
/// token nodes will be provided ahead of the syntax node whose layout they are
182+
/// a part of. The memory that \c swiftparse_syntax_node_t points to is only
183+
/// valid to access for the duration of the \c swiftparse_node_handler_t block
184+
/// execution; the client should copy the data if it wants to persist it beyond
185+
/// the duration of the block.
186+
///
187+
/// The client provides \c swiftparse_client_node_t pointers to associate with
188+
/// each syntax node and the parser will pass back these pointers as part of the
189+
/// \c swiftparse_layout_data_t of the syntax node that they are a part of.
190+
/// \c swiftparse_client_node_t pointers are completely opaque to the parser;
191+
/// it doesn't try to interpret them in any way. There is no requirement that
192+
/// each node gets a unique \c swiftparse_client_node_t, it is up to the client
193+
/// to reuse \c swiftparse_client_node_t pointers as it deems necessary.
194+
///
195+
/// If the \c swiftparse_client_node_t pointers represent managed memory, the
196+
/// semantics of interacting with the parser should be considered as follows:
197+
///
198+
/// * \c swiftparse_node_handler_t and \c swiftparse_node_lookup_t return a
199+
/// \c swiftparse_client_node_t and transfer ownership of the pointer to the
200+
/// parser.
201+
///
202+
/// * The array of \c swiftparse_client_node_t pointers in
203+
/// \c swiftparse_layout_data_t should be considered as the parser transferring
204+
/// ownership of those pointers back to the client.
205+
///
206+
/// * The \c swiftparse_client_node_t returned by \c swiftparse_parse_string
207+
/// should be considered as the parser transferring back ownership of the
208+
/// pointer.
209+
///
210+
/// The parser guarantees that any \c swiftparse_client_node_t, given to the
211+
/// parser by \c swiftparse_node_handler_t or \c swiftparse_node_lookup_t, will
212+
/// be returned back to the client, either via \c swiftparse_layout_data_t or
213+
/// via the return value of \c swiftparse_parse_string.
214+
///
215+
/// \param source a null-terminated UTF8 string buffer.
216+
SWIFTPARSE_PUBLIC swiftparse_client_node_t
217+
swiftparse_parse_string(swiftparse_parser_t, const char *source);
218+
219+
SWIFTPARSE_END_DECLS
220+
221+
#endif
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
module _InternalSwiftSyntaxParser {
2+
header "SwiftSyntaxParser.h"
3+
link "_InternalSwiftSyntaxParser"
4+
}

test/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ function(get_test_dependencies SDK result_var_name)
5050
llvm-bcanalyzer llvm-nm llvm-readobj llvm-profdata count not
5151
llvm-strings)
5252
endif()
53+
if(SWIFT_BUILD_SYNTAXPARSERLIB)
54+
list(APPEND deps_binaries swift-syntax-parser-test)
55+
endif()
5356
if(SWIFT_BUILD_SOURCEKIT)
5457
list(APPEND deps_binaries sourcekitd-test complete-test)
5558
endif()
@@ -131,6 +134,7 @@ normalize_boolean_spelling(LLVM_ENABLE_ASSERTIONS)
131134
normalize_boolean_spelling(SWIFT_STDLIB_ASSERTIONS)
132135
normalize_boolean_spelling(SWIFT_AST_VERIFIER)
133136
normalize_boolean_spelling(SWIFT_ASAN_BUILD)
137+
normalize_boolean_spelling(SWIFT_BUILD_SYNTAXPARSERLIB)
134138
normalize_boolean_spelling(SWIFT_ENABLE_SOURCEKIT_TESTS)
135139
is_build_type_optimized("${SWIFT_STDLIB_BUILD_TYPE}" SWIFT_OPTIMIZED)
136140

test/Syntax/Parser/tree.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: syntax_parser_lib
2+
// RUN: %swift-syntax-parser-test %s -dump-tree > %t.result
3+
// RUN: diff -u %s.result %t.result
4+
5+
func test() {
6+
"a\(b)c"
7+
}

test/Syntax/Parser/tree.swift.result

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<s118><s163><s92><s13><NULL/><NULL/><t6>// REQUIRES: syntax_parser_lib
2+
// RUN: %swift-syntax-parser-test %s -dump-tree > %t.result
3+
// RUN: diff -u %s.result %t.result
4+
5+
|func| </t6><t105>|test|</t105><NULL/><s110><s108><t88>|(|</t88><s174></s174><t89>|)| </t89></s108><NULL/><NULL/></s110><NULL/><s93><t90>|{|</t90><s163><s92><s64><t102>
6+
|"|</t102><s168><s104><t104>|a|</t104></s104><s105><t100>|\|</t100><t88>|(|</t88><s28><t105>|b|</t105><NULL/></s28><t101>|)|</t101></s105><s104><t104>|c|</t104></s104></s168><t102>|"|</t102></s64><NULL/><NULL/></s92></s163><t91>
7+
|}|</t91></s93></s13><NULL/><NULL/></s92></s163><t0>
8+
||</t0></s118>

test/lit.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,8 @@ config.sil_passpipeline_dumper = inferSwiftBinary('sil-passpipeline-dumper')
267267
config.lldb_moduleimport_test = inferSwiftBinary('lldb-moduleimport-test')
268268
config.swift_ide_test = inferSwiftBinary('swift-ide-test')
269269
config.swift_syntax_test = inferSwiftBinary('swift-syntax-test')
270+
if 'syntax_parser_lib' in config.available_features:
271+
config.swift_syntax_parser_test = inferSwiftBinary('swift-syntax-parser-test')
270272
config.swift_reflection_dump = inferSwiftBinary('swift-reflection-dump')
271273
config.swift_remoteast_test = inferSwiftBinary('swift-remoteast-test')
272274
config.swift_format = inferSwiftBinary('swift-format')
@@ -382,6 +384,8 @@ config.substitutions.append( ('%lldb-moduleimport-test', "%r %s" % (config.lldb_
382384
config.substitutions.append( ('%swift-ide-test_plain', config.swift_ide_test) )
383385
config.substitutions.append( ('%swift-ide-test', "%r %s %s -swift-version %s" % (config.swift_ide_test, mcp_opt, ccp_opt, swift_version)) )
384386
config.substitutions.append( ('%swift-syntax-test', config.swift_syntax_test) )
387+
if 'syntax_parser_lib' in config.available_features:
388+
config.substitutions.append( ('%swift-syntax-parser-test', config.swift_syntax_parser_test) )
385389
config.substitutions.append( ('%swift-format', config.swift_format) )
386390
config.substitutions.append( ('%llvm-link', config.llvm_link) )
387391
config.substitutions.append( ('%swift-llvm-opt', config.swift_llvm_opt) )

test/lit.site.cfg.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ if "@CMAKE_GENERATOR@" == "Xcode":
8585

8686
config.available_features.add("CMAKE_GENERATOR=@CMAKE_GENERATOR@")
8787

88+
if "@SWIFT_BUILD_SYNTAXPARSERLIB@" == "TRUE":
89+
config.available_features.add('syntax_parser_lib')
90+
8891
if "@SWIFT_ENABLE_SOURCEKIT_TESTS@" == "TRUE":
8992
config.available_features.add('sourcekit')
9093

tools/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ add_swift_tool_subdirectory(swift-llvm-opt)
3030
add_swift_tool_subdirectory(swift-api-digester)
3131
add_swift_tool_subdirectory(swift-syntax-test)
3232
add_swift_tool_subdirectory(swift-refactor)
33+
if(SWIFT_BUILD_SYNTAXPARSERLIB)
34+
add_swift_tool_subdirectory(libSwiftSyntaxParser)
35+
add_swift_tool_subdirectory(swift-syntax-parser-test)
36+
endif()
3337

3438
if(LLVM_USE_SANITIZE_COVERAGE)
3539
add_swift_tool_subdirectory(swift-demangle-fuzzer)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libSwiftSyntaxParser.exports)
2+
3+
add_swift_host_library(libSwiftSyntaxParser SHARED
4+
c-include-check.c
5+
libSwiftSyntaxParser.cpp
6+
7+
LINK_LIBRARIES
8+
swiftParse
9+
)
10+
set_target_properties(libSwiftSyntaxParser
11+
PROPERTIES
12+
OUTPUT_NAME _InternalSwiftSyntaxParser)
13+
14+
add_llvm_symbol_exports(libSwiftSyntaxParser ${LLVM_EXPORTED_SYMBOL_FILE})
15+
16+
# Adds -dead_strip option
17+
add_link_opts(libSwiftSyntaxParser)
18+
19+
set_property(TARGET libSwiftSyntaxParser APPEND_STRING PROPERTY
20+
COMPILE_FLAGS " -fblocks")
21+
if(SWIFT_NEED_EXPLICIT_LIBDISPATCH)
22+
target_link_libraries(libSwiftSyntaxParser PRIVATE BlocksRuntime)
23+
endif()
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file serves as a sanity check to ensure that the header is parseable
2+
// from C and that no C++ code has sneaked in.
3+
4+
#include "swift-c/SyntaxParser/SwiftSyntaxParser.h"
5+
typedef swiftparse_syntax_node_t _check_it_exists;

0 commit comments

Comments
 (0)