2
2
// SPDX-License-Identifier: Apache-2.0
3
3
4
4
pub mod headers {
5
- use regex:: { Match , Regex , RegexBuilder } ;
5
+ use regex:: { Regex , RegexBuilder } ;
6
6
use std:: collections:: HashSet ;
7
7
use std:: fs:: { File , OpenOptions } ;
8
8
use std:: io:: { self , BufReader , BufWriter , Read , Seek , Write } ;
9
9
use std:: sync:: LazyLock ;
10
10
11
- static HEADER_TYPE_DECL_RE : LazyLock < Regex > = LazyLock :: new ( || {
12
- RegexBuilder :: new ( r"^(/\*\*([^*]|\*+[^*/])*\*+/\n)?(#define [a-zA-Z_0-9]+ [^\n]+|typedef (struct|enum) [a-zA-Z_0-9]+ +(\{.*?\} )?[a-zA-Z_0-9]+;)\n+" )
13
- . multi_line ( true )
14
- . dot_matches_new_line ( true )
15
- . build ( )
16
- . unwrap ( )
11
/// A definition located inside a header string.
///
/// As built by `collect_definitions`, `str` is exactly `header[start..end]`, so the offsets and
/// the text always describe the same region of the same header.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct Span<'a> {
    /// Byte offset of the first byte of the definition within the header.
    start: usize,
    /// Byte offset one past the last byte of the definition within the header.
    end: usize,
    /// The text of the definition, borrowed from the header.
    str: &'a str,
}
17
+
18
+ static ITEM_DEFINITION_HEAD : LazyLock < Regex > = LazyLock :: new ( || {
19
+ RegexBuilder :: new (
20
+ r"^(?:/\*\*(?:[^*]|\*+[^*/])*\*+/\n)?(?:# *(define [a-zA-Z_0-9]+ [^\n]+)|(typedef))" ,
21
+ )
22
+ . multi_line ( true )
23
+ . dot_matches_new_line ( true )
24
+ . build ( )
25
+ . unwrap ( )
17
26
} ) ;
18
27
19
- fn collect_definitions ( header : & str ) -> Vec < regex:: Match < ' _ > > {
20
- HEADER_TYPE_DECL_RE . find_iter ( header) . collect ( )
28
+ /// Gather all top level typedef and #define definitions from a C header file
29
+ fn collect_definitions ( header : & str ) -> Vec < Span < ' _ > > {
30
+ let mut items = Vec :: new ( ) ;
31
+ let mut start = 0 ;
32
+
33
+ loop {
34
+ let Some ( head) = ITEM_DEFINITION_HEAD . captures_at ( header, start) else {
35
+ break ;
36
+ } ;
37
+ start = head. get ( 0 ) . unwrap ( ) . start ( ) ;
38
+ let end: usize ;
39
+ if let Some ( capture) = head. get ( 2 ) {
40
+ let mut depth: i32 = 0 ;
41
+ let mut typedef_end = None ;
42
+ for ( pos, c) in header. bytes ( ) . enumerate ( ) . skip ( capture. end ( ) ) {
43
+ match c {
44
+ b';' if depth == 0 => {
45
+ typedef_end = Some ( pos + 1 ) ;
46
+ break ;
47
+ }
48
+ b'{' => {
49
+ depth += 1 ;
50
+ }
51
+ b'}' => {
52
+ depth = depth
53
+ . checked_sub ( 1 )
54
+ . expect ( "Unmatched closing brace in typedef" ) ;
55
+ }
56
+ _ => { }
57
+ }
58
+ }
59
+ let typedef_end = typedef_end. expect ( "No closing semicolon found for typedef" ) ;
60
+ end = typedef_end
61
+ + header[ typedef_end..]
62
+ . bytes ( )
63
+ . take_while ( |c| matches ! ( c, b'\n' | b'\r' | b' ' ) )
64
+ . count ( ) ;
65
+ } else if let Some ( capture) = head. get ( 1 ) {
66
+ let define_end = capture. end ( ) ;
67
+ end = define_end
68
+ + header[ define_end..]
69
+ . bytes ( )
70
+ . take_while ( |c| matches ! ( c, b'\n' | b'\r' | b' ' ) )
71
+ . count ( ) ;
72
+ } else {
73
+ unreachable ! (
74
+ "the regex should only capture typedef and #define, got {:?}" ,
75
+ head
76
+ ) ;
77
+ }
78
+
79
+ items. push ( Span {
80
+ start,
81
+ end,
82
+ str : & header[ start..end] ,
83
+ } ) ;
84
+ start = end;
85
+ }
86
+ items
21
87
}
22
88
23
89
fn read ( f : & mut BufReader < & File > ) -> String {
@@ -35,12 +101,12 @@ pub mod headers {
35
101
Ok ( ( ) )
36
102
}
37
103
38
- fn content_without_defs < ' a > ( content : & ' a str , defs : & [ Match ] ) -> Vec < & ' a str > {
104
+ fn content_without_defs < ' a > ( content : & ' a str , defs : & [ Span ] ) -> Vec < & ' a str > {
39
105
let mut new_content_parts = Vec :: new ( ) ;
40
106
let mut pos = 0 ;
41
107
for d in defs {
42
- new_content_parts. push ( & content[ pos..d. start ( ) ] ) ;
43
- pos = d. end ( ) ;
108
+ new_content_parts. push ( & content[ pos..d. start ] ) ;
109
+ pos = d. end ;
44
110
}
45
111
new_content_parts. push ( & content[ pos..] ) ;
46
112
new_content_parts
@@ -61,7 +127,7 @@ pub mod headers {
61
127
62
128
child_defs
63
129
. into_iter ( )
64
- . map ( |m| m. as_str ( ) . to_owned ( ) )
130
+ . map ( |m| m. str . to_owned ( ) )
65
131
. collect :: < Vec < _ > > ( )
66
132
} ) {
67
133
if present. contains ( & child_def) {
@@ -79,90 +145,197 @@ pub mod headers {
79
145
80
146
let base_header_content = read ( & mut BufReader :: new ( & base_header) ) ;
81
147
let base_defs = collect_definitions ( & base_header_content) ;
82
- let base_defs_set: HashSet < _ > = base_defs. iter ( ) . map ( Match :: as_str ) . collect ( ) ;
148
+ let base_defs_set: HashSet < _ > = base_defs. iter ( ) . map ( |s| s . str ) . collect ( ) ;
83
149
84
- let mut base_new_parts = vec ! [ & base_header_content[ ..base_defs. last( ) . unwrap( ) . end( ) ] ] ;
150
+ let mut base_new_parts = vec ! [ & base_header_content[ ..base_defs. last( ) . unwrap( ) . end] ] ;
85
151
for child_def in & unique_child_defs {
86
152
if base_defs_set. contains ( child_def. as_str ( ) ) {
87
153
continue ;
88
154
}
89
155
base_new_parts. push ( child_def) ;
90
156
}
91
- base_new_parts. push ( & base_header_content[ base_defs. last ( ) . unwrap ( ) . end ( ) ..] ) ;
157
+ base_new_parts. push ( & base_header_content[ base_defs. last ( ) . unwrap ( ) . end ..] ) ;
92
158
write_parts ( & mut BufWriter :: new ( & base_header) , & base_new_parts) . unwrap ( ) ;
93
159
}
94
160
95
161
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `collect_definitions(input)` yields exactly the `expected` texts, in order.
    ///
    /// The whole list is compared in one assertion so a failure shows every collected
    /// definition next to the expected ones.
    #[track_caller]
    fn test_regex_match(input: &str, expected: Vec<&str>) {
        let actual: Vec<&str> = collect_definitions(input)
            .iter()
            .map(|span| span.str)
            .collect();
        assert_eq!(
            actual, expected,
            "unexpected definitions collected from:\n{input}"
        );
    }

    #[test]
    fn collect_typedef() {
        let input = "typedef void *Foo;\n";
        let expected = vec!["typedef void *Foo;\n"];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_typedef_comment() {
        // The leading blank line is not part of the definition; the comment is.
        let input = r"
/**
 * This is a typedef for a pointer to Foo.
 */
typedef void *Foo;
";
        let expected = vec![
            r"/**
 * This is a typedef for a pointer to Foo.
 */
typedef void *Foo;
",
        ];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_struct_typedef() {
        let input = r"/**
 * This is a typedef for a pointer to a struct.
 */
typedef struct ddog_Vec_U8 {
  const uint8_t *ptr;
  uintptr_t len;
  uintptr_t capacity;
} ddog_Vec_U8;
";
        let expected = vec![input];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_union_typedef() {
        let input = r"/**
 * This is a typedef for a pointer to a union.
 */
typedef union my_union {
  int a;
  float b;
} my_union;
";
        let expected = vec![input];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_union_nested() {
        // Semicolons inside the nested braces must not end the typedef early.
        let input = r"typedef union ddog_Union_U8 {
  struct inner1 {
    const uint8_t *ptr;
    uintptr_t len;
    uintptr_t capacity;
  } inner;
  struct inner2 {
    const uint8_t *ptr;
    uintptr_t len;
    uintptr_t capacity;
  } inner2;
} ddog_Union_U8;
";
        let expected = vec![input];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_define() {
        let input = r#"#define FOO __attribute__((unused))
"#;
        let expected = vec![input];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_multiple_definitions() {
        // The function between the two typedefs is not a definition and must be skipped.
        let input = r"
/**
 * `QueueId` is a struct that represents a unique identifier for a queue.
 * It contains a single field, `inner`, which is a 64-bit unsigned integer.
 */
typedef uint64_t ddog_QueueId;

void foo() {
}

/**
 * Holds the raw parts of a Rust Vec; it should only be created from Rust,
 * never from C.
 **/
typedef struct ddog_Vec_U8 {
  const uint8_t *ptr;
  uintptr_t len;
  uintptr_t capacity;
} ddog_Vec_U8;
";

        let expected = vec![
            r"/**
 * `QueueId` is a struct that represents a unique identifier for a queue.
 * It contains a single field, `inner`, which is a 64-bit unsigned integer.
 */
typedef uint64_t ddog_QueueId;

",
            r"/**
 * Holds the raw parts of a Rust Vec; it should only be created from Rust,
 * never from C.
 **/
typedef struct ddog_Vec_U8 {
  const uint8_t *ptr;
  uintptr_t len;
  uintptr_t capacity;
} ddog_Vec_U8;
",
        ];
        test_regex_match(input, expected);
    }

    #[test]
    fn collect_definitions_comments() {
        // A comment that ends right before the typedef belongs to the definition.
        let header = r"/** foo */
typedef struct ddog_Vec_U8 {
  const uint8_t *ptr;
} ddog_Vec_U8;
";
        test_regex_match(header, vec![header]);

        // Extra text after the first `**/` means the comment does not end right before the
        // typedef, so only the typedef itself is collected.
        let header = r"/** foo **/ */
typedef struct ddog_Vec_U8 {
  const uint8_t *ptr;
} ddog_Vec_U8;
";
        let expected = vec![
            r"typedef struct ddog_Vec_U8 {
  const uint8_t *ptr;
} ddog_Vec_U8;
",
        ];
        test_regex_match(header, expected);
    }
}
0 commit comments