Skip to content

Commit 2f3d821

Browse files
committed
cgo: support preprocessor macros passed on the command line
Go code might sometimes want to use preprocessor macros that were passed on the command line. This wasn't working before and resulted in the following error:

    internal error: could not find file where macro is defined

This is now supported, though location information isn't available (which makes sense: the command line is not a file).

I had to use the `clang_tokenize` API for this and reconstruct the original source location. Apparently this is the only way to do it: https://stackoverflow.com/a/19074846/559350

In the future we could consider replacing our own tokenization with the tokenizer that's built into Clang directly. This should reduce the possibility of bugs a bit.
1 parent 6f462fb commit 2f3d821

File tree

5 files changed

+72
-37
lines changed

5 files changed

+72
-37
lines changed

cgo/cgo_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"go/ast"
88
"go/format"
99
"go/parser"
10+
"go/scanner"
1011
"go/token"
1112
"go/types"
1213
"os"
@@ -219,7 +220,13 @@ func (i simpleImporter) Import(path string) (*types.Package, error) {
219220
// formatDiagnostic formats the error message to be an indented comment. It
220221
// also fixes Windows path name issues (backward slashes).
221222
func formatDiagnostic(err error) string {
222-
msg := err.Error()
223+
var msg string
224+
switch err := err.(type) {
225+
case scanner.Error:
226+
msg = err.Pos.String() + ": " + err.Msg
227+
default:
228+
msg = err.Error()
229+
}
223230
if runtime.GOOS == "windows" {
224231
// Fix Windows path slashes.
225232
msg = strings.ReplaceAll(msg, "testdata\\", "testdata/")

cgo/const.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,9 @@ func (t *tokenizer) Next() {
195195
t.curValue = t.peekValue
196196

197197
// Parse the next peek token.
198-
t.peekPos += token.Pos(len(t.curValue))
198+
if t.peekPos != token.NoPos {
199+
t.peekPos += token.Pos(len(t.curValue))
200+
}
199201
for {
200202
if len(t.buf) == 0 {
201203
t.peekToken = token.EOF
@@ -207,7 +209,9 @@ func (t *tokenizer) Next() {
207209
// Skip whitespace.
208210
// Based on this source, not sure whether it represents C whitespace:
209211
// https://en.cppreference.com/w/cpp/string/byte/isspace
210-
t.peekPos++
212+
if t.peekPos != token.NoPos {
213+
t.peekPos++
214+
}
211215
t.buf = t.buf[1:]
212216
case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&" || string(t.buf[:2]) == "<<" || string(t.buf[:2]) == ">>"):
213217
// Two-character tokens.

cgo/libclang.go

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package cgo
44
// modification. It does not touch the AST itself.
55

66
import (
7+
"bytes"
78
"crypto/sha256"
89
"crypto/sha512"
910
"encoding/hex"
@@ -369,42 +370,45 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
369370
gen.Specs = append(gen.Specs, valueSpec)
370371
return gen, nil
371372
case C.CXCursor_MacroDefinition:
373+
// Extract tokens from the Clang tokenizer.
374+
// See: https://stackoverflow.com/a/19074846/559350
372375
sourceRange := C.tinygo_clang_getCursorExtent(c)
373-
start := C.clang_getRangeStart(sourceRange)
374-
end := C.clang_getRangeEnd(sourceRange)
375-
var file, endFile C.CXFile
376-
var startOffset, endOffset C.unsigned
377-
C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset)
378-
if file == nil {
379-
f.addError(pos, "internal error: could not find file where macro is defined")
380-
return nil, nil
381-
}
382-
C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset)
383-
if file != endFile {
384-
f.addError(pos, "internal error: expected start and end location of a macro to be in the same file")
385-
return nil, nil
386-
}
387-
if startOffset > endOffset {
388-
f.addError(pos, "internal error: start offset of macro is after end offset")
389-
return nil, nil
390-
}
391-
392-
// read file contents and extract the relevant byte range
393376
tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
394-
var size C.size_t
395-
sourcePtr := C.clang_getFileContents(tu, file, &size)
396-
if endOffset >= C.uint(size) {
397-
f.addError(pos, "internal error: end offset of macro lies after end of file")
398-
return nil, nil
399-
}
400-
source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset])
401-
if !strings.HasPrefix(source, name) {
402-
f.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source))
403-
return nil, nil
377+
var rawTokens *C.CXToken
378+
var numTokens C.unsigned
379+
C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens)
380+
tokens := unsafe.Slice(rawTokens, numTokens)
381+
// Convert this range of tokens back to source text.
382+
// Ugly, but it works well enough.
383+
sourceBuf := &bytes.Buffer{}
384+
var startOffset int
385+
for i, token := range tokens {
386+
spelling := getString(C.clang_getTokenSpelling(tu, token))
387+
location := C.clang_getTokenLocation(tu, token)
388+
var tokenOffset C.unsigned
389+
C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset)
390+
if i == 0 {
391+
// The first token is the macro name itself.
392+
// Skip it (after using its location).
393+
startOffset = int(tokenOffset) + len(name)
394+
} else {
395+
// Later tokens are the macro contents.
396+
for int(tokenOffset) > (startOffset + sourceBuf.Len()) {
397+
// Pad the source text with whitespace (that must have been
398+
// present in the original source as well).
399+
sourceBuf.WriteByte(' ')
400+
}
401+
sourceBuf.WriteString(spelling)
402+
}
404403
}
405-
value := source[len(name):]
404+
C.clang_disposeTokens(tu, rawTokens, numTokens)
405+
value := sourceBuf.String()
406406
// Try to convert this #define into a Go constant expression.
407-
expr, scannerError := parseConst(pos+token.Pos(len(name)), f.fset, value)
407+
tokenPos := token.NoPos
408+
if pos != token.NoPos {
409+
tokenPos = pos + token.Pos(len(name))
410+
}
411+
expr, scannerError := parseConst(tokenPos, f.fset, value)
408412
if scannerError != nil {
409413
f.errors = append(f.errors, *scannerError)
410414
return nil, nil

cgo/testdata/errors.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,22 @@ typedef someType noType; // undefined type
1313
#define SOME_CONST_1 5) // invalid const syntax
1414
#define SOME_CONST_2 6) // const not used (so no error)
1515
#define SOME_CONST_3 1234 // const too large for byte
16+
#define SOME_CONST_b 3 ) // const with lots of weird whitespace (to test error locations)
17+
# define SOME_CONST_startspace 3)
1618
*/
1719
//
1820
//
1921
// #define SOME_CONST_4 8) // after some empty lines
22+
// #cgo CFLAGS: -DSOME_PARAM_CONST_invalid=3/+3
23+
// #cgo CFLAGS: -DSOME_PARAM_CONST_valid=3+4
2024
import "C"
2125

2226
// #warning another warning
2327
import "C"
2428

2529
// Make sure that errors for the following lines won't change with future
2630
// additions to the CGo preamble.
31+
//
2732
//line errors.go:100
2833
var (
2934
// constant too large
@@ -38,4 +43,12 @@ var (
3843
_ byte = C.SOME_CONST_3
3944

4045
_ = C.SOME_CONST_4
46+
47+
_ = C.SOME_CONST_b
48+
49+
_ = C.SOME_CONST_startspace
50+
51+
// constants passed by a command line parameter
52+
_ = C.SOME_PARAM_CONST_invalid
53+
_ = C.SOME_PARAM_CONST_valid
4154
)

cgo/testdata/errors.out.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
11
// CGo errors:
22
// testdata/errors.go:4:2: warning: some warning
33
// testdata/errors.go:11:9: error: unknown type name 'someType'
4-
// testdata/errors.go:22:5: warning: another warning
4+
// testdata/errors.go:26:5: warning: another warning
55
// testdata/errors.go:13:23: unexpected token ), expected end of expression
6-
// testdata/errors.go:19:26: unexpected token ), expected end of expression
6+
// testdata/errors.go:21:26: unexpected token ), expected end of expression
7+
// testdata/errors.go:16:33: unexpected token ), expected end of expression
8+
// testdata/errors.go:17:34: unexpected token ), expected end of expression
9+
// -: unexpected token INT, expected end of expression
710

811
// Type checking errors after CGo processing:
912
// testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as C.char value in variable declaration (overflows)
1013
// testdata/errors.go:105: unknown field z in struct literal
1114
// testdata/errors.go:108: undefined: C.SOME_CONST_1
1215
// testdata/errors.go:110: cannot use C.SOME_CONST_3 (untyped int constant 1234) as byte value in variable declaration (overflows)
1316
// testdata/errors.go:112: undefined: C.SOME_CONST_4
17+
// testdata/errors.go:114: undefined: C.SOME_CONST_b
18+
// testdata/errors.go:116: undefined: C.SOME_CONST_startspace
19+
// testdata/errors.go:119: undefined: C.SOME_PARAM_CONST_invalid
1420

1521
package main
1622

@@ -58,3 +64,4 @@ type C.struct_point_t struct {
5864
type C.point_t = C.struct_point_t
5965

6066
const C.SOME_CONST_3 = 1234
67+
const C.SOME_PARAM_CONST_valid = 3 + 4

0 commit comments

Comments
 (0)