Skip to content

Commit e963f89

Browse files
committed
Analyser: handle scanf format strings
* add `@(scanf_format)` attribute
* update format attribute bits
* accept `%n` pseudo-conversion in `printf_format` format strings
* add declarations in libc for `scanf` family of functions
* define `EXPERIMENTAL` feature when compiling new compiler to enable features not supported by bootstrap compiler
1 parent 6bd52cd commit e963f89

File tree

12 files changed

+483
-225
lines changed

12 files changed

+483
-225
lines changed

analyser/module_analyser_call.c2

Lines changed: 230 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
module module_analyser;
1717

1818
import ast local;
19-
import printf_utils;
19+
import printf_utils local;
2020
import scope;
2121
import src_loc local;
2222
import string_buffer;
@@ -164,8 +164,8 @@ fn QualType Analyser.analyseCallExpr(Analyser* ma, Expr** e_ptr) {
164164
}
165165

166166
//stdio.printf("CALL (%d | %d)\n", func_num_args, call_num_args);
167-
bool has_printf_format = false;
168-
u32 format_arg_idx = 0;
167+
u8 format_arg_idx = 0;
168+
FormatAttr format_attr = fd.getFormatAttr(&format_arg_idx);
169169
while (1) {
170170
//stdio.printf("ARG [%d | %d]\n", func_arg_index, call_arg_index);
171171
if (func_arg_index >= func_num_args) break;
@@ -212,11 +212,6 @@ fn QualType Analyser.analyseCallExpr(Analyser* ma, Expr** e_ptr) {
212212
if (!ma.analyseInitExpr(&call_args[call_arg_index], d.getType(), arg.getLoc(), false, false))
213213
return QualType_Invalid;
214214

215-
if (vd.hasPrintfFormat()) {
216-
has_printf_format = true;
217-
format_arg_idx = call_arg_index;
218-
}
219-
220215
func_arg_index++;
221216
call_arg_index++;
222217
}
@@ -254,10 +249,11 @@ fn QualType Analyser.analyseCallExpr(Analyser* ma, Expr** e_ptr) {
254249
call_arg_index++;
255250
}
256251

257-
if (has_printf_format) {
252+
if (format_attr) {
253+
format_arg_idx -= num_auto_args + isTypeFuncCall;
258254
u32 num_args = call_num_args - format_arg_idx - 1;
259-
call.setPrintfFormat(format_arg_idx);
260-
if (!ma.checkPrintfArgs(&call_args[format_arg_idx], num_args, &call_args[format_arg_idx+1]))
255+
call.setFormatAttr(format_attr);
256+
if (!ma.checkFormatArgs(&call_args[format_arg_idx], num_args, &call_args[format_arg_idx+1], format_attr))
261257
return QualType_Invalid;
262258
}
263259
}
@@ -273,58 +269,54 @@ type FormatAnalyser struct {
273269
SrcLoc loc;
274270
u32 last_offset;
275271
string_buffer.Buf* out;
272+
FormatAttr format_attr;
276273
}
277274

278-
const bool[] Format_needs_l_prefix = {
279-
[BuiltinKind.Char] = false,
280-
[BuiltinKind.Int8] = false,
281-
[BuiltinKind.Int16] = false,
282-
[BuiltinKind.Int32] = false,
283-
[BuiltinKind.Int64] = true,
284-
[BuiltinKind.UInt8] = false,
285-
[BuiltinKind.UInt16] = false,
286-
[BuiltinKind.UInt32] = false,
287-
[BuiltinKind.UInt64] = true,
288-
[BuiltinKind.Float32] = false,
289-
[BuiltinKind.Float64] = true,
290-
[BuiltinKind.ISize] = true,
291-
[BuiltinKind.USize] = true,
292-
[BuiltinKind.Bool] = false,
293-
}
294-
295-
const bool[] Format_needs_u_prefix = {
296-
[BuiltinKind.Char] = false,
297-
[BuiltinKind.Int8] = false,
298-
[BuiltinKind.Int16] = false,
299-
[BuiltinKind.Int32] = false,
300-
[BuiltinKind.Int64] = false,
301-
[BuiltinKind.UInt8] = false,
302-
[BuiltinKind.UInt16] = false,
303-
[BuiltinKind.UInt32] = true,
304-
[BuiltinKind.UInt64] = true,
305-
[BuiltinKind.Float32] = false,
306-
[BuiltinKind.Float64] = false,
307-
[BuiltinKind.ISize] = false,
308-
[BuiltinKind.USize] = true,
309-
[BuiltinKind.Bool] = false,
310-
}
275+
fn bool on_printf_specifier(void* context, u32 offset, PrintfSpecifier specifier, PrintfConversion* conv) {
276+
if (specifier == Percent) return true;
311277

312-
fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32 offset, i32 stars, char c) {
313278
FormatAnalyser* fa = context;
314279
Analyser* ma = fa.ma;
315280
Expr** args = fa.args;
316281

282+
offset += conv.len;
317283
fa.out.add2(fa.format + fa.last_offset, offset - fa.last_offset);
318284

319-
if (c == '\0') {
320-
ma.error(fa.loc + offset, "missing conversion specifier at end of format string");
285+
char c = conv.c;
286+
if (specifier == Invalid) {
287+
SrcLoc loc = fa.loc + offset; // approximate location of offending letter
288+
switch (c) {
289+
case '\0':
290+
ma.error(loc, "missing conversion specifier at end of format string");
291+
break;
292+
case '%':
293+
ma.error(loc, "invalid '%%%%' conversion specifier");
294+
break;
295+
case 'h':
296+
case 'j':
297+
case 'l':
298+
case 't':
299+
case 'w':
300+
case 'z':
301+
case 'L':
302+
ma.error(loc, "format length modifier '%c' should be omitted", c);
303+
break;
304+
case 'i':
305+
case 'u':
306+
ma.error(loc, "invalid format specifier '%%%c', should use '%%d'", c);
307+
break;
308+
default:
309+
ma.error(loc, "invalid format specifier '%%%c'", c);
310+
break;
311+
}
321312
return false;
322313
}
314+
u32 stars = conv.has_width_star + conv.has_prec_star;
323315
if (fa.idx + stars >= fa.num_args) {
324316
ma.error(fa.loc + offset, "too many format specifiers or not enough arguments");
325317
return false;
326318
}
327-
for (i32 i = 0; i < stars; i++) {
319+
for (u32 i = 0; i < stars; i++) {
328320
Expr* arg = args[fa.idx];
329321
QualType qt = arg.getType();
330322
qt = qt.getCanonicalType();
@@ -339,7 +331,7 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
339331
qt = qt.getCanonicalType();
340332

341333
switch (specifier) {
342-
case Other:
334+
case Invalid:
343335
break;
344336
case String:
345337
if (!qt.isCharPointer()) {
@@ -361,11 +353,11 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
361353
ma.error(arg.getStartLoc(), "format '%%%c' expects an integer argument", c);
362354
break;
363355
}
364-
BuiltinKind kind = bi.getKind();
365356
// TODO add ll prefix on LLP targets (64-bit long long and pointers, but 32-bit long)
366-
if (Format_needs_l_prefix[kind]) fa.out.add1('l');
367-
if (c == 'd' && Format_needs_u_prefix[kind]) c = 'u';
368-
// Assume all supported formats are implemented in the target libc
357+
u32 width = bi.getWidth();
358+
if (width > 32) fa.out.add1('l');
359+
if (c == 'd' && width >= 32 && bi.isUnsigned()) c = 'u';
360+
// Assume target libc supports %b
369361
break;
370362
case FloatingPoint:
371363
BuiltinType* bi = qt.getBuiltinTypeOrNil();
@@ -378,50 +370,220 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
378370
ma.error(arg.getStartLoc(), "format '%%p' expects a pointer argument");
379371
}
380372
break;
381-
case Invalid:
373+
case Offset:
374+
if (qt.isPointer()) {
375+
const PointerType* p = qt.getPointerType();
376+
QualType t = p.getInner();
377+
BuiltinType* bi = t.getBuiltinTypeOrNil();
378+
if (bi && bi.isInteger()) {
379+
u32 width = bi.getWidth();
380+
// TODO add ll prefix on LLP targets (64-bit long long and pointers, but 32-bit long)
381+
if (width < 16) fa.out.add1('h');
382+
if (width < 32) fa.out.add1('h');
383+
if (width > 32) fa.out.add1('l');
384+
break;
385+
}
386+
}
387+
ma.error(arg.getStartLoc(), "format '%%n' expects an integer pointer argument");
388+
break;
389+
case Percent:
390+
break;
391+
}
392+
fa.out.add1(c);
393+
394+
fa.last_offset = offset + 1;
395+
fa.idx++;
396+
return true;
397+
}
398+
399+
// Callback handed to parseScanfFormat(): called for each conversion found in a
// scanf-style format string.
//
// context   - the FormatAnalyser driving the check (passed as void*)
// offset    - offset of the conversion inside the format text; conv.len is added
//             to it to reach the conversion letter
// specifier - classification of the conversion computed by the parser
// conv      - parsed details of the conversion (letter, lengths, '*' flag)
//
// The literal text preceding the conversion letter is copied to fa.out, the
// matching call argument (if any) is type-checked, and the length modifier
// required by the pointee type is emitted before the conversion letter.
//
// Returns false after reporting an error for an invalid specifier or a missing
// argument (which stops processing here); returns true otherwise. Argument type
// mismatches are reported through ma.error() but still return true.
fn bool on_scanf_specifier(void* context, u32 offset, ScanfSpecifier specifier, ScanfConversion* conv) {
    FormatAnalyser* fa = context;
    Analyser* ma = fa.ma;
    Expr** args = fa.args;

    // advance to the conversion letter and flush the pending literal text
    offset += conv.len;
    fa.out.add2(fa.format + fa.last_offset, offset - fa.last_offset);

    char c = conv.c;
    if (specifier == Invalid) {
        SrcLoc loc = fa.loc + offset; // approximate location of offending letter
        switch (c) {
        case '\0':
            ma.error(loc, "missing conversion specifier at end of format string");
            break;
        case '%':
            ma.error(loc, "invalid '%%%%' conversion specifier");
            break;
        case 'h':
        case 'j':
        case 'l':
        case 't':
        case 'w':
        case 'z':
        case 'L':
            ma.error(loc, "conversion length modifier '%c' should be omitted", c);
            break;
        case 'i':
        case 'u':
            ma.error(loc, "invalid conversion specifier '%%%c', should use '%%d'", c);
            break;
        default:
            ma.error(loc, "invalid conversion specifier '%%%c'", c);
            break;
        }
        return false;
    }
    // NOTE(review): has_star presumably marks scanf assignment suppression ('%*d');
    // such conversions consume no argument.
    if (!conv.has_star) {
        if (fa.idx >= fa.num_args) {
            ma.error(fa.loc + offset, "too many conversion specifiers or not enough arguments");
            return false;
        }
        Expr* arg = args[fa.idx++];
        QualType qt = arg.getType();
        qt = qt.getCanonicalType();
        // tt is the pointee type and bi its builtin type; both stay unset
        // (QualType_Invalid / nil) when the argument is not a pointer
        QualType tt = QualType_Invalid;
        BuiltinType* bi = nil;
        if (qt.isPointer()) {
            const PointerType* p = qt.getPointerType();
            tt = p.getInner();
            tt = tt.getImplType();
            bi = tt.getBuiltinTypeOrNil();
        }
        switch (specifier) {
        case Invalid:
            break;
        case String:
        case Scanset:
            if (!qt.isCharPointer()) {
                ma.error(arg.getStartLoc(), "conversion '%%s' expects a string argument");
            }
            // TODO: check array length and specify it if missing
            break;
        case Char:
            if (!bi || !tt.isCharCompatible()) {
                ma.error(arg.getStartLoc(), "conversion '%%c' expects a character pointer argument");
            }
            // TODO: check array length if field width is present
            break;
        case Integer:
        case Offset:
            if (!bi || !bi.isIntegerOrBool()) {
                ma.error(arg.getStartLoc(), "conversion '%%%c' expects an integer pointer argument", c);
                break;
            }
            u32 width = bi.getWidth();
            // TODO add ll prefix on LLP targets (64-bit long long and pointers, but 32-bit long)
            // widths below 16 get "hh", widths below 32 get "h", widths above 32 get "l"
            if (width < 16) fa.out.add1('h');
            if (width < 32) fa.out.add1('h');
            if (width > 32) fa.out.add1('l');
            if (c == 'd' && bi.isUnsigned()) c = 'u';
            // Assuming target libc supports %b
            break;
        case FloatingPoint:
            if (!bi || !bi.isFloatingPoint()) {
                ma.error(arg.getStartLoc(), "conversion '%%%c' expects a floating-point pointer argument", c);
                // bi may be nil here: stop before dereferencing it below
                break;
            }
            if (bi.getWidth() > 32) fa.out.add1('l');
            break;
        case Pointer:
            // The pointee must itself be a pointer or a function. Test qt rather
            // than bi: bi is the pointee's builtin type, which is expected to be
            // nil for a pointer pointee, so testing it would reject valid
            // arguments. tt is only inspected when qt is a pointer, i.e. when
            // tt has been set.
            if (!qt.isPointer() || (!tt.isPointer() && !tt.isFunction())) {
                ma.error(arg.getStartLoc(), "conversion '%%p' expects a pointer to a pointer argument");
            }
            break;
        case Percent:
            break;
        }
    }
    fa.out.add1(c);
    // copy the conv.len2 bytes that follow the conversion letter verbatim
    // NOTE(review): presumably the body of a scanset conversion - confirm in printf_utils
    if (conv.len2) fa.out.add2(fa.format + offset + 1, conv.len2);
    fa.last_offset = offset + 1 + conv.len2;
    return true;
}
408502

409-
fn bool Analyser.checkPrintfArgs(Analyser* ma, Expr** format_ptr, u32 num_args, Expr** args) {
503+
// Returns true when the array-typed expression e has const-qualified elements.
// The expression is asserted to have an array type.
fn bool format_has_const_elems(Expr* e) {
    QualType array_qt = e.getType();
    assert(array_qt.isArray());
    ArrayType* array_type = array_qt.getArrayType();
    QualType elem_qt = array_type.getElemType();
    return elem_qt.isConst();
}

// Resolves a format argument to the text of the string literal behind it.
//
// format     - the format argument expression of the call
// format_loc - updated with the location of the resolved expression; for a
//              string literal it is set to the literal's first character
//
// Returns the literal text, or nil when the format cannot be traced back to a
// constant string (the caller reports that as an error).
fn const char* get_format(Expr* format, SrcLoc* format_loc) {
    // A StringLiteral or a variable of type char[] will be wrapped in an
    // ArrayToPointerDecay cast. Anything else is a (const) char* variable
    // whose contents cannot be checked.
    if (!format.isImplicitCast()) return nil;
    ImplicitCastExpr* decay = (ImplicitCastExpr*)format;
    if (!decay.isArrayToPointerDecay()) return nil;

    Expr* cur = decay.getInner();
    *format_loc = cur.getLoc();

    // follow const array variables through their initializers until a
    // string literal is reached
    for (;;) {
        switch (cur.getKind()) {
        case StringLiteral:
            // set location to first character of string literal
            // this works for simple strings only: multi-strings, raw strings,
            // strings with escape sequences may show an incorrect position for
            // format specifier errors.
            *format_loc = cur.getLoc() + 1;
            StringLiteral* lit = (StringLiteral*)cur;
            return lit.getText();
        case Identifier:
            if (!format_has_const_elems(cur)) return nil;

            IdentifierExpr* ident = (IdentifierExpr*)cur;
            Decl* target = ident.getDecl();
            assert(target.isVariable());
            VarDecl* vdecl = (VarDecl*)target;
            cur = vdecl.getInit();
            assert(cur);
            continue;
        case Member:
            if (!format_has_const_elems(cur)) return nil;

            MemberExpr* member = (MemberExpr*)cur;
            Decl* target = member.getFullDecl();
            assert(target.isVariable());
            VarDecl* vdecl = (VarDecl*)target;
            cur = vdecl.getInit();
            assert(cur);
            continue;
        default:
            assert(0);
            return nil;
        }
    }
}
557+
558+
fn bool Analyser.checkFormatArgs(Analyser* ma, Expr** format_ptr, u32 num_args, Expr** args, FormatAttr format_attr) {
410559
Expr* format = *format_ptr;
411560
SrcLoc format_loc = format.getLoc();
412-
const char* format_text = printf_utils.get_format(format, &format_loc);
561+
const char* format_text = get_format(format, &format_loc);
413562
if (!format_text) {
414563
ma.error(format_loc, "format argument is not a constant string");
415564
return false;
416565
}
417566

418567
string_buffer.Buf* out = string_buffer.create(256, false, 0);
419-
FormatAnalyser fa = { ma, format_text, args, num_args, 0, format_loc, 0, out }
420-
421-
if (!printf_utils.parseFormat(format_text, on_format_specifier, &fa)) {
422-
// error already reported
423-
out.free();
424-
return false;
568+
FormatAnalyser fa = { ma, format_text, args, num_args, 0, format_loc, 0, out, format_attr }
569+
570+
switch (format_attr) {
571+
case None:
572+
break; // should not happen;
573+
case Printf:
574+
if (!parsePrintfFormat(format_text, on_printf_specifier, &fa)) {
575+
// error already reported
576+
out.free();
577+
return false;
578+
}
579+
break;
580+
case Scanf:
581+
if (!parseScanfFormat(format_text, on_scanf_specifier, &fa)) {
582+
// error already reported
583+
out.free();
584+
return false;
585+
}
586+
break;
425587
}
426588
out.add(format_text + fa.last_offset);
427589

0 commit comments

Comments
 (0)