1616module module_analyser;
1717
1818import ast local;
19- import printf_utils;
19+ import printf_utils local ;
2020import scope;
2121import src_loc local;
2222import string_buffer;
@@ -164,8 +164,8 @@ fn QualType Analyser.analyseCallExpr(Analyser* ma, Expr** e_ptr) {
164164 }
165165
166166 //stdio.printf("CALL (%d | %d)\n", func_num_args, call_num_args);
167- bool has_printf_format = false ;
168- u32 format_arg_idx = 0 ;
167+ u8 format_arg_idx = 0 ;
168+ FormatAttr format_attr = fd.getFormatAttr(&format_arg_idx) ;
169169 while (1) {
170170 //stdio.printf("ARG [%d | %d]\n", func_arg_index, call_arg_index);
171171 if (func_arg_index >= func_num_args) break;
@@ -212,11 +212,6 @@ fn QualType Analyser.analyseCallExpr(Analyser* ma, Expr** e_ptr) {
212212 if (!ma.analyseInitExpr(&call_args[call_arg_index], d.getType(), arg.getLoc(), false, false))
213213 return QualType_Invalid;
214214
215- if (vd.hasPrintfFormat()) {
216- has_printf_format = true;
217- format_arg_idx = call_arg_index;
218- }
219-
220215 func_arg_index++;
221216 call_arg_index++;
222217 }
@@ -254,10 +249,11 @@ fn QualType Analyser.analyseCallExpr(Analyser* ma, Expr** e_ptr) {
254249 call_arg_index++;
255250 }
256251
257- if (has_printf_format) {
252+ if (format_attr) {
253+ format_arg_idx -= num_auto_args + isTypeFuncCall;
258254 u32 num_args = call_num_args - format_arg_idx - 1;
259- call.setPrintfFormat(format_arg_idx );
260- if (!ma.checkPrintfArgs (&call_args[format_arg_idx], num_args, &call_args[format_arg_idx+1]))
255+ call.setFormatAttr(format_attr );
256+ if (!ma.checkFormatArgs (&call_args[format_arg_idx], num_args, &call_args[format_arg_idx+1], format_attr ))
261257 return QualType_Invalid;
262258 }
263259 }
@@ -273,58 +269,54 @@ type FormatAnalyser struct {
273269 SrcLoc loc;
274270 u32 last_offset;
275271 string_buffer.Buf* out;
272+ FormatAttr format_attr;
276273}
277274
278- const bool[] Format_needs_l_prefix = {
279- [BuiltinKind.Char] = false,
280- [BuiltinKind.Int8] = false,
281- [BuiltinKind.Int16] = false,
282- [BuiltinKind.Int32] = false,
283- [BuiltinKind.Int64] = true,
284- [BuiltinKind.UInt8] = false,
285- [BuiltinKind.UInt16] = false,
286- [BuiltinKind.UInt32] = false,
287- [BuiltinKind.UInt64] = true,
288- [BuiltinKind.Float32] = false,
289- [BuiltinKind.Float64] = true,
290- [BuiltinKind.ISize] = true,
291- [BuiltinKind.USize] = true,
292- [BuiltinKind.Bool] = false,
293- }
294-
295- const bool[] Format_needs_u_prefix = {
296- [BuiltinKind.Char] = false,
297- [BuiltinKind.Int8] = false,
298- [BuiltinKind.Int16] = false,
299- [BuiltinKind.Int32] = false,
300- [BuiltinKind.Int64] = false,
301- [BuiltinKind.UInt8] = false,
302- [BuiltinKind.UInt16] = false,
303- [BuiltinKind.UInt32] = true,
304- [BuiltinKind.UInt64] = true,
305- [BuiltinKind.Float32] = false,
306- [BuiltinKind.Float64] = false,
307- [BuiltinKind.ISize] = false,
308- [BuiltinKind.USize] = true,
309- [BuiltinKind.Bool] = false,
310- }
275+ fn bool on_printf_specifier(void* context, u32 offset, PrintfSpecifier specifier, PrintfConversion* conv) {
276+ if (specifier == Percent) return true;
311277
312- fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32 offset, i32 stars, char c) {
313278 FormatAnalyser* fa = context;
314279 Analyser* ma = fa.ma;
315280 Expr** args = fa.args;
316281
282+ offset += conv.len;
317283 fa.out.add2(fa.format + fa.last_offset, offset - fa.last_offset);
318284
319- if (c == '\0') {
320- ma.error(fa.loc + offset, "missing conversion specifier at end of format string");
285+ char c = conv.c;
286+ if (specifier == Invalid) {
287+ SrcLoc loc = fa.loc + offset; // approximate location of offending letter
288+ switch (c) {
289+ case '\0':
290+ ma.error(loc, "missing conversion specifier at end of format string");
291+ break;
292+ case '%':
293+ ma.error(loc, "invalid '%%%%' conversion specifier");
294+ break;
295+ case 'h':
296+ case 'j':
297+ case 'l':
298+ case 't':
299+ case 'w':
300+ case 'z':
301+ case 'L':
302+ ma.error(loc, "format length modifier '%c' should be omitted", c);
303+ break;
304+ case 'i':
305+ case 'u':
306+ ma.error(loc, "invalid format specifier '%%%c', should use '%%d'", c);
307+ break;
308+ default:
309+ ma.error(loc, "invalid format specifier '%%%c'", c);
310+ break;
311+ }
321312 return false;
322313 }
314+ u32 stars = conv.has_width_star + conv.has_prec_star;
323315 if (fa.idx + stars >= fa.num_args) {
324316 ma.error(fa.loc + offset, "too many format specifiers or not enough arguments");
325317 return false;
326318 }
327- for (i32 i = 0; i < stars; i++) {
319+ for (u32 i = 0; i < stars; i++) {
328320 Expr* arg = args[fa.idx];
329321 QualType qt = arg.getType();
330322 qt = qt.getCanonicalType();
@@ -339,7 +331,7 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
339331 qt = qt.getCanonicalType();
340332
341333 switch (specifier) {
342- case Other :
334+ case Invalid :
343335 break;
344336 case String:
345337 if (!qt.isCharPointer()) {
@@ -361,11 +353,11 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
361353 ma.error(arg.getStartLoc(), "format '%%%c' expects an integer argument", c);
362354 break;
363355 }
364- BuiltinKind kind = bi.getKind();
365356 // TODO add ll prefix on LLP targets (64-bit long long and pointers, but 32-bit long)
366- if (Format_needs_l_prefix[kind]) fa.out.add1('l');
367- if (c == 'd' && Format_needs_u_prefix[kind]) c = 'u';
368- // Assume all supported formats are implemented in the target libc
357+ u32 width = bi.getWidth();
358+ if (width > 32) fa.out.add1('l');
359+ if (c == 'd' && width >= 32 && bi.isUnsigned()) c = 'u';
360+ // Assume target libc supports %b
369361 break;
370362 case FloatingPoint:
371363 BuiltinType* bi = qt.getBuiltinTypeOrNil();
@@ -378,50 +370,220 @@ fn bool on_format_specifier(void* context, printf_utils.Specifier specifier, u32
378370 ma.error(arg.getStartLoc(), "format '%%p' expects a pointer argument");
379371 }
380372 break;
381- case Invalid:
373+ case Offset:
374+ if (qt.isPointer()) {
375+ const PointerType* p = qt.getPointerType();
376+ QualType t = p.getInner();
377+ BuiltinType* bi = t.getBuiltinTypeOrNil();
378+ if (bi && bi.isInteger()) {
379+ u32 width = bi.getWidth();
380+ // TODO add ll prefix on LLP targets (64-bit long long and pointers, but 32-bit long)
381+ if (width < 16) fa.out.add1('h');
382+ if (width < 32) fa.out.add1('h');
383+ if (width > 32) fa.out.add1('l');
384+ break;
385+ }
386+ }
387+ ma.error(arg.getStartLoc(), "format '%%n' expects an integer pointer argument");
388+ break;
389+ case Percent:
390+ break;
391+ }
392+ fa.out.add1(c);
393+
394+ fa.last_offset = offset + 1;
395+ fa.idx++;
396+ return true;
397+ }
398+
// Scanf-conversion callback handed to parseScanfFormat by Analyser.checkFormatArgs.
//
// context:   the FormatAnalyser describing the call being checked
// offset:    offset of the conversion inside the format string; conv.len is added
//            to reach the conversion letter itself
// specifier: classification of the conversion
// conv:      parsed details of the conversion (letter, '*' flag, lengths)
//
// The format text preceding the conversion letter is copied to fa.out, then any
// length modifier required by the destination type, then the (possibly adjusted)
// conversion letter and its conv.len2 trailing bytes.
//
// Returns false after reporting an invalid specifier or a missing argument.
// Argument type mismatches are reported but still return true.
fn bool on_scanf_specifier(void* context, u32 offset, ScanfSpecifier specifier, ScanfConversion* conv) {
    FormatAnalyser* fa = context;
    Analyser* ma = fa.ma;
    Expr** args = fa.args;

    offset += conv.len;
    // copy everything between the previous conversion and this conversion letter
    fa.out.add2(fa.format + fa.last_offset, offset - fa.last_offset);

    char c = conv.c;
    if (specifier == Invalid) {
        SrcLoc loc = fa.loc + offset; // approximate location of offending letter
        switch (c) {
        case '\0':
            ma.error(loc, "missing conversion specifier at end of format string");
            break;
        case '%':
            ma.error(loc, "invalid '%%%%' conversion specifier");
            break;
        case 'h':
        case 'j':
        case 'l':
        case 't':
        case 'w':
        case 'z':
        case 'L':
            ma.error(loc, "conversion length modifier '%c' should be omitted", c);
            break;
        case 'i':
        case 'u':
            ma.error(loc, "invalid conversion specifier '%%%c', should use '%%d'", c);
            break;
        default:
            ma.error(loc, "invalid conversion specifier '%%%c'", c);
            break;
        }
        return false;
    }
    // a conversion with has_star set consumes no argument
    if (!conv.has_star) {
        if (fa.idx >= fa.num_args) {
            ma.error(fa.loc + offset, "too many conversion specifiers or not enough arguments");
            return false;
        }
        Expr* arg = args[fa.idx++];
        QualType qt = arg.getType();
        qt = qt.getCanonicalType();
        // tt/bi describe the pointee; they stay Invalid/nil when the argument is not a pointer
        QualType tt = QualType_Invalid;
        BuiltinType* bi = nil;
        bool is_ptr = qt.isPointer();
        if (is_ptr) {
            const PointerType* p = qt.getPointerType();
            tt = p.getInner();
            tt = tt.getImplType();
            bi = tt.getBuiltinTypeOrNil();
        }
        switch (specifier) {
        case Invalid:
            break;
        case String:
        case Scanset:
            if (!qt.isCharPointer()) {
                ma.error(arg.getStartLoc(), "conversion '%%s' expects a string argument");
            }
            // TODO: check array length and specify it if missing
            break;
        case Char:
            if (!bi || !tt.isCharCompatible()) {
                ma.error(arg.getStartLoc(), "conversion '%%c' expects a character pointer argument");
            }
            // TODO: check array length if field width is present
            break;
        case Integer:
        case Offset:
            if (!bi || !bi.isIntegerOrBool()) {
                ma.error(arg.getStartLoc(), "conversion '%%%c' expects an integer pointer argument", c);
                break;
            }
            u32 width = bi.getWidth();
            // TODO add ll prefix on LLP targets (64-bit long long and pointers, but 32-bit long)
            // width < 16 emits "hh", width < 32 emits "h", width > 32 emits "l"
            if (width < 16) fa.out.add1('h');
            if (width < 32) fa.out.add1('h');
            if (width > 32) fa.out.add1('l');
            if (c == 'd' && bi.isUnsigned()) c = 'u';
            // Assuming target libc supports %b
            break;
        case FloatingPoint:
            if (!bi || !bi.isFloatingPoint()) {
                ma.error(arg.getStartLoc(), "conversion '%%%c' expects a floating-point pointer argument", c);
                // stop here: bi may be nil, so it must not be dereferenced below
                break;
            }
            if (bi.getWidth() > 32) fa.out.add1('l');
            break;
        case Pointer:
            // The destination must be a pointer to a pointer or function.
            // NOTE(review): the pointee is then presumably not a builtin type, so bi is
            // expected to be nil here; test the argument's pointer-ness instead of bi.
            if (!is_ptr || (!tt.isPointer() && !tt.isFunction())) {
                ma.error(arg.getStartLoc(), "conversion '%%p' expects a pointer to a pointer argument");
            }
            break;
        case Percent:
            break;
        }
    }
    fa.out.add1(c);
    // copy the conv.len2 bytes that follow the conversion letter
    if (conv.len2) fa.out.add2(fa.format + offset + 1, conv.len2);
    fa.last_offset = offset + 1 + conv.len2;
    return true;
}
408502
409- fn bool Analyser.checkPrintfArgs(Analyser* ma, Expr** format_ptr, u32 num_args, Expr** args) {
// Resolves a format argument expression to the text of the string literal behind it.
//
// format:     the format argument as passed in the call
// format_loc: receives the location of the resolved expression; for a string
//             literal it is set to the literal's location + 1 (first character)
//
// A StringLiteral, or a variable of type char[], reaches this point wrapped in an
// ArrayToPointerDecay implicit cast. Anything else (e.g. a variable of type
// (const) char*) cannot be checked and yields nil. Identifier and Member
// expressions are followed through the initializer of the variable they refer
// to, as long as the array element type is const.
fn const char* get_format(Expr* format, SrcLoc* format_loc) {
    if (!format.isImplicitCast()) return nil;
    ImplicitCastExpr* cast = (ImplicitCastExpr*)format;
    if (!cast.isArrayToPointerDecay()) return nil;

    Expr* cur = cast.getInner();
    *format_loc = cur.getLoc();

    for (;;) {
        // declaration the current expression refers to (Identifier / Member only)
        Decl* decl = nil;
        switch (cur.getKind()) {
        case StringLiteral:
            // set location to first character of string literal
            // this works for simple strings only: multi-strings, raw strings,
            // strings with escape sequences may show an incorrect position for
            // format specifier errors.
            *format_loc = cur.getLoc() + 1;
            StringLiteral* lit = (StringLiteral*)cur;
            return lit.getText();
        case Identifier:
            IdentifierExpr* id = (IdentifierExpr*)cur;
            decl = id.getDecl();
            break;
        case Member:
            MemberExpr* m = (MemberExpr*)cur;
            decl = m.getFullDecl();
            break;
        default:
            assert(0);
            return nil;
        }

        // only arrays with const elements have an initializer that can be trusted
        QualType qt = cur.getType();
        assert(qt.isArray());
        ArrayType* at = qt.getArrayType();
        QualType elem = at.getElemType();
        if (!elem.isConst()) return nil;

        // continue the search with the variable's initializer
        assert(decl.isVariable());
        VarDecl* vd = (VarDecl*)decl;
        cur = vd.getInit();
        assert(cur);
    }
}
557+
558+ fn bool Analyser.checkFormatArgs(Analyser* ma, Expr** format_ptr, u32 num_args, Expr** args, FormatAttr format_attr) {
410559 Expr* format = *format_ptr;
411560 SrcLoc format_loc = format.getLoc();
412- const char* format_text = printf_utils. get_format(format, &format_loc);
561+ const char* format_text = get_format(format, &format_loc);
413562 if (!format_text) {
414563 ma.error(format_loc, "format argument is not a constant string");
415564 return false;
416565 }
417566
418567 string_buffer.Buf* out = string_buffer.create(256, false, 0);
419- FormatAnalyser fa = { ma, format_text, args, num_args, 0, format_loc, 0, out }
420-
421- if (!printf_utils.parseFormat(format_text, on_format_specifier, &fa)) {
422- // error already reported
423- out.free();
424- return false;
568+ FormatAnalyser fa = { ma, format_text, args, num_args, 0, format_loc, 0, out, format_attr }
569+
570+ switch (format_attr) {
571+ case None:
572+ break; // should not happen;
573+ case Printf:
574+ if (!parsePrintfFormat(format_text, on_printf_specifier, &fa)) {
575+ // error already reported
576+ out.free();
577+ return false;
578+ }
579+ break;
580+ case Scanf:
581+ if (!parseScanfFormat(format_text, on_scanf_specifier, &fa)) {
582+ // error already reported
583+ out.free();
584+ return false;
585+ }
586+ break;
425587 }
426588 out.add(format_text + fa.last_offset);
427589
0 commit comments