|
| 1 | +/** |
| 2 | + * This file is part of AtomVM. |
| 3 | + * |
| 4 | + * Copyright 2026 Paul Guyot <pguyot@kallisys.net> |
| 5 | + * |
| 6 | + * @name Mismatched AtomString length |
| 7 | + * @kind problem |
| 8 | + * @problem.severity error |
| 9 | + * @id atomvm/mismatched-atom-string-length |
| 10 | + * @tags correctness |
| 11 | + * @precision high |
| 12 | + * |
| 13 | + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later |
| 14 | + */ |
| 15 | + |
| 16 | +import cpp |
| 17 | + |
| 18 | +int hexCharToInt(string c) { |
| 19 | + c = "0" and result = 0 |
| 20 | + or |
| 21 | + c = "1" and result = 1 |
| 22 | + or |
| 23 | + c = "2" and result = 2 |
| 24 | + or |
| 25 | + c = "3" and result = 3 |
| 26 | + or |
| 27 | + c = "4" and result = 4 |
| 28 | + or |
| 29 | + c = "5" and result = 5 |
| 30 | + or |
| 31 | + c = "6" and result = 6 |
| 32 | + or |
| 33 | + c = "7" and result = 7 |
| 34 | + or |
| 35 | + c = "8" and result = 8 |
| 36 | + or |
| 37 | + c = "9" and result = 9 |
| 38 | + or |
| 39 | + c = "a" and result = 10 |
| 40 | + or |
| 41 | + c = "A" and result = 10 |
| 42 | + or |
| 43 | + c = "b" and result = 11 |
| 44 | + or |
| 45 | + c = "B" and result = 11 |
| 46 | + or |
| 47 | + c = "c" and result = 12 |
| 48 | + or |
| 49 | + c = "C" and result = 12 |
| 50 | + or |
| 51 | + c = "d" and result = 13 |
| 52 | + or |
| 53 | + c = "D" and result = 13 |
| 54 | + or |
| 55 | + c = "e" and result = 14 |
| 56 | + or |
| 57 | + c = "E" and result = 14 |
| 58 | + or |
| 59 | + c = "f" and result = 15 |
| 60 | + or |
| 61 | + c = "F" and result = 15 |
| 62 | +} |
| 63 | + |
| 64 | +bindingset[lenStr] |
| 65 | +int parseLenStr(string lenStr) { |
| 66 | + exists(string inner | |
| 67 | + inner = lenStr.substring(1, lenStr.length() - 1) and |
| 68 | + ( |
| 69 | + // \xH - single hex digit |
| 70 | + inner.length() = 3 and |
| 71 | + inner.prefix(2) = "\\x" and |
| 72 | + result = hexCharToInt(inner.charAt(2)) |
| 73 | + or |
| 74 | + // \xHH - two hex digits |
| 75 | + inner.length() = 4 and |
| 76 | + inner.prefix(2) = "\\x" and |
| 77 | + result = hexCharToInt(inner.charAt(2)) * 16 + hexCharToInt(inner.charAt(3)) |
| 78 | + ) |
| 79 | + ) |
| 80 | +} |
| 81 | + |
| 82 | +bindingset[str] |
| 83 | +int parseStrLen(string str) { |
| 84 | + result = str.length() - 2 |
| 85 | +} |
| 86 | + |
| 87 | +/** |
| 88 | + * Check 1: ATOM_STR and X macro invocations where the length byte |
| 89 | + * doesn't match the string length. |
| 90 | + */ |
| 91 | +predicate macroMismatch(MacroInvocation mi, string msg) { |
| 92 | + exists(string lenArg, string strArg, int declaredLen, int actualLen | |
| 93 | + ( |
| 94 | + mi.getMacroName() = "ATOM_STR" and |
| 95 | + lenArg = mi.getUnexpandedArgument(0) and |
| 96 | + strArg = mi.getUnexpandedArgument(1) |
| 97 | + or |
| 98 | + mi.getMacroName() = "X" and |
| 99 | + mi.getFile().getBaseName().matches("%.def") and |
| 100 | + lenArg = mi.getUnexpandedArgument(1) and |
| 101 | + strArg = mi.getUnexpandedArgument(2) |
| 102 | + ) and |
| 103 | + declaredLen = parseLenStr(lenArg) and |
| 104 | + actualLen = parseStrLen(strArg) and |
| 105 | + declaredLen != actualLen and |
| 106 | + msg = |
| 107 | + "AtomString length mismatch: declared length " + declaredLen.toString() + " (from " + lenArg + |
| 108 | + ") but actual string " + strArg + " has length " + actualLen.toString() |
| 109 | + ) |
| 110 | +} |
| 111 | + |
| 112 | +/** |
| 113 | + * Holds if `f` has a parameter at position `i` whose type resolves to AtomString |
| 114 | + * (i.e. const void *). |
| 115 | + */ |
| 116 | +predicate isAtomStringParam(Function f, int i) { |
| 117 | + exists(Parameter p | |
| 118 | + p = f.getParameter(i) and |
| 119 | + p.getType().(TypedefType).getName() = "AtomString" |
| 120 | + ) |
| 121 | +} |
| 122 | + |
| 123 | +/** |
| 124 | + * Holds if `f` has a parameter at position `i` whose struct field type is AtomString. |
| 125 | + * This catches AtomStringIntPair initializers. |
| 126 | + */ |
| 127 | +predicate isAtomStringField(Struct s, int i) { |
| 128 | + exists(Field field | |
| 129 | + field = s.getCanonicalMember(i) and |
| 130 | + field.getType().(TypedefType).getName() = "AtomString" |
| 131 | + ) |
| 132 | +} |
| 133 | + |
| 134 | +/** |
| 135 | + * Check 2: String literals used where AtomString is expected (function arguments |
| 136 | + * or variable initializers), where the first byte (length prefix) doesn't match |
| 137 | + * the remaining string length. |
| 138 | + * |
| 139 | + * After ATOM_STR expansion, the string literal is e.g. "\x05hello" (6 bytes). |
| 140 | + * getValue() returns the decoded string, so charAt(0).toUnicode() == 5 and |
| 141 | + * getValue().length() == 6. The check is: first byte != length - 1. |
| 142 | + */ |
| 143 | +predicate stringLiteralMismatch(StringLiteral sl, string msg) { |
| 144 | + not sl.isInMacroExpansion() and |
| 145 | + exists(int declaredLen, int actualLen | |
| 146 | + declaredLen = sl.getValue().codePointAt(0) and |
| 147 | + actualLen = sl.getValue().length() - 1 and |
| 148 | + declaredLen != actualLen and |
| 149 | + declaredLen > 0 and |
| 150 | + ( |
| 151 | + // Passed as argument to a function with AtomString parameter |
| 152 | + exists(FunctionCall fc, int i | |
| 153 | + isAtomStringParam(fc.getTarget(), i) and |
| 154 | + fc.getArgument(i) = sl |
| 155 | + ) |
| 156 | + or |
| 157 | + // Used in initializer of a const char * variable with _atom suffix (typical pattern) |
| 158 | + exists(Variable v | |
| 159 | + v.getInitializer().getExpr() = sl and |
| 160 | + v.getType().stripTopLevelSpecifiers().(PointerType).getBaseType().stripTopLevelSpecifiers() |
| 161 | + instanceof CharType and |
| 162 | + v.getName().matches("%_atom") |
| 163 | + ) |
| 164 | + or |
| 165 | + // Used in an aggregate literal initializing an AtomStringIntPair-like struct |
| 166 | + exists(ClassAggregateLiteral agg, int i | |
| 167 | + isAtomStringField(agg.getType().stripType(), i) and |
| 168 | + agg.getAFieldExpr(agg.getType().stripType().(Struct).getCanonicalMember(i)) = sl |
| 169 | + ) |
| 170 | + ) and |
| 171 | + msg = |
| 172 | + "AtomString length mismatch in string literal: first byte is " + declaredLen.toString() + |
| 173 | + " but string data length is " + actualLen.toString() |
| 174 | + ) |
| 175 | +} |
| 176 | + |
| 177 | +from Element e, string msg |
| 178 | +where |
| 179 | + macroMismatch(e, msg) |
| 180 | + or |
| 181 | + stringLiteralMismatch(e, msg) |
| 182 | +select e, msg |
0 commit comments