|
| 1 | +// MathJS bundle with only numbers is ~2x smaller than the default one. |
| 2 | +import { |
| 3 | + addDependencies, |
| 4 | + andDependencies, |
| 5 | + compileDependencies, |
| 6 | + create, |
| 7 | + divideDependencies, |
| 8 | + evaluateDependencies, |
| 9 | + maxDependencies, |
| 10 | + minDependencies, |
| 11 | + multiplyDependencies, |
| 12 | + notDependencies, |
| 13 | + // @ts-expect-error nullishDependencies is not declared in types. https://github.com/josdejong/mathjs/issues/3597 |
| 14 | + nullishDependencies, |
| 15 | + orDependencies, |
| 16 | + subtractDependencies, |
| 17 | + xorDependencies, |
| 18 | +} from 'mathjs'; |
| 19 | + |
| 20 | +import { ACTOR_LIMITS } from '@apify/consts'; |
| 21 | + |
| 22 | +import type { ActorRunOptions, CompilationCache, CompilationResult, MemoryEvaluationContext } from './types.js'; |
| 23 | + |
/**
 * Maximum accepted length (in characters) of a `defaultMemoryMbytes` expression.
 *
 * Longer expressions are technically possible, but nobody is expected to need
 * that much complexity in practice. The limit can be raised later if needed.
 */
export const DEFAULT_MEMORY_MBYTES_EXPRESSION_MAX_LENGTH = 1_000;
| 28 | + |
/**
 * Keys of ActorRunOptions that an expression may reference through the
 * `{{runOptions.<key>}}` template syntax. Any other `runOptions.*` key is rejected.
 *
 * Insertion order is meaningful: it is the order in which the keys are listed
 * in the "Invalid variable" error message.
 */
const allowedRunOptionKeyList: (keyof ActorRunOptions)[] = [
    'build',
    'timeoutSecs',
    'memoryMbytes',
    'diskMbytes',
    'maxItems',
    'maxTotalChargeUsd',
    'restartOnError',
];
const ALLOWED_RUN_OPTION_KEYS = new Set<keyof ActorRunOptions>(allowedRunOptionKeyList);
| 42 | + |
/**
 * Restricted mathjs instance used to compile and evaluate memory expressions.
 * It is created from the dependency bundles listed below; functions that would let an
 * expression run nested code are replaced with throwing stubs right after creation.
 * MathJS security recommendations: https://mathjs.org/docs/expressions/security.html
 */
const math = create({
    // Expression machinery.
    // This module needs it to compile and evaluate the root expression.
    // The expression-facing `compile`/`evaluate` are disabled further down so that
    // users cannot write e.g. defaultMemoryMbytes = "evaluate('2 + 2')".
    compileDependencies,
    evaluateDependencies,

    // Arithmetic.
    addDependencies,
    subtractDependencies,
    multiplyDependencies,
    divideDependencies,
    // Statistics.
    maxDependencies,
    minDependencies,
    // Logical.
    andDependencies,
    notDependencies,
    orDependencies,
    xorDependencies,
    // Nullish coalescing: without this dependency 'null ?? 5' won't work.
    nullishDependencies,
});

// Keep a reference to the genuine compiler BEFORE it is overridden below;
// this module keeps using it internally.
const compile = math.compile;

// Disable potentially dangerous functions inside expressions.
math.import({
    // Prevents nested evaluation such as defaultMemoryMbytes = "evaluate('2 + 2')".
    evaluate: () => { throw new Error('Function evaluate is disabled.'); },
    compile: () => { throw new Error('Function compile is disabled.'); },
    // `parse` has to be disabled explicitly, because compileDependencies imports parseDependencies.
    parse: () => { throw new Error('Function parse is disabled.'); },
}, { override: true });
| 82 | + |
/**
 * Safely retrieves a nested property from an object using a dot-notation string path.
 *
 * This is a custom function designed to be injected into the math expression evaluator,
 * allowing expressions like `get(input, 'user.settings.memory', 512)`, or
 * `get(input, 'startUrls.length', 10)` to get an array length.
 *
 * Only OWN properties are followed at every step of the path. Inherited members such as
 * `constructor`, `__proto__` or `toString` are treated as "not found", so an expression
 * cannot use this helper to reach prototype objects or the `Function` constructor.
 * Own properties of arrays and strings (`length`, numeric indices) remain accessible.
 *
 * @param obj The source object to search within.
 * @param path A dot-separated string representing the nested path (e.g., "input.payload.size").
 * @param defaultVal The value to return if the path is not found or the value is `null` or `undefined`.
 * @returns The retrieved value, or `defaultVal` if the path is unreachable.
 * @throws {TypeError} If `path` is not a string.
 */
const customGetFunc = (obj: any, path: string, defaultVal?: number) => {
    // The function is called from user-written expressions, so the argument types are not guaranteed.
    if (typeof path !== 'string') {
        throw new TypeError(`The 'path' argument of get() must be a string, got: ${typeof path}.`);
    }

    let current: any = obj;
    for (const key of path.split('.')) {
        // Stop at null/undefined (hasOwnProperty would throw on them) and at anything
        // that is not an own property of the current value.
        if (current === null || current === undefined || !Object.prototype.hasOwnProperty.call(current, key)) {
            return defaultVal;
        }
        current = current[key];
    }

    // A property that exists but holds null/undefined also falls back to the default.
    return current ?? defaultVal;
};
| 97 | + |
/**
 * Snaps a memory value to a power of 2 and clamps it to the allowed range
 * (ACTOR_LIMITS.MIN_RUN_MEMORY_MBYTES - ACTOR_LIMITS.MAX_RUN_MEMORY_MBYTES).
 *
 * "Closest" is decided on the exponent: log2(num) is rounded to the nearest integer
 * and 2 is raised to that integer.
 *
 * @param num The number to round.
 * @returns The resulting power of 2, limited to the min/max range.
 * @throws {Error} If `num` is not a finite number, or is zero or negative.
 */
const roundToClosestPowerOf2 = (num: number): number => {
    // Number.isFinite() is false for non-number values, NaN and +/-Infinity alike.
    if (!Number.isFinite(num)) {
        throw new Error(`Calculated memory value is not a valid number: ${num}.`);
    }

    // Zero and negative values have no meaningful log2.
    if (num <= 0) {
        throw new Error(`Calculated memory value must be a positive number, greater than 0, got: ${num}.`);
    }

    const exponent = Math.round(Math.log2(num));
    const powerOfTwo = 2 ** exponent;

    const cappedAtMax = Math.min(powerOfTwo, ACTOR_LIMITS.MAX_RUN_MEMORY_MBYTES);
    return Math.max(ACTOR_LIMITS.MIN_RUN_MEMORY_MBYTES, cappedAtMax);
};
| 121 | + |
/**
 * Turns every `{{variable}}` placeholder of an expression into plain property access
 * (e.g. `{{runOptions.memoryMbytes}}` → `runOptions.memoryMbytes`).
 *
 * Any `input.*` variable is accepted as-is, because `input` is dynamic.
 * `runOptions.*` variables are accepted only when the key is listed in ALLOWED_RUN_OPTION_KEYS.
 * Every other variable is rejected.
 *
 * Note: Math.js does not need the braces. They exist so that developers can use the same
 * double-brace templating syntax `{{runOptions.timeoutSecs}}` across the Apify platform.
 * Placeholders are replaced by names rather than by actual values, so that the same
 * compiled expression can be cached and reused with different values.
 *
 * @example
 * // Returns "runOptions.memoryMbytes + 1024"
 * processTemplateVariables("{{runOptions.memoryMbytes}} + 1024");
 *
 * @param defaultMemoryMbytes The raw string expression, e.g., "{{runOptions.memoryMbytes}} * 2".
 * @returns The processed expression for evaluation, e.g., "runOptions.memoryMbytes * 2".
 * @throws {Error} If a placeholder references an unknown variable or a disallowed runOptions key.
 */
const processTemplateVariables = (defaultMemoryMbytes: string): string => {
    // Validates a single placeholder name and returns the text that replaces the placeholder.
    const resolveVariable = (variableName: string): string => {
        // {{input.someValue}}: the property name is not validated.
        if (variableName.startsWith('input.')) {
            return variableName;
        }

        // Anything that is neither input.* nor runOptions.* (e.g. {{someVariable}}) is unknown.
        if (!variableName.startsWith('runOptions.')) {
            throw new Error(
                `Invalid variable '{{${variableName}}}' in expression.`,
            );
        }

        // {{runOptions.someKey}}: the key has to be one of the allowed ones.
        const runOptionKey = variableName.slice('runOptions.'.length);
        if (ALLOWED_RUN_OPTION_KEYS.has(runOptionKey as keyof ActorRunOptions)) {
            return variableName;
        }

        const allowedVariables = Array.from(ALLOWED_RUN_OPTION_KEYS).map((k) => `runOptions.${k}`).join(', ');
        throw new Error(
            `Invalid variable '{{${variableName}}}' in expression. Only the following runOptions are allowed: ${allowedVariables}.`,
        );
    };

    return defaultMemoryMbytes.replace(
        /{{\s*([a-zA-Z0-9_.]+)\s*}}/g,
        (_match, variableName: string) => resolveVariable(variableName),
    );
};
| 172 | + |
/**
 * Returns the compiled form of an expression, reusing the cache when one is provided.
 *
 * Without a cache the expression is compiled on every call. With a cache, a hit is
 * returned directly; on a miss the expression is compiled, stored under the expression
 * string and then returned.
 *
 * @param expression The expression string to compile.
 * @param cache An optional cache to store/retrieve compiled expressions.
 * @returns The compiled CompilationResult.
 */
const getCompiledExpression = async (expression: string, cache: CompilationCache | undefined): Promise<CompilationResult> => {
    if (cache) {
        const cachedExpression = await cache.get(expression);
        if (cachedExpression) {
            return cachedExpression;
        }

        const freshlyCompiled: CompilationResult = compile(expression);
        await cache.set(expression, freshlyCompiled);
        return freshlyCompiled;
    }

    return compile(expression);
};
| 194 | + |
/**
 * Computes the run memory from a dynamic memory expression and the provided context.
 * The evaluated value is rounded to a power of 2 and clamped within allowed limits.
 *
 * @param defaultMemoryMbytes The string expression to evaluate (e.g., `get(input, 'urls.length', 10) * 1024` for `input = { urls: ['url1', 'url2'] }`).
 * @param context The `MemoryEvaluationContext` (containing `input` and `runOptions`) available to the expression.
 * @param options.cache Optional cache of compiled expressions, keyed by the preprocessed expression string.
 * Its `get`/`set` results are awaited.
 * @returns The calculated memory value rounded to a power of 2 and clamped within allowed limits.
 * @throws {Error} If the expression is longer than DEFAULT_MEMORY_MBYTES_EXPRESSION_MAX_LENGTH,
 * uses an invalid `{{variable}}`, or does not evaluate to a positive finite number.
 */
export const calculateRunDynamicMemory = async (
    defaultMemoryMbytes: string,
    context: MemoryEvaluationContext,
    options: { cache: CompilationCache } | undefined = undefined,
) => {
    const isTooLong = defaultMemoryMbytes.length > DEFAULT_MEMORY_MBYTES_EXPRESSION_MAX_LENGTH;
    if (isTooLong) {
        throw new Error(`The defaultMemoryMbytes expression is too long. Max length is ${DEFAULT_MEMORY_MBYTES_EXPRESSION_MAX_LENGTH} characters.`);
    }

    // "{{runOptions.memoryMbytes}} + 1024" becomes "runOptions.memoryMbytes + 1024".
    const expression = processTemplateVariables(defaultMemoryMbytes);

    // The evaluation scope is the caller's context plus the `get` helper.
    const scope = { ...context, get: customGetFunc };

    const compiled = await getCompiledExpression(expression, options?.cache);
    const rawResult: number | { entries: number[] } = compiled.evaluate(scope);

    // Mathjs wraps the results of a multi-line expression in an object with `entries`;
    // the value of the last entry is the one that counts.
    // A one-line expression returns the number directly.
    if (rawResult && typeof rawResult === 'object' && 'entries' in rawResult) {
        const { entries } = rawResult;
        return roundToClosestPowerOf2(entries[entries.length - 1]);
    }

    return roundToClosestPowerOf2(rawResult);
};
0 commit comments