diff --git a/benchmarks/000.microbenchmarks/040.server-reply/config.json b/benchmarks/000.microbenchmarks/040.server-reply/config.json index 93ce2f561..8ff6eec59 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/config.json +++ b/benchmarks/000.microbenchmarks/040.server-reply/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python"], "modules": [] } diff --git a/benchmarks/100.webapps/120.uploader/python/function_cloudflare.py b/benchmarks/100.webapps/120.uploader/python/function_cloudflare.py new file mode 100644 index 000000000..98372cf0f --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/python/function_cloudflare.py @@ -0,0 +1,56 @@ + +import datetime +import os + +from pyodide.ffi import run_sync +from pyodide.http import pyfetch + +from . import storage +client = storage.storage.get_instance() + +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" + +async def do_request(url, download_path): + headers = {'User-Agent': SEBS_USER_AGENT} + + res = await pyfetch(url, headers=headers) + bs = await res.bytes() + + with open(download_path, 'wb') as f: + f.write(bs) + +def handler(event): + + bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + url = event.get('object').get('url') + name = os.path.basename(url) + download_path = '/tmp/{}'.format(name) + + process_begin = datetime.datetime.now() + + run_sync(do_request(url, download_path)) + + size = os.path.getsize(download_path) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload(bucket, os.path.join(output_prefix, name), download_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'url': url, + 'key': key_name + }, + 'measurement': { + 'download_time': 0, + 'download_size': 0, + 'upload_time': upload_time, + 'upload_size': size, + 'compute_time': process_time + } + } diff --git a/benchmarks/100.webapps/130.crud-api/nodejs/function.js b/benchmarks/100.webapps/130.crud-api/nodejs/function.js new file mode 100644 index 000000000..e1504598a --- /dev/null +++ b/benchmarks/100.webapps/130.crud-api/nodejs/function.js @@ -0,0 +1,78 @@ +const nosql = require('./nosql'); + +const nosqlClient = nosql.nosql.get_instance(); +const nosqlTableName = "shopping_cart"; + +async function addProduct(cartId, productId, productName, price, quantity) { + await nosqlClient.insert( + nosqlTableName, + ["cart_id", cartId], + ["product_id", productId], + { price: price, quantity: quantity, name: productName } + ); +} + +async function getProducts(cartId, productId) { + return await nosqlClient.get( + nosqlTableName, + ["cart_id", cartId], + ["product_id", productId] + ); +} + +async function queryProducts(cartId) { + const res = await nosqlClient.query( + nosqlTableName, + ["cart_id", cartId], + "product_id" + ); + + const products = []; + let priceSum = 0; + let quantitySum = 0; + + for (const product of res) { + products.push(product.name); + priceSum += product.price; + quantitySum += product.quantity; + } + + const avgPrice = quantitySum > 0 ? 
priceSum / quantitySum : 0.0; + + return { + products: products, + total_cost: priceSum, + avg_price: avgPrice + }; +} + +exports.handler = async function(event) { + const results = []; + + for (const request of event.requests) { + const route = request.route; + const body = request.body; + let res; + + if (route === "PUT /cart") { + await addProduct( + body.cart, + body.product_id, + body.name, + body.price, + body.quantity + ); + res = {}; + } else if (route === "GET /cart/{id}") { + res = await getProducts(body.cart, request.path.id); + } else if (route === "GET /cart") { + res = await queryProducts(body.cart); + } else { + throw new Error(`Unknown request route: ${route}`); + } + + results.push(res); + } + + return { result: results }; +}; diff --git a/benchmarks/100.webapps/130.crud-api/nodejs/package.json b/benchmarks/100.webapps/130.crud-api/nodejs/package.json new file mode 100644 index 000000000..e00c83ddf --- /dev/null +++ b/benchmarks/100.webapps/130.crud-api/nodejs/package.json @@ -0,0 +1,9 @@ +{ + "name": "crud-api", + "version": "1.0.0", + "description": "CRUD API benchmark", + "author": "", + "license": "", + "dependencies": { + } +} diff --git a/benchmarks/300.utilities/311.compression/nodejs/function.js b/benchmarks/300.utilities/311.compression/nodejs/function.js new file mode 100644 index 000000000..5f7cc04d4 --- /dev/null +++ b/benchmarks/300.utilities/311.compression/nodejs/function.js @@ -0,0 +1,147 @@ +const fs = require('fs'); +const path = require('path'); +const zlib = require('zlib'); +const { v4: uuidv4 } = require('uuid'); +const storage = require('./storage'); + +let storage_handler = new storage.storage(); + +/** + * Calculate total size of a directory recursively + * @param {string} directory - Path to directory + * @returns {number} Total size in bytes + */ +function parseDirectory(directory) { + let size = 0; + + function walkDir(dir) { + const files = fs.readdirSync(dir); + for (const file of files) { + const filepath = path.join(dir, file); + const stat = fs.statSync(filepath); + if (stat.isDirectory()) { + walkDir(filepath); + } else { + size += stat.size; + } + } + } + + walkDir(directory); + return size; +} + +/** + * Create a simple tar.gz archive from a directory using native zlib + * This creates a gzip-compressed tar archive without external dependencies + * @param {string} sourceDir - Directory to compress + * @param {string} outputPath - Path for the output archive file + * @returns {Promise} + */ +async function createTarGzArchive(sourceDir, outputPath) { + // Create a simple tar-like format (concatenated files with headers) + const files = []; + + function collectFiles(dir, baseDir = '') { + const entries = fs.readdirSync(dir); + for (const entry of entries) { + const fullPath = path.join(dir, entry); + const relativePath = path.join(baseDir, entry); + const stat = fs.statSync(fullPath); + + if (stat.isDirectory()) { + collectFiles(fullPath, relativePath); + } else { + files.push({ + path: relativePath, + fullPath: fullPath, + size: stat.size + }); + } + } + } + + collectFiles(sourceDir); + + // Create a concatenated buffer of all files with simple headers + const chunks = []; + for (const file of files) { + const content = fs.readFileSync(file.fullPath); + // Simple header: filename length (4 bytes) + filename + content length (4 bytes) + content + const pathBuffer = Buffer.from(file.path); + const pathLengthBuffer = Buffer.allocUnsafe(4); + pathLengthBuffer.writeUInt32BE(pathBuffer.length, 0); + const contentLengthBuffer = 
Buffer.allocUnsafe(4); + contentLengthBuffer.writeUInt32BE(content.length, 0); + + chunks.push(pathLengthBuffer); + chunks.push(pathBuffer); + chunks.push(contentLengthBuffer); + chunks.push(content); + } + + const combined = Buffer.concat(chunks); + + // Compress using gzip + const compressed = zlib.gzipSync(combined, { level: 9 }); + fs.writeFileSync(outputPath, compressed); +} + +exports.handler = async function(event) { + const bucket = event.bucket.bucket; + const input_prefix = event.bucket.input; + const output_prefix = event.bucket.output; + const key = event.object.key; + + // Create unique download path + const download_path = path.join('/tmp', `${key}-${uuidv4()}`); + fs.mkdirSync(download_path, { recursive: true }); + + // Download directory from storage + const s3_download_begin = Date.now(); + await storage_handler.download_directory(bucket, path.join(input_prefix, key), download_path); + const s3_download_stop = Date.now(); + + // Calculate size of downloaded files + const size = parseDirectory(download_path); + + // Compress directory + const compress_begin = Date.now(); + const archive_name = `${key}.tar.gz`; + const archive_path = path.join(download_path, archive_name); + await createTarGzArchive(download_path, archive_path); + const compress_end = Date.now(); + + // Get archive size + const archive_size = fs.statSync(archive_path).size; + + // Upload compressed archive + const s3_upload_begin = Date.now(); + const [key_name, uploadPromise] = storage_handler.upload( + bucket, + path.join(output_prefix, archive_name), + archive_path + ); + await uploadPromise; + const s3_upload_stop = Date.now(); + + // Calculate times in microseconds + const download_time = (s3_download_stop - s3_download_begin) * 1000; + const upload_time = (s3_upload_stop - s3_upload_begin) * 1000; + const process_time = (compress_end - compress_begin) * 1000; + + return { + result: { + bucket: bucket, + key: key_name + }, + measurement: { + download_time: download_time, + download_size: size, + upload_time: upload_time, + upload_size: archive_size, + compute_time: process_time + } + }; +}; + diff --git a/benchmarks/300.utilities/311.compression/nodejs/package.json b/benchmarks/300.utilities/311.compression/nodejs/package.json new file mode 100644 index 000000000..56827265a --- /dev/null +++ b/benchmarks/300.utilities/311.compression/nodejs/package.json @@ -0,0 +1,9 @@ +{ + "name": "compression-benchmark", + "version": "1.0.0", + "description": "Compression benchmark for serverless platforms", + "main": "function.js", + "dependencies": { + "uuid": "^10.0.0" + } +} diff --git a/benchmarks/wrappers/cloudflare/nodejs/build.js b/benchmarks/wrappers/cloudflare/nodejs/build.js new file mode 100644 index 000000000..834ec5c16 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/build.js @@ -0,0 +1,173 @@ +const { build } = require('esbuild'); +const fs = require('fs'); +const { join, extname, dirname, relative } = require('path'); + +function getAllFiles(dir, fileList = []) { + const files = fs.readdirSync(dir, { withFileTypes: true }); + for (const file of files) { + const filePath = join(dir, file.name); + if (file.isDirectory()) { + if (file.name !== 'node_modules' && + file.name !== 'test' && + file.name !== 'tests' && + file.name !== '__tests__' && + file.name !== 'dist' && + !file.name.startsWith('.')) { + getAllFiles(filePath, fileList); + } + } else { + if (!file.name.includes('.test.') && + !file.name.includes('.spec.') && + file.name !== 'build.js' && + file.name !== 'wrangler.toml') { + 
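+        // Anything not filtered out above (benchmark sources, data assets, manifests)
+        // is collected and later either bundled (JS/TS) or copied verbatim.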
fileList.push(filePath); + } + } + } + return fileList; +} + +function copyFile(src, dest) { + const destDir = dirname(dest); + if (!fs.existsSync(destDir)) { + fs.mkdirSync(destDir, { recursive: true }); + } + fs.copyFileSync(src, dest); +} + +const nodeBuiltinsPlugin = { + name: 'node-builtins-external', + setup(build) { + const { resolve } = require('path'); + + // Keep node: prefixed modules external + build.onResolve({ filter: /^(node:|cloudflare:)/ }, (args) => { + return { path: args.path, external: true }; + }); + + // Map bare node built-in names to node: versions and keep external + build.onResolve({ filter: /^(fs|querystring|path|crypto|stream|buffer|util|events|http|https|net|tls|zlib|os|child_process|tty|assert|url)$/ }, (args) => { + return { path: 'node:' + args.path, external: true }; + }); + + // Polyfill 'request' module with fetch-based implementation + build.onResolve({ filter: /^request$/ }, (args) => { + // Get the directory where build.js is located (wrapper directory) + const wrapperDir = __dirname; + return { + path: resolve(wrapperDir, 'request-polyfill.js') + }; + }); + } +}; + + +async function customBuild() { + const srcDir = './'; + const outDir = './dist'; + + if (fs.existsSync(outDir)) { + fs.rmSync(outDir, { recursive: true }); + } + fs.mkdirSync(outDir, { recursive: true }); + + try { + const files = getAllFiles(srcDir); + + const jsFiles = files.filter(f => + ['.js', '.ts', '.jsx', '.tsx'].includes(extname(f)) + ); + + const otherFiles = files.filter(f => + !['.js', '.ts', '.jsx', '.tsx'].includes(extname(f)) + ); + + console.log('Building JS files:', jsFiles); + + if (jsFiles.length > 0) { + await build({ + entryPoints: jsFiles, + bundle: true, + format: 'esm', + outdir: outDir, + outbase: srcDir, + platform: 'neutral', + target: 'es2020', + sourcemap: true, + allowOverwrite: true, + plugins: [nodeBuiltinsPlugin], + define: { + 'process.env.NODE_ENV': '"production"', + 'global': 'globalThis', + '__dirname': '"/bundle"' + }, + mainFields: ['module', 'main'], + treeShaking: true, + }); + + // POST-PROCESS: Replace dynamic requires with static imports + console.log('Post-processing to fix node: module imports...'); + + for (const jsFile of jsFiles) { + const outPath = join(outDir, relative(srcDir, jsFile)); + + if (fs.existsSync(outPath)) { + let content = fs.readFileSync(outPath, 'utf-8'); + + // Find all node: modules being dynamically required + const nodeModules = new Set(); + const requireRegex = /__require\d*\("(node:[^"]+)"\)/g; + let match; + while ((match = requireRegex.exec(content)) !== null) { + nodeModules.add(match[1]); + } + + if (nodeModules.size > 0) { + // Generate static imports at the top + let imports = ''; + const mapping = {}; + let i = 0; + for (const mod of nodeModules) { + const varName = `__node_${mod.replace('node:', '').replace(/[^a-z0-9]/gi, '_')}_${i++}`; + imports += `import * as ${varName} from '${mod}';\n`; + mapping[mod] = varName; + } + + // Add cache object + imports += '\nconst __node_cache = {\n'; + for (const [mod, varName] of Object.entries(mapping)) { + imports += ` '${mod}': ${varName},\n`; + } + imports += '};\n\n'; + + // Replace all __require calls with cache lookups + content = content.replace(/__require(\d*)\("(node:[^"]+)"\)/g, (match, num, mod) => { + return `__node_cache['${mod}']`; + }); + + // Prepend imports to the file + content = imports + content; + + fs.writeFileSync(outPath, content, 'utf-8'); + console.log(`✓ Fixed ${nodeModules.size} node: imports in ${relative(srcDir, jsFile)}`); + } + } + } + } 
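+      // Illustration of the rewrite above, assuming a bundle that pulled in 'node:path':
+      //   __require2("node:path")            -->  __node_cache['node:path']
+      // with these lines prepended to the output file:
+      //   import * as __node_path_0 from 'node:path';
+      //   const __node_cache = { 'node:path': __node_path_0 };
+      // The built-ins end up as static ESM imports (kept external by the esbuild plugin),
+      // so no dynamic require() of node: modules remains in the Worker bundle.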
+ + // Copy non-JS files (templates, etc.) + for (const file of otherFiles) { + const relativePath = relative(srcDir, file); + const destPath = join(outDir, relativePath); + copyFile(file, destPath); + console.log(`Copied: ${relativePath}`); + } + + console.log('✓ Build completed successfully'); + } catch (error) { + console.error('Build failed:', error); + process.exit(1); + } +} + +customBuild(); \ No newline at end of file diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/handler.js b/benchmarks/wrappers/cloudflare/nodejs/container/handler.js new file mode 100644 index 000000000..9b8b25e19 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/handler.js @@ -0,0 +1,194 @@ +// Container handler for Cloudflare Workers - Node.js +// This handler is used when deploying as a container worker + +const http = require('http'); + +// Monkey-patch the 'request' library to always include a User-Agent header +// This is needed because Wikimedia (and other sites) require a User-Agent +try { + const Module = require('module'); + const originalRequire = Module.prototype.require; + + Module.prototype.require = function(id) { + const module = originalRequire.apply(this, arguments); + + if (id === 'request') { + // Wrap the request function to inject default headers + const originalRequest = module; + const wrappedRequest = function(options, callback) { + if (typeof options === 'string') { + options = { uri: options }; + } + if (!options.headers) { + options.headers = {}; + } + if (!options.headers['User-Agent'] && !options.headers['user-agent']) { + options.headers['User-Agent'] = 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2'; + } + return originalRequest(options, callback); + }; + // Copy all properties from original request + Object.keys(originalRequest).forEach(key => { + wrappedRequest[key] = originalRequest[key]; + }); + return wrappedRequest; + } + + return module; + }; +} catch (e) { + console.error('Failed to patch request module:', e); +} + +// Import the benchmark function +const { handler: benchmarkHandler } = require('./function'); + +// Import storage and nosql if they exist +let storage, nosql; +try { + storage = require('./storage'); +} catch (e) { + console.log('Storage module not available'); +} +try { + nosql = require('./nosql'); +} catch (e) { + console.log('NoSQL module not available'); +} + +const PORT = process.env.PORT || 8080; + +const server = http.createServer(async (req, res) => { + // Handle favicon requests + if (req.url.includes('favicon')) { + res.writeHead(200); + res.end('None'); + return; + } + + try { + // Get unique request ID from Cloudflare (CF-Ray header) + const crypto = require('crypto'); + const reqId = req.headers['cf-ray'] || crypto.randomUUID(); + + // Extract Worker URL from header for R2 and NoSQL proxy + const workerUrl = req.headers['x-worker-url']; + if (workerUrl) { + if (storage && storage.storage && storage.storage.set_worker_url) { + storage.storage.set_worker_url(workerUrl); + } + if (nosql && nosql.nosql && nosql.nosql.set_worker_url) { + nosql.nosql.set_worker_url(workerUrl); + } + console.log(`Set worker URL for R2/NoSQL proxy: ${workerUrl}`); + } + + // Start timing measurements + const begin = Date.now() / 1000; + const start = performance.now(); + + // Read request body + let body = ''; + for await (const chunk of req) { + body += chunk; + } + + // Parse event from JSON body or URL params + let event = {}; + if (body && body.length > 0) { + try { + event = JSON.parse(body); + } 
catch (e) { + console.error('Failed to parse JSON body:', e); + } + } + + // Parse URL parameters + const url = new URL(req.url, `http://${req.headers.host}`); + for (const [key, value] of url.searchParams) { + if (!event[key]) { + const intValue = parseInt(value); + event[key] = isNaN(intValue) ? value : intValue; + } + } + + // Add request metadata + const incomeTimestamp = Math.floor(Date.now() / 1000); + event['request-id'] = reqId; + event['income-timestamp'] = incomeTimestamp; + + // For debugging: check /tmp directory before and after benchmark + const fs = require('fs'); + + // Call the benchmark function + const ret = await benchmarkHandler(event); + + // Check what was downloaded + const tmpFiles = fs.readdirSync('/tmp'); + for (const file of tmpFiles) { + const filePath = `/tmp/${file}`; + const stats = fs.statSync(filePath); + if (stats.size < 500) { + const content = fs.readFileSync(filePath, 'utf8'); + } + } + + // Calculate elapsed time + const end = Date.now() / 1000; + const elapsed = performance.now() - start; + const micro = elapsed * 1000; // Convert milliseconds to microseconds + + // Build log_data similar to native handler + const log_data = { output: ret && ret.result !== undefined ? ret.result : ret }; + if (ret && ret.measurement !== undefined) { + log_data.measurement = ret.measurement; + } else { + log_data.measurement = {}; + } + + // Add memory usage to measurement + const memUsage = process.memoryUsage(); + const memory_mb = memUsage.heapUsed / 1024 / 1024; + log_data.measurement.memory_used_mb = memory_mb; + + console.log('Sending response with log_data:', log_data); + + // Send response matching Python handler format exactly + if (event.html) { + res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' }); + res.end(String(ret && ret.result !== undefined ? 
ret.result : ret)); + } else { + const responseBody = JSON.stringify({ + begin: begin, + end: end, + results_time: 0, + result: log_data, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: reqId, + }); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(responseBody); + } + + } catch (error) { + console.error('Error processing request:', error); + console.error('Stack trace:', error.stack); + + const errorPayload = JSON.stringify({ + error: error.message, + stack: error.stack + }); + + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(errorPayload); + } +}); + +// Ensure server is listening before handling requests +server.listen(PORT, '0.0.0.0', () => { + console.log(`Container server listening on 0.0.0.0:${PORT}`); + console.log('Server ready to accept connections'); +}); diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/nosql.js b/benchmarks/wrappers/cloudflare/nodejs/container/nosql.js new file mode 100644 index 000000000..3469bf6b9 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/nosql.js @@ -0,0 +1,118 @@ +/** + * NoSQL module for Cloudflare Node.js Containers + * Uses HTTP proxy to access Durable Objects through the Worker's binding + */ + +class nosql { + constructor() { + // Container accesses Durable Objects through worker.js proxy + } + + static worker_url = null; // Set by handler from X-Worker-URL header + + static init_instance(entry) { + if (!nosql.instance) { + nosql.instance = new nosql(); + } + return nosql.instance; + } + + static set_worker_url(url) { + nosql.worker_url = url; + } + + async _make_request(operation, params) { + if (!nosql.worker_url) { + throw new Error('Worker URL not set - cannot access NoSQL'); + } + + const url = `${nosql.worker_url}/nosql/${operation}`; + const data = JSON.stringify(params); + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: data, + }); + + if (!response.ok) { + let errorMsg; + try { + const errorData = await response.json(); + errorMsg = errorData.error || await response.text(); + } catch { + errorMsg = await response.text(); + } + throw new Error(`NoSQL operation failed: ${errorMsg}`); + } + + return await response.json(); + } catch (error) { + throw new Error(`NoSQL operation failed: ${error.message}`); + } + } + + async insert(tableName, primaryKey, secondaryKey, data) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + data: data, + }; + return this._make_request('insert', params); + } + + async get(tableName, primaryKey, secondaryKey) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + }; + const result = await this._make_request('get', params); + return result.data || null; + } + + async update(tableName, primaryKey, secondaryKey, updates) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + data: updates, + }; + return this._make_request('update', params); + } + + async query(tableName, primaryKey, secondaryKeyName) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key_name: secondaryKeyName, + }; + const result = await this._make_request('query', params); + console.error(`[nosql.query] result:`, JSON.stringify(result)); + console.error(`[nosql.query] result.items:`, result.items); + console.error(`[nosql.query] 
Array.isArray(result.items):`, Array.isArray(result.items)); + const items = result.items || []; + console.error(`[nosql.query] returning items:`, items); + return items; + } + + async delete(tableName, primaryKey, secondaryKey) { + const params = { + table_name: tableName, + primary_key: primaryKey, + secondary_key: secondaryKey, + }; + return this._make_request('delete', params); + } + + static get_instance() { + if (!nosql.instance) { + nosql.instance = new nosql(); + } + return nosql.instance; + } +} + +module.exports.nosql = nosql; diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/package.json b/benchmarks/wrappers/cloudflare/nodejs/container/package.json new file mode 100644 index 000000000..729c56fdc --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/package.json @@ -0,0 +1,10 @@ +{ + "name": "cloudflare-container-worker", + "version": "1.0.0", + "description": "Cloudflare Container Worker wrapper", + "main": "worker.js", + "type": "module", + "dependencies": { + "@cloudflare/containers": "^1.0.0" + } +} diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/storage.js b/benchmarks/wrappers/cloudflare/nodejs/container/storage.js new file mode 100644 index 000000000..f05d2fb14 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/storage.js @@ -0,0 +1,287 @@ +const fs = require('fs'); +const path = require('path'); +const uuid = require('uuid'); + +/** + * Storage module for Cloudflare Node.js Containers + * Uses HTTP proxy to access R2 storage through the Worker's R2 binding + */ + +class storage { + constructor() { + this.r2_enabled = true; + } + + static worker_url = null; // Set by handler from X-Worker-URL header + + + static worker_url = null; // Set by handler from X-Worker-URL header + + static unique_name(name) { + const parsed = path.parse(name); + const uuid_name = uuid.v4().split('-')[0]; + return path.join(parsed.dir, `${parsed.name}.${uuid_name}${parsed.ext}`); + } + + static init_instance(entry) { + if (!storage.instance) { + storage.instance = new storage(); + } + return storage.instance; + } + + static set_worker_url(url) { + storage.worker_url = url; + } + + static get_instance() { + if (!storage.instance) { + storage.init_instance(); + } + return storage.instance; + } + + async upload_stream(bucket, key, data) { + if (!this.r2_enabled) { + console.log('Warning: R2 not configured, skipping upload'); + return key; + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + const unique_key = storage.unique_name(key); + + // Convert data to Buffer if needed + let buffer; + if (Buffer.isBuffer(data)) { + buffer = data; + } else if (typeof data === 'string') { + buffer = Buffer.from(data, 'utf-8'); + } else if (data instanceof ArrayBuffer) { + buffer = Buffer.from(data); + } else { + buffer = Buffer.from(String(data), 'utf-8'); + } + + // Upload via worker proxy + const params = new URLSearchParams({ bucket, key: unique_key }); + const url = `${storage.worker_url}/r2/upload?${params}`; + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/octet-stream' }, + body: buffer, + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.text()}`); + } + + const result = await response.json(); + return result.key; + } catch (error) { + console.error('R2 upload error:', error); + throw new Error(`Failed to upload to R2: ${error.message}`); + } + } + + async download_stream(bucket, key) { + if 
(!this.r2_enabled) { + throw new Error('R2 not configured'); + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + // Download via worker proxy + const params = new URLSearchParams({ bucket, key }); + const url = `${storage.worker_url}/r2/download?${params}`; + + try { + const response = await fetch(url); + + if (response.status === 404) { + throw new Error(`Object not found: ${key}`); + } + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.text()}`); + } + + const arrayBuffer = await response.arrayBuffer(); + return Buffer.from(arrayBuffer); + } catch (error) { + console.error('R2 download error:', error); + throw new Error(`Failed to download from R2: ${error.message}`); + } + } + + upload(bucket, key, filepath) { + // Generate unique key synchronously so it can be returned immediately + const unique_key = storage.unique_name(key); + + // Read file from disk and upload + if (fs.existsSync(filepath)) { + const data = fs.readFileSync(filepath); + // Call internal version that doesn't generate another unique key + const uploadPromise = this._upload_stream_with_key(bucket, unique_key, data); + return [unique_key, uploadPromise]; + } + + console.error(`!!! [storage.upload] File not found: ${filepath}`); + throw new Error(`upload(): file not found: ${filepath}`); + } + + async _upload_stream_with_key(bucket, key, data) { + // Internal method that uploads with exact key (no unique naming) + console.log(`[storage._upload_stream_with_key] Starting upload: bucket=${bucket}, key=${key}, data_size=${data.length}`); + + if (!this.r2_enabled) { + console.log('Warning: R2 not configured, skipping upload'); + return key; + } + + if (!storage.worker_url) { + console.error('[storage._upload_stream_with_key] Worker URL not set!'); + throw new Error('Worker URL not set - cannot access R2'); + } + + console.log(`[storage._upload_stream_with_key] Worker URL: ${storage.worker_url}`); + + // Convert data to Buffer if needed + let buffer; + if (Buffer.isBuffer(data)) { + buffer = data; + } else if (typeof data === 'string') { + buffer = Buffer.from(data, 'utf-8'); + } else if (data instanceof ArrayBuffer) { + buffer = Buffer.from(data); + } else { + buffer = Buffer.from(String(data), 'utf-8'); + } + + // Upload via worker proxy + const params = new URLSearchParams({ bucket, key }); + const url = `${storage.worker_url}/r2/upload?${params}`; + console.log(`[storage._upload_stream_with_key] Uploading to URL: ${url}, buffer size: ${buffer.length}`); + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/octet-stream' }, + body: buffer, + }); + + console.log(`[storage._upload_stream_with_key] Response status: ${response.status}`); + + if (!response.ok) { + const errorText = await response.text(); + console.error(`[storage._upload_stream_with_key] Upload failed: ${response.status} - ${errorText}`); + throw new Error(`HTTP ${response.status}: ${errorText}`); + } + + const result = await response.json(); + console.log(`[storage._upload_stream_with_key] Upload successful, returned key: ${result.key}`); + return result.key; + } catch (error) { + console.error('R2 upload error:', error); + throw new Error(`Failed to upload to R2: ${error.message}`); + } + } + + async download(bucket, key, filepath) { + const data = await this.download_stream(bucket, key); + + let real_fp = filepath; + if (!filepath.startsWith('/tmp')) { + real_fp = path.join('/tmp', path.resolve(filepath)); + } + + // Write 
data to file + fs.mkdirSync(path.dirname(real_fp), { recursive: true }); + fs.writeFileSync(real_fp, data); + } + + async download_directory(bucket, prefix, out_path) { + // List all objects with the prefix and download each one + if (!this.r2_enabled) { + console.log('Warning: R2 not configured, skipping download_directory'); + return; + } + + if (!storage.worker_url) { + throw new Error('Worker URL not set - cannot access R2'); + } + + // List objects via worker proxy + const listParams = new URLSearchParams({ bucket, prefix }); + const listUrl = `${storage.worker_url}/r2/list?${listParams}`; + + try { + const response = await fetch(listUrl, { + method: 'GET', + headers: { 'Content-Type': 'application/json' }, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`HTTP ${response.status}: ${errorText}`); + } + + const result = await response.json(); + const objects = result.objects || []; + + for (const obj of objects) { + const file_name = obj.key; + const path_to_file = path.dirname(file_name); + fs.mkdirSync(path.join(out_path, path_to_file), { recursive: true }); + await this.download(bucket, file_name, path.join(out_path, file_name)); + } + } catch (error) { + console.error('R2 download_directory error:', error); + throw new Error(`Failed to download directory from R2: ${error.message}`); + } + } + + uploadStream(bucket, key) { + // Return [stream, promise, unique_key] to match native wrapper API + const unique_key = storage.unique_name(key); + + const stream = require('stream'); + const passThrough = new stream.PassThrough(); + const chunks = []; + + passThrough.on('data', (chunk) => chunks.push(chunk)); + + const upload = new Promise((resolve, reject) => { + passThrough.on('end', async () => { + try { + const buffer = Buffer.concat(chunks); + await this._upload_stream_with_key(bucket, unique_key, buffer); + resolve(); + } catch (err) { + reject(err); + } + }); + passThrough.on('error', reject); + }); + + return [passThrough, upload, unique_key]; + } + + async downloadStream(bucket, key) { + // Return a Promise that resolves to a readable stream + const data = await this.download_stream(bucket, key); + const stream = require('stream'); + const readable = new stream.Readable(); + readable.push(data); + readable.push(null); // Signal end of stream + return readable; + } +} + +module.exports.storage = storage; diff --git a/benchmarks/wrappers/cloudflare/nodejs/container/worker.js b/benchmarks/wrappers/cloudflare/nodejs/container/worker.js new file mode 100644 index 000000000..8dee914a0 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/container/worker.js @@ -0,0 +1,362 @@ +import { Container, getContainer } from "@cloudflare/containers"; +import { DurableObject } from "cloudflare:workers"; + +// Container wrapper class +export class ContainerWorker extends Container { + defaultPort = 8080; + sleepAfter = "30m"; +} + +// Durable Object for NoSQL storage (simple proxy to ctx.storage) +export class KVApiObject extends DurableObject { + constructor(ctx, env) { + super(ctx, env); + } + + async insert(key, value) { + await this.ctx.storage.put(key.join(':'), value); + return { success: true }; + } + + async update(key, value) { + await this.ctx.storage.put(key.join(':'), value); + return { success: true }; + } + + async get(key) { + const value = await this.ctx.storage.get(key.join(':')); + return { data: value || null }; + } + + async query(keyPrefix) { + const list = await this.ctx.storage.list(); + const items = []; + for (const [k, v] of 
list) { + items.push(v); + } + return { items }; + } + + async delete(key) { + await this.ctx.storage.delete(key.join(':')); + return { success: true }; + } +} + +export default { + async fetch(request, env) { + const url = new URL(request.url); + + // Health check endpoint + if (url.pathname === '/health' || url.pathname === '/_health') { + try { + const containerId = 'default'; + const id = env.CONTAINER_WORKER.idFromName(containerId); + const stub = env.CONTAINER_WORKER.get(id); + + // Make a simple GET request to the root path to verify container is responsive + const healthRequest = new Request('http://localhost/', { + method: 'GET', + headers: { + 'X-Health-Check': 'true' + } + }); + + const response = await stub.fetch(healthRequest); + + // Container is ready if it responds (even with an error from the benchmark handler) + // A 500 from the handler means the container is running, just not a valid benchmark request + if (response.status >= 200 && response.status < 600) { + return new Response('OK', { status: 200 }); + } else { + return new Response(JSON.stringify({ + error: 'Container not responding', + status: response.status + }), { + status: 503, + headers: { 'Content-Type': 'application/json' } + }); + } + + } catch (error) { + return new Response(JSON.stringify({ + error: 'Container failed to start', + details: error.message, + stack: error.stack + }), { + status: 503, + headers: { 'Content-Type': 'application/json' } + }); + } + } + + try { + // Handle NoSQL proxy requests - intercept BEFORE forwarding to container + if (url.pathname.startsWith('/nosql/')) { + return await handleNoSQLRequest(request, env); + } + + // Handle R2 proxy requests - intercept BEFORE forwarding to container + if (url.pathname.startsWith('/r2/')) { + return await handleR2Request(request, env); + } + + // Get or create container instance + const containerId = request.headers.get('x-container-id') || 'default'; + const id = env.CONTAINER_WORKER.idFromName(containerId); + const stub = env.CONTAINER_WORKER.get(id); + + // Clone request and add Worker URL as header so container knows where to proxy R2 requests + const modifiedRequest = new Request(request); + modifiedRequest.headers.set('X-Worker-URL', url.origin); + + // Forward the request to the container + return await stub.fetch(modifiedRequest); + + } catch (error) { + console.error('Worker error:', error); + + const errorMessage = error.message || String(error); + + // Handle container not ready errors with 503 + if (errorMessage.includes('Container failed to start') || + errorMessage.includes('no container instance') || + errorMessage.includes('Durable Object') || + errorMessage.includes('provisioning')) { + + return new Response(JSON.stringify({ + error: 'Container failed to start', + details: 'there is no container instance that can be provided to this durable object', + message: errorMessage + }), { + status: 503, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Other errors get 500 + return new Response(JSON.stringify({ + error: 'Internal server error', + details: errorMessage, + stack: error.stack + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + } +}; + +/** + * Handle NoSQL (Durable Object) requests proxied from the container + * Routes: + * - POST /nosql/insert - insert item + * - POST /nosql/update - update item + * - POST /nosql/get - get item + * - POST /nosql/query - query items + * - POST /nosql/delete - delete item + */ +async function handleNoSQLRequest(request, env) { + try { + 
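+    // Expected JSON body, as produced by container/nosql.js (values illustrative):
+    //   { "table_name": "shopping_cart",
+    //     "primary_key": ["cart_id", "cart-123"],
+    //     "secondary_key": ["product_id", "product-7"],
+    //     "data": { "name": "widget", "price": 10, "quantity": 2 } }
+    // `table_name` must match a Durable Object binding on the Worker's env; `query`
+    // requests send `secondary_key_name` instead of `secondary_key`.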
const url = new URL(request.url); + const operation = url.pathname.split('/').pop(); + + // Parse request body + const params = await request.json(); + const { table_name, primary_key, secondary_key, secondary_key_name, data } = params; + + // Get Durable Object stub - table_name should match the DO class name + if (!env[table_name]) { + return new Response(JSON.stringify({ + error: `Durable Object binding '${table_name}' not found` + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Create DO ID from primary key + const doId = env[table_name].idFromName(primary_key.join(':')); + const doStub = env[table_name].get(doId); + + // Forward operation to Durable Object + let result; + switch (operation) { + case 'insert': + result = await doStub.insert(secondary_key, data); + break; + case 'update': + result = await doStub.update(secondary_key, data); + break; + case 'get': + result = await doStub.get(secondary_key); + break; + case 'query': + result = await doStub.query(secondary_key_name); + break; + case 'delete': + result = await doStub.delete(secondary_key); + break; + default: + return new Response(JSON.stringify({ + error: 'Unknown NoSQL operation' + }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + return new Response(JSON.stringify(result || {}), { + headers: { 'Content-Type': 'application/json' } + }); + + } catch (error) { + console.error('NoSQL proxy error:', error); + return new Response(JSON.stringify({ + error: error.message, + stack: error.stack + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } +} + +/** + * Handle R2 storage requests proxied from the container + * Routes: + * - GET /r2/download?bucket=X&key=Y - download object + * - POST /r2/upload?bucket=X&key=Y - upload object (body contains data) + */ +async function handleR2Request(request, env) { + try { + const url = new URL(request.url); + const bucket = url.searchParams.get('bucket'); + const key = url.searchParams.get('key'); + + // Check if R2 binding exists + if (!env.R2) { + return new Response(JSON.stringify({ + error: 'R2 binding not configured' + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + + if (url.pathname === '/r2/list') { + // List objects in R2 with a prefix (only needs bucket) + if (!bucket) { + return new Response(JSON.stringify({ + error: 'Missing bucket parameter' + }), { + status: 400, + headers: { 'Content-Type': 'application/json' } + }); + } + + try { + const prefix = url.searchParams.get('prefix') || ''; + const list_res = await env.R2.list({ prefix }); + + return new Response(JSON.stringify({ + objects: list_res.objects || [] + }), { + headers: { 'Content-Type': 'application/json' } + }); + } catch (error) { + console.error('[worker.js /r2/list] Error:', error); + return new Response(JSON.stringify({ + error: error.message + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + } + + // All other R2 operations require both bucket and key + if (!bucket || !key) { + return new Response(JSON.stringify({ + error: 'Missing bucket or key parameter' + }), { + status: 400, + headers: { 'Content-Type': 'application/json' } + }); + } + + if (url.pathname === '/r2/download') { + // Download from R2 + const object = await env.R2.get(key); + + if (!object) { + return new Response(JSON.stringify({ + error: 'Object not found' + }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Return the object data + 
return new Response(object.body, { + headers: { + 'Content-Type': object.httpMetadata?.contentType || 'application/octet-stream', + 'Content-Length': object.size.toString() + } + }); + + } else if (url.pathname === '/r2/upload') { + // Upload to R2 + console.log(`[worker.js /r2/upload] bucket=${bucket}, key=${key}`); + console.log(`[worker.js /r2/upload] env.R2 exists:`, !!env.R2); + const data = await request.arrayBuffer(); + console.log(`[worker.js /r2/upload] Received ${data.byteLength} bytes`); + + // Use the key as-is (container already generates unique keys if needed) + try { + const putResult = await env.R2.put(key, data); + console.log(`[worker.js /r2/upload] R2.put() returned:`, putResult); + console.log(`[worker.js /r2/upload] Successfully uploaded to R2 with key=${key}`); + } catch (error) { + console.error(`[worker.js /r2/upload] R2.put() error:`, error); + throw error; + } + + return new Response(JSON.stringify({ + key: key + }), { + headers: { 'Content-Type': 'application/json' } + }); + + } else { + return new Response(JSON.stringify({ + error: 'Unknown R2 operation' + }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + } catch (error) { + console.error('R2 proxy error:', error); + return new Response(JSON.stringify({ + error: error.message, + stack: error.stack + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } +} + +/** + * Generate unique key for uploaded files + */ +function generateUniqueKey(key) { + const parts = key.split('.'); + const ext = parts.length > 1 ? '.' + parts.pop() : ''; + const name = parts.join('.'); + const uuid = crypto.randomUUID().split('-')[0]; + return `${name}.${uuid}${ext}`; +} diff --git a/benchmarks/wrappers/cloudflare/nodejs/handler.js b/benchmarks/wrappers/cloudflare/nodejs/handler.js new file mode 100644 index 000000000..df0cee97b --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/handler.js @@ -0,0 +1,284 @@ +import { DurableObject } from "cloudflare:workers"; + +// Durable Object class for KV API compatibility +export class KVApiObject extends DurableObject { + constructor(state, env) { + super(state, env); + this.storage = state.storage; + } + + // Proxy methods to make the storage API accessible from the stub + async put(key, value) { + return await this.storage.put(key, value); + } + + async get(key) { + return await this.storage.get(key); + } + + async delete(key) { + return await this.storage.delete(key); + } + + async list(options) { + return await this.storage.list(options); + } +} + +export default { + async fetch(request, env) { + try { + // Store R2 bucket binding and benchmark name in globals for fs-polyfill access + if (env.R2) { + globalThis.R2_BUCKET = env.R2; + } + if (env.BENCHMARK_NAME) { + globalThis.BENCHMARK_NAME = env.BENCHMARK_NAME; + } + + if (request.url.includes('favicon')) { + return new Response('None'); + } + + // Get unique request ID from Cloudflare (CF-Ray header) + const req_id = request.headers.get('CF-Ray') || crypto.randomUUID(); + + // Start timing measurements + const start = performance.now(); + const begin = Date.now() / 1000; + + + // Parse JSON body first (similar to Azure handler which uses req.body) + const req_text = await request.text(); + let event = {}; + if (req_text && req_text.length > 0) { + try { + event = JSON.parse(req_text); + } catch (e) { + // If body isn't JSON, keep event empty + event = {}; + } + } + + // Parse query string into event (URL parameters override/merge with body) + // This makes it compatible 
with both input formats + const urlParts = request.url.split('?'); + if (urlParts.length > 1) { + const query = urlParts[1]; + const pairs = query.split('&'); + for (const p of pairs) { + const [k, v] = p.split('='); + try { + if (v === undefined) { + event[k] = null; + } else if (!Number.isNaN(Number(v)) && Number.isFinite(Number(v))) { + // mirror Python attempt to convert to int + const n = Number(v); + event[k] = Number.isInteger(n) ? parseInt(v, 10) : n; + } else { + event[k] = decodeURIComponent(v); + } + } catch (e) { + event[k] = v; + } + } + } + + // Set timestamps + const income_timestamp = Math.floor(Date.now() / 1000); + event['request-id'] = req_id; + event['income-timestamp'] = income_timestamp; + + // Load the benchmark function module and initialize storage if available + // With nodejs_compat enabled, we can use require() for CommonJS modules + let funcModule; + try { + // Fallback to dynamic import for ES modules + funcModule = await import('./function.js'); + } catch (e2) { + throw new Error('Failed to import benchmark function module: ' + e2.message); + } + + // Initialize storage - try function module first, then fall back to wrapper storage + try { + if (funcModule && funcModule.storage && typeof funcModule.storage.init_instance === 'function') { + funcModule.storage.init_instance({ env, request }); + } else { + // Function doesn't export storage, so initialize wrapper storage directly + try { + const storageModule = await import('./storage.js'); + if (storageModule && storageModule.storage && typeof storageModule.storage.init_instance === 'function') { + storageModule.storage.init_instance({ env, request }); + } + } catch (storageErr) { + // Ignore errors from storage initialization + } + } + } catch (e) { + // don't fail the request if storage init isn't available + } + + // Initialize nosql if environment variable is set + if (env.NOSQL_STORAGE_DATABASE) { + try { + const nosqlModule = await import('./nosql.js'); + if (nosqlModule && nosqlModule.nosql && typeof nosqlModule.nosql.init_instance === 'function') { + nosqlModule.nosql.init_instance({ env, request }); + } + } catch (e) { + // nosql module might not exist for all benchmarks + console.log('Could not initialize nosql:', e.message); + } + } + + // Execute the benchmark handler + let ret; + try { + // Wrap the handler execution to handle sync-style async code + // The benchmark code calls async nosql methods but doesn't await them + // We need to serialize the execution + if (funcModule && typeof funcModule.handler === 'function') { + // Create a promise-aware execution context + const handler = funcModule.handler; + + // Execute handler - it will return { result: [Promise, Promise, ...] 
} + ret = await Promise.resolve(handler(event)); + + // Deeply resolve all promises in the result + if (ret && ret.result && Array.isArray(ret.result)) { + ret.result = await Promise.all(ret.result.map(async item => await Promise.resolve(item))); + } + } else if (funcModule && funcModule.default && typeof funcModule.default.handler === 'function') { + const handler = funcModule.default.handler; + ret = await Promise.resolve(handler(event)); + + if (ret && ret.result && Array.isArray(ret.result)) { + ret.result = await Promise.all(ret.result.map(async item => await Promise.resolve(item))); + } + } else { + throw new Error('benchmark handler function not found'); + } + } catch (err) { + // Trigger a fetch request to update the timer before measuring + // Time measurements only update after a fetch request or R2 operation + try { + // Fetch the worker's own URL with favicon to minimize overhead + const finalUrl = new URL(request.url); + finalUrl.pathname = '/favicon'; + await fetch(finalUrl.toString(), { method: 'HEAD' }); + } catch (e) { + // Ignore fetch errors + } + // Calculate timing even for errors + const end = Date.now() / 1000; + const elapsed = performance.now() - start; + const micro = elapsed * 1000; // Convert milliseconds to microseconds + + // Mirror Python behavior: return structured error payload + const errorPayload = JSON.stringify({ + begin: begin, + end: end, + compute_time: micro, + results_time: 0, + result: { output: null }, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: '0', + error: String(err && err.message ? err.message : err), + stack: err && err.stack ? err.stack : undefined, + event: event, + env: env, + }); + return new Response(errorPayload, { status: 500, headers: { 'Content-Type': 'application/json' } }); + } + + // Trigger a fetch request to update the timer before measuring + // Time measurements only update after a fetch request or R2 operation + try { + // Fetch the worker's own URL with favicon to minimize overhead + const finalUrl = new URL(request.url); + finalUrl.pathname = '/favicon'; + await fetch(finalUrl.toString(), { method: 'HEAD' }); + } catch (e) { + // Ignore fetch errors + } + + // Now read the updated timer + const end = Date.now() / 1000; + const elapsed = performance.now() - start; + const micro = elapsed * 1000; // Convert milliseconds to microseconds + + // Build log_data similar to Python handler + const log_data = { output: ret && ret.result !== undefined ? ret.result : ret }; + if (ret && ret.measurement !== undefined) { + log_data.measurement = ret.measurement; + } else { + log_data.measurement = {}; + } + + // Add memory usage to measurement + const memUsage = process.memoryUsage(); + const memory_mb = memUsage.heapUsed / 1024 / 1024; + log_data.measurement.memory_used_mb = memory_mb; + + if (event.logs !== undefined) { + log_data.time = 0; + } + + if (event.html) { + return new Response(String(ret && ret.result !== undefined ? ret.result : ''), { + headers: { 'Content-Type': 'text/html; charset=utf-8' }, + }); + } + + const responseBody = JSON.stringify({ + begin: begin, + end: end, + compute_time: micro, + results_time: 0, + result: log_data, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: req_id, + }); + + return new Response(responseBody, { headers: { 'Content-Type': 'application/json' } }); + } catch (topLevelError) { + // Catch any uncaught errors (module loading, syntax errors, etc.) 
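+      // Note: `begin` and `start` are const-declared inside the try block above, so they are
+      // not in scope here; the typeof guards below therefore fall back to zeroed timing.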
+ // Try to include timing if available + let errorBegin = 0; + let errorEnd = 0; + let errorMicro = 0; + try { + errorEnd = Date.now() / 1000; + if (typeof begin !== 'undefined' && typeof start !== 'undefined') { + errorBegin = begin; + const elapsed = performance.now() - start; + errorMicro = elapsed * 1000; + } + } catch (e) { + // Ignore timing errors in error handler + } + + const errorPayload = JSON.stringify({ + begin: errorBegin, + end: errorEnd, + compute_time: errorMicro, + results_time: 0, + result: { output: null }, + is_cold: false, + is_cold_worker: false, + container_id: '0', + environ_container_id: 'no_id', + request_id: '0', + error: `Top-level error: ${topLevelError && topLevelError.message ? topLevelError.message : String(topLevelError)}`, + stack: topLevelError && topLevelError.stack ? topLevelError.stack : undefined, + }); + return new Response(errorPayload, { status: 500, headers: { 'Content-Type': 'application/json' } }); + } + }, +}; diff --git a/benchmarks/wrappers/cloudflare/nodejs/nosql.js b/benchmarks/wrappers/cloudflare/nodejs/nosql.js new file mode 100644 index 000000000..67b73a1fd --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/nosql.js @@ -0,0 +1,98 @@ +// NoSQL wrapper for Cloudflare Workers +// Uses Durable Objects for storage +// Returns Promises that the handler will resolve + +class nosql { + constructor() { + this.env = null; + } + + static init_instance(entry) { + // Reuse existing instance if it exists, otherwise create new one + if (!nosql.instance) { + nosql.instance = new nosql(); + } + + if (entry && entry.env) { + nosql.instance.env = entry.env; + } + } + + _get_table(tableName) { + // Don't cache stubs - they are request-scoped and cannot be reused + // Always create a fresh stub for each request + if (!this.env) { + throw new Error(`nosql env not initialized for table ${tableName}`); + } + + if (!this.env.DURABLE_STORE) { + // Debug: log what we have + const envKeys = Object.keys(this.env || {}); + const durableStoreType = typeof this.env.DURABLE_STORE; + throw new Error( + `DURABLE_STORE binding not found. 
env keys: [${envKeys.join(', ')}], DURABLE_STORE type: ${durableStoreType}` + ); + } + + // Get a Durable Object ID based on the table name and create a fresh stub + const id = this.env.DURABLE_STORE.idFromName(tableName); + return this.env.DURABLE_STORE.get(id); + } + + // Async methods - build.js will patch function.js to await these + async insert(tableName, primaryKey, secondaryKey, data) { + const keyData = { ...data }; + keyData[primaryKey[0]] = primaryKey[1]; + keyData[secondaryKey[0]] = secondaryKey[1]; + + const durableObjStub = this._get_table(tableName); + const compositeKey = `${primaryKey[1]}#${secondaryKey[1]}`; + + await durableObjStub.put(compositeKey, keyData); + } + + async get(tableName, primaryKey, secondaryKey) { + const durableObjStub = this._get_table(tableName); + const compositeKey = `${primaryKey[1]}#${secondaryKey[1]}`; + + const result = await durableObjStub.get(compositeKey); + return result || null; + } + + async update(tableName, primaryKey, secondaryKey, updates) { + const existing = await this.get(tableName, primaryKey, secondaryKey) || {}; + const merged = { ...existing, ...updates }; + await this.insert(tableName, primaryKey, secondaryKey, merged); + } + + async query(tableName, primaryKey, secondaryKeyName) { + const durableObjStub = this._get_table(tableName); + const prefix = `${primaryKey[1]}#`; + + // List all keys with the prefix + const allEntries = await durableObjStub.list({ prefix }); + const results = []; + + for (const [key, value] of allEntries) { + results.push(value); + } + + return results; + } + + async delete(tableName, primaryKey, secondaryKey) { + const durableObjStub = this._get_table(tableName); + const compositeKey = `${primaryKey[1]}#${secondaryKey[1]}`; + + await durableObjStub.delete(compositeKey); + } + + static get_instance() { + if (!nosql.instance) { + nosql.instance = new nosql(); + } + return nosql.instance; + } +} + +export { nosql }; diff --git a/benchmarks/wrappers/cloudflare/nodejs/request-polyfill.js b/benchmarks/wrappers/cloudflare/nodejs/request-polyfill.js new file mode 100644 index 000000000..f44bfa232 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/request-polyfill.js @@ -0,0 +1,100 @@ +/** + * Polyfill for the 'request' module using Cloudflare Workers fetch API + * Implements the minimal interface needed for benchmark compatibility + */ + +const { Writable } = require('node:stream'); +const fs = require('node:fs'); + +function request(url, options, callback) { + // Handle different call signatures + if (typeof options === 'function') { + callback = options; + options = {}; + } + + // Add default headers to mimic a browser request + const fetchOptions = { + ...options, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Accept': '*/*', + ...((options && options.headers) || {}) + } + }; + + // Create a simple object that has a pipe method + const requestObj = { + pipe(destination) { + // Perform the fetch and write to destination + fetch(url, fetchOptions) + .then(async (response) => { + if (!response.ok) { + const error = new Error(`HTTP ${response.status}: ${response.statusText}`); + error.statusCode = response.status; + destination.emit('error', error); + if (callback) callback(error, response, null); + return destination; + } + + // Get the response as arrayBuffer and write it all at once + const buffer = await response.arrayBuffer(); + + // Write the buffer to the destination + if (destination.write) { 
+ destination.write(Buffer.from(buffer)); + destination.end(); + } + + if (callback) callback(null, response, Buffer.from(buffer)); + }) + .catch((error) => { + destination.emit('error', error); + if (callback) callback(error, null, null); + }); + + return destination; + }, + + abort() { + // No-op for compatibility + } + }; + + return requestObj; +} + +// Add common request methods +request.get = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'GET' }, callback); +}; + +request.post = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'POST' }, callback); +}; + +request.put = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'PUT' }, callback); +}; + +request.delete = (url, options, callback) => { + if (typeof options === 'function') { + callback = options; + options = {}; + } + return request(url, { ...options, method: 'DELETE' }, callback); +}; + +module.exports = request; diff --git a/benchmarks/wrappers/cloudflare/nodejs/storage.js b/benchmarks/wrappers/cloudflare/nodejs/storage.js new file mode 100644 index 000000000..a49cc3347 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/nodejs/storage.js @@ -0,0 +1,264 @@ +const fs = require('fs'); +const path = require('path'); +const uuid = require('uuid'); + +// Storage wrapper compatible with the Python storage implementation. +// Supports Cloudflare R2 (via env.R2) when available; falls back to +// filesystem-based operations when running in Node.js (for local tests). + +class storage { + constructor() { + this.handle = null; // R2 binding + this.written_files = new Set(); + } + + static unique_name(name) { + const parsed = path.parse(name); + const uuid_name = uuid.v4().split('-')[0]; + return path.join(parsed.dir, `${parsed.name}.${uuid_name}${parsed.ext}`); + } + + // entry is expected to be an object with `env` (Workers) or nothing for Node + static init_instance(entry) { + storage.instance = new storage(); + if (entry && entry.env && entry.env.R2) { + storage.instance.handle = entry.env.R2; + } + storage.instance.written_files = new Set(); + } + + // Upload a file given a local filepath. In Workers env this is not available + // so callers should use upload_stream or pass raw data. For Node.js we read + // the file from disk and put it into R2 if available, otherwise throw. 
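+  // Returns [unique_key, uploadPromise]; callers await the promise to complete the upload,
+  // mirroring how the compression benchmark consumes it (paths illustrative):
+  //   const [key_name, uploadPromise] = storage_handler.upload(bucket, 'out/archive.tar.gz', '/tmp/archive.tar.gz');
+  //   await uploadPromise;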
+ upload(__bucket, key, filepath) { + // Use singleton instance if available, otherwise use this instance + const instance = storage.instance || this; + + // If file was previously written during this invocation, use /tmp absolute + let realPath = filepath; + if (instance.written_files.has(filepath)) { + realPath = path.join('/tmp', path.resolve(filepath)); + } + + const unique_key = storage.unique_name(key); + + // Try filesystem first (for Workers with nodejs_compat that have /tmp) + if (fs && fs.existsSync(realPath)) { + const data = fs.readFileSync(realPath); + + if (instance.handle) { + const uploadPromise = instance.handle.put(unique_key, data); + return [unique_key, uploadPromise]; + } else { + return [unique_key, Promise.resolve()]; + } + } + + // Fallback: In Workers environment with R2, check if file exists in R2 + // (it may have been written by fs-polyfill's createWriteStream) + if (instance.handle) { + // Normalize the path to match what fs-polyfill would use + let normalizedPath = realPath.replace(/^\.?\//, '').replace(/^tmp\//, ''); + + // Add benchmark name prefix if available (matching fs-polyfill behavior) + if (typeof globalThis !== 'undefined' && globalThis.BENCHMARK_NAME && + !normalizedPath.startsWith(globalThis.BENCHMARK_NAME + '/')) { + normalizedPath = globalThis.BENCHMARK_NAME + '/' + normalizedPath; + } + + // Read from R2 and re-upload with unique key + const uploadPromise = instance.handle.get(normalizedPath).then(async (obj) => { + if (obj) { + const data = await obj.arrayBuffer(); + return instance.handle.put(unique_key, data); + } else { + throw new Error(`File not found in R2: ${normalizedPath} (original path: ${filepath})`); + } + }); + + return [unique_key, uploadPromise]; + } + + // If running in Workers (no fs) and caller provided Buffer/Stream, they + // should call upload_stream directly. Otherwise, throw. + throw new Error('upload(): file not found on disk and no R2 handle provided'); + } + + async download(__bucket, key, filepath) { + const instance = storage.instance || this; + const data = await this.download_stream(__bucket, key); + + let real_fp = filepath; + if (!filepath.startsWith('/tmp')) { + real_fp = path.join('/tmp', path.resolve(filepath)); + } + + instance.written_files.add(filepath); + + // Write data to file if we have fs + if (fs) { + fs.mkdirSync(path.dirname(real_fp), { recursive: true }); + if (Buffer.isBuffer(data)) { + fs.writeFileSync(real_fp, data); + } else { + fs.writeFileSync(real_fp, Buffer.from(String(data))); + } + return; + } + + // In Workers environment, callers should use stream APIs directly. 
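        // For data that stays in memory, download_stream()/downloadStream() below read
        // directly from the R2 binding instead of going through a temporary file.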
+ return; + } + + async download_directory(__bucket, prefix, out_path) { + const instance = storage.instance || this; + + if (!instance.handle) { + throw new Error('download_directory requires R2 binding (env.R2)'); + } + + const list_res = await instance.handle.list({ prefix }); + const objects = list_res.objects || []; + for (const obj of objects) { + const file_name = obj.key; + const path_to_file = path.dirname(file_name); + fs.mkdirSync(path.join(out_path, path_to_file), { recursive: true }); + await this.download(__bucket, file_name, path.join(out_path, file_name)); + } + } + + async upload_stream(__bucket, key, data) { + const instance = storage.instance || this; + const unique_key = storage.unique_name(key); + if (instance.handle) { + // R2 put accepts ArrayBuffer, ReadableStream, or string + await instance.handle.put(unique_key, data); + return unique_key; + } + + // If no R2, write to local fs as fallback + if (fs) { + const outPath = path.join('/tmp', unique_key); + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + if (Buffer.isBuffer(data)) fs.writeFileSync(outPath, data); + else fs.writeFileSync(outPath, Buffer.from(String(data))); + return unique_key; + } + + throw new Error('upload_stream(): no storage backend available'); + } + + async download_stream(__bucket, key) { + const instance = storage.instance || this; + + if (instance.handle) { + const obj = await instance.handle.get(key); + if (!obj) return null; + // R2 object provides arrayBuffer()/text() helpers in Workers + if (typeof obj.arrayBuffer === 'function') { + const ab = await obj.arrayBuffer(); + return Buffer.from(ab); + } + if (typeof obj.text === 'function') { + return await obj.text(); + } + // Fallback: return null + return null; + } + + // Fallback to local filesystem + const localPath = path.join('/tmp', key); + if (fs && fs.existsSync(localPath)) { + return fs.readFileSync(localPath); + } + + throw new Error('download_stream(): object not found'); + } + + // Additional stream methods for compatibility with Azure storage API + // These provide a stream-based interface similar to Azure's uploadStream/downloadStream + uploadStream(__bucket, key) { + const unique_key = storage.unique_name(key); + + if (this.handle) { + // For R2, we create a PassThrough stream that collects data + // then uploads when ended + const stream = require('stream'); + const passThrough = new stream.PassThrough(); + const chunks = []; + + passThrough.on('data', (chunk) => chunks.push(chunk)); + + const upload = new Promise((resolve, reject) => { + passThrough.on('end', async () => { + try { + const buffer = Buffer.concat(chunks); + await this.handle.put(unique_key, buffer); + resolve(); + } catch (err) { + reject(err); + } + }); + passThrough.on('error', reject); + }); + + return [passThrough, upload, unique_key]; + } + + // Fallback to filesystem + if (fs) { + const stream = require('stream'); + const outPath = path.join('/tmp', unique_key); + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + const writeStream = fs.createWriteStream(outPath); + const upload = new Promise((resolve, reject) => { + writeStream.on('finish', resolve); + writeStream.on('error', reject); + }); + return [writeStream, upload, unique_key]; + } + + throw new Error('uploadStream(): no storage backend available'); + } + + async downloadStream(__bucket, key) { + if (this.handle) { + const obj = await this.handle.get(key); + if (!obj) return null; + + // R2 object has a body ReadableStream + if (obj.body) { + return obj.body; + } + + // 
Fallback: convert to buffer then to stream + if (typeof obj.arrayBuffer === 'function') { + const stream = require('stream'); + const ab = await obj.arrayBuffer(); + const buffer = Buffer.from(ab); + const readable = new stream.PassThrough(); + readable.end(buffer); + return readable; + } + + return null; + } + + // Fallback to local filesystem + const localPath = path.join('/tmp', key); + if (fs && fs.existsSync(localPath)) { + return fs.createReadStream(localPath); + } + + throw new Error('downloadStream(): object not found'); + } + + static get_instance() { + if (!storage.instance) { + throw new Error('must init storage singleton first'); + } + return storage.instance; + } +} + +module.exports.storage = storage; diff --git a/benchmarks/wrappers/cloudflare/python/container/handler.py b/benchmarks/wrappers/cloudflare/python/container/handler.py new file mode 100644 index 000000000..810c26ee3 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/container/handler.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Container handler for Cloudflare Workers - Python +This handler is used when deploying as a container worker +""" + +import json +import sys +import os +import traceback +import resource +from http.server import HTTPServer, BaseHTTPRequestHandler +from urllib.parse import urlparse, parse_qs +import datetime + +# Monkey-patch requests library to add User-Agent header +# This is needed because many HTTP servers (like Wikimedia) reject requests without User-Agent +try: + import requests + original_request = requests.request + + def patched_request(method, url, **kwargs): + if 'headers' not in kwargs: + kwargs['headers'] = {} + if 'User-Agent' not in kwargs['headers']: + kwargs['headers']['User-Agent'] = 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2' + return original_request(method, url, **kwargs) + + requests.request = patched_request + print("Monkey-patched requests library to add User-Agent header") +except ImportError: + print("requests library not available, skipping User-Agent monkey-patch") + +# Also patch urllib for libraries that use it directly +import urllib.request +original_urlopen = urllib.request.urlopen + +def patched_urlopen(url, data=None, timeout=None, **kwargs): + if isinstance(url, str): + req = urllib.request.Request(url, data=data) + req.add_header('User-Agent', 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2') + return original_urlopen(req, timeout=timeout, **kwargs) + elif isinstance(url, urllib.request.Request): + if not url.has_header('User-Agent'): + url.add_header('User-Agent', 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2') + return original_urlopen(url, data=data, timeout=timeout, **kwargs) + else: + return original_urlopen(url, data=data, timeout=timeout, **kwargs) + +urllib.request.urlopen = patched_urlopen +print("Monkey-patched urllib.request.urlopen to add User-Agent header") + +# Import the benchmark handler function +from function import handler as benchmark_handler + +# Import storage and nosql if available +try: + import storage +except ImportError: + storage = None + print("Storage module not available") + +try: + import nosql +except ImportError: + nosql = None + print("NoSQL module not available") + +PORT = int(os.environ.get('PORT', 8080)) + + +class ContainerHandler(BaseHTTPRequestHandler): + def do_GET(self): + self.handle_request() + + def do_POST(self): + self.handle_request() + + def handle_request(self): + # Handle favicon 
requests + if 'favicon' in self.path: + self.send_response(200) + self.end_headers() + self.wfile.write(b'None') + return + + try: + # Get unique request ID from Cloudflare (CF-Ray header) + import uuid + req_id = self.headers.get('CF-Ray', str(uuid.uuid4())) + + # Extract Worker URL from header for R2 and NoSQL proxy + worker_url = self.headers.get('X-Worker-URL') + if worker_url: + if storage: + storage.storage.set_worker_url(worker_url) + if nosql: + nosql.nosql.set_worker_url(worker_url) + print(f"Set worker URL for R2/NoSQL proxy: {worker_url}") + + # Read request body + content_length = int(self.headers.get('Content-Length', 0)) + body = self.rfile.read(content_length).decode('utf-8') if content_length > 0 else '' + + # Parse event from JSON body or URL params + event = {} + if body: + try: + event = json.loads(body) + except json.JSONDecodeError as e: + print(f'Failed to parse JSON body: {e}') + + # Parse URL parameters + parsed_url = urlparse(self.path) + params = parse_qs(parsed_url.query) + for key, values in params.items(): + if key not in event and values: + value = values[0] + try: + event[key] = int(value) + except ValueError: + event[key] = value + + # Add request metadata + income_timestamp = datetime.datetime.now().timestamp() + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + + # Measure execution time + begin = datetime.datetime.now().timestamp() + + # Call the benchmark function + result = benchmark_handler(event) + + # Calculate timing + end = datetime.datetime.now().timestamp() + compute_time = end - begin + + # Prepare response matching native handler format exactly + log_data = { + 'output': result['result'] + } + if 'measurement' in result: + log_data['measurement'] = result['measurement'] + else: + log_data['measurement'] = {} + + # Add memory usage to measurement + memory_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0 + log_data['measurement']['memory_used_mb'] = memory_mb + + response_data = { + 'begin': begin, + 'end': end, + 'results_time': 0, + 'result': log_data, + 'is_cold': False, + 'is_cold_worker': False, + 'container_id': "0", + 'environ_container_id': "no_id", + 'request_id': req_id + } + + # Send response + if event.get('html'): + # For HTML requests, return just the result + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + html_result = result.get('result', result) + self.wfile.write(str(html_result).encode('utf-8')) + else: + # For API requests, return structured response + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(response_data).encode('utf-8')) + + except Exception as error: + print(f'Error processing request: {error}') + traceback.print_exc() + self.send_response(500) + self.send_header('Content-Type', 'application/json') + self.end_headers() + error_response = { + 'error': str(error), + 'traceback': traceback.format_exc() + } + self.wfile.write(json.dumps(error_response).encode('utf-8')) + + def log_message(self, format, *args): + # Override to use print instead of stderr + print(f"{self.address_string()} - {format % args}") + + +if __name__ == '__main__': + server = HTTPServer(('0.0.0.0', PORT), ContainerHandler) + print(f'Container server listening on port {PORT}') + server.serve_forever() diff --git a/benchmarks/wrappers/cloudflare/python/container/nosql.py b/benchmarks/wrappers/cloudflare/python/container/nosql.py new file mode 100644 index 
000000000..936a49901 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/container/nosql.py @@ -0,0 +1,117 @@ +""" +NoSQL module for Cloudflare Python Containers +Uses HTTP proxy to access Durable Objects through the Worker's binding +""" +import json +import urllib.request +import urllib.parse +from typing import List, Optional, Tuple + + +class nosql: + """NoSQL client for containers using HTTP proxy to Worker's Durable Object""" + + instance: Optional["nosql"] = None + worker_url = None # Set by handler from X-Worker-URL header + + @staticmethod + def init_instance(*args, **kwargs): + """Initialize singleton instance""" + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance + + @staticmethod + def set_worker_url(url): + """Set worker URL for NoSQL proxy (called by handler)""" + nosql.worker_url = url + + def _make_request(self, operation: str, params: dict) -> dict: + """Make HTTP request to worker nosql proxy""" + if not nosql.worker_url: + raise RuntimeError("Worker URL not set - cannot access NoSQL") + + url = f"{nosql.worker_url}/nosql/{operation}" + data = json.dumps(params).encode('utf-8') + + req = urllib.request.Request(url, data=data, method='POST') + req.add_header('Content-Type', 'application/json') + + try: + with urllib.request.urlopen(req) as response: + return json.loads(response.read().decode('utf-8')) + except urllib.error.HTTPError as e: + error_body = e.read().decode('utf-8') + try: + error_data = json.loads(error_body) + raise RuntimeError(f"NoSQL operation failed: {error_data.get('error', error_body)}") + except json.JSONDecodeError: + raise RuntimeError(f"NoSQL operation failed: {error_body}") + except Exception as e: + raise RuntimeError(f"NoSQL operation failed: {e}") + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key), + 'data': data + } + return self._make_request('insert', params) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key), + 'data': data + } + return self._make_request('update', params) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key) + } + result = self._make_request('get', params) + return result.get('data') + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key_name': secondary_key_name + } + result = self._make_request('query', params) + return result.get('items', []) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + params = { + 'table_name': table_name, + 'primary_key': list(primary_key), + 'secondary_key': list(secondary_key) + } + return self._make_request('delete', params) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/cloudflare/python/container/storage.py b/benchmarks/wrappers/cloudflare/python/container/storage.py new file mode 100644 index 
000000000..53ab90d54 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/container/storage.py @@ -0,0 +1,201 @@ +""" +Storage module for Cloudflare Python Containers +Uses HTTP proxy to access R2 storage through the Worker's R2 binding +""" +import io +import os +import json +import urllib.request +import urllib.parse + +class storage: + """R2 storage client for containers using HTTP proxy to Worker""" + instance = None + worker_url = None # Set by handler from X-Worker-URL header + + def __init__(self): + # Container accesses R2 through worker.js proxy + # Worker URL is injected via X-Worker-URL header in each request + self.r2_enabled = True + + @staticmethod + def init_instance(entry=None): + """Initialize singleton instance""" + if storage.instance is None: + storage.instance = storage() + return storage.instance + + @staticmethod + def get_instance(): + """Get singleton instance""" + if storage.instance is None: + storage.init_instance() + return storage.instance + + @staticmethod + def set_worker_url(url): + """Set worker URL for R2 proxy (called by handler)""" + storage.worker_url = url + + @staticmethod + def unique_name(name): + """Generate unique name for file""" + import uuid + name_part, extension = os.path.splitext(name) + return f'{name_part}.{str(uuid.uuid4()).split("-")[0]}{extension}' + + def upload_stream(self, bucket: str, key: str, data): + """Upload data to R2 via worker proxy""" + if not self.r2_enabled: + print("Warning: R2 not configured, skipping upload") + return key + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Handle BytesIO objects + if isinstance(data, io.BytesIO): + data = data.getvalue() + + # Convert to bytes if needed + if isinstance(data, str): + data = data.encode('utf-8') + + # Upload via worker proxy + params = urllib.parse.urlencode({'bucket': bucket, 'key': key}) + url = f"{storage.worker_url}/r2/upload?{params}" + + req = urllib.request.Request(url, data=data, method='POST') + req.add_header('Content-Type', 'application/octet-stream') + + try: + with urllib.request.urlopen(req) as response: + result = json.loads(response.read().decode('utf-8')) + return result['key'] + except Exception as e: + print(f"R2 upload error: {e}") + raise RuntimeError(f"Failed to upload to R2: {e}") + + def download_stream(self, bucket: str, key: str) -> bytes: + """Download data from R2 via worker proxy""" + if not self.r2_enabled: + raise RuntimeError("R2 not configured") + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Download via worker proxy + params = urllib.parse.urlencode({'bucket': bucket, 'key': key}) + url = f"{storage.worker_url}/r2/download?{params}" + + try: + with urllib.request.urlopen(url) as response: + return response.read() + except urllib.error.HTTPError as e: + if e.code == 404: + raise RuntimeError(f"Object not found: {key}") + else: + raise RuntimeError(f"Failed to download from R2: {e}") + except Exception as e: + print(f"R2 download error: {e}") + raise RuntimeError(f"Failed to download from R2: {e}") + + def upload(self, bucket, key, filepath): + """Upload file from disk with unique key generation""" + # Generate unique key to avoid conflicts + unique_key = self.unique_name(key) + + with open(filepath, 'rb') as f: + data = f.read() + # Upload with the unique key + self._upload_with_key(bucket, unique_key, data) + return unique_key + + def _upload_with_key(self, bucket: str, key: str, data): + """Upload data to R2 via worker proxy with 
exact key (internal method)""" + if not self.r2_enabled: + print("Warning: R2 not configured, skipping upload") + return + + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Handle BytesIO objects + if isinstance(data, io.BytesIO): + data = data.getvalue() + + # Convert to bytes if needed + if isinstance(data, str): + data = data.encode('utf-8') + + # Upload via worker proxy with exact key + params = urllib.parse.urlencode({'bucket': bucket, 'key': key}) + url = f"{storage.worker_url}/r2/upload?{params}" + + req = urllib.request.Request(url, data=data, method='POST') + req.add_header('Content-Type', 'application/octet-stream') + + try: + with urllib.request.urlopen(req) as response: + result = json.loads(response.read().decode('utf-8')) + print(f"[storage._upload_with_key] Upload successful, key={result['key']}") + except Exception as e: + print(f"R2 upload error: {e}") + raise RuntimeError(f"Failed to upload to R2: {e}") + + def download(self, bucket, key, filepath): + """Download file to disk""" + data = self.download_stream(bucket, key) + os.makedirs(os.path.dirname(filepath), exist_ok=True) + with open(filepath, 'wb') as f: + f.write(data) + + def download_directory(self, bucket, prefix, local_path): + """ + Download all files with a given prefix to a local directory. + Lists objects via /r2/list endpoint and downloads each one. + """ + if not storage.worker_url: + raise RuntimeError("Worker URL not set - cannot access R2") + + # Create local directory + os.makedirs(local_path, exist_ok=True) + + # List objects with prefix via worker proxy + params = urllib.parse.urlencode({'bucket': bucket, 'prefix': prefix}) + list_url = f"{storage.worker_url}/r2/list?{params}" + + try: + req = urllib.request.Request(list_url) + req.add_header('User-Agent', 'SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2') + + with urllib.request.urlopen(req) as response: + result = json.loads(response.read().decode('utf-8')) + objects = result.get('objects', []) + + print(f"Found {len(objects)} objects with prefix '{prefix}'") + + # Download each object + for obj in objects: + obj_key = obj['key'] + # Create local file path by removing the prefix + relative_path = obj_key + if prefix and obj_key.startswith(prefix): + relative_path = obj_key[len(prefix):].lstrip('/') + + local_file_path = os.path.join(local_path, relative_path) + + # Create directory structure if needed + local_dir = os.path.dirname(local_file_path) + if local_dir: + os.makedirs(local_dir, exist_ok=True) + + # Download the file + print(f"Downloading {obj_key} to {local_file_path}") + self.download(bucket, obj_key, local_file_path) + + return local_path + + except Exception as e: + print(f"Error listing/downloading directory: {e}") + raise RuntimeError(f"Failed to download directory: {e}") diff --git a/benchmarks/wrappers/cloudflare/python/handler.py b/benchmarks/wrappers/cloudflare/python/handler.py new file mode 100644 index 000000000..19eff8baf --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/handler.py @@ -0,0 +1,200 @@ +import datetime, io, json, os, uuid, sys, ast +import asyncio +import importlib.util +import traceback +import time +try: + import resource + HAS_RESOURCE = True +except ImportError: + # Pyodide (Python native workers) doesn't support resource module + HAS_RESOURCE = False +from workers import WorkerEntrypoint, Response, DurableObject +from js import fetch as js_fetch, URL + +## sys.path.append(os.path.join(os.path.dirname(__file__), 
'.python_packages/lib/site-packages')) + +""" +currently assumed file structure: + +handler.py +function/ + function.py + .py + storage.py + nosql.py + +""" + +class KVApiObject(DurableObject): + def __getattr__(self, name): + return getattr(self.ctx.storage, name) + +class Default(WorkerEntrypoint): + async def fetch(self, request, env): + try: + return await self.fetch2(request, env) + except Exception as e: + t = traceback.format_exc() + print(t) + return Response(t) + + async def fetch2(self, request, env): + if "favicon" in request.url: return Response("None") + + # Get unique request ID from Cloudflare (CF-Ray header) + req_id = request.headers.get('CF-Ray', str(uuid.uuid4())) + + # Start timing measurements + start = time.perf_counter() + begin = datetime.datetime.now().timestamp() + + req_text = await request.text() + + event = json.loads(req_text) if len(req_text) > 0 else {} + ## print(event) + + # dirty url parameters parsing, for testing + tmp = request.url.split("?") + if len(tmp) > 1: + urlparams = tmp[1] + urlparams = [chunk.split("=") for chunk in urlparams.split("&")] + for param in urlparams: + try: + event[param[0]] = int(param[1]) + except ValueError: + event[param[0]] = param[1] + except IndexError: + event[param[0]] = None + + + + + ## note: time fixed in worker + income_timestamp = datetime.datetime.now().timestamp() + + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + + + + from function import storage + + storage.storage.init_instance(self) + + + if hasattr(self.env, 'NOSQL_STORAGE_DATABASE'): + from function import nosql + + nosql.nosql.init_instance(self) + + print("event:", event) + + +## make_benchmark_func() +## function = import_from_path("function.function", "/tmp/function.py") + + from function import function + + ret = function.handler(event) + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + else: + log_data['measurement'] = {} + + # Add memory usage to measurement (if resource module is available) + if HAS_RESOURCE: + memory_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0 + log_data['measurement']['memory_used_mb'] = memory_mb + else: + # Pyodide doesn't support resource module + log_data['measurement']['memory_used_mb'] = 0.0 + + if 'logs' in event: + log_data['time'] = 0 + + if "html" in event: + headers = {"Content-Type" : "text/html; charset=utf-8"} + return Response(str(ret["result"]), headers = headers) + else: + # Trigger a fetch request to update the timer before measuring + # Time measurements only update after a fetch request or R2 operation + try: + # Fetch the worker's own URL with favicon to minimize overhead + final_url = URL.new(request.url) + final_url.pathname = '/favicon' + await js_fetch(str(final_url), method='HEAD') + except: + # Ignore fetch errors + pass + + # Calculate timestamps + end = datetime.datetime.now().timestamp() + elapsed = time.perf_counter() - start + micro = elapsed * 1_000_000 # Convert seconds to microseconds + + return Response(json.dumps({ + 'begin': begin, + 'end': end, + 'compute_time': micro, + 'results_time': 0, + 'result': log_data, + 'is_cold': False, + 'is_cold_worker': False, + 'container_id': "0", + 'environ_container_id': "no_id", + 'request_id': req_id + })) + + +### ---------- old ------- + +def import_from_path(module_name, file_path): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = 
module + spec.loader.exec_module(module) + return module + + +working_dir = os.path.dirname(__file__) + +class MakeAsync(ast.NodeTransformer): + def visit_FunctionDef(self, node): + if node.name != "handler": + return node + return ast.AsyncFunctionDef( + name=node.name, + args=node.args, + body=node.body, + decorator_list=node.decorator_list, + returns=node.returns, + type_params=node.type_params) + +class AddAwait(ast.NodeTransformer): + to_find = ["upload_stream", "download_stream", "upload", "download", "download_directory"] + + def visit_Call(self, node): + if isinstance(node.func, ast.Attribute) and node.func.attr in self.to_find: + #print(ast.dump(node.func, indent=2)) + return ast.Await(value=node) + + return node + +def make_benchmark_func(): + with open(working_dir +"/function/function.py") as f: + module = ast.parse(f.read()) + module = ast.fix_missing_locations(MakeAsync().visit(module)) + module = ast.fix_missing_locations(AddAwait().visit(module)) + new_source = ast.unparse(module) + ##print("new_source:") + ##print(new_source) + ##print() + with open("/tmp/function.py", "w") as wf: + wf.write(new_source) + + diff --git a/benchmarks/wrappers/cloudflare/python/nosql.py b/benchmarks/wrappers/cloudflare/python/nosql.py new file mode 100644 index 000000000..105590ad5 --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/nosql.py @@ -0,0 +1,212 @@ +from typing import List, Optional, Tuple +import json +import pickle +from pyodide.ffi import to_js, run_sync +from workers import WorkerEntrypoint, DurableObject + + +class nosql_do: + instance: Optional["nosql_do"] = None + DO_BINDING_NAME = "DURABLE_STORE" + + @staticmethod + def init_instance(entry: WorkerEntrypoint): + nosql_do.instance = nosql_do() + nosql_do.instance.binding = getattr(entry.env, nosql_do.DO_BINDING_NAME) + + + def get_table(self, table_name): + kvapiobj = self.binding.getByName(table_name) + return kvapiobj + + def key_maker(self, key1, key2): + return f"({key1[0]},{str(key1[1])})+({key2[0]},{key2[1]})" + + def key_maker_partial(self, key1, key2): + return f"({key1[0]},{str(key1[1])})+({key2[0]}" + +## these data conversion funcs should not be necessary. i couldn't get pyodide to clone the data otherwise + def data_pre(self, data): + return pickle.dumps(data, 0).decode("ascii") + + def data_post(self, data): + # Handle None (key not found in storage) + if data is None: + return None + # Handle both string and bytes data from Durable Object storage + if isinstance(data, str): + return pickle.loads(bytes(data, "ascii")) + else: + return pickle.loads(data) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + send_data = self.data_pre(data) + k=self.key_maker(primary_key, secondary_key) + put_res = run_sync(self.get_table(table_name).put(k, send_data)) + return + + ## does this really need different behaviour from insert? + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + self.insert(table_name, primary_key, secondary_key, data) + return + + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + k=self.key_maker(primary_key, secondary_key) + get_res = run_sync(self.get_table(table_name).get(k)) + ## print(get_res) + return self.data_post(get_res) + + """ + This query must involve partition key - it does not scan across partitions. 
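    Matching is done by string prefix: key_maker_partial() builds the
    "(pk_name,pk_value)+(sk_name" prefix, all keys in the Durable Object storage are
    listed, and every key starting with that prefix is fetched individually.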
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + prefix_key = self.key_maker_partial(primary_key, (secondary_key_name,)) + list_res = run_sync(self.get_table(table_name).list()) + + keys = [] + for key in list_res: + if key.startswith(prefix_key): + print(key) + keys.append(key) + ##print("keys", keys) + assert len(keys) <= 100 + + + # todo: please use bulk sometime (it didn't work when i tried it) + res = [] + for key in keys: + + get_res = run_sync(self.get_table(table_name).get(key)) + ## print(get_res) + res.append(self.data_post(get_res)) + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + run_sync(self.get_table(table_name).delete(self.key_maker(primary_key, secondary_key))) + return + + @staticmethod + def get_instance(): + if nosql_do.instance is None: + nosql_do.instance = nosql_do() + return nosql_do.instance + +### ------------------------------ + +class nosql_kv: + + instance: Optional["nosql_kv"] = None + + @staticmethod + def init_instance(entry: WorkerEntrypoint): + nosql_kv.instance = nosql_kv() + nosql_kv.instance.env = entry.env + + def key_maker(self, key1, key2): + return f"({key1[0]},{str(key1[1])})+({key2[0]},{key2[1]})" + + def key_maker_partial(self, key1, key2): + return f"({key1[0]},{str(key1[1])})+({key2[0]}" + + def get_table(self, table_name): + return getattr(self.env, (table_name)) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + put_res = ( + run_sync(self.get_table(table_name).put( + self.key_maker(primary_key, secondary_key), + json.dumps(data)) + )) + return + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + put_res = run_sync( + self.get_table(table_name).put( + self.key_maker(primary_key, secondary_key), + json.dumps(data) + )) + return + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + get_res = run_sync( + self.get_table(table_name).get( + self.key_maker(primary_key, secondary_key) + )) + return get_res + + """ + This query must involve partition key - it does not scan across partitions. 
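    The partition is selected with KV's native prefix listing: list() is called with a
    prefix built by key_maker_partial(), and each returned key is then fetched with get().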
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + _options = {"prefix" : self.key_maker_partial(primary_key, (secondary_key_name,) )} + list_res = run_sync(self.get_table(table_name).list(options=_options)) + + keys = [] + for key in list_res.keys: + keys.append(key.name) + ##print("keys", keys) + assert len(keys) <= 100 + + + # todo: please use bulk sometime (it didn't work when i tried it) + res = [] + for key in keys: + + get_res = run_sync(self.get_table(table_name).get(key)) + get_res = get_res.replace("\'", "\"") + ##print("gr", get_res) + + res.append(json.loads(get_res)) + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + run_sync(self.get_table(table_name).delete(self.key_maker(primary_key, secondary_key))) + + return + + @staticmethod + def get_instance(): + if nosql_kv.instance is None: + nosql_kv.instance = nosql_kv() + return nosql_kv.instance + + + + +nosql = nosql_do diff --git a/benchmarks/wrappers/cloudflare/python/storage.py b/benchmarks/wrappers/cloudflare/python/storage.py new file mode 100644 index 000000000..e7968eb5a --- /dev/null +++ b/benchmarks/wrappers/cloudflare/python/storage.py @@ -0,0 +1,116 @@ +import io +import os +import uuid +import asyncio +import base64 +from pyodide.ffi import to_js, jsnull, run_sync, JsProxy +from pyodide.webloop import WebLoop +import js + +from workers import WorkerEntrypoint + +## all filesystem calls will rely on the node:fs flag +""" layout +/bundle +└── (one file for each module in your Worker bundle) +/tmp +└── (empty, but you can write files, create directories, symlinks, etc) +/dev +├── null +├── random +├── full +└── zero +""" +class storage: + instance = None + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + def get_bucket(self, bucket): + # R2 buckets are always bound as 'R2' in wrangler.toml + # The bucket parameter is the actual bucket name but we access via the binding + return self.entry_env.R2 + + @staticmethod + def init_instance(entry: WorkerEntrypoint): + storage.instance = storage() + storage.instance.entry_env = entry.env + storage.instance.written_files = set() + + def upload(self, bucket, key, filepath): + if filepath in self.written_files: + filepath = "/tmp" + os.path.abspath(filepath) + with open(filepath, "rb") as f: + unique_key = self.upload_stream(bucket, key, f.read()) + return unique_key + + def download(self, bucket, key, filepath): + data = self.download_stream(bucket, key) + # should only allow writes to tmp dir. so do have to edit the filepath here? 
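        # The Workers virtual filesystem (see the layout comment at the top of this module)
        # is only writable under /tmp, so paths outside /tmp are remapped before writing.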
+ real_fp = filepath + if not filepath.startswith("/tmp"): + real_fp = "/tmp" + os.path.abspath(filepath) + + self.written_files.add(filepath) + with open(real_fp, "wb") as f: + f.write(data) + return + + def download_directory(self, bucket, prefix, out_path): + bobj = self.get_bucket(bucket) + list_res = run_sync(bobj.list(to_js({"prefix": prefix}))) + for obj in list_res.objects: + file_name = obj.key + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(out_path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(out_path, file_name)) + return + + def upload_stream(self, bucket, key, data): + return run_sync(self.aupload_stream(bucket, key, data)) + + async def aupload_stream(self, bucket, key, data): + unique_key = storage.unique_name(key) + # Handle BytesIO objects - extract bytes + if hasattr(data, 'getvalue'): + data = data.getvalue() + # Convert bytes to Blob using base64 encoding as intermediate step + if isinstance(data, bytes): + # Encode as base64 + b64_str = base64.b64encode(data).decode('ascii') + # Create a Response from base64, then get the blob + # This creates a proper JavaScript Blob that R2 will accept + response = await js.fetch(f"data:application/octet-stream;base64,{b64_str}") + blob = await response.blob() + data_js = blob + else: + data_js = str(data) + bobj = self.get_bucket(bucket) + put_res = await bobj.put(unique_key, data_js) + return unique_key + + def download_stream(self, bucket, key): + return run_sync(self.adownload_stream(bucket, key)) + + async def adownload_stream(self, bucket, key): + bobj = self.get_bucket(bucket) + get_res = await bobj.get(key) + if get_res == jsnull: + print("key not stored in bucket") + return b'' + # Always read as raw binary data (Blob/ArrayBuffer) + data = await get_res.bytes() + return bytes(data) + + @staticmethod + def get_instance(): + if storage.instance is None: + raise RuntimeError("must init storage singleton first") + return storage.instance + return storage.instance diff --git a/config/cloudflare-test.json b/config/cloudflare-test.json new file mode 100644 index 000000000..275aa021f --- /dev/null +++ b/config/cloudflare-test.json @@ -0,0 +1,26 @@ +{ + "experiments": { + "deployment": "cloudflare", + "update_code": false, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": false, + "runtime": { + "language": "nodejs", + "version": "18" + } + }, + "deployment": { + "name": "cloudflare", + "cloudflare": { + "credentials": { + "api_token": "", + "account_id": "", + "r2_access_key_id": "", + "r2_secret_access_key": "" + } + }, + "container": false + } +} diff --git a/config/systems.json b/config/systems.json index 5a38b4965..9b8015b84 100644 --- a/config/systems.json +++ b/config/systems.json @@ -24,6 +24,13 @@ "3.9": "python:3.9-slim", "3.10": "python:3.10-slim", "3.11": "python:3.11-slim" + }, + "arm64": { + "3.7": "python:3.7-slim", + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim" } }, "images": [ @@ -66,7 +73,7 @@ } } }, - "architecture": ["x64"], + "architecture": ["x64", "arm64"], "deployments": ["package"] }, "aws": { @@ -315,5 +322,87 @@ }, "architecture": ["x64"], "deployments": ["container"] + }, + "cloudflare": { + "languages": { + "python": { + "base_images": { + "x64": { + "3.8": "ubuntu:22.04", + "3.9": "ubuntu:22.04", + "3.10": "ubuntu:22.04", + "3.11": "ubuntu:22.04", + "3.12": "ubuntu:22.04" + } + }, + "container_images": { + "x64": { + "3.8": 
"python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" + } + }, + "images": [], + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": {} + }, + "container_deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": {} + } + }, + "nodejs": { + "base_images": { + "x64": { + "18": "ubuntu:22.04", + "20": "ubuntu:22.04" + } + }, + "container_images": { + "x64": { + "18": "node:18-slim", + "20": "node:20-slim" + } + }, + "images": [], + "deployment": { + "files": [ + "handler.js", + "storage.js", + "nosql.js", + "build.js", + "request-polyfill.js" + ], + "packages": { + "uuid": "3.4.0" + } + }, + "container_deployment": { + "files": [ + "handler.js", + "storage.js", + "nosql.js" + ], + "packages": { + "uuid": "3.4.0" + } + } + } + }, + "architecture": ["x64"], + "deployments": ["package", "container"] } } diff --git a/dockerfiles/cloudflare/Dockerfile.manage b/dockerfiles/cloudflare/Dockerfile.manage new file mode 100644 index 000000000..ac18ac336 --- /dev/null +++ b/dockerfiles/cloudflare/Dockerfile.manage @@ -0,0 +1,35 @@ +FROM node:20-slim + +# Disable telemetry +ENV WRANGLER_SEND_METRICS=false + +# Install system dependencies including Docker CLI +RUN apt-get clean && apt-get update \ + && apt-get install -y ca-certificates curl gnupg gosu python3 python3-pip python3-venv git \ + && install -m 0755 -d /etc/apt/keyrings \ + && curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \ + && chmod a+r /etc/apt/keyrings/docker.asc \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian bookworm stable" > /etc/apt/sources.list.d/docker.list \ + && apt-get update \ + && apt-get install -y docker-ce-cli \ + && apt-get purge -y --auto-remove \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install wrangler globally +RUN npm install -g wrangler + +# Install uv (fast Python package installer) and pywrangler +# uv install script puts the binary in ~/.local/bin by default (not ~/.cargo/bin) +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + /root/.local/bin/uv tool install workers-py + +# Add paths to environment +ENV PATH="/root/.local/bin:/root/.local/share/uv/tools/workers-py/bin:${PATH}" + +# Create working directory +RUN mkdir -p /sebs/ +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/cloudflare/nodejs/Dockerfile b/dockerfiles/cloudflare/nodejs/Dockerfile new file mode 100644 index 000000000..1bf6a89cb --- /dev/null +++ b/dockerfiles/cloudflare/nodejs/Dockerfile @@ -0,0 +1,36 @@ +ARG BASE_IMAGE=node:18-slim +FROM ${BASE_IMAGE} + +# Install system dependencies needed for benchmarks +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + xz-utils \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy package files first for better caching +COPY package*.json ./ + +# Install dependencies +RUN npm install --production + +# Copy all application files +COPY . . 
+ +# Run benchmark init script if it exists (e.g., for ffmpeg in video-processing) +# This downloads static binaries needed by the benchmark +# Note: ignore errors from init.sh (e.g., when resources already exist) +RUN if [ -f "init.sh" ]; then \ + chmod +x init.sh && \ + ./init.sh /app verbose x64 || true; \ + fi + +# Expose port 8080 for container communication +EXPOSE 8080 + +# Set environment variable for port +ENV PORT=8080 + +# Start the HTTP server +CMD ["node", "handler.js"] diff --git a/dockerfiles/cloudflare/python/Dockerfile b/dockerfiles/cloudflare/python/Dockerfile new file mode 100644 index 000000000..e9ecc0e86 --- /dev/null +++ b/dockerfiles/cloudflare/python/Dockerfile @@ -0,0 +1,38 @@ +ARG BASE_IMAGE=python:3.11-slim +FROM ${BASE_IMAGE} + +# Install system dependencies needed for benchmarks +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + xz-utils \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy all application files first +COPY . . + +# Run benchmark init script if it exists (e.g., for ffmpeg in video-processing) +# This downloads static binaries needed by the benchmark +# Note: ignore errors from init.sh (e.g., when resources already exist) +RUN if [ -f "init.sh" ]; then \ + chmod +x init.sh && \ + ./init.sh /app verbose x64 || true; \ + fi + +# Install dependencies +# Core dependencies for wrapper modules: +# - storage.py uses urllib (stdlib) to proxy R2 requests through worker.js +# - nosql.py, worker.py, handler.py use stdlib only +# Then install benchmark-specific requirements from requirements.txt +RUN pip install --no-cache-dir --upgrade pip && \ + if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi + +# Expose port 8080 for container communication +EXPOSE 8080 + +# Set environment variable for port +ENV PORT=8080 + +# Start the HTTP server +CMD ["python", "handler.py"] diff --git a/docs/platforms.md b/docs/platforms.md index 2069c36ae..cf204f592 100644 --- a/docs/platforms.md +++ b/docs/platforms.md @@ -1,5 +1,5 @@ -SeBS supports three commercial serverless platforms: AWS Lambda, Azure Functions, and Google Cloud Functions. +SeBS supports four commercial serverless platforms: AWS Lambda, Azure Functions, Google Cloud Functions, and Cloudflare Workers. Furthermore, we support the open source FaaS system OpenWhisk. The file `config/example.json` contains all parameters that users can change @@ -16,6 +16,7 @@ Supported platforms: * [Amazon Web Services (AWS) Lambda](#aws-lambda) * [Microsoft Azure Functions](#azure-functions) * [Google Cloud (GCP) Functions](#google-cloud-functions) +* [Cloudflare Workers](#cloudflare-workers) * [OpenWhisk](#openwhisk) ## Storage Configuration @@ -172,6 +173,82 @@ or in the JSON input configuration: } ``` +## Cloudflare Workers + +Cloudflare offers a free tier for Workers with generous limits for development and testing. To use Cloudflare Workers with SeBS, you need to create a Cloudflare account and obtain API credentials. + +### Credentials + +You can authenticate with Cloudflare using an API token (recommended) or email + API key. Additionally, you need your account ID which can be found in the Cloudflare dashboard. 
+ +You can pass credentials using environment variables: + +```bash +# Option 1: Using API Token (recommended) +export CLOUDFLARE_API_TOKEN="your-api-token" +export CLOUDFLARE_ACCOUNT_ID="your-account-id" + +# Option 2: Using Email + API Key +export CLOUDFLARE_EMAIL="your-email@example.com" +export CLOUDFLARE_API_KEY="your-global-api-key" +export CLOUDFLARE_ACCOUNT_ID="your-account-id" +``` + +or in the JSON configuration file: + +```json +"deployment": { + "name": "cloudflare", + "cloudflare": { + "credentials": { + "api_token": "your-api-token", + "account_id": "your-account-id" + }, + "resources": { + "resources_id": "unique-resource-id" + } + } +} +``` + +**Note**: The `resources_id` is used to uniquely identify and track resources created by SeBS for a specific deployment. + +### Language Support + +Cloudflare Workers support multiple languages through different deployment methods: + +- **JavaScript/Node.js**: Supported via script-based deployment or container-based deployment using Wrangler CLI +- **Python**: Supported via script-based deployment or container-based deployment using Wrangler CLI + +### CLI Container + +SeBS uses a containerized CLI approach for Cloudflare deployments, eliminating the need to install Node.js, npm, wrangler, pywrangler, or uv on your host system. The CLI container (`sebs/manage.cloudflare`) is automatically built on first use and contains all necessary tools. This ensures consistent behavior across platforms and simplifies setup—only Docker is required. + +### Trigger Support + +- **HTTP Trigger**: ✅ Fully supported - Workers are automatically accessible at `https://{name}.{account}.workers.dev` +- **Library Trigger**: ❌ Not currently supported + +### Platform Limitations + +- **Cold Start Detection**: Cloudflare does not expose cold start information. All invocations report `is_cold: false` in the metrics. This limitation means cold start metrics are not available for Cloudflare Workers benchmarks. +- **Memory/Timeout Configuration (Workers)**: Managed by Cloudflare (128MB memory, 30s CPU time on free tier) +- **Memory/Timeout Configuration (Containers)**: Managed by Cloudflare, available in different tiers: + + | Instance Type | vCPU | Memory | Disk | + |---------------|------|--------|------| + | lite | 1/16 | 256 MiB | 2 GB | + | basic | 1/4 | 1 GiB | 4 GB | + | standard-1 | 1/2 | 4 GiB | 8 GB | + | standard-2 | 1 | 6 GiB | 12 GB | + | standard-3 | 2 | 8 GiB | 16 GB | + | standard-4 | 4 | 12 GiB | 20 GB | +- **Metrics Collection**: Uses response-based per-invocation metrics. During each function invocation, the worker handler measures performance metrics (CPU time, wall time, memory usage) and embeds them directly in the JSON response. SeBS extracts these metrics immediately from each response. When `download_metrics()` is called for postprocessing, it only aggregates the metrics that were already collected during invocations—no additional data is fetched from external services. This approach provides immediate per-invocation granularity without delays. Note that while Cloudflare does expose an Analytics Engine, it only provides aggregated metrics without individual request-level data, making it unsuitable for detailed benchmarking purposes. + +### Storage Configuration + +Cloudflare Workers integrate with Cloudflare R2 for object storage and Durable Objects for NoSQL storage. For detailed storage configuration, see the [storage documentation](storage.md#cloudflare-storage). 
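
### Example: Reading Per-Invocation Metrics

Because metrics are embedded in every worker response (see *Metrics Collection* above), they can be inspected directly from the returned JSON. The snippet below is a minimal sketch rather than part of SeBS: the worker URL is a placeholder, the empty event payload stands in for benchmark-specific input, and the field names follow the response format of the Python worker wrapper added in this patch (`compute_time` in microseconds, `result.measurement.memory_used_mb`, `is_cold` always `false`).

```python
import requests  # assumes the requests package is available

# Hypothetical deployed worker URL; replace with your own *.workers.dev endpoint.
url = "https://my-benchmark.my-account.workers.dev"

# Invoke the worker; real benchmarks expect their own input schema instead of {}.
response = requests.post(url, json={}, timeout=120)
payload = response.json()

# Fields emitted by the Cloudflare wrapper handler.
print("request id:  ", payload["request_id"])
print("compute time:", payload["compute_time"], "us")
print("memory used: ", payload["result"]["measurement"].get("memory_used_mb"), "MB")
print("cold start:  ", payload["is_cold"])  # always False on Cloudflare
```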
+ ## OpenWhisk SeBS expects users to deploy and configure an OpenWhisk instance. diff --git a/docs/storage.md b/docs/storage.md index 627216041..737750d10 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -108,6 +108,40 @@ Date: Mon, 30 May 2022 10:01:21 GMT ``` +## Cloudflare Storage + +Cloudflare Workers integrate with cloud-native storage services provided by Cloudflare: + +### R2 Object Storage + +Cloudflare R2 provides S3-compatible object storage for benchmarks that require persistent file storage. SeBS automatically configures R2 buckets for benchmark input and output data. + +**Key Features:** +- S3-compatible API +- No egress fees +- Global edge storage +- Integrated with Workers through bindings + +**Configuration:** +R2 configuration is handled automatically by SeBS when deploying to Cloudflare Workers. The storage resources are defined in your deployment configuration and SeBS manages bucket creation and access. + +**Limitations:** +- Geographic location hints (locationHint) are not currently supported. R2 buckets are created with Cloudflare's automatic location selection, which places data near where it's most frequently accessed. + +### Durable Objects for NoSQL + +Cloudflare Durable Objects provide stateful storage for NoSQL operations required by benchmarks like the CRUD API (130.crud-api). + +**Key Features:** +- Strongly consistent storage +- Low-latency access from Workers +- Built-in coordination primitives +- Global replication + +**Usage:** +SeBS configures Durable Objects bindings automatically when deploying container-based Workers that require NoSQL storage. The benchmark wrappers handle the interaction with Durable Objects through the standard SeBS storage interface. + + ## Lifecycle Management By default, storage containers are retained after experiments complete. This allows you to run multiple experiments without redeploying and repopulating storage. 
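
For Cloudflare, benchmark code talks to R2 and Durable Objects only through the `storage` and `nosql` wrapper modules added in this patch. The sketch below illustrates the call signatures those wrappers expose; it is not an existing benchmark: the imports, bucket, table, and key names are hypothetical, and depending on the runtime the calls either execute synchronously (via `run_sync` in the Python wrappers) or are awaited by the build step (the Node.js storage wrapper also returns an upload promise that must be awaited).

```python
# Illustrative only -- module layout and names are placeholders.
from function import storage, nosql

def handler(event):
    client = storage.storage.get_instance()
    db = nosql.nosql.get_instance()

    # Object storage: download an input object to /tmp, then upload a result.
    client.download("benchmarks-bucket", "input/data.bin", "/tmp/data.bin")
    result_key = client.upload("benchmarks-bucket", "output/data.bin", "/tmp/data.bin")

    # NoSQL: records addressed by a (partition key, sort key) pair of (name, value) tuples.
    db.insert("example_table", ("user_id", "u1"), ("item_id", "i1"), {"count": 1})
    record = db.get("example_table", ("user_id", "u1"), ("item_id", "i1"))
    items = db.query("example_table", ("user_id", "u1"), "item_id")

    return {"result": {"key": result_key, "record": record, "items": len(items)}}
```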
diff --git a/sebs.py b/sebs.py index 80fb11ed3..1ea6c0156 100755 --- a/sebs.py +++ b/sebs.py @@ -89,7 +89,7 @@ def common_params(func): @click.option( "--deployment", default=None, - type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk"]), + type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk", "cloudflare"]), help="Cloud deployment to use.", ) @click.option( @@ -155,7 +155,10 @@ def parse_common_params( update_nested_dict(config_obj, ["experiments", "update_code"], update_code) update_nested_dict(config_obj, ["experiments", "update_storage"], update_storage) update_nested_dict(config_obj, ["experiments", "architecture"], architecture) - update_nested_dict(config_obj, ["experiments", "container_deployment"], container_deployment) + # Only override container_deployment if explicitly set via CLI + # If not in config, use CLI default (False) + if container_deployment or "container_deployment" not in config_obj.get("experiments", {}): + update_nested_dict(config_obj, ["experiments", "container_deployment"], container_deployment) # set the path the configuration was loaded from update_nested_dict(config_obj, ["deployment", "local", "path"], config) diff --git a/sebs/cloudflare/__init__.py b/sebs/cloudflare/__init__.py new file mode 100644 index 000000000..5a2c557d3 --- /dev/null +++ b/sebs/cloudflare/__init__.py @@ -0,0 +1,4 @@ +from sebs.cloudflare.cloudflare import Cloudflare +from sebs.cloudflare.config import CloudflareConfig + +__all__ = ["Cloudflare", "CloudflareConfig"] diff --git a/sebs/cloudflare/cli.py b/sebs/cloudflare/cli.py new file mode 100644 index 000000000..426db5cac --- /dev/null +++ b/sebs/cloudflare/cli.py @@ -0,0 +1,249 @@ +import io +import logging +import os +import tarfile + +import docker + +from sebs.config import SeBSConfig +from sebs.utils import LoggingBase + + +class CloudflareCLI(LoggingBase): + """ + Manages a Docker container with Cloudflare Wrangler and related tools pre-installed. + + This approach isolates Cloudflare CLI tools (wrangler, pywrangler) from the host system, + avoiding global npm/uv installations and ensuring consistent behavior across platforms. + """ + + def __init__(self, system_config: SeBSConfig, docker_client: docker.client): + super().__init__() + + repo_name = system_config.docker_repository() + image_name = "manage.cloudflare" + full_image_name = repo_name + ":" + image_name + + # Try to get the image, pull if not found, build if pull fails + try: + docker_client.images.get(full_image_name) + logging.info(f"Using existing Docker image: {full_image_name}") + except docker.errors.ImageNotFound: + # Try to pull the image first + try: + logging.info(f"Pulling Docker image {full_image_name}...") + docker_client.images.pull(repo_name, image_name) + logging.info(f"Successfully pulled {full_image_name}") + except docker.errors.APIError as pull_error: + # If pull fails, try to build the image locally + logging.info(f"Pull failed: {pull_error}. Building image locally...") + + # Find the Dockerfile path + dockerfile_path = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "dockerfiles", + "cloudflare", + "Dockerfile.manage" + ) + + if not os.path.exists(dockerfile_path): + raise RuntimeError( + f"Dockerfile not found at {dockerfile_path}. " + "Cannot build Cloudflare CLI container." 
+ ) + + # Build the image + build_path = os.path.join(os.path.dirname(__file__), "..", "..") + logging.info(f"Building {full_image_name} from {dockerfile_path}...") + + try: + image, build_logs = docker_client.images.build( + path=build_path, + dockerfile=dockerfile_path, + tag=full_image_name, + rm=True, + pull=True + ) + + # Log build output + for log in build_logs: + if 'stream' in log: + logging.debug(log['stream'].strip()) + + logging.info(f"Successfully built {full_image_name}") + except docker.errors.BuildError as build_error: + raise RuntimeError( + f"Failed to build Docker image {full_image_name}: {build_error}" + ) + + # Start the container in detached mode + self.docker_instance = docker_client.containers.run( + image=full_image_name, + command="/bin/bash", + environment={ + "CONTAINER_UID": str(os.getuid()), + "CONTAINER_GID": str(os.getgid()), + "CONTAINER_USER": "docker_user", + }, + volumes={ + # Mount Docker socket for wrangler container deployments + "/var/run/docker.sock": {"bind": "/var/run/docker.sock", "mode": "rw"} + }, + remove=True, + stdout=True, + stderr=True, + detach=True, + tty=True, + ) + + self.logging.info(f"Started Cloudflare CLI container: {self.docker_instance.id}.") + + # Wait for container to be ready + while True: + try: + dkg = self.docker_instance.logs(stream=True, follow=True) + next(dkg).decode("utf-8") + break + except StopIteration: + pass + + @staticmethod + def typename() -> str: + return "Cloudflare.CLI" + + def execute(self, cmd: str, env: dict = None): + """ + Execute the given command in Cloudflare CLI container. + Throws an exception on failure (commands are expected to execute successfully). + + Args: + cmd: Shell command to execute + env: Optional environment variables dict + + Returns: + Command output as bytes + """ + # Wrap command in sh -c to support shell features like cd, pipes, etc. + shell_cmd = ["/bin/sh", "-c", cmd] + exit_code, out = self.docker_instance.exec_run( + shell_cmd, + user="root", # Run as root since entrypoint creates docker_user but we don't wait for it + environment=env + ) + if exit_code != 0: + raise RuntimeError( + "Command {} failed at Cloudflare CLI docker!\n Output {}".format( + cmd, out.decode("utf-8") + ) + ) + return out + + def upload_package(self, directory: str, dest: str): + """ + Upload a directory to the Docker container. + + This is not an efficient and memory-intensive implementation. + So far, we didn't have very large functions that require many gigabytes. + + Since docker-py does not support a straightforward copy, and we can't + put_archive in chunks. + + Args: + directory: Local directory to upload + dest: Destination path in container + """ + handle = io.BytesIO() + with tarfile.open(fileobj=handle, mode="w:gz") as tar: + for f in os.listdir(directory): + tar.add(os.path.join(directory, f), arcname=f) + + # Move to the beginning of memory before writing + handle.seek(0) + self.execute("mkdir -p {}".format(dest)) + self.docker_instance.put_archive(path=dest, data=handle.read()) + + def check_wrangler_version(self) -> str: + """ + Check wrangler version. + + Returns: + Version string + """ + out = self.execute("wrangler --version") + return out.decode("utf-8").strip() + + def check_pywrangler_version(self) -> str: + """ + Check pywrangler version. + + Returns: + Version string + """ + out = self.execute("pywrangler --version") + return out.decode("utf-8").strip() + + def wrangler_deploy(self, package_dir: str, env: dict = None) -> str: + """ + Deploy a worker using wrangler. 
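        The command runs inside the CLI container, so `package_dir` must be a
        container-side path (typically one previously populated via upload_package()).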
+ + Args: + package_dir: Path to package directory in container + env: Environment variables for deployment + + Returns: + Deployment output + """ + cmd = "cd {} && wrangler deploy".format(package_dir) + out = self.execute(cmd, env=env) + return out.decode("utf-8") + + def pywrangler_deploy(self, package_dir: str, env: dict = None) -> str: + """ + Deploy a Python worker using pywrangler. + + Args: + package_dir: Path to package directory in container + env: Environment variables for deployment + + Returns: + Deployment output + """ + cmd = "cd {} && pywrangler deploy".format(package_dir) + out = self.execute(cmd, env=env) + return out.decode("utf-8") + + def npm_install(self, package_dir: str) -> str: + """ + Run npm install in a directory. + + Args: + package_dir: Path to package directory in container + + Returns: + npm output + """ + cmd = "cd {} && npm install".format(package_dir) + out = self.execute(cmd) + return out.decode("utf-8") + + def docker_build(self, package_dir: str, image_tag: str) -> str: + """ + Build a Docker image for container deployment. + + Args: + package_dir: Path to package directory in container + image_tag: Tag for the Docker image + + Returns: + Docker build output + """ + cmd = "cd {} && docker build --no-cache -t {} .".format(package_dir, image_tag) + out = self.execute(cmd) + return out.decode("utf-8") + + def shutdown(self): + """Shutdown Docker instance.""" + self.logging.info("Stopping Cloudflare CLI Docker instance") + self.docker_instance.stop() diff --git a/sebs/cloudflare/cloudflare.py b/sebs/cloudflare/cloudflare.py new file mode 100644 index 000000000..a9b9a7be4 --- /dev/null +++ b/sebs/cloudflare/cloudflare.py @@ -0,0 +1,807 @@ +import os +import uuid +import time +from datetime import datetime +from typing import cast, Dict, List, Optional, Tuple, Type + +import docker +import requests + +from sebs.cloudflare.config import CloudflareConfig +from sebs.cloudflare.function import CloudflareWorker +from sebs.cloudflare.resources import CloudflareSystemResources +from sebs.cloudflare.workers import CloudflareWorkersDeployment +from sebs.cloudflare.containers import CloudflareContainersDeployment +from sebs.benchmark import Benchmark +from sebs.cache import Cache +from sebs.config import SeBSConfig +from sebs.utils import LoggingHandlers +from sebs.faas.function import Function, ExecutionResult, Trigger, FunctionConfig +from sebs.faas.system import System +from sebs.faas.config import Resources + + +class Cloudflare(System): + """ + Cloudflare Workers serverless platform implementation. + + Cloudflare Workers run on Cloudflare's edge network, providing + low-latency serverless execution globally. 
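# A minimal usage sketch for the CloudflareCLI helper defined above: upload a
# prepared package, deploy it with wrangler, and stop the management container.
# The worker directory and credential values are placeholders; SeBSConfig
# construction is assumed to happen elsewhere.
import docker

from sebs.config import SeBSConfig
from sebs.cloudflare.cli import CloudflareCLI


def deploy_with_cli(system_config: SeBSConfig, package_dir: str,
                    api_token: str, account_id: str) -> str:
    cli = CloudflareCLI(system_config, docker.from_env())
    try:
        cli.upload_package(package_dir, "/tmp/workers/example-worker")
        return cli.wrangler_deploy(
            "/tmp/workers/example-worker",
            env={"CLOUDFLARE_API_TOKEN": api_token,
                 "CLOUDFLARE_ACCOUNT_ID": account_id},
        )
    finally:
        cli.shutdown()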
+ """ + + _config: CloudflareConfig + + @staticmethod + def name(): + return "cloudflare" + + @staticmethod + def typename(): + return "Cloudflare" + + @staticmethod + def function_type() -> "Type[Function]": + return CloudflareWorker + + @property + def config(self) -> CloudflareConfig: + return self._config + + def __init__( + self, + sebs_config: SeBSConfig, + config: CloudflareConfig, + cache_client: Cache, + docker_client: docker.client, + logger_handlers: LoggingHandlers, + ): + super().__init__( + sebs_config, + cache_client, + docker_client, + CloudflareSystemResources(config, cache_client, docker_client, logger_handlers), + ) + self.logging_handlers = logger_handlers + self._config = config + self._api_base_url = "https://api.cloudflare.com/client/v4" + # cached workers.dev subdomain for the account + # This is different from the account ID and is required to build + # public worker URLs like ..workers.dev + self._workers_dev_subdomain: Optional[str] = None + + # Initialize deployment handlers + self._workers_deployment = CloudflareWorkersDeployment( + self.logging, sebs_config, docker_client, self.system_resources + ) + self._containers_deployment = CloudflareContainersDeployment( + self.logging, sebs_config, docker_client, self.system_resources + ) + + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize the Cloudflare Workers platform. + + Args: + config: Additional configuration parameters + resource_prefix: Prefix for resource naming + """ + # Verify credentials are valid + self._verify_credentials() + self.initialize_resources(select_prefix=resource_prefix) + + def initialize_resources(self, select_prefix: Optional[str] = None): + """ + Initialize Cloudflare resources. + + Overrides the base class method to handle R2 storage gracefully. + Cloudflare Workers can operate without R2 storage for many benchmarks. + + Args: + select_prefix: Optional prefix for resource naming + """ + deployments = self.find_deployments() + + # Check if we have an existing deployment + if deployments: + res_id = deployments[0] + self.config.resources.resources_id = res_id + self.logging.info(f"Using existing resource deployment {res_id}") + return + + # Create new resource ID + if select_prefix is not None: + res_id = f"{select_prefix}-{str(uuid.uuid1())[0:8]}" + else: + res_id = str(uuid.uuid1())[0:8] + + self.config.resources.resources_id = res_id + self.logging.info(f"Generating unique resource name {res_id}") + + # Try to create R2 bucket, but don't fail if R2 is not enabled + try: + self.system_resources.get_storage().get_bucket(Resources.StorageBucketType.BENCHMARKS) + self.logging.info("R2 storage initialized successfully") + except Exception as e: + self.logging.warning( + f"R2 storage initialization failed: {e}. " + f"R2 must be enabled in your Cloudflare dashboard to use storage-dependent benchmarks. " + f"Continuing without R2 storage - only benchmarks that don't require storage will work." + ) + + def _verify_credentials(self): + """Verify that the Cloudflare API credentials are valid.""" + # Check if credentials are set + if not self.config.credentials.api_token and not (self.config.credentials.email and self.config.credentials.api_key): + raise RuntimeError( + "Cloudflare API credentials are not set. Please set CLOUDFLARE_API_TOKEN " + "and CLOUDFLARE_ACCOUNT_ID environment variables." + ) + + if not self.config.credentials.account_id: + raise RuntimeError( + "Cloudflare Account ID is not set. 
Please set CLOUDFLARE_ACCOUNT_ID " + "environment variable." + ) + + headers = self._get_auth_headers() + + # Log credential type being used (without exposing the actual token) + if self.config.credentials.api_token: + token_preview = self.config.credentials.api_token[:8] + "..." if len(self.config.credentials.api_token) > 8 else "***" + self.logging.info(f"Using API Token authentication (starts with: {token_preview})") + else: + self.logging.info(f"Using Email + API Key authentication (email: {self.config.credentials.email})") + + response = requests.get(f"{self._api_base_url}/user/tokens/verify", headers=headers) + + if response.status_code != 200: + raise RuntimeError( + f"Failed to verify Cloudflare credentials: {response.status_code} - {response.text}\n" + f"Please check that your CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID are correct." + ) + + self.logging.info("Cloudflare credentials verified successfully") + + def _get_deployment_handler(self, container_deployment: bool): + """Get the appropriate deployment handler based on deployment type. + + Args: + container_deployment: Whether this is a container deployment + + Returns: + CloudflareWorkersDeployment or CloudflareContainersDeployment + """ + if container_deployment: + return self._containers_deployment + else: + return self._workers_deployment + + + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + architecture: str, + benchmark: str, + is_cached: bool, + container_deployment: bool, + ) -> Tuple[str, int, str]: + """ + Package code for Cloudflare Workers deployment using Wrangler. + + Uses Wrangler CLI to bundle dependencies and prepare for deployment. + Delegates to either CloudflareWorkersDeployment or CloudflareContainersDeployment + based on the deployment type. 
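# A minimal sketch of the credential check performed by _verify_credentials():
# Cloudflare's token verification endpoint returns HTTP 200 for a valid API
# token. The token value is supplied by the caller.
import requests

API_BASE = "https://api.cloudflare.com/client/v4"


def verify_token(api_token: str) -> bool:
    resp = requests.get(
        f"{API_BASE}/user/tokens/verify",
        headers={"Authorization": f"Bearer {api_token}",
                 "Content-Type": "application/json"},
    )
    return resp.status_code == 200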
+ + Args: + directory: Path to the code directory + language_name: Programming language name + language_version: Programming language version + architecture: Target architecture (not used for Workers) + benchmark: Benchmark name + is_cached: Whether the code is cached + container_deployment: Whether to deploy as container + + Returns: + Tuple of (package_path, package_size, container_uri) + """ + handler = self._get_deployment_handler(container_deployment) + + # Container deployment flow - build Docker image + if container_deployment: + self.logging.info(f"Building container image for {benchmark}") + return handler.package_code( + directory, language_name, language_version, architecture, benchmark + ) + + # Native worker deployment flow + return handler.package_code( + directory, language_name, language_version, benchmark, is_cached + ) + + def _get_auth_headers(self) -> Dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + if self.config.credentials.api_token: + return { + "Authorization": f"Bearer {self.config.credentials.api_token}", + "Content-Type": "application/json", + } + elif self.config.credentials.email and self.config.credentials.api_key: + return { + "X-Auth-Email": self.config.credentials.email, + "X-Auth-Key": self.config.credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def _generate_wrangler_toml( + self, + worker_name: str, + package_dir: str, + language: str, + account_id: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_deployment: bool = False, + container_uri: str = "", + ) -> str: + """ + Generate wrangler.toml by delegating to the appropriate deployment handler. + + Args: + worker_name: Name of the worker + package_dir: Directory containing the worker code + language: Programming language (nodejs or python) + account_id: Cloudflare account ID + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + container_deployment: Whether this is a container deployment + container_uri: Container image URI/tag + + Returns: + Path to the generated wrangler.toml file + """ + handler = self._get_deployment_handler(container_deployment) + return handler.generate_wrangler_toml( + worker_name, package_dir, language, account_id, + benchmark_name, code_package, container_uri + ) + + def create_function( + self, + code_package: Benchmark, + func_name: str, + container_deployment: bool, + container_uri: str, + ) -> CloudflareWorker: + """ + Create a new Cloudflare Worker. + + If a worker with the same name already exists, it will be updated. 
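# A condensed sketch of the two authentication modes handled by
# _get_auth_headers(): a scoped API token (preferred) or the legacy
# email + global API key pair. All credential values are placeholders
# supplied by the caller.
from typing import Dict, Optional

import requests


def auth_headers(api_token: Optional[str] = None, email: Optional[str] = None,
                 api_key: Optional[str] = None) -> Dict[str, str]:
    if api_token:
        return {"Authorization": f"Bearer {api_token}",
                "Content-Type": "application/json"}
    if email and api_key:
        return {"X-Auth-Email": email, "X-Auth-Key": api_key,
                "Content-Type": "application/json"}
    raise RuntimeError("No Cloudflare credentials provided")


# Usage sketch: list deployed worker scripts for an account.
# requests.get(f"https://api.cloudflare.com/client/v4/accounts/{account_id}/workers/scripts",
#              headers=auth_headers(api_token=token))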
+ + Args: + code_package: Benchmark containing the function code + func_name: Name of the worker + container_deployment: Whether to deploy as container + container_uri: URI of container image + + Returns: + CloudflareWorker instance + """ + package = code_package.code_location + benchmark = code_package.benchmark + language = code_package.language_name + language_runtime = code_package.language_version + function_cfg = FunctionConfig.from_benchmark(code_package) + + func_name = self.format_function_name(func_name, container_deployment) + account_id = self.config.credentials.account_id + + if not account_id: + raise RuntimeError("Cloudflare account ID is required to create workers") + + # Check if worker already exists + existing_worker = self._get_worker(func_name, account_id) + + if existing_worker: + self.logging.info(f"Worker {func_name} already exists, updating it") + worker = CloudflareWorker( + func_name, + code_package.benchmark, + func_name, # script_id is the same as name + code_package.hash, + language_runtime, + function_cfg, + account_id, + ) + self.update_function(worker, code_package, container_deployment, container_uri) + worker.updated_code = True + else: + self.logging.info(f"Creating new worker {func_name}") + + # Create the worker with all package files + self._create_or_update_worker(func_name, package, account_id, language, benchmark, code_package, container_deployment, container_uri) + + worker = CloudflareWorker( + func_name, + code_package.benchmark, + func_name, + code_package.hash, + language_runtime, + function_cfg, + account_id, + ) + + # Add HTTPTrigger + from sebs.cloudflare.triggers import HTTPTrigger + + # Build worker URL using the account's workers.dev subdomain when possible. + # Falls back to account_id-based host or plain workers.dev with warnings. + worker_url = self._build_workers_dev_url(func_name, account_id) + http_trigger = HTTPTrigger(func_name, worker_url) + http_trigger.logging_handlers = self.logging_handlers + worker.add_trigger(http_trigger) + + return worker + + def _get_worker(self, worker_name: str, account_id: str) -> Optional[dict]: + """Get information about an existing worker.""" + headers = self._get_auth_headers() + url = f"{self._api_base_url}/accounts/{account_id}/workers/scripts/{worker_name}" + + response = requests.get(url, headers=headers) + + if response.status_code == 200: + try: + return response.json().get("result") + except: + return None + elif response.status_code == 404: + return None + else: + self.logging.warning(f"Unexpected response checking worker: {response.status_code}") + return None + + def _create_or_update_worker( + self, worker_name: str, package_dir: str, account_id: str, language: str, benchmark_name: Optional[str] = None, code_package: Optional[Benchmark] = None, container_deployment: bool = False, container_uri: str = "" + ) -> dict: + """Create or update a Cloudflare Worker using Wrangler CLI in container. 
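# A standalone sketch of the existence check in _get_worker(): fetch the script
# by name and treat HTTP 404 as "worker does not exist". Account id, token, and
# worker name are placeholders supplied by the caller.
from typing import Optional

import requests

API_BASE = "https://api.cloudflare.com/client/v4"


def get_worker(account_id: str, worker_name: str, api_token: str) -> Optional[dict]:
    url = f"{API_BASE}/accounts/{account_id}/workers/scripts/{worker_name}"
    resp = requests.get(url, headers={"Authorization": f"Bearer {api_token}"})
    if resp.status_code == 200:
        return resp.json().get("result")
    if resp.status_code == 404:
        return None
    raise RuntimeError(f"Unexpected response {resp.status_code}: {resp.text}")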
+ + Args: + worker_name: Name of the worker + package_dir: Directory containing handler and all benchmark files + account_id: Cloudflare account ID + language: Programming language (nodejs or python) + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + container_deployment: Whether this is a container deployment + container_uri: Container image URI/tag + + Returns: + Worker deployment result + """ + # Generate wrangler.toml for this worker + self._generate_wrangler_toml(worker_name, package_dir, language, account_id, benchmark_name, code_package, container_deployment, container_uri) + + # Set up environment for Wrangler CLI in container + env = {} + + if self.config.credentials.api_token: + env['CLOUDFLARE_API_TOKEN'] = self.config.credentials.api_token + elif self.config.credentials.email and self.config.credentials.api_key: + env['CLOUDFLARE_EMAIL'] = self.config.credentials.email + env['CLOUDFLARE_API_KEY'] = self.config.credentials.api_key + + env['CLOUDFLARE_ACCOUNT_ID'] = account_id + + # Get CLI container instance from appropriate deployment handler + handler = self._get_deployment_handler(container_deployment) + cli = handler._get_cli() + + # Upload package directory to container + container_package_path = f"/tmp/workers/{worker_name}" + self.logging.info(f"Uploading package to container: {container_package_path}") + cli.upload_package(package_dir, container_package_path) + + # Deploy using Wrangler in container + self.logging.info(f"Deploying worker {worker_name} using Wrangler in container...") + + try: + # For container deployments, always use wrangler (not pywrangler) + # For native deployments, use wrangler for nodejs, pywrangler for python + if container_deployment or language == "nodejs": + output = cli.wrangler_deploy(container_package_path, env=env) + else: # python native + output = cli.pywrangler_deploy(container_package_path, env=env) + + self.logging.info(f"Worker {worker_name} deployed successfully") + self.logging.debug(f"Wrangler deploy output: {output}") + + # The container binding needs time to propagate before first invocation + if container_deployment: + self.logging.info("Waiting for container Durable Object to initialize...") + account_id = env.get('CLOUDFLARE_ACCOUNT_ID') + worker_url = self._build_workers_dev_url(worker_name, account_id) + self._containers_deployment.wait_for_durable_object_ready( + worker_name, worker_url + ) + + # The container binding needs time to propagate before first invocation + if container_deployment: + self.logging.info("Waiting 60 seconds for container to be fully provisioned (can sometimes take a bit longer)...") + time.sleep(60) + + return {"success": True, "output": output} + + except RuntimeError as e: + error_msg = f"Wrangler deployment failed for worker {worker_name}: {str(e)}" + self.logging.error(error_msg) + raise RuntimeError(error_msg) + + def _get_workers_dev_subdomain(self, account_id: str) -> Optional[str]: + """Fetch the workers.dev subdomain for the given account. + + Cloudflare exposes an endpoint that returns the account-level workers + subdomain (the readable name used in *.workers.dev), e.g. + GET /accounts/{account_id}/workers/subdomain + + Returns the subdomain string or None on failure. 
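# A host-side sketch of the non-interactive deployment performed above: wrangler
# reads CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID from the environment, so
# no `wrangler login` is required. The patch runs the same command inside the
# management container; subprocess is used here only for illustration.
import os
import subprocess


def wrangler_deploy_local(package_dir: str, api_token: str, account_id: str) -> str:
    env = dict(os.environ,
               CLOUDFLARE_API_TOKEN=api_token,
               CLOUDFLARE_ACCOUNT_ID=account_id)
    result = subprocess.run(["wrangler", "deploy"], cwd=package_dir, env=env,
                            capture_output=True, text=True, check=True)
    return result.stdout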
+ """ + if self._workers_dev_subdomain: + return self._workers_dev_subdomain + + try: + headers = self._get_auth_headers() + url = f"{self._api_base_url}/accounts/{account_id}/workers/subdomain" + resp = requests.get(url, headers=headers) + if resp.status_code == 200: + body = resp.json() + sub = None + # result may contain 'subdomain' or nested structure + if isinstance(body, dict): + sub = body.get("result", {}).get("subdomain") + + if sub: + self._workers_dev_subdomain = sub + return sub + else: + self.logging.warning( + "Could not find workers.dev subdomain in API response; " + "please enable the workers.dev subdomain in your Cloudflare dashboard." + ) + return None + else: + self.logging.warning( + f"Failed to fetch workers.dev subdomain: {resp.status_code} - {resp.text}" + ) + return None + except Exception as e: + self.logging.warning(f"Error fetching workers.dev subdomain: {e}") + return None + + def _build_workers_dev_url(self, worker_name: str, account_id: Optional[str]) -> str: + """Build a best-effort public URL for a worker. + + Prefer using the account's readable workers.dev subdomain when available + (e.g. ..workers.dev). If we can't obtain that, fall + back to using the account_id as a last resort and log a warning. + """ + if account_id: + sub = self._get_workers_dev_subdomain(account_id) + if sub: + return f"https://{worker_name}.{sub}.workers.dev" + else: + # fallback: some code historically used account_id in the host + self.logging.warning( + "Using account ID in workers.dev URL as a fallback. " + "Enable the workers.dev subdomain in Cloudflare for proper URLs." + ) + return f"https://{worker_name}.{account_id}.workers.dev" + # Last fallback: plain workers.dev (may not resolve without a subdomain) + self.logging.warning( + "No account ID available; using https://{name}.workers.dev which may not be reachable." + ) + return f"https://{worker_name}.workers.dev" + + def cached_function(self, function: Function): + """ + Handle a function retrieved from cache. + + Refreshes triggers and logging handlers. + + Args: + function: The cached function + """ + from sebs.cloudflare.triggers import HTTPTrigger + + for trigger in function.triggers(Trigger.TriggerType.HTTP): + trigger.logging_handlers = self.logging_handlers + + def update_function( + self, + function: Function, + code_package: Benchmark, + container_deployment: bool, + container_uri: str, + ): + """ + Update an existing Cloudflare Worker. 
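# A minimal sketch of the URL construction in _build_workers_dev_url(): fetch
# the account's workers.dev subdomain once, then compose
# https://<worker>.<subdomain>.workers.dev. Inputs are placeholders.
from typing import Optional

import requests

API_BASE = "https://api.cloudflare.com/client/v4"


def workers_dev_url(worker_name: str, account_id: str, api_token: str) -> Optional[str]:
    resp = requests.get(
        f"{API_BASE}/accounts/{account_id}/workers/subdomain",
        headers={"Authorization": f"Bearer {api_token}"},
    )
    if resp.status_code != 200:
        return None
    subdomain = resp.json().get("result", {}).get("subdomain")
    return f"https://{worker_name}.{subdomain}.workers.dev" if subdomain else None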
+ + Args: + function: Existing function instance to update + code_package: New benchmark containing the function code + container_deployment: Whether to deploy as container + container_uri: URI of container image + """ + worker = cast(CloudflareWorker, function) + package = code_package.code_location + language = code_package.language_name + benchmark = code_package.benchmark + + # Update the worker with all package files + account_id = worker.account_id or self.config.credentials.account_id + if not account_id: + raise RuntimeError("Account ID is required to update worker") + + # For container deployments, skip redeployment if code hasn't changed + # Containers don't support runtime memory configuration changes + # Detect container deployment by checking if worker name starts with "container-" + is_container = worker.name.startswith("container-") + + if is_container: + self.logging.info(f"Skipping redeployment for container worker {worker.name} - containers don't support runtime memory updates") + else: + self._create_or_update_worker(worker.name, package, account_id, language, benchmark, code_package, container_deployment, container_uri) + self.logging.info(f"Updated worker {worker.name}") + + # Update configuration if needed (no-op for containers since they don't support runtime memory changes) + self.update_function_configuration(worker, code_package) + + def update_function_configuration( + self, cached_function: Function, benchmark: Benchmark + ): + """ + Update the configuration of a Cloudflare Worker. + + Note: Cloudflare Workers have limited configuration options compared + to traditional FaaS platforms. Memory and timeout are managed by Cloudflare. + + Args: + cached_function: The function to update + benchmark: The benchmark with new configuration + """ + # Cloudflare Workers have fixed resource limits: + # - CPU time: 50ms (free), 50ms-30s (paid) + # - Memory: 128MB + # Most configuration is handled via wrangler.toml or API settings + + worker = cast(CloudflareWorker, cached_function) + + # For environment variables or KV namespaces, we would use the API here + # For now, we'll just log that configuration update was requested + self.logging.info( + f"Configuration update requested for worker {worker.name}. " + "Note: Cloudflare Workers have limited runtime configuration options." + ) + + def default_function_name(self, code_package: Benchmark, resources=None) -> str: + """ + Generate a default function name for Cloudflare Workers. + + Args: + code_package: The benchmark package + resources: Optional resources (not used) + + Returns: + Default function name + """ + # Cloudflare Worker names must be lowercase and can contain hyphens + return ( + f"{code_package.benchmark}-{code_package.language_name}-" + f"{code_package.language_version.replace('.', '')}" + ).lower() + + @staticmethod + def format_function_name(name: str, container_deployment: bool = False) -> str: + """ + Format a function name to comply with Cloudflare Worker naming rules. 
+ + Worker names must: + - Be lowercase + - Contain only alphanumeric characters and hyphens + - Not start or end with a hyphen + - Not start with a digit + + Args: + name: The original name + container_deployment: Whether this is a container worker (adds 'w-' prefix if name starts with digit) + + Returns: + Formatted name + """ + # Convert to lowercase and replace invalid characters + formatted = name.lower().replace('_', '-').replace('.', '-') + # Remove any characters that aren't alphanumeric or hyphen + formatted = ''.join(c for c in formatted if c.isalnum() or c == '-') + # Remove leading/trailing hyphens + formatted = formatted.strip('-') + # Ensure container worker names don't start with a digit (Cloudflare requirement) + # Only add prefix for container workers to differentiate from native workers + if container_deployment and formatted and formatted[0].isdigit(): + formatted = 'container-' + formatted + return formatted + + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Enforce cold start for Cloudflare Workers. + + Note: Cloudflare Workers don't have a traditional cold start mechanism + like AWS Lambda. Workers are instantiated on-demand at edge locations. + We can't force a cold start, but we can update the worker to invalidate caches. + + Args: + functions: List of functions to enforce cold start on + code_package: The benchmark package + """ + raise NotImplementedError( + "Cloudflare Workers do not support forced cold starts. " + "Workers are automatically instantiated on-demand at edge locations." + ) + + + def download_metrics( + self, + function_name: str, + start_time: int, + end_time: int, + requests: Dict[str, ExecutionResult], + metrics: dict, + ): + """ + Extract per-invocation metrics from ExecutionResult objects. + + The metrics are extracted from the 'measurement' field in the benchmark + response, which is populated by the Cloudflare Worker handler during execution. + This approach avoids dependency on Analytics Engine and provides immediate, + accurate metrics for each invocation. 
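# A worked example of the worker naming pipeline: default_function_name()
# produces names such as "120.uploader-python-312" (an illustrative benchmark
# name), and format_function_name() reduces them to valid worker names
# (lowercase, alphanumerics and hyphens only, no leading/trailing hyphens).
def format_worker_name(name: str, container_deployment: bool = False) -> str:
    formatted = name.lower().replace("_", "-").replace(".", "-")
    formatted = "".join(c for c in formatted if c.isalnum() or c == "-").strip("-")
    if container_deployment and formatted and formatted[0].isdigit():
        # container workers must not start with a digit
        formatted = "container-" + formatted
    return formatted


assert format_worker_name("120.uploader-python-312") == "120-uploader-python-312"
assert format_worker_name("120.uploader-python-312", True) == "container-120-uploader-python-312"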
+ + Args: + function_name: Name of the worker + start_time: Start time (Unix timestamp in seconds) - not used + end_time: End time (Unix timestamp in seconds) - not used + requests: Dict mapping request_id -> ExecutionResult + metrics: Dict to store aggregated metrics + """ + if not requests: + self.logging.warning("No requests to extract metrics from") + return + + self.logging.info( + f"Extracting metrics from {len(requests)} invocations " + f"of worker {function_name}" + ) + + # Aggregate statistics from all requests + total_invocations = len(requests) + cold_starts = 0 + warm_starts = 0 + cpu_times = [] + wall_times = [] + memory_values = [] + + for request_id, result in requests.items(): + # Count cold/warm starts + if result.stats.cold_start: + cold_starts += 1 + else: + warm_starts += 1 + + # Collect CPU times + if result.provider_times.execution > 0: + cpu_times.append(result.provider_times.execution) + + # Collect wall times (benchmark times) + if result.times.benchmark > 0: + wall_times.append(result.times.benchmark) + + # Collect memory usage + if result.stats.memory_used is not None and result.stats.memory_used > 0: + memory_values.append(result.stats.memory_used) + + # Set billing info for Cloudflare Workers + # Cloudflare billing: $0.50 per million requests + + # $12.50 per million GB-seconds of CPU time + if result.provider_times.execution > 0: + result.billing.memory = 128 # Cloudflare Workers: fixed 128MB + result.billing.billed_time = result.provider_times.execution # μs + + # GB-seconds calculation: (128MB / 1024MB/GB) * (cpu_time_us / 1000000 us/s) + cpu_time_seconds = result.provider_times.execution / 1_000_000.0 + gb_seconds = (128.0 / 1024.0) * cpu_time_seconds + result.billing.gb_seconds = int(gb_seconds * 1_000_000) # micro GB-seconds + + # Calculate statistics + metrics['cloudflare'] = { + 'total_invocations': total_invocations, + 'cold_starts': cold_starts, + 'warm_starts': warm_starts, + 'data_source': 'response_measurements', + 'note': 'Per-invocation metrics extracted from benchmark response' + } + + if cpu_times: + metrics['cloudflare']['avg_cpu_time_us'] = sum(cpu_times) // len(cpu_times) + metrics['cloudflare']['min_cpu_time_us'] = min(cpu_times) + metrics['cloudflare']['max_cpu_time_us'] = max(cpu_times) + metrics['cloudflare']['cpu_time_measurements'] = len(cpu_times) + + if wall_times: + metrics['cloudflare']['avg_wall_time_us'] = sum(wall_times) // len(wall_times) + metrics['cloudflare']['min_wall_time_us'] = min(wall_times) + metrics['cloudflare']['max_wall_time_us'] = max(wall_times) + metrics['cloudflare']['wall_time_measurements'] = len(wall_times) + + if memory_values: + metrics['cloudflare']['avg_memory_mb'] = sum(memory_values) / len(memory_values) + metrics['cloudflare']['min_memory_mb'] = min(memory_values) + metrics['cloudflare']['max_memory_mb'] = max(memory_values) + metrics['cloudflare']['memory_measurements'] = len(memory_values) + + self.logging.info( + f"Extracted metrics from {total_invocations} invocations: " + f"{cold_starts} cold starts, {warm_starts} warm starts" + ) + + if cpu_times: + avg_cpu_ms = sum(cpu_times) / len(cpu_times) / 1000.0 + self.logging.info(f"Average CPU time: {avg_cpu_ms:.2f} ms") + + if wall_times: + avg_wall_ms = sum(wall_times) / len(wall_times) / 1000.0 + self.logging.info(f"Average wall time: {avg_wall_ms:.2f} ms") + + def create_trigger( + self, function: Function, trigger_type: Trigger.TriggerType + ) -> Trigger: + """ + Create a trigger for a Cloudflare Worker. 
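# A worked example of the billing conversion in download_metrics(): Workers
# report CPU time in microseconds and run with a fixed 128 MB, so GB-seconds
# are (128/1024 GB) * (cpu_time_us / 1e6 s), stored as integer micro GB-seconds.
def micro_gb_seconds(cpu_time_us: int, memory_mb: int = 128) -> int:
    cpu_time_s = cpu_time_us / 1_000_000.0
    gb_seconds = (memory_mb / 1024.0) * cpu_time_s
    return int(gb_seconds * 1_000_000)


# 40 ms of CPU time -> 0.125 GB * 0.04 s = 0.005 GB-s = 5000 micro GB-seconds
assert micro_gb_seconds(40_000) == 5000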
+ + Args: + function: The function to create a trigger for + trigger_type: Type of trigger to create + + Returns: + The created trigger + """ + from sebs.cloudflare.triggers import HTTPTrigger + + worker = cast(CloudflareWorker, function) + + if trigger_type == Trigger.TriggerType.HTTP: + account_id = worker.account_id or self.config.credentials.account_id + worker_url = self._build_workers_dev_url(worker.name, account_id) + trigger = HTTPTrigger(worker.name, worker_url) + trigger.logging_handlers = self.logging_handlers + return trigger + else: + raise NotImplementedError( + f"Trigger type {trigger_type} is not supported for Cloudflare Workers" + ) + + def shutdown(self) -> None: + """ + Shutdown the Cloudflare system. + + Saves configuration to cache and shuts down deployment handler CLI containers. + """ + try: + self.cache_client.lock() + self.config.update_cache(self.cache_client) + finally: + self.cache_client.unlock() + + # Shutdown deployment handler CLI containers + self._workers_deployment.shutdown() + self._containers_deployment.shutdown() diff --git a/sebs/cloudflare/config.py b/sebs/cloudflare/config.py new file mode 100644 index 000000000..b75c52ad8 --- /dev/null +++ b/sebs/cloudflare/config.py @@ -0,0 +1,260 @@ +import os +from typing import Optional, cast + +from sebs.cache import Cache +from sebs.faas.config import Config, Credentials, Resources +from sebs.utils import LoggingHandlers + + +class CloudflareCredentials(Credentials): + """ + Cloudflare API credentials. + + Requires: + - API token or email + global API key + - Account ID + - Optional: R2 S3-compatible credentials for file uploads + """ + + def __init__(self, api_token: Optional[str] = None, email: Optional[str] = None, + api_key: Optional[str] = None, account_id: Optional[str] = None, + r2_access_key_id: Optional[str] = None, r2_secret_access_key: Optional[str] = None): + super().__init__() + + self._api_token = api_token + self._email = email + self._api_key = api_key + self._account_id = account_id + self._r2_access_key_id = r2_access_key_id + self._r2_secret_access_key = r2_secret_access_key + + @staticmethod + def typename() -> str: + return "Cloudflare.Credentials" + + @property + def api_token(self) -> Optional[str]: + return self._api_token + + @property + def email(self) -> Optional[str]: + return self._email + + @property + def api_key(self) -> Optional[str]: + return self._api_key + + @property + def account_id(self) -> Optional[str]: + return self._account_id + + @property + def r2_access_key_id(self) -> Optional[str]: + return self._r2_access_key_id + + @property + def r2_secret_access_key(self) -> Optional[str]: + return self._r2_secret_access_key + + @staticmethod + def initialize(dct: dict) -> "CloudflareCredentials": + return CloudflareCredentials( + dct.get("api_token"), + dct.get("email"), + dct.get("api_key"), + dct.get("account_id"), + dct.get("r2_access_key_id"), + dct.get("r2_secret_access_key") + ) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + cached_config = cache.get_config("cloudflare") + ret: CloudflareCredentials + account_id: Optional[str] = None + + # Load cached values + if cached_config and "credentials" in cached_config: + account_id = cached_config["credentials"].get("account_id") + + # Check for new config + if "credentials" in config: + ret = CloudflareCredentials.initialize(config["credentials"]) + elif "CLOUDFLARE_API_TOKEN" in os.environ: + ret = CloudflareCredentials( + 
api_token=os.environ["CLOUDFLARE_API_TOKEN"], + account_id=os.environ.get("CLOUDFLARE_ACCOUNT_ID"), + r2_access_key_id=os.environ.get("CLOUDFLARE_R2_ACCESS_KEY_ID"), + r2_secret_access_key=os.environ.get("CLOUDFLARE_R2_SECRET_ACCESS_KEY") + ) + elif "CLOUDFLARE_EMAIL" in os.environ and "CLOUDFLARE_API_KEY" in os.environ: + ret = CloudflareCredentials( + email=os.environ["CLOUDFLARE_EMAIL"], + api_key=os.environ["CLOUDFLARE_API_KEY"], + account_id=os.environ.get("CLOUDFLARE_ACCOUNT_ID"), + r2_access_key_id=os.environ.get("CLOUDFLARE_R2_ACCESS_KEY_ID"), + r2_secret_access_key=os.environ.get("CLOUDFLARE_R2_SECRET_ACCESS_KEY") + ) + else: + raise RuntimeError( + "Cloudflare login credentials are missing! Please set " + "up environmental variables CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID, " + "or CLOUDFLARE_EMAIL, CLOUDFLARE_API_KEY, and CLOUDFLARE_ACCOUNT_ID" + ) + + if account_id is not None and ret.account_id is not None and account_id != ret.account_id: + ret.logging.error( + f"The account id {ret.account_id} from provided credentials is different " + f"from the account id {account_id} found in the cache! Please change " + "your cache directory or create a new one!" + ) + raise RuntimeError( + f"Cloudflare login credentials do not match the account {account_id} in cache!" + ) + + ret.logging_handlers = handlers + return ret + + def update_cache(self, cache: Cache): + if self._account_id: + cache.update_config(val=self._account_id, + keys=["cloudflare", "credentials", "account_id"]) + + def serialize(self) -> dict: + out = {} + if self._account_id: + out["account_id"] = self._account_id + return out + + +class CloudflareResources(Resources): + """ + Resources for Cloudflare Workers deployment. + """ + + def __init__(self): + super().__init__(name="cloudflare") + self._namespace_id: Optional[str] = None + + @staticmethod + def typename() -> str: + return "Cloudflare.Resources" + + @property + def namespace_id(self) -> Optional[str]: + return self._namespace_id + + @namespace_id.setter + def namespace_id(self, value: str): + self._namespace_id = value + + @staticmethod + def initialize(res: Resources, dct: dict): + ret = cast(CloudflareResources, res) + super(CloudflareResources, CloudflareResources).initialize(ret, dct) + + if "namespace_id" in dct: + ret._namespace_id = dct["namespace_id"] + + return ret + + def serialize(self) -> dict: + out = {**super().serialize()} + if self._namespace_id: + out["namespace_id"] = self._namespace_id + return out + + def update_cache(self, cache: Cache): + super().update_cache(cache) + if self._namespace_id: + cache.update_config( + val=self._namespace_id, + keys=["cloudflare", "resources", "namespace_id"] + ) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + ret = CloudflareResources() + cached_config = cache.get_config("cloudflare") + + # Load cached values + if cached_config and "resources" in cached_config: + CloudflareResources.initialize(ret, cached_config["resources"]) + ret.logging_handlers = handlers + ret.logging.info("Using cached resources for Cloudflare") + else: + # Check for new config + if "resources" in config: + CloudflareResources.initialize(ret, config["resources"]) + ret.logging_handlers = handlers + ret.logging.info("No cached resources for Cloudflare found, using user configuration.") + else: + CloudflareResources.initialize(ret, {}) + ret.logging_handlers = handlers + ret.logging.info("No resources for Cloudflare found, initialize!") + + return ret + + +class 
CloudflareConfig(Config): + """ + Configuration for Cloudflare Workers platform. + """ + + def __init__(self, credentials: CloudflareCredentials, resources: CloudflareResources): + super().__init__(name="cloudflare") + self._credentials = credentials + self._resources = resources + + @staticmethod + def typename() -> str: + return "Cloudflare.Config" + + @property + def credentials(self) -> CloudflareCredentials: + return self._credentials + + @property + def resources(self) -> CloudflareResources: + return self._resources + + @staticmethod + def initialize(cfg: Config, dct: dict): + config = cast(CloudflareConfig, cfg) + # Cloudflare Workers are globally distributed, no region needed + config._region = dct.get("region", "global") + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + cached_config = cache.get_config("cloudflare") + credentials = cast(CloudflareCredentials, + CloudflareCredentials.deserialize(config, cache, handlers)) + resources = cast(CloudflareResources, + CloudflareResources.deserialize(config, cache, handlers)) + config_obj = CloudflareConfig(credentials, resources) + config_obj.logging_handlers = handlers + + # Load cached values + if cached_config: + config_obj.logging.info("Using cached config for Cloudflare") + CloudflareConfig.initialize(config_obj, cached_config) + else: + config_obj.logging.info("Using user-provided config for Cloudflare") + CloudflareConfig.initialize(config_obj, config) + + resources.region = config_obj.region + return config_obj + + def update_cache(self, cache: Cache): + cache.update_config(val=self.region, keys=["cloudflare", "region"]) + self.credentials.update_cache(cache) + self.resources.update_cache(cache) + + def serialize(self) -> dict: + out = { + "name": "cloudflare", + "region": self._region, + "credentials": self._credentials.serialize(), + "resources": self._resources.serialize(), + } + return out diff --git a/sebs/cloudflare/containers.py b/sebs/cloudflare/containers.py new file mode 100644 index 000000000..5b1fd9fd4 --- /dev/null +++ b/sebs/cloudflare/containers.py @@ -0,0 +1,534 @@ +""" +Cloudflare Container Workers deployment implementation. + +Handles packaging, Docker image building, and deployment of containerized +Cloudflare Workers using @cloudflare/containers. +""" + +import os +import shutil +import json +import io +import re +import time +import tarfile +try: + import tomllib # Python 3.11+ +except ImportError: + import tomli as tomllib # Fallback for older Python +try: + import tomli_w +except ImportError: + # Fallback to basic TOML writing if tomli_w not available + import toml as tomli_w +from typing import Optional, Tuple + +import docker +import requests + +from sebs.benchmark import Benchmark +from sebs.cloudflare.cli import CloudflareCLI + + +class CloudflareContainersDeployment: + """Handles Cloudflare container worker deployment operations.""" + + def __init__(self, logging, system_config, docker_client, system_resources): + """ + Initialize CloudflareContainersDeployment. 
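# A minimal sketch of the TOML round trip used by the wrangler.toml generation
# below: load a template with tomllib (tomli on Python < 3.11), mutate the
# parsed dict, and write it back with tomli_w. Paths and names are placeholders.
try:
    import tomllib            # Python 3.11+
except ImportError:
    import tomli as tomllib   # backport for older interpreters

import tomli_w


def render_wrangler_toml(template_path: str, out_path: str,
                         worker_name: str, account_id: str) -> None:
    with open(template_path, "rb") as f:
        config = tomllib.load(f)
    config["name"] = worker_name
    config["account_id"] = account_id
    with open(out_path, "wb") as f:
        tomli_w.dump(config, f)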
+ + Args: + logging: Logger instance + system_config: System configuration + docker_client: Docker client instance + system_resources: System resources manager + """ + self.logging = logging + self.system_config = system_config + self.docker_client = docker_client + self.system_resources = system_resources + self._cli: Optional[CloudflareCLI] = None + + def _get_cli(self) -> CloudflareCLI: + """Get or initialize the Cloudflare CLI container.""" + if self._cli is None: + self._cli = CloudflareCLI(self.system_config, self.docker_client) + # Verify wrangler is available + version = self._cli.check_wrangler_version() + self.logging.info(f"Cloudflare CLI container ready: {version}") + return self._cli + + def generate_wrangler_toml( + self, + worker_name: str, + package_dir: str, + language: str, + account_id: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_uri: str = "", + ) -> str: + """ + Generate a wrangler.toml configuration file for container workers. + + Args: + worker_name: Name of the worker + package_dir: Directory containing the worker code + language: Programming language (nodejs or python) + account_id: Cloudflare account ID + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + container_uri: Container image URI/tag + + Returns: + Path to the generated wrangler.toml file + """ + # Load template + template_path = os.path.join( + os.path.dirname(__file__), + "../..", + "templates", + "wrangler-container.toml" + ) + with open(template_path, 'rb') as f: + config = tomllib.load(f) + + # Update basic configuration + config['name'] = worker_name + config['account_id'] = account_id + + # Update container configuration with instance type if needed + if benchmark_name and ("411.image-recognition" in benchmark_name or + "311.compression" in benchmark_name or + "504.dna-visualisation" in benchmark_name): + self.logging.warning("Using standard-4 instance type for high resource benchmark") + config['containers'][0]['instance_type'] = "standard-4" + + # Add nosql table bindings if benchmark uses them + if code_package and code_package.uses_nosql: + # Get registered nosql tables for this benchmark + nosql_storage = self.system_resources.get_nosql_storage() + if nosql_storage.retrieve_cache(benchmark_name): + nosql_tables = nosql_storage._tables.get(benchmark_name, {}) + + # Add durable object bindings for each nosql table + for table_name in nosql_tables.keys(): + config['durable_objects']['bindings'].append({ + 'name': table_name, + 'class_name': 'KVApiObject' + }) + + # Update migrations to include KVApiObject + config['migrations'][0]['new_sqlite_classes'].append('KVApiObject') + + # Add environment variables + if benchmark_name or (code_package and code_package.uses_nosql): + config['vars'] = {} + if benchmark_name: + config['vars']['BENCHMARK_NAME'] = benchmark_name + if code_package and code_package.uses_nosql: + config['vars']['NOSQL_STORAGE_DATABASE'] = "durable_objects" + + # Add R2 bucket binding + try: + from sebs.faas.config import Resources + storage = self.system_resources.get_storage() + bucket_name = storage.get_bucket(Resources.StorageBucketType.BENCHMARKS) + if bucket_name: + config['r2_buckets'] = [{ + 'binding': 'R2', + 'bucket_name': bucket_name + }] + self.logging.info(f"R2 bucket '{bucket_name}' will be bound to worker as 'R2'") + except Exception as e: + self.logging.warning( + f"R2 bucket binding not configured: {e}. 
" + f"Benchmarks requiring file access will not work properly." + ) + + # Write wrangler.toml to package directory + toml_path = os.path.join(package_dir, "wrangler.toml") + try: + # Try tomli_w (writes binary) + with open(toml_path, 'wb') as f: + tomli_w.dump(config, f) + except TypeError: + # Fallback to toml library (writes text) + with open(toml_path, 'w') as f: + f.write(tomli_w.dumps(config)) + + self.logging.info(f"Generated wrangler.toml at {toml_path}") + return toml_path + + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + architecture: str, + benchmark: str, + ) -> Tuple[str, int, str]: + """ + Package code for Cloudflare container worker deployment. + + Builds a Docker image and returns the image tag for deployment. + + Args: + directory: Path to the code directory + language_name: Programming language name + language_version: Programming language version + architecture: Target architecture + benchmark: Benchmark name + + Returns: + Tuple of (package_path, package_size, container_uri) + """ + self.logging.info(f"Packaging container for {language_name} {language_version}") + + # Get wrapper directory for container files + wrapper_base = os.path.join( + os.path.dirname(__file__), "..", "..", "benchmarks", "wrappers", "cloudflare" + ) + wrapper_container_dir = os.path.join(wrapper_base, language_name, "container") + + if not os.path.exists(wrapper_container_dir): + raise RuntimeError( + f"Container wrapper directory not found: {wrapper_container_dir}" + ) + + # Copy container wrapper files to the package directory + # Copy Dockerfile from dockerfiles/cloudflare/{language}/ + dockerfile_src = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "dockerfiles", + "cloudflare", + language_name, + "Dockerfile" + ) + dockerfile_dest = os.path.join(directory, "Dockerfile") + if os.path.exists(dockerfile_src): + # Read Dockerfile and update BASE_IMAGE based on language version + with open(dockerfile_src, 'r') as f: + dockerfile_content = f.read() + + # Get base image from systems.json for container deployments + container_images = self.system_config.benchmark_container_images( + "cloudflare", language_name, architecture + ) + base_image = container_images.get(language_version) + if not base_image: + raise RuntimeError( + f"No container base image found in systems.json for {language_name} {language_version} on {architecture}" + ) + + # Replace BASE_IMAGE default value in ARG line + dockerfile_content = re.sub( + r'ARG BASE_IMAGE=.*', + f'ARG BASE_IMAGE={base_image}', + dockerfile_content + ) + + # Write modified Dockerfile + with open(dockerfile_dest, 'w') as f: + f.write(dockerfile_content) + + self.logging.info(f"Copied Dockerfile from {dockerfile_src}") + else: + raise RuntimeError(f"Dockerfile not found at {dockerfile_src}") + + # Copy handler and utility files from wrapper/container + # Note: ALL containers use worker.js for orchestration (@cloudflare/containers is Node.js only) + # The handler inside the container can be Python or Node.js + container_files = ["handler.py" if language_name == "python" else "handler.js"] + + # For worker.js orchestration file, always use the nodejs version + nodejs_wrapper_dir = os.path.join(wrapper_base, "nodejs", "container") + worker_js_src = os.path.join(nodejs_wrapper_dir, "worker.js") + worker_js_dest = os.path.join(directory, "worker.js") + if os.path.exists(worker_js_src): + shutil.copy2(worker_js_src, worker_js_dest) + self.logging.info(f"Copied worker.js orchestration file from 
nodejs/container") + + # Copy storage and nosql utilities from language-specific wrapper + if language_name == "nodejs": + container_files.extend(["storage.js", "nosql.js"]) + else: + container_files.extend(["storage.py", "nosql.py"]) + + for file in container_files: + src = os.path.join(wrapper_container_dir, file) + dest = os.path.join(directory, file) + if os.path.exists(src): + shutil.copy2(src, dest) + self.logging.info(f"Copied container file: {file}") + + # Check if benchmark has init.sh and copy it (needed for some benchmarks like video-processing) + # Look in both the benchmark root and the language-specific directory + from sebs.utils import find_benchmark + benchmark_path = find_benchmark(benchmark, "benchmarks") + if benchmark_path: + paths = [ + benchmark_path, + os.path.join(benchmark_path, language_name), + ] + for path in paths: + init_sh = os.path.join(path, "init.sh") + if os.path.exists(init_sh): + shutil.copy2(init_sh, os.path.join(directory, "init.sh")) + self.logging.info(f"Copied init.sh from {path}") + break + + # For Python containers, fix relative imports in benchmark code + # Containers use flat structure, so "from . import storage" must become "import storage" + if language_name == "python": + for item in os.listdir(directory): + if item.endswith('.py') and item not in ['handler.py', 'storage.py', 'nosql.py', 'worker.py']: + file_path = os.path.join(directory, item) + with open(file_path, 'r') as f: + content = f.read() + # Fix relative imports + content = re.sub(r'from \. import ', 'import ', content) + with open(file_path, 'w') as f: + f.write(content) + + # For Node.js containers, transform benchmark code to be async-compatible + # The container wrapper uses async HTTP calls, but benchmarks expect sync + elif language_name == "nodejs": + for item in os.listdir(directory): + if item.endswith('.js') and item not in ['handler.js', 'storage.js', 'nosql.js', 'worker.js', 'build.js', 'request-polyfill.js']: + file_path = os.path.join(directory, item) + # Could add transformations here if needed + pass + + # Prepare package.json for container orchestration + # ALL containers need @cloudflare/containers for worker.js orchestration + worker_package_json = { + "name": f"{benchmark}-worker", + "version": "1.0.0", + "dependencies": { + "@cloudflare/containers": "*" + } + } + + if language_name == "nodejs": + # Read the benchmark's package.json if it exists and merge dependencies + benchmark_package_file = os.path.join(directory, "package.json") + if os.path.exists(benchmark_package_file): + with open(benchmark_package_file, 'r') as f: + benchmark_package = json.load(f) + # Merge dependencies + if "dependencies" in benchmark_package: + worker_package_json["dependencies"].update(benchmark_package["dependencies"]) + + # Write the combined package.json + with open(benchmark_package_file, 'w') as f: + json.dump(worker_package_json, f, indent=2) + else: # Python containers also need package.json for worker.js orchestration + # Create package.json just for @cloudflare/containers (Python code in container) + package_json_path = os.path.join(directory, "package.json") + with open(package_json_path, 'w') as f: + json.dump(worker_package_json, f, indent=2) + + # Install Node.js dependencies for wrangler deployment + # Note: These are needed for wrangler to bundle worker.js, not for the container + # The container also installs them during Docker build + self.logging.info(f"Installing Node.js dependencies for wrangler deployment in {directory}") + cli = self._get_cli() + 
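# A condensed sketch of the package.json preparation above: the worker
# orchestration layer always needs @cloudflare/containers, and any dependencies
# declared by a Node.js benchmark are merged on top. The file path is a
# placeholder.
import json


def merge_worker_package_json(package_json_path: str, benchmark: str) -> None:
    merged = {
        "name": f"{benchmark}-worker",
        "version": "1.0.0",
        "dependencies": {"@cloudflare/containers": "*"},
    }
    try:
        with open(package_json_path) as f:
            merged["dependencies"].update(json.load(f).get("dependencies", {}))
    except FileNotFoundError:
        pass
    with open(package_json_path, "w") as f:
        json.dump(merged, f, indent=2)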
container_path = f"/tmp/container_npm/{os.path.basename(directory)}" + + try: + # Upload package directory to CLI container + cli.upload_package(directory, container_path) + + # Install production dependencies + output = cli.execute(f"cd {container_path} && npm install --production") + self.logging.info("npm install completed successfully") + self.logging.debug(f"npm output: {output.decode('utf-8')}") + + # Download node_modules back to host for wrangler + bits, stat = cli.docker_instance.get_archive(f"{container_path}/node_modules") + file_obj = io.BytesIO() + for chunk in bits: + file_obj.write(chunk) + file_obj.seek(0) + with tarfile.open(fileobj=file_obj) as tar: + tar.extractall(directory) + + self.logging.info(f"Downloaded node_modules to {directory} for wrangler deployment") + except Exception as e: + self.logging.error(f"npm install failed: {e}") + raise RuntimeError(f"Failed to install Node.js dependencies: {e}") + + # For Python containers, also handle Python requirements + if language_name == "python": + # Python requirements will be installed in the Dockerfile + # Rename version-specific requirements.txt to requirements.txt + requirements_file = os.path.join(directory, "requirements.txt") + versioned_requirements = os.path.join(directory, f"requirements.txt.{language_version}") + + if os.path.exists(versioned_requirements): + shutil.copy2(versioned_requirements, requirements_file) + self.logging.info(f"Copied requirements.txt.{language_version} to requirements.txt") + + # Fix torch wheel URLs for container compatibility + # Replace direct wheel URLs with proper torch installation + with open(requirements_file, 'r') as f: + content = f.read() + + # Replace torch wheel URLs with proper installation commands + modified = False + if 'download.pytorch.org/whl' in content: + # Replace direct wheel URL with pip-installable torch + content = re.sub( + r'https://download\.pytorch\.org/whl/[^\s]+\.whl', + 'torch', + content + ) + modified = True + + if modified: + with open(requirements_file, 'w') as f: + f.write(content) + self.logging.info("Fixed torch URLs in requirements.txt for container compatibility") + + elif not os.path.exists(requirements_file): + # Create empty requirements.txt if none exists + with open(requirements_file, 'w') as f: + f.write("") + self.logging.info("Created empty requirements.txt") + + # Build Docker image locally for cache compatibility + # wrangler will re-build/push during deployment from the Dockerfile + image_tag = self._build_container_image_local(directory, benchmark, language_name, language_version) + + # Calculate package size (approximate, as it's a source directory) + total_size = 0 + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + self.logging.info(f"Container package prepared with local image: {image_tag}") + + # Return local image tag (wrangler will rebuild from Dockerfile during deploy) + return (directory, total_size, image_tag) + + def _build_container_image_local( + self, + directory: str, + benchmark: str, + language_name: str, + language_version: str, + ) -> str: + """ + Build a Docker image locally for cache purposes. + wrangler will rebuild from Dockerfile during deployment. + + Returns the local image tag. 
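# A standalone sketch of copying a directory back out of the CLI container, as
# done for node_modules above: docker-py's get_archive() yields a tar stream
# that is buffered and extracted locally. Container and path names are
# placeholders.
import io
import tarfile

import docker


def copy_dir_from_container(container, src: str, dest_dir: str) -> None:
    bits, _stat = container.get_archive(src)
    buf = io.BytesIO()
    for chunk in bits:
        buf.write(chunk)
    buf.seek(0)
    with tarfile.open(fileobj=buf) as tar:
        tar.extractall(dest_dir)


# Usage sketch:
# client = docker.from_env()
# copy_dir_from_container(client.containers.get("manage-cloudflare"),
#                         "/tmp/pkg/node_modules", "./package")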
+ """ + # Generate image tag + image_name = f"{benchmark.replace('.', '-')}-{language_name}-{language_version.replace('.', '')}" + image_tag = f"{image_name}:latest" + + self.logging.info(f"Building local container image: {image_tag}") + + try: + # Build the Docker image using docker-py + # nocache=True ensures handler changes are picked up + _, build_logs = self.docker_client.images.build( + path=directory, + tag=image_tag, + nocache=True, + rm=True + ) + + # Log build output + for log in build_logs: + if 'stream' in log: + self.logging.debug(log['stream'].strip()) + elif 'error' in log: + self.logging.error(log['error']) + + self.logging.info(f"Local container image built: {image_tag}") + + return image_tag + + except docker.errors.BuildError as e: + error_msg = f"Docker build failed for {image_tag}: {e}" + self.logging.error(error_msg) + raise RuntimeError(error_msg) + except Exception as e: + error_msg = f"Unexpected error building Docker image {image_tag}: {e}" + self.logging.error(error_msg) + raise RuntimeError(error_msg) + + def wait_for_durable_object_ready( + self, + worker_name: str, + worker_url: str, + max_wait_seconds: int = 400 + ) -> bool: + """ + Wait for container Durable Object to be fully provisioned and ready. + + Args: + worker_name: Name of the worker + worker_url: URL of the worker + max_wait_seconds: Maximum time to wait in seconds + + Returns: + True if ready, False if timeout + """ + wait_interval = 10 + start_time = time.time() + + self.logging.info("Checking container Durable Object readiness via health endpoint...") + + consecutive_failures = 0 + max_consecutive_failures = 5 + + while time.time() - start_time < max_wait_seconds: + try: + # Use health check endpoint + response = requests.get( + f"{worker_url}/health", + timeout=60 + ) + + # 200 = ready + if response.status_code == 200: + self.logging.info("Container Durable Object is ready!") + return True + # 503 = not ready yet + elif response.status_code == 503: + elapsed = int(time.time() - start_time) + self.logging.info( + f"Container Durable Object not ready yet (503 Service Unavailable)... " + f"({elapsed}s elapsed, will retry)" + ) + # Other errors + else: + self.logging.warning(f"Unexpected status {response.status_code}: {response.text[:100]}") + + except requests.exceptions.Timeout: + elapsed = int(time.time() - start_time) + self.logging.info(f"Health check timeout (container may be starting)... ({elapsed}s elapsed)") + except requests.exceptions.RequestException as e: + elapsed = int(time.time() - start_time) + self.logging.debug(f"Connection error ({elapsed}s): {str(e)[:100]}") + + time.sleep(wait_interval) + + self.logging.warning( + f"Container Durable Object may not be fully ready after {max_wait_seconds}s. " + "First invocation may still experience initialization delay." 
+ ) + return False + + def shutdown(self): + """Shutdown CLI container if initialized.""" + if self._cli is not None: + self._cli.shutdown() + self._cli = None diff --git a/sebs/cloudflare/durable_objects.py b/sebs/cloudflare/durable_objects.py new file mode 100644 index 000000000..258886cf3 --- /dev/null +++ b/sebs/cloudflare/durable_objects.py @@ -0,0 +1,229 @@ +import json +from collections import defaultdict +from typing import Dict, Optional, Tuple + +from sebs.cloudflare.config import CloudflareCredentials +from sebs.faas.nosql import NoSQLStorage +from sebs.faas.config import Resources +from sebs.cache import Cache + + +class DurableObjects(NoSQLStorage): + """ + Cloudflare Durable Objects implementation for NoSQL storage. + + Note: Durable Objects are not managed via API like DynamoDB or CosmosDB. + Instead, they are defined in the Worker code and wrangler.toml, and accessed + via bindings in the Worker environment. This implementation provides a minimal + interface to satisfy SeBS requirements by tracking table names without actual + API-based table creation. + """ + + @staticmethod + def typename() -> str: + return "Cloudflare.DurableObjects" + + @staticmethod + def deployment_name() -> str: + return "cloudflare" + + def __init__( + self, + region: str, + cache_client: Cache, + resources: Resources, + credentials: CloudflareCredentials, + ): + super().__init__(region, cache_client, resources) + self._credentials = credentials + # Tables are just logical names - Durable Objects are accessed via Worker bindings + self._tables: Dict[str, Dict[str, str]] = defaultdict(dict) + + def _get_auth_headers(self) -> dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + if self._credentials.api_token: + return { + "Authorization": f"Bearer {self._credentials.api_token}", + "Content-Type": "application/json", + } + elif self._credentials.email and self._credentials.api_key: + return { + "X-Auth-Email": self._credentials.email, + "X-Auth-Key": self._credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get all tables for a benchmark. + + :param benchmark: benchmark name + :return: dictionary mapping table names to their IDs + """ + return self._tables[benchmark] + + def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the full table name for a benchmark table. + + :param benchmark: benchmark name + :param table: table name + :return: full table name or None if not found + """ + if benchmark not in self._tables: + return None + + if table not in self._tables[benchmark]: + return None + + return self._tables[benchmark][table] + + def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve cached table information. + + :param benchmark: benchmark name + :return: True if cache was found and loaded + """ + if benchmark in self._tables: + return True + + cached_storage = self.cache_client.get_nosql_config(self.deployment_name(), benchmark) + if cached_storage is not None: + self._tables[benchmark] = cached_storage["tables"] + self.logging.info(f"Retrieved cached Durable Objects tables for {benchmark}") + return True + + return False + + def update_cache(self, benchmark: str): + """ + Update cache with current table information. 
+ + :param benchmark: benchmark name + """ + self.cache_client.update_nosql( + self.deployment_name(), + benchmark, + { + "tables": self._tables[benchmark], + }, + ) + self.logging.info(f"Updated cache for Durable Objects tables for {benchmark}") + + def create_table( + self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + ) -> str: + """ + Register a table name for a benchmark. + + Note: Durable Objects don't have traditional table creation via API. + They are defined in the Worker code and wrangler.toml, and accessed via + bindings. This method just tracks the logical table name for the wrapper + to use when accessing the Durable Object binding. + + :param benchmark: benchmark name + :param name: table name + :param primary_key: primary key field name + :param secondary_key: optional secondary key field name + :return: table name (same as input name - used directly as binding name) + """ + # For Cloudflare, table names are used directly as the binding names + # in the wrapper code, so we just use the simple name + self._tables[benchmark][name] = name + + self.logging.info( + f"Registered Durable Object table '{name}' for benchmark {benchmark}" + ) + + return name + + def write_to_table( + self, + benchmark: str, + table: str, + data: dict, + primary_key: Tuple[str, str], + secondary_key: Optional[Tuple[str, str]] = None, + ): + """ + Write data to a table (Durable Object). + + Note: Cloudflare Durable Objects can only be written to from within the Worker, + not via external API calls. Data seeding for benchmarks is not supported. + Benchmarks that require pre-populated data (like test/small sizes of crud-api) + will return empty results. Use 'large' size which creates its own data. + + :param benchmark: benchmark name + :param table: table name + :param data: data to write + :param primary_key: primary key (field_name, value) + :param secondary_key: optional secondary key (field_name, value) + """ + table_name = self._get_table_name(benchmark, table) + + if not table_name: + raise ValueError(f"Table {table} not found for benchmark {benchmark}") + + # Silently skip data seeding for Cloudflare Durable Objects + # This is a platform limitation + pass + + def clear_table(self, name: str) -> str: + """ + Clear all data from a table. + + Note: Durable Object data is managed within the Worker. + + :param name: table name + :return: table name + """ + self.logging.warning(f"Durable Objects data is managed within the Worker") + return name + + def remove_table(self, name: str) -> str: + """ + Remove a table from tracking. + + :param name: table name + :return: table name + """ + # Remove from internal tracking - two-step approach to avoid mutation during iteration + benchmark_to_modify = None + table_key_to_delete = None + + # Step 1: Find the benchmark and table_key without deleting + for benchmark, tables in list(self._tables.items()): + if name in tables.values(): + # Find the table key + for table_key, table_name in list(tables.items()): + if table_name == name: + benchmark_to_modify = benchmark + table_key_to_delete = table_key + break + break + + # Step 2: Perform deletion after iteration + if benchmark_to_modify is not None and table_key_to_delete is not None: + del self._tables[benchmark_to_modify][table_key_to_delete] + + self.logging.info(f"Removed Durable Objects table {name} from tracking") + return name + + def envs(self) -> dict: + """ + Get environment variables for accessing Durable Objects. 
+ + Durable Objects are accessed via bindings in the Worker environment, + which are configured in wrangler.toml. We set a marker environment + variable so the wrapper knows Durable Objects are available. + + :return: dictionary of environment variables + """ + # Set a marker that Durable Objects are enabled + # The actual bindings (DURABLE_STORE, etc.) are configured in wrangler.toml + return { + "NOSQL_STORAGE_DATABASE": "durable_objects" + } diff --git a/sebs/cloudflare/function.py b/sebs/cloudflare/function.py new file mode 100644 index 000000000..cd422dc30 --- /dev/null +++ b/sebs/cloudflare/function.py @@ -0,0 +1,61 @@ +from typing import Optional, cast + +from sebs.faas.function import Function, FunctionConfig + + +class CloudflareWorker(Function): + """ + Cloudflare Workers function implementation. + + A Cloudflare Worker is a serverless function that runs on Cloudflare's edge network. + """ + + def __init__( + self, + name: str, + benchmark: str, + script_id: str, + code_package_hash: str, + runtime: str, + cfg: FunctionConfig, + account_id: Optional[str] = None, + ): + super().__init__(benchmark, name, code_package_hash, cfg) + self.script_id = script_id + self.runtime = runtime + self.account_id = account_id + + @staticmethod + def typename() -> str: + return "Cloudflare.Worker" + + def serialize(self) -> dict: + return { + **super().serialize(), + "script_id": self.script_id, + "runtime": self.runtime, + "account_id": self.account_id, + } + + @staticmethod + def deserialize(cached_config: dict) -> "CloudflareWorker": + from sebs.faas.function import Trigger + from sebs.cloudflare.triggers import HTTPTrigger + + cfg = FunctionConfig.deserialize(cached_config["config"]) + ret = CloudflareWorker( + cached_config["name"], + cached_config["benchmark"], + cached_config["script_id"], + cached_config["hash"], + cached_config["runtime"], + cfg, + cached_config.get("account_id"), + ) + + for trigger in cached_config["triggers"]: + trigger_type = HTTPTrigger if trigger["type"] == HTTPTrigger.typename() else None + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + ret.add_trigger(trigger_type.deserialize(trigger)) + + return ret diff --git a/sebs/cloudflare/r2.py b/sebs/cloudflare/r2.py new file mode 100644 index 000000000..45a1167c6 --- /dev/null +++ b/sebs/cloudflare/r2.py @@ -0,0 +1,387 @@ +import json +import os + +import requests +from sebs.cloudflare.config import CloudflareCredentials +from sebs.faas.storage import PersistentStorage +from sebs.faas.config import Resources +from sebs.cache import Cache + +from typing import List, Optional +class R2(PersistentStorage): + @staticmethod + def typename() -> str: + return "Cloudlfare.R2" + + @staticmethod + def deployment_name() -> str: + return "cloudflare" + + @property + def replace_existing(self) -> bool: + return self._replace_existing + + @replace_existing.setter + def replace_existing(self, val: bool): + self._replace_existing = val + + def __init__( + self, + region: str, + cache_client: Cache, + resources: Resources, + replace_existing: bool, + credentials: CloudflareCredentials, + ): + super().__init__(region, cache_client, resources, replace_existing) + self._credentials = credentials + self._s3_client = None + + def _get_auth_headers(self) -> dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + if self._credentials.api_token: + return { + "Authorization": f"Bearer {self._credentials.api_token}", + "Content-Type": "application/json", + } + elif self._credentials.email and 
self._credentials.api_key: + return { + "X-Auth-Email": self._credentials.email, + "X-Auth-Key": self._credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def _get_s3_client(self): + """ + Get or initialize the S3-compatible client for R2 operations. + + :return: boto3 S3 client or None if credentials not available + """ + if self._s3_client is not None: + return self._s3_client + + # Check if we have S3-compatible credentials + if not self._credentials.r2_access_key_id or not self._credentials.r2_secret_access_key: + self.logging.warning( + "R2 S3-compatible API credentials not configured. " + "Set CLOUDFLARE_R2_ACCESS_KEY_ID and CLOUDFLARE_R2_SECRET_ACCESS_KEY environment variables." + ) + return None + + try: + import boto3 + from botocore.config import Config + + account_id = self._credentials.account_id + + self._s3_client = boto3.client( + 's3', + endpoint_url=f'https://{account_id}.r2.cloudflarestorage.com', + aws_access_key_id=self._credentials.r2_access_key_id, + aws_secret_access_key=self._credentials.r2_secret_access_key, + config=Config(signature_version='s3v4'), + region_name='auto' + ) + + return self._s3_client + + except ImportError: + self.logging.warning( + "boto3 not available. Install with: pip install boto3" + ) + return None + + def correct_name(self, name: str) -> str: + return name + + def _create_bucket( + self, name: str, buckets: list[str] = [], randomize_name: bool = False + ) -> str: + for bucket_name in buckets: + if name in bucket_name: + self.logging.info( + "Bucket {} for {} already exists, skipping.".format( + bucket_name, name + ) + ) + return bucket_name + + account_id = self._credentials.account_id + + create_bucket_uri = ( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/r2/buckets" + ) + + # R2 API only accepts "name" parameter - locationHint is optional and must be one of: + # "apac", "eeur", "enam", "weur", "wnam" + # WARNING: locationHint is not currently supported by SeBS. Buckets are created + # with Cloudflare's automatic location selection. + params = {"name": name} + + self.logging.warning( + f"Creating R2 bucket '{name}' without locationHint. " + "Geographic location is determined automatically by Cloudflare." + ) + + try: + create_bucket_response = requests.post( + create_bucket_uri, json=params, headers=self._get_auth_headers() + ) + + # Log the response for debugging + if create_bucket_response.status_code >= 400: + try: + error_data = create_bucket_response.json() + self.logging.error( + f"R2 bucket creation failed. Status: {create_bucket_response.status_code}, " + f"Response: {error_data}" + ) + except: + self.logging.error( + f"R2 bucket creation failed. Status: {create_bucket_response.status_code}, " + f"Response: {create_bucket_response.text}" + ) + + create_bucket_response.raise_for_status() + + bucket_info_json = create_bucket_response.json() + + if not bucket_info_json.get("success"): + self.logging.error(f"Failed to create R2 bucket: {bucket_info_json.get('errors')}") + raise RuntimeError(f"Failed to create R2 bucket {name}") + + bucket_name = bucket_info_json.get("result", {}).get("name", name) + self.logging.info(f"Created R2 bucket {bucket_name}") + return bucket_name + + except requests.exceptions.RequestException as e: + self.logging.error(f"Error creating R2 bucket {name}: {e}") + raise + + def download(self, bucket_name: str, key: str, filepath: str) -> None: + """ + Download a file from a bucket. 
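+
+        Not implemented yet. A minimal sketch of what an S3-compatible implementation
+        could look like, reusing _get_s3_client() from this class (hypothetical, not wired up)::
+
+            s3_client = self._get_s3_client()
+            if s3_client is not None:
+                s3_client.download_file(bucket_name, key, filepath)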
+ + :param bucket_name: + :param key: storage source filepath + :param filepath: local destination filepath + """ + # R2 requires S3-compatible access for object operations + # For now, this is not fully implemented + self.logging.warning(f"download not fully implemented for R2 bucket {bucket_name}") + pass + + def upload(self, bucket_name: str, filepath: str, key: str): + """ + Upload a file to R2 bucket using the S3-compatible API. + + Requires S3 credentials to be configured for the R2 bucket. + + :param bucket_name: R2 bucket name + :param filepath: local source filepath + :param key: R2 destination key/path + """ + s3_client = self._get_s3_client() + if s3_client is None: + self.logging.warning(f"Cannot upload {filepath} to R2 - S3 client not available") + return + + try: + with open(filepath, 'rb') as f: + s3_client.put_object( + Bucket=bucket_name, + Key=key, + Body=f + ) + + self.logging.debug(f"Uploaded {filepath} to R2 bucket {bucket_name} as {key}") + + except Exception as e: + self.logging.warning(f"Failed to upload {filepath} to R2: {e}") + + def upload_bytes(self, bucket_name: str, key: str, data: bytes): + """ + Upload bytes directly to R2 bucket using the S3-compatible API. + + :param bucket_name: R2 bucket name + :param key: R2 destination key/path + :param data: bytes to upload + """ + s3_client = self._get_s3_client() + if s3_client is None: + self.logging.warning(f"Cannot upload bytes to R2 - S3 client not available") + return + + try: + s3_client.put_object( + Bucket=bucket_name, + Key=key, + Body=data + ) + + self.logging.debug(f"Uploaded {len(data)} bytes to R2 bucket {bucket_name} as {key}") + + except Exception as e: + self.logging.warning(f"Failed to upload bytes to R2: {e}") + + """ + Retrieves list of files in a bucket. + + :param bucket_name: + :return: list of files in a given bucket + """ + + def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + Retrieves list of files in a bucket using S3-compatible API. + + :param bucket_name: + :param prefix: optional prefix filter + :return: list of files in a given bucket + """ + s3_client = self._get_s3_client() + if s3_client is None: + self.logging.warning(f"Cannot list R2 bucket {bucket_name} - S3 client not available") + return [] + + try: + # List objects with optional prefix + paginator = s3_client.get_paginator('list_objects_v2') + page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix) + + files = [] + for page in page_iterator: + if 'Contents' in page: + for obj in page['Contents']: + files.append(obj['Key']) + + return files + + except Exception as e: + self.logging.warning(f"Failed to list R2 bucket {bucket_name}: {str(e)}") + return [] + + def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List all R2 buckets in the account. + + :param bucket_name: optional filter (not used for R2) + :return: list of bucket names + """ + account_id = self._credentials.account_id + + list_buckets_uri = ( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/r2/buckets" + ) + + try: + response = requests.get(list_buckets_uri, headers=self._get_auth_headers()) + + # Log detailed error information + if response.status_code == 403: + try: + error_data = response.json() + self.logging.error( + f"403 Forbidden accessing R2 buckets. " + f"Response: {error_data}. " + f"Your API token may need 'R2 Read and Write' permissions." + ) + except: + self.logging.error( + f"403 Forbidden accessing R2 buckets. 
" + f"Your API token may need 'R2 Read and Write' permissions." + ) + return [] + + response.raise_for_status() + + data = response.json() + + if not data.get("success"): + self.logging.error(f"Failed to list R2 buckets: {data.get('errors')}") + return [] + + # Extract bucket names from response + buckets = data.get("result", {}).get("buckets", []) + bucket_names = [bucket["name"] for bucket in buckets] + + self.logging.info(f"Found {len(bucket_names)} R2 buckets") + return bucket_names + + except requests.exceptions.RequestException as e: + self.logging.error(f"Error listing R2 buckets: {e}") + return [] + + def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a bucket exists. + + :param bucket_name: + :return: True if bucket exists + """ + buckets = self.list_buckets() + return bucket_name in buckets + + def clean_bucket(self, bucket_name: str): + """ + Remove all objects from a bucket. + + :param bucket_name: + """ + self.logging.warning(f"clean_bucket not fully implemented for R2 bucket {bucket_name}") + pass + + def remove_bucket(self, bucket: str): + """ + Delete a bucket. + + :param bucket: + """ + account_id = self._credentials.account_id + + delete_bucket_uri = ( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/r2/buckets/{bucket}" + ) + + try: + response = requests.delete(delete_bucket_uri, headers=self._get_auth_headers()) + response.raise_for_status() + + data = response.json() + + if data.get("success"): + self.logging.info(f"Successfully deleted R2 bucket {bucket}") + else: + self.logging.error(f"Failed to delete R2 bucket {bucket}: {data.get('errors')}") + + except requests.exceptions.RequestException as e: + self.logging.error(f"Error deleting R2 bucket {bucket}: {e}") + + def uploader_func(self, bucket_idx: int, file: str, filepath: str) -> None: + """ + Upload a file to a bucket (used for parallel uploads). + + :param bucket_idx: index of the bucket/prefix to upload to + :param file: destination file name/key + :param filepath: source file path + """ + # Skip upload when using cached buckets and not updating storage + if self.cached and not self.replace_existing: + return + + # Build the key with the input prefix + key = os.path.join(self.input_prefixes[bucket_idx], file) + + bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) + + # Check if file already exists (if not replacing existing files) + if not self.replace_existing: + for f in self.input_prefixes_files[bucket_idx]: + if key == f: + self.logging.info(f"Skipping upload of {filepath} to {bucket_name} (already exists)") + return + + # Upload the file + self.upload(bucket_name, filepath, key) diff --git a/sebs/cloudflare/resources.py b/sebs/cloudflare/resources.py new file mode 100644 index 000000000..1b3d9dbc7 --- /dev/null +++ b/sebs/cloudflare/resources.py @@ -0,0 +1,95 @@ +import docker + +from typing import Optional + +from sebs.cache import Cache +from sebs.cloudflare.config import CloudflareConfig +from sebs.cloudflare.r2 import R2 +from sebs.cloudflare.durable_objects import DurableObjects +from sebs.faas.resources import SystemResources +from sebs.faas.storage import PersistentStorage +from sebs.faas.nosql import NoSQLStorage +from sebs.utils import LoggingHandlers +import json + + +class CloudflareSystemResources(SystemResources): + """ + System resources for Cloudflare Workers. + + Cloudflare Workers have a different resource model compared to + traditional cloud platforms. This class handles Cloudflare-specific + resources like KV namespaces and R2 storage. 
+ """ + + def __init__( + self, + config: CloudflareConfig, + cache_client: Cache, + docker_client: docker.client, + logging_handlers: LoggingHandlers, + ): + super().__init__(config, cache_client, docker_client) + self._config = config + self.logging_handlers = logging_handlers + + @property + def config(self) -> CloudflareConfig: + return self._config + + def _get_auth_headers(self) -> dict[str, str]: + """Get authentication headers for Cloudflare API requests.""" + if self._config.credentials.api_token: + return { + "Authorization": f"Bearer {self._config.credentials.api_token}", + "Content-Type": "application/json", + } + elif self._config.credentials.email and self._config.credentials.api_key: + return { + "X-Auth-Email": self._config.credentials.email, + "X-Auth-Key": self._config.credentials.api_key, + "Content-Type": "application/json", + } + else: + raise RuntimeError("Invalid Cloudflare credentials configuration") + + def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """ + Get Cloudflare R2 storage instance. + + R2 is Cloudflare's S3-compatible object storage service. + This method will create a client for managing benchmark input/output data. + + Args: + replace_existing: Whether to replace existing files in storage + + Returns: + R2 storage instance + """ + if replace_existing is None: + replace_existing = False + + return R2( + region=self._config.region, + cache_client=self._cache_client, + resources=self._config.resources, + replace_existing=replace_existing, + credentials=self._config.credentials, + ) + + def get_nosql_storage(self) -> NoSQLStorage: + """ + Get Cloudflare Durable Objects storage instance. + + Durable Objects provide stateful storage for Workers. + Note: This is a minimal implementation to satisfy SeBS requirements. + + Returns: + DurableObjects storage instance + """ + return DurableObjects( + region=self._config.region, + cache_client=self._cache_client, + resources=self._config.resources, + credentials=self._config.credentials, + ) diff --git a/sebs/cloudflare/triggers.py b/sebs/cloudflare/triggers.py new file mode 100644 index 000000000..cecd0338f --- /dev/null +++ b/sebs/cloudflare/triggers.py @@ -0,0 +1,101 @@ +from typing import Optional +import concurrent.futures + +from sebs.faas.function import Trigger, ExecutionResult + + +class HTTPTrigger(Trigger): + """ + HTTP trigger for Cloudflare Workers. + Workers are automatically accessible via HTTPS endpoints. + """ + + def __init__(self, worker_name: str, url: Optional[str] = None): + super().__init__() + self.worker_name = worker_name + self._url = url + + @staticmethod + def typename() -> str: + return "Cloudflare.HTTPTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.HTTP + + @property + def url(self) -> str: + assert self._url is not None, "HTTP trigger URL has not been set" + return self._url + + @url.setter + def url(self, url: str): + self._url = url + + def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke a Cloudflare Worker via HTTP. 
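+
+        The worker is expected to reply with the SeBS-style payload parsed below; an
+        illustrative (hypothetical) response body::
+
+            {"result": {"measurement": {"cpu_time_us": 1250, "wall_time_ms": 3.4,
+                                        "is_cold": true, "memory_used_mb": 42}}}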
+ + Args: + payload: The payload to send to the worker + + Returns: + ExecutionResult with performance metrics extracted from the response + """ + self.logging.debug(f"Invoke function {self.url}") + result = self._http_invoke(payload, self.url) + + # Extract measurement data from the response if available + if result.output and 'result' in result.output: + result_data = result.output['result'] + if isinstance(result_data, dict) and 'measurement' in result_data: + measurement = result_data['measurement'] + + # Extract timing metrics if provided by the benchmark + if isinstance(measurement, dict): + # CPU time in microseconds + if 'cpu_time_us' in measurement: + result.provider_times.execution = measurement['cpu_time_us'] + elif 'cpu_time_ms' in measurement: + result.provider_times.execution = int(measurement['cpu_time_ms'] * 1000) + + # Wall time in microseconds + if 'wall_time_us' in measurement: + result.times.benchmark = measurement['wall_time_us'] + elif 'wall_time_ms' in measurement: + result.times.benchmark = int(measurement['wall_time_ms'] * 1000) + + # Cold/warm start detection + if 'is_cold' in measurement: + result.stats.cold_start = measurement['is_cold'] + + # Memory usage if available + if 'memory_used_mb' in measurement: + result.stats.memory_used = measurement['memory_used_mb'] + + # Store the full measurement for later analysis + result.output['measurement'] = measurement + + self.logging.debug(f"Extracted measurements: {measurement}") + + return result + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke a Cloudflare Worker via HTTP. + """ + pool = concurrent.futures.ThreadPoolExecutor() + fut = pool.submit(self.sync_invoke, payload) + return fut + + def serialize(self) -> dict: + return { + "type": self.typename(), + "worker_name": self.worker_name, + "url": self._url, + } + + @staticmethod + def deserialize(obj: dict) -> "HTTPTrigger": + trigger = HTTPTrigger(obj["worker_name"], obj.get("url")) + return trigger diff --git a/sebs/cloudflare/workers.py b/sebs/cloudflare/workers.py new file mode 100644 index 000000000..f78f0aad1 --- /dev/null +++ b/sebs/cloudflare/workers.py @@ -0,0 +1,370 @@ +""" +Cloudflare Workers native deployment implementation. + +Handles packaging, deployment, and management of native Cloudflare Workers +(non-container deployments using JavaScript/Python runtime). +""" + +import os +import shutil +import json +import io +import tarfile +try: + import tomllib # Python 3.11+ +except ImportError: + import tomli as tomllib # Fallback for older Python +try: + import tomli_w +except ImportError: + # Fallback to basic TOML writing if tomli_w not available + import toml as tomli_w +from typing import Optional, Tuple + +from sebs.benchmark import Benchmark +from sebs.cloudflare.cli import CloudflareCLI + + +class CloudflareWorkersDeployment: + """Handles native Cloudflare Workers deployment operations.""" + + def __init__(self, logging, system_config, docker_client, system_resources): + """ + Initialize CloudflareWorkersDeployment. 
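+
+        Illustrative construction and packaging call (argument values are examples only)::
+
+            deployment = CloudflareWorkersDeployment(
+                logging, system_config, docker_client, system_resources
+            )
+            path, size, container_uri = deployment.package_code(
+                directory, "python", "3.12", "130.crud-api", is_cached=False
+            )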
+ + Args: + logging: Logger instance + system_config: System configuration + docker_client: Docker client instance + system_resources: System resources manager + """ + self.logging = logging + self.system_config = system_config + self.docker_client = docker_client + self.system_resources = system_resources + self._cli: Optional[CloudflareCLI] = None + + def _get_cli(self) -> CloudflareCLI: + """Get or initialize the Cloudflare CLI container.""" + if self._cli is None: + self._cli = CloudflareCLI(self.system_config, self.docker_client) + # Verify wrangler is available + version = self._cli.check_wrangler_version() + self.logging.info(f"Cloudflare CLI container ready: {version}") + return self._cli + + def generate_wrangler_toml( + self, + worker_name: str, + package_dir: str, + language: str, + account_id: str, + benchmark_name: Optional[str] = None, + code_package: Optional[Benchmark] = None, + container_uri: str = "", + ) -> str: + """ + Generate a wrangler.toml configuration file for native workers. + + Args: + worker_name: Name of the worker + package_dir: Directory containing the worker code + language: Programming language (nodejs or python) + account_id: Cloudflare account ID + benchmark_name: Optional benchmark name for R2 file path prefix + code_package: Optional benchmark package for nosql configuration + + Returns: + Path to the generated wrangler.toml file + """ + # Load template + template_path = os.path.join( + os.path.dirname(__file__), + "../..", + "templates", + "wrangler-worker.toml" + ) + with open(template_path, 'rb') as f: + config = tomllib.load(f) + + # Update basic configuration + config['name'] = worker_name + config['main'] = "dist/handler.js" if language == "nodejs" else "handler.py" + config['account_id'] = account_id + + # Add language-specific configuration + if language == "nodejs": + config['compatibility_flags'] = ["nodejs_compat"] + config['no_bundle'] = True + config['build'] = {'command': 'node build.js'} + config['rules'] = [ + { + 'type': 'ESModule', + 'globs': ['**/*.js'], + 'fallthrough': True + }, + { + 'type': 'Text', + 'globs': ['**/*.html'], + 'fallthrough': True + } + ] + elif language == "python": + config['compatibility_flags'] = ["python_workers"] + + # Add environment variables + if benchmark_name or (code_package and code_package.uses_nosql): + config['vars'] = {} + if benchmark_name: + config['vars']['BENCHMARK_NAME'] = benchmark_name + if code_package and code_package.uses_nosql: + config['vars']['NOSQL_STORAGE_DATABASE'] = "durable_objects" + + # Add R2 bucket binding + try: + from sebs.faas.config import Resources + storage = self.system_resources.get_storage() + bucket_name = storage.get_bucket(Resources.StorageBucketType.BENCHMARKS) + if bucket_name: + config['r2_buckets'] = [{ + 'binding': 'R2', + 'bucket_name': bucket_name + }] + self.logging.info(f"R2 bucket '{bucket_name}' will be bound to worker as 'R2'") + except Exception as e: + self.logging.warning( + f"R2 bucket binding not configured: {e}. " + f"Benchmarks requiring file access will not work properly." 
+ ) + + # Write wrangler.toml to package directory + toml_path = os.path.join(package_dir, "wrangler.toml") + try: + # Try tomli_w (writes binary) + with open(toml_path, 'wb') as f: + tomli_w.dump(config, f) + except TypeError: + # Fallback to toml library (writes text) + with open(toml_path, 'w') as f: + f.write(tomli_w.dumps(config)) + + self.logging.info(f"Generated wrangler.toml at {toml_path}") + return toml_path + + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + benchmark: str, + is_cached: bool, + ) -> Tuple[str, int, str]: + """ + Package code for native Cloudflare Workers deployment. + + Args: + directory: Path to the code directory + language_name: Programming language name + language_version: Programming language version + benchmark: Benchmark name + is_cached: Whether the code is cached + + Returns: + Tuple of (package_path, package_size, container_uri) + """ + # Install dependencies + if language_name == "nodejs": + package_file = os.path.join(directory, "package.json") + node_modules = os.path.join(directory, "node_modules") + + # Only install if package.json exists and node_modules doesn't + if os.path.exists(package_file) and not os.path.exists(node_modules): + self.logging.info(f"Installing Node.js dependencies in {directory}") + # Use CLI container for npm install - no Node.js/npm needed on host + cli = self._get_cli() + container_path = f"/tmp/npm_install/{os.path.basename(directory)}" + + try: + # Upload package directory to container + cli.upload_package(directory, container_path) + + # Install production dependencies + self.logging.info("Installing npm dependencies in container...") + output = cli.npm_install(container_path) + self.logging.info("npm install completed successfully") + self.logging.debug(f"npm output: {output}") + + # Install esbuild as a dev dependency (needed by build.js) + self.logging.info("Installing esbuild for custom build script...") + cli.execute(f"cd {container_path} && npm install --save-dev esbuild") + self.logging.info("esbuild installed successfully") + + # Download node_modules back to host + bits, stat = cli.docker_instance.get_archive(f"{container_path}/node_modules") + file_obj = io.BytesIO() + for chunk in bits: + file_obj.write(chunk) + file_obj.seek(0) + with tarfile.open(fileobj=file_obj) as tar: + tar.extractall(directory) + + self.logging.info(f"Downloaded node_modules to {directory}") + + except Exception as e: + self.logging.error(f"npm install in container failed: {e}") + raise RuntimeError(f"Failed to install Node.js dependencies: {e}") + elif os.path.exists(node_modules): + self.logging.info(f"Node.js dependencies already installed in {directory}") + + # Ensure esbuild is available even for cached installations + esbuild_path = os.path.join(node_modules, "esbuild") + if not os.path.exists(esbuild_path): + self.logging.info("Installing esbuild for custom build script...") + cli = self._get_cli() + container_path = f"/tmp/npm_install/{os.path.basename(directory)}" + + try: + cli.upload_package(directory, container_path) + cli.execute(f"cd {container_path} && npm install --save-dev esbuild") + + # Download node_modules back to host + bits, stat = cli.docker_instance.get_archive(f"{container_path}/node_modules") + file_obj = io.BytesIO() + for chunk in bits: + file_obj.write(chunk) + file_obj.seek(0) + with tarfile.open(fileobj=file_obj) as tar: + tar.extractall(directory) + + self.logging.info("esbuild installed successfully") + except Exception as e: + self.logging.error(f"Failed 
to install esbuild: {e}") + raise RuntimeError(f"Failed to install esbuild: {e}") + + elif language_name == "python": + requirements_file = os.path.join(directory, "requirements.txt") + if os.path.exists(f"{requirements_file}.{language_version}"): + src = f"{requirements_file}.{language_version}" + dest = requirements_file + shutil.move(src, dest) + self.logging.info(f"move {src} to {dest}") + + # move function_cloudflare.py into function.py + function_cloudflare_file = os.path.join(directory, "function_cloudflare.py") + if os.path.exists(function_cloudflare_file): + src = function_cloudflare_file + dest = os.path.join(directory, "function.py") + shutil.move(src, dest) + self.logging.info(f"move {src} to {dest}") + + if os.path.exists(requirements_file): + with open(requirements_file, 'r') as reqf: + reqtext = reqf.read() + supported_pkg = \ +['affine', 'aiohappyeyeballs', 'aiohttp', 'aiosignal', 'altair', 'annotated-types',\ +'anyio', 'apsw', 'argon2-cffi', 'argon2-cffi-bindings', 'asciitree', 'astropy', 'astropy_iers_data',\ +'asttokens', 'async-timeout', 'atomicwrites', 'attrs', 'audioop-lts', 'autograd', 'awkward-cpp', 'b2d',\ +'bcrypt', 'beautifulsoup4', 'bilby.cython', 'biopython', 'bitarray', 'bitstring', 'bleach', 'blosc2', 'bokeh',\ +'boost-histogram', 'brotli', 'cachetools', 'casadi', 'cbor-diag', 'certifi', 'cffi', 'cffi_example', 'cftime',\ +'charset-normalizer', 'clarabel', 'click', 'cligj', 'clingo', 'cloudpickle', 'cmyt', 'cobs', 'colorspacious',\ +'contourpy', 'coolprop', 'coverage', 'cramjam', 'crc32c', 'cryptography', 'css-inline', 'cssselect', 'cvxpy-base', 'cycler',\ +'cysignals', 'cytoolz', 'decorator', 'demes', 'deprecation', 'diskcache', 'distlib', 'distro', 'docutils', 'donfig',\ +'ewah_bool_utils', 'exceptiongroup', 'executing', 'fastapi', 'fastcan', 'fastparquet', 'fiona', 'fonttools', 'freesasa',\ +'frozenlist', 'fsspec', 'future', 'galpy', 'gmpy2', 'gsw', 'h11', 'h3', 'h5py', 'highspy', 'html5lib', 'httpcore',\ +'httpx', 'idna', 'igraph', 'imageio', 'imgui-bundle', 'iminuit', 'iniconfig', 'inspice', 'ipython', 'jedi', 'Jinja2',\ +'jiter', 'joblib', 'jsonpatch', 'jsonpointer', 'jsonschema', 'jsonschema_specifications', 'kiwisolver',\ +'lakers-python', 'lazy_loader', 'lazy-object-proxy', 'libcst', 'lightgbm', 'logbook', 'lxml', 'lz4', 'MarkupSafe',\ +'matplotlib', 'matplotlib-inline', 'memory-allocator', 'micropip', 'mmh3', 'more-itertools', 'mpmath',\ +'msgpack', 'msgspec', 'msprime', 'multidict', 'munch', 'mypy', 'narwhals', 'ndindex', 'netcdf4', 'networkx',\ +'newick', 'nh3', 'nlopt', 'nltk', 'numcodecs', 'numpy', 'openai', 'opencv-python', 'optlang', 'orjson',\ +'packaging', 'pandas', 'parso', 'patsy', 'pcodec', 'peewee', 'pi-heif', 'Pillow', 'pillow-heif', 'pkgconfig',\ +'platformdirs', 'pluggy', 'ply', 'pplpy', 'primecountpy', 'prompt_toolkit', 'propcache', 'protobuf', 'pure-eval',\ +'py', 'pyclipper', 'pycparser', 'pycryptodome', 'pydantic', 'pydantic_core', 'pyerfa', 'pygame-ce', 'Pygments',\ +'pyheif', 'pyiceberg', 'pyinstrument', 'pylimer-tools', 'PyMuPDF', 'pynacl', 'pyodide-http', 'pyodide-unix-timezones',\ +'pyparsing', 'pyrsistent', 'pysam', 'pyshp', 'pytaglib', 'pytest', 'pytest-asyncio', 'pytest-benchmark', 'pytest_httpx',\ +'python-calamine', 'python-dateutil', 'python-flint', 'python-magic', 'python-sat', 'python-solvespace', 'pytz', 'pywavelets',\ +'pyxel', 'pyxirr', 'pyyaml', 'rasterio', 'rateslib', 'rebound', 'reboundx', 'referencing', 'regex', 'requests',\ +'retrying', 'rich', 'river', 'RobotRaconteur', 'rpds-py', 'ruamel.yaml', 'rustworkx', 
'scikit-image', 'scikit-learn',\ +'scipy', 'screed', 'setuptools', 'shapely', 'simplejson', 'sisl', 'six', 'smart-open', 'sniffio', 'sortedcontainers',\ +'soundfile', 'soupsieve', 'sourmash', 'soxr', 'sparseqr', 'sqlalchemy', 'stack-data', 'starlette', 'statsmodels', 'strictyaml',\ +'svgwrite', 'swiglpk', 'sympy', 'tblib', 'termcolor', 'texttable', 'texture2ddecoder', 'threadpoolctl', 'tiktoken', 'tomli',\ +'tomli-w', 'toolz', 'tqdm', 'traitlets', 'traits', 'tree-sitter', 'tree-sitter-go', 'tree-sitter-java', 'tree-sitter-python',\ +'tskit', 'typing-extensions', 'tzdata', 'ujson', 'uncertainties', 'unyt', 'urllib3', 'vega-datasets', 'vrplib', 'wcwidth',\ +'webencodings', 'wordcloud', 'wrapt', 'xarray', 'xgboost', 'xlrd', 'xxhash', 'xyzservices', 'yarl', 'yt', 'zengl', 'zfpy', 'zstandard'] + needed_pkg = [] + for pkg in supported_pkg: + if pkg.lower() in reqtext.lower(): + needed_pkg.append(pkg) + + project_file = os.path.join(directory, "pyproject.toml") + depstr = str(needed_pkg).replace("\'", "\"") + with open(project_file, 'w') as pf: + pf.write(f""" +[project] +name = "{benchmark.replace(".", "-")}-python-{language_version.replace(".", "")}" +version = "0.1.0" +description = "dummy description" +requires-python = ">={language_version}" +dependencies = {depstr} + +[dependency-groups] +dev = [ + "workers-py", + "workers-runtime-sdk" +] + """) + # move into function dir + funcdir = os.path.join(directory, "function") + if not os.path.exists(funcdir): + os.makedirs(funcdir) + + dont_move = ["handler.py", "function", "python_modules", "pyproject.toml"] + for thing in os.listdir(directory): + if thing not in dont_move: + src = os.path.join(directory, thing) + dest = os.path.join(directory, "function", thing) + shutil.move(src, dest) + + # Create package structure + CONFIG_FILES = { + "nodejs": ["handler.js", "package.json", "node_modules"], + "python": ["handler.py", "requirements.txt", "python_modules"], + } + + if language_name not in CONFIG_FILES: + raise NotImplementedError( + f"Language {language_name} is not yet supported for Cloudflare Workers" + ) + + # Verify the handler exists + handler_file = "handler.js" if language_name == "nodejs" else "handler.py" + package_path = os.path.join(directory, handler_file) + + if not os.path.exists(package_path): + if not os.path.exists(directory): + raise RuntimeError( + f"Package directory {directory} does not exist. " + "The benchmark build process may have failed to create the deployment package." + ) + raise RuntimeError( + f"Handler file {handler_file} not found in {directory}. 
" + f"Available files: {', '.join(os.listdir(directory)) if os.path.exists(directory) else 'none'}" + ) + + # Calculate total size of the package directory + total_size = 0 + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + total_size += os.path.getsize(filepath) + + mbytes = total_size / 1024.0 / 1024.0 + self.logging.info(f"Worker package size: {mbytes:.2f} MB (Python: missing vendored modules)") + + return (directory, total_size, "") + + def shutdown(self): + """Shutdown CLI container if initialized.""" + if self._cli is not None: + self._cli.shutdown() + self._cli = None diff --git a/sebs/config.py b/sebs/config.py index c3030ea03..259d9abbc 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -64,6 +64,14 @@ def benchmark_base_images( architecture ] + def benchmark_container_images( + self, deployment_name: str, language_name: str, architecture: str + ) -> Dict[str, str]: + """Get container base images for container deployments.""" + return self._system_config[deployment_name]["languages"][language_name].get( + "container_images", {} + ).get(architecture, {}) + def version(self) -> str: return self._system_config["general"].get("SeBS_version", "unknown") diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index 26aea9f29..9bc2b49e5 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -65,7 +65,7 @@ def deserialize(config: dict) -> "Config": cfg._update_code = config["update_code"] cfg._update_storage = config["update_storage"] cfg._download_results = config["download_results"] - cfg._container_deployment = config["container_deployment"] + cfg._container_deployment = config.get("container_deployment", False) cfg._runtime = Runtime.deserialize(config["runtime"]) cfg._flags = config["flags"] if "flags" in config else {} cfg._architecture = config["architecture"] diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 19c7d3abe..a2ee8c383 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -204,6 +204,14 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config from sebs.openwhisk.config import OpenWhiskConfig implementations["openwhisk"] = OpenWhiskConfig.deserialize + + # Cloudflare is available by default (like local) + try: + from sebs.cloudflare.config import CloudflareConfig + implementations["cloudflare"] = CloudflareConfig.deserialize + except ImportError: + pass + func = implementations.get(name) assert func, "Unknown config type!" 
return func(config[name] if name in config else config, cache, handlers) diff --git a/sebs/sebs.py b/sebs/sebs.py index 309c0b253..4bfa8f2a6 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -106,6 +106,10 @@ def get_deployment( from sebs.openwhisk import OpenWhisk implementations["openwhisk"] = OpenWhisk + if has_platform("cloudflare"): + from sebs.cloudflare import Cloudflare + + implementations["cloudflare"] = Cloudflare if name not in implementations: raise RuntimeError("Deployment {name} not supported!".format(name=name)) diff --git a/sebs/types.py b/sebs/types.py index b87516fba..edb87b755 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -12,6 +12,7 @@ class Platforms(str, Enum): GCP = "gcp" LOCAL = "local" OPENWHISK = "openwhisk" + CLOUDFLARE = "cloudflare" class Storage(str, Enum): diff --git a/templates/wrangler-container.toml b/templates/wrangler-container.toml new file mode 100644 index 000000000..d8e08fe33 --- /dev/null +++ b/templates/wrangler-container.toml @@ -0,0 +1,25 @@ +# Template for Cloudflare Container Workers +# This file is read and modified by the deployment system + +name = "PLACEHOLDER_WORKER_NAME" +main = "worker.js" +compatibility_date = "2025-11-18" +account_id = "PLACEHOLDER_ACCOUNT_ID" +compatibility_flags = ["nodejs_compat"] + +[observability] +enabled = true + +[[containers]] +max_instances = 10 +class_name = "ContainerWorker" +image = "./Dockerfile" + +# Durable Object binding for Container class (required by @cloudflare/containers) +[[durable_objects.bindings]] +name = "CONTAINER_WORKER" +class_name = "ContainerWorker" + +[[migrations]] +tag = "v1" +new_sqlite_classes = ["ContainerWorker"] diff --git a/templates/wrangler-worker.toml b/templates/wrangler-worker.toml new file mode 100644 index 000000000..b11821281 --- /dev/null +++ b/templates/wrangler-worker.toml @@ -0,0 +1,16 @@ +# Template for native Cloudflare Workers +# This file is read and modified by the deployment system + +name = "PLACEHOLDER_WORKER_NAME" +main = "PLACEHOLDER_MAIN_FILE" +compatibility_date = "2025-11-18" +account_id = "PLACEHOLDER_ACCOUNT_ID" + +# Durable Object binding for NoSQL storage +[[durable_objects.bindings]] +name = "DURABLE_STORE" +class_name = "KVApiObject" + +[[migrations]] +tag = "v3" +new_classes = ["KVApiObject"]
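+
+# Illustration only: generate_wrangler_toml() in sebs/cloudflare/workers.py rewrites the
+# placeholders above at deployment time and may add sections such as (example values):
+#
+#   compatibility_flags = ["python_workers"]    # or ["nodejs_compat"] for nodejs workers
+#
+#   [vars]
+#   BENCHMARK_NAME = "130.crud-api"
+#   NOSQL_STORAGE_DATABASE = "durable_objects"
+#
+#   [[r2_buckets]]
+#   binding = "R2"
+#   bucket_name = "<SeBS benchmarks bucket>"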