|
1 | | -import { access, readFile } from 'node:fs/promises'; |
2 | | -import { basename, dirname, join, resolve } from 'node:path'; |
| 1 | +import { access, readdir, readFile } from 'node:fs/promises'; |
| 2 | +import { join, resolve } from 'node:path'; |
3 | 3 | import process from 'node:process'; |
4 | 4 |
|
5 | | -import { ok, type Result } from '@sapphire/result'; |
| 5 | +import { err, ok, type Result } from '@sapphire/result'; |
6 | 6 |
|
7 | 7 | import { ScrapyProjectAnalyzer } from '../projects/scrapy/ScrapyProjectAnalyzer.js'; |
8 | 8 | import { cliDebugPrint } from '../utils/cliDebugPrint.js'; |
@@ -81,19 +81,47 @@ export async function useCwdProject({ |
81 | 81 | }; |
82 | 82 | } else { |
83 | 83 | // Fallback for scrapy projects that use apify, but are not "migrated" (like our templates) |
84 | | - const pythonFile = await checkPythonProject(cwd); |
85 | | - |
86 | | - if (pythonFile) { |
87 | | - project.entrypoint = { |
88 | | - path: pythonFile, |
89 | | - }; |
| 84 | + try { |
| 85 | + const pythonFile = await checkPythonProject(cwd); |
| 86 | + |
| 87 | + if (pythonFile) { |
| 88 | + project.entrypoint = { |
| 89 | + path: pythonFile, |
| 90 | + }; |
| 91 | + } |
| 92 | + } catch { |
| 93 | + // If we can't find the Python entrypoint, that's okay for Scrapy projects |
| 94 | + // Just continue without setting the entrypoint |
90 | 95 | } |
91 | 96 | } |
92 | 97 |
|
93 | 98 | return; |
94 | 99 | } |
95 | 100 |
|
96 | | - const isPython = await checkPythonProject(cwd); |
| 101 | + // Check for mixed projects (both Python and Node.js indicators) |
| 102 | + const hasPythonIndicators = await isPythonProject(cwd); |
| 103 | + const hasNodeIndicators = await fileExists(join(cwd, 'package.json')); |
| 104 | + |
| 105 | + if (hasPythonIndicators && hasNodeIndicators) { |
| 106 | + return err({ |
| 107 | + message: |
| 108 | + 'Mixed project detected (both Python and Node.js files found). ' + |
| 109 | + 'Please use explicit configuration to specify which runtime to use. ' + |
| 110 | + 'You can use the --entrypoint flag to specify the entrypoint explicitly.', |
| 111 | + }); |
| 112 | + } |
| 113 | + |
| 114 | + let isPython: string | null = null; |
| 115 | + try { |
| 116 | + // Pass the already-computed hasPythonIndicators to avoid redundant filesystem checks |
| 117 | + isPython = await checkPythonProject(cwd, hasPythonIndicators); |
| 118 | + } catch (error) { |
| 119 | + // If checkPythonProject throws an error, it means it detected Python but |
| 120 | + // couldn't determine the entrypoint. We should propagate this error. |
| 121 | + return err({ |
| 122 | + message: error instanceof Error ? error.message : String(error), |
| 123 | + }); |
| 124 | + } |
97 | 125 |
|
98 | 126 | if (isPython) { |
99 | 127 | project.type = ProjectLanguage.Python; |
@@ -210,30 +238,184 @@ async function checkNodeProject(cwd: string) { |
210 | 238 | return null; |
211 | 239 | } |
212 | 240 |
|
213 | | -async function checkPythonProject(cwd: string) { |
214 | | - const baseName = basename(cwd); |
| 241 | +// Helper functions for Python project detection |
215 | 242 |
|
216 | | - const filesToCheck = [ |
217 | | - join(cwd, 'src', '__main__.py'), |
218 | | - join(cwd, '__main__.py'), |
219 | | - join(cwd, baseName, '__main__.py'), |
220 | | - join(cwd, baseName.replaceAll('-', '_').replaceAll(' ', '_'), '__main__.py'), |
221 | | - ]; |
/**
 * Reports whether `path` can be accessed on disk.
 *
 * Resolves to `true` when `access` succeeds and to `false` when it rejects for any
 * reason (missing path, permissions, ...). This function itself never rejects.
 */
async function fileExists(path: string): Promise<boolean> {
	return access(path).then(
		() => true,
		() => false,
	);
}
222 | 251 |
|
223 | | - for (const path of filesToCheck) { |
224 | | - try { |
225 | | - await access(path); |
/**
 * Reports whether `path` exists and is a directory.
 *
 * A plain existence check is not enough here: a regular file that happens to be named
 * like the expected directory (for example a file called `src`) must not be reported as
 * a directory.
 *
 * @param path - Filesystem path to probe.
 * @returns `true` only when `path` can be listed as a directory; `false` for regular
 * files, missing paths, and directories that cannot be read. Never rejects.
 */
async function dirExists(path: string): Promise<boolean> {
	try {
		// `readdir` rejects for anything that is not a listable directory
		// (ENOENT for missing paths, ENOTDIR for regular files).
		await readdir(path);
		return true;
	} catch {
		return false;
	}
}
226 | 255 |
|
227 | | - // By default in python, we run python3 -m <module> |
228 | | - // For some unholy reason, python does NOT support absolute paths for this -.- |
229 | | - // Effectively, this returns `src` from `/cwd/src/__main__.py`, et al. |
230 | | - return basename(dirname(path)); |
231 | | - } catch { |
232 | | - // Ignore errors |
/**
 * Tells whether `name` is an ASCII identifier: a letter or underscore first, followed by
 * any number of letters, digits, or underscores. The empty string is rejected.
 */
function isValidPythonIdentifier(name: string): boolean {
	const asciiIdentifier = /^[a-zA-Z_][a-zA-Z0-9_]*$/;

	return asciiIdentifier.test(name);
}
| 260 | + |
/**
 * Checks whether `dir` directly contains at least one regular file whose name ends in
 * `.py`. Subdirectories are not descended into, and a directory that cannot be listed
 * counts as containing none.
 */
async function hasPythonFiles(dir: string): Promise<boolean> {
	try {
		for (const item of await readdir(dir, { withFileTypes: true })) {
			if (item.isFile() && item.name.endsWith('.py')) {
				return true;
			}
		}
	} catch {
		// Unreadable or missing directory: fall through to the negative answer
	}

	return false;
}
| 269 | + |
/**
 * Decides whether `cwd` looks like a Python project.
 *
 * The checks run in this order and stop at the first hit:
 *  1. a `pyproject.toml` in `cwd`
 *  2. a `requirements.txt` in `cwd`
 *  3. any `.py` file directly in `cwd`
 *  4. any `.py` file directly in `cwd/src`
 */
async function isPythonProject(cwd: string): Promise<boolean> {
	// Manifest files are the cheapest signal, so probe them first
	for (const manifest of ['pyproject.toml', 'requirements.txt']) {
		if (await fileExists(join(cwd, manifest))) {
			return true;
		}
	}

	// Loose .py files at level 1 (CWD)
	if (await hasPythonFiles(cwd)) {
		return true;
	}

	// Loose .py files at level 2 (src/)
	const sourceRoot = join(cwd, 'src');

	return (await dirExists(sourceRoot)) && (await hasPythonFiles(sourceRoot));
}
| 298 | + |
/**
 * Lists the immediate subdirectories of `dir` that qualify as Python packages.
 *
 * A subdirectory qualifies when its name does not start with `.` or `_`, passes
 * `isValidPythonIdentifier`, and it contains an `__init__.py`.
 *
 * @returns One `{ name, path }` record per package, in the order `readdir` reported the
 * entries; an empty array when `dir` cannot be listed.
 */
async function findPackagesInDir(dir: string): Promise<{ name: string; path: string }[]> {
	const found: { name: string; path: string }[] = [];

	try {
		const listing = await readdir(dir, { withFileTypes: true });

		// Hidden directories and underscore-prefixed ones (private/special packages such as
		// _internal or __pycache__) are never treated as main entrypoints
		const candidates = listing.filter(
			(entry) =>
				entry.isDirectory() &&
				!entry.name.startsWith('.') &&
				!entry.name.startsWith('_') &&
				isValidPythonIdentifier(entry.name),
		);

		for (const { name } of candidates) {
			const packagePath = join(dir, name);

			// Only a directory carrying __init__.py counts as a package
			if (await fileExists(join(packagePath, '__init__.py'))) {
				found.push({ name, path: packagePath });
			}
		}
	} catch {
		return [];
	}

	return found;
}
| 326 | + |
/**
 * Collects the module names of candidate Python packages under `cwd`.
 *
 * Packages found directly in `cwd` are reported by their bare name. Packages found in
 * `cwd/src` are reported as `src.<name>`, and are only searched for when `src` exists and
 * is not itself a package (has no `__init__.py`), since anything inside a `src` package
 * is a subpackage rather than a top-level package.
 */
async function discoverPythonPackages(cwd: string): Promise<string[]> {
	// Level 1 (CWD)
	const topLevel = (await findPackagesInDir(cwd)).map(({ name }) => name);

	const srcDir = join(cwd, 'src');
	const srcIsPackage = await fileExists(join(srcDir, '__init__.py'));

	if (!(await dirExists(srcDir)) || srcIsPackage) {
		return topLevel;
	}

	// Level 2 (src/)
	const nested = (await findPackagesInDir(srcDir)).map(({ name }) => `src.${name}`);

	return [...topLevel, ...nested];
}
| 346 | + |
/**
 * Resolves the single Python package that should be used as the entrypoint for `cwd`.
 *
 * @returns The module name reported by `discoverPythonPackages` when exactly one package
 * was discovered.
 * @throws Error when several packages were discovered (the message lists them), or when
 * none were; in the latter case the message depends on whether loose `.py` files exist in
 * `cwd` or `cwd/src`.
 */
async function findPythonEntrypoint(cwd: string): Promise<string> {
	const candidates = await discoverPythonPackages(cwd);

	// Exactly one package: nothing to disambiguate
	if (candidates.length === 1) {
		return candidates[0];
	}

	// Several packages: list them and point the user at the explicit flag
	if (candidates.length > 1) {
		const bulletList = candidates.map((candidate) => ` - ${candidate}`).join('\n');

		throw new Error(
			`Multiple Python packages found:\n${bulletList}\n\n` +
				'Apify CLI cannot determine which package to run.\n' +
				'Please specify the package explicitly using: --entrypoint <package_name>\n' +
				'\n' +
				'For example:\n' +
				` apify run --entrypoint ${candidates[0]}`,
		);
	}

	// No package at all: tailor the message to whether any Python sources are present
	const srcDir = join(cwd, 'src');
	const sawPythonFiles = (await hasPythonFiles(cwd)) || ((await dirExists(srcDir)) && (await hasPythonFiles(srcDir)));

	if (!sawPythonFiles) {
		throw new Error(
			'No Python package or Python files found in the current directory or src/ subdirectory.\n' +
				'Expected to find either:\n' +
				' - A package directory (with __init__.py)\n' +
				' - Python source files (.py)\n' +
				'\n' +
				'Use --entrypoint flag to specify a custom entry point.',
		);
	}

	throw new Error(
		'No Python package found. Found Python files, but no valid package structure detected.\n' +
			'A Python package requires:\n' +
			' - A directory with a valid Python identifier name (letters, numbers, underscores)\n' +
			' - An __init__.py file inside the directory\n' +
			'\n' +
			'Common package structures:\n' +
			' my_package/\n' +
			' __init__.py\n' +
			' main.py\n' +
			'\n' +
			' src/\n' +
			' my_package/\n' +
			' __init__.py\n' +
			' main.py\n' +
			'\n' +
			'Use --entrypoint flag to specify a custom entry point.',
	);
}
| 404 | + |
/**
 * Detects a Python project in `cwd` and resolves its entrypoint module.
 *
 * @param cwd - Directory to inspect.
 * @param alreadyDetectedAsPython - Result of an earlier `isPythonProject(cwd)` call, if
 * the caller has one; when left `undefined` the detection is performed here.
 * @returns `null` when `cwd` is not a Python project, otherwise the entrypoint module name.
 * @throws Error from `findPythonEntrypoint` when the project is Python but no single
 * entrypoint package can be determined.
 */
async function checkPythonProject(cwd: string, alreadyDetectedAsPython?: boolean): Promise<string | null> {
	// Reuse the caller's verdict when one was supplied, otherwise probe the filesystem
	const detected = alreadyDetectedAsPython === undefined ? await isPythonProject(cwd) : alreadyDetectedAsPython;

	if (!detected) {
		return null;
	}

	// May throw with a user-facing explanation
	return await findPythonEntrypoint(cwd);
}
238 | 420 |
|
239 | 421 | async function checkScrapyProject(cwd: string) { |
|
0 commit comments