Skip to content

Commit b1954c2

Browse files
vdusekclaude
andcommitted
feat: improve Python project detection and entrypoint resolution
Enhance Python project detection to support standard package layouts without enforcing specific directory names. This change makes the CLI more flexible and user-friendly for Python developers. ## Changes - **Enhanced project detection**: Now detects Python projects by checking for `pyproject.toml`, `requirements.txt`, or `.py` files (not just `__main__.py`) - **Package discovery**: Automatically discovers Python packages in CWD and `src/` subdirectory - **Smart entrypoint resolution**: Selects the entrypoint when exactly one package is found - **Better error messages**: Provides clear, actionable guidance for all failure scenarios: - No packages found (with structure examples) - Multiple packages found (with list and usage example) - Mixed Python/Node.js projects detected - **Backwards compatible**: Existing projects with `src/__main__.py` continue to work ## Implementation Details - Added `isPythonProject()` for flexible project detection - Added `discoverPythonPackages()` to find valid packages (level 1 and 2 only) - Added `findPythonEntrypoint()` with enhanced error handling - Added mixed project detection to prevent ambiguous configurations - Updated test to properly test "no detection" scenario ## Testing - All existing Python tests pass - No regressions in other test suites - Supports standard Python layouts: `my_package/`, `src/my_package/` Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent ed91617 commit b1954c2

File tree

2 files changed

+206
-29
lines changed

2 files changed

+206
-29
lines changed

src/lib/hooks/useCwdProject.ts

Lines changed: 201 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import { access, readFile } from 'node:fs/promises';
2-
import { basename, dirname, join, resolve } from 'node:path';
1+
import { access, readdir, readFile } from 'node:fs/promises';
2+
import { join, resolve } from 'node:path';
33
import process from 'node:process';
44

5-
import { ok, type Result } from '@sapphire/result';
5+
import { err, ok, type Result } from '@sapphire/result';
66

77
import { ScrapyProjectAnalyzer } from '../projects/scrapy/ScrapyProjectAnalyzer.js';
88
import { cliDebugPrint } from '../utils/cliDebugPrint.js';
@@ -81,19 +81,46 @@ export async function useCwdProject({
8181
};
8282
} else {
8383
// Fallback for scrapy projects that use apify, but are not "migrated" (like our templates)
84-
const pythonFile = await checkPythonProject(cwd);
85-
86-
if (pythonFile) {
87-
project.entrypoint = {
88-
path: pythonFile,
89-
};
84+
try {
85+
const pythonFile = await checkPythonProject(cwd);
86+
87+
if (pythonFile) {
88+
project.entrypoint = {
89+
path: pythonFile,
90+
};
91+
}
92+
} catch {
93+
// If we can't find the Python entrypoint, that's okay for Scrapy projects
94+
// Just continue without setting the entrypoint
9095
}
9196
}
9297

9398
return;
9499
}
95100

96-
const isPython = await checkPythonProject(cwd);
101+
// Check for mixed projects (both Python and Node.js indicators)
102+
const hasPythonIndicators = await isPythonProject(cwd);
103+
const hasNodeIndicators = await fileExists(join(cwd, 'package.json'));
104+
105+
if (hasPythonIndicators && hasNodeIndicators) {
106+
return err({
107+
message:
108+
'Mixed project detected (both Python and Node.js files found). ' +
109+
'Please use explicit configuration to specify which runtime to use. ' +
110+
'You can use the --entrypoint flag to specify the entrypoint explicitly.',
111+
});
112+
}
113+
114+
let isPython: string | null = null;
115+
try {
116+
isPython = await checkPythonProject(cwd);
117+
} catch (error) {
118+
// If checkPythonProject throws an error, it means it detected Python but
119+
// couldn't determine the entrypoint. We should propagate this error.
120+
return err({
121+
message: error instanceof Error ? error.message : String(error),
122+
});
123+
}
97124

98125
if (isPython) {
99126
project.type = ProjectLanguage.Python;
@@ -210,30 +237,176 @@ async function checkNodeProject(cwd: string) {
210237
return null;
211238
}
212239

213-
async function checkPythonProject(cwd: string) {
214-
const baseName = basename(cwd);
240+
// Helper functions for Python project detection
215241

216-
const filesToCheck = [
217-
join(cwd, 'src', '__main__.py'),
218-
join(cwd, '__main__.py'),
219-
join(cwd, baseName, '__main__.py'),
220-
join(cwd, baseName.replaceAll('-', '_').replaceAll(' ', '_'), '__main__.py'),
221-
];
242+
async function fileExists(path: string): Promise<boolean> {
243+
try {
244+
await access(path);
245+
return true;
246+
} catch {
247+
return false;
248+
}
249+
}
222250

223-
for (const path of filesToCheck) {
224-
try {
225-
await access(path);
251+
async function dirExists(path: string): Promise<boolean> {
252+
return fileExists(path);
253+
}
226254

227-
// By default in python, we run python3 -m <module>
228-
// For some unholy reason, python does NOT support absolute paths for this -.-
229-
// Effectively, this returns `src` from `/cwd/src/__main__.py`, et al.
230-
return basename(dirname(path));
231-
} catch {
232-
// Ignore errors
255+
function isValidPythonIdentifier(name: string): boolean {
256+
// Must start with letter or underscore, contain only alphanumerics and underscores
257+
return /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name);
258+
}
259+
260+
async function hasPythonFiles(dir: string): Promise<boolean> {
261+
try {
262+
const entries = await readdir(dir, { withFileTypes: true });
263+
return entries.some((entry) => entry.isFile() && entry.name.endsWith('.py'));
264+
} catch {
265+
return false;
266+
}
267+
}
268+
269+
async function isPythonProject(cwd: string): Promise<boolean> {
270+
// Check for pyproject.toml
271+
if (await fileExists(join(cwd, 'pyproject.toml'))) {
272+
return true;
273+
}
274+
275+
// Check for requirements.txt
276+
if (await fileExists(join(cwd, 'requirements.txt'))) {
277+
return true;
278+
}
279+
280+
// Check for .py files in level 1 (CWD)
281+
const level1HasPython = await hasPythonFiles(cwd);
282+
if (level1HasPython) {
283+
return true;
284+
}
285+
286+
// Check for .py files in level 2 (src/)
287+
const srcDir = join(cwd, 'src');
288+
if (await dirExists(srcDir)) {
289+
const level2HasPython = await hasPythonFiles(srcDir);
290+
if (level2HasPython) {
291+
return true;
233292
}
234293
}
235294

236-
return null;
295+
return false;
296+
}
297+
298+
async function findPackagesInDir(dir: string): Promise<{ name: string; path: string }[]> {
299+
try {
300+
const entries = await readdir(dir, { withFileTypes: true });
301+
const packages = [];
302+
303+
for (const entry of entries) {
304+
if (!entry.isDirectory()) continue;
305+
306+
const { name } = entry;
307+
308+
// Skip hidden directories and special directories
309+
if (name.startsWith('.') || name.startsWith('_')) continue;
310+
if (!isValidPythonIdentifier(name)) continue;
311+
312+
// Check for __init__.py
313+
const initPath = join(dir, name, '__init__.py');
314+
if (await fileExists(initPath)) {
315+
packages.push({ name, path: join(dir, name) });
316+
}
317+
}
318+
319+
return packages;
320+
} catch {
321+
return [];
322+
}
323+
}
324+
325+
async function discoverPythonPackages(cwd: string): Promise<string[]> {
326+
const packages: string[] = [];
327+
328+
// Search level 1 (CWD)
329+
const level1Packages = await findPackagesInDir(cwd);
330+
packages.push(...level1Packages.map((p) => p.name));
331+
332+
// Search level 2 (src/)
333+
const srcDir = join(cwd, 'src');
334+
if (await dirExists(srcDir)) {
335+
const level2Packages = await findPackagesInDir(srcDir);
336+
packages.push(...level2Packages.map((p) => `src.${p.name}`));
337+
}
338+
339+
return packages;
340+
}
341+
342+
async function findPythonEntrypoint(cwd: string): Promise<string> {
343+
// Discover all valid Python packages
344+
const discoveredPackages = await discoverPythonPackages(cwd);
345+
346+
if (discoveredPackages.length === 0) {
347+
// No packages found - provide helpful error with context
348+
const hasPyFiles =
349+
(await hasPythonFiles(cwd)) ||
350+
((await dirExists(join(cwd, 'src'))) && (await hasPythonFiles(join(cwd, 'src'))));
351+
352+
if (hasPyFiles) {
353+
throw new Error(
354+
'No Python package found. Found Python files, but no valid package structure detected.\n' +
355+
'A Python package requires:\n' +
356+
' - A directory with a valid Python identifier name (letters, numbers, underscores)\n' +
357+
' - An __init__.py file inside the directory\n' +
358+
'\n' +
359+
'Common package structures:\n' +
360+
' my_package/\n' +
361+
' __init__.py\n' +
362+
' main.py\n' +
363+
'\n' +
364+
' src/\n' +
365+
' my_package/\n' +
366+
' __init__.py\n' +
367+
' main.py\n' +
368+
'\n' +
369+
'Use --entrypoint flag to specify a custom entry point.',
370+
);
371+
} else {
372+
throw new Error(
373+
'No Python package or Python files found in the current directory or src/ subdirectory.\n' +
374+
'Expected to find either:\n' +
375+
' - A package directory (with __init__.py)\n' +
376+
' - Python source files (.py)\n' +
377+
'\n' +
378+
'Use --entrypoint flag to specify a custom entry point.',
379+
);
380+
}
381+
}
382+
383+
if (discoveredPackages.length > 1) {
384+
// Multiple packages found - list them and guide user
385+
const packageList = discoveredPackages.map((pkg) => ` - ${pkg}`).join('\n');
386+
throw new Error(
387+
`Multiple Python packages found:\n${packageList}\n\n` +
388+
'Apify CLI cannot determine which package to run.\n' +
389+
'Please specify the package explicitly using: --entrypoint <package_name>\n' +
390+
'\n' +
391+
'For example:\n' +
392+
` apify run --entrypoint ${discoveredPackages[0]}`,
393+
);
394+
}
395+
396+
// Exactly one package found - success!
397+
return discoveredPackages[0];
398+
}
399+
400+
async function checkPythonProject(cwd: string): Promise<string | null> {
401+
// Step 1: Check if it's a Python project
402+
const isPython = await isPythonProject(cwd);
403+
if (!isPython) {
404+
return null;
405+
}
406+
407+
// Step 2: Find the entrypoint (this may throw with a helpful error message)
408+
const entrypoint = await findPythonEntrypoint(cwd);
409+
return entrypoint;
237410
}
238411

239412
async function checkScrapyProject(cwd: string) {

test/local/__fixtures__/commands/run/python/prints-error-message-on-project-with-no-detected-start.test.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { rename } from 'node:fs/promises';
1+
import { rename, rm } from 'node:fs/promises';
22

33
import { testRunCommand } from '../../../../../../src/lib/command-framework/apify-command.js';
44
import { useConsoleSpy } from '../../../../../__setup__/hooks/useConsoleSpy.js';
@@ -29,6 +29,10 @@ describe('[python] prints error message on project with no detected start', () =
2929
const srcFolder = joinPath('src');
3030
await rename(srcFolder, joinPath('entrypoint'));
3131

32+
// Remove requirements.txt so it's not detected as Python project
33+
const requirementsTxt = joinPath('requirements.txt');
34+
await rm(requirementsTxt, { force: true });
35+
3236
resetCwdCaches();
3337
});
3438

0 commit comments

Comments
 (0)