Skip to content

Commit 1660242

Browse files
committed
feat: enhance stack detection and analysis for Python and Java frameworks, add variable naming style detection
1 parent a317e5d commit 1660242

File tree

8 files changed

+732
-8
lines changed

8 files changed

+732
-8
lines changed

app/api/scan-repo/route.ts

Lines changed: 270 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import type {
66
RepoScanSummary,
77
RepoStructureSummary,
88
} from "@/types/repo-scan"
9+
import { buildDependencyAnalysisTasks, hasDependencyDetectionRules } from "@/lib/stack-detection"
10+
import type { DependencyAnalysisTask } from "@/lib/stack-detection"
911
import { loadStackQuestionMetadata, normalizeConventionValue } from "@/lib/question-metadata"
1012
import { loadStackConventions } from "@/lib/conventions"
1113
import { inferStackFromScan } from "@/lib/scan-to-wizard"
@@ -423,6 +425,107 @@ const readTextFile = async (
423425
}
424426
}
425427

428+
/**
 * Accumulated result of evaluating dependency-analysis signals across
 * one or more manifest files.
 */
type DependencyDetectionOutcome = {
  /** Language taken from the first matched signal that sets one; `null` when none did. */
  primaryLanguage: string | null
  /** Stack identifiers preferred by matched signals. */
  preferredStacks: Set<string>
  /** Language names contributed by matched signals. */
  languages: Set<string>
  /** Framework names contributed by matched signals. */
  frameworks: Set<string>
}
434+
435+
/**
 * Reports whether a package manifest lists the given dependency.
 *
 * The lookup covers `dependencies`, `devDependencies`, `peerDependencies`
 * and `optionalDependencies`, and compares names case-insensitively after
 * trimming the requested name.
 *
 * @param pkg - Parsed manifest to inspect.
 * @param name - Dependency name to look for; a blank name never matches.
 * @returns `true` when any dependency section has a key equal to `name`.
 */
const manifestHasDependency = (pkg: PackageJson, name: string): boolean => {
  const wanted = name.trim().toLowerCase()
  if (wanted.length === 0) {
    return false
  }

  const sections = [
    pkg.dependencies,
    pkg.devDependencies,
    pkg.peerDependencies,
    pkg.optionalDependencies,
  ]

  for (const section of sections) {
    // A manifest may omit any of the dependency sections.
    if (!section) {
      continue
    }

    for (const listedName of Object.keys(section)) {
      if (listedName.toLowerCase() === wanted) {
        return true
      }
    }
  }

  return false
}
456+
457+
/**
 * Evaluates every dependency-analysis task against the repository and
 * accumulates the frameworks, languages and stacks its matched signals name.
 *
 * For each task the file at `task.path` is read (unless the already-loaded
 * `packageJson` can answer every signal), then each signal is checked:
 * `json-dependency` signals against the parsed manifest, all other signals
 * by a case-insensitive substring search over the file text.
 *
 * @param owner - Repository owner passed through to `readTextFile`.
 * @param repo - Repository name passed through to `readTextFile`.
 * @param ref - Git ref passed through to `readTextFile`.
 * @param headers - Request headers passed through to `readTextFile`.
 * @param tasks - Tasks to evaluate, processed in order.
 * @param packageJson - Already-parsed root manifest, reused for tasks whose
 *   path is `package.json` (compared case-insensitively).
 * @returns The merged outcome; `primaryLanguage` comes from the first matched
 *   signal that sets one.
 */
const evaluateDependencyAnalysisTasks = async (
  owner: string,
  repo: string,
  ref: string,
  headers: Record<string, string>,
  tasks: DependencyAnalysisTask[],
  packageJson: PackageJson | null,
): Promise<DependencyDetectionOutcome> => {
  const outcome: DependencyDetectionOutcome = {
    frameworks: new Set<string>(),
    languages: new Set<string>(),
    preferredStacks: new Set<string>(),
    primaryLanguage: null,
  }

  for (const task of tasks) {
    const wantsManifest = task.signals.some((signal) => signal.type === "json-dependency")
    const wantsRawText = task.signals.some((signal) => signal.type !== "json-dependency")

    // Reuse the manifest the caller already fetched instead of reading it again.
    let manifest: PackageJson | null =
      wantsManifest && packageJson && task.path.toLowerCase() === "package.json" ? packageJson : null
    let fileText: string | null = null

    if (wantsRawText || !manifest) {
      fileText = await readTextFile(owner, repo, ref, task.path, headers)
      if (fileText === null) {
        // File could not be read: none of this task's signals can be evaluated.
        continue
      }
    }

    if (wantsManifest && !manifest && fileText) {
      try {
        manifest = JSON.parse(fileText) as PackageJson
      } catch {
        // Unparseable manifest: json-dependency signals simply will not match.
        manifest = null
      }
    }

    const haystack = fileText ? fileText.toLowerCase() : ""

    for (const signal of task.signals) {
      const matched =
        signal.type === "json-dependency"
          ? Boolean(manifest && manifestHasDependency(manifest, signal.match))
          : Boolean(haystack && haystack.includes(signal.matchLower))

      if (!matched) {
        continue
      }

      signal.addFrameworks.forEach((framework) => outcome.frameworks.add(framework))
      signal.addLanguages.forEach((language) => outcome.languages.add(language))

      const stackChoice = signal.preferStack ?? signal.stack
      if (stackChoice) {
        outcome.preferredStacks.add(stackChoice)
      }

      // First matching signal wins; later ones never override the primary language.
      if (!outcome.primaryLanguage && signal.setPrimaryLanguage) {
        outcome.primaryLanguage = signal.setPrimaryLanguage
      }
    }
  }

  return outcome
}
528+
426529
type FileStyleKey = "pascal" | "camel" | "kebab" | "snake"
427530

428531
const stripExtension = (name: string) => name.replace(/\.[^.]+$/u, "")
@@ -459,11 +562,11 @@ const classifyNameStyle = (rawName: string): FileStyleKey | null => {
459562
return null
460563
}
461564

462-
const pickDominantStyle = (counts: Record<FileStyleKey, number>): FileStyleKey | null => {
463-
let winner: FileStyleKey | null = null
565+
const pickDominantStyle = <Key extends string>(counts: Record<Key, number>): Key | null => {
566+
let winner: Key | null = null
464567
let winnerCount = 0
465568

466-
for (const key of Object.keys(counts) as FileStyleKey[]) {
569+
for (const key of Object.keys(counts) as Key[]) {
467570
const value = counts[key]
468571
if (value > winnerCount) {
469572
winner = key
@@ -542,6 +645,130 @@ const analyzeNamingStyles = (paths: string[]) => {
542645
}
543646
}
544647

648+
/** Naming conventions that identifier analysis can distinguish. */
type IdentifierStyleKey = "camel" | "snake" | "pascal"

/**
 * Classifies a single identifier by naming convention.
 *
 * Leading and trailing underscores are ignored (`_private`, `__init__`).
 * Identifiers that carry no usable signal yield `null`:
 * - all-caps / constant-style names (`MAX_SIZE`, `ID`),
 * - a single lowercase word (`name`, `x1`), which is equally valid
 *   camelCase and snake_case and would otherwise bias the tally,
 * - anything mixing conventions (`My_Class`, `foo__bar`).
 *
 * @param rawName - Identifier as it appears in source.
 * @returns The detected style, or `null` when the name is ambiguous or unrecognised.
 */
const classifyIdentifierStyle = (rawName: string): IdentifierStyleKey | null => {
  const trimmed = rawName.trim().replace(/^_+/u, "").replace(/_+$/u, "")
  if (!trimmed) {
    return null
  }

  // Constant-style names say nothing about the variable naming convention.
  if (/^[A-Z0-9_]+$/u.test(trimmed)) {
    return null
  }

  // Lowercase segments joined by single underscores; digits are allowed in
  // every segment after its first character (`var1_name`).
  if (/^[a-z][a-z0-9]*(?:_[a-z0-9]+)+$/u.test(trimmed)) {
    return "snake"
  }

  if (/^[A-Z][A-Za-z0-9]*$/u.test(trimmed)) {
    return "pascal"
  }

  // camelCase needs at least one uppercase hump after a lowercase start;
  // without it the name is an ambiguous single word.
  if (/^[a-z][a-z0-9]*[A-Z][A-Za-z0-9]*$/u.test(trimmed)) {
    return "camel"
  }

  return null
}
678+
679+
/** Maximum number of source files sampled when analysing variable naming. */
const VARIABLE_ANALYSIS_MAX_FILES = 20
/** Only this many leading characters of each sampled file are scanned. */
const VARIABLE_ANALYSIS_MAX_CONTENT_LENGTH = 20_000
681+
682+
/**
 * Collects declared names from JavaScript/TypeScript source text.
 *
 * This is a regex heuristic, not a parser: it picks up the name following
 * `const`/`let`/`var` and the name following `function`, wherever those
 * keywords appear in the text. `$` characters are stripped from each name.
 *
 * @param contents - Source text to scan.
 * @returns All variable-declaration names in order of appearance, followed
 *   by all function-declaration names in order of appearance.
 */
const extractIdentifiersFromJs = (contents: string): string[] => {
  const declarationPatterns = [
    /\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)/gu,
    /\bfunction\s+([A-Za-z_$][\w$]*)/gu,
  ]

  return declarationPatterns.flatMap((pattern) =>
    Array.from(contents.matchAll(pattern), (hit) => hit[1] ?? "")
      .filter((captured) => captured !== "")
      // Stripping happens after the emptiness check, so a bare `$` yields "".
      .map((captured) => captured.replace(/\$/g, "")),
  )
}
700+
701+
/**
 * Collects assigned names and function names from Python source text.
 *
 * This is a regex heuristic, not a parser: it picks up a bare name followed
 * by `=` at the start of a line (skipping `self` and `cls`) and the name in
 * each line-leading `def name(`.
 *
 * @param contents - Source text to scan.
 * @returns All assignment targets in order of appearance, followed by all
 *   function names in order of appearance.
 */
const extractIdentifiersFromPython = (contents: string): string[] => {
  const assignedNames = Array.from(
    contents.matchAll(/^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=/gmu),
    (hit) => hit[1] ?? "",
  ).filter((candidate) => candidate !== "" && !/^self$|^cls$/u.test(candidate))

  const functionNames = Array.from(
    contents.matchAll(/^\s*def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/gmu),
    (hit) => hit[1] ?? "",
  ).filter((candidate) => candidate !== "")

  return [...assignedNames, ...functionNames]
}
719+
720+
/**
 * Estimates the dominant variable naming convention of a repository.
 *
 * Takes the first `VARIABLE_ANALYSIS_MAX_FILES` JavaScript/TypeScript or
 * Python paths, reads each one, scans only its first
 * `VARIABLE_ANALYSIS_MAX_CONTENT_LENGTH` characters for declared names,
 * and tallies the style of every name that can be classified.
 *
 * @param owner - Repository owner passed through to `readTextFile`.
 * @param repo - Repository name passed through to `readTextFile`.
 * @param ref - Git ref passed through to `readTextFile`.
 * @param paths - Repository file paths; order determines which files are sampled.
 * @param headers - Request headers passed through to `readTextFile`.
 * @returns `"camelCase"`, `"snake_case"` or `"PascalCase"`, or `null` when
 *   `pickDominantStyle` finds no winner.
 */
const analyzeVariableNamingStyle = async (
  owner: string,
  repo: string,
  ref: string,
  paths: string[],
  headers: Record<string, string>,
): Promise<string | null> => {
  const isPythonFile = (filePath: string) => /\.py$/iu.test(filePath)
  const isScriptFile = (filePath: string) => /\.(ts|tsx|js|jsx|mjs|cjs)$/iu.test(filePath)

  const sampledFiles = paths
    .filter((filePath) => isScriptFile(filePath) || isPythonFile(filePath))
    .slice(0, VARIABLE_ANALYSIS_MAX_FILES)

  const tally: Record<IdentifierStyleKey, number> = { camel: 0, snake: 0, pascal: 0 }

  // Files are read one at a time, in path order.
  for (const filePath of sampledFiles) {
    const source = await readTextFile(owner, repo, ref, filePath, headers)
    if (!source) {
      continue
    }

    const head = source.slice(0, VARIABLE_ANALYSIS_MAX_CONTENT_LENGTH)
    const names = isPythonFile(filePath)
      ? extractIdentifiersFromPython(head)
      : extractIdentifiersFromJs(head)

    for (const identifier of names) {
      const style = classifyIdentifierStyle(identifier)
      if (style) {
        tally[style] += 1
      }
    }
  }

  const dominant = pickDominantStyle(tally)
  if (!dominant) {
    return null
  }

  const labels: Record<IdentifierStyleKey, string> = {
    camel: "camelCase",
    snake: "snake_case",
    pascal: "PascalCase",
  }

  return labels[dominant] ?? null
}
771+
545772
const detectEnrichedSignals = async (
546773
owner: string,
547774
repo: string,
@@ -654,6 +881,7 @@ const detectEnrichedSignals = async (
654881
if (hasMatch(/(^|\/)prettier\.config\.(js|cjs|mjs|ts)?$/) || hasMatch(/(^|\/)\.prettierrc(\.[a-z]+)?$/)) editor.push("prettier")
655882

656883
const { fileNamingStyle, componentNamingStyle } = analyzeNamingStyles(paths)
884+
const variableNamingStyle = await analyzeVariableNamingStyle(owner, repo, ref, paths, headers)
657885

658886
// Code style detection (ESLint presets)
659887
let codeStylePreference: string | null = null
@@ -704,6 +932,7 @@ const detectEnrichedSignals = async (
704932
codeQuality,
705933
editor,
706934
fileNamingStyle,
935+
variableNamingStyle,
707936
componentNamingStyle,
708937
codeStylePreference,
709938
commitMessageStyle,
@@ -796,7 +1025,7 @@ export async function GET(request: NextRequest): Promise<NextResponse<RepoScanRe
7961025
}
7971026

7981027
const languagesJson = (await languagesResponse.json()) as Record<string, number>
799-
const languages = Object.entries(languagesJson)
1028+
let languages = Object.entries(languagesJson)
8001029
.sort(([, bytesA], [, bytesB]) => bytesB - bytesA)
8011030
.map(([name]) => name)
8021031

@@ -852,18 +1081,52 @@ export async function GET(request: NextRequest): Promise<NextResponse<RepoScanRe
8521081

8531082
const { tooling, testing, frameworks } = await detectTooling(paths, packageJson)
8541083

1084+
const frameworkSet = new Set(frameworks)
1085+
const languageSet = new Set(languages)
1086+
let preferredPrimaryLanguage: string | null = null
1087+
1088+
if (hasDependencyDetectionRules) {
1089+
const dependencyTasks = buildDependencyAnalysisTasks(paths)
1090+
1091+
if (dependencyTasks.length > 0) {
1092+
const dependencyOutcome = await evaluateDependencyAnalysisTasks(
1093+
owner,
1094+
repo,
1095+
defaultBranch,
1096+
headers,
1097+
dependencyTasks,
1098+
packageJson,
1099+
)
1100+
1101+
dependencyOutcome.frameworks.forEach((framework) => frameworkSet.add(framework))
1102+
dependencyOutcome.languages.forEach((language) => languageSet.add(language))
1103+
1104+
if (dependencyOutcome.primaryLanguage) {
1105+
preferredPrimaryLanguage = dependencyOutcome.primaryLanguage
1106+
}
1107+
}
1108+
}
1109+
1110+
const mergedFrameworks = dedupeAndSort(frameworkSet)
1111+
languages = Array.from(languageSet)
1112+
8551113
if (lowestRateLimit !== null && lowestRateLimit < 5) {
8561114
warnings.push(`GitHub API rate limit is low (remaining: ${lowestRateLimit}).`)
8571115
}
8581116

8591117
const enriched = await detectEnrichedSignals(owner, repo, defaultBranch, paths, packageJson, headers)
8601118

1119+
const sortedLanguages = dedupeAndSort(languages)
1120+
const primaryLanguage = preferredPrimaryLanguage
1121+
?? repoJson.language
1122+
?? (sortedLanguages.length > 0 ? sortedLanguages[0] : null)
1123+
8611124
const summary: RepoScanSummary = {
8621125
repo: `${owner}/${repo}`,
8631126
defaultBranch,
864-
language: repoJson.language ?? (languages.length > 0 ? languages[0] : null),
865-
languages: dedupeAndSort(languages),
866-
frameworks,
1127+
language: primaryLanguage,
1128+
languages: sortedLanguages,
1129+
frameworks: mergedFrameworks,
8671130
tooling,
8681131
testing,
8691132
structure,

0 commit comments

Comments
 (0)