Skip to content

Commit 2791eae

Browse files
committed
fix: resolve Section 5 schema accuracy issues - use real statistics
- Replace hardcoded nullability (5%) and uniqueness (80%) with actual calculations.
- Now uses Section 2 completeness and uniqueness analysis results.
- Section 5 now reports accurate nullability and uniqueness percentages.
- Verified with test data: 0%/20%/20% nullability and 100%/100%/50% uniqueness.

Fixes critical accuracy issue reported in v1.6.4 testing, where Section 5 schema statistics were wrong compared with other sections.
1 parent 417dccb commit 2791eae

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

src/analyzers/engineering/section5-analyzer-fixed.ts

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,28 @@ export class Section5Analyzer {
113113
// Schema Analysis
114114
const schemaAnalysis = {
115115
currentSchema: {
116-
columns: section1Result.overview.structuralDimensions.columnInventory.map((col) => ({
117-
originalName: col.name,
118-
detectedType: 'string', // Simplified
119-
inferredSemanticType: 'unknown',
120-
nullabilityPercentage: 5,
121-
uniquenessPercentage: 80,
122-
sampleValues: ['sample1', 'sample2'],
123-
})),
116+
columns: section1Result.overview.structuralDimensions.columnInventory.map((col) => {
117+
// Get actual missing percentage from Section 2
118+
const completenessInfo = section2Result.qualityAudit?.completeness?.columnLevel?.find(
119+
(c) => c.columnName === col.name
120+
);
121+
const actualMissingPercentage = completenessInfo?.missingPercentage ?? 0;
122+
123+
// Get actual uniqueness percentage from Section 2
124+
const uniquenessInfo = section2Result.qualityAudit?.uniqueness?.columnUniqueness?.find(
125+
(u) => u.columnName === col.name
126+
);
127+
const actualUniquenessPercentage = uniquenessInfo?.uniquePercentage ?? 100;
128+
129+
return {
130+
originalName: col.name,
131+
detectedType: 'string', // Simplified
132+
inferredSemanticType: 'unknown',
133+
nullabilityPercentage: Math.round(actualMissingPercentage * 100) / 100,
134+
uniquenessPercentage: Math.round(actualUniquenessPercentage * 100) / 100,
135+
sampleValues: ['sample1', 'sample2'],
136+
};
137+
}),
124138
estimatedRowCount: section1Result.overview.structuralDimensions.totalDataRows,
125139
estimatedSizeBytes: section1Result.overview.fileDetails.fileSizeBytes,
126140
detectedEncoding: section1Result.overview.parsingMetadata.encoding.encoding,

0 commit comments

Comments
 (0)