@@ -7,7 +7,173 @@ import type {
77 Db ,
88 IndexDescriptionInfo ,
99 MongoClient ,
10+ Document ,
1011} from "mongodb" ;
12+ import { ObjectId } from "mongodb" ;
13+
14+ // Define interfaces for schema inference
/** Mutable accumulator describing one field while sample documents are scanned. */
interface FieldInfo {
  /** Document key this entry describes. */
  name: string;
  /** Distinct type labels observed for the field across the samples. */
  types: Set<string>;
  /** Whether the field has been flagged as optional/nullable across the samples. */
  nullable: boolean;
  /** Raw example values collected for the field. */
  samples: Array<unknown>;
  /** Schema inferred from nested documents stored under this field, if any. */
  nestedSchema?: SchemaResult;
}

/** Result of schema inference: one summary per discovered field. */
interface SchemaResult {
  fields: Array<FieldSummary>;
}

/** Serializable, reader-facing description of a single field. */
interface FieldSummary {
  /** Document key this summary describes. */
  name: string;
  /** Distinct type labels observed for the field. */
  types: Array<string>;
  /** Whether the field has been flagged as optional/nullable across the samples. */
  nullable: boolean;
  /** Share of sampled documents containing the field, formatted as a percentage string such as "50%". */
  prevalence: string;
  /** Simplified example values for the field. */
  examples: Array<unknown>;
  /** Schema of nested documents stored under this field, if any. */
  nestedSchema?: SchemaResult;
}

/** Full description of a collection as returned to the resource reader. */
interface CollectionSchema {
  /** Kind of resource being described (the handler emits "collection"). */
  type: string;
  /** Collection name. */
  name: string;
  /** Field summaries inferred from the sampled documents. */
  fields: Array<FieldSummary>;
  /** Name and key specification of each index on the collection. */
  indexes: {
    name: undefined | string;
    keys: Record<string, unknown>;
  }[];
  /** Document count; a string placeholder or null when no numeric count is available. */
  documentCount: string | number | null;
  /** Number of documents the schema was inferred from. */
  sampleSize: number;
  /** ISO-8601 timestamp of when this schema was generated. */
  lastUpdated: string;
}
48+
/**
 * Maps the `_bsontype` tag exposed by BSON wrapper objects to the label reported
 * by {@link detectMongoType}. `ObjectID` and `Symbol` are spellings reported by
 * older `bson` releases and are normalised to their current names.
 * Only known tags are listed so that an ordinary document that happens to carry a
 * `_bsontype` field with some other value is still reported as a `Document`.
 */
const BSON_TYPE_LABELS: ReadonlyMap<string, string> = new Map([
  ['Binary', 'Binary'],
  ['BSONRegExp', 'BSONRegExp'],
  ['BSONSymbol', 'BSONSymbol'],
  ['Code', 'Code'],
  ['DBRef', 'DBRef'],
  ['Decimal128', 'Decimal128'],
  ['Double', 'Double'],
  ['Int32', 'Int32'],
  ['Long', 'Long'],
  ['MaxKey', 'MaxKey'],
  ['MinKey', 'MinKey'],
  ['ObjectId', 'ObjectId'],
  ['ObjectID', 'ObjectId'],
  ['Symbol', 'BSONSymbol'],
  ['Timestamp', 'Timestamp'],
]);

/**
 * Detects the MongoDB-specific type of a value.
 *
 * - `null` / `undefined` are reported by name.
 * - `ObjectId`, `Date` and `RegExp` instances are reported by class name.
 * - Arrays are reported as `Array` (empty), `Array<T>` (all elements share the
 *   label `T`) or `Array<mixed>`.
 * - Objects carrying a known `_bsontype` tag are reported under that BSON type,
 *   so wrapper values are not mistaken for embedded documents.
 * - Any other object is a `Document`; primitives fall back to `typeof`.
 *
 * @param value The value to detect the type of
 * @returns A string representing the detected type
 */
function detectMongoType(value: unknown): string {
  if (value === null) return 'null';
  if (value === undefined) return 'undefined';

  if (value instanceof ObjectId) return 'ObjectId';
  if (value instanceof Date) return 'Date';
  if (value instanceof RegExp) return 'RegExp';

  if (Array.isArray(value)) {
    if (value.length === 0) return 'Array';

    // Collapse to a single element label when every element agrees.
    const elementTypes = new Set(value.map((item) => detectMongoType(item)));
    if (elementTypes.size === 1) {
      return `Array<${Array.from(elementTypes)[0]}>`;
    }
    return 'Array<mixed>';
  }

  if (typeof value === 'object') {
    // Duck-type BSON wrappers via their tag; unlike `instanceof`, this also works
    // when the value was created by a different copy of the bson package.
    const tag = (value as { _bsontype?: unknown })._bsontype;
    if (typeof tag === 'string') {
      const label = BSON_TYPE_LABELS.get(tag);
      if (label !== undefined) return label;
    }

    // Anything else is treated as an embedded document.
    return 'Document';
  }

  return typeof value;
}
78+
/** Maximum number of distinct example values retained per field. */
const MAX_FIELD_SAMPLES = 3;

/**
 * Builds a comparison key used to de-duplicate sample values.
 * `bigint` values are rendered as text because `JSON.stringify` throws on them;
 * values that still cannot be serialised share one placeholder key.
 */
function sampleIdentityKey(value: unknown): string | undefined {
  try {
    return JSON.stringify(value, (_key, nested: unknown) =>
      typeof nested === 'bigint' ? nested.toString() + 'n' : nested,
    );
  } catch {
    return '[unserializable]';
  }
}

/** Reduces a raw sample value to a compact, JSON-friendly example. */
function summarizeSample(sample: unknown): unknown {
  if (sample instanceof ObjectId) return sample.toString();
  if (sample instanceof Date) {
    // toISOString() throws a RangeError on an invalid Date.
    return Number.isNaN(sample.getTime()) ? 'Invalid Date' : sample.toISOString();
  }
  if (typeof sample === 'bigint') return sample.toString();
  // `typeof null` is 'object', so null must be excluded to stay null.
  if (typeof sample === 'object' && sample !== null) {
    // For objects/arrays, just indicate the kind rather than the full structure.
    return Array.isArray(sample) ? '[...]' : '{...}';
  }
  return sample;
}

/**
 * Infers a schema from multiple sample documents.
 *
 * For every key seen in any document the result records the distinct type labels
 * (via `detectMongoType`), up to three distinct example values, the percentage of
 * documents that contain the key, and whether the field is nullable. A field is
 * nullable when it is missing from at least one document or holds a
 * `null`/`undefined` value in at least one document. Values detected as
 * `Document` are inferred recursively into `nestedSchema`.
 *
 * @param documents Array of sample documents from the collection
 * @returns Inferred schema with one summary per field, in first-seen order
 */
function inferSchemaFromSamples(documents: Document[]): SchemaResult {
  if (!documents || documents.length === 0) {
    return { fields: [] };
  }

  // Field accumulator keyed by field name (Map keeps first-seen order).
  const fieldMap = new Map<string, FieldInfo>();
  // Number of documents that carry each field as an own key.
  const presenceCounts = new Map<string, number>();
  // Embedded documents collected per field for recursive inference.
  const nestedDocsByField = new Map<string, Document[]>();

  // Single pass: collect types, samples, presence counts and nested documents.
  for (const doc of documents) {
    for (const [key, value] of Object.entries(doc)) {
      const detectedType = detectMongoType(value);

      let fieldInfo = fieldMap.get(key);
      if (fieldInfo === undefined) {
        fieldInfo = { name: key, types: new Set<string>(), nullable: false, samples: [] };
        fieldMap.set(key, fieldInfo);
      }
      fieldInfo.types.add(detectedType);
      presenceCounts.set(key, (presenceCounts.get(key) ?? 0) + 1);

      // Keep up to MAX_FIELD_SAMPLES distinct example values.
      if (fieldInfo.samples.length < MAX_FIELD_SAMPLES) {
        const valueKey = sampleIdentityKey(value);
        const alreadySampled = fieldInfo.samples.some(
          (sample) => sampleIdentityKey(sample) === valueKey,
        );
        if (!alreadySampled) {
          fieldInfo.samples.push(value);
        }
      }

      // Only genuine embedded documents feed the nested schema; Dates, ObjectIds
      // and other non-document objects in a mixed-type field are left out.
      if (detectedType === 'Document') {
        const nestedDocs = nestedDocsByField.get(key);
        if (nestedDocs === undefined) {
          nestedDocsByField.set(key, [value as Document]);
        } else {
          nestedDocs.push(value as Document);
        }
      }
    }
  }

  // Convert the accumulators into serializable field summaries.
  const fields = Array.from(fieldMap.values(), (fieldInfo): FieldSummary => {
    const presentIn = presenceCounts.get(fieldInfo.name) ?? 0;

    fieldInfo.nullable =
      presentIn < documents.length ||
      fieldInfo.types.has('null') ||
      fieldInfo.types.has('undefined');

    const nestedDocs = nestedDocsByField.get(fieldInfo.name);
    if (nestedDocs !== undefined) {
      // Recursively infer the schema of the embedded documents.
      fieldInfo.nestedSchema = inferSchemaFromSamples(nestedDocs);
    }

    const summary: FieldSummary = {
      name: fieldInfo.name,
      types: Array.from(fieldInfo.types),
      nullable: fieldInfo.nullable,
      prevalence: Math.round((presentIn / documents.length) * 100) + '%',
      examples: fieldInfo.samples.map((sample) => summarizeSample(sample)),
    };

    // Include nested schema if available.
    if (fieldInfo.nestedSchema) {
      summary.nestedSchema = fieldInfo.nestedSchema;
    }

    return summary;
  });

  return { fields };
}
11177
12178export async function handleReadResourceRequest ( {
13179 request,
@@ -25,28 +191,64 @@ export async function handleReadResourceRequest({
25191
26192 try {
27193 const collection = db . collection ( collectionName ) ;
28- const sample = await collection . findOne ( { } ) ;
194+
195+ // Set sample size for schema inference
196+ const sampleSize = 100 ;
197+ let sampleDocuments : Document [ ] = [ ] ;
198+
199+ try {
200+ // First try using MongoDB's $sample aggregation to get a diverse set of documents
201+ sampleDocuments = await collection
202+ . aggregate ( [ { $sample : { size : sampleSize } } ] )
203+ . toArray ( ) ;
204+ } catch ( sampleError ) {
205+ // Fallback to sequential scan if $sample is not available
206+ console . warn ( `$sample aggregation failed for ${ collectionName } , falling back to sequential scan: ${ sampleError } ` ) ;
207+ sampleDocuments = await collection
208+ . find ( { } )
209+ . limit ( sampleSize )
210+ . toArray ( ) ;
211+ }
212+
213+ // Get indexes for the collection
29214 const indexes = await collection . indexes ( ) ;
30-
31- const schema = sample
32- ? {
33- type : "collection" ,
34- name : collectionName ,
35- fields : Object . entries ( sample ) . map ( ( [ key , value ] ) => ( {
36- name : key ,
37- type : typeof value ,
38- } ) ) ,
39- indexes : indexes . map ( ( idx : IndexDescriptionInfo ) => ( {
40- name : idx . name ,
41- keys : idx . key ,
42- } ) ) ,
43- }
44- : {
45- type : "collection" ,
46- name : collectionName ,
47- fields : [ ] ,
48- indexes : [ ] ,
49- } ;
215+
216+ // Infer schema from samples
217+ const inferredSchema = inferSchemaFromSamples ( sampleDocuments ) ;
218+
219+ // Get document count with timeout protection
220+ let documentCount : number | string | null = null ;
221+ try {
222+ // Set a timeout for the count operation
223+ documentCount = await Promise . race ( [
224+ collection . countDocuments ( ) ,
225+ new Promise < never > ( ( _ , reject ) =>
226+ setTimeout ( ( ) => reject ( new Error ( 'Count operation timed out' ) ) , 5000 )
227+ )
228+ ] ) ;
229+ } catch ( countError ) {
230+ console . warn ( `Count operation failed or timed out for ${ collectionName } : ${ countError } ` ) ;
231+ // Estimate count based on sample size and collection stats
232+ try {
233+ const stats = await db . command ( { collStats : collectionName } ) ;
234+ documentCount = stats . count ;
235+ } catch {
236+ documentCount = 'unknown (count operation timed out)' ;
237+ }
238+ }
239+
240+ const schema : CollectionSchema = {
241+ type : "collection" ,
242+ name : collectionName ,
243+ fields : inferredSchema . fields ,
244+ indexes : indexes . map ( ( idx : IndexDescriptionInfo ) => ( {
245+ name : idx . name ,
246+ keys : idx . key ,
247+ } ) ) ,
248+ documentCount : documentCount ,
249+ sampleSize : sampleDocuments . length ,
250+ lastUpdated : new Date ( ) . toISOString ( ) ,
251+ } ;
50252
51253 return {
52254 contents : [
0 commit comments