-
Notifications
You must be signed in to change notification settings - Fork 228
feat(compass-collection): Schema Analysis Redux Integration for Collection Plugin CLOUDP-333846 #7177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
feat(compass-collection): Schema Analysis Redux Integration for Collection Plugin CLOUDP-333846 #7177
Changes from 2 commits
67297f4
e5a4bde
1409eb3
b9fe5e3
4397540
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,24 @@ | ||
import type { Reducer, AnyAction, Action } from 'redux'; | ||
import { | ||
analyzeDocuments, | ||
type SchemaParseOptions, | ||
type Schema, | ||
} from 'mongodb-schema'; | ||
|
||
import type { CollectionMetadata } from 'mongodb-collection-model'; | ||
import type { ThunkAction } from 'redux-thunk'; | ||
import type AppRegistry from '@mongodb-js/compass-app-registry'; | ||
import type { workspacesServiceLocator } from '@mongodb-js/compass-workspaces/provider'; | ||
import type { CollectionSubtab } from '@mongodb-js/compass-workspaces'; | ||
import type { DataService } from '@mongodb-js/compass-connections/provider'; | ||
import type { experimentationServiceLocator } from '@mongodb-js/compass-telemetry/provider'; | ||
import { calculateSchemaMetadata } from '@mongodb-js/compass-schema'; | ||
import { type Logger, mongoLogId } from '@mongodb-js/compass-logging/provider'; | ||
import { type PreferencesAccess } from 'compass-preferences-model/provider'; | ||
import { isInternalFieldPath } from 'hadron-document'; | ||
import toNS from 'mongodb-ns'; | ||
|
||
const DEFAULT_SAMPLE_SIZE = 100; | ||
|
||
function isAction<A extends AnyAction>( | ||
action: AnyAction, | ||
|
@@ -22,32 +35,79 @@ type CollectionThunkAction<R, A extends AnyAction = AnyAction> = ThunkAction< | |
dataService: DataService; | ||
workspaces: ReturnType<typeof workspacesServiceLocator>; | ||
experimentationServices: ReturnType<typeof experimentationServiceLocator>; | ||
logger: Logger; | ||
preferences: PreferencesAccess; | ||
analysisAbortControllerRef: { current?: AbortController }; | ||
}, | ||
A | ||
>; | ||
|
||
export enum SchemaAnalysisStatus { | ||
INITIAL = 'initial', | ||
ANALYZING = 'analyzing', | ||
COMPLETED = 'completed', | ||
ERROR = 'error', | ||
} | ||
|
||
type SchemaAnalysis = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You can break this into a type for each Status's state type SchemaAnalysis = SchemaAnalysisError | SchemaAnalysisInitial | SchemaAnalysisAnalyzing | SchemaAnalysisCompleted`
// example. notice there's no `error` field or `| null`
type SchemaAnalysisCompleted = {
schema: Schema;
status: SchemaAnalysisStatus;
sampleDocument: Document;
schemaMetaData: { ... };
}
// etc each type shares the The TypeScript handbook has an example here This should simplify the action action+reducer code as well by removing entries for nullable fields There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is very helpful! I'm gonna move these types to their own file as well! |
||
status: SchemaAnalysisStatus; | ||
schema: Schema | null; | ||
sampleDocument: Document | null; | ||
schemaMetadata: { | ||
maxNestingDepth: number; | ||
validationRules: Document; | ||
} | null; | ||
error: string | null; | ||
}; | ||
|
||
export type CollectionState = { | ||
workspaceTabId: string; | ||
namespace: string; | ||
metadata: CollectionMetadata | null; | ||
editViewName?: string; | ||
schemaAnalysis: SchemaAnalysis; | ||
}; | ||
|
||
enum CollectionActions { | ||
export enum CollectionActions { | ||
CollectionMetadataFetched = 'compass-collection/CollectionMetadataFetched', | ||
SchemaAnalysisStarted = 'compass-collection/SchemaAnalysisStarted', | ||
SchemaAnalysisFinished = 'compass-collection/SchemaAnalysisFinished', | ||
SchemaAnalysisFailed = 'compass-collection/SchemaAnalysisFailed', | ||
} | ||
|
||
interface CollectionMetadataFetchedAction { | ||
type: CollectionActions.CollectionMetadataFetched; | ||
metadata: CollectionMetadata; | ||
} | ||
|
||
interface SchemaAnalysisStartedAction { | ||
type: CollectionActions.SchemaAnalysisStarted; | ||
analysisStartTime: number; | ||
ncarbon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
interface SchemaAnalysisFinishedAction { | ||
type: CollectionActions.SchemaAnalysisFinished; | ||
schemaAnalysis: SchemaAnalysis; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actions shouldn't be controlling the state, reducer should, passing the whole state and just assigning it in the reducer is an antipattern that should be avoided, don't do this. Clearly separate what is the action payload and what is the state that reducer will derive based on the action There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I updated this. Wondering if it's worth it to move this to its own reducer. Thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks pretty self contained, so can be its own slice, yeah |
||
} | ||
|
||
interface SchemaAnalysisFailedAction { | ||
type: CollectionActions.SchemaAnalysisFailed; | ||
error: Error; | ||
} | ||
|
||
const reducer: Reducer<CollectionState, Action> = ( | ||
state = { | ||
// TODO(COMPASS-7782): use hook to get the workspace tab id instead | ||
workspaceTabId: '', | ||
namespace: '', | ||
metadata: null, | ||
schemaAnalysis: { | ||
status: SchemaAnalysisStatus.INITIAL, | ||
schema: null, | ||
sampleDocument: null, | ||
schemaMetadata: null, | ||
error: null, | ||
}, | ||
}, | ||
action | ||
) => { | ||
|
@@ -62,6 +122,53 @@ const reducer: Reducer<CollectionState, Action> = ( | |
metadata: action.metadata, | ||
}; | ||
} | ||
|
||
if ( | ||
isAction<SchemaAnalysisStartedAction>( | ||
action, | ||
CollectionActions.SchemaAnalysisStarted | ||
) | ||
) { | ||
return { | ||
...state, | ||
schemaAnalysis: { | ||
status: SchemaAnalysisStatus.ANALYZING, | ||
schema: null, | ||
sampleDocument: null, | ||
schemaMetadata: null, | ||
error: null, | ||
}, | ||
}; | ||
} | ||
|
||
if ( | ||
isAction<SchemaAnalysisFinishedAction>( | ||
action, | ||
CollectionActions.SchemaAnalysisFinished | ||
) | ||
) { | ||
return { | ||
...state, | ||
schemaAnalysis: action.schemaAnalysis, | ||
}; | ||
} | ||
|
||
if ( | ||
isAction<SchemaAnalysisFailedAction>( | ||
action, | ||
CollectionActions.SchemaAnalysisFailed | ||
) | ||
) { | ||
return { | ||
...state, | ||
schemaAnalysis: { | ||
...state.schemaAnalysis, | ||
status: SchemaAnalysisStatus.ERROR, | ||
error: action.error.message, | ||
}, | ||
}; | ||
} | ||
|
||
return state; | ||
}; | ||
|
||
|
@@ -82,6 +189,117 @@ export const selectTab = ( | |
}; | ||
}; | ||
|
||
export const analyzeCollectionSchema = (): CollectionThunkAction< | ||
Promise<void> | ||
> => { | ||
return async ( | ||
dispatch, | ||
getState, | ||
{ analysisAbortControllerRef, dataService, preferences, logger } | ||
) => { | ||
const { schemaAnalysis, namespace } = getState(); | ||
const analysisStatus = schemaAnalysis.status; | ||
if (analysisStatus === SchemaAnalysisStatus.ANALYZING) { | ||
logger.debug( | ||
'Schema analysis is already in progress, skipping new analysis.' | ||
); | ||
return; | ||
} | ||
|
||
analysisAbortControllerRef.current = new AbortController(); | ||
const abortSignal = analysisAbortControllerRef.current.signal; | ||
|
||
const analysisStartTime = Date.now(); | ||
|
||
try { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I presume it's possible to go to ANALYZING directly from COMPLETED or FAILED as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's possible. For now, this function will be invoked when collection metadata is fetched successfully. |
||
logger.debug('Schema analysis started.'); | ||
|
||
dispatch({ | ||
type: CollectionActions.SchemaAnalysisStarted, | ||
analysisStartTime, | ||
}); | ||
|
||
// Sample documents | ||
const samplingOptions = { size: DEFAULT_SAMPLE_SIZE }; | ||
const driverOptions = { | ||
maxTimeMS: preferences.getPreferences().maxTimeMS, | ||
signal: abortSignal, | ||
}; | ||
const sampleCursor = dataService.sampleCursor( | ||
namespace, | ||
samplingOptions, | ||
driverOptions, | ||
{ | ||
fallbackReadPreference: 'secondaryPreferred', | ||
} | ||
); | ||
const sampleDocuments = await sampleCursor.toArray(); | ||
|
||
// Analyze sampled documents | ||
const schemaParseOptions: SchemaParseOptions = { | ||
signal: abortSignal, | ||
}; | ||
const schemaAccessor = await analyzeDocuments( | ||
sampleDocuments, | ||
schemaParseOptions | ||
); | ||
if (abortSignal?.aborted) { | ||
throw new Error(abortSignal?.reason || new Error('Operation aborted')); | ||
ncarbon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
let schema: Schema | null = null; | ||
if (schemaAccessor) { | ||
schema = await schemaAccessor.getInternalSchema(); | ||
// Filter out internal fields from the schema | ||
schema.fields = schema.fields.filter( | ||
({ path }) => !isInternalFieldPath(path[0]) | ||
); | ||
// TODO: Transform schema to structure that will be used by the LLM. | ||
} | ||
|
||
let schemaMetadata = null; | ||
if (schema !== null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What should happen if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question! I was assuming a Should we consider this a "failure" state? Something else? @jcobis Do you have thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically, the return type of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I think it's an unnecessary check. We probably don't need to check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the analysis fails I think the exception would be thrown by |
||
const { schema_depth } = await calculateSchemaMetadata(schema); | ||
const { database, collection } = toNS(namespace); | ||
const collInfo = await dataService.collectionInfo(database, collection); | ||
schemaMetadata = { | ||
maxNestingDepth: schema_depth, | ||
validationRules: collInfo?.validation?.validator || null, | ||
ncarbon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
} | ||
dispatch({ | ||
type: CollectionActions.SchemaAnalysisFinished, | ||
schemaAnalysis: { | ||
ncarbon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
status: SchemaAnalysisStatus.COMPLETED, | ||
schema, | ||
sampleDocument: sampleDocuments[0] ?? null, | ||
schemaMetadata, | ||
}, | ||
}); | ||
} catch (err: any) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there documented errors that can be caught here? Would help devs looking at stack traces narrow the source of the issue earlier, especially because there's multiple |
||
logger.log.error( | ||
mongoLogId(1_001_000_363), | ||
'Collection', | ||
'Schema analysis failed', | ||
{ | ||
namespace, | ||
error: err.message, | ||
aborted: abortSignal.aborted, | ||
...(abortSignal.aborted | ||
? { abortReason: abortSignal.reason?.message ?? abortSignal.reason } | ||
: {}), | ||
} | ||
); | ||
dispatch({ | ||
type: CollectionActions.SchemaAnalysisFailed, | ||
error: err as Error, | ||
}); | ||
} finally { | ||
analysisAbortControllerRef.current = undefined; | ||
} | ||
}; | ||
}; | ||
|
||
export type CollectionTabPluginMetadata = CollectionMetadata & { | ||
/** | ||
* Initial query for the query bar | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nit] fyi there's another thread that advocates the compass repos's preference for unions over enums in #7181 (comment). I personally prefer them over enums because they enable type algebra like creating intersection/union types, and enums add runtime artifacts that union types do not