Skip to content

Commit 8f7ec46

Browse files
authored
feat(datasets): create mutation for dataset creation (#5583)
1 parent 28fef56 commit 8f7ec46

File tree

8 files changed

+321
-3
lines changed

8 files changed

+321
-3
lines changed

apps/frontend/app/api/v1/osograph/schema.graphql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,26 @@ type Mutation {
235235
osoApp_saveNotebookPreview(
236236
input: SaveNotebookPreviewInput!
237237
): SaveNotebookPreviewPayload!
238+
239+
"""
240+
Create a new dataset
241+
"""
242+
osoApp_createDataset(input: CreateDatasetInput!): CreateDatasetPayload!
243+
}
244+
245+
# Input for the osoApp_createDataset mutation.
#
# - orgName:     name of the organization the dataset is created under; the
#                resolver looks the organization up by this name and checks
#                the caller's membership in it.
# - name:        identifier for the dataset; the resolver's validation schema
#                requires a leading letter followed by letters, digits, or
#                underscores.
# - displayName: human-readable label; must be non-empty.
# - description: optional free-form text.
# - datasetType: kind of dataset; the resolver currently only handles
#                USER_MODEL and raises an error for every other value.
# - isPublic:    optional visibility flag; the resolver stores false when it
#                is omitted.
input CreateDatasetInput {
246+
orgName: String!
247+
name: String!
248+
displayName: String!
249+
description: String
250+
datasetType: DatasetType!
251+
isPublic: Boolean
252+
}
253+
254+
# Result of the osoApp_createDataset mutation.
#
# - dataset: the newly inserted dataset row (nullable in the schema).
# - message: status text; the resolver returns "Dataset created successfully"
#            on the success path.
# - success: the resolver returns true on the success path; its failure paths
#            throw errors instead of returning a payload.
type CreateDatasetPayload {
255+
dataset: Dataset
256+
message: String!
257+
success: Boolean!
238258
}
239259

240260
input CreateInvitationInput {

apps/frontend/app/api/v1/osograph/schema/resolvers/dataset.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ import { logger } from "@/lib/logger";
2222
import { assert } from "@opensource-observer/utils";
2323
import { createAdminClient } from "@/lib/supabase/admin";
2424
import { DatasetsRow } from "@/lib/types/schema-types";
25+
import z from "zod";
26+
import { validateInput } from "@/app/api/v1/osograph/utils/validation";
27+
import { v4 as uuidv4 } from "uuid";
2528

2629
const TRINO_SCHEMA_TIMEOUT = 10000; // 10 seconds
2730

@@ -215,6 +218,62 @@ export const datasetResolver: GraphQLResolverModule<GraphQLContext> = {
215218
return filteredResults;
216219
},
217220
},
221+
Mutation: {
222+
/**
 * Resolver for the `osoApp_createDataset` mutation.
 *
 * Flow: pass `context.user` through `requireAuthentication`, validate the
 * input against `CreateDatasetSchema`, resolve the organization by name,
 * run `requireOrgMembership` for the caller, then insert one row into the
 * `datasets` table using the admin Supabase client.
 *
 * @param _ - Unused parent value.
 * @param input - Raw mutation input; re-validated before use.
 * @param context - GraphQL context carrying the calling user.
 * @returns `{ dataset, message, success }` describing the inserted row.
 * @throws Error when `datasetType` is anything other than `USER_MODEL`.
 * @throws The value of `ServerErrors.database(...)` when the insert fails.
 */
osoApp_createDataset: async (
  _: unknown,
  { input }: { input: z.infer<typeof CreateDatasetSchema> },
  context: GraphQLContext,
) => {
  const caller = requireAuthentication(context.user);
  const params = validateInput(CreateDatasetSchema, input);
  const org = await getOrganizationByName(params.orgName);
  await requireOrgMembership(caller.userId, org.id);

  const adminClient = createAdminClient();
  const newDatasetId = uuidv4();

  // USER_MODEL is the only dataset type with a catalog/schema mapping so far.
  if (params.datasetType !== "USER_MODEL") {
    throw new Error(
      `Dataset type "${params.datasetType}" is not supported yet.`,
    );
  }
  const catalog = "user_iceberg";
  // The schema name is derived from the dataset id with the dashes removed.
  const schema = `ds_${newDatasetId.split("-").join("")}`;

  const insertResult = await adminClient
    .from("datasets")
    .insert({
      id: newDatasetId,
      org_id: org.id,
      name: params.name,
      display_name: params.displayName,
      description: params.description,
      catalog,
      schema,
      created_by: caller.userId,
      is_public: params.isPublic ?? false,
      dataset_type: params.datasetType,
    })
    .select()
    .single();

  if (insertResult.error) {
    logger.error("Failed to create dataset:", insertResult.error);
    throw ServerErrors.database("Failed to create dataset");
  }

  return {
    dataset: insertResult.data,
    message: "Dataset created successfully",
    success: true,
  };
},
276+
},
218277
Dataset: {
219278
id: (parent: DatasetsRow) => parent.id,
220279
orgId: (parent: DatasetsRow) => parent.org_id,
@@ -232,6 +291,21 @@ export const datasetResolver: GraphQLResolverModule<GraphQLContext> = {
232291
},
233292
};
234293

294+
/**
 * Validation schema for the `osoApp_createDataset` mutation input.
 *
 * - `orgName`, `name` and `displayName` must be non-empty strings.
 * - `name` must start with a letter and may continue with letters, digits,
 *   or underscores. A single letter is a valid name, in line with the
 *   `min(1)` rule.
 * - `description` and `isPublic` are optional.
 * - `datasetType` must be one of the three listed literals.
 */
export const CreateDatasetSchema = z.object({
  orgName: z.string().min(1, "Organization name is required"),
  name: z
    .string()
    .min(1, "Dataset name is required")
    .regex(
      // `*` rather than `+` so a one-letter name is not rejected with a
      // message that does not describe the actual problem.
      /^[a-zA-Z][a-zA-Z0-9_]*$/,
      "Dataset name must start with a letter and can only contain letters, numbers, and underscores",
    ),
  displayName: z.string().min(1, "Display name is required"),
  description: z.string().optional(),
  isPublic: z.boolean().optional(),
  datasetType: z.enum(["USER_MODEL", "DATA_CONNECTOR", "DATA_INGESTION"]),
});
308+
235309
async function getOrganizationDatasets(orgId: string): Promise<DatasetsRow[]> {
236310
const supabase = createAdminClient();
237311

apps/frontend/components/dataprovider/oso-global-context.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ const OsoGlobalActionNames: ExtractMethodNames<OsoAppClient>[] = _.sortBy([
9797
"deductOrganizationCredits",
9898
"updateOrganizationTier",
9999
"setOrganizationCredits",
100+
"createDataset",
100101
]);
101102
const OsoGlobalActions: Partial<ExtractMethods<OsoAppClient>> = _.fromPairs(
102103
OsoGlobalActionNames.map((name) => [

apps/frontend/lib/clients/oso-app/oso-app.ts

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,6 +2442,83 @@ class OsoAppClient {
24422442
"X-Supabase-Auth": `${sessionData.session.access_token}:${sessionData.session.refresh_token}`,
24432443
};
24442444
}
2445+
2446+
/**
 * Creates a dataset by POSTing the `osoApp_createDataset` GraphQL mutation
 * to `/api/v1/osograph`.
 *
 * @param args - Dataset fields. `orgName`, `name`, `displayName` and
 *   `datasetType` are required at runtime (checked with `ensure`);
 *   `description` and `isPublic` are forwarded as given.
 * @returns The `dataset` object from the mutation payload.
 * @throws Error when a required argument is missing, when the response
 *   carries GraphQL errors, when it carries no payload, or when the payload
 *   reports `success: false`.
 */
async createDataset(
  args: Partial<{
    orgName: string;
    name: string;
    displayName: string;
    description: string;
    datasetType: "USER_MODEL" | "DATA_CONNECTOR" | "DATA_INGESTION";
    isPublic: boolean;
  }>,
) {
  const { orgName, name, displayName, description, datasetType, isPublic } = {
    orgName: ensure(args.orgName, "Missing orgName argument"),
    name: ensure(args.name, "Missing name argument"),
    displayName: ensure(args.displayName, "Missing displayName argument"),
    description: args.description,
    datasetType: ensure(args.datasetType, "Missing datasetType argument"),
    isPublic: args.isPublic,
  };

  // The generated `gql` overload is keyed on this exact source text, so the
  // whitespace inside the template literal is significant.
  const CREATE_DATASET_MUTATION = gql(`
mutation CreateDataset($input: CreateDatasetInput!) {
osoApp_createDataset(input: $input) {
success
message
dataset {
id
name
displayName
description
catalog
schema
datasetType
isPublic
}
}
}
`);

  const response = await fetch("/api/v1/osograph", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      query: print(CREATE_DATASET_MUTATION),
      variables: {
        input: {
          orgName,
          name,
          displayName,
          description,
          datasetType,
          isPublic,
        },
      },
    }),
  });

  const result = await response.json();

  // Guard the index access: an empty `errors` array must not raise a
  // TypeError while building the error message.
  const firstError = result.errors?.[0];
  if (firstError) {
    logger.error("Failed to create dataset:", firstError.message);
    throw new Error(`Failed to create dataset: ${firstError.message}`);
  }

  const payload = result.data?.osoApp_createDataset;
  if (!payload) {
    throw new Error("No response data from create dataset mutation");
  }

  // Surface a server-reported failure instead of silently returning a
  // possibly-null dataset and discarding the explanation in `message`.
  if (!payload.success) {
    logger.error("Failed to create dataset:", payload.message);
    throw new Error(`Failed to create dataset: ${payload.message}`);
  }

  logger.log(`Successfully created dataset "${displayName}"`);
  return payload.dataset;
}
24452522
}
24462523

24472524
export { OsoAppClient };

apps/frontend/lib/graphql/generated/gql.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { TypedDocumentNode as DocumentNode } from "@graphql-typed-document-node/
1414
* Learn more about it here: https://the-guild.dev/graphql/codegen/plugins/presets/preset-client#reducing-bundle-size
1515
*/
1616
type Documents = {
17+
"\n mutation CreateDataset($input: CreateDatasetInput!) {\n osoApp_createDataset(input: $input) {\n success\n message\n dataset {\n id\n name\n displayName\n description\n catalog\n schema\n datasetType\n isPublic\n }\n }\n }\n ": typeof types.CreateDatasetDocument;
1718
"\n mutation SavePreview($input: SaveNotebookPreviewInput!) {\n osoApp_saveNotebookPreview(input: $input) {\n success\n message\n }\n }\n ": typeof types.SavePreviewDocument;
1819
"\nquery AssetGraph {\n assetNodes {\n assetKey {\n path\n }\n dependencyKeys {\n path\n }\n }\n}": typeof types.AssetGraphDocument;
1920
'\nquery AssetMaterializedData($assetKeys: [AssetKeyInput!] = {path: ""}) {\n assetNodes(assetKeys: $assetKeys) {\n assetKey {\n path\n }\n partitionStats {\n numFailed\n numMaterialized\n numMaterializing\n numPartitions\n }\n assetPartitionStatuses {\n ... on TimePartitionStatuses {\n __typename\n ranges {\n endKey\n startKey\n status\n }\n }\n }\n assetMaterializations(limit: 1) {\n runOrError {\n ... on Run {\n endTime\n }\n }\n }\n }\n}': typeof types.AssetMaterializedDataDocument;
@@ -22,6 +23,8 @@ type Documents = {
2223
"\n query TimeseriesMetricsByCollection(\n $collectionIds: [String!],\n $metricIds: [String!],\n $startDate: Oso_Date!,\n $endDate: Oso_Date!, \n ) {\n oso_timeseriesMetricsByCollectionV0(where: {\n collectionId: {_in: $collectionIds},\n metricId: {_in: $metricIds},\n sampleDate: { _gte: $startDate, _lte: $endDate }\n }) {\n amount\n metricId\n collectionId\n sampleDate\n unit\n }\n oso_collectionsV1(where: { collectionId: { _in: $collectionIds }}) {\n collectionId\n collectionSource\n collectionNamespace\n collectionName\n displayName\n description\n }\n oso_metricsV0(where: {metricId: {_in: $metricIds}}) {\n metricId\n metricSource\n metricNamespace\n metricName\n displayName\n description\n }\n }\n": typeof types.TimeseriesMetricsByCollectionDocument;
2324
};
2425
const documents: Documents = {
26+
"\n mutation CreateDataset($input: CreateDatasetInput!) {\n osoApp_createDataset(input: $input) {\n success\n message\n dataset {\n id\n name\n displayName\n description\n catalog\n schema\n datasetType\n isPublic\n }\n }\n }\n ":
27+
types.CreateDatasetDocument,
2528
"\n mutation SavePreview($input: SaveNotebookPreviewInput!) {\n osoApp_saveNotebookPreview(input: $input) {\n success\n message\n }\n }\n ":
2629
types.SavePreviewDocument,
2730
"\nquery AssetGraph {\n assetNodes {\n assetKey {\n path\n }\n dependencyKeys {\n path\n }\n }\n}":
@@ -50,6 +53,12 @@ const documents: Documents = {
5053
*/
5154
export function gql(source: string): unknown;
5255

56+
/**
57+
* The gql function is used to parse GraphQL queries into a document that can be used by GraphQL clients.
58+
*/
59+
export function gql(
60+
source: "\n mutation CreateDataset($input: CreateDatasetInput!) {\n osoApp_createDataset(input: $input) {\n success\n message\n dataset {\n id\n name\n displayName\n description\n catalog\n schema\n datasetType\n isPublic\n }\n }\n }\n ",
61+
): (typeof documents)["\n mutation CreateDataset($input: CreateDatasetInput!) {\n osoApp_createDataset(input: $input) {\n success\n message\n dataset {\n id\n name\n displayName\n description\n catalog\n schema\n datasetType\n isPublic\n }\n }\n }\n "];
5362
/**
5463
* The gql function is used to parse GraphQL queries into a document that can be used by GraphQL clients.
5564
*/

0 commit comments

Comments
 (0)