diff --git a/backend/src/ApiRequests/Queries/DataFormat.graphql b/backend/src/ApiRequests/Queries/DataFormat.graphql
new file mode 100644
index 00000000..8dbf56c3
--- /dev/null
+++ b/backend/src/ApiRequests/Queries/DataFormat.graphql
@@ -0,0 +1,12 @@
+query DataFormat(
+  $id: Uuid!
+) {
+  dataFormat(id: $id) {
+    uuid
+    name
+    extension
+    description
+    mediaType
+    schemaLocator
+  }
+}
\ No newline at end of file
diff --git a/backend/src/ApiRequests/QueryDataFormat.cs b/backend/src/ApiRequests/QueryDataFormat.cs
new file mode 100644
index 00000000..b5c3dd44
--- /dev/null
+++ b/backend/src/ApiRequests/QueryDataFormat.cs
@@ -0,0 +1,66 @@
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using GraphQL;
+using Database.Services;
+
+namespace Database.ApiRequests;
+
+/// <summary>
+/// Queries a single data format from the metabase GraphQL endpoint using the
+/// query document <c>DataFormat.graphql</c>.
+/// </summary>
+public sealed class QueryDataFormat
+{
+    // File name of the GraphQL query document sent by Do.
+    private const string QueryFileName = "DataFormat.graphql";
+
+    /// <summary>Returns the metabase GraphQL endpoint from the application settings.</summary>
+    public static Uri GetGraphQlEndpoint(AppSettings appSettings) =>
+        appSettings.MetabaseGraphQlEndpoint;
+
+    /// <summary>Fields of a data format selected by the <c>DataFormat</c> query.</summary>
+    public sealed record DataFormat(
+        Guid Uuid,
+        string Name,
+        string? Extension,
+        string Description,
+        string MediaType,
+        Uri? SchemaLocator
+    // DataFormatManagerEdge manager,
+    // Reference reference
+    );
+
+    // Mirrors the root of the query response, whose only field is dataFormat.
+    private sealed record DataFormatData(DataFormat? DataFormat);
+
+    /// <summary>
+    /// Sends the <c>DataFormat</c> query for <paramref name="dataFormatId"/> to the
+    /// metabase and returns the <c>dataFormat</c> field of the response.
+    /// </summary>
+    /// <param name="dataFormatId">Identifier passed as the query variable <c>id</c>.</param>
+    /// <param name="appSettings">Settings providing the metabase GraphQL endpoint.</param>
+    /// <param name="apiRequestService">Service that loads the query document and sends the request.</param>
+    /// <param name="cancellationToken">Token forwarded to the GraphQL request.</param>
+    /// <returns>The queried data format, or <c>null</c> when the response contains none.</returns>
+    public static async Task<DataFormat?> Do(
+        Guid dataFormatId,
+        AppSettings appSettings,
+        ApiRequestService apiRequestService,
+        CancellationToken cancellationToken
+    )
+    {
+        var response = await apiRequestService.QueryGraphQl<DataFormatData>(
+            GetGraphQlEndpoint(appSettings),
+            new GraphQLRequest(
+                await apiRequestService.ConstructGraphQlQuery(QueryFileName),
+                new { id = dataFormatId },
+                "DataFormat"
+            ),
+            cancellationToken
+        );
+        // NOTE(review): presumably the response has no data when the metabase only
+        // returns errors (confirm); yield null then instead of dereferencing null.
+        return response.Data?.DataFormat;
+    }
+}
\ No newline at end of file
diff --git a/backend/src/Controllers/FileUploadController.cs b/backend/src/Controllers/FileUploadController.cs
index 15db1e57..12b2f7a0 100644
--- a/backend/src/Controllers/FileUploadController.cs
+++ b/backend/src/Controllers/FileUploadController.cs
@@ -18,6 +18,7 @@
 using Microsoft.Net.Http.Headers;
 using Microsoft.AspNetCore.Http;
 using Microsoft.AspNetCore.Routing;
+using Database.ApiRequests;
 
 namespace Database.Controllers;
 
@@ -91,6 +92,9 @@ public async Task UploadFile(
         [FromQuery] Guid getHttpsResourceUuid,
         [FromServices] ApplicationDbContext context,
         [FromServices] UserService userService,
+        [FromServices] ApiRequestService apiRequestService,
+        [FromServices] AppSettings appSettings,
+        [FromServices] JsonValidator jsonValidator,
         CancellationToken cancellationToken
     )
     {
@@ -187,6 +191,40 @@ CancellationToken cancellationToken
             // read the headers for the next section.
             section = await reader.ReadNextSectionAsync(cancellationToken);
         }
+        var dataFormat = await QueryDataFormat.Do(
+            getHttpsResource.DataFormatId,
+            appSettings,
+            apiRequestService,
+            cancellationToken
+        );
+        if (dataFormat is null)
+        {
+            System.IO.File.Delete(getHttpsResource.FilePath);
+            ModelState.AddModelError("File", "Could not validate the uploaded file because the data format could not be queried from the metabase.");
+            return BadRequest(ModelState);
+        }
+        // Only JSON files whose data format names a schema are validated; media type names are case-insensitive.
+        if (dataFormat.SchemaLocator is not null
+            && string.Equals(dataFormat.MediaType, "application/json", StringComparison.OrdinalIgnoreCase))
+        {
+            var evaluationResults = await jsonValidator.ValidateAsync(
+                dataFormat.SchemaLocator,
+                getHttpsResource.FilePath,
+                cancellationToken
+            );
+            if (!evaluationResults.IsValid)
+            {
+                var errors = evaluationResults.Errors is null
+                    ? "unknown"
+                    : string.Join(
+                        ", ",
+                        evaluationResults.Errors.Select(error => $"{error.Key}: {error.Value}")
+                    );
+                System.IO.File.Delete(getHttpsResource.FilePath);
+                ModelState.AddModelError("File", $"The JSON file does not conform to the JSON schema {dataFormat.SchemaLocator}. Validation gave the following errors: {errors}");
+                return BadRequest(ModelState);
+            }
+        }
         await getHttpsResource.RecomputeHashValue(cancellationToken);
         await context.SaveChangesAsync(cancellationToken);
         return Created(nameof(FileUploadController), null);
diff --git a/backend/src/Database.csproj b/backend/src/Database.csproj
index c2a5387d..8c93981d 100644
--- a/backend/src/Database.csproj
+++ b/backend/src/Database.csproj
@@ -33,6 +33,8 @@
+
+
diff --git a/backend/src/Services/JsonValidator.cs b/backend/src/Services/JsonValidator.cs
new file mode 100644
index 00000000..372cb41c
--- /dev/null
+++ b/backend/src/Services/JsonValidator.cs
@@ -0,0 +1,96 @@
+using System;
+using System.IO;
+using System.Net.Http;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+using Database.ApiRequests;
+using Database.Utilities;
+using Json.Schema;
+
+namespace Database.Services;
+
+/// <summary>
+/// Validates JSON files against JSON schemas that are downloaded on demand.
+/// </summary>
+public sealed class JsonValidator(
+    IHttpClientFactory httpClientFactory
+)
+{
+    // Options shared by all evaluations; fully configured once at type
+    // initialization so the shared instance is not mutated per validation.
+    private static readonly EvaluationOptions s_evaluationOptions =
+        CreateEvaluationOptions();
+
+    private static EvaluationOptions CreateEvaluationOptions()
+    {
+        var options = new EvaluationOptions
+        {
+            ValidateAgainstMetaSchema = false,
+            OutputFormat = OutputFormat.Hierarchical
+        };
+        // Schemas unknown to the registry are resolved through the caching fetcher.
+        options.SchemaRegistry.Fetch = JsonSchemaFetcher.FetchWithCaching;
+        return options;
+    }
+
+    /// <summary>
+    /// Downloads and parses the JSON schema located at <paramref name="jsonSchemaLocator"/>.
+    /// </summary>
+    /// <param name="jsonSchemaLocator">Location the schema is downloaded from.</param>
+    /// <param name="cancellationToken">Token that cancels the download.</param>
+    /// <returns>The parsed schema.</returns>
+    public async Task<JsonSchema> LoadJsonSchemaAsync(
+        Uri jsonSchemaLocator,
+        CancellationToken cancellationToken
+    )
+    {
+        using var httpClient = httpClientFactory.CreateClient();
+        // Dispose the response stream once the schema has been read.
+        await using var schemaStream = await httpClient.GetStreamAsync(
+            jsonSchemaLocator,
+            cancellationToken
+        );
+        return await JsonSchema.FromStream(schemaStream);
+    }
+
+    /// <summary>
+    /// Parses the JSON file at <paramref name="jsonDataFilePath"/> and evaluates it
+    /// against the schema located at <paramref name="jsonSchemaLocator"/>.
+    /// </summary>
+    /// <param name="jsonSchemaLocator">Location the schema is downloaded from.</param>
+    /// <param name="jsonDataFilePath">Path of the JSON file to validate.</param>
+    /// <param name="cancellationToken">Token that cancels parsing and the schema download.</param>
+    /// <returns>The evaluation results of the schema for the file's root element.</returns>
+    /// <exception cref="JsonException">The file is not valid JSON.</exception>
+    public async Task<EvaluationResults> ValidateAsync(
+        Uri jsonSchemaLocator,
+        string jsonDataFilePath,
+        CancellationToken cancellationToken
+    )
+    {
+        await using var fileStream = File.OpenRead(jsonDataFilePath);
+        using var jsonDocument = await JsonDocument.ParseAsync(
+            fileStream,
+            JsonDocumentSettings.Lax,
+            cancellationToken
+        );
+        var jsonSchema = await LoadJsonSchemaAsync(jsonSchemaLocator, cancellationToken);
+        return Validate(jsonSchema, jsonDocument.RootElement);
+    }
+
+    /// <summary>
+    /// Evaluates <paramref name="jsonElement"/> against <paramref name="jsonSchema"/>
+    /// using the shared evaluation options.
+    /// </summary>
+    public EvaluationResults Validate(
+        JsonSchema jsonSchema,
+        JsonElement jsonElement
+    )
+    {
+        return jsonSchema.Evaluate(
+            jsonElement,
+            s_evaluationOptions
+        );
+    }
+}
\ No newline at end of file
diff --git a/backend/src/Startup.cs b/backend/src/Startup.cs
index 8ccb0ab5..bbea8c65 100644
--- a/backend/src/Startup.cs
+++ b/backend/src/Startup.cs
@@ -244,6 +244,7 @@ public static void ConfigureCustomServices(IServiceCollection services)
         services.AddScoped();
         services.AddSingleton();
         services.AddSingleton();
+        services.AddSingleton<JsonValidator>();
         services.AddSingleton();
     }
 
diff --git a/backend/src/Utilities/JsonSchemaFetcher.cs b/backend/src/Utilities/JsonSchemaFetcher.cs
new file mode 100644
index 00000000..d6aa41b2
--- /dev/null
+++ b/backend/src/Utilities/JsonSchemaFetcher.cs
@@ -0,0 +1,73 @@
+using Json.Schema;
+using Microsoft.Extensions.Caching.Memory;
+using System;
+using System.Diagnostics;
+using System.Net.Http;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Database.Utilities;
+
+/// <summary>
+/// Downloads JSON schemas by URI and keeps them in an in-memory cache for one day.
+/// </summary>
+public static class JsonSchemaFetcher
+{
+    private static readonly TimeSpan s_cacheLifetime = TimeSpan.FromDays(1);
+    private static readonly MemoryCache s_schemaCache = new(new MemoryCacheOptions());
+    private static readonly SemaphoreSlim s_lock = new(1, 1);
+
+    // One shared client for all downloads: creating a client per request can
+    // exhaust sockets. The pooled-connection lifetime lets DNS changes be picked up.
+    private static readonly HttpClient s_httpClient = new(
+        new SocketsHttpHandler { PooledConnectionLifetime = TimeSpan.FromMinutes(15) }
+    );
+
+    /// <summary>
+    /// Synchronous wrapper around <see cref="FetchWithCachingAsync"/> for callers
+    /// that require a synchronous callback.
+    /// </summary>
+    /// <param name="locator">Absolute URI of the schema.</param>
+    /// <returns>The schema, or <c>null</c> when it could not be fetched.</returns>
+    public static JsonSchema? FetchWithCaching(Uri locator) =>
+        // Blocking is deliberate because the caller is synchronous;
+        // GetAwaiter().GetResult() avoids wrapping failures in AggregateException.
+        FetchWithCachingAsync(locator).GetAwaiter().GetResult();
+
+    /// <summary>
+    /// Returns the schema for <paramref name="locator"/> from the cache, downloading
+    /// and caching it first when necessary.
+    /// </summary>
+    /// <param name="locator">Absolute URI of the schema.</param>
+    /// <returns>The schema, or <c>null</c> when fetching or parsing it failed.</returns>
+    public static async Task<JsonSchema?> FetchWithCachingAsync(Uri locator)
+    {
+        // A single global lock serializes all lookups, so a schema requested
+        // concurrently is downloaded only once. ConfigureAwait(false) keeps the
+        // continuations off any captured context because FetchWithCaching blocks.
+        await s_lock.WaitAsync().ConfigureAwait(false);
+        try
+        {
+            var cacheKey = locator.AbsoluteUri;
+            if (s_schemaCache.Get(cacheKey) is JsonSchema cachedSchema)
+            {
+                return cachedSchema;
+            }
+            using var stream = await s_httpClient.GetStreamAsync(locator).ConfigureAwait(false);
+            var downloadedSchema = await JsonSchema.FromStream(stream).ConfigureAwait(false);
+            s_schemaCache.Set(cacheKey, downloadedSchema, s_cacheLifetime);
+            return downloadedSchema;
+        }
+        catch (Exception exception)
+        {
+            // Best effort: failures are reported as "no schema" (null), but the
+            // cause is traced instead of being dropped silently.
+            Trace.TraceWarning($"Error fetching schema from {locator}: {exception.Message}");
+            return null;
+        }
+        finally
+        {
+            s_lock.Release();
+        }
+    }
+}
\ No newline at end of file