Skip to content

Commit 8d56b98

Browse files
authored
feat: hash operation helper (#6876)
1 parent 92756d8 commit 8d56b98

File tree

8 files changed

+157
-102
lines changed

8 files changed

+157
-102
lines changed

.changeset/perfect-actors-develop.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
'@graphql-hive/core': minor
3+
---
4+
5+
Add helper function `hashOperation` for generating consistent hashes used within the Hive Console
6+
platform.

packages/libraries/core/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,15 @@
4848
"@graphql-tools/utils": "^10.0.0",
4949
"@whatwg-node/fetch": "^0.10.6",
5050
"async-retry": "^1.3.3",
51+
"js-md5": "0.8.3",
5152
"lodash.sortby": "^4.7.0",
5253
"tiny-lru": "^8.0.2"
5354
},
5455
"devDependencies": {
5556
"@apollo/federation": "0.38.1",
5657
"@apollo/subgraph": "2.9.3",
5758
"@types/async-retry": "1.4.8",
59+
"@types/js-md5": "0.8.0",
5860
"@types/lodash.sortby": "4.7.9",
5961
"graphql": "16.9.0",
6062
"nock": "14.0.0",

packages/libraries/core/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export * from './normalize/operation.js';
2+
export { collectSchemaCoordinates } from './client/collect-schema-coordinates.js';
23
export type {
34
HivePluginOptions,
45
HiveClient,

packages/libraries/core/src/normalize/operation.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,24 @@ import {
33
DefinitionNode,
44
DirectiveNode,
55
DocumentNode,
6+
GraphQLSchema,
67
Kind,
78
OperationDefinitionNode,
89
print,
910
SelectionNode,
1011
separateOperations,
1112
stripIgnoredCharacters,
13+
TypeInfo,
1214
VariableDefinitionNode,
1315
visit,
1416
} from 'graphql';
17+
import { md5 } from 'js-md5';
1518
import sortBy from 'lodash.sortby';
19+
import { collectSchemaCoordinates } from '../client/collect-schema-coordinates.js';
1620

21+
/**
22+
* Normalize an operation document.
23+
*/
1724
export function normalizeOperation({
1825
document,
1926
operationName,
@@ -152,3 +159,93 @@ function dropUnusedDefinitions(doc: DocumentNode, operationName?: string) {
152159

153160
return separateOperations(doc)[operationName] ?? doc;
154161
}
162+
163+
function findOperationDefinition(doc: DocumentNode) {
164+
return doc.definitions.find(isOperationDef);
165+
}
166+
167+
/** Normalize a GraphQL operation into a stable hash, as used internally within our ClickHouse database. */
168+
export function preprocessOperation(operation: {
169+
document: DocumentNode;
170+
schemaCoordinates: Iterable<string>;
171+
operationName: string | null;
172+
}) {
173+
const body = normalizeOperation({
174+
document: operation.document,
175+
hideLiterals: true,
176+
removeAliases: true,
177+
});
178+
179+
// Two operations with the same hash have to be equal:
180+
// 1. body is the same
181+
// 2. name is the same
182+
// 3. used schema coordinates are equal - this is important for assigning schema coordinates to an operation
183+
184+
const uniqueCoordinatesSet = new Set<string>();
185+
for (const field of operation.schemaCoordinates) {
186+
uniqueCoordinatesSet.add(field);
187+
// Add types as well:
188+
// `Query.foo` -> `Query`
189+
const at = field.indexOf('.');
190+
if (at > -1) {
191+
uniqueCoordinatesSet.add(field.substring(0, at));
192+
}
193+
}
194+
195+
const sortedCoordinates = Array.from(uniqueCoordinatesSet).sort();
196+
197+
const operationDefinition = findOperationDefinition(operation.document);
198+
199+
if (!operationDefinition) {
200+
return null;
201+
}
202+
203+
const operationName = operation.operationName ?? operationDefinition.name?.value;
204+
205+
const hash = md5
206+
.create()
207+
.update(body)
208+
.update(operationName ?? '')
209+
.update(sortedCoordinates.join(';')) // we do not need to sort from A to Z, default lexicographic sorting is enough
210+
.hex();
211+
212+
return {
213+
type: operationDefinition.operation,
214+
hash,
215+
body,
216+
coordinates: sortedCoordinates,
217+
name: operationName || null,
218+
};
219+
}
220+
221+
/**
222+
* Hash an executable GraphQL document according to the Hive platform's algorithm
223+
* for identification.
224+
*
225+
* Return null if no executable operation definition was found.
226+
*/
227+
export function hashOperation(args: {
228+
documentNode: DocumentNode;
229+
variables: null | {
230+
[key: string]: unknown;
231+
};
232+
operationName: string | null;
233+
schema: GraphQLSchema;
234+
typeInfo?: TypeInfo;
235+
}) {
236+
const schemaCoordinates = collectSchemaCoordinates({
237+
documentNode: args.documentNode,
238+
processVariables: args.variables !== null,
239+
variables: args.variables ?? {},
240+
schema: args.schema,
241+
typeInfo: args.typeInfo ?? new TypeInfo(args.schema),
242+
});
243+
244+
const result = preprocessOperation({
245+
document: args.documentNode,
246+
schemaCoordinates: schemaCoordinates,
247+
operationName: args.operationName,
248+
});
249+
250+
return result?.hash ?? null;
251+
}

packages/services/api/src/modules/app-deployments/providers/persisted-document-ingester.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import { buildSchema, DocumentNode, GraphQLError, Kind, parse, TypeInfo, validate } from 'graphql';
22
import PromiseQueue from 'p-queue';
33
import { z } from 'zod';
4-
import { collectSchemaCoordinates } from '@graphql-hive/core/src/client/collect-schema-coordinates';
4+
import { collectSchemaCoordinates, preprocessOperation } from '@graphql-hive/core';
55
import { buildOperationS3BucketKey } from '@hive/cdn-script/artifact-storage-reader';
66
import { ServiceLogger } from '@hive/service-common';
7-
import { normalizeOperation } from '@hive/usage-ingestor/src/normalize-operation';
87
import { sql as c_sql, ClickHouse } from '../../operations/providers/clickhouse-client';
98
import { S3Config } from '../../shared/providers/s3-config';
109

@@ -186,18 +185,18 @@ export class PersistedDocumentIngester {
186185

187186
const operationName = operationNames[0] ?? null;
188187

189-
const coordinates = collectSchemaCoordinates({
188+
const schemaCoordinates = collectSchemaCoordinates({
190189
documentNode,
191190
processVariables: false,
192191
variables: null,
193192
schema,
194193
typeInfo,
195194
});
196195

197-
const normalizedOperation = normalizeOperation({
198-
document: operation.body,
199-
fields: coordinates,
196+
const normalizedOperation = preprocessOperation({
197+
document: documentNode,
200198
operationName,
199+
schemaCoordinates,
201200
});
202201

203202
documents.push({
@@ -206,7 +205,7 @@ export class PersistedDocumentIngester {
206205
internalHash: normalizedOperation?.hash ?? operation.hash,
207206
body: operation.body,
208207
operationName,
209-
schemaCoordinates: Array.from(coordinates),
208+
schemaCoordinates: Array.from(schemaCoordinates),
210209
});
211210

212211
index++;

packages/services/usage-ingestor/src/normalize-operation.ts

Lines changed: 0 additions & 80 deletions
This file was deleted.

packages/services/usage-ingestor/src/processor.ts

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
import type { OperationTypeNode } from 'graphql';
1+
import { parse, type DocumentNode, type OperationTypeNode } from 'graphql';
22
import LRU from 'tiny-lru';
3+
import { preprocessOperation } from '@graphql-hive/core';
34
import type { ServiceLogger } from '@hive/service-common';
45
import type {
56
ProcessedOperation,
@@ -18,7 +19,6 @@ import {
1819
schemaCoordinatesSize,
1920
totalOperations,
2021
} from './metrics';
21-
import { normalizeOperation } from './normalize-operation';
2222
import {
2323
stringifyAppDeploymentUsageRecord,
2424
stringifyQueryOrMutationOperation,
@@ -41,14 +41,28 @@ type NormalizeFunction = (arg: RawOperationMapRecord) => {
4141
const DAY_IN_MS = 86_400_000;
4242
const RETENTION_FALLBACK = 365;
4343

44+
function parseSafe(operation: string): DocumentNode | null {
45+
try {
46+
return parse(operation);
47+
} catch {
48+
return null;
49+
}
50+
}
51+
4452
export function createProcessor(config: { logger: ServiceLogger }) {
4553
const { logger } = config;
4654
const normalize = cache(
4755
(operation: RawOperationMapRecord) => {
4856
normalizeCacheMisses.inc();
49-
return normalizeOperation({
50-
document: operation.operation,
51-
fields: operation.fields,
57+
const document = parseSafe(operation.operation);
58+
59+
if (!document) {
60+
return null;
61+
}
62+
63+
return preprocessOperation({
64+
document,
65+
schemaCoordinates: operation.fields,
5266
operationName: operation.operationName ?? null,
5367
});
5468
},

0 commit comments

Comments
 (0)