Skip to content

Commit eefc68a

Browse files
committed
Filter out known hashes a bit more efficiently.
1 parent af361db commit eefc68a

File tree

2 files changed

+34
-26
lines changed

2 files changed

+34
-26
lines changed

sql/2025-01-31_dependencies-of-causal.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
CREATE OR REPLACE FUNCTION dependencies_of_causals(the_causal_ids INTEGER[]) RETURNS TABLE (hash TEXT) AS $$
33
WITH RECURSIVE all_causals(causal_id, causal_hash, causal_namespace_hash_id) AS (
44
-- Base causal
5-
SELECT causal.id, causal.hash, causal.namespace_hash_id
5+
SELECT DISTINCT causal.id, causal.hash, causal.namespace_hash_id
66
FROM UNNEST(the_causal_ids) AS causal_id
77
JOIN causals causal ON causal.id = causal_id
88
UNION

src/Share/Web/UCM/SyncV2/Queries.hs

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import Share.Postgres.IDs
1313
import Share.Prelude
1414
import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
1515
import U.Codebase.Sqlite.TempEntity (TempEntity)
16-
import Unison.Debug qualified as Debug
1716
import Unison.Hash32 (Hash32)
1817
import Unison.SyncV2.Types (CBORBytes)
1918

@@ -187,10 +186,10 @@ import Unison.SyncV2.Types (CBORBytes)
187186
allSerializedDependenciesOfCausalCursor :: CausalId -> Set CausalHash -> CodebaseM e (PGCursor (CBORBytes TempEntity, Hash32))
188187
allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
189188
ownerUserId <- asks codebaseOwner
190-
Debug.debugLogM Debug.Temp "created except_hashes temp table."
191189
-- Create temp tables (one each for causals, components, and namespaces) for storing the dependencies we know the calling client already has.
192-
execute_ [sql| CREATE TEMP TABLE except_hashes ( hash TEXT NOT NULL PRIMARY KEY ) ON COMMIT DROP |]
193-
Debug.debugLogM Debug.Temp "filling in except_hashes temp table."
190+
execute_ [sql| CREATE TEMP TABLE except_causals (causal_id INTEGER NULL ) ON COMMIT DROP |]
191+
execute_ [sql| CREATE TEMP TABLE except_components ( component_hash_id INTEGER NULL ) ON COMMIT DROP |]
192+
execute_ [sql| CREATE TEMP TABLE except_namespaces ( branch_hash_ids INTEGER NULL ) ON COMMIT DROP |]
194193
execute_
195194
[sql|
196195
WITH the_causal_hashes(hash) AS (
@@ -199,11 +198,24 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
199198
SELECT c.id
200199
FROM the_causal_hashes tch
201200
JOIN causals c ON tch.hash = c.hash
202-
) INSERT INTO except_hashes(hash)
203-
SELECT DISTINCT deps.hash FROM dependencies_of_causals((SELECT ARRAY_AGG(kci.causal_id) FROM known_causal_ids kci)) AS deps
204-
ON CONFLICT DO NOTHING
201+
), dependency_hashes(hash) AS (
202+
SELECT DISTINCT deps.hash
203+
FROM dependencies_of_causals((SELECT ARRAY_AGG(kci.causal_id) FROM known_causal_ids kci)) AS deps
204+
), do_causals AS (
205+
INSERT INTO except_causals(causal_id)
206+
SELECT causal.id
207+
FROM the_causal_hashes tch
208+
JOIN causals causal ON tch.hash = causal.hash
209+
), do_namespaces AS (
210+
INSERT INTO except_namespaces(branch_hash_ids)
211+
SELECT bh.id
212+
FROM dependency_hashes dh
213+
JOIN branch_hashes bh ON dh.hash = bh.base32
214+
) INSERT INTO except_components(component_hash_id)
215+
SELECT ch.id
216+
FROM dependency_hashes dh
217+
JOIN component_hashes ch ON dh.hash = ch.base32
205218
|]
206-
Debug.debugLogM Debug.Temp "Running cursor query"
207219
cursor <-
208220
PGCursor.newRowCursor
209221
"serialized_entities"
@@ -213,7 +225,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
213225
FROM causals causal
214226
WHERE causal.id = #{cid}
215227
AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id)
216-
AND NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = causal.id)
228+
AND NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = causal.id)
217229
UNION
218230
-- This nested CTE is required because RECURSIVE CTEs can't refer
219231
-- to the recursive table more than once.
@@ -225,25 +237,24 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
225237
FROM causal_ancestors ca
226238
JOIN rec tc ON ca.causal_id = tc.causal_id
227239
JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
228-
WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = ancestor_causal.id)
240+
WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = ancestor_causal.id)
229241
UNION
230242
SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
231243
FROM rec tc
232244
JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
233245
JOIN causals child_causal ON nc.child_causal_id = child_causal.id
234-
WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = child_causal.id)
246+
WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = child_causal.id)
235247
)
236248
), all_namespaces(namespace_hash_id, namespace_hash) AS (
237249
SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash
238250
FROM transitive_causals tc
239251
JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
240-
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = bh.base32)
252+
WHERE NOT EXISTS (SELECT FROM except_namespaces en WHERE en.branch_hash_ids = tc.causal_namespace_hash_id)
241253
), all_patches(patch_id, patch_hash) AS (
242254
SELECT DISTINCT patch.id, patch.hash
243255
FROM all_namespaces an
244256
JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id
245257
JOIN patches patch ON np.patch_id = patch.id
246-
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = patch.hash)
247258
),
248259
-- term components to start transitively joining dependencies to
249260
base_term_components(component_hash_id) AS (
@@ -270,6 +281,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
270281
JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
271282
JOIN namespace_type_metadata meta ON nt.id = meta.named_type
272283
JOIN terms term ON meta.metadata_term_id = term.id
284+
WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = term.component_hash_id)
273285
),
274286
-- type components to start transitively joining dependencies to
275287
base_type_components(component_hash_id) AS (
@@ -294,6 +306,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
294306
JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id
295307
JOIN constructors con ON pcm.to_constructor_id = con.id
296308
JOIN types typ ON con.type_id = typ.id
309+
WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = typ.component_hash_id)
297310
),
298311
-- All the dependencies we join in transitively from the known term & type components we depend on.
299312
-- Unfortunately it's not possible to know which hashes are terms vs types :'(
@@ -315,6 +328,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
315328
-- component
316329
JOIN terms term ON atc.component_hash_id = term.component_hash_id
317330
JOIN term_local_component_references ref ON term.id = ref.term_id
331+
WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = ref.component_hash_id)
318332
UNION
319333
-- recursively union in type dependencies
320334
SELECT DISTINCT ref.component_hash_id
@@ -323,16 +337,14 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
323337
-- component
324338
JOIN types typ ON atc.component_hash_id = typ.component_hash_id
325339
JOIN type_local_component_references ref ON typ.id = ref.type_id
340+
WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = ref.component_hash_id)
326341
)
327342
)
328343
(SELECT bytes.bytes, ch.base32
329344
FROM transitive_components tc
330345
JOIN serialized_components sc ON sc.user_id = #{ownerUserId} AND tc.component_hash_id = sc.component_hash_id
331346
JOIN bytes ON sc.bytes_id = bytes.id
332347
JOIN component_hashes ch ON tc.component_hash_id = ch.id
333-
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = ch.base32)
334-
-- TODO: Filter out components we know we already have,
335-
-- We should do this earlier in the process if possible.
336348
)
337349
UNION ALL
338350
(SELECT bytes.bytes, ap.patch_hash
@@ -364,28 +376,26 @@ spineAndLibDependenciesOfCausalCursor cid = do
364376
[sql|
365377
-- is_lib_causal indicates the causal is the 'lib' namespace itself (its name segment matches libSegmentTextId), whereas is_lib_root indicates
366378
-- the causal's parent is a lib causal, i.e. the causal is the root of a library INSIDE 'lib'
367-
WITH RECURSIVE transitive_causals(causal_id, causal_hash, causal_namespace_hash_id, is_spine, is_lib_causal, is_lib_root, depth) AS (
368-
SELECT causal.id, causal.hash, causal.namespace_hash_id, true AS is_spine, false AS is_lib_causal, false AS is_lib_root, 1 AS depth
379+
WITH RECURSIVE transitive_causals(causal_id, causal_hash, causal_namespace_hash_id, is_spine, is_lib_causal, is_lib_root) AS (
380+
SELECT causal.id, causal.hash, causal.namespace_hash_id, true AS is_spine, false AS is_lib_causal, false AS is_lib_root
369381
FROM causals causal
370382
WHERE causal.id = #{cid}
371383
AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id)
372384
UNION
373385
-- This nested CTE is required because RECURSIVE CTEs can't refer
374386
-- to the recursive table more than once.
375387
( WITH rec AS (
376-
SELECT tc.causal_id, tc.causal_namespace_hash_id, tc.is_spine, tc.is_lib_causal, tc.is_lib_root, tc.depth
388+
SELECT tc.causal_id, tc.causal_namespace_hash_id, tc.is_spine, tc.is_lib_causal, tc.is_lib_root
377389
FROM transitive_causals tc
378390
)
379-
SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id, rec.is_spine, rec.is_lib_causal, rec.is_lib_root, rec.depth + 1
391+
SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id, rec.is_spine, rec.is_lib_causal, rec.is_lib_root
380392
FROM causal_ancestors ca
381393
JOIN rec ON ca.causal_id = rec.causal_id
382394
JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
383395
-- Only get the history of the top level spine
384396
WHERE rec.is_spine
385397
UNION
386-
-- libs within a causal should have the same depth as the causal they're in so
387-
-- we order them locally with their causal.
388-
SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id, false AS is_spine, nc.name_segment_id = #{libSegmentTextId} AS is_lib_causal, rec.is_lib_causal AS is_lib_root, rec.depth
398+
SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id, false AS is_spine, nc.name_segment_id = #{libSegmentTextId} AS is_lib_causal, rec.is_lib_causal AS is_lib_root
389399
FROM rec
390400
JOIN namespace_children nc ON rec.causal_namespace_hash_id = nc.parent_namespace_hash_id
391401
JOIN causals child_causal ON nc.child_causal_id = child_causal.id
@@ -396,8 +406,6 @@ spineAndLibDependenciesOfCausalCursor cid = do
396406
(SELECT tc.causal_hash, tc.is_spine, tc.is_lib_root
397407
FROM transitive_causals tc
398408
WHERE tc.is_spine OR tc.is_lib_causal
399-
-- Order by depth, causals first.
400-
ORDER BY tc.depth ASC, tc.is_spine DESC
401409
)
402410
|]
403411
<&> fmap (\(hash, isSpine, isLibRoot) -> (hash, if isSpine then IsCausalSpine else NotCausalSpine, if isLibRoot then IsLibRoot else NotLibRoot))

0 commit comments

Comments
 (0)