Skip to content

Commit af361db

Browse files
committed
Exclude dependencies of known hashes as well - This is hella slow
1 parent 2bc1d49 commit af361db

File tree

3 files changed

+158
-10
lines changed

3 files changed

+158
-10
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
-- Takes an array of causal ids and returns a table of ALL hashes which are
-- (transitive) dependencies of those causals. The result contains, concatenated:
--   * the hashes of every component reachable from the term/type components below,
--   * the hashes of every patch attached to a reachable namespace,
--   * the hashes of every reachable namespace,
--   * the hashes of every reachable causal (including the requested causals themselves).
--
-- Parameters:
--   the_causal_ids: ids of rows in `causals` to start the traversal from.
--                   A NULL or empty array produces no rows, since UNNEST yields nothing.
-- Returns:
--   One row per dependency hash. Each of the four groups is duplicate-free on its own;
--   the groups are combined with UNION ALL and are not de-duplicated against each other.
--
-- The function only reads data, so it is declared STABLE rather than left at the
-- default VOLATILE.
CREATE OR REPLACE FUNCTION dependencies_of_causals(the_causal_ids INTEGER[]) RETURNS TABLE (hash TEXT) AS $$
  WITH RECURSIVE all_causals(causal_id, causal_hash, causal_namespace_hash_id) AS (
      -- Base causals: the ones explicitly requested by the caller.
      SELECT causal.id, causal.hash, causal.namespace_hash_id
        FROM UNNEST(the_causal_ids) AS requested(causal_id)
          JOIN causals causal ON causal.id = requested.causal_id
      UNION
      -- This nested CTE is required because a RECURSIVE CTE may only refer to
      -- its own recursive table once inside the recursive term. `rec` makes that
      -- single reference, and the two branches below then read from `rec` instead.
      ( WITH rec AS (
          SELECT tc.causal_id, tc.causal_namespace_hash_id
            FROM all_causals tc
        )
        -- Causal ancestors of everything found so far.
        SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id
          FROM causal_ancestors ca
            JOIN rec tc ON ca.causal_id = tc.causal_id
            JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
        UNION
        -- Child causals of the namespaces of everything found so far.
        SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
          FROM rec tc
            JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
            JOIN causals child_causal ON nc.child_causal_id = child_causal.id
      )
  ), all_namespaces(namespace_hash_id, namespace_hash) AS (
      -- Several causals can point at the same namespace, hence the DISTINCT.
      SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 AS namespace_hash
        FROM all_causals tc
          JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
  ), all_patches(patch_id, patch_hash) AS (
      -- Several namespaces can share the same patch, hence the DISTINCT.
      SELECT DISTINCT patch.id, patch.hash
        FROM all_namespaces an
          JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id
          JOIN patches patch ON np.patch_id = patch.id
  ),
  -- term components to start transitively joining dependencies to.
  -- The branches are combined with UNION, which already removes duplicates.
  base_term_components(component_hash_id) AS (
      -- terms named in a namespace
      SELECT term.component_hash_id
        FROM all_namespaces an
          JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
          JOIN terms term ON nt.term_id = term.id
      UNION
      -- terms which are the target of a patch term mapping
      SELECT term.component_hash_id
        FROM all_patches ap
          JOIN patch_term_mappings ptm ON ap.patch_id = ptm.patch_id
          JOIN terms term ON ptm.to_term_id = term.id
      UNION
      -- term metadata
      SELECT term.component_hash_id
        FROM all_namespaces an
          JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
          JOIN namespace_term_metadata meta ON nt.id = meta.named_term
          JOIN terms term ON meta.metadata_term_id = term.id
      UNION
      -- type metadata
      SELECT term.component_hash_id
        FROM all_namespaces an
          JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
          JOIN namespace_type_metadata meta ON nt.id = meta.named_type
          JOIN terms term ON meta.metadata_term_id = term.id
  ),
  -- type components to start transitively joining dependencies to.
  -- The branches are combined with UNION, which already removes duplicates.
  base_type_components(component_hash_id) AS (
      -- types named in a namespace
      SELECT typ.component_hash_id
        FROM all_namespaces an
          JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
          JOIN types typ ON nt.type_id = typ.id
      UNION
      -- types owning a constructor named in a namespace
      SELECT typ.component_hash_id
        FROM all_namespaces an
          JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
          JOIN constructors con ON nt.constructor_id = con.id
          JOIN types typ ON con.type_id = typ.id
      UNION
      -- types which are the target of a patch type mapping
      SELECT typ.component_hash_id
        FROM all_patches ap
          JOIN patch_type_mappings ptm ON ap.patch_id = ptm.patch_id
          JOIN types typ ON ptm.to_type_id = typ.id
      UNION
      -- types owning a constructor which is the target of a patch constructor mapping
      SELECT typ.component_hash_id
        FROM all_patches ap
          JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id
          JOIN constructors con ON pcm.to_constructor_id = con.id
          JOIN types typ ON con.type_id = typ.id
  ),
  -- All the dependencies we join in transitively from the known term & type components we depend on.
  all_components(component_hash_id) AS (
      SELECT btc.component_hash_id
        FROM base_term_components btc
      UNION
      SELECT btc.component_hash_id
        FROM base_type_components btc
      UNION
      -- Same single-reference trick as in all_causals: `rec` is the only place that
      -- refers to the recursive table, and both branches below read from it.
      ( WITH rec AS (
          SELECT ac.component_hash_id
            FROM all_components ac
        )
        -- recursively union in term dependencies
        SELECT ref.component_hash_id
          FROM rec atc
            -- This joins in ALL the terms from the component, not just the one that caused the dependency on the
            -- component
            JOIN terms term ON atc.component_hash_id = term.component_hash_id
            JOIN term_local_component_references ref ON term.id = ref.term_id
        UNION
        -- recursively union in type dependencies
        SELECT ref.component_hash_id
          FROM rec atc
            -- This joins in ALL the types from the component, not just the one that caused the dependency on the
            -- component
            JOIN types typ ON atc.component_hash_id = typ.component_hash_id
            JOIN type_local_component_references ref ON typ.id = ref.type_id
      )
  )
  -- Component hashes
  SELECT ch.base32 AS hash
    FROM all_components ac
      JOIN component_hashes ch ON ac.component_hash_id = ch.id
  UNION ALL
  -- Patch hashes
  SELECT ap.patch_hash AS hash
    FROM all_patches ap
  UNION ALL
  -- Namespace hashes
  SELECT an.namespace_hash AS hash
    FROM all_namespaces an
  UNION ALL
  -- Causal hashes
  SELECT ac.causal_hash AS hash
    FROM all_causals ac
$$ LANGUAGE SQL STABLE;

src/Share/Web/UCM/SyncV2/Impl.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import Share.Web.UCM.Sync.HashJWT qualified as HashJWT
3737
import Share.Web.UCM.SyncV2.Queries qualified as SSQ
3838
import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
3939
import U.Codebase.Sqlite.Orphans ()
40+
import Unison.Debug qualified as Debug
4041
import Unison.Hash32 (Hash32)
4142
import Unison.Share.API.Hash (HashJWTClaims (..))
4243
import Unison.SyncV2.API qualified as SyncV2
@@ -108,10 +109,14 @@ causalDependenciesStreamImpl mayCallerUserId (SyncV2.CausalDependenciesRequest {
108109
codebase <- codebaseForBranchRef branchRef
109110
q <- UnliftIO.atomically $ STM.newTBMQueue 10
110111
streamResults <- lift $ UnliftIO.toIO do
112+
Logging.logInfoText "Starting causal dependencies stream"
111113
Codebase.runCodebaseTransaction codebase $ do
112114
(_bhId, causalId) <- CausalQ.expectCausalIdsOf id (hash32ToCausalHash causalHash)
115+
Debug.debugLogM Debug.Temp "Getting cursor"
113116
cursor <- SSQ.spineAndLibDependenciesOfCausalCursor causalId
117+
Debug.debugLogM Debug.Temp "Folding cursor"
114118
Cursor.foldBatched cursor batchSize \batch -> do
119+
Debug.debugLogM Debug.Temp "Got batch"
115120
let depBatch =
116121
batch <&> \(causalHash, isCausalSpine, isLibRoot) ->
117122
let dependencyType = case (isCausalSpine, isLibRoot) of

src/Share/Web/UCM/SyncV2/Queries.hs

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import Share.Postgres.IDs
1313
import Share.Prelude
1414
import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
1515
import U.Codebase.Sqlite.TempEntity (TempEntity)
16+
import Unison.Debug qualified as Debug
1617
import Unison.Hash32 (Hash32)
1718
import Unison.SyncV2.Types (CBORBytes)
1819

@@ -186,17 +187,23 @@ import Unison.SyncV2.Types (CBORBytes)
186187
allSerializedDependenciesOfCausalCursor :: CausalId -> Set CausalHash -> CodebaseM e (PGCursor (CBORBytes TempEntity, Hash32))
187188
allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
188189
ownerUserId <- asks codebaseOwner
189-
execute_ [sql| CREATE TEMP TABLE except_causals ( causal_id INTEGER NOT NULL ) ON COMMIT DROP |]
190+
Debug.debugLogM Debug.Temp "created except_hashes temp table."
191+
-- Create a temp table for storing the dependencies we know the calling client already has.
192+
execute_ [sql| CREATE TEMP TABLE except_hashes ( hash TEXT NOT NULL PRIMARY KEY ) ON COMMIT DROP |]
193+
Debug.debugLogM Debug.Temp "filling in except_hashes temp table."
190194
execute_
191-
[sql| INSERT INTO except_causals (causal_id)
192-
WITH the_causal_hashes(hash) AS (SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)})
193-
SELECT c.id
195+
[sql|
196+
WITH the_causal_hashes(hash) AS (
197+
SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)}
198+
), known_causal_ids(causal_id) AS (
199+
SELECT c.id
194200
FROM the_causal_hashes tch
195201
JOIN causals c ON tch.hash = c.hash
202+
) INSERT INTO except_hashes(hash)
203+
SELECT DISTINCT deps.hash FROM dependencies_of_causals((SELECT ARRAY_AGG(kci.causal_id) FROM known_causal_ids kci)) AS deps
204+
ON CONFLICT DO NOTHING
196205
|]
197-
execute_
198-
[sql|
199-
|]
206+
Debug.debugLogM Debug.Temp "Running cursor query"
200207
cursor <-
201208
PGCursor.newRowCursor
202209
"serialized_entities"
@@ -206,7 +213,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
206213
FROM causals causal
207214
WHERE causal.id = #{cid}
208215
AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id)
209-
AND NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = causal.id)
216+
AND NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = causal.id)
210217
UNION
211218
-- This nested CTE is required because RECURSIVE CTEs can't refer
212219
-- to the recursive table more than once.
@@ -218,23 +225,25 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
218225
FROM causal_ancestors ca
219226
JOIN rec tc ON ca.causal_id = tc.causal_id
220227
JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
221-
WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = ancestor_causal.id)
228+
WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = ancestor_causal.id)
222229
UNION
223230
SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
224231
FROM rec tc
225232
JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
226233
JOIN causals child_causal ON nc.child_causal_id = child_causal.id
227-
WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = child_causal.id)
234+
WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = child_causal.id)
228235
)
229236
), all_namespaces(namespace_hash_id, namespace_hash) AS (
230237
SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash
231238
FROM transitive_causals tc
232239
JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
240+
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = bh.base32)
233241
), all_patches(patch_id, patch_hash) AS (
234242
SELECT DISTINCT patch.id, patch.hash
235243
FROM all_namespaces an
236244
JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id
237245
JOIN patches patch ON np.patch_id = patch.id
246+
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = patch.hash)
238247
),
239248
-- term components to start transitively joining dependencies to
240249
base_term_components(component_hash_id) AS (
@@ -321,6 +330,9 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
321330
JOIN serialized_components sc ON sc.user_id = #{ownerUserId} AND tc.component_hash_id = sc.component_hash_id
322331
JOIN bytes ON sc.bytes_id = bytes.id
323332
JOIN component_hashes ch ON tc.component_hash_id = ch.id
333+
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = ch.base32)
334+
-- TODO: Filter out components we know we already have,
335+
-- We should do this earlier in the process if possible.
324336
)
325337
UNION ALL
326338
(SELECT bytes.bytes, ap.patch_hash

0 commit comments

Comments
 (0)