Skip to content

Commit 4459864

Browse files
Merge pull request #203 from answerdigital/feature/fast-era
Alter era calculating stored procedure to use temp tables to improve performance.
2 parents e6062da + 5aae878 commit 4459864

File tree

1 file changed

+42
-19
lines changed

1 file changed

+42
-19
lines changed

Database/Migrations/sql/RepeatableMigrations/R__build_era.sql

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,26 @@ GROUP BY c.PERSON_ID
153153

154154
/* / */
155155

156-
INSERT INTO cdm.condition_era (
156+
157+
create index idx_cteConditionEnds on #cteConditionEnds
158+
(
159+
person_id,
160+
CONDITION_CONCEPT_ID,
161+
ERA_END_DATE
162+
)
163+
include
164+
(
165+
CONDITION_START_DATE
166+
)
167+
168+
INSERT INTO cdm.condition_era
169+
(
157170
person_id
158171
,condition_concept_id
159172
,condition_era_start_date
160173
,condition_era_end_date
161174
,condition_occurrence_count
162-
)
175+
)
163176
SELECT
164177
person_id
165178
,CONDITION_CONCEPT_ID
@@ -173,15 +186,16 @@ GROUP BY person_id
173186

174187

175188

176-
177189
-- Code taken from:
178190
-- https://github.com/OHDSI/ETL-CMS/blob/master/SQL/create_CDMv5_drug_era_non_stockpile.sql
179-
191+
-- JC: 2025-06-24 Use temp tables instead of chained CTEs to avoid performance problems (run time went from 4h+ to 10m).
180192

181193
if object_id('tempdb..#tmp_de', 'U') is not null drop table #tmp_de;
182194

183-
WITH
184-
ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS
195+
drop table if exists #ctePreDrugTarget;
196+
drop table if exists #cteDrugExposureEnds;
197+
198+
with ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS
185199
(-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
186200
SELECT
187201
d.drug_exposure_id
@@ -206,8 +220,16 @@ ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposu
206220
AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id's are set to 0, so we don't want different drugs wrapped up in the same era
207221
AND coalesce(d.days_supply,0) >= 0 ---We have cases where days_supply is negative, which can set the end_date before the start_date, which we don't want. So we're just skipping over those rows. This is a data-quality issue.
208222
)
223+
select
224+
*
225+
into #ctePreDrugTarget
226+
from ctePreDrugTarget
209227

210-
, cteSubExposureEndDates (person_id, ingredient_concept_id, end_date) AS --- A preliminary sorting that groups all of the overlapping exposures into one exposure so that we don't double-count non-gap-days
228+
CREATE NONCLUSTERED INDEX IDX_ctePreDrugTarget
229+
ON [#ctePreDrugTarget] ([person_id],[ingredient_concept_id],[drug_exposure_start_date])
230+
INCLUDE ([drug_exposure_id])
231+
232+
;with cteSubExposureEndDates (person_id, ingredient_concept_id, end_date) AS --- A preliminary sorting that groups all of the overlapping exposures into one exposure so that we don't double-count non-gap-days
211233
(
212234
SELECT person_id, ingredient_concept_id, event_date AS end_date
213235
FROM
@@ -226,38 +248,39 @@ ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposu
226248
-1 AS event_type,
227249
ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id
228250
ORDER BY drug_exposure_start_date) AS start_ordinal
229-
FROM ctePreDrugTarget
251+
FROM #ctePreDrugTarget
230252

231253
UNION ALL
232254

233255
SELECT person_id, ingredient_concept_id, drug_exposure_end_date, 1 AS event_type, NULL
234-
FROM ctePreDrugTarget
256+
FROM #ctePreDrugTarget
235257
) RAWDATA
236258
) e
237259
WHERE (2 * e.start_ordinal) - e.overall_ord = 0
238260
)
239-
240-
, cteDrugExposureEnds (person_id, drug_concept_id, drug_exposure_start_date, drug_sub_exposure_end_date) AS
241-
(
242261
SELECT
243262
dt.person_id
244-
, dt.ingredient_concept_id
245-
, dt.drug_exposure_start_date
263+
, dt.ingredient_concept_id as drug_concept_id
264+
, dt.drug_exposure_start_date as drug_exposure_start_date
246265
, MIN(e.end_date) AS drug_sub_exposure_end_date
247-
FROM ctePreDrugTarget dt
248-
JOIN cteSubExposureEndDates e ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND e.end_date >= dt.drug_exposure_start_date
266+
into #cteDrugExposureEnds
267+
FROM #ctePreDrugTarget dt
268+
JOIN cteSubExposureEndDates e
269+
ON dt.person_id = e.person_id
270+
AND dt.ingredient_concept_id = e.ingredient_concept_id
271+
AND e.end_date >= dt.drug_exposure_start_date
249272
GROUP BY
250273
dt.drug_exposure_id
251274
, dt.person_id
252275
, dt.ingredient_concept_id
253276
, dt.drug_exposure_start_date
254-
)
277+
255278
--------------------------------------------------------------------------------------------------------------
256-
, cteSubExposures(row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count) AS
279+
;with cteSubExposures(row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count) AS
257280
(
258281
SELECT ROW_NUMBER() OVER (PARTITION BY person_id, drug_concept_id, drug_sub_exposure_end_date ORDER BY person_id)
259282
, person_id, drug_concept_id, MIN(drug_exposure_start_date) AS drug_sub_exposure_start_date, drug_sub_exposure_end_date, COUNT(*) AS drug_exposure_count
260-
FROM cteDrugExposureEnds
283+
FROM #cteDrugExposureEnds
261284
GROUP BY person_id, drug_concept_id, drug_sub_exposure_end_date
262285
--ORDER BY person_id, drug_concept_id
263286
)

0 commit comments

Comments
 (0)