Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions analytics/dbt/analytics/models/intermediate/mission/__models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,11 @@ models:
description: Timestamp de dernière mise à jour propagé depuis la mission/adresse.
tests:
- not_null
- name: int_mission_first_events
- name: int_mission_click_apply_metrics
description: >
Vue intermédiaire au grain mission + diffuseur + source, qui expose les premières
dates de click/apply et les délais depuis la création de la mission.
dates de click/apply, les métriques multi-candidatures par click et les délais
depuis la création de la mission.
columns:
- name: mission_id
description: Identifiant de la mission.
Expand All @@ -207,18 +208,33 @@ models:
arguments:
to: ref('mission')
field: id
- name: publisher_id
description: Identifiant du partenaire annonceur de la mission.
tests:
- not_null
- relationships:
arguments:
to: ref('publisher')
field: id
- name: mission_created_at
description: Timestamp de création de la mission (base des délais).
tests:
- not_null
- name: from_publisher_id
description: Identifiant du diffuseur à l'origine de l'événement (nullable).
tests:
- relationships:
arguments:
to: ref('publisher')
field: id
- name: source
description: Source normalisée de l'événement (api, widget, campaign).
- name: source_id
description: Identifiant de la source (widget/campaign/publisher selon source).
- name: first_click_at
description: Timestamp du premier click observé sur la mission.
tests:
- not_null
- name: first_apply_at
description: Timestamp de la première candidature observée sur la mission.
- name: click_count
Expand All @@ -229,8 +245,18 @@ models:
description: Nombre total de candidatures (apply) observées pour la mission.
tests:
- not_null
- name: click_with_apply_count
description: Nombre de clicks reliés à au moins une candidature.
tests:
- not_null
- name: click_with_multi_apply_count
description: Nombre de clicks reliés à au moins deux candidatures.
tests:
- not_null
- name: conversion_rate
description: Ratio candidatures/redirections (`apply_count / click_count`).
- name: multi_apply_share
description: Part des clicks avec plusieurs candidatures (`>=2`) parmi les clicks avec candidature.
- name: time_to_click_secs
description: Délai en secondes entre création de mission et premier click.
- name: time_to_apply_secs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
incremental_strategy = 'delete+insert',
on_schema_change = 'sync_all_columns',
post_hook = [
'create index if not exists "int_mission_first_events_mission_id_idx" on {{ this }} (mission_id)',
'create index if not exists "int_mission_first_events_from_publisher_id_idx" on {{ this }} (from_publisher_id)'
'create index if not exists "int_mission_click_apply_metrics_mission_id_idx" on {{ this }} (mission_id)',
'create index if not exists "int_mission_click_apply_metrics_from_publisher_id_idx" on {{ this }} (from_publisher_id)'
]
) }}

Expand All @@ -27,11 +27,11 @@ affected_missions as (

union

select distinct ge.mission_id
from {{ ref('int_stat_event_global') }} as ge
select distinct secm.mission_id
from {{ ref('int_stat_event_click_metrics') }} as secm
where
ge.mission_id is not null
and coalesce(ge.updated_at, ge.created_at)
secm.mission_id is not null
and coalesce(secm.updated_at, secm.click_created_at)
>= (select lr.last_updated_at from last_run as lr)
),

Expand Down Expand Up @@ -60,21 +60,24 @@ missions as (

events as (
select
mission_id,
from_publisher_id,
source,
source_id,
min(created_at) filter (where type = 'click') as first_click_at,
min(created_at) filter (where type = 'apply') as first_apply_at,
count(*) filter (where type = 'click') as click_count,
count(*) filter (where type = 'apply') as apply_count,
max(updated_at) as events_updated_at
from {{ ref('int_stat_event_global') }}
secm.mission_id,
secm.from_publisher_id,
secm.source,
secm.source_id,
min(secm.click_created_at) as first_click_at,
min(secm.first_apply_at) as first_apply_at,
count(*) as click_count,
sum(secm.apply_count) as apply_count,
count(*) filter (where secm.has_apply) as click_with_apply_count,
count(*) filter (
where secm.has_multi_apply
) as click_with_multi_apply_count,
max(secm.updated_at) as events_updated_at
from {{ ref('int_stat_event_click_metrics') }} as secm
where
mission_id is not null
and type in ('click', 'apply')
secm.mission_id is not null
{% if is_incremental() %}
and mission_id in (
and secm.mission_id in (
select am.mission_id from affected_missions as am
)
{% endif %}
Expand Down Expand Up @@ -102,6 +105,8 @@ base as (
e.first_apply_at,
e.click_count,
e.apply_count,
e.click_with_apply_count,
e.click_with_multi_apply_count,
greatest(
coalesce(m.mission_updated_at, '1900-01-01'::timestamp),
coalesce(e.events_updated_at, '1900-01-01'::timestamp)
Expand Down Expand Up @@ -131,6 +136,8 @@ select
first_apply_at,
click_count,
apply_count,
click_with_apply_count,
click_with_multi_apply_count,
updated_at,
case
when mission_created_at is null or first_click_at is null then null
Expand All @@ -149,5 +156,9 @@ select
case
when click_count = 0 then null
else apply_count::numeric / click_count
end as conversion_rate
end as conversion_rate,
case
when click_with_apply_count = 0 then null
else click_with_multi_apply_count::numeric / click_with_apply_count
end as multi_apply_share
from base
104 changes: 104 additions & 0 deletions analytics/dbt/analytics/models/intermediate/stat_event/__models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# dbt properties file for the intermediate stat_event models.
# Column descriptions are kept in the project's original language.
version: 2

models:
  # Event-grain model: one row per stat event (stat_event_id is tested unique).
  - name: int_stat_event_global
    description: >
      Vue intermédiaire des événements statistiques hors bots, avec normalisation
      des sources et des identifiants nécessaires aux agrégations downstream.
    columns:
      - name: stat_event_id
        description: Identifiant unique de l'événement source.
        tests:
          - not_null
          - unique
      - name: created_at
        description: Horodatage de création de l'événement.
        tests:
          - not_null
      - name: updated_at
        description: Horodatage de dernière mise à jour.
        tests:
          - not_null
      - name: type
        description: Type d'événement (`click`, `apply`, `print`, `account`).
        tests:
          - not_null
      - name: source
        description: Source normalisée de l'événement.
        tests:
          - not_null
      # Publisher and mission references are nullable: only referential
      # integrity is tested, not presence.
      - name: from_publisher_id
        description: Identifiant du diffuseur à l'origine de l'événement.
        tests:
          - relationships:
              arguments:
                to: ref('publisher')
                field: id
      - name: to_publisher_id
        description: Identifiant de l'annonceur cible de l'événement.
        tests:
          - relationships:
              arguments:
                to: ref('publisher')
                field: id
      - name: mission_id
        description: Identifiant de mission associé à l'événement (nullable).
        tests:
          - relationships:
              arguments:
                to: ref('mission')
                field: id

  # Click-grain model: one row per click event (click_stat_event_id is tested
  # unique), carrying the applies attached to that click.
  - name: int_stat_event_click_metrics
    description: >
      Vue intermédiaire au grain click (une ligne par `stat_event` de type click),
      qui centralise les candidatures rattachées au click via `click_id`.
    columns:
      - name: click_stat_event_id
        description: Identifiant du click (`stat_event.id`).
        tests:
          - not_null
          - unique
      - name: mission_id
        description: Identifiant de la mission portée par le click.
        tests:
          - not_null
          - relationships:
              arguments:
                to: ref('mission')
                field: id
      - name: from_publisher_id
        description: Identifiant du diffuseur à l'origine du click.
        tests:
          - relationships:
              arguments:
                to: ref('publisher')
                field: id
      - name: source
        description: Source normalisée des événements du grain.
        tests:
          - not_null
      - name: source_id
        description: Identifiant de la source (widget/campaign/publisher selon source).
      - name: click_created_at
        description: Horodatage du click.
        tests:
          - not_null
      # No not_null test here: a click without any linked apply has no first apply.
      - name: first_apply_at
        description: Horodatage de la première candidature liée au click.
      - name: apply_count
        description: Nombre total de candidatures liées au click.
        tests:
          - not_null
      - name: has_apply
        description: Indique si le click est relié à au moins une candidature.
        tests:
          - not_null
      - name: has_multi_apply
        description: Indique si le click est relié à au moins deux candidatures.
        tests:
          - not_null
      - name: updated_at
        description: Timestamp de dernière mise à jour propagé depuis click/apply.
        tests:
          - not_null
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
-- Click-grain incremental model: one output row per click event that carries a
-- mission, enriched with the applies that point at that click through click_id.
-- Rows are keyed on click_stat_event_id and refreshed with delete+insert; the
-- post hooks (re)create the lookup indexes after each build.
{{ config(
    materialized = 'incremental',
    incremental_strategy = 'delete+insert',
    unique_key = 'click_stat_event_id',
    on_schema_change = 'sync_all_columns',
    post_hook = [
        'create index if not exists "int_stat_event_click_metrics_mission_id_idx" on {{ this }} (mission_id)',
        'create index if not exists "int_stat_event_click_metrics_from_publisher_id_idx" on {{ this }} (from_publisher_id)',
        'create index if not exists "int_stat_event_click_metrics_click_created_at_idx" on {{ this }} (click_created_at)',
        'create index if not exists "int_stat_event_click_metrics_click_stat_event_id_idx" on {{ this }} (click_stat_event_id)'
    ]
) }}

with watermark as (
    -- Single-row CTE holding the high-water mark of the previous build.
    -- On a full build (or an empty target) the sentinel date lets every event through.
    {% if is_incremental() %}
        select coalesce(max(prev.updated_at), '1900-01-01'::timestamp) as last_updated_at
        from {{ this }} as prev
    {% else %}
        select '1900-01-01'::timestamp as last_updated_at
    {% endif %}
),

-- Click ids that must be rebuilt. `union` already removes duplicates, so the
-- two branches need no `distinct` of their own.
touched_clicks as (
    -- Branch 1: mission clicks that changed at or after the watermark.
    select ev.stat_event_id as click_stat_event_id
    from {{ ref('int_stat_event_global') }} as ev
    cross join watermark as wm
    where
        ev.mission_id is not null
        and ev.type = 'click'
        and coalesce(ev.updated_at, ev.created_at) >= wm.last_updated_at

    union

    -- Branch 2: clicks referenced by an apply that changed at or after the watermark.
    -- The `<> ''` predicate also discards null click ids.
    select ap.click_id as click_stat_event_id
    from {{ ref('int_stat_event_global') }} as ap
    cross join watermark as wm
    where
        ap.click_id <> ''
        and ap.type = 'apply'
        and coalesce(ap.updated_at, ap.created_at) >= wm.last_updated_at
),

-- Click events attached to a mission; restricted to the touched ids on
-- incremental runs.
click_rows as (
    select
        ev.stat_event_id as click_stat_event_id,
        ev.mission_id,
        ev.from_publisher_id,
        ev.source,
        ev.source_id,
        ev.created_at as click_created_at,
        ev.updated_at as click_updated_at
    from {{ ref('int_stat_event_global') }} as ev
    where
        ev.mission_id is not null
        and ev.type = 'click'
        {% if is_incremental() %}
            and exists (
                select 1
                from touched_clicks as tc
                where tc.click_stat_event_id = ev.stat_event_id
            )
        {% endif %}
),

-- Applies rolled up per referenced click: count, earliest apply, latest change.
apply_rollup as (
    select
        ap.click_id as click_stat_event_id,
        min(ap.created_at) as first_apply_at,
        count(*) as apply_count,
        max(coalesce(ap.updated_at, ap.created_at)) as apply_updated_at
    from {{ ref('int_stat_event_global') }} as ap
    where
        ap.click_id <> ''
        and ap.type = 'apply'
        {% if is_incremental() %}
            and exists (
                select 1
                from touched_clicks as tc
                where tc.click_stat_event_id = ap.click_id
            )
        {% endif %}
    group by ap.click_id
)

select
    cr.click_stat_event_id,
    cr.mission_id,
    cr.from_publisher_id,
    cr.source,
    cr.source_id,
    cr.click_created_at,
    ar.first_apply_at,
    -- A click with no matching rollup row has zero applies, hence the defaults below.
    coalesce(ar.apply_count, 0) as apply_count,
    coalesce(ar.apply_count >= 1, false) as has_apply,
    coalesce(ar.apply_count >= 2, false) as has_multi_apply,
    -- Latest change across the click and its applies; this value feeds the
    -- watermark of the next incremental run.
    greatest(
        coalesce(cr.click_updated_at, cr.click_created_at),
        coalesce(ar.apply_updated_at, '1900-01-01'::timestamp)
    ) as updated_at
from click_rows as cr
left join apply_rollup as ar
    on ar.click_stat_event_id = cr.click_stat_event_id
Loading
Loading