-------------------------- I —----------------------------------------
-- вытаскиваем все касания пользователя, маркируем касания которые закончились покупкой, находим минимальный ивент и берем из него источник
WITH ecom_tab_raw AS (
SELECT
DISTINCT PARSE_DATE('%Y%m%d', event_date) AS date,
user_pseudo_id AS clientId,
MIN(event_timestamp) AS min_timestamp_hit,
-- получаем id сессии
param.value.int_value as session_id,
-- получаем самый ранний timestamp внутри каждой сессии
FIRST_VALUE(event_timestamp) OVER(partition by param.value.int_value ORDER BY event_timestamp ASC) first_ts,
CASE
WHEN event_name = 'purchase' THEN 1
ELSE 0 END
purchase,
CONCAT( traffic_source.name, ' / ', traffic_source.medium ) AS source_medium,
geo.country
FROM
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`,
UNNEST(event_params) as param
WHERE _TABLE_SUFFIX BETWEEN
'20210115' AND '20210131' AND geo.country = 'United States' AND param.key = 'ga_session_id'
GROUP BY
event_date, user_pseudo_id, event_name,traffic_source.name,traffic_source.medium,geo.country,param.value.int_value,event_timestamp
ORDER BY
date,clientId,session_id,min_timestamp_hit ASC
),
-------------------------- I end —-------------------------------------
--------------------------- II —----------------------------------------
ecom_tab AS (
SELECT
date,country,clientId,session_id,
CASE
WHEN source_medium = '(referral) / referral' THEN 'popular-blog.com / referral'
WHEN source_medium = '<Other> / <Other>' THEN 'facebook / cpm'
WHEN source_medium = '(organic) / organic' THEN 'google / organic'
WHEN source_medium = '<Other> / cpc' THEN 'bing / cpc'
WHEN source_medium = '(data deleted) / (data deleted)' THEN 'twitter / cpa'
ELSE source_medium END source_medium,
SUM(purchase) as purchase,first_ts
FROM ecom_tab_raw
GROUP BY
date,country,clientId,session_id,source_medium,first_ts
),
--------------------------- II end —-------------------------------------
----------------------------- III —----------------------------------------
-- вытаскиваем timestamp из unix
-- маркируем источники где была покупка дополнительным ярлыком
goal_markup_tab AS (
SELECT
date,clientId,
TIMESTAMP_MICROS(first_ts) AS timestamp_min,
CASE
WHEN purchase = 1 THEN CONCAT('GOAL_source = ', source_medium)
ELSE source_medium END
event_source_medium,purchase,source_medium,country
FROM ecom_tab
ORDER BY
-- сортируем все касания от раннего к позднему
timestamp_min asc ),
---------------------------- III end —------------------------------------
----------------------------- IV —-----------------------------------------
-- собираем массив из последовательных касаний для каждого clientId
ARRAY_tab AS (
SELECT
country,clientId,
FORMAT( '%T', ARRAY_AGG(event_source_medium) OVER (PARTITION BY clientId) ) AS array_agg
FROM
goal_markup_tab ),
-------------------------- IV end —-------------------------------------
----------------------------- V —------------------------------------------
-- агрегируем результат в пути к конверсии и сумму таких путей
GOAL_path AS (
SELECT
country, array_agg,
SUM(conversions) AS conversions,
FROM (
SELECT
country, array_agg,
CASE
WHEN array_agg LIKE '%GOAL%' THEN 1
ELSE 0 END
conversions
FROM ARRAY_tab )
GROUP BY country,array_agg )
---------------------------- V end —--------------------------------------
---------------------------- VI —-----------------------------------------
SELECT
*, ROUND(conversions / SUM(conversions) OVER() * 100, 1) as from_total
FROM
GOAL_path
ORDER BY
conversions DESC