前回では,
コンバージョンパスに着目した応用KPI分析
まず,
SELECT
COUNT(1) AS cnt,
AVG(path_length) AS avg_path_length,
AVG(last_time-landing_time) AS avg_conversion_time
FROM (
SELECT
cookie, cv_id,
MAX(node_id) AS path_length,
MIN(time) AS landing_time,
MAX(time) AS cv_time
FROM conversion_path
GROUP BY cookie, cv_id
) t1
JOIN (
SELECT
cookie,
cv_id,
MAX(path) as last_node,
MAX(time) as last_time
FROM conversion_path
WHERE 1 < node_id AND cv_flag = 0
GROUP BY 1,2
) t2 ON (t1.cookie=t2.cookie AND t1.cv_id=t2.cv_id)
結果例
2015年1月には5053回のコンバージョン
※ これらの値はサンプルデータに基づくもので,
cnt | avg_ | avg_ |
---|---|---|
5053 | 1047. | 66692. |
コンバージョンパス長の分布
先ほどは
SELECT
path_length,
COUNT(1) AS cnt
FROM (
SELECT
cookie, cv_id,
MAX(node_id) AS path_length
FROM conversion_path
GROUP BY cookie, cv_id
) t1
GROUP BY path_length
result = _
result.DataFrame().plot(x='path_length', xlim=(0,200), ylim=(0,200), figsize=(12, 10))
可視化してみると,
コンバージョン時間の分布
同様にコンバージョン時間の分布も見てみましょう。
SELECT CEIL(cv_time/width)*width AS x, COUNT(1) AS cnt
FROM
(
SELECT (last_time-landing_time)/3600 AS cv_time
FROM (
SELECT
cookie, cv_id,
MAX(node_id) AS path_length,
MIN(time) AS landing_time,
MAX(time) AS cv_time
FROM conversion_path
GROUP BY cookie, cv_id
) temp1
JOIN (
SELECT
cookie,
cv_id,
MAX(path) as last_node,
MAX(time) as last_time
FROM conversion_path
WHERE 1 < node_id AND cv_flag = 0
GROUP BY 1,2
) temp2 ON (temp1.cookie=temp2.cookie AND temp1.cv_id=temp2.cv_id)
) t1,
(
SELECT POW(10,floor(LOG10(MAX( (last_time-landing_time)/3600 )))-1) AS width
FROM (
SELECT
cookie, cv_id,
MAX(node_id) AS path_length,
MIN(time) AS landing_time,
MAX(time) AS cv_time
FROM conversion_path
GROUP BY cookie, cv_id
) temp3
JOIN (
SELECT
cookie,
cv_id,
MAX(path) as last_node,
MAX(time) as last_time
FROM conversion_path
WHERE 1 < node_id AND cv_flag = 0
GROUP BY 1,2
) temp4 ON (temp3.cookie=temp4.cookie AND temp3.cv_id=temp4.cv_id)
) t2
GROUP BY ceil(cv_time/width)*width
ORDER BY x
result = _
result.DataFrame().plot(x='x', figsize=(12, 10))
コンバージョン時間もパス長と同じように,