1. 월별과 주별 코호트 분석 및 리텐션율을 모두 구하는 MySQL 쿼리를 아래에 제시합니다.
먼저, 월별 코호트 분석과 리텐션율을 구하는 쿼리입니다:
SELECT
DATE_FORMAT(join_date, '%Y-%m') AS Cohort,
COUNT(DISTINCT user_id) AS CohortSize,
COUNT(DISTINCT CASE WHEN DATE_FORMAT(activity_date, '%Y-%m') = DATE_FORMAT(join_date, '%Y-%m') THEN user_id END) AS Month1,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 1 THEN user_id END) AS Month2,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 2 THEN user_id END) AS Month3,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 3 THEN user_id END) AS Month4,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 4 THEN user_id END) AS Month5,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 5 THEN user_id END) AS Month6,
ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 0 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month1_Retention,
ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 1 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month2_Retention,
ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 2 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month3_Retention,
ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 3 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month4_Retention,
ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 4 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month5_Retention,
ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 5 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month6_Retention
FROM user_activity
GROUP BY Cohort;
2. 주별 코호트분석
SELECT
DATE_FORMAT(join_date, '%Y-%u') AS Cohort,
COUNT(DISTINCT user_id) AS CohortSize,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) THEN user_id END) AS Week1,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 1 THEN user_id END) AS Week2,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 2 THEN user_id END) AS Week3,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 3 THEN user_id END) AS Week4,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 4 THEN user_id END) AS Week5,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 5 THEN user_id END) AS Week6,
ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week1_Retention,
ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 1 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week2_Retention,
ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 2 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week3_Retention,
ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 3 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week4_Retention,
ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 4 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week5_Retention,
ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 5 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week6_Retention
FROM user_activity
GROUP BY Cohort;
3. WITH문을 활용 가독성 높이게 MYSQL
WITH cohorts AS (
SELECT
user_id,
DATE_FORMAT(join_date, '%Y-%m') AS cohort_month,
DATE_FORMAT(join_date, '%Y-%u') AS cohort_week,
MIN(activity_date) AS first_activity_date
FROM user_activity
GROUP BY user_id, cohort_month, cohort_week
),
monthly_retention AS (
SELECT
cohort_month AS Cohort,
COUNT(DISTINCT user_id) AS CohortSize,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 0 THEN user_id END) AS Month1,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 1 THEN user_id END) AS Month2,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 2 THEN user_id END) AS Month3,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 3 THEN user_id END) AS Month4,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 4 THEN user_id END) AS Month5,
COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 5 THEN user_id END) AS Month6
FROM cohorts
GROUP BY Cohort
),
weekly_retention AS (
SELECT
cohort_week AS Cohort,
COUNT(DISTINCT user_id) AS CohortSize,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) THEN user_id END) AS Week1,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 1 THEN user_id END) AS Week2,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 2 THEN user_id END) AS Week3,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 3 THEN user_id END) AS Week4,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 4 THEN user_id END) AS Week5,
COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 5 THEN user_id END) AS Week6
FROM cohorts
GROUP BY Cohort
)
SELECT
m.Cohort AS Month_Cohort,
m.CohortSize AS Month_Cohort_Size,
m.Month1,
m.Month2,
m.Month3,
m.Month4,
m.Month5,
m.Month6,
ROUND(m.Month1 * 100.0 / m.CohortSize, 2) AS Month1_Retention,
ROUND(m.Month2 * 100.0 / m.CohortSize, 2) AS Month2_Retention,
ROUND(m.Month3 * 100.0 / m.CohortSize, 2) AS Month3_Retention,
ROUND(m.Month4 * 100.0 / m.CohortSize, 2) AS Month4_Retention,
ROUND(m.Month5 * 100.0 / m.CohortSize, 2) AS Month5_Retention,
ROUND(m.Month6 * 100.0 / m.CohortSize, 2) AS Month6_Retention,
w.Cohort AS Week_Cohort,
w.CohortSize AS Week_Cohort_Size,
w.Week1,
w.Week2,
w.Week3,
w.Week4,
w.Week5,
w.Week6,
ROUND(w.Week1 * 100.0 / w.CohortSize, 2) AS Week1_Retention,
ROUND(w.Week2 * 100.0 / w.CohortSize, 2) AS Week2_Retention,
ROUND(w.Week3 * 100.0 / w.CohortSize, 2) AS Week3_Retention,
ROUND(w.Week4 * 100.0 / w.CohortSize, 2) AS Week4_Retention,
ROUND(w.Week5 * 100.0 / w.CohortSize, 2) AS Week5_Retention,
ROUND(w.Week6 * 100.0 / w.CohortSize, 2) AS Week6_Retention
FROM monthly_retention m
JOIN weekly_retention w ON m.Cohort = w.Cohort
ORDER BY m.Cohort;
https://chat.openai.com/share/c1d4a4f1-4d39-4055-9c09-1f3f92d8f81c
'「SQL초급떼기' 카테고리의 다른 글
chat GPT 활용 MYSQL 소수점 계산 물어보기 (0) | 2023.08.13 |
---|---|
[프로그래머스sql고득점kit-level4]코딩테스트 연습JOIN5월 식품들의 총매출 조회하기 (0) | 2023.08.13 |
[JOIN-LEVEL4]코딩테스트 연습JOIN그룹별 조건에 맞는 식당 목록 출력하기 (0) | 2023.07.30 |
[JOIN-LEVEL3]코딩테스트 연습JOIN있었는데요 없었습니다 (0) | 2023.07.28 |
[JOIN-LEVEL2]코딩테스트 연습JOIN조건에 맞는 도서와 저자 리스트 출력하기 (0) | 2023.07.28 |
댓글