본문 바로가기
「SQL초급떼기

[chat GPT]코호트분석 mysql(월별/주별, 6개월기간)

by DayGo 2023. 7. 30.

1. 월별과 주별 코호트 분석 및 리텐션율을 모두 구하는 MySQL 쿼리를 아래에 제시합니다.

먼저, 월별 코호트 분석과 리텐션율을 구하는 쿼리입니다:

 

SELECT
  DATE_FORMAT(join_date, '%Y-%m') AS Cohort,
  COUNT(DISTINCT user_id) AS CohortSize,
  COUNT(DISTINCT CASE WHEN DATE_FORMAT(activity_date, '%Y-%m') = DATE_FORMAT(join_date, '%Y-%m') THEN user_id END) AS Month1,
  COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 1 THEN user_id END) AS Month2,
  COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 2 THEN user_id END) AS Month3,
  COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 3 THEN user_id END) AS Month4,
  COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 4 THEN user_id END) AS Month5,
  COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 5 THEN user_id END) AS Month6,
  ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 0 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month1_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 1 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month2_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 2 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month3_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 3 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month4_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 4 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month5_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(join_date, '%Y%m')) = 5 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Month6_Retention
FROM user_activity
GROUP BY Cohort;

 

2. 주별 코호트분석

SELECT
  DATE_FORMAT(join_date, '%Y-%u') AS Cohort,
  COUNT(DISTINCT user_id) AS CohortSize,
  COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) THEN user_id END) AS Week1,
  COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 1 THEN user_id END) AS Week2,
  COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 2 THEN user_id END) AS Week3,
  COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 3 THEN user_id END) AS Week4,
  COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 4 THEN user_id END) AS Week5,
  COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 5 THEN user_id END) AS Week6,
  ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week1_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 1 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week2_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 2 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week3_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 3 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week4_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 4 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week5_Retention,
  ROUND(COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(join_date) + 5 THEN user_id END) * 100.0 / COUNT(DISTINCT user_id), 2) AS Week6_Retention
FROM user_activity
GROUP BY Cohort;

 

3. WITH문을 활용 가독성 높이게 MYSQL 

 

WITH cohorts AS (
  SELECT
    user_id,
    DATE_FORMAT(join_date, '%Y-%m') AS cohort_month,
    DATE_FORMAT(join_date, '%Y-%u') AS cohort_week,
    MIN(activity_date) AS first_activity_date
  FROM user_activity
  GROUP BY user_id, cohort_month, cohort_week
),
monthly_retention AS (
  SELECT
    cohort_month AS Cohort,
    COUNT(DISTINCT user_id) AS CohortSize,
    COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 0 THEN user_id END) AS Month1,
    COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 1 THEN user_id END) AS Month2,
    COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 2 THEN user_id END) AS Month3,
    COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 3 THEN user_id END) AS Month4,
    COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 4 THEN user_id END) AS Month5,
    COUNT(DISTINCT CASE WHEN PERIOD_DIFF(DATE_FORMAT(activity_date, '%Y%m'), DATE_FORMAT(first_activity_date, '%Y%m')) = 5 THEN user_id END) AS Month6
  FROM cohorts
  GROUP BY Cohort
),
weekly_retention AS (
  SELECT
    cohort_week AS Cohort,
    COUNT(DISTINCT user_id) AS CohortSize,
    COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) THEN user_id END) AS Week1,
    COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 1 THEN user_id END) AS Week2,
    COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 2 THEN user_id END) AS Week3,
    COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 3 THEN user_id END) AS Week4,
    COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 4 THEN user_id END) AS Week5,
    COUNT(DISTINCT CASE WHEN WEEK(activity_date) = WEEK(first_activity_date) + 5 THEN user_id END) AS Week6
  FROM cohorts
  GROUP BY Cohort
)
SELECT
  m.Cohort AS Month_Cohort,
  m.CohortSize AS Month_Cohort_Size,
  m.Month1,
  m.Month2,
  m.Month3,
  m.Month4,
  m.Month5,
  m.Month6,
  ROUND(m.Month1 * 100.0 / m.CohortSize, 2) AS Month1_Retention,
  ROUND(m.Month2 * 100.0 / m.CohortSize, 2) AS Month2_Retention,
  ROUND(m.Month3 * 100.0 / m.CohortSize, 2) AS Month3_Retention,
  ROUND(m.Month4 * 100.0 / m.CohortSize, 2) AS Month4_Retention,
  ROUND(m.Month5 * 100.0 / m.CohortSize, 2) AS Month5_Retention,
  ROUND(m.Month6 * 100.0 / m.CohortSize, 2) AS Month6_Retention,
  w.Cohort AS Week_Cohort,
  w.CohortSize AS Week_Cohort_Size,
  w.Week1,
  w.Week2,
  w.Week3,
  w.Week4,
  w.Week5,
  w.Week6,
  ROUND(w.Week1 * 100.0 / w.CohortSize, 2) AS Week1_Retention,
  ROUND(w.Week2 * 100.0 / w.CohortSize, 2) AS Week2_Retention,
  ROUND(w.Week3 * 100.0 / w.CohortSize, 2) AS Week3_Retention,
  ROUND(w.Week4 * 100.0 / w.CohortSize, 2) AS Week4_Retention,
  ROUND(w.Week5 * 100.0 / w.CohortSize, 2) AS Week5_Retention,
  ROUND(w.Week6 * 100.0 / w.CohortSize, 2) AS Week6_Retention
FROM monthly_retention m
JOIN weekly_retention w ON m.Cohort = w.Cohort
ORDER BY m.Cohort;

 

 

https://chat.openai.com/share/c1d4a4f1-4d39-4055-9c09-1f3f92d8f81c

댓글