Hamutaro - Hamtaro 4

Database

[DB] μœˆλ„μš° ν•¨μˆ˜

carsumin 2026. 5. 27. 14:17
일반 집계 vs μœˆλ„μš° ν•¨μˆ˜
  • κ°€μž₯ 큰 차이 : 행이 μœ μ§€λ˜λŠ”κ°€
  • 원본 데이터λ₯Ό μœ μ§€ν•˜λ©΄μ„œ 집계값을 ν•¨κ»˜ λ³Ό 수 μžˆλ‹€.
-- 일반 집계 : 행이 쀄어듦
SELECT region, SUM(amount)
FROM orders
GROUP BY region;
κ²°κ³Ό:
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚ region β”‚  sum   β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ μ„œμšΈ   β”‚ 500000 β”‚
β”‚ λΆ€μ‚°   β”‚ 300000 β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”˜
-- μœˆλ„μš° ν•¨μˆ˜ : 행이 μœ μ§€λ¨
SELECT
    order_id,
    region,
    amount,
    SUM(amount) OVER (PARTITION BY region) AS region_total
FROM orders;
κ²°κ³Ό:
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚ order_id β”‚ region β”‚ amount β”‚ region_total β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ 1        β”‚ μ„œμšΈ   β”‚ 200000 β”‚ 500000       β”‚
β”‚ 2        β”‚ μ„œμšΈ   β”‚ 300000 β”‚ 500000       β”‚
β”‚ 3        β”‚ λΆ€μ‚°   β”‚ 300000 β”‚ 300000       β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜

 

 

κΈ°λ³Έ 문법
  • PARTITION BYλŠ” 그룹을 λ‚˜λˆ„κ³ , ORDER BYλŠ” κ·Έλ£Ή μ•ˆμ—μ„œ μˆœμ„œλ₯Ό μ •ν•œλ‹€.
ν•¨μˆ˜() OVER (
    PARTITION BY κ·Έλ£Ή κΈ°μ€€ 컬럼   -- GROUP BY 같은 μ—­ν• 
    ORDER BY μ •λ ¬ κΈ°μ€€ 컬럼       -- μˆœμ„œκ°€ ν•„μš”ν•œ ν•¨μˆ˜μ— μ‚¬μš©
)

 

 

ROW_NUMBER - 순번 맀기기
  • κ·Έλ£Ή μ•ˆμ—μ„œ μˆœλ²ˆμ„ λ§€κΈ΄λ‹€. 쀑볡 없이 1λΆ€ν„° μˆœμ„œλŒ€λ‘œ λΆ€μ—¬λœλ‹€.
SELECT
    customer_id,
    order_id,
    ordered_at,
    ROW_NUMBER() OVER (
    	PARTITION BY customer_id
        ORDER BY ordered_at
    ) AS order_seq
FROM orders;

 

κ²°κ³Ό:
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚ customer_id β”‚ order_id β”‚ ordered_at β”‚ order_seq β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ C1          β”‚ 1        β”‚ 2024-01-01 β”‚ 1         β”‚
β”‚ C1          β”‚ 5        β”‚ 2024-01-15 β”‚ 2         β”‚
β”‚ C2          β”‚ 2        β”‚ 2024-01-03 β”‚ 1         β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜

 

  • 고객의 첫 번째 주문만 μΆ”μΆœ
WITH ranked AS (
    SELECT *,
    	ROW_NUMBER() OVER (
        	PARTITION BY customer_id
            ORDER BY ordered_at
        ) AS rn
    FROM orders
)
SELECT * FROM ranked WHERE rn = 1;

 

 

RANK / DENSE_RANK - μˆœμœ„ λ§€κΈ°κΈ°
  • RANKλŠ” 동점 μ‹œ λ‹€μŒ μˆœμœ„λ₯Ό κ±΄λ„ˆλ›°κ³ , DENSE_RANKλŠ” κ±΄λ„ˆλ›°μ§€ μ•ŠλŠ”λ‹€.
SELECT
    customer_id,
    SUM(amount) AS total,
    RANK()          OVER (ORDER BY SUM(amount) DESC)
    DENSE_RANK()    OVER (ORDER BY SUM(amount) DESC)
FROM orders
GROUP BY customer_id;

 

κ²°κ³Ό:
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚ customer_id β”‚ total  β”‚ rnk β”‚ dense_rnk β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ C1          β”‚ 500000 β”‚ 1   β”‚ 1         β”‚
β”‚ C2          β”‚ 300000 β”‚ 2   β”‚ 2         β”‚
β”‚ C3          β”‚ 300000 β”‚ 2   β”‚ 2         β”‚  ← 동점
β”‚ C4          β”‚ 100000 β”‚ 4   β”‚ 3         β”‚  ← RANKλŠ” 4, DENSE_RANKλŠ” 3
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜

 

 

LAG / LEAD - 이전 / λ‹€μŒ ν–‰ μ°Έμ‘°
  • μ‹œκ³„μ—΄ λ°μ΄ν„°μ—μ„œ 이전 κ°’μ΄λ‚˜ λ‹€μŒ κ°’ μ°Έμ‘°ν•  λ•Œ μ“΄λ‹€.
  • LEADλŠ” λ°˜λŒ€λ‘œ λ‹€μŒ 행을 μ°Έμ‘°ν•œλ‹€. 예λ₯Ό λ“€μ–΄ "λ‹€μŒ μ£Όλ¬ΈκΉŒμ§€ 며칠이 κ±Έλ ΈλŠ”κ°€"λ₯Ό 계산할 λ•Œ μ“΄λ‹€.
-- μ „μ›” λŒ€λΉ„ 맀좜 증감
SELECT
    month,
    revenue,
    LAG(revenue) OVER (ORDER BY month)              AS prev_revenue,
    revenue - LAG(revenue) OVER (ORDER BY month)    AS diff
 FROM monthly_revenue;

 

κ²°κ³Ό:
β”Œβ”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚ month β”‚ revenue β”‚ prev_revenue β”‚  diff  β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ 01    β”‚ 100000  β”‚ NULL         β”‚ NULL   β”‚
β”‚ 02    β”‚ 120000  β”‚ 100000       β”‚  20000 β”‚
β”‚ 03    β”‚ 110000  β”‚ 120000       β”‚ -10000 β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”˜

 

 

SUM / AVG OVER - λˆ„μ  집계
  • ORDER BYλ₯Ό ν•¨κ»˜ μ“°λ©΄ λˆ„μ  ν•©κ³„λ‚˜ 이동 평균을 ꡬ할 수 μžˆλ‹€.
-- λˆ„μ  맀좜
SELECT
    ordered_at,
    amount,
    SUM(amount) OVER (ORDER BY ordered_at) AS cumulative_sum
FROM orders;

 

κ²°κ³Ό:
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚ ordered_at β”‚ amount β”‚ cumulative_sum β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ 2024-01-01 β”‚ 100000 β”‚ 100000         β”‚
β”‚ 2024-01-02 β”‚ 200000 β”‚ 300000         β”‚
β”‚ 2024-01-03 β”‚ 150000 β”‚ 450000         β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜

 

  • 3일 이동 평균
SELECT
    ordered_at,
    amount,
    AVG(amount) OVER (
    	ORDER BY ordered_at
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS moving_avg_3d
FROM orders;

 

 

NTILE - λΆ„μœ„μˆ˜ λ‚˜λˆ„κΈ°
  • 데이터λ₯Ό N개의 그룹으둜 κ· λ“±ν•˜κ²Œ λ‚˜λˆˆλ‹€.
-- 고객을 맀좜 κΈ°μ€€μœΌλ‘œ 4λΆ„μœ„λ‘œ λ‚˜λˆ„κΈ°
SELECT
    customer_id,
    total_amount,
    NTILE(4) OVER (ORDER BY total_amount DESC) AS quartile
FROM customer_stats;
-- quartile 1 = μƒμœ„ 25% (VIP)
-- quartile 4 = ν•˜μœ„ 25%

 

 

예제
  • 고객별 첫 번째 ꡬ맀 λ‚ μ§œλ₯Ό κ΅¬ν•˜μ‹œμ˜€.
WITH ranked AS (
    SELECT *,
    	ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY ordered_at) AS rn
    FROM orders
)
SELECT customer_id, ordered_at AS first_order_date
FROM ranked WHERE rn = 1;

 

  • μ „μ›” λŒ€λΉ„ 맀좜 증감λ₯ μ„ κ΅¬ν•˜μ‹œμ˜€.
SELECT
    month,
    revenue,
    ROUND(
    	(revenue - LAG(revenue) OVER (ORDER BY month))
        / LAG(revenue) OVER (ORDER BY month) * 100, 2
    ) AS growth_rate
FROM monthly_revenue;

 

  • μƒμœ„ 10% 고객을 μΆ”μΆœν•˜μ‹œμ˜€.
WITH ranked AS (
    SELECT customer_id, total_amount,
    	NTILE(10) OVER (ORDER BY total_amount DESC) AS decile
    FROM customer_stats
)
SELECT * FROM ranked WHERE decile = 1;

 

  • 3일 이동 평균을 κ΅¬ν•˜μ‹œμ˜€.
SELECT
    date,
    revenue,
    AVG(revenue) OVER (
    	ORDER BY date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS moving_avg
FROM daily_revenue;

 

 

정리
  • μœˆλ„μš° ν•¨μˆ˜μ˜ 핡심은 행을 μœ μ§€ν•˜λ©΄μ„œ μ§‘κ³„ν•œλ‹€λŠ” 것이닀.
ν•¨μˆ˜ μš©λ„
ROW_NUMBER κ·Έλ£Ή λ‚΄ 순번 (쀑볡 μ—†μŒ)
RANK μˆœμœ„ (동점 μ‹œ λ‹€μŒ μˆœμœ„ κ±΄λ„ˆλœ€)
DENSE_RANK μˆœμœ„ (동점 μ‹œ λ‹€μŒ μˆœμœ„ μ•ˆ κ±΄λ„ˆλœ€)
LAG 이전 ν–‰ κ°’ μ°Έμ‘°
LEAD λ‹€μŒ ν–‰ κ°’ μ°Έμ‘°
SUM/AVG OVER λˆ„μ  합계/이동 평균
NTILE NλΆ„μœ„λ‘œ κ· λ“± λΆ„ν•