
오늘의 배움
고객 분석
1⃣ 국가별 고객 수, 누적 합계
2⃣ 국가별 고객수, 구성비, 누적비
3⃣ 구매 이력이 없는 고객
📌 1⃣ 국가 별 고객 수, 누적 합계 📌
select *, sum("고객 수") over (order by "고객 수" desc, country)
from (
select c.country , count(c.customer_id) as "고객 수"
from customers c
group by c.country
order by "고객 수" desc
) a;
📌 2⃣ 국가 별 고객 수, 구성비, 누적비 📌
-- 누적 구성비
select *
, sum("구성비") over (order by "고객 수" desc, country) as "누적 구성비"
from (
-- 구성비
select *
, "고객 수" / sum("고객 수") over() * 100 as "구성비"
from (
-- 고객 수
select c.country , count(c.customer_id) as "고객 수"
from customers c
group by c.country
order by "고객 수" desc
) a
) b;

📌 3⃣ 구매 이력이 없는 고객 📌
-- left join 이용
select c.customer_id , c.company_name , o.*
from customers c left join orders o on c.customer_id = o.customer_id
where o.order_id is null ;
-- 차집합 이용
(select c.customer_id , c.company_name
from customers c)
except
(select distinct c.customer_id , c.company_name
from customers c join orders o on c.customer_id = o.customer_id) ;

고객 구매 지수 분석
1⃣ 국가별 분석
2⃣ 지역분석
3⃣ 고객별 분석
📌 기본 테이블 만들기 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
select *
from cte_customers;

📌 1⃣ 국가별 고객수, 매출액, 주문건수 상관계수 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
✅ 국가 별 고객수, 매출액, 주문건수 : cte_country_customercnt_amount_ordercnt
, cte_country_customercnt_amount_ordercnt as (
select country, count(distinct customer_id ) as "고객수", sum(amount) as "매출액", count(distinct order_id) as "주문건수"
from cte_customers
group by country
)
✅ 상관관계 : cte_corr_customer_amount_ordercnt
, cte_corr_customer_amount_ordercnt as (
select corr(고객수, 매출액) as "고객수_매출액"
, corr(매출액, 주문건수) as "매출액_주문건수"
, corr(고객수, 주문건수) as "고객수_주문건수"
from cte_country_customercnt_amount_ordercnt
)
select *
from cte_corr_customer_amount_ordercnt;


📌 2⃣.1⃣ 지역 별 고객수, 매출액, 주문건수 구성비 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
✅ 국가 별 고객수, 매출액, 주문건수 : cte_country_customercnt_amount_ordercnt
, cte_country_customercnt_amount_ordercnt as (
select country, count(distinct customer_id ) as "고객수", sum(amount) as "매출액", count(distinct order_id) as "주문건수"
from cte_customers
group by country
)
✅ 지역 컬럼 추가 : cte_country_group
, cte_country_group as (
select *
, case
when lower(country) in ('usa', 'canada', 'mexico') then 'NorthAmerica'
when lower(country) in ('brazil', 'venezuela', 'argentina') then 'SouthAmerica'
else 'Europe'
end "지역"
from cte_country_customercnt_amount_ordercnt
)
✅ 지역 별 고객수, 매출액, 주문건수 : cte_country_group_customer_amount_ordercnt
, cte_country_group_customer_amount_ordercnt as (
select 지역, sum(고객수) as 고객수 , sum(매출액) as 매출액 , sum(주문건수) as 주문건수
from cte_country_group
group by 지역
order by 지역
)
✅ 지역 별 고객수, 매출액, 주문건수 구성비 : cte_country_group_ratio
, cte_country_group_ratio as (
select 지역
, 고객수
, 고객수 / sum(고객수) over() * 100 as 고객수구성비
, 매출액
, 매출액 / sum(매출액) over() * 100 as 매출액구성비
, 주문건수
, 주문건수 / sum(주문건수) over() * 100 as 주문건수구성비
from cte_country_group_customer_amount_ordercnt
)
select *
from cte_country_group_ratio;


📌 2⃣.2⃣ 지역 별 판매된 제품 순위 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
✅ 국가, 카테고리명, 제품ID, 제품명, 수량, 지역(파생컬럼)
, cte_customer_product_sale as (
select country, category_name, product_id, product_name, quantity
, case
when lower(country) in ('usa', 'canada', 'mexico') then 'NorthAmerica'
when lower(country) in ('brazil', 'venezuela', 'argentina') then 'SouthAmerica'
else 'Europe'
end as "지역"
from cte_customers
)
✅ 지역별 제품 판매수량
, cte_country_group_product_quantity as (
select 지역, '( '||category_name||') '||product_name||'('||product_id::varchar(10)||')' as "제품"
, sum(quantity) as 판매수량
from cte_customer_product_sale
group by 1, 2
)
✅ 지역별 제품 판매수량 순위
, cte_country_group_product_quantity_rank as (
select * , row_number() over(partition by 지역 order by 판매수량 desc) as 순위
from cte_country_group_product_quantity
)
✅ 지역별 판매순위 비교(pivot)
, cte_country_group_product_quantity_rank_pivot as (
select 순위
, max(case when 지역 = 'NorthAmerica' then 제품 end) as NorthAmerica
, max(case when 지역 = 'SouthAmerica' then 제품 end) as SouthAmerica
, max(case when 지역 = 'Europe' then 제품 end) as Europe
from cte_country_group_product_quantity_rank
group by 순위
order by 순위
)
select *
from cte_country_group_product_quantity_rank_pivot;

📌 3⃣ 고객별 분석 - 기본 테이블 만들기 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
select customer_id
, count(distinct order_id) as 주문건수
, rank() over(order by count(order_id) desc) as 주문건수순위
, sum(amount) as 매출액
, rank() over(order by sum(amount) desc) as 매출액순위
, sum(amount) / count(distinct order_id) as 건당평균주문액
, rank() over(order by sum(amount) / count(distinct order_id) desc) as 건당평균주문액순위
from cte_customers
group by customer_id
order by 매출액 desc;

십분위 분석
중요도를 결정하기 위해 데이터를 10단계로 나누는 방법
📌 decil 분석 📌
-- customers : coustomer_id
-- order_details : amount
with cte_customers as (
select c.customer_id
, od.unit_price * od.quantity * (1-od.discount) as amount
from customers c , orders o , order_details od
where c.customer_id = o.customer_id
and o.order_id = od.order_id
)
✅ 매출액 기준으로 상위부터 10%씩 나누어 10개의 그룹 할당
, cte_customer_info as (
select customer_id
, sum(amount) as 매출액
, ntile(10) over(order by sum(amount) desc) as decil
from cte_customers
group by customer_id
)
✅ decil 별 매출합계
, cte_decil_group as (
select decil, sum(매출액) as decil_sum_amount
from cte_customer_info
group by decil
)
✅ decil 별 구성비
, cte_decil_ratio as (
select *
, sum(decil_sum_amount) over () as total
, decil_sum_amount / sum(decil_sum_amount) over () * 100 as decil_sum_amount_rate
from cte_decil_group
)
✅ decil 별 구성비 누계
, cte_decil_agg as (
select *
, sum(decil_sum_amount_rate) over (order by decil) as cumsum
from cte_decil_ratio
order by decil
)
select *
from cte_decil_agg ;

Decil 분석의 단점
한 번의 구매로 고가의 상품을 구매하는 사용자와 저렴한 상품을 정기적으로 여러 번 구매하는 사용자를 동일 집단으로 판단하는 문제
검색 기간이 너무 길면 과거에 좋은 고객이었지만 현재 다른 서비스를 이용하는 인간 고객이 포함될 수 있습니다.
검색 기간이 너무 짧으면 해당 기간 동안 일시적으로 많이 구매한 사용자가 정기적으로 구매하는 안정적인 고객보다 우대 고객으로 포함될 수 있습니다.
RFM 분석
Recency : 구매가 얼마나 최근에 이루어졌습니까?
에프requency: 얼마나 자주 구매합니까?
남onetary: 얼마를 지불하셨나요?
· 구매 가능성이 높은 고객을 식별하는 데이터 분석 방법
· 마케팅에서 사용자 발권 방법
Decil 분석의 단점 보완

📌 RFM 분석 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
✅ 기준일 선정
, cte_customer_maxo as (
select *, max(order_date) over() as maxo
from cte_customers
)
✅ RFM 산출
, cte_rfm as (
select customer_id
, max(maxo) - max(order_date) as Recency
, count(distinct order_id) as Frequency
, sum(amount) as Monetary
from cte_customer_maxo
group by customer_id
order by 2 , 3 desc, 4 desc
)
select *
from cte_rfm;

오늘의 도전
왼쪽으로 조인된 기본 테이블을 사용한 분석
⏯ inner join으로 만든 기본 테이블
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
⏩ left join으로 나타낸 기본 테이블
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c
left join orders o on c.customer_id = o.customer_id
left join order_details od on o.order_id = od.order_id
left join products p on od.product_id = p.product_id
left join categories c2 on p.category_id = c2.category_id ;

고객별 ABC 분석
📌 고객별 ABC 분석 📌
-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
✅ 매출액 산출
, cte_customer_sales as (
select customer_id
, sum(amount) as 매출액
from cte_customers
group by customer_id
order by 매출액 desc
)
✅ 구성비 산출
, cte_customer_ratio as (
select *
, 매출액 / sum(매출액) over () * 100 as 구성비
from cte_customer_sales
)
✅ 구성비 누계 산출
, cte_customer_agg as (
select *
, sum(구성비) over (order by 구성비 desc) as 구성비누계
from cte_customer_ratio
)
✅ 등급 산출
, cte_class as (
select *
, case
when 구성비누계 <= 70 then 'A'
when 구성비누계 <= 90 then 'B'
else 'C'
end as 등급
from cte_customer_agg
order by 구성비 desc
)
select *
from cte_class;

