(스타터 4일까지) #50일차 (23.04.17)



오늘의 배움

고객 분석

1⃣ 국가별 고객 수, 누적 합계

2⃣ 국가별 고객수, 구성비, 누적비

3⃣ 구매 이력이 없는 고객

📌 1⃣ 국가 별 고객 수, 누적 합계 📌

select *, sum("고객 수") over (order by "고객 수" desc, country)
from (
	select c.country , count(c.customer_id) as "고객 수"
	from customers c 
	group by c.country 
	order by "고객 수" desc
	) a;
📌 2⃣ 국가 별 고객 수, 구성비, 누적비 📌

-- 누적 구성비
select *
	 , sum("구성비") over (order by "고객 수" desc, country) as "누적 구성비"
from (
	-- 구성비
	select *
	 , "고객 수" / sum("고객 수") over() * 100 as "구성비"
	from (
		-- 고객 수
		select c.country , count(c.customer_id) as "고객 수"
		from customers c 
		group by c.country
		order by "고객 수" desc
		) a 
	) b;
📌 3⃣ 구매 이력이 없는 고객 📌

-- left join 이용
select c.customer_id , c.company_name , o.*
from customers c left join orders o on c.customer_id = o.customer_id 
where o.order_id is null ;

-- 차집합 이용
(select c.customer_id , c.company_name
from customers c)
except
(select distinct c.customer_id , c.company_name
from customers c join orders o on c.customer_id = o.customer_id) ;


고객 구매 지수 분석

1⃣ 국가별 분석

2⃣ 지역분석

3⃣ 고객별 분석

📌 기본 테이블 만들기 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 )
select *
from cte_customers;


📌 1⃣ 국가별 고객수, 매출액, 주문건수 상관계수 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 )
✅ 국가 별 고객수, 매출액, 주문건수 : cte_country_customercnt_amount_ordercnt
 , cte_country_customercnt_amount_ordercnt as (
 select country, count(distinct customer_id ) as "고객수", sum(amount) as "매출액", count(distinct order_id) as "주문건수"
 from cte_customers
 group by country
)
✅ 상관관계 : cte_corr_customer_amount_ordercnt
, cte_corr_customer_amount_ordercnt as (
select corr(고객수, 매출액) as "고객수_매출액"
	 , corr(매출액, 주문건수) as "매출액_주문건수"
	 , corr(고객수, 주문건수) as "고객수_주문건수"
from cte_country_customercnt_amount_ordercnt
)
select *
from cte_corr_customer_amount_ordercnt;


국가별 고객수, 매출, 주문건수

상관관계

📌 2⃣.1⃣ 지역 별 고객수, 매출액, 주문건수 구성비 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 )
 ✅ 국가 별 고객수, 매출액, 주문건수 : cte_country_customercnt_amount_ordercnt
 , cte_country_customercnt_amount_ordercnt as (
 select country, count(distinct customer_id ) as "고객수", sum(amount) as "매출액", count(distinct order_id) as "주문건수"
 from cte_customers
 group by country
)
 ✅ 지역 컬럼 추가 : cte_country_group
, cte_country_group as (
select *
	 , case 
	 	when lower(country) in ('usa', 'canada', 'mexico') then 'NorthAmerica'
	 	when lower(country) in ('brazil', 'venezuela', 'argentina') then 'SouthAmerica'
	 	else 'Europe'
	 end "지역"
from cte_country_customercnt_amount_ordercnt	 
)
 ✅ 지역 별 고객수, 매출액, 주문건수 : cte_country_group_customer_amount_ordercnt
, cte_country_group_customer_amount_ordercnt as (
select 지역, sum(고객수) as 고객수 , sum(매출액) as 매출액 , sum(주문건수) as 주문건수
from cte_country_group
group by 지역
order by 지역
)
 ✅ 지역 별 고객수, 매출액, 주문건수 구성비 : cte_country_group_ratio
, cte_country_group_ratio as (
select 지역
	 , 고객수
	 , 고객수 / sum(고객수) over() * 100 as 고객수구성비
	 , 매출액
	 , 매출액 / sum(매출액) over() * 100 as 매출액구성비
	 , 주문건수
	 , 주문건수 / sum(주문건수) over() * 100 as 주문건수구성비 
from cte_country_group_customer_amount_ordercnt
) 
select *
from cte_country_group_ratio;


지역 열 추가
지역별 고객수, 매출, 주문건수 / 지역별 고객수, 매출, 주문건수 구성비

📌 2⃣.2⃣ 지역 별 판매된 제품 순위 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 )
 ✅ 국가, 카테고리명, 제품ID, 제품명, 수량, 지역(파생컬럼)
 , cte_customer_product_sale as (
 select country, category_name, product_id, product_name, quantity
 	  , case
 	  	when lower(country) in ('usa', 'canada', 'mexico') then 'NorthAmerica'
	 	when lower(country) in ('brazil', 'venezuela', 'argentina') then 'SouthAmerica'
	 	else 'Europe'
 	  end as "지역"
 from cte_customers
 )
 ✅ 지역별 제품 판매수량
 , cte_country_group_product_quantity as (
 select 지역, '( '||category_name||') '||product_name||'('||product_id::varchar(10)||')' as "제품"
 	  , sum(quantity) as 판매수량
 from cte_customer_product_sale
 group by 1, 2
 )
 ✅ 지역별 제품 판매수량 순위
 , cte_country_group_product_quantity_rank as (
 select * , row_number() over(partition by 지역 order by 판매수량 desc) as 순위
 from cte_country_group_product_quantity
 ) 
 ✅ 지역별 판매순위 비교(pivot)
 , cte_country_group_product_quantity_rank_pivot as (
 select 순위
 	  , max(case when 지역 = 'NorthAmerica' then 제품 end) as NorthAmerica
 	  , max(case when 지역 = 'SouthAmerica' then 제품 end) as SouthAmerica
 	  , max(case when 지역 = 'Europe' then 제품 end) as Europe
 from cte_country_group_product_quantity_rank
 group by 순위
 order by 순위
 )
select *
from cte_country_group_product_quantity_rank_pivot;


지역별 매출 순위 비교(피벗)

📌 3⃣ 고객별 분석 - 기본 테이블 만들기 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 )
 select customer_id
 	  , count(distinct order_id) as 주문건수
 	  , rank() over(order by count(order_id) desc) as 주문건수순위
 	  , sum(amount) as 매출액
 	  , rank() over(order by sum(amount) desc) as 매출액순위
 	  , sum(amount) / count(distinct order_id) as 건당평균주문액
 	  , rank() over(order by sum(amount) / count(distinct order_id) desc) as 건당평균주문액순위
 from cte_customers
 group by customer_id
 order by 매출액 desc;


십분위 분석

중요도를 결정하기 위해 데이터를 10단계로 나누는 방법

📌 decil 분석 📌

-- customers : coustomer_id
-- order_details : amount

with cte_customers as (
select c.customer_id
	 , od.unit_price * od.quantity * (1-od.discount) as amount
from customers c , orders o , order_details od 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
 )
 ✅ 매출액 기준으로 상위부터 10%씩 나누어 10개의 그룹 할당
 , cte_customer_info as (
 select customer_id
 	  , sum(amount) as 매출액
 	  , ntile(10) over(order by sum(amount) desc) as decil
 from cte_customers
 group by customer_id
 )
 ✅ decil 별 매출합계
 , cte_decil_group as (
 select decil, sum(매출액) as decil_sum_amount
 from cte_customer_info
 group by decil
 )
  ✅ decil 별 구성비
 , cte_decil_ratio as (
 select * 
 	  , sum(decil_sum_amount) over () as total
 	  , decil_sum_amount / sum(decil_sum_amount) over () * 100 as decil_sum_amount_rate
 from cte_decil_group
 )
  ✅ decil 별 구성비 누계
, cte_decil_agg as (
select *
	 , sum(decil_sum_amount_rate) over (order by decil) as cumsum
from cte_decil_ratio
order by decil
)
select *
from cte_decil_agg ;

Decil 분석의 단점

한 번의 구매로 고가의 상품을 구매하는 사용자와 저렴한 상품을 정기적으로 여러 번 구매하는 사용자를 동일 집단으로 판단하는 문제

검색 기간이 너무 길면 과거에 좋은 고객이었지만 현재 다른 서비스를 이용하는 인간 고객이 포함될 수 있습니다.

검색 기간이 너무 짧으면 해당 기간 동안 일시적으로 많이 구매한 사용자가 정기적으로 구매하는 안정적인 고객보다 우대 고객으로 포함될 수 있습니다.

RFM 분석

Recency : 구매가 얼마나 최근에 이루어졌습니까?

에프requency: 얼마나 자주 구매합니까?

onetary: 얼마를 지불하셨나요?

· 구매 가능성이 높은 고객을 식별하는 데이터 분석 방법

· 마케팅에서 사용자 발권 방법

Decil 분석의 단점 보완


📌 RFM 분석 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 ) 
✅ 기준일 선정
, cte_customer_maxo as (
select *, max(order_date) over() as maxo
from cte_customers
)
✅ RFM 산출
, cte_rfm as (
select customer_id 
	 , max(maxo) - max(order_date) as Recency
	 , count(distinct order_id) as Frequency
	 , sum(amount) as Monetary
from cte_customer_maxo
group by customer_id 
order by 2 , 3 desc, 4 desc
)
select *
from cte_rfm;


오늘의 도전

왼쪽으로 조인된 기본 테이블을 사용한 분석

⏯ inner join으로 만든 기본 테이블

select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id
⏩ left join으로 나타낸 기본 테이블

select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c 
	left join orders o on c.customer_id = o.customer_id 
	left join order_details od on o.order_id = od.order_id 
	left join products p on od.product_id = p.product_id 
	left join categories c2 on p.category_id = c2.category_id ;


두 기본 테이블의 차이점: Left 조인은 주문 내역이 없어도 모든 고객을 포함합니다.

고객별 ABC 분석

📌 고객별 ABC 분석 📌

-- customers : coustomer_id, company_name, cantact_title, country, city
-- orders : order_id, order_date, tear, month, day, quarter
-- order_details : product_id, unit_price, quantity, discount
-- products : product_name
-- categories : category_id, category_name

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 )
 ✅ 매출액 산출
 , cte_customer_sales as (
 select customer_id
 	  , sum(amount) as 매출액
 from cte_customers
 group by customer_id
 order by 매출액 desc
 )
 ✅ 구성비 산출
 , cte_customer_ratio as (
 select * 
 	  , 매출액 / sum(매출액) over () * 100 as 구성비
 from cte_customer_sales
 )
  ✅ 구성비 누계 산출
 , cte_customer_agg as (
 select *
 	  , sum(구성비) over (order by 구성비 desc) as 구성비누계
 from cte_customer_ratio
 )
  ✅ 등급 산출
 , cte_class as (
 select *
 	  , case
			when 구성비누계 <= 70 then 'A'
			when 구성비누계 <= 90 then 'B'
			else 'C'
		end as 등급
from cte_customer_agg
order by 구성비 desc
)
select *
from cte_class;