進入虛擬機,啟動HDFS和Yarn
?
1.創建表?
hive
show databases;
use test;
?銷售訂單表
create table t_dml (detail_id bigint,sale_date date, province string,city string,product_id bigint,cnt bigint,amt double
)row format delimited
fields terminated by ',';
?商品詳細表:
create table t_product (product_id bigint, product_name string, category_id bigint, category_name string, price double
)row format delimited
fields terminated by ',';
?2.加載數據
? 創建目錄存放數據
mkdir /opt/data
加載數據
load data local inpath '/opt/data/t_dml.csv' into table t_dml;
load data local inpath '/opt/data/t_product.csv' into table t_product;
3.?銷售數據分析
1.查詢t_dml中的銷售記錄的時間段:
select max(sale_date), min(sale_date) from t_dml;
2.查詢各商品類別的總銷售額
select t.category_name, sum(t.amt) as total_moneyfrom( select a.product_id, a.amt, b.category_namefrom t_dml ajoin t_product bon a.product_id=b.product_id) t
group by t.category_name;
3.店主想知道哪個商品最暢銷以及銷量排行榜,請查詢銷量前10的商品,顯示商品名稱,銷量,排名。
select a.product_name , t.cnt_total,rank() over (order by t.cnt_total desc) as rkfrom( select product_id, sum(cnt) as cnt_totalfrom t_dmlgroup by product_idorder by cnt_total desclimit 10) tjoin t_product aon t.product_id=a.product_id;
4.店主想知道各個市縣的購買力,同時也想知道自己的哪個商品在該地區最熱賣,通過創建中間表,優化查詢。
1. 創建結果存放表:
create table t_city_amt
( province string,city string,total_money double
);
create table t_city_prod
( province string,city string,product_id bigint,product_name string,cnt bigint
);
2.插入數據
insert into t_city_amt
select province,city,sum(amt)
from t_dml group by province,city;
SELECT * FROM t_city_amt;
?
insert into t_city_prod
select t.province,t.city,t.product_id,t.product_name,sum(t.cnt) from
(
select a.product_id,b.product_name,a.cnt,a.province,a.city
from t_dml a join t_product b
on a.product_id = b.product_id
) t
group by t.province,t.city,t.product_id,t.product_name;
SELECT * FROM t_city_prod;
3. 優化
from
( select a.*, b.product_namefrom t_dml ajoin t_product bon a.product_id=b.product_id
) t
insert overwrite table t_city_amtselect province, city, sum(amt)group by province, city
insert overwrite table t_city_prodselect province, city, product_id, product_name, sum(cnt)group by province, city, product_id, product_name;
5.統計各省最強購買力地區:
select province, city, total_moneyfrom( select province, city, total_money,dense_rank() over (partition by province order by total_money desc) as rkfrom t_city_amt) t
where t.rk=1
order by total_money desc;
6.統計各地區的最暢銷商品
select province, city, product_id, product_namefrom( select province, city, product_id, product_name,dense_rank() over (partition by province order by cnt desc) as rkfrom t_city_prod) t
where t.rk=1
order by province, city;
?
?
?
?
?
?
?
?