“采集分析购物平台数据”的版本间的差异

来自CloudWiki
跳转至: 导航搜索
 
(未显示同一用户的8个中间版本)
第15行: 第15行:
  
  
== 查看数据库列表: ==
+
== 建表,加载数据 ==
  
 +
创建hongya数据库,并且查看
  
 +
<nowiki>create database hongya;
 +
show databases;
 +
</nowiki>
 +
[[文件:Show.png]]
  
== 建数据库,建表 ==
+
建match_data表
 +
 
 +
<nowiki>create    table  match_data(
 +
user_id  int,
 +
age_range  int,
 +
gender  int,
 +
merchant_id int,
 +
label  int,
 +
activity_log varchar(1000)
 +
)
 +
row format  delimited fields terminated by  ',';</nowiki>
 +
加载数据
 +
 
 +
<nowiki>
 +
load data local
 +
inpath '/opt/soft/train_format2.csv'
 +
overwrite into table match_data;</nowiki>
 +
 
 +
[[文件:数据加载完成.png]]
 +
 
 +
select * from match_data limit 100;
 +
 
 +
[[文件:查看前一百行.png]]
 +
 
 +
 
 +
== 创建中间表 ==
 +
 
 +
<nowiki>CREATE  TABLE    RESULT  AS
 +
SELECT  USER_ID,
 +
SPLIT(LOG_SPLIT,':')[0]    AS  ITEM_ID,
 +
SPLIT(LOG_SPLIT,':')[2]    AS  BRAND_ID,
 +
SPLIT(LOG_SPLIT,':')[4]    AS  ATIION_TYPE
 +
FROM (SELECT  USER_ID,LOG_SPLIT
 +
FROM match_data
 +
LATERAL   VIEW  EXPLODE(SPLIT(ACTIVITY_LOG,'#')) ACTIVITY_LOG  AS LOG_SPLIT )  T1;</nowiki>
 +
 
 +
[[文件:中间表.png]]
 +
 
 +
<nowiki>select * from RESULT limit 100;</nowiki>
 +
[[文件:查看中间表.png]]
 +
 
 +
== 创建点击量表 ==
 +
 
 +
 
 +
<nowiki>
 +
CREATE TABLE CLICK  AS
 +
SELECT  ITEM_ID, COUNT(1)  COUNT_1
 +
FROM  RESULT
 +
WHERE ATIION_TYPE ='0'
 +
GROUP BY ITEM_ID
 +
ORDER BY COUNT_1  DESC
 +
LIMIT 100;</nowiki>
 +
 
 +
[[文件:创建点击量表.png]]
 +
 
 +
 
 +
<nowiki>select * from click;</nowiki>
 +
 
 +
[[文件:查看点击量表.png]]
 +
 
 +
 
 +
==创建add_to_cart表 ==
 +
 
 +
创建表 ADD_TO_CART,代表加入购物车量
 +
 
 +
<nowiki>CREATE  TABLE    ADD_TO_CART AS
 +
SELECT  ITEM_ID,COUNT(1)  COUNT_1
 +
FROM  RESULT
 +
WHERE  ATIION_TYPE = '1'
 +
GROUP BY ITEM_ID
 +
ORDER BY COUNT_1  DESC
 +
LIMIT 100 ;</nowiki>
 +
 
 +
[[文件:Add to cart.png]]
 +
==创建cololect表 ==
 +
 
 +
创建表 COLLECT,代表收藏量
 +
 
 +
<nowiki>
 +
CREATE    TABLE  COLLECT AS
 +
SELECT ITEM_ID,COUNT(1) COUNT_1
 +
FROM RESULT
 +
WHERE ATIION_TYPE ='2'
 +
GROUP BY ITEM_ID
 +
ORDER BY COUNT_1
 +
LIMIT 100; </nowiki>
 +
 
 +
==创建emption 表 ==
 +
 
 +
<nowiki>CREATE  TABLE  EMPTION AS
 +
SELECT ITEM_ID,COUNT(1) COUNT_1 
 +
FROM RESULT
 +
WHERE ATIION_TYPE ='3'
 +
GROUP BY ITEM_ID 
 +
ORDER BY COUNT_1
 +
LIMIT 100;</nowiki>
 +
 
 +
[[文件:创建emption表.png]]
 +
 
 +
== 创建 click_emp,写入商品点击购买转化率 ==
 +
 
 +
<nowiki>CREATE TABLE  CLICK_EMP AS
 +
SELECT  ITEM_ID,SUM(IF(ATIION_TYPE='0'  ,1,0))/COUNT(1)
 +
CLICK_EMP_RATE
 +
FROM RESULT T1
 +
GROUP BY ITEM_ID
 +
ORDER BY CLICK_EMP_RATE  DESC;</nowiki>
 +
 
 +
 
 +
== 创建ADD_EMP ==
 +
 
 +
<nowiki>CREATE TABLE  ADD_EMP AS 
 +
SELECT  ITEM_ID,SUM(IF(ATIION_TYPE = '1',1,0))/COUNT(1)
 +
CLICK_EMP_RATE 
 +
FROM RESULT T1
 +
GROUP BY ITEM_ID   
 +
ORDER BY CLICK_EMP_RATE  DESC;</nowiki>
 +
 
 +
 
 +
== 创建表 collect_emp ==
 +
<nowiki>
 +
CREATE TABLE  COLLECT_EMP  AS 
 +
SELECT  ITEM_ID,SUM(IF(ATIION_TYPE = '1',1,0))/COUNT(1)
 +
CLICK_EMP_RATE 
 +
FROM RESULT T1
 +
GROUP BY ITEM_ID   
 +
ORDER BY CLICK_EMP_RATE  DESC;</nowiki>

2018年11月21日 (三) 12:24的最新版本

.

使用 hive 对数据进行操作

在 master 上执行:

(1)# start-all.sh (启动 hadoop)
(2)# zkServer.sh start(各个节点均执行)
(3)# start-hbase.sh (直接运行这个命令需要将 HBASE 的 bin 目录也加
 

入到/etc/environment 中) (4)# 在 slave1 上输入命令:bin/hive --service metastore 启动 hive server,然后在 master 节点上输入命令:bin/hive 启动 hive 客户端,当所有 进程启动完全后方可执行以下操作。(所有命令需要在 hive 的安装目录下输入)


建表,加载数据

创建hongya数据库,并且查看

create database hongya;
show databases;

Show.png

建match_data表

create    table   match_data(
user_id  int,
age_range  int,
gender  int,
merchant_id int,
label  int, 
activity_log varchar(1000)
) 
row format  delimited fields terminated by  ',';

加载数据

load data local
inpath '/opt/soft/train_format2.csv' 
overwrite into table match_data;

数据加载完成.png

select * from match_data limit 100;

查看前一百行.png


创建中间表

CREATE   TABLE    RESULT  AS 
SELECT   USER_ID,
SPLIT(LOG_SPLIT,':')[0]    AS   ITEM_ID,
SPLIT(LOG_SPLIT,':')[2]    AS   BRAND_ID,
SPLIT(LOG_SPLIT,':')[4]    AS   ATIION_TYPE
FROM (SELECT  USER_ID,LOG_SPLIT
FROM match_data
LATERAL	  VIEW  EXPLODE(SPLIT(ACTIVITY_LOG,'#')) ACTIVITY_LOG  AS LOG_SPLIT )  T1;

中间表.png

select * from RESULT limit 100;

查看中间表.png

创建点击量表

CREATE TABLE CLICK  AS 
SELECT  ITEM_ID, COUNT(1)  COUNT_1 
FROM  RESULT
WHERE ATIION_TYPE ='0'
GROUP BY ITEM_ID
ORDER BY COUNT_1  DESC
LIMIT 100;

创建点击量表.png


select * from click;

查看点击量表.png


创建add_to_cart表

创建表 ADD_TO_CART,代表加入购物车量

CREATE   TABLE    ADD_TO_CART AS
SELECT  ITEM_ID,COUNT(1)   COUNT_1 
FROM   RESULT
WHERE   ATIION_TYPE = '1'
GROUP BY ITEM_ID 
ORDER BY COUNT_1  DESC
LIMIT 100 ;

Add to cart.png

创建cololect表

创建表 COLLECT,代表收藏量

CREATE     TABLE   COLLECT AS
SELECT ITEM_ID,COUNT(1) COUNT_1
FROM RESULT
WHERE ATIION_TYPE ='2'
GROUP BY ITEM_ID 
ORDER BY COUNT_1
LIMIT 100; 

创建emption 表

CREATE   TABLE   EMPTION AS 
SELECT ITEM_ID,COUNT(1) COUNT_1  
FROM RESULT
WHERE ATIION_TYPE ='3'
GROUP BY ITEM_ID  
ORDER BY COUNT_1
LIMIT 100;

创建emption表.png

创建 click_emp,写入商品点击购买转化率

CREATE TABLE   CLICK_EMP AS 
SELECT   ITEM_ID,SUM(IF(ATIION_TYPE='0'   ,1,0))/COUNT(1)
CLICK_EMP_RATE
FROM RESULT T1
GROUP BY ITEM_ID 
ORDER BY CLICK_EMP_RATE  DESC;


创建ADD_EMP

CREATE TABLE   ADD_EMP AS  
SELECT   ITEM_ID,SUM(IF(ATIION_TYPE = '1',1,0))/COUNT(1)
CLICK_EMP_RATE  
FROM RESULT T1
GROUP BY ITEM_ID    
ORDER BY CLICK_EMP_RATE   DESC;


创建表 collect_emp

CREATE TABLE  COLLECT_EMP  AS  
SELECT   ITEM_ID,SUM(IF(ATIION_TYPE = '1',1,0))/COUNT(1)
CLICK_EMP_RATE  
FROM RESULT T1
GROUP BY ITEM_ID    
ORDER BY CLICK_EMP_RATE   DESC;