“采集分析购物平台数据”的版本间的差异
来自CloudWiki
201708010242(讨论 | 贡献) |
201708010242(讨论 | 贡献) |
||
第83行: | 第83行: | ||
[[文件:查看点击量表.png]] | [[文件:查看点击量表.png]] | ||
+ | |||
+ | |||
+ | ==创建add_to_cart表 == | ||
+ | |||
+ | 创建表 ADD_TO_CART,代表加入购物车量 | ||
+ | |||
+ | <nowiki>CREATE TABLE ADD_TO_CART AS | ||
+ | SELECT ITEM_ID,COUNT(1) COUNT_1 | ||
+ | FROM RESULT | ||
+ | WHERE ATIION_TYPE = '1' | ||
+ | GROUP BY ITEM_ID | ||
+ | ORDER BY COUNT_1 DESC | ||
+ | LIMIT 100 ;</nowiki> | ||
+ | |||
+ | |||
+ | ==创建cololect表 == | ||
+ | |||
+ | 创建表 COLLECT,代表收藏量 | ||
+ | |||
+ | <nowiki> | ||
+ | CREATE TABLE COLLECT AS | ||
+ | SELECT ITEM_ID,COUNT(1) COUNT_1 | ||
+ | FROM RESULT | ||
+ | WHERE ATIION_TYPE ='2' | ||
+ | GROUP BY ITEM_ID | ||
+ | ORDER BY COUNT_1 | ||
+ | LIMIT 100; </nowiki> | ||
+ | |||
+ | ==创建emption 表 == | ||
+ | |||
+ | <nowiki>CREATE TABLE EMPTION AS | ||
+ | SELECT ITEM_ID,COUNT(1) COUNT_1 | ||
+ | FROM RESULT | ||
+ | WHERE ATIION_TYPE ='3' | ||
+ | GROUP BY ITEM_ID | ||
+ | ORDER BY COUNT_1 | ||
+ | LIMIT 100;</nowiki> | ||
+ | |||
+ | = 转化率计算 = |
2018年11月21日 (三) 11:55的版本
.
目录
使用 hive 对数据进行操作
在 master 上执行:
(1)# start-all.sh (启动 hadoop) (2)# zkServer.sh start(各个节点均执行) (3)# start-hbase.sh (直接运行这个命令需要将 HBASE 的 bin 目录也加
入到/etc/environment 中) (4)# 在 slave1 上输入命令:bin/hive --service metastore 启动 hive server,然后在 master 节点上输入命令:bin/hive 启动 hive 客户端,当所有 进程启动完全后方可执行以下操作。(所有命令需要在 hive 的安装目录下输入)
建表,加载数据
创建hongya数据库,并且查看
create database hongya; show databases;
建match_data表
create table match_data( user_id int, age_range int, gender int, merchant_id int, label int, activity_log varchar(1000) ) row format delimited fields terminated by ',';
加载数据
load data local inpath '/opt/soft/train_format2.csv' overwrite into table match_data;
select * from match_data limit 100;
创建中间表
CREATE TABLE RESULT AS SELECT USER_ID, SPLIT(LOG_SPLIT,':')[0] AS ITEM_ID, SPLIT(LOG_SPLIT,':')[2] AS BRAND_ID, SPLIT(LOG_SPLIT,':')[4] AS ATIION_TYPE FROM (SELECT USER_ID,LOG_SPLIT FROM match_data LATERAL VIEW EXPLODE(SPLIT(ACTIVITY_LOG,'#')) ACTIVITY_LOG AS LOG_SPLIT ) T1;
select * from RESULT limit 100;
创建点击量表
CREATE TABLE CLICK AS SELECT ITEM_ID, COUNT(1) COUNT_1 FROM RESULT WHERE ATIION_TYPE ='0' GROUP BY ITEM_ID ORDER BY COUNT_1 DESC LIMIT 100;
select * from click;
创建add_to_cart表
创建表 ADD_TO_CART,代表加入购物车量
CREATE TABLE ADD_TO_CART AS SELECT ITEM_ID,COUNT(1) COUNT_1 FROM RESULT WHERE ATIION_TYPE = '1' GROUP BY ITEM_ID ORDER BY COUNT_1 DESC LIMIT 100 ;
创建cololect表
创建表 COLLECT,代表收藏量
CREATE TABLE COLLECT AS SELECT ITEM_ID,COUNT(1) COUNT_1 FROM RESULT WHERE ATIION_TYPE ='2' GROUP BY ITEM_ID ORDER BY COUNT_1 LIMIT 100;
创建emption 表
CREATE TABLE EMPTION AS SELECT ITEM_ID,COUNT(1) COUNT_1 FROM RESULT WHERE ATIION_TYPE ='3' GROUP BY ITEM_ID ORDER BY COUNT_1 LIMIT 100;