“数据清洗,抽取”的版本间的差异

来自CloudWiki
跳转至: 导航搜索
数据清洗
第1行: 第1行:
 +
'''上传数据'''
  
 +
hadoop fs -mkdir -p /college
 +
hadoop fs -put /root/college/loan.csv /college
 +
 +
 +
'''创建数据库'''
 +
 +
 +
create database 库名;
 +
create table 表名1(
 +
id int,
 +
name string,
 +
price int,
 +
views int,
 +
sales int,
 +
stock int
 +
)row format delimited
 +
fields terminated by ',';
 +
 +
'''导入数据-》hive'''
 +
 +
 +
load data inpath '/college/loan.csv' into table 表名1;
 +
 +
 +
load data local inpath '/root/data' into table psn;
 +
 +
'''创建新表存储'''
 +
 +
create table 表名2 like 表名1;
 +
 +
'''数据过滤'''
 +
 +
 +
insert into 表名2
 +
select * from 表名1
 +
where id is not null and name is not null and price is not null and views is not null and sales is not null and stock is not null
 +
and name not like '%包包%' and name not like '%衣%' and name not like '%女士%'
 +
group by id,name,price,views,sales,stock;
 +
 +
 +
'''数据分析并下载'''
 +
 +
 +
INSERT OVERWRITE LOCAL DIRECTORY '/root/college022/'
 +
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
 +
SELECT count(distinct author) FROM data;
 +
 +
 +
 +
insert into table test partition(age='xx') select id,name,tel from perop;

2020年11月17日 (二) 01:14的版本

上传数据

hadoop fs -mkdir -p /college
hadoop fs -put /root/college/loan.csv /college


创建数据库


create database 库名; create table 表名1( id int, name string, price int, views int, sales int, stock int )row format delimited fields terminated by ',';

导入数据-》hive


load data inpath '/college/loan.csv' into table 表名1;


load data local inpath '/root/data' into table psn;

创建新表存储

create table 表名2 like 表名1;

数据过滤


insert into 表名2 select * from 表名1 where id is not null and name is not null and price is not null and views is not null and sales is not null and stock is not null and name not like '%包包%' and name not like '%衣%' and name not like '%女士%' group by id,name,price,views,sales,stock;


数据分析并下载


INSERT OVERWRITE LOCAL DIRECTORY '/root/college022/' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' SELECT count(distinct author) FROM data;


insert into table test partition(age='xx') select id,name,tel from perop;