2020红亚杯:借贷数据分析
题干
Prosper是美国一家p2p在在线借贷平台,网站可撮合一些有闲钱的人和基于用钱的热,用户若有贷款需求,可在网站上列出期望数额和可承受最大利率,潜在贷方则为数额和利率展开竞价,以下是该网站的部分数据
LoanStatus,BorrowerRate,ProsperScore,Occupation,EmploymentStatus,IsBorrowerHomeowner,CreditScoreRangeLower,CreditScoreRangeUpper,IncomeRange Completed,0.158,,Other,Self-employed,TRUE,640,659,"$25,000-49,999" Current,0.092,7,Professional,Employed,FALSE,680,699,"$50,000-74,999" Completed,0.275,,Other,Not available,FALSE,480,499,Not displayed Current,0.0974,9,Skilled Labor,Employed,TRUE,800,819,"$25,000-49,999" Current,0.2085,4,Executive,Employed,TRUE,680,699,"$100,000+" Current,0.1314,10,Professional,Employed,TRUE,740,759,"$100,000+" Current,0.2712,2,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999" Current,0.2019,4,Laborer,Employed,FALSE,700,719,"$25,000-49,999"
建表语句
建库
create database 库名;
提取有用数据建表
create table test(
LoanStatus string,
BorrowerRate float,
ProsperScore int,
Occupation string,
EmploymentStatus string,
IsBorrowerHomeowner string,
CreditScoreRangeLower int,
CreditScoreRangeUpper int,
IncomeRange string
)row format delimited
fields terminated by ',';
数据清洗
导入另一个表
insert into table test select LoanStatus,BorrowerRate,ProsperScore,Occupation,EmploymentStatus,IsBorrowerHomeowner,CreditScoreRangeLower,CreditScoreRangeUpper,IncomeRange form perop;
导入后的数据示例
Completed,0.158,,Other,Self-employed,TRUE,640,659,"$25,000-49,999" Current,0.092,7,Professional,Employed,FALSE,680,699,"$50,000-74,999" Completed,0.275,,Other,Not available,FALSE,480,499,Not displayed Current,0.0974,9,Skilled Labor,Employed,TRUE,800,819,"$25,000-49,999" Current,0.2085,4,Executive,Employed,TRUE,680,699,"$100,000+" Current,0.1314,10,Professional,Employed,TRUE,740,759,"$100,000+" Current,0.2712,2,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999" Current,0.2019,4,Laborer,Employed,FALSE,700,719,"$25,000-49,999" Current,0.0629,9,Food Service,Employed,TRUE,820,839,"$25,000-49,999" Current,0.0629,11,Food Service,Employed,TRUE,820,839,"$25,000-49,999" Current,0.2489,7,Fireman,Employed,FALSE,640,659,"$75,000-99,999" Completed,0.1325,,Waiter/Waitress,Full-time,FALSE,640,659,"$1-24,999" Past Due (1-15 days),0.1435,4,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999" Current,0.1034,8,Construction,Employed,TRUE,740,759,"$50,000-74,999" Current,0.0949,8,Computer Programmer,Employed,FALSE,740,759,"$100,000+" Defaulted,0.3177,5,Other,Other,TRUE,700,719,"$50,000-74,999" Current,0.2809,4,Professional,Full-time,TRUE,640,659,"$75,000-99,999" Chargedoff,0.125,,Professional,Not available,TRUE,760,779,Not displayed Current,0.0974,7,Sales - Commission,Employed,FALSE,740,759,"$75,000-99,999" Current,0.2225,8,Laborer,Employed,FALSE,680,699,"$25,000-49,999" Current,0.1314,7,Retail Management,Employed,TRUE,660,679,"$50,000-74,999" Completed,0.2075,,Professional,Full-time,FALSE,620,639,"$25,000-49,999"
数据分析
跟据不同职业的信用分中位数作为该职业信用得分,分析某职业信用得分 由大到小top5,写入/root/college001/
INSERT OVERWRITE LOCAL DIRECTORY '/root/college001/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select Occupation,percentile(ProsperScore,0.5) as a from loan
group by Occupation order by a desc limit 5;
违约职业前3名,写入/root/college002/
INSERT OVERWRITE LOCAL DIRECTORY '/root/college002/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select Occupation,count(*) as a from loan
where LoanStatus='Defaulted' and Occupation<>'\N'
group by Occupation order by a desc limit 3;
求违约次数最高的职业的支持度,写入/root/college003/ 支持度=违约,职业/总
select Occupation,count(*) as a from loan
where LoanStatus='Defaulted'
group by Occupation order by a desc limit 1;
INSERT OVERWRITE LOCAL DIRECTORY '/root/college006/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select round(第一个sql结果/count(*),5) from loan ;
数据分析
信用得分对借款的影响
select ProsperScore,avg(BorrowerRate) as t from loan group by ProsperScore;
- 职业对借款难易的影响 (拿不准)
select Occupation,avg(BorrowerRate) from loan group by Occupation;
违约客户的就业信息分布情况
select Occupation,count(*) from loan
where LoanStatus='Defaulted' or LoanStatus='Chargedoff'
group by Occupation;
贷款人的收入范围统计
select IncomeRange,count(*) from loan
group by IncomeRange;
不同职业的信用得分分布
select Occupation,ProsperScore,count(ProsperScore) from loan
group by Occupation,ProsperScore;