“2020红亚杯:借贷数据分析”的版本间的差异

来自CloudWiki
跳转至: 导航搜索
数据分析
数据分析
 
第133行: 第133行:
 
'''求违约次数最高的职业的支持度,写入/root/college003/        支持度=违约,职业/总'''
 
'''求违约次数最高的职业的支持度,写入/root/college003/        支持度=违约,职业/总'''
  
[[文件:7-LJL%`(I-CZTX6`6GKRI1I.png]]
+
[[文件:5454545454.png]]
  
 
select Occupation,count(*) as a from loan
 
select Occupation,count(*) as a from loan

2020年11月28日 (六) 14:31的最新版本

题干

Prosper是美国一家p2p在在线借贷平台,网站可撮合一些有闲钱的人和基于用钱的热,用户若有贷款需求,可在网站上列出期望数额和可承受最大利率,潜在贷方则为数额和利率展开竞价,以下是该网站的部分数据

LoanStatus,BorrowerRate,ProsperScore,Occupation,EmploymentStatus,IsBorrowerHomeowner,CreditScoreRangeLower,CreditScoreRangeUpper,IncomeRange

Completed,0.158,,Other,Self-employed,TRUE,640,659,"$25,000-49,999"

Current,0.092,7,Professional,Employed,FALSE,680,699,"$50,000-74,999"

Completed,0.275,,Other,Not available,FALSE,480,499,Not displayed

Current,0.0974,9,Skilled Labor,Employed,TRUE,800,819,"$25,000-49,999"

Current,0.2085,4,Executive,Employed,TRUE,680,699,"$100,000+"

Current,0.1314,10,Professional,Employed,TRUE,740,759,"$100,000+"

Current,0.2712,2,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999"

Current,0.2019,4,Laborer,Employed,FALSE,700,719,"$25,000-49,999"

12345646876545641.jpg

建表语句

建库

create database 库名;

提取有用数据建表

create table test(

LoanStatus string,

BorrowerRate float,

ProsperScore int,

Occupation string,

EmploymentStatus string,

IsBorrowerHomeowner string,

CreditScoreRangeLower int,

CreditScoreRangeUpper int,

IncomeRange string

)row format delimited

fields terminated by ',';

数据清洗

导入另一个表

insert into table test select LoanStatus,BorrowerRate,ProsperScore,Occupation,EmploymentStatus,IsBorrowerHomeowner,CreditScoreRangeLower,CreditScoreRangeUpper,IncomeRange form perop;

导入后的数据示例

Completed,0.158,,Other,Self-employed,TRUE,640,659,"$25,000-49,999"

Current,0.092,7,Professional,Employed,FALSE,680,699,"$50,000-74,999"

Completed,0.275,,Other,Not available,FALSE,480,499,Not displayed

Current,0.0974,9,Skilled Labor,Employed,TRUE,800,819,"$25,000-49,999"

Current,0.2085,4,Executive,Employed,TRUE,680,699,"$100,000+"

Current,0.1314,10,Professional,Employed,TRUE,740,759,"$100,000+"

Current,0.2712,2,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999"

Current,0.2019,4,Laborer,Employed,FALSE,700,719,"$25,000-49,999"

Current,0.0629,9,Food Service,Employed,TRUE,820,839,"$25,000-49,999"

Current,0.0629,11,Food Service,Employed,TRUE,820,839,"$25,000-49,999"

Current,0.2489,7,Fireman,Employed,FALSE,640,659,"$75,000-99,999"

Completed,0.1325,,Waiter/Waitress,Full-time,FALSE,640,659,"$1-24,999"

Past Due (1-15 days),0.1435,4,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999"

Current,0.1034,8,Construction,Employed,TRUE,740,759,"$50,000-74,999"

Current,0.0949,8,Computer Programmer,Employed,FALSE,740,759,"$100,000+"

Defaulted,0.3177,5,Other,Other,TRUE,700,719,"$50,000-74,999"

Current,0.2809,4,Professional,Full-time,TRUE,640,659,"$75,000-99,999"

Chargedoff,0.125,,Professional,Not available,TRUE,760,779,Not displayed

Current,0.0974,7,Sales - Commission,Employed,FALSE,740,759,"$75,000-99,999"

Current,0.2225,8,Laborer,Employed,FALSE,680,699,"$25,000-49,999"

Current,0.1314,7,Retail Management,Employed,TRUE,660,679,"$50,000-74,999"

Completed,0.2075,,Professional,Full-time,FALSE,620,639,"$25,000-49,999"


数据分析

跟据不同职业的信用分中位数作为该职业信用得分,分析某职业信用得分 由大到小top5,写入/root/college001/

INSERT OVERWRITE LOCAL DIRECTORY '/root/college001/'

ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'

select Occupation,percentile(ProsperScore,0.5) as a from loan

group by Occupation order by a desc limit 5;

违约职业前3名,写入/root/college002/

INSERT OVERWRITE LOCAL DIRECTORY '/root/college002/'

ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'

select Occupation,count(*) as a from loan

where LoanStatus='Defaulted' and Occupation<>'\N'

group by Occupation order by a desc limit 3;

求违约次数最高的职业的支持度,写入/root/college003/ 支持度=违约,职业/总

5454545454.png

select Occupation,count(*) as a from loan

where LoanStatus='Defaulted'

group by Occupation order by a desc limit 1;


INSERT OVERWRITE LOCAL DIRECTORY '/root/college006/'

ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'

select round(第一个sql结果/count(*),5) from loan ;

数据分析

信用得分对借款的影响

select ProsperScore,avg(BorrowerRate) as t from loan group by ProsperScore;

  1. 职业对借款难易的影响 (拿不准)

select Occupation,avg(BorrowerRate) from loan group by Occupation;

违约客户的就业信息分布情况

select Occupation,count(*) from loan

where LoanStatus='Defaulted' or LoanStatus='Chargedoff'

group by Occupation;

贷款人的收入范围统计

select IncomeRange,count(*) from loan

group by IncomeRange;

不同职业的信用得分分布

select Occupation,ProsperScore,count(ProsperScore) from loan

group by Occupation,ProsperScore;