|
|
第1行: |
第1行: |
− |
| |
− | == 建表语句 ==
| |
− | 建库
| |
− |
| |
− | '''create database 库名;'''
| |
− |
| |
− | '''建表,Prosper借贷平台 共81个字段'''
| |
− |
| |
− | create table Prosper(
| |
− |
| |
− | ListingKey string ,
| |
− |
| |
− | ListingNumber int,
| |
− |
| |
− | ListingCreationDate TIMESTAMP,
| |
− |
| |
− | CreditGrade string,
| |
− |
| |
− | Term int,
| |
− |
| |
− | LoanStatus string,
| |
− |
| |
− | ClosedDate TIMESTAMP,
| |
− |
| |
− | BorrowerAPR float,
| |
− |
| |
− | BorrowerRate float,
| |
− |
| |
− | LenderYield float,
| |
− |
| |
− | EstimatedEffectiveYield float,
| |
− |
| |
− | EstimatedLoss float,
| |
− |
| |
− | EstimatedReturn float,
| |
− |
| |
− | ProsperRating(numeric) int,
| |
− |
| |
− | ProsperRating (Alpha) string,
| |
− |
| |
− | ProsperScore int,
| |
− |
| |
− | ListingCategory (numeric) int,
| |
− |
| |
− | BorrowerState string,
| |
− |
| |
− | Occupation string,
| |
− |
| |
− | EmploymentStatus string,
| |
− |
| |
− | EmploymentStatusDuration int,
| |
− |
| |
− | IsBorrowerHomeowner string,
| |
− |
| |
− | CurrentlyInGroup string,
| |
− |
| |
− | GroupKey string,
| |
− |
| |
− | DateCreditPulled TIMESTAMP,
| |
− |
| |
− | CreditScoreRangeLower int,
| |
− |
| |
− | CreditScoreRangeUpper int,
| |
− |
| |
− | FirstRecordedCreditLine TIMESTAMP,
| |
− |
| |
− | CurrentCreditLines int,
| |
− |
| |
− | OpenCreditLines int,
| |
− |
| |
− | TotalCreditLinespast7years int,
| |
− |
| |
− | OpenRevolvingAccounts int,
| |
− |
| |
− | OpenRevolvingMonthlyPayment int,
| |
− |
| |
− | InquiriesLast6Months int,
| |
− |
| |
− | TotalInquiries int,
| |
− |
| |
− | CurrentDelinquencies int,
| |
− |
| |
− | AmountDelinquent int,
| |
− |
| |
− | DelinquenciesLast7Years int,
| |
− |
| |
− | PublicRecordsLast10Years int,
| |
− |
| |
− | PublicRecordsLast12Months int,
| |
− |
| |
− | RevolvingCreditBalance int,
| |
− |
| |
− | BankcardUtilization float,
| |
− |
| |
− | AvailableBankcardCredit int,
| |
− |
| |
− | TotalTrades int,
| |
− |
| |
− | TradesNeverDelinquent (percentage) float,
| |
− |
| |
− | TradesOpenedLast6Months int,
| |
− |
| |
− | DebtToIncomeRatio float,
| |
− |
| |
− | IncomeRange string,
| |
− |
| |
− | IncomeVerifiable string,
| |
− |
| |
− | StatedMonthlyIncome double,
| |
− |
| |
− | LoanKey string,
| |
− |
| |
− | TotalProsperLoans int,
| |
− |
| |
− | TotalProsperPaymentsBilled int,
| |
− |
| |
− | OnTimeProsperPayments int,
| |
− |
| |
− | ProsperPaymentsLessThanOneMonthLate int,
| |
− |
| |
− | ProsperPaymentsOneMonthPlusLate int,
| |
− |
| |
− | ProsperPrincipalBorrowed int,
| |
− |
| |
− | ProsperPrincipalOutstanding double,
| |
− |
| |
− | ScorexChangeAtTimeOfListing int,
| |
− |
| |
− | LoanCurrentDaysDelinquent int,
| |
− |
| |
− | LoanFirstDefaultedCycleNumber int,
| |
− |
| |
− | LoanMonthsSinceOrigination int,
| |
− |
| |
− | LoanNumber int,
| |
− |
| |
− | LoanOriginalAmount int,
| |
− |
| |
− | LoanOriginationDate TIMESTAMP,
| |
− |
| |
− | LoanOriginationQuarter string,
| |
− |
| |
− | MemberKey string,
| |
− |
| |
− | MonthlyLoanPayment float,
| |
− |
| |
− | LP_CustomerPayments double,
| |
− |
| |
− | LP_CustomerPrincipalPayments double,
| |
− |
| |
− | LP_InterestandFees double,
| |
− | LP_ServiceFees double,
| |
− |
| |
− | LP_CollectionFees double,
| |
− |
| |
− | LP_GrossPrincipalLoss double,
| |
− |
| |
− | LP_NetPrincipalLoss double,
| |
− |
| |
− | LP_NonPrincipalRecoverypayments double,
| |
− |
| |
− | PercentFunded double,
| |
− |
| |
− | Recommendations int,
| |
− |
| |
− | InvestmentFromFriendsCount int,
| |
− |
| |
− | InvestmentFromFriendsAmount double,
| |
− |
| |
− | Investors int
| |
− |
| |
− | )row format delimited
| |
− |
| |
− | fields terminated by ',';
| |
− |
| |
− | '''提取有用数据建表'''
| |
− |
| |
− | create table test(
| |
− |
| |
− | LoanStatus string,
| |
− |
| |
− | BorrowerRate float,
| |
− |
| |
− | ProsperScore int,
| |
− |
| |
− | Occupation string,
| |
− |
| |
− | EmploymentStatus string,
| |
− |
| |
− | IsBorrowerHomeowner string,
| |
− |
| |
− | CreditScoreRangeLower int,
| |
− |
| |
− | CreditScoreRangeUpper int,
| |
− |
| |
− | IncomeRange string
| |
− |
| |
− | )row format delimited
| |
− |
| |
− | fields terminated by ',';
| |
| | | |
| ==数据清洗== | | ==数据清洗== |
2020年11月17日 (二) 01:10的版本
数据清洗
导入另一个表
insert into table test select LoanStatus,BorrowerRate,ProsperScore,Occupation,EmploymentStatus,IsBorrowerHomeowner,CreditScoreRangeLower,CreditScoreRangeUpper,IncomeRange form perop;
导入后的数据示例
Completed,0.158,,Other,Self-employed,TRUE,640,659,"$25,000-49,999"
Current,0.092,7,Professional,Employed,FALSE,680,699,"$50,000-74,999"
Completed,0.275,,Other,Not available,FALSE,480,499,Not displayed
Current,0.0974,9,Skilled Labor,Employed,TRUE,800,819,"$25,000-49,999"
Current,0.2085,4,Executive,Employed,TRUE,680,699,"$100,000+"
Current,0.1314,10,Professional,Employed,TRUE,740,759,"$100,000+"
Current,0.2712,2,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999"
Current,0.2019,4,Laborer,Employed,FALSE,700,719,"$25,000-49,999"
Current,0.0629,9,Food Service,Employed,TRUE,820,839,"$25,000-49,999"
Current,0.0629,11,Food Service,Employed,TRUE,820,839,"$25,000-49,999"
Current,0.2489,7,Fireman,Employed,FALSE,640,659,"$75,000-99,999"
Completed,0.1325,,Waiter/Waitress,Full-time,FALSE,640,659,"$1-24,999"
Past Due (1-15 days),0.1435,4,Sales - Retail,Employed,FALSE,680,699,"$25,000-49,999"
Current,0.1034,8,Construction,Employed,TRUE,740,759,"$50,000-74,999"
Current,0.0949,8,Computer Programmer,Employed,FALSE,740,759,"$100,000+"
Defaulted,0.3177,5,Other,Other,TRUE,700,719,"$50,000-74,999"
Current,0.2809,4,Professional,Full-time,TRUE,640,659,"$75,000-99,999"
Chargedoff,0.125,,Professional,Not available,TRUE,760,779,Not displayed
Current,0.0974,7,Sales - Commission,Employed,FALSE,740,759,"$75,000-99,999"
Current,0.2225,8,Laborer,Employed,FALSE,680,699,"$25,000-49,999"
Current,0.1314,7,Retail Management,Employed,TRUE,660,679,"$50,000-74,999"
Completed,0.2075,,Professional,Full-time,FALSE,620,639,"$25,000-49,999"