“数据清洗,抽取”的版本间的差异

来自CloudWiki
跳转至: 导航搜索
建表语句
建表语句
第5行: 第5行:
 
'''create database 库名;'''
 
'''create database 库名;'''
  
建表
+
建表,Prosper借贷平台 共81个字段
  
 
create table test3(
 
create table test3(
 +
 
ListingKey string ,
 
ListingKey string ,
 +
 
ListingNumber int,
 
ListingNumber int,
 +
 
ListingCreationDate TIMESTAMP,
 
ListingCreationDate TIMESTAMP,
 +
 
CreditGrade string,
 
CreditGrade string,
 +
 
Term int,
 
Term int,
 +
 
LoanStatus string,
 
LoanStatus string,
 +
 
ClosedDate TIMESTAMP,
 
ClosedDate TIMESTAMP,
 +
 
BorrowerAPR float,
 
BorrowerAPR float,
 +
 
BorrowerRate float,
 
BorrowerRate float,
 +
 
LenderYield float,
 
LenderYield float,
 +
 
EstimatedEffectiveYield float,
 
EstimatedEffectiveYield float,
 +
 
EstimatedLoss float,
 
EstimatedLoss float,
 +
 
EstimatedReturn float,
 
EstimatedReturn float,
 +
 
ProsperRating(numeric) int,
 
ProsperRating(numeric) int,
 +
 
ProsperRating (Alpha) string,
 
ProsperRating (Alpha) string,
 +
 
ProsperScore int,
 
ProsperScore int,
 +
 
ListingCategory (numeric) int,
 
ListingCategory (numeric) int,
 +
 
BorrowerState string,
 
BorrowerState string,
 +
 
Occupation string,
 
Occupation string,
 +
 
EmploymentStatus string,
 
EmploymentStatus string,
 +
 
EmploymentStatusDuration int,
 
EmploymentStatusDuration int,
 +
 
IsBorrowerHomeowner string,
 
IsBorrowerHomeowner string,
 +
 
CurrentlyInGroup string,
 
CurrentlyInGroup string,
 +
 
GroupKey string,
 
GroupKey string,
 +
 
DateCreditPulled TIMESTAMP,
 
DateCreditPulled TIMESTAMP,
 +
 
CreditScoreRangeLower int,
 
CreditScoreRangeLower int,
 +
 
CreditScoreRangeUpper int,
 
CreditScoreRangeUpper int,
 +
 
FirstRecordedCreditLine TIMESTAMP,
 
FirstRecordedCreditLine TIMESTAMP,
 +
 
CurrentCreditLines int,
 
CurrentCreditLines int,
 +
 
OpenCreditLines int,
 
OpenCreditLines int,
 +
 
TotalCreditLinespast7years int,
 
TotalCreditLinespast7years int,
 +
 
OpenRevolvingAccounts int,
 
OpenRevolvingAccounts int,
 +
 
OpenRevolvingMonthlyPayment int,
 
OpenRevolvingMonthlyPayment int,
 +
 
InquiriesLast6Months int,
 
InquiriesLast6Months int,
 +
 
TotalInquiries int,
 
TotalInquiries int,
 +
 
CurrentDelinquencies int,
 
CurrentDelinquencies int,
 +
 
AmountDelinquent int,
 
AmountDelinquent int,
 +
 
DelinquenciesLast7Years int,
 
DelinquenciesLast7Years int,
 +
 
PublicRecordsLast10Years int,
 
PublicRecordsLast10Years int,
 +
 
PublicRecordsLast12Months int,
 
PublicRecordsLast12Months int,
 +
 
RevolvingCreditBalance int,
 
RevolvingCreditBalance int,
 +
 
BankcardUtilization float,
 
BankcardUtilization float,
 +
 
AvailableBankcardCredit int,
 
AvailableBankcardCredit int,
 +
 
TotalTrades int,
 
TotalTrades int,
 +
 
TradesNeverDelinquent (percentage) float,
 
TradesNeverDelinquent (percentage) float,
 +
 
TradesOpenedLast6Months int,
 
TradesOpenedLast6Months int,
 +
 
DebtToIncomeRatio float,
 
DebtToIncomeRatio float,
 +
 
IncomeRange string,
 
IncomeRange string,
 +
 
IncomeVerifiable string,
 
IncomeVerifiable string,
 +
 
StatedMonthlyIncome double,
 
StatedMonthlyIncome double,
 +
 
LoanKey string,
 
LoanKey string,
 +
 
TotalProsperLoans int,
 
TotalProsperLoans int,
 +
 
TotalProsperPaymentsBilled int,
 
TotalProsperPaymentsBilled int,
 +
 
OnTimeProsperPayments int,
 
OnTimeProsperPayments int,
 +
 
ProsperPaymentsLessThanOneMonthLate int,
 
ProsperPaymentsLessThanOneMonthLate int,
 +
 
ProsperPaymentsOneMonthPlusLate int,
 
ProsperPaymentsOneMonthPlusLate int,
 +
 
ProsperPrincipalBorrowed int,
 
ProsperPrincipalBorrowed int,
 +
 
ProsperPrincipalOutstanding double,
 
ProsperPrincipalOutstanding double,
 +
 
ScorexChangeAtTimeOfListing int,
 
ScorexChangeAtTimeOfListing int,
 +
 
LoanCurrentDaysDelinquent int,
 
LoanCurrentDaysDelinquent int,
 +
 
LoanFirstDefaultedCycleNumber int,
 
LoanFirstDefaultedCycleNumber int,
 +
 
LoanMonthsSinceOrigination int,
 
LoanMonthsSinceOrigination int,
 +
 
LoanNumber int,
 
LoanNumber int,
 +
 
LoanOriginalAmount int,
 
LoanOriginalAmount int,
 +
 
LoanOriginationDate TIMESTAMP,
 
LoanOriginationDate TIMESTAMP,
 +
 
LoanOriginationQuarter string,
 
LoanOriginationQuarter string,
 +
 
MemberKey string,
 
MemberKey string,
 +
 
MonthlyLoanPayment float,
 
MonthlyLoanPayment float,
 +
 
LP_CustomerPayments double,
 
LP_CustomerPayments double,
 +
 
LP_CustomerPrincipalPayments double,
 
LP_CustomerPrincipalPayments double,
 +
 
LP_InterestandFees double,
 
LP_InterestandFees double,
 
LP_ServiceFees double,
 
LP_ServiceFees double,
 +
 
LP_CollectionFees double,
 
LP_CollectionFees double,
 +
 
LP_GrossPrincipalLoss double,
 
LP_GrossPrincipalLoss double,
 +
 
LP_NetPrincipalLoss double,
 
LP_NetPrincipalLoss double,
 +
 
LP_NonPrincipalRecoverypayments double,
 
LP_NonPrincipalRecoverypayments double,
 +
 
PercentFunded double,
 
PercentFunded double,
 +
 
Recommendations int,
 
Recommendations int,
 +
 
InvestmentFromFriendsCount int,
 
InvestmentFromFriendsCount int,
 +
 
InvestmentFromFriendsAmount double,
 
InvestmentFromFriendsAmount double,
 +
 
Investors int
 
Investors int
 +
 
)row format delimited
 
)row format delimited
 +
 
fields terminated by ',';
 
fields terminated by ',';

2020年11月12日 (四) 13:54的版本

建表语句

建库

-- Create the database. 库名 is a placeholder ("database name") - substitute the real name before running.
create database 库名;

建表,Prosper借贷平台 共81个字段

-- Table for the Prosper lending platform data set: 81 columns stored as
-- comma-delimited text.
--
-- Columns are declared in exactly the same order as before. Only the spelling
-- of four names changed: the earlier spellings contained spaces and
-- parentheses, which cannot appear in an unquoted identifier and made the
-- whole statement fail to parse.
--   ProsperRating(numeric)             -> ProsperRating_numeric
--   ProsperRating (Alpha)              -> ProsperRating_Alpha
--   ListingCategory (numeric)          -> ListingCategory_numeric
--   TradesNeverDelinquent (percentage) -> TradesNeverDelinquent_percentage
-- Column types are unchanged.
create table test3(
    ListingKey string,
    ListingNumber int,
    ListingCreationDate timestamp,
    CreditGrade string,
    Term int,
    LoanStatus string,
    ClosedDate timestamp,
    BorrowerAPR float,
    BorrowerRate float,
    LenderYield float,
    EstimatedEffectiveYield float,
    EstimatedLoss float,
    EstimatedReturn float,
    ProsperRating_numeric int,
    ProsperRating_Alpha string,
    ProsperScore int,
    ListingCategory_numeric int,
    BorrowerState string,
    Occupation string,
    EmploymentStatus string,
    EmploymentStatusDuration int,
    IsBorrowerHomeowner string,
    CurrentlyInGroup string,
    GroupKey string,
    DateCreditPulled timestamp,
    CreditScoreRangeLower int,
    CreditScoreRangeUpper int,
    FirstRecordedCreditLine timestamp,
    CurrentCreditLines int,
    OpenCreditLines int,
    TotalCreditLinespast7years int,
    OpenRevolvingAccounts int,
    OpenRevolvingMonthlyPayment int,
    InquiriesLast6Months int,
    TotalInquiries int,
    CurrentDelinquencies int,
    AmountDelinquent int,
    DelinquenciesLast7Years int,
    PublicRecordsLast10Years int,
    PublicRecordsLast12Months int,
    RevolvingCreditBalance int,
    BankcardUtilization float,
    AvailableBankcardCredit int,
    TotalTrades int,
    TradesNeverDelinquent_percentage float,
    TradesOpenedLast6Months int,
    DebtToIncomeRatio float,
    IncomeRange string,
    IncomeVerifiable string,
    StatedMonthlyIncome double,
    LoanKey string,
    TotalProsperLoans int,
    TotalProsperPaymentsBilled int,
    OnTimeProsperPayments int,
    ProsperPaymentsLessThanOneMonthLate int,
    ProsperPaymentsOneMonthPlusLate int,
    ProsperPrincipalBorrowed int,
    ProsperPrincipalOutstanding double,
    ScorexChangeAtTimeOfListing int,
    LoanCurrentDaysDelinquent int,
    LoanFirstDefaultedCycleNumber int,
    LoanMonthsSinceOrigination int,
    LoanNumber int,
    LoanOriginalAmount int,
    LoanOriginationDate timestamp,
    LoanOriginationQuarter string,
    MemberKey string,
    MonthlyLoanPayment float,
    LP_CustomerPayments double,
    LP_CustomerPrincipalPayments double,
    LP_InterestandFees double,
    LP_ServiceFees double,
    LP_CollectionFees double,
    LP_GrossPrincipalLoss double,
    LP_NetPrincipalLoss double,
    LP_NonPrincipalRecoverypayments double,
    PercentFunded double,
    Recommendations int,
    InvestmentFromFriendsCount int,
    InvestmentFromFriendsAmount double,
    Investors int
)
row format delimited
fields terminated by ',';