TensorFlow - 线性回归
来自CloudWiki
前期准备
TensorFlow 相关 API 可以在实验「TensorFlow - 相关 API」中学习。
模型构建
示例代码:
现在您可以在 /home/ubuntu 目录下创建源文件 linear_regression_model.py,内容可参考:
示例代码:/home/ubuntu/linear_regression_model.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np


class linearRegressionModel:
    """Linear regression (y = x.w + b) trained with Adam on a TF1 graph.

    The loss is the mean squared error plus an L2 penalty on the weights.
    The graph is built and a session is opened when the object is created.
    """

    def __init__(self, x_dimen, learning_rate=0.1, l2_weight=0.15):
        """Build the graph and initialise all variables.

        Args:
            x_dimen: number of input features per sample.
            learning_rate: step size handed to the Adam optimizer.
            l2_weight: factor applied to the L2 penalty in the loss.
        """
        self.x_dimen = x_dimen
        self.learning_rate = learning_rate
        self.l2_weight = l2_weight
        self._index_in_epoch = 0
        # The hyper-parameters above must be set before the graph is built.
        self.constructModel()
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def weight_variable(self, shape):
        """Return a variable drawn from a truncated normal (stddev 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(self, shape):
        """Return a variable filled with the constant 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def next_batch(self, batch_size):
        """Return the next `batch_size` samples and labels.

        Reads the data stored by `train`. When the remaining samples cannot
        fill a whole batch, the data set is reshuffled and the batch is taken
        from the start of the new order; the unused tail of the previous
        pass is skipped.

        Raises:
            ValueError: if `batch_size` exceeds the number of samples.
        """
        # Validate before touching any state (an assert would vanish under -O).
        if batch_size > self._num_datas:
            raise ValueError(
                'batch_size (%d) exceeds the number of training samples (%d)'
                % (batch_size, self._num_datas))

        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_datas:
            # Pass exhausted: shuffle samples and labels with one permutation
            # so that every row keeps its own label.
            perm = np.arange(self._num_datas)
            np.random.shuffle(perm)
            self._datas = self._datas[perm]
            self._labels = self._labels[perm]
            start = 0
            self._index_in_epoch = batch_size
        end = self._index_in_epoch
        return self._datas[start:end], self._labels[start:end]

    def constructModel(self):
        """Create placeholders, parameters, prediction, loss and train op."""
        # Inputs: one row per sample, `x_dimen` feature columns.
        self.x = tf.placeholder(tf.float32, [None, self.x_dimen])
        # Targets: one value per sample, as a column.
        self.y = tf.placeholder(tf.float32, [None, 1])
        self.w = self.weight_variable([self.x_dimen, 1])  # weights w
        self.b = self.bias_variable([1])                  # bias b
        # Prediction: y = x.w + b
        self.y_prec = tf.nn.bias_add(tf.matmul(self.x, self.w), self.b)
        # Mean squared difference between prediction and target.
        mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y))
        # Mean of the squared weights (L2 penalty), not the mean weight.
        l2 = tf.reduce_mean(tf.square(self.w))
        self.loss = mse + self.l2_weight * l2
        self.train_step = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.loss)

    def train(self, x_train, y_train, x_test, y_test,
              steps=5000, batch_size=100, log_every=10):
        """Run mini-batch training and print the loss periodically.

        Args:
            x_train: training samples, indexable by an integer array and
                exposing `.shape[0]` (e.g. a NumPy array).
            y_train: training targets aligned with `x_train`; they are fed
                to a placeholder of shape [None, 1].
            x_test: accepted for interface compatibility; not used here.
            y_test: accepted for interface compatibility; not used here.
            steps: number of optimizer steps.
            batch_size: samples per step.
            log_every: print the loss every this many steps.
        """
        self._datas = x_train
        self._labels = y_train
        self._num_datas = x_train.shape[0]
        for i in range(steps):
            batch_x, batch_y = self.next_batch(batch_size)
            feed = {self.x: batch_x, self.y: batch_y}
            self.sess.run(self.train_step, feed_dict=feed)
            if i % log_every == 0:
                # NOTE: this is the loss on the current *training* batch;
                # the 'test_loss' label is kept so the printed output
                # stays unchanged.
                train_loss = self.sess.run(self.loss, feed_dict=feed)
                print('step %d,test_loss %f' % (i, train_loss))

    def predict_batch(self, arr, batch_size):
        """Yield consecutive slices of `arr`, each at most `batch_size` long."""
        for i in range(0, len(arr), batch_size):
            yield arr[i:i + batch_size]

    def predict(self, x_predict, batch_size=100):
        """Return predictions for `x_predict` as one column array.

        The input is evaluated in chunks of `batch_size` rows and the
        per-chunk outputs are stacked vertically. Empty input yields an
        empty (0, 1) array instead of an error from `np.vstack`.
        """
        pred_list = []
        for x_test_batch in self.predict_batch(x_predict, batch_size):
            pred = self.sess.run(self.y_prec, {self.x: x_test_batch})
            pred_list.append(pred)
        if not pred_list:
            return np.empty((0, 1), dtype=np.float32)
        return np.vstack(pred_list)
训练模型并和 sklearn 库线性回归模型对比
示例代码:
现在您可以在 /home/ubuntu 目录下创建源文件 run.py,内容可参考: 示例代码:/home/ubuntu/run.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Train the TensorFlow linear model and compare it with scikit-learn."""
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from linear_regression_model import linearRegressionModel as lrm

if __name__ == '__main__':
    # x holds the sample features and y the targets: 7000 samples, with the
    # feature count left at make_regression's default (100, not 1).
    x, y = make_regression(7000)
    # Split the samples half and half into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)
    # The TensorFlow model expects the targets as a single column.
    y_lrm_train = y_train.reshape(-1, 1)
    y_lrm_test = y_test.reshape(-1, 1)

    # Train the TensorFlow model; its input width is the feature count.
    linear = lrm(x.shape[1])
    linear.train(x_train, y_lrm_train, x_test, y_lrm_test)
    # Predict on the held-out test samples.
    y_predict = linear.predict(x_test)
    # r2_score takes (y_true, y_pred); the metric is not symmetric, so the
    # true targets must come first.
    print("Tensorflow R2: ", r2_score(y_lrm_test.ravel(), y_predict.ravel()))

    # Same data fitted with scikit-learn's LinearRegression for comparison.
    lr = LinearRegression()
    y_predict = lr.fit(x_train, y_train).predict(x_test)
    print("Sklearn R2: ", r2_score(y_test, y_predict))
然后执行:
cd /home/ubuntu; python run.py
执行结果:
step 2410,test_loss 26.531937 step 2420,test_loss 26.542793 step 2430,test_loss 26.533974 step 2440,test_loss 26.530540 step 2450,test_loss 26.551474 step 2460,test_loss 26.541542 step 2470,test_loss 26.560783 step 2480,test_loss 26.538080 step 2490,test_loss 26.535666 ('Tensorflow R2: ', 0.99999612588302389) ('Sklearn R2: ', 1.0)
完成
任务时间:时间未知
恭喜,您已完成本实验内容