“TensorFlow - 线性回归”的版本间的差异
来自CloudWiki
(创建页面,内容为“ =前期准备== TensorFlow 相关 API 可以到在实验 TensorFlow - 相关 API 中学习。 ==模型构建== 示例代码: 现在您可以在 /home/ubuntu…”) |
|||
第1行: | 第1行: | ||
− | =前期准备== | + | ==前期准备== |
TensorFlow 相关 API 可以到在实验 TensorFlow - 相关 API 中学习。 | TensorFlow 相关 API 可以到在实验 TensorFlow - 相关 API 中学习。 | ||
第19行: | 第19行: | ||
class linearRegressionModel: | class linearRegressionModel: | ||
− | + | ||
+ | #对类中的变量进行初始化 | ||
def __init__(self,x_dimen): | def __init__(self,x_dimen): | ||
self.x_dimen = x_dimen | self.x_dimen = x_dimen | ||
self._index_in_epoch = 0 | self._index_in_epoch = 0 | ||
self.constructModel() | self.constructModel() | ||
− | self.sess = tf.Session() | + | self.sess = tf.Session()#创建一个会话 |
self.sess.run(tf.global_variables_initializer()) | self.sess.run(tf.global_variables_initializer()) | ||
第52行: | 第53行: | ||
return self._datas[start:end],self._labels[start:end] | return self._datas[start:end],self._labels[start:end] | ||
− | def constructModel(self): | + | def constructModel(self):#构建模型 |
− | self.x = tf.placeholder(tf.float32, [None,self.x_dimen]) | + | self.x = tf.placeholder(tf.float32, [None,self.x_dimen]) #横坐标值,用占位符x表示 |
− | self.y = tf.placeholder(tf.float32,[None,1]) | + | self.y = tf.placeholder(tf.float32,[None,1])#纵坐标值,用占位符y表示 y= w * x+ b |
− | self.w = self.weight_variable([self.x_dimen,1]) | + | self.w = self.weight_variable([self.x_dimen,1])#权重值w |
− | self.b = self.bias_variable([1]) | + | self.b = self.bias_variable([1])#偏移值 b |
− | self.y_prec = tf.nn.bias_add(tf.matmul(self.x, self.w), self.b) | + | self.y_prec = tf.nn.bias_add(tf.matmul(self.x, self.w), self.b) #y= w * x+ b |
− | mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y)) | + | mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y))#求真实值和预测值的差平方 |
− | l2 = tf.reduce_mean(tf.square(self.w)) | + | l2 = tf.reduce_mean(tf.square(self.w))#求权重的平均值 |
− | self.loss = mse + 0.15*l2 | + | self.loss = mse + 0.15*l2 #损失函数 |
− | self.train_step = tf.train.AdamOptimizer(0.1).minimize(self.loss) | + | self.train_step = tf.train.AdamOptimizer(0.1).minimize(self.loss) # 创建一个优化器,学习率为0.1,对数据进行分步训练 |
def train(self,x_train,y_train,x_test,y_test): | def train(self,x_train,y_train,x_test,y_test): | ||
第69行: | 第70行: | ||
self._num_datas = x_train.shape[0] | self._num_datas = x_train.shape[0] | ||
for i in range(5000): | for i in range(5000): | ||
− | batch = self.next_batch(100) | + | batch = self.next_batch(100)#每次选取100个样本 |
self.sess.run(self.train_step,feed_dict={self.x:batch[0],self.y:batch[1]}) | self.sess.run(self.train_step,feed_dict={self.x:batch[0],self.y:batch[1]}) | ||
if i%10 == 0: | if i%10 == 0: | ||
− | train_loss = self.sess.run(self.loss,feed_dict={self.x:batch[0],self.y:batch[1]}) | + | train_loss = self.sess.run(self.loss,feed_dict={self.x:batch[0],self.y:batch[1]})#开始训练,每隔10步,打印一下现在的损失函数 |
print('step %d,test_loss %f' % (i,train_loss)) | print('step %d,test_loss %f' % (i,train_loss)) | ||
def predict_batch(self,arr,batch_size): | def predict_batch(self,arr,batch_size): | ||
for i in range(0,len(arr),batch_size): | for i in range(0,len(arr),batch_size): | ||
− | yield arr[i:i + batch_size] | + | yield arr[i:i + batch_size]#一次性取batch_size个数据 |
def predict(self, x_predict): | def predict(self, x_predict): | ||
pred_list = [] | pred_list = [] | ||
for x_test_batch in self.predict_batch(x_predict,100): | for x_test_batch in self.predict_batch(x_predict,100): | ||
− | pred = self.sess.run(self.y_prec, {self.x:x_test_batch}) | + | pred = self.sess.run(self.y_prec, {self.x:x_test_batch})#??? |
− | pred_list.append(pred) | + | pred_list.append(pred)#??? |
− | return np.vstack(pred_list) | + | return np.vstack(pred_list)#??? |
</nowiki> | </nowiki> | ||
第105行: | 第106行: | ||
if __name__ == '__main__': | if __name__ == '__main__': | ||
− | x, y = make_regression(7000) | + | x, y = make_regression(7000) # X为样本特征,y为样本输出, 共7000个样本,每个样本1个特征 |
− | x_train,x_test,y_train, y_test = train_test_split(x, y, test_size=0.5) | + | x_train,x_test,y_train, y_test = train_test_split(x, y, test_size=0.5) #将样本分为测试数据和训练数据 |
− | y_lrm_train = y_train.reshape(-1, 1) | + | y_lrm_train = y_train.reshape(-1, 1)#调整矩阵的维数为1列矩阵 |
− | y_lrm_test = y_test.reshape(-1, 1) | + | y_lrm_test = y_test.reshape(-1, 1)#调整矩阵的维数为1列矩阵 |
linear = lrm(x.shape[1]) | linear = lrm(x.shape[1]) | ||
− | linear.train(x_train, y_lrm_train,x_test,y_lrm_test) | + | linear.train(x_train, y_lrm_train,x_test,y_lrm_test)#调用刚刚编写的tensorFlow的线性回归模型进行训练 |
− | y_predict = linear.predict(x_test) | + | y_predict = linear.predict(x_test)#求出预测值 |
− | print("Tensorflow R2: ", r2_score(y_predict.ravel(), y_lrm_test.ravel())) | + | print("Tensorflow R2: ", r2_score(y_predict.ravel(), y_lrm_test.ravel()))#打印预测比率 |
lr = LinearRegression() | lr = LinearRegression() | ||
− | y_predict = lr.fit(x_train, y_train).predict(x_test) | + | y_predict = lr.fit(x_train, y_train).predict(x_test)#用python自带的sklearn库进行训练 |
print("Sklearn R2: ", r2_score(y_predict, y_test)) #采用r2_score评分函数</nowiki> | print("Sklearn R2: ", r2_score(y_predict, y_test)) #采用r2_score评分函数</nowiki> | ||
2018年8月6日 (一) 08:17的版本
前期准备
TensorFlow 相关 API 可以到实验“TensorFlow - 相关 API”中学习。
模型构建
示例代码:
现在您可以在 /home/ubuntu 目录下创建源文件 linear_regression_model.py,内容可参考:
示例代码:/home/ubuntu/linear_regression_model.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np


class linearRegressionModel:
    """Linear regression ``y = x * w + b`` built on the TensorFlow 1.x graph API.

    The constructor builds the graph, opens a ``tf.Session`` and initializes the
    variables. ``train`` fits the model with mini-batch Adam and ``predict``
    returns predictions as a single column.
    """

    def __init__(self, x_dimen):
        """Build the graph for ``x_dimen`` input features and start a session.

        Args:
            x_dimen: number of columns of the feature matrix.
        """
        self.x_dimen = x_dimen
        # Cursor into the training data used by next_batch().
        self._index_in_epoch = 0
        self.constructModel()
        # The session is kept on the instance and reused by train()/predict().
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def weight_variable(self, shape):
        """Return a weight variable drawn from a truncated normal (stddev 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(self, shape):
        """Return a bias variable with every entry initialized to 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` rows of training data and labels.

        Batches are taken sequentially from ``self._datas`` / ``self._labels``.
        When the cursor would run past the end, both arrays are reshuffled with
        one shared permutation and the cursor restarts at 0; rows left over at
        the tail of the previous pass are skipped.

        Returns:
            A ``(data, labels)`` tuple of array slices.
        """
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_datas:
            # Pass finished: shuffle data and labels with the same permutation
            # so that rows stay aligned, then start over.
            perm = np.arange(self._num_datas)
            np.random.shuffle(perm)
            self._datas = self._datas[perm]
            self._labels = self._labels[perm]
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_datas
        end = self._index_in_epoch
        return self._datas[start:end], self._labels[start:end]

    def constructModel(self):
        """Define placeholders, parameters, loss and the training op."""
        # Input features, one row per sample.
        self.x = tf.placeholder(tf.float32, [None, self.x_dimen])
        # Regression targets as a single column.
        self.y = tf.placeholder(tf.float32, [None, 1])
        self.w = self.weight_variable([self.x_dimen, 1])
        self.b = self.bias_variable([1])
        # Prediction: y = x * w + b
        self.y_prec = tf.nn.bias_add(tf.matmul(self.x, self.w), self.b)

        # Mean squared error between predictions and targets.
        mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y))
        # L2 penalty: mean of the squared weights (not the mean of the weights).
        l2 = tf.reduce_mean(tf.square(self.w))
        self.loss = mse + 0.15 * l2
        # Adam optimizer with learning rate 0.1 minimizing the regularized loss.
        self.train_step = tf.train.AdamOptimizer(0.1).minimize(self.loss)

    def train(self, x_train, y_train, x_test, y_test):
        """Run 5000 Adam steps on mini-batches of 100 training samples.

        Every 10 steps the loss on the held-out set is printed. Earlier
        revisions printed the loss of the current training batch under the
        ``test_loss`` label and never used ``x_test`` / ``y_test``.

        Args:
            x_train: training features, indexable by an integer array.
            y_train: training targets with one column, aligned with ``x_train``.
            x_test: held-out features used only for loss reporting.
            y_test: held-out targets with one column, aligned with ``x_test``.
        """
        self._datas = x_train
        self._labels = y_train
        self._num_datas = x_train.shape[0]
        test_feed = {self.x: x_test, self.y: y_test}
        for i in range(5000):
            batch = self.next_batch(100)
            self.sess.run(self.train_step,
                          feed_dict={self.x: batch[0], self.y: batch[1]})
            if i % 10 == 0:
                test_loss = self.sess.run(self.loss, feed_dict=test_feed)
                print('step %d,test_loss %f' % (i, test_loss))

    def predict_batch(self, arr, batch_size):
        """Yield consecutive slices of ``arr`` holding up to ``batch_size`` items."""
        for i in range(0, len(arr), batch_size):
            yield arr[i:i + batch_size]

    def predict(self, x_predict):
        """Return predictions for ``x_predict`` as one stacked column.

        The input is evaluated in chunks of 100 rows and the per-chunk outputs
        are stacked vertically. Empty input yields an empty ``(0, 1)`` array
        instead of the ``ValueError`` that ``np.vstack`` raises on an empty list.
        """
        pred_list = [
            self.sess.run(self.y_prec, {self.x: x_test_batch})
            for x_test_batch in self.predict_batch(x_predict, 100)
        ]
        if not pred_list:
            return np.empty((0, 1), dtype=np.float32)
        return np.vstack(pred_list)
训练模型并和 sklearn 库线性回归模型对比
示例代码:
现在您可以在 /home/ubuntu 目录下创建源文件 run.py,内容可参考: 示例代码:/home/ubuntu/run.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from linear_regression_model import linearRegressionModel as lrm

# Train the TensorFlow model and scikit-learn's LinearRegression on the same
# split and compare their R^2 scores on the held-out half.
if __name__ == '__main__':
    # 7000 samples; the feature count is make_regression's default, and the
    # model below is sized from x.shape[1] rather than a hard-coded number.
    x, y = make_regression(7000)
    # Hold out half of the samples for evaluation.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)

    # The TensorFlow model expects targets as a single column.
    y_lrm_train = y_train.reshape(-1, 1)
    y_lrm_test = y_test.reshape(-1, 1)

    linear = lrm(x.shape[1])
    linear.train(x_train, y_lrm_train, x_test, y_lrm_test)
    y_predict = linear.predict(x_test)
    # r2_score takes (y_true, y_pred); the score is not symmetric, so the
    # ground truth must come first.
    print("Tensorflow R2: ", r2_score(y_lrm_test.ravel(), y_predict.ravel()))

    # Baseline: scikit-learn (a third-party package) fitted on the same split.
    lr = LinearRegression()
    y_predict = lr.fit(x_train, y_train).predict(x_test)
    print("Sklearn R2: ", r2_score(y_test, y_predict))
然后执行:
cd /home/ubuntu; python run.py
执行结果:
step 2410,test_loss 26.531937
step 2420,test_loss 26.542793
step 2430,test_loss 26.533974
step 2440,test_loss 26.530540
step 2450,test_loss 26.551474
step 2460,test_loss 26.541542
step 2470,test_loss 26.560783
step 2480,test_loss 26.538080
step 2490,test_loss 26.535666
('Tensorflow R2: ', 0.99999612588302389)
('Sklearn R2: ', 1.0)
完成
任务时间:时间未知
恭喜,您已完成本实验内容