“丝路通:导入商品分类数据”的版本间的差异
来自CloudWiki
第1行: | 第1行: | ||
== 生成CSV文件== | == 生成CSV文件== | ||
+ | 输入:已有的爬虫数据。 | ||
+ | |||
<nowiki> | <nowiki> | ||
import time | import time | ||
第56行: | 第58行: | ||
==将CSV文件上传数据库== | ==将CSV文件上传数据库== | ||
+ | MariaDB [mxshop]> load data infile '/opt/dh_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n'; | ||
+ | |||
+ | <nowiki>Query OK, 415 rows affected, 431 warnings (0.06 sec) | ||
+ | Records: 415 Deleted: 0 Skipped: 0 Warnings: 431</nowiki> | ||
+ | |||
+ | MariaDB [mxshop]> select * from goods_goodscategory limit 0,10; | ||
+ | |||
+ | <nowiki>+----+--------------+------+------+---------------+--------+----------------- ----+--------------------+ | ||
+ | | id | name | code | desc | category_type | is_tab | add_time | parent_category_id | | ||
+ | +----+--------------+------+------+---------------+--------+----------------- ----+--------------------+ | ||
+ | | 1 | 生鲜食品 | sxsp | | 1 | 0 | 2020-06-24 16:34 :11 | NULL | | ||
+ | | 2 | 精品肉类 | jprl | | 2 | 0 | 2020-06-24 16:34 :11 | 1 | | ||
+ | | 3 | 羊肉 | yr | | 3 | 0 | 2020-06-24 16:34</nowiki> |
2020年9月24日 (四) 08:07的版本
生成CSV文件
输入:已有的爬虫数据。
import time

# Output CSV; it is later bulk-loaded into the goods_goodscategory table.
category_file = 'dh_category_data.csv'


def _category_row(name, level, timestamp):
    """Return one CSV line describing category *name* at hierarchy *level*."""
    # Field order presumably mirrors the goods_goodscategory columns
    # (id, name, code, desc, category_type, is_tab, add_time,
    # parent_category_id) -- confirm against the table schema.
    fields = ("NULL", name, name, "NULL", str(level), "0", timestamp, "NULL")
    return ",".join(fields) + "\n"


def _append_rows(rows):
    """Append already-formatted CSV text to ``category_file``."""
    # Append mode: running the export twice duplicates rows in the output.
    with open(category_file, "a", encoding="utf-8") as fw:
        fw.write(rows)


def read_category_file(source_file='dh_sub_category.csv', batch_size=100):
    """Convert the crawled sub-category CSV into category rows.

    Each input line is split on commas; field 1 is used as the level-1
    category name and field 2 as the level-2 category name.  A row is
    produced for every name the first time it is seen.  After every
    ``batch_size`` input lines the rows gathered so far are appended to
    ``category_file``.

    Args:
        source_file: Path of the crawler CSV to read.
        batch_size: Number of input lines between two flushes to disk.

    Returns:
        The CSV text of the rows that have NOT been written yet (those
        gathered since the last flush).  The caller must persist it.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
    """
    pending = []  # formatted rows waiting for the next flush
    seen = set()  # category names that already produced a row

    with open(source_file, "rt") as fp:
        for count, line in enumerate(fp, start=1):
            # A blank line (e.g. trailing newline at EOF) carries no data.
            if line.strip():
                data = line.rstrip("\r\n").split(',')
                class1, class2 = data[1], data[2]
                now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

                # Record each name right after its own check so a line whose
                # two names are equal yields a single row, not two.
                for level, name in ((1, class1), (2, class2)):
                    if name not in seen:
                        seen.add(name)
                        pending.append(_category_row(name, level, now_time))

            if count % batch_size == 0:
                _append_rows("".join(pending))
                pending.clear()

    return "".join(pending)


def main():
    """Run the export and write the final partial batch to disk."""
    remainder = read_category_file()
    # Without this the rows after the last full batch would be lost.
    if remainder:
        _append_rows(remainder)


if __name__ == '__main__':
    main()
将CSV文件上传数据库
MariaDB [mxshop]> load data infile '/opt/dh_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';
Query OK, 415 rows affected, 431 warnings (0.06 sec)
Records: 415  Deleted: 0  Skipped: 0  Warnings: 431
MariaDB [mxshop]> select * from goods_goodscategory limit 0,10;
+----+--------------+------+------+---------------+--------+---------------------+--------------------+
| id | name         | code | desc | category_type | is_tab | add_time            | parent_category_id |
+----+--------------+------+------+---------------+--------+---------------------+--------------------+
|  1 | 生鲜食品     | sxsp |      |             1 |      0 | 2020-06-24 16:34:11 |               NULL |
|  2 | 精品肉类     | jprl |      |             2 |      0 | 2020-06-24 16:34:11 |                  1 |
|  3 | 羊肉         | yr   |      |             3 |      0 | 2020-06-24 16:34