“丝路通:导入商品分类数据”的版本间的差异
来自CloudWiki
(创建页面,内容为“==导入图片== 将案例项目media文件夹下的brands和goods目录 拷贝至新项目相同位置处。 600px 注意这里位置实…”) |
|||
(未显示2个用户的3个中间版本) | |||
第1行: | 第1行: | ||
− | == | + | == 生成CSV文件== |
− | + | ===敦煌网=== | |
+ | 输入:已有的爬虫数据。 | ||
− | + | <nowiki> | |
+ | import time | ||
− | + | category_file ='dh_category_data.csv' | |
+ | def read_category_file(): | ||
+ | cat_list = "" # 创建类别网址列表 | ||
+ | fp = open('dh_sub_category.csv', "rt") # 打开csv文件 | ||
− | = | + | count= 0 |
− | |||
− | |||
− | + | #类别名 类目级别 父类目级别 | |
− | [[ | + | s =set()#储存已有的类别 |
+ | for line in fp: # 文件对象可以直接迭代 | ||
+ | count +=1 | ||
+ | |||
+ | d = {}; | ||
+ | data = line.split(',') | ||
+ | d['line_num'] = data[0]; | ||
+ | d['class1'] = data[1]; | ||
+ | d['class2'] = data[2]; | ||
+ | d['url'] = data[3] | ||
+ | |||
+ | now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) | ||
+ | |||
+ | class1 ="NULL"+","+d['class1']+","+d['class1']+","+ \ | ||
+ | "NULL"+","+str(1)+","+str(0)+","+now_time+","+"NULL\n" | ||
+ | class2 ="NULL"+","+d['class2']+","+d['class2']+","+ \ | ||
+ | "NULL"+","+str(2)+","+str(0)+","+now_time+","+"NULL\n" | ||
− | === | + | if d['class1'] not in s: #只添加之前没有的目录 |
− | + | cat_list += class1 | |
+ | |||
+ | |||
+ | if d['class2'] not in s: | ||
+ | cat_list += class2 | ||
+ | |||
+ | s.add(d['class1']);s.add(d['class2']);#将目录名收录进集合中 | ||
+ | #print(d['class1'],d['class1'] in s) | ||
+ | #print(s) | ||
+ | if count%100 ==0: | ||
+ | fw = open(category_file,"a",encoding="utf-8") | ||
+ | fw.write(cat_list) | ||
+ | fw.close() | ||
+ | cat_list ="" | ||
+ | |||
+ | |||
+ | fp.close() | ||
+ | return cat_list | ||
− | + | if __name__ == '__main__': | |
+ | cat_list =read_category_file() | ||
+ | |||
+ | </nowiki> | ||
− | + | ==将CSV文件上传数据库== | |
− | + | ===敦煌网=== | |
− | + | MariaDB [mxshop]> load data infile '/opt/dh_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n'; | |
− | |||
+ | <nowiki>Query OK, 415 rows affected, 431 warnings (0.06 sec) | ||
+ | Records: 415 Deleted: 0 Skipped: 0 Warnings: 431</nowiki> | ||
− | + | MariaDB [mxshop]> select * from goods_goodscategory limit 0,10; | |
− | |||
− | |||
− | |||
− | + | <nowiki>+----+--------------+------+------+---------------+--------+----------------- ----+--------------------+ | |
− | + | | id | name | code | desc | category_type | is_tab | add_time | parent_category_id | | |
+ | +----+--------------+------+------+---------------+--------+----------------- ----+--------------------+ | ||
+ | | 1 | 生鲜食品 | sxsp | | 1 | 0 | 2020-06-24 16:34 :11 | NULL | | ||
+ | | 2 | 精品肉类 | jprl | | 2 | 0 | 2020-06-24 16:34 :11 | 1 | | ||
+ | | 3 | 羊肉 | yr | | 3 | 0 | 2020-06-24 16:34</nowiki> | ||
− | + | ===阿里巴巴=== | |
+ | MariaDB [mxshop]> load data infile '/opt/ali_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n'; | ||
+ | |||
+ | <nowiki>Query OK, 2022 rows affected, 997 warnings (0.04 sec) | ||
+ | Records: 2022 Deleted: 0 Skipped: 0 Warnings: 997</nowiki> | ||
− | from | + | MariaDB [mxshop]> select count(*) from goods_goodscategory; |
− | + | <nowiki>+----------+ | |
− | + | | count(*) | | |
− | + | +----------+ | |
− | + | | 2557 | | |
− | + | +----------+ | |
− | + | 1 row in set (0.00 sec)</nowiki> | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | ===中国制造网=== | |
− | [ | + | MariaDB [mxshop]> load data infile '/opt/china_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n'; |
2020年9月24日 (四) 15:57的最新版本
生成CSV文件
敦煌网
输入:已有的爬虫数据。
import time

# Destination CSV: rows shaped for MariaDB's goods_goodscategory table.
category_file = 'dh_category_data.csv'


def read_category_file():
    """Convert crawled 'dh_sub_category.csv' into goods_goodscategory CSV rows.

    Each input line has the form ``line_num,class1,class2,url``.  For every
    category name not emitted before, one output row is appended to
    ``category_file`` in the table's column order:
    ``NULL,name,code,NULL,category_type,is_tab,add_time,parent_id``
    (level-1 categories get category_type 1, level-2 get 2).

    Rows are flushed to disk in batches of 100 input lines.  BUG FIX vs. the
    original: the final partial batch (input lines past the last multiple of
    100) is now also written before returning — previously it was returned
    but never persisted, and the caller discarded it.

    Returns:
        str: the CSV text of the final batch (kept for backward
        compatibility with existing callers; it is now also on disk).
    """
    pending = []   # output rows accumulated for the current batch
    seen = set()   # category names already emitted, for deduplication
    count = 0

    def _flush(rows):
        # Append a batch of finished rows to the output file.
        if rows:
            with open(category_file, "a", encoding="utf-8") as fw:
                fw.write("".join(rows))

    with open('dh_sub_category.csv', "rt", encoding="utf-8") as fp:
        for line in fp:                 # file objects iterate lazily by line
            count += 1
            data = line.split(',')
            if len(data) < 4:           # skip malformed/short lines instead of crashing
                continue
            class1, class2 = data[1], data[2]

            now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

            # Emit each name at most once; check BOTH before recording either,
            # matching the original's check-then-add ordering.
            if class1 not in seen:
                pending.append(",".join(
                    ["NULL", class1, class1, "NULL", "1", "0", now_time, "NULL"]
                ) + "\n")
            if class2 not in seen:
                pending.append(",".join(
                    ["NULL", class2, class2, "NULL", "2", "0", now_time, "NULL"]
                ) + "\n")
            seen.add(class1)
            seen.add(class2)

            if count % 100 == 0:        # periodic flush keeps memory bounded
                _flush(pending)
                pending = []

    tail = "".join(pending)
    _flush(pending)                     # persist the final partial batch (the fix)
    return tail


if __name__ == '__main__':
    cat_list = read_category_file()
将CSV文件上传数据库
敦煌网
MariaDB [mxshop]> load data infile '/opt/dh_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';
Query OK, 415 rows affected, 431 warnings (0.06 sec) Records: 415 Deleted: 0 Skipped: 0 Warnings: 431
MariaDB [mxshop]> select * from goods_goodscategory limit 0,10;
+----+--------------+------+------+---------------+--------+----------------- ----+--------------------+ | id | name | code | desc | category_type | is_tab | add_time | parent_category_id | +----+--------------+------+------+---------------+--------+----------------- ----+--------------------+ | 1 | 生鲜食品 | sxsp | | 1 | 0 | 2020-06-24 16:34 :11 | NULL | | 2 | 精品肉类 | jprl | | 2 | 0 | 2020-06-24 16:34 :11 | 1 | | 3 | 羊肉 | yr | | 3 | 0 | 2020-06-24 16:34
阿里巴巴
MariaDB [mxshop]> load data infile '/opt/ali_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';
Query OK, 2022 rows affected, 997 warnings (0.04 sec) Records: 2022 Deleted: 0 Skipped: 0 Warnings: 997
MariaDB [mxshop]> select count(*) from goods_goodscategory;
+----------+ | count(*) | +----------+ | 2557 | +----------+ 1 row in set (0.00 sec)
中国制造网
MariaDB [mxshop]> load data infile '/opt/china_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';