“丝路通:导入商品分类数据”的版本间的差异

来自CloudWiki
跳转至: 导航搜索
(创建页面,内容为“==导入图片== 将案例项目media文件夹下的brands和goods目录 拷贝至新项目相同位置处。 600px 注意这里位置实…”)
 
 
(未显示2个用户的3个中间版本)
第1行: 第1行:
==导入图片==
+
== 生成CSV文件==
将案例项目media文件夹下的brands和goods目录 拷贝至新项目相同位置处。
+
===敦煌网===
 +
输入:已有的爬虫数据。
  
[[文件:bd20-3-21.png|600px]]
+
<nowiki>
 +
import time
  
注意这里位置实际是和goods/models.py中的GoodsCategoryBrand表、Goods表中的图片上传字段相一致的,如果一个改了,另外一处也要改。
+
category_file ='dh_category_data.csv'
 +
def read_category_file():
 +
    cat_list = ""  # 创建类别网址列表
 +
    fp = open('dh_sub_category.csv', "rt")  # 打开csv文件
  
==导入数据==
+
    count= 0
===拷贝数据===
 
在db_tools目录下新建目录data ,
 
  
并将原项目的分类和产品数据category_data.py ,product_data.py 导入其中 。
+
    #类别名  类目级别  父类目级别
  
[[文件:bd20-3-22.png|600px]]
+
    s =set()#储存已有的类别
 +
    for line in fp:  # 文件对象可以直接迭代
 +
        count +=1
 +
       
 +
        d = {};
 +
        data = line.split(',')
 +
        d['line_num'] = data[0];
 +
        d['class1'] = data[1];
 +
        d['class2'] = data[2];
 +
        d['url'] = data[3]
 +
       
 +
        now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
 +
       
 +
        class1 ="NULL"+","+d['class1']+","+d['class1']+","+ \
 +
              "NULL"+","+str(1)+","+str(0)+","+now_time+","+"NULL\n"
 +
        class2 ="NULL"+","+d['class2']+","+d['class2']+","+ \
 +
              "NULL"+","+str(2)+","+str(0)+","+now_time+","+"NULL\n"
  
===导入数据===
+
        if d['class1'] not in s: #只添加之前没有的目录
在db_tools目录下新建文件 import_category_data.py
+
            cat_list += class1
 +
           
 +
           
 +
        if d['class2'] not in s:
 +
            cat_list += class2
 +
       
 +
        s.add(d['class1']);s.add(d['class2']);#将目录名收录进集合中
 +
        #print(d['class1'],d['class1'] in s)
 +
        #print(s)
 +
        if count%100 ==0:
 +
            fw = open(category_file,"a",encoding="utf-8")
 +
            fw.write(cat_list)
 +
            fw.close()
 +
            cat_list =""
 +
       
 +
       
 +
    fp.close()
 +
    return cat_list
  
内容如下:
+
if __name__ == '__main__':
 +
    cat_list =read_category_file()
 +
   
 +
</nowiki>
  
<nowiki>
+
==将CSV文件上传数据库==
#独立使用django的model
+
===敦煌网===
import sys
+
MariaDB [mxshop]> load data infile '/opt/dh_category_data.csv' into table goods_goodscategory  fields terminated by ',' optionally enclosed by '"' escaped  by '"' lines  terminated by '\r\n';
import os
 
  
 +
<nowiki>Query OK, 415 rows affected, 431 warnings (0.06 sec)
 +
Records: 415  Deleted: 0  Skipped: 0  Warnings: 431</nowiki>
  
pwd = os.path.dirname(os.path.realpath(__file__))
+
MariaDB [mxshop]> select * from goods_goodscategory  limit 0,10;
parent_path = os.path.dirname(pwd)#获取上级目录
 
sys.path.append(parent_path)#将上级目录添加到环境变量
 
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mxshop.settings")#引用项目的配置文件
 
  
import django
+
    <nowiki>+----+--------------+------+------+---------------+--------+-----------------                                                                                          ----+--------------------+
django.setup()
+
| id | name        | code | desc | category_type | is_tab | add_time                                                                                                      | parent_category_id |
 +
+----+--------------+------+------+---------------+--------+-----------------                                                                                          ----+--------------------+
 +
|  1 | 生鲜食品    | sxsp |      |            1 |      0 | 2020-06-24 16:34                                                                                          :11 |              NULL |
 +
|  2 | 精品肉类    | jprl |      |            2 |      0 | 2020-06-24 16:34                                                                                          :11 |                  1 |
 +
|  3 | 羊肉        | yr  |      |            3 |      0 | 2020-06-24 16:34</nowiki>
  
from goods.models import GoodsCategory
+
===阿里巴巴===
 +
MariaDB [mxshop]> load data infile '/opt/ali_category_data.csv' into table goods_goodscategory  fields terminated by ',' optionally enclosed by '"' escaped by '"' lines  terminated by '\r\n';
 +
 
 +
  <nowiki>Query OK, 2022 rows affected, 997 warnings (0.04 sec)
 +
Records: 2022  Deleted: 0  Skipped: 0  Warnings: 997</nowiki>
  
from db_tools.data.category_data import row_data
+
MariaDB [mxshop]> select count(*) from goods_goodscategory;
  
for lev1_cat in row_data:
+
<nowiki>+----------+
    lev1_intance = GoodsCategory()
+
| count(*) |
     lev1_intance.code = lev1_cat["code"]
+
+----------+
    lev1_intance.name = lev1_cat["name"]
+
|     2557 |
    lev1_intance.category_type = 1
+
+----------+
    lev1_intance.save()
+
1 row in set (0.00 sec)</nowiki>
  
    for lev2_cat in lev1_cat["sub_categorys"]:
 
        lev2_intance = GoodsCategory()
 
        lev2_intance.code = lev2_cat["code"]
 
        lev2_intance.name = lev2_cat["name"]
 
        lev2_intance.category_type = 2
 
        lev2_intance.parent_category = lev1_intance
 
        lev2_intance.save()
 
 
        for lev3_cat in lev2_cat["sub_categorys"]:
 
            lev3_intance = GoodsCategory()
 
            lev3_intance.code = lev3_cat["code"]
 
            lev3_intance.name = lev3_cat["name"]
 
            lev3_intance.category_type = 3
 
            lev3_intance.parent_category = lev2_intance
 
            lev3_intance.save()
 
</nowiki>
 
  
python3 import_category_data.py
 
  
验证结果:10.0.0.30:8000
+
===中国制造网===
  
[[文件:bd20-3-23.png|600px]]
+
MariaDB [mxshop]> load data infile '/opt/china_category_data.csv' into table goods_goodscategory  fields terminated by ',' optionally enclosed by '"' escaped by '"' lines  terminated by '\r\n';

2020年9月24日 (四) 15:57的最新版本

生成CSV文件

敦煌网

输入:已有的爬虫数据。

import time

category_file ='dh_category_data.csv'
def read_category_file():
    """Build goods-category CSV rows from ``dh_sub_category.csv``.

    Every non-blank input line is split on commas and read as
    ``line_num, class1, class2, url``.  For each first-level (class1) and
    second-level (class2) name that has not been seen on an earlier line,
    one output row is generated in the form::

        NULL,<name>,<name>,NULL,<level>,0,<YYYY-mm-dd HH:MM:SS>,NULL

    where ``<level>`` is 1 for class1 and 2 for class2.

    After every 100 processed lines the buffered rows are appended to
    ``category_file`` (UTF-8) and the buffer is cleared.

    Returns:
        str: the rows buffered since the last flush.  These have NOT been
        written to ``category_file``; the caller must persist them.

    Raises:
        IndexError: if a non-blank line has fewer than four
            comma-separated fields.
    """
    cat_list = ""   # rows generated since the last flush
    count = 0       # number of data lines processed so far
    seen = set()    # category names that already produced a row

    # `with` guarantees the input handle is released even when a malformed
    # line raises part-way through the file.
    with open('dh_sub_category.csv', "rt") as fp:
        for line in fp:  # a file object can be iterated line by line
            if not line.strip():
                # Blank lines (typically a trailing one) carry no data.
                continue
            count += 1

            data = line.split(',')
            if len(data) < 4:
                # Fail loudly instead of emitting a row whose name would
                # still contain the line terminator.
                raise IndexError(
                    "expected 4 comma-separated fields, got %d: %r"
                    % (len(data), line)
                )
            class1_name = data[1]
            class2_name = data[2]

            now_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

            # Decide for both names before recording either of them, so a
            # line whose class1 and class2 are identical and new still
            # yields a level-1 row and a level-2 row.
            class1_is_new = class1_name not in seen
            class2_is_new = class2_name not in seen

            if class1_is_new:
                cat_list += ",".join(
                    ["NULL", class1_name, class1_name, "NULL",
                     "1", "0", now_time, "NULL\n"]
                )
            if class2_is_new:
                cat_list += ",".join(
                    ["NULL", class2_name, class2_name, "NULL",
                     "2", "0", now_time, "NULL\n"]
                )

            seen.add(class1_name)
            seen.add(class2_name)

            if count % 100 == 0:
                # Periodic flush keeps the in-memory buffer small.
                with open(category_file, "a", encoding="utf-8") as fw:
                    fw.write(cat_list)
                cat_list = ""

    return cat_list

if __name__ == '__main__':
    # read_category_file() appends to category_file only once every 100
    # input lines and returns whatever is still buffered afterwards.
    # Persist that remainder here; otherwise the rows from the final
    # (line count % 100) input lines would never reach the CSV file.
    cat_list = read_category_file()
    if cat_list:
        with open(category_file, "a", encoding="utf-8") as fw:
            fw.write(cat_list)
    

将CSV文件上传数据库

敦煌网

MariaDB [mxshop]> load data infile '/opt/dh_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';

Query OK, 415 rows affected, 431 warnings (0.06 sec)
Records: 415  Deleted: 0  Skipped: 0  Warnings: 431

MariaDB [mxshop]> select * from goods_goodscategory limit 0,10;

+----+--------------+------+------+---------------+--------+---------------------+--------------------+
| id | name         | code | desc | category_type | is_tab | add_time            | parent_category_id |
+----+--------------+------+------+---------------+--------+---------------------+--------------------+
|  1 | 生鲜食品     | sxsp |      |             1 |      0 | 2020-06-24 16:34:11 |               NULL |
|  2 | 精品肉类     | jprl |      |             2 |      0 | 2020-06-24 16:34:11 |                  1 |
|  3 | 羊肉         | yr   |      |             3 |      0 | 2020-06-24 16:34

阿里巴巴

MariaDB [mxshop]> load data infile '/opt/ali_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';

 Query OK, 2022 rows affected, 997 warnings (0.04 sec)
Records: 2022  Deleted: 0  Skipped: 0  Warnings: 997

MariaDB [mxshop]> select count(*) from goods_goodscategory;

+----------+
| count(*) |
+----------+
|     2557 |
+----------+
1 row in set (0.00 sec)


中国制造网

MariaDB [mxshop]> load data infile '/opt/china_category_data.csv' into table goods_goodscategory fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';