数据爬取
来自CloudWiki
import requests
from lxml import etree

# URL template of a product detail page; %d is filled with the numeric id.
URL_TEMPLATE = "http://880098.cn/index.php?s=/index/goods/index/id/%d.html"

# Marker text checked in the response body; when present the crawl stops.
# (The literal reads "resource does not exist or has been deleted".)
NOT_FOUND_MARKER = "资源不存在或已被删除"

# Seconds to wait for the server before giving up, so that a stalled
# connection cannot hang the crawl indefinitely.
REQUEST_TIMEOUT = 10

# Every field is addressed by an absolute XPath sharing these prefixes.
DETAIL_XPATH = '/html/body/div[5]/div[2]/div[2]'
INFO_XPATH = DETAIL_XPATH + '/div[2]'
FORM_XPATH = INFO_XPATH + '/dl/dd/div[2]/div[3]/form/div[1]'

NAME_XPATH = DETAIL_XPATH + '/div[1]/h1/text()'
OLD_PRICE_XPATH = INFO_XPATH + '/div/div[1]/dd/b/text()'
NOW_PRICE_XPATH = INFO_XPATH + '/div/div[2]/dd/b/text()'
SALES_XPATH = INFO_XPATH + '/ul/li[1]/div/span[2]/text()'
VIEWS_XPATH = INFO_XPATH + '/ul/li[2]/div/span[2]/text()'
COMMENTS_XPATH = INFO_XPATH + '/ul/li[3]/div/span[2]/text()'
# Union of the text nodes under div[1], div[3] and div[4] of the form.
STOCK_XPATH = ' | '.join(
    FORM_XPATH + '/div[%d]/dd/span//text()' % idx for idx in (1, 3, 4)
)

# Walk product ids 1..29 and print one comma-separated summary line per page.
for page in range(1, 30):
    response = requests.get(URL_TEMPLATE % page, timeout=REQUEST_TIMEOUT)
    html = response.text

    # Stop at the first id whose page carries the not-found marker.
    if NOT_FOUND_MARKER in html:
        break

    ele = etree.HTML(html)
    name = ele.xpath(NAME_XPATH)[0]
    oldprice = ele.xpath(OLD_PRICE_XPATH)[0]
    nowprice = ele.xpath(NOW_PRICE_XPATH)[0]
    num = ele.xpath(SALES_XPATH)[0]
    look = ele.xpath(VIEWS_XPATH)[0]
    comm = ele.xpath(COMMENTS_XPATH)[0]
    # Index 1 (the second matched text node) is kept from the original script.
    stock = ele.xpath(STOCK_XPATH)[1]

    print(
        "id" + str(page) + "," + name
        + ",原价" + oldprice
        + ",现价" + nowprice
        + ",累计销量" + num
        + ",浏览次数" + look
        + ",累计评价" + comm
        + ",库存" + stock
    )