Python爬虫案例：使用Selenium爬取中国制造网供应商

代码

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# 使用前先导入By类
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup

# Shared browser instance used by every function in this script. The
# chromedriver location is written as a raw string so the Windows backslashes
# are taken literally instead of being parsed as (invalid) escape sequences.
# NOTE(review): newer Selenium releases expect the driver path to be supplied
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")


# Open the given URL in the shared browser, simulating a site visit.
def browse(url, max_retries=3):
    """Load *url* in the shared browser and scroll through the page.

    A timed-out attempt is retried, up to ``max_retries`` additional times.

    Args:
        url: Address of the page to open.
        max_retries: Number of extra attempts allowed after a timeout.
            Values below zero are treated as zero (a single attempt).

    Returns:
        The string "ok" once the page has been loaded and scrolled.

    Raises:
        TimeoutException: If the final attempt also times out.
    """
    # Imported locally so the except clause below can resolve the name; the
    # module-level imports do not bring it into scope.
    from selenium.common.exceptions import TimeoutException

    attempts = max(1, max_retries + 1)
    for attempt in range(1, attempts + 1):
        try:
            driver.get(url)
            scroll()
        except TimeoutException:
            # Retry with a loop instead of recursion so a page that never
            # loads cannot exhaust the call stack; surface the error once the
            # retry budget is spent.
            if attempt == attempts:
                raise
        else:
            print("访问成功！")
            return "ok"

def scroll():
    """Scroll the current page downwards in ten equal steps.

    Step k moves the viewport to k/10 of ``document.body.scrollHeight``, so
    the last step lands at the full document height. No pause is inserted
    between steps.
    """
    template = "window.scrollTo(0,document.body.scrollHeight*{}/10)"
    for step in range(1, 11):
        driver.execute_script(template.format(step))

# Collect and print the supplier information.
def get_suppliers():
    """Print the supplier names found on the page currently loaded in the browser.

    The browser's page source is parsed with BeautifulSoup's "html.parser"
    backend, and the whitespace-stripped text of every ``<h2>`` element with
    class ``company-name`` is printed on its own line.
    """
    html = driver.page_source
    parsed = BeautifulSoup(html, "html.parser")
    for heading in parsed.find_all('h2', class_='company-name'):
        name = heading.get_text().strip()
        print(name)
    
    
def main():
    """Visit the Machine Tools directory page, print its suppliers, and report the elapsed time."""
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    url = r'https://www.made-in-china.com/manufacturers-directory/item3/Machine-Tools-1.html'
    browse(url)
    get_suppliers()
    elapsed = time.perf_counter() - start
    print("Time used:", elapsed)
   


# Run the scraper only when this file is executed directly as a script.
if __name__ == "__main__":
    main()

Python爬虫案例：使用Selenium爬取中国制造网供应商

代码

导航菜单

个人工具

命名空间

变种

视图

更多

搜索

导航

工具