Python爬虫案例:使用Selenium爬取中国制造网供应商

来自CloudWiki
39.78.68.254讨论2022年12月22日 (四) 13:43的版本 (创建页面,内容为“ ==代码== <nowiki> from selenium import webdriver from selenium.webdriver.common.keys import Keys import time # 使用前先导入By类 from selenium.webdriver.c…”)
(差异) ←上一版本 | 最后版本 (差异) | 下一版本→ (差异)
跳转至: 导航搜索

代码

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# 使用前先导入By类
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup

# Start a Chrome session using the chromedriver binary at this path.
# The path is a raw string so its backslashes are never treated as escape
# sequences (the plain literal relied on invalid escapes such as "\P" and "\G",
# which newer Python versions warn about).
# NOTE(review): newer Selenium releases expect the driver path to be supplied
# via a Service object instead of positionally -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")


# Simulate visiting the site at the given URL.
def browse(url, max_retries=3):
    """Load *url* in the shared browser and scroll through the page.

    Args:
        url: Address of the page to open.
        max_retries: How many additional attempts to make after a timeout
            before giving up. Values below zero are treated as zero.

    Returns:
        The string "ok" once the page has been loaded and scrolled.

    Raises:
        TimeoutException: If every attempt timed out.
    """
    # Imported here so the name is actually defined when the except clause is
    # evaluated; the file-level imports do not provide it.
    from selenium.common.exceptions import TimeoutException

    attempts = max(0, max_retries) + 1
    for attempt in range(1, attempts + 1):
        try:
            driver.get(url)
            scroll()
        except TimeoutException:
            # Retry a bounded number of times instead of recursing forever.
            if attempt == attempts:
                raise
            continue
        print("访问成功!")
        return "ok"

def scroll():
    """Simulate scrolling down the page.

    Moves the window to 1/10, 2/10, ... 10/10 of the document body's scroll
    height by running one JavaScript ``window.scrollTo`` call per step.
    """
    for tenth in range(1, 11):
        script = f"window.scrollTo(0,document.body.scrollHeight*{tenth}/10)"
        driver.execute_script(script)
    # No pause is taken after scrolling; a time.sleep(2) here was left disabled.

# Fetch the supplier information.
def get_suppliers():
    """Print the supplier names found in the currently loaded page.

    Parses the browser's current page source with the "html.parser" backend,
    collects every <h2> element whose class is "company-name", and prints the
    whitespace-stripped text of each one on its own line.
    """
    page = BeautifulSoup(driver.page_source, "html.parser")
    headings = page.find_all('h2', class_='company-name')
    for heading in headings:
        name = heading.get_text().strip()
        print(name)
    
    
def main():
    """Scrape one supplier directory page and report the elapsed time.

    Opens the Machine Tools manufacturers directory page, prints the supplier
    names found on it, then prints how long the run took in seconds. The
    browser session is shut down afterwards, even if scraping fails.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    start = time.perf_counter()
    url = r'https://www.made-in-china.com/manufacturers-directory/item3/Machine-Tools-1.html'
    try:
        browse(url)
        get_suppliers()
        elapsed = time.perf_counter() - start
        print("Time used:", elapsed)
    finally:
        # Close the browser and stop the driver process so they do not
        # linger after the script ends.
        driver.quit()
   


# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()