Python爬虫案例:使用Selenium爬取中国制造网供应商
来自CloudWiki
39.78.68.254(讨论)2022年12月22日 (四) 13:43的版本 (创建页面,内容为“ ==代码== <nowiki> from selenium import webdriver from selenium.webdriver.common.keys import Keys import time # 使用前先导入By类 from selenium.webdriver.c…”)
代码
"""Scrape supplier names from a made-in-china.com directory page.

The page is loaded in a Selenium-driven Chrome instance, scrolled from top
to bottom in steps, and the resulting HTML is parsed with BeautifulSoup to
extract the text of every ``<h2 class="company-name">`` element.
"""
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
# By and Keys are not used below; the imports are kept so that code elsewhere
# relying on them being available from this module keeps working.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences.
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium 4 releases expect a Service object instead --
# confirm against the installed Selenium version.
driver = webdriver.Chrome(
    r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
)


def browse(url, retries=3):
    """Open *url* in the shared driver and scroll through the page.

    Args:
        url: Address of the page to load.
        retries: Maximum number of attempts before giving up. Values below 1
            are treated as 1.

    Returns:
        The string ``"ok"`` once the page has been loaded and scrolled.

    Raises:
        TimeoutException: If every attempt times out.
    """
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            driver.get(url)
            scroll()
        except TimeoutException:
            # Bounded retry instead of unbounded recursion: re-raise once the
            # last attempt has failed so the caller sees the real error.
            if attempt == attempts:
                raise
        else:
            print("访问成功!")
            return "ok"


def scroll(steps=10):
    """Scroll the current page from top to bottom in *steps* equal jumps."""
    for i in range(1, steps + 1):
        driver.execute_script(
            f"window.scrollTo(0,document.body.scrollHeight*{i}/{steps})"
        )


def get_suppliers():
    """Print and return the supplier names found on the current page.

    Returns:
        A list with the stripped text of each ``<h2 class="company-name">``
        element in the driver's current page source, in document order.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")
    names = [
        heading.get_text().strip()
        for heading in soup.find_all('h2', class_='company-name')
    ]
    for name in names:
        print(name)
    return names


def main():
    """Scrape one directory page and report the elapsed time."""
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    url = r'https://www.made-in-china.com/manufacturers-directory/item3/Machine-Tools-1.html'
    try:
        browse(url)
        get_suppliers()
    finally:
        # Always shut the browser down so no driver process is left behind.
        driver.quit()
    elapsed = time.perf_counter() - start
    print("Time used:", elapsed)


if __name__ == '__main__':
    main()