查看“2021云+数：51job”的源代码

import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver #导入包
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Start a Chrome browser session using the chromedriver executable at this path.
# The path is a raw string so its backslashes are taken literally instead of
# being parsed as (invalid, deprecated) escape sequences; the resulting string
# value is unchanged.
# NOTE(review): recent Selenium releases may expect the driver path to be
# passed through a Service object instead of positionally -- confirm against
# the installed Selenium version.
driver = webdriver.Chrome(r"C:\Program Files\Google\Chrome\Application/chromedriver.exe")

# Shared accumulator: find_result() appends one dict per job posting here.
list_data = []
def search():
    """Open the 51job listing page and submit the keyword search.

    Loads the search URL in the shared ``driver``, types the keyword into the
    element whose id is ``keywordInput``, presses ENTER, and then pauses for
    five seconds.
    """
    # Open the 51job search results page.
    driver.get('https://search.51job.com/list/120200%252C010000,000000,0000,00,9,99,%2B,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=')

    # Locate the keyword input box. find_element(By.XPATH, ...) is used
    # instead of the deprecated find_element_by_xpath() shortcut.
    search_box = driver.find_element(By.XPATH, '//*[@id = "keywordInput"]')
    search_box.send_keys('数据分析', Keys.ENTER)  # type the keyword, then press ENTER

    # Fixed pause after submitting -- presumably to let the results load
    # before the page source is read.
    time.sleep(5)

def _tag_text(tag):
    """Return the stripped text of a parsed tag, or '' when the tag is None."""
    return tag.get_text().strip() if tag is not None else ''


def find_result():
    """Parse the page currently loaded in ``driver`` and collect its job rows.

    Every ``div`` with class ``e`` inside the ``div`` with class ``j_joblist``
    is treated as one job posting. For each posting a dict with the keys
    ``company``, ``job``, ``area``, ``salary`` and ``pub_time`` is appended to
    the module-level ``list_data`` list.

    If the job-list container is absent the function returns without adding
    anything; a field whose tag is missing is recorded as an empty string
    rather than raising ``AttributeError``.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    container = soup.find("div", {'class': 'j_joblist'})
    if container is None:
        # No job list in the page source: nothing to collect.
        return

    # list_data is only mutated (never rebound), so no ``global`` is needed.
    for job in container.find_all("div", {'class': 'e'}):
        list_data.append({
            # Company name
            'company': _tag_text(job.find('a', class_='cname')),
            # Job title
            'job': _tag_text(job.find('span', class_='jname')),
            # Work location: a fixed label, not read from the page --
            # presumably matches the area codes in the search URL; verify.
            'area': "济南|北京",
            # Salary
            'salary': _tag_text(job.find('span', class_='sal')),
            # Publication time
            'pub_time': _tag_text(job.find('span', class_='time')),
        })

def save_info():
    """Print a marker line, then the collected postings as a DataFrame.

    Builds the DataFrame from the module-level ``list_data`` list; nothing is
    written to disk.
    """
    print("hello")
    print(pd.DataFrame(list_data))

def next_page(page_number):
    """Click the link on the current page whose text is ``page_number``.

    Args:
        page_number: Text of the pagination link to click. It is converted
            with ``str`` so both ``2`` and ``'2'`` are accepted.

    Raises:
        IndexError: If no link with that text is found on the page.
    """
    time.sleep(5)  # fixed pause before looking for the pagination link

    label = str(page_number)
    # find_elements(By.LINK_TEXT, ...) is used instead of the deprecated
    # find_elements_by_link_text() shortcut.
    links = driver.find_elements(By.LINK_TEXT, label)
    for link in links:
        print(link)

    if not links:
        # Same exception type the bare links[0] lookup raised, with context.
        raise IndexError('no link with text %r found on the page' % label)
    links[0].click()

    # TODO: consider an explicit wait for the link to become clickable
    # instead of the fixed sleep above.

def main():
    """Run the whole scrape and always shut the browser down afterwards.

    Submits the search, collects the first results page, then visits pages 2
    and 3 and collects each of them, and finally prints the collected data.
    The browser session is closed in a ``finally`` clause so the Chrome and
    chromedriver processes are not left running, even when a step fails.
    """
    try:
        search()
        find_result()

        # Collect result pages 2 and 3 (range(2, 4) stops before 4).
        for page in range(2, 4):
            next_page(str(page))
            find_result()

        # Output the collected results.
        save_info()
    finally:
        driver.quit()
    

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()