Big Yellow

来自CloudWiki
跳转至: 导航、搜索

知识点一

if简用

  • 用if语句写一个简易的密码输入小程序
def account_login():
    """Prompt for the password until the correct one is entered.

    Reads a line with ``input`` and compares it with the hard-coded
    password ``'12345'``. On a match it prints a success message and
    returns; otherwise it prints an error message and asks again.
    """
    # Retry with a loop rather than recursion so that any number of wrong
    # attempts cannot exhaust the interpreter's recursion limit.
    while True:
        password = input('Password:')
        # The comparison already yields a bool, so test it directly;
        # comparing that bool with '12345' again would never be true.
        password_correct = password == '12345'
        if password_correct:
            print('Login success!')
            return
        print('Wrong password or invalid input!')
account_login()                                  # call the function to start the password prompt
  • 用if语句写一个简易的修改密码的小程序
# Password store: element 0 is the reset code, and the last element is
# always the currently valid password (new passwords are appended).
password_list = ['*#*#', '123456']


def account_login():
    """Prompt for a password, supporting login and password change.

    * Entering the current password (the last element of
      ``password_list``) prints a success message and returns.
    * Entering the reset code (the first element of ``password_list``)
      asks for a new password, appends it to ``password_list`` and then
      prompts for login again.
    * Anything else prints an error message and prompts again.
    """
    # Retry with a loop rather than recursion so that a long run of wrong
    # attempts cannot raise RecursionError.
    while True:
        password = input('password:')
        if password == password_list[-1]:
            print('Login success!')
            return
        if password == password_list[0]:
            new_password = input('Enter a new password:')
            password_list.append(new_password)
            print('Your password has changed successfully!')
        else:
            print('Wrong password or invalid input!')
# Start the interactive login / password-change prompt.
account_login()

循环简用

  • 小小循环
# range(1, 11) stops before 11, so num runs from 1 through 10.
for num in range(1, 11):
    left_side = str(num) + '+1='
    # print() inserts a single space between its two arguments.
    print(left_side, num + 1)

循环,if合用

  • 让歌曲和歌手匹配
# Songs to be matched with the artist who recorded them.
songslist = ['Holy Diver', 'Thunderstruck', 'Rebel Rebel']
# Walk the list and pick the artist with an if/elif chain; every line is
# printed in the same "<song> - <artist>" form.
for song in songslist:
    if song == 'Holy Diver':
        print(song, '- Dio')
    elif song == 'Thunderstruck':
        print(song, '- AC/DC')
    elif song == 'Rebel Rebel':
        print(song, '- David Bowie')

嵌套循环

  • 简易的乘法表
# Print the full 9 x 9 multiplication table, one product per line:
# the outer loop picks the left factor, the inner loop the right one.
for i in range(1, 10):
    for j in range(1, 10):
        product = i * j
        print('{} X {} = {}'.format(i, j, product))

知识点二

python爬虫

  • 我的第一个爬虫
import requests
from bs4 import BeautifulSoup
# Fetch the blog's front page while sending a browser-like User-Agent header.
link = "http://www.santostang.com"
headers = {'User-Agent': 'Mozilla/5.0(Windows; U; Windows NT 6.1;en-US;rv:1.9.1.6)Gecko/20091201 Firefox/3.5.6'}
r = requests.get(link, headers=headers)

# Parse the HTML and take the link text inside the first
# <h1 class="post-title"> element as the title.
soup = BeautifulSoup(r.text, "lxml")
title = soup.find("h1", class_="post-title").a.text.strip()
print(title)

# Append the title to title.txt. The write must be indented inside the
# with-block, which also closes the file on exit, so no explicit close()
# is needed. The encoding is fixed so non-ASCII titles are written the same
# way on every platform.
with open('title.txt', "a+", encoding="utf-8") as f:
    f.write(title)

静态网页抓取

  • 1.获取响应内容
import requests
r = requests.get('http://www.santostang.com')
print("文本编码:", r.encoding)
print("响应状态码:", r.status_code)
print("字符串方式的响应体:", r.text)
# r.text        - the response body as a str, decoded using the character
#                 encoding taken from the response headers.
# r.encoding    - the text encoding used to decode the response.
# r.status_code - the HTTP status code: 200 means the request succeeded,
#                 4xx indicates a client error, 5xx a server error.
# r.content     - the response body as bytes; gzip and deflate
#                 transfer-encodings are decoded automatically.
# r.json()      - the JSON decoder built into Requests.
  • 2.传递URL参数
import requests
# Query-string parameters are passed as a dict via params=; the final URL
# that was requested is available afterwards as r.url.
key_dict = dict(key1='value1', key2='value2')
endpoint = 'http://httpbin.org/get'
r = requests.get(endpoint, params=key_dict)
print("URL已经正确编码", r.url)
print("字符串方式的响应体:\n", r.text)
  • 3.定制请求头
import requests
# Custom request headers: a browser-like user agent plus an explicit Host.
user_agent = 'Mozilla/5.0 {Windows NT 6.1; Win64;x64} AppleWebkit/537.36(KHTML,like Gecko) Chrome/52.0.2743.B2 Safari/537.36'
headers = {'user-agent': user_agent, 'Host': 'www.santostang.com'}
r = requests.get('http://www.santostang.com/', headers=headers)
print("响应状态码:", r.status_code)
  • 4.发送post请求
import requests
# Form fields are sent in the body of the POST request via data=.
key_dict = dict(key1='values1', key2='values2')
endpoint = 'http://httpbin.org/post'
r = requests.post(endpoint, data=key_dict)
print(r.text)
  • 5.超时返回
import requests
link = "http://www.santostang.com/"
# With timeout=20 the call raises an exception instead of waiting
# indefinitely when the server takes longer than 20 seconds.
timeout_seconds = 20
r = requests.get(link, timeout=timeout_seconds)
  • 6.爬取豆瓣网top250
import requests
from bs4 import BeautifulSoup
def get_movies(pages=10, page_size=25):
    """Collect movie titles from the Douban Top 250 listing.

    Args:
        pages: Number of listing pages to request. The default of 10
            matches the previously hard-coded page count.
        page_size: Step between the ``start`` offsets of consecutive
            pages. The default of 25 matches the previously hard-coded
            offset step.

    Returns:
        A list of title strings in the order they were encountered. The
        list is also printed before it is returned.
    """
    headers = {
        # Browser-like user agent (the parenthesised platform section must
        # be closed before the AppleWebKit token).
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Host': 'movie.douban.com',
    }
    dymc_list = []

    for i in range(pages):
        link = 'https://movie.douban.com/top250?start=' + str(i * page_size)
        r = requests.get(link, headers=headers, timeout=10)
        print(str(i + 1), "页响应码状态:", r.status_code)
        soup = BeautifulSoup(r.text, "lxml")
        # Each entry is read from a <div class="hd">: the text of the first
        # <span> inside its first <a> is taken as the title.
        dymc_list.extend(
            each.a.span.text.strip()
            for each in soup.find_all('div', class_='hd')
        )
    print(dymc_list)
    return dymc_list
# Run the scraper; the returned list is not stored here (the function
# prints it itself before returning).
get_movies()

动态网页抓取

  • 1.实例 抓取淘宝中iPhone8手机的评论
import requests
# The review-list URL is very long, so it is split across adjacent string
# literals. Python joins these at compile time without inserting anything,
# whereas a triple-quoted string would embed a newline at every line break
# and corrupt the query string.
link = (
    "https://rate.tmall.com/list_detail_rate.htm?itemId=560745175443&spuId=878124235&sellerId=2616970884&"
    "order=3&currentPage=1"
    "&append=0&content=1&tagId=&posi=&picture="
    "&ua=098%23E1hvDQvUvbpvUvCkvvvvvjiPPFSpAj18RL5h0jivPmPU1jibPLswsjnjPLFUzjt8RphvCvvvphvPvpvhvv2MMQhCvvO"
    "v9hCvvvvEvpCWvwPX0B0XRfJ0Io3EAp0zWdUZEcqhQ8TZHdUfbzc6%2Bu64de%2BRfJoKHdoJwZ2WlE%2B7RqwiLO2v5fVQKoZHlR9t%2BFuTWDAvD46XdigDNdyCvm9v"
    "vhCvvvvvvvvvpdIvvvHSvvCVB9vv9LvvvhXVvvmCjvvvByOvvUhwuphvmvvvpo8VUqbGkphvC9hvpyPO08wCvvpvvhHh&isg=BKamCFPcwTtefJd2rGHI0j-U9xwibXQEg_aanpBPkkmk"
    "E0Yt-Bc6UYzBbw-fu-JZ&needFold=0&_ksTS=1527427656035_398&callback=jsonp399"
)
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/66.0.3359.181 Safari/537.36'}
r = requests.get(link, headers=headers)
# Print the raw response body as returned by the server.
print(r.text)