16
2018
09

python暑期专业实训-Day04代码参考

01_douban.py

------------------------------

import requests

import json


# 1. Start-URL templates for the Douban mobile API, one per category.
#    Each "url_temp" has a {} placeholder for the paging offset (start=...),
#    and "country" is the tag attached to every item scraped from that URL.

start_url_temp_list = [

    {

        "url_temp":"https://m.douban.com/rexxar/api/v2/subject_collection/filter_tv_american_hot/items?os=ios&for_mobile=1&start={}&count=18",

        "country":"US"

    },

    {

        "url_temp":"https://m.douban.com/rexxar/api/v2/subject_collection/filter_tv_domestic_hot/items?os=ios&for_mobile=1&start={}&count=18",

        "country":"CN"

    }

]


# iPhone Safari User-Agent so the mobile API accepts the request.
headers = {"User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1"}


def parse_url(url):
    """Send a GET request for *url* with the shared mobile headers.

    Returns the response body decoded as text (UTF-8 by default).
    """
    print("现在正在请求:", url)
    response = requests.get(url, headers=headers)
    return response.content.decode()


def get_content_list(json_response):
    """Extract data from a Douban API response.

    Parses the JSON string and returns a tuple of
    (list of subject items, total item count reported by the API).
    """
    data = json.loads(json_response)
    return data["subject_collection_items"], data["total"]


def save_content_list(content_list):
    """Append every item in *content_list* to douban.txt as pretty-printed JSON.

    Uses a context manager so the file is closed even if a write raises
    (the original open()/close() pair leaked the handle on error), and
    writes a newline after each record so consecutive JSON documents do
    not run together on one line.
    """
    with open("douban.txt", "a", encoding="utf-8") as f:
        for content in content_list:
            f.write(json.dumps(content, ensure_ascii=False, indent=2))
            f.write("\n")
    print("保存成功")


def run():
    """Main crawl loop: page through every URL template and persist results."""
    for url_temp in start_url_temp_list:
        offset = 0
        total = 100  # placeholder until the first response reports the real total
        # Keep paging until past the reported total, with one extra page in
        # case a few trailing items remain.
        while offset <= total + 18:
            # 2. request the next page
            page_url = url_temp["url_temp"].format(offset)
            json_response = parse_url(page_url)
            # 3. extract the items and refresh the total from the response
            content_list, total = get_content_list(json_response)
            # tag every item with the country of its source list
            for content in content_list:
                content["country"] = url_temp["country"]
            # 4. persist this page
            save_content_list(content_list)
            offset += 18



# Script entry point: start the crawl only when executed directly.
if __name__ == '__main__':

    run()

------------------------------

01_try_requests.py

------------------------------

# coding=utf-8
"""Minimal requests demo: fetch the Baidu homepage and print the response."""
import requests

url = "http://www.baidu.com"
# A URL without a scheme (e.g. "www.baidu.com") would make requests raise.

response = requests.get(url)
print(response)

# r.text guesses the encoding (r.encoding can be set manually); decoding
# r.content ourselves is the more reliable way to get the page source.
print(response.content.decode())

------------------------------

02_baidu_tupian.py

------------------------------

# coding=utf-8
"""Download the Baidu logo image and inspect response/request headers."""
import requests

r = requests.get("https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo/logo_white.png")

print(r.status_code)
print(r.headers)
print("*"*10)
print(r.request.headers)

# Images/videos must be saved as raw bytes (r.content), hence mode "wb".
# Use a context manager so the file is closed even if the write raises
# (the original open()/close() pair leaked the handle on error).
with open("baidu.png", "wb") as f:
    f.write(r.content)

------------------------------

03_headers.py

------------------------------

# coding=utf-8
"""Demonstrate sending a custom User-Agent header with requests."""
import requests


# Pretend to be a desktop Chrome browser.
headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}

url = "http://www.baidu.com"

response = requests.get(url, headers=headers)
print(response.content.decode())

------------------------------

04_sina.py

------------------------------

# coding=utf-8
"""Fetch the Sina homepage with a browser User-Agent and print the HTML."""
import requests

url = "http://www.sina.com"
# Desktop Chrome User-Agent so the site serves the normal page.
headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}

response = requests.get(url, headers=headers)

# response.text would guess the encoding; decode the raw bytes instead.
print(response.content.decode())

------------------------------

05_baidufanyi.py

------------------------------

import requests

import json

import random

import hashlib


# 1. Build the signed request for the Baidu Translate open API.
post_url = "http://api.fanyi.baidu.com/api/trans/vip/translate"

query_string = input("请输入要翻译的内容:")

appid = "20180917000208070"

appsecret = "BfD6HMJ9_izyRBA61glS"

# The API signature is MD5(appid + query + salt + secret). The salt must be
# a fresh random number per request — the original randint(1, 1) always
# produced 1, defeating its purpose (Baidu's docs suggest randint(32768, 65536)).
salt = random.randint(32768, 65536)

sign_raw = appid + query_string + str(salt) + appsecret

sign = hashlib.md5(sign_raw.encode(encoding='UTF-8')).hexdigest()

print(sign)

# 2. Send the request. Passing the query via `params` lets requests
# percent-encode it — the original string concatenation broke for input
# containing spaces or other characters that are not URL-safe.
params = {
    "appid": appid,
    "q": query_string,
    "from": "en",
    "to": "zh",
    "salt": str(salt),
    "sign": sign,
}

response = requests.get(post_url, params=params)

json_response = response.content.decode()  # raw JSON text of the reply

print(json_response)

# 3. Extract the first translation result from the parsed payload.
dict_response = json.loads(json_response)

ret = dict_response["trans_result"][0]["dst"]


print("{}的翻译结果是:{}".format(query_string,ret))

------------------------------

06_session.py

------------------------------

# coding=utf-8
"""Log in to renren.com with a Session, then fetch a login-only profile page."""
import requests

post_url = "http://www.renren.com/PLogin.do"

post_data = {"email":"mr_mao_hacker@163.com", "password":"alarmchime"}

headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}


# A Session keeps the cookies set by the login POST and sends them
# automatically on every subsequent request.
session = requests.session()

session.post(post_url, data=post_data, headers=headers)


# Reuse the logged-in session for a page that requires authentication.
response = session.get("http://www.renren.com/327550029/profile", headers=headers)


# Context manager guarantees the file is closed even if decoding or the
# write raises (the original open()/close() pair leaked the handle on error).
with open("renren.html", "w", encoding="utf-8") as f:
    f.write(response.content.decode())

------------------------------

07_cookies.py

------------------------------

import requests

"""Fetch a login-only renren.com page by sending a captured Cookie header."""

post_url = "http://www.renren.com/PLogin.do"

post_data = {"email":"mr_mao_hacker@163.com", "password":"alarmchime"}

# The Cookie header below was captured from a logged-in browser session;
# sending it directly stands in for performing the login.
headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",

           "Cookie":"anonymid=j3jxk555-nrn0wh; _r01_=1; JSESSIONID=abcX04wUgB28nc3au698v; depovince=BJ; _uij=JTdCJTIydXNlcklkJTIyJTNBMzI3NTUwMDI5JTJDJTIydXNlck5hbWUlMjIlM0ElMjIlRTYlQUYlOUIlRTUlODUlODYlRTUlODYlOUIlMjIlMkMlMjJoZWFkRnVsbFVybCUyMiUzQSUyMmh0dHAlM0ElMkYlMkZoZG4ueG5pbWcuY24lMkZwaG90b3MlMkZoZG4zMjElMkYyMDE3MTAxMCUyRjE2MTUlMkZoZWFkX01uUFBfYzk3NTAwMDA4NGZlMTk4Ni5qcGclMjIlMkMlMjJnZW5kZXIlMjIlM0ElMjIlRTclOTQlQjclRTclOTQlOUYlMjIlMkMlMjJsb2dDb3VudCUyMiUzQTIzMyUyQyUyMmNvZGUlMjIlM0EwJTdE; renrenuid=327550029; _ga=GA1.2.1274811859.1497951251; _gid=GA1.2.425390350.1509330131; ch_id=10016; jebecookies=355a2bd2-ff48-4ca7-849d-0cfa70d01373|||||; ick_login=bb62b795-7cc8-4fed-851e-bdb8552a0f2f; _de=BF09EE3A28DED52E6B65F6A4705D973F1383380866D39FF5; p=456b02be83f0342ca2f81410a5feb0fa9; first_login_flag=1; ln_uact=mr_mao_hacker@163.com; ln_hurl=http://hdn.xnimg.cn/photos/hdn321/20171010/1615/main_MnPP_c975000084fe1986.jpg; t=59278a421bf091324fd085192873a61c9; societyguester=59278a421bf091324fd085192873a61c9; id=327550029; xnsid=929e661c; loginfrom=syshome; wp_fold=0"}



# With the cookies supplied in the headers, a plain GET reaches a page
# that normally requires a logged-in session.
response = requests.get("http://www.renren.com/327550029/profile", headers=headers)


# Context manager guarantees the file is closed even if decoding or the
# write raises (the original open()/close() pair leaked the handle on error).
with open("renren.html", "w", encoding="utf-8") as f:
    f.write(response.content.decode())

------------------------------

parse.py

------------------------------

import requests

from retrying import retry

# Desktop Chrome User-Agent shared by every request in this module.
headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}


@retry(stop_max_attempt_number=3)
def _parse_url(url):
    """Request *url* once; the @retry decorator re-runs it up to 3 times.

    Raises on connection errors, timeouts (3s), or a non-200 status so
    the retry wrapper — and ultimately parse_url — can react.
    """
    print("*"*50)
    response = requests.get(url, headers=headers, timeout=3)
    # Explicit raise instead of `assert`: assertions are stripped when
    # Python runs with -O, which would silently disable this check.
    if response.status_code != 200:
        raise ValueError("unexpected status code: {}".format(response.status_code))
    return response.content.decode()


def parse_url(url):
    """Fetch *url*, returning the HTML text, or None when every retry failed.

    Wraps _parse_url in an exception handler so callers get a simple
    value instead of an exception.
    """
    try:
        html_str = _parse_url(url)
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    except Exception:
        html_str = None
    return html_str


if __name__ == '__main__':
    url = "http://www.baidu.com"
    # "www.baidu.com" lacks a scheme, so every attempt raises and
    # parse_url returns None — demonstrating the error path.
    url1 = "www.baidu.com"
    html_str = parse_url(url1)
    if html_str is None:
        print("error")
    else:
        print(html_str[:500])

------------------------------

请同学们参考练习,务必亲自敲代码,脚踏实地的编程!

« 上一篇 | 下一篇 »

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。