01_douban.py
------------------------------------
import requests
import json
# 1. URL templates: one paginated endpoint per collection. The "{}"
#    placeholder receives the start offset; each request asks for 18 items.
start_url_temp_list = [
    dict(
        url_temp="https://m.douban.com/rexxar/api/v2/subject_collection/filter_tv_american_hot/items?os=ios&for_mobile=1&start={}&count=18",
        country="US",
    ),
    dict(
        url_temp="https://m.douban.com/rexxar/api/v2/subject_collection/filter_tv_domestic_hot/items?os=ios&for_mobile=1&start={}&count=18",
        country="CN",
    ),
]

# Request headers shared by every call. "Referer" starts empty and is filled
# in by run() for the collection currently being crawled.
headers = dict([
    ("Referer", ""),
    (
        "User-Agent",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
    ),
])
def parse_url(url, timeout=10):  # send the request and return the response text
    """Fetch *url* with the module-level ``headers`` and return the body as text.

    Args:
        url: Fully formatted URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole crawl.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never handed on as if it were data.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    print("现在正在请求:",url)
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.content.decode("utf-8")
def get_content_list(json_response):  # 3. extract the data
    """Parse one API response and pull out the items and the reported total.

    Args:
        json_response: JSON text of a single response page.

    Returns:
        A ``(content_list, total)`` tuple: the value stored under
        ``"subject_collection_items"`` and the value stored under ``"total"``.

    Raises:
        json.JSONDecodeError: If *json_response* is not valid JSON.
        KeyError: If either expected key is missing from the response.
    """
    dict_response = json.loads(json_response)
    content_list = dict_response["subject_collection_items"]
    total = dict_response["total"]
    return content_list, total
def save_content_list(content_list):  # save one page of items
    """Append every item of *content_list* to ``douban.txt`` as pretty JSON.

    The file is opened once per call in append mode. Each record is followed
    by a newline so consecutive records do not run together as ``}{`` in the
    output file.

    Args:
        content_list: Iterable of JSON-serialisable items to append.
    """
    # ``with`` closes the file even if serialising or writing a record fails.
    with open("douban.txt", "a", encoding="utf-8") as f:
        f.writelines(
            json.dumps(content, ensure_ascii=False, indent=2) + "\n"
            for content in content_list
        )
    print("保存成功")
def run():  # main crawl logic
    """Crawl every configured collection and append its items to disk.

    For each entry in ``start_url_temp_list`` the matching ``Referer`` is
    written into the shared ``headers`` dict, then the collection is fetched
    page by page. Every item is tagged with the entry's country code and
    passed to ``save_content_list``.

    Paging stops once the start offset reaches the ``total`` reported by the
    API, or earlier if a page comes back empty, so no request is made past
    the end of the collection.
    """
    page_size = 18  # must stay in sync with ``count=18`` in the URL templates
    referers = {
        "CN": "https://m.douban.com/tv/chinese",
        "US": "https://m.douban.com/tv/american",
    }

    # 1. iterate over the URL templates
    for url_temp in start_url_temp_list:
        country = url_temp["country"]
        # parse_url reads the module-level headers, so the Referer is set there.
        if country in referers:
            headers["Referer"] = referers[country]

        num = 0
        total = None  # unknown until the first response reports it
        while total is None or num < total:
            url = url_temp["url_temp"].format(num)
            # 2. send the request and get the response
            json_response = parse_url(url)
            # 3. extract the data
            content_list, total = get_content_list(json_response)
            if not content_list:
                # Nothing more to read, whatever ``total`` claims.
                break
            for content in content_list:  # attach the country code
                content["country"] = country
            # 4. save
            save_content_list(content_list)
            num += page_size
# Start the crawl only when the file is executed as a script, not on import.
if __name__ == '__main__':
    run()
------------------------------------
发表评论:
◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。