import requests,re
import json
from urllib import request
import os
# Search endpoint template; ``{}`` receives the result offset. The ``keyword``
# query parameter is the percent-encoded UTF-8 search term.
url='https://www.toutiao.com/search_content/?offset={}&format=json&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20&cur_tab=1&from=search_tab'
headers={
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
}

PAGE_COUNT = 3          # number of search result pages to crawl
PAGE_SIZE = 20          # offset step; matches ``count=20`` in the search URL
DOWNLOAD_DIR = 'download'
REQUEST_TIMEOUT = 10    # seconds; without it a stalled connection blocks forever

# The article page embeds the gallery as ``gallery: JSON.parse("<json string>"),``.
# The parentheses of the JavaScript call must be escaped so that they match
# literally; only the quoted string literal between them is captured.
GALLERY_PATTERN = re.compile(r'gallery: JSON\.parse\((.*)\),')


def extract_gallery(html):
    """Return the gallery dict embedded in an article page, or None.

    The page stores the gallery as a JSON document wrapped in a JavaScript
    string literal, so it has to be decoded twice: once to unquote the
    literal and once to parse the document itself.

    Args:
        html: Text of the article page.

    Returns:
        The decoded gallery mapping, or None when the page has no gallery
        section or its payload is not a JSON object.
    """
    match = GALLERY_PATTERN.search(html)
    if match is None:
        return None
    try:
        gallery = json.loads(json.loads(match.group(1)))
    except (TypeError, ValueError):
        # Malformed payload: skip this article instead of aborting the crawl.
        return None
    return gallery if isinstance(gallery, dict) else None


def image_filename(image_url, directory=DOWNLOAD_DIR):
    """Return the local path for an image: last URL segment plus ``.jpg``."""
    return os.path.join(directory, image_url.split('/')[-1] + '.jpg')


def fetch_article_urls(offset):
    """Return the article URLs listed on the search page starting at *offset*."""
    response = requests.get(url.format(offset), headers=headers, timeout=REQUEST_TIMEOUT)
    # ``data`` may be absent or null; treat both as an empty result page.
    data_list = response.json().get('data') or []
    return [item['article_url'] for item in data_list if 'article_url' in item]


def download_gallery(article_url):
    """Download every gallery image of one article into DOWNLOAD_DIR."""
    response = requests.get(article_url, headers=headers, timeout=REQUEST_TIMEOUT)
    gallery = extract_gallery(response.text)
    if gallery is None:
        return
    for image in gallery.get('sub_images', []):
        image_url = image['url']
        print(image_url)
        # Download the image.
        request.urlretrieve(image_url, image_filename(image_url))


def main():
    """Crawl PAGE_COUNT search pages and download all gallery images found."""
    # Create the download folder once, up front.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    for page in range(PAGE_COUNT):
        for article_url in fetch_article_urls(PAGE_SIZE * page):
            download_gallery(article_url)


if __name__ == '__main__':
    main()