获取今日头条街拍图片

import requests,time

from urllib.parse import urlencode

import os

from hashlib import md5

class ToutiaoSpider:
    """Download images for the keyword search "街拍" from Toutiao.

    The spider pages through the ``search_content`` Ajax endpoint, extracts
    every image URL together with the title of the article it belongs to,
    and stores each image in a directory named after that title.
    """

    # Seconds to wait on any single HTTP request before giving up, so one
    # stalled connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Query-string parameters sent to the search endpoint; 'offset' is
        # filled in per request by get_page().
        self.params = {
            'offset': None,
            'format': 'json',
            'keyword': '街拍',
            'autoload': 'true',
            'count': '20',
            'cur_tab': '1',
            'from': 'search_tab',
            'pd': 'synthesis',
        }
        self.ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
        self.headers = {
            'user-agent': self.ua,
            # Header values must be latin-1 encodable, so the non-ASCII
            # keyword is percent-encoded instead of being embedded raw.
            'referer': 'https://www.toutiao.com/search/?'
                       + urlencode({'keyword': self.params['keyword']}),
            'content-type': 'application/x-www-form-urlencoded',
        }

    def get_page(self, offset):
        """Fetch one page of Ajax search results.

        Args:
            offset: Result offset of the page to request.

        Returns:
            The decoded JSON payload, or ``None`` when the request fails,
            the status code is not 200, or the body is not valid JSON.
        """
        self.params['offset'] = offset
        url = 'https://www.toutiao.com/search_content/?' + urlencode(self.params)
        print(url)
        try:
            r = requests.get(url, headers=self.headers,
                             timeout=self.REQUEST_TIMEOUT)
            if r.status_code == 200:
                r.encoding = 'utf-8'
                return r.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that cannot be parsed as JSON.
            print('Failed to fetch page:', e)
        return None

    def get_images(self, json_data):
        """Yield one ``{'image': url, 'title': title}`` dict per image.

        Args:
            json_data: Payload returned by get_page(); may be ``None``.

        Yields:
            A dict for every image that has a URL and belongs to an entry
            with a title. Entries lacking a title or an image list are
            skipped because save_image() needs both.
        """
        if not json_data:
            return
        for item in json_data.get('data') or []:
            title = item.get('title')
            images = item.get('image_list')
            if not title or not images:
                continue
            for image in images:
                url = image.get('url')
                if url:
                    yield {'image': url, 'title': title}

    def save_image(self, item):
        """Download one image into a directory named after its title.

        The file name is the MD5 digest of the image bytes, so identical
        images are written only once per directory.

        Args:
            item: Dict with 'image' (URL, usually protocol-relative) and
                'title' (used as the target directory name).
        """
        title = item.get('title')
        url = item.get('image')
        if not title or not url:
            print('Failed to save Image')
            return
        # Only add a scheme when the URL does not already carry one.
        if not url.startswith(('http://', 'https://')):
            url = 'http:' + url
        print('download running:', url)
        try:
            # Inside the try block so an unusable title is reported instead
            # of aborting the crawl; exist_ok avoids a check-then-create race.
            os.makedirs(title, exist_ok=True)
            r = requests.get(url, headers={'user-agent': self.ua},
                             timeout=self.REQUEST_TIMEOUT)
            if r.status_code == 200:
                file_path = os.path.join(
                    title, '{}.{}'.format(md5(r.content).hexdigest(), 'jpg'))
                if not os.path.exists(file_path):
                    with open(file_path, 'wb') as fp:
                        fp.write(r.content)
                    print('download finished')
                else:
                    print('Already Download,', file_path)
            else:
                print('页面下载失败！')
        except (requests.RequestException, OSError) as e:
            print(e)
            print('Failed to save Image')

    def run(self, offsets):
        """Crawl every result page in ``offsets`` and save its images.

        Args:
            offsets: Iterable of result offsets, one per page to fetch.
        """
        for offset in offsets:
            json_data = self.get_page(offset)
            for item in self.get_images(json_data):
                print(item)
                self.save_image(item)
            # Pause between pages to avoid hammering the server.
            time.sleep(1)

if __name__ == '__main__':
    # Crawl 15 result pages: offsets 0, 20, ..., 280.
    spider = ToutiaoSpider()
    offsets = list(range(0, 15 * 20, 20))
    spider.run(offsets)

获取今日头条街拍图片

聪少自媒体热门分类

推荐文章

热门文章

合作伙伴

关于本站