当前位置:聪少自媒体网 > 今日头条 > 正文

获取今日头条街拍图片

2020-10-05 今日头条 聪少自媒体

import requests,time

from urllib.parse import urlencode

import os

from hashlib import md5

class ToutiaoSpider:
    """Crawl Toutiao search results for a keyword and save the listed images.

    Each result page is fetched as JSON from the ``search_content`` endpoint,
    every entry's ``image_list`` is walked, and each image is stored under a
    directory named after the entry title, using the MD5 of the image bytes
    as the file name so identical images are written only once.
    """

    SEARCH_URL = 'https://www.toutiao.com/search_content/?'

    def __init__(self, keyword='街拍', timeout=10):
        """Prepare the query parameters and request headers.

        Args:
            keyword: Search term sent to Toutiao (defaults to '街拍').
            timeout: Seconds to wait for any single HTTP request.
        """
        self.timeout = timeout
        self.params = {
            'offset': None,
            'format': 'json',
            'keyword': keyword,
            'autoload': 'true',
            'count': '20',
            'cur_tab': '1',
            'from': 'search_tab',
            'pd': 'synthesis',
        }
        self.ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
        self.headers = {
            'user-agent': self.ua,
            # Header values must be latin-1 encodable, so the keyword is
            # percent-encoded instead of being embedded as raw text.
            'referer': 'https://www.toutiao.com/search/?' + urlencode({'keyword': keyword}),
            'content-type': 'application/x-www-form-urlencoded',
        }

    @staticmethod
    def _absolute_url(url):
        """Return *url* with an ``http:`` scheme unless it already has one."""
        if url.startswith(('http://', 'https://')):
            return url
        return 'http:' + url

    def get_page(self, offset):
        """Fetch one page of search results loaded via Ajax.

        Args:
            offset: Result offset of the page to request.

        Returns:
            The decoded JSON payload, or ``None`` when the request fails,
            the status code is not 200, or the body is not valid JSON.
        """
        self.params['offset'] = offset
        url = self.SEARCH_URL + urlencode(self.params)
        print(url)
        try:
            r = requests.get(url, headers=self.headers, timeout=self.timeout)
            if r.status_code != 200:
                print('Failed to fetch page, status:', r.status_code)
                return None
            r.encoding = 'utf-8'
            return r.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: report the failure and let the caller move on.
            print('Failed to fetch page:', e)
            return None

    def get_images(self, json_data):
        """Yield one ``{'image': url, 'title': title}`` dict per listed image.

        Args:
            json_data: Payload returned by ``get_page``; may be ``None``.

        Yields:
            A dict with the image URL under ``'image'`` and the title of the
            entry it belongs to under ``'title'``.
        """
        if not json_data:
            # get_page returns None on failure; there is nothing to iterate.
            return
        for item in json_data.get('data') or []:
            title = item.get('title')
            # Entries without an image list are skipped.
            for image in item.get('image_list') or []:
                yield {'image': image.get('url'), 'title': title}

    def save_image(self, item):
        """Download the image described by *item* and store it on disk.

        Args:
            item: Dict with ``'title'`` (target directory) and ``'image'``
                (image URL, possibly without a scheme).

        Failures are printed rather than raised so that one bad image does
        not stop the crawl.
        """
        title = item.get('title')
        url = item.get('image')
        if not title or not url:
            print('Skip item without title or image url:', item)
            return
        try:
            os.makedirs(title, exist_ok=True)
            full_url = self._absolute_url(url)
            print('download running:', full_url)
            r = requests.get(full_url, timeout=self.timeout)
            if r.status_code == 200:
                # Content hash as file name: identical images map to one file.
                file_path = '{}/{}.{}'.format(title, md5(r.content).hexdigest(), 'jpg')
                if not os.path.exists(file_path):
                    with open(file_path, 'wb') as fp:
                        fp.write(r.content)
                    print('download finished')
                else:
                    print('Already Download,', file_path)
            else:
                print('页面下载失败!')
        except Exception as e:
            print(e)
            print('Failed to save Image')

    def run(self, offsets):
        """Crawl every offset in *offsets* and save all images found.

        Args:
            offsets: Iterable of result offsets, one per page to fetch.
        """
        for offset in offsets:
            json_data = self.get_page(offset)
            for item in self.get_images(json_data):
                print(item)
                self.save_image(item)
            # Pause between result pages.
            time.sleep(1)

if __name__ == '__main__':
    # Crawl 15 result pages: offsets 0, 20, ..., 280.
    spider = ToutiaoSpider()
    page_offsets = list(range(0, 15 * 20, 20))
    spider.run(page_offsets)

聪少爱学堂聪少
聪少爱学堂创始人,梅州市鹏鑫网络科技有限公司CEO,09年开始踏入互联网,10年互联网行业经验,资深自媒体人,自媒体优秀导师,咪挺微商团队营销引流顾问,业务包含:精准引流技术/代引流精准粉,专业小红书,知乎,微博代运营。
  • 38988文章总数
  • 1491135访问次数
  • 建站天数
  • 合作伙伴