'''
@File : Pix2Spider.py
@Time : 2022/10/31 16:17:26
@Author : Hicoder
@Version : 1.0
@Contact : limastudio@qq.com
'''
import json
import os
import time
import requests
from requests.exceptions import RequestException
from MyHeaders import get_headers  # custom request headers
from mysettings import PIXBAYAPIKEY
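# Two local modules are required: MyHeaders must provide get_headers() and
# mysettings must provide PIXBAYAPIKEY (your Pixabay API key).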
class ImgSpider:
"""
通过调用 Pixbay 提供的 api 获取所需图片
"""
def __init__(self):
        self.keyword = input('Welcome to the Pixabay image downloader\nEnter a search keyword (English recommended): ')
        self.APIkey = PIXBAYAPIKEY  # or: input('Enter your Pixabay API key: ')
        # Get your API key at https://pixabay.com/api/docs/
self.baseURL = 'https://pixabay.com/api/?key=' + self.APIkey + '&q='\
+ self.keyword + '&image_type=photo&per_page=50'
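        # Resulting request URL (key redacted, assuming the keyword "cats"):
        # https://pixabay.com/api/?key=<YOUR_API_KEY>&q=cats&image_type=photo&per_page=50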
        self.page = input('Enter the number of pages to fetch: ')
def getText(self, url):
"""
请求URL,获得数据
"""
        headers = get_headers()
        try:
            r = requests.get(url, headers=headers, timeout=30)
            r.raise_for_status()  # raise an exception for non-2xx status codes
            r.encoding = r.apparent_encoding  # fix the encoding to avoid garbled text
            return r
        except RequestException:
            print("Request failed: {0}".format(url))
            return None
def getImgUrl(self):
"""
获取所有要下载的图片的 largeImageURL,存入 downURLs 数组中
"""
        downURLs = []
        for i in range(1, int(self.page) + 1):
            mainURL = self.baseURL + '&page=' + str(i)
            response = self.getText(mainURL)
            if response is None:
                continue
            imgList = json.loads(response.text).get('hits') or []
            for img in imgList:
                downURLs.append(img.get('largeImageURL'))
        return downURLs
def downImage(self):
"""
下载保存所有图片
"""
        root_path = './spiderimages/'
        # create the target directories if they do not exist yet
        if not os.path.exists(root_path):
            os.mkdir(root_path)
        image_root = root_path + self.keyword + '/'
        if not os.path.exists(image_root):
            os.mkdir(image_root)
        downURLs = self.getImgUrl()
        for i, imgUrl in enumerate(downURLs):
            image_name = image_root + self.keyword + '_' + str(i) + '.'\
                + imgUrl.split('.')[-1]
            if os.path.exists(image_name):
                print('Image already exists!')
                continue
            print('Downloading...')
            response = self.getText(imgUrl)
            if response is None:
                continue
            with open(image_name, "wb") as fp:
                # write the image bytes to disk
                fp.write(response.content)
            time.sleep(10)  # pause between downloads to stay polite to the server
        print('All {0} images have been downloaded!'.format(len(downURLs)))
if __name__ == '__main__':
spider = ImgSpider()
spider.downImage()
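# Typical run (values are examples): `python Pix2Spider.py`, enter a keyword such as
# "cats" and a page count such as "2"; images are saved under
# ./spiderimages/<keyword>/ as <keyword>_<index>.<extension>.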