'''
@File : Pix2Spider.py
@Time : 2022/10/31 16:17:26
@Author : Hicoder
@Version : 1.0
@Contact : limastudio@qq.com
'''
import json
import os
import time
import requests
from requests.exceptions import RequestException
from MyHeaders import get_headers  # custom request headers
from mysettings import PIXBAYAPIKEY
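# Two local modules are required: MyHeaders must provide get_headers() and
# mysettings must provide PIXBAYAPIKEY (your Pixabay API key).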
class ImgSpider:
"""
通过调用 Pixbay 提供的 api 获取所需图片
"""
def __init__(self):
        self.keyword = input('Welcome to the Pixabay image downloader\nEnter a search keyword (English recommended): ')
        self.APIkey = PIXBAYAPIKEY  # or: input('Enter your Pixabay API key: ')
        # Get your API key at https://pixabay.com/api/docs/
self.baseURL = 'https://pixabay.com/api/?key=' + self.APIkey + '&q='\
+ self.keyword + '&image_type=photo&per_page=50'
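        # Resulting request URL (key redacted, assuming the keyword "cats"):
        # https://pixabay.com/api/?key=<YOUR_API_KEY>&q=cats&image_type=photo&per_page=50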
        self.page = input('Enter the number of pages to fetch: ')
def getText(self, url):
"""
请求URL,获得数据
"""
        headers = get_headers()
        try:
            r = requests.get(url, headers=headers, timeout=30)
            r.raise_for_status()  # raise an exception for non-2xx status codes
            r.encoding = r.apparent_encoding  # fix the encoding to avoid garbled text
            return r
        except RequestException:
            print("Request failed: {0}".format(url))
            return None
def getImgUrl(self):
"""
获取所有要下载的图片的 largeImageURL,存入 downURLs 数组中
"""
        downURLs = []
        for i in range(1, int(self.page) + 1):
            mainURL = self.baseURL + '&page=' + str(i)
            response = self.getText(mainURL)
            if response is None:
                continue
            imgList = json.loads(response.text).get('hits') or []
            for img in imgList:
                downURLs.append(img.get('largeImageURL'))
        return downURLs
def downImage(self):
"""
下载保存所有图片
"""
        root_path = './spiderimages/'
        # create the target directories if they do not exist yet
        if not os.path.exists(root_path):
            os.mkdir(root_path)
        image_root = root_path + self.keyword + '/'
        if not os.path.exists(image_root):
            os.mkdir(image_root)
        downURLs = self.getImgUrl()
        for i, imgUrl in enumerate(downURLs):
            image_name = image_root + self.keyword + '_' + str(i) + '.'\
                + imgUrl.split('.')[-1]
            if os.path.exists(image_name):
                print('Image already exists!')
                continue
            print('Downloading...')
            response = self.getText(imgUrl)
            if response is None:
                continue
            with open(image_name, "wb") as fp:
                # write the image bytes to disk
                fp.write(response.content)
            time.sleep(10)  # pause between downloads to stay polite to the server
        print('All {0} images have been downloaded!'.format(len(downURLs)))
if __name__ == '__main__':
spider = ImgSpider()
spider.downImage()
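# Typical run (values are examples): `python Pix2Spider.py`, enter a keyword such as
# "cats" and a page count such as "2"; images are saved under
# ./spiderimages/<keyword>/ as <keyword>_<index>.<extension>.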