Python爬取XKCD漫画

2019-05-13 Python 爬虫

参考《Python编程快速上手——让繁琐工作自动化》

#! python3
# Download XKCD comics: start at comic #1 and follow each page's "Next" link,
# saving every comic image into ./xkcd as "<page counter>_<image file name>".
import os
from urllib.parse import urljoin

import bs4
import requests

url = 'https://xkcd.com/1/' # starting url
os.makedirs('xkcd',exist_ok=True)
i = 1
# On the last comic the "Next" link's href is '#', so the URL built at the
# bottom of the loop ends with '#' and the crawl stops.
while not url.endswith('#'):
    # Download the page
    print('Downloading page %s...' %url)
    res = requests.get(url)
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.text, "html.parser")

    # Find the URL of the comic image
    comicElem = soup.select('#comic img')
    comicSrc = comicElem[0].get('src') if comicElem else None
    if not comicSrc:
        # Nothing to save for this page; still move on to the next one.
        print('Could not find comic image.')
    else:
        # urljoin copes with protocol-relative ('//host/...'), absolute and
        # site-relative src values alike.
        comicUrl = urljoin(url, comicSrc)
        # Download the image (kept separate from the page response `res`)
        print('Downloading image %s...' %(comicUrl))
        imageRes = requests.get(comicUrl)
        imageRes.raise_for_status()

        # Save the image to ./xkcd -- only when an image was actually fetched
        imagePath = os.path.join('xkcd', str(i) + '_' + os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in imageRes.iter_content(100000):
                imageFile.write(chunk)

    # Get the Next button's url
    nextLinks = soup.select('a[rel="next"]')
    if not nextLinks:
        # Without a next link there is nowhere to go; stop instead of crashing.
        print('Could not find next link.')
        break
    url = 'https://xkcd.com' + nextLinks[0].get('href')
    i = i + 1

print('Done.')

Python爬取XKCD漫画

Python爬取百度首页logo