# Reference: "Automate the Boring Stuff with Python" (Chinese edition: 《Python编程快速上手——让繁琐工作自动化》)
#! python3
import requests,os,bs4
from urllib.parse import urljoin

# Walk xkcd starting at the first comic and following each page's "next"
# link, saving every comic image found into ./xkcd. Each saved file is
# prefixed with a running page counter so files sort in download order.
url = 'https://xkcd.com/1/' # starting url
os.makedirs('xkcd', exist_ok=True)
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() may block forever
i = 1  # running page counter, used as the filename prefix
# The loop stops once the URL built from the "next" link ends with '#'
# (presumably what the newest comic's next button points to -- confirm).
while not url.endswith('#'):
    # Download the page
    print('Downloading page %s...' % url)
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, "html.parser")

    # Find the URL of the comic image
    comicElem = soup.select('#comic img')
    comicSrc = comicElem[0].get('src') if comicElem else None
    if not comicSrc:
        print('Could not find comic image.')
    else:
        # urljoin resolves protocol-relative ('//host/path'), root-relative
        # and absolute src values against the page URL.
        comicUrl = urljoin(url, comicSrc)
        # Download the image
        print('Downloading image %s...' % (comicUrl))
        imageRes = requests.get(comicUrl, timeout=REQUEST_TIMEOUT)
        imageRes.raise_for_status()
        # Save the image to ./xkcd; `with` closes the file even if a write fails
        imagePath = os.path.join('xkcd', str(i) + '_' + os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in imageRes.iter_content(100000):
                imageFile.write(chunk)

    # Get the Next button's url
    nextLinks = soup.select('a[rel="next"]')
    nextHref = nextLinks[0].get('href') if nextLinks else None
    if not nextHref:
        # No usable next link: stop instead of crashing with IndexError/TypeError
        print('Could not find next link.')
        break
    # Plain concatenation (not urljoin) so an href of '#' yields a URL ending
    # in '#', which is the loop's termination condition.
    url = 'https://xkcd.com' + nextHref
    i = i + 1
print('Done.')