
原因
最近在学习 Python,可以用它做一些好玩的事情。
知乎上 http://www.zhihu.com/question/35874887 这个答案分享了 1000+ 张图片,都是华丽的表情图片,也是蛮拼的;网页一打开就卡死,正好拿来做抓取图片的示范。
源码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
import urllib.request as request
import urllib.parse as parse
import string
import re
import os
import urllib.error as error

# Captures the src attribute of each <img> tag in the page text.
_IMG_SRC_RE = re.compile('<img src="(.+?)"')
# Only absolute https URLs ending in a literal ".jpg" are downloaded.
_JPG_URL_RE = re.compile(r'^https://.*\.jpg$')


def baidu_zhihu(url,
                dirpath='C:/Users/Administrator/Desktop/python_data/',
                dirname='question/35874887'):
    """Download every https ``.jpg`` image referenced by the page at *url*.

    The page is fetched, printed, and scanned for ``<img src="...">``
    attributes.  Each matching image is saved as ``0.jpg``, ``1.jpg``, ...
    inside ``os.path.join(dirpath, dirname)``, which is created if missing.

    Args:
        url: Address of the page to scan.
        dirpath: Base output directory.
        dirname: Sub-directory of *dirpath* that receives the images.

    Returns:
        The number of images written to disk.

    Raises:
        urllib.error.URLError: If the page itself cannot be fetched.
            Failures on individual images are reported and skipped.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with request.urlopen(url) as response:
        m = response.read()

    new_path = os.path.join(dirpath, dirname)
    os.makedirs(new_path, exist_ok=True)

    # NOTE(review): the page is decoded as GBK with undecodable bytes
    # dropped; confirm this matches the encoding the site actually serves.
    page_data = m.decode('gbk', 'ignore')
    print(page_data)

    count = 0
    for image in _IMG_SRC_RE.findall(page_data):
        if not _JPG_URL_RE.match(image):
            continue
        print(image)
        try:
            with request.urlopen(image) as response:
                image_data = response.read()
        except error.URLError:
            # Best effort: one unreachable image must not stop the rest.
            print('Download failed')
            continue
        image_path = os.path.join(new_path, str(count) + '.jpg')
        count += 1
        print(image_path)
        # The with-statement closes the file; no explicit close() is needed.
        with open(image_path, 'wb') as image_file:
            image_file.write(image_data)
    return count


if __name__ == "__main__":
    url = "http://www.zhihu.com/question/35874887"
    baidu_zhihu(url)
|
近期评论