
Python 爬取网站图片示例（基于 Python 2.x）
完整代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
import os
import re
import sys
import time
import urllib

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the interpreter-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
class Spider(object):
    """Download every image that a regular expression finds on one web page.

    Targets Python 2.x: the page and the images are fetched with
    ``urllib.urlopen``.
    """

    def __init__(self, url, position, regX):
        """Store the crawl settings.

        Args:
            url: Address of the page whose HTML is scanned.
            position: Directory the downloaded images are written into.
            regX: Regular expression applied to the page HTML; every match
                returned by ``re.findall`` is fetched as an image URL.
        """
        self.url = url
        self.position = position
        self.regX = regX

    def save_page_info(self):
        """Fetch the page and save each matched image as ``<index>.jpg``.

        Images are numbered from 0 in the order they are matched, and the
        method pauses half a second after each download.
        """
        page = urllib.urlopen(self.url)
        try:
            html = page.read()
        finally:
            # Release the connection even if reading the body fails.
            page.close()
        # Parenthesised single-argument print behaves the same on Python 2.
        print(html)

        pic_urls = re.findall(self.regX, html, re.S)

        # Create the target directory once, and only when there is
        # something to save.
        if pic_urls and not os.path.isdir(self.position):
            os.makedirs(self.position)

        for index, pic_url in enumerate(pic_urls):
            response = urllib.urlopen(pic_url)
            print(pic_url)
            try:
                # Read before opening the output file so a failed download
                # does not leave an empty image behind.
                data = response.read()
            finally:
                response.close()

            # os.path.join keeps the file inside `position` whether or not
            # the directory string ends with a path separator.
            target = os.path.join(self.position, str(index) + '.jpg')
            with open(target, 'wb') as fp:
                fp.write(data)

            # Short pause between downloads to avoid hammering the server.
            time.sleep(0.5)
# Page to scan and the directory the images are saved into.
url = 'http://www.umei.cc/meinvtupian/'
position = '/Users/mengfanxu/self/python/images/'
# Capture the src of <img> tags that end in ".jpg". The dot is escaped so it
# matches only a literal ".", and [^"] keeps each match inside a single src
# attribute (the pattern is applied with re.S, under which "." would also
# run across newlines and neighbouring tags).
regX = r'<img src="([^"]+?\.jpg)" '

if __name__ == '__main__':
    spider = Spider(url, position, regX)
    spider.save_page_info()
|
近期评论