Python 爬取图片

一个使用 Python 爬取网站图片的示例,基于 Python 2.x。


完整代码:

# -*- coding: UTF-8 -*-

import sys
# Python 2-only setup: reload the sys module, then set the process-wide
# default string encoding to UTF-8. The order of these two statements matters.
# NOTE(review): presumably reload() is needed because setdefaultencoding is not
# available on the initially imported sys module in Python 2 - confirm.
reload(sys)
sys.setdefaultencoding('utf8')

import urllib
import re
import os
import time


class Spider:
    """Download every image that a regular expression finds on one web page.

    Written for Python 2.x: it relies on ``urllib.urlopen``.

    Attributes (all taken unchanged from the constructor arguments):
        url: address of the page whose HTML is scanned.
        position: folder the downloaded images are written into.
        regX: regular expression applied to the page HTML; every match is
            treated as the URL of an image to download.
    """

    def __init__(self, url, position, regX):
        """Store the page address, the output folder and the image pattern."""
        self.url = url
        self.position = position
        self.regX = regX

    def save_page_info(self):
        """Fetch the page, print its HTML, and save each matched image.

        Images are written to ``position`` as ``0.jpg``, ``1.jpg``, ... in the
        order the pattern matched them, pausing 0.5 seconds after each one.
        The output folder is created when it does not exist yet and at least
        one image URL was found.

        Returns:
            None.

        Raises:
            IOError: propagated from ``urllib.urlopen`` or from writing a file.
            OSError: propagated from ``os.makedirs``.
        """
        html = urllib.urlopen(self.url).read()

        # A single-argument print(...) behaves like the print statement on
        # Python 2, so the output is unchanged.
        print(html)

        # re.S lets "." span newlines, so a tag broken across lines still matches.
        pic_urls = re.findall(self.regX, html, re.S)

        # Create the target folder if it does not exist (checked once, not per image).
        if pic_urls and not os.path.isdir(self.position):
            os.makedirs(self.position)

        for index, pic_url in enumerate(pic_urls):
            pic = urllib.urlopen(pic_url)
            print(pic_url)

            # Always release the connection, even when reading fails.
            try:
                data = pic.read()
            finally:
                pic.close()

            # os.path.join keeps the file inside the folder whether or not
            # ``position`` ends with a path separator.
            target = os.path.join(self.position, str(index) + '.jpg')
            with open(target, 'wb') as fp:
                fp.write(data)

            # Be polite to the server between downloads.
            time.sleep(0.5)

# Page to scan and the folder the downloaded images are saved into.
url = 'http://www.umei.cc/meinvtupian/'
position = '/Users/mengfanxu/self/python/images/'
# Captures the src of each <img> tag that ends in ".jpg". The dot is escaped
# so that only a literal ".jpg" extension matches, not any character + "jpg".
regX = r'<img src="(.+?\.jpg)" '

# Run the download for the configured page.
spider = Spider(url, position, regX)
spider.save_page_info()