爬取一个页面的所有图片

以上次用过的亚马逊的一个商品页面为例

import requests
import re
import os
from bs4 import BeautifulSoup

def getHTMLText(url):
    """Fetch *url* and return the decoded page text.

    Sends a desktop User-Agent so sites (e.g. Amazon) don't reject the
    default requests UA, and re-decodes the body with the apparent
    encoding detected from the content.

    Returns the sentinel string "Error" on any request failure, matching
    the original contract (callers check for that value, not exceptions).
    """
    # Spoof a browser UA; many sites block the default python-requests UA.
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
        # apparent_encoding is sniffed from the body; more reliable than
        # the (often missing/wrong) charset in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:  # was a bare except: — too broad
        return "Error"

def savePic(text):
    """Find all image URLs in *text* (page HTML) and download them to D://pics//.

    Prints each discovered URL, then downloads any image not already on
    disk. Per-file failures are reported and skipped so one bad URL does
    not abort the rest.
    """
    root = "D://pics//"
    # Raw string + escaped dot: the original '.' before (?:jpg|...) matched
    # ANY character, so e.g. '...xjpg' would wrongly match.
    picurls = re.findall(r'https?://.+?\.(?:jpg|png|gif)', text)
    for url in picurls:
        print(url)
    # Create the target directory once, up front (exist_ok avoids the
    # check-then-mkdir race the original had inside the loop).
    os.makedirs(root, exist_ok=True)
    for url in picurls:
        path = root + url.split('/')[-1]
        try:
            if not os.path.exists(path):
                # timeout + raise_for_status: don't hang forever and don't
                # save an HTML error page as an "image".
                r = requests.get(url, timeout=30)
                r.raise_for_status()
                with open(path, 'wb') as f:
                    f.write(r.content)
                # (the with-block closes the file; the old f.close() was redundant)
                print("保存成功")
            else:
                print("文件已存在")
        except Exception:  # narrow from bare except:; keep best-effort behavior
            print("爬取失败")


def main():
    """Entry point: crawl one Amazon product page and save all its images."""
    url = "https://www.amazon.cn/dp/B079FLYB49"
    text = getHTMLText(url)
    savePic(text)
    print("finish")


# Guard the call so importing this module doesn't trigger the crawl;
# running it as a script behaves exactly as before.
if __name__ == "__main__":
    main()

笔者比较懒就不解释了,大多数代码其实前面就出现过