1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
|
import requests import re import os from bs4 import BeautifulSoup
def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    The request is sent with a browser-like ``user-agent`` header and a
    30-second timeout. The response encoding is replaced by the encoding
    detected from the body before the text is decoded.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or the literal string "Error" when the
        request fails or the server answers with an HTTP error status.
    """
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
        # Use the encoding sniffed from the content rather than the one
        # declared in the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel; KeyboardInterrupt and
        # programming errors are no longer silently swallowed.
        return "Error"
def savePic(text, root="D://pics//"):
    """Download every image URL found in *text* into the *root* directory.

    Scans *text* for http(s) URLs ending in ``.jpg``, ``.png`` or ``.gif``,
    prints each URL found, then tries to save each one under *root* using
    the last path segment of the URL as the file name.

    One status line is printed per URL: "保存成功" (saved),
    "文件已存在" (file already exists) or "爬取失败" (download failed).
    Failures are reported this way and never raised to the caller.

    Args:
        text: Page source to scan for image URLs.
        root: Destination directory; created (with parents) if missing.
            Defaults to the location the script has always used.
    """
    # The dot before the extension is escaped so "…/ajpg" is not treated as
    # an image, and the URL body excludes whitespace, quotes and angle
    # brackets so a match cannot swallow surrounding markup.
    pic_urls = re.findall(r'https?://[^\s"\'<>]+?\.(?:jpg|png|gif)', text)
    for url in pic_urls:
        print(url)

    for url in pic_urls:
        path = os.path.join(root, url.split('/')[-1])
        try:
            os.makedirs(root, exist_ok=True)
            if os.path.exists(path):
                print("文件已存在")
                continue
            r = requests.get(url, timeout=30)
            # Do not write an HTTP error page to disk as if it were an image.
            r.raise_for_status()
            with open(path, 'wb') as f:
                f.write(r.content)
            print("保存成功")
        except (requests.RequestException, OSError):
            print("爬取失败")
def main():
    """Download one fixed product page and save the images it references.

    The page text is obtained with getHTMLText and passed straight to
    savePic; "finish" is printed once both calls have returned.
    """
    page_url = "https://www.amazon.cn/dp/B079FLYB49"
    savePic(getHTMLText(page_url))
    print("finish")


# Called unconditionally at module load (there is no __main__ guard), so
# importing this file also triggers the scrape.
main()
|
近期评论