# -*- coding: utf-8 -*-
import os
import re
import threading

import requests
from bs4 import BeautifulSoup
# Download target folder ("斗图" — the meme/sticker category being scraped).
dirs="斗图"
# Desktop Chrome User-Agent so doutula.com serves the regular HTML page
# instead of blocking the default python-requests agent.
headers ={ 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36' }
def mkdir(path=None):
    """Create the download directory and make it the working directory.

    The original line lost its function name in transit (``def ():``); it is
    called as ``mkdir()`` from the ``__main__`` block, so that name is restored.

    Args:
        path: directory to create; defaults to the module-level ``dirs``
              constant, keeping the original no-argument call working.

    Returns:
        True if the directory was created and the cwd changed into it;
        False if it already existed (nothing is changed in that case).
    """
    if path is None:
        path = dirs
    if not os.path.exists(path):
        os.mkdir(path)
        os.chdir(path)
        return True
    else:
        # Original message: "folder already exists".
        print("文件夹已存在")
        return False
def get_one_page(url):
    """Fetch one doutula listing page and extract its meme images.

    Returns a list of ``(alt_text, image_url)`` tuples, one per lazily
    loaded image tag on the page.
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'lxml')
    tags = soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'})
    # alt holds the caption (used as the filename), data-original the real URL.
    return [(tag['alt'], tag['data-original']) for tag in tags]
def download_img(img, name, suf, i):
    """Write a downloaded image to disk as ``<name>.<suf>``.

    Args:
        img:  a response-like object; only its ``.content`` bytes are read.
        name: sanitized file stem (illegal filename characters removed by caller).
        suf:  file extension without the dot.
        i:    page index — unused here, kept for caller compatibility.
    """
    # 'wb' instead of the original 'wb+': the file is only written, never read back.
    with open(name + "." + suf, 'wb') as f:
        # Original message: "downloading <name>".
        print("正在下载" + name)
        f.write(img.content)
if __name__ == "__main__":
    mkdir()
    # Walk every listing page of the site.
    for page in range(1, 2605):
        one_page_list = get_one_page('http://www.doutula.com/photo/list/?page=' + str(page))
        threads = []
        for alt, src in one_page_list:
            # Strip characters that are illegal in filenames.
            name = re.sub('[/:*?"<>|_]', '', alt)
            suf = src[-3:]
            img = requests.get(src)
            t = threading.Thread(target=download_img, args=[img, name, suf, page])
            threads.append(t)
        # Bug fix: the original iterated range(0, len(threads)-1), so the LAST
        # thread of every page was never started nor joined. It also reused the
        # outer loop variable ``i`` for the thread loops.
        for t in threads:
            t.start()
        for t in threads:
            t.join()