1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
import re
import zipfile

import bs4
import requests
from bs4 import BeautifulSoup
def getHtml(url):
    """Fetch *url* and return the response body as decoded text.

    The original line read `def (url):` — the function name was lost; `main`
    calls it as `getHtml`, so that name is restored here.

    Raises whatever requests raises on network failure, and HTTPError on
    bad status codes.
    """
    try:
        r = requests.get(url)
        # Bug fix: the original wrote `r.raise_for_status` without calling
        # it, which silently does nothing. It must be invoked to raise on
        # 4xx/5xx responses.
        r.raise_for_status()
        # Use the encoding sniffed from the body — Chinese pages often
        # declare the wrong charset in their headers.
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        # Bare re-raise preserves the original traceback (the original
        # `raise e` reset it).
        raise
def getHtmlTitle(html):
    """Parse *html* and return its <title> element as a bs4 Tag.

    Per bs4 semantics, returns None when the document has no <title>.
    """
    document = BeautifulSoup(html, "html.parser")
    return document.title
def getPwd(html):
    """Extract the Baidu-pan access code from the page HTML.

    Scans every <span> for one whose text contains "百度网盘" and takes the
    third colon-separated field of that text as the code (the text is
    presumably of the form "百度网盘:链接 密码:CODE" — verify against the
    live page). Returns "" when no matching span exists.
    """
    soup = BeautifulSoup(html, "html.parser")
    pwd = ""
    for span in soup.find_all('span'):
        text = span.get_text()
        if "百度网盘" in text:
            print(text)
            # Bug fix (idiom): the original named this variable `list`,
            # shadowing the builtin.
            fields = text.split(":")
            # Assumes at least three colon-separated fields — IndexError
            # otherwise; TODO confirm against the page markup.
            pwd = fields[2]
            break
    return pwd
def getTitleDate(title):
    """Find a YYYY-MM-DD date in the page <title> and return it as bare digits.

    *title* is an object exposing the title text via `.string` (a bs4 Tag).
    Returns e.g. "20170601".

    Bug fix: the scraped original had its regex backslashes stripped —
    r'2017-dd-dd' matches the literal letters "dd" (never a date) and
    re.sub(r'D', ...) deletes the letter D instead of non-digits. The digit
    classes are restored, and the year is generalized from the hard-coded
    2017 to any four digits (backward compatible).

    Raises AttributeError if no date is present (re.search returns None).
    """
    match = re.search(r'\d{4}-\d{2}-\d{2}', title.string)
    date = match.group()
    print("最新更新日期为:" + date)
    # Drop the hyphens (all non-digit characters) to get "YYYYMMDD".
    return re.sub(r'\D', "", date)
def getFileName(downloadHtml):
    """Return the href attribute of the third <a> anchor on the download page.

    Assumes the page's directory listing puts the target file at index 2 of
    the anchor list — IndexError if fewer than three links exist.
    """
    anchors = BeautifulSoup(downloadHtml, "html.parser").find_all('a')
    return anchors[2]['href']
def downLoad(downloadUrl, date):
    """Download *downloadUrl* and save it as hosts<date>.zip in the cwd.

    Raises requests.HTTPError on a bad status code instead of silently
    writing an error page to disk as if it were the archive.
    """
    print(downloadUrl)
    r = requests.get(downloadUrl)
    # Robustness fix: without this, a 404 page would be saved as the zip
    # and only fail later, confusingly, inside extract().
    r.raise_for_status()
    with open("hosts" + date + ".zip", "wb") as code:
        code.write(r.content)
def extract(name, pwd):
    """Extract every member of zip archive *name* into the current directory.

    *pwd* is the archive password (the Baidu-pan code); zipfile ignores it
    for unencrypted members.

    Bug fixes: the original passed `str.encode(s)` where `s` was undefined,
    so every call raised NameError — the intent was clearly to encode *pwd*.
    The ZipFile is now also closed deterministically via a context manager.
    """
    with zipfile.ZipFile(name, 'r') as archive:
        for member in archive.namelist():
            archive.extract(member, ".", pwd.encode())
def main():
    """Drive the full flow: scrape the update date from the blog page,
    download the matching hosts zip from the mirror, and extract it with
    the Baidu-pan code found on the page.
    """
    url = "https://laod.cn/hosts/2017-google-hosts.html"
    serverUrl = "https://iiio.io/download/"
    html = getHtml(url)
    title = getHtmlTitle(html)
    date = getTitleDate(title)
    # The mirror serves a per-date directory listing; the file name is
    # scraped from it rather than hard-coded.
    serverHtml = getHtml(serverUrl + date)
    fileName = getFileName(serverHtml)
    downloadUrl = serverUrl + date + '/' + fileName
    downLoad(downloadUrl, date)
    print("下载完成")
    pwd = getPwd(html)
    extract("hosts" + date + ".zip", pwd)
    print("任务完成")


# Guard the entry point so importing this module does not trigger the
# download (the original called main() unconditionally at import time).
if __name__ == "__main__":
    main()
|
近期评论