1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
import re

import requests
from bs4 import BeautifulSoup
def devio_desc(url, session=None):
    """Scrape one listing page and collect the videos it links to.

    Args:
        url: Address of the listing page to fetch.
        session: Optional ``requests`` session used for the request. When
            omitted, a temporary session is created and closed again.

    Returns:
        A list of dicts, one per list item found on the page, with the keys
        ``video_pic`` (the anchor's ``data-original`` attribute),
        ``video_href`` (absolute detail-page URL) and ``title``. The list is
        empty when the page has no ``box-video-list`` container.
    """
    headers = {
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "referer": "https://www.553ca.com/html/1/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }

    # Only close the session if it was created here; a caller-supplied
    # session stays under the caller's control.
    owns_session = session is None
    if owns_session:
        session = requests.session()
    try:
        response = session.get(url=url, headers=headers)
        # Decode the raw bytes as UTF-8 directly instead of round-tripping
        # the guessed text through latin1.
        html = response.content.decode("utf-8")
    finally:
        if owns_session:
            session.close()

    soup = BeautifulSoup(html, "html.parser")
    content = soup.find("div", class_="box-video-list")
    if content is None:
        return []

    data = []
    for item in content.find_all("li", "col-md-2 col-sm-3 col-xs-4"):
        link = item.find("a")
        # Skip list items that carry no usable link rather than crashing.
        if link is None or link.get("href") is None:
            continue
        data.append(
            {
                "video_pic": link.get("data-original"),
                "video_href": "https://www.553ca.com" + link.get("href"),
                "title": link.get("title"),
            }
        )
    return data
def devio_url(data, script_index=18):
    """Resolve the download URL for every video in ``data``.

    Each detail page is fetched, the ``<script>`` element at position
    ``script_index`` is taken, and the value of the ``download="..."``
    attribute that precedes ``target="_blank"`` is extracted from its text.

    Args:
        data: Sequence of dicts with at least the keys ``video_href`` and
            ``title``.
        script_index: Position of the ``<script>`` element that holds the
            download link. Defaults to 18.

    Returns:
        A list of dicts with the keys ``devio_name`` (the entry's title) and
        ``devio_url`` (the extracted download URL), in input order.

    Raises:
        IndexError: If a page has no ``<script>`` element at ``script_index``.
        ValueError: If the selected script contains no download link.
    """
    if not data:
        return []

    # Compile once; the same two-step match is applied to every page.
    outer_pattern = re.compile(r' download="(.*)"', re.S)
    inner_pattern = re.compile(r'download="(.*)" target="_blank"', re.S)

    devio_data = []
    # One session for all pages, closed on exit.
    with requests.session() as session:
        for entry in data:
            page_url = entry["video_href"]
            # Decode the raw bytes as UTF-8 directly instead of round-tripping
            # the guessed text through latin1.
            html = session.get(page_url).content.decode("utf-8").strip()
            soup = BeautifulSoup(html, "html.parser")
            script = soup.find_all("script")[script_index]

            outer_match = outer_pattern.search(str(script))
            inner_match = (
                inner_pattern.search(outer_match.group(0))
                if outer_match is not None
                else None
            )
            if inner_match is None:
                raise ValueError("no download link found on %s" % page_url)

            devio_data.append(
                {
                    "devio_name": entry["title"],
                    "devio_url": inner_match.group(1),
                }
            )
    return devio_data
def dewnload(devio_data, save_dir="D:/爬取视频"):
    """Download one video to disk and report the outcome as a message.

    Args:
        devio_data: Dict with the keys ``devio_url`` (address to download)
            and ``devio_name`` (used as the file name, without extension).
        save_dir: Directory the ``.mp4`` file is written to.

    Returns:
        ``"<name>下载完成"`` on success, ``"<name>下载失败"`` when the request
        or the file write fails.
    """
    url = devio_data["devio_url"]
    name = devio_data["devio_name"]
    try:
        # The context manager releases the streamed connection in all cases.
        with requests.get(url, stream=True) as r:
            # Do not save an HTTP error page as if it were the video.
            r.raise_for_status()
            with open("%s/%s.mp4" % (save_dir, name), "wb") as mp4:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        mp4.write(chunk)
    except (requests.RequestException, OSError):
        # Network and filesystem failures only; KeyboardInterrupt and
        # programming errors still propagate.
        return "%s下载失败" % name
    return "%s下载完成" % name
if __name__ == '__main__':
    # Listing page to crawl.
    url = "https://www.553ca.com/html/65/"
    # Module-level session; the listing scraper reads it as a global.
    session = requests.session()
    # Scrape the listing, resolve each download link, then fetch the videos
    # one by one and print the status message of each download.
    for video in devio_url(devio_desc(url)):
        print(dewnload(video))
|
近期评论