1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
|
def get_favicon(link, timeout=10):
    """Download the favicon of the site serving *link* and store it on disk.

    NOTE(review): the function name is missing from the source as received
    (``def (link):``); ``get_favicon`` is a placeholder -- restore the
    original identifier so existing callers keep working.

    The conventional ``<scheme>://<netloc>/favicon.ico`` location is tried
    first.  When it does not answer with HTTP 200, the page at *link* is
    fetched and the ``href`` of its ``<link rel="shortcut icon">`` tag (or,
    failing that, ``<link rel="icon">``) is downloaded instead.

    Args:
        link: Absolute URL of a page on the target site.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The stored file name relative to ``settings.MEDIA_ROOT``
        (``favicon/<netloc with dots replaced by underscores>_favicon.ico``),
        or ``None`` when no icon could be downloaded.

    Raises:
        requests.RequestException: On connection errors or timeouts; these
            are deliberately left to propagate to the caller.
        OSError: If the destination file cannot be created or written.
    """
    # Function-scope import so the block does not depend on the file's
    # import header, which is not visible here.
    import os

    po = urlparse(link)
    file_url = '{scheme}://{netloc}/favicon.ico'.format(
        scheme=po.scheme, netloc=po.netloc)
    favicon_name = 'favicon/{url}_favicon.ico'.format(
        url=po.netloc.replace('.', '_'))
    file_path = '{root}/{name}'.format(
        root=settings.MEDIA_ROOT, name=favicon_name)

    # open(..., 'wb') does not create missing parent directories.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    if _download_icon(file_url, file_path, timeout):
        return favicon_name

    # Fall back to the icon declared in the page markup.  This runs for any
    # non-200 answer on /favicon.ico, not only 404.
    page = requests.get(link, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(page.text, 'html.parser')
    icon_link = soup.find('link', rel='shortcut icon')
    if icon_link is None:
        icon_link = soup.find('link', rel='icon')
    if icon_link is None:
        return None

    href = icon_link.get('href')
    if not href:
        # A <link> tag without a usable href gives us nothing to download.
        return None

    # urljoin leaves absolute URLs untouched and resolves relative,
    # root-relative and protocol-relative ones against the page URL.
    icon_src = urljoin(link, href)
    if _download_icon(icon_src, file_path, timeout):
        return favicon_name
    return None


def _download_icon(icon_url, file_path, timeout):
    """Stream *icon_url* into *file_path*; return True only on HTTP 200."""
    # The context manager closes the streamed response even when its body
    # is never read (any non-200 answer).
    with requests.get(icon_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        # Ask the raw stream to undo any Content-Encoding so the file holds
        # the image bytes rather than a compressed payload.
        response.raw.decode_content = True
        with open(file_path, 'wb') as out:
            shutil.copyfileobj(response.raw, out)
    return True
|
近期评论