首页 > itarticle > python爬虫笔记

python爬虫笔记

admin 11月 14, 2020 0

最简单的爬虫

获取标题，并捕获异常
我爬我自己

from urllib.error import HTTPError
from urllib.error import URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup
def getTitle(url):
    """Return the text of the first <h1> inside the page body at *url*.

    Every element carrying the CSS class "headerlink" is printed to stdout
    before the title is returned.

    Args:
        url: Address of the page to download.

    Returns:
        The text of the first <h1> in <body>, or None when the page cannot
        be fetched or has no <body>/<h1>.
    """
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except URLError:
        # HTTPError is a subclass of URLError, so this covers both HTTP error
        # statuses and lower-level failures (unreachable host, missing file).
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    bsObj = BeautifulSoup(markup, "html.parser")

    body = bsObj.body
    title = body.h1 if body is not None else None
    if title is None:
        # A missing <body> or <h1> yields None instead of an AttributeError.
        return None

    # NOTE(review): the empty tag name presumably places no restriction on
    # the tag, so only the class filter applies — confirm against the
    # installed bs4 version.
    for obj in bsObj.findAll("", {"class": "headerlink"}):
        print(obj)
    return title.get_text()



# Fetch the title of the sample page and print it; print the literal
# string "None" when no title could be obtained.
tit = getTitle("https://beiyuouo.github.io/2019/05/26/note-matlab/")
print(tit if tit is not None else "None")