Python 爬虫笔记

最简单的爬虫

获取标题，并捕获异常
我爬我自己

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup
def getTitle(url):
    """Fetch ``url`` and return the text of the first ``<h1>`` in its body.

    Every element whose ``class`` is ``headerlink`` is printed to stdout
    before the title is returned.

    Args:
        url: Address of the page to download.

    Returns:
        The text of ``body.h1``, or ``None`` when the page cannot be
        fetched or it has no ``<body>`` or no ``<h1>``.
    """
    try:
        # The context manager closes the response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError):
        # HTTPError: the server answered with an error status.
        # URLError: the resource could not be reached at all.
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(markup, "html.parser")

    # Attribute navigation yields None (it does not raise) when a tag is
    # missing, so check both levels before calling get_text().
    body = soup.body
    title = body.h1 if body is not None else None
    if title is None:
        return None

    for element in soup.find_all(attrs={"class": "headerlink"}):
        print(element)
    return title.get_text()



# Demo run: fetch one blog post and print its title, or "None" on failure.
tit = getTitle("https://beiyuouo.github.io/2019/05/26/note-matlab/")
# Identity comparison is the idiomatic way to test for None.
if tit is not None:
    print(tit)
else:
    print("None")