1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
|
import requests
import re
from bs4 import BeautifulSoup


class Spider:
    """Crawl question pages on www.ctguqmx.com and print one author's posts.

    For each fetched page, every ``<div class="mod-head">`` whose nested
    ``div > p > a`` link text equals ``TARGET_AUTHOR`` is printed together
    with the text of the ``div`` inside the element that follows it.
    """

    # Link text that a "mod-head" block must carry to be reported.
    TARGET_AUTHOR = "钟俊威"

    # Seconds to wait for the server before giving up on a request;
    # without a timeout ``requests.get`` can block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        pass

    def deals(self, html):
        """Parse *html*, dump it prettified, and print matching entries.

        Args:
            html: Markup of a fetched page, as text.
        """
        soup = BeautifulSoup(html, 'html.parser')
        print(soup.prettify())
        for head in soup.find_all('div', class_="mod-head"):
            try:
                author = head.div.p.a.get_text()
                if author == self.TARGET_AUTHOR:
                    print(author + head.find_next_sibling().div.get_text())
            except AttributeError:
                # A tag in the chain is missing (BeautifulSoup yields None
                # for absent children/siblings); skip this block.
                continue

    def load(self, url):
        """Fetch *url* and hand the response body to :meth:`deals`.

        Prints the URL first. If the request fails, prints ``"No"`` and
        returns without parsing anything.

        Args:
            url: Absolute URL of the page to download.
        """
        print(url)
        headers = {
            "Connection": "keep-alive",
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"
        }
        try:
            response = requests.get(
                url, headers=headers, timeout=self.REQUEST_TIMEOUT
            )
            html = response.text
        except requests.RequestException:
            print("No")
            # Nothing was downloaded, so there is nothing to parse.
            return
        self.deals(html)

    def main(self):
        """Visit question pages 300 through 399, one after another."""
        url = "https://www.ctguqmx.com/question/"
        for i in range(300, 400):
            # The site root is fetched before every question page, as in the
            # original script. NOTE(review): purpose not evident from the
            # code - confirm whether this request is actually required.
            self.load("https://www.ctguqmx.com")
            self.load(url + str(i))


if __name__ == '__main__':
    spider = Spider()
    spider.main()
|
近期评论