利用 Python + Selenium 爬取选股宝首页新闻及利好、利空消息。
直接附上源码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
|
import time

from selenium import webdriver
class XuanGuBao:
    """Scrape the news feed shown on the xuangubao.cn home page.

    The page is rendered in a Chrome instance driven by Selenium. Each news
    entry is turned into a dict with its id, title, detail text and, when the
    entry is marked bearish or bullish, a flag plus its topic tags.
    """

    # Locator strategy passed to ``find_elements``. This is the same value as
    # ``selenium.webdriver.common.by.By.XPATH``; the ``find_elements_by_*``
    # helpers used previously were removed in Selenium 4.
    _XPATH = "xpath"

    _LOAD_MORE_XPATH = "//span[@class='home-news-footer-loadmore']"
    _NEWS_ITEM_XPATH = "//ul[@class='home-news-container']/li"
    _TITLE_XPATH = ".//div[@class='news-item-title']/a/span"
    _DETAIL_XPATH = ".//div[@class='news-item-detail']/pre"
    _BEAR_XPATH = ".//span[@class='bullish-and-bear bear']"
    _BULLISH_XPATH = ".//span[@class='bullish-and-bear bullish']"
    _TAG_XPATH = ".//a[@class='news-item-intro-topic']"

    def __init__(self):
        """Remember the start URL and launch a Chrome WebDriver session."""
        self.start_url = "https://xuangubao.cn/"
        self.driver = webdriver.Chrome()

    def _load_more(self, max_clicks, wait_seconds):
        """Click the "load more" footer up to ``max_clicks`` times."""
        for _ in range(max_clicks):
            # Re-locate the button on every pass: it may disappear once the
            # feed is exhausted, and a stale reference must not be reused.
            buttons = self.driver.find_elements(
                self._XPATH, self._LOAD_MORE_XPATH
            )
            if not buttons:
                break
            buttons[0].click()
            # Give the page time to append the newly loaded entries.
            time.sleep(wait_seconds)

    def _first_text(self, element, xpath):
        """Return the text of the first match of ``xpath``, or ``None``."""
        matches = element.find_elements(self._XPATH, xpath)
        return matches[0].text if matches else None

    def _tags(self, li):
        """Return the topic tag texts attached to one news entry."""
        return [
            tag.text for tag in li.find_elements(self._XPATH, self._TAG_XPATH)
        ]

    def _parse_item(self, li):
        """Convert one ``<li>`` news entry into a dict."""
        item = {
            'id': li.get_attribute('id'),
            'title': self._first_text(li, self._TITLE_XPATH),
            'detail': self._first_text(li, self._DETAIL_XPATH),
        }

        if li.find_elements(self._XPATH, self._BEAR_XPATH):
            print('found news of bear.')
            item['isBear'] = True
            item['tags'] = self._tags(li)

        # The bullish marker is checked on its own; testing the bear marker
        # here would mislabel bear news and never flag bullish news.
        if li.find_elements(self._XPATH, self._BULLISH_XPATH):
            print('found news of bullish.')
            item['isBullish'] = True
            item['tags'] = self._tags(li)

        return item

    def get_content_lsit(self, max_load_more=1, wait_seconds=1):
        """Extract the news entries from the currently loaded page.

        Args:
            max_load_more: Maximum number of times to click the "load more"
                footer before scraping. Clicking stops early when the button
                is no longer present, so the loop always terminates.
            wait_seconds: Pause after each click, in seconds.

        Returns:
            A list of dicts, one per news entry, with keys ``id``, ``title``
            and ``detail`` (``title``/``detail`` are ``None`` when missing).
            Entries marked bearish additionally carry ``isBear`` and ``tags``;
            entries marked bullish carry ``isBullish`` and ``tags``. The list
            is empty when the page has no entries.
        """
        self._load_more(max_load_more, wait_seconds)

        content_list = []
        for li in self.driver.find_elements(self._XPATH, self._NEWS_ITEM_XPATH):
            item = self._parse_item(li)
            print(item)
            content_list.append(item)
        return content_list

    def run(self):
        """Open the start URL, scrape it, and close the browser.

        Returns:
            The list produced by ``get_content_lsit``.
        """
        try:
            self.driver.get(self.start_url)
            return self.get_content_lsit()
        finally:
            # Always release the browser, even when scraping fails.
            self.driver.quit()
if __name__ == '__main__':
    # Script entry point: build the scraper and run one scraping pass.
    XuanGuBao().run()
|
近期评论