Python + Selenium 爬取选股宝首页新闻

利用 Python + Selenium 爬取选股宝首页新闻,以及其中的利好、利空消息。
直接附上源码:
from selenium import webdriver
import time


class XuanGuBao():
    """Scrape the news list on the Xuangubao home page with Selenium.

    The scraper opens ``start_url`` in a Chrome session, expands the feed by
    clicking the "load more" element, and turns every ``<li>`` of the news
    container into a plain dict.
    """

    # Locator strategy passed to ``find_elements``. This is the string value
    # of selenium's ``By.XPATH``; the generic ``find_elements(by, value)``
    # call is used because the ``find_elements_by_xpath`` shortcut is not
    # available in current Selenium 4 releases.
    _BY_XPATH = "xpath"

    LOAD_MORE_XPATH = "//span[@class='home-news-footer-loadmore']"
    NEWS_ITEM_XPATH = "//ul[@class='home-news-container']/li"
    TITLE_XPATH = ".//div[@class='news-item-title']/a/span"
    DETAIL_XPATH = ".//div[@class='news-item-detail']/pre"
    BEAR_XPATH = ".//span[@class='bullish-and-bear bear']"
    BULLISH_XPATH = ".//span[@class='bullish-and-bear bullish']"
    TOPIC_XPATH = ".//a[@class='news-item-intro-topic']"

    def __init__(self):
        self.start_url = "https://xuangubao.cn/"
        self.driver = webdriver.Chrome()

    def _find(self, scope, xpath):
        """Return all elements under *scope* (driver or element) matching *xpath*."""
        return scope.find_elements(self._BY_XPATH, xpath)

    def _first_text(self, scope, xpath):
        """Return the text of the first match for *xpath*, or None if there is none."""
        matches = self._find(scope, xpath)
        return matches[0].text if matches else None

    def _load_more(self, max_clicks, pause):
        """Click the "load more" element up to *max_clicks* times.

        The element is looked up again before every click so the loop stops
        as soon as it is no longer present, instead of clicking one cached
        reference forever.
        """
        for _ in range(max_clicks):
            buttons = self._find(self.driver, self.LOAD_MORE_XPATH)
            if not buttons:
                break
            buttons[0].click()
            # Pause after each click before looking for the button again.
            time.sleep(pause)

    def get_content_lsit(self, max_clicks=10, pause=1):
        """Extract the news items currently shown on the page.

        Args:
            max_clicks: Upper bound on how many times "load more" is clicked.
            pause: Seconds to sleep after each click.

        Returns:
            A list of dicts with the keys ``id``, ``title`` and ``detail``
            (``title``/``detail`` are None when the element is missing).
            Items marked bearish additionally carry ``isBear`` and ``tags``;
            items marked bullish carry ``isBullish`` and ``tags``.
        """
        self._load_more(max_clicks, pause)

        content_list = []
        for li in self._find(self.driver, self.NEWS_ITEM_XPATH):
            item = {
                'id': li.get_attribute('id'),
                'title': self._first_text(li, self.TITLE_XPATH),
                'detail': self._first_text(li, self.DETAIL_XPATH),
            }

            is_bear = bool(self._find(li, self.BEAR_XPATH))
            is_bullish = bool(self._find(li, self.BULLISH_XPATH))

            # Bearish news
            if is_bear:
                print('found news of bear.')
                item['isBear'] = True
            # Bullish news (checked on its own marker, not on the bear one)
            if is_bullish:
                print('found news of bullish.')
                item['isBullish'] = True
            # Topic tags are only collected for items carrying a marker.
            if is_bear or is_bullish:
                item['tags'] = [tag.text for tag in self._find(li, self.TOPIC_XPATH)]

            print(item)
            content_list.append(item)

        return content_list

    def run(self):
        """Open the start page, scrape it, and always close the browser.

        Returns:
            The list produced by ``get_content_lsit``.
        """
        try:
            # Send the request
            self.driver.get(self.start_url)
            return self.get_content_lsit()
        finally:
            # Quit even when scraping raises so no browser process is left behind.
            self.driver.quit()


if __name__ == '__main__':
    # Script entry point: build the scraper and perform a single crawl.
    XuanGuBao().run()
python + selenium 爬取选股宝首页新闻

近期文章

近期评论

标签

热门

文章归档

分类目录

功能