AutoInfo/to_email/chaincatcher.py

# -*- coding: utf-8 -*-
'''
Web crawler that fetches ChainCatcher news.
(The data-v-***** attribute used to locate items expires; replace it periodically.)
'''

import sys
import os
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup

sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
from utils.utils import *


def _fetch_rendered_news_html(url, scroll_seconds=10):
    """Load *url* in headless Chromium, scroll the page, and return its HTML.

    The page is scrolled in small steps for ``scroll_seconds`` seconds before
    the DOM is captured. The browser is always closed, even if navigation or
    scrolling raises.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)

            # Use Playwright's own wait instead of time.sleep: the sync API
            # needs its internal event loop to keep running while we pause.
            page.wait_for_timeout(2000)
            deadline = time.monotonic() + scroll_seconds
            while time.monotonic() < deadline:
                page.mouse.wheel(0, 100)
                page.wait_for_timeout(100)
            return page.content()
        finally:
            browser.close()


def chaincatcher_news(scoped_attr='data-v-6560eea9'):
    """Scrape the ChainCatcher news page and email the collected headlines.

    Args:
        scoped_attr: Name of the ``data-v-*`` attribute carried by the news
            ``<span class="text">`` elements. The site changes this value
            from time to time, so it can be overridden without editing code.

    Raises:
        Exception: Any error from the browser, parsing, or mail sending is
            propagated to the caller.
    """
    url = "https://www.chaincatcher.com/news"
    page_content = _fetch_rendered_news_html(url)

    soup = BeautifulSoup(page_content, 'html.parser')
    contents = []
    for span in soup.find_all('span', class_='text', attrs={scoped_attr: True}):
        text = span.get_text(strip=True)
        # Test the extracted text: `"..." in tag` only compares against the
        # tag's direct children, so it would miss the QR-code prompt.
        if "微信扫码" not in text:
            contents.append(text)

    result = '\n'.join(contents)
    if not result:
        print("No news found.")
        return

    # Sample the clock once so the body and title timestamps agree.
    now = datetime.now()
    result += f'\n推送时间: {now.strftime("%Y年%m月%d日 %H时%M分%S秒")}'

    title = 'ChainCatcher' + str(now.strftime('%Y-%m-%d %H:%M:%S'))
    sub = 'ChainCatcher News'
    # SendEmail is presumably provided by the utils.utils star import.
    SendEmail(subject=sub, title=title, text=result).send()
    # GotifyNotifier(title='ChainCatcher News', message=result, token_name='news').send_message()


# Run the scraper when this file is executed/imported, retrying on any failure.
max_retries = 5
sleep_time = 20  # seconds to wait between attempts
for retry in range(max_retries):
    try:
        chaincatcher_news()
        break
    except Exception as e:
        if retry + 1 == max_retries:
            # No further attempt follows, so sleeping would only delay exit.
            print(f"Error occurred: {e}. Giving up after {max_retries} attempts.")
            break
        print(f"Error occurred: {e}. Retrying... {retry + 1} \t sleep time: {sleep_time}")
        time.sleep(sleep_time)