# -*- coding: utf-8 -*-
'''
Web crawler that scrapes ChainCatcher news.
(The data-v-***** attribute used below goes stale when the site redeploys; update it periodically.)
'''
import sys
import os
import time
from datetime import datetime

from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup

# Make the project root (AutoInfo) importable regardless of where the script is run from.
sys.path.append(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo')
from utils.utils import *


def chaincatcher_news():
    url = "https://www.chaincatcher.com/news"
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        try:
            page.goto(url)
            time.sleep(2)  # give the page time to render its initial content

            # Scroll slowly for ~10 seconds so lazily loaded news items are fetched.
            start_time = time.time()
            while time.time() - start_time < 10:
                page.mouse.wheel(0, 100)
                time.sleep(0.1)

            page_content = page.content()
            soup = BeautifulSoup(page_content, 'html.parser')
            # Collect news snippets, skipping the "微信扫码" (WeChat QR-code) prompt.
            contents = [
                span.get_text(strip=True)
                for span in soup.find_all('span', class_='text', attrs={'data-v-6560eea9': True})
                if '微信扫码' not in span.get_text()
            ]
            result = '\n'.join(contents)

            if result:
                # Append the push time (推送时间) in a Chinese date format.
                result += f'\n推送时间: {datetime.now().strftime("%Y年%m月%d日 %H时%M分%S秒")}'
                title = 'ChainCatcher' + datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                sub = 'ChainCatcher News'
                SendEmail(subject=sub, title=title, text=result).send()
                # GotifyNotifier(title='ChainCatcher News', message=result, token_name='news').send_message()
            else:
                print("No news found.")
        finally:
            browser.close()


if __name__ == '__main__':
    # Retry up to 5 times; the site occasionally times out or renders slowly.
    for retry in range(5):
        try:
            chaincatcher_news()
            break
        except Exception as e:
            sleep_time = 20
            print(f"Error occurred: {e}. Retry {retry + 1}/5, sleeping {sleep_time}s")
            time.sleep(sleep_time)
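

# --- Optional sketch: selecting news spans without pinning the data-v hash ---
# The docstring warns that the data-v-***** attribute goes stale. A hedged
# alternative, assuming the news text stays inside <span class="text"> elements
# carrying some Vue scoped-style marker (any attribute starting with "data-v-"),
# is to match on the class and the attribute prefix instead of a hard-coded hash.
# This helper is illustrative only and is not called anywhere above.
def extract_news_spans(page_content: str) -> list:
    """Collect news snippets by class, ignoring the exact data-v-* value."""
    soup = BeautifulSoup(page_content, 'html.parser')
    return [
        span.get_text(strip=True)
        for span in soup.find_all('span', class_='text')
        if any(attr.startswith('data-v-') for attr in span.attrs)
        and '微信扫码' not in span.get_text()  # still skip the WeChat QR-code prompt
    ]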