You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
AutoInfo/to_email/chaincatcher.py

55 lines
1.9 KiB

# -*- coding: utf-8 -*-
'''
网络爬虫,抓取链捕手新闻(data-v-***** 此参数会失效, 定期更换)
'''
import sys
import os
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
from utils.utils import *
def chaincatcher_news(data_attr='data-v-6560eea9'):
    """Scrape the ChainCatcher news page and e-mail the collected headlines.

    The page is opened in headless Chromium, scrolled for about ten seconds
    so that more entries are rendered, and then parsed with BeautifulSoup.
    Every ``<span class="text">`` carrying the ``data_attr`` attribute is
    treated as one headline. If any headlines are found they are sent with
    ``SendEmail``; otherwise a notice is printed.

    Args:
        data_attr: Name of the scoped ``data-v-*`` attribute that marks the
            headline spans. The site changes this value from time to time,
            so it can be overridden without editing the function.

    Raises:
        Exception: Any error raised while browsing, parsing, or sending the
            e-mail propagates to the caller unchanged.
    """
    url = "https://www.chaincatcher.com/news"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)
            time.sleep(2)  # give the initial render a moment to settle

            # Scroll in small steps for ~10 s to trigger lazy loading.
            deadline = time.time() + 10
            while time.time() < deadline:
                page.mouse.wheel(0, 100)
                time.sleep(0.1)

            page_content = page.content()
        finally:
            # Single close point: runs on success and on failure alike.
            browser.close()

    soup = BeautifulSoup(page_content, 'html.parser')
    headlines = []
    for span in soup.find_all('span', class_='text', attrs={data_attr: True}):
        text = span.get_text(strip=True)
        # Compare against the span's text: `in` on a Tag only checks its
        # direct children, not substrings of the rendered text.
        if text and "微信扫码" not in text:
            headlines.append(text)

    if not headlines:
        print("No news found.")
        return

    # One timestamp so the body and the title agree.
    now = datetime.now()
    result = '\n'.join(headlines)
    result += f'\n推送时间: {now.strftime("%Y年%m月%d日%H时%M分%S秒")}'
    title = 'ChainCatcher' + now.strftime('%Y-%m-%d %H:%M:%S')
    sub = 'ChainCatcher News'
    SendEmail(subject=sub, title=title, text=result).send()
    # Alternative channel, currently disabled:
    # GotifyNotifier(title='ChainCatcher News', message=result, token_name='news').send_message()
# Run the scraper when this file is executed, retrying on any failure.
# A broad `except Exception` is intentional here: this is the script's
# top-level boundary and every failure is handled the same way.
max_attempts = 5
sleep_time = 20  # seconds to wait between attempts
for retry in range(max_attempts):
    try:
        chaincatcher_news()
        break
    except Exception as e:
        if retry + 1 == max_attempts:
            # No attempt follows, so do not sleep or claim to retry.
            print(f"Error occurred: {e}. Giving up after {max_attempts} attempts.")
            break
        print(f"Error occurred: {e}. Retrying... {retry + 1} \t sleep time: {sleep_time}")
        time.sleep(sleep_time)