You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
55 lines
1.9 KiB
55 lines
1.9 KiB
# -*- coding: utf-8 -*-
|
|
'''
|
|
网络爬虫,抓取链捕手新闻(data-v-***** 此参数会失效, 定期更换)
|
|
'''
|
|
|
|
import sys
|
|
import os
|
|
from playwright.sync_api import sync_playwright
|
|
from bs4 import BeautifulSoup
|
|
|
|
sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
|
|
from utils.utils import *
|
|
|
|
|
|
def chaincatcher_news():
    """Scrape the ChainCatcher news page and e-mail the collected headlines.

    Loads https://www.chaincatcher.com/news in headless Chromium, scrolls the
    page for about ten seconds, then extracts the text of every
    ``<span class="text">`` element carrying the ``data-v-6560eea9``
    attribute. Entries containing the WeChat QR-code prompt ("微信扫码") and
    empty entries are dropped. The remaining lines are joined with newlines,
    stamped with the push time and sent through ``SendEmail``. When nothing
    is extracted, "No news found." is printed instead.

    NOTE(review): ``time``, ``datetime`` and ``SendEmail`` are not imported
    explicitly in this file; presumably they are re-exported by
    ``from utils.utils import *`` -- confirm.

    Raises:
        Exception: any error raised by Playwright, parsing or mail delivery
            propagates unchanged so the caller can decide whether to retry.
    """
    url = "https://www.chaincatcher.com/news"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)

            # Use Playwright's own wait instead of time.sleep(): the sync API
            # relies on an internal event loop that a blocking sleep stalls.
            page.wait_for_timeout(2000)

            # Scroll in small steps for ~10 s (presumably to trigger lazy
            # loading of further news items -- confirm against the site).
            deadline = time.monotonic() + 10
            while time.monotonic() < deadline:
                page.mouse.wheel(0, 100)
                page.wait_for_timeout(100)

            page_content = page.content()
        finally:
            # Single close point: runs on success and on failure alike.
            browser.close()

    soup = BeautifulSoup(page_content, 'html.parser')
    # The data-v-* attribute is a generated scope id that changes over time
    # and has to be refreshed periodically (see the module docstring).
    spans = soup.find_all('span', class_='text', attrs={'data-v-6560eea9': True})

    contents = []
    for span in spans:
        text = span.get_text(strip=True)
        # Compare against the extracted text: `"..." in tag` only checks the
        # tag's direct children for equality, not for a substring match.
        if text and "微信扫码" not in text:
            contents.append(text)

    result = '\n'.join(contents)
    if not result:
        print("No news found.")
        return

    # Sample the clock once so the body and the title carry the same instant.
    now = datetime.now()
    result += f'\n推送时间: {now.strftime("%Y年%m月%d日 %H时%M分%S秒")}'

    title = 'ChainCatcher' + now.strftime('%Y-%m-%d %H:%M:%S')
    sub = 'ChainCatcher News'
    SendEmail(subject=sub, title=title, text=result).send()
    # Alternative delivery channel, currently disabled:
    # GotifyNotifier(title='ChainCatcher News', message=result, token_name='news').send_message()
|
|
|
|
|
|
# Run the scraper, retrying on any failure up to a fixed number of attempts.
max_attempts = 5
sleep_time = 20  # seconds to wait between attempts

for retry in range(max_attempts):
    try:
        chaincatcher_news()
        break
    except Exception as e:
        if retry + 1 >= max_attempts:
            # Last attempt failed: no retry follows, so do not claim one
            # and do not waste another sleep interval before exiting.
            print(f"Error occurred: {e}. Giving up after {max_attempts} attempts.")
            break
        print(f"Error occurred: {e}. Retrying... {retry + 1} \t sleep time: {sleep_time}")
        time.sleep(sleep_time)
|
|
|