You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
auto/utils/utils_check_base.py

68 lines
2.4 KiB

# -*- coding: utf-8 -*-
"""
消息模块基础, 用于打开浏览器等相关操作
"""
import random
from playwright.sync_api import sync_playwright
import sys
import os
import time
sys.path.append(os.path.join(os.path.abspath(__file__).split('auto')[0] + 'auto'))
from utils.utils_logs_handle import LogsHandle
class CryptoCrawler:
    """Collect element text from a list of pages using a Playwright WebKit browser.

    Every URL in ``url_list`` is opened in a single shared page, and each
    selector in ``selectors`` is looked up on it; the stripped text content
    of every element found is gathered and returned by :meth:`main`.
    """

    def __init__(self, url_list, selectors, check_difference=False, headless=True, proxy=False):
        """Store the crawl configuration and create the log handler.

        Args:
            url_list: Iterable of dicts; each dict maps a label (key) to the
                URL to visit.
            selectors: Iterable of selectors handed to ``page.query_selector``.
            check_difference: Switch for data-change detection. Only stored
                here; nothing in this class reads it.
            headless: Forwarded to the browser launch as ``headless``.
            proxy: When truthy, the browser is launched through the proxy
                server ``127.0.0.1:7890``.
        """
        self.url_list = url_list
        self.selectors = selectors
        self.check_difference = check_difference  # data-change detection (switch)
        self.data_difference = False  # data-change detection (result), defaults to "no change"
        self.logs_handle = LogsHandle()  # log writer
        self.db = 'CHECK'
        self.collection = 'check'
        self.headless = headless
        self.proxy = proxy

    def main(self):
        """Visit every configured URL and collect the text of the selected elements.

        A URL whose visit raises is logged through ``self.logs_handle`` and
        skipped; the remaining URLs are still processed.

        Returns:
            A list with one entry per URL that produced at least one match.
            Each entry is a list of ``{key: text}`` dicts, one per selector
            that matched on that page. ``None`` when nothing was collected.
        """
        launch_options = {'headless': self.headless}
        if self.proxy:
            launch_options['proxy'] = {'server': '127.0.0.1:7890'}

        all_data = []
        with sync_playwright() as playwright:
            browser = playwright.webkit.launch(**launch_options)
            try:
                context = browser.new_context(viewport={'width': 1920, 'height': 1080})
                page = context.new_page()

                for url_info in self.url_list:
                    for key, url in url_info.items():
                        result_list = []
                        try:
                            page.goto(url)
                            page.wait_for_load_state('load')
                            time.sleep(5)  # give the page time to finish loading
                            for selector in self.selectors:
                                element = page.query_selector(selector)
                                if element:
                                    res = element.text_content().strip()
                                    result_list.append({key: res})
                        except Exception as e:
                            # Best-effort crawl: record the failure and move
                            # on to the next URL (the delay below is skipped).
                            err_str = f"Error fetching {url}: {e}"
                            self.logs_handle.logs_write(self.collection, err_str, 'error', False)
                            continue

                        if result_list:
                            all_data.append(result_list)

                        # Random pause between successful page visits.
                        time.sleep(random.randint(1, 3))
            finally:
                # Close the browser even when an error escapes the loop
                # (e.g. an interrupt, or context/page creation failing).
                browser.close()

        return all_data or None