#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Configuration management module.

Defines :class:`AppConfig`, the application's settings model, and creates the
module-level ``config`` instance that the rest of the application imports.
"""

import logging
import os
from pathlib import Path
from typing import List, Optional

from pydantic import BaseModel, Field

_logger = logging.getLogger(__name__)

# Proxy returned whenever the proxy file is missing, unreadable or has no entries.
_DEFAULT_PROXY = "127.0.0.1:7890"


def _read_entries(path: Path) -> Optional[List[str]]:
    """Read the meaningful lines of a UTF-8 text file.

    Each line is stripped of surrounding whitespace; blank lines and lines
    starting with ``#`` (comments) are dropped.

    Args:
        path: File to read.

    Returns:
        The remaining lines in file order, or ``None`` when the file does not
        exist or cannot be read/decoded.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            stripped = (line.strip() for line in f)
            return [
                entry
                for entry in stripped
                if entry and not entry.startswith("#")
            ]
    except FileNotFoundError:
        # A missing file is an expected situation; callers fall back silently.
        return None
    except (OSError, UnicodeError) as exc:
        # Best effort: report the problem but let callers use their fallback.
        _logger.warning("Could not read %s: %s", path, exc)
        return None


class AppConfig(BaseModel):
    """Application configuration.

    Instantiating the model creates ``data_dir`` and ``downloads_dir`` on disk
    if they do not exist yet.
    """

    # Basic settings
    app_name: str = "EH-Downloader"
    app_version: str = "1.0.0"
    debug: bool = False

    # Server settings
    host: str = "0.0.0.0"
    port: int = 8000

    # Data directory settings
    data_dir: str = "data"
    downloads_dir: str = "data/downloads"
    targets_file: str = "data/targets.txt"
    proxy_file: str = "data/proxy.txt"

    # Crawler settings
    concurrency: int = 20
    max_page: int = 100
    retry_per_page: int = 5
    retry_per_image: int = 3
    timeout: float = 10.0
    image_timeout: float = 15.0

    # Logging settings
    log_level: str = "INFO"
    log_format: str = "[%(asctime)s] [%(levelname)s] %(message)s"

    # File cleanup settings
    cleanup_patterns: List[str] = ["**/*.log", "**/*.json"]
    cleanup_exclude: List[str] = ["data/targets.txt"]

    def __init__(self, **kwargs):
        """Validate the settings, then make sure the data directories exist."""
        super().__init__(**kwargs)
        self._ensure_directories()

    def _ensure_directories(self) -> None:
        """Create the data and downloads directories if they are missing."""
        # parents=True on both calls so that a nested data_dir
        # (e.g. "var/app/data") does not fail on a missing parent directory.
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        Path(self.downloads_dir).mkdir(parents=True, exist_ok=True)

    @property
    def targets_path(self) -> Path:
        """Path of the targets file."""
        return Path(self.targets_file)

    @property
    def proxy_path(self) -> Path:
        """Path of the proxy file."""
        return Path(self.proxy_file)

    def get_proxies(self) -> List[str]:
        """Read the proxy list from the proxy file.

        Blank lines and ``#`` comment lines are ignored.

        Returns:
            The proxies listed in the file, or ``["127.0.0.1:7890"]`` when the
            file is missing, unreadable or contains no entries.
        """
        proxies = _read_entries(self.proxy_path)
        return proxies if proxies else [_DEFAULT_PROXY]

    def get_targets(self) -> List[str]:
        """Read the target URL list from the targets file.

        Blank lines and ``#`` comment lines are ignored.

        Returns:
            The URLs listed in the file, or an empty list when the file is
            missing or unreadable.
        """
        return _read_entries(self.targets_path) or []


# Global configuration instance (creates the data directories on import).
config = AppConfig()