You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
92 lines
2.6 KiB
92 lines
2.6 KiB
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
配置管理模块
|
|
"""
|
|
import os
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class AppConfig(BaseModel):
    """Application configuration.

    Every setting is a model field with a default value, so ``AppConfig()``
    yields a usable configuration and any field can be overridden by keyword
    argument.  Constructing an instance also creates ``data_dir`` and
    ``downloads_dir`` on disk if they do not exist yet.
    """

    # Basic settings
    app_name: str = "EH-Downloader"
    app_version: str = "1.0.0"
    debug: bool = False

    # Server settings
    host: str = "0.0.0.0"
    port: int = 8000

    # Data directory settings
    data_dir: str = "data"
    downloads_dir: str = "data/downloads"
    targets_file: str = "data/targets.txt"
    proxy_file: str = "data/proxy.txt"

    # Crawler settings
    concurrency: int = 20
    max_page: int = 100
    retry_per_page: int = 5
    retry_per_image: int = 3
    timeout: float = 10.0
    image_timeout: float = 15.0

    # Logging settings
    log_level: str = "INFO"
    log_format: str = "[%(asctime)s] [%(levelname)s] %(message)s"

    # File cleanup settings
    cleanup_patterns: List[str] = ["**/*.log", "**/*.json"]
    cleanup_exclude: List[str] = ["data/targets.txt"]

    def __init__(self, **kwargs):
        """Validate the given field overrides, then create the data directories."""
        super().__init__(**kwargs)
        # Make sure the directories exist before anything tries to use them.
        self._ensure_directories()

    def _ensure_directories(self):
        """Create ``data_dir`` and ``downloads_dir`` if they are missing."""
        # parents=True on both calls: a nested data_dir (e.g. "var/app/data")
        # must not fail just because its parent has not been created yet.
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        Path(self.downloads_dir).mkdir(parents=True, exist_ok=True)

    def _read_entries(self, path: Path) -> List[str]:
        """Return the stripped, non-blank, non-comment lines of *path*.

        Lines whose first non-whitespace character is ``#`` are treated as
        comments and skipped.  Reading is best-effort: if the file is missing,
        unreadable or not valid UTF-8, an empty list is returned.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                stripped = (line.strip() for line in f)
                return [
                    entry
                    for entry in stripped
                    if entry and not entry.startswith("#")
                ]
        except (OSError, ValueError):
            # OSError covers missing/unreadable files; ValueError covers
            # decoding failures (UnicodeDecodeError) and malformed paths.
            return []

    @property
    def targets_path(self) -> Path:
        """``targets_file`` as a ``Path``."""
        return Path(self.targets_file)

    @property
    def proxy_path(self) -> Path:
        """``proxy_file`` as a ``Path``."""
        return Path(self.proxy_file)

    def get_proxies(self) -> List[str]:
        """Read the proxy list from ``proxy_file``.

        Returns:
            One entry per non-blank, non-comment line of the file.  When the
            file is missing, unreadable or contains no entries, the single
            fallback proxy ``"127.0.0.1:7890"`` is returned instead.
        """
        fallback = ["127.0.0.1:7890"]
        return self._read_entries(self.proxy_path) or fallback

    def get_targets(self) -> List[str]:
        """Read the target URL list from ``targets_file``.

        Returns:
            One entry per non-blank line that does not start with ``#``.
            An empty list when the file is missing or unreadable.
        """
        return self._read_entries(self.targets_path)
|
|
|
|
|
|
# Global configuration instance, built from the field defaults when the
# module is imported.
config: AppConfig = AppConfig()
|
|
|