diff --git a/main.py b/main.py index 1eb6087..eda065b 100644 --- a/main.py +++ b/main.py @@ -26,8 +26,8 @@ logger = logging.getLogger(__name__) DOWNLOADS_DIR = "downloads" MAX_FILENAME_LENGTH = 100 INVALID_FILENAME_CHARS = '<>:"/\\|?*' -MAX_CONCURRENT_DOWNLOADS = 5 # 最大并发下载数 -DOWNLOAD_TIMEOUT = 30 # 下载超时时间(秒) +MAX_CONCURRENT_DOWNLOADS = 5 +DOWNLOAD_TIMEOUT = 30 # FastAPI应用 app = FastAPI(title="eh-v2") @@ -57,39 +57,29 @@ class DownloadStatusResponse(BaseModel): # 工具函数 def setup_downloads_directory() -> Path: - """创建并返回下载目录路径""" downloads_path = Path(DOWNLOADS_DIR) downloads_path.mkdir(exist_ok=True) - logger.info(f"下载目录已准备: {downloads_path.absolute()}") return downloads_path def sanitize_filename(filename: str) -> str: - """清理文件名,移除非法字符并限制长度""" sanitized = filename for char in INVALID_FILENAME_CHARS: sanitized = sanitized.replace(char, '_') - - # 限制文件名长度 if len(sanitized) > MAX_FILENAME_LENGTH: sanitized = sanitized[:MAX_FILENAME_LENGTH] - return sanitized def create_title_directory(base_path: Path, title: str) -> Path: - """创建标题对应的目录""" safe_title = sanitize_filename(title) title_dir = base_path / safe_title title_dir.mkdir(exist_ok=True) - logger.info(f"创建标题目录: {title_dir}") return title_dir async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None: - """异步保存数据到JSON文件""" async with aiofiles.open(file_path, 'w', encoding='utf-8') as f: await f.write(json.dumps(data, ensure_ascii=False, indent=2)) def get_all_galleries() -> List[GalleryInfo]: - """获取所有画廊信息""" galleries = [] downloads_path = Path(DOWNLOADS_DIR) @@ -104,7 +94,6 @@ def get_all_galleries() -> List[GalleryInfo]: with open(data_file, 'r', encoding='utf-8') as f: data = json.load(f) - # 计算已下载的图片数量 downloaded_count = 0 if 'all_images' in data: for filename, url in data['all_images'].items(): @@ -124,38 +113,32 @@ def get_all_galleries() -> List[GalleryInfo]: return galleries async def download_single_image(client: httpx.AsyncClient, url: str, file_path: Path, semaphore: asyncio.Semaphore) -> bool: - """下载单张图片 - 精简版""" async with semaphore: try: - logger.info(f"开始下载: {url}") - - if file_path.exists(): - logger.info(f"文件已存在: {file_path}") - return True - - # 第一步:获取中间页面 + # 先获取图片后缀 response = await client.get(url, timeout=DOWNLOAD_TIMEOUT) response.raise_for_status() - # 第二步:提取真实图片URL import re match = re.search(r'img id="img" src="(.*?)"', response.text) if not match: - logger.error(f"无法提取图片URL: {url}") return False real_img_url = match.group(1) - logger.info(f"真实URL: {real_img_url}") + suffix = real_img_url.split('.')[-1] + + # 创建带后缀的文件路径 + file_path_with_suffix = file_path.with_suffix('.' + suffix) + + if file_path_with_suffix.exists(): + return True - # 第三步:下载图片 img_response = await client.get(real_img_url, timeout=DOWNLOAD_TIMEOUT) img_response.raise_for_status() - # 保存图片 - async with aiofiles.open(file_path, 'wb') as f: + async with aiofiles.open(file_path_with_suffix, 'wb') as f: await f.write(img_response.content) - logger.info(f"下载完成: {file_path}") return True except Exception as e: @@ -163,7 +146,6 @@ async def download_single_image(client: httpx.AsyncClient, url: str, file_path: return False async def download_gallery_images(title: str) -> DownloadStatusResponse: - """下载指定画廊的所有图片""" safe_title = sanitize_filename(title) gallery_path = downloads_path / safe_title data_file = gallery_path / "data.json" @@ -178,7 +160,6 @@ async def download_gallery_images(title: str) -> DownloadStatusResponse: ) try: - # 读取画廊数据 async with aiofiles.open(data_file, 'r', encoding='utf-8') as f: content = await f.read() data = json.loads(content) @@ -195,7 +176,6 @@ async def download_gallery_images(title: str) -> DownloadStatusResponse: current_progress=0.0 ) - # 初始化下载状态 download_status[title] = { "downloaded": 0, "total": total_images, @@ -204,10 +184,8 @@ async def download_gallery_images(title: str) -> DownloadStatusResponse: logger.info(f"开始下载画廊 '{title}',共 {total_images} 张图片") - # 创建信号量限制并发数 semaphore = asyncio.Semaphore(MAX_CONCURRENT_DOWNLOADS) - # 使用异步HTTP客户端 async with httpx.AsyncClient( headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' @@ -215,12 +193,10 @@ async def download_gallery_images(title: str) -> DownloadStatusResponse: follow_redirects=True ) as client: - # 准备下载任务 tasks = [] for filename, url in all_images.items(): image_path = gallery_path / filename - # 如果图片已存在,跳过下载但计入完成数量 if image_path.exists(): download_status[title]["downloaded"] += 1 continue @@ -228,26 +204,20 @@ async def download_gallery_images(title: str) -> DownloadStatusResponse: task = download_single_image(client, url, image_path, semaphore) tasks.append(task) - # 批量执行下载任务 if tasks: results = await asyncio.gather(*tasks, return_exceptions=True) - - # 统计成功下载的数量 successful_downloads = sum(1 for result in results if result is True) download_status[title]["downloaded"] += successful_downloads - # 更新最终状态 downloaded_count = download_status[title]["downloaded"] progress = (downloaded_count / total_images) * 100 if downloaded_count == total_images: download_status[title]["status"] = "completed" message = f"下载完成!共下载 {downloaded_count}/{total_images} 张图片" - logger.info(f"画廊 '{title}' {message}") else: download_status[title]["status"] = "partial" message = f"部分完成!下载 {downloaded_count}/{total_images} 张图片" - logger.warning(f"画廊 '{title}' {message}") return DownloadStatusResponse( status="success", @@ -272,28 +242,25 @@ async def download_gallery_images(title: str) -> DownloadStatusResponse: ) async def download_all_pending_galleries(): - """下载所有未完成的画廊""" galleries = get_all_galleries() pending_galleries = [g for g in galleries if g.downloaded_images < g.total_images] + logger.info(f"找到 {len(pending_galleries)} 个待下载画廊") + if not pending_galleries: logger.info("没有待下载的画廊") return - logger.info(f"开始批量下载 {len(pending_galleries)} 个画廊") - for gallery in pending_galleries: - if gallery.downloaded_images < gallery.total_images: - logger.info(f"开始下载画廊: {gallery.title}") - result = await download_gallery_images(gallery.title) - - if result.status == "success": - logger.info(f"画廊 '{gallery.title}' 下载完成: {result.message}") - else: - logger.error(f"画廊 '{gallery.title}' 下载失败: {result.message}") - - # 添加延迟避免请求过于频繁 - await asyncio.sleep(1) + logger.info(f"开始下载画廊: {gallery.title}") + result = await download_gallery_images(gallery.title) + + if result.status == "success": + logger.info(f"画廊 '{gallery.title}' 下载完成: {result.message}") + else: + logger.error(f"画廊 '{gallery.title}' 下载失败: {result.message}") + + await asyncio.sleep(1) logger.info("批量下载任务完成") @@ -301,9 +268,35 @@ async def download_all_pending_galleries(): downloads_path = setup_downloads_directory() # API路由 +@app.post("/save_url") +@app.options("/save_url") +async def save_url_data(request: SaveDataRequest = None): + if not request: + return {"status": "ok"} + + try: + title_dir = create_title_directory(downloads_path, request.title) + data_file = title_dir / "data.json" + await save_data_to_file(data_file, { + "url": request.url, + "title": request.title, + "all_images": request.all_images, + "total_images": request.total_images + }) + + logger.info(f"成功保存数据: {request.title}") + return { + "status": "success", + "message": f"数据保存成功,共 {request.total_images} 张图片", + "path": str(title_dir) + } + + except Exception as e: + logger.error(f"保存数据失败: {e}") + raise HTTPException(status_code=500, detail=f"保存失败: {str(e)}") + @app.get("/", response_class=HTMLResponse) async def read_gallery_manager(): - """画廊管理页面""" return """ @@ -312,193 +305,24 @@ async def read_gallery_manager():
管理您的画廊下载任务
-