From e95e9d000be39961459065a538f74b0fa4f91759 Mon Sep 17 00:00:00 2001 From: Jack Date: Sun, 23 Nov 2025 19:20:34 +0800 Subject: [PATCH] update --- main.py | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/main.py b/main.py index 23da0f4..1eb6087 100644 --- a/main.py +++ b/main.py @@ -124,34 +124,40 @@ def get_all_galleries() -> List[GalleryInfo]: return galleries async def download_single_image(client: httpx.AsyncClient, url: str, file_path: Path, semaphore: asyncio.Semaphore) -> bool: - """下载单张图片""" + """下载单张图片 - 精简版""" async with semaphore: try: - logger.info(f"开始下载: {url} -> {file_path}") + logger.info(f"开始下载: {url}") - # 如果文件已存在,跳过下载 if file_path.exists(): - logger.info(f"文件已存在,跳过: {file_path}") + logger.info(f"文件已存在: {file_path}") return True - # 发送请求下载图片 - async with client.stream('GET', url, timeout=DOWNLOAD_TIMEOUT) as response: - response.raise_for_status() - - # 异步写入文件 - async with aiofiles.open(file_path, 'wb') as f: - async for chunk in response.aiter_bytes(): - await f.write(chunk) + # 第一步:获取中间页面 + response = await client.get(url, timeout=DOWNLOAD_TIMEOUT) + response.raise_for_status() + + # 第二步:提取真实图片URL + import re + match = re.search(r'img id="img" src="(.*?)"', response.text) + if not match: + logger.error(f"无法提取图片URL: {url}") + return False + + real_img_url = match.group(1) + logger.info(f"真实URL: {real_img_url}") + + # 第三步:下载图片 + img_response = await client.get(real_img_url, timeout=DOWNLOAD_TIMEOUT) + img_response.raise_for_status() + + # 保存图片 + async with aiofiles.open(file_path, 'wb') as f: + await f.write(img_response.content) logger.info(f"下载完成: {file_path}") return True - except httpx.TimeoutException: - logger.error(f"下载超时: {url}") - return False - except httpx.HTTPStatusError as e: - logger.error(f"HTTP错误 {e.response.status_code}: {url}") - return False except Exception as e: logger.error(f"下载失败 {url}: {e}") return False