You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
eh-v2/main.py

418 lines
14 KiB

# main.py
import os
import json
import logging
from pathlib import Path
from typing import Dict, Any, List
import asyncio
import httpx
import shutil
import aiofiles
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn
# Logging configuration: timestamped records at INFO level for the whole app.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Constants
DOWNLOADS_DIR = "downloads"            # root directory for all saved galleries
MAX_FILENAME_LENGTH = 100              # truncation limit applied by sanitize_filename
INVALID_FILENAME_CHARS = '<>:"/\\|?*'  # characters replaced with '_' in names
MAX_CONCURRENT_DOWNLOADS = 5           # semaphore limit for parallel image fetches
DOWNLOAD_TIMEOUT = 30                  # per-request timeout, seconds

# FastAPI application
app = FastAPI(title="eh-v2")

# Global registry of per-gallery download progress, keyed by gallery title.
download_status: Dict[str, Dict[str, Any]] = {}
# Data models
class SaveDataRequest(BaseModel):
    """Payload posted to /save_url when the client saves a gallery."""
    url: str                    # gallery page URL
    title: str                  # gallery title (used as directory name after sanitizing)
    all_images: Dict[str, str]  # filename -> image viewer-page URL
    total_images: int           # expected number of images in the gallery
class GalleryInfo(BaseModel):
    """Summary of one on-disk gallery, returned by /api/galleries."""
    title: str              # gallery title from data.json (or directory name)
    path: str               # filesystem path of the gallery directory
    total_images: int       # expected number of images
    downloaded_images: int  # images already present on disk
class DownloadStatusResponse(BaseModel):
    """Outcome of a gallery download run."""
    status: str              # "success" or "error"
    message: str             # human-readable summary
    downloaded: int          # images present after the run
    total: int               # expected number of images
    current_progress: float  # downloaded/total as a percentage (0-100)
# Utility functions
def setup_downloads_directory() -> Path:
    """Ensure the downloads root directory exists and return its path."""
    root = Path(DOWNLOADS_DIR)
    root.mkdir(exist_ok=True)
    return root
def sanitize_filename(filename: str) -> str:
    """Return *filename* made safe for use as a file/directory name.

    Every character in INVALID_FILENAME_CHARS is replaced with '_' and the
    result is truncated to MAX_FILENAME_LENGTH characters.
    """
    # str.translate does the whole substitution in one C-level pass instead
    # of one .replace() call per forbidden character; the slice is a no-op
    # when the name is already short enough, matching the original behavior.
    table = str.maketrans({char: '_' for char in INVALID_FILENAME_CHARS})
    return filename.translate(table)[:MAX_FILENAME_LENGTH]
def create_title_directory(base_path: Path, title: str) -> Path:
    """Create (if needed) and return the per-gallery directory under *base_path*."""
    target = base_path / sanitize_filename(title)
    target.mkdir(exist_ok=True)
    return target
async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
    """Asynchronously write *data* to *file_path* as pretty-printed UTF-8 JSON."""
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
        await f.write(json.dumps(data, ensure_ascii=False, indent=2))
def get_all_galleries() -> List[GalleryInfo]:
    """Scan the downloads directory and return every gallery that is not yet
    fully downloaded (progress below 100%)."""
    pending: List[GalleryInfo] = []
    root = Path(DOWNLOADS_DIR)
    if not root.exists():
        return pending
    for gallery_dir in root.iterdir():
        if not gallery_dir.is_dir():
            continue
        data_file = gallery_dir / "data.json"
        if not data_file.exists():
            continue
        try:
            with open(data_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            done = 0
            if 'all_images' in data:
                # Stems of every real file in the directory (data.json
                # excluded); the comparison deliberately ignores extensions.
                stems = {
                    entry.stem
                    for entry in gallery_dir.glob("*.*")
                    if entry.is_file() and entry.name != "data.json"
                }
                done = sum(
                    1 for name in data['all_images'] if Path(name).stem in stems
                )
            # Only report galleries that still have images left to fetch.
            if done < data.get('total_images', 0):
                pending.append(GalleryInfo(
                    title=data.get('title', gallery_dir.name),
                    path=str(gallery_dir),
                    total_images=data.get('total_images', 0),
                    downloaded_images=done,
                ))
        except Exception as e:
            logger.error(f"读取画廊数据失败 {gallery_dir}: {e}")
    return pending
async def download_single_image(client: httpx.AsyncClient, url: str, file_path: Path, semaphore: asyncio.Semaphore) -> bool:
    """Download one gallery image.

    *url* points at the image viewer page; the real image URL is scraped from
    its HTML (``img id="img" src="..."``), then fetched and written next to
    *file_path* with the extension taken from the real image URL.

    Returns True on success (or when the image already exists on disk),
    False on any failure. Concurrency is bounded by *semaphore*.
    """
    import re  # local import kept so the module header is untouched

    async with semaphore:
        # Fix: check for an existing file (any extension) BEFORE touching the
        # network — the original fetched the viewer page first, wasting one
        # HTTP request per already-downloaded image.
        if check_image_exists(file_path):
            return True
        try:
            # Fetch the viewer page and extract the real image URL.
            response = await client.get(url, timeout=DOWNLOAD_TIMEOUT)
            response.raise_for_status()
            match = re.search(r'img id="img" src="(.*?)"', response.text)
            if not match:
                return False
            real_img_url = match.group(1)
            # Derive the extension from the URL path, ignoring any query string.
            suffix = real_img_url.split('?')[0].split('.')[-1]
            target = file_path.with_suffix('.' + suffix)
            img_response = await client.get(real_img_url, timeout=DOWNLOAD_TIMEOUT)
            img_response.raise_for_status()
            async with aiofiles.open(target, 'wb') as f:
                await f.write(img_response.content)
            return True
        except Exception as e:
            logger.error(f"下载失败 {url}: {e}")
            return False
def check_image_exists(file_path: Path) -> bool:
    """Return True when an image with *file_path*'s stem exists, either at the
    given path itself or under any common image extension."""
    if file_path.exists():
        return True
    # Same file name, any well-known image extension.
    known_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    parent, stem = file_path.parent, file_path.stem
    return any((parent / f"{stem}{ext}").exists() for ext in known_extensions)
async def download_gallery_images(title: str) -> DownloadStatusResponse:
    """Download every missing image for the gallery named *title*.

    Reads the gallery's data.json, skips images already on disk (extension-
    agnostic), downloads the rest with bounded concurrency, and keeps the
    global download_status registry updated. Returns a DownloadStatusResponse
    summarizing the outcome.
    """
    safe_title = sanitize_filename(title)
    # downloads_path is the module-level Path created at import time.
    gallery_path = downloads_path / safe_title
    data_file = gallery_path / "data.json"
    if not data_file.exists():
        return DownloadStatusResponse(
            status="error",
            message="画廊数据文件不存在",
            downloaded=0,
            total=0,
            current_progress=0.0
        )
    try:
        async with aiofiles.open(data_file, 'r', encoding='utf-8') as f:
            content = await f.read()
        data = json.loads(content)
        all_images = data.get('all_images', {})
        total_images = len(all_images)
        if total_images == 0:
            return DownloadStatusResponse(
                status="error",
                message="没有可下载的图片",
                downloaded=0,
                total=0,
                current_progress=0.0
            )
        # Register initial progress so status pollers see the run.
        download_status[title] = {
            "downloaded": 0,
            "total": total_images,
            "status": "downloading"
        }
        logger.info(f"开始下载画廊 '{title}',共 {total_images} 张图片")
        # Bound the number of simultaneous image downloads.
        semaphore = asyncio.Semaphore(MAX_CONCURRENT_DOWNLOADS)
        async with httpx.AsyncClient(
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            },
            follow_redirects=True
        ) as client:
            tasks = []
            for filename, url in all_images.items():
                image_path = gallery_path / filename
                # Extension-agnostic existence check: count files already on
                # disk as downloaded and skip scheduling them.
                if check_image_exists(image_path):
                    download_status[title]["downloaded"] += 1
                    continue
                task = download_single_image(client, url, image_path, semaphore)
                tasks.append(task)
            if tasks:
                results = await asyncio.gather(*tasks, return_exceptions=True)
                # gather() may yield exceptions; only a literal True counts.
                successful_downloads = sum(1 for result in results if result is True)
                download_status[title]["downloaded"] += successful_downloads
        downloaded_count = download_status[title]["downloaded"]
        progress = (downloaded_count / total_images) * 100
        if downloaded_count == total_images:
            download_status[title]["status"] = "completed"
            message = f"下载完成!共下载 {downloaded_count}/{total_images} 张图片"
        else:
            download_status[title]["status"] = "partial"
            message = f"部分完成!下载 {downloaded_count}/{total_images} 张图片"
        # NOTE: status is "success" even for a partial run; callers must
        # inspect downloaded/total (or the message) for completeness.
        return DownloadStatusResponse(
            status="success",
            message=message,
            downloaded=downloaded_count,
            total=total_images,
            current_progress=progress
        )
    except Exception as e:
        logger.error(f"下载画廊 '{title}' 时发生错误: {e}")
        # NOTE(review): this overwrites the progress dict, dropping the
        # "downloaded"/"total" keys — status pollers should tolerate that.
        download_status[title] = {
            "status": "error",
            "message": str(e)
        }
        return DownloadStatusResponse(
            status="error",
            message=f"下载失败: {str(e)}",
            downloaded=0,
            total=0,
            current_progress=0.0
        )
async def download_all_pending_galleries():
    """Sequentially download every gallery that is not yet complete."""
    pending = [
        gallery
        for gallery in get_all_galleries()
        if gallery.downloaded_images < gallery.total_images
    ]
    logger.info(f"找到 {len(pending)} 个待下载画廊")
    if not pending:
        logger.info("没有待下载的画廊")
        return
    for gallery in pending:
        logger.info(f"开始下载画廊: {gallery.title}")
        result = await download_gallery_images(gallery.title)
        if result.status == "success":
            logger.info(f"画廊 '{gallery.title}' 下载完成: {result.message}")
        else:
            logger.error(f"画廊 '{gallery.title}' 下载失败: {result.message}")
        # Brief pause between galleries to avoid hammering the server.
        await asyncio.sleep(1)
    logger.info("批量下载任务完成")
def delete_completed_json_files():
    """Remove data.json from every gallery whose images are all downloaded.

    Returns the number of JSON manifests deleted.
    """
    root = Path(DOWNLOADS_DIR)
    removed = 0
    if not root.exists():
        return removed
    for gallery_dir in root.iterdir():
        if not gallery_dir.is_dir():
            continue
        data_file = gallery_dir / "data.json"
        if not data_file.exists():
            continue
        try:
            with open(data_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            finished = 0
            if 'all_images' in data:
                # Extension-agnostic match: compare stems of on-disk files
                # (data.json excluded) against the stems recorded in the manifest.
                stems = {
                    entry.stem
                    for entry in gallery_dir.glob("*.*")
                    if entry.is_file() and entry.name != "data.json"
                }
                finished = sum(
                    1 for name in data['all_images'] if Path(name).stem in stems
                )
            expected = len(data.get('all_images', {}))
            # Delete the manifest only when every image is present.
            if expected > 0 and finished == expected:
                data_file.unlink()
                removed += 1
                logger.info(f"已删除已完成任务的JSON文件: {gallery_dir.name}")
        except Exception as e:
            logger.error(f"处理画廊目录失败 {gallery_dir}: {e}")
    return removed
# Initialization: make sure the downloads root exists at import time.
downloads_path = setup_downloads_directory()
# API routes
@app.post("/save_url")
@app.options("/save_url")
async def save_url_data(request: SaveDataRequest = None):
    """Persist gallery metadata posted by the client.

    The OPTIONS preflight arrives without a body; answer it with a plain OK.
    """
    if request is None:
        return {"status": "ok"}
    try:
        target_dir = create_title_directory(downloads_path, request.title)
        payload = {
            "url": request.url,
            "title": request.title,
            "all_images": request.all_images,
            "total_images": request.total_images
        }
        await save_data_to_file(target_dir / "data.json", payload)
        logger.info(f"成功保存数据: {request.title}")
        return {
            "status": "success",
            "message": f"数据保存成功,共 {request.total_images} 张图片",
            "path": str(target_dir)
        }
    except Exception as e:
        logger.error(f"保存数据失败: {e}")
        raise HTTPException(status_code=500, detail=f"保存失败: {str(e)}")
@app.get("/")
async def read_gallery_manager():
    """Serve the gallery-manager frontend page."""
    return FileResponse("index.html")
@app.get("/api/galleries")
async def get_galleries():
    """List every gallery that still has images left to download."""
    return get_all_galleries()
@app.post("/api/download/all")
async def download_all_galleries(background_tasks: BackgroundTasks):
    """Kick off a background batch download of every unfinished gallery."""
    background_tasks.add_task(download_all_pending_galleries)
    return {"status": "success", "message": "开始批量下载所有未完成的画廊"}
@app.post("/api/download/{title}")
async def download_gallery(title: str, background_tasks: BackgroundTasks):
    """Kick off a background download for a single gallery identified by title."""
    background_tasks.add_task(download_gallery_images, title)
    return {
        "status": "success",
        "message": f"开始下载画廊: {title}",
        "title": title
    }
@app.post("/api/cleanup")
async def cleanup_completed_galleries():
    """Delete the data.json manifests of fully downloaded galleries."""
    try:
        removed = delete_completed_json_files()
    except Exception as e:
        logger.error(f"清理JSON文件失败: {e}")
        raise HTTPException(status_code=500, detail=f"清理失败: {str(e)}")
    return {
        "status": "success",
        "message": f"成功删除 {removed} 个已完成任务的JSON文件",
        "deleted_count": removed
    }
@app.get("/health")
async def health_check():
    """Liveness-probe endpoint."""
    return {"status": "healthy"}
# Script entry point: run the development server with auto-reload enabled.
if __name__ == "__main__":
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=5100,
        reload=True
    )