You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
eh-v2/main.py

453 lines
17 KiB

# main.py
import os
import json
import logging
from pathlib import Path
from typing import Dict, Any, List
import asyncio
import httpx
import aiofiles
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn
# Configure logging: INFO level, with timestamp, logger name and level in every record.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Constants
# Root directory (relative path) that holds one sub-directory per gallery.
DOWNLOADS_DIR = "downloads"
# sanitize_filename() truncates names to at most this many characters.
MAX_FILENAME_LENGTH = 100
# Every character listed here is replaced with '_' by sanitize_filename().
INVALID_FILENAME_CHARS = '<>:"/\\|?*'
# Size of the semaphore that bounds simultaneous image downloads within one gallery.
MAX_CONCURRENT_DOWNLOADS = 5
# Passed as the `timeout` argument of each httpx request made by the downloader.
DOWNLOAD_TIMEOUT = 30
# FastAPI application
app = FastAPI(title="eh-v2")
# Global variable used to track download state. Keys are gallery titles; values are
# dicts with "downloaded", "total" and "status" (or "status" and "message" on error).
download_status: Dict[str, Dict[str, Any]] = {}
# Data models
class SaveDataRequest(BaseModel):
    """Request body of ``/save_url``: the description of one gallery to persist."""

    # Stored verbatim under the "url" key of the gallery's data.json.
    url: str
    # Gallery title; its sanitized form becomes the gallery directory name.
    title: str
    # Mapping of image file name -> URL of the page that embeds the image.
    all_images: Dict[str, str]
    # Image count as reported by the client; stored as-is.
    total_images: int
class GalleryInfo(BaseModel):
    """Summary of one gallery directory as reported by ``get_all_galleries``."""

    # Title read from data.json (the directory name when the key is absent).
    title: str
    # String form of the gallery directory path.
    path: str
    # "total_images" value from data.json (0 when the key is absent).
    total_images: int
    # Number of listed images found on disk.
    downloaded_images: int
class DownloadStatusResponse(BaseModel):
    """Outcome of one ``download_gallery_images`` run."""

    # "success" or "error".
    status: str
    # Human-readable description of the outcome.
    message: str
    # Number of images available locally after the run.
    downloaded: int
    # Number of images listed for the gallery.
    total: int
    # Completion percentage in the range 0-100.
    current_progress: float
# Utility functions
def setup_downloads_directory() -> Path:
    """Create the ``DOWNLOADS_DIR`` directory if it is missing and return its path."""
    root = Path(DOWNLOADS_DIR)
    # exist_ok=True makes repeated start-ups a no-op instead of an error.
    root.mkdir(exist_ok=True)
    return root
def sanitize_filename(filename: str) -> str:
    """Turn an arbitrary title into a name that is safe to use as a directory name.

    Every character in ``INVALID_FILENAME_CHARS`` is replaced with ``'_'`` and
    the result is truncated to ``MAX_FILENAME_LENGTH`` characters.

    The names ``''``, ``'.'`` and ``'..'`` contain no invalid character but,
    joined onto a base path, refer to the base directory itself or to its
    parent. They are therefore mapped to underscores so the result always
    names a real child entry.

    Args:
        filename: Raw name, e.g. a gallery title supplied by a client.

    Returns:
        The sanitized, non-empty name.
    """
    # One C-level pass instead of one str.replace() call per invalid character.
    replacements = str.maketrans({char: '_' for char in INVALID_FILENAME_CHARS})
    sanitized = filename.translate(replacements)[:MAX_FILENAME_LENGTH]
    if sanitized in ('', '.', '..'):
        # Keep the length (minimum 1) so '.' and '..' map to distinct names.
        return '_' * max(len(sanitized), 1)
    return sanitized
def create_title_directory(base_path: Path, title: str) -> Path:
    """Create (if needed) and return the directory for ``title`` under ``base_path``.

    The directory name is the sanitized form of ``title``.
    """
    target_dir = base_path.joinpath(sanitize_filename(title))
    target_dir.mkdir(exist_ok=True)
    return target_dir
async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
    """Write ``data`` to ``file_path`` as UTF-8 JSON, replacing any previous content.

    The JSON is indented by two spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as handle:
        serialized = json.dumps(data, ensure_ascii=False, indent=2)
        await handle.write(serialized)
def get_all_galleries() -> List[GalleryInfo]:
    """Scan ``DOWNLOADS_DIR`` and describe every gallery that has a ``data.json``.

    An image listed in ``all_images`` counts as downloaded when a file with
    exactly that name exists, or when a file with the same stem exists. The
    second rule is needed because ``download_single_image`` stores each image
    under ``file_path.with_suffix(<real extension>)``, so the name on disk can
    differ from the listed name in its extension.

    Returns:
        One ``GalleryInfo`` per readable gallery; an empty list when the
        downloads directory does not exist. Galleries whose ``data.json``
        cannot be read or parsed are logged and skipped.
    """
    galleries: List[GalleryInfo] = []
    downloads_root = Path(DOWNLOADS_DIR)
    if not downloads_root.exists():
        return galleries
    for gallery_dir in downloads_root.iterdir():
        if not gallery_dir.is_dir():
            continue
        data_file = gallery_dir / "data.json"
        if not data_file.exists():
            continue
        try:
            with open(data_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            all_images = data.get('all_images', {})
            # Stems of everything already on disk, built once per gallery.
            existing_stems = {
                entry.stem
                for entry in gallery_dir.iterdir()
                if entry.is_file() and entry.name != "data.json"
            }
            downloaded_count = sum(
                1
                for filename in all_images
                if Path(filename).stem in existing_stems
                or (gallery_dir / filename).exists()
            )
            galleries.append(GalleryInfo(
                title=data.get('title', gallery_dir.name),
                path=str(gallery_dir),
                total_images=data.get('total_images', 0),
                downloaded_images=downloaded_count,
            ))
        except Exception as e:
            # Best effort: one broken gallery must not hide the others.
            logger.error(f"读取画廊数据失败 {gallery_dir}: {e}")
    return galleries
async def download_single_image(client: httpx.AsyncClient, url: str, file_path: Path, semaphore: asyncio.Semaphore) -> bool:
    """Download the image embedded in the page at ``url``.

    The page is fetched and searched for ``img id="img" src="..."``; the image
    it points to is saved next to ``file_path`` with the suffix replaced by the
    image's own extension.

    Args:
        client: Shared HTTP client used for both requests.
        url: URL of the page that embeds the image.
        file_path: Target path; its suffix is replaced with the image's one.
        semaphore: Limits how many downloads run at the same time.

    Returns:
        True when the image is on disk afterwards (freshly downloaded or
        already present), False on any failure. Failures are logged, never
        raised.
    """
    import html
    import re
    from pathlib import PurePosixPath
    from urllib.parse import urljoin, urlsplit

    async with semaphore:
        try:
            # Fetch the page first to learn the real image URL and extension.
            response = await client.get(url, timeout=DOWNLOAD_TIMEOUT)
            response.raise_for_status()
            match = re.search(r'img id="img" src="(.*?)"', response.text)
            if not match:
                logger.error(f"下载失败 {url}: 页面中未找到图片地址")
                return False
            # The attribute value is HTML-escaped and may be relative to the page.
            real_img_url = urljoin(str(response.url), html.unescape(match.group(1)))
            # Take the extension from the URL path only, so query strings and
            # dots in the host name cannot leak into the file name.
            suffix = PurePosixPath(urlsplit(real_img_url).path).suffix
            if not suffix:
                logger.error(f"下载失败 {url}: 无法确定图片后缀 {real_img_url}")
                return False
            file_path_with_suffix = file_path.with_suffix(suffix)
            if file_path_with_suffix.exists():
                return True
            img_response = await client.get(real_img_url, timeout=DOWNLOAD_TIMEOUT)
            img_response.raise_for_status()
            # Write to a temporary name and rename afterwards, so an interrupted
            # write never leaves a truncated file that looks like a finished one.
            temp_path = file_path_with_suffix.with_name(file_path_with_suffix.name + '.part')
            try:
                async with aiofiles.open(temp_path, 'wb') as f:
                    await f.write(img_response.content)
                os.replace(temp_path, file_path_with_suffix)
            except BaseException:
                temp_path.unlink(missing_ok=True)
                raise
            return True
        except Exception as e:
            logger.error(f"下载失败 {url}: {e}")
            return False
async def download_gallery_images(title: str) -> DownloadStatusResponse:
    """Download every missing image of the gallery stored under ``title``.

    Reads ``data.json`` from the gallery directory, skips images that are
    already on disk and downloads the rest concurrently (bounded by
    ``MAX_CONCURRENT_DOWNLOADS``). Progress is recorded in the module-level
    ``download_status`` dict under ``title``.

    An image counts as present when a file with the listed name exists or when
    a file with the same stem exists. ``download_single_image`` saves files
    under the image's real extension, so the on-disk name may differ from the
    listed one in its suffix.

    Args:
        title: Gallery title; sanitized to locate the gallery directory.

    Returns:
        A ``DownloadStatusResponse``. ``status`` is ``"success"`` when the run
        finished (fully or partially) and ``"error"`` when the data file is
        missing, lists no images, or an unexpected exception occurred.
    """
    safe_title = sanitize_filename(title)
    gallery_path = downloads_path / safe_title
    data_file = gallery_path / "data.json"
    if not data_file.exists():
        return DownloadStatusResponse(
            status="error",
            message="画廊数据文件不存在",
            downloaded=0,
            total=0,
            current_progress=0.0
        )
    try:
        async with aiofiles.open(data_file, 'r', encoding='utf-8') as f:
            content = await f.read()
        data = json.loads(content)
        all_images = data.get('all_images', {})
        total_images = len(all_images)
        if total_images == 0:
            return DownloadStatusResponse(
                status="error",
                message="没有可下载的图片",
                downloaded=0,
                total=0,
                current_progress=0.0
            )
        download_status[title] = {
            "downloaded": 0,
            "total": total_images,
            "status": "downloading"
        }
        logger.info(f"开始下载画廊 '{title}',共 {total_images} 张图片")
        # Stems of the files already on disk, so finished images are skipped
        # without another page request even when their extension differs.
        existing_stems = {
            entry.stem
            for entry in gallery_path.iterdir()
            if entry.is_file() and entry.name != "data.json"
        }
        semaphore = asyncio.Semaphore(MAX_CONCURRENT_DOWNLOADS)
        async with httpx.AsyncClient(
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            },
            follow_redirects=True
        ) as client:
            tasks = []
            for filename, url in all_images.items():
                image_path = gallery_path / filename
                if image_path.exists() or image_path.stem in existing_stems:
                    download_status[title]["downloaded"] += 1
                    continue
                tasks.append(download_single_image(client, url, image_path, semaphore))
            if tasks:
                results = await asyncio.gather(*tasks, return_exceptions=True)
                # Only a literal True counts; False and captured exceptions do not.
                successful_downloads = sum(1 for result in results if result is True)
                download_status[title]["downloaded"] += successful_downloads
        downloaded_count = download_status[title]["downloaded"]
        progress = (downloaded_count / total_images) * 100
        if downloaded_count == total_images:
            download_status[title]["status"] = "completed"
            message = f"下载完成!共下载 {downloaded_count}/{total_images} 张图片"
        else:
            download_status[title]["status"] = "partial"
            message = f"部分完成!下载 {downloaded_count}/{total_images} 张图片"
        return DownloadStatusResponse(
            status="success",
            message=message,
            downloaded=downloaded_count,
            total=total_images,
            current_progress=progress
        )
    except Exception as e:
        logger.error(f"下载画廊 '{title}' 时发生错误: {e}")
        # Keep the same keys as the non-error entries so readers of
        # download_status never hit a missing "downloaded"/"total".
        previous = download_status.get(title, {})
        download_status[title] = {
            "downloaded": previous.get("downloaded", 0),
            "total": previous.get("total", 0),
            "status": "error",
            "message": str(e)
        }
        return DownloadStatusResponse(
            status="error",
            message=f"下载失败: {str(e)}",
            downloaded=0,
            total=0,
            current_progress=0.0
        )
async def download_all_pending_galleries():
    """Download, one gallery after another, every gallery that still misses images.

    A gallery is pending when fewer images are on disk than its data file
    announces. The result of each gallery is logged, and there is a one-second
    pause after each gallery.
    """
    pending_galleries = [
        gallery
        for gallery in get_all_galleries()
        if gallery.total_images > gallery.downloaded_images
    ]
    logger.info(f"找到 {len(pending_galleries)} 个待下载画廊")
    if len(pending_galleries) == 0:
        logger.info("没有待下载的画廊")
        return
    for gallery in pending_galleries:
        logger.info(f"开始下载画廊: {gallery.title}")
        outcome = await download_gallery_images(gallery.title)
        if outcome.status != "success":
            logger.error(f"画廊 '{gallery.title}' 下载失败: {outcome.message}")
        else:
            logger.info(f"画廊 '{gallery.title}' 下载完成: {outcome.message}")
        # Short pause before moving on to the next gallery.
        await asyncio.sleep(1)
    logger.info("批量下载任务完成")
# Initialization: create the downloads directory at import time and keep its path in the
# module-level `downloads_path`, which download_gallery_images() and save_url_data() read.
downloads_path = setup_downloads_directory()
# API routes
@app.post("/save_url")
@app.options("/save_url")
async def save_url_data(request: SaveDataRequest = None):
    """Persist a gallery description as ``data.json`` in its own directory.

    Registered for both POST and OPTIONS. When no body is supplied the handler
    just answers ``{"status": "ok"}``.

    Raises:
        HTTPException: 500 when the directory or the data file cannot be written.
    """
    if not request:
        return {"status": "ok"}
    try:
        title_dir = create_title_directory(downloads_path, request.title)
        payload = {
            "url": request.url,
            "title": request.title,
            "all_images": request.all_images,
            "total_images": request.total_images
        }
        await save_data_to_file(title_dir / "data.json", payload)
        logger.info(f"成功保存数据: {request.title}")
        response_body = {
            "status": "success",
            "message": f"数据保存成功,共 {request.total_images} 张图片",
            "path": str(title_dir)
        }
        return response_body
    except Exception as e:
        logger.error(f"保存数据失败: {e}")
        raise HTTPException(status_code=500, detail=f"保存失败: {str(e)}")
@app.get("/", response_class=HTMLResponse)
async def read_gallery_manager():
    """Serve the single-page gallery manager UI.

    The page lists pending galleries from ``/api/galleries`` and starts the
    batch download through ``POST /api/download/all``.

    Gallery titles originate from client-submitted data, so the script escapes
    them before inserting them into ``innerHTML``; otherwise a crafted title
    could inject markup or script into the page.
    """
    return """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>画廊下载管理器</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); min-height: 100vh; padding: 20px; }
.container { max-width: 1200px; margin: 0 auto; background: white; border-radius: 15px; box-shadow: 0 20px 40px rgba(0,0,0,0.1); overflow: hidden; }
.header { background: linear-gradient(135deg, #2c3e50, #34495e); color: white; padding: 30px; text-align: center; }
.header h1 { font-size: 2.5em; margin-bottom: 10px; }
.controls { padding: 20px; background: #f8f9fa; border-bottom: 1px solid #e9ecef; display: flex; gap: 15px; flex-wrap: wrap; }
.btn { padding: 12px 24px; border: none; border-radius: 8px; font-size: 16px; font-weight: 600; cursor: pointer; transition: all 0.3s ease; }
.btn-primary { background: #007bff; color: white; }
.btn-primary:hover { background: #0056b3; }
.btn-success { background: #28a745; color: white; }
.btn-success:hover { background: #1e7e34; }
.gallery-list { padding: 20px; }
.gallery-item { background: white; border: 1px solid #e9ecef; border-radius: 10px; padding: 20px; margin-bottom: 15px; }
.gallery-title { font-size: 1.3em; font-weight: 600; color: #2c3e50; margin-bottom: 8px; }
.gallery-stats { display: flex; gap: 20px; color: #6c757d; font-size: 0.9em; }
.progress-bar { width: 100%; height: 8px; background: #e9ecef; border-radius: 4px; overflow: hidden; margin-top: 8px; }
.progress-fill { height: 100%; background: linear-gradient(90deg, #28a745, #20c997); transition: width 0.3s ease; }
.empty-state { text-align: center; padding: 60px 20px; color: #6c757d; }
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎨 画廊下载管理器</h1>
<p>管理您的画廊下载任务</p>
</div>
<div class="controls">
<button class="btn btn-primary" onclick="loadGalleries()">📁 读取文件夹</button>
<button class="btn btn-success" onclick="startDownload()" id="downloadBtn">⬇ 开始下载所有未完成</button>
</div>
<div class="gallery-list" id="galleryList">
<div class="empty-state">
<h3>暂无待下载任务</h3>
<p>点击"读取文件夹"按钮加载数据</p>
</div>
</div>
</div>
<script>
// Escape text before it is placed into innerHTML (titles are untrusted input).
function escapeHtml(value) {
return String(value).replace(/[&<>"']/g, ch => ({
'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
})[ch]);
}
async function loadGalleries() {
try {
const response = await fetch('/api/galleries');
const galleries = await response.json();
displayGalleries(galleries);
} catch (error) {
alert('读取文件夹失败: ' + error);
}
}
function displayGalleries(galleries) {
const galleryList = document.getElementById('galleryList');
if (galleries.length === 0) {
galleryList.innerHTML = '<div class="empty-state"><h3>暂无画廊数据</h3></div>';
return;
}
const pendingGalleries = galleries.filter(gallery => gallery.downloaded_images < gallery.total_images);
if (pendingGalleries.length === 0) {
galleryList.innerHTML = '<div class="empty-state"><h3>🎉 所有任务已完成!</h3></div>';
return;
}
galleryList.innerHTML = pendingGalleries.map(gallery => {
const progress = (gallery.downloaded_images / gallery.total_images) * 100;
return `
<div class="gallery-item">
<div class="gallery-title">${escapeHtml(gallery.title)}</div>
<div class="gallery-stats">
<span>总图片: ${gallery.total_images}</span>
<span>已下载: ${gallery.downloaded_images}</span>
<span>进度: ${Math.round(progress)}%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: ${progress}%"></div>
</div>
</div>
`;
}).join('');
}
async function startDownload() {
const btn = document.getElementById('downloadBtn');
btn.disabled = true;
btn.innerHTML = '⏳ 下载中...';
try {
const response = await fetch('/api/download/all', { method: 'POST' });
const result = await response.json();
if (result.status === 'success') {
alert('批量下载任务已开始!请查看后端控制台了解进度。');
setTimeout(loadGalleries, 5000);
} else {
alert('下载失败: ' + result.message);
}
} catch (error) {
alert('下载请求失败: ' + error);
} finally {
btn.disabled = false;
btn.innerHTML = '⬇ 开始下载所有未完成';
}
}
document.addEventListener('DOMContentLoaded', loadGalleries);
</script>
</body>
</html>
"""
@app.get("/api/galleries")
async def get_galleries():
    """Return the summary of every gallery found in the downloads directory."""
    return get_all_galleries()
@app.post("/api/download/all")
async def download_all_galleries(background_tasks: BackgroundTasks):
    """Schedule the batch download of all unfinished galleries and answer at once.

    The download runs as a background task; the response only confirms that
    it was queued.
    """
    background_tasks.add_task(download_all_pending_galleries)
    acknowledgement = {
        "status": "success",
        "message": "开始批量下载所有未完成的画廊"
    }
    return acknowledgement
@app.post("/api/download/{title}")
async def download_gallery(title: str, background_tasks: BackgroundTasks):
    """Schedule the download of the single gallery named ``title``.

    The download runs as a background task; the response only confirms that
    it was queued and echoes the title.
    """
    background_tasks.add_task(download_gallery_images, title)
    acknowledgement = {
        "status": "success",
        "message": f"开始下载画廊: {title}",
        "title": title
    }
    return acknowledgement
@app.get("/health")
async def health_check():
    """Liveness probe: always answers ``{"status": "healthy"}``."""
    payload = {"status": "healthy"}
    return payload
if __name__ == "__main__":
    # Run the app from the "main:app" import string on all interfaces,
    # port 5100, with uvicorn's reload option switched on.
    uvicorn.run("main:app", host="0.0.0.0", port=5100, reload=True)