# eh-v2/main.py

# main.py
import os
import json
import logging
from pathlib import Path
from typing import Dict, Any, List
import asyncio
import httpx

import aiofiles
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn

# Configure logging: INFO level, with timestamp, logger name and level on every record
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Constants
# Root directory; each gallery gets its own sub-directory underneath it
DOWNLOADS_DIR = "downloads"
# Sanitized directory names are truncated to this many characters
MAX_FILENAME_LENGTH = 100
# Every character listed here is replaced with '_' by sanitize_filename
INVALID_FILENAME_CHARS = '<>:"/\\|?*'
# Size of the semaphore that bounds parallel image downloads within one gallery
MAX_CONCURRENT_DOWNLOADS = 5
# Timeout value handed to every httpx request made while downloading
DOWNLOAD_TIMEOUT = 30

# FastAPI application
app = FastAPI(title="eh-v2")

# Global registry used to track download state, keyed by gallery title
download_status: Dict[str, Dict[str, Any]] = {}

# Data models

# Request body of POST /save_url; it is written verbatim to the gallery's data.json.
class SaveDataRequest(BaseModel):
    # Gallery URL as supplied by the client (stored, not fetched by this service)
    url: str
    # Gallery title; its sanitized form is used as the directory name
    title: str
    # Maps a target file name to the page URL from which the image address is extracted
    all_images: Dict[str, str]
    # Image count as reported by the client
    total_images: int

# Summary of one gallery directory, as returned by GET /api/galleries.
class GalleryInfo(BaseModel):
    # Title read from data.json (falls back to the directory name)
    title: str
    # String form of the gallery directory path
    path: str
    # "total_images" value read from data.json (0 when absent)
    total_images: int
    # Number of listed images found on disk
    downloaded_images: int

# Result of downloading one gallery, produced by download_gallery_images.
class DownloadStatusResponse(BaseModel):
    # "success" or "error"
    status: str
    # Human-readable outcome text
    message: str
    # Number of images available on disk after the run
    downloaded: int
    # Number of images listed for the gallery
    total: int
    # Completion percentage in the range 0-100
    current_progress: float

# Utility functions
def setup_downloads_directory() -> Path:
    """Create the downloads root directory if it is missing and return its path."""
    root = Path(DOWNLOADS_DIR)
    # exist_ok keeps repeated start-ups from failing on an existing directory
    root.mkdir(exist_ok=True)
    return root

def sanitize_filename(filename: str) -> str:
    """Turn an arbitrary title into a string usable as a single path component.

    Every character in ``INVALID_FILENAME_CHARS`` is replaced with ``'_'`` and
    the result is truncated to ``MAX_FILENAME_LENGTH`` characters.

    An empty result, ``'.'`` and ``'..'`` are rewritten to underscores of the
    same length (at least one). Joined onto a base directory, those three
    values would otherwise resolve to the base directory itself or to its
    parent, letting a crafted title write outside its own gallery folder.

    Args:
        filename: Raw, possibly untrusted, title or file name.

    Returns:
        The sanitized name; never empty, ``'.'`` or ``'..'``.
    """
    # One translate pass replaces all forbidden characters at once.
    replacements = str.maketrans(
        INVALID_FILENAME_CHARS, '_' * len(INVALID_FILENAME_CHARS)
    )
    sanitized = filename.translate(replacements)[:MAX_FILENAME_LENGTH]

    # Names that refer to "here" or "parent" must not be used as directories.
    if sanitized in ('', '.', '..'):
        sanitized = '_' * max(len(sanitized), 1)
    return sanitized

def create_title_directory(base_path: Path, title: str) -> Path:
    """Create (if needed) and return the directory for the gallery *title*.

    The directory name is the sanitized form of *title*, placed directly under
    *base_path*.

    Args:
        base_path: Directory that holds all gallery directories.
        title: Raw gallery title.

    Returns:
        Path of the gallery directory, which exists when this function returns.
    """
    safe_title = sanitize_filename(title)
    title_dir = base_path / safe_title
    # parents=True recreates base_path as well, so saving keeps working even if
    # the downloads root was removed after the application started.
    title_dir.mkdir(parents=True, exist_ok=True)
    return title_dir

async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
    """Write *data* to *file_path* as indented UTF-8 JSON, keeping non-ASCII text as is."""
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as handle:
        serialized = json.dumps(data, ensure_ascii=False, indent=2)
        await handle.write(serialized)

def get_all_galleries() -> List[GalleryInfo]:
    """Scan the downloads directory and summarize every gallery found there.

    A gallery is any sub-directory of ``DOWNLOADS_DIR`` that contains a
    ``data.json`` file. For each one, the images listed under ``all_images``
    are compared with the files present in the directory.

    An image counts as downloaded when a file with the same stem exists,
    whatever its extension. The downloader stores each image with the
    extension taken from the real image URL (``file_path.with_suffix(...)``),
    so the name on disk usually differs from the key in ``all_images`` and an
    exact-name comparison would never report progress.

    Returns:
        One ``GalleryInfo`` per readable gallery. Galleries whose ``data.json``
        cannot be read or parsed are logged and skipped.
    """
    galleries: List[GalleryInfo] = []
    root = Path(DOWNLOADS_DIR)

    if not root.exists():
        return galleries

    for gallery_dir in root.iterdir():
        data_file = gallery_dir / "data.json"
        if not gallery_dir.is_dir() or not data_file.exists():
            continue

        try:
            with open(data_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Stems of all files in the gallery directory, metadata file excluded.
            present_stems = {
                entry.stem
                for entry in gallery_dir.iterdir()
                if entry.is_file() and entry.name != data_file.name
            }
            downloaded_count = sum(
                1
                for filename in data.get('all_images', {})
                if Path(filename).stem in present_stems
            )

            galleries.append(GalleryInfo(
                title=data.get('title', gallery_dir.name),
                path=str(gallery_dir),
                total_images=data.get('total_images', 0),
                downloaded_images=downloaded_count
            ))
        except Exception as e:
            # One broken gallery must not hide the others; report it and go on.
            logger.error(f"读取画廊数据失败 {gallery_dir}: {e}")

    return galleries

async def download_single_image(client: httpx.AsyncClient, url: str, file_path: Path, semaphore: asyncio.Semaphore) -> bool:
    """Download the image shown on the page *url* and store it next to *file_path*.

    The page is fetched first and the ``src`` of its ``<img id="img" ...>``
    element is taken as the real image address. The file is stored under
    *file_path* with its suffix replaced by the extension of that address.

    Args:
        client: Shared HTTP client used for both requests.
        url: Address of the page that embeds the image.
        file_path: Destination path; its suffix is replaced by the image's own.
        semaphore: Limits how many downloads run at the same time.

    Returns:
        ``True`` when the image is on disk afterwards (already present or just
        downloaded), ``False`` on any failure. Failures are logged, not raised.
    """
    import html
    import re
    from pathlib import PurePosixPath
    from urllib.parse import urlsplit

    async with semaphore:
        temp_path = None
        try:
            # Fetch the page first to learn the real image address and extension.
            response = await client.get(url, timeout=DOWNLOAD_TIMEOUT)
            response.raise_for_status()

            match = re.search(r'img id="img" src="(.*?)"', response.text)
            if not match:
                logger.error(f"下载失败 {url}: 未找到图片地址")
                return False

            # Attribute values are HTML-escaped (e.g. "&amp;"); undo that before use.
            real_img_url = html.unescape(match.group(1))

            # Take the extension from the URL path only, so query strings and dots
            # in the host name cannot leak into the file name. Anything that is
            # not a plain alphanumeric extension falls back to ".jpg".
            suffix = PurePosixPath(urlsplit(real_img_url).path).suffix
            if not re.fullmatch(r'\.[A-Za-z0-9]+', suffix):
                suffix = '.jpg'

            # Build the destination path carrying the detected suffix.
            file_path_with_suffix = file_path.with_suffix(suffix)

            if file_path_with_suffix.exists():
                return True

            img_response = await client.get(real_img_url, timeout=DOWNLOAD_TIMEOUT)
            img_response.raise_for_status()

            # Write to a temporary name and rename afterwards, so an interrupted
            # write never leaves a truncated file that looks like a finished image.
            temp_path = file_path_with_suffix.with_name(file_path_with_suffix.name + '.part')
            async with aiofiles.open(temp_path, 'wb') as f:
                await f.write(img_response.content)
            os.replace(temp_path, file_path_with_suffix)

            return True

        except Exception as e:
            logger.error(f"下载失败 {url}: {e}")
            if temp_path is not None:
                # Best-effort cleanup; the temporary file may not exist at all.
                try:
                    temp_path.unlink()
                except OSError:
                    pass
            return False

async def download_gallery_images(title: str) -> DownloadStatusResponse:
    """Download every missing image of the gallery *title*.

    The gallery's ``data.json`` is read from its directory under the downloads
    root. Images already on disk are counted without touching the network; the
    rest are fetched concurrently, bounded by ``MAX_CONCURRENT_DOWNLOADS``.
    Progress is recorded in the module-level ``download_status`` under *title*.

    An image is treated as already present when a file with the same stem
    exists, because the downloader replaces the suffix with the one from the
    real image URL. File names that are not a plain single path component are
    skipped, since ``data.json`` originates from client input and such names
    would resolve outside the gallery directory.

    Args:
        title: Gallery title as stored in ``data.json``; it is sanitized to
            locate the gallery directory.

    Returns:
        A ``DownloadStatusResponse`` with status ``"success"`` when the run
        finished (even if only part of the images could be fetched), or
        ``"error"`` when the metadata is missing, empty, or processing failed.
    """
    safe_title = sanitize_filename(title)
    gallery_path = downloads_path / safe_title
    data_file = gallery_path / "data.json"

    if not data_file.exists():
        return DownloadStatusResponse(
            status="error",
            message="画廊数据文件不存在",
            downloaded=0,
            total=0,
            current_progress=0.0
        )

    try:
        async with aiofiles.open(data_file, 'r', encoding='utf-8') as f:
            content = await f.read()
        data = json.loads(content)

        all_images = data.get('all_images', {})
        total_images = len(all_images)

        if total_images == 0:
            return DownloadStatusResponse(
                status="error",
                message="没有可下载的图片",
                downloaded=0,
                total=0,
                current_progress=0.0
            )

        download_status[title] = {
            "downloaded": 0,
            "total": total_images,
            "status": "downloading"
        }

        logger.info(f"开始下载画廊 '{title}'，共 {total_images} 张图片")

        # Stems of the files already stored for this gallery (metadata excluded).
        existing_stems = {
            entry.stem
            for entry in gallery_path.iterdir()
            if entry.is_file() and entry.name != data_file.name
        }

        semaphore = asyncio.Semaphore(MAX_CONCURRENT_DOWNLOADS)

        async with httpx.AsyncClient(
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            },
            follow_redirects=True
        ) as client:

            tasks = []
            for filename, url in all_images.items():
                # Reject names that would leave the gallery directory.
                if filename in ('', '.', '..') or Path(filename).name != filename:
                    logger.error(f"跳过不安全的文件名 {filename!r}")
                    continue

                # Already on disk under any extension: count it, skip the network.
                if Path(filename).stem in existing_stems:
                    download_status[title]["downloaded"] += 1
                    continue

                image_path = gallery_path / filename
                tasks.append(download_single_image(client, url, image_path, semaphore))

            if tasks:
                results = await asyncio.gather(*tasks, return_exceptions=True)
                # Only an explicit True is a success; False and exceptions are failures.
                successful_downloads = sum(1 for result in results if result is True)
                download_status[title]["downloaded"] += successful_downloads

            downloaded_count = download_status[title]["downloaded"]
            progress = (downloaded_count / total_images) * 100

            if downloaded_count == total_images:
                download_status[title]["status"] = "completed"
                message = f"下载完成！共下载 {downloaded_count}/{total_images} 张图片"
            else:
                download_status[title]["status"] = "partial"
                message = f"部分完成！下载 {downloaded_count}/{total_images} 张图片"

            return DownloadStatusResponse(
                status="success",
                message=message,
                downloaded=downloaded_count,
                total=total_images,
                current_progress=progress
            )

    except Exception as e:
        logger.error(f"下载画廊 '{title}' 时发生错误: {e}")
        download_status[title] = {
            "status": "error",
            "message": str(e)
        }
        return DownloadStatusResponse(
            status="error",
            message=f"下载失败: {str(e)}",
            downloaded=0,
            total=0,
            current_progress=0.0
        )

async def download_all_pending_galleries():
    """Download, one after another, every gallery that still has missing images.

    A gallery is pending when fewer images are on disk than its metadata
    lists. The outcome of each gallery is logged, and the coroutine pauses for
    one second after each gallery.
    """
    pending = []
    for candidate in get_all_galleries():
        if candidate.downloaded_images < candidate.total_images:
            pending.append(candidate)

    logger.info(f"找到 {len(pending)} 个待下载画廊")

    if len(pending) == 0:
        logger.info("没有待下载的画廊")
        return

    for entry in pending:
        name = entry.title
        logger.info(f"开始下载画廊: {name}")
        outcome = await download_gallery_images(name)

        if outcome.status != "success":
            logger.error(f"画廊 '{name}' 下载失败: {outcome.message}")
        else:
            logger.info(f"画廊 '{name}' 下载完成: {outcome.message}")

        # Short pause between galleries
        await asyncio.sleep(1)

    logger.info("批量下载任务完成")

# Initialization: create the downloads root at import time and keep its path
# for the route handlers and download helpers
downloads_path = setup_downloads_directory()

# API routes

# Store the metadata of one gallery as data.json inside its own directory.
# The handler is registered for OPTIONS as well; a call without a body is
# simply acknowledged with {"status": "ok"}.
@app.post("/save_url")
@app.options("/save_url")
async def save_url_data(request: SaveDataRequest = None):
    if not request:
        return {"status": "ok"}

    try:
        target_dir = create_title_directory(downloads_path, request.title)
        record = {
            "url": request.url,
            "title": request.title,
            "all_images": request.all_images,
            "total_images": request.total_images
        }
        await save_data_to_file(target_dir / "data.json", record)
    except Exception as e:
        # Any failure while creating the directory or writing the file becomes a 500
        logger.error(f"保存数据失败: {e}")
        raise HTTPException(status_code=500, detail=f"保存失败: {str(e)}")
    else:
        logger.info(f"成功保存数据: {request.title}")
        return {
            "status": "success",
            "message": f"数据保存成功，共 {request.total_images} 张图片",
            "path": str(target_dir)
        }

# Serve the single-page gallery manager UI.
# The page lists pending galleries via GET /api/galleries and starts the batch
# download via POST /api/download/all. Gallery titles come from client-supplied
# data, so the script escapes them before inserting them into the page markup.
@app.get("/", response_class=HTMLResponse)
async def read_gallery_manager():
    return """
    <!DOCTYPE html>
    <html lang="zh-CN">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>画廊下载管理器</title>
        <style>
            * { margin: 0; padding: 0; box-sizing: border-box; }
            body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); min-height: 100vh; padding: 20px; }
            .container { max-width: 1200px; margin: 0 auto; background: white; border-radius: 15px; box-shadow: 0 20px 40px rgba(0,0,0,0.1); overflow: hidden; }
            .header { background: linear-gradient(135deg, #2c3e50, #34495e); color: white; padding: 30px; text-align: center; }
            .header h1 { font-size: 2.5em; margin-bottom: 10px; }
            .controls { padding: 20px; background: #f8f9fa; border-bottom: 1px solid #e9ecef; display: flex; gap: 15px; flex-wrap: wrap; }
            .btn { padding: 12px 24px; border: none; border-radius: 8px; font-size: 16px; font-weight: 600; cursor: pointer; transition: all 0.3s ease; }
            .btn-primary { background: #007bff; color: white; }
            .btn-primary:hover { background: #0056b3; }
            .btn-success { background: #28a745; color: white; }
            .btn-success:hover { background: #1e7e34; }
            .gallery-list { padding: 20px; }
            .gallery-item { background: white; border: 1px solid #e9ecef; border-radius: 10px; padding: 20px; margin-bottom: 15px; }
            .gallery-title { font-size: 1.3em; font-weight: 600; color: #2c3e50; margin-bottom: 8px; }
            .gallery-stats { display: flex; gap: 20px; color: #6c757d; font-size: 0.9em; }
            .progress-bar { width: 100%; height: 8px; background: #e9ecef; border-radius: 4px; overflow: hidden; margin-top: 8px; }
            .progress-fill { height: 100%; background: linear-gradient(90deg, #28a745, #20c997); transition: width 0.3s ease; }
            .empty-state { text-align: center; padding: 60px 20px; color: #6c757d; }
        </style>
    </head>
    <body>
        <div class="container">
            <div class="header">
                <h1>🎨 画廊下载管理器</h1>
                <p>管理您的画廊下载任务</p>
            </div>

            <div class="controls">
                <button class="btn btn-primary" onclick="loadGalleries()">📁 读取文件夹</button>
                <button class="btn btn-success" onclick="startDownload()" id="downloadBtn">⬇️ 开始下载所有未完成</button>
            </div>

            <div class="gallery-list" id="galleryList">
                <div class="empty-state">
                    <h3>暂无待下载任务</h3>
                    <p>点击"读取文件夹"按钮加载数据</p>
                </div>
            </div>
        </div>

        <script>
            function escapeHtml(text) {
                const holder = document.createElement('div');
                holder.textContent = text;
                return holder.innerHTML;
            }

            async function loadGalleries() {
                try {
                    const response = await fetch('/api/galleries');
                    const galleries = await response.json();
                    displayGalleries(galleries);
                } catch (error) {
                    alert('读取文件夹失败: ' + error);
                }
            }

            function displayGalleries(galleries) {
                const galleryList = document.getElementById('galleryList');

                if (galleries.length === 0) {
                    galleryList.innerHTML = '<div class="empty-state"><h3>暂无画廊数据</h3></div>';
                    return;
                }

                const pendingGalleries = galleries.filter(gallery => gallery.downloaded_images < gallery.total_images);

                if (pendingGalleries.length === 0) {
                    galleryList.innerHTML = '<div class="empty-state"><h3>🎉 所有任务已完成！</h3></div>';
                    return;
                }

                galleryList.innerHTML = pendingGalleries.map(gallery => {
                    const progress = (gallery.downloaded_images / gallery.total_images) * 100;
                    return `
                        <div class="gallery-item">
                            <div class="gallery-title">${escapeHtml(gallery.title)}</div>
                            <div class="gallery-stats">
                                <span>总图片: ${gallery.total_images}</span>
                                <span>已下载: ${gallery.downloaded_images}</span>
                                <span>进度: ${Math.round(progress)}%</span>
                            </div>
                            <div class="progress-bar">
                                <div class="progress-fill" style="width: ${progress}%"></div>
                            </div>
                        </div>
                    `;
                }).join('');
            }

            async function startDownload() {
                const btn = document.getElementById('downloadBtn');
                btn.disabled = true;
                btn.innerHTML = '⏳ 下载中...';

                try {
                    const response = await fetch('/api/download/all', { method: 'POST' });
                    const result = await response.json();

                    if (result.status === 'success') {
                        alert('批量下载任务已开始！请查看后端控制台了解进度。');
                        setTimeout(loadGalleries, 5000);
                    } else {
                        alert('下载失败: ' + result.message);
                    }
                } catch (error) {
                    alert('下载请求失败: ' + error);
                } finally {
                    btn.disabled = false;
                    btn.innerHTML = '⬇️ 开始下载所有未完成';
                }
            }

            document.addEventListener('DOMContentLoaded', loadGalleries);
        </script>
    </body>
    </html>
    """

# Return the summary of every gallery found in the downloads directory.
@app.get("/api/galleries")
async def get_galleries():
    return get_all_galleries()

# Queue the batch download of all unfinished galleries as a background task
# and answer immediately; the download itself runs after the response is sent.
@app.post("/api/download/all")
async def download_all_galleries(background_tasks: BackgroundTasks):
    background_tasks.add_task(download_all_pending_galleries)
    return dict(
        status="success",
        message="开始批量下载所有未完成的画廊",
    )

# Queue the download of a single gallery, identified by its title, as a
# background task and answer immediately.
@app.post("/api/download/{title}")
async def download_gallery(title: str, background_tasks: BackgroundTasks):
    background_tasks.add_task(download_gallery_images, title)
    return dict(
        status="success",
        message=f"开始下载画廊: {title}",
        title=title,
    )

# Liveness probe: always reports a healthy status.
@app.get("/health")
async def health_check():
    return dict(status="healthy")

# Script entry point: start the uvicorn server when this file is run directly.
if __name__ == "__main__":
    uvicorn.run(
        # The app is given as an import string ("module:attribute")
        "main:app",
        host="0.0.0.0",
        port=5100,
        # NOTE(review): auto-reload looks like a development setting — confirm before deploying
        reload=True
    )