You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
eh-v2/main.py

834 lines
30 KiB

# main.py
import os
import json
import logging
from pathlib import Path
from typing import Dict, Any, List
import asyncio
import httpx
import aiofiles
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn
# Logging configuration: INFO level, with timestamp, logger name and level on every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Constants
# Download root, as a path relative to the process working directory.
DOWNLOADS_DIR = "downloads"
# Sanitized file/directory names are truncated to this many characters.
MAX_FILENAME_LENGTH = 100
# Characters that sanitize_filename() replaces with '_'.
INVALID_FILENAME_CHARS = '<>:"/\\|?*'
MAX_CONCURRENT_DOWNLOADS = 5 # maximum number of concurrent image downloads
DOWNLOAD_TIMEOUT = 30 # per-request download timeout (seconds)
# FastAPI application
app = FastAPI(title="eh-v2")
# Global, in-memory download state, keyed by gallery title.
download_status: Dict[str, Dict[str, Any]] = {}
# Data models
class SaveDataRequest(BaseModel):
    """Gallery description: source URL, title, image map and image count.

    NOTE(review): nothing in the visible part of this file consumes this
    model; presumably it mirrors the per-gallery ``data.json`` -- confirm
    against the endpoint that uses it.
    """
    url: str
    title: str
    # presumably image filename -> page URL, like the "all_images" entry read
    # back from data.json -- TODO confirm
    all_images: Dict[str, str]
    total_images: int
class GalleryInfo(BaseModel):
    """Summary of one gallery directory, as built by get_all_galleries()."""
    # "title" from data.json, falling back to the directory name.
    title: str
    # String form of the gallery directory path.
    path: str
    # "total_images" from data.json (0 when the key is absent).
    total_images: int
    # Number of files listed under "all_images" that exist in the directory.
    downloaded_images: int
class DownloadStatusResponse(BaseModel):
    """Outcome of one download_gallery_images() run."""
    # "success" or "error", as set by download_gallery_images().
    status: str
    # Human-readable result or failure description.
    message: str
    # Images present on disk after the run (0 on error).
    downloaded: int
    # Number of images listed for the gallery (0 on error).
    total: int
    # downloaded / total expressed as a percentage (0.0 on error).
    current_progress: float
# Utility functions
def setup_downloads_directory() -> Path:
    """Make sure the download root exists and return its (relative) path."""
    root = Path(DOWNLOADS_DIR)
    root.mkdir(exist_ok=True)
    absolute_root = root.absolute()
    logger.info(f"下载目录已准备: {absolute_root}")
    return root
def sanitize_filename(filename: str) -> str:
    """Turn an arbitrary title into a name that is safe as one path component.

    Every character in INVALID_FILENAME_CHARS and every ASCII control
    character is replaced by '_', and the result is truncated to
    MAX_FILENAME_LENGTH characters.

    The names "", "." and ".." are never returned: joined onto a base
    directory they would resolve to the base itself or to its parent, which
    is dangerous for callers that create or delete the resulting path. Their
    dots are replaced by '_' ("" becomes "_").

    Args:
        filename: Raw name, e.g. a gallery title.

    Returns:
        The sanitized, non-empty name.
    """
    # Single translation pass instead of one str.replace() per character.
    table = {ord(char): '_' for char in INVALID_FILENAME_CHARS}
    table.update({code: '_' for code in range(32)})
    sanitized = filename.translate(table)[:MAX_FILENAME_LENGTH]
    if sanitized in ("", ".", ".."):
        # Neutralize path-special names so they stay inside the base directory.
        sanitized = sanitized.replace('.', '_') or '_'
    return sanitized
def create_title_directory(base_path: Path, title: str) -> Path:
    """Create (if missing) and return the directory for a gallery title.

    The directory name is the sanitized title, placed directly under
    *base_path*.
    """
    target = base_path / sanitize_filename(title)
    target.mkdir(exist_ok=True)
    logger.info(f"创建标题目录: {target}")
    return target
async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
    """Write *data* to *file_path* as pretty-printed UTF-8 JSON.

    Args:
        file_path: Destination file; overwritten if it exists.
        data: JSON-serializable mapping.

    Raises:
        TypeError / ValueError: if *data* cannot be serialized. In that case
            the destination file is left untouched.
    """
    # Serialize before opening: opening in 'w' mode truncates the file, so a
    # serialization error after that point would wipe an existing file.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
        await f.write(payload)
def get_all_galleries() -> List[GalleryInfo]:
    """Scan the download root and summarize every gallery with a data.json.

    For each sub-directory containing ``data.json`` the number of files
    listed under "all_images" that already exist on disk is counted.
    Galleries whose metadata cannot be read are logged and skipped.
    """
    found = []
    root = Path(DOWNLOADS_DIR)
    if not root.exists():
        return found
    for entry in root.iterdir():
        if not entry.is_dir():
            continue
        metadata_file = entry / "data.json"
        if not metadata_file.exists():
            continue
        try:
            with open(metadata_file, 'r', encoding='utf-8') as handle:
                metadata = json.load(handle)
            # Count the listed images that are already present on disk.
            present = 0
            if 'all_images' in metadata:
                present = sum(
                    1
                    for name, _page_url in metadata['all_images'].items()
                    if (entry / name).exists()
                )
            summary = GalleryInfo(
                title=metadata.get('title', entry.name),
                path=str(entry),
                total_images=metadata.get('total_images', 0),
                downloaded_images=present
            )
            found.append(summary)
        except Exception as e:
            logger.error(f"读取画廊数据失败 {entry}: {e}")
    return found
async def download_single_image(client: httpx.AsyncClient, url: str, file_path: Path, semaphore: asyncio.Semaphore) -> bool:
    """Download one image via its intermediate page.

    The page at *url* is fetched, the real image URL is taken from its
    ``<img id="img" src="...">`` tag, and the image bytes are saved to
    *file_path*.

    Args:
        client: Shared HTTP client used for both requests.
        url: URL of the intermediate page.
        file_path: Final location of the image file.
        semaphore: Limits how many downloads run at the same time.

    Returns:
        True if the file already existed or was downloaded, False on any
        failure (failures are logged, never raised).
    """
    import html
    import re

    async with semaphore:
        try:
            logger.info(f"开始下载: {url}")
            if file_path.exists():
                logger.info(f"文件已存在: {file_path}")
                return True
            # Step 1: fetch the intermediate page.
            response = await client.get(url, timeout=DOWNLOAD_TIMEOUT)
            response.raise_for_status()
            # Step 2: extract the real image URL.
            match = re.search(r'img id="img" src="(.*?)"', response.text)
            if not match:
                logger.error(f"无法提取图片URL: {url}")
                return False
            # The value comes from an HTML attribute, so entities such as
            # "&amp;" must be decoded to obtain the actual URL.
            real_img_url = html.unescape(match.group(1))
            logger.info(f"真实URL: {real_img_url}")
            # Step 3: download the image.
            img_response = await client.get(real_img_url, timeout=DOWNLOAD_TIMEOUT)
            img_response.raise_for_status()
            # Write to a temporary sibling first and move it into place only
            # when complete: the mere existence of file_path is treated as
            # "downloaded", so a partial file must never appear there.
            temp_path = file_path.with_name(file_path.name + ".part")
            try:
                async with aiofiles.open(temp_path, 'wb') as f:
                    await f.write(img_response.content)
                os.replace(temp_path, file_path)
            finally:
                # Only still present when the write or the move failed.
                if temp_path.exists():
                    temp_path.unlink()
            logger.info(f"下载完成: {file_path}")
            return True
        except Exception as e:
            logger.error(f"下载失败 {url}: {e}")
            return False
async def download_gallery_images(title: str) -> DownloadStatusResponse:
    """Download every missing image of one gallery.

    The gallery directory is ``downloads_path / sanitize_filename(title)``;
    its ``data.json`` supplies the "all_images" mapping of filename to page
    URL. Images already on disk are counted as downloaded without being
    fetched again. While the run is active, ``download_status[title]`` is
    updated after every finished image so that progress can be polled.

    Args:
        title: Gallery title (unsanitized).

    Returns:
        A DownloadStatusResponse. ``status`` is "success" when the run
        finished (even if some images failed) and "error" when the metadata
        is missing or empty, or when an unexpected exception occurred.
    """
    safe_title = sanitize_filename(title)
    gallery_path = downloads_path / safe_title
    data_file = gallery_path / "data.json"

    def _error_response(message: str) -> DownloadStatusResponse:
        # Every failure path reports zeroed counters.
        return DownloadStatusResponse(
            status="error",
            message=message,
            downloaded=0,
            total=0,
            current_progress=0.0
        )

    if not data_file.exists():
        return _error_response("画廊数据文件不存在")
    try:
        # Read the gallery metadata.
        async with aiofiles.open(data_file, 'r', encoding='utf-8') as f:
            content = await f.read()
        data = json.loads(content)
        all_images = data.get('all_images', {})
        total_images = len(all_images)
        if total_images == 0:
            return _error_response("没有可下载的图片")
        # Initialize the shared status entry; a local reference is kept so
        # this run keeps counting on its own entry.
        progress_entry: Dict[str, Any] = {
            "downloaded": 0,
            "total": total_images,
            "status": "downloading"
        }
        download_status[title] = progress_entry
        logger.info(f"开始下载画廊 '{title}',共 {total_images} 张图片")
        # The semaphore caps the number of simultaneous downloads.
        semaphore = asyncio.Semaphore(MAX_CONCURRENT_DOWNLOADS)
        async with httpx.AsyncClient(
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            },
            follow_redirects=True
        ) as client:

            async def _download_and_count(page_url: str, image_path: Path) -> None:
                # Count each image as soon as it is done so that the status
                # entry reflects live progress rather than only the final total.
                if await download_single_image(client, page_url, image_path, semaphore) is True:
                    progress_entry["downloaded"] += 1

            # Prepare one task per image that is not on disk yet.
            tasks = []
            for filename, url in all_images.items():
                image_path = gallery_path / filename
                if image_path.exists():
                    # Already present: counts as done, nothing to fetch.
                    progress_entry["downloaded"] += 1
                    continue
                tasks.append(_download_and_count(url, image_path))
            if tasks:
                await asyncio.gather(*tasks, return_exceptions=True)
        # Final bookkeeping.
        downloaded_count = progress_entry["downloaded"]
        progress = (downloaded_count / total_images) * 100
        if downloaded_count == total_images:
            progress_entry["status"] = "completed"
            message = f"下载完成!共下载 {downloaded_count}/{total_images} 张图片"
            logger.info(f"画廊 '{title}' {message}")
        else:
            progress_entry["status"] = "partial"
            message = f"部分完成!下载 {downloaded_count}/{total_images} 张图片"
            logger.warning(f"画廊 '{title}' {message}")
        return DownloadStatusResponse(
            status="success",
            message=message,
            downloaded=downloaded_count,
            total=total_images,
            current_progress=progress
        )
    except Exception as e:
        logger.error(f"下载画廊 '{title}' 时发生错误: {e}")
        download_status[title] = {
            "status": "error",
            "message": str(e)
        }
        return _error_response(f"下载失败: {str(e)}")
async def download_all_pending_galleries():
    """Download, one after another, every gallery that still misses images.

    A one-second pause follows each gallery to keep the request rate down.
    """
    pending = [
        info for info in get_all_galleries()
        if info.downloaded_images < info.total_images
    ]
    if len(pending) == 0:
        logger.info("没有待下载的画廊")
        return
    logger.info(f"开始批量下载 {len(pending)} 个画廊")
    for info in pending:
        logger.info(f"开始下载画廊: {info.title}")
        outcome = await download_gallery_images(info.title)
        if outcome.status != "success":
            logger.error(f"画廊 '{info.title}' 下载失败: {outcome.message}")
        else:
            logger.info(f"画廊 '{info.title}' 下载完成: {outcome.message}")
        # Pause between galleries so requests are not sent too frequently.
        await asyncio.sleep(1)
    logger.info("批量下载任务完成")
# Initialization: create the download root at import time. The resulting
# module-level path is used by download_gallery_images() and delete_gallery().
downloads_path = setup_downloads_directory()
# API routes
@app.get("/", response_class=HTMLResponse)
async def read_gallery_manager():
    """Serve the single-page gallery manager UI.

    The page is a static HTML document with inline CSS and JavaScript that
    talks to the /api/* endpoints of this application. Gallery titles are
    HTML-escaped before being inserted into the page and are handed to the
    click handlers through a ``data-title`` attribute, so titles containing
    quotes or markup neither break the handlers nor inject HTML.
    """
    return """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>画廊下载管理器</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 15px;
box-shadow: 0 20px 40px rgba(0,0,0,0.1);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #2c3e50, #34495e);
color: white;
padding: 30px;
text-align: center;
}
.header h1 {
font-size: 2.5em;
margin-bottom: 10px;
}
.header p {
opacity: 0.8;
font-size: 1.1em;
}
.controls {
padding: 20px;
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
display: flex;
gap: 15px;
flex-wrap: wrap;
}
.btn {
padding: 12px 24px;
border: none;
border-radius: 8px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
display: inline-flex;
align-items: center;
gap: 8px;
}
.btn-primary {
background: #007bff;
color: white;
}
.btn-primary:hover {
background: #0056b3;
transform: translateY(-2px);
}
.btn-success {
background: #28a745;
color: white;
}
.btn-success:hover {
background: #1e7e34;
transform: translateY(-2px);
}
.btn-danger {
background: #dc3545;
color: white;
}
.btn-danger:hover {
background: #c82333;
transform: translateY(-2px);
}
.btn-warning {
background: #ffc107;
color: #212529;
}
.btn-warning:hover {
background: #e0a800;
transform: translateY(-2px);
}
.btn:disabled {
background: #6c757d;
cursor: not-allowed;
transform: none;
}
.gallery-list {
padding: 20px;
}
.gallery-item {
background: white;
border: 1px solid #e9ecef;
border-radius: 10px;
padding: 20px;
margin-bottom: 15px;
transition: all 0.3s ease;
display: flex;
justify-content: space-between;
align-items: center;
}
.gallery-item:hover {
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
transform: translateY(-2px);
}
.gallery-info {
flex: 1;
}
.gallery-title {
font-size: 1.3em;
font-weight: 600;
color: #2c3e50;
margin-bottom: 8px;
}
.gallery-stats {
display: flex;
gap: 20px;
color: #6c757d;
font-size: 0.9em;
}
.progress-bar {
width: 100%;
height: 8px;
background: #e9ecef;
border-radius: 4px;
overflow: hidden;
margin-top: 8px;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, #28a745, #20c997);
transition: width 0.3s ease;
}
.completed .progress-fill {
background: linear-gradient(90deg, #007bff, #0056b3);
}
.status-badge {
display: inline-block;
padding: 4px 12px;
border-radius: 20px;
font-size: 0.8em;
font-weight: 600;
margin-left: 10px;
}
.status-downloading {
background: #fff3cd;
color: #856404;
}
.status-completed {
background: #d1ecf1;
color: #0c5460;
}
.status-error {
background: #f8d7da;
color: #721c24;
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: #6c757d;
}
.empty-state h3 {
margin-bottom: 10px;
font-size: 1.5em;
}
.stats-summary {
background: #f8f9fa;
padding: 15px 20px;
border-bottom: 1px solid #e9ecef;
display: flex;
justify-content: space-between;
align-items: center;
font-size: 0.9em;
color: #6c757d;
}
.gallery-actions {
display: flex;
gap: 10px;
}
.download-progress {
margin-top: 10px;
font-size: 0.9em;
color: #6c757d;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎨 画廊下载管理器</h1>
<p>管理您的画廊下载任务</p>
</div>
<div class="stats-summary" id="statsSummary">
<span>总计: <strong id="totalGalleries">0</strong> 个画廊</span>
<span>待下载: <strong id="pendingGalleries">0</strong> 个</span>
<span>已完成: <strong id="completedGalleries">0</strong> 个</span>
</div>
<div class="controls">
<button class="btn btn-primary" onclick="loadGalleries()">
📁 读取文件夹
</button>
<button class="btn btn-success" onclick="startDownload()" id="downloadBtn">
开始下载所有未完成
</button>
<button class="btn btn-warning" onclick="downloadSelected()" id="downloadSelectedBtn">
🎯 下载选中画廊
</button>
<button class="btn btn-danger" onclick="deleteJsonFiles()">
🗑 删除所有JSON文件
</button>
</div>
<div class="gallery-list" id="galleryList">
<div class="empty-state">
<h3>暂无待下载任务</h3>
<p>点击"读取文件夹"按钮加载数据</p>
</div>
</div>
</div>
<script>
let currentGalleries = [];
let selectedGalleries = new Set();
// Escape text for safe use in HTML content and attribute values.
function escapeHtml(value) {
return String(value)
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}
async function loadGalleries() {
try {
const response = await fetch('/api/galleries');
const galleries = await response.json();
currentGalleries = galleries;
displayGalleries(galleries);
updateStats(galleries);
} catch (error) {
alert('读取文件夹失败: ' + error);
}
}
function displayGalleries(galleries) {
const galleryList = document.getElementById('galleryList');
if (galleries.length === 0) {
galleryList.innerHTML = `
<div class="empty-state">
<h3>暂无画廊数据</h3>
<p>请先添加画廊数据文件</p>
</div>
`;
return;
}
// 过滤掉已完成的画廊(已下载数量等于总数量)
const pendingGalleries = galleries.filter(gallery =>
gallery.downloaded_images < gallery.total_images
);
if (pendingGalleries.length === 0) {
galleryList.innerHTML = `
<div class="empty-state">
<h3>🎉 所有任务已完成!</h3>
<p>没有待下载的画廊任务</p>
</div>
`;
return;
}
galleryList.innerHTML = pendingGalleries.map(gallery => {
const progress = (gallery.downloaded_images / gallery.total_images) * 100;
const isCompleted = gallery.downloaded_images === gallery.total_images;
const isSelected = selectedGalleries.has(gallery.title);
// The title is stored once in data-title; handlers read it from the DOM
// instead of embedding it in an inline JavaScript string literal.
const safeTitle = escapeHtml(gallery.title);
return `
<div class="gallery-item ${isCompleted ? 'completed' : ''} ${isSelected ? 'selected' : ''}"
data-title="${safeTitle}"
onclick="toggleGallerySelection(this.dataset.title)"
style="cursor: pointer; ${isSelected ? 'border-color: #007bff; background-color: #f8f9fa;' : ''}">
<div class="gallery-info">
<div class="gallery-title">
<input type="checkbox" ${isSelected ? 'checked' : ''}
onclick="event.stopPropagation(); toggleGallerySelection(this.closest('.gallery-item').dataset.title)">
${safeTitle}
${isCompleted ?
'<span class="status-badge status-completed">已完成</span>' :
'<span class="status-badge status-downloading">待下载</span>'
}
</div>
<div class="gallery-stats">
<span>总图片: ${gallery.total_images}</span>
<span>已下载: ${gallery.downloaded_images}</span>
<span>进度: ${Math.round(progress)}%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: ${progress}%"></div>
</div>
<div class="gallery-actions">
<button class="btn btn-primary btn-sm" onclick="event.stopPropagation(); downloadSingleGallery(this.closest('.gallery-item').dataset.title)">
单独下载
</button>
</div>
</div>
</div>
`;
}).join('');
}
function toggleGallerySelection(title) {
if (selectedGalleries.has(title)) {
selectedGalleries.delete(title);
} else {
selectedGalleries.add(title);
}
displayGalleries(currentGalleries);
}
function updateStats(galleries) {
const total = galleries.length;
const completed = galleries.filter(g => g.downloaded_images === g.total_images).length;
const pending = total - completed;
document.getElementById('totalGalleries').textContent = total;
document.getElementById('pendingGalleries').textContent = pending;
document.getElementById('completedGalleries').textContent = completed;
}
async function startDownload() {
const btn = document.getElementById('downloadBtn');
btn.disabled = true;
btn.innerHTML = '⏳ 下载中...';
try {
const response = await fetch('/api/download/all', {
method: 'POST'
});
const result = await response.json();
if (result.status === 'success') {
alert('批量下载任务已开始!请查看控制台了解进度。');
// 定期刷新状态
setTimeout(loadGalleries, 3000);
} else {
alert('下载失败: ' + result.message);
}
} catch (error) {
alert('下载请求失败: ' + error);
} finally {
btn.disabled = false;
btn.innerHTML = ' 开始下载所有未完成';
}
}
async function downloadSelected() {
if (selectedGalleries.size === 0) {
alert('请先选择要下载的画廊!');
return;
}
const btn = document.getElementById('downloadSelectedBtn');
btn.disabled = true;
btn.innerHTML = '⏳ 下载中...';
try {
for (const title of selectedGalleries) {
await downloadSingleGallery(title);
// 添加延迟避免请求过于频繁
await new Promise(resolve => setTimeout(resolve, 1000));
}
alert('选中的画廊下载任务已完成!');
selectedGalleries.clear();
await loadGalleries();
} catch (error) {
alert('下载失败: ' + error);
} finally {
btn.disabled = false;
btn.innerHTML = '🎯 下载选中画廊';
}
}
async function downloadSingleGallery(title) {
try {
const response = await fetch(`/api/download/${encodeURIComponent(title)}`, {
method: 'POST'
});
const result = await response.json();
if (result.status === 'success') {
console.log(`开始下载: ${title}`);
alert(`开始下载: ${title}`);
// 刷新状态
setTimeout(loadGalleries, 2000);
} else {
alert(`下载失败: ${result.message}`);
}
} catch (error) {
alert('下载请求失败: ' + error);
}
}
async function deleteJsonFiles() {
if (!confirm('确定要删除所有JSON文件吗?此操作不可恢复!')) {
return;
}
try {
const response = await fetch('/api/cleanup', {
method: 'DELETE'
});
const result = await response.json();
alert(result.message);
await loadGalleries(); // 刷新列表
} catch (error) {
alert('删除失败: ' + error);
}
}
// 页面加载时自动读取
document.addEventListener('DOMContentLoaded', loadGalleries);
</script>
</body>
</html>
"""
@app.get("/api/galleries")
async def get_galleries():
    """Return the summaries of all galleries, finished and unfinished alike."""
    return get_all_galleries()
# NOTE: this fixed route must be registered BEFORE "/api/download/{title}".
# Routes are matched in declaration order, so with the opposite order a
# POST to /api/download/all is handled as a single gallery titled "all" and
# this endpoint is never reached. (Consequently a gallery literally titled
# "all" cannot be started through the per-title route.)
@app.post("/api/download/all")
async def download_all_galleries(background_tasks: BackgroundTasks):
    """Start downloading every unfinished gallery in the background.

    Returns immediately with a confirmation; the work itself runs in
    download_all_pending_galleries() after the response is sent.

    Raises:
        HTTPException: 500 if the task could not be scheduled.
    """
    try:
        # Run the batch as a background task so the request does not block.
        background_tasks.add_task(download_all_pending_galleries)
        return {
            "status": "success",
            "message": "开始批量下载所有未完成的画廊"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"批量下载失败: {str(e)}")
@app.post("/api/download/{title}")
async def download_gallery(title: str, background_tasks: BackgroundTasks):
    """Start downloading the images of one gallery in the background.

    Args:
        title: Gallery title taken from the URL path.

    Returns immediately with a confirmation; the work itself runs in
    download_gallery_images() after the response is sent.

    Raises:
        HTTPException: 500 if the task could not be scheduled.
    """
    try:
        # Run the download as a background task so the request does not block.
        background_tasks.add_task(download_gallery_images, title)
        return {
            "status": "success",
            "message": f"开始下载画廊: {title}",
            "title": title
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"下载失败: {str(e)}")
@app.get("/api/download/status/{title}")
async def get_download_status(title: str):
    """Return the tracked download state of a gallery, or {} if none exists."""
    if title in download_status:
        return download_status[title]
    return {}
@app.delete("/api/cleanup")
async def cleanup_json_files():
    """Delete the data.json of every gallery directory, keeping the images.

    Raises:
        HTTPException: 500 if scanning or deleting fails.
    """
    try:
        removed = 0
        for entry in Path(DOWNLOADS_DIR).iterdir():
            if not entry.is_dir():
                continue
            metadata_file = entry / "data.json"
            if not metadata_file.exists():
                continue
            metadata_file.unlink()
            removed += 1
        return {
            "status": "success",
            "message": f"已删除 {removed} 个JSON文件",
            "deleted_count": removed
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"清理失败: {str(e)}")
@app.delete("/api/galleries/{title}")
async def delete_gallery(title: str):
    """Delete one gallery directory with everything in it.

    Args:
        title: Gallery title taken from the URL path.

    Returns:
        A confirmation dict on success.

    Raises:
        HTTPException: 404 if no such gallery directory exists directly under
            the download root; 500 if the deletion itself fails.
    """
    import shutil

    try:
        safe_title = sanitize_filename(title)
        gallery_path = downloads_path / safe_title
        # Only a real directory that is a direct child of the download root
        # may be removed. This rejects names such as ".." that would
        # otherwise point rmtree at the root itself or above it.
        is_direct_child = gallery_path.resolve().parent == downloads_path.resolve()
        if not (is_direct_child and gallery_path.is_dir()):
            raise HTTPException(status_code=404, detail="画廊不存在")
        # Remove the whole gallery directory.
        shutil.rmtree(gallery_path)
        # Drop the tracked download state as well.
        download_status.pop(title, None)
        return {
            "status": "success",
            "message": f"已删除画廊: {title}"
        }
    except HTTPException:
        # Let the 404 through instead of rewrapping it as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
@app.get("/health")
async def health_check():
    """Health-check endpoint: always reports a healthy status."""
    return dict(status="healthy")
# Start the uvicorn server when this file is executed directly.
if __name__ == "__main__":
    uvicorn.run(
        "main:app",  # application given as an import string (module "main", attribute "app")
        host="0.0.0.0",
        port=5100,
        reload=True
    )