diff --git a/downloader.py b/downloader.py
deleted file mode 100644
index 48932d2..0000000
--- a/downloader.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from fastapi import APIRouter, BackgroundTasks
-from pydantic import BaseModel
-import uuid
-import os
-from pathlib import Path
-
-router = APIRouter(prefix="/api/v1", tags=["downloader"])
-
-# 存储任务状态
-tasks = {}
-
-class CrawlRequest(BaseModel):
- url: str
- cookies: str
- timestamp: str
-
-class TaskStatus(BaseModel):
- status: str # 'running', 'completed', 'failed'
- result: dict = None
- error: str = None
-
-@router.post("/start-crawl")
-async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
- task_id = str(uuid.uuid4())
- tasks[task_id] = {'status': 'running', 'result': None, 'error': None}
-
- # 在后台运行爬虫任务
- background_tasks.add_task(run_crawler, task_id, request)
-
- return {"task_id": task_id, "status": "started"}
-
-@router.get("/task-status/{task_id}")
-async def get_task_status(task_id: str):
- task = tasks.get(task_id)
- if not task:
- return {"status": "not_found"}
- return task
-
-async def run_crawler(task_id: str, request: CrawlRequest):
- try:
- # 这里执行您的爬虫逻辑,模拟长时间运行
- # 例如:time.sleep(300) # 5分钟
-
- # 确保 downloads 目录存在(双重保障)
- downloads_dir = Path("downloads")
- downloads_dir.mkdir(exist_ok=True)
-
- # 模拟下载文件到 downloads 目录
- filename = f"download_{task_id}.txt"
- filepath = downloads_dir / filename
-
- with open(filepath, 'w', encoding='utf-8') as f:
- f.write(f"URL: {request.url}\n")
- f.write(f"Cookies: {request.cookies}\n")
- f.write(f"Timestamp: {request.timestamp}\n")
- f.write("Download completed successfully\n")
-
- # 爬虫完成后更新状态
- tasks[task_id] = {
- 'status': 'completed',
- 'result': {
- 'message': '爬虫完成',
- 'data': '您的爬虫结果',
- 'download_path': str(filepath)
- },
- 'error': None
- }
- except Exception as e:
- tasks[task_id] = {
- 'status': 'failed',
- 'result': None,
- 'error': str(e)
- }
diff --git a/main.py b/main.py
index acf47f5..4b9f0b0 100644
--- a/main.py
+++ b/main.py
@@ -1,43 +1,535 @@
-from fastapi import FastAPI
-from contextlib import asynccontextmanager
-import uvicorn
+# main.py
+import os
+import json
+import logging
from pathlib import Path
-from downloader import router as downloader_router
-
-# 检查并创建 downloads 目录
-def ensure_downloads_dir():
- downloads_dir = Path("downloads")
- downloads_dir.mkdir(exist_ok=True)
- print(f"确保 downloads 目录存在: {downloads_dir.absolute()}")
-
-# lifespan 事件处理器
-@asynccontextmanager
-async def lifespan(app: FastAPI):
- # 启动时执行
- ensure_downloads_dir()
- print("应用启动完成!")
- yield
- # 关闭时执行(可选)
- print("应用正在关闭...")
-
-app = FastAPI(
- title="下载器API",
- description="一个基于FastAPI的异步下载器服务",
- version="1.0.0",
- lifespan=lifespan
+from typing import Dict, Any, List
+
+import aiofiles
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+import uvicorn
+
+# Configure logging: INFO level, each record carries timestamp, logger name and level.
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
+logger = logging.getLogger(__name__)
+
+# Constants
+DOWNLOADS_DIR = "downloads"  # relative path of the directory that holds one sub-directory per gallery
+MAX_FILENAME_LENGTH = 100  # sanitize_filename() truncates names to this many characters
+INVALID_FILENAME_CHARS = '<>:"/\\|?*'  # sanitize_filename() replaces each of these with '_'
+
+# FastAPI application
+app = FastAPI(title="eh-v2")
+
+# Data models
+class SaveDataRequest(BaseModel):
+ """Request body accepted by POST /save_url."""
+ url: str
+ title: str  # sanitized and used as the gallery directory name by save_url()
+ all_images: Dict[str, str]  # get_all_galleries() treats each key as a file name inside the gallery directory
+ total_images: int
+
+class GalleryInfo(BaseModel):
+ """Summary of one gallery directory, as built by get_all_galleries()."""
+ title: str  # 'title' from data.json, falling back to the directory name
+ path: str  # gallery directory path as a string
+ total_images: int  # 'total_images' from data.json, 0 when absent
+ downloaded_images: int  # number of all_images keys that exist as files in the gallery directory
+
+# Helper functions
+def setup_downloads_directory() -> Path:
+    """Make sure the downloads directory exists and return its path."""
+    target = Path(DOWNLOADS_DIR)
+    target.mkdir(exist_ok=True)
+    logger.info(f"下载目录已准备: {target.absolute()}")
+    return target
+
+def sanitize_filename(filename: str) -> str:
+    """Turn an arbitrary title into a name usable as a single path component.
+
+    Every character listed in INVALID_FILENAME_CHARS and every ASCII control
+    character is replaced with '_'. The result is truncated to
+    MAX_FILENAME_LENGTH characters and trailing dots and spaces are removed.
+
+    The result is never empty, "." or "..": those would resolve to the
+    downloads directory itself or to its parent, and callers create and
+    recursively delete the directory named by this value.
+
+    Args:
+        filename: Raw name, e.g. a page title received from a client.
+
+    Returns:
+        The sanitized name, or "_" when nothing usable remains.
+    """
+    cleaned = "".join(
+        "_" if ch in INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
+        for ch in filename
+    )
+
+    # Limit the length first so that the trailing-character cleanup below
+    # also applies to whatever ends up last after truncation.
+    cleaned = cleaned[:MAX_FILENAME_LENGTH]
+
+    # Trailing dots/spaces are not kept by Windows, and stripping them also
+    # reduces "." and ".." to the empty string handled below.
+    cleaned = cleaned.rstrip(" .")
+
+    return cleaned or "_"
+
+def create_title_directory(base_path: Path, title: str) -> Path:
+    """Create the directory for a gallery title under base_path (if missing) and return it."""
+    target_dir = base_path / sanitize_filename(title)
+    target_dir.mkdir(exist_ok=True)
+    logger.info(f"创建标题目录: {target_dir}")
+    return target_dir
+
+async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
+    """Write data to file_path as indented, non-ASCII-escaped JSON using aiofiles."""
+    async with aiofiles.open(file_path, 'w', encoding='utf-8') as out:
+        serialized = json.dumps(data, ensure_ascii=False, indent=2)
+        await out.write(serialized)
+
+def get_all_galleries() -> List[GalleryInfo]:
+    """Build a GalleryInfo for every sub-directory of the downloads directory that has a data.json.
+
+    Directories whose data.json cannot be read or interpreted are logged and skipped.
+    Returns an empty list when the downloads directory does not exist.
+    """
+    root = Path(DOWNLOADS_DIR)
+    found = []
+
+    if not root.exists():
+        return found
+
+    for entry in root.iterdir():
+        if not entry.is_dir():
+            continue
+
+        meta_file = entry / "data.json"
+        if not meta_file.exists():
+            continue
+
+        try:
+            with open(meta_file, 'r', encoding='utf-8') as fh:
+                meta = json.load(fh)
+
+            # Count the all_images keys that already exist as files in the gallery directory.
+            present = 0
+            if 'all_images' in meta:
+                present = sum(
+                    1
+                    for name in meta['all_images'].keys()
+                    if (entry / name).exists()
+                )
+
+            found.append(GalleryInfo(
+                title=meta.get('title', entry.name),
+                path=str(entry),
+                total_images=meta.get('total_images', 0),
+                downloaded_images=present
+            ))
+        except Exception as e:
+            logger.error(f"读取画廊数据失败 {entry}: {e}")
+
+    return found
+
+# Initialization: runs at import time; the resulting path is used by save_url() and delete_gallery().
+downloads_path = setup_downloads_directory()
+
+# API routes
+@app.post("/save_url")
+async def save_url(data: SaveDataRequest):
+    """Store the posted gallery data as data.json inside the gallery's directory.
+
+    Any failure is logged and reported to the client as HTTP 500.
+    """
+    try:
+        logger.info("收到保存数据请求")
+        logger.info(f"标题: {data.title}, URL: {data.url}, 图片数量: {data.total_images}")
+
+        # The gallery directory is derived from the title; the payload goes into data.json inside it.
+        gallery_dir = create_title_directory(downloads_path, data.title)
+        target_file = gallery_dir / "data.json"
+        await save_data_to_file(target_file, data.dict())
+
+        logger.info(f"数据已保存到: {target_file}")
+
+        return dict(
+            status="success",
+            message="数据保存成功",
+            file_path=str(target_file),
+            title=data.title,
+            total_images=data.total_images,
+        )
+
+    except Exception as e:
+        error_msg = f"保存数据时出错: {str(e)}"
+        logger.error(error_msg)
+        logger.exception("详细错误信息:")
+        raise HTTPException(status_code=500, detail=error_msg)
+
+@app.get("/", response_class=HTMLResponse)
+async def read_gallery_manager():
+ """画廊管理页面"""
+ return """
+
+
+
+
+
+ 画廊下载管理器
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
暂无画廊数据
+
点击"读取文件夹"按钮加载数据
+
+
+
+
+
+
+
+ """
+
+@app.get("/api/galleries")
+async def get_galleries():
+    """Return the list produced by get_all_galleries()."""
+    return get_all_galleries()
+
+@app.post("/api/download/{title}")
+async def download_gallery(title: str):
+    """Acknowledge a download request for the given gallery.
+
+    Placeholder: no image is downloaded yet; the endpoint only echoes the title.
+    """
+    try:
+        # TODO: implement the download - iterate the gallery's all_images mapping and fetch each image.
+        acknowledgement = {
+            "status": "success",
+            "message": f"开始下载画廊: {title}",
+            "title": title
+        }
+        return acknowledgement
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"下载失败: {str(e)}")
+
+@app.delete("/api/cleanup")
+async def cleanup_json_files():
+    """Delete data.json from every gallery directory, leaving all other files in place.
+
+    Returns:
+        A dict with the status, a message and the number of files deleted.
+
+    Raises:
+        HTTPException: 500 when listing or deleting fails.
+    """
+    try:
+        deleted_count = 0
+        root = Path(DOWNLOADS_DIR)
+
+        # A missing downloads directory means there is nothing to clean up;
+        # get_all_galleries() treats that case as "no galleries" as well.
+        if root.exists():
+            for gallery_dir in root.iterdir():
+                if not gallery_dir.is_dir():
+                    continue
+                data_file = gallery_dir / "data.json"
+                if data_file.exists():
+                    data_file.unlink()
+                    deleted_count += 1
+
+        return {
+            "status": "success",
+            "message": f"已删除 {deleted_count} 个JSON文件",
+            "deleted_count": deleted_count
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"清理失败: {str(e)}")
-# 注册路由
-app.include_router(downloader_router)
+@app.delete("/api/galleries/{title}")
+async def delete_gallery(title: str):
+    """Recursively delete the directory of the given gallery.
+
+    Args:
+        title: Gallery title; it is sanitized the same way as when the gallery was saved.
+
+    Returns:
+        A dict with the status and a message naming the deleted gallery.
+
+    Raises:
+        HTTPException: 404 when no such gallery directory exists directly
+            inside the downloads directory, 500 when the deletion itself fails.
+    """
+    import shutil
+
+    gallery_path = downloads_path / sanitize_filename(title)
+
+    # Only a directory located directly inside the downloads directory may be
+    # removed. This rejects names such as "." or ".." that would otherwise
+    # point at the downloads directory itself or at its parent.
+    is_direct_child = gallery_path.resolve().parent == downloads_path.resolve()
+    if not (is_direct_child and gallery_path.is_dir()):
+        # Raised outside the try block below so that the 404 is not turned into a 500.
+        raise HTTPException(status_code=404, detail="画廊不存在")
+
+    try:
+        shutil.rmtree(gallery_path)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}") from e
+
+    return {
+        "status": "success",
+        "message": f"已删除画廊: {title}"
+    }
-@app.get("/")
-async def root():
- return {"message": "下载器服务运行中", "status": "healthy"}
+@app.get("/health")
+async def health_check():
+    """Liveness probe: always reports a healthy status."""
+    state = {"status": "healthy"}
+    return state
+# Script entry point: serve this module's app with uvicorn when the file is run directly.
if __name__ == "__main__":
uvicorn.run(
"main:app",
- host="0.0.0.0",
+ host="0.0.0.0",  # listens on all interfaces - NOTE(review): the routes have no authentication; confirm this exposure is intended
port=5100,
- reload=True # 开发时自动重载
- )
+ reload=True  # auto-reload on source changes (development convenience)
+ )
\ No newline at end of file
diff --git a/post_eh_data.js b/post_eh_data.js
index 0173e1a..b059167 100644
--- a/post_eh_data.js
+++ b/post_eh_data.js
@@ -1,200 +1,229 @@
// ==UserScript==
-// @name 数据发送工具
+// @name eh-v2
// @namespace http://tampermonkey.net/
-// @version 1.0
-// @description 向本地后端发送当前页面的URL和Cookies
-// @author You
+// @version 0.1
+// @description 采集页面数据并发送到后端
+// @author Jack
// @match *://*/*
// @grant GM_xmlhttpRequest
+// @connect self
// @connect 127.0.0.1
// @connect localhost
// ==/UserScript==
(function() {
'use strict';
- // 配置:您可以修改这些变量来自定义行为
- const TARGET_SELECTOR = 'body'; // 按钮插入位置的选择器
- const BACKEND_IP = '127.0.0.1'; // 后端IP地址
- const BACKEND_PORT = '5100'; // 后端端口号
-
- // 构建后端基础URL
- const BACKEND_BASE_URL = `http://${BACKEND_IP}:${BACKEND_PORT}`;
-
- function addButton() {
- if (document.getElementById('data-sender-button')) {
- return;
- }
-
- const button = document.createElement('button');
- button.id = 'data-sender-button';
- button.textContent = "send data";
- button.style.position = "fixed";
- button.style.top = "12.5%";
- button.style.right = "1%";
- button.style.transform = "translateY(-50%)";
- button.style.padding = "3px 8px";
- button.style.fontSize = "10px";
- button.style.backgroundColor = "#007baf";
- button.style.color = "#fff";
- button.style.border = "none";
- button.style.borderRadius = "5px";
- button.style.cursor = "pointer";
- button.style.zIndex = "10000";
-
- button.addEventListener('click', function() {
- sendDataToBackend();
- });
-
- const targetElement = document.querySelector(TARGET_SELECTOR);
-
- if (targetElement && TARGET_SELECTOR !== 'body') {
- const buttonContainer = document.createElement('div');
- buttonContainer.style.display = 'inline-block';
- buttonContainer.style.marginLeft = '10px';
-
- button.style.position = 'relative';
- button.style.top = 'auto';
- button.style.right = 'auto';
- button.style.transform = 'none';
- button.style.margin = '0';
-
- buttonContainer.appendChild(button);
-
- if (targetElement.nextSibling) {
- targetElement.parentNode.insertBefore(buttonContainer, targetElement.nextSibling);
- } else {
- targetElement.parentNode.appendChild(buttonContainer);
- }
- } else {
- document.body.appendChild(button);
- }
+    // Global configuration - adjust these values to your environment
+    const BACKEND_IP = '127.0.0.1';
+    const BACKEND_PORT = '5100';
+    const BUTTON_LOCATION_SELECTOR = 'body';
+    const DATA_LIST_SELECTOR = '#gdt a'; // selector of the <a> elements whose links are collected
+    const ALL_IMG_DATA = {}; // collected links keyed by a zero-padded 4-digit counter, e.g. {"0001": "https://example001.jpg", "0002": "https://example002.jpg"}
+
+    // Build the floating trigger button
+    const button = document.createElement('button');
+    button.id = 'data-sender-button';
+    button.textContent = "send data";
+    Object.assign(button.style, {
+        position: "fixed",
+        top: "32%",
+        right: "1%",
+        transform: "translateY(-50%)",
+        padding: "3px 8px",
+        fontSize: "10px",
+        backgroundColor: "#007baf",
+        color: "#fff",
+        border: "none",
+        borderRadius: "5px",
+        cursor: "pointer",
+        zIndex: "10000"
+    });
+
+    // Attach the button to the configured element, falling back to <body> when the selector matches nothing
+    const mountPoint = document.querySelector(BUTTON_LOCATION_SELECTOR);
+    if (mountPoint) {
+        mountPoint.appendChild(button);
+    } else {
+        document.body.appendChild(button);
}
- function sendDataToBackend() {
- const currentUrl = window.location.href;
- const cookies = document.cookie;
-
- const data = {
- url: currentUrl,
- cookies: cookies,
- timestamp: new Date().toISOString()
- };
-
- // 禁用按钮防止重复点击
- const button = document.getElementById('data-sender-button');
- if (button) {
- button.disabled = true;
- button.textContent = "任务进行中...";
- button.style.backgroundColor = "#6c757d";
+    // Return the href of every element matched by DATA_LIST_SELECTOR in the given HTML text.
+    function extractImagesFromPage(htmlContent) {
+        const images = [];
+        // Parse into a separate, inert document. Assigning fetched markup to innerHTML of an
+        // element created by the live document would start loading its images and could fire
+        // inline event handlers; a DOMParser document does neither.
+        const parsedDoc = new DOMParser().parseFromString(htmlContent, 'text/html');
+
+        if (DATA_LIST_SELECTOR) {
+            parsedDoc.querySelectorAll(DATA_LIST_SELECTOR).forEach(link => {
+                // href of the <a> element - the link to the image page
+                const href = link.href;
+                if (href) {
+                    images.push(href);
+                }
+            });
}
+        return images;
+    }
- // 发送任务请求
- GM_xmlhttpRequest({
- method: "POST",
- url: `${BACKEND_BASE_URL}/start-crawl`,
- headers: {
- "Content-Type": "application/json"
- },
- data: JSON.stringify(data),
- onload: function(response) {
- if (response.status === 200) {
- const result = JSON.parse(response.responseText);
- if (result.task_id) {
- alert("爬虫任务已启动!任务ID: " + result.task_id);
- // 开始轮询任务状态
- pollTaskStatus(result.task_id);
- } else {
- alert("任务启动失败: " + (result.message || "未知错误"));
- resetButton();
- }
- } else {
- alert("请求失败,状态码: " + response.status);
- resetButton();
- }
- },
- onerror: function(error) {
- console.error("数据发送失败:", error);
- alert("数据发送失败,请检查后端服务是否运行");
- resetButton();
- }
- });
+    // Zero-pad a number to at least four digits, e.g. 7 -> "0007"
+    function formatNumber(num) {
+        const digits = num.toString();
+        return digits.padStart(4, '0');
}
- function pollTaskStatus(taskId) {
- let pollCount = 0;
- const maxPolls = 300; // 最多轮询300次(5分钟,每秒一次)
+    // POST the collected data as JSON to the backend's /save_url endpoint.
+    // Resolves with the response on HTTP 200; rejects with an Error otherwise.
+    function sendDataToBackend(data) {
+        console.log('准备发送的数据:', data);
+        console.log('数据类型:', typeof data);
+        console.log('字符串化后的数据:', JSON.stringify(data));
- const pollInterval = setInterval(() => {
- pollCount++;
-
+        return new Promise((resolve, reject) => {
GM_xmlhttpRequest({
- method: "GET",
- url: `${BACKEND_BASE_URL}/task-status/${taskId}`,
+                method: "POST",
+                url: `http://${BACKEND_IP}:${BACKEND_PORT}/save_url`,
+                headers: {
+                    "Content-Type": "application/json",
+                },
+                data: JSON.stringify(data),
onload: function(response) {
+                    console.log('后端响应状态:', response.status);
+                    console.log('后端响应内容:', response.responseText);
if (response.status === 200) {
- const result = JSON.parse(response.responseText);
-
- // 更新按钮状态显示进度
- const button = document.getElementById('data-sender-button');
- if (button) {
- button.textContent = `任务中...${pollCount}s`;
- }
-
- if (result.status === 'completed') {
- clearInterval(pollInterval);
- alert("爬虫任务完成!\n结果: " + JSON.stringify(result.result, null, 2));
- resetButton();
- } else if (result.status === 'failed') {
- clearInterval(pollInterval);
- alert("爬虫任务失败: " + result.error);
- resetButton();
- }
- // 如果状态是 'running',继续轮询
+                        resolve(response);
} else {
- console.error("获取任务状态失败:", response.status);
+                        reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`));
}
},
onerror: function(error) {
- console.error("轮询任务状态失败:", error);
+                    // The callback receives a response object, not an Error. Wrap it so that
+                    // callers reading error.message get a meaningful text instead of undefined.
+                    console.error('GM_xmlhttpRequest error:', error);
+                    reject(new Error(`无法连接后端 http://${BACKEND_IP}:${BACKEND_PORT}`));
}
});
-
- // 超过最大轮询次数,停止轮询
- if (pollCount >= maxPolls) {
- clearInterval(pollInterval);
- alert("任务超时,请稍后手动检查结果");
- resetButton();
- }
- }, 1000); // 每秒轮询一次
+        });
}
- function resetButton() {
- const button = document.getElementById('data-sender-button');
- if (button) {
- button.disabled = false;
- button.textContent = "send data";
- button.style.backgroundColor = "#007baf";
+    // Click handler: run one collection at a time.
+    button.addEventListener('click', async function() {
+        // A disabled button dispatches no click events, so disabling it for the duration of
+        // the run keeps a second click from starting a concurrent run on the shared ALL_IMG_DATA.
+        button.disabled = true;
+        try {
+            await collectAndSend();
+        } finally {
+            button.disabled = false;
}
- }
- // 初始尝试添加按钮
- addButton();
- // 使用MutationObserver监听DOM变化
- const observer = new MutationObserver(function(mutations) {
- addButton();
- });
- observer.observe(document.body, {
- childList: true,
- subtree: true
- });
- if (document.readyState === 'loading') {
- document.addEventListener('DOMContentLoaded', addButton);
- } else {
- addButton();
- }
+    });
+
+    // Collect the links of the current page and of the paginated pages (?p=N) into
+    // ALL_IMG_DATA, then send the result to the backend and report the outcome.
+    async function collectAndSend() {
+        // 1. Current URL and title
+        const currentUrl = window.location.href;
+        const pageTitle = document.title;
+
+        // Drop the data of any previous run
+        Object.keys(ALL_IMG_DATA).forEach(key => delete ALL_IMG_DATA[key]);
+
+        let img_count = 1;
+        // Links stored so far; a Set makes the duplicate check O(1) per link.
+        const seenLinks = new Set();
+
+        // Store a link under the next counter key unless it is empty or already stored.
+        const addLink = (href) => {
+            if (!href || seenLinks.has(href)) {
+                return false;
+            }
+            seenLinks.add(href);
+            ALL_IMG_DATA[formatNumber(img_count)] = href;
+            img_count++;
+            return true;
+        };
+
+        // Start with the links already present in the current document
+        if (DATA_LIST_SELECTOR) {
+            document.querySelectorAll(DATA_LIST_SELECTOR).forEach(link => addLink(link.href));
+        }
+
+        // Build the URL of a given page by setting the "p" query parameter. Using the URL API
+        // avoids mistaking other parameters that merely end in "p=" for the page parameter and
+        // keeps the query in front of any #fragment (which is dropped for the request).
+        const buildPageUrl = (page) => {
+            const pageUrl = new URL(currentUrl);
+            pageUrl.searchParams.set('p', String(page));
+            pageUrl.hash = '';
+            return pageUrl.toString();
+        };
+
+        // Fetch one page and store its links; resolves to true when at least one new link was found.
+        const processPage = async (page) => {
+            const newTargetUrl = buildPageUrl(page);
+
+            try {
+                const response = await new Promise((resolve, reject) => {
+                    GM_xmlhttpRequest({
+                        method: "GET",
+                        url: newTargetUrl,
+                        headers: {
+                            "Referer": currentUrl,
+                            "Cookie": document.cookie
+                        },
+                        onload: resolve,
+                        onerror: reject
+                    });
+                });
+
+                const pageImages = extractImagesFromPage(response.responseText);
+
+                if (pageImages.length === 0) {
+                    console.log(`第${page}页没有图片,可能是最后一页`);
+                    return false; // nothing on this page - probably past the last page
+                }
+
+                let hasNewImage = false;
+                pageImages.forEach(href => {
+                    if (addLink(href)) {
+                        hasNewImage = true;
+                    }
+                });
+
+                console.log(`第${page}页采集完成,获取到${pageImages.length}个图片链接,新增${hasNewImage ? '有新图片' : '全是重复图片'}`);
+
+                return hasNewImage;
+            } catch (error) {
+                console.error(`第${page}页采集失败:`, error);
+                return false;
+            }
+        };
+
+        // Walk the pages from p=0 up to p=100. Page 0 may only repeat links already taken
+        // from the current document, so "no new links" ends the walk only from page 1 on.
+        let shouldContinue = true;
+        for (let page = 0; page <= 100; page++) {
+            if (!shouldContinue) break;
+
+            const hasNewImages = await processPage(page);
+
+            if (!hasNewImages && page > 0) {
+                console.log(`第${page}页没有新图片,可能已到最后一页,停止采集`);
+                shouldContinue = false;
+            }
+
+            // Hard cap on the number of collected links
+            if (img_count > 2200) {
+                console.log('图片数量达到上限2200,停止采集');
+                shouldContinue = false;
+            }
+        }
+
+        // Final payload for the backend
+        const data = {
+            url: currentUrl,
+            title: pageTitle,
+            all_images: ALL_IMG_DATA,
+            total_images: Object.keys(ALL_IMG_DATA).length
+        };
+
+        console.log('采集完成的所有数据:', data);
+        console.log('后端地址:', BACKEND_IP + ':' + BACKEND_PORT);
+
+        try {
+            await sendDataToBackend(data);
+            alert(`数据采集完成并已保存到后端!\n标题: ${pageTitle}\n总图片链接数量: ${Object.keys(ALL_IMG_DATA).length}`);
+        } catch (error) {
+            console.error('发送数据到后端失败:', error);
+            alert(`数据采集完成但保存到后端失败!\n错误: ${error.message}\n请在控制台查看完整数据`);
+        }
+    }
})();
\ No newline at end of file