main
Jack 2 months ago
parent a16c47b303
commit f9bf9826b4
  1. 73
      downloader.py
  2. 556
      main.py
  3. 351
      post_eh_data.js

@ -1,73 +0,0 @@
import os
import uuid
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel
# Router that groups the downloader endpoints under the /api/v1 prefix.
router = APIRouter(prefix="/api/v1", tags=["downloader"])
# In-memory task registry: maps a task id to a dict with the keys
# 'status', 'result' and 'error'.
tasks = {}
class CrawlRequest(BaseModel):
    """Request body accepted by the start-crawl endpoint."""

    url: str  # written to the download file as "URL: ..."
    cookies: str  # written to the download file as "Cookies: ..."
    timestamp: str  # written to the download file as "Timestamp: ..."
class TaskStatus(BaseModel):
    """Shape of one task-status record.

    ``result`` and ``error`` default to ``None``, so they are declared
    ``Optional``: with a bare ``dict`` / ``str`` annotation Pydantic v2
    rejects an explicitly passed ``None`` even though ``None`` is the default.
    """

    status: str  # 'running', 'completed', 'failed'
    result: Optional[dict] = None
    error: Optional[str] = None
@router.post("/start-crawl")
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    """Register a new crawl task and schedule it to run in the background.

    Returns a dict holding the generated ``task_id`` and the status
    ``"started"``.
    """
    new_id = str(uuid.uuid4())
    # Record the task as running before the crawler is scheduled.
    tasks[new_id] = dict(status='running', result=None, error=None)
    # Run the crawler as a background task.
    background_tasks.add_task(run_crawler, new_id, request)
    return {"task_id": new_id, "status": "started"}
@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    """Return the stored record for ``task_id``.

    When no (non-empty) record is stored under that id, the placeholder
    ``{"status": "not_found"}`` is returned instead.
    """
    return tasks.get(task_id) or {"status": "not_found"}
async def run_crawler(task_id: str, request: CrawlRequest):
    """Run the (placeholder) crawl for ``task_id`` and record its outcome.

    Writes the request's url, cookies and timestamp to
    ``downloads/download_<task_id>.txt`` and stores either a 'completed'
    or a 'failed' record in ``tasks[task_id]``. Exceptions are not
    propagated; their text ends up in the record's 'error' field.
    """
    try:
        # The real crawler logic belongs here; for now a download is simulated.
        # Make sure the downloads directory exists (second safeguard).
        target_dir = Path("downloads")
        target_dir.mkdir(exist_ok=True)
        output_file = target_dir / f"download_{task_id}.txt"
        with open(output_file, 'w', encoding='utf-8') as out:
            out.writelines([
                f"URL: {request.url}\n",
                f"Cookies: {request.cookies}\n",
                f"Timestamp: {request.timestamp}\n",
                "Download completed successfully\n",
            ])
        # Outcome stored once the simulated crawl has finished.
        outcome = {
            'status': 'completed',
            'result': {
                'message': '爬虫完成',
                'data': '您的爬虫结果',
                'download_path': str(output_file),
            },
            'error': None,
        }
    except Exception as e:
        outcome = {'status': 'failed', 'result': None, 'error': str(e)}
    tasks[task_id] = outcome

@ -1,43 +1,535 @@
from fastapi import FastAPI # main.py
from contextlib import asynccontextmanager import os
import uvicorn import json
import logging
from pathlib import Path from pathlib import Path
from downloader import router as downloader_router from typing import Dict, Any, List
# 检查并创建 downloads 目录 import aiofiles
def ensure_downloads_dir(): from fastapi import FastAPI, HTTPException
downloads_dir = Path("downloads") from fastapi.middleware.cors import CORSMiddleware
downloads_dir.mkdir(exist_ok=True) from fastapi.responses import HTMLResponse
print(f"确保 downloads 目录存在: {downloads_dir.absolute()}") from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
# lifespan 事件处理器 import uvicorn
@asynccontextmanager
async def lifespan(app: FastAPI): # 配置日志
# 启动时执行 logging.basicConfig(
ensure_downloads_dir() level=logging.INFO,
print("应用启动完成!") format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
yield
# 关闭时执行(可选)
print("应用正在关闭...")
app = FastAPI(
title="下载器API",
description="一个基于FastAPI的异步下载器服务",
version="1.0.0",
lifespan=lifespan
) )
logger = logging.getLogger(__name__)
# Constants
# Directory (relative path) that holds one sub-directory per gallery.
DOWNLOADS_DIR = "downloads"
# sanitize_filename truncates names longer than this many characters.
MAX_FILENAME_LENGTH = 100
# Characters that sanitize_filename replaces with '_'.
INVALID_FILENAME_CHARS = '<>:"/\\|?*'
# FastAPI application
app = FastAPI(title="eh-v2")
# 数据模型
class SaveDataRequest(BaseModel):
    """Request body of the /save_url endpoint."""

    url: str  # logged by save_url and stored in data.json
    title: str  # sanitized and used as the gallery directory name
    all_images: Dict[str, str]  # keys are looked up as paths inside the gallery directory
    total_images: int  # stored as sent; reported back by get_all_galleries
class GalleryInfo(BaseModel):
    """Summary of one gallery directory, as built by get_all_galleries."""

    title: str  # 'title' from data.json, or the directory name when absent
    path: str  # gallery directory path as a string
    total_images: int  # 'total_images' from data.json, 0 when absent
    downloaded_images: int  # number of all_images keys that exist as paths in the directory
# 工具函数
def setup_downloads_directory() -> Path:
    """Make sure the downloads directory exists and return its path."""
    target = Path(DOWNLOADS_DIR)
    # exist_ok keeps repeated start-ups from failing on an existing directory.
    target.mkdir(exist_ok=True)
    logger.info(f"下载目录已准备: {target.absolute()}")
    return target
def sanitize_filename(filename: str) -> str:
    """Turn ``filename`` into a name that is safe to use as a single path component.

    Every character listed in ``INVALID_FILENAME_CHARS`` is replaced with
    ``'_'`` and the result is cut to ``MAX_FILENAME_LENGTH`` characters.
    Names that would be empty or consist only of dots and spaces (``""``,
    ``"."``, ``".."`` ...) are replaced with ``"_"``: joined onto a base
    directory they would refer to that directory itself or to its parent
    instead of a child entry.

    Args:
        filename: Raw name, e.g. a page title.

    Returns:
        The sanitized, never-empty name.
    """
    # One translation pass instead of one str.replace call per character.
    replacements = str.maketrans({char: '_' for char in INVALID_FILENAME_CHARS})
    sanitized = filename.translate(replacements)[:MAX_FILENAME_LENGTH]
    if not sanitized.strip('. '):
        return '_'
    return sanitized
def create_title_directory(base_path: Path, title: str) -> Path:
    """Create (if needed) and return the directory for ``title`` under ``base_path``.

    The directory name is the sanitized form of ``title``.
    """
    gallery_dir = base_path / sanitize_filename(title)
    gallery_dir.mkdir(exist_ok=True)
    logger.info(f"创建标题目录: {gallery_dir}")
    return gallery_dir
async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
    """Asynchronously write ``data`` to ``file_path`` as indented UTF-8 JSON.

    Args:
        file_path: Destination file; it is overwritten if it exists.
        data: JSON-serializable mapping.

    Raises:
        TypeError / ValueError: If ``data`` cannot be serialized. In that
            case the destination file is left untouched.
    """
    # Serialize before opening: opening with 'w' truncates the file, so a
    # serialization error after the open would destroy an existing file.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
        await f.write(payload)
def get_all_galleries() -> List[GalleryInfo]:
    """Collect a GalleryInfo for every gallery directory that has a data.json.

    Returns an empty list when the downloads directory does not exist.
    A gallery whose data cannot be read or interpreted is logged and left
    out of the result.
    """
    root = Path(DOWNLOADS_DIR)
    found = []
    if not root.exists():
        return found
    for entry in root.iterdir():
        manifest = entry / "data.json"
        # Only directories that carry a data.json count as galleries.
        if not (entry.is_dir() and manifest.exists()):
            continue
        try:
            with open(manifest, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Count the image names that already exist inside the gallery.
            present = 0
            if 'all_images' in data:
                present = sum(
                    1
                    for name in data['all_images'].keys()
                    if (entry / name).exists()
                )
            found.append(GalleryInfo(
                title=data.get('title', entry.name),
                path=str(entry),
                total_images=data.get('total_images', 0),
                downloaded_images=present,
            ))
        except Exception as e:
            logger.error(f"读取画廊数据失败 {entry}: {e}")
    return found
# Initialization: runs at import time, creating the downloads directory
# if necessary and keeping its path for the route handlers below.
downloads_path = setup_downloads_directory()
# API路由
@app.post("/save_url")
async def save_url(data: SaveDataRequest):
    """Save the posted gallery description as ``data.json`` in its title directory.

    Args:
        data: Gallery url, title, image map and image count.

    Returns:
        A dict with ``status``, ``message``, the written ``file_path``, the
        ``title`` and ``total_images``.

    Raises:
        HTTPException: 500 when creating the directory or writing the file fails.
    """
    try:
        logger.info("收到保存数据请求")
        logger.info(f"标题: {data.title}, URL: {data.url}, 图片数量: {data.total_images}")
        # One directory per (sanitized) title.
        title_dir = create_title_directory(downloads_path, data.title)
        data_file = title_dir / "data.json"
        # Pydantic v2 renamed .dict() to .model_dump() and deprecates the old
        # name; use whichever the installed version provides.
        dump = getattr(data, "model_dump", None) or data.dict
        await save_data_to_file(data_file, dump())
        logger.info(f"数据已保存到: {data_file}")
        return {
            "status": "success",
            "message": "数据保存成功",
            "file_path": str(data_file),
            "title": data.title,
            "total_images": data.total_images
        }
    except Exception as e:
        error_msg = f"保存数据时出错: {str(e)}"
        # logger.exception logs at ERROR level and appends the traceback, so a
        # single call replaces the former error() + exception() pair.
        logger.exception(error_msg)
        raise HTTPException(status_code=500, detail=error_msg) from e
@app.get("/", response_class=HTMLResponse)
async def read_gallery_manager():
    """Serve the single-page gallery manager UI.

    The page script talks to /api/galleries, /api/download/{title},
    /api/cleanup and /api/galleries/{title}.

    Gallery titles come from scraped page titles and are therefore
    untrusted: the script escapes them before inserting them into the DOM
    and binds the per-gallery buttons with addEventListener instead of
    interpolating the title into inline ``onclick`` attributes. Progress is
    shown as 0% when a gallery reports no images, and error alerts fall back
    to the ``detail`` field that FastAPI error responses carry.
    """
    return """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>画廊下载管理器</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 15px;
box-shadow: 0 20px 40px rgba(0,0,0,0.1);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #2c3e50, #34495e);
color: white;
padding: 30px;
text-align: center;
}
.header h1 {
font-size: 2.5em;
margin-bottom: 10px;
}
.header p {
opacity: 0.8;
font-size: 1.1em;
}
.controls {
padding: 20px;
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
display: flex;
gap: 15px;
flex-wrap: wrap;
}
.btn {
padding: 12px 24px;
border: none;
border-radius: 8px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
display: inline-flex;
align-items: center;
gap: 8px;
}
.btn-primary {
background: #007bff;
color: white;
}
.btn-primary:hover {
background: #0056b3;
transform: translateY(-2px);
}
.btn-success {
background: #28a745;
color: white;
}
.btn-success:hover {
background: #1e7e34;
transform: translateY(-2px);
}
.btn-danger {
background: #dc3545;
color: white;
}
.btn-danger:hover {
background: #c82333;
transform: translateY(-2px);
}
.btn:disabled {
background: #6c757d;
cursor: not-allowed;
transform: none;
}
.gallery-list {
padding: 20px;
}
.gallery-item {
background: white;
border: 1px solid #e9ecef;
border-radius: 10px;
padding: 20px;
margin-bottom: 15px;
transition: all 0.3s ease;
display: flex;
justify-content: space-between;
align-items: center;
}
.gallery-item:hover {
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
transform: translateY(-2px);
}
.gallery-info {
flex: 1;
}
.gallery-title {
font-size: 1.3em;
font-weight: 600;
color: #2c3e50;
margin-bottom: 8px;
}
.gallery-stats {
display: flex;
gap: 20px;
color: #6c757d;
font-size: 0.9em;
}
.gallery-actions {
display: flex;
gap: 10px;
}
.progress-bar {
width: 200px;
height: 8px;
background: #e9ecef;
border-radius: 4px;
overflow: hidden;
margin-top: 8px;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, #28a745, #20c997);
transition: width 0.3s ease;
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: #6c757d;
}
.empty-state h3 {
margin-bottom: 10px;
font-size: 1.5em;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎨 画廊下载管理器</h1>
<p>管理您的画廊下载任务</p>
</div>
<div class="controls">
<button class="btn btn-primary" onclick="loadGalleries()">
📁 读取文件夹
</button>
<button class="btn btn-success" onclick="startDownload()" id="downloadBtn">
开始下载
</button>
<button class="btn btn-danger" onclick="deleteJsonFiles()">
🗑 删除JSON文件
</button>
</div>
<div class="gallery-list" id="galleryList">
<div class="empty-state">
<h3>暂无画廊数据</h3>
<p>点击"读取文件夹"按钮加载数据</p>
</div>
</div>
</div>
<script>
let currentGalleries = [];
async function loadGalleries() {
try {
const response = await fetch('/api/galleries');
const galleries = await response.json();
currentGalleries = galleries;
displayGalleries(galleries);
} catch (error) {
alert('读取文件夹失败: ' + error);
}
}
// Escape a value before it is interpolated into an HTML template.
function escapeHtml(value) {
return String(value)
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}
// Completion percentage clamped to 0-100; 0 when the total is not positive.
function progressPercent(gallery) {
if (!(gallery.total_images > 0)) {
return 0;
}
const ratio = gallery.downloaded_images / gallery.total_images;
return Math.max(0, Math.min(100, ratio * 100));
}
function displayGalleries(galleries) {
const galleryList = document.getElementById('galleryList');
if (galleries.length === 0) {
galleryList.innerHTML = `
<div class="empty-state">
<h3>暂无画廊数据</h3>
<p>未找到任何画廊数据文件</p>
</div>
`;
return;
}
galleryList.innerHTML = galleries.map((gallery, index) => {
const percent = progressPercent(gallery);
return `
<div class="gallery-item">
<div class="gallery-info">
<div class="gallery-title">${escapeHtml(gallery.title)}</div>
<div class="gallery-stats">
<span>总图片: ${escapeHtml(gallery.total_images)}</span>
<span>已下载: ${escapeHtml(gallery.downloaded_images)}</span>
<span>进度: ${Math.round(percent)}%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: ${percent}%"></div>
</div>
</div>
<div class="gallery-actions">
<button class="btn btn-primary" data-action="download" data-index="${index}">
下载
</button>
<button class="btn btn-danger" data-action="delete" data-index="${index}">
删除
</button>
</div>
</div>
`;
}).join('');
// Bind the buttons in script so the title never has to be embedded in markup.
galleryList.querySelectorAll('button[data-action]').forEach(actionButton => {
const gallery = galleries[Number(actionButton.dataset.index)];
actionButton.addEventListener('click', () => {
if (actionButton.dataset.action === 'download') {
downloadGallery(gallery.title);
} else {
deleteGallery(gallery.title);
}
});
});
}
async function startDownload() {
const btn = document.getElementById('downloadBtn');
btn.disabled = true;
btn.innerHTML = '⏳ 下载中...';
try {
// 这里可以添加批量下载逻辑
for (const gallery of currentGalleries) {
if (gallery.downloaded_images < gallery.total_images) {
await downloadGallery(gallery.title);
}
}
alert('所有下载任务已完成!');
} catch (error) {
alert('下载失败: ' + error);
} finally {
btn.disabled = false;
btn.innerHTML = ' 开始下载';
await loadGalleries(); // 刷新列表
}
}
async function downloadGallery(title) {
try {
const response = await fetch(`/api/download/${encodeURIComponent(title)}`, {
method: 'POST'
});
const result = await response.json();
if (result.status === 'success') {
alert(`开始下载: ${title}`);
// 这里可以添加实时进度更新
} else {
alert(`下载失败: ${result.message || result.detail}`);
}
} catch (error) {
alert('下载请求失败: ' + error);
}
}
async function deleteJsonFiles() {
if (!confirm('确定要删除所有JSON文件吗?此操作不可恢复!')) {
return;
}
try {
const response = await fetch('/api/cleanup', {
method: 'DELETE'
});
const result = await response.json();
alert(result.message || result.detail);
await loadGalleries(); // 刷新列表
} catch (error) {
alert('删除失败: ' + error);
}
}
async function deleteGallery(title) {
if (!confirm(`确定要删除画廊"${title}"此操作不可恢复`)) {
return;
}
try {
const response = await fetch(`/api/galleries/${encodeURIComponent(title)}`, {
method: 'DELETE'
});
const result = await response.json();
alert(result.message || result.detail);
await loadGalleries(); // 刷新列表
} catch (error) {
alert('删除失败: ' + error);
}
}
// 页面加载时自动读取
document.addEventListener('DOMContentLoaded', loadGalleries);
</script>
</body>
</html>
"""
@app.get("/api/galleries")
async def get_galleries():
    """Return the summaries of all galleries found in the downloads directory."""
    return get_all_galleries()
@app.post("/api/download/{title}")
async def download_gallery(title: str):
    """Acknowledge a download request for the gallery ``title``.

    Placeholder: no image is downloaded yet; the endpoint only reports
    that the download has started.
    """
    try:
        # The image download logic belongs here: walk the all_images
        # mapping and fetch every image.
        acknowledgement = dict(
            status="success",
            message=f"开始下载画廊: {title}",
            title=title,
        )
        return acknowledgement
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"下载失败: {str(e)}")
@app.delete("/api/cleanup")
async def cleanup_json_files():
    """Delete the data.json of every gallery directory; images are kept.

    Returns:
        A dict with ``status``, a human-readable ``message`` and the
        ``deleted_count``.

    Raises:
        HTTPException: 500 when listing the directory or deleting a file fails.
    """
    try:
        deleted_count = 0
        root = Path(DOWNLOADS_DIR)
        # A missing downloads directory means there is nothing to delete,
        # matching how get_all_galleries treats it.
        gallery_dirs = root.iterdir() if root.exists() else ()
        for gallery_dir in gallery_dirs:
            if not gallery_dir.is_dir():
                continue
            try:
                (gallery_dir / "data.json").unlink()
            except FileNotFoundError:
                # No data.json here (or it vanished in the meantime).
                continue
            deleted_count += 1
        return {
            "status": "success",
            "message": f"已删除 {deleted_count} 个JSON文件",
            "deleted_count": deleted_count
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"清理失败: {str(e)}") from e
@app.delete("/api/galleries/{title}")
async def delete_gallery(title: str):
    """Delete the whole directory of the gallery ``title``.

    Args:
        title: Gallery title; it is sanitized the same way as when the
            gallery was saved.

    Returns:
        A dict with ``status`` and ``message`` on success.

    Raises:
        HTTPException: 400 when the title does not name a direct child of
            the downloads directory, 404 when the gallery does not exist,
            500 when the deletion itself fails.
    """
    import shutil

    try:
        root = downloads_path.resolve()
        gallery_path = (downloads_path / sanitize_filename(title)).resolve()
        # Only direct children of the downloads directory may be removed.
        # Without this check a title such as ".." resolves to the parent
        # directory and rmtree would wipe it.
        if gallery_path.parent != root:
            raise HTTPException(status_code=400, detail="无效的画廊标题")
        if not gallery_path.exists():
            raise HTTPException(status_code=404, detail="画廊不存在")
        # Remove the entire gallery directory.
        shutil.rmtree(gallery_path)
        return {
            "status": "success",
            "message": f"已删除画廊: {title}"
        }
    except HTTPException:
        # Keep the intended 4xx status instead of re-wrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}") from e
@app.get("/") @app.get("/health")
async def root(): async def health_check():
return {"message": "下载器服务运行中", "status": "healthy"} """健康检查端点"""
return {"status": "healthy"}
if __name__ == "__main__": if __name__ == "__main__":
uvicorn.run( uvicorn.run(
"main:app", "main:app",
host="0.0.0.0", host="0.0.0.0",
port=5100, port=5100,
reload=True # 开发时自动重载 reload=True
) )

@ -1,200 +1,229 @@
// ==UserScript== // ==UserScript==
// @name 数据发送工具 // @name eh-v2
// @namespace http://tampermonkey.net/ // @namespace http://tampermonkey.net/
// @version 1.0 // @version 0.1
// @description 向本地后端发送当前页面的URL和Cookies // @description 采集页面数据并发送到后端
// @author You // @author Jack
// @match *://*/* // @match *://*/*
// @grant GM_xmlhttpRequest // @grant GM_xmlhttpRequest
// @connect 127.0.0.1
// @connect localhost
// ==/UserScript== // ==/UserScript==
(function() { (function() {
'use strict'; 'use strict';
// 配置:您可以修改这些变量来自定义行为 // 全局配置 - 请根据实际情况修改这些值
const TARGET_SELECTOR = 'body'; // 按钮插入位置的选择器 const BACKEND_IP = '127.0.0.1';
const BACKEND_IP = '127.0.0.1'; // 后端IP地址 const BACKEND_PORT = '5100';
const BACKEND_PORT = '5100'; // 后端端口号 const BUTTON_LOCATION_SELECTOR = 'body';
const DATA_LIST_SELECTOR = '#gdt a'; // 修改为a标签的选择器
// 构建后端基础URL const ALL_IMG_DATA = {}; // 用于储存每一页的图片url, 格式为 {"0001": "https://example001.jpg", "0002": "https://example002.jpg"}, 最高支持4位数至9999
const BACKEND_BASE_URL = `http://${BACKEND_IP}:${BACKEND_PORT}`;
// Build the floating "send data" button.
const button = document.createElement('button');
button.id = 'data-sender-button';
button.textContent = "send data";
Object.assign(button.style, {
    position: "fixed",
    top: "32%",
    right: "1%",
    transform: "translateY(-50%)",
    padding: "3px 8px",
    fontSize: "10px",
    backgroundColor: "#007baf",
    color: "#fff",
    border: "none",
    borderRadius: "5px",
    cursor: "pointer",
    zIndex: "10000",
});
// Attach the button at the configured location; fall back to <body>
// when the selector matches nothing.
const targetElement = document.querySelector(BUTTON_LOCATION_SELECTOR);
(targetElement || document.body).appendChild(button);
function addButton() { // 从页面中提取图片的函数
if (document.getElementById('data-sender-button')) { function extractImagesFromPage(htmlContent) {
return; const images = [];
// 创建一个临时div来解析HTML
const tempDiv = document.createElement('div');
tempDiv.innerHTML = htmlContent;
if (DATA_LIST_SELECTOR) {
const linkElements = tempDiv.querySelectorAll(DATA_LIST_SELECTOR);
linkElements.forEach(link => {
// 从a标签中获取href属性,这通常是图片页面链接
const href = link.href;
if (href) {
images.push(href);
}
});
} }
return images;
}
// 格式化数字为4位数
// Left-pad the decimal form of `num` with zeros to at least four characters.
function formatNumber(num) {
    const digits = num.toString();
    return digits.padStart(4, '0');
}
// 发送数据到后端的函数
function sendDataToBackend(data) {
console.log('准备发送的数据:', data);
console.log('数据类型:', typeof data);
console.log('字符串化后的数据:', JSON.stringify(data));
const button = document.createElement('button'); return new Promise((resolve, reject) => {
button.id = 'data-sender-button'; GM_xmlhttpRequest({
button.textContent = "send data"; method: "POST",
button.style.position = "fixed"; url: `http://${BACKEND_IP}:${BACKEND_PORT}/save_url`,
button.style.top = "12.5%"; headers: {
button.style.right = "1%"; "Content-Type": "application/json",
button.style.transform = "translateY(-50%)"; },
button.style.padding = "3px 8px"; data: JSON.stringify(data),
button.style.fontSize = "10px"; onload: function(response) {
button.style.backgroundColor = "#007baf"; console.log('后端响应状态:', response.status);
button.style.color = "#fff"; console.log('后端响应内容:', response.responseText);
button.style.border = "none"; if (response.status === 200) {
button.style.borderRadius = "5px"; resolve(response);
button.style.cursor = "pointer"; } else {
button.style.zIndex = "10000"; reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`));
}
button.addEventListener('click', function() { },
sendDataToBackend(); onerror: function(error) {
reject(error);
}
});
}); });
}
// 点击事件处理
button.addEventListener('click', async function() {
// 1. 获取当前URL和title
const currentUrl = window.location.href;
const pageTitle = document.title;
const targetElement = document.querySelector(TARGET_SELECTOR); // 清空之前的图片数据
Object.keys(ALL_IMG_DATA).forEach(key => delete ALL_IMG_DATA[key]);
if (targetElement && TARGET_SELECTOR !== 'body') { let img_count = 1;
const buttonContainer = document.createElement('div');
buttonContainer.style.display = 'inline-block';
buttonContainer.style.marginLeft = '10px';
button.style.position = 'relative'; // 首先处理当前页(第0页)的图片
button.style.top = 'auto'; if (DATA_LIST_SELECTOR) {
button.style.right = 'auto'; const linkElements = document.querySelectorAll(DATA_LIST_SELECTOR);
button.style.transform = 'none'; linkElements.forEach(link => {
button.style.margin = '0'; const href = link.href;
if (href) {
ALL_IMG_DATA[formatNumber(img_count)] = href;
img_count++;
}
});
}
buttonContainer.appendChild(button); // alert(`开始采集数据!\n当前页图片链接数量: ${Object.keys(ALL_IMG_DATA).length}\n开始采集其他页面...`);
if (targetElement.nextSibling) { // 处理单个页面的函数
targetElement.parentNode.insertBefore(buttonContainer, targetElement.nextSibling); const processPage = async (page) => {
// 构建分页URL
let newTargetUrl;
if (currentUrl.includes('?')) {
newTargetUrl = currentUrl.replace(/([?&])p=\d+/, `$1p=${page}`);
if (!newTargetUrl.includes('p=')) {
newTargetUrl += `&p=${page}`;
}
} else { } else {
targetElement.parentNode.appendChild(buttonContainer); newTargetUrl = currentUrl + `?p=${page}`;
} }
} else {
document.body.appendChild(button);
}
}
function sendDataToBackend() { try {
const currentUrl = window.location.href; // 使用GM_xmlhttpRequest发送请求
const cookies = document.cookie; const response = await new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: "GET",
url: newTargetUrl,
headers: {
"Referer": currentUrl,
"Cookie": document.cookie
},
onload: function(response) {
resolve(response);
},
onerror: function(error) {
reject(error);
}
});
});
const data = { // 从响应中提取图片链接
url: currentUrl, const pageImages = extractImagesFromPage(response.responseText);
cookies: cookies,
timestamp: new Date().toISOString()
};
// 禁用按钮防止重复点击 if (pageImages.length === 0) {
const button = document.getElementById('data-sender-button'); console.log(`${page}页没有图片,可能是最后一页`);
if (button) { return false; // 没有图片,可能是最后一页
button.disabled = true; }
button.textContent = "任务进行中...";
button.style.backgroundColor = "#6c757d";
}
// 发送任务请求 // 检查是否有重复图片
GM_xmlhttpRequest({ let hasNewImage = false;
method: "POST", pageImages.forEach(href => {
url: `${BACKEND_BASE_URL}/start-crawl`, // 检查这个图片是否已经存在
headers: { const isDuplicate = Object.values(ALL_IMG_DATA).includes(href);
"Content-Type": "application/json" if (!isDuplicate) {
}, ALL_IMG_DATA[formatNumber(img_count)] = href;
data: JSON.stringify(data), img_count++;
onload: function(response) { hasNewImage = true;
if (response.status === 200) {
const result = JSON.parse(response.responseText);
if (result.task_id) {
alert("爬虫任务已启动!任务ID: " + result.task_id);
// 开始轮询任务状态
pollTaskStatus(result.task_id);
} else {
alert("任务启动失败: " + (result.message || "未知错误"));
resetButton();
} }
} else { });
alert("请求失败,状态码: " + response.status);
resetButton();
}
},
onerror: function(error) {
console.error("数据发送失败:", error);
alert("数据发送失败,请检查后端服务是否运行");
resetButton();
}
});
}
function pollTaskStatus(taskId) { console.log(`${page}页采集完成,获取到${pageImages.length}个图片链接,新增${hasNewImage ? '有新图片' : '全是重复图片'}`);
let pollCount = 0;
const maxPolls = 300; // 最多轮询300次(5分钟,每秒一次)
const pollInterval = setInterval(() => { return hasNewImage; // 返回是否有新图片
pollCount++;
GM_xmlhttpRequest({ } catch (error) {
method: "GET", console.error(`${page}页采集失败:`, error);
url: `${BACKEND_BASE_URL}/task-status/${taskId}`, return false;
onload: function(response) { }
if (response.status === 200) { };
const result = JSON.parse(response.responseText);
// 更新按钮状态显示进度 // 从第1页开始采集,最多到100页
const button = document.getElementById('data-sender-button'); let shouldContinue = true;
if (button) { for (let page = 0; page <= 100; page++) {
button.textContent = `任务中...${pollCount}s`; if (!shouldContinue) break;
}
if (result.status === 'completed') { const hasNewImages = await processPage(page);
clearInterval(pollInterval);
alert("爬虫任务完成!\n结果: " + JSON.stringify(result.result, null, 2));
resetButton();
} else if (result.status === 'failed') {
clearInterval(pollInterval);
alert("爬虫任务失败: " + result.error);
resetButton();
}
// 如果状态是 'running',继续轮询
} else {
console.error("获取任务状态失败:", response.status);
}
},
onerror: function(error) {
console.error("轮询任务状态失败:", error);
}
});
// 超过最大轮询次数,停止轮询 // 如果没有新图片,说明可能是最后一页了
if (pollCount >= maxPolls) { if (!hasNewImages && page > 0) {
clearInterval(pollInterval); console.log(`${page}页没有新图片,可能已到最后一页,停止采集`);
alert("任务超时,请稍后手动检查结果"); shouldContinue = false;
resetButton();
} }
}, 1000); // 每秒轮询一次
}
function resetButton() { // 如果图片数量达到上限也停止
const button = document.getElementById('data-sender-button'); if (img_count > 2200) {
if (button) { console.log('图片数量达到上限2200,停止采集');
button.disabled = false; shouldContinue = false;
button.textContent = "send data"; }
button.style.backgroundColor = "#007baf";
} }
}
// 初始尝试添加按钮 // 打包最终数据
addButton(); const data = {
url: currentUrl,
title: pageTitle,
all_images: ALL_IMG_DATA,
total_images: Object.keys(ALL_IMG_DATA).length
};
// 使用MutationObserver监听DOM变化 // 显示结果并发送到后端
const observer = new MutationObserver(function(mutations) { console.log('采集完成的所有数据:', data);
addButton(); console.log('后端地址:', BACKEND_IP + ':' + BACKEND_PORT);
});
observer.observe(document.body, { try {
childList: true, await sendDataToBackend(data);
subtree: true alert(`数据采集完成并已保存到后端!\n标题: ${pageTitle}\n总图片链接数量: ${Object.keys(ALL_IMG_DATA).length}`);
} catch (error) {
console.error('发送数据到后端失败:', error);
alert(`数据采集完成但保存到后端失败!\n错误: ${error.message}\n请在控制台查看完整数据`);
}
}); });
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', addButton);
} else {
addButton();
}
})(); })();
Loading…
Cancel
Save