main
Jack 2 months ago
parent a16c47b303
commit f9bf9826b4
  1. 73
      downloader.py
  2. 560
      main.py
  3. 361
      post_eh_data.js

@ -1,73 +0,0 @@
import os
import uuid
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel
router = APIRouter(prefix="/api/v1", tags=["downloader"])
# 存储任务状态
tasks = {}
class CrawlRequest(BaseModel):
    # Request body accepted by the start_crawl endpoint.
    # All three fields are written verbatim into the task's output file by run_crawler.
    url: str
    cookies: str
    timestamp: str
class TaskStatus(BaseModel):
    # Mirrors the keys stored for each task in the module-level ``tasks`` dict.
    status: str  # 'running', 'completed', 'failed'
    # Both fields default to None, so None must be part of the declared type;
    # a bare ``dict`` / ``str`` annotation contradicts its own default.
    result: Optional[dict] = None
    error: Optional[str] = None
@router.post("/start-crawl")
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    """Register a new crawl task and queue it as a background task.

    A UUID4 string is generated as the task id, an initial ``running`` entry
    is stored in ``tasks``, and ``run_crawler`` is scheduled with the id and
    the request.

    Returns:
        A dict with the new ``task_id`` and the literal status ``"started"``.
    """
    new_id = str(uuid.uuid4())
    tasks[new_id] = dict(status='running', result=None, error=None)
    # Run the crawler job in the background.
    background_tasks.add_task(run_crawler, new_id, request)
    return dict(task_id=new_id, status="started")
@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    """Return the stored entry for ``task_id``.

    When no (truthy) entry is stored under that id, a ``{"status":
    "not_found"}`` marker is returned instead.
    """
    return tasks.get(task_id) or {"status": "not_found"}
async def run_crawler(task_id: str, request: CrawlRequest):
    """Simulated crawl job: dump the request into a text file and record the outcome.

    The file ``downloads/download_<task_id>.txt`` receives the request's URL,
    cookies and timestamp. On success the task entry in ``tasks`` becomes
    ``completed`` with the file path in its result; any exception turns it
    into ``failed`` with the exception text as the error.
    """
    try:
        # Real crawler logic belongs here; a long-running job is only simulated.
        target_dir = Path("downloads")
        # Make sure the downloads directory exists (second line of defence).
        target_dir.mkdir(exist_ok=True)
        # Simulate a download by writing a file into the downloads directory.
        output_file = target_dir / f"download_{task_id}.txt"
        with open(output_file, 'w', encoding='utf-8') as handle:
            handle.writelines([
                f"URL: {request.url}\n",
                f"Cookies: {request.cookies}\n",
                f"Timestamp: {request.timestamp}\n",
                "Download completed successfully\n",
            ])
        # Record the successful outcome once the job has finished.
        outcome = {
            'message': '爬虫完成',
            'data': '您的爬虫结果',
            'download_path': str(output_file),
        }
        tasks[task_id] = {'status': 'completed', 'result': outcome, 'error': None}
    except Exception as exc:
        tasks[task_id] = {'status': 'failed', 'result': None, 'error': str(exc)}

@ -1,43 +1,535 @@
from fastapi import FastAPI
from contextlib import asynccontextmanager
import uvicorn
# main.py
import os
import json
import logging
from pathlib import Path
from downloader import router as downloader_router
# 检查并创建 downloads 目录
def ensure_downloads_dir():
    """Create the ``downloads`` directory if it is missing and print its absolute path."""
    target = Path("downloads")
    target.mkdir(exist_ok=True)
    location = target.absolute()
    print(f"确保 downloads 目录存在: {location}")
# lifespan 事件处理器
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan handler: prepare the downloads directory before yielding, print a message afterwards."""
    # Runs at startup
    ensure_downloads_dir()
    print("应用启动完成!")
    yield
    # Runs at shutdown (optional)
    print("应用正在关闭...")
# Application object wired to the ``lifespan`` handler.
# The call must be closed here: without the closing parenthesis the import
# statement that follows is parsed as part of the argument list.
app = FastAPI(
    title="下载器API",
    description="一个基于FastAPI的异步下载器服务",
    version="1.0.0",
    lifespan=lifespan,
)
from typing import Dict, Any, List
import aiofiles
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Constants
# Download root; the path is relative, so it resolves against the working directory.
DOWNLOADS_DIR = "downloads"
# sanitize_filename truncates names to this many characters.
MAX_FILENAME_LENGTH = 100
# sanitize_filename replaces each of these characters with '_'.
INVALID_FILENAME_CHARS = '<>:"/\\|?*'
# FastAPI application
app = FastAPI(title="eh-v2")
# 数据模型
class SaveDataRequest(BaseModel):
    # Request body of POST /save_url; save_url writes it out as data.json.
    url: str
    # Sanitized and used as the gallery's directory name.
    title: str
    # get_all_galleries treats each key as a file name inside the gallery directory.
    all_images: Dict[str, str]
    total_images: int
class GalleryInfo(BaseModel):
    # Summary of one gallery directory, as built by get_all_galleries.
    title: str  # 'title' from data.json, falling back to the directory name
    path: str  # the gallery directory as a string
    total_images: int  # 'total_images' from data.json, 0 when absent
    downloaded_images: int  # number of 'all_images' keys that exist as files in the directory
# 工具函数
def setup_downloads_directory() -> Path:
    """Make sure the download root exists, log its absolute path and return it.

    Returns:
        ``Path(DOWNLOADS_DIR)``, i.e. the relative path, not the absolute one.
    """
    root = Path(DOWNLOADS_DIR)
    root.mkdir(exist_ok=True)
    logger.info(f"下载目录已准备: {root.absolute()}")
    return root
def sanitize_filename(filename: str) -> str:
    """Turn an arbitrary title into a name usable as a single path component.

    Every character listed in ``INVALID_FILENAME_CHARS`` and every ASCII
    control character (code points 0-31) is replaced with ``'_'``, and the
    result is truncated to ``MAX_FILENAME_LENGTH`` characters.

    A result that is empty or consists only of dots and spaces (``""``,
    ``"."``, ``".."``, ...) is replaced with ``"_"``. Joined onto a base
    directory, such names would refer to the base directory itself or to its
    parent rather than to a child entry.

    Args:
        filename: The raw name, e.g. a page title.

    Returns:
        The sanitized, non-empty name.
    """
    replacements = {ord(char): '_' for char in INVALID_FILENAME_CHARS}
    # Control characters (NUL, newlines, ...) are never valid in a file name.
    replacements.update((code, '_') for code in range(32))
    # One translate pass instead of one str.replace call per character.
    sanitized = filename.translate(replacements)[:MAX_FILENAME_LENGTH]
    if not sanitized.strip(' .'):
        return '_'
    return sanitized
def create_title_directory(base_path: Path, title: str) -> Path:
    """Create the directory for ``title`` under ``base_path`` and return it.

    The title is passed through ``sanitize_filename`` first. The directory is
    created if it does not exist yet, and the path is logged.
    """
    target = base_path / sanitize_filename(title)
    target.mkdir(exist_ok=True)
    logger.info(f"创建标题目录: {target}")
    return target
async def save_data_to_file(file_path: Path, data: Dict[str, Any]) -> None:
    """Write ``data`` to ``file_path`` as UTF-8 JSON using aiofiles.

    The JSON is indented by two spaces and non-ASCII characters are written
    unescaped.
    """
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as out:
        serialized = json.dumps(data, ensure_ascii=False, indent=2)
        await out.write(serialized)
def get_all_galleries() -> List[GalleryInfo]:
    """Scan the download root and summarise every gallery that has a data.json.

    For each sub-directory of ``DOWNLOADS_DIR`` containing ``data.json``, the
    file is loaded and a ``GalleryInfo`` is built. The downloaded count is the
    number of ``all_images`` keys that exist as files in the gallery
    directory. A gallery whose data cannot be read or converted is logged and
    left out.

    Returns:
        The collected summaries; empty when the download root does not exist.
    """
    found = []
    root = Path(DOWNLOADS_DIR)
    if not root.exists():
        return found
    for entry in root.iterdir():
        if not entry.is_dir():
            continue
        manifest = entry / "data.json"
        if not manifest.exists():
            continue
        try:
            with open(manifest, 'r', encoding='utf-8') as handle:
                payload = json.load(handle)
            # Count the listed images that are already present on disk.
            present = 0
            if 'all_images' in payload:
                present = sum(
                    1
                    for name, _ in payload['all_images'].items()
                    if (entry / name).exists()
                )
            summary = GalleryInfo(
                title=payload.get('title', entry.name),
                path=str(entry),
                total_images=payload.get('total_images', 0),
                downloaded_images=present,
            )
            found.append(summary)
        except Exception as exc:
            logger.error(f"读取画廊数据失败 {entry}: {exc}")
    return found
# Initialisation: runs at import time; creates the download root and keeps its
# path for the route handlers that use ``downloads_path``.
downloads_path = setup_downloads_directory()
# API路由
@app.post("/save_url")
async def save_url(data: SaveDataRequest):
    """Persist the posted gallery data as ``data.json`` in the gallery's directory.

    The directory is derived from the title via ``create_title_directory``.
    On success a summary dict is returned (status, message, file path, title,
    image count).

    Raises:
        HTTPException: status 500 carrying the error text when anything fails.
    """
    try:
        logger.info("收到保存数据请求")
        logger.info(f"标题: {data.title}, URL: {data.url}, 图片数量: {data.total_images}")
        # Gallery directory first, then the data file inside it.
        target_file = create_title_directory(downloads_path, data.title) / "data.json"
        # Write the payload asynchronously.
        await save_data_to_file(target_file, data.dict())
        logger.info(f"数据已保存到: {target_file}")
        return dict(
            status="success",
            message="数据保存成功",
            file_path=str(target_file),
            title=data.title,
            total_images=data.total_images,
        )
    except Exception as exc:
        failure = f"保存数据时出错: {str(exc)}"
        logger.error(failure)
        logger.exception("详细错误信息:")
        raise HTTPException(status_code=500, detail=failure)
@app.get("/", response_class=HTMLResponse)
async def read_gallery_manager():
    """Serve the gallery manager page (a self-contained HTML/CSS/JS document).

    The page's script talks to ``/api/galleries``, ``/api/download/{title}``,
    ``/api/cleanup`` and ``/api/galleries/{title}``.

    Hardening applied to the embedded script:

    * Gallery titles are HTML-escaped before being placed into ``innerHTML``.
    * The inline ``onclick`` handlers look the title up by index in
      ``currentGalleries`` instead of embedding it in a JS string literal, so
      quotes or markup in a title can no longer break or inject code.
    * Progress is 0% instead of ``NaN%`` when ``total_images`` is 0.
    * ``loadGalleries`` reports non-2xx responses instead of trying to render
      an error object as a gallery list.
    """
    return """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>画廊下载管理器</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 15px;
box-shadow: 0 20px 40px rgba(0,0,0,0.1);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #2c3e50, #34495e);
color: white;
padding: 30px;
text-align: center;
}
.header h1 {
font-size: 2.5em;
margin-bottom: 10px;
}
.header p {
opacity: 0.8;
font-size: 1.1em;
}
.controls {
padding: 20px;
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
display: flex;
gap: 15px;
flex-wrap: wrap;
}
.btn {
padding: 12px 24px;
border: none;
border-radius: 8px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: all 0.3s ease;
display: inline-flex;
align-items: center;
gap: 8px;
}
.btn-primary {
background: #007bff;
color: white;
}
.btn-primary:hover {
background: #0056b3;
transform: translateY(-2px);
}
.btn-success {
background: #28a745;
color: white;
}
.btn-success:hover {
background: #1e7e34;
transform: translateY(-2px);
}
.btn-danger {
background: #dc3545;
color: white;
}
.btn-danger:hover {
background: #c82333;
transform: translateY(-2px);
}
.btn:disabled {
background: #6c757d;
cursor: not-allowed;
transform: none;
}
.gallery-list {
padding: 20px;
}
.gallery-item {
background: white;
border: 1px solid #e9ecef;
border-radius: 10px;
padding: 20px;
margin-bottom: 15px;
transition: all 0.3s ease;
display: flex;
justify-content: space-between;
align-items: center;
}
.gallery-item:hover {
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
transform: translateY(-2px);
}
.gallery-info {
flex: 1;
}
.gallery-title {
font-size: 1.3em;
font-weight: 600;
color: #2c3e50;
margin-bottom: 8px;
}
.gallery-stats {
display: flex;
gap: 20px;
color: #6c757d;
font-size: 0.9em;
}
.gallery-actions {
display: flex;
gap: 10px;
}
.progress-bar {
width: 200px;
height: 8px;
background: #e9ecef;
border-radius: 4px;
overflow: hidden;
margin-top: 8px;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, #28a745, #20c997);
transition: width 0.3s ease;
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: #6c757d;
}
.empty-state h3 {
margin-bottom: 10px;
font-size: 1.5em;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎨 画廊下载管理器</h1>
<p>管理您的画廊下载任务</p>
</div>
<div class="controls">
<button class="btn btn-primary" onclick="loadGalleries()">
📁 读取文件夹
</button>
<button class="btn btn-success" onclick="startDownload()" id="downloadBtn">
开始下载
</button>
<button class="btn btn-danger" onclick="deleteJsonFiles()">
🗑 删除JSON文件
</button>
</div>
<div class="gallery-list" id="galleryList">
<div class="empty-state">
<h3>暂无画廊数据</h3>
<p>点击"读取文件夹"按钮加载数据</p>
</div>
</div>
</div>
<script>
let currentGalleries = [];
// Escape text before it is interpolated into an HTML template.
function escapeHtml(value) {
const replacements = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
return String(value).replace(/[&<>"']/g, ch => replacements[ch]);
}
async function loadGalleries() {
try {
const response = await fetch('/api/galleries');
if (!response.ok) {
throw new Error('HTTP ' + response.status);
}
const galleries = await response.json();
currentGalleries = galleries;
displayGalleries(galleries);
} catch (error) {
alert('读取文件夹失败: ' + error);
}
}
function displayGalleries(galleries) {
const galleryList = document.getElementById('galleryList');
if (galleries.length === 0) {
galleryList.innerHTML = `
<div class="empty-state">
<h3>暂无画廊数据</h3>
<p>未找到任何画廊数据文件</p>
</div>
`;
return;
}
galleryList.innerHTML = galleries.map((gallery, index) => {
// Avoid 0/0 (NaN) for galleries that list no images.
const percent = gallery.total_images > 0
? (gallery.downloaded_images / gallery.total_images) * 100
: 0;
// The buttons look the title up by index (the list shown is currentGalleries),
// so the title never has to be embedded in a JS string literal.
return `
<div class="gallery-item">
<div class="gallery-info">
<div class="gallery-title">${escapeHtml(gallery.title)}</div>
<div class="gallery-stats">
<span>总图片: ${gallery.total_images}</span>
<span>已下载: ${gallery.downloaded_images}</span>
<span>进度: ${Math.round(percent)}%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: ${percent}%"></div>
</div>
</div>
<div class="gallery-actions">
<button class="btn btn-primary" onclick="downloadGallery(currentGalleries[${index}].title)">
下载
</button>
<button class="btn btn-danger" onclick="deleteGallery(currentGalleries[${index}].title)">
删除
</button>
</div>
</div>
`;
}).join('');
}
async function startDownload() {
const btn = document.getElementById('downloadBtn');
btn.disabled = true;
btn.innerHTML = '⏳ 下载中...';
try {
// 这里可以添加批量下载逻辑
for (const gallery of currentGalleries) {
if (gallery.downloaded_images < gallery.total_images) {
await downloadGallery(gallery.title);
}
}
alert('所有下载任务已完成!');
} catch (error) {
alert('下载失败: ' + error);
} finally {
btn.disabled = false;
btn.innerHTML = ' 开始下载';
await loadGalleries(); // 刷新列表
}
}
async function downloadGallery(title) {
try {
const response = await fetch(`/api/download/${encodeURIComponent(title)}`, {
method: 'POST'
});
const result = await response.json();
if (result.status === 'success') {
alert(`开始下载: ${title}`);
// 这里可以添加实时进度更新
} else {
alert(`下载失败: ${result.message}`);
}
} catch (error) {
alert('下载请求失败: ' + error);
}
}
async function deleteJsonFiles() {
if (!confirm('确定要删除所有JSON文件吗?此操作不可恢复!')) {
return;
}
try {
const response = await fetch('/api/cleanup', {
method: 'DELETE'
});
const result = await response.json();
alert(result.message);
await loadGalleries(); // 刷新列表
} catch (error) {
alert('删除失败: ' + error);
}
}
async function deleteGallery(title) {
if (!confirm(`确定要删除画廊"${title}"此操作不可恢复`)) {
return;
}
try {
const response = await fetch(`/api/galleries/${encodeURIComponent(title)}`, {
method: 'DELETE'
});
const result = await response.json();
alert(result.message);
await loadGalleries(); // 刷新列表
} catch (error) {
alert('删除失败: ' + error);
}
}
// 页面加载时自动读取
document.addEventListener('DOMContentLoaded', loadGalleries);
</script>
</body>
</html>
"""
@app.get("/api/galleries")
async def get_galleries():
    """Return the summaries produced by ``get_all_galleries``."""
    return get_all_galleries()
@app.post("/api/download/{title}")
async def download_gallery(title: str):
    """Acknowledge a download request for the gallery ``title``.

    No download is performed here yet; only a success payload echoing the
    title is returned.

    Raises:
        HTTPException: status 500 if building the response fails.
    """
    try:
        # The image download logic belongs here:
        # walk the all_images dict and download each image.
        acknowledgement = {
            "status": "success",
            "message": f"开始下载画廊: {title}",
            "title": title,
        }
        return acknowledgement
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"下载失败: {str(exc)}")
@app.delete("/api/cleanup")
async def cleanup_json_files():
    """Delete every gallery's ``data.json`` while leaving all other files in place.

    A missing download root is treated as "nothing to delete", matching the
    guard in ``get_all_galleries``, instead of surfacing as a 500 error.

    Returns:
        A dict with ``status``, a human-readable ``message`` and
        ``deleted_count``.

    Raises:
        HTTPException: status 500 if scanning or deleting fails.
    """
    try:
        deleted_count = 0
        downloads_path = Path(DOWNLOADS_DIR)
        # iterdir() raises on a missing directory, so check first.
        if downloads_path.exists():
            for gallery_dir in downloads_path.iterdir():
                if not gallery_dir.is_dir():
                    continue
                data_file = gallery_dir / "data.json"
                if data_file.exists():
                    data_file.unlink()
                    deleted_count += 1
        return {
            "status": "success",
            "message": f"已删除 {deleted_count} 个JSON文件",
            "deleted_count": deleted_count
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"清理失败: {str(e)}") from e
# 注册路由
app.include_router(downloader_router)
@app.delete("/api/galleries/{title}")
async def delete_gallery(title: str):
    """Delete the directory of the gallery ``title`` together with all its files.

    Returns:
        A dict with ``status`` and ``message`` on success.

    Raises:
        HTTPException: 400 when the title does not resolve to a location
            strictly inside the download root, 404 when the gallery directory
            does not exist, 500 when the deletion itself fails.
    """
    import shutil  # kept as a function-local import

    try:
        root = downloads_path.resolve()
        gallery_path = (downloads_path / sanitize_filename(title)).resolve()
        # Refuse anything that is the download root itself or lies outside
        # it: rmtree on such a path would wipe unrelated data.
        if gallery_path == root or root not in gallery_path.parents:
            raise HTTPException(status_code=400, detail="非法的画廊名称")
        if not gallery_path.is_dir():
            raise HTTPException(status_code=404, detail="画廊不存在")
        # Remove the whole gallery directory.
        shutil.rmtree(gallery_path)
        return {
            "status": "success",
            "message": f"已删除画廊: {title}"
        }
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # 400/404 above would be rewritten into a 500 by the handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}") from e
@app.get("/")
async def root():
    """Return a small JSON payload saying the service is running."""
    service_state = {"message": "下载器服务运行中", "status": "healthy"}
    return service_state
@app.get("/health")
async def health_check():
    """Health-check endpoint: always answers with a ``healthy`` status."""
    return dict(status="healthy")
if __name__ == "__main__":
    # Start the app with uvicorn when this file is run as a script.
    # Each keyword is passed exactly once: a repeated ``host=`` keyword or a
    # second dangling ``reload=True )`` tail is a syntax error.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=5100,
        reload=True,  # auto-reload during development
    )

@ -1,200 +1,229 @@
// ==UserScript==
// @name 数据发送工具
// @name eh-v2
// @namespace http://tampermonkey.net/
// @version 1.0
// @description 向本地后端发送当前页面的URL和Cookies
// @author You
// @version 0.1
// @description 采集页面数据并发送到后端
// @author Jack
// @match *://*/*
// @grant GM_xmlhttpRequest
// @connect 127.0.0.1
// @connect localhost
// ==/UserScript==
(function() {
'use strict';
// 配置:您可以修改这些变量来自定义行为
const TARGET_SELECTOR = 'body'; // 按钮插入位置的选择器
const BACKEND_IP = '127.0.0.1'; // 后端IP地址
const BACKEND_PORT = '5100'; // 后端端口号
// 构建后端基础URL
const BACKEND_BASE_URL = `http://${BACKEND_IP}:${BACKEND_PORT}`;
function addButton() {
if (document.getElementById('data-sender-button')) {
return;
}
const button = document.createElement('button');
button.id = 'data-sender-button';
button.textContent = "send data";
button.style.position = "fixed";
button.style.top = "12.5%";
button.style.right = "1%";
button.style.transform = "translateY(-50%)";
button.style.padding = "3px 8px";
button.style.fontSize = "10px";
button.style.backgroundColor = "#007baf";
button.style.color = "#fff";
button.style.border = "none";
button.style.borderRadius = "5px";
button.style.cursor = "pointer";
button.style.zIndex = "10000";
button.addEventListener('click', function() {
sendDataToBackend();
});
const targetElement = document.querySelector(TARGET_SELECTOR);
if (targetElement && TARGET_SELECTOR !== 'body') {
const buttonContainer = document.createElement('div');
buttonContainer.style.display = 'inline-block';
buttonContainer.style.marginLeft = '10px';
button.style.position = 'relative';
button.style.top = 'auto';
button.style.right = 'auto';
button.style.transform = 'none';
button.style.margin = '0';
buttonContainer.appendChild(button);
if (targetElement.nextSibling) {
targetElement.parentNode.insertBefore(buttonContainer, targetElement.nextSibling);
} else {
targetElement.parentNode.appendChild(buttonContainer);
}
} else {
document.body.appendChild(button);
}
// 全局配置 - 请根据实际情况修改这些值
const BACKEND_IP = '127.0.0.1';
const BACKEND_PORT = '5100';
const BUTTON_LOCATION_SELECTOR = 'body';
const DATA_LIST_SELECTOR = '#gdt a'; // 修改为a标签的选择器
const ALL_IMG_DATA = {}; // 用于储存每一页的图片url, 格式为 {"0001": "https://example001.jpg", "0002": "https://example002.jpg"}, 最高支持4位数至9999
// 创建按钮
const button = document.createElement('button');
button.id = 'data-sender-button';
button.textContent = "send data";
button.style.position = "fixed";
button.style.top = "32%";
button.style.right = "1%";
button.style.transform = "translateY(-50%)";
button.style.padding = "3px 8px";
button.style.fontSize = "10px";
button.style.backgroundColor = "#007baf";
button.style.color = "#fff";
button.style.border = "none";
button.style.borderRadius = "5px";
button.style.cursor = "pointer";
button.style.zIndex = "10000";
// 添加到指定位置
const targetElement = document.querySelector(BUTTON_LOCATION_SELECTOR);
if (targetElement) {
targetElement.appendChild(button);
} else {
// 如果选择器找不到元素,默认添加到body
document.body.appendChild(button);
}
function sendDataToBackend() {
const currentUrl = window.location.href;
const cookies = document.cookie;
const data = {
url: currentUrl,
cookies: cookies,
timestamp: new Date().toISOString()
};
// 禁用按钮防止重复点击
const button = document.getElementById('data-sender-button');
if (button) {
button.disabled = true;
button.textContent = "任务进行中...";
button.style.backgroundColor = "#6c757d";
// 从页面中提取图片的函数
/**
 * Collect the href of every element matching DATA_LIST_SELECTOR in an HTML string.
 *
 * The markup is parsed with DOMParser into a separate document instead of
 * being assigned to innerHTML of an element created by the live page. The
 * fetched markup is not under this script's control, and the innerHTML route
 * can start resource loads and run inline handlers (e.g. an img onerror).
 *
 * @param {string} htmlContent HTML source of a gallery page.
 * @returns {string[]} The non-empty href values, in document order.
 */
function extractImagesFromPage(htmlContent) {
    const images = [];
    if (!DATA_LIST_SELECTOR || !htmlContent) {
        return images;
    }
    const parsedDoc = new DOMParser().parseFromString(htmlContent, 'text/html');
    parsedDoc.querySelectorAll(DATA_LIST_SELECTOR).forEach(link => {
        // The href of each matched <a> is usually the link to the image page.
        const href = link.href;
        if (href) {
            images.push(href);
        }
    });
    return images;
}
// 发送任务请求
GM_xmlhttpRequest({
method: "POST",
url: `${BACKEND_BASE_URL}/start-crawl`,
headers: {
"Content-Type": "application/json"
},
data: JSON.stringify(data),
onload: function(response) {
if (response.status === 200) {
const result = JSON.parse(response.responseText);
if (result.task_id) {
alert("爬虫任务已启动!任务ID: " + result.task_id);
// 开始轮询任务状态
pollTaskStatus(result.task_id);
} else {
alert("任务启动失败: " + (result.message || "未知错误"));
resetButton();
}
} else {
alert("请求失败,状态码: " + response.status);
resetButton();
}
},
onerror: function(error) {
console.error("数据发送失败:", error);
alert("数据发送失败,请检查后端服务是否运行");
resetButton();
}
});
// 格式化数字为4位数
/**
 * Render a number as a string left-padded with zeros to at least four characters.
 * Values that already have four or more characters are returned unpadded.
 */
function formatNumber(num) {
    const text = num.toString();
    return text.padStart(4, '0');
}
function pollTaskStatus(taskId) {
let pollCount = 0;
const maxPolls = 300; // 最多轮询300次(5分钟,每秒一次)
// 发送数据到后端的函数
function sendDataToBackend(data) {
console.log('准备发送的数据:', data);
console.log('数据类型:', typeof data);
console.log('字符串化后的数据:', JSON.stringify(data));
const pollInterval = setInterval(() => {
pollCount++;
return new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: "GET",
url: `${BACKEND_BASE_URL}/task-status/${taskId}`,
method: "POST",
url: `http://${BACKEND_IP}:${BACKEND_PORT}/save_url`,
headers: {
"Content-Type": "application/json",
},
data: JSON.stringify(data),
onload: function(response) {
console.log('后端响应状态:', response.status);
console.log('后端响应内容:', response.responseText);
if (response.status === 200) {
const result = JSON.parse(response.responseText);
// 更新按钮状态显示进度
const button = document.getElementById('data-sender-button');
if (button) {
button.textContent = `任务中...${pollCount}s`;
}
if (result.status === 'completed') {
clearInterval(pollInterval);
alert("爬虫任务完成!\n结果: " + JSON.stringify(result.result, null, 2));
resetButton();
} else if (result.status === 'failed') {
clearInterval(pollInterval);
alert("爬虫任务失败: " + result.error);
resetButton();
}
// 如果状态是 'running',继续轮询
resolve(response);
} else {
console.error("获取任务状态失败:", response.status);
reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`));
}
},
onerror: function(error) {
console.error("轮询任务状态失败:", error);
reject(error);
}
});
// 超过最大轮询次数,停止轮询
if (pollCount >= maxPolls) {
clearInterval(pollInterval);
alert("任务超时,请稍后手动检查结果");
resetButton();
}
}, 1000); // 每秒轮询一次
});
}
function resetButton() {
const button = document.getElementById('data-sender-button');
if (button) {
button.disabled = false;
button.textContent = "send data";
button.style.backgroundColor = "#007baf";
// 点击事件处理
button.addEventListener('click', async function() {
// 1. 获取当前URL和title
const currentUrl = window.location.href;
const pageTitle = document.title;
// 清空之前的图片数据
Object.keys(ALL_IMG_DATA).forEach(key => delete ALL_IMG_DATA[key]);
let img_count = 1;
// 首先处理当前页(第0页)的图片
if (DATA_LIST_SELECTOR) {
const linkElements = document.querySelectorAll(DATA_LIST_SELECTOR);
linkElements.forEach(link => {
const href = link.href;
if (href) {
ALL_IMG_DATA[formatNumber(img_count)] = href;
img_count++;
}
});
}
}
// 初始尝试添加按钮
addButton();
// alert(`开始采集数据!\n当前页图片链接数量: ${Object.keys(ALL_IMG_DATA).length}\n开始采集其他页面...`);
// 使用MutationObserver监听DOM变化
const observer = new MutationObserver(function(mutations) {
addButton();
});
// 处理单个页面的函数
const processPage = async (page) => {
// 构建分页URL
let newTargetUrl;
if (currentUrl.includes('?')) {
newTargetUrl = currentUrl.replace(/([?&])p=\d+/, `$1p=${page}`);
if (!newTargetUrl.includes('p=')) {
newTargetUrl += `&p=${page}`;
}
} else {
newTargetUrl = currentUrl + `?p=${page}`;
}
observer.observe(document.body, {
childList: true,
subtree: true
});
try {
// 使用GM_xmlhttpRequest发送请求
const response = await new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: "GET",
url: newTargetUrl,
headers: {
"Referer": currentUrl,
"Cookie": document.cookie
},
onload: function(response) {
resolve(response);
},
onerror: function(error) {
reject(error);
}
});
});
// 从响应中提取图片链接
const pageImages = extractImagesFromPage(response.responseText);
if (pageImages.length === 0) {
console.log(`${page}页没有图片,可能是最后一页`);
return false; // 没有图片,可能是最后一页
}
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', addButton);
} else {
addButton();
}
// 检查是否有重复图片
let hasNewImage = false;
pageImages.forEach(href => {
// 检查这个图片是否已经存在
const isDuplicate = Object.values(ALL_IMG_DATA).includes(href);
if (!isDuplicate) {
ALL_IMG_DATA[formatNumber(img_count)] = href;
img_count++;
hasNewImage = true;
}
});
console.log(`${page}页采集完成,获取到${pageImages.length}个图片链接,新增${hasNewImage ? '有新图片' : '全是重复图片'}`);
return hasNewImage; // 返回是否有新图片
} catch (error) {
console.error(`${page}页采集失败:`, error);
return false;
}
};
// 从第1页开始采集,最多到100页
let shouldContinue = true;
for (let page = 0; page <= 100; page++) {
if (!shouldContinue) break;
const hasNewImages = await processPage(page);
// 如果没有新图片,说明可能是最后一页了
if (!hasNewImages && page > 0) {
console.log(`${page}页没有新图片,可能已到最后一页,停止采集`);
shouldContinue = false;
}
// 如果图片数量达到上限也停止
if (img_count > 2200) {
console.log('图片数量达到上限2200,停止采集');
shouldContinue = false;
}
}
// 打包最终数据
const data = {
url: currentUrl,
title: pageTitle,
all_images: ALL_IMG_DATA,
total_images: Object.keys(ALL_IMG_DATA).length
};
// 显示结果并发送到后端
console.log('采集完成的所有数据:', data);
console.log('后端地址:', BACKEND_IP + ':' + BACKEND_PORT);
try {
await sendDataToBackend(data);
alert(`数据采集完成并已保存到后端!\n标题: ${pageTitle}\n总图片链接数量: ${Object.keys(ALL_IMG_DATA).length}`);
} catch (error) {
console.error('发送数据到后端失败:', error);
alert(`数据采集完成但保存到后端失败!\n错误: ${error.message}\n请在控制台查看完整数据`);
}
});
})();
Loading…
Cancel
Save