You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
234 lines
8.7 KiB
234 lines
8.7 KiB
// ==UserScript==
// @name eh-v2
// @namespace http://tampermonkey.net/
// @version 0.1
// @description 采集页面数据并发送到后端
// @author Jack
// @match *://*/*
// @grant GM_xmlhttpRequest
// ==/UserScript==

(function() {
'use strict';

// Global configuration - adjust these values for your environment.
const BACKEND_IP = '127.0.0.1';
const BACKEND_PORT = '5100';
// CSS selector of the element the button is appended to.
const BUTTON_LOCATION_SELECTOR = 'body';
// CSS selector of the <a> elements whose href values are collected.
const DATA_LIST_SELECTOR = '#gdt a';
// Collected links keyed by a zero-padded 4-digit sequence number, e.g.
// {"0001": "https://example001.jpg", "0002": "https://example002.jpg"}.
const ALL_IMG_DATA = {};

// Create the floating "send data" button.
const button = document.createElement('button');
button.id = 'data-sender-button';
// Explicit type so the button never acts as a submit button if the
// configured location happens to be inside a <form>.
button.type = 'button';
button.textContent = "send data";
Object.assign(button.style, {
    position: "fixed",
    top: "32%",
    right: "1%",
    transform: "translateY(-50%)",
    padding: "3px 8px",
    fontSize: "10px",
    backgroundColor: "#007baf",
    color: "#fff",
    border: "none",
    borderRadius: "5px",
    cursor: "pointer",
    zIndex: "10000",
});

// Attach the button at the configured location; fall back to <body> when the
// selector is empty (querySelector('') would throw) or matches nothing.
const targetElement = BUTTON_LOCATION_SELECTOR
    ? document.querySelector(BUTTON_LOCATION_SELECTOR)
    : null;
(targetElement || document.body).appendChild(button);

/**
 * Extract link targets from the HTML source of a fetched page.
 *
 * The markup is parsed with DOMParser into a separate, inert document instead
 * of being assigned to innerHTML of an element created by the live document:
 * that way the fetched markup cannot trigger resource loads or inline event
 * handlers (e.g. <img onerror=...>) in the current page.
 *
 * @param {string} htmlContent - Raw HTML text of the page.
 * @returns {string[]} The non-empty `href` values of every element matching
 *     DATA_LIST_SELECTOR, in document order; empty when the selector is unset.
 */
function extractImagesFromPage(htmlContent) {
    if (!DATA_LIST_SELECTOR) {
        return [];
    }

    const parsedDocument = new DOMParser().parseFromString(htmlContent, 'text/html');
    const linkElements = parsedDocument.querySelectorAll(DATA_LIST_SELECTOR);

    // The href of each <a> is usually the link to the image page.
    return Array.from(linkElements, link => link.href).filter(Boolean);
}

/**
 * Format a number as a zero-padded string.
 *
 * @param {number|string} num - Value to format.
 * @param {number} [width=4] - Minimum number of characters in the result.
 * @returns {string} `num` left-padded with '0' to at least `width` characters;
 *     longer values are returned unchanged.
 */
function formatNumber(num, width = 4) {
    return String(num).padStart(width, '0');
}

/**
 * POST the collected data as JSON to the backend's /save_url endpoint.
 *
 * @param {Object} data - Payload to serialize with JSON.stringify.
 * @returns {Promise<Object>} Resolves with the GM_xmlhttpRequest response when
 *     the backend answers with a 2xx status. Rejects with an Error on a
 *     non-2xx status, a network failure, or a timeout.
 */
function sendDataToBackend(data) {
    // Without an explicit timeout the ontimeout callback is never triggered.
    const REQUEST_TIMEOUT_MS = 30000;
    const endpoint = `http://${BACKEND_IP}:${BACKEND_PORT}/save_url`;

    console.log('准备发送的数据:', data);
    console.log('后端地址:', endpoint);

    return new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
            method: "POST",
            url: endpoint,
            headers: {
                "Content-Type": "application/json",
            },
            data: JSON.stringify(data),
            timeout: REQUEST_TIMEOUT_MS,
            onload: function(response) {
                console.log('后端响应状态:', response.status);
                console.log('后端响应内容:', response.responseText);
                console.log('响应头:', response.responseHeaders);
                // Any 2xx status counts as success, not only 200.
                if (response.status >= 200 && response.status < 300) {
                    resolve(response);
                } else {
                    reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`));
                }
            },
            onerror: function(error) {
                console.error('请求错误详情:', error);
                if (error instanceof Error) {
                    reject(error);
                    return;
                }
                // The callback argument is a response-like object, not an Error;
                // wrap it so callers can rely on `.message`.
                // NOTE(review): the `error` / `statusText` fields are assumed - confirm
                // against the userscript manager in use.
                const detail = (error && (error.error || error.statusText)) || 'network error';
                reject(new Error(`请求失败: ${detail}`));
            },
            ontimeout: function() {
                console.error('请求超时');
                reject(new Error('请求超时'));
            }
        });
    });
}

// Click handler: collect the links matching DATA_LIST_SELECTOR from the current
// page and from the paginated pages (?p=0 .. ?p=100), then send everything to
// the backend.
button.addEventListener('click', async function() {
    const MAX_PAGE_INDEX = 100;
    const MAX_IMAGE_COUNT = 2200;
    const PAGE_TIMEOUT_MS = 30000;

    // Disable the button while a run is in progress: a second concurrent run
    // would clear and refill the shared ALL_IMG_DATA underneath the first one.
    button.disabled = true;
    try {
        // 1. Capture the current URL and title.
        const currentUrl = window.location.href;
        const pageTitle = document.title;

        // Drop the results of any previous run.
        Object.keys(ALL_IMG_DATA).forEach(key => delete ALL_IMG_DATA[key]);

        let img_count = 1;
        // Links stored so far; gives O(1) duplicate checks instead of
        // rescanning Object.values(ALL_IMG_DATA) for every link.
        const seenLinks = new Set();

        // Store a link under the next sequence number; returns true when it was new.
        const addLink = (href) => {
            if (!href || seenLinks.has(href)) {
                return false;
            }
            seenLinks.add(href);
            ALL_IMG_DATA[formatNumber(img_count)] = href;
            img_count++;
            return true;
        };

        // Start with the links already present in the live page.
        if (DATA_LIST_SELECTOR) {
            document.querySelectorAll(DATA_LIST_SELECTOR).forEach(link => {
                addLink(link.href);
            });
        }

        // Build the URL of a given page by setting the `p` query parameter.
        // The URL API replaces an existing `p`, leaves other parameters alone
        // and keeps the query in front of any #fragment.
        const buildPageUrl = (page) => {
            const pageUrl = new URL(currentUrl);
            pageUrl.searchParams.set('p', String(page));
            return pageUrl.href;
        };

        // Fetch one page; rejects on network error or timeout.
        const fetchPage = (targetUrl) => new Promise((resolve, reject) => {
            GM_xmlhttpRequest({
                method: "GET",
                url: targetUrl,
                headers: {
                    "Referer": currentUrl,
                    "Cookie": document.cookie
                },
                timeout: PAGE_TIMEOUT_MS,
                onload: function(response) {
                    resolve(response);
                },
                onerror: function(error) {
                    reject(error);
                },
                ontimeout: function() {
                    reject(new Error('请求超时'));
                }
            });
        });

        // Process a single page. Returns true when it contributed at least one
        // new link, false when it had none or could not be fetched.
        const processPage = async (page) => {
            try {
                const response = await fetchPage(buildPageUrl(page));

                // Do not mistake an error page for "no more images".
                if (response.status < 200 || response.status >= 300) {
                    throw new Error(`HTTP ${response.status}`);
                }

                const pageImages = extractImagesFromPage(response.responseText);

                if (pageImages.length === 0) {
                    console.log(`第${page}页没有图片,可能是最后一页`);
                    return false;
                }

                let hasNewImage = false;
                pageImages.forEach(href => {
                    if (addLink(href)) {
                        hasNewImage = true;
                    }
                });

                console.log(`第${page}页采集完成,获取到${pageImages.length}个图片链接,新增${hasNewImage ? '有新图片' : '全是重复图片'}`);

                return hasNewImage;
            } catch (error) {
                console.error(`第${page}页采集失败:`, error);
                return false;
            }
        };

        // Walk pages 0..MAX_PAGE_INDEX sequentially.
        for (let page = 0; page <= MAX_PAGE_INDEX; page++) {
            const hasNewImages = await processPage(page);

            // Page 0 is exempt: it is typically the page already scanned from
            // the live DOM, so it yielding nothing new is expected. For any
            // later page, nothing new means the end was probably reached.
            if (!hasNewImages && page > 0) {
                console.log(`第${page}页没有新图片,可能已到最后一页,停止采集`);
                break;
            }

            // Stop once the image cap is exceeded.
            if (img_count > MAX_IMAGE_COUNT) {
                console.log('图片数量达到上限2200,停止采集');
                break;
            }
        }

        // Package the final payload.
        const data = {
            url: currentUrl,
            title: pageTitle,
            all_images: ALL_IMG_DATA,
            total_images: Object.keys(ALL_IMG_DATA).length
        };

        console.log('采集完成的所有数据:', data);
        console.log('后端地址:', `${BACKEND_IP}:${BACKEND_PORT}`);

        // Send to the backend and report the outcome to the user.
        try {
            await sendDataToBackend(data);
            alert(`数据采集完成并已保存到后端!\n标题: ${pageTitle}\n总图片链接数量: ${Object.keys(ALL_IMG_DATA).length}`);
        } catch (error) {
            console.error('发送数据到后端失败:', error);
            alert(`数据采集完成但保存到后端失败!\n错误: ${error.message}\n请在控制台查看完整数据`);
        }
    } finally {
        button.disabled = false;
    }
});

})();