Jack 2 months ago
parent afa3e93ea9
commit a83b4b1e22
  1. 353
      Tampermonkey/eh.js
  2. 46
      Tampermonkey/hdk4.js

@ -1,30 +1,38 @@
// ==UserScript==
// @name eh-v2
// @name eh
// @namespace http://tampermonkey.net/
// @version 1.1
// @version 1.5
// @description 采集页面数据并发送到后端
// @author Jack
// @match *://*/*
// @match https://e-hentai.org/*
// @grant GM_xmlhttpRequest
// @grant GM_notification
// ==/UserScript==
(function() {
'use strict';
// Global configuration - adjust these values for your environment.
const BACKEND_IP = '127.0.0.1';
const BACKEND_PORT = '55830';
const BUTTON_LOCATION_SELECTOR = '#gd5 > p:nth-child(5)';
// Selector for the thumbnail <a> elements on a gallery page.
const DATA_LIST_SELECTOR = '#gdt a';
// Collected image URLs keyed by a zero-padded 4-digit index,
// e.g. {"0001": "https://example001.jpg", "0002": "https://example002.jpg"}.
const ALL_IMG_DATA = {};
const source = 'eh';
// Layout - changing this single number moves both the button and the progress display.
const TOP_POSITION = 32; // percentage from the top; the button sits here, the progress display 4% lower
// Concurrency settings.
const CONCURRENT_LIMIT = 5; // maximum number of requests issued together
const REQUEST_DELAY = 50; // delay between request groups, in milliseconds
// Create the floating trigger button.
const button = document.createElement('button');
button.id = 'data-sender-button';
button.textContent = "send data";
button.style.position = "fixed";
// The vertical position is derived from TOP_POSITION only, so the button and
// the progress display (placed at TOP_POSITION + 4) stay aligned when it changes.
button.style.top = `${TOP_POSITION}%`;
button.style.right = "1%";
button.style.transform = "translateY(-50%)";
button.style.padding = "3px 8px";
@ -36,120 +44,155 @@
button.style.cursor = "pointer";
button.style.zIndex = "10000";
// 添加到指定位置
// Progress display: a fixed overlay placed 4% below the button, hidden until needed.
const progressDiv = document.createElement('div');
progressDiv.id = 'progress-display';
Object.assign(progressDiv.style, {
    position: "fixed",
    top: `${TOP_POSITION + 4}%`,
    right: "1%",
    padding: "5px 10px",
    fontSize: "12px",
    backgroundColor: "rgba(0,0,0,0.7)",
    color: "#fff",
    borderRadius: "5px",
    zIndex: "9999",
    display: "none",
});
document.body.appendChild(progressDiv);
// Mount the button inside the configured element; fall back to <body> when
// the selector matches nothing.
const targetElement = document.querySelector(BUTTON_LOCATION_SELECTOR);
(targetElement || document.body).appendChild(button);
// 从页面中提取图片的函数
/**
 * Collect the resolved `href` of every element matching DATA_LIST_SELECTOR
 * in an HTML string.
 *
 * The markup is parsed with DOMParser rather than assigned to `innerHTML` of
 * an element owned by the live page: the parsed document is inert, so the
 * images in the fetched markup are not downloaded and inline event handlers
 * in it cannot run.
 *
 * @param {string} htmlContent - HTML source of a gallery page.
 * @returns {string[]} Link URLs in document order; empty when nothing matches.
 */
function extractImagesFromPage(htmlContent) {
    if (!DATA_LIST_SELECTOR || !htmlContent) {
        return [];
    }
    const parsed = new DOMParser().parseFromString(htmlContent, 'text/html');
    const images = [];
    parsed.querySelectorAll(DATA_LIST_SELECTOR).forEach((link) => {
        // `href` (the property) yields the resolved link target; these
        // normally point at the per-image pages.
        const href = link.href;
        if (href) {
            images.push(href);
        }
    });
    return images;
}
// 格式化数字为4位数
/**
 * Render a number as a string left-padded with zeros to at least 4 characters.
 *
 * @param {number} num - Value to format.
 * @returns {string} e.g. 7 -> "0007"; values longer than 4 digits are returned unpadded.
 */
function formatNumber(num) {
    const digits = num.toString();
    return digits.padStart(4, '0');
}
// 获取基础URL(移除分页参数)
/**
 * Remove the `p=<digits>` pagination parameter from a URL.
 *
 * @param {string} url - URL that may carry a `p` query parameter.
 * @returns {string} The URL without `p`; other query parameters are kept and
 *   a dangling trailing `?` or `&` is stripped.
 */
function getBaseUrl(url) {
    return url
        // When another parameter follows, keep the leading `?`/`&` so the
        // remaining query string stays well-formed; otherwise drop the match.
        .replace(/([?&])p=\d+(&|$)/, (match, leading, trailing) => (trailing === '&' ? leading : ''))
        .replace(/[?&]$/, '');
}
// 构建分页URL
/**
 * Build the URL of a gallery page from the pagination-free base URL.
 *
 * @param {string} baseUrl - URL without a `p` parameter.
 * @param {number} page - Zero-based page index.
 * @returns {string} `baseUrl` itself for page 0 (the first page takes no `p`
 *   parameter); otherwise `baseUrl` with `p=<page>` appended using `?` or `&`
 *   depending on whether a query string is already present.
 */
function buildPageUrl(baseUrl, page) {
    const hasQuery = baseUrl.includes('?');
    if (page === 0) {
        return baseUrl;
    }
    return `${baseUrl}${hasQuery ? '&' : '?'}p=${page}`;
}
// 发送数据到后端的函数
function sendDataToBackend(data) {
console.log('准备发送的数据:', data);
console.log('数据类型:', typeof data);
console.log('字符串化后的数据:', JSON.stringify(data));
/**
 * Collect the raw `href` attribute of every element matching
 * DATA_LIST_SELECTOR in an HTML string.
 *
 * The markup is parsed with DOMParser rather than assigned to `innerHTML` of
 * an element owned by the live page: the parsed document is inert, so the
 * thumbnails in the fetched markup are not downloaded again and inline event
 * handlers in it cannot run.
 *
 * @param {string} htmlContent - HTML source of a gallery page.
 * @returns {string[]} Attribute values in document order (not resolved
 *   against a base URL); empty when nothing matches.
 */
function extractThumbnailLinks(htmlContent) {
    if (!DATA_LIST_SELECTOR || !htmlContent) {
        return [];
    }
    const parsed = new DOMParser().parseFromString(htmlContent, 'text/html');
    const links = [];
    parsed.querySelectorAll(DATA_LIST_SELECTOR).forEach((anchor) => {
        const hrefAttr = anchor.getAttribute('href');
        if (hrefAttr) {
            links.push(hrefAttr);
        }
    });
    return links;
}
/**
 * Pull the `src` of the `<img id="img">` element out of raw page HTML.
 *
 * Works on the markup text (no DOM needed). The tag is located first and the
 * `src` attribute is then read from it, so the result does not depend on the
 * order of the attributes or on which quote character is used.
 *
 * @param {string} htmlContent - HTML source of an image page.
 * @returns {string|null} The image URL with `&amp;` decoded to `&`, or null
 *   when the input is not a string or no matching tag/attribute exists.
 */
function extractActualImageUrl(htmlContent) {
    if (typeof htmlContent !== 'string') {
        return null;
    }
    // Require whitespace before the attribute name so `data-id` / `data-src`
    // are not mistaken for `id` / `src`.
    const tagMatch = htmlContent.match(/<img\b[^>]*\sid\s*=\s*(["'])img\1[^>]*>/i);
    if (!tagMatch) {
        return null;
    }
    const srcMatch = tagMatch[0].match(/\ssrc\s*=\s*(["'])(.*?)\1/i);
    if (!srcMatch) {
        return null;
    }
    // Attribute values in raw HTML escape `&` as `&amp;`; undo that so the
    // returned value is a usable URL.
    return srcMatch[2].replace(/&amp;/g, '&');
}
// Issue a request through GM_xmlhttpRequest and expose its callbacks as a Promise.
// NOTE(review): this span looks like two revisions of the request interleaved
// (a JSON POST to the backend and a GET of `url`) - confirm which lines are
// current before relying on the notes below.
async function fetchImagePage(url) {
return new Promise((resolve, reject) => {
GM_xmlhttpRequest({
// `method` and `url` each appear twice in this literal; the later entries
// ("GET" and the `url` argument) are the ones that take effect.
method: "POST",
url: `http://${BACKEND_IP}:${BACKEND_PORT}/api/save_json`,
method: "GET",
url: url,
headers: {
"Content-Type": "application/json",
"Referer": window.location.href,
"Cookie": document.cookie
},
// NOTE(review): `data` is neither a parameter nor a local of this function - verify it is in scope.
data: JSON.stringify(data),
onload: function(response) {
console.log('后端响应状态:', response.status);
console.log('后端响应内容:', response.responseText);
if (response.status === 200) {
resolve(response);
} else {
reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`));
}
// A promise settles only once, so this call has no effect after the
// resolve/reject in the branch above.
resolve({url, html: response.responseText});
},
onerror: function(error) {
reject(error);
// Same here: only the first reject above takes effect.
reject({url, error});
}
});
});
}
// 处理单个页面的函数(支持HTML字符串和当前页面)
async function processPage(page, htmlContent = null) {
let images = [];
let hasNewImage = false;
// 并发处理函数
async function processThumbnailLinksConcurrently(thumbnailLinks, pageNum) {
const results = [];
const totalLinks = thumbnailLinks.length;
// 处理当前页(page = 0)的情况
if (page === 0 && htmlContent === null) {
// 直接从当前DOM中提取图片
if (DATA_LIST_SELECTOR) {
const linkElements = document.querySelectorAll(DATA_LIST_SELECTOR);
linkElements.forEach(link => {
const href = link.href;
if (href) {
images.push(href);
// 更新进度显示 - 页数显示为 pageNum + 1(从1开始)
const displayPageNum = pageNum + 1;
updateProgress(`${displayPageNum}页: 0/${totalLinks}`, 0);
// 将链接分组,实现并发控制
for (let i = 0; i < thumbnailLinks.length; i += CONCURRENT_LIMIT) {
const chunk = thumbnailLinks.slice(i, i + CONCURRENT_LIMIT);
// 并发请求当前组
const promises = chunk.map(link => fetchImagePage(link));
try {
const chunkResults = await Promise.all(promises);
results.push(...chunkResults);
// 更新进度
const processed = Math.min(i + CONCURRENT_LIMIT, totalLinks);
updateProgress(`${displayPageNum}页: ${processed}/${totalLinks}`, (processed / totalLinks) * 100);
// 组间延迟,避免请求过快
if (i + CONCURRENT_LIMIT < thumbnailLinks.length) {
await new Promise(resolve => setTimeout(resolve, REQUEST_DELAY));
}
});
} catch (error) {
console.error('并发请求组失败:', error);
}
} else {
// 获取分页URL并请求
}
// 处理结果,提取真实图片URL
const imageUrls = [];
for (const result of results) {
if (result.html) {
const actualImageUrl = extractActualImageUrl(result.html);
if (actualImageUrl) {
imageUrls.push(actualImageUrl);
}
}
}
return imageUrls;
}
/**
 * Show the progress display with the given text and fill level.
 *
 * @param {string} text - Message to display.
 * @param {number} percentage - Portion (0-100) of the background drawn in the
 *   highlight colour, from the left edge.
 */
function updateProgress(text, percentage) {
    progressDiv.textContent = text;
    const { style } = progressDiv;
    style.display = "block";
    // Both colour stops share the same position, giving a hard edge that
    // works as a progress bar.
    const stops = [`#007baf ${percentage}%`, `rgba(0,0,0,0.7) ${percentage}%`];
    style.background = `linear-gradient(90deg, ${stops.join(', ')})`;
}
async function processPage(page) {
const baseUrl = getBaseUrl(window.location.href);
const pageUrl = buildPageUrl(baseUrl, page);
// 如果是当前页面(htmlContent是响应内容)
if (htmlContent) {
images = extractImagesFromPage(htmlContent);
} else {
// 请求远程页面
try {
const response = await new Promise((resolve, reject) => {
let htmlContent;
if (page === 0) {
htmlContent = document.documentElement.innerHTML;
} else {
htmlContent = await new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: "GET",
url: pageUrl,
@ -158,86 +201,120 @@
"Cookie": document.cookie
},
onload: function(response) {
resolve(response);
resolve(response.responseText);
},
onerror: function(error) {
reject(error);
}
});
});
images = extractImagesFromPage(response.responseText);
} catch (error) {
console.error(`${page}页采集失败:`, error);
return false;
}
}
}
if (images.length === 0) {
console.log(`${page}页没有图片,可能是最后一页`);
const thumbnailLinks = extractThumbnailLinks(htmlContent);
if (thumbnailLinks.length === 0) {
console.log(`${page + 1}页没有缩略图链接,可能是最后一页`);
return false;
}
// 检查是否有重复图片并添加到总数据中
images.forEach(href => {
const isDuplicate = Object.values(ALL_IMG_DATA).includes(href);
console.log(`${page + 1}页找到${thumbnailLinks.length}个缩略图链接,开始并发获取真实图片URL...`);
// 使用并发处理
const actualImageUrls = await processThumbnailLinksConcurrently(thumbnailLinks, page);
let hasNewImage = false;
actualImageUrls.forEach(url => {
const isDuplicate = Object.values(ALL_IMG_DATA).includes(url);
if (!isDuplicate) {
ALL_IMG_DATA[formatNumber(Object.keys(ALL_IMG_DATA).length + 1)] = href;
ALL_IMG_DATA[formatNumber(Object.keys(ALL_IMG_DATA).length + 1)] = url;
hasNewImage = true;
}
});
console.log(`${page}页采集完成,获取到${images.length}个图片链接,新增${hasNewImage ? '有新图片' : '全是重复图片'}`);
console.log(`${page + 1}页采集完成,获取到${actualImageUrls.length}个真实图片链接`);
return hasNewImage;
} catch (error) {
console.error(`${page + 1}页采集失败:`, error);
return false;
}
}
/**
 * POST the collected data to the backend's /api/save_json endpoint as JSON.
 *
 * @param {Object} data - Payload; serialised with JSON.stringify.
 * @returns {Promise<Object>} Resolves with the parsed response body when the
 *   backend answers HTTP 200 and the body has a truthy `success` field.
 *   Rejects with an Error in every other case (non-200 status, unparsable
 *   body, falsy `success`, network error, abort, timeout), so callers can
 *   always read `error.message`.
 */
function sendDataToBackend(data) {
    return new Promise((resolve, reject) => {
        // GM_xmlhttpRequest passes its failure callbacks a response-like
        // object rather than an Error; wrap it so `error.message` is defined
        // and keep the original object for debugging.
        const failWith = (reason) => (details) => {
            const failure = new Error(`请求后端失败: ${reason}`);
            failure.details = details;
            reject(failure);
        };
        GM_xmlhttpRequest({
            method: "POST",
            url: `http://${BACKEND_IP}:${BACKEND_PORT}/api/save_json`,
            headers: {
                "Content-Type": "application/json",
            },
            data: JSON.stringify(data),
            onload: function(response) {
                if (response.status !== 200) {
                    reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`));
                    return;
                }
                let result;
                try {
                    result = JSON.parse(response.responseText);
                } catch (e) {
                    reject(new Error('解析响应失败: ' + e.message));
                    return;
                }
                // Guard against a valid-but-empty body such as `null`.
                if (result && result.success) {
                    resolve(result);
                } else {
                    reject(new Error((result && result.message) || '后端保存失败'));
                }
            },
            onerror: failWith('网络错误'),
            // Without these handlers an aborted or timed-out request would
            // leave the promise pending forever.
            onabort: failWith('请求被中止'),
            ontimeout: failWith('请求超时'),
        });
    });
}
// 点击事件处理
button.addEventListener('click', async function() {
// 1. 获取当前URL和title
const currentUrl = window.location.href;
const pageTitle = document.title;
// 清空之前的图片数据
Object.keys(ALL_IMG_DATA).forEach(key => delete ALL_IMG_DATA[key]);
// 获取当前页码(如果存在)
const pageMatch = currentUrl.match(/[?&]p=(\d+)/);
const currentPage = pageMatch ? parseInt(pageMatch[1]) : 0;
console.log(`当前页码: ${currentPage}`);
button.textContent = "采集中...";
button.disabled = true;
progressDiv.style.display = "block";
// 从第0页开始处理(包括当前页面)
try {
let shouldContinue = true;
let page = 0;
let totalPagesProcessed = 0;
while (shouldContinue && page <= 100) {
if (page === 0) {
// 处理当前页(第0页),直接使用当前DOM
const hasNewImages = await processPage(0, null);
if (!hasNewImages && page > 0) {
console.log(`${page}页没有新图片,可能已到最后一页,停止采集`);
shouldContinue = false;
}
} else {
// 处理后续分页
// 显示页数为 page + 1(从1开始)
updateProgress(`正在处理第${page + 1}页...`, (page / 100) * 50);
const hasNewImages = await processPage(page);
if (!hasNewImages) {
console.log(`${page}页没有新图片,可能已到最后一页,停止采集`);
totalPagesProcessed++;
if (!hasNewImages && page > 0) {
console.log(`${page + 1}页没有新图片,停止采集`);
shouldContinue = false;
}
}
// 如果图片数量达到上限也停止
if (Object.keys(ALL_IMG_DATA).length >= 2200) {
console.log('图片数量达到上限2200,停止采集');
shouldContinue = false;
}
page++;
// 页面间延迟,避免请求过快
if (shouldContinue) {
await new Promise(resolve => setTimeout(resolve, 200));
}
}
updateProgress(`处理完成,准备发送数据...`, 80);
// 打包最终数据
const data = {
url: currentUrl,
title: pageTitle,
@ -246,16 +323,52 @@
totalImages: Object.keys(ALL_IMG_DATA).length
};
// 显示结果并发送到后端
console.log('采集完成的所有数据:', data);
console.log('后端地址:', BACKEND_IP + ':' + BACKEND_PORT);
try {
await sendDataToBackend(data);
alert(`数据采集完成并已保存到后端!\n标题: ${pageTitle}\n总图片链接数量: ${Object.keys(ALL_IMG_DATA).length}`);
// 发送数据到后端
const response = await sendDataToBackend(data);
updateProgress(`数据发送成功!页面将在1秒后关闭...`, 100);
// 显示成功通知
if (typeof GM_notification !== 'undefined') {
GM_notification({
title: '数据采集完成',
text: `已采集 ${Object.keys(ALL_IMG_DATA).length} 张图片,页面即将关闭`,
timeout: 1000
});
}
// 1秒后自动关闭页面,无需用户确认
setTimeout(() => {
console.log(`数据保存成功,关闭页面。采集统计:
- 标题: ${pageTitle}
- 总图片数: ${Object.keys(ALL_IMG_DATA).length}
- 处理页数: ${totalPagesProcessed}
- 保存路径: ${response.folder || '未知'}`);
window.close();
}, 1000);
} catch (error) {
console.error('发送数据到后端失败:', error);
alert(`数据采集完成但保存到后端失败!\n错误: ${error.message}\n请在控制台查看完整数据`);
console.error('采集失败:', error);
updateProgress(`采集失败: ${error.message}`, 0);
// 失败时显示错误信息,但也不弹框,只在控制台显示
console.error('数据采集失败!错误:', error.message);
// 在进度条上显示错误信息
progressDiv.textContent = `采集失败: ${error.message}`;
progressDiv.style.backgroundColor = "rgba(255,0,0,0.7)";
} finally {
button.textContent = "send data";
button.disabled = false;
// 如果发生错误,10秒后隐藏进度显示
setTimeout(() => {
progressDiv.style.display = "none";
progressDiv.style.backgroundColor = "rgba(0,0,0,0.7)";
}, 10000);
}
});
})();

@ -1,10 +1,10 @@
// ==UserScript==
// @name hd4k_downloader_simple
// @name hd4k_downloader
// @namespace http://tampermonkey.net/
// @version 1.2
// @version 1.4
// @description 简单直接的自动翻页图片爬取
// @author Your Name
// @match *://*/*
// @author Jack
// @match https://www.4khd.com/*
// @grant GM_xmlhttpRequest
// ==/UserScript==
@ -92,14 +92,23 @@
};
const getCurrentPageImages = () => {
const images = document.querySelectorAll('img');
// 每次都在当前页面重新查找容器
const container = document.querySelector('#basicExample');
if (!container) {
console.log('当前页面未找到图片容器 #basicExample');
return [];
}
// 在容器内查找图片
const images = container.querySelectorAll('img');
const imageUrls = [];
const seenUrls = new Set();
images.forEach(img => {
let src = img.src || img.dataset.src || img.dataset.original || img.currentSrc;
let src = img.src || img.dataset.src || img.currentSrc;
if (src && src.trim() && !src.startsWith('data:') && !src.startsWith('blob:')) {
// 处理URL
let fullUrl = src;
if (src.startsWith('//')) {
fullUrl = window.location.protocol + src;
@ -248,9 +257,17 @@
updateStatus(`处理第 ${currentPage} 页...`);
const imageUrls = getCurrentPageImages();
if (imageUrls.length === 0) {
updateStatus(`${currentPage} 页: 未找到图片`);
setTimeout(async () => {
await finishCrawling();
}, CONFIG.pageDelay);
return;
}
console.log(`${currentPage} 页找到 ${imageUrls.length} 张图片`);
if (imageUrls.length > 0) {
allImages[currentPage] = imageUrls;
sessionStorage.setItem('hd4k_all_images', JSON.stringify(allImages));
updateStatus(`${currentPage} 页: 找到 ${imageUrls.length} 张图片`);
@ -280,14 +297,6 @@
}, CONFIG.pageDelay);
}, CONFIG.pageDelay);
} else {
updateStatus(`${currentPage} 页: 无图片`);
setTimeout(async () => {
await finishCrawling();
}, CONFIG.pageDelay);
}
};
const startCrawling = async () => {
@ -296,6 +305,13 @@
return;
}
// 检查容器是否存在
const container = document.querySelector('#basicExample');
if (!container) {
alert('未找到图片容器 #basicExample,请确认页面结构!\n\n可能原因:\n1. 页面未完全加载\n2. 图片在滚动后才加载\n3. 网站结构已变化');
return;
}
const button = document.getElementById('hd4k-btn');
button.textContent = '爬取中...';
button.style.backgroundColor = '#ff9800';

Loading…
Cancel
Save