diff --git a/Tampermonkey/eh.js b/Tampermonkey/eh.js index dc19c63..4e7bcd5 100644 --- a/Tampermonkey/eh.js +++ b/Tampermonkey/eh.js @@ -1,30 +1,38 @@ // ==UserScript== -// @name eh-v2 +// @name eh // @namespace http://tampermonkey.net/ -// @version 1.1 +// @version 1.5 // @description 采集页面数据并发送到后端 // @author Jack -// @match *://*/* +// @match https://e-hentai.org/* // @grant GM_xmlhttpRequest +// @grant GM_notification // ==/UserScript== (function() { 'use strict'; - // 全局配置 - 请根据实际情况修改这些值 + // 全局配置 const BACKEND_IP = '127.0.0.1'; const BACKEND_PORT = '55830'; const BUTTON_LOCATION_SELECTOR = '#gd5 > p:nth-child(5)'; - const DATA_LIST_SELECTOR = '#gdt a'; // 修改为a标签的选择器 - const ALL_IMG_DATA = {}; // 用于储存每一页的图片url, 格式为 {"0001": "https://example001.jpg", "0002": "https://example002.jpg"}, 最高支持4位数至9999 + const DATA_LIST_SELECTOR = '#gdt a'; + const ALL_IMG_DATA = {}; const source = 'eh'; + // 位置配置 - 只需调整这个数字即可同时调整按钮和进度条位置 + const TOP_POSITION = 32; // 百分比位置,按钮在32%,进度条在36% + + // 并发配置 + const CONCURRENT_LIMIT = 5; // 并发请求数量限制 + const REQUEST_DELAY = 50; // 请求间延迟(毫秒) + // 创建按钮 const button = document.createElement('button'); button.id = 'data-sender-button'; button.textContent = "send data"; button.style.position = "fixed"; - button.style.top = "32%"; + button.style.top = `${TOP_POSITION}%`; button.style.right = "1%"; button.style.transform = "translateY(-50%)"; button.style.padding = "3px 8px"; @@ -36,226 +44,331 @@ button.style.cursor = "pointer"; button.style.zIndex = "10000"; - // 添加到指定位置 + // 创建进度显示 - 位置在按钮下方4% + const progressDiv = document.createElement('div'); + progressDiv.id = 'progress-display'; + progressDiv.style.position = "fixed"; + progressDiv.style.top = `${TOP_POSITION + 4}%`; + progressDiv.style.right = "1%"; + progressDiv.style.padding = "5px 10px"; + progressDiv.style.fontSize = "12px"; + progressDiv.style.backgroundColor = "rgba(0,0,0,0.7)"; + progressDiv.style.color = "#fff"; + progressDiv.style.borderRadius = "5px"; + progressDiv.style.zIndex = 
"9999"; + progressDiv.style.display = "none"; + document.body.appendChild(progressDiv); + const targetElement = document.querySelector(BUTTON_LOCATION_SELECTOR); if (targetElement) { targetElement.appendChild(button); } else { - // 如果选择器找不到元素,默认添加到body document.body.appendChild(button); } - // 从页面中提取图片的函数 - function extractImagesFromPage(htmlContent) { - const images = []; - // 创建一个临时div来解析HTML - const tempDiv = document.createElement('div'); - tempDiv.innerHTML = htmlContent; - - if (DATA_LIST_SELECTOR) { - const linkElements = tempDiv.querySelectorAll(DATA_LIST_SELECTOR); - linkElements.forEach(link => { - // 从a标签中获取href属性,这通常是图片页面链接 - const href = link.href; - if (href) { - images.push(href); - } - }); - } - return images; - } - - // 格式化数字为4位数 function formatNumber(num) { return num.toString().padStart(4, '0'); } - // 获取基础URL(移除分页参数) function getBaseUrl(url) { - // 使用正则表达式移除 p= 参数及其值 return url.replace(/([?&])p=\d+(&|$)/, (match, p1, p2) => { - // 如果后面还有参数,保留&符号,否则保留空字符串 return p2 === '&' ? p1 : ''; - }).replace(/[?&]$/, ''); // 移除末尾的?或& + }).replace(/[?&]$/, ''); } - // 构建分页URL function buildPageUrl(baseUrl, page) { if (page === 0) { - // 第0页不需要p参数(首页) return baseUrl.includes('?') ? baseUrl : baseUrl; } else { - // 其他页添加p参数 const separator = baseUrl.includes('?') ? 
'&' : '?'; return baseUrl + separator + `p=${page}`; } } - // 发送数据到后端的函数 - function sendDataToBackend(data) { - console.log('准备发送的数据:', data); - console.log('数据类型:', typeof data); - console.log('字符串化后的数据:', JSON.stringify(data)); + function extractThumbnailLinks(htmlContent) { + const links = []; + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = htmlContent; + if (DATA_LIST_SELECTOR) { + const linkElements = tempDiv.querySelectorAll(DATA_LIST_SELECTOR); + linkElements.forEach(link => { + const hrefAttr = link.getAttribute('href'); + if (hrefAttr) { + links.push(hrefAttr); + } + }); + } + return links; + } + + function extractActualImageUrl(htmlContent) { + const regex = /<img[^>]*id="img"[^>]*src="([^"]*)"[^>]*>/i; + const match = htmlContent.match(regex); + return match ? match[1] : null; + } + + async function fetchImagePage(url) { return new Promise((resolve, reject) => { GM_xmlhttpRequest({ - method: "POST", - url: `http://${BACKEND_IP}:${BACKEND_PORT}/api/save_json`, + method: "GET", + url: url, headers: { - "Content-Type": "application/json", + "Referer": window.location.href, + "Cookie": document.cookie }, - data: JSON.stringify(data), onload: function(response) { - console.log('后端响应状态:', response.status); - console.log('后端响应内容:', response.responseText); - if (response.status === 200) { - resolve(response); - } else { - reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`)); - } + resolve({url, html: response.responseText}); }, onerror: function(error) { - reject(error); + reject({url, error}); } }); }); } - // 处理单个页面的函数(支持HTML字符串和当前页面) - async function processPage(page, htmlContent = null) { - let images = []; - let hasNewImage = false; - - // 处理当前页(page = 0)的情况 - if (page === 0 && htmlContent === null) { - // 直接从当前DOM中提取图片 - if (DATA_LIST_SELECTOR) { - const linkElements = document.querySelectorAll(DATA_LIST_SELECTOR); - linkElements.forEach(link => { - const href = link.href; - if (href) { - images.push(href); - } - }); - 
} - } else { - // 获取分页URL并请求 - const baseUrl = getBaseUrl(window.location.href); - const pageUrl = buildPageUrl(baseUrl, page); + // 并发处理函数 + async function processThumbnailLinksConcurrently(thumbnailLinks, pageNum) { + const results = []; + const totalLinks = thumbnailLinks.length; - // 如果是当前页面(htmlContent是响应内容) - if (htmlContent) { - images = extractImagesFromPage(htmlContent); - } else { - // 请求远程页面 - try { - const response = await new Promise((resolve, reject) => { - GM_xmlhttpRequest({ - method: "GET", - url: pageUrl, - headers: { - "Referer": window.location.href, - "Cookie": document.cookie - }, - onload: function(response) { - resolve(response); - }, - onerror: function(error) { - reject(error); - } - }); - }); + // 更新进度显示 - 页数显示为 pageNum + 1(从1开始) + const displayPageNum = pageNum + 1; + updateProgress(`第${displayPageNum}页: 0/${totalLinks}`, 0); + + // 将链接分组,实现并发控制 + for (let i = 0; i < thumbnailLinks.length; i += CONCURRENT_LIMIT) { + const chunk = thumbnailLinks.slice(i, i + CONCURRENT_LIMIT); + + // 并发请求当前组 + const promises = chunk.map(link => fetchImagePage(link)); + + try { + const chunkResults = await Promise.all(promises); + results.push(...chunkResults); - images = extractImagesFromPage(response.responseText); - } catch (error) { - console.error(`第${page}页采集失败:`, error); - return false; + // 更新进度 + const processed = Math.min(i + CONCURRENT_LIMIT, totalLinks); + updateProgress(`第${displayPageNum}页: ${processed}/${totalLinks}`, (processed / totalLinks) * 100); + + // 组间延迟,避免请求过快 + if (i + CONCURRENT_LIMIT < thumbnailLinks.length) { + await new Promise(resolve => setTimeout(resolve, REQUEST_DELAY)); } + } catch (error) { + console.error('并发请求组失败:', error); } } - if (images.length === 0) { - console.log(`第${page}页没有图片,可能是最后一页`); - return false; + // 处理结果,提取真实图片URL + const imageUrls = []; + for (const result of results) { + if (result.html) { + const actualImageUrl = extractActualImageUrl(result.html); + if (actualImageUrl) { + 
imageUrls.push(actualImageUrl); + } + } } - // 检查是否有重复图片并添加到总数据中 - images.forEach(href => { - const isDuplicate = Object.values(ALL_IMG_DATA).includes(href); - if (!isDuplicate) { - ALL_IMG_DATA[formatNumber(Object.keys(ALL_IMG_DATA).length + 1)] = href; - hasNewImage = true; + return imageUrls; + } + + function updateProgress(text, percentage) { + progressDiv.textContent = text; + progressDiv.style.display = "block"; + + // 可以添加进度条样式 + progressDiv.style.background = `linear-gradient(90deg, #007baf ${percentage}%, rgba(0,0,0,0.7) ${percentage}%)`; + } + + async function processPage(page) { + const baseUrl = getBaseUrl(window.location.href); + const pageUrl = buildPageUrl(baseUrl, page); + + try { + let htmlContent; + + if (page === 0) { + htmlContent = document.documentElement.innerHTML; + } else { + htmlContent = await new Promise((resolve, reject) => { + GM_xmlhttpRequest({ + method: "GET", + url: pageUrl, + headers: { + "Referer": window.location.href, + "Cookie": document.cookie + }, + onload: function(response) { + resolve(response.responseText); + }, + onerror: function(error) { + reject(error); + } + }); + }); } - }); - console.log(`第${page}页采集完成,获取到${images.length}个图片链接,新增${hasNewImage ? 
'有新图片' : '全是重复图片'}`); + const thumbnailLinks = extractThumbnailLinks(htmlContent); + + if (thumbnailLinks.length === 0) { + console.log(`第${page + 1}页没有缩略图链接,可能是最后一页`); + return false; + } + + console.log(`第${page + 1}页找到${thumbnailLinks.length}个缩略图链接,开始并发获取真实图片URL...`); + + // 使用并发处理 + const actualImageUrls = await processThumbnailLinksConcurrently(thumbnailLinks, page); + + let hasNewImage = false; + actualImageUrls.forEach(url => { + const isDuplicate = Object.values(ALL_IMG_DATA).includes(url); + if (!isDuplicate) { + ALL_IMG_DATA[formatNumber(Object.keys(ALL_IMG_DATA).length + 1)] = url; + hasNewImage = true; + } + }); + + console.log(`第${page + 1}页采集完成,获取到${actualImageUrls.length}个真实图片链接`); + + return hasNewImage; + + } catch (error) { + console.error(`第${page + 1}页采集失败:`, error); + return false; + } + } - return hasNewImage; + function sendDataToBackend(data) { + return new Promise((resolve, reject) => { + GM_xmlhttpRequest({ + method: "POST", + url: `http://${BACKEND_IP}:${BACKEND_PORT}/api/save_json`, + headers: { + "Content-Type": "application/json", + }, + data: JSON.stringify(data), + onload: function(response) { + if (response.status === 200) { + try { + const result = JSON.parse(response.responseText); + if (result.success) { + resolve(result); + } else { + reject(new Error(result.message || '后端保存失败')); + } + } catch (e) { + reject(new Error('解析响应失败: ' + e.message)); + } + } else { + reject(new Error(`后端返回错误: ${response.status} - ${response.responseText}`)); + } + }, + onerror: function(error) { + reject(error); + } + }); + }); } - // 点击事件处理 button.addEventListener('click', async function() { - // 1. 获取当前URL和title const currentUrl = window.location.href; const pageTitle = document.title; - // 清空之前的图片数据 Object.keys(ALL_IMG_DATA).forEach(key => delete ALL_IMG_DATA[key]); - // 获取当前页码(如果存在) - const pageMatch = currentUrl.match(/[?&]p=(\d+)/); - const currentPage = pageMatch ? 
parseInt(pageMatch[1]) : 0; - console.log(`当前页码: ${currentPage}`); + button.textContent = "采集中..."; + button.disabled = true; + progressDiv.style.display = "block"; - // 从第0页开始处理(包括当前页面) - let shouldContinue = true; - let page = 0; + try { + let shouldContinue = true; + let page = 0; + let totalPagesProcessed = 0; + + while (shouldContinue && page <= 100) { + // 显示页数为 page + 1(从1开始) + updateProgress(`正在处理第${page + 1}页...`, (page / 100) * 50); + + const hasNewImages = await processPage(page); + totalPagesProcessed++; - while (shouldContinue && page <= 100) { - if (page === 0) { - // 处理当前页(第0页),直接使用当前DOM - const hasNewImages = await processPage(0, null); if (!hasNewImages && page > 0) { - console.log(`第${page}页没有新图片,可能已到最后一页,停止采集`); + console.log(`第${page + 1}页没有新图片,停止采集`); shouldContinue = false; } - } else { - // 处理后续分页 - const hasNewImages = await processPage(page); - if (!hasNewImages) { - console.log(`第${page}页没有新图片,可能已到最后一页,停止采集`); + + if (Object.keys(ALL_IMG_DATA).length >= 2200) { + console.log('图片数量达到上限2200,停止采集'); shouldContinue = false; } - } - // 如果图片数量达到上限也停止 - if (Object.keys(ALL_IMG_DATA).length >= 2200) { - console.log('图片数量达到上限2200,停止采集'); - shouldContinue = false; + page++; + + // 页面间延迟,避免请求过快 + if (shouldContinue) { + await new Promise(resolve => setTimeout(resolve, 200)); + } } - page++; - } + updateProgress(`处理完成,准备发送数据...`, 80); - // 打包最终数据 - const data = { - url: currentUrl, - title: pageTitle, - source: source, - imgs: ALL_IMG_DATA, - totalImages: Object.keys(ALL_IMG_DATA).length - }; + const data = { + url: currentUrl, + title: pageTitle, + source: source, + imgs: ALL_IMG_DATA, + totalImages: Object.keys(ALL_IMG_DATA).length + }; - // 显示结果并发送到后端 - console.log('采集完成的所有数据:', data); - console.log('后端地址:', BACKEND_IP + ':' + BACKEND_PORT); + console.log('采集完成的所有数据:', data); + + // 发送数据到后端 + const response = await sendDataToBackend(data); + + updateProgress(`数据发送成功!页面将在1秒后关闭...`, 100); + + // 显示成功通知 + if (typeof GM_notification !== 'undefined') { 
+ GM_notification({ + title: '数据采集完成', + text: `已采集 ${Object.keys(ALL_IMG_DATA).length} 张图片,页面即将关闭`, + timeout: 1000 + }); + } + + // 1秒后自动关闭页面,无需用户确认 + setTimeout(() => { + console.log(`数据保存成功,关闭页面。采集统计: + - 标题: ${pageTitle} + - 总图片数: ${Object.keys(ALL_IMG_DATA).length} + - 处理页数: ${totalPagesProcessed} + - 保存路径: ${response.folder || '未知'}`); + window.close(); + }, 1000); - try { - await sendDataToBackend(data); - alert(`数据采集完成并已保存到后端!\n标题: ${pageTitle}\n总图片链接数量: ${Object.keys(ALL_IMG_DATA).length}`); } catch (error) { - console.error('发送数据到后端失败:', error); - alert(`数据采集完成但保存到后端失败!\n错误: ${error.message}\n请在控制台查看完整数据`); + console.error('采集失败:', error); + updateProgress(`采集失败: ${error.message}`, 0); + + // 失败时显示错误信息,但也不弹框,只在控制台显示 + console.error('数据采集失败!错误:', error.message); + + // 在进度条上显示错误信息 + progressDiv.textContent = `采集失败: ${error.message}`; + progressDiv.style.backgroundColor = "rgba(255,0,0,0.7)"; + + } finally { + button.textContent = "send data"; + button.disabled = false; + + // 如果发生错误,10秒后隐藏进度显示 + setTimeout(() => { + progressDiv.style.display = "none"; + progressDiv.style.backgroundColor = "rgba(0,0,0,0.7)"; + }, 10000); } }); })(); \ No newline at end of file diff --git a/Tampermonkey/hdk4.js b/Tampermonkey/hdk4.js index 0292282..c2a11a0 100644 --- a/Tampermonkey/hdk4.js +++ b/Tampermonkey/hdk4.js @@ -1,10 +1,10 @@ // ==UserScript== -// @name hd4k_downloader_simple +// @name hd4k_downloader // @namespace http://tampermonkey.net/ -// @version 1.2 +// @version 1.4 // @description 简单直接的自动翻页图片爬取 -// @author Your Name -// @match *://*/* +// @author Jack +// @match https://www.4khd.com/* // @grant GM_xmlhttpRequest // ==/UserScript== @@ -92,14 +92,23 @@ }; const getCurrentPageImages = () => { - const images = document.querySelectorAll('img'); + // 每次都在当前页面重新查找容器 + const container = document.querySelector('#basicExample'); + if (!container) { + console.log('当前页面未找到图片容器 #basicExample'); + return []; + } + + // 在容器内查找图片 + const images = 
container.querySelectorAll('img'); const imageUrls = []; const seenUrls = new Set(); images.forEach(img => { - let src = img.src || img.dataset.src || img.dataset.original || img.currentSrc; + let src = img.src || img.dataset.src || img.currentSrc; if (src && src.trim() && !src.startsWith('data:') && !src.startsWith('blob:')) { + // 处理URL let fullUrl = src; if (src.startsWith('//')) { fullUrl = window.location.protocol + src; @@ -248,46 +257,46 @@ updateStatus(`处理第 ${currentPage} 页...`); const imageUrls = getCurrentPageImages(); - console.log(`第 ${currentPage} 页找到 ${imageUrls.length} 张图片`); - - if (imageUrls.length > 0) { - allImages[currentPage] = imageUrls; - sessionStorage.setItem('hd4k_all_images', JSON.stringify(allImages)); - updateStatus(`第 ${currentPage} 页: 找到 ${imageUrls.length} 张图片`); + if (imageUrls.length === 0) { + updateStatus(`第 ${currentPage} 页: 未找到图片`); setTimeout(async () => { - const nextPage = currentPage + 1; + await finishCrawling(); + }, CONFIG.pageDelay); + return; + } - if (nextPage > CONFIG.maxPages) { - updateStatus(`已达到最大页数 ${CONFIG.maxPages}`); - await finishCrawling(); - return; - } + console.log(`第 ${currentPage} 页找到 ${imageUrls.length} 张图片`); - const nextUrl = buildPageUrl(nextPage); + allImages[currentPage] = imageUrls; + sessionStorage.setItem('hd4k_all_images', JSON.stringify(allImages)); + updateStatus(`第 ${currentPage} 页: 找到 ${imageUrls.length} 张图片`); - if (crawledUrls.includes(nextUrl)) { - updateStatus('下一页URL已爬取过,停止爬取'); - await finishCrawling(); - return; - } + setTimeout(async () => { + const nextPage = currentPage + 1; - updateStatus(`准备跳转到第 ${nextPage} 页`); - sessionStorage.setItem('hd4k_current_page', nextPage.toString()); + if (nextPage > CONFIG.maxPages) { + updateStatus(`已达到最大页数 ${CONFIG.maxPages}`); + await finishCrawling(); + return; + } - setTimeout(() => { - window.location.href = nextUrl; - }, CONFIG.pageDelay); + const nextUrl = buildPageUrl(nextPage); - }, CONFIG.pageDelay); + if (crawledUrls.includes(nextUrl)) 
{ + updateStatus('下一页URL已爬取过,停止爬取'); + await finishCrawling(); + return; + } - } else { - updateStatus(`第 ${currentPage} 页: 无图片`); + updateStatus(`准备跳转到第 ${nextPage} 页`); + sessionStorage.setItem('hd4k_current_page', nextPage.toString()); - setTimeout(async () => { - await finishCrawling(); + setTimeout(() => { + window.location.href = nextUrl; }, CONFIG.pageDelay); - } + + }, CONFIG.pageDelay); }; const startCrawling = async () => { @@ -296,6 +305,13 @@ return; } + // 检查容器是否存在 + const container = document.querySelector('#basicExample'); + if (!container) { + alert('未找到图片容器 #basicExample,请确认页面结构!\n\n可能原因:\n1. 页面未完全加载\n2. 图片在滚动后才加载\n3. 网站结构已变化'); + return; + } + const button = document.getElementById('hd4k-btn'); button.textContent = '爬取中...'; button.style.backgroundColor = '#ff9800';