// ==UserScript==
// @name         hd4k_downloader
// @namespace    http://tampermonkey.net/
// @version      1.4
// @description  简单直接的自动翻页图片爬取
// @author       Jack
// @match        https://www.4khd.com/*
// @grant        GM_xmlhttpRequest
// @connect      127.0.0.1
// ==/UserScript==

/*
 * Walks the numbered pages of a gallery ("….html", "….html/2", "….html/3", …),
 * collects the image URLs found inside `#basicExample` on every page and, once
 * no further page can be crawled, POSTs the result as JSON to a local backend.
 *
 * Every page change is a full navigation, so the crawl state is persisted in
 * sessionStorage and restored when the script starts again on the next page.
 */
(function() {
    'use strict';

    const CONFIG = {
        maxPages: 50,
        // Pause (ms) used after scanning a page and again before navigating.
        pageDelay: 1500,
        // Pause (ms) before a restored crawl continues on a freshly loaded page.
        resumeDelay: 1500,
        backendUrl: 'http://127.0.0.1:55830/api/save_json',
        // Upper bound (ms) for the backend request.
        requestTimeout: 10000,
    };

    // sessionStorage keys that carry the crawl state across navigations.
    const STORAGE_KEYS = {
        crawling: 'hd4k_crawling',
        // Written for bookkeeping; nothing in this script reads it back.
        currentPage: 'hd4k_current_page',
        allImages: 'hd4k_all_images',
        crawledUrls: 'hd4k_crawled_urls',
    };

    const BUTTON_ID = 'hd4k-btn';
    const STATUS_ID = 'hd4k-status';
    const COLOR_IDLE = '#2c80ff';
    const COLOR_HOVER = '#1a6ee0';
    const COLOR_BUSY = '#ff9800';
    const IMAGE_URL_PATTERN = /\.(jpg|jpeg|png|gif|webp|bmp|tiff)(\?.*)?$/i;

    const source = 'hd4k';

    let isCrawling = false;
    // Maps page number -> array of image URLs found on that page.
    let allImages = {};
    let currentPage = 1;
    // URLs already processed in this crawl; used to detect loops/redirects.
    let crawledUrls = [];
    // Handle of the pending "hide the status box" timer, if any.
    let statusHideTimer = null;

    /** @returns {boolean} true when sessionStorage marks a crawl as in progress. */
    const isCrawlSessionActive = () =>
        sessionStorage.getItem(STORAGE_KEYS.crawling) === 'true';

    /**
     * Builds the floating start button (not yet attached to the document).
     * @returns {HTMLButtonElement}
     */
    const createButton = () => {
        const button = document.createElement('button');
        button.textContent = '开始爬取';
        button.id = BUTTON_ID;
        Object.assign(button.style, {
            position: 'fixed',
            top: '14%',
            right: '1%',
            transform: 'translateY(-50%)',
            padding: '8px 16px',
            fontSize: '12px',
            fontWeight: 'bold',
            backgroundColor: COLOR_IDLE,
            color: '#fff',
            border: 'none',
            borderRadius: '8px',
            cursor: 'pointer',
            zIndex: '10000',
            boxShadow: '0 2px 5px rgba(0,0,0,0.2)',
            transition: 'all 0.3s ease',
        });

        // Hover feedback is suppressed while a crawl is running.
        button.addEventListener('mouseenter', () => {
            if (!isCrawling) {
                button.style.backgroundColor = COLOR_HOVER;
                button.style.transform = 'translateY(-50%) scale(1.05)';
            }
        });
        button.addEventListener('mouseleave', () => {
            if (!isCrawling) {
                button.style.backgroundColor = COLOR_IDLE;
                button.style.transform = 'translateY(-50%) scale(1)';
            }
        });
        button.addEventListener('click', startCrawling);
        return button;
    };

    /**
     * Switches the button between its idle and its "crawling" appearance.
     * Does nothing when the button is not in the document.
     * @param {boolean} busy
     */
    const setButtonBusy = (busy) => {
        const button = document.getElementById(BUTTON_ID);
        if (!button) {
            return;
        }
        button.textContent = busy ? '爬取中...' : '开始爬取';
        button.style.backgroundColor = busy ? COLOR_BUSY : COLOR_IDLE;
        button.disabled = busy;
    };

    /**
     * Builds the (initially hidden) status box.
     * @returns {HTMLDivElement}
     */
    const createStatusDisplay = () => {
        const statusDiv = document.createElement('div');
        statusDiv.id = STATUS_ID;
        Object.assign(statusDiv.style, {
            position: 'fixed',
            top: '18%',
            right: '1%',
            padding: '10px',
            backgroundColor: 'rgba(0,0,0,0.85)',
            color: '#fff',
            borderRadius: '5px',
            fontSize: '12px',
            zIndex: '9999',
            minWidth: '180px',
            // Messages are plain text; keep their "\n" line breaks visible.
            whiteSpace: 'pre-line',
            display: 'none',
        });
        return statusDiv;
    };

    /**
     * Shows `message` in the status box and logs it to the console.
     * The message is inserted as text, never as HTML, because it may contain
     * error details that do not originate from this script.
     * @param {string} message
     */
    const updateStatus = (message) => {
        // A new message must not be hidden by a timer armed for an older one.
        clearTimeout(statusHideTimer);
        const statusDiv = document.getElementById(STATUS_ID);
        if (statusDiv) {
            statusDiv.textContent = message;
            statusDiv.style.display = 'block';
        }
        console.log(`[状态] ${message}`);
    };

    /**
     * Hides the status box after `delayMs`, unless another message is shown first.
     * @param {number} delayMs
     */
    const hideStatusAfter = (delayMs) => {
        clearTimeout(statusHideTimer);
        statusHideTimer = setTimeout(() => {
            const statusDiv = document.getElementById(STATUS_ID);
            if (statusDiv) {
                statusDiv.style.display = 'none';
            }
        }, delayMs);
    };

    /**
     * Picks the first usable address of an <img>: `src`, then `data-src`, then
     * `currentSrc`. Inline `data:`/`blob:` values are skipped so that a
     * placeholder in `src` does not mask the real URL kept in `data-src`.
     * @param {HTMLImageElement} img
     * @returns {string|null}
     */
    const pickImageSource = (img) => {
        const candidates = [img.src, img.dataset.src, img.currentSrc];
        for (const candidate of candidates) {
            const value = typeof candidate === 'string' ? candidate.trim() : '';
            if (value && !value.startsWith('data:') && !value.startsWith('blob:')) {
                return value;
            }
        }
        return null;
    };

    /**
     * Resolves protocol-relative, root-relative and relative addresses against
     * the current page; addresses starting with "http" are returned unchanged.
     * @param {string} src
     * @returns {string|null} null when the address cannot be resolved.
     */
    const toAbsoluteUrl = (src) => {
        if (src.startsWith('http')) {
            return src;
        }
        try {
            return new URL(src, window.location.href).href;
        } catch (error) {
            console.warn('无法解析图片地址:', src, error);
            return null;
        }
    };

    /**
     * Collects the unique image URLs inside `#basicExample` on the current page.
     * @returns {string[]} absolute URLs in document order; empty when the
     *     container is missing.
     */
    const getCurrentPageImages = () => {
        // Look the container up on every call: each page is a fresh document.
        const container = document.querySelector('#basicExample');
        if (!container) {
            console.log('当前页面未找到图片容器 #basicExample');
            return [];
        }

        const seenUrls = new Set();
        for (const img of container.querySelectorAll('img')) {
            const src = pickImageSource(img);
            const fullUrl = src === null ? null : toAbsoluteUrl(src);
            if (fullUrl !== null && IMAGE_URL_PATTERN.test(fullUrl)) {
                seenUrls.add(fullUrl);
            }
        }
        // A Set iterates in insertion order, so document order is preserved.
        return Array.from(seenUrls);
    };

    /**
     * Builds the URL of page `pageNum` from the current location by cutting the
     * address after the first occurrence of "html".
     * @param {number} pageNum 1-based page number.
     * @returns {string} the page URL, or the current URL unchanged when it
     *     contains no "html".
     */
    const buildPageUrl = (pageNum) => {
        const currentUrl = window.location.href;
        const htmlIndex = currentUrl.indexOf('html');
        if (htmlIndex === -1) {
            console.error('URL中没有找到html');
            return currentUrl;
        }
        const basePart = currentUrl.substring(0, htmlIndex + 4);
        return pageNum === 1 ? basePart : `${basePart}/${pageNum}`;
    };

    /**
     * Reads the page number that follows "html/" in the current URL.
     * @returns {number} the page number, or 1 when none is present.
     */
    const getCurrentPageNumber = () => {
        const currentUrl = window.location.href;
        const htmlIndex = currentUrl.indexOf('html');
        if (htmlIndex === -1) {
            return 1;
        }
        const match = currentUrl.substring(htmlIndex + 4).match(/^\/(\d+)/);
        if (!match) {
            return 1;
        }
        const pageNum = Number.parseInt(match[1], 10);
        return !Number.isNaN(pageNum) && pageNum > 0 ? pageNum : 1;
    };

    /**
     * POSTs `data` as JSON to `CONFIG.backendUrl`.
     * @param {object} data
     * @returns {Promise<object>} resolves with the response on a 2xx status;
     *     rejects with an Error on any other status, on a network error, on
     *     timeout or on abort.
     */
    const sendToBackend = (data) => new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
            method: 'POST',
            url: CONFIG.backendUrl,
            headers: {
                'Content-Type': 'application/json',
            },
            data: JSON.stringify(data),
            timeout: CONFIG.requestTimeout,
            onload: (response) => {
                if (response.status >= 200 && response.status < 300) {
                    resolve(response);
                } else {
                    reject(new Error(`HTTP ${response.status}: ${response.statusText}`));
                }
            },
            onerror: (response) => {
                // The callback receives a response object, not an Error, so
                // wrap it to give callers a usable `.message`.
                const detail = response && (response.error || response.statusText);
                reject(new Error(detail ? `网络错误: ${detail}` : '网络错误,无法连接后端'));
            },
            // Without these two handlers the promise would never settle.
            ontimeout: () => {
                reject(new Error(`请求超时 (${CONFIG.requestTimeout}ms)`));
            },
            onabort: () => {
                reject(new Error('请求已中止'));
            },
        });
    });

    /**
     * Flattens the collected images in page order into zero-padded keys
     * ("0001", "0002", …) and sends them to the backend.
     * @returns {Promise<boolean>} true when the backend accepted the data.
     */
    const sendAllData = async () => {
        updateStatus('整理数据并发送到后端...');

        const sortedPages = Object.keys(allImages).map(Number).sort((a, b) => a - b);
        const finalImages = {};
        let totalCount = 0;
        for (const page of sortedPages) {
            for (const imgUrl of allImages[page] || []) {
                totalCount++;
                finalImages[String(totalCount).padStart(4, '0')] = imgUrl;
            }
        }

        const data = {
            title: document.title || '无标题',
            source: source,
            url: buildPageUrl(1),
            totalPages: sortedPages.length,
            totalImages: totalCount,
            imgs: finalImages,
        };
        console.log('准备发送的数据:', data);

        try {
            await sendToBackend(data);
            updateStatus(`✅ 发送成功!\n共 ${sortedPages.length} 页\n${totalCount} 张图片`);
            return true;
        } catch (error) {
            updateStatus(`❌ 发送失败: ${error.message}`);
            return false;
        }
    };

    /**
     * Ends the crawl: clears the session flags, sends whatever was collected
     * and puts the UI back into its idle state (even when sending throws).
     * @returns {Promise<void>}
     */
    const finishCrawling = async () => {
        sessionStorage.removeItem(STORAGE_KEYS.crawling);
        sessionStorage.removeItem(STORAGE_KEYS.currentPage);

        try {
            if (Object.keys(allImages).length > 0) {
                await sendAllData();
            } else {
                updateStatus('未找到任何图片数据');
            }
        } finally {
            setButtonBusy(false);
            isCrawling = false;
            hideStatusAfter(5000);
        }
    };

    /** Runs finishCrawling from a timer callback without leaving a floating promise. */
    const finishCrawlingSafely = () => {
        finishCrawling().catch((error) => {
            console.error('结束爬取时发生错误:', error);
        });
    };

    /**
     * Processes the current page of an active crawl: records its images and
     * then either navigates to the next page or finishes the crawl.
     * Does nothing when sessionStorage does not mark a crawl as active.
     * @returns {Promise<void>}
     */
    const beginPageProcessing = async () => {
        if (!isCrawlSessionActive()) {
            console.log('不在爬取会话中,停止处理');
            return;
        }

        const currentUrl = window.location.href;
        // Landing on an already processed URL means the crawl is looping.
        if (crawledUrls.includes(currentUrl)) {
            updateStatus('检测到重复URL,停止爬取');
            await finishCrawling();
            return;
        }
        crawledUrls.push(currentUrl);
        sessionStorage.setItem(STORAGE_KEYS.crawledUrls, JSON.stringify(crawledUrls));

        // The address bar is the source of truth for the page number.
        currentPage = getCurrentPageNumber();
        updateStatus(`处理第 ${currentPage} 页...`);

        const imageUrls = getCurrentPageImages();
        if (imageUrls.length === 0) {
            // A page without images is treated as the end of the gallery.
            updateStatus(`第 ${currentPage} 页: 未找到图片`);
            setTimeout(finishCrawlingSafely, CONFIG.pageDelay);
            return;
        }

        console.log(`第 ${currentPage} 页找到 ${imageUrls.length} 张图片`);
        allImages[currentPage] = imageUrls;
        sessionStorage.setItem(STORAGE_KEYS.allImages, JSON.stringify(allImages));
        updateStatus(`第 ${currentPage} 页: 找到 ${imageUrls.length} 张图片`);

        setTimeout(() => {
            const nextPage = currentPage + 1;
            if (nextPage > CONFIG.maxPages) {
                updateStatus(`已达到最大页数 ${CONFIG.maxPages}`);
                finishCrawlingSafely();
                return;
            }

            const nextUrl = buildPageUrl(nextPage);
            if (crawledUrls.includes(nextUrl)) {
                updateStatus('下一页URL已爬取过,停止爬取');
                finishCrawlingSafely();
                return;
            }

            updateStatus(`准备跳转到第 ${nextPage} 页`);
            sessionStorage.setItem(STORAGE_KEYS.currentPage, nextPage.toString());
            setTimeout(() => {
                window.location.href = nextUrl;
            }, CONFIG.pageDelay);
        }, CONFIG.pageDelay);
    };

    /**
     * Runs beginPageProcessing from a non-async context. On an unexpected
     * error the crawl is finished so the UI and session flags are reset.
     */
    const processPageSafely = () => {
        beginPageProcessing().catch((error) => {
            console.error('爬取过程中发生错误:', error);
            finishCrawlingSafely();
        });
    };

    /**
     * Click handler: resets all state, marks the session as crawling and
     * starts at page 1 (navigating there first when necessary).
     * @returns {Promise<void>}
     */
    const startCrawling = async () => {
        if (isCrawling) {
            alert('正在爬取中,请稍候...');
            return;
        }

        // Refuse to start on a page that has no image container.
        if (!document.querySelector('#basicExample')) {
            alert('未找到图片容器 #basicExample,请确认页面结构!\n\n可能原因:\n1. 页面未完全加载\n2. 图片在滚动后才加载\n3. 网站结构已变化');
            return;
        }

        setButtonBusy(true);
        isCrawling = true;
        allImages = {};
        crawledUrls = [];
        currentPage = 1;
        sessionStorage.removeItem(STORAGE_KEYS.allImages);
        sessionStorage.removeItem(STORAGE_KEYS.crawledUrls);

        updateStatus('开始自动翻页爬取...');

        const firstPageUrl = buildPageUrl(1);
        sessionStorage.setItem(STORAGE_KEYS.crawling, 'true');
        sessionStorage.setItem(STORAGE_KEYS.currentPage, '1');

        if (window.location.href !== firstPageUrl) {
            // The crawl continues from onPageLoad once page 1 has loaded.
            updateStatus(`跳转到第一页`);
            window.location.href = firstPageUrl;
            return;
        }

        processPageSafely();
    };

    /**
     * Reads a JSON value from sessionStorage.
     * @param {string} key
     * @param {function(*): boolean} isValid accepts the parsed value.
     * @param {*} fallback returned when the entry is missing, unparsable or
     *     rejected by `isValid`.
     * @returns {*}
     */
    const readStoredJson = (key, isValid, fallback) => {
        const raw = sessionStorage.getItem(key);
        if (!raw) {
            return fallback;
        }
        try {
            const parsed = JSON.parse(raw);
            return isValid(parsed) ? parsed : fallback;
        } catch (error) {
            console.warn(`无法解析 sessionStorage 中的 ${key}:`, error);
            return fallback;
        }
    };

    /**
     * Restores the crawl state saved by the previous page and schedules the
     * processing of the current page. Does nothing when no crawl is active.
     */
    const onPageLoad = () => {
        if (!isCrawlSessionActive()) {
            return;
        }
        isCrawling = true;
        currentPage = getCurrentPageNumber();
        allImages = readStoredJson(
            STORAGE_KEYS.allImages,
            (value) => value !== null && typeof value === 'object' && !Array.isArray(value),
            allImages
        );
        crawledUrls = readStoredJson(STORAGE_KEYS.crawledUrls, Array.isArray, crawledUrls);

        setTimeout(processPageSafely, CONFIG.resumeDelay);
    };

    /**
     * Mounts the UI once per document and, when a crawl is in progress,
     * resumes it with the button already shown in its busy state.
     */
    const init = () => {
        if (document.getElementById(BUTTON_ID)) {
            return;
        }
        document.body.appendChild(createButton());
        document.body.appendChild(createStatusDisplay());

        if (isCrawlSessionActive()) {
            isCrawling = true;
            setButtonBusy(true);
            updateStatus('检测到未完成的爬取任务,继续执行...');
            setTimeout(onPageLoad, CONFIG.resumeDelay);
            return;
        }

        updateStatus('HD4K下载器已加载\n点击按钮开始自动翻页爬取');
        hideStatusAfter(3000);
    };

    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', init);
    } else {
        init();
    }
})();