// ==UserScript==
// @name hd4k_downloader_simple
// @namespace http://tampermonkey.net/
// @version 1.2
// @description 简单直接的自动翻页图片爬取
// @author Your Name
// @match *://*/*
// @grant GM_xmlhttpRequest
// ==/UserScript==
(function() {
'use strict';
// Tunables for the crawl.
const CONFIG = {
    // Highest page number the crawl will navigate to.
    maxPages: 50,
    // Delay (ms) used for the timers between page-processing steps.
    pageDelay: 1500,
    // Endpoint that receives the collected data as a JSON POST.
    backendUrl: 'http://127.0.0.1:55830/api/save_json',
};

// Value reported in the `source` field of the payload sent to the backend.
const source = 'hd4k';

// Mutable crawl state shared by the functions below.
let isCrawling = false; // true while a crawl is in progress
let currentPage = 1;    // page number currently being processed
let imgIndex = 1;       // next sequence number used for image keys in the payload
let allImages = {};     // page number -> array of image URLs found on that page
let crawledUrls = [];   // page URLs that have already been processed
/**
 * Builds the fixed-position button that starts a crawl when clicked.
 *
 * The element is created and fully styled but not inserted into the document;
 * the caller is responsible for appending it.
 *
 * @returns {HTMLButtonElement} The configured button with id `hd4k-btn`.
 */
const createButton = () => {
    const btn = document.createElement('button');
    btn.textContent = '开始爬取';
    btn.id = 'hd4k-btn';

    Object.assign(btn.style, {
        position: 'fixed',
        top: '14%',
        right: '1%',
        transform: 'translateY(-50%)',
        padding: '8px 16px',
        fontSize: '12px',
        fontWeight: 'bold',
        backgroundColor: '#2c80ff',
        color: '#fff',
        border: 'none',
        borderRadius: '8px',
        cursor: 'pointer',
        zIndex: '10000',
        boxShadow: '0 2px 5px rgba(0,0,0,0.2)',
        transition: 'all 0.3s ease',
    });

    // Hover feedback is suppressed while a crawl is running so the
    // "in progress" appearance set elsewhere is not overwritten.
    const restyle = (background, transform) => {
        if (isCrawling) {
            return;
        }
        btn.style.backgroundColor = background;
        btn.style.transform = transform;
    };

    btn.addEventListener('mouseenter', () => {
        restyle('#1a6ee0', 'translateY(-50%) scale(1.05)');
    });
    btn.addEventListener('mouseleave', () => {
        restyle('#2c80ff', 'translateY(-50%) scale(1)');
    });
    btn.addEventListener('click', startCrawling);

    return btn;
};
/**
 * Builds the status panel that `updateStatus` writes into.
 *
 * The panel starts hidden (`display: none`) and is not inserted into the
 * document; the caller is responsible for appending it.
 *
 * @returns {HTMLDivElement} The configured panel with id `hd4k-status`.
 */
const createStatusDisplay = () => {
    const panel = document.createElement('div');
    panel.id = 'hd4k-status';

    Object.assign(panel.style, {
        position: 'fixed',
        top: '18%',
        right: '1%',
        padding: '10px',
        backgroundColor: 'rgba(0,0,0,0.85)',
        color: '#fff',
        borderRadius: '5px',
        fontSize: '12px',
        zIndex: '9999',
        minWidth: '180px',
        display: 'none',
    });

    return panel;
};
/**
 * Shows a status message in the status panel (if present) and logs it.
 *
 * The message is written as plain text rather than HTML: callers pass text
 * that is not under this script's control (for example backend error
 * messages), so it must never be parsed as markup. `white-space: pre-line`
 * makes newline characters in the message render as line breaks.
 *
 * @param {string} message - Text to display; may contain `\n` line breaks.
 * @returns {void}
 */
const updateStatus = (message) => {
    const statusDiv = document.getElementById('hd4k-status');
    if (statusDiv) {
        statusDiv.textContent = message;
        statusDiv.style.whiteSpace = 'pre-line';
        statusDiv.style.display = 'block';
    }
    console.log(`[状态] ${message}`);
};
/**
 * Collects the image URLs of every `<img>` element on the current page.
 *
 * For each element the first non-empty value of `src`, `data-src`,
 * `data-original` and `currentSrc` is used. `data:` and `blob:` URLs are
 * skipped, non-absolute values are resolved against the current location, and
 * only URLs ending in a known image extension (optionally followed by a query
 * string) are kept.
 *
 * @returns {string[]} De-duplicated absolute image URLs in document order.
 */
const getCurrentPageImages = () => {
    const imageExtension = /\.(jpg|jpeg|png|gif|webp|bmp|tiff)(\?.*)?$/i;

    // Turns protocol-relative, root-relative and relative values into absolute URLs.
    const toAbsolute = (raw) => {
        if (raw.startsWith('//')) {
            return window.location.protocol + raw;
        }
        if (raw.startsWith('/')) {
            return window.location.origin + raw;
        }
        if (!raw.startsWith('http')) {
            return new URL(raw, window.location.href).href;
        }
        return raw;
    };

    // A Set keeps first-seen order while dropping repeats.
    const unique = new Set();
    for (const img of document.querySelectorAll('img')) {
        const raw = img.src || img.dataset.src || img.dataset.original || img.currentSrc;
        if (!raw || !raw.trim() || raw.startsWith('data:') || raw.startsWith('blob:')) {
            continue;
        }
        const absolute = toAbsolute(raw);
        if (imageExtension.test(absolute)) {
            unique.add(absolute);
        }
    }
    return [...unique];
};
/**
 * Builds the URL of a given page from the current location.
 *
 * The current URL is cut right after the first occurrence of `html`; page 1
 * is that base itself and any other page is `<base>/<pageNum>`. When the
 * current URL does not contain `html`, an error is logged and the current
 * URL is returned unchanged.
 *
 * @param {number} pageNum - Page number to build the URL for.
 * @returns {string} URL of the requested page.
 */
const buildPageUrl = (pageNum) => {
    const href = window.location.href;
    const marker = 'html';
    const markerAt = href.indexOf(marker);

    if (markerAt < 0) {
        console.error('URL中没有找到html');
        return href;
    }

    const base = href.slice(0, markerAt + marker.length);
    return pageNum === 1 ? base : `${base}/${pageNum}`;
};
/**
 * Reads the page number from the current URL.
 *
 * The number is expected directly after the first `html` in the URL, in the
 * form `/<digits>`. Anything else (no `html`, no digits, or a value of 0)
 * yields page 1.
 *
 * @returns {number} The positive page number, or 1 when none is present.
 */
const getCurrentPageNumber = () => {
    const href = window.location.href;
    const markerAt = href.indexOf('html');
    if (markerAt === -1) {
        return 1;
    }

    const found = /^\/(\d+)/.exec(href.slice(markerAt + 4));
    const parsed = found ? Number.parseInt(found[1], 10) : Number.NaN;

    // NaN and 0 both fail this comparison and fall back to page 1.
    return parsed > 0 ? parsed : 1;
};
/**
 * POSTs `data` as JSON to `CONFIG.backendUrl` via `GM_xmlhttpRequest`.
 *
 * The returned promise always settles: besides `onload` and `onerror`, the
 * `ontimeout` and `onabort` callbacks are wired up so a timed-out or aborted
 * request rejects instead of leaving the caller waiting forever. Every
 * rejection is an `Error`, so callers can rely on `error.message`.
 *
 * @param {*} data - JSON-serializable payload.
 * @returns {Promise<object>} Resolves with the response object for 2xx statuses.
 * @throws {Error} (as rejection) on non-2xx status, network error, timeout or abort.
 */
const sendToBackend = (data) => {
    const timeoutMs = 10000;

    return new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
            method: 'POST',
            url: CONFIG.backendUrl,
            headers: {
                'Content-Type': 'application/json'
            },
            data: JSON.stringify(data),
            timeout: timeoutMs,
            onload: function(response) {
                if (response.status >= 200 && response.status < 300) {
                    resolve(response);
                } else {
                    reject(new Error(`HTTP ${response.status}: ${response.statusText}`));
                }
            },
            onerror: function(response) {
                // The error callback receives a response-like object, not an Error;
                // wrap it so the rejection carries a usable message.
                const detail = (response && (response.error || response.statusText)) || '网络错误';
                reject(new Error(String(detail)));
            },
            ontimeout: function() {
                reject(new Error(`请求超时 (${timeoutMs}ms)`));
            },
            onabort: function() {
                reject(new Error('请求被中止'));
            }
        });
    });
};
/**
 * Flattens the collected images into one numbered map and sends it to the backend.
 *
 * Pages are visited in ascending numeric order; each image gets a zero-padded
 * four-digit key taken from the shared `imgIndex` counter, which is advanced
 * as keys are handed out. The outcome is reported through `updateStatus`.
 *
 * @returns {Promise<boolean>} `true` when the backend accepted the data,
 *   `false` when sending failed.
 */
const sendAllData = async () => {
    updateStatus('整理数据并发送到后端...');

    const pageNumbers = Object.keys(allImages)
        .map(Number)
        .sort((a, b) => a - b);

    const numberedImages = {};
    let imageCount = 0;
    for (const pageNumber of pageNumbers) {
        const pageImages = allImages[pageNumber];
        if (!pageImages) {
            continue;
        }
        for (const imgUrl of pageImages) {
            numberedImages[String(imgIndex).padStart(4, '0')] = imgUrl;
            imgIndex++;
            imageCount++;
        }
    }

    const data = {
        title: document.title || '无标题',
        source,
        url: buildPageUrl(1),
        totalPages: pageNumbers.length,
        totalImages: imageCount,
        imgs: numberedImages,
    };
    console.log('准备发送的数据:', data);

    try {
        await sendToBackend(data);
    } catch (error) {
        updateStatus(`❌ 发送失败: ${error.message}`);
        return false;
    }

    updateStatus(`✅ 发送成功!
共 ${pageNumbers.length} 页
${imageCount} 张图片`);
    return true;
};
/**
 * Processes the current page as one step of an ongoing crawl.
 *
 * Does nothing unless the `hd4k_crawling` session flag is set. Otherwise it
 * records the current URL, collects the page's images into `allImages`
 * (persisted to sessionStorage so the data survives navigation) and, after
 * `CONFIG.pageDelay`, schedules navigation to the next page. The crawl is
 * finished instead when the URL was already seen, the page has no images,
 * the page limit is exceeded, or the next URL was already crawled.
 *
 * @returns {Promise<void>}
 */
const beginPageProcessing = async () => {
    if (sessionStorage.getItem('hd4k_crawling') !== 'true') {
        console.log('不在爬取会话中,停止处理');
        return;
    }

    const pageUrl = window.location.href;
    if (crawledUrls.includes(pageUrl)) {
        updateStatus('检测到重复URL,停止爬取');
        await finishCrawling();
        return;
    }
    crawledUrls.push(pageUrl);
    sessionStorage.setItem('hd4k_crawled_urls', JSON.stringify(crawledUrls));

    // The URL is the source of truth for the page number.
    currentPage = getCurrentPageNumber();
    updateStatus(`处理第 ${currentPage} 页...`);

    const found = getCurrentPageImages();
    console.log(`第 ${currentPage} 页找到 ${found.length} 张图片`);

    if (found.length === 0) {
        updateStatus(`第 ${currentPage} 页: 无图片`);
        setTimeout(async () => {
            await finishCrawling();
        }, CONFIG.pageDelay);
        return;
    }

    allImages[currentPage] = found;
    sessionStorage.setItem('hd4k_all_images', JSON.stringify(allImages));
    updateStatus(`第 ${currentPage} 页: 找到 ${found.length} 张图片`);

    // Decides whether to move on to the next page or to finish the crawl.
    const advance = async () => {
        const upcomingPage = currentPage + 1;
        if (upcomingPage > CONFIG.maxPages) {
            updateStatus(`已达到最大页数 ${CONFIG.maxPages}`);
            await finishCrawling();
            return;
        }

        const targetUrl = buildPageUrl(upcomingPage);
        if (crawledUrls.includes(targetUrl)) {
            updateStatus('下一页URL已爬取过,停止爬取');
            await finishCrawling();
            return;
        }

        updateStatus(`准备跳转到第 ${upcomingPage} 页`);
        sessionStorage.setItem('hd4k_current_page', String(upcomingPage));
        setTimeout(() => {
            window.location.href = targetUrl;
        }, CONFIG.pageDelay);
    };

    setTimeout(advance, CONFIG.pageDelay);
};
/**
 * Click handler that starts a fresh crawl.
 *
 * Refuses (with an alert) when a crawl is already running. Otherwise it puts
 * the button into its "in progress" look, resets all in-memory and stored
 * crawl state, marks the session as crawling, and either navigates to the
 * first page or - when already there - starts processing right away.
 *
 * @returns {Promise<void>}
 */
const startCrawling = async () => {
    if (isCrawling) {
        alert('正在爬取中,请稍候...');
        return;
    }

    const btn = document.getElementById('hd4k-btn');
    btn.textContent = '爬取中...';
    btn.style.backgroundColor = '#ff9800';
    btn.disabled = true;

    // Reset in-memory state from any previous run.
    isCrawling = true;
    allImages = {};
    crawledUrls = [];
    currentPage = 1;
    imgIndex = 1;

    // Drop data persisted by a previous run.
    for (const staleKey of ['hd4k_all_images', 'hd4k_crawled_urls']) {
        sessionStorage.removeItem(staleKey);
    }

    updateStatus('开始自动翻页爬取...');

    const firstPageUrl = buildPageUrl(1);
    const alreadyOnFirstPage = window.location.href === firstPageUrl;

    sessionStorage.setItem('hd4k_crawling', 'true');
    sessionStorage.setItem('hd4k_current_page', '1');

    if (alreadyOnFirstPage) {
        beginPageProcessing();
        return;
    }

    // The session flag set above makes the script resume after this navigation.
    updateStatus(`跳转到第一页`);
    window.location.href = firstPageUrl;
};
/**
 * Ends the crawl: clears the session flags, sends whatever was collected,
 * restores the button to its idle look and hides the status panel after
 * five seconds.
 *
 * @returns {Promise<void>}
 */
const finishCrawling = async () => {
    for (const sessionKey of ['hd4k_crawling', 'hd4k_current_page']) {
        sessionStorage.removeItem(sessionKey);
    }

    const hasCollectedImages = Object.keys(allImages).length > 0;
    if (hasCollectedImages) {
        await sendAllData();
    } else {
        updateStatus('未找到任何图片数据');
    }

    const btn = document.getElementById('hd4k-btn');
    btn.textContent = '开始爬取';
    btn.style.backgroundColor = '#2c80ff';
    btn.disabled = false;
    isCrawling = false;

    // Leave the final status visible for a moment before hiding the panel.
    setTimeout(() => {
        const panel = document.getElementById('hd4k-status');
        if (panel) {
            panel.style.display = 'none';
        }
    }, 5000);
};
/**
 * Resumes an in-progress crawl after a page load.
 *
 * When the `hd4k_crawling` session flag is set, the in-memory state is
 * rebuilt from sessionStorage and page processing is scheduled 1.5 seconds
 * later. Without the flag this is a no-op.
 *
 * @returns {void}
 */
const onPageLoad = () => {
    if (sessionStorage.getItem('hd4k_crawling') !== 'true') {
        return;
    }

    isCrawling = true;
    currentPage = getCurrentPageNumber();

    // Restore what earlier pages of this crawl persisted before navigating.
    const storedImages = sessionStorage.getItem('hd4k_all_images');
    const storedUrls = sessionStorage.getItem('hd4k_crawled_urls');
    if (storedImages) {
        allImages = JSON.parse(storedImages);
    }
    if (storedUrls) {
        crawledUrls = JSON.parse(storedUrls);
    }

    setTimeout(() => {
        beginPageProcessing();
    }, 1500);
};
/**
 * Mounts the crawler UI once and resumes a pending crawl, if any.
 *
 * If the button already exists in the document this is a no-op, so calling
 * it more than once is safe. Otherwise the button and status panel are
 * appended to `document.body`, a short "loaded" notice is shown for three
 * seconds, and `onPageLoad` is invoked.
 *
 * @returns {void}
 */
const init = () => {
    if (document.getElementById('hd4k-btn')) {
        return;
    }

    const button = createButton();
    const statusDiv = createStatusDisplay();
    document.body.appendChild(button);
    document.body.appendChild(statusDiv);

    // The line break must be an escape sequence: a raw newline inside a
    // quoted string literal is a syntax error that stops the script from loading.
    updateStatus('HD4K下载器已加载\n点击按钮开始自动翻页爬取');

    setTimeout(() => {
        const panel = document.getElementById('hd4k-status');
        if (panel) {
            panel.style.display = 'none';
        }
    }, 3000);

    onPageLoad();
};
// Entry point. Three cases:
//   1. DOM still loading        -> run init once it is ready.
//   2. DOM ready, no crawl flag -> run init right away.
//   3. DOM ready, crawl flagged -> mount the UI in its "in progress" look and
//      resume the crawl after 1.5 seconds.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', init);
} else if (sessionStorage.getItem('hd4k_crawling') !== 'true') {
    init();
} else {
    isCrawling = true;

    const resumeButton = createButton();
    resumeButton.textContent = '爬取中...';
    resumeButton.style.backgroundColor = '#ff9800';
    resumeButton.disabled = true;
    document.body.appendChild(resumeButton);

    const statusPanel = createStatusDisplay();
    document.body.appendChild(statusPanel);

    updateStatus('检测到未完成的爬取任务,继续执行...');
    setTimeout(onPageLoad, 1500);
}
})();