You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
436 lines
14 KiB
436 lines
14 KiB
// ==UserScript==
// @name hd4k_downloader
// @namespace http://tampermonkey.net/
// @version 1.4
// @description 简单直接的自动翻页图片爬取
// @author Jack
// @match https://www.4khd.com/*
// @grant GM_xmlhttpRequest
// ==/UserScript==

(function() {
|
|
'use strict';
|
|
|
|
// Crawl settings.
const CONFIG = {
    // Highest page number the crawler will navigate to before stopping.
    maxPages: 50,
    // Delay in milliseconds passed to the timers between crawl steps.
    pageDelay: 1500,
    // Endpoint that receives the collected data as a JSON POST body.
    backendUrl: 'http://127.0.0.1:55830/api/save_json',
};

// Value sent in the `source` field of the payload.
const source = 'hd4k';

// Mutable crawl state. `allImages` and `crawledUrls` are also mirrored to
// sessionStorage by the crawl functions so they can be restored after the
// page navigates.
let isCrawling = false;
let currentPage = 1;
// Running counter used to build the zero-padded image keys of the payload.
let imgIndex = 1;
// Maps a page number to the list of image URLs collected on that page.
let allImages = {};
// URLs of the pages that have already been processed.
let crawledUrls = [];

/**
 * Builds the fixed-position button that starts a crawl when clicked.
 *
 * The button is only created here; the caller attaches it to the page.
 * Hover styling is skipped while `isCrawling` is true.
 *
 * @returns {HTMLButtonElement} The configured, not yet attached button.
 */
const createButton = () => {
    const btn = document.createElement('button');
    btn.textContent = '开始爬取';
    btn.id = 'hd4k-btn';

    Object.assign(btn.style, {
        position: 'fixed',
        top: '14%',
        right: '1%',
        transform: 'translateY(-50%)',
        padding: '8px 16px',
        fontSize: '12px',
        fontWeight: 'bold',
        backgroundColor: '#2c80ff',
        color: '#fff',
        border: 'none',
        borderRadius: '8px',
        cursor: 'pointer',
        zIndex: '10000',
        boxShadow: '0 2px 5px rgba(0,0,0,0.2)',
        transition: 'all 0.3s ease',
    });

    // Shared by both hover handlers: restyle only when no crawl is running.
    const restyleUnlessCrawling = (background, transform) => {
        if (isCrawling) {
            return;
        }
        btn.style.backgroundColor = background;
        btn.style.transform = transform;
    };

    btn.addEventListener('mouseenter', () => {
        restyleUnlessCrawling('#1a6ee0', 'translateY(-50%) scale(1.05)');
    });
    btn.addEventListener('mouseleave', () => {
        restyleUnlessCrawling('#2c80ff', 'translateY(-50%) scale(1)');
    });
    btn.addEventListener('click', startCrawling);

    return btn;
};

/**
 * Builds the status box used to show progress messages.
 *
 * The box starts hidden (`display: none`) and is not attached to the page
 * here; the caller appends it.
 *
 * @returns {HTMLDivElement} The configured, not yet attached status element.
 */
const createStatusDisplay = () => {
    const box = document.createElement('div');
    box.id = 'hd4k-status';

    Object.assign(box.style, {
        position: 'fixed',
        top: '18%',
        right: '1%',
        padding: '10px',
        backgroundColor: 'rgba(0,0,0,0.85)',
        color: '#fff',
        borderRadius: '5px',
        fontSize: '12px',
        zIndex: '9999',
        minWidth: '180px',
        display: 'none',
    });

    return box;
};

/**
 * Shows a progress message in the status box and logs it to the console.
 *
 * The message may contain `<br>` markers, which are rendered as line breaks.
 * Everything else is inserted as plain text, so markup that ends up in a
 * message (for example text taken from a failed backend response) is shown
 * literally instead of being parsed as HTML.
 *
 * @param {string} message - Text to display; `<br>` separates lines.
 * @returns {void}
 */
const updateStatus = (message) => {
    const text = String(message);
    const statusDiv = document.getElementById('hd4k-status');

    if (statusDiv) {
        // Clear the previous message before rebuilding the content.
        statusDiv.textContent = '';
        text.split(/<br\s*\/?>/i).forEach((line, index) => {
            if (index > 0) {
                statusDiv.appendChild(document.createElement('br'));
            }
            statusDiv.appendChild(document.createTextNode(line));
        });
        statusDiv.style.display = 'block';
    }

    console.log(`[状态] ${text}`);
};

/**
 * Collects the image URLs found inside the `#basicExample` container of the
 * current page.
 *
 * For each `<img>` the first non-empty value of `src`, `data-src` and
 * `currentSrc` is used. `data:` and `blob:` sources are ignored, the value is
 * turned into an absolute URL, and only URLs ending in a known image
 * extension (optionally followed by a query string or fragment) are kept.
 * Duplicates are removed while preserving first-seen order.
 *
 * @returns {string[]} Absolute image URLs; empty when the container is
 *     missing or holds no matching images.
 */
const getCurrentPageImages = () => {
    // Look the container up on every call.
    const container = document.querySelector('#basicExample');
    if (!container) {
        console.log('当前页面未找到图片容器 #basicExample');
        return [];
    }

    const imagePattern = /\.(jpg|jpeg|png|gif|webp|bmp|tiff)([?#].*)?$/i;
    const uniqueUrls = new Set();

    for (const img of container.querySelectorAll('img')) {
        // Trim before use: padding whitespace would otherwise defeat the
        // `$`-anchored extension check below and silently drop the image.
        const src = (img.src || img.dataset.src || img.currentSrc || '').trim();
        if (!src || src.startsWith('data:') || src.startsWith('blob:')) {
            continue;
        }

        let fullUrl = src;
        if (src.startsWith('//')) {
            fullUrl = window.location.protocol + src;
        } else if (src.startsWith('/')) {
            fullUrl = window.location.origin + src;
        } else if (!src.startsWith('http')) {
            try {
                fullUrl = new URL(src, window.location.href).href;
            } catch (error) {
                // One malformed source must not abort the whole page.
                console.warn('[hd4k] skipping unparsable image URL:', src, error);
                continue;
            }
        }

        if (imagePattern.test(fullUrl)) {
            uniqueUrls.add(fullUrl);
        }
    }

    return [...uniqueUrls];
};

/**
 * Finds where the article part of a URL ends, i.e. the index just past its
 * "html" suffix.
 *
 * A real `.html` suffix (followed by `/`, `?`, `#` or the end of the URL) is
 * preferred, so an "html" that merely appears inside a path segment is not
 * mistaken for it. When no such suffix exists, the first occurrence of
 * "html" is used as a fallback.
 *
 * @param {string} url - URL to inspect.
 * @returns {number} Index just past the suffix, or -1 when "html" is absent.
 */
const findHtmlSuffixEnd = (url) => {
    const suffix = /\.html(?=[/?#]|$)/.exec(url);
    if (suffix) {
        return suffix.index + suffix[0].length;
    }
    const firstHtml = url.indexOf('html');
    return firstHtml === -1 ? -1 : firstHtml + 4;
};

/**
 * Builds the URL of a given page of the article currently shown.
 *
 * Page 1 is the URL up to and including the "html" suffix; later pages
 * append `/<pageNum>`. Anything after the suffix in the current URL is
 * discarded.
 *
 * @param {number} pageNum - 1-based page number.
 * @returns {string} URL of that page, or the current URL unchanged when it
 *     contains no "html".
 */
const buildPageUrl = (pageNum) => {
    const currentUrl = window.location.href;
    const suffixEnd = findHtmlSuffixEnd(currentUrl);

    if (suffixEnd === -1) {
        console.error('URL中没有找到html');
        return currentUrl;
    }

    const basePart = currentUrl.substring(0, suffixEnd);
    return pageNum === 1 ? basePart : `${basePart}/${pageNum}`;
};

/**
 * Reads the page number from the current URL.
 *
 * @returns {number} The positive integer that directly follows the "html"
 *     suffix as `/<n>`, or 1 when there is none.
 */
const getCurrentPageNumber = () => {
    const currentUrl = window.location.href;
    const suffixEnd = findHtmlSuffixEnd(currentUrl);
    if (suffixEnd === -1) {
        return 1;
    }

    const match = currentUrl.substring(suffixEnd).match(/^\/(\d+)/);
    if (!match) {
        return 1;
    }

    const pageNum = Number.parseInt(match[1], 10);
    return !Number.isNaN(pageNum) && pageNum > 0 ? pageNum : 1;
};

/**
 * POSTs `data` as JSON to `CONFIG.backendUrl` through `GM_xmlhttpRequest`.
 *
 * The returned promise always settles: it resolves with the response for a
 * 2xx status and rejects with an `Error` for any other status, a network
 * error, a timeout, or an aborted request.
 *
 * @param {*} data - JSON-serialisable payload.
 * @returns {Promise<object>} Resolves with the response object passed to
 *     `onload`.
 */
const sendToBackend = (data) => {
    const timeoutMs = 10000;

    return new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
            method: 'POST',
            url: CONFIG.backendUrl,
            headers: {
                'Content-Type': 'application/json',
            },
            data: JSON.stringify(data),
            timeout: timeoutMs,
            onload: (response) => {
                if (response.status >= 200 && response.status < 300) {
                    resolve(response);
                } else {
                    reject(new Error(`HTTP ${response.status}: ${response.statusText}`));
                }
            },
            onerror: (failure) => {
                if (failure instanceof Error) {
                    reject(failure);
                    return;
                }
                // Callers read `.message`, so wrap whatever the callback
                // received in an Error.
                const detail = (failure && (failure.error || failure.statusText)) || '网络错误';
                reject(new Error(`请求失败: ${detail}`));
            },
            // Without these two handlers a timed-out or aborted request
            // would leave the promise pending forever.
            ontimeout: () => {
                reject(new Error(`请求超时 (${timeoutMs}ms)`));
            },
            onabort: () => {
                reject(new Error('请求已中止'));
            },
        });
    });
};

/**
 * Flattens the per-page image lists into one payload and posts it to the
 * backend.
 *
 * Pages are emitted in ascending numeric order. Each image gets a
 * zero-padded 4-digit key taken from the running `imgIndex` counter, which
 * is advanced as images are added. The outcome is reported through
 * `updateStatus`.
 *
 * @returns {Promise<boolean>} `true` when the backend accepted the data,
 *     `false` when sending failed.
 */
const sendAllData = async () => {
    updateStatus('整理数据并发送到后端...');

    const sortedPages = Object.keys(allImages).map(Number).sort((a, b) => a - b);
    const finalImages = {};
    let totalCount = 0;

    for (const page of sortedPages) {
        for (const imgUrl of allImages[page] || []) {
            finalImages[String(imgIndex).padStart(4, '0')] = imgUrl;
            imgIndex++;
            totalCount++;
        }
    }

    const data = {
        title: document.title || '无标题',
        source: source,
        url: buildPageUrl(1),
        totalPages: sortedPages.length,
        totalImages: totalCount,
        imgs: finalImages,
    };

    console.log('准备发送的数据:', data);

    try {
        await sendToBackend(data);
        updateStatus(`✅ 发送成功!<br>共 ${sortedPages.length} 页<br>${totalCount} 张图片`);
        return true;
    } catch (error) {
        // The rejection value is not guaranteed to be an Error, so derive a
        // readable reason instead of printing "undefined".
        let reason = '未知错误';
        if (typeof error === 'string' && error) {
            reason = error;
        } else if (error) {
            reason = error.message || error.error || error.statusText || reason;
        }
        updateStatus(`❌ 发送失败: ${reason}`);
        return false;
    }
};

/**
 * Processes the page currently shown as one step of the crawl.
 *
 * Does nothing unless the `hd4k_crawling` session flag is set. Otherwise it
 * records the page URL, collects the page's images into `allImages`
 * (mirrored to sessionStorage), and after `CONFIG.pageDelay` either
 * navigates to the next page or finishes the crawl. The crawl finishes when
 * the URL was already visited, the page has no images, the next page would
 * exceed `CONFIG.maxPages`, or the next URL was already visited.
 *
 * @returns {Promise<void>}
 */
const beginPageProcessing = async () => {
    if (sessionStorage.getItem('hd4k_crawling') !== 'true') {
        console.log('不在爬取会话中,停止处理');
        return;
    }

    // Report a reason, then wrap the crawl up.
    const stopWith = async (reason) => {
        updateStatus(reason);
        await finishCrawling();
    };

    const hereUrl = window.location.href;
    if (crawledUrls.includes(hereUrl)) {
        await stopWith('检测到重复URL,停止爬取');
        return;
    }

    crawledUrls.push(hereUrl);
    sessionStorage.setItem('hd4k_crawled_urls', JSON.stringify(crawledUrls));

    // The URL is the source of truth for the page number.
    currentPage = getCurrentPageNumber();

    updateStatus(`处理第 ${currentPage} 页...`);
    const pageImages = getCurrentPageImages();

    if (pageImages.length === 0) {
        updateStatus(`第 ${currentPage} 页: 未找到图片`);
        setTimeout(() => {
            finishCrawling();
        }, CONFIG.pageDelay);
        return;
    }

    console.log(`第 ${currentPage} 页找到 ${pageImages.length} 张图片`);

    allImages[currentPage] = pageImages;
    sessionStorage.setItem('hd4k_all_images', JSON.stringify(allImages));
    updateStatus(`第 ${currentPage} 页: 找到 ${pageImages.length} 张图片`);

    const advance = async () => {
        const nextPage = currentPage + 1;
        if (nextPage > CONFIG.maxPages) {
            await stopWith(`已达到最大页数 ${CONFIG.maxPages}`);
            return;
        }

        const nextUrl = buildPageUrl(nextPage);
        if (crawledUrls.includes(nextUrl)) {
            await stopWith('下一页URL已爬取过,停止爬取');
            return;
        }

        updateStatus(`准备跳转到第 ${nextPage} 页`);
        sessionStorage.setItem('hd4k_current_page', nextPage.toString());

        // Second delay before the actual navigation.
        setTimeout(() => {
            window.location.href = nextUrl;
        }, CONFIG.pageDelay);
    };

    setTimeout(advance, CONFIG.pageDelay);
};

/**
 * Click handler that starts a new crawl.
 *
 * Refuses to start while a crawl is running or when the `#basicExample`
 * container is missing. Otherwise it switches the button to its busy look,
 * resets the in-memory and sessionStorage crawl state, sets the session
 * flag, and either redirects to page 1 of the article or, when already
 * there, starts processing the current page.
 *
 * @returns {Promise<void>}
 */
const startCrawling = async () => {
    if (isCrawling) {
        alert('正在爬取中,请稍候...');
        return;
    }

    // The image container must be present before anything is changed.
    if (!document.querySelector('#basicExample')) {
        alert('未找到图片容器 #basicExample,请确认页面结构!\n\n可能原因:\n1. 页面未完全加载\n2. 图片在滚动后才加载\n3. 网站结构已变化');
        return;
    }

    const btn = document.getElementById('hd4k-btn');
    btn.textContent = '爬取中...';
    btn.style.backgroundColor = '#ff9800';
    btn.disabled = true;

    // Fresh in-memory state.
    isCrawling = true;
    allImages = {};
    crawledUrls = [];
    currentPage = 1;
    imgIndex = 1;

    // Drop whatever a previous crawl left in the session.
    for (const staleKey of ['hd4k_all_images', 'hd4k_crawled_urls']) {
        sessionStorage.removeItem(staleKey);
    }

    updateStatus('开始自动翻页爬取...');

    const firstPageUrl = buildPageUrl(1);
    const hereUrl = window.location.href;

    sessionStorage.setItem('hd4k_crawling', 'true');
    sessionStorage.setItem('hd4k_current_page', '1');

    if (hereUrl === firstPageUrl) {
        beginPageProcessing();
        return;
    }

    // Not on page 1: go there; the crawl resumes after the reload.
    updateStatus('跳转到第一页');
    window.location.href = firstPageUrl;
};

/**
 * Ends the crawl: clears the session flags, sends the collected images to
 * the backend when there are any, restores the button to its idle look, and
 * hides the status box five seconds later.
 *
 * @returns {Promise<void>}
 */
const finishCrawling = async () => {
    for (const sessionKey of ['hd4k_crawling', 'hd4k_current_page']) {
        sessionStorage.removeItem(sessionKey);
    }

    const hasCollectedPages = Object.keys(allImages).length > 0;
    if (!hasCollectedPages) {
        updateStatus('未找到任何图片数据');
    } else {
        await sendAllData();
    }

    // Back to the idle look.
    const btn = document.getElementById('hd4k-btn');
    btn.textContent = '开始爬取';
    btn.style.backgroundColor = '#2c80ff';
    btn.disabled = false;
    isCrawling = false;

    const hideStatusBox = () => {
        const box = document.getElementById('hd4k-status');
        if (box) {
            box.style.display = 'none';
        }
    };
    setTimeout(hideStatusBox, 5000);
};

/**
 * Resumes a crawl after a page load.
 *
 * When the `hd4k_crawling` session flag is set, this restores `allImages`
 * and `crawledUrls` from sessionStorage and schedules processing of the
 * current page 1.5 seconds later. A saved value that is not valid JSON, or
 * that has the wrong shape, is ignored with a warning and the in-memory
 * value is kept, so damaged session data cannot make every page load throw.
 *
 * @returns {void}
 */
const onPageLoad = () => {
    if (sessionStorage.getItem('hd4k_crawling') !== 'true') {
        return;
    }

    isCrawling = true;
    currentPage = getCurrentPageNumber();

    // Read one saved value; fall back when it is absent, unparsable or
    // rejected by `isValid`.
    const readSaved = (key, isValid, fallback) => {
        const raw = sessionStorage.getItem(key);
        if (!raw) {
            return fallback;
        }
        try {
            const parsed = JSON.parse(raw);
            if (isValid(parsed)) {
                return parsed;
            }
            console.warn(`[hd4k] ignoring saved "${key}": unexpected shape`);
        } catch (error) {
            console.warn(`[hd4k] ignoring saved "${key}": invalid JSON`, error);
        }
        return fallback;
    };

    const isPlainObject = (value) =>
        value !== null && typeof value === 'object' && !Array.isArray(value);

    allImages = readSaved('hd4k_all_images', isPlainObject, allImages);
    crawledUrls = readSaved('hd4k_crawled_urls', Array.isArray, crawledUrls);

    setTimeout(() => {
        beginPageProcessing();
    }, 1500);
};

/**
 * Installs the crawler UI once per page.
 *
 * When no `#hd4k-btn` element exists yet, this creates the button and the
 * status box, appends both to `document.body`, shows a "loaded" message that
 * is hidden again after three seconds, and then calls `onPageLoad`.
 *
 * @returns {void}
 */
const init = () => {
    // Already installed on this page.
    if (document.getElementById('hd4k-btn')) {
        return;
    }

    const widgets = [createButton(), createStatusDisplay()];
    widgets.forEach((widget) => {
        document.body.appendChild(widget);
    });

    updateStatus('HD4K下载器已加载<br>点击按钮开始自动翻页爬取');

    const hideStatusBox = () => {
        const box = document.getElementById('hd4k-status');
        if (box) {
            box.style.display = 'none';
        }
    };
    setTimeout(hideStatusBox, 3000);

    onPageLoad();
};

// Entry point. While the document is still loading, defer to `init` on
// DOMContentLoaded. Otherwise, when a crawl is in progress, install the UI in
// its busy state and resume after 1.5 seconds; with no crawl in progress run
// the normal `init`.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', init);
} else if (sessionStorage.getItem('hd4k_crawling') === 'true') {
    isCrawling = true;

    const busyButton = createButton();
    busyButton.textContent = '爬取中...';
    busyButton.style.backgroundColor = '#ff9800';
    busyButton.disabled = true;
    document.body.appendChild(busyButton);

    document.body.appendChild(createStatusDisplay());
    updateStatus('检测到未完成的爬取任务,继续执行...');

    setTimeout(onPageLoad, 1500);
} else {
    init();
}

})(); |