"""Download comic albums from jcomic.net.

For each title in comico_urls: fetch the chapter list, collect every image
URL, save the URL list, then download the images into
./downloads/<title>/ as 0001.png, 0002.png, ...
"""
import os
import time
import random
import httpx
from bs4 import BeautifulSoup

comico_urls = [
    '[PIXIV] LotteryFate (18900473)(AI)',
]

# Whether to route requests through a local proxy
use_proxy = True
proxy_url = 'http://127.0.0.1:7890'

# Site root; also used to turn relative chapter links into absolute URLs
base_url = 'https://jcomic.net'


def save_img(client, folder_path, img_links):
    for index, img_url in enumerate(img_links, start=1):
        try:
            # Build the file name, e.g. 0001.png, 0002.png
            file_name = f"{str(index).zfill(4)}.png"
            file_path = os.path.join(folder_path, file_name)

            # Skip files that have already been downloaded
            if os.path.exists(file_path):
                print(f"File already exists, skipping: {file_path}")
                continue

            # Fetch the image
            response = client.get(img_url)
            if response.status_code != 200:
                raise Exception(
                    f"Failed to download image {img_url}, "
                    f"status code: {response.status_code}")

            # Write the image bytes to disk
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f"Image saved: {file_path}")
        except Exception as e:
            raise Exception(f"Error while downloading image {img_url}: {e}")

        # random_sleep = random.uniform(2, 3)
        # print(f"Sleeping for {random_sleep} seconds")
        # time.sleep(random_sleep)


def get_imgs(client, folder_path, chapter_data):
    img_links = []
    for chapter_name, url in chapter_data.items():
        try:
            # Fetch the chapter page
            response = client.get(url)
            if response.status_code != 200:
                raise Exception(
                    f"Failed to access {url}, "
                    f"status code: {response.status_code}")

            # Parse the HTML
            soup = BeautifulSoup(response.text, 'html.parser')

            # Locate the element that contains the images
            parent_element = soup.select_one(
                'body > div.container > div.row.col-lg-12.col-md-12.col-xs-12')
            if not parent_element:
                raise Exception(f"{chapter_name}: image container not found")

            # Collect every <img> element
            img_elements = parent_element.select('img')
            total_images = len(img_elements)
            print(f'{chapter_name}: {total_images} images')

            # Record each image URL
            for img in img_elements:
                img_url = img.get('src')
                if img_url:
                    img_links.append(img_url)
        except Exception as e:
            print(f"Error while collecting image links: {e}")
            raise  # Re-raise so the retry loop in main() takes over
    return img_links


def save_urls(folder_path, img_links):
    # File that stores all collected image links
    save_path = os.path.join(folder_path, 'img_links.txt')

    # Write one link per line
    with open(save_path, 'w', encoding='utf-8') as file:
        for link in img_links:
            file.write(link + '\n')
    print(f"Image links saved to: {save_path}")


def new_folder(page_title):
    # Directory containing this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    download_dir = os.path.join(script_dir, 'downloads')
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    if page_title:
        # Target folder for this title
        folder_path = os.path.join(download_dir, page_title)

        # Create the folder if it does not exist yet
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        return folder_path


def get_chapter_data(client, target_url):
    result = {}
    try:
        response = client.get(target_url)
        if response.status_code != 200:
            raise Exception(
                f"Failed to access {target_url}, "
                f"status code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'html.parser')

        # All chapter links under the given selector
        elements = soup.select(
            'body > div.container > div:nth-child(3) > div:nth-child(2) a')

        # Map chapter title -> absolute URL
        for element in elements:
            url = element.get('href')
            text = element.get_text()
            result[text] = base_url + url
    except Exception as e:
        print(f"Error while fetching chapter data: {e}")
        raise  # Re-raise so the retry loop in main() takes over
    return result


def main():
    href_url = '/eps/'

    # Custom request headers copied from a browser session
    # (the cookie values are session-specific)
    custom_headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "max-age=0",
        "cookie": "_gid=GA1.2.724162267.1736817775; _ga_QL6YSDRWEV=GS1.1.1736837388.3.0.1736837388.0.0.0; _ga=GA1.2.1324234734.1736817774; _gat=1",
        "priority": "u=0, i",
        "sec-ch-ua": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"macOS"',
"document", "sec-fetch-mode": "navigate", "sec-fetch-site": "same-origin", "sec-fetch-user": "?1", "upgrade-insecure-requests": "1", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" } for comico_url in comico_urls: target_url = base_url + herf_url + comico_url print(target_url) # 最大重试次数 max_retries = 999 retry_count = 0 while retry_count < max_retries: try: # 创建 httpx.Client 实例,并设置自定义请求头 with httpx.Client(proxies=proxy_url if use_proxy else None, headers=custom_headers) as client: # 1, 获取页面章节数据 chapter_data = get_chapter_data(client, target_url) print(chapter_data) # 2, 在当前文件夹下创建一个文件夹,用来保存图片, 文件名称是 title folder_path = new_folder(comico_url) # 3, 遍历章节数据,获取img的链接 img_links = get_imgs(client, folder_path, chapter_data) print(img_links) # 4, 保存url到新建的文件夹中 save_urls(folder_path, img_links) # 5,遍历 img_links ,将图片保存到 folder_path中, 保存的文件名类似 0001.png save_img(client, folder_path, img_links) # 如果成功执行完成,跳出循环 print('done!') break except Exception as e: retry_count += 1 print(f"发生错误: {e},正在进行第 {retry_count} 次重试...") if retry_count >= max_retries: print("已达到最大重试次数,程序终止。") break # 固定延迟 10 分钟(600 秒) delay = 30 print(f"等待 {delay} 秒后重试...") time.sleep(delay) if __name__ == '__main__': main()