commit efcc609cc5
@@ -0,0 +1,6 @@
.DS_Store
__pycache__/
*.pyc
.idea

*/downloads/*
@@ -0,0 +1,168 @@
import time
import re
import os
import sqlite3
import httpx
from playwright.sync_api import sync_playwright

current_dir_path = os.path.dirname(os.path.abspath(__file__))

comico_key = 'OMzNzNS'
base_url = 'https://www.dumanwu.com'
target_url = base_url + '/' + comico_key

download_folder = os.path.join(current_dir_path, 'downloads')
if not os.path.exists(download_folder):
    os.mkdir(download_folder)


def write_db(title, db_path, chapter_folder_name, chapter_url):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    # Quote the table name: comic titles are not valid bare SQL identifiers.
    cursor.execute(
        f'CREATE TABLE IF NOT EXISTS "{title}" (id INTEGER PRIMARY KEY AUTOINCREMENT, chapter_name TEXT, url TEXT, state BOOLEAN DEFAULT 0)'
    )
    conn.commit()

    # Check whether chapter_name already exists
    cursor.execute(
        f'SELECT EXISTS(SELECT 1 FROM "{title}" WHERE chapter_name = ?)', (chapter_folder_name,))
    exists = cursor.fetchone()[0]

    if not exists:
        # Insert a new record if it does not exist
        cursor.execute(f'INSERT INTO "{title}" (chapter_name, url) VALUES (?, ?)', (chapter_folder_name, chapter_url))
        conn.commit()

    cursor.close()
    conn.close()


def load_db(title, db_path):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute(f'SELECT * FROM "{title}" WHERE state = 0 ORDER BY id ASC')
    rows = cursor.fetchall()
    cursor.close()
    conn.close()
    return rows


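# load_db() only returns rows with state = 0, but nothing in this script ever
# sets state = 1, so every chapter is re-fetched on each run. A minimal sketch
# of the missing updater, assuming the schema above (not called anywhere yet):
def mark_chapter_done(title, db_path, row_id):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute(f'UPDATE "{title}" SET state = 1 WHERE id = ?', (row_id,))
    conn.commit()
    cursor.close()
    conn.close()

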
def fetch_page_title(target_url):
    with httpx.Client(verify=False) as client:  # skip TLS certificate verification
        response = client.get(target_url)
        if response.status_code != 200:
            print(f'Error: {response.status_code}')
            exit(1)
        title = re.findall(r'<p></p><h1 class="name_mh">(.*?)</h1><p></p>', response.text)
        if title:
            return title[0]
        else:
            print("Title not found")
            exit(1)


def fetch_chapter_data():
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(
            headless=True,
            args=['--ignore-certificate-errors']
        )
        page = browser.new_page()
        page.goto(target_url)

        time.sleep(1)

        # Click the "load more chapters" button; it may be absent, so try a few times.
        button_selector = 'body > div > div > div.forminfo > div.chapterList > div.chapterlistload > div > button'
        for _ in range(3):
            try:
                page.click(button_selector)
                break
            except Exception:
                pass

        page.wait_for_timeout(1000)

        source = page.content()

        ul_list = re.findall('<ul>(.*?)</ul>', source, re.DOTALL)
        if len(ul_list) > 0:
            ul_list = ul_list[0]
        else:
            return False

        chapter_url_list = re.findall('<a href="(.*?)">', ul_list)
        chapter_name_list = re.findall('<li>(.*?)</li>', ul_list)

        # The site lists newest first; reverse into reading order.
        chapter_url_list = chapter_url_list[::-1]
        chapter_name_list = chapter_name_list[::-1]

        result = {}

        # Keys are zero-padded serials so the chapter folders sort correctly.
        chapter_count = 1
        for chapter_name, chapter_url in zip(chapter_name_list, chapter_url_list):
            chapter_count_str = str(chapter_count).zfill(4)
            chapter_url = base_url + chapter_url
            result[chapter_count_str] = (chapter_name, chapter_url)
            chapter_count += 1

        browser.close()

        return result


def fetch_images(data, chapter_folder_name):
    data_id = data[0]
    chapter_url = data[2]
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(
            headless=False,
            args=['--ignore-certificate-errors']
        )
        page = browser.new_page()
        page.goto(chapter_url)

        time.sleep(1)

        html_content = page.content()  # full HTML of the rendered page
        img_list = re.findall(r'<div class="main_img"><div class="chapter-img-box">([\S\s]*?)</a></div>', html_content)
        if not img_list:
            browser.close()
            return
        img_list = img_list[0]
        # Non-capturing group so findall returns bare URLs, not (attr, url) tuples.
        urls = re.findall('<img (?:src="|data-src=")(.*?)"', img_list)
        # Download each image into the chapter folder, numbered sequentially,
        # with the extension taken from the URL.
        with httpx.Client(verify=False) as client:
            for page_num, url in enumerate(urls, start=1):
                resp = client.get(url)
                suffix = url.split('.')[-1]
                img_path = os.path.join(chapter_folder_name, f'{str(page_num).zfill(4)}.{suffix}')
                with open(img_path, 'wb') as f:
                    f.write(resp.content)
        browser.close()


def main():
    print(target_url)
    # ------------------------------ step1 ------------------------------
    title = fetch_page_title(target_url)

    comico_folder = os.path.join(download_folder, title)
    if not os.path.exists(comico_folder):
        os.mkdir(comico_folder)

    # Create the chapter db and store the chapter data in it
    db_path = os.path.join(comico_folder, 'comico.db')

    # Fetch each chapter's title and url
    chapter_data = fetch_chapter_data()
    if not chapter_data:
        print('No chapter data found')
        exit(1)

    for k, v in chapter_data.items():
        chapter_url = v[1]
        write_db(title, db_path, k + '_' + v[0], chapter_url)

    # ------------------------------ step2 ------------------------------
    all_data = load_db(title, db_path)

    for data in all_data:
        chapter_folder_name = os.path.join(comico_folder, data[1])
        if not os.path.exists(chapter_folder_name):
            os.mkdir(chapter_folder_name)

        fetch_images(data, chapter_folder_name)
        # long pause between chapters
        time.sleep(999)


if __name__ == '__main__':
    main()

@@ -0,0 +1,205 @@
import os
import time
import random
import httpx
from bs4 import BeautifulSoup

comico_urls = [
    '[PIXIV] LotteryFate (18900473)(AI)',
]

# Whether to use a proxy
use_proxy = 1

# Module-level: get_chapter_data() references it as well as main().
base_url = 'https://jcomic.net'


def save_img(client, folder_path, img_links):
    for index, img_url in enumerate(img_links, start=1):
        try:
            # Build the file name, e.g. 0001.png, 0002.png
            file_name = f"{str(index).zfill(4)}.png"
            file_path = os.path.join(folder_path, file_name)

            # Skip files that already exist
            if os.path.exists(file_path):
                print(f"File already exists, skipping download: {file_path}")
                continue

            # Request the image content
            response = client.get(img_url)
            if response.status_code != 200:
                raise Exception(
                    f"Failed to download image {img_url}, status code: {response.status_code}")

            # Save the image locally
            with open(file_path, 'wb') as file:
                file.write(response.content)

            print(f"Image saved: {file_path}")
        except Exception as e:
            raise Exception(f"Error downloading image {img_url}: {e}")

        # random_sleep = random.uniform(2, 3)
        # print(f"Sleeping a random {random_sleep} seconds")
        # time.sleep(random_sleep)


def get_imgs(client, folder_path, chapter_data):
    img_links = []
    for chapter_name, url in chapter_data.items():
        try:
            # Request the page content
            response = client.get(url)
            if response.status_code != 200:
                raise Exception(f"Failed to fetch {url}, status code: {response.status_code}")

            # Parse the HTML
            soup = BeautifulSoup(response.text, 'html.parser')

            # Get the element that wraps the images
            parent_element = soup.select_one(
                'body > div.container > div.row.col-lg-12.col-md-12.col-xs-12')
            if not parent_element:
                raise Exception(f"{chapter_name}: image container not found")

            # Get all image elements
            img_elements = parent_element.select('img')
            total_images = len(img_elements)
            print(f'{chapter_name}: {total_images} images in total')

            # Collect the image URLs
            for img in img_elements:
                img_url = img.get('src')
                if img_url:
                    img_links.append(img_url)
        except Exception as e:
            print(f"Error while collecting images: {e}")
            raise  # re-raise to trigger the retry logic
    return img_links


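# Hedged note: get_imgs() reads only the src attribute. Many galleries
# lazy-load and keep the real URL in data-src (the dumanwu script in this
# commit matches both), so if jcomic.net does the same, a fallback such as
# img.get('data-src') or img.get('src') would be needed. This is an
# assumption, not verified against the site.

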
def save_urls(folder_path, img_links):
    # Target file path
    save_path = os.path.join(folder_path, 'img_links.txt')

    # Write the image links to the file
    with open(save_path, 'w', encoding='utf-8') as file:
        for link in img_links:
            file.write(link + '\n')

    print(f"Image links saved to: {save_path}")


def new_folder(page_title):
    # Directory containing this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    download_dir = os.path.join(script_dir, 'downloads')
    # Check the downloads dir, not script_dir (which always exists).
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    if page_title:
        # Build the target folder path
        folder_path = os.path.join(download_dir, page_title)

        # Create the folder if it does not exist
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        return folder_path


def get_chapter_data(client, target_url):
    result = {}

    try:
        response = client.get(target_url)
        if response.status_code != 200:
            raise Exception(f"Failed to fetch {target_url}, status code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'html.parser')

        # Get every element under the selector
        elements = soup.select(
            'body > div.container > div:nth-child(3) > div:nth-child(2) a')

        # Pull the URL and text out of each element
        for element in elements:
            url = element.get('href')
            text = element.get_text()
            result[text] = base_url + url
    except Exception as e:
        print(f"Error while fetching chapter data: {e}")
        raise  # re-raise to trigger the retry logic

    return result


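# Note: the chapter selector above is purely positional (nth-child), so a
# layout change on the site yields an empty `elements` list and an empty
# result dict rather than an error; checking `if not elements` and raising
# would surface that earlier.

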
def main():
    proxy_url = 'http://127.0.0.1:7890'
    herf_url = '/eps/'
    # Custom request headers
    custom_headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "max-age=0",
        "cookie": "_gid=GA1.2.724162267.1736817775; _ga_QL6YSDRWEV=GS1.1.1736837388.3.0.1736837388.0.0.0; _ga=GA1.2.1324234734.1736817774; _gat=1",
        "priority": "u=0, i",
        "sec-ch-ua": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"macOS"',
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    }

    for comico_url in comico_urls:
        target_url = base_url + herf_url + comico_url
        print(target_url)
        # Maximum number of retries
        max_retries = 999
        retry_count = 0

        while retry_count < max_retries:
            try:
                # Create an httpx.Client with the custom request headers
                with httpx.Client(proxies=proxy_url if use_proxy else None, headers=custom_headers) as client:
                    # 1. Fetch the chapter data for the page
                    chapter_data = get_chapter_data(client, target_url)
                    print(chapter_data)

                    # 2. Create a folder (named after the title) under the current directory to hold the images
                    folder_path = new_folder(comico_url)

                    # 3. Walk the chapter data and collect the img links
                    img_links = get_imgs(client, folder_path, chapter_data)
                    print(img_links)

                    # 4. Save the urls into the new folder
                    save_urls(folder_path, img_links)

                    # 5. Walk img_links and save the images into folder_path, named like 0001.png
                    save_img(client, folder_path, img_links)

                    # Everything succeeded; break out of the retry loop
                    print('done!')
                    break

            except Exception as e:
                retry_count += 1
                print(f"Error: {e}, starting retry #{retry_count}...")
                if retry_count >= max_retries:
                    print("Maximum retries reached, aborting.")
                    break

                # Fixed delay of 30 seconds before retrying
                delay = 30
                print(f"Waiting {delay} seconds before retrying...")
                time.sleep(delay)


if __name__ == '__main__':
    main()

@@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-

import platform
import time
import random
from datetime import datetime
import re
import os
from pymongo import MongoClient

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import httpx


def browser_opt():
    # Configure the browser before launching it
    os_name = platform.system()
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-setuid-sandbox')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--headless')  # headless mode
    # chrome_options.add_argument('--incognito')  # incognito (private) mode
    # chrome_options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"  # manually point at a browser binary

    if os_name == 'Linux':
        # On Linux the chromium path must be given explicitly;
        # binary_location, not add_argument, is the hook for a binary path.
        chrome_options.binary_location = '/usr/bin/chromium'
    else:
        pass  # other systems need no explicit path

    browser = webdriver.Chrome(options=chrome_options)

    return browser


def browser_open(browser, url):
    # Open the url in the browser
    browser.get(url)
    time.sleep(random.uniform(1, 2))
    return browser


def browser_get_page_source(browser):
    # Return the current page source
    return browser.page_source


def browser_find_by_selector(browser, selector):
    # Look an element up via a CSS selector
    try:
        WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
        element = browser.find_element(By.CSS_SELECTOR, selector)
        if not element:
            return None
        return element.text
    except Exception as e:
        print(e)
        return None


def browser_screenshot(browser):
    # Current page title
    title = browser.title
    # Current timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Build the file name
    filename = f"{title.replace(' ', '')}_{timestamp}.png"
    # Save the screenshot
    browser.save_screenshot(filename)
    print(f"Screenshot saved: {filename}")


def browser_close(browser):
    # quit() also shuts down the chromedriver process; close() would only
    # close the current window.
    browser.quit()


def sanitize_filename(string):
    # Strip characters Windows does not allow
    allowed_chars = re.compile(r'[<>:"/\\|?*]')
    sanitized_filename = allowed_chars.sub('', string)

    # Replace spaces with underscores
    sanitized_filename = sanitized_filename.replace(' ', '_')

    # Make sure the name does not start with a dot
    if sanitized_filename.startswith('.'):
        sanitized_filename = '_' + sanitized_filename[1:]

    # Make sure the name contains no two consecutive dots
    sanitized_filename = sanitized_filename.replace('..', '.')

    # Make sure the name is not an empty string
    if not sanitized_filename:
        sanitized_filename = 'noname' + '_' + str(int(time.time()))

    return sanitized_filename


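# Example: sanitize_filename('Re: Zero/第1话?') returns 'Re_Zero第1话'
# (the illegal characters are stripped first, then spaces become underscores).

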
def task1():
    browser = browser_opt()
    print('Opening browser')
    browser = browser_open(browser, url)
    print(f'Navigating to url: {url}')

    page_source = browser_get_page_source(browser)

    # Grab the comic name to use as the folder name
    book_name = re.findall('meta property="og:novel:book_name" content="(.*?)"', page_source)
    if book_name:
        book_name = book_name[0]

        book_name = sanitize_filename(book_name)
    else:
        print("Failed to get the comic name")
        exit(1)

    # Collect the url of every episode
    all_set = []

    host = 'https://zcymh.com'

    start_tag = '<ol class="chapter-list col-4 text" id="j_chapter_list">'
    end_tag = '</ol>'
    start_index = page_source.find(start_tag)
    end_index = page_source.find(end_tag, start_index)
    if start_index != -1 and end_index != -1:
        target_element = page_source[start_index + len(start_tag):end_index]
        pattern = r'<a title="(.*?)" href="(.*?)" target="_self">'
        matches = re.findall(pattern, target_element)
        set_num = 1
        for match in matches:
            title = sanitize_filename(match[0])
            set_url = host + match[1]
            # reading-order serial, episode name, episode url
            all_set.append([str(set_num).zfill(4), title, set_url])
            set_num += 1

    # Visit each episode url, collect every image on it, and accumulate everything in one list
    all_data_list = []
    for set_data in all_set:
        browser = browser_open(browser, set_data[2])

        page_source = browser_get_page_source(browser)
        page_list = re.findall('<img src="(.*?)" width', page_source)
        print(f'Fetching {set_data[1]}')
        page_num = 1
        for page in page_list:
            # one row of db/csv data
            all_data_list.append({
                'comico_serial': set_data[0],
                'set_name': set_data[1],
                'page_num': page_num,
                'set_url': set_data[2],
                'img_url': page,
                'is_download': 0,
            })
            page_num += 1

    # Store the accumulated data in mongodb
    conn = MongoClient(mongodb_link)
    db = conn[db_name]
    collection = db[book_name]

    for data in all_data_list:
        data_exists = collection.find_one({"img_url": data['img_url']})
        if data_exists is None:
            try:
                result = collection.insert_one(data)
                print(f"Inserted, ObjectId: {result.inserted_id}\t{data['comico_serial']}\t{data['set_name']}\t{data['page_num']}")
            except Exception as e:
                print(f"Insert failed: {e}")
        else:
            print(f'Document already exists: {data}')

    comico_path = os.path.join(os.getcwd(), 'comico')
    if not os.path.exists(comico_path):
        os.makedirs(comico_path)

    # After writing all the data, create a folder for the book
    file_path = os.path.join(comico_path, book_name)
    if not os.path.exists(file_path):
        os.mkdir(file_path)

    browser_close(browser)


def task2():
    file_path = os.path.join(os.getcwd(), 'comico', load_book_name)

    if not os.path.exists(file_path):
        os.mkdir(file_path)

    client = MongoClient(mongodb_link)

    db = client[db_name]

    collection = db[load_book_name]

    # Reset is_download
    # for document in collection.find():
    #     collection.update_one({'_id': document['_id']}, {'$set': {'is_download': 0}})

    # Read every document in the collection
    try:
        for document in collection.find():
            if document['is_download'] == 0:
                # Download this document's image
                try:
                    resp = httpx.get(document['img_url'], headers=headers)
                    if resp.status_code != 200:
                        err = f'Image request failed, status code: {resp.status_code}'
                        raise Exception(err)

                    set_file_name = document['comico_serial'] + '_' + sanitize_filename(document['set_name'])

                    if not os.path.exists(os.path.join(file_path, set_file_name)):
                        os.makedirs(os.path.join(file_path, set_file_name))

                    img_name = str(document['page_num']).zfill(4)

                    suffix = document['img_url'].split('.')[-1]

                    img_path = os.path.join(file_path, set_file_name, img_name + '.' + suffix)

                    with open(img_path, 'wb') as f:
                        f.write(resp.content)

                    # Once the write succeeds, flip is_download to 1
                    collection.update_one({'_id': document['_id']}, {'$set': {'is_download': 1}})
                    print(f"Updated document: {load_book_name}\t{document['comico_serial']}\t{document['set_name']}\t{document['page_num']}")
                except Exception as e:
                    print(f"Error while handling document: {e}")
            else:
                print("Already downloaded, skipping")
    except Exception as e:
        print(f"Error while reading the collection: {e}")

    # Close the database connection
    client.close()


if __name__ == "__main__":
    choose = 2

    mongodb_link = 'mongodb://root:aaaAAA111!!!@192.168.31.177:38000/'
    db_name = 'comico'

    if choose == 1:
        comico_id = '384'
        url = 'https://zcymh.com/manben/{}/'.format(comico_id)
        host = 'https://zcymh.com'
        task1()
    elif choose == 2:
        load_book_name = '诚如神之所说'
        headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0'}
        task2()

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import os
from PIL import Image

current_dir_path = os.path.dirname(os.path.abspath(__file__))
# Starting directory
start_dir = os.path.join(current_dir_path, 'downloads')


# Walk the downloads folder
for root, dirs, files in os.walk(start_dir):
    for dir_name in dirs:
        sub_dir = os.path.join(root, dir_name)
        for sub_root, sub_dirs, sub_files in os.walk(sub_dir):
            for sub_sub_dir in sub_dirs:
                sub_sub_dir_path = os.path.join(sub_root, sub_sub_dir)
                print(sub_sub_dir_path)
                png_count = 0
                images = []
                # sorted() keeps the zero-padded pages in reading order
                for file in sorted(os.listdir(sub_sub_dir_path)):
                    if file.lower().endswith('.png'):
                        images.append(os.path.join(sub_sub_dir_path, file))
                        png_count += 1

                if not images:
                    raise ValueError("The image list must not be empty")

                total_image = Image.open(images[0])

                # Stitch the pages vertically, one below the other
                for image in images[1:]:
                    img = Image.open(image)

                    new_image = Image.new('RGB', (max(total_image.width, img.width), total_image.height + img.height))

                    new_image.paste(total_image, (0, 0))
                    new_image.paste(img, (0, total_image.height))

                    total_image = new_image

                total_image.save(f'{sub_sub_dir_path}.png')
            # only walk one level deep at each stage
            break
        break
    break

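# Alternative sketch, assuming the same sorted list of page paths: size the
# canvas once and paste each page a single time, instead of re-copying the
# growing strip on every iteration as the loop above does.
def merge_vertically(image_paths, out_path):
    pages = [Image.open(p) for p in image_paths]
    canvas = Image.new('RGB', (max(p.width for p in pages), sum(p.height for p in pages)))
    y = 0
    for p in pages:
        canvas.paste(p, (0, y))
        y += p.height
    canvas.save(out_path)
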
@@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
import os
import time
import httpx
import asyncio
import re
import sqlite3
from playwright.sync_api import sync_playwright

comico_id = '419025'
base_url = 'https://www.zhuimh.com'
target_href_url = 'https://www.zhuimh.com/comic/'
scroll_speed = 2
current_dir_path = os.path.dirname(os.path.abspath(__file__))

download_folder = os.path.join(current_dir_path, 'downloads')
if not os.path.exists(download_folder):
    os.mkdir(download_folder)

db_path = os.path.join(download_folder, 'zhuimh.db')
txt_path = os.path.join(download_folder, 'target_comico_name.txt')


def create_db(title):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    # Quote the table name: comic titles are not valid bare SQL identifiers.
    cursor.execute(
        f'CREATE TABLE IF NOT EXISTS "{title}" (id INTEGER PRIMARY KEY AUTOINCREMENT, chapter_name TEXT, url TEXT, state BOOLEAN DEFAULT 0)'
    )
    conn.commit()
    cursor.close()
    conn.close()


def write_to_db(title, chapter_name, url):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # Check whether chapter_name already exists
    cursor.execute(
        f'SELECT EXISTS(SELECT 1 FROM "{title}" WHERE chapter_name = ?)', (chapter_name,))
    exists = cursor.fetchone()[0]

    if not exists:
        # Insert a new record if it does not exist
        cursor.execute(f'INSERT INTO "{title}" (chapter_name, url) VALUES (?, ?)', (chapter_name, url))
        conn.commit()

    cursor.close()
    conn.close()


async def async_get_chapter_list():
    async with httpx.AsyncClient() as client:
        chapters_data = {}
        response = await client.get(target_href_url + comico_id)
        if response.status_code == 200:
            text = response.text
            title = re.findall(r'<h4>(.*?)</h4>', text)
            title = title[0] if title else comico_id
            print(title)
            # Write the current target name to a txt file first; the database
            # reads below need this name as the table name.
            with open(txt_path, 'w', encoding='utf-8') as f:
                print('Writing the current target name')
                f.write(title)

            chapters = re.findall(r'<li><a href="(.*?)">(.*?)</a></li>', text)
            for chapter in chapters:
                chapters_data[chapter[1]] = base_url + chapter[0]

            # Create the sqlite table and store the data
            create_db(title)

            for chapter_name, url in chapters_data.items():
                write_to_db(title, chapter_name, url)
            print('Chapter data stored')


async def get_chapter_list():
    await async_get_chapter_list()


def load_db(title):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute(f'SELECT * FROM "{title}" WHERE state = 0 ORDER BY id ASC')
    rows = cursor.fetchall()
    cursor.close()
    conn.close()
    return rows


def change_db_data_state(data_id, t_name):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    table_name = t_name
    id_column = 'id'
    id_value = data_id
    bool_column = 'state'
    sql = f'UPDATE "{table_name}" SET {bool_column} = 1 WHERE {id_column} = ?'
    cursor.execute(sql, (id_value,))
    conn.commit()
    cursor.close()
    conn.close()


def scroll_to_percentage(page):
    # Scroll the page in small steps so lazy-loaded images get triggered
    percentage_list = [i for i in range(5, 101, scroll_speed)]
    for percentage in percentage_list:
        # Re-read the height each step: it grows as images load in
        height = page.evaluate("() => document.body.scrollHeight")
        scroll_position = height * (percentage / 100)
        # Jump to the given percentage of the page
        page.evaluate(f"window.scrollTo(0, {scroll_position})")
        time.sleep(0.5)


def request_chapter_data(title, data_id, chapter_name, chapter_url):
    chapter_folder = os.path.join(current_dir_path, 'downloads', title, chapter_name)
    with sync_playwright() as playwright:
        try:
            browser = playwright.chromium.launch(headless=True)
            page = browser.new_page()
            page.goto(chapter_url)
            page.wait_for_load_state('networkidle')
        except Exception as e:
            print(e)
            return False

        # Scroll through the page
        print('Starting to scroll the page')
        scroll_to_percentage(page)
        page.evaluate("window.scrollTo({top: 0, behavior: 'smooth'})")
        scroll_to_percentage(page)
        print('Scrolling finished')
        time.sleep(2)

        # Check whether every image has loaded; if not, bail out of the
        # function so the caller reopens the browser and retries.
        html_content = page.content()
        check_list = re.findall('img class="lazy-read" src="(.*?)"', html_content)
        for src in check_list:
            if 'lazy-read.gif' in src:
                return False

        # Create the chapter folder
        if not os.path.exists(chapter_folder):
            os.makedirs(chapter_folder)

        # Count the matching elements
        total_images = page.locator('.lazy-read').count()

        for page_num in range(1, total_images + 1):
            img_locator = f'body > div.chpater-images > img:nth-child({page_num})'
            img_path = os.path.join(chapter_folder, f'{str(page_num).zfill(3)}.png')
            page.locator(img_locator).screenshot(path=img_path)
            print(f'Downloaded {img_path}')

        # The chapter finished downloading; flip its state field to True
        print(f'{chapter_name} finished downloading\n\n')
        change_db_data_state(data_id, title)

        browser.close()
        return True


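# Note: locator.screenshot() captures each <img> at its rendered on-screen
# size, which works even if direct image requests are blocked. When the src
# URLs collected into check_list are directly fetchable, downloading them
# instead (as the httpx-based scripts in this commit do) would preserve the
# original resolution.

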
def main():
    asyncio.run(get_chapter_list())

    # Crawl every chapter page.
    # First read back the current target name
    title = ''
    with open(txt_path, 'r', encoding='utf-8') as f:
        title = f.read()

    folder_name = os.path.join(download_folder, title)
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)

    for retry in range(999):
        load_data = load_db(title)

        if not load_data:
            print('The database has no data or all done!')
            exit(0)

        for data in load_data:
            ok = True
            data_id = data[0]
            chapter_name = data[1]
            chapter_url = data[2]
            print(f'About to fetch images: {title} {chapter_name}')
            ok = request_chapter_data(title, data_id, chapter_name, chapter_url)
            if not ok:
                print(f'Image loading failed: {title} {chapter_name}, retrying\n\n')
                time.sleep(5)
                break


if __name__ == "__main__":
    main()