eh-v2/downloader.py


from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel
from pathlib import Path
from typing import Optional
import uuid

router = APIRouter(prefix="/api/v1", tags=["downloader"])

# In-memory store for task state
tasks = {}

class CrawlRequest(BaseModel):
    url: str
    cookies: str
    timestamp: str

class TaskStatus(BaseModel):
    status: str  # 'running', 'completed', 'failed'
    result: Optional[dict] = None
    error: Optional[str] = None

@router.post("/start-crawl")
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    task_id = str(uuid.uuid4())
    tasks[task_id] = {'status': 'running', 'result': None, 'error': None}
    # Run the crawler task in the background
    background_tasks.add_task(run_crawler, task_id, request)
    return {"task_id": task_id, "status": "started"}

@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    task = tasks.get(task_id)
    if not task:
        return {"status": "not_found"}
    return task

async def run_crawler(task_id: str, request: CrawlRequest):
    try:
        # Execute your crawler logic here; simulated as a long-running job,
        # e.g. time.sleep(300)  # 5 minutes
        # Make sure the downloads directory exists (double safeguard)
        downloads_dir = Path("downloads")
        downloads_dir.mkdir(exist_ok=True)
        # Simulate downloading a file into the downloads directory
        filename = f"download_{task_id}.txt"
        filepath = downloads_dir / filename
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(f"URL: {request.url}\n")
            f.write(f"Cookies: {request.cookies}\n")
            f.write(f"Timestamp: {request.timestamp}\n")
            f.write("Download completed successfully\n")
        # Update the state once the crawler finishes
        tasks[task_id] = {
            'status': 'completed',
            'result': {
                'message': 'Crawl completed',
                'data': 'your crawl results',
                'download_path': str(filepath)
            },
            'error': None
        }
    except Exception as e:
        tasks[task_id] = {
            'status': 'failed',
            'result': None,
            'error': str(e)
        }
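
For context, a minimal sketch of how this router might be mounted and exercised, assuming the module is importable as downloader and the server runs on localhost:8000. The app.py entry point, the example URL, cookie string, and timestamp, and the polling loop below are all illustrative assumptions, not part of this repository.

    # app.py -- hypothetical entry point (assumes downloader.py is on the import path)
    from fastapi import FastAPI
    from downloader import router

    app = FastAPI()
    app.include_router(router)
    # Run with: uvicorn app:app --port 8000

    # client sketch -- start a crawl, then poll until it leaves the 'running' state
    import time
    import requests

    resp = requests.post("http://localhost:8000/api/v1/start-crawl", json={
        "url": "https://example.com/gallery",        # placeholder URL
        "cookies": "session=...",                    # placeholder cookie string
        "timestamp": "2024-01-01T00:00:00Z",         # placeholder timestamp
    })
    task_id = resp.json()["task_id"]
    while True:
        status = requests.get(f"http://localhost:8000/api/v1/task-status/{task_id}").json()
        if status["status"] != "running":
            break
        time.sleep(2)
    print(status)

Note that tasks is a plain in-process dict, so task state is lost on restart and is not shared between worker processes; the polling loop above assumes a single-process deployment.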