You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
73 lines
2.2 KiB
73 lines
2.2 KiB
import asyncio
import logging
import os
import uuid
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel
|
|
|
|
# Router carrying the downloader endpoints; all routes are mounted under /api/v1.
router = APIRouter(
    prefix="/api/v1",
    tags=["downloader"],
)

# Task state store, held in this process's memory only.
# Maps task_id -> {'status': ..., 'result': ..., 'error': ...}.
tasks = {}
|
|
|
|
class CrawlRequest(BaseModel):
    """Request body of the start-crawl endpoint.

    All three fields are required strings; ``run_crawler`` writes each of
    them verbatim into the generated download file.
    """

    url: str
    cookies: str
    timestamp: str
|
|
|
|
class TaskStatus(BaseModel):
    """Status record of a crawl task.

    The fields mirror the keys of the records stored in ``tasks``.
    ``result`` and ``error`` are declared ``Optional`` because those records
    carry an explicit ``None`` for them; a bare ``dict``/``str`` annotation
    with a ``None`` default rejects an explicit ``None`` under pydantic v2.
    """

    # One of 'running', 'completed', 'failed'.
    status: str
    # Result payload; None when there is no result.
    result: Optional[dict] = None
    # Error message; None when there is no error.
    error: Optional[str] = None
|
|
|
|
@router.post("/start-crawl")
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    """Register a new crawl task and schedule it as a background task.

    Args:
        request: Crawl parameters, forwarded unchanged to ``run_crawler``.
        background_tasks: FastAPI background-task collector the crawl is
            added to.

    Returns:
        A dict with the generated ``task_id`` and the status ``"started"``.
    """
    new_id = str(uuid.uuid4())

    # Store the 'running' record before scheduling, so the id is already in
    # ``tasks`` when the response is returned.
    tasks[new_id] = dict(status='running', result=None, error=None)

    # Hand the crawl over to the background-task machinery.
    background_tasks.add_task(run_crawler, new_id, request)

    return dict(task_id=new_id, status="started")
|
|
|
|
@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    """Return the stored record for *task_id*.

    When ``tasks`` holds no (truthy) entry for the id, the payload
    ``{"status": "not_found"}`` is returned instead.
    """
    # A missing or falsy entry falls through to the not-found payload.
    return tasks.get(task_id) or {"status": "not_found"}
|
|
|
|
async def run_crawler(task_id: str, request: CrawlRequest):
    """Run the (simulated) crawl for one task and record the outcome in ``tasks``.

    The blocking file write is handed to the event loop's default executor.
    Starlette awaits ``async`` background tasks on the event loop itself, so
    doing the I/O inline would stall every other request while it runs.

    Args:
        task_id: Key of the entry in ``tasks`` to update; also used to build
            the output file name.
        request: Crawl parameters (``url``, ``cookies``, ``timestamp``) that
            are written into the output file.

    Returns:
        None. On success ``tasks[task_id]`` is replaced by a 'completed'
        record carrying the download path; on any ``Exception`` it is
        replaced by a 'failed' record carrying ``str(exc)``.
    """
    try:
        # The real crawler logic belongs here. Keep long-running blocking work
        # off the event loop (executor or async I/O); never time.sleep() here.
        loop = asyncio.get_running_loop()
        filepath = await loop.run_in_executor(
            None, _write_download_file, task_id, request
        )
    except Exception as exc:
        # Boundary of the background task: the HTTP response has already been
        # sent, so log the traceback and expose the message via the status.
        logging.getLogger(__name__).exception("Crawl task %s failed", task_id)
        tasks[task_id] = {
            'status': 'failed',
            'result': None,
            'error': str(exc),
        }
    else:
        # Crawl finished: publish the result record.
        tasks[task_id] = {
            'status': 'completed',
            'result': {
                'message': '爬虫完成',
                'data': '您的爬虫结果',
                'download_path': str(filepath),
            },
            'error': None,
        }


def _write_download_file(task_id: str, request: CrawlRequest) -> Path:
    """Write the simulated download for *task_id* under ``downloads/`` and return its path."""
    # Make sure the downloads directory exists before writing into it.
    downloads_dir = Path("downloads")
    downloads_dir.mkdir(exist_ok=True)

    filepath = downloads_dir / f"download_{task_id}.txt"

    # NOTE(review): this stores the caller's raw cookies in a plaintext file;
    # confirm that is acceptable before using real session cookies.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(f"URL: {request.url}\n")
        f.write(f"Cookies: {request.cookies}\n")
        f.write(f"Timestamp: {request.timestamp}\n")
        f.write("Download completed successfully\n")

    return filepath
|
|
|