You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
264 lines
9.2 KiB
264 lines
9.2 KiB
import httpx
|
|
from httpx import BasicAuth, Timeout
|
|
import pandas as pd
|
|
from tqdm import tqdm
|
|
from pathlib import Path
|
|
import logging
|
|
import time
|
|
import random
|
|
|
|
|
|
class SimpleAlphaFetcher:
|
|
def __init__(self, base_path=None):
|
|
"""
|
|
初始化 Alpha 获取器
|
|
|
|
Args:
|
|
base_path: 输出文件保存路径
|
|
"""
|
|
self.client = None
|
|
self.base_path = Path(base_path) if base_path else Path.cwd()
|
|
self.logger = self._setup_logger()
|
|
|
|
# 登录
|
|
self.login()
|
|
|
|
def _setup_logger(self):
|
|
"""设置日志记录器"""
|
|
logger = logging.getLogger(__name__)
|
|
logger.setLevel(logging.INFO)
|
|
|
|
if not logger.handlers:
|
|
handler = logging.StreamHandler()
|
|
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
|
handler.setFormatter(formatter)
|
|
logger.addHandler(handler)
|
|
|
|
return logger
|
|
|
|
def login(self):
|
|
"""登录 WorldQuant Brain API"""
|
|
try:
|
|
# 从 nacos 获取账号密码
|
|
with httpx.Client(timeout=10.0) as temp_client:
|
|
nacos_resp = temp_client.get(
|
|
'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify'
|
|
)
|
|
|
|
if nacos_resp.status_code != 200:
|
|
self.logger.error('获取账号密码失败')
|
|
return False
|
|
|
|
config = nacos_resp.json()
|
|
username = config.get('user_name')
|
|
password = config.get('password')
|
|
|
|
if not username or not password:
|
|
self.logger.error('账号密码不完整')
|
|
return False
|
|
|
|
self.logger.info(f"正在登录账户: {username}")
|
|
|
|
# 创建客户端并设置超时
|
|
timeout = Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0)
|
|
self.client = httpx.Client(
|
|
auth=BasicAuth(username, password),
|
|
timeout=timeout
|
|
)
|
|
|
|
# 发送登录请求
|
|
response = self.client.post('https://api.worldquantbrain.com/authentication')
|
|
|
|
if response.status_code == 201:
|
|
self.logger.info("登录成功!")
|
|
return True
|
|
else:
|
|
self.logger.error(f"登录失败: {response.status_code} - {response.text}")
|
|
self.client.close()
|
|
self.client = None
|
|
return False
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"登录异常: {e}")
|
|
return False
|
|
|
|
def fetch_alphas(
|
|
self,
|
|
max_pages: int = 100,
|
|
limit: int = 100,
|
|
delay: int = 1,
|
|
region: str = "USA",
|
|
universe: str = "TOP3000",
|
|
hidden: str = "false",
|
|
output_file_name: str = "alpha_list.csv",
|
|
mode: str = "w",
|
|
max_retries: int = 3,
|
|
):
|
|
"""
|
|
获取 Alpha 列表
|
|
|
|
Args:
|
|
max_pages: 最大爬取页数
|
|
limit: 每页数量
|
|
delay: API 请求的延迟设置
|
|
region: 市场区域
|
|
universe: 股票池
|
|
hidden: 是否搜索已隐藏的 Alpha
|
|
output_file_name: 输出的 CSV 文件名
|
|
mode: 写入模式:"w"(覆盖写入) 或 "a"(追加写入)
|
|
max_retries: 请求失败时的最大重试次数
|
|
|
|
Returns:
|
|
list: Alpha 列表
|
|
"""
|
|
if not self.client:
|
|
self.logger.error("客户端未登录,无法执行获取")
|
|
return []
|
|
|
|
fetched_alphas = []
|
|
offset = 0
|
|
total_accessed = 0
|
|
pages_fetched = 0
|
|
|
|
# 先获取总数
|
|
count_url = (
|
|
f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&hidden={hidden}"
|
|
f"&limit=1&settings.delay={delay}&settings.region={region}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
|
|
)
|
|
|
|
total_available = 0
|
|
for attempt in range(max_retries):
|
|
try:
|
|
count_response = self.client.get(count_url)
|
|
total_available = count_response.json()["count"]
|
|
break
|
|
except Exception as e:
|
|
self.logger.warning(f"获取 Alpha 总数失败 (尝试 {attempt+1}/{max_retries}): {e}")
|
|
if attempt < max_retries - 1:
|
|
time.sleep(random.uniform(3, 5))
|
|
else:
|
|
self.logger.error("获取 Alpha 总数最终失败")
|
|
return []
|
|
|
|
if total_available == 0:
|
|
self.logger.warning("未找到任何 Alpha")
|
|
return []
|
|
|
|
# 计算实际最大页数
|
|
actual_max_pages = min(max_pages, (total_available + limit - 1) // limit)
|
|
self.logger.info(f"共找到 {total_available} 个 Alpha,计划爬取 {actual_max_pages} 页...")
|
|
|
|
pbar = tqdm(total=actual_max_pages, desc="爬取 Alpha 页面", unit="页")
|
|
|
|
while pages_fetched < actual_max_pages:
|
|
# 构建请求 URL
|
|
url = (
|
|
f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&limit={limit}"
|
|
f"&offset={offset}&settings.delay={delay}&settings.region={region}&hidden={hidden}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
|
|
)
|
|
|
|
try:
|
|
# 使用重试机制
|
|
response = None
|
|
for attempt in range(max_retries):
|
|
try:
|
|
response = self.client.get(url)
|
|
break
|
|
except Exception as e:
|
|
self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
|
|
if attempt < max_retries - 1:
|
|
time.sleep(random.uniform(3, 5))
|
|
else:
|
|
raise
|
|
|
|
if response.status_code == 400:
|
|
self.logger.warning(f"遇到 API 限制 (offset={offset}),停止获取")
|
|
break
|
|
|
|
response_data = response.json()
|
|
|
|
if not isinstance(response_data, dict) or "results" not in response_data:
|
|
self.logger.error(f"API 返回了意外的数据: {response_data}")
|
|
break
|
|
|
|
alphas = response_data["results"]
|
|
|
|
if not alphas:
|
|
break
|
|
|
|
fetched_alphas.extend(alphas)
|
|
total_accessed += len(alphas)
|
|
pages_fetched += 1
|
|
|
|
# 更新进度条
|
|
pbar.update(1)
|
|
pbar.set_postfix({
|
|
"Region": region,
|
|
"已获取": total_accessed,
|
|
"本页": len(alphas),
|
|
})
|
|
|
|
if len(alphas) < limit:
|
|
self.logger.info(f"本页数据不足 {limit} 条,已到达末尾")
|
|
break
|
|
|
|
offset += limit
|
|
|
|
# 添加延迟避免请求过快
|
|
time.sleep(0.5)
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"请求失败: {e}")
|
|
break
|
|
|
|
pbar.close()
|
|
|
|
if not fetched_alphas:
|
|
self.logger.warning("未获取到任何 Alpha!")
|
|
return []
|
|
|
|
# 按 fitness 排序
|
|
df = pd.DataFrame(fetched_alphas)
|
|
df["temp_fitness"] = df.apply(
|
|
lambda row: row["is"].get("fitness", 0) if isinstance(row.get("is"), dict) else 0,
|
|
axis=1
|
|
)
|
|
df_sorted = df.sort_values(by="temp_fitness", ascending=False)
|
|
df_sorted = df_sorted.drop("temp_fitness", axis=1)
|
|
|
|
output_path = self.base_path / output_file_name
|
|
|
|
if mode == "w":
|
|
df_sorted.to_csv(output_path, index=False)
|
|
elif mode == "a":
|
|
df_sorted.to_csv(output_path, mode="a", index=False, header=False)
|
|
|
|
self.logger.info(f"Alpha 列表已保存!共 {len(fetched_alphas)} 条记录!\n文件路径: {output_path}")
|
|
|
|
return fetched_alphas
|
|
|
|
def close(self):
|
|
"""关闭客户端"""
|
|
if self.client:
|
|
self.client.close()
|
|
|
|
|
|
# 使用示例
|
|
if __name__ == "__main__":
|
|
# 创建获取器实例(会自动登录)
|
|
fetcher = SimpleAlphaFetcher()
|
|
|
|
if fetcher.client:
|
|
# 获取 Alpha 列表
|
|
results = fetcher.fetch_alphas(
|
|
max_pages=100, # 最多爬取 100 页
|
|
limit=100, # 每页 100 条
|
|
region="USA", # 美股市场
|
|
universe="TOP3000", # TOP3000 股票池
|
|
output_file_name="alpha_list.csv" # 输出文件名
|
|
)
|
|
|
|
print(f"成功获取 {len(results) if results else 0} 个 Alpha")
|
|
|
|
# 关闭客户端
|
|
fetcher.close()
|
|
|