jack 1 month ago
parent 0598040359
commit 0e39f21012
  1. 83
      VerificationAlpha.py
  2. 1
      account.txt
  3. 7
      ai_config.json
  4. 10001
      alpha-forge/alpha_list.csv
  5. 0
      alpha-forge/feature_analysis_database.py
  6. 189
      alpha-forge/feature_attribution.py
  7. 264
      alpha-forge/simple_alpha_fetcher.py
  8. 232
      alpha-forge/simple_frature_analysis.py
  9. 20
      alpha-forge/test_code_001.py
  10. 20
      alpha-forge/test_code_002.py
  11. 19
      alpha-forge/test_code_003.py
  12. 381
      alpha-forge/wqb-get-alphas.py
  13. 2
      alpha.txt
  14. 35
      alpha_prompt.txt
  15. 0
      base/AlphaGenerator.py
  16. 0
      base/FactorSimulator.py
  17. 0
      base/FieldDownloader.py
  18. 0
      base/wqb_operator.txt
  19. 0
      data_sets/get_datasets_local/all_data_combined.csv
  20. 0
      data_sets/get_datasets_local/category.txt
  21. 0
      data_sets/get_datasets_local/keys_text.txt
  22. 0
      data_sets/get_datasets_local/load_data_sets.py
  23. 0
      data_sets/get_datasets_local/seach_data_sets.py
  24. 0
      extract_fields/特征归因_导出高频使用操作符及数据集
  25. 231
      feature_analysis_database/feature_analysis_database.py
  26. 0
      feature_analysis_database/特征分析
  27. 148
      import_alpha_txt.py
  28. 299
      test/wqb-filter-alpha/fetch-alpha.py
  29. 11
      test/wqb-filter-alpha/查询参数.md
  30. 4
      test/wqb-simulate-go/account.json
  31. 22
      test/wqb-simulate-go/alphas.json
  32. 431
      test/wqb-simulate-go/client.go
  33. 76
      test/wqb-simulate-go/config.go
  34. 5
      test/wqb-simulate-go/go.mod
  35. 2
      test/wqb-simulate-go/go.sum
  36. BIN
      test/wqb-simulate-go/main
  37. 184
      test/wqb-simulate-go/main.go
  38. 76
      test/wqb-simulate-go/models.go
  39. 22
      test/wqb-simulate/alphas.json
  40. 28
      test/wqb-simulate/result/simulation_results-1773977014.json
  41. 28
      test/wqb-simulate/result/simulation_results-1773977522.json
  42. 28
      test/wqb-simulate/result/simulation_results-1773977891.json
  43. 28
      test/wqb-simulate/result/simulation_results-1773978098.json
  44. 28
      test/wqb-simulate/result/simulation_results-1773978142.json
  45. 28
      test/wqb-simulate/result/simulation_results-1773978278.json
  46. 54
      test/wqb-simulate/result/simulation_results-1773978475.json
  47. 41
      test/wqb-simulate/result/simulation_results-1773979205.json
  48. 15
      test/wqb-simulate/result/simulation_results-1773979454.json
  49. 15
      test/wqb-simulate/result/simulation_results-1773979487.json
  50. 15
      test/wqb-simulate/result/simulation_results-1773986794.json
  51. 3
      wqb-get-alphas/my_alpha_list.csv
  52. 381
      wqb-get-alphas/wqb-get-alphas.py
  53. 0
      wqb-get-alphas/获取wq线上质量较好alpha

@ -1,83 +0,0 @@
# -*- coding: utf-8 -*-
import os
import httpx
from requests.auth import HTTPBasicAuth
from urllib.parse import urlencode
class BrainLogin:
    """Minimal WorldQuant Brain API client: credential loading, login, alpha listing."""

    def __init__(self, credentials_file='account.txt'):
        self.credentials_file = credentials_file
        self.client = None
        self.brain_api_url = 'https://api.worldquantbrain.com'

    def load_credentials(self):
        """Read ``['username', 'password']`` from the credentials file.

        Creates an empty template file and exits the process when the file
        is missing, mirroring the original CLI behavior.

        Returns:
            tuple: (username, password)
        """
        if not os.path.exists(self.credentials_file):
            print("未找到 account.txt 文件")
            with open(self.credentials_file, 'w') as f:
                f.write("")
            print("account.txt 文件已创建,请填写账号密码, 格式: ['username', 'password']")
            exit(1)
        import ast  # local import keeps the module's top-level imports untouched
        with open(self.credentials_file) as f:
            # Fix: ast.literal_eval instead of eval() — the file content is
            # untrusted input and must not be executed as arbitrary code.
            credentials = ast.literal_eval(f.read())
        return credentials[0], credentials[1]

    def login(self):
        """Authenticate against the Brain API with HTTP basic auth.

        Returns:
            httpx.Client on success, None on failure.
        """
        try:
            username, password = self.load_credentials()
            self.client = httpx.Client(auth=HTTPBasicAuth(username, password))
            response = self.client.post(f'{self.brain_api_url}/authentication')
            print(f"登录状态: {response.status_code}")
            if response.status_code in [200, 201]:
                print("登录成功!")
                return self.client
            else:
                print(f"登录失败: {response.json()}")
                return None
        except Exception as e:
            print(f"登录过程中出现错误: {e}")
            return None

    def get_alphas(self, **params):
        """GET the user's alpha list with arbitrary query parameters.

        Returns:
            dict parsed from the API JSON response, or None on failure.
        """
        if self.client is None:
            print("请先登录!")
            return None
        try:
            encoded_params = urlencode(params, doseq=True)
            url = f"{self.brain_api_url}/users/self/alphas?{encoded_params}"
            response = self.client.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"获取alpha列表失败: {response.text}")
                return None
        except Exception as e:
            print(f"获取alpha列表过程中出现错误: {e}")
            return None
# 使用示例
if __name__ == "__main__":
    brain = BrainLogin()
    if brain.login():
        alphas = brain.get_alphas(
            limit=50,
            offset=0,
            status="UNSUBMITTED",
            order="dateSubmitted",
            hidden="false"
        )
        if alphas:
            print("获取alpha列表成功!")
            print(alphas)
            # Fix: the API returns a dict ({"count": ..., "results": [...]}),
            # so len(alphas) counted dict keys, not alphas.
            print(f"一共 {len(alphas.get('results', []))} 个 alpha")

@ -1 +0,0 @@
['jack0210_@hotmail.com', '!QAZ2wsx+0913']

@ -1,7 +0,0 @@
{
"siliconflow": {
"base_url": "https://api.siliconflow.cn/v1",
"api_keys": "sk-pvdiisdowmuwkrpnxsrlhxaovicqibmlljwrwwvbbdjaitdl",
"model": "Qwen/Qwen3-VL-235B-A22B-Instruct"
}
}

File diff suppressed because one or more lines are too long

@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
计算特征增益值修复版 v2
"""
import sqlite3
import pandas as pd
from pathlib import Path
SQLITE_PATH = Path(__file__).parent / "alpha_analysis.db"
def main():
    """Compute per-feature gain values and persist them.

    A feature's gain is the mean fitness of the alphas using it minus the
    global mean fitness. Results overwrite the feature_statistics table, and
    features with |gain| > 0.05 additionally drive generation_bias weights.
    """
    print("=" * 60)
    print("开始计算特征增益值")
    print("=" * 60)
    conn = sqlite3.connect(str(SQLITE_PATH))
    try:
        # 1. Fitness for every alpha (SQLite id is the join key).
        print("\n📊 获取 Alpha 数据...")
        df_alpha = pd.read_sql_query("""
            SELECT id, alpha_id, fitness
            FROM alpha_success
            WHERE fitness IS NOT NULL
        """, conn)
        print(f" 列名: {list(df_alpha.columns)}")
        global_avg = df_alpha['fitness'].mean()
        print(f" 全局平均 fitness: {global_avg:.4f}")
        print(f" 样本数: {len(df_alpha):,}")

        # 2. All extracted features.
        print("\n🔧 获取特征数据...")
        df_features = pd.read_sql_query("""
            SELECT alpha_id, feature_type, feature_name
            FROM alpha_feature_long
        """, conn)
        print(f" 总特征数: {len(df_features):,}")

        # 3. Attach each alpha's fitness to its feature rows
        # (alpha_feature_long.alpha_id stores the SQLite id).
        print("\n📈 计算每个特征的平均分...")
        fitness_map = dict(zip(df_alpha['id'], df_alpha['fitness']))
        df_features['fitness'] = df_features['alpha_id'].map(fitness_map)
        # Drop features whose alpha has no fitness.
        df_features = df_features[df_features['fitness'].notna()]
        print(f" 有效特征数: {len(df_features):,}")

        # Aggregate per (feature_type, feature_name).
        stats = df_features.groupby(['feature_type', 'feature_name']).agg(
            avg_score=('fitness', 'mean'),
            sample_count=('fitness', 'count'),
            total_fitness=('fitness', 'sum')
        ).reset_index()
        stats['gain'] = stats['avg_score'] - global_avg
        # Require at least 10 samples for a feature to be trusted.
        stats_filtered = stats[stats['sample_count'] >= 10].copy()
        print(f" 过滤后特征数(样本≥10): {len(stats_filtered):,}")

        # 4. Rank by gain.
        stats_positive = stats_filtered[stats_filtered['gain'] > 0.05].sort_values('gain', ascending=False)
        stats_negative = stats_filtered[stats_filtered['gain'] < -0.05].sort_values('gain', ascending=True)
        print(f"\n✅ 统计完成:")
        print(f" 总特征类型数: {len(stats_filtered):,}")
        print(f" 正向特征 (gain > 0.05): {len(stats_positive):,}")
        print(f" 负向特征 (gain < -0.05): {len(stats_negative):,}")

        # 5. Top 30 positive features.
        print("\n" + "=" * 60)
        print("📈 正向特征 Top 30 (增益值 > 0.05)")
        print("=" * 60)
        for _, row in stats_positive.head(30).iterrows():
            print(f" {row['feature_type']:15} {row['feature_name']:35} 增益: {row['gain']:+.4f} (样本: {row['sample_count']:,})")

        # 6. Bottom 30 negative features.
        if len(stats_negative) > 0:
            print("\n" + "=" * 60)
            print("📉 负向特征 Bottom 30 (增益值 < -0.05)")
            print("=" * 60)
            for _, row in stats_negative.head(30).iterrows():
                print(f" {row['feature_type']:15} {row['feature_name']:35} 增益: {row['gain']:+.4f} (样本: {row['sample_count']:,})")

        # 7. Rewrite feature_statistics (executemany batches all rows in
        # one call instead of one execute per row).
        print("\n💾 保存到 feature_statistics 表...")
        conn.execute("DELETE FROM feature_statistics")

        def effect(gain):
            # Classify gain into +1 / -1 / 0 using the 0.05 threshold.
            if gain > 0.05:
                return 1
            if gain < -0.05:
                return -1
            return 0

        conn.executemany("""
            INSERT INTO feature_statistics
            (feature_type, feature_name, gain_value, avg_score, global_avg_score, sample_count, positive_effect)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, [
            (row['feature_type'], row['feature_name'], row['gain'],
             row['avg_score'], global_avg, row['sample_count'], effect(row['gain']))
            for _, row in stats_filtered.iterrows()
        ])
        conn.commit()

        # 8. Rewrite generation_bias from features with |gain| > 0.05
        # (sample_count >= 10 is already guaranteed by stats_filtered).
        print("\n💾 更新 generation_bias 表...")
        conn.execute("DELETE FROM generation_bias")
        bias_features = stats_filtered[abs(stats_filtered['gain']) > 0.05]

        def weight(gain):
            # Positive gain boosts the weight (capped at 3.0); negative gain
            # shrinks it (floored at 0.1); neutral gain keeps 1.0.
            if gain > 0.05:
                return 1.0 + min(gain * 2, 2.0)
            if gain < -0.05:
                return max(0.1, 1.0 - abs(gain) * 2)
            return 1.0

        conn.executemany("""
            INSERT INTO generation_bias
            (feature_type, feature_name, bias_weight, gain_value, sample_count, is_active)
            VALUES (?, ?, ?, ?, ?, 1)
        """, [
            (row['feature_type'], row['feature_name'], weight(row['gain']),
             row['gain'], row['sample_count'])
            for _, row in bias_features.iterrows()
        ])
        conn.commit()

        # 9. Summary counts.
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM feature_statistics")
        stats_count = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM generation_bias")
        bias_count = cursor.fetchone()[0]
        print(f"\n✅ 完成!")
        print(f" feature_statistics: {stats_count:,}")
        print(f" generation_bias: {bias_count:,}")

        # 10. Highlights.
        print("\n" + "=" * 60)
        print("💡 关键发现")
        print("=" * 60)
        best_ops = stats_positive[stats_positive['feature_type'] == 'operator'].head(5)
        if len(best_ops) > 0:
            print("\n🏆 最佳算子 (增益最高):")
            for _, row in best_ops.iterrows():
                print(f" {row['feature_name']}: 增益 {row['gain']:+.4f}")
        best_fields = stats_positive[stats_positive['feature_type'] == 'field'].head(5)
        if len(best_fields) > 0:
            print("\n🏆 最佳字段 (增益最高):")
            for _, row in best_fields.iterrows():
                print(f" {row['feature_name']}: 增益 {row['gain']:+.4f}")
        # Bug fix: stats_negative is sorted ascending (most negative first),
        # so the lowest-gain operators are at the HEAD; .tail(5) showed the
        # least-bad negatives instead.
        worst_ops = stats_negative[stats_negative['feature_type'] == 'operator'].head(5)
        if len(worst_ops) > 0:
            print("\n 最差算子 (增益最低):")
            for _, row in worst_ops.iterrows():
                print(f" {row['feature_name']}: 增益 {row['gain']:+.4f}")
    finally:
        # Always release the connection, even if a step above raises.
        conn.close()


if __name__ == "__main__":
    main()

@ -0,0 +1,264 @@
import httpx
from httpx import BasicAuth, Timeout
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import logging
import time
import random
class SimpleAlphaFetcher:
    """Fetch the user's WorldQuant Brain alpha list page by page into a CSV."""

    def __init__(self, base_path=None):
        """Initialize the alpha fetcher and log in immediately.

        Args:
            base_path: directory for output files (default: current directory)
        """
        self.client = None
        self.base_path = Path(base_path) if base_path else Path.cwd()
        self.logger = self._setup_logger()
        # Log in as part of construction; self.client stays None on failure.
        self.login()

    def _setup_logger(self):
        """Create (or reuse) a module logger with a single stream handler."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def login(self):
        """Authenticate against the WorldQuant Brain API.

        Credentials are fetched from an internal Nacos config endpoint.
        Returns True on success; on ANY failure the client is closed and
        reset to None so callers can reliably test ``self.client``.
        """
        try:
            # Fetch account credentials from Nacos (internal network).
            with httpx.Client(timeout=10.0) as temp_client:
                nacos_resp = temp_client.get(
                    'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify'
                )
            if nacos_resp.status_code != 200:
                self.logger.error('获取账号密码失败')
                return False
            config = nacos_resp.json()
            username = config.get('user_name')
            password = config.get('password')
            if not username or not password:
                self.logger.error('账号密码不完整')
                return False
            self.logger.info(f"正在登录账户: {username}")
            # Generous timeouts: connect 30s, read 60s.
            timeout = Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0)
            self.client = httpx.Client(
                auth=BasicAuth(username, password),
                timeout=timeout
            )
            # Send the authentication request.
            response = self.client.post('https://api.worldquantbrain.com/authentication')
            if response.status_code == 201:
                self.logger.info("登录成功!")
                return True
            else:
                self.logger.error(f"登录失败: {response.status_code} - {response.text}")
                self.client.close()
                self.client = None
                return False
        except Exception as e:
            self.logger.error(f"登录异常: {e}")
            # Fix: don't leave a half-initialized client behind when the
            # authentication request raised — callers use `self.client`
            # to detect a successful login.
            if self.client is not None:
                self.client.close()
                self.client = None
            return False

    def fetch_alphas(
        self,
        max_pages: int = 100,
        limit: int = 100,
        delay: int = 1,
        region: str = "USA",
        universe: str = "TOP3000",
        hidden: str = "false",
        output_file_name: str = "alpha_list.csv",
        mode: str = "w",
        max_retries: int = 3,
    ):
        """Fetch the alpha list page by page, sort by fitness, save as CSV.

        Args:
            max_pages: maximum number of pages to crawl
            limit: page size
            delay: API ``settings.delay`` filter
            region: market region filter
            universe: universe filter
            hidden: include hidden alphas ("true"/"false")
            output_file_name: output CSV file name
            mode: "w" to overwrite, "a" to append without header
            max_retries: retries per HTTP request
        Returns:
            list: raw alpha dicts from the API ([] on failure)
        """
        if not self.client:
            self.logger.error("客户端未登录,无法执行获取")
            return []
        fetched_alphas = []
        offset = 0
        total_accessed = 0
        pages_fetched = 0
        # First ask for the total count (limit=1 probe request).
        count_url = (
            f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&hidden={hidden}"
            f"&limit=1&settings.delay={delay}&settings.region={region}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
        )
        total_available = 0
        for attempt in range(max_retries):
            try:
                count_response = self.client.get(count_url)
                total_available = count_response.json()["count"]
                break
            except Exception as e:
                self.logger.warning(f"获取 Alpha 总数失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(random.uniform(3, 5))
                else:
                    self.logger.error("获取 Alpha 总数最终失败")
                    return []
        if total_available == 0:
            self.logger.warning("未找到任何 Alpha")
            return []
        # Clamp the page budget to what is actually available.
        actual_max_pages = min(max_pages, (total_available + limit - 1) // limit)
        self.logger.info(f"共找到 {total_available} 个 Alpha,计划爬取 {actual_max_pages} 页...")
        pbar = tqdm(total=actual_max_pages, desc="爬取 Alpha 页面", unit="")
        while pages_fetched < actual_max_pages:
            url = (
                f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&limit={limit}"
                f"&offset={offset}&settings.delay={delay}&settings.region={region}&hidden={hidden}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
            )
            try:
                # Per-page retry loop; the final attempt re-raises.
                response = None
                for attempt in range(max_retries):
                    try:
                        response = self.client.get(url)
                        break
                    except Exception as e:
                        self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
                        if attempt < max_retries - 1:
                            time.sleep(random.uniform(3, 5))
                        else:
                            raise
                if response.status_code == 400:
                    self.logger.warning(f"遇到 API 限制 (offset={offset}),停止获取")
                    break
                response_data = response.json()
                if not isinstance(response_data, dict) or "results" not in response_data:
                    self.logger.error(f"API 返回了意外的数据: {response_data}")
                    break
                alphas = response_data["results"]
                if not alphas:
                    break
                fetched_alphas.extend(alphas)
                total_accessed += len(alphas)
                pages_fetched += 1
                # Progress bar bookkeeping.
                pbar.update(1)
                pbar.set_postfix({
                    "Region": region,
                    "已获取": total_accessed,
                    "本页": len(alphas),
                })
                if len(alphas) < limit:
                    self.logger.info(f"本页数据不足 {limit} 条,已到达末尾")
                    break
                offset += limit
                # Throttle between pages to avoid hammering the API.
                time.sleep(0.5)
            except Exception as e:
                self.logger.error(f"请求失败: {e}")
                break
        pbar.close()
        if not fetched_alphas:
            self.logger.warning("未获取到任何 Alpha!")
            return []
        # Sort by in-sample fitness (descending) via a temporary column.
        df = pd.DataFrame(fetched_alphas)
        df["temp_fitness"] = df.apply(
            lambda row: row["is"].get("fitness", 0) if isinstance(row.get("is"), dict) else 0,
            axis=1
        )
        df_sorted = df.sort_values(by="temp_fitness", ascending=False)
        df_sorted = df_sorted.drop("temp_fitness", axis=1)
        output_path = self.base_path / output_file_name
        if mode == "w":
            df_sorted.to_csv(output_path, index=False)
        elif mode == "a":
            df_sorted.to_csv(output_path, mode="a", index=False, header=False)
        self.logger.info(f"Alpha 列表已保存!共 {len(fetched_alphas)} 条记录!\n文件路径: {output_path}")
        return fetched_alphas

    def close(self):
        """Close the underlying HTTP client if present."""
        if self.client:
            self.client.close()
# 使用示例
if __name__ == "__main__":
    # Instantiating the fetcher logs in automatically.
    fetcher = SimpleAlphaFetcher()
    if fetcher.client:
        # Crawl up to 100 pages of 100 alphas from the USA / TOP3000 pool.
        results = fetcher.fetch_alphas(
            max_pages=100,
            limit=100,
            region="USA",
            universe="TOP3000",
            output_file_name="alpha_list.csv",
        )
        print(f"成功获取 {len(results) if results else 0} 个 Alpha")
    # Release the HTTP client.
    fetcher.close()

@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
CSV 导入 Alpha 数据到 SQLite修复去重版
"""
import sqlite3
import pandas as pd
import ast
import re
from pathlib import Path
# Path configuration: the CSV export and the SQLite DB live next to this script.
CSV_PATH = Path(__file__).parent / "alpha_list.csv"
SQLITE_PATH = Path(__file__).parent / "alpha_analysis.db"
# Set of the 84 recognized WorldQuant operators (duplicated from the
# feature-analysis module's list; kept as a set for O(1) membership tests).
OPERATORS_SET = {
    'add', 'abs', 'log', 'subtract', 'signed_power', 'sign', 'reverse', 'power',
    'multiply', 'min', 'max', 'inverse', 'sqrt', 's_log_1p', 'densify', 'divide',
    'not', 'and', 'less', 'equal', 'or', 'not_equal', 'greater', 'greater_equal',
    'less_equal', 'is_nan', 'if_else', 'ts_sum', 'ts_scale', 'ts_mean', 'ts_zscore',
    'ts_std_dev', 'kth_element', 'inst_tvr', 'ts_corr', 'ts_count_nans',
    'ts_target_tvr_decay', 'ts_median', 'ts_covariance', 'ts_decay_linear',
    'ts_product', 'ts_regression', 'ts_delta_limit', 'ts_step', 'ts_decay_exp_window',
    'ts_quantile', 'days_from_last_change', 'hump', 'last_diff_value', 'ts_arg_max',
    'ts_arg_min', 'ts_av_diff', 'ts_backfill', 'ts_rank', 'ts_delay', 'ts_delta',
    'winsorize', 'truncate', 'regression_neut', 'scale', 'rank', 'quantile',
    'normalize', 'zscore', 'vec_min', 'vec_count', 'vec_stddev', 'vec_range',
    'vec_avg', 'vec_sum', 'vec_max', 'left_tail', 'trade_when', 'right_tail',
    'bucket', 'group_rank', 'group_cartesian_product', 'group_backfill', 'group_mean',
    'group_neutralize', 'group_normalize', 'group_median', 'group_scale', 'group_zscore'
}
# Window lengths recognized as "window" features when they appear as
# integer literals inside an expression.
WINDOWS = {1, 2, 5, 10, 20, 30, 60, 90, 252}
def safe_parse_dict(s):
    """Safely parse a Python dict literal stored in a CSV cell.

    Returns {} for NaN, the empty string, or unparseable input.
    """
    if pd.isna(s) or s == '':
        return {}
    try:
        return ast.literal_eval(s)
    except (ValueError, SyntaxError, TypeError):
        # Narrowed from a bare except: literal_eval raises exactly these for
        # malformed literals or non-string input.
        return {}
def extract_features(expression):
    """Extract operator names, field names, and window sizes from an alpha expression.

    Returns:
        tuple of three lists: (operators, fields, windows).
    """
    if not expression:
        return [], [], []

    # Operators: identifiers immediately followed by '(' that are known operators.
    called = {m.group(1) for m in re.finditer(r'\b([a-z_][a-z0-9_]*)\s*\(', expression)}
    operators = called & OPERATORS_SET

    # Fields: remaining identifiers, excluding operators, numbers, short
    # tokens, and literal/noise words.
    noise = {'true', 'false', 'nan', 'null', 'constant', 'filter'}
    tokens = set(re.findall(r'\b([a-z][a-z0-9_]*)\b', expression))
    fields = {
        tok for tok in tokens
        if tok not in OPERATORS_SET
        and not tok.isdigit()
        and len(tok) > 2
        and tok not in noise
    }

    # Windows: integer literals drawn from the known window set.
    numbers = {int(m.group(1)) for m in re.finditer(r'\b(\d+)\b', expression)}
    windows = numbers & WINDOWS

    return list(operators), list(fields), list(windows)
def main():
    """Import alpha records from CSV into SQLite and extract expression features.

    Pipeline: read CSV -> parse the stringified dict columns -> drop invalid
    rows -> de-duplicate by alpha id (keeping highest fitness) -> rewrite the
    alpha_success table -> extract operator/field/window/neutralization
    features per expression into alpha_feature_long.
    """
    print("=" * 60)
    print("开始导入 CSV 到 SQLite")
    print("=" * 60)
    # 1. Read the CSV export.
    print("\n📖 读取 CSV...")
    df = pd.read_csv(CSV_PATH)
    print(f"{len(df):,} 条记录")
    # 2. Parse the embedded dict columns ('regular', 'settings', 'is').
    print("\n🔍 解析字段...")
    df['regular_dict'] = df['regular'].apply(safe_parse_dict)
    df['expression'] = df['regular_dict'].apply(lambda x: x.get('code', ''))
    df['settings_dict'] = df['settings'].apply(safe_parse_dict)
    df['neutralization'] = df['settings_dict'].apply(lambda x: x.get('neutralization'))
    df['universe'] = df['settings_dict'].apply(lambda x: x.get('universe'))
    df['is_dict'] = df['is'].apply(safe_parse_dict)
    df['sharpe'] = df['is_dict'].apply(lambda x: x.get('sharpe'))
    df['fitness'] = df['is_dict'].apply(lambda x: x.get('fitness'))
    df['drawdown'] = df['is_dict'].apply(lambda x: x.get('drawdown'))
    df['turnover'] = df['is_dict'].apply(lambda x: x.get('turnover'))
    # Keep only rows with a non-empty expression AND a fitness value.
    df_valid = df[df['expression'].notna() & (df['expression'] != '') & df['fitness'].notna()].copy()
    print(f" 有效记录: {len(df_valid):,}")
    # 3. De-duplicate: keep the highest-fitness row per alpha id
    # (CSV column 'id' becomes the DB column alpha_id below).
    print("\n🔄 去重(按 alpha_id 保留最高 fitness)...")
    df_unique = df_valid.sort_values('fitness', ascending=False).drop_duplicates(subset=['id'], keep='first')
    print(f" 去重后: {len(df_unique):,}")
    # 4. Open SQLite and clear the previous import (both tables are assumed
    # to exist already — presumably created by the schema script; confirm).
    conn = sqlite3.connect(str(SQLITE_PATH))
    cursor = conn.cursor()
    print("\n🗑 清空旧数据...")
    cursor.execute("DELETE FROM alpha_feature_long")
    cursor.execute("DELETE FROM alpha_success")
    conn.commit()
    # 5. Write alpha_success rows.
    print("\n💾 写入 alpha_success 表...")
    for idx, row in df_unique.iterrows():
        try:
            cursor.execute("""
                INSERT INTO alpha_success
                (alpha_id, expression, sharpe, fitness, drawdown, turnover, neutralization, universe)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                row['id'],
                row['expression'],
                row['sharpe'],
                row['fitness'],
                row['drawdown'],
                row['turnover'],
                row['neutralization'],
                row['universe']
            ))
        except sqlite3.IntegrityError:
            # Should not happen after de-duplication; skip defensively.
            print(f" 跳过重复: {row['id']}")
            continue
    conn.commit()
    # Map alpha_id -> SQLite rowid; the feature table references the rowid.
    cursor.execute("SELECT alpha_id, rowid FROM alpha_success")
    id_map = {row[0]: row[1] for row in cursor.fetchall()}
    print(f" 已写入 {len(id_map):,}")
    # 6. Extract features per expression and fill the long table.
    print("\n🔧 提取特征并写入 alpha_feature_long...")
    feature_count = 0
    processed = 0
    for idx, row in df_unique.iterrows():
        alpha_id = row['id']
        if alpha_id not in id_map:
            continue
        sqlite_id = id_map[alpha_id]
        expression = row['expression']
        operators, fields, windows = extract_features(expression)
        # Operator features.
        for op in operators:
            cursor.execute(
                "INSERT INTO alpha_feature_long (alpha_id, feature_type, feature_name) VALUES (?, ?, ?)",
                (sqlite_id, 'operator', op)
            )
            feature_count += 1
        # Field features.
        for field in fields:
            cursor.execute(
                "INSERT INTO alpha_feature_long (alpha_id, feature_type, feature_name) VALUES (?, ?, ?)",
                (sqlite_id, 'field', field)
            )
            feature_count += 1
        # Window features (feature_value carries the numeric window size).
        for w in windows:
            cursor.execute(
                "INSERT INTO alpha_feature_long (alpha_id, feature_type, feature_name, feature_value) VALUES (?, ?, ?, ?)",
                (sqlite_id, 'window', str(w), w)
            )
            feature_count += 1
        # Neutralization setting as a feature.
        if pd.notna(row['neutralization']):
            cursor.execute(
                "INSERT INTO alpha_feature_long (alpha_id, feature_type, feature_name) VALUES (?, ?, ?)",
                (sqlite_id, 'neutralization', row['neutralization'])
            )
            feature_count += 1
        processed += 1
        if processed % 1000 == 0:
            print(f" 已处理 {processed:,} 条,特征数 {feature_count:,}")
            conn.commit()  # commit every 1000 rows to bound the transaction
    conn.commit()
    # 7. Final counts.
    cursor.execute("SELECT COUNT(*) FROM alpha_success")
    success_count = cursor.fetchone()[0]
    cursor.execute("SELECT COUNT(*) FROM alpha_feature_long")
    final_feature_count = cursor.fetchone()[0]
    print(f"\n✅ 导入完成!")
    print(f" alpha_success: {success_count:,}")
    print(f" alpha_feature_long: {final_feature_count:,} 条特征")
    # 8. Spot-check: show the first 10 feature rows joined back to alphas.
    print("\n🔍 验证前 10 条特征:")
    cursor.execute("""
        SELECT a.alpha_id, f.feature_type, f.feature_name
        FROM alpha_success a
        JOIN alpha_feature_long f ON a.rowid = f.alpha_id
        LIMIT 10
    """)
    for row in cursor.fetchall():
        print(f" {row[0]} | {row[1]}: {row[2]}")
    conn.close()


if __name__ == "__main__":
    main()

@ -0,0 +1,20 @@
import pandas as pd
import json
import ast  # ast.literal_eval safely parses Python dict literals

df = pd.read_csv("alpha_list.csv", nrows=5)

# Approach 1: ast.literal_eval (recommended — handles Python literals).
for idx, record in df.iterrows():
    raw = record['regular']
    if pd.notna(raw):
        try:
            parsed = ast.literal_eval(raw)
            expr = parsed.get('code', '')
            print(f"Row {idx}: {expr[:80]}...")
        except Exception as e:
            print(f"Row {idx}: 解析失败 - {e}")

# Approach 2: peek at the first 200 characters of the raw string.
print("\n原始字符串示例:")
print(df.iloc[0]['regular'][:200])

@ -0,0 +1,20 @@
import re

expression = "last_diff_value(ts_sum(subtract(implied_volatility_call_120, implied_volatility_put_90), 20), 5)"

# Operators: every identifier directly followed by an opening parenthesis.
operators = set()
for match in re.finditer(r'\b([a-z_][a-z0-9_]*)\s*\(', expression):
    name = match.group(1)
    operators.add(name)
    print(f"匹配到算子: {name}")
print(f"\n所有算子: {operators}")

# Candidate identifiers: any lowercase token in the expression.
candidates = set(re.findall(r'\b([a-z][a-z0-9_]*)\b', expression))
print(f"\n所有候选词: {candidates}")

# Fields: candidates that are not operators, not numeric, and longer than 2 chars.
fields = [
    token for token in candidates
    if token not in operators and not token.isdigit() and len(token) > 2
]
print(f"\n字段: {fields}")

@ -0,0 +1,19 @@
import pandas as pd

df = pd.read_csv("alpha_list.csv")

# Inspect the dtype of the id column.
print(f"id 列类型: {df['id'].dtype}")

# Find duplicated ids (keep=False marks every member of a duplicate group).
duplicates = df[df.duplicated(subset=['id'], keep=False)]
print(f"\n重复的 id 数量: {len(duplicates)}")

if len(duplicates) > 0:
    print("\n重复的 id 示例:")
    for aid in duplicates['id'].unique()[:5]:
        rows = df[df['id'] == aid]
        print(f" {aid}: 出现 {len(rows)}")
        # Print each duplicate's fitness blob to see whether they differ.
        for _, row in rows.iterrows():
            print(f" fitness: {row['is'][:100] if isinstance(row['is'], str) else row['is']}...")

@ -0,0 +1,381 @@
import httpx
from httpx import BasicAuth, Timeout
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import logging
from typing import List, Optional
import time
class AlphaManager:
    """WorldQuant Brain API helper: login, alpha search/filtering, color tagging.

    Credentials come from an internal Nacos config service; the
    credentials_file parameter is accepted as a fallback but is not read
    by the current login implementation.
    """

    def __init__(self, credentials_file='account.txt', base_path=None):
        """
        Initialize the alpha manager and log in immediately.

        Args:
            credentials_file: local account file path (fallback; currently unused)
            base_path: directory where output files are written (default: CWD)
        """
        self.client = None
        self.base_path = Path(base_path) if base_path else Path.cwd()
        self.logger = self._setup_logger()
        # Log in during construction; callers check self.client for success.
        self.login(credentials_file)

    def _setup_logger(self):
        """Create (or reuse) a module logger with a single stream handler."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def login(self, credentials_file='account.txt'):
        """Log in to the WorldQuant Brain API.

        Pulls the username/password from Nacos, then authenticates with HTTP
        basic auth. Returns True on success, False otherwise.
        NOTE(review): the credentials_file argument is never read here.
        """
        try:
            # Fetch account credentials from the Nacos config service.
            with httpx.Client(timeout=10.0) as temp_client:
                nacos_resp = temp_client.get(
                    'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify'
                )
            if nacos_resp.status_code != 200:
                self.logger.error('获取账号密码失败')
                return False
            config = nacos_resp.json()
            username = config.get('user_name')
            password = config.get('password')
            if not username or not password:
                self.logger.error('账号密码不完整')
                return False
            self.logger.info(f"正在登录账户: {username}")
            # Create the client with generous timeouts (key fix):
            # connect 30s, read 60s.
            timeout = Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0)
            self.client = httpx.Client(
                auth=BasicAuth(username, password),
                timeout=timeout
            )
            # Send the authentication request.
            response = self.client.post('https://api.worldquantbrain.com/authentication')
            if response.status_code == 201:
                self.logger.info("登录成功!")
                return True
            else:
                self.logger.error(f"登录失败: {response.status_code} - {response.text}")
                self.client.close()
                self.client = None
                return False
        except Exception as e:
            # NOTE(review): if the exception occurs after the client was
            # created, self.client stays set even though login failed —
            # confirm callers tolerate this (cf. SimpleAlphaFetcher).
            self.logger.error(f"登录异常: {e}")
            return False

    def update_alpha_color(self, alpha_id: str, color: str) -> bool:
        """PATCH an alpha's color tag; returns True on HTTP 200."""
        if not self.client:
            self.logger.error("客户端未登录")
            return False
        try:
            update_data = {"color": color}
            response = self.client.patch(
                f"https://api.worldquantbrain.com/alphas/{alpha_id}",
                json=update_data
            )
            return response.status_code == 200
        except Exception as e:
            self.logger.error(f"标记颜色失败: {e}")
            return False

    def wechat_check_corr_message(self, message: str):
        """WeChat notification hook — currently just logs the message."""
        self.logger.info(f"通知消息: {message}")

    def _make_request_with_retry(self, url: str, max_retries: int = 3, retry_delay: float = 2.0):
        """
        GET with retries.

        Args:
            url: request URL
            max_retries: maximum number of attempts
            retry_delay: seconds to sleep between attempts
        Returns:
            httpx.Response, or re-raises after the final failed attempt
        NOTE(review): not referenced by get_alphas, which inlines the same
        retry pattern instead.
        """
        for attempt in range(max_retries):
            try:
                response = self.client.get(url)
                return response
            except Exception as e:
                self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                else:
                    self.logger.error(f"请求最终失败: {url}")
                    raise
        return None

    def get_alphas(
        self,
        total_alphas: int = 5,
        limit: int = 100,
        delay: int = 1,
        region: str = "USA",
        universe: str = "TOP3000",
        required_fields: Optional[List[str]] = None,
        match_mode: str = "all",
        min_sharpe: Optional[float] = None,
        min_fitness: Optional[float] = None,
        hidden: str = "false",
        submittable: bool = False,
        auto_color: bool = False,
        color: str = "GREEN",
        output_file_name: str = "alpha_search_list.csv",
        mode: str = "w",
        max_retries: int = 3,
    ):
        """
        Search alphas and filter them.

        Args:
            total_alphas: maximum number of alphas to collect
            limit: page size per API request
            delay: API 'settings.delay' filter
            region: market region, e.g. "USA", "CHINA"
            universe: universe, e.g. "TOP3000"
            required_fields: keywords to look for in the expression, e.g. ['put', 'call']
            match_mode: keyword matching, "all" (every keyword) or "any" (at least one)
            min_sharpe: minimum |sharpe| threshold
            min_fitness: minimum |fitness| threshold
            hidden: include hidden alphas ("true"/"false")
            submittable: keep only alphas with no FAIL check results
            auto_color: tag qualifying alphas with a color
            color: the color to apply
            output_file_name: output CSV file name
            mode: "w" overwrite / "a" append (no header)
            max_retries: retries per HTTP request
        Returns:
            list: matching alphas (at most total_alphas)
        """
        if not self.client:
            self.logger.error("客户端未登录,无法执行搜索")
            return []
        # Validate the color parameter.
        valid_colors = [None, "GREEN", "YELLOW", "RED", "BLUE", "PURPLE", "ORANGE"]
        if color not in valid_colors:
            raise ValueError(f"颜色必须是以下之一: {valid_colors}")
        fetched_alphas = []
        offset = 0
        total_accessed = 0
        colored_count = 0
        # Probe the total count first (with retries).
        count_url = (
            f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&hidden={hidden}"
            f"&limit=1&settings.delay={delay}&settings.region={region}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
        )
        total_available = 0
        for attempt in range(max_retries):
            try:
                count_response = self.client.get(count_url)
                total_available = count_response.json()["count"]
                break
            except Exception as e:
                self.logger.warning(f"获取Alpha总数失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2.0)
                else:
                    self.logger.error("获取Alpha总数最终失败")
                    return []
        if total_available == 0:
            self.logger.warning("未找到任何Alpha")
            return []
        self.logger.info(f"共找到 {total_available} 个Alpha,开始筛选...")
        pbar = tqdm(total=min(total_available, 10000), desc="扫描Alpha", unit="")
        while len(fetched_alphas) < total_alphas and offset < total_available:
            # Build the page request URL.
            url = (
                f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&limit={limit}"
                f"&offset={offset}&settings.delay={delay}&settings.region={region}&hidden={hidden}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
            )
            try:
                # Per-page retry loop; the final attempt re-raises.
                response = None
                for attempt in range(max_retries):
                    try:
                        response = self.client.get(url)
                        break
                    except Exception as e:
                        self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
                        if attempt < max_retries - 1:
                            time.sleep(2.0)
                        else:
                            raise
                if response.status_code == 400:
                    self.logger.warning(f"遇到API限制 (offset={offset}),停止获取更多数据")
                    break
                response_data = response.json()
                if not isinstance(response_data, dict) or "results" not in response_data:
                    self.logger.error(f"API返回了意外的数据: {response_data}")
                    break
                alphas = response_data["results"]
                if not alphas:
                    break
                total_accessed += len(alphas)
                # Keyword filtering on the expression code.
                if required_fields:
                    if match_mode == "all":
                        filtered_alphas = [
                            alpha for alpha in alphas
                            if all(field in alpha["regular"]["code"] for field in required_fields)
                        ]
                    elif match_mode == "any":
                        filtered_alphas = [
                            alpha for alpha in alphas
                            if any(field in alpha["regular"]["code"] for field in required_fields)
                        ]
                    else:
                        raise ValueError("match_mode 必须是 'all''any'")
                else:
                    filtered_alphas = alphas
                # Threshold filtering on |sharpe| / |fitness|.
                final_filtered = []
                for alpha in filtered_alphas:
                    sharpe = alpha.get("is", {}).get("sharpe", 0)
                    fitness = alpha.get("is", {}).get("fitness", 0)
                    sharpe_ok = (min_sharpe is None) or (sharpe is not None and abs(sharpe) >= min_sharpe)
                    fitness_ok = (min_fitness is None) or (fitness is not None and abs(fitness) >= min_fitness)
                    if sharpe_ok and fitness_ok:
                        if submittable:
                            checks = alpha.get("is", {}).get("checks", [])
                            fail_count = sum(1 for check in checks if check.get("result") == "FAIL")
                            # Optionally color alphas that pass all checks.
                            if fail_count == 0 and auto_color:
                                alpha_id = alpha.get("id")
                                if alpha_id:
                                    success = self.update_alpha_color(alpha_id, color)
                                    if success:
                                        colored_count += 1
                            if fail_count == 0:
                                final_filtered.append(alpha)
                        else:
                            final_filtered.append(alpha)
                fetched_alphas.extend(final_filtered)
                # Progress bar bookkeeping.
                pbar.update(len(alphas))
                pbar.set_postfix({
                    "Region": region,
                    "已扫描": total_accessed,
                    "找到": len(fetched_alphas),
                    "本次": len(final_filtered),
                    "标记": colored_count,
                })
                if len(alphas) < limit:
                    break
                offset += limit
                # Throttle between requests to avoid hammering the API.
                time.sleep(0.5)
            except Exception as e:
                self.logger.error(f"请求失败: {e}")
                break
        pbar.close()
        # Report coloring statistics.
        if auto_color and colored_count > 0:
            self.logger.info(f"共标记了 {colored_count} 个Alpha颜色为{color}")
            self.wechat_check_corr_message(f"共标记了 {colored_count} 个Alpha颜色为{color}")
        alpha_list = fetched_alphas[:total_alphas]
        if not alpha_list:
            self.logger.warning("未找到任何符合条件的Alpha!请检查筛选条件是否过于严格。")
        else:
            # Sort by in-sample fitness (descending) via a temporary column.
            df = pd.DataFrame(alpha_list)
            df["temp_fitness"] = df.apply(
                lambda row: row["is"].get("fitness", 0) if isinstance(row.get("is"), dict) else 0,
                axis=1
            )
            df_sorted = df.sort_values(by="temp_fitness", ascending=False)
            df_sorted = df_sorted.drop("temp_fitness", axis=1)
            output_path = self.base_path / output_file_name
            if mode == "w":
                df_sorted.to_csv(output_path, index=False)
            elif mode == "a":
                df_sorted.to_csv(output_path, mode="a", index=False, header=False)
            self.logger.info(f"批量回测初步检测结果已经下载!共{len(alpha_list)}条记录!\n{output_path}文件名保存!")
        return alpha_list

    def close(self):
        """Close the underlying HTTP client if present."""
        if self.client:
            self.client.close()
# 使用示例
if __name__ == "__main__":
    # The manager logs in during construction.
    manager = AlphaManager()
    if manager.client:
        # Search for submittable alphas whose expression contains "put",
        # tagging the qualifying ones GREEN.
        results = manager.get_alphas(
            total_alphas=10,
            region="USA",
            universe="TOP3000",
            required_fields=['put'],
            match_mode="all",
            submittable=True,
            auto_color=True,
            color="GREEN",
            output_file_name="my_alpha_list.csv",
        )
        print(f"找到 {len(results) if results else 0} 个符合条件的Alpha")
    # Release the HTTP client.
    manager.close()

@ -1,2 +0,0 @@
group_neutralize(rank(multiply(ts_delta(vwap, 3), ts_rank(divide(volume, ts_mean(volume, 10)), 20))), sector)
rank(ts_corr(divide(close, ts_delay(close, 5)), divide(volume, ts_delay(volume, 5)), 15)) - rank(ts_std_dev(close, 10))

@ -1,35 +0,0 @@
作为WorldQuant因子挖掘专家,请基于以下多维度框架生成20个原创alpha因子。请严格遵循WebSim语法规范:
因子设计维度(也可以你自己设计维度):
价格趋势 - 动量/均值回归/突破策略
量价关系 - 成交量确认/量价背离
波动特征 - 波动率变化/波动聚集性
横截面特征 - 行业中性/市值因子
技术形态 - 价格位置/高低点突破
流动性特征 - 成交量分布/VWAP关系
约束条件:
仅使用我提供的操作符
可用字段:open, high, low, close, volume, vwap, sector, country, market_cap
禁用:pandas, numpy, 自定义函数,机器学习
每个表达式必须独立有效且可直接在WebSim运行
输出要求:
严格按以下格式,一行一个WebSim表达式,不包含任何解释:
[表达式1]
[表达式2]
...
[表达式20]
注意: 确保因子逻辑清晰,避免过度拟合,保持表达式简洁高效。优先考虑具有经济直觉的因子组合。

@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""
Alpha 特征分析数据库 - 长表方案
适用场景:95,003 条表达式、6,488 个字段、84 个算子
"""
import sqlite3
import os
import re
from pathlib import Path
# SQLite database file, colocated with this script.
DB_PATH = Path(__file__).parent / "alpha_analysis.db"
# Complete list of the 84 operators recognized in alpha expressions.
# NOTE: per-category counts below were corrected to match the actual lists.
OPERATORS = [
    # Arithmetic (16)
    'add', 'abs', 'log', 'subtract', 'signed_power', 'sign', 'reverse', 'power',
    'multiply', 'min', 'max', 'inverse', 'sqrt', 's_log_1p', 'densify', 'divide',
    # Logical (11)
    'not', 'and', 'less', 'equal', 'or', 'not_equal', 'greater', 'greater_equal',
    'less_equal', 'is_nan', 'if_else',
    # Time Series (29)
    'ts_sum', 'ts_scale', 'ts_mean', 'ts_zscore', 'ts_std_dev', 'kth_element',
    'inst_tvr', 'ts_corr', 'ts_count_nans', 'ts_target_tvr_decay', 'ts_median',
    'ts_covariance', 'ts_decay_linear', 'ts_product', 'ts_regression', 'ts_delta_limit',
    'ts_step', 'ts_decay_exp_window', 'ts_quantile', 'days_from_last_change', 'hump',
    'last_diff_value', 'ts_arg_max', 'ts_arg_min', 'ts_av_diff', 'ts_backfill',
    'ts_rank', 'ts_delay', 'ts_delta',
    # Cross Sectional (8)
    'winsorize', 'truncate', 'regression_neut', 'scale', 'rank', 'quantile',
    'normalize', 'zscore',
    # Vector (7)
    'vec_min', 'vec_count', 'vec_stddev', 'vec_range', 'vec_avg', 'vec_sum', 'vec_max',
    # Transformational (4)
    'left_tail', 'trade_when', 'right_tail', 'bucket',
    # Group (9)
    'group_rank', 'group_cartesian_product', 'group_backfill', 'group_mean',
    'group_neutralize', 'group_normalize', 'group_median', 'group_scale', 'group_zscore'
]
# Commonly used rolling-window lengths (in trading days).
WINDOWS = [1, 2, 5, 10, 20, 30, 60, 90, 252]
# Supported neutralization modes.
NEUTRALIZATIONS = ['FAST', 'SUBINDUSTRY', 'INDUSTRY', 'NONE']
def get_connection():
    """Open a connection to the analysis database with dict-like row access."""
    connection = sqlite3.connect(str(DB_PATH))
    # sqlite3.Row lets callers access columns by name as well as index.
    connection.row_factory = sqlite3.Row
    return connection
def create_tables(conn):
    """Create the full six-table analysis schema if it does not exist yet.

    Args:
        conn: open sqlite3 connection; changes are committed before returning.
    """
    cursor = conn.cursor()
    # =========================================================
    # Table 1: alpha_success (successful samples - main table)
    # =========================================================
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS alpha_success (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            alpha_id VARCHAR(64) UNIQUE,
            expression TEXT NOT NULL,
            sharpe REAL,
            fitness REAL,
            returns REAL,
            drawdown REAL,
            turnover REAL,
            long_count INTEGER,
            short_count INTEGER,
            neutralization VARCHAR(32),
            universe VARCHAR(32),
            dataset VARCHAR(32),
            score REAL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    # Indexes for the common sort/filter columns.
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_success_score ON alpha_success(score)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_success_sharpe ON alpha_success(sharpe)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_success_neutralization ON alpha_success(neutralization)")
    # =========================================================
    # Table 2: alpha_feature_long (feature long table - core)
    # One row per feature: operator, field, window, configuration.
    # =========================================================
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS alpha_feature_long (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            alpha_id INTEGER NOT NULL,
            feature_type VARCHAR(32) NOT NULL, -- operator, field, window, neutralization
            feature_name VARCHAR(128) NOT NULL,
            feature_value INTEGER DEFAULT 1, -- 1=存在窗口值存具体数字
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (alpha_id) REFERENCES alpha_success(id) ON DELETE CASCADE
        )
    """)
    # Indexes to accelerate aggregate/statistics queries.
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_feature_type_name ON alpha_feature_long(feature_type, feature_name)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_feature_alpha_id ON alpha_feature_long(alpha_id)")
    # =========================================================
    # Table 3: feature_statistics (materialized stats - feedback core)
    # Stores the gain value of each feature, consumed by the generator.
    # =========================================================
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS feature_statistics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            feature_type VARCHAR(32) NOT NULL,
            feature_name VARCHAR(128) NOT NULL,
            gain_value REAL, -- 增益值 = 特征平均分 - 全局平均分
            avg_score REAL, -- 包含该特征的表达式平均分
            global_avg_score REAL, -- 统计时的全局平均分
            sample_count INTEGER, -- 包含该特征的表达式数量
            positive_effect INTEGER DEFAULT 0, -- 1=正向特征(gain>0.05), -1=负向特征(gain<-0.05)
            is_active INTEGER DEFAULT 1, -- 是否启用反馈
            calculated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(feature_type, feature_name)
        )
    """)
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_stats_positive ON feature_statistics(positive_effect)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_stats_gain ON feature_statistics(gain_value DESC)")
    # =========================================================
    # Table 4: alpha_failure (failed samples - incompatible combos)
    # =========================================================
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS alpha_failure (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            operator VARCHAR(64) NOT NULL,
            field VARCHAR(64) NOT NULL,
            expression TEXT,
            error_type VARCHAR(32),
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_failure_combo ON alpha_failure(operator, field)")
    # =========================================================
    # Table 5: operator_field_blacklist (derived from failure stats)
    # =========================================================
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS operator_field_blacklist (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            operator VARCHAR(64) NOT NULL,
            field VARCHAR(64) NOT NULL,
            fail_count INTEGER DEFAULT 0,
            total_attempts INTEGER DEFAULT 0,
            fail_rate REAL DEFAULT 0.0,
            is_active INTEGER DEFAULT 1,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(operator, field)
        )
    """)
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_blacklist_active ON operator_field_blacklist(is_active)")
    # =========================================================
    # Table 6: generation_bias (generation weighting fed back to generator)
    # =========================================================
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS generation_bias (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            feature_type VARCHAR(32) NOT NULL,
            feature_name VARCHAR(128) NOT NULL,
            bias_weight REAL DEFAULT 1.0, -- >1 正向<1 负向1 中性
            gain_value REAL,
            sample_count INTEGER,
            is_active INTEGER DEFAULT 1,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(feature_type, feature_name)
        )
    """)
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_bias_weight ON generation_bias(bias_weight DESC)")
    conn.commit()
    print("✅ 所有表创建成功")
    print(f"📁 数据库路径: {DB_PATH}")
    print(f"\n📊 表结构:")
    print(f" - alpha_success: 主表,存储表达式和回测结果")
    print(f" - alpha_feature_long: 长表,每个特征一行")
    print(f" - feature_statistics: 物化统计,特征增益值")
    print(f" - alpha_failure: 失败样本记录")
    print(f" - operator_field_blacklist: 不兼容黑名单")
    print(f" - generation_bias: 生成偏向配置")
def show_table_info(conn):
    """Print the current row count of every table in the schema."""
    cursor = conn.cursor()
    tables = ['alpha_success', 'alpha_feature_long', 'feature_statistics',
              'alpha_failure', 'operator_field_blacklist', 'generation_bias']
    print("\n📋 当前表记录数:")
    for table in tables:
        row_count = cursor.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
        print(f" - {table}: {row_count:,}")
if __name__ == "__main__":
    # If the database already exists, ask before wiping and rebuilding it.
    if DB_PATH.exists():
        print(f" 数据库已存在: {DB_PATH}")
        answer = input("是否删除并重建?(y/N): ").strip().lower()
        if answer != 'y':
            print("❌ 已取消操作")
            exit(0)
        os.remove(DB_PATH)
        print("🗑 已删除旧数据库")
    # Build the schema and report table counts.
    conn = get_connection()
    try:
        create_tables(conn)
        show_table_info(conn)
    finally:
        conn.close()
    print("\n✅ 初始化完成!下一步:导入数据并提取特征")

@ -1,148 +0,0 @@
# -*- coding: utf-8 -*-
import os
import psycopg2
class AlphaTxtImporter:
    """Imports alpha expressions from a plain-text file into PostgreSQL.

    Each non-empty, non-comment line of the file is treated as one alpha
    expression and inserted into ``alpha_prepare``; duplicates are skipped
    via ``ON CONFLICT (alpha) DO NOTHING``.
    """

    def __init__(self, txt_file='alpha.txt'):
        self.database_name = "alpha"
        self.txt_file = txt_file
        self.connection = None  # lazily opened by get_connection()

    def get_connection(self):
        """Return an open psycopg2 connection, creating one if needed."""
        if self.connection is None or self.connection.closed:
            # SECURITY(review): credentials are hard-coded in source; move
            # them to environment variables or a config file kept out of
            # version control.
            self.connection = psycopg2.connect(
                host="localhost",
                port="5432",
                database=self.database_name,
                user="jack",
                password="aaaAAA111"
            )
        return self.connection

    def close_connection(self):
        """Close the database connection if it is open."""
        if self.connection and not self.connection.closed:
            self.connection.close()

    def load_alpha_from_txt(self):
        """Load alpha expressions from the text file.

        Returns:
            list[str]: one expression per non-empty, non-comment line;
            an empty list when the file does not exist.
        """
        if not os.path.exists(self.txt_file):
            print(f"未找到 {self.txt_file} 文件")
            return []
        alpha_list = []
        with open(self.txt_file, 'r', encoding='utf-8') as file:
            for line_num, line in enumerate(file, 1):
                line = line.strip()
                if line and not line.startswith('#'):  # skip blanks and comments
                    alpha_list.append(line)
                    print(f"{line_num} 行: {line}")
        return alpha_list

    def import_to_database(self, alpha_list):
        """Insert expressions into alpha_prepare, skipping duplicates.

        Returns:
            tuple[int, int]: (imported_count, duplicate_count).
        """
        if not alpha_list:
            print("没有要导入的alpha表达式")
            # BUG FIX: previously returned a bare 0 here while the normal
            # path returns a 2-tuple; callers unpack two values.
            return 0, 0
        conn = self.get_connection()
        cursor = conn.cursor()
        imported_count = 0
        duplicate_count = 0
        for alpha in alpha_list:
            try:
                # ON CONFLICT avoids inserting the same expression twice.
                cursor.execute('''
                    INSERT INTO alpha_prepare (alpha, unused)
                    VALUES (%s, %s) ON CONFLICT (alpha) DO NOTHING
                ''', (alpha, True))
                if cursor.rowcount > 0:
                    imported_count += 1
                    print(f"✅ 导入: {alpha}")
                else:
                    duplicate_count += 1
                    print(f" 跳过(重复): {alpha}")
            except Exception as e:
                # Best-effort: report the failed row and keep importing.
                print(f"❌ 导入失败: {alpha} - 错误: {e}")
                continue
        conn.commit()
        cursor.close()
        return imported_count, duplicate_count

    def check_existing_data(self):
        """Report how many rows (total / still unused) exist in the table.

        Returns:
            tuple[int, int]: (total_count, unused_count).
        """
        conn = self.get_connection()
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM alpha_prepare")
        total_count = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM alpha_prepare WHERE unused = TRUE")
        unused_count = cursor.fetchone()[0]
        cursor.close()
        print(f"数据库统计:")
        print(f" - 总alpha数量: {total_count}")
        print(f" - 未使用的alpha数量: {unused_count}")
        return total_count, unused_count

    def run_import(self):
        """End-to-end import: check DB state, load file, insert, report."""
        print(f"开始从 {self.txt_file} 导入alpha表达式到 PostgreSQL...")
        # Snapshot current DB state for the before/after diff below.
        print("\n=== 数据库当前状态 ===")
        total_count, unused_count = self.check_existing_data()
        # Load the expressions from disk.
        print(f"\n=== 加载 {self.txt_file} ===")
        alpha_list = self.load_alpha_from_txt()
        print(f"从文件加载了 {len(alpha_list)} 个alpha表达式")
        if not alpha_list:
            print("没有找到有效的alpha表达式,导入终止")
            return
        # Insert into the database.
        print(f"\n=== 导入到数据库 ===")
        imported_count, duplicate_count = self.import_to_database(alpha_list)
        # Summarize results.
        print(f"\n=== 导入结果 ===")
        print(f"文件中的表达式数量: {len(alpha_list)}")
        print(f"成功导入: {imported_count}")
        print(f"跳过重复: {duplicate_count}")
        # Re-check DB state to show the delta.
        print(f"\n=== 导入后数据库状态 ===")
        new_total_count, new_unused_count = self.check_existing_data()
        print(f"新增记录: {new_total_count - total_count}")
        print(f"新增未使用记录: {new_unused_count - unused_count}")
        print("\n导入完成!")
def main():
    """Entry point: import expressions from a text file into PostgreSQL."""
    source_file = 'alpha.txt'  # change here to import a different file
    importer = AlphaTxtImporter(source_file)
    try:
        importer.run_import()
    finally:
        # Always release the DB connection, even on failure.
        importer.close_connection()


if __name__ == "__main__":
    main()

@ -0,0 +1,299 @@
import ast
import json
import os
import time
from datetime import datetime

import httpx
from httpx import BasicAuth
def login(credentials_file='account.txt'):
    """Log in to the WorldQuant Brain API.

    Reads ``['username', 'password']`` from *credentials_file*. When the
    file is missing, creates an empty template and exits.

    Returns:
        httpx.Client | None: an authenticated client, or None on failure.
    """
    if not os.path.exists(credentials_file):
        print("未找到 account.txt 文件")
        with open(credentials_file, 'w') as f:
            f.write("")
        print("account.txt 文件已创建,请填写账号密码, 格式: ['username', 'password']")
        exit(1)
    with open(credentials_file) as f:
        # SECURITY FIX: ast.literal_eval only parses Python literals,
        # unlike eval(), which would execute arbitrary code placed in the
        # credentials file.
        credentials = ast.literal_eval(f.read())
    username, password = credentials[0], credentials[1]
    print(f"正在登录账户: {username}")
    # Client carries Basic auth on every subsequent request.
    client = httpx.Client(auth=BasicAuth(username, password))
    response = client.post('https://api.worldquantbrain.com/authentication')
    print(f"登录状态: {response.status_code}")
    if response.status_code == 201:
        print("登录成功!")
        return client
    print(f"登录失败: {response.json()}")
    client.close()
    return None
def get_alphas_data(client, page_size=10):
    """Fetch every alpha of the current user, following pagination.

    Args:
        client: authenticated httpx.Client returned by login().
        page_size: number of records requested per page.

    Returns:
        dict: {'count': number fetched, 'alphas': list of raw records}.
    """
    all_alphas = []
    offset = 0
    total_count = 0
    while True:
        print(f"正在获取第 {offset//page_size + 1} 页数据 (offset={offset})...")
        # The sharpe filter is encoded in the parameter NAME ('is.sharpe>1')
        # with an empty value, because the API expects the comparison
        # operator inside the key itself.
        params = {
            'limit': page_size,
            'offset': offset,
            'order': '-dateSubmitted',
            'is.sharpe>1': '',
            'hidden': 'false'
        }
        # Optional status filter, disabled by default:
        # params['status'] = 'UNSUBMITTED'
        url = 'https://api.worldquantbrain.com/users/self/alphas'
        try:
            print(f"请求参数: {params}")
            response = client.get(url, params=params)
            print(f"获取数据状态码: {response.status_code}")
            if response.status_code != 200:
                print(f"获取数据失败: {response.text}")
                break
            data = response.json()
            # The first page carries the total record count.
            if offset == 0:
                total_count = data.get('count', 0)
                print(f"总数据量: {total_count}")
                if total_count == 0:
                    print("没有Alpha因子数据")
                    break
            # NOTE(review): this reads the page under 'alphas', while
            # extract_alpha_info() consumes 'results' — confirm which key
            # the API actually returns; hitting the else-branch repeatedly
            # would indicate a key mismatch.
            if 'alphas' in data:
                current_page_alphas = data['alphas']
                all_alphas.extend(current_page_alphas)
                print(f"当前页获取到 {len(current_page_alphas)} 条alpha数据")
            else:
                print("响应中没有'alphas'字段")
                print(f"响应数据结构: {data.keys()}")
            # Stop once the advertised total has been accumulated.
            current_count = offset + len(data.get('alphas', []))
            print(f"进度: {current_count}/{total_count}")
            if current_count >= total_count:
                print("所有数据已获取完毕")
                break
            # Advance to the next page.
            offset += page_size
            # Small delay to avoid hammering the API.
            time.sleep(1)
        except Exception as e:
            print(f"请求发生错误: {e}")
            break
    return {
        'count': len(all_alphas),
        'alphas': all_alphas
    }
def extract_alpha_info(alpha_data):
    """Extract the useful fields from a raw alpha payload.

    Args:
        alpha_data: dict with a 'results' list of raw alpha records.

    Returns:
        list[dict]: one flattened record per alpha (basic info, code,
        settings, performance, checks, classifications). Missing values
        default to 'N/A' (strings) or 0 (numbers).
    """
    extracted_data = []
    # Validate the payload shape before iterating.
    if not alpha_data or 'results' not in alpha_data:
        print("无效的数据格式")
        return extracted_data
    for alpha in alpha_data['results']:
        # Hoist the nested sub-dicts once per alpha instead of calling
        # alpha.get(...) repeatedly for every field below.
        regular = alpha.get('regular', {})
        settings = alpha.get('settings', {})
        perf = alpha.get('is', {})
        info = {
            # Basic info
            'id': alpha.get('id', 'N/A'),
            'name': alpha.get('name', 'N/A'),
            'author': alpha.get('author', 'N/A'),
            'status': alpha.get('status', 'N/A'),
            'grade': alpha.get('grade', 'N/A'),
            'stage': alpha.get('stage', 'N/A'),
            'date_created': alpha.get('dateCreated', 'N/A'),
            'date_submitted': alpha.get('dateSubmitted', 'N/A'),
            # Expression code and metadata
            'code': regular.get('code', 'N/A'),
            'operator_count': regular.get('operatorCount', 0),
            # Backtest settings
            'settings': {
                'region': settings.get('region', 'N/A'),
                'universe': settings.get('universe', 'N/A'),
                'neutralization': settings.get('neutralization', 'N/A'),
                'truncation': settings.get('truncation', 'N/A'),
                'start_date': settings.get('startDate', 'N/A'),
                'end_date': settings.get('endDate', 'N/A')
            },
            # In-sample performance metrics
            'performance': {
                'sharpe': perf.get('sharpe', 0),
                'fitness': perf.get('fitness', 0),
                'returns': perf.get('returns', 0),
                'turnover': perf.get('turnover', 0),
                'drawdown': perf.get('drawdown', 0),
                'pnl': perf.get('pnl', 0),
                'book_size': perf.get('bookSize', 0),
                'long_count': perf.get('longCount', 0),
                'short_count': perf.get('shortCount', 0),
                'margin': perf.get('margin', 0)
            },
            # Check results, filled below
            'checks': {}
        }
        # Flatten the per-check records into a name-keyed dict.
        for check in perf.get('checks', []):
            check_name = check.get('name', 'N/A')
            info['checks'][check_name] = {
                'result': check.get('result', 'N/A'),
                'limit': check.get('limit', 'N/A'),
                'value': check.get('value', 'N/A')
            }
        # Classification names (empty list when absent).
        classifications = alpha.get('classifications', [])
        info['classifications'] = [cls.get('name', 'N/A') for cls in classifications] if classifications else []
        extracted_data.append(info)
    return extracted_data
def print_alpha_summary(extracted_data):
    """Print a human-readable summary of the extracted alpha records."""
    if len(extracted_data) == 0:
        print("\n没有提取到任何Alpha因子数据")
        return
    print(f"\n总共提取了 {len(extracted_data)} 个Alpha因子")
    print("=" * 100)
    for idx, record in enumerate(extracted_data, 1):
        code = record['code']
        settings = record['settings']
        perf = record['performance']
        print(f"\n{idx}. Alpha ID: {record['id']}")
        # Truncate long expressions to keep the listing readable.
        print(f" 代码: {code[:80]}{'...' if len(code) > 80 else ''}")
        print(f" 状态: {record['status']} | 等级: {record['grade']} | 阶段: {record['stage']}")
        print(f" 创建时间: {record['date_created']}")
        print(f" 设置: {settings['region']} | {settings['universe']} | "
              f"中性化: {settings['neutralization']}")
        print(f" 表现: Sharpe={perf['sharpe']:.2f} | "
              f"Fitness={perf['fitness']:.2f} | "
              f"收益={perf['returns']:.2%} | "
              f"换手率={perf['turnover']:.2f}")
        # Show the key pass/fail checks when present.
        key_checks = ['LOW_SHARPE', 'LOW_FITNESS', 'LOW_TURNOVER', 'HIGH_TURNOVER']
        check_results = [
            f"{name}: {'' if record['checks'][name]['result'] == 'PASS' else ''}"
            for name in key_checks
            if name in record['checks']
        ]
        if check_results:
            print(f" 检查: {', '.join(check_results)}")
def save_to_json(data, filename=None):
    """Save *data* to a JSON file; skip saving when there is no data.

    Args:
        data: dict expected to carry a 'total_count' key.
        filename: target path; defaults to a timestamped name.

    Returns:
        bool: True when the file was written, False otherwise.
    """
    # Nothing fetched — do not create an empty file.
    if data.get('total_count', 0) == 0:
        print("\n没有获取到任何数据,跳过保存文件")
        return False
    if filename is None:
        # Timestamped name: alphas_YYYYMMDD_HHMMSS.json
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"alphas_{timestamp}.json"
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        # BUG FIX: the message previously printed a literal placeholder
        # instead of the actual file name.
        print(f"\n数据已保存到文件: {filename}")
        print(f"文件路径: {os.path.abspath(filename)}")
        return True
    except Exception as e:
        print(f"保存文件失败: {e}")
        return False
# 使用示例
def process_alpha_data(raw_data):
    """Extract, summarize, and return alpha info from a raw API payload."""
    extracted = extract_alpha_info(raw_data)
    # Side effect: print a human-readable summary of what was extracted.
    print_alpha_summary(extracted)
    return extracted
def main():
    """End-to-end flow: login, fetch all alphas, summarize, save to JSON."""
    client = login()
    if not client:
        return
    try:
        # Fetch every page of alpha data in one go.
        all_data = get_alphas_data(client, page_size=10)
        print(f"\n最终结果: 总共获取到 {all_data['count']} 条alpha数据")
        has_data = bool(all_data) and 'alphas' in all_data and all_data['count'] > 0
        if not has_data:
            print("\n没有获取到任何Alpha数据")
            return
        # Flatten and summarize the raw records.
        processed_data = process_alpha_data({'results': all_data['alphas']})
        # Persist both the raw and the extracted data.
        output_data = {
            'fetch_time': datetime.now().isoformat(),
            'total_count': all_data['count'],
            'raw_data': all_data['alphas'],
            'extracted_data': processed_data,
        }
        save_to_json(output_data)  # skips writing when empty
    finally:
        # Always release the HTTP session.
        client.close()
        print("\n连接已关闭")


# Example usage
if __name__ == "__main__":
    main()

@ -0,0 +1,11 @@
地址: https://api.worldquantbrain.com/users/self/alphas?
limit=22&
offset=0&
status=UNSUBMITTED%1FIS_FAIL&
settings.region=EUR&
settings.universe=TOPCS1600&
is.sharpe%3E1&
is.fitness%3E0.9&
order=-dateCreated&
hidden=false

@ -0,0 +1,4 @@
{
"username": "jack0210_@hotmail.com",
"password": "!QAZ2wsx+0913"
}

@ -0,0 +1,22 @@
[
{
"type": "REGULAR",
"settings": {
"instrumentType": "EQUITY",
"region": "USA",
"universe": "TOP3000",
"delay": 1,
"decay": 12,
"neutralization": "FAST",
"truncation": 0.05,
"pasteurization": "ON",
"testPeriod": "P0Y0M",
"unitHandling": "VERIFY",
"nanHandling": "ON",
"maxTrade": "ON",
"language": "FASTEXPR",
"visualization": false
},
"regular": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)"
}
]

@ -0,0 +1,431 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
)
// BrainClient is a client for the WorldQuant Brain API.
// It mirrors the Python WorldQuantBrainSimulate class.
type BrainClient struct {
	httpClient *http.Client // HTTP client used for all requests
	baseURL    string       // base URL of the API
	username   string       // account username
	password   string       // account password
	isLoggedIn bool         // set to true after a successful Login()
}
// NewBrainClient constructs a Brain API client for the given credentials.
// This is the Go counterpart of Python's __init__.
func NewBrainClient(username, password string) *BrainClient {
	// Cap every request at 30 seconds.
	httpClient := &http.Client{Timeout: 30 * time.Second}
	return &BrainClient{
		httpClient: httpClient,
		baseURL:    "https://api.worldquantbrain.com",
		username:   username,
		password:   password,
		// isLoggedIn defaults to the zero value false.
	}
}
// Login authenticates against the Brain API with HTTP Basic auth.
// Corresponds to the Python login method; sets isLoggedIn on success.
func (c *BrainClient) Login() error {
	// 1. Build the POST request to the authentication endpoint.
	req, err := http.NewRequest("POST", c.baseURL+"/authentication", nil)
	if err != nil {
		return fmt.Errorf("创建登录请求失败: %v", err)
	}
	// 2. Attach the Basic credentials.
	req.SetBasicAuth(c.username, c.password)
	// 3. Send the request.
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("发送登录请求失败: %v", err)
	}
	defer resp.Body.Close() // always release the response body
	fmt.Printf("登录状态: %d\n", resp.StatusCode)
	// 4. 201 Created means the session was established.
	if resp.StatusCode == 201 {
		fmt.Println("登录成功!")
		c.isLoggedIn = true
		return nil
	}
	// 5. Otherwise surface whatever error payload the API returned.
	var errorResp map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&errorResp); err != nil {
		return fmt.Errorf("登录失败,状态码: %d", resp.StatusCode)
	}
	return fmt.Errorf("登录失败: %v", errorResp)
}
// SimulateAlpha submits an alpha expression for simulation, polls the
// progress URL until completion, then fetches the resulting metrics.
// The caller-supplied settings map overrides the defaults field by field.
// Corresponds to the Python simulate_alpha method.
func (c *BrainClient) SimulateAlpha(expression string, settings map[string]interface{}) (*SimulationResult, error) {
	if !c.isLoggedIn {
		return nil, fmt.Errorf("请先调用Login()登录")
	}
	startTime := time.Now() // for the TimeCost field

	// ========== 1. Prepare the simulation settings ==========
	defaultSettings := SimulationSettings{
		InstrumentType: "EQUITY",
		Region:         "USA",
		Universe:       "TOP3000",
		Delay:          1,
		Decay:          12,
		Truncation:     0.05,
		Neutralization: "FAST",
		Pasteurization: "ON",
		UnitHandling:   "VERIFY",
		NanHandling:    "ON",
		Language:       "FASTEXPR",
		Visualization:  false,
	}
	// Overlay the caller's settings; the helpers perform the safe type
	// assertions that were previously repeated inline for every field.
	overrideStringSetting(settings, "region", &defaultSettings.Region)
	overrideStringSetting(settings, "universe", &defaultSettings.Universe)
	overrideStringSetting(settings, "instrumentType", &defaultSettings.InstrumentType)
	overrideStringSetting(settings, "neutralization", &defaultSettings.Neutralization)
	overrideIntSetting(settings, "decay", &defaultSettings.Decay)
	overrideFloatSetting(settings, "truncation", &defaultSettings.Truncation)

	// ========== 2. Build the request payload ==========
	simRequest := SimulationRequest{
		Type:     "REGULAR",
		Settings: defaultSettings,
		Regular:  expression,
	}
	simData, err := json.Marshal(simRequest)
	if err != nil {
		return nil, fmt.Errorf("构建模拟请求数据失败: %v", err)
	}

	// ========== 3. Submit the simulation ==========
	req, err := http.NewRequest("POST", c.baseURL+"/simulations", bytes.NewBuffer(simData))
	if err != nil {
		return nil, fmt.Errorf("创建模拟请求失败: %v", err)
	}
	req.SetBasicAuth(c.username, c.password)
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("发送模拟请求失败: %v", err)
	}
	defer resp.Body.Close()
	fmt.Printf("模拟提交状态: %d\n", resp.StatusCode)
	if resp.StatusCode != 201 && resp.StatusCode != 200 {
		var errorResp map[string]interface{}
		json.NewDecoder(resp.Body).Decode(&errorResp)
		return nil, fmt.Errorf("模拟请求失败: %v", errorResp)
	}

	// ========== 4. The Location header is the polling URL ==========
	location := resp.Header.Get("Location")
	if location == "" {
		return nil, fmt.Errorf("未获取到模拟进度URL")
	}
	fmt.Printf("进度URL: %s\n", location)

	// ========== 5. Poll until the simulation finishes ==========
	var finalResp map[string]interface{}
	for {
		pollReq, err := http.NewRequest("GET", location, nil)
		if err != nil {
			return nil, fmt.Errorf("创建轮询请求失败: %v", err)
		}
		pollReq.SetBasicAuth(c.username, c.password)
		pollResp, err := c.httpClient.Do(pollReq)
		if err != nil {
			return nil, fmt.Errorf("轮询请求失败: %v", err)
		}
		// Retry-After tells us how long to wait before polling again.
		retryAfter := pollResp.Header.Get("Retry-After")
		if retryAfter != "" {
			waitSeconds, err := strconv.ParseFloat(retryAfter, 64)
			if err != nil {
				waitSeconds = 1.0 // fall back to one second on parse error
			}
			fmt.Printf("等待 %.2f 秒...\n", waitSeconds)
			pollResp.Body.Close()
			// BUG FIX: time.Duration(waitSeconds)*time.Second truncated the
			// fractional part (e.g. 2.5s slept only 2s); convert via the
			// nanosecond count instead.
			time.Sleep(time.Duration(waitSeconds * float64(time.Second)))
			continue
		}
		// Parse the poll response.
		err = json.NewDecoder(pollResp.Body).Decode(&finalResp)
		pollResp.Body.Close()
		if err != nil {
			return nil, fmt.Errorf("解析轮询响应失败: %v", err)
		}
		// The simulation itself reported an error.
		if status, ok := finalResp["status"]; ok && status == "ERROR" {
			message := "未知错误"
			if msg, ok := finalResp["message"].(string); ok {
				message = msg
			}
			return nil, fmt.Errorf("因子模拟失败: %s", message)
		}
		// Completion is signalled by a non-nil "alpha" field.
		if alphaID, ok := finalResp["alpha"]; ok && alphaID != nil {
			alphaIDStr := fmt.Sprintf("%v", alphaID)
			fmt.Printf("生成的Alpha ID: %s\n", alphaIDStr)
			metrics, err := c.getAlphaMetrics(alphaIDStr)
			if err != nil {
				// Metrics are best-effort; still return the base result.
				fmt.Printf("警告: 获取Alpha指标失败: %v\n", err)
			}
			elapsed := time.Since(startTime).Seconds()
			return &SimulationResult{
				Status:        "success",
				Expression:    expression,
				AlphaID:       alphaIDStr,
				TimeCost:      elapsed,
				FormattedTime: formatTime(elapsed),
				Timestamp:     time.Now().Format("2006-01-02 15:04:05"),
				Metrics:       metrics,
				Message:       "",
			}, nil
		}
		// Neither Retry-After nor a final alpha: log and bail out to avoid
		// an infinite loop on unexpected intermediate states.
		fmt.Printf("当前状态: %v\n", finalResp)
		break
	}

	// Polling exited without a result.
	elapsed := time.Since(startTime).Seconds()
	return &SimulationResult{
		Status:        "failed",
		Expression:    expression,
		AlphaID:       "",
		TimeCost:      elapsed,
		FormattedTime: formatTime(elapsed),
		Timestamp:     time.Now().Format("2006-01-02 15:04:05"),
		Metrics:       nil,
		Message:       "轮询超时或状态异常",
	}, nil
}

// overrideStringSetting copies settings[key] into dst when it is a string.
func overrideStringSetting(settings map[string]interface{}, key string, dst *string) {
	if v, ok := settings[key]; ok {
		if s, ok := v.(string); ok {
			*dst = s
		}
	}
}

// overrideIntSetting copies settings[key] into dst; JSON numbers decode as
// float64, so both float64 and int are accepted.
func overrideIntSetting(settings map[string]interface{}, key string, dst *int) {
	switch v := settings[key].(type) {
	case float64:
		*dst = int(v)
	case int:
		*dst = v
	}
}

// overrideFloatSetting copies settings[key] into dst when it is a float64.
func overrideFloatSetting(settings map[string]interface{}, key string, dst *float64) {
	if v, ok := settings[key].(float64); ok {
		*dst = v
	}
}
// getAlphaMetrics fetches /alphas/{id} and maps the JSON payload onto a
// PerformanceMetrics value. Fields that are absent or of the wrong type
// stay nil, exactly as with the previous inline type assertions — the
// repetitive assertion boilerplate is factored into floatField/stringField.
// Corresponds to Python's get_alpha_metrics and _parse_alpha_metrics.
func (c *BrainClient) getAlphaMetrics(alphaID string) (*PerformanceMetrics, error) {
	url := fmt.Sprintf("%s/alphas/%s", c.baseURL, alphaID)
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("创建请求失败: %v", err)
	}
	req.SetBasicAuth(c.username, c.password)
	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("请求失败: %v", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("HTTP状态码: %d", resp.StatusCode)
	}
	var alphaData map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&alphaData); err != nil {
		return nil, fmt.Errorf("解析JSON失败: %v", err)
	}

	metrics := &PerformanceMetrics{}
	// ---- returns section ----
	if returns, ok := alphaData["returns"].(map[string]interface{}); ok {
		metrics.SharpeRatio = floatField(returns, "sharpe")
		metrics.AnnualReturn = floatField(returns, "annualReturn")
		metrics.AnnualVolatility = floatField(returns, "annualVolatility")
		metrics.MaxDrawdown = floatField(returns, "maxDrawdown")
		metrics.InformationRatio = floatField(returns, "informationRatio")
		metrics.TailRatio = floatField(returns, "tailRatio")
		metrics.CommonRatio = floatField(returns, "commonRatio")
	}
	// ---- riskAdjustment section ----
	if riskAdj, ok := alphaData["riskAdjustment"].(map[string]interface{}); ok {
		metrics.Score = floatField(riskAdj, "score")
		metrics.Turnover = floatField(riskAdj, "turnover")
		metrics.SpecificReturn = floatField(riskAdj, "specificReturn")
		metrics.SpecificRisk = floatField(riskAdj, "specificRisk")
	}
	// ---- quantiles section ----
	if quantiles, ok := alphaData["quantiles"].(map[string]interface{}); ok {
		metrics.TopMinusBottom = floatField(quantiles, "topMinusBottom")
		metrics.TopDecileReturn = floatField(quantiles, "topDecileReturn")
		metrics.BottomDecileReturn = floatField(quantiles, "bottomDecileReturn")
		metrics.IC = floatField(quantiles, "ic")
		metrics.ICDecay = floatField(quantiles, "icDecay")
	}
	// ---- top-level fields ----
	metrics.TotalReturn = floatField(alphaData, "totalReturn")
	metrics.Capacity = floatField(alphaData, "capacity")
	metrics.Fitness = floatField(alphaData, "fitness")
	metrics.InstrumentCount = floatField(alphaData, "instrumentCount")
	metrics.StartDate = stringField(alphaData, "startDate")
	metrics.EndDate = stringField(alphaData, "endDate")
	return metrics, nil
}

// floatField returns a pointer to m[key] when it is a float64, else nil.
func floatField(m map[string]interface{}, key string) *float64 {
	if v, ok := m[key].(float64); ok {
		return &v
	}
	return nil
}

// stringField returns a pointer to m[key] when it is a string, else nil.
func stringField(m map[string]interface{}, key string) *string {
	if v, ok := m[key].(string); ok {
		return &v
	}
	return nil
}
// formatTime renders a duration in seconds as "xx分xx秒" or "xx秒".
// Corresponds to the Python format_time method.
func formatTime(seconds float64) string {
	if seconds >= 60 {
		wholeMinutes := int(seconds / 60)
		leftover := seconds - float64(wholeMinutes*60)
		return fmt.Sprintf("%d分%.2f秒", wholeMinutes, leftover)
	}
	return fmt.Sprintf("%.2f秒", seconds)
}
// Close releases the client's resources.
// Go's http.Client needs no explicit shutdown; we simply reset the login
// flag and drop the reference. Corresponds to the Python close method.
func (c *BrainClient) Close() error {
	c.isLoggedIn = false
	c.httpClient = nil
	return nil
}
// printSeparator prints a 60-character divider line (convenience helper).
func printSeparator() {
	separator := strings.Repeat("=", 60)
	fmt.Println(separator)
}

@ -0,0 +1,76 @@
package main
import (
"encoding/json"
"fmt"
"os"
)
// LoadAccountConfig reads the account credentials from a JSON file.
// When the file does not exist, a template is written for the user to fill
// in and an error is returned. (Python used eval(f.read()); here we parse
// strict JSON instead.)
func LoadAccountConfig(filename string) (*AccountConfig, error) {
	if _, err := os.Stat(filename); os.IsNotExist(err) {
		// File missing: create a template so the user knows the format.
		defaultConfig := AccountConfig{
			Username: "your_username",
			Password: "your_password",
		}
		// BUG FIX: Marshal/WriteFile errors were silently discarded, so a
		// failed template write looked like success to the user.
		template, merr := json.MarshalIndent(defaultConfig, "", " ")
		if merr != nil {
			return nil, fmt.Errorf("生成模板失败: %v", merr)
		}
		if werr := os.WriteFile(filename, template, 0644); werr != nil {
			return nil, fmt.Errorf("写入模板文件失败: %v", werr)
		}
		return nil, fmt.Errorf("账号文件不存在,已创建模板 %s,请填写后重试", filename)
	}
	// Read and parse the existing file.
	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("读取账号文件失败: %v", err)
	}
	var config AccountConfig
	err = json.Unmarshal(data, &config)
	if err != nil {
		return nil, fmt.Errorf("解析账号文件失败,请确保是JSON格式: %v", err)
	}
	return &config, nil
}
// LoadAlphaList reads the list of alpha expressions from a JSON file.
// When the file does not exist, an example file is written and an error is
// returned. Entries are decoded generically as []map[string]interface{}
// because the per-entry schema is open-ended.
func LoadAlphaList(filename string) ([]map[string]interface{}, error) {
	if _, err := os.Stat(filename); os.IsNotExist(err) {
		// File missing: create an example so the user knows the format.
		example := []map[string]interface{}{
			{
				"regular":  "示例表达式",
				"settings": map[string]interface{}{},
				"type":     "REGULAR",
			},
		}
		// BUG FIX: Marshal/WriteFile errors were silently discarded, so a
		// failed example write looked like success to the user.
		template, merr := json.MarshalIndent(example, "", " ")
		if merr != nil {
			return nil, fmt.Errorf("生成示例失败: %v", merr)
		}
		if werr := os.WriteFile(filename, template, 0644); werr != nil {
			return nil, fmt.Errorf("写入示例文件失败: %v", werr)
		}
		return nil, fmt.Errorf("Alpha文件不存在,已创建示例 %s,请添加表达式后重试", filename)
	}
	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("读取Alpha文件失败: %v", err)
	}
	var alphaList []map[string]interface{}
	err = json.Unmarshal(data, &alphaList)
	if err != nil {
		return nil, fmt.Errorf("解析Alpha文件失败: %v", err)
	}
	return alphaList, nil
}

@ -0,0 +1,5 @@
module wqb-simulate-go
go 1.25.1
require github.com/google/uuid v1.6.0 // indirect

@ -0,0 +1,2 @@
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=

Binary file not shown.

@ -0,0 +1,184 @@
package main
import (
"encoding/json"
"fmt"
"os"
"time"
)
// main drives the full workflow: load config and expressions, log in,
// simulate each alpha sequentially, then print a summary and save results.
func main() {
	// 1. Load the account configuration.
	fmt.Println("正在加载账号配置...")
	config, err := LoadAccountConfig("account.json") // JSON file, not the Python .txt
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	// 2. Load the alpha expressions.
	fmt.Println("正在加载Alpha表达式...")
	alphaList, err := LoadAlphaList("alphas.json")
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	if len(alphaList) == 0 {
		fmt.Println("alphas.json 文件为空")
		os.Exit(1)
	}
	// 3. Normalize each entry to the expression/settings/type shape.
	var expressions []map[string]interface{}
	for _, item := range alphaList {
		expr := map[string]interface{}{
			"expression": item["regular"],
			"settings":   item["settings"],
			"type":       item["type"],
		}
		expressions = append(expressions, expr)
	}
	// 4. Create the client and authenticate.
	client := NewBrainClient(config.Username, config.Password)
	fmt.Println("\n正在登录Brain API...")
	err = client.Login()
	if err != nil {
		fmt.Printf("登录失败: %v\n", err)
		os.Exit(1)
	}
	// 5. Simulate each expression in turn.
	fmt.Println("\n开始Alpha因子模拟...")
	totalStart := time.Now()
	var results []*SimulationResult
	for i, item := range expressions {
		fmt.Printf("\n%s\n", "============================================")
		fmt.Printf("开始第 %d 个因子模拟 (共 %d 个)\n", i+1, len(expressions))
		fmt.Printf("因子: %v\n", item)
		fmt.Printf("%s\n", "============================================")
		// Pull out the expression and settings (zero values when absent).
		expression, _ := item["expression"].(string)
		settings, _ := item["settings"].(map[string]interface{})
		// Run the simulation; wrap any error into a failed result record.
		result, err := client.SimulateAlpha(expression, settings)
		if err != nil {
			result = &SimulationResult{
				Status:        "failed",
				Expression:    expression,
				Message:       err.Error(),
				TimeCost:      0,
				FormattedTime: "0秒",
				Timestamp:     time.Now().Format("2006-01-02 15:04:05"),
			}
		}
		results = append(results, result)
		// Report the individual outcome.
		printResult(result)
		// Pause between simulations; skip the wait after the last one.
		if i < len(expressions)-1 {
			sleepTime := 3.0 // fixed 3-second pause between runs
			fmt.Printf("\n等待 %.2f 秒后开始下一个...\n", sleepTime)
			time.Sleep(time.Duration(sleepTime) * time.Second)
		}
	}
	totalTime := time.Since(totalStart).Seconds()
	// 6. Print the aggregate summary.
	printSummary(results, totalTime)
	// 7. Persist the results.
	saveResults(results)
}
// printResult prints a single simulation outcome to stdout, including
// key metrics (sharpe, annual return) when they are available.
func printResult(r *SimulationResult) {
	if r.Status != "success" {
		fmt.Printf("✗ 因子模拟失败: %s\n", r.Expression)
		fmt.Printf(" 耗时: %s,错误: %s\n", r.FormattedTime, r.Message)
		return
	}
	fmt.Printf("✓ 因子模拟成功: %s\n", r.Expression)
	fmt.Printf(" 耗时: %s,Alpha ID: %s\n", r.FormattedTime, r.AlphaID)
	if m := r.Metrics; m != nil {
		fmt.Println(" 关键指标:")
		// Metric fields are pointers; nil means "not reported".
		if m.SharpeRatio != nil {
			fmt.Printf(" 夏普比率: %.4f\n", *m.SharpeRatio)
		}
		if m.AnnualReturn != nil {
			fmt.Printf(" 年化收益: %.4f\n", *m.AnnualReturn)
		}
	}
}
// printSummary prints aggregate statistics (success/failure counts and
// total wall time) followed by a per-alpha detail listing.
func printSummary(results []*SimulationResult, totalTime float64) {
	rule := "============================================"
	fmt.Printf("\n%s\n", rule)
	fmt.Println("模拟结果汇总")
	fmt.Printf("%s\n", rule)

	succeeded := 0
	for _, r := range results {
		if r.Status == "success" {
			succeeded++
		}
	}
	fmt.Printf("总模拟因子数: %d\n", len(results))
	fmt.Printf("成功: %d 个\n", succeeded)
	fmt.Printf("失败: %d 个\n", len(results)-succeeded)
	fmt.Printf("总耗时: %s\n", formatTime(totalTime))
	fmt.Printf("%s\n", rule)

	for i, r := range results {
		ok := r.Status == "success"
		mark := "✓"
		if !ok {
			mark = "✗"
		}
		fmt.Printf("%d. %s %s\n", i+1, mark, r.Expression)
		fmt.Printf(" 状态: %s\n", r.Status)
		fmt.Printf(" 耗时: %s\n", r.FormattedTime)
		fmt.Printf(" Alpha ID: %s\n", r.AlphaID)
		if !ok {
			fmt.Printf(" 原因: %s\n", r.Message)
		}
		fmt.Println()
	}
}
// saveResults serializes the results as pretty-printed JSON to
// result/simulation_results-<unix-timestamp>.json. Errors are reported
// to stdout; the function never aborts the program.
func saveResults(results []*SimulationResult) {
	// Fix: the MkdirAll error was previously ignored, so a failed
	// directory creation surfaced later as a confusing WriteFile error.
	if err := os.MkdirAll("./result", 0755); err != nil {
		fmt.Printf("保存结果失败: %v\n", err)
		return
	}
	filename := fmt.Sprintf("result/simulation_results-%d.json", time.Now().Unix())
	// Marshal with indentation so the file is human-readable.
	data, err := json.MarshalIndent(results, "", " ")
	if err != nil {
		fmt.Printf("保存结果失败: %v\n", err)
		return
	}
	if err := os.WriteFile(filename, data, 0644); err != nil {
		fmt.Printf("保存结果失败: %v\n", err)
		return
	}
	fmt.Printf("结果已保存到 %s\n", filename)
}

@ -0,0 +1,76 @@
package main
// AccountConfig holds the Brain API login credentials — the Go
// counterpart of the Python version's account.txt (loaded here from JSON).
type AccountConfig struct {
	Username string `json:"username"` // the `json:"..."` tag tells encoding/json how to map the field
	Password string `json:"password"`
}
// SimulationSettings mirrors the Python version's default_settings
// payload sent with each simulation request. The comments show the
// typical value for each field.
type SimulationSettings struct {
	InstrumentType string `json:"instrumentType"` // "EQUITY"
	Region string `json:"region"` // "USA"
	Universe string `json:"universe"` // "TOP3000"
	Delay int `json:"delay"` // 1
	Decay int `json:"decay"` // 12
	Truncation float64 `json:"truncation"` // 0.05
	Neutralization string `json:"neutralization"` // "FAST"
	Pasteurization string `json:"pasteurization"` // "ON"
	UnitHandling string `json:"unitHandling"` // "VERIFY"
	NanHandling string `json:"nanHandling"` // "ON"
	Language string `json:"language"` // "FASTEXPR"
	Visualization bool `json:"visualization"` // false
}
// SimulationRequest is the request body for a simulation — the Go
// counterpart of the Python version's simulation_data dict.
type SimulationRequest struct {
	Type string `json:"type"` // "REGULAR"
	Settings SimulationSettings `json:"settings"`
	Regular string `json:"regular"` // the alpha expression itself
}
// PerformanceMetrics holds all performance figures reported for an
// alpha. Every field is a pointer with `omitempty` so that a metric the
// API did not report stays distinguishable (nil / absent) from a true zero.
type PerformanceMetrics struct {
	// Basic statistics
	SharpeRatio *float64 `json:"sharpe_ratio,omitempty"`
	AnnualReturn *float64 `json:"annual_return,omitempty"`
	AnnualVolatility *float64 `json:"annual_volatility,omitempty"`
	MaxDrawdown *float64 `json:"max_drawdown,omitempty"`
	InformationRatio *float64 `json:"information_ratio,omitempty"`
	TailRatio *float64 `json:"tail_ratio,omitempty"`
	CommonRatio *float64 `json:"common_ratio,omitempty"`
	// Risk-adjusted returns
	Score *float64 `json:"score,omitempty"`
	Turnover *float64 `json:"turnover,omitempty"`
	SpecificReturn *float64 `json:"specific_return,omitempty"`
	SpecificRisk *float64 `json:"specific_risk,omitempty"`
	// Quantile performance
	TopMinusBottom *float64 `json:"top_minus_bottom,omitempty"`
	TopDecileReturn *float64 `json:"top_decile_return,omitempty"`
	BottomDecileReturn *float64 `json:"bottom_decile_return,omitempty"`
	IC *float64 `json:"ic,omitempty"`
	ICDecay *float64 `json:"ic_decay,omitempty"`
	// Other metrics
	TotalReturn *float64 `json:"total_return,omitempty"`
	Capacity *float64 `json:"capacity,omitempty"`
	Fitness *float64 `json:"fitness,omitempty"`
	InstrumentCount *float64 `json:"instrument_count,omitempty"`
	StartDate *string `json:"start_date,omitempty"`
	EndDate *string `json:"end_date,omitempty"`
}
// SimulationResult is the per-alpha outcome record — the Go counterpart
// of the Python version's simulation_result dict.
type SimulationResult struct {
	Status string `json:"status"` // "success", "error", or "failed"
	Expression string `json:"expression"`
	AlphaID string `json:"alpha_id"`
	Message string `json:"message,omitempty"` // error description when not successful
	Metrics *PerformanceMetrics `json:"metrics,omitempty"`
	TimeCost float64 `json:"time_cost"` // elapsed time in seconds
	FormattedTime string `json:"formatted_time"`
	Timestamp string `json:"timestamp"`
}

@ -0,0 +1,22 @@
[
{
"type": "REGULAR",
"settings": {
"instrumentType": "EQUITY",
"region": "USA",
"universe": "TOP3000",
"delay": 1,
"decay": 12,
"neutralization": "FAST",
"truncation": 0.05,
"pasteurization": "ON",
"testPeriod": "P0Y0M",
"unitHandling": "VERIFY",
"nanHandling": "ON",
"maxTrade": "ON",
"language": "FASTEXPR",
"visualization": false
},
"regular": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)"
}
]

@ -0,0 +1,28 @@
[
{
"expression": "",
"time_consuming": 0.26,
"formatted_time": "0.26秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:23:34",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "",
"time_consuming": 0.27,
"formatted_time": "0.27秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:23:34",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,28 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 0.25,
"formatted_time": "0.25秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:32:01",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "ts_std_dev(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8) / ts_mean(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8)",
"time_consuming": 0.25,
"formatted_time": "0.25秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:32:02",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,28 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 0.25,
"formatted_time": "0.25秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:38:07",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "ts_std_dev(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8) / ts_mean(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8)",
"time_consuming": 0.26,
"formatted_time": "0.26秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:38:11",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,28 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 0.25,
"formatted_time": "0.25秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:41:33",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "ts_std_dev(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8) / ts_mean(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8)",
"time_consuming": 0.6,
"formatted_time": "0.60秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:41:38",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,28 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 0.24,
"formatted_time": "0.24秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:42:17",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "ts_std_dev(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8) / ts_mean(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8)",
"time_consuming": 0.26,
"formatted_time": "0.26秒",
"alpha_id": "/",
"status": "failed",
"description": "'location'",
"simulation_timestamp": "2026-03-20 11:42:22",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,28 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 0.27,
"formatted_time": "0.27秒",
"alpha_id": "/",
"status": "failed",
"description": "未获取到模拟进度URL",
"simulation_timestamp": "2026-03-20 11:44:33",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "ts_std_dev(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8) / ts_mean(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8)",
"time_consuming": 1.01,
"formatted_time": "1.01秒",
"alpha_id": "/",
"status": "failed",
"description": "未获取到模拟进度URL",
"simulation_timestamp": "2026-03-20 11:44:38",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,54 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 7.54,
"formatted_time": "7.54秒",
"alpha_id": "/",
"status": "error",
"description": "Operator divide does not support event inputs",
"simulation_timestamp": "2026-03-20 11:45:38",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
},
{
"expression": "ts_std_dev(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8) / ts_mean(fnd6_newqv1300_xoprq / fnd6_cptmfmq_saleq, 8)",
"time_consuming": 133.33,
"formatted_time": "2分13.33秒",
"alpha_id": "Gr6mk5XJ",
"status": "success",
"description": "/",
"simulation_timestamp": "2026-03-20 11:47:55",
"performance_metrics": {
"sharpe_ratio": null,
"annual_return": null,
"annual_volatility": null,
"max_drawdown": null,
"information_ratio": null,
"total_return": null
},
"risk_metrics": {
"turnover": null,
"score": null,
"specific_return": null,
"specific_risk": null,
"tail_ratio": null,
"common_ratio": null
},
"quantile_metrics": {
"top_minus_bottom": null,
"top_decile_return": null,
"bottom_decile_return": null,
"ic": null,
"ic_decay": null
},
"other_metrics": {
"capacity": null,
"fitness": null,
"instrument_count": null,
"start_date": null,
"end_date": null
}
}
]

@ -0,0 +1,41 @@
[
{
"expression": "ts_mean(ts_mean(operating_income / return_assets, 4), 20)",
"time_consuming": 105.39,
"formatted_time": "1分45.39秒",
"alpha_id": "O0kJWQ7Y",
"status": "success",
"description": "/",
"simulation_timestamp": "2026-03-20 12:00:05",
"performance_metrics": {
"sharpe_ratio": null,
"annual_return": null,
"annual_volatility": null,
"max_drawdown": null,
"information_ratio": null,
"total_return": null
},
"risk_metrics": {
"turnover": null,
"score": null,
"specific_return": null,
"specific_risk": null,
"tail_ratio": null,
"common_ratio": null
},
"quantile_metrics": {
"top_minus_bottom": null,
"top_decile_return": null,
"bottom_decile_return": null,
"ic": null,
"ic_decay": null
},
"other_metrics": {
"capacity": null,
"fitness": null,
"instrument_count": null,
"start_date": null,
"end_date": null
}
}
]

@ -0,0 +1,15 @@
[
{
"expression": "ts_std_dev(fnd6_xopr / fnd6_salexg, 8) / ts_mean(fnd6_xopr / fnd6_salexg, 8)",
"time_consuming": 0.26,
"formatted_time": "0.26秒",
"alpha_id": "/",
"status": "failed",
"description": "未获取到模拟进度URL",
"simulation_timestamp": "2026-03-20 12:04:14",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,15 @@
[
{
"expression": "ts_std_dev(fnd6_xopr / fnd6_salexg, 8) / ts_mean(fnd6_xopr / fnd6_salexg, 8)",
"time_consuming": 6.53,
"formatted_time": "6.53秒",
"alpha_id": "/",
"status": "error",
"description": "Operator divide does not support event inputs",
"simulation_timestamp": "2026-03-20 12:04:47",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,15 @@
[
{
"expression": "ts_std_dev(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8) / ts_mean(fnd6_newqeventv110_xoprq / fnd6_newa2v1300_sale, 8)",
"time_consuming": 6.88,
"formatted_time": "6.88秒",
"alpha_id": "/",
"status": "error",
"description": "Operator divide does not support event inputs",
"simulation_timestamp": "2026-03-20 14:06:34",
"performance_metrics": {},
"risk_metrics": {},
"quantile_metrics": {},
"other_metrics": {}
}
]

@ -0,0 +1,3 @@
alpha_id,alpha_type,author,settings,regular,dateCreated,dateSubmitted,dateModified,name,favorite,hidden,color,category,tags,classifications,grade,stage,status,is,os,train,test,prod,competitions,themes,pyramids,pyramidThemes,team,osmosisPoints
npkZlgwd,REGULAR,YC93384,"{'instrumentType': 'EQUITY', 'region': 'USA', 'universe': 'TOP3000', 'delay': 1, 'decay': 0, 'neutralization': 'INDUSTRY', 'truncation': 0.01, 'pasteurization': 'ON', 'unitHandling': 'VERIFY', 'nanHandling': 'OFF', 'maxTrade': 'OFF', 'maxPosition': 'OFF', 'language': 'FASTEXPR', 'visualization': False, 'startDate': '2014-01-01', 'endDate': '2023-12-31'}","{'code': 'multiply(last_diff_value(ts_sum(subtract(implied_volatility_call_120, implied_volatility_put_90), 20), 5),ts_mean(volume,20))', 'description': 'Idea: Long volatility skew momentum in high-option-liquidity stocks weighted by trading volume.\nRationale for data used: implied_volatility_call_120, implied_volatility_put_90\nRationale for operators used: multiply, last_diff_value, ts_sum, subtract, ts_mean', 'operatorCount': 5}",2026-02-07T04:08:56-05:00,,2026-02-07T04:45:53-05:00,,False,False,,,[],[],,IS,UNSUBMITTED,"{'pnl': 25004583, 'bookSize': 20000000, 'longCount': 2140, 'shortCount': 787, 'turnover': 0.0744, 'returns': 0.2505, 'drawdown': 0.4119, 'margin': 0.006739, 'sharpe': 1.14, 'fitness': 1.61, 'startDate': '2014-01-01', 'investabilityConstrained': {'pnl': 13948144, 'bookSize': 20000000, 'longCount': 2166, 'shortCount': 763, 'turnover': 0.0627, 'returns': 0.1398, 'drawdown': 0.4277, 'margin': 0.004458, 'fitness': 1.03, 'sharpe': 0.97}, 'riskNeutralized': {'pnl': 17145827, 'bookSize': 20000000, 'longCount': 2140, 'shortCount': 787, 'turnover': 0.0744, 'returns': 0.1718, 'drawdown': 0.3665, 'margin': 0.004621, 'fitness': 1.15, 'sharpe': 0.98}, 'checks': [{'name': 'LOW_SHARPE', 'result': 'WARNING', 'limit': 1.58, 'value': 1.14}, {'name': 'LOW_FITNESS', 'result': 'PASS', 'limit': 1.0, 'value': 1.61}, {'name': 'LOW_TURNOVER', 'result': 'PASS', 'limit': 0.01, 'value': 0.0744}, {'name': 'HIGH_TURNOVER', 'result': 'PASS', 'limit': 0.7, 'value': 0.0744}, {'name': 'CONCENTRATED_WEIGHT', 'result': 'WARNING', 'date': '2017-09-21', 'limit': 0.1, 'value': 0.479658}, {'name': 
'LOW_SUB_UNIVERSE_SHARPE', 'result': 'PASS', 'limit': 0.49, 'value': 0.52}, {'name': 'SELF_CORRELATION', 'result': 'PENDING'}, {'name': 'DATA_DIVERSITY', 'result': 'PENDING'}, {'name': 'PROD_CORRELATION', 'result': 'PENDING'}, {'name': 'REGULAR_SUBMISSION', 'result': 'PENDING'}, {'name': 'IS_LADDER_SHARPE', 'result': 'PASS', 'year': 2, 'startDate': '2024-01-02', 'endDate': '2022-01-03', 'limit': 2.02, 'value': 2.06}, {'name': 'POWER_POOL_CORRELATION', 'result': 'PENDING'}, {'name': 'MATCHES_COMPETITION', 'result': 'WARNING', 'competitions': [{'id': 'DCC2026', 'name': 'Data Creation Challenge 2026'}]}, {'result': 'PASS', 'name': 'MATCHES_PYRAMID', 'effective': 2, 'multiplier': 1.1, 'pyramids': [{'name': 'USA/D1/PV', 'multiplier': 1.1}, {'name': 'USA/D1/OPTION', 'multiplier': 1.3}]}, {'result': 'WARNING', 'name': 'MATCHES_THEMES', 'themes': [{'id': 'M4ZYl3D', 'multiplier': 2.0, 'name': 'USA HighTurnover Datasets Theme'}]}, {'result': 'PENDING', 'name': 'MATCHES_THEMES', 'themes': [{'id': 'EDrKz34', 'multiplier': 1.0, 'name': 'All regions/D1 Power Pool Mar`26'}]}, {'name': 'OSMOSIS_ALLOCATION', 'result': 'WARNING'}]}",,,,,,,,,,
LLOgaLvL,REGULAR,YC93384,"{'instrumentType': 'EQUITY', 'region': 'USA', 'universe': 'TOP3000', 'delay': 1, 'decay': 0, 'neutralization': 'INDUSTRY', 'truncation': 0.08, 'pasteurization': 'ON', 'unitHandling': 'VERIFY', 'nanHandling': 'OFF', 'maxTrade': 'OFF', 'maxPosition': 'OFF', 'language': 'FASTEXPR', 'visualization': False, 'startDate': '2014-01-01', 'endDate': '2023-12-31'}","{'code': 'ts_rank(add(ts_count_nans(pv87_2_nav_qf_matrix_all_chngratio_median, 45), ts_count_nans(mdl264_call_put_erlanger_ratio_l3, 45)), 90)', 'description': None, 'operatorCount': 4}",2026-01-19T04:36:02-05:00,,2026-01-19T04:36:02-05:00,,False,False,,,[],[],,IS,UNSUBMITTED,"{'pnl': 10827219, 'bookSize': 20000000, 'longCount': 2486, 'shortCount': 389, 'turnover': 0.0484, 'returns': 0.1085, 'drawdown': 0.183, 'margin': 0.004481, 'sharpe': 1.27, 'fitness': 1.18, 'startDate': '2014-01-01', 'investabilityConstrained': {'pnl': 10545973, 'bookSize': 20000000, 'longCount': 2333, 'shortCount': 797, 'turnover': 0.038, 'returns': 0.1057, 'drawdown': 0.1061, 'margin': 0.005561, 'fitness': 1.27, 'sharpe': 1.38}, 'riskNeutralized': {'pnl': 3060381, 'bookSize': 20000000, 'longCount': 2486, 'shortCount': 389, 'turnover': 0.0484, 'returns': 0.0307, 'drawdown': 0.1434, 'margin': 0.001267, 'fitness': 0.3, 'sharpe': 0.6}, 'checks': [{'name': 'LOW_SHARPE', 'result': 'WARNING', 'limit': 1.58, 'value': 1.27}, {'name': 'LOW_FITNESS', 'result': 'PASS', 'limit': 1.0, 'value': 1.18}, {'name': 'LOW_TURNOVER', 'result': 'PASS', 'limit': 0.01, 'value': 0.0484}, {'name': 'HIGH_TURNOVER', 'result': 'PASS', 'limit': 0.7, 'value': 0.0484}, {'name': 'CONCENTRATED_WEIGHT', 'result': 'PASS'}, {'name': 'LOW_SUB_UNIVERSE_SHARPE', 'result': 'PASS', 'limit': 0.55, 'value': 0.95}, {'name': 'SELF_CORRELATION', 'result': 'PENDING'}, {'name': 'DATA_DIVERSITY', 'result': 'PENDING'}, {'name': 'PROD_CORRELATION', 'result': 'PENDING'}, {'name': 'REGULAR_SUBMISSION', 'result': 'PENDING'}, {'name': 'IS_LADDER_SHARPE', 'result': 
'WARNING', 'year': 2, 'startDate': '2024-01-02', 'endDate': '2022-01-03', 'limit': 1.58, 'value': 1.26}, {'name': 'POWER_POOL_CORRELATION', 'result': 'PENDING'}, {'name': 'MATCHES_COMPETITION', 'result': 'WARNING', 'competitions': [{'id': 'DCC2026', 'name': 'Data Creation Challenge 2026'}]}, {'result': 'PASS', 'name': 'MATCHES_PYRAMID', 'effective': 2, 'multiplier': 1.1, 'pyramids': [{'name': 'USA/D1/PV', 'multiplier': 1.1}, {'name': 'USA/D1/MODEL', 'multiplier': 1.4}]}, {'result': 'WARNING', 'name': 'MATCHES_THEMES', 'themes': [{'id': 'M4ZYl3D', 'multiplier': 2.0, 'name': 'USA HighTurnover Datasets Theme'}]}, {'result': 'PENDING', 'name': 'MATCHES_THEMES', 'themes': [{'id': 'EDrKz34', 'multiplier': 1.0, 'name': 'All regions/D1 Power Pool Mar`26'}]}, {'name': 'OSMOSIS_ALLOCATION', 'result': 'WARNING'}]}",,,,,,,,,,
unable to load file from base commit

@ -0,0 +1,381 @@
import httpx
from httpx import BasicAuth, Timeout
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import logging
from typing import List, Optional
import time
class AlphaManager:
    """Search, filter, and colour-tag WorldQuant Brain alphas.

    On construction the manager logs in to the Brain API (credentials are
    fetched from a Nacos config server — see :meth:`login`) and then
    :meth:`get_alphas` can scan the account's IS-stage alphas, filter them
    by keyword / sharpe / fitness, optionally colour-mark the survivors,
    and write the result to a CSV file.
    """

    def __init__(self, credentials_file='account.txt', base_path=None):
        """Create the manager and immediately attempt a login.

        Args:
            credentials_file: Kept for interface compatibility; the current
                login flow fetches credentials from Nacos instead.
            base_path: Directory where output files are written
                (defaults to the current working directory).
        """
        self.client = None
        self.base_path = Path(base_path) if base_path else Path.cwd()
        self.logger = self._setup_logger()
        # Login eagerly; on failure ``self.client`` stays ``None`` and the
        # public methods log an error and return a neutral value.
        self.login(credentials_file)

    def _setup_logger(self):
        """Return a module-level logger, attaching a stream handler only once."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def login(self, credentials_file='account.txt'):
        """Authenticate against the WorldQuant Brain API.

        Credentials are pulled from a Nacos config endpoint; the
        ``credentials_file`` argument is currently unused.

        Returns:
            bool: True on successful authentication, False otherwise.
        """
        try:
            # Fetch the username/password pair from Nacos.
            with httpx.Client(timeout=10.0) as temp_client:
                nacos_resp = temp_client.get(
                    'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify'
                )
            if nacos_resp.status_code != 200:
                self.logger.error('获取账号密码失败')
                return False
            config = nacos_resp.json()
            username = config.get('user_name')
            password = config.get('password')
            if not username or not password:
                self.logger.error('账号密码不完整')
                return False
            self.logger.info(f"正在登录账户: {username}")
            # Generous timeouts: connect 30s, read 60s — the auth endpoint
            # can be slow to respond.
            timeout = Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0)
            self.client = httpx.Client(
                auth=BasicAuth(username, password),
                timeout=timeout
            )
            response = self.client.post('https://api.worldquantbrain.com/authentication')
            if response.status_code == 201:
                self.logger.info("登录成功!")
                return True
            self.logger.error(f"登录失败: {response.status_code} - {response.text}")
            self.client.close()
            self.client = None
            return False
        except Exception as e:
            self.logger.error(f"登录异常: {e}")
            # Fix: don't leak a half-initialised client when the auth
            # request itself raises.
            if self.client:
                self.client.close()
                self.client = None
            return False

    def update_alpha_color(self, alpha_id: str, color: str) -> bool:
        """Set the colour tag of one alpha; return True on HTTP 200."""
        if not self.client:
            self.logger.error("客户端未登录")
            return False
        try:
            update_data = {"color": color}
            response = self.client.patch(
                f"https://api.worldquantbrain.com/alphas/{alpha_id}",
                json=update_data
            )
            return response.status_code == 200
        except Exception as e:
            self.logger.error(f"标记颜色失败: {e}")
            return False

    def wechat_check_corr_message(self, message: str):
        """Placeholder for a WeChat notification hook; currently just logs."""
        self.logger.info(f"通知消息: {message}")

    def _make_request_with_retry(self, url: str, max_retries: int = 3, retry_delay: float = 2.0):
        """GET ``url`` with retries.

        Args:
            url: Request URL.
            max_retries: Maximum number of attempts.
            retry_delay: Seconds to sleep between attempts.

        Returns:
            httpx.Response: The response of the first successful attempt.

        Raises:
            Exception: Re-raises the last error once all attempts fail.
        """
        for attempt in range(max_retries):
            try:
                return self.client.get(url)
            except Exception as e:
                self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                else:
                    self.logger.error(f"请求最终失败: {url}")
                    raise

    def get_alphas(
        self,
        total_alphas: int = 5,
        limit: int = 100,
        delay: int = 1,
        region: str = "USA",
        universe: str = "TOP3000",
        required_fields: Optional[List[str]] = None,
        match_mode: str = "all",
        min_sharpe: Optional[float] = None,
        min_fitness: Optional[float] = None,
        hidden: str = "false",
        submittable: bool = False,
        auto_color: bool = False,
        color: str = "GREEN",
        output_file_name: str = "alpha_search_list.csv",
        mode: str = "w",
        max_retries: int = 3,
    ) -> list:
        """Scan the account's IS-stage alphas and return the ones that match.

        Args:
            total_alphas: Maximum number of alphas to collect.
            limit: Page size for each API request.
            delay: ``settings.delay`` filter value.
            region: Market region, e.g. "USA", "CHINA".
            universe: Stock universe, e.g. "TOP3000".
            required_fields: Keywords that must appear in the alpha code.
            match_mode: "all" (every keyword) or "any" (at least one).
            min_sharpe: Minimum absolute sharpe ratio, or None to skip.
            min_fitness: Minimum absolute fitness, or None to skip.
            hidden: Whether to search hidden alphas ("true"/"false").
            submittable: Only keep alphas with no FAIL check result.
            auto_color: Colour-mark every submittable match.
            color: Colour used when ``auto_color`` is set.
            output_file_name: CSV file name for the results.
            mode: "w" to overwrite the CSV, "a" to append.
            max_retries: Retry count for each API request.

        Returns:
            list: The matching alphas (up to ``total_alphas``), sorted to
            CSV by IS fitness.

        Raises:
            ValueError: If ``color`` or ``match_mode`` is invalid.
        """
        if not self.client:
            self.logger.error("客户端未登录,无法执行搜索")
            return []
        valid_colors = [None, "GREEN", "YELLOW", "RED", "BLUE", "PURPLE", "ORANGE"]
        if color not in valid_colors:
            raise ValueError(f"颜色必须是以下之一: {valid_colors}")
        # Fix: validate match_mode up front. Previously the ValueError was
        # raised inside the fetch loop's try-block, where the broad except
        # swallowed it and silently stopped the scan.
        if required_fields and match_mode not in ("all", "any"):
            raise ValueError("match_mode 必须是 'all''any'")

        fetched_alphas = []
        offset = 0
        total_accessed = 0
        colored_count = 0
        # First ask for the total count (limit=1) so the scan can stop early.
        # NOTE: %1F is the unit-separator the Brain API uses to OR statuses.
        count_url = (
            f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&hidden={hidden}"
            f"&limit=1&settings.delay={delay}&settings.region={region}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
        )
        total_available = 0
        for attempt in range(max_retries):
            try:
                count_response = self.client.get(count_url)
                total_available = count_response.json()["count"]
                break
            except Exception as e:
                self.logger.warning(f"获取Alpha总数失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2.0)
                else:
                    self.logger.error("获取Alpha总数最终失败")
                    return []
        if total_available == 0:
            self.logger.warning("未找到任何Alpha")
            return []
        self.logger.info(f"共找到 {total_available} 个Alpha,开始筛选...")
        pbar = tqdm(total=min(total_available, 10000), desc="扫描Alpha", unit="")
        while len(fetched_alphas) < total_alphas and offset < total_available:
            url = (
                f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&limit={limit}"
                f"&offset={offset}&settings.delay={delay}&settings.region={region}&hidden={hidden}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
            )
            try:
                # Fix: delegate retries to the shared helper instead of
                # duplicating the same retry loop inline.
                response = self._make_request_with_retry(url, max_retries)
                if response.status_code == 400:
                    self.logger.warning(f"遇到API限制 (offset={offset}),停止获取更多数据")
                    break
                response_data = response.json()
                if not isinstance(response_data, dict) or "results" not in response_data:
                    self.logger.error(f"API返回了意外的数据: {response_data}")
                    break
                alphas = response_data["results"]
                if not alphas:
                    break
                total_accessed += len(alphas)
                # Keyword filter on the alpha expression code.
                if required_fields:
                    if match_mode == "all":
                        filtered_alphas = [
                            alpha for alpha in alphas
                            if all(field in alpha["regular"]["code"] for field in required_fields)
                        ]
                    else:  # "any" — validated above
                        filtered_alphas = [
                            alpha for alpha in alphas
                            if any(field in alpha["regular"]["code"] for field in required_fields)
                        ]
                else:
                    filtered_alphas = alphas
                # Sharpe/fitness thresholds, then (optionally) check results.
                final_filtered = []
                for alpha in filtered_alphas:
                    # Fix: "is" may be present but null — guard with ``or {}``.
                    is_stats = alpha.get("is") or {}
                    sharpe = is_stats.get("sharpe", 0)
                    fitness = is_stats.get("fitness", 0)
                    sharpe_ok = (min_sharpe is None) or (sharpe is not None and abs(sharpe) >= min_sharpe)
                    fitness_ok = (min_fitness is None) or (fitness is not None and abs(fitness) >= min_fitness)
                    if not (sharpe_ok and fitness_ok):
                        continue
                    if submittable:
                        checks = is_stats.get("checks", [])
                        fail_count = sum(1 for check in checks if check.get("result") == "FAIL")
                        if fail_count == 0:
                            if auto_color:
                                alpha_id = alpha.get("id")
                                if alpha_id and self.update_alpha_color(alpha_id, color):
                                    colored_count += 1
                            final_filtered.append(alpha)
                    else:
                        final_filtered.append(alpha)
                fetched_alphas.extend(final_filtered)
                pbar.update(len(alphas))
                pbar.set_postfix({
                    "Region": region,
                    "已扫描": total_accessed,
                    "找到": len(fetched_alphas),
                    "本次": len(final_filtered),
                    "标记": colored_count,
                })
                if len(alphas) < limit:
                    break
                offset += limit
                # Throttle so we don't hammer the API.
                time.sleep(0.5)
            except Exception as e:
                self.logger.error(f"请求失败: {e}")
                break
        pbar.close()
        if auto_color and colored_count > 0:
            self.logger.info(f"共标记了 {colored_count} 个Alpha颜色为{color}")
            self.wechat_check_corr_message(f"共标记了 {colored_count} 个Alpha颜色为{color}")
        alpha_list = fetched_alphas[:total_alphas]
        if not alpha_list:
            self.logger.warning("未找到任何符合条件的Alpha!请检查筛选条件是否过于严格。")
        else:
            df = pd.DataFrame(alpha_list)
            # Sort by IS fitness (descending) via a throwaway helper column.
            df["temp_fitness"] = df.apply(
                lambda row: row["is"].get("fitness", 0) if isinstance(row.get("is"), dict) else 0,
                axis=1
            )
            df_sorted = df.sort_values(by="temp_fitness", ascending=False)
            df_sorted = df_sorted.drop("temp_fitness", axis=1)
            output_path = self.base_path / output_file_name
            if mode == "w":
                df_sorted.to_csv(output_path, index=False)
            elif mode == "a":
                df_sorted.to_csv(output_path, mode="a", index=False, header=False)
            self.logger.info(f"批量回测初步检测结果已经下载!共{len(alpha_list)}条记录!\n{output_path}文件名保存!")
        return alpha_list

    def close(self):
        """Close the underlying HTTP client, if one was created."""
        if self.client:
            self.client.close()
# Usage example
if __name__ == "__main__":
    # Instantiating the manager performs the login automatically.
    manager = AlphaManager()
    if manager.client:
        # Search for submittable alphas whose expression contains "put",
        # and colour-mark the matches green.
        search_params = dict(
            total_alphas=10,
            region="USA",
            universe="TOP3000",
            required_fields=['put'],
            match_mode="all",
            submittable=True,
            auto_color=True,
            color="GREEN",
            output_file_name="my_alpha_list.csv",
        )
        results = manager.get_alphas(**search_params)
        print(f"找到 {len(results) if results else 0} 个符合条件的Alpha")
    # Release the HTTP client.
    manager.close()
Loading…
Cancel
Save