@ -2,6 +2,7 @@ import requests
import json
import json
import sys
import sys
import asyncio
import asyncio
import os
import openai
import openai
import re
import re
from typing import Optional , Union # Added this import
from typing import Optional , Union # Added this import
@ -584,18 +585,26 @@ async def generate_alpha_description(alpha_id: str, brain_session: SingleSession
async def call_llm_new ( prompt : str ) - > dict :
async def call_llm_new ( prompt : str ) - > dict :
# 声明使用全局变量
# 声明使用全局变量
global LLM_model_name , LLM_API_KEY , llm_base_url
global LLM_model_name , LLM_API_KEY , llm_base_url
print ( f " \n [call_llm_new] 准备调用 LLM... " , flush = True )
print ( f " [call_llm_new] 模型: { LLM_model_name } " , flush = True )
print ( f " [call_llm_new] llm_base_url: { llm_base_url } " , flush = True )
try :
try :
print ( f " [call_llm_new] 正在获取 LLM token... " , flush = True )
llm_api_key = get_token_from_auth_server ( )
llm_api_key = get_token_from_auth_server ( )
llm_base_url_value = llm_base_url # 使用全局变量
llm_base_url_value = llm_base_url # 使用全局变量
print ( f " [call_llm_new] 创建 LLM 客户端... " , flush = True )
llm_client = openai . AsyncOpenAI ( base_url = llm_base_url_value , api_key = llm_api_key )
llm_client = openai . AsyncOpenAI ( base_url = llm_base_url_value , api_key = llm_api_key )
print ( " LLM Gateway Authentication successful. (LLM网关认证成功) " , flush = True )
print ( " [call_llm_new] LLM Gateway Authentication successful. (LLM网关认证成功)" , flush = True )
except Exception as e :
except Exception as e :
print ( f " LLM Gateway Authentication failed (LLM网关认证失败): { e } " , flush = True )
print ( f " [call_llm_new] ❌ LLM Gateway Authentication failed (LLM网关认证失败): { e } " , flush = True )
sys . exit ( 1 )
sys . exit ( 1 )
print ( " --- Calling LLM to propose templates... (正在调用LLM生成模板...) --- " , flush = True )
print ( " [call_llm_new] --- Calling LLM... (正在调用LLM...) --- " , flush = True )
print ( f " [call_llm_new] Prompt 长度: { len ( prompt ) } 字符 " , flush = True )
try :
try :
# Await the async create call
# Await the async create call
print ( f " [call_llm_new] 正在发送请求到 LLM... " , flush = True )
response = await llm_client . chat . completions . create (
response = await llm_client . chat . completions . create (
model = LLM_model_name ,
model = LLM_model_name ,
messages = [
messages = [
@ -604,6 +613,7 @@ async def generate_alpha_description(alpha_id: str, brain_session: SingleSession
] ,
] ,
# response_format={"type": "json_object"},
# response_format={"type": "json_object"},
)
)
print ( f " [call_llm_new] 收到 LLM 响应 " , flush = True )
# The async client may return a nested structure. Try to extract content robustly.
# The async client may return a nested structure. Try to extract content robustly.
content = None
content = None
@ -637,18 +647,26 @@ async def generate_alpha_description(alpha_id: str, brain_session: SingleSession
# Return wrapped string if not JSON
# Return wrapped string if not JSON
return { " text " : content }
return { " text " : content }
print ( f " [call_llm_new] ✓ 成功返回结果 " , flush = True )
return { }
return { }
except Exception as e :
except Exception as e :
print ( f " Error calling LLM (调用LLM出错): { e } " , flush = True )
print ( f " [call_llm_new] ❌ Error calling LLM (调用LLM出错): { e } " , flush = True )
import traceback
print ( f " [call_llm_new] 错误详情: { traceback . format_exc ( ) } " , flush = True )
return { }
return { }
print ( f " \n [Alpha Description] 开始获取 Alpha { alpha_id } 的详情... " , flush = True )
try :
try :
brain_api_url = " https://api.worldquantbrain.com "
brain_api_url = " https://api.worldquantbrain.com "
alpha_url = f " { brain_api_url } /alphas/ { alpha_id } "
alpha_url = f " { brain_api_url } /alphas/ { alpha_id } "
print ( f " [Alpha Description] 请求 URL: { alpha_url } " , flush = True )
response = brain_session . get ( alpha_url )
response = brain_session . get ( alpha_url )
print ( f " [Alpha Description] 响应状态码: { response . status_code } " , flush = True )
response . raise_for_status ( ) # Raise an exception for HTTP errors
response . raise_for_status ( ) # Raise an exception for HTTP errors
alpha_data = response . json ( )
alpha_data = response . json ( )
print ( f " [Alpha Description] 成功获取 Alpha 数据 " , flush = True )
settings = alpha_data . get ( ' settings ' , { } )
settings = alpha_data . get ( ' settings ' , { } )
expression_dict = alpha_data . get ( ' regular ' , alpha_data . get ( ' combo ' , None ) )
expression_dict = alpha_data . get ( ' regular ' , alpha_data . get ( ' combo ' , None ) )
@ -831,10 +849,14 @@ async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int
print ( " LLM client not initialized. Please check authentication. (LLM客户端未初始化,请检查认证) " , flush = True )
print ( " LLM client not initialized. Please check authentication. (LLM客户端未初始化,请检查认证) " , flush = True )
return { }
return { }
print ( " --- Calling LLM... (正在调用LLM...) --- " , flush = True )
print ( " \n [LLM Call] 准备调用 LLM API... " , flush = True )
print ( f " [LLM Call] 模型: { LLM_model_name } " , flush = True )
print ( f " [LLM Call] Prompt 长度: { len ( prompt ) } 字符 " , flush = True )
print ( " [LLM Call] 正在发送请求... " , flush = True )
for attempt in range ( max_retries ) :
for attempt in range ( max_retries ) :
try :
try :
print ( f " [LLM Call] 第 { attempt + 1 } 次尝试... " , flush = True )
response = await llm_client . chat . completions . create (
response = await llm_client . chat . completions . create (
model = LLM_model_name , # Or your preferred model
model = LLM_model_name , # Or your preferred model
messages = [
messages = [
@ -843,15 +865,22 @@ async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int
] ,
] ,
# response_format={"type": "json_object"},
# response_format={"type": "json_object"},
)
)
print ( f " [LLM Call] 收到响应,状态: OK " , flush = True )
content = response . choices [ 0 ] . message . content
content = response . choices [ 0 ] . message . content
print ( f " [LLM Call] 响应内容长度: { len ( content ) } 字符 " , flush = True )
# Try to clean markdown code blocks if present
# Try to clean markdown code blocks if present
if " ```json " in content :
if " ```json " in content :
content = content . split ( " ```json " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
content = content . split ( " ```json " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
print ( f " [LLM Call] 清理了 JSON markdown 标记 " , flush = True )
elif " ``` " in content :
elif " ``` " in content :
content = content . split ( " ``` " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
content = content . split ( " ``` " ) [ 1 ] . split ( " ``` " ) [ 0 ] . strip ( )
print ( f " [LLM Call] 清理了 markdown 标记 " , flush = True )
return json . loads ( content )
print ( f " [LLM Call] 解析 JSON... " , flush = True )
result = json . loads ( content )
print ( f " [LLM Call] JSON 解析成功,返回 { len ( result ) } 个结果 " , flush = True )
return result
except json . JSONDecodeError as e :
except json . JSONDecodeError as e :
print ( f " ⚠ JSON Decode Error (Attempt { attempt + 1 } / { max_retries } ): { e } " , flush = True )
print ( f " ⚠ JSON Decode Error (Attempt { attempt + 1 } / { max_retries } ): { e } " , flush = True )
if attempt == max_retries - 1 :
if attempt == max_retries - 1 :
@ -860,30 +889,28 @@ async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int
print ( f " ⚠ LLM Call Error (Attempt { attempt + 1 } / { max_retries } ): { e } " , flush = True )
print ( f " ⚠ LLM Call Error (Attempt { attempt + 1 } / { max_retries } ): { e } " , flush = True )
if attempt == max_retries - 1 :
if attempt == max_retries - 1 :
print ( f " ❌ Failed to call LLM after { max_retries } attempts. " , flush = True )
print ( f " ❌ Failed to call LLM after { max_retries } attempts. " , flush = True )
raise Exception ( f " LLM 调用失败: { e } " )
# Wait before retrying
# Wait before retrying (2 seconds for MiniMax 529)
print ( f " ⏳ 等待 2 秒后重试... " , flush = True )
await asyncio . sleep ( 2 )
await asyncio . sleep ( 2 )
return { }
return { }
async def propose_alpha_templates ( alpha_details : dict , template_summary : str , llm_client : openai . AsyncOpenAI , user_data_type : str = " MATRIX " ) - > dict :
def has_valid_placeholders(template_str: str) -> bool:
    """Return True if the template contains at least one placeholder tag.

    A placeholder is a self-closing tag such as ``<data_field/>`` or
    ``<ts_operator/>``: a ``<``, one or more ASCII letters, digits or
    underscores, then ``/>`` with no whitespace anywhere inside the tag.

    Args:
        template_str: The candidate alpha template expression.

    Returns:
        True when at least one placeholder is present, False otherwise
        (including for the empty string).
    """
    # re.search stops at the first hit; the matches themselves are never
    # used, so there is no need to collect them all with findall.
    return re.search(r"<[A-Za-z0-9_]+/>", template_str) is not None
Args :
alpha_details ( dict ) : The details of the seed alpha .
template_summary ( str ) : A summary of alpha templates to guide the LLM .
llm_client ( openai . AsyncOpenAI ) : The authenticated OpenAI - compatible client .
user_data_type ( str ) : The data type for the alpha ( MATRIX or VECTOR ) .
Returns :
async def propose_alpha_templates_with_retry ( alpha_details : dict , template_summary : str , llm_client : openai . AsyncOpenAI , user_data_type : str = " MATRIX " , max_retries : int = 20 ) - > dict :
dict : A dictionary of proposed alpha templates in JSON format .
"""
使用重试机制生成 Alpha 模板 , 确保包含占位符
"""
"""
if not alpha_details . get ( ' expression ' ) :
if not alpha_details . get ( ' expression ' ) :
print ( " Error: Alpha expression is missing. (错误:缺少Alpha表达式) " , flush = True )
print ( " Error: Alpha expression is missing. (错误:缺少Alpha表达式) " , flush = True )
return { }
return { }
else :
print ( f " current seed alpha detail (当前种子Alpha详情): { alpha_details . get ( ' expression ' ) } " , flush = True )
data_type_instruction = " "
data_type_instruction = " "
if user_data_type == " MATRIX " :
if user_data_type == " MATRIX " :
@ -904,38 +931,87 @@ You will be provided with the seed alpha's expression and a summary of successfu
* * Your Task : * *
* * Your Task : * *
Based on the structure and potential economic rationale of the seed alpha , by the aid of the Alpha template summary , propose 3 - 5 new , diverse alpha templates .
Based on the structure and potential economic rationale of the seed alpha , by the aid of the Alpha template summary , propose 3 - 5 new , diverse alpha templates .
* * Rules : * *
* * CRITICAL RULES ( 必须遵守 ) : * *
1. The proposed templates must be valid BRAIN alpha expressions .
1. The proposed templates must be valid BRAIN alpha expressions .
2. Use placeholders like ` < data_field / > ` for data fields and ` < operator / > ` for operators that can be programmatically replaced later .
2. * * MANDATORY : You MUST use placeholders like ` < data_field / > ` for data fields and ` < operator / > ` for operators . DO NOT use actual data field names like ` avg_pct_change_estimate_12m_earnings_7d ` directly in the template . Placeholders are REQUIRED and will be replaced programmatically later . * *
3. For each proposed template , provide a brief , clear explanation of its investment rationale .
3. Valid placeholder formats : ` < data_field / > ` , ` < operator / > ` , ` < ts_operator / > ` , ` < group_operator / > ` , ` < integer_parameter / > ` , ` < float_parameter / > `
4. Return the output as a single , valid JSON object where keys are the proposed template strings and values are their corresponding explanations . Do not include any other text or formatting outside of the JSON object .
4. For each proposed template , provide a brief , clear explanation of its investment rationale .
5. The proposed new alpha template should be related to the economic sense of seed Alpha { alpha_details } but in different format such as . Utilize the inspiration well .
5. Return the output as a single , valid JSON object where keys are the proposed template strings and values are their corresponding explanations . Do not include any other text or formatting outside of the JSON object .
6. The proposed new alpha template should be related to the economic sense of seed Alpha but in different format . Utilize the inspiration well .
{ data_type_instruction }
{ data_type_instruction }
* * Example Output Format : * *
* * Example Output Format ( 占位符格式示例 ) : * *
{ {
{ {
" <group_operators />(<ts_operators />(<data_field/>, 60), industry) " : " A cross-sectional momentum signal, neutralized by industry, to capture relative strength within peer groups. " ,
" <group_operator/>(<ts_operator/>(<data_field/>, 60), industry) " : " A cross-sectional momentum signal, neutralized by industry, to capture relative strength within peer groups. " ,
" <logical_ operator/><ts_operators />(<data_field/>, 20) " : " A simple short-term momentum operator applied to a data field. "
" <operator/>( <ts_operator/>(<data_field/>, 20), <float_parameter/> ) " : " A simple short-term momentum operator applied to a data field with a parameter . "
} }
} }
Now , generate the JSON object with your proposed templates .
* * WARNING : If you do not use placeholders like ` < data_field / > ` , the template will be rejected and you will need to regenerate . Placeholders are ESSENTIAL for the template system to work . * *
Now , generate the JSON object with your proposed templates . Remember : USE PLACEHOLDERS like ` < data_field / > ` , NOT actual field names !
"""
"""
try :
print ( f " \n [Step 1/5] 正在调用 LLM 生成 Alpha 模板... " , flush = True )
print ( f " \n [Step 1/5] 正在调用 LLM 生成 Alpha 模板... " , flush = True )
print ( f " - 模型: { LLM_model_name } " , flush = True )
print ( f " - 模型: { LLM_model_name } " , flush = True )
print ( f " - 数据类型: { user_data_type } " , flush = True )
print ( f " - 数据类型: { user_data_type } " , flush = True )
print ( f " - 最大重试次数: { max_retries } " , flush = True )
alpha_expr = alpha_details . get ( ' expression ' , { } )
alpha_expr = alpha_details . get ( ' expression ' , { } )
if isinstance ( alpha_expr , dict ) :
if isinstance ( alpha_expr , dict ) :
alpha_expr = alpha_expr . get ( ' code ' , ' N/A ' )
alpha_expr = alpha_expr . get ( ' code ' , ' N/A ' )
print ( f " - 种子 Alpha: { str ( alpha_expr ) [ : 50 ] } ... " , flush = True )
print ( f " - 种子 Alpha: { str ( alpha_expr ) [ : 50 ] } ... " , flush = True )
# print(f"现在的template summary是{template_summary}")
proposed_templates = await call_llm ( prompt , llm_client )
# 重试机制
print ( f " ✓ LLM 返回 { len ( proposed_templates ) } 个模板提议 " , flush = True )
for attempt in range ( 1 , max_retries + 1 ) :
return proposed_templates
try :
except Exception as e :
print ( f " \n [尝试 { attempt } / { max_retries } ] 调用 LLM... " , flush = True )
print ( f " An error occurred while calling the LLM (调用LLM时发生错误): { e } " , flush = True )
proposed_templates = await call_llm ( prompt , llm_client )
return { }
# 验证是否包含占位符
valid_templates = { }
invalid_templates = [ ]
for template_expr , explanation in proposed_templates . items ( ) :
if has_valid_placeholders ( template_expr ) :
valid_templates [ template_expr ] = explanation
else :
invalid_templates . append ( template_expr )
if valid_templates :
print ( f " ✓ 成功生成 { len ( valid_templates ) } 个有效模板(含占位符) " , flush = True )
if invalid_templates :
print ( f " ⚠ 丢弃 { len ( invalid_templates ) } 个无效模板(无占位符) " , flush = True )
return valid_templates
else :
print ( f " ✗ 所有模板均无占位符,需要重试 " , flush = True )
if invalid_templates :
print ( f " 无效模板示例: { invalid_templates [ 0 ] [ : 80 ] } ... " , flush = True )
if attempt < max_retries :
print ( f " ↻ 等待重试... " , flush = True )
await asyncio . sleep ( 1 ) # 短暂延迟避免请求过快
except Exception as e :
print ( f " ✗ 调用 LLM 时发生错误: { e } " , flush = True )
# 529 是 MiniMax 的特色,继续重试
if " overloaded " in str ( e ) or " 529 " in str ( e ) :
print ( f " ⚠ MiniMax 529 错误,继续重试... " , flush = True )
if attempt < max_retries :
print ( f " ↻ 等待 2 秒后重试... " , flush = True )
await asyncio . sleep ( 2 )
# 20次都失败了
print ( f " \n ⚠⚠⚠ 警告: 经过 { max_retries } 次重试,仍未能生成包含占位符的模板! " , flush = True )
print ( f " 可能原因: LLM 未遵循指令,或模型不支持此格式。 " , flush = True )
print ( f " 建议: 检查 LLM 模型是否正确,或手动修改 prompt。 " , flush = True )
return { }
async def propose_alpha_templates(alpha_details: dict, template_summary: str, llm_client: openai.AsyncOpenAI, user_data_type: str = "MATRIX", max_retries: int = 20) -> dict:
    """Propose new alpha templates from a seed alpha's details using an LLM.

    Thin wrapper that keeps the original entry-point name: every argument is
    forwarded unchanged, in order, to ``propose_alpha_templates_with_retry``,
    and that coroutine's result is returned as-is.
    """
    templates = await propose_alpha_templates_with_retry(
        alpha_details,
        template_summary,
        llm_client,
        user_data_type,
        max_retries,
    )
    return templates
async def propose_datafield_keywords ( template_expression : str , template_explanation : str , placeholder : str , llm_client : openai . AsyncOpenAI , user_category : Optional [ Union [ str , list ] ] = None ) - > list [ str ] :
async def propose_datafield_keywords ( template_expression : str , template_explanation : str , placeholder : str , llm_client : openai . AsyncOpenAI , user_category : Optional [ Union [ str , list ] ] = None ) - > list [ str ] :
"""
"""
@ -1304,7 +1380,7 @@ def get_datafield_prefix(datafield_name: str) -> str:
async def generate_new_alphas ( alpha_description , brain_session , template_summary : Optional [ str ] = None , top_n_datafield : int = 50 , user_region : Optional [ str ] = None , user_universe : Optional [ str ] = None , user_delay : Optional [ int ] = None , user_category : Optional [ Union [ str , list ] ] = None , user_data_type : str = " MATRIX " ) :
async def generate_new_alphas ( alpha_description , brain_session , template_summary : Optional [ str ] = None , top_n_datafield : int = 50 , user_region : Optional [ str ] = None , user_universe : Optional [ str ] = None , user_delay : Optional [ int ] = None , user_category : Optional [ Union [ str , list ] ] = None , user_data_type : str = " MATRIX " , max_retries : int = 20 ) :
"""
"""
Main function to generate new alpha templates based on a seed alpha .
Main function to generate new alpha templates based on a seed alpha .
@ -1347,7 +1423,7 @@ async def generate_new_alphas(alpha_description, brain_session, template_summary
print ( f " \n { ' = ' * 60 } " , flush = True )
print ( f " \n { ' = ' * 60 } " , flush = True )
print ( " [Step 2/5] 正在生成 Alpha 模板提议... " , flush = True )
print ( " [Step 2/5] 正在生成 Alpha 模板提议... " , flush = True )
print ( f " { ' = ' * 60 } " , flush = True )
print ( f " { ' = ' * 60 } " , flush = True )
proposed_templates = await propose_alpha_templates ( details , template_summary , llm_client , user_data_type = user_data_type )
proposed_templates = await propose_alpha_templates ( details , template_summary , llm_client , user_data_type = user_data_type , max_retries = max_retries )
if not proposed_templates :
if not proposed_templates :
print ( " Failed to generate proposed alpha templates. (生成提议模板失败) " , flush = True )
print ( " Failed to generate proposed alpha templates. (生成提议模板失败) " , flush = True )
@ -1595,12 +1671,28 @@ async def main():
# 设置全局变量
# 设置全局变量
global LLM_model_name , LLM_API_KEY , llm_base_url , username , password
global LLM_model_name , LLM_API_KEY , llm_base_url , username , password
print ( " \n [Config] 正在设置全局变量... " , flush = True )
required_config_fields = [ ' LLM_model_name ' , ' LLM_API_KEY ' , ' llm_base_url ' , ' username ' , ' password ' , ' alpha_id ' ]
missing_fields = [ f for f in required_config_fields if f not in config ]
if missing_fields :
print ( f " ❌ [Config] 配置缺少必填字段: { missing_fields } " , flush = True )
print ( f " ❌ [Config] 当前配置内容: { list ( config . keys ( ) ) } " , flush = True )
sys . exit ( 1 )
LLM_model_name = config [ ' LLM_model_name ' ]
LLM_model_name = config [ ' LLM_model_name ' ]
LLM_API_KEY = config [ ' LLM_API_KEY ' ]
LLM_API_KEY = config [ ' LLM_API_KEY ' ]
llm_base_url = config [ ' llm_base_url ' ]
llm_base_url = config [ ' llm_base_url ' ]
username = config [ ' username ' ]
username = config [ ' username ' ]
password = config [ ' password ' ]
password = config [ ' password ' ]
print ( f " ✓ [Config] LLM_model_name: { LLM_model_name } " , flush = True )
print ( f " ✓ [Config] llm_base_url: { llm_base_url } " , flush = True )
print ( f " ✓ [Config] username: { username } " , flush = True )
print ( f " ✓ [Config] alpha_id: { config [ ' alpha_id ' ] } " , flush = True )
# --- Step 1: 加载模板总结 ---
# --- Step 1: 加载模板总结 ---
template_summary = load_template_summary ( config . get ( ' template_summary_path ' ) )
template_summary = load_template_summary ( config . get ( ' template_summary_path ' ) )
@ -1646,7 +1738,8 @@ async def main():
user_universe = user_datafield_config . get ( ' user_universe ' ) ,
user_universe = user_datafield_config . get ( ' user_universe ' ) ,
user_delay = user_datafield_config . get ( ' user_delay ' ) ,
user_delay = user_datafield_config . get ( ' user_delay ' ) ,
user_category = user_datafield_config . get ( ' user_category ' ) ,
user_category = user_datafield_config . get ( ' user_category ' ) ,
user_data_type = user_datafield_config . get ( ' user_data_type ' , ' MATRIX ' )
user_data_type = user_datafield_config . get ( ' user_data_type ' , ' MATRIX ' ) ,
max_retries = config . get ( ' max_retries ' , 20 )
)
)
def interactive_datafield_selection ( s : SingleSession ) - > dict :
def interactive_datafield_selection ( s : SingleSession ) - > dict :