++

2 weeks ago · 14d2300564
parent 10953219f5
commit 14d2300564
11 changed files with 306 additions and 172 deletions
--- a/simple72/Tranformer/Transformer.py
+++ b/simple72/Tranformer/Transformer.py
@ -2,6 +2,7 @@ import requests
 import json
 import sys
 import asyncio
 import os
 import openai
 import re
 from typing import Optional, Union # Added this import
@ -584,18 +585,26 @@ async def generate_alpha_description(alpha_id: str, brain_session: SingleSession
    async def call_llm_new(prompt: str) -> dict:
        # 声明使用全局变量
        global LLM_model_name, LLM_API_KEY, llm_base_url
        print(f"\n[call_llm_new] 准备调用 LLM...", flush=True)
        print(f"[call_llm_new] 模型: {LLM_model_name}", flush=True)
        print(f"[call_llm_new] llm_base_url: {llm_base_url}", flush=True)
        try:
            print(f"[call_llm_new] 正在获取 LLM token...", flush=True)
            llm_api_key = get_token_from_auth_server()
            llm_base_url_value = llm_base_url  # 使用全局变量
            print(f"[call_llm_new] 创建 LLM 客户端...", flush=True)
            llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
-            print("LLM Gateway Authentication successful. (LLM网关认证成功)", flush=True)
+            print("[call_llm_new] LLM Gateway Authentication successful. (LLM网关认证成功)", flush=True)
        except Exception as e:
-            print(f"LLM Gateway Authentication failed (LLM网关认证失败): {e}", flush=True)
+            print(f"[call_llm_new] ❌ LLM Gateway Authentication failed (LLM网关认证失败): {e}", flush=True)
            sys.exit(1)
-        print("--- Calling LLM to propose templates... (正在调用LLM生成模板...) ---", flush=True)
+        print("[call_llm_new] --- Calling LLM... (正在调用LLM...) ---", flush=True)
        print(f"[call_llm_new] Prompt 长度: {len(prompt)} 字符", flush=True)
        try:
            # Await the async create call
            print(f"[call_llm_new] 正在发送请求到 LLM...", flush=True)
            response = await llm_client.chat.completions.create(
                model=LLM_model_name,
                messages=[
@ -604,6 +613,7 @@ async def generate_alpha_description(alpha_id: str, brain_session: SingleSession
                ],
                # response_format={"type": "json_object"},
            )
            print(f"[call_llm_new] 收到 LLM 响应", flush=True)
            # The async client may return a nested structure. Try to extract content robustly.
            content = None
@ -637,18 +647,26 @@ async def generate_alpha_description(alpha_id: str, brain_session: SingleSession
                    # Return wrapped string if not JSON
                    return {"text": content}
            print(f"[call_llm_new] ✓ 成功返回结果", flush=True)
            return {}
        except Exception as e:
-            print(f"Error calling LLM (调用LLM出错): {e}", flush=True)
+            print(f"[call_llm_new] ❌ Error calling LLM (调用LLM出错): {e}", flush=True)
            import traceback
            print(f"[call_llm_new] 错误详情: {traceback.format_exc()}", flush=True)
            return {}
    print(f"\n[Alpha Description] 开始获取 Alpha {alpha_id} 的详情...", flush=True)
    try:
        brain_api_url = "https://api.worldquantbrain.com"
        alpha_url = f"{brain_api_url}/alphas/{alpha_id}"
        print(f"[Alpha Description] 请求 URL: {alpha_url}", flush=True)
        response = brain_session.get(alpha_url)
        print(f"[Alpha Description] 响应状态码: {response.status_code}", flush=True)
        response.raise_for_status()  # Raise an exception for HTTP errors
        alpha_data = response.json()
        print(f"[Alpha Description] 成功获取 Alpha 数据", flush=True)
        settings = alpha_data.get('settings', {})
        expression_dict = alpha_data.get('regular', alpha_data.get('combo', None))
@ -831,10 +849,14 @@ async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int
        print("LLM client not initialized. Please check authentication. (LLM客户端未初始化，请检查认证)", flush=True)
        return {}
-    print("--- Calling LLM... (正在调用LLM...) ---", flush=True)
+    print("\n[LLM Call] 准备调用 LLM API...", flush=True)
    print(f"[LLM Call] 模型: {LLM_model_name}", flush=True)
    print(f"[LLM Call] Prompt 长度: {len(prompt)} 字符", flush=True)
    print("[LLM Call] 正在发送请求...", flush=True)
    for attempt in range(max_retries):
        try:
            print(f"[LLM Call] 第 {attempt + 1} 次尝试...", flush=True)
            response = await llm_client.chat.completions.create(
                model=LLM_model_name,  # Or your preferred model
                messages=[
@ -843,15 +865,22 @@ async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int
                ],
                # response_format={"type": "json_object"},
            )
            print(f"[LLM Call] 收到响应，状态: OK", flush=True)
            content = response.choices[0].message.content
            print(f"[LLM Call] 响应内容长度: {len(content)} 字符", flush=True)
            # Try to clean markdown code blocks if present
            if "```json" in content:
                content = content.split("```json")[1].split("```")[0].strip()
                print(f"[LLM Call] 清理了 JSON markdown 标记", flush=True)
            elif "```" in content:
                content = content.split("```")[1].split("```")[0].strip()
-                
+                print(f"[LLM Call] 清理了 markdown 标记", flush=True)
-            return json.loads(content)
+            
            print(f"[LLM Call] 解析 JSON...", flush=True)
            result = json.loads(content)
            print(f"[LLM Call] JSON 解析成功，返回 {len(result)} 个结果", flush=True)
            return result
        except json.JSONDecodeError as e:
            print(f"⚠ JSON Decode Error (Attempt {attempt + 1}/{max_retries}): {e}", flush=True)
            if attempt == max_retries - 1:
@ -860,30 +889,28 @@ async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int
            print(f"⚠ LLM Call Error (Attempt {attempt + 1}/{max_retries}): {e}", flush=True)
            if attempt == max_retries - 1:
                print(f"❌ Failed to call LLM after {max_retries} attempts.", flush=True)
                raise Exception(f"LLM 调用失败: {e}")
-        # Wait before retrying
+        # Wait before retrying (2 seconds for MiniMax 529)
        print(f"⏳ 等待 2 秒后重试...", flush=True)
        await asyncio.sleep(2)
    return {}
-async def propose_alpha_templates(alpha_details: dict, template_summary: str, llm_client: openai.AsyncOpenAI, user_data_type: str = "MATRIX") -> dict:
+def has_valid_placeholders(template_str: str) -> bool:
-    """
+    """检查模板字符串是否包含有效的占位符"""
-    Uses an LLM to propose new alpha templates based on a seed alpha's details.
+    import re
    placeholders = re.findall(r'(<[A-Za-z0-9_]+/>)', template_str)
    return len(placeholders) > 0
    Args:
        alpha_details (dict): The details of the seed alpha.
        template_summary (str): A summary of alpha templates to guide the LLM.
        llm_client (openai.AsyncOpenAI): The authenticated OpenAI-compatible client.
        user_data_type (str): The data type for the alpha (MATRIX or VECTOR).
-    Returns:
+async def propose_alpha_templates_with_retry(alpha_details: dict, template_summary: str, llm_client: openai.AsyncOpenAI, user_data_type: str = "MATRIX", max_retries: int = 20) -> dict:
-        dict: A dictionary of proposed alpha templates in JSON format.
+    """
    使用重试机制生成 Alpha 模板，确保包含占位符
    """
    if not alpha_details.get('expression'):
        print("Error: Alpha expression is missing. (错误：缺少Alpha表达式)", flush=True)
        return {}
    else:
        print(f"current seed alpha detail (当前种子Alpha详情): {alpha_details.get('expression')}", flush=True)
    data_type_instruction = ""
    if user_data_type == "MATRIX":
@ -904,38 +931,87 @@ You will be provided with the seed alpha's expression and a summary of successfu
 **Your Task:**
 Based on the structure and potential economic rationale of the seed alpha, by the aid of the Alpha template summary, propose 3-5 new, diverse alpha templates.
-**Rules:**
+**CRITICAL RULES (必须遵守):**
 1.  The proposed templates must be valid BRAIN alpha expressions.
-2.  Use placeholders like `<data_field/>` for data fields and `<operator/>` for operators that can be programmatically replaced later.
+2.  **MANDATORY: You MUST use placeholders like `<data_field/>` for data fields and `<operator/>` for operators. DO NOT use actual data field names like `avg_pct_change_estimate_12m_earnings_7d` directly in the template. Placeholders are REQUIRED and will be replaced programmatically later.**
-3.  For each proposed template, provide a brief, clear explanation of its investment rationale.
+3.  Valid placeholder formats: `<data_field/>`, `<operator/>`, `<ts_operator/>`, `<group_operator/>`, `<integer_parameter/>`, `<float_parameter/>`
-4.  Return the output as a single, valid JSON object where keys are the proposed template strings and values are their corresponding explanations. Do not include any other text or formatting outside of the JSON object.
+4.  For each proposed template, provide a brief, clear explanation of its investment rationale.
-5.  The proposed new alpha template should be related to the economic sense of seed Alpha {alpha_details} but in different format such as. Utilize the inspiration well.
+5.  Return the output as a single, valid JSON object where keys are the proposed template strings and values are their corresponding explanations. Do not include any other text or formatting outside of the JSON object.
 6.  The proposed new alpha template should be related to the economic sense of seed Alpha but in different format. Utilize the inspiration well.
 {data_type_instruction}
-**Example Output Format:**
+**Example Output Format (占位符格式示例):**
 {{
-  "<group_operators/>(<ts_operators/>(<data_field/>, 60), industry)": "A cross-sectional momentum signal, neutralized by industry, to capture relative strength within peer groups.",
+  "<group_operator/>(<ts_operator/>(<data_field/>, 60), industry)": "A cross-sectional momentum signal, neutralized by industry, to capture relative strength within peer groups.",
-  "<logical_operator/><ts_operators/>(<data_field/>, 20)": "A simple short-term momentum operator applied to a data field."
+  "<operator/>(<ts_operator/>(<data_field/>, 20), <float_parameter/>)": "A simple short-term momentum operator applied to a data field with a parameter."
 }}
-Now, generate the JSON object with your proposed templates.
+**WARNING: If you do not use placeholders like `<data_field/>`, the template will be rejected and you will need to regenerate. Placeholders are ESSENTIAL for the template system to work.**
 Now, generate the JSON object with your proposed templates. Remember: USE PLACEHOLDERS like `<data_field/>`, NOT actual field names!
 """
-    try:
+    print(f"\n[Step 1/5] 正在调用 LLM 生成 Alpha 模板...", flush=True)
-        print(f"\n[Step 1/5] 正在调用 LLM 生成 Alpha 模板...", flush=True)
+    print(f"  - 模型: {LLM_model_name}", flush=True)
-        print(f"  - 模型: {LLM_model_name}", flush=True)
+    print(f"  - 数据类型: {user_data_type}", flush=True)
-        print(f"  - 数据类型: {user_data_type}", flush=True)
+    print(f"  - 最大重试次数: {max_retries}", flush=True)
-        alpha_expr = alpha_details.get('expression', {})
+    alpha_expr = alpha_details.get('expression', {})
-        if isinstance(alpha_expr, dict):
+    if isinstance(alpha_expr, dict):
-            alpha_expr = alpha_expr.get('code', 'N/A')
+        alpha_expr = alpha_expr.get('code', 'N/A')
-        print(f"  - 种子 Alpha: {str(alpha_expr)[:50]}...", flush=True)
+    print(f"  - 种子 Alpha: {str(alpha_expr)[:50]}...", flush=True)
-        # print(f"现在的template summary是{template_summary}")
+    
-        proposed_templates = await call_llm(prompt, llm_client)
+    # 重试机制
-        print(f"✓ LLM 返回 {len(proposed_templates)} 个模板提议", flush=True)
+    for attempt in range(1, max_retries + 1):
-        return proposed_templates
+        try:
-    except Exception as e:
+            print(f"\n  [尝试 {attempt}/{max_retries}] 调用 LLM...", flush=True)
-        print(f"An error occurred while calling the LLM (调用LLM时发生错误): {e}", flush=True)
+            proposed_templates = await call_llm(prompt, llm_client)
-        return {}
+            
            # 验证是否包含占位符
            valid_templates = {}
            invalid_templates = []
            for template_expr, explanation in proposed_templates.items():
                if has_valid_placeholders(template_expr):
                    valid_templates[template_expr] = explanation
                else:
                    invalid_templates.append(template_expr)
            if valid_templates:
                print(f"  ✓ 成功生成 {len(valid_templates)} 个有效模板（含占位符）", flush=True)
                if invalid_templates:
                    print(f"  ⚠ 丢弃 {len(invalid_templates)} 个无效模板（无占位符）", flush=True)
                return valid_templates
            else:
                print(f"  ✗ 所有模板均无占位符，需要重试", flush=True)
                if invalid_templates:
                    print(f"    无效模板示例: {invalid_templates[0][:80]}...", flush=True)
                if attempt < max_retries:
                    print(f"  ↻ 等待重试...", flush=True)
                    await asyncio.sleep(1)  # 短暂延迟避免请求过快
        except Exception as e:
            print(f"  ✗ 调用 LLM 时发生错误: {e}", flush=True)
            # 529 是 MiniMax 的特色，继续重试
            if "overloaded" in str(e) or "529" in str(e):
                print(f"  ⚠ MiniMax 529 错误，继续重试...", flush=True)
            if attempt < max_retries:
                print(f"  ↻ 等待 2 秒后重试...", flush=True)
                await asyncio.sleep(2)
    # All max_retries attempts were used up without producing a template that contains placeholders
    print(f"\n⚠⚠⚠ 警告: 经过 {max_retries} 次重试，仍未能生成包含占位符的模板！", flush=True)
    print(f"  可能原因: LLM 未遵循指令，或模型不支持此格式。", flush=True)
    print(f"  建议: 检查 LLM 模型是否正确，或手动修改 prompt。", flush=True)
    return {}
 async def propose_alpha_templates(alpha_details: dict, template_summary: str, llm_client: openai.AsyncOpenAI, user_data_type: str = "MATRIX", max_retries: int = 20) -> dict:
    """
    Propose new alpha templates for a seed alpha by calling the LLM.

    Thin wrapper kept for existing callers: every argument is forwarded to
    ``propose_alpha_templates_with_retry`` and its result is returned unchanged.
    """
    proposed = await propose_alpha_templates_with_retry(
        alpha_details,
        template_summary,
        llm_client,
        user_data_type=user_data_type,
        max_retries=max_retries,
    )
    return proposed
 async def propose_datafield_keywords(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, user_category: Optional[Union[str, list]] = None) -> list[str]:
    """
@ -1304,7 +1380,7 @@ def get_datafield_prefix(datafield_name: str) -> str:
-async def generate_new_alphas(alpha_description, brain_session, template_summary: Optional[str] = None, top_n_datafield: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX"):
+async def generate_new_alphas(alpha_description, brain_session, template_summary: Optional[str] = None, top_n_datafield: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX", max_retries: int = 20):
    """
    Main function to generate new alpha templates based on a seed alpha.
@ -1347,7 +1423,7 @@ async def generate_new_alphas(alpha_description, brain_session, template_summary
    print(f"\n{'='*60}", flush=True)
    print("[Step 2/5] 正在生成 Alpha 模板提议...", flush=True)
    print(f"{'='*60}", flush=True)
-    proposed_templates = await propose_alpha_templates(details, template_summary, llm_client, user_data_type=user_data_type)
+    proposed_templates = await propose_alpha_templates(details, template_summary, llm_client, user_data_type=user_data_type, max_retries=max_retries)
    if not proposed_templates:
        print("Failed to generate proposed alpha templates. (生成提议模板失败)", flush=True)
@ -1595,12 +1671,28 @@ async def main():
    # 设置全局变量
    global LLM_model_name, LLM_API_KEY, llm_base_url, username, password
    print("\n[Config] 正在设置全局变量...", flush=True)
    required_config_fields = ['LLM_model_name', 'LLM_API_KEY', 'llm_base_url', 'username', 'password', 'alpha_id']
    missing_fields = [f for f in required_config_fields if f not in config]
    if missing_fields:
        print(f"❌ [Config] 配置缺少必填字段: {missing_fields}", flush=True)
        print(f"❌ [Config] 当前配置内容: {list(config.keys())}", flush=True)
        sys.exit(1)
    LLM_model_name = config['LLM_model_name']
    LLM_API_KEY = config['LLM_API_KEY']
    llm_base_url = config['llm_base_url']
    username = config['username']
    password = config['password']
    print(f"✓ [Config] LLM_model_name: {LLM_model_name}", flush=True)
    print(f"✓ [Config] llm_base_url: {llm_base_url}", flush=True)
    print(f"✓ [Config] username: {username}", flush=True)
    print(f"✓ [Config] alpha_id: {config['alpha_id']}", flush=True)
    # --- Step 1: 加载模板总结 ---
    template_summary = load_template_summary(config.get('template_summary_path'))
@ -1646,7 +1738,8 @@ async def main():
        user_universe=user_datafield_config.get('user_universe'),
        user_delay=user_datafield_config.get('user_delay'),
        user_category=user_datafield_config.get('user_category'),
-        user_data_type=user_datafield_config.get('user_data_type', 'MATRIX')
+        user_data_type=user_datafield_config.get('user_data_type', 'MATRIX'),
        max_retries=config.get('max_retries', 20)
    )
 def interactive_datafield_selection(s: SingleSession) -> dict:
--- a/simple72/Tranformer/output/Alpha_candidates.json
+++ b/simple72/Tranformer/output/Alpha_candidates.json
@ -1,112 +0,0 @@
 {
    "ts_zscore(divide(avg_pct_change_estimate_12m_earnings_7d, add(count_analysts_lower_curr_qtr_earnings_30d, 0.0001)), 126)": {
        "template_explanation": "This template applies a 126-day rolling z-score normalization to the original earnings confidence ratio. By standardizing the signal relative to its own historical distribution, it captures whether current earnings optimism (vs. near-term pessimism) is unusually strong or weak compared to historical norms, enabling mean-reversion or momentum trading around historical equilibrium points.",
        "seed_alpha_settings": {
            "instrumentType": "EQUITY",
            "region": "IND",
            "universe": "TOP500",
            "delay": 1,
            "decay": 6,
            "neutralization": "SLOW_AND_FAST",
            "truncation": 0.02,
            "pasteurization": "ON",
            "unitHandling": "VERIFY",
            "nanHandling": "ON",
            "maxTrade": "OFF",
            "maxPosition": "OFF",
            "language": "FASTEXPR",
            "visualization": false,
            "startDate": "2014-01-01",
            "endDate": "2023-12-31"
        },
        "placeholder_candidates": {}
    },
    "group_zscore(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), industry)": {
        "template_explanation": "This template calculates the 66-day mean of the 12-month earnings estimate change and then performs industry-relative z-score normalization. It extracts pure earnings momentum by removing sector-wide trends, identifying stocks within each industry that have stronger or weaker earnings revisions than their peer group average.",
        "seed_alpha_settings": {
            "instrumentType": "EQUITY",
            "region": "IND",
            "universe": "TOP500",
            "delay": 1,
            "decay": 6,
            "neutralization": "SLOW_AND_FAST",
            "truncation": 0.02,
            "pasteurization": "ON",
            "unitHandling": "VERIFY",
            "nanHandling": "ON",
            "maxTrade": "OFF",
            "maxPosition": "OFF",
            "language": "FASTEXPR",
            "visualization": false,
            "startDate": "2014-01-01",
            "endDate": "2023-12-31"
        },
        "placeholder_candidates": {}
    },
    "ts_decay_linear(avg_pct_change_estimate_12m_earnings_7d, 20)": {
        "template_explanation": "This template applies exponential decay weighting to the 12-month earnings estimate changes over a 20-day window. Recent earnings revisions receive higher weight than older ones, creating a smoothed momentum signal that responds quickly to new information while filtering out short-term noise\u2014a refined version focusing purely on the numerator's forward-looking signal.",
        "seed_alpha_settings": {
            "instrumentType": "EQUITY",
            "region": "IND",
            "universe": "TOP500",
            "delay": 1,
            "decay": 6,
            "neutralization": "SLOW_AND_FAST",
            "truncation": 0.02,
            "pasteurization": "ON",
            "unitHandling": "VERIFY",
            "nanHandling": "ON",
            "maxTrade": "OFF",
            "maxPosition": "OFF",
            "language": "FASTEXPR",
            "visualization": false,
            "startDate": "2014-01-01",
            "endDate": "2023-12-31"
        },
        "placeholder_candidates": {}
    },
    "regression_neut(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), log(cap))": {
        "template_explanation": "This template removes the market cap factor exposure from the earnings momentum signal using regression neutralization. By stripping out size bias (larger companies may have more analyst coverage and different revision patterns), this alpha isolates the pure earnings-specific component, reducing unintended factor tilts.",
        "seed_alpha_settings": {
            "instrumentType": "EQUITY",
            "region": "IND",
            "universe": "TOP500",
            "delay": 1,
            "decay": 6,
            "neutralization": "SLOW_AND_FAST",
            "truncation": 0.02,
            "pasteurization": "ON",
            "unitHandling": "VERIFY",
            "nanHandling": "ON",
            "maxTrade": "OFF",
            "maxPosition": "OFF",
            "language": "FASTEXPR",
            "visualization": false,
            "startDate": "2014-01-01",
            "endDate": "2023-12-31"
        },
        "placeholder_candidates": {}
    },
    "divide(ts_rank(avg_pct_change_estimate_12m_earnings_7d, 252), add(ts_rank(count_analysts_lower_curr_qtr_earnings_30d, 126), 0.1))": {
        "template_explanation": "This template converts both earnings estimate change and analyst cut counts into percentile ranks before taking their ratio. The 252-day rank for earnings captures long-term earnings momentum, while the 126-day rank for analyst cuts captures recent bearishness. Ranking before division creates a more robust, distribution-invariant signal that is comparable across different market regimes.",
        "seed_alpha_settings": {
            "instrumentType": "EQUITY",
            "region": "IND",
            "universe": "TOP500",
            "delay": 1,
            "decay": 6,
            "neutralization": "SLOW_AND_FAST",
            "truncation": 0.02,
            "pasteurization": "ON",
            "unitHandling": "VERIFY",
            "nanHandling": "ON",
            "maxTrade": "OFF",
            "maxPosition": "OFF",
            "language": "FASTEXPR",
            "visualization": false,
            "startDate": "2014-01-01",
            "endDate": "2023-12-31"
        },
        "placeholder_candidates": {}
    }
 }
--- a/simple72/Tranformer/output/Alpha_generated_expressions_error.json
+++ b/simple72/Tranformer/output/Alpha_generated_expressions_error.json
@ -1 +0,0 @@
 []
--- a/simple72/Tranformer/output/Alpha_generated_expressions_success.json
+++ b/simple72/Tranformer/output/Alpha_generated_expressions_success.json
@ -1,7 +0,0 @@
 [
  "divide(ts_rank(avg_pct_change_estimate_12m_earnings_7d, 252), add(ts_rank(count_analysts_lower_curr_qtr_earnings_30d, 126), 0.1))",
  "regression_neut(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), log(cap))",
  "ts_decay_linear(avg_pct_change_estimate_12m_earnings_7d, 20)",
  "ts_zscore(divide(avg_pct_change_estimate_12m_earnings_7d, add(count_analysts_lower_curr_qtr_earnings_30d, 0.0001)), 126)",
  "group_zscore(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), industry)"
 ]
--- a/simple72/Tranformer/output/Alpha_candidates_示例.json
+++ b/simple72/Tranformer/output/Alpha_candidates_示例.json
--- a/simple72/Tranformer/test_config.json
+++ b/simple72/Tranformer/test_config.json
@ -0,0 +1,11 @@
 {
  "LLM_model_name": "MiniMax-M2.7",
  "LLM_API_KEY": "<YOUR_LLM_API_KEY>",
  "llm_base_url": "https://api.minimaxi.com/v1",
  "username": "jack0210_@hotmail.com",
  "password": "<YOUR_BRAIN_PASSWORD>",
  "alpha_id": "rKrlVO8o",
  "top_n_datafield": 30,
  "user_data_type": "MATRIX",
  "max_retries": 2
 }
--- a/simple72/main.py
+++ b/simple72/main.py
@ -169,9 +169,10 @@ async def generate_alpha(request: Request):
            "user_universe": data.get('user_universe'),
            "user_delay": int(data.get('user_delay')) if data.get('user_delay') else None,
            "user_category": data.get('user_category'),
-            "user_data_type": data.get('user_data_type', 'MATRIX')
+            "user_data_type": data.get('user_data_type', 'MATRIX'),
            "max_retries": int(data.get('max_retries', 20))
        }
-        print(f"配置已构建: LLM_model={config['LLM_model_name']}, alpha_id={config['alpha_id']}")
+        print(f"配置已构建: LLM_model={config['LLM_model_name']}, alpha_id={config['alpha_id']}, max_retries={config['max_retries']}")
        # 将配置写入临时 JSON 文件，供 Transformer 脚本读取
        config_path = os.path.join(transformer_dir, f'config_{task_id}.json')
@ -191,6 +192,18 @@ async def generate_alpha(request: Request):
                env={**os.environ, "PYTHONIOENCODING": "utf-8"}
            )
            print(f"Transformer 脚本执行完成，返回码: {process.returncode}")
            # 打印 Transformer 的输出到终端
            if process.stdout:
                print(f"\n{'='*50}")
                print("Transformer 输出:")
                print(f"{'='*50}")
                print(process.stdout)
            if process.stderr:
                print(f"\n{'='*50}")
                print("Transformer 错误:")
                print(f"{'='*50}")
                print(process.stderr)
            # 定义输出文件路径
            output_file = os.path.join(transformer_dir, 'output', 'Alpha_generated_expressions_success.json')
@ -356,6 +369,70 @@ async def health_check():
    return {"status": "healthy", "service": "alpha-transformer"}
@app.post("/api/test-llm")
async def test_llm_connection(request: Request):
    """
    Probe an OpenAI-compatible LLM endpoint with one minimal chat request.

    The request body is expected to be a JSON object carrying
    ``llm_api_key``, ``llm_base_url`` and ``llm_model``.

    Returns:
        JSONResponse:
            * 400 with ``success: False`` when the body is not a JSON object
              or any of the three fields is missing or empty;
            * 200 with ``success: True`` and up to the first 100 characters
              of the model reply when the endpoint returns at least one choice;
            * 500 with ``success: False`` and the error text when the reply
              has no choices or any exception is raised.
    """
    client = None
    try:
        data = await request.json()
        # A JSON array/string/number has no .get(); treat it as "nothing supplied"
        # so the caller gets the 400 below instead of an AttributeError-driven 500.
        if not isinstance(data, dict):
            data = {}

        api_key = data.get('llm_api_key')
        base_url = data.get('llm_base_url')
        model = data.get('llm_model')
        if not api_key or not base_url or not model:
            return JSONResponse(
                status_code=400,
                content={"success": False, "error": "Missing required LLM configuration"}
            )

        print(f"测试 LLM 连接: {base_url}, 模型: {model}")

        # Imported lazily so the module still loads when openai is not installed.
        import openai

        client = openai.AsyncOpenAI(
            api_key=api_key,
            base_url=base_url
        )

        # Smallest possible round trip: one short user message, tiny completion budget.
        response = await client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": "Hello, this is a connection test. Reply with 'OK' only."}
            ],
            max_tokens=10,
            timeout=30
        )

        if response and response.choices and len(response.choices) > 0:
            # message.content may be None; the connection itself still worked,
            # so fall back to an empty string rather than failing on the slice.
            content = response.choices[0].message.content or ""
            print(f"✓ LLM 连接测试成功: {content[:50]}...")
            return JSONResponse(content={
                "success": True,
                "message": "LLM 连接成功",
                "response": content[:100]
            })
        else:
            return JSONResponse(
                status_code=500,
                content={"success": False, "error": "Empty response from LLM"}
            )
    except Exception as e:
        error_msg = str(e)
        print(f"✗ LLM 连接测试失败: {error_msg}")
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": error_msg}
        )
    finally:
        # A fresh client is built on every call; close it so its HTTP
        # connection pool is released instead of waiting for garbage collection.
        if client is not None:
            try:
                await client.close()
            except Exception as close_error:
                # Never let cleanup replace the response computed above.
                print(f"⚠ 关闭 LLM 客户端失败: {close_error}")
@app.get("/api/download/{alpha_id}")
 async def download_results(alpha_id: str):
    """
--- a/simple72/requirements.txt
+++ b/simple72/requirements.txt
@ -3,5 +3,6 @@ uvicorn>=0.20.0
 requests>=2.28.0
 openai>=1.0.0
 pandas>=2.0.0
 pyarrow>=10.0.0
 pydantic>=2.0.0
 jinja2>=3.0.0
--- a/simple72/templates/app.js
+++ b/simple72/templates/app.js
@ -45,6 +45,7 @@ const form = document.getElementById('transformerForm');
 const submitBtn = document.getElementById('submitBtn');
 const downloadBtn = document.getElementById('downloadBtn');
 const loginAndFetchBtn = document.getElementById('loginAndFetchBtn');
 const testLLMBtn = document.getElementById('testLLMBtn');
 const regionSelect = document.getElementById('region');
 const delaySelect = document.getElementById('delay');
 const universeSelect = document.getElementById('universe');
@ -105,6 +106,48 @@ loginAndFetchBtn.addEventListener('click', async () => {
    }
 });
 testLLMBtn.addEventListener('click', async () => {
    // Read a trimmed value from one of the LLM configuration inputs.
    const fieldValue = (elementId) => document.getElementById(elementId).value.trim();

    const apiKey = fieldValue('llmApiKey');
    const baseUrl = fieldValue('llmBaseUrl');
    const model = fieldValue('llmModel');

    // All three settings are required before the backend is contacted.
    if (!(apiKey && baseUrl && model)) {
        alert('请先填写完整的 LLM 配置');
        return;
    }

    // Reflect the test outcome on the button: label first, then the colour class.
    const showOutcome = (succeeded) => {
        testLLMBtn.textContent = succeeded ? '连接成功' : '连接失败';
        testLLMBtn.classList.add(succeeded ? 'btn-success' : 'btn-error');
    };

    // Enter the "testing" state and clear any colour left from a previous run.
    testLLMBtn.disabled = true;
    testLLMBtn.textContent = '测试中...';
    testLLMBtn.classList.remove('btn-success', 'btn-error');

    const payload = {
        llm_api_key: apiKey,
        llm_base_url: baseUrl,
        llm_model: model
    };

    try {
        const response = await fetch('/api/test-llm', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        const result = await response.json();
        showOutcome(result.success);
    } catch (error) {
        // Network failures and unparsable responses are shown as a failed test.
        showOutcome(false);
    } finally {
        testLLMBtn.disabled = false;
    }
 });
 function populateRegionSelect() {
    while (regionSelect.options.length > 1) {
        regionSelect.remove(1);
@ -222,6 +265,7 @@ form.addEventListener('submit', async (e) => {
        brain_username: document.getElementById('brainUsername').value.trim(),
        brain_password: document.getElementById('brainPassword').value.trim(),
        top_n_datafield: parseInt(document.getElementById('topNDatafield').value) || 50,
        max_retries: parseInt(document.getElementById('maxRetries').value) || 20,
        data_type: document.getElementById('dataType').value || 'MATRIX'
    };
@ -267,22 +311,22 @@ form.addEventListener('submit', async (e) => {
            const successCount = result.expressions_success ? result.expressions_success.length : 0;
            const candidateCount = result.candidates ? result.candidates.length : 0;
            const errorCount = result.expressions_error ? result.expressions_error.length : 0;
            alert('生成完成！成功: ' + successCount + ' 个, 候选: ' + candidateCount + ' 个, 错误: ' + errorCount + ' 个');
            // 显示下载按钮
            downloadBtn.style.display = 'block';
            downloadBtn.textContent = '下载结果 (' + successCount + '成功, ' + candidateCount + '候选)';
            downloadBtn.onclick = function() {
                const alphaId = document.getElementById('alphaId').value.trim();
                window.location.href = '/api/download/' + alphaId;
            };
        } else {
            alert('生成失败: ' + (result.error || '未知错误'));
            downloadBtn.style.display = 'none';
            downloadBtn.textContent = '下载结果 (ZIP)';
        }
    } catch (error) {
        alert('请求失败: ' + error.message);
        downloadBtn.style.display = 'none';
        downloadBtn.textContent = '下载结果 (ZIP)';
    } finally {
        submitBtn.disabled = false;
        submitBtn.textContent = '生成变种';
--- a/simple72/templates/index.html
+++ b/simple72/templates/index.html
@ -80,6 +80,10 @@
                               value="kimi-k2.5"
                               placeholder="例如: kimi-k2.5, gpt-4">
                    </div>
                    <button type="button" class="btn" id="testLLMBtn" style="margin-top: 10px;">
                        测试 LLM 连接
                    </button>
                </div>
                <div class="form-section">
@ -91,6 +95,12 @@
                               value="50" min="1" max="100">
                    </div>
                    <div class="form-group">
                        <label for="maxRetries">LLM 重试次数 (模板生成失败时重试)</label>
                        <input type="number" id="maxRetries" name="max_retries" 
                               value="20" min="1" max="100">
                    </div>
                    <div class="form-group">
                        <label for="dataType">数据类型 (Data Type)</label>
                        <select id="dataType" name="data_type">
--- a/simple72/templates/styles.css
+++ b/simple72/templates/styles.css
@ -147,6 +147,24 @@ body {
    transform: translateY(0);
 }
 /* Result states for a button: app.js adds one of these classes to the
    "test LLM connection" button after a test finishes and removes both
    before the next test starts. */
 /* Success state. !important makes these colours win over any
    non-important button rule, whatever its specificity. */
 .btn-success {
    background: var(--monokai-green) !important;
    color: var(--monokai-bg) !important;
 }
 /* Hover colour for the success state. */
 .btn-success:hover {
    background: #b8e068 !important;
 }
 /* Failure state; same override approach as the success state. */
 .btn-error {
    background: var(--monokai-red) !important;
    color: var(--monokai-fg) !important;
 }
 /* Hover colour for the failure state. */
 .btn-error:hover {
    background: #ff5a8a !important;
 }
 .btn:disabled {
    background: var(--monokai-comment);
    color: var(--monokai-dark);