alpha_tools/backup_code/wqb_app/Tranformer/Transformer.py

import requests
import json
import sys
import asyncio
import openai
import re
from typing import Optional, Union # Added this import
# Optional validation helpers. Try the package-relative import first; this is
# the form that works when the module is imported as part of its package.
try:
    from .validator_hooks import is_valid_template_expr, has_empty_datafield_candidates
except Exception:
    # Fallback for direct script execution
    try:
        from validator_hooks import is_valid_template_expr, has_empty_datafield_candidates
    except Exception:
        # Neither import worked: bind both names to None so code that checks
        # them for truthiness can detect the missing helpers and skip
        # validation instead of failing at import time.
        is_valid_template_expr = None
        has_empty_datafield_candidates = None

# --- Validation wrappers to integrate into the pipeline ---
def _filter_valid_templates(
    proposed_templates: dict,
    operators_meta,
    brain_session,
    settings: dict,
    parse_alpha_code_func,
):
    """Return only the proposed templates that pass validation.

    Args:
        proposed_templates: Mapping of template expression -> explanation.
        operators_meta: Forwarded unchanged to ``is_valid_template_expr``.
        brain_session: Forwarded unchanged to ``is_valid_template_expr``.
        settings: Forwarded unchanged to ``is_valid_template_expr``.
        parse_alpha_code_func: Parser callable forwarded to the validator.
            When it is falsy, validation is skipped entirely.

    Returns:
        ``proposed_templates`` itself (unfiltered) when the validator or the
        parser is unavailable. Otherwise a new dict holding only the entries
        for which the validator returned a truthy value; an empty or ``None``
        input yields an empty dict.
    """
    if not is_valid_template_expr or not parse_alpha_code_func:
        return proposed_templates
    if not proposed_templates:
        # Nothing to validate (also covers None, which has no .items()).
        return {}

    filtered = {}
    for template_expr, template_expl in proposed_templates.items():
        try:
            if is_valid_template_expr(
                template_expr,
                operators_meta,
                brain_session,
                settings,
                parse_alpha_code_func,
            ):
                filtered[template_expr] = template_expl
        except Exception as exc:
            # Be conservative: drop the template when validation itself
            # fails, but record why so a broken validator does not silently
            # empty the result.
            logging.getLogger(__name__).warning(
                "Dropping template %r: validation raised %s: %s",
                template_expr,
                type(exc).__name__,
                exc,
            )
    return filtered


def _should_skip_due_to_empty_candidates(populated_info: dict) -> bool:
    """Report whether ``has_empty_datafield_candidates`` flags ``populated_info``.

    Args:
        populated_info: Passed unchanged to ``has_empty_datafield_candidates``.

    Returns:
        The helper's result coerced to ``bool``. ``False`` when the helper
        was not importable or when calling it (or coercing its result)
        raises, so a missing or broken helper never causes a skip.
    """
    if not has_empty_datafield_candidates:
        return False
    try:
        # Coerce so the declared ``-> bool`` contract holds even if the
        # helper returns some other truthy/falsy object.
        return bool(has_empty_datafield_candidates(populated_info))
    except Exception as exc:
        # Best effort: a failing helper must not block the pipeline, but
        # leave a trace so the failure is diagnosable.
        logging.getLogger(__name__).warning(
            "Empty-candidate check raised %s: %s; not skipping",
            type(exc).__name__,
            exc,
        )
        return False
import logging
import pandas as pd
import os
from pathlib import Path
from urllib.parse import urljoin
import time
import threading
import itertools
import getpass
import io
import validator as val
from ace_lib import get_instrument_type_region_delay
# Force stdout/stderr to use utf-8 on Windows to avoid UnicodeEncodeError
if sys.platform.startswith('win'):
    try:
        # Re-wrap the existing binary buffers in UTF-8 text wrappers. stdout is
        # replaced first, so a failure on stderr leaves stdout already swapped.
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
    except Exception:
        # Best effort: if a stream cannot be re-wrapped (e.g. it exposes no
        # ``.buffer`` attribute), keep whatever streams are installed.
        pass

# These variables will be set during interactive input.
# They are initialised to None here as module-level placeholders.
LLM_model_name = None
LLM_API_KEY = None
llm_base_url = None
username = None
password = None
DATA_CATEGORIES = None


template_summary = """# BRAIN论坛Alpha模板精华总结

本文档旨在系统性地整理和总结优秀Alpha模板,它是一种可复用的标准化框架性表达式，它承载着特定的经济逻辑，并预留出若干 “配置项”（包括数据字段、算子、分组方式、衰减规则、中性化方案等），用于生成多个候选阿尔法因子。其典型流程为：数据清洗（数据回填、缩尾处理）→ 跨时间或跨标的维度进行转换 / 对比 → 排序 / 中性化处理 →（可选步骤）衰减调整 / 换手率优化。这种模板模式能够推动系统化的因子挖掘、复用与多元化配置，同时确保每一个因子都具备清晰可追溯的经济逻辑支撑。
以下每个模板都附有其核心思想、变量说明、适用场景及原帖链接，方便您理解、应用和进一步探索。
使用时请思考如何将下列模板与有的Alpha表达式结合，创造出新的模板来捕捉和发现市场规律，找到”好“公司和”坏“公司
**使用前请注意：**
*   **过拟合风险**：部分模板可能存在过拟合风险，请谨慎使用，并结合IS-Ladder测试、多市场回测等方法进行验证。
*   **参数调整**：模板中的参数（如时间窗口、数据集字段）需要根据您的具体研究目标和数据特性进行调整。
*   **持续学习**：最好的模板是您自己创造的。希望本文档能激发您的灵感，而不是限制您的思维。

---

## From: Alpha Examples from Learn101

### Momentum after news
**Hypothesis**: After news is released, if a stock takes a longer time to rise, it may show strong evidence of upward momentum, and it could be beneficial to take a long position in it.
**Expression**: `ts_backfill(vec_avg(nws12_prez_4l),504)`
**Settings**: Region: USA, Universe: TOP500, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **时序相对性 (Step 4)**: 这是一个典型的时序信号。`ts_backfill` 的使用暗示了新闻数据是稀疏的（Step 4.2.4），需要填补空白以维持信号连续性。
*   **算子深意**: `vec_avg` 用于聚合多维新闻向量，提取核心情绪/强度；`ts_backfill` 确保在无新闻日也能维持上一次的观点，直到新消息到来。
**优化方向**:
*   **去噪 (Step 0)**: 新闻情绪可能存在极端噪音，建议在 `vec_avg` 后增加 `winsorize` 或 `rank`。
*   **从属信号 (Subordinate)**: 叠加 `Social Media Effect`。若新闻情绪好但社媒热度低（噪音少），则放大权重；若社媒过热，可能反转。
*   **门限交易 (Step 5)**: 仅在新闻情绪显著偏离均值时交易，如 `trade_when(abs(zscore(news)) > 1.5, ...)`。

### Pretax Income
**Hypothesis**: Pretax income is a good measure of a company's financial health and profitability.
**Expression**: `quantile(ts_rank(pretax_income,250))`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 4, Neutralization: MARKET, Truncation: 0.01, Pasteurization: ON
**逻辑链深度解析**:
*   **时序相对性 (Step 4)**: `ts_rank(..., 250)` 比较当前收入与过去一年的水平，寻找“自身改善”而非“绝对高收入”。
*   **分布重塑 (Step 0)**: `quantile` 强制将信号拉伸为均匀分布，避免了极值影响，只关注相对排序。
**优化方向**:
*   **区间优化 (Step 2)**: 收入微弱变化可能只是噪音。可改用 `ts_zscore` 并只在 >1 或 <-1 时交易。
*   **从属信号**: 引入 `market_cap`。大市值的收入创新高可能比小市值更稳健（质量溢价）。

### Operating Earnings Yield
**Hypothesis**: If the operating income of a company is currently higher than its past 1 year history, buy the company's stock and vice-versa.
**Expression**: `ts_rank(operating_income,252)`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SUBINDUSTRY, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **时序相对性 (Step 4)**: 纯粹的时序动量逻辑。`ts_rank` 将当前值映射到历史分位，捕捉“业绩改善”趋势。
**优化方向**:
*   **组内比较 (Step 3)**: 考虑行业周期性。先做 `group_zscore(operating_income, industry)` 再做 `ts_rank`，剔除行业景气度影响，只看个股相对行业的改善。
*   **门限 (Step 5)**: `trade_when(ts_rank > 0.8, ...)` 只做多业绩显著改善的股票。

### Appreciation of liabilities
**Hypothesis**: An increase in the fair value of liabilities could indicate a higher cost than expected.
**Expression**: `-ts_rank(fn_liab_fair_val_l1_a,252)`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SUBINDUSTRY, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **反向信号**: 负号 `-` 表示这是一个反向指标（负债增加是坏事）。
*   **时序相对性**: 同样基于 `ts_rank`，关注负债相对于自身历史的增长速度。
**优化方向**:
*   **去噪**: 负债数据可能存在跳变，建议先 `winsorize`。
*   **从属信号**: 结合 `cash_flow`。若负债增加但现金流同时也大幅增加（良性杠杆），则不应做空。

### Deferred Revenue
**Hypothesis**: Firms with high deferred revenue will surprise the market in the future when the deferred revenue is recognized.
**Expression**: `ts_backfill(fnd6_drc, 252)/assets`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SECTOR, Truncation: 1, Pasteurization: ON
**逻辑链深度解析**:
*   **截面比较 (Step 3)**: 除以 `assets` 是为了标准化（Size Adjustment），使其在截面上可比。
*   **数据填补 (Step 0)**: `ts_backfill` 处理财报数据的低频更新特性。
**优化方向**:
*   **行业中性 (Step 3)**: 递延收入在软件/服务业常见，在制造业少见。必须做 `group_zscore(..., sector)` 或 `neutralize`，否则只是在做多特定行业。
*   **时序变化 (Step 4)**: 关注递延收入的 *增长率* `ts_delta`，而不仅仅是绝对值。

### Reducing debt
**Hypothesis**: Take a long position in companies whose debt has decreased compared to the past.
**Expression**: `-ts_quantile(debt, 126)`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: MARKET, Truncation: 0.01, Pasteurization: ON
**逻辑链深度解析**:
*   **时序相对性**: `ts_quantile` 与 `ts_rank` 类似，捕捉债务下降趋势。
**优化方向**:
*   **从属信号**: 结合 `interest_coverage` (利息保障倍数)。只有在偿债能力弱的公司中，债务减少才最重要（困境反转逻辑）。

### Power of leverage
**Hypothesis**: Companies with high liability-to-asset ratios often leverage debt as a strategic tool.
**Expression**: `liabilities/assets`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: MARKET, Truncation: 0.01, Pasteurization: ON
**逻辑链深度解析**:
*   **截面比较 (Step 3)**: 这是一个经典的截面因子（杠杆率）。
**优化方向**:
*   **非线性 (Step 1)**: 杠杆通常是倒U型关系（适度杠杆好，过高杠杆坏）。考虑使用 `bucket` 分段，或 `trade_when` 剔除极端高杠杆。
*   **行业中性**: 银行/地产杠杆天生高，必须行业中性化。

## From: Alpha Examples from Learn102

### Social Media Effect
**Hypothesis**: Poorly performing stocks are discussed more in general on social media platforms.
**Expression**: `-scl12_buzz`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.01, Pasteurization: ON
**逻辑链深度解析**:
*   **反向指标**: 负号暗示“关注度高=坏事”（可能是负面新闻缠身）。
*   **原始信号**: 直接使用 `buzz`，假设线性关系。
**优化方向**:
*   **去噪 (Step 0)**: 社媒数据极值多，必须 `log` 或 `winsorize`。
*   **从属信号**: 结合 `sentiment`。若关注度高且情感为正，可能是好事；关注度高且情感负，才是做空机会。
*   **门限**: `trade_when(rank(buzz) > 0.9, ...)` 只在极度热门时做空。

### Valuation Disconnect Swing Short
**Hypothesis**: A stock with high momentum and value score correlation suggests a disconnect between the stock's price and its intrinsic value.
**Expression**: `-ts_corr(ts_backfill(fscore_momentum,66),ts_backfill(fscore_value,66),756)`
**Settings**: Region: USA, Universe: TOP200, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **高阶统计量**: 使用 `ts_corr` 捕捉两个因子之间的动态关系，而非因子本身。
*   **逻辑**: 动量与价值相关性高，意味着价格脱离基本面（泡沫），因此做空（负号）。
**优化方向**:
*   **窗口调整**: 756天（3年）非常长，捕捉的是长期结构变化。可尝试短窗口（如126天）捕捉短期背离。

### Network Dependence
**Hypothesis**: Long stocks of companies whose hub score of customers are low over the past two years.
**Expression**: `-ts_mean(pv13_ustomergraphrank_hub_rank,504)`
**Settings**: Region: USA, Universe: TOP1000, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **供应链逻辑**: 客户集中度/中心度过高可能意味着风险（依赖大客户）。
*   **平滑 (Step 4)**: `ts_mean(..., 504)` 说明这是一个非常慢的变量，关注长期结构。
**优化方向**:
*   **从属信号**: 结合 `volatility`。高依赖度+高波动 = 极度危险。

## From: Alpha Examples from Learn103

### News-driven Volatility
**Hypothesis**: Stocks of companies that face high differences in their prices after any news release can be subject to varying sentiments.
**Expression**: `(ts_arg_max(ts_backfill(news_session_range, 20), 60))`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SECTOR, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **事件驱动 (Step 4.2.3)**: `ts_arg_max` 寻找过去60天内波动最大的那一天（新闻日）。
*   **算子深意**: 这不是直接用波动率，而是用“最大波动发生的时间距离”作为信号。
**优化方向**:
*   **衰减逻辑**: 结合 `days_from_last_change` 或 `exp_decay`，让信号随时间减弱。
*   **从属信号**: 叠加 `IV Skew`。若波动大且 Skew 偏空，做空；若 Skew 偏多，做多。

### Implied Volatility Spread as a predictor
**Hypothesis**: If the Call Open interest is higher than the Put Open interest, the stock may rise based on the intensity of the implied volatility spread.
**Expression**: `trade_when(pcr_oi_270 < 1, (implied_volatility_call_270-implied_volatility_put_270), -1)`
**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 4, Neutralization: MARKET, Truncation: 0.08, Pasteurization: ON
**逻辑链深度解析**:
*   **门限交易 (Step 5)**: `trade_when(pcr_oi < 1, ...)` 是典型的门禁逻辑。只有在看涨持仓量大于看跌时（情绪偏多），才使用 IV Spread 信号。
*   **条件分支**: 不满足条件时给 `-1`（做空），这是一个激进的二元策略。
**优化方向**:
*   **平滑**: IV 数据跳动大，建议对 Spread 做 `ts_mean` 或 `ts_decay_linear`。

## 《151 Trading Strategies》论文精华模板

本部分总结自Zura Kakushadze与Juan Andrés Serur合著的《151 Trading Strategies》一文，重点提炼其中适用于BRAIN平台的股票类策略，并将其泛化为可复用的Alpha模板。

---

### 1. 风险调整后动量模板 (Risk-Adjusted Momentum)

*   **模板表达式**: `ts_mean(ts_delay(returns, <skip_period>), <lookback_period>) / ts_std_dev(ts_delay(returns, <skip_period>), <lookback_period>)`
*   **核心思想**: 这是对经典动量因子的改进。它计算的是过去一段时间（lookback_period）的"时序夏普比率"，即收益均值除以收益波动。同时，`ts_delay`跳过了最近一段时间（skip_period，通常为21天/1个月）的数据，以规避短期反转效应的干扰。该因子旨在寻找那些"高质量"的、持续且平稳的动量。
*   **变量说明**:
    *   `<skip_period>`: 跳过的近期交易日数，如 `21`。
    *   `<lookback_period>`: 计算动量的回看窗口，如 `252`。
*   **适用场景**: 通用性强，适用于构建稳健的动量类Alpha。
*   **逻辑链深度解析**:
    *   **时序标准化 (Step 4)**: 分子是收益均值，分母是波动率。本质是 Rolling Sharpe Ratio。
    *   **去噪 (Step 0)**: `ts_delay` 跳过最近一个月，剔除了短期反转（Short-term Reversal）噪音，只保留中长期动量。
*   **优化方向**:
    *   **从属信号**: 叠加 `turnover`。在低换手率时，动量更可靠（量价配合）。
    *   **残差化**: 先对 returns 做 `regression_neut` 剔除大盘影响，计算纯特异性动量。
*   **适配自**: Section 3.1, "Price-momentum", `Rrisk.adj`

### 2. 标准化盈利超预期模板 (SUE - Standardized Unexpected Earnings)

*   **模板表达式**: `(fnd_eps_q - ts_delay(fnd_eps_q, 4)) / ts_std_dev(fnd_eps_q - ts_delay(fnd_eps_q, 4), 8)`
*   **核心思想**: 捕捉超预期的盈利增长。它计算的是最新一季的EPS相较于去年同期的增量，并用该增量自身过去8个季度的波动性进行标准化。标准化后的值（SUE）越高，代表盈利惊喜越大，是经典的盈利动量因子。
*   **变量说明**:
    *   `fnd_eps_q`: 季度每股收益（EPS）字段。
*   **适用场景**: `Fundamental`（基本面）数据集，用于事件驱动型Alpha。
*   **逻辑链深度解析**:
    *   **季节性调整**: `ts_delay(..., 4)` 比较同比季度，消除季节性影响。
    *   **波动率标准化 (Step 0)**: 除以过去8季度的波动，将“惊喜”转化为标准差单位（Z-Score），使其在不同波动率的公司间可比。
*   **优化方向**:
    *   **事件衰减 (Step 4)**: 叠加 `days_from_last_change`，让 SUE 信号随财报发布时间衰减。
    *   **从属信号**: 叠加 `Analyst Revision`。若 SUE 高且分析师上调预期，信号更强。
*   **适配自**: Section 3.2, "Earnings-momentum", SUE


### 4. 隐含波动率偏斜动量模板 (Implied Volatility Skew Momentum)

*   **模板表达式**: `ts_delta(implied_volatility_call_<window>, <period>) - ts_delta(implied_volatility_put_<window>, <period>)`
*   **核心思想**: 捕捉市场情绪的变化。看涨期权IV的上升通常与乐观情绪相关，而看跌期权IV的上升则与悲观或避险情绪相关。该模板计算Call IV的变化量与Put IV变化量之差，旨在做多情绪改善、做空情绪恶化的股票。
*   **变量说明**:
    *   `implied_volatility_call_<window>`: 不同期限的看涨期权隐含波动率。
    *   `implied_volatility_put_<window>`: 不同期限的看跌期权隐含波动率。
    *   `<period>`: 计算IV变化的时间窗口，如 `21` (月度变化)。
*   **适用场景**: `Option`（期权）数据集，用于捕捉短中期市场情绪变化。
*   **逻辑链深度解析**:
    *   **时序变化 (Step 4)**: 关注的是 IV 的 *变化* (`ts_delta`) 而非绝对值。
    *   **情绪差**: Call IV 涨幅 > Put IV 涨幅 -> 情绪改善。
*   **优化方向**:
    *   **门限**: `trade_when(abs(skew_delta) > threshold, ...)` 只在情绪剧烈变化时交易。
    *   **事件驱动**: 在财报前（IV 高企时）该策略可能失效，需用 `days_to_earnings` 过滤。
*   **适配自**: Section 3.5, "Implied volatility"

### 5. 残差动量模板 (Residual Momentum)

*   **模板表达式**: `ts_mean(regression_neut(regression_neut(regression_neut(returns, <factor_1/>), <factor_2/>), <factor_3/>), <window/>)`
*   **核心思想**: 提纯动量信号。传统动量可能包含了市场Beta、市值、价值等多种因子的敞口。此模板通过连续的中性化（例如依次对`<factor_1/>`, `<factor_2/>`, `<factor_3/>`执行`regression_neut`）剥离可被通用因子解释的部分，然后仅对无法被解释的"残差等价物"部分计算动量。
*   **变量说明**:
    *   `<factor_1/>`, `<factor_2/>`, `<factor_3/>`: 市场通用因子，如 `mkt_beta`, `size_factor`, `value_factor`。
    *   `<window/>`: 计算残差动量的时间窗口。
*   **适用场景**: 通用性强，是因子提纯、构建高质量Alpha的关键步骤。
*   **逻辑链深度解析**:
    *   **提纯 (Step 0)**: 通过连续 `regression_neut` 剥离 Beta、Size、Value 等风格暴露。
    *   **时序动量**: 对剥离后的残差求 `ts_mean`。
*   **优化方向**:
    *   **加权**: 使用 `ts_decay_linear` 代替 `ts_mean`，给予近期残差更大权重。
    *   **组内比较**: 在残差基础上再做 `group_rank`，寻找行业内最强特异动量。
*   **适配自**: Section 3.7, "Residual momentum"

### 6. 风险加权回归均值回归模板 (Weighted Regression Mean-Reversion)

*   **模板表达式**: `reverse(regression_neut(multiply(returns, power(inverse(ts_std_dev(returns, <window/>)), 2)), <group_matrix/>))`
*   **核心思想**: 这是对标准行业中性化均值回归的增强。在对收益率进行行业中性化时，它为不同股票赋予了不同的权重。具体来说，它给历史波动率较低的股票更高的权重，认为这些股票的收益率数据更"可靠"，在计算行业均值时应占更大比重。
*   **变量说明**:
    *   `<group_matrix>`: 行业或分组的哑变量矩阵。
    *   `weights`: 回归权重，通常是可靠性的度量，如 `1/variance`。
    *   `<window>`: 计算波动率的时间窗口。
*   **适用场景**: 适用于任何需要进行组内中性化或回归剥离的场景，尤其是当组内成员的信号质量或波动性差异较大时。
*   **逻辑链深度解析**:
    *   **加权最小二乘 (WLS)**: 使用 `1/variance` 作为权重，认为低波动的股票信息更可靠。
    *   **均值回归**: `reverse` 捕捉残差的反转。
*   **优化方向**:
    *   **从属信号**: 引入 `liquidity` 权重。流动性好的股票回归更快。
*   **适配自**: Section 3.10, "Mean-reversion – weighted regression"

### 7. 移动平均线交叉模板 (Moving Average Crossover)

*   **模板表达式**: `sign(ts_mean(<price/>, <short_window>) - ts_mean(<price/>, <long_window>))`
*   **核心思想**: 经典的趋势跟踪策略。当短期均线上穿长期均线（"金叉"）时，表明短期趋势走强，产生买入信号。当短期均线下穿长期均线（"死叉"）时，表明趋势走弱，产生卖出信号。
*   **变量说明**:
    *   `<price/>`: `close`, `vwap` 等价格字段。
    *   `<short_window>`: 短期均线窗口，如 `10`, `20`。
    *   `<long_window>`: 长期均线窗口，如 `50`, `100`。
*   **适用场景**: 适用于趋势性较强的市场或资产。
*   **逻辑链深度解析**:
    *   **低通滤波**: MA 本质是滤除高频噪音。
    *   **二元信号**: `sign` 输出 +1/-1，不包含强度信息。
*   **优化方向**:
    *   **连续化 (Step 1)**: 去掉 `sign`，直接使用差值并标准化 (`zscore`)，保留强度信息。
    *   **从属信号**: 结合 `ADX` (趋势强度指标)。只有在趋势强时才使用 MA 交叉。
*   **适配自**: Section 3.12, "Two moving averages"


### 9. 渠道突破模板 (Channel Breakout)

*   **模板表达式**: `alpha = if_else(greater(close, ts_max(high, <window/>)), 1, if_else(less(close, ts_min(low, <window/>)), -1, 0)); reverse(alpha)`
*   **核心思想**: 这是一个经典的反转策略。它定义了一个由过去N日最高价和最低价构成的价格渠道（Channel）。当价格向上突破渠道上轨时，认为市场过热，产生卖出信号（-1）；当价格向下突破渠道下轨时，认为市场超卖，产生买入信号（+1）。
*   **变量说明**:
    *   `<window>`: 定义渠道的时间窗口，如 `20`。
*   **适用场景**: 适用于有均值回归特性的市场或个股。
*   **逻辑链深度解析**:
    *   **区间突破 (Step 2)**: 典型的“只在尾部交易”逻辑。中间区间为 0。
    *   **反转逻辑**: `reverse` 赌突破是假突破（False Breakout）。
*   **优化方向**:
    *   **顺势/逆势切换**: 结合 `volatility`。低波时做反转（假突破），高波时做顺势（真突破）。
*   **适配自**: Section 3.15, "Channel"


### 11. 价值因子基础模板 (Value Factor)

*   **模板表达式**: `group_rank(<book_value/> / <market_cap/>)`
*   **核心思想**: 经典的价值投资策略。它旨在买入账面价值相对于市场价值被低估的"价值股"，并卖出被高估的"成长股"。最核心的衡量指标是账面市值比（Book-to-Price / Book-to-Market Ratio）。
*   **变量说明**:
    *   `<book_value/>`: 公司账面价值或每股净资产字段。
    *   `<market_cap/>`: 公司市值或收盘价字段。
*   **适用场景**: `Fundamental` (基本面) 数据集，作为构建多因子模型的基础因子之一。
*   **逻辑链深度解析**:
    *   **组内比较 (Step 3)**: 价值因子在不同行业间不可比（如科技 vs 银行），必须用 `group_rank`。
*   **优化方向**:
    *   **去噪**: 先 `winsorize` 再 `group_rank`。
    *   **从属信号**: 叠加 `Quality` (ROE)。避免买入“价值陷阱”（便宜但烂的公司）。
*   **适配自**: Section 3.3, "Value"


### 13. 配对交易均值回归框架 (Pairs Trading)

*   **模板表达式**: `signal_A = (close_A - close_B) - ts_mean(close_A - close_B, <window>); reverse(signal_A)`
*   **核心思想**: 寻找历史上高度相关的两只股票（一个"配对"），当它们的价差（spread）偏离历史均值时进行套利。如果价差过大，则做空价高的股票、做多价低的股票，赌价差会回归。这是一个经典的统计套利和均值回归策略。
*   **变量说明**:
    *   `close_A`, `close_B`: 配对股票A和B的价格序列。
    *   `<window>`: 计算历史价差均值的时间窗口。
*   **适用场景**: 适用于同一行业内业务高度相似的公司，是构建市场中性策略的基础。
*   **逻辑链深度解析**:
    *   **协整关系**: 构造平稳序列 `Spread`。
    *   **均值回归**: 赌 Spread 回归均值。
*   **优化方向**:
    *   **动态阈值**: 使用 `ts_std_dev(Spread)` 设定动态开仓线（如 2倍标准差）。
    *   **止损**: 增加 `trade_when(abs(Spread) > 4*std, 0, ...)` 防止协整破裂。
*   **适配自**: Section 3.8, "Pairs trading"

---

## 补充模板

### A. Analyst交叉分组打底（模板名：示例）
*   **核心结构**: `financial_data = ts_backfill(<vec_func/>(<analyst_metric/>), 60); gp = group_cartesian_product(country, industry); <ts_operator/>(<group_operator/>(financial_data, gp), <window/>)`
*   **思想**: 先对分析师字段做向量聚合（`vec_avg`、`vec_kurtosis`、`vec_ir`等），用`group_cartesian_product`构建国家×行业组合，再做组内标准化/中性化+时序处理，形成稳定的截面信号。
*   **变量要点**: `analyst_metric`覆盖`mdl26_*`、`star_arm_*`等Analyst/SmartEstimate场景；`vec_func`选择聚合方式；`group_operator`用于行业/国家组内的scale或neutralize；`ts_operator`用于时间平滑（`ts_mean`、`ts_zscore`等）；`window`在20/60/90/200之间取值。
*   **适用场景**: 适合Analyst情感、预期修正类主题，想要跨国+行业分组的稳健截面信号。
*   **逻辑链深度解析**:
    *   **数据填补 (Step 0)**: 分析师数据稀疏，必须 `ts_backfill`。
    *   **精细分组 (Step 3)**: `group_cartesian_product` 实现了“国家x行业”的精细化中性化，适合全球策略。
*   **优化方向**:
    *   **算子选择**: `vec_ir` (信息比率) 比 `vec_avg` 更能体现分析师的一致性。

### B. 双重中性化（模板名：双重中性化:以Analyst15为例）
*   **核心结构**: 与上类似，先`ts_backfill(vec_func(Analyst15字段), 60)`，再按国家×行业分组，做组内中性化与时序处理。
*   **思想**: 针对`anl15_*`增长/估值/分红等字段，在截面层面做两次中性化（向量聚合后+组内处理），用于剥离共性行业/国家暴露。
*   **变量要点**: 数据集中`anl15_*`覆盖多期增长率、PE、估值、分红等；`vec_func`与`ts_operator`选择决定信号平滑度；窗口建议60–200以保证填补稳定。
*   **适用场景**: Analyst15预期修正、估值再定价类信号，需要同时消化国家+行业噪音的场景。
*   **逻辑链深度解析**:
    *   **多重剥离**: 彻底消除风格暴露，追求纯 Alpha。
*   **优化方向**:
    *   **顺序**: 先做行业中性，再做国家中性，通常更符合基本面逻辑。

### C. 组间比较（模板名：组间比较_GLB_topdiv）
*   **核心结构**: 先在`country × <group1/>`分组内对回填后的向量聚合结果做`ts_zscore`和`group_zscore`，再计算组均值/极值（`group_min/median/max/sum/count`），用`resid = <compare/>(alpha, alpha_gpm)`求组间残差，最后再做组内+时序处理。
*   **思想**: 对同一层级（如行业/子行业/交易所）之间的相对强弱做剥离，得到“相对组均值”的残差信号，适合跨组对比的Alpha挖掘。
*   **变量要点**: `analyst_field`来源于`fnd8_*`基本面/现金流字段；`vec_op`可选`vec_max/avg/min`；`compare`可用`regression_neut`或`signed_power`提取残差；`t_window`取20/60/200/600，控制平滑与稳定性。
*   **适用场景**: GLB区域的分红/现金流因子（topdiv）在国家+行业框架下的相对价值比较，关注跨组差异的策略。
*   **逻辑链深度解析**:
    *   **相对价值**: 关注的是“我在我的组里是否优秀”，而不是“我绝对值多少”。
*   **优化方向**:
    *   **非线性**: 使用 `rank` 代替原始值计算残差，对异常值更鲁棒。

### D. 组间比较（Analyst15版，模板名：组间比较_glb_topdiv_anl15）
*   **核心结构**: 与上一模板相同，但`analyst_field`替换为`anl15_*`系列的增长/估值/分红字段。
*   **思想**: 通过对Analyst15增长与估值预期的组间残差建模，捕捉行业/国家层面的相对高低估与预期修正。
*   **变量要点**: `group1`可选industry/subindustry/sector/exchange；`compare`与`group_stats`同上；`ts_op`和`group_op`用于残差后再标准化和时序平滑。
*   **适用场景**: 全球范围GLB，基于Analyst15预期数据的组间相对价值或动量信号。
*   **逻辑链深度解析**:
    *   **预期差**: 寻找行业内被分析师低估/高估的股票。
*   **优化方向**:
    *   **时序叠加**: 结合 `ts_delta`，寻找“行业内预期提升最快”的股票。

### E. 顾问分析示例（模板名：顾问分析示例）
*   **核心结构**: `financial_data = ts_backfill(<mixdata/>, 90); gp = industry; <ts_operator/>(<group_operator/>(financial_data, gp), <window/>)`
*   **思想**: 直接对`anl69_*`多字段做90日回填，行业组内标准化后再做时序平滑，生成简洁的行业中性信号。
*   **变量要点**: `mixdata`覆盖`anl69_*`的EPS/EBIT/现金分红/目标价/报告日期等；`ts_operator`可用`ts_zscore`、`ts_scale`、`ts_rank`等；`window`提供60/120/220/600可调节频率。
*   **适用场景**: Analyst69数据驱动的行业内预期跟踪、财报节奏/指引变化监控。
*   **逻辑链深度解析**:
    *   **标准流程**: 填补 -> 截面标准化 -> 时序平滑。这是构建稳健因子的标准三板斧。
*   **优化方向**:
    *   **事件驱动**: 在财报日前后缩短 `ts_mean` 的窗口，提高灵敏度。

---

## 新增模板（CAPM與估值、分析師期限、期權、搜尋優化）

### 1. CAPM殘差模板（市場/行業中性收益）
*   **表達式**: `ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0)`。
*   **核心思想**: 回歸剔除市場/行業暴露，保留超額收益殘差作為Alpha。
*   **適用場景**: 通用起手式，回歸殘差可作後續動量或價值信號的底板。
*   **優化**: 改`rettype=2`獲取beta斜率，用於風險排序或低/高beta組合；可加入`winsorize`、`ts_backfill`預處理。

### 2. CAPM廣義殘差（任意特徵）
*   **表達式**: `data = winsorize(ts_backfill(<data>,63), std=4); gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, gpm, 252, rettype=0)`。
*   **核心思想**: 將任意特徵去除組均值成分，提取行業相對的特異性部分。
*   **適用場景**: 基本面、情緒、替代數據的組內殘差提純。
*   **優化**: 先`group_zscore`再回歸；對`resid`再做`ts_zscore`或`ts_mean`平滑。

### 3. CAPM Beta排序模板
*   **表達式**: `target_data = winsorize(ts_backfill(<target>,63), std=4); market_data = winsorize(ts_backfill(<market>,63), std=4); beta = ts_regression(target_data, group_mean(market_data, log(ts_mean(cap,21)), sector), 252, rettype=2)`。
*   **核心思想**: 提取行業內相對beta，作為風險/防禦排序；低beta偏防禦，高beta偏進攻。
*   **優化**: 行業或國家分組；可按beta分桶做長低/短高，或反向用於高波段套利。

### 4. 實際-預估差異模板（Analyst Surprise）
*   **表達式**: `group_zscore(subtract(group_zscore(<act>, industry), group_zscore(<est>, industry)), industry)`。
*   **核心思想**: 行業內標準化後的實際值與預估值差，捕捉超預期或低於預期的驚喜。
*   **適用場景**: analyst7/analyst14/earnings估值類字段。
*   **優化**: 對差分再做`ts_zscore`；門檻交易只在|z|>1.5時開倉。

### 5. 分析師期限結構模板（近遠期預估斜率）
*   **表達式**: `group_zscore(subtract(group_zscore(anl14_mean_eps_<p1>, industry), group_zscore(anl14_mean_eps_<p2>, industry)), industry)`，`<p1>/<p2>`為fp1/fp2/fy1/fy2等。
*   **核心思想**: 比較短期與長期預估的行業內斜率，捕捉預期加速或鈍化。
*   **適用場景**: analyst14/15 期別字段；適用成長/拐點挖掘。
*   **優化**: 擴展到多期間差分或`ts_delta`跟蹤斜率變化；對斜率做`rank`或`winsorize`。

### 6. 期權Greeks淨值模板
*   **表達式**: `group_operator(<put_greek> - <call_greek>, <group>)`，Greek可選Delta/Gamma/Vega/Theta。
*   **核心思想**: 同組內看多vs看空的期權敏感度差，反映隱含情緒或凸性差異。
*   **適用場景**: Option數據集；行業或市值分組下的情緒/波動信號。
*   **優化**: 多Greek加權組合；對淨值再`ts_mean`平滑；事件期(財報)可降權或過濾。

### 7. IV Skew動量擴展
*   **表達式**: `ts_delta(implied_volatility_call_<w>, <p>) - ts_delta(implied_volatility_put_<w>, <p>)`。
*   **核心思想**: Call與Put隱含波動變化差捕捉情緒轉折；可做多情緒改善、做空情緒惡化。
*   **優化**: 加`trade_when(abs(skew)>thr)`門檻；財報前後縮窗；行業中性。

### 8. 殘差動量精簡版
*   **表達式**: `res = regression_neut(returns, <common_factor_matrix>); ts_mean(res, <window>)`。
*   **核心思想**: 先剝離市場/風格暴露，再對特異收益做動量；較原版多重回歸更輕量。
*   **優化**: 使用`ts_decay_linear`增加近期權重；行業內`group_rank`提升截面穩定度。

### 9. 分紅/現金流組間殘差（簡版）
*   **表達式**: `alpha = ts_zscore(ts_backfill(<cf_or_div_field>,90)); g = group_mean(alpha, <group>, <weight_opt>); resid = alpha - g; group_zscore(resid, <group>)`。
*   **核心思想**: 先回填平滑，再对組均值做殘差，捕捉組內相對高/低分紅或現金流質量。
*   **適用場景**: fnd8/fnd6/topdiv等分紅現金流字段；行業/國家分組。
*   **優化**: 權重可用log(cap)或vol逆；對resid再做`ts_mean`平滑。

---

## 模板格式说明

每个模板使用以下占位符格式：
- `<ts_op/>` - 时间序列操作符，如 `ts_rank`, `ts_mean`, `ts_delta`, `ts_ir`, `ts_stddev`, `ts_zscore`
- `<group_op/>` - 分组操作符，如 `group_rank`, `group_neutralize`, `group_zscore`
- `<vec_op/>` - 向量操作符，如 `vec_avg`, `vec_sum`, `vec_max`, `vec_min`, `vec_stddev`
- `<field/>` - 数据字段占位符
- `<d/>` - 时间窗口参数，常用值: `{5, 22, 66, 126, 252, 504}`
- `<group/>` - 分组字段，如 `industry`, `sector`, `subindustry`, `market`

---

## 第一部分：基础结构模板 (TPL-001 ~ TPL-010)

### TPL-001: 基本面时序排名
```
模板: <group_op/>(<ts_op/>(<field/>, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore`, `ts_delta`, `ts_ir` | 时序比较操作 |
| `<group_op/>` | `group_rank`, `group_zscore`, `group_neutralize` | 截面比较操作 |
| `<field/>` | 基本面字段: `eps`, `sales`, `assets`, `roe`, `roa` | 公司财务数据 |
| `<d/>` | `66`, `126`, `252` | 季度/半年/年 |
| `<group/>` | `industry`, `sector` | 行业分组 |

**示例**:
```
group_rank(ts_rank(eps, 252), industry)
group_zscore(ts_ir(sales, 126), sector)
```

---

### TPL-002: 利润/规模比率模板
```
模板: <ts_op/>(<profit_field/>/<size_field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore`, `ts_mean`, `ts_delta` | 时序操作 |
| `<profit_field/>` | `net_income`, `ebitda`, `operating_income`, `gross_profit` | 利润类字段 |
| `<size_field/>` | `assets`, `cap`, `sales`, `equity` | 规模类字段 |
| `<d/>` | `66`, `126`, `252` | 中长期窗口 |

**示例**:
```
ts_rank(net_income/assets, 252)
ts_zscore(ebitda/cap, 126)
ts_rank(operating_income/cap, 252)^2
```

---

### TPL-003: 向量数据处理模板 (VECTOR字段必用)
```
模板: <ts_op/>(<vec_op/>(<vector_field/>), <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_mean`, `ts_delta`, `ts_ir`, `ts_zscore` | 时序操作 |
| `<vec_op/>` | `vec_avg`, `vec_sum`, `vec_max`, `vec_min`, `vec_stddev` | 向量聚合 |
| `<vector_field/>` | 分析师数据: `anl4_*`, `analyst_*`, `oth41_*` | VECTOR类型字段 |
| `<d/>` | `22`, `66`, `126` | 短中期窗口 |

**示例**:
```
ts_delta(vec_avg(anl4_eps_mean), 22)
ts_rank(vec_sum(analyst_estimate), 66)
ts_ir(vec_avg(oth41_s_west_eps_ftm_chg_3m), 126)
```

---

### TPL-004: 双重中性化模板
```
模板:
a = <ts_op/>(<field/>, <d/>);
a1 = group_neutralize(a, bucket(rank(cap), range="<range/>"));
group_neutralize(a1, <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_zscore`, `ts_rank`, `ts_ir` | 时序操作 |
| `<field/>` | 任意数据字段 | 主信号 |
| `<d/>` | `66`, `126`, `252` | 时间窗口 |
| `<range/>` | `"0.1,1,0.1"`, `"0,1,0.1"` | 市值分组范围 |
| `<group/>` | `industry`, `sector`, `subindustry` | 行业分组 |

**示例**:
```
a = ts_zscore(fnd72_s_pit_or_is_q_spe_si, 252);
a1 = group_neutralize(a, bucket(rank(cap), range="0.1,1,0.1"));
group_neutralize(a1, subindustry)
```

---

### TPL-005: 回归中性化模板
```
模板:
a = <ts_op/>(<field/>, <d/>);
a1 = group_neutralize(a, bucket(rank(cap), range="<range/>"));
a2 = group_neutralize(a1, <group/>);
b = ts_zscore(cap, <d/>);
b1 = group_neutralize(b, <group/>);
regression_neut(a2, b1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_zscore`, `ts_rank` | 时序操作 |
| `<field/>` | 基本面或其他字段 | 主信号 |
| `<d/>` | `252`, `504` | 长期窗口 |
| `<range/>` | `"0.1,1,0.1"` | 市值分组 |
| `<group/>` | `subindustry`, `sector` | 行业分组 |

---

### TPL-006: 基本面动量模板
```
模板: log(ts_mean(<field/>, <d_short/>)) - log(ts_mean(<field/>, <d_long/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | `anl4_{data}_{stats}`, 基本面字段 | 数据字段 |
| `<d_short/>` | `20`, `44` | 短期窗口 |
| `<d_long/>` | `44`, `126` | 长期窗口 |

**示例**:
```
log(ts_mean(anl4_eps_mean, 44)) - log(ts_mean(anl4_eps_mean, 20))
```

---

### TPL-007: 财报事件驱动模板
```
模板:
event = ts_delta(<fundamental_field/>, -1);
if_else(event != 0, <alpha/>, nan)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<fundamental_field/>` | `assets`, `sales`, `eps` | 基本面字段 |
| `<alpha/>` | 主信号表达式 | 事件发生时的Alpha |

**扩展版**:
```
change = if_else(days_from_last_change(<field/>) == <days/>, ts_delta(close, <d/>), nan)
```

---

### TPL-008: 标准化回填模板
```
模板: <ts_op/>(winsorize(ts_backfill(<field/>, <d_backfill/>), std=<std/>), <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_decay_linear`, `ts_zscore` | 时序操作 |
| `<field/>` | 低频数据字段 | 需要回填的字段 |
| `<d_backfill/>` | `115`, `120`, `180` | 回填窗口 |
| `<std/>` | `4`, `3`, `5` | winsorize标准差 |
| `<d/>` | `10`, `22`, `60` | 操作窗口 |

**示例**:
```
ts_decay_linear(-densify(zscore(winsorize(ts_backfill(anl4_adjusted_netincome_ft, 115), std=4))), 10)
ts_rank(winsorize(ts_backfill(<data>, 120), std=4), 60)
```

---

### TPL-009: 信号质量分组模板
```
模板:
signal = <ts_op/>(<field/>, <d/>);
credit_quality = bucket(rank(ts_delay(signal, 1), rate=0), range="<range/>");
group_neutralize(<decay_op/>(signal, k=<k/>), credit_quality)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore` | 信号计算 |
| `<field/>` | 任意数据字段 | 主字段 |
| `<d/>` | `60`, `120` | 窗口 |
| `<range/>` | `"0.2,1,0.2"` | 分组范围 |
| `<decay_op/>` | `ts_weighted_decay` | 衰减操作 |
| `<k/>` | `0.5`, `0.3` | 衰减系数 |

---

### TPL-010: 复合分组中性化
```
模板: group_neutralize(<alpha/>, densify(<group1/>)*1000 + densify(<group2/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<group1/>` | `subindustry`, `sector` | 主分组 |
| `<group2/>` | `country`, `exchange` | 次分组 |

---

## 第二部分：量价类模板 (TPL-101 ~ TPL-120)

### TPL-101: 换手率反转
```
模板: -<ts_op/>(volume/sharesout, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_mean`, `ts_rank`, `ts_std_dev` | 时序统计 |
| `<d/>` | `5`, `22`, `66` | 短中期窗口 |

**示例**:
```
-ts_mean(volume/sharesout, 22)
-ts_std_dev(volume/sharesout, 22)
```

---

### TPL-102: 量稳换手率 (STR)
```
模板: -ts_std_dev(volume/sharesout, <d1/>)/ts_mean(volume/sharesout, <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d1/>` | `20`, `22` | 波动计算窗口 |
| `<d2/>` | `20`, `22` | 均值计算窗口 |

**优化版**:
```
模板: -group_neutralize(ts_std_dev(volume/sharesout, <d/>)/ts_mean(volume/sharesout, <d/>), bucket(rank(cap), range="0.1,1,0.1"))
```

---

### TPL-103: 价格反转模板
```
模板: -<ts_op/>(<price_field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_delta`, `ts_mean`, `ts_rank` | 时序操作 |
| `<price_field/>` | `close`, `returns`, `close/open-1`, `open/ts_delay(close,1)-1` | 价格/收益字段 |
| `<d/>` | `3`, `5`, `22` | 短期窗口 |

**示例**:
```
-ts_delta(close, 5)                    # 价格变化反转
-ts_mean(returns, 22)                  # 收益均值反转
-ts_mean(close/open-1, 22)             # 日内收益反转
-(open/ts_delay(close,1)-1)            # 隔夜收益反转
```

---

### TPL-104: 价格乖离率
```
模板: -(close - ts_mean(close, <d/>))/ts_mean(close, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d/>` | `5`, `22`, `66` | MA周期 |

---

### TPL-105: 量价相关性
```
模板: -ts_corr(<price_field/>, <volume_field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<price_field/>` | `close`, `returns`, `abs(returns)` | 价格类 |
| `<volume_field/>` | `volume`, `volume/sharesout`, `adv20` | 成交量类 |
| `<d/>` | `22`, `66`, `126` | 相关性窗口 |

---

### TPL-106: 跳跃因子
```
模板: -group_neutralize(ts_mean((close/open-1) - log(close/open), <d/>), bucket(rank(cap), range="0.1,1,0.1"))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d/>` | `22`, `30`, `66` | 平均窗口 |

**带成交量增强版**:
```
模板: -group_neutralize(ts_mean((close/open-1) - log(close/open), <d/>) * ts_rank(volume, 5), bucket(rank(cap), range="0.1,1,0.1"))
```

---

### TPL-107: 指数衰减动量
```
模板: -ts_decay_exp_window(<field/>, <d/>, factor=<f/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | `returns`, `returns*(volume/sharesout)`, `close/open-1` | 收益类字段 |
| `<d/>` | `22`, `66`, `126` | 衰减窗口 |
| `<f/>` | `0.04`, `0.1`, `0.5`, `0.9` | 衰减因子，越小衰减越快 |

---

### TPL-108: 成交量周期函数 (VOC)
```
模板:
m_minus = ts_mean(volume, <d_long/>) - ts_mean(volume, <d_short/>);
delta = (ts_max(m_minus, <d_short/>) - m_minus)/(ts_max(m_minus, <d_short/>) - ts_min(m_minus, <d_short/>));
<weight1/>*delta + <weight2/>*ts_delay(delta, 1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d_long/>` | `30`, `66` | 长期均值窗口 |
| `<d_short/>` | `10`, `22` | 短期均值窗口 |
| `<weight1/>` | `0.33`, `0.5` | 当日权重 |
| `<weight2/>` | `0.67`, `0.5` | 前日权重 |

---

### TPL-109: 市场相关性因子
```
模板:
mkt_ret = group_mean(returns, 1, market);
pt = ts_corr(returns, mkt_ret, <d/>);
rank(1/(2*(1-pt)))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d/>` | `10`, `22`, `66` | 相关性窗口 |

---

### TPL-110: 成交量趋势模板
```
模板: ts_decay_linear(volume/ts_sum(volume, <d_long/>), <d_short/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d_long/>` | `252`, `504` | 长期总量窗口 |
| `<d_short/>` | `10`, `22` | 衰减窗口 |

---

### TPL-111: VWAP收益相关
```
模板:
returns > -<threshold/> ? (ts_ir(ts_corr(ts_returns(vwap, 1), ts_delay(group_neutralize(<field/>, market), <d1/>), <d2/>), <d2/>)) : -1
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<threshold/>` | `0.1`, `0.05` | 收益阈值 |
| `<field/>` | 任意数据字段 | 信号字段 |
| `<d1/>` | `30`, `60` | 延迟窗口 |
| `<d2/>` | `90`, `120` | 相关性窗口 |

---

### TPL-112: 动量因子创建
```
模板: ts_sum(winsorize(ts_backfill(<data/>, <day/>), std=4.0), <n/>*21) - ts_sum(winsorize(ts_backfill(<data/>, <day/>), std=4.0), <m/>*21)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<data/>` | `returns`, 基本面字段 | 数据字段 |
| `<day/>` | `120`, `180` | 回填窗口 |
| `<n/>` | `6`, `12` | 长期月数 |
| `<m/>` | `1`, `0.1*n` | 短期月数 |

---

### TPL-113: 线性衰减排名
```
模板: -ts_rank(ts_decay_linear(<field/>, <d1/>), <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | `percent`, 任意时序信号 | 输入信号 |
| `<d1/>` | `10`, `22`, `150` | 衰减窗口 |
| `<d2/>` | `50`, `126` | 排名窗口 |

---

## 第三部分：情绪/新闻类模板 (TPL-201 ~ TPL-220)

### TPL-201: 情绪差值模板
```
模板: <ts_op/>(rank(ts_backfill(<positive_sentiment/>, <d/>)) - rank(ts_backfill(<negative_sentiment/>, <d/>)), <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_mean`, `ts_rank`, `ts_zscore` | 时序操作 |
| `<positive_sentiment/>` | 正面情绪字段 | 积极信号 |
| `<negative_sentiment/>` | 负面情绪字段 | 消极信号 |
| `<d/>` | `20`, `30` | 回填窗口 |
| `<d2/>` | `5`, `22` | 比较窗口 |

---

### TPL-202: 新闻情绪回归残差
```
模板:
sentiment = ts_backfill(ts_delay(<vec_op/>(<sentiment_field/>), 1), <d1/>);
vhat = ts_regression(volume, sentiment, <d2/>);
ehat = -ts_regression(returns, vhat, <d3/>);
group_rank(ehat, bucket(rank(cap), range="0,1,0.1"))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<vec_op/>` | `vec_avg`, `vec_sum` | 情绪聚合方式 |
| `<sentiment_field/>` | `scl12_sentiment`, `snt_buzz_ret`, `nws18_relevance` | 情绪数据 |
| `<d1/>` | `20`, `30` | 回填窗口 |
| `<d2/>` | `120`, `250` | 成交量回归窗口 |
| `<d3/>` | `250`, `750` | 收益回归窗口 |

---

### TPL-203: 社交媒体情绪
```
模板: rank(<vec_op/>(scl12_alltype_buzzvec) * <vec_op/>(scl12_sentiment))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<vec_op/>` | `vec_sum`, `vec_avg` | 向量聚合 |

**带条件版**:
```
模板:
sent_vol = vec_sum(scl12_alltype_buzzvec);
trade_when(rank(sent_vol) > 0.95, -zscore(scl12_buzz)*sent_vol, -1)
```

---

### TPL-204: 条件情绪过滤
```
模板:
group_rank(
sigmoid(if_else(ts_zscore(<sentiment_field/>, <d/>) > <threshold/>, ts_zscore(<sentiment_field/>, <d/>), 0)),
<group/>
)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<sentiment_field/>` | 情绪字段 | 情绪数据 |
| `<d/>` | `22`, `30`, `66` | zscore窗口 |
| `<threshold/>` | `1`, `1.5`, `2` | z-score阈值 |
| `<group/>` | `industry`, `sector` | 分组字段 |

---

### TPL-205: 情绪+波动率复合
```
模板: log(1 + sigmoid(ts_zscore(<sentiment_field/>, <d1/>)) * sigmoid(ts_zscore(<volatility_field/>, <d2/>)))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<sentiment_field/>` | 情绪字段 | 情绪数据 |
| `<volatility_field/>` | `option8_*`, 波动率字段 | 波动率数据 |
| `<d1/>` | `30`, `66` | 情绪窗口 |
| `<d2/>` | `30`, `66` | 波动率窗口 |

---

### TPL-206: 指数衰减情绪
```
模板: ts_decay_exp_window(vec_avg(<sentiment_field/>), <d/>, <factor/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<sentiment_field/>` | `mws85_sentiment`, `nws18_ber` | 情绪向量字段 |
| `<d/>` | `10`, `22` | 衰减窗口 |
| `<factor/>` | `0.9`, `0.7` | 衰减因子 |

**双情绪组合**:
```
decayed_sentiment_1 = ts_decay_exp_window(vec_avg(mws85_sentiment), 10, 0.9);
decayed_sentiment_2 = ts_decay_exp_window(vec_avg(nws18_ber), 10, 0.9);
decayed_sentiment_1 + decayed_sentiment_2
```

---

### TPL-207: 新闻结果排名
```
模板:
percent = ts_rank(vec_stddev(<news_field/>), <d1/>);
-ts_rank(ts_decay_linear(percent, <d2/>), <d1/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<news_field/>` | `nws12_prez_result2` | 新闻数据 |
| `<d1/>` | `50`, `66` | 排名窗口 |
| `<d2/>` | `150`, `252` | 衰减窗口 |

---

### TPL-208: 分组行业提取情绪
```
模板: scale(group_extra(ts_sum(sigmoid(ts_backfill(<data1/>, <d1/>)), <d2/>) - ts_sum(sigmoid(ts_backfill(<data2/>, <d1/>)), <d2/>), 0.5, densify(industry)))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<data1/>` | 情绪或基本面字段 | 被减数字段 |
| `<data2/>` | 情绪或基本面字段（须不同于 `<data1/>`） | 减数字段；两字段相同时差值恒为 0 |
| `<d1/>` | `180`, `252` | 回填窗口 |
| `<d2/>` | `3`, `5` | 求和窗口 |

---

## 第四部分：期权类模板 (TPL-301 ~ TPL-320)

### TPL-301: 期权希腊字母差值
```
模板: <group_op/>(<put_greek/> - <call_greek/>, <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<group_op/>` | `group_rank`, `group_neutralize`, `group_zscore` | 分组操作 |
| `<put_greek/>` | `put_delta`, `put_gamma`, `put_theta`, `put_vega` | Put希腊字母 |
| `<call_greek/>` | `call_delta`, `call_gamma`, `call_theta`, `call_vega` | Call希腊字母 |
| `<group/>` | `industry`, `sector` | 分组字段 |

---

### TPL-302: 期权价格信号
```
模板: group_rank(<ts_op/>(<vec_op/>(<option_price_field/>)/close, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_scale`, `ts_rank`, `ts_zscore` | 时序操作 |
| `<vec_op/>` | `vec_max`, `vec_avg` | 向量操作 |
| `<option_price_field/>` | 期权价格字段 | 期权数据 |
| `<d/>` | `66`, `120`, `252` | 时间窗口 |
| `<group/>` | `industry`, `sector` | 分组字段 |

---

### TPL-303: 期权波动率信号
```
模板: sigmoid(<ts_op/>(<opt_high/> - <opt_close/>, <d/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_ir`, `ts_stddev`, `ts_zscore`, `ts_mean` | 波动性操作 |
| `<opt_high/>` | 期权高价字段 | 期权最高价 |
| `<opt_close/>` | 期权收盘价字段 | 期权收盘价 |
| `<d/>` | `120`, `250`, `504` | 长期窗口 |

**说明**: 期权波动类因子通常需要较长窗口(120-504天)来捕捉稳定信号

---

### TPL-304: 隐含波动率比率
```
模板: <ts_op/>(implied_volatility_call_<tenor/>/parkinson_volatility_<tenor/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore`, `ts_delta` | 时序操作 |
| `<tenor/>` | `120`, `270` | 期权期限 |
| `<d/>` | `66`, `126`, `252` | 窗口 |

---

### TPL-305: Put-Call成交量比
```
模板: <ts_op/>(pcr_vol_<tenor/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_delta`, `ts_zscore` | 时序操作 |
| `<tenor/>` | `10`, `30`, `60` | 期限 |
| `<d/>` | `22`, `66`, `126` | 窗口 |

---

### TPL-306: 期权盈亏平衡点
```
模板: group_rank(ts_zscore(<breakeven_field/>/close, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<breakeven_field/>` | `call_breakeven_10`, `put_breakeven_10` | 盈亏平衡字段 |
| `<d/>` | `66`, `126`, `252` | 窗口 |
| `<group/>` | `sector`, `industry` | 分组 |

---

## 第五部分：分析师类模板 (TPL-401 ~ TPL-420)

### TPL-401: 分析师预期变化
```
模板: <vec_op/>(tail(tail(<analyst_change_field/>, lower=<low/>, upper=<high/>, newval=<low/>), lower=-<high/>, upper=-<low/>, newval=-<low/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<vec_op/>` | `vec_avg`, `vec_sum` | 向量聚合 |
| `<analyst_change_field/>` | `oth41_s_west_eps_ftm_chg_3m`, `anl4_eps_chg` | 预期变化字段 |
| `<low/>` | `0.25`, `0.1` | 下截断值 |
| `<high/>` | `1000`, `100` | 上截断值 |

---

### TPL-402: 剥离动量的分析师因子
```
模板:
afr = <vec_op/>(<analyst_field/>);
short_mom = ts_mean(returns - group_mean(returns, 1, market), <d_short/>);
long_mom = ts_delay(ts_mean(returns - group_mean(returns, 1, market), <d_long/>), <d_long/>);
regression_neut(regression_neut(afr, short_mom), long_mom)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<vec_op/>` | `vec_avg`, `vec_sum` | 向量聚合 |
| `<analyst_field/>` | 分析师数据字段 | 一致预期等 |
| `<d_short/>` | `5`, `10` | 短期动量窗口 |
| `<d_long/>` | `20`, `22` | 长期动量窗口 |

---

### TPL-403: 分析师覆盖度过滤
```
模板:
coverage_filter = ts_sum(<vec_op/>(<analyst_field/>), <d/>) > <min_count/>;
if_else(coverage_filter, <alpha/>, nan)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<vec_op/>` | `vec_count` | 统计分析师数量 |
| `<analyst_field/>` | 分析师向量字段 | 分析师数据 |
| `<d/>` | `66`, `90`, `126` | 统计窗口 |
| `<min_count/>` | `2`, `3`, `5` | 最小覆盖数量 |
| `<alpha/>` | 主信号表达式 | 待过滤的Alpha |

---

### TPL-404: 老虎哥回归模板
```
模板: group_rank(ts_regression(ts_zscore(<field1/>, <d/>), ts_zscore(vec_sum(<field2/>), <d/>), <d/>), densify(sector))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | 任意MATRIX字段 | Y变量 |
| `<field2/>` | 任意VECTOR字段 | X变量 |
| `<d/>` | `252`, `504` | 回归窗口 |

**说明**: 经典回归模板，适用于基本面与分析师数据组合

---

### TPL-405: 分析师预期时序变化
```
模板: ts_mean(vec_avg(<analyst_field/>), <d_short/>) - ts_mean(vec_avg(<analyst_field/>), <d_long/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<analyst_field/>` | `anl4_eps_mean`, `anl4_revenue_mean` | 分析师预测 |
| `<d_short/>` | `22`, `44` | 短期窗口 |
| `<d_long/>` | `66`, `126` | 长期窗口 |

---

### TPL-406: 三因子组合模板
```
模板:
my_group = market;
rank(
group_rank(ts_decay_linear(volume/ts_sum(volume, 252), 10), my_group) *
group_rank(ts_rank(vec_avg(<fundamental/>), <d/>), my_group) *
group_rank(-ts_delta(close, 5), my_group)
)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<fundamental/>` | 基本面VECTOR字段 | 基本面数据 |
| `<d/>` | `252`, `504` | 排名窗口 |

---

### TPL-407: 分析师FCF比率
```
模板: ts_rank(vec_avg(<fcf_field/>) / vec_avg(<profit_field/>), <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<fcf_field/>` | `anl4_fcf_value` | 自由现金流预测 |
| `<profit_field/>` | `anl4_netprofit_low`, `anl4_netprofit_mean` | 利润预测 |
| `<d/>` | `66`, `126`, `252` | 排名窗口 |

---

## 第六部分：中性化技术模板 (TPL-501 ~ TPL-515)

### TPL-501: 市值分组中性化
```
模板: group_neutralize(<alpha/>, bucket(rank(cap), range="<range/>"))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号表达式 | 待中性化的Alpha |
| `<range/>` | `0.1,1,0.1`, `0,1,0.1` | 分组范围（模板中已带引号，取值不再加引号） |

---

### TPL-502: 双重中性化 (行业+市值)
```
模板:
a1 = group_neutralize(<alpha/>, bucket(rank(cap), range="<range/>"));
group_neutralize(a1, <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<range/>` | `0.1,1,0.1` | 市值分组（模板中已带引号，取值不再加引号） |
| `<group/>` | `industry`, `sector`, `subindustry` | 行业分组 |

---

### TPL-503: 回归中性化
```
模板: regression_neut(<alpha/>, <factor/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<factor/>` | `log(cap)`, `ts_ir(returns, 126)`, `ts_std_dev(returns, 22)` | 待剥离因子 |

**多层回归中性化**:
```
模板: regression_neut(regression_neut(<alpha/>, <factor1/>), <factor2/>)
```

---

### TPL-504: 中性化顺序优化
```
模板:
a = ts_zscore(<field/>, <d/>);
a1 = group_neutralize(a, <group/>);
a2 = group_neutralize(a1, bucket(rank(cap), range="<range/>"))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意数据字段 | 主信号 |
| `<d/>` | `252` | zscore窗口 |
| `<group/>` | `industry`, `subindustry` | 行业分组 |
| `<range/>` | `0.1,1,0.1` | 市值分组（模板中已带引号，取值不再加引号） |

**说明**: 先行业中性化再市值中性化，与反向顺序效果可能不同

---

### TPL-505: sta1分组中性化
```
模板: group_neutralize(<alpha/>, sta1_top3000c20)
```
**说明**: 使用预定义的sta1分组进行中性化

---

## 第七部分：条件交易模板 (TPL-601 ~ TPL-620)

### TPL-601: 流动性过滤
```
模板: trade_when(volume > adv20 * <threshold/>, <alpha/>, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<threshold/>` | `0.618`, `0.5`, `1` | 流动性阈值 |
| `<alpha/>` | 主信号 | 原始Alpha |

**反向流动性**:
```
trade_when(volume < adv20, <alpha/>, -1)
```

---

### TPL-602: 波动率过滤
```
模板: trade_when(ts_rank(ts_std_dev(returns, <d1/>), <d2/>) < <threshold/>, <alpha/>, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d1/>` | `5`, `10`, `22` | 波动计算窗口 |
| `<d2/>` | `126`, `180`, `252` | 排名窗口 |
| `<threshold/>` | `0.8`, `0.9` | 波动率阈值 |
| `<alpha/>` | 主信号 | 原始Alpha |

---

### TPL-603: 极端收益过滤
```
模板: trade_when(abs(returns) < <entry/>, <alpha/>, abs(returns) > <exit/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<entry/>` | `0.075`, `0.05` | 入场阈值 |
| `<exit/>` | `0.1`, `0.095` | 出场阈值 |
| `<alpha/>` | 主信号 | 原始Alpha |

---

### TPL-604: 市值过滤
```
模板: trade_when(rank(cap) > <threshold/>, <alpha/>, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<threshold/>` | `0.3`, `0.5` | 市值排名阈值 |
| `<alpha/>` | 主信号 | 原始Alpha |

---

### TPL-605: 触发条件交易
```
模板:
triggerTradeexp = (ts_arg_max(volume, <d/>) < 1) && (volume > ts_sum(volume, <d/>)/<d/>);
triggerExitexp = -1;
trade_when(triggerTradeexp, <alpha/>, triggerExitexp)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d/>` | `5`, `10` | 判断窗口 |
| `<alpha/>` | `-rank(ts_delta(close, 2))` | 主信号 |

---

### TPL-606: 组合条件交易
```
模板:
my_group2 = bucket(rank(cap), range="0,1,0.1");
trade_when(volume > adv20, group_neutralize(<alpha/>, my_group2), -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 复合信号 | 主信号 |

---

### TPL-607: 条件排名交易
```
模板:
a = <ts_op/>(<field/>, <d/>);
trade_when(rank(a) > <threshold_low/>, -zscore(<field2/>)*a, <threshold_high/>-rank(a))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore` | 时序操作 |
| `<field/>` | 任意字段 | 条件字段 |
| `<field2/>` | 任意字段 | 信号字段 |
| `<d/>` | `25`, `66` | 窗口 |
| `<threshold_low/>` | `0.03`, `0.1` | 下阈值 |
| `<threshold_high/>` | `0.25`, `0.5` | 上阈值 |

---

## 第八部分：复合多因子模板 (TPL-701 ~ TPL-720)

### TPL-701: 三因子乘积
```
模板:
my_group = market;
rank(
group_rank(<ts_op1/>(<field1/>, <d1/>), my_group) *
group_rank(<ts_op2/>(<field2/>, <d2/>), my_group) *
group_rank(<ts_op3/>(<field3/>, <d3/>), my_group)
)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op1/>` | `ts_decay_linear`, `ts_rank` | 第一因子操作 |
| `<ts_op2/>` | `ts_rank`, `ts_zscore` | 第二因子操作 |
| `<ts_op3/>` | `-ts_delta` | 第三因子操作(反转) |
| `<field1/>` | `volume/ts_sum(volume, 252)` | 成交量趋势 |
| `<field2/>` | `vec_avg({Fundamental})` | 基本面信号 |
| `<field3/>` | `close` | 价格信号 |
| `<d1/>`, `<d2/>`, `<d3/>` | 各因子窗口 | 时间参数 |

---

### TPL-702: 波动率条件反转
```
模板:
vol = ts_std_dev(<ret_field/>, <d/>);
vol_mean = group_mean(vol, 1, market);
flip_ret = if_else(vol < vol_mean, -<ret_field/>, <ret_field/>);
-ts_mean(flip_ret, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ret_field/>` | `returns`, `close/open-1` | 收益字段 |
| `<d/>` | `20`, `22` | 窗口参数 |

**说明**: 低波动环境做动量（flip_ret 取 -ret，再取负号后与过去收益同向），高波动环境做反转

---

### TPL-703: 恐惧指标组合
```
模板:
fear = ts_mean(
abs(returns - group_mean(returns, 1, market)) /
(abs(returns) + abs(group_mean(returns, 1, market)) + 0.1),
<d/>
);
-group_neutralize(fear * <signal/>, bucket(rank(cap), range="0.1,1,0.1"))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<d/>` | `20`, `22` | 恐惧指标窗口 |
| `<signal/>` | 主信号表达式 | 待组合信号 |

---

### TPL-704: 债务杠杆相关性
```
模板: group_neutralize(ts_zscore(<leverage_field/>, <d1/>) * ts_corr(<leverage_field/>, returns, <d2/>), sector)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<leverage_field/>` | `debt_to_equity`, `debt/assets` | 杠杆字段 |
| `<d1/>` | `60`, `126` | zscore窗口 |
| `<d2/>` | `20`, `66` | 相关性窗口 |

---

### TPL-705: 模型数据信号
```
模板: -<model_field/>
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<model_field/>` | `mdl175_01dtsv`, `mdl175_01icc` | 模型字段 |

**带排名版**:
```
rank(group_rank(ts_rank(ts_backfill(<model_field/>, 5), 5), sta1_top3000c20))
```

---

### TPL-706: 回归zscore模板
```
模板: ts_regression(ts_zscore(<field1/>, <d/>), ts_zscore(<field2/>, <d/>), <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | MATRIX字段 | Y变量 |
| `<field2/>` | MATRIX字段或vec_sum(VECTOR) | X变量 |
| `<d/>` | `252`, `500`, `504` | 回归窗口 |

---

### TPL-707: 分组Delta模板
```
模板: group_neutralize(ts_delta(<field/>, <d/>), sector)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意数据字段 | 主字段 |
| `<d/>` | `22`, `66`, `126` | 差分窗口 |

---

## 第九部分：数据预处理模板 (TPL-801 ~ TPL-815)

### TPL-801: Winsorize截断
```
模板: winsorize(<field/>, std=<std/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 原始数据 |
| `<std/>` | `3`, `4`, `5` | 截断标准差 |

---

### TPL-802: Sigmoid归一化
```
模板: sigmoid(<ts_op/>(<field/>, <d/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_zscore`, `ts_ir`, `ts_rank` | 时序操作 |
| `<field/>` | 任意字段 | 原始数据 |
| `<d/>` | `22`, `66`, `252` | 窗口 |

---

### TPL-803: 数据回填
```
模板: ts_backfill(<field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 低频数据字段 | 需要回填的字段 |
| `<d/>` | `115`, `120`, `180`, `252` | 回填窗口 |

---

### TPL-804: 条件替换
```
模板: if_else(is_not_nan(<field/>), <field/>, <alternative/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 主字段 | 可能有NaN的字段 |
| `<alternative/>` | 替代字段或值 | NaN时的替代 |

---

### TPL-805: 极端值替换
```
模板: tail(tail(<field/>, lower=<low/>, upper=<high/>, newval=<low/>), lower=-<high/>, upper=-<low/>, newval=-<low/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 原始数据 |
| `<low/>` | `0.25`, `0.1` | 下界 |
| `<high/>` | `100`, `1000` | 上界 |

---

### TPL-806: 组合预处理
```
模板: <ts_op/>(winsorize(ts_backfill(<field/>, <d_backfill/>), std=<std/>), <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore`, `ts_mean` | 时序操作 |
| `<field/>` | 低频字段 | 需要处理的字段 |
| `<d_backfill/>` | `120`, `180` | 回填窗口 |
| `<std/>` | `4` | winsorize参数 |
| `<d/>` | `22`, `66` | 操作窗口 |

---

### TPL-807: ts_min/ts_max替代
```
模板: ts_backfill(if_else(ts_arg_min(<field/>, <d/>) == 0, <field/>, nan), 120)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 原始数据 |
| `<d/>` | `22`, `66`, `126` | 窗口 |

**说明**: 当ts_min/ts_max不可用时的替代方案；上式近似复现ts_min，将ts_arg_min换成ts_arg_max即可得到ts_max的替代

---

## 第十部分：高级统计模板 (TPL-901 ~ TPL-920)

### TPL-901: 高阶矩模板 (ts_moment)
```
模板: <ts_op/>(<group_op/>(ts_moment(<field/>, <d/>, k=<k/>), <group/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `rank`, `zscore`, `sigmoid` | 标准化操作 |
| `<group_op/>` | `group_rank`, `group_zscore` | 分组操作 |
| `<field/>` | 任意MATRIX字段 | 数据字段 |
| `<d/>` | `22`, `66`, `126` | 窗口 |
| `<k/>` | `2`, `3`, `4` | k=2方差, k=3偏度, k=4峰度 |

**说明**: ts_moment(x, d, k)计算k阶中心矩

---

### TPL-902: 协偏度/协峰度模板
```
模板: <group_op/>(ts_co_skewness(<field1/>, <field2/>, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<group_op/>` | `group_rank`, `group_zscore` | 分组操作 |
| `<field1/>` | `returns`, `close` | 第一变量 |
| `<field2/>` | `volume`, `vwap` | 第二变量 |
| `<d/>` | `66`, `126`, `252` | 窗口 |

**协峰度版**:
```
模板: <group_op/>(ts_co_kurtosis(<field1/>, <field2/>, <d/>), <group/>)
```

---

### TPL-903: 偏相关模板 (ts_partial_corr)
```
模板: group_rank(ts_partial_corr(<field1/>, <field2/>, <control/>, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | `returns`, 收益相关 | Y变量 |
| `<field2/>` | 任意字段 | X变量 |
| `<control/>` | `group_mean(returns, 1, market)` | 控制变量(市场收益) |
| `<d/>` | `60`, `126`, `252` | 窗口 |
| `<group/>` | `sector`, `industry` | 分组 |

**说明**: 计算两变量偏相关，控制第三变量影响

---

### TPL-904: 三元相关模板 (ts_triple_corr)
```
模板: group_rank(ts_triple_corr(<field1/>, <field2/>, <field3/>, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | `returns` | 第一变量 |
| `<field2/>` | `volume` | 第二变量 |
| `<field3/>` | 基本面字段 | 第三变量 |
| `<d/>` | `60`, `126` | 窗口 |
| `<group/>` | `sector`, `industry` | 分组 |

---

### TPL-905: Theil-Sen回归模板
```
模板: group_rank(ts_theilsen(<field1/>, <field2/>, <d/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | 任意MATRIX字段 | Y变量 |
| `<field2/>` | 任意MATRIX字段或`ts_step(1)` | X变量 |
| `<d/>` | `126`, `252`, `500` | 窗口 |
| `<group/>` | `sector`, `industry` | 分组 |

**说明**: Theil-Sen回归比普通回归更鲁棒

---

### TPL-906: 多项式回归残差
```
模板: ts_poly_regression(<field1/>, <field2/>, <d/>, k=<k/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | Y变量 | 被解释变量 |
| `<field2/>` | X变量 | 解释变量 |
| `<d/>` | `126`, `252` | 窗口 |
| `<k/>` | `1`, `2`, `3` | 多项式阶数, k=2为二次回归 |

**说明**: 返回 y - Ey (残差)

---

### TPL-907: 向量中性化模板
```
模板: ts_vector_neut(<alpha/>, <risk_factor/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 待中性化Alpha |
| `<risk_factor/>` | `returns`, `cap` | 风险因子 |
| `<d/>` | `22`, `66`, `126` | 窗口(不宜过长，计算慢) |

**分组向量中性化**:
```
模板: group_vector_neut(<alpha/>, <risk_factor/>, <group/>)
```

---

### TPL-908: 加权衰减模板
```
模板: group_neutralize(ts_weighted_decay(<alpha/>, k=<k/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 待衰减Alpha |
| `<k/>` | `0.3`, `0.5`, `0.7` | 衰减系数 |
| `<group/>` | `bucket(rank(cap), range="0.1,1,0.1")` | 分组 |

---

### TPL-909: 回归斜率模板
```
模板: ts_regression(ts_zscore(<field/>, <d/>), ts_step(1), <d/>, rettype=2)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意MATRIX字段 | 数据字段 |
| `<d/>` | `252`, `500` | 窗口 |

**说明**: rettype=2返回斜率，用于检测趋势

---

### TPL-910: 最小最大压缩模板
```
模板: ts_min_max_cps(<field/>, <d/>, f=<f/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<d/>` | `22`, `66`, `126` | 窗口 |
| `<f/>` | `2`, `0.5` | 压缩因子 |

**等价公式**: `(ts_min(x, d) + ts_max(x, d)) - f * x`

---

## 第十一部分：事件驱动模板 (TPL-1001 ~ TPL-1020)

### TPL-1001: 数据变化天数模板
```
模板: if_else(days_from_last_change(<field/>) == <days/>, <alpha/>, nan)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 基本面字段 | 监测变化的字段 |
| `<days/>` | `1`, `2`, `5` | 距离变化的天数 |
| `<alpha/>` | `ts_delta(close, 5)`, 主信号 | 事件触发时的Alpha |

**动态衰减版**:
```
模板: <alpha/> / (1 + days_from_last_change(<field/>))
```

---

### TPL-1002: 最近差值模板
```
模板: <ts_op/>(last_diff_value(<field/>, <d/>), <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore` | 时序操作 |
| `<field/>` | 任意字段 | 数据字段 |
| `<d/>` | `60`, `90`, `120` | 回溯窗口 |
| `<d2/>` | `22`, `66` | 操作窗口 |

**说明**: 返回过去d天内最近一次不同于当前值的历史值

---

### TPL-1003: 缺失值计数模板
```
模板: -ts_count_nans(ts_backfill(<field/>, <d1/>), <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 分析师数据等 | 可能有缺失的字段 |
| `<d1/>` | `5`, `10` | 回填窗口 |
| `<d2/>` | `20`, `30` | 计数窗口 |

**应用**: 分析师覆盖度信号，缺失越少覆盖越好

---

### TPL-1004: 位置最大/最小模板
```
模板: if_else(ts_arg_max(<field/>, <d/>) == <position/>, <alpha/>, nan)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | `volume`, 任意字段 | 监测字段 |
| `<d/>` | `5`, `10` | 窗口 |
| `<position/>` | `0`, `1` | 0表示今天是最大值 |
| `<alpha/>` | 主信号 | 条件满足时的Alpha |

**组合条件**:
```
模板: (ts_arg_max(<field1/>, <d/>) == ts_arg_max(<field2/>, <d/>)) * (<alpha1/> + <alpha2/>)
```

---

### TPL-1005: 财报发布事件模板
```
模板:
event_signal = if_else(ts_delta(<fundamental_field/>, 1) != 0, <alpha/>, nan);
ts_decay_linear(event_signal, <decay_d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<fundamental_field/>` | `assets`, `sales`, `eps` | 基本面字段 |
| `<alpha/>` | `ts_delta(close, 5)`, 主信号 | 事件Alpha |
| `<decay_d/>` | `10`, `22` | 衰减窗口 |

---

### TPL-1006: 动态Decay事件驱动
```
模板:
decay_weight = 1 / (1 + days_from_last_change(<event_field/>));
<alpha/> * decay_weight
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<event_field/>` | 任意字段 | 事件触发字段 |
| `<alpha/>` | 主信号 | 原始Alpha |

---

### TPL-1007: 盈利公告模板
```
模板:
surprise = <actual_field/> - <estimate_field/>;
if_else(days_from_last_change(<actual_field/>) < <window/>, surprise, nan)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<actual_field/>` | `eps` | 实际值 |
| `<estimate_field/>` | `vec_avg(anl4_eps_mean)` | 预测值 |
| `<window/>` | `5`, `10` | 事件有效窗口 |

---

## 第十二部分：信号处理模板 (TPL-1101 ~ TPL-1120)

### TPL-1101: 黄金比例幂变换
```
模板: signed_power(<alpha/>, 0.618)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号表达式 | 原始Alpha |

**其他幂次**:
```
signed_power(<alpha/>, 0.5)   # 平方根
signed_power(<alpha/>, 2)     # 平方增强
```

---

### TPL-1102: 尾部截断模板
```
模板: right_tail(<alpha/>, minimum=<min/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<min/>` | `0`, `0.1` | 最小阈值 |

**左尾版**:
```
模板: left_tail(<alpha/>, maximum=<max/>)
```

---

### TPL-1103: Clamp边界限制
```
模板: clamp(<alpha/>, lower=<low/>, upper=<high/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<low/>` | `-1`, `-0.5` | 下界 |
| `<high/>` | `1`, `0.5` | 上界 |

---

### TPL-1104: 分数映射模板
```
模板: fraction(<alpha/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |

**说明**: fraction(x) 去掉数值的整数部分，返回带符号的小数部分

---

### TPL-1105: NaN外推模板
```
模板: nan_out(<field/>, lower=<low/>, upper=<high/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<low/>` | `-3`, `-5` | 下界 |
| `<high/>` | `3`, `5` | 上界 |

**说明**: 将超出范围的值替换为NaN

---

### TPL-1106: Purify数据清洗
```
模板: purify(<field/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 需要清洗的数据 |

**说明**: 自动化数据清洗，减少噪声和异常值

---

### TPL-1107: 条件保留模板
```
模板: keep(<field/>, <condition/>, period=<d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<condition/>` | `<field/> > 0` | 保留条件 |
| `<d/>` | `3`, `5`, `10` | 滚动窗口 |

**示例**:
```
keep(returns, returns > 0, period=3)  # 只保留正收益
```

---

### TPL-1108: 缩放降维模板
```
模板: -scale_down(<ts_op/>(<field/>, <d1/>), constant=<c/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_mean`, `ts_rank` | 时序操作 |
| `<field/>` | `returns`, 任意字段 | 数据字段 |
| `<d1/>` | `2`, `5` | 窗口 |
| `<c/>` | `0.1`, `0.05` | 缩放常数 |

---

### TPL-1109: Truncate截断模板
```
模板: truncate(<alpha/>, maxPercent=<percent/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<percent/>` | `0.01`, `0.05` | 截断百分比 |

---

### TPL-1110: 组合Normalize模板
```
模板: group_normalize(<alpha/>, <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<group/>` | `sector`, `industry` | 分组 |

**等价公式**: `alpha / group_sum(abs(alpha), group)`

---

## 第十三部分：Turnover控制模板 (TPL-1201 ~ TPL-1215)

### TPL-1201: 目标换手率Hump
```
模板: ts_target_tvr_hump(<alpha/>, lambda_min=0, lambda_max=1, target_tvr=<target/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<target/>` | `0.1`, `0.15`, `0.2` | 目标换手率 |

---

### TPL-1202: Delta限制换手率
```
模板: ts_target_tvr_delta_limit(<alpha/>, <factor/>, lambda_min=0, lambda_max=1, target_tvr=<target/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<factor/>` | 辅助因子 | 限制因子 |
| `<target/>` | `0.1`, `0.15` | 目标换手率 |

---

### TPL-1203: Hump衰减组合
```
模板: hump_decay(<alpha/>, hump=<h/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<h/>` | `0.001`, `0.01` | Hump参数 |

**嵌套版**:
```
hump(hump_decay(<alpha/>, hump=0.001))
```

---

### TPL-1204: 平均+Hump模板
```
模板: -ts_mean(ts_target_tvr_hump(group_rank(<field/>, country), lambda_min=0, lambda_max=1, target_tvr=<target/>), <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<target/>` | `0.1` | 目标换手率 |
| `<d/>` | `5`, `10` | 平均窗口 |

---

### TPL-1205: 简单Hump模板
```
模板: hump(<alpha/>, hump=<h/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<h/>` | `0.01`, `0.001`, `0.0001` | Hump参数 |

**示例**:
```
hump(-ts_delta(close, 5), hump=0.01)
```

---

## 第十四部分：回填与覆盖模板 (TPL-1301 ~ TPL-1315)

### TPL-1301: 分组回填模板
```
模板: group_backfill(<field/>, <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 需要回填的字段 |
| `<group/>` | `sector`, `industry`, `market` | 分组字段 |

**说明**: 使用组内最近值填充NaN

---

### TPL-1302: 嵌套回填排名
```
模板: rank(group_backfill(<field/>, <group/>))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<group/>` | `sector`, `industry` | 分组 |

---

### TPL-1303: 覆盖度过滤
```
模板: group_count(is_nan(<field/>), market) > <threshold/> ? <alpha/> : nan
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 检测字段 |
| `<threshold/>` | `40`, `50` | 最小覆盖数 |
| `<alpha/>` | 主信号 | 原始Alpha |

---

### TPL-1304: NaN替换模板
```
模板: if_else(is_not_nan(<field/>), <field/>, <default/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<default/>` | `0`, `0.5`, `nan` | 默认值 |

---

### TPL-1305: 综合数据清洗
```
模板: <ts_op/>(winsorize(group_backfill(ts_backfill(<field/>, <d1/>), <group/>), std=<std/>), <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore` | 时序操作 |
| `<field/>` | 低频字段 | 数据字段 |
| `<d1/>` | `120`, `180` | 时序回填窗口 |
| `<group/>` | `sector`, `industry` | 分组回填 |
| `<std/>` | `4` | winsorize参数 |
| `<d2/>` | `66`, `126` | 操作窗口 |

---

## 第十五部分：组合提取模板 (TPL-1401 ~ TPL-1415)

### TPL-1401: group_extra填补模板
```
模板: group_extra(<field/>, <weight/>, <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<weight/>` | `0.5`, `1` | 权重 |
| `<group/>` | `densify(industry)`, `sector` | 分组 |

**说明**: 用组均值填补缺失值

---

### TPL-1402: 组合提取sigmoid
```
模板: scale(group_extra(ts_sum(sigmoid(ts_backfill(<field1/>, <d1/>)), <d2/>) - ts_sum(sigmoid(ts_backfill(<field2/>, <d1/>)), <d2/>), 0.5, densify(industry)))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field1/>` | 任意字段 | 被减数字段 |
| `<field2/>` | 任意字段（须不同于 `<field1/>`） | 减数字段；两字段相同时差值恒为 0 |
| `<d1/>` | `180` | 回填窗口 |
| `<d2/>` | `3` | 求和窗口 |

---

### TPL-1403: PnL反馈模板
```
模板: if_else(inst_pnl(<alpha/>) > <threshold/>, <alpha/>, nan)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |
| `<threshold/>` | `0`, `-0.05` | PnL阈值 |

**说明**: 基于单标的PnL进行条件交易

---

### TPL-1404: 流动性加权模板
```
模板: <alpha/> * log(volume)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |

**说明**: 将仓位偏向高流动性股票

---

### TPL-1405: 市值回归中性化
```
模板: regression_neut(<alpha/>, log(cap))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<alpha/>` | 主信号 | 原始Alpha |

**说明**: 剥离市值因子影响

---

## 第十六部分：百分位与分位数模板 (TPL-1501 ~ TPL-1510)

### TPL-1501: 时序百分位模板
```
模板: ts_percentage(<field/>, <d/>, percentage=<p/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<d/>` | `22`, `66`, `126` | 窗口 |
| `<p/>` | `0.5`, `0.25`, `0.75` | 百分位 |

---

### TPL-1502: 分位数模板
```
模板: <ts_op/>(ts_quantile(<field/>, <d/>, <q/>), <d2/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ts_op/>` | `ts_rank`, `ts_zscore` | 时序标准化（需要窗口参数 `<d2/>`） |
| `<field/>` | 任意字段 | 数据字段 |
| `<d/>` | `66`, `126` | 窗口 |
| `<q/>` | `0.25`, `0.5`, `0.75` | 分位数 |
| `<d2/>` | `22` | 操作窗口 |

---

### TPL-1503: 最大差/均值差比率模板
```
模板: ts_max_diff(<field/>, <d/>) / ts_av_diff(<field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<d/>` | `22`, `66` | 窗口 |

---

### TPL-1504: 中位数模板
```
模板: <field/> - ts_median(<field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<field/>` | 任意字段 | 数据字段 |
| `<d/>` | `22`, `66`, `252` | 窗口 |

---

### TPL-1505: 累积乘积模板
```
模板: ts_product(1 + <ret_field/>, <d/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<ret_field/>` | `returns`, 收益率字段 | 收益字段 |
| `<d/>` | `5`, `22`, `66` | 窗口 |

**说明**: 计算累积收益

---

## 第十七部分：实战表达式模板 (TPL-1601 ~ TPL-1700)

**说明**: 以下模板从社区高票帖子中提取，为实际验证过的表达式格式。

### TPL-1601: ts_max/ts_min替代公式
```
模板: {data} - ts_max_diff({data}, {d})                      # 等效于 ts_max
模板: (({data} - ts_max_diff({data}, {d})) * ts_scale({data}, {d}) - {data}) / (ts_scale({data}, {d}) - 1)  # 等效于 ts_min
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{data}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `22`, `66`, `126` | 窗口 |

**应用**: 当平台不支持ts_max/ts_min时的替代方案

---

### TPL-1602: 线性衰减权重公式
```
模板: weight = {d} + ts_step(0); ts_sum({data} * weight, {d}) / ts_sum(weight, {d})  # 等效于 ts_decay_linear
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{data}` | 任意字段 | 数据字段 |
| `{d}` | `10`, `22`, `66` | 衰减窗口 |

---

### TPL-1603: 组归一化公式
```
模板: {data} / group_sum(abs({data}), {group})  # 等效于 group_normalize
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{data}` | 任意字段 | 数据字段 |
| `{group}` | `industry`, `sector` | 分组字段 |

---

### TPL-1604: IR+峰度组合模板
```
模板:
rank_data = rank({field});
ts_ir(rank_data, {d}) + ts_kurtosis(rank_data, {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | `volume`, `returns`, 任意字段 | 数据字段 |
| `{d}` | `22`, `66` | 窗口 |

**说明**: IR和峰度组合捕捉信号强度和分布特征

---

### TPL-1605: VWAP相关性信号
```
模板: returns > -{threshold} ? (ts_ir(ts_corr(ts_returns(vwap, 1), ts_delay(group_neutralize({field}, market), {d1}), {d2}), {d2})) : -1
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意数据字段 | 信号字段 |
| `{threshold}` | `0.1`, `0.05` | 收益过滤阈值 |
| `{d1}` | `30`, `60` | 延迟窗口 |
| `{d2}` | `90`, `120` | 相关性窗口 |

---

### TPL-1606: 球队硬币因子 (ballteam_coin)
```
模板:
# 基础版
rank(ballteam_coin)

# 市值中性化版
group_neutralize(rank(ballteam_coin), bucket(rank(assets), range='0.1,1,0.1'))
```
**说明**: 经典球队vs硬币因子，用于捕捉收益持续性

---

### TPL-1607: 偏度因子模板
```
模板: -group_rank(ts_skewness(returns, {d}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `22`, `66`, `126` | 偏度计算窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 负偏度股票往往表现更好

---

### TPL-1608: 熵信号模板
```
模板: ts_zscore({field}, {d1}) * ts_entropy({field}, {d2})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | `returns`, 任意字段 | 信号字段 |
| `{d1}` | `14`, `22` | zscore窗口 |
| `{d2}` | `14`, `22` | 熵窗口 |

**说明**: 结合标准化和不确定性度量

---

### TPL-1609: 分析师动量短长差模板
```
模板: log(ts_mean(anl4_{data}_{stats}, {d_short})) - log(ts_mean(anl4_{data}_{stats}, {d_long}))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{data}` | `eps`, `revenue`, `netprofit` | 分析师预测类型 |
| `{stats}` | `mean`, `low`, `high` | 统计量类型 |
| `{d_short}` | `20`, `44` | 短期窗口 |
| `{d_long}` | `44`, `126` | 长期窗口 |

---

### TPL-1610: 目标换手率分组排名
```
模板: -ts_mean(ts_target_tvr_hump(group_rank({field}, country), lambda_min=0, lambda_max=1, target_tvr={target}), {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意字段 | 数据字段 |
| `{target}` | `0.1`, `0.15` | 目标换手率 |
| `{d}` | `5`, `10` | 平均窗口 |

---

### TPL-1611: 最大差/均值差比率
```
模板: ts_max_diff({field}, {d}) / ts_av_diff({field}, {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意字段 | 数据字段 |
| `{d}` | `22`, `66` | 窗口 |

**说明**: 捕捉极端值相对于平均变化的幅度

---

### TPL-1612: 模型数据三层嵌套
```
模板:
a = rank(group_rank(ts_rank(ts_backfill({model_field}, 5), 5), sta1_top3000c20));
trade_when(rank(a) > 0.03, -zscore(ts_zscore({model_field}, 25)) * a, 0.25 - rank(a))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{model_field}` | `mdl175_01icc`, `mdl175_01dtsv` | 模型字段 |

---

### TPL-1613: 量价触发条件交易
```
模板:
triggerTradeexp = (ts_arg_max(volume, {d}) < 1) && (volume > ts_sum(volume, {d}) / {d});
triggerExitexp = -1;
alphaexp = -rank(ts_delta(close, 2));
trade_when(triggerTradeexp, alphaexp, triggerExitexp)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `5`, `10` | 窗口 |

**说明**: 今日成交量为近期最大且高于均值时交易

---

### TPL-1614: 情绪成交量交易
```
模板:
sent_vol = vec_sum(scl12_alltype_buzzvec);
trade_when(rank(sent_vol) > 0.95, -zscore(scl12_buzz) * sent_vol, -1)
```
**说明**: 高情绪量时反向交易情绪

---

### TPL-1615: 双层中性化模板
```
模板:
a = ts_zscore({field}, 252);
a1 = group_neutralize(a, industry);
a2 = group_neutralize(a1, bucket(rank(cap), range='0.1,1,0.1'))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意字段 | 数据字段 |

**说明**: 先行业后市值的双重中性化

---

### TPL-1616: 相关性计算公式
```
模板:
a = {field1};
b = {field2};
p = {d};
c = ts_mean(ts_av_diff(a, p) * ts_av_diff(b, p), p);
c / ts_std_dev(a, p) / ts_std_dev(b, p)  # 近似 ts_corr
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field1}` | `close`, `returns` | 第一字段 |
| `{field2}` | `volume`, `open` | 第二字段 |
| `{d}` | `5`, `22` | 窗口 |

---

### TPL-1617: 回归中性化双因子
```
模板:
afr = vec_avg({analyst_field});
short_mom = ts_mean(returns - group_mean(returns, 1, market), {d_short});
long_mom = ts_delay(ts_mean(returns - group_mean(returns, 1, market), {d_long}), {d_long});
regression_neut(regression_neut(afr, short_mom), long_mom)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{analyst_field}` | 分析师VECTOR字段 | 分析师数据 |
| `{d_short}` | `5`, `10` | 短期动量窗口 |
| `{d_long}` | `20`, `22` | 长期动量窗口 |

**说明**: 剥离短期和长期动量后的分析师因子

---

### TPL-1618: 回归斜率趋势检测
```
模板: ts_regression(ts_zscore({field}, {d}), ts_step(1), {d}, rettype=2)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `252`, `500` | 窗口 |

**说明**: rettype=2返回回归斜率，检测长期趋势

---

### TPL-1619: 三因子乘积组合
```
模板:
my_group = market;
rank(
group_rank(ts_decay_linear(volume / ts_sum(volume, 252), 10), my_group) *
group_rank(ts_rank(vec_avg({fundamental}), {d}), my_group) *
group_rank(-ts_delta(close, 5), my_group)
)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{fundamental}` | 基本面VECTOR字段 | 基本面数据 |
| `{d}` | `252`, `504` | 排名窗口 |

**说明**: 成交量趋势 × 基本面排名 × 价格反转

---

### TPL-1620: 波动率条件反转
```
模板:
vol = ts_std_dev(returns, {d});
vol_mean = group_mean(vol, 1, market);
flip_ret = if_else(vol < vol_mean, -returns, returns);
-ts_mean(flip_ret, {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `20`, `22` | 窗口 |

**说明**: 低波动做反转，高波动做动量

---

### TPL-1621: 恐惧指标复合
```
模板:
fear = ts_mean(
abs(returns - group_mean(returns, 1, market)) /
(abs(returns) + abs(group_mean(returns, 1, market)) + 0.1),
{d}
);
-group_neutralize(fear * {signal}, bucket(rank(cap), range='0.1,1,0.1'))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `20`, `22` | 窗口 |
| `{signal}` | 主信号 | 待组合信号 |

---

### TPL-1622: 财务质量单因子
```
模板: group_neutralize(rank({fundamental_field}), bucket(rank(cap), range='0,1,0.1'))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{fundamental_field}` | `roe`, `roa`, `net_income/assets` | 财务质量指标 |

---

### TPL-1623: 老虎哥回归模板
```
模板: group_rank(ts_regression(ts_zscore({field1}, {d}), ts_zscore(vec_sum({field2}), {d}), {d}), densify(sector))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field1}` | 任意MATRIX字段 | Y变量 |
| `{field2}` | 任意VECTOR字段 | X变量 |
| `{d}` | `252`, `504` | 回归窗口 |

---

### TPL-1624: 综合数据清洗模板
```
模板: ts_decay_linear(-densify(zscore(winsorize(ts_backfill({field}, 115), std=4))), 10)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 低频字段如 `anl4_adjusted_netincome_ft` | 需要处理的字段 |

---

### TPL-1625: 延迟最大值位置模板
```
模板: ts_max({field}, {d}) = ts_delay({field}, ts_arg_max({field}, {d}))  # 等效公式
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意字段 | 数据字段 |
| `{d}` | `22`, `66` | 窗口 |

---

### TPL-1626: 数据探索通用模板
```
模板: zscore(ts_delta(rank(ts_zscore({field}, {d1})), {d2}))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 待探索数据字段 |
| `{d1}` | `60`, `126`, `252` | zscore窗口 |
| `{d2}` | `5`, `10`, `22` | delta窗口 |

**说明**: 顾问推荐的新数据探索模板，可替换op和时间参数

---

### TPL-1627: 自定义衰减权重模板
```
模板:
weight = {d} + ts_step(0);                       # 线性递增权重
ts_sum({data} * weight, {d}) / ts_sum(weight, {d})  # 加权平均

# 替代版 (ts_step递减)
ts_sum({alpha} * ts_step(1), {d}) / ts_sum(ts_step(1), {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{data}` | 任意字段 | 数据字段 |
| `{alpha}` | 主信号 | 原始Alpha |
| `{d}` | `10`, `22`, `66` | 衰减窗口 |

**说明**: 当没有ts_decay_linear权限时的替代方案

---

### TPL-1628: log_diff相对增长模板
```
模板: group_rank(log_diff({field}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 财务指标如 `sales`, `eps`, `assets` | 数据字段 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 检测相对增长率，对乘性变化更敏感

---

### TPL-1629: ts_product累积收益模板
```
模板: group_rank(ts_product(1 + {ret_field}, {d}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{ret_field}` | `returns`, 收益率字段 | 收益字段 |
| `{d}` | `22`, `66`, `126` | 窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 计算累积收益排名

---

### TPL-1630: ts_percentage阈值模板
```
模板:
high_threshold = ts_percentage({field}, {d}, percentage=0.5);
low_threshold = ts_percentage({field}, {d}, percentage=0.5);
{signal}
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | `close`, 价格字段 | 阈值计算字段 |
| `{d}` | `22`, `66` | 窗口 |
| `{signal}` | 主信号 | 条件信号 |

**说明**: 用于震荡带突破策略的阈值构建

---

### TPL-1631: 动量反转切换模板
```
模板:
mom = ts_sum(returns, {d_long}) - ts_sum(returns, {d_short});
reversal = -ts_delta(close, {d_short});
if_else(ts_rank(ts_std_dev(returns, {d_short}), {d_long}) > 0.5, mom, reversal)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d_short}` | `5`, `10` | 短期窗口 |
| `{d_long}` | `22`, `66` | 长期窗口 |

**说明**: 高波动环境用动量，低波动环境用反转

---

### TPL-1632: 市场收益率近似模板 (CHN)
```
模板:
value = rank(cap) > 0.9 ? cap : 0;
market_return = group_sum(returns * value, country) / group_sum(value, country);
market_return
```
**说明**: 用市值加权近似沪深300指数收益率，设置neutralization=NONE, decay=0

---

### TPL-1633: Beta回归中性化模板
```
模板:
market_return = group_mean(returns, 1, market);
ts_regression({field}, market_return, {d})  # 返回残差(Y - E[Y])
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 待中性化字段 |
| `{d}` | `126`, `252` | 回归窗口 |

**说明**: 使用一元线性回归剥离市场因子

---

### TPL-1634: ts_moment高阶矩k值模板
```
模板: ts_moment({field}, {d}, k={k})

k=2: 方差 (等价于 ts_std_dev^2)
k=3: 偏度 (等价于 ts_skewness)
k=4: 峰度 (等价于 ts_kurtosis)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `22`, `66`, `126` | 窗口 |
| `{k}` | `2`, `3`, `4` | 阶数 |

---

### TPL-1635: 龙头股因子增强模板
```
模板: sigmoid(rank(star_pm_global_rank))
```
**说明**: 对龙头股因子进行sigmoid增强

---

### TPL-1636: purify数据清洗嵌套模板
```
模板: group_rank(ts_rank(purify({field}), {d}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意字段 | 待清洗数据 |
| `{d}` | `22`, `66` | 排名窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: purify自动化清洗异常值和噪声

---

### TPL-1637: 理想振幅因子模板
```
模板:
amplitude = (high - low) / close;
ideal_amp = ts_percentage(amplitude, {d}, percentage=0.5);
group_rank(amplitude - ideal_amp, {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `22`, `66` | 百分位窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 实际振幅偏离理想振幅的程度

---

### TPL-1638: 异同离差乖离率因子 (MACD风格)
```
模板:
ema_short = ts_decay_exp_window({field}, {d_short}, 0.9);
ema_long = ts_decay_exp_window({field}, {d_long}, 0.9);
dif = ema_short - ema_long;
ts_zscore(dif, {d_signal})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | `close`, 价格字段 | 数据字段 |
| `{d_short}` | `12`, `22` | 短期EMA窗口 |
| `{d_long}` | `26`, `66` | 长期EMA窗口 |
| `{d_signal}` | `9`, `22` | 信号线窗口 |

---

### TPL-1639: 收益率条件筛选反转
```
模板:
high_ret = ts_rank(returns, {d1}) > 0.8;
low_ret = ts_rank(returns, {d1}) < 0.2;
if_else(high_ret, -returns, if_else(low_ret, returns, 0))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d1}` | `22`, `66` | 排名窗口 |

**说明**: 只对极端收益做反转

---

### TPL-1640: 三阶模板优化版
```
模板: <group_op/>(<ts_op1/>(<ts_op2/>(<field/>, <d1/>), <d2/>), <group/>)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `<group_op/>` | `group_rank`, `group_zscore` | 外层分组操作 |
| `<ts_op1/>` | `ts_rank`, `ts_delta`, `ts_mean` | 中层时序操作 |
| `<ts_op2/>` | `ts_zscore`, `ts_rank`, `ts_ir` | 内层时序操作 |
| `<field/>` | 任意字段 | 数据字段 |
| `<d1/>` | `60`, `126`, `252` | 内层窗口 |
| `<d2/>` | `5`, `22`, `66` | 外层窗口 |
| `<group/>` | `sector`, `industry` | 分组 |

**说明**: 经典三阶嵌套结构，可灵活替换各层操作符

---

### TPL-1641: ts_entropy信号检测模板
```
模板: ts_entropy({field}, {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | `returns`, `volume`, 任意MATRIX字段 | 数据字段 |
| `{d}` | `14`, `22`, `66` | 窗口 |

**说明**: 衡量时序数据的不确定性，高熵值表示更多随机性

---

### TPL-1642: 熵+ZScore组合模板
```
模板: ts_zscore({field}, {d}) * ts_entropy({field}, {d})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `14`, `22` | 窗口 |

**说明**: RSI超买超卖 + 熵不确定性组合，捕捉可能的修正

---

### TPL-1643: ts_ir+ts_entropy信号组合
```
模板:
signal = ts_ir({field}, {d}) + ts_entropy({field}, {d});
group_rank(signal, {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `22`, `66` | 窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: IR(信息比率)和Entropy组合捕捉信号稳定性和分布特征

---

### TPL-1644: trade_when市值过滤模板
```
模板: trade_when(rank(cap) > {threshold}, {alpha}, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{threshold}` | `0.3`, `0.5`, `0.7` | 市值排名阈值 |
| `{alpha}` | 主信号 | 原始Alpha |

**说明**: 仅交易大市值股票，降低prod corr

---

### TPL-1645: trade_when盈利过滤模板
```
模板: trade_when(eps > {threshold} * est_eps, group_rank((eps - est_eps)/est_eps, industry), -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{threshold}` | `1.0`, `1.1`, `1.2` | 盈利超预期比例 |

**说明**: 只交易盈利超预期的股票

---

### TPL-1646: trade_when量价触发模板
```
模板:
triggerTrade = (ts_arg_max(volume, {d}) < 1) && (volume > ts_sum(volume, {d})/{d});
trade_when(triggerTrade, {alpha}, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `5`, `10` | 判断窗口 |
| `{alpha}` | `-rank(ts_delta(close, 2))` | 主信号 |

**说明**: 量价突破触发条件交易

---

### TPL-1647: trade_when情绪量过滤模板
```
模板:
sent_vol = vec_sum({sentiment_vec});
trade_when(rank(sent_vol) > {threshold}, -zscore({sentiment_field}) * sent_vol, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{sentiment_vec}` | `scl12_alltype_buzzvec` 等VECTOR字段 | 情绪向量 |
| `{sentiment_field}` | `scl12_buzz`, `scl12_sentiment` | 情绪字段 |
| `{threshold}` | `0.9`, `0.95` | 情绪量阈值 |

**说明**: 高情绪量时反向交易情绪

---

### TPL-1648: bucket市值分组中性化模板
```
模板:
my_group2 = bucket(rank(cap), range='{range}');
group_neutralize({alpha}, my_group2)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{range}` | `'0,1,0.1'`, `'0.1,1,0.1'` | 分桶范围 |
| `{alpha}` | 主信号 | 原始Alpha |

**说明**: 按市值分桶进行中性化，去除规模效应

---

### TPL-1649: group_zscore时序组合模板
```
模板: group_zscore(ts_ir({field}, {d}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `22`, `66`, `126` | IR窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 在分组内进行IR的Z-score标准化

---

### TPL-1650: scale+rank+ts组合模板
```
模板: scale(rank(ts_zscore({field}, {d})))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `66`, `126`, `252` | 窗口 |

**说明**: 多层标准化处理信号

---

### TPL-1651: Betting Against Beta模板
```
模板:
market_return = group_mean(returns, 1, market);
beta = ts_regression(returns, market_return, {d}, rettype=2);
-group_rank(beta, industry)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `126`, `252` | 回归窗口 |

**说明**: 反Beta投注因子，做多低Beta股票

---

### TPL-1652: 跳跃因子模板
```
模板:
jump_up = ts_count(returns > ts_std_dev(returns, {d}) * {threshold}, {d});
jump_down = ts_count(returns < -ts_std_dev(returns, {d}) * {threshold}, {d});
group_rank(jump_down - jump_up, {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `22`, `66` | 统计窗口 |
| `{threshold}` | `2`, `2.5`, `3` | 标准差倍数 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 统计尾部跳跃事件的不对称性

---

### TPL-1653: 量小换手率模板
```
模板:
turnover = volume / sharesout;
low_turnover = ts_percentage(turnover, {d}, percentage=0.2);
group_rank(turnover < low_turnover, {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `22`, `66` | 百分位窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 识别低换手率状态

---

### TPL-1654: 隔夜收益因子模板
```
模板:
overnight_ret = open / ts_delay(close, 1) - 1;
group_rank(ts_mean(overnight_ret, {d}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `5`, `22`, `66` | 平均窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 隔夜"拉锯战"因子

---

### TPL-1655: sta1分组三因子模板
```
模板:
a = rank(group_rank(ts_rank(ts_backfill({field1}, {d1}), {d2}), sta1_top3000c20));
trade_when(rank(a) > {threshold}, -zscore(ts_zscore({field2}, {d3})) * a, {exit_threshold} - rank(a))
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field1}` | 任意字段 | 第一因子字段 |
| `{field2}` | 模型字段如`mdl175_01dtsv` | 第二因子字段 |
| `{d1}`, `{d2}`, `{d3}` | 各窗口参数 | 时间窗口 |
| `{threshold}` | `0.03`, `0.1` | 入场阈值 |
| `{exit_threshold}` | `0.25`, `0.5` | 出场阈值 |

**说明**: 使用sta1预定义分组的复合策略

---

### TPL-1656: macro泛化模板
```
模板: group_rank(ts_delta(ts_zscore({macro_field}, {d1}), {d2}), country)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{macro_field}` | 宏观数据字段 | 宏观数据 |
| `{d1}` | `126`, `252` | zscore窗口 |
| `{d2}` | `5`, `22` | delta窗口 |

**说明**: 基于Labs分析macro的泛化模板

---

### TPL-1657: ASI broker模板
```
模板:
signal = group_rank(ts_rank({broker_field}, {d}), market);
trade_when(volume > adv20, signal, -1)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{broker_field}` | broker数据字段 | 券商数据 |
| `{d}` | `22`, `66` | 排名窗口 |

**说明**: ASI区域broker因子，需设置max_trade=ON

---

### TPL-1658: Earnings超预期模板
```
模板:
surprise = (actual_eps - est_eps) / abs(est_eps);
group_rank(ts_zscore(surprise, {d}), industry)
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `66`, `126` | zscore窗口 |

**说明**: 盈利超预期因子

---

### TPL-1659: CCI技术指标模板
```
模板:
tp = (high + low + close) / 3;
cci = (tp - ts_mean(tp, {d})) / (0.015 * ts_mean(abs(tp - ts_mean(tp, {d})), {d}));
group_rank(-cci, {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{d}` | `14`, `20` | CCI窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 商品通道指数(CCI)反转策略

---

### TPL-1660: 0.618黄金比例幂变换模板
```
模板:
power_signal = signed_power({field}, 0.618);
group_rank(ts_zscore(power_signal, {d}), {group})
```
| 占位符 | 可选值 | 说明 |
|--------|--------|------|
| `{field}` | 任意MATRIX字段 | 数据字段 |
| `{d}` | `66`, `126` | zscore窗口 |
| `{group}` | `sector`, `industry` | 分组 |

**说明**: 使用黄金比例0.618进行幂次变换

---

## 附录A：标准时间窗口

| 窗口代号 | 天数 | 含义 |
|---------|------|------|
| `d_week` | 5 | 一周 |
| `d_month` | 22 | 一月 |
| `d_quarter` | 66 | 一季度 |
| `d_half` | 126 | 半年 |
| `d_year` | 252 | 一年 |
| `d_2year` | 504 | 两年 |

**使用规则**:
- 反转因子: 短窗口 `{3, 5, 22}`
- 动量因子: 中窗口 `{22, 66}`
- 长期趋势: 长窗口 `{126, 252, 504}`
- 回归/波动: 超长窗口 `{250, 500, 750}`

---

## 附录B：常用操作符分类

### 时序操作符 `<ts_op/>`
| 操作符 | 用途 |
|--------|------|
| `ts_mean` | 移动平均 |
| `ts_rank` | 时序排名 |
| `ts_delta` | 差分 |
| `ts_std_dev` | 移动标准差 |
| `ts_ir` | 信息比率 |
| `ts_zscore` | 时序Z-score |
| `ts_corr` | 滚动相关性 |
| `ts_regression` | 滚动回归 |
| `ts_decay_linear` | 线性衰减 |
| `ts_decay_exp_window` | 指数衰减 |
| `ts_sum` | 滚动求和 |
| `ts_backfill` | 数据回填 |
| `ts_arg_min` | 最小值位置 |
| `ts_arg_max` | 最大值位置 |
| `ts_max` | 滚动最大值 |
| `ts_min` | 滚动最小值 |
| `ts_delay` | 延迟 |
| `ts_moment` | k阶中心矩 |
| `ts_co_skewness` | 协偏度 |
| `ts_co_kurtosis` | 协峰度 |
| `ts_partial_corr` | 偏相关 |
| `ts_triple_corr` | 三元相关 |
| `ts_theilsen` | Theil-Sen回归 |
| `ts_poly_regression` | 多项式回归残差 |
| `ts_vector_neut` | 向量中性化 |
| `ts_weighted_decay` | 加权衰减 |
| `ts_min_max_cps` | 最小最大压缩 |
| `ts_max_diff` | 与最大值差 |
| `ts_av_diff` | 与均值差 |
| `ts_quantile` | 分位数 |
| `ts_percentage` | 百分位 |
| `ts_median` | 中位数 |
| `ts_product` | 累积乘积 |
| `ts_count_nans` | NaN计数 |
| `ts_scale` | 时序缩放 |
| `ts_target_tvr_hump` | 目标换手率Hump |
| `ts_target_tvr_delta_limit` | Delta换手率限制 |

### 分组操作符 `<group_op/>`
| 操作符 | 用途 |
|--------|------|
| `group_rank` | 分组排名 |
| `group_neutralize` | 分组中性化 |
| `group_zscore` | 分组Z-score |
| `group_mean` | 分组均值 |
| `group_sum` | 分组求和 |
| `group_extra` | 分组提取/填补 |
| `group_backfill` | 分组回填 |
| `group_normalize` | 分组归一化 |
| `group_vector_neut` | 分组向量中性化 |
| `group_vector_proj` | 分组向量投影 |
| `group_count` | 分组计数 |
| `group_std_dev` | 分组标准差 |

### 向量操作符 `<vec_op/>`
| 操作符 | 用途 |
|--------|------|
| `vec_avg` | 向量平均 |
| `vec_sum` | 向量求和 |
| `vec_max` | 向量最大 |
| `vec_min` | 向量最小 |
| `vec_stddev` | 向量标准差 |
| `vec_count` | 向量计数 |
| `vec_norm` | 向量归一化 |
| `vec_zscore` | 向量Z-score |
| `vec_range` | 向量范围 |

### 事件/时间操作符
| 操作符 | 用途 |
|--------|------|
| `days_from_last_change` | 距离上次变化天数 |
| `last_diff_value` | 最近不同值 |
| `ts_step` | 时间步长 |

### 信号处理操作符
| 操作符 | 用途 |
|--------|------|
| `signed_power` | 带符号幂变换 |
| `clamp` | 边界限制 |
| `left_tail` | 左尾截断 |
| `right_tail` | 右尾截断 |
| `fraction` | 分数映射 |
| `nan_out` | NaN外推 |
| `purify` | 数据清洗 |
| `keep` | 条件保留 |
| `scale_down` | 缩放降维 |
| `hump` | Hump平滑 |
| `hump_decay` | Hump衰减 |

### 其他常用操作符
| 操作符 | 用途 |
|--------|------|
| `rank` | 截面排名 |
| `zscore` | 截面Z-score |
| `sigmoid` | Sigmoid归一化 |
| `winsorize` | 极端值截断 |
| `truncate` | 截断 |
| `tail` | 尾部处理 |
| `scale` | 缩放 |
| `filter` | 过滤 |
| `densify` | 稠密化 |
| `bucket` | 分桶 |
| `log` | 对数 |
| `abs` | 绝对值 |
| `if_else` | 条件判断 |
| `trade_when` | 条件交易 |
| `regression_neut` | 回归中性化 |
| `regression_proj` | 回归投影 |
| `is_nan` | NaN检测 |
| `is_not_nan` | 非NaN检测 |
| `inst_pnl` | 单标的PnL |
| `convert` | 单位转换 |
| `pasteurize` | 去无效值 |

---

## 附录C：数据字段分类

### 量价类 `<pv_field/>`
```
close, open, high, low, vwap
returns, volume, adv20, sharesout, cap
```

### 基本面类 `<fundamental_field/>`
```
assets, sales, ebitda, net_income, eps, operating_income
goodwill, debt, cash, equity, gross_profit
fnd6_*, fnd72_*, mdl175_*, mdl163_*
debt_to_equity, roe, roa
```

### 分析师类 `<analyst_field/>` (VECTOR)
```
anl4_eps_mean, anl4_eps_low, anl4_eps_high
anl4_revenue_mean, anl4_fcf_value, anl4_netprofit_mean
anl4_adjusted_netincome_ft, anl4_bvps_flag
oth41_s_west_*, analyst_*
```

### 情绪类 `<sentiment_field/>`
```
scl12_sentiment, scl12_buzz, scl12_alltype_buzzvec
snt_value, snt_buzz, snt_buzz_ret, snt_buzz_bfl
nws18_relevance, nws18_ber
nws12_prez_result2, nws12_prez_short_interest
mws85_sentiment, mws46_mcv
```

### 期权类 `<option_field/>`
```
option8_*, option14_*
implied_volatility_call_120, implied_volatility_call_270
parkinson_volatility_120, parkinson_volatility_270
pcr_vol_10, pcr_vol_30
put_delta, call_delta, put_gamma, call_gamma
put_theta, call_theta, put_vega, call_vega
call_breakeven_10, put_breakeven_10
```

### 模型类 `<model_field/>`
```
mdl175_01dtsv, mdl175_01icc
mdl163_*, mdl*
```

### 分组类 `<group/>`
```
industry, sector, subindustry
market, country, exchange
sta1_top3000c20, sta1_*
pv13_*, pv27_*
```

"""

class SingleSession(requests.Session):
    """A ``requests.Session`` subclass that hands out one shared instance.

    Every ``SingleSession()`` call yields the same object. Creation is guarded
    by a class-level lock with a second check inside it (double-checked
    locking), and the base-class initialiser runs only for the first
    construction so later calls do not re-initialise the existing session.
    """

    # The one shared instance; stays None until the first construction.
    _instance = None
    # Serialises creation of the shared instance in __new__.
    _lock = threading.Lock()
    # Lock handed to callers through get_relogin_lock().
    _relogin_lock = threading.Lock()
    # Flipped to True on the instance once __init__ has run.
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is not None:
            return cls._instance
        with cls._lock:
            # Re-check under the lock: another thread may have created it
            # between the unlocked test above and acquiring the lock.
            if cls._instance is None:
                cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, *args, **kwargs):
        """Run the base-class initialiser once; later constructions do nothing."""
        if self._initialized:
            return
        super().__init__(*args, **kwargs)
        self._initialized = True

    def get_relogin_lock(self):
        """Return the class-level lock stored in ``_relogin_lock``."""
        return self._relogin_lock

def load_template_summary(file_path: Optional[str] = None) -> str:
    """
    Return the template summary text, preferring a user-supplied file.

    Args:
        file_path: Optional path to a text/markdown file holding the template
                   summary. When it is falsy, does not point at a regular
                   file, or cannot be read, the module-level
                   ``template_summary`` is returned instead.

    Returns:
        str: The file's content (decoded as UTF-8) or the built-in summary.
    """
    if not file_path:
        print("✓ 使用内置模板总结")
        return template_summary

    try:
        candidate = Path(file_path)
        if not (candidate.exists() and candidate.is_file()):
            print(f"⚠ 警告: 文件不存在: {file_path}，将使用内置模板总结")
        else:
            text = candidate.read_text(encoding='utf-8')
            print(f"✓ 成功从文件加载模板总结: {file_path}")
            return text
    except Exception as e:
        # Any failure while probing or reading the file falls back to the
        # built-in summary rather than aborting.
        print(f"⚠ 警告: 读取文件时出错: {e}，将使用内置模板总结")

    # Fall back to the built-in template summary.
    print("✓ 使用内置模板总结")
    return template_summary


def get_credentials() -> tuple[str, str]:
    """
    Return the platform credentials held in module-level globals.

    Nothing is read from disk and nothing is prompted for here: the function
    pairs up the module globals ``username`` and ``password``, which must
    have been assigned before the call.

    Returns:
        tuple: ``(username, password)``.

    Raises:
        NameError: If either global has not been defined yet.
    """
    # please input your own BRAIN Credentials into the function
    return username, password

def get_token_from_auth_server() -> str:
    """
    Return the LLM gateway token stored in the module global ``LLM_API_KEY``.

    Despite the name, no request is made here; the global must have been
    assigned before the call.

    Returns:
        str: The current value of ``LLM_API_KEY``.
    """
    # please input your own LLM Gateway token into the function, please note, we are using kimi-k2-turbo-preview model
    token = LLM_API_KEY
    return token

def interactive_input() -> dict:
    """
    Interactively collect every setting the program needs.

    Prompts on the console (secrets are read with ``getpass`` so they are not
    echoed) and returns the answers in a dictionary with these keys:
    ``LLM_model_name``, ``LLM_API_KEY``, ``llm_base_url``, ``username``,
    ``password``, ``template_summary_path`` (``None`` when left blank),
    ``alpha_id`` and ``top_n_datafield``.

    Returns:
        dict: The collected configuration.

    Raises:
        SystemExit: With status 1 when the Alpha ID is left empty.
    """
    print("\n" + "="*60)
    print("欢迎使用 Alpha Transformer 交互式配置")
    print("此程序在于让您输入一个Alpha ID即可通过历史总结的Alpha模板,转化成更多的表达式")
    print("72变,助您腾云驾雾")
    print("如果你想修改模型，则可以使用新模型的url和api key")
    print("不同模型效果不同，默认的kimi可能会产生语法错误，请检查生成的模板文件进行甄别")
    print("强烈推荐你使用自己总结的模板文档，效果会更好")
    print("="*60 + "\n")

    config = {}

    # Step 1: LLM model name (blank keeps the default).
    print("【1/7】LLM 模型配置")
    print("如果你想修改模型，则可以使用新模型的名称")
    default_model = "kimi-k2-turbo-preview"
    model_input = input(f"请输入 LLM 模型名称 (直接回车使用默认值: {default_model}): ").strip()
    config['LLM_model_name'] = model_input if model_input else default_model
    print(f"✓ LLM 模型名称: {config['LLM_model_name']}\n")

    # Step 2: LLM API key (not echoed; an empty value only triggers a warning).
    print("【2/7】LLM API Key 配置")
    api_key = getpass.getpass("请输入 LLM API Key (输入时不会显示): ").strip()
    if not api_key:
        print("⚠ 警告: API Key 为空，程序可能无法正常工作")
    config['LLM_API_KEY'] = api_key
    print("✓ API Key 已设置\n")

    # Step 3: LLM base URL (blank keeps the default).
    print("【3/7】LLM Base URL 配置")
    print("提示：不同模型有不同的URL")
    default_url = "https://api.moonshot.cn/v1"
    url_input = input(f"请输入 LLM Base URL (直接回车使用默认值: {default_url}): ").strip()
    config['llm_base_url'] = url_input if url_input else default_url
    print(f"✓ LLM Base URL: {config['llm_base_url']}\n")

    # Step 4: BRAIN platform user name followed by the password.
    print("【4/7】BRAIN 平台认证信息")
    username_input = input("请输入 BRAIN 平台用户名/邮箱: ").strip()
    if not username_input:
        print("⚠ 警告: 用户名为空，程序可能无法正常工作")
    config['username'] = username_input
    print("✓ 用户名已设置\n")

    password_input = getpass.getpass("请输入 BRAIN 平台密码 (输入时不会显示): ").strip()
    if not password_input:
        print("⚠ 警告: 密码为空，程序可能无法正常工作")
    config['password'] = password_input
    print("✓ 密码已设置\n")

    # Step 5: optional path to a template-summary file.
    print("【5/7】模板总结文件配置")
    print("强烈推荐你使用自己总结的模板文档，效果会更好")
    print("提示: 如果您有 template_summary 的 .txt 或 .md 文件，请输入完整路径")
    print("      如果没有，直接回车将使用内置模板总结")
    template_path = input("请输入模板总结文件路径 (直接回车使用内置模板): ").strip()
    config['template_summary_path'] = template_path if template_path else None
    if template_path:
        print(f"✓ 将尝试从文件加载: {template_path}\n")
    else:
        print("✓ 将使用内置模板总结\n")

    # Step 6: Alpha ID — the only mandatory answer; empty input aborts.
    print("【6/7】Alpha ID 配置")
    alpha_id = input("请输入要处理的 Alpha ID: ").strip()
    if not alpha_id:
        print("❌ 错误: Alpha ID 不能为空")
        sys.exit(1)
    config['alpha_id'] = alpha_id
    print(f"✓ Alpha ID: {alpha_id}\n")

    # Step 7: Top N, the number of data-field candidates per placeholder.
    print("【7/7】候选数量配置 (Top N)")
    print("提示: 此参数控制为每个占位符生成的数据字段候选数量")

    default_datafield_topn = 50
    datafield_topn_input = input(f"请输入数据字段候选数量 (直接回车使用默认值: {default_datafield_topn}): ").strip()
    try:
        top_n = int(datafield_topn_input) if datafield_topn_input else default_datafield_topn
        # A candidate count must be positive; treat 0 / negatives like any
        # other invalid entry instead of passing them downstream.
        if top_n <= 0:
            raise ValueError(f"non-positive candidate count: {top_n}")
    except ValueError:
        print(f"⚠ 警告: 输入无效，使用默认值: {default_datafield_topn}")
        top_n = default_datafield_topn
    config['top_n_datafield'] = top_n
    print(f"✓ 数据字段候选数量: {config['top_n_datafield']}\n")

    print("="*60)
    print("配置完成！开始处理...")
    print("="*60 + "\n")

    return config


def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame:
    """
    Expand dictionary columns in a DataFrame into separate columns.

    A column counts as a dictionary column when the value in its first row is
    a ``dict``. Each such column ``col`` contributes one new column per key,
    named ``f"{col}_{key}"``; the original columns are kept and the new ones
    are appended after them.

    Args:
        data (pandas.DataFrame): The input DataFrame.

    Returns:
        pandas.DataFrame: A new DataFrame with the expanded columns appended.
        When ``data`` is empty or has no dictionary columns, a copy of
        ``data`` is returned unchanged.
    """
    # An empty frame has no first row to inspect (.iloc[0] would raise).
    if data.empty:
        return data.copy()

    dict_columns = [col for col in data.columns if isinstance(data[col].iloc[0], dict)]
    # pd.concat refuses an empty list, so bail out when nothing needs expanding.
    if not dict_columns:
        return data.copy()

    new_columns = pd.concat(
        [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns],
        axis=1,
    )
    return pd.concat([data, new_columns], axis=1)

def start_session() -> SingleSession:
    """
    Start a session with the WorldQuant BRAIN platform.

    Posts the credentials from ``get_credentials()`` to the authentication
    endpoint using the shared ``SingleSession``. On a 401 response whose
    ``WWW-Authenticate`` header is ``persona`` the user is walked through
    biometric authentication; on any other 401 the credentials are treated as
    wrong and ``get_credentials()`` is asked again.

    Returns:
        SingleSession: The session object used for the authentication calls.

    Raises:
        requests.exceptions.HTTPError: If the credentials are rejected and
            ``get_credentials()`` returns the very same pair again, since
            retrying would only repeat the failed request indefinitely.
        requests.exceptions.RequestException: Presumably propagated from the
            underlying ``post`` calls on network failure (not caught here).
    """
    brain_api_url = "https://api.worldquantbrain.com"
    s = SingleSession()
    credentials = get_credentials()

    while True:
        s.auth = credentials
        r = s.post(brain_api_url + "/authentication")

        # The body is only echoed for diagnostics; a non-JSON body must not
        # abort the login, so fall back to the raw text.
        try:
            auth_payload = r.json()
        except ValueError:
            auth_payload = r.text
        print(f"New session created (ID: {id(s)}) with authentication response: {r.status_code}, {auth_payload} (新会话已创建)")

        if r.status_code != requests.status_codes.codes.unauthorized:
            return s

        # .get(): a 401 without the header is handled as bad credentials
        # instead of raising KeyError.
        if r.headers.get("WWW-Authenticate") == "persona":
            biometric_url = urljoin(r.url, r.headers["Location"])
            print(
                "Complete biometrics authentication and press any key to continue (请完成生物识别认证并按任意键继续): \n"
                + biometric_url
                + "\n"
            )
            input()
            s.post(biometric_url)
            # Poll until the platform confirms with 201, re-prompting the
            # user after every unsuccessful check.
            while True:
                if s.post(biometric_url).status_code != 201:
                    input(
                        "Biometrics authentication is not complete. Please try again and press any key when completed (生物识别认证未完成，请重试并按任意键): \n"
                    )
                else:
                    break
            return s

        print("\nIncorrect email or password (邮箱或密码错误)\n")
        retry_credentials = get_credentials()
        if retry_credentials == credentials:
            # Same credentials would produce the same 401 forever.
            raise requests.exceptions.HTTPError(
                "BRAIN authentication failed: credentials were rejected and "
                "get_credentials() returned the same values again",
                response=r,
            )
        credentials = retry_credentials

def get_data_categories(s: SingleSession) -> list[dict]:
    """
    Return the BRAIN data categories, fetching them once and caching them.

    The result is stored in the module global ``DATA_CATEGORIES``; while that
    global is not ``None`` it is returned without any request. A list
    response is used as-is, a dict response contributes its ``'results'``
    entry (``[]`` when absent), and any other shape yields ``[]``.

    Args:
        s: Session object used to issue the GET request.

    Returns:
        The cached or freshly fetched categories. If anything raises while
        fetching, the error is printed and ``[]`` is returned without
        touching the cache.
    """
    global DATA_CATEGORIES
    if DATA_CATEGORIES is None:
        try:
            response = s.get("https://api.worldquantbrain.com" + "/data-categories")
            response.raise_for_status()
            payload = response.json()
            if isinstance(payload, list):
                categories = payload
            elif isinstance(payload, dict):
                categories = payload.get('results', [])
            else:
                categories = []
            DATA_CATEGORIES = categories
        except Exception as e:
            print(f"Error fetching data categories: {e}")
            return []
    return DATA_CATEGORIES

def get_datafields(
    s: SingleSession,
    instrument_type: str = "EQUITY",
    region: str = "USA",
    delay: int = 1,
    universe: str = "TOP3000",
    theme: str = "false",
    dataset_id: str = "",
    data_type: str = "MATRIX",
    search: str = "",
    category: Union[str, list] = "",
) -> pd.DataFrame:
    """
    Retrieve available datafields based on specified parameters.

    Pages through the ``/data-fields`` endpoint 50 rows at a time. Without a
    search string the total comes from the API's ``count``; with a search
    string at most 100 rows are scanned (500 when ``category`` is also given,
    stopping early once 50 matching rows have been collected). Category
    filtering is applied client-side on each row's ``category.id``.

    Args:
        s (SingleSession): An authenticated session object.
        instrument_type (str, optional): The type of instrument. Defaults to "EQUITY".
        region (str, optional): The region. Defaults to "USA".
        delay (int, optional): The delay. Defaults to 1.
        universe (str, optional): The universe. Defaults to "TOP3000".
        theme (str, optional): Accepted for compatibility; it is not part of
            the request built here. Defaults to "false".
        dataset_id (str, optional): The ID of a specific dataset. Defaults to "".
        data_type (str, optional): The type of data; "ALL" omits the type
            filter. Defaults to "MATRIX".
        search (str, optional): A search string to filter datafields. Defaults to "".
        category (str or list, optional): A category ID or list of IDs to filter datafields. Defaults to "".

    Returns:
        pandas.DataFrame: One row per datafield with dictionary columns
        expanded by ``expand_dict_columns``; an empty DataFrame when nothing
        is found or the initial count request fails.
    """
    brain_api_url = "https://api.worldquantbrain.com"
    type_param = f"&type={data_type}" if data_type != "ALL" else ""

    base_url = (
        brain_api_url
        + "/data-fields?"
        + f"&instrumentType={instrument_type}"
        + f"&region={region}&delay={str(delay)}&universe={universe}{type_param}&limit=50"
    )

    if dataset_id:
        base_url += f"&dataset.id={dataset_id}"

    if len(search) > 0:
        base_url += f"&search={search}"

    def page_url(offset: int) -> str:
        # Plain concatenation on purpose: running str.format over the URL
        # would misinterpret any "{" / "}" contained in caller-supplied text
        # such as the search string.
        return f"{base_url}&offset={offset}"

    count = 0
    if len(search) == 0:
        try:
            count = s.get(page_url(0)).json()["count"]
        except Exception as e:
            print(f"Error getting count: {e}")
            return pd.DataFrame()

        if count == 0:
            print(
                f"No fields found (未找到字段): region={region}, delay={str(delay)}, universe={universe}, "
                f"type={data_type}, dataset.id={dataset_id}"
            )
            return pd.DataFrame()
    else:
        if category:
            count = 500 # Search deeper if filtering
        else:
            count = 100

    max_try = 5
    datafields_list = []
    found_count = 0
    target_found = 50 if category else count
    # Normalise to a list so a single ID and a list of IDs share one filter.
    wanted_categories = category if isinstance(category, list) else [category]

    for offset in range(0, count, 50):
        payload = None
        for _ in range(max_try):
            try:
                resp = s.get(page_url(offset))
                # Rate limited: wait and repeat the same page.
                while resp.status_code == 429:
                    print("status_code 429, sleep 3 seconds")
                    time.sleep(3)
                    resp = s.get(page_url(offset))
                if resp.status_code == 200:
                    body = resp.json()  # parsed once and reused below
                    if "results" in body:
                        payload = body
                        break
            except Exception as e:
                # Best effort: report and retry. KeyboardInterrupt/SystemExit
                # are deliberately not caught so the loop can be interrupted.
                print(f"Error fetching data fields (offset={offset}): {e}")
            time.sleep(5)

        if payload is None:
            # Every attempt for this page failed; move on to the next page.
            continue

        results = payload.get("results", [])
        if not results:
            break

        if category:
            filtered_results = [
                item for item in results
                if isinstance(item.get('category'), dict)
                and item['category'].get('id') in wanted_categories
            ]
            datafields_list.append(filtered_results)
            found_count += len(filtered_results)
            if len(search) > 0 and found_count >= target_found:
                break
        else:
            datafields_list.append(results)

    datafields_list_flat = [item for sublist in datafields_list for item in sublist]

    if not datafields_list_flat:
        return pd.DataFrame()

    datafields_df = pd.DataFrame(datafields_list_flat)
    datafields_df = expand_dict_columns(datafields_df)
    return datafields_df

def set_alpha_properties(
    s: SingleSession,
    alpha_id: str,
    name: Optional[str] = None,
    color: Optional[str] = None,
    regular_desc: Optional[str] = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags: Optional[list[str]] = None,
) -> requests.Response:
    """
    Send a PATCH request that updates selected properties of one alpha.

    Only the properties that were explicitly supplied end up in the request
    body; everything else is left out of the payload.

    Args:
        s (SingleSession): Session object used to issue the PATCH request.
        alpha_id (str): The ID of the alpha to update.
        name (str, optional): New name. Omitted from the payload when None.
        color (str, optional): New color. Omitted from the payload when None.
        regular_desc (str, optional): Description stored under "regular". Omitted when None.
        selection_desc (str, optional): Description stored under "selection".
            The literal string "None" (the default) means "do not send".
        combo_desc (str, optional): Description stored under "combo".
            The literal string "None" (the default) means "do not send".
        tags (list, optional): Tags to set. Omitted from the payload when None.

    Returns:
        requests.Response: The response returned by the session's patch call.
    """
    payload = {}

    # Flat, top-level properties: sent only when the caller provided a value.
    for key, value in (("name", name), ("color", color), ("tags", tags)):
        if value is not None:
            payload[key] = value

    # Descriptions are nested one level down, under the section they belong to.
    if regular_desc is not None:
        payload["regular"] = {"description": regular_desc}
    # The two descriptions below use the *string* "None" as their "unset" marker.
    if selection_desc != "None":
        payload["selection"] = {"description": selection_desc}
    if combo_desc != "None":
        payload["combo"] = {"description": combo_desc}

    endpoint = "https://api.worldquantbrain.com" + "/alphas/" + alpha_id
    return s.patch(endpoint, json=payload)


def extract_placeholders(template_expression: str) -> list[str]:
    """
    Return every placeholder found in a template expression, in order of appearance.

    A placeholder is a self-closing tag such as `<data_field/>` whose name is made
    only of letters, digits and underscores. Repeated placeholders are returned
    once per occurrence.
    """
    placeholder_pattern = re.compile(r'(<[A-Za-z0-9_]+/>)')
    return [hit.group(1) for hit in placeholder_pattern.finditer(template_expression)]

def parse_alpha_code(alpha_code: str, all_operators: list[dict]) -> tuple[list[str], list[str]]:
    """
    Split the identifiers used in an alpha expression into operators and data fields.

    Comments are stripped first (`/* ... */` blocks, then `#` to end of line).
    Every remaining identifier is classified as:
      * an operator, when its exact name appears in `all_operators`;
      * ignored, when its lower-cased form is a reserved word / common builtin name;
      * a data field otherwise.

    Args:
        alpha_code (str): The alpha expression source.
        all_operators (list[dict]): Operator records; each must have a 'name' key.

    Returns:
        tuple[list[str], list[str]]: (operators, data fields), each de-duplicated and
        listed in order of first appearance in the code.
    """
    # Remove C-style comments /* ... */
    alpha_code = re.sub(r"/\*[\s\S]*?\*/", "", alpha_code)
    # Remove Python-style comments # ...
    alpha_code = re.sub(r"#.*", "", alpha_code)

    # Set membership instead of scanning a list for every identifier.
    operator_names = {op['name'] for op in all_operators}

    # Built once per call and stored lower-cased, because the lookup below is
    # case-insensitive. (Mixed-case entries such as 'None' could never match a
    # lower-cased identifier, so a literal None used to be reported as a data field.)
    reserved_words = frozenset(
        word.lower() for word in (
            'true', 'false', 'null', 'nan', 'if', 'else', 'for', 'while', 'return',
            'and', 'or', 'not', 'in', 'is', 'try', 'except', 'finally', 'with', 'as',
            'def', 'class', 'import', 'from', 'yield', 'lambda', 'global', 'nonlocal',
            'break', 'continue', 'pass', 'async', 'await', 'raise', 'assert', 'del',
            'print', 'input', 'len', 'min', 'max', 'sum', 'abs', 'round', 'int',
            'float', 'str', 'list', 'dict', 'set', 'tuple', 'range', 'map', 'filter',
            'zip', 'open', 'file', 'type', 'id', 'dir', 'help', 'object', 'super',
            'issubclass', 'isinstance', 'hasattr', 'getattr', 'setattr', 'delattr',
            '__import__', 'None', 'True', 'False',
        )
    )

    # Dicts are used as ordered sets so the output order is deterministic
    # (first appearance) instead of depending on set iteration order.
    found_operators: dict[str, None] = {}
    found_datafields: dict[str, None] = {}

    # The pattern only yields tokens starting with a letter or underscore, so
    # numeric literals never reach the classification below.
    for identifier in re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', alpha_code):
        if identifier in operator_names:
            found_operators[identifier] = None
        elif identifier.lower() not in reserved_words:
            found_datafields[identifier] = None

    return list(found_operators), list(found_datafields)

async def generate_alpha_description(alpha_id: str, brain_session: SingleSession) -> str:
    """
    Fetch an alpha, let an LLM judge its description and, if needed, replace it.

    Steps:
      1. GET the alpha and read its settings and its 'regular' (or 'combo') section.
      2. Parse the code into operators and data fields and collect their descriptions.
      3. Ask the LLM whether the current description is good enough.
      4. If not, ask the LLM for a new description and PATCH it onto the alpha
         (under the same section the code was read from). When the LLM yields no
         usable text, the existing description is kept and nothing is patched.

    Note: the LLM client is created inside this function on every LLM call, using
    `get_token_from_auth_server()` and the module-level `llm_base_url` /
    `LLM_model_name`. If that authentication step fails the process exits via
    `sys.exit(1)`.

    Args:
        alpha_id (str): The ID of the alpha to retrieve.
        brain_session (SingleSession): The active BRAIN API session.

    Returns:
        str: A JSON string with keys 'settings' and 'expression' (the latter carrying
             the final description), or an empty JSON object string ("{}") on error.
    """

    async def call_llm_new(prompt: str) -> dict:
        # Declare the module-level configuration values used below.
        global LLM_model_name, LLM_API_KEY, llm_base_url
        try:
            llm_api_key = get_token_from_auth_server()
            llm_base_url_value = llm_base_url  # module-level setting
            llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
            print("LLM Gateway Authentication successful. (LLM网关认证成功)")
        except Exception as e:
            print(f"LLM Gateway Authentication failed (LLM网关认证失败): {e}")
            # NOTE(review): terminates the whole process; kept as-is because callers
            # may rely on it, but SystemExit is not caught by the handlers below.
            sys.exit(1)

        print("--- Calling LLM to propose templates... (正在调用LLM生成模板...) ---")
        try:
            # Await the async create call
            response = await llm_client.chat.completions.create(
                model=LLM_model_name,
                messages=[
                    {"role": "system", "content": "You are a quantitative finance expert and a helpful assistant designed to output JSON."},
                    {"role": "user", "content": prompt},
                ],
                # response_format={"type": "json_object"},
            )

            # The async client may return a nested structure. Try to extract content robustly.
            content = None
            if isinstance(response, dict):
                # Some clients return raw dicts
                # Try common paths
                choices = response.get('choices')
                if choices and isinstance(choices, list):
                    msg = choices[0].get('message') or choices[0]
                    content = msg.get('content') if isinstance(msg, dict) else None
                elif 'content' in response:
                    content = response.get('content')
            else:
                # Fallback: attempt attribute access
                try:
                    content = response.choices[0].message.content
                except Exception:
                    content = None

            if content is None:
                # As a last resort, try to stringify the response
                content = str(response)

            # If content is already a dict/list, return it directly; if it's a JSON string, parse it.
            if isinstance(content, (dict, list)):
                return content
            if isinstance(content, str):
                try:
                    return json.loads(content)
                except json.JSONDecodeError:
                    # Return wrapped string if not JSON
                    return {"text": content}

            return {}
        except Exception as e:
            print(f"Error calling LLM (调用LLM出错): {e}")
            return {}

    def extract_description_text(llm_reply) -> str:
        """Return the description text carried by an LLM reply, or '' if there is none."""
        if isinstance(llm_reply, dict):
            # "description" is the key the model may use; "text" is the wrapper
            # call_llm_new puts around replies that were not valid JSON.
            for key in ("description", "text"):
                if llm_reply.get(key):
                    llm_reply = llm_reply[key]
                    break
        if isinstance(llm_reply, str):
            return llm_reply.strip()
        if llm_reply:
            # Structured (dict/list) answer: keep it, rendered as readable JSON.
            return json.dumps(llm_reply, indent=2)
        return ""

    try:
        brain_api_url = "https://api.worldquantbrain.com"
        alpha_url = f"{brain_api_url}/alphas/{alpha_id}"
        response = brain_session.get(alpha_url)
        response.raise_for_status()  # Raise an exception for HTTP errors

        alpha_data = response.json()
        settings = alpha_data.get('settings', {})
        # Prefer the regular section; fall back to combo when regular is absent or empty.
        section_name = 'regular' if alpha_data.get('regular') else 'combo'
        expression_dict = alpha_data.get(section_name)

        if not expression_dict or 'code' not in expression_dict:
            print(f"Error: Alpha expression code not found for Alpha ID (未找到Alpha表达式代码): {alpha_id}")
            return json.dumps({})

        alpha_code = expression_dict['code']
        current_description = expression_dict.get('description', '')

        # 1. Get all operators for parsing (no filter as per feedback)
        operators_data = get_brain_operators()
        all_operators = operators_data.get('operators', [])

        # 2. Parse the code to get operators and datafields
        found_operators_names, found_datafields_names = parse_alpha_code(alpha_code, all_operators)

        # 3. Get descriptions for operators
        operator_descriptions = {op['name']: op.get('description', 'No description available.') for op in all_operators if op['name'] in found_operators_names}

        # 4. Get descriptions for datafields
        datafield_descriptions = {}
        if found_datafields_names:
            # Extract settings from alpha_data for the get_datafields call
            instrument_type = settings.get('instrumentType', 'EQUITY')
            region = settings.get('region', 'USA')
            universe = settings.get('universe', 'TOP3000')
            delay = settings.get('delay', 1)

            for df_name in found_datafields_names:
                # get_datafields returns a DataFrame, so we need to process it
                datafield_df = get_datafields(s=brain_session, instrument_type=instrument_type, region=region, delay=delay, universe=universe, search=df_name)
                if not datafield_df.empty:
                    # Assuming the first result is the most relevant
                    datafield_descriptions[df_name] = datafield_df.iloc[0].get('description', 'No description available.')
                else:
                    datafield_descriptions[df_name] = 'No description found.'

        # 5. Use LLM to judge if current description is good
        judgment_prompt = f"""
        Given the following alpha code, its current description, and descriptions of its operators and datafields:

        Alpha Code:
        {alpha_code}

        Current Description:
        {current_description}

        Operators and their descriptions:
        {json.dumps(operator_descriptions, indent=2)}

        Datafields and their descriptions:
        {json.dumps(datafield_descriptions, indent=2)}

        Alpha Settings:
        {json.dumps(settings, indent=2)}

        Is the current description good enough? Respond with 'yes' or 'no' in a JSON object: {{"judgment": "yes/no"}}
        A "good" description should clearly explain the investment idea, rationale for data used, and rationale for operators used.
        """

        judgment_response = await call_llm_new(judgment_prompt)
        # Anything that is not an explicit "yes" (including a malformed reply) counts as "no".
        judgment_value = judgment_response.get("judgment", "no") if isinstance(judgment_response, dict) else "no"
        is_description_good = str(judgment_value).strip().lower() == "yes"

        new_description = current_description
        if not is_description_good:
            # 6. If not good, use another LLM to generate a new description
            generation_prompt = f"""
            Based on the following alpha code, its operators, datafields, and settings, generate a new, improved description.
            The description should clearly explain the investment idea, rationale for data used, and rationale for operators used.
            Format the output as:
            "Idea: xxxxx\\nRationale for data used: xxxxx\\nRationale for operators used: xxxxxxx"

            Alpha Code:
            {alpha_code}

            Operators and their descriptions:
            {json.dumps(operator_descriptions, indent=2)}

            Datafields and their descriptions:
            {json.dumps(datafield_descriptions, indent=2)}

            Alpha Settings:
            {json.dumps(settings, indent=2)}
            """

            generated_description_response = await call_llm_new(generation_prompt)
            generated_description = extract_description_text(generated_description_response)

            if generated_description:
                new_description = generated_description

                # 7. Override with the new description and patch the alpha, writing to
                #    the same section (regular or combo) the code was read from.
                description_kwarg = "regular_desc" if section_name == 'regular' else "combo_desc"
                patch_response = set_alpha_properties(
                    s=brain_session,
                    alpha_id=alpha_id,
                    **{description_kwarg: new_description}
                )
                if getattr(patch_response, "ok", True):
                    print(f"Alpha {alpha_id} description updated on platform. (Alpha描述已在平台更新)")
                else:
                    print(
                        f"Warning: Failed to update description for Alpha {alpha_id} on platform "
                        f"(警告：平台更新Alpha描述失败): HTTP {getattr(patch_response, 'status_code', 'unknown')}"
                    )
            else:
                # A failed or empty LLM reply must not overwrite the existing description.
                print(f"Warning: LLM returned no usable description for Alpha {alpha_id}; keeping the existing one. (警告：LLM未返回可用描述，保留原描述)")

        # expression_dict is the section object inside alpha_data, so this also
        # updates the value that is serialized below.
        expression_dict['description'] = new_description

        return json.dumps({
            'settings': settings,
            'expression': expression_dict
        })

    except requests.exceptions.RequestException as e:
        print(f"Error during API request (API请求出错): {e}")
        return json.dumps({})
    except json.JSONDecodeError:
        print("Error: Could not decode JSON response from API. (无法解析API的JSON响应)")
        return json.dumps({})
    except Exception as e:
        print(f"An unexpected error occurred (发生意外错误): {e}")
        return json.dumps({})

def get_brain_operators(scope_filters: Optional[list[str]] = None) -> dict:
    """
    Fetch the operator list from the BRAIN `/operators` endpoint.

    A new session is obtained through `start_session()` for the request. When
    `scope_filters` is given and non-empty, only operators whose 'scope' contains
    at least one of the requested scopes are kept.

    Args:
        scope_filters (list[str], optional): Scopes to keep (e.g., ["REGULAR", "TS_OPERATOR"]).
                                             None or empty means "no filtering".

    Returns:
        dict: {'operators': [...], 'count': <number of operators>}, or an empty
              dictionary when the request fails or the payload is not a list.
    """
    try:
        api_session = start_session()
        reply = api_session.get("https://api.worldquantbrain.com" + "/operators")
        reply.raise_for_status()  # HTTP errors are handled by the except clauses below

        payload = reply.json()
        if not isinstance(payload, list):
            print(f"Error: Expected a list of operators, but received type (预期运算符列表，但收到类型): {type(payload)}")
            return {}

        selected = payload
        if scope_filters:
            # Keep an operator as soon as one requested scope appears in its 'scope'.
            selected = [
                operator for operator in payload
                if any(wanted in operator.get('scope', []) for wanted in scope_filters)
            ]

        return {
            'operators': selected,
            'count': len(selected)
        }

    except requests.exceptions.RequestException as e:
        print(f"Error during API request for operators (获取运算符时API请求出错): {e}")
        return {}
    except json.JSONDecodeError:
        print("Error: Could not decode JSON response from operators API. (无法解析运算符API的JSON响应)")
        return {}
    except Exception as e:
        print(f"An unexpected error occurred while getting operators (获取运算符时发生意外错误): {e}")
        return {}

async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int = 3) -> dict:
    """
    Send a prompt to the LLM and return its reply parsed as JSON.

    The reply is expected to be JSON, optionally wrapped in a Markdown code fence
    (```json ... ``` or ``` ... ```), which is stripped before parsing. The call is
    retried on both request errors and JSON parsing errors, with a 2-second pause
    between attempts (no pause after the final attempt).

    Args:
        prompt (str): The user prompt.
        llm_client (openai.AsyncOpenAI): Client used for the chat completion call.
        max_retries (int): Maximum number of attempts.

    Returns:
        The parsed JSON value. Callers in this module handle both dict and list
        results. An empty dict is returned when the client is missing or every
        attempt failed.
    """
    if not llm_client:
        print("LLM client not initialized. Please check authentication. (LLM客户端未初始化，请检查认证)")
        return {}

    print("--- Calling LLM... (正在调用LLM...) ---")

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        content = ""
        try:
            response = await llm_client.chat.completions.create(
                model=LLM_model_name,  # Module-level model name
                messages=[
                    {"role": "system", "content": "You are a quantitative finance expert and a helpful assistant designed to output JSON."},
                    {"role": "user", "content": prompt},
                ],
                # response_format={"type": "json_object"},
            )
            content = response.choices[0].message.content
            if content is None:
                # Report an empty message explicitly instead of failing on the
                # substring checks below with an unrelated TypeError.
                raise ValueError("LLM returned a message with no content")

            # Try to clean markdown code blocks if present
            if "```json" in content:
                content = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                # Element 1 of the split is the text between the first two fences.
                content = content.split("```")[1].strip()

            return json.loads(content)
        except json.JSONDecodeError as e:
            print(f"⚠ JSON Decode Error (Attempt {attempt + 1}/{max_retries}): {e}")
            if is_last_attempt:
                print(f"❌ Failed to parse JSON after {max_retries} attempts. Raw content: {content[:100]}...")
        except Exception as e:
            print(f"⚠ LLM Call Error (Attempt {attempt + 1}/{max_retries}): {e}")
            if is_last_attempt:
                print(f"❌ Failed to call LLM after {max_retries} attempts.")

        # Wait before retrying; there is nothing to wait for after the last attempt.
        if not is_last_attempt:
            await asyncio.sleep(2)

    return {}

async def propose_alpha_templates(alpha_details: dict, template_summary: str, llm_client: openai.AsyncOpenAI, user_data_type: str = "MATRIX") -> dict:
    """
    Ask the LLM for new alpha templates derived from a seed alpha.

    The prompt contains the seed alpha's expression, the supplied template summary
    and, for the MATRIX / VECTOR data types, an extra note about vector operators.

    Args:
        alpha_details (dict): Seed alpha details; must contain a non-empty 'expression'.
        template_summary (str): Summary of alpha templates used as inspiration.
        llm_client (openai.AsyncOpenAI): Client passed through to `call_llm`.
        user_data_type (str): "MATRIX" or "VECTOR"; any other value adds no note.

    Returns:
        dict: Whatever `call_llm` returns for the prompt (expected: template -> explanation),
              or an empty dict when the expression is missing or the call raises.
    """
    seed_expression = alpha_details.get('expression')
    if not seed_expression:
        print("Error: Alpha expression is missing. (错误：缺少Alpha表达式)")
        return {}
    print(f"current seed alpha detail (当前种子Alpha详情): {seed_expression}")

    # Optional data-type note appended to the rules section of the prompt.
    if user_data_type == "MATRIX":
        type_note = "\n**Important Note on Data Type:**\nThe user has specified the data type as **MATRIX**. Please do NOT use any vector-type operators (e.g., `vec_avg`, `vec_sum`) in your proposed templates, as they will raise errors for MATRIX type data in BRAIN. Note: 'MATRIX' is just a system identifier and does not refer to mathematical matrices."
    elif user_data_type == "VECTOR":
        type_note = "\n**Important Note on Data Type:**\nThe user has specified the data type as **VECTOR**. Please ensure you use vector-type operators (e.g., `vec_avg`, `vec_sum`) to handle the data fields before applying other operators."
    else:
        type_note = ""

    llm_prompt = f"""
As a world-class BRAIN consultant, your task is to design new alpha templates based on an existing seed alpha.
You will be provided with the seed alpha's expression and a summary of successful alpha templates for inspiration.

**Seed Alpha Expression:**
{alpha_details['expression']}

**Inspiration: Summary of Alpha Templates:**
{template_summary}

**Your Task:**
Based on the structure and potential economic rationale of the seed alpha, by the aid of the Alpha template summary, propose 3-5 new, diverse alpha templates.

**Rules:**
1.  The proposed templates must be valid BRAIN alpha expressions.
2.  Use placeholders like `<data_field/>` for data fields and `<operator/>` for operators that can be programmatically replaced later.
3.  For each proposed template, provide a brief, clear explanation of its investment rationale.
4.  Return the output as a single, valid JSON object where keys are the proposed template strings and values are their corresponding explanations. Do not include any other text or formatting outside of the JSON object.
5.  The proposed new alpha template should be related to the economic sense of seed Alpha {alpha_details} but in different format such as. Utilize the inspiration well.
{type_note}

**Example Output Format:**
{{
  "<group_operators/>(<ts_operators/>(<data_field/>, 60), industry)": "A cross-sectional momentum signal, neutralized by industry, to capture relative strength within peer groups.",
  "<logical_operator/><ts_operators/>(<data_field/>, 20)": "A simple short-term momentum operator applied to a data field."
}}

Now, generate the JSON object with your proposed templates.
"""

    try:
        return await call_llm(llm_prompt, llm_client)
    except Exception as e:
        print(f"An error occurred while calling the LLM (调用LLM时发生错误): {e}")
        return {}

async def propose_datafield_keywords(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, user_category: Optional[Union[str, list]] = None) -> list[str]:
    """
    Ask the LLM for search keywords suited to one data-field placeholder.

    Args:
        template_expression (str): The alpha template containing the placeholder.
        template_explanation (str): Rationale text for the template.
        placeholder (str): The placeholder the keywords are for.
        llm_client (openai.AsyncOpenAI): Client passed through to `call_llm`.
        user_category (str | list, optional): Data category mentioned in the prompt
            when provided; otherwise the prompt asks for diverse categories.

    Returns:
        list[str]: The keywords when the reply is a list of strings, or a dict holding
                   such a list under 'keywords', 'data', 'result' or 'items';
                   an empty list otherwise.
    """
    if user_category:
        category_note = f"\n**User Specified Data Category:**\nThe user has specified the data category: {user_category}. Please ensure the proposed keywords are relevant to this category."
    else:
        category_note = "\n**Data Category:**\n Please propose keywords across diverse and relevant data categories."

    prompt = f"""
As a quantitative researcher, you need to find the best data fields for an alpha template placeholder.
Based on the template's logic and the placeholder's name, suggest a list of 3-5 concise search keywords to use with the WorldQuant BRAIN `get_datafields` tool.

**Alpha Template:**
`{template_expression}`

**Template Explanation:**
`{template_explanation}`

**Placeholder to Fill:**
`{placeholder}`
{category_note}

**Your Task:**
Provide a list of search keywords that are likely to yield relevant data fields for this placeholder. The keywords should be specific and diverse. Return the output as a single, valid JSON array of strings.

**Example Input:**
Placeholder: `<slow_moving_characteristic/>`
Explanation: "measures the time-series evolution of a fund's relative rank on a slow-moving characteristic (e.g., fund style, expense tier)"

**Example Output:**
["fund style", "expense ratio", "management fee", "turnover", "aum"]

    Now, generate the JSON array of search keywords for the given placeholder.
"""
    print(f"--- Calling LLM to get keywords for placeholder (正在调用LLM获取占位符关键词): {placeholder} ---")
    llm_reply = await call_llm(prompt, llm_client)
    print(f"AI使用如下提示词获取搜索关键词推荐：{prompt}")

    def is_string_list(value) -> bool:
        """True when value is a list whose items are all strings."""
        return isinstance(value, list) and all(isinstance(entry, str) for entry in value)

    # Preferred shape: the reply itself is the keyword list.
    if is_string_list(llm_reply):
        return llm_reply

    # Tolerated shape: a dict wrapping the list under one of a few common keys.
    if isinstance(llm_reply, dict):
        for wrapper_key in ('keywords', 'data', 'result', 'items'):
            if wrapper_key in llm_reply and is_string_list(llm_reply[wrapper_key]):
                return llm_reply[wrapper_key]

    print(f"Warning: LLM did not return a valid list of strings for keywords (警告：LLM未返回有效的关键词列表). Got: {llm_reply}")
    return []

async def get_datafield_candidates(s: SingleSession, alpha_details: dict, template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, top_n: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX") -> list[dict]:
    """
    Collect candidate data fields for a placeholder.

    Keywords are first obtained from the LLM via `propose_datafield_keywords`; then
    `get_datafields` is run once per keyword (each call in a worker thread) and the
    first `top_n` rows of every non-empty result are merged and de-duplicated by 'id'.

    Region, universe and delay come from the user-supplied values when given,
    otherwise from `alpha_details['settings']`; if neither source provides one of
    them, an error is printed and an empty list is returned.

    Returns:
        list[dict]: Records with the keys 'id' and 'description'; empty when no
                    keywords, no usable settings or no search results are available.
    """
    keywords = await propose_datafield_keywords(template_expression, template_explanation, placeholder, llm_client, user_category=user_category)
    if not keywords:
        print(f"Could not generate keywords for placeholder (无法生成占位符关键词): {placeholder}")
        return []

    print(f"LLM-proposed keywords for '{placeholder}' (LLM提议的关键词): {keywords}")

    # Search settings: user overrides win, the alpha's own settings are the fallback.
    alpha_settings = alpha_details.get('settings', {})
    print("Alpha settings for datafield search (用于数据字段搜索的Alpha设置):")
    instrument_type = alpha_settings.get('instrumentType', 'EQUITY')

    if not user_region and 'region' not in alpha_settings:
        print("❌ Error: Could not determine 'region' for datafield search. It is missing in Alpha settings and not provided by user. (错误：无法确定数据搜索的地区，Alpha设置中缺失且用户未提供)")
        return []
    region = user_region if user_region else alpha_settings['region']
    print(f"   数据地区: {region}")

    if not user_universe and 'universe' not in alpha_settings:
        print("❌ Error: Could not determine 'universe' for datafield search. It is missing in Alpha settings and not provided by user. (错误：无法确定数据搜索的范围，Alpha设置中缺失且用户未提供)")
        return []
    universe = user_universe if user_universe else alpha_settings['universe']
    print(f"   数据范围: {universe}")

    # Delay is compared against None (not truthiness) so that a delay of 0 is honoured.
    if user_delay is None and 'delay' not in alpha_settings:
        print("❌ Error: Could not determine 'delay' for datafield search. It is missing in Alpha settings and not provided by user. (错误：无法确定数据搜索的Delay，Alpha设置中缺失且用户未提供)")
        return []
    delay = user_delay if user_delay is not None else alpha_settings['delay']
    print(f"   Delay: {delay} 类别")

    if user_category:
        print(f"   Category Filter: {user_category}")

    # One get_datafields call per keyword, each pushed to a worker thread and awaited together.
    category_filter = user_category if user_category else ""
    searches = [
        asyncio.to_thread(
            get_datafields,
            s=s,
            instrument_type=instrument_type,
            region=region,
            delay=delay,
            universe=universe,
            search=keyword,
            category=category_filter,
            data_type=user_data_type
        )
        for keyword in keywords
    ]
    search_frames = await asyncio.gather(*searches)

    # Keep the first top_n rows of every keyword search that found something.
    trimmed_frames = [frame.head(top_n) for frame in search_frames if not frame.empty]
    if not trimmed_frames:
        return []

    # Merge all keyword results and drop fields that several keywords returned.
    merged = pd.concat(trimmed_frames, ignore_index=True).drop_duplicates(subset=['id'])
    return merged[['id', 'description']].to_dict(orient='records')

async def get_group_datafield_candidates(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, top_n: int = 3) -> list[dict]:
    """
    Let the LLM choose group data fields for a placeholder from a fixed list.

    The prompt offers the fields "industry", "subindustry", "sector", "market" and
    "exchange" and asks for `top_n` of them.

    Returns:
        list[dict]: At most `top_n` entries of the form {"name": <field>}. When the
                    reply is not a list of strings, the first `top_n` fields of the
                    fixed list are returned instead.
    """
    group_field_choices = ["industry", "subindustry", "sector", "market", "exchange"]

    prompt = f"""
    As a quantitative researcher, you need to select the most relevant group data fields for an alpha template placeholder.
    Based on the template's logic and the placeholder's name, select {top_n} group fields from the following list that are most suitable: {group_field_choices}.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Fill:**
    `{placeholder}`

    **Your Task:**
    Provide a list of selected group data fields. Return the output as a single, valid JSON array of strings.

    **Example Output Format:**
    ["industry", "sector"]

    Now, generate the JSON array of selected group data fields.
    """
    print(f"--- Calling LLM to select group datafields for placeholder (正在调用LLM选择分组数据字段): {placeholder} ---")
    llm_reply = await call_llm(prompt, llm_client)

    reply_is_valid = isinstance(llm_reply, list) and all(isinstance(entry, str) for entry in llm_reply)
    if not reply_is_valid:
        print(f"Warning: LLM did not return a valid list of strings for group datafields (警告：LLM未返回有效的分组数据字段列表). Got: {llm_reply}")
        # Fall back to the head of the fixed list when the reply is unusable.
        chosen_fields = group_field_choices[:top_n]
    else:
        chosen_fields = llm_reply[:top_n]

    return [{"name": field_name} for field_name in chosen_fields]

async def get_operator_candidates(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, top_n: int = 3) -> list[dict]:
    """
    Pick candidate operators for a placeholder.

    All operators returned by `get_brain_operators(scope_filters=["REGULAR"])` are
    listed in the prompt, and the LLM is asked to select `top_n` of them by name.

    Returns:
        list[dict]: At most `top_n` entries of the form {"name": ..., "description": ...}.
                    Names in the reply that match no fetched operator are ignored.
                    When the reply is not a list of strings, the first `top_n`
                    fetched operators are returned; when no operators could be
                    fetched, the list is empty.
    """
    regular_operators = get_brain_operators(scope_filters=["REGULAR"]).get('operators', [])
    if not regular_operators:
        print("No REGULAR scope operators found. (未找到REGULAR范围的运算符)")
        return []

    # One "- name: description" line per operator for the prompt.
    operator_lines = [
        f"- {op['name']}: {op.get('description', 'No description available.')}"
        for op in regular_operators
    ]
    operator_names_and_descriptions = "\n".join(operator_lines)

    prompt = f"""
    As a quantitative finance expert, you need to select the most relevant operators for an alpha template placeholder.
    Based on the template's logic, its explanation, and the specific placeholder, select {top_n} operators from the provided list that are most suitable.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Fill:**
    `{placeholder}`

    **Available REGULAR Scope Operators:**
    {operator_names_and_descriptions}

    **Your Task:**
    Provide a list of selected operator names. Return the output as a single, valid JSON array of strings.

    **Example Output Format:**
    ["ts_mean", "ts_rank", "ts_decay"]

    Now, generate the JSON array of selected operators.
    """
    print(f"--- Calling LLM to select operator candidates for placeholder (正在调用LLM选择运算符候选): {placeholder} ---")
    llm_reply = await call_llm(prompt, llm_client)

    def describe(operator: dict) -> dict:
        """Reduce an operator record to the name/description pair returned to callers."""
        return {"name": operator['name'], "description": operator.get('description', '')}

    reply_is_valid = isinstance(llm_reply, list) and all(isinstance(entry, str) for entry in llm_reply)
    if not reply_is_valid:
        print(f"Warning: LLM did not return a valid list of strings for operator candidates (警告：LLM未返回有效的运算符候选列表). Got: {llm_reply}")
        # Fallback to a default set if LLM fails
        return [describe(op) for op in regular_operators[:top_n]]

    # Resolve each selected name to the first fetched operator carrying that name.
    resolved = []
    for chosen_name in llm_reply:
        match = next((op for op in regular_operators if op['name'] == chosen_name), None)
        if match is not None:
            resolved.append(describe(match))
    return resolved[:top_n]

async def get_parameter_candidates(param_type: str, template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI) -> list[dict]:
    """
    Uses an LLM to suggest sensible numerical candidates for a parameter placeholder.

    Args:
        param_type: "integer_parameter" or "float_parameter". Any other value is
            described to the LLM as a floating-point parameter and has no
            built-in fallback defaults.
        template_expression: The alpha template containing the placeholder.
        template_explanation: Explanation of the template, included in the prompt.
        placeholder: The placeholder text the candidates are meant to replace.
        llm_client: Client object forwarded to ``call_llm``.

    Returns:
        A list of ``{"value": number}`` dicts. When the LLM reply is a non-empty
        list of real numbers it is used as-is; otherwise the hard-coded defaults
        for ``param_type`` are returned (or ``[]`` for an unrecognized type).
    """
    if param_type == "integer_parameter":
        param_description = "an integer value, typically a window length or count (e.g., `d` in `ts_mean(x, d)`)"
    else:
        param_description = "a floating-point number, typically a threshold or factor"

    prompt = f"""
    As a quantitative finance expert, you need to suggest sensible numerical candidates for a placeholder parameter.
    Based on the alpha template's logic, its explanation, and the placeholder's type and context, propose 3-5 diverse numerical candidates.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Fill:**
    `{placeholder}`

    **Parameter Type:**
    This placeholder represents {param_description}.

    **Your Task:**
    Provide a list of numerical candidates that are appropriate for this parameter. Return the output as a single, valid JSON array of numbers.

    **Example Output (for integer_parameter):**
    [10, 20, 60, 120, 252]

    **Example Output (for float_parameter):**
    [0.01, 0.05, 0.1, 0.2, 0.5]

    Now, generate the JSON array of numerical candidates.
    """
    print(f"--- Calling LLM to suggest candidates for {param_type} placeholder (正在调用LLM建议参数候选): {placeholder} ---")
    response = await call_llm(prompt, llm_client)

    # An empty list would leave the placeholder with no candidates at all, and
    # bool is a subclass of int, so both are treated as an invalid reply and
    # routed to the fallback defaults below.
    is_numeric_list = (
        isinstance(response, list)
        and bool(response)
        and all(
            isinstance(item, (int, float)) and not isinstance(item, bool)
            for item in response
        )
    )
    if is_numeric_list:
        return [{"value": val} for val in response]
    print(f"Warning: LLM did not return a valid list of numbers for {param_type} candidates (警告：LLM未返回有效的数字候选列表). Got: {response}")

    # Fallback to default if LLM fails
    if param_type == "integer_parameter":
        return [{"value": x} for x in [10, 20, 60, 120, 252]]
    elif param_type == "float_parameter":
        return [{"value": x} for x in [0.01, 0.05, 0.1, 0.2, 0.5]]
    return []

async def judge_placeholder_type(placeholder: str, template_expression: str, template_explanation: str, operator_summary: str, llm_client: openai.AsyncOpenAI) -> str:
    """
    Uses an LLM to judge the type of placeholder (e.g., "data_field", "integer_parameter", "group_operator").

    Args:
        placeholder: The placeholder text to classify.
        template_expression: The alpha template containing the placeholder.
        template_explanation: Explanation of the template, included in the prompt.
        operator_summary: Operator / data-field summary embedded in the prompt.
        llm_client: Client object forwarded to ``call_llm``.

    Returns:
        The string found under "placeholder_type" in the LLM reply, or
        "unknown" when the reply is not a JSON object, lacks that key, or the
        value is not a string.
    """
    prompt = f"""
    As a world-class quantitative finance expert, your task is to classify the type of a placeholder within an alpha expression.
    You will be provided with the alpha template, its explanation, the specific placeholder, and a comprehensive summary of available BRAIN operators and data field characteristics.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Classify:**
    `{placeholder}`

    **Available BRAIN Operators and Data Field Characteristics:**
    {operator_summary}

    **Your Task:**
    Classify the `{placeholder}` based on the provided context. The classification should be one of the following types:
    - "data_field": If the placeholder clearly represents a financial data series (e.g., price, volume, fundamental ratio).
    - "group_data_field": If the placeholder represents a categorical field used for grouping or neutralization (e.g., `industry` in `group_zscore(x, industry)`).
    - "operator": If the placeholder represents a BRAIN operator that performs a calculation or transformation.
    - "vector_operator": If the placeholder represents a vector operator (e.g., vec_avg, vec_sum).
    - "integer_parameter": If the placeholder represents an integer value, typically a window length or count (e.g., `d` in `ts_mean(x, d)`).
    - "float_parameter": If the placeholder represents a floating-point number, typically a threshold or factor.
    - "string_parameter": If the placeholder represents a string value, like a group name (e.g., `industry` in `group_zscore(x, industry)`).
    - "unknown": If the type cannot be determined from the context.

    Return the classification as a single JSON object with a key "placeholder_type" and its corresponding value. Do not include any other text or formatting outside of the JSON object.

    **Example Output Format:**
    {{"placeholder_type": "data_field"}}
    {{"placeholder_type": "integer_parameter"}}

    Now, classify the placeholder.
    """
    print(f"--- Calling LLM to judge type for placeholder (正在调用LLM判断占位符类型): {placeholder} ---")

    response = await call_llm(prompt, llm_client)

    # Other callers of call_llm in this file handle non-matching reply shapes,
    # so guard here too instead of letting `.get` raise AttributeError on a
    # list / None / string reply.
    if not isinstance(response, dict):
        print(f"Warning: LLM did not return a JSON object for placeholder type (警告：LLM未返回有效的占位符类型). Got: {response}")
        return "unknown"

    placeholder_type = response.get("placeholder_type", "unknown")
    # Keep the declared `-> str` contract even if the model emits null or a non-string.
    if not isinstance(placeholder_type, str):
        return "unknown"
    return placeholder_type

async def populate_template(s: SingleSession, alpha_details: dict, template_expression: str, template_explanation: str, operator_summary: str, llm_client: openai.AsyncOpenAI, top_n_datafield: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX") -> dict:
    """
    Populates placeholders in an alpha template with candidate data fields, operators, or parameters.

    Each placeholder found by ``extract_placeholders`` is classified with
    ``judge_placeholder_type`` and then handed to the matching candidate
    helper.

    Args:
        s: BRAIN session, forwarded to ``get_datafield_candidates``.
        alpha_details: Seed alpha details, forwarded to ``get_datafield_candidates``.
        template_expression: The template whose placeholders are populated.
        template_explanation: Explanation of the template, passed to the LLM helpers.
        operator_summary: Operator summary passed to ``judge_placeholder_type``.
        llm_client: Client object forwarded to the LLM helpers.
        top_n_datafield: Passed as ``top_n`` to ``get_datafield_candidates``.
        user_region, user_universe, user_delay, user_category, user_data_type:
            Forwarded unchanged to ``get_datafield_candidates``.

    Returns:
        ``{placeholder: {"type": <type>, "candidates": <list>}}``; an empty
        dict when the template has no placeholders. "string_parameter" and
        unrecognized types get an empty candidate list.
    """
    placeholders = extract_placeholders(template_expression)

    if not placeholders:
        print("No placeholders found in the template. (模板中未找到占位符)")
        return {}

    print(f"Found placeholders in template (在模板中找到占位符): {placeholders}")

    populated_placeholders = {}

    for ph in placeholders:
        # Use LLM to judge placeholder type
        ph_type = await judge_placeholder_type(ph, template_expression, template_explanation, operator_summary, llm_client)
        print(f"'{ph}' judged as type (判断类型为): {ph_type}")

        if ph_type == "data_field":
            candidates = await get_datafield_candidates(s, alpha_details, template_expression, template_explanation, ph, llm_client, top_n=top_n_datafield, user_region=user_region, user_universe=user_universe, user_delay=user_delay, user_category=user_category, user_data_type=user_data_type)
            populated_placeholders[ph] = {"type": "data_field", "candidates": candidates}
        elif ph_type == "group_data_field":
            candidates = await get_group_datafield_candidates(template_expression, template_explanation, ph, llm_client)
            populated_placeholders[ph] = {"type": "group_data_field", "candidates": candidates}
        elif ph_type in ("operator", "group_operator", "ts_operator", "vector_operator"):
            candidates = await get_operator_candidates(template_expression, template_explanation, ph, llm_client)
            # The specific operator flavour reported by the LLM is kept as the type.
            populated_placeholders[ph] = {"type": ph_type, "candidates": candidates}
        elif ph_type in ("integer_parameter", "float_parameter"):
            candidates = await get_parameter_candidates(ph_type, template_expression, template_explanation, ph, llm_client)
            populated_placeholders[ph] = {"type": ph_type, "candidates": candidates}
        elif ph_type == "string_parameter":
            # No candidate source exists for string parameters yet; record it with no candidates.
            populated_placeholders[ph] = {"type": "string_parameter", "candidates": []}
        else:
            print(f"Could not determine type for placeholder (无法确定占位符类型): {ph} (LLM classified as {ph_type})")
            populated_placeholders[ph] = {"type": "unknown", "candidates": []}

    return populated_placeholders

def get_datafield_prefix(datafield_name: str) -> str:
    """Return the text before the first underscore (e.g., 'anl44_...' -> 'anl44').

    A name that contains no underscore is returned unchanged.
    """
    # partition() yields the whole string as the head when the separator is absent.
    prefix, _separator, _remainder = datafield_name.partition('_')
    return prefix


async def generate_new_alphas(alpha_description, brain_session, template_summary: Optional[str] = None, top_n_datafield: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX"):
    """
    Main function to generate new alpha templates based on a seed alpha.

    Pipeline: propose templates with the LLM, drop templates that fail
    validation, populate every placeholder with candidates (saving progress to
    ``output/Alpha_candidates.json`` next to this script), expand all candidate
    combinations into concrete expressions, syntax-check them with
    ``val.ExpressionValidator`` and write the valid / invalid expressions to
    ``Alpha_generated_expressions_success.json`` / ``..._error.json``.

    The process exits with status 1 (``sys.exit``) when LLM authentication
    fails, the alpha description is empty, no templates are proposed, or every
    template is dropped by validation.

    Args:
        alpha_description: The alpha description JSON string.
        brain_session: The BRAIN session object.
        template_summary: Optional template summary string. If None, will load from built-in.
        top_n_datafield: Number of data field candidates to retrieve (default: 50).
        user_region: Forwarded to ``populate_template``.
        user_universe: Forwarded to ``populate_template``.
        user_delay: Forwarded to ``populate_template``.
        user_category: Forwarded to ``populate_template``.
        user_data_type: Data type for datafield search (MATRIX or VECTOR).

    Returns:
        None. Results are printed and written to files.
    """
    # Declare the module-level LLM settings referenced below
    global LLM_model_name, LLM_API_KEY, llm_base_url

    # Load template summary if not provided
    if template_summary is None:
        template_summary = load_template_summary()
    # --- Load Operator Summary ---
    operator_summary = get_brain_operators(scope_filters=["REGULAR"])

    try:
        llm_api_key = get_token_from_auth_server()
        llm_base_url_value = llm_base_url  # module-level setting
        llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
        print("✓ LLM Gateway 认证成功")
    except Exception as e:
        print(f"❌ LLM Gateway 认证失败: {e}")
        sys.exit(1)

    details = json.loads(alpha_description)

    if not details:
        print("Failed to retrieve details for Alpha (获取Alpha详情失败)")
        sys.exit(1)

    print("Alpha Details Retrieved (已获取Alpha详情):")
    print(json.dumps(details, indent=4))

    # --- Propose new alpha templates ---
    print("\n--- Proposing new alpha templates for Alpha (正在为Alpha提议新模板) ---")
    proposed_templates = await propose_alpha_templates(details, template_summary, llm_client, user_data_type=user_data_type)

    if not proposed_templates:
        print("Failed to generate proposed alpha templates. (生成提议模板失败)")
        sys.exit(1)

    print("\n--- Proposed Alpha Templates (JSON) (建议的Alpha模板,多样性会受到模型和模板总结文档的影响) ---")
    print(json.dumps(proposed_templates, indent=4))

    # --- Validation: drop templates with suspicious literal identifiers ---
    # Validation is best-effort: if the step itself fails, keep all templates.
    try:
        operators_meta = get_brain_operators().get('operators', [])
        proposed_templates = _filter_valid_templates(
            proposed_templates,
            operators_meta,
            brain_session,
            details.get('settings', {}),
            parse_alpha_code,
        )
    except Exception as e:
        print(f"⚠ 模板校验步骤出现异常，跳过校验: {e}")

    if not proposed_templates:
        print("❌ 所有模板在校验后被丢弃，无法继续。")
        sys.exit(1)

    # --- Prepare output location (directory next to this script) ---
    output_dir = Path(__file__).parent / "output"
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        print(f"Warning: could not create directory {output_dir}: {e}")

    output_filepath = output_dir / "Alpha_candidates.json"

    final_output = {}

    # --- Populate every proposed template and gather candidates ---
    for template_expr, template_expl in proposed_templates.items():
        print(f"\n--- Populating template (正在填充模板): '{template_expr}' ---")
        try:
            populated_info = await populate_template(brain_session, details, template_expr, template_expl, operator_summary, llm_client, top_n_datafield=top_n_datafield, user_region=user_region, user_universe=user_universe, user_delay=user_delay, user_category=user_category, user_data_type=user_data_type)

            # Skip templates where any data_field placeholder has zero candidates
            if _should_skip_due_to_empty_candidates(populated_info):
                print("⚠ 该模板存在数据字段候选为空的占位符，跳过此模板。")
                continue

            final_output[template_expr] = {
                "template_explanation": template_expl,
                "seed_alpha_settings": details.get('settings', {}),
                "placeholder_candidates": populated_info
            }

            # --- Incremental saving: rewrite the file after each template ---
            try:
                with output_filepath.open('w', encoding='utf-8') as f:
                    json.dump(final_output, f, indent=4)
                print(f"✓ Progress saved to {output_filepath.name}")
            except IOError as e:
                print(f"⚠️ Warning: Failed to save progress: {e}")

        except Exception as e:
            print(f"❌ Error processing template '{template_expr}': {e}")
            print("Skipping this template and continuing...")
            continue

    print("\n--- Final Consolidated Output (最终合并输出) ---")
    print(json.dumps(final_output, indent=4))

    # --- Expand every template into concrete expressions ---
    # A dict is used as an insertion-ordered set so the output files list
    # expressions in generation order rather than arbitrary set order.
    generated_expressions = {}
    for template_expression, template_data in final_output.items():
        for expression in _expand_template_expressions(template_expression, template_data["placeholder_candidates"]):
            generated_expressions.setdefault(expression, None)

    print(f"\n--- Total Unique Generated Alpha Expressions (生成的唯一Alpha表达式总数): {len(generated_expressions)} ---")

    validator = val.ExpressionValidator()
    print("开始表达式语法检查感谢社区贡献，原帖https://support.worldquantbrain.com/hc/en-us/community/posts/36740689434391--check%E7%8E%8B-%E9%AA%8C%E8%AF%81%E8%A1%A8%E8%BE%BE%E5%BC%8F%E6%98%AF%E5%90%A6%E6%AD%A3%E7%A1%AE%E7%9A%84%E8%84%9A%E6%9C%AC-%E4%B8%83%E5%8D%81%E4%BA%8C%E5%8F%98%E9%BB%84%E9%87%91%E6%90%AD%E6%A1%A3?page=1#community_comment_36798176158999")
    print("请注意，该文件仅用于验证表达式的格式正确性，\n不保证表达式在实际使用中的逻辑正确性或可执行性。\n")
    print("不在内置函数列表中的operator将无法检查，如有需要，请使用AI按需修改本源代码添加")

    expressions = list(generated_expressions)

    # Validate each expression and split into valid / invalid buckets
    valid_expressions = []
    invalid_expressions = []

    print(f"开始验证 {len(expressions)} 个表达式...")
    for i, expr in enumerate(expressions, 1):
        if i % 10 == 0:
            print(f"已验证 {i}/{len(expressions)} 个表达式")

        result = validator.check_expression(expr)
        if result["valid"]:
            valid_expressions.append(expr)
        else:
            invalid_expressions.append({"expression": expr, "errors": result["errors"]})

    # Build output file paths
    name = "Alpha_generated_expressions"
    valid_output_path = os.path.join(output_dir, f"{name}_success.json")
    invalid_output_path = os.path.join(output_dir, f"{name}_error.json")

    print("\n验证完成！")
    print(f"有效表达式: {len(valid_expressions)}")
    print(f"无效表达式: {len(invalid_expressions)}")

    # Save valid expressions
    try:
        with open(valid_output_path, 'w', encoding='utf-8') as f:
            json.dump(valid_expressions, f, ensure_ascii=False, indent=2)
        print(f"有效表达式已保存到: {valid_output_path}")
    except Exception as e:
        print(f"错误: 保存有效表达式失败 - {e}")

    # Save invalid expressions together with their error details
    try:
        with open(invalid_output_path, 'w', encoding='utf-8') as f:
            json.dump(invalid_expressions, f, ensure_ascii=False, indent=2)
        print(f"无效表达式已保存到: {invalid_output_path}，文件包含错误详情")
        print("查看该文件，你将获得修改模板的灵感，你可以定位到错误的模板并在APP里修改")
    except Exception as e:
        print(f"错误: 保存无效表达式失败 - {e}")

    print("请注意，该文件仅用于验证表达式的格式正确性，\n不保证表达式在实际使用中的逻辑正确性或可执行性。\n")
    print("不在内置函数列表中的operator将无法检查，如有需要，请使用AI按需修改validator源代码添加")

    print("不同模型效果不同，默认的kimi模型可能会产生Alpha语法错误，请检查生成的模板文件进行甄别")
    print("下一步，请下载已完成的模板，放入APP首页进行解析和语法检查，强烈建议生成表达式后手动尝试回测")


# Placeholder types whose candidates are dicts carrying the substitution text
# under "name". The operator variants mirror the types populate_template stores.
_NAME_KEYED_PLACEHOLDER_TYPES = (
    "group_data_field",
    "operator",
    "group_operator",
    "ts_operator",
    "vector_operator",
)


def _placeholder_candidate_values(placeholder_info: dict) -> list:
    """Return the substitution strings for one populated placeholder entry."""
    ph_type = placeholder_info["type"]
    candidates = placeholder_info["candidates"]
    if ph_type == "data_field":
        return [c["id"] for c in candidates]
    if ph_type in ("integer_parameter", "float_parameter"):
        return [str(c["value"]) for c in candidates]
    if ph_type in _NAME_KEYED_PLACEHOLDER_TYPES:
        return [c["name"] for c in candidates]
    # string_parameter / unknown: nothing to substitute
    return []


def _expand_template_expressions(template_expression: str, placeholder_candidates: dict):
    """Yield each expression obtained by filling the template with one candidate combination.

    Combinations that use more than one data field are skipped unless all of
    those fields share the same prefix (see ``get_datafield_prefix``). If any
    placeholder has no candidate values, nothing is yielded.
    """
    placeholder_names = list(placeholder_candidates)
    value_lists = [
        _placeholder_candidate_values(placeholder_candidates[name])
        for name in placeholder_names
    ]
    # Positions of data_field placeholders are the same for every combination.
    datafield_positions = [
        index
        for index, name in enumerate(placeholder_names)
        if placeholder_candidates[name]["type"] == "data_field"
    ]

    # Iterate the product lazily instead of materializing every combination.
    for combination in itertools.product(*value_lists):
        if len(datafield_positions) > 1:
            prefixes = {get_datafield_prefix(combination[index]) for index in datafield_positions}
            if len(prefixes) > 1:
                continue  # data fields come from different prefixes

        expression = template_expression
        for name, value in zip(placeholder_names, combination):
            expression = expression.replace(name, value)
        yield expression


async def main():
    """
    Main execution function.

    Collects configuration (from the JSON file given as the first command line
    argument, from ``transformer_config.json`` next to this script, or from
    ``interactive_input``), publishes the LLM / account settings as module
    globals, starts the BRAIN session, checks LLM gateway authentication and
    then runs ``generate_alpha_description`` followed by ``generate_new_alphas``.

    Exits with status 1 when a config file is missing or unreadable, when a
    required config key is absent, when the Alpha ID is left empty, or when
    LLM gateway authentication fails.
    """

    # Check for command line argument for config file
    if len(sys.argv) > 1:
        config_path = sys.argv[1]
        config = _load_config_file_or_exit(config_path)
        print(f"✓ 已从命令行参数加载配置: {config_path}")
        # Ensure optional fields are present
        config.setdefault('top_n_datafield', 50)
        config.setdefault('template_summary_path', None)
    else:
        # --- Step 0: collect configuration interactively ---
        print("输入回车加载同文件夹下的transformer_config.json文件，否则按其他任意键并回车，进入交互式输入账号信息")
        input_str = input()
        if input_str == "":
            config_path = os.path.join(os.path.dirname(__file__), 'transformer_config.json')
            # Same failure handling as the command-line branch instead of a raw traceback
            config = _load_config_file_or_exit(config_path)
            print("\n" + "="*60)
            print("✓ 已从 transformer_config.json 加载账号配置")
            print("="*60 + "\n")

            # Runtime parameters are still asked interactively
            # 1. Template summary file path
            print("【1/3】模板总结文件配置")
            print("强烈推荐你使用自己总结的模板文档，效果会更好")
            print("提示: 如果您有 template_summary 的 .txt 或 .md 文件，请输入完整路径")
            print("      如果没有，直接回车将使用内置模板总结")
            template_path = input("请输入模板总结文件路径 (直接回车使用内置模板): ").strip()
            config['template_summary_path'] = template_path if template_path else None
            if template_path:
                print(f"✓ 将尝试从文件加载: {template_path}\n")
            else:
                print("✓ 将使用内置模板总结\n")

            # 2. Alpha ID
            print("【2/3】Alpha ID 配置")
            alpha_id = input("请输入要处理的 Alpha ID: ").strip()
            if not alpha_id:
                print("❌ 错误: Alpha ID 不能为空")
                sys.exit(1)
            config['alpha_id'] = alpha_id
            print(f"✓ Alpha ID: {alpha_id}\n")

            # 3. Top N (data fields only)
            print("【3/3】候选数量配置 (Top N)")
            print("提示: 此参数控制为每个占位符生成的数据字段候选数量")
            default_datafield_topn = 50
            datafield_topn_input = input(f"请输入数据字段候选数量 (直接回车使用默认值: {default_datafield_topn}): ").strip()
            try:
                config['top_n_datafield'] = int(datafield_topn_input) if datafield_topn_input else default_datafield_topn
            except ValueError:
                print(f"⚠ 警告: 输入无效，使用默认值: {default_datafield_topn}")
                config['top_n_datafield'] = default_datafield_topn
            print(f"✓ 数据字段候选数量: {config['top_n_datafield']}\n")

            print("="*60)
            print("配置完成！开始处理...")
            print("="*60 + "\n")
        else:
            config = interactive_input()

    # Fail fast with a readable message rather than a KeyError further down
    # (some keys are only read after the BRAIN session has been started).
    required_keys = ('LLM_model_name', 'LLM_API_KEY', 'llm_base_url', 'username', 'password', 'alpha_id')
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        print(f"❌ 配置缺少必需字段: {', '.join(missing_keys)}")
        sys.exit(1)

    # Publish settings as module globals
    global LLM_model_name, LLM_API_KEY, llm_base_url, username, password
    LLM_model_name = config['LLM_model_name']
    LLM_API_KEY = config['LLM_API_KEY']
    llm_base_url = config['llm_base_url']
    username = config['username']
    password = config['password']

    # --- Step 1: load the template summary ---
    template_summary = load_template_summary(config.get('template_summary_path'))

    # --- Step 2: start the BRAIN session ---
    print("--- 正在启动 BRAIN 会话... ---")
    s = start_session()

    # --- Step 3: authenticate against the LLM gateway ---
    # This is only an early check: generate_new_alphas is not handed this
    # client and builds its own.
    llm_client = None
    print("--- 正在认证 LLM Gateway... ---")
    try:
        llm_api_key = get_token_from_auth_server()
        llm_base_url_value = llm_base_url
        llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
        print("✓ LLM Gateway 认证成功")
    except Exception as e:
        print(f"❌ LLM Gateway 认证失败: {e}")
        sys.exit(1)

    # --- Step 4: fetch alpha details ---
    alpha_id = config['alpha_id']
    print(f"\n--- 正在获取 Alpha ID: {alpha_id} 的详情... ---")

    # --- Step 4.5: data-field search scope (from config in CLI mode, otherwise interactive) ---
    if len(sys.argv) > 1:
        user_datafield_config = {
            'user_region': config.get('user_region'),
            'user_universe': config.get('user_universe'),
            'user_delay': config.get('user_delay'),
            'user_category': config.get('user_category'),
            'user_data_type': config.get('user_data_type', 'MATRIX')
        }
    else:
        user_datafield_config = interactive_datafield_selection(s)

    details_str = await generate_alpha_description(alpha_id, brain_session=s)
    await generate_new_alphas(
        alpha_description=details_str,
        brain_session=s,
        template_summary=template_summary,
        top_n_datafield=config.get('top_n_datafield', 50),
        user_region=user_datafield_config.get('user_region'),
        user_universe=user_datafield_config.get('user_universe'),
        user_delay=user_datafield_config.get('user_delay'),
        user_category=user_datafield_config.get('user_category'),
        user_data_type=user_datafield_config.get('user_data_type', 'MATRIX')
    )


def _load_config_file_or_exit(config_path: str) -> dict:
    """Load a JSON object from ``config_path``; print an error and exit(1) if that is not possible."""
    if not os.path.exists(config_path):
        print(f"❌ 配置文件不存在: {config_path}")
        sys.exit(1)
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
        if not isinstance(loaded, dict):
            raise ValueError("config file must contain a JSON object")
    except Exception as e:
        print(f"❌ 加载配置文件失败: {e}")
        sys.exit(1)
    return loaded

def interactive_datafield_selection(s: SingleSession) -> dict:
    """
    Interactively ask the user for the datafield search configuration.

    Prompts, in order, for Region, Delay, Universe, data Category and Data
    Type. Region / Delay / Universe choices are limited to the EQUITY rows
    returned by ``get_instrument_type_region_delay``; an empty or invalid
    answer leaves that setting as ``None``.

    Args:
        s: BRAIN session passed to ``get_instrument_type_region_delay`` and
            ``get_data_categories``.

    Returns:
        A dict with keys 'user_region', 'user_universe', 'user_delay',
        'user_category' (list of category ids or None) and 'user_data_type'
        ("MATRIX" or "VECTOR"). An empty dict is returned when the options
        cannot be fetched or contain no EQUITY rows.
    """
    print("\n" + "="*60)
    print("【附加配置】数据字段搜索范围配置")
    print("正在获取有效的 Region/Universe/Delay 组合...")

    try:
        df = get_instrument_type_region_delay(s)
    except Exception as e:
        print(f"⚠ 获取配置选项失败: {e}")
        print("将使用 Seed Alpha 的默认设置")
        return {}

    # Filter for EQUITY only as per current logic
    df_equity = df[df['InstrumentType'] == 'EQUITY']

    if df_equity.empty:
        print("未找到 EQUITY 类型的配置选项。")
        return {}

    # 1. Select Region
    regions = df_equity['Region'].unique().tolist()
    print(f"\n可用地区 (Region): {regions}")
    region_input = input("请输入地区 (直接回车使用 Seed Alpha 默认值): ").strip()

    selected_region = None
    if region_input:
        if region_input in regions:
            selected_region = region_input
        else:
            print("⚠ 输入无效，将使用默认值")

    # 2. Select Delay
    # If region is selected, filter delays for that region
    if selected_region:
        delays = df_equity[df_equity['Region'] == selected_region]['Delay'].unique().tolist()
    else:
        delays = df_equity['Delay'].unique().tolist()

    print(f"\n可用延迟 (Delay): {delays}")
    delay_input = input("请输入延迟 (直接回车使用 Seed Alpha 默认值): ").strip()

    selected_delay = None
    if delay_input:
        try:
            d_val = int(delay_input)
            if d_val in delays:
                selected_delay = d_val
            else:
                print("⚠ 输入不在列表中，将使用默认值")
        except ValueError:
            print("⚠ 输入无效，将使用默认值")

    # 3. Select Universe
    # If region and delay are selected, filter universes
    if selected_region and selected_delay is not None:
        subset = df_equity[(df_equity['Region'] == selected_region) & (df_equity['Delay'] == selected_delay)]
        if not subset.empty:
            universes = subset.iloc[0]['Universe']
        else:
            universes = []
    else:
        # Just show all unique universes if we can't filter precisely
        universes = set()
        for u_list in df_equity['Universe']:
            universes.update(u_list)
        universes = list(universes)

    print(f"\n可用范围 (Universe): {universes}")
    universe_input = input("请输入范围 (直接回车使用 Seed Alpha 默认值): ").strip()

    selected_universe = None
    if universe_input:
        if universe_input in universes:
            selected_universe = universe_input
        else:
            print("⚠ 输入无效，将使用默认值")

    # 4. Select Category
    print("\n正在获取数据类别 (Data Categories)...")
    categories = get_data_categories(s)

    selected_category = None
    if categories:
        print("\n可用类别 (Categories):")
        for i, cat in enumerate(categories):
            print(f"{i+1}. {cat['name']} (ID: {cat['id']})")

        cat_input = input("请输入类别编号或ID (多个用逗号分隔, 直接回车不筛选): ").strip()

        if cat_input:
            selected_categories = _parse_category_selection(cat_input, categories)
            if selected_categories:
                selected_category = selected_categories
            else:
                print("⚠ 未选择有效类别，将不筛选类别")
    else:
        print("⚠ 无法获取类别列表，跳过类别选择")

    # 5. Select Data Type
    print("\n可用数据类型 (Data Type): [MATRIX, VECTOR]")
    data_type_input = input("请输入数据类型 (直接回车默认 MATRIX): ").strip().upper()

    selected_data_type = "MATRIX"
    if data_type_input == "VECTOR":
        # VECTOR needs an explicit confirmation before it replaces the default
        print("⚠ 警告: 请确保您输入的原型Alpha中正确地使用了vector operator,否则极容易造成数据类型错误")
        confirm = input("确认使用 VECTOR 吗? (y/n): ").strip().lower()
        if confirm == 'y':
            selected_data_type = "VECTOR"
        else:
            print("已取消 VECTOR 选择，使用默认值 MATRIX")
    elif data_type_input and data_type_input != "MATRIX":
        print("⚠ 输入无效，将使用默认值 MATRIX")

    return {
        'user_region': selected_region,
        'user_universe': selected_universe,
        'user_delay': selected_delay,
        'user_category': selected_category,
        'user_data_type': selected_data_type
    }


def _parse_category_selection(cat_input: str, categories: list) -> list:
    """Turn a comma-separated list of 1-based indexes / category ids into a list of unique category ids."""
    selected_ids = []
    for token in (part.strip() for part in cat_input.split(',')):
        if not token:
            # Stray comma, e.g. "1,,2" or a trailing comma
            continue

        chosen = None
        # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
        # so inputs such as "²" are reported as invalid instead of raising.
        if token.isdecimal():
            idx = int(token) - 1
            if 0 <= idx < len(categories):
                chosen = categories[idx]
        else:
            chosen = next((cat for cat in categories if cat['id'] == token), None)

        if chosen is None:
            # Covers unknown ids and out-of-range indexes alike
            print(f"⚠ 输入无效: {token}")
            continue

        if chosen['id'] not in selected_ids:
            selected_ids.append(chosen['id'])
            print(f"已选择类别: {chosen['name']}")
    return selected_ids

if __name__ == "__main__":
    # On Windows with Python 3.8 or newer, install the selector event loop
    # policy before asyncio.run() creates the loop that drives main().
    running_on_windows = sys.platform.startswith('win')
    if running_on_windows and sys.version_info[:2] >= (3, 8):
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)

    asyncio.run(main())