From 61633b21b42f5f7b8388d8594a0002dd3a277648 Mon Sep 17 00:00:00 2001 From: jack Date: Wed, 22 Apr 2026 11:31:11 +0800 Subject: [PATCH] ++ --- hide_low_sharpe_alpha/main.py | 239 + hide_low_sharpe_alpha/result.json | 3891 +++++++++++++++++ simple72/CODING_Guidance_Document.md | 65 + simple72/README.md | 383 ++ simple72/README_CONFIG.md | 177 + simple72/Tranformer/Transformer.py | 1803 ++++++++ simple72/Tranformer/__init__.py | 1 + simple72/Tranformer/ace_lib.py | 1514 +++++++ simple72/Tranformer/helpful_functions.py | 180 + .../Tranformer/output/Alpha_candidates.json | 112 + .../output/Alpha_candidates_示例.json | 654 +++ .../Alpha_generated_expressions_error.json | 1 + .../Alpha_generated_expressions_success.json | 7 + simple72/Tranformer/parsetab.py | 60 + simple72/Tranformer/template_summary.md | 3182 ++++++++++++++ simple72/Tranformer/validator.py | 1261 ++++++ simple72/__init__.py | 1 + simple72/config.json | 15 + simple72/config.json.example | 15 + simple72/main.py | 404 ++ simple72/requirements.txt | 7 + simple72/running_error.txt | 10 + simple72/templates/app.js | 290 ++ simple72/templates/index.html | 148 + simple72/templates/styles.css | 343 ++ 25 files changed, 14763 insertions(+) create mode 100644 hide_low_sharpe_alpha/main.py create mode 100644 hide_low_sharpe_alpha/result.json create mode 100644 simple72/CODING_Guidance_Document.md create mode 100644 simple72/README.md create mode 100644 simple72/README_CONFIG.md create mode 100755 simple72/Tranformer/Transformer.py create mode 100644 simple72/Tranformer/__init__.py create mode 100755 simple72/Tranformer/ace_lib.py create mode 100755 simple72/Tranformer/helpful_functions.py create mode 100644 simple72/Tranformer/output/Alpha_candidates.json create mode 100755 simple72/Tranformer/output/Alpha_candidates_示例.json create mode 100644 simple72/Tranformer/output/Alpha_generated_expressions_error.json create mode 100644 simple72/Tranformer/output/Alpha_generated_expressions_success.json create mode 100755 
simple72/Tranformer/parsetab.py create mode 100644 simple72/Tranformer/template_summary.md create mode 100755 simple72/Tranformer/validator.py create mode 100644 simple72/__init__.py create mode 100644 simple72/config.json create mode 100644 simple72/config.json.example create mode 100644 simple72/main.py create mode 100644 simple72/requirements.txt create mode 100644 simple72/running_error.txt create mode 100644 simple72/templates/app.js create mode 100644 simple72/templates/index.html create mode 100644 simple72/templates/styles.css diff --git a/hide_low_sharpe_alpha/main.py b/hide_low_sharpe_alpha/main.py new file mode 100644 index 0000000..9f09a00 --- /dev/null +++ b/hide_low_sharpe_alpha/main.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- +""" +批量隐藏低质量Alpha +目标URL: https://api.worldquantbrain.com/users/self/alphas?limit=22&offset=0&status=UNSUBMITTED%1FIS_FAIL&is.sharpe%3C0.7&is.sharpe%3E-0.8&is.turnover%3C0.2&order=-is.sharpe&hidden=false +隐藏接口: PATCH https://api.worldquantbrain.com/alphas/{alpha_id} +Payload: {"hidden":true} +""" +import random +import time + +import httpx +from httpx import BasicAuth + +# 全局配置 +TIMEOUT = 10.0 # 请求超时时间(秒) +MAX_RETRIES = 3 # 最大重试次数 +RETRY_DELAY_MIN = 3 # 重试最小等待时间(秒) +RETRY_DELAY_MAX = 5 # 重试最大等待时间(秒) +MAX_LIMIT = 100 # 每页最大数量 +MAX_PAGE = 5 # 最大页数 + + +def login(): + """登录WorldQuant Brain API""" + # 从nacos获取账号密码 + nacos_resp = httpx.get('http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify') + if nacos_resp.status_code != 200: + print('获取账号密码失败') + return False + + config = nacos_resp.json() + + username = config['user_name'] + password = config['password'] + + print(f"正在登录账户: {username}") + + # 创建客户端并认证,设置超时 + client = httpx.Client(auth=BasicAuth(username, password), timeout=TIMEOUT) + + # 发送登录请求 + response = client.post('https://api.worldquantbrain.com/authentication') + print(f"登录状态: {response.status_code}") + + if response.status_code == 201: + print("登录成功!") + print(response.json()) + return 
client + else: + print(f"登录失败: {response.json()}") + client.close() + return None + + +def request_with_retry(client, method, url, **kwargs): + """ + 带重试机制的请求函数 + 默认重试3次,每次等待3-5秒 + """ + for attempt in range(1, MAX_RETRIES + 1): + try: + print(f" 请求尝试 {attempt}/{MAX_RETRIES}: {method.upper()} {url}") + response = client.request(method, url, **kwargs) + return response + except Exception as e: + print(f" 请求异常: {str(e)}") + if attempt < MAX_RETRIES: + sleep_time = random.uniform(RETRY_DELAY_MIN, RETRY_DELAY_MAX) + print(f" 等待 {sleep_time:.2f} 秒后重试...") + time.sleep(sleep_time) + else: + print(f" 已达到最大重试次数 {MAX_RETRIES},放弃请求") + raise + return None + + +def fetch_all_alphas(client, base_url): + """ + 分页获取所有符合条件的alpha + 返回alpha信息列表(包含id和is数据) + 使用for循环,步进MAX_LIMIT,最多获取MAX_PAGE页 + """ + alphas = [] + + for page in range(MAX_PAGE): + # 计算当前页的offset + offset = page * MAX_LIMIT + + # 构建当前页的URL + url = f"{base_url}&offset={offset}" + + print(f"\n正在获取第 {page + 1}/{MAX_PAGE} 页,offset={offset} 的数据...") + + try: + response = request_with_retry(client, 'get', url) + except Exception as e: + print(f"获取数据失败: {str(e)}") + break + + if response.status_code != 200: + print(f"获取数据失败: {response.status_code}") + print(f"响应: {response.text}") + break + + data = response.json() + results = data.get('results', []) + + # 如果没有结果,说明已经获取完毕 + if not results: + print("\n没有更多数据,获取完成") + break + + # 提取当前页的alpha信息 + for alpha in results: + alpha_info = { + 'id': alpha.get('id'), + 'is': alpha.get('is', {}) + } + if alpha_info['id']: + alphas.append(alpha_info) + print(f" 发现Alpha: {alpha_info['id']}") + + print(f"本页获取完成,共 {len(results)} 个Alpha") + + sleep_time = random.uniform(3, 5) + print(f"等待 {sleep_time:.2f} 秒后继续获取下一页数据...") + time.sleep(sleep_time) + + return alphas + + +def hide_alpha(client, alpha_info): + """ + 隐藏单个Alpha + PATCH https://api.worldquantbrain.com/alphas/{alpha_id} + Payload: {"hidden":true} + 隐藏后输出is数据 + """ + alpha_id = alpha_info['id'] + is_data = alpha_info.get('is', {}) + + 
url = f"https://api.worldquantbrain.com/alphas/{alpha_id}" + payload = {"hidden": True} + + try: + response = request_with_retry(client, 'patch', url, json=payload) + + if response.status_code in [200, 204]: + # 输出is数据 + sharpe = is_data.get('sharpe', 'N/A') + fitness = is_data.get('fitness', 'N/A') + margin = is_data.get('margin', 'N/A') + drawdown = is_data.get('drawdown', 'N/A') + returns = is_data.get('returns', 'N/A') + + print(f" ✓ Alpha {alpha_id} 隐藏成功") + print(f" sharpe={sharpe}, fitness={fitness}, margin={margin}, drawdown={drawdown}, returns={returns}") + return True + else: + print(f" ✗ Alpha {alpha_id} 隐藏失败: {response.status_code}") + print(f" 响应: {response.text}") + return False + except Exception as e: + print(f" ✗ Alpha {alpha_id} 隐藏异常: {str(e)}") + return False + + +def batch_hide_alphas(client, alphas): + """ + 批量隐藏Alpha + """ + total = len(alphas) + success_count = 0 + fail_count = 0 + + print(f"\n开始批量隐藏 {total} 个Alpha...") + print("=" * 50) + + for index, alpha_info in enumerate(alphas, 1): + alpha_id = alpha_info['id'] + print(f"[{index}/{total}] 正在隐藏 Alpha: {alpha_id}") + + if hide_alpha(client, alpha_info): + success_count += 1 + else: + fail_count += 1 + + time.sleep(0.5) + + print("=" * 50) + print(f"批量隐藏完成!") + print(f"成功: {success_count} 个") + print(f"失败: {fail_count} 个") + + return success_count, fail_count + + +def run(client): + # 目标URL(获取未隐藏的、低sharpe的alpha) + TARGET_URL = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=0&status=UNSUBMITTED%1FIS_FAIL&is.sharpe%3C0.65&is.sharpe%3E-0.8&is.turnover%3C0.2&order=-is.sharpe&hidden=false" + + if client: + try: + print("\n" + "=" * 50) + print("获取所有符合条件的Alpha") + print("=" * 50) + + alphas = fetch_all_alphas(client, TARGET_URL) + + if not alphas: + print("没有找到需要隐藏的Alpha") + exit(1) + else: + print(f"\n总共找到 {len(alphas)} 个需要隐藏的Alpha") + + print("\n" + "=" * 50) + print("批量隐藏Alpha") + print("=" * 50) + + success, fail = batch_hide_alphas(client, alphas) + + except Exception 
as e: + print(str(e)) + else: + print("登录失败,程序退出") + + +def main(): + # 登录 + client = login() + + while True: + run(client) + time.sleep(60) + + + +if __name__ == '__main__': + main() diff --git a/hide_low_sharpe_alpha/result.json b/hide_low_sharpe_alpha/result.json new file mode 100644 index 0000000..de15423 --- /dev/null +++ b/hide_low_sharpe_alpha/result.json @@ -0,0 +1,3891 @@ +{ + "count": 10000, + "next": "http://api.worldquantbrain.com:443/users/self/alphas?hidden=false&is.sharpe%3C0.7=&is.sharpe%3E-0.8=&is.turnover%3C0.2=&limit=22&offset=22&order=-is.sharpe&status=UNSUBMITTED%1FIS_FAIL", + "previous": null, + "results": [ + { + "id": "1YnXbbLm", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "IND", + "universe": "TOP500", + "delay": 1, + "decay": 12, + "neutralization": "SLOW_AND_FAST", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "abs(subtract(oth335_hc_combined_all_region_mind, divide(add(add(oth335_combined_all_region_linear, oth335_hc_combined_all_region_hedge), oth335_hc_combined_all_region_shield), 3)))", + "description": null, + "operatorCount": 5 + }, + "dateCreated": "2026-04-19T22:38:21-04:00", + "dateSubmitted": null, + "dateModified": "2026-04-19T22:38:25-04:00", + "name": "8a29b337cde9", + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3181669, + "bookSize": 20000000, + "longCount": 232, + "shortCount": 269, + "turnover": 0.1057, + "returns": 0.0308, + "drawdown": 0.0953, + "margin": 0.000582, + "sharpe": 0.69, + "fitness": 0.34, + 
"startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.34 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1057 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.4, + "value": 0.1057 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "FAIL", + "date": "2020-05-12", + "limit": 0.1, + "value": 0.400075 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.2, + "value": 0.1 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 1, + "value": 0.23 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.52, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "IND/D1/OTHER", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "A1OdVgMX", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SUBINDUSTRY", + "truncation": 0.02, + 
"pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "ts_sum(mdl219_1_monchgsip, 60) - mdl219_3_ratrev6m", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-04-12T13:18:49-04:00", + "dateSubmitted": null, + "dateModified": "2026-04-12T13:18:53-04:00", + "name": "a7f6c0fa76fd", + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 2902190, + "bookSize": 20000000, + "longCount": 249, + "shortCount": 261, + "turnover": 0.0467, + "returns": 0.0291, + "drawdown": 0.0749, + "margin": 0.001244, + "sharpe": 0.69, + "fitness": 0.33, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 1336668, + "bookSize": 20000000, + "longCount": 249, + "shortCount": 261, + "turnover": 0.0467, + "returns": 0.0134, + "drawdown": 0.0568, + "margin": 0.000573, + "fitness": 0.15, + "sharpe": 0.45 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.33 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0467 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0467 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.33, + "value": 0.75 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": 
"PENDING" + }, + { + "name": "HT_TURNOVER", + "result": "WARNING", + "limit": 0.2, + "value": 0.0467 + }, + { + "name": "HT_HIGH_TURNOVER_RETURNS_RATIO", + "result": "WARNING", + "limit": 0.75, + "value": 0.0515 + }, + { + "name": "HT_PNL_REALIZATION_HORIZON", + "result": "WARNING", + "limit": 20, + "value": 120 + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.23, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "USA/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "LLlx6jR2", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SUBINDUSTRY", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "ts_sum(mdl219_1_monchgsip, 60) - mdl219_2_ratrev6m", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-04-12T10:03:10-04:00", + "dateSubmitted": null, + "dateModified": "2026-04-12T10:03:16-04:00", + "name": "19faabf67dc0", + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": 
"DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 2905892, + "bookSize": 20000000, + "longCount": 248, + "shortCount": 261, + "turnover": 0.0466, + "returns": 0.0291, + "drawdown": 0.0755, + "margin": 0.001249, + "sharpe": 0.69, + "fitness": 0.33, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 1339489, + "bookSize": 20000000, + "longCount": 248, + "shortCount": 261, + "turnover": 0.0466, + "returns": 0.0134, + "drawdown": 0.0576, + "margin": 0.000576, + "fitness": 0.15, + "sharpe": 0.45 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.33 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0466 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0466 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.33, + "value": 0.75 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "HT_TURNOVER", + "result": "WARNING", + "limit": 0.2, + "value": 0.0466 + }, + { + "name": "HT_HIGH_TURNOVER_RETURNS_RATIO", + "result": "WARNING", + "limit": 0.75, + "value": 0.0584 + }, + { + "name": "HT_PNL_REALIZATION_HORIZON", + "result": "WARNING", + "limit": 20, + "value": 120 + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.25, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "USA/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": 
"MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "xAzWNaAW", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP500", + "delay": 1, + "decay": 24, + "neutralization": "REVERSION_AND_MOMENTUM", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "subtract(max(max(mdl262_trkdpit_models_profitability1, mdl262_rasv2splitprofitabilityrev_ttm_profitability2), mdl262_rasv2splitprofitabilityrev_ttm_profitability3), min(min(mdl262_trkdpit_models_profitability1, mdl262_rasv2splitprofitabilityrev_ttm_profitability2), mdl262_rasv2splitprofitabilityrev_ttm_profitability3))", + "description": null, + "operatorCount": 5 + }, + "dateCreated": "2026-04-06T04:34:27-04:00", + "dateSubmitted": null, + "dateModified": "2026-04-06T04:34:34-04:00", + "name": "07e8f23f642a", + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 9889371, + "bookSize": 20000000, + "longCount": 159, + "shortCount": 356, + "turnover": 0.1519, + "returns": 0.0991, + "drawdown": 0.2195, + "margin": 0.001304, + "sharpe": 0.69, + 
"fitness": 0.56, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.56 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1519 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1519 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "FAIL", + "date": "2023-08-09", + "limit": 0.1, + "value": 0.290529 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.33, + "value": 0.09 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "HT_TURNOVER", + "result": "WARNING", + "limit": 0.2, + "value": 0.1519 + }, + { + "name": "HT_HIGH_TURNOVER_RETURNS_RATIO", + "result": "WARNING", + "limit": 0.75, + "value": 0.0172 + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 1.24, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "USA/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "N1jnxeML", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + 
"region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 12, + "neutralization": "FAST", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "subtract(divide(anl14_stddev_eps_fy2, anl14_median_eps_fy1), divide(anl14_high_revenue_fy2, anl14_numofests_revenue_fy1))", + "description": null, + "operatorCount": 3 + }, + "dateCreated": "2026-03-31T02:16:57-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-31T02:16:57-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3707742, + "bookSize": 20000000, + "longCount": 1913, + "shortCount": 1216, + "turnover": 0.1342, + "returns": 0.0372, + "drawdown": 0.086, + "margin": 0.000554, + "sharpe": 0.69, + "fitness": 0.36, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.36 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1342 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1342 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.3, + "value": 0.53 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "HT_TURNOVER", + 
"result": "WARNING", + "limit": 0.2, + "value": 0.1342 + }, + { + "name": "HT_HIGH_TURNOVER_RETURNS_RATIO", + "result": "WARNING", + "limit": 0.75, + "value": 0.0457 + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.54, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "QPl20rZG", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 12, + "neutralization": "FAST", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "subtract(divide(anl14_stddev_eps_fy2, anl14_high_eps_fy1), divide(anl14_high_revenue_fy2, anl14_numofests_revenue_fy1))", + "description": null, + "operatorCount": 3 + }, + "dateCreated": "2026-03-30T23:30:28-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-30T23:30:28-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + 
"stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3724409, + "bookSize": 20000000, + "longCount": 1913, + "shortCount": 1216, + "turnover": 0.1341, + "returns": 0.0373, + "drawdown": 0.0861, + "margin": 0.000557, + "sharpe": 0.69, + "fitness": 0.36, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.36 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1341 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1341 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.3, + "value": 0.54 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "HT_TURNOVER", + "result": "WARNING", + "limit": 0.2, + "value": 0.1341 + }, + { + "name": "HT_HIGH_TURNOVER_RETURNS_RATIO", + "result": "WARNING", + "limit": 0.75, + "value": 0.0483 + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.54, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + 
"team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "j2wzR5WE", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 8, + "neutralization": "FAST", + "truncation": 0.06, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "subtract(mdl262_trkdpitpredictivetangiblebvps_mad_act, mdl262_trkdpitpredictiveroa_mad_pred)", + "description": null, + "operatorCount": 1 + }, + "dateCreated": "2026-03-25T15:59:32-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-25T15:59:33-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 1422433, + "bookSize": 20000000, + "longCount": 1440, + "shortCount": 1689, + "turnover": 0.0762, + "returns": 0.0143, + "drawdown": 0.069, + "margin": 0.000374, + "sharpe": 0.69, + "fitness": 0.23, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.23 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0762 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0762 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.3, + "value": 0.31 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + 
"name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "PASS", + "value": 2.16, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "USA/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "le2mMqrn", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 8, + "neutralization": "FAST", + "truncation": 0.05, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "ts_sum(subtract(oth432_trkdpitpredictivetangiblebvps_mad_act, oth432_trkdpitpredictiveroe_mad_pred), 126)", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-24T10:34:37-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-24T10:34:37-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 1642406, + 
"bookSize": 20000000, + "longCount": 1471, + "shortCount": 1658, + "turnover": 0.0498, + "returns": 0.0165, + "drawdown": 0.0799, + "margin": 0.000661, + "sharpe": 0.69, + "fitness": 0.25, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.25 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0498 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0498 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.3, + "value": 0.49 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "PASS", + "value": 1.9, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "USA/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "LL6v05G6", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 10, + "neutralization": 
"INDUSTRY", + "truncation": 0.05, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "if_else(less(sales_estimate_stddev_quarterly, eps_adjusted_min_guidance_value), -1, if_else(greater(sales_estimate_stddev_quarterly, eps_adjusted_min_guidance_value), 1, 0))", + "description": null, + "operatorCount": 4 + }, + "dateCreated": "2026-03-24T06:10:38-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-24T06:10:38-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 1411624, + "bookSize": 20000000, + "longCount": 1709, + "shortCount": 1420, + "turnover": 0.0214, + "returns": 0.0141, + "drawdown": 0.0527, + "margin": 0.001324, + "sharpe": 0.69, + "fitness": 0.23, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 231345, + "bookSize": 20000000, + "longCount": 1709, + "shortCount": 1420, + "turnover": 0.0214, + "returns": 0.0023, + "drawdown": 0.0599, + "margin": 0.000217, + "fitness": 0.02, + "sharpe": 0.15 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.23 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0214 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0214 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.3, + "value": 0.24 + }, + { + "name": "UNITS", + "result": "WARNING", + "message": "Incompatible unit for 
input of \"less\" at index 1, expected \"Unit[CSPrice:1]\", found \"Unit[CSPrice:1,CSShare:1]\"" + }, + { + "name": "UNITS", + "result": "WARNING", + "message": "Incompatible unit for input of \"greater\" at index 1, expected \"Unit[CSPrice:1]\", found \"Unit[CSPrice:1,CSShare:1]\"" + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 1.17, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "1YL3JgYQ", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 8, + "neutralization": "FAST", + "truncation": 0.06, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "divide(subtract(pretax_income_reported_value, anl4_qf_az_cfps_mean), anl4_ebitda_std)", + "description": null, + "operatorCount": 2 + }, + "dateCreated": 
"2026-03-23T14:34:26-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-23T14:34:26-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3521675, + "bookSize": 20000000, + "longCount": 1500, + "shortCount": 1629, + "turnover": 0.1284, + "returns": 0.0353, + "drawdown": 0.1627, + "margin": 0.00055, + "sharpe": 0.69, + "fitness": 0.36, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.36 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1284 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1284 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "FAIL", + "date": "2022-05-03", + "limit": 0.1, + "value": 0.172558 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.3, + "value": 0.68 + }, + { + "name": "UNITS", + "result": "WARNING", + "message": "Incompatible unit for input of \"subtract\" at index 1, expected \"Unit[CSPrice:1,CSShare:1]\", found \"Unit[]\"" + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.32, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + 
"multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "npo57n0E", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 8, + "neutralization": "FAST", + "truncation": 0.06, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "divide(subtract(selling_general_admin_expense_reported_value, anl4_afv4_cfps_mean), anl4_ebitda_std)", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-23T07:52:36-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-23T07:52:36-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3027060, + "bookSize": 20000000, + "longCount": 1730, + "shortCount": 1399, + "turnover": 0.1245, + "returns": 0.0303, + "drawdown": 0.0733, + "margin": 0.000487, + "sharpe": 0.69, + "fitness": 0.34, + "startDate": "2014-01-01", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.34 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 
0.1245 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1245 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.3, + "value": 0.56 + }, + { + "name": "UNITS", + "result": "WARNING", + "message": "Incompatible unit for input of \"subtract\" at index 1, expected \"Unit[CSPrice:1]\", found \"Unit[]\"" + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.31, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "blMdKYVq", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 5, + "neutralization": "FAST", + "truncation": 0.04, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": 
"ts_sum(ts_delta(anl14_mean_eps_fp1, 1), 63)", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-17T02:35:51-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-17T02:35:51-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 2965910, + "bookSize": 20000000, + "longCount": 1517, + "shortCount": 1609, + "turnover": 0.1407, + "returns": 0.0297, + "drawdown": 0.087, + "margin": 0.000422, + "sharpe": 0.69, + "fitness": 0.32, + "startDate": "2014-01-01", + "investabilityConstrained": { + "pnl": 1579184, + "bookSize": 20000000, + "longCount": 1513, + "shortCount": 1616, + "turnover": 0.1158, + "returns": 0.0158, + "drawdown": 0.0722, + "margin": 0.000273, + "fitness": 0.14, + "sharpe": 0.4 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.32 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1407 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1407 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.3, + "value": 0.05 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.78, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + 
} + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "RRY167Qd", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 5, + "neutralization": "NONE", + "truncation": 0.04, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "divide(anl14_median_ntprep_fp1, add(anl14_median_ntprep_fp1, 0.01))", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-17T01:04:14-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-17T01:04:14-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 27055816, + "bookSize": 20000000, + "longCount": 2526, + "shortCount": 1, + "turnover": 0.0043, + "returns": 0.2711, + "drawdown": 0.5959, + "margin": 0.125767, + "sharpe": 0.69, + "fitness": 1.02, + "startDate": "2014-01-01", + "investabilityConstrained": { + "pnl": -153787, + "bookSize": 20000000, + "longCount": 2084, + "shortCount": 462, + "turnover": 0.0073, + "returns": -0.0015, + "drawdown": 0.2307, + 
"margin": -0.000423, + "fitness": 0, + "sharpe": -0.02 + }, + "riskNeutralized": { + "pnl": 383582, + "bookSize": 20000000, + "longCount": 2526, + "shortCount": 1, + "turnover": 0.0043, + "returns": 0.0038, + "drawdown": 0.0241, + "margin": 0.001783, + "fitness": 0.08, + "sharpe": 0.45 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "PASS", + "limit": 1, + "value": 1.02 + }, + { + "name": "LOW_TURNOVER", + "result": "FAIL", + "limit": 0.01, + "value": 0.0043 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0043 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.3, + "value": -0.33 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.11, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "589kkM6J", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + 
"universe": "TOP3000", + "delay": 1, + "decay": 5, + "neutralization": "NONE", + "truncation": 0.04, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "divide(anl14_median_ntprep_fp1, add(anl14_mean_ntprep_fp1, 0.01))", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-17T00:02:28-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-17T00:02:29-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 26245371, + "bookSize": 20000000, + "longCount": 2508, + "shortCount": 19, + "turnover": 0.0176, + "returns": 0.263, + "drawdown": 0.5911, + "margin": 0.029895, + "sharpe": 0.69, + "fitness": 1, + "startDate": "2014-01-01", + "investabilityConstrained": { + "pnl": -318392, + "bookSize": 20000000, + "longCount": 2068, + "shortCount": 478, + "turnover": 0.0114, + "returns": -0.0032, + "drawdown": 0.2301, + "margin": -0.000558, + "fitness": -0.01, + "sharpe": -0.04 + }, + "riskNeutralized": { + "pnl": 296268, + "bookSize": 20000000, + "longCount": 2508, + "shortCount": 19, + "turnover": 0.0176, + "returns": 0.003, + "drawdown": 0.075, + "margin": 0.000337, + "fitness": 0.01, + "sharpe": 0.09 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "PASS", + "limit": 1, + "value": 1 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0176 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0176 + }, + { + "name": "CONCENTRATED_WEIGHT", + 
"result": "FAIL", + "date": "2023-11-22", + "limit": 0.1, + "value": 0.226179 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.3, + "value": -0.23 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.21, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.2, + "pyramids": [ + { + "name": "USA/D1/ANALYST", + "multiplier": 1.2 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "Wjd6VNeO", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 4, + "neutralization": "NONE", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "ts_corr(social_hybrid_sector_percentile_score, sustainability_maxcorr_sector_percentile_score, 63)", + "description": null, + "operatorCount": 1 + }, + "dateCreated": "2026-03-10T13:15:26-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-10T13:15:26-04:00", + 
"name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3440225, + "bookSize": 20000000, + "longCount": 801, + "shortCount": 605, + "turnover": 0.0626, + "returns": 0.0333, + "drawdown": 0.0761, + "margin": 0.001065, + "sharpe": 0.69, + "fitness": 0.36, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 455327, + "bookSize": 20000000, + "longCount": 801, + "shortCount": 605, + "turnover": 0.0626, + "returns": 0.0044, + "drawdown": 0.047, + "margin": 0.000141, + "fitness": 0.05, + "sharpe": 0.25 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.36 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0626 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0626 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.37, + "value": 0.45 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": 0.6 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.61, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.5, + "pyramids": [ + { + "name": "EUR/D1/ANALYST", + "multiplier": 1.5 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + 
"name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "A1bMaaeY", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 4, + "neutralization": "NONE", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "divide(ts_std_dev(employer_subsector_percentile_score, 126), ts_mean(employer_subsector_percentile_score, 126))", + "description": null, + "operatorCount": 3 + }, + "dateCreated": "2026-03-10T11:33:20-04:00", + "dateSubmitted": null, + "dateModified": "2026-03-10T11:33:20-04:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3283187, + "bookSize": 20000000, + "longCount": 574, + "shortCount": 479, + "turnover": 0.0443, + "returns": 0.0318, + "drawdown": 0.0729, + "margin": 0.001436, + "sharpe": 0.69, + "fitness": 0.35, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 268026, + "bookSize": 20000000, + "longCount": 574, + "shortCount": 479, + "turnover": 0.0443, + "returns": 0.0026, + "drawdown": 0.0528, + "margin": 0.000117, + "fitness": 0.02, + "sharpe": 0.12 + }, + "checks": [ + { + "name": 
"LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.35 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0443 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0443 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.37, + "value": 0.41 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": 0.61 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.66, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.5, + "pyramids": [ + { + "name": "EUR/D1/ANALYST", + "multiplier": 1.5 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "0m9aj5Qv", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 4, + "neutralization": "SECTOR", + "truncation": 0.005, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "ON", + "maxPosition": 
"OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "multiply(vec_avg(oth567_trend_deltarating_compensation_and_benefits), multiply(vec_avg(oth567_deltascore_work_life_balance_274), multiply(vec_avg(oth567_trend_deltarating_career_opportunities_297), vec_avg(oth567_trend_deltarating_culture_and_values))))", + "description": null, + "operatorCount": 7 + }, + "dateCreated": "2026-03-05T11:34:14-05:00", + "dateSubmitted": null, + "dateModified": "2026-03-05T11:34:14-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 2994624, + "bookSize": 20000000, + "longCount": 12, + "shortCount": 3, + "turnover": 0.1084, + "returns": 0.0728, + "drawdown": 0.2078, + "margin": 0.001343, + "sharpe": 0.69, + "fitness": 0.53, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 2288658, + "bookSize": 20000000, + "longCount": 12, + "shortCount": 3, + "turnover": 0.1084, + "returns": 0.0556, + "drawdown": 0.0759, + "margin": 0.001026, + "fitness": 0.39, + "sharpe": 0.59 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.53 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.1084 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.1084 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "FAIL", + "date": "2022-07-26", + "limit": 0.1, + "value": 0.5 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.37, + "value": 0.11 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": -0.17 + }, 
+ { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.77, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.6, + "pyramids": [ + { + "name": "EUR/D1/OTHER", + "multiplier": 1.6 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "xA9v82mq", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 8, + "neutralization": "NONE", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "(anl15_ebt_gr_12_m_ests_up + anl15_cps_gr_12_m_ests_dn) / anl15_ebt_ind_12_m_ests_dn", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-02T11:52:15-05:00", + "dateSubmitted": null, + "dateModified": "2026-03-02T11:52:15-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set 
Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 4618626, + "bookSize": 20000000, + "longCount": 386, + "shortCount": 394, + "turnover": 0.0645, + "returns": 0.0448, + "drawdown": 0.0688, + "margin": 0.001388, + "sharpe": 0.69, + "fitness": 0.41, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 714865, + "bookSize": 20000000, + "longCount": 386, + "shortCount": 394, + "turnover": 0.0645, + "returns": 0.0069, + "drawdown": 0.0691, + "margin": 0.000215, + "fitness": 0.05, + "sharpe": 0.22 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.41 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0645 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0645 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.37, + "value": 0.33 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": 0.27 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.86, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.5, + "pyramids": [ + { + "name": "EUR/D1/ANALYST", + "multiplier": 1.5 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + 
} + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "XgP9Admz", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 8, + "neutralization": "SECTOR", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "(anl15_gr_12_m_ests_up + anl15_dps_gr_12_m_ests_dn) / anl15_bps_ind_12_m_ests_dn", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-03-02T10:06:27-05:00", + "dateSubmitted": null, + "dateModified": "2026-03-02T10:06:27-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3891612, + "bookSize": 20000000, + "longCount": 334, + "shortCount": 461, + "turnover": 0.0689, + "returns": 0.0377, + "drawdown": 0.0577, + "margin": 0.001095, + "sharpe": 0.69, + "fitness": 0.38, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 2554613, + "bookSize": 20000000, + "longCount": 334, + "shortCount": 461, + "turnover": 0.0689, + "returns": 0.0248, + "drawdown": 0.0561, + "margin": 0.000719, + "fitness": 0.3, + "sharpe": 0.68 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.38 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 
0.0689 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0689 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.37, + "value": 0.47 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": 0.29 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 0.88, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.5, + "pyramids": [ + { + "name": "EUR/D1/ANALYST", + "multiplier": 1.5 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "pwoj9n5V", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 5, + "neutralization": "SECTOR", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "multiply(multiply(count_analyst_upward_revision_year_earnings_14d, 
count_analyst_upward_revision_year_revenue_14d), sign(count_analyst_upward_revision_year_ebitda_14d))", + "description": null, + "operatorCount": 3 + }, + "dateCreated": "2026-02-28T13:38:00-05:00", + "dateSubmitted": null, + "dateModified": "2026-02-28T13:38:00-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 4449917, + "bookSize": 20000000, + "longCount": 227, + "shortCount": 1006, + "turnover": 0.0728, + "returns": 0.0431, + "drawdown": 0.1435, + "margin": 0.001185, + "sharpe": 0.69, + "fitness": 0.41, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 92139, + "bookSize": 20000000, + "longCount": 227, + "shortCount": 1006, + "turnover": 0.0728, + "returns": 0.0009, + "drawdown": 0.1634, + "margin": 0.000025, + "fitness": 0, + "sharpe": 0.02 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.41 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0728 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0728 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.37, + "value": 0.64 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": 0.55 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 1.29, + "limit": 1.58 + }, + { + "result": 
"PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "EUR/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + }, + { + "id": "780jRlvQ", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 12, + "neutralization": "SECTOR", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "multiply(multiply(avg_pct_change_estimate_next_quarter_earnings_14d, avg_pct_change_estimate_forward_revenue_14d), sign(avg_pct_change_estimate_next_year_ebitda_14d))", + "description": null, + "operatorCount": 3 + }, + "dateCreated": "2026-02-28T12:51:36-05:00", + "dateSubmitted": null, + "dateModified": "2026-02-28T12:51:36-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 12023841, + "bookSize": 20000000, + "longCount": 257, + "shortCount": 211, + "turnover": 0.0698, + "returns": 0.1165, + "drawdown": 0.2818, + "margin": 0.003337, + 
"sharpe": 0.69, + "fitness": 0.67, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 4475082, + "bookSize": 20000000, + "longCount": 257, + "shortCount": 211, + "turnover": 0.0698, + "returns": 0.0434, + "drawdown": 0.4356, + "margin": 0.001242, + "fitness": 0.18, + "sharpe": 0.31 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.67 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0698 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0698 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "FAIL", + "date": "2017-01-10", + "limit": 0.1, + "value": 0.287959 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.37, + "value": 0.43 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": 0.04 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "PASS", + "value": 1.87, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "EUR/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": 
null, + "origin": "PLATFORM" + }, + { + "id": "0m9wXqEq", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "EUR", + "universe": "TOPCS1600", + "delay": 1, + "decay": 12, + "neutralization": "SECTOR", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "ON", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "regular": { + "code": "divide(avg_pct_change_estimate_quarter_earnings_7d, add(avg_pct_change_estimate_next_year_earnings_30d, avg_pct_change_estimate_quarter_earnings_90d))", + "description": null, + "operatorCount": 2 + }, + "dateCreated": "2026-02-28T10:16:07-05:00", + "dateSubmitted": null, + "dateModified": "2026-02-28T10:16:07-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [ + { + "id": "DATA_USAGE:SINGLE_DATA_SET", + "name": "Single Data Set Alpha" + } + ], + "grade": null, + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 6084297, + "bookSize": 20000000, + "longCount": 221, + "shortCount": 290, + "turnover": 0.0801, + "returns": 0.059, + "drawdown": 0.1987, + "margin": 0.001472, + "sharpe": 0.69, + "fitness": 0.47, + "startDate": "2014-01-01", + "riskNeutralized": { + "pnl": 2658660, + "bookSize": 20000000, + "longCount": 221, + "shortCount": 290, + "turnover": 0.0801, + "returns": 0.0258, + "drawdown": 0.1577, + "margin": 0.000643, + "fitness": 0.15, + "sharpe": 0.33 + }, + "checks": [ + { + "name": "LOW_SHARPE", + "result": "FAIL", + "limit": 1.58, + "value": 0.69 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", + "limit": 1, + "value": 0.47 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.0801 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.0801 + }, + { + "name": 
"CONCENTRATED_WEIGHT", + "result": "FAIL", + "date": "2018-05-01", + "limit": 0.1, + "value": 0.330933 + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.37, + "value": 0.39 + }, + { + "name": "LOW_ROBUST_UNIVERSE_SHARPE", + "result": "FAIL", + "limit": 0.7, + "value": -0.03 + }, + { + "name": "UNITS", + "result": "WARNING", + "message": "Incompatible unit for input of \"add\" at index 1, expected \"Unit[]\", found \"Unit[CSPrice:1,CSShare:1]\"" + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "DATA_DIVERSITY", + "result": "PENDING" + }, + { + "name": "PROD_CORRELATION", + "result": "PENDING" + }, + { + "name": "REGULAR_SUBMISSION", + "result": "PENDING" + }, + { + "name": "LOW_2Y_SHARPE", + "result": "FAIL", + "value": 1.01, + "limit": 1.58 + }, + { + "result": "PASS", + "name": "MATCHES_PYRAMID", + "effective": 1, + "multiplier": 1.4, + "pyramids": [ + { + "name": "EUR/D1/MODEL", + "multiplier": 1.4 + } + ] + }, + { + "result": "WARNING", + "name": "MATCHES_THEMES", + "themes": [ + { + "id": "KymLz14", + "multiplier": 1, + "name": "All regions/D1 Power Pool Apr`26 2" + }, + { + "id": "lBqMzOB", + "multiplier": 3, + "name": "Investable HTVR Theme" + } + ] + }, + { + "name": "OSMOSIS_ALLOCATION", + "result": "WARNING" + } + ] + }, + "os": null, + "train": null, + "test": null, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null, + "osmosisPoints": null, + "origin": "PLATFORM" + } + ] +} \ No newline at end of file diff --git a/simple72/CODING_Guidance_Document.md b/simple72/CODING_Guidance_Document.md new file mode 100644 index 0000000..daced9b --- /dev/null +++ b/simple72/CODING_Guidance_Document.md @@ -0,0 +1,65 @@ +# CLAUDE.md + +Behavioral guidelines to reduce common LLM coding mistakes. Merge with project-specific instructions as needed. + +**Tradeoff:** These guidelines bias toward caution over speed. For trivial tasks, use judgment. 
+ +## 1. Think Before Coding + +**Don't assume. Don't hide confusion. Surface tradeoffs.** + +Before implementing: +- State your assumptions explicitly. If uncertain, ask. +- If multiple interpretations exist, present them - don't pick silently. +- If a simpler approach exists, say so. Push back when warranted. +- If something is unclear, stop. Name what's confusing. Ask. + +## 2. Simplicity First + +**Minimum code that solves the problem. Nothing speculative.** + +- No features beyond what was asked. +- No abstractions for single-use code. +- No "flexibility" or "configurability" that wasn't requested. +- No error handling for impossible scenarios. +- If you write 200 lines and it could be 50, rewrite it. + +Ask yourself: "Would a senior engineer say this is overcomplicated?" If yes, simplify. + +## 3. Surgical Changes + +**Touch only what you must. Clean up only your own mess.** + +When editing existing code: +- Don't "improve" adjacent code, comments, or formatting. +- Don't refactor things that aren't broken. +- Match existing style, even if you'd do it differently. +- If you notice unrelated dead code, mention it - don't delete it. + +When your changes create orphans: +- Remove imports/variables/functions that YOUR changes made unused. +- Don't remove pre-existing dead code unless asked. + +The test: Every changed line should trace directly to the user's request. + +## 4. Goal-Driven Execution + +**Define success criteria. Loop until verified.** + +Transform tasks into verifiable goals: +- "Add validation" → "Write tests for invalid inputs, then make them pass" +- "Fix the bug" → "Write a test that reproduces it, then make it pass" +- "Refactor X" → "Ensure tests pass before and after" + +For multi-step tasks, state a brief plan: +``` +1. [Step] → verify: [check] +2. [Step] → verify: [check] +3. [Step] → verify: [check] +``` + +Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification. 
+ +--- + +**These guidelines are working if:** fewer unnecessary changes in diffs, fewer rewrites due to overcomplication, and clarifying questions come before implementation rather than after mistakes. diff --git a/simple72/README.md b/simple72/README.md new file mode 100644 index 0000000..77916b4 --- /dev/null +++ b/simple72/README.md @@ -0,0 +1,383 @@ +# Alpha Transformer () + +基于AI的Alpha变种自动生成器,将一个种子Alpha转换为多个具有不同逻辑和参数的新Alpha表达式。 + +## 功能概述 + +****的核心思想是:给定一个种子Alpha作为参考,结合大量已知的Alpha模板和AI能力,自动生成多个具有不同策略逻辑的新Alpha表达式。这些变种可以用于: +- 策略多元化(避免单一策略风险) +- 参数优化(探索最优参数组合) +- 逻辑变体(发现新的市场规律) + +## 工作原理详解 + +### 整体流程 + +``` +种子Alpha → 获取详情 → AI生成模板 → 填充参数 → 验证输出 + ↓ ↓ ↓ ↓ ↓ + 原始ID settings 逻辑变体 表达式组合 JSON结果 + expression + expression + datafields +``` + +### 第一阶段:获取种子Alpha信息 + +当用户输入Alpha ID后,系统会: + +1. **连接BRAIN平台** - 使用提供的用户名密码认证 +2. **获取Alpha详情** - 通过API获取以下信息: + - `settings`: Alpha的配置参数(region, universe, delay等) + - `expression`: Alpha的具体表达式代码 + - `description`: Alpha的策略描述 + - `operators`: 使用的算子列表(ts_mean, group_rank等) + - `data_fields`: 使用的数据字段(close, volume等) + +3. **解析表达式结构** - 分析Alpha代码中的: + - 占位符类型(数据字段、时间窗口、分组方式等) + - 算子组合模式 + - 数据依赖关系 + +### 第二阶段:AI生成模板 + +这是**的核心**。系统会: + +1. **构建提示词** - 将以下内容发送给LLM: + ``` + 种子Alpha详情(settings + expression + 使用的算子) + + 模板摘要(template_summary.md中的90+个模板) + + 生成指令(要求生成X个变种模板) + ``` + +2. **LLM推理** - AI根据以下信息生成新模板: + - 种子Alpha的核心逻辑是什么? + - template_summary中有哪些类似或可组合的模板? + - 如何创造性地变形生成新模板? + +3. **模板输出** - LLM返回的每个模板包含: + - `template_expression`: 使用占位符的模板表达式 + - 例如:`group_rank( / ts_mean(, ))` + - `template_explanation`: 模板的核心思想和逻辑说明 + +### 第三阶段:模板填充 + +对每个生成的模板,系统会: + +1. **识别占位符** - 解析模板中的所有``: + ```python + # 常见的占位符类型 + # 数据字段(如 close, volume, pe_ratio) + # 时间窗口(如 20, 60, 120) + # 分组方式(如 industry, sector) + # 操作符(如 ts_mean, rank) + ``` + +2. **获取候选数据** - 从BRAIN API获取: + - 符合条件的数据字段列表(top_n个) + - 可用的窗口参数 + - 支持的分组方式 + +3. 
**组合生成** - 笛卡尔积组合所有候选: + ```python + # 如果模板有2个和3个 + # 则生成 50 * 50 * 3 = 7500 个表达式(示例) + + # 实际会限制总数,避免组合爆炸 + ``` + +4. **设置继承** - 新生成的表达式会继承种子Alpha的: + - Region(地区) + - Universe(股票池) + - Delay(延迟) + - Neutralization(中性化方式) + +### 第四阶段:验证与输出 + +1. **表达式验证** - 验证生成的表达式是否合法: + - 语法检查 + - 数据依赖检查 + - 算子兼容性检查 + +2. **结果分类** - 分成三类输出: + - **Alpha_candidates.json**: 模板级别的结果 + - 包含模板表达式和每个占位符的候选列表 + - 可用于进一步手动编辑或参数调整 + + - **Alpha_generated_expressions_success.json**: 成功生成的表达式 + - 具体的、可直接使用的Alpha表达式 + - 可导入BRAIN或回测器 + + - **Alpha_generated_expressions_error.json**: 失败的表达式 + - 生成或验证过程中出错的表达式 + - 用于排查模板问题 + +## template_summary.txt 的作用 + +### 文件内容 + +`template_summary.md`是一个包含**90+个精选Alpha模板**的文档,每个模板包含: + +1. **Hypothesis(假设)** - 策略的核心思想 + ``` + "After news is released, if a stock takes a longer time to rise, + it may show strong evidence of upward momentum" + ``` + +2. **Expression(表达式)** - 具体的Alpha代码 + ``` + `ts_backfill(vec_avg(nws12_prez_4l),504)` + ``` + +3. **Settings(设置)** - 推荐使用的配置 + ``` + Region: USA, Universe: TOP500, Delay: 1 + ``` + +4. **逻辑链深度解析** - 为什么这个Alpha有效 + ``` + - 时序相对性: ts_backfill处理新闻数据的稀疏性 + - 算子深意: vec_avg聚合多维情绪 + ``` + +5. **优化方向** - 如何进一步改进 + ``` + - 去噪: 增加winsorize或rank + - 从属信号: 叠加Social Media Effect + ``` + +### 模板分类 + +文件中的模板来自多个来源: + +1. **Learn系列** - BRAIN官方教程中的示例 + - Learn101: 基础Alpha示例 + - Learn102: 中级Alpha示例 + - Learn103: 高级Alpha示例 + +2. **《151 Trading Strategies》** - 学术论文中的策略 + - 动量策略 + - 价值策略 + - 波动率策略 + +3. **社区精选** - 论坛中高评分的Alpha + +### 为什么修改它能生成不同结果? + +**是的,这是生成不同Alpha变种的关键!** + +当你修改`template_summary.md`时: + +| 修改内容 | 影响 | +|---------|------| +| **增加新模板** | LLM有更多参考,生成更多样化的变种 | +| **删除旧模板** | 生成的变种会集中在剩余模板上 | +| **修改模板说明** | LLM对模板的理解改变,生成的变种逻辑不同 | +| **调整模板格式** | 可能影响LLM的解析和理解 | + +### 如何优化template_summary + +**建议策略:** + +1. **按主题分类** - 如果你想生成某类策略的变种 + ```markdown + ## 动量策略 + [相关的5-10个模板] + + ## 价值策略 + [相关的5-10个模板] + ``` + +2. 
**加入自己的Alpha** - 如果你有成功的Alpha + ```markdown + ## 我的成功策略 + **Expression**: `group_rank(close / ts_mean(close, 20))` + **核心思想**: 均线偏离策略 + ``` + +3. **保持格式一致** - 确保每个模板都包含: + - 清晰的假设 + - 具体表达式 + - 逻辑解析 + +4. **定期更新** - 随着策略进化,不断添加新的有效模板 + +## 快速开始 + +### 1. 安装依赖 + +```bash +cd /Users/jack/source/mySpace/mycode/my_project/py/alpha/WqApp/simple72 +pip install -r requirements.txt +``` + +### 2. 启动服务 + +```bash +python main.py +``` + +服务将在 http://localhost:8000 启动 + +### 3. 使用Web界面 + +1. 打开浏览器访问 http://localhost:8000 +2. 填写表单: + - **Alpha ID**: 输入种子Alpha的ID(格式如 `ak2YPVxv`) + - **LLM API Key**: 你的LLM服务API密钥 + - **LLM Base URL**: LLM服务地址 + - Kimi: `https://api.moonshot.cn/v1` + - OpenAI: `https://api.openai.com/v1` + - 其他: `https://your-llm-service.com/v1` + - **LLM Model**: 模型名称(如 `kimi-k2.5`, `gpt-4`) + - **BRAIN Username/Password**: 你的BRAIN平台账号 +3. 点击"生成变种" +4. 等待处理完成(通常3-10分钟) +5. 查看或复制JSON结果 + +### 4. 使用API + +```bash +curl -X POST http://localhost:8000/api/generate \ + -H "Content-Type: application/json" \ + -d '{ + "alpha_id": "ak2YPVxv", + "llm_api_key": "your-api-key", + "llm_base_url": "https://api.moonshot.cn/v1", + "llm_model": "kimi-k2.5", + "brain_username": "your-brain-user", + "brain_password": "your-brain-pass", + "top_n_datafield": 50 + }' +``` + +## API端点 + +### GET / +- **功能**: 主页面 +- **返回**: HTML页面 + +### POST /api/generate +- **功能**: 生成Alpha变种 +- **请求体**: + ```json + { + "alpha_id": "string (必填)", + "llm_api_key": "string (必填)", + "llm_base_url": "string (必填)", + "llm_model": "string (必填)", + "brain_username": "string (必填)", + "brain_password": "string (必填)", + "top_n_datafield": "int (可选,默认50)", + "user_region": "string (可选)", + "user_universe": "string (可选)", + "user_delay": "int (可选)", + "user_category": "string (可选)", + "user_data_type": "string (可选,默认MATRIX)" + } + ``` +- **响应**: + ```json + { + "success": true, + "alpha_id": "种子Alpha ID", + "candidates": [...], + "expressions_success": [...], + "expressions_error": [...] 
+ } + ``` + +### GET /api/health +- **功能**: 健康检查 +- **响应**: `{"status": "healthy", "service": "alpha-transformer"}` + +## 项目结构 + +``` +simple72/ +├── main.py # FastAPI应用入口 +├── requirements.txt # 依赖清单 +├── Tranformer/ # Transformer核心模块 +│ ├── Transformer.py # 主逻辑(~5000行) +│ │ ├── generate_alpha_description() # 获取Alpha详情 +│ │ ├── generate_new_alphas() # 生成新Alpha +│ │ ├── propose_alpha_templates() # LLM生成模板 +│ │ ├── populate_template() # 填充模板 +│ │ └── validate_expression() # 验证表达式 +│ ├── ace_lib.py # BRAIN API客户端 +│ ├── helpful_functions.py # 辅助函数 +│ ├── validator.py # 表达式验证器 +│ ├── template_summary.md # 模板摘要(可自定义) +│ └── output/ # 输出目录 +│ ├── Alpha_candidates.json +│ ├── Alpha_generated_expressions_success.json +│ └── Alpha_generated_expressions_error.json +└── templates/ + └── index.html # 前端页面 +``` + +## 高级配置 + +### 调整生成数量 + +修改`top_n_datafield`参数: +- 值越大 → 生成的表达式越多,但处理时间越长 +- 值越小 → 生成更快,但可能错过好的变种 +- 建议值:30-100之间 + +### 自定义模板摘要 + +编辑`Tranformer/template_summary.md`: +- 添加你认为有效的Alpha模板 +- 按策略类型分类整理 +- 保持每个模板的格式一致性 + +### 限制生成范围 + +通过可选参数限制生成范围: +- `user_region`: 只在特定地区生成 +- `user_universe`: 只在特定股票池生成 +- `user_delay`: 只使用特定的延迟设置 + +## 常见问题 + +### Q: 生成失败怎么办? +A: 检查以下几点: +1. BRAIN账号密码是否正确 +2. LLM API Key是否有效 +3. Alpha ID是否存在且可访问 +4. 查看返回的error字段 + +### Q: 生成的结果都是类似的? +A: 尝试: +1. 修改`template_summary.md`,添加更多样化的模板 +2. 调整`top_n_datafield`,增加数据字段候选 +3. 使用不同的种子Alpha + +### Q: 生成时间太长? 
+A: 这是正常的,因为: +- LLM调用需要时间 +- BRAIN API查询数据字段 +- 表达式组合和验证 + +可以: +- 减少`top_n_datafield` +- 简化`template_summary.md` +- 使用本地部署的LLM + +## 技术栈 + +- **后端**: FastAPI + asyncio +- **LLM调用**: OpenAI SDK (AsyncOpenAI) +- **BRAIN连接**: requests Session +- **前端**: 原生HTML/CSS/JavaScript +- **验证**: 自定义表达式验证器 + +## 许可证 + +MIT License + +## 参考资源 + +- [WorldQuant BRAIN 文档](https://www.worldquantbrain.com/) +- [BRAIN表达式语法](https://www.worldquantbrain.com/data/expressions) +- [BRAIN算子列表](https://www.worldquantbrain.com/data/operators) diff --git a/simple72/README_CONFIG.md b/simple72/README_CONFIG.md new file mode 100644 index 0000000..1c4b308 --- /dev/null +++ b/simple72/README_CONFIG.md @@ -0,0 +1,177 @@ +# 配置文件说明 + +## 概述 + +`config.json` 用于存储基础配置信息,避免每次使用时重复填写。程序启动时会自动加载此文件。 + +## 文件位置 + +``` +simple72/ +└── config.json # 配置文件(不存在时使用默认值) +└── config.json.example # 配置示例文件 +``` + +## 配置结构 + +```json +{ + "brain": { + "username": "your_username", + "password": "your_password" + }, + "llm": { + "api_key": "your_api_key", + "base_url": "https://api.moonshot.cn/v1", + "model": "kimi-k2.5" + }, + "transformer": { + "top_n_datafield": 50, + "data_type": "MATRIX" + } +} +``` + +## 各配置项说明 + +### brain - BRAIN 平台凭证 + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| `username` | string | BRAIN 平台用户名 | `your_email@example.com` | +| `password` | string | BRAIN 平台密码 | `your_password` | + +**注意**: 密码明文存储,请确保文件权限安全。 + +### llm - 大模型配置 + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| `api_key` | string | LLM API 密钥 | `sk-xxxxx` | +| `base_url` | string | LLM 服务地址 | `https://api.moonshot.cn/v1` | +| `model` | string | 模型名称 | `kimi-k2.5` | + +**支持的 LLM 服务**: +- Kimi: `https://api.moonshot.cn/v1` +- OpenAI: `https://api.openai.com/v1` +- 其他 OpenAI 兼容服务 + +### transformer - Transformer 配置 + +| 字段 | 类型 | 说明 | 默认值 | +|------|------|------|--------| +| `top_n_datafield` | int | 数据字段候选数量 | `50` | +| `data_type` | string | 数据类型 | `MATRIX` | + +## 使用方式 + +### 方式一:手动编辑配置文件 + +1. 
复制 `config.json.example` 为 `config.json` + ```bash + cp config.json.example config.json + ``` + +2. 编辑 `config.json`,填入你的配置信息 + +3. 启动服务 + ```bash + python main.py + ``` + +4. 打开 http://localhost:8000,表单项会自动填充默认值 + +### 方式二:通过页面保存配置 + +(功能开发中) +1. 在页面填写配置信息 +2. (可选)配置会自动保存到 `config.json` + +## 安全性建议 + +### ⚠️ 重要提示 + +1. **保护 config.json 文件** + ```bash + # 设置文件权限为仅所有者可读写 + chmod 600 config.json + ``` + +2. **添加到 .gitignore** + ```bash + echo "config.json" >> .gitignore + ``` + +3. **使用环境变量(高级)** + + 如果不想在文件中存储敏感信息,可以: + - 使用空字符串作为占位符 + - 每次使用时手动填写 + +## 示例配置 + +### Kimi (Moonshot) 配置 + +```json +{ + "brain": { + "username": "your_email@example.com", + "password": "your_password" + }, + "llm": { + "api_key": "sk-xxxxx", + "base_url": "https://api.moonshot.cn/v1", + "model": "kimi-k2.5" + }, + "transformer": { + "top_n_datafield": 50, + "data_type": "MATRIX" + } +} +``` + +### OpenAI 配置 + +```json +{ + "brain": { + "username": "your_email@example.com", + "password": "your_password" + }, + "llm": { + "api_key": "sk-xxxxx", + "base_url": "https://api.openai.com/v1", + "model": "gpt-4" + }, + "transformer": { + "top_n_datafield": 50, + "data_type": "MATRIX" + } +} +``` + +## 故障排查 + +### 配置未生效? + +1. 检查 `config.json` 文件是否存在 + ```bash + ls -la config.json + ``` + +2. 检查 JSON 格式是否正确 + ```bash + python -c "import json; json.load(open('config.json'))" + ``` + +3. 检查字段是否为空字符串 + - 如果字段值为空字符串 `""`,则不会填充默认值 + - 这是为了保护隐私,不想存储某些信息时可以留空 + +### 想清除所有配置? 
+ +```bash +rm config.json +``` + +程序会使用空配置启动,所有字段都需要手动填写。 diff --git a/simple72/Tranformer/Transformer.py b/simple72/Tranformer/Transformer.py new file mode 100755 index 0000000..d8aa72e --- /dev/null +++ b/simple72/Tranformer/Transformer.py @@ -0,0 +1,1803 @@ +import requests +import json +import sys +import asyncio +import openai +import re +from typing import Optional, Union # Added this import +try: + from .validator_hooks import is_valid_template_expr, has_empty_datafield_candidates +except Exception: + # Fallback for direct script execution + try: + from validator_hooks import is_valid_template_expr, has_empty_datafield_candidates + except Exception: + is_valid_template_expr = None + has_empty_datafield_candidates = None + +# --- Validation wrappers to integrate into the pipeline --- +def _filter_valid_templates( + proposed_templates: dict, + operators_meta, + brain_session, + settings: dict, + parse_alpha_code_func, +): + """Return dict of only templates that pass validation. + + Safe no-op if validation helpers are unavailable. + """ + if not is_valid_template_expr or not parse_alpha_code_func: + return proposed_templates + filtered = {} + for template_expr, template_expl in proposed_templates.items(): + try: + if is_valid_template_expr( + template_expr, + operators_meta, + brain_session, + settings, + parse_alpha_code_func, + ): + filtered[template_expr] = template_expl + except Exception: + # Be conservative: drop on exceptions + continue + return filtered + + +def _should_skip_due_to_empty_candidates(populated_info: dict) -> bool: + """True if any data_field placeholder has zero candidates. + + Safe no-op fallback when helper is missing. 
+ """ + if not has_empty_datafield_candidates: + return False + try: + return has_empty_datafield_candidates(populated_info) + except Exception: + return False +import logging +import pandas as pd +import os +from pathlib import Path +from urllib.parse import urljoin +import time +import threading +import itertools +import getpass +import io +import validator as val +from ace_lib import get_instrument_type_region_delay +# Force stdout/stderr to use utf-8 on Windows to avoid UnicodeEncodeError +if sys.platform.startswith('win'): + try: + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') + except Exception: + pass + +# 这些变量将在交互式输入中设置 +LLM_model_name = None +LLM_API_KEY = None +llm_base_url = None +username = None +password = None +DATA_CATEGORIES = None + + + +# 加载模板总结文件 +template_summary_path = os.path.join(os.path.dirname(__file__), "template_summary.md") +try: + with open(template_summary_path, "r", encoding="utf-8") as f: + template_summary = f.read() + print(f"✓ 已加载模板总结文件: {template_summary_path}", flush=True) +except FileNotFoundError: + print(f"⚠ 模板总结文件不存在: {template_summary_path},使用内置模板", flush=True) + template_summary = """# BRAIN论坛Alpha模板精华总结 + +请创建 template_summary.md 文件""" +except Exception as e: + print(f"⚠ 加载模板总结文件失败: {e},使用内置模板", flush=True) + template_summary = """# BRAIN论坛Alpha模板精华总结 + +请检查 template_summary.md 文件""" + + +class SingleSession(requests.Session): + _instance = None + _lock = threading.Lock() + _relogin_lock = threading.Lock() + _initialized = False + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self, *args, **kwargs): + if not self._initialized: + super(SingleSession, self).__init__(*args, **kwargs) + self._initialized = True + + def get_relogin_lock(self): + return self._relogin_lock + +def 
load_template_summary(file_path: Optional[str] = None) -> str: + """ + Loads the template summary from a file or returns the built-in template summary. + + Args: + file_path: Optional path to a .txt or .md file containing the template summary. + If None or file doesn't exist, returns the built-in template summary. + + Returns: + str: The template summary content. + """ + if file_path: + try: + file_path_obj = Path(file_path) + if file_path_obj.exists() and file_path_obj.is_file(): + with open(file_path_obj, 'r', encoding='utf-8') as f: + content = f.read() + print(f"✓ 成功从文件加载模板总结: {file_path}", flush=True) + return content + else: + print(f"⚠ 警告: 文件不存在: {file_path},将使用内置模板总结", flush=True) + except Exception as e: + print(f"⚠ 警告: 读取文件时出错: {e},将使用内置模板总结", flush=True) + + # 返回内置的模板总结 + print("✓ 使用内置模板总结", flush=True) + return template_summary + + +def get_credentials() -> tuple[str, str]: + """ + Retrieve or prompt for platform credentials. + + This function attempts to read credentials from a JSON file in the user's home directory. + If the file doesn't exist or is empty, it prompts the user to enter credentials and saves them. + + Returns: + tuple: A tuple containing the email and password. + + Raises: + json.JSONDecodeError: If the credentials file exists but contains invalid JSON. 
+ """ + # 声明使用全局变量 + global username, password + # please input your own BRAIN Credentials into the function + return (username, password) + +def get_token_from_auth_server() -> str: + # 声明使用全局变量 + global LLM_API_KEY + # please input your own LLM Gateway token into the function, please note, we are using kimi-k2.5 model + return LLM_API_KEY + +def interactive_input() -> dict: + """ + 交互式输入函数,收集所有必要的配置信息。 + + Returns: + dict: 包含所有配置信息的字典 + """ + print("\n" + "="*60, flush=True) + print("欢迎使用 Alpha Transformer 交互式配置", flush=True) + print("此程序在于让您输入一个Alpha ID即可通过历史总结的Alpha模板,转化成更多的表达式", flush=True) + print("72变,助您腾云驾雾", flush=True) + print("如果你想修改模型,则可以使用新模型的url和api key", flush=True) + print("不同模型效果不同,默认的kimi可能会产生语法错误,请检查生成的模板文件进行甄别", flush=True) + print("强烈推荐你使用自己总结的模板文档,效果会更好", flush=True) + print("="*60 + "\n", flush=True) + + config = {} + + # 1. 询问 LLM 模型名称 + print("【1/6】LLM 模型配置", flush=True) + print("如果你想修改模型,则可以使用新模型的名称", flush=True) + default_model = "kimi-k2.5" + model_input = input(f"请输入 LLM 模型名称 (直接回车使用默认值: {default_model}): ").strip() + config['LLM_model_name'] = model_input if model_input else default_model + print(f"✓ LLM 模型名称: {config['LLM_model_name']}\n", flush=True) + + # 2. 询问 LLM API Key + print("【2/6】LLM API Key 配置", flush=True) + api_key = getpass.getpass("请输入 LLM API Key (输入时不会显示): ").strip() + if not api_key: + print("⚠ 警告: API Key 为空,程序可能无法正常工作", flush=True) + config['LLM_API_KEY'] = api_key + print("✓ API Key 已设置\n", flush=True) + + # 3. 询问 LLM Base URL + print("【3/6】LLM Base URL 配置", flush=True) + print("提示:不同模型有不同的URL", flush=True) + default_url = "https://api.moonshot.cn/v1" + url_input = input(f"请输入 LLM Base URL (直接回车使用默认值: {default_url}): ").strip() + config['llm_base_url'] = url_input if url_input else default_url + print(f"✓ LLM Base URL: {config['llm_base_url']}\n", flush=True) + + # 4. 
询问 BRAIN 平台用户名 + print("【4/6】BRAIN 平台认证信息", flush=True) + username_input = input("请输入 BRAIN 平台用户名/邮箱: ").strip() + if not username_input: + print("⚠ 警告: 用户名为空,程序可能无法正常工作", flush=True) + config['username'] = username_input + print("✓ 用户名已设置\n", flush=True) + + # 5. 询问 BRAIN 平台密码 + password_input = getpass.getpass("请输入 BRAIN 平台密码 (输入时不会显示): ").strip() + if not password_input: + print("⚠ 警告: 密码为空,程序可能无法正常工作", flush=True) + config['password'] = password_input + print("✓ 密码已设置\n", flush=True) + + # 6. 询问模板总结文件路径 + print("【5/6】模板总结文件配置", flush=True) + print("强烈推荐你使用自己总结的模板文档,效果会更好", flush=True) + print("提示: 如果您有 template_summary 的 .txt 或 .md 文件,请输入完整路径", flush=True) + print(" 如果没有,直接回车将使用内置模板总结", flush=True) + template_path = input("请输入模板总结文件路径 (直接回车使用内置模板): ").strip() + config['template_summary_path'] = template_path if template_path else None + if template_path: + print(f"✓ 将尝试从文件加载: {template_path}\n", flush=True) + else: + print("✓ 将使用内置模板总结\n", flush=True) + + # 7. 询问 Alpha ID + print("【6/7】Alpha ID 配置", flush=True) + alpha_id = input("请输入要处理的 Alpha ID: ").strip() + if not alpha_id: + print("❌ 错误: Alpha ID 不能为空", flush=True) + sys.exit(1) + config['alpha_id'] = alpha_id + print(f"✓ Alpha ID: {alpha_id}\n", flush=True) + + # 8. 
询问 Top N 参数(仅数据字段) + print("【7/7】候选数量配置 (Top N)", flush=True) + print("提示: 此参数控制为每个占位符生成的数据字段候选数量", flush=True) + + # Datafield top_n + default_datafield_topn = 50 + datafield_topn_input = input(f"请输入数据字段候选数量 (直接回车使用默认值: {default_datafield_topn}): ").strip() + try: + config['top_n_datafield'] = int(datafield_topn_input) if datafield_topn_input else default_datafield_topn + except ValueError: + print(f"⚠ 警告: 输入无效,使用默认值: {default_datafield_topn}", flush=True) + config['top_n_datafield'] = default_datafield_topn + print(f"✓ 数据字段候选数量: {config['top_n_datafield']}\n", flush=True) + + print("="*60, flush=True) + print("配置完成!开始处理...", flush=True) + print("="*60 + "\n", flush=True) + + return config + + + +def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame: + """ + Expand dictionary columns in a DataFrame into separate columns. + + Args: + data (pandas.DataFrame): The input DataFrame with dictionary columns. + + Returns: + pandas.DataFrame: A new DataFrame with expanded columns. + """ + dict_columns = list(filter(lambda x: isinstance(data[x].iloc[0], dict), data.columns)) + new_columns = pd.concat( + [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns], + axis=1, + ) + + data = pd.concat([data, new_columns], axis=1) + return data + +def start_session() -> SingleSession: + """ + Start a new session with the WorldQuant BRAIN platform. + + This function authenticates the user, handles biometric authentication if required, + and creates a new session. + + Returns: + SingleSession: An authenticated session object. + + Raises: + requests.exceptions.RequestException: If there's an error during the authentication process. 
+ """ + brain_api_url = "https://api.worldquantbrain.com" + s = SingleSession() + s.auth = get_credentials() + r = s.post(brain_api_url + "/authentication") + print(f"New session created (ID: {id(s)}) with authentication response: {r.status_code}, {r.json()} (新会话已创建)", flush=True) + if r.status_code == requests.status_codes.codes.unauthorized: + if r.headers["WWW-Authenticate"] == "persona": + print( + "Complete biometrics authentication and press any key to continue (请完成生物识别认证并按任意键继续): \n" + + urljoin(r.url, r.headers["Location"]) + + "\n" + ) + input() + s.post(urljoin(r.url, r.headers["Location"])) + while True: + if s.post(urljoin(r.url, r.headers["Location"])).status_code != 201: + input( + "Biometrics authentication is not complete. Please try again and press any key when completed (生物识别认证未完成,请重试并按任意键): \n" + ) + else: + break + else: + print("\nIncorrect email or password (邮箱或密码错误)\n", flush=True) + return start_session() + return s + +def get_data_categories(s: SingleSession) -> list[dict]: + """ + Fetch and cache data categories from the BRAIN API. + """ + global DATA_CATEGORIES + if DATA_CATEGORIES is not None: + return DATA_CATEGORIES + + try: + brain_api_url = "https://api.worldquantbrain.com" + response = s.get(brain_api_url + "/data-categories") + response.raise_for_status() + data = response.json() + if isinstance(data, list): + DATA_CATEGORIES = data + elif isinstance(data, dict): + DATA_CATEGORIES = data.get('results', []) + else: + DATA_CATEGORIES = [] + return DATA_CATEGORIES + except Exception as e: + print(f"Error fetching data categories: {e}", flush=True) + return [] + +def get_datafields( + s: SingleSession, + instrument_type: str = "EQUITY", + region: str = "USA", + delay: int = 1, + universe: str = "TOP3000", + theme: str = "false", + dataset_id: str = "", + data_type: str = "MATRIX", + search: str = "", + category: Union[str, list] = "", +) -> pd.DataFrame: + """ + Retrieve available datafields based on specified parameters. 
+ + Args: + s (SingleSession): An authenticated session object. + instrument_type (str, optional): The type of instrument. Defaults to "EQUITY". + region (str, optional): The region. Defaults to "USA". + delay (int, optional): The delay. Defaults to 1. + universe (str, optional): The universe. Defaults to "TOP3000". + theme (str, optional): The theme. Defaults to "false". + dataset_id (str, optional): The ID of a specific dataset. Defaults to "". + data_type (str, optional): The type of data. Defaults to "MATRIX". + search (str, optional): A search string to filter datafields. Defaults to "". + category (str or list, optional): A category ID or list of IDs to filter datafields. Defaults to "". + + Returns: + pandas.DataFrame: A DataFrame containing information about available datafields. + """ + brain_api_url = "https://api.worldquantbrain.com" + type_param = f"&type={data_type}" if data_type != "ALL" else "" + + url_template = ( + brain_api_url + + "/data-fields?" + + f"&instrumentType={instrument_type}" + + f"®ion={region}&delay={str(delay)}&universe={universe}{type_param}&limit=50" + ) + + if dataset_id: + url_template += f"&dataset.id={dataset_id}" + + if len(search) > 0: + url_template += f"&search={search}" + + url_template += "&offset={x}" + + count = 0 + if len(search) == 0: + try: + count = s.get(url_template.format(x=0)).json()["count"] + except Exception as e: + print(f"Error getting count: {e}", flush=True) + return pd.DataFrame() + + if count == 0: + print( + f"No fields found (未找到字段): region={region}, delay={str(delay)}, universe={universe}, " + f"type={data_type}, dataset.id={dataset_id}" + ) + return pd.DataFrame() + else: + if category: + count = 500 # Search deeper if filtering + else: + count = 100 + + max_try = 5 + datafields_list = [] + found_count = 0 + target_found = 50 if category else count + time.sleep(2) + for x in range(0, count, 50): + for _ in range(max_try): + try: + resp = s.get(url_template.format(x=x)) + while resp.status_code == 
def set_alpha_properties(
    s: "SingleSession",
    alpha_id: str,
    name: Optional[str] = None,
    color: Optional[str] = None,
    regular_desc: Optional[str] = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags: Optional[list[str]] = None,
) -> "requests.Response":
    """
    Update the properties of an alpha via PATCH /alphas/{alpha_id}.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha to update.
        name (str, optional): The new name for the alpha. Defaults to None.
        color (str, optional): The new color for the alpha. Defaults to None.
        regular_desc (str, optional): Description for a regular alpha. Defaults to None.
        selection_desc (str, optional): Description for the selection part of a
            super alpha; the literal string "None" means "do not set". Defaults to "None".
        combo_desc (str, optional): Description for the combo part of a super
            alpha; the literal string "None" means "do not set". Defaults to "None".
        tags (list, optional): List of tags to apply to the alpha. Defaults to None.

    Returns:
        requests.Response: The response object from the API call.
    """
    brain_api_url = "https://api.worldquantbrain.com"
    params = {}
    if name is not None:
        params["name"] = name
    if color is not None:
        params["color"] = color
    if tags is not None:
        params["tags"] = tags
    if regular_desc is not None:
        params.setdefault("regular", {})["description"] = regular_desc
    if selection_desc != "None":  # the string "None" is the sentinel default
        params.setdefault("selection", {})["description"] = selection_desc
    if combo_desc != "None":  # the string "None" is the sentinel default
        params.setdefault("combo", {})["description"] = combo_desc

    response = s.patch(brain_api_url + "/alphas/" + alpha_id, json=params)

    return response


def extract_placeholders(template_expression: str) -> list[str]:
    """
    Extract `<name/>`-style placeholders from a template expression.

    Returns:
        list[str]: every `<word/>` tag in order of appearance (duplicates kept).
    """
    # Only match placeholders of the form `<name/>` with alphanumerics/underscores.
    return re.findall(r'(<[A-Za-z0-9_]+/>)', template_expression)


def parse_alpha_code(alpha_code: str, all_operators: list[dict]) -> tuple[list[str], list[str]]:
    """
    Split an alpha expression into the operators and data fields it references.

    Args:
        alpha_code (str): The BRAIN expression source.
        all_operators (list[dict]): Operator records (each with a 'name' key).

    Returns:
        tuple[list[str], list[str]]: (operator names, datafield names), each
        de-duplicated and in first-appearance order.
    """
    # Remove C-style comments /* ... */
    alpha_code = re.sub(r"/\*[\s\S]*?\*/", "", alpha_code)
    # Remove Python-style comments # ...
    alpha_code = re.sub(r"#.*", "", alpha_code)

    # Use a set for O(1) membership tests instead of a linear list scan.
    operators_names = {op['name'] for op in all_operators}

    found_operators = []
    found_datafields = []

    # Find potential identifiers (operators or datafields); anything that is not
    # a known operator or a reserved/builtin word is assumed to be a datafield.
    identifiers = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', alpha_code)

    for identifier in identifiers:
        if identifier in operators_names:
            found_operators.append(identifier)
        elif not (identifier.isdigit() or identifier.lower() in ['true', 'false', 'null', 'nan', 'if', 'else', 'for', 'while', 'return', 'and', 'or', 'not', 'in', 'is', 'try', 'except', 'finally', 'with', 'as', 'def', 'class', 'import', 'from', 'yield', 'lambda', 'global', 'nonlocal', 'break', 'continue', 'pass', 'async', 'await', 'raise', 'assert', 'del', 'print', 'input', 'len', 'min', 'max', 'sum', 'abs', 'round', 'int', 'float', 'str', 'list', 'dict', 'set', 'tuple', 'range', 'map', 'filter', 'zip', 'open', 'file', 'type', 'id', 'dir', 'help', 'object', 'super', 'issubclass', 'isinstance', 'hasattr', 'getattr', 'setattr', 'delattr', '__import__', 'None', 'True', 'False']):
            found_datafields.append(identifier)

    # BUGFIX: list(set(...)) made the output order depend on string-hash
    # randomisation, so results differed between runs. dict.fromkeys
    # de-duplicates while keeping first-appearance order deterministically.
    found_operators = list(dict.fromkeys(found_operators))
    found_datafields = list(dict.fromkeys(found_datafields))

    return found_operators, found_datafields
async def generate_alpha_description(alpha_id: str, brain_session: SingleSession) -> str:
    """
    Fetch an alpha, check its description with an LLM, and regenerate it if poor.

    Retrieves the alpha from the BRAIN API, asks the LLM whether the current
    description is good enough, and — if not — asks it to generate a better one
    and PATCHes that description back onto the alpha.

    Args:
        alpha_id (str): The ID of the alpha to retrieve.
        brain_session (SingleSession): The active BRAIN API session.

    Returns:
        str: A JSON string containing the alpha's settings and expression dict
        (with a potentially enriched description), or "{}" if an error occurs.
    """

    async def call_llm_new(prompt: str) -> dict:
        # Builds its own AsyncOpenAI client from module-level config globals
        # (unlike the module-level call_llm, which takes a client argument).
        global LLM_model_name, LLM_API_KEY, llm_base_url
        try:
            llm_api_key = get_token_from_auth_server()
            llm_base_url_value = llm_base_url  # use the module-level global
            llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
            print("LLM Gateway Authentication successful. (LLM网关认证成功)", flush=True)
        except Exception as e:
            # NOTE: client construction failure aborts the whole process.
            print(f"LLM Gateway Authentication failed (LLM网关认证失败): {e}", flush=True)
            sys.exit(1)

        print("--- Calling LLM to propose templates... (正在调用LLM生成模板...) ---", flush=True)
        try:
            # Await the async create call
            response = await llm_client.chat.completions.create(
                model=LLM_model_name,
                messages=[
                    {"role": "system", "content": "You are a quantitative finance expert and a helpful assistant designed to output JSON."},
                    {"role": "user", "content": prompt},
                ],
                # response_format={"type": "json_object"},
            )

            # The async client may return a nested structure. Try to extract content robustly.
            content = None
            if isinstance(response, dict):
                # Some clients return raw dicts; try common paths.
                choices = response.get('choices')
                if choices and isinstance(choices, list):
                    msg = choices[0].get('message') or choices[0]
                    content = msg.get('content') if isinstance(msg, dict) else None
                elif 'content' in response:
                    content = response.get('content')
            else:
                # Fallback: attempt attribute access on the typed client object.
                try:
                    content = response.choices[0].message.content
                except Exception:
                    content = None

            if content is None:
                # As a last resort, try to stringify the response.
                content = str(response)

            # If content is already a dict/list, return it directly; if it's a
            # JSON string, parse it; otherwise wrap the raw text.
            if isinstance(content, (dict, list)):
                return content
            if isinstance(content, str):
                try:
                    return json.loads(content)
                except json.JSONDecodeError:
                    # Return wrapped string if not JSON
                    return {"text": content}

            return {}
        except Exception as e:
            print(f"Error calling LLM (调用LLM出错): {e}", flush=True)
            return {}

    try:
        brain_api_url = "https://api.worldquantbrain.com"
        alpha_url = f"{brain_api_url}/alphas/{alpha_id}"
        response = brain_session.get(alpha_url)
        response.raise_for_status()  # Raise an exception for HTTP errors

        alpha_data = response.json()
        settings = alpha_data.get('settings', {})
        # Regular alphas carry the code under 'regular'; super alphas under 'combo'.
        expression_dict = alpha_data.get('regular', alpha_data.get('combo', None))

        if not expression_dict or 'code' not in expression_dict:
            print(f"Error: Alpha expression code not found for Alpha ID (未找到Alpha表达式代码): {alpha_id}", flush=True)
            return json.dumps({})

        alpha_code = expression_dict['code']
        current_description = expression_dict.get('description', '')

        # 1. Get all operators for parsing (no filter as per feedback)
        operators_data = get_brain_operators()
        all_operators = operators_data.get('operators', [])

        # 2. Parse the code to get operators and datafields
        found_operators_names, found_datafields_names = parse_alpha_code(alpha_code, all_operators)

        # 3. Get descriptions for operators
        operator_descriptions = {op['name']: op.get('description', 'No description available.') for op in all_operators if op['name'] in found_operators_names}

        # 4. Get descriptions for datafields
        datafield_descriptions = {}
        if found_datafields_names:
            # Extract settings from alpha_data for the get_datafields call
            instrument_type = settings.get('instrumentType', 'EQUITY')
            region = settings.get('region', 'USA')
            universe = settings.get('universe', 'TOP3000')
            delay = settings.get('delay', 1)

            for df_name in found_datafields_names:
                # get_datafields returns a DataFrame, so we need to process it
                datafield_df = get_datafields(s=brain_session, instrument_type=instrument_type, region=region, delay=delay, universe=universe, search=df_name)
                if not datafield_df.empty:
                    # Assuming the first result is the most relevant
                    datafield_descriptions[df_name] = datafield_df.iloc[0].get('description', 'No description available.')
                else:
                    datafield_descriptions[df_name] = 'No description found.'

        # 5. Use LLM to judge if current description is good
        judgment_prompt = f"""
        Given the following alpha code, its current description, and descriptions of its operators and datafields:

        Alpha Code:
        {alpha_code}

        Current Description:
        {current_description}

        Operators and their descriptions:
        {json.dumps(operator_descriptions, indent=2)}

        Datafields and their descriptions:
        {json.dumps(datafield_descriptions, indent=2)}

        Alpha Settings:
        {json.dumps(settings, indent=2)}

        Is the current description good enough? Respond with 'yes' or 'no' in a JSON object: {{"judgment": "yes/no"}}
        A "good" description should clearly explain the investment idea, rationale for data used, and rationale for operators used.
        """

        judgment_response = await call_llm_new(judgment_prompt)
        is_description_good = judgment_response.get("judgment", "no").lower() == "yes"

        new_description = current_description
        if not is_description_good:
            # 6. If not good, use another LLM call to generate a new description
            generation_prompt = f"""
            Based on the following alpha code, its operators, datafields, and settings, generate a new, improved description.
            The description should clearly explain the investment idea, rationale for data used, and rationale for operators used.
            Format the output as:
            "Idea: xxxxx\\nRationale for data used: xxxxx\\nRationale for operators used: xxxxxxx"

            Alpha Code:
            {alpha_code}

            Operators and their descriptions:
            {json.dumps(operator_descriptions, indent=2)}

            Datafields and their descriptions:
            {json.dumps(datafield_descriptions, indent=2)}

            Alpha Settings:
            {json.dumps(settings, indent=2)}
            """

            generated_description_response = await call_llm_new(generation_prompt)
            # Assuming LLM returns a string directly or a JSON with a 'description' key
            new_description = generated_description_response.get("description", generated_description_response)
            if isinstance(new_description, dict):  # Handle cases where LLM might return a dict directly
                new_description = json.dumps(new_description, indent=2)

            # 7. Override this new description and patch the alpha
            set_alpha_properties(
                s=brain_session,
                alpha_id=alpha_id,
                regular_desc=new_description
            )
            print(f"Alpha {alpha_id} description updated on platform. (Alpha描述已在平台更新)", flush=True)

            # Keep the local copy consistent with what was just PATCHed.
            if 'regular' in alpha_data:
                alpha_data['regular']['description'] = new_description
            elif 'combo' in alpha_data:
                alpha_data['combo']['description'] = new_description

        return json.dumps({
            'settings': settings,
            'expression': expression_dict
        })

    except requests.exceptions.RequestException as e:
        print(f"Error during API request (API请求出错): {e}", flush=True)
        return json.dumps({})
    except json.JSONDecodeError:
        print("Error: Could not decode JSON response from API. (无法解析API的JSON响应)", flush=True)
        return json.dumps({})
    except Exception as e:
        print(f"An unexpected error occurred (发生意外错误): {e}", flush=True)
        return json.dumps({})
def get_brain_operators(scope_filters: Optional[list[str]] = None) -> dict:
    """
    Retrieves the list of available operators from the WorldQuant BRAIN API,
    optionally filtered by a list of scopes. If no scopes are provided, all
    operators are returned.

    Args:
        scope_filters (list[str], optional): Strings to filter operators by
            their scope (e.g., ["REGULAR", "TS_OPERATOR"]). If None or empty,
            all operators are returned.

    Returns:
        dict: {'operators': [...], 'count': int}, or {} if an error occurs.
    """
    try:
        brain_api_url = "https://api.worldquantbrain.com"
        # NOTE: opens a fresh authenticated session on every call.
        session = start_session()
        operators_url = f"{brain_api_url}/operators"
        response = session.get(operators_url)
        response.raise_for_status()  # Raise an exception for HTTP errors

        operators_list = response.json()

        if not isinstance(operators_list, list):
            print(f"Error: Expected a list of operators, but received type (预期运算符列表,但收到类型): {type(operators_list)}", flush=True)
            return {}

        if scope_filters:
            # Keep operators whose scope intersects any requested filter.
            filtered_operators = [
                op for op in operators_list
                if any(s_filter in op.get('scope', []) for s_filter in scope_filters)
            ]
            return {
                'operators': filtered_operators,
                'count': len(filtered_operators)
            }
        else:
            return {
                'operators': operators_list,
                'count': len(operators_list)
            }

    except requests.exceptions.RequestException as e:
        print(f"Error during API request for operators (获取运算符时API请求出错): {e}", flush=True)
        return {}
    except json.JSONDecodeError:
        print("Error: Could not decode JSON response from operators API. (无法解析运算符API的JSON响应)", flush=True)
        return {}
    except Exception as e:
        print(f"An unexpected error occurred while getting operators (获取运算符时发生意外错误): {e}", flush=True)
        return {}

async def call_llm(prompt: str, llm_client: openai.AsyncOpenAI, max_retries: int = 3) -> dict:
    """
    Interface with a Large Language Model to process prompts and get a JSON
    response. Includes retry logic for JSON parsing and transport errors.

    Args:
        prompt (str): The user prompt to send.
        llm_client (openai.AsyncOpenAI): An authenticated client.
        max_retries (int, optional): Attempts before giving up. Defaults to 3.

    Returns:
        dict: The parsed JSON object, or {} after all retries fail.
    """
    # Uses the module-level model-name global.
    global LLM_model_name
    if not llm_client:
        print("LLM client not initialized. Please check authentication. (LLM客户端未初始化,请检查认证)", flush=True)
        return {}

    print("--- Calling LLM... (正在调用LLM...) ---", flush=True)

    for attempt in range(max_retries):
        content = ""
        try:
            response = await llm_client.chat.completions.create(
                model=LLM_model_name,  # Or your preferred model
                messages=[
                    {"role": "system", "content": "You are a quantitative finance expert and a helpful assistant designed to output JSON."},
                    {"role": "user", "content": prompt},
                ],
                # response_format={"type": "json_object"},
            )
            # BUGFIX: the API may return None content; treat it as an empty
            # string so the "in" checks below don't raise TypeError.
            content = response.choices[0].message.content or ""

            # Try to clean markdown code blocks if present
            if "```json" in content:
                content = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                content = content.split("```")[1].split("```")[0].strip()

            return json.loads(content)
        except json.JSONDecodeError as e:
            print(f"⚠ JSON Decode Error (Attempt {attempt + 1}/{max_retries}): {e}", flush=True)
            if attempt == max_retries - 1:
                print(f"❌ Failed to parse JSON after {max_retries} attempts. Raw content: {content[:100]}...", flush=True)
        except Exception as e:
            print(f"⚠ LLM Call Error (Attempt {attempt + 1}/{max_retries}): {e}", flush=True)
            if attempt == max_retries - 1:
                print(f"❌ Failed to call LLM after {max_retries} attempts.", flush=True)

        # BUGFIX: only wait between attempts — the original also slept 2 s
        # after the final failed attempt, delaying the {} return pointlessly.
        if attempt < max_retries - 1:
            await asyncio.sleep(2)

    return {}
async def propose_alpha_templates(alpha_details: dict, template_summary: str, llm_client: openai.AsyncOpenAI, user_data_type: str = "MATRIX") -> dict:
    """
    Uses an LLM to propose new alpha templates based on a seed alpha's details.

    Args:
        alpha_details (dict): The details of the seed alpha (must contain 'expression').
        template_summary (str): A summary of alpha templates to guide the LLM.
        llm_client (openai.AsyncOpenAI): The authenticated OpenAI-compatible client.
        user_data_type (str): The data type for the alpha (MATRIX or VECTOR);
            selects which operator-family restriction is injected into the prompt.

    Returns:
        dict: Proposed templates mapped to their rationale strings, or {} on
        error / missing expression.
    """
    if not alpha_details.get('expression'):
        print("Error: Alpha expression is missing. (错误:缺少Alpha表达式)", flush=True)
        return {}
    else:
        print(f"current seed alpha detail (当前种子Alpha详情): {alpha_details.get('expression')}", flush=True)

    data_type_instruction = ""
    if user_data_type == "MATRIX":
        data_type_instruction = "\n**Important Note on Data Type:**\nThe user has specified the data type as **MATRIX**. Please do NOT use any vector-type operators (e.g., `vec_avg`, `vec_sum`) in your proposed templates, as they will raise errors for MATRIX type data in BRAIN. Note: 'MATRIX' is just a system identifier and does not refer to mathematical matrices."
    elif user_data_type == "VECTOR":
        data_type_instruction = "\n**Important Note on Data Type:**\nThe user has specified the data type as **VECTOR**. Please ensure you use vector-type operators (e.g., `vec_avg`, `vec_sum`) to handle the data fields before applying other operators."

    # NOTE(review): the placeholder examples inside this prompt ("like `` for data
    # fields", "((, 60), industry)") look stripped — they were presumably
    # `<datafield/>` / `<operator/>`-style tags (cf. extract_placeholders) that an
    # HTML-sanitising pass removed. Confirm against the original prompt text.
    prompt = f"""
As a world-class BRAIN consultant, your task is to design new alpha templates based on an existing seed alpha.
You will be provided with the seed alpha's expression and a summary of successful alpha templates for inspiration.

**Seed Alpha Expression:**
{alpha_details['expression']}

**Inspiration: Summary of Alpha Templates:**
{template_summary}

**Your Task:**
Based on the structure and potential economic rationale of the seed alpha, by the aid of the Alpha template summary, propose 3-5 new, diverse alpha templates.

**Rules:**
1. The proposed templates must be valid BRAIN alpha expressions.
2. Use placeholders like `` for data fields and `` for operators that can be programmatically replaced later.
3. For each proposed template, provide a brief, clear explanation of its investment rationale.
4. Return the output as a single, valid JSON object where keys are the proposed template strings and values are their corresponding explanations. Do not include any other text or formatting outside of the JSON object.
5. The proposed new alpha template should be related to the economic sense of seed Alpha {alpha_details} but in different format such as. Utilize the inspiration well.
{data_type_instruction}

**Example Output Format:**
{{
    "((, 60), industry)": "A cross-sectional momentum signal, neutralized by industry, to capture relative strength within peer groups.",
    "(, 20)": "A simple short-term momentum operator applied to a data field."
}}

Now, generate the JSON object with your proposed templates.
"""

    try:
        print(f"\n[Step 1/5] 正在调用 LLM 生成 Alpha 模板...", flush=True)
        print(f" - 模型: {LLM_model_name}", flush=True)
        print(f" - 数据类型: {user_data_type}", flush=True)
        # 'expression' may be a dict ({'code': ...}) or a plain string.
        alpha_expr = alpha_details.get('expression', {})
        if isinstance(alpha_expr, dict):
            alpha_expr = alpha_expr.get('code', 'N/A')
        print(f" - 种子 Alpha: {str(alpha_expr)[:50]}...", flush=True)
        # print(f"现在的template summary是{template_summary}")
        proposed_templates = await call_llm(prompt, llm_client)
        print(f"✓ LLM 返回 {len(proposed_templates)} 个模板提议", flush=True)
        return proposed_templates
    except Exception as e:
        print(f"An error occurred while calling the LLM (调用LLM时发生错误): {e}", flush=True)
        return {}
+""" + + try: + print(f"\n[Step 1/5] 正在调用 LLM 生成 Alpha 模板...", flush=True) + print(f" - 模型: {LLM_model_name}", flush=True) + print(f" - 数据类型: {user_data_type}", flush=True) + alpha_expr = alpha_details.get('expression', {}) + if isinstance(alpha_expr, dict): + alpha_expr = alpha_expr.get('code', 'N/A') + print(f" - 种子 Alpha: {str(alpha_expr)[:50]}...", flush=True) + # print(f"现在的template summary是{template_summary}") + proposed_templates = await call_llm(prompt, llm_client) + print(f"✓ LLM 返回 {len(proposed_templates)} 个模板提议", flush=True) + return proposed_templates + except Exception as e: + print(f"An error occurred while calling the LLM (调用LLM时发生错误): {e}", flush=True) + return {} + +async def propose_datafield_keywords(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, user_category: Optional[Union[str, list]] = None) -> list[str]: + """ + Uses an LLM to propose search keywords for finding data fields. + """ + category_instruction = "" + if user_category: + category_instruction = f"\n**User Specified Data Category:**\nThe user has specified the data category: {user_category}. Please ensure the proposed keywords are relevant to this category." + else: + category_instruction = "\n**Data Category:**\n Please propose keywords across diverse and relevant data categories." + + prompt = f""" +As a quantitative researcher, you need to find the best data fields for an alpha template placeholder. +Based on the template's logic and the placeholder's name, suggest a list of 3-5 concise search keywords to use with the WorldQuant BRAIN `get_datafields` tool. + +**Alpha Template:** +`{template_expression}` + +**Template Explanation:** +`{template_explanation}` + +**Placeholder to Fill:** +`{placeholder}` +{category_instruction} + +**Your Task:** +Provide a list of search keywords that are likely to yield relevant data fields for this placeholder. The keywords should be specific and diverse. 
Return the output as a single, valid JSON array of strings. + +**Example Input:** +Placeholder: `` +Explanation: "measures the time-series evolution of a fund's relative rank on a slow-moving characteristic (e.g., fund style, expense tier)" + +**Example Output:** +["fund style", "expense ratio", "management fee", "turnover", "aum"] + + Now, generate the JSON array of search keywords for the given placeholder. +""" + print(f"--- Calling LLM to get keywords for placeholder (正在调用LLM获取占位符关键词): {placeholder} ---", flush=True) + response = await call_llm(prompt, llm_client) + print(f"AI使用如下提示词获取搜索关键词推荐:{prompt}", flush=True) + # Accept either a direct list or a dict containing a 'keywords' key + if isinstance(response, list) and all(isinstance(item, str) for item in response): + return response + if isinstance(response, dict): + # Common keys that might contain the list + for key in ('keywords', 'data', 'result', 'items'): + if key in response and isinstance(response[key], list) and all(isinstance(i, str) for i in response[key]): + return response[key] + print(f"Warning: LLM did not return a valid list of strings for keywords (警告:LLM未返回有效的关键词列表). Got: {response}", flush=True) + return [] + +async def get_datafield_candidates(s: SingleSession, alpha_details: dict, template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, top_n: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX") -> list[dict]: + """ + Gets candidate data fields for a placeholder by using an LLM to generate search keywords + and then calling the BRAIN API's get_datafields to retrieve the top N results for each keyword. 
+ """ + keywords = await propose_datafield_keywords(template_expression, template_explanation, placeholder, llm_client, user_category=user_category) + if not keywords: + print(f"Could not generate keywords for placeholder (无法生成占位符关键词): {placeholder}", flush=True) + return [] + + print(f"LLM-proposed keywords for '{placeholder}' (LLM提议的关键词): {keywords}", flush=True) + + # Extract settings from alpha_details for the get_datafields call + settings = alpha_details.get('settings', {}) + print(f"Alpha settings for datafield search (用于数据字段搜索的Alpha设置):", flush=True) + instrument_type = settings.get('instrumentType', 'EQUITY') + + if user_region: + region = user_region + elif 'region' in settings: + region = settings['region'] + else: + print(f"❌ Error: Could not determine 'region' for datafield search. It is missing in Alpha settings and not provided by user. (错误:无法确定数据搜索的地区,Alpha设置中缺失且用户未提供)", flush=True) + return [] + print(f" 数据地区: {region}", flush=True) + + if user_universe: + universe = user_universe + elif 'universe' in settings: + universe = settings['universe'] + else: + print(f"❌ Error: Could not determine 'universe' for datafield search. It is missing in Alpha settings and not provided by user. (错误:无法确定数据搜索的范围,Alpha设置中缺失且用户未提供)", flush=True) + return [] + print(f" 数据范围: {universe}", flush=True) + + if user_delay is not None: + delay = user_delay + elif 'delay' in settings: + delay = settings['delay'] + else: + print(f"❌ Error: Could not determine 'delay' for datafield search. It is missing in Alpha settings and not provided by user. 
(错误:无法确定数据搜索的Delay,Alpha设置中缺失且用户未提供)", flush=True) + return [] + print(f" Delay: {delay} 类别", flush=True) + + if user_category: + print(f" Category Filter: {user_category}", flush=True) + + # Use asyncio.gather to make parallel API calls for efficiency + tasks = [] + for keyword in keywords: + tasks.append( + asyncio.to_thread(get_datafields, + s=s, + instrument_type=instrument_type, + region=region, + delay=delay, + universe=universe, + search=keyword, + category=user_category if user_category else "", + data_type=user_data_type + ) + ) + + results = await asyncio.gather(*tasks) + + # Process results to get top N from each keyword search + top_results_per_keyword = [] + for res_df in results: + if not res_df.empty: + top_results_per_keyword.append(res_df.head(top_n)) + + candidate_datafields = [] + if top_results_per_keyword: + # Concatenate the top N results from all keywords + combined_df = pd.concat(top_results_per_keyword, ignore_index=True) + # Remove duplicates from the combined list + combined_df.drop_duplicates(subset=['id'], inplace=True) + # Format the final list of candidates + candidate_datafields = combined_df[['id', 'description']].to_dict(orient='records') + + return candidate_datafields + +async def get_group_datafield_candidates(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, top_n: int = 3) -> list[dict]: + """ + Uses an LLM to select suitable group data fields from a predefined list. + """ + predefined_group_fields = ["industry", "subindustry", "sector", "market", "exchange"] + + prompt = f""" + As a quantitative researcher, you need to select the most relevant group data fields for an alpha template placeholder. + Based on the template's logic and the placeholder's name, select {top_n} group fields from the following list that are most suitable: {predefined_group_fields}. 
async def get_operator_candidates(template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI, top_n: int = 3) -> list[dict]:
    """
    Get candidate operators for a placeholder.

    Fetches all REGULAR-scope operators, shows their names and descriptions to
    the LLM, and returns the operators the LLM selects (capped at ``top_n``).
    Falls back to the first ``top_n`` operators when the LLM reply is invalid.

    Returns:
        list[dict]: entries of the form ``{"name": ..., "description": ...}``.
    """
    operators_data = get_brain_operators(scope_filters=["REGULAR"])
    all_operators = operators_data.get('operators', [])

    if not all_operators:
        print("No REGULAR scope operators found. (未找到REGULAR范围的运算符)", flush=True)
        return []

    # Create a summary of available operators for the LLM
    operator_names_and_descriptions = "\n".join([f"- {op['name']}: {op.get('description', 'No description available.')}" for op in all_operators])

    prompt = f"""
    As a quantitative finance expert, you need to select the most relevant operators for an alpha template placeholder.
    Based on the template's logic, its explanation, and the specific placeholder, select {top_n} operators from the provided list that are most suitable.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Fill:**
    `{placeholder}`

    **Available REGULAR Scope Operators:**
    {operator_names_and_descriptions}

    **Your Task:**
    Provide a list of selected operator names. Return the output as a single, valid JSON array of strings.

    **Example Output Format:**
    ["ts_mean", "ts_rank", "ts_decay"]

    Now, generate the JSON array of selected operators.
    """
    print(f"--- Calling LLM to select operator candidates for placeholder (正在调用LLM选择运算符候选): {placeholder} ---", flush=True)
    response = await call_llm(prompt, llm_client)

    if isinstance(response, list) and all(isinstance(item, str) for item in response):
        # Index operators by name once (keeping the first occurrence, matching
        # the previous first-match scan) instead of rescanning the whole list
        # for every selected name — was O(n*m), now O(n+m).
        ops_by_name = {}
        for op in all_operators:
            ops_by_name.setdefault(op['name'], op)

        selected_ops_details = []
        for selected_name in response:
            op = ops_by_name.get(selected_name)
            if op is not None:
                selected_ops_details.append({"name": op['name'], "description": op.get('description', '')})
        return selected_ops_details[:top_n]

    print(f"Warning: LLM did not return a valid list of strings for operator candidates (警告:LLM未返回有效的运算符候选列表). Got: {response}", flush=True)
    # Fallback to a default set if LLM fails
    return [{"name": op['name'], "description": op.get('description', '')} for op in all_operators[:top_n]]
async def get_parameter_candidates(param_type: str, template_expression: str, template_explanation: str, placeholder: str, llm_client: openai.AsyncOpenAI) -> list[dict]:
    """
    Ask the LLM for sensible numerical candidates for a parameter placeholder.

    On an invalid LLM reply, a hard-coded default list is returned that
    depends on ``param_type`` (``integer_parameter`` / ``float_parameter``);
    any other type falls back to an empty list.

    Returns:
        list[dict]: entries of the form ``{"value": <number>}``.
    """
    if param_type == "integer_parameter":
        param_description = "an integer value, typically a window length or count (e.g., `d` in `ts_mean(x, d)`)"
    else:
        param_description = "a floating-point number, typically a threshold or factor"

    prompt = f"""
    As a quantitative finance expert, you need to suggest sensible numerical candidates for a placeholder parameter.
    Based on the alpha template's logic, its explanation, and the placeholder's type and context, propose 3-5 diverse numerical candidates.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Fill:**
    `{placeholder}`

    **Parameter Type:**
    This placeholder represents {param_description}.

    **Your Task:**
    Provide a list of numerical candidates that are appropriate for this parameter. Return the output as a single, valid JSON array of numbers.

    **Example Output (for integer_parameter):**
    [10, 20, 60, 120, 252]

    **Example Output (for float_parameter):**
    [0.01, 0.05, 0.1, 0.2, 0.5]

    Now, generate the JSON array of numerical candidates.
    """
    print(f"--- Calling LLM to suggest candidates for {param_type} placeholder (正在调用LLM建议参数候选): {placeholder} ---", flush=True)
    reply = await call_llm(prompt, llm_client)

    if isinstance(reply, list) and all(isinstance(entry, (int, float)) for entry in reply):
        return [{"value": entry} for entry in reply]
    print(f"Warning: LLM did not return a valid list of numbers for {param_type} candidates (警告:LLM未返回有效的数字候选列表). Got: {reply}", flush=True)

    # Type-keyed fallback table; unknown types yield an empty candidate list.
    fallback_values = {
        "integer_parameter": [10, 20, 60, 120, 252],
        "float_parameter": [0.01, 0.05, 0.1, 0.2, 0.5],
    }
    return [{"value": entry} for entry in fallback_values.get(param_type, [])]
async def judge_placeholder_type(placeholder: str, template_expression: str, template_explanation: str, operator_summary: str, llm_client: openai.AsyncOpenAI) -> str:
    """
    Use an LLM to classify a template placeholder.

    Returns one of: "data_field", "group_data_field", "operator",
    "vector_operator", "integer_parameter", "float_parameter",
    "string_parameter", or "unknown" (also used when the LLM reply is not a
    JSON object).
    """
    prompt = f"""
    As a world-class quantitative finance expert, your task is to classify the type of a placeholder within an alpha expression.
    You will be provided with the alpha template, its explanation, the specific placeholder, and a comprehensive summary of available BRAIN operators and data field characteristics.

    **Alpha Template:**
    `{template_expression}`

    **Template Explanation:**
    `{template_explanation}`

    **Placeholder to Classify:**
    `{placeholder}`

    **Available BRAIN Operators and Data Field Characteristics:**
    {operator_summary}

    **Your Task:**
    Classify the `{placeholder}` based on the provided context. The classification should be one of the following types:
    - "data_field": If the placeholder clearly represents a financial data series (e.g., price, volume, fundamental ratio).
    - "group_data_field": If the placeholder represents a categorical field used for grouping or neutralization (e.g., `industry` in `group_zscore(x, industry)`).
    - "operator": If the placeholder represents a BRAIN operator that performs a calculation or transformation.
    - "vector_operator": If the placeholder represents a vector operator (e.g., vec_avg, vec_sum).
    - "integer_parameter": If the placeholder represents an integer value, typically a window length or count (e.g., `d` in `ts_mean(x, d)`).
    - "float_parameter": If the placeholder represents a floating-point number, typically a threshold or factor.
    - "string_parameter": If the placeholder represents a string value, like a group name (e.g., `industry` in `group_zscore(x, industry)`).
    - "unknown": If the type cannot be determined from the context.

    Return the classification as a single JSON object with a key "placeholder_type" and its corresponding value. Do not include any other text or formatting outside of the JSON object.

    **Example Output Format:**
    {{"placeholder_type": "data_field"}}
    {{"placeholder_type": "integer_parameter"}}

    Now, classify the placeholder.
    """
    print(f"--- Calling LLM to judge type for placeholder (正在调用LLM判断占位符类型): {placeholder} ---", flush=True)

    response = await call_llm(prompt, llm_client)
    # call_llm may hand back a non-dict payload (e.g. a list, a string, or
    # None when the model's JSON is malformed); the previous unconditional
    # response.get(...) raised AttributeError in that case.
    if isinstance(response, dict):
        return response.get("placeholder_type", "unknown")
    print(f"Warning: LLM did not return a valid JSON object for placeholder type (警告:LLM未返回有效的占位符类型对象). Got: {response}", flush=True)
    return "unknown"
async def populate_template(s: SingleSession, alpha_details: dict, template_expression: str, template_explanation: str, operator_summary: str, llm_client: openai.AsyncOpenAI, top_n_datafield: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX") -> dict:
    """
    Populate every placeholder in an alpha template with candidate values.

    For each placeholder the LLM first judges its type; candidates are then
    gathered from the matching source: datafield search, the predefined group
    list, the REGULAR operator set, or LLM-suggested numeric values.

    Args:
        s: Authenticated BRAIN session used for datafield lookups.
        alpha_details: Seed alpha details (its ``settings`` drive the search scope).
        template_expression / template_explanation: The template and its rationale.
        operator_summary: Operator metadata interpolated into the LLM prompts.
        llm_client: Async OpenAI-compatible client.
        top_n_datafield: Max datafield candidates per placeholder.
        user_region / user_universe / user_delay / user_category / user_data_type:
            Optional overrides for the datafield search scope.

    Returns:
        dict: placeholder -> {"type": ..., "candidates": [...]}; empty dict
        when the template contains no placeholders.
    """
    # Fix: the original computed `placeholders` twice and carried a duplicated
    # stray docstring between the two computations; both removed.
    placeholders = extract_placeholders(template_expression)

    if not placeholders:
        print("No placeholders found in the template. (模板中未找到占位符)", flush=True)
        return {}

    print(f"Found placeholders in template (在模板中找到占位符): {placeholders}", flush=True)

    populated_placeholders = {}

    for ph in placeholders:
        # Use LLM to judge placeholder type
        ph_type = await judge_placeholder_type(ph, template_expression, template_explanation, operator_summary, llm_client)
        print(f"'{ph}' judged as type (判断类型为): {ph_type}", flush=True)

        if ph_type == "data_field":
            candidates = await get_datafield_candidates(s, alpha_details, template_expression, template_explanation, ph, llm_client, top_n=top_n_datafield, user_region=user_region, user_universe=user_universe, user_delay=user_delay, user_category=user_category, user_data_type=user_data_type)
            populated_placeholders[ph] = {"type": "data_field", "candidates": candidates}
        elif ph_type == "group_data_field":
            candidates = await get_group_datafield_candidates(template_expression, template_explanation, ph, llm_client)
            populated_placeholders[ph] = {"type": "group_data_field", "candidates": candidates}
        elif ph_type in ["operator", "group_operator", "ts_operator", "vector_operator"]:
            candidates = await get_operator_candidates(template_expression, template_explanation, ph, llm_client)
            populated_placeholders[ph] = {"type": ph_type, "candidates": candidates}
        elif ph_type in ["integer_parameter", "float_parameter"]:
            candidates = await get_parameter_candidates(ph_type, template_expression, template_explanation, ph, llm_client)
            populated_placeholders[ph] = {"type": ph_type, "candidates": candidates}
        elif ph_type == "string_parameter":
            # Add logic for string_parameter if needed, for now it returns empty
            populated_placeholders[ph] = {"type": "string_parameter", "candidates": []}
        else:
            print(f"Could not determine type for placeholder (无法确定占位符类型): {ph} (LLM classified as {ph_type})", flush=True)
            populated_placeholders[ph] = {"type": "unknown", "candidates": []}

    return populated_placeholders
def get_datafield_prefix(datafield_name: str) -> str:
    """Return the portion of *datafield_name* before the first underscore.

    Names without an underscore are returned unchanged
    (e.g. 'anl44_foo' -> 'anl44', 'close' -> 'close').
    """
    head, _sep, _rest = datafield_name.partition('_')
    return head
async def generate_new_alphas(alpha_description, brain_session, template_summary: Optional[str] = None, top_n_datafield: int = 50, user_region: Optional[str] = None, user_universe: Optional[str] = None, user_delay: Optional[int] = None, user_category: Optional[Union[str, list]] = None, user_data_type: str = "MATRIX"):
    """
    Main pipeline: propose, populate, combine and validate new alphas from a seed alpha.

    Console steps: [2/5] propose templates via the LLM; [3/5] classify each
    placeholder and gather candidates (incrementally saved to
    output/Alpha_candidates.json); [4/5] expand candidate combinations into
    concrete expressions; [5/5] syntax-check and write
    Alpha_generated_expressions_success.json / _error.json.

    Args:
        alpha_description: The alpha description JSON string.
        brain_session: The BRAIN session object.
        template_summary: Optional template summary string. If None, will load from built-in.
        top_n_datafield: Number of data field candidates to retrieve (default: 50).
        user_region / user_universe / user_delay / user_category: optional
            overrides for the datafield search scope (fall back to seed settings).
        user_data_type: Data type for datafield search (MATRIX or VECTOR).

    Side effects: prints progress, writes JSON files under ./output, and
    calls sys.exit(1) on unrecoverable failures.
    """
    # Declare the module-level globals read below.
    global LLM_model_name, LLM_API_KEY, llm_base_url

    # Load template summary if not provided
    if template_summary is None:
        template_summary = load_template_summary()
    # NOTE(review): get_brain_operators returns a dict (see
    # get_operator_candidates); it is interpolated verbatim into LLM prompts
    # as the "operator summary" — confirm this rendering is intended.
    operator_summary = get_brain_operators(scope_filters=["REGULAR"])

    # Authenticate against the LLM gateway.
    try:
        llm_api_key = get_token_from_auth_server()
        llm_base_url_value = llm_base_url  # read the module-level base URL
        llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
        print("✓ LLM Gateway 认证成功", flush=True)
    except Exception as e:
        print(f"❌ LLM Gateway 认证失败: {e}", flush=True)
        sys.exit(1)

    details = json.loads(alpha_description)

    if not details:
        print(f"Failed to retrieve details for Alpha (获取Alpha详情失败)", flush=True)
        sys.exit(1)

    print("Alpha Details Retrieved (已获取Alpha详情):", flush=True)
    print(json.dumps(details, indent=4), flush=True)


    # --- Step 2/5: propose new alpha templates from the seed alpha ---
    print(f"\n{'='*60}", flush=True)
    print("[Step 2/5] 正在生成 Alpha 模板提议...", flush=True)
    print(f"{'='*60}", flush=True)
    proposed_templates = await propose_alpha_templates(details, template_summary, llm_client, user_data_type=user_data_type)

    if not proposed_templates:
        print("Failed to generate proposed alpha templates. (生成提议模板失败)", flush=True)
        sys.exit(1)

    print("\n--- Proposed Alpha Templates (JSON) (建议的Alpha模板,多样性会受到模型和模板总结文档的影响) ---", flush=True)
    print(json.dumps(proposed_templates, indent=4), flush=True)

    # --- Validation: Drop templates with suspicious literal identifiers ---
    try:
        operators_meta = get_brain_operators().get('operators', [])
        proposed_templates = _filter_valid_templates(
            proposed_templates,
            operators_meta,
            brain_session,
            details.get('settings', {}),
            parse_alpha_code,
        )
    except Exception as e:
        # Validation is best-effort: on any failure keep the unfiltered set.
        print(f"⚠ 模板校验步骤出现异常,跳过校验: {e}", flush=True)

    if not proposed_templates:
        print("❌ 所有模板在校验后被丢弃,无法继续。", flush=True)
        sys.exit(1)

    # --- Step 3/5: populate each template's placeholders with candidates ---
    print(f"\n{'='*60}", flush=True)
    print("[Step 3/5] 正在处理模板并收集候选数据字段...", flush=True)
    print(f"{'='*60}", flush=True)

    # Ensure the output directory exists next to this script
    output_dir = Path(__file__).parent / "output"
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"✓ 输出目录已准备: {output_dir}", flush=True)
    except Exception as e:
        print(f"Warning: could not create directory {output_dir}: {e}", flush=True)

    output_filepath = output_dir / f"Alpha_candidates.json"

    final_output = {}

    total_templates = len(proposed_templates)
    for idx, (template_expr, template_expl) in enumerate(proposed_templates.items(), 1):
        print(f"\n[模板 {idx}/{total_templates}] 正在处理: '{template_expr[:60]}...'", flush=True)
        try:
            populated_info = await populate_template(brain_session, details, template_expr, template_expl, operator_summary, llm_client, top_n_datafield=top_n_datafield, user_region=user_region, user_universe=user_universe, user_delay=user_delay, user_category=user_category, user_data_type=user_data_type)

            # Skip templates where any data_field placeholder has zero candidates
            if _should_skip_due_to_empty_candidates(populated_info):
                print("⚠ 该模板存在数据字段候选为空的占位符,跳过此模板。", flush=True)
                continue

            final_output[template_expr] = {
                "template_explanation": template_expl,
                "seed_alpha_settings": details.get('settings', {}),
                "placeholder_candidates": populated_info
            }

            # --- Incremental Saving --- rewrite the whole file after every
            # template so progress survives a crash mid-run.
            try:
                with output_filepath.open('w', encoding='utf-8') as f:
                    json.dump(final_output, f, indent=4)
                print(f"✓ Progress saved to {output_filepath.name}", flush=True)
            except IOError as e:
                print(f"⚠️ Warning: Failed to save progress: {e}", flush=True)

        except Exception as e:
            # One bad template must not abort the whole batch.
            print(f"❌ Error processing template '{template_expr}': {e}", flush=True)
            print("Skipping this template and continuing...", flush=True)
            continue

    print(f"\n{'='*60}", flush=True)
    print("[Step 4/5] 正在生成 Alpha 表达式组合...", flush=True)
    print(f"{'='*60}", flush=True)
    print(f"✓ 已处理 {len(final_output)} 个有效模板", flush=True)

    print("\n--- Final Consolidated Output (最终合并输出) ---", flush=True)
    print(json.dumps(final_output, indent=4), flush=True)


    generated_expressions = set()

    for template_expression, template_data in final_output.items():
        placeholder_candidates = template_data["placeholder_candidates"]
        # NOTE(review): seed_alpha_settings is bound here but never used below.
        seed_alpha_settings = template_data["seed_alpha_settings"]

        # Prepare a dictionary mapping each placeholder to its list of
        # substitutable string values.
        candidates_for_placeholders = {}
        # NOTE(review): the loop variable `details` shadows the outer alpha
        # `details` dict; the outer value is not read again after this point,
        # but renaming the loop variable would be safer.
        for placeholder, details in placeholder_candidates.items():
            # Extract only the 'value' or 'name' from the candidates list
            if details["type"] == "data_field":
                candidates_for_placeholders[placeholder] = [c["id"] for c in details["candidates"]]
            elif details["type"] in ["integer_parameter", "float_parameter"]:
                candidates_for_placeholders[placeholder] = [str(c["value"]) for c in details["candidates"]]
            elif details["type"] == "group_data_field":
                candidates_for_placeholders[placeholder] = [c["name"] for c in details["candidates"]]
            elif details["type"] == "operator":
                candidates_for_placeholders[placeholder] = [c["name"] for c in details["candidates"]]
            else:
                candidates_for_placeholders[placeholder] = []


        # Generate every combination of placeholder values (cartesian product).
        placeholder_names = list(candidates_for_placeholders.keys())
        all_combinations_values = list(itertools.product(*candidates_for_placeholders.values()))

        for combination_values in all_combinations_values:

            # --- ATOM Mode: require all data fields in one expression to share
            # the same dataset prefix (e.g. all 'anl44_*'). ---

            datafield_values_in_combo = []
            placeholder_types = {ph: details["type"] for ph, details in placeholder_candidates.items()}

            for i, placeholder_name in enumerate(placeholder_names):
                if placeholder_types.get(placeholder_name) == 'data_field':
                    datafield_values_in_combo.append(combination_values[i])

            if len(datafield_values_in_combo) > 1:
                first_prefix = get_datafield_prefix(datafield_values_in_combo[0])
                if not all(get_datafield_prefix(df) == first_prefix for df in datafield_values_in_combo):
                    continue  # Skip this combination as prefixes do not match

            # Substitute every placeholder with its chosen value.
            current_expression = template_expression
            for i, placeholder_name in enumerate(placeholder_names):
                current_expression = current_expression.replace(placeholder_name, combination_values[i])

            # Check for duplicates before adding
            if current_expression not in generated_expressions:
                generated_expressions.add(current_expression)

    # --- Step 5/5: validate all unique generated expressions ---
    print(f"\n{'='*60}", flush=True)
    print("[Step 5/5] 正在验证生成的表达式...", flush=True)
    print(f"{'='*60}", flush=True)
    print(f"✓ 生成的唯一 Alpha 表达式总数: {len(generated_expressions)}", flush=True)

    validator = val.ExpressionValidator()
    print("请注意,该文件仅用于验证表达式的格式正确性,\n不保证表达式在实际使用中的逻辑正确性或可执行性。\n", flush=True)
    print("不在内置函数列表中的operator将无法检查,如有需要,请使用AI按需修改本源代码添加", flush=True)

    expressions_data = list(generated_expressions)
    # NOTE(review): the dict branch below is dead here — expressions_data is
    # always a list; it looks inherited from a version that loaded a JSON file
    # shaped {"expressions": [...]}.
    if isinstance(expressions_data, dict) and "expressions" in expressions_data:
        expressions = expressions_data["expressions"]
    elif isinstance(expressions_data, list):
        expressions = expressions_data
    else:
        print("错误: JSON文件格式不正确,需要包含表达式列表", flush=True)
        return

    # Validate every expression, splitting into valid/invalid buckets.
    valid_expressions = []
    invalid_expressions = []

    print(f"开始验证 {len(expressions)} 个表达式...", flush=True)
    for i, expr in enumerate(expressions, 1):
        if i % 10 == 0:
            print(f"已验证 {i}/{len(expressions)} 个表达式", flush=True)

        result = validator.check_expression(expr)
        if result["valid"]:
            valid_expressions.append(expr)
        else:
            invalid_expressions.append({"expression": expr, "errors": result["errors"]})

    # Build the output file paths.
    name = "Alpha_generated_expressions"
    valid_output_path = os.path.join(output_dir, f"{name}_success.json")
    invalid_output_path = os.path.join(output_dir, f"{name}_error.json")

    # Report and persist the results.
    print(f"\n验证完成!", flush=True)
    print(f"有效表达式: {len(valid_expressions)}", flush=True)
    print(f"无效表达式: {len(invalid_expressions)}", flush=True)

    # Save the valid expressions.
    try:
        with open(valid_output_path, 'w', encoding='utf-8') as f:
            json.dump(valid_expressions, f, ensure_ascii=False, indent=2)
        print(f"有效表达式已保存到: {valid_output_path}", flush=True)
    except Exception as e:
        print(f"错误: 保存有效表达式失败 - {e}", flush=True)

    # Save the invalid expressions together with their error details.
    try:
        with open(invalid_output_path, 'w', encoding='utf-8') as f:
            json.dump(invalid_expressions, f, ensure_ascii=False, indent=2)
        print(f"无效表达式已保存到: {invalid_output_path},文件包含错误详情", flush=True)
        print("查看该文件,你将获得修改模板的灵感,你可以定位到错误的模板并在APP里修改", flush=True)
    except Exception as e:
        print(f"错误: 保存无效表达式失败 - {e}", flush=True)

    print("请注意,该文件仅用于验证表达式的格式正确性,\n不保证表达式在实际使用中的逻辑正确性或可执行性。\n", flush=True)
    print("不在内置函数列表中的operator将无法检查,如有需要,请使用AI按需修改validator源代码添加", flush=True)

    print("不同模型效果不同,默认的kimi模型可能会产生Alpha语法错误,请检查生成的模板文件进行甄别", flush=True)
    print("下一步,请下载已完成的模板,放入APP首页进行解析和语法检查,强烈建议生成表达式后手动尝试回测", flush=True)
async def main():
    """
    Entry point: collect configuration, start sessions, and run the pipeline.

    Configuration comes either from a JSON file given as the first command
    line argument (batch mode) or from interactive prompts. Exits with
    status 1 on configuration or authentication failure.
    """

    # Step 0: batch mode — load configuration from the file path in argv[1].
    if len(sys.argv) > 1:
        config_path = sys.argv[1]
        if os.path.exists(config_path):
            try:
                with open(config_path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                print(f"✓ 已从命令行参数加载配置: {config_path}", flush=True)
                # Ensure all required fields are present or set defaults
                if 'top_n_datafield' not in config:
                    config['top_n_datafield'] = 50
                if 'template_summary_path' not in config:
                    config['template_summary_path'] = None
            except Exception as e:
                print(f"❌ 加载配置文件失败: {e}", flush=True)
                sys.exit(1)
        else:
            print(f"❌ 配置文件不存在: {config_path}", flush=True)
            sys.exit(1)
    else:
        # Step 0 (interactive): prompt the user for every config value.
        print("\n" + "="*60, flush=True)
        print("交互式配置输入模式", flush=True)
        print("="*60 + "\n", flush=True)

        config = interactive_input()

    # Publish credentials / LLM settings as module-level globals for helpers.
    global LLM_model_name, LLM_API_KEY, llm_base_url, username, password
    LLM_model_name = config['LLM_model_name']
    LLM_API_KEY = config['LLM_API_KEY']
    llm_base_url = config['llm_base_url']
    username = config['username']
    password = config['password']

    # Step 1: load the template summary document.
    template_summary = load_template_summary(config.get('template_summary_path'))

    # Step 2: start the BRAIN session.
    print("--- 正在启动 BRAIN 会话... ---", flush=True)
    s = start_session()

    # Step 3: authenticate against the LLM gateway.
    # NOTE(review): the client built here is never used afterwards —
    # generate_new_alphas() creates its own; this appears to act only as an
    # early connectivity check. Confirm before removing.
    llm_client = None
    print("--- 正在认证 LLM Gateway... ---", flush=True)
    try:
        llm_api_key = get_token_from_auth_server()
        llm_base_url_value = llm_base_url
        llm_client = openai.AsyncOpenAI(base_url=llm_base_url_value, api_key=llm_api_key)
        print("✓ LLM Gateway 认证成功", flush=True)
    except Exception as e:
        print(f"❌ LLM Gateway 认证失败: {e}", flush=True)
        sys.exit(1)

    # Step 4: fetch the seed alpha details.
    alpha_id = config['alpha_id']
    print(f"\n--- 正在获取 Alpha ID: {alpha_id} 的详情... ---", flush=True)

    # Step 4.5: datafield search scope — from the config file in batch mode,
    # otherwise chosen interactively.
    if len(sys.argv) > 1:
        user_datafield_config = {
            'user_region': config.get('user_region'),
            'user_universe': config.get('user_universe'),
            'user_delay': config.get('user_delay'),
            'user_category': config.get('user_category'),
            'user_data_type': config.get('user_data_type', 'MATRIX')
        }
    else:
        user_datafield_config = interactive_datafield_selection(s)

    details_str = await generate_alpha_description(alpha_id, brain_session=s)
    await generate_new_alphas(
        alpha_description=details_str,
        brain_session=s,
        template_summary=template_summary,
        top_n_datafield=config.get('top_n_datafield', 50),
        user_region=user_datafield_config.get('user_region'),
        user_universe=user_datafield_config.get('user_universe'),
        user_delay=user_datafield_config.get('user_delay'),
        user_category=user_datafield_config.get('user_category'),
        user_data_type=user_datafield_config.get('user_data_type', 'MATRIX')
    )
def interactive_datafield_selection(s: SingleSession) -> dict:
    """
    Interactively ask the user for the datafield search configuration.

    Prompts, in order, for Region, Delay, Universe, Category and Data Type,
    offering only combinations reported as valid by the platform. Pressing
    Enter at any prompt keeps the seed alpha's default (returned as None).

    Args:
        s: Authenticated BRAIN session used to fetch the valid options.

    Returns:
        dict: keys 'user_region', 'user_universe', 'user_delay',
        'user_category', 'user_data_type' (values None where defaults apply);
        an empty dict when the option lookup fails.
    """
    print("\n" + "="*60, flush=True)
    print("【附加配置】数据字段搜索范围配置", flush=True)
    print("正在获取有效的 Region/Universe/Delay 组合...", flush=True)

    try:
        df = get_instrument_type_region_delay(s)
    except Exception as e:
        # Option lookup failed: fall back entirely to the seed alpha settings.
        print(f"⚠ 获取配置选项失败: {e}", flush=True)
        print("将使用 Seed Alpha 的默认设置", flush=True)
        return {}

    # Filter for EQUITY only as per current logic
    df_equity = df[df['InstrumentType'] == 'EQUITY']

    if df_equity.empty:
        print("未找到 EQUITY 类型的配置选项。", flush=True)
        return {}

    # 1. Select Region
    regions = df_equity['Region'].unique().tolist()
    print(f"\n可用地区 (Region): {regions}", flush=True)
    region_input = input(f"请输入地区 (直接回车使用 Seed Alpha 默认值): ").strip()

    selected_region = None
    if region_input:
        if region_input in regions:
            selected_region = region_input
        else:
            print(f"⚠ 输入无效,将使用默认值", flush=True)

    # 2. Select Delay
    # If region is selected, filter delays for that region
    if selected_region:
        delays = df_equity[df_equity['Region'] == selected_region]['Delay'].unique().tolist()
    else:
        delays = df_equity['Delay'].unique().tolist()

    print(f"\n可用延迟 (Delay): {delays}", flush=True)
    delay_input = input(f"请输入延迟 (直接回车使用 Seed Alpha 默认值): ").strip()

    selected_delay = None
    if delay_input:
        try:
            d_val = int(delay_input)
            if d_val in delays:
                selected_delay = d_val
            else:
                print(f"⚠ 输入不在列表中,将使用默认值", flush=True)
        except ValueError:
            print(f"⚠ 输入无效,将使用默认值", flush=True)

    # 3. Select Universe
    # If region and delay are selected, filter universes
    if selected_region and selected_delay is not None:
        subset = df_equity[(df_equity['Region'] == selected_region) & (df_equity['Delay'] == selected_delay)]
        if not subset.empty:
            universes = subset.iloc[0]['Universe']
        else:
            universes = []
    else:
        # Just show all unique universes if we can't filter precisely
        universes = set()
        for u_list in df_equity['Universe']:
            universes.update(u_list)
        universes = list(universes)

    print(f"\n可用范围 (Universe): {universes}", flush=True)
    universe_input = input(f"请输入范围 (直接回车使用 Seed Alpha 默认值): ").strip()

    selected_universe = None
    if universe_input:
        if universe_input in universes:
            selected_universe = universe_input
        else:
            print(f"⚠ 输入无效,将使用默认值", flush=True)

    # 4. Select Category (multi-select by list index or category ID)
    print("\n正在获取数据类别 (Data Categories)...", flush=True)
    categories = get_data_categories(s)

    selected_category = None
    if categories:
        print("\n可用类别 (Categories):", flush=True)
        for i, cat in enumerate(categories):
            print(f"{i+1}. {cat['name']} (ID: {cat['id']})", flush=True)

        cat_input = input(f"请输入类别编号或ID (多个用逗号分隔, 直接回车不筛选): ").strip()

        if cat_input:
            selected_categories = []
            inputs = [x.strip() for x in cat_input.split(',')]

            for inp in inputs:
                # Check if input is an index
                if inp.isdigit():
                    idx = int(inp) - 1
                    if 0 <= idx < len(categories):
                        selected_categories.append(categories[idx]['id'])
                        print(f"已选择类别: {categories[idx]['name']}", flush=True)
                    # NOTE(review): an out-of-range numeric input is silently
                    # ignored here (no warning is printed) — confirm intended.
                else:
                    # Check if input is an ID
                    found = False
                    for cat in categories:
                        if cat['id'] == inp:
                            selected_categories.append(cat['id'])
                            print(f"已选择类别: {cat['name']}", flush=True)
                            found = True
                            break
                    if not found:
                        print(f"⚠ 输入无效: {inp}", flush=True)

            if selected_categories:
                selected_category = selected_categories
            else:
                print(f"⚠ 未选择有效类别,将不筛选类别", flush=True)
    else:
        print("⚠ 无法获取类别列表,跳过类别选择", flush=True)

    # 5. Select Data Type (VECTOR requires explicit confirmation)
    print("\n可用数据类型 (Data Type): [MATRIX, VECTOR]", flush=True)
    data_type_input = input(f"请输入数据类型 (直接回车默认 MATRIX): ").strip().upper()

    selected_data_type = "MATRIX"
    if data_type_input == "VECTOR":
        print("⚠ 警告: 请确保您输入的原型Alpha中正确地使用了vector operator,否则极容易造成数据类型错误", flush=True)
        confirm = input("确认使用 VECTOR 吗? (y/n): ").strip().lower()
        if confirm == 'y':
            selected_data_type = "VECTOR"
        else:
            print("已取消 VECTOR 选择,使用默认值 MATRIX", flush=True)
    elif data_type_input and data_type_input != "MATRIX":
        print(f"⚠ 输入无效,将使用默认值 MATRIX", flush=True)

    return {
        'user_region': selected_region,
        'user_universe': selected_universe,
        'user_delay': selected_delay,
        'user_category': selected_category,
        'user_data_type': selected_data_type
    }
class SingleSession(requests.Session):
    """
    Process-wide singleton ``requests.Session``.

    ``__new__`` uses double-checked locking (check, acquire ``_lock``,
    re-check) so concurrent first constructions still yield one instance,
    and ``__init__`` runs the base-class initialiser only once.
    """

    _instance = None                  # the single shared instance
    _lock = threading.Lock()          # guards singleton construction
    _relogin_lock = threading.Lock()  # serialises re-login attempts across threads
    _initialized = False              # True once requests.Session.__init__ has run

    def __new__(cls, *args, **kwargs):
        # Double-checked locking: the second check inside the lock prevents
        # two threads that both saw None from each creating an instance.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, *args, **kwargs):
        # __init__ runs on every SingleSession() call; only initialise the
        # underlying requests.Session the first time.
        if not self._initialized:
            super(SingleSession, self).__init__(*args, **kwargs)
            self._initialized = True

    def get_relogin_lock(self):
        """Return the shared lock used to serialise re-login attempts."""
        return self._relogin_lock
def setup_logger() -> logging.Logger:
    """
    Configure and return the shared "ace" logger.

    The logger writes to the console and, always, to a file named 'ace.log'.
    The logger and console handler use DEBUG level when the module-level DEV
    flag is True and INFO otherwise; the file handler records DEBUG regardless.
    (The previous docstring claimed INFO/WARNING levels and a DEV-gated file
    handler, which contradicted the code.)

    Returns:
        logger (logging.Logger): The configured logger object.

    The format for the log messages is:
    'asctime' - 'name' - 'levelname' - 'message'.
    """
    logger = logging.getLogger("ace")

    # Guard against handler duplication: logging.getLogger returns the same
    # object on every call, so a second setup_logger() call used to attach a
    # second console/file handler and double every message.
    if logger.handlers:
        return logger

    level = logging.DEBUG if DEV else logging.INFO
    logger.setLevel(level)

    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # The file handler always captures full DEBUG detail regardless of DEV.
    file_handler = logging.FileHandler("ace.log")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    return logger
def get_credentials() -> tuple[str, str]:
    """
    Return the platform credentials as an (email, password) tuple.

    NOTE(review): this is a hard-coded placeholder stub — it does NOT read
    ~/secrets/platform-brain.json or prompt the user, despite what earlier
    documentation suggested. Replace the values below with a real credential
    source before use; start_session() wipes that JSON file on auth failure,
    so the two should be reconciled.
    """

    return ("xxx@xxxx.com", "xxxxxx")


def start_session() -> SingleSession:
    """
    Start a new session with the WorldQuant BRAIN platform.

    Authenticates via HTTP basic auth against /authentication. On a 401 with
    a 'persona' challenge it walks the user through biometric authentication
    (blocking on input() until the linked flow returns 201); on any other
    401 it empties ~/secrets/platform-brain.json and retries recursively.

    Returns:
        SingleSession: An authenticated (singleton) session object.

    Raises:
        requests.exceptions.RequestException: If there's an error during the
        authentication process.
    """

    s = SingleSession()
    s.auth = get_credentials()
    r = s.post(brain_api_url + "/authentication")
    logger.debug(f"New session created (ID: {id(s)}) with authentication response: {r.status_code}, {r.json()}")
    if r.status_code == requests.status_codes.codes.unauthorized:
        if r.headers["WWW-Authenticate"] == "persona":
            # Biometric flow: the user completes it in a browser at the URL
            # below, then we poll the Location endpoint until it returns 201.
            print(
                "Complete biometrics authentication and press any key to continue: \n"
                + urljoin(r.url, r.headers["Location"])
                + "\n"
            )
            input()
            s.post(urljoin(r.url, r.headers["Location"]))

            while True:
                if s.post(urljoin(r.url, r.headers["Location"])).status_code != 201:
                    input(
                        "Biometrics authentication is not complete. Please try again and press any key when completed \n"
                    )
                else:
                    break
        else:
            # Bad credentials: blank the cached credential file and retry.
            # NOTE(review): with the hard-coded get_credentials() stub this
            # recursion can never succeed — it will loop until interrupted.
            logger.error("\nIncorrect email or password\n")
            with open(
                os.path.join(os.path.expanduser("~"), "secrets/platform-brain.json"),
                "w",
            ) as file:
                json.dump({}, file)
            return start_session()
    return s
def generate_alpha(
    regular: Optional[str] = None,
    selection: Optional[str] = None,
    combo: Optional[str] = None,
    alpha_type: Literal["REGULAR", "SUPER"] = "REGULAR",
    region: str = "USA",
    universe: str = "TOP3000",
    delay: Literal[0, 1] = 1,
    decay: int = 0,
    neutralization: str = "INDUSTRY",
    truncation: float = 0.08,
    pasteurization: Literal["ON", "OFF"] = "ON",
    test_period: str = "P0Y0M0D",
    unit_handling: Literal["VERIFY"] = "VERIFY",
    nan_handling: Literal["ON", "OFF"] = "OFF",
    max_trade: Literal["ON", "OFF"] = "OFF",
    selection_handling: str = "POSITIVE",
    selection_limit: int = 100,
    visualization: bool = False,
) -> dict:
    """
    Build the simulation payload dictionary for an alpha.

    For alpha_type="REGULAR" the payload carries the `regular` expression;
    for alpha_type="SUPER" it carries `selection` and `combo` expressions
    plus the selection-specific settings.

    Args:
        regular (str, optional): The regular alpha expression.
        selection (str, optional): The selection expression for super alphas.
        combo (str, optional): The combo expression for super alphas.
        alpha_type (str, optional): "REGULAR" or "SUPER". Defaults to "REGULAR".
        region (str, optional): Region for the alpha. Defaults to "USA".
        universe (str, optional): Universe for the alpha. Defaults to "TOP3000".
        delay (int, optional): Data delay. Defaults to 1.
        decay (int, optional): Decay. Defaults to 0.
        neutralization (str, optional): Neutralization method. Defaults to "INDUSTRY".
        truncation (float, optional): Truncation value. Defaults to 0.08.
        pasteurization (str, optional): Pasteurization setting. Defaults to "ON".
        test_period (str, optional): Test period. Defaults to "P0Y0M0D".
        unit_handling (str, optional): Unit handling method. Defaults to "VERIFY".
        nan_handling (str, optional): NaN handling method. Defaults to "OFF".
        max_trade (str, optional): Max trade method. Defaults to "OFF".
        selection_handling (str, optional): Selection handling for super alphas. Defaults to "POSITIVE".
        selection_limit (int, optional): Selection limit for super alphas. Defaults to 100.
        visualization (bool, optional): Include visualization. Defaults to False.

    Returns:
        dict: The alpha configuration for simulation, or {} for an
        unrecognized alpha_type.
    """

    base_settings = {
        "instrumentType": "EQUITY",
        "region": region,
        "universe": universe,
        "delay": delay,
        "decay": decay,
        "neutralization": neutralization,
        "truncation": truncation,
        "pasteurization": pasteurization,
        "testPeriod": test_period,
        "unitHandling": unit_handling,
        "nanHandling": nan_handling,
        "maxTrade": max_trade,
        "language": "FASTEXPR",
        "visualization": visualization,
    }

    if alpha_type == "REGULAR":
        return {
            "type": alpha_type,
            "settings": base_settings,
            "regular": regular,
        }

    if alpha_type == "SUPER":
        # Super alphas extend the shared settings with selection controls.
        super_settings = dict(base_settings)
        super_settings["selectionHandling"] = selection_handling
        super_settings["selectionLimit"] = selection_limit
        return {
            "type": alpha_type,
            "settings": super_settings,
            "combo": combo,
            "selection": selection,
        }

    logger.error("alpha_type should be REGULAR or SUPER")
    return {}
+ + Returns: + s (SingleSession): The original session object if it hasn't timed out, + otherwise a new session object. + + If the remaining session time is less than 2000 seconds, the function + attempts to start a new session using the `start_session()` function. + If `start_session()` fails on the first attempt, it waits for 100 seconds + and then tries again. The function then returns the new session object. + """ + with s.get_relogin_lock(): + if check_session_timeout(s) < 2000: + logger.debug('Session less than 2000 seconds') + try: + s = start_session() + except Exception: + logger.info('Trying re-login, wait 100 seconds') + time.sleep(100) + s = start_session() + logger.debug(f"Session (ID: {id(s)}) after check and relogin") + return s + + +def start_simulation(s: SingleSession, simulate_data: Union[list[dict], dict]) -> requests.Response: + """ + Start a simulation with the provided simulation data. + + Args: + s (SingleSession): An authenticated session object. + simulate_data (dict): A dictionary containing the simulation parameters. + + Returns: + requests.Response: The response object from the simulation start request. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. + """ + simulate_response = s.post(brain_api_url + "/simulations", json=simulate_data) + return simulate_response + + +def simulation_progress( + s: SingleSession, + simulate_response: requests.Response, +) -> dict: + """ + Monitor the progress of a simulation and return the result when complete. + + Args: + s (SingleSession): An authenticated session object. + simulate_response (requests.Response): The response from starting the simulation. + + Returns: + dict: A dictionary containing the completion status and simulation result. + + Raises: + requests.exceptions.RequestException: If there's an error in the API requests. + """ + if simulate_response.status_code // 100 != 2: + logger.warning(f'Simulation failed. 
{simulate_response.text}, Status code: {simulate_response.status_code}') + return {"completed": False, "result": {}} + + simulation_progress_url = simulate_response.headers["Location"] + error_flag = False + retry_count = 0 + while True: + simulation_progress_response = s.get(simulation_progress_url) + if simulation_progress_response.status_code // 100 != 2: + logger.error( + f'Simulation {simulation_progress_url}, Status code: {simulation_progress_response.status_code}, Retry' + ) + time.sleep(30) + retry_count += 1 + if retry_count <= 2: + continue + else: + logger.error( + f'Simulation {simulation_progress_url} failed, Status code: {simulation_progress_response.status_code}' + ) + error_flag = True + break + if simulation_progress_response.headers.get("Retry-After", 0) == 0: + if simulation_progress_response.json().get("status", "ERROR") == "ERROR": + error_flag = True + break + + time.sleep(float(simulation_progress_response.headers["Retry-After"])) + + if error_flag: + logger.error(f"Simulation failed. {simulation_progress_response.json()}") + return {"completed": False, "result": {}} + + alpha = simulation_progress_response.json().get("alpha", 0) + if alpha == 0: + logger.warning( + f'Simulation {simulation_progress_response.json().get("id")} failed. {simulation_progress_response.json()}' + ) + return {"completed": False, "result": {}} + simulation_result = get_simulation_result_json(s, alpha) + if len(simulation_result) == 0: + return {"completed": False, "result": {}} + return {"completed": True, "result": simulation_result} + + +def get_simulation_result_json(s: SingleSession, alpha_id: str) -> dict: + """ + Retrieve the full simulation result for a specific alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + + Returns: + dict: A dictionary containing the full simulation result. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. 
+ """ + if alpha_id is None: + return {} + while True: + result = s.get(brain_api_url + "/alphas/" + alpha_id) + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + try: + return result.json() + except Exception: + logger.error(f"alpha_id {alpha_id}, {result.headers}, {result.text}, {result.status_code}") + return {} + return s.get(brain_api_url + "/alphas/" + alpha_id).json() + + +def multisimulation_progress( + s: SingleSession, + simulate_response: requests.Response, +) -> dict: + """ + Monitor the progress of multiple simulations and return the results when complete. + + Args: + s (SingleSession): An authenticated session object. + simulate_response (requests.Response): The response from starting the simulations. + + Returns: + dict: A dictionary containing the completion status and simulation results. + + Raises: + requests.exceptions.RequestException: If there's an error in the API requests. + """ + if simulate_response.status_code // 100 != 2: + logger.warning(f'Simulation failed. {simulate_response.text}, Status code: {simulate_response.status_code}') + return {"completed": False, "result": {}} + + simulation_progress_url = simulate_response.headers["Location"] + error_flag = False + while True: + simulation_progress_response = s.get(simulation_progress_url) + if simulation_progress_response.status_code // 100 != 2: + time.sleep(30) + if simulation_progress_response.headers.get("Retry-After", 0) == 0: + if simulation_progress_response.json().get("status", "ERROR") == "ERROR": + error_flag = True + break + + time.sleep(float(simulation_progress_response.headers["Retry-After"])) + + children = simulation_progress_response.json().get("children", 0) + + if error_flag: + if children == 0: + logger.error(f"Simulation failed. 
{simulation_progress_response.json()}") + return {"completed": False, "result": {}} + for child in children: + child_progress = s.get(brain_api_url + "/simulations/" + child) + logger.error(f"Child Simulation failed: {child_progress.json()}") + return {"completed": False, "result": {}} + + if len(children) == 0: + logger.warning( + f'Multi-Simulation {simulation_progress_response.json().get("id")} failed. {simulation_progress_response.json()}' + ) + return {"completed": False, "result": {}} + children_list = [] + for child in children: + child_progress = s.get(brain_api_url + "/simulations/" + child) + alpha = child_progress.json().get("alpha", 0) + if alpha == 0: + logger.warning(f'Child-Simulation {child_progress.json().get("id")} failed. {child_progress.json()}') + return {"completed": False, "result": {}} + child_result = get_simulation_result_json(s, alpha) + children_list.append(child_result) + return {"completed": True, "result": children_list} + + +def get_prod_corr(s: SingleSession, alpha_id: str) -> pd.DataFrame: + """ + Retrieve the production correlation data for a specific alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + + Returns: + pandas.DataFrame: A DataFrame containing the production correlation data. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. + """ + + while True: + result = s.get(brain_api_url + "/alphas/" + alpha_id + "/correlations/prod") + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + if result.json().get("records", 0) == 0: + logger.warning(f"Failed to get production correlation for alpha_id {alpha_id}. 
def check_prod_corr_test(s: SingleSession, alpha_id: str, threshold: float = 0.7) -> pd.DataFrame:
    """
    Check whether an alpha's production correlation stays under a threshold.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.
        threshold (float, optional): The correlation threshold. Defaults to 0.7.

    Returns:
        pandas.DataFrame: A one-row DataFrame describing the
        PROD_CORRELATION test outcome ("PASS"/"FAIL"/"NONE").

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """

    corr_data = get_prod_corr(s, alpha_id)
    if corr_data.empty:
        # No correlation data available: report the test as inconclusive.
        row = {
            "test": "PROD_CORRELATION",
            "result": "NONE",
            "limit": threshold,
            "value": None,
            "alpha_id": alpha_id,
        }
    else:
        # Peak of the "max" correlation among production alphas with overlap.
        peak = corr_data[corr_data.alphas > 0]["max"].max()
        row = {
            "test": "PROD_CORRELATION",
            "result": "PASS" if peak <= threshold else "FAIL",
            "limit": threshold,
            "value": peak,
            "alpha_id": alpha_id,
        }
    return pd.DataFrame([row])
+ """ + + while True: + result = s.get(brain_api_url + "/alphas/" + alpha_id + "/correlations/self") + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + if result.json().get("records", 0) == 0: + logger.warning(f"Failed to get self correlation for alpha_id {alpha_id}. {result.json()}") + return pd.DataFrame() + + records_len = len(result.json()["records"]) + if records_len == 0: + logger.warning(f"No self correlation for alpha_id {alpha_id}") + return pd.DataFrame() + + columns = [dct["name"] for dct in result.json()["schema"]["properties"]] + self_corr_df = pd.DataFrame(result.json()["records"], columns=columns).assign(alpha_id=alpha_id) + self_corr_df["alpha_max_self_corr"] = result.json()["max"] + self_corr_df["alpha_min_self_corr"] = result.json()["min"] + + return self_corr_df + + +def check_self_corr_test(s: SingleSession, alpha_id: str, threshold: float = 0.7) -> pd.DataFrame: + """ + Check if the alpha's self-correlation passes a specified threshold. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + threshold (float, optional): The correlation threshold. Defaults to 0.7. + + Returns: + pandas.DataFrame: A DataFrame containing the test result. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. + """ + + self_corr_df = get_self_corr(s, alpha_id) + if self_corr_df.empty: + result = [ + { + "test": "SELF_CORRELATION", + "result": "PASS", + "limit": threshold, + "value": 0, + "alpha_id": alpha_id, + } + ] + else: + value = self_corr_df["correlation"].max() + result = [ + { + "test": "SELF_CORRELATION", + "result": "PASS" if value < threshold else "FAIL", + "limit": threshold, + "value": value, + "alpha_id": alpha_id, + } + ] + return pd.DataFrame(result) + + +def get_check_submission(s: SingleSession, alpha_id: str) -> pd.DataFrame: + """ + Retrieve the submission check results for a specific alpha. 
+ + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + + Returns: + pandas.DataFrame: A DataFrame containing the submission check results. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. + """ + + while True: + result = s.get(brain_api_url + "/alphas/" + alpha_id + "/check") + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + if result.json().get("is", 0) == 0: + logger.warning(f"Cant check submission alpha_id {alpha_id}. {result.json()}") + return pd.DataFrame() + + checks_df = pd.DataFrame(result.json()["is"]["checks"]).assign(alpha_id=alpha_id) + + return checks_df + + +def simulate_multi_alpha( + s: SingleSession, + simulate_data_list: list, + tags: Optional[list[str]] = None, +) -> list[dict]: + """ + Simulate a list of alphas using multi-simulation. + + This function checks the session timeout, starts a new session if necessary, + initiates the simulation, monitors its progress, and sets alpha properties + upon completion. + + Args: + s (SingleSession): An authenticated session object. + simulate_data (dict): A list of dictionaries, each containing the simulation parameters for the alpha. + These should include all necessary information such as alpha type, settings, and expressions. + + Returns: + list: A list of dictionaries, each containing: + - 'alpha_id' (str): The ID of the simulated alpha if successful, None otherwise. + - 'simulate_data' (dict): The original simulation data provided. + + Raises: + requests.exceptions.RequestException: If there's an error in the API requests. 
+ """ + + s = check_session_and_relogin(s) + if len(simulate_data_list) == 1: + return [simulate_single_alpha(s, simulate_data_list[0])] + simulate_response = start_simulation(s, simulate_data_list) + simulation_result = multisimulation_progress(s, simulate_response) + + if not simulation_result["completed"]: + return [{"alpha_id": None, "simulate_data": x} for x in simulate_data_list] + result = [ + { + "alpha_id": x["id"], + "simulate_data": { + "type": x["type"], + "settings": x["settings"], + "regular": x["regular"]["code"], + }, + } + for x in simulation_result["result"] + ] + if tags: + _ = [set_alpha_properties(s, x["id"], tags=tags) for x in simulation_result["result"]] + else: + _ = [set_alpha_properties(s, x["id"]) for x in simulation_result["result"]] + return result + + +def get_specified_alpha_stats( + s: SingleSession, + alpha_id: Union[str, None], + simulate_data: dict, + get_pnl: bool = False, + get_stats: bool = False, + save_pnl_file: bool = False, + save_stats_file: bool = False, + save_result_file: bool = False, + check_submission: bool = False, + check_self_corr: bool = False, + check_prod_corr: bool = False, +) -> dict: + """ + Retrieve and process specified statistics for a given alpha. + + Args: + s (SingleSession): The authenticated session object. + alpha_id (str): The ID of the alpha to retrieve statistics for. + simulate_data (dict): The original simulation data for the alpha. + get_pnl (bool, optional): Whether to retrieve PnL data. Defaults to False. + get_stats (bool, optional): Whether to retrieve yearly stats. Defaults to False. + save_pnl_file (bool, optional): Whether to save PnL data to a file. Defaults to False. + save_stats_file (bool, optional): Whether to save yearly stats to a file. Defaults to False. + save_result_file (bool, optional): Whether to save the simulation result to a file. Defaults to False. + check_submission (bool, optional): Whether to perform submission checks. Defaults to False. 
+ check_self_corr (bool, optional): Whether to check self-correlation. Defaults to False. + check_prod_corr (bool, optional): Whether to check production correlation. Defaults to False. + + Returns: + dict: A dictionary containing various statistics and information about the alpha. + + Raises: + requests.exceptions.RequestException: If there's an error retrieving data from the API. + """ + pnl = None + stats = None + s = check_session_and_relogin(s) + logger.debug(f"Session (ID: {id(s)}) used in get_specified_alpha_stats for alpha_id: {alpha_id}") + if alpha_id is None: + return { + "alpha_id": None, + "simulate_data": simulate_data, + "is_stats": None, + "pnl": pnl, + "stats": stats, + "is_tests": None, + "train": None, + "test": None, + } + + result = get_simulation_result_json(s, alpha_id) + try: + region = result["settings"]["region"] + is_stats = pd.DataFrame([{key: value for key, value in result['is'].items() if key != 'checks'}]).assign( + alpha_id=alpha_id + ) + except Exception as e: + logger.error(f"Failed to retrieve simulation result for alpha_id {alpha_id}: {result}, {e}") + train = result["train"] + test = result["test"] + is_stats = pd.DataFrame([{key: value for key, value in result["is"].items() if key != "checks"}]).assign( + alpha_id=alpha_id + ) + + if get_pnl: + pnl = get_alpha_pnl(s, alpha_id) + if save_pnl_file: + save_pnl(pnl, alpha_id, region) + + if get_stats: + stats = get_alpha_yearly_stats(s, alpha_id) + if save_stats_file: + save_yearly_stats(stats, alpha_id, region) + + if save_result_file: + save_simulation_result(result) + + is_tests = pd.DataFrame(result["is"]["checks"]).assign(alpha_id=alpha_id) + + if check_submission: + is_tests = get_check_submission(s, alpha_id) + + return { + "alpha_id": alpha_id, + "simulate_data": simulate_data, + "is_stats": is_stats, + "pnl": pnl, + "stats": stats, + "is_tests": is_tests, + "train": train, + "test": test, + } + + if check_self_corr and not check_submission: + self_corr_test = 
check_self_corr_test(s, alpha_id) + is_tests = ( + pd.concat([is_tests, pd.DataFrame([self_corr_test])], ignore_index=True) + .drop_duplicates(subset=["test"], keep="last") + .reset_index(drop=True) + ) + if check_prod_corr and not check_submission: + prod_corr_test = check_prod_corr_test(s, alpha_id) + is_tests = ( + pd.concat([is_tests, pd.DataFrame([prod_corr_test])], ignore_index=True) + .drop_duplicates(subset=["test"], keep="last") + .reset_index(drop=True) + ) + + return { + "alpha_id": alpha_id, + "simulate_data": simulate_data, + "is_stats": is_stats, + "pnl": pnl, + "stats": stats, + "is_tests": is_tests, + "train": train, + "test": test, + } + + +def simulate_single_alpha( + s: SingleSession, + simulate_data: dict, +) -> dict: + """ + Simulate a single alpha using the provided session and simulation data. + + This function checks the session timeout, starts a new session if necessary, + initiates the simulation, monitors its progress, and sets alpha properties + upon completion. + + Args: + s (SingleSession): An authenticated session object. + simulate_data (dict): A dictionary containing the simulation parameters for the alpha. + This should include all necessary information such as alpha type, settings, and expressions. + + Returns: + dict: A dictionary containing: + - 'alpha_id' (str): The ID of the simulated alpha if successful, None otherwise. + - 'simulate_data' (dict): The original simulation data provided. + + Raises: + requests.exceptions.RequestException: If there's an error in the API requests. 
+ """ + + s = check_session_and_relogin(s) + simulate_response = start_simulation(s, simulate_data) + simulation_result = simulation_progress(s, simulate_response) + + if not simulation_result["completed"]: + return {"alpha_id": None, "simulate_data": simulate_data} + set_alpha_properties(s, simulation_result["result"]["id"]) + return { + "alpha_id": simulation_result["result"]["id"], + "simulate_data": simulate_data, + } + + +def simulate_alpha_list( + s: SingleSession, + alpha_list: list, + limit_of_concurrent_simulations: int = 3, + simulation_config: dict = DEFAULT_CONFIG, +) -> list: + """ + Simulate a list of alphas concurrently. + + Args: + s (SingleSession): The authenticated session object. + alpha_list (list): A list of alpha configurations to simulate. + limit_of_concurrent_simulations (int, optional): The maximum number of concurrent simulations. Defaults to 3. + simulation_config (dict, optional): Configuration for the simulation. Defaults to DEFAULT_CONFIG. + + Returns: + list: A list of dictionaries containing simulation results for each alpha. + + Raises: + requests.exceptions.RequestException: If there's an error during the simulation process. 
+ """ + if (limit_of_concurrent_simulations < 1) or (limit_of_concurrent_simulations > 8): + logger.warning("Limit of concurrent simulation should be 1..8, will be set to 3") + limit_of_concurrent_simulations = 3 + + result_list = [] + + with ThreadPool(limit_of_concurrent_simulations) as pool: + with tqdm.tqdm(total=len(alpha_list)) as pbar: + for result in pool.imap_unordered(partial(simulate_single_alpha, s), alpha_list): + result_list.append(result) + pbar.update() + + stats_list_result = [] + + def func(x): + return get_specified_alpha_stats(s, x["alpha_id"], x["simulate_data"], **simulation_config) + + with ThreadPool(3) as pool: + for result in pool.map(func, result_list): + stats_list_result.append(result) + + return _delete_duplicates_from_result(stats_list_result) + + +def simulate_alpha_list_multi( + s: SingleSession, + alpha_list: list, + limit_of_concurrent_simulations: int = 3, + limit_of_multi_simulations: int = 3, + simulation_config: dict = DEFAULT_CONFIG, + tags: Optional[list[str]] = None, +) -> list: + """ + Simulate a list of alphas using multi-simulation when possible. + + Args: + s (SingleSession): An authenticated session object. + alpha_list (list): A list of alpha configurations to simulate. + limit_of_concurrent_simulations (int, optional): The maximum number of concurrent simulation batches. Defaults to 3. + limit_of_multi_simulations (int, optional): The maximum number of alphas in a multi-simulation. Defaults to 3. + simulation_config (dict, optional): Configuration for the simulation. Defaults to DEFAULT_CONFIG. + + Returns: + list: A list of dictionaries containing simulation results for each alpha. + + Raises: + requests.exceptions.RequestException: If there's an error in the API requests. 
+ """ + if (limit_of_multi_simulations < 2) or (limit_of_multi_simulations > 10): + logger.warning("Limit of multi-simulation should be 2..10, will be set to 3") + limit_of_multi_simulations = 3 + if (limit_of_concurrent_simulations < 1) or (limit_of_concurrent_simulations > 8): + logger.warning("Limit of concurrent simulation should be 1..8, will be set to 3") + limit_of_concurrent_simulations = 3 + if len(alpha_list) < 10: + logger.warning( + "List of alphas too short, single concurrent simulations will be used instead of multisimulations" + ) + return simulate_alpha_list( + s, + alpha_list, + limit_of_concurrent_simulations=limit_of_concurrent_simulations, + simulation_config=simulation_config, + ) + if any(d["type"] == "SUPER" for d in alpha_list): + logger.warning("Multi-Simulation is not supported for SuperAlphas, single concurrent simulations will be used") + return simulate_alpha_list( + s, + alpha_list, + limit_of_concurrent_simulations=3, + simulation_config=simulation_config, + ) + + tasks = [ + alpha_list[i : i + limit_of_multi_simulations] for i in range(0, len(alpha_list), limit_of_multi_simulations) + ] + result_list = [] + + with ThreadPool(limit_of_concurrent_simulations) as pool: + with tqdm.tqdm(total=len(tasks)) as pbar: + for result in pool.imap_unordered(partial(simulate_multi_alpha, s, tags=tags), tasks): + result_list.append(result) + pbar.update() + result_list_flat = [item for sublist in result_list for item in sublist] + + stats_list_result = [] + + def func(x): + return get_specified_alpha_stats(s, x["alpha_id"], x["simulate_data"], **simulation_config) + + with ThreadPool(3) as pool: + for result in pool.map(func, result_list_flat): + stats_list_result.append(result) + + return _delete_duplicates_from_result(stats_list_result) + + +def _delete_duplicates_from_result(result: list) -> list: + """ + Remove duplicate alpha results from the simulation output. + + Args: + result (list): A list of dictionaries containing simulation results. 
+ + Returns: + list: A deduplicated list of simulation results. + """ + alpha_id_lst = [] + result_new = [] + for x in result: + if x["alpha_id"] is not None: + if x["alpha_id"] not in alpha_id_lst: + result_new.append(x) + alpha_id_lst.append(x["alpha_id"]) + else: + result_new.append(x) + return result_new + + +def set_alpha_properties( + s: SingleSession, + alpha_id: str, + name: Optional[str] = None, + color: Optional[str] = None, + regular_desc: Optional[str] = None, + selection_desc: str = "None", + combo_desc: str = "None", + tags: Optional[list[str]] = None, +) -> requests.Response: + """ + Update the properties of an alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha to update. + name (str, optional): The new name for the alpha. Defaults to None. + color (str, optional): The new color for the alpha. Defaults to None. + regular_desc (str, optional): Description for regular alpha. Defaults to None. + selection_desc (str, optional): Description for the selection part of a super alpha. Defaults to "None". + combo_desc (str, optional): Description for the combo part of a super alpha. Defaults to "None". + tags (list, optional): List of tags to apply to the alpha. Defaults to None. + + Returns: + requests.Response: The response object from the API call. 
+ """ + + params = {} + if name is not None: + params["name"] = name + if color is not None: + params["color"] = color + if tags is not None: + params["tags"] = tags + if regular_desc is not None: + params.setdefault("regular", {})["description"] = regular_desc + if selection_desc != "None": # Assuming "None" is the default string value for selection_desc + params.setdefault("selection", {})["description"] = selection_desc + if combo_desc != "None": # Assuming "None" is the default string value for combo_desc + params.setdefault("combo", {})["description"] = combo_desc + + response = s.patch(brain_api_url + "/alphas/" + alpha_id, json=params) + + return response + + +def _get_alpha_pnl( + s: SingleSession, + alpha_id: str, + pnl_type: str = "pnl", +) -> pd.DataFrame: + """ + Retrieve the PnL data for a specific alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + pnl_type (str): 'pnl' to get cumulative pnl, 'daily-pnl' to get daily pnl. + + Returns: + pandas.DataFrame: A DataFrame containing the PnL data for the alpha. + """ + + while True: + result = s.get(brain_api_url + "/alphas/" + alpha_id + f"/recordsets/{pnl_type}") + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + pnl = result.json() + if pnl.get("records", 0) == 0: + return pd.DataFrame() + columns = [dct["name"] for dct in pnl["schema"]["properties"]] + pnl_df = ( + pd.DataFrame(pnl["records"], columns=columns) + .assign(alpha_id=alpha_id, date=lambda x: pd.to_datetime(x.date, format="%Y-%m-%d")) + .set_index("date") + ) + return pnl_df + + +def get_alpha_pnl(s: SingleSession, alpha_id: str) -> pd.DataFrame: + """ + Retrieve the cumulative PnL data for a specific alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + + Returns: + pandas.DataFrame: A DataFrame containing the PnL data for the alpha. 
+ """ + + return _get_alpha_pnl(s, alpha_id, "pnl") + + +def get_alpha_yearly_stats(s: SingleSession, alpha_id: str) -> pd.DataFrame: + """ + Retrieve the yearly statistics for a specific alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + + Returns: + pandas.DataFrame: A DataFrame containing the yearly statistics for the alpha. + """ + + while True: + result = s.get(brain_api_url + "/alphas/" + alpha_id + "/recordsets/yearly-stats") + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + stats = result.json() + + if stats.get("records", 0) == 0: + return pd.DataFrame() + columns = [dct["name"] for dct in stats["schema"]["properties"]] + yearly_stats_df = pd.DataFrame(stats["records"], columns=columns).assign(alpha_id=alpha_id) + return yearly_stats_df + + +def get_datasets( + s: SingleSession, + instrument_type: str = "EQUITY", + region: str = "USA", + delay: int = 1, + universe: str = "TOP3000", + theme: str = "ALL", +) -> pd.DataFrame: + """ + Retrieve available datasets based on specified parameters. + + Args: + s (SingleSession): An authenticated session object. + instrument_type (str, optional): The type of instrument. Defaults to "EQUITY". + region (str, optional): The region. Defaults to "USA". + delay (int, optional): The delay. Defaults to 1. + universe (str, optional): The universe. Defaults to "TOP3000". + theme (str, optional): The theme. Defaults to "ALL". + + Returns: + pandas.DataFrame: A DataFrame containing information about available datasets. + """ + if theme == "ALL": + # Fetch both theme=false and theme=true + url_false = ( + brain_api_url + + "/data-sets?" + + f"instrumentType={instrument_type}®ion={region}&delay={str(delay)}&universe={universe}&theme=false" + ) + result_false = s.get(url_false) + df_false = pd.DataFrame(result_false.json()["results"]) + + url_true = ( + brain_api_url + + "/data-sets?" 
+ + f"instrumentType={instrument_type}®ion={region}&delay={str(delay)}&universe={universe}&theme=true" + ) + result_true = s.get(url_true) + df_true = pd.DataFrame(result_true.json()["results"]) + + datasets_df = pd.concat([df_false, df_true], ignore_index=True) + else: + url = ( + brain_api_url + + "/data-sets?" + + f"instrumentType={instrument_type}®ion={region}&delay={str(delay)}&universe={universe}&theme={theme}" + ) + result = s.get(url) + datasets_df = pd.DataFrame(result.json()["results"]) + + datasets_df = expand_dict_columns(datasets_df) + return datasets_df + + +def get_datafields( + s: SingleSession, + instrument_type: str = "EQUITY", + region: str = "USA", + delay: int = 1, + universe: str = "TOP3000", + theme: str = "false", + dataset_id: str = "", + data_type: str = "MATRIX", + search: str = "", +) -> pd.DataFrame: + """ + Retrieve available datafields based on specified parameters. + + Args: + s (SingleSession): An authenticated session object. + instrument_type (str, optional): The type of instrument. Defaults to "EQUITY". + region (str, optional): The region. Defaults to "USA". + delay (int, optional): The delay. Defaults to 1. + universe (str, optional): The universe. Defaults to "TOP3000". + theme (str, optional): The theme. Defaults to "false". + dataset_id (str, optional): The ID of a specific dataset. Defaults to "". + data_type (str, optional): The type of data. Defaults to "MATRIX". + search (str, optional): A search string to filter datafields. Defaults to "". + + Returns: + pandas.DataFrame: A DataFrame containing information about available datafields. + """ + type_param = f"&type={data_type}" if data_type != "ALL" else "" + if len(search) == 0: + url_template = ( + brain_api_url + + "/data-fields?" 
+ + f"&instrumentType={instrument_type}" + + f"®ion={region}&delay={str(delay)}&universe={universe}{type_param}&dataset.id={dataset_id}&limit=50" + + "&offset={x}" + ) + count = s.get(url_template.format(x=0)).json()["count"] + if count == 0: + logger.warning( + f"No fields found: region={region}, delay={str(delay)}, universe={universe}, " + f"type={data_type}, dataset.id={dataset_id}" + ) + return pd.DataFrame() + + else: + url_template = ( + brain_api_url + + "/data-fields?" + + f"&instrumentType={instrument_type}" + + f"®ion={region}&delay={str(delay)}&universe={universe}{type_param}&limit=50" + + f"&search={search}" + + "&offset={x}" + ) + count = 100 + + max_try = 5 + datafields_list = [] + for x in range(0, count, 50): + for _ in range(max_try): + datafields = s.get(url_template.format(x=x)) + while datafields.status_code == 429: + print("status_code 429, sleep 3 seconds") + time.sleep(3) + datafields = s.get(url_template.format(x=x)) + if "results" in datafields.json(): + break + time.sleep(5) + + datafields_list.append(datafields.json()["results"]) + + datafields_list_flat = [item for sublist in datafields_list for item in sublist] + + datafields_df = pd.DataFrame(datafields_list_flat) + datafields_df = expand_dict_columns(datafields_df) + return datafields_df + + +def get_operators(s: SingleSession) -> pd.DataFrame: + """ + Fetches and processes the list of operators from the WorldQuant Brain API. + + This function retrieves the operators from the provided session `s`, + explodes the 'scope' column (which contains lists) into separate rows, + and returns the resulting DataFrame. + + Args: + s (SingleSession): An authenticated session object. + + Returns: + pd.DataFrame: A DataFrame containing the operators with each scope entry + as a separate row. 
+ """ + df = pd.DataFrame(s.get(brain_api_url + "/operators").json()) + return df.explode('scope').reset_index(drop=True) + + +def get_instrument_type_region_delay(s: SingleSession) -> pd.DataFrame: + """ + Retrieves and organizes instrument type, region, and delay data into a DataFrame. + + Parameters: + s (SingleSession): The session object used for making the API call. + + Returns: + df (pd.DataFrame): A DataFrame containing the instrument type, region, delay, universe, and neutralization data. + + The function fetches the settings options from the simulations endpoint and extracts the 'Instrument type', + 'Region', 'Universe', 'Delay', and 'Neutralization' data. It then organizes this data into a list of dictionaries, + each containing the instrument type, region, delay, universe, and neutralization for a particular combination + of instrument type, region, and delay. This list is then converted into a DataFrame and returned. + """ + + settings_options = s.options('https://api.worldquantbrain.com/simulations').json()['actions']['POST']['settings'][ + 'children' + ] + data = [ + {settings_options[key]['label']: settings_options[key]['choices']} + for key in settings_options.keys() + if settings_options[key]['type'] == 'choice' + ] + + instrument_type_data = {} + region_data = {} + universe_data = {} + delay_data = {} + neutralization_data = {} + + for item in data: + if 'Instrument type' in item: + instrument_type_data = item['Instrument type'] + elif 'Region' in item: + region_data = item['Region']['instrumentType'] + elif 'Universe' in item: + universe_data = item['Universe']['instrumentType'] + elif 'Delay' in item: + delay_data = item['Delay']['instrumentType'] + elif 'Neutralization' in item: + neutralization_data = item['Neutralization']['instrumentType'] + + data_list = [] + + for instrument_type in instrument_type_data: + for region in region_data[instrument_type['value']]: + for delay in delay_data[instrument_type['value']]['region'][region['value']]: + 
row = {'InstrumentType': instrument_type['value'], 'Region': region['value'], 'Delay': delay['value']} + row['Universe'] = [ + item['value'] for item in universe_data[instrument_type['value']]['region'][region['value']] + ] + row['Neutralization'] = [ + item['value'] for item in neutralization_data[instrument_type['value']]['region'][region['value']] + ] + data_list.append(row) + + df = pd.DataFrame(data_list).sort_values( + by=['InstrumentType', 'Region', 'Delay'], ascending=False, ignore_index=True + ) + return df + + +def performance_comparison( + s: SingleSession, alpha_id: str, team_id: Optional[str] = None, competition: Optional[str] = None +) -> dict: + """ + Retrieve performance comparison data for merged performance check. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + team_id (str, optional): The ID of the team for comparison. Defaults to None. + competition (str, optional): The ID of the competition for comparison. Defaults to None. + + Returns: + dict: A dictionary containing the performance comparison data. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. + """ + if competition is not None: + part_url = f"competitions/{competition}" + elif team_id is not None: + part_url = f"teams/{team_id}" + else: + part_url = "users/self" + while True: + result = s.get(brain_api_url + f"/{part_url}/alphas/" + alpha_id + "/before-and-after-performance") + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + else: + break + if result.json().get("stats", 0) == 0: + logger.warning(f"Cant get performance comparison for alpha_id {alpha_id}. {result.json()}") + return {} + if result.status_code != 200: + logger.warning(f"Cant get performance comparison for alpha_id {alpha_id}. 
{result.json()}") + return {} + + return result.json() + + +def construct_selection_expression( + selection: str, + instrument_type: Literal["EQUITY"] = "EQUITY", + region: str = "USA", + delay: Literal[0, 1] = 1, + selection_limit: int = 1000, + selection_handling: str = "POSITIVE", +) -> dict: + """ + Construct a dictionary containing parameters for a selection expression. + + This function creates a dictionary with the necessary parameters to define + a selection expression for use in super alpha simulations. + + Args: + selection (str): The selection expression to be used. + instrument_type (str, optional): Instrument type to use. + Defaults to "EQUITY". + region (str, optional): The geographic region for the selection. + Defaults to "USA". + delay (int, optional): The delay parameter for the selection. + Defaults to 1. + selection_limit (int, optional): The maximum number of instruments + to be selected. Defaults to 1000. + selection_handling (str, optional): The method for handling the + selection. Defaults to "POSITIVE". + + Returns: + dict: A dictionary containing the constructed selection expression + parameters, ready to be used in API calls or other functions. + """ + selection_data = { + "settings.instrumentType": instrument_type, + "settings.region": region, + "settings.delay": delay, + "selection": selection, + "limit": 10, + "selectionLimit": selection_limit, + "selectionHandling": selection_handling, + } + return selection_data + + +def run_selection(s: SingleSession, selection_data: dict) -> dict: + """ + Run a selection simulation using the provided selection data. + + Args: + s (SingleSession): An authenticated session object. + selection_data (dict): A dictionary containing the selection parameters. + + Returns: + dict: A dictionary containing the count of selected alphas and any messages. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. 
+ """ + selection_response = s.get(brain_api_url + "/simulations/super-selection", params=selection_data) + r = selection_response.json() + selected_alphas_count = r.get("count") + message = r.get("message", "") + time.sleep(2) + return {"selected_alphas_count": selected_alphas_count, "message": message} + + +def get_alpha_daily_pnl(s: SingleSession, alpha_id: str) -> pd.DataFrame: + """ + Retrieve the daily PnL data for a specific alpha. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha. + + Returns: + pandas.DataFrame: A DataFrame containing the PnL data for the alpha. + """ + + return _get_alpha_pnl(s, alpha_id, "daily-pnl") + + +def submit_alpha(s: SingleSession, alpha_id: str) -> bool: + """ + Submit an alpha for evaluation. + + Args: + s (SingleSession): An authenticated session object. + alpha_id (str): The ID of the alpha to submit. + + Returns: + bool: True if the submission was successful, False otherwise. + + Raises: + requests.exceptions.RequestException: If there's an error in the API request. + """ + result = s.post(brain_api_url + "/alphas/" + alpha_id + "/submit") + while True: + if "retry-after" in result.headers: + time.sleep(float(result.headers["Retry-After"])) + result = s.get(brain_api_url + "/alphas/" + alpha_id + "/submit") + else: + break + return result.status_code == 200 + + + +def main(): + """ + Main function to demonstrate the usage of the library. + + This function creates a session, generates a list of sample alphas, + and simulates them using the simulate_alpha_list function. 
+ """ + + s = start_session() + + k = [ + "vwap * 2", + "open * close", + "high * low", + "vwap * 3", + "open * close", + "high * low", + ] + alpha_list = [generate_alpha(x) for x in k] + + simulate_alpha_list(s, alpha_list) + + +if __name__ == "__main__": + main() diff --git a/simple72/Tranformer/helpful_functions.py b/simple72/Tranformer/helpful_functions.py new file mode 100755 index 0000000..bd27370 --- /dev/null +++ b/simple72/Tranformer/helpful_functions.py @@ -0,0 +1,180 @@ +import json +import os +from typing import Union + +import pandas as pd +from pandas.io.formats.style import Styler + +brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com") +brain_url = os.environ.get("BRAIN_URL", "https://platform.worldquantbrain.com") + + +def make_clickable_alpha_id(alpha_id: str) -> str: + """ + Create a clickable HTML link for an alpha ID. + + Args: + alpha_id (str): The ID of the alpha. + + Returns: + str: An HTML string containing a clickable link to the alpha's page on the platform. + """ + + url = brain_url + "/alpha/" + return f'{alpha_id}' + + +def prettify_result( + result: list, detailed_tests_view: bool = False, clickable_alpha_id: bool = False +) -> Union[pd.DataFrame, Styler]: + """ + Combine and format simulation results into a single DataFrame for analysis. + + Args: + result (list): A list of dictionaries containing simulation results. + detailed_tests_view (bool, optional): If True, include detailed test results. Defaults to False. + clickable_alpha_id (bool, optional): If True, make alpha IDs clickable. Defaults to False. + + Returns: + pandas.DataFrame or pandas.io.formats.style.Styler: A DataFrame containing formatted results, + optionally with clickable alpha IDs. 
+ """ + list_of_is_stats = [result[x]["is_stats"] for x in range(len(result)) if result[x]["is_stats"] is not None] + is_stats_df = pd.concat(list_of_is_stats).reset_index(drop=True) + is_stats_df = is_stats_df.sort_values("fitness", ascending=False) + + expressions = { + result[x]["alpha_id"]: ( + { + "selection": result[x]["simulate_data"]["selection"], + "combo": result[x]["simulate_data"]["combo"], + } + if result[x]["simulate_data"]["type"] == "SUPER" + else result[x]["simulate_data"]["regular"] + ) + for x in range(len(result)) + if result[x]["is_stats"] is not None + } + expression_df = pd.DataFrame(list(expressions.items()), columns=["alpha_id", "expression"]) + + list_of_is_tests = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None] + is_tests_df = pd.concat(list_of_is_tests, sort=True).reset_index(drop=True) + is_tests_df = is_tests_df[is_tests_df["result"] != "WARNING"] + if detailed_tests_view: + cols = ["limit", "result", "value"] + is_tests_df["details"] = is_tests_df[cols].to_dict(orient="records") + is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="details").reset_index() + else: + is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="result").reset_index() + + alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id") + alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id") + alpha_stats = alpha_stats.drop(columns=alpha_stats.columns[(alpha_stats == "PENDING").any()]) + alpha_stats.columns = alpha_stats.columns.str.replace("(?<=[a-z])(?=[A-Z])", "_", regex=True).str.lower() + if clickable_alpha_id: + return alpha_stats.style.format({"alpha_id": lambda x: make_clickable_alpha_id(str(x))}) + return alpha_stats + + +def concat_pnl(result: list) -> pd.DataFrame: + """ + Combine PnL results from multiple alphas into a single DataFrame. + + Args: + result (list): A list of dictionaries containing simulation results with PnL data. 
+ + Returns: + pandas.DataFrame: A DataFrame containing combined PnL data for all alphas. + """ + list_of_pnls = [result[x]["pnl"] for x in range(len(result)) if result[x]["pnl"] is not None] + pnls_df = pd.concat(list_of_pnls).reset_index() + + return pnls_df + + +def concat_is_tests(result: list) -> pd.DataFrame: + """ + Combine in-sample test results from multiple alphas into a single DataFrame. + + Args: + result (list): A list of dictionaries containing simulation results with in-sample test data. + + Returns: + pandas.DataFrame: A DataFrame containing combined in-sample test results for all alphas. + """ + is_tests_list = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None] + is_tests_df = pd.concat(is_tests_list, sort=True).reset_index(drop=True) + return is_tests_df + + +def save_simulation_result(result: dict) -> None: + """ + Save the simulation result to a JSON file in the 'simulation_results' folder. + + Args: + result (dict): A dictionary containing the simulation result for an alpha. + """ + + alpha_id = result["id"] + region = result["settings"]["region"] + folder_path = "simulation_results/" + file_path = os.path.join(folder_path, f"{alpha_id}_{region}") + + os.makedirs(folder_path, exist_ok=True) + + with open(file_path, "w", encoding="utf-8") as file: + json.dump(result, file) + + +def save_pnl(pnl_df: pd.DataFrame, alpha_id: str, region: str) -> None: + """ + Save the PnL data for an alpha to a CSV file in the 'alphas_pnl' folder. + + Args: + pnl_df (pandas.DataFrame): The DataFrame containing PnL data. + alpha_id (str): The ID of the alpha. + region (str): The region for which the PnL data was generated. 
+ """ + + folder_path = "alphas_pnl/" + file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv") + os.makedirs(folder_path, exist_ok=True) + + pnl_df.to_csv(file_path) + + +def save_yearly_stats(yearly_stats: pd.DataFrame, alpha_id: str, region: str): + """ + Save the yearly statistics for an alpha to a CSV file in the 'yearly_stats' folder. + + Args: + yearly_stats (pandas.DataFrame): The DataFrame containing yearly statistics. + alpha_id (str): The ID of the alpha. + region (str): The region for which the statistics were generated. + """ + + folder_path = "yearly_stats/" + file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv") + os.makedirs(folder_path, exist_ok=True) + + yearly_stats.to_csv(file_path, index=False) + + +def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame: + """ + Expand dictionary columns in a DataFrame into separate columns. + + Args: + data (pandas.DataFrame): The input DataFrame with dictionary columns. + + Returns: + pandas.DataFrame: A new DataFrame with expanded columns. + """ + dict_columns = list(filter(lambda x: isinstance(data[x].iloc[0], dict), data.columns)) + new_columns = pd.concat( + [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns], + axis=1, + ) + + data = pd.concat([data, new_columns], axis=1) + return data diff --git a/simple72/Tranformer/output/Alpha_candidates.json b/simple72/Tranformer/output/Alpha_candidates.json new file mode 100644 index 0000000..6746f6d --- /dev/null +++ b/simple72/Tranformer/output/Alpha_candidates.json @@ -0,0 +1,112 @@ +{ + "ts_zscore(divide(avg_pct_change_estimate_12m_earnings_7d, add(count_analysts_lower_curr_qtr_earnings_30d, 0.0001)), 126)": { + "template_explanation": "This template applies a 126-day rolling z-score normalization to the original earnings confidence ratio. By standardizing the signal relative to its own historical distribution, it captures whether current earnings optimism (vs. 
near-term pessimism) is unusually strong or weak compared to historical norms, enabling mean-reversion or momentum trading around historical equilibrium points.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "IND", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SLOW_AND_FAST", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "placeholder_candidates": {} + }, + "group_zscore(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), industry)": { + "template_explanation": "This template calculates the 66-day mean of the 12-month earnings estimate change and then performs industry-relative z-score normalization. It extracts pure earnings momentum by removing sector-wide trends, identifying stocks within each industry that have stronger or weaker earnings revisions than their peer group average.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "IND", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SLOW_AND_FAST", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "placeholder_candidates": {} + }, + "ts_decay_linear(avg_pct_change_estimate_12m_earnings_7d, 20)": { + "template_explanation": "This template applies exponential decay weighting to the 12-month earnings estimate changes over a 20-day window. 
Recent earnings revisions receive higher weight than older ones, creating a smoothed momentum signal that responds quickly to new information while filtering out short-term noise\u2014a refined version focusing purely on the numerator's forward-looking signal.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "IND", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SLOW_AND_FAST", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "placeholder_candidates": {} + }, + "regression_neut(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), log(cap))": { + "template_explanation": "This template removes the market cap factor exposure from the earnings momentum signal using regression neutralization. By stripping out size bias (larger companies may have more analyst coverage and different revision patterns), this alpha isolates the pure earnings-specific component, reducing unintended factor tilts.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "IND", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SLOW_AND_FAST", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "placeholder_candidates": {} + }, + "divide(ts_rank(avg_pct_change_estimate_12m_earnings_7d, 252), add(ts_rank(count_analysts_lower_curr_qtr_earnings_30d, 126), 0.1))": { + "template_explanation": "This template converts both earnings estimate change and analyst cut counts into percentile ranks before taking their ratio. 
The 252-day rank for earnings captures long-term earnings momentum, while the 126-day rank for analyst cuts captures recent bearishness. Ranking before division creates a more robust, distribution-invariant signal that is comparable across different market regimes.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "IND", + "universe": "TOP500", + "delay": 1, + "decay": 6, + "neutralization": "SLOW_AND_FAST", + "truncation": 0.02, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "maxPosition": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2014-01-01", + "endDate": "2023-12-31" + }, + "placeholder_candidates": {} + } +} \ No newline at end of file diff --git a/simple72/Tranformer/output/Alpha_candidates_示例.json b/simple72/Tranformer/output/Alpha_candidates_示例.json new file mode 100755 index 0000000..6055565 --- /dev/null +++ b/simple72/Tranformer/output/Alpha_candidates_示例.json @@ -0,0 +1,654 @@ +{ + "group_rank(ts_mean(ts_delta(ts_backfill(, ), ), ) / ts_std_dev(ts_delta(ts_backfill(, ), ), ), )": { + "template_explanation": "Generalized risk-adjusted fundamental momentum: replaces EPS with any quarterly fundamental, keeps the Sharpe-like score and intra-group ranking to isolate smooth, persistent trends while neutralizing structural differences across groups.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "GLB", + "universe": "TOPDIV3000", + "delay": 1, + "decay": 0, + "neutralization": "STATISTICAL", + "truncation": 0.01, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2013-01-20", + "endDate": "2023-01-20" + }, + "placeholder_candidates": { + "": { + "type": "data_field", + "candidates": [ + { + "id": "fnd23_icsm_m_vers", + "description": "[Quarterly] Revenue" + }, + { + "id": "fnd72_s_pit_or_is_q_net_income", + 
"description": "Net Income" + }, + { + "id": "mdl219_1_ocfmargin", + "description": "Operating cash flow margin calculated as operating cash flow divided by sales." + }, + { + "id": "fnd23_tangible_bvps", + "description": "tangible book value per share." + }, + { + "id": "ebitda_per_share_trailing_twelve_months", + "description": "EBITDA per share for the trailing twelve months." + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 252 + }, + { + "value": 504 + }, + { + "value": 756 + }, + { + "value": 1008 + }, + { + "value": 1260 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 42 + }, + { + "value": 63 + }, + { + "value": 84 + }, + { + "value": 126 + }, + { + "value": 252 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 20 + }, + { + "value": 40 + }, + { + "value": 60 + }, + { + "value": 80 + }, + { + "value": 120 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 20 + }, + { + "value": 40 + }, + { + "value": 60 + }, + { + "value": 120 + }, + { + "value": 252 + } + ] + }, + "": { + "type": "group_data_field", + "candidates": [ + { + "name": "industry" + }, + { + "name": "sector" + }, + { + "name": "subindustry" + } + ] + } + } + }, + "group_rank(ts_mean(ts_delta(, ), ) / ts_std_dev(ts_delta(, ), ), )": { + "template_explanation": "Price-momentum version of the seed: uses returns instead of EPS change, still rewards consistent momentum within each sector/industry, producing a cleaner trend signal that is not distorted by industry-wide volatility regimes.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "GLB", + "universe": "TOPDIV3000", + "delay": 1, + "decay": 0, + "neutralization": "STATISTICAL", + "truncation": 0.01, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2013-01-20", + 
"endDate": "2023-01-20" + }, + "placeholder_candidates": { + "": { + "type": "data_field", + "candidates": [ + { + "id": "close", + "description": "Daily close price" + }, + { + "id": "open", + "description": "Daily open price" + }, + { + "id": "vwap", + "description": "Daily volume weighted average price" + }, + { + "id": "oth463_adjust_close", + "description": "Adjusted close price" + }, + { + "id": "high", + "description": "Daily high price" + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 21 + }, + { + "value": 42 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 10 + }, + { + "value": 21 + }, + { + "value": 42 + }, + { + "value": 63 + }, + { + "value": 126 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 20 + }, + { + "value": 40 + }, + { + "value": 60 + }, + { + "value": 80 + }, + { + "value": 120 + } + ] + }, + "": { + "type": "group_data_field", + "candidates": [ + { + "name": "industry" + }, + { + "name": "subindustry" + }, + { + "name": "sector" + } + ] + } + } + }, + "group_rank(( - ts_delay(, )) / ts_std_dev( - ts_delay(, ), ), )": { + "template_explanation": "SUE-style surprise on any quarterly metric: standardizes the YoY (or QoQ) jump by its own historical volatility, then ranks within peer groups to highlight genuine outliers while canceling systematic industry drift.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "GLB", + "universe": "TOPDIV3000", + "delay": 1, + "decay": 0, + "neutralization": "STATISTICAL", + "truncation": 0.01, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2013-01-20", + "endDate": "2023-01-20" + }, + "placeholder_candidates": { + "": { + "type": "data_field", + "candidates": [ + { + "id": "fnd72_s_pit_or_is_q_net_income", + 
"description": "Net Income" + }, + { + "id": "star_rev_surprise_prediction_fy2", + "description": "Revenue predicted surprise pct - forward 2 years" + }, + { + "id": "star_eps_smart_estimate_12m", + "description": "SmartEstimate F12M EPS" + }, + { + "id": "ebitda_per_share_trailing_twelve_months", + "description": "EBITDA per share for the trailing twelve months." + }, + { + "id": "mdl219_1_ocfmargin", + "description": "Operating cash flow margin calculated as operating cash flow divided by sales." + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 1 + }, + { + "value": 4 + }, + { + "value": 8 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 8 + }, + { + "value": 12 + }, + { + "value": 16 + }, + { + "value": 20 + } + ] + }, + "": { + "type": "group_data_field", + "candidates": [ + { + "name": "industry" + }, + { + "name": "sector" + }, + { + "name": "subindustry" + } + ] + } + } + }, + "group_rank(ts_mean(ts_delta(ts_backfill(, ), ), ) / ts_std_dev(ts_delta(ts_backfill(, ), ), ), )": { + "template_explanation": "Alternative-data momentum clone: applies the same risk-adjusted change logic to non-traditional data (web-traffic, sentiment, etc.), yielding a smooth, comparable signal that is industry-neutral and robust to data gaps.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "GLB", + "universe": "TOPDIV3000", + "delay": 1, + "decay": 0, + "neutralization": "STATISTICAL", + "truncation": 0.01, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2013-01-20", + "endDate": "2023-01-20" + }, + "placeholder_candidates": { + "": { + "type": "data_field", + "candidates": [ + { + "id": "fnd23_intfvalld1_qkim", + "description": "Traffic acquisition costs (TAC) represent the cost for a company to acquire traffic to their site." 
+ }, + { + "id": "snt22pos_min_377", + "description": "minimum positive sentiment score." + }, + { + "id": "mdl106_tre", + "description": "earnings revenue trend" + }, + { + "id": "fnd23_intfvalld1_liim", + "description": "Click Rate, Total - %. It shows the frequency an advertisement downloaded with a webpage is clicked on. This is obtained using the number of clicks on an ad on a webpage, divided by the total number of times that the ad was downloaded with a page." + }, + { + "id": "headline_sentiment_value", + "description": "Aggregate sentiment value computed for the news headline." + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 14 + }, + { + "value": 30 + }, + { + "value": 60 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 21 + }, + { + "value": 42 + }, + { + "value": 63 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 21 + }, + { + "value": 42 + }, + { + "value": 63 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 20 + }, + { + "value": 40 + }, + { + "value": 60 + } + ] + }, + "": { + "type": "group_data_field", + "candidates": [ + { + "name": "industry" + }, + { + "name": "sector" + }, + { + "name": "subindustry" + } + ] + } + } + }, + "group_rank(ts_corr(ts_delta(ts_backfill(, ), ), ts_delta(, ), ), )": { + "template_explanation": "Earnings-to-price alignment factor: measures how tightly fundamental momentum co-moves with price momentum; high correlation within group flags firms where the market promptly rewards improving fundamentals, suggesting continued follow-through.", + "seed_alpha_settings": { + "instrumentType": "EQUITY", + "region": "GLB", + "universe": "TOPDIV3000", + "delay": 1, + "decay": 0, + "neutralization": "STATISTICAL", + "truncation": 0.01, 
+ "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "ON", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2013-01-20", + "endDate": "2023-01-20" + }, + "placeholder_candidates": { + "": { + "type": "data_field", + "candidates": [ + { + "id": "ern3_all_delay_1_next_reptime", + "description": "next report time" + }, + { + "id": "star_rev_surprise_prediction_fy2", + "description": "Revenue predicted surprise pct - forward 2 years" + }, + { + "id": "fnd72_s_pit_or_is_q_net_income", + "description": "Net Income" + }, + { + "id": "ebitda_per_share_trailing_twelve_months", + "description": "EBITDA per share for the trailing twelve months." + }, + { + "id": "fnd7_ointfund_qfcnif", + "description": "Quarterly Fundamental Item: Financing Activities -> Net Cash Flow (Statement of Cash Flows)" + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 1 + }, + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 21 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 21 + }, + { + "value": 63 + }, + { + "value": 126 + } + ] + }, + "": { + "type": "data_field", + "candidates": [ + { + "id": "close", + "description": "Daily close price" + }, + { + "id": "pv37_volume_13", + "description": "Trading volume" + }, + { + "id": "srp_average_score", + "description": "Average score" + }, + { + "id": "pv37_open_13", + "description": "Open price" + }, + { + "id": "open", + "description": "Daily open price" + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 5 + }, + { + "value": 10 + }, + { + "value": 21 + }, + { + "value": 42 + }, + { + "value": 63 + } + ] + }, + "": { + "type": "integer_parameter", + "candidates": [ + { + "value": 20 + }, + { + "value": 40 + }, + { + "value": 60 + }, + { + "value": 120 + }, + { + "value": 252 + } + ] + }, + "": { + "type": "group_data_field", + 
"candidates": [ + { + "name": "industry" + }, + { + "name": "subindustry" + }, + { + "name": "sector" + } + ] + } + } + } +} \ No newline at end of file diff --git a/simple72/Tranformer/output/Alpha_generated_expressions_error.json b/simple72/Tranformer/output/Alpha_generated_expressions_error.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/simple72/Tranformer/output/Alpha_generated_expressions_error.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/simple72/Tranformer/output/Alpha_generated_expressions_success.json b/simple72/Tranformer/output/Alpha_generated_expressions_success.json new file mode 100644 index 0000000..ba294fe --- /dev/null +++ b/simple72/Tranformer/output/Alpha_generated_expressions_success.json @@ -0,0 +1,7 @@ +[ + "divide(ts_rank(avg_pct_change_estimate_12m_earnings_7d, 252), add(ts_rank(count_analysts_lower_curr_qtr_earnings_30d, 126), 0.1))", + "regression_neut(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), log(cap))", + "ts_decay_linear(avg_pct_change_estimate_12m_earnings_7d, 20)", + "ts_zscore(divide(avg_pct_change_estimate_12m_earnings_7d, add(count_analysts_lower_curr_qtr_earnings_30d, 0.0001)), 126)", + "group_zscore(ts_mean(avg_pct_change_estimate_12m_earnings_7d, 66), industry)" +] \ No newline at end of file diff --git a/simple72/Tranformer/parsetab.py b/simple72/Tranformer/parsetab.py new file mode 100755 index 0000000..2ffb599 --- /dev/null +++ b/simple72/Tranformer/parsetab.py @@ -0,0 +1,60 @@ + +# parsetab.py +# This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R +_tabversion = '3.10' + +_lr_method = 'LALR' + +_lr_signature = 'ASSIGN BOOLEAN CATEGORY COMMA DIVIDE EQUAL FIELD FUNCTION GREATER GREATEREQUAL IDENTIFIER LESS LESSEQUAL LPAREN MINUS NOTEQUAL NUMBER PLUS RPAREN STRING TIMESexpression : comparison\n | expression EQUAL comparison\n | expression NOTEQUAL comparison\n | expression GREATER comparison\n | expression LESS comparison\n | expression GREATEREQUAL comparison\n | expression LESSEQUAL comparisoncomparison : term\n | comparison PLUS term\n | comparison MINUS termterm : factor\n | term TIMES factor\n | term DIVIDE factorfactor : NUMBER\n | STRING\n | FIELD\n | CATEGORY\n | IDENTIFIER\n | BOOLEAN\n | MINUS factor\n | LPAREN expression RPAREN\n | function_callfunction_call : FUNCTION LPAREN args RPARENargs : arg_list\n | emptyarg_list : arg\n | arg_list COMMA argarg : expression\n | IDENTIFIER ASSIGN expressionempty :' + +_lr_action_items = {'NUMBER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'STRING':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'FIELD':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'CATEGORY':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,]),'IDENTIFIER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[10,10,10,10,10,10,10,10,10,10,10,10,10,44,44,10,]),'BOOLEAN':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'MINUS':([0,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,32,33,34,35,36,37,38,44,45,46,47,],[4,22,-8,4,-11,-14,-15,-16,-17,-18,-19,4,-22,4,4,4,4,4,4,4,4,4,4,-20,4,22,22,22,22,22,22,-9,-10,-12,-13,-21,-18,-23,4,4,]),'LPAREN':([0,4,12,14,15,16,17,18,19,20,21,22,23,24,27,46,47,],[12,12,12,27,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'FUNCTION':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[14,14,14,14,14,14,14,14,14,14,14,14,1
4,14,14,14,]),'$end':([1,2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,45,],[0,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,-23,]),'EQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[15,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,15,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,15,-18,-23,15,]),'NOTEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[16,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,16,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,16,-18,-23,16,]),'GREATER':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[17,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,17,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,17,-18,-23,17,]),'LESS':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[18,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,18,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,18,-18,-23,18,]),'GREATEREQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[19,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,19,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,19,-18,-23,19,]),'LESSEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[20,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,20,-18,-23,20,]),'RPAREN':([2,3,5,6,7,8,9,10,11,13,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,38,-30,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,45,-24,-25,-26,-28,-18,-23,-27,-29,]),'COMMA':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,40,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,46,-26,-28,-18,-23,-27,-29,]),'PLUS':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,44,45,],[21,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,21,21,21,21,21,21,-9,-10,-12,-13,-21,-18,-23,]),'TIMES':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[23,-
11,-14,-15,-16,-17,-18,-19,-22,-20,23,23,-12,-13,-21,-18,-23,]),'DIVIDE':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[24,-11,-14,-15,-16,-17,-18,-19,-22,-20,24,24,-12,-13,-21,-18,-23,]),'ASSIGN':([44,],[47,]),} + +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'expression':([0,12,27,46,47,],[1,26,43,43,49,]),'comparison':([0,12,15,16,17,18,19,20,27,46,47,],[2,2,28,29,30,31,32,33,2,2,2,]),'term':([0,12,15,16,17,18,19,20,21,22,27,46,47,],[3,3,3,3,3,3,3,3,34,35,3,3,3,]),'factor':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[5,25,5,5,5,5,5,5,5,5,5,36,37,5,5,5,]),'function_call':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'args':([27,],[39,]),'arg_list':([27,],[40,]),'empty':([27,],[41,]),'arg':([27,46,],[42,48,]),} + +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> expression","S'",1,None,None,None), + ('expression -> comparison','expression',1,'p_expression','validator.py',405), + ('expression -> expression EQUAL comparison','expression',3,'p_expression','validator.py',406), + ('expression -> expression NOTEQUAL comparison','expression',3,'p_expression','validator.py',407), + ('expression -> expression GREATER comparison','expression',3,'p_expression','validator.py',408), + ('expression -> expression LESS comparison','expression',3,'p_expression','validator.py',409), + ('expression -> expression GREATEREQUAL comparison','expression',3,'p_expression','validator.py',410), + ('expression -> expression LESSEQUAL comparison','expression',3,'p_expression','validator.py',411), + ('comparison -> term','comparison',1,'p_comparison','validator.py',418), + ('comparison -> comparison PLUS 
term','comparison',3,'p_comparison','validator.py',419), + ('comparison -> comparison MINUS term','comparison',3,'p_comparison','validator.py',420), + ('term -> factor','term',1,'p_term','validator.py',427), + ('term -> term TIMES factor','term',3,'p_term','validator.py',428), + ('term -> term DIVIDE factor','term',3,'p_term','validator.py',429), + ('factor -> NUMBER','factor',1,'p_factor','validator.py',436), + ('factor -> STRING','factor',1,'p_factor','validator.py',437), + ('factor -> FIELD','factor',1,'p_factor','validator.py',438), + ('factor -> CATEGORY','factor',1,'p_factor','validator.py',439), + ('factor -> IDENTIFIER','factor',1,'p_factor','validator.py',440), + ('factor -> BOOLEAN','factor',1,'p_factor','validator.py',441), + ('factor -> MINUS factor','factor',2,'p_factor','validator.py',442), + ('factor -> LPAREN expression RPAREN','factor',3,'p_factor','validator.py',443), + ('factor -> function_call','factor',1,'p_factor','validator.py',444), + ('function_call -> FUNCTION LPAREN args RPAREN','function_call',4,'p_function_call','validator.py',472), + ('args -> arg_list','args',1,'p_args','validator.py',476), + ('args -> empty','args',1,'p_args','validator.py',477), + ('arg_list -> arg','arg_list',1,'p_arg_list','validator.py',484), + ('arg_list -> arg_list COMMA arg','arg_list',3,'p_arg_list','validator.py',485), + ('arg -> expression','arg',1,'p_arg','validator.py',492), + ('arg -> IDENTIFIER ASSIGN expression','arg',3,'p_arg','validator.py',493), + ('empty -> ','empty',0,'p_empty','validator.py',500), +] diff --git a/simple72/Tranformer/template_summary.md b/simple72/Tranformer/template_summary.md new file mode 100644 index 0000000..35db50a --- /dev/null +++ b/simple72/Tranformer/template_summary.md @@ -0,0 +1,3182 @@ +# BRAIN论坛Alpha模板精华总结 + +本文档旨在系统性地整理和总结优秀Alpha模板,它是一种可复用的标准化框架性表达式,它承载着特定的经济逻辑,并预留出若干 “配置项”(包括数据字段、算子、分组方式、衰减规则、中性化方案等),用于生成多个候选阿尔法因子。其典型流程为:数据清洗(数据回填、缩尾处理)→ 跨时间或跨标的维度进行转换 / 对比 → 排序 / 中性化处理 →(可选步骤)衰减调整 / 
换手率优化。这种模板模式能够推动系统化的因子挖掘、复用与多元化配置,同时确保每一个因子都具备清晰可追溯的经济逻辑支撑。 +以下每个模板都附有其核心思想、变量说明、适用场景及原帖链接,方便您理解、应用和进一步探索。 +使用时请思考如何将下列模板与有的Alpha表达式结合,创造出新的模板来捕捉和发现市场规律,找到”好“公司和”坏“公司 +**使用前请注意:** +* **过拟合风险**:部分模板可能存在过拟合风险,请谨慎使用,并结合IS-Ladder测试、多市场回测等方法进行验证。 +* **参数调整**:模板中的参数(如时间窗口、数据集字段)需要根据您的具体研究目标和数据特性进行调整。 +* **持续学习**:最好的模板是您自己创造的。希望本文档能激发您的灵感,而不是限制您的思维。 + +--- + +## From: Alpha Examples from Learn101 + +### Momentum after news +**Hypothesis**: After news is released, if a stock takes a longer time to rise, it may show strong evidence of upward momentum, and it could be beneficial to take a long position in it. +**Expression**: `ts_backfill(vec_avg(nws12_prez_4l),504)` +**Settings**: Region: USA, Universe: TOP500, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **时序相对性 (Step 4)**: 这是一个典型的时序信号。`ts_backfill` 的使用暗示了新闻数据是稀疏的(Step 4.2.4),需要填补空白以维持信号连续性。 +* **算子深意**: `vec_avg` 用于聚合多维新闻向量,提取核心情绪/强度;`ts_backfill` 确保在无新闻日也能维持上一次的观点,直到新消息到来。 +**优化方向**: +* **去噪 (Step 0)**: 新闻情绪可能存在极端噪音,建议在 `vec_avg` 后增加 `winsorize` 或 `rank`。 +* **从属信号 (Subordinate)**: 叠加 `Social Media Effect`。若新闻情绪好但社媒热度低(噪音少),则放大权重;若社媒过热,可能反转。 +* **门限交易 (Step 5)**: 仅在新闻情绪显著偏离均值时交易,如 `trade_when(abs(zscore(news)) > 1.5, ...)`。 + +### Pretax Income +**Hypothesis**: Pretax income is a good measure of a company's financial health and profitability. 
+**Expression**: `quantile(ts_rank(pretax_income,250))` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 4, Neutralization: MARKET, Truncation: 0.01, Pasteurization: ON +**逻辑链深度解析**: +* **时序相对性 (Step 4)**: `ts_rank(..., 250)` 比较当前收入与过去一年的水平,寻找“自身改善”而非“绝对高收入”。 +* **分布重塑 (Step 0)**: `quantile` 强制将信号拉伸为均匀分布,避免了极值影响,只关注相对排序。 +**优化方向**: +* **区间优化 (Step 2)**: 收入微弱变化可能只是噪音。可改用 `ts_zscore` 并只在 >1 或 <-1 时交易。 +* **从属信号**: 引入 `market_cap`。大市值的收入创新高可能比小市值更稳健(质量溢价)。 + +### Operating Earnings Yield +**Hypothesis**: If the operating income of a company is currently higher than its past 1 year history, buy the company's stock and vice-versa. +**Expression**: `ts_rank(operating_income,252)` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SUBINDUSTRY, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **时序相对性 (Step 4)**: 纯粹的时序动量逻辑。`ts_rank` 将当前值映射到历史分位,捕捉“业绩改善”趋势。 +**优化方向**: +* **组内比较 (Step 3)**: 考虑行业周期性。先做 `group_zscore(operating_income, industry)` 再做 `ts_rank`,剔除行业景气度影响,只看个股相对行业的改善。 +* **门限 (Step 5)**: `trade_when(ts_rank > 0.8, ...)` 只做多业绩显著改善的股票。 + +### Appreciation of liabilities +**Hypothesis**: An increase in the fair value of liabilities could indicate a higher cost than expected. +**Expression**: `-ts_rank(fn_liab_fair_val_l1_a,252)` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SUBINDUSTRY, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **反向信号**: 负号 `-` 表示这是一个反向指标(负债增加是坏事)。 +* **时序相对性**: 同样基于 `ts_rank`,关注负债相对于自身历史的增长速度。 +**优化方向**: +* **去噪**: 负债数据可能存在跳变,建议先 `winsorize`。 +* **从属信号**: 结合 `cash_flow`。若负债增加但现金流同时也大幅增加(良性杠杆),则不应做空。 + +### Deferred Revenue +**Hypothesis**: Firms with high deferred revenue will surprise the market in the future when the deferred revenue is recognized. 
+**Expression**: `ts_backfill(fnd6_drc, 252)/assets` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SECTOR, Truncation: 1, Pasteurization: ON +**逻辑链深度解析**: +* **截面比较 (Step 3)**: 除以 `assets` 是为了标准化(Size Adjustment),使其在截面上可比。 +* **数据填补 (Step 0)**: `ts_backfill` 处理财报数据的低频更新特性。 +**优化方向**: +* **行业中性 (Step 3)**: 递延收入在软件/服务业常见,在制造业少见。必须做 `group_zscore(..., sector)` 或 `neutralize`,否则只是在做多特定行业。 +* **时序变化 (Step 4)**: 关注递延收入的 *增长率* `ts_delta`,而不仅仅是绝对值。 + +### Reducing debt +**Hypothesis**: Take a long position in companies whose debt has decreased compared to the past. +**Expression**: `-ts_quantile(debt, 126)` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: MARKET, Truncation: 0.01, Pasteurization: ON +**逻辑链深度解析**: +* **时序相对性**: `ts_quantile` 与 `ts_rank` 类似,捕捉债务下降趋势。 +**优化方向**: +* **从属信号**: 结合 `interest_coverage` (利息保障倍数)。只有在偿债能力弱的公司中,债务减少才最重要(困境反转逻辑)。 + +### Power of leverage +**Hypothesis**: Companies with high liability-to-asset ratios often leverage debt as a strategic tool. +**Expression**: `liabilities/assets` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: MARKET, Truncation: 0.01, Pasteurization: ON +**逻辑链深度解析**: +* **截面比较 (Step 3)**: 这是一个经典的截面因子(杠杆率)。 +**优化方向**: +* **非线性 (Step 1)**: 杠杆通常是倒U型关系(适度杠杆好,过高杠杆坏)。考虑使用 `bucket` 分段,或 `trade_when` 剔除极端高杠杆。 +* **行业中性**: 银行/地产杠杆天生高,必须行业中性化。 + +## From: Alpha Examples from Learn102 + +### Social Media Effect +**Hypothesis**: Poorly performing stocks are discussed more in general on social media platforms. 
+**Expression**: `-scl12_buzz` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.01, Pasteurization: ON +**逻辑链深度解析**: +* **反向指标**: 负号暗示“关注度高=坏事”(可能是负面新闻缠身)。 +* **原始信号**: 直接使用 `buzz`,假设线性关系。 +**优化方向**: +* **去噪 (Step 0)**: 社媒数据极值多,必须 `log` 或 `winsorize`。 +* **从属信号**: 结合 `sentiment`。若关注度高且情感为正,可能是好事;关注度高且情感负,才是做空机会。 +* **门限**: `trade_when(rank(buzz) > 0.9, ...)` 只在极度热门时做空。 + +### Valuation Disconnect Swing Short +**Hypothesis**: A stock with high momentum and value score correlation suggests a disconnect between the stock's price and its intrinsic value. +**Expression**: `-ts_corr(ts_backfill(fscore_momentum,66),ts_backfill(fscore_value,66),756)` +**Settings**: Region: USA, Universe: TOP200, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **高阶统计量**: 使用 `ts_corr` 捕捉两个因子之间的动态关系,而非因子本身。 +* **逻辑**: 动量与价值相关性高,意味着价格脱离基本面(泡沫),因此做空(负号)。 +**优化方向**: +* **窗口调整**: 756天(3年)非常长,捕捉的是长期结构变化。可尝试短窗口(如126天)捕捉短期背离。 + +### Network Dependence +**Hypothesis**: Long stocks of companies whose hub score of customers are low over the past two years. +**Expression**: `-ts_mean(pv13_ustomergraphrank_hub_rank,504)` +**Settings**: Region: USA, Universe: TOP1000, Delay: 1, Decay: 0, Neutralization: INDUSTRY, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **供应链逻辑**: 客户集中度/中心度过高可能意味着风险(依赖大客户)。 +* **平滑 (Step 4)**: `ts_mean(..., 504)` 说明这是一个非常慢的变量,关注长期结构。 +**优化方向**: +* **从属信号**: 结合 `volatility`。高依赖度+高波动 = 极度危险。 + +## From: Alpha Examples from Learn103 + +### News-driven Volatility +**Hypothesis**: Stocks of companies that face high differences in their prices after any news release can be subject to varying sentiments. 
+**Expression**: `(ts_arg_max(ts_backfill(news_session_range, 20), 60))` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 0, Neutralization: SECTOR, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **事件驱动 (Step 4.2.3)**: `ts_arg_max` 寻找过去60天内波动最大的那一天(新闻日)。 +* **算子深意**: 这不是直接用波动率,而是用“最大波动发生的时间距离”作为信号。 +**优化方向**: +* **衰减逻辑**: 结合 `days_from_last_change` 或 `exp_decay`,让信号随时间减弱。 +* **从属信号**: 叠加 `IV Skew`。若波动大且 Skew 偏空,做空;若 Skew 偏多,做多。 + +### Implied Volatility Spread as a predictor +**Hypothesis**: If the Call Open interest is higher than the Put Open interest, the stock may rise based on the intensity of the implied volatility spread. +**Expression**: `trade_when(pcr_oi_270 < 1, (implied_volatility_call_270-implied_volatility_put_270), -1)` +**Settings**: Region: USA, Universe: TOP3000, Delay: 1, Decay: 4, Neutralization: MARKET, Truncation: 0.08, Pasteurization: ON +**逻辑链深度解析**: +* **门限交易 (Step 5)**: `trade_when(pcr_oi < 1, ...)` 是典型的门禁逻辑。只有在看涨持仓量大于看跌时(情绪偏多),才使用 IV Spread 信号。 +* **条件分支**: 不满足条件时给 `-1`(做空),这是一个激进的二元策略。 +**优化方向**: +* **平滑**: IV 数据跳动大,建议对 Spread 做 `ts_mean` 或 `ts_decay_linear`。 + +## 《151 Trading Strategies》论文精华模板 + +本部分总结自Zura Kakushadze与Juan Andrés Serur合著的《151 Trading Strategies》一文,重点提炼其中适用于BRAIN平台的股票类策略,并将其泛化为可复用的Alpha模板。 + +--- + +### 1. 
风险调整后动量模板 (Risk-Adjusted Momentum) + +* **模板表达式**: `ts_mean(ts_delay(returns, ), ) / ts_std_dev(ts_delay(returns, ), )` +* **核心思想**: 这是对经典动量因子的改进。它计算的是过去一段时间(lookback_period)的"时序夏普比率",即收益均值除以收益波动。同时,`ts_delay`跳过了最近一段时间(skip_period,通常为21天/1个月)的数据,以规避短期反转效应的干扰。该因子旨在寻找那些"高质量"的、持续且平稳的动量。 +* **变量说明**: + * ``: 跳过的近期交易日数,如 `21`。 + * ``: 计算动量的回看窗口,如 `252`。 +* **适用场景**: 通用性强,适用于构建稳健的动量类Alpha。 +* **逻辑链深度解析**: + * **时序标准化 (Step 4)**: 分子是收益均值,分母是波动率。本质是 Rolling Sharpe Ratio。 + * **去噪 (Step 0)**: `ts_delay` 跳过最近一个月,剔除了短期反转(Short-term Reversal)噪音,只保留中长期动量。 +* **优化方向**: + * **从属信号**: 叠加 `turnover`。在低换手率时,动量更可靠(量价配合)。 + * **残差化**: 先对 returns 做 `regression_neut` 剔除大盘影响,计算纯特异性动量。 +* **适配自**: Section 3.1, "Price-momentum", `Rrisk.adj` + +### 2. 标准化盈利超预期模板 (SUE - Standardized Unexpected Earnings) + +* **模板表达式**: `(fnd_eps_q - ts_delay(fnd_eps_q, 4)) / ts_std_dev(fnd_eps_q - ts_delay(fnd_eps_q, 4), 8)` +* **核心思想**: 捕捉超预期的盈利增长。它计算的是最新一季的EPS相较于去年同期的增量,并用该增量自身过去8个季度的波动性进行标准化。标准化后的值(SUE)越高,代表盈利惊喜越大,是经典的盈利动量因子。 +* **变量说明**: + * `fnd_eps_q`: 季度每股收益(EPS)字段。 +* **适用场景**: `Fundamental`(基本面)数据集,用于事件驱动型Alpha。 +* **逻辑链深度解析**: + * **季节性调整**: `ts_delay(..., 4)` 比较同比季度,消除季节性影响。 + * **波动率标准化 (Step 0)**: 除以过去8季度的波动,将“惊喜”转化为标准差单位(Z-Score),使其在不同波动率的公司间可比。 +* **优化方向**: + * **事件衰减 (Step 4)**: 叠加 `days_from_last_change`,让 SUE 信号随财报发布时间衰减。 + * **从属信号**: 叠加 `Analyst Revision`。若 SUE 高且分析师上调预期,信号更强。 +* **适配自**: Section 3.2, "Earnings-momentum", SUE + + +### 4. 
隐含波动率偏斜动量模板 (Implied Volatility Skew Momentum) + +* **模板表达式**: `ts_delta(implied_volatility_call_, ) - ts_delta(implied_volatility_put_, )` +* **核心思想**: 捕捉市场情绪的变化。看涨期权IV的上升通常与乐观情绪相关,而看跌期权IV的上升则与悲观或避险情绪相关。该模板计算Call IV的变化量与Put IV变化量之差,旨在做多情绪改善、做空情绪恶化的股票。 +* **变量说明**: + * `implied_volatility_call_`: 不同期限的看涨期权隐含波动率。 + * `implied_volatility_put_`: 不同期限的看跌期权隐含波动率。 + * ``: 计算IV变化的时间窗口,如 `21` (月度变化)。 +* **适用场景**: `Option`(期权)数据集,用于捕捉短中期市场情绪变化。 +* **逻辑链深度解析**: + * **时序变化 (Step 4)**: 关注的是 IV 的 *变化* (`ts_delta`) 而非绝对值。 + * **情绪差**: Call IV 涨幅 > Put IV 涨幅 -> 情绪改善。 +* **优化方向**: + * **门限**: `trade_when(abs(skew_delta) > threshold, ...)` 只在情绪剧烈变化时交易。 + * **事件驱动**: 在财报前(IV 高企时)该策略可能失效,需用 `days_to_earnings` 过滤。 +* **适配自**: Section 3.5, "Implied volatility" + +### 5. 残差动量模板 (Residual Momentum) + +* **模板表达式**: `ts_mean(regression_neut(regression_neut(regression_neut(returns, ), ), ), )` +* **核心思想**: 提纯动量信号。传统动量可能包含了市场Beta、市值、价值等多种因子的敞口。此模板通过连续的中性化(例如依次对``, ``, ``执行`regression_neut`)剥离可被通用因子解释的部分,然后仅对无法被解释的"残差等价物"部分计算动量。 +* **变量说明**: + * ``, ``, ``: 市场通用因子,如 `mkt_beta`, `size_factor`, `value_factor`。 + * ``: 计算残差动量的时间窗口。 +* **适用场景**: 通用性强,是因子提纯、构建高质量Alpha的关键步骤。 +* **逻辑链深度解析**: + * **提纯 (Step 0)**: 通过连续 `regression_neut` 剥离 Beta、Size、Value 等风格暴露。 + * **时序动量**: 对剥离后的残差求 `ts_mean`。 +* **优化方向**: + * **加权**: 使用 `ts_decay_linear` 代替 `ts_mean`,给予近期残差更大权重。 + * **组内比较**: 在残差基础上再做 `group_rank`,寻找行业内最强特异动量。 +* **适配自**: Section 3.7, "Residual momentum" + +### 6. 
风险加权回归均值回归模板 (Weighted Regression Mean-Reversion) + +* **模板表达式**: `reverse(regression_neut(multiply(returns, power(inverse(ts_std_dev(returns, )), 2)), ))` +* **核心思想**: 这是对标准行业中性化均值回归的增强。在对收益率进行行业中性化时,它为不同股票赋予了不同的权重。具体来说,它给历史波动率较低的股票更高的权重,认为这些股票的收益率数据更"可靠",在计算行业均值时应占更大比重。 +* **变量说明**: + * ``: 行业或分组的哑变量矩阵。 + * `weights`: 回归权重,通常是可靠性的度量,如 `1/variance`。 + * ``: 计算波动率的时间窗口。 +* **适用场景**: 适用于任何需要进行组内中性化或回归剥离的场景,尤其是当组内成员的信号质量或波动性差异较大时。 +* **逻辑链深度解析**: + * **加权最小二乘 (WLS)**: 使用 `1/variance` 作为权重,认为低波动的股票信息更可靠。 + * **均值回归**: `reverse` 捕捉残差的反转。 +* **优化方向**: + * **从属信号**: 引入 `liquidity` 权重。流动性好的股票回归更快。 +* **适配自**: Section 3.10, "Mean-reversion – weighted regression" + +### 7. 移动平均线交叉模板 (Moving Average Crossover) + +* **模板表达式**: `sign(ts_mean(, ) - ts_mean(, ))` +* **核心思想**: 经典的趋势跟踪策略。当短期均线上穿长期均线("金叉")时,表明短期趋势走强,产生买入信号。当短期均线下穿长期均线("死叉")时,表明趋势走弱,产生卖出信号。 +* **变量说明**: + * ``: `close`, `vwap` 等价格字段。 + * ``: 短期均线窗口,如 `10`, `20`。 + * ``: 长期均线窗口,如 `50`, `100`。 +* **适用场景**: 适用于趋势性较强的市场或资产。 +* **逻辑链深度解析**: + * **低通滤波**: MA 本质是滤除高频噪音。 + * **二元信号**: `sign` 输出 +1/-1,不包含强度信息。 +* **优化方向**: + * **连续化 (Step 1)**: 去掉 `sign`,直接使用差值并标准化 (`zscore`),保留强度信息。 + * **从属信号**: 结合 `ADX` (趋势强度指标)。只有在趋势强时才使用 MA 交叉。 +* **适配自**: Section 3.12, "Two moving averages" + + + +### 9. 渠道突破模板 (Channel Breakout) + +* **模板表达式**: `alpha = if_else(greater(close, ts_max(high, )), 1, if_else(less(close, ts_min(low, )), -1, 0)); reverse(alpha)` +* **核心思想**: 这是一个经典的反转策略。它定义了一个由过去N日最高价和最低价构成的价格渠道(Channel)。当价格向上突破渠道上轨时,认为市场过热,产生卖出信号(-1);当价格向下突破渠道下轨时,认为市场超卖,产生买入信号(+1)。 +* **变量说明**: + * ``: 定义渠道的时间窗口,如 `20`。 +* **适用场景**: 适用于有均值回归特性的市场或个股。 +* **逻辑链深度解析**: + * **区间突破 (Step 2)**: 典型的“只在尾部交易”逻辑。中间区间为 0。 + * **反转逻辑**: `reverse` 赌突破是假突破(False Breakout)。 +* **优化方向**: + * **顺势/逆势切换**: 结合 `volatility`。低波时做反转(假突破),高波时做顺势(真突破)。 +* **适配自**: Section 3.15, "Channel" + + +### 11. 
价值因子基础模板 (Value Factor) + +* **模板表达式**: `group_rank( / )` +* **核心思想**: 经典的价值投资策略。它旨在买入账面价值相对于市场价值被低估的"价值股",并卖出被高估的"成长股"。最核心的衡量指标是账面市值比(Book-to-Price / Book-to-Market Ratio)。 +* **变量说明**: + * ``: 公司账面价值或每股净资产字段。 + * ``: 公司市值或收盘价字段。 +* **适用场景**: `Fundamental` (基本面) 数据集,作为构建多因子模型的基础因子之一。 +* **逻辑链深度解析**: + * **组内比较 (Step 3)**: 价值因子在不同行业间不可比(如科技 vs 银行),必须用 `group_rank`。 +* **优化方向**: + * **去噪**: 先 `winsorize` 再 `group_rank`。 + * **从属信号**: 叠加 `Quality` (ROE)。避免买入“价值陷阱”(便宜但烂的公司)。 +* **适配自**: Section 3.3, "Value" + + + +### 13. 配对交易均值回归框架 (Pairs Trading) + +* **模板表达式**: `signal_A = (close_A - close_B) - ts_mean(close_A - close_B, ); reverse(signal_A)` +* **核心思想**: 寻找历史上高度相关的两只股票(一个"配对"),当它们的价差(spread)偏离历史均值时进行套利。如果价差过大,则做空价高的股票、做多价低的股票,赌价差会回归。这是一个经典的统计套利和均值回归策略。 +* **变量说明**: + * `close_A`, `close_B`: 配对股票A和B的价格序列。 + * ``: 计算历史价差均值的时间窗口。 +* **适用场景**: 适用于同一行业内业务高度相似的公司,是构建市场中性策略的基础。 +* **逻辑链深度解析**: + * **协整关系**: 构造平稳序列 `Spread`。 + * **均值回归**: 赌 Spread 回归均值。 +* **优化方向**: + * **动态阈值**: 使用 `ts_std_dev(Spread)` 设定动态开仓线(如 2倍标准差)。 + * **止损**: 增加 `trade_when(abs(Spread) > 4*std, 0, ...)` 防止协整破裂。 +* **适配自**: Section 3.8, "Pairs trading" + +--- + +## 补充模板 + +### A. Analyst交叉分组打底(模板名:示例) +* **核心结构**: `financial_data = ts_backfill((), 60); gp = group_cartesian_product(country, industry); ((financial_data, gp), )` +* **思想**: 先对分析师字段做向量聚合(`vec_avg`、`vec_kurtosis`、`vec_ir`等),用`group_cartesian_product`构建国家×行业组合,再做组内标准化/中性化+时序处理,形成稳定的截面信号。 +* **变量要点**: `analyst_metric`覆盖`mdl26_*`、`star_arm_*`等Analyst/SmartEstimate场景;`vec_func`选择聚合方式;`group_operator`用于行业/国家组内的scale或neutralize;`ts_operator`用于时间平滑(`ts_mean`、`ts_zscore`等);`window`在20/60/90/200之间取值。 +* **适用场景**: 适合Analyst情感、预期修正类主题,想要跨国+行业分组的稳健截面信号。 +* **逻辑链深度解析**: + * **数据填补 (Step 0)**: 分析师数据稀疏,必须 `ts_backfill`。 + * **精细分组 (Step 3)**: `group_cartesian_product` 实现了“国家x行业”的精细化中性化,适合全球策略。 +* **优化方向**: + * **算子选择**: `vec_ir` (信息比率) 比 `vec_avg` 更能体现分析师的一致性。 + +### B. 
双重中性化(模板名:双重中性化:以Analyst15为例) +* **核心结构**: 与上类似,先`ts_backfill(vec_func(Analyst15字段), 60)`,再按国家×行业分组,做组内中性化与时序处理。 +* **思想**: 针对`anl15_*`增长/估值/分红等字段,在截面层面做两次中性化(向量聚合后+组内处理),用于剥离共性行业/国家暴露。 +* **变量要点**: 数据集中`anl15_*`覆盖多期增长率、PE、估值、分红等;`vec_func`与`ts_operator`选择决定信号平滑度;窗口建议60–200以保证填补稳定。 +* **适用场景**: Analyst15预期修正、估值再定价类信号,需要同时消化国家+行业噪音的场景。 +* **逻辑链深度解析**: + * **多重剥离**: 彻底消除风格暴露,追求纯 Alpha。 +* **优化方向**: + * **顺序**: 先做行业中性,再做国家中性,通常更符合基本面逻辑。 + +### C. 组间比较(模板名:组间比较_GLB_topdiv) +* **核心结构**: 先在`country × `分组内对回填后的向量聚合结果做`ts_zscore`和`group_zscore`,再计算组均值/极值(`group_min/median/max/sum/count`),用`resid = (alpha, alpha_gpm)`求组间残差,最后再做组内+时序处理。 +* **思想**: 对同一层级(如行业/子行业/交易所)之间的相对强弱做剥离,得到“相对组均值”的残差信号,适合跨组对比的Alpha挖掘。 +* **变量要点**: `analyst_field`来源于`fnd8_*`基本面/现金流字段;`vec_op`可选`vec_max/avg/min`;`compare`可用`regression_neut`或`signed_power`提取残差;`t_window`取20/60/200/600,控制平滑与稳定性。 +* **适用场景**: GLB区域的分红/现金流因子(topdiv)在国家+行业框架下的相对价值比较,关注跨组差异的策略。 +* **逻辑链深度解析**: + * **相对价值**: 关注的是“我在我的组里是否优秀”,而不是“我绝对值多少”。 +* **优化方向**: + * **非线性**: 使用 `rank` 代替原始值计算残差,对异常值更鲁棒。 + +### D. 组间比较(Analyst15版,模板名:组间比较_glb_topdiv_anl15) +* **核心结构**: 与上一模板相同,但`analyst_field`替换为`anl15_*`系列的增长/估值/分红字段。 +* **思想**: 通过对Analyst15增长与估值预期的组间残差建模,捕捉行业/国家层面的相对高低估与预期修正。 +* **变量要点**: `group1`可选industry/subindustry/sector/exchange;`compare`与`group_stats`同上;`ts_op`和`group_op`用于残差后再标准化和时序平滑。 +* **适用场景**: 全球范围GLB,基于Analyst15预期数据的组间相对价值或动量信号。 +* **逻辑链深度解析**: + * **预期差**: 寻找行业内被分析师低估/高估的股票。 +* **优化方向**: + * **时序叠加**: 结合 `ts_delta`,寻找“行业内预期提升最快”的股票。 + +### E. 
顾问分析示例(模板名:顾问分析示例) +* **核心结构**: `financial_data = ts_backfill(, 90); gp = industry; ((financial_data, gp), )` +* **思想**: 直接对`anl69_*`多字段做90日回填,行业组内标准化后再做时序平滑,生成简洁的行业中性信号。 +* **变量要点**: `mixdata`覆盖`anl69_*`的EPS/EBIT/现金分红/目标价/报告日期等;`ts_operator`可用`ts_zscore`、`ts_scale`、`ts_rank`等;`window`提供60/120/220/600可调节频率。 +* **适用场景**: Analyst69数据驱动的行业内预期跟踪、财报节奏/指引变化监控。 +* **逻辑链深度解析**: + * **标准流程**: 填补 -> 截面标准化 -> 时序平滑。这是构建稳健因子的标准三板斧。 +* **优化方向**: + * **事件驱动**: 在财报日前后缩短 `ts_mean` 的窗口,提高灵敏度。 + +--- + +## 新增模板(CAPM與估值、分析師期限、期權、搜尋優化) + +### 1. CAPM殘差模板(市場/行業中性收益) +* **表達式**: `ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0)`。 +* **核心思想**: 回歸剔除市場/行業暴露,保留超額收益殘差作為Alpha。 +* **適用場景**: 通用起手式,回歸殘差可作後續動量或價值信號的底板。 +* **優化**: 改`rettype=2`獲取beta斜率,用於風險排序或低/高beta組合;可加入`winsorize`、`ts_backfill`預處理。 + +### 2. CAPM廣義殘差(任意特徵) +* **表達式**: `data = winsorize(ts_backfill(,63), std=4); gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, gpm, 252, rettype=0)`。 +* **核心思想**: 將任意特徵去除組均值成分,提取行業相對的特異性部分。 +* **適用場景**: 基本面、情緒、替代數據的組內殘差提純。 +* **優化**: 先`group_zscore`再回歸;對`resid`再做`ts_zscore`或`ts_mean`平滑。 + +### 3. CAPM Beta排序模板 +* **表達式**: `target_data = winsorize(ts_backfill(,63), std=4); market_data = winsorize(ts_backfill(,63), std=4); beta = ts_regression(target_data, group_mean(market_data, log(ts_mean(cap,21)), sector), 252, rettype=2)`。 +* **核心思想**: 提取行業內相對beta,作為風險/防禦排序;低beta偏防禦,高beta偏進攻。 +* **優化**: 行業或國家分組;可按beta分桶做長低/短高,或反向用於高波段套利。 + +### 4. 實際-預估差異模板(Analyst Surprise) +* **表達式**: `group_zscore(subtract(group_zscore(, industry), group_zscore(, industry)), industry)`。 +* **核心思想**: 行業內標準化後的實際值與預估值差,捕捉超預期或低於預期的驚喜。 +* **適用場景**: analyst7/analyst14/earnings估值類字段。 +* **優化**: 對差分再做`ts_zscore`;門檻交易只在|z|>1.5時開倉。 + +### 5. 
分析師期限結構模板(近遠期預估斜率) +* **表達式**: `group_zscore(subtract(group_zscore(anl14_mean_eps_, industry), group_zscore(anl14_mean_eps_, industry)), industry)`,`/`為fp1/fp2/fy1/fy2等。 +* **核心思想**: 比較短期與長期預估的行業內斜率,捕捉預期加速或鈍化。 +* **適用場景**: analyst14/15 期別字段;適用成長/拐點挖掘。 +* **優化**: 擴展到多期間差分或`ts_delta`跟蹤斜率變化;對斜率做`rank`或`winsorize`。 + +### 6. 期權Greeks淨值模板 +* **表達式**: `group_operator( - , )`,Greek可選Delta/Gamma/Vega/Theta。 +* **核心思想**: 同組內看多vs看空的期權敏感度差,反映隱含情緒或凸性差異。 +* **適用場景**: Option數據集;行業或市值分組下的情緒/波動信號。 +* **優化**: 多Greek加權組合;對淨值再`ts_mean`平滑;事件期(財報)可降權或過濾。 + +### 7. IV Skew動量擴展 +* **表達式**: `ts_delta(implied_volatility_call_,

<window>) - ts_delta(implied_volatility_put_<term>, <window>

)`。 +* **核心思想**: Call與Put隱含波動變化差捕捉情緒轉折;可做多情緒改善、做空情緒惡化。 +* **優化**: 加`trade_when(abs(skew)>thr)`門檻;財報前後縮窗;行業中性。 + +### 8. 殘差動量精簡版 +* **表達式**: `res = regression_neut(returns, ); ts_mean(res, )`。 +* **核心思想**: 先剝離市場/風格暴露,再對特異收益做動量;較原版多重回歸更輕量。 +* **優化**: 使用`ts_decay_linear`增加近期權重;行業內`group_rank`提升截面穩定度。 + +### 9. 分紅/現金流組間殘差(簡版) +* **表達式**: `alpha = ts_zscore(ts_backfill(,90)); g = group_mean(alpha, , ); resid = alpha - g; group_zscore(resid, )`。 +* **核心思想**: 先回填平滑,再对組均值做殘差,捕捉組內相對高/低分紅或現金流質量。 +* **適用場景**: fnd8/fnd6/topdiv等分紅現金流字段;行業/國家分組。 +* **優化**: 權重可用log(cap)或vol逆;對resid再做`ts_mean`平滑。 + +--- + +## 模板格式说明 + +每个模板使用以下占位符格式: +- `` - 时间序列操作符,如 `ts_rank`, `ts_mean`, `ts_delta`, `ts_ir`, `ts_stddev`, `ts_zscore` +- `` - 分组操作符,如 `group_rank`, `group_neutralize`, `group_zscore` +- `` - 向量操作符,如 `vec_avg`, `vec_sum`, `vec_max`, `vec_min`, `vec_stddev` +- `` - 数据字段占位符 +- `` - 时间窗口参数,常用值: `{5, 22, 66, 126, 252, 504}` +- `` - 分组字段,如 `industry`, `sector`, `subindustry`, `market` + +--- + +## 第一部分:基础结构模板 (TPL-001 ~ TPL-010) + +### TPL-001: 基本面时序排名 +``` +模板: ((, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore`, `ts_delta`, `ts_ir` | 时序比较操作 | +| `` | `group_rank`, `group_zscore`, `group_neutralize` | 截面比较操作 | +| `` | 基本面字段: `eps`, `sales`, `assets`, `roe`, `roa` | 公司财务数据 | +| `` | `66`, `126`, `252` | 季度/半年/年 | +| `` | `industry`, `sector` | 行业分组 | + +**示例**: +``` +group_rank(ts_rank(eps, 252), industry) +group_zscore(ts_ir(sales, 126), sector) +``` + +--- + +### TPL-002: 利润/规模比率模板 +``` +模板: (/, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore`, `ts_mean`, `ts_delta` | 时序操作 | +| `` | `net_income`, `ebitda`, `operating_income`, `gross_profit` | 利润类字段 | +| `` | `assets`, `cap`, `sales`, `equity` | 规模类字段 | +| `` | `66`, `126`, `252` | 中长期窗口 | + +**示例**: +``` +ts_rank(net_income/assets, 252) +ts_zscore(ebitda/cap, 126) +ts_rank(operating_income/cap, 252)^2 +``` + +--- + +### TPL-003: 向量数据处理模板 (VECTOR字段必用) +``` +模板: 
((), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_mean`, `ts_delta`, `ts_ir`, `ts_zscore` | 时序操作 | +| `` | `vec_avg`, `vec_sum`, `vec_max`, `vec_min`, `vec_stddev` | 向量聚合 | +| `` | 分析师数据: `anl4_*`, `analyst_*`, `oth41_*` | VECTOR类型字段 | +| `` | `22`, `66`, `126` | 短中期窗口 | + +**示例**: +``` +ts_delta(vec_avg(anl4_eps_mean), 22) +ts_rank(vec_sum(analyst_estimate), 66) +ts_ir(vec_avg(oth41_s_west_eps_ftm_chg_3m), 126) +``` + +--- + +### TPL-004: 双重中性化模板 +``` +模板: +a = (, ); +a1 = group_neutralize(a, bucket(rank(cap), range="")); +group_neutralize(a1, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_zscore`, `ts_rank`, `ts_ir` | 时序操作 | +| `` | 任意数据字段 | 主信号 | +| `` | `66`, `126`, `252` | 时间窗口 | +| `` | `"0.1,1,0.1"`, `"0,1,0.1"` | 市值分组范围 | +| `` | `industry`, `sector`, `subindustry` | 行业分组 | + +**示例**: +``` +a = ts_zscore(fnd72_s_pit_or_is_q_spe_si, 252); +a1 = group_neutralize(a, bucket(rank(cap), range="0.1,1,0.1")); +group_neutralize(a1, subindustry) +``` + +--- + +### TPL-005: 回归中性化模板 +``` +模板: +a = (, ); +a1 = group_neutralize(a, bucket(rank(cap), range="")); +a2 = group_neutralize(a1, ); +b = ts_zscore(cap, ); +b1 = group_neutralize(b, ); +regression_neut(a2, b1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_zscore`, `ts_rank` | 时序操作 | +| `` | 基本面或其他字段 | 主信号 | +| `` | `252`, `504` | 长期窗口 | +| `` | `"0.1,1,0.1"` | 市值分组 | +| `` | `subindustry`, `sector` | 行业分组 | + +--- + +### TPL-006: 基本面动量模板 +``` +模板: log(ts_mean(, )) - log(ts_mean(, )) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `anl4_{data}_{stats}`, 基本面字段 | 数据字段 | +| `` | `20`, `44` | 短期窗口 | +| `` | `44`, `126` | 长期窗口 | + +**示例**: +``` +log(ts_mean(anl4_eps_mean, 44)) - log(ts_mean(anl4_eps_mean, 20)) +``` + +--- + +### TPL-007: 财报事件驱动模板 +``` +模板: +event = ts_delta(, -1); +if_else(event != 0, , nan) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `assets`, `sales`, `eps` | 基本面字段 | +| `` | 主信号表达式 | 事件发生时的Alpha | + 
+**扩展版**: +``` +change = if_else(days_from_last_change() == , ts_delta(close, ), nan) +``` + +--- + +### TPL-008: 标准化回填模板 +``` +模板: (winsorize(ts_backfill(, ), std=), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_decay_linear`, `ts_zscore` | 时序操作 | +| `` | 低频数据字段 | 需要回填的字段 | +| `` | `115`, `120`, `180` | 回填窗口 | +| `` | `4`, `3`, `5` | winsorize标准差 | +| `` | `10`, `22`, `60` | 操作窗口 | + +**示例**: +``` +ts_decay_linear(-densify(zscore(winsorize(ts_backfill(anl4_adjusted_netincome_ft, 115), std=4))), 10) +ts_rank(winsorize(ts_backfill(, 120), std=4), 60) +``` + +--- + +### TPL-009: 信号质量分组模板 +``` +模板: +signal = (, ); +credit_quality = bucket(rank(ts_delay(signal, 1), rate=0), range=""); +group_neutralize((signal, k=), credit_quality) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore` | 信号计算 | +| `` | 任意数据字段 | 主字段 | +| `` | `60`, `120` | 窗口 | +| `` | `"0.2,1,0.2"` | 分组范围 | +| `` | `ts_weighted_decay` | 衰减操作 | +| `` | `0.5`, `0.3` | 衰减系数 | + +--- + +### TPL-010: 复合分组中性化 +``` +模板: group_neutralize(, densify()*1000 + densify()) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `subindustry`, `sector` | 主分组 | +| `` | `country`, `exchange` | 次分组 | + +--- + +## 第二部分:量价类模板 (TPL-101 ~ TPL-120) + +### TPL-101: 换手率反转 +``` +模板: -(volume/sharesout, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_mean`, `ts_rank`, `ts_std_dev` | 时序统计 | +| `` | `5`, `22`, `66` | 短中期窗口 | + +**示例**: +``` +-ts_mean(volume/sharesout, 22) +-ts_std_dev(volume/sharesout, 22) +``` + +--- + +### TPL-102: 量稳换手率 (STR) +``` +模板: -ts_std_dev(volume/sharesout, )/ts_mean(volume/sharesout, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `20`, `22` | 波动计算窗口 | +| `` | `20`, `22` | 均值计算窗口 | + +**优化版**: +``` +模板: -group_neutralize(ts_std_dev(volume/sharesout, )/ts_mean(volume/sharesout, ), bucket(rank(cap), range="0.1,1,0.1")) +``` + +--- + +### TPL-103: 价格反转模板 +``` +模板: -(, ) +``` +| 占位符 | 
可选值 | 说明 | +|--------|--------|------| +| `` | `ts_delta`, `ts_mean`, `ts_rank` | 时序操作 | +| `` | `close`, `returns`, `close/open-1`, `open/ts_delay(close,1)-1` | 价格/收益字段 | +| `` | `3`, `5`, `22` | 短期窗口 | + +**示例**: +``` +-ts_delta(close, 5) # 价格变化反转 +-ts_mean(returns, 22) # 收益均值反转 +-ts_mean(close/open-1, 22) # 日内收益反转 +-(open/ts_delay(close,1)-1) # 隔夜收益反转 +``` + +--- + +### TPL-104: 价格乖离率 +``` +模板: -(close - ts_mean(close, ))/ts_mean(close, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `5`, `22`, `66` | MA周期 | + +--- + +### TPL-105: 量价相关性 +``` +模板: -ts_corr(, , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `close`, `returns`, `abs(returns)` | 价格类 | +| `` | `volume`, `volume/sharesout`, `adv20` | 成交量类 | +| `` | `22`, `66`, `126` | 相关性窗口 | + +--- + +### TPL-106: 跳跃因子 +``` +模板: -group_neutralize(ts_mean((close/open-1) - log(close/open), ), bucket(rank(cap), range="0.1,1,0.1")) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `22`, `30`, `66` | 平均窗口 | + +**带成交量增强版**: +``` +模板: -group_neutralize(ts_mean((close/open-1) - log(close/open), ) * ts_rank(volume, 5), bucket(rank(cap), range="0.1,1,0.1")) +``` + +--- + +### TPL-107: 指数衰减动量 +``` +模板: -ts_decay_exp_window(, , factor=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `returns`, `returns*(volume/sharesout)`, `close/open-1` | 收益类字段 | +| `` | `22`, `66`, `126` | 衰减窗口 | +| `` | `0.04`, `0.1`, `0.5`, `0.9` | 衰减因子,越小衰减越快 | + +--- + +### TPL-108: 成交量周期函数 (VOC) +``` +模板: +m_minus = ts_mean(volume, ) - ts_mean(volume, ); +delta = (ts_max(m_minus, ) - m_minus)/(ts_max(m_minus, ) - ts_min(m_minus, )); +*delta + *ts_delay(delta, 1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `30`, `66` | 长期均值窗口 | +| `` | `10`, `22` | 短期均值窗口 | +| `` | `0.33`, `0.5` | 当日权重 | +| `` | `0.67`, `0.5` | 前日权重 | + +--- + +### TPL-109: 市场相关性因子 +``` +模板: +mkt_ret = group_mean(returns, 1, market); +pt = ts_corr(returns, mkt_ret, ); +rank(1/(2*(1-pt))) +``` +| 占位符 | 可选值 | 说明 | 
+|--------|--------|------| +| `` | `10`, `22`, `66` | 相关性窗口 | + +--- + +### TPL-110: 成交量趋势模板 +``` +模板: ts_decay_linear(volume/ts_sum(volume, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `252`, `504` | 长期总量窗口 | +| `` | `10`, `22` | 衰减窗口 | + +--- + +### TPL-111: VWAP收益相关 +``` +模板: +returns > - ? (ts_ir(ts_corr(ts_returns(vwap, 1), ts_delay(group_neutralize(, market), ), ), )) : -1 +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `0.1`, `0.05` | 收益阈值 | +| `` | 任意数据字段 | 信号字段 | +| `` | `30`, `60` | 延迟窗口 | +| `` | `90`, `120` | 相关性窗口 | + +--- + +### TPL-112: 动量因子创建 +``` +模板: ts_sum(winsorize(ts_backfill(, ), std=4.0), *21) - ts_sum(winsorize(ts_backfill(, ), std=4.0), *21) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `returns`, 基本面字段 | 数据字段 | +| `` | `120`, `180` | 回填窗口 | +| `` | `6`, `12` | 长期月数 | +| `` | `1`, `0.1*n` | 短期月数 | + +--- + +### TPL-113: 线性衰减排名 +``` +模板: -ts_rank(ts_decay_linear(, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `percent`, 任意时序信号 | 输入信号 | +| `` | `10`, `22`, `150` | 衰减窗口 | +| `` | `50`, `126` | 排名窗口 | + +--- + +## 第三部分:情绪/新闻类模板 (TPL-201 ~ TPL-220) + +### TPL-201: 情绪差值模板 +``` +模板: (rank(ts_backfill(, )) - rank(ts_backfill(, )), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_mean`, `ts_rank`, `ts_zscore` | 时序操作 | +| `` | 正面情绪字段 | 积极信号 | +| `` | 负面情绪字段 | 消极信号 | +| `` | `20`, `30` | 回填窗口 | +| `` | `5`, `22` | 比较窗口 | + +--- + +### TPL-202: 新闻情绪回归残差 +``` +模板: +sentiment = ts_backfill(ts_delay((), 1), ); +vhat = ts_regression(volume, sentiment, ); +ehat = -ts_regression(returns, vhat, ); +group_rank(ehat, bucket(rank(cap), range="0,1,0.1")) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `vec_avg`, `vec_sum` | 情绪聚合方式 | +| `` | `scl12_sentiment`, `snt_buzz_ret`, `nws18_relevance` | 情绪数据 | +| `` | `20`, `30` | 回填窗口 | +| `` | `120`, `250` | 成交量回归窗口 | +| `` | `250`, `750` | 收益回归窗口 | + +--- + +### TPL-203: 社交媒体情绪 +``` +模板: rank((scl12_alltype_buzzvec) * 
(scl12_sentiment)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `vec_sum`, `vec_avg` | 向量聚合 | + +**带条件版**: +``` +模板: +sent_vol = vec_sum(scl12_alltype_buzzvec); +trade_when(rank(sent_vol) > 0.95, -zscore(scl12_buzz)*sent_vol, -1) +``` + +--- + +### TPL-204: 条件情绪过滤 +``` +模板: +group_rank( +sigmoid(if_else(ts_zscore(, ) > , ts_zscore(, ), 0)), + +) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 情绪字段 | 情绪数据 | +| `` | `22`, `30`, `66` | zscore窗口 | +| `` | `1`, `1.5`, `2` | z-score阈值 | +| `` | `industry`, `sector` | 分组字段 | + +--- + +### TPL-205: 情绪+波动率复合 +``` +模板: log(1 + sigmoid(ts_zscore(, )) * sigmoid(ts_zscore(, ))) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 情绪字段 | 情绪数据 | +| `` | `option8_*`, 波动率字段 | 波动率数据 | +| `` | `30`, `66` | 情绪窗口 | +| `` | `30`, `66` | 波动率窗口 | + +--- + +### TPL-206: 指数衰减情绪 +``` +模板: ts_decay_exp_window(vec_avg(), , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `mws85_sentiment`, `nws18_ber` | 情绪向量字段 | +| `` | `10`, `22` | 衰减窗口 | +| `` | `0.9`, `0.7` | 衰减因子 | + +**双情绪组合**: +``` +decayed_sentiment_1 = ts_decay_exp_window(vec_avg(mws85_sentiment), 10, 0.9); +decayed_sentiment_2 = ts_decay_exp_window(vec_avg(nws18_ber), 10, 0.9); +decayed_sentiment_1 + decayed_sentiment_2 +``` + +--- + +### TPL-207: 新闻结果排名 +``` +模板: +percent = ts_rank(vec_stddev(), ); +-ts_rank(ts_decay_linear(percent, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `nws12_prez_result2` | 新闻数据 | +| `` | `50`, `66` | 排名窗口 | +| `` | `150`, `252` | 衰减窗口 | + +--- + +### TPL-208: 分组行业提取情绪 +``` +模板: scale(group_extra(ts_sum(sigmoid(ts_backfill(, )), ) - ts_sum(sigmoid(ts_backfill(, )), ), 0.5, densify(industry))) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 情绪或基本面字段 | 数据字段 | +| `` | `180`, `252` | 回填窗口 | +| `` | `3`, `5` | 求和窗口 | + +--- + +## 第四部分:期权类模板 (TPL-301 ~ TPL-320) + +### TPL-301: 期权希腊字母差值 +``` +模板: ( - , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `group_rank`, 
`group_neutralize`, `group_zscore` | 分组操作 | +| `` | `put_delta`, `put_gamma`, `put_theta`, `put_vega` | Put希腊字母 | +| `` | `call_delta`, `call_gamma`, `call_theta`, `call_vega` | Call希腊字母 | +| `` | `industry`, `sector` | 分组字段 | + +--- + +### TPL-302: 期权价格信号 +``` +模板: group_rank((()/close, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_scale`, `ts_rank`, `ts_zscore` | 时序操作 | +| `` | `vec_max`, `vec_avg` | 向量操作 | +| `` | 期权价格字段 | 期权数据 | +| `` | `66`, `120`, `252` | 时间窗口 | +| `` | `industry`, `sector` | 分组字段 | + +--- + +### TPL-303: 期权波动率信号 +``` +模板: sigmoid(( - , )) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_ir`, `ts_stddev`, `ts_zscore`, `ts_mean` | 波动性操作 | +| `` | 期权高价字段 | 期权最高价 | +| `` | 期权收盘价字段 | 期权收盘价 | +| `` | `120`, `250`, `504` | 长期窗口 | + +**说明**: 期权波动类因子通常需要较长窗口(120-504天)来捕捉稳定信号 + +--- + +### TPL-304: 隐含波动率比率 +``` +模板: (implied_volatility_call_/parkinson_volatility_, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore`, `ts_delta` | 时序操作 | +| `` | `120`, `270` | 期权期限 | +| `` | `66`, `126`, `252` | 窗口 | + +--- + +### TPL-305: Put-Call成交量比 +``` +模板: (pcr_vol_, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_delta`, `ts_zscore` | 时序操作 | +| `` | `10`, `30`, `60` | 期限 | +| `` | `22`, `66`, `126` | 窗口 | + +--- + +### TPL-306: 期权盈亏平衡点 +``` +模板: group_rank(ts_zscore(/close, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `call_breakeven_10`, `put_breakeven_10` | 盈亏平衡字段 | +| `` | `66`, `126`, `252` | 窗口 | +| `` | `sector`, `industry` | 分组 | + +--- + +## 第五部分:分析师类模板 (TPL-401 ~ TPL-420) + +### TPL-401: 分析师预期变化 +``` +模板: (tail(tail(, lower=, upper=, newval=), lower=-, upper=-, newval=-)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `vec_avg`, `vec_sum` | 向量聚合 | +| `` | `oth41_s_west_eps_ftm_chg_3m`, `anl4_eps_chg` | 预期变化字段 | +| `` | `0.25`, `0.1` | 下截断值 | +| `` | `1000`, `100` | 上截断值 | + +--- + +### TPL-402: 剥离动量的分析师因子 +``` +模板: 
+afr = (); +short_mom = ts_mean(returns - group_mean(returns, 1, market), ); +long_mom = ts_delay(ts_mean(returns - group_mean(returns, 1, market), ), ); +regression_neut(regression_neut(afr, short_mom), long_mom) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `vec_avg`, `vec_sum` | 向量聚合 | +| `` | 分析师数据字段 | 一致预期等 | +| `` | `5`, `10` | 短期动量窗口 | +| `` | `20`, `22` | 长期动量窗口 | + +--- + +### TPL-403: 分析师覆盖度过滤 +``` +模板: +coverage_filter = ts_sum((), ) > ; +if_else(coverage_filter, , nan) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `vec_count` | 统计分析师数量 | +| `` | 分析师向量字段 | 分析师数据 | +| `` | `66`, `90`, `126` | 统计窗口 | +| `` | `2`, `3`, `5` | 最小覆盖数量 | +| `` | 主信号表达式 | 待过滤的Alpha | + +--- + +### TPL-404: 老虎哥回归模板 +``` +模板: group_rank(ts_regression(ts_zscore(, ), ts_zscore(vec_sum(), ), ), densify(sector)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意MATRIX字段 | Y变量 | +| `` | 任意VECTOR字段 | X变量 | +| `` | `252`, `504` | 回归窗口 | + +**说明**: 经典回归模板,适用于基本面与分析师数据组合 + +--- + +### TPL-405: 分析师预期时序变化 +``` +模板: ts_mean(vec_avg(), ) - ts_mean(vec_avg(), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `anl4_eps_mean`, `anl4_revenue_mean` | 分析师预测 | +| `` | `22`, `44` | 短期窗口 | +| `` | `66`, `126` | 长期窗口 | + +--- + +### TPL-406: 三因子组合模板 +``` +模板: +my_group = market; +rank( +group_rank(ts_decay_linear(volume/ts_sum(volume, 252), 10), my_group) * +group_rank(ts_rank(vec_avg(), ), my_group) * +group_rank(-ts_delta(close, 5), my_group) +) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 基本面VECTOR字段 | 基本面数据 | +| `` | `252`, `504` | 排名窗口 | + +--- + +### TPL-407: 分析师FCF比率 +``` +模板: ts_rank(vec_avg() / vec_avg(), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `anl4_fcf_value` | 自由现金流预测 | +| `` | `anl4_netprofit_low`, `anl4_netprofit_mean` | 利润预测 | +| `` | `66`, `126`, `252` | 排名窗口 | + +--- + +## 第六部分:中性化技术模板 (TPL-501 ~ TPL-515) + +### TPL-501: 市值分组中性化 +``` +模板: group_neutralize(, bucket(rank(cap), range="")) +``` +| 
占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号表达式 | 待中性化的Alpha | +| `` | `"0.1,1,0.1"`, `"0,1,0.1"` | 分组范围 | + +--- + +### TPL-502: 双重中性化 (行业+市值) +``` +模板: +a1 = group_neutralize(, bucket(rank(cap), range="")); +group_neutralize(a1, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `"0.1,1,0.1"` | 市值分组 | +| `` | `industry`, `sector`, `subindustry` | 行业分组 | + +--- + +### TPL-503: 回归中性化 +``` +模板: regression_neut(, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `log(cap)`, `ts_ir(returns, 126)`, `ts_std_dev(returns, 22)` | 待剥离因子 | + +**多层回归中性化**: +``` +模板: regression_neut(regression_neut(, ), ) +``` + +--- + +### TPL-504: 中性化顺序优化 +``` +模板: +a = ts_zscore(, ); +a1 = group_neutralize(a, ); +a2 = group_neutralize(a1, bucket(rank(cap), range="")) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意数据字段 | 主信号 | +| `` | `252` | zscore窗口 | +| `` | `industry`, `subindustry` | 行业分组 | +| `` | `"0.1,1,0.1"` | 市值分组 | + +**说明**: 先行业中性化再市值中性化,与反向顺序效果可能不同 + +--- + +### TPL-505: sta1分组中性化 +``` +模板: group_neutralize(, sta1_top3000c20) +``` +**说明**: 使用预定义的sta1分组进行中性化 + +--- + +## 第七部分:条件交易模板 (TPL-601 ~ TPL-620) + +### TPL-601: 流动性过滤 +``` +模板: trade_when(volume > adv20 * , , -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `0.618`, `0.5`, `1` | 流动性阈值 | +| `` | 主信号 | 原始Alpha | + +**反向流动性**: +``` +trade_when(volume < adv20, , -1) +``` + +--- + +### TPL-602: 波动率过滤 +``` +模板: trade_when(ts_rank(ts_std_dev(returns, ), ) < , , -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `5`, `10`, `22` | 波动计算窗口 | +| `` | `126`, `180`, `252` | 排名窗口 | +| `` | `0.8`, `0.9` | 波动率阈值 | +| `` | 主信号 | 原始Alpha | + +--- + +### TPL-603: 极端收益过滤 +``` +模板: trade_when(abs(returns) < , , abs(returns) > ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `0.075`, `0.05` | 入场阈值 | +| `` | `0.1`, `0.095` | 出场阈值 | +| `` | 主信号 | 原始Alpha | + +--- + +### TPL-604: 市值过滤 +``` +模板: 
trade_when(rank(cap) > , , -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `0.3`, `0.5` | 市值排名阈值 | +| `` | 主信号 | 原始Alpha | + +--- + +### TPL-605: 触发条件交易 +``` +模板: +triggerTradeexp = (ts_arg_max(volume, ) < 1) && (volume > ts_sum(volume, )/); +triggerExitexp = -1; +trade_when(triggerTradeexp, , triggerExitexp) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `5`, `10` | 判断窗口 | +| `` | `-rank(ts_delta(close, 2))` | 主信号 | + +--- + +### TPL-606: 组合条件交易 +``` +模板: +my_group2 = bucket(rank(cap), range="0,1,0.1"); +trade_when(volume > adv20, group_neutralize(, my_group2), -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 复合信号 | 主信号 | + +--- + +### TPL-607: 条件排名交易 +``` +模板: +a = (, ); +trade_when(rank(a) > , -zscore()*a, -rank(a)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore` | 时序操作 | +| `` | 任意字段 | 条件字段 | +| `` | 任意字段 | 信号字段 | +| `` | `25`, `66` | 窗口 | +| `` | `0.03`, `0.1` | 下阈值 | +| `` | `0.25`, `0.5` | 上阈值 | + +--- + +## 第八部分:复合多因子模板 (TPL-701 ~ TPL-720) + +### TPL-701: 三因子乘积 +``` +模板: +my_group = market; +rank( +group_rank((, ), my_group) * +group_rank((, ), my_group) * +group_rank((, ), my_group) +) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_decay_linear`, `ts_rank` | 第一因子操作 | +| `` | `ts_rank`, `ts_zscore` | 第二因子操作 | +| `` | `-ts_delta` | 第三因子操作(反转) | +| `` | `volume/ts_sum(volume, 252)` | 成交量趋势 | +| `` | `vec_avg({Fundamental})` | 基本面信号 | +| `` | `close` | 价格信号 | +| ``, ``, `` | 各因子窗口 | 时间参数 | + +--- + +### TPL-702: 波动率条件反转 +``` +模板: +vol = ts_std_dev(, ); +vol_mean = group_mean(vol, 1, market); +flip_ret = if_else(vol < vol_mean, -, ); +-ts_mean(flip_ret, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `returns`, `close/open-1` | 收益字段 | +| `` | `20`, `22` | 窗口参数 | + +**说明**: 低波动环境做反转,高波动环境做动量 + +--- + +### TPL-703: 恐惧指标组合 +``` +模板: +fear = ts_mean( +abs(returns - group_mean(returns, 1, market)) / +(abs(returns) + abs(group_mean(returns, 1, market)) 
+ 0.1), + +); +-group_neutralize(fear * , bucket(rank(cap), range="0.1,1,0.1")) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `20`, `22` | 恐惧指标窗口 | +| `` | 主信号表达式 | 待组合信号 | + +--- + +### TPL-704: 债务杠杆相关性 +``` +模板: group_neutralize(ts_zscore(, ) * ts_corr(, returns, ), sector) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `debt_to_equity`, `debt/assets` | 杠杆字段 | +| `` | `60`, `126` | zscore窗口 | +| `` | `20`, `66` | 相关性窗口 | + +--- + +### TPL-705: 模型数据信号 +``` +模板: - +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `mdl175_01dtsv`, `mdl175_01icc` | 模型字段 | + +**带排名版**: +``` +rank(group_rank(ts_rank(ts_backfill(, 5), 5), sta1_top3000c20)) +``` + +--- + +### TPL-706: 回归zscore模板 +``` +模板: ts_regression(ts_zscore(, ), ts_zscore(, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | MATRIX字段 | Y变量 | +| `` | MATRIX字段或vec_sum(VECTOR) | X变量 | +| `` | `252`, `500`, `504` | 回归窗口 | + +--- + +### TPL-707: 分组Delta模板 +``` +模板: group_neutralize(ts_delta(, ), sector) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意数据字段 | 主字段 | +| `` | `22`, `66`, `126` | 差分窗口 | + +--- + +## 第九部分:数据预处理模板 (TPL-801 ~ TPL-815) + +### TPL-801: Winsorize截断 +``` +模板: winsorize(, std=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 原始数据 | +| `` | `3`, `4`, `5` | 截断标准差 | + +--- + +### TPL-802: Sigmoid归一化 +``` +模板: sigmoid((, )) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_zscore`, `ts_ir`, `ts_rank` | 时序操作 | +| `` | 任意字段 | 原始数据 | +| `` | `22`, `66`, `252` | 窗口 | + +--- + +### TPL-803: 数据回填 +``` +模板: ts_backfill(, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 低频数据字段 | 需要回填的字段 | +| `` | `115`, `120`, `180`, `252` | 回填窗口 | + +--- + +### TPL-804: 条件替换 +``` +模板: if_else(is_not_nan(), , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主字段 | 可能有NaN的字段 | +| `` | 替代字段或值 | NaN时的替代 | + +--- + +### TPL-805: 极端值替换 +``` +模板: tail(tail(, lower=, upper=, newval=), lower=-, upper=-, 
newval=-) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 原始数据 | +| `` | `0.25`, `0.1` | 下界 | +| `` | `100`, `1000` | 上界 | + +--- + +### TPL-806: 组合预处理 +``` +模板: (winsorize(ts_backfill(, ), std=), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore`, `ts_mean` | 时序操作 | +| `` | 低频字段 | 需要处理的字段 | +| `` | `120`, `180` | 回填窗口 | +| `` | `4` | winsorize参数 | +| `` | `22`, `66` | 操作窗口 | + +--- + +### TPL-807: ts_min/ts_max替代 +``` +模板: ts_backfill(if_else(ts_arg_min(, ) == 0, , nan), 120) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 原始数据 | +| `` | `22`, `66`, `126` | 窗口 | + +**说明**: 当ts_min/ts_max不可用时的替代方案 + +--- + +## 第十部分:高级统计模板 (TPL-901 ~ TPL-920) + +### TPL-901: 高阶矩模板 (ts_moment) +``` +模板: ((ts_moment(, , k=), )) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `rank`, `zscore`, `sigmoid` | 标准化操作 | +| `` | `group_rank`, `group_zscore` | 分组操作 | +| `` | 任意MATRIX字段 | 数据字段 | +| `` | `22`, `66`, `126` | 窗口 | +| `` | `2`, `3`, `4` | k=2方差, k=3偏度, k=4峰度 | + +**说明**: ts_moment(x, d, k)计算k阶中心矩 + +--- + +### TPL-902: 协偏度/协峰度模板 +``` +模板: (ts_co_skewness(, , ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `group_rank`, `group_zscore` | 分组操作 | +| `` | `returns`, `close` | 第一变量 | +| `` | `volume`, `vwap` | 第二变量 | +| `` | `66`, `126`, `252` | 窗口 | + +**协峰度版**: +``` +模板: (ts_co_kurtosis(, , ), ) +``` + +--- + +### TPL-903: 偏相关模板 (ts_partial_corr) +``` +模板: group_rank(ts_partial_corr(, , , ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `returns`, 收益相关 | Y变量 | +| `` | 任意字段 | X变量 | +| `` | `group_mean(returns, 1, market)` | 控制变量(市场收益) | +| `` | `60`, `126`, `252` | 窗口 | +| `` | `sector`, `industry` | 分组 | + +**说明**: 计算两变量偏相关,控制第三变量影响 + +--- + +### TPL-904: 三元相关模板 (ts_triple_corr) +``` +模板: group_rank(ts_triple_corr(, , , ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `returns` | 第一变量 | +| `` | `volume` | 第二变量 | +| `` | 基本面字段 | 第三变量 | +| `` | `60`, 
`126` | 窗口 | +| `` | `sector`, `industry` | 分组 | + +--- + +### TPL-905: Theil-Sen回归模板 +``` +模板: group_rank(ts_theilsen(, , ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意MATRIX字段 | Y变量 | +| `` | 任意MATRIX字段或`ts_step(1)` | X变量 | +| `` | `126`, `252`, `500` | 窗口 | +| `` | `sector`, `industry` | 分组 | + +**说明**: Theil-Sen回归比普通回归更鲁棒 + +--- + +### TPL-906: 多项式回归残差 +``` +模板: ts_poly_regression(, , , k=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | Y变量 | 被解释变量 | +| `` | X变量 | 解释变量 | +| `` | `126`, `252` | 窗口 | +| `` | `1`, `2`, `3` | 多项式阶数, k=2为二次回归 | + +**说明**: 返回 y - Ey (残差) + +--- + +### TPL-907: 向量中性化模板 +``` +模板: ts_vector_neut(, , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 待中性化Alpha | +| `` | `returns`, `cap` | 风险因子 | +| `` | `22`, `66`, `126` | 窗口(不宜过长,计算慢) | + +**分组向量中性化**: +``` +模板: group_vector_neut(, , ) +``` + +--- + +### TPL-908: 加权衰减模板 +``` +模板: group_neutralize(ts_weighted_decay(, k=), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 待衰减Alpha | +| `` | `0.3`, `0.5`, `0.7` | 衰减系数 | +| `` | `bucket(rank(cap), range="0.1,1,0.1")` | 分组 | + +--- + +### TPL-909: 回归斜率模板 +``` +模板: ts_regression(ts_zscore(, ), ts_step(1), , rettype=2) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意MATRIX字段 | 数据字段 | +| `` | `252`, `500` | 窗口 | + +**说明**: rettype=2返回斜率,用于检测趋势 + +--- + +### TPL-910: 最小最大压缩模板 +``` +模板: ts_min_max_cps(, , f=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `22`, `66`, `126` | 窗口 | +| `` | `2`, `0.5` | 压缩因子 | + +**等价公式**: `x - f * (ts_min(x, d) + ts_max(x, d))` + +--- + +## 第十一部分:事件驱动模板 (TPL-1001 ~ TPL-1020) + +### TPL-1001: 数据变化天数模板 +``` +模板: if_else(days_from_last_change() == , , nan) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 基本面字段 | 监测变化的字段 | +| `` | `1`, `2`, `5` | 距离变化的天数 | +| `` | `ts_delta(close, 5)`, 主信号 | 事件触发时的Alpha | + +**动态衰减版**: +``` +模板: / (1 + days_from_last_change()) +``` + +--- + +### 
TPL-1002: 最近差值模板 +``` +模板: (last_diff_value(, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore` | 时序操作 | +| `` | 任意字段 | 数据字段 | +| `` | `60`, `90`, `120` | 回溯窗口 | +| `` | `22`, `66` | 操作窗口 | + +**说明**: 返回过去d天内最近一次不同于当前值的历史值 + +--- + +### TPL-1003: 缺失值计数模板 +``` +模板: -ts_count_nans(ts_backfill(, ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 分析师数据等 | 可能有缺失的字段 | +| `` | `5`, `10` | 回填窗口 | +| `` | `20`, `30` | 计数窗口 | + +**应用**: 分析师覆盖度信号,缺失越少覆盖越好 + +--- + +### TPL-1004: 位置最大/最小模板 +``` +模板: if_else(ts_arg_max(, ) == , , nan) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `volume`, 任意字段 | 监测字段 | +| `` | `5`, `10` | 窗口 | +| `` | `0`, `1` | 0表示今天是最大值 | +| `` | 主信号 | 条件满足时的Alpha | + +**组合条件**: +``` +模板: (ts_arg_max(, ) == ts_arg_max(, )) * ( + ) +``` + +--- + +### TPL-1005: 财报发布事件模板 +``` +模板: +event_signal = if_else(ts_delta(, 1) != 0, , nan); +ts_decay_linear(event_signal, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `assets`, `sales`, `eps` | 基本面字段 | +| `` | `ts_delta(close, 5)`, 主信号 | 事件Alpha | +| `` | `10`, `22` | 衰减窗口 | + +--- + +### TPL-1006: 动态Decay事件驱动 +``` +模板: +decay_weight = 1 / (1 + days_from_last_change()); + * decay_weight +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 事件触发字段 | +| `` | 主信号 | 原始Alpha | + +--- + +### TPL-1007: 盈利公告模板 +``` +模板: +surprise = - ; +if_else(days_from_last_change() < , surprise, nan) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `eps` | 实际值 | +| `` | `vec_avg(anl4_eps_mean)` | 预测值 | +| `` | `5`, `10` | 事件有效窗口 | + +--- + +## 第十二部分:信号处理模板 (TPL-1101 ~ TPL-1120) + +### TPL-1101: 黄金比例幂变换 +``` +模板: signed_power(, 0.618) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号表达式 | 原始Alpha | + +**其他幂次**: +``` +signed_power(, 0.5) # 平方根 +signed_power(, 2) # 平方增强 +``` + +--- + +### TPL-1102: 尾部截断模板 +``` +模板: right_tail(, minimum=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | 
`0`, `0.1` | 最小阈值 | + +**左尾版**: +``` +模板: left_tail(, maximum=) +``` + +--- + +### TPL-1103: Clamp边界限制 +``` +模板: clamp(, lower=, upper=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `-1`, `-0.5` | 下界 | +| `` | `1`, `0.5` | 上界 | + +--- + +### TPL-1104: 分数映射模板 +``` +模板: fraction() +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | + +**说明**: 将连续变量映射到分布内的相对位置 + +--- + +### TPL-1105: NaN外推模板 +``` +模板: nan_out(, lower=, upper=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `-3`, `-5` | 下界 | +| `` | `3`, `5` | 上界 | + +**说明**: 将超出范围的值替换为NaN + +--- + +### TPL-1106: Purify数据清洗 +``` +模板: purify() +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 需要清洗的数据 | + +**说明**: 自动化数据清洗,减少噪声和异常值 + +--- + +### TPL-1107: 条件保留模板 +``` +模板: keep(, , period=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | ` > 0` | 保留条件 | +| `` | `3`, `5`, `10` | 滚动窗口 | + +**示例**: +``` +keep(returns, returns > 0, period=3) # 只保留正收益 +``` + +--- + +### TPL-1108: 缩放降维模板 +``` +模板: -scale_down((, ), constant=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_mean`, `ts_rank` | 时序操作 | +| `` | `returns`, 任意字段 | 数据字段 | +| `` | `2`, `5` | 窗口 | +| `` | `0.1`, `0.05` | 缩放常数 | + +--- + +### TPL-1109: Truncate截断模板 +``` +模板: truncate(, maxPercent=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `0.01`, `0.05` | 截断百分比 | + +--- + +### TPL-1110: 组合Normalize模板 +``` +模板: group_normalize(, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `sector`, `industry` | 分组 | + +**等价公式**: `alpha / group_sum(abs(alpha), group)` + +--- + +## 第十三部分:Turnover控制模板 (TPL-1201 ~ TPL-1215) + +### TPL-1201: 目标换手率Hump +``` +模板: ts_target_tvr_hump(, lambda_min=0, lambda_max=1, target_tvr=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `0.1`, `0.15`, `0.2` | 目标换手率 | + +--- 
+ +### TPL-1202: Delta限制换手率 +``` +模板: ts_target_tvr_delta_limit(, , lambda_min=0, lambda_max=1, target_tvr=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | 辅助因子 | 限制因子 | +| `` | `0.1`, `0.15` | 目标换手率 | + +--- + +### TPL-1203: Hump衰减组合 +``` +模板: hump_decay(, hump=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `0.001`, `0.01` | Hump参数 | + +**嵌套版**: +``` +hump(hump_decay(, hump=0.001)) +``` + +--- + +### TPL-1204: 平均+Hump模板 +``` +模板: -ts_mean(ts_target_tvr_hump(group_rank(, country), lambda_min=0, lambda_max=1, target_tvr=), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `0.1` | 目标换手率 | +| `` | `5`, `10` | 平均窗口 | + +--- + +### TPL-1205: 简单Hump模板 +``` +模板: hump(, hump=) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `0.01`, `0.001`, `0.0001` | Hump参数 | + +**示例**: +``` +hump(-ts_delta(close, 5), hump=0.01) +``` + +--- + +## 第十四部分:回填与覆盖模板 (TPL-1301 ~ TPL-1315) + +### TPL-1301: 分组回填模板 +``` +模板: group_backfill(, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 需要回填的字段 | +| `` | `sector`, `industry`, `market` | 分组字段 | + +**说明**: 使用组内最近值填充NaN + +--- + +### TPL-1302: 嵌套回填排名 +``` +模板: rank(group_backfill(, )) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `sector`, `industry` | 分组 | + +--- + +### TPL-1303: 覆盖度过滤 +``` +模板: group_count(is_nan(), market) > ? 
: nan +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 检测字段 | +| `` | `40`, `50` | 最小覆盖数 | +| `` | 主信号 | 原始Alpha | + +--- + +### TPL-1304: NaN替换模板 +``` +模板: if_else(is_not_nan(), , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `0`, `0.5`, `nan` | 默认值 | + +--- + +### TPL-1305: 综合数据清洗 +``` +模板: (winsorize(group_backfill(ts_backfill(, ), ), std=), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `ts_rank`, `ts_zscore` | 时序操作 | +| `` | 低频字段 | 数据字段 | +| `` | `120`, `180` | 时序回填窗口 | +| `` | `sector`, `industry` | 分组回填 | +| `` | `4` | winsorize参数 | +| `` | `66`, `126` | 操作窗口 | + +--- + +## 第十五部分:组合提取模板 (TPL-1401 ~ TPL-1415) + +### TPL-1401: group_extra填补模板 +``` +模板: group_extra(, , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `0.5`, `1` | 权重 | +| `` | `densify(industry)`, `sector` | 分组 | + +**说明**: 用组均值填补缺失值 + +--- + +### TPL-1402: 组合提取sigmoid +``` +模板: scale(group_extra(ts_sum(sigmoid(ts_backfill(, )), ) - ts_sum(sigmoid(ts_backfill(, )), ), 0.5, densify(industry))) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `180` | 回填窗口 | +| `` | `3` | 求和窗口 | + +--- + +### TPL-1403: PnL反馈模板 +``` +模板: if_else(inst_pnl() > , , nan) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | +| `` | `0`, `-0.05` | PnL阈值 | + +**说明**: 基于单标的PnL进行条件交易 + +--- + +### TPL-1404: 流动性加权模板 +``` +模板: * log(volume) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | + +**说明**: 将仓位偏向高流动性股票 + +--- + +### TPL-1405: 市值回归中性化 +``` +模板: regression_neut(, log(cap)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 主信号 | 原始Alpha | + +**说明**: 剥离市值因子影响 + +--- + +## 第十六部分:百分位与分位数模板 (TPL-1501 ~ TPL-1510) + +### TPL-1501: 时序百分位模板 +``` +模板: ts_percentage(, , percentage=
)
+```
+| 占位符 | 可选值 | 说明 |
+|--------|--------|------|
+| `` | 任意字段 | 数据字段 |
+| `` | `22`, `66`, `126` | 窗口 |
+| `
` | `0.5`, `0.25`, `0.75` | 百分位 | + +--- + +### TPL-1502: 分位数模板 +``` +模板: (ts_quantile(, , ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `rank`, `zscore` | 标准化 | +| `` | 任意字段 | 数据字段 | +| `` | `66`, `126` | 窗口 | +| `` | `0.25`, `0.5`, `0.75` | 分位数 | +| `` | `22` | 操作窗口 | + +--- + +### TPL-1503: Max-Min比率模板 +``` +模板: ts_max_diff(, ) / ts_av_diff(, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `22`, `66` | 窗口 | + +--- + +### TPL-1504: 中位数模板 +``` +模板: - ts_median(, ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | 任意字段 | 数据字段 | +| `` | `22`, `66`, `252` | 窗口 | + +--- + +### TPL-1505: 累积乘积模板 +``` +模板: ts_product(1 + , ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `returns`, 收益率字段 | 收益字段 | +| `` | `5`, `22`, `66` | 窗口 | + +**说明**: 计算累积收益 + +--- + +## 第十七部分:实战表达式模板 (TPL-1601 ~ TPL-1700) + +**说明**: 以下模板从社区高票帖子中提取,为实际验证过的表达式格式。 + +### TPL-1601: ts_max/ts_min替代公式 +``` +模板: {data} - ts_max_diff({data}, {d}) # 等效于 ts_max +模板: (({data} - ts_max_diff({data}, {d})) * ts_scale({data}, {d}) - {data}) / (ts_scale({data}, {d}) - 1) # 等效于 ts_min +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{data}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `22`, `66`, `126` | 窗口 | + +**应用**: 当平台不支持ts_max/ts_min时的替代方案 + +--- + +### TPL-1602: 线性衰减权重公式 +``` +模板: weight = {d} + ts_step(0); ts_sum({data} * weight, {d}) / ts_sum(weight, {d}) # 等效于 ts_decay_linear +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{data}` | 任意字段 | 数据字段 | +| `{d}` | `10`, `22`, `66` | 衰减窗口 | + +--- + +### TPL-1603: 组归一化公式 +``` +模板: {data} / group_sum(abs({data}), {group}) # 等效于 group_normalize +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{data}` | 任意字段 | 数据字段 | +| `{group}` | `industry`, `sector` | 分组字段 | + +--- + +### TPL-1604: IR+峰度组合模板 +``` +模板: +rank_data = rank({field}); +ts_ir(rank_data, {d}) + ts_kurtosis(rank_data, {d}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | `volume`, `returns`, 
任意字段 | 数据字段 | +| `{d}` | `22`, `66` | 窗口 | + +**说明**: IR和峰度组合捕捉信号强度和分布特征 + +--- + +### TPL-1605: VWAP相关性信号 +``` +模板: returns > -{threshold} ? (ts_ir(ts_corr(ts_returns(vwap, 1), ts_delay(group_neutralize({field}, market), {d1}), {d2}), {d2})) : -1 +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意数据字段 | 信号字段 | +| `{threshold}` | `0.1`, `0.05` | 收益过滤阈值 | +| `{d1}` | `30`, `60` | 延迟窗口 | +| `{d2}` | `90`, `120` | 相关性窗口 | + +--- + +### TPL-1606: 球队硬币因子 (ballteam_coin) +``` +模板: +# 基础版 +rank(ballteam_coin) + +# 市值中性化版 +group_neutralize(rank(ballteam_coin), bucket(rank(assets), range='0.1,1,0.1')) +``` +**说明**: 经典球队vs硬币因子,用于捕捉收益持续性 + +--- + +### TPL-1607: 偏度因子模板 +``` +模板: -group_rank(ts_skewness(returns, {d}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `22`, `66`, `126` | 偏度计算窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 负偏度股票往往表现更好 + +--- + +### TPL-1608: 熵信号模板 +``` +模板: ts_zscore({field}, {d1}) * ts_entropy({field}, {d2}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | `returns`, 任意字段 | 信号字段 | +| `{d1}` | `14`, `22` | zscore窗口 | +| `{d2}` | `14`, `22` | 熵窗口 | + +**说明**: 结合标准化和不确定性度量 + +--- + +### TPL-1609: 分析师动量短长差模板 +``` +模板: log(ts_mean(anl4_{data}_{stats}, {d_short})) - log(ts_mean(anl4_{data}_{stats}, {d_long})) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{data}` | `eps`, `revenue`, `netprofit` | 分析师预测类型 | +| `{stats}` | `mean`, `low`, `high` | 统计量类型 | +| `{d_short}` | `20`, `44` | 短期窗口 | +| `{d_long}` | `44`, `126` | 长期窗口 | + +--- + +### TPL-1610: 目标换手率分组排名 +``` +模板: -ts_mean(ts_target_tvr_hump(group_rank({field}, country), lambda_min=0, lambda_max=1, target_tvr={target}), {d}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意字段 | 数据字段 | +| `{target}` | `0.1`, `0.15` | 目标换手率 | +| `{d}` | `5`, `10` | 平均窗口 | + +--- + +### TPL-1611: 最大差/均值差比率 +``` +模板: ts_max_diff({field}, {d}) / ts_av_diff({field}, {d}) +``` +| 占位符 | 可选值 | 说明 | 
+|--------|--------|------| +| `{field}` | 任意字段 | 数据字段 | +| `{d}` | `22`, `66` | 窗口 | + +**说明**: 捕捉极端值相对于平均变化的幅度 + +--- + +### TPL-1612: 模型数据三层嵌套 +``` +模板: +a = rank(group_rank(ts_rank(ts_backfill({model_field}, 5), 5), sta1_top3000c20)); +trade_when(rank(a) > 0.03, -zscore(ts_zscore({model_field}, 25)) * a, 0.25 - rank(a)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{model_field}` | `mdl175_01icc`, `mdl175_01dtsv` | 模型字段 | + +--- + +### TPL-1613: 量价触发条件交易 +``` +模板: +triggerTradeexp = (ts_arg_max(volume, {d}) < 1) && (volume > ts_sum(volume, {d}) / {d}); +triggerExitexp = -1; +alphaexp = -rank(ts_delta(close, 2)); +trade_when(triggerTradeexp, alphaexp, triggerExitexp) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `5`, `10` | 窗口 | + +**说明**: 今日成交量为近期最大且高于均值时交易 + +--- + +### TPL-1614: 情绪成交量交易 +``` +模板: +sent_vol = vec_sum(scl12_alltype_buzzvec); +trade_when(rank(sent_vol) > 0.95, -zscore(scl12_buzz) * sent_vol, -1) +``` +**说明**: 高情绪量时反向交易情绪 + +--- + +### TPL-1615: 双层中性化模板 +``` +模板: +a = ts_zscore({field}, 252); +a1 = group_neutralize(a, industry); +a2 = group_neutralize(a1, bucket(rank(cap), range='0.1,1,0.1')) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意字段 | 数据字段 | + +**说明**: 先行业后市值的双重中性化 + +--- + +### TPL-1616: 相关性计算公式 +``` +模板: +a = {field1}; +b = {field2}; +p = {d}; +c = ts_mean(ts_av_diff(a, p) * ts_av_diff(b, p), p); +c / ts_std_dev(a, p) / ts_std_dev(b, p) # 近似 ts_corr +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field1}` | `close`, `returns` | 第一字段 | +| `{field2}` | `volume`, `open` | 第二字段 | +| `{d}` | `5`, `22` | 窗口 | + +--- + +### TPL-1617: 回归中性化双因子 +``` +模板: +afr = vec_avg({analyst_field}); +short_mom = ts_mean(returns - group_mean(returns, 1, market), {d_short}); +long_mom = ts_delay(ts_mean(returns - group_mean(returns, 1, market), {d_long}), {d_long}); +regression_neut(regression_neut(afr, short_mom), long_mom) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{analyst_field}` 
| 分析师VECTOR字段 | 分析师数据 | +| `{d_short}` | `5`, `10` | 短期动量窗口 | +| `{d_long}` | `20`, `22` | 长期动量窗口 | + +**说明**: 剥离短期和长期动量后的分析师因子 + +--- + +### TPL-1618: 回归斜率趋势检测 +``` +模板: ts_regression(ts_zscore({field}, {d}), ts_step(1), {d}, rettype=2) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `252`, `500` | 窗口 | + +**说明**: rettype=2返回回归斜率,检测长期趋势 + +--- + +### TPL-1619: 三因子乘积组合 +``` +模板: +my_group = market; +rank( +group_rank(ts_decay_linear(volume / ts_sum(volume, 252), 10), my_group) * +group_rank(ts_rank(vec_avg({fundamental}), {d}), my_group) * +group_rank(-ts_delta(close, 5), my_group) +) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{fundamental}` | 基本面VECTOR字段 | 基本面数据 | +| `{d}` | `252`, `504` | 排名窗口 | + +**说明**: 成交量趋势 × 基本面排名 × 价格反转 + +--- + +### TPL-1620: 波动率条件反转 +``` +模板: +vol = ts_std_dev(returns, {d}); +vol_mean = group_mean(vol, 1, market); +flip_ret = if_else(vol < vol_mean, -returns, returns); +-ts_mean(flip_ret, {d}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `20`, `22` | 窗口 | + +**说明**: 低波动做反转,高波动做动量 + +--- + +### TPL-1621: 恐惧指标复合 +``` +模板: +fear = ts_mean( +abs(returns - group_mean(returns, 1, market)) / +(abs(returns) + abs(group_mean(returns, 1, market)) + 0.1), +{d} +); +-group_neutralize(fear * {signal}, bucket(rank(cap), range='0.1,1,0.1')) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `20`, `22` | 窗口 | +| `{signal}` | 主信号 | 待组合信号 | + +--- + +### TPL-1622: 财务质量单因子 +``` +模板: group_neutralize(rank({fundamental_field}), bucket(rank(cap), range='0,1,0.1')) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{fundamental_field}` | `roe`, `roa`, `net_income/assets` | 财务质量指标 | + +--- + +### TPL-1623: 老虎哥回归模板 +``` +模板: group_rank(ts_regression(ts_zscore({field1}, {d}), ts_zscore(vec_sum({field2}), {d}), {d}), densify(sector)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field1}` | 任意MATRIX字段 | Y变量 | +| `{field2}` | 任意VECTOR字段 | X变量 | +| `{d}` 
| `252`, `504` | 回归窗口 | + +--- + +### TPL-1624: 综合数据清洗模板 +``` +模板: ts_decay_linear(-densify(zscore(winsorize(ts_backfill({field}, 115), std=4))), 10) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 低频字段如 `anl4_adjusted_netincome_ft` | 需要处理的字段 | + +--- + +### TPL-1625: 延迟最大值位置模板 +``` +模板: ts_max({field}, {d}) = ts_delay({field}, ts_arg_max({field}, {d})) # 等效公式 +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意字段 | 数据字段 | +| `{d}` | `22`, `66` | 窗口 | + +--- + +### TPL-1626: 数据探索通用模板 +``` +模板: zscore(ts_delta(rank(ts_zscore({field}, {d1})), {d2})) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 待探索数据字段 | +| `{d1}` | `60`, `126`, `252` | zscore窗口 | +| `{d2}` | `5`, `10`, `22` | delta窗口 | + +**说明**: 顾问推荐的新数据探索模板,可替换op和时间参数 + +--- + +### TPL-1627: 自定义衰减权重模板 +``` +模板: +weight = {d} + ts_step(0); # 线性递增权重 +ts_sum({data} * weight, {d}) / ts_sum(weight, {d}) # 加权平均 + +# 替代版 (ts_step递减) +ts_sum({alpha} * ts_step(1), {d}) / ts_sum(ts_step(1), {d}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{data}` | 任意字段 | 数据字段 | +| `{alpha}` | 主信号 | 原始Alpha | +| `{d}` | `10`, `22`, `66` | 衰减窗口 | + +**说明**: 当没有ts_decay_linear权限时的替代方案 + +--- + +### TPL-1628: log_diff相对增长模板 +``` +模板: group_rank(log_diff({field}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 财务指标如 `sales`, `eps`, `assets` | 数据字段 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 检测相对增长率,对乘性变化更敏感 + +--- + +### TPL-1629: ts_product累积收益模板 +``` +模板: group_rank(ts_product(1 + {ret_field}, {d}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{ret_field}` | `returns`, 收益率字段 | 收益字段 | +| `{d}` | `22`, `66`, `126` | 窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 计算累积收益排名 + +--- + +### TPL-1630: ts_percentage阈值模板 +``` +模板: +high_threshold = ts_percentage({field}, {d}, percentage=0.5); +low_threshold = ts_percentage({field}, {d}, percentage=0.5); +{signal} +``` +| 占位符 | 可选值 | 说明 | 
+|--------|--------|------| +| `{field}` | `close`, 价格字段 | 阈值计算字段 | +| `{d}` | `22`, `66` | 窗口 | +| `{signal}` | 主信号 | 条件信号 | + +**说明**: 用于震荡带突破策略的阈值构建 + +--- + +### TPL-1631: 动量反转切换模板 +``` +模板: +mom = ts_sum(returns, {d_long}) - ts_sum(returns, {d_short}); +reversal = -ts_delta(close, {d_short}); +if_else(ts_rank(ts_std_dev(returns, {d_short}), {d_long}) > 0.5, mom, reversal) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d_short}` | `5`, `10` | 短期窗口 | +| `{d_long}` | `22`, `66` | 长期窗口 | + +**说明**: 高波动环境用动量,低波动环境用反转 + +--- + +### TPL-1632: 市场收益率近似模板 (CHN) +``` +模板: +value = rank(cap) > 0.9 ? cap : 0; +market_return = group_sum(returns * value, country) / group_sum(value, country); +market_return +``` +**说明**: 用市值加权近似沪深300指数收益率,设置neutralization=NONE, decay=0 + +--- + +### TPL-1633: Beta回归中性化模板 +``` +模板: +market_return = group_mean(returns, 1, market); +ts_regression({field}, market_return, {d}) # 返回残差(Y - E[Y]) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 待中性化字段 | +| `{d}` | `126`, `252` | 回归窗口 | + +**说明**: 使用一元线性回归剥离市场因子 + +--- + +### TPL-1634: ts_moment高阶矩k值模板 +``` +模板: ts_moment({field}, {d}, k={k}) + +k=2: 方差 (等价于 ts_std_dev^2) +k=3: 偏度 (等价于 ts_skewness) +k=4: 峰度 (等价于 ts_kurtosis) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `22`, `66`, `126` | 窗口 | +| `{k}` | `2`, `3`, `4` | 阶数 | + +--- + +### TPL-1635: 龙头股因子增强模板 +``` +模板: sigmoid(rank(star_pm_global_rank)) +``` +**说明**: 对龙头股因子进行sigmoid增强 + +--- + +### TPL-1636: purify数据清洗嵌套模板 +``` +模板: group_rank(ts_rank(purify({field}), {d}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意字段 | 待清洗数据 | +| `{d}` | `22`, `66` | 排名窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: purify自动化清洗异常值和噪声 + +--- + +### TPL-1637: 理想振幅因子模板 +``` +模板: +amplitude = (high - low) / close; +ideal_amp = ts_percentage(amplitude, {d}, percentage=0.5); +group_rank(amplitude - ideal_amp, {group}) +``` +| 占位符 | 
可选值 | 说明 | +|--------|--------|------| +| `{d}` | `22`, `66` | 百分位窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 实际振幅偏离理想振幅的程度 + +--- + +### TPL-1638: 异同离差乖离率因子 (MACD风格) +``` +模板: +ema_short = ts_decay_exp_window({field}, {d_short}, 0.9); +ema_long = ts_decay_exp_window({field}, {d_long}, 0.9); +dif = ema_short - ema_long; +ts_zscore(dif, {d_signal}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | `close`, 价格字段 | 数据字段 | +| `{d_short}` | `12`, `22` | 短期EMA窗口 | +| `{d_long}` | `26`, `66` | 长期EMA窗口 | +| `{d_signal}` | `9`, `22` | 信号线窗口 | + +--- + +### TPL-1639: 收益率条件筛选反转 +``` +模板: +high_ret = ts_rank(returns, {d1}) > 0.8; +low_ret = ts_rank(returns, {d1}) < 0.2; +if_else(high_ret, -returns, if_else(low_ret, returns, 0)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d1}` | `22`, `66` | 排名窗口 | + +**说明**: 只对极端收益做反转 + +--- + +### TPL-1640: 三阶模板优化版 +``` +模板: (((, ), ), ) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `` | `group_rank`, `group_zscore` | 外层分组操作 | +| `` | `ts_rank`, `ts_delta`, `ts_mean` | 中层时序操作 | +| `` | `ts_zscore`, `ts_rank`, `ts_ir` | 内层时序操作 | +| `` | 任意字段 | 数据字段 | +| `` | `60`, `126`, `252` | 内层窗口 | +| `` | `5`, `22`, `66` | 外层窗口 | +| `` | `sector`, `industry` | 分组 | + +**说明**: 经典三阶嵌套结构,可灵活替换各层操作符 + +--- + +### TPL-1641: ts_entropy信号检测模板 +``` +模板: ts_entropy({field}, {d}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | `returns`, `volume`, 任意MATRIX字段 | 数据字段 | +| `{d}` | `14`, `22`, `66` | 窗口 | + +**说明**: 衡量时序数据的不确定性,高熵值表示更多随机性 + +--- + +### TPL-1642: 熵+ZScore组合模板 +``` +模板: ts_zscore({field}, {d}) * ts_entropy({field}, {d}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `14`, `22` | 窗口 | + +**说明**: RSI超买超卖 + 熵不确定性组合,捕捉可能的修正 + +--- + +### TPL-1643: ts_ir+ts_entropy信号组合 +``` +模板: +signal = ts_ir({field}, {d}) + ts_entropy({field}, {d}); +group_rank(signal, {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 
任意MATRIX字段 | 数据字段 | +| `{d}` | `22`, `66` | 窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: IR(信息比率)和Entropy组合捕捉信号稳定性和分布特征 + +--- + +### TPL-1644: trade_when市值过滤模板 +``` +模板: trade_when(rank(cap) > {threshold}, {alpha}, -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{threshold}` | `0.3`, `0.5`, `0.7` | 市值排名阈值 | +| `{alpha}` | 主信号 | 原始Alpha | + +**说明**: 仅交易大市值股票,降低prod corr + +--- + +### TPL-1645: trade_when盈利过滤模板 +``` +模板: trade_when(eps > {threshold} * est_eps, group_rank((eps - est_eps)/est_eps, industry), -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{threshold}` | `1.0`, `1.1`, `1.2` | 盈利超预期比例 | + +**说明**: 只交易盈利超预期的股票 + +--- + +### TPL-1646: trade_when量价触发模板 +``` +模板: +triggerTrade = (ts_arg_max(volume, {d}) < 1) && (volume > ts_sum(volume, {d})/{d}); +trade_when(triggerTrade, {alpha}, -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `5`, `10` | 判断窗口 | +| `{alpha}` | `-rank(ts_delta(close, 2))` | 主信号 | + +**说明**: 量价突破触发条件交易 + +--- + +### TPL-1647: trade_when情绪量过滤模板 +``` +模板: +sent_vol = vec_sum({sentiment_vec}); +trade_when(rank(sent_vol) > {threshold}, -zscore({sentiment_field}) * sent_vol, -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{sentiment_vec}` | `scl12_alltype_buzzvec` 等VECTOR字段 | 情绪向量 | +| `{sentiment_field}` | `scl12_buzz`, `scl12_sentiment` | 情绪字段 | +| `{threshold}` | `0.9`, `0.95` | 情绪量阈值 | + +**说明**: 高情绪量时反向交易情绪 + +--- + +### TPL-1648: bucket市值分组中性化模板 +``` +模板: +my_group2 = bucket(rank(cap), range='{range}'); +group_neutralize({alpha}, my_group2) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{range}` | `'0,1,0.1'`, `'0.1,1,0.1'` | 分桶范围 | +| `{alpha}` | 主信号 | 原始Alpha | + +**说明**: 按市值分桶进行中性化,去除规模效应 + +--- + +### TPL-1649: group_zscore时序组合模板 +``` +模板: group_zscore(ts_ir({field}, {d}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `22`, `66`, `126` | IR窗口 | +| `{group}` | `sector`, `industry` | 分组 | + 
+**说明**: 在分组内进行IR的Z-score标准化 + +--- + +### TPL-1650: scale+rank+ts组合模板 +``` +模板: scale(rank(ts_zscore({field}, {d}))) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `66`, `126`, `252` | 窗口 | + +**说明**: 多层标准化处理信号 + +--- + +### TPL-1651: Betting Against Beta模板 +``` +模板: +market_return = group_mean(returns, 1, market); +beta = ts_regression(returns, market_return, {d}, rettype=2); +-group_rank(beta, industry) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `126`, `252` | 回归窗口 | + +**说明**: 反Beta投注因子,做多低Beta股票 + +--- + +### TPL-1652: 跳跃因子模板 +``` +模板: +jump_up = ts_count(returns > ts_std_dev(returns, {d}) * {threshold}, {d}); +jump_down = ts_count(returns < -ts_std_dev(returns, {d}) * {threshold}, {d}); +group_rank(jump_down - jump_up, {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `22`, `66` | 统计窗口 | +| `{threshold}` | `2`, `2.5`, `3` | 标准差倍数 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 统计尾部跳跃事件的不对称性 + +--- + +### TPL-1653: 量小换手率模板 +``` +模板: +turnover = volume / sharesout; +low_turnover = ts_percentage(turnover, {d}, percentage=0.2); +group_rank(turnover < low_turnover, {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `22`, `66` | 百分位窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 识别低换手率状态 + +--- + +### TPL-1654: 隔夜收益因子模板 +``` +模板: +overnight_ret = open / ts_delay(close, 1) - 1; +group_rank(ts_mean(overnight_ret, {d}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `5`, `22`, `66` | 平均窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 隔夜"拉锯战"因子 + +--- + +### TPL-1655: sta1分组三因子模板 +``` +模板: +a = rank(group_rank(ts_rank(ts_backfill({field1}, {d1}), {d2}), sta1_top3000c20)); +trade_when(rank(a) > {threshold}, -zscore(ts_zscore({field2}, {d3})) * a, {exit_threshold} - rank(a)) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field1}` | 任意字段 | 第一因子字段 | +| `{field2}` | 模型字段如`mdl175_01dtsv` | 
第二因子字段 | +| `{d1}`, `{d2}`, `{d3}` | 各窗口参数 | 时间窗口 | +| `{threshold}` | `0.03`, `0.1` | 入场阈值 | +| `{exit_threshold}` | `0.25`, `0.5` | 出场阈值 | + +**说明**: 使用sta1预定义分组的复合策略 + +--- + +### TPL-1656: macro泛化模板 +``` +模板: group_rank(ts_delta(ts_zscore({macro_field}, {d1}), {d2}), country) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{macro_field}` | 宏观数据字段 | 宏观数据 | +| `{d1}` | `126`, `252` | zscore窗口 | +| `{d2}` | `5`, `22` | delta窗口 | + +**说明**: 基于Labs分析macro的泛化模板 + +--- + +### TPL-1657: ASI broker模板 +``` +模板: +signal = group_rank(ts_rank({broker_field}, {d}), market); +trade_when(volume > adv20, signal, -1) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{broker_field}` | broker数据字段 | 券商数据 | +| `{d}` | `22`, `66` | 排名窗口 | + +**说明**: ASI区域broker因子,需设置max_trade=ON + +--- + +### TPL-1658: Earnings超预期模板 +``` +模板: +surprise = (actual_eps - est_eps) / abs(est_eps); +group_rank(ts_zscore(surprise, {d}), industry) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `66`, `126` | zscore窗口 | + +**说明**: 盈利超预期因子 + +--- + +### TPL-1659: CCI技术指标模板 +``` +模板: +tp = (high + low + close) / 3; +cci = (tp - ts_mean(tp, {d})) / (0.015 * ts_mean(abs(tp - ts_mean(tp, {d})), {d})); +group_rank(-cci, {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{d}` | `14`, `20` | CCI窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 商品通道指数(CCI)反转策略 + +--- + +### TPL-1660: 0.618黄金比例幂变换模板 +``` +模板: +power_signal = signed_power({field}, 0.618); +group_rank(ts_zscore(power_signal, {d}), {group}) +``` +| 占位符 | 可选值 | 说明 | +|--------|--------|------| +| `{field}` | 任意MATRIX字段 | 数据字段 | +| `{d}` | `66`, `126` | zscore窗口 | +| `{group}` | `sector`, `industry` | 分组 | + +**说明**: 使用黄金比例0.618进行幂次变换 + +--- + +## 附录A:标准时间窗口 + +| 窗口代号 | 天数 | 含义 | +|---------|------|------| +| `d_week` | 5 | 一周 | +| `d_month` | 22 | 一月 | +| `d_quarter` | 66 | 一季度 | +| `d_half` | 126 | 半年 | +| `d_year` | 252 | 一年 | +| `d_2year` | 504 | 两年 | + +**使用规则**: +- 反转因子: 短窗口 `{3, 5, 22}` +- 动量因子: 
中窗口 `{22, 66}` +- 长期趋势: 长窗口 `{126, 252, 504}` +- 回归/波动: 超长窗口 `{250, 500, 750}` + +--- + +## 附录B:常用操作符分类 + +### 时序操作符 `` +| 操作符 | 用途 | +|--------|------| +| `ts_mean` | 移动平均 | +| `ts_rank` | 时序排名 | +| `ts_delta` | 差分 | +| `ts_std_dev` | 移动标准差 | +| `ts_ir` | 信息比率 | +| `ts_zscore` | 时序Z-score | +| `ts_corr` | 滚动相关性 | +| `ts_regression` | 滚动回归 | +| `ts_decay_linear` | 线性衰减 | +| `ts_decay_exp_window` | 指数衰减 | +| `ts_sum` | 滚动求和 | +| `ts_backfill` | 数据回填 | +| `ts_arg_min` | 最小值位置 | +| `ts_arg_max` | 最大值位置 | +| `ts_max` | 滚动最大值 | +| `ts_min` | 滚动最小值 | +| `ts_delay` | 延迟 | +| `ts_moment` | k阶中心矩 | +| `ts_co_skewness` | 协偏度 | +| `ts_co_kurtosis` | 协峰度 | +| `ts_partial_corr` | 偏相关 | +| `ts_triple_corr` | 三元相关 | +| `ts_theilsen` | Theil-Sen回归 | +| `ts_poly_regression` | 多项式回归残差 | +| `ts_vector_neut` | 向量中性化 | +| `ts_weighted_decay` | 加权衰减 | +| `ts_min_max_cps` | 最小最大压缩 | +| `ts_max_diff` | 与最大值差 | +| `ts_av_diff` | 与均值差 | +| `ts_quantile` | 分位数 | +| `ts_percentage` | 百分位 | +| `ts_median` | 中位数 | +| `ts_product` | 累积乘积 | +| `ts_count_nans` | NaN计数 | +| `ts_scale` | 时序缩放 | +| `ts_target_tvr_hump` | 目标换手率Hump | +| `ts_target_tvr_delta_limit` | Delta换手率限制 | + +### 分组操作符 `` +| 操作符 | 用途 | +|--------|------| +| `group_rank` | 分组排名 | +| `group_neutralize` | 分组中性化 | +| `group_zscore` | 分组Z-score | +| `group_mean` | 分组均值 | +| `group_sum` | 分组求和 | +| `group_extra` | 分组提取/填补 | +| `group_backfill` | 分组回填 | +| `group_normalize` | 分组归一化 | +| `group_vector_neut` | 分组向量中性化 | +| `group_vector_proj` | 分组向量投影 | +| `group_count` | 分组计数 | +| `group_std_dev` | 分组标准差 | + +### 向量操作符 `` +| 操作符 | 用途 | +|--------|------| +| `vec_avg` | 向量平均 | +| `vec_sum` | 向量求和 | +| `vec_max` | 向量最大 | +| `vec_min` | 向量最小 | +| `vec_stddev` | 向量标准差 | +| `vec_count` | 向量计数 | +| `vec_norm` | 向量归一化 | +| `vec_zscore` | 向量Z-score | +| `vec_range` | 向量范围 | + +### 事件/时间操作符 +| 操作符 | 用途 | +|--------|------| +| `days_from_last_change` | 距离上次变化天数 | +| `last_diff_value` | 最近不同值 | +| `ts_step` | 时间步长 | + +### 信号处理操作符 +| 操作符 | 用途 | 
+|--------|------| +| `signed_power` | 带符号幂变换 | +| `clamp` | 边界限制 | +| `left_tail` | 左尾截断 | +| `right_tail` | 右尾截断 | +| `fraction` | 分数映射 | +| `nan_out` | NaN外推 | +| `purify` | 数据清洗 | +| `keep` | 条件保留 | +| `scale_down` | 缩放降维 | +| `hump` | Hump平滑 | +| `hump_decay` | Hump衰减 | + +### 其他常用操作符 +| 操作符 | 用途 | +|--------|------| +| `rank` | 截面排名 | +| `zscore` | 截面Z-score | +| `sigmoid` | Sigmoid归一化 | +| `winsorize` | 极端值截断 | +| `truncate` | 截断 | +| `tail` | 尾部处理 | +| `scale` | 缩放 | +| `filter` | 过滤 | +| `densify` | 稠密化 | +| `bucket` | 分桶 | +| `log` | 对数 | +| `abs` | 绝对值 | +| `if_else` | 条件判断 | +| `trade_when` | 条件交易 | +| `regression_neut` | 回归中性化 | +| `regression_proj` | 回归投影 | +| `is_nan` | NaN检测 | +| `is_not_nan` | 非NaN检测 | +| `inst_pnl` | 单标的PnL | +| `convert` | 单位转换 | +| `pasteurize` | 去无效值 | + +--- + +## 附录C:数据字段分类 + +### 量价类 `` +``` +close, open, high, low, vwap +returns, volume, adv20, sharesout, cap +``` + +### 基本面类 `` +``` +assets, sales, ebitda, net_income, eps, operating_income +goodwill, debt, cash, equity, gross_profit +fnd6_*, fnd72_*, mdl175_*, mdl163_* +debt_to_equity, roe, roa +``` + +### 分析师类 `` (VECTOR) +``` +anl4_eps_mean, anl4_eps_low, anl4_eps_high +anl4_revenue_mean, anl4_fcf_value, anl4_netprofit_mean +anl4_adjusted_netincome_ft, anl4_bvps_flag +oth41_s_west_*, analyst_* +``` + +### 情绪类 `` +``` +scl12_sentiment, scl12_buzz, scl12_alltype_buzzvec +snt_value, snt_buzz, snt_buzz_ret, snt_buzz_bfl +nws18_relevance, nws18_ber +nws12_prez_result2, nws12_prez_short_interest +mws85_sentiment, mws46_mcv +``` + +### 期权类 `` +``` +option8_*, option14_* +implied_volatility_call_120, implied_volatility_call_270 +parkinson_volatility_120, parkinson_volatility_270 +pcr_vol_10, pcr_vol_30 +put_delta, call_delta, put_gamma, call_gamma +put_theta, call_theta, put_vega, call_vega +call_breakeven_10, put_breakeven_10 +``` + +### 模型类 `` +``` +mdl175_01dtsv, mdl175_01icc +mdl163_*, mdl* +``` + +### 分组类 `` +``` +industry, sector, subindustry +market, country, exchange 
+sta1_top3000c20, sta1_* +pv13_*, pv27_* +``` diff --git a/simple72/Tranformer/validator.py b/simple72/Tranformer/validator.py new file mode 100755 index 0000000..2fdaa4c --- /dev/null +++ b/simple72/Tranformer/validator.py @@ -0,0 +1,1261 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +表达式验证器 - 使用抽象语法树验证字符串表达式格式是否正确 + +本模块实现了一个能够检测字符串表达式格式是否正确的系统,基于PLY(Python Lex-Yacc) +构建词法分析器和语法分析器,识别表达式中的操作符、函数和字段,并验证其格式正确性。 +""" + +import re +import sys +import json +import os +from typing import List, Dict, Any, Optional, Tuple + +# 尝试导入PLY库,如果不存在则提供安装提示 +try: + import ply.lex as lex + import ply.yacc as yacc +except ImportError: + print("错误: 需要安装PLY库。请运行 'pip install ply' 来安装。") + sys.exit(1) + +# 1. 定义支持的操作符和函数 +supported_functions = { + # Group 类别函数 + 'group_min': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + # group_mean(x, w, group) + 'group_mean': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'category']}, + 'group_median': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_max': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_rank': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_vector_proj': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'category']}, + 'group_normalize': {'min_args': 2, 'max_args': 5, 'arg_types': ['expression', 'category', 'expression', 'expression', 'expression']}, + 'group_extra': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'category']}, + 'group_backfill': {'min_args': 3, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['x', 'cat', 'days', 'std']}, + 'group_scale': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_count': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_zscore': {'min_args': 2, 'max_args': 2, 
'arg_types': ['expression', 'category']}, + 'group_std_dev': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_sum': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_neutralize': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']}, + 'group_multi_regression': {'min_args': 4, 'max_args': 9, 'arg_types': ['expression'] * 9}, + 'group_cartesian_product': {'min_args': 2, 'max_args': 2, 'arg_types': ['category', 'category']}, + 'combo_a': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']}, + + # Transformational 类别函数 + 'right_tail': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'bucket': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # 第二个参数可以是string类型的range参数 + 'tail': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression']}, + 'left_tail': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'trade_when': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']}, + 'generate_stats': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + + # Cross Sectional 类别函数 + 'winsorize': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'std']}, + 'rank': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'regression_proj': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'vector_neut': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'regression_neut': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'multi_regression': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 100}, # 支持多个自变量 + + # Time Series 类别函数 + 'ts_std_dev': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_mean': {'min_args': 2, 
'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_delay': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_corr': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']}, + 'ts_zscore': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_returns': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'mode'], 'keyword_only': True}, + 'ts_product': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + # Platform: ts_backfill(x, d) + 'ts_backfill': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'd']}, + 'days_from_last_change': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'last_diff_value': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + # Platform: ts_scale(x, d, constant=0) + 'ts_scale': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'constant'], 'keyword_only': True}, + # Platform: ts_entropy(x, d) + 'ts_entropy': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'd']}, + 'ts_step': {'min_args': 1, 'max_args': 1, 'arg_types': ['number']}, + 'ts_sum': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_co_kurtosis': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']}, + 'inst_tvr': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_decay_exp_window': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'factor'], 'keyword_only': True}, + 'ts_av_diff': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_kurtosis': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + # Platform: ts_min_max_diff(x, d, f=0.5) + 'ts_min_max_diff': {'min_args': 2, 
'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'f'], 'keyword_only': True}, + 'ts_arg_max': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_max': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + # Platform: ts_min_max_cps(x, d, f=2) + 'ts_min_max_cps': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'f'], 'keyword_only': True}, + # Platform: ts_rank(x, d, constant=0) + 'ts_rank': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'constant'], 'keyword_only': True}, + 'ts_ir': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_theilsen': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']}, + # Platform: hump_decay(x, p=0) + 'hump_decay': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'p'], 'keyword_only': True}, + # Platform: ts_weighted_decay(x, k=0.5) + 'ts_weighted_decay': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'k'], 'keyword_only': True}, + # Platform: ts_quantile(x, d, driver="gaussian") + 'ts_quantile': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'string'], 'param_names': ['x', 'd', 'driver'], 'keyword_only': True}, + 'ts_min': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_count_nans': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_covariance': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']}, + 'ts_co_skewness': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']}, + 'ts_min_diff': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + # Platform: ts_decay_linear(x, d, dense=false) + 'ts_decay_linear': {'min_args': 2, 'max_args': 3, 
'arg_types': ['expression', 'number', 'boolean'], 'param_names': ['x', 'd', 'dense'], 'keyword_only': True}, + # Platform: jump_decay(x, d, sensitivity=0.5, force=0.1) + 'jump_decay': {'min_args': 2, 'max_args': 4, 'arg_types': ['expression', 'number', 'number', 'number'], 'param_names': ['x', 'd', 'sensitivity', 'force'], 'keyword_only': True}, + # Platform: ts_moment(x, d, k=0) + 'ts_moment': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'k'], 'keyword_only': True}, + 'ts_arg_min': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_regression': {'min_args': 3, 'max_args': 5, 'arg_types': ['expression', 'expression', 'number', 'number', 'number'], 'param_names': ['y', 'x', 'd', 'lag', 'rettype'], 'keyword_only': True}, + 'ts_skewness': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_max_diff': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'kth_element': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'number', 'number']}, + 'hump': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'hump']}, + 'ts_median': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_delta': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + # Platform: ts_poly_regression(y, x, d, k=1) and k must be keyword if provided + 'ts_poly_regression': {'min_args': 3, 'max_args': 4, 'arg_types': ['expression', 'expression', 'number', 'number'], 'param_names': ['y', 'x', 'd', 'k'], 'keyword_only': True, 'keyword_only_from': 3}, + 'ts_target_tvr_decay': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'number', 'number', 'number'], 'param_names': ['x', 'lambda_min', 'lambda_max', 'target_tvr'], 'keyword_only': True}, + 'ts_target_tvr_delta_limit': {'min_args': 2, 'max_args': 5, 'arg_types': ['expression', 'expression', 'number', 'number', 'number'], 
'param_names': ['x', 'y', 'lambda_min', 'lambda_max', 'target_tvr'], 'keyword_only': True}, + 'ts_target_tvr_hump': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'number', 'number', 'number'], 'param_names': ['x', 'lambda_min', 'lambda_max', 'target_tvr'], 'keyword_only': True}, + # Platform: ts_delta_limit(x, y, limit_volume=0.1) + 'ts_delta_limit': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'y', 'limit_volume'], 'keyword_only': True}, + + # Special 类别函数 + 'inst_pnl': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'self_corr': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'in': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # 注意:这是关键字 + 'universe_size': {'min_args': 0, 'max_args': 0, 'arg_types': []}, + + # Missing functions from operators.py + 'quantile': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'driver', 'sigma']}, # quantile(x, driver = gaussian, sigma = 1.0) + 'normalize': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'boolean', 'number']}, # normalize(x, useStd = false, limit = 0.0) + 'zscore': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, # zscore(x) + + # Logical 类别函数 + 'or': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # 注意:这是关键字 + 'and': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # 注意:这是关键字 + 'not': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, # 注意:这是关键字 + 'is_nan': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'is_not_nan': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'less': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'greater': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 
'expression']}, + 'is_finite': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'if_else': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']}, + 'not_equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'less_equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'greater_equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + + # Vector 类别函数 + 'vec_kurtosis': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_min': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_count': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_sum': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_skewness': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_max': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_avg': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_range': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_choose': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'nth']}, + 'vec_powersum': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'constant']}, + 'vec_stddev': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_percentage': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'percentage']}, + 'vec_ir': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'vec_norm': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'ts_percentage': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'percentage']}, + 'signed_power': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']}, + 'ts_product': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 
'number']}, + + # Additional functions from test cases + 'rank_by_side': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'rate', 'scale']}, + 'log_diff': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'nan_mask': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, + 'ts_partial_corr': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'number']}, + 'ts_triple_corr': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'number']}, + 'clamp': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'lower', 'upper']}, + 'keep': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'condition', 'period']}, + 'replace': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'target', 'dest']}, + 'filter': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'h', 't']}, + 'one_side': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'string'], 'param_names': ['x', 'side']}, + 'scale_down': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'constant']}, + + # Arithmetic 类别函数 + # add(x, y, ..., filter=false) + # NOTE: add() is variadic (>=2 terms) with an optional boolean filter flag. + # We validate it with custom logic in validate_function(). 
+ 'add': {'min_args': 2, 'max_args': 101, 'arg_types': ['expression'] * 101}, + 'multiply': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 99 + ['boolean'], 'param_names': ['x', 'y', 'filter']}, # multiply(x, y, ..., filter=false) + 'sign': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'subtract': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']}, # subtract(x, y, filter=false) + 'pasteurize': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'log': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'purify': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'arc_tan': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'max': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 100}, # max(x, y, ...) + 'to_nan': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']}, # to_nan(x, value=0, reverse=false) + 'abs': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'sigmoid': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'divide': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # divide(x, y) + 'min': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 100}, # min(x, y, ...) 
+ 'tanh': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'nan_out': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'lower', 'upper']}, # nan_out(x, lower=0, upper=0) + 'signed_power': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # signed_power(x, y) + 'inverse': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'round': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'sqrt': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 's_log_1p': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'reverse': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, # -x + 'power': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']}, # power(x, y) + 'densify': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + 'floor': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']}, + # Appended missing operators + 'arc_cos': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']}, + 'arc_sin': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']}, + 'ceiling': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']}, + 'exp': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']}, + 'fraction': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']}, + 'round_down': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'f']}, + 'is_not_finite': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'negate': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'ts_rank_gmean_amean_diff': {'min_args': 5, 'max_args': 5, 'arg_types': ['expression', 'expression', 'expression', 'expression', 'number'], 'param_names': ['input1', 'input2', 'input3', 
'...', 'd']}, + 'ts_vector_neut': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'y', 'd']}, + 'ts_vector_proj': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'y', 'd']}, + 'scale': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['x', 'scale', 'longscale', 'shortscale']}, + 'generalized_rank': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['open', 'm']}, + 'rank_gmean_amean_diff': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['input1', 'input2', 'input3', '...']}, + 'truncate': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'maxPercent']}, + 'vector_proj': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'y']}, + 'vec_filter': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['vec', 'value']}, + 'group_coalesce': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['original_group', 'group2', 'group3', '…']}, + 'group_percentage': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'category', 'expression'], 'param_names': ['x', 'group', 'percentage']}, + 'group_vector_neut': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'y', 'g']}, + 'convert': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'mode']}, + 'reduce_avg': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'threshold']}, + 'reduce_choose': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['input', 
'nth', 'ignoreNan']}, + 'reduce_count': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'threshold']}, + 'reduce_ir': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_kurtosis': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_max': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_min': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_norm': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_percentage': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'percentage']}, + 'reduce_powersum': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['input', 'constant', 'precise']}, + 'reduce_range': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_skewness': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, + 'reduce_stddev': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'threshold']}, + 'reduce_sum': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']}, +} + +# 2. 定义group类型字段 +group_fields = { + 'sector', 'subindustry', 'industry', 'exchange', 'country', 'market' +} + +# 3. 有效类别集合 +valid_categories = group_fields + +# 4. 字段命名模式 - 只校验字段是不是数字字母下划线组成 +field_patterns = [ + re.compile(r'^[a-zA-Z0-9_]+$'), # 只允许数字、字母和下划线组成的字段名 +] + +# 4. 
# AST node type used by the expression validator.
class ASTNode:
    """Base node of the abstract syntax tree built by the parser."""

    def __init__(self, node_type: str, children: Optional[List['ASTNode']] = None,
                 value: Optional[Any] = None, line: Optional[int] = None):
        # One of: 'function', 'binop', 'unop', 'field', 'number', 'string',
        # 'category', 'boolean', 'identifier'.
        self.node_type = node_type
        self.children = children or []
        # Payload: literal value, field/function name, or {'op': ...} for operators.
        self.value = value
        self.line = line

    def __str__(self) -> str:
        return f"ASTNode({self.node_type}, {self.value}, line={self.line})"

    def __repr__(self) -> str:
        return self.__str__()


class ExpressionValidator:
    """Expression validator: lexes, parses and validates alpha expressions.

    The ``tokens`` tuple and the ``t_*`` members follow the PLY (lex/yacc)
    naming contract and must keep their names.
    """

    def __init__(self):
        """Build the PLY lexer/parser and reset per-expression state."""
        self.lexer = lex.lex(module=self, debug=False)
        self.parser = yacc.yacc(module=self, debug=False)
        # Accumulated lexer/parser error messages for the current expression.
        self.errors = []
        # Cache for unit inference (unit/scalar/category), keyed by node id.
        self._unit_cache: Dict[int, str] = {}
        # Cache for derived-category detection (bucket/group_cartesian_product outputs).
        self._derived_category_cache: Dict[int, bool] = {}

    # Token names (PLY contract).
    tokens = ('FUNCTION', 'FIELD', 'NUMBER', 'LPAREN', 'RPAREN',
              'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'COMMA', 'CATEGORY',
              'EQUAL', 'ASSIGN', 'IDENTIFIER', 'STRING', 'GREATER', 'LESS',
              'GREATEREQUAL', 'LESSEQUAL', 'NOTEQUAL', 'BOOLEAN')

    # Ignored characters. BUGFIX: '\n' used to be listed here, which meant PLY
    # skipped newlines before rule matching, t_newline below could never fire,
    # and lexer.lineno was stuck at 1 in every "行 N" error message. Newlines
    # must reach t_newline for line tracking to work.
    t_ignore = ' \t'

    # Operators — order matters: longer operators must be defined first.
    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_COMMA = r','
    t_EQUAL = r'=='
    t_NOTEQUAL = r'!='
    t_GREATEREQUAL = r'>='
    t_LESSEQUAL = r'<='
    t_GREATER = r'>'
    t_LESS = r'<'
    t_ASSIGN = r'='

    def t_NUMBER(self, t):
        r'\d+\.?\d*'
        # Integer unless a decimal point is present.
        if '.' in t.value:
            t.value = float(t.value)
        else:
            t.value = int(t.value)
        return t

    def t_STRING(self, t):
        r"'[^']*'|\"[^\"]*\""
        # Strip the surrounding quotes; declared before the identifier rule.
        t.value = t.value[1:-1]
        return t

    def t_IDENTIFIER(self, t):
        r'[a-zA-Z_][a-zA-Z0-9_]*'
        # Boolean literals first.
        if t.value.lower() in {'true', 'false'}:
            t.type = 'BOOLEAN'
            t.value = t.value.lower()  # normalize for consistency
        else:
            # Peek past whitespace at the next significant character to
            # classify the identifier from context.
            lexpos = t.lexpos
            next_chars = ''
            if lexpos + len(t.value) < len(t.lexer.lexdata):
                next_pos = lexpos + len(t.value)
                while next_pos < len(t.lexer.lexdata) and t.lexer.lexdata[next_pos].isspace():
                    next_pos += 1
                if next_pos < len(t.lexer.lexdata):
                    next_chars = t.lexer.lexdata[next_pos:next_pos + 1]

            if next_chars == '=':
                # Followed by '=': a keyword-argument name.
                t.type = 'IDENTIFIER'
            elif next_chars == '(':
                # Followed by '(': a function name.
                t.type = 'FUNCTION'
                t.value = t.value.lower()
            # Known keyword-parameter names (duplicate 'k' removed).
            elif t.value in {'std', 'k', 'lambda_min', 'lambda_max', 'target_tvr',
                             'range', 'buckets', 'lag', 'rettype', 'mode', 'nth',
                             'constant', 'percentage', 'driver', 'sigma', 'rate',
                             'scale', 'filter', 'lower', 'upper', 'target', 'dest',
                             'event', 'sensitivity', 'force', 'h', 't', 'period',
                             'stddev', 'factor', 'useStd', 'limit', 'gaussian',
                             'uniform', 'cauchy'}:
                t.type = 'IDENTIFIER'
            # Function names are matched case-insensitively.
            elif t.value.lower() in supported_functions:
                t.type = 'FUNCTION'
                t.value = t.value.lower()
            elif t.value in valid_categories:
                t.type = 'CATEGORY'
            elif self._is_valid_field(t.value):
                t.type = 'FIELD'
            else:
                # Anything else stays a plain identifier.
                t.type = 'IDENTIFIER'
        return t

    # Line-number tracking (now reachable — see t_ignore above).
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    def t_error(self, t):
        if t:
            # Distinguish truly illegal characters from merely unmatchable text.
            if not re.match(r'[a-zA-Z0-9_\+\-\*/\(\)\,\s=<>!]', t.value[0]):
                self.errors.append(f"非法字符 '{t.value[0]}' (行 {t.lexer.lineno})")
            else:
                self.errors.append(f"非法标记 '{t.value}' (行 {t.lexer.lineno})")
            # Skip the offending character and keep lexing.
            t.lexer.skip(1)
        else:
            self.errors.append("词法分析器到达文件末尾")
# ----- PLY grammar actions and function-call validation -----
# ExpressionValidator methods. The p_* names and their grammar docstrings are
# PLY's parser contract and are reproduced exactly; only action bodies differ.

def p_expression(self, p):
    """expression : comparison
                  | expression EQUAL comparison
                  | expression NOTEQUAL comparison
                  | expression GREATER comparison
                  | expression LESS comparison
                  | expression GREATEREQUAL comparison
                  | expression LESSEQUAL comparison"""
    p[0] = p[1] if len(p) == 2 else ASTNode('binop', [p[1], p[3]], {'op': p[2]})

def p_comparison(self, p):
    """comparison : term
                  | comparison PLUS term
                  | comparison MINUS term"""
    p[0] = p[1] if len(p) == 2 else ASTNode('binop', [p[1], p[3]], {'op': p[2]})

def p_term(self, p):
    """term : factor
            | term TIMES factor
            | term DIVIDE factor"""
    p[0] = p[1] if len(p) == 2 else ASTNode('binop', [p[1], p[3]], {'op': p[2]})

# Terminal-symbol -> AST node-type dispatch for p_factor.
_FACTOR_NODE_TYPES = {'NUMBER': 'number', 'STRING': 'string', 'FIELD': 'field',
                      'CATEGORY': 'category', 'BOOLEAN': 'boolean',
                      'IDENTIFIER': 'identifier'}

def p_factor(self, p):
    """factor : NUMBER
              | STRING
              | FIELD
              | CATEGORY
              | IDENTIFIER
              | BOOLEAN
              | MINUS factor
              | LPAREN expression RPAREN
              | function_call"""
    if len(p) == 2:
        node_type = _FACTOR_NODE_TYPES.get(p.slice[1].type)
        # Terminals become leaf nodes; a function_call is passed through as-is.
        p[0] = ASTNode(node_type, value=p[1]) if node_type else p[1]
    elif len(p) == 3:
        # Unary minus.
        p[0] = ASTNode('unop', [p[2]], {'op': p[1]})
    elif len(p) == 4:
        # Parenthesised expression.
        p[0] = p[2]
    else:
        p[0] = p[1]

def p_function_call(self, p):
    '''function_call : FUNCTION LPAREN args RPAREN'''
    p[0] = ASTNode('function', p[3], p[1])

def p_args(self, p):
    '''args : arg_list
            | empty'''
    p[0] = p[1] if (len(p) == 2 and p[1] is not None) else []

def p_arg_list(self, p):
    '''arg_list : arg
                | arg_list COMMA arg'''
    p[0] = [p[1]] if len(p) == 2 else p[1] + [p[3]]

def p_arg(self, p):
    '''arg : expression
           | IDENTIFIER ASSIGN expression'''
    if len(p) == 2:
        p[0] = {'type': 'positional', 'value': p[1]}
    else:
        p[0] = {'type': 'named', 'name': p[1], 'value': p[3]}

def p_empty(self, p):
    '''empty :'''
    p[0] = None

def p_error(self, p):
    # Record syntax errors instead of raising.
    if p:
        self.errors.append(f"语法错误在位置 {p.lexpos}: 非法标记 '{p.value}'")
    else:
        self.errors.append("语法错误: 表达式不完整")

def _is_valid_field(self, field_name: str) -> bool:
    """Return True when the name matches one of the field-name patterns."""
    return any(pattern.match(field_name) for pattern in field_patterns)

def validate_function(self, node: 'ASTNode', is_in_group_arg: bool = False) -> List[str]:
    """Validate a function call's argument count and argument types.

    Returns a list of error messages (empty when the call is acceptable).
    """
    fname = node.value
    args = node.children
    info = supported_functions.get(fname)

    if not info:
        return [f"未知函数: {fname}"]

    # add() is variadic with an optional filter flag; it has its own validator.
    if fname == 'add':
        return self._validate_add(args, is_in_group_arg)

    found: List[str] = []

    # Keyword-only enforcement: past this index, arguments must be named.
    kw_only_from = info.get('keyword_only_from')
    if kw_only_from is None and info.get('keyword_only'):
        kw_only_from = info.get('min_args', 0)

    # Arity check.
    if len(args) < info['min_args']:
        found.append(f"函数 {fname} 需要至少 {info['min_args']} 个参数,但只提供了 {len(args)}")
    elif len(args) > info['max_args']:
        found.append(f"函数 {fname} 最多接受 {info['max_args']} 个参数,但提供了 {len(args)}")

    def _check_named(name, value):
        # Resolve the declared parameter index, or one of the ad-hoc extras.
        if 'param_names' in info and name in info['param_names']:
            declared = info['param_names'].index(name)
            if declared < len(info['arg_types']):
                found.extend(self._validate_arg_type(
                    value, info['arg_types'][declared], declared, fname, is_in_group_arg))
        elif fname == 'winsorize' and name in ['std', 'clip']:
            found.extend(self._validate_arg_type(value, 'number', 0, fname, is_in_group_arg))
        elif fname == 'bucket' and name in ['range', 'buckets']:
            # range/buckets take string specifications.
            found.extend(self._validate_arg_type(value, 'string', 1, fname, is_in_group_arg))
        else:
            found.append(f"函数 {fname} 不存在参数 '{name}'")

    def _check_positional(value, pos):
        if kw_only_from is not None and pos >= kw_only_from:
            pname = None
            if 'param_names' in info and pos < len(info['param_names']):
                pname = info['param_names'][pos]
            if pname:
                found.append(f"函数 {fname} 的第{pos+1}个参数必须使用命名参数 '{pname}='")
            else:
                found.append(f"函数 {fname} 的第{pos+1}个参数必须使用命名参数")
        elif pos < len(info['arg_types']):
            found.extend(self._validate_arg_type(
                value, info['arg_types'][pos], pos, fname, is_in_group_arg))

    pos = 0
    for arg in args:
        if isinstance(arg, dict):
            if arg['type'] == 'named':
                _check_named(arg['name'], arg['value'])
                continue  # named arguments do not consume a positional slot
            if arg['type'] == 'positional':
                _check_positional(arg['value'], pos)
            else:
                found.append(f"参数 {pos+1} 格式错误")
            pos += 1
        else:
            # Bare AST node as a positional argument.
            _check_positional(arg, pos)
            pos += 1

    return found
# ExpressionValidator method. BUGFIX: the 'boolean' branch used to accept ANY
# number node (e.g. 5), contradicting its own error message and the 0/1 rule
# _validate_add applies to the filter flag; numeric booleans are now 0/1 only.
def _validate_arg_type(self, arg: 'ASTNode', expected_type: str, arg_index: int,
                       function_name: str, is_in_group_arg: bool = False) -> List[str]:
    """Check a single argument node against the expected parameter type.

    Args:
        arg: argument AST node.
        expected_type: 'expression' | 'number' | 'boolean' | 'field' | 'category' | 'string'.
        arg_index: zero-based position, used in error messages.
        function_name: name of the function being validated.
        is_in_group_arg: True when inside a group_* function's argument chain.

    Returns:
        List of error messages (empty when the argument is acceptable).
    """
    errors = []

    def _is_number_like(node: 'ASTNode') -> bool:
        # A bare number, or unary +/- applied (recursively) to one, e.g. -1.
        if node is None:
            return False
        if node.node_type == 'number':
            return True
        if node.node_type == 'unop' and isinstance(node.value, dict) and node.value.get('op') in {'-', '+'}:
            if node.children and hasattr(node.children[0], 'node_type'):
                return _is_number_like(node.children[0])
        return False

    # Unit compatibility: bucket()/group_cartesian_product() yield a derived
    # grouping key that may only be consumed where a category is expected.
    if self._is_derived_category(arg) and expected_type != 'category':
        errors.append(
            f"Incompatible unit for input of \"{function_name}\" at index {arg_index}, expected \"Unit[]\", found \"Unit[Group:1]\""
        )
        return errors

    # Group-type fields may only appear in group_* functions or their argument chain.
    if arg.node_type == 'category' and arg.value in group_fields:
        if not (function_name.startswith('group_') or is_in_group_arg):
            errors.append(f"Group类型字段 '{arg.value}' 只能用于Group类型函数的参数中")

    if expected_type == 'expression':
        # Any well-formed AST node qualifies as an expression.
        pass
    elif expected_type == 'number':
        if not _is_number_like(arg):
            errors.append(f"参数 {arg_index+1} 应该是一个数字,但得到 {arg.node_type}")
    elif expected_type == 'boolean':
        # true/false literals, or the numeric flags 0/1 only (see BUGFIX note).
        if not (arg.node_type == 'boolean'
                or (arg.node_type == 'number' and arg.value in {0, 1})):
            errors.append(f"参数 {arg_index+1} 应该是一个布尔值(true/false 或 0/1),但得到 {arg.node_type}")
    elif expected_type == 'field':
        if arg.node_type != 'field' and arg.node_type != 'category':
            # Either a field or a category is accepted where a field is expected.
            errors.append(f"参数 {arg_index+1} 应该是一个字段,但得到 {arg.node_type}")
        elif arg.node_type == 'field' and not self._is_valid_field(arg.value):
            errors.append(f"无效的字段名: {arg.value}")
    elif expected_type == 'category':
        if not function_name.startswith('group_'):
            # Non-group functions require a known category literal.
            if arg.node_type != 'category':
                errors.append(f"参数 {arg_index+1} 应该是一个类别,但得到 {arg.node_type}")
            elif arg.value not in valid_categories:
                errors.append(f"无效的类别: {arg.value}")
        # group_* functions accept any node type here (field, category, ...).

    return errors

# Test seam: module-level alias (the name above is underscore-private).
validate_arg_type = _validate_arg_type
+ # It can only be consumed where a category/grouping key is expected. + if self._is_derived_category(arg) and expected_type != 'category': + errors.append( + f"Incompatible unit for input of \"{function_name}\" at index {arg_index}, expected \"Unit[]\", found \"Unit[Group:1]\"" + ) + return errors + + # 首先检查是否是group类型字段,如果是则只能用于Group类型函数 + # 但是如果当前函数是group_xxx或在group函数的参数链中,则允许使用 + if arg.node_type == 'category' and arg.value in group_fields: + if not (function_name.startswith('group_') or is_in_group_arg): + errors.append(f"Group类型字段 '{arg.value}' 只能用于Group类型函数的参数中") + + # 然后验证参数类型是否符合预期 + if expected_type == 'expression': + # 表达式可以是任何有效的AST节点 + pass + elif expected_type == 'number': + # 允许 -1 这类一元负号数字常量(解析为 unop(number)) + if not _is_number_like(arg): + errors.append(f"参数 {arg_index+1} 应该是一个数字,但得到 {arg.node_type}") + elif expected_type == 'boolean': + # 布尔值可以是 true/false 或数字(0/1) + if arg.node_type not in {'boolean', 'number'}: + errors.append(f"参数 {arg_index+1} 应该是一个布尔值(true/false 或 0/1),但得到 {arg.node_type}") + elif expected_type == 'field': + if arg.node_type != 'field' and arg.node_type != 'category': + # 允许field或category作为字段参数 + errors.append(f"参数 {arg_index+1} 应该是一个字段,但得到 {arg.node_type}") + elif arg.node_type == 'field' and not self._is_valid_field(arg.value): + errors.append(f"无效的字段名: {arg.value}") + elif expected_type == 'category': + if not function_name.startswith('group_'): + # 非group函数的category参数必须是category类型且在valid_categories中 + if arg.node_type != 'category': + errors.append(f"参数 {arg_index+1} 应该是一个类别,但得到 {arg.node_type}") + elif arg.value not in valid_categories: + errors.append(f"无效的类别: {arg.value}") + # group函数的category参数可以是任何类型(field、category等),不进行类型校验 + + return errors + + def _infer_unit(self, node: ASTNode) -> str: + """Infer the Unit kind of an AST node. 
+ + Returns: + 'unit' - regular numeric time-series Unit[] + 'scalar' - literals (numbers/booleans/strings) + 'category' - category/grouping keys (industry/sector or derived via bucket/cartesian) + """ + if node is None: + return 'unit' + + cache_key = id(node) + cached = self._unit_cache.get(cache_key) + if cached is not None: + return cached + + unit = 'unit' + + if node.node_type in {'number', 'boolean', 'string'}: + unit = 'scalar' + elif node.node_type in {'field', 'identifier'}: + unit = 'unit' + elif node.node_type == 'category': + unit = 'category' + elif node.node_type in {'unop', 'binop'}: + child_units = [self._infer_unit(child) for child in node.children if hasattr(child, 'node_type')] + unit = 'category' if 'category' in child_units else 'unit' + elif node.node_type == 'function': + fname = node.value + if fname in {'bucket', 'group_cartesian_product'}: + unit = 'category' + else: + first_arg = None + for child in node.children: + if isinstance(child, dict): + if child.get('type') == 'positional': + first_arg = child.get('value') + break + else: + first_arg = child + break + if hasattr(first_arg, 'node_type'): + unit = self._infer_unit(first_arg) + else: + unit = 'unit' + + self._unit_cache[cache_key] = unit + return unit + + def _is_derived_category(self, node: ASTNode) -> bool: + """Return True if node is a derived category/grouping key (e.g., bucket/cartesian output).""" + if node is None: + return False + + cache_key = id(node) + cached = self._derived_category_cache.get(cache_key) + if cached is not None: + return cached + + derived = False + if node.node_type == 'function': + if node.value in {'bucket', 'group_cartesian_product'}: + derived = True + else: + function_info = supported_functions.get(node.value, {}) + arg_types = function_info.get('arg_types', []) + param_names = function_info.get('param_names', []) + + positional_index = 0 + for child in node.children: + if isinstance(child, dict): + if child.get('type') == 'named': + name = 
# ExpressionValidator method: custom validation for the variadic add().
def _validate_add(self, args: List[Any], is_in_group_arg: bool = False) -> List[str]:
    """Validate add(x, y, ..., filter=false).

    Rules:
      - at least two expression terms;
      - an optional boolean filter flag, given either as the named argument
        ``filter=...`` or as a trailing positional boolean / 0 / 1.
    """
    if len(args) < 2:
        return [f"函数 add 需要至少 2 个参数,但只提供了 {len(args)}"]

    problems: List[str] = []
    filter_kwargs: List['ASTNode'] = []
    terms: List['ASTNode'] = []

    # Split the raw argument list into expression terms and filter kwargs.
    for raw in args:
        if isinstance(raw, dict) and raw.get('type') == 'named':
            if raw.get('name') != 'filter':
                problems.append(f"函数 add 不存在参数 '{raw.get('name')}'")
            elif not hasattr(raw.get('value'), 'node_type'):
                problems.append("函数 add 的参数 filter 格式错误")
            else:
                filter_kwargs.append(raw.get('value'))
        elif isinstance(raw, dict) and raw.get('type') == 'positional':
            candidate = raw.get('value')
            if hasattr(candidate, 'node_type'):
                terms.append(candidate)
            else:
                problems.append("函数 add 的位置参数格式错误")
        elif hasattr(raw, 'node_type'):
            terms.append(raw)
        else:
            problems.append("函数 add 的参数格式错误")

    if len(filter_kwargs) > 1:
        problems.append("函数 add 的参数 'filter' 只能出现一次")

    # A trailing positional filter flag is inferred only when no named filter
    # exists, at least three positional args were given, and the last one is
    # a boolean literal or the numeric flag 0/1.
    trailing_filter: Optional['ASTNode'] = None
    if not filter_kwargs and len(terms) >= 3:
        last = terms[-1]
        if last.node_type == 'boolean' or (last.node_type == 'number' and last.value in {0, 1}):
            trailing_filter = terms.pop()

    if len(terms) < 2:
        problems.append(f"函数 add 需要至少 2 个输入项(不含filter),但只提供了 {len(terms)}")

    for idx, term in enumerate(terms):
        problems.extend(self._validate_arg_type(term, 'expression', idx, 'add', is_in_group_arg))

    if trailing_filter is not None and filter_kwargs:
        problems.append("函数 add 的 filter 不能同时用位置参数和命名参数传递")
    if trailing_filter is not None:
        problems.extend(self._validate_arg_type(trailing_filter, 'boolean', len(terms), 'add', is_in_group_arg))
    if filter_kwargs:
        problems.extend(self._validate_arg_type(filter_kwargs[0], 'boolean', len(terms), 'add', is_in_group_arg))

    return problems

# Test seam: module-level alias (the name above is underscore-private).
validate_add = _validate_add
# ExpressionValidator methods: recursive AST validation and expansion of
# semicolon-separated scripts into a single expression.

def validate_ast(self, ast: Optional['ASTNode'], is_in_group_arg: bool = False) -> List[str]:
    """Recursively validate an AST; returns a list of error messages."""
    if not ast:
        return ["无法解析表达式"]

    found: List[str] = []

    def _descend(flag: bool) -> None:
        # Walk children; named-argument dicts wrap their value node.
        for child in ast.children:
            if isinstance(child, dict):
                if 'value' in child and hasattr(child['value'], 'node_type'):
                    found.extend(self.validate_ast(child['value'], flag))
            elif hasattr(child, 'node_type'):
                found.extend(self.validate_ast(child, flag))

    if ast.node_type == 'function':
        # Entering a group_* function puts its whole argument chain in group context.
        in_group = is_in_group_arg or ast.value.startswith('group_')
        found.extend(self.validate_function(ast, in_group))
        _descend(in_group)
    elif ast.node_type in ('unop', 'binop'):
        for child in ast.children:
            if hasattr(child, 'node_type'):
                found.extend(self.validate_ast(child, is_in_group_arg))
    elif ast.node_type == 'field':
        if not self._is_valid_field(ast.value):
            found.append(f"无效的字段名: {ast.value}")
    else:
        _descend(is_in_group_arg)

    return found

def _process_semicolon_expression(self, expression: str) -> Tuple[bool, str]:
    """Expand a semicolon-separated script (assignments then a final
    expression) into one flat expression by textual substitution.

    Returns:
        (True, expanded_expression) on success, (False, error_message) otherwise.
    """

    def _find_assignment_ops(stmt: str) -> List[int]:
        # Indices of top-level '=': outside ()[]{}, outside quotes, and not
        # part of ==/!=/<=/>= — so kwargs like rettype=0 are not counted.
        hits: List[int] = []
        p_depth = b_depth = c_depth = 0
        in_sq = in_dq = False
        pending_escape = False
        for idx, ch in enumerate(stmt):
            if pending_escape:
                pending_escape = False
                continue
            if ch == '\\':
                pending_escape = True
                continue
            if in_sq:
                if ch == "'":
                    in_sq = False
                continue
            if in_dq:
                if ch == '"':
                    in_dq = False
                continue
            if ch == "'":
                in_sq = True
                continue
            if ch == '"':
                in_dq = True
                continue
            if ch == '(':
                p_depth += 1
                continue
            if ch == ')':
                p_depth = max(0, p_depth - 1)
                continue
            if ch == '[':
                b_depth += 1
                continue
            if ch == ']':
                b_depth = max(0, b_depth - 1)
                continue
            if ch == '{':
                c_depth += 1
                continue
            if ch == '}':
                c_depth = max(0, c_depth - 1)
                continue
            if p_depth or b_depth or c_depth:
                continue
            if ch != '=':
                continue
            before = stmt[idx - 1] if idx > 0 else ''
            after = stmt[idx + 1] if idx + 1 < len(stmt) else ''
            if before in ['=', '!', '<', '>'] or after == '=':
                continue
            hits.append(idx)
        return hits

    def _collect_kwarg_names(stmt: str):
        # Lower-cased names used as keyword arguments (name=...) inside any
        # bracket pair; top-level assignments are deliberately excluded.
        names = set()
        p_depth = b_depth = c_depth = 0
        in_sq = in_dq = False
        pending_escape = False
        i = 0
        n = len(stmt)
        while i < n:
            ch = stmt[i]
            if pending_escape:
                pending_escape = False
                i += 1
                continue
            if ch == '\\':
                pending_escape = True
                i += 1
                continue
            if in_sq:
                if ch == "'":
                    in_sq = False
                i += 1
                continue
            if in_dq:
                if ch == '"':
                    in_dq = False
                i += 1
                continue
            if ch == "'":
                in_sq = True
                i += 1
                continue
            if ch == '"':
                in_dq = True
                i += 1
                continue
            if ch == '(':
                p_depth += 1
                i += 1
                continue
            if ch == ')':
                p_depth = max(0, p_depth - 1)
                i += 1
                continue
            if ch == '[':
                b_depth += 1
                i += 1
                continue
            if ch == ']':
                b_depth = max(0, b_depth - 1)
                i += 1
                continue
            if ch == '{':
                c_depth += 1
                i += 1
                continue
            if ch == '}':
                c_depth = max(0, c_depth - 1)
                i += 1
                continue

            inside = bool(p_depth or b_depth or c_depth)
            if inside and (ch.isalpha() or ch == '_'):
                start = i
                i += 1
                while i < n and (stmt[i].isalnum() or stmt[i] == '_'):
                    i += 1
                word = stmt[start:i]
                j = i
                while j < n and stmt[j].isspace():
                    j += 1
                if j < n and stmt[j] == '=':
                    after = stmt[j + 1] if j + 1 < n else ''
                    if after != '=':
                        names.add(word.lower())
                continue
            i += 1
        return names

    if expression.strip().endswith(';'):
        return False, "表达式不能以分号结尾"

    statements = [part.strip() for part in expression.split(';') if part.strip()]
    if not statements:
        return False, "表达式不能为空"

    # Accumulated variable bindings (names are case-insensitive).
    bindings = {}

    # Every statement except the last must be a single assignment.
    for seq, stmt in enumerate(statements[:-1]):
        ops = _find_assignment_ops(stmt)
        if not ops:
            return False, f"第{seq+1}个语句必须是赋值语句(使用=符号)"
        if len(ops) > 1:
            return False, f"第{seq+1}个语句只能包含一个赋值符号(=)"

        split_at = ops[0]
        name = stmt[:split_at].strip()
        rhs = stmt[split_at + 1:].strip()

        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
            return False, f"第{seq+1}个语句的变量名'{name}'无效,只能包含字母、数字和下划线,且不能以数字开头"

        # Every identifier used on the right-hand side must be a known kwarg
        # name, an earlier binding, a function, or a long-enough field name.
        kw_names = _collect_kwarg_names(rhs)
        for ident in re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', rhs):
            low = ident.lower()
            if low in kw_names:
                continue
            if low not in bindings:
                if ident not in supported_functions:
                    # Short names are never auto-treated as fields.
                    if len(ident) <= 2:
                        return False, f"第{seq+1}个语句中使用的变量'{ident}'未在之前定义"
                    elif not self._is_valid_field(ident):
                        return False, f"第{seq+1}个语句中使用的变量'{ident}'未在之前定义"

        # Inline earlier bindings (word-boundary match avoids partial hits).
        for known, replacement in bindings.items():
            rhs = re.sub(rf'\b{known}\b', replacement, rhs)

        bindings[name.lower()] = rhs

    # The last statement is the actual expression and must NOT be an assignment.
    final_stmt = statements[-1]
    if _find_assignment_ops(final_stmt):
        return False, "最后一个语句不能是赋值语句"

    kw_names = _collect_kwarg_names(final_stmt)
    for ident in re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', final_stmt):
        low = ident.lower()
        if low in kw_names:
            continue
        if low not in bindings:
            if ident not in supported_functions:
                if len(ident) <= 2:
                    return False, f"最后一个语句中使用的变量'{ident}'未在之前定义"
                # Bare field / category names (e.g. close, industry) are allowed.
                if self._is_valid_field(ident) or low in valid_categories or low in group_fields:
                    continue
                return False, f"最后一个语句中使用的变量'{ident}'未在之前定义"

    expanded = final_stmt
    for known, replacement in bindings.items():
        expanded = re.sub(rf'\b{known}\b', replacement, expanded)

    return True, expanded

# Test seam: module-level alias (the name above is underscore-private).
process_semicolon_expression = _process_semicolon_expression
# ExpressionValidator method: public entry point for validating one expression.
def check_expression(self, expression: str) -> Dict[str, Any]:
    """Check whether an expression is well-formed and valid.

    Args:
        expression: the expression string to validate.

    Returns:
        Dict with keys 'valid' (bool), 'errors' (list of messages),
        'tokens' (lexed tokens) and 'ast' (parse result or None).
    """
    # Reset per-expression state before doing anything else.
    self.errors = []
    self._unit_cache = {}
    self._derived_category_cache = {}

    try:
        expression = expression.strip()
        if not expression:
            return {
                'valid': False,
                'errors': ['表达式不能为空'],
                'tokens': [],
                'ast': None
            }

        # Semicolon scripts are first expanded into a single expression.
        if ';' in expression:
            ok, expanded = self._process_semicolon_expression(expression)
            if not ok:
                return {
                    'valid': False,
                    'errors': [expanded],
                    'tokens': [],
                    'ast': None
                }
            expression = expanded

        # First pass: tokenize (kept for callers/debugging).
        self.lexer.lineno = 1
        self.lexer.input(expression)
        token_list = [tok for tok in self.lexer]

        # Second pass: feed the lexer again for the parser.
        self.lexer.input(expression)
        self.lexer.lineno = 1
        ast = self.parser.parse(expression, lexer=self.lexer)

        # Combine lexer/parser errors with semantic validation errors.
        problems = self.errors + self.validate_ast(ast)

        # Parenthesis balance check. NOTE(review): this counts parens inside
        # string literals too — confirm that is acceptable for this grammar.
        depth = 0
        for ch in expression:
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth < 0:
                    problems.append("括号不匹配: 右括号过多")
                    break
        if depth > 0:
            problems.append("括号不匹配: 左括号过多")

        return {
            'valid': not problems,
            'errors': problems,
            'tokens': token_list,
            'ast': ast
        }
    except Exception as e:
        # Top-level boundary: surface unexpected failures as a result, not a crash.
        return {
            'valid': False,
            'errors': [f"解析错误: {str(e)}"],
            'tokens': [],
            'ast': None
        }
self._derived_category_cache = {} + + try: + expression = expression.strip() + if not expression: + return { + 'valid': False, + 'errors': ['表达式不能为空'], + 'tokens': [], + 'ast': None + } + + # 处理带有分号的表达式 + if ';' in expression: + success, result = self._process_semicolon_expression(expression) + if not success: + return { + 'valid': False, + 'errors': [result], + 'tokens': [], + 'ast': None + } + expression = result + + # 重置词法分析器的行号 + self.lexer.lineno = 1 + + # 词法分析(用于调试) + self.lexer.input(expression) + tokens = [] + # 调试:打印识别的标记 + # print(f"\n调试 - 表达式: {expression}") + # print("识别的标记:") + for token in self.lexer: + # print(f" - 类型: {token.type}, 值: '{token.value}', 位置: {token.lexpos}") + tokens.append(token) + + # 重新设置词法分析器的输入,以便语法分析器使用 + self.lexer.input(expression) + self.lexer.lineno = 1 + + # 语法分析 + ast = self.parser.parse(expression, lexer=self.lexer) + + # 验证AST + validation_errors = self.validate_ast(ast) + + # 合并所有错误 + all_errors = self.errors + validation_errors + + # 检查括号是否匹配 + bracket_count = 0 + for char in expression: + if char == '(': + bracket_count += 1 + elif char == ')': + bracket_count -= 1 + if bracket_count < 0: + all_errors.append("括号不匹配: 右括号过多") + break + if bracket_count > 0: + all_errors.append("括号不匹配: 左括号过多") + + return { + 'valid': len(all_errors) == 0, + 'errors': all_errors, + 'tokens': tokens, + 'ast': ast + } + except Exception as e: + return { + 'valid': False, + 'errors': [f"解析错误: {str(e)}"], + 'tokens': [], + 'ast': None + } + + + diff --git a/simple72/__init__.py b/simple72/__init__.py new file mode 100644 index 0000000..e38997d --- /dev/null +++ b/simple72/__init__.py @@ -0,0 +1 @@ +# Alpha Transformer - diff --git a/simple72/config.json b/simple72/config.json new file mode 100644 index 0000000..5cbfe58 --- /dev/null +++ b/simple72/config.json @@ -0,0 +1,15 @@ +{ + "brain": { + "username": "jack0210_@hotmail.com", + "password": "!QAZ2wsx+0913" + }, + "llm": { + "api_key": 
"sk-cp-l_as8mjqPhsOIny9IFKZ8jzA92z1c0eRwchldhEf4KzQjs9cjVknV2o7VNCcvYUXsXFq7uF4aSgp2RxxmUHLXwPGKgIvzedM70_XUIXiBB3gu_UmLDQLfh4", + "base_url": "https://api.minimaxi.com/v1", + "model": "MiniMax-M2.7" + }, + "transformer": { + "top_n_datafield": 30, + "data_type": "MATRIX" + } +} diff --git a/simple72/config.json.example b/simple72/config.json.example new file mode 100644 index 0000000..5a01353 --- /dev/null +++ b/simple72/config.json.example @@ -0,0 +1,15 @@ +{ + "brain": { + "username": "your_brain_username", + "password": "your_brain_password" + }, + "llm": { + "api_key": "your_llm_api_key", + "base_url": "https://api.moonshot.cn/v1", + "model": "kimi-k2.5" + }, + "transformer": { + "top_n_datafield": 50, + "data_type": "MATRIX" + } +} diff --git a/simple72/main.py b/simple72/main.py new file mode 100644 index 0000000..05a69f6 --- /dev/null +++ b/simple72/main.py @@ -0,0 +1,404 @@ +# FastAPI 应用主入口 +from fastapi import FastAPI, Request +from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, FileResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +import sys +import os +import json +import subprocess +import uuid +import asyncio +import zipfile +from datetime import datetime + +# 创建 FastAPI 应用实例 +app = FastAPI(title="Alpha Transformer", version="1.0.0") + +# Chrome DevTools 健康检查端点(可选,阻止 404 日志) +@app.get("/.well-known/appspecific/com.chrome.devtools.json") +async def chrome_devtools_check(): + """Chrome DevTools 健康检查""" + return {"status": "ok"} + +# 提供模板目录中的 CSS 和 JS 文件 +@app.get("/styles.css") +async def get_styles(): + """提供 styles.css 文件""" + from fastapi.responses import FileResponse + return FileResponse("templates/styles.css") + +@app.get("/app.js") +async def get_app_js(): + """提供 app.js 文件""" + from fastapi.responses import FileResponse + return FileResponse("templates/app.js") + +# 配置 Jinja2 模板引擎,用于渲染 HTML 页面 +templates = Jinja2Templates(directory="templates") + +# 存储正在运行的任务信息(task_id 
-> 任务状态) +transformer_tasks = {} + +# 全局配置变量 +app_config = {} + +def load_config(): + """ + 加载配置文件 + 读取 config.json 文件,如果有则加载,否则返回空字典 + """ + config_path = os.path.join(os.path.dirname(__file__), 'config.json') + if os.path.exists(config_path): + try: + with open(config_path, 'r', encoding='utf-8') as f: + config = json.load(f) + print(f"✓ 已加载配置文件: {config_path}") + return config + except Exception as e: + print(f"⚠ 加载配置文件失败: {e}") + return {} + else: + print(f"⚠ 配置文件不存在: {config_path}") + return {} + +# 启动时加载配置 +app_config = load_config() + + +@app.get("/", response_class=HTMLResponse) +async def home(): + # 读取并返回前端首页 HTML + with open("templates/index.html", "r", encoding="utf-8") as f: + return f.read() + + +@app.get("/api/config/defaults") +async def get_config_defaults(): + """ + 获取默认配置 + 返回 config.json 中的配置作为表单默认值 + """ + return JSONResponse(content={ + "success": True, + "config": app_config + }) + + +@app.post("/api/config/save") +async def save_config(request: Request): + """ + 保存配置到 config.json + 用于在页面上修改配置后保存 + """ + try: + data = await request.json() + + # 更新全局配置 + global app_config + app_config = data + + # 写入文件 + config_path = os.path.join(os.path.dirname(__file__), 'config.json') + with open(config_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=4, ensure_ascii=False) + + return JSONResponse(content={ + "success": True, + "message": "配置已保存" + }) + except Exception as e: + return JSONResponse( + status_code=500, + content={"success": False, "error": str(e)} + ) + + +@app.post("/api/generate") +async def generate_alpha(request: Request): + """ + 生成 Alpha 变种的 API 端点 + 接收前端表单数据,启动 Transformer 脚本执行 Alpha 生成任务 + """ + print("=" * 50) + print("收到生成变种请求") + + try: + # 解析请求数据 + data = await request.json() + print(f"请求数据: alpha_id={data.get('alpha_id')}, llm_model={data.get('llm_model')}") + + # 生成唯一任务 ID + task_id = str(uuid.uuid4()) + print(f"生成任务 ID: {task_id}") + + # 定义必须提交的字段 + required_fields = [ + "alpha_id", + "llm_api_key", + 
"llm_base_url", + "llm_model", + "brain_username", + "brain_password" + ] + + # 检查必填字段是否完整 + for field in required_fields: + if not data.get(field): + print(f"缺少必填字段: {field}") + return JSONResponse( + status_code=400, + content={"success": False, "error": f"Missing required field: {field}"} + ) + + # 获取脚本所在目录和 Transformer 子目录 + script_dir = os.path.dirname(os.path.abspath(__file__)) + transformer_dir = os.path.join(script_dir, 'Tranformer') + print(f"Transformer 目录: {transformer_dir}") + + # 构建传递给 Transformer 脚本的配置 + config = { + "LLM_model_name": data.get('llm_model'), + "LLM_API_KEY": data.get('llm_api_key'), + "llm_base_url": data.get('llm_base_url'), + "username": data.get('brain_username'), + "password": data.get('brain_password'), + "alpha_id": data.get('alpha_id'), + "top_n_datafield": int(data.get('top_n_datafield', 50)), + "user_region": data.get('user_region'), + "user_universe": data.get('user_universe'), + "user_delay": int(data.get('user_delay')) if data.get('user_delay') else None, + "user_category": data.get('user_category'), + "user_data_type": data.get('user_data_type', 'MATRIX') + } + print(f"配置已构建: LLM_model={config['LLM_model_name']}, alpha_id={config['alpha_id']}") + + # 将配置写入临时 JSON 文件,供 Transformer 脚本读取 + config_path = os.path.join(transformer_dir, f'config_{task_id}.json') + with open(config_path, 'w', encoding='utf-8') as f: + json.dump(config, f, indent=4) + print(f"配置文件已写入: {config_path}") + + try: + # 启动 Transformer.py 子进程执行 Alpha 生成 + print(f"启动 Transformer 脚本...") + process = subprocess.run( + [sys.executable, '-u', os.path.join(transformer_dir, 'Transformer.py'), config_path], + cwd=transformer_dir, + capture_output=True, + text=True, + timeout=600, + env={**os.environ, "PYTHONIOENCODING": "utf-8"} + ) + print(f"Transformer 脚本执行完成,返回码: {process.returncode}") + + # 定义输出文件路径 + output_file = os.path.join(transformer_dir, 'output', 'Alpha_generated_expressions_success.json') + candidates_file = os.path.join(transformer_dir, 'output', 
'Alpha_candidates.json') + error_file = os.path.join(transformer_dir, 'output', 'Alpha_generated_expressions_error.json') + + # 构建响应数据 + result = { + "success": True, + "alpha_id": data.get('alpha_id'), + "stdout": process.stdout, + "stderr": process.stderr, + "return_code": process.returncode + } + + # 读取成功生成的表达式 + if os.path.exists(output_file): + print(f"读取成功表达式文件: {output_file}") + with open(output_file, 'r', encoding='utf-8') as f: + result['expressions_success'] = json.load(f) + else: + print(f"成功表达式文件不存在: {output_file}") + result['expressions_success'] = [] + + # 读取候选表达式 + if os.path.exists(candidates_file): + print(f"读取候选表达式文件: {candidates_file}") + with open(candidates_file, 'r', encoding='utf-8') as f: + result['candidates'] = json.load(f) + else: + print(f"候选表达式文件不存在: {candidates_file}") + result['candidates'] = [] + + # 读取生成失败的表达式 + if os.path.exists(error_file): + print(f"读取错误表达式文件: {error_file}") + with open(error_file, 'r', encoding='utf-8') as f: + result['expressions_error'] = json.load(f) + else: + print(f"错误表达式文件不存在: {error_file}") + result['expressions_error'] = [] + + print(f"成功: {len(result['expressions_success'])} 个, 候选: {len(result['candidates'])} 个, 错误: {len(result['expressions_error'])} 个") + print("=" * 50) + return JSONResponse(content=result) + + finally: + # 清理临时配置文件 + if os.path.exists(config_path): + os.remove(config_path) + print(f"已清理临时配置文件: {config_path}") + + except subprocess.TimeoutExpired: + print("任务执行超时 (600秒)") + return JSONResponse( + status_code=408, + content={"success": False, "error": "Task timeout (600s)"} + ) + except Exception as e: + print(f"执行异常: {str(e)}") + return JSONResponse( + status_code=500, + content={"success": False, "error": str(e)} + ) + + +@app.post("/api/transformer/login-and-fetch-options") +async def login_and_fetch_options(request: Request): + """ + 登录 BRAIN 并获取地区、Delay、Universe、类别等选项 + 用于填充高级选项表单 + """ + try: + data = await request.json() + username = data.get('username') + password = 
data.get('password') + + if not username or not password: + return JSONResponse( + status_code=400, + content={'success': False, 'error': 'Username and password are required'} + ) + + # 添加 Transformer 目录到 sys.path + script_dir = os.path.dirname(os.path.abspath(__file__)) + transformer_dir = os.path.join(script_dir, 'Tranformer') + if transformer_dir not in sys.path: + sys.path.append(transformer_dir) + + # 导入必要的模块 + from Tranformer.ace_lib import SingleSession, get_instrument_type_region_delay + import pandas as pd + + # 创建新的会话实例 + session = SingleSession() + session.auth = (username, password) + + brain_api_url = "https://api.worldquantbrain.com" + response = session.post(brain_api_url + "/authentication") + + if response.status_code == 201: + # 认证成功 + pass + elif response.status_code == 401: + return JSONResponse( + status_code=401, + content={'success': False, 'error': 'Authentication failed: Invalid credentials'} + ) + else: + return JSONResponse( + status_code=400, + content={'success': False, 'error': f'Authentication failed: {response.status_code}'} + ) + + # 获取 region/delay/universe 选项 + df = get_instrument_type_region_delay(session) + + # 获取数据类别 + categories_resp = session.get(brain_api_url + "/data-categories") + categories = [] + if categories_resp.status_code == 200: + categories_data = categories_resp.json() + if isinstance(categories_data, list): + categories = categories_data + elif isinstance(categories_data, dict): + categories = categories_data.get('results', []) + + # 转换 DataFrame 为前端需要的嵌套字典结构 + # 结构: Region -> Delay -> Universe + df_equity = df[df['InstrumentType'] == 'EQUITY'] + + options = {} + for _, row in df_equity.iterrows(): + region = row['Region'] + delay = row['Delay'] + universes = row['Universe'] # 这是一个列表 + + if region not in options: + options[region] = {} + + # 将 delay 转换为字符串作为字典的键 + delay_str = str(delay) + if delay_str not in options[region]: + options[region][delay_str] = universes + + return JSONResponse(content={ + 'success': 
True, + 'options': options, + 'categories': categories + }) + + except Exception as e: + print(f"登录获取选项失败: {str(e)}") + return JSONResponse( + status_code=500, + content={'success': False, 'error': str(e)} + ) + + +@app.get("/api/health") +async def health_check(): + """健康检查端点,用于验证服务是否正常运行""" + return {"status": "healthy", "service": "alpha-transformer"} + + +@app.get("/api/download/{alpha_id}") +async def download_results(alpha_id: str): + """ + 下载生成结果的 zip 压缩包 + 包含三个 JSON 文件:success, candidates, error + """ + try: + transformer_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Tranformer') + output_dir = os.path.join(transformer_dir, 'output') + + # 检查文件是否存在 + files_to_zip = { + 'Alpha_generated_expressions_success.json': os.path.join(output_dir, 'Alpha_generated_expressions_success.json'), + 'Alpha_candidates.json': os.path.join(output_dir, 'Alpha_candidates.json'), + 'Alpha_generated_expressions_error.json': os.path.join(output_dir, 'Alpha_generated_expressions_error.json') + } + + # 生成时间戳 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + zip_filename = f"{alpha_id}_{timestamp}.zip" + zip_path = os.path.join(output_dir, zip_filename) + + # 创建 zip 文件 + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for arcname, filepath in files_to_zip.items(): + if os.path.exists(filepath): + zipf.write(filepath, arcname) + + # 返回文件 + return FileResponse( + zip_path, + media_type='application/zip', + filename=zip_filename + ) + + except Exception as e: + return JSONResponse( + status_code=500, + content={"success": False, "error": str(e)} + ) + + +if __name__ == "__main__": + # 启动 FastAPI 应用,使用 uvicorn 作为 ASGI 服务器 + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/simple72/requirements.txt b/simple72/requirements.txt new file mode 100644 index 0000000..16a8f4a --- /dev/null +++ b/simple72/requirements.txt @@ -0,0 +1,7 @@ +fastapi>=0.100.0 +uvicorn>=0.20.0 +requests>=2.28.0 +openai>=1.0.0 +pandas>=2.0.0 
+pydantic>=2.0.0 +jinja2>=3.0.0 diff --git a/simple72/running_error.txt b/simple72/running_error.txt new file mode 100644 index 0000000..5255c86 --- /dev/null +++ b/simple72/running_error.txt @@ -0,0 +1,10 @@ +{ + "success": true, + "alpha_id": "MP5gWQva", + "stdout": "✓ 已加载模板总结文件: /Users/jack/source/mySpace/mycode/my_project/py/alpha_odoo/alpha_tools/simple72/Tranformer/template_summary.md\n✓ 已从命令行参数加载配置: /Users/jack/source/mySpace/mycode/my_project/py/alpha_odoo/alpha_tools/simple72/Tranformer/config_51a505a0-322f-45f2-9180-98c25b2e731c.json\n✓ 使用内置模板总结\n--- 正在启动 BRAIN 会话... ---\nNew session created (ID: 12659217744) with authentication response: 201, {'user': {'id': 'YC93384'}, 'token': {'expiry': 14400.0}, 'permissions': ['BEFORE_AND_AFTER_PERFORMANCE_V2', 'BRAIN_LABS', 'BRAIN_LABS_JUPYTER_LAB', 'CONSULTANT', 'MULTI_SIMULATION', 'PROD_ALPHAS', 'REFERRAL', 'VISUALIZATION', 'WORKDAY']} (新会话已创建)\n--- 正在认证 LLM Gateway... ---\n✓ LLM Gateway 认证成功\n\n--- 正在获取 Alpha ID: MP5gWQva 的详情... ---\nNew session created (ID: 12659217744) with authentication response: 201, {'user': {'id': 'YC93384'}, 'token': {'expiry': 14400.0}, 'permissions': ['BEFORE_AND_AFTER_PERFORMANCE_V2', 'BRAIN_LABS', 'BRAIN_LABS_JUPYTER_LAB', 'CONSULTANT', 'MULTI_SIMULATION', 'PROD_ALPHAS', 'REFERRAL', 'VISUALIZATION', 'WORKDAY']} (新会话已创建)\nstatus_code 429, sleep 3 seconds\nLLM Gateway Authentication successful. (LLM网关认证成功)\n--- Calling LLM to propose templates... (正在调用LLM生成模板...) ---\nLLM Gateway Authentication successful. (LLM网关认证成功)\n--- Calling LLM to propose templates... (正在调用LLM生成模板...) ---\nAlpha MP5gWQva description updated on platform. 
(Alpha描述已在平台更新)\nNew session created (ID: 12659217744) with authentication response: 201, {'user': {'id': 'YC93384'}, 'token': {'expiry': 14400.0}, 'permissions': ['BEFORE_AND_AFTER_PERFORMANCE_V2', 'BRAIN_LABS', 'BRAIN_LABS_JUPYTER_LAB', 'CONSULTANT', 'MULTI_SIMULATION', 'PROD_ALPHAS', 'REFERRAL', 'VISUALIZATION', 'WORKDAY']} (新会话已创建)\n✓ LLM Gateway 认证成功\nAlpha Details Retrieved (已获取Alpha详情):\n{\n \"settings\": {\n \"instrumentType\": \"EQUITY\",\n \"region\": \"IND\",\n \"universe\": \"TOP500\",\n \"delay\": 1,\n \"decay\": 12,\n \"neutralization\": \"SLOW_AND_FAST\",\n \"truncation\": 0.02,\n \"pasteurization\": \"ON\",\n \"unitHandling\": \"VERIFY\",\n \"nanHandling\": \"ON\",\n \"maxTrade\": \"OFF\",\n \"maxPosition\": \"OFF\",\n \"language\": \"FASTEXPR\",\n \"visualization\": false,\n \"startDate\": \"2014-01-01\",\n \"endDate\": \"2023-12-31\"\n },\n \"expression\": {\n \"code\": \"divide(avg_pct_change_estimate_next_year_earnings_7d, add(analysts_count_revising_up_quarter2_earnings_30d, 0.0001))\",\n \"description\": \"{\\n \\\"text\\\": \\\"\\\\nWe need to generate a new, improved description for the alpha code.\\\\n\\\\nThe code:\\\\n\\\\ndivide(avg_pct_change_estimate_next_year_earnings_7d, add(analysts_count_revising_up_quarter2_earnings_30d, 0.0001))\\\\n\\\\nSo the alpha is dividing the average percent change in next-year earnings estimates over the past 7 days by the number of analysts revising up Q2 earnings over the last 30 days plus a small constant.\\\\n\\\\nWe need to produce an improved description: explain investment idea, rationale for data used, rationale for operators used.\\\\n\\\\nWe need to format as:\\\\n\\\\n\\\\\\\"Idea: xxxxx\\\\\\\\nRationale for data used: xxxxx\\\\\\\\nRationale for operators used: xxxxx\\\\\\\"\\\\n\\\\nWe should produce a description that clarifies the alpha: The alpha tries to measure the momentum in earnings estimate revisions relative to the breadth of analyst revisions, possibly indicating the strength of 
upward sentiment. By dividing the short-term (7d) average percentage change in next-year earnings estimates by the count of analysts revising up Q2 earnings (with a small floor), it normalizes the magnitude of estimate changes by the number of analysts, adjusting for market breadth. The small constant avoids division by zero.\\\\n\\\\nRationale for data used: avg_pct_change_estimate_next_year_earnings_7d captures recent changes in forward earnings expectations; analysts_count_revising_up_quarter2_earnings_30d captures recent positive revisions for near-term quarter; using next-year vs quarter provides longer horizon vs near-term; combining them reflects both magnitude and breadth.\\\\n\\\\nRationale for operators: divide normalizes the magnitude by breadth; add ensures non-zero denominator; maybe we can mention the constant 0.0001 is to avoid division by zero.\\\\n\\\\nThe description should be clear and concise, suitable for a quantitative alpha description.\\\\n\\\\nPotential nuance: The alpha may be interpreted as a measure of how large the per-analyst estimate change is, relative to the number of analysts revising up; if many analysts revise up but the average change is small, the ratio may be low; if a few analysts raise estimates dramatically, the ratio may be high. This can signal either concentrated strong conviction or broad moderate revisions.\\\\n\\\\nWe might also mention that the 0.0001 constant ensures stability and prevents division by zero while having negligible effect on scaling.\\\\n\\\\nWe need to incorporate the idea that the alpha uses a small denominator offset to avoid extreme values.\\\\n\\\\nThus the description will be something like:\\\\n\\\\nIdea: The alpha captures the intensity of recent upward revisions in earnings estimates by dividing the short\\\\u2011term percentage change in next\\\\u2011year consensus earnings by the count of analysts raising Q2 earnings. 
It thus reflects the average magnitude of upward revisions per revising analyst, combining the speed of estimate moves with the breadth of positive revisions.\\\\n\\\\nRationale for data used: ... etc.\\\\n\\\\nRationale for operators used: ... etc.\\\\n\\\\nLet's produce that. Use proper punctuation.\\\\n\\\\nMake sure to mention the constant 0.0001 for stability.\\\\n\\\\nOk.\\\\n\\\\n\\\\nIdea: The alpha measures the intensity of recent upward earnings revisions by taking the short\\\\u2011term percentage change in consensus estimates for the next fiscal year and scaling it by the breadth of analysts that have raised their second\\\\u2011quarter estimates. In other words, it reflects how large the average upward revision is per revising analyst, combining the momentum of estimate changes with the number of analysts turning bullish.\\\\n\\\\nRationale for data used: \\\\n- **avg_pct_change_estimate_next_year_earnings_7d** captures the most recent (7\\\\u2011day) directional momentum in forward\\\\u2011year earnings expectations, giving a timely signal of how fast analysts are revising their views. \\\\n- **analysts_count_revising_up_quarter2_earnings_30d** measures the breadth of positive sentiment over a slightly longer (30\\\\u2011day) window for the nearer\\\\u2011term quarter, indicating how many analysts are turning optimistic. By pairing a long\\\\u2011horizon momentum metric with a near\\\\u2011term breadth metric, the alpha blends the strength of the revision trend with the consensus behind it.\\\\n\\\\nRationale for operators used: \\\\n- **divide** normalizes the magnitude of the estimate change by the count of revising analysts, producing a \\\\u201cper\\\\u2011analyst\\\\u201d revision intensity that is comparable across stocks regardless of how many analysts cover them. 
\\\\n- **add(..., 0.0001)** introduces a tiny constant to the denominator to avoid division\\\\u2011by\\\\u2011zero when no analysts have revised up, ensuring numerical stability without materially affecting the ratio\\\\u2019s scaling. The small offset is negligible in normal conditions but prevents extreme values or errors in thin\\\\u2011coverage names.\\\"\\n}\",\n \"operatorCount\": 2\n }\n}\n\n============================================================\n[Step 2/5] 正在生成 Alpha 模板提议...\n============================================================\ncurrent seed alpha detail (当前种子Alpha详情): {'code': 'divide(avg_pct_change_estimate_next_year_earnings_7d, add(analysts_count_revising_up_quarter2_earnings_30d, 0.0001))', 'description': '{\\n \"text\": \"\\\\nWe need to generate a new, improved description for the alpha code.\\\\n\\\\nThe code:\\\\n\\\\ndivide(avg_pct_change_estimate_next_year_earnings_7d, add(analysts_count_revising_up_quarter2_earnings_30d, 0.0001))\\\\n\\\\nSo the alpha is dividing the average percent change in next-year earnings estimates over the past 7 days by the number of analysts revising up Q2 earnings over the last 30 days plus a small constant.\\\\n\\\\nWe need to produce an improved description: explain investment idea, rationale for data used, rationale for operators used.\\\\n\\\\nWe need to format as:\\\\n\\\\n\\\\\"Idea: xxxxx\\\\\\\\nRationale for data used: xxxxx\\\\\\\\nRationale for operators used: xxxxx\\\\\"\\\\n\\\\nWe should produce a description that clarifies the alpha: The alpha tries to measure the momentum in earnings estimate revisions relative to the breadth of analyst revisions, possibly indicating the strength of upward sentiment. By dividing the short-term (7d) average percentage change in next-year earnings estimates by the count of analysts revising up Q2 earnings (with a small floor), it normalizes the magnitude of estimate changes by the number of analysts, adjusting for market breadth. 
The small constant avoids division by zero.\\\\n\\\\nRationale for data used: avg_pct_change_estimate_next_year_earnings_7d captures recent changes in forward earnings expectations; analysts_count_revising_up_quarter2_earnings_30d captures recent positive revisions for near-term quarter; using next-year vs quarter provides longer horizon vs near-term; combining them reflects both magnitude and breadth.\\\\n\\\\nRationale for operators: divide normalizes the magnitude by breadth; add ensures non-zero denominator; maybe we can mention the constant 0.0001 is to avoid division by zero.\\\\n\\\\nThe description should be clear and concise, suitable for a quantitative alpha description.\\\\n\\\\nPotential nuance: The alpha may be interpreted as a measure of how large the per-analyst estimate change is, relative to the number of analysts revising up; if many analysts revise up but the average change is small, the ratio may be low; if a few analysts raise estimates dramatically, the ratio may be high. This can signal either concentrated strong conviction or broad moderate revisions.\\\\n\\\\nWe might also mention that the 0.0001 constant ensures stability and prevents division by zero while having negligible effect on scaling.\\\\n\\\\nWe need to incorporate the idea that the alpha uses a small denominator offset to avoid extreme values.\\\\n\\\\nThus the description will be something like:\\\\n\\\\nIdea: The alpha captures the intensity of recent upward revisions in earnings estimates by dividing the short\\\\u2011term percentage change in next\\\\u2011year consensus earnings by the count of analysts raising Q2 earnings. It thus reflects the average magnitude of upward revisions per revising analyst, combining the speed of estimate moves with the breadth of positive revisions.\\\\n\\\\nRationale for data used: ... etc.\\\\n\\\\nRationale for operators used: ... etc.\\\\n\\\\nLet\\'s produce that. 
Use proper punctuation.\\\\n\\\\nMake sure to mention the constant 0.0001 for stability.\\\\n\\\\nOk.\\\\n\\\\n\\\\nIdea: The alpha measures the intensity of recent upward earnings revisions by taking the short\\\\u2011term percentage change in consensus estimates for the next fiscal year and scaling it by the breadth of analysts that have raised their second\\\\u2011quarter estimates. In other words, it reflects how large the average upward revision is per revising analyst, combining the momentum of estimate changes with the number of analysts turning bullish.\\\\n\\\\nRationale for data used: \\\\n- **avg_pct_change_estimate_next_year_earnings_7d** captures the most recent (7\\\\u2011day) directional momentum in forward\\\\u2011year earnings expectations, giving a timely signal of how fast analysts are revising their views. \\\\n- **analysts_count_revising_up_quarter2_earnings_30d** measures the breadth of positive sentiment over a slightly longer (30\\\\u2011day) window for the nearer\\\\u2011term quarter, indicating how many analysts are turning optimistic. By pairing a long\\\\u2011horizon momentum metric with a near\\\\u2011term breadth metric, the alpha blends the strength of the revision trend with the consensus behind it.\\\\n\\\\nRationale for operators used: \\\\n- **divide** normalizes the magnitude of the estimate change by the count of revising analysts, producing a \\\\u201cper\\\\u2011analyst\\\\u201d revision intensity that is comparable across stocks regardless of how many analysts cover them. \\\\n- **add(..., 0.0001)** introduces a tiny constant to the denominator to avoid division\\\\u2011by\\\\u2011zero when no analysts have revised up, ensuring numerical stability without materially affecting the ratio\\\\u2019s scaling. 
The small offset is negligible in normal conditions but prevents extreme values or errors in thin\\\\u2011coverage names.\"\\n}', 'operatorCount': 2}\n\n[Step 1/5] 正在调用 LLM 生成 Alpha 模板...\n - 模型: MiniMax-M2.7\n - 数据类型: MATRIX\nAn error occurred while calling the LLM (调用LLM时发生错误): unhashable type: 'slice'\nFailed to generate proposed alpha templates. (生成提议模板失败)\n", + "stderr": "", + "return_code": 1, + "expressions_success": [], + "candidates": [], + "expressions_error": [] +} \ No newline at end of file diff --git a/simple72/templates/app.js b/simple72/templates/app.js new file mode 100644 index 0000000..54f6bac --- /dev/null +++ b/simple72/templates/app.js @@ -0,0 +1,290 @@ +document.addEventListener('DOMContentLoaded', async () => { + try { + const response = await fetch('/api/config/defaults'); + const result = await response.json(); + + if (result.success && result.config) { + const config = result.config; + + if (config.brain) { + if (config.brain.username) { + document.getElementById('brainUsername').value = config.brain.username; + } + if (config.brain.password) { + document.getElementById('brainPassword').value = config.brain.password; + } + } + + if (config.llm) { + if (config.llm.api_key) { + document.getElementById('llmApiKey').value = config.llm.api_key; + } + if (config.llm.base_url) { + document.getElementById('llmBaseUrl').value = config.llm.base_url; + } + if (config.llm.model) { + document.getElementById('llmModel').value = config.llm.model; + } + } + + if (config.transformer) { + if (config.transformer.top_n_datafield) { + document.getElementById('topNDatafield').value = config.transformer.top_n_datafield; + } + if (config.transformer.data_type) { + document.getElementById('dataType').value = config.transformer.data_type; + } + } + } + } catch (error) { + console.error('加载默认配置失败:', error); + } +}); + +const form = document.getElementById('transformerForm'); +const submitBtn = document.getElementById('submitBtn'); +const downloadBtn = 
document.getElementById('downloadBtn'); +const loginAndFetchBtn = document.getElementById('loginAndFetchBtn'); +const regionSelect = document.getElementById('region'); +const delaySelect = document.getElementById('delay'); +const universeSelect = document.getElementById('universe'); +const dataTypeSelect = document.getElementById('dataType'); +const categoryButtons = document.getElementById('category-buttons'); + +let optionsData = {}; + +dataTypeSelect.addEventListener('change', function() { + if (this.value === 'VECTOR') { + if (!confirm("请确保您输入的原型Alpha中正确地使用了vector operator,否则极容易造成数据类型错误!")) { + this.value = 'MATRIX'; + } + } +}); + +loginAndFetchBtn.addEventListener('click', async () => { + const username = document.getElementById('brainUsername').value.trim(); + const password = document.getElementById('brainPassword').value; + + if (!username || !password) { + alert('请先填写BRAIN用户名和密码'); + return; + } + + loginAndFetchBtn.disabled = true; + loginAndFetchBtn.textContent = '正在登录...'; + + try { + const response = await fetch('/api/transformer/login-and-fetch-options', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ username, password }) + }); + + const result = await response.json(); + + if (result.success) { + optionsData = result.options; + + populateRegionSelect(); + regionSelect.disabled = false; + + if (result.categories) { + populateCategoryButtons(result.categories); + } + + loginAndFetchBtn.textContent = '登录成功'; + } else { + alert('登录失败: ' + result.error); + loginAndFetchBtn.disabled = false; + loginAndFetchBtn.textContent = '登录BRAIN并获取选项'; + } + } catch (error) { + alert('登录出错: ' + error.message); + loginAndFetchBtn.disabled = false; + loginAndFetchBtn.textContent = '登录BRAIN并获取选项'; + } +}); + +function populateRegionSelect() { + while (regionSelect.options.length > 1) { + regionSelect.remove(1); + } + + const regions = Object.keys(optionsData); + regions.forEach(region => { + const option = 
document.createElement('option'); + option.value = region; + option.textContent = region; + regionSelect.appendChild(option); + }); +} + +function populateCategoryButtons(categories) { + categories.forEach(category => { + const btn = document.createElement('button'); + btn.type = 'button'; + btn.dataset.value = category.id || category; + btn.textContent = category.name || category; + btn.onclick = function() { toggleCategory(this); }; + btn.className = 'btn'; + btn.style.cssText = 'padding: 4px 12px; font-size: 11px;'; + categoryButtons.appendChild(btn); + }); +} + +function toggleCategory(btn) { + const allBtn = document.getElementById('cat-all'); + const isAllBtn = (btn === allBtn); + + if (isAllBtn) { + allBtn.style.backgroundColor = '#000080'; + allBtn.style.color = 'white'; + + const otherBtns = categoryButtons.querySelectorAll('button:not(#cat-all)'); + otherBtns.forEach(b => { + b.style.backgroundColor = '#c0c0c0'; + b.style.color = 'black'; + }); + } else { + if (btn.style.backgroundColor === 'rgb(0, 0, 128)') { + btn.style.backgroundColor = '#c0c0c0'; + btn.style.color = 'black'; + } else { + btn.style.backgroundColor = '#000080'; + btn.style.color = 'white'; + } + + const anySelected = categoryButtons.querySelectorAll('button:not(#cat-all)'); + let hasSelected = false; + anySelected.forEach(b => { + if (b.style.backgroundColor === 'rgb(0, 0, 128)') { + hasSelected = true; + } + }); + + if (hasSelected) { + allBtn.style.backgroundColor = '#c0c0c0'; + allBtn.style.color = 'black'; + } else { + allBtn.style.backgroundColor = '#000080'; + allBtn.style.color = 'white'; + } + } +} + +regionSelect.addEventListener('change', () => { + const selectedRegion = regionSelect.value; + + delaySelect.innerHTML = ''; + universeSelect.innerHTML = ''; + delaySelect.disabled = true; + universeSelect.disabled = true; + + if (selectedRegion && optionsData[selectedRegion]) { + const delays = Object.keys(optionsData[selectedRegion]); + delays.forEach(delay => { + const option = 
document.createElement('option'); + option.value = delay; + option.textContent = delay; + delaySelect.appendChild(option); + }); + delaySelect.disabled = false; + } +}); + +delaySelect.addEventListener('change', () => { + const selectedRegion = regionSelect.value; + const selectedDelay = delaySelect.value; + + universeSelect.innerHTML = ''; + universeSelect.disabled = true; + + if (selectedRegion && selectedDelay && optionsData[selectedRegion][selectedDelay]) { + const universes = optionsData[selectedRegion][selectedDelay]; + universes.forEach(universe => { + const option = document.createElement('option'); + option.value = universe; + option.textContent = universe; + universeSelect.appendChild(option); + }); + universeSelect.disabled = false; + } +}); + +form.addEventListener('submit', async (e) => { + e.preventDefault(); + + const formData = { + alpha_id: document.getElementById('alphaId').value.trim(), + llm_api_key: document.getElementById('llmApiKey').value.trim(), + llm_base_url: document.getElementById('llmBaseUrl').value.trim(), + llm_model: document.getElementById('llmModel').value.trim(), + brain_username: document.getElementById('brainUsername').value.trim(), + brain_password: document.getElementById('brainPassword').value.trim(), + top_n_datafield: parseInt(document.getElementById('topNDatafield').value) || 50, + data_type: document.getElementById('dataType').value || 'MATRIX' + }; + + const region = document.getElementById('region').value; + const delay = document.getElementById('delay').value; + const universe = document.getElementById('universe').value; + + if (region) formData.user_region = region; + if (delay) formData.user_delay = parseInt(delay); + if (universe) formData.user_universe = universe; + + const allBtn = document.getElementById('cat-all'); + let selectedCategories = []; + + if (allBtn.style.backgroundColor !== 'rgb(0, 0, 128)') { + const categoryBtns = categoryButtons.querySelectorAll('button:not(#cat-all)'); + categoryBtns.forEach(btn 
=> { + if (btn.style.backgroundColor === 'rgb(0, 0, 128)') { + selectedCategories.push(btn.dataset.value); + } + }); + + if (selectedCategories.length > 0) { + formData.user_category = selectedCategories; + } + } + + submitBtn.disabled = true; + submitBtn.textContent = '处理中...'; + + try { + const response = await fetch('/api/generate', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(formData) + }); + + const result = await response.json(); + + if (result.success) { + const successCount = result.expressions_success ? result.expressions_success.length : 0; + const candidateCount = result.candidates ? result.candidates.length : 0; + const errorCount = result.expressions_error ? result.expressions_error.length : 0; + alert('生成完成!成功: ' + successCount + ' 个, 候选: ' + candidateCount + ' 个, 错误: ' + errorCount + ' 个'); + + // 显示下载按钮 + downloadBtn.style.display = 'block'; + downloadBtn.onclick = function() { + const alphaId = document.getElementById('alphaId').value.trim(); + window.location.href = '/api/download/' + alphaId; + }; + } else { + alert('生成失败: ' + (result.error || '未知错误')); + downloadBtn.style.display = 'none'; + } + + } catch (error) { + alert('请求失败: ' + error.message); + downloadBtn.style.display = 'none'; + } finally { + submitBtn.disabled = false; + submitBtn.textContent = '生成变种'; + } +}); diff --git a/simple72/templates/index.html b/simple72/templates/index.html new file mode 100644 index 0000000..ff8c886 --- /dev/null +++ b/simple72/templates/index.html @@ -0,0 +1,148 @@ + + + + + + Alpha Transformer - + + + +

+
+

Alpha Transformer

+

- AI驱动的Alpha变种生成器

+
+ +
+
+

使用说明

+
    +
  • 输入您的种子Alpha ID,系统将基于BRAIN平台生成多个变种
  • +
  • 配置您的LLM(支持OpenAI兼容接口)和BRAIN凭证
  • +
  • 点击生成按钮,系统将自动完成所有处理并返回结果
  • +
+
+ +
+
+

BRAIN 凭证

+ +
+
+ + +
+ +
+ + +
+
+ + +
+ +
+

基本信息

+ +
+ + +
+
+ +
+

LLM 配置

+ +
+
+ + +
+ +
+ + +
+
+ +
+ + +
+
+ +
+

高级选项(可选)

+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ +
+ +
+
+
+ + + + +
+ + +
+
+ + + + diff --git a/simple72/templates/styles.css b/simple72/templates/styles.css new file mode 100644 index 0000000..bb98cf6 --- /dev/null +++ b/simple72/templates/styles.css @@ -0,0 +1,343 @@ +/* Monokai Theme for Alpha Transformer */ + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +/* Monokai Color Palette */ +:root { + --monokai-bg: #272822; + --monokai-fg: #f8f8f2; + --monokai-comment: #75715e; + --monokai-red: #f92672; + --monokai-orange: #fd971f; + --monokai-yellow: #e6db74; + --monokai-green: #a6e22e; + --monokai-cyan: #66d9ef; + --monokai-blue: #268bd2; + --monokai-purple: #ae81ff; + --monokai-dark: #1e1f1c; + --monokai-border: #3e3d32; +} + +body { + font-family: 'Consolas', 'Monaco', 'Courier New', monospace; + font-size: 14px; + background: var(--monokai-dark); + min-height: 100vh; + padding: 20px; + color: var(--monokai-fg); +} + +.container { + max-width: 900px; + margin: 0 auto; + background: var(--monokai-bg); + border: 2px solid var(--monokai-border); + border-radius: 8px; + box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5); +} + +.header { + background: linear-gradient(135deg, var(--monokai-dark) 0%, var(--monokai-bg) 100%); + color: var(--monokai-green); + padding: 20px; + font-weight: bold; + font-size: 18px; + border-bottom: 2px solid var(--monokai-border); + border-radius: 8px 8px 0 0; +} + +.header h1 { + color: var(--monokai-green); + text-shadow: 0 0 10px rgba(166, 226, 46, 0.3); +} + +.header p { + color: var(--monokai-comment); + font-size: 14px; + margin-top: 5px; +} + +.content { + padding: 20px; +} + +.form-section { + margin-bottom: 20px; + background: var(--monokai-dark); + border: 1px solid var(--monokai-border); + border-radius: 6px; + padding: 20px; +} + +.form-section h2 { + color: var(--monokai-cyan); + margin-bottom: 15px; + padding-bottom: 10px; + border-bottom: 1px solid var(--monokai-border); + font-size: 16px; + font-weight: bold; +} + +.form-group { + margin-bottom: 15px; +} + +.form-group label { + display: 
block;
    margin-bottom: 8px;
    color: var(--monokai-orange);
    font-weight: normal;
    font-size: 13px;
}

/* Form controls */
.form-group input,
.form-group select {
    width: 100%;
    padding: 10px 12px;
    border: 1px solid var(--monokai-border);
    border-radius: 4px;
    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
    font-size: 13px;
    background: var(--monokai-bg);
    color: var(--monokai-fg);
    transition: all 0.3s ease;
}

.form-group input:focus,
.form-group select:focus {
    outline: none;
    border-color: var(--monokai-cyan);
    box-shadow: 0 0 0 2px rgba(102, 217, 239, 0.2);
}

.form-group input::placeholder {
    color: var(--monokai-comment);
}

.form-row {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 15px;
}

/* Buttons */
.btn {
    padding: 10px 20px;
    background: var(--monokai-purple);
    color: var(--monokai-bg);
    border: none;
    border-radius: 4px;
    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
    font-size: 13px;
    font-weight: bold;
    cursor: pointer;
    transition: all 0.3s ease;
}

.btn:hover {
    background: #c5a3ff;
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(174, 129, 255, 0.3);
}

.btn:active {
    transform: translateY(0);
}

.btn:disabled {
    background: var(--monokai-comment);
    color: var(--monokai-dark);
    cursor: not-allowed;
    transform: none;
    box-shadow: none;
}

.submit-btn {
    width: 100%;
    padding: 14px 28px;
    background: linear-gradient(135deg, var(--monokai-green) 0%, #8bc34a 100%);
    color: var(--monokai-bg);
    border: none;
    border-radius: 6px;
    font-size: 15px;
    font-weight: bold;
    margin-top: 10px;
    text-transform: uppercase;
    letter-spacing: 1px;
}

.submit-btn:hover {
    background: linear-gradient(135deg, #b8e068 0%, #9ccc65 100%);
    box-shadow: 0 4px 15px rgba(166, 226, 46, 0.4);
}

/* Usage-instructions panel */
.instructions {
    background: var(--monokai-dark);
    border: 1px solid var(--monokai-border);
    border-radius: 6px;
    padding: 20px;
    margin-bottom: 20px;
}

.instructions h3 {
    color: var(--monokai-yellow);
    margin-bottom: 12px;
    font-size: 15px;
    font-weight: bold;
}

.instructions ul {
    color: var(--monokai-fg);
    padding-left: 25px;
}

.instructions li {
    margin-bottom: 8px;
    line-height: 1.6;
}

.instructions li::marker {
    color: var(--monokai-red);
}

#category-buttons {
    display: flex;
    flex-wrap: wrap;
    gap: 8px;
    padding: 12px;
    border: 1px solid var(--monokai-border);
    border-radius: 4px;
    background: var(--monokai-bg);
}

/* Loading animation */
.loading {
    display: none;
    text-align: center;
    padding: 40px;
    background: var(--monokai-dark);
    border: 1px solid var(--monokai-border);
    border-radius: 6px;
    margin-top: 20px;
}

.loading.active {
    display: block;
}

.spinner {
    border: 3px solid var(--monokai-border);
    border-top: 3px solid var(--monokai-cyan);
    border-radius: 50%;
    width: 40px;
    height: 40px;
    animation: spin 1s linear infinite;
    margin: 0 auto 15px;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Error message */
.error-message {
    background: rgba(249, 38, 114, 0.1);
    color: var(--monokai-red);
    padding: 15px;
    margin-top: 15px;
    border: 1px solid var(--monokai-red);
    border-radius: 4px;
    display: none;
    font-weight: bold;
}

.error-message.active {
    display: block;
}

/* Result section */
.result-section {
    margin-top: 20px;
    display: none;
}

.result-section.active {
    display: block;
}

.result-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 10px;
    padding: 12px;
    background: var(--monokai-dark);
    color: var(--monokai-green);
    font-weight: bold;
    border: 1px solid var(--monokai-border);
    border-radius: 4px 4px 0 0;
}

.result-content {
    background: var(--monokai-bg);
    border: 1px solid var(--monokai-border);
    border-radius: 0 0 4px 4px;
    padding: 15px;
    max-height: 400px;
    overflow-y: auto;
}

.result-content pre {
    white-space: pre-wrap;
    word-wrap: break-word;
    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
    font-size: 12px;
    color: var(--monokai-fg);
    line-height: 1.5;
}

.btn-copy {
    padding: 6px 14px;
    background: var(--monokai-cyan);
    color: var(--monokai-bg);
    border: none;
    border-radius: 4px;
    font-size: 12px;
    font-weight: bold;
    cursor: pointer;
    transition: all 0.3s ease;
}

.btn-copy:hover {
    background: #87e8f5;
    box-shadow: 0 2px 8px rgba(102, 217, 239, 0.3);
}

/* Scrollbar styling (WebKit-only pseudo-elements) */
::-webkit-scrollbar {
    width: 10px;
    height: 10px;
}

::-webkit-scrollbar-track {
    background: var(--monokai-dark);
}

::-webkit-scrollbar-thumb {
    background: var(--monokai-comment);
    border-radius: 5px;
}

::-webkit-scrollbar-thumb:hover {
    background: var(--monokai-purple);
}

/* Selection styling */
::selection {
    background: var(--monokai-purple);
    color: var(--monokai-bg);
}