更新解码模板, 检测大括号, 如果占位符大括号不存在, 则自动添加

main
jack 4 weeks ago
parent 88ac366352
commit faaa8e3802
  1. 261
      alpha_submit/main.go
  2. 168
      alpha_submit/main.py
  3. 166
      check_llm_idea/add_braces_to_fields.py
  4. 199
      check_llm_idea/check_llm_idea.py
  5. 364
      check_llm_idea/llm_idea.md
  6. 1479
      check_llm_idea/prompt.md
  7. 366
      temporary_script/synchronize_alpha_performance_data.py
  8. 2
      test/wqb-login/login.py

@ -0,0 +1,261 @@
package main
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"strconv"
	"strings"
	"time"
)
const (
	// totalRetryCount caps the retry loop in SubmitAlpha.
	totalRetryCount = 100000
	// nacosURL is the Nacos config-service endpoint that serves the
	// WorldQuant Brain account credentials (dataId=wq_account).
	nacosURL = "http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify"
)
// NacosConfig mirrors the account payload returned by the Nacos config
// endpoint (see nacosURL).
type NacosConfig struct {
	UserName string `json:"user_name"` // Brain account login name
	Password string `json:"password"`  // Brain account password
}
// basicAuthTransport is an http.RoundTripper that stamps Basic Auth
// credentials onto every outgoing request before delegating to base.
type basicAuthTransport struct {
	username string            // account name fetched from Nacos
	password string            // account password fetched from Nacos
	base     http.RoundTripper // underlying transport, e.g. http.DefaultTransport
}

// RoundTrip implements http.RoundTripper: it sets the stored credentials on
// req and forwards it to the wrapped transport.
//
// NOTE(review): the http.RoundTripper contract says implementations should
// not mutate the request; consider operating on req.Clone(req.Context())
// instead — confirm no caller relies on the mutation.
func (t *basicAuthTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	req.SetBasicAuth(t.username, t.password)
	return t.base.RoundTrip(req)
}
// Login authenticates against the WorldQuant Brain API and returns an
// *http.Client whose transport attaches Basic Auth to every request.
//
// The credentials are fetched from the Nacos config service (nacosURL).
// A non-nil client is returned only when the API answered 201 Created;
// any other outcome yields a nil client and a descriptive error.
func Login() (*http.Client, error) {
	// 1. Fetch the account credentials from Nacos.
	resp, err := http.Get(nacosURL)
	if err != nil {
		log.Printf("获取账号配置失败: %v", err)
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("nacos 返回非 200 状态码: %d", resp.StatusCode)
	}
	var config NacosConfig
	if err := json.NewDecoder(resp.Body).Decode(&config); err != nil {
		return nil, fmt.Errorf("解析 nacos 配置失败: %w", err)
	}
	log.Printf("正在登录账户: %s", config.UserName)

	// 2. Build the client; every subsequent request carries Basic Auth.
	client := &http.Client{
		Timeout: 30 * time.Second,
		Transport: &basicAuthTransport{
			username: config.UserName,
			password: config.Password,
			base:     http.DefaultTransport,
		},
	}

	// 3. Send the login request. The original code discarded this error,
	// which would panic on a nil request if construction ever failed.
	loginReq, err := http.NewRequest("POST", "https://api.worldquantbrain.com/authentication", nil)
	if err != nil {
		return nil, fmt.Errorf("构造登录请求失败: %w", err)
	}
	loginResp, err := client.Do(loginReq)
	if err != nil {
		return nil, fmt.Errorf("登录请求失败: %w", err)
	}
	defer loginResp.Body.Close()
	log.Printf("登录状态: %d", loginResp.StatusCode)
	if loginResp.StatusCode == http.StatusCreated {
		log.Println("登录成功!")
		return client, nil
	}
	// Read error ignored deliberately: the body is best-effort context only.
	body, _ := io.ReadAll(loginResp.Body)
	return nil, fmt.Errorf("登录失败: %s", string(body))
}
// SubmitAlpha submits the alpha with the given ID, retrying transient
// failures (network errors, 404, 429, 5xx, auth expiry) until it succeeds or
// totalRetryCount attempts are exhausted.
//
// A 400 response whose body contains the "plain HTTP request" marker means
// the alpha was already submitted and is still being processed; in that case
// the same URL is polled (honoring Retry-After, floored at 3s) until a final
// status appears.
//
// Returns nil on success (HTTP 200); a non-nil error on permanent failure
// (403 check failures, unknown statuses) or when retries are exhausted.
func SubmitAlpha(alphaID string) error {
	// NOTE(review): this marker looks like an nginx proxy error rather than a
	// "still processing" response — confirm against the Brain API behavior.
	const busyMsg = "The plain HTTP request was sent to HTTPS port"
	retryCount := 0
	var client *http.Client
	for retryCount < totalRetryCount {
		// Log in lazily; client is reset to nil on 401 so the next
		// iteration re-authenticates.
		if client == nil {
			var err error
			client, err = Login()
			if err != nil {
				log.Printf("登录失败: %v, 10秒后重试 (重试次数: %d)", err, retryCount)
				time.Sleep(10 * time.Second)
				retryCount++
				continue
			}
		}
		url := fmt.Sprintf("https://api.worldquantbrain.com/alphas/%s/submit", alphaID)
		log.Printf("请求 URL: %s", url)
		// Fire the submission request.
		resp, err := client.Post(url, "application/json", nil)
		if err != nil {
			log.Printf("网络请求异常 (alpha=%s, retry=%d): %v", alphaID, retryCount, err)
			retryCount++
			time.Sleep(10 * time.Second)
			continue
		}
		// Special 400 case: already submitted, poll for the final status.
		if resp.StatusCode == http.StatusBadRequest {
			bodyBytes, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			// BUG FIX: the original closed the body and then let the status
			// switch below read from it (yielding nothing). Restore it so the
			// 403/default branches can still decode the payload.
			resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
			if strings.Contains(string(bodyBytes), busyMsg) {
				log.Println("Alpha 已提交,正在轮询状态...")
				pollInterval := 1.0 // seconds
				for {
					time.Sleep(time.Duration(pollInterval) * time.Second)
					fmt.Print(".")
					// GET the same URL to query the current state.
					pollResp, err := client.Get(url)
					if err != nil {
						log.Printf("轮询请求失败: %v", err)
						break
					}
					// Honor the server-suggested Retry-After, floored at 3s.
					if retryAfter := pollResp.Header.Get("Retry-After"); retryAfter != "" {
						if f, err := strconv.ParseFloat(retryAfter, 64); err == nil && f > 0 {
							pollInterval = max(f, 3.0)
						} else {
							pollInterval = 3.0
						}
					} else {
						pollInterval = 3.0
					}
					// Any status other than 400 is final.
					if pollResp.StatusCode != http.StatusBadRequest {
						resp = pollResp
						break
					}
					pollBody, _ := io.ReadAll(pollResp.Body)
					pollResp.Body.Close()
					// BUG FIX: the original closed pollResp.Body twice here and
					// handed a drained, closed body downstream. Restore it once.
					pollResp.Body = io.NopCloser(bytes.NewReader(pollBody))
					// A 400 without the busy marker is also final.
					if !strings.Contains(string(pollBody), busyMsg) {
						resp = pollResp
						break
					}
				}
				log.Printf("轮询结束,最终状态码: %d", resp.StatusCode)
			}
		}
		// Defensive: resp should never be nil here, but skip the iteration
		// rather than dereference nil if an earlier branch ever changes.
		if resp == nil {
			retryCount++
			continue
		}
		// Dispatch on the (possibly polled) status code.
		switch resp.StatusCode {
		case http.StatusTooManyRequests: // 429: rate-limited, back off hard
			log.Println("触发限流 (429),休眠 60 秒后重试")
			resp.Body.Close()
			time.Sleep(60 * time.Second)
			retryCount++
			continue
		case http.StatusUnauthorized: // 401: drop the client, force re-login
			log.Println("认证失效,重新登录")
			resp.Body.Close()
			client = nil
			retryCount++
			continue
		case http.StatusNotFound: // 404: alpha missing or timed out, retry
			log.Printf("Alpha %s 不存在或超时,重试 (%d/%d)", alphaID, retryCount+1, totalRetryCount)
			resp.Body.Close()
			retryCount++
			continue
		case http.StatusForbidden: // 403: permanent failure, report why
			log.Printf("%s 提交失败 (403)", alphaID)
			var failChecks []map[string]interface{}
			var bodyMap map[string]interface{}
			// Collect every check whose result is FAIL from body["is"]["checks"].
			if err := json.NewDecoder(resp.Body).Decode(&bodyMap); err == nil {
				if isObj, ok := bodyMap["is"].(map[string]interface{}); ok {
					if checks, ok := isObj["checks"].([]interface{}); ok {
						for _, c := range checks {
							if ch, ok := c.(map[string]interface{}); ok {
								if result, ok := ch["result"]; ok && result == "FAIL" {
									failChecks = append(failChecks, ch)
								}
							}
						}
					}
				}
			}
			resp.Body.Close()
			log.Printf("失败的检查项: %v", failChecks)
			// Submission-quota failures get a dedicated error message.
			for _, ch := range failChecks {
				if name, ok := ch["name"]; ok {
					if name == "REGULAR_SUBMISSION" || name == "SUPER_SUBMISSION" {
						return fmt.Errorf("提交次数超过限制: %v", failChecks)
					}
				}
			}
			return fmt.Errorf("提交失败,HTTP 403")
		case http.StatusOK: // 200: done
			log.Printf("%s 提交成功", alphaID)
			resp.Body.Close()
			return nil
		default:
			// 5xx: transient server error, retry after a short pause.
			if resp.StatusCode >= 500 && resp.StatusCode < 600 {
				log.Printf("服务器错误 %d,5 秒后重试", resp.StatusCode)
				resp.Body.Close()
				time.Sleep(5 * time.Second)
				retryCount++
				continue
			}
			// Anything else is unexpected: give up with the body as context.
			bodyBytes, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			return fmt.Errorf("未处理的响应状态码 %d: %s", resp.StatusCode, string(bodyBytes))
		}
	}
	return fmt.Errorf("达到最大重试次数 %d,提交失败", totalRetryCount)
}
// max reports the larger of the two float64 values.
func max(a, b float64) float64 {
	if b > a {
		return b
	}
	return a
}
// main demonstrates submitting a single alpha and logging the outcome.
func main() {
	// Example usage: replace the placeholder with a real alpha ID.
	const alphaID = "your_alpha_id_here"
	if err := SubmitAlpha(alphaID); err != nil {
		log.Fatalf("提交失败: %v", err)
	}
	log.Println("提交完成")
}

@ -0,0 +1,168 @@
import time
import logging
import httpx
from httpx import BasicAuth
# Configure root logging: timestamped, INFO-level output for the whole script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def login():
    """Log in to the WorldQuant Brain API.

    Fetches the account credentials from the Nacos config service, builds an
    httpx.Client with Basic Auth attached, and POSTs to the authentication
    endpoint. The returned client already carries the credentials, so no
    further auth handling is needed by callers.

    Returns:
        httpx.Client | None: an authenticated client when the API answered
        201 Created; None when fetching credentials or logging in failed.
    """
    # Fetch the username/password pair from Nacos.
    # NOTE(review): no timeout is set on these requests — a hung server
    # blocks forever; consider httpx timeouts. Confirm before changing.
    nacos_resp = httpx.get('http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify')
    if nacos_resp.status_code != 200:
        logger.error('获取账号密码失败')
        return None
    config = nacos_resp.json()
    username = config['user_name']
    password = config['password']
    logger.info(f"正在登录账户: {username}")
    # Attach Basic Auth to every request made through this client.
    client = httpx.Client(auth=BasicAuth(username, password))
    # POST with no body: the API authenticates purely from the auth header.
    response = client.post('https://api.worldquantbrain.com/authentication')
    logger.info(f"登录状态: {response.status_code}")
    if response.status_code == 201:
        logger.info("登录成功!")
        logger.debug(response.json())
        return client
    else:
        logger.error(f"登录失败: {response.json()}")
        # Release the connection pool when authentication fails.
        client.close()
        return None
def submit_alpha(alpha_id):
    """Submit an alpha, retrying until it succeeds or retries are exhausted.

    Retry policy, mirroring the API's failure modes:
      * network errors, 404, 5xx -> sleep and retry
      * 429 -> sleep 60 s and retry
      * 401 -> drop the client and re-login
      * 400 containing the "plain HTTP request" marker -> already submitted;
        poll the same URL until a final status appears
      * 403 / any other status -> permanent failure, stop retrying

    Args:
        alpha_id: identifier of the alpha to submit.

    Returns:
        bool: True when submission succeeded (HTTP 200), False on permanent
        failure or after TOTAL_RETRY_COUNT attempts.
    """
    TOTAL_RETRY_COUNT = 100000
    retry_count = 0
    client = None
    while retry_count < TOTAL_RETRY_COUNT:
        # (Re-)login lazily; client is reset to None when auth expires.
        if client is None:
            client = login()
            if client is None:
                logger.error("登录失败,等待 10 秒后重试")
                time.sleep(10)
                retry_count += 1
                continue
        try:
            url = f"https://api.worldquantbrain.com/alphas/{alpha_id}/submit"
            logger.debug(f'url: {url}')
            # 1. Fire the submission request.
            res = client.post(url)
            # Special case: the alpha was already submitted and is still being
            # processed — poll the same URL for the final result.
            # NOTE(review): the marker text looks like an nginx proxy error,
            # not a "processing" response — confirm against the API.
            if res.status_code == 400 and "The plain HTTP request was sent to HTTPS port" in res.text:
                logger.info("Alpha 已提交,正在轮询状态...")
                # Poll until the server reports a final status.
                poll_interval = 1.0  # initial poll interval in seconds
                while True:
                    time.sleep(poll_interval)
                    print(".", end="", flush=True)
                    # GET the same URL to query the current state.
                    poll_res = client.get(url)
                    # Honor the server-suggested Retry-After, floored at 3 s.
                    if "retry-after" in poll_res.headers:
                        poll_interval = max(float(poll_res.headers["retry-after"]), 3)
                    else:
                        poll_interval = 3  # default interval
                    # Leave the loop once the state is no longer "processing".
                    if poll_res.status_code != 400 or "The plain HTTP request was sent to HTTPS port" not in poll_res.text:
                        res = poll_res
                        break
                logger.info(f"轮询结束,最终状态码: {res.status_code}")
            # 2. Dispatch on the (possibly polled) status code.
            if res.status_code == 429:
                logger.info("触发限流 (429),休眠 60 秒后重试")
                time.sleep(60)
                retry_count += 1
                continue
            if res.status_code == 401:
                logger.warning("认证失效,重新登录")
                if client:
                    client.close()
                client = None
                retry_count += 1
                continue
            if res.status_code == 404:
                logger.warning(f"Alpha {alpha_id} 不存在或超时,重试 ({retry_count+1}/{TOTAL_RETRY_COUNT})")
                retry_count += 1
                continue
            if res.status_code // 100 == 5:
                logger.warning(f"服务器错误 {res.status_code},5 秒后重试")
                time.sleep(5)
                retry_count += 1
                continue
            if res.status_code == 403:
                logger.info(f"{alpha_id} 提交失败 (403)")
                fail_checks = []
                try:
                    checks = res.json()["is"]["checks"]
                    fail_checks = [x for x in checks if x.get("result") == "FAIL"]
                except Exception as e:
                    logger.error(f"解析失败原因时出错: {e}")
                logger.info(f"失败的检查项: {fail_checks}")
                # Submission-quota failures are permanent: stop immediately.
                if any(x.get("name") in ["REGULAR_SUBMISSION", "SUPER_SUBMISSION"] for x in fail_checks):
                    logger.error("提交次数超过限制,放弃重试")
                    return False
                # Any other 403 is also treated as a permanent failure.
                return False
            if res.status_code == 200:
                logger.info(f"{alpha_id} 提交成功")
                return True
            # Any other non-2xx status is unexpected: give up, don't retry.
            logger.error(f"未处理的响应状态码 {res.status_code},放弃重试。响应内容: {res.text[:200]}")
            return False
        except httpx.RequestError as e:
            logger.error(f"网络请求异常 (alpha_id={alpha_id}, retry={retry_count}): {e}")
            retry_count += 1
            time.sleep(10)
            continue
        except Exception as e:
            logger.error(f"未预期的异常 (alpha_id={alpha_id}): {e}")
            return False
    # Exhausted all retries without success.
    logger.error(f"达到最大重试次数 {TOTAL_RETRY_COUNT},提交失败")
    return False
# Usage example
if __name__ == "__main__":
    # Submit a single alpha; replace the placeholder with a real alpha ID.
    alpha_id = "your_alpha_id_here"
    success = submit_alpha(alpha_id)
    if success:
        print("提交完成")
    else:
        print("提交失败")

@ -0,0 +1,166 @@
#!/usr/bin/env python3
"""Read the llm_idea.md file, check whether the data fields inside each
**Implementation Example** expression are missing curly braces, add braces
where they are missing, and write the result back to the file.
"""
import re
from pathlib import Path
def get_function_names():
    """Return the set of known operator names; these are never brace-wrapped."""
    arithmetic = ('divide', 'subtract', 'add', 'abs', 'power')
    comparison = ('greater', 'greater_equal', 'equal', 'and', 'or', 'not')
    time_series = ('ts_delay', 'ts_mean', 'ts_std_dev', 'ts_sum', 'ts_backfill')
    grouping = ('group_rank', 'group_neutralize', 'zscore', 'days_from_last_change')
    return set(arithmetic + comparison + time_series + grouping)


def get_keywords():
    """Return the set of language keywords that must not be treated as fields."""
    literals = ('True', 'False', 'None')
    operators = ('and', 'or', 'not', 'in', 'is')
    control = ('if', 'else', 'for', 'while', 'return', 'lambda')
    return set(literals + operators + control)


def add_braces_to_expression(expression):
    """Wrap bare data-field identifiers in an expression with curly braces.

    Scans left to right: spans already wrapped in ``{...}`` are copied through
    untouched, identifiers that are neither known operators nor keywords are
    wrapped as ``{name}``, and every other character passes through verbatim.

    Args:
        expression: the original expression string.

    Returns:
        str: the expression with braces added around bare data fields.
    """
    protected = get_function_names() | get_keywords()
    pieces = []
    pos = 0
    size = len(expression)
    while pos < size:
        ch = expression[pos]
        if ch == '{':
            # Already-braced span: copy it through up to the matching '}'.
            close = expression.find('}', pos)
            if close == -1:
                # No closing brace: treat the '{' as an ordinary character.
                pieces.append(ch)
                pos += 1
            else:
                pieces.append(expression[pos:close + 1])
                pos = close + 1
            continue
        if ch.isalpha() or ch == '_':
            # Consume a full identifier.
            end = pos + 1
            while end < size and (expression[end].isalnum() or expression[end] == '_'):
                end += 1
            token = expression[pos:end]
            # Operators and keywords stay bare; everything else is a field.
            pieces.append(token if token in protected else '{' + token + '}')
            pos = end
            continue
        # Digits, operators, parentheses, whitespace: pass through as-is.
        pieces.append(ch)
        pos += 1
    return ''.join(pieces)
def process_file(file_path):
    """Brace-fix every **Implementation Example** expression in a file.

    Reads the file, rewrites each ``**Implementation Example**: `...```
    expression via add_braces_to_expression, and writes the file back only
    when something actually changed.

    Args:
        file_path: Path object pointing at the markdown file to process.

    Returns:
        bool: True on success (updated or already correct), False when the
        file could not be read or written.
    """
    try:
        original = file_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        print(f"错误:找不到文件 {file_path}")
        return False
    except Exception as e:
        print(f"读取文件时出错:{e}")
        return False

    # Group 1 is the whole marker+expression span, group 2 just the expression.
    pattern = r'(\*\*Implementation Example\*\*:\s*`(.*?)`)'

    def _rewrite(match):
        # Keep everything up to and including the opening backtick, swap in
        # the brace-fixed expression, then re-append the closing backtick.
        head = match.group(1).split('`')[0] + '`'
        fixed = add_braces_to_expression(match.group(2))
        return head + fixed + '`'

    updated = re.sub(pattern, _rewrite, original, flags=re.DOTALL)

    if updated == original:
        print("✓ 文件无需更改(所有数据字段已有大括号)")
        return True

    try:
        file_path.write_text(updated, encoding='utf-8')
    except Exception as e:
        print(f"写入文件时出错:{e}")
        return False
    print(f"✓ 成功更新文件:{file_path}")
    return True
def main():
    """Entry point: brace-fix the llm_idea.md sitting next to this script."""
    target = Path(__file__).parent / "llm_idea.md"
    banner = "=" * 80
    print(banner)
    print("Implementation Example 字段大括号补全工具")
    print(banner)
    print(f"\n正在处理文件:{target}\n")
    if process_file(target):
        print("\n" + banner)
        print("处理完成!")
        print(banner)
    else:
        print("\n处理失败!")


if __name__ == "__main__":
    main()

@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""Read the llm_idea.md file from the current directory, match the content
after each **Implementation Example**: marker, and extract the parameter
variable names used in each example's function calls.
"""
import re
from pathlib import Path
def extract_implementation_examples(file_path):
    """Collect every Implementation Example expression from a markdown file.

    Args:
        file_path: Path of the markdown file to scan.

    Returns:
        list: the expression strings found inside
        ``**Implementation Example**: `...``` spans; empty when the file is
        missing or unreadable.
    """
    try:
        text = file_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        print(f"错误:找不到文件 {file_path}")
        return []
    except Exception as e:
        print(f"读取文件时出错:{e}")
        return []
    # Non-greedy body between backticks; DOTALL lets expressions span lines.
    return re.findall(r'\*\*Implementation Example\*\*:\s*`(.*?)`', text, re.DOTALL)
def extract_variables_from_expression(expression):
    """Extract every variable name used as a function argument in expression.

    Walks function calls (recursively through nested calls), skipping numeric
    constants (e.g. 1, 5, 20, 60) and known operator/keyword names.

    Args:
        expression: a function-call expression string.

    Returns:
        list: the variable names, de-duplicated, in order of first appearance.
    """
    # Match a function call: name(argument list), allowing one level of
    # nested parentheses inside the argument list.
    variables = []
    # Method 1: match every function-call's parenthesized argument list.
    func_call_pattern = r'(\w+)\s*\(([^()]*(?:\([^()]*\)[^()]*)*)\)'
    def extract_from_text(text):
        """Recursively extract variables from the function calls in text."""
        # Find every function call in this text.
        matches = re.finditer(func_call_pattern, text)
        for match in matches:
            func_name = match.group(1)
            args_str = match.group(2)
            # Split the argument list while respecting nested brackets.
            args = split_args_keeping_nesting(args_str)
            # Classify each argument.
            for arg in args:
                arg = arg.strip()
                # Numeric constant: skip it.
                if re.match(r'^-?\d+(\.\d+)?$', arg):
                    continue
                # Nested function call: recurse into it.
                if '(' in arg and ')' in arg:
                    extract_from_text(arg)
                # Bare identifier (letter/underscore start): record it.
                elif re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', arg):
                    if arg not in variables:  # de-dup, keep first-seen order
                        variables.append(arg)
                # Compound expression with operators: mine it for names too.
                elif any(op in arg for op in ['+', '-', '*', '/', '>', '<', '=']):
                    extract_variables_from_complex_expression(arg)
    def split_args_keeping_nesting(args_str):
        """Split a comma-separated argument list, respecting nesting.

        Examples:
            "a, b, c"      -> ['a', 'b', 'c']
            "func(a,b), c" -> ['func(a,b)', 'c']
        """
        args = []
        current_arg = []
        paren_count = 0
        bracket_count = 0  # square brackets
        brace_count = 0    # curly braces
        for char in args_str:
            # A comma only delimits arguments at nesting depth zero.
            if char == ',' and paren_count == 0 and bracket_count == 0 and brace_count == 0:
                args.append(''.join(current_arg).strip())
                current_arg = []
            else:
                current_arg.append(char)
                if char == '(':
                    paren_count += 1
                elif char == ')':
                    paren_count -= 1
                elif char == '[':
                    bracket_count += 1
                elif char == ']':
                    bracket_count -= 1
                elif char == '{':
                    brace_count += 1
                elif char == '}':
                    brace_count -= 1
        # Flush the trailing argument, if any.
        if current_arg:
            args.append(''.join(current_arg).strip())
        return args
    def extract_variables_from_complex_expression(expr):
        """Extract variables from an operator expression (e.g. a + b * c)."""
        # Identifier: letter/underscore start, then letters/digits/underscores.
        var_pattern = r'\b[a-zA-Z_][a-zA-Z0-9_]*\b'
        # Skip common keywords so they are not reported as variables.
        keywords = {'and', 'or', 'not', 'if', 'else', 'for', 'while', 'return',
                    'True', 'False', 'None', 'in', 'is', 'lambda'}
        for match in re.finditer(var_pattern, expr):
            var = match.group()
            if var not in keywords and not re.match(r'^\d+$', var):
                if var not in variables:
                    variables.append(var)
    # Kick off extraction over the whole expression.
    extract_from_text(expression)
    # Fallback: no function calls found — scan for bare identifiers instead.
    if not variables:
        # Match simple identifiers directly.
        simple_var_pattern = r'\b[a-zA-Z_][a-zA-Z0-9_]*\b'
        keywords = {'and', 'or', 'not', 'if', 'else', 'for', 'while', 'return',
                    'True', 'False', 'None', 'divide', 'subtract', 'add', 'abs',
                    'greater', 'equal', 'ts_delay', 'ts_mean', 'ts_std_dev',
                    'count_bias_adjusted_price_target_estimates', 'group_rank',
                    'zscore', 'days_from_last_change', 'ts_sum', 'power'}
        for match in re.finditer(simple_var_pattern, expression):
            var = match.group()
            # Skip numbers and known operator names.
            if (not re.match(r'^\d+$', var) and
                var not in keywords and
                var not in ['and', 'or', 'not']):
                if var not in variables:
                    variables.append(var)
    return variables
def main():
    """Entry point: report the variables in every Implementation Example."""
    target = Path.cwd() / "llm_idea.md"
    print(f"正在读取文件:{target}")
    print("=" * 80)
    examples = extract_implementation_examples(target)
    if not examples:
        print("未找到匹配的 **Implementation Example**: `...` 内容")
        return
    print(f"找到 {len(examples)} 个 Implementation Example:\n")
    for idx, example in enumerate(examples, 1):
        print(f"{'='*80}")
        print(f"示例 {idx}:")
        print(f"{'-'*40}")
        print(f"表达式: {example}")
        print(f"{'-'*40}")
        found = extract_variables_from_expression(example)
        if found:
            print(f"提取到的变量 ({len(found)} 个):")
            for var_idx, var in enumerate(found, 1):
                print(f" {var_idx}. {var}")
        else:
            print("未提取到变量名")
        print()  # blank line between examples
    print("=" * 80)
    print(f"处理完成!共处理 {len(examples)} 个示例")


if __name__ == "__main__":
    main()

@ -0,0 +1,364 @@
# biasfree_analyst Feature Engineering Analysis Report
**Dataset**: biasfree_analyst
**Category**: Analyst
**Region**: USA
**Analysis Date**: 2026-04-09
**Fields Analyzed**: 54
---
## Executive Summary
**Primary Question Answered by Dataset**: How do analysts' bias-adjusted forecasts (price targets and fundamentals) vary across multiple "analogues" (bias removal methods), and what do these variations reveal about uncertainty, consensus strength, and potential mispricing?
**Key Insights from Analysis**:
- This dataset is unique because it provides multiple "bias-free analogues" (first, second, third) for the same underlying metric, rather than just a single consensus or raw value. This allows us to measure the *stability* of the bias-adjustment process itself.
- The presence of standard deviation fields for each analogue group allows for direct measurement of *disagreement* among bias correction methodologies, which is a novel proxy for forecast ambiguity.
- Revision counts (upward/downward) provide a dynamic signal of how the "clean" view of analysts is changing, stripped of systematic optimism or pessimism.
**Critical Field Relationships Identified**:
- The `_first_`, `_second_`, and `_third_` biasfree analogues represent different statistical approaches to removing bias. Comparing them reveals the sensitivity of the forecast to the choice of bias model.
- `mean_` vs `median_` fields within the same analogue group highlight the skewness of the distribution of bias-adjusted estimates.
- `stddev_` fields serve as direct measures of cross-analyst (or cross-model) uncertainty for the bias-free view.
**Most Promising Feature Concepts**:
1. **Bias Adjustment Fragility (Dispersion of Analogues)** - because it quantifies how much the "true" forecast changes depending on the specific bias-correction technique used.
2. **Bias-Free Revision Momentum (Up-Down Ratio)** - because it isolates the directional change in analyst conviction *after* removing systematic biases.
3. **Bias-Free Target Dispersion Ratio (Uncertainty-Adjusted Upside)** - because it evaluates the risk-adjusted upside implied by bias-free price targets.
---
## Dataset Deep Understanding
### Dataset Description
This dataset contains bias-adjusted analyst estimates for price targets and fundamentals. Unlike standard consensus data, it provides multiple "bias-free analogues" (first, second, third) generated by different statistical models. It also includes distribution statistics (mean, median, stddev, min, max, count) and revision counts for these bias-free metrics. The goal is to provide a cleaner, less behaviorally skewed view of analyst expectations.
### Field Inventory
| Field ID | Description | Data Type | Update Frequency | Coverage |
|----------|-------------|-----------|------------------|----------|
| `biasfree_analyst_price_target` | Single analyst's bias-adjusted price target | Float | Event-driven | Moderate |
| `biasfree_analyst_fundamental_estimate` | Single analyst's bias-adjusted fundamental | Float | Event-driven | Moderate |
| `mean_bias_adjusted_price_target` | Mean of bias-adjusted price target estimates | Float | Event-driven | High |
| `mean_bias_adjusted_fundamental_estimate` | Mean of bias-adjusted fundamental estimates | Float | Event-driven | High |
| `median_bias_adjusted_price_target` | Median of bias-adjusted price target estimates | Float | Event-driven | High |
| `stddev_bias_adjusted_price_target` | Standard deviation of bias-adjusted price targets | Float | Event-driven | High |
| `num_upward_biasfree_price_target_revisions` | Count of upward bias-free PT revisions | Integer | Event-driven | Moderate |
| `num_downward_biasfree_price_target_revisions` | Count of downward bias-free PT revisions | Integer | Event-driven | Moderate |
| `avg_first_biasfree_price_target_estimate` | Average of first bias-free PT analogue | Float | Event-driven | High |
| `avg_second_biasfree_price_target_estimate` | Average of second bias-free PT analogue | Float | Event-driven | High |
| `avg_third_biasfree_price_target_estimate` | Average of third bias-free PT analogue | Float | Event-driven | High |
| `forecast_horizon_months` | Time horizon in months for the estimate | Integer | Static | High |
*(Note: Only representative fields shown for brevity; analysis encompasses all 54 fields.)*
### Field Deconstruction Analysis
#### biasfree_analyst_price_target: Bias-Adjusted Analyst Price Target
- **What is being measured?**: A single analyst's price target after removing statistical bias (e.g., over-optimism).
- **How is it measured?**: Raw analyst target processed through a bias-correction model.
- **Time dimension**: Point-in-time snapshot (Event).
- **Business context**: Raw analyst targets are notoriously optimistic; this field aims to provide a "truer" expectation of future price.
- **Generation logic**: Proprietary bias model applied to raw data.
- **Reliability considerations**: Depends heavily on the accuracy of the bias model. Missing values mean no estimate was made or the bias model couldn't be applied.
#### avg_first_biasfree_price_target_estimate: First Bias-Free Analogue Mean
- **What is being measured?**: The consensus (mean) of analyst estimates after applying the *first* specific bias-correction methodology.
- **How is it measured?**: Average of all `biasfree_analyst_price_target` values generated using "Model 1".
- **Time dimension**: Point-in-time snapshot (Event).
- **Business context**: Represents the "clean" view of the street using one specific debiasing lens.
- **Generation logic**: Cross-sectional mean calculation.
- **Reliability considerations**: Outliers (single extreme analysts) can skew the mean.
#### stddev_first_biasfree_price_target_estimate: Dispersion of First Analogue
- **What is being measured?**: The level of disagreement among analysts *after* applying the first bias-correction model.
- **How is it measured?**: Standard deviation of the `avg_first_biasfree_price_target_estimate` component inputs.
- **Time dimension**: Point-in-time snapshot (Event).
- **Business context**: High standard deviation indicates that even after removing common bias, analysts strongly disagree on valuation.
- **Generation logic**: Cross-sectional standard deviation.
- **Reliability considerations**: Requires a minimum number of estimates (count) to be statistically meaningful.
#### num_upward_biasfree_price_target_revisions: Bias-Free Optimism Flow
- **What is being measured?**: The number of analysts who raised their *bias-adjusted* price target.
- **How is it measured?**: Count of events where current bias-adjusted target > previous bias-adjusted target.
- **Time dimension**: Cumulative over a period (Event count).
- **Business context**: Distinguishes between "analyst getting more bullish" and "analyst just being less biased." A rise here signals genuine improvement in the *debiased* outlook.
- **Generation logic**: Event tracking and comparison.
- **Reliability considerations**: Zeros can mean no revisions or no coverage.
### Field Relationship Mapping
**The Story This Data Tells**:
This data tells the story of *consensus fragility* and *true conviction*. It doesn't just ask "What is the forecast?" but "How much does that forecast depend on *how* we clean the data?" and "How confident are analysts in the cleaned data?" The multiple analogues (`first_`, `second_`, `third_`) allow us to see the variance in the output of the data cleaning pipeline itself.
**Key Relationships Identified**:
1. **Analogue Convergence/Divergence**: The spread between `avg_first`, `avg_second`, and `avg_third` biasfree estimates indicates the sensitivity of the "fair value" to the statistical debiasing technique. A large spread implies the valuation is highly dependent on the model assumption (High Uncertainty).
2. **Cross-Analyst Disagreement**: The `stddev_` fields measure how much individual analysts disagree *even after* removing their collective biases. High StdDev = High Disagreement = High Risk.
3. **Directional Pressure**: The ratio of `num_upward` to `num_downward` revisions shows the vector of change in the *bias-free* consensus. This is a leading indicator of changes in "smart money" expectations.
**Missing Pieces That Would Complete the Picture**:
- **The Specific Bias Models**: Knowing if "first" is a simple industry adjustment and "third" is a complex ML model would add context.
- **Historical Timestamps**: We have the fields, but knowing the exact date of each revision/release is crucial for backtesting (implied by `delay=1`, but field-level dates are opaque here).
- **Actual Reported Fundamentals**: To calculate the "surprise" of the bias-free estimate vs. reality.
---
## Feature Concepts by Question Type
### Q1: "What is stable?" (Invariance Features)
**Concept**: Bias Adjustment Fragility Score
- **Sample Fields Used**: `avg_first_biasfree_price_target_estimate`, `avg_second_biasfree_price_target_estimate`, `avg_third_biasfree_price_target_estimate`
- **Definition**: The coefficient of variation across the three distinct bias-free price target analogues. Formula: `stddev(analogue1, analogue2, analogue3) / mean(analogue1, analogue2, analogue3)`.
- **Why This Feature**: It answers: "Is the fair value estimate robust to the choice of debiasing technique?" If the answer is no (high fragility), the stock's valuation is highly subjective and likely prone to larger price swings on news.
- **Logical Meaning**: Measures the model risk inherent in the analyst consensus. A fragile stock is one where quants cannot agree on what the "clean" number even is.
- **Is filling nan necessary**: Yes. If only one analogue exists, fragility is undefined. We should fill NaN with 0 (meaning no evidence of fragility) or use a neutral value. Better yet, mask the feature where `count_bias_adjusted_price_target_estimates` < 2.
- **Directionality**: High Value = High Fragility/Model Risk (Potentially bearish/risky). Low Value = Robust Consensus (Potentially safer/more reliable).
- **Boundary Conditions**: Extremely high values indicate the bias correction methods contradict each other violently (one says buy, one says sell).
- **Implementation Example**: `divide({stddev_analogues}, abs({mean_analogues}))` where the inputs are the three average fields.
**Concept**: Fundamental Estimate Robustness Ratio
- **Sample Fields Used**: `median_bias_adjusted_fundamental_estimate`, `min_bias_adjusted_fundamental_estimate`, `max_bias_adjusted_fundamental_estimate`
- **Definition**: The ratio of the interquartile range or full range of bias-adjusted fundamental estimates relative to the median. Proxy: `(max_bias_adjusted_fundamental_estimate - min_bias_adjusted_fundamental_estimate) / abs(median_bias_adjusted_fundamental_estimate)`.
- **Why This Feature**: Similar to the above but for fundamentals (EPS, Sales). High range means analysts wildly disagree on the upcoming fundamental performance *even after debiasing*.
- **Logical Meaning**: Measures uncertainty about the company's near-term operational reality.
- **Is filling nan necessary**: Yes. Use `group_mean` backfill or 0 if range is undefined (only 1 estimate).
- **Directionality**: High Value = High Earnings Uncertainty. Low Value = High Earnings Visibility.
- **Boundary Conditions**: Infinite if median is 0. Cap at a reasonable threshold (e.g., 10).
- **Implementation Example**: `divide(subtract({max_bias_adjusted_fundamental_estimate}, {min_bias_adjusted_fundamental_estimate}), abs({median_bias_adjusted_fundamental_estimate}))`
---
### Q2: "What is changing?" (Dynamics Features)
**Concept**: Bias-Free Revision Momentum (PT)
- **Sample Fields Used**: `num_upward_biasfree_price_target_revisions`, `num_downward_biasfree_price_target_revisions`
- **Definition**: The net directional flow of bias-free price target changes. Formula: `(Up - Down) / (Up + Down + 1)`.
- **Why This Feature**: Raw revision ratios are often skewed by analyst optimism. Since this is *bias-free* revisions, a positive momentum signals genuine improvement in the clean data signal, not just behavioral bias.
- **Logical Meaning**: Net directional conviction of the bias-corrected analyst community.
- **Is filling nan necessary**: Yes. Use `ts_backfill` or `0` if no revisions. The `+1` in denominator prevents division by zero.
- **Directionality**: High Positive = Strong Bias-Free Upward Momentum (Bullish). High Negative = Strong Bias-Free Downward Momentum (Bearish).
- **Boundary Conditions**: Values near +1 or -1 indicate unanimous revision direction in the recent period.
- **Implementation Example**: `divide(subtract({num_upward_biasfree_price_target_revisions}, {num_downward_biasfree_price_target_revisions}), add({num_upward_biasfree_price_target_revisions}, {num_downward_biasfree_price_target_revisions}, 1))`
**Concept**: Bias-Free Earnings Momentum Change
- **Sample Fields Used**: `num_upward_biasfree_fundamental_revisions`, `num_downward_biasfree_fundamental_revisions`
- **Definition**: The change in the Bias-Free Revision Momentum (calculated above) over a short window (e.g., 5 days). `momentum_today - momentum_5_days_ago`.
- **Why This Feature**: Captures the *acceleration* or *deceleration* of bias-free sentiment. A shift from negative to positive momentum is a powerful turnaround signal.
- **Logical Meaning**: The rate of change of clean analyst conviction.
- **Is filling nan necessary**: Yes. Use `ts_backfill` for missing historical momentum values.
- **Directionality**: Positive Change = Improving Bias-Free Outlook. Negative Change = Deteriorating Bias-Free Outlook.
- **Boundary Conditions**: Requires sufficient revision volume. Noisy on illiquid stocks.
- **Implementation Example**: `subtract({momentum}, ts_delay({momentum}, 5))`
---
### Q3: "What is anomalous?" (Deviation Features)
**Concept**: Bias-Free Consensus Divergence
- **Sample Fields Used**: `biasfree_analyst_price_target`, `median_bias_adjusted_price_target`, `stddev_bias_adjusted_price_target`
- **Definition**: The z-score of the current *median* bias-adjusted price target relative to its own 20-day history. `(median_pt - ts_mean(median_pt, 20)) / ts_std_dev(median_pt, 20)`.
- **Why This Feature**: Detects when the "clean" consensus view of fair value has moved significantly away from its recent range. This is a structural shift in how quants/modelers view the stock.
- **Logical Meaning**: A breakout or breakdown in the bias-free valuation framework.
- **Is filling nan necessary**: Yes. Backfill with `ts_backfill` for recent gaps. Mask if `ts_std_dev` is 0.
- **Directionality**: High Z-Score = Bias-Free Target has spiked up significantly (Bullish momentum). Low Z-Score = Bias-Free Target has crashed (Bearish momentum).
- **Boundary Conditions**: Extreme values (>3 or <-3) indicate a potential regime change or data error.
- **Implementation Example**: `divide(subtract({median_bias_adjusted_price_target}, ts_mean({median_bias_adjusted_price_target}, 20)), ts_std_dev({median_bias_adjusted_price_target}, 20))`
**Concept**: Analyst Silent Treatment (Zero Revision Anomaly)
- **Sample Fields Used**: `num_upward_biasfree_price_target_revisions`, `num_downward_biasfree_price_target_revisions`, `count_bias_adjusted_price_target_estimates`
- **Definition**: A binary flag identifying stocks with high coverage (`count > 5`) but zero bias-free revisions (`up = 0 AND down = 0`) for a sustained period (e.g., 10 days).
- **Why This Feature**: If many analysts cover a stock but NO ONE is changing their bias-adjusted view, it signals extreme uncertainty or a "wait and see" mode preceding a major event (earnings, FDA approval). It's the calm before the storm.
- **Logical Meaning**: Information vacuum or gridlock in the professional forecasting community.
- **Is filling nan necessary**: No. We use logical operators to create a binary flag. NaN in counts/revisions should be treated as 0 (no data = no signal).
- **Directionality**: Flag = 1 indicates an anomaly (Potential for high volatility breakout).
- **Boundary Conditions**: Avoid flagging small caps with 1 or 2 analysts.
- **Implementation Example**: `and(greater({count_bias_adjusted_price_target_estimates}, 5), equal(add({num_upward_biasfree_price_target_revisions}, {num_downward_biasfree_price_target_revisions}), 0))`
---
### Q4: "What is combined?" (Interaction Features)
**Concept**: Uncertainty-Adjusted Price Target Upside
- **Sample Fields Used**: `median_bias_adjusted_price_target`, `stddev_bias_adjusted_price_target`, `close` (External Data)
- **Definition**: The implied return to the bias-free price target, penalized by the dispersion of those estimates. Formula: `(Target / Price - 1) / (1 + CoV_Target)`.
- **Why This Feature**: A stock with 20% upside but high disagreement among bias-adjusted models is riskier than a stock with 10% upside and tight agreement. This metric favors high-conviction, low-uncertainty opportunities.
- **Logical Meaning**: Risk-adjusted expected return based solely on the bias-free analyst view.
- **Is filling nan necessary**: Yes. Fill `stddev` with mean if missing, or mask. Fill `Price` with `ts_backfill`.
- **Directionality**: High Value = Attractive Risk/Reward based on clean analyst data.
- **Boundary Conditions**: Negative values mean target is below current price (Downside).
- **Implementation Example**: `divide(subtract(divide({median_bias_adjusted_price_target}, {price}), 1), add(1, divide({stddev_bias_adjusted_price_target}, abs({median_bias_adjusted_price_target}))))`
**Concept**: Bias-Free Earnings Visibility Score
- **Sample Fields Used**: `median_bias_adjusted_fundamental_estimate`, `stddev_bias_adjusted_fundamental_estimate`, `count_bias_adjusted_fundamental_estimates`
- **Definition**: A composite score measuring the "cleanliness" and "strength" of the fundamental forecast. Formula: `Count / (1 + (StdDev / |Median|))` (absolute value of the median, matching the implementation below).
- **Why This Feature**: High count + Low dispersion = High visibility. Low count + High dispersion = Low visibility. This distills the quality of the earnings signal into one number.
- **Logical Meaning**: A measure of how reliable the bias-adjusted earnings forecast is.
- **Is filling nan necessary**: Yes. Cap denominator at some max. Treat missing `median` as 0.
- **Directionality**: High Value = High Visibility/Reliability. Low Value = Garbage In, Garbage Out.
- **Boundary Conditions**: Very high scores indicate "obvious" earnings stories (low alpha potential due to efficiency). Very low scores indicate "speculative" stories (high risk/reward).
- **Implementation Example**: `divide({count_bias_adjusted_fundamental_estimates}, add(1, divide({stddev_bias_adjusted_fundamental_estimate}, abs({median_bias_adjusted_fundamental_estimate}))))`
---
### Q5: "What is structural?" (Composition Features)
**Concept**: Model Dependency Ratio (First vs. Third Analogue)
- **Sample Fields Used**: `avg_first_biasfree_price_target_estimate`, `avg_third_biasfree_price_target_estimate`
- **Definition**: The ratio of the First Analogue Mean to the Third Analogue Mean. `First_Mean / Third_Mean`.
- **Why This Feature**: If the first analogue (presumably simpler) and third analogue (presumably complex/ML) diverge significantly, it indicates a stock whose valuation is highly sensitive to complex model specifications. This is a proxy for "Quant Complexity Risk."
- **Logical Meaning**: Measures how much the "fair value" estimate changes when using a sophisticated bias model vs. a basic one.
- **Is filling nan necessary**: Yes. Fill missing analogues with the median of the available ones.
- **Directionality**: Value >> 1.0 = Complex model values stock much lower (Model Risk). Value << 1.0 = Complex model values stock much higher (Model Speculation).
- **Boundary Conditions**: Values near 1.0 indicate model stability.
- **Implementation Example**: `divide({avg_first_biasfree_price_target_estimate}, {avg_third_biasfree_price_target_estimate})`
**Concept**: Target Horizon Skew Indicator
- **Sample Fields Used**: `forecast_horizon_months`, `median_bias_adjusted_price_target`
- **Definition**: The ratio of the median price target to the current price, annualized by the forecast horizon. `(Target/Price)^(12/Horizon) - 1`.
- **Why This Feature**: Normalizes the price target return for time. A 20% return over 24 months is less impressive than a 15% return over 6 months.
- **Logical Meaning**: Annualized expected return derived from bias-free price targets.
- **Is filling nan necessary**: Yes. If `forecast_horizon_months` is missing, assume 12 months.
- **Directionality**: High Value = High annualized expected return.
- **Boundary Conditions**: Very short horizons (1 month) with extreme targets can produce unrealistic annualized figures. Cap at 1000%.
- **Implementation Example**: `subtract(power(divide({median_bias_adjusted_price_target}, {price}), divide(12, {forecast_horizon_months})), 1)`
---
### Q6: "What is cumulative?" (Accumulation Features)
**Concept**: Cumulative Bias-Free Revision Imbalance
- **Sample Fields Used**: `num_upward_biasfree_price_target_revisions`, `num_downward_biasfree_price_target_revisions`
- **Definition**: The cumulative sum of the net revision count (Up - Down) over a trailing 60-day window. `ts_sum(up - down, 60)`.
- **Why This Feature**: Smooths out the daily noise in revision counts to reveal the medium-term trend in bias-free sentiment. A consistently positive imbalance over 60 days is a strong bull signal.
- **Logical Meaning**: The accumulated pressure of bias-free analyst conviction.
- **Is filling nan necessary**: Yes. Treat NaN revisions as 0 in the sum.
- **Directionality**: High Positive = Sustained Bias-Free Optimism. High Negative = Sustained Bias-Free Pessimism.
- **Boundary Conditions**: Reversal patterns occur when cumulative sum peaks and rolls over.
- **Implementation Example**: `ts_sum(subtract({num_upward_biasfree_price_target_revisions}, {num_downward_biasfree_price_target_revisions}), 60)`
**Concept**: Bias-Free Estimate Convergence Countdown
- **Sample Fields Used**: `stddev_bias_adjusted_fundamental_estimate`, `count_bias_adjusted_fundamental_estimates`
- **Definition**: A time decay feature that counts the number of days since the `stddev_bias_adjusted_fundamental_estimate` last widened significantly.
- **Why This Feature**: As earnings announcement approaches, uncertainty (StdDev) should drop as information is disseminated. If StdDev remains high *and* we are close to the announcement date, it signals a high-probability surprise event.
- **Logical Meaning**: Measures the failure of the market to resolve uncertainty before a known catalyst.
- **Is filling nan necessary**: Yes. Use `ts_backfill`.
- **Directionality**: High Days Count + High Current StdDev = Elevated Risk of Earnings Surprise.
- **Boundary Conditions**: Requires knowledge of earnings calendar (external data) for best accuracy.
- **Implementation Example**: `days_from_last_change({stddev_bias_adjusted_fundamental_estimate})`
---
### Q7: "What is relative?" (Comparison Features)
**Concept**: Bias-Free Target vs. Sector Median
- **Sample Fields Used**: `median_bias_adjusted_price_target`
- **Definition**: The cross-sectional rank of the bias-free price target upside within its sector (requires external sector mapping). `group_rank(upside, sector)`.
- **Why This Feature**: A high bias-free target is only meaningful if it's higher than peers. This identifies stocks where the *clean data* suggests relative outperformance within a sector.
- **Logical Meaning**: Relative attractiveness of the bias-free valuation.
- **Is filling nan necessary**: Yes. Use `group_neutralize` or `group_rank`.
- **Directionality**: High Rank (0.8-1.0) = Top relative bias-free upside. Low Rank (0.0-0.2) = Bottom relative bias-free upside.
- **Boundary Conditions**: Sectors with few stocks will have noisy ranks.
- **Implementation Example**: `group_rank(divide({median_bias_adjusted_price_target}, {price}), {sector})`
**Concept**: Bias-Free Fundamental vs. Historical Actual
- **Sample Fields Used**: `median_bias_adjusted_fundamental_estimate`, `eps_actual_ttm` (External Data)
- **Definition**: The ratio of the bias-free fundamental estimate to the trailing twelve-month actual fundamental. `Estimate / Actual`.
- **Why This Feature**: Shows the expected growth/decline in fundamentals, stripped of analyst bias. A high ratio suggests strong expected operational growth.
- **Logical Meaning**: Bias-adjusted expected growth rate.
- **Is filling nan necessary**: Yes. Backfill actuals.
- **Directionality**: High Value = High Expected Fundamental Growth.
- **Boundary Conditions**: Extreme values may indicate one-time items or data errors in the "Actual" field.
- **Implementation Example**: `divide({median_bias_adjusted_fundamental_estimate}, {eps_actual_ttm})`
---
### Q8: "What is essential?" (Essence Features)
**Concept**: Bias-Free Alpha Signal Strength
- **Sample Fields Used**: `mean_bias_adjusted_price_target`, `stddev_bias_adjusted_price_target`, `num_upward_biasfree_price_target_revisions`, `num_downward_biasfree_price_target_revisions`
- **Definition**: A composite z-score of the three core components of this dataset: (1) Implied Upside, (2) Estimate Dispersion, (3) Revision Momentum. Combined into a single score.
- **Why This Feature**: This distills the entire dataset into one clean alpha signal. It answers: "Based on *all* the bias-free data, how bullish or bearish is the clean signal?"
- **Logical Meaning**: The holistic, model-free (ironically) summary of the bias-free analyst view.
- **Is filling nan necessary**: Yes. Each component z-score should be normalized cross-sectionally.
- **Directionality**: High Positive = Strong Bias-Free Bullish Signal. High Negative = Strong Bias-Free Bearish Signal.
- **Boundary Conditions**: This is the core trading signal derived from the dataset.
- **Implementation Example**: `zscore({upside}) - zscore({dispersion}) + zscore({momentum})`
**Concept**: Bias-Free Data Quality Flag
- **Sample Fields Used**: `count_bias_adjusted_price_target_estimates`, `count_bias_adjusted_fundamental_estimates`
- **Definition**: A binary mask: 1 if `count_pt >= 3 AND count_fund >= 3`, else 0.
- **Why This Feature**: All derived features from this dataset are statistically meaningless if the underlying sample size is too small. This flag ensures we only trade on robust data.
- **Logical Meaning**: Minimum Viable Data Threshold for Bias-Free Analysis.
- **Is filling nan necessary**: No. Treat NaN counts as 0.
- **Directionality**: 1 = Reliable Data. 0 = Unreliable Data.
- **Boundary Conditions**: This should be used as a filter `trade_when` condition.
- **Implementation Example**: `and(greater_equal({count_bias_adjusted_price_target_estimates}, 3), greater_equal({count_bias_adjusted_fundamental_estimates}, 3))`
---
## Implementation Considerations
### Data Quality Notes
- **Coverage**: Moderate to High for TOP200 universe. Smaller cap stocks may have sparse or missing analyst coverage.
- **Timeliness**: Event-driven. Data updates when analysts publish or revise estimates. There can be gaps of weeks with no new data.
- **Accuracy**: Depends on the proprietary bias-correction models used by the data vendor. The "truth" of the bias correction is unobservable.
- **Potential Biases**: Survivorship bias (analysts drop coverage of failing companies). Model bias (the bias-correction models themselves may have systematic errors).
### Computational Complexity
- **Lightweight features**: Ratio calculations, logical flags, simple differences.
- **Medium complexity**: Rolling Z-scores (`ts_zscore`), cumulative sums (`ts_sum`).
- **Heavy computation**: Cross-sectional group ranks and neutralizations (`group_rank`, `group_neutralize`).
### Recommended Prioritization
**Tier 1 (Immediate Implementation)**:
1. **Bias Adjustment Fragility Score** - Unique differentiator of this dataset.
2. **Bias-Free Revision Momentum (PT)** - Direct, clean alpha signal.
3. **Bias-Free Data Quality Flag** - Essential filter for all other features.
**Tier 2 (Secondary Priority)**:
1. **Uncertainty-Adjusted Price Target Upside** - Combines signal with risk.
2. **Fundamental Estimate Robustness Ratio** - Checks earnings visibility.
**Tier 3 (Requires Further Validation)**:
1. **Analyst Silent Treatment** - Interesting anomaly but needs backtest validation.
---
## Critical Questions for Further Exploration
### Unanswered Questions:
1. What is the exact statistical difference between the `first`, `second`, and `third` bias-free analogues? (e.g., Linear Regression vs. Neural Net vs. Bayesian).
2. What is the average decay rate of a bias-free revision? Does it predict returns for 5 days or 50 days?
3. Are there specific sectors where bias-free data is most predictive (e.g., Tech) and others where it fails (e.g., Utilities)?
### Recommended Additional Data:
- **Sector/Industry Classification**: Required for cross-sectional relative value features.
- **Actual Earnings Announcement Dates**: To align estimates with reality and measure "Bias-Free Surprise."
- **Historical Stock Prices**: Required for all return/upside calculations.
### Assumptions to Challenge:
- **Assumption**: "Bias-free" means "Better." We should challenge if removing bias removes a *predictive* signal (e.g., some biases are self-fulfilling prophecies).
- **Assumption**: All analogues are equally valid. The market may favor one bias-correction method over another.
---
## Methodology Notes
**Analysis Approach**: This report was generated by:
1. Deep field deconstruction to understand data essence (Multiple bias-correction analogues).
2. Question-driven feature generation (8 fundamental questions).
3. Logical validation of each feature concept.
4. Transparent documentation of reasoning.
**Design Principles**:
- Focus on logical meaning over conventional patterns.
- Every feature must answer a specific question.
- Clear documentation of "why" for each suggestion.
- Emphasis on data understanding over prediction.
---
*Report generated: 2026-04-09*
*Analysis depth: Comprehensive field deconstruction + 8-question framework*
*Next steps: Implement Tier 1 features, validate assumptions, gather additional data as needed*

File diff suppressed because it is too large Load Diff

@ -0,0 +1,366 @@
# -*- coding: utf-8 -*-
import json
import httpx
import random
import time
from datetime import datetime, timedelta
class OdooClient:
    """Odoo JSON-RPC client.

    Talks to the server's /jsonrpc endpoint: authenticates through the
    "common" service, then issues model operations (search_read / write)
    through the "object" service. Usable as a context manager; leaving
    the context logs out and closes the connection.
    """

    def __init__(self, url, db_name, username, password):
        """
        Initialize the Odoo client and log in automatically.

        Args:
            url: Odoo server base URL
            db_name: database name
            username: login user name
            password: login password
        """
        self.url = url
        self.db_name = db_name
        self.username = username
        self.password = password
        self.client = None  # httpx.Client once logged in, else None
        self.uid = None  # numeric user id returned by the login call
        # Log in right away so a constructed instance is ready to use.
        self.login()

    def login(self):
        """Log in and populate self.uid and self.client.

        Returns:
            True on success.

        Raises:
            Exception: when the RPC reports an error or no uid comes back;
                the HTTP client is closed and state reset before re-raising.
        """
        try:
            self.client = httpx.Client(timeout=30.0)
            # Odoo login goes through the JSON-RPC "common" service.
            payload = {
                "jsonrpc": "2.0",
                "method": "call",
                "params": {
                    "service": "common",
                    "method": "login",
                    "args": [self.db_name, self.username, self.password]
                },
                "id": 1
            }
            response = self.client.post(f"{self.url}/jsonrpc", json=payload)
            result = response.json()
            # Check for a JSON-RPC level error first.
            if "error" in result:
                raise Exception(f"登录失败: {result['error']}")
            # In Odoo's login response, "result" is the uid itself.
            self.uid = result.get("result")
            if not self.uid:
                raise Exception("登录失败:未获取到UID")
            print(f"登录成功,UID: {self.uid}")
            return True
        except Exception as e:
            print(f"登录失败: {e}")
            if self.client:
                self.client.close()
                self.client = None
                self.uid = None
            raise

    def logout(self):
        """Log out and close the underlying HTTP connection."""
        if self.client:
            self.client.close()
            self.client = None
            self.uid = None
            print("已退出")

    def search_data(self, model, domain, fields=None, order=None, limit=None):
        """
        Generic search helper built on Odoo's search_read.

        Args:
            model: model name
            domain: search domain, a list of conditions
            fields: list of field names to return (all fields when omitted)
            order: sort specification string
            limit: maximum number of records to return

        Returns:
            List of matching records, or None on failure.
        """
        if not self.client or not self.uid:
            raise Exception("未登录或连接已断开")
        # Positional args for search_read: [domain] plus optional fields list.
        args = [domain]
        if fields:
            args.append(fields)
        kwargs = {}
        if order:
            kwargs['order'] = order
        if limit:
            kwargs['limit'] = limit
        payload = {
            "jsonrpc": "2.0",
            "method": "call",
            "params": {
                "service": "object",
                "method": "execute_kw",
                "args": [
                    self.db_name,
                    self.uid,
                    self.password,
                    model,
                    "search_read",
                    args,
                    kwargs
                ]
            },
            "id": 2
        }
        try:
            response = self.client.post(f"{self.url}/jsonrpc", json=payload)
            result = response.json()
            if "error" in result:
                print(f"查询失败: {result['error']}")
                return None
            return result.get("result", [])
        except Exception as e:
            print(f"查询异常: {e}")
            return None

    def write_data(self, model, record_id, values):
        """
        Update a single record.

        Args:
            model: model name
            record_id: id of the record to update
            values: dict of field values to write

        Returns:
            True when the update succeeded, False otherwise.
        """
        if not self.client or not self.uid:
            raise Exception("未登录或连接已断开")
        payload = {
            "jsonrpc": "2.0",
            "method": "call",
            "params": {
                "service": "object",
                "method": "execute_kw",
                "args": [
                    self.db_name,
                    self.uid,
                    self.password,
                    model,
                    "write",
                    [[record_id], values]
                ]
            },
            "id": 3
        }
        try:
            response = self.client.post(f"{self.url}/jsonrpc", json=payload)
            result = response.json()
            if "error" in result:
                print(f"更新失败: {result['error']}")
                return False
            return result.get("result", False)
        except Exception as e:
            print(f"更新异常: {e}")
            return False

    def __enter__(self):
        """Context manager entry: return the already-logged-in client."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always log out."""
        self.logout()
class SimpleAlphaFetcher:
    """Fetches alpha details from the WorldQuant Brain API.

    Credentials are pulled from a nacos config endpoint; every API
    request authenticates with HTTP basic auth.
    """

    def __init__(self):
        """Create the fetcher and attempt to log in.

        login() returns False on failure rather than raising, so callers
        (and get_alpha_detail) must check ``self.client`` before use.
        """
        self.client = None
        self.login()

    def login(self):
        """Log in to the WorldQuant Brain API.

        Fetches the account name/password from nacos, then POSTs to the
        authentication endpoint with basic auth.

        Returns:
            True on success, False otherwise. On failure ``self.client``
            is always left as None.
        """
        try:
            # Fetch the account credentials from nacos.
            with httpx.Client(timeout=10.0) as temp_client:
                nacos_resp = temp_client.get(
                    'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify'
                )
            if nacos_resp.status_code != 200:
                print('获取账号密码失败')
                return False
            config = nacos_resp.json()
            username = config.get('user_name')
            password = config.get('password')
            if not username or not password:
                print('账号密码不完整')
                return False
            print(f"正在登录账户: {username}")
            # Separate connect/read/write/pool timeouts: API reads can be
            # slow, so the read timeout is the longest.
            timeout = httpx.Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0)
            self.client = httpx.Client(
                auth=httpx.BasicAuth(username, password),
                timeout=timeout
            )
            # The authentication endpoint answers 201 Created on success.
            response = self.client.post('https://api.worldquantbrain.com/authentication')
            if response.status_code == 201:
                print("登录成功!")
                return True
            print(f"登录失败: {response.status_code} - {response.text}")
            self.client.close()
            self.client = None
            return False
        except Exception as e:
            print(f"登录异常: {e}")
            # Bug fix: if the exception hit after the client was created
            # (e.g. the auth POST threw), don't leave the half-initialized,
            # unauthenticated client assigned for later calls to use.
            if self.client:
                self.client.close()
                self.client = None
            return False

    def get_alpha_detail(self, alpha_id):
        """Fetch one alpha's detail, retrying up to 3 times.

        Args:
            alpha_id: Alpha ID.

        Returns:
            The alpha detail dict, or None when the client is not logged
            in or all attempts fail.
        """
        if not self.client:
            print("客户端未初始化")
            return None
        url = f"https://api.worldquantbrain.com/alphas/{alpha_id}"
        for attempt in range(3):
            try:
                response = self.client.get(url)
                if response.status_code == 200:
                    return response.json()
                else:
                    print(f"获取 Alpha 失败 (尝试 {attempt + 1}/3): {response.status_code} - {response.text}")
            except Exception as e:
                print(f"获取 Alpha 异常 (尝试 {attempt + 1}/3): {e}")
            # Randomized back-off between attempts to avoid hammering the API.
            if attempt < 2:
                sleep_time = random.uniform(5, 8)
                print(f"等待 {sleep_time:.1f} 秒后重试...")
                time.sleep(sleep_time)
        return None

    def logout(self):
        """Close the HTTP client and reset state."""
        if self.client:
            self.client.close()
            self.client = None
            print("Alpha 客户端已退出")
# Helper built on top of the OdooClient class.
def fetch_local_performance(odoo_client, model, domain, fields, limit):
    """Query performance rows from the local Odoo instance.

    Delegates to ``odoo_client.search_data`` (newest records first) and
    terminates the process when nothing comes back.
    """
    records = odoo_client.search_data(
        model=model,
        domain=domain,
        fields=fields,
        order="id desc",
        limit=limit,
    )
    if not records:
        print("未获取到数据")
        exit(1)
    return records
# Usage example / entry point.
if __name__ == "__main__":
    # ===== Odoo connection settings =====
    ODOO_URL = "http://192.168.31.41:32000"
    DB_NAME = "quantify"
    USERNAME = "rpc"
    PASSWORD = "aaaAAA111"

    # ===== Search settings =====
    # Time window for write_date. NOTE(review): time_range is computed but
    # never added to the domain below — confirm whether that is intended.
    days = 7
    midnight_today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    window_start = midnight_today - timedelta(days=days)
    time_range = ('write_date', '>=', window_start.strftime('%Y-%m-%d %H:%M:%S'))

    # Target model, filter, fields and batch size.
    model = "alpha.expression.line"
    domain = [('status', '=', 'success'), ('performance', '=', '{}')]
    fields = ['alpha_id']
    limit = 1

    try:
        with OdooClient(ODOO_URL, DB_NAME, USERNAME, PASSWORD) as odoo:
            records = fetch_local_performance(odoo, model, domain, fields, limit)
            fetcher = SimpleAlphaFetcher()
            try:
                for record in records:
                    line_id = record.get('id')
                    alpha_id = record.get('alpha_id')
                    print(f'正在处理: {alpha_id}')
                    if not alpha_id:
                        print(f"记录 {line_id} 没有 alpha_id,跳过")
                        continue
                    # Pull the alpha detail from the Brain API.
                    detail = fetcher.get_alpha_detail(alpha_id)
                    if not detail:
                        print(f"获取 Alpha {alpha_id} 失败,跳过该记录")
                        continue
                    # Write the detail back to Odoo as pretty-printed JSON.
                    update_values = {'performance': json.dumps(detail, indent=4, ensure_ascii=False)}
                    if odoo.write_data(model, line_id, update_values):
                        print(f"成功更新记录 {line_id}")
                    else:
                        print(f"更新记录 {line_id} 失败")
            finally:
                fetcher.logout()
    except Exception as e:
        print(f"程序执行失败: {e}")

@ -2,7 +2,7 @@ import httpx
from httpx import BasicAuth from httpx import BasicAuth
def login(credentials_file='account.txt'): def login():
"""登录WorldQuant Brain API""" """登录WorldQuant Brain API"""
# 从nacos获取账号密码 # 从nacos获取账号密码
nacos_resp = httpx.get('http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify') nacos_resp = httpx.get('http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify')

Loading…
Cancel
Save