From 88ac366352758c51520502eb0fc9099875143bb2 Mon Sep 17 00:00:00 2001 From: Jack Date: Thu, 9 Apr 2026 00:52:05 +0800 Subject: [PATCH] ++ --- alpha_check/main.go | 35 ++++----- alpha_check/main.py | 169 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 155 insertions(+), 49 deletions(-) diff --git a/alpha_check/main.go b/alpha_check/main.go index f99f280..1482782 100644 --- a/alpha_check/main.go +++ b/alpha_check/main.go @@ -6,16 +6,15 @@ import ( "fmt" "math" "strconv" - "strings" "time" "github.com/valyala/fasthttp" ) const ( - baseURL = "https://api.worldquantbrain.com" + baseURL = "https://api.worldquantbrain.com" zeroStreakThreshold = 5 * 252 - requiredDays = 2920 + requiredDays = 2920 ) type Client struct { @@ -25,17 +24,17 @@ type Client struct { } type AlphaRecord struct { - ID string `json:"id"` - Name string `json:"name"` - DateCreated string `json:"dateCreated"` - Sharpe float64 `json:"sharpe"` - Fitness float64 `json:"fitness"` - Turnover float64 `json:"turnover"` - Margin float64 `json:"margin"` - LongCount float64 `json:"longCount"` - ShortCount float64 `json:"shortCount"` - Decay int `json:"decay"` - Code string `json:"code"` + ID string `json:"id"` + Name string `json:"name"` + DateCreated string `json:"dateCreated"` + Sharpe float64 `json:"sharpe"` + Fitness float64 `json:"fitness"` + Turnover float64 `json:"turnover"` + Margin float64 `json:"margin"` + LongCount float64 `json:"longCount"` + ShortCount float64 `json:"shortCount"` + Decay int `json:"decay"` + Code string `json:"code"` } type AlphaResponse struct { @@ -59,7 +58,11 @@ type PnlResponse struct { func NewClient(username, password string) *Client { return &Client{ - client: &fasthttp.Client{}, + client: &fasthttp.Client{ + ReadTimeout: 60 * time.Second, + WriteTimeout: 10 * time.Second, + MaxIdleConnDuration: 120 * time.Second, + }, username: username, password: password, } @@ -205,7 +208,7 @@ func GetAlphas(c *Client, startDate, endDate string, sharpeTh, fitnessTh float64 for _, item := range alphaResp.Results { alphaID := item.ID - name := item.Name + _ = item.Name // avoid unused variable dateCreated := item.DateCreated sharpe := getFloat(item.Is, "sharpe") fitness := getFloat(item.Is, "fitness") diff --git a/alpha_check/main.py b/alpha_check/main.py index a738d33..1b78aff 100644 --- a/alpha_check/main.py +++ b/alpha_check/main.py @@ -17,50 +17,89 @@ def login(): limits=limits, transport=transport ) + print("[INFO] 正在登录...") response = s.post('https://api.worldquantbrain.com/authentication') - print(response.content) + print(f"[INFO] 登录响应: {response.content}") return s def wait_get(s, url: str, max_retries: int = 10): + print(f"[WAIT_GET] 开始请求: {url[:80]}...") retries = 0 while retries < max_retries: - while True: - simulation_progress = s.get(url) - if simulation_progress.headers.get("Retry-After", 0) == 0: + print(f"[WAIT_GET] 第 {retries + 1} 次尝试...") + simulation_progress = s.get(url) + status_code = simulation_progress.status_code + content_length = simulation_progress.headers.get("content-length", "unknown") + retry_after = simulation_progress.headers.get("Retry-After", 0) + print(f"[WAIT_GET] 状态码: {status_code}, content-length: {content_length}, retry-after: {retry_after}") + + # 检查 Retry-After 头(即使状态码是200也可能需要等待) + if retry_after and float(retry_after) > 0: + wait_time = float(retry_after) + print(f"[WAIT_GET] 需要等待 {wait_time} 秒后重试...") + time.sleep(wait_time) + continue # 不增加重试计数,直接再试 + + # 成功且有内容 + if status_code < 400: + # 检查 content-length + if content_length and content_length != "0": + print(f"[WAIT_GET] 请求成功,有数据") break - time.sleep(float(simulation_progress.headers["Retry-After"])) - if simulation_progress.status_code < 400: + else: + # 200 但无内容,可能是数据还没准备好,指数退避等待 + wait_time = 2 ** retries + print(f"[WAIT_GET] 状态码200但无内容,等待 {wait_time} 秒后重试...") + time.sleep(wait_time) + retries += 1 + continue + + # 404 - 资源不存在,不需要重试 + if status_code == 404: + print(f"[WAIT_GET] 404 Not Found,跳过重试") break - else: - time.sleep(2 ** retries) - retries += 1 + + # 429 - Rate Limit + if status_code == 429: + print(f"[WAIT_GET] 429 Rate Limit") + continue + + # 其他错误,指数退避重试 + wait_time = 2 ** retries + print(f"[WAIT_GET] 请求失败,等待 {wait_time} 秒后重试...") + time.sleep(wait_time) + retries += 1 + return simulation_progress -def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, usage): - s = login() +def get_alphas(s, start_date, end_date, sharpe_th, fitness_th, region, alpha_num, usage): + print(f"[INFO] 开始获取Alpha列表, 参数: start_date={start_date}, end_date={end_date}, sharpe_th={sharpe_th}, fitness_th={fitness_th}, region={region}, alpha_num={alpha_num}, usage={usage}") output = [] count = 0 for i in range(0, alpha_num, 100): - print(i) + print(f"[GET_ALPHAS] 处理偏移量: {i}") url_e = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d" % (i) \ - + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \ - + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \ + + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=" + start_date \ + + "T00:00:00-04:00&dateCreated%3C=" + end_date \ + "T00:00:00-04:00&is.fitness%3E" + str(fitness_th) + "&is.sharpe%3E" \ + str(sharpe_th) + "&settings.region=" + region + "&order=-is.sharpe&hidden=false&type!=SUPER" url_c = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d" % (i) \ - + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \ - + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \ + + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=" + start_date \ + + "T00:00:00-04:00&dateCreated%3C=" + end_date \ + "T00:00:00-04:00&is.fitness%3C-" + str(fitness_th) + "&is.sharpe%3C-" \ + str(sharpe_th) + "&settings.region=" + region + "&order=is.sharpe&hidden=false&type!=SUPER" urls = [url_e] if usage != "submit": urls.append(url_c) for url in urls: + print(f"[GET_ALPHAS] 请求URL: {url[:100]}...") response = s.get(url) + print(f"[GET_ALPHAS] 响应状态码: {response.status_code}") try: alpha_list = response.json()["results"] + print(f"[GET_ALPHAS] 获取到 {len(alpha_list)} 个alpha") for j in range(len(alpha_list)): alpha_id = alpha_list[j]["id"] name = alpha_list[j]["name"] @@ -78,7 +117,7 @@ def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, u if sharpe < -sharpe_th: exp = "-%s" % exp rec = [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated, decay] - print(rec) + print(f"[GET_ALPHAS] 符合条件的alpha: {rec}") if turnover > 0.7: rec.append(decay * 4) elif turnover > 0.6: @@ -92,16 +131,18 @@ def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, u elif turnover > 0.3: rec.append(decay + 2) output.append(rec) - except: - print("%d finished re-login" % i) + except Exception as e: + print(f"[GET_ALPHAS] 处理第 {i} 个时出错: {e}, 重新登录...") s = login() - print("count: %d" % count) + print(f"[GET_ALPHAS] 总共获取 {count} 个alpha, 符合条件的有 {len(output)} 个") return output, s def check_consecutive_non_zero_values(alpha_id, data, required_streak=200): + print(f"[CHECK_NON_ZERO] 检查alpha {alpha_id}, 数据长度: {len(data) if data else 0}") if not data or len(data) < required_streak: + print(f"[CHECK_NON_ZERO] 数据不足{required_streak}条, 跳过检查") return True def check_column(column_data): @@ -133,30 +174,65 @@ def check_consecutive_non_zero_values(alpha_id, data, required_streak=200): column1_values.append(row[1]) column2_values.append(row[2]) + print(f"[CHECK_NON_ZERO] 列1数据点数: {len(column1_values)}, 列2数据点数: {len(column2_values)}") + if column1_values and column2_values: is_col1_all_zeros = all(v == 0 for v in column1_values) is_col2_all_zeros = all(v == 0 for v in column2_values) if is_col1_all_zeros or is_col2_all_zeros: - print(alpha_id, "不合法") + print(f"[CHECK_NON_ZERO] {alpha_id} 不合法: 存在全零列") return False if not check_column(column1_values): - print(alpha_id, "不合法") + print(f"[CHECK_NON_ZERO] {alpha_id} 不合法: 列1存在连续非零值") return False if not check_column(column2_values): - print(alpha_id, "不合法") + print(f"[CHECK_NON_ZERO] {alpha_id} 不合法: 列2存在连续非零值") return False + print(f"[CHECK_NON_ZERO] {alpha_id} 通过检查") return True def get_alpha_pnl_legal(s, alpha_id: str) -> bool: + print(f"\n[PNL_CHECK] ===== 开始检查Alpha: {alpha_id} =====") not_legal_id = [] - pnl = wait_get(s, "https://api.worldquantbrain.com/alphas/" + alpha_id + "/recordsets/pnl").json() - records = pnl["records"] + print(f"[PNL_CHECK] 正在获取PNL数据...") + resp = wait_get(s, "https://api.worldquantbrain.com/alphas/" + alpha_id + "/recordsets/pnl") + + # 检查状态码 + if resp.status_code == 404: + print(f"[PNL_CHECK] {alpha_id} PNL接口返回404,可能Alpha不存在或无权限,判定为不合法") + return False + + if resp.status_code >= 400: + print(f"[PNL_CHECK] {alpha_id} PNL接口返回错误状态码 {resp.status_code},判定为不合法") + return False + + # 检查响应内容是否为空 + if not resp.content or len(resp.content) == 0: + print(f"[PNL_CHECK] 警告: {alpha_id} 响应内容为空!") + print(f"[PNL_CHECK] 状态码: {resp.status_code}") + print(f"[PNL_CHECK] 响应头: {dict(resp.headers)}") + print(f"[PNL_CHECK] 此Alpha可能是好的,但无法获取PNL数据,跳过处理(不隐藏)") + return True # 返回True表示不处理,不隐藏 + + try: + pnl = resp.json() + except Exception as e: + print(f"[PNL_CHECK] 警告: {alpha_id} JSON解析失败: {e}") + print(f"[PNL_CHECK] 响应内容前500字符: {resp.text[:500]}") + print(f"[PNL_CHECK] 状态码: {resp.status_code}") + print(f"[PNL_CHECK] 响应头: {dict(resp.headers)}") + print(f"[PNL_CHECK] 此Alpha可能是好的,但无法解析PNL数据,跳过处理(不隐藏)") + return True # 返回True表示不处理,不隐藏 + + records = pnl.get("records", []) + print(f"[PNL_CHECK] 获取到 {len(records)} 条PNL记录") if not records: + print(f"[PNL_CHECK] {alpha_id} 无PNL记录, 判定为不合法") return False date_list = [] @@ -165,16 +241,21 @@ def get_alpha_pnl_legal(s, alpha_id: str) -> bool: date_obj = datetime.datetime.strptime(record[0], '%Y-%m-%d').date() date_list.append(date_obj) except Exception: + print(f"[PNL_CHECK] {alpha_id} 日期解析失败, 判定为不合法") return False min_date = min(date_list) max_date = max(date_list) total_days = (max_date - min_date).days + print(f"[PNL_CHECK] 日期范围: {min_date} 到 {max_date}, 总天数: {total_days}") + if total_days < 2920: + print(f"[PNL_CHECK] {alpha_id} 时间跨度不足8年({total_days}天), 判定为不合法") return False zero_streak_threshold = 5 * 252 col1_zeros = [record[1] == 0 for record in records] + print(f"[PNL_CHECK] 列1零值数量: {sum(col1_zeros)}/{len(col1_zeros)}") def max_consecutive_zeros(arr): max_streak = current_streak = 0 @@ -184,39 +265,61 @@ def get_alpha_pnl_legal(s, alpha_id: str) -> bool: return max_streak col1_max_zero_streak = max_consecutive_zeros(col1_zeros) + print(f"[PNL_CHECK] 列1最大连续零值: {col1_max_zero_streak}") if col1_max_zero_streak >= zero_streak_threshold: - print(f"{alpha_id} 不合法:存在连续{zero_streak_threshold // 252}年零值") + print(f"[PNL_CHECK] {alpha_id} 不合法:存在连续{zero_streak_threshold // 252}年零值") not_legal_id.append(str(alpha_id)) return False + print(f"[PNL_CHECK] 正在检查连续非零值...") if not check_consecutive_non_zero_values(alpha_id, records): return False + print(f"[PNL_CHECK] {alpha_id} 合法") return True def mute(s, alpha_id): + print(f"[MUTE] 正在隐藏Alpha: {alpha_id}") url = "https://api.worldquantbrain.com/alphas/" + alpha_id data = { "hidden": True } response = s.patch(url, json=data) + print(f"[MUTE] 隐藏 {alpha_id} 完成, 状态码: {response.status_code}") def main(): - fo_tracker, s = get_alphas('12-01', '12-31', 1, 0.5, 'USA', 1000, 'submit') + print("=" * 60) + print("[MAIN] 程序启动") + print("=" * 60) + s = login() + fo_tracker, _ = get_alphas(s, '2025-09-17', '2026-12-31', 1, 0.5, 'USA', 1000, 'submit') f_num = len(fo_tracker) - print(f_num, "个alpha 进行pnl合法检测,请耐心等待") + print(f"\n[MAIN] 共 {f_num} 个alpha 进行pnl合法检测,请耐心等待") + print(f"[MAIN] 实际列表长度: {len(fo_tracker)}") + count = 0 - print(len(fo_tracker)) + total = len(fo_tracker) + for i in fo_tracker[::-1][0:]: if count % 25 == 0: - print('===========', count, '===========') + print(f'\n[MAIN] =========== 进度: {count}/{total} ({count/total*100:.1f}%) ===========') count += 1 - if get_alpha_pnl_legal(s, i[0]) == False: - print(i[0], '已经隐藏') - mute(s, i[0]) + + alpha_id = i[0] + print(f"\n[MAIN] 正在处理第 {count} 个alpha: {alpha_id}") + + if get_alpha_pnl_legal(s, alpha_id) == False: + print(f"[MAIN] {alpha_id} 检测结果: 不合法, 执行隐藏操作") + mute(s, alpha_id) + else: + print(f"[MAIN] {alpha_id} 检测结果: 合法") + + print("\n" + "=" * 60) + print(f"[MAIN] 全部处理完成, 共处理 {count} 个alpha") + print("=" * 60) if __name__ == "__main__":