You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
180 lines
6.5 KiB
180 lines
6.5 KiB
import json
|
|
import os
|
|
from typing import Union
|
|
|
|
import pandas as pd
|
|
from pandas.io.formats.style import Styler
|
|
|
|
# Base URL of the BRAIN API; can be overridden with the BRAIN_API_URL environment variable.
brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com")
# Base URL of the BRAIN platform web site (used to build links to alpha pages);
# can be overridden with the BRAIN_URL environment variable.
brain_url = os.environ.get("BRAIN_URL", "https://platform.worldquantbrain.com")
|
|
|
|
|
|
def make_clickable_alpha_id(alpha_id: str) -> str:
    """
    Create a clickable HTML link for an alpha ID.

    The link target is the module-level ``brain_url`` followed by ``/alpha/``
    and the alpha ID; the link text is the alpha ID itself.

    Args:
        alpha_id (str): The ID of the alpha.

    Returns:
        str: An HTML ``<a>`` element pointing to the alpha's page on the platform.
    """
    return f'<a href="{brain_url}/alpha/{alpha_id}">{alpha_id}</a>'
|
|
|
|
|
|
def prettify_result(
    result: list, detailed_tests_view: bool = False, clickable_alpha_id: bool = False
) -> Union[pd.DataFrame, "Styler"]:
    """
    Combine and format simulation results into a single DataFrame for analysis.

    Statistics and expressions are taken from entries whose ``is_stats`` is not
    None; test results are taken from entries whose ``is_tests`` is not None.
    The three pieces are inner-joined on ``alpha_id``, rows are ordered by
    descending ``fitness``, tests whose result is ``"WARNING"`` are left out,
    columns containing the value ``"PENDING"`` are dropped, and column names
    are converted to lower snake case.

    Args:
        result (list): A list of dictionaries containing simulation results. Each
            entry is read through the keys ``alpha_id``, ``simulate_data``,
            ``is_stats`` and ``is_tests``.
        detailed_tests_view (bool, optional): If True, each test cell holds a dict
            with the test's ``limit``, ``result`` and ``value`` instead of only the
            result. Defaults to False.
        clickable_alpha_id (bool, optional): If True, make alpha IDs clickable. Defaults to False.

    Returns:
        pandas.DataFrame or pandas.io.formats.style.Styler: A DataFrame containing formatted results,
        optionally with clickable alpha IDs. When no entry carries ``is_stats`` an empty
        DataFrame is returned, regardless of ``clickable_alpha_id``.
    """
    # Only alphas that produced in-sample statistics can appear in the report.
    simulated = [entry for entry in result if entry["is_stats"] is not None]
    if not simulated:
        # pd.concat raises ValueError on an empty list; an empty frame is a clearer answer.
        return pd.DataFrame()

    is_stats_df = pd.concat([entry["is_stats"] for entry in simulated]).reset_index(drop=True)
    is_stats_df = is_stats_df.sort_values("fitness", ascending=False)

    # SUPER alphas are described by their selection/combo pair, others by the regular expression.
    expressions = {}
    for entry in simulated:
        simulate_data = entry["simulate_data"]
        if simulate_data["type"] == "SUPER":
            expressions[entry["alpha_id"]] = {
                "selection": simulate_data["selection"],
                "combo": simulate_data["combo"],
            }
        else:
            expressions[entry["alpha_id"]] = simulate_data["regular"]
    expression_df = pd.DataFrame(list(expressions.items()), columns=["alpha_id", "expression"])

    list_of_is_tests = [entry["is_tests"] for entry in result if entry["is_tests"] is not None]
    is_tests_df = pd.concat(list_of_is_tests, sort=True).reset_index(drop=True)
    # .copy() makes the filtered frame independent, so adding the "details"
    # column below is not a chained assignment on a slice of the original.
    is_tests_df = is_tests_df[is_tests_df["result"] != "WARNING"].copy()
    if detailed_tests_view:
        cols = ["limit", "result", "value"]
        is_tests_df["details"] = is_tests_df[cols].to_dict(orient="records")
        pivot_values = "details"
    else:
        pivot_values = "result"
    # One row per alpha, one column per test name.
    is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values=pivot_values).reset_index()

    alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id")
    alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id")
    # Drop every column in which at least one cell equals "PENDING".
    pending_columns = alpha_stats.columns[(alpha_stats == "PENDING").any()]
    alpha_stats = alpha_stats.drop(columns=pending_columns)
    # camelCase -> snake_case: insert "_" between a lowercase and an uppercase letter, then lowercase.
    alpha_stats.columns = alpha_stats.columns.str.replace(r"(?<=[a-z])(?=[A-Z])", "_", regex=True).str.lower()

    if clickable_alpha_id:
        return alpha_stats.style.format({"alpha_id": lambda x: make_clickable_alpha_id(str(x))})
    return alpha_stats
|
|
|
|
|
|
def concat_pnl(result: list) -> pd.DataFrame:
    """
    Combine PnL results from multiple alphas into a single DataFrame.

    Entries whose ``pnl`` is None are skipped. The index of the concatenated
    frame is moved into a regular column by ``reset_index()``.

    Args:
        result (list): A list of dictionaries containing simulation results with PnL data
            under the key ``pnl``.

    Returns:
        pandas.DataFrame: A DataFrame containing combined PnL data for all alphas,
        or an empty DataFrame when no entry carries PnL data.
    """
    list_of_pnls = [entry["pnl"] for entry in result if entry["pnl"] is not None]
    if not list_of_pnls:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(list_of_pnls).reset_index()
|
|
|
|
|
|
def concat_is_tests(result: list) -> pd.DataFrame:
    """
    Combine in-sample test results from multiple alphas into a single DataFrame.

    Entries whose ``is_tests`` is None are skipped. Columns of the combined
    frame are sorted (``sort=True``) and the index is renumbered from zero.

    Args:
        result (list): A list of dictionaries containing simulation results with in-sample
            test data under the key ``is_tests``.

    Returns:
        pandas.DataFrame: A DataFrame containing combined in-sample test results for all alphas,
        or an empty DataFrame when no entry carries test data.
    """
    is_tests_list = [entry["is_tests"] for entry in result if entry["is_tests"] is not None]
    if not is_tests_list:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(is_tests_list, sort=True).reset_index(drop=True)
|
|
|
|
|
|
def save_simulation_result(result: dict) -> None:
    """
    Save the simulation result to a JSON file in the 'simulation_results' folder.

    The file is named ``<alpha id>_<region>`` (no extension) and is written
    relative to the current working directory; the folder is created if it
    does not exist yet. An existing file with the same name is overwritten.

    Args:
        result (dict): A dictionary containing the simulation result for an alpha.
            ``result["id"]`` and ``result["settings"]["region"]`` are used to
            build the file name.
    """
    alpha_id = result["id"]
    region = result["settings"]["region"]

    folder_path = "simulation_results/"
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, f"{alpha_id}_{region}")
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(result, file)
|
|
|
|
|
|
def save_pnl(pnl_df: pd.DataFrame, alpha_id: str, region: str) -> None:
    """
    Save the PnL data for an alpha to a CSV file in the 'alphas_pnl' folder.

    The file is named ``<alpha_id>_<region>.csv`` and is written relative to
    the current working directory; the folder is created if it does not exist
    yet. The DataFrame index is written as the first CSV column.

    Args:
        pnl_df (pandas.DataFrame): The DataFrame containing PnL data.
        alpha_id (str): The ID of the alpha.
        region (str): The region for which the PnL data was generated.
    """
    folder_path = "alphas_pnl/"
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
    pnl_df.to_csv(file_path)
|
|
|
|
|
|
def save_yearly_stats(yearly_stats: pd.DataFrame, alpha_id: str, region: str) -> None:
    """
    Save the yearly statistics for an alpha to a CSV file in the 'yearly_stats' folder.

    The file is named ``<alpha_id>_<region>.csv`` and is written relative to
    the current working directory; the folder is created if it does not exist
    yet. The DataFrame index is not written to the file.

    Args:
        yearly_stats (pandas.DataFrame): The DataFrame containing yearly statistics.
        alpha_id (str): The ID of the alpha.
        region (str): The region for which the statistics were generated.
    """
    folder_path = "yearly_stats/"
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
    yearly_stats.to_csv(file_path, index=False)
|
|
|
|
|
|
def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame:
    """
    Expand dictionary columns in a DataFrame into separate columns.

    A column is treated as a dictionary column when the value in its first row
    is a ``dict``. Each such column ``col`` contributes new columns named
    ``col_<key>``; the original columns are kept and the new ones are appended
    on the right.

    Args:
        data (pandas.DataFrame): The input DataFrame with dictionary columns.

    Returns:
        pandas.DataFrame: A new DataFrame with expanded columns. If the input is
        empty or has no dictionary columns, an unchanged copy is returned.
    """
    # An empty frame has no first row to inspect (iloc[0] would raise IndexError).
    if data.empty:
        return data.copy()

    dict_columns = [col for col in data.columns if isinstance(data[col].iloc[0], dict)]
    # pd.concat raises ValueError on an empty list, so bail out when nothing needs expanding.
    if not dict_columns:
        return data.copy()

    new_columns = pd.concat(
        [data[col].apply(pd.Series).add_prefix(f"{col}_") for col in dict_columns],
        axis=1,
    )
    return pd.concat([data, new_columns], axis=1)
|
|
|