from pathlib import Path
from typing import Optional
import pandas as pd
from metaDMG.utils import append_fit_predictions, get_results_dir
[docs]def load_results(
config_file: Optional[Path] = None,
results_dir: Optional[Path] = None,
) -> pd.DataFrame:
"""Load the results from either a config file or a results-directory
Parameters
----------
config_file
The the config file to use to locate the results directory, by default None
results_dir
The results directory, by default None
Returns
-------
A dataframe of all the results
"""
results_dir = get_results_dir(
config_file=config_file,
results_dir=results_dir,
)
df_results = pd.read_parquet(results_dir)
return df_results
def filter_results(
df_results: pd.DataFrame,
query: str,
) -> pd.DataFrame:
"""Filter the results given a Pandas query
Parameters
----------
df_results
Input dataframe
query
Pandas query
Returns
-------
Output dataframe
"""
if query:
if query.startswith(" & "):
query = query[3:]
df_results = df_results.query(query)
return df_results
def save_results(df_results, output):
suffixes = "".join(output.suffixes)
if ".csv" in suffixes:
sep = ","
elif ".tsv" in suffixes:
sep = "\t"
else:
s = (
f"'{suffixes}' not implemented yet, only .csv and .tsv "
"(and compressed versions)"
)
raise AssertionError(s)
output.parent.mkdir(parents=True, exist_ok=True)
df_results.to_csv(output, sep=sep, index=False)
def filter_and_save_results(
output,
query,
config_file=None,
results_dir=None,
add_fit_predictions=False,
):
df_results = load_results(
config_file=config_file,
results_dir=results_dir,
)
df_results = filter_results(df_results, query)
if add_fit_predictions:
df_results = append_fit_predictions(df_results)
save_results(df_results, output)