Statistics and Evidence#
This notebook collates the summary statistics needed for the data release accompanying the first paper. It loads the sampler outputs specified by config.mk, extracts run-level metrics, and emits both the tables and the LaTeX macros referenced in the paper.
import h5py
from pathlib import Path
import numpy as np
import yaml
from aspire_analysis_tools.utils import read_make_config
Locate Results and Prepare Outputs#
Create the tables/ directory for exported summaries and read the RESULTS_MAPPING YAML pointed to by config.mk. Each entry in the mapping is resolved relative to the data release directory so downstream code can open the HDF5 samples without manual path edits.
output = Path("tables")
output.mkdir(exist_ok=True)
make_config = read_make_config("config.mk")
results_mapping_file = Path(make_config["RESULTS_MAPPING"])
data_release_path = results_mapping_file.parent
with open(results_mapping_file, "r") as f:
    result_paths = yaml.safe_load(f)

# Resolve each result path relative to the data release directory
for label, wf_result_files in result_paths.items():
    for waveform, result_files in wf_result_files.items():
        for sampler, result_file in result_files.items():
            result_paths[label][waveform][sampler] = data_release_path / result_file
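For reference, the mapping file nests analysis label → waveform → sampler → relative path, mirroring the three nested loops above. A hypothetical entry (the analysis, waveform, and sampler labels are the ones used later in this notebook; the file names are invented for illustration):

GW150914-like:
  IMRPhenomXO4a:
    dynesty: GW150914-like/XO4a_dynesty_result.hdf5
    aspire from IMRPhenomXPHM: GW150914-like/XO4a_aspire_result.hdf5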
Collect Sampler Diagnostics#
Iterate over every analysis/waveform/sampler combination, opening the corresponding HDF5 file to record likelihood evaluations, run time, evidence values, and posterior sample counts. Missing files or keys are skipped so partially completed releases do not break the notebook.
stats = {}
for label, wf_result_files in result_paths.items():
    stats[label] = {}
    for waveform, result_files in wf_result_files.items():
        stats[label][waveform] = {}
        for sampler, result_file in result_files.items():
            entry = {
                "evals": None,
                "time": None,
                "log_evidence": None,
                "log_evidence_err": None,
                "n_posterior_samples": None,
            }
            stats[label][waveform][sampler] = entry
            try:
                with h5py.File(result_file, "r") as f:
                    entry["evals"] = f["num_likelihood_evaluations"][()]
                    entry["time"] = f["sampling_time"][()]
                    entry["log_evidence"] = f["log_evidence"][()]
                    entry["log_evidence_err"] = f["log_evidence_err"][()]
                    entry["n_posterior_samples"] = len(f["posterior/chirp_mass"])
            except (KeyError, OSError):
                # Missing files or datasets leave the affected fields as None
                pass
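When a file is skipped, it can help to inspect its layout directly; h5py's visit method walks every group and dataset name in a file. A minimal sketch (the function name is ours; the dataset paths it prints should include the keys the loop above expects, e.g. posterior/chirp_mass):

def print_layout(path):
    # Print the path of every group and dataset in the file
    with h5py.File(path, "r") as f:
        f.visit(print)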
Build Summary DataFrame#
Flatten the nested statistics dictionary into a pandas.DataFrame that lists evidence, run duration, and efficiency metrics for each sampler configuration. The resulting table is displayed for quick inspection.
# Make a table with all the evidence estimates for a given piece of data
import pandas as pd

df_list = []
for label, data in stats.items():
    for waveform, stat_dict in data.items():
        for sampler, value in stat_dict.items():
            df_list.append({
                "Analysis": label,
                "Waveform": waveform,
                "Sampler": sampler,
                "Log Evidence": f"{value['log_evidence']:.2f} +/- {value['log_evidence_err']:.2f}" if value['log_evidence'] is not None and value['log_evidence_err'] is not None else "N/A",
                "Evals": f"{value['evals']:,}" if value['evals'] is not None else "N/A",
                "Time (hours)": f"{value['time'] / 3600:.2f}" if value['time'] is not None else "N/A",
                "Posterior Samples": f"{value['n_posterior_samples']:,}" if value['n_posterior_samples'] is not None else "N/A",
                "Evals per Sample": f"{value['evals'] / value['n_posterior_samples']:.2f}" if value['evals'] is not None and value['n_posterior_samples'] is not None else "N/A",
            })
df = pd.DataFrame(df_list)
df = df[["Analysis", "Waveform", "Sampler", "Log Evidence", "Evals", "Time (hours)", "Posterior Samples", "Evals per Sample"]]
df
| | Analysis | Waveform | Sampler | Log Evidence | Evals | Time (hours) | Posterior Samples | Evals per Sample |
|---|---|---|---|---|---|---|---|---|
| 0 | GW150914 | IMRPhenomXO4a | aspire | -12768.08 +/- 0.07 | 9,539,000 | 2.09 | 10,000 | 953.90 |
| 1 | GW150914 | IMRPhenomXO4a | dynesty | -12767.84 +/- 0.16 | 32,198,141 | 11.59 | 5,627 | 5722.08 |
| 2 | GW150914 | IMRPhenomXPHM | dynesty | -12767.24 +/- 0.16 | 34,672,491 | 9.09 | 5,443 | 6370.11 |
| 3 | GW150914-like | IMRPhenomD | dynesty | -57891.93 +/- 0.15 | 7,903,596 | 1.19 | 4,415 | 1790.17 |
| 4 | GW150914-like | IMRPhenomPv2 | aspire from IMRPhenomD | -57893.11 +/- 0.06 | 1,231,000 | 0.30 | 10,000 | 123.10 |
| 5 | GW150914-like | IMRPhenomPv2 | dynesty | -57893.12 +/- 0.15 | 7,151,572 | 1.16 | 4,737 | 1509.73 |
| 6 | GW150914-like | IMRPhenomXO4a | aspire from IMRPhenomXPHM | -57893.36 +/- 0.06 | 8,033,000 | 4.01 | 10,000 | 803.30 |
| 7 | GW150914-like | IMRPhenomXO4a | dynesty | -57893.23 +/- 0.16 | 32,405,018 | 14.62 | 5,810 | 5577.46 |
| 8 | GW150914-like | IMRPhenomXPHM | dynesty | -57893.40 +/- 0.17 | 30,773,397 | 12.91 | 5,702 | 5396.95 |
| 9 | eccentric | TaylorF2 | dynesty | -1929.12 +/- 0.16 | 25,013,795 | 0.54 | 4,564 | 5480.67 |
| 10 | eccentric | TaylorF2Ecc | aspire | -1928.34 +/- 0.06 | 7,551,000 | 0.23 | 10,000 | 755.10 |
| 11 | eccentric | TaylorF2Ecc | dynesty | -1928.15 +/- 0.16 | 27,105,765 | 0.66 | 4,240 | 6392.87 |
| 12 | q4 | IMRPhenomXO4a | aspire from IMRPhenomXPHM | -54803.41 +/- 0.10 | 13,555,000 | 3.38 | 10,000 | 1355.50 |
| 13 | q4 | IMRPhenomXO4a | dynesty | -54802.73 +/- 0.21 | 49,615,985 | 15.68 | 6,511 | 7620.33 |
| 14 | q4 | IMRPhenomXPHM | dynesty | -54800.09 +/- 0.21 | 57,907,257 | 18.01 | 6,259 | 9251.84 |
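The evidence comparison below is the only table the notebook exports; if a machine-readable copy of the full diagnostics table is also wanted in the data release, a one-line sketch (the file name is a suggestion):

df.to_csv(output / "sampler_diagnostics.csv", index=False)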
Prepare Side-by-Side Evidence Comparison#
Filter the summary to the analyses where both dynesty and aspire results exist, then reshape the data so the LaTeX table presents the evidence values side by side, in a consistent order and with human-readable analysis labels.
# View showing only analyses run with the same waveform and more than one sampler
df_grouped = df.groupby(["Analysis", "Waveform"]).filter(lambda x: len(x) > 1)
# Convert to a table with columns: Analysis, Waveform, aspire, dynesty
# This includes:
# - GW150914-like IMRPhenomXO4a: dynesty vs aspire from IMRPhenomXPHM
# - q4 IMRPhenomXO4a: dynesty vs aspire from IMRPhenomXPHM
# - GW150914-like IMRPhenomPv2: dynesty vs aspire from IMRPhenomD
# - eccentric TaylorF2Ecc: dynesty vs aspire
# - GW150914 IMRPhenomXO4a: dynesty vs aspire
# Only the evidence column is kept
rows = []
for (analysis, waveform), group in df_grouped.groupby(["Analysis", "Waveform"]):
    dynesty_row = group[group["Sampler"].str.contains("dynesty")]
    aspire_row = group[group["Sampler"].str.contains("aspire")]
    if not dynesty_row.empty and not aspire_row.empty:
        rows.append({
            "Analysis": analysis,
            "Waveform": waveform,
            "aspire": aspire_row["Log Evidence"].values[0],
            "dynesty": dynesty_row["Log Evidence"].values[0],
        })
table_df = pd.DataFrame(rows, columns=["Analysis", "Waveform", "aspire", "dynesty"])
# Reorder the table so the rows appear in the order listed below;
# anything not listed (GW150914) sorts last
order = [
    ("GW150914-like", "IMRPhenomXO4a"),
    ("q4", "IMRPhenomXO4a"),
    ("GW150914-like", "IMRPhenomPv2"),
    ("eccentric", "TaylorF2Ecc"),
]
table_df["Order"] = table_df.apply(
    lambda row: order.index((row["Analysis"], row["Waveform"]))
    if (row["Analysis"], row["Waveform"]) in order
    else len(order),
    axis=1,
)
table_df = table_df.sort_values("Order").drop(columns=["Order"])
# Rename analyses to be more descriptive; the two GW150914-like analyses
# are distinguished by their waveform
def rename_analysis(row):
    if row["Analysis"] == "GW150914-like":
        return f"GW150914-like ({row['Waveform']})"
    return row["Analysis"]

table_df["Analysis"] = table_df.apply(rename_analysis, axis=1)
# Replace analysis names
table_df["Analysis"] = table_df["Analysis"].replace({
"GW150914-like (IMRPhenomXO4a)": "Changing waveform (GW150914-like)",
"GW150914-like (IMRPhenomPv2)": "Adding spin precession",
"q4": "Changing waveform ($q=4$)",
"eccentric": "Adding orbital eccentricity",
"GW150914": "GW150914 (real data)"
})
# Use `texttt` for waveform names
table_df["Waveform"] = table_df["Waveform"].apply(lambda x: f"\\texttt{{{x}}}")
# Use `texttt` for sampler names in column names
table_df = table_df.rename(columns={"dynesty": "\\texttt{dynesty}", "aspire": "\\texttt{aspire}"})
table_df.to_latex(output / "evidence_table.tex", index=False, escape=False)
table_df
| | Analysis | Waveform | \texttt{aspire} | \texttt{dynesty} |
|---|---|---|---|---|
| 2 | Changing waveform (GW150914-like) | \texttt{IMRPhenomXO4a} | -57893.36 +/- 0.06 | -57893.23 +/- 0.16 |
| 4 | Changing waveform ($q=4$) | \texttt{IMRPhenomXO4a} | -54803.41 +/- 0.10 | -54802.73 +/- 0.21 |
| 1 | Adding spin precession | \texttt{IMRPhenomPv2} | -57893.11 +/- 0.06 | -57893.12 +/- 0.15 |
| 3 | Adding orbital eccentricity | \texttt{TaylorF2Ecc} | -1928.34 +/- 0.06 | -1928.15 +/- 0.16 |
| 0 | GW150914 (real data) | \texttt{IMRPhenomXO4a} | -12768.08 +/- 0.07 | -12767.84 +/- 0.16 |
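As a design note, the manual loop above could equivalently be written as a single pivot, assuming each analysis/waveform pair has at most one run per sampler family; a sketch (the hypothetical Family column collapses the "aspire from <waveform>" labels to their first word):

wide = (
    df_grouped.assign(Family=df_grouped["Sampler"].str.split().str[0])
    .pivot(index=["Analysis", "Waveform"], columns="Family", values="Log Evidence")
    .reset_index()
)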
LaTeX Macros for Manuscript#
Create helper commands that capture the ratios of likelihood evaluations and wall-clock time per posterior sample between dynesty and aspire. The macros are written to macros/result_macros.tex so the paper and the data-release notes can reference the same numbers without manual transcription.
macros_output = Path("macros")
macros_output.mkdir(exist_ok=True)
decimal_places = 0
macros_file = macros_output / "result_macros.tex"
# Clear the file if it exists
with open(macros_file, "w") as f:
    f.write("% Macros for results\n")

def make_macro(name, value):
    with open(macros_file, "a") as f:
        # \xspace ensures correct spacing after the macro in running text
        f.write(f"\\newcommand{{\\{name}}}{{{value}\\xspace}}\n")
# Ratios of dynesty cost to aspire cost per posterior sample.
# Each entry: (macro prefix, analysis label, waveform, aspire sampler key)
comparisons = [
    ("FirstDetectionLike", "GW150914-like", "IMRPhenomXO4a", "aspire from IMRPhenomXPHM"),
    ("Asymm", "q4", "IMRPhenomXO4a", "aspire from IMRPhenomXPHM"),
    ("Eccentric", "eccentric", "TaylorF2Ecc", "aspire"),
    ("Precessing", "GW150914-like", "IMRPhenomPv2", "aspire from IMRPhenomD"),
    ("FirstDetection", "GW150914", "IMRPhenomXO4a", "aspire"),
]
for prefix, label, waveform, aspire_key in comparisons:
    dynesty = stats[label][waveform]["dynesty"]
    aspire = stats[label][waveform][aspire_key]
    for field, suffix in [("evals", "EvalsPerSample"), ("time", "TimePerSample")]:
        dynesty_per_sample = dynesty[field] / dynesty["n_posterior_samples"]
        aspire_per_sample = aspire[field] / aspire["n_posterior_samples"]
        make_macro(prefix + suffix, int(np.round(dynesty_per_sample / aspire_per_sample, decimal_places)))