# Source code for datapipe_testbench.visualization

#!/usr/bin/env python3
"""
Functions to help visualize Benchmarks and their inputs.
"""

from dataclasses import asdict

import graphviz

from .benchmark import Benchmark
from .inputdataset import InputDataset

# Names exported by ``from <module> import *``.
__all__ = ["graphviz_inputs_to_benchmarks"]

# Marker placed in front of a field whose value is truthy; this is the HTML
# numeric character reference for a check mark (U+2714).
SET = "&#10004;"
# Marker placed in front of a field whose value is falsy.
UNSET = " - "
# Default number of trailing characters of each value kept in the label table.
DEFAULT_FILENAME_LENGTH = 20


def _dict_to_label(title, thedict, file_length=DEFAULT_FILENAME_LENGTH):
    """Make a nice graphviz-friendly html table.

    Parameters
    ----------
    title:
        Text of the bold header row spanning both columns.
    thedict: dict
        Mapping of field name to value. The ``"name"`` entry is rendered as a
        single centred row; every other entry becomes a two-column row whose
        right-hand cell carries a ``PORT`` named after the field.
    file_length: int
        Maximum number of trailing characters of each value to display.
        Values shorter than this are shown in full; longer values are cut and
        prefixed with ``...``. Values below zero are treated as zero.

    Returns
    -------
    str
        Graphviz HTML-like label, including the enclosing ``<`` and ``>``.
    """
    # Function-scope import so the helper carries its own dependency.
    from html import escape

    def _text(obj):
        # Graphviz HTML-like labels need &, < and > escaped in literal text.
        return escape(str(obj), quote=False)

    # A plain ``s[-n:]`` returns the whole string for n == 0 and drops leading
    # characters for n < 0, so clamp and slice from an explicit start index.
    keep = max(file_length, 0)

    rows = [
        "<",
        '<TABLE BORDER="1" CELLBORDER="1" CELLSPACING="0">',
        f'<TR><TD COLSPAN="2" bgcolor="#cccccc"><B>{_text(title)}</B></TD></TR>',
    ]
    for field, value in thedict.items():
        if field == "name":
            rows.append(
                '<TR><TD ALIGN="center" colspan="2" bgcolor="#dddddd">'
                f" <i>{_text(value)}</i> </TD></TR>"
            )
            continue

        if value:
            full = str(value)
            truncated = keep < len(full)
            tail = full[len(full) - keep:] if truncated else full
            # Only show the ellipsis when characters were actually dropped.
            prefix = "..." if truncated else ""
            val = f"<i>{prefix}{_text(tail)}</i>"
            state = SET
        else:
            val = ""
            state = UNSET

        # The PORT value sits inside a quoted attribute, so quotes are escaped too.
        port = escape(str(field), quote=True)
        rows.append(
            f'<TR><TD ALIGN="LEFT"> {state} {_text(field)} </TD>'
            f'<TD PORT="{port}"> {val} </TD></TR>'
        )
    rows.append("</TABLE>")
    rows.append(">")
    return "\n".join(rows)



# [docs]
def graphviz_inputs_to_benchmarks(
    input_dataset_list: list[InputDataset],
    benchmark_list: list[Benchmark],
    file_length: int = DEFAULT_FILENAME_LENGTH,
) -> graphviz.Digraph:
    """Generate a graphviz diagram mapping InputDatasets to benchmark outputs.

    One table node is drawn per InputDataset and one per Benchmark. For every
    input a benchmark requires, an edge runs from that field of the dataset
    node to the benchmark node, coloured by the state of the input:

    * ``red``: the attribute on the InputDataset is unset (falsy),
    * ``yellow``: the attribute is set but its ``exists()`` returns false,
    * ``darkgreen``: the attribute is set and ``exists()`` returns true.

    The output will display automatically in a Jupyter Notebook, or can be saved
    to a file by calling the ``render()`` method of the Digraph.

    Parameters
    ----------
    input_dataset_list: list[InputDataset]
        List of InputDatasets the user will pass to each Benchmark's
        generate_metrics method.
    benchmark_list: list[Benchmark]
        Which benchmarks will process the InputDatasets.
    file_length: int
        Max length of the filenames shown in the tables. The last N characters
        of the filename will be retained, and the rest truncated.

    Returns
    -------
    graphviz.Digraph
        Graph that can be displayed or rendered.

    """
    digraph = graphviz.Digraph("Benchmark Workflow")
    digraph.attr(rankdir="LR")

    # Materialise once so a one-shot iterable still serves both passes below.
    benchmarks = list(benchmark_list)

    # Benchmark nodes do not depend on the dataset: declare each exactly once
    # instead of re-declaring it for every InputDataset.
    for benchmark in benchmarks:
        digraph.node(
            benchmark.name,
            label=_dict_to_label(
                " ".join([benchmark.name, " Metrics"]),
                benchmark.outputs(),
                file_length=file_length,
            ),
            shape="plaintext",
        )

    for ii, input_dataset in enumerate(input_dataset_list):
        nodename = f"InputDataset {ii}"
        digraph.node(
            nodename,
            label=_dict_to_label(
                nodename, asdict(input_dataset), file_length=file_length
            ),
            shape="plaintext",
        )

        for benchmark in benchmarks:
            for req in benchmark.required_inputs:
                # NOTE(review): a set value is assumed to offer ``exists()``
                # (e.g. a pathlib.Path) -- confirm against InputDataset.
                value = getattr(input_dataset, req)
                if not value:
                    color = "red"
                elif value.exists():
                    color = "darkgreen"
                else:
                    color = "yellow"
                # "<node>:<port>:e" attaches the edge to the east side of the
                # table cell whose PORT is named after the required field.
                digraph.edge(
                    f"{nodename}:{req}:e",
                    benchmark.name,
                    color=color,
                )

    return digraph