Source code for bionetgen.core.tools.result

import os
import numpy as np

from bionetgen.core.utils.logging import BNGLogger


class BNGResult:
    """
    Class that loads in gdat/cdat/scan files

    Usage: BNGResult(path="/path/to/folder") OR
           BNGResult(direct_path="/path/to/file.gdat")

    Arguments
    ---------
    path : str
        path that points to a folder containing files to be
        loaded by the class
    direct_path : str
        path that directly points to a file to load; when both
        ``path`` and ``direct_path`` are given, ``direct_path`` wins
    app : object, optional
        forwarded as-is to BNGLogger

    Attributes
    ----------
    gdats, cdats, scans : dict
        loaded results keyed by file name without its extension
    gnames, cnames, snames : dict
        file names (or the direct path) the results were loaded
        from, keyed the same way as the dicts above

    Methods
    -------
    load(fpath)
        loads in the direct path to the file and returns
        numpy.recarray
    find_dat_files()
        records the gdat/cdat/scan files found in ``self.path``
    load_results()
        loads every file recorded by ``find_dat_files``
    """

    def __init__(self, path=None, direct_path=None, app=None):
        self.app = app
        self.logger = BNGLogger(app=self.app)
        self.logger.debug(
            "Setting up BNGResult object", loc=f"{__file__} : BNGResult.__init__()"
        )
        # defaults
        # NOTE(review): nothing in this class fills these two in;
        # presumably they are set by whoever ran the simulation.
        self.process_return = None
        self.output = None
        # TODO Make it so that with path you can supply an
        # extension or a list of extensions to load in
        self.gdats = {}
        self.cdats = {}
        self.scans = {}
        self.cnames = {}
        self.snames = {}
        self.gnames = {}
        if direct_path is not None:
            fname = os.path.basename(direct_path)
            fnoext, fext = os.path.splitext(fname)
            self.direct_path = direct_path
            self.file_name = fnoext
            self.file_extension = fext
            # A directly loaded file is always stored under gdats,
            # whatever its extension is, so that result[0] works.
            self.gnames[fnoext] = direct_path
            self.gdats[fnoext] = self.load(direct_path)
        elif path is not None:
            # TODO change this pattern so that each method
            # is stand alone and usable.
            self.path = path
            self.find_dat_files()
            self.load_results()
        else:
            self.logger.info(
                "BNGResult needs either a path or a direct path kwarg to load gdat/cdat/scan files from",
                loc=f"{__file__} : BNGResult.__init__()",
            )

    def __repr__(self) -> str:
        """Summarise the loaded results, one line per non-empty file type."""
        # gdats are always reported, cdats/scans only if any were loaded
        sections = [("gdats", self.gdats)]
        for label, store in (("cdats", self.cdats), ("scans", self.scans)):
            if len(store) > 0:
                sections.append((label, store))
        return "\n".join(
            f"{label} from {len(store)} models: "
            + "".join(f"{name} " for name in store)
            for label, store in sections
        )

    def __getitem__(self, key):
        """Return a gdat result by name, or by insertion position for ints."""
        if isinstance(key, int):
            key = list(self.gdats)[key]
        return self.gdats[key]

    def __iter__(self):
        """Iterate over the names of the loaded gdat results."""
        return iter(self.gdats)

    def load(self, fpath):
        """
        Load a single result file, dispatching on its extension.

        Arguments
        ---------
        fpath : str
            path to a .gdat, .cdat or .scan file

        Returns
        -------
        numpy.recarray or None
            the loaded data, or None (after logging) if the
            extension is not one of the three known ones
        """
        self.logger.debug(f"Loading file {fpath}", loc=f"{__file__} : BNGResult.load()")
        fext = os.path.splitext(fpath)[1]
        if fext in (".gdat", ".cdat"):
            return self._load_dat(fpath)
        if fext == ".scan":
            return self._load_scan(fpath)
        self.logger.info(
            f"BNGResult doesn't know the file type of {fpath}",
            loc=f"{__file__} : BNGResult.load()",
        )
        return None

    def _load_scan(self, fpath):
        """Load a scan file; it is read exactly like a gdat/cdat file."""
        return self._load_dat(fpath)

    def find_dat_files(self):
        """
        Record the gdat/cdat/scan files found directly in ``self.path``.

        Fills ``gnames``, ``cnames`` and ``snames`` with
        ``{file name without extension: file name}``.
        """
        self.logger.debug(
            f"Scanning for valid files in folder {self.path}",
            loc=f"{__file__} : BNGResult.find_dat_files()",
        )
        files = os.listdir(self.path)
        registries = (
            (".gdat", self.gnames),
            (".cdat", self.cnames),
            (".scan", self.snames),
        )
        for suffix, registry in registries:
            for dat_file in files:
                if dat_file.endswith(suffix):
                    # Cut off only the trailing extension. A blanket
                    # str.replace would also delete the same text from the
                    # middle of a name and could make two files collide.
                    registry[dat_file[: -len(suffix)]] = dat_file

    def load_results(self):
        """Load every file recorded by ``find_dat_files`` from ``self.path``."""
        self.logger.debug(
            f"Loading results from {self.path}",
            loc=f"{__file__} : BNGResult.load_results()",
        )
        # gdat files first, then cdat files, then scan files
        targets = (
            (self.gnames, self.gdats),
            (self.cnames, self.cdats),
            (self.snames, self.scans),
        )
        for names, store in targets:
            for name, fname in names.items():
                store[name] = self.load(os.path.join(self.path, fname))

    def _load_dat(self, path, dformat="f8"):
        """
        This function takes a path to a gdat/cdat file as a string and loads that
        file into a numpy record array, using the header line for column names.
        TODO: Add link

        Optional argument allows you to set the data type for every column. See
        numpy dtype/data type strings for what's allowed. TODO: Add link

        Arguments
        ---------
        path : str
            path to the file to read
        dformat : str
            numpy format string applied to every column

        Returns
        -------
        numpy.recarray
            one field per name found in the header line

        Raises
        ------
        AssertionError
            if the first line of the file does not start with "#"
        """
        # First step is to read the header,
        # we gotta open the file and pull that line in
        with open(path, "r") as f:
            header = f.readline()
        # Ensure the header info is actually there. This is raised
        # explicitly rather than via an assert statement so that the
        # check is not dropped when Python runs with -O.
        # TODO: Transition to BNGErrors and logging
        if not header.startswith("#"):
            raise AssertionError("No header line that starts with #")
        # Now turn it into a list of names for our struct array
        headers = header.replace("#", "").split()
        # numpy.loadtxt wants the structured dtype as a dictionary
        # with matching tuples under the names/formats keys
        dtype = {"names": tuple(headers), "formats": (dformat,) * len(headers)}
        # return the loadtxt result as a record array
        # which is similar to pandas data format without the helper functions
        return np.rec.array(np.loadtxt(path, dtype=dtype))