import os
import numpy as np
from bionetgen.core.utils.logging import BNGLogger
class BNGResult:
    """
    Load BioNetGen gdat/cdat/scan result files into numpy record arrays.

    Usage: BNGResult(path="/path/to/folder") OR
           BNGResult(direct_path="/path/to/file.gdat")

    Arguments
    ---------
    path : str
        path that points to a folder containing files to be
        loaded by the class
    direct_path : str
        path that directly points to a file to load; takes
        precedence over ``path`` when both are given
    app : object
        forwarded unchanged to ``BNGLogger``

    Attributes
    ----------
    gdats, cdats, scans : dict
        result name -> loaded record array, one dict per file type
    gnames, cnames, snames : dict
        result name -> file the result is loaded from (a bare file
        name in folder mode, the full path in direct-path mode)

    Methods
    -------
    load(fpath)
        loads in the direct path to the file and returns
        numpy.recarray (or None for an unknown extension)
    find_dat_files()
        scans ``self.path`` and fills gnames/cnames/snames
    load_results()
        loads every file recorded in gnames/cnames/snames
    """

    def __init__(self, path=None, direct_path=None, app=None):
        self.app = app
        self.logger = BNGLogger(app=self.app)
        self.logger.debug(
            "Setting up BNGResult object", loc=f"{__file__} : BNGResult.__init__()"
        )
        # defaults
        self.process_return = None
        self.output = None
        # TODO Make it so that with path you can supply an
        # extension or a list of extensions to load in
        self.gdats = {}
        self.cdats = {}
        self.scans = {}
        self.cnames = {}
        self.snames = {}
        self.gnames = {}
        if direct_path is not None:
            fnoext, fext = os.path.splitext(os.path.basename(direct_path))
            self.direct_path = direct_path
            self.file_name = fnoext
            self.file_extension = fext
            # NOTE: a directly loaded file is always stored under the gdat
            # dictionaries, whatever its extension; kept as-is because
            # indexing and iteration on this object only look at gdats.
            self.gnames[fnoext] = direct_path
            self.gdats[fnoext] = self.load(direct_path)
        elif path is not None:
            # TODO change this pattern so that each method
            # is stand alone and usable.
            self.path = path
            self.find_dat_files()
            self.load_results()
        else:
            self.logger.info(
                "BNGResult needs either a path or a direct path kwarg to load gdat/cdat/scan files from",
                loc=f"{__file__} : BNGResult.__init__()",
            )

    def __repr__(self) -> str:
        """Summarize how many results of each type are loaded, with their names."""
        # Every name is followed by a single space (including the last one).
        text = f"gdats from {len(self.gdats)} models: "
        text += "".join(f"{name} " for name in self.gdats)
        if self.cdats:
            text += f"\ncdats from {len(self.cdats)} models: "
            text += "".join(f"{name} " for name in self.cdats)
        if self.scans:
            text += f"\nscans from {len(self.scans)} models: "
            text += "".join(f"{name} " for name in self.scans)
        return text

    def __getitem__(self, key):
        """
        Return a loaded gdat result.

        An ``int`` key is treated as a position in insertion order; any
        other key is looked up directly as a result name.
        """
        if isinstance(key, int):
            key = list(self.gdats)[key]
        return self.gdats[key]

    def __iter__(self):
        """Iterate over the names of the loaded gdat results."""
        return iter(self.gdats)

    def load(self, fpath):
        """
        Load a single result file, dispatching on its extension.

        Arguments
        ---------
        fpath : str
            path to a ``.gdat``, ``.cdat`` or ``.scan`` file

        Returns
        -------
        numpy.recarray or None
            the loaded data, or None (after logging an info message)
            when the extension is not one of the three known types
        """
        self.logger.debug(f"Loading file {fpath}", loc=f"{__file__} : BNGResult.load()")
        fext = os.path.splitext(fpath)[1]
        if fext in (".gdat", ".cdat"):
            return self._load_dat(fpath)
        if fext == ".scan":
            return self._load_scan(fpath)
        self.logger.info(
            f"BNGResult doesn't know the file type of {fpath}",
            loc=f"{__file__} : BNGResult.load()",
        )
        return None

    def _load_scan(self, fpath):
        """Load a scan file; it is read exactly like a gdat/cdat file."""
        return self._load_dat(fpath)

    @staticmethod
    def _names_by_extension(files, ext):
        """
        Map result name -> file name for every entry of ``files`` that
        ends with ``.<ext>``.

        Only the trailing extension is removed to form the name, so a file
        called ``a.gdat.gdat`` is named ``a.gdat`` rather than ``a``.
        """
        suffix = f".{ext}"
        return {
            fname[: -len(suffix)]: fname for fname in files if fname.endswith(suffix)
        }

    def find_dat_files(self):
        """
        Scan ``self.path`` for gdat/cdat/scan files and record them in
        ``self.gnames``, ``self.cnames`` and ``self.snames``.
        """
        self.logger.debug(
            f"Scanning for valid files in folder {self.path}",
            loc=f"{__file__} : BNGResult.find_dat_files()",
        )
        files = os.listdir(self.path)
        self.gnames.update(self._names_by_extension(files, "gdat"))
        self.cnames.update(self._names_by_extension(files, "cdat"))
        self.snames.update(self._names_by_extension(files, "scan"))

    def load_results(self):
        """
        Load every file recorded by ``find_dat_files`` from ``self.path``
        into ``self.gdats``, ``self.cdats`` and ``self.scans``.
        """
        self.logger.debug(
            f"Loading results from {self.path}",
            loc=f"{__file__} : BNGResult.load_results()",
        )
        # load gdat files
        for name, fname in self.gnames.items():
            self.gdats[name] = self.load(os.path.join(self.path, fname))
        # load cdat files
        for name, fname in self.cnames.items():
            self.cdats[name] = self.load(os.path.join(self.path, fname))
        # load scan files
        for name, fname in self.snames.items():
            self.scans[name] = self.load(os.path.join(self.path, fname))

    def _load_dat(self, path, dformat="f8"):
        """
        This function takes a path to a gdat/cdat file as a string and loads that
        file into a numpy record array, using the header line for column names.
        TODO: Add link

        Arguments
        ---------
        path : str
            file to read; its first line must start with ``#`` and hold
            the whitespace-separated column names
        dformat : str
            numpy data type string applied to every column. See numpy
            dtype/data type strings for what's allowed. TODO: Add link

        Returns
        -------
        numpy.recarray
            always one-dimensional, one record per data row, even when
            the file holds a single row

        Raises
        ------
        AssertionError
            if the first line does not start with ``#``
        """
        # First step is to read the header,
        # we gotta open the file and pull that line in
        with open(path, "r") as f:
            header = f.readline()
        # Ensure the header info is actually there.
        # Raised explicitly (instead of using an assert statement) so the check
        # still runs under "python -O"; the exception type is kept for callers.
        # TODO: Transition to BNGErrors and logging
        if not header.startswith("#"):
            raise AssertionError("No header line that starts with #")
        # Now turn it into a list of names for our struct array
        names = tuple(header.replace("#", "").split())
        # numpy.loadtxt takes a structured dtype described as a dictionary
        # with parallel "names" and "formats" sequences
        formats = (dformat,) * len(names)
        # ndmin=1 keeps a single-row file from collapsing to a 0-d array,
        # which could not be iterated or measured with len()
        data = np.loadtxt(path, dtype={"names": names, "formats": formats}, ndmin=1)
        # return the loadtxt result as a record array
        # which is similar to pandas data format without the helper functions
        return np.rec.array(data)