Source code for bionetgen.modelapi.pattern_reader

from bionetgen.core.utils.logging import BNGLogger
from bionetgen.modelapi.pattern import Pattern, Molecule, Component
import pyparsing as pp


class BNGParsers:
    """
    Container for parsers for the reader.

    The class defines no parsers of its own. It is a plain namespace
    object: BNGPatternReader attaches each pyparsing expression it builds
    as an attribute on an instance of this class (e.g. ``base_name``,
    ``component``, ``molecule``, ``pattern``).
    """

    def __init__(self) -> None:
        """Create an empty container; no attributes are set here."""
class BNGPatternReader:
    """
    Class that generates parsers to read BNG pattern strings
    and form Pattern objects from them.

    Usage: BNGPatternReader(pattern_str)

    Arguments
    ---------
    pattern_str : str
        The pattern string to read and generate a Pattern object from

    Attributes
    ----------
    logger : BNGLogger
        Logger used for debug output while parsing
    pattern_str : str
        The pattern string given at construction time
    pattern : Pattern
        The Pattern object formed from the parsed string
    parsers : BNGParsers
        Container object that has parsers for various parts of
        a BNG pattern string

    Methods
    -------
    define_parsers : None
        runs other defined parser commands to setup all parsers
    define_component_parser : None
        defines pyparsing parser for components
    define_molecule_parser : None
        defines pyparsing parser for molecules
    define_pattern_parser : None
        defines pyparsing parser for overall patterns
    make_pattern : Pattern
        forms the actual Pattern object from the pattern string
        using the defined parsers

    Notes
    -----
    Fixed tokens (``~``, ``!``, ``(``, ``{MatchOnce}``, ``<=`` ...) are
    built with ``pp.Literal``. ``pp.Word`` treats its argument as a *set of
    characters*, so e.g. ``pp.Word("{MatchOnce}")`` would match any run of
    the letters ``M a t c h O n e`` and swallow the beginning of molecule
    names such as ``cat`` or ``Mek``.
    """

    def __init__(self, pattern_str) -> None:
        self.logger = BNGLogger()
        self.pattern_str = pattern_str
        self.parsers = BNGParsers()
        # parsers must exist before the string can be read
        self.define_parsers()
        self.pattern = self.make_pattern(self.pattern_str)

    def define_parsers(self):
        """
        Builds every parser, in dependency order: the molecule parser uses
        the component parsers and the pattern parser uses the molecule
        parsers.
        """
        self.define_component_parser()
        self.define_molecule_parser()
        self.define_pattern_parser()

    def define_component_parser(self):
        """
        Defines specific parsers for BNG components

        Sets ``base_name``, ``state``, ``bond``, ``component``,
        ``components_parser`` and ``combined_components_parser``
        on ``self.parsers``.
        """
        # bng names start with a letter followed by alphanumerics or "_"
        self.parsers.base_name = pp.Word(pp.alphas, pp.alphanums + "_")
        # components have optional states and bonds
        # a state is "~name", "~number" or the wildcard "~?"
        self.parsers.state = pp.Combine(
            pp.Literal("~") + (self.parsers.base_name ^ pp.Word(pp.nums))
        ) ^ pp.Literal("~?")
        # a bond is "!number" or one of the wildcards "!?" / "!+"
        self.parsers.bond = pp.Combine(
            (pp.Literal("!") + pp.Word(pp.nums))
            ^ pp.Literal("!?")
            ^ pp.Literal("!+")
        )
        # un-combined version: yields name, state and bond as separate tokens
        self.parsers.component = (
            self.parsers.base_name
            + pp.Optional(self.parsers.state)
            + pp.Optional(self.parsers.bond)
        )
        # combined version: yields one string per component
        component_parser = pp.Combine(self.parsers.component)
        # components are separated by commas
        component_separator = pp.Literal(",")
        # splits "a,b,c" into one token per component
        self.parsers.components_parser = pp.delimited_list(
            component_parser, delim=component_separator
        )
        # keeps the whole component list as a single token
        self.parsers.combined_components_parser = pp.delimited_list(
            component_parser, delim=component_separator, combine=True
        )

    def define_molecule_parser(self):
        """
        Defines specific parsers for BNG molecules

        Sets ``tag``, ``compartment``, ``molecule``, ``molecules_parser``
        and ``combined_molecules_parser`` on ``self.parsers``. Requires
        ``define_component_parser`` to have been run first.
        """
        # molecules can have tags
        self.parsers.tag = pp.Combine(
            pp.Literal("%") + (self.parsers.base_name ^ pp.Word(pp.nums))
        )
        # and compartments
        self.parsers.compartment = pp.Combine(
            pp.Literal("@") + self.parsers.base_name
        )
        # combine tag and compartment, both optional and in either order
        tag_comp = (
            pp.Optional(self.parsers.tag) + pp.Optional(self.parsers.compartment)
        ) ^ (pp.Optional(self.parsers.compartment) + pp.Optional(self.parsers.tag))
        # full molecule; tag/compartment may come before or after the
        # parentheses. Un-combined so that each part is its own token.
        self.parsers.molecule = (
            self.parsers.base_name
            + tag_comp
            + pp.Literal("(")
            + pp.Optional(self.parsers.combined_components_parser)
            + pp.Literal(")")
            + tag_comp
        )
        # combined version: yields one string per molecule
        molecule_parser = pp.Combine(self.parsers.molecule)
        # molecules are separated by dots
        molecule_separator = pp.Literal(".")
        # splits "A().B()" into one token per molecule
        self.parsers.molecules_parser = pp.delimited_list(
            molecule_parser, delim=molecule_separator
        )
        # keeps the whole molecule list as a single token
        self.parsers.combined_molecules_parser = pp.delimited_list(
            molecule_parser, delim=molecule_separator, combine=True
        )

    def define_pattern_parser(self):
        """
        Defines specific parsers for overall BNG patterns

        Sets ``pattern`` on ``self.parsers``. Requires
        ``define_molecule_parser`` to have been run first.
        """
        # pattern modifiers: "$" or "{MatchOnce}"
        mods = pp.Literal("$") ^ pp.Literal("{MatchOnce}")
        # zero molecule is a simple 0
        zero_molecule = pp.Literal("0")
        # quantifier: a relation followed by an integer; "^" picks the
        # longest alternative so "<=" wins over "<"
        relation = (
            pp.Literal("<")
            ^ pp.Literal("<=")
            ^ pp.Literal("==")
            ^ pp.Literal(">=")
            ^ pp.Literal(">")
        )
        quantifier = pp.Combine(relation + pp.Word(pp.nums))
        # a pattern can start with a tag and/or a compartment, closed
        # by ":" or "::"
        separator = pp.Literal(":") ^ pp.Literal("::")
        tag = self.parsers.tag + separator
        comp = self.parsers.compartment + separator
        tag_comp_1 = self.parsers.tag + self.parsers.compartment + separator
        tag_comp_2 = self.parsers.compartment + self.parsers.tag + separator
        tag_comp = tag ^ comp ^ tag_comp_1 ^ tag_comp_2
        pattern = (
            pp.Optional(tag_comp)
            + pp.Optional(mods)
            + self.parsers.combined_molecules_parser
            + pp.Optional(quantifier)
        )
        # full pattern
        self.parsers.pattern = pattern ^ zero_molecule

    def make_pattern(self, pattern_str):
        """
        Forms the Pattern object from the given string using the
        parsers defined above.

        Arguments
        ---------
        pattern_str : str
            The BNG pattern string to read

        Returns
        -------
        Pattern
            The pattern built from the string. For the zero pattern ("0")
            the pattern holds a single component-less Molecule and is
            returned without being canonicalized.

        Raises
        ------
        pyparsing.ParseException
            If the string (or one of its parts) does not match the grammar.
        """
        # set location for logging
        log_loc = f"{__file__} : BNGPatternReader.make_pattern()"
        # instantiate a pattern; a fresh list is passed explicitly
        pattern = Pattern(molecules=[])
        # start parsing
        parsed_pattern = self.parsers.pattern.parseString(pattern_str)
        split_molecs = None
        # first we'll pull out any features that are pattern only,
        # whatever is left over is the molecules section
        for parsed_val in parsed_pattern:
            if parsed_val.startswith("@"):
                # this is a pattern-wide compartment
                self.logger.debug(f"found compartment in {parsed_val}", loc=log_loc)
                pattern.compartment = parsed_val.replace("@", "")
                continue
            if parsed_val.startswith("%"):
                # this is a pattern-wide tag
                self.logger.debug(f"found tag in {parsed_val}", loc=log_loc)
                pattern.label = parsed_val.replace("%", "")
                continue
            if parsed_val.startswith(":"):
                # this is a pattern-wide separator
                self.logger.debug(f"found separator in {parsed_val}", loc=log_loc)
                continue
            if ("$" in parsed_val) or ("{MatchOnce}" in parsed_val):
                # this is a constant value species pattern
                # or a MatchOnce observable
                self.logger.debug(f"found mod in {parsed_val}", loc=log_loc)
                if "$" in parsed_val:
                    pattern.fixed = True
                else:
                    pattern.MatchOnce = True
                continue
            if ("<" in parsed_val) or ("==" in parsed_val) or (">" in parsed_val):
                # this is a quantifier
                self.logger.debug(f"found quantifier in {parsed_val}", loc=log_loc)
                # two character relations are tested first so that "<=" is
                # not mistaken for "<"
                for relation in ("==", "<=", ">=", "<", ">"):
                    if relation in parsed_val:
                        pattern.relation = relation
                        pattern.quantity = int(parsed_val.replace(relation, ""))
                        break
                continue
            if parsed_val == "0":
                # this is a zero molecule
                m = Molecule(components=[])
                # NOTE(review): the zero molecule sets `parent_pattern` while
                # regular molecules set `parent` (see _make_molecule); confirm
                # which attribute Molecule actually expects.
                m.parent_pattern = pattern
                pattern.molecules.append(m)
                self.logger.debug(f"found zero molecule in {parsed_val}", loc=log_loc)
                continue
            # only molecules should be remaining
            split_molecs = self.parsers.molecules_parser.parseString(parsed_val)
        # if we had a zero molecule we are done
        if split_molecs is None:
            self.logger.debug(
                f"no molecules found in: {self.pattern_str}, done", loc=log_loc
            )
            return pattern
        # we got the molecule list, let's loop over molecules now
        self.logger.debug(f"molecules: {split_molecs}", loc=log_loc)
        for molec_str in split_molecs:
            molecule = self._make_molecule(molec_str, pattern, log_loc)
            self.logger.debug(
                f"molecule parsed: {molecule}",
                loc=log_loc,
            )
            pattern.molecules.append(molecule)
        # ship the finalized pattern object
        pattern.canonicalize()
        return pattern

    def _make_molecule(self, molec_str, pattern, log_loc):
        """
        Builds a single Molecule (with its components) from a molecule
        string; the molecule is linked to ``pattern`` but not appended to it.
        """
        molecule = Molecule(components=[])
        # NOTE(review): the zero molecule in make_pattern uses
        # `parent_pattern` instead of `parent`; confirm which one is intended.
        molecule.parent = pattern
        # each molec str is a molecule string with all features
        parsed_molec = self.parsers.molecule.parseString(molec_str)
        self.logger.debug(f"parsed molecule: {parsed_molec}", loc=log_loc)
        # True while we are between the opening and closing parentheses
        in_molec = False
        for parsed_val in parsed_molec:
            if parsed_val.startswith("@"):
                # this is a molecule compartment
                self.logger.debug(f"found compartment in {parsed_val}", loc=log_loc)
                molecule.compartment = parsed_val.replace("@", "")
                continue
            if parsed_val.startswith("%"):
                # this is a molecule tag
                self.logger.debug(f"found tag in {parsed_val}", loc=log_loc)
                molecule.label = parsed_val.replace("%", "")
                continue
            if parsed_val in ("(", ")"):
                # this molecule opening and closing
                in_molec = parsed_val == "("
                self.logger.debug(f"found paran in {parsed_val}", loc=log_loc)
                continue
            if not in_molec:
                # outside the parentheses, this can only be the name
                molecule.name = parsed_val
                continue
            # only components remain, parse those and loop
            split_components = self.parsers.components_parser.parseString(parsed_val)
            self.logger.debug(f"split components: {split_components}", loc=log_loc)
            for component_str in split_components:
                molecule.components.append(
                    self._make_component(component_str, molecule)
                )
            self.logger.debug(f"split components: {split_components}", loc=log_loc)
        return molecule

    def _make_component(self, component_str, molecule):
        """
        Builds a single Component from a component string; the component is
        linked to ``molecule`` but not appended to it.
        """
        # each component is parsed separately: name, then state and/or bond
        parsed_component = self.parsers.component.parseString(component_str)
        component = Component()
        component.parent_molecule = molecule
        for icomp, comp in enumerate(parsed_component):
            if icomp == 0:
                # the first token is always the component name
                component.name = comp
            elif "!" in comp:
                # this is a bond, keep what follows the "!"
                component.bonds += [bond for bond in comp.split("!") if bond]
            elif "~" in comp:
                states = [state for state in comp.split("~") if state]
                if len(states) > 1:
                    component.states += states
                else:
                    component.state = states[0]
            # component labels ("%") are not parsed here
        return component