from bionetgen.core.utils.logging import BNGLogger
logger = BNGLogger()
# All classes that deal with patterns
class Pattern:
    """
    Pattern object. Fundamentally it's a list of molecules
    which are defined later.

    Attributes
    ----------
    _bonds : Bonds
        setting a pattern requires you to keep track of all bonds to
        correctly label them, this object tracks everything
    compartment : str
        compartment of the overall pattern (not the same thing as
        molecule compartment, those have their own)
    label : str
        label of the overall pattern (not the same thing as molecule
        label, those have their own); stored in `_label`
    molecules : list[Molecule]
        list of molecule objects that are in the pattern
    fixed : bool
        used for constant species, sets "$" at the beginning of the
        pattern string
    MatchOnce : bool
        used for matchOnce syntax, "{MatchOnce}PatternStr"
    relation : str
        alongside quantity this is only used for patterns of the form
        e.g. pattern==5, pattern<=3 etc
    quantity : str
        alongside relation this is only used for patterns of the form
        e.g. pattern==5, pattern<=3 etc
    nautyG : Graph
        if canonicalization was done on the pattern this is the graph
        object from the library `pynauty` (python bindings to the
        canonical labelling library `nauty`), otherwise None
    canonical_certificate : str
        if canonicalization was done on the pattern this is the value
        returned by `pynauty.certificate` for the underlying graph.
        This doesn't take into account node names so it can only be
        used to compare the graph topology
    canonical_label : str
        if canonicalization was done on the pattern this is the label
        produced by `print_canonical`; patterns are compared through this
        label in `__eq__` when both sides have one

    Methods
    -------
    canonicalize : None
        Generates a canonical label stored in the `canonical_label`
        attribute. This label can be used to compare patterns quickly.
        This method only does work if `pynauty` is installed. See
        [nauty documentation](https://users.cecs.anu.edu.au/~bdm/nauty/)
        for more information
    """

    def __init__(
        self,
        molecules=None,
        bonds=None,
        compartment=None,
        label=None,
        canonicalize=False,
    ):
        """
        Parameters
        ----------
        molecules : list[Molecule], optional
            molecules of the pattern; the list is stored as-is (not
            copied). When omitted a new empty list is created.
        bonds : Bonds, optional
            bond tracking object, stored in `_bonds`
        compartment : str, optional
            pattern-wide compartment
        label : str, optional
            pattern-wide label
        canonicalize : bool, optional
            if True, `canonicalize()` is run at the end of construction
        """
        # A literal `[]` default would be shared by every Pattern that is
        # created without molecules, so build a fresh list per instance.
        self.molecules = [] if molecules is None else molecules
        self._bonds = bonds
        self.compartment = compartment
        self.label = label
        self.fixed = False
        self.MatchOnce = False
        self.relation = None
        self.quantity = None
        self.nautyG = None
        self.canonical_certificate = None
        self.canonical_label = None
        if canonicalize:
            self.canonicalize()

    @staticmethod
    def _color_sort_key(color_id):
        """Return a sortable key for a color id tuple that may contain None."""
        return tuple(
            (part is not None, "" if part is None else str(part))
            for part in color_id
        )

    def canonicalize(self):
        """
        This method will use `pynauty` library to generate a canonical label
        for the pattern. The label is stored in the `canonical_label`
        attribute; `nautyG` and `canonical_certificate` are set as well, and
        every molecule/component gets its `canonical_order` (components also
        their `canonical_bonds`).

        If `pynauty` cannot be imported a warning is logged and nothing
        else happens.
        """
        # set a location for logging
        loc = f"{__file__} : Pattern.canonicalize()"
        # try importing pynauty to canonicalize the labeling
        try:
            import pynauty
        except ImportError:
            logger.warning(
                "Importing pynauty failed, cannot canonicalize. Pattern equality checking is not guaranteed to work for highly symmetrical species.",
                loc=loc,
            )
            return
        # one vertex per molecule plus one per component
        node_cnt = len(self.molecules) + sum(
            len(molec.components) for molec in self.molecules
        )
        # initialize our pynauty graph
        G = pynauty.Graph(node_cnt)
        # bond -> list of component ids that carry that bond
        bond_dict = {}
        # unique id tuple -> vertex number
        grpIds = {}
        # vertex number -> molecule/component object
        node_ptrs = {}
        # color id tuple -> set of vertex numbers sharing that color
        colors = {}
        currId = 0
        # copy counters are never reset; bumping one is enough to make a
        # repeated (name, ...) id unique again
        mCopyId = 0
        cCopyId = 0
        # let's loop over everything in the pattern
        for molec in self.molecules:
            # molecules are colored by name only
            colors.setdefault((molec.name, None, None), set()).add(currId)
            # saving IDs
            parent_id = (molec.name, None, mCopyId, cCopyId)
            if parent_id in grpIds:
                mCopyId += 1
                parent_id = (molec.name, None, mCopyId, cCopyId)
            grpIds[parent_id] = currId
            node_ptrs[currId] = molec
            currId += 1
            # now looping over components
            for comp in molec.components:
                # components are colored by molecule name, name and state
                colors.setdefault(
                    (molec.name, comp.name, comp.state), set()
                ).add(currId)
                chid_id = (molec.name, comp.name, mCopyId, cCopyId)
                # connecting the component to the molecule
                G.connect_vertex(grpIds[parent_id], [currId])
                # saving component IDs
                if chid_id in grpIds:
                    cCopyId += 1
                    chid_id = (molec.name, comp.name, mCopyId, cCopyId)
                grpIds[chid_id] = currId
                node_ptrs[currId] = comp
                currId += 1
                # saving bonds
                for bond in comp._bonds:
                    bond_dict.setdefault(bond, []).append(chid_id)
        # now we got everything, we implement it in the graph
        for bond, end_points in bond_dict.items():
            # check if each of our bonds have exactly two end points
            if len(end_points) == 2:
                G.connect_vertex(grpIds[end_points[0]], [grpIds[end_points[1]]])
            else:
                # raise a warning
                logger.warning(
                    f"Bond {bond} doesn't have exactly 2 end points, please check that you don't have any dangling bonds.",
                    loc=loc,
                )
        # The coloring is handed over as an ordered list of cells. Ordering
        # the cells by color id (instead of by first appearance) keeps the
        # result independent of the order molecules were written in.
        # NOTE(review): assumes nauty treats cell order as significant —
        # confirm against the pynauty documentation.
        color_sets = [
            colors[color_id]
            for color_id in sorted(colors, key=self._color_sort_key)
        ]
        # set vertex coloring
        G.set_vertex_coloring(color_sets)
        # save our graph
        self.nautyG = G
        # generate the canonical certificate for the entire graph
        self.canonical_certificate = pynauty.certificate(self.nautyG)
        # generate the canonical label for the entire graph
        # first, we give every node their canonical order
        canon_order = pynauty.canon_label(self.nautyG)
        for iordr, ordr in enumerate(canon_order):
            node_ptrs[ordr].canonical_order = iordr
        # relabeling bonds
        relabeling_bond_dict = {}
        for bond, end_points in bond_dict.items():
            # check if each of our bonds have exactly two end points
            if len(end_points) == 2:
                comp1 = node_ptrs[grpIds[end_points[0]]]
                comp2 = node_ptrs[grpIds[end_points[1]]]
                # requires `parent_molecule` to be set on both components
                parent_order = min(
                    comp1.parent_molecule.canonical_order,
                    comp2.parent_molecule.canonical_order,
                )
                comp_order = min(comp1.canonical_order, comp2.canonical_order)
                relabeling_bond_dict[(parent_order, comp_order)] = (comp1, comp2)
            else:
                # raise a warning
                logger.warning(
                    f"Bond {bond} doesn't have exactly 2 end points, please check that you don't have any dangling bonds.",
                    loc=loc,
                )
        # drop bond labels from a previous run, otherwise calling this
        # method twice would append the same labels a second time
        for molec in self.molecules:
            for comp in molec.components:
                comp.canonical_bonds = None
        # sorted keys give us the canonically sorted bonds
        for ibond, sbond in enumerate(sorted(relabeling_bond_dict)):
            # now we add a canonical bond ID to each component
            bond_id = str(ibond + 1)
            for comp in relabeling_bond_dict[sbond]:
                if comp.canonical_bonds is None:
                    comp.canonical_bonds = [bond_id]
                else:
                    comp.canonical_bonds.append(bond_id)
        # and now we can get the canonical label
        self.canonical_label = self.print_canonical()

    def _render(self, molecule_strings):
        """Assemble the pattern string around already rendered molecules."""
        pieces = []
        # we first deal with the pattern compartment and label
        if self.compartment is not None:
            pieces.append("@{}".format(self.compartment))
        if self.label is not None:
            pieces.append("%{}".format(self.label))
        if pieces:
            pieces.append(":")
        # modifiers are only written in front of the first molecule, so an
        # empty pattern never gets them
        if molecule_strings:
            if self.fixed:
                pieces.append("$")
            if self.MatchOnce:
                pieces.append("{MatchOnce}")
            pieces.append(".".join(molecule_strings))
        if self.relation is not None:
            pieces.append(f"{self.relation}{self.quantity}")
        return "".join(pieces)

    def print_canonical(self):
        """
        Returns canonical label for the pattern. Molecules are written in
        the order of their `canonical_order` attribute.

        Like `__str__`, this first calls
        `consolidate_molecule_compartments`, which modifies the molecules.
        """
        # need to make sure we don't print useless compartments
        self.consolidate_molecule_compartments()
        # now loop over all molecules in canonical order
        ordered = sorted(self.molecules, key=lambda molec: molec.canonical_order)
        return self._render([molec.print_canonical() for molec in ordered])

    def __contains__(self, val):
        """Return True if `val` is one of the molecules of the pattern."""
        return val in self.molecules

    def __eq__(self, other):
        """
        Compare two patterns.

        Pattern-wide properties (compartment, label, fixed, MatchOnce,
        relation, quantity) have to match. After that, if both patterns
        have a canonical label the labels decide. Otherwise every molecule
        of this pattern has to be found in `other.molecules` and, if both
        have one, the canonical certificates have to agree.

        NOTE(review): the molecule check only goes from `self` to `other`
        and bonds are not compared yet (see TODO below).
        Defining `__eq__` without `__hash__` leaves instances unhashable.
        """
        loc = f"{__file__} : Pattern.__eq__()"
        if not isinstance(other, Pattern):
            return False
        logger.debug(f"Comparison class matches: {other.__class__}", loc=loc)
        # checking pattern-wide properties
        if not (
            (other.compartment == self.compartment) and (other.label == self.label)
        ):
            return False
        logger.debug(
            f"Compartment or label matches: {other.compartment}, {other.label}",
            loc=loc,
        )
        # checking mods
        if not ((other.fixed == self.fixed) and (other.MatchOnce == self.MatchOnce)):
            return False
        logger.debug(
            f"fixed or matchonce matches: {other.fixed}, {other.MatchOnce}",
            loc=loc,
        )
        # checking quantifiers
        if not (
            (other.relation == self.relation) and (other.quantity == self.quantity)
        ):
            return False
        logger.debug(
            f"relation or quantity matches: {other.relation}, {other.quantity}",
            loc=loc,
        )
        # if we made the label, we can just compare the two
        if (self.canonical_label is not None) and (
            other.canonical_label is not None
        ):
            return self.canonical_label == other.canonical_label
        # now we can check contents
        for molecule in self:
            if molecule not in other.molecules:
                logger.debug(f"molecule doesn't match: {molecule}", loc=loc)
                return False
        # isomorphism check if we have the certificate
        if (self.canonical_certificate is not None) and (
            other.canonical_certificate is not None
        ):
            if self.canonical_certificate != other.canonical_certificate:
                return False
        # TODO: molecules match, check bonds
        # Bonds match, patterns are the same
        logger.debug("patterns match!", loc=loc)
        return True

    @property
    def compartment(self):
        """Compartment of the overall pattern."""
        return self._compartment

    @compartment.setter
    def compartment(self, value):
        # TODO: Build in logic to set the
        # outer compartment
        self._compartment = value

    def consolidate_molecule_compartments(self):
        """
        Set the compartment of every molecule to None if it is equal to
        the pattern compartment, so it is not printed twice. Does nothing
        if the pattern has no compartment.
        """
        overall_comp = self.compartment
        if overall_comp is None:
            return
        for molec in self.molecules:
            if molec.compartment == overall_comp:
                molec.compartment = None

    @property
    def label(self):
        """Label of the overall pattern."""
        return self._label

    @label.setter
    def label(self, value):
        # TODO: Build in logic to set
        # the outer label
        self._label = value

    def __str__(self):
        """
        Returns the pattern string with the molecules in list order.

        This first calls `consolidate_molecule_compartments`, which
        modifies the molecules.
        """
        # need to make sure we don't print useless compartments
        self.consolidate_molecule_compartments()
        return self._render([str(molec) for molec in self.molecules])

    def __repr__(self):
        return str(self)

    def __getitem__(self, key):
        """Index into the list of molecules."""
        return self.molecules[key]

    def __iter__(self):
        """Iterate over the molecules of the pattern."""
        return iter(self.molecules)
class Molecule:
    """
    Molecule object. A pattern is a list of molecules.
    This object also handles molecule types where components
    have a list of possible states.

    Attributes
    ----------
    _name : str
        name of the molecule, "0" is used for the null species
    _compartment : str
        compartment of the molecule
    _label : str
        label of the molecule
    _components : list[Component]
        list of components for this molecule
    canonical_order : int
        position of the molecule in the canonical ordering, None until
        it is assigned from outside (see `Pattern.canonicalize`)
    canonical_label : str
        canonical label of the molecule, None unless assigned from outside
    parent_pattern : Pattern
        None unless assigned from outside

    Methods
    -------
    add_component(name, state=None, states=None)
        add a component object to the list of components with name
        "name", current state "state" or a list of states
        (for molecule types) "states"
    """

    def __init__(self, name="0", components=None, compartment=None, label=None):
        """
        Parameters
        ----------
        name : str, optional
            name of the molecule, defaults to the null species "0"
        components : list[Component], optional
            components of the molecule; the list is stored as-is (not
            copied). When omitted a new empty list is created.
        compartment : str, optional
            compartment of the molecule
        label : str, optional
            label of the molecule
        """
        self._name = name
        # `add_component` appends to this list, so a literal `[]` default
        # would leak components between all molecules built without any.
        self._components = [] if components is None else components
        self._compartment = compartment
        self._label = label
        self.canonical_order = None
        self.canonical_label = None
        self.parent_pattern = None

    def __contains__(self, val):
        """Return True if `val` is one of the components of the molecule."""
        return val in self.components

    def __eq__(self, other):
        """
        Compare two molecules.

        Name, compartment and label have to match. If both molecules have
        a canonical label, differing labels mean inequality. Finally every
        component of this molecule has to be found in `other.components`.

        NOTE(review): the component check only goes from `self` to `other`.
        Defining `__eq__` without `__hash__` leaves instances unhashable.
        """
        loc = f"{__file__} : Molecule.__eq__()"
        # check object type
        if not isinstance(other, Molecule):
            return False
        logger.debug(f"Comparison class matches: {other.__class__}", loc=loc)
        # check attributes
        if not (
            (other.name == self.name)
            and (other.compartment == self.compartment)
            and (other.label == self.label)
        ):
            return False
        logger.debug(
            f"name, compartment and labels match: {other.name}, {other.compartment}, {other.label}",
            loc=loc,
        )
        if (self.canonical_label is not None) and (
            other.canonical_label is not None
        ):
            # we can check canonical labels
            if self.canonical_label != other.canonical_label:
                return False
        # check components now
        for component in self:
            if component not in other.components:
                logger.debug(f"component doesn't match: {component}", loc=loc)
                return False
        # everything matches
        logger.debug("molecules match", loc=loc)
        return True

    def __getitem__(self, key):
        """Return the component at integer index `key`, None for other keys."""
        if isinstance(key, int):
            return self.components[key]
        return None

    def __iter__(self):
        """Iterate over the components of the molecule."""
        return iter(self.components)

    # TODO: implement __setitem__

    def _render(self, component_strings):
        """Assemble the molecule string around already rendered components."""
        is_null = self.name == "0"
        rendered = self.name
        # the null species is written without parentheses;
        # we _could_ just not do () if components
        # don't exist but that has other issues,
        # especially for extension highlighting
        if not is_null:
            rendered += "("
        rendered += ",".join(component_strings)
        if not is_null:
            rendered += ")"
        if self.compartment is not None:
            rendered += "@{}".format(self.compartment)
        if self.label is not None:
            rendered += "%{}".format(self.label)
        return rendered

    def __str__(self):
        """Returns the molecule string with the components in list order."""
        return self._render([str(comp) for comp in self.components])

    def print_canonical(self):
        """
        Returns canonical label for the molecule. Components are written
        in the order of their `canonical_order` attribute.
        """
        ordered = sorted(self.components, key=lambda comp: comp.canonical_order)
        return self._render([comp.print_canonical() for comp in ordered])

    ### PROPERTIES ###
    @property
    def name(self):
        """Name of the molecule."""
        return self._name

    @name.setter
    def name(self, value):
        # TODO: Check for invalid characters
        self._name = value

    @property
    def components(self):
        """List of components of the molecule."""
        return self._components

    @components.setter
    def components(self, value):
        self._components = value

    def __repr__(self):
        return str(self)

    @property
    def compartment(self):
        """Compartment of the molecule."""
        return self._compartment

    @compartment.setter
    def compartment(self, value):
        self._compartment = value

    @property
    def label(self):
        """Label of the molecule."""
        return self._label

    @label.setter
    def label(self, value):
        self._label = value

    def _add_component(self, name, state=None, states=None):
        """Create a Component and append it to the list of components."""
        comp_obj = Component()
        comp_obj.name = name
        comp_obj.state = state
        # a fresh list per component instead of one shared default list
        comp_obj.states = [] if states is None else states
        # Pattern.canonicalize reads `parent_molecule` of bonded components
        comp_obj.parent_molecule = self
        self.components.append(comp_obj)

    def add_component(self, name, state=None, states=None):
        """
        Add a component to the molecule.

        Parameters
        ----------
        name : str
            name of the component
        state : str, optional
            current state of the component
        states : list[str], optional
            list of possible states (for molecule types); when omitted
            the component gets a new empty list
        """
        # TODO: Add built-in logic here
        self._add_component(name, state, states)
class Component:
    """
    Component object that describes the state, label and bonding
    for each component. Molecules can optionally contain components

    Attributes
    ----------
    name : str
        name of the component
    _label : str
        label of the component
    _state : str
        state of the component, not used for molecule types
    _states : list[str]
        list of states for molecule types
    _bonds : list[Bond]
        list of bond objects that describes bonding of the component
    canonical_label : str
        canonical label of the component, None unless assigned from outside
    canonical_order : int
        position of the component in the canonical ordering, None until
        it is assigned from outside (see `Pattern.canonicalize`)
    canonical_bonds : list[str]
        canonical bond labels used by `print_canonical`, None until
        assigned from outside (see `Pattern.canonicalize`)
    parent_molecule : Molecule
        molecule this component belongs to, None unless assigned from outside

    Methods
    -------
    add_state()
        not implemented. will eventually be used to add additional states
        to an existing component
    add_bond()
        not implemented. will eventually be used to add additional bonds
        to an existing component
    """

    def __init__(self):
        self._name = ""
        self._label = None
        self._state = None
        self._states = []
        self._bonds = []
        self.canonical_label = None
        self.canonical_order = None
        self.canonical_bonds = None
        self.parent_molecule = None

    def __eq__(self, other):
        """
        Compare two components.

        Name, label, the number of possible states and the current state
        have to match. If both components have a canonical label,
        differing labels mean inequality. Bonds are only compared by
        count, not by their identifiers.

        Defining `__eq__` without `__hash__` leaves instances unhashable.
        """
        loc = f"{__file__} : Component.__eq__()"
        # check type
        if not isinstance(other, Component):
            return False
        logger.debug(f"Comparison class matches: {other.__class__}", loc=loc)
        # check attributes
        if not ((other.name == self.name) and (other.label == self.label)):
            return False
        logger.debug(f"name and labels match: {other.name}, {other.label}", loc=loc)
        # check states
        if not (len(other.states) == len(self.states)):
            return False
        logger.debug(f"state lists match: {other.states}", loc=loc)
        # check current state
        if not (other.state == self.state):
            return False
        logger.debug(f"states match: {other.state}", loc=loc)
        if (self.canonical_label is not None) and (
            other.canonical_label is not None
        ):
            # we can check canonical labels
            if self.canonical_label != other.canonical_label:
                return False
        # check bonds
        # TODO: try to decide if A(b!1).B(a!1) is the same
        # as A(b!2).B(a!2), if so, the bond check is much harder.
        # Until then only the number of bonds is compared.
        if len(self.bonds) == len(other.bonds):
            logger.debug("components match", loc=loc)
            return True
        return False

    def __repr__(self):
        return str(self)

    def _render(self, bond_ids):
        """Assemble the component string followed by the given bond ids."""
        pieces = [self.name]
        # only for molecule types
        pieces.extend("~{}".format(state) for state in self.states)
        # for any other pattern
        if self.state is not None:
            pieces.append("~{}".format(self.state))
        if self.label is not None:
            pieces.append("%{}".format(self.label))
        pieces.extend("!{}".format(bond) for bond in bond_ids)
        return "".join(pieces)

    def __str__(self):
        """Returns the component string using the entries of `bonds`."""
        return self._render(self.bonds)

    def print_canonical(self):
        """
        Returns canonical label for the component. Same as `str()` except
        that the entries of `canonical_bonds` are written instead of
        `bonds`; no bonds are written while `canonical_bonds` is None.
        """
        if self.canonical_bonds is None:
            return self._render([])
        return self._render(self.canonical_bonds)

    ### PROPERTIES ###
    @property
    def name(self):
        """Name of the component."""
        return self._name

    @name.setter
    def name(self, value):
        # TODO: Add built-in logic here
        self._name = value

    @property
    def label(self):
        """Label of the component."""
        return self._label

    @label.setter
    def label(self, value):
        # TODO: Add built-in logic here
        self._label = value

    @property
    def state(self):
        """Current state of the component."""
        return self._state

    @state.setter
    def state(self, value):
        # TODO: Add built-in logic here
        self._state = value

    @property
    def states(self):
        """List of possible states, used for molecule types."""
        return self._states

    @states.setter
    def states(self, value):
        # TODO: Add built-in logic here
        self._states = value

    @property
    def bonds(self):
        """List of bonds of the component."""
        return self._bonds

    @bonds.setter
    def bonds(self, value):
        # TODO: Add built-in logic here
        self._bonds = value

    def _add_state(self):
        raise NotImplementedError

    def add_state(self):
        """Not implemented, raises NotImplementedError."""
        self._add_state()

    def _add_bond(self):
        raise NotImplementedError

    def add_bond(self):
        """Not implemented, raises NotImplementedError."""
        self._add_bond()