# Source code for bionetgen.modelapi.pattern

from bionetgen.core.utils.logging import BNGLogger

# Module-level logger used by the classes below; every call passes a
# `loc` keyword naming the method that emitted the message.
logger = BNGLogger()


# All classes that deal with patterns
class Pattern:
    """
    Pattern object. Fundamentally it's a list of molecules
    which are defined later.

    Attributes
    ----------
    _bonds : Bonds
        setting a pattern requires you to keep track of all bonds to
        correctly label them, this object tracks everything
    compartment : str
        compartment of the overall pattern (not the same thing as
        molecule compartment, those have their own)
    _label : str
        label of the overall pattern (not the same thing as molecule
        label, those have their own)
    molecules : list[Molecule]
        list of molecule objects that are in the pattern
    fixed : bool
        used for constant species, sets "$" at the beginning of the
        pattern string
    MatchOnce : bool
        used for matchOnce syntax, "{MatchOnce}PatternStr"
    relation : str
        alongside quantity this is only used for patterns of the form
        e.g. pattern==5, pattern<=3 etc
    quantity : str
        alongside relation this is only used for patterns of the form
        e.g. pattern==5, pattern<=3 etc
    nautyG : Graph
        if canonicalization was done on the pattern this will return a graph
        object from the library `pynauty` which is just python bindings to the
        canonical labelling library `nauty`.
    canonical_certificate : str
        if canonicalization was done on the pattern this will return a
        string that has the canonical labelling for the underlying graph.
        this doesn't take into account node names so it can only be used to
        compare the graph topology
    canonical_label : str
        if canonicalization was done on the pattern this will return a canonical
        label for the entire molecule that's unique and all isomorphic patterns will
        have the same label. comparing this label to another molecules canonical label
        will tell you if they are the same molecule or not.

    Methods
    -------
    canonicalize : None
        This method will generate a canonical label stored in `canonical_label` attribute.
        This label can be used to compare patterns to see if they are the same pattern quickly.
        This method will only run if `pynauty` is installed. See [nauty documentation](https://users.cecs.anu.edu.au/~bdm/nauty/)
        for more information
    """

    def __init__(
        self, molecules=[], bonds=None, compartment=None, label=None, canonicalize=False
    ):
        self.molecules = molecules
        self._bonds = bonds
        self.compartment = compartment
        self.label = label
        self.fixed = False
        self.MatchOnce = False
        self.relation = None
        self.quantity = None
        self.nautyG = None
        self.canonical_certificate = None
        self.canonical_label = None
        if canonicalize:
            self.canonicalize()

[docs]    def canonicalize(self):
        """
        This method will use `pynauty` library to generate a canonical label
        for the pattern. This pattern will be stored in `canonical_label` attribute.
        """
        # set a location for logging
        loc = f"{__file__} : Pattern.canonicalize()"
        # try importing pynauty to canonicalize the labeling
        try:
            import pynauty
        except ImportError:
            logger.warning(
                f"Importing pynauty failed, cannot canonicalize. Pattern equality checking is not guaranteed to work for highly symmetrical species.",
                loc=loc,
            )
            return
        # find how many vertices we need
        lmol = len(self.molecules)
        lcomp = sum([len(x.components) for x in self.molecules])
        node_cnt = lmol + lcomp
        # initialize our pynauty graph
        G = pynauty.Graph(node_cnt)
        # going to need to figure out bonding
        bond_dict = {}
        # save our IDs
        rev_grpIds = {}
        grpIds = {}
        # also pointers to each object
        node_ptrs = {}
        bond_node_ptrs = {}
        # we'll need to seutp coloring
        colors = {}
        currId = 0
        mCopyId = 0
        cCopyId = 0
        # let's loop over everything in the pattern
        for molec in self.molecules:
            # setting colors
            color_id = (molec.name, None, None)
            if color_id in colors:
                colors[color_id].add(currId)
            else:
                colors[color_id] = set([currId])
            # saving IDs
            parent_id = (molec.name, None, mCopyId, cCopyId)
            if parent_id in grpIds:
                mCopyId += 1
                parent_id = (molec.name, None, mCopyId, cCopyId)
                grpIds[parent_id] = currId
            else:
                grpIds[parent_id] = currId
            rev_grpIds[currId] = parent_id
            node_ptrs[currId] = molec
            currId += 1
            # now looping over components
            for comp in molec.components:
                # saving component coloring
                comp_color_id = (molec.name, comp.name, comp.state)
                if comp_color_id in colors:
                    colors[comp_color_id].add(currId)
                else:
                    colors[comp_color_id] = set([currId])
                chid_id = (molec.name, comp.name, mCopyId, cCopyId)
                # connecting the component to the molecule
                G.connect_vertex(grpIds[parent_id], [currId])
                # saving component IDs
                if chid_id in grpIds:
                    cCopyId += 1
                    chid_id = (molec.name, comp.name, mCopyId, cCopyId)
                    grpIds[chid_id] = currId
                else:
                    grpIds[chid_id] = currId
                rev_grpIds[currId] = chid_id
                node_ptrs[currId] = comp
                currId += 1
                # saving bonds
                if len(comp._bonds) != 0:
                    for bond in comp._bonds:
                        if bond not in bond_dict.keys():
                            bond_dict[bond] = [chid_id]
                        else:
                            bond_dict[bond].append(chid_id)
        # now we got everything, we implement it in the graph
        for bond in bond_dict:
            # check if each of our bonds have exactly two end points
            if len(bond_dict[bond]) == 2:
                id1 = bond_dict[bond][0]
                id1 = grpIds[id1]
                id2 = bond_dict[bond][1]
                id2 = grpIds[id2]
                G.connect_vertex(id1, [id2])
            else:
                # raise a warning
                logger.warning(
                    f"Bond {bond} doesn't have exactly 2 end points, please check that you don't have any dangling bonds.",
                    loc=loc,
                )
        # we get our color sets
        color_sets = list(colors.values())
        # set vertex coloring
        G.set_vertex_coloring(color_sets)
        # save our graph
        self.nautyG = G
        # generate the canonical certificate for the entire graph
        self.canonical_certificate = pynauty.certificate(self.nautyG)
        # generate the canonical label for the entire graph
        # first, we give every node their canonical order
        canon_order = pynauty.canon_label(self.nautyG)
        for iordr, ordr in enumerate(canon_order):
            node_ptrs[ordr].canonical_order = iordr
        # relabeling bonds
        relabeling_bond_dict = {}
        for bond in bond_dict:
            # check if each of our bonds have exactly two end points
            if len(bond_dict[bond]) == 2:
                id1 = bond_dict[bond][0]
                id1 = grpIds[id1]
                comp1 = node_ptrs[id1]
                id2 = bond_dict[bond][1]
                id2 = grpIds[id2]
                comp2 = node_ptrs[id2]
                parent_order = min(
                    comp1.parent_molecule.canonical_order,
                    comp2.parent_molecule.canonical_order,
                )
                comp_order = min(comp1.canonical_order, comp2.canonical_order)
                relabeling_bond_dict[(parent_order, comp_order)] = (comp1, comp2)
            else:
                # raise a warning
                logger.warning(
                    f"Bond {bond} doesn't have exactly 2 end points, please check that you don't have any dangling bonds.",
                    loc=loc,
                )
        # this will give us the keys to canonically sorted bonds
        sorted_order = sorted(relabeling_bond_dict.keys())
        for ibond, sbond in enumerate(sorted_order):
            # now we add a canonical bond ID to each component
            c1, c2 = relabeling_bond_dict[sbond]
            if c1.canonical_bonds is None:
                c1.canonical_bonds = [str(ibond + 1)]
            else:
                c1.canonical_bonds.append(str(ibond + 1))
            if c2.canonical_bonds is None:
                c2.canonical_bonds = [str(ibond + 1)]
            else:
                c2.canonical_bonds.append(str(ibond + 1))
        # and now we can get the canonical label
        self.canonical_label = self.print_canonical()

[docs]    def print_canonical(self):
        """
        Returns canonical label for the pattern
        """
        # need to make sure we don't print useless compartments
        self.consolidate_molecule_compartments()
        canon_label = ""
        # we first deal with the pattern compartment
        if self.compartment is not None:
            canon_label += "@{}".format(self.compartment)
        if self.label is not None:
            canon_label += "%{}".format(self.label)
        if self.label is not None or self.compartment is not None:
            canon_label += ":"
        # now loop over all molecules in canonical order
        canon_ords = [m.canonical_order for m in self.molecules]
        canon_ord_pairs = zip(range(len(self.molecules)), canon_ords)
        sorted_canon_ord_pairs = sorted(canon_ord_pairs, key=lambda x: x[1])
        for imol, mol in enumerate(sorted_canon_ord_pairs):
            mol_id = mol[0]
            if imol == 0:
                if self.fixed:
                    canon_label += "$"
                if self.MatchOnce:
                    canon_label += "{MatchOnce}"
            if imol > 0:
                canon_label += "."
            canon_label += self.molecules[mol_id].print_canonical()
        if self.relation is not None:
            canon_label += f"{self.relation}{self.quantity}"
        return canon_label

    def __contains__(self, val):
        """Return True if `val` is found in the pattern's molecule list."""
        return val in self.molecules

    def __eq__(self, other):
        loc = f"{__file__} : Pattern.__eq__()"
        if isinstance(other, Pattern):
            logger.debug(f"Comparison class matches: {other.__class__}", loc=loc)
            # checking pattern-wide properties
            if (other.compartment == self.compartment) and (other.label == self.label):
                logger.debug(
                    f"Compartment or label matches: {other.compartment}, {other.label}",
                    loc=loc,
                )
                # checking mods
                if (other.fixed == self.fixed) and (other.MatchOnce == self.MatchOnce):
                    logger.debug(
                        f"fixed or matchonce matches: {other.fixed}, {other.MatchOnce}",
                        loc=loc,
                    )
                    # checking quantifiers
                    if (other.relation == self.relation) and (
                        other.quantity == self.quantity
                    ):
                        logger.debug(
                            f"relation or quantity matches: {other.relation}, {other.quantity}",
                            loc=loc,
                        )
                        # if we made the label, we can just compare the two
                        if (self.canonical_label is not None) and (
                            other.canonical_label is not None
                        ):
                            return self.canonical_label == other.canonical_label
                        # now we can check contents
                        for molecule in self:
                            if molecule not in other.molecules:
                                logger.debug(
                                    f"molecule doesn't match: {molecule}", loc=loc
                                )
                                return False
                        # isomorphism check if we have the certificate
                        if (self.canonical_certificate is not None) and (
                            other.canonical_certificate is not None
                        ):
                            if (
                                self.canonical_certificate
                                != other.canonical_certificate
                            ):
                                return False
                        # TODO: molecules match, check bonds
                        # Bonds match, patterns are the same
                        logger.debug("patterns match!", loc=loc)
                        return True
        return False

    @property
    def compartment(self):
        """Compartment of the overall pattern (stored on `_compartment`)."""
        return self._compartment

    @compartment.setter
    def compartment(self, value):
        # TODO: Build in logic to set the
        # outer compartment
        # No validation is done yet, the value is stored as given.
        self._compartment = value

[docs]    def consolidate_molecule_compartments(self):
        # if the molecule compartment matches overall pattern
        # compartment, don't print the molecule compartments
        overall_comp = self.compartment
        if overall_comp is not None:
            for molec in self.molecules:
                if molec.compartment == overall_comp:
                    molec.compartment = None

    @property
    def label(self):
        """Label of the overall pattern (stored on `_label`)."""
        return self._label

    @label.setter
    def label(self, value):
        # TODO: Build in logic to set
        # the outer label
        # No validation is done yet, the value is stored as given.
        self._label = value

    def __str__(self):
        # need to make sure we don't print useless compartments
        self.consolidate_molecule_compartments()
        sstr = ""
        # we first deal with the pattern compartment
        if self.compartment is not None:
            sstr += "@{}".format(self.compartment)
        if self.label is not None:
            sstr += "%{}".format(self.label)
        if self.label is not None or self.compartment is not None:
            sstr += ":"
        # now loop over all molecules
        for imol, mol in enumerate(self.molecules):
            if imol == 0:
                if self.fixed:
                    sstr += "$"
                if self.MatchOnce:
                    sstr += "{MatchOnce}"
            if imol > 0:
                sstr += "."
            sstr += str(mol)
        if self.relation is not None:
            sstr += f"{self.relation}{self.quantity}"
        return sstr

    def __repr__(self):
        """Return the same text as `str(self)`."""
        return str(self)

    def __getitem__(self, key):
        """Index into the molecule list; `key` is passed to it unchanged."""
        return self.molecules[key]

    def __iter__(self):
        """Iterate over the molecules of the pattern."""
        return self.molecules.__iter__()

    # NOTE: __contains__ is implemented above (membership in self.molecules)


class Molecule:
    """
    Molecule object. A pattern is a list of molecules.
    This object also handles molecule types where components
    have a list of possible states.

    Attributes
    ----------
    _name : str
        name of the molecule
    _compartment : str
        compartment of the molecule
    _label : str
        label of the molecule
    _components : list[Component]
        list of components for this molecule

    Methods
    -------
    add_component(name, state=None, states=[])
        add a component object to the list of components with name
        "name", current state "state" or a list of states
        (for molecule types) "states"
    """

    def __init__(self, name="0", components=[], compartment=None, label=None):
        self._name = name
        self._components = components
        self._compartment = compartment
        self._label = label
        self.canonical_order = None
        self.canonical_label = None
        self.parent_pattern = None

    def __contains__(self, val):
        """Return True if `val` is found in the molecule's component list."""
        return val in self.components

    def __eq__(self, other):
        loc = f"{__file__} : Molecule.__eq__()"
        # check object type
        if isinstance(other, Molecule):
            logger.debug(f"Comparison class matches: {other.__class__}", loc=loc)
            # check attributes
            if (
                (other.name == self.name)
                and (other.compartment == self.compartment)
                and (other.label == self.label)
            ):
                logger.debug(
                    f"name, compartment and labels match: {other.name}, {other.compartment}, {other.label}",
                    loc=loc,
                )
                if (self.canonical_label is not None) and (
                    other.canonical_label is not None
                ):
                    # we can check canonical labels
                    if self.canonical_label != other.canonical_label:
                        return False
                # check components now
                for component in self:
                    if component not in other.components:
                        logger.debug(f"component doesn't match: {component}", loc=loc)
                        return False
                # everything matches
                logger.debug("molecules match", loc=loc)
                return True
        return False

    def __getitem__(self, key):
        if isinstance(key, int):
            return self.components[key]

    def __iter__(self):
        """Iterate over the components of the molecule."""
        return self.components.__iter__()

    # TODO: implement __setitem__ (__contains__ is already implemented above)

    def __str__(self):
        mol_str = self.name
        # we have a null species
        if not self.name == "0":
            mol_str += "("
        # we _could_ just not do () if components
        # don't exist but that has other issues,
        # especially for extension highlighting
        if len(self.components) > 0:
            for icomp, comp in enumerate(self.components):
                if icomp > 0:
                    mol_str += ","
                mol_str += str(comp)
        # we have a null species
        if not self.name == "0":
            mol_str += ")"
        if self.compartment is not None:
            mol_str += "@{}".format(self.compartment)
        if self.label is not None:
            mol_str += "%{}".format(self.label)
        return mol_str

[docs]    def print_canonical(self):
        """
        Returns canonical label for the pattern
        """
        # print in canonical order
        canon_label = self.name
        # we have a null species
        if not self.name == "0":
            canon_label += "("
        # we _could_ just not do () if components
        # don't exist but that has other issues,
        # especially for extension highlighting
        if len(self.components) > 0:
            canon_ords = [c.canonical_order for c in self.components]
            canon_ord_pairs = zip(range(len(self.components)), canon_ords)
            sorted_canon_ord_pairs = sorted(canon_ord_pairs, key=lambda x: x[1])
            for icomp, comp in enumerate(sorted_canon_ord_pairs):
                comp_id = comp[0]
                if icomp > 0:
                    canon_label += ","
                canon_label += self.components[comp_id].print_canonical()
        # we have a null species
        if not self.name == "0":
            canon_label += ")"
        if self.compartment is not None:
            canon_label += "@{}".format(self.compartment)
        if self.label is not None:
            canon_label += "%{}".format(self.label)
        return canon_label

    ### PROPERTIES ###
    @property
    def name(self):
        """Name of the molecule (stored on `_name`)."""
        return self._name

    @name.setter
    def name(self, value):
        # TODO: Check for invalid characters
        # No validation is done yet, the value is stored as given.
        self._name = value

    @property
    def components(self):
        """List of components of the molecule (stored on `_components`)."""
        return self._components

    @components.setter
    def components(self, value):
        # No validation is done yet, the value is stored as given.
        self._components = value

    def __repr__(self):
        """Return the same text as `str(self)`."""
        return str(self)

    @property
    def compartment(self):
        """Compartment of the molecule (stored on `_compartment`)."""
        return self._compartment

    @compartment.setter
    def compartment(self, value):
        # No validation is done yet, the value is stored as given.
        self._compartment = value

    @property
    def label(self):
        """Label of the molecule (stored on `_label`)."""
        return self._label

    @label.setter
    def label(self, value):
        # No validation is done yet, the value is stored as given.
        self._label = value

    def _add_component(self, name, state=None, states=None):
        """
        Create a component and append it to this molecule's components.

        Parameters
        ----------
        name : str
            name of the new component
        state : str, optional
            current state of the component
        states : list[str], optional
            list of possible states (for molecule types). When omitted, a
            fresh empty list is used so components never share one list.
        """
        comp_obj = Component()
        comp_obj.name = name
        comp_obj.state = state
        # a literal `[]` default would be shared by every component
        comp_obj.states = [] if states is None else states
        # let the component know which molecule it belongs to
        comp_obj.parent_molecule = self
        self.components.append(comp_obj)

[docs]    def add_component(self, name, state=None, states=[]):
        # TODO: Add built-in logic here
        # print("Warning: Logical checks are not complete")
        self._add_component(name, state, states)


class Component:
    """
    Component object that describes the state, label and bonding
    for each component. Molecules can optionally contain components

    Attributes
    ----------
    name : str
        name of the component
    _label : str
        label of the component
    _state : str
        state of the component, not used for molecule types
    _states : list[str]
        list of states for molecule types
    _bonds : list[Bond]
        list of bond objects that describes bonding of the component

    Methods
    -------
    add_state()
        not implemented. will eventually be used to add additional states
        to an existing component
    add_bond()
        not implemented. will eventually be used to add additional bonds
        to an existing component
    """

    def __init__(self):
        """Create an empty component; attributes are filled in afterwards."""
        self._name = ""
        self._label = None
        # current state, and the list of possible states (molecule types)
        self._state = None
        self._states = []
        self._bonds = []
        # canonicalization bookkeeping, unset until it's computed
        self.canonical_label = None
        self.canonical_order = None
        self.canonical_bonds = None
        # presumably a back-reference to the owning molecule; it is not
        # set in this constructor
        self.parent_molecule = None

    def __eq__(self, other):
        loc = f"{__file__} : Component.__eq__()"
        # check type
        # import ipdb;ipdb.set_trace()
        if isinstance(other, Component):
            logger.debug(f"Comparison class matches: {other.__class__}", loc=loc)
            # check attributes
            if (other.name == self.name) and (other.label == self.label):
                logger.debug(
                    f"name and labels match: {other.name}, {other.label}", loc=loc
                )
                # check states
                if len(other.states) == len(self.states):
                    logger.debug(f"state lists match: {other.states}", loc=loc)
                    # check current state
                    if other.state == self.state:
                        logger.debug(f"states match: {other.state}", loc=loc)
                        if (self.canonical_label is not None) and (
                            other.canonical_label is not None
                        ):
                            # we can check canonical labels
                            if self.canonical_label != other.canonical_label:
                                return False
                        # check bonds
                        # TODO: try to decide if A(b!1).B(a!1) is the same
                        # as A(b!2).B(a!2), if so, the bond check is much harder
                        # for bond in self.bonds:
                        #     if bond not in other.bonds:
                        #         logger.debug(
                        #             f"bonds don't match!: {other.bonds}", loc=loc
                        #         )
                        #         return False
                        if len(self.bonds) == len(other.bonds):
                            logger.debug("components match", loc=loc)
                            return True
        return False

    def __repr__(self):
        """Return the same text as `str(self)`."""
        return str(self)

    def __str__(self):
        comp_str = self.name
        # only for molecule types
        if len(self.states) > 0:
            for istate, state in enumerate(self.states):
                comp_str += "~{}".format(state)
        # for any other pattern
        if self.state is not None:
            comp_str += "~{}".format(self.state)
        if self.label is not None:
            comp_str += "%{}".format(self.label)
        if len(self.bonds) > 0:
            for bond in self.bonds:
                comp_str += "!{}".format(bond)
        return comp_str

[docs]    def print_canonical(self):
        """
        Returns canonical label for the pattern
        """
        comp_str = self.name
        # only for molecule types
        if len(self.states) > 0:
            for istate, state in enumerate(self.states):
                comp_str += "~{}".format(state)
        # for any other pattern
        if self.state is not None:
            comp_str += "~{}".format(self.state)
        if self.label is not None:
            comp_str += "%{}".format(self.label)
        if self.canonical_bonds is not None:
            for bond in self.canonical_bonds:
                comp_str += "!{}".format(bond)
        return comp_str

    ### PROPERTIES ###
    @property
    def name(self):
        """Name of the component (stored on `_name`)."""
        return self._name

    @name.setter
    def name(self, value):
        # TODO: Add built-in logic here
        # No validation is done yet, the value is stored as given.
        self._name = value

    @property
    def label(self):
        """Label of the component (stored on `_label`)."""
        return self._label

    @label.setter
    def label(self, value):
        # TODO: Add built-in logic here
        # No validation is done yet, the value is stored as given.
        self._label = value

    @property
    def state(self):
        """Current state of the component (stored on `_state`)."""
        return self._state

    @state.setter
    def state(self, value):
        # TODO: Add built-in logic here
        # No validation is done yet, the value is stored as given.
        self._state = value

    @property
    def states(self):
        """List of possible states, for molecule types (stored on `_states`)."""
        return self._states

    @states.setter
    def states(self, value):
        # TODO: Add built-in logic here
        # No validation is done yet, the value is stored as given.
        self._states = value

    @property
    def bonds(self):
        """List of bonds of the component (stored on `_bonds`)."""
        return self._bonds

    @bonds.setter
    def bonds(self, value):
        # TODO: Add built-in logic here
        # No validation is done yet, the value is stored as given.
        self._bonds = value

    def _add_state(self):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError

[docs]    def add_state(self):
        self._add_state()

    def _add_bond(self):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError

[docs]    def add_bond(self):
        self._add_bond()