Source code for bionetgen.atomizer.rulifier.postAnalysis

from . import componentGroups
import argparse
import pprint
from collections import defaultdict
import itertools
from copy import copy
from bionetgen.atomizer.utils import readBNGXML

import functools
import marshal

[docs]def memoize(obj): cache = obj.cache = {} @functools.wraps(obj) def memoizer(*args, **kwargs): key = marshal.dumps([args, kwargs]) if key not in cache: cache[key] = obj(*args, **kwargs) return cache[key] return memoizer
[docs]@memoize def resolveEntry(dependencyGraph, moleculeSet): """ resolve an entry to its basic components according to dependency graph >>> dependencyGraph = {'EGF_EGFR_2':[['EGF_EGFR','EGF_EGFR']],'EGF_EGFR':[['EGF','EGFR']],'EGFR':[],'EGF':[]} >>> resolveEntry(dependencyGraph, ['EGF_EGFR_2']) ['EGF', 'EGFR', 'EGF', 'EGFR'] """ if type(moleculeSet) == str: return [moleculeSet] if len(moleculeSet) == 1 and dependencyGraph[moleculeSet[0]] == []: return moleculeSet compositionList = [] for molecule in moleculeSet: if len(dependencyGraph[molecule]) == 0: compositionList.append(molecule) else: compositionList.extend( resolveEntry(dependencyGraph, dependencyGraph[molecule][0]) ) return compositionList
[docs]class ModelLearning: def __init__(self, fileName, rawFileName=None): self.molecules, self.rules, _ = readBNGXML.parseXML(fileName) ( self.dependencies, self.patternXreactions, _, _, ) = componentGroups.getContextRequirements(fileName, collapse=False) self.transposePatternsReactions() self.reverseDependencies = componentGroups.reverseContextDict(self.dependencies) self.moleculeMotifDict, self.motifMoleculeDict = self.classifyPairsByMotif( self.reverseDependencies ) if rawFileName: self.processRawSBML(rawFileName) else: self.rawRules = {}
[docs] def processRawSBML(self, inputfile): _, rawrules, _ = readBNGXML.parseXML(inputfile) self.rawRules = {x[0].label: x[0] for x in rawrules}
[docs] def transposePatternsReactions(self): self.reactionsXpatterns = defaultdict( lambda: defaultdict(lambda: defaultdict(list)) ) for product in self.patternXreactions: for reactionCenter in self.patternXreactions[product]: for contextcomponent in self.patternXreactions[product][reactionCenter]: for contextState in self.patternXreactions[product][reactionCenter][ contextcomponent ]: for reaction in self.patternXreactions[product][reactionCenter][ contextcomponent ][contextState]: self.reactionsXpatterns[reaction][product][ reactionCenter ].append((contextcomponent, contextState))
[docs] def classifyPairsByMotif(self, reverseDependencies): """ recives a dict arranged by molecule->componentPair->dependency organizes molecule pairs by a concept called 'motif' """ motifDependencies = defaultdict(lambda: defaultdict(list)) motifDependencies2 = defaultdict(lambda: defaultdict(list)) for molecule in reverseDependencies: for moleculePair in reverseDependencies[molecule]: motifDependencies[molecule][frozenset(moleculePair)].append( reverseDependencies[molecule][moleculePair] ) for molecule in motifDependencies: for moleculePair in motifDependencies[molecule]: requirementPair = tuple( sorted(motifDependencies[molecule][moleculePair]) ) motifDependencies2[requirementPair][molecule].append(moleculePair) return motifDependencies, motifDependencies2
[docs] def getMotifFromPair(self, molecule, component1, component2): return self.moleculeMotifDict[molecule][frozenset([component1, component2])]
[docs] def getParticipatingReactions(self, molecule, componentPair, reactionDictionary): correlationList = {} for moleculeName in reactionDictionary: if moleculeName.startswith(molecule + "%"): for component in reactionDictionary[moleculeName]: if component[0] in componentPair and ( component[1] == 1 or component[2] not in ["0", 0, ""] ): for componentComplement in [ x for x in reactionDictionary[moleculeName][component] if x in componentPair ]: correlationList[ (component[0], componentComplement) ] = reactionDictionary[moleculeName][component][ componentComplement ] return correlationList
[docs] def getPairsFromMotif(self, motif1, motif2, excludedComponents): moleculeCorrelationList = defaultdict(dict) for element in self.motifMoleculeDict: if motif1 in element and motif2 in element: if motif1 == motif2 and len([x for x in element if x == motif1]) < 2: continue for molecule in self.motifMoleculeDict[element]: correlationList = {} if len(self.motifMoleculeDict[element][molecule]) > 0: for componentPair in self.motifMoleculeDict[element][molecule]: if not any(x in excludedComponents for x in componentPair): correlationList[ componentPair ] = self.getParticipatingReactions( molecule, componentPair, self.patternXreactions ) moleculeCorrelationList[molecule].update(correlationList) return dict(moleculeCorrelationList)
[docs] def analyzeRedundantBonds(self, assumptions): """ Analyzes a system of molecules with redundant bonds between them (more than one path between any two nodes in the system). The function attemps to score the bonds by looking out for partial competition relationships (e.g. the presence of one component excludes the activation of another, but in the other direction we see independence) which are less likely to occur than a fully independence relationship. The method will thus nominate such edges for deletion if the overall systems still forms a fully connected graph after the bond removal. """ def fullyConnectedGraph(nodes, edges): """ Lazy implementation. This only works if there is one common elements to all subgraphs """ if edges == []: return False tmpNodeList = [set(x) for x in edges] superGraph = set.intersection(*map(set, edges)) if len(superGraph) > 0: return True return False conserveBonds = [] deleteBonds = {} for redundantBondSet in assumptions: allBonds = [sorted(x) for x in itertools.combinations(redundantBondSet, 2)] conserveBonds = [] for molecule in redundantBondSet: for x in itertools.combinations( [x for x in redundantBondSet if x != molecule], 2 ): contextMotif = self.getMotifFromPair( molecule, x[0].lower(), x[1].lower() ) if ( "independent" in contextMotif and not ( "requirement" in contextMotif or "nullrequirement" in contextMotif ) ) or set(contextMotif) == set(["requirement", "nullrequirement"]): conserveBonds.append(sorted([molecule, x[0]])) conserveBonds.append(sorted([molecule, x[1]])) if fullyConnectedGraph(redundantBondSet, conserveBonds): deleteBonds[redundantBondSet] = [ x for x in allBonds if x not in conserveBonds ] return deleteBonds
[docs] def scoreHypotheticalBonds(self, assumptions): """ TODO: we need some way to evaluate the confidence in a bond based on context information """ pass
[docs] def getDifference(self, pattern1, pattern2, translator): if pattern1 not in translator or pattern2 not in translator: return None species1 = translator[pattern1] species2 = translator[pattern2] species1.sort() species2.sort() componentDifference = [] for molecule1, molecule2 in zip(species1.molecules, species2.molecules): for component1, component2 in zip( molecule1.components, molecule2.components ): if ( len(component1.bonds) != len(component2.bonds) or component1.activeState != component2.activeState ): componentDifference.append( return componentDifference
[docs] def processContextMotifInformation(self, assumptionList, database): def getClassification(keys, translator): flags = [key in [x.lower() for x in translator.keys()] for key in keys] if all(flags): return "binding-binding" elif any(flags): return "modification-binding" return "modification-modification" motifInformationDict = self.getContextMotifInformation() motifFinalLog = defaultdict(set) motifReactionDefinitions = {} for motifInformation in motifInformationDict: # iterate over motifs that are known to be problematic if motifInformation in [ frozenset(["nullrequirement", "independent"]), frozenset(["requirement", "independent"]), ]: for molecule in motifInformationDict[motifInformation]: if len(motifInformationDict[motifInformation][molecule]) == 0: continue # if the candidate definitions for a given compound are related to a molecule with problematic motifs for assumption in ( x for x in assumptionList for y in eval(x[3][1]) for z in y if molecule in z ): candidates = eval(assumption[1][1]) alternativeCandidates = eval(assumption[2][1]) original = eval(assumption[3][1]) # further confirm that the change is about the pair of interest # by iterating over all candidates and comparing one by one for candidate in candidates: for alternativeCandidate in alternativeCandidates: difference = [ x for x in candidate if x not in alternativeCandidate ] difference.extend( [ x for x in alternativeCandidate if x not in candidate ] ) # if we are referencing a molecule that is not about this particular context change # dont store it in the motif/species table, just keep information about the motif alone localAnalysisFlag = True if not any( [ molecule in database.prunnedDependencyGraph[x][0] if len(database.prunnedDependencyGraph[x]) > 0 else molecule in x for x in difference ] ): localAnalysisFlag = False # continue if localAnalysisFlag: # get those elements that differ between the two candidates and that correspond to the current <molecule> being analyzed difference = [ x for x in candidate if x not in alternativeCandidate and resolveEntry( database.prunnedDependencyGraph, [x] )[0] == molecule ] alternativeDifference = [ x for x in alternativeCandidate if x not in candidate and molecule in resolveEntry( database.prunnedDependencyGraph, [x] )[0] ] # get the difference patterns for the two species if not difference or not alternativeDifference: continue componentDifference = self.getDifference( difference[0], alternativeDifference[0], database.translator, ) # make sure that the current motif candidate intersects with the difference pattern for keys in motifInformationDict[motifInformation][ molecule ]: if localAnalysisFlag and any( key in componentDifference for key in keys ): motifFinalLog[ "{0}({1})".format(molecule, ", ".join(keys)) ].add(assumption[0]) classification = getClassification( keys, database.translator ) if classification not in motifReactionDefinitions: motifReactionDefinitions[classification] = {} motifReactionDefinitions[classification][ "{0}({1})".format(molecule, ", ".join(keys)) ] = ( motifInformation, motifInformationDict[motifInformation][ molecule ][keys], ) # pprint.pprint(dict(motifFinalLog)) # pprint.pprint(motifReactionDefinitions) return motifFinalLog, motifReactionDefinitions
[docs] def getContextMotifInformation(self): """ returns the reactions in the system classified by context-component motif pairs. e.g. a requirement,nullrequirement motif pair is a pair of components A->B such that B needs for A to be activated to activate, whereas A needs for B to be inactivated to activate. """ relationshipCombinations = itertools.combinations( ["independent", "requirement", "nullrequirement", "exclusion"], 2 ) motifDictionary = {} for relCombi in relationshipCombinations: motifDictionary[frozenset(relCombi)] = self.getPairsFromMotif( relCombi[0], relCombi[1], [] ) for requirementClass in [ "independent", "requirement", "nullrequirement", "exclusion", ]: motifDictionary[ frozenset([requirementClass, requirementClass]) ] = self.getPairsFromMotif(requirementClass, requirementClass, []) return motifDictionary
[docs] def getComplexReactions(self, threshold=2): complexRules = [] for rule in self.rules: if ( len( [ x for x in rule[0].actions if x.action not in ["ChangeCompartment"] ] ) >= threshold ): complexRules.append(rule) return complexRules
[docs] def analyzeComplexReactions(self, threshold=2): def getActionableComponentPartners(actions, molecule): actionableComponents = [] for action in actions: if action[1] and action[1] in molecule.lower(): actionableComponents.append(action[2]) if action[2] and action[2] in molecule.lower(): actionableComponents.append(action[1]) return actionableComponents for reaction in self.getComplexReactions(): # analyze reactions with cis-allostery (e.g. two actions sites are on the same molecule) if ( len( [ x for x in self.reactionsXpatterns[reaction[0].label] if len(self.reactionsXpatterns[reaction[0].label][x]) > 1 ] ) == 0 ): continue print("----------") if reaction[0].label in self.rawRules: print(str(self.rawRules[reaction[0].label])) print(">>>>>>") print(str(reaction[0])) else: print(str(reaction[0].label)) # print(str(reaction[0])) resolvedActions = [] print("Actions:") changeFlag = 0 for action in reaction[0].actions: molecule1 = ( reaction[-3]["_".join(action.site1.split("_")[:-1])] if action.site1 else "" ) molecule2 = ( reaction[-3]["_".join(action.site2.split("_")[:-1])] if action.site2 else "" ) site1 = reaction[-3][action.site1] if action.site1 else "" site2 = reaction[-3][action.site2] if action.site2 else "" print( "\t{0}= {1}({2}), {3}({4})".format( action.action, molecule1, site1, molecule2, site2 ) ) if action.action == "DeleteBond": changeFlag = 1 resolvedActions.append([action.action, site1, site2]) print("Context:") for reactionCenter in self.reactionsXpatterns[reaction[0].label]: # cys rules if len(self.reactionsXpatterns[reaction[0].label][reactionCenter]) > 1: for state in self.reactionsXpatterns[reaction[0].label][ reactionCenter ]: # we will focus on statechange actions for now if state[2] not in [""]: # print(self.patternXreactions[reactionCenter]) actionableComponents = getActionableComponentPartners( [ x for x in resolvedActions if x[0] in ["AddBond", "DeleteBond"] ], reactionCenter, ) for component in actionableComponents: print( "\treaction center <{0}>, context <{1}> in molecule <{2}>:".format( component, state[0], reactionCenter ) ) print( "\t", { x: dict( self.patternXreactions[reactionCenter][ (component, changeFlag, "") ][x] ) for x in self.patternXreactions[reactionCenter][ (component, changeFlag, "") ] if x in [state[0], state[0].lower()] }, ) print("+++++++++")
[docs]def defineConsole(): """ defines the program console line commands """ parser = argparse.ArgumentParser(description="SBML to BNGL translator") parser.add_argument("-i", "--input", type=str, help="sbml file") parser.add_argument("-r", "--raw", type=str, help="raw sbml file") parser.add_argument("-t", "--tests", action="store_true", help="run unit tests") return parser
[docs]def runTests(): import doctest doctest.testmod()
if __name__ == "__main__": parser = defineConsole() namespace = parser.parse_args() if namespace.tests: runTests() exit() inputFile = namespace.input modelLearning = ModelLearning(namespace.input, namespace.raw) # print(modelLearning.getMotifFromPair('EGFR','grb2','shc')) # print(modelLearning.getMotifFromPair('Shc','grb2','egfr')) # modelLearning.analyzeComplexReactions() # for rule in complexRules: # print(str(rule[0])) relationshipCombinations = itertools.combinations( ["independent", "requirement", "nullrequirement", "exclusion"], 2 ) motifDictionary = {} for relCombi in relationshipCombinations: motifDictionary[relCombi] = modelLearning.getPairsFromMotif( relCombi[0], relCombi[1], ["imod"] ) if len(motifDictionary[relCombi]) > 0: print( relCombi, { x: len(motifDictionary[relCombi][x]) for x in motifDictionary[relCombi] }, ) for requirementClass in [ "independent", "requirement", "nullrequirement", "exclusion", ]: motifDictionary[ (requirementClass, requirementClass) ] = modelLearning.getPairsFromMotif( requirementClass, requirementClass, ["imod"] ) if len(motifDictionary[(requirementClass, requirementClass)]) > 0: print( (requirementClass, requirementClass), { x: len(motifDictionary[(requirementClass, requirementClass)][x]) for x in motifDictionary[(requirementClass, requirementClass)] }, ) print(modelLearning.getPairsFromMotif("independent", "requirement", ["imod"])) print("---") # print(modelLearning.getPairsFromMotif('independent','nullrequirement',['imod']))