Source code for bionetgen.atomizer.utils.pathwaycommons

import urllib.request, urllib.parse, urllib.error
import functools
import marshal
from .util import logMess
import json


def memoize(obj):
    cache = obj.cache = {}

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        # key = str(args) + str(kwargs)
        key = marshal.dumps([str(obj), args, kwargs])
        if key not in cache:
            cache[key] = obj(*args, **kwargs)
        return cache[key]

    return memoizer
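# Usage sketch for the memoizer above (an illustration added for this listing, not
# part of the original module): the first call computes and stores the result under a
# key built from the marshalled arguments, a repeated call is served from the cache
# attached to the decorated function.
#
#   @memoize
#   def slowAdd(a, b):
#       return a + b
#
#   slowAdd(1, 2)  # computed and stored
#   slowAdd(1, 2)  # returned from slowAdd.cache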
'''
from bioservices import UniProt

u = UniProt(verbose=False)


@memoize
def name2uniprot(nameStr):
    """
    get the uniprot id for a given biological name. gives preference to human data
    """
    data = u.search('{0}+AND+organism:9606'.format(nameStr), limit=5, columns="entry name,id")
    if len(data) == 0:
        data = u.search('{0}'.format(nameStr), limit=10, columns="entry name,id")
    parsedData = [x.split('\t') for x in data.split('\n')][1:]
    if len([x for x in parsedData if nameStr in x[0]]) > 0:
        return [x[1] for x in parsedData if nameStr in x[0]]
    return [x[1] for x in parsedData if len(x) == 2]
'''
@memoize
def queryBioGridByName(name1, name2, organism, truename1, truename2):
    url = "http://webservice.thebiogrid.org/interactions/?"
    response = None
    if organism:
        organismExtract = list(organism)[0].split("/")[-1]
        d = {
            "geneList": "|".join([name1, name2]),
            "taxId": "|".join(organism),
            "format": "json",
            "accesskey": "f74b8d6f4c394fcc9d97b11c8c83d7f3",
            "includeInteractors": "false",
        }
        # FIXME: check if all "organism"s are the wrong thing,
        # for model 48 this returns a process identifier https://www.ebi.ac.uk/QuickGO/term/GO:0007173
        # and not an organism taxonomy identifier
        data = urllib.parse.urlencode(d).encode("utf-8")
        try:
            response = urllib.request.urlopen(url, data=data).read()
        except urllib.error.HTTPError:
            logMess(
                "ERROR:MSC02",
                "A connection could not be established to biogrid while testing with taxon {1} and genes {0}, trying without organism taxonomy limitation".format(
                    "|".join([name1, name2]), "|".join(organism)
                ),
            )
            # return False
    if response is None:
        d = {
            "geneList": "|".join([name1, name2]),
            "format": "json",
            "accesskey": "f74b8d6f4c394fcc9d97b11c8c83d7f3",
            "includeInteractors": "false",
        }
        data = urllib.parse.urlencode(d).encode("utf-8")
        try:
            response = urllib.request.urlopen(url, data=data).read()
        except urllib.error.HTTPError:
            logMess("ERROR:MSC02", "A connection could not be established to biogrid")
            return False
    results = json.loads(response)
    referenceName1 = truename1.lower() if truename1 else name1.lower()
    referenceName2 = truename2.lower() if truename2 else name2.lower()
    for result in results:
        resultName1 = results[result]["OFFICIAL_SYMBOL_A"].lower()
        resultName2 = results[result]["OFFICIAL_SYMBOL_B"].lower()
        synonymName1 = results[result]["SYNONYMS_A"].split("|")
        synonymName1 = [x.lower() for x in synonymName1]
        synonymName2 = results[result]["SYNONYMS_B"].split("|")
        synonymName2 = [x.lower() for x in synonymName2]
        # FIXME: This should correctly warn the user where the interaction is coming
        # from exactly
        # FIXME: Let the user select individual interactions to include. Maybe an
        # interactive mode
        if truename1 is not None and truename2 is not None and resultName1 != resultName2:
            logMess(
                "WARNING:ATO005",
                "BioGrid result only matched a synonym. "
                + f"{resultName1} to {resultName2}",
            )
            return True
        elif (
            truename1 is not None
            and truename2 is not None
            and truename1 == truename2
            and resultName1 == resultName2
        ):
            logMess(
                "WARNING:ATO005",
                "BioGrid result only matched a synonym. "
                + f"{truename1} to {truename2} or "
                + f"{resultName1} to {resultName2}",
            )
            return True
        if (referenceName1 == resultName1 or referenceName1 in synonymName1) and (
            referenceName2 == resultName2 or referenceName2 in synonymName2
        ):
            logMess(
                "WARNING:ATO005",
                "BioGrid result only matched a synonym. "
                + f"{referenceName1} to {resultName1} or "
                + f"{referenceName1} to {synonymName1} or "
                + f"{referenceName2} to {resultName2} or "
                + f"{referenceName2} to {synonymName2}",
            )
            return True
        if (referenceName2 == resultName1 or referenceName2 in synonymName1) and (
            referenceName1 == resultName2 or referenceName1 in synonymName2
        ):
            logMess(
                "WARNING:ATO005",
                "BioGrid result only matched a synonym. "
                + f"{referenceName2} to {resultName1} or "
                + f"{referenceName2} to {synonymName1} or "
                + f"{referenceName1} to {resultName2} or "
                + f"{referenceName1} to {synonymName2}",
            )
            return True
    return False
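# Usage sketch (added for this listing, not part of the original module; it needs
# network access to webservice.thebiogrid.org and the access key embedded above):
# two gene symbols, a taxonomy annotation list, and the resolved "true" names go in,
# True/False comes back depending on whether BioGrid reports an interaction. The
# identifiers below are illustrative only.
#
#   hit = queryBioGridByName("EGF", "GRB2", ["9606"], "EGF", "GRB2")
#   print(hit)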
@memoize
def queryActiveSite(nameStr, organism):
    url = "http://www.uniprot.org/uniprot/?"
    response = None
    retry = 0
    while retry < 3:
        retry += 1
        if organism:
            organismExtract = list(organism)[0].split("/")[-1]
            # ASS - Updating the query to conform with a regular RESTful API request and work in Python3
            xparams = {
                "query": "{}+AND+organism:{}".format(nameStr, organismExtract),
                "columns": "name,id,feature(ACTIVE SITE)",
                "format": "tab",
                "limit": "5",
                "sort": "score",
            }
            # encode the query parameters for a POST request
            xparams = urllib.parse.urlencode(xparams).encode("utf-8")
            try:
                req = urllib.request.Request(url)
                with urllib.request.urlopen(req, data=xparams) as f:
                    response = f.read().decode("utf-8")
            except urllib.error.HTTPError:
                logMess(
                    "ERROR:MSC03", "A connection could not be established to uniprot"
                )
            response = str(response)
        if response in ["", None]:
            url = "http://www.uniprot.org/uniprot/?"
            # ASS - Updating the query to conform with a regular RESTful API request and work in Python3
            xparams = {
                "query": nameStr,
                "columns": "name,id,feature(ACTIVE SITE)",
                "format": "tab",
                "limit": "5",
                "sort": "score",
            }
            xparams = urllib.parse.urlencode(xparams).encode("utf-8")
            try:
                req = urllib.request.Request(url, data=xparams)
                with urllib.request.urlopen(req) as f:
                    response = f.read().decode("utf-8")
            except urllib.error.HTTPError:
                logMess(
                    "ERROR:MSC03", "A connection could not be established to uniprot"
                )
            response = str(response)
        if not response:
            return response
        parsedData = [x.split("\t") for x in response.split("\n")][1:]
        # return parsedData
        return [
            x[0]
            for x in parsedData
            if len(x) == 3
            and any(nameStr.lower() in z for z in [y.lower() for y in x[0].split("_")])
            and len(x[2]) > 0
        ]
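# Usage sketch (added for this listing, not part of the original module; requires
# network access to uniprot.org and assumes the legacy query/columns parameters above
# are still honoured by the service): returns the UniProt entry names matching nameStr
# that carry an annotated active site. A falsy organism skips the taxon restriction;
# the taxonomy URI below is illustrative.
#
#   print(queryActiveSite("EGF", ""))
#   print(queryActiveSite("EGF", ["http://identifiers.org/taxonomy/9606"]))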
@memoize
def name2uniprot(nameStr, organism):
    url = "http://www.uniprot.org/uniprot/?"
    response = None
    if organism:
        organismExtract = list(organism)[0].split("/")[-1]
        d = {
            "query": f"{nameStr}+AND+organism:{organismExtract}",
            "format": "tab",
            "limit": "5",
            "columns": "entry name,id",
            "sort": "score",
        }
        data = urllib.parse.urlencode(d).encode("utf-8")
        try:
            # decode the byte response before parsing
            response = urllib.request.urlopen(url, data=data).read().decode("utf-8")
        except urllib.error.HTTPError:
            logMess("ERROR:MSC03", "A connection could not be established to uniprot")
            return None
    if response in ["", None]:
        url = "http://www.uniprot.org/uniprot/?"
        d = {
            "query": f"{nameStr}",
            "format": "tab",
            "limit": "5",
            "columns": "entry name,id",
            "sort": "score",
        }
        data = urllib.parse.urlencode(d).encode("utf-8")
        try:
            response = urllib.request.urlopen(url, data=data).read().decode("utf-8")
        except urllib.error.HTTPError:
            return None
    parsedData = [x.split("\t") for x in response.split("\n")][1:]
    return [
        x[1]
        for x in parsedData
        if len(x) == 2
        and any(nameStr.lower() in z for z in [y.lower() for y in x[0].split("_")])
    ]
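# Usage sketch (added for this listing, not part of the original module; requires
# network access to uniprot.org): maps a protein name to candidate UniProt accession
# ids, trying the organism-restricted query first and the unrestricted query as a
# fallback. A falsy organism argument goes straight to the fallback query.
#
#   print(name2uniprot("MEKK1", ""))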
@memoize
def getReactomeBondByUniprot(uniprot1, uniprot2):
    """
    Queries reactome to see if two proteins referenced by their uniprot ids
    are bound in the same complex
    """
    url = "http://www.pathwaycommons.org/pc2/graph"
    d = {
        "kind": "PATHSFROMTO",
        "format": "EXTENDED_BINARY_SIF",
        "source": "|".join(uniprot1),
        "target": "|".join(uniprot2),
    }
    data = urllib.parse.urlencode(d).encode("utf-8")
    # query reactome
    try:
        response = urllib.request.urlopen(url, data=data).read().decode("utf-8")
    except urllib.error.HTTPError:
        # logMess('ERROR:pathwaycommons','A connection could not be established to pathwaycommons')
        return None
    # divide by line
    parsedResponse = [x.split("\t") for x in response.split("\n")]
    # the response is divided in two sections: actual protein-protein relationships
    # and protein descriptors
    separation = [i for i, x in enumerate(parsedResponse) if len(x) < 2]
    # separate the first half and focus on actual ppi entries
    ppi = [x for x in parsedResponse[: separation[0]] if x[1] == "in-complex-with"]
    # ppi = [x for x in parsedResponse[:separation[0]]]
    # get protein descriptors and filter by the initial uniprot id given in the method parameters
    includedElements = [[x[0], x[-1]] for x in parsedResponse[separation[0]:]]
    includedElements1 = [
        x for x in includedElements if any(y in x[1] for y in uniprot1)
    ]
    includedElements2 = [
        x for x in includedElements if any(y in x[1] for y in uniprot2)
    ]
    includedElements1 = [x[0] for x in includedElements1]
    includedElements2 = [x[0] for x in includedElements2]
    # filter protein interactions by those uniprot ids and names we truly care about
    ppi = [
        x[0:3]
        for x in ppi
        if (
            len([y for y in includedElements1 if y == x[0]]) == 1
            and len([y for y in includedElements2 if y == x[2]]) == 1
        )
        or (
            len([y for y in includedElements1 if y == x[2]]) == 1
            and len([y for y in includedElements2 if y == x[0]]) == 1
        )
    ]
    # ppi = [x[0:3] for x in ppi if len([y for y in includedElements1 if y in x]) == 1 and len([y for y in includedElements2 if y in x]) == 1]
    return ppi
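# Usage sketch (added for this listing, not part of the original module; requires
# network access to pathwaycommons.org): both arguments are lists of UniProt
# accessions (the "|".join above expects iterables of ids), and the returned list
# contains [source, 'in-complex-with', target] triplets, or None if the service could
# not be reached.
#
#   print(getReactomeBondByUniprot(["P07522"], ["P07522"]))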
@memoize
def getReactomeBondByName(name1, name2, sbmlURI, sbmlURI2, organism=None):
    """
    resolves the uniprot ids of parameters *name1* and *name2* and determines whether
    they can be bound in the same complex based on reactome information
    """
    if len(sbmlURI) > 0:
        uniprot1 = [x.split("/")[-1] for x in sbmlURI]
    else:
        uniprot1 = name2uniprot(name1, organism)
    if len(sbmlURI2) > 0:
        uniprot2 = [x.split("/")[-1] for x in sbmlURI2]
    else:
        uniprot2 = name2uniprot(name2, organism)
    uniprot1 = uniprot1 if uniprot1 else [name1]
    uniprot2 = uniprot2 if uniprot2 else [name2]
    result = getReactomeBondByUniprot(uniprot1, uniprot2)
    return result
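# Usage sketch (added for this listing, not part of the original module): when the
# SBML annotation URI lists are non-empty the UniProt ids are taken from their last
# path segments, otherwise name2uniprot is used; the ids are then handed to
# getReactomeBondByUniprot.
#
#   print(getReactomeBondByName("EGF", "Grb2", [], []))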
def isInComplexWith(name1, name2, sbmlURI=[], sbmlURI2=[], organism=None):
    nameset = sorted([name1, name2], key=lambda x: x[0])
    result = None
    retry = 0
    while retry < 3:
        result = getReactomeBondByName(
            nameset[0][0], nameset[1][0], nameset[0][1], nameset[1][1], organism
        )
        retry += 1
        if result:
            return any([x[1] == "in-complex-with" for x in result])
    return False
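# Usage sketch (added for this listing, not part of the original module): judging
# from the indexing above (nameset[0][0], nameset[0][1]), the call sites appear to
# pass (name, annotation URI list) pairs rather than bare strings. The UniProt URIs
# below are illustrative only.
#
#   inComplex = isInComplexWith(
#       ("EGF", ["http://identifiers.org/uniprot/P01133"]),
#       ("EGFR", ["http://identifiers.org/uniprot/P00533"]),
#   )
#
# This returns True only when pathway commons reports an 'in-complex-with' edge
# between the two names.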
if __name__ == "__main__":
    # pass
    # results = isInComplexWith('Crk','Ras')
    # print(getReactomeBondByName('EGF', 'Grb2', [], []))
    print((queryActiveSite("EGF", "")))
    # print(getReactomeBondByName('EGF', 'EGF', ['P07522'], ['P07522']))
    # print(name2uniprot('MEKK1'))
    # print(results)
    # print(getReactomeBondByUniprot('Q9QX70','Q9QX70'))
    # print(getReactomeBondByUniprot('P07522','P07522'))