Skip to content
Snippets Groups Projects
Commit 2708fc81 authored by Molkentin, Alina's avatar Molkentin, Alina
Browse files

Merge branch 'main' of...

parents 0c1f1725 6011746e
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
This commit is part of merge request !11. Comments created here will be created in the context of that merge request.
Showing
with 467 additions and 267 deletions
......@@ -9,7 +9,7 @@ def back_to_valid_edges(links_from_json, processed_input_list):
function that deletes edges, if one ore two including nodes are deleted nodes
'''
list_of_valid_edges = links_from_json
list_of_valid_edges = links_from_json.copy()
#iterates over all edges from old graph
......@@ -23,6 +23,8 @@ def back_to_valid_edges(links_from_json, processed_input_list):
# increases counter if adjacent node was found
if (adj_node == pub.doi_url):
found_adj_nodes += 1
if (found_adj_nodes == 2):
break
#removes the edge if less than 2 adjacent nodes found
if (found_adj_nodes < 2):
......
......@@ -13,22 +13,21 @@ __status__ = "Production"
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from os import error
sys.path.append("../")
from input.interface import InputInterface as Input
#import input
from input_test import input_test_func
from input.publication import Publication
from get_pub_from_input import get_pub
from json_demo import output_to_json
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: list of strings
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
......@@ -38,24 +37,19 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds input dois to nodes and retrieves citations and references for input publications
'''
# saves found citations and references in lists
references_pub_obj_list = []
citations_pub_obj_list = []
for pub_doi in doi_input_list: #iterates over every incoming doi
if(test_var): #checks that it is a test and chooses test-input function
pub = input_test_func(pub_doi) #creates an object of class Publication
else: #checks that it isnt a test and chooses standart-input function
#print(pub_doi)
inter = Input()
try:
pub = inter.get_publication(pub_doi) #creates an object of class Publication
except ValueError:
continue
except IndexError:
continue
pub = get_pub(pub_doi, test_var)
if (type(pub) != Publication):
print(pub)
continue
# checks if publication already exists in nodes
not_in_nodes = True #boolean value to check if a node already exists in the set of nodes
......@@ -81,34 +75,34 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
return(references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges(test_var):
'''
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
completes inner edges between nodes of group height and depth
'''
for node in nodes:
if (node.group == "depth"):
for citation in node.citations:
for cit in nodes:
if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges):
edges.append([citation.doi_url, node.doi_url])
if (citation == cit.doi_url and [citation, node.doi_url] not in edges):
edges.append([citation, node.doi_url])
if (node.group == "height"):
for reference in node.references:
for ref in nodes:
if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges):
edges.append([node.doi_url,reference.doi_url])
if (reference == ref.doi_url and [node.doi_url, reference] not in edges):
edges.append([node.doi_url,reference])
# adds a node for every publication unknown
# adds edges for references between publications
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
'''
:param pub: publication which references will be added
:type pub: Class Publication
:type pub: Publication
:param search_depth: current depth to search for references
:type search_depth: int
......@@ -118,29 +112,24 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every referenced publication unknown
adds edges to added references
'''
references_pub_obj_list = []
for reference in pub.references: #iterates over the references of the considered paper
not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes
for node in nodes: #iterates over all nodes in set of nodes
if (reference.doi_url == node.doi_url): #determines that the node already exists
for node in nodes: #iterates over all nodes in set of nodes #
if (reference == node.doi_url): #determines that the node already exists
not_in_nodes = False #boolean false --> node will not be created
break
if (not_in_nodes): #checks that there is no node with this doi
if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit
if (test_var): #determines that it is a test and chooses the test-input function
reference_pub_obj = input_test_func(reference.doi_url) #creates an Object of Publication Class
else: #determines that it isnt a test and chooses the standart function
#reference_pub_obj = Input(reference.doi_url)
inter = Input()
try:
reference_pub_obj = inter.get_publication(reference.doi_url)
except ValueError:
continue
# nur aus Testzwecken, da noch was bei Input falsch ist
except IndexError:
continue
reference_pub_obj = get_pub(reference, test_var)
if (type(reference_pub_obj) != Publication):
print(pub)
continue
reference_pub_obj.group = "depth"
nodes.append(reference_pub_obj) # appends the object to the set of nodes
......@@ -148,19 +137,15 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references
# adds edge only if citation already exists
elif [pub.doi_url,reference.doi_url] not in edges:
edges.append([pub.doi_url,reference.doi_url])
elif [pub.doi_url,reference] not in edges:
edges.append([pub.doi_url,reference])
return references_pub_obj_list
# recursive function to implement height-first-search on references
# references_pub_obj_list: input list of references as publication objects
# search_depth: current search_depth of height-first-search
# search_depth_max: maximal search_depth for dfs
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
'''
:param references_pub_obj_list: list of publications which references will be added
:type references_pub_obj_list: list of objects of type Class Publications
:type references_pub_obj_list: List[Publication]
:param search_depth: current depth to search for references
:type search_depth: int
......@@ -170,6 +155,8 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement height-first-search on references
'''
# adds next level to nodes/edges
......@@ -180,14 +167,12 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
if (search_depth < search_depth_max):
process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var)
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
'''
:param pub: publication which citations will be added
:type pub: Class Publication
:type pub: Publication
:param search_height: current height to search for citations
:type search_height_max: int
......@@ -197,28 +182,24 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every citing publication unknown
adds edges to added citations
'''
citations_pub_obj_list = []
for citation in pub.citations:
not_in_nodes = True
for node in nodes: # checks every citation for duplication
if (citation.doi_url == node.doi_url):
if (citation == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (search_height < search_height_max): #checks if its a test and chooses input function accordingly
if (test_var):
citation_pub_obj = input_test_func(citation.doi_url)
else:
#citation_pub_obj = Input(citation.doi_url)
inter = Input()
try:
citation_pub_obj = inter.get_publication(citation.doi_url)
except ValueError:
continue
except IndexError:
continue
citation_pub_obj = get_pub(citation, test_var)
if (type(citation_pub_obj) != Publication):
print(pub)
continue
citation_pub_obj.group = "height"
nodes.append(citation_pub_obj)
......@@ -226,20 +207,15 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
citations_pub_obj_list.append(citation_pub_obj)
# adds only edge if citation already exists
elif [citation.doi_url,pub.doi_url] not in edges:
edges.append([citation.doi_url,pub.doi_url])
elif [citation,pub.doi_url] not in edges:
edges.append([citation,pub.doi_url])
return citations_pub_obj_list
# recursive function to implement height-first-search on citations
# citations_pub_obj_list: input list of citations as publication objects
# search_height: current search_height of height-first-search
# search_height_max: maximal search_height for dfs
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
'''
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: list of objects of type Class Publications
:type citations_pub_obj_list: List[Publication]
:param search_height: current height to search for citations
:type search_height_max: int
......@@ -249,6 +225,8 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement depth-first-search on citations
'''
# adds next level to nodes/edges
......@@ -261,8 +239,6 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
def process_main(doi_input_list, search_height, search_depth, test_var = False):
'''
:param doi_input_list: input list of doi from UI
......@@ -276,6 +252,8 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
main function to start graph generation
'''
# ERROR-Handling doi_array = NULL
......@@ -291,10 +269,10 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
print("Error, search_depth of search must be positive")
# creates empty lists to save nodes and edges
global nodes, edges
nodes = [] # create empty array for the nodes
edges = [] # create empty array for the edges
nodes = []
edges = []
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
......@@ -307,14 +285,6 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
complete_inner_edges(test_var)
# calls a skript to save nodes and edges of graph in .json file
output_to_json(nodes,edges)
# only for unit tests
if (test_var == True):
doi_nodes_list = []
for node in nodes:
doi_nodes_list.append(node.doi_url)
return(doi_nodes_list, edges)
output_to_json(nodes,edges, test_var)
return(nodes,edges)
import unittest
from Processing import process_main
from import_from_json import input_from_json
from update_graph import check_graph_updates
class ProcessingTest(unittest.TestCase):
maxDiff = None
def testCycle(self):
nodes, edges = process_main(['doiz1'],1,1,True)
self.assertCountEqual(nodes, ['doiz1', 'doiz2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
nodes, edges = process_main(['doiz1'],2,2,True)
self.assertCountEqual(nodes, ['doiz1', 'doiz2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
#def testBigCycle(self):
......@@ -19,48 +27,88 @@ class ProcessingTest(unittest.TestCase):
def testEmptyDepthHeight(self):
nodes, edges = process_main(['doi1'],0,0,True)
self.assertCountEqual(nodes,['doi1'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi1'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi1', 'doi2'],0,0,True)
self.assertCountEqual(nodes, ['doi1','doi2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2'])
self.assertCountEqual(edges, [['doi1', 'doi2']])
nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True)
self.assertCountEqual(nodes, ['doi1','doi2', 'doi3'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3'])
self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
def testInnerEdges(self):
nodes, edges = process_main(['doi_ie1'],1,1,True)
self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3'])
self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
def testRightHeight(self):
nodes, edges = process_main(['doi_h01'],1,0,True)
self.assertCountEqual(nodes,['doi_h01'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h01'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi_h02'],1,0,True)
self.assertCountEqual(nodes,['doi_h02','doi_h1'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1'])
self.assertCountEqual(edges, [['doi_h1','doi_h02']])
nodes, edges = process_main(['doi_h02'],2,0,True)
self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2'])
self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
def testRightDepth(self):
nodes, edges = process_main(['doi_d01'],0,1,True)
self.assertCountEqual(nodes,['doi_d01'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d01'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi_d02'],0,1,True)
self.assertCountEqual(nodes,['doi_d02','doi_d1'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1'])
self.assertCountEqual(edges, [['doi_d02','doi_d1']])
nodes, edges = process_main(['doi_d02'],0,2,True)
self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2'])
self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
def test_import_from_json(self):
nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True)
nodes_new, edges_new = input_from_json('test_output.json')
self.assertCountEqual(nodes_old,nodes_new)
self.assertCountEqual(edges_old, edges_new)
def test_deleted_input_dois(self):
nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True)
nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
nodes_new_both, edges_new_both = input_from_json('test_output.json')
nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True)
self.assertCountEqual(nodes_old_single,nodes_new_single)
self.assertCountEqual(edges_old_single, edges_new_single)
def keep_only_dois(nodes):
'''
:param nodes: input list of nodes of type Publication
:type nodes: List[Publication]
gets nodes of type pub and return only their doi
'''
doi_list = []
for node in nodes:
doi_list.append(node.doi_url)
return doi_list
if __name__ == "__main__":
unittest.main()
\ No newline at end of file
No preview for this file type
No preview for this file type
No preview for this file type
# -*- coding: utf-8 -*-
"""
A function to return an object of Type Publication for a given doi
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
def get_pub(pub_doi, test_var):
#checks if it's a test and chooses appropiate function
if(test_var):
pub = input_test_func(pub_doi)
#checks that it isnt a test and chooses standart-input function
else:
inter = Input()
try:
pub = inter.get_publication(pub_doi) #creates an object of class Publication
except ValueError:
return(ValueError)
except IndexError:
return(IndexError)
except AttributeError:
return(AttributeError)
return(pub)
\ No newline at end of file
......@@ -15,46 +15,19 @@ __status__ = "Production"
import json
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
import sys
sys.path.append("../")
from input.interface import InputInterface as Input
class Publication:
#def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
def __init__(self, doi_url, title, contributors, journal, publication_date, group):
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
self.references = []
self.citations = []
self.group = group
class Citation:
def __init__(self,doi_url, title, contributors, journal, publication_date):
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
class Reference:
def __init__(self,doi_url, title, contributors, journal, publication_date):
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
from input.publication import Publication, Citation
def create_pubs_from_json(input_dict):
'''
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
creates list of publication retrieved from old json file
'''
#iterates over the list of nodes
......@@ -67,29 +40,24 @@ def create_pubs_from_json(input_dict):
def add_ref_and_cit_to_pubs(input_dict):
'''
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
adds references and citations to retrieved publication list
'''
# iterates over the list of edges
for edge in input_dict["links"]:
for source in list_of_nodes_py:
for target in list_of_nodes_py:
# when correct dois found, adds then as references/citatons to publication list
if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])):
new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date)
new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference")
source.references.append(new_reference)
new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date)
new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation")
target.citations.append(new_citation)
# adds reference to current node
# if (node.doi_url == edge["source"]):
# new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
# node.references.append(new_reference)
# # adds citation to current node
# if (node.doi_url == edge["target"]):
# new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
# node.citations.append(new_citation)
# adds edge to list
list_of_edges_py.append([edge["source"],edge["target"]])
......@@ -97,8 +65,10 @@ def add_ref_and_cit_to_pubs(input_dict):
def input_from_json(json_file):
'''
:param json_file: Json-Datei for the old graph
:type json_file: Json File
:param json_file: Json-Datei for the old graph
:type json_file: String
retrieves information from old json file to be reused for new graph construction
'''
# creates global sets for nodes and edges
......@@ -117,5 +87,4 @@ def input_from_json(json_file):
add_ref_and_cit_to_pubs(input_dict)
return(list_of_nodes_py, list_of_edges_py)
return(list_of_nodes_py, list_of_edges_py)
\ No newline at end of file
import sys
sys.path.append("../")
from input.publication import Publication
class Publication:
def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
if references is None:
self.references = []
else:
self.references = ref(references)
if citations is None:
self.citations = []
else:
self.citations = cit(citations)
self.group = group
class Citation:
def __init__(self,doi_url, title, contributors, journal, publication_date):
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
class Reference:
def __init__(self,doi_url, title, contributors, journal, publication_date):
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
from input.publication import Publication, Citation
def input_test_func(pub_doi):
'''
:param pub_doi: pub doi to find publication in list_of_arrays
:type pub_doi: String
returns the publication class for given doi
'''
for array in list_of_arrays:
if pub_doi == array[0]:
pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7], array[8])
pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7])
return pub
def cit(list_doi):
'''
:param list_doi list of citation dois to get their Citation Class
:type list_doi: List[String]
returns a list of citations objects for given doi list
'''
cits = []
for doi_url in list_doi:
for array in list_of_arrays:
......@@ -54,6 +34,13 @@ def cit(list_doi):
return cits
def ref(list_doi):
'''
:param list_doi list of reference dois to get their Reference Class
:type list_doi: List[String]
returns a list of reference objects for given doi list
'''
refs = []
for doi_url in list_doi:
for array in list_of_arrays:
......@@ -62,27 +49,57 @@ def ref(list_doi):
return refs
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], '']
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], '']
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], '']
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], '']
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], '']
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], '']
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], '']
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], '']
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], '']
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], '']
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], '']
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], '']
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], '']
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], '']
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], '']
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], '']
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], '']
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], '']
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3]
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']]
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']]
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []]
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']]
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']]
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']]
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']]
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []]
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []]
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']]
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']]
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']]
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []]
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []]
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []]
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []]
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []]
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []]
large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []]
large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []]
large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12'], []]
large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']]
large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']]
large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']]
large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']]
large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']]
large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']]
large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']]
large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12']]
large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []]
large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []]
large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']]
large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']]
large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i'], ['doi_lg_2_h21','doi_lg_2_h22']]
large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']]
large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12']]
large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']]
large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']]
large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']]
large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']]
large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']]
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3,
right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3,
large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12,
large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12,
large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23]
......@@ -12,11 +12,11 @@ def format_nodes(V):
list_of_node_dicts = list()
for node in V:
new_dict = dict()
new_dict["doi"] = node.doi_url
new_dict["name"] = node.title
new_dict["author"] = node.contributors
new_dict["year"] = node.publication_date
new_dict["journal"] = node.journal
new_dict["doi"] = node.doi_url
new_dict["group"] = node.group
new_dict["citations"] = len(node.citations)
list_of_node_dicts.append(new_dict)
......@@ -34,14 +34,18 @@ def format_edges(E):
return list_of_edge_dicts
# combine the lists of nodes and edges to a dictionary and saves it to a json file
def output_to_json(V,E):
def output_to_json(V,E, test_var):
dict_of_all = dict()
list_of_node_dicts = format_nodes(V)
list_of_edge_dicts = format_edges(E)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
with open('json_text.json','w') as outfile:
json.dump(dict_of_all, outfile)
if (test_var):
with open('test_output.json','w') as outfile:
json.dump(dict_of_all, outfile)
else:
with open('json_text.json','w') as outfile:
json.dump(dict_of_all, outfile)
#knoten = ["doi1", "doi2", "doi3"]
#kanten = [[1,2],[3,4],[5,6]]
......
{"nodes": [{"name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "doi": "doi_h02", "group": "input", "citations": 1}, {"name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "doi": "doi_h1", "group": "height", "citations": 1}, {"name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "doi": "doi_h2", "group": "height", "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]}
\ No newline at end of file
{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "input", "citations": 5}], "links": []}
\ No newline at end of file
{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00249", "group": "input", "citations": 5}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "group": "height", "citations": 1}, {"name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00741", "group": "height", "citations": 0}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "group": "height", "citations": 8}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00250", "group": "height", "citations": 12}, {"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input", "citations": 1}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth", "citations": 14}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth", "citations": 9}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth", "citations": 100}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth", "citations": 48}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth", "citations": 32}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth", "citations": 100}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth", "citations": 100}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth", "citations": 100}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth", "citations": 25}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth", "citations": 12}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth", "citations": 100}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth", "citations": 74}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth", "citations": 65}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth", "citations": 97}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth", "citations": 100}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth", "citations": 99}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth", "citations": 100}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height", "citations": 0}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "group": "height", "citations": 0}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "group": "height", "citations": 0}, {"name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "doi": "https://doi.org/10.1021/acs.jpca.1c06264", "group": "height", "citations": 0}, {"name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00226", "group": "height", "citations": 0}, {"name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "group": "height", "citations": 11}, {"name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "doi": "https://doi.org/10.1021/acs.est.9b06379", "group": "height", "citations": 100}, {"name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci049714+", "group": "depth", "citations": 99}, {"name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci3001277", "group": "depth", "citations": 100}, {"name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.5b00559", "group": "depth", "citations": 98}, {"name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci7004498", "group": "depth", "citations": 100}, {"name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020155c", "group": "depth", "citations": 100}, {"name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm9602928", "group": "depth", "citations": 100}, {"name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. Sci.", "doi": "https://doi.org/10.1021/ci025599w", "group": "depth", "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]}
\ No newline at end of file
......@@ -51,6 +51,14 @@ def print_extended_graph(nodes, edges):
print(len(edges))
print(" ")
def print_simple(nodes, edges):
# for node in nodes:
# print(node)
# for edge in edges:
# print(edge)
print(len(nodes))
print(len(edges))
print(" ")
# program test with some random dois
def try_known_publications():
......@@ -79,15 +87,15 @@ def try_known_publications():
def try_delete_nodes():
doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
nodes, edges = process_main(doi_list,1,1)
print_graph(nodes, edges)
#print_simple(nodes, edges)
list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
print_graph(valid_nodes, valid_edges)
# list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
# doi_list = []
# doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
# valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
# print_simple(valid_nodes, valid_edges)
def try_import():
nodes, edges = input_from_json('json_text.json')
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]}
\ No newline at end of file
......@@ -13,17 +13,27 @@ __status__ = "Production"
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from os import error
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
from input.publication import Publication
from get_pub_from_input import get_pub
from Knoten_Vergleich import doi_listen_vergleichen
from Kanten_Vergleich import back_to_valid_edges
from update_graph_del import delete_nodes_and_edges
def get_old_input_dois(old_obj_input_list):
'''
:param old_obj_input_list: list of publications retrieved from old json file
:type old_obj_input_list: List[Publication]
function to return pub dois for old publications of group input retrieved from json file
'''
# new list to save doi_url for each old publication of group input
old_input_dois = []
for pub in old_obj_input_list:
if (pub.group == "input"):
......@@ -31,55 +41,65 @@ def get_old_input_dois(old_obj_input_list):
return old_input_dois
def get_new_input_dois(new_input, test_var):
doi_input_list = []
'''
:param new_input: input list of doi from UI
:type new_input: list of strings
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to return pub dois for input urls
'''
# new list to save doi_url for each new input url
new_input_dois = []
for new_node in new_input:
if(test_var):
pub = input_test_func(new_node)
else:
#print(pub_doi)
inter = Input()
try:
pub = inter.get_publication(new_node)
except ValueError:
continue
except IndexError:
continue
doi_input_list.append(pub.doi_url)
return doi_input_list
def delete_ref_nodes_rec(pub):#, old_obj_list):
for reference in pub.references:
for ref_pub in processed_input_list:
if (ref_pub.doi_url == reference.doi_url):
delete_ref_nodes_rec(ref_pub)
if (pub.group != "input"):
processed_input_list.remove(pub)
def delete_cit_nodes_rec(pub):
for citation in pub.citations:
for cit_pub in processed_input_list:
if (cit_pub.doi_url == citation.doi_url):
delete_cit_nodes_rec(cit_pub)
if (pub.group != "input"):
processed_input_list.remove(pub)
# retrieves information and adds to new list if successful
pub = get_pub(new_node, test_var)
if (type(pub) != Publication):
print(pub)
continue
new_input_dois.append(pub.doi_url)
return(new_input_dois)
def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False):
'''
:param new_doi_input_list: input list of doi from UI
:type new_doi_input_list: list of strings
:param old_obj_input_list: list of publications retrieved from old json file
:type old_obj_input_list: List[Publication]
:param old_edges_list: list of links between publications retrieved from old json file
:type old_edges_list: List[List[String,String]]
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
'''
# one global list to save the process of removing unneeded publications and one to save valid edges
global processed_input_list, valid_edges
processed_input_list = old_obj_input_list
processed_input_list = old_obj_input_list.copy()
valid_edges = []
# save the return values of global lists
processed_input_list_del = []
valid_edges_del = []
# get dois from lists to compare for differences
old_doi_input_list = get_old_input_dois(old_obj_input_list)
new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)
# retrieve which publications are already known, removed, inserted
common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)
if (len(inserted_nodes) == 0):
for del_node in deleted_nodes:
for pub in processed_input_list:
if (del_node == pub.doi_url):
delete_ref_nodes_rec(pub)#, processed_input_list)
delete_cit_nodes_rec(pub)#, processed_input_list)
processed_input_list.remove(pub)
valid_edges = back_to_valid_edges(old_edges_list, processed_input_list)
# deletes publications and edges from node_list if publications can no longer be reached
if (len(deleted_nodes) > 0):
processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, deleted_nodes, old_doi_input_list, old_edges_list)
return(processed_input_list, valid_edges)
return(processed_input_list_del, valid_edges_del)
# -*- coding: utf-8 -*-
"""
Functions to remove publications/links from nodes/edges list, if they can no longer be reached
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from Kanten_Vergleich import back_to_valid_edges
def delete_ref_nodes_rec(pub):
'''
:param pub: pub go get deleted after recursive call
:type pub: Publication
function that removes nodes of group "height", if they aren't reachable from input nodes
'''
for reference in pub.references:
for ref_pub in processed_list:
if (ref_pub.doi_url == reference):
# to find a cyclus and avoid recursion error
if (reference not in pub.citations):
delete_ref_nodes_rec(ref_pub)
# removes publication from list after recursion and if it's not of group input
if (pub.group != "input"):
processed_list.remove(pub)
def delete_cit_nodes_rec(pub):
'''
:param pub: publication to be removed after recursive call
:type pub: Publication
function that removes publications of group "depth" from input_list, if they aren't reachable from input nodes
'''
for citation in pub.citations:
for cit_pub in processed_list:
if (cit_pub.doi_url == citation):
# to find a cyclus and avoid recursion error
if (citation not in pub.references):
delete_cit_nodes_rec(cit_pub)
# removes publication from list after recursion and if it's not of group input
if (pub.group != "input"):
processed_list.remove(pub)
def delete_nodes_and_edges(input_list, deleted_nodes, old_doi_input_list, old_edges_list):
'''
:param input_list: list of publications to get reduced
:type input_list: List[Publication]
:param deleted_nodes: list of input dois which are not in new call
:type deleted_nodes: List[String]
:param old_doi_input_list: list of input dois from old call
:type old_doi_input_list: List[String]
:param old_edges_list: list of links between publications from old call
:type old_edges_list: List[List[String,String]]
function to start recursive node removal for references and citations and to return edge list to valid state
'''
# global list to save the process of removing unneeded publications
global processed_list
processed_list = input_list.copy()
for del_node in deleted_nodes:
for pub in processed_list:
if (del_node == pub.doi_url):
# checks for every reference if it is citet more than once. If not it calls deletion function
for reference in pub.references:
only_reference = True
for ref_cit in processed_list:
if (reference == ref_cit.doi_url):
for citation in ref_cit.citations:
if ((citation in old_doi_input_list) and (citation != del_node)):
only_reference = False
break
if (only_reference == False):
break
if (only_reference):
delete_ref_nodes_rec(pub)
# checks for every citation if it cites more than once. If not it calls deletion function
for citation in pub.citations:
only_reference = True
for cit_ref in processed_list:
if (citation == cit_ref.doi_url):
for reference in cit_ref.references:
if ((reference in old_doi_input_list) and (reference != del_node)):
only_reference = False
break
if (only_reference == False):
break
if (only_reference):
delete_cit_nodes_rec(pub)
# removes publication of type input after start of recursive call to both directions
processed_list.remove(pub)
valid_edges = back_to_valid_edges(old_edges_list, processed_list)
return(processed_list, valid_edges)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment