Skip to content
Snippets Groups Projects
Commit fcb188f2 authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

citation recursion adjusted

parent a2eb5514
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
Showing
with 231 additions and 303 deletions
......@@ -23,7 +23,6 @@ from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit
from .add_references_rec import add_references, create_global_lists_ref
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
......@@ -61,39 +60,36 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
break
if (not_in_nodes): #there is no node with this doi in the set
nodes.append(pub) #appends Publication Object
pub.group = "input"
pub.group = 0
else:
doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list
# inserts references as publication objects into list and
# inserts first depth references into nodes/edges if maximum search depth > 0
for reference in create_global_lists_ref(nodes, edges, pub, 0, search_depth_max, test_var):
for reference in create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var):
references_pub_obj_list.append(reference)
# inserts citations as publication objects into list and
# inserts first height citations into nodes if maximum search height > 0
for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, test_var):
for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var):
citations_pub_obj_list.append(citation)
return(references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges(test_var):
def complete_inner_edges():
'''
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
completes inner edges between nodes of group height and depth
'''
for node in nodes:
if (node.group == "depth"):
if (node.group < 0):
for citation in node.citations:
for cit in nodes:
if (citation == cit.doi_url and [citation, node.doi_url] not in edges):
edges.append([citation, node.doi_url])
if (node.group == "height"):
if (node.group > 0):
for reference in node.references:
for ref in nodes:
if (reference == ref.doi_url and [node.doi_url, reference] not in edges):
......@@ -103,7 +99,7 @@ def complete_inner_edges(test_var):
def process_main(doi_input_list, search_height, search_depth, test_var = False):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: list of strings
:type doi_input_list: List[String]
:param search_height: maximum height to search for citations
:type search_height: int
......@@ -139,11 +135,11 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
# function calls to begin recursive processing up to max depth/height
add_citations(nodes, edges, citations_obj_list, 1, search_height, test_var)
add_references(nodes, edges, references_obj_list, 1, search_depth, test_var)
add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)
# adds edges between reference group and citation group of known publications
complete_inner_edges(test_var)
complete_inner_edges()
# calls a skript to save nodes and edges of graph in .json file
output_to_json(nodes,edges, test_var)
......
......@@ -17,12 +17,27 @@ __status__ = "Production"
import sys
from pathlib import Path
from os import error
sys.path.append("../../")
sys.path.append("../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def create_global_lists_cit(input_nodes, input_edges, pub, search_height, search_height_max, test_var):
def get_cit_type_list(pub, cit_type):
    '''
    :param pub: Publication whose citation or reference list is requested
    :type pub: Publication

    :param cit_type: variable to differentiate citation and reference call
    :type cit_type: String

    returns pub.citations when cit_type is "Citation", otherwise pub.references
    '''
    # any value other than "Citation" (in practice "Reference") selects references
    return pub.citations if (cit_type == "Citation") else pub.references
def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
'''
:param input_nodes: list of nodes from Processing
:type input_nodes: List[Publication]
......@@ -33,11 +48,14 @@ def create_global_lists_cit(input_nodes, input_edges, pub, search_height, search
:param pub: Publication which citations will be added
:type pub: Publication
:param search_height: current height to search for citations
:type search_height_max: int
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
......@@ -49,19 +67,22 @@ def create_global_lists_cit(input_nodes, input_edges, pub, search_height, search
nodes = input_nodes
edges = input_edges
return create_graph_structure_citations(pub, search_height, search_height_max, test_var)
return create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
'''
:param pub: publication which citations will be added
:type pub: Publication
:param search_height: current height to search for citations
:type search_height_max: int
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
......@@ -71,40 +92,51 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
'''
citations_pub_obj_list = []
for citation in pub.citations:
for citation in get_cit_type_list(pub, cit_type):
not_in_nodes = True
for node in nodes: # checks every citation for duplication
if (citation == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (search_height < search_height_max): #checks if its a test and chooses input function accordingly
if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly
citation_pub_obj = get_pub(citation, test_var)
if (type(citation_pub_obj) != Publication):
print(pub)
continue
citation_pub_obj.group = "height"
nodes.append(citation_pub_obj)
if (cit_type == "Citation"):
citation_pub_obj.group = search_depth + 1
edges.append([citation_pub_obj.doi_url,pub.doi_url])
else:
citation_pub_obj.group = -(search_depth + 1)
edges.append([pub.doi_url,citation_pub_obj.doi_url])
nodes.append(citation_pub_obj)
citations_pub_obj_list.append(citation_pub_obj)
# adds only edge if citation already exists
elif [citation,pub.doi_url] not in edges:
elif (cit_type == "Citation"):
if ([citation,pub.doi_url] not in edges):
edges.append([citation,pub.doi_url])
elif (cit_type == "Reference"):
if ([pub.doi_url,citation] not in edges):
edges.append([pub.doi_url,citation])
return citations_pub_obj_list
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
'''
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: List[Publication]
:param search_height: current height to search for citations
:type search_height_max: int
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
......@@ -114,14 +146,14 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
# adds next level to nodes/edges
for pub in citations_pub_obj_list:
new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var)
new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
# If the maximum height has not yet been reached, calls function recursivly with increased height
if (search_height < search_height_max):
process_citations_rec(new_citation_pub_obj_list, search_height+1, search_height_max, test_var)
# If the maximum depth has not yet been reached, calls function recursivly with increased depth
if (search_depth < search_depth_max):
process_citations_rec(new_citation_pub_obj_list, search_depth+1, search_depth_max, cit_type, test_var)
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_height, search_height_max, test_var):
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
'''
:param input_nodes: list of nodes from Processing
:type input_nodes: List[Publication]
......@@ -132,11 +164,14 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_heigh
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: List[Publication]
:param search_height: current height to search for citations
:type search_height_max: int
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
......@@ -147,5 +182,5 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_heigh
nodes = input_nodes
edges = input_edges
process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var)
process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)
return(nodes, edges)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to add references recursively for multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def create_global_lists_ref(input_nodes, input_edges, pub, search_depth, search_depth_max, test_var):
    '''
    :param input_nodes: list of nodes from Processing
    :type input_nodes: List[Publication]

    :param input_edges: list of edges from Processing
    :type input_edges: List[String, String]

    :param pub: Publication which references will be added
    :type pub: Publication

    :param search_depth: current depth to search for references
    :type search_depth: int

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    binds the caller's node and edge lists to this module's globals and
    delegates to create_graph_structure_references for pub
    '''
    # the module-level globals are shared by the other functions in this file
    global nodes, edges
    nodes = input_nodes
    edges = input_edges
    return create_graph_structure_references(pub, search_depth, search_depth_max, test_var)
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
    '''
    :param pub: publication which references will be added
    :type pub: Publication

    :param search_depth: current depth to search for references
    :type search_depth: int

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    adds a node for every referenced publication not yet known,
    adds edges pointing from pub to its references,
    returns the list of newly created Publication objects
    '''
    created_references = []

    for ref_doi in pub.references:
        # duplicate check against the global node list
        already_known = any(ref_doi == node.doi_url for node in nodes)

        if not already_known:
            # only expand while below the configured recursion limit
            if search_depth < search_depth_max:
                ref_pub = get_pub(ref_doi, test_var)
                if type(ref_pub) != Publication:
                    print(pub)
                    continue
                ref_pub.group = "depth"
                nodes.append(ref_pub)
                edges.append([pub.doi_url, ref_pub.doi_url])
                created_references.append(ref_pub)
        # node already exists: only add the edge, and only once
        elif [pub.doi_url, ref_doi] not in edges:
            edges.append([pub.doi_url, ref_doi])

    return created_references
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
    '''
    :param references_pub_obj_list: list of publications which references will be added
    :type references_pub_obj_list: List[Publication]

    :param search_depth: current depth to search for references
    :type search_depth: int

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    recursive function that expands the reference graph one level at a time
    '''
    for publication in references_pub_obj_list:
        # materialize the next level of references for this publication
        next_level = create_graph_structure_references(publication, search_depth, search_depth_max, test_var)

        # recurse with an incremented depth until the maximum is reached
        if search_depth < search_depth_max:
            process_references_rec(next_level, search_depth + 1, search_depth_max, test_var)
def add_references(input_nodes, input_edges, references_pub_obj_list, search_height, search_height_max, test_var):
    '''
    :param input_nodes: list of nodes from Processing
    :type input_nodes: List[Publication]

    :param input_edges: list of edges from Processing
    :type input_edges: List[String, String]

    :param references_pub_obj_list: list of publications which references will be added
    :type references_pub_obj_list: List[Publication]

    :param search_height: current recursion level (NOTE(review): semantically a depth;
        it is forwarded as search_depth to process_references_rec)
    :type search_height: int

    :param search_height_max: maximum depth to search for references
    :type search_height_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    entry point that binds the shared node/edge globals and starts the
    recursive depth-first search of references; returns the updated lists
    '''
    global nodes, edges
    nodes = input_nodes
    edges = input_edges
    process_references_rec(references_pub_obj_list, search_height, search_height_max, test_var)
    return(nodes, edges)
\ No newline at end of file
#!/usr/bin/env python3
import json
# -*- coding: utf-8 -*-
"""
Functions that format the computed graph to match the interface to the output-part
Functions that format the computed graph to match the interface to the output-part and saves as a json file
"""
# creates a list that contains a dictionary for each node
# the dictionaries store the values for the attributes
def format_nodes(V):
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import json
def format_nodes(nodes):
'''
:param nodes: list of publications to export to json
:type nodes: List[Publication]
creates a list that contains a dictionary for each node
'''
list_of_node_dicts = list()
for node in V:
for node in nodes:
new_dict = dict()
new_dict["doi"] = node.doi_url
new_dict["name"] = node.title
new_dict["author"] = node.contributors
new_dict["year"] = node.publication_date
new_dict["journal"] = node.journal
new_dict["group"] = node.group
if (node.group == 0):
new_dict["group"] = "Input"
elif (node.group > 0):
new_dict["group"] = "Citedby"
else:
new_dict["group"] = "Reference"
new_dict["depth"] = node.group
new_dict["citations"] = len(node.citations)
list_of_node_dicts.append(new_dict)
return list_of_node_dicts
# creates a list that contains a disctionary for each edge
# the dictionaries contain the source as keys and the target as values
def format_edges(E):
def format_edges(edges):
'''
:param edges: list of links to export to json
:type edges: List[String,String]
function to format links, append to list and return list to output_to_json
'''
list_of_edge_dicts = list()
for edge in E:
for edge in edges:
new_dict_2 = dict()
new_dict_2["source"] = edge[0]
new_dict_2["target"] = edge[1]
list_of_edge_dicts.append(new_dict_2)
return list_of_edge_dicts
# combine the lists of nodes and edges to a dictionary and saves it to a json file
def output_to_json(V,E, test_var):
def output_to_json(nodes, edges, test_var):
'''
:param nodes: list of publications to export to json
:type nodes: List[Publication]
:param edges: list of links to export to json
:type edges: List[String,String]
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to export nodes and links as a dictionary to json file
'''
dict_of_all = dict()
list_of_node_dicts = format_nodes(V)
list_of_edge_dicts = format_edges(E)
list_of_node_dicts = format_nodes(nodes)
list_of_edge_dicts = format_edges(edges)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
if (test_var):
......@@ -46,7 +84,3 @@ def output_to_json(V,E, test_var):
else:
with open('json_text.json','w') as outfile:
json.dump(dict_of_all, outfile)
#knoten = ["doi1", "doi2", "doi3"]
#kanten = [[1,2],[3,4],[5,6]]
#output_to_json(knoten,kanten)
......@@ -17,11 +17,10 @@ __status__ = "Production"
import sys
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
sys.path.append("../")
from input.interface import InputInterface as Input
sys.path.append("../../")
from verarbeitung.construct_new_graph.Processing import process_main
from verarbeitung.update_graph.import_from_json import input_from_json
from update_graph.update_graph import check_graph_updates
from verarbeitung.update_graph.update_graph import check_graph_updates
# a function to print nodes and edges from a graph
def print_graph(nodes, edges):
......
......@@ -23,7 +23,17 @@ from verarbeitung.test.input_test import input_test_func
def get_pub(pub_doi, test_var):
'''
:param pub_doi: input doi to get Publication object for
:type pub_doi: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to return an object of type Publication for given input doi depending on whether its a test or url doi
'''
#checks if it's a test and chooses appropiate function
# print(pub_doi)
if(test_var):
pub = input_test_func(pub_doi)
......@@ -31,11 +41,11 @@ def get_pub(pub_doi, test_var):
else:
inter = Input()
try:
pub = inter.get_publication(pub_doi) #creates an object of class Publication
pub = inter.get_publication(pub_doi.doi_url) #creates an object of class Publication
except AttributeError:
pub = inter.get_publication(pub_doi)
except ValueError:
return(ValueError)
except IndexError:
return(IndexError)
except AttributeError:
return(AttributeError)
return(pub)
\ No newline at end of file
{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Input", "depth": 0, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. 
Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Input", "depth": 0, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 12}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}]}
\ No newline at end of file
import sys
from pathlib import Path
from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes
try_delete_nodes()
\ No newline at end of file
......@@ -12,16 +12,16 @@ class ProcessingTest(unittest.TestCase):
maxDiff = None
def testCycle(self):
nodes, edges = process_main(['doiz1'],1,1,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
# def testCycle(self):
# nodes, edges = process_main(['doiz1'],1,1,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
# self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
nodes, edges = process_main(['doiz1'],2,2,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
# nodes, edges = process_main(['doiz1'],2,2,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
# self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
#def testBigCycle(self):
......@@ -29,66 +29,66 @@ class ProcessingTest(unittest.TestCase):
#def testEmptyDepth(self):
def testEmptyDepthHeight(self):
nodes, edges = process_main(['doi1'],0,0,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi1'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi1', 'doi2'],0,0,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2'])
self.assertCountEqual(edges, [['doi1', 'doi2']])
nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3'])
self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
def testInnerEdges(self):
nodes, edges = process_main(['doi_ie1'],1,1,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3'])
self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
def testRightHeight(self):
nodes, edges = process_main(['doi_h01'],1,0,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h01'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi_h02'],1,0,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1'])
self.assertCountEqual(edges, [['doi_h1','doi_h02']])
nodes, edges = process_main(['doi_h02'],2,0,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2'])
self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
def testRightDepth(self):
nodes, edges = process_main(['doi_d01'],0,1,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d01'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi_d02'],0,1,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1'])
self.assertCountEqual(edges, [['doi_d02','doi_d1']])
nodes, edges = process_main(['doi_d02'],0,2,True)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2'])
self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
def test_import_from_json(self):
nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True)
nodes_new, edges_new = input_from_json('test_output.json')
self.assertCountEqual(nodes_old,nodes_new)
self.assertCountEqual(edges_old, edges_new)
# def testEmptyDepthHeight(self):
# nodes, edges = process_main(['doi1'],0,0,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi1'])
# self.assertCountEqual(edges, [])
# nodes, edges = process_main(['doi1', 'doi2'],0,0,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes, ['doi1','doi2'])
# self.assertCountEqual(edges, [['doi1', 'doi2']])
# nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3'])
# self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
# def testInnerEdges(self):
# nodes, edges = process_main(['doi_ie1'],1,1,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3'])
# self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
# def testRightHeight(self):
# nodes, edges = process_main(['doi_h01'],1,0,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_h01'])
# self.assertCountEqual(edges, [])
# nodes, edges = process_main(['doi_h02'],1,0,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_h02','doi_h1'])
# self.assertCountEqual(edges, [['doi_h1','doi_h02']])
# nodes, edges = process_main(['doi_h02'],2,0,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2'])
# self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
# def testRightDepth(self):
# nodes, edges = process_main(['doi_d01'],0,1,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_d01'])
# self.assertCountEqual(edges, [])
# nodes, edges = process_main(['doi_d02'],0,1,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_d02','doi_d1'])
# self.assertCountEqual(edges, [['doi_d02','doi_d1']])
# nodes, edges = process_main(['doi_d02'],0,2,True)
# doi_nodes = keep_only_dois(nodes)
# self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2'])
# self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
# def test_import_from_json(self):
# nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True)
# nodes_new, edges_new = input_from_json('test_output.json')
# self.assertCountEqual(nodes_old,nodes_new)
# self.assertCountEqual(edges_old, edges_new)
def test_deleted_input_dois(self):
nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True)
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": 
"doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]}
\ No newline at end of file
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_i", "name": "title_lg_2_i", "author": ["contributor_lg_2_i"], "year": "date_lg_2_i", "journal": "journal_lg_2_i", "group": "Input", "depth": 0, "citations": 3}, {"doi": "doi_lg_2_d11", "name": "title_lg_2_d11", "author": ["contributor_lg_2_d11"], "year": "date_lg_2_d11", "journal": "journal_lg_2_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_2_d12", "name": "title_lg_2_d12", "author": ["contributor_lg_2_d12"], "year": "date_lg_2_d12", "journal": "journal_lg_2_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_2_h11", "name": "title_lg_2_h11", "author": ["contributor_lg_2_h11"], "year": "date_lg_2_h11", "journal": "journal_lg_2_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_h12", "name": "title_lg_2_h12", "author": ["contributor_lg_2_h12"], "year": "date_lg_2_h12", "journal": "journal_lg_2_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_cg_i", "name": "title_cg_i", "author": ["contributor_cg_i"], "year": 
"date_cg_i", "journal": "journal_cg_i", "group": "Citedby", "depth": 1, "citations": 3}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h21", "name": "title_lg_2_h21", "author": ["contributor_lg_2_h21"], "year": "date_lg_2_h21", "journal": "journal_lg_2_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h22", "name": "title_lg_2_h22", "author": ["contributor_lg_2_h22"], "year": "date_lg_2_h22", "journal": "journal_lg_2_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h23", "name": "title_lg_2_h23", "author": ["contributor_lg_2_h23"], "year": "date_lg_2_h23", "journal": "journal_lg_2_h23", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_lg_2_h24", "name": "title_lg_2_h24", "author": ["contributor_lg_2_h24"], "year": "date_lg_2_h24", "journal": "journal_lg_2_h24", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_cg_h11", "name": "title_cg_h11", "author": ["contributor_cg_h11"], "year": "date_cg_h11", "journal": "journal_cg_h11", "group": "Citedby", "depth": 2, "citations": 2}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, 
{"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_2_d21", "name": "title_lg_2_d21", "author": ["contributor_lg_2_d21"], "year": "date_lg_2_d21", "journal": "journal_lg_2_d21", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d22", "name": "title_lg_2_d22", "author": ["contributor_lg_2_d22"], "year": "date_lg_2_d22", "journal": "journal_lg_2_d22", "group": "Reference", "depth": -2, "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d11"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d12"}, {"source": "doi_lg_2_h11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_h12", "target": "doi_lg_2_i"}, {"source": "doi_cg_i", "target": "doi_lg_2_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h21", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h22", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h23"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h24"}, {"source": "doi_lg_1_h23", "target": "doi_cg_i"}, {"source": "doi_cg_h11", "target": "doi_cg_i"}, {"source": "doi_lg_2_h11", "target": "doi_cg_i"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": 
"doi_lg_1_d23"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d21"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d22"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_d12"}]}
\ No newline at end of file
......@@ -19,9 +19,8 @@ from os import error
sys.path.append("../")
from import_from_json import input_from_json
from verarbeitung.construct_new_graph.Processing import initialize_nodes_list, complete_inner_edges, create_graph_structure_references, create_graph_structure_citations
from verarbeitung.construct_new_graph.Processing import initialize_nodes_list, complete_inner_edges
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.add_references_rec import add_references
from verarbeitung.construct_new_graph.export_to_json import output_to_json
def connect_old_and_new_input(json_file, new_doi_list, search_height, search_depth, test_var = False):
......@@ -37,8 +36,8 @@ def connect_old_and_new_input(json_file, new_doi_list, search_height, search_dep
references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list,search_depth, search_height, test_var)
# function calls to begin recursive processing up to max depth/height
add_citations(nodes, edges, citations_obj_list, 1, search_height, test_var)
add_references(nodes, edges, references_obj_list, 1, search_depth, test_var)
add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)
# adds edges between reference group and citation group of known publications
complete_inner_edges(test_var)
......@@ -54,16 +53,16 @@ def complete_changed_group_nodes(new_doi_list, search_height_max, search_depth_m
changed_group_node_references = []
for node in nodes:
if (node.group != "input") and (node.doi in new_doi_list):
node.group = "input"
if (node.group != 0) and (node.doi in new_doi_list):
node.group = 0
# inserts references as publication objects into list and
# inserts first depth references into nodes/edges if maximum search depth > 0
for reference in create_graph_structure_references(node, 0, search_depth_max, test_var):
for reference in add_citations(nodes, edges, node, 0, search_depth_max, "Reference", test_var):
changed_group_node_references.append(reference)
# inserts citations as publication objects into list and
# inserts first height citations into nodes if maximum search height > 0
for citation in create_graph_structure_citations(node, 0, search_height_max, test_var):
for citation in add_citations(nodes, edges, node, 0, search_height_max, "Citation", test_var):
changed_group_node_citations.append(citation)
......@@ -34,7 +34,7 @@ def create_pubs_from_json(input_dict):
for node in input_dict["nodes"]:
#creates for the nodes the objects class Publication
pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["group"])
pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["depth"] )
#appends the objects to a list
list_of_nodes_py.append(pub)
......
......@@ -36,7 +36,7 @@ def get_old_input_dois(old_obj_input_list):
# new list to save doi_url for each old publication of group input
old_input_dois = []
for pub in old_obj_input_list:
if (pub.group == "input"):
if (pub.group == 0):
old_input_dois.append(pub.doi_url)
return old_input_dois
......
......@@ -36,7 +36,7 @@ def delete_ref_nodes_rec(pub):
delete_ref_nodes_rec(ref_pub)
# removes publication from list after recursion and if it's not of group input
if (pub.group != "input"):
if (pub.group != 0):
processed_list.remove(pub)
......@@ -56,7 +56,7 @@ def delete_cit_nodes_rec(pub):
delete_cit_nodes_rec(cit_pub)
# removes publication from list after recursion and if it's not of group input
if (pub.group != "input"):
if (pub.group != 0):
processed_list.remove(pub)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment