Skip to content
Snippets Groups Projects
Commit 6011746e authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

various changes and new implementations -MS

parent adc4dcbf
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
Showing
with 467 additions and 267 deletions
......@@ -9,7 +9,7 @@ def back_to_valid_edges(links_from_json, processed_input_list):
function that deletes edges, if one or both of their adjacent nodes are deleted nodes
'''
list_of_valid_edges = links_from_json
list_of_valid_edges = links_from_json.copy()
#iterates over all edges from old graph
......@@ -23,6 +23,8 @@ def back_to_valid_edges(links_from_json, processed_input_list):
# increases counter if adjacent node was found
if (adj_node == pub.doi_url):
found_adj_nodes += 1
if (found_adj_nodes == 2):
break
#removes the edge if less than 2 adjacent nodes found
if (found_adj_nodes < 2):
......
......@@ -13,22 +13,21 @@ __status__ = "Production"
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from os import error
sys.path.append("../")
from input.interface import InputInterface as Input
#import input
from input_test import input_test_func
from input.publication import Publication
from get_pub_from_input import get_pub
from json_demo import output_to_json
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: list of strings
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
......@@ -38,25 +37,20 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds input dois to nodes and retrieves citations and references for input publications
'''
# saves found citations and references in lists
references_pub_obj_list = []
citations_pub_obj_list = []
for pub_doi in doi_input_list: #iterates over every incoming doi
if(test_var): #checks that it is a test and chooses test-input function
pub = input_test_func(pub_doi) #creates an object of class Publication
else: #checks that it isnt a test and chooses standart-input function
#print(pub_doi)
inter = Input()
try:
pub = inter.get_publication(pub_doi) #creates an object of class Publication
except ValueError:
continue
except IndexError:
pub = get_pub(pub_doi, test_var)
if (type(pub) != Publication):
print(pub)
continue
# checks if publication already exists in nodes
not_in_nodes = True #boolean value to check if a node already exists in the set of nodes
for node in nodes: #iterates over every node in the set of nodes
......@@ -87,28 +81,28 @@ def complete_inner_edges(test_var):
'''
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
completes inner edges between nodes of group height and depth
'''
for node in nodes:
if (node.group == "depth"):
for citation in node.citations:
for cit in nodes:
if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges):
edges.append([citation.doi_url, node.doi_url])
if (citation == cit.doi_url and [citation, node.doi_url] not in edges):
edges.append([citation, node.doi_url])
if (node.group == "height"):
for reference in node.references:
for ref in nodes:
if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges):
edges.append([node.doi_url,reference.doi_url])
if (reference == ref.doi_url and [node.doi_url, reference] not in edges):
edges.append([node.doi_url,reference])
# adds a node for every referenced publication that is not yet known
# adds edges for references between publications
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
'''
:param pub: publication which references will be added
:type pub: Class Publication
:type pub: Publication
:param search_depth: current depth to search for references
:type search_depth: int
......@@ -118,28 +112,23 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every referenced publication unknown
adds edges to added references
'''
references_pub_obj_list = []
for reference in pub.references: #iterates over the references of the considered paper
not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes
for node in nodes: #iterates over all nodes in set of nodes
if (reference.doi_url == node.doi_url): #determines that the node already exists
for node in nodes: #iterates over all nodes in set of nodes #
if (reference == node.doi_url): #determines that the node already exists
not_in_nodes = False #boolean false --> node will not be created
break
if (not_in_nodes): #checks that there is no node with this doi
if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit
if (test_var): #determines that it is a test and chooses the test-input function
reference_pub_obj = input_test_func(reference.doi_url) #creates an Object of Publication Class
else: #determines that it isnt a test and chooses the standart function
#reference_pub_obj = Input(reference.doi_url)
inter = Input()
try:
reference_pub_obj = inter.get_publication(reference.doi_url)
except ValueError:
continue
# nur aus Testzwecken, da noch was bei Input falsch ist
except IndexError:
reference_pub_obj = get_pub(reference, test_var)
if (type(reference_pub_obj) != Publication):
print(pub)
continue
reference_pub_obj.group = "depth"
......@@ -148,19 +137,15 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references
# adds edge only if citation already exists
elif [pub.doi_url,reference.doi_url] not in edges:
edges.append([pub.doi_url,reference.doi_url])
elif [pub.doi_url,reference] not in edges:
edges.append([pub.doi_url,reference])
return references_pub_obj_list
# recursive function to implement height-first-search on references
# references_pub_obj_list: input list of references as publication objects
# search_depth: current search_depth of height-first-search
# search_depth_max: maximal search_depth for dfs
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
'''
:param references_pub_obj_list: list of publications which references will be added
:type references_pub_obj_list: list of objects of type Class Publications
:type references_pub_obj_list: List[Publication]
:param search_depth: current depth to search for references
:type search_depth: int
......@@ -170,6 +155,8 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement height-first-search on references
'''
# adds next level to nodes/edges
......@@ -182,12 +169,10 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
'''
:param pub: publication which citations will be added
:type pub: Class Publication
:type pub: Publication
:param search_height: current height to search for citations
:type search_height_max: int
......@@ -197,27 +182,23 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every citing publication unknown
adds edges to added citations
'''
citations_pub_obj_list = []
for citation in pub.citations:
not_in_nodes = True
for node in nodes: # checks every citation for duplication
if (citation.doi_url == node.doi_url):
if (citation == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (search_height < search_height_max): #checks if its a test and chooses input function accordingly
if (test_var):
citation_pub_obj = input_test_func(citation.doi_url)
else:
#citation_pub_obj = Input(citation.doi_url)
inter = Input()
try:
citation_pub_obj = inter.get_publication(citation.doi_url)
except ValueError:
continue
except IndexError:
citation_pub_obj = get_pub(citation, test_var)
if (type(citation_pub_obj) != Publication):
print(pub)
continue
citation_pub_obj.group = "height"
......@@ -226,20 +207,15 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
citations_pub_obj_list.append(citation_pub_obj)
# adds only edge if citation already exists
elif [citation.doi_url,pub.doi_url] not in edges:
edges.append([citation.doi_url,pub.doi_url])
elif [citation,pub.doi_url] not in edges:
edges.append([citation,pub.doi_url])
return citations_pub_obj_list
# recursive function to implement height-first-search on citations
# citations_pub_obj_list: input list of citations as publication objects
# search_height: current search_height of height-first-search
# search_height_max: maximal search_height for dfs
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
'''
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: list of objects of type Class Publications
:type citations_pub_obj_list: List[Publication]
:param search_height: current height to search for citations
:type search_height_max: int
......@@ -249,6 +225,8 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement depth-first-search on citations
'''
# adds next level to nodes/edges
......@@ -261,8 +239,6 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
def process_main(doi_input_list, search_height, search_depth, test_var = False):
'''
:param doi_input_list: input list of doi from UI
......@@ -276,6 +252,8 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
main function to start graph generation
'''
# ERROR-Handling doi_array = NULL
......@@ -291,10 +269,10 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
print("Error, search_depth of search must be positive")
# creates empty lists to save nodes and edges
global nodes, edges
nodes = [] # create empty array for the nodes
edges = [] # create empty array for the edges
nodes = []
edges = []
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
......@@ -307,14 +285,6 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
complete_inner_edges(test_var)
# calls a skript to save nodes and edges of graph in .json file
output_to_json(nodes,edges)
# only for unit tests
if (test_var == True):
doi_nodes_list = []
for node in nodes:
doi_nodes_list.append(node.doi_url)
return(doi_nodes_list, edges)
output_to_json(nodes,edges, test_var)
return(nodes,edges)
import unittest
from Processing import process_main
from import_from_json import input_from_json
from update_graph import check_graph_updates
class ProcessingTest(unittest.TestCase):
maxDiff = None
def testCycle(self):
nodes, edges = process_main(['doiz1'],1,1,True)
self.assertCountEqual(nodes, ['doiz1', 'doiz2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
nodes, edges = process_main(['doiz1'],2,2,True)
self.assertCountEqual(nodes, ['doiz1', 'doiz2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
#def testBigCycle(self):
......@@ -19,48 +27,88 @@ class ProcessingTest(unittest.TestCase):
def testEmptyDepthHeight(self):
nodes, edges = process_main(['doi1'],0,0,True)
self.assertCountEqual(nodes,['doi1'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi1'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi1', 'doi2'],0,0,True)
self.assertCountEqual(nodes, ['doi1','doi2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2'])
self.assertCountEqual(edges, [['doi1', 'doi2']])
nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True)
self.assertCountEqual(nodes, ['doi1','doi2', 'doi3'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3'])
self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
def testInnerEdges(self):
nodes, edges = process_main(['doi_ie1'],1,1,True)
self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3'])
self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
def testRightHeight(self):
nodes, edges = process_main(['doi_h01'],1,0,True)
self.assertCountEqual(nodes,['doi_h01'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h01'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi_h02'],1,0,True)
self.assertCountEqual(nodes,['doi_h02','doi_h1'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1'])
self.assertCountEqual(edges, [['doi_h1','doi_h02']])
nodes, edges = process_main(['doi_h02'],2,0,True)
self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2'])
self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
def testRightDepth(self):
nodes, edges = process_main(['doi_d01'],0,1,True)
self.assertCountEqual(nodes,['doi_d01'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d01'])
self.assertCountEqual(edges, [])
nodes, edges = process_main(['doi_d02'],0,1,True)
self.assertCountEqual(nodes,['doi_d02','doi_d1'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1'])
self.assertCountEqual(edges, [['doi_d02','doi_d1']])
nodes, edges = process_main(['doi_d02'],0,2,True)
self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2'])
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2'])
self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
def test_import_from_json(self):
nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True)
nodes_new, edges_new = input_from_json('test_output.json')
self.assertCountEqual(nodes_old,nodes_new)
self.assertCountEqual(edges_old, edges_new)
def test_deleted_input_dois(self):
nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True)
nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
nodes_new_both, edges_new_both = input_from_json('test_output.json')
nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True)
self.assertCountEqual(nodes_old_single,nodes_new_single)
self.assertCountEqual(edges_old_single, edges_new_single)
def keep_only_dois(nodes):
    '''
        :param nodes:   publications whose dois are wanted
        :type nodes:    List[Publication]

        reduces the given nodes to the list of their doi_url values, in input order
    '''
    return [pub.doi_url for pub in nodes]
if __name__ == "__main__":
unittest.main()
\ No newline at end of file
No preview for this file type
No preview for this file type
No preview for this file type
# -*- coding: utf-8 -*-
"""
A function to return an object of Type Publication for a given doi
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
def get_pub(pub_doi, test_var):
    '''
        :param pub_doi:     doi of the publication to look up
        :type pub_doi:      String

        :param test_var:    variable to differentiate between test and url call
        :type test_var:     boolean

        returns the result of input_test_func (test call) or of
        InputInterface.get_publication (url call); if the url call raises
        ValueError, IndexError or AttributeError, the matching exception class
        itself is returned instead of being raised, so callers have to check
        the type of the result
    '''
    # test call: use the test-input function
    if test_var:
        return input_test_func(pub_doi)

    # url call: use the standard input interface
    fetcher = Input()
    try:
        return fetcher.get_publication(pub_doi)
    except ValueError:
        return ValueError
    except IndexError:
        return IndexError
    except AttributeError:
        return AttributeError
\ No newline at end of file
......@@ -15,46 +15,19 @@ __status__ = "Production"
import json
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
import sys
sys.path.append("../")
from input.interface import InputInterface as Input
class Publication:
    '''
        container for the data of one publication

        references and citations always start as two separate empty lists,
        all other attributes are stored exactly as passed in
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date, group):
        self.doi_url, self.title = doi_url, title
        self.contributors, self.journal = contributors, journal
        self.publication_date = publication_date
        # filled later by the caller; every instance gets its own lists
        self.references, self.citations = [], []
        self.group = group
class Citation:
    '''
        container for the data of one citing publication;
        all attributes are stored exactly as passed in
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url, self.title = doi_url, title
        self.contributors, self.journal = contributors, journal
        self.publication_date = publication_date
class Reference:
    '''
        container for the data of one referenced publication;
        all attributes are stored exactly as passed in
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url, self.title = doi_url, title
        self.contributors, self.journal = contributors, journal
        self.publication_date = publication_date
from input.publication import Publication, Citation
def create_pubs_from_json(input_dict):
'''
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
creates list of publication retrieved from old json file
'''
#iterates over the list of nodes
......@@ -69,27 +42,22 @@ def add_ref_and_cit_to_pubs(input_dict):
'''
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
adds references and citations to retrieved publication list
'''
# iterates over the list of edges
for edge in input_dict["links"]:
for source in list_of_nodes_py:
for target in list_of_nodes_py:
# when correct dois found, adds then as references/citatons to publication list
if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])):
new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date)
new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference")
source.references.append(new_reference)
new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date)
new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation")
target.citations.append(new_citation)
# adds reference to current node
# if (node.doi_url == edge["source"]):
# new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
# node.references.append(new_reference)
# # adds citation to current node
# if (node.doi_url == edge["target"]):
# new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
# node.citations.append(new_citation)
# adds edge to list
list_of_edges_py.append([edge["source"],edge["target"]])
......@@ -98,7 +66,9 @@ def add_ref_and_cit_to_pubs(input_dict):
def input_from_json(json_file):
'''
:param json_file: Json-Datei for the old graph
:type json_file: Json File
:type json_file: String
retrieves information from old json file to be reused for new graph construction
'''
# creates global sets for nodes and edges
......@@ -118,4 +88,3 @@ def input_from_json(json_file):
return(list_of_nodes_py, list_of_edges_py)
\ No newline at end of file
import sys
sys.path.append("../")
from input.publication import Publication
class Publication:
    '''
        container for the data of one test publication

        references and citations are given as lists of dois and are converted
        with ref() and cit(); passing None for either yields an empty list
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
        self.doi_url, self.title = doi_url, title
        self.contributors, self.journal = contributors, journal
        self.publication_date = publication_date
        # None means "nothing known": skip the lookup and store a fresh list
        self.references = [] if references is None else ref(references)
        self.citations = [] if citations is None else cit(citations)
        self.group = group
class Citation:
    '''
        container for the data of one citing publication;
        all attributes are stored exactly as passed in
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url, self.title = doi_url, title
        self.contributors, self.journal = contributors, journal
        self.publication_date = publication_date
class Reference:
    '''
        container for the data of one referenced publication;
        all attributes are stored exactly as passed in
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url, self.title = doi_url, title
        self.contributors, self.journal = contributors, journal
        self.publication_date = publication_date
from input.publication import Publication, Citation
def input_test_func(pub_doi):
    '''
        :param pub_doi: pub doi to find publication in list_of_arrays
        :type pub_doi:  String

        returns the Publication built from the first entry of list_of_arrays
        whose doi (index 0) equals pub_doi, or None if no entry matches
    '''
    for array in list_of_arrays:
        if pub_doi == array[0]:
            # every entry of list_of_arrays has exactly eight fields (indices 0-7);
            # the former nine-argument call read array[8] and raised an IndexError
            return Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7])
    # unknown doi
    return None
def cit(list_doi):
'''
:param list_doi list of citation dois to get their Citation Class
:type list_doi: List[String]
returns a list of citations objects for given doi list
'''
cits = []
for doi_url in list_doi:
for array in list_of_arrays:
......@@ -54,6 +34,13 @@ def cit(list_doi):
return cits
def ref(list_doi):
'''
:param list_doi list of reference dois to get their Reference Class
:type list_doi: List[String]
returns a list of reference objects for given doi list
'''
refs = []
for doi_url in list_doi:
for array in list_of_arrays:
......@@ -62,27 +49,57 @@ def ref(list_doi):
return refs
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], '']
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], '']
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], '']
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], '']
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], '']
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], '']
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], '']
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], '']
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], '']
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], '']
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], '']
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], '']
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], '']
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], '']
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], '']
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], '']
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], '']
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], '']
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3]
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']]
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']]
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []]
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']]
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']]
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']]
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']]
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []]
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []]
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']]
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']]
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']]
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []]
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []]
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []]
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []]
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []]
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []]
large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []]
large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []]
large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12'], []]
large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']]
large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']]
large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']]
large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']]
large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']]
large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']]
large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']]
large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12']]
large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []]
large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []]
large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']]
large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']]
large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i'], ['doi_lg_2_h21','doi_lg_2_h22']]
large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']]
large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12']]
large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']]
large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']]
large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']]
large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']]
large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']]
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3,
right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3,
large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12,
large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12,
large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23]
......@@ -12,11 +12,11 @@ def format_nodes(V):
list_of_node_dicts = list()
for node in V:
new_dict = dict()
new_dict["doi"] = node.doi_url
new_dict["name"] = node.title
new_dict["author"] = node.contributors
new_dict["year"] = node.publication_date
new_dict["journal"] = node.journal
new_dict["doi"] = node.doi_url
new_dict["group"] = node.group
new_dict["citations"] = len(node.citations)
list_of_node_dicts.append(new_dict)
......@@ -34,12 +34,16 @@ def format_edges(E):
return list_of_edge_dicts
# combines the lists of nodes and edges into a dictionary and saves it to a JSON file
def output_to_json(V,E):
def output_to_json(V,E, test_var):
dict_of_all = dict()
list_of_node_dicts = format_nodes(V)
list_of_edge_dicts = format_edges(E)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
if (test_var):
with open('test_output.json','w') as outfile:
json.dump(dict_of_all, outfile)
else:
with open('json_text.json','w') as outfile:
json.dump(dict_of_all, outfile)
......
{"nodes": [{"name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "doi": "doi_h02", "group": "input", "citations": 1}, {"name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "doi": "doi_h1", "group": "height", "citations": 1}, {"name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "doi": "doi_h2", "group": "height", "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]}
\ No newline at end of file
{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "input", "citations": 5}], "links": []}
\ No newline at end of file
This diff is collapsed.
......@@ -51,6 +51,14 @@ def print_extended_graph(nodes, edges):
print(len(edges))
print(" ")
def print_simple(nodes, edges):
    '''
        :param nodes: collection of nodes, only its length is used
        :type nodes: any object supporting len()

        :param edges: collection of edges, only its length is used
        :type edges: any object supporting len()

    prints the number of nodes, the number of edges and a line holding one blank
    '''
    # only the sizes are reported, the single nodes and edges are not printed
    for collection in (nodes, edges):
        print(len(collection))
    print(" ")
# program test with some random dois
def try_known_publications():
......@@ -79,15 +87,15 @@ def try_known_publications():
def try_delete_nodes():
    '''
    manual check of the node removal: builds a graph with process_main and prints it,
    then reloads the graph stored in json_text.json, hands it together with a shorter
    doi list to check_graph_updates and prints the returned nodes and edges
    '''
    # first call: two input publications
    doi_list = []
    doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
    doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
    #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
    # NOTE(review): the two 1s are presumably the search depth and search height -- confirm against process_main
    nodes, edges = process_main(doi_list,1,1)
    print_graph(nodes, edges)
    #print_simple(nodes, edges)

    # second call: only one of the two publications is left as input
    list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
    doi_list = []
    doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
    valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
    print_graph(valid_nodes, valid_edges)

    # alternative version of the second call with the shorter output of print_simple
    # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
    # doi_list = []
    # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
    # valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
    # print_simple(valid_nodes, valid_edges)
def try_import():
nodes, edges = input_from_json('json_text.json')
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": 
"doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]}
\ No newline at end of file
......@@ -13,17 +13,27 @@ __status__ = "Production"
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from os import error
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
from input.publication import Publication
from get_pub_from_input import get_pub
from Knoten_Vergleich import doi_listen_vergleichen
from Kanten_Vergleich import back_to_valid_edges
from update_graph_del import delete_nodes_and_edges
def get_old_input_dois(old_obj_input_list):
'''
:param old_obj_input_list: list of publications retrieved from old json file
:type old_obj_input_list: List[Publication]
function to return pub dois for old publications of group input retrieved from json file
'''
# new list to save doi_url for each old publication of group input
old_input_dois = []
for pub in old_obj_input_list:
if (pub.group == "input"):
......@@ -31,55 +41,65 @@ def get_old_input_dois(old_obj_input_list):
return old_input_dois
def get_new_input_dois(new_input, test_var):
    '''
        :param new_input: input list of doi from UI
        :type new_input: List[String]

        :param test_var: variable to differentiate between test and url call
        :type test_var: boolean

    function to return pub dois for input urls
    '''

    # new list to save doi_url for each new input url
    new_input_dois = []
    for new_node in new_input:
        # retrieves information and adds to new list if successful
        pub = get_pub(new_node, test_var)
        # anything that is not a Publication counts as a failed lookup: it is printed and the input is skipped
        if not isinstance(pub, Publication):
            print(pub)
            continue

        new_input_dois.append(pub.doi_url)
    return(new_input_dois)


def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False):
    '''
        :param new_doi_input_list: input list of doi from UI
        :type new_doi_input_list: List[String]

        :param old_obj_input_list: list of publications retrieved from old json file
        :type old_obj_input_list: List[Publication]

        :param old_edges_list: list of links between publications retrieved from old json file
        :type old_edges_list: List[List[String,String]]

        :param test_var: variable to differentiate between test and url call
        :type test_var: boolean

    function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
    '''

    # one global list to save the process of removing unneeded publications and one to save valid edges
    global processed_input_list, valid_edges
    # works on a copy, so the list handed in by the caller stays untouched
    processed_input_list = old_obj_input_list.copy()
    valid_edges = []

    # return values; as long as no input publication was removed the old graph is handed back unchanged
    # (starting from empty lists here would drop the whole graph in that case)
    processed_input_list_del = processed_input_list
    valid_edges_del = old_edges_list.copy()

    # get dois from lists to compare for differences
    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)

    # retrieve which publications are already known, removed, inserted
    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    # deletes publications and edges from node_list if publications can no longer be reached
    if (len(deleted_nodes) > 0):
        processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, deleted_nodes, old_doi_input_list, old_edges_list)

    return(processed_input_list_del, valid_edges_del)
# -*- coding: utf-8 -*-
"""
Functions to remove publications/links from nodes/edges list, if they can no longer be reached
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from Kanten_Vergleich import back_to_valid_edges
def delete_ref_nodes_rec(pub, _visited = None):
    '''
        :param pub: publication to be removed after the recursive call
        :type pub: Publication

        :param _visited: doi_urls already handled during the current walk (internal, callers leave it unset)
        :type _visited: Set[String]

    function that follows the references of pub through the global processed_list and removes
    every publication reached this way, and finally pub itself, unless it is of group "input"
    NOTE(review): the former docstring named group "height" for this function -- confirm which group is meant
    '''

    if _visited is None:
        _visited = set()
    _visited.add(pub.doi_url)

    for reference in pub.references:
        # two publications citing each other are not followed (unchanged cycle check)
        if (reference in pub.citations):
            continue
        # longer cycles end here, following a publication a second time would never terminate
        if (reference in _visited):
            continue
        # iterates over a snapshot, because the recursion removes entries from processed_list
        for ref_pub in list(processed_list):
            if (ref_pub.doi_url == reference):
                delete_ref_nodes_rec(ref_pub, _visited)

    # removes publication from list after recursion and if it's not of group input
    if (pub.group != "input" and pub in processed_list):
        processed_list.remove(pub)
def delete_cit_nodes_rec(pub, _visited = None):
    '''
        :param pub: publication to be removed after recursive call
        :type pub: Publication

        :param _visited: doi_urls already handled during the current walk (internal, callers leave it unset)
        :type _visited: Set[String]

    function that follows the citations of pub through the global processed_list and removes
    every publication reached this way, and finally pub itself, unless it is of group "input"
    NOTE(review): the former docstring named group "depth" for this function -- confirm which group is meant
    '''

    if _visited is None:
        _visited = set()
    _visited.add(pub.doi_url)

    for citation in pub.citations:
        # two publications citing each other are not followed (unchanged cycle check)
        if (citation in pub.references):
            continue
        # longer cycles end here, following a publication a second time would never terminate
        if (citation in _visited):
            continue
        # iterates over a snapshot, because the recursion removes entries from processed_list
        for cit_pub in list(processed_list):
            if (cit_pub.doi_url == citation):
                delete_cit_nodes_rec(cit_pub, _visited)

    # removes publication from list after recursion and if it's not of group input
    if (pub.group != "input" and pub in processed_list):
        processed_list.remove(pub)
def delete_nodes_and_edges(input_list, deleted_nodes, old_doi_input_list, old_edges_list):
    '''
        :param input_list: list of publications to get reduced
        :type input_list: List[Publication]

        :param deleted_nodes: list of input dois which are not in new call
        :type deleted_nodes: List[String]

        :param old_doi_input_list: list of input dois from old call
        :type old_doi_input_list: List[String]

        :param old_edges_list: list of links between publications from old call
        :type old_edges_list: List[List[String,String]]

    function to start recursive node removal for references and citations and to return edge list to valid state
    returns the reduced list of publications together with the edges returned by back_to_valid_edges
    '''

    # global list to save the process of removing unneeded publications
    # (delete_ref_nodes_rec and delete_cit_nodes_rec work on this list); input_list itself is only copied
    global processed_list
    processed_list = input_list.copy()

    for del_node in deleted_nodes:
        # NOTE(review): processed_list is changed inside this loop while it is iterated -- confirm this is intended
        for pub in processed_list:
            if (del_node == pub.doi_url):

                # checks for every reference if it is cited by another input publication of the old call.
                # If not it calls deletion function
                for reference in pub.references:
                    only_reference = True
                    for ref_cit in processed_list:
                        if (reference == ref_cit.doi_url):
                            for citation in ref_cit.citations:
                                # another old input doi than the deleted one still cites this reference
                                if ((citation in old_doi_input_list) and (citation != del_node)):
                                    only_reference = False
                                    break
                            if (only_reference == False):
                                break
                    # NOTE(review): the deletion is started for pub, not for the single reference checked above -- confirm
                    if (only_reference):
                        delete_ref_nodes_rec(pub)

                # checks for every citation if it references another input publication of the old call.
                # If not it calls deletion function
                for citation in pub.citations:
                    only_reference = True
                    for cit_ref in processed_list:
                        if (citation == cit_ref.doi_url):
                            for reference in cit_ref.references:
                                # the citing publication also references another old input doi than the deleted one
                                if ((reference in old_doi_input_list) and (reference != del_node)):
                                    only_reference = False
                                    break
                            if (only_reference == False):
                                break
                    # NOTE(review): the deletion is started for pub, not for the single citation checked above -- confirm
                    if (only_reference):
                        delete_cit_nodes_rec(pub)

                # removes publication of type input after start of recursive call to both directions
                processed_list.remove(pub)

    # edges are taken from the old edge list and checked against the publications that are left
    valid_edges = back_to_valid_edges(old_edges_list, processed_list)

    return(processed_list, valid_edges)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment