Skip to content
Snippets Groups Projects
Commit 2708fc81 authored by Molkentin, Alina's avatar Molkentin, Alina
Browse files

Merge branch 'main' of...

parents 0c1f1725 6011746e
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
Showing
with 467 additions and 267 deletions
......@@ -9,7 +9,7 @@ def back_to_valid_edges(links_from_json, processed_input_list):
function that deletes edges, if one or two of the included nodes are deleted nodes
'''
list_of_valid_edges = links_from_json
list_of_valid_edges = links_from_json.copy()
#iterates over all edges from old graph
......@@ -23,6 +23,8 @@ def back_to_valid_edges(links_from_json, processed_input_list):
# increases counter if adjacent node was found
if (adj_node == pub.doi_url):
found_adj_nodes += 1
if (found_adj_nodes == 2):
break
#removes the edge if less than 2 adjacent nodes found
if (found_adj_nodes < 2):
......
......@@ -13,22 +13,21 @@ __status__ = "Production"
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from os import error
sys.path.append("../")
from input.interface import InputInterface as Input
#import input
from input_test import input_test_func
from input.publication import Publication
from get_pub_from_input import get_pub
from json_demo import output_to_json
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: list of strings
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
......@@ -38,24 +37,19 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds input dois to nodes and retrieves citations and references for input publications
'''
# saves found citations and references in lists
references_pub_obj_list = []
citations_pub_obj_list = []
for pub_doi in doi_input_list: #iterates over every incoming doi
if(test_var): #checks that it is a test and chooses test-input function
pub = input_test_func(pub_doi) #creates an object of class Publication
else: #checks that it isnt a test and chooses standart-input function
#print(pub_doi)
inter = Input()
try:
pub = inter.get_publication(pub_doi) #creates an object of class Publication
except ValueError:
continue
except IndexError:
continue
pub = get_pub(pub_doi, test_var)
if (type(pub) != Publication):
print(pub)
continue
# checks if publication already exists in nodes
not_in_nodes = True #boolean value to check if a node already exists in the set of nodes
......@@ -81,34 +75,34 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
return(references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges(test_var):
    '''
    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    completes inner edges between nodes of group height and depth
    '''
    # NOTE(review): after the merge, node.citations / node.references appear to
    # hold doi strings (see the `citation == node.doi_url` comparisons elsewhere
    # in this file) — the stale object-based duplicate checks were removed.
    for node in nodes:
        if (node.group == "depth"):
            # connect every known citing node to this depth node
            for citation in node.citations:
                for cit in nodes:
                    if (citation == cit.doi_url and [citation, node.doi_url] not in edges):
                        edges.append([citation, node.doi_url])
        if (node.group == "height"):
            # connect this height node to every known referenced node
            for reference in node.references:
                for ref in nodes:
                    if (reference == ref.doi_url and [node.doi_url, reference] not in edges):
                        edges.append([node.doi_url,reference])
# adds a node for every publication unknown
# adds edges for references between publications
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
'''
:param pub: publication which references will be added
:type pub: Class Publication
:type pub: Publication
:param search_depth: current depth to search for references
:type search_depth: int
......@@ -118,29 +112,24 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every referenced publication unknown
adds edges to added references
'''
references_pub_obj_list = []
for reference in pub.references: #iterates over the references of the considered paper
not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes
for node in nodes: #iterates over all nodes in set of nodes
if (reference.doi_url == node.doi_url): #determines that the node already exists
for node in nodes: #iterates over all nodes in set of nodes #
if (reference == node.doi_url): #determines that the node already exists
not_in_nodes = False #boolean false --> node will not be created
break
if (not_in_nodes): #checks that there is no node with this doi
if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit
if (test_var): #determines that it is a test and chooses the test-input function
reference_pub_obj = input_test_func(reference.doi_url) #creates an Object of Publication Class
else: #determines that it isnt a test and chooses the standart function
#reference_pub_obj = Input(reference.doi_url)
inter = Input()
try:
reference_pub_obj = inter.get_publication(reference.doi_url)
except ValueError:
continue
# nur aus Testzwecken, da noch was bei Input falsch ist
except IndexError:
continue
reference_pub_obj = get_pub(reference, test_var)
if (type(reference_pub_obj) != Publication):
print(pub)
continue
reference_pub_obj.group = "depth"
nodes.append(reference_pub_obj) # appends the object to the set of nodes
......@@ -148,19 +137,15 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references
# adds edge only if citation already exists
elif [pub.doi_url,reference.doi_url] not in edges:
edges.append([pub.doi_url,reference.doi_url])
elif [pub.doi_url,reference] not in edges:
edges.append([pub.doi_url,reference])
return references_pub_obj_list
# recursive function to implement height-first-search on references
# references_pub_obj_list: input list of references as publication objects
# search_depth: current search_depth of height-first-search
# search_depth_max: maximal search_depth for dfs
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
'''
:param references_pub_obj_list: list of publications which references will be added
:type references_pub_obj_list: list of objects of type Class Publications
:type references_pub_obj_list: List[Publication]
:param search_depth: current depth to search for references
:type search_depth: int
......@@ -170,6 +155,8 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement height-first-search on references
'''
# adds next level to nodes/edges
......@@ -180,14 +167,12 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
if (search_depth < search_depth_max):
process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var)
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
'''
:param pub: publication which citations will be added
:type pub: Class Publication
:type pub: Publication
:param search_height: current height to search for citations
:type search_height_max: int
......@@ -197,28 +182,24 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every citing publication unknown
adds edges to added citations
'''
citations_pub_obj_list = []
for citation in pub.citations:
not_in_nodes = True
for node in nodes: # checks every citation for duplication
if (citation.doi_url == node.doi_url):
if (citation == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (search_height < search_height_max): #checks if its a test and chooses input function accordingly
if (test_var):
citation_pub_obj = input_test_func(citation.doi_url)
else:
#citation_pub_obj = Input(citation.doi_url)
inter = Input()
try:
citation_pub_obj = inter.get_publication(citation.doi_url)
except ValueError:
continue
except IndexError:
continue
citation_pub_obj = get_pub(citation, test_var)
if (type(citation_pub_obj) != Publication):
print(pub)
continue
citation_pub_obj.group = "height"
nodes.append(citation_pub_obj)
......@@ -226,20 +207,15 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
citations_pub_obj_list.append(citation_pub_obj)
# adds only edge if citation already exists
elif [citation.doi_url,pub.doi_url] not in edges:
edges.append([citation.doi_url,pub.doi_url])
elif [citation,pub.doi_url] not in edges:
edges.append([citation,pub.doi_url])
return citations_pub_obj_list
# recursive function to implement height-first-search on citations
# citations_pub_obj_list: input list of citations as publication objects
# search_height: current search_height of height-first-search
# search_height_max: maximal search_height for dfs
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
'''
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: list of objects of type Class Publications
:type citations_pub_obj_list: List[Publication]
:param search_height: current height to search for citations
:type search_height_max: int
......@@ -249,6 +225,8 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement depth-first-search on citations
'''
# adds next level to nodes/edges
......@@ -261,8 +239,6 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
def process_main(doi_input_list, search_height, search_depth, test_var = False):
'''
:param doi_input_list: input list of doi from UI
......@@ -276,6 +252,8 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
main function to start graph generation
'''
# ERROR-Handling doi_array = NULL
......@@ -291,10 +269,10 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
print("Error, search_depth of search must be positive")
# creates empty lists to save nodes and edges
global nodes, edges
nodes = [] # create empty array for the nodes
edges = [] # create empty array for the edges
nodes = []
edges = []
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
......@@ -307,14 +285,6 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
complete_inner_edges(test_var)
# calls a skript to save nodes and edges of graph in .json file
output_to_json(nodes,edges)
# only for unit tests
if (test_var == True):
doi_nodes_list = []
for node in nodes:
doi_nodes_list.append(node.doi_url)
return(doi_nodes_list, edges)
output_to_json(nodes,edges, test_var)
return(nodes,edges)
import unittest
from Processing import process_main
from import_from_json import input_from_json
from update_graph import check_graph_updates
class ProcessingTest(unittest.TestCase):
maxDiff = None
def testCycle(self):
    '''
    Two publications citing each other must yield exactly those two nodes
    and both directed edges, regardless of search depth/height.
    '''
    nodes, edges = process_main(['doiz1'],1,1,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
    self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
    # a larger search radius must not grow the cycle
    nodes, edges = process_main(['doiz1'],2,2,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
    self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
#def testBigCycle(self):
......@@ -19,48 +27,88 @@ class ProcessingTest(unittest.TestCase):
def testEmptyDepthHeight(self):
    '''
    With depth and height 0, only the input dois become nodes; edges are
    added only between input publications that directly cite each other.
    '''
    nodes, edges = process_main(['doi1'],0,0,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi1'])
    self.assertCountEqual(edges, [])
    nodes, edges = process_main(['doi1', 'doi2'],0,0,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes, ['doi1','doi2'])
    self.assertCountEqual(edges, [['doi1', 'doi2']])
    nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3'])
    self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
def testInnerEdges(self):
    '''
    Edges between two non-input nodes (inner edges) must be found as well.
    '''
    nodes, edges = process_main(['doi_ie1'],1,1,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3'])
    self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
def testRightHeight(self):
    '''
    The citation (height) search must stop exactly at the requested height.
    '''
    nodes, edges = process_main(['doi_h01'],1,0,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_h01'])
    self.assertCountEqual(edges, [])
    nodes, edges = process_main(['doi_h02'],1,0,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_h02','doi_h1'])
    self.assertCountEqual(edges, [['doi_h1','doi_h02']])
    # height 2 adds exactly one more level of citations
    nodes, edges = process_main(['doi_h02'],2,0,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2'])
    self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
def testRightDepth(self):
    '''
    The reference (depth) search must stop exactly at the requested depth.
    '''
    nodes, edges = process_main(['doi_d01'],0,1,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_d01'])
    self.assertCountEqual(edges, [])
    nodes, edges = process_main(['doi_d02'],0,1,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_d02','doi_d1'])
    self.assertCountEqual(edges, [['doi_d02','doi_d1']])
    # depth 2 adds exactly one more level of references
    nodes, edges = process_main(['doi_d02'],0,2,True)
    doi_nodes = keep_only_dois(nodes)
    self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2'])
    self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
def test_import_from_json(self):
    '''
    Re-importing the json file written by process_main (test mode writes
    'test_output.json') must reproduce the same nodes and edges.
    '''
    nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True)
    nodes_new, edges_new = input_from_json('test_output.json')
    self.assertCountEqual(nodes_old,nodes_new)
    self.assertCountEqual(edges_old, edges_new)
def test_deleted_input_dois(self):
    '''
    Updating a two-input graph down to a single input doi must produce the
    same graph as building it from that single doi in the first place.
    '''
    # graph built from one input doi — the expected result after the update
    nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True)
    # graph built from both input dois — written to 'test_output.json'
    nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
    # NOTE(review): result is unused; presumably called to exercise the
    # json round-trip of the two-input graph — confirm intent
    nodes_new_both, edges_new_both = input_from_json('test_output.json')
    nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True)
    self.assertCountEqual(nodes_old_single,nodes_new_single)
    self.assertCountEqual(edges_old_single, edges_new_single)
def keep_only_dois(nodes):
    '''
    :param nodes: input list of nodes of type Publication
    :type nodes: List[Publication]

    returns a list containing only the doi of each given node
    '''
    # comprehension replaces the manual append loop
    return [node.doi_url for node in nodes]
if __name__ == "__main__":
unittest.main()
\ No newline at end of file
No preview for this file type
No preview for this file type
No preview for this file type
# -*- coding: utf-8 -*-
"""
A function to return an object of Type Publication for a given doi
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
def get_pub(pub_doi, test_var):
    '''
    :param pub_doi: doi of the publication to retrieve
    :type pub_doi: String

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    returns an object of type Publication for the given doi, or the
    exception class itself if the retrieval failed
    '''
    # test runs use the offline test-input function
    if (test_var):
        return input_test_func(pub_doi)
    # standard case: fetch the publication through the input interface
    try:
        return Input().get_publication(pub_doi)
    except (ValueError, IndexError, AttributeError) as err:
        # callers detect failure via "type(pub) != Publication",
        # so the exception class is handed back instead of raising
        return type(err)
\ No newline at end of file
......@@ -15,46 +15,19 @@ __status__ = "Production"
import json
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
import sys
sys.path.append("../")
from input.interface import InputInterface as Input
class Publication:
    """Graph node rebuilt from an old graph json file.

    references and citations start empty; they are appended elsewhere while
    the stored edges are replayed.
    """
    #def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
    def __init__(self, doi_url, title, contributors, journal, publication_date, group):
        # doi_url acts as the unique node key in the graph
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        # filled in later from the stored edge list
        self.references = []
        self.citations = []
        # group: position relative to the input dois (e.g. "input"/"depth"/"height")
        self.group = group
class Citation:
    """Flat record of a citing publication (no nested references/citations)."""
    def __init__(self,doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
class Reference:
    """Flat record of a referenced publication (no nested references/citations)."""
    def __init__(self,doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
from input.publication import Publication, Citation
def create_pubs_from_json(input_dict):
'''
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
creates list of publication retrieved from old json file
'''
#iterates over the list of nodes
......@@ -67,29 +40,24 @@ def create_pubs_from_json(input_dict):
def add_ref_and_cit_to_pubs(input_dict):
'''
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
:param input_dict: dictionary read from old graph Json File
:type json_file: dictionary
adds references and citations to retrieved publication list
'''
# iterates over the list of edges
for edge in input_dict["links"]:
for source in list_of_nodes_py:
for target in list_of_nodes_py:
# when correct dois found, adds then as references/citatons to publication list
if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])):
new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date)
new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference")
source.references.append(new_reference)
new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date)
new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation")
target.citations.append(new_citation)
# adds reference to current node
# if (node.doi_url == edge["source"]):
# new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
# node.references.append(new_reference)
# # adds citation to current node
# if (node.doi_url == edge["target"]):
# new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
# node.citations.append(new_citation)
# adds edge to list
list_of_edges_py.append([edge["source"],edge["target"]])
......@@ -97,8 +65,10 @@ def add_ref_and_cit_to_pubs(input_dict):
def input_from_json(json_file):
'''
:param json_file: Json-Datei for the old graph
:type json_file: Json File
:param json_file: Json-Datei for the old graph
:type json_file: String
retrieves information from old json file to be reused for new graph construction
'''
# creates global sets for nodes and edges
......@@ -117,5 +87,4 @@ def input_from_json(json_file):
add_ref_and_cit_to_pubs(input_dict)
return(list_of_nodes_py, list_of_edges_py)
return(list_of_nodes_py, list_of_edges_py)
\ No newline at end of file
import sys
sys.path.append("../")
from input.publication import Publication
class Publication:
    """Test stand-in for a publication node, built from a fixture array."""
    def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        # None means "no references known" — keep an empty list instead;
        # otherwise ref()/cit() resolve the doi lists into objects
        if references is None:
            self.references = []
        else:
            self.references = ref(references)
        if citations is None:
            self.citations = []
        else:
            self.citations = cit(citations)
        # group: position relative to the input dois (e.g. "input"/"depth"/"height")
        self.group = group
class Citation:
    """Flat test record of a citing publication."""
    def __init__(self,doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
class Reference:
    """Flat test record of a referenced publication."""
    def __init__(self,doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
from input.publication import Publication, Citation
def input_test_func(pub_doi):
    '''
    :param pub_doi: pub doi to find publication in list_of_arrays
    :type pub_doi: String

    returns the Publication object for the given doi, or None if the doi
    is not part of the test fixtures
    '''
    for array in list_of_arrays:
        if pub_doi == array[0]:
            # stale pre-merge call with 9 arguments removed — it raised a
            # TypeError against the 8-parameter Publication constructor
            return Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7])
    return None
def cit(list_doi):
'''
:param list_doi list of citation dois to get their Citation Class
:type list_doi: List[String]
returns a list of citations objects for given doi list
'''
cits = []
for doi_url in list_doi:
for array in list_of_arrays:
......@@ -54,6 +34,13 @@ def cit(list_doi):
return cits
def ref(list_doi):
'''
:param list_doi list of reference dois to get their Reference Class
:type list_doi: List[String]
returns a list of reference objects for given doi list
'''
refs = []
for doi_url in list_doi:
for array in list_of_arrays:
......@@ -62,27 +49,57 @@ def ref(list_doi):
return refs
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], '']
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], '']
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], '']
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], '']
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], '']
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], '']
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], '']
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], '']
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], '']
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], '']
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], '']
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], '']
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], '']
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], '']
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], '']
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], '']
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], '']
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], '']
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3]
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']]
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']]
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []]
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']]
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']]
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']]
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']]
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []]
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []]
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']]
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']]
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']]
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []]
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []]
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []]
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []]
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []]
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []]
large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []]
large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []]
large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12'], []]
large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']]
large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']]
large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']]
large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']]
large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']]
large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']]
large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']]
large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12']]
large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []]
large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []]
large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']]
large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']]
large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i'], ['doi_lg_2_h21','doi_lg_2_h22']]
large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']]
large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12']]
large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']]
large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']]
large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']]
large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']]
large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']]
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3,
right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3,
large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12,
large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12,
large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23]
......@@ -12,11 +12,11 @@ def format_nodes(V):
list_of_node_dicts = list()
for node in V:
new_dict = dict()
new_dict["doi"] = node.doi_url
new_dict["name"] = node.title
new_dict["author"] = node.contributors
new_dict["year"] = node.publication_date
new_dict["journal"] = node.journal
new_dict["doi"] = node.doi_url
new_dict["group"] = node.group
new_dict["citations"] = len(node.citations)
list_of_node_dicts.append(new_dict)
......@@ -34,14 +34,18 @@ def format_edges(E):
return list_of_edge_dicts
# combine the lists of nodes and edges to a dictionary and saves it to a json file
def output_to_json(V, E, test_var = False):
    '''
    :param V: list of publication nodes to be serialized
    :type V: List[Publication]

    :param E: list of edges between publications to be serialized
    :type E: List[List[String,String]]

    :param test_var: variable to differenciate between test and url call
    :type test_var: boolean

    combines the lists of nodes and edges to a dictionary and saves it to a json file
    '''
    # merge-conflict residue removed: the stale duplicate def and the
    # unconditional write to json_text.json are gone; test runs now write
    # only to test_output.json so the real output file is not clobbered.
    dict_of_all = dict()
    dict_of_all["nodes"] = format_nodes(V)
    dict_of_all["links"] = format_edges(E)
    if (test_var):
        with open('test_output.json','w') as outfile:
            json.dump(dict_of_all, outfile)
    else:
        with open('json_text.json','w') as outfile:
            json.dump(dict_of_all, outfile)
#knoten = ["doi1", "doi2", "doi3"]
#kanten = [[1,2],[3,4],[5,6]]
......
{"nodes": [{"name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "doi": "doi_h02", "group": "input", "citations": 1}, {"name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "doi": "doi_h1", "group": "height", "citations": 1}, {"name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "doi": "doi_h2", "group": "height", "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]}
\ No newline at end of file
{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "input", "citations": 5}], "links": []}
\ No newline at end of file
This diff is collapsed.
......@@ -51,6 +51,14 @@ def print_extended_graph(nodes, edges):
print(len(edges))
print(" ")
def print_simple(nodes, edges):
    '''
    :param nodes: list of publication nodes
    :type nodes: List[Publication]

    :param edges: list of edges between publications
    :type edges: List[List[String,String]]

    prints only the node and edge counts (compact alternative to print_graph);
    commented-out per-element dump removed as dead code
    '''
    print(len(nodes))
    print(len(edges))
    print(" ")
# program test with some random dois
def try_known_publications():
......@@ -79,15 +87,15 @@ def try_known_publications():
def try_delete_nodes():
    '''
    demo helper: builds a small citation graph for one known doi and prints it.

    Merge residue cleaned up: the update-check section appeared twice
    (once live, once commented); only the intended commented variant is kept.
    '''
    doi_list = []
    doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
    #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
    nodes, edges = process_main(doi_list,1,1)
    print_graph(nodes, edges)
    #print_simple(nodes, edges)

    # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
    # doi_list = []
    # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
    # valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
    # print_simple(valid_nodes, valid_edges)
def try_import():
nodes, edges = input_from_json('json_text.json')
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": 
"doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]}
\ No newline at end of file
......@@ -13,17 +13,27 @@ __status__ = "Production"
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from os import error
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
from input.publication import Publication
from get_pub_from_input import get_pub
from Knoten_Vergleich import doi_listen_vergleichen
from Kanten_Vergleich import back_to_valid_edges
from update_graph_del import delete_nodes_and_edges
def get_old_input_dois(old_obj_input_list):
'''
:param old_obj_input_list: list of publications retrieved from old json file
:type old_obj_input_list: List[Publication]
function to return pub dois for old publications of group input retrieved from json file
'''
# new list to save doi_url for each old publication of group input
old_input_dois = []
for pub in old_obj_input_list:
if (pub.group == "input"):
......@@ -31,55 +41,65 @@ def get_old_input_dois(old_obj_input_list):
return old_input_dois
def get_new_input_dois(new_input, test_var):
    '''
    :param new_input: input list of doi urls from UI
    :type new_input: List[String]

    :param test_var: variable to differenciate between test and url call
    :type test_var: boolean

    function to return pub dois for input urls

    Merge residue cleaned up: the obsolete Input()/try-except retrieval path
    and the superseded delete_ref_nodes_rec/delete_cit_nodes_rec helpers
    (now living in update_graph_del.py) are removed; retrieval goes through
    the shared get_pub() helper.
    '''
    # new list to save doi_url for each new input url
    new_input_dois = []
    for new_node in new_input:

        # retrieves information and adds to new list if successful
        pub = get_pub(new_node, test_var)
        if (type(pub) != Publication):
            # get_pub returns an error description instead of a Publication on failure
            print(pub)
            continue
        new_input_dois.append(pub.doi_url)
    return(new_input_dois)
def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False):
    '''
    :param new_doi_input_list: input list of doi from UI
    :type new_doi_input_list: List[String]

    :param old_obj_input_list: list of publications retrieved from old json file
    :type old_obj_input_list: List[Publication]

    :param old_edges_list: list of links between publications retrieved from old json file
    :type old_edges_list: List[List[String,String]]

    :param test_var: variable to differenciate between test and url call
    :type test_var: boolean

    function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges

    Merge residue cleaned up: the pre-refactor in-place deletion branch
    (delete_ref_nodes_rec/delete_cit_nodes_rec calls) is removed; deletion is
    delegated to delete_nodes_and_edges from update_graph_del.
    '''
    # one global list to save the process of removing unneeded publications and one to save valid edges
    global processed_input_list, valid_edges
    # copy so the caller's list is not mutated during removal
    processed_input_list = old_obj_input_list.copy()
    valid_edges = []

    # save the return values of the removal step
    processed_input_list_del = []
    valid_edges_del = []

    # get dois from lists to compare for differences
    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)

    # retrieve which publications are already known, removed, inserted
    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    # deletes publications and edges from node list if publications can no longer be reached
    if (len(deleted_nodes) > 0):
        processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, deleted_nodes, old_doi_input_list, old_edges_list)
        return(processed_input_list_del, valid_edges_del)

    # NOTE(review): on the no-deletion path valid_edges is still [] —
    # verify whether callers expect old_edges_list to be returned here instead
    return(processed_input_list, valid_edges)
# -*- coding: utf-8 -*-
"""
Functions to remove publications/links from nodes/edges list, if they can no longer be reached
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from Kanten_Vergleich import back_to_valid_edges
def delete_ref_nodes_rec(pub):
    '''
    :param pub: publication whose reference subtree is walked and removed
    :type pub: Publication

    Recursively removes from the module-global processed_list every
    publication reached through the references of *pub*; *pub* itself is
    removed afterwards unless it is of group "input".
    (Original docstring said group "height" — references point towards the
    "depth" direction; verify intended wording.)
    '''
    for reference in pub.references:
        for ref_pub in processed_list:
            if (ref_pub.doi_url == reference):
                # to find a cyclus and avoid recursion error
                # NOTE(review): this guard only catches a direct two-node cycle
                # (pub <-> reference); longer cycles could still recurse — verify
                if (reference not in pub.citations):
                    delete_ref_nodes_rec(ref_pub)

    # removes publication from list after recursion and if it's not of group input
    if (pub.group != "input"):
        processed_list.remove(pub)
def delete_cit_nodes_rec(pub):
    '''
    :param pub: publication whose citation subtree is walked and removed
    :type pub: Publication

    Recursively removes from the module-global processed_list every
    publication reached through the citations of *pub*; *pub* itself is
    removed afterwards unless it is of group "input".
    (Original docstring said group "depth" — citations point towards the
    "height" direction; verify intended wording.)
    '''
    for citation in pub.citations:
        for cit_pub in processed_list:
            if (cit_pub.doi_url == citation):
                # to find a cyclus and avoid recursion error
                # NOTE(review): this guard only catches a direct two-node cycle
                # (pub <-> citation); longer cycles could still recurse — verify
                if (citation not in pub.references):
                    delete_cit_nodes_rec(cit_pub)

    # removes publication from list after recursion and if it's not of group input
    if (pub.group != "input"):
        processed_list.remove(pub)
def delete_nodes_and_edges(input_list, deleted_nodes, old_doi_input_list, old_edges_list):
    '''
    :param input_list: list of publications to get reduced
    :type input_list: List[Publication]

    :param deleted_nodes: list of input dois which are not in new call
    :type deleted_nodes: List[String]

    :param old_doi_input_list: list of input dois from old call
    :type old_doi_input_list: List[String]

    :param old_edges_list: list of links between publications from old call
    :type old_edges_list: List[List[String,String]]

    function to start recursive node removal for references and citations and to return edge list to valid state
    '''
    # global list to save the process of removing unneeded publications
    # (read and mutated by delete_ref_nodes_rec / delete_cit_nodes_rec)
    global processed_list
    processed_list = input_list.copy()

    for del_node in deleted_nodes:
        # iterate over a snapshot: the recursive deletions and the remove()
        # below mutate processed_list, and mutating a list while iterating
        # it can silently skip elements
        for pub in list(processed_list):
            if (del_node != pub.doi_url):
                continue

            # checks for every reference if it is cited by another input node.
            # If not, its subtree is only reachable via del_node and gets deleted.
            for reference in pub.references:
                only_reference = True
                for ref_cit in processed_list:
                    if (reference == ref_cit.doi_url):
                        for citation in ref_cit.citations:
                            if ((citation in old_doi_input_list) and (citation != del_node)):
                                only_reference = False
                                break
                    if (only_reference == False):
                        break
                if (only_reference):
                    delete_ref_nodes_rec(pub)

            # checks for every citation if it cites another input node.
            # If not, its subtree is only reachable via del_node and gets deleted.
            for citation in pub.citations:
                only_citation = True
                for cit_ref in processed_list:
                    if (citation == cit_ref.doi_url):
                        for reference in cit_ref.references:
                            if ((reference in old_doi_input_list) and (reference != del_node)):
                                only_citation = False
                                break
                    if (only_citation == False):
                        break
                if (only_citation):
                    delete_cit_nodes_rec(pub)

            # removes publication of type input after start of recursive call to both directions
            processed_list.remove(pub)

    # drop edges whose endpoints no longer both exist
    valid_edges = back_to_valid_edges(old_edges_list, processed_list)

    return(processed_list, valid_edges)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment