Skip to content
Snippets Groups Projects
Commit 8927980a authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

verarbeitung: final changes to documentation and clean up

parent b9e9c3c8
No related branches found
No related tags found
1 merge request!32verarbeitung: final merge to main
This commit is part of merge request !32. Comments created here will be created in the context of that merge request.
Showing
with 214 additions and 167 deletions
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
# Starten des Programms: # Starten des Programms:
Um das Programm nutzen zu können, muss zuerst \grqq citation\_parser\_ui.py\grqq \, ausgeführt werden und der entstandene Link in einen Browser eingefügt werden. Danach öffnet sich die Benutzeroberfläche im Browser. Um das Programm nutzen zu können, muss zuerst 'citation\_parser\_ui.py' ausgeführt werden und der entstandene Link in einen Browser eingefügt werden. Danach öffnet sich die Benutzeroberfläche im Browser.
# Übersicht der Benutzeroberfläche: # Übersicht der Benutzeroberfläche:
...@@ -43,5 +43,6 @@ Um das Programm nutzen zu können muss zuerst \grqq citation\_parser\_ui.py\grqq ...@@ -43,5 +43,6 @@ Um das Programm nutzen zu können muss zuerst \grqq citation\_parser\_ui.py\grqq
- Alina Molkentin - Alina Molkentin
- Donna Löding - Donna Löding
- Malte Schokolowski - Malte Schokolowski
- Judith Große
- Katja Ehlers - Katja Ehlers
- Merle Stahl - Merle Stahl
...@@ -6,7 +6,7 @@ Functions to add citations recursively for multiple ACS/Nature journals ...@@ -6,7 +6,7 @@ Functions to add citations recursively for multiple ACS/Nature journals
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
# __copyright__ = "" # __copyright__ = ""
# __credits__ = ["", "", "", ""] # __credits__ = ["", "", "", ""]
...@@ -28,6 +28,21 @@ from verarbeitung.get_pub_from_input import get_pub ...@@ -28,6 +28,21 @@ from verarbeitung.get_pub_from_input import get_pub
def create_graph_structure_citations_test(pub, search_depth, search_depth_max, cit_type, test_var, test_nodes, def create_graph_structure_citations_test(pub, search_depth, search_depth_max, cit_type, test_var, test_nodes,
test_edges): test_edges):
''' '''
:param pub: publication which citations will be added
:type pub: Publication
:param search_depth: current depth to search for citations
:type search_depth: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differentiate citation and reference call
:type cit_type: String
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
:param test_nodes: list of publications from unit test :param test_nodes: list of publications from unit test
:type test_nodes: List[Publication] :type test_nodes: List[Publication]
...@@ -117,12 +132,16 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty ...@@ -117,12 +132,16 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty
citations_pub_obj_list = [] citations_pub_obj_list = []
for citation in get_cit_type_list(pub, cit_type): for citation in get_cit_type_list(pub, cit_type):
not_in_nodes = True not_in_nodes = True
for node in nodes: # checks every citation for duplication for node in nodes:
# checks every citation for duplication
if (citation.doi_url == node.doi_url): if (citation.doi_url == node.doi_url):
not_in_nodes = False not_in_nodes = False
break break
if (not_in_nodes): if (not_in_nodes):
if (search_depth < search_depth_max): # checks if its a test and chooses input function accordingly if (search_depth < search_depth_max):
# checks if its a test and chooses input function accordingly
citation_pub_obj = get_pub(citation.doi_url, test_var) citation_pub_obj = get_pub(citation.doi_url, test_var)
if (type(citation_pub_obj) != Publication): if (type(citation_pub_obj) != Publication):
# print(pub) # print(pub)
...@@ -165,11 +184,10 @@ def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max ...@@ -165,11 +184,10 @@ def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max
:param test_var: variable to differentiate between test and url call :param test_var: variable to differentiate between test and url call
:type test_var: boolean :type test_var: boolean
recursive function to implement depth-first-search on citations recursive function to implement breadth-first-search on citations
''' '''
# adds next level to nodes/edges # searches citations for every publication in list and adds the one found to new list
new_citation_pub_obj_save_list = [] new_citation_pub_obj_save_list = []
for pub in citations_pub_obj_list: for pub in citations_pub_obj_list:
new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type,
......
...@@ -6,7 +6,7 @@ Functions that format the computed graph to match the interface to the output-pa ...@@ -6,7 +6,7 @@ Functions that format the computed graph to match the interface to the output-pa
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
# __copyright__ = "" # __copyright__ = ""
# __credits__ = ["", "", "", ""] # __credits__ = ["", "", "", ""]
...@@ -45,8 +45,6 @@ def format_nodes(nodes): ...@@ -45,8 +45,6 @@ def format_nodes(nodes):
return list_of_node_dicts return list_of_node_dicts
# creates a list that contains a dictionary for each edge
# the dictionaries contain the source as keys and the target as values
def format_edges(edges): def format_edges(edges):
''' '''
:param edges: list of links to export to json :param edges: list of links to export to json
...@@ -74,7 +72,7 @@ def output_to_json(nodes, edges, search_depth, search_height, json_file='json_te ...@@ -74,7 +72,7 @@ def output_to_json(nodes, edges, search_depth, search_height, json_file='json_te
:param test_var: variable to differentiate between test and url call :param test_var: variable to differentiate between test and url call
:type test_var: boolean :type test_var: boolean
function to export nodes and links as a dictionary to json file function to export nodes and links as a dictionary to a given json file
''' '''
dict_of_all = dict() dict_of_all = dict()
list_of_node_dicts = format_nodes(nodes) list_of_node_dicts = format_nodes(nodes)
...@@ -82,6 +80,8 @@ def output_to_json(nodes, edges, search_depth, search_height, json_file='json_te ...@@ -82,6 +80,8 @@ def output_to_json(nodes, edges, search_depth, search_height, json_file='json_te
dict_of_all["nodes"] = list_of_node_dicts dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts dict_of_all["links"] = list_of_edge_dicts
dict_of_all["depth_height"] = [search_depth, search_height] dict_of_all["depth_height"] = [search_depth, search_height]
# output to json. json name depends on test_var and if a non standard filename was given.
if (test_var): if (test_var):
if json_file != 'json_text.json': if json_file != 'json_text.json':
with open(json_file, 'w') as outfile: with open(json_file, 'w') as outfile:
......
...@@ -6,7 +6,7 @@ Functions to generate a graph representing citations between multiple ACS/Nature ...@@ -6,7 +6,7 @@ Functions to generate a graph representing citations between multiple ACS/Nature
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
# __copyright__ = "" # __copyright__ = ""
# __credits__ = ["", "", "", ""] # __credits__ = ["", "", "", ""]
...@@ -24,17 +24,29 @@ sys.path.append("../") ...@@ -24,17 +24,29 @@ sys.path.append("../")
from input.publication import Publication from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit from .add_citations_rec import add_citations, create_global_lists_cit
def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var): def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var):
''' '''
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
for unit test purposes only for unit test purposes only
''' '''
global nodes, edges global nodes, edges
nodes = [] nodes = []
edges = [] edges = []
return (initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var)) return (initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var))
...@@ -52,6 +64,7 @@ def complete_inner_edges_test(test_nodes, test_edges): ...@@ -52,6 +64,7 @@ def complete_inner_edges_test(test_nodes, test_edges):
global nodes, edges global nodes, edges
nodes = test_nodes nodes = test_nodes
edges = test_edges edges = test_edges
complete_inner_edges() complete_inner_edges()
return (nodes, edges) return (nodes, edges)
...@@ -132,11 +145,15 @@ def complete_inner_edges(update_var=False, input_nodes=[], input_edges=[]): ...@@ -132,11 +145,15 @@ def complete_inner_edges(update_var=False, input_nodes=[], input_edges=[]):
for node in nodes: for node in nodes:
if (node.group < 0): if (node.group < 0):
# searches nodes from 'other' group to find cross references
for citation in node.citations: for citation in node.citations:
for pub in nodes: for pub in nodes:
if ((pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges)): if ((pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges)):
edges.append([citation.doi_url, node.doi_url]) edges.append([citation.doi_url, node.doi_url])
if (node.group > 0): if (node.group > 0):
# searches nodes from 'other' group to find cross references
for reference in node.references: for reference in node.references:
for pub in nodes: for pub in nodes:
if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)): if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)):
......
Dieser Ordner ist nur für uns intern, um Testläufe mit echten DOIs zu starten.
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to test and print the nodes and edges sets
"""
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
# __copyright__ = ""
# __credits__ = ["", "", "", ""]
# __license__ = ""
# __version__ = ""
# __maintainer__ = ""
import sys
# sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
sys.path.append("../../")
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.update_graph.import_from_json import input_from_json
from verarbeitung.update_graph.update_graph import update_graph
def print_graph(nodes, edges):
    """
    :param nodes: objects exposing a ``title`` attribute
    :param edges: printable edge entries

    prints every node title, then every edge, then the number of nodes,
    the number of edges and a spacer line
    """
    print("Knoten:\n")
    for publication in nodes:
        print(publication.title, "\n")

    print("\nKanten:\n")
    for link in edges:
        print(link, "\n")

    # summary block: node count, edge count, spacer
    node_count = len(nodes)
    print(node_count)
    edge_count = len(edges)
    print(edge_count)
    print(" ")
def print_extended_graph(nodes, edges):
    """
    :param nodes: objects exposing ``title``, ``doi_url``, ``references``
                  and ``citations`` attributes
    :param edges: printable edge entries

    prints, for every node, its title and doi_url followed by the doi_url of
    each of its references and then of each of its citations; afterwards
    prints all edges, the number of nodes, the number of edges and a spacer
    """
    print("Knoten:\n")
    for publication in nodes:
        print(publication.title, "\n")
        print(publication.doi_url)
        # references first, citations second -- same order as the attributes are listed
        for attribute_name in ("references", "citations"):
            for linked_pub in getattr(publication, attribute_name):
                print(linked_pub.doi_url)

    print("\nKanten:\n")
    for link in edges:
        print(link, "\n")

    node_count = len(nodes)
    print(node_count)
    edge_count = len(edges)
    print(edge_count)
    print(" ")
def print_simple(nodes, edges):
    """
    :param nodes: sized collection of nodes
    :param edges: sized collection of edges

    prints only the number of nodes, the number of edges and a spacer line
    (the individual nodes and edges are intentionally not printed)
    """
    node_count = len(nodes)
    print(node_count)
    edge_count = len(edges)
    print(edge_count)
    print(" ")
def try_known_publications():
    """
    manual test run with a fixed set of known DOIs

    builds the graph via init_graph_construction, prints it with print_graph
    and returns the resulting (nodes, edges) tuple

    other DOIs that were used for earlier manual runs:
    https://doi.org/10.1021/acs.jcim.0c00741, https://doi.org/10.1021/ci700007b,
    https://doi.org/10.1021/acs.jcim.5b00292,
    https://pubs.acs.org/doi/10.1021/acs.jcim.0c00675
    """
    doi_list = [
        'https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249',
        'https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203',
        'https://doi.org/10.1021/acs.jmedchem.0c01332',
    ]

    # presumably the two numbers are search depth and search height -- verify against init_graph_construction
    graph = init_graph_construction(doi_list, 2, 2)
    nodes, edges = graph
    print_graph(nodes, edges)
    return (nodes, edges)
def try_delete_nodes():
    """
    manual test run that constructs a graph for a single known DOI

    only the construction step is active; the follow-up steps (import from
    'json_text.json', update_graph with a reduced DOI list, print_simple of
    the result) are currently disabled, so nothing is printed or returned
    """
    doi_list = ['https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249']

    # result is unpacked but not used further while the update step is disabled
    graph = init_graph_construction(doi_list, 1, 1)
    nodes, edges = graph
def try_import():
    """
    manual test run: reads a graph from 'json_text.json' via input_from_json
    and prints it in extended form
    """
    imported_graph = input_from_json('json_text.json')
    nodes, edges = imported_graph
    print_extended_graph(nodes, edges)
# Manual entry point: exactly one of the try_* helpers is active at a time,
# the other variants are kept below as disabled alternatives.
# NOTE(review): the active call is not guarded by `if __name__ == "__main__"`,
# so it also runs whenever this module is imported -- confirm this is intended.
# nodes, edges = try_known_publications()
# nodes_new, edges_new = input_from_json("json_text.json")
# print_graph(nodes_new, edges_new)
try_delete_nodes()
# try_import()
...@@ -6,7 +6,7 @@ A function to return an object of Type Publication for a given DOI ...@@ -6,7 +6,7 @@ A function to return an object of Type Publication for a given DOI
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
#__copyright__ = "" #__copyright__ = ""
#__credits__ = ["", "", "", ""] #__credits__ = ["", "", "", ""]
#__license__ = "" #__license__ = ""
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", 
"journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}], "depth_height": ["new_height.json", true]}
\ No newline at end of file
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", 
"journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}], "depth_height": [2, 2]}
\ No newline at end of file
Zyklus
großer Zyklus
Innere Kanten vervollständigen
...@@ -6,7 +6,7 @@ main function to call to generate a graph representing citations between multipl ...@@ -6,7 +6,7 @@ main function to call to generate a graph representing citations between multipl
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
#__copyright__ = "" #__copyright__ = ""
#__credits__ = ["", "", "", ""] #__credits__ = ["", "", "", ""]
#__license__ = "" #__license__ = ""
...@@ -39,7 +39,7 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso ...@@ -39,7 +39,7 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso
:param json_file: file to export graph to :param json_file: file to export graph to
:type json_file: String :type json_file: String
main function to construct new or updated publication graphs main function to construct new or update known publication graph
''' '''
# updates graph if json file is known in directory otherwise starts new graph construction # updates graph if json file is known in directory otherwise starts new graph construction
......
"""
This file is for testing purposes only. We left it in the directory for ease of use.
To use it you need to move it into the main directory of the project
"""
import sys import sys
import gc import gc
from pathlib import Path from pathlib import Path
from verarbeitung.process_main import Processing from verarbeitung.process_main import Processing
#from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes
doi_list = [] doi_list = []
......
import unittest # -*- coding: utf-8 -*-
"""
Functions to unittest functions which construct a new graph
"""
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Finished"
# __copyright__ = ""
# __credits__ = ["", "", "", ""]
# __license__ = ""
# __version__ = ""
# __maintainer__ = ""
import unittest
import sys import sys
sys.path.append("../") sys.path.append("../")
...@@ -98,7 +114,7 @@ class ConstructionTest(unittest.TestCase): ...@@ -98,7 +114,7 @@ class ConstructionTest(unittest.TestCase):
self.assertCountEqual(edges, [['doi1', 'doi2'], ['doi3', 'doi1']]) self.assertCountEqual(edges, [['doi1', 'doi2'], ['doi3', 'doi1']])
self.assertCountEqual(err_list, ['doi2ic']) self.assertCountEqual(err_list, ['doi2ic'])
## From here the tests for the individual functions ## ## from here: tests for the individual functions ##
# initialize_graph.py: # initialize_graph.py:
......
# -*- coding: utf-8 -*-
"""
Functions implement a test input function analogous to the one from the input group.
Also implements example graphs
"""
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Finished"
# __copyright__ = ""
# __credits__ = ["", "", "", ""]
# __license__ = ""
# __version__ = ""
# __maintainer__ = ""
import sys import sys
sys.path.append("../") sys.path.append("../")
...@@ -23,9 +41,12 @@ def input_test_func(pub_doi): ...@@ -23,9 +41,12 @@ def input_test_func(pub_doi):
def cit(list_doi, cit_type): def cit(list_doi, cit_type):
''' '''
:param list_doi list of citation DOIs to get their Citation Class :param list_doi: list of citation DOIs to get their Citation Class
:type list_doi: List[String] :type list_doi: List[String]
:param cit_type: variable to differentiate citation and reference call
:type cit_type: String
returns a list of citations objects for given DOI list returns a list of citations objects for given DOI list
''' '''
...@@ -37,6 +58,8 @@ def cit(list_doi, cit_type): ...@@ -37,6 +58,8 @@ def cit(list_doi, cit_type):
return cits return cits
# large_graph_1, large_graph_2 and crossed_graph are visualized in test_graphs_plan.pdf
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']]
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']]
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []]
......
import unittest # -*- coding: utf-8 -*-
"""
Functions to unittest functions which are updating a known graph
"""
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Finished"
# __copyright__ = ""
# __credits__ = ["", "", "", ""]
# __license__ = ""
# __version__ = ""
# __maintainer__ = ""
import unittest
import sys import sys
from pathlib import Path from pathlib import Path
...@@ -79,7 +95,7 @@ class UpdatingTest(unittest.TestCase): ...@@ -79,7 +95,7 @@ class UpdatingTest(unittest.TestCase):
self.assertCountEqual(new_nodes, nodes) self.assertCountEqual(new_nodes, nodes)
self.assertCountEqual(new_edges, edges) self.assertCountEqual(new_edges, edges)
## From here the tests for the individual functions ## ## From here: tests for the individual functions ##
# update_graph.py: # update_graph.py:
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", 
"journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}], "depth_height": [2, 2]}
\ No newline at end of file
#!/usr/bin/env python3 # -*- coding: utf-8 -*-
"""
compares old and new input list to find common, deleted and inserted input dois.
"""
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Finished"
# __copyright__ = ""
# __credits__ = ["", "", "", ""]
# __license__ = ""
# __version__ = ""
# __maintainer__ = ""
from collections import Counter from collections import Counter
...@@ -29,11 +45,5 @@ def compare_old_and_new_node_lists(old_doi_node_list, new_doi_node_list): ...@@ -29,11 +45,5 @@ def compare_old_and_new_node_lists(old_doi_node_list, new_doi_node_list):
elif ((doi in dois_from_new_graph) & ( elif ((doi in dois_from_new_graph) & (
doi not in dois_from_old_graph)): # if the DOI occurs once and it is from new graph it is an inserted node doi not in dois_from_old_graph)): # if the DOI occurs once and it is from new graph it is an inserted node
inserted_nodes.append(doi) # appends the DOI to the inserted ones inserted_nodes.append(doi) # appends the DOI to the inserted ones
return (common_nodes, inserted_nodes, deleted_nodes)
# Test Prints return (common_nodes, inserted_nodes, deleted_nodes)
# liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"]
# liste_2 = ["doi_1","doi_2","doi_3","doi_6","doi_7"]
# print("gemeinsame Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[0])
# print("hinzugefügte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[1])
# print("gelöschte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[2])
...@@ -6,7 +6,7 @@ Functions to update a graph representing citations between multiple ACS/Nature j ...@@ -6,7 +6,7 @@ Functions to update a graph representing citations between multiple ACS/Nature j
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
# __copyright__ = "" # __copyright__ = ""
# __credits__ = ["", "", "", ""] # __credits__ = ["", "", "", ""]
...@@ -23,10 +23,34 @@ sys.path.append("../") ...@@ -23,10 +23,34 @@ sys.path.append("../")
from input.publication import Publication from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub from verarbeitung.get_pub_from_input import get_pub
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list
def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type): def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type):
"""
:param test_nodes: list of publications from unit test
:type test_nodes: List[Publication]
:param test_edges: list of links from unit test
:type test_edges: List[List[String,String]]
:param new_nodes: list of nodes which are generated separately from main node list to avoid recursive problems
:type new_nodes: List[Publication]
:param new_edges: list of edges which are generated separately from main edge list to avoid recursive problems
:type new_edges: List[List[String,String]]
:param changed_node: node which is known but not from input group
:type changed_node: Publication
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param cit_type: determines whether the function call is for a reference or citation
:type cit_type: String
for unit test purposes only
"""
global nodes, edges global nodes, edges
nodes = test_nodes nodes = test_nodes
edges = test_edges edges = test_edges
...@@ -36,6 +60,30 @@ def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_se ...@@ -36,6 +60,30 @@ def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_se
def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height, def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height,
new_search_depth, new_search_height): new_search_depth, new_search_height):
"""
:param test_nodes: list of publications from unit test
:type test_nodes: List[Publication]
:param test_edges: list of links from unit test
:type test_edges: List[List[String,String]]
:param inserted_test_nodes: list of nodes which are inserted to new input array
:type inserted_test_nodes: List[String]
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param old_search_height: height to search for citations from old construction call
:type old_search_height: int
:param new_search_depth: depth to search for references from new construction call
:type new_search_depth: int
:param new_search_height: height to search for citations from new construction call
:type new_search_height: int
for unit test purposes only
"""
global nodes, edges global nodes, edges
nodes = test_nodes nodes = test_nodes
edges = test_edges edges = test_edges
......
...@@ -6,7 +6,7 @@ Functions to remove publications/links from nodes/edges list, if they can no lon ...@@ -6,7 +6,7 @@ Functions to remove publications/links from nodes/edges list, if they can no lon
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de" __email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production" __status__ = "Finished"
#__copyright__ = "" #__copyright__ = ""
#__credits__ = ["", "", "", ""] #__credits__ = ["", "", "", ""]
#__license__ = "" #__license__ = ""
...@@ -20,15 +20,31 @@ sys.path.append("../../") ...@@ -20,15 +20,31 @@ sys.path.append("../../")
from .update_edges import back_to_valid_edges from .update_edges import back_to_valid_edges
def search_ref_cit_graph_rec_test(pubs, new_test_input, old_max_depth, cit_var): def search_ref_cit_graph_rec_test(pubs, new_test_input, old_max_depth, cit_type):
'''
:param pubs: list of publications that is used as the input object list
:type pubs: List[Publication]
:param new_test_input: publications from which the recursive search is started
:type new_test_input: List[Publication]
:param old_max_depth: old max search depth
:type old_max_depth: int
:param cit_type: variable to differentiate citation and reference call
:type cit_type: String
for unit test purposes only
'''
global usable_nodes, input_obj_list global usable_nodes, input_obj_list
usable_nodes = [] usable_nodes = []
input_obj_list = pubs input_obj_list = pubs
if cit_var == "Reference": if cit_type == "Reference":
for pub in new_test_input: for pub in new_test_input:
search_ref_graph_rec(pub, 1, old_max_depth) search_ref_graph_rec(pub, 1, old_max_depth)
elif cit_var == "Citation": elif cit_type == "Citation":
for pub in new_test_input: for pub in new_test_input:
search_cit_graph_rec(pub, 1, old_max_depth) search_cit_graph_rec(pub, 1, old_max_depth)
return usable_nodes return usable_nodes
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment