Skip to content
Snippets Groups Projects
Commit 7ed27902 authored by Ehlers, Katja's avatar Ehlers, Katja
Browse files

Merge branch 'main' into 'output'

please

See merge request bax9187/projekt-cis-biochemie-2021-22!6
parents 85ece1d0 e4fd1022
Branches
No related tags found
No related merge requests found
Showing
with 558 additions and 0 deletions
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
# Projekt CiS-Projekt 2021/22
Directory for functions to create the fundamental graph structure at first time call of programm.
## Files in directory
initialize_graph.py
- Führt den grundlegendem Graphbauprozess aus. Die Input-DOIs werden
als Klassenobjekt zur Knotenmenge hinzugefügt und über einen rekursiven Aufruf
wird die angegene Zitierungstiefe in beide Richtungen zu den Kanten hinzugefügt.
add_citations_rec.py
- Die DOIs, die in den Zitierungen des Inputs zu finden sind, werden ebenfalls zu Knoten
und je nach angegebener Höhe oder Tiefe wird dies für weitere Tiefen erneut ausgeführt.
export_to_json.py
- Wandelt die berechnete Knoten- und Kantenmenge in eine Json Datei um.
## Authors
- Donna Löding
- Alina Molkentin
- Xinyi Tang
- Judith Große
- Malte Schokolowski
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to add citations recursivly for multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def get_cit_type_list(pub, cit_type):
'''
:param pub: Publication which citations will be added
:type pub: Publication
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
function to create nodes and edges and call create_graph_structure_citations
'''
if (cit_type == "Citation"):
return(pub.citations)
else:
return(pub.references)
def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
'''
:param input_nodes: list of nodes from Processing
:type input_nodes: List[Publication]
:param input_edges: list of edges from Processing
:type input_edges: List[String, String]
:param pub: Publication which citations will be added
:type pub: Publication
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to create nodes and edges and call create_graph_structure_citations
'''
global nodes, edges
nodes = input_nodes
edges = input_edges
return create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
'''
:param pub: publication which citations will be added
:type pub: Publication
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every citing publication unknown
adds edges to added citations
'''
citations_pub_obj_list = []
for citation in get_cit_type_list(pub, cit_type):
not_in_nodes = True
for node in nodes: # checks every citation for duplication
if (citation.doi_url == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly
citation_pub_obj = get_pub(citation.doi_url, test_var)
if (type(citation_pub_obj) != Publication):
print(pub)
continue
if (cit_type == "Citation"):
citation_pub_obj.group = search_depth + 1
edges.append([citation_pub_obj.doi_url,pub.doi_url])
else:
citation_pub_obj.group = -(search_depth + 1)
edges.append([pub.doi_url,citation_pub_obj.doi_url])
nodes.append(citation_pub_obj)
citations_pub_obj_list.append(citation_pub_obj)
# adds just the edge if citation already exists
else:
if (cit_type == "Citation"):
if ([citation.doi_url,pub.doi_url] not in edges):
edges.append([citation.doi_url,pub.doi_url])
else:
if ([pub.doi_url,citation.doi_url] not in edges):
edges.append([pub.doi_url,citation.doi_url])
return citations_pub_obj_list
def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
'''
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: List[Publication]
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
recursive function to implement depth-first-search on citations
'''
# adds next level to nodes/edges
for pub in citations_pub_obj_list:
new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
# If the maximum depth has not yet been reached, calls function recursivly with increased depth
if (search_depth < search_depth_max):
process_citations_rec(new_citation_pub_obj_list, search_depth+1, search_depth_max, cit_type, test_var)
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
'''
:param input_nodes: list of nodes from Processing
:type input_nodes: List[Publication]
:param input_edges: list of edges from Processing
:type input_edges: List[String, String]
:param citations_pub_obj_list: list of publications which citations will be added
:type citations_pub_obj_list: List[Publication]
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to call recursive depth-first-search of citations
'''
global nodes, edges
nodes = input_nodes
edges = input_edges
process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)
#return(nodes, edges)
\ No newline at end of file
#!/usr/bin/env python3
import json
from input_fj import input
# -*- coding: utf-8 -*-
"""
Functions that format the computed graph to match the interface to the output-part
Functions that format the computed graph to match the interface to the output-part and saves as a json file
"""
# creates a list that contains a dictionary for each node
# the dictionaries store the values for the attributes
def format_nodes(V):
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import json
def format_nodes(nodes):
'''
:param nodes: list of publications to export to json
:type nodes: List[Publication]
creates a list that contains a dictionary for each node
'''
list_of_node_dicts = list()
for node in V:
for node in nodes:
new_dict = dict()
new_dict["doi"] = node.doi_url
new_dict["name"] = node.title
new_dict["author"] = node.contributors
new_dict["year"] = node.publication_date
new_dict["journal"] = node.journal
new_dict["doi"] = node.doi_url
new_dict["group"] = node.group
if (node.group == 0):
new_dict["group"] = "Input"
elif (node.group > 0):
new_dict["group"] = "Citedby"
else:
new_dict["group"] = "Reference"
new_dict["depth"] = node.group
new_dict["citations"] = len(node.citations)
list_of_node_dicts.append(new_dict)
return list_of_node_dicts
# creates a list that contains a disctionary for each edge
# the dictionaries contain the source as keys and the target as values
def format_edges(E):
def format_edges(edges):
'''
:param edges: list of links to export to json
:type edges: List[String,String]
function to format links, append to list and return list to output_to_json
'''
list_of_edge_dicts = list()
for edge in E:
for edge in edges:
new_dict_2 = dict()
new_dict_2["source"] = edge[0]
new_dict_2["target"] = edge[1]
list_of_edge_dicts.append(new_dict_2)
return list_of_edge_dicts
# combine the lists of nodes and edges to a dictionary and saves it to a json file
def output_to_json(V,E):
def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False):
'''
:param nodes: list of publications to export to json
:type nodes: List[Publication]
:param edges: list of links to export to json
:type edges: List[String,String]
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to export nodes and links as a dictionary to json file
'''
dict_of_all = dict()
list_of_node_dicts = format_nodes(V)
list_of_edge_dicts = format_edges(E)
list_of_node_dicts = format_nodes(nodes)
list_of_edge_dicts = format_edges(edges)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
with open('json_text.json','w') as outfile:
if (test_var and json_file == 'json_text.json'):
with open('test_output.json','w') as outfile:
json.dump(dict_of_all, outfile)
else:
with open(json_file,'w') as outfile:
json.dump(dict_of_all, outfile)
#knoten = ["doi1", "doi2", "doi3"]
#kanten = [[1,2],[3,4],[5,6]]
#output_to_json(knoten,kanten)
# -*- coding: utf-8 -*-
"""
Functions to generate a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds input dois to nodes and retrieves citations and references for input publications
'''
# saves found citations and references in lists
references_pub_obj_list = []
citations_pub_obj_list = []
for pub_doi in doi_input_list: #iterates over every incoming doi
pub = get_pub(pub_doi, test_var)
if (type(pub) != Publication):
print(pub)
continue
# checks if publication already exists in nodes
not_in_nodes = True #boolean value to check if a node already exists in the set of nodes
for node in nodes: #iterates over every node in the set of nodes
if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set
not_in_nodes = False #false --> node will not be created
break
if (not_in_nodes): #there is no node with this doi in the set
nodes.append(pub) #appends Publication Object
pub.group = 0
else:
doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list
# inserts references as publication objects into list and
# inserts first depth references into nodes/edges if maximum search depth > 0
for reference in create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var):
references_pub_obj_list.append(reference)
# inserts citations as publication objects into list and
# inserts first height citations into nodes if maximum search height > 0
for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var):
citations_pub_obj_list.append(citation)
return(references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges():
'''
completes inner edges between nodes of group height and depth
'''
for node in nodes:
if (node.group < 0):
for citation in node.citations:
for pub in nodes:
if ((pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges)):
edges.append([citation.doi_url, node.doi_url])
if (node.group > 0):
for reference in node.references:
for pub in nodes:
if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)):
edges.append([node.doi_url,reference.doi_url])
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
:param search_height: maximum height to search for citations
:type search_height: int
:param search_depth: maximum depth to search for references
:type search_depth: int
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
main function to start graph generation
'''
# ERROR-Handling doi_array = NULL
if (len(doi_input_list) == 0):
print("Error, no input data")
# ERROR- if a negative number is entered for height
if (search_height < 0):
print("Error, search_height of search must be positive")
# ERROR- if a negative number is entered for depth
if (search_depth < 0):
print("Error, search_depth of search must be positive")
# creates empty lists to save nodes and edges
global nodes, edges
nodes = []
edges = []
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
# function calls to begin recursive processing up to max depth/height
add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)
# adds edges between reference group and citation group of known publications
complete_inner_edges()
# calls a skript to save nodes and edges of graph in .json file
#output_to_json(nodes, edges, test_var)
return(nodes,edges)
Dieser Ordner ist nur für uns intern, um Testläufe mir echten DOIs zu starten.
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to test and print the nodes and edges sets
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
sys.path.append("../../")
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.update_graph.import_from_json import input_from_json
from verarbeitung.update_graph.update_graph import update_graph
# a function to print nodes and edges from a graph
def print_graph(nodes, edges):
print("Knoten:\n")
for node in nodes:
print(node.title, "\n")
print("\nKanten:\n")
for edge in edges:
print(edge,"\n")
print(len(nodes))
print(len(edges))
print(" ")
def print_extended_graph(nodes, edges):
print("Knoten:\n")
for node in nodes:
print(node.title, "\n")
print(node.doi_url)
for reference in node.references:
print(reference.doi_url)
for citation in node.citations:
print(citation.doi_url)
print("\nKanten:\n")
for edge in edges:
print(edge,"\n")
print(len(nodes))
print(len(edges))
print(" ")
def print_simple(nodes, edges):
# for node in nodes:
# print(node)
# for edge in edges:
# print(edge)
print(len(nodes))
print(len(edges))
print(" ")
# program test with some random dois
def try_known_publications():
doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/ci700007b')
#doi_list.append('https://doi.org/10.1021/acs.jcim.5b00292')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.0c00675')
#url = sys.argv[1]
#arr.append[url]
nodes, edges = init_graph_construction(doi_list,2,2)
print_graph(nodes, edges)
return(nodes, edges)
def try_delete_nodes():
doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
nodes, edges = init_graph_construction(doi_list,1,1)
#print_simple(nodes, edges)
# list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
# doi_list = []
# doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
# valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py)
# print_simple(valid_nodes, valid_edges)
def try_import():
nodes, edges = input_from_json('json_text.json')
print_extended_graph(nodes,edges)
#nodes, edges = try_known_publications()
#nodes_new, edges_new = input_from_json("json_text.json")
#print_graph(nodes_new, edges_new)
try_delete_nodes()
#try_import()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment