Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.


Select target project
No results found


Select target project
  • baw8330/projekt-cis-biochemie-2021-22
  • bax5890/projekt-cis-biochemie-2021-22
2 results
Show changes
with 610 additions and 82 deletions
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
# Projekt CiS-Projekt 2021/22
Directory for the functions that create the fundamental graph structure the first time the program is called.
## Files in directory
- Führt den grundlegenden Graphbauprozess aus. Die Input-DOIs werden
als Klassenobjekt zur Knotenmenge hinzugefügt und über einen rekursiven Aufruf
wird die angegebene Zitierungstiefe in beide Richtungen zu den Kanten hinzugefügt.
- Die DOIs, die in den Zitierungen des Inputs zu finden sind, werden ebenfalls zu Knoten
und je nach angegebener Höhe oder Tiefe wird dies für weitere Tiefen erneut ausgeführt.
- Wandelt die berechnete Knoten- und Kantenmenge in eine Json Datei um.
## Authors
- Donna Löding
- Alina Molkentin
- Xinyi Tang
- Judith Große
- Malte Schokolowski
\ No newline at end of file
# -*- coding: utf-8 -*-
Functions to add citations recursivly for multiple ACS/Nature journals
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = ""
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def get_cit_type_list(pub, cit_type):
    """
    Select which list of a publication should be traversed.

    :param pub: Publication whose citations or references are requested
    :type pub: Publication
    :param cit_type: variable to differentiate citation and reference call
    :type cit_type: String
    :return: pub.citations if cit_type is "Citation", otherwise pub.references
    """
    # NOTE(review): the extracted source stopped after the `if` line; the two
    # return statements are restored from the attributes the callers iterate
    # over (node.citations / node.references) -- confirm against the repository.
    if cit_type == "Citation":
        return pub.citations
    return pub.references
def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
    """
    Bind the module-level node and edge lists, then process one publication.

    :param input_nodes: list of nodes from Processing
    :type input_nodes: List[Publication]
    :param input_edges: list of edges from Processing
    :type input_edges: List[String, String]
    :param pub: Publication which citations will be added
    :type pub: Publication
    :param search_depth: current depth to search for citations
    :type search_depth: int
    :param search_depth_max: maximum depth to search for citations
    :type search_depth_max: int
    :param cit_type: variable to differentiate citation and reference call
    :type cit_type: String
    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean
    :return: the result of create_graph_structure_citations for pub
    """
    # The sibling functions in this module read `nodes` and `edges` as
    # globals, so the caller's lists are bound to those names first.
    global nodes, edges
    nodes = input_nodes
    edges = input_edges

    return create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
:param pub: publication which citations will be added
:type pub: Publication
:param search_depth: current depth to search for citations
:type search_depth_max: int
:param search_depth_max: maximum depth to search for citations
:type search_depth_max: int
:param cit_type: variable to differenciate citation and reference call
:type cit_type: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds a node for every citing publication unknown
adds edges to added citations
citations_pub_obj_list = []
for citation in get_cit_type_list(pub, cit_type):
not_in_nodes = True
for node in nodes: # checks every citation for duplication
if (citation.doi_url == node.doi_url):
not_in_nodes = False
if (not_in_nodes):
if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly
citation_pub_obj = get_pub(citation.doi_url, test_var)
if (type(citation_pub_obj) != Publication):
if (cit_type == "Citation"): = search_depth + 1
else: = -(search_depth + 1)
# adds just the edge if citation already exists
if (cit_type == "Citation"):
if ([citation.doi_url,pub.doi_url] not in edges):
if ([pub.doi_url,citation.doi_url] not in edges):
return citations_pub_obj_list
def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    """
    Recursively add citations level by level until the maximum depth is reached.

    :param citations_pub_obj_list: list of publications which citations will be added
    :type citations_pub_obj_list: List[Publication]
    :param search_depth: current depth to search for citations
    :type search_depth: int
    :param search_depth_max: maximum depth to search for citations
    :type search_depth_max: int
    :param cit_type: variable to differentiate citation and reference call
    :type cit_type: String
    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean
    :return: None
    """
    for pub in citations_pub_obj_list:
        # adds next level to nodes/edges
        new_citation_pub_obj_list = create_graph_structure_citations(
            pub, search_depth, search_depth_max, cit_type, test_var
        )

        # If the maximum depth has not yet been reached, the function calls
        # itself with the publications found for this pub and depth + 1.
        if search_depth < search_depth_max:
            process_citations_rec(
                new_citation_pub_obj_list, search_depth + 1, search_depth_max, cit_type, test_var
            )
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    """
    Bind the module-level node and edge lists and start the recursive search.

    :param input_nodes: list of nodes from Processing
    :type input_nodes: List[Publication]
    :param input_edges: list of edges from Processing
    :type input_edges: List[String, String]
    :param citations_pub_obj_list: list of publications which citations will be added
    :type citations_pub_obj_list: List[Publication]
    :param search_depth: current depth to search for citations
    :type search_depth: int
    :param search_depth_max: maximum depth to search for citations
    :type search_depth_max: int
    :param cit_type: variable to differentiate citation and reference call
    :type cit_type: String
    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean
    :return: None (the original `return(nodes, edges)` is commented out)
    """
    # The recursive helpers read `nodes` and `edges` as module globals.
    global nodes, edges
    nodes = input_nodes
    edges = input_edges

    process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)
    #return(nodes, edges)
\ No newline at end of file
#!/usr/bin/env python3
import json
from input_fj import input
# -*- coding: utf-8 -*-
Functions that format the computed graph to match the interface to the output-part
Functions that format the computed graph to match the interface to the output-part and saves as a json file
# creates a list that contains a dictionary for each node
# the dictionaries store the values for the attributes
def format_nodes(V):
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = ""
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import json
def format_nodes(nodes):
    """
    Create a list that contains one dictionary per node.

    :param nodes: list of publications to export to json
    :type nodes: List[Publication]
    :return: list of dictionaries with the keys doi, name, author, year,
        journal, group, depth and citations
    """
    # NOTE(review): the attribute accesses on the group/depth lines were
    # stripped from the extracted source; `node.group` is restored from the
    # Publication constructor's `group` parameter -- confirm.
    list_of_node_dicts = []
    for node in nodes:
        # The signed group value becomes a label: 0 is an input publication,
        # positive values are citing publications, negative ones references.
        if node.group == 0:
            group_label = "Input"
        elif node.group > 0:
            group_label = "Citedby"
        else:
            group_label = "Reference"

        list_of_node_dicts.append({
            "doi": node.doi_url,
            "name": node.title,
            "author": node.contributors,
            "year": node.publication_date,
            "journal": node.journal,
            "group": group_label,
            "depth": node.group,
            "citations": len(node.citations),
        })
    return list_of_node_dicts
# creates a list that contains a disctionary for each edge
# the dictionaries contain the source as keys and the target as values
def format_edges(E):
def format_edges(edges):
    """
    Create a list that contains one dictionary per edge.

    :param edges: list of links to export to json
    :type edges: List[String,String]
    :return: list of dictionaries with the keys "source" (edge[0]) and
        "target" (edge[1])
    """
    # NOTE(review): the append was missing from the extracted source; without
    # it the function would always return an empty list.
    list_of_edge_dicts = []
    for edge in edges:
        list_of_edge_dicts.append({"source": edge[0], "target": edge[1]})
    return list_of_edge_dicts
# combine the lists of nodes and edges to a dictionary and saves it to a json file
def output_to_json(V,E):
def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False):
:param nodes: list of publications to export to json
:type nodes: List[Publication]
:param edges: list of links to export to json
:type edges: List[String,String]
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to export nodes and links as a dictionary to json file
dict_of_all = dict()
list_of_node_dicts = format_nodes(V)
list_of_edge_dicts = format_edges(E)
list_of_node_dicts = format_nodes(nodes)
list_of_edge_dicts = format_edges(edges)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
with open('json_text.json','w') as outfile:
json.dump(dict_of_all, outfile)
#knoten = ["doi1", "doi2", "doi3"]
#kanten = [[1,2],[3,4],[5,6]]
if (test_var and json_file == 'json_text.json'):
with open('test_output.json','w') as outfile:
json.dump(dict_of_all, outfile)
with open(json_file,'w') as outfile:
json.dump(dict_of_all, outfile)
# -*- coding: utf-8 -*-
Functions to generate a graph representing citations between multiple ACS/Nature journals
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = ""
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
adds input dois to nodes and retrieves citations and references for input publications
# saves found citations and references in lists
references_pub_obj_list = []
citations_pub_obj_list = []
for pub_doi in doi_input_list: #iterates over every incoming doi
pub = get_pub(pub_doi, test_var)
if (type(pub) != Publication):
# checks if publication already exists in nodes
not_in_nodes = True #boolean value to check if a node already exists in the set of nodes
for node in nodes: #iterates over every node in the set of nodes
if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set
not_in_nodes = False #false --> node will not be created
if (not_in_nodes): #there is no node with this doi in the set
nodes.append(pub) #appends Publication Object = 0
doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list
# inserts references as publication objects into list and
# inserts first depth references into nodes/edges if maximum search depth > 0
for reference in create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var):
# inserts citations as publication objects into list and
# inserts first height citations into nodes if maximum search height > 0
for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var):
return(references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges():
completes inner edges between nodes of group height and depth
for node in nodes:
if ( < 0):
for citation in node.citations:
for pub in nodes:
if ((pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges)):
edges.append([citation.doi_url, node.doi_url])
if ( > 0):
for reference in node.references:
for pub in nodes:
if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)):
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False):
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
:param search_height: maximum height to search for citations
:type search_height: int
:param search_depth: maximum depth to search for references
:type search_depth: int
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
main function to start graph generation
# ERROR-Handling doi_array = NULL
if (len(doi_input_list) == 0):
print("Error, no input data")
# ERROR- if a negative number is entered for height
if (search_height < 0):
print("Error, search_height of search must be positive")
# ERROR- if a negative number is entered for depth
if (search_depth < 0):
print("Error, search_depth of search must be positive")
# creates empty lists to save nodes and edges
global nodes, edges
nodes = []
edges = []
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
# function calls to begin recursive processing up to max depth/height
add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)
# adds edges between reference group and citation group of known publications
# calls a skript to save nodes and edges of graph in .json file
#output_to_json(nodes, edges, test_var)
Dieser Ordner ist nur für uns intern, um Testläufe mit echten DOIs zu starten.
\ No newline at end of file
# -*- coding: utf-8 -*-
Functions to test and print the nodes and edges sets
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = ""
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.update_graph.import_from_json import input_from_json
from verarbeitung.update_graph.update_graph import update_graph
# a function to print nodes and edges from a graph
def print_graph(nodes, edges):
for node in nodes:
print(node.title, "\n")
for edge in edges:
print(" ")
def print_extended_graph(nodes, edges):
for node in nodes:
print(node.title, "\n")
for reference in node.references:
for citation in node.citations:
for edge in edges:
print(" ")
def print_simple(nodes, edges):
# for node in nodes:
# print(node)
# for edge in edges:
# print(edge)
print(" ")
# program test with some random dois
def try_known_publications():
doi_list = []
#url = sys.argv[1]
nodes, edges = init_graph_construction(doi_list,2,2)
print_graph(nodes, edges)
return(nodes, edges)
def try_delete_nodes():
doi_list = []
nodes, edges = init_graph_construction(doi_list,1,1)
#print_simple(nodes, edges)
# list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
# doi_list = []
# doi_list.append('')
# valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py)
# print_simple(valid_nodes, valid_edges)
def try_import():
nodes, edges = input_from_json('json_text.json')
#nodes, edges = try_known_publications()
#nodes_new, edges_new = input_from_json("json_text.json")
#print_graph(nodes_new, edges_new)
\ No newline at end of file
# -*- coding: utf-8 -*-
A function to return an object of Type Publication for a given doi
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = ""
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from input.interface import InputInterface as Input
from verarbeitung.test.input_test import input_test_func
def get_pub(pub_doi, test_var):
:param pub_doi: input doi to get Publication object for
:type pub_doi: String
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
function to return an object of type Publication for given input doi depending on whether its a test or url doi
#checks if it's a test and chooses appropiate function
pub = input_test_func(pub_doi)
#checks that it isnt a test and chooses standart-input function
inter = Input()
pub = inter.get_publication(pub_doi) #creates an object of class Publication
except AttributeError:
pub = inter.get_publication(pub_doi)
except ValueError:
except IndexError:
\ No newline at end of file
class Publication:
    """Publication object used by the test input.

    References and citations are passed as lists of DOIs (or None) and are
    converted to objects through the module-level helpers ref() and cit().
    """

    def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        # The `else` branches were lost in the extracted source; without them
        # ref(None) / cit(None) would be called and fail while iterating.
        if references is None:
            self.references = []
        else:
            self.references = ref(references)
        if citations is None:
            self.citations = []
        else:
            self.citations = cit(citations)
        # NOTE(review): the attribute name was stripped in the extracted
        # source ("= group"); restored as self.group -- confirm.
        self.group = group
class Citation:
    """Plain record holding the bibliographic fields of a citing publication."""

    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
class Reference:
    """Plain record holding the bibliographic fields of a referenced publication."""

    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
def input_test_func(pub_doi):
    """
    Look up a test publication by its DOI.

    :param pub_doi: DOI to search for in list_of_arrays
    :type pub_doi: String
    :return: Publication built from the first entry whose first element
        equals pub_doi, or None if no entry matches
    """
    for array in list_of_arrays:
        if pub_doi == array[0]:
            return Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7])
    # No test entry with this DOI exists.
    return None
def cit(list_doi):
    """
    Build Citation objects for every DOI that has a test entry.

    :param list_doi: DOIs to look up in list_of_arrays
    :type list_doi: List[String]
    :return: list of Citation objects; DOIs without an entry are skipped
    """
    cits = []
    for doi_url in list_doi:
        for array in list_of_arrays:
            if doi_url == array[0]:
                # Only the first five fields (doi, title, contributors,
                # journal, date) are carried over.
                cits.append(Citation(array[0], array[1], array[2], array[3], array[4]))
    return cits
def ref(list_doi):
    """
    Build reference objects for every DOI that has a test entry.

    :param list_doi: DOIs to look up in list_of_arrays
    :type list_doi: List[String]
    :return: list of Citation objects; DOIs without an entry are skipped
    """
    refs = []
    for doi_url in list_doi:
        for array in list_of_arrays:
            if doi_url == array[0]:
                # NOTE(review): a Reference class with the same fields exists,
                # but the original builds Citation objects here; kept as is
                # so the returned type does not change -- confirm intent.
                refs.append(Citation(array[0], array[1], array[2], array[3], array[4]))
    return refs
beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], '']
beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], '']
beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], '']
zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], '']
zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], '']
inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], '']
inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], '']
inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], '']
right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], '']
right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], '']
right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], '']
right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], '']
right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], '']
right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], '']
right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], '']
right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], '']
right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], '']
right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], '']
list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3]
This diff is collapsed.
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": 
["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]}
\ No newline at end of file