From 6011746eeab0c9919f29f6a8eb9e6a4f578c0c77 Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Fri, 10 Dec 2021 03:20:41 +0100 Subject: [PATCH] various changes and new implementations -MS --- verarbeitung/Kanten_Vergleich.py | 4 +- verarbeitung/Processing.py | 142 +++++++----------- verarbeitung/Processing_unittest.py | 72 +++++++-- .../__pycache__/Processing.cpython-39.pyc | Bin 7151 -> 7262 bytes .../__pycache__/input_test.cpython-39.pyc | Bin 3865 -> 7228 bytes .../__pycache__/json_demo.cpython-39.pyc | Bin 1162 -> 1256 bytes verarbeitung/get_pub_from_input.py | 41 +++++ verarbeitung/import_from_json.py | 71 +++------ verarbeitung/input_test.py | 137 +++++++++-------- verarbeitung/json_demo.py | 12 +- verarbeitung/json_text.json | 2 +- verarbeitung/json_with_citations_v2.json | 1 - verarbeitung/print_graph_test.py | 22 ++- verarbeitung/test_output.json | 1 + verarbeitung/update_graph.py | 108 +++++++------ verarbeitung/update_graph_del.py | 121 +++++++++++++++ 16 files changed, 467 insertions(+), 267 deletions(-) create mode 100644 verarbeitung/get_pub_from_input.py delete mode 100644 verarbeitung/json_with_citations_v2.json create mode 100644 verarbeitung/test_output.json create mode 100644 verarbeitung/update_graph_del.py diff --git a/verarbeitung/Kanten_Vergleich.py b/verarbeitung/Kanten_Vergleich.py index 0b3225d..da43e21 100644 --- a/verarbeitung/Kanten_Vergleich.py +++ b/verarbeitung/Kanten_Vergleich.py @@ -9,7 +9,7 @@ def back_to_valid_edges(links_from_json, processed_input_list): function that deletes edges, if one ore two including nodes are deleted nodes ''' - list_of_valid_edges = links_from_json + list_of_valid_edges = links_from_json.copy() #iterates over all edges from old graph @@ -23,6 +23,8 @@ def back_to_valid_edges(links_from_json, processed_input_list): # increases counter if adjacent node was found if (adj_node == pub.doi_url): found_adj_nodes += 1 + if (found_adj_nodes == 2): + break #removes the edge if less than 2 adjacent nodes found if (found_adj_nodes < 2): diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index a3e245a..4beb8e8 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -13,22 +13,21 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" -from bs4 import BeautifulSoup as bs -import requests as req + import sys from pathlib import Path -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +from os import error sys.path.append("../") -from input.interface import InputInterface as Input -#import input -from input_test import input_test_func + +from input.publication import Publication +from get_pub_from_input import get_pub from json_demo import output_to_json def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): ''' :param doi_input_list: input list of doi from UI - :type doi_input_list: list of strings + :type doi_input_list: List[String] :param search_depth_max: maximum depth to search for references :type search_depth_max: int @@ -38,24 +37,19 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t :param test_var: variable to differenciate between test and url call :type test_var: boolean + + adds input dois to nodes and retrieves citations and references for input publications ''' + # saves found citations and references in lists references_pub_obj_list = [] citations_pub_obj_list = [] for pub_doi in doi_input_list: #iterates over every incoming doi - if(test_var): #checks that it is a test and chooses test-input function - pub = input_test_func(pub_doi) #creates an object of class Publication - else: #checks that it isnt a test and chooses standart-input function - #print(pub_doi) - inter = Input() - try: - pub = inter.get_publication(pub_doi) #creates an object of class Publication - except ValueError: - continue - except IndexError: - continue - + pub = get_pub(pub_doi, test_var) + if (type(pub) != Publication): + print(pub) + continue # checks if publication already exists in nodes not_in_nodes = True #boolean value to check if a node already exists in the set of nodes @@ -81,34 +75,34 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t return(references_pub_obj_list, citations_pub_obj_list) - + def complete_inner_edges(test_var): ''' :param test_var: variable to differenciate between test and url call :type test_var: boolean + + completes inner edges between nodes of group height and depth ''' for node in nodes: if (node.group == "depth"): for citation in node.citations: for cit in nodes: - if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): - edges.append([citation.doi_url, node.doi_url]) + if (citation == cit.doi_url and [citation, node.doi_url] not in edges): + edges.append([citation, node.doi_url]) if (node.group == "height"): for reference in node.references: for ref in nodes: - if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): - edges.append([node.doi_url,reference.doi_url]) - + if (reference == ref.doi_url and [node.doi_url, reference] not in edges): + edges.append([node.doi_url,reference]) -# adds a node for every publication unknown -# adds edges for references between publications + def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): ''' :param pub: publication which references will be added - :type pub: Class Publication + :type pub: Publication :param search_depth: current depth to search for references :type search_depth: int @@ -118,29 +112,24 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ :param test_var: variable to differenciate between test and url call :type test_var: boolean + + adds a node for every referenced publication unknown + adds edges to added references ''' references_pub_obj_list = [] for reference in pub.references: #iterates over the references of the considered paper not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes - for node in nodes: #iterates over all nodes in set of nodes - if (reference.doi_url == node.doi_url): #determines that the node already exists + for node in nodes: #iterates over all nodes in set of nodes # + if (reference == node.doi_url): #determines that the node already exists not_in_nodes = False #boolean false --> node will not be created break if (not_in_nodes): #checks that there is no node with this doi if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit - if (test_var): #determines that it is a test and chooses the test-input function - reference_pub_obj = input_test_func(reference.doi_url) #creates an Object of Publication Class - else: #determines that it isnt a test and chooses the standart function - #reference_pub_obj = Input(reference.doi_url) - inter = Input() - try: - reference_pub_obj = inter.get_publication(reference.doi_url) - except ValueError: - continue - # nur aus Testzwecken, da noch was bei Input falsch ist - except IndexError: - continue + reference_pub_obj = get_pub(reference, test_var) + if (type(reference_pub_obj) != Publication): + print(pub) + continue reference_pub_obj.group = "depth" nodes.append(reference_pub_obj) # appends the object to the set of nodes @@ -148,19 +137,15 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references # adds edge only if citation already exists - elif [pub.doi_url,reference.doi_url] not in edges: - edges.append([pub.doi_url,reference.doi_url]) + elif [pub.doi_url,reference] not in edges: + edges.append([pub.doi_url,reference]) return references_pub_obj_list -# recursive function to implement height-first-search on references -# references_pub_obj_list: input list of references as publication objects -# search_depth: current search_depth of height-first-search -# search_depth_max: maximal search_depth for dfs def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): ''' :param references_pub_obj_list: list of publications which references will be added - :type references_pub_obj_list: list of objects of type Class Publications + :type references_pub_obj_list: List[Publication] :param search_depth: current depth to search for references :type search_depth: int @@ -170,6 +155,8 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m :param test_var: variable to differenciate between test and url call :type test_var: boolean + + recursive function to implement height-first-search on references ''' # adds next level to nodes/edges @@ -180,14 +167,12 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m if (search_depth < search_depth_max): process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var) - - def create_graph_structure_citations(pub, search_height, search_height_max, test_var): ''' :param pub: publication which citations will be added - :type pub: Class Publication + :type pub: Publication :param search_height: current height to search for citations :type search_height_max: int @@ -197,28 +182,24 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test :param test_var: variable to differenciate between test and url call :type test_var: boolean + + adds a node for every citing publication unknown + adds edges to added citations ''' citations_pub_obj_list = [] for citation in pub.citations: not_in_nodes = True for node in nodes: # checks every citation for duplication - if (citation.doi_url == node.doi_url): + if (citation == node.doi_url): not_in_nodes = False break if (not_in_nodes): if (search_height < search_height_max): #checks if its a test and chooses input function accordingly - if (test_var): - citation_pub_obj = input_test_func(citation.doi_url) - else: - #citation_pub_obj = Input(citation.doi_url) - inter = Input() - try: - citation_pub_obj = inter.get_publication(citation.doi_url) - except ValueError: - continue - except IndexError: - continue + citation_pub_obj = get_pub(citation, test_var) + if (type(citation_pub_obj) != Publication): + print(pub) + continue citation_pub_obj.group = "height" nodes.append(citation_pub_obj) @@ -226,20 +207,15 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test citations_pub_obj_list.append(citation_pub_obj) # adds only edge if citation already exists - elif [citation.doi_url,pub.doi_url] not in edges: - edges.append([citation.doi_url,pub.doi_url]) + elif [citation,pub.doi_url] not in edges: + edges.append([citation,pub.doi_url]) return citations_pub_obj_list - -# recursive function to implement height-first-search on citations -# citations_pub_obj_list: input list of citations as publication objects -# search_height: current search_height of height-first-search -# search_height_max: maximal search_height for dfs def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): ''' :param citations_pub_obj_list: list of publications which citations will be added - :type citations_pub_obj_list: list of objects of type Class Publications + :type citations_pub_obj_list: List[Publication] :param search_height: current height to search for citations :type search_height_max: int @@ -249,6 +225,8 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m :param test_var: variable to differenciate between test and url call :type test_var: boolean + + recursive function to implement depth-first-search on citations ''' # adds next level to nodes/edges @@ -261,8 +239,6 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m - - def process_main(doi_input_list, search_height, search_depth, test_var = False): ''' :param doi_input_list: input list of doi from UI @@ -276,6 +252,8 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): :param test_var: variable to differenciate between test and url call :type test_var: boolean + + main function to start graph generation ''' # ERROR-Handling doi_array = NULL @@ -291,10 +269,10 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): print("Error, search_depth of search must be positive") - + # creates empty lists to save nodes and edges global nodes, edges - nodes = [] # create empty array for the nodes - edges = [] # create empty array for the edges + nodes = [] + edges = [] # initializes nodes/edges from input and gets a list with publication objects for citations and references returned references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) @@ -307,14 +285,6 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): complete_inner_edges(test_var) # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes,edges) - - - # only for unit tests - if (test_var == True): - doi_nodes_list = [] - for node in nodes: - doi_nodes_list.append(node.doi_url) - return(doi_nodes_list, edges) + output_to_json(nodes,edges, test_var) return(nodes,edges) diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/Processing_unittest.py index 772d572..f0a0a87 100644 --- a/verarbeitung/Processing_unittest.py +++ b/verarbeitung/Processing_unittest.py @@ -1,14 +1,22 @@ import unittest + from Processing import process_main +from import_from_json import input_from_json +from update_graph import check_graph_updates class ProcessingTest(unittest.TestCase): + maxDiff = None + + def testCycle(self): nodes, edges = process_main(['doiz1'],1,1,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) nodes, edges = process_main(['doiz1'],2,2,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) #def testBigCycle(self): @@ -19,48 +27,88 @@ class ProcessingTest(unittest.TestCase): def testEmptyDepthHeight(self): nodes, edges = process_main(['doi1'],0,0,True) - self.assertCountEqual(nodes,['doi1']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi1']) self.assertCountEqual(edges, []) nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2']) self.assertCountEqual(edges, [['doi1', 'doi2']]) nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2', 'doi3']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) def testInnerEdges(self): nodes, edges = process_main(['doi_ie1'],1,1,True) - self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) def testRightHeight(self): nodes, edges = process_main(['doi_h01'],1,0,True) - self.assertCountEqual(nodes,['doi_h01']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h01']) self.assertCountEqual(edges, []) nodes, edges = process_main(['doi_h02'],1,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) self.assertCountEqual(edges, [['doi_h1','doi_h02']]) nodes, edges = process_main(['doi_h02'],2,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) def testRightDepth(self): nodes, edges = process_main(['doi_d01'],0,1,True) - self.assertCountEqual(nodes,['doi_d01']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d01']) self.assertCountEqual(edges, []) nodes, edges = process_main(['doi_d02'],0,1,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) self.assertCountEqual(edges, [['doi_d02','doi_d1']]) nodes, edges = process_main(['doi_d02'],0,2,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) + def test_import_from_json(self): + nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True) + nodes_new, edges_new = input_from_json('test_output.json') + self.assertCountEqual(nodes_old,nodes_new) + self.assertCountEqual(edges_old, edges_new) + + def test_deleted_input_dois(self): + nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True) + nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True) + nodes_new_both, edges_new_both = input_from_json('test_output.json') + nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True) + self.assertCountEqual(nodes_old_single,nodes_new_single) + self.assertCountEqual(edges_old_single, edges_new_single) + + + + +def keep_only_dois(nodes): + ''' + :param nodes: input list of nodes of type Publication + :type nodes: List[Publication] + + gets nodes of type pub and return only their doi + ''' + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list + + if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc index f16ff9fc08e6d1b3ae555bd65772c0b66c866779..a86e804167e4c2fdf2e9a1f4b354ef6506381740 100644 GIT binary patch literal 7262 zcmeHM&2J<}74OfPp7GeRy<YEEfKG@4Lu_w|Ac|H9?P|kDMA?;vHwX=mn)XzCrpG<q z<LVllwR$9^jSyEPE=VXW9FUMW<q&a1${!J_ubgs4;=+mFtNxg2d%PseT3nb>b=T*s z?yC2DzgMr_#l@zA-)}$p*u8yIQT|Sa*`JQWZKU`kT$s{PnA%a9*3p>W(V4-_z~~sb z*8+1`>(o>Q`Fc<vS{+MOJY&#!(qz^jR9W8$7KW`(OXi!w;&7?6B=ZZw3&Z8kGV(39 z_@UBSVN2`<+*jE$TfzMjTV<DUUt??RGVYhzI=h1VI=jKHvFjhIoh!`TQQ9w_>^AR> z!>;h7Ft$Zx_q@>KuJCNv?s0e2x4AdsUhIX!4|{gk7jD*d&lAU<7uv&dApB9_*>~>l z+<f4QG5732H0Gfj#LedTK9=C^C=6Zuy-z=35V2w3349dqN5O#<x?%ss4<GsVLzf!g z9W!6_?RR+e>F>P_`@S3CQFglh=pYKB<M_ZodAaMyn<E|#ysmiT^*6qK`(&@bH4gpF zzB}9-^WGNo(&jdg*jT>Q=Xe0+L0e6YZCCVhH$BcHexh$}-9%|&d%PF;UHMWd)_b0C zM&mtXm!h#4jfEp3XAnnW*C0XpuZ4RJ=`K?IS6q6^#6U6=<!$AIKTc{wNotAySew*^ zn$#bwLOam;>q4izsvNeMI<bU-zE>xWq&_f{#=tz#_*+byKnD(gbof?c@xCyg)RMZ? z1dMd*VKY&lSV@g)1jaEVQ85e2S(-Ex>fNNa!(WJ|Y}Lxvg~UYbf^7Zhu$35zxvydW zP4|wbC`~*6-5R;v9oj7N9Y3VE5Byl%${J;bO%-;uPhISN9u4jHe>fd29*w;DgYEY) z?x#C~Lj^u)PDf<(#h%N%eTR7?(RYUK&*hA``@=CFhH6PQvtIUo#G#b?9{0ko7f(gW z$>&P4{ZO1YXW#RCeNjwW%$e1mnzU5@`Eyn!&7D(tv2c!Dp08-Ot|<HN9uS0AWd45k zVm?8zz!(j)-H_R19@t$s2u?k~T(S0E6a}6ePG8(*EXHGDTWOCZ+QW!>u@ucc?2UJX zQu&&BeX-JV;j`gDNCBE`10LQ>P1(euO-g%#6FO3ZwuGCMHAmQak(zSCv`z|!mq}~x zXyk<~HG4c7kJ1|VhS8CizSQL&7Qm4f$cY69mSI?&>9(i#IbF)9Z@1E=svf7yr?n$p z$!n&2ms-VYrdOuZkxg)-y@A9-x?WCFsqfUO7v@ZLWYSg`382WyUfG~q+9*Y)Rw3li z3jYe0=wCbCTf6Va9*=jW)!Ti?7rS@;olV%Xfp;KwyZ&Zw_zuM85sWn7^L#N5d%Kka zx&sz5aJDsilwS5jU-)j|pLp4W$Tzrw`QmTkqO2OKp;>BEZK;;3=?1PX)lh3lP1Vwx z#;V#vTGIH}(7UVBnEXduTt|{dkf=_O@%zn0P1J$PRG71E<TTX1IniX>#zaqGk_I~3 zY~<8|-qVC}q;eak&Ma!Ak+Ki3N;w+(!pse7QjU?!G0A{QIen6n7`YrWnPyQV<@67K zc4#mirp`F|+Y~pM-6^(O;_-Ro@C^RC(GWz5IRQeDtnINL@&(8<D8jNzDA>6LBGgL* zC?7pFWj?KCtq<CDzJP`2OSq(sLZM<#`~;#S`FxS`%~J2;c8y=AG1uwxA}$>r139Tg zs-v4!#_aM{s#_}_8@i0AcBIs8w8ZPUC>ALXR7Jh2u9Z3dDq1Rx*+2yu0;w#2L53JJ zQ3j+kznhp$1!t<ruTScU2}Y!CUk4k)J38DtTu*BJSKvDBK;eG^i<**K!Jvr+ZZ*NJ z9Z^dZ$*oPMCzb==1yf2dN;HsD23lebw4O#v0jA8`4#B8sF`+&UsA040iyEmuU{z8b z_s4Ts5+JV_a%s*{S(rxqxbJt#fhsNNu^$B3a2uA`V^d|6%r#e<y*(8-HRL%<&swwc zLAqlOVuXc%R@N*f&1JCJISbOX3)Xz*IZH`%=d7SK<4oIr-e@i4o<Xf-P1h!PN(KZ+ z@<-E6W%lfjkHdp7Iu6UNm4h>!gWct}Plp!N$!*}F-Q=&}mf@N3D=Cx{ZX|<pI&(;% zOGNXpqu6fntGK1D*<In6sH2290aOB;U!xo$zOz!QW5Hdg0MD1vZ67KACN4w}O^^Yh zJfnq@EVLTxKo9y7u5wfb@k9@lB;wyhM*7AZ6PR!qZ(x~xQ+Ui7stx$b$TRhdlC=Q$ z1|ttucMs-ZuY8NRc#YguE&S7&U5URO5YH8z`LHvHe7Yk$1rl`YIl_`O^(hXm1mP4= z==>bvdEVS}AxfPu3Z^9c3{<&?WsdzL05w0-l27du>ki2#W~{ln@AFt}W|*;~@H8Ml zCnx7YTazlCuH&dl{?v@`iEI}vD$(@~^qoN*!Lf5a^p2<9EBbjFYi;!ZHBxNhqO4l# zl5VM-YAaa!U$ZBelX!9td*Z}P^)u|5*HqYZdk%sD&<a9mM=Q28W6#p5u`h%?GnFVF zq(Euzd{xh!<&%`w{nqS}GaFL^?<oka_AJnOL37SU=0&0M3?k=~CsvYzNi!ze{vyO# z-GGPp_)EYEc=Ki4o{2XXbKcCBp0AM{Nt{<GN51h4aV{6r7l1yE<rVZN&-TmlCXK3K z{rU0cJk%90ahf*ExX#R`6ueoEc`5|UUd8&9u_GD!`69HL?Zf}Uh``oG#DD%1Jxb5v z3_F(lD$O1zr;x~z^ftX*kYm{?=SB%23NXxoN^;{h;>MzPMN>}0={1c045><uRD16X zUdof@FVPJD5+^|!fLIfVyVI$VJWHG%UqM-yWlfg9gR(*8p5kgqI!=`2f6A!pt~P;x zD$K-W<fXz}Mbx#HAbwwXsG^Qqj24#Aa4OXw>&PP~Bday{pKv6mTyaih_1UAUg{O<~ z7_2FR?ch)Os~!)hp!)BbMndx`%qBHRb}Ht2<|AQd1?J6lhS4s9&CW2{(DlQLTNw+N zi~Ji&{?P>AP%@Cf-BC`~en1}@HgIx;yDo#Su5eFo=Cu{-p@4esj}FI(^&&(tiej+j zk$19H9gr>P`TZq8+l^EYyf8a6$_No17i9>K6N03NDT6eeB6x6SSE*J4R(iC=f}Kk< zC!r$t*;y^X8<+9WQ^-wQg;k(6`v)Y&M5W3qLX7ZkWik<V)K_tGgukn|G`@-UwtBC9 zHLW{{pmrT6U2q(CEcy|TQD``h2WbJaO$PyCd}pF+J$@9Y^$|Yo1pXe^AtcqJnA4}4 z)S&7t`n4k0e`|)?<dM3wnt$za=tD;K!9$+9I}Q5cgU=DeNUl$|aIZqDJQHNAM+Puk yOnUg4HK1TFm!UK3dqaGN3cT-=KE&jv81&+*fuLE7{+hh%)_1LS>ow~JrGEqA-^}d* literal 7151 zcmeHMPi))P8RtJy6fMh+<2ZlUv0b-8o$aJSP+&nZq)nGDLlXpWnyrKfjTR}1GDS*{ zPf2W0zySBy^R9C*0k%UAJM6OEy5kNz?8L(^D+ctm+Xn2k-}fj|k|U>EW*-Jj!h3vw zzehfPzwh_GkIv$vDdG3*r@waIdP|c2LY3*Cipnjd_!%-+vL#oxWmmBkSG84Fb4!8N zE7`g%p{xgG+Yr}^UBR^+n7sviL6*Eqw|cnf8lTEyTqUUWmh5FwHiI+06?;XL7lO0B zReKfXs=N5HWS?_u?h>wR?y`FZ*Yoa*dluIV?y7qZ*Ng6&dmh(I?q&C)d+B4@zU=B- zQvJ&Qws~h5Hn|^#vBe{+?S&q5xMw+5n>qcC#k@ZAVlU)=*tVKJce0^79)IF_q177( z-0ugT_0H|BtM?r~WS-THhAeb~*ffXtuodq`Vdz*NeEu63BG#>U0w2|TQLrn7u3JC$ z!#&@6=uqbmhpx{%*842_{1@K3b<YWKD_hM@v>OG{lX%zPf34}q8+{gay(YhY?fRRy z_IEm)!_eR8IK7=AYj3(<YCd3*I}{J~C2l~f%jx2~o-^crYZz=r!~T7=Xb&9T*;h9= zucA`B8}^6%Zpb~>a+)4mmV9bya4+VK7IugRMMEyyqDD84!lp*D$G=5fD@eDI;=dwm zOCt@blt}MMKl$TGPgM2_mlFLz9+kseTt1LS#(pg^4wXdz6m36}!Z$RDt9w6?CHj9P z4XPhW;Tqbsz3rJc<r-9LRN*B^zA!SAa#v5xuD+|VmaB{^vNZVFpp{hE&w1&voRozs zsY$tEgk~ZgR%kt0;+W@ZiJYhh8syYQ3yFr&3)DCGj4y~@rRc3DB&#ZV|1hW~T2g8$ zP@t0Yg)B*?HU8b~JIv`>Zsa$z{R{k<-^@BigGCKi)S@9)i$y)_!@HB&e6R1#pIpp~ zIfEKOw~j0sdk$-M8m`ypokq`jDi+1n?+tN3)Jp1|jj~!1gJQQl=7mi!o`@3b&y{5P zAwO=-j_0>KJYO_lGiyDvXaS)4YtBlVyQWac#||5BYPJig`_2wfh(~n&R`y^%!8pe$ zO|zWPwT3LPnobZLxq-Q2t(_<eJSP-mAKpnz;uSIl>Rqog9hbCpmtc}EwLRYG4|f8; zDGWqvK5~Mg_kG49mYR1%*L#|kOJU@Caatw~!L6ja)9-tso0i%PxJ-5C^`ggK`f8JT zxc!E(K@Gffpc$s6F?ngJb<B`ri|dPNZB`M}<)iO5Ju_~Z?0jnEdy$@>EJqL5h<3US z(%*EgSY)Q%F4HKinW`wzcEO&c)iB~fLnC`~jV@`WkenL1tY1pZ!j}GXt9f(#!`Nf- zwlII&@B4iFw!gIjqt^9y`F7La7+b(yzVR5=n(cT#ABOGinF@wOw_|{1v%i;~^FyEe zPT=o**^P(?xq|iL8_1+3P1Y1c)?`!FWks&ZRoOr?HBDSKS*JePP)uz_t|HYG_8R6j zf&Su08c{|PM(|fCa58>}BRP?~vMa+tJwi!A+uI{W^j#mRiPBXQwX34n5@orowiT{D zmYK!1!_uTp7$0kJK}b_D9_Fj73u)R|S}D;Wtt6y{=vApcmKF+WMVpWo4q5}vRbj}q z{k;h!GAowrfq>dcLu?vaX-OEURL`{Ve!a|yPuMcDw36#b%xY9Gd2ZW_*%Fn_LXG0O z&d$@EuTl1OWVVWl>=hcOVwljaEPIvOR+~`|ybX-`At=8gq&`AVyoOBD4ZLl<Z~2P6 zR+LyBJu|dN9#KJiU!X#?mq=ZD?|&MVTzRB(8RZM3GTw>;s-pL{2AYB&G`KccOLX=- zkdU$~u|I<%Kj8HC6p*A5zRNX`WMzLjsXz<R3Ys*=G^y`BEhy0((`0$?lbJT6Ni{JV z2A4s4!lU9QN)qnF=qhc<lO>SexNiV*L=V=1zL?NHvluPfgua-GM!MqsV-D#7@-s$R zm}WHQHqm<0@tfpO<-B7(@q+*s+Hzdib0^v-SZJ;^>vrJ8vGrgg^f*ou3RT=rbI3q- z{N;JDkTjQ*j#^bQI_JSt;@%l>=Dxs5gEW^r4c3Qu*f*f)^#%5ATr!{%Fa;kn0s|vd zum#Ghlo5;tUfH)$s+;T*E@^do*Vs84BLIAXN(5U*)>+`$K2s=7&Nu8bby`J6m}V=; z0;KplGBW8Vj5e9^%zTr1uL|?NOy*sIc~^9dfk9W7<yBeCpMf_SbV|aY<IE!oZ+3kI za|;g;uqFQxK5hnJjmrtju6$m~dH`|FB`;BRz6+;eqT!i+EgmY)pGuI@&BE840#E#( zq32l9g=}y+P7pD<q5^NvA?w91<^&MXkrRX$Y1|83O#!Cd(-hiyf-pO7?YU5+!KVc` zl06N%FUiTcUsr^JrfWF;k!LgI45HWs0Sf56fw5C?BCOe$Lhs3B_>8t4#n3v&A0ovD zGHKb6YpNkLYMnvG|03~$^oZi-5FZFG>zN|HqHTuw{{HgBSNJif$$Oaxm7_BUX4ZP* zXR+tb4zz9#$WvN#E*MV>h|?eh@4XHbfcL(M%ggiL;>@lw@-{Q#yGSMST&IX{Iait- zvvYaxEGGRH>EGi$I@Zkc9?hRY_sQ{|HHY`|XF5uI1vky%y<*M_!BdRNw`)wORw4T3 zY488YI<3eG`wsMv=#BI+J(ttl<Yi{;Ch)O=Ayb$U>~@jZEgwDe=8uxwCT9N)X_nlm z_0Ej`6Ze&ngx~iW3h?)&gX{>u$`RB)ka2XPy2M^bT@`gj)Ni1!QN1lW3X;lkAcQ|C zB8}V12>v3+an2>L5#A#rk$QsQc<wRsGFma48(iV)fr52ZmtvdGaJ1Ap1rL^SnACA> zD|b!7O`jw>S1B4vGelIf?EHQXPt68{V5?s~BAG`qzaknr!OVZp3WC$V$~6>&^0i+2 z<Rzrcbwi(HpVMHTY1R?J#{E?hJY2WJX#CDxhdcXM$E`DjLJob_fNv9sdE$88k7C5W z9(((nvlFuYIexsr<-__yss>(|mih=h^NjcC94EqGjIblTLlGNc6#dGMdNq{l23w^D z0b1dg(h1K#J9RpNJr*|FUdaI*7hy(QgfB(dP>ji9I~GY48J+pkYHluijvt!P`j~D` zTE+nnj;lx|#P-w`wMMS2YHD;0gj<UPAbS%d>++rYm9*SI+_KqdqzjD(K9qDK7Nb&W zG(5-+P&69|!{Yl6nr(F_zQI&lUD=6kgL#7?!ppIZIMX0i<2}T8`}nvL_&bcgk)#^c zvWQjoS!Gj1M(IlnBQqmT&9*7vD89Iev$S2IuPlh@_o8fHDc+cEGaYoZT_=;7y##ue wDSKy8RqT-Qb`M{J0`GgEVksscMxz~(wKByxaXiLBSR~cBX{;GHjLVe%1q>U3RsaA1 diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc index 604973a2f2c133e5085aba44dcabe0ac4fa9ac05..550ce300289531fa2018d232b0f3bbf9986d3cd0 100644 GIT binary patch literal 7228 zcmc&&d3+;f8J<bfHa&LR?%lF1*#p=B?QW74bwxxMMG@AO3Sub6Y@e^`>}HehWQw+S z1qH!d#0xJj0YyAfM8yld@B;6nc%$NaE1rnQ^L{g#d^5>k{`~!He>30rKJWKl&-=}n zWXj8y^;!IT;I>=JH*U17`$RDRvJ_6?)mYPIS+HPNt)iu@efBj8B(Ak>!IIVFfmAVd zpsUzLIfbrEtYSC1(ZjI^Y4mbTqYwQUsMuJB<rw5R#A~=>SJK5^3}FRUQeK7CSVPQ0 zpVstY7;CYPnh}iR2#$hVwCTrs9ElA$s**qk8?lMH&HhF005)R_j>a+69*eD7yPjHJ z)Gk97$KiOKK<yaDv5lJTymDH*9NUpc0XwLD7*52)B_b_yl>}`Ek;5bKNbIEcQ8)>Y zrd^iS37yptPR3(!3bDuHaoDAEm28J`Djts~5I+r1#FMng({;2JI2}*M8PuGK-FS-D z?9rN)*n_9yENUk3G@Pw9=V;9;oP($1Txy<yBF+<yj=ma`I3Ld>Uc$3b4)HbEiz$G( z!nEM1XkEIv78T6EA+itqQRR3b(5%A&)KDifi-T}EHUiBE8t^bj<N`b!&(WyVIEr~J z;JL&XaR|@jcwu011TMn!@d6?j;}X13G`KWiY&|Z+i|}G%FTqRkGLDx9R!4Hx%kc^# zuf*kem1uAUS)a#6x)U2HzZ$P0{#v{aujhCLuB84VT&DF$QGNs7Nc>HBGv31SDy{cp zr;8cNSK}Ju*Wx<7mE-lef%aE0e!933SK>yzjp*BP6W+n`W?ni2YBzE1JMk_e@5X!Z zUXC~Lx=QS+-HaRXKHNg&{rCWG<#@f;Uaz%Va5FxL+lYJ!AI3+dTIMA&kH+ozC_YBb z$MFe#lH(l#7sudEd<vf?@)>*<pEESa;x2q1Um)^Dd<kFXcsK50758Y5TX7G*g0B+& z8orKi=vd#p&c<C@o5daY7QRjGcko>-alDt;?YL9tc^o}`58o&91N;y_3bc2LcjkIL z?!}Ms6QcLwr}!DipW}YUy#se^{RxzRfy2aqiC^K@Ix@Y}at!z5H~1|zzr*kG2abQl z1ClYLi|295f5M-M{{?@=-?ZiZ+HxBnz~Av8HUGdv_@|-Sj(_3bm)o!^Ht%vJcqNS$ zIn}HBRKFTf%hYl;sD{+ATA@~|Rcf_bqt>c*YDA5yBh-3zq}rg4QW>>TZBm=n7Im~b zMjflRs;oLr9j{JMV`^M&Q`=Rpk}c+8Pgq-1wq+fbLv!~+a(sMSHnG&dXKruRnJRlu zy|$Fz<#>8F#dYCdlCyqZyLdIu<;}BBwa(vt$X>J-S;C05KjEf4+n@8E7!f?F?<sxn z()VtC@6q@4h;?!LP+~EWuxx9>I+#N5gq7`ExVkSB{Oz1AyX6C!*}1(X)SaC&$#9bK z>X~V$29cY2XPjE5>NLDkeY#Y3-ST{+<;9zyRV^o(3D0$EmA+8#DsRrMH8S3eUKM(r zs+Jp#%yiw&RGbS`O(JU>SBN>RZwZ+RH-}Sl<A;uQxLa;LyzM5n%cD8DVvtQP^#mm? zb?ID}l9X?<+(Bv|oS51<d3Hm&jmb01RZmTx=6I94oQbhLZhfEH?@dlQV~w#{Ul_YU zxn*~+a=f`(Wilc-wb?nZ<f(=?K0CiO*b+<AbG0eSuaV}>N~i4s`$XFv<~-7JsVHpu zSrjUkw3}@|^-w~$f9Ij(Vq!5Fw0~&PzQ*<fnW$`mq}~Ea$QDR~Tj1g@`4m1lbTGZh zy_oG@xHW7;&}>bZG+8E@`gCS0*pI?Ln^ezKlqbD%+)Q(;W_IaTwOr|rH$CmloIzLT z`E5VHz3=6=P>oD|?>;r<#k#N8wYGpO47t*CS9<OWr#B^C;cTh9JUgpuaAj{~Q%mV) zF2xkfX)N`ymeQPCb(e{uwB5fAiEWZgR&&xA;LS>>>@@!e>~Y&&L;3&EquW}<`VVBY znvghGO)FQ`rc{R}ofYc-M_p<K`)|ACt{{=wl)H*UF-3n3R|xOi4XU?L+1`z{+M1or z6}vshtEyaaV5(l@>9TjutGl^kdS9IfOSzgWcA@N1oo>wOeU&SwXqMOaLSUSa80SOd zyl+e(FwO@Cg@{2RG${B61OfvA>dr#0*rT&t$Sn*SX)ff(+kkwrH%OgU-I6=U-oPEF z+F!ouFCX=nkNeAy8~&KR_D5@Z1{|`|tJ9L{!myE;0Cd#`<%|76a@smA?)0obNKgxb z_b^PM6(%3|DH+GYNX|wWvLA#Y+zOK~_W6`DWFhXN(C)Pm^~x3nUJItzf=|uN_K+Hx znmsEC5T+-B??}VV^hD4s%xowBKF2bRIWp=LZPSrar@-CPtQp1${N#$u%~ny<*K7*s zJ-RFb!(?P1i3>&uT;yXeNM`6l+9qw~@|KHc7Njk3QHZ%H7%qfG?Sf#KML;b`O~8mu zBa8?TQY(Vbh=!YfM9|ctMXgL@jtpu=+jJDvfsjqkB5YC)so>nNL%=Y8z$QXAIg7e5 z*rYCSVX#S1Hv}P@oJCz2Y*H7vFxVtGAU&;CN;!6lMfhzA<yGbu3DKcumqTQkZd51^ zHOmYUBTSBmtPzxDM}p>yor30z@t}GBtd`vogy4c$P#(vvFu917CsC20g^ocBZ9xms zpj;3O8gtBAJ5)n7Rt*ue8d(r^Or$TY#))Rn+ByYkqm1h!LE0#bkOYwolAPj-w#~pq zB3r@XYm%@hR7PrWjDCRim>m$gy#_&}L<wTlpch1f2-0dhxTQ@gy4R$_PC?_u$+}3; zIB}Y)usewDE=i(87^1P-h?rqyL9{j^5z_J<^Cud29L--A#2m+nVFjHsjz14lX(Z$L z^B@bF{O4l_j4#K-_^6q`kAz`@1xEfpVuUfafauKHe7RH5C~5w>NYE&0dcl$rG}3g@ zwo~Y%@!AXug9WkL3T7DN5tY?Dj1A$-ood?=*%1D^NNrpY3%VnlbVuY1^*Y{+1Z1nX z_8?u6><%G`#_A+uhLi=-kVKj%T)t$Ua1x|_qx)z)h_TUw1+gIJnI>W4XPOReoM|G5 zXPQ<<9JbE1e2GVoJ1RdYOn&4%YUf9vTahS&82so3kw}6>9e3hH+7sm@a(GFS@qH{v zYag2@CRfKEo=!)ZtcrJA5VM-tQRbUOs$rlGG|5R7rHM$yaa%Q9U{+&(9!uCxPo<#o zFp;NHFNk&{U&t1gl8yPsQfjvB&A2P&F&WmcXvU3qTKa8~eubdl&gh^2uKv~8%U3>) zSx51wWjN)R{3gn8YYX%Ht8)#0dGsgzD(ci~$}K5W_>;>JA{rL#I+Yo(#Mdhv_%$~w zlRs0>KAB=nOBQ@B)T}4JMj|4aHnjYtHKeGnP#mt7-HIwz-16*9@LjVF%D1m-H{@dw zH{=E$jJkK?Yvk#PnaRG7L)@orP={5qta#GI4F!rB3e>@HM>L|GTCpSqid85FQ6y1r z?RK-a3jOCUZrGkE_q%=vX1vzYFSzu(t@KF;>X@slljZekV}>`r+DZ&1)63I?>EZSE ys6Cq4U~iC%_jMxkf1NQKwPjUC|Lav;Ihv4FO(~=Gf^13**{kd!dxbsl(0>4@w+>MN literal 3865 zcmcguS#ujj5T4!Dv65~1h~pzMS0D)jEUXo0fN(evu9y&WE)I*V9?NShk}|uZDlX+I z`62M=Dt^(v^5iGpdE)EY)q15A6qTZoYI^#Z?(P1zXJ(r-Gi8C_!5_ahU!N1=cNVh0 z1qkn91TUFFTo)wBc!=@9bXhXVf@DE5Amu0zDG$k{0u>($*CLxrm~&L7Da?7ArWvZV z4VtAnn#a6=ai(p%1)8TtItyuumfM1+6Mc~uXoXfGoul)tg^kzSv_?<Rlk`;Eq^Ic_ zx_}it*jAzo^ejC`7h$_ZmuU@p9&9Vq^Hil5=tbCGqIIf4|8ioRq8e>bogCO2^a`Hz zJZz@+o2HlPReFtHhx`V;2}`YQCb`Ve6?%)_MwEBxYFi}Q3cXA3(fiPTK%3BAOP(@I z*XTp~2-0=>82V2V{TzKlpVDWLKBpVdZzcM9+M+M$CZsRvRvLYQw&^SS8rpB@+f;jo zzN7ESh4u&9q1!21i*%cQBoF$Zs7br2{w(cMiwJs2d(=+#OVp-)@}WPV!!-J`EXbm? zWl5Ihl$@3`vLa{YoSc^n@{C-RXXTPyZZEhi)Y%r*JFRRVF^>+4dub7j6s|!6onnxw zJH>)eE}FVI*zNeOX6X02trB}BKjt~WCPuIWGBk$56GIc!7+Rp_Fb8T4^Pst5fy6+l zT3bNMcgta6Sa`r5`C(C806nB4EcURdEybY?O^L*|I5xJb<)}zK-y5h-ln?!|Bcth7 zuNx|VcM$ee5ET!41J!MIqPhNQ${saC8I_gXlS+15GKehjQK{vJx+KV8qkLQS2K{O& z$_28sr!17Ca!m3}3QUSjY$m)x)ilVlP!&vnZnv)Nd>u#??A&N}Lb-F@4|g{G?e&|g zcOVbLotD2ItoLJK{b#9~YFGN<pxfRV6Z~#}5PG2u!dm}cWP6_9^+V5Nkb@NvVHDE8 zZP;eni0RQ{<{i~WZd=DWj?r!2k)9WoJ?~DB1|8<7J@4+I*@-O*hfG|Ct|cb3tTXtD z7y&0>8(My>7P2=Juv-k|=*N88KNleUbN_G`C|-Y@|J&DdTu?Ju@wfKL3b}M{{XieN zDYi%Rql8bvRP!8|-*$#*n9f3XhJz$9vHVytm2Qoi^gqE#Epf8TOit6*8FSfd7?Wi$ zqzU|+y166IbCjfRmRsB}qaSagDU3~w;5(2|Tobpq?qkyr4Kh~6p{a7A5%ZwtD(fvx zbDHKgEofTQ)Ls=2?EB`>G|`&d;%<)HqFUNIL3nY(Pe0*wPWag;yn#=61D|lOo$v;( zno*_W2cg&7^O{OE?*+Q8t5#IRq2b|tiSk+>S&%1yi)K%EsJA!hww^`AfR6-$oj7g0 zW~htc$3Wm5QV?!IXbT1aKwP_TYCx{sw{T`!2_SPr;};`LWM~-_OM_w&gJKatd656r zFvQ)tyY|q)*;v?8tB9=5G2vID`0c7EK#nc-45p~i?Du7t6gMXhbz5VAYTX=8zaZW) zT+Kwqic^09BjDps7&$(1P-)H3mjAm_bqPtR%OKU9s$z0;IL!YceZHd{j7QUF3%-JS ze%&qT%cSmRZ<D%fC$~u*-$=MZ>Uy!GX(JMelQZVzq)txk<Rr@)W6K)pvPQg&r=EXQ zcZ)icqx#Wo=3+al*B((gZYfC*UIoq%vku~nf;i(roXH?gEfWNZ>LBn&CZ3OSOF9>3 zkLEMkumYhTQ915Zl9BedIh^K_fNA8$X=G<I3@14enbXWf2E<8Z$1TTb;VF$tug0T+ z8smZR5|TiTxaR$hv}R_rZ@^<kngk26FLS!Q{S9Cl<HhWRZ>9jYj(iz&V7-rMiEG`h zWRZrYJi-)wQRfRLc0-`V&6#imGN~KG&TnFW^fc6+xHTr+8W}gnM7t&65GqXyWOl4T zR;1vt5Esat28T|;VRphd0}fk9zDc^2f&*?09LQ;v;Nx_`XmBV62i&+@1{~NDx6#v5 zaA*P?$fr*mfkTb0>Z-mz^s%b=0@N3PJ{y%jVM_SW3;Mq70KXB7UKz0nb#(8rGYC*Y z%m5F++m*_b)W$B%2xAuHr~LMQ=<Q3Mfc3cSxHDOMeO#QdHbz#G{g9hF@r^~Cad9Xu z;+V`x<F<yrYcf}e{_fyUj>vvH{`$PCKPWe#sw`COiftEd+b*3mR*V&M%~<0By2LX6 HOBwDjI*G|J diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index 20afc4f92dda6846dc720c9f3eb15bed60c69002..3a90cf68398464b1132fc65f1a598bac313a51a3 100644 GIT binary patch delta 584 zcmah`!Aj&n5bf%8lF@Opqa!FHf`VXhSa&bG9=v(?A}Hn(q8)KGcE(OeUCA(qIV^Zk zBo`463Vwn3HSq^L`3+VlD&Fj(-|JM>dugiAhhB!9pwSp6xcWDbLDRl+p65J+$DM+S z;4t*8AXR&@^w_^JkcGV;r&0t_UWeJ5ja_~!qkIt-ng%*ZhENz$D5RN3@E1n&2lx-I zyiI#`OE=gW8u=4ls*mbp31j&yb^BBJy`x<|mhP?LUd5f<A}BH-Jwri82GC=QW`JyF zEHgN)kjzA;$W(4CgBzR;=;d_JQtS>ZnRQQrY!Xfw#=rn_bp4wo8@~vYvDHTse^6+C zClpN-UVI8s$H_Wzt+m1wF%J{Zj5^{VIcusgQGT4NZmNFnC$T8t<jm0rUxlYt%C)MI zv`8PPp+``#xEqRsl>)}o!>(&f2McmSS8Ju|aUi`q@9d!ct{+5^{Gl5r5j-1zxZ8=s zYP8jSwa}?9;;Y!#*7KN4#t{fuP{UZ(z+eR0bOhuq7F;H0kYr1PzFL|6;`{!1f=#~x D!@Gj$ delta 453 zcmYjMy-veG48FTul9tc_rAV#l09{Ze1T2W9Vqs^1sTER)npC7*lj?F2e~B16Ah95n z7hqtAfoEXi362+F;u-Lz1!3zx+n?>T{Z+m%+is;Yhd{dTukMZUXn(dO4c(F7Gu%X+ zMO@M=-{sOAMhOpH*3-Nw$Su5{@8XRHg+^%t<0PDMT}2x6LvG{@@8N3xjPGzQ-z1r3 zLXMXd*!dgTwyMf4U^)LL?UPlA`vl-u3~*Ha0lfGWK%fcAh`=eD;7k)F#b9ZwrNqM> zlv1#ygM{X39-WYfW~K}Jq^Bg+^A~O9_Gky;Lc4$-I!0~%Q0igi`LWbl5T3`^_Arh@ zNBEawOL-Dsw2kS`HJH!UijqcSKa{k{Q!g2fp=3d2>i|8=3;M!|29C=ZUsQBM(Poq^ ri9x{p>BY0N$F#n0LmFWNmu5PTc?*~}<!fO0-`s-z*FF2<gEoHv<xOQJ diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py new file mode 100644 index 0000000..a8636a4 --- /dev/null +++ b/verarbeitung/get_pub_from_input.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" +A function to return an object of Type Publication for a given doi + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +sys.path.append("../") + +from input.interface import InputInterface as Input +from input_test import input_test_func + + +def get_pub(pub_doi, test_var): + #checks if it's a test and chooses appropiate function + if(test_var): + pub = input_test_func(pub_doi) + + #checks that it isnt a test and chooses standart-input function + else: + inter = Input() + try: + pub = inter.get_publication(pub_doi) #creates an object of class Publication + except ValueError: + return(ValueError) + except IndexError: + return(IndexError) + except AttributeError: + return(AttributeError) + return(pub) \ No newline at end of file diff --git a/verarbeitung/import_from_json.py b/verarbeitung/import_from_json.py index 9fe099f..11e5d82 100644 --- a/verarbeitung/import_from_json.py +++ b/verarbeitung/import_from_json.py @@ -15,46 +15,19 @@ __status__ = "Production" import json -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') import sys sys.path.append("../") -from input.interface import InputInterface as Input - - -class Publication: - #def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - def __init__(self, doi_url, title, contributors, journal, publication_date, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - self.references = [] - self.citations = [] - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date + +from input.publication import Publication, Citation + def create_pubs_from_json(input_dict): ''' - :param input_dict: dictionary read from old graph Json File - :type json_file: dictionary + :param input_dict: dictionary read from old graph Json File + :type json_file: dictionary + + creates list of publication retrieved from old json file ''' #iterates over the list of nodes @@ -67,29 +40,24 @@ def create_pubs_from_json(input_dict): def add_ref_and_cit_to_pubs(input_dict): ''' - :param input_dict: dictionary read from old graph Json File - :type json_file: dictionary + :param input_dict: dictionary read from old graph Json File + :type json_file: dictionary + + adds references and citations to retrieved publication list ''' # iterates over the list of edges for edge in input_dict["links"]: for source in list_of_nodes_py: for target in list_of_nodes_py: + + # when correct dois found, adds then as references/citatons to publication list if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])): - new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date) + new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference") source.references.append(new_reference) - new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date) + new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation") target.citations.append(new_citation) - # adds reference to current node - # if (node.doi_url == edge["source"]): - # new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) - # node.references.append(new_reference) - - # # adds citation to current node - # if (node.doi_url == edge["target"]): - # new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) - # node.citations.append(new_citation) # adds edge to list list_of_edges_py.append([edge["source"],edge["target"]]) @@ -97,8 +65,10 @@ def add_ref_and_cit_to_pubs(input_dict): def input_from_json(json_file): ''' - :param json_file: Json-Datei for the old graph - :type json_file: Json File + :param json_file: Json-Datei for the old graph + :type json_file: String + + retrieves information from old json file to be reused for new graph construction ''' # creates global sets for nodes and edges @@ -117,5 +87,4 @@ def input_from_json(json_file): add_ref_and_cit_to_pubs(input_dict) - return(list_of_nodes_py, list_of_edges_py) - + return(list_of_nodes_py, list_of_edges_py) \ No newline at end of file diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py index dcd476c..7172d57 100644 --- a/verarbeitung/input_test.py +++ b/verarbeitung/input_test.py @@ -1,51 +1,31 @@ import sys sys.path.append("../") -from input.publication import Publication - - -class Publication: - def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - if references is None: - self.references = [] - else: - self.references = ref(references) - if citations is None: - self.citations = [] - else: - self.citations = cit(citations) - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date +from input.publication import Publication, Citation + def input_test_func(pub_doi): + ''' + :param pub_doi: pub doi to find publication in list_of_arrays + :type pub_doi: String + + returns the publication class for given doi + ''' + for array in list_of_arrays: if pub_doi == array[0]: - pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7], array[8]) + pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) return pub def cit(list_doi): + ''' + :param list_doi list of citation dois to get their Citation Class + :type list_doi: List[String] + + returns a list of citations objects for given doi list + ''' + cits = [] for doi_url in list_doi: for array in list_of_arrays: @@ -54,6 +34,13 @@ def cit(list_doi): return cits def ref(list_doi): + ''' + :param list_doi list of reference dois to get their Reference Class + :type list_doi: List[String] + + returns a list of reference objects for given doi list + ''' + refs = [] for doi_url in list_doi: for array in list_of_arrays: @@ -62,27 +49,57 @@ def ref(list_doi): return refs -beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], ''] -beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], ''] -beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], ''] - -zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], ''] -zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], ''] - -inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], ''] -inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], ''] -inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], ''] - -right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], ''] -right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], ''] -right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], ''] -right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], ''] -right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], ''] - -right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], ''] -right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], ''] -right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], ''] -right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], ''] -right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], ''] -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3] +beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] +beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] +beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] + +zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']] +zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']] + +inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] +inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']] +inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []] + +right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []] +right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']] +right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']] +right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']] +right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []] + +right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []] +right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []] +right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []] +right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []] +right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []] + +large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] +large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []] +large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12'], []] +large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']] +large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']] +large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']] +large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']] +large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']] +large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']] +large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']] +large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12']] + +large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] +large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] +large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']] +large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] +large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i'], ['doi_lg_2_h21','doi_lg_2_h22']] +large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] +large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12']] +large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']] +large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']] +large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']] +large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] +large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, + right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, + large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, + large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, + large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23] diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py index 893945e..1ad0002 100644 --- a/verarbeitung/json_demo.py +++ b/verarbeitung/json_demo.py @@ -12,11 +12,11 @@ def format_nodes(V): list_of_node_dicts = list() for node in V: new_dict = dict() + new_dict["doi"] = node.doi_url new_dict["name"] = node.title new_dict["author"] = node.contributors new_dict["year"] = node.publication_date new_dict["journal"] = node.journal - new_dict["doi"] = node.doi_url new_dict["group"] = node.group new_dict["citations"] = len(node.citations) list_of_node_dicts.append(new_dict) @@ -34,14 +34,18 @@ def format_edges(E): return list_of_edge_dicts # combine the lists of nodes and edges to a dictionary and saves it to a json file -def output_to_json(V,E): +def output_to_json(V,E, test_var): dict_of_all = dict() list_of_node_dicts = format_nodes(V) list_of_edge_dicts = format_edges(E) dict_of_all["nodes"] = list_of_node_dicts dict_of_all["links"] = list_of_edge_dicts - with open('json_text.json','w') as outfile: - json.dump(dict_of_all, outfile) + if (test_var): + with open('test_output.json','w') as outfile: + json.dump(dict_of_all, outfile) + else: + with open('json_text.json','w') as outfile: + json.dump(dict_of_all, outfile) #knoten = ["doi1", "doi2", "doi3"] #kanten = [[1,2],[3,4],[5,6]] diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index fbdfcd7..de3ddf5 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "doi": "doi_h02", "group": "input", "citations": 1}, {"name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "doi": "doi_h1", "group": "height", "citations": 1}, {"name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "doi": "doi_h2", "group": "height", "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]} \ No newline at end of file +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "input", "citations": 5}], "links": []} \ No newline at end of file diff --git a/verarbeitung/json_with_citations_v2.json b/verarbeitung/json_with_citations_v2.json deleted file mode 100644 index 60c6160..0000000 --- a/verarbeitung/json_with_citations_v2.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00249", "group": "input", "citations": 5}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "group": "height", "citations": 1}, {"name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00741", "group": "height", "citations": 0}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "group": "height", "citations": 8}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00250", "group": "height", "citations": 12}, {"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input", "citations": 1}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth", "citations": 14}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth", "citations": 9}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth", "citations": 100}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth", "citations": 48}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth", "citations": 32}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth", "citations": 100}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth", "citations": 100}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth", "citations": 100}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth", "citations": 25}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth", "citations": 12}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth", "citations": 100}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth", "citations": 74}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth", "citations": 65}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth", "citations": 97}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth", "citations": 100}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth", "citations": 99}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth", "citations": 100}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height", "citations": 0}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "group": "height", "citations": 0}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "group": "height", "citations": 0}, {"name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "doi": "https://doi.org/10.1021/acs.jpca.1c06264", "group": "height", "citations": 0}, {"name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00226", "group": "height", "citations": 0}, {"name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "group": "height", "citations": 11}, {"name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "doi": "https://doi.org/10.1021/acs.est.9b06379", "group": "height", "citations": 100}, {"name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci049714+", "group": "depth", "citations": 99}, {"name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci3001277", "group": "depth", "citations": 100}, {"name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.5b00559", "group": "depth", "citations": 98}, {"name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci7004498", "group": "depth", "citations": 100}, {"name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020155c", "group": "depth", "citations": 100}, {"name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm9602928", "group": "depth", "citations": 100}, {"name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. Sci.", "doi": "https://doi.org/10.1021/ci025599w", "group": "depth", "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file diff --git a/verarbeitung/print_graph_test.py b/verarbeitung/print_graph_test.py index 3209485..efb62ec 100644 --- a/verarbeitung/print_graph_test.py +++ b/verarbeitung/print_graph_test.py @@ -51,6 +51,14 @@ def print_extended_graph(nodes, edges): print(len(edges)) print(" ") +def print_simple(nodes, edges): + # for node in nodes: + # print(node) + # for edge in edges: + # print(edge) + print(len(nodes)) + print(len(edges)) + print(" ") # program test with some random dois def try_known_publications(): @@ -79,15 +87,15 @@ def try_known_publications(): def try_delete_nodes(): doi_list = [] doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') nodes, edges = process_main(doi_list,1,1) - print_graph(nodes, edges) + #print_simple(nodes, edges) - list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') - doi_list = [] - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py) - print_graph(valid_nodes, valid_edges) + # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') + # doi_list = [] + # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + # valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py) + # print_simple(valid_nodes, valid_edges) def try_import(): nodes, edges = input_from_json('json_text.json') diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json new file mode 100644 index 0000000..b013fbf --- /dev/null +++ b/verarbeitung/test_output.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph.py b/verarbeitung/update_graph.py index 5711068..9853456 100644 --- a/verarbeitung/update_graph.py +++ b/verarbeitung/update_graph.py @@ -13,17 +13,27 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" + import sys from pathlib import Path -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +from os import error sys.path.append("../") -from input.interface import InputInterface as Input -from input_test import input_test_func + +from input.publication import Publication +from get_pub_from_input import get_pub from Knoten_Vergleich import doi_listen_vergleichen -from Kanten_Vergleich import back_to_valid_edges +from update_graph_del import delete_nodes_and_edges def get_old_input_dois(old_obj_input_list): + ''' + :param old_obj_input_list: list of publications retrieved from old json file + :type old_obj_input_list: List[Publication] + + function to return pub dois for old publications of group input retrieved from json file + ''' + + # new list to save doi_url for each old publication of group input old_input_dois = [] for pub in old_obj_input_list: if (pub.group == "input"): @@ -31,55 +41,65 @@ def get_old_input_dois(old_obj_input_list): return old_input_dois def get_new_input_dois(new_input, test_var): - doi_input_list = [] + ''' + :param new_input: input list of doi from UI + :type new_input: list of strings + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to return pub dois for input urls + ''' + + # new list to save doi_url for each new input url + new_input_dois = [] for new_node in new_input: - if(test_var): - pub = input_test_func(new_node) - else: - #print(pub_doi) - inter = Input() - try: - pub = inter.get_publication(new_node) - except ValueError: - continue - except IndexError: - continue - doi_input_list.append(pub.doi_url) - return doi_input_list - -def delete_ref_nodes_rec(pub):#, old_obj_list): - for reference in pub.references: - for ref_pub in processed_input_list: - if (ref_pub.doi_url == reference.doi_url): - delete_ref_nodes_rec(ref_pub) - if (pub.group != "input"): - processed_input_list.remove(pub) - -def delete_cit_nodes_rec(pub): - for citation in pub.citations: - for cit_pub in processed_input_list: - if (cit_pub.doi_url == citation.doi_url): - delete_cit_nodes_rec(cit_pub) - if (pub.group != "input"): - processed_input_list.remove(pub) + # retrieves information and adds to new list if successful + pub = get_pub(new_node, test_var) + if (type(pub) != Publication): + print(pub) + continue + + new_input_dois.append(pub.doi_url) + return(new_input_dois) def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False): + ''' + :param new_doi_input_list: input list of doi from UI + :type new_doi_input_list: list of strings + + :param old_obj_input_list: list of publications retrieved from old json file + :type old_obj_input_list: List[Publication] + + :param old_edges_list: list of links between publications retrieved from old json file + :type old_edges_list: List[List[String,String]] + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges + ''' + + # one global list to save the process of removing unneeded publications and one to save valid edges global processed_input_list, valid_edges - processed_input_list = old_obj_input_list + processed_input_list = old_obj_input_list.copy() valid_edges = [] + # save the return values of global lists + processed_input_list_del = [] + valid_edges_del = [] + + # get dois from lists to compare for differences old_doi_input_list = get_old_input_dois(old_obj_input_list) new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var) + + # retrieve which publications are already known, removed, inserted common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list) - if (len(inserted_nodes) == 0): - for del_node in deleted_nodes: - for pub in processed_input_list: - if (del_node == pub.doi_url): - delete_ref_nodes_rec(pub)#, processed_input_list) - delete_cit_nodes_rec(pub)#, processed_input_list) - processed_input_list.remove(pub) - valid_edges = back_to_valid_edges(old_edges_list, processed_input_list) + + # deletes publications and edges from node_list if publications can no longer be reached + if (len(deleted_nodes) > 0): + processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, deleted_nodes, old_doi_input_list, old_edges_list) - return(processed_input_list, valid_edges) + return(processed_input_list_del, valid_edges_del) diff --git a/verarbeitung/update_graph_del.py b/verarbeitung/update_graph_del.py new file mode 100644 index 0000000..10ca42b --- /dev/null +++ b/verarbeitung/update_graph_del.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +Functions to remove publications/links from nodes/edges list, if they can no longer be reached + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import sys +from pathlib import Path +sys.path.append("../") + +from input.interface import InputInterface as Input +from Kanten_Vergleich import back_to_valid_edges + + +def delete_ref_nodes_rec(pub): + ''' + :param pub: pub go get deleted after recursive call + :type pub: Publication + + function that removes nodes of group "height", if they aren't reachable from input nodes + ''' + for reference in pub.references: + for ref_pub in processed_list: + if (ref_pub.doi_url == reference): + + # to find a cyclus and avoid recursion error + if (reference not in pub.citations): + delete_ref_nodes_rec(ref_pub) + + # removes publication from list after recursion and if it's not of group input + if (pub.group != "input"): + processed_list.remove(pub) + + +def delete_cit_nodes_rec(pub): + ''' + :param pub: publication to be removed after recursive call + :type pub: Publication + + function that removes publications of group "depth" from input_list, if they aren't reachable from input nodes + ''' + for citation in pub.citations: + for cit_pub in processed_list: + if (cit_pub.doi_url == citation): + + # to find a cyclus and avoid recursion error + if (citation not in pub.references): + delete_cit_nodes_rec(cit_pub) + + # removes publication from list after recursion and if it's not of group input + if (pub.group != "input"): + processed_list.remove(pub) + + +def delete_nodes_and_edges(input_list, deleted_nodes, old_doi_input_list, old_edges_list): + ''' + :param input_list: list of publications to get reduced + :type input_list: List[Publication] + + :param deleted_nodes: list of input dois which are not in new call + :type deleted_nodes: List[String] + + :param old_doi_input_list: list of input dois from old call + :type old_doi_input_list: List[String] + + :param old_edges_list: list of links between publications from old call + :type old_edges_list: List[List[String,String]] + + function to start recursive node removal for references and citations and to return edge list to valid state + ''' + + # global list to save the process of removing unneeded publications + global processed_list + processed_list = input_list.copy() + + for del_node in deleted_nodes: + for pub in processed_list: + if (del_node == pub.doi_url): + + # checks for every reference if it is citet more than once. If not it calls deletion function + for reference in pub.references: + only_reference = True + for ref_cit in processed_list: + if (reference == ref_cit.doi_url): + for citation in ref_cit.citations: + if ((citation in old_doi_input_list) and (citation != del_node)): + only_reference = False + break + if (only_reference == False): + break + if (only_reference): + delete_ref_nodes_rec(pub) + + # checks for every citation if it cites more than once. If not it calls deletion function + for citation in pub.citations: + only_reference = True + for cit_ref in processed_list: + if (citation == cit_ref.doi_url): + for reference in cit_ref.references: + if ((reference in old_doi_input_list) and (reference != del_node)): + only_reference = False + break + if (only_reference == False): + break + if (only_reference): + delete_cit_nodes_rec(pub) + + # removes publication of type input after start of recursive call to both directions + processed_list.remove(pub) + + valid_edges = back_to_valid_edges(old_edges_list, processed_list) + return(processed_list, valid_edges) \ No newline at end of file -- GitLab