From 396593b91d358c5a2640db392d002b220573a7c2 Mon Sep 17 00:00:00 2001
From: Malte Schokolowski <baw8441@uni-hamburg.de>
Date: Sun, 14 Nov 2021 17:22:05 +0100
Subject: [PATCH] updated processing_test_doi_ueberarbeitet

---
 .../Processing_test_doi_ueberarbeitet.py      |  37 +++++++++-----
 .../__pycache__/input_fj.cpython-38.pyc       | Bin 0 -> 4341 bytes
 .../__pycache__/json_demo.cpython-38.pyc      | Bin 0 -> 750 bytes
 {input => verarbeitung}/input_fj.py           |   0
 verarbeitung/json_demo.py                     |  46 ++++++++++--------
 verarbeitung/json_text.txt                    |   1 +
 verarbeitung/json_text_json.txt               |   0
 7 files changed, 52 insertions(+), 32 deletions(-)
 create mode 100644 verarbeitung/__pycache__/input_fj.cpython-38.pyc
 create mode 100644 verarbeitung/__pycache__/json_demo.cpython-38.pyc
 rename {input => verarbeitung}/input_fj.py (100%)
 mode change 100755 => 100644
 create mode 100644 verarbeitung/json_text.txt
 create mode 100644 verarbeitung/json_text_json.txt

diff --git a/verarbeitung/Processing_test_doi_ueberarbeitet.py b/verarbeitung/Processing_test_doi_ueberarbeitet.py
index ac6ce63..bfa533a 100644
--- a/verarbeitung/Processing_test_doi_ueberarbeitet.py
+++ b/verarbeitung/Processing_test_doi_ueberarbeitet.py
@@ -10,12 +10,13 @@ import requests as req
 import sys  
 from pathlib import Path
 from input_fj import input
+from json_demo import output_to_json
 
 
 
-def process_main(array, depth):
+def process_main(doi_input_array, depth):
     # ERROR-Handling doi_array = NULL
-    if (len(array) == 0):
+    if (len(doi_input_array) == 0):
         print("Error, no input data")
 
     # ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird
@@ -30,14 +31,21 @@ def process_main(array, depth):
     edges = []
     
     # Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt.
-    for pub in array:
-        if (pub not in nodes):
+    for pub_doi in doi_input_array:
+        pub = input(pub_doi)
+        not_in_nodes = True
+        for node in nodes:
+            if (pub.doi_url == node.doi_url):
+                not_in_nodes = False
+                break
+        if (not_in_nodes):
             nodes.append(pub)
         else:
-            array.remove(pub)
+            doi_input_array.remove(pub_doi)
 
-    process_rec_depth(array, 0, depth)
+    process_rec_depth(doi_input_array, 0, depth)
 
+    output_to_json(nodes,edges)
     return(nodes,edges)
     
     
@@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max):
             # Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe 
             # noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich 
             # wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert. 
-            if (citation.doi_url not in nodes):
+            not_in_nodes = True
+            for node in nodes:
+                if (citation.doi_url == node.doi_url):
+                    not_in_nodes = False
+                    break
+            if (not_in_nodes):
                 if (depth <= depth_max):
-                    nodes.append(citation.doi_url)
+                    nodes.append(citation)
                     edges.append([pub.doi_url,citation.doi_url])
 
             # Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation 
@@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max):
 # Programmtest, weil noch keine Verbindung zum Input besteht.
 arr = []
 arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
-#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
-#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
+arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
 #arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
 
 #arr.append('https://doi.org/10.1021/ci700007b')
@@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
 nodes,edges = process_main(arr,1)
 
 print("Knoten:\n")
-for vortex in nodes:
-    print(vortex, "\n")
+for node in nodes:
+    print(node.title, "\n")
 print("\nKanten:\n")
 for edge in edges:
     print(edge,"\n")
\ No newline at end of file
diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb7f56fc9742c5d19e2fff3d15c51dc4d59e1b1a
GIT binary patch
literal 4341
zcmb7HOLN=E5yp!k2tM@mLmN9`9<>QKCY?$pm5s95vR95>>#P-LWnWlIp&$+@frbET
zX6Ut0;3e9_9+Lckq@1#j{tNp9_K=@|YcARRfE>1xIA0Gyic+l(0gFA|gPy@mPxse7
zPn%6w!}I!IfAAl!Y1-dOSbR(fckmaVfiSJFF}<%dqi-;?Z!(KjBI~fyw{;D?9aZ~|
zYHR%(+G^w;*86o`d!R9g)jri&?ZoIeAi1m#sSc^h8m#%L)^D*ETSB|Ume~s0Wwy%J
z(5|p`b_MM!yUMPiU1Ro+*14X3<ldje0~sc<Xb%(K4r9<mpE}w+kUR{Y`%ya?wtX(c
zK@>oXCx>l$5VY@Z?X*Y9gvU^Lxb@*9*G-r1M+p!8xcx9090ajQ8^h5ZKW6?Q>9L^i
zKIRFVsIPqqGiV(>Z+;N?6B!OC(M~cMZ==V0?8}1zy<U6_2zT%o*Fgdew$u8?z6RUD
zirA0EOzg+X>yIaUQ8-W=7*wG<dx)n=w(u7}0Lk=B^R&zW)iV>+$ShDZtAJXW4O(H^
zj`mvL?$~)nhB6BB<?-3{9`j|8HwH-zGwn@e!bM(XN$5>@lsjUwHwp$)<jr_;=nX=t
zmLxjW+!8@F<TjSTX;IuEQX@hx#OolhHQq$?)o-^pcb|%Yi`_?lB!k@tq1@dHce=3S
zD0n7!2Vqxq$A!>+9&n%U1)-e8`@83aFdk2&HyriG$GPKqVH`@&+rS4z14J`zefCS&
z7wp%Yscv^FG%ofJ1JBD{&pX61M#MKf?`Yyj^EdN-@+C~HHnBij)MtTQ!Cw&L7?z=W
zE<f;fsLc+%JZ!T(Y-g)9>!1Rq*+cyNq;EjD?4;>*Icaw+bsTxMbmy}p=~OtGs5mWJ
z_1UY7`=Skh4}U@Dsg)K@S7u9GFs8+qp}|*(tP;7@R<!r;;xA}Fj?PIQ=peI)Mv@aL
zul@&^OdA1O7+~d(##GOANB|d-M*7q!WxJG3CF`t8@;&W$t*Mn+uMFm#>eI^6d*$1j
zn#<nTrq#@TWit0vhwl2+$*Qlc%z=LWR6o^q?dU*_01&AWHH>hx+KAw-ReoW4TF+dJ
zs*j2he=WbUq((Hd#;79x#+Ike*dZQkZb}<5YGy9tvYItd>}e}&A=Fp4bCZS7)0-tQ
zc?W^d0^W^+p=^&QQKY!ndTtLQUkDFjH}>PSeh}S{SoaA<bJ{{QcPWay0J%w!Ub!$A
zf|+93I^e-Dy>{WH^j{=#a(JBUe&IdFB<h<{;-_oHH?|63xI2*t3Gc6$!ygf-4um&I
zCcq6kOM--97_;1x!3&w2`+>|I>hSz1%58r<4r0c?0ZkPtoh57qSee_!j{D8o!W2X)
zOv$Y%jGrk`f%0TeC}_z|u(<`m63UHp6KvlCJ`EJ=<R<KK!L9RUEXSLDfwIFpSctd<
zqUkMt8NXG%0nWmIRkcK0<o||axVoVkZxG`&>RgPQ=%CnBp8F7t)JFQ;bK$k{UJbrg
zQQp>u?;7wu`xM^#%-}Z=(<H-Ff2K|KqZ`u7^b`1Cro%UC+x#kKOm6~?x-9u5#;LF_
z<H?u=m_C*PjDL-E)(@l{i_P1&v3Jq)2cnno{leM=(do5W0Jo9HkP?Rh58^=}_*-}j
zc=!$)ev?R>$hSc9%9w|-?3lS7ycj1OX9_ps1P)(o-PsB$aeM$JK{!d)C7Y6s5&1pz
zoQr(2A0eo+-#sv}79#&t=Wl^imOM3>u>tEyph2cz(@qEhV6{_?zbh?fk-cIET!lD7
z!;ZcEmHx{w$tzQ*cY7Ej;onZz?xSt{5l4g_w}Tg<kfQ8l2`Yj(kwq`eL6*pEF2+$P
zIju3ThQgzX_zxj<Ox~qV_;(ag*2t|f()~rVI5U&{8p=hI(p?y$0aLgJ|0#M0H2LhI
zWJwuR8RMT|_F)c6?h7tu0*<WA92qk}1c+UqRseWN`6KI9h1slnYA7Ik8@hI>>&&4`
z!CzJ#SxV<)=s1}P7p;|mSOXZES?!fg5UF&yg23XN_4zymfhHiZnCH*3${Nxk_dOvH
zerMVs5PxOL8VZuehK5pvsH-SoaYH*PMt-i2tht>VNtABD*8|oqecK!C9nTzB`8lU_
zpML*RrCEq~i#kW|<y!jo<=SPU?9vey!9wAX_AVb$LWo-6tXwMK&*dnX7h~w#Bz_Yl
zukv6V`GcSgEAGepL2e?pR3_{+d52V~Vp4I4@+**j7>V3g+Ykzv`^|E-s(=x>MT<s(
zT~um9Ib?x{Rk9uym+Rq0k6a-b#O_oThYD(#Ji%ARk3lHcQ>@y$@y0c-&bb*T&fX9{
zuIsjDzu|W=>RdEaey43H2mS(#L@dJ%^uK5f73#=9iyC!gqP1RDDPASzddlr~sQ*dH
z?5w}C(2q)Xgg4B2X7SIFO$(_86dmI$u0nxTaCLfv`5P}y>VxFK9ecnJjWv;{X)P_Z
zHMC1;D`=O|(wbIY)@aPpXTTGUtrD-Cb8S0ay<6NI+UiI)+j!D7ccIK|QwH9o&U?T4
zu>17mkCYDSL9_0<J7p!kiTo9eQ{z@UZMj<uRrjXyBmN_pE&aLs<RBC%0Oq%VHbmu?
z?Y(1lZy6?0lzbA#`-@teZhGtg8JaFFjusfc=F)<nP+>9OK-u}QxFwa{W#_pa(C%sY
zc?+aX)k!btN#Fh26%zc;H|WR5?F6?ozPP+z$5rTCl(Fj4adzKO*N<Poc10YiSfom~
zW2orLotf8P42hP?avn^r;vvKsL1{_@Ok2qL)_>g!<*sFevY2bE0>Z8Gjb&Q@u`KJ~
zttzM7buJbttIabT<wJ>lmT9N@2o<eFCSw4~sez1dAfuVG0-(0x-v<2qeOb-sJ$P@z
zkLhA)Zlm&(hf#0rbKIv2;Bj)32S*B93b-*)C{dxp&&)xQit7O-t$vMW@Tf>Dvc3Wz
zN<D>TC~M~xb=Uem*fMQ3uyY6dU)A{o^mf+ss^_s}P+X*N_eF6CzUFzsA)u@17QmT_
z0B`PzpKt;PexGJ{u--|4^vi!mqA8AX<)j1}g?lR7stQqoW3drB50x$z<S1LKWh|Vf
p+JIWS^ImbUd!Ifi2!O2$4otVIly@DyVYFJUs&n0`;djN^_#b_}8^Hho

literal 0
HcmV?d00001

diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..519d14366f17ebf12bfdc3f6ae2e985ad9d6d229
GIT binary patch
literal 750
zcmY*XO>Yx15FLBHAK4~I^nk>H4{;(A=>gOWiV#wYI8i~X0*V%`me`3lb#@oqOAFeZ
z${q0!+8g{OUpe(JaANE#qS*4AdE+-8k2FVvK?=C+lOM%20Ql|5^+l09$D{8faNtD3
z)LAOYicTp39&q*zrU561FbpqI1^Yu{!?J2xGjr*i<qGxiOz`NR2nNnzO+5yNpwBwk
zgAWhap=VJSE@>BWG6wVl^eB4toORI!A@J!|M^OD}9eZ`s#XPtTUBVfvVjg-wLO;Na
z#Aobvv|~JWF|^4TOvrmYJ)_^Z*F6)R#~tf>7ueK0*o8N60PnF?Y-?-ZvZ`1K8}PbR
z1ix-0UAJl`F0Y6w)It~=Ry7yehO(@V_1D2t*Hv!Bso627#*&j^Vwo(pv5c29RGZRB
zVPkwVZ<VxB(KMpsmepu1bJ?hDSIscB<gFzy?6%9!>v`_#<n9suFDCe66WelEPFF~Y
zKS!}mDskd7EOSDeZg~FK#&v7vr4+wF-ND}e988{OM_MSIy(**;*~`*olk#x%TGdN&
zY_eH7(xc{zj6MrhsE?vFZMDe$BffnuR`pKv#ooXgI1st1a~BWY3_L&p68W7VrX;0(
x;=I1!2}zKvd#D;RbrX$E{8;7lrE-(yul%2{!CkK#uWH^(@yy9ILr6%U{srzD$y5LU

literal 0
HcmV?d00001

diff --git a/input/input_fj.py b/verarbeitung/input_fj.py
old mode 100755
new mode 100644
similarity index 100%
rename from input/input_fj.py
rename to verarbeitung/input_fj.py
diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py
index e6cd618..77ce148 100644
--- a/verarbeitung/json_demo.py
+++ b/verarbeitung/json_demo.py
@@ -1,27 +1,33 @@
 #!/usr/bin/env python3
 import json
+from input_fj import input
 
 def output_to_json(V,E):
-  list_of_node_dicts = list()
-  list_of_edge_dicts = list()
-  dict_of_all = dict()
-  for node in V:
-    new_dict = dict()
-    new_dict["doi"] = node
-    list_of_node_dicts.append(new_dict)
-  for edge in E:
-    new_dict_2 = dict()
-    new_dict_2["source"] = edge[0]
-    new_dict_2["target"] = edge[1]
-    list_of_edge_dicts.append(new_dict_2)
-  dict_of_all["nodes"] = list_of_node_dicts
-  dict_of_all["links"] = list_of_edge_dicts
-  return(dict_of_all)
-  with open('json_text_json','w') as outfile:
-    json_dump(dict_of_all, outfile)
+    list_of_node_dicts = list()
+    list_of_edge_dicts = list()
+    dict_of_all = dict()
+    for node in V:
+        new_dict = dict()
+        new_dict["name"] = node.title
+        new_dict["author"] = node.contributors
+        new_dict["year"] = node.publication_date
+        new_dict["doi"] = node.doi_url
+        
+        
+        list_of_node_dicts.append(new_dict)
+    for edge in E:
+        new_dict_2 = dict()
+        new_dict_2["source"] = edge[0]
+        new_dict_2["target"] = edge[1]
+        list_of_edge_dicts.append(new_dict_2)
+    dict_of_all["nodes"] = list_of_node_dicts
+    dict_of_all["links"] = list_of_edge_dicts
+    #return(dict_of_all)
+    with open('json_text.txt','w') as outfile:
+        json.dump(dict_of_all, outfile)
   
 
-knoten = ["doi1", "doi2", "doi3"]
-kanten = [[1,2],[3,4],[5,6]]
-output_to_json(knoten,kanten)
+#knoten = ["doi1", "doi2", "doi3"]
+#kanten = [[1,2],[3,4],[5,6]]
+#output_to_json(knoten,kanten)
 
diff --git a/verarbeitung/json_text.txt b/verarbeitung/json_text.txt
new file mode 100644
index 0000000..b5e7fa9
--- /dev/null
+++ b/verarbeitung/json_text.txt
@@ -0,0 +1 @@
+{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "doi": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems ", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"name": "Disconnected Maximum Common Substructures under Constraints ", "doi": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design ", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms ", "doi": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"name": "Machine learning accelerates quantum mechanics predictions of molecular crystals ", "doi": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID ", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies ", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"name": "BonMOLi\u00e8re: Small-Sized Libraries of Readily Purchasable Compounds, Optimized to Produce Genuine Hits in Biological Screens across the Protein Space ", "doi": "https://doi.org/10.3390/ijms22157773"}, {"name": "Accelerating high-throughput virtual screening through molecular pool-based active learning ", "doi": "https://doi.org/10.1039/D0SC06805E"}, {"name": "Compound Screening ", "doi": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.3390/ijms22157773"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1039/D0SC06805E"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}]}
\ No newline at end of file
diff --git a/verarbeitung/json_text_json.txt b/verarbeitung/json_text_json.txt
new file mode 100644
index 0000000..e69de29
-- 
GitLab