From 396593b91d358c5a2640db392d002b220573a7c2 Mon Sep 17 00:00:00 2001
From: Malte Schokolowski <baw8441@uni-hamburg.de>
Date: Sun, 14 Nov 2021 17:22:05 +0100
Subject: [PATCH] updated processing_test_doi_ueberarbeitet

---
 .../Processing_test_doi_ueberarbeitet.py      |  37 +++++++++-----
 .../__pycache__/input_fj.cpython-38.pyc       | Bin 0 -> 4341 bytes
 .../__pycache__/json_demo.cpython-38.pyc      | Bin 0 -> 750 bytes
 {input => verarbeitung}/input_fj.py           |   0
 verarbeitung/json_demo.py                     |  46 ++++++++++--------
 verarbeitung/json_text.txt                    |   1 +
 verarbeitung/json_text_json.txt               |   0
 7 files changed, 52 insertions(+), 32 deletions(-)
 create mode 100644 verarbeitung/__pycache__/input_fj.cpython-38.pyc
 create mode 100644 verarbeitung/__pycache__/json_demo.cpython-38.pyc
 rename {input => verarbeitung}/input_fj.py (100%)
 mode change 100755 => 100644
 create mode 100644 verarbeitung/json_text.txt
 create mode 100644 verarbeitung/json_text_json.txt

diff --git a/verarbeitung/Processing_test_doi_ueberarbeitet.py b/verarbeitung/Processing_test_doi_ueberarbeitet.py
index ac6ce63..bfa533a 100644
--- a/verarbeitung/Processing_test_doi_ueberarbeitet.py
+++ b/verarbeitung/Processing_test_doi_ueberarbeitet.py
@@ -10,12 +10,13 @@ import requests as req
 import sys  
 from pathlib import Path
 from input_fj import input
+from json_demo import output_to_json
 
 
 
-def process_main(array, depth):
+def process_main(doi_input_array, depth):
     # ERROR-Handling doi_array = NULL
-    if (len(array) == 0):
+    if (len(doi_input_array) == 0):
         print("Error, no input data")
 
     # ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird
@@ -30,14 +31,21 @@ def process_main(array, depth):
     edges = []
     
     # Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt.
-    for pub in array:
-        if (pub not in nodes):
+    for pub_doi in doi_input_array:
+        pub = input(pub_doi)
+        not_in_nodes = True
+        for node in nodes:
+            if (pub.doi_url == node.doi_url):
+                not_in_nodes = False
+                break
+        if (not_in_nodes):
             nodes.append(pub)
         else:
-            array.remove(pub)
+            doi_input_array.remove(pub_doi)
 
-    process_rec_depth(array, 0, depth)
+    process_rec_depth(doi_input_array, 0, depth)
 
+    output_to_json(nodes,edges)
     return(nodes,edges)
     
     
@@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max):
             # Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe 
             # noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich 
             # wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert. 
-            if (citation.doi_url not in nodes):
+            not_in_nodes = True
+            for node in nodes:
+                if (citation.doi_url == node.doi_url):
+                    not_in_nodes = False
+                    break
+            if (not_in_nodes):
                 if (depth <= depth_max):
-                    nodes.append(citation.doi_url)
+                    nodes.append(citation)
                     edges.append([pub.doi_url,citation.doi_url])
 
             # Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation 
@@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max):
 # Programmtest, weil noch keine Verbindung zum Input besteht.
 arr = []
 arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
-#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
-#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
+arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
 #arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
 
 #arr.append('https://doi.org/10.1021/ci700007b')
@@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
 nodes,edges = process_main(arr,1)
 
 print("Knoten:\n")
-for vortex in nodes:
-    print(vortex, "\n")
+for node in nodes:
+    print(node.title, "\n")
 print("\nKanten:\n")
 for edge in edges:
     print(edge,"\n")
\ No newline at end of file
diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb7f56fc9742c5d19e2fff3d15c51dc4d59e1b1a
GIT binary patch
literal 4341
zcmWIL<>g{vU|?AJttZh-n1SIjh=Yuo85kHG7#J9eI~W)kQW&BbQW&EcQ<$QdQkbKd
zQ&>`1b69d&qu3Z3Qdm;ha@eCdz%*wRX9`OSdk$AFcN8}xgF8bCM+#>PLkedzQxuOo
zLkd?4cMC%bcQaEIZwgNeZwo^dUkYCee+xqte~LhgU<*T(K#EX`a0^3}V2VhJXbVG>
zP>NWJcnd?6a0**6gQi5450_hMUUErheqOOcT7Hp2W?ou;QEnngK%pqLq$o4BEHOtR
zKTRRAs3bEvC$(51ttdZNp(G<!!O=NbAuGSMC@(Qb!OPjxmy4^4-z_J<C^Ion!7D#G
zBQ>wMiYG10Au%r{F*#o^B{hmGpeR436l~i|P~d1X-r{viO)M?ROe@U^&Mz(S(`3BG
z5|CJukqnYX#!RsAlVD(ANM(p(Oks#(N@s{-PGL-8YGH_CNns9V&}6yA9Z;H-lbH<m
zKr%a$2_QDe5NA*%XfQA^)G*dC#52?|)iA^})-cyF#52{f)G)*|*Ra+w#Iw|})iA`f
zrZ5CEtYq}lWV^*$l39|IdP|@HVSIc_VoB;Pp5*+zlA_F{(vtk5;#=$~`I+&hMLD-P
zic6ESQj<%HZ}H}p=Ef&ymVlj9tjT_hr8qSwt%!|*fuRWGs3Hy!ixWhEl28#hC>b#n
z@iH(l{0eloiU}=FEh>)jP0T4tjd9N`iE+*h)(t4i&q~cMiAm1XE!Hi7aCFO3ixP{H
zQZq|R^U`BbIGK3`r6uucS$YMPw>aYCGxIV_;^QSj0l@<bA!as47_1UO3O_xVQa?>r
zkiNXc+|>B^TU_z+x%nxjIUqJqe0*VPVh&se?$08AkcU8?0Q&?&@PWiw7#J8tL3V=}
z984@sMIZ+52p2($aA;&RLnE8R85*p~tRR0tF^CP1X&F2*4T{|&kZMg9aCF>ahbCu4
zlxVURfr1DuT?BG9LJHy~un)oh0(n>k<S$TsGC)JLN)YBmq`+h;5&#8+AcznG5g@Hd
z#v#lE`CS=gCdfY=j71=RG9%QVAPS@tl!(Bo`US|Z3|S1IP)cF$WvXSYVax&*D6Ak}
z7Go_Fl+Om`GlThzDeNG=3qvelElUl{0;Uv>g^abVg*H%GPLOOZTRKB6dkxzH<`k}l
zj4lkZBDEYf><d_GI2JIaa4%$B$jHc0m;u(olfnzu!I{ob%T>df1uEY7Kq4*-u>!T+
zHCzi=YPho?I_80OFsAT><ZF3qc(PcFPo)Ud^5$`X*jbFVyd`WkOf}3kTs5pU>@~d2
zY_)tfe8CKwf_}G{Q!>k{WT6F9d`4<wN@|gAPHI|-LP2Rx4w$u)@fKThPGWI!{4JK^
zg2cQk?u;C_oD|&<P;Rc`D=1CU1?6Jhvc#Oy)GARlvr_UiL3x=aqbN14N*qnHB(Wkt
zFF&{P7GokfdE8<u&52^p$xo~jhS=c@DHt3}OEU6{qC}wDeL;n4a&dfeeraAw5hyM}
zMM7F;Udk<&lGKWlTg>UHCAT<00`ZACIk(sn3kp*6Qi`NO2?dl>H2H6_6z7)~++u_H
zIEoi)ay+<*0+%VbSaLG+vWqww7#MD`6qhCy7lDeFTg=6!Nw-*%^Ye<q$@ms?a(-SE
zOI~SiGPu;a#avLDgp>|J$@-Q+dTL2L%mz^Hpan{gI-tbI$j2zaD8MMh$iv9T$im3S
zC<LZKBp)dKGjT9+F)}eQ{byk+0_j4@<Dd!=l=r|X*NcIHp@boe5uS1xQdnvkYZw+V
zrLclin?fy94buYV6t;zoDeT!yMKU#vU_Qq}MstQ*#zL7AmKw%pMn;Aj#$X0bPQM~C
z1_p*I+2qU;-IV<Dyqx^R6y21f`~nas9#mqd=9LtQgVcy*l#~<{Tj}c;lqMDHB_<c^
z<rk$xLK{>>SBb+iutH*9ib7FpT53^hUUF)2krV?1gC=W{5-46|L4*Q`kOu`hYe7+F
zUWq32Ew<E(g8ZVATg;_JIYpqt0c+@j90hVAsN!$}g)S)I8Ngx52M%Kn#v(li1_qRz
z4+=j}asY>)3pnR5U`SzH$XFy*!>|Axk_(wqm?Rl$8B3T_m}?lt8Ja;Q0Mi2Ig$zX&
zB`hf{pzzA$C}Bxq1+hS-p(dN(FGky!pj27Kp{JjgnUh+qUnT68nUkuJm{XLRm{O^b
zT9H{?QVbPL$xkg-$jdK*h-$JHfkNUITTyXAPG(6F$c?wyGmGOv1{G<80*tvx7ZgGq
zAQ4claf_uOu_OaMSU6xo;tdK8PzlVy$i*nY$iv9N$i-A-3JMI6;h<Cq!k`ipl-R+6
z@rr?gA)NtIxg#Yh=5&TymKx?PCQxe&RNS~Q#7fk%*03yKu3^n$S-_gYmcqV}30#n=
zr8CsBLDh0V)e1n0zZ&)|7O<M2bcR}v8s-H|DV*RWDpJc?!?A$5hI0WMsE7osDFPRP
z5Iecy=75So<{ECOIkQUGQ+P@^K*_h6$%P?StCpvRA&X%Fa|ve+OAU`CLk%04<N}i%
zAhH?af?eP!<Mq46l%G>2nVeaYnxYF$+wsXsm9T^hPUl6Epv10@QxPnS>w*iBTa0?Q
z7^>88tA<vTx}Ye8WEN0%sM5o&16qWD9SCbKL5e@FB4JR@0i~QGa2`?wapgeCguN)W
zASW?7wMZVsV=YR|OHaMUoS9bwt_d}Hi!?z>K`kb5KD@=Al3J3OnNxg=4eX6#aKRkK
z3w3Qg$hF0{SU^-12ehSD3{H@c@{k=AdPSK@w-__gQ=}lM7=*QM;z8wL2q<Y9fl?nM
z8#u4BF*5z<ViIFyVT7<?Wg>zv#3;hZ#=!Q!$Ptu)LAea19fU#kJIEX01bhr!8m2JT
zFk~^#W=LVGVaQ^d&5*)e!;r;1n<0f|E;}f{mN3*XWU-{Mrm$r)_AxRtlrW?q#95KV
z*=iWF*qa$sII>xac9p<*oY_o81tknATp+XZgi07vxIyY>Go<j$Wd@1WFl2GKFvRNB
zGL|r;@YXP9f!xAZ!<fZ6n<0h2hB1qEHbaU)4I|i1f^#`RdI~%8I7%2&gg|U?;uQ9)
z5^{v}92CG&Vx^#<pim{uh1ASc0M)=&AVC*@Pu<WUAFvvbIt7I)ZZ581XiM7a7ISfG
zK^2pZLKPnuv~mTvx~;(JsK@}6!Y#Q%GBS%5kecZVnZ*j=WT}u;3GTO~<>%z&muKds
zBdfCFs?s4vYZX7T?qVyhDq${=lfexOxCfwuUXWfAR2V9V;scQHLE2G6po$l4CP)H=
zt1Pg(5LEoaJ(!qRsgPfiky?c8JUvY=aOn%q4&Vj?xM=`x6o6BHku^LYf%77$rK`yV
z&Rw@SV5uHGOYnhfx%h(8ByfJn1my=%HO0Wl!^rZVg^A@K7pnjx2crNJs21a55@O_H
zWP!@CFthw+VPW~h$6f@gyHIL2P^|{S;Jg5;4@(#pFxD_EWXxi!Wh`N?VN79cW~yZ>
zVX0wiW~^l{VXa}#VrynhVajGHvMXV)fs52IH8V3Z)POpM%zjne86~+ndIgC^#i>Q$
z1f$7ui@7MZ5L~ttfr}faq+)PG1t~Z{IaQMdTt-B3f=q}9HPXPP4LEqv1BDgb*J=WV
z1*F>IVdP@s0Jn{b+(8wf$SwBx_>}x)NJlCW+&GMnzr`6JpPHMPnFA3nE=epYEsl@B
z#hg@ZQUod-irhdYAV*PZVQFe{NpX=Yh|64DS$vBfRDtGXCV^{Pa9;{sI6^!EiVtvg
z2`+NLVGVW$lCwc|XA!8yaf`zy7u>J216f!M3K14o7EqM2gX&!lMjj?UK0bC12@XyU
JP7YBHNdSGl8^Hho

literal 0
HcmV?d00001

diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..519d14366f17ebf12bfdc3f6ae2e985ad9d6d229
GIT binary patch
literal 750
zcmWIL<>g{vU|_gqJTWnfnStRkh=Yuo7#J8F7#J9e6&M&8QW#Pga~PsPG*b>^E>jc}
zBSQ*v3QG$^6mtq=FoP!ROOReaO~zZSnRx}JCCMP|AR!P2S;Wb}z~BrrZUqAaLkUAO
zLoE}SWG-QBW~gPUVQ^uH)u?3!^Vn)wvzThwQW$L*YFM+FYuHklY#3^o7O>Q?Eo5Y5
zC={z<bYY0qsbvSN=BQy$VU}d5;YeYzVW?qGVFk0<Y#3^o7qHZDfOWXkFx9Z6u!F>E
zSW-A_7)n@EI8(TqnfirlIZN0Uu-CBEa4uwGWC&+4We8*lVu)Z!VF+f><o3J8l9!m9
zdW$(FKeLDt6kcq_`K3k4sV^a%lEk9))RJ4QdHE@+#kW{<GV`*Ft9Y}D^Yh|MQY%XI
zN-9cjF_vp`++xYeEH1gll9HKRa*MSjvm__=7JEv5W_)Q;&MmgYf`Zh%lv^zM1*v(r
zSU|dOv80sd7HD$aVhp>*=z2>Cq&q%8Egob~Jjjq@gji}ydMZ?m8zco%mza}tiv^_k
z7DrxcIau}<3rOKDE*L-F=oWi^X-QgUPU=dAB3T9ohF?L>RxzQ)sYS&xzKJ;{sWI-E
zB{9yK!MXuO`B|yiB{9jFy2ZK$5RPtHYEfcQQfg*NX<m8^3J2tY_>|P#e7%CoTYUMY
zB?YA=@g@23AaPbu8c+hI3Pv_C<X~iD<YMGv;$Z}_co;#DgOP)Yhp|YWfq_AjrAQDI
r>>S`^6`z(>1WH+8DX^Q72syAio80`A(wtN~kg{SH1_lNWMjj>r?i$Hd

literal 0
HcmV?d00001

diff --git a/input/input_fj.py b/verarbeitung/input_fj.py
old mode 100755
new mode 100644
similarity index 100%
rename from input/input_fj.py
rename to verarbeitung/input_fj.py
diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py
index e6cd618..77ce148 100644
--- a/verarbeitung/json_demo.py
+++ b/verarbeitung/json_demo.py
@@ -1,27 +1,33 @@
 #!/usr/bin/env python3
 import json
+from input_fj import input
 
 def output_to_json(V,E):
-  list_of_node_dicts = list()
-  list_of_edge_dicts = list()
-  dict_of_all = dict()
-  for node in V:
-    new_dict = dict()
-    new_dict["doi"] = node
-    list_of_node_dicts.append(new_dict)
-  for edge in E:
-    new_dict_2 = dict()
-    new_dict_2["source"] = edge[0]
-    new_dict_2["target"] = edge[1]
-    list_of_edge_dicts.append(new_dict_2)
-  dict_of_all["nodes"] = list_of_node_dicts
-  dict_of_all["links"] = list_of_edge_dicts
-  return(dict_of_all)
-  with open('json_text_json','w') as outfile:
-    json_dump(dict_of_all, outfile)
+    """Write the graph as JSON to 'json_text.txt' and return it as a dict.
+
+    V -- publication objects; their title, contributors,
+         publication_date and doi_url attributes are read.
+    E -- edges, each indexable as (source, target).
+    """
+    # One dict per publication in V.
+    nodes = [
+        {
+            "name": node.title,
+            "author": node.contributors,
+            "year": node.publication_date,
+            "doi": node.doi_url,
+        }
+        for node in V
+    ]
+    links = [{"source": edge[0], "target": edge[1]} for edge in E]
+    dict_of_all = {"nodes": nodes, "links": links}
+    with open('json_text.txt', 'w', encoding='utf-8') as outfile:
+        json.dump(dict_of_all, outfile)
+    # Hand the structure back as well, so callers need not re-read the file.
+    return dict_of_all
   
 
-knoten = ["doi1", "doi2", "doi3"]
-kanten = [[1,2],[3,4],[5,6]]
-output_to_json(knoten,kanten)
+#knoten = ["doi1", "doi2", "doi3"]
+#kanten = [[1,2],[3,4],[5,6]]
+#output_to_json(knoten,kanten)
 
diff --git a/verarbeitung/json_text.txt b/verarbeitung/json_text.txt
new file mode 100644
index 0000000..b5e7fa9
--- /dev/null
+++ b/verarbeitung/json_text.txt
@@ -0,0 +1 @@
+{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "doi": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems ", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"name": "Disconnected Maximum Common Substructures under Constraints ", "doi": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design ", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms ", "doi": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"name": "Machine learning accelerates quantum mechanics predictions of molecular crystals ", "doi": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID ", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies ", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"name": "BonMOLi\u00e8re: Small-Sized Libraries of Readily Purchasable Compounds, Optimized to Produce Genuine Hits in Biological Screens across the Protein Space ", "doi": "https://doi.org/10.3390/ijms22157773"}, {"name": "Accelerating high-throughput virtual screening through molecular pool-based active learning ", "doi": "https://doi.org/10.1039/D0SC06805E"}, {"name": "Compound Screening ", "doi": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", 
"target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.3390/ijms22157773"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1039/D0SC06805E"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}]}
\ No newline at end of file
diff --git a/verarbeitung/json_text_json.txt b/verarbeitung/json_text_json.txt
new file mode 100644
index 0000000..e69de29
-- 
GitLab