From 3802ad87f2e216bee49f3a7ad2bcabe398bd3307 Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Wed, 8 Dec 2021 02:17:03 +0100 Subject: [PATCH] changed import_from_json.py --- verarbeitung/Processing.py | 41 ++----- .../__pycache__/Processing.cpython-39.pyc | Bin 4045 -> 3974 bytes .../__pycache__/json_demo.cpython-38.pyc | Bin 1105 -> 1153 bytes .../__pycache__/json_demo.cpython-39.pyc | Bin 777 -> 1173 bytes verarbeitung/import_from_json.py | 101 +++++++++++++++--- verarbeitung/json_demo.py | 1 + verarbeitung/json_text.json | 2 +- verarbeitung/print_graph_test.py | 58 ++++++++++ verarbeitung/read_json.py | 8 +- 9 files changed, 156 insertions(+), 55 deletions(-) create mode 100644 verarbeitung/print_graph_test.py diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index 54ee02d..d44aa0d 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -18,6 +18,7 @@ import requests as req import sys from pathlib import Path #sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +sys.path.append(".") from input.interface import InputInterface as Input #import input from input_test import input_test_func @@ -113,6 +114,7 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ # nur aus Testzwecken, da noch was bei Input falsch ist except IndexError: print(reference.doi_url) + continue reference_pub_obj.group = "depth" nodes.append(reference_pub_obj) @@ -167,6 +169,7 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test continue except IndexError: print(citation.doi_url) + continue citation_pub_obj.group = "height" nodes.append(citation_pub_obj) @@ -233,44 +236,12 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): # calls a skript to save nodes and edges of graph in .json file output_to_json(nodes,edges) + return(nodes,edges) + # only for unit tests if (test_var == True): doi_nodes_list = [] for node in nodes: doi_nodes_list.append(node.doi_url) return(doi_nodes_list, edges) - - - - -# a function to print nodes and edges from a graph -def print_graph(nodes, edges): - print("Knoten:\n") - for node in nodes: - print(node.title, "\n") - print("\nKanten:\n") - for edge in edges: - print(edge,"\n") - - -# program test, because there is no connection to UI yet. -def try_known_publications(): - doi_list = [] - #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') - #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - #doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') - #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') - - #arr.append('https://doi.org/10.1021/ci700007b') - #arr.append('https://doi.org/10.1021/acs.jcim.5b00292') - #url = sys.argv[1] - #arr.append[url] - - - process_main(doi_list,1,1) - - print_graph(nodes, edges) - -#try_known_publications() \ No newline at end of file + \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc index 54c63251bbf3affbdd176d3d55f4956c2fc08406..29df691cb577fd54163d38a814b22628a3bd296b 100644 GIT binary patch literal 3974 zcmdT{&5ztj6?gfo-R_>AkIYv-SiH;f(J(W~hKK`-&@KsC0?BBl2@7k@%6Qr}-R^PQ z-R0^eGjg9M;x9lPxG>8BX}KWnfg_idxUz>mS%M1}mIHqPCw{Ly-I)#nA;E<iRn@Ck z<>#*Ve(zV$;lhHe;cxiI--MUGqG^Ao$;CedlQ+@wU!jXMUyGQ}BHh;`!#5%`vJ!J( z`8Lxa+eyuL)L8fH7;A|;X!s4L@p`{`wh%cVFtx9qv<8d*l9JtId9dQID7leb8m#)O zkekuMGtFO%TG1lL^=K(t#<&r!M3*pbMyt^p#>>%qw1M$TbTzshU3td*tC78@wXdD* zyZ6RvN5)y2douI7Jmn&k+zY*~2!}mS@S)&2Pi35Ty-qB{YU=@)PdHD#!8noeFyY=? zclNIDhH@;p*U!cx4U^n;#}DC(yIGos-h+>S8^Oeu_f`^P`Y=n5l+i8kM{#-_dyhg| z{N6Z<WzTzCWFP;6Z+Q>H1V3f3)60&sBzuw{#V4<I;(U83vOe#~n>TL0@#e`vZ)cpw z+r4mbFc#gNh?nj=B8$fAP@mxkXl+(5e4B@386S?5y=*+(g~og*l)aMeKy2Mlhhup^ zm0TQ#9S+T6OqxLQTn2~m4{gfEQt7gwpJ!>uB+KW23mEHYchK^`qU&lC6U{2LyW0Do zPVB-EuSl)1PuZlF-j(cBn>Z(}!a36m`vd5HsHHomCXM6oGL8P<(?-qrwDbye=JAg! zowVqAT1)jC0N120ZCKx&xJ9jBE8KqVNEiJ`pVXN)dN%48b@6j)pE*TMfypRpK|OT~ z?aZZjGEFv$T4WZiFiuU_X-yi1iQNra8+|03YE_@DHVX@@O||;Ts9BhWb*KYE*6yQw zrKS8ObR50CAZi#&OF6Y%>~a|l#|KH=QP3&fcf(}NzavB@O80&m@u!t+4Mm*F(n_<4 z=VdL*;$SS2(hi41o<^nB6$o5u3qHsm^YYb>;DBkMKpNyyAl$%SAow8{Jng`Q_b;~0 zK-^v^TlAVL)=8Ypaw+Gb==6e!4`nYHgip)mb4`!O-JVn`N5SH8D9VlJucI>t*+D;0 zcGhPvnbZ3<+J$$P1_bgNAXhfiOyWlb)yYj7%KFT5=}c{XsEOC$*}v~~zPbN{oQr&4 z<;VWpvE08C?`;G1eSReOJMs2;iX6%9$4F;!z+*X1yZdt!Y=@h2B<0TVxLk|VSjJ%z zpYZC3sDpeC@5^tY)0oLj-7!q&8aC5ei#3^p)-p{sn#{%ympQs?uCOLrOBb)>xelYJ z&;M+U1P+xazlI<wKh7qsV0{)b;P+ifI&`-ux>~zAF$%qJ6h_~`s;6YuH@dnsA2Z=e z^UQj#Q`q!Io64Gw{Xlo0a^&{JJhx^QCahV?T8dSJ=I7Q@Wo@QY*3!}8$c!}L+}JHG z1&h+Iu+XlFMI1q_peyUsAmw6-<`$2-JQvF(yR%s4ZChNXXFgBetLS_K4+;-bX<(ZQ zaMgW*bgP|g0Cok?;}pakD68McO1_RxgX>*#I=jZ!XOj3DR_5r5?yVU;{S6bMr$XzK z^ZzulA~vxlgS<JZ!8JPQgq*hyD#2|vx-nWWZ1H;#g?^-ozktdfNOGYLGIP=g(gc~+ zPnL>0LIEx4%mJO<0(IHPPiJ)Ig0?*9YR5l2r!%Lrp~#L_iPVh33F?x8qSOs28;~^o zBB!r+VVP;5sPnZ^wSqSyd<}rXo$XdS6+X8x5jqnTCBBGrx0|Akp^6*DNl}#`|A+>4 zP3nlvMT<HLt%{+*g|D?;aScP+yhsvagZ8MPULnb&?iK1N#{Tjw_ERDdlv6LnS*$|0 zk2cFS7un?KP4tvkly^;)cT1Fa&?D@+jUKsXEU{IlEX)O+a*dkGwQoRBchMGQ0=OH& zBxc7ARfW}*tbiP`4Xs*1;1%va)OSCl6mIM473DhaUZRZ)Obf9CV5{SAVe18#kiYyZ zDSz^O`_vDLUtaXXb?pBMTJE6JbceMJhY8Zoo$db{&;T=m`Xzt{aI2LIfS&1I1oW5S z6#)9n82(E@FU;L>Vv{T?L|-L|XzBu@m!?cN4dhD!y@rQ=iuRuY8le6sK$C^Jp#P5m zEw*78XeRsR+7!*xLlLeC%<AMfu;qd`6qqj)n5Vm^E_f-H-@wzqLz~AkY46RQi=b|y zsYLxH1SF_-T3v^$5?>Hh^igVpeP74iP;*_)Z((lIysL#enjyjKNMKde?duaHILbyV zqBKXkqv*2>e2Gj`T-Kn))6$W;G){F$7^$p#e+hSieOAX^;40ufE9`1TU52$?RqLIs zs@iSKOEXVZQC<|v@Z|crb`DlvcIZ_Kd^F_JJK)|h%TaSY<|jMz52$PQ-|QDV@Y17p zqcjqpR!psKVj^i(bO`dYq78K!3UYD9<3##xN6_slRCpC(5!n4pGpf#Kg`B@K4fO?` zDjHR>R>fUi=2V$oxVW2BU=6}J{R7r=N>>fc?SRo#vrthPD@Kbr&Tvs%QRQ!<vJzj% z&NjQ(zE;)(REwP;C>ue5@0VUCa!l$$z+pFm=msc$@!18nZye;egn?&E{UG-p!AE0! zzvMm&i&1Ih$Ed1@_~uFC13@1zrAf1@M5(@ac2qq|pFo0=P2G#WTb0LD^Qzm@uhZub zD&#>{`IV}?%FT3(RbHp$s)9ggxxn(uO{%nUZV&L0mhf-s*pZVFlRU!A8da94E#WKG POlQYgcE0Mg(3bxJ-SV$k literal 4045 zcmd5<&2QYs73cSIsnv&NNs&Kd({5sP3rUHM04<ClaB5cpnpg-Rr6>p$1jQNca>XTA z!&xO3xIqE?;7hN)Rz14M9(w4dhxD)DwY~U~KOmRp_lEl+8Mb>Xx(jmN%)A-S{NC@q zc@xdgdm4WK+5SuPhxaw@UsO5&Gf{bfr}!&gObaxo2RbtXgPDQJEM})x-wqsIgY2Y@ zz*S{4XrgSSUVko_Q+;mQ>dy!BN^YhL{l#EW$zHnDzZ6`8JjYtkv|yRdvjvow*&<s) zxxy~7Wt6MzGFw5p##Y%H%6FL0*4dS3da%x}vTJC$!mhJ-QC?*?*iDqzn6s<3Z=LRY z+ruoDNuCwH%>53}xQHb8Bfle}LDv_2Ab7zunPeS5PGnT89&q_J=b7IhrZO3%-2e39 z?gu-O918CD@}bD0wD7#)W5Dt#&$7t>;=8{wOtImAnkJ||&eJ0`>4yK?Bzuzhdl7a1 zddL#l^*<B&cYop={^KacQg-8Rew3&A*TqqCdOJ>v&4I{!JeK!9ym$A(=|OjEm?fLt zsDCgNoh`-#?@N)hq1x0-EI@1P)%-7cG?dBVFx}0EgB`S3Uq-Tv(oV9$P(oN#LMVA5 z!$V+#riFYc(HY7->=k(y8#LSa=b*IkJj7ER;#F#owBPNIj8YeCQY($8`pC>&sn-(I zN0v08u|Bd(vuBoe&pa~39cGLyT|0hoe5bU;Z=`W%mFAyy7~Cw)&_1?G?aV0+YB{!; z29sGkd)tCO+-?XPg_0{f%NZ}K2FsIhDALM_1_Pe4%I*m4w{isU=O?`FR12h0?=(#c zS-EPzCy}TcgW*Al-pYhfwX$47FRbUasCc1?Wqk05-T40gR|OZvzOv8$XNlZ@nCxx> z^&UTx`*E^4HqnvXJOQ1=0Z-&G>+D~sKt1L~0a9!Yo>Z$zmdGSZlT%)QGo<BR$M=e> zcxk$8TDoV@+tpjTr8{^$-8IAtdSjh*#y_%51CO%IKOso6%-=_Psfl&iL+_DojLehf zL`G>b9rhTMM$d-!$0JjXx;KJ7dPZsW4D{Ytve7d-hICGJabG%Tjai$r(*5ImYBtih zFq^Apv!}D!)7hS**8mSDvprX{P21FL&yRmUw!67g*^Cclw{p5X>2zhgDQK5930#th z6@mg*)tq{2Wpmcyg=kW<C-@;3JOl1+N6eG@C3-DVr#l%FIArbcZq=NsE5p~X%cQ#! z=Y4p+<e&%wMyS4V4?P80tTtB5@Jx?Pq+i$9jEN*jA*Q%F`H{JZ$jV&LAe35<4ED{) zWaKnD<l1QzSb%kT>@*w4%W#_g5-xV6i*J?7m{Om(khDnx#2P`Y;B;nnX~Tfd>vU#r zwuaRJsCQ7*SSy$bVTGqyulp5z^97GY+zo5vRF^JP!+!LQARdb)dMVJ!m(D@z$ATm9 zhAQlag%k*2h^FDhD#aJjANztwUSLeTz=bb(`ii-vi<{8-AA$>D3D7CH)A~7Zr){+X zX5g-3tpN85!Jvrz7I0f*G;0S~I$u-JUZY9g2<@#mKs%*(!7%>6VS5`oF}c{}N9!f> zD&m$9N}cIH*C@__vOS6%;s#Cv3(>|_CxMbAXiSuSYU&sP>T06Z(MArP2pD&J)L=&0 z=vgOP5tJsQQ{qwSoY~Z-FnlDP(xH*1`IOF%7Ge6r+IdMqFfgOI*gJnVC{olMRtT#| zu!<v;qVJ4eq2O`b4d&^5m~u|Miz&qodZ}o!KoZeCSeWXlKwXpjBBcc~Chc211^J1F z*gcNnM$2?5dRxo-f~mCre{mH*!O}0lNj#`oJA-p=0_TiUV?5P)J8Km_jlM;%R{&j{ zDb<K7PQQZa9Sl6f^L9i@Yx@GE7h>!m(G6N^PeE6ZYegbseToEw=nFEwhq|fihN^#r zI*6-l9W65Om=Z||sAuS!v`dZQK&^=gT2gIHONoP#MoCv1QfY(abqchE6v#Sjg7BUq zbXi)`JT#Ep*gJwqe7f>0A#$<dXSuI(6`w^iI{jeWdLgv>`JrDoAQ>&Bf581gULcn^ z;ip>{2dL=r7X9{IWu`o<?14x!SqCMeu1YZ!PX$4%jxa%x)~5i`0D-JU4a#49lE}Qs z^WjY@g?eqwZ%t^^kCN;k7*G(oG(>jzO8wZDZs8bRz~9AzO4-kXF51w#kd0DYqDRG| zPasH`LK%@_(bCAEjI>9FhS`o==&emmhwB36I*2rN-_OPo`qcd#DIm}8dzJ2;HoecI zOx4<k_$lVEY?;WEx2<z9(Xkd-lncC?FU%=K>$C0O&?*QlqzP&_^cEYa$J99anGmLF z!7oq&eL&hX?2AZ)JWgW<{)mD5#t8Ww;ws3eXo<Y8md1MtY}n!Su6PgA>|x0i0u_d8 zE9%=XaAAvtIJcXNu2*>~QI4O)DgPeB3w1H1C}u7jo*}l-_U84|hiU!fb_{L?tS74` z_fe}>Yc+RX(j{XLD3guV(!?f7uU(FU^#^npL`O}xTFZv`5K>#;ZeOn&ArksH41>8a z#Pz3}ivpEq7;+FUh3JLIG;nL9X7ix<NYKXv<Ddv!!H<Wy3Kf9~sWOWvxLpizWlEC+ zLAD9p`WA86Q+a3L&8|->B@LQ%WeOQC9<{Gfo?g2SWhp|1Q92=NpPKEKkW_m(w`H|? tm3g`!_i?vN`6sl}f|zKLi|AGZPJ@4|{yXlPd(~aXbKAY?z3+N%>qkd?fyDp- diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc index 4227be8f77a3c09fba1e059e2f19b5109e8f7ab7..4daf8e7b50fdad59d6cac52e92a9ab3bd02a395f 100644 GIT binary patch delta 424 zcmYjM!AiqG5Zy^bx=EWD3$=n`dr)Y+c+p$&RP^FaJZcC@T2j(%-E1x;qNRdIDLHx5 zFYsIRm|yVd4>+sTc9(tZyqS6P=7Y|3^U5?0g6He=-FsM=Lp#AhkU6=7ob<`{&Em1& z5j0mu`W*5~Q0<W%UX8r+20(6&94?7N7v*7NYWb1yM8uMg&$2`~wL*jQjY=FQrM^t1 z#RVKgvzWnXZ(EK94JbZi?OIR=-YTlf6WGPGa&*$fI{ph=_$%uJ#>bv;!(bSuJIJIK z!cwV@-PqqQzLl5z7V^A}P^xT#KGadl5QA(yaWuXuvr?d}1(7dYHgLT-<_$^jO1jE2 zF^J;uZ}`fAh5RHFu3)YdJiyvD4-_||(nI$Q6OPLFrQp96-j(5Mr<E-2jaiVz;i)18 KH3X_tn;Jibuvy^% delta 376 zcmZqVyvV^D%FD~e00d73*Cx76<V_by0CGwgni*;tYZzP@Vgn|&xCqrSF5swPUdYJE zP{>uo7|fu_KJkSXlP2e66-M{T)r`iB!jsoCX32<w3}R$s<N#tWB*?>9Bs$rgshyE; z@_r_LQ(>U0A}}EWWZmLP%P-1JEQwD|Nlz`70*Vy#0STBnJd7Y%Bs*D|d1k#RP`pSM zNZew{FG$V1#gbK=4<u7ca|<-tiuggYk|0^`l+5Ik`24i^#GITW0gy1r+*|DVr6p;Z zIjIoK5iZ~ZiWiiY#Fym9gA7ywszq`G2hcwpAUCMF0VOn9ij+VG!ktp22I3<8gfN!F YCO1E&G$+-L31lD}kl^6t;OF210Pm1VD*ylh diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index 453d99172fe839affc06d41a37b8e18f6de3b08c..7e26222f87d54c667059b60f41230e98859b09d0 100644 GIT binary patch literal 1173 zcmZ`&L2uJA6t)v5&AL*m!GQ@56M{n}Isr#iA*6vgQK3zMs0tyAy^=1CgY7UjWfD@k z^B1-wC;kgZUpY<u15Ui>QdW>~<oCY!>^%Fu_c^(6p-Eu;`1UpV7!vXwjpgBi@c^d2 z0fG}wb222{<NhZyq@3{pw8ul<0PS<SL%Pu!>?DJfqWV1;Xq;OxbpgVV89Amkc}QNp zKK2X+%dfqOS3`_G0ptVn9AbcchXO#%p%75uPy<ldCnkb_R%R5e2Kebov-VFZCGVQG zyVJn3BAJR|m{jJiRF)lzMA^nfR;oyHxVg-1FjlfEkJ@Qw5|hbdBb8NQx~rDunKqX3 zEHySTnaPE1rLr(8+pUaL+IGrnH_uY{SIiS56eepM5H+q;t^&0DToiU?5nS6aDN9lC zF0=HxUBe?{xfd6bix^MRaAUy3s1OIv@r9^0i1Twhy*GNMh0>!ZNp8gGab`wa*-r1N zk`u9SMrqd5y>f2!-V2qeU6Glp7?1ukCR!FT7gM=e9@^HPRMW(`)AcHxqOX7;G^7#C z=F-=uEvhcV+W-68ht;M2-a79ur|Sed<oe2ehq0gZ1p2<ztze*#JT}zOo>ZL84U?#` zFkN2(O^IrtXoJA|Jvy(S;Nw5_Lp*FQ>KEU2^_z=J7=Mv9?$cMnqcPwj>lS1RxuQSC zSFjj(d<4^R`3(S<D{#0bGkWYBucn}ZwYBHC%s@lN23XA5%=>)pI56xasJ%Kk^(c7( zuZqDp^C>AhcZlA;O{n``g5(5VR4}jsazWc5&x(D0)OO7?;)B^lWa&XSoL2-p<F0I( z1a?{G))}v+C2YBftJvA$DpCyjmgKqWpnDzN4OyAJEEoS)stx|KGO>{{2D}M-mueLO rOCqe*b?|qY+5qJ^;%?%+O97F@%GE#WPbIH%aUaX8U0qkAPSpGb^a>Z? literal 777 zcmY*XPm9zr6n{z6G&}1qy2ygy$=iS<?s^d!L|Da>i#RA)Dy2@cr8}LbHmR%70dYP1 z8PuBxzl-NwJ<V6}<V%*-HIMw>d&%$p$!m+Ptr6h*nEWVyQh;CHxwsfQ4{)0)f&@uw zNI_B=eS(yfAlWyPQW;BLQkh7BxFJU+Eu%D2L{Sx;Qi4%47;mmI102q=%HQQ{`w9oR z&1VD)r?8>{BTLX`13ZLR*H>}y_z(|Vk|_-I3my1ZLg3BrK#*~Bl>~+u5=pPYAY_EB zM6zJVXh#?!Ld4yH4oq@?hB`sbX0Qk5!)+izULs1so=1miBz4shY|U|_`);n*XO1np zQdt)_S}Nn>x@s0C-99$D$*p>4b8k6vGL9XqD`Op#)!aH>w5@8SV;WVu5$LjCw&Rf_ zFC2N4?s#&pk8+<k_c7+*nUdxC*`~kpyrQlh^W;=C>Lf%s=85U(+|T#ZMECZns?~Ss zF5_E%Kbzmno*UJf>`75ul|8O(c2LbGPdk0A7B-t#6EkVgoXK0&72Ru9*}f^W|D3b_ z)l%PT-@9$B(e^gCIy~NgS91da2oE9<1-XQqBYF`i3`gGWAZr}qjC8dZa6jOkF2ako z|6|s{@;TSB2aYgij`<Ekc*+-zwm4M2C>qq^ofqH=1~@TYYT4K7zE5ccf}rB6AiLxb DLy5+# diff --git a/verarbeitung/import_from_json.py b/verarbeitung/import_from_json.py index c318e60..7eec532 100644 --- a/verarbeitung/import_from_json.py +++ b/verarbeitung/import_from_json.py @@ -1,37 +1,108 @@ -#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Functions to read old json files to recreate old grpah structure + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + import json +#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +from input.interface import InputInterface as Input +#import input + class Publication: + #def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): def __init__(self, doi_url, title, contributors, journal, publication_date, group): self.doi_url = doi_url self.title = title self.contributors = contributors self.journal = journal self.publication_date = publication_date + self.references = [] + self.citations = [] self.group = group + +class Citation: + def __init__(self,doi_url, title, contributors, journal, publication_date): + self.doi_url = doi_url + self.title = title + self.contributors = contributors + self.journal = journal + self.publication_date = publication_date + +class Reference: + def __init__(self,doi_url, title, contributors, journal, publication_date): + self.doi_url = doi_url + self.title = title + self.contributors = contributors + self.journal = journal + self.publication_date = publication_date + + +def create_pubs_from_json(input_dict): + ''' + :param input_dict: dictionary read from old graph Json File + :type json_file: dictionary + ''' + + #iterates over the list of nodes + for node in input_dict["nodes"]: + #creates for the nodes the objects class Publication + + pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["group"]) + #appends the objects to a list + list_of_nodes_py.append(pub) + +def add_ref_and_cit_to_pubs(input_dict): + ''' + :param input_dict: dictionary read from old graph Json File + :type json_file: dictionary + ''' + + # iterates over the list of edges + for edge in input_dict["links"]: #iterates over the list of edges + for node in list_of_nodes_py: + if (node.doi_url == edge["source"]): + new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) + node.references.append(new_reference) + if (node.doi_url == edge["target"]): + new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) + node.citations.append(new_citation) + list_of_edges_py.append([edge["source"],edge["target"]]) + + def input_from_json(json_file): ''' :param json_file: Json-Datei for the old graph :type json_file: Json File ''' + + global list_of_nodes_py, list_of_edges_py + list_of_nodes_py = [] + list_of_edges_py = [] with open(json_file,'r') as file: #opens the json file with reading permission - python_dict2 = json.load(file) #saves the information in a dictionary + input_dict = json.load(file) #saves the information in a dictionary + + create_pubs_from_json(input_dict) - list_of_nodes_with_all_info = python_dict2["nodes"] - list_of_edges_in_json_format = python_dict2["links"] - list_of_node_objects = [] - list_of_edges = [] - for node in list_of_nodes_with_all_info: #iterates over the list of nodes - pub = Publication(node["doi"],node["name"],node["author"],node["journal"],node["year"], node["group"]) #creates for the nodes the objects class Publication - list_of_node_objects.append(pub) #appends the objects to a list + add_ref_and_cit_to_pubs(input_dict) + -# Es fehlt für Jedes Objekt noch die Liste der References und Citations -# Iteriert über die Liste der Kanten_dictionaries und speichert sie als Liste - for edge in list_of_edges_in_json_format: #iterates over the list of edges - new_list = [edge["source"],edge["target"]] #converts the edges to other representation - list_of_edges.append(new_list) #appends the edges to a list + # Iteriert über die Liste der Kanten_dictionaries speichert sie als Liste + + - return(list_of_node_objects, list_of_edges) + return(list_of_nodes_py, list_of_edges_py) diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py index bbfce9f..2c9f68d 100644 --- a/verarbeitung/json_demo.py +++ b/verarbeitung/json_demo.py @@ -18,6 +18,7 @@ def format_nodes(V): new_dict["journal"] = node.journal new_dict["doi"] = node.doi_url new_dict["group"] = node.group + new_dict["citation count"] = len(node.citations) list_of_node_dicts.append(new_dict) return list_of_node_dicts diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 1a89557..ad30e34 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input"}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth"}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth"}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth"}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth"}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth"}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth"}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth"}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth"}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth"}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth"}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth"}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth"}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth"}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth"}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth"}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth"}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth"}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth"}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth"}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth"}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}]} \ No newline at end of file +{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00249", "group": "input", "citation count": 5}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "group": "height", "citation count": 1}, {"name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00741", "group": "height", "citation count": 0}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "group": "height", "citation count": 8}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00250", "group": "height", "citation count": 12}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "group": "height", "citation count": 0}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "group": "height", "citation count": 0}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}]} \ No newline at end of file diff --git a/verarbeitung/print_graph_test.py b/verarbeitung/print_graph_test.py new file mode 100644 index 0000000..274e81e --- /dev/null +++ b/verarbeitung/print_graph_test.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +""" +Functions to test and print the nodes and edges sets + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +sys.path.append(".") +from input.interface import InputInterface as Input +#import input +from Processing import process_main + +# a function to print nodes and edges from a graph +def print_graph(nodes, edges): + print("Knoten:\n") + for node in nodes: + print(node.title, "\n") + print("\nKanten:\n") + for edge in edges: + print(edge,"\n") + print(len(nodes)) + print(len(edges)) + + +# program test, because there is no connection to UI yet. +def try_known_publications(): + doi_list = [] + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') + #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') + + #arr.append('https://doi.org/10.1021/ci700007b') + #doi_list.append('https://doi.org/10.1021/acs.jcim.5b00292') + + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.0c00675') + #url = sys.argv[1] + #arr.append[url] + + + nodes, edges = process_main(doi_list,1,1) + + print_graph(nodes, edges) + +try_known_publications() \ No newline at end of file diff --git a/verarbeitung/read_json.py b/verarbeitung/read_json.py index ee7b206..7300d8f 100644 --- a/verarbeitung/read_json.py +++ b/verarbeitung/read_json.py @@ -88,10 +88,10 @@ def read_json(): print(pub.journal) print(pub.group) print(" ") - for edge in obj["edges"]: - for cit_doi in nodes: - if (edge[0] == cit_doi.doi_url): - cit_doi.references.append() + #for edge in obj["edges"]: + #for cit_doi in nodes: + #if (edge[0] == cit_doi.doi_url): + #cit_doi.references.append() global nodes, edges nodes = [] -- GitLab