From 396593b91d358c5a2640db392d002b220573a7c2 Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Sun, 14 Nov 2021 17:22:05 +0100 Subject: [PATCH] updated processing_test_doi_ueberarbeitet --- .../Processing_test_doi_ueberarbeitet.py | 37 +++++++++----- .../__pycache__/input_fj.cpython-38.pyc | Bin 0 -> 4341 bytes .../__pycache__/json_demo.cpython-38.pyc | Bin 0 -> 750 bytes {input => verarbeitung}/input_fj.py | 0 verarbeitung/json_demo.py | 46 ++++++++++-------- verarbeitung/json_text.txt | 1 + verarbeitung/json_text_json.txt | 0 7 files changed, 52 insertions(+), 32 deletions(-) create mode 100644 verarbeitung/__pycache__/input_fj.cpython-38.pyc create mode 100644 verarbeitung/__pycache__/json_demo.cpython-38.pyc rename {input => verarbeitung}/input_fj.py (100%) mode change 100755 => 100644 create mode 100644 verarbeitung/json_text.txt create mode 100644 verarbeitung/json_text_json.txt diff --git a/verarbeitung/Processing_test_doi_ueberarbeitet.py b/verarbeitung/Processing_test_doi_ueberarbeitet.py index ac6ce63..bfa533a 100644 --- a/verarbeitung/Processing_test_doi_ueberarbeitet.py +++ b/verarbeitung/Processing_test_doi_ueberarbeitet.py @@ -10,12 +10,13 @@ import requests as req import sys from pathlib import Path from input_fj import input +from json_demo import output_to_json -def process_main(array, depth): +def process_main(doi_input_array, depth): # ERROR-Handling doi_array = NULL - if (len(array) == 0): + if (len(doi_input_array) == 0): print("Error, no input data") # ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird @@ -30,14 +31,21 @@ def process_main(array, depth): edges = [] # Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt. - for pub in array: - if (pub not in nodes): + for pub_doi in doi_input_array: + pub = input(pub_doi) + not_in_nodes = True + for node in nodes: + if (pub.doi_url == node.doi_url): + not_in_nodes = False + break + if (not_in_nodes): nodes.append(pub) else: - array.remove(pub) + doi_input_array.remove(pub_doi) - process_rec_depth(array, 0, depth) + process_rec_depth(doi_input_array, 0, depth) + output_to_json(nodes,edges) return(nodes,edges) @@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max): # Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe # noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich # wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert. - if (citation.doi_url not in nodes): + not_in_nodes = True + for node in nodes: + if (citation.doi_url == node.doi_url): + not_in_nodes = False + break + if (not_in_nodes): if (depth <= depth_max): - nodes.append(citation.doi_url) + nodes.append(citation) edges.append([pub.doi_url,citation.doi_url]) # Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation @@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max): # Programmtest, weil noch keine Verbindung zum Input besteht. arr = [] arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') -#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') -#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332') +arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332') #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') #arr.append('https://doi.org/10.1021/ci700007b') @@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') nodes,edges = process_main(arr,1) print("Knoten:\n") -for vortex in nodes: - print(vortex, "\n") +for node in nodes: + print(node.title, "\n") print("\nKanten:\n") for edge in edges: print(edge,"\n") \ No newline at end of file diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb7f56fc9742c5d19e2fff3d15c51dc4d59e1b1a GIT binary patch literal 4341 zcmWIL<>g{vU|?AJttZh-n1SIjh=Yuo85kHG7#J9eI~W)kQW&BbQW&EcQ<$QdQkbKd zQ&>`1b69d&qu3Z3Qdm;ha@eCdz%*wRX9`OSdk$AFcN8}xgF8bCM+#>PLkedzQxuOo zLkd?4cMC%bcQaEIZwgNeZwo^dUkYCee+xqte~LhgU<*T(K#EX`a0^3}V2VhJXbVG> zP>NWJcnd?6a0**6gQi5450_hMUUErheqOOcT7Hp2W?ou;QEnngK%pqLq$o4BEHOtR zKTRRAs3bEvC$(51ttdZNp(G<!!O=NbAuGSMC@(Qb!OPjxmy4^4-z_J<C^Ion!7D#G zBQ>wMiYG10Au%r{F*#o^B{hmGpeR436l~i|P~d1X-r{viO)M?ROe@U^&Mz(S(`3BG z5|CJukqnYX#!RsAlVD(ANM(p(Oks#(N@s{-PGL-8YGH_CNns9V&}6yA9Z;H-lbH<m zKr%a$2_QDe5NA*%XfQA^)G*dC#52?|)iA^})-cyF#52{f)G)*|*Ra+w#Iw|})iA`f zrZ5CEtYq}lWV^*$l39|IdP|@HVSIc_VoB;Pp5*+zlA_F{(vtk5;#=$~`I+&hMLD-P zic6ESQj<%HZ}H}p=Ef&ymVlj9tjT_hr8qSwt%!|*fuRWGs3Hy!ixWhEl28#hC>b#n z@iH(l{0eloiU}=FEh>)jP0T4tjd9N`iE+*h)(t4i&q~cMiAm1XE!Hi7aCFO3ixP{H zQZq|R^U`BbIGK3`r6uucS$YMPw>aYCGxIV_;^QSj0l@<bA!as47_1UO3O_xVQa?>r zkiNXc+|>B^TU_z+x%nxjIUqJqe0*VPVh&se?$08AkcU8?0Q&?&@PWiw7#J8tL3V=} z984@sMIZ+52p2($aA;&RLnE8R85*p~tRR0tF^CP1X&F2*4T{|&kZMg9aCF>ahbCu4 zlxVURfr1DuT?BG9LJHy~un)oh0(n>k<S$TsGC)JLN)YBmq`+h;5+AcznG5g@Hd z#v#lE`CS=gCdfY=j71=RG9%QVAPS@tl!(Bo`US|Z3|S1IP)cF$WvXSYVax&*D6Ak} z7Go_Fl+Om`GlThzDeNG=3qvelElUl{0;Uv>g^abVg*H%GPLOOZTRKB6dkxzH<`k}l zj4lkZBDEYf><d_GI2JIaa4%$B$jHc0m;u(olfnzu!I{ob%T>df1uEY7Kq4*-u>!T+ zHCzi=YPho?I_80OFsAT><ZF3qc(PcFPo)Ud^5$`X*jbFVyd`WkOf}3kTs5pU>@~d2 zY_)tfe8CKwf_}G{Q!>k{WT6F9d`4<wN@|gAPHI|-LP2Rx4w$u)@fKThPGWI!{4JK^ zg2cQk?u;C_oD|&<P;Rc`D=1CU1?6Jhvc#Oy)GARlvr_UiL3x=aqbN14N*qnHB(Wkt zFF&{P7GokfdE8<u&52^p$xo~jhS=c@DHt3}OEU6{qC}wDeL;n4a&dfeeraAw5hyM} zMM7F;Udk<&lGKWlTg>UHCAT<00`ZACIk(sn3kp*6Qi`NO2?dl>H2H6_6z7)~++u_H zIEoi)ay+<*0+%VbSaLG+vWqww7#MD`6qhCy7lDeFTg=6!Nw-*%^Ye<q$@ms?a(-SE zOI~SiGPu;a#avLDgp>|J$@-Q+dTL2L%mz^Hpan{gI-tbI$j2zaD8MMh$iv9T$im3S zC<LZKBp)dKGjT9+F)}eQ{byk+0_j4@<Dd!=l=r|X*NcIHp@boe5uS1xQdnvkYZw+V zrLclin?fy94buYV6t;zoDeT!yMKU#vU_Qq}MstQ*#zL7AmKw%pMn;Aj#$X0bPQM~C z1_p*I+2qU;-IV<Dyqx^R6y21f`~nas9#mqd=9LtQgVcy*l#~<{Tj}c;lqMDHB_<c^ z<rk$xLK{>>SBb+iutH*9ib7FpT53^hUUF)2krV?1gC=W{5-46|L4*Q`kOu`hYe7+F zUWq32Ew<E(g8ZVATg;_JIYpqt0c+@j90hVAsN!$}g)S)I8Ngx52M%Kn#v(li1_qRz z4+=j}asY>)3pnR5U`SzH$XFy*!>|Axk_(wqm?Rl$8B3T_m}?lt8Ja;Q0Mi2Ig$zX& zB`hf{pzzA$C}Bxq1+hS-p(dN(FGky!pj27Kp{JjgnUh+qUnT68nUkuJm{XLRm{O^b zT9H{?QVbPL$xkg-$jdK*h-$JHfkNUITTyXAPG(6F$c?wyGmGOv1{G<80*tvx7ZgGq zAQ4claf_uOu_OaMSU6xo;tdK8PzlVy$i*nY$iv9N$i-A-3JMI6;h<Cq!k`ipl-R+6 z@rr?gA)NtIxg#Yh=5&TymKx?PCQxe&RNS~Q#7fk%*03yKu3^n$S-_gYmcqV}30#n= zr8CsBLDh0V)e1n0zZ&)|7O<M2bcR}v8s-H|DV*RWDpJc?!?A$5hI0WMsE7osDFPRP z5Iecy=75So<{ECOIkQUGQ+P@^K*_h6$%P?StCpvRA&X%Fa|ve+OAU`CLk%04<N}i% zAhH?af?eP!<Mq46l%G>2nVeaYnxYF$+wsXsm9T^hPUl6Epv10@QxPnS>w*iBTa0?Q z7^>88tA<vTx}Ye8WEN0%sM5o&16qWD9SCbKL5e@FB4JR@0i~QGa2`?wapgeCguN)W zASW?7wMZVsV=YR|OHaMUoS9bwt_d}Hi!?z>K`kb5KD@=Al3J3OnNxg=4eX6#aKRkK z3w3Qg$hF0{SU^-12ehSD3{H@c@{k=AdPSK@w-__gQ=}lM7=*QM;z8wL2q<Y9fl?nM z8#u4BF*5z<ViIFyVT7<?Wg>zv#3;hZ#=!Q!$Ptu)LAea19fU#kJIEX01bhr!8m2JT zFk~^#W=LVGVaQ^d&5*)e!;r;1n<0f|E;}f{mN3*XWU-{Mrm$r)_AxRtlrW?q#95KV z*=iWF*qa$sII>xac9p<*oY_o81tknATp+XZgi07vxIyY>Go<j$Wd@1WFl2GKFvRNB zGL|r;@YXP9f!xAZ!<fZ6n<0h2hB1qEHbaU)4I|i1f^#`RdI~%8I7%2&gg|U?;uQ9) z5^{v}92CG&Vx^#<pim{uh1ASc0M)=&AVC*@Pu<WUAFvvbIt7I)ZZ581XiM7a7ISfG zK^2pZLKPnuv~mTvx~;(JsK@}6!Y#Q%GBS%5kecZVnZ*j=WT}u;3GTO~<>%z&muKds zBdfCFs?s4vYZX7T?qVyhDq${=lfexOxCfwuUXWfAR2V9V;scQHLE2G6po$l4CP)H= zt1Pg(5LEoaJ(!qRsgPfiky?c8JUvY=aOn%q4&Vj?xM=`x6o6BHku^LYf%77$rK`yV z&Rw@SV5uHGOYnhfx%h(8ByfJn1my=%HO0Wl!^rZVg^A@K7pnjx2crNJs21a55@O_H zWP!@CFthw+VPW~h$6f@gyHIL2P^|{S;Jg5;4@(#pFxD_EWXxi!Wh`N?VN79cW~yZ> zVX0wiW~^l{VXa}#VrynhVajGHvMXV)fs52IH8V3Z)POpM%zjne86~+ndIgC^#i>Q$ z1f$7ui@7MZ5L~ttfr}faq+)PG1t~Z{IaQMdTt-B3f=q}9HPXPP4LEqv1BDgb*J=WV z1*F>IVdP@s0Jn{b+(8wf$SwBx_>}x)NJlCW+&GMnzr`6JpPHMPnFA3nE=epYEsl@B z#hg@ZQUod-irhdYAV*PZVQFe{NpX=Yh|64DS$vBfRDtGXCV^{Pa9;{sI6^!EiVtvg z2`+NLVGVW$lCwc|XA!8yaf`zy7u>J216f!M3K14o7EqM2gX&!lMjj?UK0bC12@XyU JP7YBHNdSGl8^Hho literal 0 HcmV?d00001 diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..519d14366f17ebf12bfdc3f6ae2e985ad9d6d229 GIT binary patch literal 750 zcmWIL<>g{vU|_gqJTWnfnStRkh=Yuo7#J8F7#J9e6&M&8QW#Pga~PsPG*b>^E>jc} zBSQ*v3QG$^6mtq=FoP!ROOReaO~zZSnRx}JCCMP|AR!P2S;Wb}z~BrrZUqAaLkUAO zLoE}SWG-QBW~gPUVQ^uH)u?3!^Vn)wvzThwQW$L*YFM+FYuHklY#3^o7O>Q?Eo5Y5 zC={z<bYY0qsbvSN=BQy$VU}d5;YeYzVW?qGVFk0<Y#3^o7qHZDfOWXkFx9Z6u!F>E zSW-A_7)n@EI8(TqnfirlIZN0Uu-CBEa4uwGWC&+4We8*lVu)Z!VF+f><o3J8l9!m9 zdW$(FKeLDt6kcq_`K3k4sV^a%lEk9))RJ4QdHE@+#kW{<GV`*Ft9Y}D^Yh|MQY%XI zN-9cjF_vp`++xYeEH1gll9HKRa*MSjvm__=7JEv5W_)Q;&MmgYf`Zh%lv^zM1*v(r zSU|dOv80sd7HD$aVhp>*=z2>Cq&q%8Egob~Jjjq@gji}ydMZ?m8zco%mza}tiv^_k z7DrxcIau}<3rOKDE*L-F=oWi^X-QgUPU=dAB3T9ohF?L>RxzQ)sYS&xzKJ;{sWI-E zB{9yK!MXuO`B|yiB{9jFy2ZK$5RPtHYEfcQQfg*NX<m8^3J2tY_>|P#e7%CoTYUMY zB?YA=@g@23AaPbu8c+hI3Pv_C<X~iD<YMGv;$Z}_co;#DgOP)Yhp|YWfq_AjrAQDI r>>S`^6`z(>1WH+8DX^Q72syAio80`A(wtN~kg{SH1_lNWMjj>r?i$Hd literal 0 HcmV?d00001 diff --git a/input/input_fj.py b/verarbeitung/input_fj.py old mode 100755 new mode 100644 similarity index 100% rename from input/input_fj.py rename to verarbeitung/input_fj.py diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py index e6cd618..77ce148 100644 --- a/verarbeitung/json_demo.py +++ b/verarbeitung/json_demo.py @@ -1,27 +1,33 @@ #!/usr/bin/env python3 import json +from input_fj import input def output_to_json(V,E): - list_of_node_dicts = list() - list_of_edge_dicts = list() - dict_of_all = dict() - for node in V: - new_dict = dict() - new_dict["doi"] = node - list_of_node_dicts.append(new_dict) - for edge in E: - new_dict_2 = dict() - new_dict_2["source"] = edge[0] - new_dict_2["target"] = edge[1] - list_of_edge_dicts.append(new_dict_2) - dict_of_all["nodes"] = list_of_node_dicts - dict_of_all["links"] = list_of_edge_dicts - return(dict_of_all) - with open('json_text_json','w') as outfile: - json_dump(dict_of_all, outfile) + list_of_node_dicts = list() + list_of_edge_dicts = list() + dict_of_all = dict() + for node in V: + new_dict = dict() + new_dict["name"] = node.title + new_dict["author"] = node.contributors + new_dict["year"] = node.publication_date + new_dict["doi"] = node.doi_url + + + list_of_node_dicts.append(new_dict) + for edge in E: + new_dict_2 = dict() + new_dict_2["source"] = edge[0] + new_dict_2["target"] = edge[1] + list_of_edge_dicts.append(new_dict_2) + dict_of_all["nodes"] = list_of_node_dicts + dict_of_all["links"] = list_of_edge_dicts + #return(dict_of_all) + with open('json_text.txt','w') as outfile: + json.dump(dict_of_all, outfile) -knoten = ["doi1", "doi2", "doi3"] -kanten = [[1,2],[3,4],[5,6]] -output_to_json(knoten,kanten) +#knoten = ["doi1", "doi2", "doi3"] +#kanten = [[1,2],[3,4],[5,6]] +#output_to_json(knoten,kanten) diff --git a/verarbeitung/json_text.txt b/verarbeitung/json_text.txt new file mode 100644 index 0000000..b5e7fa9 --- /dev/null +++ b/verarbeitung/json_text.txt @@ -0,0 +1 @@ +{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "doi": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems ", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"name": "Disconnected Maximum Common Substructures under Constraints ", "doi": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design ", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms ", "doi": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"name": "Machine learning accelerates quantum mechanics predictions of molecular crystals ", "doi": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID ", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies ", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"name": "BonMOLi\u00e8re: Small-Sized Libraries of Readily Purchasable Compounds, Optimized to Produce Genuine Hits in Biological Screens across the Protein Space ", "doi": "https://doi.org/10.3390/ijms22157773"}, {"name": "Accelerating high-throughput virtual screening through molecular pool-based active learning ", "doi": "https://doi.org/10.1039/D0SC06805E"}, {"name": "Compound Screening ", "doi": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.3390/ijms22157773"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1039/D0SC06805E"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}]} \ No newline at end of file diff --git a/verarbeitung/json_text_json.txt b/verarbeitung/json_text_json.txt new file mode 100644 index 0000000..e69de29 -- GitLab