From 396593b91d358c5a2640db392d002b220573a7c2 Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Sun, 14 Nov 2021 17:22:05 +0100 Subject: [PATCH] updated processing_test_doi_ueberarbeitet --- .../Processing_test_doi_ueberarbeitet.py | 37 +++++++++----- .../__pycache__/input_fj.cpython-38.pyc | Bin 0 -> 4341 bytes .../__pycache__/json_demo.cpython-38.pyc | Bin 0 -> 750 bytes {input => verarbeitung}/input_fj.py | 0 verarbeitung/json_demo.py | 46 ++++++++++-------- verarbeitung/json_text.txt | 1 + verarbeitung/json_text_json.txt | 0 7 files changed, 52 insertions(+), 32 deletions(-) create mode 100644 verarbeitung/__pycache__/input_fj.cpython-38.pyc create mode 100644 verarbeitung/__pycache__/json_demo.cpython-38.pyc rename {input => verarbeitung}/input_fj.py (100%) mode change 100755 => 100644 create mode 100644 verarbeitung/json_text.txt create mode 100644 verarbeitung/json_text_json.txt diff --git a/verarbeitung/Processing_test_doi_ueberarbeitet.py b/verarbeitung/Processing_test_doi_ueberarbeitet.py index ac6ce63..bfa533a 100644 --- a/verarbeitung/Processing_test_doi_ueberarbeitet.py +++ b/verarbeitung/Processing_test_doi_ueberarbeitet.py @@ -10,12 +10,13 @@ import requests as req import sys from pathlib import Path from input_fj import input +from json_demo import output_to_json -def process_main(array, depth): +def process_main(doi_input_array, depth): # ERROR-Handling doi_array = NULL - if (len(array) == 0): + if (len(doi_input_array) == 0): print("Error, no input data") # ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird @@ -30,14 +31,21 @@ def process_main(array, depth): edges = [] # Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt. - for pub in array: - if (pub not in nodes): + for pub_doi in doi_input_array: + pub = input(pub_doi) + not_in_nodes = True + for node in nodes: + if (pub.doi_url == node.doi_url): + not_in_nodes = False + break + if (not_in_nodes): nodes.append(pub) else: - array.remove(pub) + doi_input_array.remove(pub_doi) - process_rec_depth(array, 0, depth) + process_rec_depth(doi_input_array, 0, depth) + output_to_json(nodes,edges) return(nodes,edges) @@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max): # Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe # noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich # wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert. - if (citation.doi_url not in nodes): + not_in_nodes = True + for node in nodes: + if (citation.doi_url == node.doi_url): + not_in_nodes = False + break + if (not_in_nodes): if (depth <= depth_max): - nodes.append(citation.doi_url) + nodes.append(citation) edges.append([pub.doi_url,citation.doi_url]) # Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation @@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max): # Programmtest, weil noch keine Verbindung zum Input besteht. arr = [] arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') -#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') -#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332') +arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332') #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') #arr.append('https://doi.org/10.1021/ci700007b') @@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') nodes,edges = process_main(arr,1) print("Knoten:\n") -for vortex in nodes: - print(vortex, "\n") +for node in nodes: + print(node.title, "\n") print("\nKanten:\n") for edge in edges: print(edge,"\n") \ No newline at end of file diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb7f56fc9742c5d19e2fff3d15c51dc4d59e1b1a GIT binary patch literal 4341 zcmb7HOLN=E5yp!k2tM@mLmN9`9<>QKCY?$pm5s95vR95>>#P-LWnWlIp&$+@frbET zX6Ut0;3e9_9+Lckq@1#j{tNp9_K=@|YcARRfE>1xIA0Gyic+l(0gFA|gPy@mPxse7 zPn%6w!}I!IfAAl!Y1-dOSbR(fckmaVfiSJFF}<%dqi-;?Z!(KjBI~fyw{;D?9aZ~| zYHR%(+G^w;*86o`d!R9g)jri&?ZoIeAi1m#sSc^h8m#%L)^D*ETSB|Ume~s0Wwy%J z(5|p`b_MM!yUMPiU1Ro+*14X3<ldje0~sc<Xb%(K4r9<mpE}w+kUR{Y`%ya?wtX(c zK@>oXCx>l$5VY@Z?X*Y9gvU^Lxb@*9*G-r1M+p!8xcx9090ajQ8^h5ZKW6?Q>9L^i zKIRFVsIPqqGiV(>Z+;N?6B!OC(M~cMZ==V0?8}1zy<U6_2zT%o*Fgdew$u8?z6RUD zirA0EOzg+X>yIaUQ8-W=7*wG<dx)n=w(u7}0Lk=B^R&zW)iV>+$ShDZtAJXW4O(H^ zj`mvL?$~)nhB6BB<?-3{9`j|8HwH-zGwn@e!bM(XN$5>@lsjUwHwp$)<jr_;=nX=t zmLxjW+!8@F<TjSTX;IuEQX@hx#OolhHQq$?)o-^pcb|%Yi`_?lB!k@tq1@dHce=3S zD0n7!2Vqxq$A!>+9&n%U1)-e8`@83aFdk2&HyriG$GPKqVH`@&+rS4z14J`zefCS& z7wp%Yscv^FG%ofJ1JBD{&pX61M#MKf?`Yyj^EdN-@+C~HHnBij)MtTQ!Cw&L7?z=W zE<f;fsLc+%JZ!T(Y-g)9>!1Rq*+cyNq;EjD?4;>*Icaw+bsTxMbmy}p=~OtGs5mWJ z_1UY7`=Skh4}U@Dsg)K@S7u9GFs8+qp}|*(tP;7@R<!r;;xA}Fj?PIQ=peI)Mv@aL zul@&^OdA1O7+~d(##GOANB|d-M*7q!WxJG3CF`t8@;&W$t*Mn+uMFm#>eI^6d*$1j zn#<nTrq#@TWit0vhwl2+$*Qlc%z=LWR6o^q?dU*_01&AWHH>hx+KAw-ReoW4TF+dJ zs*j2he=WbUq((Hd#;79x#+Ike*dZQkZb}<5YGy9tvYItd>}e}&A=Fp4bCZS7)0-tQ zc?W^d0^W^+p=^&QQKY!ndTtLQUkDFjH}>PSeh}S{SoaA<bJ{{QcPWay0J%w!Ub!$A zf|+93I^e-Dy>{WH^j{=#a(JBUe&IdFB<h<{;-_oHH?|63xI2*t3Gc6$!ygf-4um&I zCcq6kOM--97_;1x!3&w2`+>|I>hSz1%58r<4r0c?0ZkPtoh57qSee_!j{D8o!W2X) zOv$Y%jGrk`f%0TeC}_z|u(<`m63UHp6KvlCJ`EJ=<R<KK!L9RUEXSLDfwIFpSctd< zqUkMt8NXG%0nWmIRkcK0<o||axVoVkZxG`&>RgPQ=%CnBp8F7t)JFQ;bK$k{UJbrg zQQp>u?;7wu`xM^#%-}Z=(<H-Ff2K|KqZ`u7^b`1Cro%UC+x#kKOm6~?x-9u5#;LF_ z<H?u=m_C*PjDL-E)(@l{i_P1&v3Jq)2cnno{leM=(do5W0Jo9HkP?Rh58^=}_*-}j zc=!$)ev?R>$hSc9%9w|-?3lS7ycj1OX9_ps1P)(o-PsB$aeM$JK{!d)C7Y6s5&1pz zoQr(2A0eo+-#sv}79#&t=Wl^imOM3>u>tEyph2cz(@qEhV6{_?zbh?fk-cIET!lD7 z!;ZcEmHx{w$tzQ*cY7Ej;onZz?xSt{5l4g_w}Tg<kfQ8l2`Yj(kwq`eL6*pEF2+$P zIju3ThQgzX_zxj<Ox~qV_;(ag*2t|f()~rVI5U&{8p=hI(p?y$0aLgJ|0#M0H2LhI zWJwuR8RMT|_F)c6?h7tu0*<WA92qk}1c+UqRseWN`6KI9h1slnYA7Ik8@hI>>&&4` z!CzJ#SxV<)=s1}P7p;|mSOXZES?!fg5UF&yg23XN_4zymfhHiZnCH*3${Nxk_dOvH zerMVs5PxOL8VZuehK5pvsH-SoaYH*PMt-i2tht>VNtABD*8|oqecK!C9nTzB`8lU_ zpML*RrCEq~i#kW|<y!jo<=SPU?9vey!9wAX_AVb$LWo-6tXwMK&*dnX7h~w#Bz_Yl zukv6V`GcSgEAGepL2e?pR3_{+d52V~Vp4I4@+**j7>V3g+Ykzv`^|E-s(=x>MT<s( zT~um9Ib?x{Rk9uym+Rq0k6a-b#O_oThYD(#Ji%ARk3lHcQ>@y$@y0c-&bb*T&fX9{ zuIsjDzu|W=>RdEaey43H2mS(#L@dJ%^uK5f73#=9iyC!gqP1RDDPASzddlr~sQ*dH z?5w}C(2q)Xgg4B2X7SIFO$(_86dmI$u0nxTaCLfv`5P}y>VxFK9ecnJjWv;{X)P_Z zHMC1;D`=O|(wbIY)@aPpXTTGUtrD-Cb8S0ay<6NI+UiI)+j!D7ccIK|QwH9o&U?T4 zu>17mkCYDSL9_0<J7p!kiTo9eQ{z@UZMj<uRrjXyBmN_pE&aLs<RBC%0Oq%VHbmu? z?Y(1lZy6?0lzbA#`-@teZhGtg8JaFFjusfc=F)<nP+>9OK-u}QxFwa{W#_pa(C%sY zc?+aX)k!btN#Fh26%zc;H|WR5?F6?ozPP+z$5rTCl(Fj4adzKO*N<Poc10YiSfom~ zW2orLotf8P42hP?avn^r;vvKsL1{_@Ok2qL)_>g!<*sFevY2bE0>Z8Gjb&Q@u`KJ~ zttzM7buJbttIabT<wJ>lmT9N@2o<eFCSw4~sez1dAfuVG0-(0x-v<2qeOb-sJ$P@z zkLhA)Zlm&(hf#0rbKIv2;Bj)32S*B93b-*)C{dxp&&)xQit7O-t$vMW@Tf>Dvc3Wz zN<D>TC~M~xb=Uem*fMQ3uyY6dU)A{o^mf+ss^_s}P+X*N_eF6CzUFzsA)u@17QmT_ z0B`PzpKt;PexGJ{u--|4^vi!mqA8AX<)j1}g?lR7stQqoW3drB50x$z<S1LKWh|Vf p+JIWS^ImbUd!Ifi2!O2$4otVIly@DyVYFJUs&n0`;djN^_#b_}8^Hho literal 0 HcmV?d00001 diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..519d14366f17ebf12bfdc3f6ae2e985ad9d6d229 GIT binary patch literal 750 zcmY*XO>Yx15FLBHAK4~I^nk>H4{;(A=>gOWiV#wYI8i~X0*V%`me`3lb#@oqOAFeZ z${q0!+8g{OUpe(JaANE#qS*4AdE+-8k2FVvK?=C+lOM%20Ql|5^+l09$D{8faNtD3 z)LAOYicTp39&q*zrU561FbpqI1^Yu{!?J2xGjr*i<qGxiOz`NR2nNnzO+5yNpwBwk zgAWhap=VJSE@>BWG6wVl^eB4toORI!A@J!|M^OD}9eZ`s#XPtTUBVfvVjg-wLO;Na z#Aobvv|~JWF|^4TOvrmYJ)_^Z*F6)R#~tf>7ueK0*o8N60PnF?Y-?-ZvZ`1K8}PbR z1ix-0UAJl`F0Y6w)It~=Ry7yehO(@V_1D2t*Hv!Bso627#*&j^Vwo(pv5c29RGZRB zVPkwVZ<VxB(KMpsmepu1bJ?hDSIscB<gFzy?6%9!>v`_#<n9suFDCe66WelEPFF~Y zKS!}mDskd7EOSDeZg~FK#&v7vr4+wF-ND}e988{OM_MSIy(**;*~`*olk#x%TGdN& zY_eH7(xc{zj6MrhsE?vFZMDe$BffnuR`pKv#ooXgI1st1a~BWY3_L&p68W7VrX;0( x;=I1!2}zKvd#D;RbrX$E{8;7lrE-(yul%2{!CkK#uWH^(@yy9ILr6%U{srzD$y5LU literal 0 HcmV?d00001 diff --git a/input/input_fj.py b/verarbeitung/input_fj.py old mode 100755 new mode 100644 similarity index 100% rename from input/input_fj.py rename to verarbeitung/input_fj.py diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py index e6cd618..77ce148 100644 --- a/verarbeitung/json_demo.py +++ b/verarbeitung/json_demo.py @@ -1,27 +1,33 @@ #!/usr/bin/env python3 import json +from input_fj import input def output_to_json(V,E): - list_of_node_dicts = list() - list_of_edge_dicts = list() - dict_of_all = dict() - for node in V: - new_dict = dict() - new_dict["doi"] = node - list_of_node_dicts.append(new_dict) - for edge in E: - new_dict_2 = dict() - new_dict_2["source"] = edge[0] - new_dict_2["target"] = edge[1] - list_of_edge_dicts.append(new_dict_2) - dict_of_all["nodes"] = list_of_node_dicts - dict_of_all["links"] = list_of_edge_dicts - return(dict_of_all) - with open('json_text_json','w') as outfile: - json_dump(dict_of_all, outfile) + list_of_node_dicts = list() + list_of_edge_dicts = list() + dict_of_all = dict() + for node in V: + new_dict = dict() + new_dict["name"] = node.title + new_dict["author"] = node.contributors + new_dict["year"] = node.publication_date + new_dict["doi"] = node.doi_url + + + list_of_node_dicts.append(new_dict) + for edge in E: + new_dict_2 = dict() + new_dict_2["source"] = edge[0] + new_dict_2["target"] = edge[1] + list_of_edge_dicts.append(new_dict_2) + dict_of_all["nodes"] = list_of_node_dicts + dict_of_all["links"] = list_of_edge_dicts + #return(dict_of_all) + with open('json_text.txt','w') as outfile: + json.dump(dict_of_all, outfile) -knoten = ["doi1", "doi2", "doi3"] -kanten = [[1,2],[3,4],[5,6]] -output_to_json(knoten,kanten) +#knoten = ["doi1", "doi2", "doi3"] +#kanten = [[1,2],[3,4],[5,6]] +#output_to_json(knoten,kanten) diff --git a/verarbeitung/json_text.txt b/verarbeitung/json_text.txt new file mode 100644 index 0000000..b5e7fa9 --- /dev/null +++ b/verarbeitung/json_text.txt @@ -0,0 +1 @@ +{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "doi": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems ", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"name": "Disconnected Maximum Common Substructures under Constraints ", "doi": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design ", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms ", "doi": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"name": "Machine learning accelerates quantum mechanics predictions of molecular crystals ", "doi": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID ", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies ", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"name": "BonMOLi\u00e8re: Small-Sized Libraries of Readily Purchasable Compounds, Optimized to Produce Genuine Hits in Biological Screens across the Protein Space ", "doi": "https://doi.org/10.3390/ijms22157773"}, {"name": "Accelerating high-throughput virtual screening through molecular pool-based active learning ", "doi": "https://doi.org/10.1039/D0SC06805E"}, {"name": "Compound Screening ", "doi": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.3390/ijms22157773"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1039/D0SC06805E"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}]} \ No newline at end of file diff --git a/verarbeitung/json_text_json.txt b/verarbeitung/json_text_json.txt new file mode 100644 index 0000000..e69de29 -- GitLab