From a07147ed5c6832ffaf6ad57ff03470d3c496a734 Mon Sep 17 00:00:00 2001 From: Florian Jochens <fj@andaco.de> Date: Mon, 22 Nov 2021 09:37:41 +0100 Subject: [PATCH] added updated input files --- example_input.py | 14 ++ input/README.md | 26 ++- input/__init__.py | 6 + input/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 290 bytes input/__pycache__/interface.cpython-39.pyc | Bin 0 -> 2292 bytes input/__pycache__/publication.cpython-39.pyc | Bin 0 -> 5659 bytes input/get/__init__.py | 7 + input/get/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 350 bytes input/get/__pycache__/acs.cpython-39.pyc | Bin 0 -> 3164 bytes .../journal_fetcher.cpython-39.pyc | Bin 0 -> 2224 bytes input/get/__pycache__/nature.cpython-39.pyc | Bin 0 -> 1757 bytes .../publication_interface.cpython-39.pyc | Bin 0 -> 2041 bytes input/get/acs.py | 105 ++++++++++ input/get/acs_fj.py | 100 ++++++++++ input/get/journal_fetcher.py | 60 ++++++ input/get/nature.py | 63 ++++++ input/get/template_.py | 52 +++++ input/interface.py | 77 ++++++++ input/publication.py | 183 ++++++++++++++++++ input/requirements.txt | 2 + input/tempdir/input_fj.py | 144 ++++++++++++++ input/{ => tempdir}/pub.py | 0 input/tempdir/test.py | 15 ++ input/test_doi.txt | 4 + input/test_input_get_publication.py | 28 +++ input_old/README.md | 3 + .../__pycache__/input_fj.cpython-39.pyc | Bin {input => input_old}/example_urls | 0 input_old/input_fj.py | 154 +++++++++++++++ input_old/pub.py | 32 +++ {input => input_old}/test.py | 0 {input => input_old}/x | 0 32 files changed, 1074 insertions(+), 1 deletion(-) create mode 100755 example_input.py create mode 100644 input/__init__.py create mode 100644 input/__pycache__/__init__.cpython-39.pyc create mode 100644 input/__pycache__/interface.cpython-39.pyc create mode 100644 input/__pycache__/publication.cpython-39.pyc create mode 100755 input/get/__init__.py create mode 100644 input/get/__pycache__/__init__.cpython-39.pyc create mode 100644 input/get/__pycache__/acs.cpython-39.pyc create mode 100644 input/get/__pycache__/journal_fetcher.cpython-39.pyc create mode 100644 input/get/__pycache__/nature.cpython-39.pyc create mode 100644 input/get/__pycache__/publication_interface.cpython-39.pyc create mode 100755 input/get/acs.py create mode 100755 input/get/acs_fj.py create mode 100755 input/get/journal_fetcher.py create mode 100755 input/get/nature.py create mode 100755 input/get/template_.py create mode 100755 input/interface.py create mode 100755 input/publication.py create mode 100644 input/requirements.txt create mode 100755 input/tempdir/input_fj.py rename input/{ => tempdir}/pub.py (100%) create mode 100755 input/tempdir/test.py create mode 100644 input/test_doi.txt create mode 100755 input/test_input_get_publication.py create mode 100644 input_old/README.md rename {input => input_old}/__pycache__/input_fj.cpython-39.pyc (100%) rename {input => input_old}/example_urls (100%) create mode 100755 input_old/input_fj.py create mode 100644 input_old/pub.py rename {input => input_old}/test.py (100%) rename {input => input_old}/x (100%) diff --git a/example_input.py b/example_input.py new file mode 100755 index 0000000..a8331cb --- /dev/null +++ b/example_input.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +from input.interface import InputInterface +import input.publication + +def main(url: str): + #print(get_publication(url)) + print(InputInterface.get_publication(url)) + #pub.print_pub() + +if __name__ == "__main__": + #main("https://doi.org/10.1021/acs.jcim.1c00203") + #main("https://doi.org/10.1021/acs.jcim.1c00917") + main("https://doi.org/10.1021/acs.jcim.5b00332") diff --git a/input/README.md b/input/README.md index 76bd11d..0ebd7e1 100644 --- a/input/README.md +++ b/input/README.md @@ -1,3 +1,27 @@ # Projekt CiS-Projekt 2021/22 -Input-Skripts +Input-Package to fetch publication information with a given url. + +## Usage/Examples + +```python +from input.interface import get_publication +from input.publication import Publication + +def main(url): + try: + pub = get_publication(url) + except Exception as error: + raise error + + print(pub) + pub.title = "Cool new Title" + print(pub) + +if __name__=="__main__": + main("https://doi.org/10.1021/acs.chemrev.8b00728") +``` +## Authors +- Florian Jochens +- Sam Ockenden +- Julius Schenk \ No newline at end of file diff --git a/input/__init__.py b/input/__init__.py new file mode 100644 index 0000000..428d906 --- /dev/null +++ b/input/__init__.py @@ -0,0 +1,6 @@ +""" +init.py for Input-Package. +""" + +from input.publication import Publication +from input.interface import InputInterface diff --git a/input/__pycache__/__init__.cpython-39.pyc b/input/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c120e069d340aff9e417dc18dfa02e8284e91486 GIT binary patch literal 290 zcmYe~<>g{vU|=}ZIW4h@fq~&Mh=Yuo7#J8F7#J9e6&M&8QW&BbQW#U1au{=&qL>&# zY~~#1T$U&nMurrYU<OUrDp{_~yv!24f=Y$7{2~R<yn@ma-GId8?8Nj`Jq50pAfq)I zZ*d2dCgo%%CzfR9=V>zD;sdMm%qvMPN=r;m_0wd%#U3A@lAjzOe@ieEWQJY=LPHVA zO}F?V;+Zf_Ma&Eg3@aIm*clig#4kPljQreG{j@CoqSS)?Vtud%-GZY0tkmoh2v0vg i9^~Bkc)fzkTO2mI`6;D2sdkJY|FSSJFtD)jFaiLI>`t8k literal 0 HcmV?d00001 diff --git a/input/__pycache__/interface.cpython-39.pyc b/input/__pycache__/interface.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..845c6597fc1a11f6533754364429af32ba2ae9aa GIT binary patch literal 2292 zcmYe~<>g{vU|=}ZIW2J~7X!m%5C<7EGcYhXFfcF_yD=~@q%cG=q%fv1<uK+lMKLjg z*vvW1QOqffDa<)6Q7mAZHHtNbF@+_EEtfrtosq$vA%!)Ct%V_lt(hr`BZWDbL6f~I zipw*vB(*3lF*#KsEx$;iBqLS9Gq0euL^mKYIXf{uRUtnwr&6ILBePhcI3vF_Cq*GC zRUt7sIkmVrHANw<C_h&rzqF({GbI(G2&zqwi|ZxG^_q;gSjrP~vi&p}Z*d2dCgo%% zCzfR9=Ou$AkueL*dln1~45<uJj42FJOeu^}%qdJ!Ea?nUtf_3N?5P|n%yXDhSW;M9 zSehB5I8)hD*izVA7@8TQxKfyc88kU=@qt|r_f(Y=mx6+VLU3kYdQNIdex5>kMrLw` zLP}*`Vs2(~Vopw_LS}A3eo;xWLSkNuLT+MSVtQ(^LRxA`az<)VF<7&oCetnE(xRMO zY(=RhrA2wkj7Sb=VqjnZx!oBQH6aWP3?&R%jOh&7Ohq~+3=5bRGB6^GFsCpsU@2k9 zVqM7C%-F;TQtQGHD^tr@!?=L0hG8LNkx>of0(KCKiGh(Jn4!=J>@Vi3S74`r{1TFp zs*nkbG~L|Pl8pQmg_3-Q^wbiCL<OWU*3HZ-E=kNwPUV8@4M;3X%uOwU`3z*7E*!vB zSrvekDU=rFSb+$T;V1?p6qZyLq=J-KDHN9!A%r2}WQFV)1(;(L@{1Jw^7B#^GSd|D z@=FwoOAA2ZpPB;KUZv@muTWf&nw*)InXKRjjS7YGjMO4fA}!5JQAjMYQc$n1Rrk~6 zC<2B4Eq-W>#V6+^78j=$-x5wwEr~}L<Z~=3DauSLElG7PD#|aq#g>*|l$%&`ivy&% zAh9GPiaR+mFTS)mH69e-{9s{3qSa(7Vr5`pxW$WN?n;ItJ_ZJcU;6qP`MIh3X<7P3 zsRjAP`kCNtR8W+km6}}w;pxNUO|PJ`h=+lJK@^m1L_q0@hl`Pek&BUyk>x)d3l}37 zqX2W29BN|LLvs&GCISTmHv<C$I3Y?gGBBhtE?_8OT)<SqypS=AWdZ9#hJ}nJZ1L>r z47Cg;93`A3Y+3Bhj44bhOz8}1%*{+G%#sW)46$;xj3rzPxKmha7#A|7FqJYEnbj~X z;HhC;$jHc0Xja3J&XCPqR8+#7!n%MVg>3;JD6xW)Gb2L{NdLTA#;Uv${sjUl><bub zK<0C_GD$KlWL(Ht%Ty&+!j>htfTxCOflvx3NQ{vooFUJOg`q^a@(CkDEi)rSl~M_B z3Ky#0W=4>@NQM-qTIM?DJT3@rz+k`-&QMqe(mjE(QizcO1WQCxxY2btF-9^JmKiV< zmVvVqPt{6zY70(HEK1HuEmp|PQ&3M&Em7A2rOXm=VoNMaRfta1t%}vsE2vb+%}*)K zNi9~;z(|{#aMM7ANJ&v<YB4C&KuQ)+nVPEyR}q|81}b3kLCF|qBFJi3no7&eNi9}K zsIOwHuGLRZE%AE^Dr%}Y!S2!4E2ylJb^{xdmY<W8U!IwluHXxCpp`;(Z55M_LKP1< z34&6MUUluuzyJULk79>rrz&lTVsI`}NXbtv24}{M#IjU{L<M!IDs^3OcB+y`R-Bxe z2UeSu3N1QQQ#3`3K$X`mc2FM5$xOP%5|CJuaf>599^{+&_*-lRiAAY-B~k1}sm1v@ zWvSpyTLen3w^;I0D@uw$e!Im~lzNLbH?br+;}%<DK|yL>$}QG{qRhOKTP#`mnR&N3 zApr`qfEU7yhlF4eJ1AQU#K)(nmL$TmczpaVPS=X$)B;d_bBo&(T+D;>y(UYP5OQ$G z=OyN*7T;n4Ikt!wWE5*EsC)qDYY4&5z`#(%!N9;^0LrimpbX0?!OX%a$0)$a!^FWT z!py<Q#3;eY#3;td$7sOF$5^F=CG)~;1qW+tu_iYtj6k->$KT?Lhx#Ty{uWPsd|_!~ z4pfFcK0YNsIX)g7U=Y9Z6qh8HWF|vO|69z(B}GM`1_HQD0lO38Y*4`jVSzmWDkDJt zD+Y;kFtV_)FtRW)F$zgBF@j@6le0(-<b9_6VsMNU$$-2q1R}&i;)0L_Re&fGA-00d ig;)bJ7{UTsc#FdZ5<zw##}$JLJQfxnMh-CKVFCcf_jcI; literal 0 HcmV?d00001 diff --git a/input/__pycache__/publication.cpython-39.pyc b/input/__pycache__/publication.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ecb94eef74276d09e4ce203ffd67d6033002ae8a GIT binary patch literal 5659 zcmYe~<>g{vU|=}ZIW2LvBm=`^5C<7EGcYhXFfcF_>oG7exHF_Mq%gKHq%bx!MS*xs zDa<VlDa_4GQH<^kDJ&_hEet8F%}h~DDQv+En(WEQCV|-8Fw?3S7#LC+q8L*cqL@+` zqnJ|!Q(54UHI+q@A(btaHI-eG0YauQ&*21{&l<&%%96r1hcSgcg`<TfiZhifg)@b# zg`t@-iaUh~EW@3`)4~$PlgbO`@uu*#utf2t@TUm0FhudEFa<Mc3f<xkC{4=AOinDx z%+IS*=Td-zpwxn*)Z)~<l46BK1tcZKU=@CvoVVCh@-yR0i*jzUmSmRXq~795&d)0; z%1kOP$uBCt#h#U4T9lWVb4#EAVSao{VoB;Pj^fg!tkmR^;#<6VrMdCRnI&M$i*Ioi zrKY78rRF847T@B83nzo)1r%ru3=AMP69WT-Gbkns7#J987;6~f8ETkn7~&agm}?l~ znQB;S7~+|0SZf&KS!&p77~)xLI8qo(8H!BO8EP2f+49(GI6!m_Cs@=AEXtn8R>N7t z5YJx24i-%-;izHAVs8eS&Y8zn!(PJ>&zZs)%%I7XxQmH_0UQdTpbAJVO3Y0yNiBi| z3`j^94&bV+3P8#fpdn@jV<_aODU@WSDj)?BLQ6?yK`Kn4l|pez5kd-L8aUXk6u=Z= zlfcTbnu8o~Rymo)B_P8Rb8-}rB@mW_-GwY=r2x_t9g9!`aUa-M#Tlt7Rtlha2bo%w znv<GXoQhBhHVjE27H`5*ixoJipgfRappZjKnP`SW6&6EXS6otr;Xp)kgP83MO^FKS z8JWo$3Yo<UNvWxM3UFm93Q3g;B^jB;$QeQb&0e@_n9YfKl^8a|6E{|i5k_FxhGrR( zIp8=zwW}z#q_il{O2IEb4-w>knp{Po98?4<(27_X7#NCJK?EC!U<VPLAOe)pZZYSi z<`r=;FfeFx-C`+D%}FDm6Qr~Vu6-p#5ibJ+!!JYqjQreG{j@CoqSS)?V*Sj#g3=P* zf};Ga)a()nPam8FlELMGUP0w8j`;Y@yv&mLc#wmN<v^KVoQ)9yIT$&Z*cf3XA5)bG zQbN#!X<o^AiwzP6$&4T)pcusFgOwpZ3=9k@j0+fQ7_t~^7_yjBm=-YCFl4bT1o2pF z7_!))BJ2wpQ<y<=3qU0U=R!slJ{O3;kRzBulf|#9D+E*^T7k*}5U#G}a>>usEiKA{ z3RTx~IhK}W<QEkq1iT={B|-q~AyDp7$j`G<sIKJ-hL)NTCDpZDLGaQNB*ax+%LPlo z5OYByznF9sZZUFca^GT0%P-1JEP+HjC;{AJ$;!{n!<GVW38$x)z{}<M;*z4wymX{^ zWh~+WB@s~Mf@8P{6q>i#<3TFp<BdQ?Gf0Gik%dEuk%N(oQGik4OO-I#N6<Kiszr$x zP=W*{MR3^;s%2{!(iyUuiuh6((;0&q(iv(QYZzP@Vufm%YCti@w19CT!$L+zhC;C# zaLMd<iv^U_HJNU)B^DH<=A{%#fYL5=5hyW7@xw9;$eEdW>9;sw0uWDty#sQ0kr)F5 zgCocnAa61-aWJwmu`qHl3NTj5Bl`hMASHtW7Mjc$K>h*cDzJab7#J8z7-|@s8B0KQ zkrXJcEnu!;1i3#i3@XD?%1{(n!|1{gYf#Hn!cfBmRqX*)%>+_is8zz6!nlA9BEOKa znX!qnP%BReF3Q-<2oVM80@qk<j7T+>BRFV4)jFui02QS0WTTK@qyXb6l;kUbvb-KF zl#2DZ;H3||)`N@Stn*;ntq>;X7b(EgF35iT1vz#DKyHtQxfQ8=h2(cDG^fLzS!Ewz zjFh$&QY#8llS_)>ZYu@_fe*9}gc)C~qmY+hVx^#7U8}Ciev1WEY2D(0$%0c?krV?1 z!z~t&t8Q_IR2HPV78T_e6^TI7A1ECaNi#4oM6rOf6?%FDB~NcqdIK54z{SJJ$IQdX z!N|kN$Hd0S_MeLxM6xkgi6W;ySg;hMq&ZOf17UEQ16f<cki}5Ln8lC{YV9#r87MgC z=N2Rur51y0)6%@m!curE0~{CS`N*|fu_hz9%-3WB=lNT#`6U^tMQ9$l#TFl*S{NUn z2J!>Qb_S*@A!N5gq){?ENExUR40h--aOOw>6}gPF8B&-)33oO_3aB7vp3RWLGMBxX zkr9-A7_wMWSX0<C8T-Ke6ofb{k~kZr>6XHg%~G_h1jggcW-2NuVMyTunUyD0!jQrZ zQa76+g=a1^NUVk-i^GK>R<D+^gdv5uhA|7|7QPzBEY8^sDf~5zS*)`eQUq!kvp{YV zoXZK)Q`nKmQNoZS1Y)N!1T$y~`&9`!7L{Zs=cGbfW>%moph}nvDLlZ<AuEL{VJ;Vc zPu<WUAFvvbIt7I)ZZ1Sge~Y;|wV+6dfq|ilkINaUd2RI)RLHyp)nrwcTp^$)2~rIT zYPlq5mZU=4E2#=;`8hfH<(YZu$SqAPt|}c;v{vyW>n^t9suBigE7ZykDu`O0K?PBK z0P;S_%cwz6B?vYXBmu)!7Fb=GmtUd)_he#Tr2;r*B0Et}lj|01K~ZKNw9*5W#o)wT z1Zq`)E52LIAX#v;1(XTFEg@)92j^Gxgw6>zF20~NDGQX=L6sB(BM&3Xe-<W|e_X5r zpjv{7kCBg&i%E!)i;)E?!@|t+mxYDp4<9>f9)_Ey$?WInrzrqx_v9t!rpCwL;);*Y z%}*)K0kL`F;|og@bD%Qp@$o77$?@^Gn2Sq_ia>3nA~jIq!<?8`S;P;jm_fDPEuLaf zw<<X|wIm}y1ze?p3#wZzN%{FXMT#JG>L3DK?I8$o1qe!4pxUcghJk^BgOP=mfsx@a z8zT!N7bDMK9&J8;CPpq!K50IdB9Kaq&Mhc-!ChrD<jyTeDl-_eq%un~q%x;4&S6af zb(>kESW}r(SmrQ-a&QYv6k7^=FoPy%ku0d}WQ9Z+$T8r^fy5f9=8gxI!pWdG1!V;g z2C+fK3b;s70`(0+y+TM25li1N1>9yx>_@H};q}@u>CJ(AW}sr7^zIzC4pVYw2{_oT z6oLy<lQYvYQ;QWc(~v83MEe1rkF7uv1<I;WtteeQSal6*x50JzX|f`ksfcJVG61Cr zHgH9REm?r&(Q+&}L$rhPDky(52*SD$Rf4eU8{S(e5@29pK&hiZi5pahf)f-duOoL9 zpdAKK9R;elSU~&~rUl?G!$L-ISAf~CDulDTRu{E|tght(3A*HGA{F~kLDXUsDu`Na zR@ZWYYXnVpaO(o%X>k4krF}?fiGngVxN9d4Vxi@3Q1LJi6eJ+OF)*^QaDaMJjC?<< z1dsxyxTGjP-cOSeoDWyB7ioY@(gYD&AVLR3fD0dZk%JWUU>m_j2`Cr0!7SroX7~r{ zwlV$V5eJ)#QFMUX|KOqn)Qf<Pi-C&`=2UROk-|8K1zc>fU@bN{ZgIk!mYOWsQb{t% z3Q%zY!XP#%*uXIiD(t`mNmz;uaNW-6r^$jXVQRACPLtq}02AQg00q`+P;h|i90q8Q zwn_-@W&)`Zqzmj;P{^aDM&=Zz1)x;Ox{z@pOE7~bvtLy)A~1@vq(Zo@#k!cO5lIp= zRU%2Er%p)XWP>D5lGEizMA(4RCExEVLAYId&`zUYks&DYfddsmfP)v5YF2@a04077 pMhUhekcgiqGq|DxCpoYPwxoKC!v<30+JRhM4604|>KR!0Q~-3+NjCrh literal 0 HcmV?d00001 diff --git a/input/get/__init__.py b/input/get/__init__.py new file mode 100755 index 0000000..2a6ddd0 --- /dev/null +++ b/input/get/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +""" +__init__ for journalFetcher-module +temp file with nothing in it right now +""" +from input.publication import Publication +from input.get.journal_fetcher import JournalFetcher diff --git a/input/get/__pycache__/__init__.cpython-39.pyc b/input/get/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6f5d85fed9093448393bae5028b8443ca4550c9 GIT binary patch literal 350 zcmYe~<>g{vU|=}ZIW6%70|Ucj5C<7EF)%PVFfcF_D=;uHq%cG=q%fv1<uK+lMKLjg z*vvW1xhzpEj0`C(!3>(LRo-0j@tJv<CGqhJY57G8S^1?!d5Jl0sU^u7sYSZE`6;D2 zsaz$gxdjSonK`Kn<(VZJ3VHb@8JT(M3YmEdnI#HEnduoN3VHeETrWWm(PX^E9Z;H- zlbM`Yl9`{U$#{#;3&l`BP1alN@$o77$?@^G1T*srN=x(#5E_a=K8TWph^MEP=s}$n zp9XPL5i<h=!%BuCHU<U=@ykd*BR@A)KP^kYD77HJSRbrax1cCLD>b_W!qZPrEzyU0 ZRIi}&7KaVQYjz;pidh&K7+6?%7y++lWxfCa literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/acs.cpython-39.pyc b/input/get/__pycache__/acs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0f9cbd06555179aaf5d113b3567d24f4d08bebf GIT binary patch literal 3164 zcmYe~<>g{vU|=}ZIW2Lu00YBg5C<8vFfcGUFfcF_+b}RNq%cG=q%fv1<uFDurZA>3 z=P>0mM=>*k#8`4za#^ETbJ?QU7#Z9dQdm>iS{PE;Qkk2Xqu5iJf*CZ~tGc<IGct2h z6q0iii;ETV(-ge&ON;UnbKFu(k~30^xI&8)(^IV!oHJ6BvlTMa6hez~6p|D36p~UE zN{drd6v{J8G8EL46Z7Iri&NuEi*nSt6d)imFGZmwBQ;MU4P=x;L1|J>W^!UlW_})2 zQ+jGiJd&t7*GrHm{WKYG@u50Glj#<B07CmM4(H4gutCWn4agWCGF}V}45<uJj42FJ zOeu^}%qdLij8QD9tf_3N>?zE1m{M3$SX)?{8KXGBVw_+xwiI@-7*{H53P%cO3qvzw z6n6?&FoP!dEp}*tR2hK%7hI5<oSBxHtN;~ONXst*`BuTvIanbJ5`@KiV5Lzkh6YB4 znoPHtON(-Du@$A3losVBGa@;giGhIu<Z@?FoJuh;FqAMXU`%1GVOYpi%UHvh!X(L1 z!j#2a#t_V)$y}8OHWd`cp~b1i3PGvqsTB$(`3k8OB}IwJB{16+ieXMHE=?*fDay=C zFILFRQ*g=8gt!Z2LWsYMzm-C8X+c4LQHercK3H0}v?!+-Zb_B4Mn*|VL9xA+zP_ft zMoNCBUVc$Jh-qk`rxBA<ZBnbLA5*Ta7Za?l`4Z#}O_p0sMX9$~a}!IFGm2Ol7#MB| z1&0O%_y>i!y2N|=hX(mM`UGn-6|pfeFhp^Km8GVD(#}eTA|3_?hF`k+8Tq-X`e|AE zMX3e(#rm0f1*Ijr1x5K;so5nEo_>02iGE^ov0g#tEpDXbCk_fjkSiG2co?gMq0ys< zqyi=AWf>S4K$#63^fQ<sL0`kLkP#H{3z$+E7lMPomZ^rZmbr#$0aFSyL?ny3mIcgb zf$~{uS<@M6*}!6~3mIJ)Vg+j1YuFaB*05)>EM#P4DD(o$vBA}IfcfljK4&^ZEf-jf z10q(-UBkVADTNbi8cz+^0@fNHsA)4&m}+^`8EW}zm=`dma4lr4<*(sSXUJxrz*sb? zh98t@7BHo7gZ1vI6{rzdz?8zXkP)haD}{Ff8%zwOHj8Be`$C2kz7+meCP{{cjCo8c z0=0r*6@qYGdN4jj2Uv{|SWF16CJ4qyQX`zsP%8o!7Y4gWsFuA(1Q9Y57z<Ns_!h91 zaMTFa2-OJJh@^<5i1xCAL+wgBL#-&-k75fMYsG5Bz&>CqYN!zd*#P(Bv0CvOaY+1t zRWKIaf$>4=vRJ^bl>m#0LtV>M!~o+XsgVSWNx;>p!1zdNq|zB`rNQEokhscXS-_bh zwUDt^x<+~d*FuH`>?zU<8Ea)~WKv|ZnI|w6rPN3-V6Bk>`Kd+}>{p2z$r`B|=@eO* zUnejY?kM4|VX0xQVXNV&;jH1R;jQ7T5pCuSX3&(YS_IFNkW^i)kWrGGqW~&rlZ#Rl zOHzv!oKh1@GSf<Pit|egbTjjcOA_;vQ$e}DAh9SluLM%cBeFld6iY5H)-6s=EK1Hu zEe025j?Tfl&_cKv#RjBu7;XW=z$$*d#G;bS<eb#_jMT)G)S@C#Iaei=7+sQCl9Oty z<l^tC5E|s86kElkmyzR^lcE~}lBg1a@|^SYN{TX*97{_w@{6jt^pZ16birDy`1J}( zlXO!OOHy^q5_3vZt0d5@(k)4>$j{5qt-QsUSjC&4qg<4lmRgjWmz-LBizz3wie0ZL zHO(bIvq}hDo>}SZ!^*Pi+LxeG%I_A7Uw&R{m5`pFk8eEG9}wGbF=j%AlOYb&O)ttX zEx5%Q1hYev@fJsNW=VWWWkG6{6v*AFDY_uVsd**w$w`&4DyvEZw;ZaSRqA+DLaP|v zyu{qpDt$Z(Vcq~ar6@D0v?RZ%xQa<fp^8aQ;TA&?sHD{7DFW3%w>Ur*GAQNTVk=I~ zNlh-f#RjQ2Zn2c4R+QXgODrfz%}cq(mX=?Xn^<y-6(m~#HsTg*aY0UI$t~uB{DLB8 z1_lOAS#aroiv?s}6gP;3ridt>qSUl_P*8ycZ?UK3XM&3UTdd${zQu#=&?vS<a6-7n z4)t#oC%C~-oROMxi=((SDJwO(q&SKP%8G}W2<6@4%`43XSp;q>6yM@Pq=zVe1RJa= ziW4pn#SU^`N`59-EFSEtC~lA#%nbq{4ze?EaVA3>B*js@FecbsaGC(yAH@#xGT2xU z18OXY2{#tRK{hr@6kQ}fC$YE$Ttz|%UQpf24~jN$us~Z^pc=3O)V$!}V&r3FVHRK% zU}9n9W8`BJV1lw3`IuOkSs1w(xfq3*c>b#~GW};_=J_vzLWA@OFi9~X)S=2l)ibd$ ziZStl&5~i_VdVH<B@QW{iuBN2hS6LD6%OE<7nBTBK+PLQKZwsj%>X18*rPlpsl_Gn z;MQbnQ98(%pk&LyR3!ql4_SeqCeJO7_;^qTiI2a<6(66QpHiBW8Xtd)CqBNgG%*J% z!yX@>lAjzOU&IXxOrBy;%Q87PwIm}y<rZ^sNzpBqr2PDxB0dHN25`zQ(gv|~Km-;? zf}1=1Ah!uHFfed1vaqr-F)<2pFfj_bF)@N=H93k5K)N|W_DO=9HlTpggQf$R2O)V< r5F(Ehy%0x$0t{>#QXqky0}d$;8;ED^Kv}Vv1Jt}^<6z=p6krAb8vBSl literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/journal_fetcher.cpython-39.pyc b/input/get/__pycache__/journal_fetcher.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40e849f122b4d16639be285ad011e0852d92f38e GIT binary patch literal 2224 zcmYe~<>g{vU|=}ZIW2Jm8w0~*5C<8vGB7YWFfcF_doeIDq%cG=q%fv1<uK+lMKR?v zM=>*k_{=#hxvWvFU^Yt*TNGOgV+v~ydoD*52P1<!Lke38dkaGfdn!{3M=x^}X9`O& zgC=K{GFL!iQEFa^LUK-Gaj`;Levv|PL27blT4u6>SAJ<xUSbZ{OOR7EnQpN=Iyw8M zmL%TdOH3**DN0N($xSWE$WPH^yv6I3npj$rnO2$;oL^etr^$GWJD@ZvCo?&*Br`uR z8Ds!5hWjUlfq@~FA&N1DA&M!5F^V~rC6zUmErn?gQwnnmOAAXgV-$NTODabyYbqyL zlr@D7B-+dv#RV1T28*+&a6rX*K;kK!DO@cK&5X^AQM@VK!3>%_xA>smaZ4>p&PXk) zGUHNEP*4cT$ShXKELKQVfcZ%m5fW(-r3$(EDWy57#b6DZOt+Xzi*jzU6{VJx7Ud-~ zBKeD%fq?-Ok<Or~@?l_LsNySOSio4ru#mBqF`OaKfQ6xiA&V)Cxzdi2p_Yk}p-QBL zrG}}Qv57I9A(A15v6iWhDUS<68!#9!l(5z?X0fF(H8TY>Xfjtl1iJv_?4Z<=qRiB? z)MAD5)TH9flvLe}lH44H#Jm&*h`T^OLWvXI%)H`~#JuEGF1R59iA9OIsU@jJ#a3`J zT?BxOS(Q{4q$-pa<ya{cmlPp{3qblI;wkx=x}`-q3MKgpi3$aXm`5mvILk`GDK!zE zP7rPa`3Z#ms<=TO(JM$SDo!oZWWU8xlv-GtT3k|mi#a{D<Q7+IMRIBZC`}bdae9P= z1h^Iz<rm%J3a%_JNzHYw$Sf&hVPIg0Vo%P`D@n~O(PS=S2l1JUQVVY}rxq2hWGLce zU|{%Vsh^Rbo2s9drC*d<kYB8ynO9I+qFYdupOu<j0^#YWr<UkvLGnO6G%4s6RNmr9 zPc4Zr&Mz$x1Vu9_z8Scg82K2580DC%#8Fe39!xDtIsv5_kTb#QM2LZbp@boY5t2?o zX(0s?+tu*EbA)C$-L%rYWRT}U;hdDJkeOSMlbV~FSCX2dkeR1YT$%(<yLxbq&Kar6 z*~JQ(X$t9?WvO|fxKhZ;%*!qY8IY(@Tv||&Uj)((^(I_nl_jb_zy*VPb*;KWMq+VZ z35w0?$%%RKrNyc7rA0X!n(BTlnQrkpmXs7_CY6??LgI%lEx#x?v1BD9I5xmB03rAp z7#MDGBN?C!3R5Ofo?@?(LUn^4k}@)aGYjrH@`JM|HL)ZWl(mowH+UjO1bdY&s>|_) z9w>z%<rq*17J*V$kq83=B(Wj|6+fD49i-rrL$wr~^pFh3D1<?w3{HVO&=iQ2_L3<N ztkAs765W#2;u5g$OEOX+RSZ%nS))4GN+B4UfUzYqkV&vy2nr)j#v)M8frJexOG8-T z1jU1_*c2&<q)~0uLssXf$#;t*K0YroH#I)~7FT>cq|}d(zr_<DUs#%$1C?Qqk59=@ z1_>9JB$i|*L+Zd=%*7=|;IgMk02FVas^At&Qht6;ktm1{s_}}%KrC?(fp9RG1#)!} z8v_FaFUYYH3=9k$j4aGdj6#A;j7*Fo><AL9N|W&xXKrdqA~?5eaurE~auIW4QgRU} zPK!WxM=>WAn?T&d36d2A7jSw=>7fYhB#@<$kOMmeOn_W-i^B%ue>;$A#h_A-g++jo KhmnJkhZz7XWKkjj literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/nature.cpython-39.pyc b/input/get/__pycache__/nature.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c6ae402187f9b6c4bda32d8c3b57e41947bfce6 GIT binary patch literal 1757 zcmYe~<>g{vU|=}ZIW4i1oq^#oh=Yt-7#J8F7#J9e4Hy_0QW&BbQW#U1au{=&qL>&# zY~~#1T$U&nMh16=6qXd$7KRkoRHkO;DAp9VU<OV0s%|dljLe)Ah2)&X;$nsTGzG8x z(xSY?9Jkbx<c!oJuF&Gd^i(Sa=Zw_kY=z7;h0vlLh2+FMg``x4(&E$<h4Rdj3<dS% z#Ju>@;?(%kq8xQD1qevYOHnAvNX=770~w`IP@0sJnVeXXnV$#Ml%84=k0h$j^%CSU zO~zY%sBX|?yu}@W(47oYg^b}r;K9JakjfCnn8Fanl)@OroX!x%lFFLOmdc*OG>0jL zIfbQ#rI|5`11!b~7Gq6e1B-E`vZk=7aI`QqGe&Wza0WAIa@}HwhC!7T*h|I9MTrHe z#V8)hO)W`GNi0cJNGr<ERY+7QD9SI(Oi4{qC@snXYtm%8#avpHbBnDgwWPEtFPRa^ z;UG2$JA<MR<cSo<U<OU5Drc}xP#}Usy%-ei>6vAzc_332ax(L>ixo=p6%rMSOA89} zi%L>c6tW<ZrU%#Qwvs`Uv51v{fuV>UM*K3+&&bbB)lbXPFG?-QFV@e@D=00|Ehx&* zO3f~T@buGDOZ4*+OG=AU^$IF)aU(^n1jw08Aop`r2}6TM4@n-yZ=giW!N9-(_S-E6 z28I%b1&lQe3mIz}Qy6O)vzW4&Q<zeiGZ|A@dKnk6q_8ez6lbVqg3GWW%hWQ%#o1BB zS>WOvDB`T?47F_O47KcFdDaxB6i$e|3q!0(Ek_O80=60sh^-4585s&Y;Oe-L)N$6Z zFJP<TL{hh*guRBThPj5VhNXtJhCPM5nLU_6lc&lM9`4RXsfi`2#R`cENGVn~Gq1QL zF)uk45kFM|86_nJ#a8<IDfyXt`9<maw^;HLb5o<3UHm;Y8E>&BmXs6~-(pYB&nrpI zD~aMu&MX1vo%oW>lAP2iF*qL@^$_tY(SoAP;#@tXY~_+zl3K-;lB|~ua$0^7m|a|& zl$DxXQUpqVnjA%-SiQvo%8$kQr3JTGGEx&$Zn30g=B3<XPERel#SUUR=H%RBODrfz z%}Xg_W?*2@<N~L!TPz?wQ5^9p`I(^nAH@c7N)$&t)M-&%kZfL@k(v_48xQhQQD#zU zNq$jr6lXltDaBFjPzT;(hYBH5ZV@Oc-Qov15Xomspacm@l?-fLOfrlTV93JA!X(Bh z1jbe3uw;s+6Qc+Lg#$RLg0mq*FoPzeACmR(ECg`~Pf2QVNj$h%OD(bmxdIe`3`|ub zF#C`d_-XRo;)st2#cq83Ew1?Z-29Z%oYeUETRidcg{6r(P#O04_>}zQ`1mNH;Lv~o z|DX_8mv}G#&>%lYpWs_O#h~a<&P^@J$WOV&TwGFgizO*PKc|Qjl)FLEQ6vgtiGc`& zbHOZ7;1z*9#s_k$00RR92NMe$3lkHg5GNC(kQ5UmSk_OItw@T2f#H@UxLnXnPc6}d t#vshEkc1)#kw=ONaC!h6f*xofyKiyWK)h@Ra%V9c0|NsKI}aldGXVOz(6s;n literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/publication_interface.cpython-39.pyc b/input/get/__pycache__/publication_interface.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd087d9c0a0a26e010a6faa45f267a8bccf438f3 GIT binary patch literal 2041 zcmYe~<>g{vU|_iZWpZK(7X!m%5C<7EGcYhXFfcF_2Qe@(q%cG=q%fv1<uK+lMKLjg z*vvW1QOqffDa<)6Q7mAZHHtNbF@+_EEtfrt9jt~mha;CWij$GSogsxSg}sF#g}s?6 ziYtXVm_d`H%7M$JGA}VVGdZVHAv3oizo?{GAu%UMp|~J5IWsLYS-~s6v?wnz$1Sxa zIU}{GSRpYlMIk4%xTIL2BqKFfkLxAKy_$@-SjrP~vi&p}Z*d2dCgo%%CzfR9=V>zD zVu$KX2B|~FtT4Z2FfcHrGDI<^Fhnt>Fh((_vMgXtWn0LY#lC=JAwv{rIztp!Dt9VR zDsL)p8j~bL3ez0M6y_9`7M5nlD85wgRL(TUR35M>YYJNnYcpdMe=2t>Z>m5F`y8ee zjucL?O2HJaU<OU@TjK7iB}h*6%qvMPN=r;mt;*$6P*6|^&df{CNiE6GQz*$-NP&k< z8p!|PkiixpMX4o4nW<%|3W*9xM(cX!6_+IDC8sK+<rgWWXO^YrDU=rF6oc*2WW2>z zlv+|+l$Xqi6f7(Z3=E)<aR$Y@3j+f~IzugE2}22E4MQ_y3S$amIzt+BGgAtaBttDz z4U-E)tXwT~3DW}R6y_S{g-j`orHn-iHH-^bYM2)?GBOk@)G!7!XtGr82U`mApmRoQ za(1yoW|~4~o`QNoVo8R&LSj*>LKY;V<H0^nEmBC!%t?hrH^iCX2+u51$V<#kg&XFS zSe%-oke{beo{?ArGcB`NA+toGJQI|RQWJ}kGZYdPQZm!hic<4R6jCyaQj<&aiz@Zt zh6I-+=A|SSr6^>A?8q#}rpE6j69WT76=!s!ZdI(dUO}ZM>n)bN)QXZKkoYa8qSRZg zxrrso8MoLH3kp*6QZ!j^v4A`m#Rv0IJlIE3TwogHi{e`>Afc5EMSKhl48QF4GxBp& z_0zKSi&6{ni}f?}3Q9|K3ySiyQnO1SJpJ_468!>1`i{?pr5?S4%3FfzsU@hEiGgB~ z2Nb;=EQ}J29E<{tJd9PEI8vY<hJK8s0V*!QNduH=N*ER})-Wt&tYu6GB?d+phS-2w z<|?HcrUgtT%nMjj7#FbCFsCr3Ft;*EGAv{SK}LpfhC(40h7z{Q1V#oBEMZS!K~>kx z*u)sgP$*=;P$*Qx6wIK>S``3~EKj6DA}zl(FGayOKcy5ssenodh5R&Vk&>FCkPA@+ zH>QdQ9K0Ye>s8nKz5M(C|Nkl}x6GVW1@-D$b%o@_yb^_^RJZ|}97UjPQ6#{?z)-}^ zz`$^e6H;8|WG3C>g)rkGmfT`X%P-1JEV;$$T9KSu04j%WaYLN#T2z!@q{&hQijpW^ zXi5jChvHlOFm7^AVsUY5aS<=bDp1M*Cyye2kfl6GQB<r7id_j%?6Pt(vM_QmF)?y5 zvM_S}XZp{@%)=<hRHZ^hOc!f1-C`~+$|+)FU|>Lra8S7bssX?e4vKkLga<QdGWuyU z6@dziTl^p=A*D%8#v(~fmlsJfFff>d91O}Y3`|w(_?(7hoS&x9EspqjP=1Mzzr_^~ z4dnRvTRidcg{6r(P#O04_>}zQ`1o7wpj-!v6_$X+k_?buP$r9yzr|LN2r2<?u@|Kl z=jW8AMsb#8WERJRloyGE1Gl&&u_QA&H?<@qKjjt+C=K0WE-op8L^>#-i$K8v_6j6U zP-6$2LO?>rLJSNH9E>b10*ow7OpFqIOpGFeOpIW8P3|H&kY!By#o(f&h!Yf8APq(G zAaOxRx<m>Au%%JF5b^ZX61~LaVsOxd&BNx9TO2l!q-X~U>|&6USeSVjIlz#Ik%t8U D-MT6t literal 0 HcmV?d00001 diff --git a/input/get/acs.py b/input/get/acs.py new file mode 100755 index 0000000..3a54a15 --- /dev/null +++ b/input/get/acs.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +""" +Child class of JournalFetcher +Usage: Check if Url can be used with 'can_use_url' + and then fetch publication with 'get_publication' +""" + +import re + +from input.get.journal_fetcher import JournalFetcher +from input.publication import Publication, Citation + + +class Fetcher(JournalFetcher): + """ + Specific Fetcher for the ACS journals. + """ + + # Constant for the abbreviations of the supported Journals + SUPPORTED_JOURNALS = ['1021'] + + @staticmethod + def can_use_url(url: str) -> bool: + """ + Uses Regex to extract journal specific substrings in Doi. + TODO: Support non Doi-urls + """ + matched_url = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url) + return matched_url[4] in Fetcher.SUPPORTED_JOURNALS + + @staticmethod + def get_publication(url: str) -> Publication: + """ + Fetches html and creates Beatifulsoup-instance in parent class. + Specific css-searches for ACS-Journals and creates Publication-instance. + """ + + # Creation of Soup + soup = JournalFetcher.get_soup(url) + soup_header = soup.select('.article_header')[0] + ref_cit_soup = soup + + # Creates Publication + doi_url = soup_header.select('a[title="DOI URL"]')[0].string + title = soup_header.select(".hlFld-Title")[0].text + + contributors = [] + for author in soup_header.select(".hlFld-ContribAuthor"): + contributors.append(author.text) + + journal = soup_header.select(".cit-title")[0].text + + published = soup_header.select(".pub-date-value")[0].text + + subjects = [] + subject_soup = soup_header.select('.article_header-taxonomy')[0] + for subject in subject_soup.select('a'): + subjects.append(subject.text) + + num_citations = 0 + + + references = [] + references_soup = ref_cit_soup.select('ol#references') + if references_soup != []: + for reference in references_soup[0].select('li'): + ref_doi = "https://doi.org/{}".format(reference.select('.refDoi')[0].text.strip()[5:])\ + if reference.select('.refDoi') != [] else "None" + ref_title = reference.select('.NLM_article-title')[0].text\ + if reference.select('.NLM_article-title') != [] else "None" + ref_journal = reference.select('i')[0].text\ + if reference.select('i') != [] else "None" + + ref_contributors=[] + for author in reference.select('.NLM_contrib-group'): + ref_contributors.append(author.text) + + references.append(Citation(ref_doi, ref_title, ref_journal, ref_contributors, cit_type="Reference")) + + citations = [] + citation_soup = ref_cit_soup.select('.cited-content_cbyCitation') + if citation_soup != []: + for citation in citation_soup[0].select('li'): + cit_doi = citation.select('a[title="DOI URL"]')[0].text\ + if citation.select('a[title="DOI URL"]') != [] else "None" + cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text\ + if citation.select('.cited-content_cbyCitation_article-title')!= [] else "None" + cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text\ + if citation.select('.cited-content_cbyCitation_journal-name') != [] else "None" + cit_contributors =[] + cit_contributors = citation.select('.cited-content_cbyCitation_article-contributors')[0].text.split(', ') + # clean up of the last Entry + cit_contributors_last = cit_contributors.pop().strip(". ") + if cit_contributors_last != '': + cit_contributors.append(cit_contributors_last) + citations.append(Citation(cit_doi, cit_title, cit_journal, cit_contributors, cit_type = "Citation")) + + return Publication(doi_url, title, contributors, journal, published + , subjects, num_citations, references, citations) + + + @staticmethod + def test_fetcher(): + pass diff --git a/input/get/acs_fj.py b/input/get/acs_fj.py new file mode 100755 index 0000000..2880838 --- /dev/null +++ b/input/get/acs_fj.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +""" +Child class of JournalFetcher +JCIM +""" + +# import re +from input.get.journal_fetcher import JournalFetcher +from input.publication import Publication, Citation, Reference +import requests as req +from bs4 import BeautifulSoup as bs + +class Fetcher(JournalFetcher): + + """ + """ + + # TODO: Naming-Convention: + # Class: 'Fetcher' + # file: input_get_[journal-/organisation-name] + # format = "input_get_[a-z]*.py" allowed + # TODO: List of Compatable Journals + _SUPPORTED_JOURNALS = [] + + @staticmethod + def can_use_url(url: str) -> bool: + """ + Checks if given url links to a supported journal. + """ + + # TODO: Check the URL for compatability + # re.match in _SUPPORTED_JOURNALS + return True + + @staticmethod + def get_publication(url: str) -> Publication: + return input(url) + + + @staticmethod + def test_fetcher(): + pass + +def get_article_info(soup): + header = soup.find('div', class_ = 'article_header-left pull-left') + article_title = header.find('span', class_ = 'hlFld-Title').text + publication_date = header.find('span', class_ = 'pub-date-value').text + for link in header.find('div', class_ = 'article_header-doiurl'): + doi_url = link.get('href') + subs = header.find('div', class_ = 'article_header-taxonomy') + subjects = [] + for sub in subs.find_all('a'): + subjects.append(sub.get('title')) + cons = header.find('ul', class_ = 'loa') + contributors = [] + for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'): + contributors.append(con.text) + numc = header.find('div', class_ = 'articleMetrics_count') + if not numc.a: + num_citations = 0 + else: + num_citations = numc.a.text + pub = Publication(doi_url, article_title, contributors, "JCIM", + publication_date, subjects, num_citations) + #pub = Publication(article_title, publication_date, contributors, doi_url, + # subjects, num_citations) + return pub + + +def get_citation_info(pub, num_citations, soup): + details = soup.find('ol', class_ = 'cited-content_cbyCitation') + titles = [] + for title in details.find_all('span', + class_ = 'cited-content_cbyCitation_article-title'): + titles.append(title.text.replace('.', '')) + journal_names = [] + for name in details.find_all('span', + class_ = 'cited-content_cbyCitation_journal-name'): + journal_names.append(name.text) + doi_urls = [] + for url in details.find_all('a'): + doi_urls.append(url.get('href')) + # TODO: There are a few diffrent types how Contributors are listed + contributors = [] + for contrib in details.find_all('span', + class_ = 'cited-content_cbyCitation_article-contributors'): + contributors.append(contrib.text) + for i in range(0, int(num_citations)): + pub.citations.append(Citation(doi_urls[i], titles[i], journal_names[i], \ + contributors[i])) + +def input(url): + html_text = req.get(url).text + soup = bs(html_text, 'html.parser') + + pub = get_article_info(soup) + if int(pub.num_citations) > 0: + get_citation_info(pub, int(pub.num_citations), soup) + return pub diff --git a/input/get/journal_fetcher.py b/input/get/journal_fetcher.py new file mode 100755 index 0000000..097eb24 --- /dev/null +++ b/input/get/journal_fetcher.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +""" +Parent class for specific Journal +""" + +from abc import ABCMeta, abstractmethod +from bs4 import BeautifulSoup +import requests +from input.publication import Publication + + +class JournalFetcher(metaclass=ABCMeta): + """ + This is a abstract-class for fetcher modules + """ + @staticmethod + def get_soup(url: str) -> BeautifulSoup: + """ + Retrieves webside-html and returns a BeautifulSoup-instance + + Parameters: + ----------- + :type url: str + :param url: doi-url to a publication + :return: BeatifulSoup-instance + """ + try: + req = requests.get(url) + except requests.exceptions.HTTPError as err: + raise SystemExit(err) + + return BeautifulSoup(req.content, 'html.parser') + + @staticmethod + @abstractmethod + def can_use_url(url: str) -> bool: + """ + Abstract-function to be implemented in subclass. + Checks if given url links to a supported journal + """ + raise AttributeError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url)) + + @staticmethod + @abstractmethod + def get_publication(url: str) -> Publication: + """ + Abstract-function to be implemented in subclass. + Creates a Publication-instance. + """ + raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url)) + + @staticmethod + @abstractmethod + def test_fetcher(): + """ + Abstract-function to be implemented in subclass. + Unit-test for the class. + """ + raise AttributeError("JournalFetcher: Subclass hasnt implemented 'test_fetcher()'") diff --git a/input/get/nature.py b/input/get/nature.py new file mode 100755 index 0000000..d08d74f --- /dev/null +++ b/input/get/nature.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +""" +Child class of JournalFetcher +Usage: Check if Url can be used with 'can_use_url' + and then fetch publication with 'get_publication' +""" + +# import re +from input.get.journal_fetcher import JournalFetcher +from input.publication import Publication + + +class Fetcher(JournalFetcher): + + """ + scrapes publication metadata from a provided url + """ + + # TODO: List of Compatable Journals + # NOTE: nature does not use journal names in doi links, must match by 10.xxxx identifier instead + SUPPORTED_JOURNALS = [] + + @staticmethod + def can_use_url(url: str) -> bool: + """ + Checks if given url links to a supported journal. + """ + + # TODO: Check the URL for compatability + # re.match in SUPPORTED_JOURNALS + return False + + @staticmethod + def get_publication(url: str) -> Publication: + """ + Creates a Publication-instance. + """ + + soup = JournalFetcher.get_soup(url) + + _doi_url = "https://doi.org/" + soup.head.find(attrs={"name": "DOI"}).get("content") + _title = soup.head.find(attrs={"name": "citation_title"}).get("content") + _journal = soup.head.find(attrs={"name": "citation_journal_title"}).get("content") + _published = soup.head.find(attrs={"name": "prism.publicationDate"}).get("content") + _contributors = [] + _subjects = [] + + for creator in soup.head.findAll(attrs={"name": "dc.creator"}): + _contributors.append(creator.get("content")) + + for subject in soup.head.findAll(attrs={"name": "dc.subject"}): + _subjects.append(subject.get("content")) + + return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects, 0) + + # TODO: Exceptions-handling + # raise ValueException("Cant Fetch: '{}'".format(error)) + # return None + + @staticmethod + def test_fetcher(): + pass diff --git a/input/get/template_.py b/input/get/template_.py new file mode 100755 index 0000000..72f3cf9 --- /dev/null +++ b/input/get/template_.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +""" +Child class of JournalFetcher +Usage: None, this is just a template and should be ignored +""" + +# import re +from input.get.journal_fetcher import JournalFetcher +from input.publication import Publication + + +class Fetcher(JournalFetcher): + + """ + This is only a template and therefore has no functionality + """ + + # TODO: Naming-Convention: + # Class: 'Fetcher' + # file: [journal-/organisation-name] + # format = "[a-z]*.py" allowed + # TODO: List of Compatable Journals + SUPPORTED_JOURNALS = [] + + @staticmethod + def can_use_url(url: str) -> bool: + """ + Checks if given url links to a supported journal. + """ + + # TODO: Check the URL for compatability + # url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url) + # return url_re[4] in SUPPORTED_JOURNALS + return False + + @staticmethod + def get_publication(url: str) -> Publication: + """ + Creates a Publication-instance. + """ + + # TODO: Fetch data from the HTML + # soup = JournalFetcher.get_soup(url) + # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[] + # TODO: Create new Publication-instance + # return Publication(doi,title,contributors[],journal,publication_date,subjects[],num_citation=None ,references[],citations[]) + return None + + @staticmethod + def test_fetcher(): + pass diff --git a/input/interface.py b/input/interface.py new file mode 100755 index 0000000..c0d6df4 --- /dev/null +++ b/input/interface.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +""" +Interface for the Input-Package only this should be accessed from outside this Package. + +""" +from os import walk +import importlib +import pathlib +import re +from input.publication import Publication + +class InputInterface: + """ + Singleton which dynamically imports and manages fetchers + """ + + get_path = None + fetcher_classes=[] + + @staticmethod + def get_publication(url: str) -> Publication: + """ + The interface-method to get a Publication-instance + + Parameters + ---------- + :param url: url to a Publication + :type url: str + :return: Publication instance or None if not supported + """ + # Initializes 'fetcher_classes', the list of imported modules + if InputInterface.fetcher_classes ==[]: + InputInterface.get_fetcher_classes() + if InputInterface.fetcher_classes ==[]: + raise AttributeError("No specific Fetchers where found at: '{}'" + .format(InputInterface.get_path)) + + # Checks if module supports the 'url' and returns a Publication if it does. + for fetcher_class in InputInterface.fetcher_classes: + if fetcher_class.can_use_url(url): + return fetcher_class.get_publication(url) + + # No Module for given url was found + return None + + + @staticmethod + def get_fetcher_classes(): + """ + Searches in 'get', if there are [a-z]*.py modules (specific Fetchers) + and tries to import them. + Saves found modules in 'fetcher_files'. + """ + + # Path to 'get'-package + InputInterface.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve()) + + # Searches for modules with given Pattern + fetcher_file_names=[] + for file in next(walk(InputInterface.get_path), (None, None, []))[2]: + if re.match(r'[a-z]+.py', file) is not None: + fetcher_file_names.append(file) + + if fetcher_file_names !=[]: + print("Found following Modules: {}".format(", ".join(fetcher_file_names))) + + # Tries to import those modules and saves their 'Fetcher'-class + for file in fetcher_file_names: + try: + fetcher_class = importlib.import_module("input.get.{}".format(file[:-3])) + try: + InputInterface.fetcher_classes.append(fetcher_class.__getattribute__('Fetcher')) + except Exception as error: + print("Module '{}' does not have a 'Fetcher'-class".format(file[:-3])) + except Exception: + raise ImportError("Module '{}' can not be imported".format(file[:-3])) diff --git a/input/publication.py b/input/publication.py new file mode 100755 index 0000000..792d779 --- /dev/null +++ b/input/publication.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 + +class Publication: + """ + Represents a Publications + """ + def __init__(self, doi_url: str, title: str + , contributors: str, journal: str + , publication_date: str, subjects: list[str], num_citations: int = None + , references: list[any] = None, citations: list[any] = None ): + """ + Parameters + ---------- + :param doi_url: doi_url of the publication + :type doi_url: str + :param title: title of the publication + :type title: str + :param contributors:list of all contributors + :type contributors: list[] + :param published: date of release + :type published: str + :param subjects: the subject of the Publication + :type subjects: list[str] + :param references: the Citation which is been referenced by this Publication + :type references: list[any] + :param citations: the Citation which references this Publication + :type citations: list[any] + :return: None + """ + self.doi_url = doi_url + self.title = title + self.contributors = contributors + self.journal = journal + self.publication_date = publication_date + self.subjects = subjects + if references is None: + self.references = [] + else: + self.references = references + if citations is None: + self.citations = [] + else: + self.citations = citations + if num_citations is None: + self.num_citations = len(self.citations) + else: + self.num_citations = num_citations # braucht man nicht einfach len(citations) + + + def __str__(self) -> str: + return ("Title: {}\n" + "Doi-url: {}\n" + "Authors: {}\n" + "Journal: {}\n" + "Published on: {}\n" + "Subjects: {}\n" + "References: \n{}\n" + "Citations: \n{}\n")\ + .format(self.title, self.doi_url, ", ".join(self.contributors) + , self.journal, self.publication_date + , ", ".join(self.subjects) + , "\n".join(self.get_citation_string(self.references)) + , "\n".join(self.get_citation_string(self.citations))) + + @staticmethod + def get_citation_string(citations): + if citations == []: + return ["None"] + else: + citation_string = [] + for citation in citations: + citation_string.append(citation.__str__()) + return citation_string + + def citations(self, citation) -> None: + """ + Appends a list of Citations or Citation to self.citations. + + Parameter + --------- + :param citation: Citation or Reference of the Publication + :type citation: Citation or list[Citation] + :return: self.citations + """ + if type(citation) is Citation: + self.citations.append(citation) + + # Checks if 'citation' is a list of Citations + elif type(citation) is list: + for _cit in citation: + if type(_cit) is Citation: + self.citations.append(_cit) + else: + raise TypeError("_set_citation expects Citations or List of Citations, not: '{}'" + .format(type(_cit))) + else: + raise TypeError("_set_citation expects Citations or List of Citations, not: '{}'" + .format(type(citation))) + + return self.citations + + def __eq__(self, other) -> bool: + """ Compares the unique doi_url of two Publications""" + return self.doi_url == other.doi_url + + def print_pub(self): + print(f'''Article title: {self.title} +Publication date: {self.publication_date} +DOI-URL: {self.doi_url} + +Subjects:''') + print(*(self.subjects), sep = ", ") + print('\nContributors:') + print(*(self.contributors), sep = ", ") + + if int(self.num_citations) > 0: + if int(self.num_citations) == 1: + print(f'\nThis publication is cited by the following publication:\n') + else: + print(f'\nThis publication is cited by the following {self.num_citations} publications:\n') + for citation in self.citations: + print(f''' + Title: {citation.title} + Journal: {citation.journal} + Contributors: {citation.contributors} + DOI-URL: {citation.doi_url} + ''') + else: + print('\nThis publication is not cited by any other publication.') + + + + +class Citation: + def __init__(self, doi_url: str, title: str + , journal: str, contributors: list[str] + , cit_type: str = "Citation"): + """ + Parameters + ---------- + :param doi_url: doi_url of the publication + :type doi_url: str + :param title: title of the publication + :type title: str + :param contributors: list of all contributors + :type contributors: list[str] + :param cit_type: Specifies if Reference or Citation + :type cit_type: str + :return: None + """ + + self.title = title + self.doi_url = doi_url + self.journal = journal + self.contributors = contributors + self.cit_type = cit_type + + def __str__(self) -> str: + return ("\t{}-Title: {}\n" + "\t{}-Doi: {}\n" + "\t{}-Journal: {}\n" + "\t{}-Contributors: {}\n")\ + .format(self.cit_type, self.title + , self.cit_type, self.doi_url + , self.cit_type, self.journal + , self.cit_type, ", ".join(self.contributors)) + + +# This is just a replica of Citations +class Reference: + def __init__(self, doi_url: str, title: str, journal: str, contributors: list[str]): + self.title = title + self.doi_url = doi_url + self.journal = journal + self.contributors = contributors + + def __str__(self) -> str: + return ("\tReferences-Title: {}\n" + "\tReferences-Doi: {}\n" + "\tReferences-Journal: {}\n" + "\tReferences-Contributors: {}")\ + .format(self.title, self.doi_url + , self.journal, ", ".join(self.contributors)) diff --git a/input/requirements.txt b/input/requirements.txt new file mode 100644 index 0000000..a151126 --- /dev/null +++ b/input/requirements.txt @@ -0,0 +1,2 @@ +beautifulsoup4 +requests \ No newline at end of file diff --git a/input/tempdir/input_fj.py b/input/tempdir/input_fj.py new file mode 100755 index 0000000..00bb012 --- /dev/null +++ b/input/tempdir/input_fj.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Functions for information retrieval of articles from the ACS journal JCIM + +""" + +__author__ = "Florian Jochens" +__email__ = "fj@andaco.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +from bs4 import BeautifulSoup as bs +import requests as req +import sys +from pathlib import Path + +class Publication: + #_registry = [] + _citations = [] + + def __init__(self, title, publication_date, contributors, doi_url, + subjects, num_citations): + #self._registry.append(self) + self.title = title + self.publication_date = publication_date + self.contributors = contributors + self.doi_url = doi_url + self.subjects = subjects + self.num_citations = num_citations + +class Citation: + def __init__(self, title, journal, contributors, doi_url): + self.title = title + self.journal = journal + self.contributors = contributors + self.doi_url = doi_url + +def get_article_info(soup): + header = soup.find('div', class_ = 'article_header-left pull-left') + article_title = header.find('span', class_ = 'hlFld-Title').text + publication_date = header.find('span', class_ = 'pub-date-value').text + for link in header.find('div', class_ = 'article_header-doiurl'): + doi_url = link.get('href') + subs = header.find('div', class_ = 'article_header-taxonomy') + subjects = [] + for sub in subs.find_all('a'): + subjects.append(sub.get('title')) + cons = header.find('ul', class_ = 'loa') + contributors = [] + for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'): + contributors.append(con.text) + numc = header.find('div', class_ = 'articleMetrics_count') + if not numc.a: + num_citations = 0 + else: + num_citations = numc.a.text + + pub = Publication(article_title, publication_date, contributors, doi_url, + subjects, num_citations) + return pub + +def get_download_url(): + export = soup.find('div', class_ = 'cit-download-dropdown_content') + url = 'https://pubs.acs.org' + for link in export.find_all('a'): + if link.get('title') == 'Citation and references': + url += link.get('href') + print(url) + return url + +def download(url): # Download citation and references file + if url.find('='): + filename = url.rsplit('=', 1)[1] + path = Path(('./files/' + filename)) + if path.is_file(): + print("File already exists") + else: + print("File does not exist") + +def get_citation_info(pub, num_citations, soup): + pub._citations = [] + details = soup.find('ol', class_ = 'cited-content_cbyCitation') + titles = [] + for title in details.find_all('span', + class_ = 'cited-content_cbyCitation_article-title'): + titles.append(title.text.replace('.', '')) + journal_names = [] + for name in details.find_all('span', + class_ = 'cited-content_cbyCitation_journal-name'): + journal_names.append(name.text) + doi_urls = [] + for url in details.find_all('a'): + doi_urls.append(url.get('href')) + contributors = [] + for contrib in details.find_all('span', + class_ = 'cited-content_cbyCitation_article-contributors'): + contributors.append(contrib.text) + for i in range(0, int(num_citations)): + pub._citations.append(Citation(titles[i], journal_names[i], + contributors[i], doi_urls[i])) +def print_pub_info(pub): + print(f'''Article title: {pub.title} +Publication date: {pub.publication_date} +DOI-URL: {pub.doi_url} + +Subjects:''') + print(*(pub.subjects), sep = ", ") + print('\nContributors:') + print(*(pub.contributors), sep = ", ") + + if int(pub.num_citations) > 0: + if int(pub.num_citations) == 1: + print(f'\nThis publication is cited by the following publication:\n') + else: + print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n') + for citation in pub._citations: + print(f''' + Title: {citation.title} + Journal: {citation.journal} + Contributors: {citation.contributors} + DOI-URL: {citation.doi_url} + ''') + else: + print('\nThis publication is not cited by any other publication.') + +def input(url): + html_text = req.get(url).text + soup = bs(html_text, 'html.parser') + + pub = get_article_info(soup) + if int(pub.num_citations) > 0: + get_citation_info(pub, int(pub.num_citations), soup) + return pub + +#if len(sys.argv) != 2: +# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0])) +# exit(1) +#url = sys.argv[1] +#pub = input(url) +#print_pub_info(pub) diff --git a/input/pub.py b/input/tempdir/pub.py similarity index 100% rename from input/pub.py rename to input/tempdir/pub.py diff --git a/input/tempdir/test.py b/input/tempdir/test.py new file mode 100755 index 0000000..bdd12e8 --- /dev/null +++ b/input/tempdir/test.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +from input_fj import input, print_pub_info +import sys + +if len(sys.argv) != 3: + sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0])) + exit(1) +url = sys.argv[1] +url2 = sys.argv[2] +pub = input(url) +print_pub_info(pub) +pub2 = input(url2) +print_pub_info(pub2) + diff --git a/input/test_doi.txt b/input/test_doi.txt new file mode 100644 index 0000000..ced8c84 --- /dev/null +++ b/input/test_doi.txt @@ -0,0 +1,4 @@ +https://doi.org/10.1021/acs.jcim.1c00203 +https://doi.org/10.1021/acs.jcim.1c00917 +https://doi.org/10.1021/acs.jmedchem.0c01332 +10.1093/bioinformatics/btaa190 diff --git a/input/test_input_get_publication.py b/input/test_input_get_publication.py new file mode 100755 index 0000000..941dbc7 --- /dev/null +++ b/input/test_input_get_publication.py @@ -0,0 +1,28 @@ +import unittest +""" +Testing the Publication fetcher + +Publication 1: 'https://doi.org/10.1021/acs.jcim.1c00203' +Publication 2: 'doi.org/10.1021/acs.jcim.1c00917' +Publication 3: '10.1038/nchem.1781' +Publication 4: '11.12/jaj' +Publication 5: '11.12/' +Publication 6: 'https://doi.org/10.1021/acs.jmedchem.0c01332' # Paper is a PDF +""" + + +class TestGetPublication(unittest.TestCase): + + def test_publication1(self): + pass + + def test_publication2(self): + pass + + def test_publication3(self): + pass + + +if __name__=="__main__": + print("test") + unittest.main() \ No newline at end of file diff --git a/input_old/README.md b/input_old/README.md new file mode 100644 index 0000000..76bd11d --- /dev/null +++ b/input_old/README.md @@ -0,0 +1,3 @@ +# Projekt CiS-Projekt 2021/22 +Input-Skripts + diff --git a/input/__pycache__/input_fj.cpython-39.pyc b/input_old/__pycache__/input_fj.cpython-39.pyc similarity index 100% rename from input/__pycache__/input_fj.cpython-39.pyc rename to input_old/__pycache__/input_fj.cpython-39.pyc diff --git a/input/example_urls b/input_old/example_urls similarity index 100% rename from input/example_urls rename to input_old/example_urls diff --git a/input_old/input_fj.py b/input_old/input_fj.py new file mode 100755 index 0000000..ecc8e68 --- /dev/null +++ b/input_old/input_fj.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +""" +Functions for information retrieval of articles from the ACS journal JCIM + +""" + +__author__ = "Florian Jochens" +__email__ = "fj@andaco.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +from bs4 import BeautifulSoup as bs +import requests as req +import sys +from pathlib import Path + +class Publication: + #_registry = [] + _citations = [] + _references = [] + + def __init__(self, title, publication_date, contributors, doi_url, + subjects = None, num_citations = None): + #self._registry.append(self) + self.title = title + self.publication_date = publication_date + self.contributors = contributors + self.doi_url = doi_url + self.subjects = subjects + self.num_citations = num_citations + #self._citations = [] + #self._references = [] + +class Citation: + def __init__(self, title, journal, contributors, doi_url): + self.title = title + self.journal = journal + self.contributors = contributors + self.doi_url = doi_url + +class References: + def __init__(self, title, journal, contributors, doi_url): + self.title = title + self.journal = journal + self.contributors = contributors + self.doi_url = doi_url + +def get_article_info(soup): + header = soup.find('div', class_ = 'article_header-left pull-left') + article_title = header.find('span', class_ = 'hlFld-Title').text + publication_date = header.find('span', class_ = 'pub-date-value').text + for link in header.find('div', class_ = 'article_header-doiurl'): + doi_url = link.get('href') + subs = header.find('div', class_ = 'article_header-taxonomy') + subjects = [] + for sub in subs.find_all('a'): + subjects.append(sub.get('title')) + cons = header.find('ul', class_ = 'loa') + contributors = [] + for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'): + contributors.append(con.text) + numc = header.find('div', class_ = 'articleMetrics_count') + if not numc.a: + num_citations = 0 + else: + num_citations = numc.a.text + + pub = Publication(article_title, publication_date, contributors, doi_url, + subjects, num_citations) + return pub + +def get_download_url(): + export = soup.find('div', class_ = 'cit-download-dropdown_content') + url = 'https://pubs.acs.org' + for link in export.find_all('a'): + if link.get('title') == 'Citation and references': + url += link.get('href') + print(url) + return url + +def download(url): # Download citation and references file + if url.find('='): + filename = url.rsplit('=', 1)[1] + path = Path(('./files/' + filename)) + if path.is_file(): + print("File already exists") + else: + print("File does not exist") + +def get_citation_info(pub, num_citations, soup): + pub._citations = [] + details = soup.find('ol', class_ = 'cited-content_cbyCitation') + titles = [] + for title in details.find_all('span', + class_ = 'cited-content_cbyCitation_article-title'): + titles.append(title.text.replace('.', '')) + journal_names = [] + for name in details.find_all('span', + class_ = 'cited-content_cbyCitation_journal-name'): + journal_names.append(name.text) + doi_urls = [] + for url in details.find_all('a'): + doi_urls.append(url.get('href')) + contributors = [] + for contrib in details.find_all('span', + class_ = 'cited-content_cbyCitation_article-contributors'): + contributors.append(contrib.text) + for i in range(0, int(num_citations)): + pub._citations.append(Citation(titles[i], journal_names[i], + contributors[i], doi_urls[i])) +def print_pub_info(pub): + print(f'''Article title: {pub.title} +Publication date: {pub.publication_date} +DOI-URL: {pub.doi_url} + +Subjects:''') + print(*(pub.subjects), sep = ", ") + print('\nContributors:') + print(*(pub.contributors), sep = ", ") + + if int(pub.num_citations) > 0: + if int(pub.num_citations) == 1: + print(f'\nThis publication is cited by the following publication:\n') + else: + print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n') + for citation in pub._citations: + print(f''' + Title: {citation.title} + Journal: {citation.journal} + Contributors: {citation.contributors} + DOI-URL: {citation.doi_url} + ''') + else: + print('\nThis publication is not cited by any other publication.') + +def input(url): + html_text = req.get(url).text + soup = bs(html_text, 'html.parser') + + pub = get_article_info(soup) + if int(pub.num_citations) > 0: + get_citation_info(pub, int(pub.num_citations), soup) + return pub + +#if len(sys.argv) != 2: +# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0])) +# exit(1) +#url = sys.argv[1] +#pub = input(url) +#print_pub_info(pub) diff --git a/input_old/pub.py b/input_old/pub.py new file mode 100644 index 0000000..13b90e8 --- /dev/null +++ b/input_old/pub.py @@ -0,0 +1,32 @@ +class Publication: + #_registry = [] + #_citations = [] + #_references = [] + + def __init__(self, title, publication_date, contributors, doi_url, + subjects, num_citations): + #self._registry.append(self) + self.title = title + self.publication_date = publication_date + self.contributors = contributors + self.doi_url = doi_url + self.subjects = subjects + self.num_citations = num_citations + self.num_references = num_references + self._citations = [] + self._references = [] + +class Citation: + def __init__(self, title, journal, contributors, doi_url): + self.title = title + self.journal = journal + self.contributors = contributors + self.doi_url = doi_url + +class References: + def __init__(self, title, journal, contributors, doi_url): + self.title = title + self.journal = journal + self.contributors = contributors + self.doi_url = doi_url + diff --git a/input/test.py b/input_old/test.py similarity index 100% rename from input/test.py rename to input_old/test.py diff --git a/input/x b/input_old/x similarity index 100% rename from input/x rename to input_old/x -- GitLab