From bcb61efa617d4c9dc2820f980fc39ef4928fa99c Mon Sep 17 00:00:00 2001 From: Sebastian David <sebastian.david@uni-hamburg.de> Date: Wed, 1 Dec 2021 13:54:08 +0100 Subject: [PATCH] Created the first running programm with almost ful --- README.md | 3 - citation_parser_ui.py | 205 ++++++++++++ example_urls | 2 + input/README.md | 50 +++ input/__init__.py | 0 input/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 171 bytes input/__pycache__/interface.cpython-38.pyc | Bin 0 -> 3543 bytes input/__pycache__/publication.cpython-38.pyc | Bin 0 -> 4312 bytes input/get/__init__.py | 0 input/get/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 175 bytes input/get/__pycache__/acs.cpython-38.pyc | Bin 0 -> 4215 bytes .../journal_fetcher.cpython-38.pyc | Bin 0 -> 3815 bytes input/get/__pycache__/nature.cpython-38.pyc | Bin 0 -> 1654 bytes input/get/acs.py | 192 +++++++++++ input/get/journal_fetcher.py | 96 ++++++ input/get/nature.py | 59 ++++ input/get/template_.py | 51 +++ input/interface.py | 113 +++++++ input/publication.py | 143 +++++++++ input/requirements.txt | 2 + input/test/__init__.py | 0 input/test/test_acs.py | 303 ++++++++++++++++++ input/test/test_input.py | 82 +++++ ui_programm_fragmente/input_to_checklist.py | 29 +- ui_programm_fragmente/upload_to_checklist.py | 59 +++- 25 files changed, 1361 insertions(+), 28 deletions(-) delete mode 100644 README.md create mode 100644 citation_parser_ui.py create mode 100644 example_urls create mode 100644 input/README.md create mode 100644 input/__init__.py create mode 100644 input/__pycache__/__init__.cpython-38.pyc create mode 100644 input/__pycache__/interface.cpython-38.pyc create mode 100644 input/__pycache__/publication.cpython-38.pyc create mode 100755 input/get/__init__.py create mode 100644 input/get/__pycache__/__init__.cpython-38.pyc create mode 100644 input/get/__pycache__/acs.cpython-38.pyc create mode 100644 input/get/__pycache__/journal_fetcher.cpython-38.pyc create mode 100644 
input/get/__pycache__/nature.cpython-38.pyc create mode 100755 input/get/acs.py create mode 100755 input/get/journal_fetcher.py create mode 100644 input/get/nature.py create mode 100755 input/get/template_.py create mode 100755 input/interface.py create mode 100755 input/publication.py create mode 100644 input/requirements.txt create mode 100644 input/test/__init__.py create mode 100644 input/test/test_acs.py create mode 100755 input/test/test_input.py diff --git a/README.md b/README.md deleted file mode 100644 index 9635f91..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Projekt CiS-Biochemie 2021-22 - -Automated citation loop finder for ACS articles. \ No newline at end of file diff --git a/citation_parser_ui.py b/citation_parser_ui.py new file mode 100644 index 0000000..66529c3 --- /dev/null +++ b/citation_parser_ui.py @@ -0,0 +1,205 @@ +import base64 +import re +import dash +from dash import dcc +from dash import html +from dash import callback_context +from dash.dependencies import Input, Output, State +from dash.exceptions import PreventUpdate +from input.interface import InputInterface +import input.publication + +app = dash.Dash(__name__) + +additional_options = ['Update Automatically','Smart Input'] + +app.layout = html.Div([ + # Layer 0: For the Header and Help Function(s) + html.Div([ + html.Button(id='show-info',children='Show Info',n_clicks=0), + html.Div(id='info-box') + ]), + # Layer 1: For all mandatory Inputs + html.Div([ + "Input: ", + dcc.Input(id='input-string', value='', type='text',debounce=True), + dcc.Input(id='forward-depth',value='1',type='number',min='1',max='10'), + dcc.Input(id='backward-depth',value='1',type='number',min='1',max='10'), + dcc.Upload( + id="upload-data", + children=html.Div( + ["Drag and drop or click to select a file to upload."] + ), + + style={ + "width": "30%", + "height": "60px", + "lineHeight": "60px", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "textAlign": "center", + 
"margin": "10px", + }) + ]), + # Layer 2: For the checklist, Remove-/Start-Buttons and input-error-message + html.Div([ + dcc.Checklist(id='input-checklist',options=[],labelStyle = dict(display='block'),value=[]), + html.Div(id='input-err',style={'color':'red'}), + html.Button(id='clear-all-button',children='Clear All'), + html.Button(id='clear-selected-button',children='Clear Selected'), + html.Button(id='start-button',children='Generate Graph') + ]), + # Layer 3: For additional Options (e.g. Topological Sort) + html.Div([ + html.H4('Additional Options'), + dcc.Checklist(id='additional-options', + options=[{'label':k,'value':k} for k in additional_options], + value=[]) + ]), + # Layer 4: For the Graph + html.Div([ + html.Div(id='test-output') + ]) +]) + +''' +Most important callback function. Updates the checklist that holds all inputs. +input-string is required as Output to clear the input box after each input +''' +@app.callback( + Output('input-checklist','options'), + Output('input-checklist','value'), + Output('input-string','value'), + Output('input-err','children'), + Input('input-string','value'), + Input('clear-all-button','n_clicks'), + Input('clear-selected-button','n_clicks'), + Input('upload-data','contents'), + State('input-checklist','options'), + State('input-checklist','value'), + State('additional-options','value') +) +def update_input_checklist(input_value,btn1,btn2,filecontents,all_inputs,selected_inputs,additional_options): + ''' + :param input_value: given by dcc.Input + :type input_value: string + :param btn1: signals pressing of clear-all-button + :param btn2: signals pressing of clear-selected-button + :param all_inputs: all labels and values from the checklist, + regardless if they have been checked or not + :type all_inputs: list of dictionaries with 2 entries each + :param selected_inputs: values of all checked elements + :type selected_inputs: list of strings + ''' + changed_id = [p['prop_id'] for p in callback_context.triggered][0] + 
# if clear-all-button was pressed: + if 'clear-all-button' in changed_id: + return list(),list(),'','' + # if clear-selected-button was pressed: + if 'clear-selected-button' in changed_id: + all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs] + return all_inputs,list(),'','' + # when a new element is added via dcc.Input + if 'input-string' in changed_id: + options = all_inputs + currValues = [x['value'] for x in options] + if input_value not in currValues: + if 'Smart Input' in additional_options: + try: + i = InputInterface() + pub = i.get_publication(input_value) + except Exception as err: + return options,selected_inputs,'','{}'.format(err) + rep_str = pub.contributors[0] + ',' + pub.journal + ',' + pub.publication_date + options.append({'label':rep_str, 'value':input_value}) + else: + options.append({'label':input_value,'value':input_value}) + return options,selected_inputs,'','' + # when a txt-file is uploaded + if 'upload-data.contents' in changed_id: + if filecontents: + string = filecontents + found = base64.b64decode(re.search(',(.+?)$', string).group(1)) + filecontents = found.decode('utf-8') + list_of_inputs = (filecontents.strip().split('\n')) + options = all_inputs + CurrValues = [x['value'] for x in options] + for input_value in list_of_inputs: + if input_value not in CurrValues: + if 'Smart Input' in additional_options: + try: + i = InputInterface() + pub = i.get_publication(input_value) + except Exception as err: + return options,selected_inputs,'','{}'.format(err) + rep_str = pub.contributors[0] + ',' + pub.journal + ',' + pub.publication_date + options.append({'label':rep_str, 'value':input_value}) + else: + options.append({'label':input_value,'value':input_value}) + return options,selected_inputs,'','' + # when the programm is first started: + if input_value == '': + app.layout['input-checklist'].options.clear() + return list(),list(),'','' + +''' +This callback shows and hides the (first) help-box +''' +@app.callback( + 
Output('info-box','children'), + Input('show-info','n_clicks') +) +def show_hide_info_box(n_clicks): + if n_clicks % 2 == 0: + return '' + else: + return 'Hier koennte Ihre Werbung stehen' + +''' +Basic structure for a callback that generates an output +''' +@app.callback( + Output('test-output','children'), + Input('start-button','n_clicks'), + Input('input-checklist','options'), + Input('input-checklist','value'), + Input('forward-depth','value'), + Input('backward-depth','value'), + State('additional-options','value') +) +def generate_output(n_clicks,all_inputs,selected_inputs, + forward_depth,backward_depth,additional_options): + ''' + :param n_clicks: how often has Generate Graph been clicked + :type n_clicks: int + :param all_inputs: all labels and values from the checklist, + regardless if they have been checked or not + :type all_inputs: list of dictionaries with 2 entries each + :param selected_inputs: values of all checked elements + :type selected_inputs: list of strings + :param forward_depth: forward recursion depth + :type forward_depth: unsigned int + :param backward_depth: backward recursion depth + :type backward_depth: unsigned int + :param additional_options: value of all selected additional options + :type additional_options: list of strings + ''' + changed_id = [p['prop_id'] for p in callback_context.triggered][0] + if n_clicks is None: + raise PreventUpdate + elif 'Update Automatically' in additional_options \ + or 'start-button' in changed_id: + s = '' + for i in range(len(all_inputs)): + x = all_inputs[i]['value'] + if x in selected_inputs: + s += x*(abs(int(forward_depth)-int(backward_depth))) + else: + s += x*(int(forward_depth)+int(backward_depth)) + return s + else: + raise PreventUpdate + +if __name__ == '__main__': + app.run_server(debug=True) diff --git a/example_urls b/example_urls new file mode 100644 index 0000000..96ac680 --- /dev/null +++ b/example_urls @@ -0,0 +1,2 @@ +https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332 
+https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709 diff --git a/input/README.md b/input/README.md new file mode 100644 index 0000000..110ce69 --- /dev/null +++ b/input/README.md @@ -0,0 +1,50 @@ +# Projekt CiS-Projekt 2021/22 + +Input-Package to fetch publication information with a given url. + +## Usage/Examples + +```python +from input.interface import InputInterface as Input +from input.publication import Publication + +def main(url): + inter = Input() + try: + pub = inter.get_publication(url) + except Exception as error: + raise error + + print(pub) + pub.title = "Cool new Title" + print(pub) + +if __name__ == "__main__": + main("https://doi.org/10.1021/acs.chemrev.8b00728") +``` + +The expected results of calling this methode are: +| Input-Url | Result | +|-----------|-----------| +| supported & correct| A publication Instance | +| supported & uncorrect| ValueError| +| not supported | ValueError| + +Supported Url are urls, which comply with the url-pattern of supported Journals. + +### Supported Journals: + +- ACS-Journals +- (Nature-Journals) + +## Testing + +``` c +python -m unittest input/test/<file.py> -v +# for all tests in directory +python -m unittest discover input/test -v +``` +## Authors +- Florian Jochens +- Sam Ockenden +- Julius Schenk \ No newline at end of file diff --git a/input/__init__.py b/input/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/input/__pycache__/__init__.cpython-38.pyc b/input/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd915a8709319a33f27a6083fd415baeb6b4d6c3 GIT binary patch literal 171 zcmWIL<>g{vU|{(1a%m!neheazF(U&5g98HtLoo{j149Z!FoPze-%5rekT8Py6`-Gy zpPQ;*oSKwaT#}iXr(aN%pOu<jlB%DasavcI<>{7Y>X&B57Zl~E7bWKA#-|k}rst;S om89xt<`tBd=*P!r=4F<|$LkeT-r}&y%}*)KNwovn^cmzL0A`3SF#rGn literal 0 HcmV?d00001 diff --git a/input/__pycache__/interface.cpython-38.pyc b/input/__pycache__/interface.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..b823637b2913428dc68e9eb088266aafd5643971 GIT binary patch literal 3543 zcmWIL<>g{vU|>**Tb`K4&%p2)#6iZ)3=9ko3=9m#ZVU_zDGX5zDU2yhIgGhXQA~^= zHggVh6mtq=3Udxi6bqPUjbcq<Okv4k%Vm#ZXJl|^NMTK3Yhg%XYi5e#NMR0U&}6TQ z;_}QZNi9lCOioou%P&$W$w*c3%qu7@(G5sU&Q45ERmjiFsZ=P*$ShVU&d4v#Nl{2j zRY*)uPAx7@O;JcI%Fk8EFD)s~Oi6_(f@;&_;(7^ky(Z%=mh!}$Y(GuLTigMqNjaIx zi6xo&dC4G2WDN2ih+mw-z`&5o5XG3n5XF?j7{#2z6vdLl6vdj(5XIKckj9w8oWjz= z5ycMiOcX~dXDU|;`y8eejug%omMCsGpDTsCg(Zq7g(ro#g&~SJg)fD_g&~Tsoq>fR zia(e^Q{WaK*r)ISsB+>`P*6|^&df{CNiE6GQz*~KOwLe9smx2v%}h?r$*EMx%q_?- zDk)Y-%u7+oP0UM7Pc2qROD##xNG&P`YxYZK1o;ArL7oC(eo(w=F)%RHFk~^LFk~|o zsgy95Fx4<LGi5Q?Fr+coFwSGDVd`V7VTcE-31-k_^wVUy#gUm;T#}fVoO+A3xU?X( zsECDuf#DW=e0*MNd3?Mk^DXA&oZ?$7iACwfx7f1FLG((-Tb%Ln$vKI|#qseg8GZ%o zXXNLm>KCUbB^H-tCg$lE6y;~7W|ySuCuiyw>q2?DrJ4Grnehch`RPT8xw-LaMTzOT zsd*)-`kA2Apq~j#C3*#wMeGa=4B{X!v4Q-_%*80hR3(NQmU>Vpq67yhXh0rt2I)0` z21hz5ICN?l7BDVkU<7*}B2vPX!nlCBh9Qe(A!9RR6JrWPFoPzOUzMg`zCv+9YI0^; zX0n1CH0%}1Gg6B{DY!H*MIo`oN<qE4R$Y_z7C$s{AmNc(d`k=xgYoF%e2yh0MVU#Z zC8@4OMfpXy*wXTgauZ8#aiph~#1|x%WUOSo#ZsJ_lUBsbz`y_@_&{;N5g(tKmst`Y zFAwq*4>UkH7`YhP7+L;TiDLu{M71W<E#}gqoLg)~sU@XFdC5#5jZh3qG4NnAV_;w? 
zVaQ^1VThHfWvXFXz*NJykg<rZhG_vah{eRf$Pmm>$X3FV!nlAHBmxdBreFq5=0q7r z1_p4l1f}bcj8p}9H0tK2mSp6oD3s(Yq^FiBBq|`~O<h=`gKO2u%uCKGP07qlSI8_; zFIGs-ECJ_YNJc11O-n6G%}Y)#*5ra~2}mqT%uOwU<qMDlbm0K5%BldQOrf+W#|lJ% zoP**Ngu;@_f>e+aD}~~cB7`s`_^gnfuK;ttLVl5gUw&SyLS~vmUVe!}acKc4_ok-6 zwf_<Yr4xnBVpMrQO*U{wD+1-)TinTsdGV#isqvur;s>V(L^*bgD=aam6q0C)gcukY zRx%fff>H=5f863lNoJ5#15Q7nq*El#z`&phN;Mpyi09*C<YE+Hu98DbF=!UjD5Y>y zmQobpL8<|+FF;jEZenJhLSj)#W^zvIP)is^k_-$CD9NBm3Y6P<p|KI4lbN1TQX~V4 zX;7>e$%0tms09<?_yv^+b{O$1gFSvBCSnwqpmG6RTnd0{eZ~}~7H|>i!qChJ%HgQh z4M;Jl@CXK33UXFDLk&YLS1n@=V-~|irb3orNXezic#9)GJ})shH9lUG=@yfo!7au_ zwA6Wv%O)qYxFk70x4<q8WFDwQVq;)psItW30eIGe6|BX2HaYppi8;k~dI)ng8NoTK zNCY#mZi#_H7SkB8S3v$^V6IXn%3_otWMyDr;AUW800-e8a1btFC}CW{RKmQFF^gpZ z>q3Twph}l5ouQVoguR5Lh9QfsnK6Yag(;mOjk%dAg;|opg&|h1mZ^ku0apr34bwuV z6sBy>BC{IC1>7}E3mF+13e9Rj#q*q6rmCD0o&~%qtP2=wm{OQh*jkw+85S}wWUOVb z602dz;#<I7!@Ph$g&icu$PmtuXTicyB9P5hbc~UqmW7d_N~%OKg#%T0Gb2c)DMJcV zElV9s1Vbc49#aHEI749+RPPr?1`sR}OyNY=*~A#hP#6`#P#Bd0st~xUR>JdXaB5;v zaz<*gLS~+VdU|S!x(=wAECE-eiAAXj(TTcMv08csl?u7}DWy57#R?i2wXP=IG*Ek} zq$o4B7*v8os#s7{GFK0-A~>-O)K<y|RnRaKK~}>`$h6Fy)M9mn`YN{STK)9Y62F(A z*stOQyGL8Ept6bw+#=CSPc6}_u6_CU|Ns9{?9g^Vm9{U~vEb@oAtgT*Qm|(vmZd5r zDyTzMsq2DUM^*C3ijx!bz?DE!DzuT8nxZKLF41qXgX;gB%%oc^0f{9UpsWLOY<&DJ zwt~c>)Vz{g>_w@?`8j2&MZ%ymf+a7tqNE7q>RU`jskc~j6HAgaZm}g66r|>*+~S0U z4#+rO2s0iM9N?^ROCUZzJ+&ke))<M8zs2cVk(^opYBS&B_5?RS!7UU`R&YHNC4?M+ z;5=P?iv{GqA~}#_SW`jG8MM4z1S(7-K$#j;fH81LFtaeqF$ys9Fmo_6F-kBpF^Vw? 
zFzPV!F=FI7sFPuqf&((OSW}=#0c0CjJk&Gs@wa&5;|og@b0DP*sL7g=pBx`w1j>=% za02@m+>|U51L<WhE-5Mkm0sX_u?SRn7O8*=3Xo%O@j{!l$*DOx@$pFh0p;!@aB&MN zpg@IhF{pUqU}RxoVPs)sVdmlB;1UoO5anP6*Xf#^MV27znDUFkaa06qhZd=WEYbyu s3qn#?0iueBI0|GslC>a<z}DX4uz|#%9mws)pavid3lAd)81gUy0MOQ>TL1t6 literal 0 HcmV?d00001 diff --git a/input/__pycache__/publication.cpython-38.pyc b/input/__pycache__/publication.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b893491e387cafe8f592479d23c76f302b721bde GIT binary patch literal 4312 zcmWIL<>g{vU|{(1a%tjvVFrfBAPzESW?*1&U|?V<Heg_2NMT4}%wfo7jAG1Xieh49 zaA!ziN?~qcNMUYfieh$WNMT7~ZDB}ZZDxvMNns0S&}4rJGE0-`7PDhs<t-MU%;J({ zkO(pc*$=j%gn@w}l_82Tg&~S5g)xdbg+G-gl_ixml|_;PiN%`ACdrTrCR3Q_aDeS( zjbcw_NnxAAn8KdI(ZUkNk;<9EnZnh=(99UcmBIv;;ZEUcVTs~S<pJ|}Q}|j~qIgr7 zf*CXgZgB^cCgo%%CzfR9=T)h5DL_F`YC%zIacW*ku|lE(l9FPu3O`MbTkI+MnenAX zIk#9#GD~t&Z}BAO=am#?CY6@t7Zu-P&&n??%1g|-B~XAcKRzX~B=r_YacNRkYH~^O zEv}-}wA7;1yyVp4Tb#+6C16JsCvzf&5s1yiz`)=Piiju%28J5O8isg=8m1bCc*Yv$ z8ishL8kQP{c;*_`8ish58nzmSc-9*B6vk|hB9nB68ishbJhmEk5M9Fo7WD#)vgfhY zaMUowv!{T}<49o)X3%6x+{MJe0QMj#6ao^95_3~aQi~vd1_|lH0bG?;0Z5qwG%&1S z42AqOg_4X^1ti}iw3Jj9q{0+hDHN9!A*3LtfkV_v0Zb7#39JmOIml6Bm6KUq0x~Qy zCr1HU0%1AWUC2^a3Ls6<u?Qs)_kn#?oROMhr2vXjkf}wfIjM=osR)%|!;lnW@g^)G zSb>8I$^#h&3OS@ihh`{LVX>70C|5-nmlR<*5RtGTW;;U@mqK|)W^#r?X0bw2YHFSW zTv>`jQl&ykMrJW`T30}`7p@v+vtwQ*hRyI~jMZX<5g4|iS%zc|*q5kw6{VJx7Ufwf z_~qvzg4|D&vxt>}fuV>EM6iPh4iEv#Lq(vnridHF<zZl8xW$@YlwVq)$$5*VI5j7Y zSZymAiuf5A7=8uoXXNLm>KCUbB^H-tCg$lE6y;~7W|ySuCuiyw>q2?DrJ4Grnehch z`RPT8xw-LaMTzOTsd*)-`k8qJr6u~{#E}fn^m+xAw>aYCGxIV_;^RTCES3Tl0^)3p z2*|<6!NkT0CYh^5kdlWUOyf$%TWpY!NoE9Thhh+$4_1cwfC_WQ1q?L|S&TIdSxhNR z3z%ydvRD>^c&s%HS!_@d_Jxco%pkc195oDCoC_II_*@|VLXKbtO%}hZ&Ja)uXay=E zK)AY=%OyWkx3nk+DpXy|<ycyhkzZ7d5b%POj|c&<hd_BsAwSPbp}LkU7+Oq1lvLMp z1;I;8kPugOEf*|VL(Ju>uKmTNqi~CnOOyK+TUvfmZej@}Er3$REtahO%)BBj$>Nr9 zdTL2LEWyPWmlS2@r6bbHO2#5k=z(J%LI{9T5PLjGWqiB|sALua1vMiJ2MYrW0~aF; 
z0}CS;qX478mnvbfx1f;>)qxT@pjsB>Ja7T82P)th(iyUuiuh6((;0&q(iv(QYZzP@ zVufm%YCzG(w19CT!$L+zhC;C#aG~vYiv^SmG?{L(B^DH<=A{&Yl8z>G5h$HU@uPX` z76(iK;uTPGE&}<dNCK8ZK>8S%I2hTOSQt4N1sJR3k^O)rsFFby6es{d7~~&N{sa4` zjDdlngrSD9nXv>^AxVJ}+ydqrMv(jS!k{uN*&Ic2HH<C{u?DqFB@8u8P}Lq#)l4AO zg<2)7DU1u)Ao2?tn;DxJ3$^lu;G&Gpj1W<fE^uYW#)wpzIf8=*RGEWHbWj-!Pdf_v zMG7#ELP@>?s6fzzg;KE|7rbDCSBP*CoYf&ryA{IZ{2~Q-0tVTSzl_Ij0LbmpFt;KV zy^u_Ah30g)Gpp?5i;+^eLTW`pYH~?2+-=36AOMxJAa}uxFV<1W%P+A~P_M34*JQuN z0;<n$almB3sjEl^lo41!uDZn;QdyAdT2z!@R3yg0z@W(t$uzPI3=B~$pv;Aq-fr<G zrlf$x!R24EFDMs*5-tN54<jEl4<iR74<jEF8zb9)E@lwP##|+doDNZRqohDkY6PW1 za0*m_q(FvdMo4PQW-5{a7k!LbjM+@V43LITm4SkDer`cxQED-`DlW~-EG&gLQ9z+m zQl5`oH5R*RG8M^zY7<c2)nqCHl@PaB^Gh;Pi_ksF79XEl7$2Vj@*yZ^Ffa))@-S8j zA^QfRM3c$SPm>Q+5#%N2rpCwL;);*Y%}*)K0kL`F;|og@bD%Qp@$o77$?@^Gn2Sq_ zia_2i0{Od05acFMINstZ2DK-Xb5lz)@>9UMyGRKfT}k=*IYlZUb>Pg8Ai%`{3j+fK zsEjNIg&YSX3&_QP*%;Xvm>8KDKs4K59!)-eCPpq!KItNmYK&$*NISS$Z-(5g=SXD+ zLzYx#NrqJB6vjEM;4TDf6l*GT3d<bE6xI~B7M3Ws6!u^SP0k{DP%2_Ys+qxcImm2q z85R#pkI5j{fl3_^2C+e50FHN%SHX>BNP`(mQyCKTiT%i>1U!cilO`{?`3g$DB(-@# zB>~n}WpZW-IM}Tef(ufUGt)9tixo1{kc%}$NeVA~tUwW^fY6H44u%z6pc)LW!%veH zQEeB&qrFH6lp@%`Sq)pV0L!CgK5%|00OdST`;S2o)|#pkgcW4)##50H0|Ns})eLGk zfht3Af&x{|DWD1%+HL|DA`DrewiBr3!~)`{Ff9PLpcXQMTSm-&RUw?!wYnIsBaom= zekQ0D18OfRD1e$fAVG{q5lGM(sb2|h7!`q98=CCkau(ugP|}A42{^RGL75wrQ;H-( zEVLXBDjnKEK>{*{fsus;)LcPeuAfx`NP$yaQWPKWr^yJ(kL*RNAQRO<ggS`O1QFok z2VU|ZMFH4maIpf)&G{fpK?NlTGs8bNaB;%)k4GGAvY#gFEw+-%0#M5fl!=SLW`c?r YurxL^Z*kZ_3P(GTvSJV+z|5Bc0L$(sT>t<8 literal 0 HcmV?d00001 diff --git a/input/get/__init__.py b/input/get/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/input/get/__pycache__/__init__.cpython-38.pyc b/input/get/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f81f36aaccfa12fdee27996811a2c67cc19fff8 GIT binary patch literal 175 zcmWIL<>g{vU|{(1a%m!neheazF(U&5g98HtLoo{j149Z!FoPze-%5rekT8Py6{4S! 
zpPQ;*oSKwaT#}iXr(aN%pOu<jlB%DasavcI<>{7Y>X&B57Zl~E7bWKA#-|k}rst;S sm89xt<`tBd=%=Ta=*P!r=4F<|$LkeT-r}&y%}*)KNwovn_8H_V04HlMqyPW_ literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/acs.cpython-38.pyc b/input/get/__pycache__/acs.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f8c362e84d5ccbbdf32f8310eeafd1376ab1977 GIT binary patch literal 4215 zcmWIL<>g{vU|>**Tb{U1mVx0hh=Yt-7#J8F7#J9eZ5S9BQW&BbQW#U1au}l+Qy5d2 zbC`0OqnH^%Vk|i<xvWvFxolBvj12A!DXb}MEet7asm#sHQS2#9!3>)0Roz_98JRgL z3duQ%#l;HwX$oHXrA2v(Ic}*X$r-6dT%pB@>8Vx<&Kar6*$SCy3ZX?g3dxCi3Q4I7 zrNyZ!3gww484BvjiFxs*#i{Y7MLFtR3J{Q(m!eRTk(#HF1~N*apfo8bGdZy&Gd~Zi zDLu6$9!XT4>m|sOewvK8_)s08$#jc50HOUBhjV5L*q~&P24oBm89xRFhE#?q#uSDq zrWD2~<`kxM#weCl)>O7swiM<$OeriWtSv0fj8W`hF%Ga8TM9c^j5C!j6(Yis!r8(S z#g)Pp%%I7Aiyay$RR&-`1{b6zXQpK)D?o)6((;QyK2~sa4pzv51Y@xtSZNfCp@EU1 zCetnE(xRMOY(=RhrA2wkj7UyrVqjokV_;x#2F0rZ0|P?|!ve+>#u|nNOestY85c6u zGS)DrFlTcXY1A;Lut+kLFlVu3vjsDxum&@vFb6YevQ_1Q?E-~<XmM(>LQraYYK1~c zzCvn6Nl{{Q3Cv9j#W05#mnIdL6lLb67b|4uDY)ckLR<?nA;jOs-%264w4flrs6-(z zA1tj~T9i`^x1`E0P9vkFq@dW|N?%{oULz$xQ!l?LUB9NFG^toGF}WDbOUcg!Ng5jH zX~d*do78IR$CPX9#RO|>R<Y_SaPo5by#&RYn<ncmrlQnathtFL$r-m;LGCIjVr5`p zxFr-E8W7+g6yoX<@8usF<mc!UtjSaa3alt@u<FzlP{LcuP{hl?!0^joKO;XkRlhhj zDY3XDGcixUpeR2pHM=BLKRHvkSQpCEEzQ&~&5SQ7%1<v!%*~BYD@shyP0cGw)z8c; zC@s-XPc6|0d7+^47B^Dr76(NS2PodygqS%P`IxGNp|P)rqzWZwL5>3DXK>8gFfuSy z@qyyDhG8LNEn_%Co*D~72~###kqIM1EfXU{6-NzI6Jt1oDMJclEmIv+1Vbc49#aHE z4PzGb0v1qONMX)qDherKO<`HU1~R3Yv5B#UF^fHgF`Kif0>sK<PGQYvDyl$Nvw$Oo zZ6RX{yCg#`a}DzXjuZ|EFN?F51<dD!@|kN{(-~^nz+zkr8C@7+1!~!A*cNcruxBwZ zWMpJ0+ya*4hO6hO;V5Cq;?8C(dIh3uI3yWrIlyXo;A%M28EUz}V!RNsTJ9R|1so}S zU>k&Ld1|;8aMkcYY?#1UC|1H#!&1Xq!&bvl!&$@C%ofa`$^UB+JOM&NzE~lnBsWI^ zRNN#Nr6!i77ArWVCYEHTmF5)Zmlo(|<`tJD<|U_sl6XO4QEFZZq_9Jzb$CWjE-uzB zPE9OI&PXi==VnLeU|ncgRE%N+QgH;g0AZltFJ=W!u3w_QnZ?DKdFcv8sm1xFMaiiO zd8Iiyzbw_OYt<DXDYsZ5AhDz*wI~m4ow}oQu)2;yQfY}oN`7i_UWr0ZW?nWZ3kM_? 
zq!#J@5^*gm$}h4~aPjw4@XIezaLX^vOZmmGmsnJinVgdvpOKoFl3G**Dp-CAB}SKI zmgJ<`DuGmo2Kgw({^HTg$Z^X_(G3Af{1Sokob&TaiZYWNOG`5Hi+*wGC1;lCg0=qQ z*DEMZ(oIP$N!2Y&%qdO%C4pv@Zb@QAeqMfV<t@fUP2M6<DRzqkRIr1B{uZZeMRIBZ zsK~#?QUWT4xIy`uD-2{5*eAEx((;RP6H9Kfg6+G-R-BrXnp|>=4V3-!(r>XP78IoB zrQG69OiC(BEsIadOfD&6W?*2@<OLT1w^+dTu%?27<rX)H0}X>)>?!$~pkkp&31lZv zGQ^LiCHY0gx7ZTFUcJQ*Ev0X9g6pB;jMS7{9L1$cS*gh-#kY8%tay;Ix7eXPa3KRB z_&_G}f_z(0niQXtnVwOi3MwB=K;;7$6C)p^0HYX-03#nW7b6cNA2T1b0HXjC3nL#R zACmwx7b6!ll*cH*#KXw+ze*I6qKouU%tR?oWEmJ3;H60h^PnkB7@;KyqC{Z@=X6Ag z!j{fZ%MKQUl_(rF><hSRI3W4Ga0gfpQKE3xaDqybY__6zAi9QAlA)FptOik{aHTWU za)ZTSB??as4^oN3Tf@D8tA-b1!vw}csdR=~z8aPV94Y(@8Eg4#_|qA(nI|w6+0^iZ zO2q{nDFWcK=vl2mjlcqq6v2gzU=@r-At^!&*kEEHwV={?Aw!CA3S%piB&aMG%ww6r zSQr9UBLdgA0mg^u$zw`ktQ7)_iNe)ff$@>l2-gUMe4WiUff4F|VM&Hs;dF*t5wL18 zusfA%Ich{8VYq-NMLb1fA=3h$6v-5+g-oDQdIDqNpBlaeTqV3Uf;B=l!Zji((kU{% ztWdd-bcR|{aA?RbWULjd5d-^`xhSYc4CE4cXuPWxuMvmT7GM=jMM*F|NL?0ltwbKn z1g1iWs2tQ^Ohs$Js^lPLeHJs=?~-6KdAOPjFh0a2uo|fvDX8C>ptXsVBttDI?xex0 z6<{W!`*{IhisC}XTIm|;1^f#c7O<o!Eo7{fsgX&M%4VLxTx3=wy@0Dm2ITu1QE;e8 z)JWDy)kvo(!$XCsFsVcUT<5TZ>YN(x8onCQW{zM6O_d@61_lPdB0&ZQh9V&lAq*l! 
zK!hlW0F}B$Vjvc%l~5!BVo8DsDG(tIB4ijC7^-;lbCiox(^894^O94GZ!zU$R<Y|9 zrKY*$XI2S;8&p>M`mn}Sb?r+~BdAJ9&(Fs<9$L;rO6OaQnNVS9;ij8jlwVqKi;?RW zqrxvn-dhY&oIx;qG#PJkBxjbymsA#{R!M=1_|z0#kiOKslKAALN?4P+N&~kXs`XXs zcvM0QR^7bB+|(+4JPKhx0y(89GYMQiS25`*R59r(6oDFsnu6e3r3h5;70H1DQ65Bq zYLX%a5K9q6fC~8{We`gRM1YFnA~jI)%wCjQkdv643O4H&YjHtNX2~t)g8TwVH6jbH zMvBxynlwO!CIbUQ6i-oVT0AJUKqY^X7Dyayfi{Sx10r-mgdT{{2N9qKdyyf?J}yMe zM)4!qkUE4DE)c~IvLYovGYTXY53c>9xItpjkc$!kagamh7H2ZF+fy9H3u8je1;s45 zLlnghay8gk5Cdu~hzU0q#6dPTN)%lrJ}0rb1X9C++O9?5YK|XTCBgb|QJ{)$5vZbL z$5zp?;I8VJSs1w(xfq2Q1sGMBS(teKi!d_%XJUrZEIf={;3`sqNrnlbQV6?BMixc^ zMkWTP|12=_KMOM-*nAmKmC68;s}e`6R$+ed)8xLz5g!jqqw(>#xZ>k;^HWN5Qsd)q z@x;d$mL}#vW!U56Q}UDJ<BNDeG0am8>U<{Wrj}&nr`%#LE-5PF2c-c}v!uuf#Bu@= z;Gjbgpg;t7XFzI;K|Bsd7FISUCPpD%CT1Zcu$(4GksC-aC&(U2a7P5x6wrf4UOc!b zomvFW5Vr&&^5B*Wq+bnA(%^&+HVr8X!R`QAdW*vb;z>J@bBjSe85TAUCLTrsW&o&| Be7XPt literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/journal_fetcher.cpython-38.pyc b/input/get/__pycache__/journal_fetcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..894af00c5d09c44d55f9e5d9a60d05409a7620db GIT binary patch literal 3815 zcmWIL<>g{vU|{(1a%rMB9|OZ<5C<8vGB7YWFfcF_doeIDq%cG=q%fv1<uK+lMKR?v zM=>*k_{=#hxvWvFU^Yt*TNGOgV+v~ydoD*52P1<!Lke38dkaGfdn!{3M=x^}X9`O& zgC=K{GFL!iQEFa^LUK-Gaj`;Levv|PL27blT4u6>SAJ<xUSbZ{OOR7EnQpN=Iyw8M zmL%TdOH3**DN0N($xSWE$WPH^yv6I3npj$rnO2$;oL^etr^$GWJD@ZvCo?&*Br`uR z8Ds!5R)hIx2?GN|Dnk@w3PTiA3S$&=DoZMBDq9NE9HtcJ6qXj2X2vM?RF+hZRMu2Z zuqbN^8%VU7F^UT+&J7l4PvL-y^Q3U5aHVjk@TBmj@TKsl2&4$62&D+8h@^<7h^2_9 zNTf)nNTo=p$fU@o$fd}qD5NN+D5WT;sHCW-sHLc<XrySSXr*YU=%nbT=*>}$;!V*H zX3#Xa#Rm-zx73p4jMSnkGcE-M1%;4|%wmPiVueHnSeWP{LNX1aR3SG%r8FnC7_32) z=@xToQO+&4qSTVoqP%2Aq;O$oU|;|x1ZP-OSMilFEMTl*Sjbq*7|xKV$HGv;kj0e6 zoXu5a!^lv}#K=&^Q^Hci)Xdn#7|vkIkiuBYRL2y-5Xq3o6v0r!TEm#dmcrD`6wIK> zT=fuaKgiKRsU=03sb#6f3gxLu#hEFox)~+8ISPq+DGCr*f&7CKqq>=S#U+V($*Ej$ zLjn?u5_3~aQj3bM;9|N602i|=sVqoUC@spdQYbDdLI@Xt^h3l`@-uZyi*gi7@)Z&l 
z3J~cAp%~&UD+Q<2M0hGgxC!JZ5caF$26;rUAhD=8wMdiw7DrKPVQFe{N%1Y_^wg4D zT&WewsRf`ES$vDrBP1liwWuh+=oVLSWpPPru4_eRNf8SJ1H&!$<ovvn)VvZ+<|1|w zpSdWt@D_7wQPE0<B0&ZQhF{V88Tq-X`o*b9iNz(EiFx`3Mfq8&*(Is^$(g#vx=@~O zX{LT@W_&?WetJ=2Zf<;9QDS;-YF<gIer8@lX^DP%YKeXpB#*^IlZswJ<t>i%)ROq( z{L%s;P%;3e2L^5?Mm|OnMk%H$anw|(2UClZjzDP&<PmT>0+~|6kirN_N1&9F0!a_m z@ZfcXW@p{B(!69)@PJ|>DODjew;(4K?BW!K%shqS(j;)&)`M$w&PYwpE>_4)Q%KJ& zOU(ntrb13;UUo6afJBAj(t?8gqLS1U1*kXS8mlZ({Q)j`)T?XN6*3Zw^GZ-`R!>gM zi!UusjV~?A(a==)Tgh~b&#|PWC^M<FBoz{aY-#yLxrrq!8Nu-djw1*G^877sBm-1I zVaf!`bL>@8sBX|hQbtB_X2Cs2esC70CYGdvG9Oam2~XyTV6U=8bvfS91Emn8JOv8D zA`u1#h9Yqg0ZPtDLB)@zS|2I6<WMaICp{#CtE8dw3i)XYeu*WeMTt2I0Y&*KrO745 zRZ;;y`N0a#`MCw9CEz+CF-O5EGe0Lkz48|aSk6B$wTeHWD8D2%GfyEnIWsjcIkidy z$;^_BR0YS})S}Gf#5@J(jMUuB<is3>;QZvw)RIaR+u@P{8I{GE$;DOtjs*odnW-so zbCn=2$;?X!C2Xi|3Tg_jdFh#Xsi{SodFfSZPN|tW#bD<kxx&9FJuxpc8RCHAlA_8g z5r{v)t^wN&7M1oaE=o+zfmNcQKtV|Ifx{<N!53VA7FYQP6y+zUrlw@(r5A%#g1qVn z3ns_p#FW(BN{~3zp>Q>!d6}Tx6<h*NHu-6=@TrnVaxo+j5_1qvR&c5kbxtfw%Fn1w zDM~C!RR~HgPE9OI&ZyEt(g$`fD2PKcQuB){!PNsK*uXBUl0j1Io1c@KT$+<u1dG%v zF~8E}oYc%@1;^yflwyPpN=UkpOha}aJY#~q40c5xB!7dV5|#=<a=xi4naP=XD0)$R z08VbHMY@ieDXA$62uHf47H6jCRp}tR1(u*ZVV33RfxYaTpOTsbN)LPp{m!X5Iq;$w z<Pwko-0N}(g`g@GB!?0-ureI35gM&k!q8*^(t$9|F|{baI6nuLcEOGcNlnhk1LvM9 zS%htnga}S4pcDi$rb@!8G&2X3u)z9U^U5-d^7BAtdX*5^+7fV6FDMmMP#0GTz+3<d z4RAW<hl)VMI<-m^D&U--n_HR(ZQ&JHNx4=Oq!wj@Oa{eAYEBL!&c$I`K&FCH6x5-b zs#QFGi6wfVVxlBJS+7b6ToOQ%g&sHr^opby7#ON}kO~I9DnTzj1;<=HaDvtYl>mBG zd|ogvD7ouZae;EY9w^N8ssv#M1}A6gfwH(>l{iw~)>H5=N{8yM;s!Yl#^LoWF49wg znGAI+EYHe=@~j@Hq|4D$0Oe7LZ3>}!3c-2`j(SxBUU~{($3WfbRK*X^bb6q`(W?@N znXLfLV^Eh?@p|cjk}EU}s)R5y6U2C^a)<?0+)&dp^V0MnCZwjoydnjX17`<4L~hVi za7iuJs}cdpWu)fnDR}0kf$U95&C#pk^v%!FQvjzSevkrCjDi9uKV7eiA1()pq$(bm z3Q!PNfzk{_D^ywn6k&*Dtfv4?zc7<P=0Y@7@q^PVD4M`2Rj-N@WTZk+YJpxA7dWwi zN(8+sPB7ayHAN2^FOc-8SH<UAQ2;Ru62_n;4Gjf|Yy33%ZgIrN=OyN*#>d~{ijRjh z&*S57@x;d$mL}#vW!U56Q}UBR!o{E}D;d(+xy4*uQUq=$772qYP*9uw7E4loeom1D 
zh!5(j6-hEMFhp@DCM6Z6mc^%JCYL~}Sk%h4h>d}PffrQ4#xXE3a4@p4Ffj@VF)=bR zig18QE)Yo-d>d?+CgUy6+|-gpa3fBWt4I#qwoOb*E&?@(ia<VzVooYHf%uLSBr6DR osp=tB^F?3}f}D=cPq#R1AfagoGOZZYcx7P`VB}%sVB}#20I@fBeE<Le literal 0 HcmV?d00001 diff --git a/input/get/__pycache__/nature.cpython-38.pyc b/input/get/__pycache__/nature.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47affbf4e197c535229a9e2bbecf3988abfad115 GIT binary patch literal 1654 zcmWIL<>g{vU|{(1a%rL;I|IXG5C<8vFfcGUFfcF_8!#|1q%cG=q%fv1<uK+lMKLjg z*vvW1xhzpEj12A!DJ&_hEet8FsZ7nxQLHI!!3>)0Roz_98JRgL3duQ%#l;HwX$oHX zrA2v(Ic}*X$r-6dT%pB@>8Vx<&Kar6*$SCy3ZX?g3dxCi3Q4I7rNyZ!3gww484Bvj ziFxs*#i{Y7MLFtR3J{Q(m!eRTk(#HF1~N*apfo8bGdZy&Gd~ZiDLu6$9!XT4>m|rz znvA#jP~D)(c#As#p*tC*3K_$Lz=nZ=A(bJDF@+(DDTOhLIh`SjC6zUmEtNfmX%15g za|%lfOEY5>2Uv^~EXJC`1{UK=VGm}|<haES^>>vO*c-*kMTrHe#VC%?O)W`GNi0cJ zNGr<ERY+7QD9SI(Oi4{qC@snXYtm%8#avpHbBnDgwWPEtFPRa^u^=`GJA<MO<nR>6 zU<OU5Drc}xP{4shx)>DH>6vAzc_332ax(L>ixo=p6%rMSOA89}i%L>c6tW-@r3csP zwvs`Uv51v{fuV>UM*IrW&&bbB)h|v>N-QqPOw7|SD9X=D%`QpRPtMdW)`jwPOEdLL zGvf=2^3#hFb93X<iW1XvQ}aqv^)vGdN=x+9Q%m&o5=%;pQuPWdZ*e0<w*<%|Odua} zR0%@^PY+2R#rL43400OS_cs_A7)lrxFxD_EWUOUOVXR@yV#;DpVM<}nWK3b{Wn93L z!n%-AoS~KpF2jZ_Q_Bn&XGaldfs1pXh_j|M)Uu^B)Ut!+SyPx&I3e;b46!1$95rkU z*lIW+wk~93WGL)_tK&ja$63R^fUSlTN!^AL_8O)d<{GvdmKxR?_GVDV;;u4;$BT1O zYGO%hu|lE(QYzNX%quQQ%u7y1#9x&_MoCFQv6a4lN`9tZeo?yqEtb5*+|(#$7k^Jp z##^k3B_&10x7d^O^GZ_lN}~9ZGfTkvDZV7LBqudW49<rp4TyM^XhBhCajqUxR&z-# zNv+~aN!CjSIW4~k%q}iX%1TWx(c~xsMQ0QTD0>#?mloV&$w*C1xy6!}nU`{lIX$%` ziXFss%*naMmRL}bnwL_<%)r2)$pub&Q7j-mQ5^9p`I(^NAc_s*iYShFsH>v5Ai2Ic zBQ+(8Hy-4hqRgbylKi6LD9(7OD~hAop>Dgy4i!Qq>LLyX28LVwAO|9ON(qz_LCKMU zjf+W!Q34EE7+ILa7=^&NN*tCx(RBK0a^K>Jj|YWyeEco0`1suXl+v8k`1o5q@$rSF zi8)Xi_W1ae{N(ufD52ocfB^rX5LcIYFaOXWKS!V7TRg>}$VkpjEy>7Fxy4*uQgn+Y zDL+4_h!d0-K_OHm1Y&{GKblvHKm`FG$oV`B3=AAhENm=HOpHRDOpIU|KTWnGF$M;P zC`oW;)Jsn-(SwF%Jh*I4Edoc`EkTGpQkZ~a25bnDm0%ZugOI}p;!``2^NQIR7#LXC Ic^G+^0UT7sU;qFB literal 0 HcmV?d00001 diff --git a/input/get/acs.py 
b/input/get/acs.py new file mode 100755 index 0000000..9691845 --- /dev/null +++ b/input/get/acs.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 + +""" +Child class of JournalFetcher +Usage: Check if Url can be used with 'can_use_url' + and then fetch publication with 'get_publication' +""" + +import re + +from input.get.journal_fetcher import JournalFetcher +from input.publication import Publication, Citation + + +class Fetcher(JournalFetcher): + """ + Specific Fetcher for the ACS journals. + """ + + # Constant for the abbreviations of the supported Journals + SUPPORTED_JOURNALS = ['1021'] + + @staticmethod + def can_use_url(url: str) -> str: + """ + Uses Regex to extract journal specific substrings in Doi. + TODO: Support non Doi-urls + """ + matched_url = re.match(r'^(https?://)?(doi.org/|pubs.acs.org/doi/)?(10.(\d{4})/\w+.\S+)', url.strip(". \t\r\n")) + + #Checks if match exists + if matched_url is not None: + return matched_url[4] in Fetcher.SUPPORTED_JOURNALS + else: + return False + + @staticmethod + + + def get_pub_light(url: str) -> Publication: + """ + Fetches html and creates Beatifulsoup-instance in parent class. + Specific css-searches for ACS-Journals and creates Publication-instance. 
+        """
+
+        # Creation of Soup
+        try:
+            soup = JournalFetcher.get_soup(url)
+        except Exception as error:
+            raise error
+
+        # Raise Error if re recognizes Pattern, but url isnt correct:
+        # For other Urls
+        if soup.text.strip(" \t\n")=="Missing resource null":
+            raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
+
+        # For Dois
+        if soup.title is not None:
+            if soup.title.text == "Error: DOI Not Found":
+                raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
+
+
+        soup_header = soup.select('.article_header')[0]
+
+        # Creates Publication
+        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
+        title = soup_header.select(".hlFld-Title")[0].text
+
+        contributors = []
+        for author in soup_header.select(".hlFld-ContribAuthor"):
+            contributors.append(author.text)
+
+        journal = soup_header.select(".cit-title")[0].text
+
+        # Replaces abbreviation with whole name
+        if journal in JournalFetcher.abbrev_dict:
+            journal = JournalFetcher.abbrev_dict[journal]
+
+
+        published = soup_header.select(".pub-date-value")[0].text
+
+        subjects = []
+        subject_soup = soup_header.select('.article_header-taxonomy')[0]
+        for subject in subject_soup.select('a'):
+            subjects.append(subject.text)
+
+        return Publication(doi_url, title, contributors, journal, published,
+                            subjects)
+
+    @staticmethod
+    def get_publication(url: str) -> Publication:
+        """
+        Fetches the html via the parent class and scrapes the ACS article page.
+        Returns a Publication with its references and citations; entries without DOI are skipped.
+        Raises ValueError if the url matches the ACS pattern but is not a paper.
+        """
+
+        # Creation of Soup (errors of 'get_soup' simply propagate)
+        soup = JournalFetcher.get_soup(url)
+
+        # Raise Error if re recognizes Pattern, but url isnt correct:
+        # For other Urls
+        if soup.text.strip(" \t\n")=="Missing resource null":
+            raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
+
+        # For Dois
+        if soup.title is not None:
+            if soup.title.text == "Error: DOI Not Found":
+                raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
+
+
+        soup_header = soup.select('.article_header')[0]
+
+        #Could be used for more specific search
+        ref_cit_soup = soup
+
+        # Creates Publication
+        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
+        title = soup_header.select(".hlFld-Title")[0].text
+
+        contributors = []
+        for author in soup_header.select(".hlFld-ContribAuthor"):
+            contributors.append(author.text)
+
+        journal = soup_header.select(".cit-title")[0].text
+
+        # Replaces abbreviation with whole name
+        if journal in JournalFetcher.abbrev_dict:
+            journal = JournalFetcher.abbrev_dict[journal]
+
+
+        published = soup_header.select(".pub-date-value")[0].text
+
+        subjects = []
+        subject_soup = soup_header.select('.article_header-taxonomy')[0]
+        for subject in subject_soup.select('a'):
+            subjects.append(subject.text)
+
+
+        references = []
+        references_soup = ref_cit_soup.select('ol#references')
+        if references_soup != []:
+            for reference in references_soup[0].select('li'):
+                if reference.select('.refDoi') != []:
+                    ref_doi = "https://doi.org/{}".format(reference.select('.refDoi')[0].text.strip()[5:])
+                else:
+                    # No Doi -> No Paper
+                    continue
+                ref_title = reference.select('.NLM_article-title')[0].text\
+                        if reference.select('.NLM_article-title') != [] else None
+                ref_journal = reference.select('i')[0].text\
+                        if reference.select('i') != [] else None
+
+                # Replaces abbreviation with whole name
+                if ref_journal in JournalFetcher.abbrev_dict:
+                    ref_journal = JournalFetcher.abbrev_dict[ref_journal]
+
+                ref_contributors=[]
+                for author in reference.select('.NLM_contrib-group'):
+                    ref_contributors.append(author.text.replace("\n", " ").replace("\r", ""))
+
+                references.append(Citation(ref_doi, ref_title, ref_journal, ref_contributors, cit_type="Reference"))
+
+        citations = []
+        citation_soup = ref_cit_soup.select('.cited-content_cbyCitation')
+        if citation_soup != []:
+            for citation in citation_soup[0].select('li'):
+                if citation.select('a[title="DOI URL"]') != []:
+                    cit_doi = citation.select('a[title="DOI URL"]')[0].text
+                else:
+                    # No Doi -> No Paper
+                    continue
+                cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text\
+                        if citation.select('.cited-content_cbyCitation_article-title')!= [] else None
+                cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text\
+                        if citation.select('.cited-content_cbyCitation_journal-name') != [] else None
+
+                # Replaces abbreviation with whole name
+                if cit_journal in JournalFetcher.abbrev_dict:
+                    cit_journal = JournalFetcher.abbrev_dict[cit_journal]
+                # An entry without contributor element yields [''] and ends up as an empty list
+                cit_authors_soup = citation.select('.cited-content_cbyCitation_article-contributors')
+                cit_contributors = cit_authors_soup[0].text.replace("\n", " ").replace("\r", "")\
+                    .split(', ') if cit_authors_soup else ['']
+                # clean up of the last Entry
+                cit_contributors_last = cit_contributors.pop().strip(". ")
+                if cit_contributors_last != '':
+                    cit_contributors.append(cit_contributors_last)
+                citations.append(Citation(cit_doi, cit_title, cit_journal, cit_contributors, cit_type = "Citation"))
+
+        return Publication(doi_url, title, contributors, journal, published
+                            , subjects, references, citations)
diff --git a/input/get/journal_fetcher.py b/input/get/journal_fetcher.py
new file mode 100755
index 0000000..514af1f
--- /dev/null
+++ b/input/get/journal_fetcher.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+
+"""
+Parent class for specific Journal
+"""
+
+from abc import ABCMeta, abstractmethod
+from bs4 import BeautifulSoup
+import requests
+from input.publication import Publication
+
+
+class JournalFetcher(metaclass=ABCMeta):
+    """
+    This is a abstract-class for fetcher modules
+    """
+
+    @staticmethod
+    def get_soup(url: str, timeout: float = 30) -> BeautifulSoup:
+        """
+        Retrieves website-html and returns a BeautifulSoup-instance
+
+        Parameters:
+        -----------
+        :type url: str
+        :param url: doi-url to a publication
+        :type timeout: float
+        :param timeout: seconds to wait for the server before requests gives up
+        :return: BeautifulSoup-instance (also built for error pages, fetchers inspect the content)
+        """
+        # Connection problems propagate as requests-exceptions, never as SystemExit
+        req = requests.get(url, timeout=timeout)
+
+        return BeautifulSoup(req.content, 'html.parser')
+
+    @staticmethod
+    @abstractmethod
+    def can_use_url(url: str) -> bool:
+        """
+        Abstract-function to be implemented in subclass.
+        Checks if given url links to a supported journal
+        """
+        raise AttributeError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url))
+
+
+    @staticmethod
+    @abstractmethod
+    def get_publication(url: str) -> Publication:
+        """
+        Abstract-function to be implemented in subclass.
+        Creates a Publication-instance.
+        """
+        raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url))
+
+
+    # A Dictionary, which connects abbreviation to whole journal-name
+    # NOTE(review): "Nat. Protoc." abbreviates Nature Protocols; test_acs.py pins the current value
+    abbrev_dict = {
+          "Nat. Protoc.":"Journal of Natural Products"
+        ,"PLoS Comput. Biol.":"PLoS Computational Biology"
+        ,"PLoS One":"PLoS One"
+        ,"Protein Sci.":"Protein Science"
+        ,"J. Am. Chem. Soc.":"Journal of the American Chemical Society"
+        ,"J. Chem. Phys.":"Journal of Chemical Physics"
+        ,"Appl. Sci.":"Applied Science"
+        ,"Comput. Sci. Eng.":"Computing in Science & Engineering"
+        ,"Beilstein J. Org. Chem.":"Beilstein Journal of Organic Chemistry"
+        ,"Biol. Chem.":"Biological Chemistry"
+        ,"Isr. J. Chem.":"Israel Journal of Chemistry"
+        ,"Nat. Methods":"Nature Methods"
+        ,"Proc. Natl. Acad. Sci. U. S. A.":"Proceedings of the National Academy of Sciences of the United States of America"
+        ,"J. Phys. Chem. B":"Journal of Physical Chemistry B"
+        ,"Carbohydr. Res.":"Carbohydrate Research"
+        ,"J. Chem. Theory Comput.":"Journal of Chemical Theory and Computation"
+        ,"J. Mol. Biol.":"Journal of Molecular Biology"
+        ,"Nucleic Acids Res.":"Nucleic Acids Research"
+        ,"J. Comput. Chem.":"Journal of Computational Chemistry"
+        ,"J. Cheminf.":"Journal of Cheminformatics"
+        ,"J. Med. Chem.":"Journal of Medicinal Chemistry"
+        ,"J. Comput.-Aided Mol. Des.":"Journal of Computer-Aided Molecular Design"
+        ,"J. Chem. Inf. Model.":"Journal of Chemical Information and Modeling"
+        ,"Mol. Cell":"Molecular Cell"
+        ,"J. Cell Biolog.":"Journal of Cell Biology"
+        ,"Mol. Cell Biol.":"Molecular and Cellular Biology"
+        ,"J. Cell Sci.":"Journal of Cell Science"
+        ,"Nat. Cell Biol.":"Nature Cell Biology"
+        ,"J. Aerosol Sci. Technol.":"Aerosol Science and Technology"
+        ,"Mol. Biol. Cell":"Molecular Biology of the Cell"
+        ,"Build. Environ.":"Building and Environment"
+        ,"Sci. Rep.":"Scientific Reports"
+        ,"Nat. Chem.":"Nature Chemistry"
+        ,"Nat. Med.":"Nature Medicine"
+        ,"Nat. Commun.":"Nature Communications"
+        ,"Exp. Cell Res.":"Experimental Cell Research"
+        ,"Nat. Chem. 
Biol.":"Nature Chemical Biology"
+    }
\ No newline at end of file
diff --git a/input/get/nature.py b/input/get/nature.py
new file mode 100644
index 0000000..c50ea0e
--- /dev/null
+++ b/input/get/nature.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+"""
+Child class of JournalFetcher
+Usage: Check if Url can be used with 'can_use_url'
+       and then fetch publication with 'get_publication'
+"""
+
+# import re
+from input.get.journal_fetcher import JournalFetcher
+from input.publication import Publication
+
+
+class Fetcher(JournalFetcher):
+
+    """
+    scrapes publication metadata from a provided url
+    """
+
+    # TODO: List of Compatable Journals
+    # NOTE: nature does not use journal names in doi links, must match by 10.xxxx identifier instead
+    SUPPORTED_JOURNALS = []
+
+    @staticmethod
+    def can_use_url(url: str) -> bool:
+        """
+        Checks if given url links to a supported journal.
+        """
+
+        # TODO: Check the URL for compatability
+        # re.match in SUPPORTED_JOURNALS
+        return False
+
+    @staticmethod
+    def get_publication(url: str) -> Publication:
+        """
+        Creates a Publication-instance from the <meta>-tags of a nature article.
+
+        :param url: url to a nature publication
+        :raises ValueError: if the page has no <head> or misses a required meta-tag
+        """
+        head = JournalFetcher.get_soup(url).head
+
+        def meta(name: str) -> str:
+            # 'find' yields None for a missing tag, so check before calling '.get'
+            tag = head.find(attrs={"name": name}) if head is not None else None
+            if tag is None or tag.get("content") is None:
+                raise ValueError("Cant Fetch: '{}' has no meta-tag '{}'".format(url, name))
+            return tag.get("content")
+
+        # Required single-value fields
+        _doi_url = "https://doi.org/" + meta("DOI")
+        _title = meta("citation_title")
+        _journal = meta("citation_journal_title")
+        _published = meta("prism.publicationDate")
+        _contributors = [tag.get("content") for tag in head.find_all(attrs={"name": "dc.creator"})]
+        _subjects = [tag.get("content") for tag in head.find_all(attrs={"name": "dc.subject"})]
+
+        return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects)
diff --git a/input/get/template_.py b/input/get/template_.py
new file mode 100755
index 0000000..58de023
--- /dev/null
+++ b/input/get/template_.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+"""
+Child class of JournalFetcher
+Usage: None, this is just a template and should be ignored
+"""
+
+# import re
+from input.get.journal_fetcher import JournalFetcher
+from input.publication import Publication
+
+
+class Fetcher(JournalFetcher):
+
+    """
+    This is only a template and therefore has no functionality
+    """
+
+    # TODO: Naming-Convention:
+    #   Class: 'Fetcher'
+    #   file: [journal-/organisation-name]
+    #   format = "[a-z]*.py" allowed
+    # TODO: List of Compatable Journals
+    SUPPORTED_JOURNALS = []
+
+    @staticmethod
+    def can_use_url(url: str) -> bool:
+        """
+        Checks if given url links to a supported journal.
+        """
+
+        # TODO: Check the URL for compatability
+        # url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url)
+        # if url_re is not None:
+        #     return url_re[4] in SUPPORTED_JOURNALS
+        # else:
+        return False
+
+    @staticmethod
+    def get_publication(url: str) -> Publication:
+        """
+        Creates a Publication-instance.
+        """
+
+        # TODO: Fetch data from the HTML
+        # soup = JournalFetcher.get_soup(url)
+        # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[]
+        # TODO: Create new Publication-instance
+        # return Publication(doi_url, title, contributors = [], journal
+        #         , publication_date, subjects = [], references = [], citations = [])
+        return None
\ No newline at end of file
diff --git a/input/interface.py b/input/interface.py
new file mode 100755
index 0000000..59515b3
--- /dev/null
+++ b/input/interface.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+
+"""
+Interface for the Input-Package; only this should be accessed from outside this Package.
+
+"""
+from os import walk
+import importlib
+import pathlib
+import re
+from input.publication import Publication
+
+class InputInterface:
+    """
+    Singleton which dynamically imports and manages fetchers
+    """
+
+    instance = None
+    get_path = None
+    fetcher_classes=[]
+
+    # '__new__' is called before '__init__' and gives us an instance
+    def __new__(cls, *args, **kwargs):
+        # 'object.__new__' accepts no extra arguments, so none are forwarded
+        # checks if an instance exists and if it doesnt creates one
+        if cls.instance is None:
+            cls.instance = super(InputInterface, cls).__new__(cls)
+
+        return cls.instance
+
+    def __init__(self):
+        # imports all modules once; the class-level list is shared by the singleton
+        if self.fetcher_classes ==[]:
+            self.import_fetcher_classes()
+        if self.fetcher_classes ==[]:
+            raise AttributeError("No specific Fetchers where found at: '{}'"
+                                    .format(self.get_path))
+
+    def get_publication(self, url: str) -> Publication:
+        """
+        The interface-method to get a Publication-instance
+        (including its citations and references)
+
+        Parameters
+        ----------
+        :param url: url to a Publication
+        :type url: str
+        :return: Publication instance
+        :raises ValueError: if no imported fetcher supports the url
+        """
+
+        # Checks if module supports the 'url' and
+        # returns a Publication if it does.
+        for fetcher_class in InputInterface.fetcher_classes:
+            if fetcher_class.can_use_url(url):
+                return fetcher_class.get_publication(url)
+
+        # No Module for given url was found
+        raise ValueError("'{}' is not supported".format(url))
+
+    def get_pub_light(self, url: str) -> Publication:
+        """
+        The interface-method to get a Publication-instance
+        (only for main article)
+
+        Parameters
+        ----------
+        :param url: url to a Publication
+        :type url: str
+        :return: Publication instance
+        :raises ValueError: if no imported fetcher supports the url
+        """
+
+        # Checks if module supports the 'url' and
+        # returns a Publication if it does.
+        for fetcher_class in InputInterface.fetcher_classes:
+            if fetcher_class.can_use_url(url):
+                return fetcher_class.get_pub_light(url)
+
+        # No Module for given url was found
+        raise ValueError("'{}' is not supported".format(url))
+
+    def get_supported_fetchers(self):
+        # Returns the class-names of the imported fetchers. Useless right now,
+        # because all classes are called the same
+        return [a.__name__ for a in self.fetcher_classes]
+
+    def import_fetcher_classes(self):
+        """
+        Searches in 'get', if there are [a-z]*.py modules (specific Fetchers)
+        and tries to import them.
+        Saves the found 'Fetcher'-classes in 'fetcher_classes'.
+        """
+
+        # Path to 'get'-package
+        self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve())
+
+        # Searches for modules with given Pattern; 'fullmatch' with an escaped
+        # dot keeps files like 'acs.pyc' or 'acs.py.bak' out of the list
+        fetcher_file_names=[]
+        for file in next(walk(self.get_path), (None, None, []))[2]:
+            if re.fullmatch(r'[a-z]+\.py', file) is not None:
+                fetcher_file_names.append(file)
+
+        # Tries to import those modules and saves their 'Fetcher'-class
+        for file in fetcher_file_names:
+            try:
+                fetcher_module = importlib.import_module("input.get.{}".format(file[:-3]))
+            except Exception as error:
+                raise ImportError("Module '{}' can not be imported".format(file[:-3])) from error
+            if not hasattr(fetcher_module, 'Fetcher'):
+                raise ImportError("Module '{}' does not have a 'Fetcher'-class".format(file[:-3]))
+            self.fetcher_classes.append(fetcher_module.Fetcher)
diff --git a/input/publication.py b/input/publication.py
new file mode 100755
index 0000000..fc512e7
--- /dev/null
+++ b/input/publication.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+
+# this is needed for typing pre python 3.9, this maybe as an large Overhead
+from typing import Any, List
+
+
+class Publication:
+    """
+    Represents a Publications
+    """
+    def __init__(self, doi_url: str, title: str \
+                 , contributors: List[str], journal: str \
+                 , publication_date: str, subjects: List[str]\
+                 , references: List[Any] = None, citations: List[Any] = None ):
+        """
+        Parameters
+        ----------
+        :param doi_url: doi_url of the publication
+        :type doi_url: str
+        :param title: title of the publication
+        :type title: str
+        :param contributors: list of all contributors
+        :type contributors: List[str]
+        :param journal: name of the journal the publication appeared in
+        :type journal: str
+        :param publication_date: date of release
+        :type publication_date: str
+        :param subjects: the subject of the Publication
+        :type subjects: List[str]
+        :param references: the Citation which is been referenced by this Publication
+        :type references: List[Any]
+        :param citations: the Citation which references this Publication
+        :type citations: List[Any]
+        :return: None
+        """
+        self.doi_url = doi_url
+        self.title = title
+        self.contributors = contributors
+        self.journal = journal
+        self.publication_date = publication_date
+        self.subjects = subjects
+        # None instead of [] as default, so that instances never share one list
+        self.references = [] if references is None else references
+        self.citations = [] if citations is None else citations
+        # For the 'Verarbeitungsgruppe'
+        self.group = None
+
+    def __str__(self) -> str:
+        return ("Title: {}\n"
+                "Doi-url: {}\n"
+                "Authors: {}\n"
+                "Journal: {}\n"
+                "Published on: {}\n"
+                "Subjects: {}\n"
+                "References: \n{}\n"
+                "Citations: \n{}")\
+                .format(self.title, self.doi_url, ", ".join(self.contributors)
+                    , self.journal, self.publication_date
+                    , ", ".join(self.subjects)
+                    , "\n".join(self.get_citation_string(self.references))
+                    , "\n".join(self.get_citation_string(self.citations)))
+
+    @staticmethod
+    def get_citation_string(citations):
+        if citations == []:
+            return ["None"]
+        else:
+            citation_string = []
+            for citation in citations:
+                citation_string.append(citation.__str__())
+        return citation_string
+
+    def add_citations(self, citation) -> List[Any]:
+        """
+        Appends a list of Citations or Citation to self.citations.
+
+        Parameter
+        ---------
+        :param citation: Citation or Reference of the Publication
+        :type citation: Citation or list[Citation]
+        :return: self.citations
+        """
+        if isinstance(citation, Citation):
+            self.citations.append(citation)
+        # Checks if 'citation' is a list of Citations
+        elif isinstance(citation, list):
+            for _cit in citation:
+                if isinstance(_cit, Citation):
+                    self.citations.append(_cit)
+                else:
+                    raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
+                                    .format(type(_cit)))
+        else:
+            raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
+                            .format(type(citation)))
+
+        return self.citations
+
+    def __eq__(self, other) -> bool:
+        """ Compares the unique doi_url of two Publications"""
+        if type(self)==type(other):
+            return self.doi_url == other.doi_url
+        return False
+
+    def __hash__(self) -> int:
+        """ Defining '__eq__' alone makes instances unhashable; hash the same key it compares"""
+        return hash(self.doi_url)
+
+
+class Citation:
+    """ A cited or citing paper of a Publication, identified by its doi_url"""
+    def __init__(self, doi_url: str, title: str \
+                , journal: str, contributors: List[str] \
+                , cit_type: str = "Citation"):
+        """
+        Parameters
+        ----------
+        :param doi_url: doi_url of the publication
+        :type doi_url: str
+        :param title: title of the publication
+        :type title: str
+        :param journal: name of the journal
+        :type journal: str
+        :param contributors: list of all contributors
+        :type contributors: List[str]
+        :param cit_type: Specifies if Reference or Citation
+        :type cit_type: str
+        :return: None
+        """
+        self.title = title
+        self.doi_url = doi_url
+        self.journal = journal
+        self.contributors = contributors
+        self.cit_type = cit_type
+
+    def __str__(self) -> str:
+        return ("\t{}-Title: {}\n"
+                "\t{}-Doi: {}\n"
+                "\t{}-Journal: {}\n"
+                "\t{}-Contributors: {}\n")\
+                .format(self.cit_type, self.title
+                    , self.cit_type, self.doi_url
+                    , self.cit_type, self.journal
+                    , self.cit_type, ", ".join(self.contributors))
diff --git a/input/requirements.txt b/input/requirements.txt
new file mode 100644
index 0000000..a151126
--- /dev/null
+++ b/input/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4
+requests
\ No newline at end of file
diff --git a/input/test/__init__.py 
b/input/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/input/test/test_acs.py b/input/test/test_acs.py
new file mode 100644
index 0000000..e3dfe84
--- /dev/null
+++ b/input/test/test_acs.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python
+
+from input.get.acs import Fetcher as Acs
+from input.publication import Publication, Citation
+from input.test.test_input import FetcherTestCase
+
+
+class AcsTestCase(FetcherTestCase):
+    """
+    Methods with test_* will be detected by unittest and run.
+    """
+
+    def test_acs_url(self):
+        # Positive Testing
+        self.can_use_url_test(Acs, "https://doi.org/10.1021/acs.jcim.1c00203" , True)
+        self.can_use_url_test(Acs, "doi.org/10.1021/acs.jcim.1c00203" , True)
+        self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203" , True)
+        self.can_use_url_test(Acs, " 10.1021/acs.jcim.1c00203" , True)
+        self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203 " , True)
+        self.can_use_url_test(Acs, "\t 10.1021/acs.jcim.1c00203 \t\n" , True)
+        self.can_use_url_test(Acs, "https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203" , True)
+
+        # Negative Testing
+        self.can_use_url_test(Acs, "" , False)
+        self.can_use_url_test(Acs, "https://doi.org/10.1038/219021a0" , False)
+        self.can_use_url_test(Acs, "https://www.nature.com/articles/219021a0" , False)
+        self.can_use_url_test(Acs, "https://pubs.acs.org/doi/doi.org/10.1021/acs.jcim.1c00203", False)
+
+    # Hands the ACS fetcher, the url and the matching 'expectedPubs' entry to 'get_publication_test'.
+    # NOTE(review): helper comes from FetcherTestCase; presumably fetches the live page - confirm
+    def test_acs_publication(self):
+        url = "https://doi.org/10.1021/acs.jcim.1c00203"
+        self.get_publication_test(Acs, url, self.expectedPubs[url])
+
+    def test_acs_exceptions(self):
+        test_url= "https://doi.org/10.1021/acs.jcim.1c002"
+        self.get_publication_exception_test(Acs, test_url)
+
+    # Dictionary of Expected Results, with url
+    expectedPubs = {
+        "https://doi.org/10.1021/acs.jcim.1c00203":
+        Publication(
+            doi_url = "https://doi.org/10.1021/acs.jcim.1c00203",
+            title = "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings",
+            contributors = ["Jerome 
Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], + journal="Journal of Chemical Information and Modeling", + publication_date = "July 19, 2021", + subjects = ["Algorithms","Ligands","Molecules","Receptors","Macrocycles"], + references = [ + Citation(doi_url = "https://doi.org/10.1002/jcc.21334" + , title ="AutoDock Vina: improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading" + , journal="Journal of Computational Chemistry" + , contributors=["Trott, O.", "Olson, A. J."] + , cit_type="Reference") + , Citation(doi_url = "https://doi.org/10.1038/nprot.2016.051" + , title ="Computational protein-ligand docking and virtual drug screening with the AutoDock suite" + , journal="Journal of Natural Products" + , contributors=["Forli, S.","Huey, R.","Pique, M. E.","Sanner, M. F.","Goodsell, D. S.","Olson, A. J."] + , cit_type="Reference") + , Citation(title = "A semiempirical free energy force field with charge-based desolvation" + , doi_url = "https://doi.org/10.1002/jcc.20634" + , journal="Journal of Computational Chemistry" + , contributors=["Huey, R.","Morris, G. M.","Olson, A. J.","Goodsell, D. S."] + , cit_type="Reference") + , Citation(title="Accelerating autodock4 with gpus and gradient-based local search" + , doi_url="https://doi.org/10.1021/acs.jctc.0c01006" + , journal="Journal of Chemical Theory and Computation" + , contributors=["Santos-Martins, D.","Solis-Vasquez, L.","Tillack, A. F.","Sanner, M. F.","Koch, A.","Forli, S."] + , cit_type="Reference") + , Citation(title="AutoDockFR: Advances in Protein-Ligand Docking with Explicitly Specified Binding Site Flexibility" + , doi_url="https://doi.org/10.1371/journal.pcbi.1004586" + , journal="PLoS Computational Biology" + , contributors=["Ravindranath, P. A.","Forli, S.","Goodsell, D. S.","Olson, A. J.","Sanner, M. 
F."] + , cit_type="Reference") + , Citation(title="Docking flexible cyclic peptides with AutoDock CrankPep" + , doi_url="https://doi.org/10.1021/acs.jctc.9b00557" + , journal="Journal of Chemical Theory and Computation" + , contributors=["Zhang, Y.","Sanner, M. F."] + , cit_type="Reference") + , Citation(title="Fast, accurate, and reliable molecular docking with QuickVina 2" + , doi_url="https://doi.org/10.1093/bioinformatics/btv082" + , journal="Bioinformatics" + , contributors=["Alhossary, A.","Handoko, S. D.","Mu, Y.","Kwoh, C.-K."] + , cit_type="Reference") + , Citation(title="Lessons learned in empirical scoring with smina from the CSAR 2011 benchmarking exercise" + , doi_url="https://doi.org/10.1021/ci300604z" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Koes, D. R.","Baumgartner, M. P.","Camacho, C. J."] + , cit_type="Reference") + , Citation(title="Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking" + , doi_url="https://doi.org/10.1021/acs.jctc.5b00834" + , journal="Journal of Chemical Theory and Computation" + , contributors=["Nivedha, A. K.","Thieker, D. F.","Makeneni, S.","Hu, H.","Woods, R. J."] + , cit_type="Reference") + , Citation(title="AutoDock VinaXB: implementation of XBSF, new empirical halogen bond scoring function, into AutoDock Vina" + , doi_url="https://doi.org/10.1186/s13321-016-0139-1" + , journal="Journal of Cheminformatics" + , contributors=["Koebel, M. R.","Schmadeke, G.","Posner, R. G.","Sirimulla, S."] + , cit_type="Reference") + , Citation(title="Vinardo: A Scoring Function Based on Autodock Vina Improves Scoring, Docking, and Virtual Screening" + , doi_url="https://doi.org/10.1371/journal.pone.0155183" + , journal="PLoS One" + , contributors=["Quiroga, R.","Villarreal, M. 
A."] + , cit_type="Reference") + , Citation(title="Lennard-Jones potential and dummy atom settings to overcome the AUTODOCK limitation in treating flexible ring systems" + , doi_url="https://doi.org/10.1021/ci700036j" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Forli, S.","Botta, M."] + , cit_type="Reference") + , Citation(title="AutoDock4Zn: an improved AutoDock force field for small-molecule docking to zinc metalloproteins" + , doi_url="https://doi.org/10.1021/ci500209e" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Santos-Martins, D.","Forli, S.","Ramos, M. J.","Olson, A. J."] + , cit_type="Reference") + , Citation(title="A force field with discrete displaceable waters and desolvation entropy for hydrated ligand docking" + , doi_url="https://doi.org/10.1021/jm2005145" + , journal="Journal of Medicinal Chemistry" + , contributors=["Forli, S.","Olson, A. J."] + , cit_type="Reference") + , Citation(title="Directional phosphorylation and nuclear transport of the splicing factor SRSF1 is regulated by an RNA recognition motif" + , doi_url="https://doi.org/10.1016/j.jmb.2016.04.009" + , journal="Journal of Molecular Biology" + , contributors=["Serrano, P.","Aubol, B. E.","Keshwani, M. M.","Forli, S.","Ma, C.-T.","Dutta, S. K.","Geralt, M.","Wüthrich, K.","Adams, J. A."] + , cit_type="Reference") + , Citation(title="Covalent docking using autodock: Two-point attractor and flexible side chain methods" + , doi_url="https://doi.org/10.1002/pro.2733" + , journal="Protein Science" + , contributors=["Bianco, G.","Forli, S.","Goodsell, D. S.","Olson, A. J."] + , cit_type="Reference") + , Citation(title="Consensus docking: improving the reliability of docking in a virtual screening context" + , doi_url="https://doi.org/10.1021/ci300399w" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Houston, D. R.","Walkinshaw, M. 
D."] + , cit_type="Reference") + , Citation(title="DockBench: an integrated informatic platform bridging the gap between the robust validation of docking protocols and virtual screening simulations" + , doi_url="https://doi.org/10.3390/molecules20069977" + , journal="Molecules" + , contributors=["Cuzzolin, A.","Sturlese, M.","Malvacio, I.","Ciancetta, A.","Moro, S."] + , cit_type="Reference") + , Citation(title="A new force field for molecular mechanical simulation of nucleic acids and proteins" + , doi_url="https://doi.org/10.1021/ja00315a051" + , journal="Journal of the American Chemical Society" + , contributors=["Weiner, S. J.","Kollman, P. A.","Case, D. A.","Singh, U. C.","Ghio, C.","Alagona, G.","Profeta, S.","Weiner, P."] + , cit_type="Reference") + , Citation(title="AutoDock Bias: improving binding mode prediction and virtual screening using known protein-ligand interactions" + , doi_url="https://doi.org/10.1093/bioinformatics/btz152" + , journal="Bioinformatics" + , contributors=["Arcon, J. P.","Modenutti, C. P.","Avendaño, D.","Lopez, E. D.","Defelipe, L. A.","Ambrosio, F. A.","Turjanski, A. G.","Forli, S.","Marti, M. A."] + , cit_type="Reference") + , Citation(title="Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory" + , doi_url="https://doi.org/10.1021/jp9723574" + , journal="Journal of Physical Chemistry B" + , contributors=["Lazaridis, T."] + , cit_type="Reference") + , Citation(title="Inhomogeneous fluid approach to solvation thermodynamics. 2. Applications to simple fluids" + , doi_url="https://doi.org/10.1021/jp972358w" + , journal="Journal of Physical Chemistry B" + , contributors=["Lazaridis, T."] + , cit_type="Reference") + , Citation(title="Grid inhomogeneous solvation theory: Hydration structure and thermodynamics of the miniature receptor cucurbit[7]uril" + , doi_url="https://doi.org/10.1063/1.4733951" + , journal="Journal of Chemical Physics" + , contributors=["Nguyen, C. N.","Young, T. K.","Gilson, M. 
K."] + , cit_type="Reference") + , Citation(title="AutoDock-GIST: Incorporating Thermodynamics of Active-Site Water into Scoring Function for Accurate Protein-Ligand Docking" + , doi_url="https://doi.org/10.3390/molecules21111604" + , journal="Molecules" + , contributors=["Uehara, S.","Tanaka, S."] + , cit_type="Reference") + , Citation(title="ZINC20—A Free Ultralarge-Scale Chemical Database for Ligand Discovery" + , doi_url="https://doi.org/10.1021/acs.jcim.0c00675" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Irwin, J. J.","Tang, K. G.","Young, J.","Dandarchuluun, C.","Wong, B. R.","Khurelbaatar, M.","Moroz, Y. S.","Mayfield, J.","Sayle, R. A."] + , cit_type="Reference") + , Citation(title="Structural biology-inspired discovery of novel KRAS–PDEδ inhibitors" + , doi_url="https://doi.org/10.1021/acs.jmedchem.7b01243" + , journal="Journal of Medicinal Chemistry" + , contributors=["Jiang, Y.","Zhuang, C.","Chen, L.","Lu, J.","Dong, G.","Miao, Z.","Zhang, W.","Li, J.","Sheng, C."] + , cit_type="Reference") + , Citation(title="D3R grand challenge 2015: evaluation of protein–ligand pose and affinity predictions" + , doi_url="https://doi.org/10.1007/s10822-016-9946-8" + , journal="Journal of Computer-Aided Molecular Design" + , contributors=["Gathiaka, S.","Liu, S.","Chiu, M.","Yang, H.","Stuckey, J. A.","Kang, Y. N.","Delproposto, J.","Kubish, G.","Dunbar, J. B.","Carlson, H. A.","Burley, S. K.","Walters, W. P.","Amaro, R. E.","Feher, V. A.","Gilson, M. K."] + , cit_type="Reference") + , Citation(title="D3R grand challenge 4: blind prediction of protein–ligand poses, affinity rankings, and relative binding free energies" + , doi_url="https://doi.org/10.1007/s10822-020-00289-y" + , journal="Journal of Computer-Aided Molecular Design" + , contributors=["Parks, C. D.","Gaieb, Z.","Chiu, M.","Yang, H.","Shao, C.","Walters, W. P.","Jansen, J. M.","McGaughey, G.","Lewis, R. A.","Bembenek, S. D.","Ameriks, M. K.","Mirzadegan, T.","Burley, S. 
K.","Amaro, R. E.","Gilson, M. K."] + , cit_type="Reference") + , Citation(title="D3R Grand Challenge 4: prospective pose prediction of BACE1 ligands with AutoDock-GPU" + , doi_url="https://doi.org/10.1007/s10822-019-00241-9" + , journal="Journal of Computer-Aided Molecular Design" + , contributors=["Santos-Martins, D.","Eberhardt, J.","Bianco, G.","Solis-Vasquez, L.","Ambrosio, F. A.","Koch, A.","Forli, S."] + , cit_type="Reference") + , Citation(title="Comparison of affinity ranking using AutoDock-GPU and MM-GBSA scores for BACE-1 inhibitors in the D3R Grand Challenge 4" + , doi_url="https://doi.org/10.1007/s10822-019-00240-w" + , journal="Journal of Computer-Aided Molecular Design" + , contributors=["El Khoury, L.","Santos-Martins, D.","Sasmal, S.","Eberhardt, J.","Bianco, G.","Ambrosio, F. A.","Solis-Vasquez, L.","Koch, A.","Forli, S.","Mobley, D. L."] + , cit_type="Reference") + , Citation(title="Macrocycle modeling in ICM: benchmarking and evaluation in D3R Grand Challenge 4" + , doi_url="https://doi.org/10.1007/s10822-019-00225-9" + , journal="Journal of Computer-Aided Molecular Design" + , contributors=["Lam, P. C.-H.","Abagyan, R.","Totrov, M."] + , cit_type="Reference") + , Citation(title="Directory of useful decoys, enhanced (DUD-E): better ligands and decoys for better benchmarking" + , doi_url="https://doi.org/10.1021/jm300687e" + , journal="Journal of Medicinal Chemistry" + , contributors=["Mysinger, M. M.","Carchia, M.","Irwin, J. J.","Shoichet, B. 
K."] + , cit_type="Reference") + , Citation(title="Evaluation of AutoDock and AutoDock Vina on the CASF-2013 benchmark" + , doi_url="https://doi.org/10.1021/acs.jcim.8b00312" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Gaillard, T."] + , cit_type="Reference") + , Citation(title="Autodock vina adopts more accurate binding poses but autodock4 forms better binding affinity" + , doi_url="https://doi.org/10.1021/acs.jcim.9b00778" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Nguyen, N. T.","Nguyen, T. H.","Pham, T. N. H.","Huy, N. T.","Bay, M. V.","Pham, M. Q.","Nam, P. C.","Vu, V. V.","Ngo, S. T."] + , cit_type="Reference") + , Citation(title="Development and validation of a genetic algorithm for flexible docking" + , doi_url="https://doi.org/10.1006/jmbi.1996.0897" + , journal="Journal of Molecular Biology" + , contributors=["Jones, G.","Willett, P.","Glen, R. C.","Leach, A. R.","Taylor, R."] + , cit_type="Reference") + , Citation(title="Glide: a new approach for rapid, accurate docking and scoring. 1. Method and assessment of docking accuracy" + , doi_url="https://doi.org/10.1021/jm0306430" + , journal="Journal of Medicinal Chemistry" + , contributors=["Friesner, R. A.","Banks, J. L.","Murphy, R. B.","Halgren, T. A.","Klicic, J. J.","Mainz, D. T.","Repasky, M. P.","Knoll, E. H.","Shelley, M.","Perry, J. K."] + , cit_type="Reference") + , Citation(title="Surflex: fully automatic flexible molecular docking using a molecular similarity-based search engine" + , doi_url="https://doi.org/10.1021/jm020406h" + , journal="Journal of Medicinal Chemistry" + , contributors=["Jain, A. 
N."] + , cit_type="Reference") + , Citation(title="A fast flexible docking method using an incremental construction algorithm" + , doi_url="https://doi.org/10.1006/jmbi.1996.0477" + , journal="Journal of Molecular Biology" + , contributors=["Rarey, M.","Kramer, B.","Lengauer, T.","Klebe, G."] + , cit_type="Reference") + , Citation(title="EDock: blind protein–ligand docking by replica-exchange monte carlo simulation" + , doi_url="https://doi.org/10.1186/s13321-020-00440-9" + , journal="Journal of Cheminformatics" + , contributors=["Zhang, W.","Bell, E. W.","Yin, M.","Zhang, Y."] + , cit_type="Reference") + , Citation(title="DOCK 6: Impact of new features and current docking performance" + , doi_url="https://doi.org/10.1002/jcc.23905" + , journal="Journal of Computational Chemistry" + , contributors=["Allen, W. J.","Balius, T. E.","Mukherjee, S.","Brozell, S. R.","Moustakas, D. T.","Lang, P. T.","Case, D. A.","Kuntz, I. D.","Rizzo, R. C."] + , cit_type="Reference") + , Citation(title="Improving scoring-docking-screening powers of protein–ligand scoring functions using random forest" + , doi_url="https://doi.org/10.1002/jcc.24667" + , journal="Journal of Computational Chemistry" + , contributors=["Wang, C.","Zhang, Y."] + , cit_type="Reference") + , Citation(title="ID-Score: a new empirical scoring function based on a comprehensive set of descriptors related to protein–ligand interactions" + , doi_url="https://doi.org/10.1021/ci300493w" + , journal="Journal of Chemical Information and Modeling" + , contributors=["Li, G.-B.","Yang, L.-L.","Wang, W.-J.","Li, L.-L.","Yang, S.-Y."] + , cit_type="Reference") + , Citation(title="Further development and validation of empirical scoring functions for structure-based binding affinity prediction" + , doi_url="https://doi.org/10.1023/a:1016357811882" + , journal="Journal of Computer-Aided Molecular Design" + , contributors=["Wang, R.","Lai, L.","Wang, S."] + , cit_type="Reference") + , Citation(title="A knowledge-based energy 
function for protein- ligand, protein- protein, and protein- DNA complexes" + , doi_url="https://doi.org/10.1021/jm049314d" + , journal="Journal of Medicinal Chemistry" + , contributors=["Zhang, C.","Liu, S.","Zhu, Q.","Zhou, Y."] + , cit_type="Reference") + , Citation(title="DLIGAND2: an improved knowledge-based energy function for protein–ligand interactions using the distance-scaled, finite, ideal-gas reference state" + , doi_url="https://doi.org/10.1186/s13321-019-0373-4" + , journal="Journal of Cheminformatics" + , contributors=["Chen, P.","Ke, Y.","Lu, Y.","Du, Y.","Li, J.","Yan, H.","Zhao, H.","Zhou, Y.","Yang, Y."] + , cit_type="Reference") + , Citation(title="Comparing AutoDock and Vina in ligand/decoy discrimination for virtual screening" + , doi_url="https://doi.org/10.3390/app9214538" + , journal="Applied Science" + , contributors=["Vieira, T. F.","Sousa, S. F."] + , cit_type="Reference") + , Citation(title="Benchmark of four popular virtual screening programs: construction of the active/decoy dataset remains a major determinant of measured performance" + , doi_url="https://doi.org/10.1186/s13321-016-0167-x" + , journal="Journal of Cheminformatics" + , contributors=["Chaput, L.","Martinez-Sanz, J.","Quiniou, E.","Rigolet, P.","Saettel, N.","Mouawad, L."] + , cit_type="Reference") + , Citation(title="Array programming with NumPy" + , doi_url="https://doi.org/10.1038/s41586-020-2649-2" + , journal="Nature" + , contributors=["Harris, C. R."] + , cit_type="Reference") + , Citation(title="Matplotlib: A 2D graphics environment" + , doi_url="https://doi.org/10.1109/mcse.2007.55" + , journal="Computing in Science & Engineering" + , contributors=["Hunter, J. D."] + , cit_type="Reference") + ], citations = [ + Citation(doi_url = "https://doi.org/10.1021/acsomega.1c04320" + , title ="Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives" + , journal="ACS Omega" + , contributors=["Mohammad K. 
Parvez","Sarfaraz Ahmed","Mohammed S. Al-Dosari","Mazin A. S. Abdelwahid","Ahmed H. Arbab","Adnan J. Al-Rehaily","Mai M. Al-Oqail"],cit_type="Citation"), + + ] + ) + } \ No newline at end of file diff --git a/input/test/test_input.py b/input/test/test_input.py new file mode 100755 index 0000000..b2ca55f --- /dev/null +++ b/input/test/test_input.py @@ -0,0 +1,82 @@ +import unittest +from input.get.journal_fetcher import JournalFetcher +from input.interface import InputInterface +from input.publication import Publication + +""" +Testing the Publication fetcher + +Publication 1: 'https://doi.org/10.1021/acs.jcim.1c00203' +Publication 2: 'doi.org/10.1021/acs.jcim.1c00917' +Publication 3: '10.1038/nchem.1781' +Publication 4: '11.12/jaj' +Publication 5: '11.12/' +Publication 6: 'https://doi.org/10.1021/acs.jmedchem.0c01332' # Paper is a PDF +""" +# TODO: Testcases for: +# - Specific Journals: Inherit from FetcherTestCase +# - interface module-importer (test case) +# - Error detection +# - wrong/no Journal_fetchers +# - wrong urls +# - correct Types in publication +# - Edgecases (i.e. 
#     paper as pdf, no connection, etc)


class InterfaceTestCase(unittest.TestCase):
    """Checks that ``InputInterface`` hands out one shared instance."""

    def setUp(self):
        # The singleton slot has to be empty before the first construction.
        self.assertIsNone(InputInterface.instance)
        # Clear the slot again after each test; without this the precondition
        # above fails as soon as a second test method is added to this case.
        # NOTE(review): assumes ``instance`` is the only singleton state kept
        # on the class -- confirm against input/interface.py.
        self.addCleanup(setattr, InputInterface, "instance", None)
        self.interface = InputInterface()

    def test_singleton(self):
        # setUp() has already constructed the interface once.
        self.assertIsNotNone(self.interface.instance)
        new_interface = InputInterface()
        self.assertEqual(self.interface, new_interface)

    # TODO: add test_imported_modules, checking
    #       self.interface.get_supported_fetchers


class FetcherTestCase(unittest.TestCase):
    """Reusable assertions for journal-specific fetcher test cases.

    None of the methods is named ``test_*``, so unittest does not collect
    them on their own; they are meant to be called with a concrete fetcher,
    URL and expected result.
    """

    # Attributes compared directly on the fetched publication.
    _PUBLICATION_FIELDS = (
        "doi_url",
        "title",
        "contributors",
        "journal",
        "publication_date",
        "subjects",
    )
    # Attributes compared on every entry of ``references`` / ``citations``.
    _CITATION_FIELDS = ("doi_url", "journal", "contributors", "cit_type")

    def can_use_url_test(self, fetcher: JournalFetcher, test_url: str, expected_res: bool):
        """Assert that ``fetcher.can_use_url(test_url)`` equals ``expected_res``."""
        self.assertEqual(fetcher.can_use_url(test_url), expected_res)

    def get_publication_test(self, fetcher: JournalFetcher, test_url: str, expected_res: Publication):
        """Fetch ``test_url`` and compare the result with ``expected_res``.

        Compared on the publication: doi_url, title, contributors, journal,
        publication_date and subjects.  Compared on each reference and each
        citation (pairwise, in order): doi_url, journal, contributors and
        cit_type.

        NOTE(review): the ``title`` of individual references/citations is
        not compared -- confirm whether that omission is intended.
        """
        actual_res = fetcher.get_publication(test_url)
        for field in self._PUBLICATION_FIELDS:
            self.assertEqual(
                getattr(actual_res, field), getattr(expected_res, field), msg=field
            )

        self._assert_citations_equal(
            actual_res.references, expected_res.references, "references"
        )
        self._assert_citations_equal(
            actual_res.citations, expected_res.citations, "citations"
        )

    def get_publication_exception_test(self, fetcher: JournalFetcher, test_url: str):
        """Assert that fetching ``test_url`` raises ``ValueError``."""
        with self.assertRaises(ValueError):
            fetcher.get_publication(test_url)

    def _assert_citations_equal(self, actual, expected, label):
        """Assert both citation lists have equal length and matching entries."""
        # The length check runs first, so zip() below never truncates silently.
        self.assertEqual(len(actual), len(expected), msg=label)
        for index, (act, exp) in enumerate(zip(actual, expected)):
            for field in self._CITATION_FIELDS:
                self.assertEqual(
                    getattr(act, field),
                    getattr(exp, field),
                    msg=f"{label}[{index}].{field}",
                )
Output('input-checklist','value'), Output('input-string','value'), + Output('input-err','children'), Input('input-string','value'), Input('clear-all-button','n_clicks'), Input('clear-selected-button','n_clicks'), @@ -71,21 +75,28 @@ def update_input_checklist(input_value,btn1,btn2,all_inputs,selected_inputs): changed_id = [p['prop_id'] for p in callback_context.triggered][0] # if clear-all-button was pressed: if 'clear-all-button' in changed_id: - return list(),list(),'' + return list(),list(),'','' # if clear-selected-button was pressed: if 'clear-selected-button' in changed_id: all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs] - return all_inputs,list(),'' + return all_inputs,list(),'','' # when the programm is first started: if input_value == '': app.layout['input-checklist'].options.clear() - return list(),list(),'' + return list(),list(),'','' # when a new element is added via dcc.Input - options = all_inputs - currValues = [x['value'] for x in options] - if input_value not in currValues: - options.append({'label':input_value, 'value':input_value}) - return options,selected_inputs,'' + if 'input-string' in changed_id: + options = all_inputs + currValues = [x['value'] for x in options] + if input_value not in currValues: + try: + i = InputInterface() + pub = i.get_pub_light(input_value) + except Exception as err: + return options,selected_inputs,'','{}'.format(err) + rep_str = pub.contributors[0] + ',' + pub.journal + ',' + pub.publication_date + options.append({'label':rep_str, 'value':input_value}) + return options,selected_inputs,'','' ''' This callback shows and hides the (first) help-box diff --git a/ui_programm_fragmente/upload_to_checklist.py b/ui_programm_fragmente/upload_to_checklist.py index dba9f69..9a094f2 100644 --- a/ui_programm_fragmente/upload_to_checklist.py +++ b/ui_programm_fragmente/upload_to_checklist.py @@ -1,7 +1,9 @@ import dash from dash import dcc from dash import html -from dash.dependencies import Input, 
from dash.dependencies import Input, Output, State
import base64
import re


def _decode_upload(contents):
    """Return the whitespace-separated tokens contained in an upload payload.

    ``contents`` is the string received from the ``contents`` property of the
    upload component: a header, a comma, and the base64-encoded file body.

    Raises:
        ValueError: if ``contents`` has no comma separating header and body.
        binascii.Error / UnicodeDecodeError: if the body is not valid
            base64-encoded UTF-8 text.
    """
    _header, separator, payload = contents.partition(',')
    if not separator:
        raise ValueError('upload contents are missing the "," before the base64 payload')
    return base64.b64decode(payload).decode('utf-8').split()


@app.callback(
    Output('input-checklist', 'options'),
    Input('upload-data', 'filename'),
    Input('upload-data', 'contents'),
    State('input-checklist', 'options'),
)
def update_input_list(uploaded_filenames, uploaded_file_contents, all_inputs):
    """Add every entry of an uploaded text file to the input checklist.

    Args:
        uploaded_filenames: name of the uploaded file (unused; only present
            because the property is registered as a callback input).
        uploaded_file_contents: encoded file contents, or ``None`` when no
            file has been uploaded yet.
        all_inputs: options currently shown in the checklist (may be empty
            or ``None``).

    Returns:
        The checklist options, extended by one ``{'label', 'value'}`` entry
        per whitespace-separated token of the file that is not listed yet.
    """
    # Work on a copy so the incoming state is never mutated in place.
    options = list(all_inputs) if all_inputs else []
    if uploaded_file_contents is None:
        # Nothing uploaded yet: keep the checklist as it is.
        return options

    # Track known values in a set so repeats inside the same file are
    # skipped as well, not only values already present in the checklist.
    known_values = {option['value'] for option in options}
    for entry in _decode_upload(uploaded_file_contents):
        if entry not in known_values:
            known_values.add(entry)
            options.append({'label': entry, 'value': entry})
    return options


if __name__ == '__main__':
    app.run_server(debug=True)