From 86ab6da4331613fa8bf7fa0d6399797541e04978 Mon Sep 17 00:00:00 2001 From: Florian Jochens <fj@andaco.de> Date: Mon, 29 Nov 2021 12:01:12 +0100 Subject: [PATCH] added updated input files --- count_journal.py | 40 ++++++++++++ example_input.py | 8 ++- input/README.md | 19 +++++- input/__init__.py | 6 -- input/get/__init__.py | 7 --- input/get/acs.py | 42 +++++++++++-- input/get/journal_fetcher.py | 42 +++++++++++++ input/get/nature.py | 2 +- input/get/template_.py | 3 +- input/interface.py | 50 +++++++++------ input/publication.py | 78 ++++++----------------- input/test/__init__.py | 2 - input/test/test_acs.py | 119 ++++++++++++++++++----------------- input/test/test_input.py | 28 ++++++--- input/test_doi.txt | 4 -- 15 files changed, 273 insertions(+), 177 deletions(-) create mode 100755 count_journal.py mode change 100755 => 100644 input/test/test_acs.py delete mode 100644 input/test_doi.txt diff --git a/count_journal.py b/count_journal.py new file mode 100755 index 0000000..13886a2 --- /dev/null +++ b/count_journal.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +from input.interface import InputInterface as Input + +def count_journals(url: str): + inter = Input() + pub = inter.get_publication(url) + + if pub.citations: + for citation in pub.citations: + journal = citation.journal + if journal in cit: + cit[journal] += 1 + else: + cit[journal] = 1 + + if pub.references: + for reference in pub.references: + journal = reference.journal + if journal in cit: + cit[journal] += 1 + else: + cit[journal] = 1 + +if __name__ == "__main__": + cit = {} + + count_journals("https://doi.org/10.1021/acs.jcim.1c00203") + count_journals("https://doi.org/10.1021/acs.jcim.6b00561") + count_journals("https://doi.org/10.1021/acs.jcim.6b00613") + count_journals("https://doi.org/10.1021/acs.jcim.1c00917") + count_journals("https://doi.org/10.1021/acs.jmedchem.0c01332") + #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.1c00290") + 
#count_journals("https://pubs.acs.org/doi/10.1021/acsenvironau.1c00007") + #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.7b01162") + + cit = dict(sorted(cit.items(), key=lambda item: item[1])) + for journal in cit: + if journal != "": + print(f'{journal}: {cit[journal]}') diff --git a/example_input.py b/example_input.py index febbc7b..76eede0 100755 --- a/example_input.py +++ b/example_input.py @@ -3,8 +3,10 @@ from input.interface import InputInterface as Input def main(url: str): - print(Input.get_publication(url)) + i = Input() + print(i.get_publication(url)) + # print(i.get_supported_fetchers()) Useless because all classes are called the same if __name__ == "__main__": - #main("https://doi.org/10.1021/acs.jcim.1c00203") - main("https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332") + #main("https://doi.org/10.1021/acs.jcim.1c0023") + main("https://doi.org/10.1021/acs.jcim.5b00332") diff --git a/input/README.md b/input/README.md index 7776ee0..110ce69 100644 --- a/input/README.md +++ b/input/README.md @@ -9,8 +9,9 @@ from input.interface import InputInterface as Input from input.publication import Publication def main(url): + inter = Input() try: - pub = Input.get_publication(url) + pub = inter.get_publication(url) except Exception as error: raise error @@ -18,10 +19,24 @@ def main(url): pub.title = "Cool new Title" print(pub) -if __name__=="__main__": +if __name__ == "__main__": main("https://doi.org/10.1021/acs.chemrev.8b00728") ``` +The expected results of calling this methode are: +| Input-Url | Result | +|-----------|-----------| +| supported & correct| A publication Instance | +| supported & uncorrect| ValueError| +| not supported | ValueError| + +Supported Url are urls, which comply with the url-pattern of supported Journals. 
+ +### Supported Journals: + +- ACS-Journals +- (Nature-Journals) + ## Testing ``` c diff --git a/input/__init__.py b/input/__init__.py index 428d906..e69de29 100644 --- a/input/__init__.py +++ b/input/__init__.py @@ -1,6 +0,0 @@ -""" -init.py for Input-Package. -""" - -from input.publication import Publication -from input.interface import InputInterface diff --git a/input/get/__init__.py b/input/get/__init__.py index 2a6ddd0..e69de29 100755 --- a/input/get/__init__.py +++ b/input/get/__init__.py @@ -1,7 +0,0 @@ -#!/usr/bin/env python3 -""" -__init__ for journalFetcher-module -temp file with nothing in it right now -""" -from input.publication import Publication -from input.get.journal_fetcher import JournalFetcher diff --git a/input/get/acs.py b/input/get/acs.py index cfc1d6d..892cf98 100755 --- a/input/get/acs.py +++ b/input/get/acs.py @@ -27,6 +27,8 @@ class Fetcher(JournalFetcher): TODO: Support non Doi-urls """ matched_url = re.match(r'^(https?://)?(doi.org/|pubs.acs.org/doi/)?(10.(\d{4})/\w+.\S+)', url.strip(". 
\t\r\n")) + + #Checks if match exists if matched_url is not None: return matched_url[4] in Fetcher.SUPPORTED_JOURNALS else: @@ -40,8 +42,25 @@ class Fetcher(JournalFetcher): """ # Creation of Soup - soup = JournalFetcher.get_soup(url) + try: + soup = JournalFetcher.get_soup(url) + except Exception as error: + raise error + + # Raise Error if re recognizes Pattern, but url isnt correct: + # For other Urls + if soup.text.strip(" \t\n")=="Missing resource null": + raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url)) + + # For Dois + if soup.title is not None: + if soup.title.text == "Error: DOI Not Found": + raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url)) + + soup_header = soup.select('.article_header')[0] + + #Could be used for more specific search ref_cit_soup = soup # Creates Publication @@ -54,6 +73,11 @@ class Fetcher(JournalFetcher): journal = soup_header.select(".cit-title")[0].text + # Replaces abbreviation with whole name + if journal in JournalFetcher.abbrev_dict: + journal = JournalFetcher.abbrev_dict[journal] + + published = soup_header.select(".pub-date-value")[0].text subjects = [] @@ -61,8 +85,6 @@ class Fetcher(JournalFetcher): for subject in subject_soup.select('a'): subjects.append(subject.text) - num_citations = 0 - references = [] references_soup = ref_cit_soup.select('ol#references') @@ -77,6 +99,10 @@ class Fetcher(JournalFetcher): if reference.select('.NLM_article-title') != [] else None ref_journal = reference.select('i')[0].text\ if reference.select('i') != [] else None + + # Replaces abbreviation with whole name + if ref_journal in JournalFetcher.abbrev_dict: + ref_journal = JournalFetcher.abbrev_dict[ref_journal] ref_contributors=[] for author in reference.select('.NLM_contrib-group'): @@ -94,9 +120,13 @@ class Fetcher(JournalFetcher): # No Doi -> No Paper continue cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text\ - if 
citation.select('.cited-content_cbyCitation_article-title')!= [] else "None" + if citation.select('.cited-content_cbyCitation_article-title')!= [] else None cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text\ - if citation.select('.cited-content_cbyCitation_journal-name') != [] else "None" + if citation.select('.cited-content_cbyCitation_journal-name') != [] else None + + # Replaces abbreviation with whole name + if cit_journal in JournalFetcher.abbrev_dict: + cit_journal = JournalFetcher.abbrev_dict[cit_journal] cit_contributors =[] cit_contributors = citation.select('.cited-content_cbyCitation_article-contributors')[0]\ .text.replace("\n", " ").replace("\r", "").split(', ') @@ -107,4 +137,4 @@ class Fetcher(JournalFetcher): citations.append(Citation(cit_doi, cit_title, cit_journal, cit_contributors, cit_type = "Citation")) return Publication(doi_url, title, contributors, journal, published - , subjects, num_citations, references, citations) + , subjects, references, citations) diff --git a/input/get/journal_fetcher.py b/input/get/journal_fetcher.py index aad5857..514af1f 100755 --- a/input/get/journal_fetcher.py +++ b/input/get/journal_fetcher.py @@ -52,3 +52,45 @@ class JournalFetcher(metaclass=ABCMeta): Creates a Publication-instance. """ raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url)) + + + # A Dictionary, which connects abbreviation to whole journal-name + abbrev_dict = { + "Nat. Protoc.":"Journal of Natural Products" + ,"PLoS Comput. Biol.":"PLoS Computational Biology" + ,"PLoS One":"PLoS One" + ,"Protein Sci.":"Protein Science" + ,"J. Am. Chem. Soc.":"Journal of the American Chemical Society" + ,"J. Chem. Phys.":"Journal of Chemical Physics" + ,"Appl. Sci.":"Applied Science" + ,"Comput. Sci. Eng.":"Computing in Science & Engineering" + ,"Beilstein J. Org. Chem.":"Beilstein Journal of Organic Chemistry" + ,"Biol. Chem.":"Biological Chemistry" + ,"Isr. J. 
Chem.":"Israel Journal of Chemistry" + ,"Nat. Methods":"Nature Methods" + ,"Proc. Natl. Acad. Sci. U. S. A.":"Proceedings of the National Academy of Sciences of the United States of America" + ,"J. Phys. Chem. B":"Journal of Physical Chemistry B" + ,"Carbohydr. Res.":"Carbohydrate Research" + ,"J. Chem. Theory Comput.":"Journal of Chemical Theory and Computation" + ,"J. Mol. Biol.":"Journal of Molecular Biology" + ,"Nucleic Acids Res.":"Nucleic Acids Research" + ,"J. Comput. Chem.":"Journal of Computational Chemistry" + ,"J. Cheminf.":"Journal of Cheminformatics" + ,"J. Med. Chem.":"Journal of Medicinal Chemistry" + ,"J. Comput.-Aided Mol. Des.":"Journal of Computer-Aided Molecular Design" + ,"J. Chem. Inf. Model.":"Journal of Chemical Information and Modeling" + ,"Mol. Cell":"Molecular Cell" + ,"J. Cell Biolog.":"Journal of Cell Biology" + ,"Mol. Cell Biol.":"Molecular and Cellular Biology" + ,"J. Cell Sci.":"Journal of Cell Science" + ,"Nat. Cell Biol.":"Nature Cell Biology" + ,"J. Aerosol Sci. Technol.":"Aerosol Science and Technology" + ,"Mol. Biol. Cell":"Molecular Biology of the Cell" + ,"Build. Environ.":"Building and Environment" + ,"Sci. Rep.":"Scientific Reports" + ,"Nat. Chem.":"Nature Chemistry" + ,"Nat. Med.":"Nature Medicine" + ,"Nat. Commun.":"Nature Communications" + ,"Exp. Cell Res.":"Experimental Cell Research" + ,"Nat. Chem. 
Biol.":"Nature Chemical Biology" + } \ No newline at end of file diff --git a/input/get/nature.py b/input/get/nature.py index 4d206f4..c50ea0e 100644 --- a/input/get/nature.py +++ b/input/get/nature.py @@ -52,7 +52,7 @@ class Fetcher(JournalFetcher): for subject in soup.head.findAll(attrs={"name": "dc.subject"}): _subjects.append(subject.get("content")) - return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects, 0) + return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects) # TODO: Exceptions-handling # raise ValueException("Cant Fetch: '{}'".format(error)) diff --git a/input/get/template_.py b/input/get/template_.py index da200bc..58de023 100755 --- a/input/get/template_.py +++ b/input/get/template_.py @@ -46,5 +46,6 @@ class Fetcher(JournalFetcher): # soup = JournalFetcher.get_soup(url) # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[] # TODO: Create new Publication-instance - # return Publication(doi,title,contributors[],journal,publication_date,subjects[],num_citation=None ,references[],citations[]) + # return Publication(doi_url, title, contributors = [], journal + # , publication_date, subjects = [], references = [], citations = []) return None \ No newline at end of file diff --git a/input/interface.py b/input/interface.py index c0d6df4..8f0af9d 100755 --- a/input/interface.py +++ b/input/interface.py @@ -15,11 +15,30 @@ class InputInterface: Singleton which dynamically imports and manages fetchers """ + instance = None get_path = None fetcher_classes=[] - @staticmethod - def get_publication(url: str) -> Publication: + # '__new__' is called before '__init__' and gives us an instance + def __new__(cls, *args, **kwargs): + + # checks if an instance exists and if it doesnt creates one + if cls.instance == None: + cls.instance = super(InputInterface, cls).__new__(cls,*args, **kwargs) + + return cls.instance + + def __init__(self): + # imports all modules + + if 
self.fetcher_classes ==[]: + self.import_fetcher_classes() + if self.fetcher_classes ==[]: + raise AttributeError("No specific Fetchers where found at: '{}'" + .format(self.get_path)) + + + def get_publication(self, url: str) -> Publication: """ The interface-method to get a Publication-instance @@ -29,24 +48,20 @@ class InputInterface: :type url: str :return: Publication instance or None if not supported """ - # Initializes 'fetcher_classes', the list of imported modules - if InputInterface.fetcher_classes ==[]: - InputInterface.get_fetcher_classes() - if InputInterface.fetcher_classes ==[]: - raise AttributeError("No specific Fetchers where found at: '{}'" - .format(InputInterface.get_path)) # Checks if module supports the 'url' and returns a Publication if it does. for fetcher_class in InputInterface.fetcher_classes: if fetcher_class.can_use_url(url): return fetcher_class.get_publication(url) - + # No Module for given url was found - return None + raise ValueError("'{}' is not supported".format(url)) + def get_supported_fetchers(self): + # print(self.fetcher_classes[0].__name__) Useless right now, because all classes are called the same + return [a.__name__ for a in self.fetcher_classes] - @staticmethod - def get_fetcher_classes(): + def import_fetcher_classes(self): """ Searches in 'get', if there are [a-z]*.py modules (specific Fetchers) and tries to import them. 
@@ -54,24 +69,21 @@ class InputInterface: """ # Path to 'get'-package - InputInterface.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve()) + self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve()) # Searches for modules with given Pattern fetcher_file_names=[] - for file in next(walk(InputInterface.get_path), (None, None, []))[2]: + for file in next(walk(self.get_path), (None, None, []))[2]: if re.match(r'[a-z]+.py', file) is not None: fetcher_file_names.append(file) - if fetcher_file_names !=[]: - print("Found following Modules: {}".format(", ".join(fetcher_file_names))) - # Tries to import those modules and saves their 'Fetcher'-class for file in fetcher_file_names: try: fetcher_class = importlib.import_module("input.get.{}".format(file[:-3])) try: - InputInterface.fetcher_classes.append(fetcher_class.__getattribute__('Fetcher')) + self.fetcher_classes.append(fetcher_class.__getattribute__('Fetcher')) except Exception as error: - print("Module '{}' does not have a 'Fetcher'-class".format(file[:-3])) + ImportError("Module '{}' does not have a 'Fetcher'-class".format(file[:-3])) except Exception: raise ImportError("Module '{}' can not be imported".format(file[:-3])) diff --git a/input/publication.py b/input/publication.py index 6de2373..fc512e7 100755 --- a/input/publication.py +++ b/input/publication.py @@ -1,13 +1,17 @@ #!/usr/bin/env python3 +# this is needed for typing pre python 3.9, this maybe as an large Overhead +from typing import Any, List + + class Publication: """ Represents a Publications """ def __init__(self, doi_url: str, title: str \ - , contributors: list[str], journal: str \ - , publication_date: str, subjects: list[str], num_citations: int = None \ - , references: list[any] = None, citations: list[any] = None ): + , contributors: List[str], journal: str \ + , publication_date: str, subjects: List[str]\ + , references: List[Any] = None, citations: List[Any] = None ): """ Parameters ---------- @@ -20,11 +24,11 @@ 
class Publication: :param published: date of release :type published: str :param subjects: the subject of the Publication - :type subjects: list[str] + :type subjects: List[str] :param references: the Citation which is been referenced by this Publication - :type references: list[any] + :type references: List[Any] :param citations: the Citation which references this Publication - :type citations: list[any] + :type citations: List[Any] :return: None """ self.doi_url = doi_url @@ -41,11 +45,9 @@ class Publication: self.citations = [] else: self.citations = citations - if num_citations is None: - self.num_citations = len(self.citations) - else: - self.num_citations = num_citations # braucht man nicht einfach len(citations) - + + # For the 'Verarbeitungsgruppe' + self.group = None def __str__(self) -> str: return ("Title: {}\n" @@ -55,7 +57,7 @@ class Publication: "Published on: {}\n" "Subjects: {}\n" "References: \n{}\n" - "Citations: \n{}\n")\ + "Citations: \n{}")\ .format(self.title, self.doi_url, ", ".join(self.contributors) , self.journal, self.publication_date , ", ".join(self.subjects) @@ -101,39 +103,14 @@ class Publication: def __eq__(self, other) -> bool: """ Compares the unique doi_url of two Publications""" - return self.doi_url == other.doi_url - - def print_pub(self): - print(f'''Article title: {self.title} -Publication date: {self.publication_date} -DOI-URL: {self.doi_url} - -Subjects:''') - print(*(self.subjects), sep = ", ") - print('\nContributors:') - print(*(self.contributors), sep = ", ") - - if int(self.num_citations) > 0: - if int(self.num_citations) == 1: - print(f'\nThis publication is cited by the following publication:\n') - else: - print(f'\nThis publication is cited by the following {self.num_citations} publications:\n') - for citation in self.citations: - print(f''' - Title: {citation.title} - Journal: {citation.journal} - Contributors: {citation.contributors} - DOI-URL: {citation.doi_url} - ''') - else: - print('\nThis publication is not 
cited by any other publication.') - - + if type(self)==type(other): + return self.doi_url == other.doi_url + return False class Citation: def __init__(self, doi_url: str, title: str \ - , journal: str, contributors: list[str] \ + , journal: str, contributors: List[str] \ , cit_type: str = "Citation"): """ Parameters @@ -143,7 +120,7 @@ class Citation: :param title: title of the publication :type title: str :param contributors: list of all contributors - :type contributors: list[str] + :type contributors: List[str] :param cit_type: Specifies if Reference or Citation :type cit_type: str :return: None @@ -164,20 +141,3 @@ class Citation: , self.cit_type, self.doi_url , self.cit_type, self.journal , self.cit_type, ", ".join(self.contributors)) - - -# This is just a replica of Citations -class Reference: - def __init__(self, doi_url: str, title: str, journal: str, contributors: list[str]): - self.title = title - self.doi_url = doi_url - self.journal = journal - self.contributors = contributors - - def __str__(self) -> str: - return ("\tReferences-Title: {}\n" - "\tReferences-Doi: {}\n" - "\tReferences-Journal: {}\n" - "\tReferences-Contributors: {}")\ - .format(self.title, self.doi_url - , self.journal, ", ".join(self.contributors)) diff --git a/input/test/__init__.py b/input/test/__init__.py index 897ddc5..e69de29 100644 --- a/input/test/__init__.py +++ b/input/test/__init__.py @@ -1,2 +0,0 @@ -from input.test.test_input import * -from input.publication import Publication \ No newline at end of file diff --git a/input/test/test_acs.py b/input/test/test_acs.py old mode 100755 new mode 100644 index b187140..e3dfe84 --- a/input/test/test_acs.py +++ b/input/test/test_acs.py @@ -10,68 +10,71 @@ class AcsTestCase(FetcherTestCase): Methods with test_* will be detected by unittest and run. 
""" - def test_url(self): + def test_acs_url(self): # Positive Testing - self.can_use_url_test(Acs, "https://doi.org/10.1021/acs.jcim.1c00203", True) - self.can_use_url_test(Acs, "doi.org/10.1021/acs.jcim.1c00203", True) - self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203", True) - self.can_use_url_test(Acs, " 10.1021/acs.jcim.1c00203", True) - self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203 ", True) - self.can_use_url_test(Acs, "\t 10.1021/acs.jcim.1c00203 \t\n", True) - self.can_use_url_test(Acs, "https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203", True) + self.can_use_url_test(Acs, "https://doi.org/10.1021/acs.jcim.1c00203" , True) + self.can_use_url_test(Acs, "doi.org/10.1021/acs.jcim.1c00203" , True) + self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203" , True) + self.can_use_url_test(Acs, " 10.1021/acs.jcim.1c00203" , True) + self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203 " , True) + self.can_use_url_test(Acs, "\t 10.1021/acs.jcim.1c00203 \t\n" , True) + self.can_use_url_test(Acs, "https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203" , True) # Negative Testing - self.can_use_url_test(Acs, "", False) - self.can_use_url_test(Acs, "https://doi.org/10.1038/219021a0", False) - self.can_use_url_test(Acs, "https://www.nature.com/articles/219021a0", False) + self.can_use_url_test(Acs, "" , False) + self.can_use_url_test(Acs, "https://doi.org/10.1038/219021a0" , False) + self.can_use_url_test(Acs, "https://www.nature.com/articles/219021a0" , False) self.can_use_url_test(Acs, "https://pubs.acs.org/doi/doi.org/10.1021/acs.jcim.1c00203", False) - def test_publication(self): + def test_acs_publication(self): url = "https://doi.org/10.1021/acs.jcim.1c00203" self.get_publication_test(Acs, url, self.expectedPubs[url]) - + def test_acs_exceptions(self): + test_url= "https://doi.org/10.1021/acs.jcim.1c002" + self.get_publication_exception_test(Acs, test_url) + # Dictionary of Expected Results, with url expectedPubs = { "https://doi.org/10.1021/acs.jcim.1c00203": 
Publication( doi_url = "https://doi.org/10.1021/acs.jcim.1c00203", title = "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", - contributors = ["Jerome Eberhardt","Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], - journal="J. Chem. Inf. Model.", + contributors = ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], + journal="Journal of Chemical Information and Modeling", publication_date = "July 19, 2021", subjects = ["Algorithms","Ligands","Molecules","Receptors","Macrocycles"], references = [ Citation(doi_url = "https://doi.org/10.1002/jcc.21334" , title ="AutoDock Vina: improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading" - , journal="J. Comput. Chem." + , journal="Journal of Computational Chemistry" , contributors=["Trott, O.", "Olson, A. J."] , cit_type="Reference") , Citation(doi_url = "https://doi.org/10.1038/nprot.2016.051" , title ="Computational protein-ligand docking and virtual drug screening with the AutoDock suite" - , journal="Nat. Protoc." + , journal="Journal of Natural Products" , contributors=["Forli, S.","Huey, R.","Pique, M. E.","Sanner, M. F.","Goodsell, D. S.","Olson, A. J."] , cit_type="Reference") , Citation(title = "A semiempirical free energy force field with charge-based desolvation" , doi_url = "https://doi.org/10.1002/jcc.20634" - , journal="J. Comput. Chem." + , journal="Journal of Computational Chemistry" , contributors=["Huey, R.","Morris, G. M.","Olson, A. J.","Goodsell, D. S."] , cit_type="Reference") , Citation(title="Accelerating autodock4 with gpus and gradient-based local search" , doi_url="https://doi.org/10.1021/acs.jctc.0c01006" - , journal="J. Chem. Theory Comput." + , journal="Journal of Chemical Theory and Computation" , contributors=["Santos-Martins, D.","Solis-Vasquez, L.","Tillack, A. F.","Sanner, M. 
F.","Koch, A.","Forli, S."] , cit_type="Reference") , Citation(title="AutoDockFR: Advances in Protein-Ligand Docking with Explicitly Specified Binding Site Flexibility" , doi_url="https://doi.org/10.1371/journal.pcbi.1004586" - , journal="PLoS Comput. Biol." + , journal="PLoS Computational Biology" , contributors=["Ravindranath, P. A.","Forli, S.","Goodsell, D. S.","Olson, A. J.","Sanner, M. F."] , cit_type="Reference") , Citation(title="Docking flexible cyclic peptides with AutoDock CrankPep" , doi_url="https://doi.org/10.1021/acs.jctc.9b00557" - , journal="J. Chem. Theory Comput." + , journal="Journal of Chemical Theory and Computation" , contributors=["Zhang, Y.","Sanner, M. F."] , cit_type="Reference") , Citation(title="Fast, accurate, and reliable molecular docking with QuickVina 2" @@ -81,17 +84,17 @@ class AcsTestCase(FetcherTestCase): , cit_type="Reference") , Citation(title="Lessons learned in empirical scoring with smina from the CSAR 2011 benchmarking exercise" , doi_url="https://doi.org/10.1021/ci300604z" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Koes, D. R.","Baumgartner, M. P.","Camacho, C. J."] , cit_type="Reference") , Citation(title="Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking" , doi_url="https://doi.org/10.1021/acs.jctc.5b00834" - , journal="J. Chem. Theory Comput." + , journal="Journal of Chemical Theory and Computation" , contributors=["Nivedha, A. K.","Thieker, D. F.","Makeneni, S.","Hu, H.","Woods, R. J."] , cit_type="Reference") , Citation(title="AutoDock VinaXB: implementation of XBSF, new empirical halogen bond scoring function, into AutoDock Vina" , doi_url="https://doi.org/10.1186/s13321-016-0139-1" - , journal="J. Cheminf." + , journal="Journal of Cheminformatics" , contributors=["Koebel, M. R.","Schmadeke, G.","Posner, R. 
G.","Sirimulla, S."] , cit_type="Reference") , Citation(title="Vinardo: A Scoring Function Based on Autodock Vina Improves Scoring, Docking, and Virtual Screening" @@ -101,32 +104,32 @@ class AcsTestCase(FetcherTestCase): , cit_type="Reference") , Citation(title="Lennard-Jones potential and dummy atom settings to overcome the AUTODOCK limitation in treating flexible ring systems" , doi_url="https://doi.org/10.1021/ci700036j" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Forli, S.","Botta, M."] , cit_type="Reference") , Citation(title="AutoDock4Zn: an improved AutoDock force field for small-molecule docking to zinc metalloproteins" , doi_url="https://doi.org/10.1021/ci500209e" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Santos-Martins, D.","Forli, S.","Ramos, M. J.","Olson, A. J."] , cit_type="Reference") , Citation(title="A force field with discrete displaceable waters and desolvation entropy for hydrated ligand docking" , doi_url="https://doi.org/10.1021/jm2005145" - , journal="J. Med. Chem." + , journal="Journal of Medicinal Chemistry" , contributors=["Forli, S.","Olson, A. J."] , cit_type="Reference") , Citation(title="Directional phosphorylation and nuclear transport of the splicing factor SRSF1 is regulated by an RNA recognition motif" , doi_url="https://doi.org/10.1016/j.jmb.2016.04.009" - , journal="J. Mol. Biol." + , journal="Journal of Molecular Biology" , contributors=["Serrano, P.","Aubol, B. E.","Keshwani, M. M.","Forli, S.","Ma, C.-T.","Dutta, S. K.","Geralt, M.","Wüthrich, K.","Adams, J. A."] , cit_type="Reference") , Citation(title="Covalent docking using autodock: Two-point attractor and flexible side chain methods" , doi_url="https://doi.org/10.1002/pro.2733" - , journal="Protein Sci." + , journal="Protein Science" , contributors=["Bianco, G.","Forli, S.","Goodsell, D. S.","Olson, A. 
J."] , cit_type="Reference") , Citation(title="Consensus docking: improving the reliability of docking in a virtual screening context" , doi_url="https://doi.org/10.1021/ci300399w" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Houston, D. R.","Walkinshaw, M. D."] , cit_type="Reference") , Citation(title="DockBench: an integrated informatic platform bridging the gap between the robust validation of docking protocols and virtual screening simulations" @@ -136,7 +139,7 @@ class AcsTestCase(FetcherTestCase): , cit_type="Reference") , Citation(title="A new force field for molecular mechanical simulation of nucleic acids and proteins" , doi_url="https://doi.org/10.1021/ja00315a051" - , journal="J. Am. Chem. Soc." + , journal="Journal of the American Chemical Society" , contributors=["Weiner, S. J.","Kollman, P. A.","Case, D. A.","Singh, U. C.","Ghio, C.","Alagona, G.","Profeta, S.","Weiner, P."] , cit_type="Reference") , Citation(title="AutoDock Bias: improving binding mode prediction and virtual screening using known protein-ligand interactions" @@ -146,17 +149,17 @@ class AcsTestCase(FetcherTestCase): , cit_type="Reference") , Citation(title="Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory" , doi_url="https://doi.org/10.1021/jp9723574" - , journal="J. Phys. Chem. B" + , journal="Journal of Physical Chemistry B" , contributors=["Lazaridis, T."] , cit_type="Reference") , Citation(title="Inhomogeneous fluid approach to solvation thermodynamics. 2. Applications to simple fluids" , doi_url="https://doi.org/10.1021/jp972358w" - , journal="J. Phys. Chem. B" + , journal="Journal of Physical Chemistry B" , contributors=["Lazaridis, T."] , cit_type="Reference") , Citation(title="Grid inhomogeneous solvation theory: Hydration structure and thermodynamics of the miniature receptor cucurbit[7]uril" , doi_url="https://doi.org/10.1063/1.4733951" - , journal="J. Chem. Phys." 
+ , journal="Journal of Chemical Physics" , contributors=["Nguyen, C. N.","Young, T. K.","Gilson, M. K."] , cit_type="Reference") , Citation(title="AutoDock-GIST: Incorporating Thermodynamics of Active-Site Water into Scoring Function for Accurate Protein-Ligand Docking" @@ -166,117 +169,117 @@ class AcsTestCase(FetcherTestCase): , cit_type="Reference") , Citation(title="ZINC20—A Free Ultralarge-Scale Chemical Database for Ligand Discovery" , doi_url="https://doi.org/10.1021/acs.jcim.0c00675" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Irwin, J. J.","Tang, K. G.","Young, J.","Dandarchuluun, C.","Wong, B. R.","Khurelbaatar, M.","Moroz, Y. S.","Mayfield, J.","Sayle, R. A."] , cit_type="Reference") , Citation(title="Structural biology-inspired discovery of novel KRAS–PDEδ inhibitors" , doi_url="https://doi.org/10.1021/acs.jmedchem.7b01243" - , journal="J. Med. Chem." + , journal="Journal of Medicinal Chemistry" , contributors=["Jiang, Y.","Zhuang, C.","Chen, L.","Lu, J.","Dong, G.","Miao, Z.","Zhang, W.","Li, J.","Sheng, C."] , cit_type="Reference") , Citation(title="D3R grand challenge 2015: evaluation of protein–ligand pose and affinity predictions" , doi_url="https://doi.org/10.1007/s10822-016-9946-8" - , journal="J. Comput.-Aided Mol. Des." + , journal="Journal of Computer-Aided Molecular Design" , contributors=["Gathiaka, S.","Liu, S.","Chiu, M.","Yang, H.","Stuckey, J. A.","Kang, Y. N.","Delproposto, J.","Kubish, G.","Dunbar, J. B.","Carlson, H. A.","Burley, S. K.","Walters, W. P.","Amaro, R. E.","Feher, V. A.","Gilson, M. K."] , cit_type="Reference") , Citation(title="D3R grand challenge 4: blind prediction of protein–ligand poses, affinity rankings, and relative binding free energies" , doi_url="https://doi.org/10.1007/s10822-020-00289-y" - , journal="J. Comput.-Aided Mol. Des." + , journal="Journal of Computer-Aided Molecular Design" , contributors=["Parks, C. 
D.","Gaieb, Z.","Chiu, M.","Yang, H.","Shao, C.","Walters, W. P.","Jansen, J. M.","McGaughey, G.","Lewis, R. A.","Bembenek, S. D.","Ameriks, M. K.","Mirzadegan, T.","Burley, S. K.","Amaro, R. E.","Gilson, M. K."] , cit_type="Reference") , Citation(title="D3R Grand Challenge 4: prospective pose prediction of BACE1 ligands with AutoDock-GPU" , doi_url="https://doi.org/10.1007/s10822-019-00241-9" - , journal="J. Comput.-Aided Mol. Des." + , journal="Journal of Computer-Aided Molecular Design" , contributors=["Santos-Martins, D.","Eberhardt, J.","Bianco, G.","Solis-Vasquez, L.","Ambrosio, F. A.","Koch, A.","Forli, S."] , cit_type="Reference") , Citation(title="Comparison of affinity ranking using AutoDock-GPU and MM-GBSA scores for BACE-1 inhibitors in the D3R Grand Challenge 4" , doi_url="https://doi.org/10.1007/s10822-019-00240-w" - , journal="J. Comput.-Aided Mol. Des." + , journal="Journal of Computer-Aided Molecular Design" , contributors=["El Khoury, L.","Santos-Martins, D.","Sasmal, S.","Eberhardt, J.","Bianco, G.","Ambrosio, F. A.","Solis-Vasquez, L.","Koch, A.","Forli, S.","Mobley, D. L."] , cit_type="Reference") , Citation(title="Macrocycle modeling in ICM: benchmarking and evaluation in D3R Grand Challenge 4" , doi_url="https://doi.org/10.1007/s10822-019-00225-9" - , journal="J. Comput.-Aided Mol. Des." + , journal="Journal of Computer-Aided Molecular Design" , contributors=["Lam, P. C.-H.","Abagyan, R.","Totrov, M."] , cit_type="Reference") , Citation(title="Directory of useful decoys, enhanced (DUD-E): better ligands and decoys for better benchmarking" , doi_url="https://doi.org/10.1021/jm300687e" - , journal="J. Med. Chem." + , journal="Journal of Medicinal Chemistry" , contributors=["Mysinger, M. M.","Carchia, M.","Irwin, J. J.","Shoichet, B. K."] , cit_type="Reference") , Citation(title="Evaluation of AutoDock and AutoDock Vina on the CASF-2013 benchmark" , doi_url="https://doi.org/10.1021/acs.jcim.8b00312" - , journal="J. Chem. Inf. Model." 
+ , journal="Journal of Chemical Information and Modeling" , contributors=["Gaillard, T."] , cit_type="Reference") , Citation(title="Autodock vina adopts more accurate binding poses but autodock4 forms better binding affinity" , doi_url="https://doi.org/10.1021/acs.jcim.9b00778" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Nguyen, N. T.","Nguyen, T. H.","Pham, T. N. H.","Huy, N. T.","Bay, M. V.","Pham, M. Q.","Nam, P. C.","Vu, V. V.","Ngo, S. T."] , cit_type="Reference") , Citation(title="Development and validation of a genetic algorithm for flexible docking" , doi_url="https://doi.org/10.1006/jmbi.1996.0897" - , journal="J. Mol. Biol." + , journal="Journal of Molecular Biology" , contributors=["Jones, G.","Willett, P.","Glen, R. C.","Leach, A. R.","Taylor, R."] , cit_type="Reference") , Citation(title="Glide: a new approach for rapid, accurate docking and scoring. 1. Method and assessment of docking accuracy" , doi_url="https://doi.org/10.1021/jm0306430" - , journal="J. Med. Chem." + , journal="Journal of Medicinal Chemistry" , contributors=["Friesner, R. A.","Banks, J. L.","Murphy, R. B.","Halgren, T. A.","Klicic, J. J.","Mainz, D. T.","Repasky, M. P.","Knoll, E. H.","Shelley, M.","Perry, J. K."] , cit_type="Reference") , Citation(title="Surflex: fully automatic flexible molecular docking using a molecular similarity-based search engine" , doi_url="https://doi.org/10.1021/jm020406h" - , journal="J. Med. Chem." + , journal="Journal of Medicinal Chemistry" , contributors=["Jain, A. N."] , cit_type="Reference") , Citation(title="A fast flexible docking method using an incremental construction algorithm" , doi_url="https://doi.org/10.1006/jmbi.1996.0477" - , journal="J. Mol. Biol." 
+ , journal="Journal of Molecular Biology" , contributors=["Rarey, M.","Kramer, B.","Lengauer, T.","Klebe, G."] , cit_type="Reference") , Citation(title="EDock: blind protein–ligand docking by replica-exchange monte carlo simulation" , doi_url="https://doi.org/10.1186/s13321-020-00440-9" - , journal="J. Cheminf." + , journal="Journal of Cheminformatics" , contributors=["Zhang, W.","Bell, E. W.","Yin, M.","Zhang, Y."] , cit_type="Reference") , Citation(title="DOCK 6: Impact of new features and current docking performance" , doi_url="https://doi.org/10.1002/jcc.23905" - , journal="J. Comput. Chem." + , journal="Journal of Computational Chemistry" , contributors=["Allen, W. J.","Balius, T. E.","Mukherjee, S.","Brozell, S. R.","Moustakas, D. T.","Lang, P. T.","Case, D. A.","Kuntz, I. D.","Rizzo, R. C."] , cit_type="Reference") , Citation(title="Improving scoring-docking-screening powers of protein–ligand scoring functions using random forest" , doi_url="https://doi.org/10.1002/jcc.24667" - , journal="J. Comput. Chem." + , journal="Journal of Computational Chemistry" , contributors=["Wang, C.","Zhang, Y."] , cit_type="Reference") , Citation(title="ID-Score: a new empirical scoring function based on a comprehensive set of descriptors related to protein–ligand interactions" , doi_url="https://doi.org/10.1021/ci300493w" - , journal="J. Chem. Inf. Model." + , journal="Journal of Chemical Information and Modeling" , contributors=["Li, G.-B.","Yang, L.-L.","Wang, W.-J.","Li, L.-L.","Yang, S.-Y."] , cit_type="Reference") , Citation(title="Further development and validation of empirical scoring functions for structure-based binding affinity prediction" , doi_url="https://doi.org/10.1023/a:1016357811882" - , journal="J. Comput.-Aided Mol. Des." 
+ , journal="Journal of Computer-Aided Molecular Design" , contributors=["Wang, R.","Lai, L.","Wang, S."] , cit_type="Reference") , Citation(title="A knowledge-based energy function for protein- ligand, protein- protein, and protein- DNA complexes" , doi_url="https://doi.org/10.1021/jm049314d" - , journal="J. Med. Chem." + , journal="Journal of Medicinal Chemistry" , contributors=["Zhang, C.","Liu, S.","Zhu, Q.","Zhou, Y."] , cit_type="Reference") , Citation(title="DLIGAND2: an improved knowledge-based energy function for protein–ligand interactions using the distance-scaled, finite, ideal-gas reference state" , doi_url="https://doi.org/10.1186/s13321-019-0373-4" - , journal="J. Cheminf." + , journal="Journal of Cheminformatics" , contributors=["Chen, P.","Ke, Y.","Lu, Y.","Du, Y.","Li, J.","Yan, H.","Zhao, H.","Zhou, Y.","Yang, Y."] , cit_type="Reference") , Citation(title="Comparing AutoDock and Vina in ligand/decoy discrimination for virtual screening" , doi_url="https://doi.org/10.3390/app9214538" - , journal="Appl. Sci." + , journal="Applied Science" , contributors=["Vieira, T. F.","Sousa, S. F."] , cit_type="Reference") , Citation(title="Benchmark of four popular virtual screening programs: construction of the active/decoy dataset remains a major determinant of measured performance" , doi_url="https://doi.org/10.1186/s13321-016-0167-x" - , journal="J. Cheminf." + , journal="Journal of Cheminformatics" , contributors=["Chaput, L.","Martinez-Sanz, J.","Quiniou, E.","Rigolet, P.","Saettel, N.","Mouawad, L."] , cit_type="Reference") , Citation(title="Array programming with NumPy" @@ -286,7 +289,7 @@ class AcsTestCase(FetcherTestCase): , cit_type="Reference") , Citation(title="Matplotlib: A 2D graphics environment" , doi_url="https://doi.org/10.1109/mcse.2007.55" - , journal="Comput. Sci. Eng." + , journal="Computing in Science & Engineering" , contributors=["Hunter, J. 
D."] , cit_type="Reference") ], citations = [ diff --git a/input/test/test_input.py b/input/test/test_input.py index 86fe6ae..b2ca55f 100755 --- a/input/test/test_input.py +++ b/input/test/test_input.py @@ -1,6 +1,6 @@ import unittest from input.get.journal_fetcher import JournalFetcher - +from input.interface import InputInterface from input.publication import Publication """ @@ -24,7 +24,18 @@ Publication 6: 'https://doi.org/10.1021/acs.jmedchem.0c01332' # Paper is a PDF class InterfaceTestCase(unittest.TestCase): - pass + def setUp(self): + self.assertEqual(InputInterface.instance, None) + self.interface = InputInterface() + + def test_singleton(self): + # interface should already be made in setUp() + self.assertNotEqual(self.interface.instance, None) + new_interface = InputInterface() + self.assertEqual(self.interface, new_interface) + + # def test_imported_modules(self): + # fetchers = self.interface.get_supported_fetchers class FetcherTestCase(unittest.TestCase): @@ -45,10 +56,9 @@ class FetcherTestCase(unittest.TestCase): self.assertEqual(actual_res.journal, expected_res.journal) self.assertEqual(actual_res.publication_date, expected_res.publication_date) self.assertEqual(actual_res.subjects, expected_res.subjects) - # self.assertEqual(actual_res.num_citations, expected_res.num_citations) # Checking for all references - self.assertEquals(len(actual_res.references), len(expected_res.references)) + self.assertEqual(len(actual_res.references), len(expected_res.references)) num_references = len(expected_res.references) for i in range(num_references): self.assertEqual(actual_res.references[i].doi_url, expected_res.references[i].doi_url) @@ -57,7 +67,7 @@ class FetcherTestCase(unittest.TestCase): self.assertEqual(actual_res.references[i].cit_type, expected_res.references[i].cit_type) # Checking for all citations - self.assertEquals(len(actual_res.citations), len(expected_res.citations)) + self.assertEqual(len(actual_res.citations), len(expected_res.citations)) 
num_citations = len(expected_res.citations) for i in range(num_citations): self.assertEqual(actual_res.citations[i].doi_url, expected_res.citations[i].doi_url) @@ -66,7 +76,7 @@ class FetcherTestCase(unittest.TestCase): self.assertEqual(actual_res.citations[i].cit_type, expected_res.citations[i].cit_type) - -if __name__=="__main__": - print("test") - unittest.main() \ No newline at end of file + def get_publication_exception_test(self, fetcher: JournalFetcher, test_url: str): + # Checks that get_publication raises ValueError for test_url + with self.assertRaises(ValueError): + fetcher.get_publication(test_url) \ No newline at end of file diff --git a/input/test_doi.txt b/input/test_doi.txt deleted file mode 100644 index ced8c84..0000000 --- a/input/test_doi.txt +++ /dev/null @@ -1,4 +0,0 @@ -https://doi.org/10.1021/acs.jcim.1c00203 -https://doi.org/10.1021/acs.jcim.1c00917 -https://doi.org/10.1021/acs.jmedchem.0c01332 -10.1093/bioinformatics/btaa190 -- GitLab