Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • baw8330/projekt-cis-biochemie-2021-22
  • bax5890/projekt-cis-biochemie-2021-22
2 results
Select Git revision
Show changes
Commits on Source (13)
Showing
with 1159 additions and 2 deletions
**/__pycache__/
#!/usr/bin/env python3
from input.interface import InputInterface as Input
def count_journals(url: str):
    """
    Tallies the journals of all citations and references of one publication.

    Fetches the publication behind 'url' through the input interface and
    raises, first for every citation and then for every reference, the
    counter of its journal in the module-level dictionary 'cit'.

    Parameters
    ----------
    :param url: url of the publication to evaluate
    :type url: str
    :return: None
    """
    publication = Input().get_publication(url)
    # Citations are handled before references, so new journals enter 'cit'
    # in the same order as with two separate loops.
    for entries in (publication.citations, publication.references):
        if not entries:
            continue
        for entry in entries:
            name = entry.journal
            cit[name] = cit.get(name, 0) + 1
if __name__ == "__main__":
    # Shared tally filled by 'count_journals': journal name -> occurrences
    cit = {}
    for doi_url in (
        "https://doi.org/10.1021/acs.jcim.1c00203",
        "https://doi.org/10.1021/acs.jcim.6b00561",
        "https://doi.org/10.1021/acs.jcim.6b00613",
        "https://doi.org/10.1021/acs.jcim.1c00917",
        "https://doi.org/10.1021/acs.jmedchem.0c01332",
    ):
        count_journals(doi_url)
    #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.1c00290")
    #count_journals("https://pubs.acs.org/doi/10.1021/acsenvironau.1c00007")
    #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.7b01162")

    # Ascending by count, so the most frequent journals are printed last
    cit = dict(sorted(cit.items(), key=lambda pair: pair[1]))
    for name, amount in cit.items():
        # Entries with an empty journal name are not reported
        if name != "":
            print(f'{name}: {amount}')
#!/usr/bin/env python3
from input.interface import InputInterface as Input
def main(url: str):
    """
    Prints the publication behind 'url' as returned by 'get_pub_light'.

    :param url: url of the publication to fetch
    :type url: str
    :return: None
    """
    interface = Input()
    #print(interface.get_publication(url))
    light_publication = interface.get_pub_light(url)
    print(light_publication)
    # print(interface.get_supported_fetchers()) Useless because all classes are called the same
# print(i.get_supported_fetchers()) Useless because all classes are called the same
if __name__ == "__main__":
    # Example run with a fixed ACS publication
    #main("https://doi.org/10.1021/acs.jcim.1c0023")
    main(url="https://doi.org/10.1021/acs.jcim.5b00332")
# Projekt CiS-Projekt 2021/22
Input-Skripts
Input package that fetches publication information for a given URL.
## Usage/Examples
```python
from input.interface import InputInterface as Input
from input.publication import Publication
def main(url):
inter = Input()
try:
pub = inter.get_publication(url)
except Exception as error:
raise error
print(pub)
pub.title = "Cool new Title"
print(pub)
if __name__ == "__main__":
main("https://doi.org/10.1021/acs.chemrev.8b00728")
```
The expected results of calling this method are:
| Input-Url | Result |
|-----------|-----------|
| supported & correct| A publication Instance |
| supported & incorrect| ValueError|
| not supported | ValueError|
Supported URLs are URLs that match the URL pattern of a supported journal.
### Supported Journals:
- ACS-Journals
- (Nature-Journals)
## Testing
```bash
python -m unittest input/test/<file.py> -v
# for all tests in directory
python -m unittest discover input/test -v
```
## Authors
- Florian Jochens
- Sam Ockenden
- Julius Schenk
\ No newline at end of file
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication, Citation
class Fetcher(JournalFetcher):
    """
    Specific Fetcher for the ACS journals.
    """

    # Constant for the abbreviations of the supported Journals
    # (compared with the four digits that follow '10.' in the Doi)
    SUPPORTED_JOURNALS = ['1021']

    # Optional scheme, optional host, then the Doi; group 4 holds the four
    # digits after '10.'. The dots of the hosts and of '10.' are escaped so
    # that they only match a literal dot.
    _URL_PATTERN = re.compile(
        r'^(https?://)?(doi\.org/|pubs\.acs\.org/doi/)?(10\.(\d{4})/\w+.\S+)')

    # Characters that are ignored around a given url
    _URL_PADDING = ". \t\r\n"

    _NO_PAPER_MESSAGE = "'{}' matches Pattern for 'ACS', but doesnt link to Paper."

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Uses Regex to extract journal specific substrings in Doi.
        TODO: Support non Doi-urls

        :param url: url or Doi of a publication
        :type url: str
        :return: True if the Doi belongs to a supported journal, else False
        """
        matched_url = Fetcher._URL_PATTERN.match(url.strip(Fetcher._URL_PADDING))
        if matched_url is None:
            return False
        return matched_url[4] in Fetcher.SUPPORTED_JOURNALS

    @staticmethod
    def get_pub_light(url: str) -> Publication:
        """
        Fetches html and creates Beautifulsoup-instance in parent class.
        Specific css-searches for ACS-Journals and creates Publication-instance
        without references and citations.

        :param url: url to an ACS publication
        :type url: str
        :return: Publication built from the article header only
        :raises ValueError: if the url does not lead to a paper
        """
        _, soup_header = Fetcher._get_article_soup(url)
        return Publication(*Fetcher._parse_header(soup_header))

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Fetches html and creates Beautifulsoup-instance in parent class.
        Specific css-searches for ACS-Journals and creates Publication-instance
        including its references and citations.

        :param url: url to an ACS publication
        :type url: str
        :return: Publication with references and citations
        :raises ValueError: if the url does not lead to a paper
        """
        soup, soup_header = Fetcher._get_article_soup(url)
        # The whole page is searched; could be narrowed for a more specific search
        references = Fetcher._parse_references(soup)
        citations = Fetcher._parse_citations(soup)
        return Publication(*Fetcher._parse_header(soup_header),
                           references, citations)

    @staticmethod
    def _request_url(url: str) -> str:
        """
        Turns every input accepted by 'can_use_url' into a fetchable url:
        removes the ignored padding and adds scheme (and host) if missing.
        """
        cleaned = url.strip(Fetcher._URL_PADDING)
        matched_url = Fetcher._URL_PATTERN.match(cleaned)
        if matched_url is None or matched_url[1] is not None:
            # Unknown pattern or already a complete url
            return cleaned
        host = matched_url[2] if matched_url[2] is not None else "doi.org/"
        return "https://{}{}".format(host, matched_url[3])

    @staticmethod
    def _get_article_soup(url: str):
        """
        Fetches the page and returns (whole soup, article-header soup);
        raises ValueError if the page is not an article page.
        """
        soup = JournalFetcher.get_soup(Fetcher._request_url(url))

        # Raise Error if re recognizes Pattern, but url isnt correct:
        # For other Urls
        if soup.text.strip(" \t\n") == "Missing resource null":
            raise ValueError(Fetcher._NO_PAPER_MESSAGE.format(url))
        # For Dois
        if soup.title is not None and soup.title.text == "Error: DOI Not Found":
            raise ValueError(Fetcher._NO_PAPER_MESSAGE.format(url))

        headers = soup.select('.article_header')
        if not headers:
            # Without an article header there is nothing to build a Publication from
            raise ValueError(Fetcher._NO_PAPER_MESSAGE.format(url))
        return soup, headers[0]

    @staticmethod
    def _parse_header(soup_header):
        """
        Reads (doi_url, title, contributors, journal, published, subjects)
        from the article header.
        """
        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
        title = soup_header.select(".hlFld-Title")[0].text
        contributors = [author.text
                        for author in soup_header.select(".hlFld-ContribAuthor")]
        journal = soup_header.select(".cit-title")[0].text
        # Replaces abbreviation with whole name
        journal = JournalFetcher.abbrev_dict.get(journal, journal)
        published = soup_header.select(".pub-date-value")[0].text
        subject_soup = soup_header.select('.article_header-taxonomy')[0]
        subjects = [subject.text for subject in subject_soup.select('a')]
        return doi_url, title, contributors, journal, published, subjects

    @staticmethod
    def _parse_references(soup):
        """
        Collects all references that have a Doi as Citation-instances.
        """
        references = []
        references_soup = soup.select('ol#references')
        if not references_soup:
            return references

        for reference in references_soup[0].select('li'):
            doi_tags = reference.select('.refDoi')
            if not doi_tags:
                # No Doi -> No Paper
                continue
            # The first five characters of the stripped text are cut off
            # before the rest is appended to the resolver url
            ref_doi = "https://doi.org/{}".format(doi_tags[0].text.strip()[5:])

            title_tags = reference.select('.NLM_article-title')
            ref_title = title_tags[0].text if title_tags else None

            journal_tags = reference.select('i')
            ref_journal = journal_tags[0].text if journal_tags else None
            # Replaces abbreviation with whole name
            ref_journal = JournalFetcher.abbrev_dict.get(ref_journal, ref_journal)

            ref_contributors = [
                author.text.replace("\n", " ").replace("\r", "")
                for author in reference.select('.NLM_contrib-group')]

            references.append(Citation(ref_doi, ref_title, ref_journal,
                                       ref_contributors, cit_type="Reference"))
        return references

    @staticmethod
    def _parse_citations(soup):
        """
        Collects all citing papers that have a Doi as Citation-instances.
        """
        citations = []
        citation_soup = soup.select('.cited-content_cbyCitation')
        if not citation_soup:
            return citations

        for citation in citation_soup[0].select('li'):
            doi_tags = citation.select('a[title="DOI URL"]')
            if not doi_tags:
                # No Doi -> No Paper
                continue
            cit_doi = doi_tags[0].text

            title_tags = citation.select('.cited-content_cbyCitation_article-title')
            cit_title = title_tags[0].text if title_tags else None

            journal_tags = citation.select('.cited-content_cbyCitation_journal-name')
            cit_journal = journal_tags[0].text if journal_tags else None
            # Replaces abbreviation with whole name
            cit_journal = JournalFetcher.abbrev_dict.get(cit_journal, cit_journal)

            # A missing contributor element yields an empty list, like the
            # optional title and journal, instead of an IndexError
            cit_contributors = []
            contributor_tags = citation.select(
                '.cited-content_cbyCitation_article-contributors')
            if contributor_tags:
                cit_contributors = contributor_tags[0].text\
                    .replace("\n", " ").replace("\r", "").split(', ')
                # clean up of the last Entry
                cit_contributors_last = cit_contributors.pop().strip(". ")
                if cit_contributors_last != '':
                    cit_contributors.append(cit_contributors_last)

            citations.append(Citation(cit_doi, cit_title, cit_journal,
                                      cit_contributors, cit_type="Citation"))
        return citations
#!/usr/bin/env python3
"""
Parent class for specific Journal
"""
from abc import ABCMeta, abstractmethod
from bs4 import BeautifulSoup
import requests
from input.publication import Publication
class JournalFetcher(metaclass=ABCMeta):
    """
    This is an abstract class for fetcher modules
    """

    @staticmethod
    def get_soup(url: str, timeout: float = 30.0) -> "BeautifulSoup":
        """
        Retrieves website-html and returns a BeautifulSoup-instance

        Parameters:
        -----------
        :type url: str
        :param url: doi-url to a publication
        :type timeout: float
        :param timeout: seconds to wait for the server before giving up;
                        without it the request could block forever
        :return: BeautifulSoup-instance
        :raises SystemExit: if requests reports an HTTPError
        """
        try:
            req = requests.get(url, timeout=timeout)
        # NOTE(review): per the requests documentation HTTPError is only
        # raised by 'raise_for_status()', which is not called here, so this
        # handler is presumably never entered. Other request errors
        # (connection problems, timeouts) propagate unchanged.
        except requests.exceptions.HTTPError as err:
            raise SystemExit(err)

        return BeautifulSoup(req.content, 'html.parser')

    @staticmethod
    @abstractmethod
    def can_use_url(url: str) -> bool:
        """
        Abstract-function to be implemented in subclass.
        Checks if given url links to a supported journal
        """
        raise AttributeError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url))

    @staticmethod
    @abstractmethod
    def get_publication(url: str) -> "Publication":
        """
        Abstract-function to be implemented in subclass.
        Creates a Publication-instance.
        """
        raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url))

    # A Dictionary, which connects abbreviation to whole journal-name
    # NOTE(review): 'Nat. Protoc.' looks like the abbreviation of
    # 'Nature Protocols' rather than 'Journal of Natural Products'; the
    # expected data of the ACS test uses the current value, so confirm and
    # change both together.
    abbrev_dict = {
        "Nat. Protoc.": "Journal of Natural Products",
        "PLoS Comput. Biol.": "PLoS Computational Biology",
        "PLoS One": "PLoS One",
        "Protein Sci.": "Protein Science",
        "J. Am. Chem. Soc.": "Journal of the American Chemical Society",
        "J. Chem. Phys.": "Journal of Chemical Physics",
        "Appl. Sci.": "Applied Science",
        "Comput. Sci. Eng.": "Computing in Science & Engineering",
        "Beilstein J. Org. Chem.": "Beilstein Journal of Organic Chemistry",
        "Biol. Chem.": "Biological Chemistry",
        "Isr. J. Chem.": "Israel Journal of Chemistry",
        "Nat. Methods": "Nature Methods",
        "Proc. Natl. Acad. Sci. U. S. A.": "Proceedings of the National Academy of Sciences of the United States of America",
        "J. Phys. Chem. B": "Journal of Physical Chemistry B",
        "Carbohydr. Res.": "Carbohydrate Research",
        "J. Chem. Theory Comput.": "Journal of Chemical Theory and Computation",
        "J. Mol. Biol.": "Journal of Molecular Biology",
        "Nucleic Acids Res.": "Nucleic Acids Research",
        "J. Comput. Chem.": "Journal of Computational Chemistry",
        "J. Cheminf.": "Journal of Cheminformatics",
        "J. Med. Chem.": "Journal of Medicinal Chemistry",
        "J. Comput.-Aided Mol. Des.": "Journal of Computer-Aided Molecular Design",
        "J. Chem. Inf. Model.": "Journal of Chemical Information and Modeling",
        "Mol. Cell": "Molecular Cell",
        "J. Cell Biolog.": "Journal of Cell Biology",
        "Mol. Cell Biol.": "Molecular and Cellular Biology",
        "J. Cell Sci.": "Journal of Cell Science",
        "Nat. Cell Biol.": "Nature Cell Biology",
        "J. Aerosol Sci. Technol.": "Aerosol Science and Technology",
        "Mol. Biol. Cell": "Molecular Biology of the Cell",
        "Build. Environ.": "Building and Environment",
        "Sci. Rep.": "Scientific Reports",
        "Nat. Chem.": "Nature Chemistry",
        "Nat. Med.": "Nature Medicine",
        "Nat. Commun.": "Nature Communications",
        "Exp. Cell Res.": "Experimental Cell Research",
        "Nat. Chem. Biol.": "Nature Chemical Biology",
    }
\ No newline at end of file
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
    """
    Builds a Publication from the meta tags in the head of a fetched page.
    """

    # TODO: List of Compatable Journals
    # NOTE: nature does not use journal names in doi links, must match by 10.xxxx identifier instead
    SUPPORTED_JOURNALS = []

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Checks if given url links to a supported journal.
        Currently rejects every url.
        """
        # TODO: Check the URL for compatability
        # re.match in SUPPORTED_JOURNALS
        return False

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Creates a Publication-instance from the meta tags of the page head.

        :param url: url to a publication
        :type url: str
        :return: Publication without references and citations
        """
        head = JournalFetcher.get_soup(url).head

        def meta_content(name):
            # Content of the first tag in the head whose 'name' attribute matches
            return head.find(attrs={"name": name}).get("content")

        def all_meta_contents(name):
            # Contents of all tags in the head whose 'name' attribute matches
            return [tag.get("content") for tag in head.findAll(attrs={"name": name})]

        _doi_url = "https://doi.org/" + meta_content("DOI")
        _title = meta_content("citation_title")
        _journal = meta_content("citation_journal_title")
        _published = meta_content("prism.publicationDate")
        _contributors = all_meta_contents("dc.creator")
        _subjects = all_meta_contents("dc.subject")

        return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects)

        # TODO: Exceptions-handling
        # raise ValueException("Cant Fetch: '{}'".format(error))
        # return None
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: None, this is just a template and should be ignored
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
    """
    Skeleton for new fetchers; it has no functionality of its own.
    """

    # TODO: Naming-Convention:
    # Class: 'Fetcher'
    # file: [journal-/organisation-name]
    # format = "[a-z]*.py" allowed
    # TODO: List of Compatable Journals
    SUPPORTED_JOURNALS = []

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Checks if given url links to a supported journal.
        The template rejects every url.
        """
        # TODO: Check the URL for compatability
        # url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url)
        # if url_re is not None:
        # return url_re[4] in SUPPORTED_JOURNALS
        # else:
        supported = False
        return supported

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Creates a Publication-instance.
        The template fetches nothing and returns None.
        """
        # TODO: Fetch data from the HTML
        # soup = JournalFetcher.get_soup(url)
        # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[]
        # TODO: Create new Publication-instance
        # return Publication(doi_url, title, contributors = [], journal
        # , publication_date, subjects = [], references = [], citations = [])
        publication = None
        return publication
\ No newline at end of file
#!/usr/bin/env python3
"""
Interface for the Input-Package only this should be accessed from outside this Package.
"""
from os import walk
import importlib
import pathlib
import re
from input.publication import Publication
class InputInterface:
    """
    Singleton which dynamically imports and manages fetchers
    """

    # The one shared instance, created on first construction
    instance = None
    # Directory that was searched for fetcher modules
    get_path = None
    # 'Fetcher'-classes of all imported modules, shared by the whole class
    fetcher_classes = []

    # '__new__' is called before '__init__' and gives us an instance
    def __new__(cls, *args, **kwargs):
        # checks if an instance exists and if it doesnt creates one
        if cls.instance is None:
            # 'object.__new__' takes no further arguments, so none are forwarded
            cls.instance = super().__new__(cls)
        return cls.instance

    def __init__(self):
        """
        Imports the fetcher modules once.

        :raises AttributeError: if no fetcher could be found
        :raises ImportError: if a fetcher module is broken
        """
        # imports all modules
        if not self.fetcher_classes:
            self.import_fetcher_classes()
        if not self.fetcher_classes:
            raise AttributeError("No specific Fetchers where found at: '{}'"
                                 .format(self.get_path))

    def get_publication(self, url: str) -> "Publication":
        """
        The interface-method to get a Publication-instance
        (including it's citations and references)

        Parameters
        ----------
        :param url: url to a Publication
        :type url: str
        :return: Publication instance
        :raises ValueError: if no fetcher supports the url
        """
        return self._find_fetcher(url).get_publication(url)

    def get_pub_light(self, url: str) -> "Publication":
        """
        The interface-method to get a Publication-instance
        (only for main article)

        Parameters
        ----------
        :param url: url to a Publication
        :type url: str
        :return: Publication instance
        :raises ValueError: if no fetcher supports the url
        """
        return self._find_fetcher(url).get_pub_light(url)

    def get_supported_fetchers(self):
        """
        Returns the class names of all imported fetchers.
        """
        # Useless right now, because all classes are called the same
        return [a.__name__ for a in self.fetcher_classes]

    def _find_fetcher(self, url: str):
        """
        Returns the first fetcher class whose 'can_use_url' accepts the url.
        """
        for fetcher_class in InputInterface.fetcher_classes:
            if fetcher_class.can_use_url(url):
                return fetcher_class
        # No Module for given url was found
        raise ValueError("'{}' is not supported".format(url))

    def import_fetcher_classes(self):
        """
        Searches in 'get', if there are [a-z]+.py modules (specific Fetchers)
        and tries to import them.
        Saves the 'Fetcher'-class of every found module in 'fetcher_classes'.

        :raises ImportError: if a module can not be imported or has no
                             'Fetcher'-class
        """
        # Path to 'get'-package
        self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve())

        # Files directly in 'get'; an unreadable or missing directory gives none
        file_names = next(walk(self.get_path), (None, None, []))[2]

        # The whole file name has to match, so 'acs.pyc' or 'x_y.py' are
        # skipped; sorted to get the same fetcher order on every system
        module_names = sorted(
            file_name[:-3] for file_name in file_names
            if re.fullmatch(r'[a-z]+\.py', file_name) is not None)

        # Tries to import those modules and saves their 'Fetcher'-class
        for module_name in module_names:
            try:
                module = importlib.import_module("input.get.{}".format(module_name))
            except Exception as error:
                raise ImportError("Module '{}' can not be imported"
                                  .format(module_name)) from error
            try:
                fetcher_class = getattr(module, 'Fetcher')
            except AttributeError as error:
                raise ImportError("Module '{}' does not have a 'Fetcher'-class"
                                  .format(module_name)) from error
            self.fetcher_classes.append(fetcher_class)
#!/usr/bin/env python3
# Needed for type hints before Python 3.9; this may add some import overhead
from typing import Any, List
class Publication:
    """
    Represents a Publication
    """

    def __init__(self, doi_url: str, title: str,
                 contributors: List[str], journal: str,
                 publication_date: str, subjects: List[str],
                 references: List[Any] = None, citations: List[Any] = None):
        """
        Parameters
        ----------
        :param doi_url: doi_url of the publication
        :type doi_url: str
        :param title: title of the publication
        :type title: str
        :param contributors: list of all contributors
        :type contributors: List[str]
        :param journal: journal of the publication
        :type journal: str
        :param publication_date: date of release
        :type publication_date: str
        :param subjects: the subjects of the Publication
        :type subjects: List[str]
        :param references: the Citations which are referenced by this Publication
        :type references: List[Any]
        :param citations: the Citations which reference this Publication
        :type citations: List[Any]
        :return: None
        """
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        self.subjects = subjects
        # A fresh list per instance, so publications never share one list
        self.references = [] if references is None else references
        self.citations = [] if citations is None else citations

        # For the 'Verarbeitungsgruppe'
        self.group = None

    def __str__(self) -> str:
        return ("Title: {}\n"
                "Doi-url: {}\n"
                "Authors: {}\n"
                "Journal: {}\n"
                "Published on: {}\n"
                "Subjects: {}\n"
                "References: \n{}\n"
                "Citations: \n{}")\
            .format(self.title, self.doi_url, ", ".join(self.contributors),
                    self.journal, self.publication_date,
                    ", ".join(self.subjects),
                    "\n".join(self.get_citation_string(self.references)),
                    "\n".join(self.get_citation_string(self.citations)))

    @staticmethod
    def get_citation_string(citations):
        """
        Returns the string of every given citation, or ["None"] if there
        are no citations.
        """
        if not citations:
            return ["None"]
        return [str(citation) for citation in citations]

    def add_citations(self, citation) -> List[Any]:
        """
        Appends a list of Citations or Citation to self.citations.

        Parameter
        ---------
        :param citation: Citation or Reference of the Publication
        :type citation: Citation or list[Citation]
        :return: self.citations
        :raises TypeError: if something else than Citations is given;
                           self.citations is left untouched in that case
        """
        if isinstance(citation, Citation):
            new_citations = [citation]
        elif isinstance(citation, list):
            # Copy, so passing 'self.citations' itself can not loop forever
            new_citations = list(citation)
        else:
            raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
                            .format(type(citation)))

        # Check everything before the first append: all or nothing is added
        for _cit in new_citations:
            if not isinstance(_cit, Citation):
                raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
                                .format(type(_cit)))

        self.citations.extend(new_citations)
        return self.citations

    def __eq__(self, other) -> bool:
        """ Compares the unique doi_url of two Publications"""
        if type(self) == type(other):
            return self.doi_url == other.doi_url
        return False

    def __hash__(self) -> int:
        """ Hashes the doi_url, matching '__eq__', so Publications work in sets and as dict keys"""
        return hash(self.doi_url)
class Citation:
    """
    A paper that is referenced by, or that references, a Publication.
    """

    def __init__(self, doi_url: str, title: str,
                 journal: str, contributors: List[str],
                 cit_type: str = "Citation"):
        """
        Parameters
        ----------
        :param doi_url: doi_url of the publication
        :type doi_url: str
        :param title: title of the publication
        :type title: str
        :param journal: journal of the publication
        :type journal: str
        :param contributors: list of all contributors
        :type contributors: List[str]
        :param cit_type: Specifies if Reference or Citation
        :type cit_type: str
        :return: None
        """
        self.doi_url = doi_url
        self.title = title
        self.journal = journal
        self.contributors = contributors
        self.cit_type = cit_type

    def __str__(self) -> str:
        # Every line is indented by a tab and labelled with the citation type
        kind = self.cit_type
        authors = ", ".join(self.contributors)
        return (f"\t{kind}-Title: {self.title}\n"
                f"\t{kind}-Doi: {self.doi_url}\n"
                f"\t{kind}-Journal: {self.journal}\n"
                f"\t{kind}-Contributors: {authors}\n")
beautifulsoup4
requests
\ No newline at end of file
#!/usr/bin/env python
from input.get.acs import Fetcher as Acs
from input.publication import Publication, Citation
from input.test.test_input import FetcherTestCase
class AcsTestCase(FetcherTestCase):
"""
Methods with test_* will be detected by unittest and run.
"""
def test_acs_url(self):
    """
    Checks 'can_use_url' of the ACS fetcher against accepted and rejected urls.
    """
    expectations = [
        # Positive Testing
        ("https://doi.org/10.1021/acs.jcim.1c00203", True),
        ("doi.org/10.1021/acs.jcim.1c00203", True),
        ("10.1021/acs.jcim.1c00203", True),
        (" 10.1021/acs.jcim.1c00203", True),
        ("10.1021/acs.jcim.1c00203 ", True),
        ("\t 10.1021/acs.jcim.1c00203 \t\n", True),
        ("https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203", True),
        # Negative Testing
        ("", False),
        ("https://doi.org/10.1038/219021a0", False),
        ("https://www.nature.com/articles/219021a0", False),
        ("https://pubs.acs.org/doi/doi.org/10.1021/acs.jcim.1c00203", False),
    ]
    # Same order as listed, one check per url
    for candidate, expected in expectations:
        self.can_use_url_test(Acs, candidate, expected)
def test_acs_publication(self):
    """
    Compares a fetched ACS publication with its entry in 'expectedPubs'.
    """
    doi_url = "https://doi.org/10.1021/acs.jcim.1c00203"
    expected_publication = self.expectedPubs[doi_url]
    self.get_publication_test(Acs, doi_url, expected_publication)
def test_acs_exceptions(self):
    """
    Runs the exception check of the base test case with a shortened Doi.
    """
    self.get_publication_exception_test(Acs, "https://doi.org/10.1021/acs.jcim.1c002")
# Dictionary of Expected Results, with url
expectedPubs = {
"https://doi.org/10.1021/acs.jcim.1c00203":
Publication(
doi_url = "https://doi.org/10.1021/acs.jcim.1c00203",
title = "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings",
contributors = ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"],
journal="Journal of Chemical Information and Modeling",
publication_date = "July 19, 2021",
subjects = ["Algorithms","Ligands","Molecules","Receptors","Macrocycles"],
references = [
Citation(doi_url = "https://doi.org/10.1002/jcc.21334"
, title ="AutoDock Vina: improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading"
, journal="Journal of Computational Chemistry"
, contributors=["Trott, O.", "Olson, A. J."]
, cit_type="Reference")
, Citation(doi_url = "https://doi.org/10.1038/nprot.2016.051"
, title ="Computational protein-ligand docking and virtual drug screening with the AutoDock suite"
, journal="Journal of Natural Products"
, contributors=["Forli, S.","Huey, R.","Pique, M. E.","Sanner, M. F.","Goodsell, D. S.","Olson, A. J."]
, cit_type="Reference")
, Citation(title = "A semiempirical free energy force field with charge-based desolvation"
, doi_url = "https://doi.org/10.1002/jcc.20634"
, journal="Journal of Computational Chemistry"
, contributors=["Huey, R.","Morris, G. M.","Olson, A. J.","Goodsell, D. S."]
, cit_type="Reference")
, Citation(title="Accelerating autodock4 with gpus and gradient-based local search"
, doi_url="https://doi.org/10.1021/acs.jctc.0c01006"
, journal="Journal of Chemical Theory and Computation"
, contributors=["Santos-Martins, D.","Solis-Vasquez, L.","Tillack, A. F.","Sanner, M. F.","Koch, A.","Forli, S."]
, cit_type="Reference")
, Citation(title="AutoDockFR: Advances in Protein-Ligand Docking with Explicitly Specified Binding Site Flexibility"
, doi_url="https://doi.org/10.1371/journal.pcbi.1004586"
, journal="PLoS Computational Biology"
, contributors=["Ravindranath, P. A.","Forli, S.","Goodsell, D. S.","Olson, A. J.","Sanner, M. F."]
, cit_type="Reference")
, Citation(title="Docking flexible cyclic peptides with AutoDock CrankPep"
, doi_url="https://doi.org/10.1021/acs.jctc.9b00557"
, journal="Journal of Chemical Theory and Computation"
, contributors=["Zhang, Y.","Sanner, M. F."]
, cit_type="Reference")
, Citation(title="Fast, accurate, and reliable molecular docking with QuickVina 2"
, doi_url="https://doi.org/10.1093/bioinformatics/btv082"
, journal="Bioinformatics"
, contributors=["Alhossary, A.","Handoko, S. D.","Mu, Y.","Kwoh, C.-K."]
, cit_type="Reference")
, Citation(title="Lessons learned in empirical scoring with smina from the CSAR 2011 benchmarking exercise"
, doi_url="https://doi.org/10.1021/ci300604z"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Koes, D. R.","Baumgartner, M. P.","Camacho, C. J."]
, cit_type="Reference")
, Citation(title="Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking"
, doi_url="https://doi.org/10.1021/acs.jctc.5b00834"
, journal="Journal of Chemical Theory and Computation"
, contributors=["Nivedha, A. K.","Thieker, D. F.","Makeneni, S.","Hu, H.","Woods, R. J."]
, cit_type="Reference")
, Citation(title="AutoDock VinaXB: implementation of XBSF, new empirical halogen bond scoring function, into AutoDock Vina"
, doi_url="https://doi.org/10.1186/s13321-016-0139-1"
, journal="Journal of Cheminformatics"
, contributors=["Koebel, M. R.","Schmadeke, G.","Posner, R. G.","Sirimulla, S."]
, cit_type="Reference")
, Citation(title="Vinardo: A Scoring Function Based on Autodock Vina Improves Scoring, Docking, and Virtual Screening"
, doi_url="https://doi.org/10.1371/journal.pone.0155183"
, journal="PLoS One"
, contributors=["Quiroga, R.","Villarreal, M. A."]
, cit_type="Reference")
, Citation(title="Lennard-Jones potential and dummy atom settings to overcome the AUTODOCK limitation in treating flexible ring systems"
, doi_url="https://doi.org/10.1021/ci700036j"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Forli, S.","Botta, M."]
, cit_type="Reference")
, Citation(title="AutoDock4Zn: an improved AutoDock force field for small-molecule docking to zinc metalloproteins"
, doi_url="https://doi.org/10.1021/ci500209e"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Santos-Martins, D.","Forli, S.","Ramos, M. J.","Olson, A. J."]
, cit_type="Reference")
, Citation(title="A force field with discrete displaceable waters and desolvation entropy for hydrated ligand docking"
, doi_url="https://doi.org/10.1021/jm2005145"
, journal="Journal of Medicinal Chemistry"
, contributors=["Forli, S.","Olson, A. J."]
, cit_type="Reference")
, Citation(title="Directional phosphorylation and nuclear transport of the splicing factor SRSF1 is regulated by an RNA recognition motif"
, doi_url="https://doi.org/10.1016/j.jmb.2016.04.009"
, journal="Journal of Molecular Biology"
, contributors=["Serrano, P.","Aubol, B. E.","Keshwani, M. M.","Forli, S.","Ma, C.-T.","Dutta, S. K.","Geralt, M.","Wüthrich, K.","Adams, J. A."]
, cit_type="Reference")
, Citation(title="Covalent docking using autodock: Two-point attractor and flexible side chain methods"
, doi_url="https://doi.org/10.1002/pro.2733"
, journal="Protein Science"
, contributors=["Bianco, G.","Forli, S.","Goodsell, D. S.","Olson, A. J."]
, cit_type="Reference")
, Citation(title="Consensus docking: improving the reliability of docking in a virtual screening context"
, doi_url="https://doi.org/10.1021/ci300399w"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Houston, D. R.","Walkinshaw, M. D."]
, cit_type="Reference")
, Citation(title="DockBench: an integrated informatic platform bridging the gap between the robust validation of docking protocols and virtual screening simulations"
, doi_url="https://doi.org/10.3390/molecules20069977"
, journal="Molecules"
, contributors=["Cuzzolin, A.","Sturlese, M.","Malvacio, I.","Ciancetta, A.","Moro, S."]
, cit_type="Reference")
, Citation(title="A new force field for molecular mechanical simulation of nucleic acids and proteins"
, doi_url="https://doi.org/10.1021/ja00315a051"
, journal="Journal of the American Chemical Society"
, contributors=["Weiner, S. J.","Kollman, P. A.","Case, D. A.","Singh, U. C.","Ghio, C.","Alagona, G.","Profeta, S.","Weiner, P."]
, cit_type="Reference")
, Citation(title="AutoDock Bias: improving binding mode prediction and virtual screening using known protein-ligand interactions"
, doi_url="https://doi.org/10.1093/bioinformatics/btz152"
, journal="Bioinformatics"
, contributors=["Arcon, J. P.","Modenutti, C. P.","Avendaño, D.","Lopez, E. D.","Defelipe, L. A.","Ambrosio, F. A.","Turjanski, A. G.","Forli, S.","Marti, M. A."]
, cit_type="Reference")
, Citation(title="Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory"
, doi_url="https://doi.org/10.1021/jp9723574"
, journal="Journal of Physical Chemistry B"
, contributors=["Lazaridis, T."]
, cit_type="Reference")
, Citation(title="Inhomogeneous fluid approach to solvation thermodynamics. 2. Applications to simple fluids"
, doi_url="https://doi.org/10.1021/jp972358w"
, journal="Journal of Physical Chemistry B"
, contributors=["Lazaridis, T."]
, cit_type="Reference")
, Citation(title="Grid inhomogeneous solvation theory: Hydration structure and thermodynamics of the miniature receptor cucurbit[7]uril"
, doi_url="https://doi.org/10.1063/1.4733951"
, journal="Journal of Chemical Physics"
, contributors=["Nguyen, C. N.","Young, T. K.","Gilson, M. K."]
, cit_type="Reference")
, Citation(title="AutoDock-GIST: Incorporating Thermodynamics of Active-Site Water into Scoring Function for Accurate Protein-Ligand Docking"
, doi_url="https://doi.org/10.3390/molecules21111604"
, journal="Molecules"
, contributors=["Uehara, S.","Tanaka, S."]
, cit_type="Reference")
, Citation(title="ZINC20—A Free Ultralarge-Scale Chemical Database for Ligand Discovery"
, doi_url="https://doi.org/10.1021/acs.jcim.0c00675"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Irwin, J. J.","Tang, K. G.","Young, J.","Dandarchuluun, C.","Wong, B. R.","Khurelbaatar, M.","Moroz, Y. S.","Mayfield, J.","Sayle, R. A."]
, cit_type="Reference")
, Citation(title="Structural biology-inspired discovery of novel KRAS–PDEδ inhibitors"
, doi_url="https://doi.org/10.1021/acs.jmedchem.7b01243"
, journal="Journal of Medicinal Chemistry"
, contributors=["Jiang, Y.","Zhuang, C.","Chen, L.","Lu, J.","Dong, G.","Miao, Z.","Zhang, W.","Li, J.","Sheng, C."]
, cit_type="Reference")
, Citation(title="D3R grand challenge 2015: evaluation of protein–ligand pose and affinity predictions"
, doi_url="https://doi.org/10.1007/s10822-016-9946-8"
, journal="Journal of Computer-Aided Molecular Design"
, contributors=["Gathiaka, S.","Liu, S.","Chiu, M.","Yang, H.","Stuckey, J. A.","Kang, Y. N.","Delproposto, J.","Kubish, G.","Dunbar, J. B.","Carlson, H. A.","Burley, S. K.","Walters, W. P.","Amaro, R. E.","Feher, V. A.","Gilson, M. K."]
, cit_type="Reference")
, Citation(title="D3R grand challenge 4: blind prediction of protein–ligand poses, affinity rankings, and relative binding free energies"
, doi_url="https://doi.org/10.1007/s10822-020-00289-y"
, journal="Journal of Computer-Aided Molecular Design"
, contributors=["Parks, C. D.","Gaieb, Z.","Chiu, M.","Yang, H.","Shao, C.","Walters, W. P.","Jansen, J. M.","McGaughey, G.","Lewis, R. A.","Bembenek, S. D.","Ameriks, M. K.","Mirzadegan, T.","Burley, S. K.","Amaro, R. E.","Gilson, M. K."]
, cit_type="Reference")
, Citation(title="D3R Grand Challenge 4: prospective pose prediction of BACE1 ligands with AutoDock-GPU"
, doi_url="https://doi.org/10.1007/s10822-019-00241-9"
, journal="Journal of Computer-Aided Molecular Design"
, contributors=["Santos-Martins, D.","Eberhardt, J.","Bianco, G.","Solis-Vasquez, L.","Ambrosio, F. A.","Koch, A.","Forli, S."]
, cit_type="Reference")
, Citation(title="Comparison of affinity ranking using AutoDock-GPU and MM-GBSA scores for BACE-1 inhibitors in the D3R Grand Challenge 4"
, doi_url="https://doi.org/10.1007/s10822-019-00240-w"
, journal="Journal of Computer-Aided Molecular Design"
, contributors=["El Khoury, L.","Santos-Martins, D.","Sasmal, S.","Eberhardt, J.","Bianco, G.","Ambrosio, F. A.","Solis-Vasquez, L.","Koch, A.","Forli, S.","Mobley, D. L."]
, cit_type="Reference")
, Citation(title="Macrocycle modeling in ICM: benchmarking and evaluation in D3R Grand Challenge 4"
, doi_url="https://doi.org/10.1007/s10822-019-00225-9"
, journal="Journal of Computer-Aided Molecular Design"
, contributors=["Lam, P. C.-H.","Abagyan, R.","Totrov, M."]
, cit_type="Reference")
, Citation(title="Directory of useful decoys, enhanced (DUD-E): better ligands and decoys for better benchmarking"
, doi_url="https://doi.org/10.1021/jm300687e"
, journal="Journal of Medicinal Chemistry"
, contributors=["Mysinger, M. M.","Carchia, M.","Irwin, J. J.","Shoichet, B. K."]
, cit_type="Reference")
, Citation(title="Evaluation of AutoDock and AutoDock Vina on the CASF-2013 benchmark"
, doi_url="https://doi.org/10.1021/acs.jcim.8b00312"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Gaillard, T."]
, cit_type="Reference")
, Citation(title="Autodock vina adopts more accurate binding poses but autodock4 forms better binding affinity"
, doi_url="https://doi.org/10.1021/acs.jcim.9b00778"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Nguyen, N. T.","Nguyen, T. H.","Pham, T. N. H.","Huy, N. T.","Bay, M. V.","Pham, M. Q.","Nam, P. C.","Vu, V. V.","Ngo, S. T."]
, cit_type="Reference")
, Citation(title="Development and validation of a genetic algorithm for flexible docking"
, doi_url="https://doi.org/10.1006/jmbi.1996.0897"
, journal="Journal of Molecular Biology"
, contributors=["Jones, G.","Willett, P.","Glen, R. C.","Leach, A. R.","Taylor, R."]
, cit_type="Reference")
, Citation(title="Glide: a new approach for rapid, accurate docking and scoring. 1. Method and assessment of docking accuracy"
, doi_url="https://doi.org/10.1021/jm0306430"
, journal="Journal of Medicinal Chemistry"
, contributors=["Friesner, R. A.","Banks, J. L.","Murphy, R. B.","Halgren, T. A.","Klicic, J. J.","Mainz, D. T.","Repasky, M. P.","Knoll, E. H.","Shelley, M.","Perry, J. K."]
, cit_type="Reference")
, Citation(title="Surflex: fully automatic flexible molecular docking using a molecular similarity-based search engine"
, doi_url="https://doi.org/10.1021/jm020406h"
, journal="Journal of Medicinal Chemistry"
, contributors=["Jain, A. N."]
, cit_type="Reference")
, Citation(title="A fast flexible docking method using an incremental construction algorithm"
, doi_url="https://doi.org/10.1006/jmbi.1996.0477"
, journal="Journal of Molecular Biology"
, contributors=["Rarey, M.","Kramer, B.","Lengauer, T.","Klebe, G."]
, cit_type="Reference")
, Citation(title="EDock: blind protein–ligand docking by replica-exchange monte carlo simulation"
, doi_url="https://doi.org/10.1186/s13321-020-00440-9"
, journal="Journal of Cheminformatics"
, contributors=["Zhang, W.","Bell, E. W.","Yin, M.","Zhang, Y."]
, cit_type="Reference")
, Citation(title="DOCK 6: Impact of new features and current docking performance"
, doi_url="https://doi.org/10.1002/jcc.23905"
, journal="Journal of Computational Chemistry"
, contributors=["Allen, W. J.","Balius, T. E.","Mukherjee, S.","Brozell, S. R.","Moustakas, D. T.","Lang, P. T.","Case, D. A.","Kuntz, I. D.","Rizzo, R. C."]
, cit_type="Reference")
, Citation(title="Improving scoring-docking-screening powers of protein–ligand scoring functions using random forest"
, doi_url="https://doi.org/10.1002/jcc.24667"
, journal="Journal of Computational Chemistry"
, contributors=["Wang, C.","Zhang, Y."]
, cit_type="Reference")
, Citation(title="ID-Score: a new empirical scoring function based on a comprehensive set of descriptors related to protein–ligand interactions"
, doi_url="https://doi.org/10.1021/ci300493w"
, journal="Journal of Chemical Information and Modeling"
, contributors=["Li, G.-B.","Yang, L.-L.","Wang, W.-J.","Li, L.-L.","Yang, S.-Y."]
, cit_type="Reference")
, Citation(title="Further development and validation of empirical scoring functions for structure-based binding affinity prediction"
, doi_url="https://doi.org/10.1023/a:1016357811882"
, journal="Journal of Computer-Aided Molecular Design"
, contributors=["Wang, R.","Lai, L.","Wang, S."]
, cit_type="Reference")
, Citation(title="A knowledge-based energy function for protein- ligand, protein- protein, and protein- DNA complexes"
, doi_url="https://doi.org/10.1021/jm049314d"
, journal="Journal of Medicinal Chemistry"
, contributors=["Zhang, C.","Liu, S.","Zhu, Q.","Zhou, Y."]
, cit_type="Reference")
, Citation(title="DLIGAND2: an improved knowledge-based energy function for protein–ligand interactions using the distance-scaled, finite, ideal-gas reference state"
, doi_url="https://doi.org/10.1186/s13321-019-0373-4"
, journal="Journal of Cheminformatics"
, contributors=["Chen, P.","Ke, Y.","Lu, Y.","Du, Y.","Li, J.","Yan, H.","Zhao, H.","Zhou, Y.","Yang, Y."]
, cit_type="Reference")
, Citation(title="Comparing AutoDock and Vina in ligand/decoy discrimination for virtual screening"
, doi_url="https://doi.org/10.3390/app9214538"
, journal="Applied Science"
, contributors=["Vieira, T. F.","Sousa, S. F."]
, cit_type="Reference")
, Citation(title="Benchmark of four popular virtual screening programs: construction of the active/decoy dataset remains a major determinant of measured performance"
, doi_url="https://doi.org/10.1186/s13321-016-0167-x"
, journal="Journal of Cheminformatics"
, contributors=["Chaput, L.","Martinez-Sanz, J.","Quiniou, E.","Rigolet, P.","Saettel, N.","Mouawad, L."]
, cit_type="Reference")
, Citation(title="Array programming with NumPy"
, doi_url="https://doi.org/10.1038/s41586-020-2649-2"
, journal="Nature"
, contributors=["Harris, C. R."]
, cit_type="Reference")
, Citation(title="Matplotlib: A 2D graphics environment"
, doi_url="https://doi.org/10.1109/mcse.2007.55"
, journal="Computing in Science & Engineering"
, contributors=["Hunter, J. D."]
, cit_type="Reference")
], citations = [
Citation(doi_url = "https://doi.org/10.1021/acsomega.1c04320"
, title ="Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives"
, journal="ACS Omega"
, contributors=["Mohammad K. Parvez","Sarfaraz Ahmed","Mohammed S. Al-Dosari","Mazin A. S. Abdelwahid","Ahmed H. Arbab","Adnan J. Al-Rehaily","Mai M. Al-Oqail"],cit_type="Citation"),
]
)
}
\ No newline at end of file
import unittest
from input.get.journal_fetcher import JournalFetcher
from input.interface import InputInterface
from input.publication import Publication
"""
Testing the Publication fetcher
Publication 1: 'https://doi.org/10.1021/acs.jcim.1c00203'
Publication 2: 'doi.org/10.1021/acs.jcim.1c00917'
Publication 3: '10.1038/nchem.1781'
Publication 4: '11.12/jaj'
Publication 5: '11.12/'
Publication 6: 'https://doi.org/10.1021/acs.jmedchem.0c01332' # Paper is a PDF
"""
# TODO: Testcases for:
# - Specific Journals: Inherit from FetcherTestCase
# - interface module-importer (test case)
# - Error detection
# - wrong/no Journal_fetchers
# - wrong urls
# - correct Types in publication
# - Edge cases (e.g. paper as PDF, no connection, etc.)
class InterfaceTestCase(unittest.TestCase):
    """Tests that constructing InputInterface repeatedly yields one shared object."""

    def setUp(self):
        # Precondition checked before every test: the class-level `instance`
        # attribute must still be unset when the interface is first built.
        self.assertIsNone(InputInterface.instance)
        self.interface = InputInterface()

    def test_singleton(self):
        """A second construction must return the very object built in setUp()."""
        # interface should already be made in setUp()
        self.assertIsNotNone(self.interface.instance)
        new_interface = InputInterface()
        # Identity, not mere equality, is what makes it a singleton.
        self.assertIs(self.interface, new_interface)

    # TODO: not implemented yet
    # def test_imported_modules(self):
    #     fetchers = self.interface.get_supported_fetchers
class FetcherTestCase(unittest.TestCase):
    """Reusable assertion helpers for tests of individual journal fetchers.

    The helpers deliberately do not start with ``test`` so that unittest does
    not collect them on their own; concrete test cases call them with a
    fetcher, a URL and the expected outcome.
    """

    # Attributes compared directly on the fetched publication.
    _PUBLICATION_FIELDS = ("doi_url", "title", "contributors", "journal",
                           "publication_date", "subjects")
    # Attributes compared on every reference / citation entry
    # (titles of the entries are intentionally not part of the comparison).
    _CITATION_FIELDS = ("doi_url", "journal", "contributors", "cit_type")

    def can_use_url_test(self, fetcher : JournalFetcher, test_url: str, expected_res: bool):
        """Assert that ``fetcher.can_use_url(test_url)`` equals ``expected_res``."""
        self.assertEqual(fetcher.can_use_url(test_url), expected_res)

    def get_publication_test(self, fetcher : JournalFetcher, test_url: str, expected_res: Publication):
        """Fetch ``test_url`` and compare the result with ``expected_res``.

        Compared on the publication itself: doi_url, title, contributors,
        journal, publication_date and subjects. For references and citations
        the number of entries must match, and each pair of entries must agree
        in doi_url, journal, contributors and cit_type.
        """
        actual_res = fetcher.get_publication(test_url)
        for field in self._PUBLICATION_FIELDS:
            self.assertEqual(getattr(actual_res, field), getattr(expected_res, field))

        # Checking for all references
        self._assert_entries_match(actual_res.references, expected_res.references)
        # Checking for all citations
        self._assert_entries_match(actual_res.citations, expected_res.citations)

    def get_publication_exception_test(self, fetcher: JournalFetcher, test_url: str):
        """Assert that ``fetcher.get_publication(test_url)`` raises ValueError."""
        with self.assertRaises(ValueError):
            fetcher.get_publication(test_url)

    def _assert_entries_match(self, actual_entries, expected_entries):
        """Assert both sequences have equal length and pairwise equal fields."""
        # Length is checked first, so zip() below never silently drops entries.
        self.assertEqual(len(actual_entries), len(expected_entries))
        for actual, expected in zip(actual_entries, expected_entries):
            for field in self._CITATION_FIELDS:
                self.assertEqual(getattr(actual, field), getattr(expected, field))
\ No newline at end of file
# Projekt CiS-Projekt 2021/22
Input-Skripts
File added
https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332
https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709
......@@ -21,9 +21,10 @@ from pathlib import Path
class Publication:
#_registry = []
_citations = []
_references = []
def __init__(self, title, publication_date, contributors, doi_url,
subjects, num_citations):
subjects = None, num_citations = None):
#self._registry.append(self)
self.title = title
self.publication_date = publication_date
......@@ -31,6 +32,8 @@ class Publication:
self.doi_url = doi_url
self.subjects = subjects
self.num_citations = num_citations
#self._citations = []
#self._references = []
class Citation:
def __init__(self, title, journal, contributors, doi_url):
......@@ -39,6 +42,13 @@ class Citation:
self.contributors = contributors
self.doi_url = doi_url
class References:
    """Simple record holding title, journal, contributors and doi_url."""

    def __init__(self, title, journal, contributors, doi_url):
        # Store all four constructor arguments unchanged, in declaration order.
        (self.title,
         self.journal,
         self.contributors,
         self.doi_url) = (title, journal, contributors, doi_url)
def get_article_info(soup):
header = soup.find('div', class_ = 'article_header-left pull-left')
article_title = header.find('span', class_ = 'hlFld-Title').text
......