diff --git a/count_journal.py b/count_journal.py deleted file mode 100755 index 13886a2e7badf339bdd23475f7d3de713329f472..0000000000000000000000000000000000000000 --- a/count_journal.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -from input.interface import InputInterface as Input - -def count_journals(url: str): - inter = Input() - pub = inter.get_publication(url) - - if pub.citations: - for citation in pub.citations: - journal = citation.journal - if journal in cit: - cit[journal] += 1 - else: - cit[journal] = 1 - - if pub.references: - for reference in pub.references: - journal = reference.journal - if journal in cit: - cit[journal] += 1 - else: - cit[journal] = 1 - -if __name__ == "__main__": - cit = {} - - count_journals("https://doi.org/10.1021/acs.jcim.1c00203") - count_journals("https://doi.org/10.1021/acs.jcim.6b00561") - count_journals("https://doi.org/10.1021/acs.jcim.6b00613") - count_journals("https://doi.org/10.1021/acs.jcim.1c00917") - count_journals("https://doi.org/10.1021/acs.jmedchem.0c01332") - #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.1c00290") - #count_journals("https://pubs.acs.org/doi/10.1021/acsenvironau.1c00007") - #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.7b01162") - - cit = dict(sorted(cit.items(), key=lambda item: item[1])) - for journal in cit: - if journal != "": - print(f'{journal}: {cit[journal]}') diff --git a/example_input.py b/example_input.py deleted file mode 100755 index c9bca4189fce4c1fd0a0dfc42ef4e517baa5f406..0000000000000000000000000000000000000000 --- a/example_input.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 - -from input.interface import InputInterface as Input - -def main(url: str): - i = Input() - #print(i.get_publication(url)) - print(i.get_pub_light(url)) - # print(i.get_supported_fetchers()) Useless because all classes are called the same - -if __name__ == "__main__": - #main("https://doi.org/10.1021/acs.jcim.1c0023") - main("https://doi.org/10.1021/acs.jcim.5b00332") diff --git a/input/README.md b/input/README.md deleted file mode 100644 index 110ce69136a8935b83d070113130222f243e924f..0000000000000000000000000000000000000000 --- a/input/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Projekt CiS-Projekt 2021/22 - -Input-Package to fetch publication information with a given url. - -## Usage/Examples - -```python -from input.interface import InputInterface as Input -from input.publication import Publication - -def main(url): - inter = Input() - try: - pub = inter.get_publication(url) - except Exception as error: - raise error - - print(pub) - pub.title = "Cool new Title" - print(pub) - -if __name__ == "__main__": - main("https://doi.org/10.1021/acs.chemrev.8b00728") -``` - -The expected results of calling this methode are: -| Input-Url | Result | -|-----------|-----------| -| supported & correct| A publication Instance | -| supported & uncorrect| ValueError| -| not supported | ValueError| - -Supported Url are urls, which comply with the url-pattern of supported Journals. 
- -### Supported Journals: - -- ACS-Journals -- (Nature-Journals) - -## Testing - -``` c -python -m unittest input/test/<file.py> -v -# for all tests in directory -python -m unittest discover input/test -v -``` -## Authors -- Florian Jochens -- Sam Ockenden -- Julius Schenk \ No newline at end of file diff --git a/input/__init__.py b/input/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/input/get/__init__.py b/input/get/__init__.py deleted file mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/input/get/acs.py b/input/get/acs.py deleted file mode 100755 index 9691845b27ae694a8213a0f0fe5f827c75890eee..0000000000000000000000000000000000000000 --- a/input/get/acs.py +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env python3 - -""" -Child class of JournalFetcher -Usage: Check if Url can be used with 'can_use_url' - and then fetch publication with 'get_publication' -""" - -import re - -from input.get.journal_fetcher import JournalFetcher -from input.publication import Publication, Citation - - -class Fetcher(JournalFetcher): - """ - Specific Fetcher for the ACS journals. - """ - - # Constant for the abbreviations of the supported Journals - SUPPORTED_JOURNALS = ['1021'] - - @staticmethod - def can_use_url(url: str) -> str: - """ - Uses Regex to extract journal specific substrings in Doi. - TODO: Support non Doi-urls - """ - matched_url = re.match(r'^(https?://)?(doi.org/|pubs.acs.org/doi/)?(10.(\d{4})/\w+.\S+)', url.strip(". \t\r\n")) - - #Checks if match exists - if matched_url is not None: - return matched_url[4] in Fetcher.SUPPORTED_JOURNALS - else: - return False - - @staticmethod - - - def get_pub_light(url: str) -> Publication: - """ - Fetches html and creates Beatifulsoup-instance in parent class. - Specific css-searches for ACS-Journals and creates Publication-instance. - """ - - # Creation of Soup - try: - soup = JournalFetcher.get_soup(url) - except Exception as error: - raise error - - # Raise Error if re recognizes Pattern, but url isnt correct: - # For other Urls - if soup.text.strip(" \t\n")=="Missing resource null": - raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url)) - - # For Dois - if soup.title is not None: - if soup.title.text == "Error: DOI Not Found": - raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url)) - - - soup_header = soup.select('.article_header')[0] - - # Creates Publication - doi_url = soup_header.select('a[title="DOI URL"]')[0].string - title = soup_header.select(".hlFld-Title")[0].text - - contributors = [] - for author in soup_header.select(".hlFld-ContribAuthor"): - contributors.append(author.text) - - journal = soup_header.select(".cit-title")[0].text - - # Replaces abbreviation with whole name - if journal in JournalFetcher.abbrev_dict: - journal = JournalFetcher.abbrev_dict[journal] - - - published = soup_header.select(".pub-date-value")[0].text - - subjects = [] - subject_soup = soup_header.select('.article_header-taxonomy')[0] - for subject in subject_soup.select('a'): - subjects.append(subject.text) - - return Publication(doi_url, title, contributors, journal, published, - subjects) - - def get_publication(url: str) -> Publication: - """ - Fetches html and creates Beatifulsoup-instance in parent class. - Specific css-searches for ACS-Journals and creates Publication-instance. 
- """ - - # Creation of Soup - try: - soup = JournalFetcher.get_soup(url) - except Exception as error: - raise error - - # Raise Error if re recognizes Pattern, but url isnt correct: - # For other Urls - if soup.text.strip(" \t\n")=="Missing resource null": - raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url)) - - # For Dois - if soup.title is not None: - if soup.title.text == "Error: DOI Not Found": - raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url)) - - - soup_header = soup.select('.article_header')[0] - - #Could be used for more specific search - ref_cit_soup = soup - - # Creates Publication - doi_url = soup_header.select('a[title="DOI URL"]')[0].string - title = soup_header.select(".hlFld-Title")[0].text - - contributors = [] - for author in soup_header.select(".hlFld-ContribAuthor"): - contributors.append(author.text) - - journal = soup_header.select(".cit-title")[0].text - - # Replaces abbreviation with whole name - if journal in JournalFetcher.abbrev_dict: - journal = JournalFetcher.abbrev_dict[journal] - - - published = soup_header.select(".pub-date-value")[0].text - - subjects = [] - subject_soup = soup_header.select('.article_header-taxonomy')[0] - for subject in subject_soup.select('a'): - subjects.append(subject.text) - - - references = [] - references_soup = ref_cit_soup.select('ol#references') - if references_soup != []: - for reference in references_soup[0].select('li'): - if reference.select('.refDoi') != []: - ref_doi = "https://doi.org/{}".format(reference.select('.refDoi')[0].text.strip()[5:]) - else: - # No Doi -> No Paper - continue - ref_title = reference.select('.NLM_article-title')[0].text\ - if reference.select('.NLM_article-title') != [] else None - ref_journal = reference.select('i')[0].text\ - if reference.select('i') != [] else None - - # Replaces abbreviation with whole name - if ref_journal in JournalFetcher.abbrev_dict: - ref_journal = JournalFetcher.abbrev_dict[ref_journal] - - ref_contributors=[] - for author in reference.select('.NLM_contrib-group'): - ref_contributors.append(author.text.replace("\n", " ").replace("\r", "")) - - references.append(Citation(ref_doi, ref_title, ref_journal, ref_contributors, cit_type="Reference")) - - citations = [] - citation_soup = ref_cit_soup.select('.cited-content_cbyCitation') - if citation_soup != []: - for citation in citation_soup[0].select('li'): - if citation.select('a[title="DOI URL"]') != []: - cit_doi = citation.select('a[title="DOI URL"]')[0].text - else: - # No Doi -> No Paper - continue - cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text\ - if citation.select('.cited-content_cbyCitation_article-title')!= [] else None - cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text\ - if citation.select('.cited-content_cbyCitation_journal-name') != [] else None - - # Replaces abbreviation with whole name - if cit_journal in JournalFetcher.abbrev_dict: - cit_journal = JournalFetcher.abbrev_dict[cit_journal] - cit_contributors =[] - cit_contributors = citation.select('.cited-content_cbyCitation_article-contributors')[0]\ - .text.replace("\n", " ").replace("\r", "").split(', ') - # clean up of the last Entry - cit_contributors_last = cit_contributors.pop().strip(". 
") - if cit_contributors_last != '': - cit_contributors.append(cit_contributors_last) - citations.append(Citation(cit_doi, cit_title, cit_journal, cit_contributors, cit_type = "Citation")) - - return Publication(doi_url, title, contributors, journal, published - , subjects, references, citations) diff --git a/input/get/journal_fetcher.py b/input/get/journal_fetcher.py deleted file mode 100755 index 514af1f80f5c7d442b790aebf5fe3954d50f8f5d..0000000000000000000000000000000000000000 --- a/input/get/journal_fetcher.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 - -""" -Parent class for specific Journal -""" - -from abc import ABCMeta, abstractmethod -from bs4 import BeautifulSoup -import requests -from input.publication import Publication - - -class JournalFetcher(metaclass=ABCMeta): - """ - This is a abstract-class for fetcher modules - """ - - @staticmethod - def get_soup(url: str) -> BeautifulSoup: - """ - Retrieves webside-html and returns a BeautifulSoup-instance - - Parameters: - ----------- - :type url: str - :param url: doi-url to a publication - :return: BeatifulSoup-instance - """ - try: - req = requests.get(url) - except requests.exceptions.HTTPError as err: - raise SystemExit(err) - - return BeautifulSoup(req.content, 'html.parser') - - - @staticmethod - @abstractmethod - def can_use_url(url: str) -> bool: - """ - Abstract-function to be implemented in subclass. - Checks if given url links to a supported journal - """ - raise AttributeError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url)) - - - @staticmethod - @abstractmethod - def get_publication(url: str) -> Publication: - """ - Abstract-function to be implemented in subclass. - Creates a Publication-instance. - """ - raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url)) - - - # A Dictionary, which connects abbreviation to whole journal-name - abbrev_dict = { - "Nat. Protoc.":"Journal of Natural Products" - ,"PLoS Comput. Biol.":"PLoS Computational Biology" - ,"PLoS One":"PLoS One" - ,"Protein Sci.":"Protein Science" - ,"J. Am. Chem. Soc.":"Journal of the American Chemical Society" - ,"J. Chem. Phys.":"Journal of Chemical Physics" - ,"Appl. Sci.":"Applied Science" - ,"Comput. Sci. Eng.":"Computing in Science & Engineering" - ,"Beilstein J. Org. Chem.":"Beilstein Journal of Organic Chemistry" - ,"Biol. Chem.":"Biological Chemistry" - ,"Isr. J. Chem.":"Israel Journal of Chemistry" - ,"Nat. Methods":"Nature Methods" - ,"Proc. Natl. Acad. Sci. U. S. A.":"Proceedings of the National Academy of Sciences of the United States of America" - ,"J. Phys. Chem. B":"Journal of Physical Chemistry B" - ,"Carbohydr. Res.":"Carbohydrate Research" - ,"J. Chem. Theory Comput.":"Journal of Chemical Theory and Computation" - ,"J. Mol. Biol.":"Journal of Molecular Biology" - ,"Nucleic Acids Res.":"Nucleic Acids Research" - ,"J. Comput. Chem.":"Journal of Computational Chemistry" - ,"J. Cheminf.":"Journal of Cheminformatics" - ,"J. Med. Chem.":"Journal of Medicinal Chemistry" - ,"J. Comput.-Aided Mol. Des.":"Journal of Computer-Aided Molecular Design" - ,"J. Chem. Inf. Model.":"Journal of Chemical Information and Modeling" - ,"Mol. Cell":"Molecular Cell" - ,"J. Cell Biolog.":"Journal of Cell Biology" - ,"Mol. Cell Biol.":"Molecular and Cellular Biology" - ,"J. Cell Sci.":"Journal of Cell Science" - ,"Nat. Cell Biol.":"Nature Cell Biology" - ,"J. Aerosol Sci. Technol.":"Aerosol Science and Technology" - ,"Mol. Biol. Cell":"Molecular Biology of the Cell" - ,"Build. 
Environ.":"Building and Environment" - ,"Sci. Rep.":"Scientific Reports" - ,"Nat. Chem.":"Nature Chemistry" - ,"Nat. Med.":"Nature Medicine" - ,"Nat. Commun.":"Nature Communications" - ,"Exp. Cell Res.":"Experimental Cell Research" - ,"Nat. Chem. Biol.":"Nature Chemical Biology" - } \ No newline at end of file diff --git a/input/get/nature.py b/input/get/nature.py deleted file mode 100644 index c50ea0ef9d1d4a9a386730e31cc72372cbf698c0..0000000000000000000000000000000000000000 --- a/input/get/nature.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -""" -Child class of JournalFetcher -Usage: Check if Url can be used with 'can_use_url' - and then fetch publication with 'get_publication' -""" - -# import re -from input.get.journal_fetcher import JournalFetcher -from input.publication import Publication - - -class Fetcher(JournalFetcher): - - """ - scrapes publication metadata from a provided url - """ - - # TODO: List of Compatable Journals - # NOTE: nature does not use journal names in doi links, must match by 10.xxxx identifier instead - SUPPORTED_JOURNALS = [] - - @staticmethod - def can_use_url(url: str) -> bool: - """ - Checks if given url links to a supported journal. - """ - - # TODO: Check the URL for compatability - # re.match in SUPPORTED_JOURNALS - return False - - @staticmethod - def get_publication(url: str) -> Publication: - """ - Creates a Publication-instance. - """ - - soup = JournalFetcher.get_soup(url) - - _doi_url = "https://doi.org/" + soup.head.find(attrs={"name": "DOI"}).get("content") - _title = soup.head.find(attrs={"name": "citation_title"}).get("content") - _journal = soup.head.find(attrs={"name": "citation_journal_title"}).get("content") - _published = soup.head.find(attrs={"name": "prism.publicationDate"}).get("content") - _contributors = [] - _subjects = [] - - for creator in soup.head.findAll(attrs={"name": "dc.creator"}): - _contributors.append(creator.get("content")) - - for subject in soup.head.findAll(attrs={"name": "dc.subject"}): - _subjects.append(subject.get("content")) - - return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects) - - # TODO: Exceptions-handling - # raise ValueException("Cant Fetch: '{}'".format(error)) - # return None diff --git a/input/get/template_.py b/input/get/template_.py deleted file mode 100755 index 58de0237bd514f7dd1b5b25f251b740d33e3589e..0000000000000000000000000000000000000000 --- a/input/get/template_.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python3 - -""" -Child class of JournalFetcher -Usage: None, this is just a template and should be ignored -""" - -# import re -from input.get.journal_fetcher import JournalFetcher -from input.publication import Publication - - -class Fetcher(JournalFetcher): - - """ - This is only a template and therefore has no functionality - """ - - # TODO: Naming-Convention: - # Class: 'Fetcher' - # file: [journal-/organisation-name] - # format = "[a-z]*.py" allowed - # TODO: List of Compatable Journals - SUPPORTED_JOURNALS = [] - - @staticmethod - def can_use_url(url: str) -> bool: - """ - Checks if given url links to a supported journal. - """ - - # TODO: Check the URL for compatability - # url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url) - # if url_re is not None: - # return url_re[4] in SUPPORTED_JOURNALS - # else: - return False - - @staticmethod - def get_publication(url: str) -> Publication: - """ - Creates a Publication-instance. 
- """ - - # TODO: Fetch data from the HTML - # soup = JournalFetcher.get_soup(url) - # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[] - # TODO: Create new Publication-instance - # return Publication(doi_url, title, contributors = [], journal - # , publication_date, subjects = [], references = [], citations = []) - return None \ No newline at end of file diff --git a/input/interface.py b/input/interface.py deleted file mode 100755 index 59515b3a3a2a5361222b8e55d3a7314ab3907132..0000000000000000000000000000000000000000 --- a/input/interface.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 - -""" -Interface for the Input-Package only this should be accessed from outside this Package. - -""" -from os import walk -import importlib -import pathlib -import re -from input.publication import Publication - -class InputInterface: - """ - Singleton which dynamically imports and manages fetchers - """ - - instance = None - get_path = None - fetcher_classes=[] - - # '__new__' is called before '__init__' and gives us an instance - def __new__(cls, *args, **kwargs): - - # checks if an instance exists and if it doesnt creates one - if cls.instance == None: - cls.instance = super(InputInterface, cls).__new__(cls,*args, **kwargs) - - return cls.instance - - def __init__(self): - # imports all modules - - if self.fetcher_classes ==[]: - self.import_fetcher_classes() - if self.fetcher_classes ==[]: - raise AttributeError("No specific Fetchers where found at: '{}'" - .format(self.get_path)) - - - def get_publication(self, url: str) -> Publication: - """ - The interface-method to get a Publication-instance - (including it's citations and references) - - Parameters - ---------- - :param url: url to a Publication - :type url: str - :return: Publication instance or None if not supported - """ - - # Checks if module supports the 'url' and - # returns a Publication if it does. - for fetcher_class in InputInterface.fetcher_classes: - if fetcher_class.can_use_url(url): - return fetcher_class.get_publication(url) - - # No Module for given url was found - raise ValueError("'{}' is not supported".format(url)) - - def get_pub_light(self, url: str) -> Publication: - """ - The interface-method to get a Publication-instance - (only for main article) - - Parameters - ---------- - :param url: url to a Publication - :type url: str - :return: Publication instance or None if not supported - """ - - # Checks if module supports the 'url' and - # returns a Publication if it does. - for fetcher_class in InputInterface.fetcher_classes: - if fetcher_class.can_use_url(url): - return fetcher_class.get_pub_light(url) - - # No Module for given url was found - raise ValueError("'{}' is not supported".format(url)) - - def get_supported_fetchers(self): - # print(self.fetcher_classes[0].__name__) Useless right now, - # because all classes are called the same - return [a.__name__ for a in self.fetcher_classes] - - def import_fetcher_classes(self): - """ - Searches in 'get', if there are [a-z]*.py modules (specific Fetchers) - and tries to import them. - Saves found modules in 'fetcher_files'. 
- """ - - # Path to 'get'-package - self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve()) - - # Searches for modules with given Pattern - fetcher_file_names=[] - for file in next(walk(self.get_path), (None, None, []))[2]: - if re.match(r'[a-z]+.py', file) is not None: - fetcher_file_names.append(file) - - # Tries to import those modules and saves their 'Fetcher'-class - for file in fetcher_file_names: - try: - fetcher_class = importlib.import_module("input.get.{}".format(file[:-3])) - try: - self.fetcher_classes.append(fetcher_class.__getattribute__('Fetcher')) - except Exception as error: - ImportError("Module '{}' does not have a 'Fetcher'-class".format(file[:-3])) - except Exception: - raise ImportError("Module '{}' can not be imported".format(file[:-3])) diff --git a/input/publication.py b/input/publication.py deleted file mode 100755 index fc512e7173a84695ea566706784c565a7b5ebb8f..0000000000000000000000000000000000000000 --- a/input/publication.py +++ /dev/null @@ -1,143 +0,0 @@ -#!/usr/bin/env python3 - -# this is needed for typing pre python 3.9, this maybe as an large Overhead -from typing import Any, List - - -class Publication: - """ - Represents a Publications - """ - def __init__(self, doi_url: str, title: str \ - , contributors: List[str], journal: str \ - , publication_date: str, subjects: List[str]\ - , references: List[Any] = None, citations: List[Any] = None ): - """ - Parameters - ---------- - :param doi_url: doi_url of the publication - :type doi_url: str - :param title: title of the publication - :type title: str - :param contributors:list of all contributors - :type contributors: list[] - :param published: date of release - :type published: str - :param subjects: the subject of the Publication - :type subjects: List[str] - :param references: the Citation which is been referenced by this Publication - :type references: List[Any] - :param citations: the Citation which references this Publication - :type citations: List[Any] - :return: None - """ - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - self.subjects = subjects - if references is None: - self.references = [] - else: - self.references = references - if citations is None: - self.citations = [] - else: - self.citations = citations - - # For the 'Verarbeitungsgruppe' - self.group = None - - def __str__(self) -> str: - return ("Title: {}\n" - "Doi-url: {}\n" - "Authors: {}\n" - "Journal: {}\n" - "Published on: {}\n" - "Subjects: {}\n" - "References: \n{}\n" - "Citations: \n{}")\ - .format(self.title, self.doi_url, ", ".join(self.contributors) - , self.journal, self.publication_date - , ", ".join(self.subjects) - , "\n".join(self.get_citation_string(self.references)) - , "\n".join(self.get_citation_string(self.citations))) - - @staticmethod - def get_citation_string(citations): - if citations == []: - return ["None"] - else: - citation_string = [] - for citation in citations: - citation_string.append(citation.__str__()) - return citation_string - - def add_citations(self, citation) -> None: - """ - Appends a list of Citations or Citation to self.citations. 
- - Parameter - --------- - :param citation: Citation or Reference of the Publication - :type citation: Citation or list[Citation] - :return: self.citations - """ - if type(citation) is Citation: - self.citations.append(citation) - - # Checks if 'citation' is a list of Citations - elif type(citation) is list: - for _cit in citation: - if type(_cit) is Citation: - self.citations.append(_cit) - else: - raise TypeError("_set_citation expects Citations or List of Citations, not: '{}'" - .format(type(_cit))) - else: - raise TypeError("_set_citation expects Citations or List of Citations, not: '{}'" - .format(type(citation))) - - return self.citations - - def __eq__(self, other) -> bool: - """ Compares the unique doi_url of two Publications""" - if type(self)==type(other): - return self.doi_url == other.doi_url - return False - - -class Citation: - def __init__(self, doi_url: str, title: str \ - , journal: str, contributors: List[str] \ - , cit_type: str = "Citation"): - """ - Parameters - ---------- - :param doi_url: doi_url of the publication - :type doi_url: str - :param title: title of the publication - :type title: str - :param contributors: list of all contributors - :type contributors: List[str] - :param cit_type: Specifies if Reference or Citation - :type cit_type: str - :return: None - """ - - self.title = title - self.doi_url = doi_url - self.journal = journal - self.contributors = contributors - self.cit_type = cit_type - - def __str__(self) -> str: - return ("\t{}-Title: {}\n" - "\t{}-Doi: {}\n" - "\t{}-Journal: {}\n" - "\t{}-Contributors: {}\n")\ - .format(self.cit_type, self.title - , self.cit_type, self.doi_url - , self.cit_type, self.journal - , self.cit_type, ", ".join(self.contributors)) diff --git a/input/requirements.txt b/input/requirements.txt deleted file mode 100644 index a151126691e7f0a9f1c824e9cbac243a96b32e71..0000000000000000000000000000000000000000 --- a/input/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -beautifulsoup4 -requests \ No newline at end of file diff --git a/input/test/__init__.py b/input/test/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/input/test/test_acs.py b/input/test/test_acs.py deleted file mode 100644 index e3dfe84a09d3599de32efbab0dd60655b5414152..0000000000000000000000000000000000000000 --- a/input/test/test_acs.py +++ /dev/null @@ -1,303 +0,0 @@ -#!/usr/bin/env python - -from input.get.acs import Fetcher as Acs -from input.publication import Publication, Citation -from input.test.test_input import FetcherTestCase - - -class AcsTestCase(FetcherTestCase): - """ - Methods with test_* will be detected by unittest and run. 
- """ - - def test_acs_url(self): - # Positive Testing - self.can_use_url_test(Acs, "https://doi.org/10.1021/acs.jcim.1c00203" , True) - self.can_use_url_test(Acs, "doi.org/10.1021/acs.jcim.1c00203" , True) - self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203" , True) - self.can_use_url_test(Acs, " 10.1021/acs.jcim.1c00203" , True) - self.can_use_url_test(Acs, "10.1021/acs.jcim.1c00203 " , True) - self.can_use_url_test(Acs, "\t 10.1021/acs.jcim.1c00203 \t\n" , True) - self.can_use_url_test(Acs, "https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203" , True) - - # Negative Testing - self.can_use_url_test(Acs, "" , False) - self.can_use_url_test(Acs, "https://doi.org/10.1038/219021a0" , False) - self.can_use_url_test(Acs, "https://www.nature.com/articles/219021a0" , False) - self.can_use_url_test(Acs, "https://pubs.acs.org/doi/doi.org/10.1021/acs.jcim.1c00203", False) - - - - def test_acs_publication(self): - url = "https://doi.org/10.1021/acs.jcim.1c00203" - self.get_publication_test(Acs, url, self.expectedPubs[url]) - - def test_acs_exceptions(self): - test_url= "https://doi.org/10.1021/acs.jcim.1c002" - self.get_publication_exception_test(Acs, test_url) - - # Dictionary of Expected Results, with url - expectedPubs = { - "https://doi.org/10.1021/acs.jcim.1c00203": - Publication( - doi_url = "https://doi.org/10.1021/acs.jcim.1c00203", - title = "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", - contributors = ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], - journal="Journal of Chemical Information and Modeling", - publication_date = "July 19, 2021", - subjects = ["Algorithms","Ligands","Molecules","Receptors","Macrocycles"], - references = [ - Citation(doi_url = "https://doi.org/10.1002/jcc.21334" - , title ="AutoDock Vina: improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading" - , journal="Journal of Computational Chemistry" - , contributors=["Trott, O.", "Olson, A. J."] - , cit_type="Reference") - , Citation(doi_url = "https://doi.org/10.1038/nprot.2016.051" - , title ="Computational protein-ligand docking and virtual drug screening with the AutoDock suite" - , journal="Journal of Natural Products" - , contributors=["Forli, S.","Huey, R.","Pique, M. E.","Sanner, M. F.","Goodsell, D. S.","Olson, A. J."] - , cit_type="Reference") - , Citation(title = "A semiempirical free energy force field with charge-based desolvation" - , doi_url = "https://doi.org/10.1002/jcc.20634" - , journal="Journal of Computational Chemistry" - , contributors=["Huey, R.","Morris, G. M.","Olson, A. J.","Goodsell, D. S."] - , cit_type="Reference") - , Citation(title="Accelerating autodock4 with gpus and gradient-based local search" - , doi_url="https://doi.org/10.1021/acs.jctc.0c01006" - , journal="Journal of Chemical Theory and Computation" - , contributors=["Santos-Martins, D.","Solis-Vasquez, L.","Tillack, A. F.","Sanner, M. F.","Koch, A.","Forli, S."] - , cit_type="Reference") - , Citation(title="AutoDockFR: Advances in Protein-Ligand Docking with Explicitly Specified Binding Site Flexibility" - , doi_url="https://doi.org/10.1371/journal.pcbi.1004586" - , journal="PLoS Computational Biology" - , contributors=["Ravindranath, P. A.","Forli, S.","Goodsell, D. S.","Olson, A. J.","Sanner, M. 
F."] - , cit_type="Reference") - , Citation(title="Docking flexible cyclic peptides with AutoDock CrankPep" - , doi_url="https://doi.org/10.1021/acs.jctc.9b00557" - , journal="Journal of Chemical Theory and Computation" - , contributors=["Zhang, Y.","Sanner, M. F."] - , cit_type="Reference") - , Citation(title="Fast, accurate, and reliable molecular docking with QuickVina 2" - , doi_url="https://doi.org/10.1093/bioinformatics/btv082" - , journal="Bioinformatics" - , contributors=["Alhossary, A.","Handoko, S. D.","Mu, Y.","Kwoh, C.-K."] - , cit_type="Reference") - , Citation(title="Lessons learned in empirical scoring with smina from the CSAR 2011 benchmarking exercise" - , doi_url="https://doi.org/10.1021/ci300604z" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Koes, D. R.","Baumgartner, M. P.","Camacho, C. J."] - , cit_type="Reference") - , Citation(title="Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking" - , doi_url="https://doi.org/10.1021/acs.jctc.5b00834" - , journal="Journal of Chemical Theory and Computation" - , contributors=["Nivedha, A. K.","Thieker, D. F.","Makeneni, S.","Hu, H.","Woods, R. J."] - , cit_type="Reference") - , Citation(title="AutoDock VinaXB: implementation of XBSF, new empirical halogen bond scoring function, into AutoDock Vina" - , doi_url="https://doi.org/10.1186/s13321-016-0139-1" - , journal="Journal of Cheminformatics" - , contributors=["Koebel, M. R.","Schmadeke, G.","Posner, R. G.","Sirimulla, S."] - , cit_type="Reference") - , Citation(title="Vinardo: A Scoring Function Based on Autodock Vina Improves Scoring, Docking, and Virtual Screening" - , doi_url="https://doi.org/10.1371/journal.pone.0155183" - , journal="PLoS One" - , contributors=["Quiroga, R.","Villarreal, M. A."] - , cit_type="Reference") - , Citation(title="Lennard-Jones potential and dummy atom settings to overcome the AUTODOCK limitation in treating flexible ring systems" - , doi_url="https://doi.org/10.1021/ci700036j" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Forli, S.","Botta, M."] - , cit_type="Reference") - , Citation(title="AutoDock4Zn: an improved AutoDock force field for small-molecule docking to zinc metalloproteins" - , doi_url="https://doi.org/10.1021/ci500209e" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Santos-Martins, D.","Forli, S.","Ramos, M. J.","Olson, A. J."] - , cit_type="Reference") - , Citation(title="A force field with discrete displaceable waters and desolvation entropy for hydrated ligand docking" - , doi_url="https://doi.org/10.1021/jm2005145" - , journal="Journal of Medicinal Chemistry" - , contributors=["Forli, S.","Olson, A. J."] - , cit_type="Reference") - , Citation(title="Directional phosphorylation and nuclear transport of the splicing factor SRSF1 is regulated by an RNA recognition motif" - , doi_url="https://doi.org/10.1016/j.jmb.2016.04.009" - , journal="Journal of Molecular Biology" - , contributors=["Serrano, P.","Aubol, B. E.","Keshwani, M. M.","Forli, S.","Ma, C.-T.","Dutta, S. K.","Geralt, M.","Wüthrich, K.","Adams, J. A."] - , cit_type="Reference") - , Citation(title="Covalent docking using autodock: Two-point attractor and flexible side chain methods" - , doi_url="https://doi.org/10.1002/pro.2733" - , journal="Protein Science" - , contributors=["Bianco, G.","Forli, S.","Goodsell, D. S.","Olson, A. 
J."] - , cit_type="Reference") - , Citation(title="Consensus docking: improving the reliability of docking in a virtual screening context" - , doi_url="https://doi.org/10.1021/ci300399w" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Houston, D. R.","Walkinshaw, M. D."] - , cit_type="Reference") - , Citation(title="DockBench: an integrated informatic platform bridging the gap between the robust validation of docking protocols and virtual screening simulations" - , doi_url="https://doi.org/10.3390/molecules20069977" - , journal="Molecules" - , contributors=["Cuzzolin, A.","Sturlese, M.","Malvacio, I.","Ciancetta, A.","Moro, S."] - , cit_type="Reference") - , Citation(title="A new force field for molecular mechanical simulation of nucleic acids and proteins" - , doi_url="https://doi.org/10.1021/ja00315a051" - , journal="Journal of the American Chemical Society" - , contributors=["Weiner, S. J.","Kollman, P. A.","Case, D. A.","Singh, U. C.","Ghio, C.","Alagona, G.","Profeta, S.","Weiner, P."] - , cit_type="Reference") - , Citation(title="AutoDock Bias: improving binding mode prediction and virtual screening using known protein-ligand interactions" - , doi_url="https://doi.org/10.1093/bioinformatics/btz152" - , journal="Bioinformatics" - , contributors=["Arcon, J. P.","Modenutti, C. P.","Avendaño, D.","Lopez, E. D.","Defelipe, L. A.","Ambrosio, F. A.","Turjanski, A. G.","Forli, S.","Marti, M. A."] - , cit_type="Reference") - , Citation(title="Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory" - , doi_url="https://doi.org/10.1021/jp9723574" - , journal="Journal of Physical Chemistry B" - , contributors=["Lazaridis, T."] - , cit_type="Reference") - , Citation(title="Inhomogeneous fluid approach to solvation thermodynamics. 2. Applications to simple fluids" - , doi_url="https://doi.org/10.1021/jp972358w" - , journal="Journal of Physical Chemistry B" - , contributors=["Lazaridis, T."] - , cit_type="Reference") - , Citation(title="Grid inhomogeneous solvation theory: Hydration structure and thermodynamics of the miniature receptor cucurbit[7]uril" - , doi_url="https://doi.org/10.1063/1.4733951" - , journal="Journal of Chemical Physics" - , contributors=["Nguyen, C. N.","Young, T. K.","Gilson, M. K."] - , cit_type="Reference") - , Citation(title="AutoDock-GIST: Incorporating Thermodynamics of Active-Site Water into Scoring Function for Accurate Protein-Ligand Docking" - , doi_url="https://doi.org/10.3390/molecules21111604" - , journal="Molecules" - , contributors=["Uehara, S.","Tanaka, S."] - , cit_type="Reference") - , Citation(title="ZINC20—A Free Ultralarge-Scale Chemical Database for Ligand Discovery" - , doi_url="https://doi.org/10.1021/acs.jcim.0c00675" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Irwin, J. J.","Tang, K. G.","Young, J.","Dandarchuluun, C.","Wong, B. R.","Khurelbaatar, M.","Moroz, Y. S.","Mayfield, J.","Sayle, R. 
A."] - , cit_type="Reference") - , Citation(title="Structural biology-inspired discovery of novel KRAS–PDEδ inhibitors" - , doi_url="https://doi.org/10.1021/acs.jmedchem.7b01243" - , journal="Journal of Medicinal Chemistry" - , contributors=["Jiang, Y.","Zhuang, C.","Chen, L.","Lu, J.","Dong, G.","Miao, Z.","Zhang, W.","Li, J.","Sheng, C."] - , cit_type="Reference") - , Citation(title="D3R grand challenge 2015: evaluation of protein–ligand pose and affinity predictions" - , doi_url="https://doi.org/10.1007/s10822-016-9946-8" - , journal="Journal of Computer-Aided Molecular Design" - , contributors=["Gathiaka, S.","Liu, S.","Chiu, M.","Yang, H.","Stuckey, J. A.","Kang, Y. N.","Delproposto, J.","Kubish, G.","Dunbar, J. B.","Carlson, H. A.","Burley, S. K.","Walters, W. P.","Amaro, R. E.","Feher, V. A.","Gilson, M. K."] - , cit_type="Reference") - , Citation(title="D3R grand challenge 4: blind prediction of protein–ligand poses, affinity rankings, and relative binding free energies" - , doi_url="https://doi.org/10.1007/s10822-020-00289-y" - , journal="Journal of Computer-Aided Molecular Design" - , contributors=["Parks, C. D.","Gaieb, Z.","Chiu, M.","Yang, H.","Shao, C.","Walters, W. P.","Jansen, J. M.","McGaughey, G.","Lewis, R. A.","Bembenek, S. D.","Ameriks, M. K.","Mirzadegan, T.","Burley, S. K.","Amaro, R. E.","Gilson, M. K."] - , cit_type="Reference") - , Citation(title="D3R Grand Challenge 4: prospective pose prediction of BACE1 ligands with AutoDock-GPU" - , doi_url="https://doi.org/10.1007/s10822-019-00241-9" - , journal="Journal of Computer-Aided Molecular Design" - , contributors=["Santos-Martins, D.","Eberhardt, J.","Bianco, G.","Solis-Vasquez, L.","Ambrosio, F. A.","Koch, A.","Forli, S."] - , cit_type="Reference") - , Citation(title="Comparison of affinity ranking using AutoDock-GPU and MM-GBSA scores for BACE-1 inhibitors in the D3R Grand Challenge 4" - , doi_url="https://doi.org/10.1007/s10822-019-00240-w" - , journal="Journal of Computer-Aided Molecular Design" - , contributors=["El Khoury, L.","Santos-Martins, D.","Sasmal, S.","Eberhardt, J.","Bianco, G.","Ambrosio, F. A.","Solis-Vasquez, L.","Koch, A.","Forli, S.","Mobley, D. L."] - , cit_type="Reference") - , Citation(title="Macrocycle modeling in ICM: benchmarking and evaluation in D3R Grand Challenge 4" - , doi_url="https://doi.org/10.1007/s10822-019-00225-9" - , journal="Journal of Computer-Aided Molecular Design" - , contributors=["Lam, P. C.-H.","Abagyan, R.","Totrov, M."] - , cit_type="Reference") - , Citation(title="Directory of useful decoys, enhanced (DUD-E): better ligands and decoys for better benchmarking" - , doi_url="https://doi.org/10.1021/jm300687e" - , journal="Journal of Medicinal Chemistry" - , contributors=["Mysinger, M. M.","Carchia, M.","Irwin, J. J.","Shoichet, B. K."] - , cit_type="Reference") - , Citation(title="Evaluation of AutoDock and AutoDock Vina on the CASF-2013 benchmark" - , doi_url="https://doi.org/10.1021/acs.jcim.8b00312" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Gaillard, T."] - , cit_type="Reference") - , Citation(title="Autodock vina adopts more accurate binding poses but autodock4 forms better binding affinity" - , doi_url="https://doi.org/10.1021/acs.jcim.9b00778" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Nguyen, N. T.","Nguyen, T. H.","Pham, T. N. H.","Huy, N. T.","Bay, M. V.","Pham, M. Q.","Nam, P. C.","Vu, V. V.","Ngo, S. 
T."] - , cit_type="Reference") - , Citation(title="Development and validation of a genetic algorithm for flexible docking" - , doi_url="https://doi.org/10.1006/jmbi.1996.0897" - , journal="Journal of Molecular Biology" - , contributors=["Jones, G.","Willett, P.","Glen, R. C.","Leach, A. R.","Taylor, R."] - , cit_type="Reference") - , Citation(title="Glide: a new approach for rapid, accurate docking and scoring. 1. Method and assessment of docking accuracy" - , doi_url="https://doi.org/10.1021/jm0306430" - , journal="Journal of Medicinal Chemistry" - , contributors=["Friesner, R. A.","Banks, J. L.","Murphy, R. B.","Halgren, T. A.","Klicic, J. J.","Mainz, D. T.","Repasky, M. P.","Knoll, E. H.","Shelley, M.","Perry, J. K."] - , cit_type="Reference") - , Citation(title="Surflex: fully automatic flexible molecular docking using a molecular similarity-based search engine" - , doi_url="https://doi.org/10.1021/jm020406h" - , journal="Journal of Medicinal Chemistry" - , contributors=["Jain, A. N."] - , cit_type="Reference") - , Citation(title="A fast flexible docking method using an incremental construction algorithm" - , doi_url="https://doi.org/10.1006/jmbi.1996.0477" - , journal="Journal of Molecular Biology" - , contributors=["Rarey, M.","Kramer, B.","Lengauer, T.","Klebe, G."] - , cit_type="Reference") - , Citation(title="EDock: blind protein–ligand docking by replica-exchange monte carlo simulation" - , doi_url="https://doi.org/10.1186/s13321-020-00440-9" - , journal="Journal of Cheminformatics" - , contributors=["Zhang, W.","Bell, E. W.","Yin, M.","Zhang, Y."] - , cit_type="Reference") - , Citation(title="DOCK 6: Impact of new features and current docking performance" - , doi_url="https://doi.org/10.1002/jcc.23905" - , journal="Journal of Computational Chemistry" - , contributors=["Allen, W. J.","Balius, T. E.","Mukherjee, S.","Brozell, S. R.","Moustakas, D. T.","Lang, P. T.","Case, D. A.","Kuntz, I. D.","Rizzo, R. 
C."] - , cit_type="Reference") - , Citation(title="Improving scoring-docking-screening powers of protein–ligand scoring functions using random forest" - , doi_url="https://doi.org/10.1002/jcc.24667" - , journal="Journal of Computational Chemistry" - , contributors=["Wang, C.","Zhang, Y."] - , cit_type="Reference") - , Citation(title="ID-Score: a new empirical scoring function based on a comprehensive set of descriptors related to protein–ligand interactions" - , doi_url="https://doi.org/10.1021/ci300493w" - , journal="Journal of Chemical Information and Modeling" - , contributors=["Li, G.-B.","Yang, L.-L.","Wang, W.-J.","Li, L.-L.","Yang, S.-Y."] - , cit_type="Reference") - , Citation(title="Further development and validation of empirical scoring functions for structure-based binding affinity prediction" - , doi_url="https://doi.org/10.1023/a:1016357811882" - , journal="Journal of Computer-Aided Molecular Design" - , contributors=["Wang, R.","Lai, L.","Wang, S."] - , cit_type="Reference") - , Citation(title="A knowledge-based energy function for protein- ligand, protein- protein, and protein- DNA complexes" - , doi_url="https://doi.org/10.1021/jm049314d" - , journal="Journal of Medicinal Chemistry" - , contributors=["Zhang, C.","Liu, S.","Zhu, Q.","Zhou, Y."] - , cit_type="Reference") - , Citation(title="DLIGAND2: an improved knowledge-based energy function for protein–ligand interactions using the distance-scaled, finite, ideal-gas reference state" - , doi_url="https://doi.org/10.1186/s13321-019-0373-4" - , journal="Journal of Cheminformatics" - , contributors=["Chen, P.","Ke, Y.","Lu, Y.","Du, Y.","Li, J.","Yan, H.","Zhao, H.","Zhou, Y.","Yang, Y."] - , cit_type="Reference") - , Citation(title="Comparing AutoDock and Vina in ligand/decoy discrimination for virtual screening" - , doi_url="https://doi.org/10.3390/app9214538" - , journal="Applied Science" - , contributors=["Vieira, T. F.","Sousa, S. F."] - , cit_type="Reference") - , Citation(title="Benchmark of four popular virtual screening programs: construction of the active/decoy dataset remains a major determinant of measured performance" - , doi_url="https://doi.org/10.1186/s13321-016-0167-x" - , journal="Journal of Cheminformatics" - , contributors=["Chaput, L.","Martinez-Sanz, J.","Quiniou, E.","Rigolet, P.","Saettel, N.","Mouawad, L."] - , cit_type="Reference") - , Citation(title="Array programming with NumPy" - , doi_url="https://doi.org/10.1038/s41586-020-2649-2" - , journal="Nature" - , contributors=["Harris, C. R."] - , cit_type="Reference") - , Citation(title="Matplotlib: A 2D graphics environment" - , doi_url="https://doi.org/10.1109/mcse.2007.55" - , journal="Computing in Science & Engineering" - , contributors=["Hunter, J. D."] - , cit_type="Reference") - ], citations = [ - Citation(doi_url = "https://doi.org/10.1021/acsomega.1c04320" - , title ="Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives" - , journal="ACS Omega" - , contributors=["Mohammad K. Parvez","Sarfaraz Ahmed","Mohammed S. Al-Dosari","Mazin A. S. Abdelwahid","Ahmed H. Arbab","Adnan J. Al-Rehaily","Mai M. 
Al-Oqail"],cit_type="Citation"), - - ] - ) - } \ No newline at end of file diff --git a/input/test/test_input.py b/input/test/test_input.py deleted file mode 100755 index b2ca55f961565fd1192b72ce992c9ff95bd23020..0000000000000000000000000000000000000000 --- a/input/test/test_input.py +++ /dev/null @@ -1,82 +0,0 @@ -import unittest -from input.get.journal_fetcher import JournalFetcher -from input.interface import InputInterface -from input.publication import Publication - -""" -Testing the Publication fetcher - -Publication 1: 'https://doi.org/10.1021/acs.jcim.1c00203' -Publication 2: 'doi.org/10.1021/acs.jcim.1c00917' -Publication 3: '10.1038/nchem.1781' -Publication 4: '11.12/jaj' -Publication 5: '11.12/' -Publication 6: 'https://doi.org/10.1021/acs.jmedchem.0c01332' # Paper is a PDF -""" -# TODO: Testcases for: -# - Specific Journals: Inherit from FetcherTestCase -# - interface module-importer (test case) -# - Error detection -# - wrong/no Journal_fetchers -# - wrong urls -# - correct Types in publication -# - Edgecases (i.e. paper as pdf, no connection, etc) - - -class InterfaceTestCase(unittest.TestCase): - def setUp(self): - self.assertEqual(InputInterface.instance, None) - self.interface = InputInterface() - - def test_singleton(self): - # interface should already be made in setUp() - self.assertNotEqual(self.interface.instance, None) - new_interface = InputInterface() - self.assertEqual(self.interface, new_interface) - - # def test_imported_modules(self): - # fetchers = self.interface.get_supported_fetchers - -class FetcherTestCase(unittest.TestCase): - - - def can_use_url_test(self, fetcher : JournalFetcher, test_url: str, expected_res: bool): - # Tests the 'can_use_url'-method - self.assertEqual(fetcher.can_use_url(test_url), expected_res) - - - def get_publication_test(self, fetcher : JournalFetcher, test_url: str, expected_res: Publication): - """ - this test asserts that every variable is equals to the expected result - """ - actual_res = fetcher.get_publication(test_url) - self.assertEqual(actual_res.doi_url, expected_res.doi_url) - self.assertEqual(actual_res.title, expected_res.title) - self.assertEqual(actual_res.contributors, expected_res.contributors) - self.assertEqual(actual_res.journal, expected_res.journal) - self.assertEqual(actual_res.publication_date, expected_res.publication_date) - self.assertEqual(actual_res.subjects, expected_res.subjects) - - # Checking for all references - self.assertEqual(len(actual_res.references), len(expected_res.references)) - num_references = len(expected_res.references) - for i in range(num_references): - self.assertEqual(actual_res.references[i].doi_url, expected_res.references[i].doi_url) - self.assertEqual(actual_res.references[i].journal, expected_res.references[i].journal) - self.assertEqual(actual_res.references[i].contributors, expected_res.references[i].contributors) - self.assertEqual(actual_res.references[i].cit_type, expected_res.references[i].cit_type) - - # Checking for all citations - self.assertEqual(len(actual_res.citations), len(expected_res.citations)) - num_citations = len(expected_res.citations) - for i in range(num_citations): - self.assertEqual(actual_res.citations[i].doi_url, expected_res.citations[i].doi_url) - self.assertEqual(actual_res.citations[i].journal, expected_res.citations[i].journal) - self.assertEqual(actual_res.citations[i].contributors, expected_res.citations[i].contributors) - self.assertEqual(actual_res.citations[i].cit_type, expected_res.citations[i].cit_type) - - - def 
get_publication_exception_test(self, fetcher: JournalFetcher, test_url: str): - # Ckecks - with self.assertRaises(ValueError): - fetcher.get_publication(test_url) \ No newline at end of file diff --git a/input_old/README.md b/input_old/README.md deleted file mode 100644 index 76bd11d5d70daac13e190f4d52269eb381413c69..0000000000000000000000000000000000000000 --- a/input_old/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Projekt CiS-Projekt 2021/22 -Input-Skripts - diff --git a/input_old/__pycache__/input_fj.cpython-39.pyc b/input_old/__pycache__/input_fj.cpython-39.pyc deleted file mode 100644 index a3e6099f4ab4c56400b2698c812d4b5fc9a9a7aa..0000000000000000000000000000000000000000 Binary files a/input_old/__pycache__/input_fj.cpython-39.pyc and /dev/null differ diff --git a/input_old/example_urls b/input_old/example_urls deleted file mode 100644 index 96ac680c65edddcb495312000157edea1ab94884..0000000000000000000000000000000000000000 --- a/input_old/example_urls +++ /dev/null @@ -1,2 +0,0 @@ -https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332 -https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709 diff --git a/input_old/input_fj.py b/input_old/input_fj.py deleted file mode 100644 index ecc8e68fc5a84a446ae3f09dcb5ed56e8d262766..0000000000000000000000000000000000000000 --- a/input_old/input_fj.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 -""" -Functions for information retrieval of articles from the ACS journal JCIM - -""" - -__author__ = "Florian Jochens" -__email__ = "fj@andaco.de" -__status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" - -from bs4 import BeautifulSoup as bs -import requests as req -import sys -from pathlib import Path - -class Publication: - #_registry = [] - _citations = [] - _references = [] - - def __init__(self, title, publication_date, contributors, doi_url, - subjects = None, num_citations = None): - #self._registry.append(self) - self.title = title - self.publication_date = publication_date - self.contributors = contributors - self.doi_url = doi_url - self.subjects = subjects - self.num_citations = num_citations - #self._citations = [] - #self._references = [] - -class Citation: - def __init__(self, title, journal, contributors, doi_url): - self.title = title - self.journal = journal - self.contributors = contributors - self.doi_url = doi_url - -class References: - def __init__(self, title, journal, contributors, doi_url): - self.title = title - self.journal = journal - self.contributors = contributors - self.doi_url = doi_url - -def get_article_info(soup): - header = soup.find('div', class_ = 'article_header-left pull-left') - article_title = header.find('span', class_ = 'hlFld-Title').text - publication_date = header.find('span', class_ = 'pub-date-value').text - for link in header.find('div', class_ = 'article_header-doiurl'): - doi_url = link.get('href') - subs = header.find('div', class_ = 'article_header-taxonomy') - subjects = [] - for sub in subs.find_all('a'): - subjects.append(sub.get('title')) - cons = header.find('ul', class_ = 'loa') - contributors = [] - for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'): - contributors.append(con.text) - numc = header.find('div', class_ = 'articleMetrics_count') - if not numc.a: - num_citations = 0 - else: - num_citations = numc.a.text - - pub = Publication(article_title, publication_date, contributors, doi_url, - subjects, num_citations) - return pub - -def get_download_url(): - export = soup.find('div', class_ = 
'cit-download-dropdown_content') - url = 'https://pubs.acs.org' - for link in export.find_all('a'): - if link.get('title') == 'Citation and references': - url += link.get('href') - print(url) - return url - -def download(url): # Download citation and references file - if url.find('='): - filename = url.rsplit('=', 1)[1] - path = Path(('./files/' + filename)) - if path.is_file(): - print("File already exists") - else: - print("File does not exist") - -def get_citation_info(pub, num_citations, soup): - pub._citations = [] - details = soup.find('ol', class_ = 'cited-content_cbyCitation') - titles = [] - for title in details.find_all('span', - class_ = 'cited-content_cbyCitation_article-title'): - titles.append(title.text.replace('.', '')) - journal_names = [] - for name in details.find_all('span', - class_ = 'cited-content_cbyCitation_journal-name'): - journal_names.append(name.text) - doi_urls = [] - for url in details.find_all('a'): - doi_urls.append(url.get('href')) - contributors = [] - for contrib in details.find_all('span', - class_ = 'cited-content_cbyCitation_article-contributors'): - contributors.append(contrib.text) - for i in range(0, int(num_citations)): - pub._citations.append(Citation(titles[i], journal_names[i], - contributors[i], doi_urls[i])) -def print_pub_info(pub): - print(f'''Article title: {pub.title} -Publication date: {pub.publication_date} -DOI-URL: {pub.doi_url} - -Subjects:''') - print(*(pub.subjects), sep = ", ") - print('\nContributors:') - print(*(pub.contributors), sep = ", ") - - if int(pub.num_citations) > 0: - if int(pub.num_citations) == 1: - print(f'\nThis publication is cited by the following publication:\n') - else: - print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n') - for citation in pub._citations: - print(f''' - Title: {citation.title} - Journal: {citation.journal} - Contributors: {citation.contributors} - DOI-URL: {citation.doi_url} - ''') - else: - print('\nThis publication is not cited by any other publication.') - -def input(url): - html_text = req.get(url).text - soup = bs(html_text, 'html.parser') - - pub = get_article_info(soup) - if int(pub.num_citations) > 0: - get_citation_info(pub, int(pub.num_citations), soup) - return pub - -#if len(sys.argv) != 2: -# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0])) -# exit(1) -#url = sys.argv[1] -#pub = input(url) -#print_pub_info(pub) diff --git a/input_old/pub.py b/input_old/pub.py deleted file mode 100644 index 13b90e804cd485813b731385b319b3077a017dd2..0000000000000000000000000000000000000000 --- a/input_old/pub.py +++ /dev/null @@ -1,32 +0,0 @@ -class Publication: - #_registry = [] - #_citations = [] - #_references = [] - - def __init__(self, title, publication_date, contributors, doi_url, - subjects, num_citations): - #self._registry.append(self) - self.title = title - self.publication_date = publication_date - self.contributors = contributors - self.doi_url = doi_url - self.subjects = subjects - self.num_citations = num_citations - self.num_references = num_references - self._citations = [] - self._references = [] - -class Citation: - def __init__(self, title, journal, contributors, doi_url): - self.title = title - self.journal = journal - self.contributors = contributors - self.doi_url = doi_url - -class References: - def __init__(self, title, journal, contributors, doi_url): - self.title = title - self.journal = journal - self.contributors = contributors - self.doi_url = doi_url - diff --git a/input_old/test.py b/input_old/test.py deleted file mode 
100755 index dc623ca182691e9e06a6713a4d3d5dcf0bbf23c2..0000000000000000000000000000000000000000 --- a/input_old/test.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python3 - -from input_fj import input, print_pub_info -import sys - -if len(sys.argv) != 3: - sys.stderr.write('Usage: {} <url> <url>\n'.format(sys.argv[0])) - exit(1) -url = sys.argv[1] -url2 = sys.argv[2] -pub = input(url) -print_pub_info(pub) -pub2 = input(url2) -print_pub_info(pub2) - diff --git a/input_old/x b/input_old/x deleted file mode 100644 index c8ade9d56a520a3ac57e5eadce8b81bb3e63c0dd..0000000000000000000000000000000000000000 --- a/input_old/x +++ /dev/null @@ -1,234 +0,0 @@ -Article title: Feasibility of Active Machine Learning for Multiclass Compound Classification -Publication date: January 7, 2016 -DOI-URL: https://doi.org/10.1021/acs.jcim.5b00332 - -Subjects: -Algorithms, Molecules, Drug discovery, Screening assays, Receptors - -Contributors: -Tobias Lang, Florian Flachsenberg, Ulrike von Luxburg, Matthias Rarey - -This publication is cited by the following 30 publications: - - - Title: Concepts of Artificial Intelligence for Computer-Assisted Drug Discovery - Journal: Chemical Reviews - Contributors: Xin Yang, Yifei Wang, Ryan Byrne, Gisbert Schneider, Shengyong Yang. - DOI-URL: https://doi.org/10.1021/acs.chemrev.8b00728 - - - Title: De Novo Molecule Design by Translating from Reduced Graphs to SMILES - Journal: Journal of Chemical Information and Modeling - Contributors: Peter Pogány, Navot Arad, Sam Genway, Stephen D. Pickett. - DOI-URL: https://doi.org/10.1021/acs.jcim.8b00626 - - - Title: Designing Algorithms To Aid Discovery by Chemical Robots - Journal: ACS Central Science - Contributors: Alon B. Henson, Piotr S. Gromski, Leroy Cronin. - DOI-URL: https://doi.org/10.1021/acscentsci.8b00176 - - - Title: Modeling Kinase Inhibition Using Highly Confident Data Sets - Journal: Journal of Chemical Information and Modeling - Contributors: Sorin Avram, Alina Bora, Liliana Halip, Ramona Curpăn. - DOI-URL: https://doi.org/10.1021/acs.jcim.7b00729 - - - Title: Predictive Models for Fast and Effective Profiling of Kinase Inhibitors - Journal: Journal of Chemical Information and Modeling - Contributors: Alina Bora, Sorin Avram, Ionel Ciucanu, Marius Raica, and Stefana Avram . - DOI-URL: https://doi.org/10.1021/acs.jcim.5b00646 - - - Title: Evaluation of categorical matrix completion algorithms: toward improved active learning for drug discovery - Journal: Bioinformatics - Contributors: Huangqingbo Sun, Robert F Murphy, . - DOI-URL: https://doi.org/10.1093/bioinformatics/btab322 - - - Title: An Artificial Intelligence Approach Based on Hybrid CNN-XGB Model to Achieve High Prediction Accuracy through Feature Extraction, Classification and Regression for Enhancing Drug Discovery in Biomedicine - Journal: International Journal of Biology and Biomedical Engineering - Contributors: Mukesh Madanan, Biju T. Sayed, Nurul Akhmal Mohd Zulkefli, Nitha C. Velayudhan. - DOI-URL: https://doi.org/10.46300/91011.2021.15.22 - - - Title: Artificial Intelligence in Medicinal Chemistry - Journal: - Contributors: Edward Griffen, Alexander Dossetter, Andrew Leach, Shane Montague. - DOI-URL: https://doi.org/10.1002/0471266949.bmc267 - - - Title: Practical Chemogenomic Modeling and Molecule Discovery Strategies Unveiled by Active Learning - Journal: - Contributors: J.B. Brown. 
- DOI-URL: https://doi.org/10.1016/B978-0-12-801238-3.11533-8 - - - Title: Machine learning phases and criticalities without using real data for training - Journal: Physical Review B - Contributors: D.-R. Tan, F.-J. Jiang. - DOI-URL: https://doi.org/10.1103/PhysRevB.102.224434 - - - Title: Active learning effectively identifies a minimal set of maximally informative and asymptotically performant cytotoxic structure–activity patterns in NCI-60 cell lines - Journal: RSC Medicinal Chemistry - Contributors: Takumi Nakano, Shunichi Takeda, J.B. Brown. - DOI-URL: https://doi.org/10.1039/D0MD00110D - - - Title: Active learning efficiently converges on rational limits of toxicity prediction and identifies patterns for molecule design - Journal: Computational Toxicology - Contributors: Ahsan Habib Polash, Takumi Nakano, Christin Rakers, Shunichi Takeda, J.B. Brown. - DOI-URL: https://doi.org/10.1016/j.comtox.2020.100129 - - - Title: Practical considerations for active machine learning in drug discovery - Journal: Drug Discovery Today: Technologies - Contributors: Daniel Reker. - DOI-URL: https://doi.org/10.1016/j.ddtec.2020.06.001 - - - Title: Designing compact training sets for data-driven molecular property prediction through optimal exploitation and exploration - Journal: Molecular Systems Design & Engineering - Contributors: Bowen Li, Srinivas Rangarajan. - DOI-URL: https://doi.org/10.1039/C9ME00078J - - - Title: Applicability Domain of Active Learning in Chemical Probe Identification: Convergence in Learning from Non-Specific Compounds and Decision Rule Clarification - Journal: Molecules - Contributors: Ahsan Habib Polash, Takumi Nakano, Shunichi Takeda, J.B. Brown. - DOI-URL: https://doi.org/10.3390/molecules24152716 - - - Title: Capturing and applying knowledge to guide compound optimisation - Journal: Drug Discovery Today - Contributors: Matthew Segall, Tamsin Mansley, Peter Hunt, Edmund Champness. - DOI-URL: https://doi.org/10.1016/j.drudis.2019.02.004 - - - Title: A novel graph kernel on chemical compound classification - Journal: Journal of Bioinformatics and Computational Biology - Contributors: Qiangrong Jiang, Jiajia Ma. - DOI-URL: https://doi.org/10.1142/S0219720018500269 - - - Title: Accelerating Drug Discovery Using Convolution Neural Network Based Active Learning - Journal: - Contributors: Pengfei Liu, Kwong-Sak Leung. - DOI-URL: https://doi.org/10.1109/TENCON.2018.8650298 - - - Title: An Adaptive Lightweight Security Framework Suited for IoT - Journal: - Contributors: Menachem Domb. - DOI-URL: https://doi.org/10.5772/intechopen.73712 - - - Title: Adaptive mining and model building of medicinal chemistry data with a multi-metric perspective - Journal: Future Medicinal Chemistry - Contributors: JB Brown. - DOI-URL: https://doi.org/10.4155/fmc-2018-0188 - - - Title: Chemogenomic Active Learning's Domain of Applicability on Small, Sparse qHTS Matrices: A Study Using Cytochrome P450 and Nuclear Hormone Receptor Families - Journal: ChemMedChem - Contributors: Christin Rakers, Rifat Ara Najnin, Ahsan Habib Polash, Shunichi Takeda, J.B. Brown. - DOI-URL: https://doi.org/10.1002/cmdc.201700677 - - - Title: Automating drug discovery - Journal: Nature Reviews Drug Discovery - Contributors: Gisbert Schneider. - DOI-URL: https://doi.org/10.1038/nrd.2017.232 - - - Title: Classifiers and their Metrics Quantified - Journal: Molecular Informatics - Contributors: J. B. Brown. 
- DOI-URL: https://doi.org/10.1002/minf.201700127 - - - Title: Active Search for Computer-aided Drug Design - Journal: Molecular Informatics - Contributors: Dino Oglic, Steven A. Oatley, Simon J. F. Macdonald, Thomas Mcinally, Roman Garnett, Jonathan D. Hirst, Thomas Gärtner. - DOI-URL: https://doi.org/10.1002/minf.201700130 - - - Title: Selection of Informative Examples in Chemogenomic Datasets - Journal: - Contributors: Daniel Reker, J. B. Brown. - DOI-URL: https://doi.org/10.1007/978-1-4939-8639-2_13 - - - Title: The value of prior knowledge in machine learning of complex network systems - Journal: Bioinformatics - Contributors: Dana Ferranti, David Krane, David Craft, . - DOI-URL: https://doi.org/10.1093/bioinformatics/btx438 - - - Title: Lightweight adaptive Random-Forest for IoT rule generation and execution - Journal: Journal of Information Security and Applications - Contributors: Menachem Domb, Elisheva Bonchek-Dokow, Guy Leshem. - DOI-URL: https://doi.org/10.1016/j.jisa.2017.03.001 - - - Title: Active learning for computational chemogenomics - Journal: Future Medicinal Chemistry - Contributors: Daniel Reker, Petra Schneider, Gisbert Schneider, JB Brown. - DOI-URL: https://doi.org/10.4155/fmc-2016-0197 - - - Title: Small Random Forest Models for Effective Chemogenomic Active Learning - Journal: Journal of Computer Aided Chemistry - Contributors: Christin Rakers, Daniel Reker, J.B. Brown. - DOI-URL: https://doi.org/10.2751/jcac.18.124 - - - Title: Large-Scale Off-Target Identification Using Fast and Accurate Dual Regularized One-Class Collaborative Filtering and Its Application to Drug Repurposing - Journal: PLOS Computational Biology - Contributors: Hansaim Lim, Aleksandar Poleksic, Yuan Yao, Hanghang Tong, Di He, Luke Zhuang, Patrick Meng, Lei Xie, . - DOI-URL: https://doi.org/10.1371/journal.pcbi.1005135 - -Article title: Matched Molecular Series: Measuring SAR Similarity -Publication date: May 1, 2017 -DOI-URL: https://doi.org/10.1021/acs.jcim.6b00709 - -Subjects: -Substituents, Mathematical methods, Structure activity relationship, Biological databases - -Contributors: -Emanuel S. R. Ehmki, Christian Kramer - -This publication is cited by the following 5 publications: - - - Title: Matched Molecular Series Analysis for ADME Property Prediction - Journal: Journal of Chemical Information and Modeling - Contributors: Mahendra Awale, Sereina Riniker, Christian Kramer. - DOI-URL: https://doi.org/10.1021/acs.jcim.0c00269 - - - Title: Approaches using AI in medicinal chemistry - Journal: - Contributors: Christian Tyrchan, Eva Nittinger, Dea Gogishvili, Atanas Patronov, Thierry Kogej. - DOI-URL: https://doi.org/10.1016/B978-0-12-822249-2.00002-5 - - - Title: Bioactivity Prediction Based on Matched Molecular Pair and Matched Molecular Series Methods - Journal: Current Pharmaceutical Design - Contributors: Xiaoyu Ding, Chen Cui, Dingyan Wang, Jihui Zhao, Mingyue Zheng, Xiaomin Luo, Hualiang Jiang, Kaixian Chen. - DOI-URL: https://doi.org/10.2174/1381612826666200427111309 - - - Title: BRADSHAW: a system for automated molecular design - Journal: Journal of Computer-Aided Molecular Design - Contributors: Darren V. S. Green, Stephen Pickett, Chris Luscombe, Stefan Senger, David Marcus, Jamel Meslamani, David Brett, Adam Powell, Jonathan Masson. - DOI-URL: https://doi.org/10.1007/s10822-019-00234-8 - - - Title: The use of matched molecular series networks for cross target structure activity relationship translation and potency prediction - Journal: MedChemComm - Contributors: Christopher E. 
Keefer, George Chang. - DOI-URL: https://doi.org/10.1039/C7MD00465F - diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py deleted file mode 100644 index 0dcc7391bd5a633a86841f6097f486017ae94dfa..0000000000000000000000000000000000000000 --- a/verarbeitung/Processing.py +++ /dev/null @@ -1,247 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Functions to generate a graph representing citations between multiple ACS/Nature journals - -""" - -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" -__email__ = "cis-project2021@zbh.uni-hamburg.de" -__status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" - -from bs4 import BeautifulSoup as bs -import requests as req -import sys -from pathlib import Path -from input_fj import input -from input_test import input_test_func -from json_demo import output_to_json - -# adds every publication from input list to graph structure -# doi_input_list: list of publication dois from user -def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): - references_pub_obj_list = [] - citations_pub_obj_list = [] - - for pub_doi in doi_input_list: - - #checks if its a test and chooses input function accordingly - if(test_var): - pub = input_test_func(pub_doi) - else: - pub = input(pub_doi) - - # checks if publication already exists in nodes - not_in_nodes = True - for node in nodes: # checks if a pub is already in nodes - if (pub.doi_url == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - nodes.append(pub) - pub.group = "input" - else: - doi_input_list.remove(pub_doi) - - # inserts references as publication objects into list and - # inserts first depth references into nodes/edges if maximum search depth > 0 - for reference in create_graph_structure_references(pub, 0, search_depth_max, test_var): - references_pub_obj_list.append(reference) - - # inserts citations as publication objects into list and - # inserts first height citations into nodes if maximum search height > 0 - for citation in create_graph_structure_citations(pub, 0, search_height_max, test_var): - citations_pub_obj_list.append(citation) - - return(references_pub_obj_list, citations_pub_obj_list) - - -# adds edges between citation and reference group -def complete_inner_edges(test_var): - for node in nodes: - if (node.group == "depth"): - for citation in node.citations: - for cit in nodes: - if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): - edges.append([citation.doi_url, node.doi_url]) - if (node.group == "height"): - for reference in node.references: - for ref in nodes: - if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): - edges.append([node.doi_url,reference.doi_url]) - - - -# adds a node for every publication unknown -# adds edges for references between publications -def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): - references_pub_obj_list = [] - for reference in pub.references: - not_in_nodes = True - for node in nodes: - # checks every reference for duplication - if (reference.doi_url == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - if (search_depth < search_depth_max): - - #checks if its a test and chooses input function accordingly - if (test_var): - reference_pub_obj = input_test_func(reference.doi_url) - else: - reference_pub_obj = input(reference.doi_url) - - reference_pub_obj.group 
= "depth" - nodes.append(reference_pub_obj) - edges.append([pub.doi_url,reference_pub_obj.doi_url]) - references_pub_obj_list.append(reference_pub_obj) - - # adds edge only if citation already exists - elif [pub.doi_url,reference.doi_url] not in edges: - edges.append([pub.doi_url,reference.doi_url]) - return references_pub_obj_list - - -# recursive function to implement height-first-search on references -# references_pub_obj_list: input list of references as publication objects -# search_depth: current search_depth of height-first-search -# search_depth_max: maximal search_depth for dfs -def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): - # adds next level to nodes/edges - for pub in references_pub_obj_list: - new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) - - # If the maximum height has not yet been reached, calls function recursivly with increased height - if (search_depth < search_depth_max): - process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var) - - - - -# adds a node for every publication unknown -# adds edges for citations between publications -def create_graph_structure_citations(pub, search_height, search_height_max, test_var): - citations_pub_obj_list = [] - for citation in pub.citations: - not_in_nodes = True - for node in nodes: - # checks every citation for duplication - if (citation.doi_url == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - if (search_height < search_height_max): - - #checks if its a test and chooses input function accordingly - if (test_var): - citation_pub_obj = input_test_func(citation.doi_url) - else: - citation_pub_obj = input(citation.doi_url) - - citation_pub_obj.group = "height" - nodes.append(citation_pub_obj) - edges.append([citation_pub_obj.doi_url,pub.doi_url]) - citations_pub_obj_list.append(citation_pub_obj) - - # adds only edge if citation already exists - elif [citation.doi_url,pub.doi_url] not in edges: - edges.append([citation.doi_url,pub.doi_url]) - return citations_pub_obj_list - - - -# recursive function to implement height-first-search on citations -# citations_pub_obj_list: input list of citations as publication objects -# search_height: current search_height of height-first-search -# search_height_max: maximal search_height for dfs -def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): - # adds next level to nodes/edges - for pub in citations_pub_obj_list: - new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) - - # If the maximum height has not yet been reached, calls function recursivly with increased height - if (search_height < search_height_max): - process_citations_rec(new_citation_pub_obj_list, search_height+1, search_height_max, test_var) - - - - -# main function to call. 
-# main function to call. Needs as input:
-# doi_input_list: input list of dois
-# search_height: max search height to process to
-# search_depth: max search depth to process to
-# test_var: only needed for unit test as True, default is False
-def process_main(doi_input_list, search_height, search_depth, test_var = False):
-    # ERROR handling if the doi input list is empty
-    if (len(doi_input_list) == 0):
-        print("Error, no input data")
-        return
-
-    # ERROR if a negative number is entered for the height
-    if (search_height < 0):
-        print("Error, search_height of search must not be negative")
-        return
-
-    # ERROR if a negative number is entered for the depth
-    if (search_depth < 0):
-        print("Error, search_depth of search must not be negative")
-        return
-
-    # create empty array for the nodes
-    # create empty array for the edges
-    global nodes, edges
-    nodes = []
-    edges = []
-
-    # initializes nodes/edges from input and gets a list with publication objects for citations and references returned
-    references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
-
-    # function calls to begin recursive processing up to max depth/height
-    process_citations_rec(citations_obj_list, 1, search_height, test_var)
-    process_references_rec(references_obj_list, 1, search_depth, test_var)
-
-    # adds edges between reference group and citation group of known publications
-    complete_inner_edges(test_var)
-
-    # calls a script to save nodes and edges of the graph in a .json file
-    output_to_json(nodes,edges)
-
-    # only for unit tests
-    if (test_var == True):
-        doi_nodes_list = []
-        for node in nodes:
-            doi_nodes_list.append(node.doi_url)
-        return(doi_nodes_list, edges)
-
-
-
-
-# a function to print nodes and edges from a graph
-def print_graph(nodes, edges):
-    print("Nodes:\n")
-    for node in nodes:
-        print(node.title, "\n")
-    print("\nEdges:\n")
-    for edge in edges:
-        print(edge,"\n")
-
-
-# program test, because there is no connection to the UI yet.
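Before the try_known_publications() demo that follows, here is a hedged usage sketch of process_main() driven by the offline stub publications from input_test.py (passing test_var=True switches the input function to input_test_func()). The expected values are taken from the assertions in Processing_unittest.py below; the sketch assumes it is run from the verarbeitung/ directory so the local imports resolve, and note that output_to_json() writes json_text.json as a side effect.

```python
# Sketch of calling process_main() against the offline test data
# (doi_h02 -> doi_h1 -> doi_h2 is a chain of citations in input_test.py).
from Processing import process_main

nodes, edges = process_main(['doi_h02'], 2, 0, True)
print(nodes)   # expected: ['doi_h02', 'doi_h1', 'doi_h2']
print(edges)   # expected: [['doi_h1', 'doi_h02'], ['doi_h2', 'doi_h1']]
```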
-def try_known_publications(): - doi_list = [] - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') - #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') - - #arr.append('https://doi.org/10.1021/ci700007b') - #arr.append('https://doi.org/10.1021/acs.jcim.5b00292') - #url = sys.argv[1] - #arr.append[url] - - - nodes,edges = process_main(doi_list,2,2) - - print_graph(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/Processing_unittest.py deleted file mode 100644 index 772d57204ce3374211d1d1fd3d08d279f085aac3..0000000000000000000000000000000000000000 --- a/verarbeitung/Processing_unittest.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest -from Processing import process_main - -class ProcessingTest(unittest.TestCase): - def testCycle(self): - nodes, edges = process_main(['doiz1'],1,1,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) - - nodes, edges = process_main(['doiz1'],2,2,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) - - #def testBigCycle(self): - - #def testEmptyHeight(self): - - #def testEmptyDepth(self): - - def testEmptyDepthHeight(self): - nodes, edges = process_main(['doi1'],0,0,True) - self.assertCountEqual(nodes,['doi1']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2']) - self.assertCountEqual(edges, [['doi1', 'doi2']]) - - nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2', 'doi3']) - self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - - - def testInnerEdges(self): - nodes, edges = process_main(['doi_ie1'],1,1,True) - self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3']) - self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) - - def testRightHeight(self): - nodes, edges = process_main(['doi_h01'],1,0,True) - self.assertCountEqual(nodes,['doi_h01']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi_h02'],1,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1']) - self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - - nodes, edges = process_main(['doi_h02'],2,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2']) - self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) - - def testRightDepth(self): - nodes, edges = process_main(['doi_d01'],0,1,True) - self.assertCountEqual(nodes,['doi_d01']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi_d02'],0,1,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1']) - self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - - nodes, edges = process_main(['doi_d02'],0,2,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2']) - self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-36.pyc b/verarbeitung/__pycache__/Processing.cpython-36.pyc deleted file mode 100644 index eb6d8a0418a1340b746f2f664997515622356d8a..0000000000000000000000000000000000000000 Binary files 
a/verarbeitung/__pycache__/Processing.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing.cpython-38.pyc b/verarbeitung/__pycache__/Processing.cpython-38.pyc deleted file mode 100644 index 63ac529316c848e829cd83ef44ec749e5903bf9e..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc deleted file mode 100644 index 54c63251bbf3affbdd176d3d55f4956c2fc08406..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc b/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc deleted file mode 100644 index 9ce1023e6ea54e1b04b37ad5a1fd08115d5f52a4..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-36.pyc b/verarbeitung/__pycache__/input_fj.cpython-36.pyc deleted file mode 100644 index 04312c91f0a7675651e99a2a6c10a2c9da146758..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc deleted file mode 100644 index 515ab99c01a5ce78bb5bb6de554a4dae3ffe4b4b..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-39.pyc b/verarbeitung/__pycache__/input_fj.cpython-39.pyc deleted file mode 100644 index 175f9ebbfdf5f3313196b4f10aa01dc2e8e20509..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-36.pyc b/verarbeitung/__pycache__/input_test.cpython-36.pyc deleted file mode 100644 index 85878d6d127d9d2bd5efe9130672d982bb70c5fa..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-38.pyc b/verarbeitung/__pycache__/input_test.cpython-38.pyc deleted file mode 100644 index df395212453392e135532b12396cd4c30a92ea05..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc deleted file mode 100644 index 68e42fd6a47a02787524c68816a42574834931d2..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-36.pyc b/verarbeitung/__pycache__/json_demo.cpython-36.pyc deleted file mode 100644 index 04acef5f40630ee2c7b6e887e33dc740b5e16a74..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc deleted file mode 100644 index 4a1e7ba987775a20fddaa4a8f846bb238670d6a1..0000000000000000000000000000000000000000 Binary files 
a/verarbeitung/__pycache__/json_demo.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc deleted file mode 100644 index 4e31ce337645d5282ddab11668bc6d745735f9f8..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/unittest.cpython-36.pyc b/verarbeitung/__pycache__/unittest.cpython-36.pyc deleted file mode 100644 index 245eb7f9be9221daa930d9fa83c77368ba463af7..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/unittest.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py deleted file mode 100644 index 44361c4b095f1c4fb0fce1868498d0e9da32f551..0000000000000000000000000000000000000000 --- a/verarbeitung/input_test.py +++ /dev/null @@ -1,82 +0,0 @@ -class Publication: - def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - if references is None: - self.references = [] - else: - self.references = ref(references) - if citations is None: - self.citations = [] - else: - self.citations = cit(citations) - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -def input_test_func(pub_doi): - for array in list_of_arrays: - if pub_doi == array[0]: - pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) - return pub - - -def cit(list_doi): - cits = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - cits.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return cits - -def ref(list_doi): - refs = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - refs.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return refs - - -beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], ''] -beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], ''] -beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], ''] - -zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], ''] -zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], ''] - -inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], ''] -inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], ''] -inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], ''] - -right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 
'journal_h01', 'date_h01', [], [], '']
-right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], '']
-right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], '']
-right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], '']
-right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], '']
-
-right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], '']
-right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], '']
-right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], '']
-right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], '']
-right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], '']
-
-list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3]
diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py
deleted file mode 100644
index b9f618d1a2dcac13ca51a530f365d40aa226bc11..0000000000000000000000000000000000000000
--- a/verarbeitung/json_demo.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python3
-import json
-from input_fj import input
-
-"""
-Functions that format the computed graph to match the interface of the output part
-
-"""
-
-# creates a list that contains a dictionary for each node
-# the dictionaries store the values for the node attributes
-def format_nodes(V):
-    list_of_node_dicts = list()
-    for node in V:
-        new_dict = dict()
-        new_dict["name"] = node.title
-        new_dict["author"] = node.contributors
-        new_dict["year"] = node.publication_date
-        new_dict["journal"] = node.journal
-        new_dict["doi"] = node.doi_url
-        new_dict["group"] = node.group
-        list_of_node_dicts.append(new_dict)
-    return list_of_node_dicts
-
-# creates a list that contains a dictionary for each edge
-# each dictionary stores the edge's source and target under the corresponding keys
-def format_edges(E):
-    list_of_edge_dicts = list()
-    for edge in E:
-        new_dict_2 = dict()
-        new_dict_2["source"] = edge[0]
-        new_dict_2["target"] = edge[1]
-        list_of_edge_dicts.append(new_dict_2)
-    return list_of_edge_dicts
-
-# combines the lists of nodes and edges into one dictionary and saves it to a json file
-def output_to_json(V,E):
-    dict_of_all = dict()
-    list_of_node_dicts = format_nodes(V)
-    list_of_edge_dicts = format_edges(E)
-    dict_of_all["nodes"] = list_of_node_dicts
-    dict_of_all["links"] = list_of_edge_dicts
-    with open('json_text.json','w') as outfile:
-        json.dump(dict_of_all, outfile)
-
-#knoten = ["doi1", "doi2", "doi3"]
-#kanten = [[1,2],[3,4],[5,6]]
-#output_to_json(knoten,kanten)
-
diff --git "a/verarbeitung/n\303\266tige Tests.txt" "b/verarbeitung/n\303\266tige Tests.txt"
deleted file mode 100644
index 95563280436fbf6b9b8702dffef6f32e213f5a16..0000000000000000000000000000000000000000
--- "a/verarbeitung/n\303\266tige Tests.txt"
+++ /dev/null
@@ -1,4 +0,0 @@
-Cycle
-Large cycle
-Complete inner edges
-
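For reference, a short sketch of the file layout that output_to_json() from the deleted json_demo.py writes. The stub node below only mimics the attributes that format_nodes() reads and is not a real Publication object; the sketch assumes json_demo.py and its own imports are importable from the working directory.

```python
# Sketch of the JSON structure written by output_to_json() to json_text.json.
from types import SimpleNamespace
from json_demo import output_to_json

# Hypothetical stand-in for a publication node; only the attributes read by
# format_nodes() are provided.
stub = SimpleNamespace(title="Example title", contributors=["A. Author"],
                       publication_date="2021", journal="Example Journal",
                       doi_url="doi1", group="input")

output_to_json([stub], [["doi1", "doi2"]])
# json_text.json then contains roughly:
# {"nodes": [{"name": "Example title", "author": ["A. Author"], "year": "2021",
#             "journal": "Example Journal", "doi": "doi1", "group": "input"}],
#  "links": [{"source": "doi1", "target": "doi2"}]}
```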