Skip to content
Snippets Groups Projects
Commit bcb61efa authored by David, Sebastian's avatar David, Sebastian
Browse files

Created the first running programm with almost ful

parent caeb7d20
No related branches found
No related tags found
1 merge request!8UI Hinzufügen
Showing
with 913 additions and 3 deletions
# Projekt CiS-Biochemie 2021-22
Automated citation loop finder for ACS articles.
\ No newline at end of file
import base64
import re
import dash
from dash import dcc
from dash import html
from dash import callback_context
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from input.interface import InputInterface
import input.publication
app = dash.Dash(__name__)
additional_options = ['Update Automatically','Smart Input']
app.layout = html.Div([
# Layer 0: For the Header and Help Function(s)
html.Div([
html.Button(id='show-info',children='Show Info',n_clicks=0),
html.Div(id='info-box')
]),
# Layer 1: For all mandatory Inputs
html.Div([
"Input: ",
dcc.Input(id='input-string', value='', type='text',debounce=True),
dcc.Input(id='forward-depth',value='1',type='number',min='1',max='10'),
dcc.Input(id='backward-depth',value='1',type='number',min='1',max='10'),
dcc.Upload(
id="upload-data",
children=html.Div(
["Drag and drop or click to select a file to upload."]
),
style={
"width": "30%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin": "10px",
})
]),
# Layer 2: For the checklist, Remove-/Start-Buttons and input-error-message
html.Div([
dcc.Checklist(id='input-checklist',options=[],labelStyle = dict(display='block'),value=[]),
html.Div(id='input-err',style={'color':'red'}),
html.Button(id='clear-all-button',children='Clear All'),
html.Button(id='clear-selected-button',children='Clear Selected'),
html.Button(id='start-button',children='Generate Graph')
]),
# Layer 3: For additional Options (e.g. Topological Sort)
html.Div([
html.H4('Additional Options'),
dcc.Checklist(id='additional-options',
options=[{'label':k,'value':k} for k in additional_options],
value=[])
]),
# Layer 4: For the Graph
html.Div([
html.Div(id='test-output')
])
])
'''
Most important callback function. Updates the checklist that holds all inputs.
input-string is required as Output to clear the input box after each input
'''
@app.callback(
Output('input-checklist','options'),
Output('input-checklist','value'),
Output('input-string','value'),
Output('input-err','children'),
Input('input-string','value'),
Input('clear-all-button','n_clicks'),
Input('clear-selected-button','n_clicks'),
Input('upload-data','contents'),
State('input-checklist','options'),
State('input-checklist','value'),
State('additional-options','value')
)
def update_input_checklist(input_value,btn1,btn2,filecontents,all_inputs,selected_inputs,additional_options):
'''
:param input_value: given by dcc.Input
:type input_value: string
:param btn1: signals pressing of clear-all-button
:param btn2: signals pressing of clear-selected-button
:param all_inputs: all labels and values from the checklist,
regardless if they have been checked or not
:type all_inputs: list of dictionaries with 2 entries each
:param selected_inputs: values of all checked elements
:type selected_inputs: list of strings
'''
changed_id = [p['prop_id'] for p in callback_context.triggered][0]
# if clear-all-button was pressed:
if 'clear-all-button' in changed_id:
return list(),list(),'',''
# if clear-selected-button was pressed:
if 'clear-selected-button' in changed_id:
all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs]
return all_inputs,list(),'',''
# when a new element is added via dcc.Input
if 'input-string' in changed_id:
options = all_inputs
currValues = [x['value'] for x in options]
if input_value not in currValues:
if 'Smart Input' in additional_options:
try:
i = InputInterface()
pub = i.get_publication(input_value)
except Exception as err:
return options,selected_inputs,'','{}'.format(err)
rep_str = pub.contributors[0] + ',' + pub.journal + ',' + pub.publication_date
options.append({'label':rep_str, 'value':input_value})
else:
options.append({'label':input_value,'value':input_value})
return options,selected_inputs,'',''
# when a txt-file is uploaded
if 'upload-data.contents' in changed_id:
if filecontents:
string = filecontents
found = base64.b64decode(re.search(',(.+?)$', string).group(1))
filecontents = found.decode('utf-8')
list_of_inputs = (filecontents.strip().split('\n'))
options = all_inputs
CurrValues = [x['value'] for x in options]
for input_value in list_of_inputs:
if input_value not in CurrValues:
if 'Smart Input' in additional_options:
try:
i = InputInterface()
pub = i.get_publication(input_value)
except Exception as err:
return options,selected_inputs,'','{}'.format(err)
rep_str = pub.contributors[0] + ',' + pub.journal + ',' + pub.publication_date
options.append({'label':rep_str, 'value':input_value})
else:
options.append({'label':input_value,'value':input_value})
return options,selected_inputs,'',''
# when the programm is first started:
if input_value == '':
app.layout['input-checklist'].options.clear()
return list(),list(),'',''
'''
This callback shows and hides the (first) help-box
'''
@app.callback(
Output('info-box','children'),
Input('show-info','n_clicks')
)
def show_hide_info_box(n_clicks):
if n_clicks % 2 == 0:
return ''
else:
return 'Hier koennte Ihre Werbung stehen'
'''
Basic structure for a callback that generates an output
'''
@app.callback(
Output('test-output','children'),
Input('start-button','n_clicks'),
Input('input-checklist','options'),
Input('input-checklist','value'),
Input('forward-depth','value'),
Input('backward-depth','value'),
State('additional-options','value')
)
def generate_output(n_clicks,all_inputs,selected_inputs,
forward_depth,backward_depth,additional_options):
'''
:param n_clicks: how often has Generate Graph been clicked
:type n_clicks: int
:param all_inputs: all labels and values from the checklist,
regardless if they have been checked or not
:type all_inputs: list of dictionaries with 2 entries each
:param selected_inputs: values of all checked elements
:type selected_inputs: list of strings
:param forward_depth: forward recursion depth
:type forward_depth: unsigned int
:param backward_depth: backward recursion depth
:type backward_depth: unsigned int
:param additional_options: value of all selected additional options
:type additional_options: list of strings
'''
changed_id = [p['prop_id'] for p in callback_context.triggered][0]
if n_clicks is None:
raise PreventUpdate
elif 'Update Automatically' in additional_options \
or 'start-button' in changed_id:
s = ''
for i in range(len(all_inputs)):
x = all_inputs[i]['value']
if x in selected_inputs:
s += x*(abs(int(forward_depth)-int(backward_depth)))
else:
s += x*(int(forward_depth)+int(backward_depth))
return s
else:
raise PreventUpdate
if __name__ == '__main__':
app.run_server(debug=True)
https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332
https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709
# Projekt CiS-Projekt 2021/22
Input-Package to fetch publication information with a given url.
## Usage/Examples
```python
from input.interface import InputInterface as Input
from input.publication import Publication
def main(url):
inter = Input()
try:
pub = inter.get_publication(url)
except Exception as error:
raise error
print(pub)
pub.title = "Cool new Title"
print(pub)
if __name__ == "__main__":
main("https://doi.org/10.1021/acs.chemrev.8b00728")
```
The expected results of calling this methode are:
| Input-Url | Result |
|-----------|-----------|
| supported & correct| A publication Instance |
| supported & uncorrect| ValueError|
| not supported | ValueError|
Supported Url are urls, which comply with the url-pattern of supported Journals.
### Supported Journals:
- ACS-Journals
- (Nature-Journals)
## Testing
``` c
python -m unittest input/test/<file.py> -v
# for all tests in directory
python -m unittest discover input/test -v
```
## Authors
- Florian Jochens
- Sam Ockenden
- Julius Schenk
\ No newline at end of file
File added
File added
File added
File added
File added
File added
File added
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication, Citation
class Fetcher(JournalFetcher):
"""
Specific Fetcher for the ACS journals.
"""
# Constant for the abbreviations of the supported Journals
SUPPORTED_JOURNALS = ['1021']
@staticmethod
def can_use_url(url: str) -> str:
"""
Uses Regex to extract journal specific substrings in Doi.
TODO: Support non Doi-urls
"""
matched_url = re.match(r'^(https?://)?(doi.org/|pubs.acs.org/doi/)?(10.(\d{4})/\w+.\S+)', url.strip(". \t\r\n"))
#Checks if match exists
if matched_url is not None:
return matched_url[4] in Fetcher.SUPPORTED_JOURNALS
else:
return False
@staticmethod
def get_pub_light(url: str) -> Publication:
"""
Fetches html and creates Beatifulsoup-instance in parent class.
Specific css-searches for ACS-Journals and creates Publication-instance.
"""
# Creation of Soup
try:
soup = JournalFetcher.get_soup(url)
except Exception as error:
raise error
# Raise Error if re recognizes Pattern, but url isnt correct:
# For other Urls
if soup.text.strip(" \t\n")=="Missing resource null":
raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
# For Dois
if soup.title is not None:
if soup.title.text == "Error: DOI Not Found":
raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
soup_header = soup.select('.article_header')[0]
# Creates Publication
doi_url = soup_header.select('a[title="DOI URL"]')[0].string
title = soup_header.select(".hlFld-Title")[0].text
contributors = []
for author in soup_header.select(".hlFld-ContribAuthor"):
contributors.append(author.text)
journal = soup_header.select(".cit-title")[0].text
# Replaces abbreviation with whole name
if journal in JournalFetcher.abbrev_dict:
journal = JournalFetcher.abbrev_dict[journal]
published = soup_header.select(".pub-date-value")[0].text
subjects = []
subject_soup = soup_header.select('.article_header-taxonomy')[0]
for subject in subject_soup.select('a'):
subjects.append(subject.text)
return Publication(doi_url, title, contributors, journal, published,
subjects)
def get_publication(url: str) -> Publication:
"""
Fetches html and creates Beatifulsoup-instance in parent class.
Specific css-searches for ACS-Journals and creates Publication-instance.
"""
# Creation of Soup
try:
soup = JournalFetcher.get_soup(url)
except Exception as error:
raise error
# Raise Error if re recognizes Pattern, but url isnt correct:
# For other Urls
if soup.text.strip(" \t\n")=="Missing resource null":
raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
# For Dois
if soup.title is not None:
if soup.title.text == "Error: DOI Not Found":
raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
soup_header = soup.select('.article_header')[0]
#Could be used for more specific search
ref_cit_soup = soup
# Creates Publication
doi_url = soup_header.select('a[title="DOI URL"]')[0].string
title = soup_header.select(".hlFld-Title")[0].text
contributors = []
for author in soup_header.select(".hlFld-ContribAuthor"):
contributors.append(author.text)
journal = soup_header.select(".cit-title")[0].text
# Replaces abbreviation with whole name
if journal in JournalFetcher.abbrev_dict:
journal = JournalFetcher.abbrev_dict[journal]
published = soup_header.select(".pub-date-value")[0].text
subjects = []
subject_soup = soup_header.select('.article_header-taxonomy')[0]
for subject in subject_soup.select('a'):
subjects.append(subject.text)
references = []
references_soup = ref_cit_soup.select('ol#references')
if references_soup != []:
for reference in references_soup[0].select('li'):
if reference.select('.refDoi') != []:
ref_doi = "https://doi.org/{}".format(reference.select('.refDoi')[0].text.strip()[5:])
else:
# No Doi -> No Paper
continue
ref_title = reference.select('.NLM_article-title')[0].text\
if reference.select('.NLM_article-title') != [] else None
ref_journal = reference.select('i')[0].text\
if reference.select('i') != [] else None
# Replaces abbreviation with whole name
if ref_journal in JournalFetcher.abbrev_dict:
ref_journal = JournalFetcher.abbrev_dict[ref_journal]
ref_contributors=[]
for author in reference.select('.NLM_contrib-group'):
ref_contributors.append(author.text.replace("\n", " ").replace("\r", ""))
references.append(Citation(ref_doi, ref_title, ref_journal, ref_contributors, cit_type="Reference"))
citations = []
citation_soup = ref_cit_soup.select('.cited-content_cbyCitation')
if citation_soup != []:
for citation in citation_soup[0].select('li'):
if citation.select('a[title="DOI URL"]') != []:
cit_doi = citation.select('a[title="DOI URL"]')[0].text
else:
# No Doi -> No Paper
continue
cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text\
if citation.select('.cited-content_cbyCitation_article-title')!= [] else None
cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text\
if citation.select('.cited-content_cbyCitation_journal-name') != [] else None
# Replaces abbreviation with whole name
if cit_journal in JournalFetcher.abbrev_dict:
cit_journal = JournalFetcher.abbrev_dict[cit_journal]
cit_contributors =[]
cit_contributors = citation.select('.cited-content_cbyCitation_article-contributors')[0]\
.text.replace("\n", " ").replace("\r", "").split(', ')
# clean up of the last Entry
cit_contributors_last = cit_contributors.pop().strip(". ")
if cit_contributors_last != '':
cit_contributors.append(cit_contributors_last)
citations.append(Citation(cit_doi, cit_title, cit_journal, cit_contributors, cit_type = "Citation"))
return Publication(doi_url, title, contributors, journal, published
, subjects, references, citations)
#!/usr/bin/env python3
"""
Parent class for specific Journal
"""
from abc import ABCMeta, abstractmethod
from bs4 import BeautifulSoup
import requests
from input.publication import Publication
class JournalFetcher(metaclass=ABCMeta):
"""
This is a abstract-class for fetcher modules
"""
@staticmethod
def get_soup(url: str) -> BeautifulSoup:
"""
Retrieves webside-html and returns a BeautifulSoup-instance
Parameters:
-----------
:type url: str
:param url: doi-url to a publication
:return: BeatifulSoup-instance
"""
try:
req = requests.get(url)
except requests.exceptions.HTTPError as err:
raise SystemExit(err)
return BeautifulSoup(req.content, 'html.parser')
@staticmethod
@abstractmethod
def can_use_url(url: str) -> bool:
"""
Abstract-function to be implemented in subclass.
Checks if given url links to a supported journal
"""
raise AttributeError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url))
@staticmethod
@abstractmethod
def get_publication(url: str) -> Publication:
"""
Abstract-function to be implemented in subclass.
Creates a Publication-instance.
"""
raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url))
# A Dictionary, which connects abbreviation to whole journal-name
abbrev_dict = {
"Nat. Protoc.":"Journal of Natural Products"
,"PLoS Comput. Biol.":"PLoS Computational Biology"
,"PLoS One":"PLoS One"
,"Protein Sci.":"Protein Science"
,"J. Am. Chem. Soc.":"Journal of the American Chemical Society"
,"J. Chem. Phys.":"Journal of Chemical Physics"
,"Appl. Sci.":"Applied Science"
,"Comput. Sci. Eng.":"Computing in Science & Engineering"
,"Beilstein J. Org. Chem.":"Beilstein Journal of Organic Chemistry"
,"Biol. Chem.":"Biological Chemistry"
,"Isr. J. Chem.":"Israel Journal of Chemistry"
,"Nat. Methods":"Nature Methods"
,"Proc. Natl. Acad. Sci. U. S. A.":"Proceedings of the National Academy of Sciences of the United States of America"
,"J. Phys. Chem. B":"Journal of Physical Chemistry B"
,"Carbohydr. Res.":"Carbohydrate Research"
,"J. Chem. Theory Comput.":"Journal of Chemical Theory and Computation"
,"J. Mol. Biol.":"Journal of Molecular Biology"
,"Nucleic Acids Res.":"Nucleic Acids Research"
,"J. Comput. Chem.":"Journal of Computational Chemistry"
,"J. Cheminf.":"Journal of Cheminformatics"
,"J. Med. Chem.":"Journal of Medicinal Chemistry"
,"J. Comput.-Aided Mol. Des.":"Journal of Computer-Aided Molecular Design"
,"J. Chem. Inf. Model.":"Journal of Chemical Information and Modeling"
,"Mol. Cell":"Molecular Cell"
,"J. Cell Biolog.":"Journal of Cell Biology"
,"Mol. Cell Biol.":"Molecular and Cellular Biology"
,"J. Cell Sci.":"Journal of Cell Science"
,"Nat. Cell Biol.":"Nature Cell Biology"
,"J. Aerosol Sci. Technol.":"Aerosol Science and Technology"
,"Mol. Biol. Cell":"Molecular Biology of the Cell"
,"Build. Environ.":"Building and Environment"
,"Sci. Rep.":"Scientific Reports"
,"Nat. Chem.":"Nature Chemistry"
,"Nat. Med.":"Nature Medicine"
,"Nat. Commun.":"Nature Communications"
,"Exp. Cell Res.":"Experimental Cell Research"
,"Nat. Chem. Biol.":"Nature Chemical Biology"
}
\ No newline at end of file
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
"""
scrapes publication metadata from a provided url
"""
# TODO: List of Compatable Journals
# NOTE: nature does not use journal names in doi links, must match by 10.xxxx identifier instead
SUPPORTED_JOURNALS = []
@staticmethod
def can_use_url(url: str) -> bool:
"""
Checks if given url links to a supported journal.
"""
# TODO: Check the URL for compatability
# re.match in SUPPORTED_JOURNALS
return False
@staticmethod
def get_publication(url: str) -> Publication:
"""
Creates a Publication-instance.
"""
soup = JournalFetcher.get_soup(url)
_doi_url = "https://doi.org/" + soup.head.find(attrs={"name": "DOI"}).get("content")
_title = soup.head.find(attrs={"name": "citation_title"}).get("content")
_journal = soup.head.find(attrs={"name": "citation_journal_title"}).get("content")
_published = soup.head.find(attrs={"name": "prism.publicationDate"}).get("content")
_contributors = []
_subjects = []
for creator in soup.head.findAll(attrs={"name": "dc.creator"}):
_contributors.append(creator.get("content"))
for subject in soup.head.findAll(attrs={"name": "dc.subject"}):
_subjects.append(subject.get("content"))
return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects)
# TODO: Exceptions-handling
# raise ValueException("Cant Fetch: '{}'".format(error))
# return None
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: None, this is just a template and should be ignored
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
"""
This is only a template and therefore has no functionality
"""
# TODO: Naming-Convention:
# Class: 'Fetcher'
# file: [journal-/organisation-name]
# format = "[a-z]*.py" allowed
# TODO: List of Compatable Journals
SUPPORTED_JOURNALS = []
@staticmethod
def can_use_url(url: str) -> bool:
"""
Checks if given url links to a supported journal.
"""
# TODO: Check the URL for compatability
# url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url)
# if url_re is not None:
# return url_re[4] in SUPPORTED_JOURNALS
# else:
return False
@staticmethod
def get_publication(url: str) -> Publication:
"""
Creates a Publication-instance.
"""
# TODO: Fetch data from the HTML
# soup = JournalFetcher.get_soup(url)
# doi,title,contributors[],journal,publication_date,subjects[],references[],citations[]
# TODO: Create new Publication-instance
# return Publication(doi_url, title, contributors = [], journal
# , publication_date, subjects = [], references = [], citations = [])
return None
\ No newline at end of file
#!/usr/bin/env python3
"""
Interface for the Input-Package only this should be accessed from outside this Package.
"""
from os import walk
import importlib
import pathlib
import re
from input.publication import Publication
class InputInterface:
"""
Singleton which dynamically imports and manages fetchers
"""
instance = None
get_path = None
fetcher_classes=[]
# '__new__' is called before '__init__' and gives us an instance
def __new__(cls, *args, **kwargs):
# checks if an instance exists and if it doesnt creates one
if cls.instance == None:
cls.instance = super(InputInterface, cls).__new__(cls,*args, **kwargs)
return cls.instance
def __init__(self):
# imports all modules
if self.fetcher_classes ==[]:
self.import_fetcher_classes()
if self.fetcher_classes ==[]:
raise AttributeError("No specific Fetchers where found at: '{}'"
.format(self.get_path))
def get_publication(self, url: str) -> Publication:
"""
The interface-method to get a Publication-instance
(including it's citations and references)
Parameters
----------
:param url: url to a Publication
:type url: str
:return: Publication instance or None if not supported
"""
# Checks if module supports the 'url' and
# returns a Publication if it does.
for fetcher_class in InputInterface.fetcher_classes:
if fetcher_class.can_use_url(url):
return fetcher_class.get_publication(url)
# No Module for given url was found
raise ValueError("'{}' is not supported".format(url))
def get_pub_light(self, url: str) -> Publication:
"""
The interface-method to get a Publication-instance
(only for main article)
Parameters
----------
:param url: url to a Publication
:type url: str
:return: Publication instance or None if not supported
"""
# Checks if module supports the 'url' and
# returns a Publication if it does.
for fetcher_class in InputInterface.fetcher_classes:
if fetcher_class.can_use_url(url):
return fetcher_class.get_pub_light(url)
# No Module for given url was found
raise ValueError("'{}' is not supported".format(url))
def get_supported_fetchers(self):
# print(self.fetcher_classes[0].__name__) Useless right now,
# because all classes are called the same
return [a.__name__ for a in self.fetcher_classes]
def import_fetcher_classes(self):
"""
Searches in 'get', if there are [a-z]*.py modules (specific Fetchers)
and tries to import them.
Saves found modules in 'fetcher_files'.
"""
# Path to 'get'-package
self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve())
# Searches for modules with given Pattern
fetcher_file_names=[]
for file in next(walk(self.get_path), (None, None, []))[2]:
if re.match(r'[a-z]+.py', file) is not None:
fetcher_file_names.append(file)
# Tries to import those modules and saves their 'Fetcher'-class
for file in fetcher_file_names:
try:
fetcher_class = importlib.import_module("input.get.{}".format(file[:-3]))
try:
self.fetcher_classes.append(fetcher_class.__getattribute__('Fetcher'))
except Exception as error:
ImportError("Module '{}' does not have a 'Fetcher'-class".format(file[:-3]))
except Exception:
raise ImportError("Module '{}' can not be imported".format(file[:-3]))
#!/usr/bin/env python3
# this is needed for typing pre python 3.9, this maybe as an large Overhead
from typing import Any, List
class Publication:
"""
Represents a Publications
"""
def __init__(self, doi_url: str, title: str \
, contributors: List[str], journal: str \
, publication_date: str, subjects: List[str]\
, references: List[Any] = None, citations: List[Any] = None ):
"""
Parameters
----------
:param doi_url: doi_url of the publication
:type doi_url: str
:param title: title of the publication
:type title: str
:param contributors:list of all contributors
:type contributors: list[]
:param published: date of release
:type published: str
:param subjects: the subject of the Publication
:type subjects: List[str]
:param references: the Citation which is been referenced by this Publication
:type references: List[Any]
:param citations: the Citation which references this Publication
:type citations: List[Any]
:return: None
"""
self.doi_url = doi_url
self.title = title
self.contributors = contributors
self.journal = journal
self.publication_date = publication_date
self.subjects = subjects
if references is None:
self.references = []
else:
self.references = references
if citations is None:
self.citations = []
else:
self.citations = citations
# For the 'Verarbeitungsgruppe'
self.group = None
def __str__(self) -> str:
return ("Title: {}\n"
"Doi-url: {}\n"
"Authors: {}\n"
"Journal: {}\n"
"Published on: {}\n"
"Subjects: {}\n"
"References: \n{}\n"
"Citations: \n{}")\
.format(self.title, self.doi_url, ", ".join(self.contributors)
, self.journal, self.publication_date
, ", ".join(self.subjects)
, "\n".join(self.get_citation_string(self.references))
, "\n".join(self.get_citation_string(self.citations)))
@staticmethod
def get_citation_string(citations):
if citations == []:
return ["None"]
else:
citation_string = []
for citation in citations:
citation_string.append(citation.__str__())
return citation_string
def add_citations(self, citation) -> None:
"""
Appends a list of Citations or Citation to self.citations.
Parameter
---------
:param citation: Citation or Reference of the Publication
:type citation: Citation or list[Citation]
:return: self.citations
"""
if type(citation) is Citation:
self.citations.append(citation)
# Checks if 'citation' is a list of Citations
elif type(citation) is list:
for _cit in citation:
if type(_cit) is Citation:
self.citations.append(_cit)
else:
raise TypeError("_set_citation expects Citations or List of Citations, not: '{}'"
.format(type(_cit)))
else:
raise TypeError("_set_citation expects Citations or List of Citations, not: '{}'"
.format(type(citation)))
return self.citations
def __eq__(self, other) -> bool:
""" Compares the unique doi_url of two Publications"""
if type(self)==type(other):
return self.doi_url == other.doi_url
return False
class Citation:
def __init__(self, doi_url: str, title: str \
, journal: str, contributors: List[str] \
, cit_type: str = "Citation"):
"""
Parameters
----------
:param doi_url: doi_url of the publication
:type doi_url: str
:param title: title of the publication
:type title: str
:param contributors: list of all contributors
:type contributors: List[str]
:param cit_type: Specifies if Reference or Citation
:type cit_type: str
:return: None
"""
self.title = title
self.doi_url = doi_url
self.journal = journal
self.contributors = contributors
self.cit_type = cit_type
def __str__(self) -> str:
return ("\t{}-Title: {}\n"
"\t{}-Doi: {}\n"
"\t{}-Journal: {}\n"
"\t{}-Contributors: {}\n")\
.format(self.cit_type, self.title
, self.cit_type, self.doi_url
, self.cit_type, self.journal
, self.cit_type, ", ".join(self.contributors))
beautifulsoup4
requests
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment