From 95f3539719090754909ccf4e5d6e26f3440d77e4 Mon Sep 17 00:00:00 2001 From: Johann Jacobsohn <j.jacobsohn@satzmedia.de> Date: Fri, 14 Aug 2020 18:02:28 +0200 Subject: [PATCH] cleanup --- papersurfer/bibtex.py | 13 ++ papersurfer/doi.py | 77 ++++++++++++ papersurfer/dtos.py | 27 ++++ papersurfer/exceptions.py | 5 + papersurfer/mattermost.py | 95 ++++++++++++++ papersurfer/papersurfer.py | 248 +------------------------------------ papersurfer/ui_elements.py | 38 ++++++ 7 files changed, 261 insertions(+), 242 deletions(-) create mode 100644 papersurfer/bibtex.py create mode 100644 papersurfer/doi.py create mode 100644 papersurfer/dtos.py create mode 100644 papersurfer/exceptions.py create mode 100644 papersurfer/mattermost.py create mode 100644 papersurfer/ui_elements.py diff --git a/papersurfer/bibtex.py b/papersurfer/bibtex.py new file mode 100644 index 0000000..bd1a031 --- /dev/null +++ b/papersurfer/bibtex.py @@ -0,0 +1,13 @@ +"""Simplified DOI interface.""" +from doi import Doi + + +class Bibtex: + """Interface for bibtex string.""" + def entry_from_doi(self, doi): + """Get bibtex string for doi.""" + return Doi().get_bibtex(doi) + + def bib_from_dois(self, dois): + """Get bibtex string for mulitple dois.""" + return "\n".join([Doi().get_bibtex(doi) for doi in dois]) diff --git a/papersurfer/doi.py b/papersurfer/doi.py new file mode 100644 index 0000000..85f6934 --- /dev/null +++ b/papersurfer/doi.py @@ -0,0 +1,77 @@ +"""Simplified DOI interface.""" +import json +import re +import requests +from dtos import PaperDTO + + +class Doi: + """Interface w/ the doi.org api.""" + def get_doi_link(self, doi): + """Assemble doi link.""" + return f"http://doi.org/{doi}" + + def load_doi_data(self, doi): + """Load data for doi.""" + headers = { + 'Accept': 'application/json', + } + return requests.get(f'http://dx.doi.org/{doi}', + headers=headers).content + + def parse_doi_json(self, jsoncontent): + """Tranform doi json to PaperDTO.""" + info = json.loads(jsoncontent) + + with open("debug.json", "w") as file: + file.write(json.dumps(info)) + + author = (f"{info['author'][0]['given']} {info['author'][0]['family']}" + if "author" in info + else "Author N/A") + authors = (", ".join([f"{a['given']} {a['family']}" + for a in info['author']]) + if "author" in info + else "Authors N/A") + title = (info['title'] + if "title" in info and isinstance(info['title'], str) + else "Title N/A") + journal = (info['publisher'] + if "publisher" in info + else "Journal N/A") + year = info['created']['date-parts'][0][0] + doi = info['DOI'] + abstract = (info['abstract'] + if "abstract" in info + else "Abstract N/A") + + slug = f"{info['author'][0]['family']}{year}" + + return PaperDTO(author, authors, title, journal, year, abstract, doi, + slug) + + def get_bibtex(self, doi): + """Get bibtex string for doi.""" + headers = { + 'Accept': 'text/bibliography; style=bibtex', + } + return requests.get(f'http://dx.doi.org/{doi}', headers=headers).text + + def get_info(self, doi): + """Get information for doi.""" + try: + jsoncontent = self.load_doi_data(doi) + data = self.parse_doi_json(jsoncontent) + return data + except json.decoder.JSONDecodeError: + return None + + def extract_doi(self, hay): + """Parse doi from string, or None if not found. + + >>> Doi().extract_doi("https://doi.org/10.1093/petrology/egaa077") + '10.1093/petrology/egaa077' + """ + pattern = r'\b10\.\d{4,9}/[-._;()/:A-Z0-9]+' + matches = re.compile(pattern, re.I).search(hay) + return matches.group() if matches else None diff --git a/papersurfer/dtos.py b/papersurfer/dtos.py new file mode 100644 index 0000000..fd24811 --- /dev/null +++ b/papersurfer/dtos.py @@ -0,0 +1,27 @@ +"""Data transfer objects.""" +from dataclasses import dataclass + + +@dataclass +class PostDTO: + """Encapsulate Mattermost Posts.""" + id: str + message: str + reporter: str + doi: str + + def __str__(self): + return self.message + + +@dataclass +class PaperDTO: + """Encapsulate Paper meta data.""" + author: str + authors: str + title: str + journal: str + year: int + abstract: str + doi: str + slug: str diff --git a/papersurfer/exceptions.py b/papersurfer/exceptions.py new file mode 100644 index 0000000..debc398 --- /dev/null +++ b/papersurfer/exceptions.py @@ -0,0 +1,5 @@ +"""Assorted Exceptions.""" + + +class ConfigError(Exception): + """Configuration error.""" diff --git a/papersurfer/mattermost.py b/papersurfer/mattermost.py new file mode 100644 index 0000000..fff1818 --- /dev/null +++ b/papersurfer/mattermost.py @@ -0,0 +1,95 @@ +"""Simplified mattermost interface.""" +from exceptions import ConfigError +import requests +import mattermostdriver +from dtos import PostDTO +from doi import Doi + + +class Mattermost: + """Provide a simplified interaction w/ mattermost api.""" + def __init__(self, url, channelname, username, password): + self.msgs = [] + self.mattermost = mattermostdriver.Driver({ + 'url': url, + 'login_id': username, + 'password': password, + 'port': 443 + }) + + try: + self.mattermost.login() + except (mattermostdriver.exceptions.NoAccessTokenProvided, + requests.exceptions.InvalidURL, + requests.exceptions.HTTPError): + print("Failed to log into Mattermost.") + raise ConfigError + + try: + self.channel = self.get_channel(channelname) + except ConfigError: + print("Couldn't find Mattermost channel.") + raise ConfigError + self.reporters = {} + + def get_channel(self, channelname): + """Try to find the paper channel by display name.""" + teamapi = self.mattermost.teams + channelapi = self.mattermost.channels + teams = [team["id"] for team in teamapi.get_user_teams("me")] + channels = [] + for team in teams: + teamchannels = [channel for channel + in channelapi.get_channels_for_user("me", team) + if channel["display_name"] == channelname] + channels.extend(teamchannels) + + # lets just hope no-one has the same channel name in multiple teams + if len(channels) == 0: + print(f"Channel {channelname} does not exits") + raise ConfigError + return channels[0]["id"] + + def get_reporter(self, userid): + """Load user from mattermost api and cache.""" + userapi = self.mattermost.users + if userid not in self.reporters: + self.reporters[userid] = userapi.get_user(userid)["username"] + + return self.reporters[userid] + + def retrieve_all_messages(self): + """Retrieve all messages from mattermost, unfiltered for papers.""" + posts = self.mattermost.posts.get_posts_for_channel(self.channel) + return [PostDTO(id=m['id'], message=m['message'], + reporter=self.get_reporter(m['user_id']), + doi=Doi().extract_doi(m['message']),) + for m in posts['posts'].values()] + + def filter_incoming(self, posts): + """Filter messages from mattermost to only papers.""" + return [p for p in posts if "doi" in p.message] + + def retrieve(self): + """Retrieve papers from mattermost channel.""" + msgs = self.retrieve_all_messages() + self.msgs = self.filter_incoming(msgs) + return self.msgs + + def check_doi_exits(self, doi): + """Check for doi in current paper list.""" + doi_needle = Doi().extract_doi(doi) + msg_found = [msg for msg in self.msgs + if Doi().extract_doi(msg.doi) == doi_needle] + return bool(msg_found) + + def get_filtered(self, needle): + """Filter posts by needle.""" + return [m for m in self.msgs + if needle.lower() in m.message.lower() + or needle.lower() in m.reporter.lower()] + + def post(self, message): + """Post message to thread.""" + self.mattermost.posts.create_post({"channel_id": self.channel, + "message": message}) diff --git a/papersurfer/papersurfer.py b/papersurfer/papersurfer.py index 5165923..709dc8d 100644 --- a/papersurfer/papersurfer.py +++ b/papersurfer/papersurfer.py @@ -10,254 +10,18 @@ UI: """ import subprocess -from dataclasses import dataclass -import re from functools import partial -import json import time import os import sys -import requests -import mattermostdriver +import re +from exceptions import ConfigError import urwid import configargparse - - -class ConfigError(Exception): - """Configuration error.""" - - -@dataclass -class PostDTO: - """Encapsulate Mattermost Posts.""" - id: str - message: str - reporter: str - doi: str - - def __str__(self): - return self.message - - -@dataclass -class PaperDTO: - """Encapsulate Paper meta data.""" - author: str - authors: str - title: str - journal: str - year: int - abstract: str - doi: str - slug: str - - -class Bibtex: - """Interface for bibtex string.""" - def entry_from_doi(self, doi): - """Get bibtex string for doi.""" - return Doi().get_bibtex(doi) - - def bib_from_dois(self, dois): - """Get bibtex string for mulitple dois.""" - return "\n".join([Doi().get_bibtex(doi) for doi in dois]) - - -class Doi: - """Interface w/ the doi.org api.""" - def get_doi_link(self, doi): - """Assemble doi link.""" - return f"http://doi.org/{doi}" - - def load_doi_data(self, doi): - """Load data for doi.""" - headers = { - 'Accept': 'application/json', - } - return requests.get(f'http://dx.doi.org/{doi}', - headers=headers).content - - def parse_doi_json(self, jsoncontent): - """Tranform doi json to PaperDTO.""" - info = json.loads(jsoncontent) - - with open("debug.json", "w") as file: - file.write(json.dumps(info)) - - author = (f"{info['author'][0]['given']} {info['author'][0]['family']}" - if "author" in info - else "Author N/A") - authors = (", ".join([f"{a['given']} {a['family']}" - for a in info['author']]) - if "author" in info - else "Authors N/A") - title = (info['title'] - if "title" in info and isinstance(info['title'], str) - else "Title N/A") - journal = (info['publisher'] - if "publisher" in info - else "Journal N/A") - year = info['created']['date-parts'][0][0] - doi = info['DOI'] - abstract = (info['abstract'] - if "abstract" in info - else "Abstract N/A") - - slug = f"{info['author'][0]['family']}{year}" - - return PaperDTO(author, authors, title, journal, year, abstract, doi, - slug) - - def get_bibtex(self, doi): - """Get bibtex string for doi.""" - headers = { - 'Accept': 'text/bibliography; style=bibtex', - } - return requests.get(f'http://dx.doi.org/{doi}', headers=headers).text - - def get_info(self, doi): - """Get information for doi.""" - try: - jsoncontent = self.load_doi_data(doi) - data = self.parse_doi_json(jsoncontent) - return data - except json.decoder.JSONDecodeError: - return None - - def extract_doi(self, hay): - """Parse doi from string, or None if not found. - - >>> Doi().extract_doi("https://doi.org/10.1093/petrology/egaa077") - '10.1093/petrology/egaa077' - """ - pattern = r'\b10\.\d{4,9}/[-._;()/:A-Z0-9]+' - matches = re.compile(pattern, re.I).search(hay) - return matches.group() if matches else None - - -class Mattermost: - """Provide a simplified interaction w/ mattermost api.""" - def __init__(self, url, channelname, username, password): - self.msgs = [] - self.mattermost = mattermostdriver.Driver({ - 'url': url, - 'login_id': username, - 'password': password, - 'port': 443 - }) - - try: - self.mattermost.login() - except (mattermostdriver.exceptions.NoAccessTokenProvided, - requests.exceptions.InvalidURL, - requests.exceptions.HTTPError): - print("Failed to log into Mattermost.") - raise ConfigError - - try: - self.channel = self.get_channel(channelname) - except ConfigError: - print("Couldn't find Mattermost channel.") - raise ConfigError - self.reporters = {} - - def get_channel(self, channelname): - """Try to find the paper channel by display name.""" - teamapi = self.mattermost.teams - channelapi = self.mattermost.channels - teams = [team["id"] for team in teamapi.get_user_teams("me")] - channels = [] - for team in teams: - teamchannels = [channel for channel - in channelapi.get_channels_for_user("me", team) - if channel["display_name"] == channelname] - channels.extend(teamchannels) - - # lets just hope no-one has the same channel name in multiple teams - if len(channels) == 0: - print(f"Channel {channelname} does not exits") - raise ConfigError - return channels[0]["id"] - - def get_reporter(self, userid): - """Load user from mattermost api and cache.""" - userapi = self.mattermost.users - if userid not in self.reporters: - self.reporters[userid] = userapi.get_user(userid)["username"] - - return self.reporters[userid] - - def retrieve_all_messages(self): - """Retrieve all messages from mattermost, unfiltered for papers.""" - posts = self.mattermost.posts.get_posts_for_channel(self.channel) - return [PostDTO(id=m['id'], message=m['message'], - reporter=self.get_reporter(m['user_id']), - doi=Doi().extract_doi(m['message']),) - for m in posts['posts'].values()] - - def filter_incoming(self, posts): - """Filter messages from mattermost to only papers.""" - return [p for p in posts if "doi" in p.message] - - def retrieve(self): - """Retrieve papers from mattermost channel.""" - msgs = self.retrieve_all_messages() - self.msgs = self.filter_incoming(msgs) - return self.msgs - - def check_doi_exits(self, doi): - """Check for doi in current paper list.""" - doi_needle = Doi().extract_doi(doi) - msg_found = [msg for msg in self.msgs - if Doi().extract_doi(msg.doi) == doi_needle] - return bool(msg_found) - - def get_filtered(self, needle): - """Filter posts by needle.""" - return [m for m in self.msgs - if needle.lower() in m.message.lower() - or needle.lower() in m.reporter.lower()] - - def post(self, message): - """Post message to thread.""" - self.mattermost.posts.create_post({"channel_id": self.channel, - "message": message}) - - -class PrettyButton(urwid.WidgetWrap): - """Prettified urwid Button.""" - def __init__(self, label, on_press=None, user_data=None): - self.label = "" - self.text = urwid.Text("") - self.set_label(label) - self.widget = urwid.AttrMap(self.text, '', 'highlight') - - # use a hidden button for evt handling - self._hidden_btn = urwid.Button(f"hidden {self.label}", - on_press, user_data) - - super().__init__(self.widget) - - def selectable(self): - """Make button selectable.""" - return True - - def keypress(self, *args, **kw): - """Handle keypresses.""" - return self._hidden_btn.keypress(*args, **kw) - - def mouse_event(self, *args, **kw): - """Handle mouse events.""" - return self._hidden_btn.mouse_event(*args, **kw) - - def get_label(self): - """Return current input label.""" - return self.label - - def set_label(self, label): - """Return current input label.""" - self.label = label - self.text.set_text(f"[ {label} ]") +from ui_elements import PrettyButton +from mattermost import Mattermost +from doi import Doi +from bibtex import Bibtex class Papersurfer: diff --git a/papersurfer/ui_elements.py b/papersurfer/ui_elements.py new file mode 100644 index 0000000..5270be1 --- /dev/null +++ b/papersurfer/ui_elements.py @@ -0,0 +1,38 @@ +"""Assorted UI elements.""" +import urwid + + +class PrettyButton(urwid.WidgetWrap): + """Prettified urwid Button.""" + def __init__(self, label, on_press=None, user_data=None): + self.label = "" + self.text = urwid.Text("") + self.set_label(label) + self.widget = urwid.AttrMap(self.text, '', 'highlight') + + # use a hidden button for evt handling + self._hidden_btn = urwid.Button(f"hidden {self.label}", + on_press, user_data) + + super().__init__(self.widget) + + def selectable(self): + """Make button selectable.""" + return True + + def keypress(self, *args, **kw): + """Handle keypresses.""" + return self._hidden_btn.keypress(*args, **kw) + + def mouse_event(self, *args, **kw): + """Handle mouse events.""" + return self._hidden_btn.mouse_event(*args, **kw) + + def get_label(self): + """Return current input label.""" + return self.label + + def set_label(self, label): + """Return current input label.""" + self.label = label + self.text.set_text(f"[ {label} ]") -- GitLab