diff --git a/papersurfer/dtos.py b/papersurfer/dtos.py
index fd2481118fadd92b16989e37c2cd60d30a192bcc..ce8ff499f184767ac4396c42551afe6013fac245 100644
--- a/papersurfer/dtos.py
+++ b/papersurfer/dtos.py
@@ -1,4 +1,5 @@
 """Data transfer objects."""
+# pylint: disable=too-many-instance-attributes
 from dataclasses import dataclass
 
 
@@ -6,6 +7,7 @@ from dataclasses import dataclass
 class PostDTO:
     """Encapsulate Mattermost Posts."""
     id: str
+    create_at: int
     message: str
     reporter: str
     doi: str
diff --git a/papersurfer/mattermost.py b/papersurfer/mattermost.py
index 140ffc0f67f011bff75cf5d923d4830d9697f82f..290a187c0b6cb6ecf28bd66950f293c92b82d922 100644
--- a/papersurfer/mattermost.py
+++ b/papersurfer/mattermost.py
@@ -1,5 +1,7 @@
 """Simplified mattermost interface."""
+import logging
+import time
 import requests
 import mattermostdriver
 from .exceptions import ConfigError
 from .dtos import PostDTO
@@ -65,23 +67,47 @@ class Mattermost:
 
         return self._reporters[userid]
 
-    def _retrieve_all_posts(self):
+    def _retrieve_all_posts(self, since=None):
         """Retrieve all posts from mattermost, unfiltered for papers."""
-        posts = self._mattermost.posts.get_posts_for_channel(self._channel)
-        return [PostDTO(id=m['id'], message=m['message'],
+        posts = []
+        # Only send the "since" filter when the caller supplied one.
+        params = {"since": since} if since else {}
+
+        start = time.perf_counter()
+
+        # Page through the channel; the response's "prev_post_id" is fed back
+        # as "before" until it comes back empty or missing.
+        while True:
+            resp = self._mattermost.posts.get_posts_for_channel(
+                self._channel, params)
+            posts.extend(resp['posts'].values())
+            prev_post_id = resp.get("prev_post_id")
+            if not prev_post_id:
+                break
+            params["before"] = prev_post_id
+
+        post_dtos = [PostDTO(
+            id=m['id'],
+            create_at=m['create_at'],
+            message=m['message'],
             reporter=self._get_reporter(m['user_id']),
             doi=Doi().extract_doi(m['message']),)
-            for m in posts['posts'].values()]
+            for m in posts]
+
+        logging.debug("retrieving and processing %i mattermost posts took %f",
+                      len(posts), time.perf_counter() - start)
+
+        return post_dtos
 
     def _filter_incoming(self, posts):
         """Filter posts from mattermost to only papers."""
         return [p for p in posts if "doi" in p.message]
 
-    def retrieve(self):
+    def retrieve(self, since=None):
         """Retrieve papers from mattermost channel."""
         if not self._loggedin:
             self._login()
-        posts = self._retrieve_all_posts()
+        posts = self._retrieve_all_posts(since)
         self.posts = self._filter_incoming(posts)
         return self.posts
 
diff --git a/papersurfer/papersurfer.py b/papersurfer/papersurfer.py
index 48a97c00d5e1a05f8a87903f22ec4e099c3b84a8..08f2549f9987d378722adc5f29382a0e70f81fd1 100644
--- a/papersurfer/papersurfer.py
+++ b/papersurfer/papersurfer.py
@@ -15,6 +15,7 @@ import time
 import os
 import sys
 import re
+import logging
 import urwid
 import configargparse
 from tinydb import TinyDB, Query
@@ -56,7 +57,10 @@ class Papersurfer:
     def load(self):
         """Load data from mattermost and save to storage."""
         self._connect_db()
-        posts = self.mattermost.retrieve()
+        # Resume from the newest stored post; None means no "since" filter.
+        latest = self.get_latest_post()
+        since = latest.get("create_at") if latest else None
+        posts = self.mattermost.retrieve(since)
         self._update_db(posts=posts)
 
     def _connect_db(self):
@@ -66,6 +70,17 @@ class Papersurfer:
         if not self.db_papers:
             self.db_papers = TinyDB(f"{self.db_path}/{self.db_file_papers}")
 
+    def get_latest_post(self):
+        """Return the stored post with the highest create_at, or None.
+
+        Records without a create_at value sort as oldest.
+        """
+        self._connect_db()
+        posts = self.db_posts.all()
+        if not posts:
+            return None
+        return max(posts, key=lambda p: p.get("create_at", 0))
+
     def _update_db(self, posts=[], papers=[]):
         """"Merge new data into database."""
         self._upsert_multiple(posts, self.db_posts)
@@ -84,7 +99,9 @@ class Papersurfer:
     def get_posts(self):
         """Get all posts in storage."""
         self._connect_db()
-        return [PostDTO(p["id"], p["message"], p["reporter"], p["doi"])
+        # Fall back to 0 for stored records that lack create_at.
+        return [PostDTO(p["id"], p.get("create_at", 0), p["message"],
+                        p["reporter"], p["doi"])
                 for p in self.db_posts.all()]
 
     def get_posts_filtered(self, needle=None):
@@ -564,6 +581,9 @@ def just_bibtex(url, channel, username, password):
 def main():
     """Run main program."""
     opt = parse_args()
+
+    logging.basicConfig(filename='papersurfer.log', level=logging.DEBUG)
+
     if opt.dump_posts:
         just_papers(opt.url, opt.channel, opt.username, opt.password)
     if opt.dump_bibtex: