Commit e4bdc6b6 authored by Johann Jacobsohn

retrieve all and only new mattermost posts; introduce logging

parent fe3dca8e
"""Data transfer objects.""" """Data transfer objects."""
# pylint: disable=too-many-instance-attributes
from dataclasses import dataclass from dataclasses import dataclass
...@@ -6,6 +7,7 @@ from dataclasses import dataclass ...@@ -6,6 +7,7 @@ from dataclasses import dataclass
class PostDTO: class PostDTO:
"""Encapsulate Mattermost Posts.""" """Encapsulate Mattermost Posts."""
id: str id: str
create_at: int
message: str message: str
reporter: str reporter: str
doi: str doi: str
......
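The new create_at field carries the post's creation timestamp as delivered by Mattermost (milliseconds since the Unix epoch), which is what the rest of the commit sorts and filters on. A minimal sketch of how the field is used, reduced to the attributes visible in this hunk (the real PostDTO may have more fields; the sample values are invented):

# Sketch only: PostDTO reduced to the fields shown in this diff.
from dataclasses import dataclass

@dataclass
class PostDTO:
    """Encapsulate Mattermost Posts (abridged)."""
    id: str
    create_at: int   # epoch milliseconds, as reported by Mattermost
    message: str
    reporter: str
    doi: str

posts = [PostDTO("a", 1_700_000_000_000, "https://doi.org/10.1000/1", "alice", "10.1000/1"),
         PostDTO("b", 1_700_000_100_000, "https://doi.org/10.1000/2", "bob", "10.1000/2")]
newest = max(posts, key=lambda p: p.create_at)   # "b", the later timestamp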
"""Simplified mattermost interface.""" """Simplified mattermost interface."""
import requests import requests
import logging
import time
import mattermostdriver import mattermostdriver
from .exceptions import ConfigError from .exceptions import ConfigError
from .dtos import PostDTO from .dtos import PostDTO
...@@ -65,23 +67,44 @@ class Mattermost: ...@@ -65,23 +67,44 @@ class Mattermost:
return self._reporters[userid] return self._reporters[userid]
def _retrieve_all_posts(self): def _retrieve_all_posts(self, since):
"""Retrieve all posts from mattermost, unfiltered for papers.""" """Retrieve all posts from mattermost, unfiltered for papers."""
posts = self._mattermost.posts.get_posts_for_channel(self._channel) posts = []
return [PostDTO(id=m['id'], message=m['message'], params = {"since": since} if since else {}
start = time.perf_counter()
while True:
resp = self._mattermost.posts.get_posts_for_channel(
self._channel, params)
posts.extend(resp['posts'].values())
if resp["prev_post_id"]:
params["before"] = resp["prev_post_id"]
else:
break
postDTOs = [PostDTO(
id=m['id'],
create_at=m['create_at'],
message=m['message'],
reporter=self._get_reporter(m['user_id']), reporter=self._get_reporter(m['user_id']),
doi=Doi().extract_doi(m['message']),) doi=Doi().extract_doi(m['message']),)
for m in posts['posts'].values()] for m in posts]
logging.debug("retrieving and processing %i mattermost posts took %f",
len(posts), time.perf_counter() - start)
return postDTOs
def _filter_incoming(self, posts): def _filter_incoming(self, posts):
"""Filter posts from mattermost to only papers.""" """Filter posts from mattermost to only papers."""
return [p for p in posts if "doi" in p.message] return [p for p in posts if "doi" in p.message]
def retrieve(self): def retrieve(self, since):
"""Retrieve papers from mattermost channel.""" """Retrieve papers from mattermost channel."""
if not self._loggedin: if not self._loggedin:
self._login() self._login()
posts = self._retrieve_all_posts() posts = self._retrieve_all_posts(since)
self.posts = self._filter_incoming(posts) self.posts = self._filter_incoming(posts)
return self.posts return self.posts
......
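The loop above replaces the old single request, which only ever returned the first page of the channel. As I read the Mattermost posts endpoint, each response carries a prev_post_id cursor pointing at the next older page (empty once history is exhausted), and the since parameter (epoch milliseconds, same unit as create_at) restricts the result to newer posts. A sketch of the same cursor walk against a stub client, so the termination behaviour can be seen without a server; StubPosts and its sample pages are invented for illustration only:

# Sketch (not part of the commit): exercising the since/before walk with a
# stub that mimics the shape of get_posts_for_channel responses.
class StubPosts:
    """Serves two 'pages' of posts, newest first, linked by prev_post_id."""
    def __init__(self):
        self._pages = {
            None: {"posts": {"p3": {"id": "p3", "create_at": 3000},
                             "p2": {"id": "p2", "create_at": 2000}},
                   "prev_post_id": "p2"},
            "p2": {"posts": {"p1": {"id": "p1", "create_at": 1000}},
                   "prev_post_id": ""},
        }

    def get_posts_for_channel(self, channel_id, params=None):
        return self._pages[(params or {}).get("before")]


def walk(client, channel_id, since=None):
    """Same loop as _retrieve_all_posts, reduced to the cursor handling."""
    params = {"since": since} if since else {}
    posts = []
    while True:
        resp = client.get_posts_for_channel(channel_id, params)
        posts.extend(resp["posts"].values())
        if not resp["prev_post_id"]:
            return posts
        params["before"] = resp["prev_post_id"]


print(len(walk(StubPosts(), "town-square")))  # 3: both pages are collected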
@@ -15,6 +15,7 @@ import time
 import os
 import sys
 import re
+import logging
 import urwid
 import configargparse
 from tinydb import TinyDB, Query
@@ -56,7 +57,8 @@ class Papersurfer:
     def load(self):
         """Load data from mattermost and save to storage."""
         self._connect_db()
-        posts = self.mattermost.retrieve()
+        latest = self.get_latest_post()
+        posts = self.mattermost.retrieve(latest["create_at"] if latest else None)
         self._update_db(posts=posts)

     def _connect_db(self):
@@ -66,6 +68,14 @@ class Papersurfer:
         if not self.db_papers:
             self.db_papers = TinyDB(f"{self.db_path}/{self.db_file_papers}")

+    def get_latest_post(self):
+        """Find the newest post and return."""
+        posts = self.db_posts.all()
+        if posts:
+            posts.sort(reverse=True, key=lambda p: p["create_at"])
+        return posts[0] if len(posts) else []
+
     def _update_db(self, posts=[], papers=[]):
         """"Merge new data into database."""
         self._upsert_multiple(posts, self.db_posts)
@@ -84,7 +94,7 @@ class Papersurfer:
     def get_posts(self):
         """Get all posts in storage."""
         self._connect_db()
-        return [PostDTO(p["id"], p["message"], p["reporter"], p["doi"])
+        return [PostDTO(p["id"], p["create_at"], p["message"], p["reporter"], p["doi"])
                 for p in self.db_posts.all()]

     def get_posts_filtered(self, needle=None):
@@ -564,6 +574,9 @@ def just_bibtex(url, channel, username, password):
 def main():
     """Run main program."""
     opt = parse_args()
+    logging.basicConfig(filename='papersurfer.log', level=logging.DEBUG)
+
     if opt.dump_posts:
         just_papers(opt.url, opt.channel, opt.username, opt.password)
     if opt.dump_bibtex:
......
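The load() change ties the two halves together: the newest create_at already stored in TinyDB becomes the since argument for the next fetch, so only unseen posts travel over the wire. A compressed sketch of that flow under assumed names (newest_create_at and the example file name are mine, not the project's); max(..., default=None) does the same job as the sort in get_latest_post:

# Sketch (invented helper and file name, not part of the commit): the
# incremental-sync flow that load() now implements, reduced to its essentials.
from tinydb import TinyDB

def newest_create_at(db_posts):
    """Return the create_at of the newest stored post, or None if empty."""
    stamps = [p["create_at"] for p in db_posts.all()]
    return max(stamps) if stamps else None

db = TinyDB("posts_example.json")            # throwaway example database
db.insert({"id": "p1", "create_at": 1000, "message": "", "reporter": "", "doi": ""})
since = newest_create_at(db)                 # 1000 here; None on a first run -> full fetch
# posts = mattermost.retrieve(since)         # would fetch only posts newer than `since`

The logging.basicConfig call in main() routes the new debug timing message from _retrieve_all_posts into papersurfer.log, which also keeps debug output away from the urwid-driven terminal interface.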