Skip to main content
Sign in
Snippets Groups Projects
Commit e4bdc6b6 authored by Johann Jacobsohn
Browse files

retrieve all and only new mattermost posts; introduce logging

parent fe3dca8e
No related branches found
No related tags found
No related merge requests found
"""Data transfer objects."""
# pylint: disable=too-many-instance-attributes
from dataclasses import dataclass
......@@ -6,6 +7,7 @@ from dataclasses import dataclass
class PostDTO:
"""Encapsulate a single Mattermost post.

Instances are built from the raw post dictionaries returned by the
Mattermost API and from rows read back out of local storage.
"""
# Post identifier, copied from the post's 'id' key.
id: str
# Value of the post's 'create_at' key; also passed on as the "since" marker
# for later retrievals. Presumably a Unix timestamp -- TODO confirm units.
create_at: int
# Post text, copied from the post's 'message' key.
message: str
# Result of resolving the post's 'user_id' through the reporter lookup.
reporter: str
# Result of running DOI extraction over the message text.
doi: str
......
......
"""Simplified mattermost interface."""
import requests
import logging
import time
import mattermostdriver
from .exceptions import ConfigError
from .dtos import PostDTO
......@@ -65,23 +67,44 @@ class Mattermost:
return self._reporters[userid]
def _retrieve_all_posts(self):
def _retrieve_all_posts(self, since):
# NOTE(review): this span looks like a rendered diff with removed and added
# lines interleaved (two def lines, a single-request version followed by a
# looping version, two comprehension tails). Confirm which lines are live
# before editing the logic.
"""Retrieve all posts from mattermost, unfiltered for papers.

The looping version requests posts for the configured channel
repeatedly, collects the values of each response's 'posts' mapping and
converts every collected post into a PostDTO.

Args:
    since: forwarded as the 'since' request parameter when truthy; a
        falsy value sends no 'since' parameter at all.

Returns:
    A list of PostDTO objects, one per collected post.
"""
posts = self._mattermost.posts.get_posts_for_channel(self._channel)
return [PostDTO(id=m['id'], message=m['message'],
posts = []
# Only send 'since' when a truthy value was supplied.
params = {"since": since} if since else {}
start = time.perf_counter()
# Keep requesting while the response names a previous post id; each
# follow-up request passes that id as the 'before' parameter.
# NOTE(review): whether the server honours 'before' together with 'since'
# is not visible here -- confirm against the Mattermost API.
while True:
resp = self._mattermost.posts.get_posts_for_channel(
self._channel, params)
posts.extend(resp['posts'].values())
if resp["prev_post_id"]:
params["before"] = resp["prev_post_id"]
else:
break
postDTOs = [PostDTO(
id=m['id'],
create_at=m['create_at'],
message=m['message'],
reporter=self._get_reporter(m['user_id']),
doi=Doi().extract_doi(m['message']),)
for m in posts['posts'].values()]
for m in posts]
# Log the number of collected posts and the elapsed time; the time is the
# difference of two time.perf_counter() readings, i.e. fractional seconds.
logging.debug("retrieving and processing %i mattermost posts took %f",
len(posts), time.perf_counter() - start)
return postDTOs
def _filter_incoming(self, posts):
"""Filter posts from mattermost to only papers."""
return [p for p in posts if "doi" in p.message]
def retrieve(self):
def retrieve(self, since):
# NOTE(review): the two def lines and the two _retrieve_all_posts calls
# appear to be the removed and added versions of the same statements in a
# rendered diff -- confirm before treating both as live code.
"""Retrieve papers from mattermost channel.

Logs in first when not yet logged in, fetches the channel's posts,
filters them down to paper posts and stores the result on self.posts.

Args:
    since: passed unchanged to _retrieve_all_posts.

Returns:
    The filtered list that was stored on self.posts.
"""
if not self._loggedin:
self._login()
posts = self._retrieve_all_posts()
posts = self._retrieve_all_posts(since)
self.posts = self._filter_incoming(posts)
return self.posts
......
......
......@@ -15,6 +15,7 @@ import time
import os
import sys
import re
import logging
import urwid
import configargparse
from tinydb import TinyDB, Query
......@@ -56,7 +57,8 @@ class Papersurfer:
def load(self):
"""Load data from mattermost and save to storage.

Connects to the database, looks up the newest stored post, passes that
post's 'create_at' value to retrieve() (None when nothing is stored
yet) and merges the returned posts into the database.
"""
# NOTE(review): the argument-less retrieve() call and the retrieve(...) call
# below it appear to be the removed and added versions of the same
# statement in a rendered diff -- confirm before treating both as live.
self._connect_db()
posts = self.mattermost.retrieve()
latest = self.get_latest_post()
posts = self.mattermost.retrieve(latest["create_at"] if latest else None)
self._update_db(posts=posts)
def _connect_db(self):
......@@ -66,6 +68,14 @@ class Papersurfer:
if not self.db_papers:
self.db_papers = TinyDB(f"{self.db_path}/{self.db_file_papers}")
def get_latest_post(self):
    """Find the newest stored post and return it.

    Returns:
        The row from ``self.db_posts`` with the largest ``create_at``
        value; when several rows share that value, the first of them in
        storage order. An empty list is returned when no posts are
        stored, so callers that test the result for truthiness keep
        working.
    """
    # max() picks the newest row in a single pass, without sorting (and
    # thereby reordering) the list handed back by the database.
    newest = max(self.db_posts.all(), key=lambda p: p["create_at"],
                 default=None)
    return [] if newest is None else newest
def _update_db(self, posts=[], papers=[]):
"""Merge new data into database."""
# NOTE(review): the list defaults are created once and shared between
# calls; that is only safe as long as nothing mutates them -- confirm.
self._upsert_multiple(posts, self.db_posts)
......@@ -84,7 +94,7 @@ class Papersurfer:
def get_posts(self):
"""Get all posts in storage.

Connects to the database and wraps every row of self.db_posts in a
PostDTO, passing the row's values positionally.
"""
# NOTE(review): the two return lines appear to be the removed and added
# versions of the same statement in a rendered diff. The longer one also
# reads p["create_at"]; rows stored without that key would presumably raise
# KeyError -- confirm whether existing data needs migrating.
self._connect_db()
return [PostDTO(p["id"], p["message"], p["reporter"], p["doi"])
return [PostDTO(p["id"], p["create_at"], p["message"], p["reporter"], p["doi"])
for p in self.db_posts.all()]
def get_posts_filtered(self, needle=None):
......@@ -564,6 +574,9 @@ def just_bibtex(url, channel, username, password):
def main():
"""Run main program."""
opt = parse_args()
logging.basicConfig(filename='papersurfer.log', level=logging.DEBUG)
if opt.dump_posts:
just_papers(opt.url, opt.channel, opt.username, opt.password)
if opt.dump_bibtex:
......
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment