From 5e1c75a632caeed5c53eda2963170a5713142091 Mon Sep 17 00:00:00 2001
From: Johann Jacobsohn <j.jacobsohn@satzmedia.de>
Date: Tue, 4 Aug 2020 09:49:08 +0200
Subject: [PATCH] add experimental export-to-bibtex support

---
 papersurfer.py | 102 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 94 insertions(+), 8 deletions(-)

diff --git a/papersurfer.py b/papersurfer.py
index 691fda4..bd395c4 100644
--- a/papersurfer.py
+++ b/papersurfer.py
@@ -13,9 +13,12 @@ import subprocess
 from dataclasses import dataclass
 import re
 from functools import partial
+import requests
 from mattermostdriver import Driver
 import urwid
 import configargparse
+from pybtex.database import BibliographyData, Entry
+import xml.etree.ElementTree as ET
 
 URL = "mattermost.cen.uni-hamburg.de"
 CHANNEL = "n5myem9yc7fyzb9am7ym5o41ry"
@@ -29,11 +32,60 @@ class PostDTO:
     reporter: str
     doi: str
 
+@dataclass
+class PaperDTO:
+    """Encapsulate bibliographic metadata of a paper."""
+    author: str
+    title: str
+    journal: str
+    year: int  # NOTE(review): Doi.parse_doi_xml passes the XML text, i.e. a str
+    doi: str
+
+
+class Bibtex:
+    """Render paper metadata as bibtex strings."""
+    def entry(self, author, title, journal, year):
+        """Return one bibtex article entry keyed by author and year."""
+        fields = [('author', author), ('title', title),
+                  ('journal', journal), ('year', year)]
+        item = Entry('article', fields)
+        return BibliographyData({f'{author}{year}': item}).to_string('bibtex')
+
+    def entry_from_doi(self, doi):
+        """Look up the metadata for doi and return its bibtex entry."""
+        info = Doi().get_info(doi)
+        return self.entry(info.author, info.title, info.journal, info.year)
+
+    def bib_from_dois(self, dois):
+        """Return the bibtex entries of all dois, joined by newlines."""
+        return "\n".join(self.entry_from_doi(doi) for doi in dois)
+
 
 class Doi:
     """Interface w/ the doi.org api"""
     def get_doi_link(self, doi):
-        return f"http://doi.org/$doi"
+        """Assemble doi link."""
+        return f"http://doi.org/{doi}"
+
+    def load_doi_data(self, doi):
+        """Request crossref unixsd xml for doi; return the raw response body."""
+        headers = {'Accept': 'application/vnd.crossref.unixsd+xml'}
+        url = f'http://dx.doi.org/{doi}'  # timeout: never block the UI forever
+        return requests.get(url, headers=headers, timeout=30).content
+
+    def parse_doi_xml(self, xml):
+        """Build a PaperDTO from crossref xml, taking the first match per tag."""
+        root, ns = ET.fromstring(xml), "{http://www.crossref.org/xschema/1.1}"
+        author = root.find(f".//{ns}surname").text  # fixme: first author only
+        title = root.find(f".//{ns}title").text
+        journal = root.find(f".//{ns}full_title").text
+        year = root.find(f".//{ns}year").text
+        doi = root.find(f".//{ns}journal_article/{ns}doi_data/{ns}doi").text
+        return PaperDTO(author, title, journal, year, doi)
+
+    def get_info(self, doi):
+        """Fetch the metadata for doi and return it parsed as a PaperDTO."""
+        return self.parse_doi_xml(self.load_doi_data(doi))
 
     def extract_doi(self, hay):
         """Parse doi from string, or None if not found.
@@ -41,7 +93,8 @@ class Doi:
         >>> Doi().extract_doi("https://doi.org/10.1093/petrology/egaa077")
         '10.1093/petrology/egaa077'
         """
-        matches = re.compile(r'\b10\.\d{4,9}/[-._;()/:A-Z0-9]+', re.I).search(hay)
+        pattern = r'\b10\.\d{4,9}/[-._;()/:A-Z0-9]+'
+        matches = re.compile(pattern, re.I).search(hay)
         return matches.group() if matches else None
 
 
@@ -57,10 +110,11 @@ class Mattermost:
         self.mattermost.login()
         self.reporters = {}
 
-
     def get_reporter(self, id):
+        """Load user from mattermost api and cache."""
         if id not in self.reporters:
             self.reporters[id] = self.mattermost.users.get_user(id)["username"]
+
         return self.reporters[id]
 
     def retrieve_all_messages(self):
@@ -96,30 +150,35 @@ class Papersurfer:
     def __init__(self, username, password):
         self._screen = urwid.raw_display.Screen()
         self.size = self._screen.get_cols_rows()
+        self.filter = ""
 
         palette = [
             ('I say', 'default,bold', 'default', 'bold'),
             ('needle', 'default, bold, underline', 'default', 'bold')]
         ask = urwid.Edit(('I say', u"Filter?\n"))
         exitbutton = urwid.Button(u'Exit')
+        self.exportbutton = urwid.Button(u'Export filtered list as bibtex')
         div = urwid.Divider(u'-')
 
         self.mtm = Mattermost(username, password)
 
-        body = [self.listItem(paper) for paper in self.mtm.retrieve()]
+        body = [self.list_item(paper) for paper in self.mtm.retrieve()]
         self.listcontent = urwid.SimpleFocusListWalker(body)
 
         paperlist = urwid.BoxAdapter(urwid.ListBox(self.listcontent),
                                      self.size[1] - 5)
-        pile = urwid.Pile([ask, div, paperlist, div, exitbutton])
+        pile = urwid.Pile([ask, div, paperlist, div,
+                           urwid.Columns([exitbutton, self.exportbutton])])
         top = urwid.Filler(pile, valign='middle')
 
         urwid.connect_signal(ask, 'change', self.onchange)
         urwid.connect_signal(exitbutton, 'click', self.on_exit_clicked)
+        urwid.connect_signal(self.exportbutton, 'click', self.on_export_clicked)
 
-        urwid.MainLoop(top, palette).run()
+        self.mainloop = urwid.MainLoop(top, palette)
+        self.mainloop.run()
 
-    def listItem(self, paper, needle=""):
+    def list_item(self, paper, needle=""):
         """Create highlighted text entry."""
         text_items = []
         needle = needle or "ßß"
@@ -143,16 +202,43 @@ class Papersurfer:
         pile = urwid.Pile([title, button_bar, urwid.Divider()])
         return pile
 
+    def updscrn(self):
+        self.mainloop.draw_screen()  # repaint now; used from click handlers
+
     def onchange(self, _, needle):
         """Handle filter change."""
+        self.filter = needle
         self.listcontent.clear()
-        self.listcontent.extend([self.listItem(paper, needle)
+        self.listcontent.extend([self.list_item(paper, needle)
                                  for paper in self.mtm.get_filtered(needle)])
 
+    def running_export(self, state):
+        """Append or strip the running indicator on the export button label."""
+        suffix = " (running...)"
+        label = self.exportbutton.get_label()
+        if state:
+            self.exportbutton.set_label(label + suffix)
+        else:
+            self.exportbutton.set_label(label.replace(suffix, ""))
+        self.updscrn()
+
     def on_exit_clicked(self, button):
         """Handle exitbutton click and exit."""
         raise urwid.ExitMainLoop()
 
+    def on_export_clicked(self, _):
+        """Handle exportbutton click and run the bibtex export."""
+        self.running_export(True)
+        self.export_to_bibtex()
+        self.running_export(False)
+
+    def export_to_bibtex(self):
+        """Write bibtex for the currently filtered papers to export.bib."""
+        papers = self.mtm.get_filtered(self.filter)
+        string = Bibtex().bib_from_dois([p.doi for p in papers if p.doi])
+        with open("export.bib", 'w', encoding="utf-8") as f:
+            f.write(string)
+
     def h_open_discussion(self, post, _):
         """Handle click/enter on discussion button."""
         self.open_discussion(post)
-- 
GitLab