From 3e45b565eb585f195189f6d8149ca203b0a95c05 Mon Sep 17 00:00:00 2001
From: Timofey Arkhangelskiy <timarkh@gmail.com>
Date: Mon, 26 Jun 2023 14:03:54 +0200
Subject: [PATCH] send_query now works for ANNIS

---
 common/annis_query_parser.py | 28 ++++++++++++++++++++--------
 common/config.py             |  3 ++-
 common/query_parser.py       | 19 +++++++++++++++++++
 common/views_logic.py        |  5 +++--
 config/annis_test.json       |  9 +++++++--
 5 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/common/annis_query_parser.py b/common/annis_query_parser.py
index 1fc38bf..920fcb3 100644
--- a/common/annis_query_parser.py
+++ b/common/annis_query_parser.py
@@ -31,13 +31,14 @@ class AnnisQueryParser(QueryParser):
         q = {
             'query': '',
             'query_language': 'AQL',
-            'corpora': config.annis_corpus_list,
+            'corpora': [config.annis_corpus_id],
             'limit': min(config.max_hits, searchOptions['maximumRecords']),
             'order': 'Randomized'
         }
         termIndexes = self.term_indexes(params)
         queryFront = ''
         queryTail = ''
+        params = self.rename_params(params, config)
         for param in sorted(params):
             print(param)
             # For query words:
@@ -222,19 +223,30 @@ class AnnisQueryParser(QueryParser):
         operandR += wordRelParams
         return operandL + operandR
 
-    # TODO: add real API links and test it
     def send_query(self, query, config: ResourceConfig):
         """
         Send the translated query to the ANNIS API. Return JSON results
         returned by the corpus.
         """
-        url = config.resource_base_url.strip('/') + '/v1/'
-        print(url)
-        response = requests.post(url, json=query, timeout=60)
-        responseJSON = response.json()
-        return responseJSON
+        res = {
+            'n_hits': -1,   # -1 means the hit count could not be obtained
+            'hit_ids': []   # one entry per line returned by /search/find
+        }
+        urlCount = config.resource_base_url.strip('/') + '/v1/search/count'
+        print(urlCount)
+        response = requests.post(urlCount, json=query, timeout=60)
+        try:
+            res['n_hits'] = response.json()['match_count']
+        except (ValueError, KeyError, TypeError):
+            pass    # non-JSON body or unexpected JSON shape: keep n_hits at -1
+        if res['n_hits'] > 0:
+            urlFind = config.resource_base_url.strip('/') + '/v1/search/find'
+            print(urlFind)
+            response = requests.post(urlFind, json=query, timeout=60)
+            res['hit_ids'] = response.content.decode('utf-8').strip('\n').split('\n')
+        print(res)
+        return res
 
 
 if __name__ == '__main__':
     pass
-
diff --git a/common/config.py b/common/config.py
index fab703d..981ad2f 100644
--- a/common/config.py
+++ b/common/config.py
@@ -26,6 +26,7 @@ class ResourceConfig:
         self.port = '5000'
         self.url_path = '127.0.0.1'
         self.resource_base_url = 'http://127.0.0.1'
+        self.annis_corpus_id = ''   # ANNIS-internal ID of the corpus to search in
         self.titles = []
         self.descriptions = []
         self.authors = []
@@ -40,10 +41,10 @@ class ResourceConfig:
         self.adv_supported = False
         self.supported_layers = []
         self.resources = []
-        self.annis_corpus_list = []
         self.search_lang_id = ''
         self.pos_convert = []           # corpus-specific to UD (regexes)
         self.pos_convert_reverse = {}   # UD to corpus-specific
+        self.tier_convert_reverse = {}  # FCS to corpus-specific tier IDs
 
         self.query_timeout = 60
 
diff --git a/common/query_parser.py b/common/query_parser.py
index 56afd4d..b1382bb 100644
--- a/common/query_parser.py
+++ b/common/query_parser.py
@@ -201,6 +201,25 @@ class QueryParser:
             return [0]
         return [t for t in sorted(terms)]
 
+    def rename_params(self, params: dict, config: ResourceConfig):  # NOTE(review): body treats params as a list of lists, not a dict - confirm
+        """
+        If there are corpus-specific names for common tier IDs
+        such as "text" or "lemma", make necessary replacements
+        in the search parameter list.
+        Return replaced list.
+        """
+        newParams = []  # converted copies are collected here; params itself is not modified
+        for param in params:
+            paramNew = copy.deepcopy(param)  # work on a copy so the caller's item stays unchanged
+            if paramNew[0] in config.tier_convert_reverse:  # item 0 is looked up as the tier ID
+                paramNew[0] = config.tier_convert_reverse[paramNew[0]]
+            elif (paramNew[0] == 'tok'
+                  and config.platform == 'annis'
+                  and 'text' in config.tier_convert_reverse):  # ANNIS only: "tok" reuses the mapping for "text"
+                paramNew[0] = config.tier_convert_reverse['text']
+            newParams.append(paramNew)
+        return newParams
+
     def build_get_string(self, getParams, config: ResourceConfig, searchOptions: dict, withinClause=''):
         # Abstract function
         raise NotImplementedError()
diff --git a/common/views_logic.py b/common/views_logic.py
index e8aff25..3914f2e 100644
--- a/common/views_logic.py
+++ b/common/views_logic.py
@@ -203,10 +203,11 @@ def process_search_retrieve(version: SRUVersion,
             else:
                 query = app.qp_annis.translate_advanced(query, config, searchOptions)
             print(query)
-            # res = app.qp_annis.send_query(query, config)
+            res = app.qp_annis.send_query(query, config)
         except Diagnostic as diag:
             return fatal_response(Operation.searchRetrieve, version, config, diagnostics + [diag], request, templates)
-        return query['query']
+        # return query['query']
+        return res
         # records, nHits, diagnostics = app.rp_annis.parse(res, config, searchOptions['x-fcs-dataviews'])
         # if any(diag.is_fatal() for diag in diagnostics):
         #     return fatal_response(Operation.searchRetrieve, version, config, diagnostics, request, templates)
diff --git a/config/annis_test.json b/config/annis_test.json
index b9a15cc..f6e05bc 100644
--- a/config/annis_test.json
+++ b/config/annis_test.json
@@ -2,9 +2,14 @@
 	"host": "https://www.sign-lang.uni-hamburg.de/dgs-korpus/ (replace later)",
 	"port": "80",
 	"transport_protocol": "https",
-	"max_hits": 15,
+	"max_hits": 17,
 	"platform": "annis",
 	"advanced_search_capability": true,
 	"adv_supported": true,
-	"resource_base_url": "https://www.sign-lang.uni-hamburg.de/dgs-korpus/ (replace later)"
+	"resource_base_url": "http://adwhh1.server.uni-hamburg.de:17101",
+	"annis_corpus_id": "DGS-Corpus-r3-en",
+	"tier_convert_reverse": {
+		"text": "Gloss",
+		"lemma": "GlossType"
+	}
 }
\ No newline at end of file
-- 
GitLab