From 3e45b565eb585f195189f6d8149ca203b0a95c05 Mon Sep 17 00:00:00 2001 From: Timofey Arkhangelskiy <timarkh@gmail.com> Date: Mon, 26 Jun 2023 14:03:54 +0200 Subject: [PATCH] send_query now works for ANNIS --- common/annis_query_parser.py | 28 ++++++++++++++++++++-------- common/config.py | 3 ++- common/query_parser.py | 19 +++++++++++++++++++ common/views_logic.py | 5 +++-- config/annis_test.json | 9 +++++++-- 5 files changed, 51 insertions(+), 13 deletions(-) diff --git a/common/annis_query_parser.py b/common/annis_query_parser.py index 1fc38bf..920fcb3 100644 --- a/common/annis_query_parser.py +++ b/common/annis_query_parser.py @@ -31,13 +31,14 @@ class AnnisQueryParser(QueryParser): q = { 'query': '', 'query_language': 'AQL', - 'corpora': config.annis_corpus_list, + 'corpora': [config.annis_corpus_id], 'limit': min(config.max_hits, searchOptions['maximumRecords']), 'order': 'Randomized' } termIndexes = self.term_indexes(params) queryFront = '' queryTail = '' + params = self.rename_params(params, config) for param in sorted(params): print(param) # For query words: @@ -222,19 +223,30 @@ class AnnisQueryParser(QueryParser): operandR += wordRelParams return operandL + operandR - # TODO: add real API links and test it def send_query(self, query, config: ResourceConfig): """ Send the translated query to the ANNIS API. Return JSON results returned by the corpus. """ - url = config.resource_base_url.strip('/') + '/v1/' - print(url) - response = requests.post(url, json=query, timeout=60) - responseJSON = response.json() - return responseJSON + res = { + 'n_hits': -1, + 'hit_ids': '' + } + urlCount = config.resource_base_url.strip('/') + '/v1/search/count' + print(urlCount) + response = requests.post(urlCount, json=query, timeout=60) + try: + res['n_hits'] = response.json()['match_count'] + except: + pass + if res['n_hits'] > 0: + urlFind = config.resource_base_url.strip('/') + '/v1/search/find' + print(urlFind) + response = requests.post(urlFind, json=query, timeout=60) + res['hit_ids'] = response.content.decode('utf-8').strip('\n').split('\n') + print(res) + return res if __name__ == '__main__': pass - diff --git a/common/config.py b/common/config.py index fab703d..981ad2f 100644 --- a/common/config.py +++ b/common/config.py @@ -26,6 +26,7 @@ class ResourceConfig: self.port = '5000' self.url_path = '127.0.0.1' self.resource_base_url = 'http://127.0.0.1' + self.annis_corpus_id = '' # ANNIS-internal ID of the corpus to search in self.titles = [] self.descriptions = [] self.authors = [] @@ -40,10 +41,10 @@ class ResourceConfig: self.adv_supported = False self.supported_layers = [] self.resources = [] - self.annis_corpus_list = [] self.search_lang_id = '' self.pos_convert = [] # corpus-specific to UD (regexes) self.pos_convert_reverse = {} # UD to corpus-specific + self.tier_convert_reverse = {} # FCS to corpus-specific tier IDs self.query_timeout = 60 diff --git a/common/query_parser.py b/common/query_parser.py index 56afd4d..b1382bb 100644 --- a/common/query_parser.py +++ b/common/query_parser.py @@ -201,6 +201,25 @@ class QueryParser: return [0] return [t for t in sorted(terms)] + def rename_params(self, params: dict, config: ResourceConfig): + """ + If there are corpus-specific names for common tier IDs + such as "text" or "lemma", make necessary replacements + in the search parameter list. + Return replaced list. + """ + newParams = [] + for param in params: + paramNew = copy.deepcopy(param) + if paramNew[0] in config.tier_convert_reverse: + paramNew[0] = config.tier_convert_reverse[paramNew[0]] + elif (paramNew[0] == 'tok' + and config.platform == 'annis' + and 'text' in config.tier_convert_reverse): + paramNew[0] = config.tier_convert_reverse['text'] + newParams.append(paramNew) + return newParams + def build_get_string(self, getParams, config: ResourceConfig, searchOptions: dict, withinClause=''): # Abstract function raise NotImplementedError() diff --git a/common/views_logic.py b/common/views_logic.py index e8aff25..3914f2e 100644 --- a/common/views_logic.py +++ b/common/views_logic.py @@ -203,10 +203,11 @@ def process_search_retrieve(version: SRUVersion, else: query = app.qp_annis.translate_advanced(query, config, searchOptions) print(query) - # res = app.qp_annis.send_query(query, config) + res = app.qp_annis.send_query(query, config) except Diagnostic as diag: return fatal_response(Operation.searchRetrieve, version, config, diagnostics + [diag], request, templates) - return query['query'] + # return query['query'] + return res # records, nHits, diagnostics = app.rp_annis.parse(res, config, searchOptions['x-fcs-dataviews']) # if any(diag.is_fatal() for diag in diagnostics): # return fatal_response(Operation.searchRetrieve, version, config, diagnostics, request, templates) diff --git a/config/annis_test.json b/config/annis_test.json index b9a15cc..f6e05bc 100644 --- a/config/annis_test.json +++ b/config/annis_test.json @@ -2,9 +2,14 @@ "host": "https://www.sign-lang.uni-hamburg.de/dgs-korpus/ (replace later)", "port": "80", "transport_protocol": "https", - "max_hits": 15, + "max_hits": 17, "platform": "annis", "advanced_search_capability": true, "adv_supported": true, - "resource_base_url": "https://www.sign-lang.uni-hamburg.de/dgs-korpus/ (replace later)" + "resource_base_url": "http://adwhh1.server.uni-hamburg.de:17101", + "annis_corpus_id": "DGS-Corpus-r3-en", + "tier_convert_reverse": { + "text": "Gloss", + "lemma": "GlossType" + } } \ No newline at end of file -- GitLab