Skip to content
Snippets Groups Projects
Commit 3e45b565 authored by Timofey Arkhangelskiy
Browse files

send_query now works for ANNIS

parent 4903a3e2
No related branches found
No related tags found
No related merge requests found
......@@ -31,13 +31,14 @@ class AnnisQueryParser(QueryParser):
q = {
'query': '',
'query_language': 'AQL',
'corpora': config.annis_corpus_list,
'corpora': [config.annis_corpus_id],
'limit': min(config.max_hits, searchOptions['maximumRecords']),
'order': 'Randomized'
}
termIndexes = self.term_indexes(params)
queryFront = ''
queryTail = ''
params = self.rename_params(params, config)
for param in sorted(params):
print(param)
# For query words:
......@@ -222,19 +223,30 @@ class AnnisQueryParser(QueryParser):
operandR += wordRelParams
return operandL + operandR
# TODO: add real API links and test it
def send_query(self, query, config: ResourceConfig):
    """
    Send the translated query to the ANNIS API and collect the hits.

    Two POST requests are made against ``config.resource_base_url``:
    first ``/v1/search/count`` to learn how many matches there are,
    then, only if that number is positive, ``/v1/search/find`` to
    fetch the match IDs.

    Return a dictionary with two keys:

    - ``'n_hits'``: the ``match_count`` value from the count response,
      or -1 if it could not be read from the response body;
    - ``'hit_ids'``: the lines of the find response body as a list of
      strings, or ``''`` if the find request was not made.

    Connection errors and timeouts raised by ``requests`` are not
    caught here and propagate to the caller.
    """
    res = {
        'n_hits': -1,
        'hit_ids': ''
    }
    baseUrl = config.resource_base_url.strip('/')

    urlCount = baseUrl + '/v1/search/count'
    print(urlCount)
    response = requests.post(urlCount, json=query, timeout=60)
    try:
        res['n_hits'] = response.json()['match_count']
    except (ValueError, KeyError, TypeError) as err:
        # Best effort: a body that is not JSON (ValueError), lacks
        # 'match_count' (KeyError) or is not a JSON object (TypeError)
        # leaves n_hits at -1 instead of aborting the request.
        print('Could not read match_count from the ANNIS response:', err)

    if res['n_hits'] > 0:
        urlFind = baseUrl + '/v1/search/find'
        print(urlFind)
        response = requests.post(urlFind, json=query, timeout=60)
        # The body is split on newlines, one entry per line.
        res['hit_ids'] = response.content.decode('utf-8').strip('\n').split('\n')
    print(res)
    return res
if __name__ == '__main__':
pass
......@@ -26,6 +26,7 @@ class ResourceConfig:
self.port = '5000'
self.url_path = '127.0.0.1'
self.resource_base_url = 'http://127.0.0.1'
self.annis_corpus_id = '' # ANNIS-internal ID of the corpus to search in
self.titles = []
self.descriptions = []
self.authors = []
......@@ -40,10 +41,10 @@ class ResourceConfig:
self.adv_supported = False
self.supported_layers = []
self.resources = []
self.annis_corpus_list = []
self.search_lang_id = ''
self.pos_convert = [] # corpus-specific to UD (regexes)
self.pos_convert_reverse = {} # UD to corpus-specific
self.tier_convert_reverse = {} # FCS to corpus-specific tier IDs
self.query_timeout = 60
......
......@@ -201,6 +201,25 @@ class QueryParser:
return [0]
return [t for t in sorted(terms)]
def rename_params(self, params: dict, config: ResourceConfig):
    """
    Translate common tier IDs in the search parameters into their
    corpus-specific names.

    Each entry of ``params`` is deep-copied, so the input is left
    untouched. The first element of an entry is replaced if it is
    a key of ``config.tier_convert_reverse``. On the ``annis``
    platform, ``tok`` is additionally treated as ``text`` when it
    has no mapping of its own but ``text`` has one.

    Return the new list of entries, in the original order.

    NOTE(review): ``params`` is annotated as ``dict``, but the body
    iterates over it and indexes each entry with ``[0]``, so it looks
    like a list of mutable sequences -- confirm against the callers.
    """
    renamed = []
    for original in params:
        entry = copy.deepcopy(original)
        tierId = entry[0]
        if tierId in config.tier_convert_reverse:
            entry[0] = config.tier_convert_reverse[tierId]
        elif tierId == 'tok' and config.platform == 'annis':
            # ANNIS token searches fall back to the "text" tier name.
            if 'text' in config.tier_convert_reverse:
                entry[0] = config.tier_convert_reverse['text']
        renamed.append(entry)
    return renamed
def build_get_string(self, getParams, config: ResourceConfig, searchOptions: dict, withinClause=''):
# Abstract function
raise NotImplementedError()
......
......@@ -203,10 +203,11 @@ def process_search_retrieve(version: SRUVersion,
else:
query = app.qp_annis.translate_advanced(query, config, searchOptions)
print(query)
# res = app.qp_annis.send_query(query, config)
res = app.qp_annis.send_query(query, config)
except Diagnostic as diag:
return fatal_response(Operation.searchRetrieve, version, config, diagnostics + [diag], request, templates)
return query['query']
# return query['query']
return res
# records, nHits, diagnostics = app.rp_annis.parse(res, config, searchOptions['x-fcs-dataviews'])
# if any(diag.is_fatal() for diag in diagnostics):
# return fatal_response(Operation.searchRetrieve, version, config, diagnostics, request, templates)
......
......@@ -2,9 +2,14 @@
"host": "https://www.sign-lang.uni-hamburg.de/dgs-korpus/ (replace later)",
"port": "80",
"transport_protocol": "https",
"max_hits": 15,
"max_hits": 17,
"platform": "annis",
"advanced_search_capability": true,
"adv_supported": true,
"resource_base_url": "https://www.sign-lang.uni-hamburg.de/dgs-korpus/ (replace later)"
"resource_base_url": "http://adwhh1.server.uni-hamburg.de:17101",
"annis_corpus_id": "DGS-Corpus-r3-en",
"tier_convert_reverse": {
"text": "Gloss",
"lemma": "GlossType"
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment