diff --git a/common/config.py b/common/config.py
index 688a145ad7456e4f531e25d9d8446e89f7ce0745..df1191c6075182b58dea114dc0a2a8ac4698410d 100644
--- a/common/config.py
+++ b/common/config.py
@@ -19,11 +19,12 @@ class ResourceConfig:
     rxExt = re.compile('\\.[^.]*$')
 
     def __init__(self, fnameConfig=None):
-        self.platform = CorpPlatform.annis
+        self.platform = 'tsakorpus'
         self.transport_protocol = 'https'
         self.host = '127.0.0.1'
         self.port = '5000'
         self.url_path = '127.0.0.1'
+        self.resource_base_url = 'http://127.0.0.1'
         self.titles = []
         self.descriptions = []
         self.authors = []
@@ -38,6 +39,7 @@ class ResourceConfig:
         self.adv_supported = False
         self.supported_layers = []
         self.resources = []
+        self.search_lang_id = ''
 
         self.query_timeout = 60
 
diff --git a/common/tsakorpus_query_parser.py b/common/tsakorpus_query_parser.py
index c1f6c8718c04bd5e58962c942f027a0b480768f3..da60360fcbdbb209dc6c0a9ff3b0d420728f6138 100644
--- a/common/tsakorpus_query_parser.py
+++ b/common/tsakorpus_query_parser.py
@@ -1,5 +1,7 @@
 from urllib.parse import quote
 import re
+import json
+import urllib.request
 from .query_parser import QueryParser
 from .config import ResourceConfig
 from .diagnostics import Diagnostic, DiagnosticTypes
@@ -16,15 +18,19 @@ class TsakorpusQueryParser(QueryParser):
         Build a GET string (everything after the ?) from a description
         of the GET parameters in the getParams list.
         """
-        nWords = len(self.term_indexes(getParams))
+        termIndexes = self.term_indexes(getParams)
+        nWords = len(termIndexes)
         s = 'n_words=' + str(nWords)
         for param in getParams:
-            if type(param[1]) is list:
-                index = '_'.join(str(v) for v in param[1])
+            if param[0] in ('word_rel_', 'word_dist_from_', 'word_dist_to_'):
+                sfx = '_0'
             else:
-                index = str(param[1])
-            s += '&' + param[0] + index + '=' + quote(param[2])
+                sfx = ''
+            s += '&' + param[0] + str(param[1]) + sfx + '=' + quote(str(param[2]))
+        for i in termIndexes:
+            s += '&lang' + str(i) + '=' + config.search_lang_id
         s += '&page_size=' + str(config.max_hits)
+        s += '&precise=on&sort=random&response_format=json&distance_strict=on'
         return s
 
     def term_query(self, query, config):
@@ -42,9 +48,10 @@ class TsakorpusQueryParser(QueryParser):
             iTerm += 1
             getParams.append(['wf', iTerm, term])
             if iTerm >= 2:
-                getParams.append(['word_rel_', [iTerm-1, iTerm], '1'])
-                getParams.append(['word_dist_from_', [iTerm-1, iTerm], '1'])
-                getParams.append(['word_dist_to_', [iTerm-1, iTerm], '1'])
+                # A maximum of one distance constraint per term
+                getParams.append(['word_rel_', iTerm, iTerm-1])
+                getParams.append(['word_dist_from_', iTerm, '1'])
+                getParams.append(['word_dist_to_', iTerm, '1'])
         return getParams
 
 
@@ -70,10 +77,10 @@ class TsakorpusQueryParser(QueryParser):
         """
         getParamsShifted = []
         for param in getParams:
-            if type(param[1]) is int:
-                newParam = (param[0], param[1] + shift, param[2])
+            if type(param[2]) is int:
+                newParam = (param[0], param[1] + shift, param[2] + shift)
             else:
-                newParam = (param[0], [i + shift for i in param[1]], param[2])
+                newParam = (param[0], param[1] + shift, param[2])
             getParamsShifted.append(newParam)
         return getParamsShifted
 
@@ -152,6 +159,23 @@ class TsakorpusQueryParser(QueryParser):
                 return self.not_bool(resultRight)
         return {}
 
+    def send_query(self, strGetParams: str, config: ResourceConfig):
+        """
+        Send the translated query to the Tsakorpus instance and
+        return the parsed JSON response.
+
+        strGetParams is the GET string (everything after the ?)
+        appended to the search_sent URL. The base URL and the
+        timeout are taken from config (resource_base_url, query_timeout).
+        """
+        url = config.resource_base_url.rstrip('/') + '/search_sent?' + strGetParams
+        # Close the connection deterministically and do not wait
+        # forever for an unresponsive corpus.
+        with urllib.request.urlopen(url, timeout=config.query_timeout) as response:
+            data = response.read()
+            encoding = response.info().get_content_charset('utf-8')
+        return json.loads(data.decode(encoding))
+
 
 if __name__ == '__main__':
     pass
diff --git a/config/test.json b/config/test.json
index 31ad20d1da233eb4d6edcb686b82f41584ea33dc..cc8503e4eddae711fd78cbb57ae1c8cb8a963a4a 100644
--- a/config/test.json
+++ b/config/test.json
@@ -1,6 +1,8 @@
 {
   "host": "0.0.0.0",
   "port": "80",
-  "max_hits": 20,
-  "platform": "tsakorpus"
+  "max_hits": 15,
+  "platform": "tsakorpus",
+  "resource_base_url": "http://127.0.0.1:7342",
+  "search_lang_id": "beserman"
 }
\ No newline at end of file
diff --git a/main.py b/main.py
index c28ff40b5ad47065d1fe0fb666ddfcfeb1c3186e..37c7634ffe882771ba109aef9e7a6249ba1359f0 100644
--- a/main.py
+++ b/main.py
@@ -67,7 +67,9 @@ def endpoint(
     if operation == Operation.searchRetrieve:
         if config.platform == CorpPlatform.tsakorpus:
             try:
-                res = app.qp_tsakorpus.translate_fcsql(query, config)
+                strGetParams = app.qp_tsakorpus.translate_fcsql(query, config)
+                print(strGetParams)
+                res = app.qp_tsakorpus.send_query(strGetParams, config)
             except Diagnostic as diag:
                 print('diag', str(diag))
                 return Response(content=str(diag), media_type='application/xml')