Skip to content
Snippets Groups Projects
Commit 7e5af69b authored by Arkhangelskiy, Timofey
Browse files

Test and fix communication with Tsakorpus

parent d8f76456
Branches
No related tags found
No related merge requests found
......@@ -19,11 +19,12 @@ class ResourceConfig:
rxExt = re.compile('\\.[^.]*$')
def __init__(self, fnameConfig=None):
self.platform = CorpPlatform.annis
self.platform = 'tsakorpus'
self.transport_protocol = 'https'
self.host = '127.0.0.1'
self.port = '5000'
self.url_path = '127.0.0.1'
self.resource_base_url = 'http://127.0.0.1'
self.titles = []
self.descriptions = []
self.authors = []
......@@ -38,6 +39,7 @@ class ResourceConfig:
self.adv_supported = False
self.supported_layers = []
self.resources = []
self.search_lang_id = ''
self.query_timeout = 60
......
from urllib.parse import quote
import re
import json
import urllib.request
from .query_parser import QueryParser
from .config import ResourceConfig
from .diagnostics import Diagnostic, DiagnosticTypes
......@@ -16,15 +18,19 @@ class TsakorpusQueryParser(QueryParser):
Build a GET string (everything after the ?) from a description
of the GET parameters in the getParams list.
"""
nWords = len(self.term_indexes(getParams))
termIndexes = self.term_indexes(getParams)
nWords = len(termIndexes)
s = 'n_words=' + str(nWords)
for param in getParams:
if type(param[1]) is list:
index = '_'.join(str(v) for v in param[1])
if param[0] in ('word_rel_', 'word_dist_from_', 'word_dist_to_'):
sfx = '_0'
else:
index = str(param[1])
s += '&' + param[0] + index + '=' + quote(param[2])
sfx = ''
s += '&' + param[0] + str(param[1]) + sfx + '=' + quote(str(param[2]))
for i in termIndexes:
s += '&lang' + str(i) + '=' + config.search_lang_id
s += '&page_size=' + str(config.max_hits)
s += '&precise=on&sort=random&response_format=json&distance_strict=on'
return s
def term_query(self, query, config):
......@@ -42,9 +48,10 @@ class TsakorpusQueryParser(QueryParser):
iTerm += 1
getParams.append(['wf', iTerm, term])
if iTerm >= 2:
getParams.append(['word_rel_', [iTerm-1, iTerm], '1'])
getParams.append(['word_dist_from_', [iTerm-1, iTerm], '1'])
getParams.append(['word_dist_to_', [iTerm-1, iTerm], '1'])
# A maximum of one distance constraint per term
getParams.append(['word_rel_', iTerm, iTerm-1])
getParams.append(['word_dist_from_', iTerm, '1'])
getParams.append(['word_dist_to_', iTerm, '1'])
return getParams
......@@ -70,10 +77,10 @@ class TsakorpusQueryParser(QueryParser):
"""
getParamsShifted = []
for param in getParams:
if type(param[1]) is int:
newParam = (param[0], param[1] + shift, param[2])
if type(param[2]) is int:
newParam = (param[0], param[1] + shift, param[2] + shift)
else:
newParam = (param[0], [i + shift for i in param[1]], param[2])
newParam = (param[0], param[1] + shift, param[2])
getParamsShifted.append(newParam)
return getParamsShifted
......@@ -152,6 +159,18 @@ class TsakorpusQueryParser(QueryParser):
return self.not_bool(resultRight)
return {}
def send_query(self, strGetParams: str, config: ResourceConfig):
    """
    Send the translated query to the Tsakorpus instance. Return the JSON
    results returned by the corpus, parsed into Python objects.

    strGetParams -- the GET string (everything after the ?) to append
    to the search URL.
    config -- resource configuration. Its resource_base_url is used as
    the root URL of the Tsakorpus instance; its query_timeout (seconds),
    if present and non-zero, bounds how long the request may take.

    Raises urllib.error.URLError (or a socket timeout) if the corpus
    cannot be reached in time, and ValueError if the response body
    cannot be decoded or is not valid JSON.
    """
    # Trim only trailing slashes so that exactly one slash separates
    # the base URL from the endpoint name.
    url = config.resource_base_url.rstrip('/') + '/search_sent?' + strGetParams
    # A falsy timeout (0 / None) means "wait indefinitely"; passing 0
    # to urlopen would switch the socket to non-blocking mode instead.
    timeout = getattr(config, 'query_timeout', 60) or None
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        data = response.read()
        encoding = response.info().get_content_charset('utf-8')
    return json.loads(data.decode(encoding))
if __name__ == '__main__':
pass
......
{
"host": "0.0.0.0",
"port": "80",
"max_hits": 20,
"platform": "tsakorpus"
"max_hits": 15,
"platform": "tsakorpus",
"resource_base_url": "http://127.0.0.1:7342",
"search_lang_id": "beserman"
}
\ No newline at end of file
......@@ -67,7 +67,9 @@ def endpoint(
if operation == Operation.searchRetrieve:
if config.platform == CorpPlatform.tsakorpus:
try:
res = app.qp_tsakorpus.translate_fcsql(query, config)
strGetParams = app.qp_tsakorpus.translate_fcsql(query, config)
print(strGetParams)
res = app.qp_tsakorpus.send_query(strGetParams, config)
except Diagnostic as diag:
print('diag', str(diag))
return Response(content=str(diag), media_type='application/xml')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment