Skip to content
Snippets Groups Projects
Commit 209a749e authored by Arkhangelskiy, Timofey
Browse files

Start advanced search processing functions

parent 628d8a55
No related branches found
No related tags found
No related merge requests found
......@@ -10,9 +10,13 @@ class QueryParser:
This class contains commonly used methods for initial parsing of a GET
query. It does not include platform-specific methods.
"""
# Regexes for simple search
rxTermQuery = re.compile('^(?:(?:[^ "]|\\\\")*|"(?:[^"]|\\\\")*")$')
# Regexes for advanced search
rxWithinClause = re.compile(' +within +(s|sentence|u|utterance|p|paragraph|'
't|turn|text|session) *$')
def __init__(self):
pass
......@@ -91,10 +95,10 @@ class QueryParser:
# Abstract function
raise NotImplementedError()
def translate_fcsql(self, query: str, config: ResourceConfig, basicSearch: bool = False, start=0, end=-1):
def translate_simple(self, query: str, config: ResourceConfig, start=0, end=-1):
"""
Translate an FCS-QL query into a corpus-specific query (GET query,
JSON Elasticsearch query or whatever).
Translate a simple search (CQL) query into a corpus-specific query
(GET query, JSON Elasticsearch query or whatever).
If something is wrong with the query, raise a Diagnostic exception.
This is a top-level platform-independent function. It recursively
parses the query by locating the hierarchically highest logical operator
......@@ -110,8 +114,7 @@ class QueryParser:
raise Diagnostic(DiagnosticTypes.sru, 27)
if self.rxTermQuery.search(query) is not None:
return self.build_get_string(self.term_query(query, config), config)
return self.build_get_string(self.translate_fcsql(query, config,
basicSearch=basicSearch,
return self.build_get_string(self.translate_simple(query, config,
start=start, end=end),
config)
# if query.count('(') != query.count(')'):
......@@ -129,21 +132,66 @@ class QueryParser:
iOpPos, strOp = self.find_operator(query, start, end)
if iOpPos == -1:
if query[start] == '(' and query[end - 1] == ')':
return self.translate_fcsql(query, config, basicSearch=basicSearch, start=start + 1, end=end - 1)
return self.translate_simple(query, config, start=start + 1, end=end - 1)
else:
return self.term_query(query[start:end], config)
if strOp in ('AND', 'OR'):
resultLeft = self.translate_fcsql(query, config, basicSearch=basicSearch, start=start, end=iOpPos)
resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp), end=end)
resultLeft = self.translate_simple(query, config, start=start, end=iOpPos)
resultRight = self.translate_simple(query, config, start=iOpPos + len(strOp),
end=end)
if len(resultLeft) <= 0 or len(resultRight) <= 0:
raise Diagnostic(DiagnosticTypes.sru, 10)
return self.binary_bool(strOp, resultLeft, resultRight, config)
elif strOp == 'NOT':
resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp),
resultRight = self.translate_simple(query, config, start=iOpPos + len(strOp),
end=end)
return self.not_bool(resultRight, config)
return {}
def adv_main_query(self, query: str, config: ResourceConfig, start=0, end=-1):
    """
    Validate and trim the [start, end) span of an advanced search
    (FCS-QL) query before it is parsed.
    ``end=-1`` (the default) stands for "up to the end of the query".
    Spaces, tabs and newlines at both edges of the span are skipped.
    Raise Diagnostic (sru, 27) if the query string is empty and
    Diagnostic (sru, 10) if the span is out of range or contains
    nothing but whitespace.
    The actual parsing is not implemented yet, so a span that passes
    validation currently ends in NotImplementedError.
    """
    if len(query) <= 0:
        raise Diagnostic(DiagnosticTypes.sru, 27)
    if end == -1:
        # Default value: the caller wants the whole query. Without this,
        # the range check below would reject every call that relies on
        # the default.
        end = len(query)
    if start >= len(query) - 1 or end <= 0:
        raise Diagnostic(DiagnosticTypes.sru, 10)
    # Narrow the span so that it neither starts nor ends with whitespace.
    while start < len(query) and query[start] in ' \t\n':
        start += 1
    while end > 0 and query[end - 1] in ' \t\n':
        end -= 1
    if start >= end:
        raise Diagnostic(DiagnosticTypes.sru, 10)
    raise NotImplementedError
def translate_advanced(self, query: str, config: ResourceConfig):
    """
    Entry point for translating an advanced search (FCS-QL) query.
    If the query ends with a "within" clause, the clause is cut off and
    its one-letter scope names (s, u, p, t) are expanded to sentence,
    utterance, paragraph and turn; other scope names are kept as they are.
    What remains of the query is handed over to adv_main_query() together
    with its full length as the end position.
    Raise Diagnostic (sru, 27) if nothing is left of the query.
    """
    # One-letter scope names and the long forms they stand for.
    fullScopeNames = {
        's': 'sentence',
        'u': 'utterance',
        'p': 'paragraph',
        't': 'turn'
    }
    # NOTE(review): the scope is extracted but not passed on anywhere yet.
    withinClause = ''
    mWithin = self.rxWithinClause.search(query)
    if mWithin is not None:
        scope = mWithin.group(1)
        withinClause = fullScopeNames.get(scope, scope)
        query = self.rxWithinClause.sub('', query)
    end = len(query)
    if end == 0:
        raise Diagnostic(DiagnosticTypes.sru, 27)
    return self.adv_main_query(query, config, start=0, end=end)
def validate_query(self, operation, version, queryType, query,
xFcsEndpointDescription, xFcsContext,
xFcsDataviews, xFcsRewritesAllowed):
......@@ -182,7 +230,7 @@ class QueryParser:
# Check version-specific parameters and values
if version == SRUVersion.v1_2:
if queryType == QueryType.cql:
if queryType == QueryType.fcs:
diagnostics.append(Diagnostic(DiagnosticTypes.sru, 8, details='queryType'))
for dv in xFcsDataviews.split(','):
dv = dv.strip()
......
......@@ -80,8 +80,11 @@ def process_search_retrieve(version: SRUVersion,
"""
if config.platform == CorpPlatform.tsakorpus:
try:
strGetParams = app.qp_tsakorpus.translate_fcsql(query, config)
print(strGetParams)
if queryType == QueryType.cql:
strGetParams = app.qp_tsakorpus.translate_simple(query, config)
else:
strGetParams = app.qp_tsakorpus.translate_advanced(query, config)
# print(strGetParams)
res = app.qp_tsakorpus.send_query(strGetParams, config)
except Diagnostic as diag:
return fatal_response(Operation.searchRetrieve, version, diagnostics + [diag], request, templates)
......@@ -98,9 +101,11 @@ def process_search_retrieve(version: SRUVersion,
media_type='application/xml')
elif config.platform == CorpPlatform.litterae:
try:
strGetParams = app.qp_litterae.translate_fcsql(query, config)
print(strGetParams)
# return strGetParams
if queryType == QueryType.cql:
strGetParams = app.qp_litterae.translate_simple(query, config)
else:
strGetParams = app.qp_litterae.translate_simple(query, config)
# print(strGetParams)
res = app.qp_litterae.send_query(strGetParams, config)
print(res)
except Diagnostic as diag:
......
......@@ -42,7 +42,7 @@ def endpoint(
corpusID: str,
operation: Operation = Operation.explain,
version: SRUVersion = SRUVersion.v2_0,
queryType: QueryType = QueryType.fcs,
queryType: QueryType = QueryType.cql,
query: str = '',
xFcsEndpointDescription: str = Query(
default='',
......@@ -82,7 +82,6 @@ def endpoint(
xFcsDataviews, xFcsRewritesAllowed)
# Now, do the substantial things
return process_request(operation, version, queryType, query, searchOptions, config, diagnostics, app, request, templates)
# return {'operation': operation, 'version': version}
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment