diff --git a/common/query_parser.py b/common/query_parser.py
index 1cda936c0962f0c586fdddad5e604850a5855e3c..bd04f89ddb96785d36de0cd8f803c2f18ae4221b 100644
--- a/common/query_parser.py
+++ b/common/query_parser.py
@@ -10,9 +10,13 @@ class QueryParser:
     This class contains commonly used methods for initial parsing of a GET query.
     It does not include platform-specific methods.
     """
-    
+
     # Regexes for simple search
     rxTermQuery = re.compile('^(?:(?:[^ "]|\\\\")*|"(?:[^"]|\\\\")*")$')
 
+    # Regexes for advanced search
+    rxWithinClause = re.compile(' +within +(s|sentence|u|utterance|p|paragraph|'
+                                't|turn|text|session) *$')
+
     def __init__(self):
         pass
 
@@ -91,10 +95,10 @@ class QueryParser:
         # Abstract function
         raise NotImplementedError()
 
-    def translate_fcsql(self, query: str, config: ResourceConfig, basicSearch: bool = False, start=0, end=-1):
+    def translate_simple(self, query: str, config: ResourceConfig, start=0, end=-1):
         """
-        Translate an FCS-QL query into a corpus-specific query (GET query,
-        JSON Elasticsearch query or whatever).
+        Translate a simple search (CQL) query into a corpus-specific query
+        (GET query, JSON Elasticsearch query or whatever).
         If something is wrong with the query, raise a Diagnostic exception.
         This is a top-level platform-independent function. It recursively
         parses the query by locating the hierarchically highest logical operator
@@ -110,9 +114,8 @@ class QueryParser:
                 raise Diagnostic(DiagnosticTypes.sru, 27)
             if self.rxTermQuery.search(query) is not None:
                 return self.build_get_string(self.term_query(query, config), config)
-            return self.build_get_string(self.translate_fcsql(query, config,
-                                                              basicSearch=basicSearch,
-                                                              start=start, end=end),
+            return self.build_get_string(self.translate_simple(query, config,
+                                                               start=start, end=end),
                                          config)
             # if query.count('(') != query.count(')'):
             #     return None
@@ -129,21 +132,76 @@ class QueryParser:
         iOpPos, strOp = self.find_operator(query, start, end)
         if iOpPos == -1:
             if query[start] == '(' and query[end - 1] == ')':
-                return self.translate_fcsql(query, config, basicSearch=basicSearch, start=start + 1, end=end - 1)
+                return self.translate_simple(query, config, start=start + 1, end=end - 1)
             else:
                 return self.term_query(query[start:end], config)
         if strOp in ('AND', 'OR'):
-            resultLeft = self.translate_fcsql(query, config, basicSearch=basicSearch, start=start, end=iOpPos)
-            resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp), end=end)
+            resultLeft = self.translate_simple(query, config, start=start, end=iOpPos)
+            resultRight = self.translate_simple(query, config, start=iOpPos + len(strOp),
+                                                end=end)
             if len(resultLeft) <= 0 or len(resultRight) <= 0:
                 raise Diagnostic(DiagnosticTypes.sru, 10)
             return self.binary_bool(strOp, resultLeft, resultRight, config)
         elif strOp == 'NOT':
-            resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp),
-                                               end=end)
+            resultRight = self.translate_simple(query, config, start=iOpPos + len(strOp),
+                                                end=end)
             return self.not_bool(resultRight, config)
         return {}
 
+    def adv_main_query(self, query: str, config: ResourceConfig, start=0, end=-1):
+        """
+        Parse the main part of an advanced search (FCS-QL) query, i.e.
+        query[start:end] with the "within" clause already removed.
+        If something is wrong with the query, raise a Diagnostic exception.
+        Callers have to pass an explicit end: with the default end=-1,
+        the boundary check below raises SRU diagnostic 10.
+        The parsing itself is not implemented yet: after validating and
+        trimming the boundaries, this function raises NotImplementedError.
+        """
+        if len(query) <= 0:
+            raise Diagnostic(DiagnosticTypes.sru, 27)
+        if start >= len(query) - 1 or end <= 0:
+            raise Diagnostic(DiagnosticTypes.sru, 10)
+        # Skip whitespace on both sides of the current segment
+        while start < len(query) and query[start] in ' \t\n':
+            start += 1
+        while end > 0 and query[end - 1] in ' \t\n':
+            end -= 1
+        if start >= end:
+            raise Diagnostic(DiagnosticTypes.sru, 10)
+        raise NotImplementedError
+
+    def translate_advanced(self, query: str, config: ResourceConfig):
+        """
+        Translate an advanced search (FCS-QL) query into a corpus-specific query
+        (GET query, JSON Elasticsearch query or whatever).
+        If something is wrong with the query, raise a Diagnostic exception.
+        This is a top-level platform-independent function. It cuts off the
+        trailing "within" clause, if there is one, and hands the rest of the
+        query over to adv_main_query().
+        """
+        withinClause = ''
+        end = len(query)
+        m = self.rxWithinClause.search(query)
+        if m is not None:
+            # Expand the one-letter context names to their full forms
+            withinClause = m.group(1)
+            if withinClause == 's':
+                withinClause = 'sentence'
+            elif withinClause == 'u':
+                withinClause = 'utterance'
+            elif withinClause == 'p':
+                withinClause = 'paragraph'
+            elif withinClause == 't':
+                withinClause = 'turn'
+            query = self.rxWithinClause.sub('', query)
+            end = len(query)
+        # NOTE: withinClause is not used further down yet
+        if end == 0:
+            raise Diagnostic(DiagnosticTypes.sru, 27)
+        return self.adv_main_query(query, config, start=0, end=end)
+
+
 
     def validate_query(self, operation, version, queryType, query,
                        xFcsEndpointDescription, xFcsContext, xFcsDataviews, xFcsRewritesAllowed):
@@ -182,7 +240,7 @@ class QueryParser:
 
         # Check version-specific parameters and values
         if version == SRUVersion.v1_2:
-            if queryType == QueryType.cql:
+            if queryType == QueryType.fcs:
                 diagnostics.append(Diagnostic(DiagnosticTypes.sru, 8, details='queryType'))
             for dv in xFcsDataviews.split(','):
                 dv = dv.strip()
diff --git a/common/views_logic.py b/common/views_logic.py
index 9bd86ee7d9da26453dbec1c9a2f95bfb230625a0..77584b85d00a8ab9b459dda1fb6648eecda5b02d 100644
--- a/common/views_logic.py
+++ b/common/views_logic.py
@@ -80,8 +80,11 @@ def process_search_retrieve(version: SRUVersion,
     """
     if config.platform == CorpPlatform.tsakorpus:
         try:
-            strGetParams = app.qp_tsakorpus.translate_fcsql(query, config)
-            print(strGetParams)
+            if queryType == QueryType.cql:
+                strGetParams = app.qp_tsakorpus.translate_simple(query, config)
+            else:
+                strGetParams = app.qp_tsakorpus.translate_advanced(query, config)
+            # print(strGetParams)
             res = app.qp_tsakorpus.send_query(strGetParams, config)
         except Diagnostic as diag:
             return fatal_response(Operation.searchRetrieve, version, diagnostics + [diag], request, templates)
@@ -98,9 +101,11 @@ def process_search_retrieve(version: SRUVersion,
                         media_type='application/xml')
     elif config.platform == CorpPlatform.litterae:
         try:
-            strGetParams = app.qp_litterae.translate_fcsql(query, config)
-            print(strGetParams)
-            # return strGetParams
+            if queryType == QueryType.cql:
+                strGetParams = app.qp_litterae.translate_simple(query, config)
+            else:
+                strGetParams = app.qp_litterae.translate_advanced(query, config)
+            # print(strGetParams)
             res = app.qp_litterae.send_query(strGetParams, config)
             print(res)
         except Diagnostic as diag:
diff --git a/main.py b/main.py
index fdb2842388476a5bae76df352b03e4a71e5ec4b5..1796659cc93104305d3c2fa195a6c85f9c7e3ee1 100644
--- a/main.py
+++ b/main.py
@@ -42,7 +42,7 @@ def endpoint(
         corpusID: str,
         operation: Operation = Operation.explain,
         version: SRUVersion = SRUVersion.v2_0,
-        queryType: QueryType = QueryType.fcs,
+        queryType: QueryType = QueryType.cql,
         query: str = '',
         xFcsEndpointDescription: str = Query(
             default='',
@@ -82,7 +82,6 @@ def endpoint(
                                        xFcsDataviews, xFcsRewritesAllowed)
     # Now, do the substantial things
     return process_request(operation, version, queryType, query, searchOptions, config, diagnostics, app, request, templates)
-    # return {'operation': operation, 'version': version}
 
 
 if __name__ == '__main__':