Skip to content
Snippets Groups Projects
Commit cd9724df authored by Arkhangelskiy, Timofey's avatar Arkhangelskiy, Timofey
Browse files

Make Diagnostic an Exception subclass; work on Tsakorpus FCS-QL

parent 5bc4ddfe
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,7 @@ from .enums import * ...@@ -2,7 +2,7 @@ from .enums import *
import jinja2 import jinja2
class Diagnostic: class Diagnostic(Exception):
""" """
Contains methods for issuing diagnostic messages (fatal or non-fatal) Contains methods for issuing diagnostic messages (fatal or non-fatal)
as per FCS specifications. as per FCS specifications.
...@@ -12,10 +12,12 @@ class Diagnostic: ...@@ -12,10 +12,12 @@ class Diagnostic:
""" """
fatalFCSDiagnostics = {3, 10, 11} # FCS specifications, 4.2 fatalFCSDiagnostics = {3, 10, 11} # FCS specifications, 4.2
fatalSRUDiagnostics = {8, 10, 27, 235} # A subset actually used by this endpoint fatalSRUDiagnostics = {8, 10, 27, 37, 47, 48, 235} # A subset actually used by this endpoint
stdMessages = { stdMessages = {
(DiagnosticTypes.sru, 10): 'Something is wrong with the query syntax.' (DiagnosticTypes.sru, 10): 'Something is wrong with the query syntax.',
(DiagnosticTypes.sru, 27): 'The query should not be empty.',
(DiagnosticTypes.sru, 37): 'Unsupported boolean operator.'
} }
def __init__(self, diagType: DiagnosticTypes, diagID: int, def __init__(self, diagType: DiagnosticTypes, diagID: int,
...@@ -55,7 +57,7 @@ class Diagnostic: ...@@ -55,7 +57,7 @@ class Diagnostic:
return 'info:srw/diagnostic/1/' + str(self.diagID) return 'info:srw/diagnostic/1/' + str(self.diagID)
return '' return ''
def __repr__(self): def __str__(self):
""" """
Return the XML version of this diagnostic. Return the XML version of this diagnostic.
""" """
...@@ -65,6 +67,9 @@ class Diagnostic: ...@@ -65,6 +67,9 @@ class Diagnostic:
message=self.message) message=self.message)
return xmlText.strip() return xmlText.strip()
def __repr__(self):
return str(self)
if __name__ == '__main__': if __name__ == '__main__':
# Test # Test
......
import copy
import re import re
from .query_parser import QueryParser from .query_parser import QueryParser
from .config import ResourceConfig from .config import ResourceConfig
...@@ -8,6 +9,8 @@ class TsakorpusQueryParser(QueryParser): ...@@ -8,6 +9,8 @@ class TsakorpusQueryParser(QueryParser):
Parses search queries for Tsakorpus-based corpora. Parses search queries for Tsakorpus-based corpora.
""" """
rxTsakorpusBool = re.compile('[()|,]')
def term_query(self, query, config): def term_query(self, query, config):
""" """
Return list of query parameters for one term or sequence of terms. Return list of query parameters for one term or sequence of terms.
...@@ -21,7 +24,7 @@ class TsakorpusQueryParser(QueryParser): ...@@ -21,7 +24,7 @@ class TsakorpusQueryParser(QueryParser):
for term in query.split(' '): for term in query.split(' '):
if len(term) > 0: if len(term) > 0:
iTerm += 1 iTerm += 1
getParams.append(['w', iTerm, term]) getParams.append(['wf', iTerm, term])
if iTerm >= 2: if iTerm >= 2:
getParams.append(['word_rel_', [iTerm-1, iTerm], '1']) getParams.append(['word_rel_', [iTerm-1, iTerm], '1'])
getParams.append(['word_dist_from_', [iTerm-1, iTerm], '1']) getParams.append(['word_dist_from_', [iTerm-1, iTerm], '1'])
...@@ -29,25 +32,88 @@ class TsakorpusQueryParser(QueryParser): ...@@ -29,25 +32,88 @@ class TsakorpusQueryParser(QueryParser):
return getParams return getParams
def term_indexes(self, getParams):
    """
    Find all search term indexes used in the GET parameters
    specified by getParams list. Return list of integers (1-based).

    Each parameter is a sequence whose second element is either a single
    term index (int) or a list of term indexes. Parameters whose second
    element is anything else are ignored. The returned list is sorted in
    ascending order and contains every index only once.
    """
    terms = set()
    for param in getParams:
        index = param[1]
        if isinstance(index, int):
            terms.add(index)
        elif isinstance(index, list):
            terms.update(index)
    # sorted() already returns a new list, no extra copy is needed.
    return sorted(terms)
def shift_term_indexes(self, getParams, shift):
    """
    Increase all search term indexes in the GET parameters
    specified by getParams by shift.

    Return a new list of (name, index, value) tuples; getParams itself
    is not modified. The index of a parameter may be a single int or a
    list/tuple of ints (returned as a list). A parameter whose second
    element is neither of those carries no term index, so it is copied
    without being shifted.
    """
    getParamsShifted = []
    for param in getParams:
        index = param[1]
        if isinstance(index, int):
            index = index + shift
        elif isinstance(index, (list, tuple)):
            index = [i + shift for i in index]
        # Any other value is not a term index: keep it as it is instead of
        # trying to iterate over it.
        getParamsShifted.append((param[0], index, param[2]))
    return getParamsShifted
def binary_bool(self, strOp, operandL, operandR, config):
    """
    Combine the already translated operands of a binary boolean
    operator into one list of Tsakorpus GET parameters.

    strOp is the operator name ('AND' or 'OR'). operandL and operandR
    are lists of GET parameters, each parameter being a sequence
    (name, term index or list of term indexes, value). config is not
    used by this method.

    The term indexes of the right operand are shifted so that they
    follow the largest index of the left operand.
    - AND: the parameters of both operands are concatenated.
    - OR: both operands must be a single 'wf' term; their values are
      merged into one 'wf' parameter with the '|' operator.

    Raise a Diagnostic exception:
    - SRU 10 if one of the operands is empty;
    - SRU 48 if multi-word sequences are combined (both operands for
      AND, at least one operand for OR);
    - SRU 47 if an OR operand does not start with a 'wf' parameter;
    - SRU 37 if strOp is neither 'AND' nor 'OR'.
    """
    if not operandL or not operandR:
        raise Diagnostic(DiagnosticTypes.sru, 10)
    termsL = self.term_indexes(operandL)
    # default=0: a left operand without any term index must not turn into
    # a bare ValueError; the right operand is simply left unshifted.
    operandR = self.shift_term_indexes(operandR, max(termsL, default=0))
    termsR = self.term_indexes(operandR)
    multiWordMessage = 'Tsakorpus does not support queries that combine several ' \
                       'multi-word sequences with boolean operators.'
    if strOp == 'AND':
        if len(termsL) > 1 and len(termsR) > 1:
            raise Diagnostic(DiagnosticTypes.sru, 48, message=multiWordMessage)
        return operandL + operandR
    if strOp == 'OR':
        if len(termsL) > 1 or len(termsR) > 1:
            raise Diagnostic(DiagnosticTypes.sru, 48, message=multiWordMessage)
        if operandL[0][0] != 'wf' or operandR[0][0] != 'wf':
            raise Diagnostic(DiagnosticTypes.sru, 47)
        valueL = operandL[0][2]
        valueR = operandR[0][2]
        # Protect each side that already contains boolean syntax, so that
        # the new '|' cannot regroup with the operators inside it.
        if self.rxTsakorpusBool.search(valueL) is not None:
            valueL = '(' + valueL + ')'
        if self.rxTsakorpusBool.search(valueR) is not None:
            valueR = '(' + valueR + ')'
        return [('wf', operandL[0][1], valueL + '|' + valueR)]
    raise Diagnostic(DiagnosticTypes.sru, 37, details=strOp)
def translate_fcsql(self, query: str, config: ResourceConfig, basicSearch: bool = False, start=0, end=-1): def translate_fcsql(self, query: str, config: ResourceConfig, basicSearch: bool = False, start=0, end=-1):
""" """
Translate an FCS-QL query into a Tsakorpus GET query. Translate an FCS-QL query into a Tsakorpus GET query.
If something is wrong with the query, return a diagnostic. If something is wrong with the query, raise a Diagnostic exception.
The function is recursive and only looks at the part of the string The function is recursive and only looks at the part of the string
delimited by start and end parameters. delimited by start and end parameters.
""" """
print(query, start, end)
if end == -1: if end == -1:
end = len(query) end = len(query)
if end == 0: if end == 0:
return Diagnostic(DiagnosticTypes.sru, 27, message='The query should not be empty.') raise Diagnostic(DiagnosticTypes.sru, 27)
if self.rxTermQuery.search(query) is not None: if self.rxTermQuery.search(query) is not None:
return self.term_query(query, config) return self.term_query(query, config)
# if query.count('(') != query.count(')'): # if query.count('(') != query.count(')'):
# return None # return None
if len(query) <= 0: if len(query) <= 0:
return Diagnostic(DiagnosticTypes.sru, 27, message='The query should not be empty.') raise Diagnostic(DiagnosticTypes.sru, 27)
if start >= len(query) - 1 or end <= 0:
raise Diagnostic(DiagnosticTypes.sru, 10)
while start < len(query) and query[start] in ' \t\n':
start += 1
while end > 0 and query[end - 1] in ' \t\n':
end -= 1
if start >= end: if start >= end:
return Diagnostic(DiagnosticTypes.sru, 10) raise Diagnostic(DiagnosticTypes.sru, 10)
iOpPos, strOp = self.find_operator(query, start, end) iOpPos, strOp = self.find_operator(query, start, end)
if iOpPos == -1: if iOpPos == -1:
if query[start] == '(' and query[end - 1] == ')': if query[start] == '(' and query[end - 1] == ')':
...@@ -57,9 +123,8 @@ class TsakorpusQueryParser(QueryParser): ...@@ -57,9 +123,8 @@ class TsakorpusQueryParser(QueryParser):
if strOp in ('AND', 'OR'): if strOp in ('AND', 'OR'):
resultLeft = self.translate_fcsql(query, config, basicSearch=basicSearch, start=start, end=iOpPos) resultLeft = self.translate_fcsql(query, config, basicSearch=basicSearch, start=start, end=iOpPos)
resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp), end=end) resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp), end=end)
if (type(resultRight) is Diagnostic or type(resultLeft) is Diagnostic if len(resultLeft) <= 0 or len(resultRight) <= 0:
or len(resultLeft) <= 0 or len(resultRight) <= 0): raise Diagnostic(DiagnosticTypes.sru, 10)
return Diagnostic(DiagnosticTypes.sru, 10)
return self.binary_bool(strOp, resultLeft, resultRight, config) return self.binary_bool(strOp, resultLeft, resultRight, config)
elif strOp == 'NOT': elif strOp == 'NOT':
resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp), resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp),
......
...@@ -66,9 +66,11 @@ def endpoint( ...@@ -66,9 +66,11 @@ def endpoint(
if operation == Operation.searchRetrieve: if operation == Operation.searchRetrieve:
if config.platform == CorpPlatform.tsakorpus: if config.platform == CorpPlatform.tsakorpus:
try:
res = app.qp_tsakorpus.translate_fcsql(query, config) res = app.qp_tsakorpus.translate_fcsql(query, config)
if type(res) == Diagnostic: except Diagnostic as diag:
return Response(content=str(res), media_type='application/xml') print('diag', str(diag))
return Response(content=str(diag), media_type='application/xml')
return str(res) return str(res)
return {'operation': operation, 'version': version} return {'operation': operation, 'version': version}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment