Skip to content
Snippets Groups Projects
Commit cd9724df authored by Arkhangelskiy, Timofey
Browse files

Make Diagnostic an Exception subclass; work on Tsakorpus FCS-QL

parent 5bc4ddfe
Branches
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@ from .enums import *
import jinja2
class Diagnostic:
class Diagnostic(Exception):
"""
Contains methods for issuing diagnostic messages (fatal or non-fatal)
as per FCS specifications.
......@@ -12,10 +12,12 @@ class Diagnostic:
"""
fatalFCSDiagnostics = {3, 10, 11} # FCS specifications, 4.2
fatalSRUDiagnostics = {8, 10, 27, 235} # A subset actually used by this endpoint
fatalSRUDiagnostics = {8, 10, 27, 37, 47, 48, 235} # A subset actually used by this endpoint
stdMessages = {
(DiagnosticTypes.sru, 10): 'Something is wrong with the query syntax.'
(DiagnosticTypes.sru, 10): 'Something is wrong with the query syntax.',
(DiagnosticTypes.sru, 27): 'The query should not be empty.',
(DiagnosticTypes.sru, 37): 'Unsupported boolean operator.'
}
def __init__(self, diagType: DiagnosticTypes, diagID: int,
......@@ -55,7 +57,7 @@ class Diagnostic:
return 'info:srw/diagnostic/1/' + str(self.diagID)
return ''
def __repr__(self):
def __str__(self):
"""
Return the XML version of this diagnostic.
"""
......@@ -65,6 +67,9 @@ class Diagnostic:
message=self.message)
return xmlText.strip()
def __repr__(self):
return str(self)
if __name__ == '__main__':
# Test
......
import copy
import re
from .query_parser import QueryParser
from .config import ResourceConfig
......@@ -8,6 +9,8 @@ class TsakorpusQueryParser(QueryParser):
Parses search queries for Tsakorpus-based corpora.
"""
rxTsakorpusBool = re.compile('[()|,]')
def term_query(self, query, config):
"""
Return list of query parameters for one term or sequence of terms.
......@@ -21,7 +24,7 @@ class TsakorpusQueryParser(QueryParser):
for term in query.split(' '):
if len(term) > 0:
iTerm += 1
getParams.append(['w', iTerm, term])
getParams.append(['wf', iTerm, term])
if iTerm >= 2:
getParams.append(['word_rel_', [iTerm-1, iTerm], '1'])
getParams.append(['word_dist_from_', [iTerm-1, iTerm], '1'])
......@@ -29,25 +32,88 @@ class TsakorpusQueryParser(QueryParser):
return getParams
def term_indexes(self, getParams):
    """
    Find all search term indexes used in the GET parameters
    specified by getParams list. Return list of integers (1-based).

    Each parameter is a sequence whose second element is either a single
    term index (int) or a list/tuple of term indexes. Parameters whose
    second element is anything else are ignored. The result is sorted in
    ascending order and contains no duplicates.
    """
    terms = set()
    for param in getParams:
        index = param[1]
        if isinstance(index, int):
            terms.add(index)
        elif isinstance(index, (list, tuple)):
            # Relation-like parameters refer to several terms at once.
            terms.update(index)
    return sorted(terms)
def shift_term_indexes(self, getParams, shift):
    """
    Increase all search term indexes in the GET parameters
    specified by getParams by shift.

    Return a new list of (name, index, value) tuples; getParams itself
    is not modified. An int index is increased by shift; a list/tuple
    of indexes is replaced by a list with every element increased by
    shift. A parameter whose second element is neither (i.e. does not
    carry a term index) is copied unchanged instead of failing.
    """
    getParamsShifted = []
    for param in getParams:
        index = param[1]
        if isinstance(index, int):
            index = index + shift
        elif isinstance(index, (list, tuple)):
            index = [i + shift for i in index]
        # Any other value is not a term index, so there is nothing to shift.
        getParamsShifted.append((param[0], index, param[2]))
    return getParamsShifted
def binary_bool(self, strOp, operandL, operandR, config):
    """
    Combine two already translated operands with a binary boolean
    operator and return the resulting list of GET parameters.

    strOp is the operator name ('AND' or 'OR'); operandL and operandR
    are lists of GET parameters. config is accepted for interface
    consistency and is not used here.

    The term indexes of the right operand are shifted so that they
    follow the highest term index of the left operand.

    Raise Diagnostic (SRU) with code:
    - 10 if either operand is empty;
    - 48 if AND joins two multi-term operands, or OR involves
      any multi-term operand;
    - 47 if an OR operand does not start with a 'wf' parameter;
    - 37 if strOp is neither 'AND' nor 'OR'.
    """
    if len(operandL) <= 0 or len(operandR) <= 0:
        raise Diagnostic(DiagnosticTypes.sru, 10)
    termsL = self.term_indexes(operandL)
    # default=0: a left operand without any indexed parameter must not
    # crash with ValueError; the right operand is then left unshifted.
    operandR = self.shift_term_indexes(operandR, max(termsL, default=0))
    termsR = self.term_indexes(operandR)
    multiSeqMessage = 'Tsakorpus does not support queries that combine several ' \
                      'multi-word sequences with boolean operators.'
    if strOp == 'AND':
        if len(termsL) > 1 and len(termsR) > 1:
            raise Diagnostic(DiagnosticTypes.sru, 48, message=multiSeqMessage)
        return operandL + operandR
    if strOp == 'OR':
        if len(termsL) > 1 or len(termsR) > 1:
            raise Diagnostic(DiagnosticTypes.sru, 48, message=multiSeqMessage)
        if operandL[0][0] != 'wf' or operandR[0][0] != 'wf':
            raise Diagnostic(DiagnosticTypes.sru, 47)
        valueL = operandL[0][2]
        if self.rxTsakorpusBool.search(valueL) is not None:
            # The left value already contains boolean syntax: keep it grouped.
            valueL = '(' + valueL + ')'
        # NOTE(review): the right value is never parenthesized, even if it
        # contains boolean syntax itself -- confirm this is intended.
        return [('wf', operandL[0][1], valueL + '|' + operandR[0][2])]
    raise Diagnostic(DiagnosticTypes.sru, 37, details=strOp)
def translate_fcsql(self, query: str, config: ResourceConfig, basicSearch: bool = False, start=0, end=-1):
"""
Translate an FCS-QL query into a Tsakorpus GET query.
If something is wrong with the query, return a diagnostic.
If something is wrong with the query, raise a Diagnostic exception.
The function is recursive and only looks at the part of the string
delimited by start and end parameters.
"""
print(query, start, end)
if end == -1:
end = len(query)
if end == 0:
return Diagnostic(DiagnosticTypes.sru, 27, message='The query should not be empty.')
raise Diagnostic(DiagnosticTypes.sru, 27)
if self.rxTermQuery.search(query) is not None:
return self.term_query(query, config)
# if query.count('(') != query.count(')'):
# return None
if len(query) <= 0:
return Diagnostic(DiagnosticTypes.sru, 27, message='The query should not be empty.')
raise Diagnostic(DiagnosticTypes.sru, 27)
if start >= len(query) - 1 or end <= 0:
raise Diagnostic(DiagnosticTypes.sru, 10)
while start < len(query) and query[start] in ' \t\n':
start += 1
while end > 0 and query[end - 1] in ' \t\n':
end -= 1
if start >= end:
return Diagnostic(DiagnosticTypes.sru, 10)
raise Diagnostic(DiagnosticTypes.sru, 10)
iOpPos, strOp = self.find_operator(query, start, end)
if iOpPos == -1:
if query[start] == '(' and query[end - 1] == ')':
......@@ -57,9 +123,8 @@ class TsakorpusQueryParser(QueryParser):
if strOp in ('AND', 'OR'):
resultLeft = self.translate_fcsql(query, config, basicSearch=basicSearch, start=start, end=iOpPos)
resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp), end=end)
if (type(resultRight) is Diagnostic or type(resultLeft) is Diagnostic
or len(resultLeft) <= 0 or len(resultRight) <= 0):
return Diagnostic(DiagnosticTypes.sru, 10)
if len(resultLeft) <= 0 or len(resultRight) <= 0:
raise Diagnostic(DiagnosticTypes.sru, 10)
return self.binary_bool(strOp, resultLeft, resultRight, config)
elif strOp == 'NOT':
resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch, start=iOpPos + len(strOp),
......
......@@ -66,9 +66,11 @@ def endpoint(
if operation == Operation.searchRetrieve:
if config.platform == CorpPlatform.tsakorpus:
res = app.qp_tsakorpus.translate_fcsql(query, config)
if type(res) == Diagnostic:
return Response(content=str(res), media_type='application/xml')
try:
res = app.qp_tsakorpus.translate_fcsql(query, config)
except Diagnostic as diag:
print('diag', str(diag))
return Response(content=str(diag), media_type='application/xml')
return str(res)
return {'operation': operation, 'version': version}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment