Skip to content
Snippets Groups Projects
Commit 5bc4ddfe authored by Arkhangelskiy, Timofey's avatar Arkhangelskiy, Timofey
Browse files

Start translating FCS-QL queries into Tsakorpus GET queries

parent 177d5028
Branches
No related tags found
No related merge requests found
......@@ -16,9 +16,10 @@ class ResourceConfig:
for one of the resources this endpoint communicates with.
"""
rxExt = re.compile('\\.[^.]*$')
def __init__(self, fnameConfig=None):
self.platform = CorpPlatform.annis
self.corpus_id = ''
self.transport_protocol = 'https'
self.host = '127.0.0.1'
self.port = '5000'
......@@ -168,3 +169,20 @@ class ResourceConfig:
dictConfig = self.processed_gui_config(data)
with open(fnameOut, 'w', encoding='utf-8') as fOut:
json.dump(dictConfig, fOut, sort_keys=True, ensure_ascii=False, indent=2)
def read_configs(configDir='./config'):
    """
    Load all configuration files from the configuration directory
    (one file per resource). Initialize ResourceConfig instances for
    each of them. Return a dictionary where the keys are filenames
    without the extension and the values contain respective
    ResourceConfig objects.

    Only regular files whose name ends in ".json" (case-insensitive)
    are loaded. The directory is traversed in sorted order, so the
    resulting dictionary has a deterministic key order.
    Errors raised by os.listdir (e.g. for a missing directory) are
    not caught here.
    """
    configs = {}
    for fname in sorted(os.listdir(configDir)):
        if not fname.lower().endswith('.json'):
            continue
        fnameFull = os.path.join(configDir, fname)
        if not os.path.isfile(fnameFull):
            # E.g. a subdirectory whose name happens to end in ".json":
            # it cannot be read as a configuration file.
            continue
        fnameNoExt = ResourceConfig.rxExt.sub('', fname)
        configs[fnameNoExt] = ResourceConfig(fnameFull)
    return configs
......@@ -12,7 +12,11 @@ class Diagnostic:
"""
fatalFCSDiagnostics = {3, 10, 11} # FCS specifications, 4.2
fatalSRUDiagnostics = {8, 10} # A subset actually used by this endpoint
fatalSRUDiagnostics = {8, 10, 27, 235} # A subset actually used by this endpoint
stdMessages = {
(DiagnosticTypes.sru, 10): 'Something is wrong with the query syntax.'
}
def __init__(self, diagType: DiagnosticTypes, diagID: int,
details: str = '',
......@@ -24,6 +28,8 @@ class Diagnostic:
self.diagID = diagID
self.details = details
self.message = message
if len(self.message) <= 0 and (diagType, diagID) in self.stdMessages:
self.message = self.stdMessages[(diagType, diagID)]
self.templateLoader = jinja2.FileSystemLoader(searchpath="./static")
self.templateEnv = jinja2.Environment(loader=self.templateLoader)
......
from .enums import *
from .diagnostics import Diagnostic
import re
class QueryParser:
......@@ -7,9 +8,39 @@ class QueryParser:
This class contains commonly used methods for initial parsing of a GET
query. It does not include platform-specific methods.
"""
rxTermQuery = re.compile('^(?:(?:[^ "]|\\\\")*|"(?:[^"]|\\\\")*")$')
def __init__(self):
pass
@staticmethod
def find_operator(strQuery, start=0, end=-1):
    """
    Find the first top-level Boolean operator in strQuery[start:end].

    strQuery is the query string; start is the first index to look at;
    end is the index right after the last character to look at
    (-1 means "up to the end of the string").

    Return a tuple (position, operator), where operator is one of
    'NOT', 'AND' or 'OR'. Return (-1, '') if no operator was found.
    'NOT' is only recognized at the very beginning of the span.
    'AND' and 'OR' are only recognized outside of double quotes and
    outside of parentheses.

    An operator only counts if it is a separate token, i.e. it is not
    adjacent to a letter, a digit or an underscore, and if it lies
    entirely inside the span. Otherwise, words such as "SANDWICH" or
    "WORD" would be split at the "AND" / "OR" they contain.
    """
    if end == -1 or end > len(strQuery):
        end = len(strQuery)

    def is_word_char(pos):
        # Positions outside of the span count as token boundaries.
        return (start <= pos < end
                and (strQuery[pos].isalnum() or strQuery[pos] == '_'))

    def operator_at(pos, op):
        opEnd = pos + len(op)
        return (opEnd <= end
                and strQuery[pos:opEnd] == op
                and not is_word_char(pos - 1)
                and not is_word_char(opEnd))

    if operator_at(start, 'NOT'):
        return start, 'NOT'

    parenthBalance = 0
    inQuotes = False
    for i in range(start, end):
        c = strQuery[i]
        if c == '"':
            # A quote preceded by a backslash is an escaped quote
            # (rxTermQuery accepts those), so it does not open or
            # close a quoted string.
            if not (i > start and strQuery[i - 1] == '\\'):
                inQuotes = not inQuotes
            continue
        if inQuotes:
            continue
        if c == '(':
            parenthBalance += 1
        elif c == ')':
            parenthBalance -= 1
        elif parenthBalance == 0:
            for op in ('AND', 'OR'):
                if operator_at(i, op):
                    return i, op
    return -1, ''
def validate_query(self, operation, version, queryType, query,
xFcsEndpointDescription, xFcsContext,
xFcsDataviews, xFcsRewritesAllowed):
......
import re
from .query_parser import QueryParser
from .config import ResourceConfig
from .diagnostics import Diagnostic, DiagnosticTypes
class TsakorpusQueryParser(QueryParser):
    """
    Parses search queries for Tsakorpus-based corpora.
    """

    def term_query(self, query, config):
        """
        Return list of query parameters for one term or sequence of terms.

        query is a single term or several terms separated by spaces,
        optionally enclosed in double quotes. config is the configuration
        of the resource being searched (not used here at the moment).

        Each element of the returned list has the form
        [parameter name, index or pair of indices, value]. Every term
        yields a 'w' parameter numbered from 1. Starting from the second
        term, three more parameters link the term to the previous one
        (presumably requiring a distance of exactly 1 between them --
        TODO confirm against the Tsakorpus query format).
        Return a Diagnostic if the query is empty.
        """
        if len(query) >= 2 and query.startswith('"') and query.endswith('"'):
            query = query[1:-1]
        if not query:
            return Diagnostic(DiagnosticTypes.sru, 10)
        getParams = []
        iTerm = 0
        for term in query.split(' '):
            if not term:
                # Several spaces in a row produce empty strings
                continue
            iTerm += 1
            getParams.append(['w', iTerm, term])
            if iTerm >= 2:
                getParams.append(['word_rel_', [iTerm-1, iTerm], '1'])
                getParams.append(['word_dist_from_', [iTerm-1, iTerm], '1'])
                getParams.append(['word_dist_to_', [iTerm-1, iTerm], '1'])
        return getParams

    def translate_fcsql(self, query: str, config: ResourceConfig, basicSearch: bool = False, start=0, end=-1):
        """
        Translate an FCS-QL query into a Tsakorpus GET query.
        If something is wrong with the query, return a diagnostic.
        The function is recursive and only looks at the part of the string
        delimited by start and end parameters (end is exclusive; -1 means
        the end of the string).

        Whitespace at both ends of the span is ignored, so that operands
        such as "(a) " in "(a) AND (b)" or " NOT b" in "a AND NOT b" are
        recognized as a parenthesized expression / a negation rather than
        being passed on as plain terms.
        """
        if end == -1:
            end = len(query)
        if end == 0 or len(query) <= 0:
            return Diagnostic(DiagnosticTypes.sru, 27, message='The query should not be empty.')
        end = min(end, len(query))

        # Shortcut for queries that consist of one simple term as a whole.
        # It only applies when the span covers the entire query: in
        # recursive calls, the entire string is no longer what has to be
        # translated.
        # NOTE(review): a single term in parentheses, such as (a), also
        # matches the pattern and is passed on together with the
        # parentheses -- confirm that this is intended.
        if start == 0 and end == len(query) and self.rxTermQuery.search(query) is not None:
            return self.term_query(query, config)

        while start < end and query[start].isspace():
            start += 1
        while end > start and query[end - 1].isspace():
            end -= 1
        if start >= end:
            return Diagnostic(DiagnosticTypes.sru, 10)

        iOpPos, strOp = self.find_operator(query, start, end)
        if iOpPos == -1:
            if query[start] == '(' and query[end - 1] == ')':
                return self.translate_fcsql(query, config, basicSearch=basicSearch,
                                            start=start + 1, end=end - 1)
            return self.term_query(query[start:end], config)

        if strOp in ('AND', 'OR'):
            resultLeft = self.translate_fcsql(query, config, basicSearch=basicSearch,
                                              start=start, end=iOpPos)
            resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch,
                                               start=iOpPos + len(strOp), end=end)
            if (isinstance(resultLeft, Diagnostic) or isinstance(resultRight, Diagnostic)
                    or len(resultLeft) <= 0 or len(resultRight) <= 0):
                return Diagnostic(DiagnosticTypes.sru, 10)
            # NOTE(review): binary_bool is not defined in the visible part
            # of this class; presumably provided elsewhere -- TODO confirm.
            return self.binary_bool(strOp, resultLeft, resultRight, config)

        if strOp == 'NOT':
            resultRight = self.translate_fcsql(query, config, basicSearch=basicSearch,
                                               start=iOpPos + len(strOp), end=end)
            # A failed or empty operand must not be negated
            if isinstance(resultRight, Diagnostic) or len(resultRight) <= 0:
                return Diagnostic(DiagnosticTypes.sru, 10)
            # NOTE(review): not_bool is not defined in the visible part
            # of this class; presumably provided elsewhere -- TODO confirm.
            return self.not_bool(resultRight)

        return {}
if __name__ == '__main__':
pass
{
"host": "0.0.0.0",
"port": "80",
"max_hits": 20
"max_hits": 20,
"platform": "tsakorpus"
}
\ No newline at end of file
from fastapi import FastAPI, Request, Query
from fastapi import FastAPI, Request, Query, Response
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from common.query_parser import QueryParser
from common.tsakorpus_query_parser import TsakorpusQueryParser
from common.enums import *
from common.diagnostics import Diagnostic
from common.config import ResourceConfig
from common.config import ResourceConfig, read_configs
import json
import os
import re
import uvicorn
rxExt = re.compile('\\.[^.]*$')
app = FastAPI()
app.mount('/static', StaticFiles(directory='static'), name='static')
templates = Jinja2Templates(directory='static')
app.qp = QueryParser()
app.configs = {}
i = 0
for fname in os.listdir('config'):
if not fname.lower().endswith('.json'):
continue
i += 1
fnameFull = os.path.join('config', fname)
fnameNoExt = rxExt.sub('', fname)
app.configs[fnameNoExt] = ResourceConfig(fnameFull)
app.qp_tsakorpus = TsakorpusQueryParser()
app.configs = read_configs()
@app.get('/')
......@@ -35,9 +27,8 @@ def root():
return {"message": "Hello World"}
@app.get('/fcs-endpoint/{platform}/{corpusID}')
@app.get('/fcs-endpoint/{corpusID}')
def endpoint(
platform: CorpPlatform,
corpusID: str,
operation: Operation = Operation.explain,
version: SRUVersion = SRUVersion.v2_0,
......@@ -60,19 +51,27 @@ def endpoint(
alias='x-fcs-rewrites-allowed'
)
):
if corpusID not in app.configs:
message = 'No corpus with this ID (' + corpusID +') is served by this Endpoint. ' \
'Valid corpus IDs are: ' + '; '.join(cID for cID in sorted(app.configs)) + '.'
diagBody = str(Diagnostic(DiagnosticTypes.sru, 235,
message=message)) # "Database does not exist"
return Response(content=diagBody, media_type='application/xml')
config = app.configs[corpusID]
diagnostics = app.qp.validate_query(operation, version, queryType, query,
xFcsEndpointDescription, xFcsContext,
xFcsDataviews, xFcsRewritesAllowed)
if any(d.is_fatal() for d in diagnostics):
return '\n'.join(str(d) for d in diagnostics)
if platform == CorpPlatform.annis:
return {'platform': 'annis', 'operation': operation, 'version': version}
elif platform == CorpPlatform.litterae:
return {'platform': 'litterae', 'operation': operation, 'version': version}
elif platform == CorpPlatform.tsakorpus:
return {'platform': 'tsakorpus', 'operation': operation, 'version': version}
return
if operation == Operation.searchRetrieve:
if config.platform == CorpPlatform.tsakorpus:
res = app.qp_tsakorpus.translate_fcsql(query, config)
if type(res) == Diagnostic:
return Response(content=str(res), media_type='application/xml')
return str(res)
return {'operation': operation, 'version': version}
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment