class Diagnostic:
    """
    A single SRU/FCS diagnostic message (fatal or non-fatal) that can be
    rendered as XML, as per FCS specifications.
    Read more about SRU diagnostics here: https://www.loc.gov/standards/sru/diagnostics/ .
    SRU diagnostics list is located here: http://www.loc.gov/standards/sru/diagnostics/diagnosticsList.html .
    Additional FCS diagnostics are listed in the FCS specifications.
    """

    fatalFCSDiagnostics = {3, 10, 11}   # FCS specifications, 4.2
    fatalSRUDiagnostics = {8, 10}       # A subset actually used by this endpoint

    # Jinja2 environment shared by all instances. It is built on first use so
    # that the template is read and compiled once, not once per diagnostic.
    _sharedTemplateEnv = None

    def __init__(self, diagType: DiagnosticTypes, diagID: int,
                 details: str = '',
                 message: str = ''):
        """
        Initialize a diagnostic with a given numerical ID.

        :param diagType: namespace of the diagnostic (SRU or FCS).
        :param diagID: numerical ID of the diagnostic within that namespace.
        :param details: optional text for the <diag:details> element.
        :param message: optional text for the <diag:message> element.
        """
        self.diagType = diagType
        self.diagID = diagID
        self.details = details
        self.message = message

        # Kept as instance attributes for backward compatibility; both now
        # point at the shared environment and its loader.
        self.templateEnv = self._get_template_env()
        self.templateLoader = self.templateEnv.loader

    @classmethod
    def _get_template_env(cls):
        """
        Return the shared Jinja2 environment, creating it on first call.
        """
        if Diagnostic._sharedTemplateEnv is None:
            loader = jinja2.FileSystemLoader(searchpath="./static")
            # autoescape=True: details and message may contain arbitrary
            # text, which must not be able to break or inject XML markup.
            Diagnostic._sharedTemplateEnv = jinja2.Environment(loader=loader,
                                                               autoescape=True)
        return Diagnostic._sharedTemplateEnv

    def is_fatal(self):
        """
        Return True iff the diagnostic with the given ID is fatal,
        i.e. the query should not be processed further.
        """
        if self.diagType == DiagnosticTypes.fcs:
            return self.diagID in self.fatalFCSDiagnostics
        elif self.diagType == DiagnosticTypes.sru:
            return self.diagID in self.fatalSRUDiagnostics
        # A diagnostic of an unknown type is treated as fatal.
        return True

    def uri(self):
        """
        Return URI representing this diagnostic, or an empty string
        if the diagnostic type is neither FCS nor SRU.
        """
        if self.diagType == DiagnosticTypes.fcs:
            return 'http://clarin.eu/fcs/diagnostic/' + str(self.diagID)
        elif self.diagType == DiagnosticTypes.sru:
            return 'info:srw/diagnostic/1/' + str(self.diagID)
        return ''

    def __repr__(self):
        """
        Return the XML version of this diagnostic.
        No __str__ is defined, so str(diagnostic) returns the same XML.
        """
        template = self.templateEnv.get_template('diagnostic.xml')
        xmlText = template.render(uri=self.uri(),
                                  details=self.details,
                                  message=self.message)
        return xmlText.strip()
class QueryParser:
    """
    This class contains commonly used methods for initial parsing of a GET
    query. It does not include platform-specific methods.
    """
    def __init__(self):
        pass

    def validate_query(self, operation, version, queryType, query,
                       xFcsEndpointDescription, xFcsContext,
                       xFcsDataviews, xFcsRewritesAllowed):
        """
        Check if the query parameters contain a valid combination of values.

        Only the combination of the x-fcs-* extra parameters with the
        requested operation is checked at the moment; version, queryType
        and query are accepted but not inspected yet.

        :param operation: requested SRU operation (compared against Operation members).
        :param version: SRU version (currently unused).
        :param queryType: query language (currently unused).
        :param query: the query itself (currently unused).
        :param xFcsEndpointDescription: value of x-fcs-endpoint-description;
            empty or None if the parameter was not supplied.
        :param xFcsContext: value of x-fcs-context; empty or None if not supplied.
        :param xFcsDataviews: value of x-fcs-dataviews; empty or None if not supplied.
        :param xFcsRewritesAllowed: value of x-fcs-rewrites-allowed; empty or
            None if not supplied.
        :return: Return a list of diagnostics describing problems with the query.
        If the query is prima facie valid and can be processed further, an empty
        list will be returned.
        """
        diagnostics = []

        # Check if additional parameters combine with the operation requested
        # (FCS specifications, 4.1). Truthiness is used instead of len() so
        # that a missing parameter passed as None does not raise TypeError.
        if xFcsEndpointDescription and operation != Operation.explain:
            diagnostics.append(Diagnostic(DiagnosticTypes.sru, 8, details='x-fcs-endpoint-description'))
        if xFcsContext and operation != Operation.searchRetrieve:
            diagnostics.append(Diagnostic(DiagnosticTypes.sru, 8, details='x-fcs-context'))
        if xFcsDataviews and operation != Operation.searchRetrieve:
            diagnostics.append(Diagnostic(DiagnosticTypes.sru, 8, details='x-fcs-dataviews'))
        if xFcsRewritesAllowed and operation != Operation.searchRetrieve:
            diagnostics.append(Diagnostic(DiagnosticTypes.sru, 8, details='x-fcs-rewrites-allowed'))

        return diagnostics
{# Per FCS specifications (section 1.5), SRU 2.0 diagnostics must use this namespace and prefixed namespace syntax #}
{# All values are escaped explicitly so that arbitrary text in details/message cannot break the XML, #}
{# whether or not the rendering environment has autoescaping enabled. #}
<diag:diagnostic xmlns:diag="http://docs.oasis-open.org/ns/search-ws/diagnostic">
  <diag:uri>{{ uri|e }}</diag:uri>{% if details %}
  <diag:details>{{ details|e }}</diag:details>{% endif %}{% if message %}
  <diag:message>{{ message|e }}</diag:message>{% endif %}
</diag:diagnostic>