From 4903a3e237984d2db3dc69c18940c7ff882461d1 Mon Sep 17 00:00:00 2001
From: Timofey Arkhangelskiy <timofey.arkhangelskiy@uni-hamburg.de>
Date: Tue, 6 Jun 2023 16:59:35 +0200
Subject: [PATCH] Fix some problems with CLARIN endpoint tester

---
 common/annis_query_parser.py        |   5 +-
 common/config.py                    |  15 ++-
 common/diagnostics.py               |  19 +++-
 common/litterae_query_parser.py     |   2 +-
 common/litterae_response_parser.py  |  23 +++-
 common/query_parser.py              |  29 +++--
 common/tsakorpus_query_parser.py    |   4 +-
 common/tsakorpus_response_parser.py |  14 ++-
 common/views_logic.py               | 170 ++++++++++++++++++++++++----
 main.py                             |  64 +++++++++--
 notes.txt                           |   2 +
 requirements.txt                    |   3 +-
 static/diagnostic.xml               |   3 +-
 static/endpoint_description.xml     |   2 +-
 static/explain_response.xml         |   2 +-
 static/search_retrieve_response.xml |   5 +-
 16 files changed, 290 insertions(+), 72 deletions(-)

diff --git a/common/annis_query_parser.py b/common/annis_query_parser.py
index b9edca5..1fc38bf 100644
--- a/common/annis_query_parser.py
+++ b/common/annis_query_parser.py
@@ -16,7 +16,8 @@ class AnnisQueryParser(QueryParser):
     rxRelOps = re.compile('^(?:\\^\\*|\\||\\.[*,0-9]*)|_=_$')      # Operators for setting relations between query words
     rxFramingQuotes = re.compile('^[/"]|(?<!\\\\)[/"]$')
 
-    def build_get_string(self, params, config: ResourceConfig, withinClause=''):
+    def build_get_string(self, params, config: ResourceConfig,
+                         searchOptions: dict, withinClause=''):
         """
         Build a payload for an ANNIS search request.
         ANNIS uses POST with JSON payload rather than GET, but the
@@ -31,7 +32,7 @@ class AnnisQueryParser(QueryParser):
             'query': '',
             'query_language': 'AQL',
             'corpora': config.annis_corpus_list,
-            'limit': config.max_hits,
+            'limit': min(config.max_hits, searchOptions['maximumRecords']),
             'order': 'Randomized'
         }
         termIndexes = self.term_indexes(params)
diff --git a/common/config.py b/common/config.py
index eb22b99..fab703d 100644
--- a/common/config.py
+++ b/common/config.py
@@ -47,18 +47,18 @@ class ResourceConfig:
 
         self.query_timeout = 60
 
+        # NB: The following properties are not used right now.
+        # They may be used if somebody develops a GUI for editing
+        # configuration files in the future.
         self.boolParams = set(k for k in self.__dict__
                               if type(self.__dict__[k]) == bool)
         self.intParams = set(k for k in self.__dict__
                              if type(self.__dict__[k]) == int)
         self.lsParams = set()
-
         # dictionaries where values are strings
         self.dict_sParams = {'pos_convert_reverse'}
-
         # dictionaries where values are lists of strings
         self.dict_lsParams = {'pos_convert'}
-
         # dictionaries where values are dictionaries {k: string}
         self.dict_dParams = set()
 
@@ -143,6 +143,9 @@ class ResourceConfig:
     def gui_str_to_dict(self, s, value_type='list'):
         """
         Process one input string that describes a dictionary.
+        NB: This function is not used right now. It may be used if
+        somebody develops a GUI for editing configuration files
+        in the future.
         """
         d = {}
         s = s.replace('\r', '').strip()
@@ -187,6 +190,9 @@ class ResourceConfig:
         """
         Turn form data filled by the user in the configuration GUI to
         a dictionary in the correct format.
+        NB: This function is not used right now. It may be used if
+        somebody develops a GUI for editing configuration files
+        in the future.
         """
         dictConfig = {}
         for f in self.boolParams:
@@ -223,6 +229,9 @@ class ResourceConfig:
         """
         Save current or new configuration as a JSON file (can be used to edit
         configuration files through a web interface).
+        NB: This function is not used right now. It may be used if
+        somebody develops a GUI for editing configuration files
+        in the future.
         """
         if data is None or len(data) <= 0:
             dictConfig = self.as_dict()
diff --git a/common/diagnostics.py b/common/diagnostics.py
index 47103c3..e90db7b 100644
--- a/common/diagnostics.py
+++ b/common/diagnostics.py
@@ -12,20 +12,23 @@ class Diagnostic(Exception):
     """
 
     fatalFCSDiagnostics = {3, 10, 11}      # FCS specifications, 4.2
-    fatalSRUDiagnostics = {1, 4, 8, 10, 27, 37, 47, 48, 235}     # A subset actually used by this endpoint
+    fatalSRUDiagnostics = {1, 4, 5, 6, 8, 10, 27, 37, 47, 48, 61, 71, 235}     # A subset actually used by this endpoint
 
     stdMessages = {
         (DiagnosticTypes.fcs, 4): 'Requested Data View not valid for this resource.',
-        (DiagnosticTypes.sru, 4): 'Unsupported operation.',
+        (DiagnosticTypes.sru, 4): 'Unsupported operation. Supported operation: explain, searchRetrieve, scan.',
+        (DiagnosticTypes.sru, 5): 'Unsupported version. Supported SRU versions: 1.2 and 2.0.',
         (DiagnosticTypes.sru, 8): 'Unsupported parameter.',
         (DiagnosticTypes.sru, 10): 'Something is wrong with the query syntax.',
         (DiagnosticTypes.sru, 27): 'The query should not be empty.',
-        (DiagnosticTypes.sru, 37): 'Unsupported boolean operator.'
+        (DiagnosticTypes.sru, 37): 'Unsupported boolean operator.',
+        (DiagnosticTypes.sru, 61): 'Start record position out of range.'
     }
 
     def __init__(self, diagType: DiagnosticTypes, diagID: int,
                  details: str = '',
-                 message: str = ''):
+                 message: str = '',
+                 version: SRUVersion = SRUVersion.v2_0):
         """
         Initialize a diagnostic with a given numerical ID.
         """
@@ -33,6 +36,7 @@ class Diagnostic(Exception):
         self.diagID = diagID
         self.details = details
         self.message = message
+        self.version = version
         if len(self.message) <= 0 and (diagType, diagID) in self.stdMessages:
             self.message = self.stdMessages[(diagType, diagID)]
 
@@ -64,10 +68,15 @@ class Diagnostic(Exception):
         """
         Return the XML version of this diagnostic.
         """
+        if self.version == SRUVersion.v1_2:
+            templateVersion = 1
+        else:
+            templateVersion = 2
         template = self.templateEnv.get_template('diagnostic.xml')
         xmlText = template.render(uri=self.uri(),
                                   details=self.details,
-                                  message=self.message)
+                                  message=self.message,
+                                  version=templateVersion)
         return xmlText.strip()
 
     def __repr__(self):
diff --git a/common/litterae_query_parser.py b/common/litterae_query_parser.py
index 2d0ca1f..34a5441 100644
--- a/common/litterae_query_parser.py
+++ b/common/litterae_query_parser.py
@@ -14,7 +14,7 @@ class LitteraeQueryParser(QueryParser):
     Parses search queries for Formulae, Litterae, Chartae.
     """
 
-    def build_get_string(self, getParams, config, withinClause=''):
+    def build_get_string(self, getParams, config, searchOptions: dict, withinClause=''):
         """
         Build a GET string (everything after the ?) from a description
         of the GET parameters in the getParams list.
diff --git a/common/litterae_response_parser.py b/common/litterae_response_parser.py
index d6476bf..a9233b2 100644
--- a/common/litterae_response_parser.py
+++ b/common/litterae_response_parser.py
@@ -20,7 +20,8 @@ class LitteraeResponseParser:
     def __init__(self):
         self.pc = None      # POS convertor, rebuilt with each parse call
 
-    def process_hits(self, tableNode, config: ResourceConfig, diagnostics: list[Diagnostic], advancedHits=False):
+    def process_hits(self, tableNode, config: ResourceConfig, searchOptions: dict,
+                     diagnostics: list[Diagnostic], advancedHits=False):
         """
         Process hits from an HTML node with the results table.
         If anything goes wrong, add Diagnostic objects to diagnostics list.
@@ -32,7 +33,8 @@ class LitteraeResponseParser:
         rows = tableNode.xpath('tr')
         iRow = 0
         iRowOffset = 0
-        while iRow < len(rows) and iRow - iRowOffset < config.max_hits:
+        maxHits = min(config.max_hits, searchOptions['maximumRecords'])
+        while iRow < len(rows) and iRow - iRowOffset < maxHits:
             row = rows[iRow]
             iRow += 1
             paragraphs = row.xpath('td/p')
@@ -51,7 +53,7 @@ class LitteraeResponseParser:
             records.append(record)
         return records
 
-    def parse(self, response, config: ResourceConfig, xFcsDataviews):
+    def parse(self, response, config: ResourceConfig, searchOptions: dict):
         """
         Read HTML response with the first N hits returned by a Litterae
         instance. Return a list of Record objects and the total number of
@@ -61,7 +63,7 @@ class LitteraeResponseParser:
         """
         diagnostics = []
         advancedHits = False
-        dataViewsRequested = {v.strip() for v in xFcsDataviews.split(',') if len(v.strip()) > 0}
+        dataViewsRequested = {v.strip() for v in searchOptions['x-fcs-dataviews'].split(',') if len(v.strip()) > 0}
         if 'adv' in dataViewsRequested:
             advancedHits = True
         srcTree = fromstring(response)
@@ -73,11 +75,20 @@ class LitteraeResponseParser:
                 nRecords = int(m.group(1))
         resTableNodes = srcTree.xpath('//table[@id="partsSearchResultTable"]/tbody')
         records = []
+        if searchOptions['startRecord'] > 1 and nRecords < searchOptions['startRecord']:
+            # We don't actually care about startRecord, but we should
+            # return a fatal diagnostic if it is larger than the number
+            # of hits.
+            diagnostics.append(Diagnostic(DiagnosticTypes.sru, 61))
+            return records, nRecords, diagnostics
         if len(resTableNodes) <= 0:
             nRecords = 0
         else:
-            records = self.process_hits(resTableNodes[0], config, diagnostics, advancedHits=advancedHits)
-        if len(records) < nRecords and len(records) < config.max_hits:
+            records = self.process_hits(resTableNodes[0], config, searchOptions,
+                                        diagnostics, advancedHits=advancedHits)
+        if (len(records) < nRecords
+                and len(records) < config.max_hits
+                and len(records) < searchOptions['maximumRecords']):
             diagnostics.append(Diagnostic(DiagnosticTypes.sru, 59,
                                           message='Some results could not be shown due to copyright restrictions.'))
         return records, nRecords, diagnostics
diff --git a/common/query_parser.py b/common/query_parser.py
index 736f3bc..56afd4d 100644
--- a/common/query_parser.py
+++ b/common/query_parser.py
@@ -12,6 +12,7 @@ class QueryParser:
     """
     # Regexes for simple search
     rxTermQuery = re.compile('^(?:(?:[^ "]|\\\\")*|"(?:[^"]|\\\\")*")$')
+    rxQueryWSpaces = re.compile('[^ \t][ \t]+[^ \t]')
 
     # Regexes for advanced search
     rxWithinClause = re.compile(' +within +(s|sentence|u|utterance|p|paragraph|'
@@ -200,7 +201,7 @@ class QueryParser:
             return [0]
         return [t for t in sorted(terms)]
 
-    def build_get_string(self, getParams, config: ResourceConfig, withinClause=''):
+    def build_get_string(self, getParams, config: ResourceConfig, searchOptions: dict, withinClause=''):
         # Abstract function
         raise NotImplementedError()
 
@@ -257,7 +258,7 @@ class QueryParser:
         # Abstract function
         raise NotImplementedError()
 
-    def translate_simple(self, query: str, config: ResourceConfig, start=0, end=-1):
+    def translate_simple(self, query: str, config: ResourceConfig, searchOptions: dict, start=0, end=-1):
         """
         Translate a simple search (CQL) query into a corpus-specific query
         (GET query, JSON Elasticsearch query or whatever).
@@ -275,10 +276,10 @@ class QueryParser:
             if end == 0:
                 raise Diagnostic(DiagnosticTypes.sru, 27)
             if self.rxTermQuery.search(query) is not None:
-                return self.build_get_string(self.term_query(query, config), config)
-            return self.build_get_string(self.translate_simple(query, config,
+                return self.build_get_string(self.term_query(query, config), config, searchOptions)
+            return self.build_get_string(self.translate_simple(query, config, searchOptions,
                                                                start=start, end=end),
-                                         config)
+                                         config, searchOptions)
             # if query.count('(') != query.count(')'):
             #     return None
         if len(query) <= 0:
@@ -294,18 +295,22 @@ class QueryParser:
         iOpPos, strOp = self.find_operator(query, start, end)
         if iOpPos == -1:
             if query[start] == '(' and query[end - 1] == ')':
-                return self.translate_simple(query, config, start=start + 1, end=end - 1)
+                return self.translate_simple(query, config, searchOptions, start=start + 1, end=end - 1)
             else:
-                return self.term_query(query[start:end], config)
+                queryPart = query[start:end]
+                if ((not queryPart.startswith('"') or not queryPart.endswith('"'))
+                        and self.rxQueryWSpaces.search(queryPart.strip('"')) is not None):
+                    raise Diagnostic(DiagnosticTypes.sru, 10)
+                return self.term_query(queryPart, config)
         if strOp in ('AND', 'OR'):
-            resultLeft = self.translate_simple(query, config, start=start, end=iOpPos)
-            resultRight = self.translate_simple(query, config, start=iOpPos + len(strOp),
+            resultLeft = self.translate_simple(query, config, searchOptions, start=start, end=iOpPos)
+            resultRight = self.translate_simple(query, config, searchOptions, start=iOpPos + len(strOp),
                                                 end=end)
             if len(resultLeft) <= 0 or len(resultRight) <= 0:
                 raise Diagnostic(DiagnosticTypes.sru, 10)
             return self.binary_bool(strOp, resultLeft, resultRight, config)
         elif strOp == 'NOT':
-            resultRight = self.translate_simple(query, config, start=iOpPos + len(strOp),
+            resultRight = self.translate_simple(query, config, searchOptions, start=iOpPos + len(strOp),
                                                 end=end)
             return self.not_bool(resultRight, config)
         raise Diagnostic(DiagnosticTypes.sru, 10)
@@ -401,7 +406,7 @@ class QueryParser:
             return self.adv_main_or(resultLeft, resultRight, config)
         raise NotImplementedError
 
-    def translate_advanced(self, query: str, config: ResourceConfig):
+    def translate_advanced(self, query: str, config: ResourceConfig, searchOptions: dict):
         """
         Translate an advanced search (FCS-QL) query into a corpus-specific query
         (GET query, JSON Elasticsearch query or whatever).
@@ -429,7 +434,7 @@ class QueryParser:
             end = len(query)
         if end == 0:
             raise Diagnostic(DiagnosticTypes.sru, 27)
-        return self.build_get_string(self.adv_main_query(query, config, start=0, end=end), config,
+        return self.build_get_string(self.adv_main_query(query, config, start=0, end=end), config, searchOptions,
                                      withinClause=withinClause)
 
     def validate_query(self, operation, version, queryType, query,
diff --git a/common/tsakorpus_query_parser.py b/common/tsakorpus_query_parser.py
index f6a61e4..8602bc9 100644
--- a/common/tsakorpus_query_parser.py
+++ b/common/tsakorpus_query_parser.py
@@ -14,7 +14,7 @@ class TsakorpusQueryParser(QueryParser):
 
     rxTsakorpusBool = re.compile('[()|,]')
 
-    def build_get_string(self, getParams, config: ResourceConfig, withinClause=''):
+    def build_get_string(self, getParams, config: ResourceConfig, searchOptions: dict, withinClause=''):
         """
         Build a GET string (everything after the ?) from a description
         of the GET parameters in the getParams list.
@@ -34,7 +34,7 @@ class TsakorpusQueryParser(QueryParser):
             s += '&' + param[0] + str(param[1]) + sfx + '=' + quote(str(param[2]))
         for i in termIndexes:
             s += '&lang' + str(i) + '=' + config.search_lang_id
-        s += '&page_size=' + str(config.max_hits)
+        s += '&page_size=' + str(min(config.max_hits, searchOptions['maximumRecords']))
         s += '&precise=on&sort=random&response_format=json&distance_strict=on'
         return s
 
diff --git a/common/tsakorpus_response_parser.py b/common/tsakorpus_response_parser.py
index 4b2d112..be5ca79 100644
--- a/common/tsakorpus_response_parser.py
+++ b/common/tsakorpus_response_parser.py
@@ -135,7 +135,7 @@ class TsakorpusResponseParser:
         return record
 
 
-    def parse(self, response, config: ResourceConfig, xFcsDataviews, lang=''):
+    def parse(self, response, config: ResourceConfig, searchOptions: dict, lang=''):
         """
         Read a dictionary with the first N hits returned by a Tsakorpus
         instance. Return a list of Record objects and the total number of
@@ -144,15 +144,21 @@ class TsakorpusResponseParser:
         self.pc = POSConvertor(config)
         diagnostics = []
         advancedHits = False
-        dataViewsRequested = {v.strip() for v in xFcsDataviews.split(',') if len(v.strip()) > 0}
+        dataViewsRequested = {v.strip() for v in searchOptions['x-fcs-dataviews'].split(',') if len(v.strip()) > 0}
         if 'adv' in dataViewsRequested:
             advancedHits = True
         nRecords = 0
+        records = []
         if 'n_sentences' in response:
             nRecords = response['n_sentences']
+        if searchOptions['startRecord'] > 1 and nRecords < searchOptions['startRecord']:
+            # We don't actually care about startRecord, but we should
+            # return a fatal diagnostic if it is larger than the number
+            # of hits.
+            diagnostics.append(Diagnostic(DiagnosticTypes.sru, 61))
+            return records, nRecords, diagnostics
         if nRecords <= 0 or 'contexts' not in response:
-            return [], nRecords, diagnostics
-        records = []
+            return records, nRecords, diagnostics
         for context in response['contexts']:
             records.append(self.parse_context(context, config, lang, advancedHits))
         return records, nRecords, diagnostics
diff --git a/common/views_logic.py b/common/views_logic.py
index ca27bd2..e8aff25 100644
--- a/common/views_logic.py
+++ b/common/views_logic.py
@@ -8,24 +8,133 @@ from .diagnostics import Diagnostic
 from .config import ResourceConfig
 
 
+def initial_validation(operation, version, queryType, searchOptions, query):
+    """
+    Validate and convert values of the main request parameters (searchOptions is updated in place).
+    Return the converted values and a list of fatal diagnostics (empty if nothing is wrong).
+    """
+    failDiagnoctics = []
+
+    if version == '1.2':
+        version = SRUVersion.v1_2
+    elif version == '2.0':
+        version = SRUVersion.v2_0
+    else:
+        version = SRUVersion.v2_0
+        failDiagnoctics.append(Diagnostic(DiagnosticTypes.sru, 5, details='2.0'))
+
+    if operation == '':
+        if len(query) > 0:
+            operation = Operation.searchRetrieve
+        else:
+            operation = Operation.explain
+    elif operation == 'explain':
+        operation = Operation.explain
+    elif operation == 'searchRetrieve':
+        operation = Operation.searchRetrieve
+    elif operation == 'scan':
+        operation = Operation.scan
+    else:
+        operation = Operation.explain
+        failDiagnoctics.append(Diagnostic(DiagnosticTypes.sru, 4, version=version))
+
+    if queryType == 'fcs':
+        queryType = QueryType.fcs
+    elif queryType == 'cql':
+        queryType = QueryType.cql
+    else:
+        queryType = QueryType.cql
+        failDiagnoctics.append(Diagnostic(DiagnosticTypes.sru, 6, message='Supported query types: fcs and cql.',
+                                          version=version))
+
+    try:
+        searchOptions['startRecord'] = int(searchOptions['startRecord'])
+    except ValueError:
+        searchOptions['startRecord'] = 1
+        failDiagnoctics.append(Diagnostic(DiagnosticTypes.sru, 6, message='startRecord should be a positive integer.',
+                                          version=version))
+    if searchOptions['startRecord'] < 1:
+        failDiagnoctics.append(Diagnostic(DiagnosticTypes.sru, 6, message='startRecord should be a positive integer.',
+                                          version=version))
+
+    try:
+        searchOptions['maximumRecords'] = int(searchOptions['maximumRecords'])
+    except ValueError:
+        searchOptions['maximumRecords'] = 0
+        failDiagnoctics.append(
+            Diagnostic(DiagnosticTypes.sru, 6, message='maximumRecords should be a non-negative integer.',
+                       version=version))
+    if searchOptions['maximumRecords'] < 0:
+        failDiagnoctics.append(
+            Diagnostic(DiagnosticTypes.sru, 6, message='maximumRecords should be a non-negative integer.',
+                       version=version))
+
+    # recordPacking has entirely different semantics in SRU 1.2 and SRU 2.0
+    if version == SRUVersion.v1_2:
+        if searchOptions['recordPacking'] == '':
+            searchOptions['recordPacking'] = 'xml'
+        if searchOptions['recordPacking'] not in ('xml', 'string'):
+            failDiagnoctics.append(
+                Diagnostic(DiagnosticTypes.sru, 71, message='recordPacking should equal "xml" or "string".',
+                           version=version))
+    else:
+        if searchOptions['recordXMLEscaping'] == '':
+            searchOptions['recordXMLEscaping'] = 'xml'
+        if searchOptions['recordPacking'] == '':
+            searchOptions['recordPacking'] = 'packed'
+        if searchOptions['recordXMLEscaping'] not in ('xml', 'string'):
+            failDiagnoctics.append(
+                Diagnostic(DiagnosticTypes.sru, 71, message='recordXMLEscaping should equal "xml" or "string".',
+                           version=version))
+        if searchOptions['recordPacking'] not in ('packed', 'unpacked'):
+            failDiagnoctics.append(
+                Diagnostic(DiagnosticTypes.sru, 6, message='recordPacking should equal "packed" or "unpacked".',
+                           version=version))
+
+    try:
+        searchOptions['resultSetTTL'] = int(searchOptions['resultSetTTL'])
+    except ValueError:
+        searchOptions['resultSetTTL'] = 0
+        failDiagnoctics.append(
+            Diagnostic(DiagnosticTypes.sru, 6, message='resultSetTTL should be a positive integer.',
+                       version=version))
+    if searchOptions['resultSetTTL'] < 0:
+        # A negative resultSetTTL is not valid, but it is tolerated
+        # here because this value is not used anyway.
+        pass
+        # failDiagnoctics.append(
+        #     Diagnostic(DiagnosticTypes.sru, 6, message='resultSetTTL should be a positive integer.',
+        #                version=version))
+
+    return operation, version, queryType, searchOptions, failDiagnoctics
+
+
 def fatal_response(operation: Operation,
                    version: SRUVersion,
+                   config: Optional[ResourceConfig],
                    diagnostics: list[Diagnostic],
                    request, templates):
     """
     Return a response with the fatal diagnostics
     and no other payload.
     """
+    if config is None:
+        configStr = ''
+    else:
+        configStr = config.as_dict()
+    if version == SRUVersion.v1_2:
+        templateVersion = 1
+    else:
+        templateVersion = 2
+    for diag in diagnostics:
+        diag.version = version
     diagStr = [str(d) for d in diagnostics]
     if operation in (Operation.explain, Operation.scan):
-        if version == SRUVersion.v1_2:
-            templateVersion = 1
-        else:
-            templateVersion = 2
         return templates.TemplateResponse('explain_response.xml',
                                           {
                                               'request': request,
                                               'diagnostics': diagStr,
+                                              'config': configStr,
                                               'version': templateVersion
                                           },
                                           media_type='application/xml')
@@ -34,7 +143,8 @@ def fatal_response(operation: Operation,
                                           {
                                               'request': request,
                                               'diagnostics': diagStr,
-                                              'n_hits': 0
+                                              'n_hits': 0,
+                                              'version': templateVersion
                                           },
                                           media_type='application/xml')
 
@@ -52,6 +162,8 @@ def process_explain(version: SRUVersion,
         templateVersion = 1
     else:
         templateVersion = 2
+    for diag in diagnostics:
+        diag.version = version
     endpointDescNeeded = False
     if 'x-fcs-endpoint-description' in searchOptions and searchOptions['x-fcs-endpoint-description'] == 'true':
         endpointDescNeeded = True
@@ -70,7 +182,7 @@ def process_explain(version: SRUVersion,
 def process_search_retrieve(version: SRUVersion,
                             queryType: QueryType,
                             query: str,
-                            searchOptions: dict[str, str],
+                            searchOptions: dict,
                             config: Optional[ResourceConfig],
                             diagnostics: list[Diagnostic],
                             app, request, templates):
@@ -78,18 +190,26 @@ def process_search_retrieve(version: SRUVersion,
     Process a searchRetrieve request.
     Return a rendered XML response.
     """
+    if version == SRUVersion.v1_2:
+        templateVersion = 1
+    else:
+        templateVersion = 2
+    for diag in diagnostics:
+        diag.version = version
     if config.platform == CorpPlatform.annis:
         try:
             if queryType == QueryType.cql:
-                query = app.qp_annis.translate_simple(query, config)
+                query = app.qp_annis.translate_simple(query, config, searchOptions)
             else:
-                query = app.qp_annis.translate_advanced(query, config)
+                query = app.qp_annis.translate_advanced(query, config, searchOptions)
             print(query)
             # res = app.qp_annis.send_query(query, config)
         except Diagnostic as diag:
-            return fatal_response(Operation.searchRetrieve, version, diagnostics + [diag], request, templates)
+            return fatal_response(Operation.searchRetrieve, version, config, diagnostics + [diag], request, templates)
         return query['query']
         # records, nHits, diagnostics = app.rp_annis.parse(res, config, searchOptions['x-fcs-dataviews'])
+        # if any(diag.is_fatal() for diag in diagnostics):
+        #     return fatal_response(Operation.searchRetrieve, version, config, diagnostics, request, templates)
         # records = [r.as_dict() for r in records]
         # diagnostics = [str(d) for d in diagnostics]
         # return templates.TemplateResponse('search_retrieve_response.xml',
@@ -97,20 +217,23 @@ def process_search_retrieve(version: SRUVersion,
         #                                       'request': request,
         #                                       'n_hits': nHits,
         #                                       'records': records,
+        #                                       'version': templateVersion,
         #                                       'diagnostics': diagnostics
         #                                   },
         #                                   media_type='application/xml')
     if config.platform == CorpPlatform.tsakorpus:
         try:
             if queryType == QueryType.cql:
-                strGetParams = app.qp_tsakorpus.translate_simple(query, config)
+                strGetParams = app.qp_tsakorpus.translate_simple(query, config, searchOptions)
             else:
-                strGetParams = app.qp_tsakorpus.translate_advanced(query, config)
+                strGetParams = app.qp_tsakorpus.translate_advanced(query, config, searchOptions)
             print(strGetParams)
             res = app.qp_tsakorpus.send_query(strGetParams, config)
         except Diagnostic as diag:
-            return fatal_response(Operation.searchRetrieve, version, diagnostics + [diag], request, templates)
-        records, nHits, diagnostics = app.rp_tsakorpus.parse(res, config, searchOptions['x-fcs-dataviews'])
+            return fatal_response(Operation.searchRetrieve, version, config, diagnostics + [diag], request, templates)
+        records, nHits, diagnostics = app.rp_tsakorpus.parse(res, config, searchOptions)
+        if any(diag.is_fatal() for diag in diagnostics):
+            return fatal_response(Operation.searchRetrieve, version, config, diagnostics, request, templates)
         records = [r.as_dict() for r in records]
         diagnostics = [str(d) for d in diagnostics]
         return templates.TemplateResponse('search_retrieve_response.xml',
@@ -118,20 +241,22 @@ def process_search_retrieve(version: SRUVersion,
                                               'request': request,
                                               'n_hits': nHits,
                                               'records': records,
+                                              'version': templateVersion,
                                               'diagnostics': diagnostics
                                           },
                                           media_type='application/xml')
     elif config.platform == CorpPlatform.litterae:
         try:
             if queryType == QueryType.cql:
-                strGetParams = app.qp_litterae.translate_simple(query, config)
+                strGetParams = app.qp_litterae.translate_simple(query, config, searchOptions)
             else:
-                strGetParams = app.qp_litterae.translate_simple(query, config)
+                # No advanced search for Litterae
+                strGetParams = app.qp_litterae.translate_simple(query, config, searchOptions)
             # print(strGetParams)
             res = app.qp_litterae.send_query(strGetParams, config)
             print(res)
         except Diagnostic as diag:
-            return fatal_response(Operation.searchRetrieve, version, diagnostics + [diag], request, templates)
+            return fatal_response(Operation.searchRetrieve, version, config, diagnostics + [diag], request, templates)
         for dv in searchOptions['x-fcs-dataviews'].split(','):
             dv = dv.strip()
             if dv != 'hits' and version == SRUVersion.v2_0:
@@ -139,8 +264,10 @@ def process_search_retrieve(version: SRUVersion,
                 # are available as a data view.
                 # If SRU 1.2 is used, such a diagnostic has already been added
                 # at a previous step.
-                diagnostics.append(Diagnostic(DiagnosticTypes.fcs, 4, details=dv))
-        records, nHits, diagnostics = app.rp_litterae.parse(res, config, searchOptions['x-fcs-dataviews'])
+                diagnostics.append(Diagnostic(DiagnosticTypes.fcs, 4, details=dv, version=version))
+        records, nHits, diagnostics = app.rp_litterae.parse(res, config, searchOptions)
+        if any (diag.is_fatal() for diag in diagnostics):
+            return fatal_response(Operation.searchRetrieve, version, config, diagnostics, request, templates)
         records = [r.as_dict() for r in records]
         diagnostics = [str(d) for d in diagnostics]
         return templates.TemplateResponse('search_retrieve_response.xml',
@@ -148,6 +275,7 @@ def process_search_retrieve(version: SRUVersion,
                                               'request': request,
                                               'n_hits': nHits,
                                               'records': records,
+                                              'version': templateVersion,
                                               'diagnostics': diagnostics
                                           },
                                           media_type='application/xml')
@@ -172,7 +300,7 @@ def process_request(operation: Operation,
     # If something is clearly wrong with the query, return
     # a response with the list of diagnostics
     if config is None or any(d.is_fatal() for d in diagnostics):
-        return fatal_response(operation, version, diagnostics, request, templates)
+        return fatal_response(operation, version, config, diagnostics, request, templates)
 
     # If everything looks good, proceed to query parsing
     if operation == Operation.searchRetrieve:
@@ -183,8 +311,8 @@ def process_request(operation: Operation,
     # We should not end up here, but if we did, something went wrong and
     # no fatal diagnostic describes the problem. Add a generic fatal diagnostic
     # and return a fatal response.
-    diagnostics.append(Diagnostic(DiagnosticTypes.sru, 1))
-    return fatal_response(operation, version, diagnostics, request, templates)
+    diagnostics.append(Diagnostic(DiagnosticTypes.sru, 1, version=version))
+    return fatal_response(operation, version, config, diagnostics, request, templates)
 
 
 if __name__ == '__main__':
diff --git a/main.py b/main.py
index 53717de..ebcfdd4 100644
--- a/main.py
+++ b/main.py
@@ -13,11 +13,9 @@ from common.enums import *
 from common.diagnostics import Diagnostic
 from common.config import ResourceConfig, read_configs
 from common.views_logic import *
-import json
-import os
-import re
-import copy
 import uvicorn
+from a2wsgi import ASGIMiddleware
+from datetime import datetime
 
 app = FastAPI()
 app.mount('/static', StaticFiles(directory='static'), name='static')
@@ -31,6 +29,12 @@ app.qp_tsakorpus = TsakorpusQueryParser()
 app.rp_tsakorpus = TsakorpusResponseParser()
 app.qp_annis = AnnisQueryParser()
 app.configs = read_configs()
+app.logging = True
+
+# The following line is needed in case you want to deploy the endpoint
+# under Apache2 with WSGI. Apache's mod_wsgi will import the variable
+# named 'application' from this file.
+application = ASGIMiddleware(app)
 
 
 @app.get('/')
@@ -42,10 +46,19 @@ def root():
 def endpoint(
         request: Request,
         corpusID: str,
-        operation: Operation = Operation.explain,
-        version: SRUVersion = SRUVersion.v2_0,
-        queryType: QueryType = QueryType.cql,
+        operation: str = '',
+        version: str = '2.0',
+        queryType: str = 'cql',
         query: str = '',
+        startRecord: str = '1',
+        maximumRecords: str = '999999',
+        recordPacking: str = '',
+        recordXMLEscaping: str = '',
+        recordSchema: str = '',
+        resultSetTTL: str = '999999',
+        stylesheet: str = '',
+        extraRequestData: str = '',
+        httpAccept: str = 'application/sru+xml',
         xFcsEndpointDescription: str = Query(
             default='',
             alias='x-fcs-endpoint-description'
@@ -63,20 +76,51 @@ def endpoint(
             alias='x-fcs-rewrites-allowed'
         )
         ):
+    """
+    Process incoming HTTP requests. Return an XML response.
+    Main parameters are defined here: https://www.loc.gov/standards/sru/sru-1-2.html (SRU 1.2)
+    and here http://docs.oasis-open.org/search-ws/searchRetrieve/v1.0/os/part3-sru2.0/searchRetrieve-v1.0-os-part3-sru2.0.html (SRU 2.0).
+    Only some of them are actually taken into account.
+    Extra parameters (starting with x-) are defined in the FCS specifications.
+    """
     searchOptions = {
+        'startRecord': startRecord,
+        'maximumRecords': maximumRecords,
+        'recordPacking': recordPacking,
+        'recordXMLEscaping': recordXMLEscaping,
+        'recordSchema': recordSchema,
+        'resultSetTTL': resultSetTTL,
+        'stylesheet': stylesheet,
+        'extraRequestData': extraRequestData,
+        'httpAccept': httpAccept,
         'x-fcs-endpoint-description': xFcsEndpointDescription,
         'x-fcs-context': xFcsContext,
         'x-fcs-dataviews': xFcsDataviews,
         'x-fcs-rewrites-allowed': xFcsRewritesAllowed
     }
 
+    if app.logging:
+        msg = str(datetime.now()) + '\t' + str(request.query_params) + '\n'
+        with open('query_log.txt', 'a', encoding='utf-8') as fLog:
+            fLog.write(msg)
+
+    # Validate values of operation, version, queryType and some optional parameters
+    operation, version, queryType, searchOptions, failDiagnoctics = initial_validation(operation, version, queryType,
+                                                                                       searchOptions, query)
+
     # Check if the corpus ID is correct
     if corpusID not in app.configs:
         message = 'No corpus with this ID (' + corpusID +') is served by this Endpoint. ' \
                   'Valid corpus IDs are: ' + '; '.join(cID for cID in sorted(app.configs)) + '.'
-        diag = Diagnostic(DiagnosticTypes.sru, 235, message=message)  # "Database does not exist"
-        return process_request(operation, version, queryType, query, searchOptions, None, [diag], app, request, templates)
-    config = app.configs[corpusID]
+        failDiagnoctics.append(Diagnostic(DiagnosticTypes.sru, 235, message=message, version=version))  # "Database does not exist"
+        config = None
+    else:
+        config = app.configs[corpusID]
+
+    if len(failDiagnoctics) > 0:
+        # This is as far as we can get with bad parameter values
+        return process_request(operation, version, queryType, query, searchOptions, config, failDiagnoctics, app, request,
+                               templates)
 
     # Check for common problems with parameter values
     diagnostics = app.qp.validate_query(operation, version, queryType, query,
diff --git a/notes.txt b/notes.txt
index 0974489..c5bebb2 100644
--- a/notes.txt
+++ b/notes.txt
@@ -20,6 +20,8 @@ p. 12 and elsewhere: http://explain.z3950.org/dtd/2.0/ mentioned as the URL of t
 
 p. 12-13, example: what is "result-id" in SupportedLayer? What do I put as the text of the SupportedLayer element?
 
+p. 13: It says the 'scan' operation is not used for now, but the CLARIN endpoint tester has tests that send scan requests. Why?
+
 p. 14: x-cmd-resource-info parameter present in the query example, but never explained (mentioned in some 2013 slides on FCS; should now probably be x-fcs-endpoint-description)
 
 p. 14: What is the sruResponse prefix? It is never mentioned. No namespace is provided here, so the example is actually invalid XML. (Who doesn't validate their examples before showing them to a wide audience -- in an official specification, no less??) Maybe sru was actually meant (see p. 2 and an SRU v. 1.2 example above)? I'm putting sru there for now.
diff --git a/requirements.txt b/requirements.txt
index 3e16ef4..8886832 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ fastapi>=0.88.0
 uvicorn>=0.20.0
 lxml
 Jinja2>=3.0.3
-requests
\ No newline at end of file
+requests
+a2wsgi
\ No newline at end of file
diff --git a/static/diagnostic.xml b/static/diagnostic.xml
index 7a157a9..0292e4c 100644
--- a/static/diagnostic.xml
+++ b/static/diagnostic.xml
@@ -1,5 +1,6 @@
 {# Per FCS specifications (section 1.5), SRU 2.0 diagnostics must use this namespace and prefixed namespace syntax #}
-<diag:diagnostic xmlns:diag="http://docs.oasis-open.org/ns/search-ws/diagnostic">
+{% if version == 1 -%}{% set sru_version = '1.2' %}{% else -%}{% set sru_version = '2.0' %}{% endif -%}
+<diag:diagnostic xmlns:diag="{% if sru_version == '2.0' %}http://docs.oasis-open.org/ns/search-ws/diagnostic{% else %}http://www.loc.gov/zing/srw/diagnostic/{% endif %}">
     <diag:uri>{{ uri }}</diag:uri>{% if details|length > 0 %}
     <diag:details>{{ details }}</diag:details>{% endif %}{% if message|length > 0 %}
     <diag:message>{{ message }}</diag:message>{% endif %}
diff --git a/static/endpoint_description.xml b/static/endpoint_description.xml
index 0f5c3c3..a695229 100644
--- a/static/endpoint_description.xml
+++ b/static/endpoint_description.xml
@@ -1,5 +1,5 @@
     <sru:extraResponseData>
-        <ed:EndpointDescription xmlns:ed="http://clarin.eu/fcs/endpoint-description" version="{{ sru_version }}">
+        <ed:EndpointDescription xmlns:ed="http://clarin.eu/fcs/endpoint-description" version="1.0">
             <ed:Capabilities>{% if config.basic_search_capability %}
                 <ed:Capability>http://clarin.eu/fcs/capability/basic-search</ed:Capability>{% endif %}{% if config.advanced_search_capability and version >= 2 %}
                 <ed:Capability>http://clarin.eu/fcs/capability/advanced-search</ed:Capability>{% endif %}
diff --git a/static/explain_response.xml b/static/explain_response.xml
index 3941c8a..69356e0 100644
--- a/static/explain_response.xml
+++ b/static/explain_response.xml
@@ -1,6 +1,6 @@
 {% if version == 1 -%}{% set sru_version = '1.2' %}{% else -%}{% set sru_version = '2.0' %}{% endif -%}
 <?xml version='1.0' encoding='utf-8'?>
-<sru:explainResponse xmlns:sru="http://www.loc.gov/zing/srw/">
+<sru:explainResponse xmlns:sru="{% if sru_version == '2.0' %}http://docs.oasis-open.org/ns/search-ws/sruResponse{% else %}http://www.loc.gov/zing/srw/{% endif %}">
     <sru:version>{{ sru_version }}</sru:version>{% if config %}
     <sru:record>
         <sru:recordSchema>http://explain.z3950.org/dtd/2.0/</sru:recordSchema>
diff --git a/static/search_retrieve_response.xml b/static/search_retrieve_response.xml
index 0e73071..544e999 100644
--- a/static/search_retrieve_response.xml
+++ b/static/search_retrieve_response.xml
@@ -1,6 +1,7 @@
 <?xml version='1.0' encoding='utf-8'?>
-<sru:searchRetrieveResponse xmlns:sru="http://docs.oasis-open.org/ns/search-ws/sruResponse">
-	<sru:version>2.0</sru:version>
+{% if version == 1 -%}{% set sru_version = '1.2' %}{% else -%}{% set sru_version = '2.0' %}{% endif -%}
+<sru:searchRetrieveResponse xmlns:sru="{% if sru_version == '2.0' %}http://docs.oasis-open.org/ns/search-ws/sruResponse{% else %}http://www.loc.gov/zing/srw/{% endif %}">
+	<sru:version>{{ sru_version }}</sru:version>
 	<sru:numberOfRecords>{{ n_hits }}</sru:numberOfRecords>{% if records %}
 	<sru:records>{% for record in records %}
 		<sru:record>
-- 
GitLab