Skip to content
Snippets Groups Projects
Commit 7f1707db authored by Arkhangelskiy, Timofey
Browse files

Tsakorpus searchRetrieve response (not complete yet)

parent 81840846
Branches
No related tags found
No related merge requests found
from urllib.parse import quote
import re
import json
import urllib.request
from lxml.html import fragment_fromstring
from .enums import *
from .config import ResourceConfig
from .search_retrieve import Record
from .diagnostics import Diagnostic, DiagnosticTypes
class TsakorpusResponseParser:
    """
    Parses responses from a Tsakorpus instance.
    """

    # Matches "wmatch" as a whole token inside a class attribute value.
    # Compiled once instead of on every element of every hit.
    _WMATCH_RE = re.compile(r'\bwmatch\b')

    def __init__(self):
        pass

    def parse_context(self, hit, config: ResourceConfig, lang=''):
        """
        Parse one hit. Return it as a Record object.

        hit is one element of the 'contexts' list of a Tsakorpus response.
        Its HTML is read from hit['languages'][lang]['text'].
        If lang is empty, config.search_lang_id is used instead.

        If the hit has no text for the requested language, a Record
        without text is returned. Otherwise record.text holds the plain
        text of the hit, where the text of each element whose class
        contains the token "wmatch" is wrapped in <hits:Hit>...</hits:Hit>.
        """
        record = Record(dataView=DataView.hits)
        if not lang:
            lang = config.search_lang_id
        if ('languages' not in hit
                or lang not in hit['languages']
                or 'text' not in hit['languages'][lang]):
            return record

        content = fragment_fromstring(hit['languages'][lang]['text'],
                                      create_parent='div')
        parts = []
        # Text that precedes the first child element is stored on the
        # wrapper <div> itself, not on any child, so pick it up here.
        # Leading whitespace is trimmed, like the tail of the metadata
        # span below.
        if content.text is not None:
            parts.append(content.text.lstrip('\n\t '))
        for el in content:
            elClass = el.attrib.get('class', '')
            if el.tag == 'span' and 'sentence_meta' in elClass:
                # The contents of the sentence_meta span are skipped;
                # only the text that follows it is kept.
                if el.tail is not None:
                    parts.append(el.tail.strip('\n\t '))
                continue
            if el.text is not None:
                # NOTE(review): el.text is inserted as is, next to literal
                # <hits:Hit> markup. Confirm that whatever renders
                # record.text takes care of escaping '&' and '<'.
                if self._WMATCH_RE.search(elClass) is not None:
                    parts.append('<hits:Hit>' + el.text + '</hits:Hit>')
                else:
                    parts.append(el.text)
            if el.tail is not None:
                parts.append(el.tail)
        record.text = ''.join(parts)
        return record

    def parse(self, response, config: ResourceConfig, lang=''):
        """
        Read a dictionary with the first N hits returned by a Tsakorpus
        instance. Return a list of Record objects and the total number of
        records found.

        The total is taken from response['n_sentences'] (0 if the key is
        absent). If the total is not positive or the response has no
        'contexts' key, the list of records is empty.
        """
        nRecords = response.get('n_sentences', 0)
        if nRecords <= 0 or 'contexts' not in response:
            return [], nRecords
        records = [self.parse_context(context, config, lang)
                   for context in response['contexts']]
        return records, nRecords
if __name__ == '__main__':
    # Nothing to do when the module is run directly.
    pass
......@@ -5,6 +5,7 @@ from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from common.query_parser import QueryParser
from common.tsakorpus_query_parser import TsakorpusQueryParser
from common.tsakorpus_response_parser import TsakorpusResponseParser
from common.enums import *
from common.diagnostics import Diagnostic
from common.config import ResourceConfig, read_configs
......@@ -19,6 +20,7 @@ templates = Jinja2Templates(directory='static')
app.qp = QueryParser()
app.qp_tsakorpus = TsakorpusQueryParser()
app.rp_tsakorpus = TsakorpusResponseParser()
app.configs = read_configs()
......@@ -29,6 +31,7 @@ def root():
@app.get('/fcs-endpoint/{corpusID}')
def endpoint(
request: Request,
corpusID: str,
operation: Operation = Operation.explain,
version: SRUVersion = SRUVersion.v2_0,
......@@ -73,7 +76,16 @@ def endpoint(
except Diagnostic as diag:
print('diag', str(diag))
return Response(content=str(diag), media_type='application/xml')
return str(res)
records, nHits = app.rp_tsakorpus.parse(res, config)
records = [r.as_dict() for r in records]
return templates.TemplateResponse('search_retrieve_response.xml',
{
'request': request,
'n_hits': nHits,
'records': records
})
# media_type='application/xml')
# return str(res)
return {'operation': operation, 'version': version}
......
......@@ -13,7 +13,7 @@
<sruResponse:recordPosition>{{ loop.index }}</sruResponse:recordPosition>
</sruResponse:record>
{% endfor %}
</sruResponse:records>{% if n_hits > record.resources|length %}
<sruResponse:nextRecordPosition>{{ record.resources|length + 1 }}</sruResponse:nextRecordPosition>{% endif %}
</sruResponse:records>{% if n_hits > records|length %}
<sruResponse:nextRecordPosition>{{ records|length + 1 }}</sruResponse:nextRecordPosition>{% endif %}
<sruResponse:resultCountPrecision>info:srw/vocabulary/resultCountPrecision/1/exact</sruResponse:resultCountPrecision>
</sruResponse:searchRetrieveResponse>
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment