Skip to content
Snippets Groups Projects
Commit 0f505c26 authored by Arkhangelskiy, Timofey
Browse files

Check x-fcs-dataviews

parent 0752838a
Branches
No related tags found
No related merge requests found
......@@ -11,8 +11,8 @@ class Record:
multiple hits. Here, each record contains exactly one resource
with exactly one fragment with exactly one hit.
"""
def __init__(self, dataView: DataView):
self.dataView = dataView
def __init__(self, advancedHits: bool = False):
self.advancedHits = advancedHits
# For simple search:
self.text = ''
self.textNoHighlight = '' # no <hits:Hit> elements, just text
......@@ -34,14 +34,11 @@ class Record:
'dv_hits': [{
'text': self.text
}],
'dv_adv': [{
'segments': self.segments,
'layers': self.layers
}]
'dv_adv': []
}]
}]
}
if self.dataView == DataView.adv:
if self.advancedHits:
record['resources'][0]['resource_fragments'][0]['dv_adv'].append({
'segments': self.segments,
'layers': self.layers
......
......@@ -77,7 +77,7 @@ class TsakorpusResponseParser:
})
def parse_span(self, el, record):
def parse_span(self, el, record, advancedHits=False):
"""
Parse one <span> element from the HTML representation
of one hit returned by a Tsakorpus instance. Add the extracted
......@@ -95,16 +95,17 @@ class TsakorpusResponseParser:
if 'class' in el.attrib and re.search('\\bword\\b', el.attrib['class']) is not None:
if re.search('\\bwmatch\\b', el.attrib['class']) is not None:
bMatch = True
segID = 's' + str(len(record.segments))
segment = {
'id': segID,
'start': len(record.textNoHighlight) + 1,
'end': len(record.textNoHighlight) + len(el.text)
}
record.segments.append(segment)
record.textNoHighlight += el.text
if 'data-ana' in el.attrib:
self.parse_annotation(el.attrib['data-ana'], segID, record)
if advancedHits:
segID = 's' + str(len(record.segments))
segment = {
'id': segID,
'start': len(record.textNoHighlight) + 1,
'end': len(record.textNoHighlight) + len(el.text)
}
record.segments.append(segment)
if 'data-ana' in el.attrib:
self.parse_annotation(el.attrib['data-ana'], segID, record)
if bMatch:
record.text += '<hits:Hit>' + el.text + '</hits:Hit>'
else:
......@@ -114,11 +115,11 @@ class TsakorpusResponseParser:
record.textNoHighlight += el.tail
def parse_context(self, hit, config: ResourceConfig, lang=''):
def parse_context(self, hit, config: ResourceConfig, lang='', advancedHits=False):
"""
Parse one hit. Return it as a Record object.
"""
record = Record(dataView=DataView.hits)
record = Record(advancedHits=advancedHits)
if len(lang) <= 0:
lang = config.search_lang_id
if ('languages' not in hit
......@@ -130,17 +131,25 @@ class TsakorpusResponseParser:
content = fragment_fromstring(contentTxt,
create_parent='div')
for el in content:
self.parse_span(el, record)
self.parse_span(el, record, advancedHits)
return record
def parse(self, response, config: ResourceConfig, lang=''):
def parse(self, response, config: ResourceConfig, xFcsDataviews, lang=''):
"""
Read a dictionary with the first N hits returned by a Tsakorpus
instance. Return a list of Record objects and the total number of
records found.
"""
self.pc = POSConvertor(config)
diagnostics = []
advancedHits = False
dataViewsRequested = {v.strip() for v in xFcsDataviews.split(',') if len(v.strip()) > 0}
if 'adv' in dataViewsRequested:
advancedHits = True
for v in dataViewsRequested:
if v not in ('hits', 'adv'):
diagnostics.append(Diagnostic(DiagnosticTypes.fcs, 4, details=v))
nRecords = 0
if 'n_sentences' in response:
nRecords = response['n_sentences']
......@@ -148,7 +157,7 @@ class TsakorpusResponseParser:
return [], nRecords
records = []
for context in response['contexts']:
records.append(self.parse_context(context, config, lang))
records.append(self.parse_context(context, config, lang, advancedHits))
return records, nRecords
......
......@@ -76,7 +76,7 @@ def endpoint(
except Diagnostic as diag:
print('diag', str(diag))
return Response(content=str(diag), media_type='application/xml')
records, nHits = app.rp_tsakorpus.parse(res, config)
records, nHits = app.rp_tsakorpus.parse(res, config, xFcsDataviews)
records = [r.as_dict() for r in records]
return templates.TemplateResponse('search_retrieve_response.xml',
{
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment