Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
fcs-clarin-endpoint-hamburg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Arkhangelskiy, Timofey
fcs-clarin-endpoint-hamburg
Commits
0f505c26
Commit
0f505c26
authored
2 years ago
by
Arkhangelskiy, Timofey
Browse files
Options
Downloads
Patches
Plain Diff
Check x-fcs-dataviews
parent
0752838a
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
common/search_retrieve.py
+4
-7
4 additions, 7 deletions
common/search_retrieve.py
common/tsakorpus_response_parser.py
+24
-15
24 additions, 15 deletions
common/tsakorpus_response_parser.py
main.py
+1
-1
1 addition, 1 deletion
main.py
with
29 additions
and
23 deletions
common/search_retrieve.py
+
4
−
7
View file @
0f505c26
...
...
@@ -11,8 +11,8 @@ class Record:
multiple hits. Here, each record contains exactly one resource
with exactly one fragment with exactly one hit.
"""
def
__init__
(
self
,
dataView
:
DataView
):
self
.
dataView
=
dataView
def
__init__
(
self
,
advancedHits
:
bool
=
False
):
self
.
advancedHits
=
advancedHits
# For simple search:
self
.
text
=
''
self
.
textNoHighlight
=
''
# no <hits:Hit> elements, just text
...
...
@@ -34,14 +34,11 @@ class Record:
'
dv_hits
'
:
[{
'
text
'
:
self
.
text
}],
'
dv_adv
'
:
[{
'
segments
'
:
self
.
segments
,
'
layers
'
:
self
.
layers
}]
'
dv_adv
'
:
[]
}]
}]
}
if
self
.
dataView
==
DataView
.
adv
:
if
self
.
advancedHits
:
record
[
'
resources
'
][
0
][
'
resource_fragments
'
][
0
][
'
dv_adv
'
].
append
({
'
segments
'
:
self
.
segments
,
'
layers
'
:
self
.
layers
...
...
This diff is collapsed.
Click to expand it.
common/tsakorpus_response_parser.py
+
24
−
15
View file @
0f505c26
...
...
@@ -77,7 +77,7 @@ class TsakorpusResponseParser:
})
def
parse_span
(
self
,
el
,
record
):
def
parse_span
(
self
,
el
,
record
,
advancedHits
=
False
):
"""
Parse one <span> element from the HTML representation
of one hit returned by a Tsakorpus instance. Add the extracted
...
...
@@ -95,16 +95,17 @@ class TsakorpusResponseParser:
if
'
class
'
in
el
.
attrib
and
re
.
search
(
'
\\
bword
\\
b
'
,
el
.
attrib
[
'
class
'
])
is
not
None
:
if
re
.
search
(
'
\\
bwmatch
\\
b
'
,
el
.
attrib
[
'
class
'
])
is
not
None
:
bMatch
=
True
segID
=
'
s
'
+
str
(
len
(
record
.
segments
))
segment
=
{
'
id
'
:
segID
,
'
start
'
:
len
(
record
.
textNoHighlight
)
+
1
,
'
end
'
:
len
(
record
.
textNoHighlight
)
+
len
(
el
.
text
)
}
record
.
segments
.
append
(
segment
)
record
.
textNoHighlight
+=
el
.
text
if
'
data-ana
'
in
el
.
attrib
:
self
.
parse_annotation
(
el
.
attrib
[
'
data-ana
'
],
segID
,
record
)
if
advancedHits
:
segID
=
'
s
'
+
str
(
len
(
record
.
segments
))
segment
=
{
'
id
'
:
segID
,
'
start
'
:
len
(
record
.
textNoHighlight
)
+
1
,
'
end
'
:
len
(
record
.
textNoHighlight
)
+
len
(
el
.
text
)
}
record
.
segments
.
append
(
segment
)
if
'
data-ana
'
in
el
.
attrib
:
self
.
parse_annotation
(
el
.
attrib
[
'
data-ana
'
],
segID
,
record
)
if
bMatch
:
record
.
text
+=
'
<hits:Hit>
'
+
el
.
text
+
'
</hits:Hit>
'
else
:
...
...
@@ -114,11 +115,11 @@ class TsakorpusResponseParser:
record
.
textNoHighlight
+=
el
.
tail
def
parse_context
(
self
,
hit
,
config
:
ResourceConfig
,
lang
=
''
):
def
parse_context
(
self
,
hit
,
config
:
ResourceConfig
,
lang
=
''
,
advancedHits
=
False
):
"""
Parse one hit. Return it as a Record object.
"""
record
=
Record
(
dataView
=
DataView
.
h
its
)
record
=
Record
(
advancedHits
=
advancedH
its
)
if
len
(
lang
)
<=
0
:
lang
=
config
.
search_lang_id
if
(
'
languages
'
not
in
hit
...
...
@@ -130,17 +131,25 @@ class TsakorpusResponseParser:
content
=
fragment_fromstring
(
contentTxt
,
create_parent
=
'
div
'
)
for
el
in
content
:
self
.
parse_span
(
el
,
record
)
self
.
parse_span
(
el
,
record
,
advancedHits
)
return
record
def
parse
(
self
,
response
,
config
:
ResourceConfig
,
lang
=
''
):
def
parse
(
self
,
response
,
config
:
ResourceConfig
,
xFcsDataviews
,
lang
=
''
):
"""
Read a dictionary with the first N hits returned by a Tsakorpus
instance. Return a list of Record objects and the total number of
records found.
"""
self
.
pc
=
POSConvertor
(
config
)
diagnostics
=
[]
advancedHits
=
False
dataViewsRequested
=
{
v
.
strip
()
for
v
in
xFcsDataviews
.
split
(
'
,
'
)
if
len
(
v
.
strip
())
>
0
}
if
'
adv
'
in
dataViewsRequested
:
advancedHits
=
True
for
v
in
dataViewsRequested
:
if
v
not
in
(
'
hits
'
,
'
adv
'
):
diagnostics
.
append
(
Diagnostic
(
DiagnosticTypes
.
fcs
,
4
,
details
=
v
))
nRecords
=
0
if
'
n_sentences
'
in
response
:
nRecords
=
response
[
'
n_sentences
'
]
...
...
@@ -148,7 +157,7 @@ class TsakorpusResponseParser:
return
[],
nRecords
records
=
[]
for
context
in
response
[
'
contexts
'
]:
records
.
append
(
self
.
parse_context
(
context
,
config
,
lang
))
records
.
append
(
self
.
parse_context
(
context
,
config
,
lang
,
advancedHits
))
return
records
,
nRecords
...
...
This diff is collapsed.
Click to expand it.
main.py
+
1
−
1
View file @
0f505c26
...
...
@@ -76,7 +76,7 @@ def endpoint(
except
Diagnostic
as
diag
:
print
(
'
diag
'
,
str
(
diag
))
return
Response
(
content
=
str
(
diag
),
media_type
=
'
application/xml
'
)
records
,
nHits
=
app
.
rp_tsakorpus
.
parse
(
res
,
config
)
records
,
nHits
=
app
.
rp_tsakorpus
.
parse
(
res
,
config
,
xFcsDataviews
)
records
=
[
r
.
as_dict
()
for
r
in
records
]
return
templates
.
TemplateResponse
(
'
search_retrieve_response.xml
'
,
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment