Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
slide-index
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Welter, Felix
slide-index
Commits
8f440b45
Commit
8f440b45
authored
4 years ago
by
felixwelter
Browse files
Options
Downloads
Patches
Plain Diff
Add further context processing to TitleFocusSearchIndex
parent
fcec07d2
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
Dockerfile
+2
-2
2 additions, 2 deletions
Dockerfile
search_index/german_stopwords.py
+234
-0
234 additions, 0 deletions
search_index/german_stopwords.py
search_index/title_focus_search_index.py
+27
-0
27 additions, 0 deletions
search_index/title_focus_search_index.py
with
263 additions
and
2 deletions
Dockerfile
+
2
−
2
View file @
8f440b45
FROM
python:3.8
RUN
pip
install
pdfplumber Whoosh Flask nltk Deprecated
RUN
python
-m
nltk.downloader punkt
RUN
pip
install
pdfplumber Whoosh Flask nltk Deprecated
pandas
RUN
python
-m
nltk.downloader punkt
&&
python
-m
nltk.downloader stopwords
RUN
apt-get update
RUN
apt-get
install
-y
libmagickwand-dev ghostscript
RUN
rm
/etc/ImageMagick-6/policy.xml
...
...
This diff is collapsed.
Click to expand it.
search_index/german_stopwords.py
0 → 100644
+
234
−
0
View file @
8f440b45
# German stop words: very common function words (articles, pronouns,
# prepositions, auxiliary verbs, ...) that carry little meaning on their own.
# All entries are lowercase. The list deliberately contains no empty strings,
# so it is safe to use for token-membership filtering on text that was split
# on single spaces (which can yield empty tokens).
stopwords = [
    "aber", "alle", "allem", "allen", "aller", "alles", "als", "also",
    "am", "an", "ander", "andere", "anderem", "anderen", "anderer",
    "anderes", "anderm", "andern", "anderr", "anders", "auch", "auf",
    "aus", "bei", "bin", "bis", "bist", "da", "damit", "dann", "der",
    "den", "des", "dem", "die", "das", "daß", "derselbe", "derselben",
    "denselben", "desselben", "demselben", "dieselbe", "dieselben",
    "dasselbe", "dazu", "dein", "deine", "deinem", "deinen", "deiner",
    "deines", "denn", "derer", "dessen", "dich", "dir", "du", "dies",
    "diese", "diesem", "diesen", "dieser", "dieses", "doch", "dort",
    "durch", "ein", "eine", "einem", "einen", "einer", "eines", "einig",
    "einige", "einigem", "einigen", "einiger", "einiges", "einmal", "er",
    "ihn", "ihm", "es", "etwas", "euer", "eure", "eurem", "euren",
    "eurer", "eures", "für", "gegen", "gewesen", "hab", "habe", "haben",
    "hat", "hatte", "hatten", "hier", "hin", "hinter", "ich", "mich",
    "mir", "ihr", "ihre", "ihrem", "ihren", "ihrer", "ihres", "euch",
    "im", "in", "indem", "ins", "ist", "jede", "jedem", "jeden", "jeder",
    "jedes", "jene", "jenem", "jenen", "jener", "jenes", "jetzt", "kann",
    "kein", "keine", "keinem", "keinen", "keiner", "keines", "können",
    "könnte", "machen", "man", "manche", "manchem", "manchen", "mancher",
    "manches", "mein", "meine", "meinem", "meinen", "meiner", "meines",
    "mit", "muss", "musste", "nach", "nicht", "nichts", "noch", "nun",
    "nur", "ob", "oder", "ohne", "sehr", "sein", "seine", "seinem",
    "seinen", "seiner", "seines", "selbst", "sich", "sie", "ihnen",
    "sind", "so", "solche", "solchem", "solchen", "solcher", "solches",
    "soll", "sollte", "sondern", "sonst", "über", "um", "und", "uns",
    "unse", "unsem", "unsen", "unser", "unses", "unter", "viel", "vom",
    "von", "vor", "während", "war", "waren", "warst", "was", "weg",
    "weil", "weiter", "welche", "welchem", "welchen", "welcher",
    "welches", "wenn", "werde", "werden", "wie", "wieder", "will", "wir",
    "wird", "wirst", "wo", "wollen", "wollte", "würde", "würden", "zu",
    "zum", "zur", "zwar", "zwischen",
]
This diff is collapsed.
Click to expand it.
search_index/title_focus_search_index.py
+
27
−
0
View file @
8f440b45
from
whoosh.qparser
import
QueryParser
,
OrGroup
from
nltk.corpus
import
stopwords
from
.basic_search_index
import
BasicSearchIndex
from
.german_stopwords
import
stopwords
as
german_stopwords
def clean_context(term, context):
    """Strip redundant and low-information words from a context string.

    The context is split on single spaces. Every token that also occurs in
    ``term``, and every token that is an English or German stop word
    according to ``nltk.corpus.stopwords``, is dropped. The remaining tokens
    are joined with single spaces again.

    Unlike a ``list.remove``-based approach, *all* occurrences of a matching
    token are removed, not just the first one.

    Args:
        term: The user's search term; its space-separated words are removed
            from the context.
        context: Space-separated context text to clean.

    Returns:
        The cleaned context as a single space-joined string. It may be empty
        or contain only spaces if nothing meaningful is left.
    """
    context_tokens = context.split(" ")

    # Words already present in the term add nothing to the context.
    # Whitespace-only fragments of the term are ignored.
    term_tokens = {token for token in term.split(" ") if token.strip() != ""}

    # Collect the stop words once so each context token is checked with an
    # O(1) set lookup instead of one list scan per stop word.
    stop_words = set()
    for lang in ["english", "german"]:
        stop_words.update(stopwords.words(lang))

    # NOTE(review): matching is exact (case-sensitive), as before; the stop
    # word lists appear to be lowercase, so capitalised words such as "The"
    # or "Die" are kept -- confirm whether that is intended.
    remaining = [
        token
        for token in context_tokens
        if token not in term_tokens and token not in stop_words
    ]
    return " ".join(remaining)
class
TitleFocusSearchIndex
(
BasicSearchIndex
):
...
...
@@ -8,6 +34,7 @@ class TitleFocusSearchIndex(BasicSearchIndex):
def
result_list
(
self
,
query
,
context
):
user_query
=
query
context
=
clean_context
(
user_query
,
context
)
query
=
"
title:({})^2
"
.
format
(
user_query
)
query
+=
"
content:({})^1.2
"
.
format
(
user_query
)
if
len
(
context
.
strip
())
>
0
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment