Skip to content
Snippets Groups Projects
Commit e46e1c80 authored by felixwelter's avatar felixwelter
Browse files

Add option to give title row

parent 1310e543
Branches
Tags
No related merge requests found
......@@ -25,6 +25,13 @@ def index():
return render_template('index.html', ll=ll)
@app.route('/current_index')
def current_index():
    """Return an HTML listing of the entries found in the search index.

    Opens the index stored at ``INDEX_DIR``, runs ``search("*", "")`` on it
    (presumably a match-all query -- confirm against ``Index.search``) and
    renders one ``"<file_name> <page> <title>"`` line per result, with lines
    separated by ``<br>``.

    Returns:
        str: The HTML fragment sent as the response body.
    """
    # Imported locally so this view does not depend on the module's import
    # block already providing ``html``.
    import html

    # Named ``search_index`` rather than ``index`` to avoid shadowing the
    # ``index()`` view function defined earlier in this module.
    search_index = Index(index_dir=INDEX_DIR)

    # The indexed values come from uploaded documents, i.e. untrusted input,
    # and the result is served as HTML. Escape every field so that markup in
    # a file name or title cannot be injected into the page.
    rows = (
        "{} {} {}".format(
            html.escape(str(hit["file_name"])),
            html.escape(str(hit["page"])),
            html.escape(str(hit["title"])),
        )
        for hit in search_index.search("*", "")
    )
    return "<br>".join(rows)
def allowed_file(filename):
    """Tell whether *filename* ends in an accepted extension.

    Only the text after the last ``.`` is considered, compared
    case-insensitively; the sole accepted extension is ``pdf``. A name
    without any ``.`` is rejected.

    Returns:
        bool: ``True`` if the extension is accepted, ``False`` otherwise.
    """
    _, separator, extension = filename.rpartition('.')
    if not separator:
        # No dot at all, so there is no extension to check.
        return False
    return extension.lower() in ["pdf"]
......@@ -39,7 +46,8 @@ def upload():
files = request.files.getlist('files')
index = Index(index_dir=INDEX_DIR)
indexer = BasicIndexer(index, IMAGE_DIR)
indexer.title_row = 2 # TODO: Add mechanism for automatic detection of user configuration
if request.form['title_row']:
indexer.title_row = int(request.form['title_row'])
for i, file in enumerate(files):
if file.filename != '':
if file and allowed_file(file.filename):
......
......@@ -20,6 +20,7 @@ class BasicIndexer():
pdf_file_name = pdf_file_path.split(os.sep)[-1]
for i, page in enumerate(pdf.pages):
text = page.extract_text()
print(self.title_row)
self.index.add(pdf_file_name, i + 1, text, text.split("\n")[self.title_row])
if self.process_images:
img_name = pdf_file_name + "_" + str(i + 1) + ".jpg"
......
......@@ -6,7 +6,8 @@
<h1>Upload new slide</h1>
<form action="upload" enctype="multipart/form-data" method="post">
<input type="file" name="files" multiple="">
<input type="submit" value="Upload">
<input type="submit" value="Upload"><br>
Title row: <input type="number" name="title_row">
</form>
<ul>
{% for item in ll %}
......@@ -17,6 +18,9 @@
<form action="reset_index" method="post">
<input type="submit" value="Reset Index">
</form>
<p>
<a href="current_index">View example of extracted titles</a>
</p>
<h1>Query</h1>
<form action="search" method="post">
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment