From e46e1c8029c77939c5c4028ab65a947085ab503c Mon Sep 17 00:00:00 2001
From: felixwelter <felixwelter@gmail.com>
Date: Mon, 12 Oct 2020 14:11:18 +0200
Subject: [PATCH] Add option to give title row

---
Review notes (this section is not part of the commit message):

- Dropped the slide_indexer/basic_indexer.py hunk. Its only change was a
  leftover debug `print(self.title_row)` that ran once per PDF page.
- /current_index now HTML-escapes file name, page and title before joining
  them with <br>; the titles are text extracted from uploaded PDFs.
- upload() reads the field with request.form.get('title_row', type=int), so
  a missing, empty or non-integer value no longer raises; the indexer's
  title_row is then simply left unchanged.
- NOTE(review): leading whitespace of every diff line was lost in the copy
  this patch was rebuilt from. Indentation of context lines (and the spacing
  before `# TODO` on the removed line) is reconstructed by convention --
  verify against the tree before applying.
- NOTE(review): the post-image blob id on app.py's `index` line is carried
  over from the original patch and no longer matches the modified content.
- NOTE(review): BasicIndexer takes `text.split("\n")[self.title_row]` with
  no bounds check; a user-supplied row past the page's line count would
  raise IndexError. Consider guarding that in a follow-up.

 app.py               | 19 ++++++++++++++++++-
 templates/index.html |  6 +++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/app.py b/app.py
index 27d1ebd..6e2c91e 100644
--- a/app.py
+++ b/app.py
@@ -25,6 +25,19 @@ def index():
     return render_template('index.html', ll=ll)
 
 
+@app.route('/current_index')
+def current_index():
+    """Return file name, page and title of each index.search hit, joined by <br>."""
+    # Function-scope import: the top of app.py lies outside this patch's hunks.
+    from html import escape
+
+    index = Index(index_dir=INDEX_DIR)
+    # Titles originate from uploaded PDFs, so escape them before emitting HTML.
+    return "<br>".join(
+        escape("{} {} {}".format(res["file_name"], res["page"], res["title"]))
+        for res in index.search("*", ""))
+
+
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ["pdf"]
 
@@ -39,7 +52,11 @@ def upload():
         files = request.files.getlist('files')
         index = Index(index_dir=INDEX_DIR)
         indexer = BasicIndexer(index, IMAGE_DIR)
-        indexer.title_row = 2  # TODO: Add mechanism for automatic detection of user configuration
+        # A missing, empty or non-integer field yields None here instead of
+        # raising, and indexer.title_row is then left as BasicIndexer set it.
+        title_row = request.form.get('title_row', type=int)
+        if title_row is not None:
+            indexer.title_row = title_row
         for i, file in enumerate(files):
             if file.filename != '':
                 if file and allowed_file(file.filename):
diff --git a/templates/index.html b/templates/index.html
index f7062b2..08475f1 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -6,7 +6,8 @@
 <h1>Upload new slide</h1>
 <form action="upload" enctype="multipart/form-data" method="post">
     <input type="file" name="files" multiple="">
-    <input type="submit" value="Upload">
+    <input type="submit" value="Upload"><br>
+    Title row: <input type="number" name="title_row">
 </form>
 <ul>
     {% for item in ll %}
@@ -17,6 +18,9 @@
 <form action="reset_index" method="post">
     <input type="submit" value="Reset Index">
 </form>
+<p>
+    <a href="current_index">View example of extracted titles</a>
+</p>
 
 <h1>Query</h1>
 <form action="search" method="post">
-- 
GitLab