diff --git a/app.py b/app.py
--- a/app.py
+++ b/app.py
@@ -25,6 +25,21 @@ def index():
     return render_template('index.html', ll=ll)
 
 
+@app.route('/current_index')
+def current_index():
+    """Return every indexed page as "file page title" rows joined by <br>."""
+    from html import escape  # stdlib; local import keeps this hunk self-contained
+
+    slide_index = Index(index_dir=INDEX_DIR)
+    rows = (
+        "{} {} {}".format(res["file_name"], res["page"], res["title"])
+        for res in slide_index.search("*", "")
+    )
+    # File names and titles originate from uploaded PDFs; escape them so they
+    # cannot inject markup into the HTML response.
+    return "<br>".join(escape(row) for row in rows)
+
+
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ["pdf"]
 
@@ -39,7 +54,11 @@ def upload():
         files = request.files.getlist('files')
         index = Index(index_dir=INDEX_DIR)
         indexer = BasicIndexer(index, IMAGE_DIR)
-        indexer.title_row = 2  # TODO: Add mechanism for automatic detection of user configuration
+        # A missing, blank or non-integer field yields None, which leaves the
+        # indexer's default title row in place instead of failing the upload.
+        title_row = request.form.get('title_row', type=int)
+        if title_row is not None:
+            indexer.title_row = title_row
         for i, file in enumerate(files):
             if file.filename != '':
                 if file and allowed_file(file.filename):
diff --git a/templates/index.html b/templates/index.html
index f7062b25268a5500c7196e10be5e02e762f1b82c..08475f161f6e79b87bf00d8502b6f2d5cb0cea42 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -6,7 +6,8 @@
 <h1>Upload new slide</h1>
 <form action="upload" enctype="multipart/form-data" method="post">
     <input type="file" name="files" multiple="">
-    <input type="submit" value="Upload">
+    <input type="submit" value="Upload"><br>
+    Title row: <input type="number" name="title_row">
 </form>
 <ul>
     {% for item in ll %}
@@ -17,6 +18,9 @@
 <form action="reset_index" method="post">
     <input type="submit" value="Reset Index">
 </form>
+<p>
+    <a href="current_index">View example of extracted titles</a>
+</p>
 
 <h1>Query</h1>
 <form action="search" method="post">