From 27fcf5eab648a0734d53d6a2db88060c6d150763 Mon Sep 17 00:00:00 2001
From: felixwelter <felixwelter@gmail.com>
Date: Fri, 4 Sep 2020 15:30:40 +0200
Subject: [PATCH] Add multi-file upload and index purging
---
app.py | 48 ++++++++++++++++++++++++++++----------------
templates/index.html | 15 +++++++++-----
2 files changed, 41 insertions(+), 22 deletions(-)
diff --git a/app.py b/app.py
index 5137711..1d890ef 100644
--- a/app.py
+++ b/app.py
@@ -1,3 +1,4 @@
+import glob
import os
from pathlib import Path
@@ -11,6 +12,7 @@ app = Flask(__name__)
SLIDE_DIR = "slides"
IMAGE_DIR = "img_cache"
+INDEX_DIR = "index"
Index = TitleFocusSearchIndex
@@ -18,6 +20,7 @@ Index = TitleFocusSearchIndex
@app.route('/')
def index():
ll = os.listdir(SLIDE_DIR)
+ ll.sort()
return render_template('index.html', ll=ll)
@@ -27,29 +30,31 @@ def allowed_file(filename):
@app.route('/upload', methods=['POST'])
def upload():
- if 'file' in request.files:
- file = request.files['file']
- if file.filename != '':
- if file and allowed_file(file.filename):
- filename = secure_filename(file.filename)
- file_path = os.path.join(Path(SLIDE_DIR), filename)
- file.save(file_path)
- pdf = pdfplumber.open(file_path)
- index = Index()
- for i, page in enumerate(pdf.pages):
- text = page.extract_text()
- index.add(str(file_path), i, text, text.split("\n")[0]) # Assumes title in the first line
- img_name = str(file_path)[7:] + "_" + str(i) + ".jpg"
- img_path = os.path.join(IMAGE_DIR, img_name)
- page.to_image().save(img_path)
- del index
+ if 'files' in request.files:
+ files = request.files.getlist('files')
+ for i, file in enumerate(files):
+ print(i, file)
+ if file.filename != '':
+ if file and allowed_file(file.filename):
+ filename = secure_filename(file.filename)
+ file_path = os.path.join(Path(SLIDE_DIR), filename)
+ file.save(file_path)
+ pdf = pdfplumber.open(file_path)
+ index = Index(index_dir=INDEX_DIR)
+ for i, page in enumerate(pdf.pages):
+ text = page.extract_text()
+ index.add(str(file_path), i, text, text.split("\n")[0]) # Assumes title in the first line
+ img_name = str(file_path)[7:] + "_" + str(i) + ".jpg"
+ img_path = os.path.join(IMAGE_DIR, img_name)
+ page.to_image().save(img_path)
+ del index
return redirect('/')
@app.route("/search", methods=['POST'])
def query():
try:
- index = Index()
+ index = Index(index_dir=INDEX_DIR)
query = request.form.get("term")
context = request.form.get("context")
result = index.search(query, context)
@@ -64,6 +69,15 @@ def query():
})
+@app.route("/reset_index", methods=['POST'])
+def reset_index():
+ for folder in [IMAGE_DIR, SLIDE_DIR, INDEX_DIR]:
+ files = glob.glob(folder + '/*')
+ for f in files:
+ os.remove(f)
+ return redirect('/')
+
+
@app.route("/slide/<img_name>")
def slide(img_name):
path = os.path.join(IMAGE_DIR, img_name)
diff --git a/templates/index.html b/templates/index.html
index 31e1195..a24edd3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -5,13 +5,18 @@
<body>
<h1>Upload new slide</h1>
<form action="upload" enctype="multipart/form-data" method="post">
- <input type="file" name="file">
+ <input type="file" name="files" multiple="">
<input type="submit" value="Upload">
</form>
-{% for item in ll %}
- <ul>
+<ul>
+ {% for item in ll %}
<li>{{ item }}</li>
- </ul>{% endfor %}
+ {% endfor %}
+</ul>
+
+<form action="reset_index" method="post">
+ <input type="submit" value="Reset Index">
+</form>
<h1>Query</h1>
<form action="search" method="post">
@@ -37,7 +42,7 @@
if (http.readyState == 4 && http.status == 200) {
console.log(http.responseText);
res = JSON.parse(http.responseText)
- if(res["type"] == "miss"){
+ if (res["type"] == "miss") {
document.getElementById("form-result").innerHTML = "No slide found";
return;
}
--
GitLab