Skip to content
Snippets Groups Projects
Commit 9a272ee4 authored by felixwelter's avatar felixwelter
Browse files

Change indexed page number to start with 1

parent b03b8f29
No related branches found
No related tags found
No related merge requests found
...@@ -20,8 +20,8 @@ class BasicIndexer(): ...@@ -20,8 +20,8 @@ class BasicIndexer():
pdf_file_name = pdf_file_path.split(os.sep)[-1] pdf_file_name = pdf_file_path.split(os.sep)[-1]
for i, page in enumerate(pdf.pages): for i, page in enumerate(pdf.pages):
text = page.extract_text() text = page.extract_text()
self.index.add(pdf_file_name, i, text, text.split("\n")[self.title_row]) self.index.add(pdf_file_name, i + 1, text, text.split("\n")[self.title_row])
if self.process_images: if self.process_images:
img_name = pdf_file_name + "_" + str(i) + ".jpg" img_name = pdf_file_name + "_" + str(i + 1) + ".jpg"
img_path = os.path.join(self.image_dir, img_name) img_path = os.path.join(self.image_dir, img_name)
page.to_image().save(img_path) page.to_image().save(img_path)
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment