From 4c8aa673f67c60bd6dcaf9996d72b405a847971d Mon Sep 17 00:00:00 2001
From: Christoph Ladurner <christoph.ladurner@tugraz.at>
Date: Sat, 11 Nov 2023 23:03:14 +0100
Subject: [PATCH] global: replace cchardet by charset_normalizer

---
 docs/conf.py                        | 2 +-
 invenio_previewer/extensions/zip.py | 5 +++--
 invenio_previewer/utils.py          | 6 ++++--
 setup.cfg                           | 4 ++--
 tests/test_utils.py                 | 8 +++++---
 5 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index c58f06e..6fcc2ce 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -311,7 +311,7 @@ texinfo_documents = [
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    "https://docs.python.org/": None,
+    "python": ("https://docs.python.org/", None),
     "invenio_records_ui": (
         "https://invenio-records-ui.readthedocs.io/en/latest/",
         None,
diff --git a/invenio_previewer/extensions/zip.py b/invenio_previewer/extensions/zip.py
index 63cd794..e1a21a5 100644
--- a/invenio_previewer/extensions/zip.py
+++ b/invenio_previewer/extensions/zip.py
@@ -2,6 +2,7 @@
 #
 # This file is part of Invenio.
 # Copyright (C) 2015-2019 CERN.
+# Copyright (C) 2023 Graz University of Technology.
 #
 # Invenio is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
@@ -11,7 +12,7 @@
 import os
 import zipfile
 
-import cchardet as chardet
+from charset_normalizer import detect
 from flask import current_app, render_template
 
 from ..proxies import current_previewer
@@ -31,7 +32,7 @@ def make_tree(file):
             sample = " ".join(zf.namelist()[:max_files_count])
             if not isinstance(sample, bytes):
                 sample = sample.encode("utf-16be")
-            encoding = chardet.detect(sample).get("encoding", "utf-8")
+            encoding = detect(sample).get("encoding", "utf-8")
             for i, info in enumerate(zf.infolist()):
                 if i > max_files_count:
                     raise BufferError("Too many files inside the ZIP file.")
diff --git a/invenio_previewer/utils.py b/invenio_previewer/utils.py
index 2b93cb0..b2b470c 100644
--- a/invenio_previewer/utils.py
+++ b/invenio_previewer/utils.py
@@ -4,13 +4,15 @@
 # Copyright (C) 2016-2019 CERN.
 # Copyright (C) 2023 Northwestern University.
 # Copyright (C) 2023 California Institute of Technology.
+# Copyright (C) 2023 Graz University of Technology.
 #
 # Invenio is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
 
 """Invenio Previewer Utilities."""
 
-import cchardet
+
+import charset_normalizer
 from flask import current_app
 
 
@@ -31,7 +33,7 @@ def detect_encoding(fp, default=None):
         sample = fp.read(chardet_size)
 
         # Result contains 'confidence' and 'encoding'
-        result = cchardet.detect(sample)
+        result = charset_normalizer.detect(sample)
         confidence = result.get("confidence", 0) or 0
         encoding = result.get("encoding", default) or default
 
diff --git a/setup.cfg b/setup.cfg
index 87c061e..6a55f24 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,7 +2,7 @@
 #
 # This file is part of Invenio.
 # Copyright (C) 2016-2020 CERN.
-# Copyright (C) 2022 Graz University of Technology.
+# Copyright (C) 2022-2023 Graz University of Technology.
 #
 # Invenio is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
@@ -27,7 +27,7 @@ packages = find:
 python_requires = >=3.7
 zip_safe = False
 install_requires =
-    cchardet>=1.0.0
+    charset_normalizer>=3.3.2
     invenio-assets>=1.2.7
     invenio-base>=1.2.10
     invenio-formatter>=1.1.3
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 05f3400..902696b 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,14 +2,16 @@
 #
 # This file is part of Invenio.
 # Copyright (C) 2016-2019 CERN.
+# Copyright (C) 2023 Graz University of Technology.
 #
 # Invenio is free software; you can redistribute it and/or modify it
 # under the terms of the MIT License; see LICENSE file for more details.
 
 """Test of utilities module."""
 
+from unittest.mock import patch
+
 import pytest
-from mock import patch
 from six import BytesIO
 
 from invenio_previewer import current_previewer
@@ -43,7 +45,7 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect):
     f = BytesIO(string)
     initial_position = f.tell()
 
-    with patch("cchardet.detect") as mock_detect:
+    with patch("charset_normalizer.detect") as mock_detect:
         mock_detect.return_value = {"encoding": encoding, "confidence": confidence}
         assert detect_encoding(f) is detect
         assert f.tell() == initial_position
@@ -52,5 +54,5 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect):
 def test_detect_encoding_exception(testapp):
     f = BytesIO("Γκρήκ Στρίνγκ".encode("utf-8"))
 
-    with patch("cchardet.detect", Exception):
+    with patch("charset_normalizer.detect", Exception):
         assert detect_encoding(f) is None
-- 
GitLab