diff --git a/docs/conf.py b/docs/conf.py index c58f06e2921664217d282f0f07f7ca8dd178b484..6fcc2ce5d2df6db7552c845d19048e2167c5b12f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -311,7 +311,7 @@ texinfo_documents = [ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "https://docs.python.org/": None, + "python": ("https://docs.python.org/", None), "invenio_records_ui": ( "https://invenio-records-ui.readthedocs.io/en/latest/", None, diff --git a/invenio_previewer/extensions/zip.py b/invenio_previewer/extensions/zip.py index 63cd794223cfee7deabb1bc87420718035ac835f..e1a21a58327e0e02b9dccaa938c30680e5552496 100644 --- a/invenio_previewer/extensions/zip.py +++ b/invenio_previewer/extensions/zip.py @@ -2,6 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2015-2019 CERN. +# Copyright (C) 2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -11,7 +12,7 @@ import os import zipfile -import cchardet as chardet +from charset_normalizer import detect from flask import current_app, render_template from ..proxies import current_previewer @@ -31,7 +32,7 @@ def make_tree(file): sample = " ".join(zf.namelist()[:max_files_count]) if not isinstance(sample, bytes): sample = sample.encode("utf-16be") - encoding = chardet.detect(sample).get("encoding", "utf-8") + encoding = detect(sample).get("encoding", "utf-8") for i, info in enumerate(zf.infolist()): if i > max_files_count: raise BufferError("Too many files inside the ZIP file.") diff --git a/invenio_previewer/utils.py b/invenio_previewer/utils.py index 2b93cb0e583fb09ee698ce59846e6a87d98c7832..b2b470c5f7d5133c0ac67e00e2bd6d3341cf038a 100644 --- a/invenio_previewer/utils.py +++ b/invenio_previewer/utils.py @@ -4,13 +4,15 @@ # Copyright (C) 2016-2019 CERN. # Copyright (C) 2023 Northwestern University. # Copyright (C) 2023 California Institute of Technology. +# Copyright (C) 2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. """Invenio Previewer Utilities.""" -import cchardet + +import charset_normalizer from flask import current_app @@ -31,7 +33,7 @@ def detect_encoding(fp, default=None): sample = fp.read(chardet_size) # Result contains 'confidence' and 'encoding' - result = cchardet.detect(sample) + result = charset_normalizer.detect(sample) confidence = result.get("confidence", 0) or 0 encoding = result.get("encoding", default) or default diff --git a/setup.cfg b/setup.cfg index 87c061ea49e9b985f5db06b8041144e21115c258..6a55f24628a322064c3cf6b534af7d993fab0828 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2016-2020 CERN. -# Copyright (C) 2022 Graz University of Technology. +# Copyright (C) 2022-2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -27,7 +27,7 @@ packages = find: python_requires = >=3.7 zip_safe = False install_requires = - cchardet>=1.0.0 + charset_normalizer>=3.3.2 invenio-assets>=1.2.7 invenio-base>=1.2.10 invenio-formatter>=1.1.3 diff --git a/tests/test_utils.py b/tests/test_utils.py index 05f3400ece5f79f751dea6d31d07f38163c81513..902696b0106a5eae4f563ba160741a49125e8168 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,14 +2,16 @@ # # This file is part of Invenio. # Copyright (C) 2016-2019 CERN. +# Copyright (C) 2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. """Test of utilities module.""" +from unittest.mock import patch + import pytest -from mock import patch from six import BytesIO from invenio_previewer import current_previewer @@ -43,7 +45,7 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect): f = BytesIO(string) initial_position = f.tell() - with patch("cchardet.detect") as mock_detect: + with patch("charset_normalizer.detect") as mock_detect: mock_detect.return_value = {"encoding": encoding, "confidence": confidence} assert detect_encoding(f) is detect assert f.tell() == initial_position @@ -52,5 +54,5 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect): def test_detect_encoding_exception(testapp): f = BytesIO("Γκρήκ Στρίνγκ".encode("utf-8")) - with patch("cchardet.detect", Exception): + with patch("charset_normalizer.detect", Exception): assert detect_encoding(f) is None