From 4c8aa673f67c60bd6dcaf9996d72b405a847971d Mon Sep 17 00:00:00 2001 From: Christoph Ladurner <christoph.ladurner@tugraz.at> Date: Sat, 11 Nov 2023 23:03:14 +0100 Subject: [PATCH] global: replace cchardet by charset_normalizer --- docs/conf.py | 2 +- invenio_previewer/extensions/zip.py | 5 +++-- invenio_previewer/utils.py | 6 ++++-- setup.cfg | 4 ++-- tests/test_utils.py | 8 +++++--- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c58f06e..6fcc2ce 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -311,7 +311,7 @@ texinfo_documents = [ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "https://docs.python.org/": None, + "python": ("https://docs.python.org/", None), "invenio_records_ui": ( "https://invenio-records-ui.readthedocs.io/en/latest/", None, diff --git a/invenio_previewer/extensions/zip.py b/invenio_previewer/extensions/zip.py index 63cd794..e1a21a5 100644 --- a/invenio_previewer/extensions/zip.py +++ b/invenio_previewer/extensions/zip.py @@ -2,6 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2015-2019 CERN. +# Copyright (C) 2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -11,7 +12,7 @@ import os import zipfile -import cchardet as chardet +from charset_normalizer import detect from flask import current_app, render_template from ..proxies import current_previewer @@ -31,7 +32,7 @@ def make_tree(file): sample = " ".join(zf.namelist()[:max_files_count]) if not isinstance(sample, bytes): sample = sample.encode("utf-16be") - encoding = chardet.detect(sample).get("encoding", "utf-8") + encoding = detect(sample).get("encoding", "utf-8") for i, info in enumerate(zf.infolist()): if i > max_files_count: raise BufferError("Too many files inside the ZIP file.") diff --git a/invenio_previewer/utils.py b/invenio_previewer/utils.py index 2b93cb0..b2b470c 100644 --- a/invenio_previewer/utils.py +++ b/invenio_previewer/utils.py @@ -4,13 +4,15 @@ # Copyright (C) 2016-2019 CERN. # Copyright (C) 2023 Northwestern University. # Copyright (C) 2023 California Institute of Technology. +# Copyright (C) 2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. """Invenio Previewer Utilities.""" -import cchardet + +import charset_normalizer from flask import current_app @@ -31,7 +33,7 @@ def detect_encoding(fp, default=None): sample = fp.read(chardet_size) # Result contains 'confidence' and 'encoding' - result = cchardet.detect(sample) + result = charset_normalizer.detect(sample) confidence = result.get("confidence", 0) or 0 encoding = result.get("encoding", default) or default diff --git a/setup.cfg b/setup.cfg index 87c061e..6a55f24 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2016-2020 CERN. -# Copyright (C) 2022 Graz University of Technology. +# Copyright (C) 2022-2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -27,7 +27,7 @@ packages = find: python_requires = >=3.7 zip_safe = False install_requires = - cchardet>=1.0.0 + charset_normalizer>=3.3.2 invenio-assets>=1.2.7 invenio-base>=1.2.10 invenio-formatter>=1.1.3 diff --git a/tests/test_utils.py b/tests/test_utils.py index 05f3400..902696b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,14 +2,16 @@ # # This file is part of Invenio. # Copyright (C) 2016-2019 CERN. +# Copyright (C) 2023 Graz University of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. """Test of utilities module.""" +from unittest.mock import patch + import pytest -from mock import patch from six import BytesIO from invenio_previewer import current_previewer @@ -43,7 +45,7 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect): f = BytesIO(string) initial_position = f.tell() - with patch("cchardet.detect") as mock_detect: + with patch("charset_normalizer.detect") as mock_detect: mock_detect.return_value = {"encoding": encoding, "confidence": confidence} assert detect_encoding(f) is detect assert f.tell() == initial_position @@ -52,5 +54,5 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect): def test_detect_encoding_exception(testapp): f = BytesIO("Γκρήκ Στρίνγκ".encode("utf-8")) - with patch("cchardet.detect", Exception): + with patch("charset_normalizer.detect", Exception): assert detect_encoding(f) is None -- GitLab