Skip to content
Snippets Groups Projects
Commit 4c8aa673 authored by Christoph Ladurner
Browse files

global: replace cchardet by charset_normalizer

parent f83b31f7
No related branches found
No related tags found
No related merge requests found
...@@ -311,7 +311,7 @@ texinfo_documents = [ ...@@ -311,7 +311,7 @@ texinfo_documents = [
# Example configuration for intersphinx: refer to the Python standard library. # Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = { intersphinx_mapping = {
"https://docs.python.org/": None, "python": ("https://docs.python.org/", None),
"invenio_records_ui": ( "invenio_records_ui": (
"https://invenio-records-ui.readthedocs.io/en/latest/", "https://invenio-records-ui.readthedocs.io/en/latest/",
None, None,
... ...
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# #
# This file is part of Invenio. # This file is part of Invenio.
# Copyright (C) 2015-2019 CERN. # Copyright (C) 2015-2019 CERN.
# Copyright (C) 2023 Graz University of Technology.
# #
# Invenio is free software; you can redistribute it and/or modify it # Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details. # under the terms of the MIT License; see LICENSE file for more details.
...@@ -11,7 +12,7 @@ ...@@ -11,7 +12,7 @@
import os import os
import zipfile import zipfile
import cchardet as chardet from charset_normalizer import detect
from flask import current_app, render_template from flask import current_app, render_template
from ..proxies import current_previewer from ..proxies import current_previewer
...@@ -31,7 +32,7 @@ def make_tree(file): ...@@ -31,7 +32,7 @@ def make_tree(file):
sample = " ".join(zf.namelist()[:max_files_count]) sample = " ".join(zf.namelist()[:max_files_count])
if not isinstance(sample, bytes): if not isinstance(sample, bytes):
sample = sample.encode("utf-16be") sample = sample.encode("utf-16be")
encoding = chardet.detect(sample).get("encoding", "utf-8") encoding = detect(sample).get("encoding", "utf-8")
for i, info in enumerate(zf.infolist()): for i, info in enumerate(zf.infolist()):
if i > max_files_count: if i > max_files_count:
raise BufferError("Too many files inside the ZIP file.") raise BufferError("Too many files inside the ZIP file.")
... ...
......
...@@ -4,13 +4,15 @@ ...@@ -4,13 +4,15 @@
# Copyright (C) 2016-2019 CERN. # Copyright (C) 2016-2019 CERN.
# Copyright (C) 2023 Northwestern University. # Copyright (C) 2023 Northwestern University.
# Copyright (C) 2023 California Institute of Technology. # Copyright (C) 2023 California Institute of Technology.
# Copyright (C) 2023 Graz University of Technology.
# #
# Invenio is free software; you can redistribute it and/or modify it # Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details. # under the terms of the MIT License; see LICENSE file for more details.
"""Invenio Previewer Utilities.""" """Invenio Previewer Utilities."""
import cchardet
import charset_normalizer
from flask import current_app from flask import current_app
...@@ -31,7 +33,7 @@ def detect_encoding(fp, default=None): ...@@ -31,7 +33,7 @@ def detect_encoding(fp, default=None):
sample = fp.read(chardet_size) sample = fp.read(chardet_size)
# Result contains 'confidence' and 'encoding' # Result contains 'confidence' and 'encoding'
result = cchardet.detect(sample) result = charset_normalizer.detect(sample)
confidence = result.get("confidence", 0) or 0 confidence = result.get("confidence", 0) or 0
encoding = result.get("encoding", default) or default encoding = result.get("encoding", default) or default
... ...
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# #
# This file is part of Invenio. # This file is part of Invenio.
# Copyright (C) 2016-2020 CERN. # Copyright (C) 2016-2020 CERN.
# Copyright (C) 2022 Graz University of Technology. # Copyright (C) 2022-2023 Graz University of Technology.
# #
# Invenio is free software; you can redistribute it and/or modify it # Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details. # under the terms of the MIT License; see LICENSE file for more details.
...@@ -27,7 +27,7 @@ packages = find: ...@@ -27,7 +27,7 @@ packages = find:
python_requires = >=3.7 python_requires = >=3.7
zip_safe = False zip_safe = False
install_requires = install_requires =
cchardet>=1.0.0 charset_normalizer>=3.3.2
invenio-assets>=1.2.7 invenio-assets>=1.2.7
invenio-base>=1.2.10 invenio-base>=1.2.10
invenio-formatter>=1.1.3 invenio-formatter>=1.1.3
... ...
......
...@@ -2,14 +2,16 @@ ...@@ -2,14 +2,16 @@
# #
# This file is part of Invenio. # This file is part of Invenio.
# Copyright (C) 2016-2019 CERN. # Copyright (C) 2016-2019 CERN.
# Copyright (C) 2023 Graz University of Technology.
# #
# Invenio is free software; you can redistribute it and/or modify it # Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details. # under the terms of the MIT License; see LICENSE file for more details.
"""Test of utilities module.""" """Test of utilities module."""
from unittest.mock import patch
import pytest import pytest
from mock import patch
from six import BytesIO from six import BytesIO
from invenio_previewer import current_previewer from invenio_previewer import current_previewer
...@@ -43,7 +45,7 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect): ...@@ -43,7 +45,7 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect):
f = BytesIO(string) f = BytesIO(string)
initial_position = f.tell() initial_position = f.tell()
with patch("cchardet.detect") as mock_detect: with patch("charset_normalizer.detect") as mock_detect:
mock_detect.return_value = {"encoding": encoding, "confidence": confidence} mock_detect.return_value = {"encoding": encoding, "confidence": confidence}
assert detect_encoding(f) is detect assert detect_encoding(f) is detect
assert f.tell() == initial_position assert f.tell() == initial_position
...@@ -52,5 +54,5 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect): ...@@ -52,5 +54,5 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect):
def test_detect_encoding_exception(testapp): def test_detect_encoding_exception(testapp):
f = BytesIO("Γκρήκ Στρίνγκ".encode("utf-8")) f = BytesIO("Γκρήκ Στρίνγκ".encode("utf-8"))
with patch("cchardet.detect", Exception): with patch("charset_normalizer.detect", Exception):
assert detect_encoding(f) is None assert detect_encoding(f) is None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment