Skip to content
Snippets Groups Projects
Commit 4c8aa673 authored by Christoph Ladurner
Browse files

global: replace cchardet by charset_normalizer

parent f83b31f7
No related branches found
No related tags found
No related merge requests found
......@@ -311,7 +311,7 @@ texinfo_documents = [
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
-"https://docs.python.org/": None,
+"python": ("https://docs.python.org/", None),
"invenio_records_ui": (
"https://invenio-records-ui.readthedocs.io/en/latest/",
None,
......
......@@ -2,6 +2,7 @@
#
# This file is part of Invenio.
# Copyright (C) 2015-2019 CERN.
+# Copyright (C) 2023 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
......@@ -11,7 +12,7 @@
import os
import zipfile
-import cchardet as chardet
+from charset_normalizer import detect
from flask import current_app, render_template
from ..proxies import current_previewer
......@@ -31,7 +32,7 @@ def make_tree(file):
sample = " ".join(zf.namelist()[:max_files_count])
if not isinstance(sample, bytes):
sample = sample.encode("utf-16be")
-encoding = chardet.detect(sample).get("encoding", "utf-8")
+encoding = detect(sample).get("encoding", "utf-8")
for i, info in enumerate(zf.infolist()):
if i > max_files_count:
raise BufferError("Too many files inside the ZIP file.")
......
......@@ -4,13 +4,15 @@
# Copyright (C) 2016-2019 CERN.
# Copyright (C) 2023 Northwestern University.
# Copyright (C) 2023 California Institute of Technology.
+# Copyright (C) 2023 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Invenio Previewer Utilities."""
-import cchardet
+import charset_normalizer
from flask import current_app
......@@ -31,7 +33,7 @@ def detect_encoding(fp, default=None):
sample = fp.read(chardet_size)
# Result contains 'confidence' and 'encoding'
-result = cchardet.detect(sample)
+result = charset_normalizer.detect(sample)
confidence = result.get("confidence", 0) or 0
encoding = result.get("encoding", default) or default
......
......@@ -2,7 +2,7 @@
#
# This file is part of Invenio.
# Copyright (C) 2016-2020 CERN.
-# Copyright (C) 2022 Graz University of Technology.
+# Copyright (C) 2022-2023 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
......@@ -27,7 +27,7 @@ packages = find:
python_requires = >=3.7
zip_safe = False
install_requires =
-cchardet>=1.0.0
+charset_normalizer>=3.3.2
invenio-assets>=1.2.7
invenio-base>=1.2.10
invenio-formatter>=1.1.3
......
......@@ -2,14 +2,16 @@
#
# This file is part of Invenio.
# Copyright (C) 2016-2019 CERN.
+# Copyright (C) 2023 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Test of utilities module."""
+from unittest.mock import patch
import pytest
-from mock import patch
from six import BytesIO
from invenio_previewer import current_previewer
......@@ -43,7 +45,7 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect):
f = BytesIO(string)
initial_position = f.tell()
-with patch("cchardet.detect") as mock_detect:
+with patch("charset_normalizer.detect") as mock_detect:
mock_detect.return_value = {"encoding": encoding, "confidence": confidence}
assert detect_encoding(f) is detect
assert f.tell() == initial_position
......@@ -52,5 +54,5 @@ def test_detect_encoding(testapp, string, confidence, encoding, detect):
def test_detect_encoding_exception(testapp):
f = BytesIO("Γκρήκ Στρίνγκ".encode("utf-8"))
-with patch("cchardet.detect", Exception):
+with patch("charset_normalizer.detect", Exception):
assert detect_encoding(f) is None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment