# ai-content-maker/.venv/Lib/site-packages/pooch/tests/test_hashes.py
#
# 205 lines
# 7.1 KiB
# Python

# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
# pylint: disable=redefined-outer-name
"""
Test the hash calculation and checking functions.
"""
import os
from pathlib import Path
from tempfile import NamedTemporaryFile
import pytest
# xxhash is an optional dependency: when it cannot be imported the module name
# is bound to None and the major version is reported as 0, which the tests
# below use to decide whether xxhash-based cases must be skipped.
try:
    import xxhash
except ImportError:
    xxhash = None
    XXHASH_MAJOR_VERSION = 0
else:
    # Only the leading component of the version string is needed.
    XXHASH_MAJOR_VERSION = int(xxhash.VERSION.split(".", maxsplit=1)[0])
from ..core import Pooch
from ..hashes import (
make_registry,
file_hash,
hash_matches,
)
from .utils import check_tiny_data, mirror_directory
# Folder with the test data, located relative to this test module.
DATA_DIR = str(Path(__file__).parent.joinpath("data", "store"))

# Expected registry contents when the data folder is scanned non-recursively.
REGISTRY = "tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"

# Expected registry contents when the data folder is scanned recursively.
REGISTRY_RECURSIVE = "".join(
    [
        "subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n",
        "tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n",
    ]
)

# Known hashes of tiny-data.txt for the hashlib algorithms.
TINY_DATA_HASHES_HASHLIB = dict(
    sha1="c03148994acd89317915ea2f2d080d6dd127aa09",
    sha256="baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
    md5="70e2afd3fd7e336ae478b1e740a5f08e",
)

# Known hashes of tiny-data.txt for the xxhash algorithms. Everything other
# than xxh64 and xxh32 is skipped by the tests when the xxhash major version
# is below 2.
TINY_DATA_HASHES_XXH = dict(
    xxh64="f843815fe57948fa",
    xxh32="98d6f1a2",
    xxh128="0267d220db258fffb0c567c0ecd1b689",
    xxh3_128="0267d220db258fffb0c567c0ecd1b689",
    xxh3_64="811e3f2a12aec53f",
)

# All known hashes: hashlib entries first, followed by the xxhash ones.
TINY_DATA_HASHES = {**TINY_DATA_HASHES_HASHLIB, **TINY_DATA_HASHES_XXH}
@pytest.fixture
def data_dir_mirror(tmp_path):
    """
    Mirror the test data folder on a temporary directory.

    Needed to avoid permission errors when pooch is installed on a
    non-writable path. Returns whatever ``mirror_directory`` returns for the
    copy of ``DATA_DIR`` made under ``tmp_path``.
    """
    mirrored = mirror_directory(DATA_DIR, tmp_path)
    return mirrored
def test_make_registry(data_dir_mirror):
    """
    Check that the registry builder creates the right file names and hashes.

    The registry is written to a temporary file, compared against
    ``REGISTRY`` and then loaded into a ``Pooch`` to confirm it is usable.
    """
    # Only the file name is needed: the handle is closed on leaving the
    # with-block so that make_registry can write to the path by name.
    with NamedTemporaryFile(delete=False) as handle:
        registry_path = handle.name
    try:
        make_registry(data_dir_mirror, registry_path, recursive=False)
        with open(registry_path, encoding="utf-8") as registry_file:
            contents = registry_file.read()
        assert contents == REGISTRY
        # Check that the registry can be used.
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry={})
        pup.load_registry(registry_path)
        expected = str(data_dir_mirror / "tiny-data.txt")
        fetched = pup.fetch("tiny-data.txt")
        assert expected == fetched
        check_tiny_data(fetched)
    finally:
        # delete=False above means the file must be removed by hand.
        os.remove(registry_path)
def test_make_registry_recursive(data_dir_mirror):
    """
    Check that the registry builder works in recursive mode.

    The registry is written to a temporary file, compared against
    ``REGISTRY_RECURSIVE`` and then loaded into a ``Pooch`` to confirm that
    both the top-level file and the one in ``subdir`` can be fetched.
    """
    # Only the file name is needed: the handle is closed on leaving the
    # with-block so that make_registry can write to the path by name.
    with NamedTemporaryFile(delete=False) as handle:
        registry_path = handle.name
    try:
        make_registry(data_dir_mirror, registry_path, recursive=True)
        with open(registry_path, encoding="utf-8") as registry_file:
            contents = registry_file.read()
        assert contents == REGISTRY_RECURSIVE
        # Check that the registry can be used.
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry={})
        pup.load_registry(registry_path)
        expected_paths = {
            "tiny-data.txt": data_dir_mirror / "tiny-data.txt",
            "subdir/tiny-data.txt": data_dir_mirror / "subdir" / "tiny-data.txt",
        }
        for name, expected in expected_paths.items():
            assert str(expected) == pup.fetch(name)
            check_tiny_data(pup.fetch(name))
    finally:
        # delete=False above means the file must be removed by hand.
        os.remove(registry_path)
def test_file_hash_invalid_algorithm():
    """
    Test an invalid hashing algorithm.

    ``file_hash`` must raise ``ValueError`` and the message must quote the
    offending algorithm name.
    """
    # The message check is done by pytest.raises itself (regex search on the
    # exception text), so it cannot be skipped by the exception being raised.
    with pytest.raises(ValueError, match="'blah'"):
        file_hash(fname="something", alg="blah")
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES.items()),
    ids=list(TINY_DATA_HASHES.keys()),
)
def test_file_hash(alg, expected_hash):
    """
    Test the hash calculation using hashlib and xxhash.

    xxhash-based cases are skipped when xxhash is not importable; cases other
    than xxh64/xxh32 are also skipped when the xxhash major version is below 2.
    """
    if alg.startswith("xxh"):
        if xxhash is None:
            pytest.skip("requires xxhash")
        if alg not in ("xxh64", "xxh32") and XXHASH_MAJOR_VERSION < 2:
            # The guard admits version 2.0 itself, hence ">=".
            pytest.skip("requires xxhash >= 2.0")
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    returned_hash = file_hash(fname, alg)
    assert returned_hash == expected_hash
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES.items()),
    ids=list(TINY_DATA_HASHES.keys()),
)
def test_hash_matches(alg, expected_hash):
    """
    Make sure the hash checking function works.

    xxhash-based cases are skipped when xxhash is not importable; cases other
    than xxh64/xxh32 are also skipped when the xxhash major version is below 2.
    """
    if alg.startswith("xxh"):
        if xxhash is None:
            pytest.skip("requires xxhash")
        if alg not in ("xxh64", "xxh32") and XXHASH_MAJOR_VERSION < 2:
            # The guard admits version 2.0 itself, hence ">=".
            pytest.skip("requires xxhash >= 2.0")
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # Check if the check passes
    good_hash = f"{alg}:{expected_hash}"
    assert hash_matches(fname, good_hash)
    # And also if it fails
    bad_hash = f"{alg}:blablablabla"
    assert not hash_matches(fname, bad_hash)
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES_HASHLIB.items()),
    ids=list(TINY_DATA_HASHES_HASHLIB.keys()),
)
def test_hash_matches_strict(alg, expected_hash):
    """
    Make sure the hash checking function raises an exception if strict.

    The error message is expected to mention ``source`` when one is given and
    the file name when ``source`` is None.
    """
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # Check if the check passes
    known_hash = f"{alg}:{expected_hash}"
    assert hash_matches(fname, known_hash, strict=True)
    # And also if it fails
    bad_hash = f"{alg}:blablablabla"
    with pytest.raises(ValueError) as error:
        hash_matches(fname, bad_hash, strict=True, source="Neverland")
    # Inspect the message only after the with-block has captured the error.
    assert "Neverland" in str(error.value)
    with pytest.raises(ValueError) as error:
        hash_matches(fname, bad_hash, strict=True, source=None)
    assert fname in str(error.value)
def test_hash_matches_none():
    """
    The hash checking function should always return True if known_hash=None.
    """
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    assert hash_matches(fname, known_hash=None)
    # Should work even if the file is invalid
    assert hash_matches(fname="", known_hash=None)
    # strict should cause an error if this wasn't working
    assert hash_matches(fname, known_hash=None, strict=True)
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES_HASHLIB.items()),
    ids=list(TINY_DATA_HASHES_HASHLIB.keys()),
)
def test_hash_matches_uppercase(alg, expected_hash):
    """
    Hash matching should be independent of upper or lower case.
    """
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # Check if the check passes
    known_hash = f"{alg}:{expected_hash.upper()}"
    assert hash_matches(fname, known_hash, strict=True)
    # And also if it fails: dropping the last five characters leaves a hash
    # that is expected not to match.
    truncated_hash = known_hash[:-5]
    with pytest.raises(ValueError, match="Neverland"):
        hash_matches(fname, truncated_hash, strict=True, source="Neverland")