ai-content-maker/.venv/Lib/site-packages/pooch/tests/test_processors.py

290 lines
11 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Test the processor hooks
"""
from pathlib import Path
from tempfile import TemporaryDirectory
import warnings
import pytest
from .. import Pooch
from ..processors import Unzip, Untar, Decompress
from .utils import pooch_test_url, pooch_test_registry, check_tiny_data, capture_log
# Registry and base URL obtained from the shared test helpers in .utils; the
# tests below pass them to every Pooch they create.
REGISTRY = pooch_test_registry()
BASEURL = pooch_test_url()
@pytest.mark.network
@pytest.mark.parametrize(
    "method,ext,name",
    [
        ("auto", "xz", None),
        ("lzma", "xz", None),
        ("xz", "xz", None),
        ("bzip2", "bz2", None),
        ("gzip", "gz", None),
        ("gzip", "gz", "different-name.txt"),
    ],
    ids=["auto", "lzma", "xz", "bz2", "gz", "name"],
)
def test_decompress(method, ext, name):
    """
    Fetch a compressed copy of the tiny data file through Decompress.

    The first fetch must log exactly two lines (the download, then the
    decompression message naming the method). A second fetch of the same file
    must log nothing and return the same path.
    """
    remote_name = "tiny-data.txt." + ext
    decompressor = Decompress(method=method, name=name)
    with TemporaryDirectory() as local_store:
        cache_dir = Path(local_store)
        # Without a custom name, the expected output is the downloaded file
        # name with ".decomp" appended; otherwise it is the custom name.
        default_name = ".".join(["tiny-data.txt", ext, "decomp"])
        expected = str(cache_dir / (default_name if name is None else name))
        # Pooch working out of the temporary directory
        pup = Pooch(path=cache_dir, base_url=BASEURL, registry=REGISTRY)
        # First fetch: downloads and runs the processor, both of which log
        with capture_log() as log_file:
            fname = pup.fetch(remote_name, processor=decompressor)
            messages = log_file.getvalue().splitlines()
            assert len(messages) == 2
            download_msg, decompress_msg = messages[0], messages[-1]
            assert download_msg.split()[0] == "Downloading"
            assert decompress_msg.startswith("Decompressing")
            assert method in decompress_msg
        assert fname == expected
        check_tiny_data(fname)
        # Second fetch: nothing to download, so the processor must stay quiet
        with capture_log() as log_file:
            fname = pup.fetch(remote_name, processor=decompressor)
            assert log_file.getvalue() == ""
        assert fname == expected
        check_tiny_data(fname)
@pytest.mark.network
def test_decompress_fails():
    """
    Decompress must raise ValueError for unusable extensions and methods.

    Covers method="auto" with an unrecognized extension and explicit invalid
    method names. For zip inputs the error message is expected to mention
    pooch.Unzip/Untar; for the other cases it must not.
    """
    # Each case: file to fetch, Decompress method, expected start of the
    # error message, and whether the message should mention Unzip/Untar.
    cases = [
        # Invalid extension
        ("tiny-data.txt", "auto", "Unrecognized file extension '.txt'", False),
        # Bad method name
        ("tiny-data.txt", "bla", "Invalid compression method 'bla'", False),
        # Archive formats: point people to Untar and Unzip
        ("tiny-data.txt", "zip", "Invalid compression method 'zip'", True),
        ("store.zip", "auto", "Unrecognized file extension '.zip'", True),
    ]
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        for remote_name, method, prefix, mentions_archives in cases:
            with pytest.raises(ValueError) as exception:
                with warnings.catch_warnings():
                    pup.fetch(remote_name, processor=Decompress(method=method))
            message = exception.value.args[0]
            assert message.startswith(prefix)
            assert ("pooch.Unzip/Untar" in message) == mentions_archives
@pytest.mark.network
@pytest.mark.parametrize(
    "target_path", [None, "some_custom_path"], ids=["default_path", "custom_path"]
)
@pytest.mark.parametrize(
    "archive,members",
    [
        ("tiny-data", ["tiny-data.txt"]),
        ("store", None),
        ("store", ["store/tiny-data.txt"]),
        ("store", ["store/subdir/tiny-data.txt"]),
        ("store", ["store/subdir"]),
        ("store", ["store/tiny-data.txt", "store/subdir"]),
    ],
    ids=[
        "single_file",
        "archive_all",
        "archive_file",
        "archive_subdir_file",
        "archive_subdir",
        "archive_multiple",
    ],
)
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
    ids=["Unzip", "Untar"],
)
def test_unpacking(processor_class, extension, target_path, archive, members):
    """
    Check the files and log messages produced by the Unzip/Untar processors.

    The archive is fetched twice: the first fetch must return the expected
    paths and log the download plus the extraction messages, the second must
    return the same paths without logging anything.
    """
    archive_name = archive + extension
    unpacker = processor_class(members=members, extract_dir=target_path)
    # With no explicit extract_dir, the expected folder is the archive file
    # name followed by the processor's suffix.
    extract_dir = (
        archive_name + unpacker.suffix if target_path is None else target_path
    )
    with TemporaryDirectory() as tmp:
        store = Path(tmp)
        expected_paths, download_log = _unpacking_expected_paths_and_logs(
            archive, members, store / extract_dir, processor_class.__name__
        )
        # Pooch working out of the temporary directory
        pup = Pooch(path=store, base_url=BASEURL, registry=REGISTRY)
        # First pass expects the download/extraction messages; the second
        # pass expects an empty log because nothing is downloaded again.
        for expected_log in (download_log, []):
            with capture_log() as log_file:
                fnames = pup.fetch(archive_name, processor=unpacker)
                assert set(fnames) == expected_paths
                _check_logs(log_file, expected_log)
            for fname in fnames:
                check_tiny_data(fname)
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_multiple_unpacking(processor_class, extension):
    """
    Fetch the same archive several times with different member selections.

    Each fetch must return exactly the members requested by the processor
    used for that call, regardless of what earlier calls extracted.
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # 1) ask for a single member
        single_member = processor_class(members=["store/tiny-data.txt"])
        first = pup.fetch(archive_name, processor=single_member)
        assert len(first) == 1
        check_tiny_data(first[0])
        # 2) ask for that member plus a second one
        two_members = processor_class(
            members=["store/tiny-data.txt", "store/subdir/tiny-data.txt"]
        )
        second = pup.fetch(archive_name, processor=two_members)
        assert len(second) == 2
        for extracted in second:
            check_tiny_data(extracted)
        # 3) back to the single-member processor: only that member may be
        # returned, even though two files have been extracted by now
        third = pup.fetch(archive_name, processor=single_member)
        assert len(third) == 1
        check_tiny_data(third[0])
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpack_members_with_leading_dot(processor_class, extension):
    """
    Check that a member can be requested with a leading "./" in its name.
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # Request a single member, written with the "./" prefix
        dotted = processor_class(members=["./store/tiny-data.txt"])
        extracted = pup.fetch(archive_name, processor=dotted)
        assert len(extracted) == 1
        check_tiny_data(extracted[0])
def _check_logs(log_file, expected_lines):
"""
Assert that the lines in the log match the expected ones.
"""
lines = log_file.getvalue().splitlines()
assert len(lines) == len(expected_lines)
for line, expected_line in zip(lines, expected_lines):
assert line.startswith(expected_line)
def _unpacking_expected_paths_and_logs(archive, members, path, name):
"""
Generate the appropriate expected paths and log message depending on the
parameters for the test.
"""
log_lines = ["Downloading"]
if archive == "tiny-data":
true_paths = {str(path / "tiny-data.txt")}
log_lines.append("Extracting 'tiny-data.txt'")
elif archive == "store" and members is None:
true_paths = {
str(path / "store" / "tiny-data.txt"),
str(path / "store" / "subdir" / "tiny-data.txt"),
}
log_lines.append(f"{name}{name[-1]}ing contents")
elif archive == "store" and members is not None:
true_paths = []
for member in members:
true_path = path / Path(*member.split("/"))
if not str(true_path).endswith("tiny-data.txt"):
true_path = true_path / "tiny-data.txt"
true_paths.append(str(true_path))
log_lines.append(f"Extracting '{member}'")
true_paths = set(true_paths)
return true_paths, log_lines
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_members_then_no_members(processor_class, extension):
    """
    Fetch one valid member first, then the archive with no member selection.

    The second fetch must return more than the single file from the first.
    See https://github.com/fatiando/pooch/issues/364
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # Start with a processor restricted to one member that exists
        only_one = pup.fetch(
            archive_name,
            processor=processor_class(members=["store/tiny-data.txt"]),
        )
        assert len(only_one) == 1
        # Then use a processor with no member selection at all
        everything = pup.fetch(archive_name, processor=processor_class())
        assert len(everything) > 1
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_wrong_members_then_no_members(processor_class, extension):
    """
    Fetch a member that is not in the archive, then fetch with no selection.

    The first fetch must return nothing; the second must still return files.
    See https://github.com/fatiando/pooch/issues/364
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # Start with a processor asking for a member the archive lacks
        nothing = pup.fetch(
            archive_name,
            processor=processor_class(members=["not-a-valid-file.csv"]),
        )
        assert len(nothing) == 0
        # Then use a processor with no member selection at all
        everything = pup.fetch(archive_name, processor=processor_class())
        assert len(everything) > 0