290 lines
11 KiB
Python
290 lines
11 KiB
Python
# Copyright (c) 2018 The Pooch Developers.
|
|
# Distributed under the terms of the BSD 3-Clause License.
|
|
# SPDX-License-Identifier: BSD-3-Clause
|
|
#
|
|
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
|
#
|
|
"""
|
|
Test the processor hooks
|
|
"""
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
import warnings
|
|
|
|
import pytest
|
|
|
|
from .. import Pooch
|
|
from ..processors import Unzip, Untar, Decompress
|
|
|
|
from .utils import pooch_test_url, pooch_test_registry, check_tiny_data, capture_log
|
|
|
|
|
|
# Registry handed to every Pooch instance created by the tests in this module.
REGISTRY = pooch_test_registry()
|
|
# Base URL handed to every Pooch instance created by the tests in this module.
BASEURL = pooch_test_url()
|
|
|
|
|
|
@pytest.mark.network
@pytest.mark.parametrize(
    "method,ext,name",
    [
        ("auto", "xz", None),
        ("lzma", "xz", None),
        ("xz", "xz", None),
        ("bzip2", "bz2", None),
        ("gzip", "gz", None),
        ("gzip", "gz", "different-name.txt"),
    ],
    ids=["auto", "lzma", "xz", "bz2", "gz", "name"],
)
def test_decompress(method, ext, name):
    """
    Fetch a compressed copy of the tiny data file and decompress it.

    Runs once per compression method. When ``name`` is None the output is
    expected next to the download with ``.decomp`` appended; otherwise it is
    expected under the given name inside the local store.
    """
    compressed_name = f"tiny-data.txt.{ext}"
    processor = Decompress(method=method, name=name)
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        output_name = f"{compressed_name}.decomp" if name is None else name
        true_path = str(path / output_name)
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # First fetch: expect one line from the download and one from the
        # processor, in that order.
        with capture_log() as log_file:
            fname = pup.fetch(compressed_name, processor=processor)
            log_lines = log_file.getvalue().splitlines()
            assert len(log_lines) == 2
            download_line, processor_line = log_lines
            assert download_line.split()[0] == "Downloading"
            assert processor_line.startswith("Decompressing")
            assert method in processor_line
        assert fname == true_path
        check_tiny_data(fname)
        # Second fetch: nothing is downloaded, so the processor must stay
        # silent and the same path must come back.
        with capture_log() as log_file:
            fname = pup.fetch(compressed_name, processor=processor)
            assert log_file.getvalue() == ""
        assert fname == true_path
        check_tiny_data(fname)
|
|
|
|
|
|
@pytest.mark.network
def test_decompress_fails():
    """
    Check the errors raised by Decompress for unusable files and methods.

    Covers an unrecognized extension with method='auto', an unknown method
    name, and the zip cases where the message should point to
    pooch.Unzip/Untar.
    """
    # (file to fetch, method, expected start of the message,
    #  whether the message should mention pooch.Unzip/Untar)
    cases = [
        # Invalid extension
        ("tiny-data.txt", "auto", "Unrecognized file extension '.txt'", False),
        # Should also fail for a bad method name
        ("tiny-data.txt", "bla", "Invalid compression method 'bla'", False),
        # Point people to Untar and Unzip
        ("tiny-data.txt", "zip", "Invalid compression method 'zip'", True),
        ("store.zip", "auto", "Unrecognized file extension '.zip'", True),
    ]
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        for fname, method, expected_start, mentions_archives in cases:
            with pytest.raises(ValueError) as exception:
                # NOTE(review): no filter is installed inside this context, so
                # it only isolates warning-state changes; confirm whether a
                # simplefilter was intended.
                with warnings.catch_warnings():
                    pup.fetch(fname, processor=Decompress(method=method))
            message = exception.value.args[0]
            assert message.startswith(expected_start)
            assert ("pooch.Unzip/Untar" in message) == mentions_archives
|
|
|
|
|
|
@pytest.mark.network
@pytest.mark.parametrize(
    "target_path", [None, "some_custom_path"], ids=["default_path", "custom_path"]
)
@pytest.mark.parametrize(
    "archive,members",
    [
        ("tiny-data", ["tiny-data.txt"]),
        ("store", None),
        ("store", ["store/tiny-data.txt"]),
        ("store", ["store/subdir/tiny-data.txt"]),
        ("store", ["store/subdir"]),
        ("store", ["store/tiny-data.txt", "store/subdir"]),
    ],
    ids=[
        "single_file",
        "archive_all",
        "archive_file",
        "archive_subdir_file",
        "archive_subdir",
        "archive_multiple",
    ],
)
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
    ids=["Unzip", "Untar"],
)
def test_unpacking(processor_class, extension, target_path, archive, members):
    """
    Exercise the archive processors (Untar, Unzip) over member selections.

    The archive is fetched twice: the first fetch must log the download and
    the extraction, the second must log nothing. Both must return the same
    set of extracted paths.
    """
    archive_name = archive + extension
    processor = processor_class(members=members, extract_dir=target_path)
    # Without an explicit extract_dir, the expected folder is the archive
    # name plus the processor suffix.
    extract_dir = (
        archive_name + processor.suffix if target_path is None else target_path
    )
    with TemporaryDirectory() as tmp_dir:
        store = Path(tmp_dir)
        true_paths, expected_log = _unpacking_expected_paths_and_logs(
            archive, members, store / extract_dir, processor_class.__name__
        )
        # Setup a pooch in a temp dir
        pup = Pooch(path=store, base_url=BASEURL, registry=REGISTRY)
        # Round one downloads and unpacks; round two must not run the
        # processor again, hence the empty expected log.
        for log_for_round in (expected_log, []):
            with capture_log() as log_file:
                fnames = pup.fetch(archive_name, processor=processor)
                assert set(fnames) == true_paths
                _check_logs(log_file, log_for_round)
            for fname in fnames:
                check_tiny_data(fname)
|
|
|
|
|
|
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_multiple_unpacking(processor_class, extension):
    """
    Fetch the same archive repeatedly with different member selections.

    Each fetch must return exactly the members requested by the processor
    used for that call, regardless of what earlier fetches extracted.
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)

        # First fetch: a single member
        one_member = processor_class(members=["store/tiny-data.txt"])
        first_result = pup.fetch(archive_name, processor=one_member)
        assert len(first_result) == 1
        check_tiny_data(first_result[0])

        # Second fetch: the same member plus another one
        two_members = processor_class(
            members=["store/tiny-data.txt", "store/subdir/tiny-data.txt"]
        )
        second_result = pup.fetch(archive_name, processor=two_members)
        assert len(second_result) == 2
        for fname in second_result:
            check_tiny_data(fname)

        # Third fetch: reuse the single-member processor and make sure only
        # that member comes back
        third_result = pup.fetch(archive_name, processor=one_member)
        assert len(third_result) == 1
        check_tiny_data(third_result[0])
|
|
|
|
|
|
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpack_members_with_leading_dot(processor_class, extension):
    """
    Test that unpack members can also be specified with a leading ./
    """
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)

        # Request a single member, written with the "./" prefix
        dotted_member = processor_class(members=["./store/tiny-data.txt"])
        extracted = pup.fetch("store" + extension, processor=dotted_member)
        assert len(extracted) == 1
        check_tiny_data(extracted[0])
|
|
|
|
|
|
def _check_logs(log_file, expected_lines):
|
|
"""
|
|
Assert that the lines in the log match the expected ones.
|
|
"""
|
|
lines = log_file.getvalue().splitlines()
|
|
assert len(lines) == len(expected_lines)
|
|
for line, expected_line in zip(lines, expected_lines):
|
|
assert line.startswith(expected_line)
|
|
|
|
|
|
def _unpacking_expected_paths_and_logs(archive, members, path, name):
|
|
"""
|
|
Generate the appropriate expected paths and log message depending on the
|
|
parameters for the test.
|
|
"""
|
|
log_lines = ["Downloading"]
|
|
if archive == "tiny-data":
|
|
true_paths = {str(path / "tiny-data.txt")}
|
|
log_lines.append("Extracting 'tiny-data.txt'")
|
|
elif archive == "store" and members is None:
|
|
true_paths = {
|
|
str(path / "store" / "tiny-data.txt"),
|
|
str(path / "store" / "subdir" / "tiny-data.txt"),
|
|
}
|
|
log_lines.append(f"{name}{name[-1]}ing contents")
|
|
elif archive == "store" and members is not None:
|
|
true_paths = []
|
|
for member in members:
|
|
true_path = path / Path(*member.split("/"))
|
|
if not str(true_path).endswith("tiny-data.txt"):
|
|
true_path = true_path / "tiny-data.txt"
|
|
true_paths.append(str(true_path))
|
|
log_lines.append(f"Extracting '{member}'")
|
|
true_paths = set(true_paths)
|
|
return true_paths, log_lines
|
|
|
|
|
|
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_members_then_no_members(processor_class, extension):
    """
    Fetch with a valid member first, then again without any member list.

    The second fetch must return more than the single member.
    https://github.com/fatiando/pooch/issues/364
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)

        # First fetch asks for one member that exists in the archive
        with_member = processor_class(members=["store/tiny-data.txt"])
        only_one = pup.fetch(archive_name, processor=with_member)
        assert len(only_one) == 1

        # Second fetch uses a processor with no member selection
        without_members = processor_class()
        everything = pup.fetch(archive_name, processor=without_members)
        assert len(everything) > 1
|
|
|
|
|
|
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_wrong_members_then_no_members(processor_class, extension):
    """
    Fetch with a member that is not in the archive, then without members.

    The first fetch must return nothing and the second must return files.
    https://github.com/fatiando/pooch/issues/364
    """
    archive_name = "store" + extension
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)

        # First fetch asks for a member that the archive does not contain
        with_bad_member = processor_class(members=["not-a-valid-file.csv"])
        nothing = pup.fetch(archive_name, processor=with_bad_member)
        assert len(nothing) == 0

        # Second fetch uses a processor with no member selection
        without_members = processor_class()
        everything = pup.fetch(archive_name, processor=without_members)
        assert len(everything) > 0
|