83 lines
1.8 KiB
Python
83 lines
1.8 KiB
Python
|
import pytest
|
||
|
|
||
|
from nltk.util import everygrams
|
||
|
|
||
|
|
||
|
@pytest.fixture
|
||
|
def everygram_input():
|
||
|
"""Form test data for tests."""
|
||
|
return iter(["a", "b", "c"])
|
||
|
|
||
|
|
||
|
def test_everygrams_without_padding(everygram_input):
|
||
|
expected_output = [
|
||
|
("a",),
|
||
|
("a", "b"),
|
||
|
("a", "b", "c"),
|
||
|
("b",),
|
||
|
("b", "c"),
|
||
|
("c",),
|
||
|
]
|
||
|
output = list(everygrams(everygram_input))
|
||
|
assert output == expected_output
|
||
|
|
||
|
|
||
|
def test_everygrams_max_len(everygram_input):
|
||
|
expected_output = [
|
||
|
("a",),
|
||
|
("a", "b"),
|
||
|
("b",),
|
||
|
("b", "c"),
|
||
|
("c",),
|
||
|
]
|
||
|
output = list(everygrams(everygram_input, max_len=2))
|
||
|
assert output == expected_output
|
||
|
|
||
|
|
||
|
def test_everygrams_min_len(everygram_input):
|
||
|
expected_output = [
|
||
|
("a", "b"),
|
||
|
("a", "b", "c"),
|
||
|
("b", "c"),
|
||
|
]
|
||
|
output = list(everygrams(everygram_input, min_len=2))
|
||
|
assert output == expected_output
|
||
|
|
||
|
|
||
|
def test_everygrams_pad_right(everygram_input):
|
||
|
expected_output = [
|
||
|
("a",),
|
||
|
("a", "b"),
|
||
|
("a", "b", "c"),
|
||
|
("b",),
|
||
|
("b", "c"),
|
||
|
("b", "c", None),
|
||
|
("c",),
|
||
|
("c", None),
|
||
|
("c", None, None),
|
||
|
(None,),
|
||
|
(None, None),
|
||
|
(None,),
|
||
|
]
|
||
|
output = list(everygrams(everygram_input, max_len=3, pad_right=True))
|
||
|
assert output == expected_output
|
||
|
|
||
|
|
||
|
def test_everygrams_pad_left(everygram_input):
|
||
|
expected_output = [
|
||
|
(None,),
|
||
|
(None, None),
|
||
|
(None, None, "a"),
|
||
|
(None,),
|
||
|
(None, "a"),
|
||
|
(None, "a", "b"),
|
||
|
("a",),
|
||
|
("a", "b"),
|
||
|
("a", "b", "c"),
|
||
|
("b",),
|
||
|
("b", "c"),
|
||
|
("c",),
|
||
|
]
|
||
|
output = list(everygrams(everygram_input, max_len=3, pad_left=True))
|
||
|
assert output == expected_output
|