40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
|
import unittest
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from nltk import ConditionalFreqDist, tokenize
|
||
|
|
||
|
|
||
|
class TestEmptyCondFreq(unittest.TestCase):
    """Mutation tests for ``ConditionalFreqDist``.

    The first two tests check that asking an empty distribution to
    tabulate or plot a condition it does not contain leaves its list of
    conditions empty. The third checks that explicit increments still
    create conditions and update counts.
    """

    def _assert_no_conditions(self, dist):
        """Assert that ``dist`` currently reports an empty condition list."""
        self.assertEqual(dist.conditions(), [])

    def test_tabulate(self):
        """Tabulating an unknown condition raises ValueError and adds nothing."""
        dist = ConditionalFreqDist()
        self._assert_no_conditions(dist)

        # nonexistent keys shouldn't be added
        with pytest.raises(ValueError):
            dist.tabulate(conditions="BUG")

        self._assert_no_conditions(dist)

    def test_plot(self):
        """Plotting an unknown condition adds nothing to the distribution."""
        dist = ConditionalFreqDist()
        self._assert_no_conditions(dist)

        # nonexistent keys shouldn't be added
        dist.plot(conditions=["BUG"])

        self._assert_no_conditions(dist)

    def test_increment(self):
        """Normal mutation through item access keeps working."""
        dist = ConditionalFreqDist()
        sentence = "cow cat mouse cat tiger"

        # build the distribution keyed on word length
        for token in tokenize.word_tokenize(sentence):
            dist[len(token)][token] += 1

        self.assertEqual(dist.conditions(), [3, 5])

        # incrementing a previously unseen key is still possible
        dist[2]["hi"] += 1

        # the new condition is now present (order not asserted)
        self.assertCountEqual(dist.conditions(), [3, 5, 2])

        # the key's frequency went from 0 (unseen) to 1
        self.assertEqual(dist[2]["hi"], 1)
|