ai-content-maker/.venv/Lib/site-packages/nltk/test/unit/test_rte_classify.py

import pytest

from nltk import config_megam
from nltk.classify.rte_classify import RTEFeatureExtractor, rte_classifier, rte_features
from nltk.corpus import rte as rte_corpus

# Expected rendering of the features for the first six pairs of
# "rte1_dev.xml", as compared in TestRTEClassifier.test_rte_feature_extraction.
# Each line is "<key left-justified to 15 columns> => <value>", with the keys
# of a pair listed in sorted order. The blank lines separating the pairs are
# only for readability; the test filters them out before comparing.
# NOTE: the spelling "extration" in the name is kept as-is because the test
# references the constant by this exact name.
expected_from_rte_feature_extration = """
alwayson        => True
ne_hyp_extra    => 0
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 3
word_overlap    => 3

alwayson        => True
ne_hyp_extra    => 0
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 2
word_overlap    => 1

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 1
word_overlap    => 2

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 6
word_overlap    => 2

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 4
word_overlap    => 0

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 3
word_overlap    => 1
"""


class TestRTEClassifier:
    """Unit tests for the RTE feature extraction and classifier helpers."""

    def test_rte_feature_extraction(self):
        """Check ``rte_features`` on the first six pairs of ``rte1_dev.xml``.

        Every feature of every pair is rendered as ``key => value`` (key
        left-justified to 15 columns, keys in sorted order) and the resulting
        lines are compared with ``expected_from_rte_feature_extration``.
        """
        pairs = rte_corpus.pairs(["rte1_dev.xml"])[:6]
        test_output = []
        for pair in pairs:
            # Extract the features once per pair instead of once per key.
            features = rte_features(pair)
            test_output.extend(
                f"{key:<15} => {features[key]}" for key in sorted(features)
            )
        # The blank separator lines in the expected text are not part of the
        # generated output, so drop empty strings before comparing.
        expected_output = [
            line
            for line in expected_from_rte_feature_extration.strip().split("\n")
            if line
        ]
        assert test_output == expected_output

    def test_feature_extractor_object(self):
        """Check the word sets exposed by ``RTEFeatureExtractor``.

        Uses the pair at index 33 of ``rte3_dev.xml``.
        """
        rtepair = rte_corpus.pairs(["rte3_dev.xml"])[33]
        extractor = RTEFeatureExtractor(rtepair)

        assert extractor.hyp_words == {"member", "China", "SCO."}
        assert extractor.overlap("word") == set()
        assert extractor.overlap("ne") == {"China"}
        assert extractor.hyp_extra("word") == {"member"}

    def test_rte_classification_without_megam(self):
        """Train with the "IIS" and "GIS" algorithms.

        There are no assertions on the result; the test passes as long as
        neither training call raises.
        """
        # Use a sample size for unit testing, since we
        # don't need to fully train these classifiers
        for algorithm in ("IIS", "GIS"):
            rte_classifier(algorithm, sample_N=100)

    def test_rte_classification_with_megam(self):
        """Train with the "megam" algorithm when MEGAM can be configured.

        The test is skipped if ``config_megam()`` raises ``LookupError`` or
        ``AttributeError``. Otherwise it passes as long as training does not
        raise.
        """
        try:
            config_megam()
        except (LookupError, AttributeError):
            pytest.skip("Skipping tests with dependencies on MEGAM")
        rte_classifier("megam", sample_N=100)
first commit 2024-05-03 04:18:51 +03:00			`import pytest`

			`from nltk import config_megam`
			`from nltk.classify.rte_classify import RTEFeatureExtractor, rte_classifier, rte_features`
			`from nltk.corpus import rte as rte_corpus`

			`expected_from_rte_feature_extration = """`
			`alwayson => True`
			`ne_hyp_extra => 0`
			`ne_overlap => 1`
			`neg_hyp => 0`
			`neg_txt => 0`
			`word_hyp_extra => 3`
			`word_overlap => 3`

			`alwayson => True`
			`ne_hyp_extra => 0`
			`ne_overlap => 1`
			`neg_hyp => 0`
			`neg_txt => 0`
			`word_hyp_extra => 2`
			`word_overlap => 1`

			`alwayson => True`
			`ne_hyp_extra => 1`
			`ne_overlap => 1`
			`neg_hyp => 0`
			`neg_txt => 0`
			`word_hyp_extra => 1`
			`word_overlap => 2`

			`alwayson => True`
			`ne_hyp_extra => 1`
			`ne_overlap => 0`
			`neg_hyp => 0`
			`neg_txt => 0`
			`word_hyp_extra => 6`
			`word_overlap => 2`

			`alwayson => True`
			`ne_hyp_extra => 1`
			`ne_overlap => 0`
			`neg_hyp => 0`
			`neg_txt => 0`
			`word_hyp_extra => 4`
			`word_overlap => 0`

			`alwayson => True`
			`ne_hyp_extra => 1`
			`ne_overlap => 0`
			`neg_hyp => 0`
			`neg_txt => 0`
			`word_hyp_extra => 3`
			`word_overlap => 1`
			`"""`


			`class TestRTEClassifier:`
			`# Test the feature extraction method.`
			`def test_rte_feature_extraction(self):`
			`pairs = rte_corpus.pairs(["rte1_dev.xml"])[:6]`
			`test_output = [`
			`f"{key:<15} => {rte_features(pair)[key]}"`
			`for pair in pairs`
			`for key in sorted(rte_features(pair))`
			`]`
			`expected_output = expected_from_rte_feature_extration.strip().split("\n")`
			`# Remove null strings.`
			`expected_output = list(filter(None, expected_output))`
			`assert test_output == expected_output`

			`# Test the RTEFeatureExtractor object.`
			`def test_feature_extractor_object(self):`
			`rtepair = rte_corpus.pairs(["rte3_dev.xml"])[33]`
			`extractor = RTEFeatureExtractor(rtepair)`

			`assert extractor.hyp_words == {"member", "China", "SCO."}`
			`assert extractor.overlap("word") == set()`
			`assert extractor.overlap("ne") == {"China"}`
			`assert extractor.hyp_extra("word") == {"member"}`

			`# Test the RTE classifier training.`
			`def test_rte_classification_without_megam(self):`
			`# Use a sample size for unit testing, since we`
			`# don't need to fully train these classifiers`
			`clf = rte_classifier("IIS", sample_N=100)`
			`clf = rte_classifier("GIS", sample_N=100)`

			`def test_rte_classification_with_megam(self):`
			`try:`
			`config_megam()`
			`except (LookupError, AttributeError) as e:`
			`pytest.skip("Skipping tests with dependencies on MEGAM")`
			`clf = rte_classifier("megam", sample_N=100)`