247 lines
4.8 KiB
Python
247 lines
4.8 KiB
Python
from nltk.translate.ribes_score import corpus_ribes, word_rank_alignment
|
|
|
|
|
|
def test_ribes_empty_worder():  # worder as in word order
    # The hypothesis and the reference below are expected to produce an
    # empty word-order alignment, which in turn should yield the lowest
    # possible RIBES score.
    hypothesis = "This is a nice sentence which I quite like".split()
    reference = "Okay well that's neat and all but the reference's different".split()

    alignment = word_rank_alignment(reference, hypothesis)
    assert alignment == []

    # corpus_ribes takes one list of references per hypothesis.
    score = corpus_ribes([[reference]], [hypothesis])
    assert score == 0.0
|
|
|
|
|
|
def test_ribes_one_worder():
    # The hypothesis and the reference below are expected to align on
    # exactly one word; with so little correspondence the RIBES score
    # for the pair should still be 0.
    hypothesis = "This is a nice sentence which I quite like".split()
    reference = "Okay well that's nice and all but the reference's different".split()

    alignment = word_rank_alignment(reference, hypothesis)
    assert alignment == [3]

    # corpus_ribes takes one list of references per hypothesis.
    score = corpus_ribes([[reference]], [hypothesis])
    assert score == 0.0
|
|
|
|
|
|
def test_ribes_two_worder():
    # The hypothesis and the reference below are expected to align on
    # two words, yet the pair should still receive the lowest possible
    # RIBES score because the sentences are otherwise dissimilar.
    hypothesis = "This is a nice sentence which I quite like".split()
    reference = "Okay well that's nice and all but the reference is different".split()

    alignment = word_rank_alignment(reference, hypothesis)
    assert alignment == [9, 3]

    # corpus_ribes takes one list of references per hypothesis.
    score = corpus_ribes([[reference]], [hypothesis])
    assert score == 0.0
|
|
|
|
|
|
def test_ribes():
    # Mirrors the example used in the doctest of corpus_ribes: two
    # hypotheses, the first with three references and the second with one.
    first_hyp = [
        "It", "is", "a", "guide", "to", "action", "which", "ensures", "that",
        "the", "military", "always", "obeys", "the", "commands", "of", "the",
        "party",
    ]
    first_ref_a = [
        "It", "is", "a", "guide", "to", "action", "that", "ensures", "that",
        "the", "military", "will", "forever", "heed", "Party", "commands",
    ]
    first_ref_b = [
        "It", "is", "the", "guiding", "principle", "which", "guarantees",
        "the", "military", "forces", "always", "being", "under", "the",
        "command", "of", "the", "Party",
    ]
    first_ref_c = [
        "It", "is", "the", "practical", "guide", "for", "the", "army",
        "always", "to", "heed", "the", "directions", "of", "the", "party",
    ]

    second_hyp = [
        "he", "read", "the", "book", "because", "he", "was", "interested",
        "in", "world", "history",
    ]
    second_ref_a = [
        "he", "was", "interested", "in", "world", "history", "because", "he",
        "read", "the", "book",
    ]

    # One inner list of references per hypothesis, in matching order.
    references_per_hyp = [
        [first_ref_a, first_ref_b, first_ref_c],
        [second_ref_a],
    ]
    all_hyps = [first_hyp, second_hyp]

    rounded_score = round(corpus_ribes(references_per_hyp, all_hyps), 4)

    assert rounded_score == 0.3597
|
|
|
|
|
|
def test_no_zero_div():
    # Regression test for Issue 2529: scoring the corpus below must not
    # raise a ZeroDivisionError.
    first_hyp = [
        "It", "is", "a", "guide", "to", "action", "which", "ensures", "that",
        "the", "military", "always", "obeys", "the", "commands", "of", "the",
        "party",
    ]
    first_ref_a = [
        "It", "is", "a", "guide", "to", "action", "that", "ensures", "that",
        "the", "military", "will", "forever", "heed", "Party", "commands",
    ]
    first_ref_b = [
        "It", "is", "the", "guiding", "principle", "which", "guarantees",
        "the", "military", "forces", "always", "being", "under", "the",
        "command", "of", "the", "Party",
    ]
    first_ref_c = [
        "It", "is", "the", "practical", "guide", "for", "the", "army",
        "always", "to", "heed", "the", "directions", "of", "the", "party",
    ]

    # The second pair is deliberately short: a three-word hypothesis
    # against an eight-word reference.
    second_hyp = ["he", "read", "the"]
    second_ref_a = ["he", "was", "interested", "in", "world", "history", "because", "he"]

    # One inner list of references per hypothesis, in matching order.
    references_per_hyp = [
        [first_ref_a, first_ref_b, first_ref_c],
        [second_ref_a],
    ]
    all_hyps = [first_hyp, second_hyp]

    rounded_score = round(corpus_ribes(references_per_hyp, all_hyps), 4)

    assert rounded_score == 0.1688
|