86 lines
2.1 KiB
Python
86 lines
2.1 KiB
Python
|
import unittest
|
||
|
|
||
|
from nltk import RegexpParser
|
||
|
|
||
|
|
||
|
class TestChunkRule(unittest.TestCase):
    """Regression tests for chunk rules parsed by ``RegexpParser``."""

    def test_tag_pattern2re_pattern_quantifier(self):
        """Test for bug https://github.com/nltk/nltk/issues/1597

        Ensures that curly bracket quantifiers can be used inside a chunk rule.
        This type of quantifier has been used for the supplementary example
        in https://www.nltk.org/book/ch07.html#exploring-text-corpora.
        """
        sent = [
            ("The", "AT"),
            ("September-October", "NP"),
            ("term", "NN"),
            ("jury", "NN"),
            ("had", "HVD"),
            ("been", "BEN"),
            ("charged", "VBN"),
            ("by", "IN"),
            ("Fulton", "NP-TL"),
            ("Superior", "JJ-TL"),
            ("Court", "NN-TL"),
            ("Judge", "NN-TL"),
            ("Durwood", "NP"),
            ("Pye", "NP"),
            ("to", "TO"),
            ("investigate", "VB"),
            ("reports", "NNS"),
            ("of", "IN"),
            ("possible", "JJ"),
            ("``", "``"),
            ("irregularities", "NNS"),
            ("''", "''"),
            ("in", "IN"),
            ("the", "AT"),
            ("hard-fought", "JJ"),
            ("primary", "NN"),
            ("which", "WDT"),
            ("was", "BEDZ"),
            ("won", "VBN"),
            ("by", "IN"),
            ("Mayor-nominate", "NN-TL"),
            ("Ivan", "NP"),
            ("Allen", "NP"),
            ("Jr.", "NP"),
            (".", "."),
        ]  # source: brown corpus

        # The rule chunks runs of four or more consecutive tokens whose tag
        # starts with "N"; the inner ``{4,}`` is the quantifier under test.
        cp = RegexpParser("CHUNK: {<N.*>{4,}}")
        tree = cp.parse(sent)

        # One entry per direct child of the root ``S`` node, in sentence order.
        expected_children = [
            "The/AT",
            "September-October/NP",
            "term/NN",
            "jury/NN",
            "had/HVD",
            "been/BEN",
            "charged/VBN",
            "by/IN",
            "Fulton/NP-TL",
            "Superior/JJ-TL",
            "(CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP)",
            "to/TO",
            "investigate/VB",
            "reports/NNS",
            "of/IN",
            "possible/JJ",
            "``/``",
            "irregularities/NNS",
            "''/''",
            "in/IN",
            "the/AT",
            "hard-fought/JJ",
            "primary/NN",
            "which/WDT",
            "was/BEDZ",
            "won/VBN",
            "by/IN",
            "(CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP)",
            "./.",
        ]
        # ``Tree.pformat()`` puts each child of a tree that does not fit on one
        # line onto its own line, indented by two spaces.  The indent is spelled
        # out in the join so it does not depend on whitespace inside a
        # triple-quoted literal.
        expected = "(S\n  " + "\n  ".join(expected_children) + ")"

        # ``assertEqual`` (rather than a bare ``assert``) still runs under
        # ``python -O`` and reports a diff when the strings differ.
        self.assertEqual(tree.pformat(), expected)
|