ai-content-maker/.venv/Lib/site-packages/pycrfsuite/_dumpparser.py

# -*- coding: utf-8 -*-
from __future__ import absolute_import
import re


class ParsedDump(object):
    """
    Container for the parameters of a CRFsuite model. Instances are what
    :meth:`pycrfsuite.Tagger.info()` hands back to the caller.

    Every attribute starts out as its own empty dict.

    Attributes
    ----------

    transitions : dict
        learned transition weights, ``{(from_label, to_label): weight}``

    state_features : dict
        learned ``(attribute, label)`` weights,
        ``{(attribute, label): weight}``

    header : dict
        metadata taken from the file header

    labels : dict
        model labels, ``{name: internal_id}``

    attributes : dict
        known attributes, ``{name: internal_id}``

    """
    def __init__(self):
        # One fresh dict per field; nothing is shared between fields
        # or between instances.
        field_names = (
            'header',
            'labels',
            'attributes',
            'transitions',
            'state_features',
        )
        for field_name in field_names:
            setattr(self, field_name, {})


class CRFsuiteDumpParser(object):
    """
    A hack: parser for `crfsuite dump` results.

    Obtaining coefficients "the proper way" is quite hard otherwise
    because in CRFsuite they are hidden in private structures.

    Pass the dump to :meth:`feed` one line at a time; parsed data is
    accumulated in ``self.result`` (a :class:`ParsedDump`).
    """

    # ``feed`` runs once per dump line, so the patterns are compiled once.
    _SECTION_RE = re.compile(
        r"(FILEHEADER|LABELS|ATTRIBUTES|TRANSITIONS|STATE_FEATURES) = {"
    )
    _HEADER_RE = re.compile(r"(\w+): (.*)")
    _ID_NAME_RE = re.compile(r"(\d+): (.*)")
    _TRANSITION_RE = re.compile(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)")
    # The attribute group is ``(.*)``, not ``(.+)``: the ATTRIBUTES section
    # accepts an empty attribute name, so its state features must parse too.
    # The group stays greedy, so lines that matched before match identically.
    _STATE_FEATURE_RE = re.compile(r"\(\d+\) (.*) --> (.+): ([+-]?\d+\.\d+)")

    def __init__(self):
        # Name of the section currently being read, or None between sections.
        self.state = None
        self.result = ParsedDump()

    def feed(self, line):
        """
        Consume a single line of the dump.

        Blank lines are ignored. A ``<SECTION> = {`` line opens a section,
        a lone ``}`` closes it, and any other line is dispatched to the
        ``parse_<SECTION>`` method of the currently open section.

        Raises ``AttributeError`` if a content line arrives while no section
        is open (there is no ``parse_None`` method).
        """
        # Strip initial ws and line terminator, but allow for ws at the end of feature names.
        line = line.lstrip().rstrip('\r\n')
        if not line:
            return

        m = self._SECTION_RE.match(line)
        if m:
            self.state = m.group(1)
        elif line == '}':
            self.state = None
        else:
            getattr(self, 'parse_%s' % self.state)(line)

    def parse_FILEHEADER(self, line):
        """Store a ``key: value`` header line in ``result.header``."""
        m = self._HEADER_RE.match(line)
        self.result.header[m.group(1)] = m.group(2)

    def parse_LABELS(self, line):
        """Store an ``id: name`` line as ``result.labels[name] = id`` (id kept as str)."""
        m = self._ID_NAME_RE.match(line)
        self.result.labels[m.group(2)] = m.group(1)

    def parse_ATTRIBUTES(self, line):
        """Store an ``id: name`` line as ``result.attributes[name] = id`` (id kept as str)."""
        m = self._ID_NAME_RE.match(line)
        self.result.attributes[m.group(2)] = m.group(1)

    def parse_TRANSITIONS(self, line):
        """
        Store a ``(n) from --> to: weight`` line in ``result.transitions``.

        Both labels must already be present in ``result.labels``.
        """
        m = self._TRANSITION_RE.match(line)
        from_, to_ = m.group(1), m.group(2)
        assert from_ in self.result.labels
        assert to_ in self.result.labels
        self.result.transitions[(from_, to_)] = float(m.group(3))

    def parse_STATE_FEATURES(self, line):
        """
        Store a ``(n) attribute --> label: weight`` line in
        ``result.state_features``.

        The attribute must already be present in ``result.attributes`` and
        the label in ``result.labels``. The attribute name may be empty.
        """
        m = self._STATE_FEATURE_RE.match(line)
        attr, label = m.group(1), m.group(2)
        assert attr in self.result.attributes
        assert label in self.result.labels
        self.result.state_features[(attr, label)] = float(m.group(3))
first commit 2024-05-03 04:18:51 +03:00			`# -- coding: utf-8 --`
			`from __future__ import absolute_import`
			`import re`


			`class ParsedDump(object):`
			`"""`
			`CRFsuite model parameters. Objects of this type are returned by`
			:meth:`pycrfsuite.Tagger.info()` method.

			`Attributes`
			`----------`

			`transitions : dict`
			``{(from_label, to_label): weight}`` dict with learned transition weights

			`state_features : dict`
			``{(attribute, label): weight}`` dict with learned ``(attribute, label)`` weights

			`header : dict`
			`Metadata from the file header`

			`labels : dict`
			``{name: internal_id}`` dict with model labels

			`attributes : dict`
			``{name: internal_id}`` dict with known attributes

			`"""`
			`def __init__(self):`
			`self.header = {}`
			`self.labels = {}`
			`self.attributes = {}`
			`self.transitions = {}`
			`self.state_features = {}`


			`class CRFsuiteDumpParser(object):`
			`"""`
			A hack: parser for `crfsuite dump` results.

			`Obtaining coefficients "the proper way" is quite hard otherwise`
			`because in CRFsuite they are hidden in private structures.`
			`"""`

			`def __init__(self):`
			`self.state = None`
			`self.result = ParsedDump()`

			`def feed(self, line):`
			`# Strip initial ws and line terminator, but allow for ws at the end of feature names.`
			`line = line.lstrip().rstrip('\r\n')`
			`if not line:`
			`return`

			`m = re.match(r"(FILEHEADER\|LABELS\|ATTRIBUTES\|TRANSITIONS\|STATE_FEATURES) = {", line)`
			`if m:`
			`self.state = m.group(1)`
			`elif line == '}':`
			`self.state = None`
			`else:`
			`getattr(self, 'parse_%s' % self.state)(line)`

			`def parse_FILEHEADER(self, line):`
			`m = re.match(r"(\w+): (.*)", line)`
			`self.result.header[m.group(1)] = m.group(2)`

			`def parse_LABELS(self, line):`
			`m = re.match(r"(\d+): (.*)", line)`
			`self.result.labels[m.group(2)] = m.group(1)`

			`def parse_ATTRIBUTES(self, line):`
			`m = re.match(r"(\d+): (.*)", line)`
			`self.result.attributes[m.group(2)] = m.group(1)`

			`def parse_TRANSITIONS(self, line):`
			`m = re.match(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)", line)`
			`from_, to_ = m.group(1), m.group(2)`
			`assert from_ in self.result.labels`
			`assert to_ in self.result.labels`
			`self.result.transitions[(from_, to_)] = float(m.group(3))`

			`def parse_STATE_FEATURES(self, line):`
			`m = re.match(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)", line)`
			`attr, label = m.group(1), m.group(2)`
			`assert attr in self.result.attributes`
			`assert label in self.result.labels`
			`self.result.state_features[(attr, label)] = float(m.group(3))`