# ai-content-maker/.venv/Lib/site-packages/langcodes/registry_parser.py

from langcodes.util import data_filename

# Field names whose values parse_item() accumulates into a list (one element
# per occurrence within an entry) instead of storing a single string.
LIST_KEYS = {'Description', 'Prefix'}


def parse_file(file):
    """
    Read an open IANA subtag registry file and yield one dictionary of
    fields per subtag entry found in it.

    Entries are delimited by lines consisting solely of '%%'. A line that
    begins with two spaces is a wrapped continuation of the line before it.
    """
    pending = []
    for raw in file:
        text = raw.rstrip('\n')

        if text == '%%':
            # Separator reached: hand the collected lines to parse_item and
            # start gathering the next entry from scratch.
            yield from parse_item(pending)
            pending = []
            continue

        if text.startswith('  '):
            # Wrapped line: glue it onto the previous one, dropping only the
            # first character so a single space remains as the joiner.
            pending[-1] = pending[-1] + text[1:]
            continue

        pending.append(text)

    # The last entry is not followed by a separator, so flush it here.
    yield from parse_item(pending)


def parse_item(lines):
    """
    Turn the (already un-wrapped) lines of one registry entry into a
    dictionary mapping field names to values.

    Fields named in LIST_KEYS map to a list with one element per
    occurrence; every other field maps to a single string and must not
    repeat within the entry.

    This is a generator: it yields the dictionary once when the entry has
    a 'Subtag' or 'Tag' field, and yields nothing otherwise (as happens
    for the header block).
    """
    record = {}
    for entry in lines:
        field, content = entry.split(': ', 1)

        if field not in LIST_KEYS:
            # Single-valued field: a second occurrence is unexpected.
            assert field not in record
            record[field] = content
            continue

        # Multi-valued field: create the list on first sight, then append.
        if field not in record:
            record[field] = []
        record[field].append(content)

    if any(marker in record for marker in ('Subtag', 'Tag')):
        yield record


def parse_registry():
    """
    Yield a dictionary for each entry in the bundled IANA subtag registry
    file ('language-subtag-registry.txt', located via data_filename).
    """
    registry_path = data_filename('language-subtag-registry.txt')
    with open(registry_path, encoding='utf-8') as data_file:
        # Delegate with 'yield from' rather than returning the generator:
        # that keeps the 'with' block active, so the file stays open until
        # the caller has finished iterating.
        yield from parse_file(data_file)
first commit 2024-05-03 04:18:51 +03:00			`from langcodes.util import data_filename`

			`LIST_KEYS = {'Description', 'Prefix'}`


			`def parse_file(file):`
			`"""`
			`Take an open file containing the IANA subtag registry, and yield a`
			`dictionary of information for each subtag it describes.`
			`"""`
			`lines = []`
			`for line in file:`
			`line = line.rstrip('\n')`
			`if line == '%%':`
			`# This is a separator between items. Parse the data we've`
			`# collected and yield the result.`
			`yield from parse_item(lines)`
			`lines.clear()`
			`elif line.startswith('  '):`
			`# This is a continuation line. Concatenate it to the previous`
			`# line, including one of the spaces.`
			`lines[-1] += line[1:]`
			`else:`
			`lines.append(line)`
			`yield from parse_item(lines)`


			`def parse_item(lines):`
			`"""`
			`Given the lines that form a subtag entry (after joining wrapped lines`
			`back together), parse the data they contain.`

			`Returns a generator that yields once if there was any data there`
			`(and an empty generator if this was just the header).`
			`"""`
			`info = {}`
			`for line in lines:`
			`key, value = line.split(': ', 1)`
			`if key in LIST_KEYS:`
			`info.setdefault(key, []).append(value)`
			`else:`
			`assert key not in info`
			`info[key] = value`

			`if 'Subtag' in info or 'Tag' in info:`
			`yield info`


			`def parse_registry():`
			`"""`
			`Yield a sequence of dictionaries, containing the info in the included`
			`IANA subtag registry file.`
			`"""`
			`with open(`
			`data_filename('language-subtag-registry.txt'), encoding='utf-8'`
			`) as data_file:`
			`# 'yield from' instead of returning, so that we only close the file`
			`# when finished.`
			`yield from parse_file(data_file)`