# coding: utf-8 from __future__ import absolute_import from __future__ import print_function # Emitter expects events obeying the following grammar: # stream ::= STREAM-START document* STREAM-END # document ::= DOCUMENT-START node DOCUMENT-END # node ::= SCALAR | sequence | mapping # sequence ::= SEQUENCE-START node* SEQUENCE-END # mapping ::= MAPPING-START (node node)* MAPPING-END import sys from .error import YAMLError, YAMLStreamError from .events import * # NOQA # fmt: off from .compat import utf8, text_type, PY2, nprint, dbg, DBG_EVENT, \ check_anchorname_char # fmt: on if False: # MYPY from typing import Any, Dict, List, Union, Text, Tuple, Optional # NOQA from .compat import StreamType # NOQA __all__ = ["Emitter", "EmitterError"] class EmitterError(YAMLError): pass class ScalarAnalysis(object): def __init__( self, scalar, empty, multiline, allow_flow_plain, allow_block_plain, allow_single_quoted, allow_double_quoted, allow_block, ): # type: (Any, Any, Any, bool, bool, bool, bool, bool) -> None self.scalar = scalar self.empty = empty self.multiline = multiline self.allow_flow_plain = allow_flow_plain self.allow_block_plain = allow_block_plain self.allow_single_quoted = allow_single_quoted self.allow_double_quoted = allow_double_quoted self.allow_block = allow_block class Indents(object): # replacement for the list based stack of None/int def __init__(self): # type: () -> None self.values = [] # type: List[Tuple[int, bool]] def append(self, val, seq): # type: (Any, Any) -> None self.values.append((val, seq)) def pop(self): # type: () -> Any return self.values.pop()[0] def last_seq(self): # type: () -> bool # return the seq(uence) value for the element added before the last one # in increase_indent() try: return self.values[-2][1] except IndexError: return False def seq_flow_align(self, seq_indent, column): # type: (int, int) -> int # extra spaces because of dash if len(self.values) < 2 or not self.values[-1][1]: return 0 # -1 for the dash base = self.values[-1][0] if self.values[-1][0] is not None else 0 return base + seq_indent - column - 1 def __len__(self): # type: () -> int return len(self.values) class Emitter(object): # fmt: off DEFAULT_TAG_PREFIXES = { u'!': u'!', u'tag:yaml.org,2002:': u'!!', } # fmt: on MAX_SIMPLE_KEY_LENGTH = 128 def __init__( self, stream, canonical=None, indent=None, width=None, allow_unicode=None, line_break=None, block_seq_indent=None, top_level_colon_align=None, prefix_colon=None, brace_single_entry_mapping_in_flow_sequence=None, dumper=None, ): # type: (StreamType, Any, Optional[int], Optional[int], Optional[bool], Any, Optional[int], Optional[bool], Any, Optional[bool], Any) -> None # NOQA self.dumper = dumper if self.dumper is not None and getattr(self.dumper, "_emitter", None) is None: self.dumper._emitter = self self.stream = stream # Encoding can be overriden by STREAM-START. self.encoding = None # type: Optional[Text] self.allow_space_break = None # Emitter is a state machine with a stack of states to handle nested # structures. self.states = [] # type: List[Any] self.state = self.expect_stream_start # type: Any # Current event and the event queue. self.events = [] # type: List[Any] self.event = None # type: Any # The current indentation level and the stack of previous indents. self.indents = Indents() self.indent = None # type: Optional[int] # flow_context is an expanding/shrinking list consisting of '{' and '[' # for each unclosed flow context. If empty list that means block context self.flow_context = [] # type: List[Text] # Contexts. self.root_context = False self.sequence_context = False self.mapping_context = False self.simple_key_context = False # Characteristics of the last emitted character: # - current position. # - is it a whitespace? # - is it an indention character # (indentation space, '-', '?', or ':')? self.line = 0 self.column = 0 self.whitespace = True self.indention = True self.compact_seq_seq = True # dash after dash self.compact_seq_map = True # key after dash # self.compact_ms = False # dash after key, only when excplicit key with ? self.no_newline = None # type: Optional[bool] # set if directly after `- ` # Whether the document requires an explicit document end indicator self.open_ended = False # colon handling self.colon = u":" self.prefixed_colon = ( self.colon if prefix_colon is None else prefix_colon + self.colon ) # single entry mappings in flow sequence self.brace_single_entry_mapping_in_flow_sequence = ( brace_single_entry_mapping_in_flow_sequence ) # NOQA # Formatting details. self.canonical = canonical self.allow_unicode = allow_unicode # set to False to get "\Uxxxxxxxx" for non-basic unicode like emojis self.unicode_supplementary = sys.maxunicode > 0xFFFF self.sequence_dash_offset = block_seq_indent if block_seq_indent else 0 self.top_level_colon_align = top_level_colon_align self.best_sequence_indent = 2 self.requested_indent = indent # specific for literal zero indent if indent and 1 < indent < 10: self.best_sequence_indent = indent self.best_map_indent = self.best_sequence_indent # if self.best_sequence_indent < self.sequence_dash_offset + 1: # self.best_sequence_indent = self.sequence_dash_offset + 1 self.best_width = 80 if width and width > self.best_sequence_indent * 2: self.best_width = width self.best_line_break = u"\n" # type: Any if line_break in [u"\r", u"\n", u"\r\n"]: self.best_line_break = line_break # Tag prefixes. self.tag_prefixes = None # type: Any # Prepared anchor and tag. self.prepared_anchor = None # type: Any self.prepared_tag = None # type: Any # Scalar analysis and style. self.analysis = None # type: Any self.style = None # type: Any self.scalar_after_indicator = True # write a scalar on the same line as `---` @property def stream(self): # type: () -> Any try: return self._stream except AttributeError: raise YAMLStreamError("output stream needs to specified") @stream.setter def stream(self, val): # type: (Any) -> None if val is None: return if not hasattr(val, "write"): raise YAMLStreamError("stream argument needs to have a write() method") self._stream = val @property def serializer(self): # type: () -> Any try: if hasattr(self.dumper, "typ"): return self.dumper.serializer return self.dumper._serializer except AttributeError: return self # cyaml @property def flow_level(self): # type: () -> int return len(self.flow_context) def dispose(self): # type: () -> None # Reset the state attributes (to clear self-references) self.states = [] self.state = None def emit(self, event): # type: (Any) -> None if dbg(DBG_EVENT): nprint(event) self.events.append(event) while not self.need_more_events(): self.event = self.events.pop(0) self.state() self.event = None # In some cases, we wait for a few next events before emitting. def need_more_events(self): # type: () -> bool if not self.events: return True event = self.events[0] if isinstance(event, DocumentStartEvent): return self.need_events(1) elif isinstance(event, SequenceStartEvent): return self.need_events(2) elif isinstance(event, MappingStartEvent): return self.need_events(3) else: return False def need_events(self, count): # type: (int) -> bool level = 0 for event in self.events[1:]: if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): level += 1 elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): level -= 1 elif isinstance(event, StreamEndEvent): level = -1 if level < 0: return False return len(self.events) < count + 1 def increase_indent(self, flow=False, sequence=None, indentless=False): # type: (bool, Optional[bool], bool) -> None self.indents.append(self.indent, sequence) if self.indent is None: # top level if flow: # self.indent = self.best_sequence_indent if self.indents.last_seq() else \ # self.best_map_indent # self.indent = self.best_sequence_indent self.indent = self.requested_indent else: self.indent = 0 elif not indentless: self.indent += ( self.best_sequence_indent if self.indents.last_seq() else self.best_map_indent ) # if self.indents.last_seq(): # if self.indent == 0: # top level block sequence # self.indent = self.best_sequence_indent - self.sequence_dash_offset # else: # self.indent += self.best_sequence_indent # else: # self.indent += self.best_map_indent # States. # Stream handlers. def expect_stream_start(self): # type: () -> None if isinstance(self.event, StreamStartEvent): if PY2: if self.event.encoding and not getattr(self.stream, "encoding", None): self.encoding = self.event.encoding else: if self.event.encoding and not hasattr(self.stream, "encoding"): self.encoding = self.event.encoding self.write_stream_start() self.state = self.expect_first_document_start else: raise EmitterError("expected StreamStartEvent, but got %s" % (self.event,)) def expect_nothing(self): # type: () -> None raise EmitterError("expected nothing, but got %s" % (self.event,)) # Document handlers. def expect_first_document_start(self): # type: () -> Any return self.expect_document_start(first=True) def expect_document_start(self, first=False): # type: (bool) -> None if isinstance(self.event, DocumentStartEvent): if (self.event.version or self.event.tags) and self.open_ended: self.write_indicator(u"...", True) self.write_indent() if self.event.version: version_text = self.prepare_version(self.event.version) self.write_version_directive(version_text) self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() if self.event.tags: handles = sorted(self.event.tags.keys()) for handle in handles: prefix = self.event.tags[handle] self.tag_prefixes[prefix] = handle handle_text = self.prepare_tag_handle(handle) prefix_text = self.prepare_tag_prefix(prefix) self.write_tag_directive(handle_text, prefix_text) implicit = ( first and not self.event.explicit and not self.canonical and not self.event.version and not self.event.tags and not self.check_empty_document() ) if not implicit: self.write_indent() self.write_indicator(u"---", True) if self.canonical: self.write_indent() self.state = self.expect_document_root elif isinstance(self.event, StreamEndEvent): if self.open_ended: self.write_indicator(u"...", True) self.write_indent() self.write_stream_end() self.state = self.expect_nothing else: raise EmitterError( "expected DocumentStartEvent, but got %s" % (self.event,) ) def expect_document_end(self): # type: () -> None if isinstance(self.event, DocumentEndEvent): self.write_indent() if self.event.explicit: self.write_indicator(u"...", True) self.write_indent() self.flush_stream() self.state = self.expect_document_start else: raise EmitterError("expected DocumentEndEvent, but got %s" % (self.event,)) def expect_document_root(self): # type: () -> None self.states.append(self.expect_document_end) self.expect_node(root=True) # Node handlers. def expect_node(self, root=False, sequence=False, mapping=False, simple_key=False): # type: (bool, bool, bool, bool) -> None self.root_context = root self.sequence_context = sequence # not used in PyYAML self.mapping_context = mapping self.simple_key_context = simple_key if isinstance(self.event, AliasEvent): self.expect_alias() elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): if ( self.process_anchor(u"&") and isinstance(self.event, ScalarEvent) and self.sequence_context ): self.sequence_context = False if ( root and isinstance(self.event, ScalarEvent) and not self.scalar_after_indicator ): self.write_indent() self.process_tag() if isinstance(self.event, ScalarEvent): # nprint('@', self.indention, self.no_newline, self.column) self.expect_scalar() elif isinstance(self.event, SequenceStartEvent): # nprint('@', self.indention, self.no_newline, self.column) i2, n2 = self.indention, self.no_newline # NOQA if self.event.comment: if self.event.flow_style is False and self.event.comment: if self.write_post_comment(self.event): self.indention = False self.no_newline = True if self.write_pre_comment(self.event): self.indention = i2 self.no_newline = not self.indention if ( self.flow_level or self.canonical or self.event.flow_style or self.check_empty_sequence() ): self.expect_flow_sequence() else: self.expect_block_sequence() elif isinstance(self.event, MappingStartEvent): if self.event.flow_style is False and self.event.comment: self.write_post_comment(self.event) if self.event.comment and self.event.comment[1]: self.write_pre_comment(self.event) if ( self.flow_level or self.canonical or self.event.flow_style or self.check_empty_mapping() ): self.expect_flow_mapping(single=self.event.nr_items == 1) else: self.expect_block_mapping() else: raise EmitterError("expected NodeEvent, but got %s" % (self.event,)) def expect_alias(self): # type: () -> None if self.event.anchor is None: raise EmitterError("anchor is not specified for alias") self.process_anchor(u"*") self.state = self.states.pop() def expect_scalar(self): # type: () -> None self.increase_indent(flow=True) self.process_scalar() self.indent = self.indents.pop() self.state = self.states.pop() # Flow sequence handlers. def expect_flow_sequence(self): # type: () -> None ind = self.indents.seq_flow_align(self.best_sequence_indent, self.column) self.write_indicator(u" " * ind + u"[", True, whitespace=True) self.increase_indent(flow=True, sequence=True) self.flow_context.append("[") self.state = self.expect_first_flow_sequence_item def expect_first_flow_sequence_item(self): # type: () -> None if isinstance(self.event, SequenceEndEvent): self.indent = self.indents.pop() popped = self.flow_context.pop() assert popped == "[" self.write_indicator(u"]", False) if self.event.comment and self.event.comment[0]: # eol comment on empty flow sequence self.write_post_comment(self.event) elif self.flow_level == 0: self.write_line_break() self.state = self.states.pop() else: if self.canonical or self.column > self.best_width: self.write_indent() self.states.append(self.expect_flow_sequence_item) self.expect_node(sequence=True) def expect_flow_sequence_item(self): # type: () -> None if isinstance(self.event, SequenceEndEvent): self.indent = self.indents.pop() popped = self.flow_context.pop() assert popped == "[" if self.canonical: self.write_indicator(u",", False) self.write_indent() self.write_indicator(u"]", False) if self.event.comment and self.event.comment[0]: # eol comment on flow sequence self.write_post_comment(self.event) else: self.no_newline = False self.state = self.states.pop() else: self.write_indicator(u",", False) if self.canonical or self.column > self.best_width: self.write_indent() self.states.append(self.expect_flow_sequence_item) self.expect_node(sequence=True) # Flow mapping handlers. def expect_flow_mapping(self, single=False): # type: (Optional[bool]) -> None ind = self.indents.seq_flow_align(self.best_sequence_indent, self.column) map_init = u"{" if ( single and self.flow_level and self.flow_context[-1] == "[" and not self.canonical and not self.brace_single_entry_mapping_in_flow_sequence ): # single map item with flow context, no curly braces necessary map_init = u"" self.write_indicator(u" " * ind + map_init, True, whitespace=True) self.flow_context.append(map_init) self.increase_indent(flow=True, sequence=False) self.state = self.expect_first_flow_mapping_key def expect_first_flow_mapping_key(self): # type: () -> None if isinstance(self.event, MappingEndEvent): self.indent = self.indents.pop() popped = self.flow_context.pop() assert popped == "{" # empty flow mapping self.write_indicator(u"}", False) if self.event.comment and self.event.comment[0]: # eol comment on empty mapping self.write_post_comment(self.event) elif self.flow_level == 0: self.write_line_break() self.state = self.states.pop() else: if self.canonical or self.column > self.best_width: self.write_indent() if not self.canonical and self.check_simple_key(): self.states.append(self.expect_flow_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: self.write_indicator(u"?", True) self.states.append(self.expect_flow_mapping_value) self.expect_node(mapping=True) def expect_flow_mapping_key(self): # type: () -> None if isinstance(self.event, MappingEndEvent): # if self.event.comment and self.event.comment[1]: # self.write_pre_comment(self.event) self.indent = self.indents.pop() popped = self.flow_context.pop() assert popped in [u"{", u""] if self.canonical: self.write_indicator(u",", False) self.write_indent() if popped != u"": self.write_indicator(u"}", False) if self.event.comment and self.event.comment[0]: # eol comment on flow mapping, never reached on empty mappings self.write_post_comment(self.event) else: self.no_newline = False self.state = self.states.pop() else: self.write_indicator(u",", False) if self.canonical or self.column > self.best_width: self.write_indent() if not self.canonical and self.check_simple_key(): self.states.append(self.expect_flow_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) else: self.write_indicator(u"?", True) self.states.append(self.expect_flow_mapping_value) self.expect_node(mapping=True) def expect_flow_mapping_simple_value(self): # type: () -> None self.write_indicator(self.prefixed_colon, False) self.states.append(self.expect_flow_mapping_key) self.expect_node(mapping=True) def expect_flow_mapping_value(self): # type: () -> None if self.canonical or self.column > self.best_width: self.write_indent() self.write_indicator(self.prefixed_colon, True) self.states.append(self.expect_flow_mapping_key) self.expect_node(mapping=True) # Block sequence handlers. def expect_block_sequence(self): # type: () -> None if self.mapping_context: indentless = not self.indention else: indentless = False if not self.compact_seq_seq and self.column != 0: self.write_line_break() self.increase_indent(flow=False, sequence=True, indentless=indentless) self.state = self.expect_first_block_sequence_item def expect_first_block_sequence_item(self): # type: () -> Any return self.expect_block_sequence_item(first=True) def expect_block_sequence_item(self, first=False): # type: (bool) -> None if not first and isinstance(self.event, SequenceEndEvent): if self.event.comment and self.event.comment[1]: # final comments on a block list e.g. empty line self.write_pre_comment(self.event) self.indent = self.indents.pop() self.state = self.states.pop() self.no_newline = False else: if self.event.comment and self.event.comment[1]: self.write_pre_comment(self.event) nonl = self.no_newline if self.column == 0 else False self.write_indent() ind = self.sequence_dash_offset # if len(self.indents) > 1 else 0 self.write_indicator(u" " * ind + u"-", True, indention=True) if nonl or self.sequence_dash_offset + 2 > self.best_sequence_indent: self.no_newline = True self.states.append(self.expect_block_sequence_item) self.expect_node(sequence=True) # Block mapping handlers. def expect_block_mapping(self): # type: () -> None if not self.mapping_context and not (self.compact_seq_map or self.column == 0): self.write_line_break() self.increase_indent(flow=False, sequence=False) self.state = self.expect_first_block_mapping_key def expect_first_block_mapping_key(self): # type: () -> None return self.expect_block_mapping_key(first=True) def expect_block_mapping_key(self, first=False): # type: (Any) -> None if not first and isinstance(self.event, MappingEndEvent): if self.event.comment and self.event.comment[1]: # final comments from a doc self.write_pre_comment(self.event) self.indent = self.indents.pop() self.state = self.states.pop() else: if self.event.comment and self.event.comment[1]: # final comments from a doc self.write_pre_comment(self.event) self.write_indent() if self.check_simple_key(): if not isinstance( self.event, (SequenceStartEvent, MappingStartEvent) ): # sequence keys try: if self.event.style == "?": self.write_indicator(u"?", True, indention=True) except AttributeError: # aliases have no style pass self.states.append(self.expect_block_mapping_simple_value) self.expect_node(mapping=True, simple_key=True) if isinstance(self.event, AliasEvent): self.stream.write(u" ") else: self.write_indicator(u"?", True, indention=True) self.states.append(self.expect_block_mapping_value) self.expect_node(mapping=True) def expect_block_mapping_simple_value(self): # type: () -> None if getattr(self.event, "style", None) != "?": # prefix = u'' if self.indent == 0 and self.top_level_colon_align is not None: # write non-prefixed colon c = u" " * (self.top_level_colon_align - self.column) + self.colon else: c = self.prefixed_colon self.write_indicator(c, False) self.states.append(self.expect_block_mapping_key) self.expect_node(mapping=True) def expect_block_mapping_value(self): # type: () -> None self.write_indent() self.write_indicator(self.prefixed_colon, True, indention=True) self.states.append(self.expect_block_mapping_key) self.expect_node(mapping=True) # Checkers. def check_empty_sequence(self): # type: () -> bool return ( isinstance(self.event, SequenceStartEvent) and bool(self.events) and isinstance(self.events[0], SequenceEndEvent) ) def check_empty_mapping(self): # type: () -> bool return ( isinstance(self.event, MappingStartEvent) and bool(self.events) and isinstance(self.events[0], MappingEndEvent) ) def check_empty_document(self): # type: () -> bool if not isinstance(self.event, DocumentStartEvent) or not self.events: return False event = self.events[0] return ( isinstance(event, ScalarEvent) and event.anchor is None and event.tag is None and event.implicit and event.value == "" ) def check_simple_key(self): # type: () -> bool length = 0 if isinstance(self.event, NodeEvent) and self.event.anchor is not None: if self.prepared_anchor is None: self.prepared_anchor = self.prepare_anchor(self.event.anchor) length += len(self.prepared_anchor) if ( isinstance(self.event, (ScalarEvent, CollectionStartEvent)) and self.event.tag is not None ): if self.prepared_tag is None: self.prepared_tag = self.prepare_tag(self.event.tag) length += len(self.prepared_tag) if isinstance(self.event, ScalarEvent): if self.analysis is None: self.analysis = self.analyze_scalar(self.event.value) length += len(self.analysis.scalar) return length < self.MAX_SIMPLE_KEY_LENGTH and ( isinstance(self.event, AliasEvent) or ( isinstance(self.event, SequenceStartEvent) and self.event.flow_style is True ) or ( isinstance(self.event, MappingStartEvent) and self.event.flow_style is True ) or ( isinstance(self.event, ScalarEvent) # if there is an explicit style for an empty string, it is a simple key and not (self.analysis.empty and self.style and self.style not in "'\"") and not self.analysis.multiline ) or self.check_empty_sequence() or self.check_empty_mapping() ) # Anchor, Tag, and Scalar processors. def process_anchor(self, indicator): # type: (Any) -> bool if self.event.anchor is None: self.prepared_anchor = None return False if self.prepared_anchor is None: self.prepared_anchor = self.prepare_anchor(self.event.anchor) if self.prepared_anchor: self.write_indicator(indicator + self.prepared_anchor, True) # issue 288 self.no_newline = False self.prepared_anchor = None return True def process_tag(self): # type: () -> None tag = self.event.tag if isinstance(self.event, ScalarEvent): if self.style is None: self.style = self.choose_scalar_style() if (not self.canonical or tag is None) and ( (self.style == "" and self.event.implicit[0]) or (self.style != "" and self.event.implicit[1]) ): self.prepared_tag = None return if self.event.implicit[0] and tag is None: tag = u"!" self.prepared_tag = None else: if (not self.canonical or tag is None) and self.event.implicit: self.prepared_tag = None return if tag is None: raise EmitterError("tag is not specified") if self.prepared_tag is None: self.prepared_tag = self.prepare_tag(tag) if self.prepared_tag: self.write_indicator(self.prepared_tag, True) if ( self.sequence_context and not self.flow_level and isinstance(self.event, ScalarEvent) ): self.no_newline = True self.prepared_tag = None def choose_scalar_style(self): # type: () -> Any if self.analysis is None: self.analysis = self.analyze_scalar(self.event.value) if self.event.style == '"' or self.canonical: return '"' if (not self.event.style or self.event.style == "?") and ( self.event.implicit[0] or not self.event.implicit[2] ): if not ( self.simple_key_context and (self.analysis.empty or self.analysis.multiline) ) and ( self.flow_level and self.analysis.allow_flow_plain or (not self.flow_level and self.analysis.allow_block_plain) ): return "" self.analysis.allow_block = True if self.event.style and self.event.style in "|>": if ( not self.flow_level and not self.simple_key_context and self.analysis.allow_block ): return self.event.style if not self.event.style and self.analysis.allow_double_quoted: if "'" in self.event.value or "\n" in self.event.value: return '"' if not self.event.style or self.event.style == "'": if self.analysis.allow_single_quoted and not ( self.simple_key_context and self.analysis.multiline ): return "'" return '"' def process_scalar(self): # type: () -> None if self.analysis is None: self.analysis = self.analyze_scalar(self.event.value) if self.style is None: self.style = self.choose_scalar_style() split = not self.simple_key_context # if self.analysis.multiline and split \ # and (not self.style or self.style in '\'\"'): # self.write_indent() # nprint('xx', self.sequence_context, self.flow_level) if self.sequence_context and not self.flow_level: self.write_indent() if self.style == '"': self.write_double_quoted(self.analysis.scalar, split) elif self.style == "'": self.write_single_quoted(self.analysis.scalar, split) elif self.style == ">": self.write_folded(self.analysis.scalar) elif self.style == "|": self.write_literal(self.analysis.scalar, self.event.comment) else: self.write_plain(self.analysis.scalar, split) self.analysis = None self.style = None if self.event.comment: self.write_post_comment(self.event) # Analyzers. def prepare_version(self, version): # type: (Any) -> Any major, minor = version if major != 1: raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) return u"%d.%d" % (major, minor) def prepare_tag_handle(self, handle): # type: (Any) -> Any if not handle: raise EmitterError("tag handle must not be empty") if handle[0] != u"!" or handle[-1] != u"!": raise EmitterError( "tag handle must start and end with '!': %r" % (utf8(handle)) ) for ch in handle[1:-1]: if not ( u"0" <= ch <= u"9" or u"A" <= ch <= u"Z" or u"a" <= ch <= u"z" or ch in u"-_" ): raise EmitterError( "invalid character %r in the tag handle: %r" % (utf8(ch), utf8(handle)) ) return handle def prepare_tag_prefix(self, prefix): # type: (Any) -> Any if not prefix: raise EmitterError("tag prefix must not be empty") chunks = [] # type: List[Any] start = end = 0 if prefix[0] == u"!": end = 1 ch_set = u"-;/?:@&=+$,_.~*'()[]" if self.dumper: version = getattr(self.dumper, "version", (1, 2)) if version is None or version >= (1, 2): ch_set += u"#" while end < len(prefix): ch = prefix[end] if ( u"0" <= ch <= u"9" or u"A" <= ch <= u"Z" or u"a" <= ch <= u"z" or ch in ch_set ): end += 1 else: if start < end: chunks.append(prefix[start:end]) start = end = end + 1 data = utf8(ch) for ch in data: chunks.append(u"%%%02X" % ord(ch)) if start < end: chunks.append(prefix[start:end]) return "".join(chunks) def prepare_tag(self, tag): # type: (Any) -> Any if not tag: raise EmitterError("tag must not be empty") if tag == u"!": return tag handle = None suffix = tag prefixes = sorted(self.tag_prefixes.keys()) for prefix in prefixes: if tag.startswith(prefix) and (prefix == u"!" or len(prefix) < len(tag)): handle = self.tag_prefixes[prefix] suffix = tag[len(prefix) :] chunks = [] # type: List[Any] start = end = 0 ch_set = u"-;/?:@&=+$,_.~*'()[]" if self.dumper: version = getattr(self.dumper, "version", (1, 2)) if version is None or version >= (1, 2): ch_set += u"#" while end < len(suffix): ch = suffix[end] if ( u"0" <= ch <= u"9" or u"A" <= ch <= u"Z" or u"a" <= ch <= u"z" or ch in ch_set or (ch == u"!" and handle != u"!") ): end += 1 else: if start < end: chunks.append(suffix[start:end]) start = end = end + 1 data = utf8(ch) for ch in data: chunks.append(u"%%%02X" % ord(ch)) if start < end: chunks.append(suffix[start:end]) suffix_text = "".join(chunks) if handle: return u"%s%s" % (handle, suffix_text) else: return u"!<%s>" % suffix_text def prepare_anchor(self, anchor): # type: (Any) -> Any if not anchor: raise EmitterError("anchor must not be empty") for ch in anchor: if not check_anchorname_char(ch): raise EmitterError( "invalid character %r in the anchor: %r" % (utf8(ch), utf8(anchor)) ) return anchor def analyze_scalar(self, scalar): # type: (Any) -> Any # Empty scalar is a special case. if not scalar: return ScalarAnalysis( scalar=scalar, empty=True, multiline=False, allow_flow_plain=False, allow_block_plain=True, allow_single_quoted=True, allow_double_quoted=True, allow_block=False, ) # Indicators and special characters. block_indicators = False flow_indicators = False line_breaks = False special_characters = False # Important whitespace combinations. leading_space = False leading_break = False trailing_space = False trailing_break = False break_space = False space_break = False # Check document indicators. if scalar.startswith(u"---") or scalar.startswith(u"..."): block_indicators = True flow_indicators = True # First character or preceded by a whitespace. preceeded_by_whitespace = True # Last character or followed by a whitespace. followed_by_whitespace = ( len(scalar) == 1 or scalar[1] in u"\0 \t\r\n\x85\u2028\u2029" ) # The previous character is a space. previous_space = False # The previous character is a break. previous_break = False index = 0 while index < len(scalar): ch = scalar[index] # Check for indicators. if index == 0: # Leading indicators are special characters. if ch in u"#,[]{}&*!|>'\"%@`": flow_indicators = True block_indicators = True if ch in u"?:": # ToDo if self.serializer.use_version == (1, 1): flow_indicators = True elif len(scalar) == 1: # single character flow_indicators = True if followed_by_whitespace: block_indicators = True if ch == u"-" and followed_by_whitespace: flow_indicators = True block_indicators = True else: # Some indicators cannot appear within a scalar as well. if ch in u",[]{}": # http://yaml.org/spec/1.2/spec.html#id2788859 flow_indicators = True if ch == u"?" and self.serializer.use_version == (1, 1): flow_indicators = True if ch == u":": if followed_by_whitespace: flow_indicators = True block_indicators = True if ch == u"#" and preceeded_by_whitespace: flow_indicators = True block_indicators = True # Check for line breaks, special, and unicode characters. if ch in u"\n\x85\u2028\u2029": line_breaks = True if not (ch == u"\n" or u"\x20" <= ch <= u"\x7E"): if ( ch == u"\x85" or u"\xA0" <= ch <= u"\uD7FF" or u"\uE000" <= ch <= u"\uFFFD" or ( self.unicode_supplementary and (u"\U00010000" <= ch <= u"\U0010FFFF") ) ) and ch != u"\uFEFF": # unicode_characters = True if not self.allow_unicode: special_characters = True else: special_characters = True # Detect important whitespace combinations. if ch == u" ": if index == 0: leading_space = True if index == len(scalar) - 1: trailing_space = True if previous_break: break_space = True previous_space = True previous_break = False elif ch in u"\n\x85\u2028\u2029": if index == 0: leading_break = True if index == len(scalar) - 1: trailing_break = True if previous_space: space_break = True previous_space = False previous_break = True else: previous_space = False previous_break = False # Prepare for the next character. index += 1 preceeded_by_whitespace = ch in u"\0 \t\r\n\x85\u2028\u2029" followed_by_whitespace = ( index + 1 >= len(scalar) or scalar[index + 1] in u"\0 \t\r\n\x85\u2028\u2029" ) # Let's decide what styles are allowed. allow_flow_plain = True allow_block_plain = True allow_single_quoted = True allow_double_quoted = True allow_block = True # Leading and trailing whitespaces are bad for plain scalars. if leading_space or leading_break or trailing_space or trailing_break: allow_flow_plain = allow_block_plain = False # We do not permit trailing spaces for block scalars. if trailing_space: allow_block = False # Spaces at the beginning of a new line are only acceptable for block # scalars. if break_space: allow_flow_plain = allow_block_plain = allow_single_quoted = False # Spaces followed by breaks, as well as special character are only # allowed for double quoted scalars. if special_characters: allow_flow_plain = ( allow_block_plain ) = allow_single_quoted = allow_block = False elif space_break: allow_flow_plain = allow_block_plain = allow_single_quoted = False if not self.allow_space_break: allow_block = False # Although the plain scalar writer supports breaks, we never emit # multiline plain scalars. if line_breaks: allow_flow_plain = allow_block_plain = False # Flow indicators are forbidden for flow plain scalars. if flow_indicators: allow_flow_plain = False # Block indicators are forbidden for block plain scalars. if block_indicators: allow_block_plain = False return ScalarAnalysis( scalar=scalar, empty=False, multiline=line_breaks, allow_flow_plain=allow_flow_plain, allow_block_plain=allow_block_plain, allow_single_quoted=allow_single_quoted, allow_double_quoted=allow_double_quoted, allow_block=allow_block, ) # Writers. def flush_stream(self): # type: () -> None if hasattr(self.stream, "flush"): self.stream.flush() def write_stream_start(self): # type: () -> None # Write BOM if needed. if self.encoding and self.encoding.startswith("utf-16"): self.stream.write(u"\uFEFF".encode(self.encoding)) def write_stream_end(self): # type: () -> None self.flush_stream() def write_indicator( self, indicator, need_whitespace, whitespace=False, indention=False ): # type: (Any, Any, bool, bool) -> None if self.whitespace or not need_whitespace: data = indicator else: data = u" " + indicator self.whitespace = whitespace self.indention = self.indention and indention self.column += len(data) self.open_ended = False if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) def write_indent(self): # type: () -> None indent = self.indent or 0 if ( not self.indention or self.column > indent or (self.column == indent and not self.whitespace) ): if bool(self.no_newline): self.no_newline = False else: self.write_line_break() if self.column < indent: self.whitespace = True data = u" " * (indent - self.column) self.column = indent if self.encoding: data = data.encode(self.encoding) self.stream.write(data) def write_line_break(self, data=None): # type: (Any) -> None if data is None: data = self.best_line_break self.whitespace = True self.indention = True self.line += 1 self.column = 0 if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) def write_version_directive(self, version_text): # type: (Any) -> None data = u"%%YAML %s" % version_text if self.encoding: data = data.encode(self.encoding) self.stream.write(data) self.write_line_break() def write_tag_directive(self, handle_text, prefix_text): # type: (Any, Any) -> None data = u"%%TAG %s %s" % (handle_text, prefix_text) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) self.write_line_break() # Scalar streams. def write_single_quoted(self, text, split=True): # type: (Any, Any) -> None if self.root_context: if self.requested_indent is not None: self.write_line_break() if self.requested_indent != 0: self.write_indent() self.write_indicator(u"'", True) spaces = False breaks = False start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] if spaces: if ch is None or ch != u" ": if ( start + 1 == end and self.column > self.best_width and split and start != 0 and end != len(text) ): self.write_indent() else: data = text[start:end] self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) start = end elif breaks: if ch is None or ch not in u"\n\x85\u2028\u2029": if text[start] == u"\n": self.write_line_break() for br in text[start:end]: if br == u"\n": self.write_line_break() else: self.write_line_break(br) self.write_indent() start = end else: if ch is None or ch in u" \n\x85\u2028\u2029" or ch == u"'": if start < end: data = text[start:end] self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) start = end if ch == u"'": data = u"''" self.column += 2 if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) start = end + 1 if ch is not None: spaces = ch == u" " breaks = ch in u"\n\x85\u2028\u2029" end += 1 self.write_indicator(u"'", False) ESCAPE_REPLACEMENTS = { u"\0": u"0", u"\x07": u"a", u"\x08": u"b", u"\x09": u"t", u"\x0A": u"n", u"\x0B": u"v", u"\x0C": u"f", u"\x0D": u"r", u"\x1B": u"e", u'"': u'"', u"\\": u"\\", u"\x85": u"N", u"\xA0": u"_", u"\u2028": u"L", u"\u2029": u"P", } def write_double_quoted(self, text, split=True): # type: (Any, Any) -> None if self.root_context: if self.requested_indent is not None: self.write_line_break() if self.requested_indent != 0: self.write_indent() self.write_indicator(u'"', True) start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] if ( ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' or not ( u"\x20" <= ch <= u"\x7E" or ( self.allow_unicode and (u"\xA0" <= ch <= u"\uD7FF" or u"\uE000" <= ch <= u"\uFFFD") ) ) ): if start < end: data = text[start:end] self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) start = end if ch is not None: if ch in self.ESCAPE_REPLACEMENTS: data = u"\\" + self.ESCAPE_REPLACEMENTS[ch] elif ch <= u"\xFF": data = u"\\x%02X" % ord(ch) elif ch <= u"\uFFFF": data = u"\\u%04X" % ord(ch) else: data = u"\\U%08X" % ord(ch) self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) start = end + 1 if ( 0 < end < len(text) - 1 and (ch == u" " or start >= end) and self.column + (end - start) > self.best_width and split ): data = text[start:end] + u"\\" if start < end: start = end self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) self.write_indent() self.whitespace = False self.indention = False if text[start] == u" ": data = u"\\" self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) end += 1 self.write_indicator(u'"', False) def determine_block_hints(self, text): # type: (Any) -> Any indent = 0 indicator = u"" hints = u"" if text: if text[0] in u" \n\x85\u2028\u2029": indent = self.best_sequence_indent hints += text_type(indent) elif self.root_context: for end in ["\n---", "\n..."]: pos = 0 while True: pos = text.find(end, pos) if pos == -1: break try: if text[pos + 4] in " \r\n": break except IndexError: pass pos += 1 if pos > -1: break if pos > 0: indent = self.best_sequence_indent if text[-1] not in u"\n\x85\u2028\u2029": indicator = u"-" elif len(text) == 1 or text[-2] in u"\n\x85\u2028\u2029": indicator = u"+" hints += indicator return hints, indent, indicator def write_folded(self, text): # type: (Any) -> None hints, _indent, _indicator = self.determine_block_hints(text) self.write_indicator(u">" + hints, True) if _indicator == u"+": self.open_ended = True self.write_line_break() leading_space = True spaces = False breaks = True start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] if breaks: if ch is None or ch not in u"\n\x85\u2028\u2029\a": if ( not leading_space and ch is not None and ch != u" " and text[start] == u"\n" ): self.write_line_break() leading_space = ch == u" " for br in text[start:end]: if br == u"\n": self.write_line_break() else: self.write_line_break(br) if ch is not None: self.write_indent() start = end elif spaces: if ch != u" ": if start + 1 == end and self.column > self.best_width: self.write_indent() else: data = text[start:end] self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) start = end else: if ch is None or ch in u" \n\x85\u2028\u2029\a": data = text[start:end] self.column += len(data) if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) if ch == u"\a": if end < (len(text) - 1) and not text[end + 2].isspace(): self.write_line_break() self.write_indent() end += 2 # \a and the space that is inserted on the fold else: raise EmitterError( "unexcpected fold indicator \\a before space" ) if ch is None: self.write_line_break() start = end if ch is not None: breaks = ch in u"\n\x85\u2028\u2029" spaces = ch == u" " end += 1 def write_literal(self, text, comment=None): # type: (Any, Any) -> None hints, _indent, _indicator = self.determine_block_hints(text) self.write_indicator(u"|" + hints, True) try: comment = comment[1][0] if comment: self.stream.write(comment) except (TypeError, IndexError): pass if _indicator == u"+": self.open_ended = True self.write_line_break() breaks = True start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] if breaks: if ch is None or ch not in u"\n\x85\u2028\u2029": for br in text[start:end]: if br == u"\n": self.write_line_break() else: self.write_line_break(br) if ch is not None: if self.root_context: idnx = self.indent if self.indent is not None else 0 self.stream.write(u" " * (_indent + idnx)) else: self.write_indent() start = end else: if ch is None or ch in u"\n\x85\u2028\u2029": data = text[start:end] if bool(self.encoding): data = data.encode(self.encoding) self.stream.write(data) if ch is None: self.write_line_break() start = end if ch is not None: breaks = ch in u"\n\x85\u2028\u2029" end += 1 def write_plain(self, text, split=True): # type: (Any, Any) -> None if self.root_context: if self.requested_indent is not None: self.write_line_break() if self.requested_indent != 0: self.write_indent() else: self.open_ended = True if not text: return if not self.whitespace: data = u" " self.column += len(data) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) self.whitespace = False self.indention = False spaces = False breaks = False start = end = 0 while end <= len(text): ch = None if end < len(text): ch = text[end] if spaces: if ch != u" ": if start + 1 == end and self.column > self.best_width and split: self.write_indent() self.whitespace = False self.indention = False else: data = text[start:end] self.column += len(data) if self.encoding: data = data.encode(self.encoding) self.stream.write(data) start = end elif breaks: if ch not in u"\n\x85\u2028\u2029": # type: ignore if text[start] == u"\n": self.write_line_break() for br in text[start:end]: if br == u"\n": self.write_line_break() else: self.write_line_break(br) self.write_indent() self.whitespace = False self.indention = False start = end else: if ch is None or ch in u" \n\x85\u2028\u2029": data = text[start:end] self.column += len(data) if self.encoding: data = data.encode(self.encoding) try: self.stream.write(data) except: # NOQA sys.stdout.write(repr(data) + "\n") raise start = end if ch is not None: spaces = ch == u" " breaks = ch in u"\n\x85\u2028\u2029" end += 1 def write_comment(self, comment, pre=False): # type: (Any, bool) -> None value = comment.value # nprintf('{:02d} {:02d} {!r}'.format(self.column, comment.start_mark.column, value)) if not pre and value[-1] == "\n": value = value[:-1] try: # get original column position col = comment.start_mark.column if comment.value and comment.value.startswith("\n"): # never inject extra spaces if the comment starts with a newline # and not a real comment (e.g. if you have an empty line following a key-value col = self.column elif col < self.column + 1: ValueError except ValueError: col = self.column + 1 # nprint('post_comment', self.line, self.column, value) try: # at least one space if the current column >= the start column of the comment # but not at the start of a line nr_spaces = col - self.column if self.column and value.strip() and nr_spaces < 1 and value[0] != "\n": nr_spaces = 1 value = " " * nr_spaces + value try: if bool(self.encoding): value = value.encode(self.encoding) except UnicodeDecodeError: pass self.stream.write(value) except TypeError: raise if not pre: self.write_line_break() def write_pre_comment(self, event): # type: (Any) -> bool comments = event.comment[1] if comments is None: return False try: start_events = (MappingStartEvent, SequenceStartEvent) for comment in comments: if isinstance(event, start_events) and getattr( comment, "pre_done", None ): continue if self.column != 0: self.write_line_break() self.write_comment(comment, pre=True) if isinstance(event, start_events): comment.pre_done = True except TypeError: sys.stdout.write("eventtt {} {}".format(type(event), event)) raise return True def write_post_comment(self, event): # type: (Any) -> bool if self.event.comment[0] is None: return False comment = event.comment[0] self.write_comment(comment) return True