# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities that may be especially useful to plugins."""

import threading

from tensorboard._vendor.bleach.sanitizer import Cleaner
import markdown

from tensorboard import context as _context
from tensorboard.backend import experiment_id as _experiment_id
from tensorboard.util import tb_logging

# Module-level logger; used below to warn (once) about summary data whose
# metadata version is newer than this TensorBoard understands.
logger = tb_logging.get_logger()

# Attribute allowlist, keyed by tag name. Passed as `attributes=` to the
# `Cleaner` that sanitizes HTML in this module.
_ALLOWED_ATTRIBUTES = {
    "a": ["href", "title"],
    "img": ["src", "title", "alt"],
}

# Tag allowlist. Passed as `tags=` to the `Cleaner` that sanitizes HTML in
# this module.
_ALLOWED_TAGS = [
    "ul",
    "ol",
    "li",
    "p",
    "pre",
    "code",
    "blockquote",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "hr",
    "br",
    "strong",
    "em",
    "a",
    "img",
    "table",
    "thead",
    "tbody",
    "td",
    "tr",
    "th",
]

# Cache Markdown converter to avoid expensive initialization at each
# call to `markdown_to_safe_html`. Cache a different instance per thread.
class _MarkdownStore(threading.local):
    """Thread-local holder for a reusable `markdown.Markdown` converter.

    Because this subclasses `threading.local`, `__init__` runs again in
    each thread that uses the shared instance, so every thread sees its
    own `markdown` attribute.
    """

    def __init__(self):
        # Tables and fenced code blocks are the only extensions enabled.
        self.markdown = markdown.Markdown(
            extensions=[
                "markdown.extensions.tables",
                "markdown.extensions.fenced_code",
            ]
        )


_MARKDOWN_STORE = _MarkdownStore()


# Cache Cleaner to avoid expensive initialization at each call to `clean`.
# Cache a different instance per thread.
class _CleanerStore(threading.local):
    """Thread-local holder for a reusable HTML `Cleaner`.

    Because this subclasses `threading.local`, `__init__` runs again in
    each thread that uses the shared instance, so every thread sees its
    own `cleaner` attribute. The cleaner is configured with this
    module's tag and attribute allowlists.
    """

    def __init__(self):
        self.cleaner = Cleaner(
            tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRIBUTES
        )


_CLEANER_STORE = _CleanerStore()


def safe_html(unsafe_string):
    """Return the input as a str, sanitized for insertion into the DOM.

    Args:
      unsafe_string: A Unicode string or UTF-8--encoded bytestring
        possibly containing unsafe HTML markup.

    Returns:
      A string containing safe HTML.

    Raises:
      UnicodeDecodeError: If `unsafe_string` is a bytestring that is not
        valid UTF-8.
    """
    # The cleaner operates on text, so decode binary input first.
    if isinstance(unsafe_string, bytes):
        unsafe_string = unsafe_string.decode("utf-8")
    return _CLEANER_STORE.cleaner.clean(unsafe_string)


def markdown_to_safe_html(markdown_string):
    """Convert Markdown to HTML that's safe to splice into the DOM.

    Args:
      markdown_string: A Unicode string or UTF-8--encoded bytestring
        containing Markdown source. Markdown tables are supported.

    Returns:
      A string containing safe HTML.
    """
    # Delegate to the multi-document helper with a single document; the
    # combiner simply unwraps the one converted result.
    return markdowns_to_safe_html([markdown_string], lambda xs: xs[0])


def markdowns_to_safe_html(markdown_strings, combine):
    """Convert multiple Markdown documents to one safe HTML document.

    One could also achieve this by calling `markdown_to_safe_html`
    multiple times and combining the results. Compared to that approach,
    this function may be faster, because HTML sanitization (which can be
    expensive) is performed only once rather than once per input. It may
    also be less precise: if one of the input documents has unsafe HTML
    that is sanitized away, that sanitization might affect other
    documents, even if those documents are safe.

    Args:
      markdown_strings: List of Markdown source strings to convert, as
        Unicode strings or UTF-8--encoded bytestrings. Markdown tables
        are supported.
      combine: Callback function that takes a list of unsafe HTML
        strings of the same shape as `markdown_strings` and combines
        them into a single unsafe HTML string, which will be sanitized
        and returned.

    Returns:
      A string containing safe HTML. If null bytes were discarded from
      any bytestring input, the result is prefixed with an HTML comment
      reporting how many.

    Raises:
      UnicodeDecodeError: If a bytestring input is not valid UTF-8.
    """
    converter = _MARKDOWN_STORE.markdown
    unsafe_htmls = []
    total_null_bytes = 0

    for source in markdown_strings:
        # Convert to utf-8 whenever we have a binary input.
        if isinstance(source, bytes):
            source_decoded = source.decode("utf-8")
            # Remove null bytes and warn if there were any, since it probably
            # means we were given a bad encoding.
            source = source_decoded.replace("\x00", "")
            total_null_bytes += len(source_decoded) - len(source)
        # The converter is cached and reused, so clear per-document state
        # (link reference definitions, stashed raw HTML) before each
        # conversion; otherwise it carries over into later documents.
        converter.reset()
        unsafe_htmls.append(converter.convert(source))

    unsafe_combined = combine(unsafe_htmls)
    sanitized_combined = _CLEANER_STORE.cleaner.clean(unsafe_combined)

    # The warning is prepended after sanitization, so it is not passed
    # through the cleaner.
    warning = ""
    if total_null_bytes:
        warning = (
            "<!-- WARNING: discarded %d null bytes in markdown string "
            "after UTF-8 decoding -->\n"
        ) % total_null_bytes

    return warning + sanitized_combined


def context(environ):
    """Get a TensorBoard `RequestContext` from a WSGI environment.

    Args:
      environ: A WSGI environment.

    Returns:
      A `RequestContext` value.
    """
    return _context.from_environ(environ)


def experiment_id(environ):
    """Determine the experiment ID associated with a WSGI request.

    Each request to TensorBoard has an associated experiment ID, which is
    always a string and may be empty. This experiment ID should be passed
    to data providers.

    Args:
      environ: A WSGI environment `dict`. For a Werkzeug request, this is
        `request.environ`.

    Returns:
      An experiment ID, as a possibly-empty `str`.
    """
    # Fall back to the empty string when the key is absent from `environ`.
    return environ.get(_experiment_id.WSGI_ENVIRON_KEY, "")


class _MetadataVersionChecker:
    """TensorBoard-internal utility for warning when data is too new.

    Specify a maximum known `version` number as stored in summary
    metadata, and automatically reject and warn on data from newer
    versions. This keeps a (single) bit of internal state to handle
    logging a warning to the user at most once.

    This should only be used by plugins bundled with TensorBoard, since
    it may instruct users to upgrade their copy of TensorBoard.
    """

    def __init__(self, data_kind, latest_known_version):
        """Initialize a `_MetadataVersionChecker`.

        Args:
          data_kind: A human-readable description of the kind of data
            being read, like "scalar" or "histogram" or "PR curve".
          latest_known_version: Highest tolerated value of `version`,
            like `0`.
        """
        self._data_kind = data_kind
        self._latest_known_version = latest_known_version
        # Set once the warning has been logged, so it is not repeated.
        self._warned = False

    def ok(self, version, run, tag):
        """Test whether `version` is permitted, else complain.

        Args:
          version: The `version` value read from summary metadata.
          run: Run name, included in the warning as a sample.
          tag: Tag name, included in the warning as a sample.

        Returns:
          `True` if `0 <= version <= latest_known_version`. Otherwise
          `False`, after logging a warning if none has been logged yet
          by this checker.
        """
        if 0 <= version <= self._latest_known_version:
            return True
        self._maybe_warn(version, run, tag)
        return False

    def _maybe_warn(self, version, run, tag):
        """Log the "data is too new" warning unless already logged."""
        if self._warned:
            return
        self._warned = True
        logger.warning(
            "Some %s data is too new to be read by this version of TensorBoard. "
            "Upgrading TensorBoard may fix this. "
            "(sample: run %r, tag %r, data version %r)",
            self._data_kind,
            run,
            tag,
            version,
        )