829 lines
34 KiB
Python
829 lines
34 KiB
Python
import os
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Literal, Optional, Type, Union
|
|
|
|
import requests
|
|
import yaml
|
|
|
|
from huggingface_hub.file_download import hf_hub_download
|
|
from huggingface_hub.hf_api import upload_file
|
|
from huggingface_hub.repocard_data import (
|
|
CardData,
|
|
DatasetCardData,
|
|
EvalResult,
|
|
ModelCardData,
|
|
SpaceCardData,
|
|
eval_results_to_model_index,
|
|
model_index_to_eval_results,
|
|
)
|
|
from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
|
|
|
|
from .constants import REPOCARD_NAME
|
|
from .utils import EntryNotFoundError, SoftTemporaryDirectory, logging, validate_hf_hub_args
|
|
|
|
|
|
# Module-level logger, obtained through the package's own `logging` helper module.
logger = logging.get_logger(__name__)
|
|
|
|
|
|
# Default Jinja templates shipped next to this module, in the `templates/` directory.
TEMPLATE_MODELCARD_PATH = Path(__file__).parent.joinpath("templates", "modelcard_template.md")
TEMPLATE_DATASETCARD_PATH = Path(__file__).parent.joinpath("templates", "datasetcard_template.md")

# Matches the leading YAML metadata block of a card: group 1 is the opening `---` line,
# group 2 the raw YAML, group 3 the closing `---` line.
# This is the exact same regex as the one used by the Hub server. Please keep both in sync.
# See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
|
|
|
|
|
|
class RepoCard:
    """A repo card: a Markdown document made of a YAML metadata block followed by a Markdown body.

    Subclasses customize the behavior through the three class attributes below.
    """

    # Class used to wrap the metadata parsed from the YAML block (stored in `self.data`).
    card_data_class = CardData
    # Template rendered by `from_template` when neither `template_path` nor `template_str` is given.
    default_template_path = TEMPLATE_MODELCARD_PATH
    # Repo type used by `load`, `validate` and `push_to_hub` when the caller does not pass one.
    repo_type = "model"

    def __init__(self, content: str, ignore_metadata_errors: bool = False):
        """Initialize a RepoCard from string content. The content should be a
        Markdown file with a YAML block at the beginning and a Markdown body.

        Args:
            content (`str`): The content of the Markdown file.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                Forwarded as-is to `card_data_class` when the metadata is parsed.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> text = '''
            ... ---
            ... language: en
            ... license: mit
            ... ---
            ...
            ... # My repo
            ... '''
            >>> card = RepoCard(text)
            >>> card.data.to_dict()
            {'language': 'en', 'license': 'mit'}
            >>> card.text
            '\\n# My repo\\n'

            ```
        <Tip>
        Raises the following error:

            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
              when the content of the repo card metadata is not a dictionary.

        </Tip>
        """

        # Set the content of the RepoCard, as well as underlying .data and .text attributes.
        # See the `content` property setter for more details.
        # `ignore_metadata_errors` must be set first: the setter reads it.
        self.ignore_metadata_errors = ignore_metadata_errors
        self.content = content

    @property
    def content(self):
        """The content of the RepoCard, including the YAML block and the Markdown body."""
        # Reuse the line ending detected in the content last assigned, so that re-serializing
        # a card does not produce a diff on every line.
        line_break = _detect_line_ending(self._content) or "\n"
        return f"---{line_break}{self.data.to_yaml(line_break=line_break)}{line_break}---{line_break}{self.text}"

    @content.setter
    def content(self, content: str):
        """Set the content of the RepoCard.

        Parses `content` and refreshes `self.text` (Markdown body) and `self.data` (metadata).

        Raises:
            `ValueError`: if the YAML block is found but does not contain a dictionary.
        """
        self._content = content

        match = REGEX_YAML_BLOCK.search(content)
        if match:
            # Metadata found in the YAML block
            yaml_block = match.group(2)
            self.text = content[match.end() :]
            data_dict = yaml.safe_load(yaml_block)

            # An empty YAML block is parsed as `None`: treat it as empty metadata.
            if data_dict is None:
                data_dict = {}

            # The YAML block's data should be a dictionary
            if not isinstance(data_dict, dict):
                raise ValueError("repo card metadata block should be a dict")
        else:
            # Model card without metadata... create empty metadata
            logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
            data_dict = {}
            self.text = content

        self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)

    def __str__(self):
        """Return the full card content (same as the `content` property)."""
        return self.content

    def save(self, filepath: Union[Path, str]):
        r"""Save a RepoCard to a file.

        Missing parent directories are created. The file is written as UTF-8.

        Args:
            filepath (`Union[Path, str]`): Filepath to the markdown file to save.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
            >>> card.save("/tmp/test.md")

            ```
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        # Preserve newlines as in the existing file: `newline=""` disables newline translation.
        with open(filepath, mode="w", newline="", encoding="utf-8") as f:
            f.write(str(self))

    @classmethod
    def load(
        cls,
        repo_id_or_path: Union[str, Path],
        repo_type: Optional[str] = None,
        token: Optional[str] = None,
        ignore_metadata_errors: bool = False,
    ):
        """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.

        Args:
            repo_id_or_path (`Union[str, Path]`):
                The repo ID associated with a Hugging Face Hub repo or a local filepath.
            repo_type (`str`, *optional*):
                The type of Hugging Face repo to load from. Defaults to None, which will use "model". Other options
                are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
                class, the default value will be the child class's `repo_type`.
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
            ignore_metadata_errors (`bool`):
                If True, errors while parsing the metadata section will be ignored. Some information might be lost during
                the process. Use it at your own risk.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
                README.md file or filepath.

        Raises:
            `ValueError`: if `repo_id_or_path` is a `Path` that does not exist on disk.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard.load("nateraw/food")
            >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]

            ```
        """

        if Path(repo_id_or_path).exists():
            # An existing local path always takes precedence over a repo id.
            card_path = Path(repo_id_or_path)
        elif isinstance(repo_id_or_path, str):
            card_path = Path(
                hf_hub_download(
                    repo_id_or_path,
                    REPOCARD_NAME,
                    repo_type=repo_type or cls.repo_type,
                    token=token,
                )
            )
        else:
            raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")

        # Preserve newlines in the existing file.
        with card_path.open(mode="r", newline="", encoding="utf-8") as f:
            return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)

    def validate(self, repo_type: Optional[str] = None):
        """Validates card against Hugging Face Hub's card validation logic.
        Using this function requires access to the internet, so it is only called
        internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].

        Args:
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
                If this function is called from a child class, the default will be the child class's `repo_type`.

        <Tip>
        Raises the following errors:

            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
              if the card fails validation checks.
            - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
              if the request to the Hub API fails for any other reason.

        </Tip>
        """

        # Use the given repo type if provided, otherwise the repo type of the card.
        repo_type = repo_type or self.repo_type

        body = {
            "repoType": repo_type,
            "content": str(self),
        }
        headers = {"Accept": "text/plain"}

        try:
            r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
            r.raise_for_status()
        except requests.exceptions.HTTPError as exc:
            if r.status_code == 400:
                # A 400 response is reported as a validation failure; keep the HTTP error as the cause.
                raise ValueError(r.text) from exc
            raise

    def push_to_hub(
        self,
        repo_id: str,
        token: Optional[str] = None,
        repo_type: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        revision: Optional[str] = None,
        create_pr: Optional[bool] = None,
        parent_commit: Optional[str] = None,
    ):
        """Push a RepoCard to a Hugging Face Hub repo.

        The card is validated with `validate` first, then uploaded as the repo's README file.

        Args:
            repo_id (`str`):
                The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
                the stored token.
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
                function is called by a child class, it will default to the child class's `repo_type`.
            commit_message (`str`, *optional*):
                The summary / title / first line of the generated commit.
            commit_description (`str`, *optional*)
                The description of the generated commit.
            revision (`str`, *optional*):
                The git revision to commit from. Defaults to the head of the `"main"` branch.
            create_pr (`bool`, *optional*):
                Whether or not to create a Pull Request with this commit. Defaults to `False`.
            parent_commit (`str`, *optional*):
                The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
                If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
                If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
                Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
                especially useful if the repo is updated / committed to concurrently.
        Returns:
            `str`: URL of the commit which updated the card metadata.
        """

        # Use the given repo type if provided, otherwise the repo type of the card.
        repo_type = repo_type or self.repo_type

        # Validate card before pushing to hub
        self.validate(repo_type=repo_type)

        with SoftTemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir) / REPOCARD_NAME
            # Go through `save` so the uploaded file is always UTF-8 with untouched line endings,
            # instead of depending on the platform's default encoding and newline translation.
            self.save(tmp_path)
            url = upload_file(
                path_or_fileobj=str(tmp_path),
                path_in_repo=REPOCARD_NAME,
                repo_id=repo_id,
                token=token,
                repo_type=repo_type,
                commit_message=commit_message,
                commit_description=commit_description,
                create_pr=create_pr,
                revision=revision,
                parent_commit=parent_commit,
            )
        return url

    @classmethod
    def from_template(
        cls,
        card_data: CardData,
        template_path: Optional[str] = None,
        template_str: Optional[str] = None,
        **template_kwargs,
    ):
        """Initialize a RepoCard from a template. By default, it uses the default template.

        Templates are Jinja2 templates that can be customized by passing keyword arguments.

        Args:
            card_data (`huggingface_hub.CardData`):
                A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
                header of the repo card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                A path to a markdown file with optional Jinja template variables that can be filled
                in with `template_kwargs`. Defaults to the default template. Takes precedence over
                `template_str` when both are given. The file is read as UTF-8.
            template_str (`str`, *optional*):
                The template itself, as a string. Only used when `template_path` is not given.
            template_kwargs:
                Extra variables made available to the template. They take priority over the entries
                of `card_data.to_dict()` that have the same name.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
            template.

        Raises:
            `ImportError`: if Jinja2 is not installed.
        """
        if is_jinja_available():
            import jinja2
        else:
            raise ImportError(
                "Using RepoCard.from_template requires Jinja2 to be installed. Please"
                " install it with `pip install Jinja2`."
            )

        kwargs = card_data.to_dict().copy()
        kwargs.update(template_kwargs)  # Template_kwargs have priority

        # Read templates explicitly as UTF-8 rather than with the platform's default encoding.
        if template_path is not None:
            template_str = Path(template_path).read_text(encoding="utf-8")
        if template_str is None:
            template_str = Path(cls.default_template_path).read_text(encoding="utf-8")
        template = jinja2.Template(template_str)
        # The serialized metadata is exposed to the template as `card_data`.
        content = template.render(card_data=card_data.to_yaml(), **kwargs)
        return cls(content)
|
|
|
|
|
|
class ModelCard(RepoCard):
    """Repo card for a model repo: metadata is wrapped in `ModelCardData`."""

    card_data_class = ModelCardData
    default_template_path = TEMPLATE_MODELCARD_PATH
    repo_type = "model"

    @classmethod
    def from_template(  # type: ignore # violates Liskov property but easier to use
        cls,
        card_data: ModelCardData,
        template_path: Optional[str] = None,
        template_str: Optional[str] = None,
        **template_kwargs,
    ):
        """Build a ModelCard by rendering a Jinja2 template.

        When no template is given, the default model card template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md

        The template can be customized by passing extra keyword arguments, which are made
        available as template variables.

        Args:
            card_data (`huggingface_hub.ModelCardData`):
                The metadata to include in the YAML header of the model card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                Path to a markdown file, optionally containing Jinja template variables that are
                filled in with `template_kwargs`. Defaults to the default template.
            template_str (`str`, *optional*):
                The template given directly as a string.

        Returns:
            [`huggingface_hub.ModelCard`]: A ModelCard instance holding the given card data and the
            content rendered from the template.

        Example:
            ```python
            >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult

            >>> # Using the Default Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     license='mit',
            ...     library_name='timm',
            ...     tags=['image-classification', 'resnet'],
            ...     datasets=['beans'],
            ...     metrics=['accuracy'],
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data,
            ...     model_description='This model does x + y...'
            ... )

            >>> # Including Evaluation Results
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet'],
            ...     eval_results=[
            ...         EvalResult(
            ...             task_type='image-classification',
            ...             dataset_type='beans',
            ...             dataset_name='Beans',
            ...             metric_type='accuracy',
            ...             metric_value=0.9,
            ...         ),
            ...     ],
            ...     model_name='my-cool-model',
            ... )
            >>> card = ModelCard.from_template(card_data)

            >>> # Using a Custom Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet']
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/modelcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Nothing model-specific to do here: the override only narrows the type of `card_data`.
        return super().from_template(
            card_data,
            template_path=template_path,
            template_str=template_str,
            **template_kwargs,
        )
|
|
|
|
|
|
class DatasetCard(RepoCard):
    """Repo card for a dataset repo: metadata is wrapped in `DatasetCardData`."""

    card_data_class = DatasetCardData
    default_template_path = TEMPLATE_DATASETCARD_PATH
    repo_type = "dataset"

    @classmethod
    def from_template(  # type: ignore # violates Liskov property but easier to use
        cls,
        card_data: DatasetCardData,
        template_path: Optional[str] = None,
        template_str: Optional[str] = None,
        **template_kwargs,
    ):
        """Build a DatasetCard by rendering a Jinja2 template.

        When no template is given, the default dataset card template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md

        The template can be customized by passing extra keyword arguments, which are made
        available as template variables.

        Args:
            card_data (`huggingface_hub.DatasetCardData`):
                The metadata to include in the YAML header of the dataset card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                Path to a markdown file, optionally containing Jinja template variables that are
                filled in with `template_kwargs`. Defaults to the default template.
            template_str (`str`, *optional*):
                The template given directly as a string.

        Returns:
            [`huggingface_hub.DatasetCard`]: A DatasetCard instance holding the given card data and the
            content rendered from the template.

        Example:
            ```python
            >>> from huggingface_hub import DatasetCard, DatasetCardData

            >>> # Using the Default Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ...     annotations_creators='crowdsourced',
            ...     task_categories=['text-classification'],
            ...     task_ids=['sentiment-classification', 'text-scoring'],
            ...     multilinguality='monolingual',
            ...     pretty_name='My Text Classification Dataset',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data,
            ...     pretty_name=card_data.pretty_name,
            ... )

            >>> # Using a Custom Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/datasetcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Nothing dataset-specific to do here: the override only narrows the type of `card_data`.
        return super().from_template(
            card_data,
            template_path=template_path,
            template_str=template_str,
            **template_kwargs,
        )
|
|
|
|
|
|
class SpaceCard(RepoCard):
    """Repo card for a Space repo: metadata is wrapped in `SpaceCardData`."""

    repo_type = "space"
    card_data_class = SpaceCardData
    # No Space-specific template is referenced here: the model card template is reused.
    default_template_path = TEMPLATE_MODELCARD_PATH
|
|
|
|
|
|
def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
|
|
"""Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
|
|
|
|
Uses same implementation as in Hub server, keep it in sync.
|
|
|
|
Returns:
|
|
str: The detected line ending of the string.
|
|
"""
|
|
cr = content.count("\r")
|
|
lf = content.count("\n")
|
|
crlf = content.count("\r\n")
|
|
if cr + lf == 0:
|
|
return None
|
|
if crlf == cr and crlf == lf:
|
|
return "\r\n"
|
|
if cr > lf:
|
|
return "\r"
|
|
else:
|
|
return "\n"
|
|
|
|
|
|
def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
    """Load the metadata found in the YAML block at the top of a local card file.

    Args:
        local_path (`Union[str, Path]`): Path to the markdown file to read.

    Returns:
        `Optional[Dict]`: the parsed metadata, or `None` when the file has no YAML block or
        when the YAML block is empty.

    Raises:
        `ValueError`: if the YAML block contains something other than a dictionary.
    """
    # Read as UTF-8 explicitly, matching how `metadata_save` writes the file, instead of
    # relying on the platform's default encoding.
    content = Path(local_path).read_text(encoding="utf-8")
    match = REGEX_YAML_BLOCK.search(content)
    if not match:
        return None

    data = yaml.safe_load(match.group(2))
    # An empty YAML block is parsed as `None` and returned as such.
    if data is None or isinstance(data, dict):
        return data
    raise ValueError("repo card metadata block should be a dict")
|
|
|
|
|
|
def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
    """Save the metadata dict in the upper YAML part of a local card file.

    If the file already has a YAML block, that block is replaced and the rest of the file is
    kept. Otherwise the YAML block is prepended to the existing content. The file is created
    if it does not exist.

    The function tries to preserve the newlines of the existing file. Docs about `open()`
    with the `newline=""` parameter:
    https://docs.python.org/3/library/functions.html?highlight=open#open

    NOTE: the original implementation documents that "^M" (CR-only) line breaks are not
    supported and end up replaced by LF.

    Args:
        local_path (`Union[str, Path]`): Path to the markdown file to update or create.
        data (`Dict`): The metadata to serialize. Key order is kept as-is.
    """
    line_break = "\n"
    content = ""
    # try to detect existing newline character
    if os.path.exists(local_path):
        with open(local_path, "r", newline="", encoding="utf8") as readme:
            content = readme.read()
            # `newlines` is a tuple when several kinds of line endings were seen: use the first one.
            if isinstance(readme.newlines, tuple):
                line_break = readme.newlines[0]
            elif isinstance(readme.newlines, str):
                line_break = readme.newlines

    # Build the full output BEFORE opening the file for writing: opening in "w" mode truncates
    # it, so a serialization error at that point would wipe the existing file.
    # sort_keys: keep dict order
    data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
    match = REGEX_YAML_BLOCK.search(content)
    if match:
        output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
    else:
        output = f"---{line_break}{data_yaml}---{line_break}{content}"

    # creates a new file if it does not exist yet
    with open(local_path, "w", newline="", encoding="utf8") as readme:
        readme.write(output)
|
|
|
|
|
|
def metadata_eval_result(
    *,
    model_pretty_name: str,
    task_pretty_name: str,
    task_id: str,
    metrics_pretty_name: str,
    metrics_id: str,
    metrics_value: Any,
    dataset_pretty_name: str,
    dataset_id: str,
    metrics_config: Optional[str] = None,
    metrics_verified: bool = False,
    dataset_config: Optional[str] = None,
    dataset_split: Optional[str] = None,
    dataset_revision: Optional[str] = None,
    metrics_verification_token: Optional[str] = None,
) -> Dict:
    """
    Build a metadata dict describing the result of one model evaluated on one dataset.

    Args:
        model_pretty_name (`str`):
            The name of the model in natural language.
        task_pretty_name (`str`):
            The name of a task in natural language.
        task_id (`str`):
            Example: automatic-speech-recognition. A task id.
        metrics_pretty_name (`str`):
            A name for the metric in natural language. Example: Test WER.
        metrics_id (`str`):
            Example: wer. A metric id from https://hf.co/metrics.
        metrics_value (`Any`):
            The value from the metric. Example: 20.0 or "20.0 ± 1.2".
        dataset_pretty_name (`str`):
            The name of the dataset in natural language.
        dataset_id (`str`):
            Example: common_voice. A dataset id from https://hf.co/datasets.
        metrics_config (`str`, *optional*):
            The name of the metric configuration used in `load_metric()`.
            Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
        metrics_verified (`bool`, *optional*, defaults to `False`):
            Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
        dataset_config (`str`, *optional*):
            Example: fr. The name of the dataset configuration used in `load_dataset()`.
        dataset_split (`str`, *optional*):
            Example: test. The name of the dataset split used in `load_dataset()`.
        dataset_revision (`str`, *optional*):
            Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
            used in `load_dataset()`.
        metrics_verification_token (`str`, *optional*):
            A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.

    Returns:
        `dict`: a metadata dict with a single `"model-index"` key holding the evaluation result.

    Example:
        ```python
        >>> from huggingface_hub import metadata_eval_result
        >>> results = metadata_eval_result(
        ...         model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
        ...         task_pretty_name="Text Classification",
        ...         task_id="text-classification",
        ...         metrics_pretty_name="Accuracy",
        ...         metrics_id="accuracy",
        ...         metrics_value=0.2662102282047272,
        ...         dataset_pretty_name="ReactionJPEG",
        ...         dataset_id="julien-c/reactionjpeg",
        ...         dataset_config="default",
        ...         dataset_split="test",
        ... )
        >>> results == {
        ...     'model-index': [
        ...         {
        ...             'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [
        ...                 {
        ...                     'task': {
        ...                         'type': 'text-classification',
        ...                         'name': 'Text Classification'
        ...                     },
        ...                     'dataset': {
        ...                         'name': 'ReactionJPEG',
        ...                         'type': 'julien-c/reactionjpeg',
        ...                         'config': 'default',
        ...                         'split': 'test'
        ...                     },
        ...                     'metrics': [
        ...                         {
        ...                             'type': 'accuracy',
        ...                             'value': 0.2662102282047272,
        ...                             'name': 'Accuracy',
        ...                             'verified': False
        ...                         }
        ...                     ]
        ...                 }
        ...             ]
        ...         }
        ...     ]
        ... }
        True

        ```
    """
    # Map this function's "pretty name / id" vocabulary onto the fields of `EvalResult`.
    single_result = EvalResult(
        task_type=task_id,
        task_name=task_pretty_name,
        dataset_type=dataset_id,
        dataset_name=dataset_pretty_name,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
        dataset_revision=dataset_revision,
        metric_type=metrics_id,
        metric_name=metrics_pretty_name,
        metric_value=metrics_value,
        metric_config=metrics_config,
        verified=metrics_verified,
        verify_token=metrics_verification_token,
    )
    model_index = eval_results_to_model_index(
        model_name=model_pretty_name,
        eval_results=[single_result],
    )
    return {"model-index": model_index}
|
|
|
|
|
|
@validate_hf_hub_args
def metadata_update(
    repo_id: str,
    metadata: Dict,
    *,
    repo_type: Optional[str] = None,
    overwrite: bool = False,
    token: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: bool = False,
    parent_commit: Optional[str] = None,
) -> str:
    """
    Updates the metadata in the README.md of a repository on the Hugging Face Hub.
    If the README.md file doesn't exist yet, a new one is created with the metadata and
    the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
    as a Space cannot exist without a `README.md` file.

    The `metadata` dict passed by the caller is not modified.

    Args:
        repo_id (`str`):
            The name of the repository.
        metadata (`dict`):
            A dictionary containing the metadata to be updated.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if updating to a dataset or space,
            `None` or `"model"` if updating to a model. Default is `None`.
        overwrite (`bool`, *optional*, defaults to `False`):
            If set to `True` an existing field can be overwritten, otherwise
            attempting to overwrite an existing field will cause an error.
        token (`str`, *optional*):
            The Hugging Face authentication token.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Update metadata with huggingface_hub"`
        commit_description (`str` *optional*)
            The description of the generated commit
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the
            `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `revision` with that commit.
            Defaults to `False`.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
    Returns:
        `str`: URL of the commit which updated the card metadata.

    Raises:
        `ValueError`: if `repo_type` is unknown, if the repo is a Space without a `README.md`
            file, or if an existing field / metric would be changed while `overwrite=False`.

    Example:
        ```python
        >>> from huggingface_hub import metadata_update
        >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [{'dataset': {'name': 'ReactionGIF',
        ...                                      'type': 'julien-c/reactiongif'},
        ...                           'metrics': [{'name': 'Recall',
        ...                                        'type': 'recall',
        ...                                        'value': 0.7762102282047272}],
        ...                          'task': {'name': 'Text Classification',
        ...                                   'type': 'text-classification'}}]}]}
        >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)

        ```
    """
    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"

    # Card class given repo_type
    card_class: Type[RepoCard]
    if repo_type is None or repo_type == "model":
        card_class = ModelCard
    elif repo_type == "dataset":
        card_class = DatasetCard
    elif repo_type == "space":
        card_class = RepoCard
    else:
        raise ValueError(f"Unknown repo_type: {repo_type}")

    # Either load repo_card from the Hub or create an empty one.
    # NOTE: Will not create the repo if it doesn't exist.
    try:
        card = card_class.load(repo_id, token=token, repo_type=repo_type)
    except EntryNotFoundError:
        if repo_type == "space":
            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")

        # Initialize a ModelCard or DatasetCard from default template and no data.
        card = card_class.from_template(CardData())

    for key, value in metadata.items():
        if key == "model-index":
            # if the new metadata doesn't include a name, either use existing one or repo name
            if "name" not in value[0]:
                # The existing name lives on the card's data, not on the card itself.
                fallback_name = getattr(card.data, "model_name", None) or repo_id
                # Work on a copy so that the caller's `metadata` is left untouched.
                value = [{**value[0], "name": fallback_name}, *value[1:]]
            model_name, new_results = model_index_to_eval_results(value)
            if card.data.eval_results is None:
                card.data.eval_results = new_results
                card.data.model_name = model_name
            else:
                existing_results = card.data.eval_results

                # Iterate over new results
                #   Iterate over existing results
                #       If both results describe the same metric but value is different:
                #           If overwrite=True: overwrite the metric value
                #           Else: raise ValueError
                #       Else: append new result to existing ones.
                for new_result in new_results:
                    result_found = False
                    for existing_result in existing_results:
                        if new_result.is_equal_except_value(existing_result):
                            if new_result != existing_result and not overwrite:
                                raise ValueError(
                                    "You passed a new value for the existing metric"
                                    f" 'name: {new_result.metric_name}, type: "
                                    f"{new_result.metric_type}'. Set `overwrite=True`"
                                    " to overwrite existing metrics."
                                )
                            result_found = True
                            existing_result.metric_value = new_result.metric_value
                            # The verification token is only carried over for verified results.
                            if existing_result.verified is True:
                                existing_result.verify_token = new_result.verify_token
                    if not result_found:
                        card.data.eval_results.append(new_result)
        else:
            # Any metadata that is not a result metric
            existing_value = card.data.get(key)
            if existing_value is not None and not overwrite and existing_value != value:
                raise ValueError(
                    f"You passed a new value for the existing meta data field '{key}'."
                    " Set `overwrite=True` to overwrite existing metadata."
                )
            else:
                card.data[key] = value

    return card.push_to_hub(
        repo_id,
        token=token,
        repo_type=repo_type,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        revision=revision,
        parent_commit=parent_commit,
    )
|