ai-content-maker/.venv/Lib/site-packages/transformers/models/pop2piano/processing_pop2piano.py

# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Processor class for Pop2Piano."""

import os
from typing import List, Optional, Union

import numpy as np

from ...feature_extraction_utils import BatchFeature
from ...processing_utils import ProcessorMixin
from ...tokenization_utils import BatchEncoding, PaddingStrategy, TruncationStrategy
from ...utils import TensorType


class Pop2PianoProcessor(ProcessorMixin):
    r"""
    Constructs an Pop2Piano processor which wraps a Pop2Piano Feature Extractor and Pop2Piano Tokenizer into a single
    processor.

    [`Pop2PianoProcessor`] offers all the functionalities of [`Pop2PianoFeatureExtractor`] and [`Pop2PianoTokenizer`].
    See the docstring of [`~Pop2PianoProcessor.__call__`] and [`~Pop2PianoProcessor.decode`] for more information.

    Args:
        feature_extractor (`Pop2PianoFeatureExtractor`):
            An instance of [`Pop2PianoFeatureExtractor`]. The feature extractor is a required input.
        tokenizer (`Pop2PianoTokenizer`):
            An instance of ['Pop2PianoTokenizer`]. The tokenizer is a required input.
    """

    attributes = ["feature_extractor", "tokenizer"]
    feature_extractor_class = "Pop2PianoFeatureExtractor"
    tokenizer_class = "Pop2PianoTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        super().__init__(feature_extractor, tokenizer)

    def __call__(
        self,
        audio: Union[np.ndarray, List[float], List[np.ndarray]] = None,
        sampling_rate: Union[int, List[int]] = None,
        steps_per_beat: int = 2,
        resample: Optional[bool] = True,
        notes: Union[List, TensorType] = None,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        max_length: Optional[int] = None,
        pad_to_multiple_of: Optional[int] = None,
        verbose: bool = True,
        **kwargs,
    ) -> Union[BatchFeature, BatchEncoding]:
        """
        This method uses [`Pop2PianoFeatureExtractor.__call__`] method to prepare log-mel-spectrograms for the model,
        and [`Pop2PianoTokenizer.__call__`] to prepare token_ids from notes.

        Please refer to the docstring of the above two methods for more information.
        """

        # Since Feature Extractor needs both audio and sampling_rate and tokenizer needs both token_ids and
        # feature_extractor_output, we must check for both.
        if (audio is None and sampling_rate is None) and (notes is None):
            raise ValueError(
                "You have to specify at least audios and sampling_rate in order to use feature extractor or "
                "notes to use the tokenizer part."
            )

        if audio is not None and sampling_rate is not None:
            inputs = self.feature_extractor(
                audio=audio,
                sampling_rate=sampling_rate,
                steps_per_beat=steps_per_beat,
                resample=resample,
                **kwargs,
            )
        if notes is not None:
            encoded_token_ids = self.tokenizer(
                notes=notes,
                padding=padding,
                truncation=truncation,
                max_length=max_length,
                pad_to_multiple_of=pad_to_multiple_of,
                verbose=verbose,
                **kwargs,
            )

        if notes is None:
            return inputs

        elif audio is None or sampling_rate is None:
            return encoded_token_ids

        else:
            inputs["token_ids"] = encoded_token_ids["token_ids"]
            return inputs

    def batch_decode(
        self,
        token_ids,
        feature_extractor_output: BatchFeature,
        return_midi: bool = True,
    ) -> BatchEncoding:
        """
        This method uses [`Pop2PianoTokenizer.batch_decode`] method to convert model generated token_ids to midi_notes.

        Please refer to the docstring of the above two methods for more information.
        """

        return self.tokenizer.batch_decode(
            token_ids=token_ids, feature_extractor_output=feature_extractor_output, return_midi=return_midi
        )

    @property
    def model_input_names(self):
        tokenizer_input_names = self.tokenizer.model_input_names
        feature_extractor_input_names = self.feature_extractor.model_input_names
        return list(dict.fromkeys(tokenizer_input_names + feature_extractor_input_names))

    def save_pretrained(self, save_directory, **kwargs):
        if os.path.isfile(save_directory):
            raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
        os.makedirs(save_directory, exist_ok=True)
        return super().save_pretrained(save_directory, **kwargs)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(*args)
first commit 2024-05-03 04:18:51 +03:00			`# coding=utf-8`
			`# Copyright 2023 The HuggingFace Inc. team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`""" Processor class for Pop2Piano."""`

			`import os`
			`from typing import List, Optional, Union`

			`import numpy as np`

			`from ...feature_extraction_utils import BatchFeature`
			`from ...processing_utils import ProcessorMixin`
			`from ...tokenization_utils import BatchEncoding, PaddingStrategy, TruncationStrategy`
			`from ...utils import TensorType`


			`class Pop2PianoProcessor(ProcessorMixin):`
			`r"""`
			`Constructs an Pop2Piano processor which wraps a Pop2Piano Feature Extractor and Pop2Piano Tokenizer into a single`
			`processor.`

			[`Pop2PianoProcessor`] offers all the functionalities of [`Pop2PianoFeatureExtractor`] and [`Pop2PianoTokenizer`].
			See the docstring of [`~Pop2PianoProcessor.__call__`] and [`~Pop2PianoProcessor.decode`] for more information.

			`Args:`
			feature_extractor (`Pop2PianoFeatureExtractor`):
			An instance of [`Pop2PianoFeatureExtractor`]. The feature extractor is a required input.
			tokenizer (`Pop2PianoTokenizer`):
			An instance of ['Pop2PianoTokenizer`]. The tokenizer is a required input.
			`"""`

			`attributes = ["feature_extractor", "tokenizer"]`
			`feature_extractor_class = "Pop2PianoFeatureExtractor"`
			`tokenizer_class = "Pop2PianoTokenizer"`

			`def __init__(self, feature_extractor, tokenizer):`
			`super().__init__(feature_extractor, tokenizer)`

			`def __call__(`
			`self,`
			`audio: Union[np.ndarray, List[float], List[np.ndarray]] = None,`
			`sampling_rate: Union[int, List[int]] = None,`
			`steps_per_beat: int = 2,`
			`resample: Optional[bool] = True,`
			`notes: Union[List, TensorType] = None,`
			`padding: Union[bool, str, PaddingStrategy] = False,`
			`truncation: Union[bool, str, TruncationStrategy] = None,`
			`max_length: Optional[int] = None,`
			`pad_to_multiple_of: Optional[int] = None,`
			`verbose: bool = True,`
			`**kwargs,`
			`) -> Union[BatchFeature, BatchEncoding]:`
			`"""`
			This method uses [`Pop2PianoFeatureExtractor.__call__`] method to prepare log-mel-spectrograms for the model,
			and [`Pop2PianoTokenizer.__call__`] to prepare token_ids from notes.

			`Please refer to the docstring of the above two methods for more information.`
			`"""`

			`# Since Feature Extractor needs both audio and sampling_rate and tokenizer needs both token_ids and`
			`# feature_extractor_output, we must check for both.`
			`if (audio is None and sampling_rate is None) and (notes is None):`
			`raise ValueError(`
			`"You have to specify at least audios and sampling_rate in order to use feature extractor or "`
			`"notes to use the tokenizer part."`
			`)`

			`if audio is not None and sampling_rate is not None:`
			`inputs = self.feature_extractor(`
			`audio=audio,`
			`sampling_rate=sampling_rate,`
			`steps_per_beat=steps_per_beat,`
			`resample=resample,`
			`**kwargs,`
			`)`
			`if notes is not None:`
			`encoded_token_ids = self.tokenizer(`
			`notes=notes,`
			`padding=padding,`
			`truncation=truncation,`
			`max_length=max_length,`
			`pad_to_multiple_of=pad_to_multiple_of,`
			`verbose=verbose,`
			`**kwargs,`
			`)`

			`if notes is None:`
			`return inputs`

			`elif audio is None or sampling_rate is None:`
			`return encoded_token_ids`

			`else:`
			`inputs["token_ids"] = encoded_token_ids["token_ids"]`
			`return inputs`

			`def batch_decode(`
			`self,`
			`token_ids,`
			`feature_extractor_output: BatchFeature,`
			`return_midi: bool = True,`
			`) -> BatchEncoding:`
			`"""`
			This method uses [`Pop2PianoTokenizer.batch_decode`] method to convert model generated token_ids to midi_notes.

			`Please refer to the docstring of the above two methods for more information.`
			`"""`

			`return self.tokenizer.batch_decode(`
			`token_ids=token_ids, feature_extractor_output=feature_extractor_output, return_midi=return_midi`
			`)`

			`@property`
			`def model_input_names(self):`
			`tokenizer_input_names = self.tokenizer.model_input_names`
			`feature_extractor_input_names = self.feature_extractor.model_input_names`
			`return list(dict.fromkeys(tokenizer_input_names + feature_extractor_input_names))`

			`def save_pretrained(self, save_directory, **kwargs):`
			`if os.path.isfile(save_directory):`
			`raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")`
			`os.makedirs(save_directory, exist_ok=True)`
			`return super().save_pretrained(save_directory, **kwargs)`

			`@classmethod`
			`def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):`
			`args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)`
			`return cls(*args)`