664 lines
28 KiB
Python
664 lines
28 KiB
Python
|
# coding=utf-8
|
||
|
# Copyright 2018 DPR Authors, The Hugging Face Team.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
""" PyTorch DPR model for Open Domain Question Answering."""
|
||
|
|
||
|
|
||
|
from dataclasses import dataclass
|
||
|
from typing import Optional, Tuple, Union
|
||
|
|
||
|
import torch
|
||
|
from torch import Tensor, nn
|
||
|
|
||
|
from ...modeling_outputs import BaseModelOutputWithPooling
|
||
|
from ...modeling_utils import PreTrainedModel
|
||
|
from ...utils import (
|
||
|
ModelOutput,
|
||
|
add_start_docstrings,
|
||
|
add_start_docstrings_to_model_forward,
|
||
|
logging,
|
||
|
replace_return_docstrings,
|
||
|
)
|
||
|
from ..bert.modeling_bert import BertModel
|
||
|
from .configuration_dpr import DPRConfig
|
||
|
|
||
|
|
||
|
logger = logging.get_logger(__name__)
|
||
|
|
||
|
_CONFIG_FOR_DOC = "DPRConfig"
|
||
|
_CHECKPOINT_FOR_DOC = "facebook/dpr-ctx_encoder-single-nq-base"
|
||
|
|
||
|
|
||
|
from ..deprecated._archive_maps import ( # noqa: F401, E402
|
||
|
DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, # noqa: F401, E402
|
||
|
DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, # noqa: F401, E402
|
||
|
DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST, # noqa: F401, E402
|
||
|
)
|
||
|
|
||
|
|
||
|
##########
|
||
|
# Outputs
|
||
|
##########
|
||
|
|
||
|
|
||
|
@dataclass
class DPRContextEncoderOutput(ModelOutput):
    """
    Class for outputs of [`DPRContextEncoder`].

    Args:
        pooler_output (`torch.FloatTensor` of shape `(batch_size, embeddings_size)`):
            The DPR encoder outputs the *pooler_output* that corresponds to the context representation. Last layer
            hidden-state of the first token of the sequence (classification token) further processed by a Linear layer.
            This output is to be used to embed contexts for nearest neighbors queries with questions embeddings.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    # `pooler_output` is the only required field; the two optional fields default to None.
    pooler_output: torch.FloatTensor
    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
|
||
|
|
||
|
|
||
|
@dataclass
class DPRQuestionEncoderOutput(ModelOutput):
    """
    Class for outputs of [`DPRQuestionEncoder`].

    Args:
        pooler_output (`torch.FloatTensor` of shape `(batch_size, embeddings_size)`):
            The DPR encoder outputs the *pooler_output* that corresponds to the question representation. Last layer
            hidden-state of the first token of the sequence (classification token) further processed by a Linear layer.
            This output is to be used to embed questions for nearest neighbors queries with context embeddings.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    # `pooler_output` is the only required field; the two optional fields default to None.
    pooler_output: torch.FloatTensor
    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
|
||
|
|
||
|
|
||
|
@dataclass
class DPRReaderOutput(ModelOutput):
    """
    Class for outputs of [`DPRReader`].

    Args:
        start_logits (`torch.FloatTensor` of shape `(n_passages, sequence_length)`):
            Logits of the start index of the span for each passage.
        end_logits (`torch.FloatTensor` of shape `(n_passages, sequence_length)`):
            Logits of the end index of the span for each passage.
        relevance_logits (`torch.FloatTensor` of shape `(n_passages, )`):
            Outputs of the QA classifier of the DPRReader that corresponds to the scores of each passage to answer the
            question, compared to all the other passages.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    # Only `start_logits` is required. The remaining fields default to None, so
    # they are annotated as Optional to match their defaults.
    start_logits: torch.FloatTensor
    end_logits: Optional[torch.FloatTensor] = None
    relevance_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
|
||
|
|
||
|
|
||
|
class DPRPreTrainedModel(PreTrainedModel):
    def _init_weights(self, module):
        """Initialize the parameters of a single `nn.Linear`, `nn.Embedding` or `nn.LayerNorm` module.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.LayerNorm):
            # Start layer norm as the identity transform: zero shift, unit scale.
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            pad_row = module.padding_idx
            if pad_row is not None:
                # Zero the row of the padding index after the random draw.
                module.weight.data[pad_row].zero_()
            return

        if isinstance(module, nn.Linear):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
|
||
|
|
||
|
|
||
|
class DPREncoder(DPRPreTrainedModel):
    # Wraps a pooler-less `BertModel`; the pooled vector is the hidden state of the
    # first token, optionally passed through a linear projection.
    base_model_prefix = "bert_model"

    def __init__(self, config: DPRConfig):
        """Build the BERT backbone and, when `config.projection_dim > 0`, the projection layer.

        Raises:
            ValueError: if the backbone's `hidden_size` is not strictly positive.
        """
        super().__init__(config)
        self.bert_model = BertModel(config, add_pooling_layer=False)
        hidden_size = self.bert_model.config.hidden_size
        if hidden_size <= 0:
            raise ValueError("Encoder hidden_size can't be zero")
        self.projection_dim = config.projection_dim
        if self.projection_dim > 0:
            self.encode_proj = nn.Linear(hidden_size, config.projection_dim)
        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ) -> Union[BaseModelOutputWithPooling, Tuple[Tensor, ...]]:
        """Run the backbone and derive the pooled vector from the first token.

        Returns:
            A `BaseModelOutputWithPooling` when `return_dict` is truthy, otherwise the tuple
            `(sequence_output, pooled_output)` followed by the backbone outputs from index 2 onwards.
        """
        bert_outputs = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = bert_outputs[0]

        # Hidden state at position 0 of every sequence, projected when a projection is configured.
        first_token_state = sequence_output[:, 0, :]
        if self.projection_dim > 0:
            pooled_output = self.encode_proj(first_token_state)
        else:
            pooled_output = first_token_state

        if return_dict:
            return BaseModelOutputWithPooling(
                last_hidden_state=sequence_output,
                pooler_output=pooled_output,
                hidden_states=bert_outputs.hidden_states,
                attentions=bert_outputs.attentions,
            )

        return (sequence_output, pooled_output) + bert_outputs[2:]

    @property
    def embeddings_size(self) -> int:
        """Size of the pooled vector: projection width if a projection exists, else the BERT hidden size."""
        if self.projection_dim > 0:
            return self.encode_proj.out_features
        return self.bert_model.config.hidden_size
|
||
|
|
||
|
|
||
|
class DPRSpanPredictor(DPRPreTrainedModel):
    # A `DPREncoder` followed by two linear heads: per-token start/end logits and a
    # per-passage relevance score computed from the first token.
    base_model_prefix = "encoder"

    def __init__(self, config: DPRConfig):
        """Create the encoder and the two heads, both sized from `encoder.embeddings_size`."""
        super().__init__(config)
        self.encoder = DPREncoder(config)
        head_input_dim = self.encoder.embeddings_size
        self.qa_outputs = nn.Linear(head_input_dim, 2)
        self.qa_classifier = nn.Linear(head_input_dim, 1)
        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ) -> Union[DPRReaderOutput, Tuple[Tensor, ...]]:
        """Compute start, end and relevance logits for each passage.

        Returns:
            A `DPRReaderOutput` when `return_dict` is truthy, otherwise the tuple
            `(start_logits, end_logits, relevance_logits)` followed by the encoder outputs from index 2 onwards.
        """
        # notations: N - number of questions in a batch, M - number of passages per questions, L - sequence length
        if input_ids is not None:
            n_passages, sequence_length = input_ids.size()
        else:
            n_passages, sequence_length = inputs_embeds.size()[:2]

        # feed encoder
        encoder_outputs = self.encoder(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]

        # compute logits: the last dimension of `qa_outputs` holds (start, end)
        start_part, end_part = self.qa_outputs(sequence_output).split(1, dim=-1)
        cls_scores = self.qa_classifier(sequence_output[:, 0, :])

        # drop the singleton dimension and resize
        start_logits = start_part.squeeze(-1).contiguous().view(n_passages, sequence_length)
        end_logits = end_part.squeeze(-1).contiguous().view(n_passages, sequence_length)
        relevance_logits = cls_scores.view(n_passages)

        if return_dict:
            return DPRReaderOutput(
                start_logits=start_logits,
                end_logits=end_logits,
                relevance_logits=relevance_logits,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )

        return (start_logits, end_logits, relevance_logits) + encoder_outputs[2:]
|
||
|
|
||
|
|
||
|
##################
|
||
|
# PreTrainedModel
|
||
|
##################
|
||
|
|
||
|
|
||
|
class DPRPretrainedContextEncoder(DPRPreTrainedModel):
    """
    Abstract base for the DPR context encoder: handles weights initialization and provides a simple interface for
    downloading and loading pretrained models.
    """

    # Prefix matching the attribute under which the concrete model stores its encoder.
    base_model_prefix = "ctx_encoder"
    config_class = DPRConfig
    load_tf_weights = None
|
||
|
|
||
|
|
||
|
class DPRPretrainedQuestionEncoder(DPRPreTrainedModel):
    """
    Abstract base for the DPR question encoder: handles weights initialization and provides a simple interface for
    downloading and loading pretrained models.
    """

    # Prefix matching the attribute under which the concrete model stores its encoder.
    base_model_prefix = "question_encoder"
    config_class = DPRConfig
    load_tf_weights = None
|
||
|
|
||
|
|
||
|
class DPRPretrainedReader(DPRPreTrainedModel):
    """
    Abstract base for the DPR reader: handles weights initialization and provides a simple interface for downloading
    and loading pretrained models.
    """

    # Prefix matching the attribute under which the concrete model stores its span predictor.
    base_model_prefix = "span_predictor"
    config_class = DPRConfig
    load_tf_weights = None
|
||
|
|
||
|
|
||
|
###############
|
||
|
# Actual Models
|
||
|
###############
|
||
|
|
||
|
|
||
|
DPR_START_DOCSTRING = r"""
|
||
|
|
||
|
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
|
||
|
library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
|
||
|
etc.)
|
||
|
|
||
|
This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
|
||
|
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage
|
||
|
and behavior.
|
||
|
|
||
|
Parameters:
|
||
|
config ([`DPRConfig`]): Model configuration class with all the parameters of the model.
|
||
|
Initializing with a config file does not load the weights associated with the model, only the
|
||
|
configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
|
||
|
"""
|
||
|
|
||
|
DPR_ENCODERS_INPUTS_DOCSTRING = r"""
|
||
|
Args:
|
||
|
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
||
|
Indices of input sequence tokens in the vocabulary. To match pretraining, DPR input sequence should be
|
||
|
formatted with [CLS] and [SEP] tokens as follows:
|
||
|
|
||
|
(a) For sequence pairs (for a pair title+text for example):
|
||
|
|
||
|
```
|
||
|
tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
|
||
|
token_type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
|
||
|
```
|
||
|
|
||
|
(b) For single sequences (for a question for example):
|
||
|
|
||
|
```
|
||
|
tokens: [CLS] the dog is hairy . [SEP]
|
||
|
token_type_ids: 0 0 0 0 0 0 0
|
||
|
```
|
||
|
|
||
|
DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
|
||
|
rather than the left.
|
||
|
|
||
|
Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
|
||
|
[`PreTrainedTokenizer.__call__`] for details.
|
||
|
|
||
|
[What are input IDs?](../glossary#input-ids)
|
||
|
attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
||
|
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
|
||
|
|
||
|
- 1 for tokens that are **not masked**,
|
||
|
- 0 for tokens that are **masked**.
|
||
|
|
||
|
[What are attention masks?](../glossary#attention-mask)
|
||
|
token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
||
|
Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
|
||
|
1]`:
|
||
|
|
||
|
- 0 corresponds to a *sentence A* token,
|
||
|
- 1 corresponds to a *sentence B* token.
|
||
|
|
||
|
[What are token type IDs?](../glossary#token-type-ids)
|
||
|
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
|
||
|
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
|
||
|
is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
|
||
|
model's internal embedding lookup matrix.
|
||
|
output_attentions (`bool`, *optional*):
|
||
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
|
||
|
tensors for more detail.
|
||
|
output_hidden_states (`bool`, *optional*):
|
||
|
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||
|
more detail.
|
||
|
return_dict (`bool`, *optional*):
|
||
|
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||
|
"""
|
||
|
|
||
|
DPR_READER_INPUTS_DOCSTRING = r"""
|
||
|
Args:
|
||
|
input_ids (`Tuple[torch.LongTensor]` of shapes `(n_passages, sequence_length)`):
|
||
|
Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the question
|
||
|
and 2) the passages titles and 3) the passages texts. To match pretraining, DPR `input_ids` sequence should
|
||
|
be formatted with [CLS] and [SEP] with the format:
|
||
|
|
||
|
`[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>`
|
||
|
|
||
|
DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
|
||
|
rather than the left.
|
||
|
|
||
|
Indices can be obtained using [`DPRReaderTokenizer`]. See this class documentation for more details.
|
||
|
|
||
|
[What are input IDs?](../glossary#input-ids)
|
||
|
attention_mask (`torch.FloatTensor` of shape `(n_passages, sequence_length)`, *optional*):
|
||
|
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
|
||
|
|
||
|
- 1 for tokens that are **not masked**,
|
||
|
- 0 for tokens that are **masked**.
|
||
|
|
||
|
[What are attention masks?](../glossary#attention-mask)
|
||
|
inputs_embeds (`torch.FloatTensor` of shape `(n_passages, sequence_length, hidden_size)`, *optional*):
|
||
|
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
|
||
|
is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
|
||
|
model's internal embedding lookup matrix.
|
||
|
output_attentions (`bool`, *optional*):
|
||
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
|
||
|
tensors for more detail.
|
||
|
output_hidden_states (`bool`, *optional*):
|
||
|
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||
|
more detail.
|
||
|
return_dict (`bool`, *optional*):
|
||
|
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||
|
"""
|
||
|
|
||
|
|
||
|
@add_start_docstrings(
    "The bare DPRContextEncoder transformer outputting pooler outputs as context representations.",
    DPR_START_DOCSTRING,
)
class DPRContextEncoder(DPRPretrainedContextEncoder):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.ctx_encoder = DPREncoder(config)
        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=DPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[Tensor] = None,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[DPRContextEncoderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples:

        ```python
        >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer

        >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        >>> model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
        >>> embeddings = model(input_ids).pooler_output
        ```"""

        # Flags left as None fall back to the values stored on the config.
        if output_attentions is None:
            output_attentions = self.config.output_attentions
        if output_hidden_states is None:
            output_hidden_states = self.config.output_hidden_states
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Exactly one of `input_ids` / `inputs_embeds` must be supplied.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        if input_ids is None and inputs_embeds is None:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if input_ids is not None:
            input_shape = input_ids.size()
            device = input_ids.device
        else:
            # Drop the trailing embedding dimension.
            input_shape = inputs_embeds.size()[:-1]
            device = inputs_embeds.device

        if attention_mask is None:
            if input_ids is None:
                attention_mask = torch.ones(input_shape, device=device)
            else:
                # Positions equal to the configured pad token id are masked out.
                attention_mask = input_ids != self.config.pad_token_id
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        encoder_outputs = self.ctx_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if return_dict:
            return DPRContextEncoderOutput(
                pooler_output=encoder_outputs.pooler_output,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )
        # Tuple form: skip the sequence output at index 0.
        return encoder_outputs[1:]
|
||
|
|
||
|
|
||
|
@add_start_docstrings(
    "The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.",
    DPR_START_DOCSTRING,
)
class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.question_encoder = DPREncoder(config)
        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=DPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[Tensor] = None,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[DPRQuestionEncoderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples:

        ```python
        >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer

        >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        >>> model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
        >>> embeddings = model(input_ids).pooler_output
        ```
        """
        # Flags left as None fall back to the values stored on the config.
        if output_attentions is None:
            output_attentions = self.config.output_attentions
        if output_hidden_states is None:
            output_hidden_states = self.config.output_hidden_states
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Exactly one of `input_ids` / `inputs_embeds` must be supplied.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        if input_ids is None and inputs_embeds is None:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.size()
            device = input_ids.device
        else:
            # Drop the trailing embedding dimension.
            input_shape = inputs_embeds.size()[:-1]
            device = inputs_embeds.device

        if attention_mask is None:
            if input_ids is None:
                attention_mask = torch.ones(input_shape, device=device)
            else:
                # Positions equal to the configured pad token id are masked out.
                attention_mask = input_ids != self.config.pad_token_id
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        encoder_outputs = self.question_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if return_dict:
            return DPRQuestionEncoderOutput(
                pooler_output=encoder_outputs.pooler_output,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )
        # Tuple form: skip the sequence output at index 0.
        return encoder_outputs[1:]
|
||
|
|
||
|
|
||
|
@add_start_docstrings(
    "The bare DPRReader transformer outputting span predictions.",
    DPR_START_DOCSTRING,
)
class DPRReader(DPRPretrainedReader):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.span_predictor = DPRSpanPredictor(config)
        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(DPR_READER_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=DPRReaderOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[Tensor] = None,
        attention_mask: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[DPRReaderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples:

        ```python
        >>> from transformers import DPRReader, DPRReaderTokenizer

        >>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
        >>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
        >>> encoded_inputs = tokenizer(
        ...     questions=["What is love ?"],
        ...     titles=["Haddaway"],
        ...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
        ...     return_tensors="pt",
        ... )
        >>> outputs = model(**encoded_inputs)
        >>> start_logits = outputs.start_logits
        >>> end_logits = outputs.end_logits
        >>> relevance_logits = outputs.relevance_logits
        ```
        """
        # Flags left as None fall back to the values stored on the config.
        if output_attentions is None:
            output_attentions = self.config.output_attentions
        if output_hidden_states is None:
            output_hidden_states = self.config.output_hidden_states
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Exactly one of `input_ids` / `inputs_embeds` must be supplied.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        if input_ids is None and inputs_embeds is None:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.size()
            device = input_ids.device
        else:
            # Drop the trailing embedding dimension.
            input_shape = inputs_embeds.size()[:-1]
            device = inputs_embeds.device

        # Without an explicit mask every position is attended to.
        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        return self.span_predictor(
            input_ids,
            attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
|