Python source code examples: tensorflow.contrib.rnn.RNNCell()
Example 1
def get_rnn_cell_trainable_variables(cell):
    """Returns the list of trainable variables of an RNN cell.

    Args:
        cell: an instance of :tf_main:`RNNCell <nn/rnn_cell/RNNCell>`.

    Returns:
        list: trainable variables of the cell.
    """
    cell_ = cell
    while True:
        try:
            return cell_.trainable_variables
        except AttributeError:
            # Cell wrappers (e.g., `DropoutWrapper`) cannot directly access
            # `trainable_variables` as they don't initialize the superclass
            # (tf==v1.3), so try to access them through the wrapped cell.
            cell_ = cell_._cell  # pylint: disable=protected-access
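A minimal usage sketch for the helper above (not part of the original snippet; it assumes TensorFlow 1.x with tf.contrib available, and the cell sizes and dropout rate are arbitrary):

import tensorflow as tf

base_cell = tf.contrib.rnn.LSTMCell(num_units=64)
wrapped_cell = tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=0.5)

# Variables are created lazily, so run the cell once before querying them.
inputs = tf.zeros([8, 10, 32])
_ = tf.nn.dynamic_rnn(wrapped_cell, inputs, dtype=tf.float32)

print(get_rnn_cell_trainable_variables(wrapped_cell))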
Example 2
def _get_single_cell(cell_type, num_units):
    """Constructs and returns a single `RNNCell`.

    Args:
        cell_type: Either a string identifying the `RNNCell` type or a subclass of
            `RNNCell`.
        num_units: The number of units in the `RNNCell`.

    Returns:
        An initialized `RNNCell`.

    Raises:
        ValueError: `cell_type` is an invalid `RNNCell` name.
        TypeError: `cell_type` is not a string or a subclass of `RNNCell`.
    """
    cell_type = _CELL_TYPES.get(cell_type, cell_type)
    if not cell_type or not issubclass(cell_type, contrib_rnn.RNNCell):
        raise ValueError('The supported cell types are {}; got {}'.format(
            list(_CELL_TYPES.keys()), cell_type))
    return cell_type(num_units=num_units)
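The _CELL_TYPES registry used above is module-level state that is not part of the snippet. A hypothetical reconstruction of what it presumably contains (the key names and classes are assumptions, not taken from the source):

# Hypothetical mapping from name strings to contrib cell classes.
_CELL_TYPES = {
    'basic_rnn': contrib_rnn.BasicRNNCell,
    'lstm': contrib_rnn.LSTMCell,
    'gru': contrib_rnn.GRUCell,
}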
Example 3
def apply_dropout(
        cell, input_keep_probability, output_keep_probability, random_seed=None):
    """Apply dropout to the outputs and inputs of `cell`.

    Args:
        cell: An `RNNCell`.
        input_keep_probability: Probability to keep inputs to `cell`. If `None`,
            no dropout is applied.
        output_keep_probability: Probability to keep outputs of `cell`. If `None`,
            no dropout is applied.
        random_seed: Seed for random dropout.

    Returns:
        An `RNNCell`, the result of applying the supplied dropouts to `cell`.
    """
    input_prob_none = input_keep_probability is None
    output_prob_none = output_keep_probability is None
    if input_prob_none and output_prob_none:
        return cell
    if input_prob_none:
        input_keep_probability = 1.0
    if output_prob_none:
        output_keep_probability = 1.0
    return contrib_rnn.DropoutWrapper(
        cell, input_keep_probability, output_keep_probability, random_seed)
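An illustrative call, not part of the original snippet; it assumes contrib_rnn is tensorflow.contrib.rnn and the same TF 1.x contrib release this helper was written against:

cell = contrib_rnn.GRUCell(num_units=128)
# Keep 80% of inputs and 70% of outputs; passing None skips dropout on that side.
cell = apply_dropout(cell,
                     input_keep_probability=0.8,
                     output_keep_probability=0.7)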
Example 4
def __init__(self, prenet, attention_mechanism, rnn_cell, frame_projection, stop_projection, mask_finished=False):
    """Initialize decoder parameters.

    Args:
        prenet: A tensorflow fully connected layer acting as the decoder pre-net.
        attention_mechanism: A _BaseAttentionMechanism instance, useful to
            learn encoder-decoder alignments.
        rnn_cell: Instance of RNNCell, main body of the decoder.
        frame_projection: tensorflow fully connected layer with r * num_mels output units.
        stop_projection: tensorflow fully connected layer, expected to project to a scalar
            and through a sigmoid activation.
        mask_finished: Boolean, whether to mask decoder frames after the <stop_token>.
    """
    super(TacotronDecoderCell, self).__init__()
    # Initialize decoder layers
    self._prenet = prenet
    self._attention_mechanism = attention_mechanism
    self._cell = rnn_cell
    self._frame_projection = frame_projection
    self._stop_projection = stop_projection
    self._mask_finished = mask_finished
    self._attention_layer_size = self._attention_mechanism.values.get_shape()[-1].value
Example 5
def __init__(self, is_training, attention_mechanism, rnn_cell, frame_projection=None, stop_projection=None):
    """Initialize decoder parameters.

    Args:
        is_training: Boolean, whether the decoder is built in training mode.
        attention_mechanism: A _BaseAttentionMechanism instance, useful to
            learn encoder-decoder alignments.
        rnn_cell: Instance of RNNCell, main body of the decoder.
        frame_projection: tensorflow fully connected layer with r * num_mels output units.
        stop_projection: tensorflow fully connected layer, expected to project to a scalar
            and through a sigmoid activation.
    """
    super(TacotronDecoderWrapper, self).__init__()
    # Initialize decoder layers
    self._training = is_training
    self._attention_mechanism = attention_mechanism
    self._cell = rnn_cell
    self._frame_projection = frame_projection
    self._stop_projection = stop_projection
    self._attention_layer_size = self._attention_mechanism.values.get_shape()[-1].value
Example 6
def __init__(self, prenet, attention_mechanism, rnn_cell, frame_projection, stop_projection):
    """Initialize decoder parameters.

    Args:
        prenet: A tensorflow fully connected layer acting as the decoder pre-net.
        attention_mechanism: A _BaseAttentionMechanism instance, useful to
            learn encoder-decoder alignments.
        rnn_cell: Instance of RNNCell, main body of the decoder.
        frame_projection: tensorflow fully connected layer with r * num_mels output units.
        stop_projection: tensorflow fully connected layer, expected to project to a scalar
            and through a sigmoid activation.
    """
    super(TacotronDecoderCell, self).__init__()
    # Initialize decoder layers
    self._prenet = prenet
    self._attention_mechanism = attention_mechanism
    self._cell = rnn_cell
    self._frame_projection = frame_projection
    self._stop_projection = stop_projection
    self._attention_layer_size = self._attention_mechanism.values.get_shape()[-1].value
Example 7
def __init__(self, prenet, attention_mechanism, rnn_cell, frame_projection, stop_projection):
    """Initialize decoder parameters.

    Args:
        prenet: A tensorflow fully connected layer acting as the decoder pre-net.
        attention_mechanism: A _BaseAttentionMechanism instance, useful to
            learn encoder-decoder alignments.
        rnn_cell: Instance of RNNCell, main body of the decoder.
        frame_projection: tensorflow fully connected layer with r * num_mels output units.
        stop_projection: tensorflow fully connected layer, expected to project to a scalar
            and through a sigmoid activation.
    """
    super(TacotronDecoderCell, self).__init__()
    # Initialize decoder layers
    self._prenet = prenet
    self._attention_mechanism = attention_mechanism
    self._cell = rnn_cell
    self._frame_projection = frame_projection
    self._stop_projection = stop_projection
    self._attention_layer_size = self._attention_mechanism.values.get_shape()[-1].value
Example 8
def __init__(self, prenet, attention_mechanism, rnn_cell, frame_projection, stop_projection, mask_finished=False):
    """Initialize decoder parameters.

    Args:
        prenet: A tensorflow fully connected layer acting as the decoder pre-net.
        attention_mechanism: A _BaseAttentionMechanism instance, useful to
            learn encoder-decoder alignments.
        rnn_cell: Instance of RNNCell, main body of the decoder.
        frame_projection: tensorflow fully connected layer with r * num_mels output units.
        stop_projection: tensorflow fully connected layer, expected to project to a scalar
            and through a sigmoid activation.
        mask_finished: Boolean, whether to mask decoder frames after the <stop_token>.
    """
    super(TacotronDecoderCell, self).__init__()
    # Initialize decoder layers
    self._prenet = prenet
    self._attention_mechanism = attention_mechanism
    self._cell = rnn_cell
    self._frame_projection = frame_projection
    self._stop_projection = stop_projection
    self._mask_finished = mask_finished
    self._attention_layer_size = self._attention_mechanism.values.get_shape()[-1].value
Example 9
def get_rnn_cell_trainable_variables(cell):
    """Returns the list of trainable variables of an RNN cell.

    Args:
        cell: an instance of :tf_main:`RNNCell <nn/rnn_cell/RNNCell>`.

    Returns:
        list: trainable variables of the cell.
    """
    cell_ = cell
    while True:
        try:
            return cell_.trainable_variables
        except AttributeError:
            # Cell wrappers (e.g., `DropoutWrapper`) cannot directly access
            # `trainable_variables` as they don't initialize the superclass
            # (tf==v1.3), so try to access them through the wrapped cell.
            cell_ = cell_._cell  # pylint: disable=protected-access
Example 10
def __init__(self, prenet, attention_mechanism, rnn_cell, frame_projection, stop_projection):
    """Initialize decoder parameters.

    Args:
        prenet: A tensorflow fully connected layer acting as the decoder pre-net.
        attention_mechanism: A _BaseAttentionMechanism instance, useful to
            learn encoder-decoder alignments.
        rnn_cell: Instance of RNNCell, main body of the decoder.
        frame_projection: tensorflow fully connected layer with r * num_mels output units.
        stop_projection: tensorflow fully connected layer, expected to project to a scalar
            and through a sigmoid activation.
    """
    super(TacotronDecoderCell, self).__init__()
    # Initialize decoder layers
    self._prenet = prenet
    self._attention_mechanism = attention_mechanism
    self._cell = rnn_cell
    self._frame_projection = frame_projection
    self._stop_projection = stop_projection
    self._attention_layer_size = self._attention_mechanism.values.get_shape()[-1].value
Example 11
def apply_dropout(
        cell, input_keep_probability, output_keep_probability, random_seed=None):
    """Apply dropout to the outputs and inputs of `cell`.

    Args:
        cell: An `RNNCell`.
        input_keep_probability: Probability to keep inputs to `cell`. If `None`,
            no dropout is applied.
        output_keep_probability: Probability to keep outputs of `cell`. If `None`,
            no dropout is applied.
        random_seed: Seed for random dropout.

    Returns:
        An `RNNCell`, the result of applying the supplied dropouts to `cell`.
    """
    input_prob_none = input_keep_probability is None
    output_prob_none = output_keep_probability is None
    if input_prob_none and output_prob_none:
        return cell
    if input_prob_none:
        input_keep_probability = 1.0
    if output_prob_none:
        output_keep_probability = 1.0
    return contrib_rnn.DropoutWrapper(
        cell, input_keep_probability, output_keep_probability, random_seed)
Example 12
def construct_rnn_cell(num_units, cell_type='basic_rnn',
                       dropout_keep_probabilities=None):
    """Constructs cells, applies dropout and assembles a `MultiRNNCell`.

    The cell type chosen by `DynamicRNNEstimator.__init__()` is the same as the
    one returned by this function when called with the same arguments.

    Args:
        num_units: A single `int` or a list/tuple of `int`s. The size of the
            `RNNCell`s.
        cell_type: A string identifying the `RNNCell` type or a subclass of
            `RNNCell`.
        dropout_keep_probabilities: a list of dropout probabilities or `None`. If a
            list is given, it must have length `len(num_units) + 1`.

    Returns:
        An initialized `RNNCell`.
    """
    if not isinstance(num_units, (list, tuple)):
        num_units = (num_units,)
    cells = [_get_single_cell(cell_type, n) for n in num_units]
    if dropout_keep_probabilities:
        cells = apply_dropout(cells, dropout_keep_probabilities)
    if len(cells) == 1:
        return cells[0]
    return contrib_rnn.MultiRNNCell(cells)
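For instance (a hedged sketch, not from the source; the sizes are arbitrary), a two-layer stack of basic RNN cells needs three keep probabilities, one more than the number of cells:

cell = construct_rnn_cell(num_units=[64, 64],
                          cell_type='basic_rnn',
                          dropout_keep_probabilities=[0.9, 0.9, 0.8])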
Example 13
def apply_dropout(cells, dropout_keep_probabilities, random_seed=None):
    """Applies dropout to the outputs and inputs of `cells`.

    Args:
        cells: A list of `RNNCell`s.
        dropout_keep_probabilities: a list whose elements are either floats in
            `[0.0, 1.0]` or `None`. It must have length one greater than `cells`.
        random_seed: Seed for random dropout.

    Returns:
        A list of `RNNCell`s, the result of applying the supplied dropouts.

    Raises:
        ValueError: If `len(dropout_keep_probabilities) != len(cells) + 1`.
    """
    if len(dropout_keep_probabilities) != len(cells) + 1:
        raise ValueError(
            'The number of dropout probabilities must be one greater than the '
            'number of cells. Got {} cells and {} dropout probabilities.'.format(
                len(cells), len(dropout_keep_probabilities)))
    wrapped_cells = [
        contrib_rnn.DropoutWrapper(cell, prob, 1.0, seed=random_seed)
        for cell, prob in zip(cells[:-1], dropout_keep_probabilities[:-2])
    ]
    wrapped_cells.append(
        contrib_rnn.DropoutWrapper(cells[-1], dropout_keep_probabilities[-2],
                                   dropout_keep_probabilities[-1]))
    return wrapped_cells
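Read together with the implementation: the first probability is applied to the first cell's input, the middle ones between consecutive cells, and the last to the final cell's output. An illustrative call, not from the source (assuming contrib_rnn is tensorflow.contrib.rnn):

cells = [contrib_rnn.GRUCell(64), contrib_rnn.GRUCell(64)]
# 0.9 on the first cell's input, 0.9 between the two cells, 0.8 on the last output.
cells = apply_dropout(cells, dropout_keep_probabilities=[0.9, 0.9, 0.8])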
Example 14
def _to_rnn_cell(cell_or_type, num_units, num_layers):
    """Constructs and returns an `RNNCell`.

    Args:
        cell_or_type: Either a string identifying the `RNNCell` type, a subclass of
            `RNNCell` or an instance of an `RNNCell`.
        num_units: The number of units in the `RNNCell`.
        num_layers: The number of layers in the RNN.

    Returns:
        An initialized `RNNCell`.

    Raises:
        ValueError: `cell_or_type` is an invalid `RNNCell` name.
        TypeError: `cell_or_type` is not a string or a subclass of `RNNCell`.
    """
    if isinstance(cell_or_type, contrib_rnn.RNNCell):
        return cell_or_type
    if isinstance(cell_or_type, str):
        cell_or_type = _CELL_TYPES.get(cell_or_type)
        if cell_or_type is None:
            raise ValueError('The supported cell types are {}; got {}'.format(
                list(_CELL_TYPES.keys()), cell_or_type))
    if not issubclass(cell_or_type, contrib_rnn.RNNCell):
        raise TypeError(
            'cell_or_type must be a subclass of RNNCell or one of {}.'.format(
                list(_CELL_TYPES.keys())))
    cell = cell_or_type(num_units=num_units)
    if num_layers > 1:
        cell = contrib_rnn.MultiRNNCell(
            [cell] * num_layers, state_is_tuple=True)
    return cell
Example 15
def __init__(self, cell, mem_size, embed_size, max_n_valid_indices):
    """Constructs a `ResidualWrapper` for `cell`.

    Args:
        cell: An instance of `RNNCell`.
        mem_size: size of the memory.
        embed_size: the size/dimension of the embedding in each memory location.
        max_n_valid_indices: maximum number of valid_indices.
    """
    self._cell = cell
    self._mem_size = mem_size
    self._embed_size = embed_size
    self._max_n_valid_indices = max_n_valid_indices
Example 16
def define_rnn_cell(cell_class, num_units, num_layers=1, keep_prob=1.0,
                    input_keep_prob=None, output_keep_prob=None):
    if input_keep_prob is None:
        input_keep_prob = keep_prob
    if output_keep_prob is None:
        output_keep_prob = keep_prob
    cells = []
    for _ in range(num_layers):
        if cell_class == 'GRU':
            cell = GRUCell(num_units=num_units)
        elif cell_class == 'LSTM':
            cell = LSTMCell(num_units=num_units)
        else:
            # Note: tf.contrib.rnn.RNNCell is an abstract base class; this
            # fallback only works if RNNCell is bound to a concrete cell class.
            cell = RNNCell(num_units=num_units)
        if keep_prob < 1.0:
            cell = DropoutWrapper(cell=cell, input_keep_prob=input_keep_prob,
                                  output_keep_prob=output_keep_prob)
        cells.append(cell)
    if len(cells) > 1:
        final_cell = MultiRNNCell(cells)
    else:
        final_cell = cells[0]
    return final_cell
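A sketch of a typical call (not part of the source; it assumes GRUCell, DropoutWrapper and MultiRNNCell are the tensorflow.contrib.rnn classes imported by the snippet's module, and the sizes are arbitrary):

# Two GRU layers, each wrapped with 20% input/output dropout.
cell = define_rnn_cell('GRU', num_units=256, num_layers=2, keep_prob=0.8)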
Example 17
def __init__(self, cell, attention_mechanism, dropout, attn_cell_config,
             num_proj, dtype=tf.float32):
    """
    Args:
        cell: (RNNCell)
        attention_mechanism: (AttentionMechanism)
        dropout: (tf.float)
        attn_cell_config: (dict) hyper params
    """
    # variables and tensors
    self._cell = cell
    self._attention_mechanism = attention_mechanism
    self._dropout = dropout

    # hyperparameters and shapes
    self._n_channels = self._attention_mechanism._n_channels
    self._dim_e = attn_cell_config["dim_e"]
    self._dim_o = attn_cell_config["dim_o"]
    self._num_units = attn_cell_config["num_units"]
    self._dim_embeddings = attn_cell_config["dim_embeddings"]
    self._num_proj = num_proj
    self._dtype = dtype

    # for RNNCell
    self._state_size = AttentionState(self._cell._state_size, self._dim_o)
Example 18
def __init__(self, cell: RNNCell, prenets: Tuple[PreNet]):
    super(DecoderPreNetWrapper, self).__init__()
    self._cell = cell
    self.prenets = prenets
Example 19
def _to_rnn_cell(cell_or_type, num_units, num_layers):
    """Constructs and returns an `RNNCell`.

    Args:
        cell_or_type: Either a string identifying the `RNNCell` type, a subclass of
            `RNNCell` or an instance of an `RNNCell`.
        num_units: The number of units in the `RNNCell`.
        num_layers: The number of layers in the RNN.

    Returns:
        An initialized `RNNCell`.

    Raises:
        ValueError: `cell_or_type` is an invalid `RNNCell` name.
        TypeError: `cell_or_type` is not a string or a subclass of `RNNCell`.
    """
    if isinstance(cell_or_type, contrib_rnn.RNNCell):
        return cell_or_type
    if isinstance(cell_or_type, str):
        cell_or_type = _CELL_TYPES.get(cell_or_type)
        if cell_or_type is None:
            raise ValueError('The supported cell types are {}; got {}'.format(
                list(_CELL_TYPES.keys()), cell_or_type))
    if not issubclass(cell_or_type, contrib_rnn.RNNCell):
        raise TypeError(
            'cell_or_type must be a subclass of RNNCell or one of {}.'.format(
                list(_CELL_TYPES.keys())))
    cell = cell_or_type(num_units=num_units)
    if num_layers > 1:
        cell = contrib_rnn.MultiRNNCell(
            [cell] * num_layers, state_is_tuple=True)
    return cell
Example 20
def __init__(self, cell: RNNCell, max_iter):
    self._cell = cell
    self._max_iter = max_iter
Example 21
def __init__(self, cell: RNNCell, mgc_prenets: Tuple[PreNet], lf0_prenets: Tuple[PreNet]):
    super(DecoderMgcLf0PreNetWrapper, self).__init__()
    self._cell = cell
    self.mgc_prenets = mgc_prenets
    self.lf0_prenets = lf0_prenets
Example 22
def dict_to_state_tuple(input_dict, cell):
    """Reconstructs nested `state` from a dict containing state `Tensor`s.

    Args:
        input_dict: A dict of `Tensor`s.
        cell: An instance of `RNNCell`.

    Returns:
        If `input_dict` does not contain keys 'STATE_PREFIX_i' for `0 <= i < n`
        where `n` is the number of nested entries in `cell.state_size`, this
        function returns `None`. Otherwise, returns a `Tensor` if `cell.state_size`
        is an `int` or a nested tuple of `Tensor`s if `cell.state_size` is a nested
        tuple.

    Raises:
        ValueError: State is partially specified. The `input_dict` must contain
            values for all state components or none at all.
    """
    flat_state_sizes = nest.flatten(cell.state_size)
    state_tensors = []
    with ops.name_scope('dict_to_state_tuple'):
        for i, state_size in enumerate(flat_state_sizes):
            state_name = _get_state_name(i)
            state_tensor = input_dict.get(state_name)
            if state_tensor is not None:
                rank_check = check_ops.assert_rank(
                    state_tensor, 2, name='check_state_{}_rank'.format(i))
                shape_check = check_ops.assert_equal(
                    array_ops.shape(state_tensor)[1],
                    state_size,
                    name='check_state_{}_shape'.format(i))
                with ops.control_dependencies([rank_check, shape_check]):
                    state_tensor = array_ops.identity(state_tensor, name=state_name)
                state_tensors.append(state_tensor)
        if not state_tensors:
            return None
        elif len(state_tensors) == len(flat_state_sizes):
            dummy_state = cell.zero_state(batch_size=1, dtype=dtypes.bool)
            return nest.pack_sequence_as(dummy_state, state_tensors)
        else:
            raise ValueError(
                'RNN state was partially specified. '
                'Expected zero or {} state Tensors; got {}'.format(
                    len(flat_state_sizes), len(state_tensors)))
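A hedged sketch of driving this helper, reusing the module-level names it already depends on (_get_state_name, nest, array_ops, dtypes) and assuming contrib_rnn is tensorflow.contrib.rnn; the batch size of 4 is arbitrary:

cell = contrib_rnn.BasicLSTMCell(8)
state_dict = {
    _get_state_name(i): array_ops.zeros([4, size], dtype=dtypes.float32)
    for i, size in enumerate(nest.flatten(cell.state_size))
}
state = dict_to_state_tuple(state_dict, cell)  # an LSTMStateTuple for this cell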
Example 23
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=True):
    """Build an RNN and apply a fully connected layer to get the desired output.

    Args:
        initial_state: The initial state to pass the RNN. If `None`, the
            default starting state for `self._cell` is used.
        sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
            that will be passed as input to the RNN.
        cell: An initialized `RNNCell`.
        num_label_columns: The desired output dimension.
        dtype: dtype of `cell`.
        parallel_iterations: Number of iterations to run in parallel. Values >> 1
            use more memory but take less time, while smaller values use less memory
            but computations take longer.
        swap_memory: Transparently swap the tensors produced in forward inference
            but needed for back prop from GPU to CPU. This allows training RNNs
            which would typically not fit on a single GPU, with very minimal (or no)
            performance penalty.

    Returns:
        activations: The output of the RNN, projected to `num_label_columns`
            dimensions.
        final_state: A `Tensor` or nested tuple of `Tensor`s representing the final
            state output by the RNN.
    """
    with ops.name_scope('RNN'):
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=cell,
            inputs=sequence_input,
            initial_state=initial_state,
            dtype=dtype,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state
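A minimal way to wire this up (a sketch, not from the source; it assumes TF 1.x graph mode, the module's existing array_ops/dtypes imports, and contrib_rnn as tensorflow.contrib.rnn; the shapes are illustrative):

# [batch, time, features] input; dynamic_rnn builds a zero initial state.
sequence_input = array_ops.placeholder(dtypes.float32, [None, 20, 16])
cell = contrib_rnn.GRUCell(num_units=32)
activations, final_state = construct_rnn(
    initial_state=None,
    sequence_input=sequence_input,
    cell=cell,
    num_label_columns=5)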
Example 24
def __init__(self,
             num_units,
             num_dims=1,
             input_dims=None,
             output_dims=None,
             priority_dims=None,
             non_recurrent_dims=None,
             tied=False,
             cell_fn=None,
             non_recurrent_fn=None):
    """Initialize the parameters of a Grid RNN cell.

    Args:
        num_units: int, The number of units in all dimensions of this GridRNN cell.
        num_dims: int, Number of dimensions of this grid.
        input_dims: int or list, List of dimensions which will receive input data.
        output_dims: int or list, List of dimensions from which the output will be
            recorded.
        priority_dims: int or list, List of dimensions to be considered as
            priority dimensions. If None, no dimension is prioritized.
        non_recurrent_dims: int or list, List of dimensions that are not
            recurrent. The transfer function for non-recurrent dimensions is
            specified via `non_recurrent_fn`, which defaults to
            `tensorflow.nn.relu`.
        tied: bool, Whether to share the weights among the dimensions of this
            GridRNN cell. If there are non-recurrent dimensions in the grid,
            weights are shared between each group of recurrent and
            non-recurrent dimensions.
        cell_fn: function, a function which returns the recurrent cell object.
            Has to have the following signature:
                def cell_func(num_units, input_size):
                    # ...
            and returns an object of type `RNNCell`. If None, LSTMCell with
            default parameters will be used.
        non_recurrent_fn: a tensorflow Op that will be the transfer function of
            the non-recurrent dimensions.
    """
    if num_dims < 1:
        raise ValueError('dims must be >= 1: {}'.format(num_dims))

    self._config = _parse_rnn_config(num_dims, input_dims, output_dims,
                                     priority_dims, non_recurrent_dims,
                                     non_recurrent_fn or nn.relu, tied,
                                     num_units)

    cell_input_size = (self._config.num_dims - 1) * num_units
    if cell_fn is None:
        self._cell = rnn.LSTMCell(
            num_units=num_units, input_size=cell_input_size, state_is_tuple=False)
    else:
        self._cell = cell_fn(num_units, cell_input_size)
        if not isinstance(self._cell, rnn.RNNCell):
            raise ValueError('cell_fn must return an object of type RNNCell')
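A hedged example of a cell_fn matching the documented signature, not from the source (GRUCell is an arbitrary choice, assumed to be available from the same rnn module the snippet uses; newer cells no longer take input_size, so it is simply ignored):

def cell_fn(num_units, input_size):
    del input_size  # the cell infers its input size at call time
    return rnn.GRUCell(num_units)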
Example 25
def _checked_scope(cell, scope, reuse=None, **kwargs):
    # This generator yields a scope, so in its source module it is presumably
    # wrapped with @contextlib.contextmanager and used as `with _checked_scope(...)`.
    if reuse is not None:
        kwargs["reuse"] = reuse
    with vs.variable_scope(scope, **kwargs) as checking_scope:
        scope_name = checking_scope.name
        if hasattr(cell, "_scope"):
            cell_scope = cell._scope  # pylint: disable=protected-access
            if cell_scope.name != checking_scope.name:
                raise ValueError(
                    "Attempt to reuse RNNCell %s with a different variable scope than "
                    "its first use. First use of cell was with scope '%s', this "
                    "attempt is with scope '%s'. Please create a new instance of the "
                    "cell if you would like it to use a different set of weights. "
                    "If before you were using: MultiRNNCell([%s(...)] * num_layers), "
                    "change to: MultiRNNCell([%s(...) for _ in range(num_layers)]). "
                    "If before you were using the same cell instance as both the "
                    "forward and reverse cell of a bidirectional RNN, simply create "
                    "two instances (one for forward, one for reverse). "
                    "In May 2017, we will start transitioning this cell's behavior "
                    "to use existing stored weights, if any, when it is called "
                    "with scope=None (which can lead to silent model degradation, so "
                    "this error will remain until then.)"
                    % (cell, cell_scope.name, scope_name, type(cell).__name__,
                       type(cell).__name__))
        else:
            weights_found = False
            try:
                with vs.variable_scope(checking_scope, reuse=True):
                    vs.get_variable(_WEIGHTS_VARIABLE_NAME)
                weights_found = True
            except ValueError:
                pass
            if weights_found and reuse is None:
                raise ValueError(
                    "Attempt to have a second RNNCell use the weights of a variable "
                    "scope that already has weights: '%s'; and the cell was not "
                    "constructed as %s(..., reuse=True). "
                    "To share the weights of an RNNCell, simply "
                    "reuse it in your second calculation, or create a new one with "
                    "the argument reuse=True." % (scope_name, type(cell).__name__))
        # Everything is OK. Update the cell's scope and yield it.
        cell._scope = checking_scope  # pylint: disable=protected-access
        yield checking_scope
Example 26
def __init__(self, cell, attn_inputs,
             attn_size, attn_vec_size,
             output_size=None, input_size=None,
             state_is_tuple=True, attn_masks=None,
             merge_output_attn='linear',
             reuse=None):
    """Create a cell with attention.

    Args:
        cell: an RNNCell, an attention is added to it.
        attn_inputs: a Tensor.
        attn_size: integer, the size of an attention vector. Equal to
            cell.output_size by default.
        attn_vec_size: integer, the number of convolutional features calculated
            on attention state and a size of the hidden layer built from
            base cell state. Equal to attn_size by default.
        input_size: integer, the size of a hidden linear layer,
            built from inputs and attention. Derived from the input tensor
            by default.
        state_is_tuple: If True, accepted and returned states are n-tuples, where
            `n = len(cells)`. If False, the states are all concatenated
            along the column axis.
        attn_masks: mask that should be applied to attention. If None, no masks
            will be applied.
        reuse: (optional) Python boolean describing whether to reuse variables
            in an existing scope. If not `True`, and the existing scope already has
            the given variables, an error is raised.

    Raises:
        TypeError: if cell is not an RNNCell.
        ValueError: if cell returns a state tuple but the flag
            `state_is_tuple` is `False` or if attn_length is zero or less.
    """
    if not isinstance(cell, rnn.RNNCell):
        raise TypeError("The parameter cell is not RNNCell.")
    if nest.is_sequence(cell.state_size) and not state_is_tuple:
        raise ValueError("Cell returns tuple of states, but the flag "
                         "state_is_tuple is not set. State size is: %s"
                         % str(cell.state_size))
    if not state_is_tuple:
        logging.warn(
            "%s: Using a concatenated state is slower and will soon be "
            "deprecated. Use state_is_tuple=True.", self)
    self._state_is_tuple = state_is_tuple
    if not state_is_tuple:
        raise NotImplementedError
    self._cell = cell
    self._input_size = input_size
    self._output_size = output_size
    if output_size is None:
        self._output_size = cell.output_size
    self._attn_size = attn_size
    self._reuse = reuse
    self._attn_inputs = attn_inputs
    self._attn_vec_size = attn_vec_size
    self.attn_masks = attn_masks
    self.merge_output_attn = merge_output_attn
Example 27
def build_encoder(self, post_word_input, corr_responses_input):
    if self.cell_class == 'GRU':
        encoder_cell = MultiRNNCell([GRUCell(self.num_units) for _ in range(self.num_layers)])
    elif self.cell_class == 'LSTM':
        encoder_cell = MultiRNNCell([LSTMCell(self.num_units) for _ in range(self.num_layers)])
    else:
        encoder_cell = MultiRNNCell([RNNCell(self.num_units) for _ in range(self.num_layers)])

    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE) as scope:
        encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_cell,
                                                          post_word_input,
                                                          self.posts_length,
                                                          dtype=tf.float32, scope=scope)

    batch_size, encoder_len = tf.shape(self.posts)[0], tf.shape(self.posts)[1]
    corr_response_input = tf.reshape(corr_responses_input, [batch_size, -1, self.dim_emb])
    corr_cum_len = tf.shape(corr_response_input)[1]

    with tf.variable_scope('mutual_attention', reuse=tf.AUTO_REUSE):
        encoder_out_trans = tf.layers.dense(encoder_output, self.num_units,
                                            name='encoder_out_transform')
        corr_response_trans = tf.layers.dense(corr_response_input, self.num_units,
                                              name='corr_response_transform')
        encoder_out_trans = tf.expand_dims(encoder_out_trans, axis=1)
        encoder_out_trans = tf.tile(encoder_out_trans, [1, corr_cum_len, 1, 1])
        encoder_out_trans = tf.reshape(encoder_out_trans, [-1, encoder_len, self.num_units])
        corr_response_trans = tf.reshape(corr_response_trans, [-1, self.num_units])
        corr_response_trans = tf.expand_dims(corr_response_trans, axis=1)
        # TODO: try bilinear attention
        v = tf.get_variable("attention_v", [self.num_units], dtype=tf.float32)
        score = tf.reduce_sum(v * tf.tanh(encoder_out_trans + corr_response_trans), axis=2)
        alignments = tf.nn.softmax(score)
        encoder_out_tiled = tf.expand_dims(encoder_output, axis=1)
        encoder_out_tiled = tf.tile(encoder_out_tiled, [1, corr_cum_len, 1, 1])
        encoder_out_tiled = tf.reshape(encoder_out_tiled, [-1, encoder_len, self.num_units])
        context_mutual = tf.reduce_sum(tf.expand_dims(alignments, 2) * encoder_out_tiled, axis=1)
        context_mutual = tf.reshape(context_mutual, [batch_size, -1, self.num_units])
        context_mutual = tf.reduce_mean(context_mutual, axis=1)

    encoder_output = tf.concat([encoder_output, tf.expand_dims(context_mutual, 1)], axis=1)

    if self.use_trans_repr:
        trans_output = tf.layers.dense(self.trans_reprs, self.num_units,
                                       name='trans_reprs_transform', reuse=tf.AUTO_REUSE)
        encoder_output = tf.concat([encoder_output, trans_output], axis=1)

    return encoder_output, encoder_state
Example 28
def build_decoder(self, encoder_output, encoder_state, triple_input, decoder_input, train_mode=True):
    if self.cell_class == 'GRU':
        decoder_cell = MultiRNNCell([GRUCell(self.num_units) for _ in range(self.num_layers)])
    elif self.cell_class == 'LSTM':
        decoder_cell = MultiRNNCell([LSTMCell(self.num_units) for _ in range(self.num_layers)])
    else:
        decoder_cell = MultiRNNCell([RNNCell(self.num_units) for _ in range(self.num_layers)])

    if train_mode:
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE) as scope:
            if self.use_trans_select:
                kd_context = self.transfer_matching(encoder_output, triple_input)
            else:
                kd_context = None
            # prepare attention
            attention_keys, attention_values, attention_construct_fn \
                = prepare_attention(encoder_output, kd_context, 'bahdanau', self.num_units)
            decoder_fn_train = attention_decoder_train(
                encoder_state=encoder_state,
                attention_keys=attention_keys,
                attention_values=attention_values,
                attention_construct_fn=attention_construct_fn)
            # train decoder
            decoder_output, _, _ = dynamic_rnn_decoder(cell=decoder_cell,
                                                       decoder_fn=decoder_fn_train,
                                                       inputs=decoder_input,
                                                       sequence_length=self.responses_length,
                                                       scope=scope)
            output_fn = create_output_fn(vocab_size=self.vocab_size)
            output_logits = output_fn(decoder_output)
            return output_logits
    else:
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE) as scope:
            if self.use_trans_select:
                kd_context = self.transfer_matching(encoder_output, triple_input)
            else:
                kd_context = None
            attention_keys, attention_values, attention_construct_fn \
                = prepare_attention(encoder_output, kd_context, 'bahdanau', self.num_units, reuse=tf.AUTO_REUSE)
            output_fn = create_output_fn(vocab_size=self.vocab_size)
            # inference decoder
            decoder_fn_inference = attention_decoder_inference(
                num_units=self.num_units, num_decoder_symbols=self.vocab_size,
                output_fn=output_fn, encoder_state=encoder_state,
                attention_keys=attention_keys, attention_values=attention_values,
                attention_construct_fn=attention_construct_fn, embeddings=self.word_embed,
                start_of_sequence_id=GO_ID, end_of_sequence_id=EOS_ID, maximum_length=self.max_length)
            # get decoder output
            decoder_distribution, _, _ = dynamic_rnn_decoder(cell=decoder_cell,
                                                             decoder_fn=decoder_fn_inference,
                                                             scope=scope)
            return decoder_distribution
Example 29
def dict_to_state_tuple(input_dict, cell):
    """Reconstructs nested `state` from a dict containing state `Tensor`s.

    Args:
        input_dict: A dict of `Tensor`s.
        cell: An instance of `RNNCell`.

    Returns:
        If `input_dict` does not contain keys 'STATE_PREFIX_i' for `0 <= i < n`
        where `n` is the number of nested entries in `cell.state_size`, this
        function returns `None`. Otherwise, returns a `Tensor` if `cell.state_size`
        is an `int` or a nested tuple of `Tensor`s if `cell.state_size` is a nested
        tuple.

    Raises:
        ValueError: State is partially specified. The `input_dict` must contain
            values for all state components or none at all.
    """
    flat_state_sizes = nest.flatten(cell.state_size)
    state_tensors = []
    with ops.name_scope('dict_to_state_tuple'):
        for i, state_size in enumerate(flat_state_sizes):
            state_name = _get_state_name(i)
            state_tensor = input_dict.get(state_name)
            if state_tensor is not None:
                rank_check = check_ops.assert_rank(
                    state_tensor, 2, name='check_state_{}_rank'.format(i))
                shape_check = check_ops.assert_equal(
                    array_ops.shape(state_tensor)[1],
                    state_size,
                    name='check_state_{}_shape'.format(i))
                with ops.control_dependencies([rank_check, shape_check]):
                    state_tensor = array_ops.identity(state_tensor, name=state_name)
                state_tensors.append(state_tensor)
        if not state_tensors:
            return None
        elif len(state_tensors) == len(flat_state_sizes):
            dummy_state = cell.zero_state(batch_size=1, dtype=dtypes.bool)
            return nest.pack_sequence_as(dummy_state, state_tensors)
        else:
            raise ValueError(
                'RNN state was partially specified. '
                'Expected zero or {} state Tensors; got {}'.format(
                    len(flat_state_sizes), len(state_tensors)))
Example 30
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=True):
    """Build an RNN and apply a fully connected layer to get the desired output.

    Args:
        initial_state: The initial state to pass the RNN. If `None`, the
            default starting state for `self._cell` is used.
        sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
            that will be passed as input to the RNN.
        cell: An initialized `RNNCell`.
        num_label_columns: The desired output dimension.
        dtype: dtype of `cell`.
        parallel_iterations: Number of iterations to run in parallel. Values >> 1
            use more memory but take less time, while smaller values use less memory
            but computations take longer.
        swap_memory: Transparently swap the tensors produced in forward inference
            but needed for back prop from GPU to CPU. This allows training RNNs
            which would typically not fit on a single GPU, with very minimal (or no)
            performance penalty.

    Returns:
        activations: The output of the RNN, projected to `num_label_columns`
            dimensions.
        final_state: A `Tensor` or nested tuple of `Tensor`s representing the final
            state output by the RNN.
    """
    with ops.name_scope('RNN'):
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=cell,
            inputs=sequence_input,
            initial_state=initial_state,
            dtype=dtype,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state