122 lines
4.3 KiB
Python
122 lines
4.3 KiB
Python
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
The function lex_net(...) defines the lexical analysis network structure.
|
|
"""
|
|
import sys
|
|
import os
|
|
import math
|
|
|
|
import paddle.fluid as fluid
|
|
from paddle.fluid.initializer import NormalInitializer
|
|
|
|
|
|
def lex_net(word, vocab_size, num_labels, for_infer=True, target=None):
    """
    Define the lexical analysis network structure.

    The graph built here is: word embedding -> ``bigru_num`` stacked
    bidirectional GRU layers -> fully connected emission layer -> CRF
    decoding.

    word: stores the input of the model
    vocab_size: first dimension of the word embedding table
    num_labels: output size of the emission (fully connected) layer
    for_infer: a boolean value, indicating if the model to be created is
        for training or predicting. Kept for interface compatibility; this
        implementation does not read it.
    target: kept for interface compatibility; this implementation does not
        read it.

    return:
        the output of ``fluid.layers.crf_decoding`` (the prediction),
        whatever the values of ``for_infer`` and ``target``
    """

    word_emb_dim = 128
    grnn_hidden_dim = 128
    bigru_num = 2
    emb_lr = 1.0
    init_bound = 0.1
    IS_SPARSE = True

    def _uniform_init():
        """Return a new Uniform(-init_bound, init_bound) initializer."""
        return fluid.initializer.Uniform(low=-init_bound, high=init_bound)

    def _regularized_attr():
        """
        Return a new ParamAttr with uniform init and L2 decay (1e-4).

        A fresh object is built on every call, exactly as the inlined
        constructions it replaces did, so no attribute or initializer
        instance is shared between layers.
        """
        return fluid.ParamAttr(
            initializer=_uniform_init(),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4))

    def _bigru_layer(input_feature):
        """
        define the bidirectional gru layer

        Layers are created in the order fc, gru, fc, reversed gru; keep
        that order, since unnamed parameters are presumably named by
        creation order (confirm before reordering).
        """
        # Each GRU is fed by its own fc projection of width
        # 3 * grnn_hidden_dim (the input width dynamic_gru is documented
        # to expect for a hidden size of grnn_hidden_dim).
        pre_gru = fluid.layers.fc(
            input=input_feature,
            size=grnn_hidden_dim * 3,
            param_attr=_regularized_attr())
        gru = fluid.layers.dynamic_gru(
            input=pre_gru,
            size=grnn_hidden_dim,
            param_attr=_regularized_attr())

        pre_gru_r = fluid.layers.fc(
            input=input_feature,
            size=grnn_hidden_dim * 3,
            param_attr=_regularized_attr())
        gru_r = fluid.layers.dynamic_gru(
            input=pre_gru_r,
            size=grnn_hidden_dim,
            is_reverse=True,
            param_attr=_regularized_attr())

        # Concatenate the forward and reversed outputs along axis 1.
        return fluid.layers.concat(input=[gru, gru_r], axis=1)

    def _net_conf(word):
        """
        Configure the network
        """
        word_embedding = fluid.embedding(
            input=word,
            size=[vocab_size, word_emb_dim],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr=fluid.ParamAttr(
                learning_rate=emb_lr,
                name="word_emb",
                initializer=_uniform_init()))

        # Stack the bidirectional GRU layers, each consuming the previous
        # layer's output (the embedding for the first one).
        feature = word_embedding
        for _ in range(bigru_num):
            feature = _bigru_layer(feature)

        emission = fluid.layers.fc(
            size=num_labels,
            input=feature,
            param_attr=_regularized_attr())

        # crf_decoding below refers to its parameter by the name 'crfw';
        # nothing else in this graph creates it, so it is created
        # explicitly here with shape [size + 2, size].
        size = emission.shape[1]
        fluid.layers.create_parameter(
            shape=[size + 2, size], dtype=emission.dtype, name='crfw')
        crf_decode = fluid.layers.crf_decoding(
            input=emission, param_attr=fluid.ParamAttr(name='crfw'))

        return crf_decode

    return _net_conf(word)