# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ The function lex_net(args) define the lexical analysis network structure """ import sys import os import math import paddle.fluid as fluid from paddle.fluid.initializer import NormalInitializer def lex_net(word, vocab_size, num_labels, for_infer=True, target=None): """ define the lexical analysis network structure word: stores the input of the model for_infer: a boolean value, indicating if the model to be created is for training or predicting. return: for infer: return the prediction otherwise: return the prediction """ word_emb_dim=128 grnn_hidden_dim=128 bigru_num=2 emb_lr = 1.0 crf_lr = 1.0 init_bound = 0.1 IS_SPARSE = True def _bigru_layer(input_feature): """ define the bidirectional gru layer """ pre_gru = fluid.layers.fc( input=input_feature, size=grnn_hidden_dim * 3, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) gru = fluid.layers.dynamic_gru( input=pre_gru, size=grnn_hidden_dim, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) pre_gru_r = fluid.layers.fc( input=input_feature, size=grnn_hidden_dim * 3, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) gru_r = fluid.layers.dynamic_gru( input=pre_gru_r, size=grnn_hidden_dim, is_reverse=True, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1) return bi_merge def _net_conf(word, target=None): """ Configure the network """ word_embedding = fluid.embedding( input=word, size=[vocab_size, word_emb_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( learning_rate=emb_lr, name="word_emb", initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound))) input_feature = word_embedding for i in range(bigru_num): bigru_output = _bigru_layer(input_feature) input_feature = bigru_output emission = fluid.layers.fc( size=num_labels, input=bigru_output, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) size = emission.shape[1] fluid.layers.create_parameter( shape=[size + 2, size], dtype=emission.dtype, name='crfw') crf_decode = fluid.layers.crf_decoding( input=emission, param_attr=fluid.ParamAttr(name='crfw')) return crf_decode return _net_conf(word)