NLP-Bert语义情感分类
评论情感分类数据处理
# 评论情感分类数据处理
# labels: 0负面、1中性、2正面
# Comment sentiment classification data processing.
# labels: 0 = negative, 1 = neutral, 2 = positive
class CommentProcessor(DataProcessor):
    """Processor for the three-class comment-sentiment TSV data set.

    Expects ``train.tsv`` / ``dev.tsv`` / ``test.tsv`` under ``data_dir``,
    each row being ``label<TAB>text`` with a header row to skip.
    """

    def get_train_examples(self, data_dir):
        """Build InputExamples from train.tsv."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """Build InputExamples from dev.tsv."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """Build InputExamples from test.tsv."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_labels(self):
        """Label ids: 0 negative, 1 neutral, 2 positive."""
        return ["0", "1", "2"]

    def _create_examples(self, lines, set_type):
        """Convert raw TSV rows to InputExamples.

        :param lines: rows from ``_read_tsv`` (first row is a header)
        :param set_type: "train" / "dev" / "test"; test rows get dummy label "0"
        :return: list of InputExample
        """
        examples = []
        for i, line in enumerate(lines):
            if i == 0:
                continue  # skip the header row
            guid = "%s-%s" % (set_type, i)
            try:
                text_a = tokenization.convert_to_unicode(line[1])
            except IndexError:
                # Was a bare ``except:`` that hid every error; only a row
                # missing its text column is expected here, so skip just that.
                continue
            if set_type == "test":
                label = "0"  # test set has no gold labels
            else:
                label = tokenization.convert_to_unicode(line[0])
            examples.append(
                InputExample(guid=guid, text_a=text_a, label=label))
        return examples
将Bert模型ckpt文件转为 tfserving部署所需的pb
#!/usr/bin/python3.6
'''
BERT模型ckpt文件转为部署tfserving所需的文件
'''
import json
import os
from enum import Enum
import sys
import modeling
from termcolor import colored
import logging
import tensorflow as tf
import argparse
import pickle
# Command-line flags controlling where the exported SavedModel is written.
tf.app.flags.DEFINE_string(
    'export_model_dir', "./output/comment_0/versions",
    'Directory where the model exported files should be placed.')
tf.app.flags.DEFINE_integer('model_version', 10001, 'Models version number.')
FLAGS = tf.app.flags.FLAGS
def create_classification_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels):
    """Build a BERT sequence-classification graph.

    :param bert_config: modeling.BertConfig describing the encoder
    :param is_training: bool; when True, dropout is applied to the pooled output
    :param input_ids: int32 tensor of token ids
    :param input_mask: int32 tensor marking real tokens vs. padding
    :param segment_ids: int32 token-type ids (may be None)
    :param labels: int label tensor, or None for inference-only graphs
    :param num_labels: number of target classes
    :return: tuple (loss, per_example_loss, logits, probabilities); the first
        two are None when ``labels`` is None
    """
    # Encode the batch with BERT.
    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )
    pooled = bert.get_pooled_output()
    hidden_size = pooled.shape[-1].value

    # Classification head on top of the pooled [CLS] representation.
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            pooled = tf.nn.dropout(pooled, keep_prob=0.9)

        logits = tf.nn.bias_add(
            tf.matmul(pooled, output_weights, transpose_b=True), output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        if labels is None:
            loss = per_example_loss = None
        else:
            # Standard softmax cross-entropy over one-hot targets.
            one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
            loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
def main(max_seq_len, model_dir, num_labels):
    """Export the latest BERT classification checkpoint as a TF Serving SavedModel.

    :param max_seq_len: fixed sequence length of the exported input placeholders
    :param model_dir: directory containing the trained checkpoint files
    :param num_labels: number of output classes
    """
    # Input placeholders that become the serving signature's inputs.
    input_ids = tf.placeholder(tf.int32, (None, max_seq_len), 'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, max_seq_len), 'input_mask')
    # Forward pass only (is_training=False, labels=None: no dropout, no loss).
    bert_config = modeling.BertConfig.from_json_file('./uncased_L-2_H-128_A-2/bert_config.json')
    loss, per_example_loss, logits, probabilities = create_classification_model(
        bert_config=bert_config, is_training=False,
        input_ids=input_ids, input_mask=input_mask,
        segment_ids=None, labels=None, num_labels=num_labels)
    # Predicted label id, plus a stable name for the probability tensor.
    logits = tf.argmax(logits, 1)
    probabilities = tf.identity(probabilities, 'pred_prob')
    saver = tf.train.Saver()

    # NOTE(review): the original opened two nested tf.Session()s and never used
    # the outer one (it was shadowed); a single session is kept here.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Restore the most recent checkpoint (overwrites the init values).
        latest_checkpoint = tf.train.latest_checkpoint(model_dir)
        saver.restore(sess, latest_checkpoint)

        # Export layout expected by TF Serving: <export_model_dir>/<version>/
        export_path_base = FLAGS.export_model_dir
        export_path = os.path.join(
            tf.compat.as_bytes(export_path_base),
            tf.compat.as_bytes(str(FLAGS.model_version)))
        print('Exporting trained model to', export_path)
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        # TensorInfo protos describing the signature's inputs/outputs.
        input_ids_tensor = tf.saved_model.utils.build_tensor_info(input_ids)
        input_mask_tensor = tf.saved_model.utils.build_tensor_info(input_mask)
        logits_output = tf.saved_model.utils.build_tensor_info(logits)
        print("logits_output")
        print(logits_output)
        probabilities_output = tf.saved_model.utils.build_tensor_info(probabilities)
        print("probabilities_output")
        print(probabilities_output)

        # Predict-API signature: ids/mask in, label id + class scores out.
        # (An unused local ``labels_map`` was removed here.)
        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={'input_ids': input_ids_tensor, 'input_mask': input_mask_tensor},
                outputs={'pred_label': logits_output, 'score': probabilities_output},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={'result': prediction_signature})
        builder.save(as_text=True)
        print('Done exporting!')
if __name__ == '__main__':
    # Export a 3-class sentiment model traced at 128 tokens per sequence,
    # reading the checkpoint from ./output/comment_0.
    main(max_seq_len=128, model_dir='./output/comment_0', num_labels=3)
About Me
👋 读书城南,🤔 在未来面前,我们都是孩子~
- 📙 一个热衷于探索学习新方向、新事物的智能产品经理,闲暇时间喜欢coding💻、画图🎨、音乐🎵、学习ing~
👋 Social Media
🛠️ Blog: http://oceaneyes.top
⚡ PM导航: https://pmhub.oceangzy.top
☘️ CNBLOG: https://www.cnblogs.com/oceaneyes-gzy/
🌱 AI PRJ自己部署的一些算法demo: http://ai.oceangzy.top/
📫 Email: 1450136519@qq.com
💬 WeChat: OCEANGZY
💬 公众号: UncleJoker-GZY
👋 加入小组~
👋 感谢打赏~
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 OCEAN.GZY读书城南!