python - tensorflow 提要列表功能(多热)到 tf.estimator

标签 python tensorflow machine-learning neural-network deep-learning

某些要素列的数据类型是列表。而且它们的长度可以不同。我想将此列编码为多热点分类特征并将其提供给 tf.estimator。我尝试了以下操作,但显示错误 Unable to get element as bytes 。我认为这是深度学习中的常见做法,尤其是推荐系统,例如深广模型。我发现了一个相关问题here但它没有显示如何提供给估计器。

import pandas as pd
import tensorflow as tf

OUTDIR = "./data"

data = {"x": [["a", "c"], ["a", "b"], ["b", "c"]], "y": ["x", "y", "z"]}
df = pd.DataFrame(data)

Y = df["y"]
X = df.drop("y", axis=1)

indicator_features = [
    tf.feature_column.indicator_column(
        categorical_column=tf.feature_column.categorical_column_with_vocabulary_list(
            key="x", vocabulary_list=["a", "b", "c"]
        )
    )
]

model = tf.estimator.LinearClassifier(
    feature_columns=indicator_features, model_dir=OUTDIR
)

training_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=X, y=Y, batch_size=64, shuffle=True, num_epochs=None
)

model.train(input_fn=training_input_fn)

出现以下错误:

INFO:tensorflow:Using default config. INFO:tensorflow:Using config: {'_model_dir': 'testalg', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} INFO:tensorflow:Calling model_fn. INFO:tensorflow:Done calling model_fn. INFO:tensorflow:Create CheckpointSaverHook. INFO:tensorflow:Graph was finalized. INFO:tensorflow:Running local_init_op. INFO:tensorflow:Done running local_init_op. INFO:tensorflow:Error reported to Coordinator: , Unable to get element as bytes. INFO:tensorflow:Saving checkpoints for 0 into testalg/model.ckpt. ------------------------------------------------------- InternalError Traceback (most recent call last) /home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args) 1321 try: -> 1322 return fn(*args) 1323 except errors.OpError as e:

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata) 1306 return self._call_tf_sessionrun( -> 1307 options, feed_dict, fetch_list, target_list, run_metadata) 1308

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata) 1408 self._session, options, feed_dict, fetch_list, target_list, -> 1409 run_metadata) 1410 else:

InternalError: Unable to get element as bytes.

During handling of the above exception, another exception occurred:

InternalError Traceback (most recent call last) in () 44 45 ---> 46 model.train(input_fn=training_input_fn)

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners) 364 365 saving_listeners = _check_listeners_type(saving_listeners) --> 366 loss = self._train_model(input_fn, hooks, saving_listeners) 367 logging.info('Loss for final step: %s.', loss) 368 return self

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners) 1117
return self._train_model_distributed(input_fn, hooks, saving_listeners) 1118 else: -> 1119 return self._train_model_default(input_fn, hooks, saving_listeners) 1120 1121 def _train_model_default(self, input_fn, hooks, saving_listeners):

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1133 return self._train_with_estimator_spec(estimator_spec, worker_hooks, 1134
hooks, global_step_tensor, -> 1135 saving_listeners) 1136 1137 def _train_model_distributed(self, input_fn, hooks, saving_listeners):

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks, global_step_tensor, saving_listeners) 1334 loss = None 1335 while not mon_sess.should_stop(): -> 1336 _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss]) 1337 return loss 1338

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in exit(self, exception_type, exception_value, traceback) 687 if exception_type in [errors.OutOfRangeError, StopIteration]: 688 exception_type = None --> 689 self._close_internal(exception_type) 690 # exit should return True to suppress an exception. 691 return exception_type is None

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in _close_internal(self, exception_type) 724 if self._sess is None: 725 raise RuntimeError('Session is already closed.') --> 726 self._sess.close() 727 finally: 728 self._sess = None

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in close(self) 972 if self._sess: 973 try: --> 974 self._sess.close() 975 except _PREEMPTION_ERRORS: 976 pass

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py in close(self) 1116 self._coord.join( 1117
stop_grace_period_secs=self._stop_grace_period_secs, -> 1118 ignore_live_threads=True) 1119 finally: 1120 try:

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py in join(self, threads, stop_grace_period_secs, ignore_live_threads) 387 self._registered_threads = set() 388 if self._exc_info_to_raise: --> 389 six.reraise(*self._exc_info_to_raise) 390 elif stragglers: 391 if ignore_live_threads:

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb) 683 value = tp() 684 if value.traceback is not tb: --> 685 raise value.with_traceback(tb) 686 raise value 687

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py in _run(self, sess, enqueue_op, feed_fn, coord) 92 try: 93 feed_dict = None if feed_fn is None else feed_fn() ---> 94 sess.run(enqueue_op, feed_dict=feed_dict) 95 except (errors.OutOfRangeError, errors.CancelledError): 96 # This exception indicates that a queue was closed.

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata) 898 try: 899 result = self._run(None, fetches, feed_dict, options_ptr, --> 900 run_metadata_ptr) 901 if run_metadata: 902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and feed_dict_tensor): 1134 results = self._do_run(handle, final_targets, final_fetches, -> 1135 feed_dict_tensor, options, run_metadata) 1136 else: 1137 results = []

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata) 1314 if handle is None: 1315 return self._do_call(_run_fn, feeds, fetches, targets, options, -> 1316 run_metadata) 1317 else: 1318 return self._do_call(_prun_fn, handle, feeds, fetches)

/home/yinan.li1/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args) 1333 except KeyError: 1334 pass -> 1335 raise type(e)(node_def, op, message) 1336 1337 def _extend_graph(self):

InternalError: Unable to get element as bytes.

最佳答案

我认为你的情况的问题之一是 pandas 中的列类型实际上是对象而不是字符串。如果将其转换为单独的字符串列,您将消除此错误。请记住,基本的 TensorFlow tf.string 数据类型允许您构建字节字符串张量。并且当您在此列中存储对象而不是字符串时,您会收到错误。

下面的代码将克服您上面遇到的错误,但它不会完全解决您的问题。列表的可变长度必须通过填充或列表或类似的东西来处理,indicator_column 可能在处理缺失值时遇到问题。

X2= pd.DataFrame(X['x'].values.tolist(), columns=['x1','x2'])

feat1 = tf.feature_column.categorical_column_with_vocabulary_list(
            key="x1", vocabulary_list=["a", "b", "c"]
        )
feat2 = tf.feature_column.categorical_column_with_vocabulary_list(
            key="x2", vocabulary_list=["a", "b", "c"]
        )
indicator_features = [
    tf.feature_column.indicator_column(
        categorical_column=feat1
    ),tf.feature_column.indicator_column(
        categorical_column=feat2
    )
]

training_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=X2, y=Y, batch_size=64, shuffle=True, num_epochs=None
)

关于python - tensorflow 提要列表功能(多热)到 tf.estimator,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52938714/

相关文章:

python - 无法使用pip或anaconda安装tensorflow

machine-learning - Caffe 卷积层权重和尺寸

machine-learning - 如何使用 Keras 更改 RNN 中的隐藏状态激活大小?

python - Discord.py,如果它不存在,我该如何让机器人创建一个 channel ,但如果它存在,它就会继续执行它的任务?

Python - 使用 2 种定义类型处理 csv

Python 模数结果不同于 wolfram alpha?

python - 从 Python 中获取 Bash 脚本的好方法是什么?

python - GPU计算能力3.0,但最低要求的Cuda能力为3.5

tensorflow - tf.data group_by_window() 无需首先迭代完整的数据集

python - 如何访问 Tensorflow 中循环单元的权重?