このトピックでは、特徴生成構成ファイル fg.json とモデル構成ファイル config の構成例について説明します。
サンプルデータ
以下のサンプルデータには、一般的なデータ型のフィールドが含まれています:
ID 特徴: user_id、item_id
カテゴリ特徴: gender、category
数値特徴: age
ルックアップ特徴: user__kv_category_click_1d
多値特徴: tags
テキスト特徴: description
行動シーケンス特徴: click_10_seq
サンプルラベル: is_click
ルックアップ特徴とは、キーバリュー (KV) ストア、キャッシュ、データベーステーブルなど、外部であらかじめ計算されたマッピングテーブルから特徴値を取得する仕組みです。モデルはトレーニングや予測の際に事前計算済みの値をすばやく参照できるため、リアルタイムでの計算や複雑なデータ処理ロジックが不要になります。この手法は、レコメンデーションシステムやクリック率 (CTR) 予測などのシナリオでよく使われます。例としては、プロダクトカテゴリに対するユーザーの過去のクリック数などの行動統計や、アイテムの人気度などが挙げられます。たとえば以下のサンプルデータ 1 では、アイテムの category (Electronics) をキーとして user__kv_category_click_1d を参照し、値 10 が特徴値として得られます。
フィールド名 | サンプルデータ 1 | サンプルデータ 2 | サンプルデータ 3 |
request_id | 101 | 102 | 103 |
user_id | 1 | 2 | 3 |
item_id | 4 | 5 | 10 |
event_unix_time | 1672502400 | 1672502400 | 1672502400 |
is_click | 0 | 1 | 1 |
age | 25 | 30 | 22 |
gender | Male | Female | Female |
user__kv_category_click_1d | Electronics:10 Appliances:1 Accessories:2 | Electronics:1 Appliances:5 Accessories:1 | Electronics:1 Appliances:2 Accessories:11 |
category | Electronics | Appliances | Accessories |
tags | Tech Computer Portable | Home Appliance Refrigerated | Fashion Glasses Sunscreen |
description | Portable high-performance laptop | Large-capacity refrigerator | Fashionable sunglasses |
click_10_seq | item__item_id:4#item__category:Electronics#user__ts:21041;item__item_id:5#item__category:Appliances#user__ts:168139;item__item_id:10#item__category:Accessories#user__ts:168284 | ||
ds | 20230101 | 20230101 | 20230101 |
特徴生成構成ファイル fg.json
以下に、構成済みの fg.json ファイルの例を示します。
{
"features": [
{
"feature_name": "user_id",
"feature_type": "id_feature",
"value_type": "String",
"expression": "user:user_id",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "item_id",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:item_id",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "age",
"feature_type": "raw_feature",
"value_type": "Double",
"expression": "user:age",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false
},
{
"feature_name": "gender",
"feature_type": "id_feature",
"value_type": "String",
"expression": "user:gender",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "user__kv_category_click_1d",
"feature_type": "lookup_feature",
"value_type": "Double",
"map": "user:user__kv_category_click_1d",
"key": "item:category",
"needDiscrete": false,
"needWeighting": false,
"needKey": false,
"default_value": "0",
"combiner": "mean",
"need_prefix": false
},
{
"feature_name": "category",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:category",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "tags",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:tags",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": true
},
{
"feature_name": "description",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:description",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": true
},
{
"sequence_name": "click_10_seq",
"sequence_column": "click_10_seq",
"sequence_length": 10,
"sequence_delim": ";",
"attribute_delim": "#",
"sequence_table": "item",
"sequence_pk": "user:click_10_seq",
"features": [
{
"feature_name": "item_id",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:item_id",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false,
"group": "click_10_seq_feature"
},
{
"feature_name": "category",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:category",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false,
"group": "click_10_seq_feature"
},
{
"feature_name": "ts",
"feature_type": "raw_feature",
"value_type": "Double",
"expression": "user:ts",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"group": "click_10_seq_feature"
}
]
}
],
"reserves": [
"request_id",
"user_id",
"item_id",
"is_click"
]
}
モデル構成ファイル config
以下に、構成済みのモデル構成ファイル config の例を示します。この例には、コンボ特徴 (ComboFeature) と式特徴 (ExprFeature) が含まれています。詳細については、easy_rec をご参照ください。
train_config {
optimizer_config {
use_moving_average: false
adam_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.001
decay_steps: 1
decay_factor: 0.5
min_learning_rate: 1e-06
}
}
}
}
num_steps: 1
sync_replicas: true
save_summary_steps: 100
log_step_count_steps: 100
}
eval_config {
metrics_set {
auc {
}
}
}
data_config {
batch_size: 1024
label_fields: "is_click"
shuffle: false
num_epochs: 10000
input_type: OdpsRTPInput
separator: "\002"
selected_cols: "is_click,features"
input_fields {
input_name: "is_click"
input_type: INT32
default_val: "0"
}
input_fields {
input_name: "user_id"
input_type: STRING
default_val: "-1024"
}
input_fields {
input_name: "item_id"
input_type: STRING
default_val: "-1024"
}
input_fields {
input_name: "age"
input_type: DOUBLE
default_val: "-1024"
}
input_fields {
input_name: "gender"
input_type: STRING
default_val: "-1024"
}
input_fields {
input_name: "user__kv_category_click_1d"
input_type: DOUBLE
default_val: "0"
}
input_fields {
input_name: "category"
input_type: STRING
default_val: "-1024"
}
input_fields {
input_name: "tags"
input_type: STRING
default_val: "-1024"
}
input_fields {
input_name: "description"
input_type: STRING
default_val: "-1024"
}
input_fields {
input_name: "click_10_seq__item_id"
input_type: STRING
}
input_fields {
input_name: "click_10_seq__category"
input_type: STRING
}
input_fields {
input_name: "click_10_seq__ts"
input_type: STRING
}
pai_worker_queue: true
}
feature_configs {
input_names: "user_id"
feature_type: IdFeature
embedding_dim: 8
hash_bucket_size: 48000
separator: ""
combiner: "mean"
}
feature_configs {
input_names: "item_id"
feature_type: IdFeature
embedding_dim: 8
hash_bucket_size: 27000
separator: ""
combiner: "mean"
}
feature_configs {
input_names: "age"
feature_type: RawFeature
embedding_dim: 4
separator: ""
boundaries: 1e-08
boundaries: 10
boundaries: 20
boundaries: 30
boundaries: 40
boundaries: 50
boundaries: 60
}
feature_configs {
input_names: "gender"
feature_type: IdFeature
embedding_dim: 4
hash_bucket_size: 10
separator: ""
combiner: "mean"
}
feature_configs {
input_names: "user__kv_category_click_1d"
feature_type: RawFeature
embedding_dim: 4
separator: ""
boundaries: 1e-08
boundaries: 1.0
boundaries: 2.0
boundaries: 3.0
boundaries: 4.0
boundaries: 5.0
boundaries: 6.0
}
feature_configs {
input_names: "category"
feature_type: IdFeature
embedding_dim: 4
hash_bucket_size: 100
separator: ""
combiner: "mean"
}
feature_configs {
input_names: "tags"
feature_type: TagFeature
embedding_dim: 4
hash_bucket_size: 1000
separator: ""
combiner: "mean"
}
feature_configs {
input_names: "description"
feature_type: SequenceFeature
embedding_dim: 4
hash_bucket_size: 10
separator: ""
sequence_combiner {
text_cnn {
filter_sizes: 2
filter_sizes: 3
filter_sizes: 4
num_filters: 16
num_filters: 8
num_filters: 8
}
}
}
feature_configs {
input_names: "click_10_seq__item_id"
feature_type: SequenceFeature
embedding_dim: 8
hash_bucket_size: 27000
separator: ";"
combiner: "mean"
sub_feature_type: IdFeature
}
feature_configs {
input_names: "click_10_seq__category"
feature_type: SequenceFeature
embedding_dim: 4
hash_bucket_size: 10000
separator: ";"
combiner: "mean"
sub_feature_type: IdFeature
}
feature_configs {
input_names: "click_10_seq__ts"
feature_type: SequenceFeature
embedding_dim: 4
separator: ";"
sub_feature_type: RawFeature
}
feature_configs {
input_names: ["age", "gender"]
feature_name: "combo_age_gender"
feature_type: ComboFeature
embedding_dim: 16
hash_bucket_size: 1000
}
feature_configs {
input_names: "age"
feature_name: "age_satisfy1"
feature_type: ExprFeature
expression: "age>=18"
}
model_config {
model_class: "MultiTower"
feature_groups {
group_name: "all"
feature_names: "user_id"
feature_names: "item_id"
feature_names: "age"
feature_names: "gender"
feature_names: "user__kv_category_click_1d"
feature_names: "category"
feature_names: "tags"
feature_names: "description"
feature_names: "combo_age_gender"
feature_names: "age_satisfy1"
wide_deep: DEEP
sequence_features {
group_name: "click_10_seq"
seq_att_map {
key: "item_id"
key: "category"
hist_seq: "click_10_seq__item_id"
hist_seq: "click_10_seq__category"
hist_seq: "click_10_seq__ts"
}
tf_summary: false
allow_key_search: false
allow_key_transform: true
}
}
embedding_regularization: 5e-06
multi_tower {
towers {
input: "all"
dnn {
hidden_units: 256
hidden_units: 128
}
}
final_dnn {
hidden_units: 64
hidden_units: 32
}
l2_regularization: 1e-06
}
}
export_config {
multi_placeholder: true
}