This topic provides examples of how to configure the feature generation file, fg.json, and the model configuration file, config.
Sample data
The following sample data includes fields with common data types:
ID features: user_id, item_id
Categorical features: gender, category
Numerical feature: age
Lookup feature: user__kv_category_click_1d
Multi-value feature: tags
Text feature: description
Behavior sequence feature: click_10_seq
Sample label: is_click
A lookup feature is a mechanism that retrieves feature values from an external, pre-calculated mapping table, such as a key-value (KV) store, cache, or database table. This allows the model to quickly query these pre-calculated values during training or prediction. This avoids the need for real-time computing or complex data processing logic. This method is common in scenarios such as recommendation systems and click-through rate (CTR) prediction. Examples include a user's historical behavior statistics, such as the number of clicks on a product category, or an item's popularity.
Field name | Sample data 1 | Sample data 2 | Sample data 3 |
request_id | 101 | 102 | 103 |
user_id | 1 | 2 | 3 |
item_id | 4 | 5 | 10 |
event_unix_time | 1672502400 | 1672502400 | 1672502400 |
is_click | 0 | 1 | 1 |
age | 25 | 30 | 22 |
gender | Male | Female | Female |
user__kv_category_click_1d | Electronics:10 Appliances:1 Accessories:2 | Electronics:1 Appliances:5 Accessories:1 | Electronics:1 Appliances:2 Accessories:11 |
category | Electronics | Appliances | Accessories |
tags | Tech Computer Portable | Home Appliance Refrigerated | Fashion Glasses Sunscreen |
description | Portable high-performance laptop | Large-capacity refrigerator | Fashionable sunglasses |
click_10_seq | item__item_id:4#item__category:Electronics#user__ts:21041;item__item_id:5#item__category:Appliances#user__ts:168139;item__item_id:10#item__category:Accessories#user__ts:168284 | ||
ds | 20230101 | 20230101 | 20230101 |
Feature generation configuration file fg.json
The following example shows a configured fg.json file.
{
"features": [
{
"feature_name": "user_id",
"feature_type": "id_feature",
"value_type": "String",
"expression": "user:user_id",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "item_id",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:item_id",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "age",
"feature_type": "raw_feature",
"value_type": "Double",
"expression": "user:age",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false
},
{
"feature_name": "gender",
"feature_type": "id_feature",
"value_type": "String",
"expression": "user:gender",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "user__kv_category_click_1d",
"feature_type": "lookup_feature",
"value_type": "Double",
"map": "user:user__kv_category_click_1d",
"key": "item:category",
"needDiscrete": false,
"needWeighting": false,
"needKey": false,
"default_value": "0",
"combiner": "mean",
"need_prefix": false
},
{
"feature_name": "category",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:category",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false
},
{
"feature_name": "tags",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:tags",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": true
},
{
"feature_name": "description",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:description",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": true
},
{
"sequence_name": "click_10_seq",
"sequence_column": "click_10_seq",
"sequence_length": 10,
"sequence_delim": ";",
"attribute_delim": "#",
"sequence_table": "item",
"sequence_pk": "user:click_10_seq",
"features": [
{
"feature_name": "item_id",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:item_id",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false,
"group": "click_10_seq_feature"
},
{
"feature_name": "category",
"feature_type": "id_feature",
"value_type": "String",
"expression": "item:category",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"is_multi": false,
"group": "click_10_seq_feature"
},
{
"feature_name": "ts",
"feature_type": "raw_feature",
"value_type": "Double",
"expression": "user:ts",
"default_value": "-1024",
"combiner": "mean",
"need_prefix": false,
"group": "click_10_seq_feature"
}
]
}
],
"reserves": [
"request_id",
"user_id",
"item_id",
"is_click"
]
}
Model configuration file config
The following example shows a configured model configuration file, config. It includes combo features and expression features. For more information, see easy_rec.
# Training settings: optimizer, step budget, and summary/log cadence.
train_config {
  optimizer_config {
    use_moving_average: false
    # Adam optimizer driven by an exponentially decaying learning rate.
    adam_optimizer {
      learning_rate {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.001
          decay_steps: 1
          decay_factor: 0.5
          min_learning_rate: 1e-06
        }
      }
    }
  }
  num_steps: 1
  sync_replicas: true
  save_summary_steps: 100
  log_step_count_steps: 100
}
# Evaluation settings: AUC is the only metric configured.
eval_config {
  metrics_set {
    auc {}
  }
}
# Input data settings: label column, reader type, and one input_fields
# entry per column consumed by the feature_configs sections.
data_config {
  batch_size: 1024
  label_fields: "is_click"
  shuffle: false
  num_epochs: 10000
  input_type: OdpsRTPInput
  # NOTE(review): the separator is an empty string here; confirm that a
  # non-printing delimiter was not lost when this example was rendered.
  separator: ""
  selected_cols: "is_click,features"

  # Label.
  input_fields {
    input_name: "is_click"
    input_type: INT32
    default_val: "0"
  }

  # Single-value and multi-value features; defaults match fg.json.
  input_fields {
    input_name: "user_id"
    input_type: STRING
    default_val: "-1024"
  }
  input_fields {
    input_name: "item_id"
    input_type: STRING
    default_val: "-1024"
  }
  input_fields {
    input_name: "age"
    input_type: DOUBLE
    default_val: "-1024"
  }
  input_fields {
    input_name: "gender"
    input_type: STRING
    default_val: "-1024"
  }
  input_fields {
    input_name: "user__kv_category_click_1d"
    input_type: DOUBLE
    default_val: "0"
  }
  input_fields {
    input_name: "category"
    input_type: STRING
    default_val: "-1024"
  }
  input_fields {
    input_name: "tags"
    input_type: STRING
    default_val: "-1024"
  }
  input_fields {
    input_name: "description"
    input_type: STRING
    default_val: "-1024"
  }

  # Sub-features of the click_10_seq behavior sequence; all read as STRING
  # and declared without a default value.
  input_fields {
    input_name: "click_10_seq__item_id"
    input_type: STRING
  }
  input_fields {
    input_name: "click_10_seq__category"
    input_type: STRING
  }
  input_fields {
    input_name: "click_10_seq__ts"
    input_type: STRING
  }

  pai_worker_queue: true
}
# ID features: hashed into buckets and embedded.
feature_configs {
  input_names: "user_id"
  feature_type: IdFeature
  embedding_dim: 8
  hash_bucket_size: 48000
  separator: ""
  combiner: "mean"
}
feature_configs {
  input_names: "item_id"
  feature_type: IdFeature
  embedding_dim: 8
  hash_bucket_size: 27000
  separator: ""
  combiner: "mean"
}

# Numerical feature with explicit boundaries and a 4-dim embedding.
feature_configs {
  input_names: "age"
  feature_type: RawFeature
  embedding_dim: 4
  separator: ""
  boundaries: 1e-08
  boundaries: 10
  boundaries: 20
  boundaries: 30
  boundaries: 40
  boundaries: 50
  boundaries: 60
}

# Categorical feature.
feature_configs {
  input_names: "gender"
  feature_type: IdFeature
  embedding_dim: 4
  hash_bucket_size: 10
  separator: ""
  combiner: "mean"
}

# Lookup feature output (a number), configured like a raw feature with
# boundaries.
feature_configs {
  input_names: "user__kv_category_click_1d"
  feature_type: RawFeature
  embedding_dim: 4
  separator: ""
  boundaries: 1e-08
  boundaries: 1.0
  boundaries: 2.0
  boundaries: 3.0
  boundaries: 4.0
  boundaries: 5.0
  boundaries: 6.0
}

# Categorical feature.
feature_configs {
  input_names: "category"
  feature_type: IdFeature
  embedding_dim: 4
  hash_bucket_size: 100
  separator: ""
  combiner: "mean"
}

# Multi-value feature.
feature_configs {
  input_names: "tags"
  feature_type: TagFeature
  embedding_dim: 4
  hash_bucket_size: 1000
  separator: ""
  combiner: "mean"
}
# Text feature: handled as a sequence and combined with a text CNN that
# lists three filter sizes (2, 3, 4) with 16, 8, and 8 filters.
feature_configs {
  input_names: "description"
  feature_type: SequenceFeature
  embedding_dim: 4
  hash_bucket_size: 10
  separator: ""
  sequence_combiner {
    text_cnn {
      filter_sizes: 2
      filter_sizes: 3
      filter_sizes: 4
      num_filters: 16
      num_filters: 8
      num_filters: 8
    }
  }
}
# Behavior-sequence sub-features with ID-type elements; elements are
# separated by ";" (the sequence_delim used in fg.json).
feature_configs {
  input_names: "click_10_seq__item_id"
  feature_type: SequenceFeature
  embedding_dim: 8
  hash_bucket_size: 27000
  separator: ";"
  combiner: "mean"
  sub_feature_type: IdFeature
}
feature_configs {
  input_names: "click_10_seq__category"
  feature_type: SequenceFeature
  embedding_dim: 4
  hash_bucket_size: 10000
  separator: ";"
  combiner: "mean"
  sub_feature_type: IdFeature
}
# Behavior-sequence sub-feature with numeric (raw) elements, separated by ";".
# Declared exactly once: a repeated identical block would define the same
# feature twice.
feature_configs {
  input_names: "click_10_seq__ts"
  feature_type: SequenceFeature
  embedding_dim: 4
  separator: ";"
  sub_feature_type: RawFeature
}
# Combo feature built from the age and gender inputs.
feature_configs {
  input_names: ["age", "gender"]
  feature_name: "combo_age_gender"
  feature_type: ComboFeature
  embedding_dim: 16
  hash_bucket_size: 1000
}

# Expression feature evaluated over the age input.
feature_configs {
  input_names: "age"
  feature_name: "age_satisfy1"
  feature_type: ExprFeature
  expression: "age>=18"
}
# Model definition: a MultiTower model with one feature group ("all")
# feeding a single tower, followed by a final DNN.
model_config {
  model_class: "MultiTower"

  feature_groups {
    group_name: "all"
    feature_names: "user_id"
    feature_names: "item_id"
    feature_names: "age"
    feature_names: "gender"
    feature_names: "user__kv_category_click_1d"
    feature_names: "category"
    feature_names: "tags"
    feature_names: "description"
    feature_names: "combo_age_gender"
    feature_names: "age_satisfy1"
    wide_deep: DEEP

    # Sequence group for click_10_seq: the listed keys and the history
    # sequences they are mapped against.
    sequence_features {
      group_name: "click_10_seq"
      seq_att_map {
        key: "item_id"
        key: "category"
        hist_seq: "click_10_seq__item_id"
        hist_seq: "click_10_seq__category"
        hist_seq: "click_10_seq__ts"
      }
      tf_summary: false
      allow_key_search: false
      allow_key_transform: true
    }
  }

  embedding_regularization: 5e-06

  multi_tower {
    towers {
      input: "all"
      dnn {
        hidden_units: 256
        hidden_units: 128
      }
    }
    final_dnn {
      hidden_units: 64
      hidden_units: 32
    }
    l2_regularization: 1e-06
  }
}
# Export settings.
export_config {
  multi_placeholder: true
}