After you create a dataset in Intelligent Media Management (IMM), create a metadata index for files stored in Object Storage Service (OSS) or Drive and Photo Service. A metadata index structures key information about your media files -- titles, authors, keywords, creation dates, sizes, formats, and resolutions -- so you can search, filter, and manage them efficiently.
Prerequisites
Before you begin, make sure you have:
A dataset created in your IMM project. For more information, see Create a dataset.
Indexing methods
IMM supports two indexing methods:
| Method | Scope | API operation | Best for |
|---|---|---|---|
| Automatic | All objects in an OSS bucket | CreateBinding | Full-bucket indexing with continuous incremental updates |
| Manual | Specified files in OSS or Drive and Photo Service | BatchIndexFileMeta / IndexFileMeta | Selective indexing of specific files |
Automatic indexing
Call CreateBinding to map a dataset to an OSS bucket. After the mapping is established, IMM performs a full scan of all existing objects in the bucket, extracts metadata, and creates a metadata index. It then continuously monitors the bucket for incremental data and indexes new objects automatically.
Metadata indexing incurs fees proportional to the number of objects scanned. For details, see Billing items. To try out metadata indexing, use a bucket with a small number of objects and choose a workflow template carefully to avoid unexpected charges.
Step 1: Create a binding
The following example maps the test-dataset dataset in the test-project project to the test-bucket bucket.
Request
{
"ProjectName": "test-project",
"URI": "oss://test-bucket",
"DatasetName": "test-dataset"
}
Response
{
"Binding": {
"Phase": "",
"ProjectName": "test-project",
"DatasetName": "test-dataset",
"State": "Ready",
"CreateTime": "2022-07-06T07:03:28.054762739+08:00",
"UpdateTime": "2022-07-06T07:03:28.054762739+08:00",
"URI": "oss://test-bucket"
},
"RequestId": "090D2AC5-8450-0AA8-A1B1-****"
}
Complete sample code (IMM SDK for Python)
# -*- coding: utf-8 -*-
import os
from alibabacloud_imm20200930.client import Client as imm20200930Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_imm20200930 import models as imm_20200930_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
class Sample:
    """Sample that maps an IMM dataset to an OSS bucket by calling CreateBinding."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> imm20200930Client:
        """
        Use your AccessKey ID and AccessKey secret to initialize the client.
        @param access_key_id: the AccessKey ID placed in the client configuration.
        @param access_key_secret: the AccessKey secret placed in the client configuration.
        @return: Client
        @throws Exception
        """
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret
        )
        # Specify the endpoint.
        config.endpoint = 'imm.cn-beijing.aliyuncs.com'
        return imm20200930Client(config)

    @staticmethod
    def main() -> None:
        """Create the binding and print either the response body or the error."""
        # The AccessKey pair of an Alibaba Cloud account has permissions on all API operations. To prevent security risks, we recommend that you call API operations or perform routine O&M as a RAM user.
        # We recommend that you do not include your AccessKey pair (AccessKey ID and AccessKey secret) in your project code. Otherwise, the AccessKey pair may be leaked and the security of all resources within your account may be compromised.
        # In this example, the AccessKey pair is read from the environment variables to implement identity verification for API access. For information about how to configure environment variables, visit https://www.alibabacloud.com/help/document_detail/2361894.html.
        imm_access_key_id = os.getenv("AccessKeyId")
        imm_access_key_secret = os.getenv("AccessKeySecret")
        client = Sample.create_client(imm_access_key_id, imm_access_key_secret)
        create_binding_request = imm_20200930_models.CreateBindingRequest(
            # Specify the name of the IMM project.
            project_name='test-project',
            # Specify the name of the dataset.
            dataset_name='test-dataset',
            # Specify the URI of the bucket.
            uri='oss://test-bucket'
        )
        runtime = util_models.RuntimeOptions()
        try:
            # Print the response of the API operation.
            response = client.create_binding_with_options(create_binding_request, runtime)
            print(response.body.to_map())
        except Exception as error:
            # Print the error message if necessary.
            # A plain Exception has no `message` attribute; fall back to
            # str(error) so the handler itself cannot raise AttributeError
            # and hide the original failure.
            message = getattr(error, 'message', None) or str(error)
            UtilClient.assert_as_string(message)
            print(error)
if __name__ == '__main__':
    Sample.main()
Step 2 (optional): Check binding status
Call GetBinding to query the mapping status.
Request
{
"ProjectName": "test-project",
"URI": "oss://test-bucket",
"DatasetName": "test-dataset"
}
Response
{
"Binding": {
"Phase": "IncrementalScanning",
"ProjectName": "test-project",
"DatasetName": "test-dataset",
"State": "Running",
"CreateTime": "2022-07-06T07:04:05.105182822+08:00",
"UpdateTime": "2022-07-06T07:04:13.302084076+08:00",
"URI": "oss://test-bucket"
},
"RequestId": "B5A9F54B-6C54-03C9-B011-****"
}
Key response fields:
| Field | Value | Meaning |
|---|---|---|
| Phase | IncrementalScanning | IMM has finished indexing all existing objects and is now scanning for incremental objects |
| State | Running | The binding is running: IMM is actively scanning the bucket and indexing objects |
Complete sample code (IMM SDK for Python 1.27.3)
# -*- coding: utf-8 -*-
import os
from alibabacloud_imm20200930.client import Client as imm20200930Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_imm20200930 import models as imm_20200930_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
class Sample:
    """Sample that queries the status of a dataset-to-bucket binding by calling GetBinding."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> imm20200930Client:
        """
        Use your AccessKey ID and AccessKey secret to initialize the client.
        @param access_key_id: the AccessKey ID placed in the client configuration.
        @param access_key_secret: the AccessKey secret placed in the client configuration.
        @return: Client
        @throws Exception
        """
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret
        )
        # Specify the endpoint.
        config.endpoint = 'imm.cn-beijing.aliyuncs.com'
        return imm20200930Client(config)

    @staticmethod
    def main() -> None:
        """Query the binding and print either the response body or the error."""
        # The AccessKey pair of an Alibaba Cloud account has permissions on all API operations. To prevent security risks, we recommend that you call API operations or perform routine O&M as a RAM user.
        # We recommend that you do not include your AccessKey pair (AccessKey ID and AccessKey secret) in your project code. Otherwise, the AccessKey pair may be leaked and the security of all resources within your account may be compromised.
        # In this example, the AccessKey pair is read from the environment variables to implement identity verification for API access. For information about how to configure environment variables, visit https://www.alibabacloud.com/help/document_detail/2361894.html.
        imm_access_key_id = os.getenv("AccessKeyId")
        imm_access_key_secret = os.getenv("AccessKeySecret")
        client = Sample.create_client(imm_access_key_id, imm_access_key_secret)
        get_binding_request = imm_20200930_models.GetBindingRequest(
            # Specify the name of the IMM project.
            project_name='test-project',
            # Specify the name of the dataset.
            dataset_name='test-dataset',
            # Specify the URI of the bucket.
            uri='oss://test-bucket'
        )
        runtime = util_models.RuntimeOptions()
        try:
            # Print the response of the API operation.
            response = client.get_binding_with_options(get_binding_request, runtime)
            print(response.body.to_map())
        except Exception as error:
            # Print the error message if necessary.
            # A plain Exception has no `message` attribute; fall back to
            # str(error) so the handler itself cannot raise AttributeError
            # and hide the original failure.
            message = getattr(error, 'message', None) or str(error)
            UtilClient.assert_as_string(message)
            print(error)
if __name__ == '__main__':
    Sample.main()
Manual indexing
To index specific files in OSS or Drive and Photo Service, call BatchIndexFileMeta (multiple files) or IndexFileMeta (single file). Both operations support custom labels and Message Service (MNS) notifications.
Index multiple files (BatchIndexFileMeta)
The following example indexes oss://test-bucket/test-object1.jpg and oss://test-bucket/test-object2.jpg in the test-dataset dataset of the test-project project with custom labels.
Request
{
"ProjectName": "test-project",
"DatasetName": "test-dataset",
"Files": [
{
"URI": "oss://test-bucket/test-object1.jpg",
"CustomLabels": {
"category": "Persons"
}
},
{
"URI": "oss://test-bucket/test-object2.jpg",
"CustomLabels": {
"category": "Pets"
}
}
],
"Notification": {
"MNS": {
"TopicName": "test-topic"
}
}
}
Response
{
"RequestId": "0D4CB096-EB44-02D6-A4E9-****",
"EventId": "16C-1KoeYbdckkiOObpyzc****"
}
MNS notification message
For more information about Message Service SDKs, see Step 4: Receive and delete the message.
{
"ProjectName": "test-project",
"DatasetName": "test-dataset",
"RequestId": "658FFD57-B495-07C0-B24B-B64CC52993CB",
"StartTime": "2022-07-06T07:18:18.664770352+08:00",
"EndTime": "2022-07-06T07:18:20.762465221+08:00",
"Success": true,
"Message": "",
"Files": [
{
"URI": "oss://test-bucket/test-object1.jpg",
"CustomLabels": {
"category": "Persons"
},
"Error": ""
},
{
"URI": "oss://test-bucket/test-object2.jpg",
"CustomLabels": {
"category": "Pets"
},
"Error": ""
}
]
}
Verify the result:
Success is true: the metadata index is created.
Error is empty for a given file: that file was indexed successfully.
Complete sample code (IMM SDK for Python)
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import sys
import os
from typing import List
from alibabacloud_imm20200930.client import Client as imm20200930Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_imm20200930 import models as imm_20200930_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
class Sample:
    """Sample that indexes two OSS objects with custom labels by calling BatchIndexFileMeta."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> imm20200930Client:
        """
        Use your AccessKey ID and AccessKey secret to initialize the client.
        @param access_key_id: the AccessKey ID placed in the client configuration.
        @param access_key_secret: the AccessKey secret placed in the client configuration.
        @return: Client
        @throws Exception
        """
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret
        )
        # Specify the endpoint.
        config.endpoint = 'imm.cn-beijing.aliyuncs.com'
        return imm20200930Client(config)

    @staticmethod
    def _build_request() -> 'imm_20200930_models.BatchIndexFileMetaRequest':
        """Build the BatchIndexFileMeta request shared by main() and main_async()."""
        notification = imm_20200930_models.Notification(
            mns=imm_20200930_models.MNS(
                topic_name='test-topic'
            )
        )
        # Each file carries its own custom label.
        files = [
            imm_20200930_models.InputFile(
                uri='oss://test-bucket/test-object1.jpg',
                custom_labels={'category': 'Persons'}
            ),
            imm_20200930_models.InputFile(
                uri='oss://test-bucket/test-object2.jpg',
                custom_labels={'category': 'Pets'}
            ),
        ]
        return imm_20200930_models.BatchIndexFileMetaRequest(
            project_name='test-project',
            dataset_name='test-dataset',
            files=files,
            notification=notification
        )

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """
        Submit the batch indexing request synchronously and print the outcome.
        @param args: command-line arguments; not used by this sample.
        """
        # The AccessKey pair of an Alibaba Cloud account has permissions on all API operations. To prevent security risks, we recommend that you call API operations or perform routine O&M as a RAM user.
        # We recommend that you do not include your AccessKey pair (AccessKey ID and AccessKey secret) in your project code. Otherwise, the AccessKey pair may be leaked and the security of all resources within your account may be compromised.
        # In this example, the AccessKey pair is read from the environment variables to implement identity verification for API access. For information about how to configure environment variables, visit https://www.alibabacloud.com/help/document_detail/2361894.html.
        imm_access_key_id = os.getenv("AccessKeyId")
        imm_access_key_secret = os.getenv("AccessKeySecret")
        client = Sample.create_client(imm_access_key_id, imm_access_key_secret)
        batch_index_file_meta_request = Sample._build_request()
        runtime = util_models.RuntimeOptions()
        try:
            # Print the response of the API operation.
            response = client.batch_index_file_meta_with_options(batch_index_file_meta_request, runtime)
            print(response.body.to_map())
        except Exception as error:
            # Print the error message if necessary.
            # A plain Exception has no `message` attribute; fall back to
            # str(error) so the handler itself cannot raise AttributeError
            # and hide the original failure.
            message = getattr(error, 'message', None) or str(error)
            UtilClient.assert_as_string(message)
            print(error)

    @staticmethod
    async def main_async(
        args: List[str],
    ) -> None:
        """
        Submit the batch indexing request asynchronously and print the outcome.
        @param args: command-line arguments; not used by this sample.
        """
        # The AccessKey pair of an Alibaba Cloud account has permissions on all API operations. To prevent security risks, we recommend that you call API operations or perform routine O&M as a RAM user.
        # We recommend that you do not include your AccessKey pair (AccessKey ID and AccessKey secret) in your project code. Otherwise, the AccessKey pair may be leaked and the security of all resources within your account may be compromised.
        # In this example, the AccessKey pair is read from the environment variables to implement identity verification for API access. For information about how to configure environment variables, visit https://www.alibabacloud.com/help/document_detail/2361894.html.
        imm_access_key_id = os.getenv("AccessKeyId")
        imm_access_key_secret = os.getenv("AccessKeySecret")
        client = Sample.create_client(imm_access_key_id, imm_access_key_secret)
        batch_index_file_meta_request = Sample._build_request()
        runtime = util_models.RuntimeOptions()
        try:
            # Print the response of the API operation.
            response = await client.batch_index_file_meta_with_options_async(batch_index_file_meta_request, runtime)
            print(response.body.to_map())
        except Exception as error:
            # Print the error message if necessary.
            # Same fallback as in main(): never let the handler raise.
            message = getattr(error, 'message', None) or str(error)
            UtilClient.assert_as_string(message)
            print(error)
if __name__ == '__main__':
    Sample.main(sys.argv[1:])
Index a single file (IndexFileMeta)
The following example indexes oss://test-bucket/test-object1.jpg in the test-dataset dataset of the test-project project.
Request
{
"ProjectName": "test-project",
"DatasetName": "test-dataset",
"File": {
"URI": "oss://test-bucket/test-object1.jpg",
"CustomLabels": {
"category": "Persons"
}
},
"Notification": {
"MNS": {
"TopicName": "test-topic"
}
}
}
Response
{
"RequestId": "5AA694AD-3D10-0B6A-85B2-****",
"EventId": "17C-1Kofq1mlJxRYF7vAGF****"
}
MNS notification message
For more information about Message Service SDKs, see Step 4: Receive and delete the message.
{
"ProjectName": "test-project",
"DatasetName": "test-dataset",
"RequestId": "658FFD57-B495-07C0-B24B-B64CC52993CB",
"StartTime": "2022-07-06T07:18:18.664770352+08:00",
"EndTime": "2022-07-06T07:18:20.762465221+08:00",
"Success": true,
"Message": "",
"Files": [
{
"URI": "oss://test-bucket/test-object1.jpg",
"CustomLabels": {
"category": "Persons"
},
"Error": ""
}
]
}
Verify the result:
Success is true: the metadata index is created.
Error is empty for a given file: that file was indexed successfully.
Complete sample code (IMM SDK for Python)
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import sys
import os
from typing import List
from alibabacloud_imm20200930.client import Client as imm20200930Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_imm20200930 import models as imm_20200930_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
class Sample:
    """Sample that indexes a single OSS object with a custom label by calling IndexFileMeta."""

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> imm20200930Client:
        """
        Use your AccessKey ID and AccessKey secret to initialize the client.
        @param access_key_id: the AccessKey ID placed in the client configuration.
        @param access_key_secret: the AccessKey secret placed in the client configuration.
        @return: Client
        @throws Exception
        """
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret
        )
        # Specify the endpoint.
        config.endpoint = 'imm.cn-beijing.aliyuncs.com'
        return imm20200930Client(config)

    @staticmethod
    def _build_request() -> 'imm_20200930_models.IndexFileMetaRequest':
        """Build the IndexFileMeta request shared by main() and main_async()."""
        notification = imm_20200930_models.Notification(
            mns=imm_20200930_models.MNS(
                topic_name='test-topic'
            )
        )
        input_file = imm_20200930_models.InputFile(
            uri='oss://test-bucket/test-object1.jpg',
            custom_labels={'category': 'Persons'}
        )
        return imm_20200930_models.IndexFileMetaRequest(
            project_name='test-project',
            dataset_name='test-dataset',
            file=input_file,
            notification=notification
        )

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """
        Submit the indexing request synchronously and print the outcome.
        @param args: command-line arguments; not used by this sample.
        """
        # The AccessKey pair of an Alibaba Cloud account has permissions on all API operations. To prevent security risks, we recommend that you call API operations or perform routine O&M as a RAM user.
        # We recommend that you do not include your AccessKey pair (AccessKey ID and AccessKey secret) in your project code. Otherwise, the AccessKey pair may be leaked and the security of all resources within your account may be compromised.
        # In this example, the AccessKey pair is read from the environment variables to implement identity verification for API access. For information about how to configure environment variables, visit https://www.alibabacloud.com/help/document_detail/2361894.html.
        imm_access_key_id = os.getenv("AccessKeyId")
        imm_access_key_secret = os.getenv("AccessKeySecret")
        client = Sample.create_client(imm_access_key_id, imm_access_key_secret)
        index_file_meta_request = Sample._build_request()
        runtime = util_models.RuntimeOptions()
        try:
            # Print the response of the API operation.
            response = client.index_file_meta_with_options(index_file_meta_request, runtime)
            print(response.body.to_map())
        except Exception as error:
            # Print the error message if necessary.
            # A plain Exception has no `message` attribute; fall back to
            # str(error) so the handler itself cannot raise AttributeError
            # and hide the original failure.
            message = getattr(error, 'message', None) or str(error)
            UtilClient.assert_as_string(message)
            print(error)

    @staticmethod
    async def main_async(
        args: List[str],
    ) -> None:
        """
        Submit the indexing request asynchronously and print the outcome.
        @param args: command-line arguments; not used by this sample.
        """
        # The AccessKey pair of an Alibaba Cloud account has permissions on all API operations. To prevent security risks, we recommend that you call API operations or perform routine O&M as a RAM user.
        # We recommend that you do not include your AccessKey pair (AccessKey ID and AccessKey secret) in your project code. Otherwise, the AccessKey pair may be leaked and the security of all resources within your account may be compromised.
        # In this example, the AccessKey pair is read from the environment variables to implement identity verification for API access. For information about how to configure environment variables, visit https://www.alibabacloud.com/help/document_detail/2361894.html.
        imm_access_key_id = os.getenv("AccessKeyId")
        imm_access_key_secret = os.getenv("AccessKeySecret")
        client = Sample.create_client(imm_access_key_id, imm_access_key_secret)
        index_file_meta_request = Sample._build_request()
        runtime = util_models.RuntimeOptions()
        try:
            # Print the response of the API operation.
            response = await client.index_file_meta_with_options_async(index_file_meta_request, runtime)
            print(response.body.to_map())
        except Exception as error:
            # Print the error message if necessary.
            # Same fallback as in main(): never let the handler raise.
            message = getattr(error, 'message', None) or str(error)
            UtilClient.assert_as_string(message)
            print(error)


if __name__ == '__main__':
    Sample.main(sys.argv[1:])