I am new to OpenSearch and want to use it for semantic search. I would greatly appreciate any help you can provide.
Versions (relevant - OpenSearch/Dashboard/Server OS/Browser):
'number': '2.7.0',
Describe the issue:
I am getting errors while indexing documents. I am following the steps in the notebook "Module 6 - Neural Search.ipynb" from the aws-samples/semantic-search-with-amazon-opensearch repository on GitHub (main branch).
Some code snippets:
# Step 1: ask the cluster to load (deploy) the model identified by model_id.
# The response carries the id of the task that performs the deployment.
load_model_output = ml_client.load_model(model_id)
print(load_model_output)
task_id = load_model_output['task_id']
# Observed output of the print above:
# {'task_id': 'WPFnC4gBg1EhCll66Vmu', 'status': 'CREATED'}
# Step 2: read the deployment task once. The observed output below reports
# state COMPLETED on two worker nodes.
# NOTE(review): the task is fetched a single time right after the load call and
# the output says 'is_async': True - presumably the state should be re-checked
# (polled) before indexing starts; confirm.
task_info = ml_client.get_task_info(task_id)
print(task_info)
# Observed output of the print above:
# {'model_id': 'U_FmC4gBg1EhCll64Fla', 'task_type': 'DEPLOY_MODEL', 'function_name': 'TEXT_EMBEDDING', 'state': 'COMPLETED', 'worker_node': ['WPLriH_GQLmuiiFmBsQ-yA', 'G1R97vvoQNK3xSj-zuaq1A'], 'create_time': 1683818539438, 'last_update_time': 1683818539548, 'is_async': True}
# Step 3: read the model metadata. The observed output below reports
# model_state DEPLOYED with current_worker_node_count 2 and an
# embedding_dimension of 384.
model_info = ml_client.get_model_info(model_id)
print(model_info)
# Observed output of the print above:
# {'name': 'all-MiniLM-L6-v2', 'algorithm': 'TEXT_EMBEDDING', 'model_version': '1.1.0', 'model_format': 'TORCH_SCRIPT', 'model_state': 'DEPLOYED', 'model_content_hash_value': '9376c2ebd7c83f99ec2526323786c348d2382e6d86576f750c89ea544d6bbb14', 'model_config': {'model_type': 'bert', 'embedding_dimension': 384, 'framework_type': 'SENTENCE_TRANSFORMERS'}, 'created_time': 1683818471514, 'last_updated_time': 1683818539491, 'last_deployed_time': 1683818539491, 'total_chunks': 9, 'planning_worker_node_count': 2, 'current_worker_node_count': 2, 'planning_worker_nodes': ['WPLriH_GQLmuiiFmBsQ-yA', 'G1R97vvoQNK3xSj-zuaq1A'], 'deploy_to_all_nodes': True}
# Ingest pipeline with a single text_embedding processor. Its field_map pairs
# the source field "title_summary" with the target field
# "title_summary_embedding", using the model identified by model_id.
pipeline_id = 'nlp_pipeline'
embedding_processor = {
    "text_embedding": {
        "model_id": model_id,
        "field_map": {"title_summary": "title_summary_embedding"},
    }
}
pipeline = {
    "description": "An example neural search pipeline",
    "processors": [embedding_processor],
}

# Register the pipeline under pipeline_id, then read it back to verify it.
aos_client.ingest.put_pipeline(id=pipeline_id, body=pipeline)
aos_client.ingest.get_pipeline(id=pipeline_id)
# Observed output of get_pipeline:
# {'nlp_pipeline': {'description': 'An example neural search pipeline', 'processors': [{'text_embedding': {'model_id': 'U_FmC4gBg1EhCll64Fla', 'field_map': {'title_summary': 'title_summary_embedding'}}}]}}
# Index-level settings: k-NN is switched on and every document indexed without
# an explicit pipeline goes through pipeline_id (default_pipeline).
index_settings = {
    "index.knn": True,
    "index.knn.space_type": "cosinesimil",
    "default_pipeline": pipeline_id,
    "analysis": {
        "analyzer": {"default": {"type": "standard", "stopwords": "_english_"}}
    },
}

# Vector field that receives the embedding. The dimension (384) equals the
# embedding_dimension reported for the model by get_model_info.
# NOTE(review): the index setting above says space_type "cosinesimil" while
# this method says "l2" - confirm which one is intended and whether the
# chosen engine supports it.
embedding_field = {
    "type": "knn_vector",
    "dimension": 384,
    "method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},
    "store": True,
}

knn_index = {
    "settings": index_settings,
    "mappings": {
        "properties": {
            "title_summary_embedding": embedding_field,
            "title_summary": {"type": "text", "store": True},
        }
    },
}

# NOTE(review): ignore=400 presumably suppresses HTTP 400 responses (for
# example "index already exists"), which would also hide a rejected mapping -
# verify against the client documentation.
aos_client.indices.create(index="proposals-index-nlp", body=knn_index, ignore=400)
# Observed output of the create call:
# {'acknowledged': True, 'shards_acknowledged': True, 'index': 'proposals-index-nlp'}
# Index every row of df as its own document containing only "title_summary".
# No pipeline is passed here, so the index's default_pipeline setting applies.
for _, row in df.iterrows():
    document = {"title_summary": row["title_summary"]}
    try:
        aos_client.index(index="proposals-index-nlp", body=document)
    except Exception as err:
        # Best-effort: report the failure and carry on with the next row.
        print(err)
Errors:
TransportError(500, 'm_l_exception', 'Failed to inference TEXT_EMBEDDING model: U_FmC4gBg1EhCll64Fla')
TransportError(500, 'm_l_exception', 'm_l_exception: model not deployed.')
TransportError(500, 'm_l_exception', 'Failed to inference TEXT_EMBEDDING model: U_FmC4gBg1EhCll64Fla')
TransportError(500, 'm_l_exception', 'm_l_exception: model not deployed.')
TransportError(500, 'm_l_exception', 'Failed to inference TEXT_EMBEDDING model: U_FmC4gBg1EhCll64Fla')
TransportError(500, 'm_l_exception', 'm_l_exception: model not deployed.')
...
Configuration:
Running a two-node OpenSearch cluster (plus OpenSearch Dashboards) with Docker Compose:
# Docker Compose definition of a two-node OpenSearch cluster plus OpenSearch
# Dashboards. All three containers share the "opensearch-net" network, and each
# OpenSearch node stores its data in its own named volume.
version: '3'
services:
  # This is also the hostname of the container within the Docker network
  # (i.e. https://opensearch-node1/)
  opensearch-node1:
    # Specifying the latest available image - modify if you want a specific version
    image: opensearchproject/opensearch:latest
    container_name: opensearch-node1
    environment:
      - cluster.name=opensearch-cluster  # Name the cluster
      - node.name=opensearch-node1  # Name the node that will run in this container
      # Nodes to look for when discovering the cluster
      - discovery.seed_hosts=opensearch-node1,opensearch-node2
      # Nodes eligible to serve as cluster manager
      - cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
      - bootstrap.memory_lock=true  # Disable JVM heap memory swapping
      # Set min and max JVM heap sizes to at least 50% of system RAM
      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1  # Set memlock to unlimited (no soft or hard limit)
        hard: -1
      nofile:
        # Maximum number of open files for the opensearch user - set to at least 65536
        soft: 65536
        hard: 65536
    volumes:
      # Creates volume called opensearch-data1 and mounts it to the container
      - opensearch-data1:/usr/share/opensearch/data
    ports:
      # Port mappings are quoted so they are always read as strings.
      - "9200:9200"  # REST API
      - "9600:9600"  # Performance Analyzer
    networks:
      - opensearch-net  # All of the containers will join the same Docker bridge network
  opensearch-node2:
    # This should be the same image used for opensearch-node1 to avoid issues
    image: opensearchproject/opensearch:latest
    container_name: opensearch-node2
    environment:
      - cluster.name=opensearch-cluster
      - node.name=opensearch-node2
      - discovery.seed_hosts=opensearch-node1,opensearch-node2
      - cluster.initial_cluster_manager_nodes=opensearch-node1,opensearch-node2
      - bootstrap.memory_lock=true
      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - opensearch-data2:/usr/share/opensearch/data
    networks:
      - opensearch-net
  opensearch-dashboards:
    # Make sure the version of opensearch-dashboards matches the version of
    # opensearch installed on other nodes
    image: opensearchproject/opensearch-dashboards:latest
    container_name: opensearch-dashboards
    ports:
      - "5601:5601"  # Map host port 5601 to container port 5601
    expose:
      - "5601"  # Expose port 5601 for web access to OpenSearch Dashboards
    environment:
      # Define the OpenSearch nodes that OpenSearch Dashboards will query
      OPENSEARCH_HOSTS: '["https://opensearch-node1:9200","https://opensearch-node2:9200"]'
    networks:
      - opensearch-net
# Named volumes referenced by the two OpenSearch nodes above.
volumes:
  opensearch-data1:
  opensearch-data2:
# Network joined by all three services above.
networks:
  opensearch-net:
Relevant Logs or Screenshots: