Versions (relevant - OpenSearch/Dashboard/Server OS/Browser):
- 2.15.0 (both OpenSearch and its Dashboards)
- Kubernetes 1.25.6
- Controlled by OpenSearch Operator (2.5.1)
Describe the issue:
PUT /_cluster/settings
{
"persistent": {
"plugins.ml_commons.allow_registering_model_via_url": true
}
}
POST /_plugins/_ml/models/_register
{
"name": "all-MiniLM-L6-v2",
"version": "1.0.0",
"description": "test model",
"model_format": "TORCH_SCRIPT",
"model_group_id": "03jj2ZABhH7d8NGgmFHv",
"model_content_hash_value": "c15f0d2e62d872be5b5bc6c84d2e0f4921541e29fefbef51d59cc10a8ae30e0f",
"model_config": {
"model_type": "bert",
"embedding_dimension": 384,
"framework_type": "sentence_transformers",
"all_config": "{\"_name_or_path\":\"nreimers/MiniLM-L6-H384-uncased\",\"architectures\":[\"BertModel\"],\"attention_probs_dropout_prob\":0.1,\"gradient_checkpointing\":false,\"hidden_act\":\"gelu\",\"hidden_dropout_prob\":0.1,\"hidden_size\":384,\"initializer_range\":0.02,\"intermediate_size\":1536,\"layer_norm_eps\":1e-12,\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6,\"pad_token_id\":0,\"position_embedding_type\":\"absolute\",\"transformers_version\":\"4.8.2\",\"type_vocab_size\":2,\"use_cache\":true,\"vocab_size\":30522}"
},
"url": "https://artifacts.opensearch.org/models/ml-models/huggingface/sentence-transformers/all-MiniLM-L6-v2/1.0.1/torch_script/sentence-transformers_all-MiniLM-L6-v2-1.0.1-torch_script.zip"
}
The response tells me that something goes wrong (probably because of a firewall or SSL) when my OpenSearch cluster tries to download the ML model (.zip) from the external host:
{
"task_type": "REGISTER_MODEL",
"function_name": "QUESTION_ANSWERING",
"state": "FAILED",
"worker_node": [
"qnOS9tufQXCgO8og8NUDbg"
],
"create_time": 1721644031460,
"last_update_time": 1721644034093,
"error": "unable to find valid certification path to requested target",
"is_async": true
}
I think this happens because of a firewall. Is there any way to load the .zip file itself into the OpenSearch cluster? (like a tokenizer plugin)
(e.g., include the ML models in the Docker image, or mount volumes using NFS)
The error logs are below:
[2024-07-23T01:01:54,052][INFO ][o.o.m.m.MLModelManager ] [test-opensearch-cluster-data-0] create new model meta doc _mkb3ZAB0bsFbRwv7vBw for register model task VXgb3ZABhH7d8NGg7ZSU
[2024-07-23T01:01:54,878][ERROR][o.o.m.m.MLModelManager ] [test-opensearch-cluster-data-0] Failed to index chunk file
java.security.PrivilegedActionException: null
at java.base/java.security.AccessController.doPrivileged(AccessController.java:575) ~[?:?]
at org.opensearch.ml.engine.ModelHelper.downloadAndSplit(ModelHelper.java:267) [opensearch-ml-algorithms-2.15.0.0.jar:?]
at org.opensearch.ml.model.MLModelManager.registerModel(MLModelManager.java:724) [opensearch-ml-2.15.0.0.jar:2.15.0.0]
at org.opensearch.ml.model.MLModelManager.lambda$registerModelFromUrl$31(MLModelManager.java:699) [opensearch-ml-2.15.0.0.jar:2.15.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.15.0.jar:2.15.0]
at org.opensearch.action.support.ThreadedActionListener$1.doRun(ThreadedActionListener.java:78) [opensearch-2.15.0.jar:2.15.0]
at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:941) [opensearch-2.15.0.jar:2.15.0]
at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.15.0.jar:2.15.0]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?]
at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]
Caused by: javax.net.ssl.SSLHandshakeException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62) ~[?:?]
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:502) ~[?:?]
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:486) ~[?:?]
at java.base/sun.net.www.protocol.http.HttpURLConnection$10.run(HttpURLConnection.java:2055) ~[?:?]
at java.base/sun.net.www.protocol.http.HttpURLConnection$10.run(HttpURLConnection.java:2050) ~[?:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:571) ~[?:?]
at java.base/sun.net.www.protocol.http.HttpURLConnection.getChainedException(HttpURLConnection.java:2049) ~[?:?]
at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1619) ~[?:?]
at java.base/sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1599) ~[?:?]
at java.base/sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:223) ~[?:?]
at ai.djl.training.util.DownloadUtils.download(DownloadUtils.java:78) ~[?:?]
at ai.djl.training.util.DownloadUtils.download(DownloadUtils.java:52) ~[?:?]
at ai.djl.training.util.DownloadUtils.download(DownloadUtils.java:52) ~[?:?]
at org.opensearch.ml.engine.ModelHelper.lambda$downloadAndSplit$3(ModelHelper.java:273) ~[?:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:571) ~[?:?]
... 10 more
Caused by: javax.net.ssl.SSLHandshakeException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at java.base/sun.security.ssl.Alert.createSSLException(Alert.java:130) ~[?:?]
...
I found some issues similar to mine, but they weren't helpful.
- "error": "unable to find valid certification path to requested target", - #23 by smith
- Caused by: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
My OpenSearch cluster is managed by the OpenSearch Operator, and the manifest (CRD) looks like this:
opensearchCluster:
enabled: true
general:
httpPort: "9200"
image: harbor-srep01.xxx.com/library/opensearchproject/opensearch:v2.15.0
serviceName: "test-opensearch-cluster"
drainDataNodes: true
# https://github.com/opensearch-project/opensearch-k8s-operator/blob/main/docs/userguide/main.md#security-context-for-pods-and-containers
setVMMaxMapCount: true # In some cases, set general.setVMMaxMapCount to false as this feature also launches an init container with root
podSecurityContext:
runAsUser: 1000
runAsGroup: 1000
securityContext:
allowPrivilegeEscalation: true
privileged: true
# https://github.com/opensearch-project/opensearch-k8s-operator/blob/main/docs/userguide/main.md#deal-with-max-virtual-memory-areas-vmmax_map_count-errors
# https://github.com/opensearch-project/opensearch-k8s-operator/blob/main/docs/userguide/main.md#custom-init-helper
initHelper:
image: "harbor-srep01.xxx.com/nexus/docker-mig/library/busybox:1.31.1"
imagePullPolicy: IfNotPresent
dashboards:
enable: true
replicas: 1
image: harbor-srep01.xxx.com/library/opensearchproject/opensearch-dashboards:v2.15.0
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "1Gi"
cpu: "500m"
tls:
enable: false
opensearchCredentialsSecret:
name: admin-credentials-secret
additionalConfig:
# https://opensearch.org/docs/latest/install-and-configure/install-dashboards/tls/
opensearch.ssl.verificationMode: none
nodePools:
- component: master
replicas: 3
pdb:
enable: false
# enable: true
# minAvailable: 1
diskSize: "10Gi"
persistence:
pvc:
storageClass: "sc-nfs-app-retain"
accessModes:
- ReadWriteOnce
roles:
- "cluster_manager"
- "master"
# https://github.com/opensearch-project/opensearch-k8s-operator/issues/669#issuecomment-1829833573
# Suggestion: 1000m CPU & 2048Mi memory
resources:
requests:
memory: "4Gi"
cpu: "1"
limits:
memory: "4Gi"
cpu: "2"
env:
- name: OPENSEARCH_INITIAL_ADMIN_PASSWORD
value: "hcpOss12~!"
- component: data
replicas: 2
diskSize: "100Gi"
persistence:
pvc:
storageClass: "sc-nfs-app-retain"
accessModes:
- ReadWriteOnce
roles:
- "data"
- "ingest"
- "ml"
resources:
requests:
memory: "8Gi"
cpu: "2"
limits:
memory: "8Gi"
cpu: "4"
env:
- name: OPENSEARCH_INITIAL_ADMIN_PASSWORD
value: "hcpOss12~!"
security:
tls:
transport:
generate: true
perNode: true
# https://opensearch-project.github.io/opensearch-k8s-operator/docs/userguide/main.html#node-httprest-api
http:
generate: true
config:
adminCredentialsSecret: # these are the admin credentials for the Operator to use
name: admin-credentials-secret
securityConfigSecret: # this is the whole security configuration for OpenSearch
name: securityconfig-secret