Replication of k-NN indices doesn't work

OpenSearch v.2.6

Hello!
I have two OpenSearch clusters: leader-cluster and follower-cluster. I need to replicate indices from leader-cluster to follower-cluster, so I’ve decided to use cross-cluster replication (CCR) in OpenSearch.
First, I sent a PUT request to follower-cluster:

{
  "persistent": {
    "cluster": {
      "remote": {
        "my-connection-alias": {
          "seeds": ["node_ip_1:9300"
                        , "node_ip_2:9300"
                        , "node_ip_3:9300"]
        }
      }
    }
  }
}

and got this response:

{
    "acknowledged": true,
    "persistent": {
        "cluster": {
            "remote": {
                "my-connection-alias": {
                    "seeds": [
                        "node_ip_1:9300",
                        "node_ip_2:9300",
                        "node_ip_3:9300"
                    ]
                }
            }
        }
    },
    "transient": {}
}

After this step, following the documentation for OpenSearch v.2.6, I started replication from follower-cluster. I already have a k-NN index in leader-cluster: leader_index_1_2023-03-15_m16_ef1024_night_osfe. Here is its configuration:


{
  "leader_index_1_2023-03-15_m16_ef1024_night_osfe": {
    "aliases": {
      "tmp_leader_index_1": {},
      "leader_index_1": {}
    },
    "mappings": {
      "_source": {
        "excludes": [
          "my_vector"
        ]
      },
      "properties": {
        "cam_id": {
          "type": "integer"
        },
        "detect_id": {
          "type": "long"
        },
        "my_vector": {
          "type": "knn_vector",
          "dimension": 512
        },
        "time_check": {
          "type": "date",
          "store": true,
          "format": "yyyy-MM-dd HH:mm:ss"
        }
      }
    },
    "settings": {
      "index": {
        "refresh_interval": "-1",
        "translog": {
          "flush_threshold_size": "10gb"
        },
        "knn.algo_param": {
          "ef_search": "1024",
          "ef_construction": "1024",
          "m": "16"
        },
        "provided_name": "leader_index_1_2023-03-15_m16_ef1024_night_osfe",
        "max_result_window": "1000000",
        "knn": "true",
        "creation_date": "1679240518895",
        "unassigned": {
          "node_left": {
            "delayed_timeout": "240m"
          }
        },
        "number_of_replicas": "1",
        "uuid": "nVuRER3OT-2ms85obGF63w",
        "version": {
          "created": "136277827"
        },
        "number_of_shards": "3",
        "knn.space_type": "cosinesimil"
      }
    }
  }
}

I sent the following PUT request: http://node_ip_4:9200/_plugins/_replication/replicated_leader_index_1_2023-03-15_m16_ef1024_night_osfe/_start?pretty with this request body:

{
   "leader_alias": "my-connection-alias",
   "leader_index": "leader_index_1_2023-03-15_m16_ef1024_night_osfe"
}

There is no use_roles value here because the security plugin is disabled.
But it doesn’t work. I got the following response:

{
    "error": {
        "root_cause": [
            {
                "type": "illegal_argument_exception",
                "reason": "Cannot replicate k-NN index - leader_index_1_2023-03-15_m16_ef1024_night_osfe"
            }
        ],
        "type": "illegal_argument_exception",
        "reason": "Cannot replicate k-NN index - leader_index_1_2023-03-15_m16_ef1024_night_osfe"
    },
    "status": 400
}

Here is my cluster configuration for leader-cluster:


{
  "persistent": {
    "action": {
      "destructive_requires_name": "true"
    },
    "cluster": {
      "routing": {
        "rebalance": {
          "enable": "none"
        },
        "allocation": {
          "allow_rebalance": "indices_all_active",
          "cluster_concurrent_rebalance": "15",
          "node_concurrent_recoveries": "2",
          "disk": {
            "threshold_enabled": "true",
            "watermark": {
              "low": "85%",
              "high": "90%"
            }
          },
          "enable": "all",
          "node_concurrent_outgoing_recoveries": "2"
        }
      },
      "metadata": {
        "perf_analyzer": {
          "state": "0"
        }
      }
    },
    "knn": {
      "algo_param": {
        "index_thread_qty": "8"
      },
      "cache": {
        "item": {
          "expiry": {
            "enabled": "false",
            "minutes": "1m"
          }
        }
      },
      "circuit_breaker": {
        "triggered": "false"
      },
      "memory": {
        "circuit_breaker": {
          "limit": "80%",
          "enabled": "true"
        }
      }
    },
    "plugins": {
      "index_state_management": {
        "template_migration": {
          "control": "-1"
        }
      }
    }
  },
  "transient": {}
}

Here is my cluster configuration for follower-cluster:


{
  "persistent": {
    "action": {
      "destructive_requires_name": "true"
    },
    "cluster": {
      "remote": {
        "my-connection-alias": {
          "seeds": [
            "node_ip_1:9300",
            "node_ip_2:9300",
            "node_ip_3:9300"
          ]
        }
      },
      "routing": {
        "rebalance": {
          "enable": "none"
        },
        "allocation": {
          "allow_rebalance": "indices_all_active",
          "cluster_concurrent_rebalance": "15",
          "node_concurrent_recoveries": "2",
          "disk": {
            "threshold_enabled": "true",
            "watermark": {
              "low": "85%",
              "high": "90%"
            }
          },
          "enable": "all",
          "node_concurrent_outgoing_recoveries": "2"
        }
      },
      "metadata": {
        "perf_analyzer": {
          "state": "0"
        }
      }
    },
    "knn": {
      "algo_param": {
        "index_thread_qty": "8"
      },
      "cache": {
        "item": {
          "expiry": {
            "enabled": "false",
            "minutes": "1m"
          }
        }
      },
      "circuit_breaker": {
        "triggered": "false"
      },
      "memory": {
        "circuit_breaker": {
          "limit": "80%",
          "enabled": "true"
        }
      }
    },
    "plugins": {
      "index_state_management": {
        "template_migration": {
          "control": "-1"
        }
      }
    }
  },
  "transient": {}
}

Why doesn’t replication of k-NN indices work?

@jmazane
If possible, please take a look at this topic.

@krishna_ggk can you help us debug whether there is an issue on the cross-cluster replication side?

Do I understand correctly that, according to this information https://docs.aws.amazon.com/opensearch-service/latest/developerguide/replication.html#:~:text=Cross%2Dcluster%20replication%20doesn%27t,the%20k%2DNN%20plugin, CCR cannot be used with k-NN indices?

That is right - we have an open GitHub issue for this that has yet to be worked on - [FEATURE] Support CCR for k-NN enabled indices. · Issue #650 · opensearch-project/cross-cluster-replication · GitHub

Please +1 on it.

This topic was automatically closed 60 days after the last reply. New replies are no longer allowed.