OpenSearch k-NN semantic search issue with nested JSON data

Versions (relevant - OpenSearch/Dashboard/Server OS/Browser): 2.19.3

Describe the issue: An OpenSearch k-NN semantic (neural) search over embeddings generated from nested JSON fields returns 0 hits.

Configuration: We enabled the k-NN plugin for semantic search with a pretrained model, following the steps given in "Pretrained models - OpenSearch Documentation".

Model details:

"name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
"version": "1.0.2"

Indexed data in JSON format:

{
  "type": "ITEM",
  "containerName": "Employee",
  "identifier": "1091",
  "primaryKey": "1091",
  "displayName": "Dhiraj",
  "attributes": {
    "Employee Specification": {
      "Marital Status": "Married",
      "Unit": "Engineering",
      "Date Of Birth": "12-05-1997 00:00:00"
    }
  }
}

Ingest Pipeline defined like below:

{
  "description": "An NLP ingest pipeline with nested mapping",
  "processors": [
    {
      "text_embedding": {
        "model_id": "LcSefJoBrBYz4hPBfVXG",
        "field_map": {
          "attributes": {
            "Employee Specification": {
              "Unit": "unit_embedding",
              "Date Of Birth": "edob_item_embedding"
            },
            "Customer Catalog Primary Spec": {
              "Date Of Birth": "cdob_item_embedding"
            }
          }
        }
      }
    }
  ]
}

We are trying to search with the query below:

GET /my-nested-nlp-index1/_search
{
  "_source": {
    "excludes": [
      "unit_embedding",
      "edob_item_embedding",
      "cdob_item_embedding"
    ]
  },
  "query": {
    "neural": {
      "edob_item_embedding": {
        "query_text": "employees having birth year 1997",
        "model_id": "LcSefJoBrBYz4hPBfVXG",
        "k": 5
      }
    }
  }
}

Relevant Logs or Screenshots: The semantic search response contains no relevant data (0 hits):

{
  "took": 63,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  }
}

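@bimlesh_singh I believe the issue is that the embeddings are not at the top level of the document. With a nested field_map, the text_embedding processor appears to write each vector next to its source field (for example, attributes.Employee Specification.edob_item_embedding), so a neural query against the top-level field edob_item_embedding finds nothing to match. Can you try the following ingest pipeline, which copies the nested text into top-level fields first: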

PUT _ingest/pipeline/my-nested-nlp-pipeline
{
  "description": "Flatten nested text fields and write embeddings to top-level knn_vector fields",
  "processors": [
    {
      "copy": {
        "source_field": "attributes.Employee Specification.Unit",
        "target_field": "unit_text",
        "ignore_missing": true
      }
    },
    {
      "copy": {
        "source_field": "attributes.Employee Specification.Date Of Birth",
        "target_field": "edob_text",
        "ignore_missing": true
      }
    },
    {
      "text_embedding": {
        "model_id": "<Model_ID>",
        "field_map": {
          "unit_text": "unit_embedding",
          "edob_text": "edob_item_embedding"
        }
      }
    }
  ]
}

Then create the index as follows:

PUT my-nested-nlp-index1
{
  "settings": {
    "index.knn": true,
    "default_pipeline": "my-nested-nlp-pipeline"
  },
  "mappings": {
    "properties": {
      "type":          { "type": "text" },
      "containerName": { "type": "text" },
      "identifier":    { "type": "keyword" },
      "primaryKey":    { "type": "keyword" },
      "displayName":   { "type": "text" },

      "attributes": {
        "properties": {
          "Employee Specification": {
            "properties": {
              "Marital Status": { "type": "keyword" },
              "Unit":           { "type": "text" },
              "Date Of Birth":  { "type": "text" }
            }
          }
        }
      },

      "unit_text": { "type": "text" },
      "edob_text": { "type": "text" },

      "unit_embedding": {
        "type": "knn_vector",
        "dimension": 768,
        "method": {
          "engine": "lucene",
          "space_type": "l2",
          "name": "hnsw",
          "parameters": {}
        }
      },
      "edob_item_embedding": {
        "type": "knn_vector",
        "dimension": 768,
        "method": {
          "engine": "lucene",
          "space_type": "l2",
          "name": "hnsw",
          "parameters": {}
        }
      }
    }
  }
}

Index the doc:

PUT my-nested-nlp-index1/_doc/1091
{
  "type": "ITEM",
  "containerName": "Employee",
  "identifier": "1091",
  "primaryKey": "1091",
  "displayName": "Dhiraj",
  "attributes": {
    "Employee Specification": {
      "Marital Status": "Married",
      "Unit": "Engineering",
      "Date Of Birth": "12-05-1997 00:00:00"
    }
  }
}

Then run the search:

GET my-nested-nlp-index1/_search
{
  "_source": {
    "excludes": [
      "unit_embedding",
      "edob_item_embedding"
    ]
  },
  "query": {
    "neural": {
      "unit_embedding": {
        "query_text": "Engineering",
        "model_id": "<Model_ID>",
        "k": 5
      }
    }
  }
}