How to use custom scoring and Hamming distance

I’m trying to use some of these newer features and am running into issues. I am on 1.12 so should have them.

I create the data as follows using Python:

# Index definition: turn on the "knn" index setting with the "l2" space type,
# and declare a 2-dimensional knn_vector field alongside a keyword field.
mapping = {
    "settings": {
        "index": {
            "knn": True,
            "knn.space_type": "l2",
        },
    },
    "mappings": {
        "properties": {
            "my_vector": {"type": "knn_vector", "dimension": 2},
            "color": {"type": "keyword"},
        },
    },
}
# Create the index using the `mapping` dict as the request body.
es.indices.create("my-knn-index-2", body=mapping)

# Six sample documents: three tagged RED and three tagged BLUE.
actions = [
    {"my_vector": [1, 1], "color": "RED"},
    {"my_vector": [2, 2], "color": "RED"},
    {"my_vector": [3, 3], "color": "RED"},
    {"my_vector": [10, 10], "color": "BLUE"},
    {"my_vector": [20, 20], "color": "BLUE"},
    {"my_vector": [30, 30], "color": "BLUE"},
]

# Index each document, using its position in the list as the document id.
for doc_id, doc in enumerate(actions):
    es.index(index="my-knn-index-2", id=doc_id, body=doc)

This works for the traditional k-NN search, as the query below returns results:

# k-NN query on "my_vector": request the k=2 neighbours of the point [9, 10],
# with the response size capped at 10 hits.
es.search(
    index="my-knn-index-2",
    body={
        "size": 10,
        "query": {
            "knn": {
                "my_vector": {"vector": [9, 10], "k": 2},
            },
        },
    },
)

However, if I try to do custom scoring, I get no results.

# script_score query: select every document with match_all, then score each
# one with the "knn_score" script (lang "knn") against the query vector
# [2.9, 3.1] on field "my_vector", using the "l2" space type.
qry = {
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "lang": "knn",
                "source": "knn_score",
                "params": {
                    "field": "my_vector",
                    "query_value": [2.9, 3.1],
                    "space_type": "l2",
                },
            },
        },
    },
}
# Run the script_score query held in `qry` against the "my-knn-index-2" index.
es.search(index="my-knn-index-2", body=qry)

What am I doing wrong here?

Similarly, I’m having issues trying the Hamming distance in custom scoring; in particular, I’m not sure how to specify query_value as a binary value.

Any help / documentation would be appreciated

Just wanted to provide an update, since I can’t seem to edit the original post. The above code now seems to work, and here is a sample of how to use the Hamming distance.

Create the index and ingest random data

import random
import codecs

# Name of the index that holds the binary vectors.
index_name = "knn_binary"

# Index definition: same "knn" / "l2" index settings as before, but
# "my_vector" is now a binary field with doc_values switched on.
mapping = {
    "settings": {
        "index": {
            "knn": True,
            "knn.space_type": "l2",
        },
    },
    "mappings": {
        "properties": {
            "my_vector": {"type": "binary", "doc_values": True},
            "color": {"type": "keyword"},
        },
    },
}

# Start from a clean slate: drop any previous copy of the index (400/404
# responses are ignored), then create it with the mapping.
es.indices.delete(index=index_name, ignore=[400, 404])
es.indices.create(index=index_name, body=mapping)

# Ingest 100 documents, each with a random 32-byte vector and a random color.
for i in range(100):
    # 64 random hex digits == 32 bytes. The inner loop uses `_` so that it
    # never shadows `i`, which is the document id passed to es.index below.
    hx = "".join(random.choice("0123456789ABCDEF") for _ in range(64))
    # The "base64" codec wraps its output with "\n" every 76 characters and
    # appends a trailing one. Remove all of them (not just the ends) so that
    # longer vectors still encode to a single-line base64 string.
    hx_b64 = codecs.encode(codecs.decode(hx, "hex"), "base64").decode().replace("\n", "")
    item = {"my_vector": hx_b64, "color": random.choice(["RED", "BLUE", "GREEN"])}
    es.index(index=index_name, id=i, body=item)

Get one entry to use:

 # Take the stored "my_vector" value from the first hit of an unfiltered search
 # (up to 3 hits are requested, only the first is used) as the query value.
 x = es.search(size=3, index=index_name)['hits']['hits'][0]['_source']['my_vector']

And then query:

# script_score query using the "hammingbit" space type: select every document
# with match_all and score it with the "knn_score" script, passing the value
# held in `x` as the query value. At most 3 hits are returned.
qry = {
    "size": 3,
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "lang": "knn",
                "source": "knn_score",
                "params": {
                    "field": "my_vector",
                    "query_value": x,
                    "space_type": "hammingbit",
                },
            },
        },
    },
}
es.search(index=index_name, body=qry)

@gdd314596 apologies for the delay in responding. Yes, that code seems correct. We have not yet updated documentation for Hamming distance. We will do this ASAP.

Jack