Cancelled task with reason: heap usage exceeded

Versions (relevant - OpenSearch/Dashboard/Server OS/Browser): 2.13

Describe the issue:
I have an OpenSearch index named my_html with the following mapping:

{
  "my_html": {
    "mappings": {
      "properties": {
        "content": {
          "type": "text",
          "fields": {
            "ngram": {
              "type": "text",
              "analyzer": "default_my_ngram"
            }
          },
          "analyzer": "html_content"
        },
        "crawl_completed": {
          "type": "date",
          "format": "date_optional_time"
        },
        "index_name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword"
            },
            "ngram": {
              "type": "text",
              "analyzer": "default_my_ngram"
            }
          },
          "analyzer": "my_normalized_keyword"
        },
        "minimized_byte_size": {
          "type": "integer"
        },
        "original_byte_size": {
          "type": "integer"
        },
        "url": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword"
            },
            "ngram": {
              "type": "text",
              "analyzer": "default_my_ngram"
            }
          },
          "analyzer": "my_normalized_keyword"
        },
        "url_submitted": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword"
            },
            "ngram": {
              "type": "text",
              "analyzer": "default_my_ngram"
            }
          },
          "analyzer": "my_normalized_keyword"
        },
        "url_submitted_hash": {
          "type": "keyword",
          "store": true
        }
      }
    }
  }
}

I am sending the following query to the cluster:

{
  "size": 10000,
  "_source": false,
  "stored_fields": "url_submitted_hash",
  "query": {
    "bool": {
      "filter": {
        "bool": {
        "should": [
          {
            "match_phrase": {
              "content.ngram": "Hello"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "World"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "Goodbye"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "Story"
            }
          }
        ],
        "minimum_should_match": 1
        }
      }
    }
  }
}

While I am getting some results, I am also getting the following failures in the response:

{
  "took": 3981,
  "timed_out": false,
  "_shards": {
    "total": 300,
    "successful": 298,
    "skipped": 0,
    "failed": 2,
    "failures": [
      {
        "shard": 0,
        "index": "my_html",
        "node": "AOz4jZjPTP6aCKoCzCLK4g",
        "reason": {
          "type": "rejected_execution_exception",
          "reason": "cancelled task with reason: heap usage exceeded [634mb >= 38.4mb]"
        }
      },
      {
        "shard": 0,
        "index": "my_html",
        "node": "AOz4jZjPTP6aCKoCzCLK4g",
        "reason": {
          "type": "rejected_execution_exception",
          "reason": "cancelled task with reason: heap usage exceeded [645.7mb >= 38.4mb]"
        }
      }
    ]
  }
. . .

This error appears randomly, so apparently the query isn’t always using the same amount of heap? Here are the JVM stats of that node:

"jvm": {
        "timestamp": 1725384222017,
        "uptime_in_millis": 1119230629,
        "mem": {
          "heap_used_in_bytes": 2353246688,
          "heap_used_percent": 28,
          "heap_committed_in_bytes": 8187281408,
          "heap_max_in_bytes": 8187281408,
          "non_heap_used_in_bytes": 508853184,
          "non_heap_committed_in_bytes": 516882432,
          "pools": {
            "young": {
              "used_in_bytes": 1707081728,
              "max_in_bytes": 0,
              "peak_used_in_bytes": 4907335680,
              "peak_max_in_bytes": 0,
              "last_gc_stats": {
                "used_in_bytes": 0,
                "max_in_bytes": 0,
                "usage_percent": -1
              }
            },
            "old": {
              "used_in_bytes": 633675264,
              "max_in_bytes": 8187281408,
              "peak_used_in_bytes": 6907823104,
              "peak_max_in_bytes": 8187281408,
              "last_gc_stats": {
                "used_in_bytes": 608272384,
                "max_in_bytes": 8187281408,
                "usage_percent": 7
              }
            },
            "survivor": {
              "used_in_bytes": 12489696,
              "max_in_bytes": 0,
              "peak_used_in_bytes": 616562688,
              "peak_max_in_bytes": 0,
              "last_gc_stats": {
                "used_in_bytes": 12489696,
                "max_in_bytes": 0,
                "usage_percent": -1
              }
            }
          }
        },
        "threads": {
          "count": 278,
          "peak_count": 278
        },
        "gc": {
          "collectors": {
            "young": {
              "collection_count": 12144,
              "collection_time_in_millis": 1008431
            },
            "old": {
              "collection_count": 0,
              "collection_time_in_millis": 0
            }
          }
        },
        "buffer_pools": {
          "mapped": {
            "count": 4716,
            "used_in_bytes": 58081248997,
            "total_capacity_in_bytes": 58081248997
          },
          "direct": {
            "count": 284,
            "used_in_bytes": 6439270,
            "total_capacity_in_bytes": 6439268
          },
          "mapped - 'non-volatile memory'": {
            "count": 0,
            "used_in_bytes": 0,
            "total_capacity_in_bytes": 0
          }
        },
        "classes": {
          "current_loaded_count": 54918,
          "total_loaded_count": 55469,
          "total_unloaded_count": 551
        }
      }

Based on this info, I wouldn’t expect to be getting this heap error, given the large amount of JVM memory available to the data node. Any suggestions on where to look?

Configuration:

Relevant Logs or Screenshots:

Can you show me the output of the command below?

curl -sS  "localhost:9200/_cat/nodes?h=heap*&v" -u {username}:{password}

Also, you can inspect the logs from the data nodes to see whether a circuit breaking exception occurred.

If it did, you should increase the JVM heap size in proportion to the machine's memory size (e.g., a 4gb heap needs at least 8gb of memory).

Thanks for the reply, this is what came up for localhost:9200/_cat/nodes?h=heap*&v:

heap.current heap.percent heap.max
     690.8mb           13      5gb
         3gb           61      5gb
       3.5gb           47    7.6gb
       1.6gb           21    7.6gb
     570.2mb            7    7.6gb
       4.1gb           54    7.6gb
       2.4gb           48      5gb
       4.6gb           61    7.6gb
       3.4gb           45    7.6gb

I will also say that the following query also leads to heap memory errors:

{
  "size": 10000,
  "_source": false,
  "query": {
    "bool": {
      "filter": {
        "bool": {
        "should": [
          {
            "match_phrase": {
              "content.ngram": "Hello"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "World"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "Goodbye"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "Story"
            }
          }
        ],
        "minimum_should_match": 1
        }
      }
    }
  }
}

However, the following query does not return errors, although the _id values are not returned:

{
  "size": 10000,
  "_source": false,
  "stored_fields": "_none_",
  "query": {
    "bool": {
      "filter": {
        "bool": {
        "should": [
          {
            "match_phrase": {
              "content.ngram": "Hello"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "World"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "Goodbye"
            }
          },
          {
            "match_phrase": {
              "content.ngram": "Story"
            }
          }
        ],
        "minimum_should_match": 1
        }
      }
    }
  }
}

Any suggestions from here? It really seems like my data nodes have more than enough heap size to handle this request. Do you have any suggestions?

Hi @paladinpaterson,

did you find any solution to this?