Transform job accuracy

Versions (relevant - OpenSearch/Dashboard/Server OS/Browser):

{
  "name": "653e1208a3ec",
  "cluster_name": "opensearch",
  "cluster_uuid": "XsEC1-tETh-cCcAWdBgaOQ",
  "version": {
    "distribution": "opensearch",
    "number": "2.19.1",
    "build_type": "tar",
    "build_hash": "2e4741fb45d1b150aaeeadf66d41445b23ff5982",
    "build_date": "2025-02-27T01:16:47.726162386Z",
    "build_snapshot": false,
    "lucene_version": "9.12.1",
    "minimum_wire_compatibility_version": "7.10.0",
    "minimum_index_compatibility_version": "7.0.0"
  },
  "tagline": "The OpenSearch Project: https://opensearch.org/"
}

Describe the issue:
I have a base index called X. Some time ago I created a transform job that aggregates X, namely by counting the number of documents per term. After a while I noticed that the aggregated data does not match the actual base index X (the doc counts don’t align). First of all, how is this possible? I checked the transform job, and there were no more documents left to process.

I then created a second transform job, using exactly the same settings as the first one. Once it had finished processing the documents from the base index, I noticed that the metrics of the two transform jobs don’t align at all.

Here are the transform job configurations:

{
  "_id": "transform-x",
  "_version": 8,
  "_seq_no": 7116661,
  "_primary_term": 32,
  "transform": {
    "transform_id": "transform-x",
    "schema_version": 17,
    "schedule": {
      "interval": {
        "start_time": 1715097728,
        "period": 1,
        "unit": "Minutes"
      }
    },
    "metadata_id": "6ZNeSM3gb0OIsXI-p3sEBQ",
    "updated_at": 1715104885671,
    "enabled": true,
    "enabled_at": 1715104885671,
    "description": "",
    "source_index": "X",
    "data_selection_query": {
      "match_all": {
        "boost": 1
      }
    },
    "target_index": "transform_X",
    "page_size": 10,
    "groups": [
      {
        "terms": {
          "source_field": "@charid",
          "target_field": "@charid_terms"
        }
      },
      {
        "terms": {
          "source_field": "@mobid",
          "target_field": "@mobid_terms"
        }
      }
    ],
    "aggregations": {
      "count_@mobid": {
        "value_count": {
          "field": "@mobid"
        }
      }
    },
    "continuous": true
  }
}

{
  "_id": "transform-y",
  "_version": 4,
  "_seq_no": 18382258,
  "_primary_term": 75,
  "transform": {
    "transform_id": "transform-y",
    "schema_version": 21,
    "schedule": {
      "interval": {
        "start_time": 1721312263671,
        "period": 1,
        "unit": "Minutes"
      }
    },
    "metadata_id": "HClXG3G0wgQMBppjcJfgQg",
    "updated_at": 1741993333230,
    "enabled": true,
    "enabled_at": 1741993333230,
    "description": "testing purposes",
    "source_index": "X",
    "data_selection_query": {
      "match_all": {
        "boost": 1
      }
    },
    "target_index": "transform_X2",
    "page_size": 10,
    "groups": [
      {
        "terms": {
          "source_field": "@charid",
          "target_field": "@charid_terms"
        }
      },
      {
        "terms": {
          "source_field": "@mobid",
          "target_field": "@mobid_terms"
        }
      }
    ],
    "aggregations": {
      "count_@mobid": {
        "value_count": {
          "field": "@mobid"
        }
      }
    },
    "continuous": true
  }
}

And here are the metrics:

{
  "transform-x": {
    "metadata_id": "6ZNeSM3gb0OIsXI-p3sEBQ",
    "transform_metadata": {
      "transform_id": "transform-x",
      "last_updated_at": 1743152459339,
      "status": "started",
      "failure_reason": null,
      "stats": {
        "pages_processed": 667234,
        "documents_processed": 6498416982,
        "documents_indexed": 2317772,
        "index_time_in_millis": 34657427,
        "search_time_in_millis": 7209289
      },
      "continuous_stats": {
        "last_timestamp": 1743152457906,
        "documents_behind": {
          "X": 23
        }
      }
    }
  }
}

{
  "transform-y": {
    "metadata_id": "HClXG3G0wgQMBppjcJfgQg",
    "transform_metadata": {
      "transform_id": "transform-y",
      "last_updated_at": 1743152444904,
      "status": "started",
      "failure_reason": null,
      "stats": {
        "pages_processed": 457313,
        "documents_processed": 2932486986,
        "documents_indexed": 1709538,
        "index_time_in_millis": 35491862,
        "search_time_in_millis": 6177677
      },
      "continuous_stats": {
        "last_timestamp": 1743152443778,
        "documents_behind": {
          "X": 50
        }
      }
    }
  }
}

How is it possible that the aggregated data does not match the base index? And why do the two transforms report such different processing stats?