diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake
index 6081c2d2e45..a953668d183 100644
--- a/cmake/Modules/SourceFiles.cmake
+++ b/cmake/Modules/SourceFiles.cmake
@@ -147,6 +147,10 @@ set(VALKEY_CLI_SRCS
     ${CMAKE_SOURCE_DIR}/src/strl.c
     ${CMAKE_SOURCE_DIR}/src/cli_commands.c)
 
+# Benchmark dataset module - used by benchmark binary and unit tests
+set(BENCHMARK_DATASET_SRCS
+    ${CMAKE_SOURCE_DIR}/src/valkey-benchmark-dataset.c)
+
 # valkey-benchmark
 set(VALKEY_BENCHMARK_SRCS
     ${CMAKE_SOURCE_DIR}/src/ae.c
@@ -155,6 +159,7 @@ set(VALKEY_BENCHMARK_SRCS
     ${CMAKE_SOURCE_DIR}/src/sha256.c
     ${CMAKE_SOURCE_DIR}/src/util.c
     ${CMAKE_SOURCE_DIR}/src/valkey-benchmark.c
+    ${BENCHMARK_DATASET_SRCS}
     ${CMAKE_SOURCE_DIR}/src/adlist.c
     ${CMAKE_SOURCE_DIR}/src/dict.c
     ${CMAKE_SOURCE_DIR}/src/zmalloc.c
diff --git a/docs/benchmark.md b/docs/benchmark.md
new file mode 100644
index 00000000000..be43cde71bb
--- /dev/null
+++ b/docs/benchmark.md
@@ -0,0 +1,276 @@
+# Valkey Benchmark
+
+Benchmark utility for measuring Valkey server performance.
+
+```bash
+valkey-benchmark [OPTIONS] [--] [COMMAND ARGS...]
+```
+
+## Connection Options
+
+| Option | Description |
+|--------|-------------|
+| `-h <hostname>` | Server hostname (default: 127.0.0.1) |
+| `-p <port>` | Server port (default: 6379) |
+| `-s <socket>` | Server socket (overrides host and port) |
+| `-u <uri>` | Server URI: `valkey://user:password@host:port/dbnum` |
+| `-a <password>` | Password for Valkey Auth |
+| `--user <username>` | Used to send ACL style 'AUTH username pass'. Needs `-a` |
+| `--dbnum <db>` | SELECT the specified db number (default: 0) |
+| `-3` | Start session in RESP3 protocol mode |
+
+## Performance Options
+
+| Option | Description |
+|--------|-------------|
+| `-c <clients>` | Number of parallel connections (default: 50) |
+| `-n <requests>` | Total number of requests (default: 100000) |
+| `--duration <seconds>` | Run the benchmark for the specified number of seconds (mutually exclusive with `-n`) |
+| `--warmup <seconds>` | Run a warmup for the specified number of seconds before recording data |
+| `-d <size>` | Data size of SET/GET value in bytes (default: 3) |
+| `-P <numreq>` | Pipeline `<numreq>` requests (default: 1, no pipeline) |
+| `-k <boolean>` | Keep alive: 1=keep alive, 0=reconnect (default: 1) |
+| `--threads <num>` | Enable multi-thread mode |
+| `--rps <limit>` | Limit requests per second (default: 0, no limit) |
+
+## Test Selection
+
+| Option | Description |
+|--------|-------------|
+| `-t <tests>` | Comma-separated list of tests to run |
+| `-l` | Loop mode: run tests forever |
+| `-I` | Idle mode: open N idle connections and wait |
+
+Available tests: `ping`, `ping_inline`, `ping_mbulk`, `set`, `get`, `incr`, `lpush`, `rpush`, `lpop`, `rpop`, `sadd`, `hset`, `spop`, `zadd`, `zpopmin`, `lrange`, `lrange_100`, `lrange_300`, `lrange_500`, `lrange_600`, `mset`, `mget`, `xadd`, `function_load`, `fcall`
+
+## Output Options
+
+| Option | Description |
+|--------|-------------|
+| `-q` | Quiet mode: show only query/sec values |
+| `--csv` | Output in CSV format |
+| `--precision` | Number of decimal places in latency output (default: 0) |
+
+## Cluster Options
+
+| Option | Description |
+|--------|-------------|
+| `--cluster` | Enable cluster mode |
+| `--rfr <mode>` | Read from replicas: `no`/`yes`/`all` (default: `no`) |
+
+## Randomization Options
+
+| Option | Description |
+|--------|-------------|
+| `-r <keyspacelen>` | Use random keys in range [0, keyspacelen-1] |
+| `--sequential` | Use sequential numbers instead of random |
+| `--seed <num>` | Set random number generator seed |
+
+## Dataset Support
+
+| Option | Description |
+|--------|-------------|
+| `--dataset <file>` | Dataset file for field placeholder replacement |
+| `--maxdocs <count>` | Maximum number of documents to load from dataset (default: unlimited) |
+| `--xml-root-element <name>` | Root element name for XML dataset parsing (required for XML files) |
+
+### File Formats
+
+**CSV**
+```csv
+term,category
+anarchism,politics
+democracy,politics
+```
+Header row required, comma-delimited; field names become `__field:name__` placeholders.
+
+**TSV**
+Tab-delimited with header row.
+
+**XML**
+```xml
+<page>
+  <title>Anarchism</title>
+  <id>12</id>
+  <revision>
+    <timestamp>1317806107</timestamp>
+    <text>Article content...</text>
+  </revision>
+</page>
+```
+Requires the `--xml-root-element` parameter. The root element choice affects which fields are discovered; deeper elements include nested content.
+
+### Dataset Behavior
+
+- One row per command
+- Sequential iteration with wraparound
+- Thread-safe atomic record selection
+- Duplicate XML field names: first occurrence wins
+
+### Usage
+
+```bash
+# CSV dataset
+valkey-benchmark --dataset terms.csv \
+    -n 50000 FT.SEARCH myindex "__field:term__"
+
+# Wikipedia XML
+valkey-benchmark --dataset wiki.xml --xml-root-element page \
+    -n 10000 HSET "doc:__rand_int__" title "__field:title__" body "__field:text__"
+```
+
+**Memory:** The entire dataset is loaded into memory, so large datasets may require GB-scale RAM.
+
+## Additional Options
+
+| Option | Description |
+|--------|-------------|
+| `--enable-tracking` | Send CLIENT TRACKING on |
+| `--num-functions <num>` | Functions in Lua lib (default: 10) |
+| `--num-keys-in-fcall <num>` | Keys for FCALL (default: 1) |
+| `--seed <num>` | RNG seed |
+| `-x` | Read last arg from STDIN |
+| `--mptcp` | Enable MPTCP |
+| `--help` | Show help |
+| `--version` | Show version |
+
+## Placeholder System
+
+### Random Placeholders
+
+| Placeholder | Behavior |
+|-------------|----------|
+| `__rand_int__` | Different random value per occurrence |
+| `__rand_1st__` | Same random value for all occurrences in command |
+| `__rand_2nd__` | Same random value for all occurrences in command |
+| ... | ... |
+| `__rand_9th__` | Same random value for all occurrences in command |
+
+Random values are 12-digit zero-padded numbers in range [0, keyspacelen-1].
+
+### Data Placeholders
+
+| Placeholder | Description |
+|-------------|-------------|
+| `__data__` | Random data of size specified by `-d` option |
+
+### Cluster Placeholders
+
+| Placeholder | Description |
+|-------------|-------------|
+| `{tag}` | Cluster slot hashtag for proper key distribution |
+
+Required in cluster mode to ensure commands route to the correct nodes.
+
+## Command Sequences
+
+Commands can be chained using semicolon separators:
+
+```bash
+valkey-benchmark -- multi ';' set key:__rand_int__ __data__ ';' incr counter ';' exec
+```
+
+### Repetition Syntax
+
+Prefix a command with a number to repeat it:
+
+```bash
+valkey-benchmark -- 5 set key:__rand_int__ value ';' get key:__rand_int__
+```
+
+This executes 5 SET commands followed by 1 GET command per pipeline iteration.
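+
+For example, with `-r 1000` the sequence above could expand to a per-iteration
+command stream like the following (key suffixes are illustrative 12-digit
+zero-padded random values):
+
+```bash
+SET key:000000000017 value
+SET key:000000000352 value
+SET key:000000000891 value
+SET key:000000000004 value
+SET key:000000000266 value
+GET key:000000000730
+```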
+
+## Examples
+
+### Basic Benchmarking
+
+```bash
+# Default benchmark suite
+valkey-benchmark
+
+# Specific tests
+valkey-benchmark -t ping,set,get -n 100000
+
+# Custom data size
+valkey-benchmark -t set -d 1024 -n 50000
+```
+
+### Random Key Distribution
+
+```bash
+# Random keys in range [0, 999999]
+valkey-benchmark -t set,get -r 1000000 -n 100000
+
+# Sequential keys
+valkey-benchmark -t set --sequential -r 1000000 -n 100000
+```
+
+### Dataset-Driven Benchmarking
+
+```bash
+# CSV dataset
+valkey-benchmark --dataset terms.csv \
+    -n 50000 FT.SEARCH myindex "__field:term__"
+
+# Wikipedia XML dataset (page-level)
+valkey-benchmark --dataset wiki_sample.xml --xml-root-element page \
+    -n 10000 HSET "doc:__rand_int__" title "__field:title__" content "__field:text__" id "__field:id__"
+
+# Wikipedia XML dataset (revision-level)
+valkey-benchmark --dataset wiki_sample.xml --xml-root-element revision \
+    -n 10000 HSET "doc:__rand_int__" content "__field:text__" timestamp "__field:timestamp__"
+
+# Multiple field usage
+valkey-benchmark --dataset products.csv \
+    -- HSET product:__field:id__ name "__field:name__" price __field:price__
+```
+
+### Cluster Benchmarking
+
+```bash
+# Cluster mode with proper key distribution
+valkey-benchmark --cluster -t set,get \
+    -- SET key:{tag}:__rand_int__ __data__
+
+# Read from replicas
+valkey-benchmark --cluster --rfr yes -t get \
+    -- GET key:{tag}:__rand_int__
+```
+
+### Pipelining
+
+```bash
+# Pipeline 10 requests
+valkey-benchmark -P 10 -t set -n 100000
+
+# Pipeline with datasets
+valkey-benchmark --dataset terms.csv -P 5 \
+    -n 50000 FT.SEARCH index "__field:term__"
+```
+
+### Complex Command Sequences
+
+```bash
+# Transaction benchmark
+valkey-benchmark -r 100000 -n 10000 \
+    -- multi ';' set key:__rand_int__ __data__ ';' \
+    incr counter:__rand_int__ ';' exec
+
+# Mixed operations with repetition
+valkey-benchmark -r 100000 \
+    -- 3 set key:__rand_int__ __data__ ';' \
+    2 get key:__rand_int__ ';' \
+    del key:__rand_int__
+```
+
+### Rate Limiting
+
+```bash
+# Limit to 1000 requests/second
+valkey-benchmark --rps 1000 -t set -n 50000
+
+# Dataset with rate limiting
+valkey-benchmark --dataset search_terms.csv --rps 500 \
+    -n 10000 FT.SEARCH index "__field:term__"
+```
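+
+### Combining Features
+
+The options above compose freely. As an illustrative (not prescriptive) sketch,
+a dataset-driven cluster benchmark with pipelining and rate limiting, reusing
+the hypothetical terms.csv dataset from earlier:
+
+```bash
+valkey-benchmark --cluster --dataset terms.csv -P 10 --rps 2000 \
+    -n 100000 -- SET "key:{tag}:__rand_int__" "__field:term__"
+```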
diff --git a/src/Makefile b/src/Makefile
index a0fe81f5b0c..b96dc89f1d4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -31,7 +31,7 @@ endif
 ifneq ($(OPTIMIZATION),-O0)
     OPTIMIZATION+=-fno-omit-frame-pointer
 endif
-DEPENDENCY_TARGETS=libvalkey linenoise hdr_histogram fpconv
+DEPENDENCY_TARGETS=libvalkey linenoise lua hdr_histogram fpconv
 NODEPS:=clean distclean
 
 # Default settings
@@ -608,6 +608,7 @@ ENGINE_BENCHMARK_OBJ = \
 	strl.o \
 	util.o \
 	valkey-benchmark.o \
+	valkey-benchmark-dataset.o \
 	zmalloc.o
 ENGINE_CHECK_RDB_NAME=$(ENGINE_NAME)-check-rdb$(PROG_SUFFIX)
 ENGINE_CHECK_AOF_NAME=$(ENGINE_NAME)-check-aof$(PROG_SUFFIX)
@@ -688,8 +689,8 @@ $(ENGINE_LIB_NAME): $(ENGINE_SERVER_OBJ)
 	$(SERVER_AR) rcs $@ $^
 
 # valkey-unit-tests
-$(ENGINE_UNIT_TESTS): $(ENGINE_TEST_OBJ) $(ENGINE_LIB_NAME)
-	$(SERVER_LD) -o $@ $^ ../deps/libvalkey/lib/libvalkey.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a $(FINAL_LIBS)
+$(ENGINE_UNIT_TESTS): $(ENGINE_TEST_OBJ) $(ENGINE_LIB_NAME) valkey-benchmark-dataset.o
+	$(SERVER_LD) -o $@ $^ ../deps/libvalkey/lib/libvalkey.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a $(FINAL_LIBS)
 
 # valkey-sentinel
 $(ENGINE_SENTINEL_NAME): $(SERVER_NAME)
diff --git a/src/unit/CMakeLists.txt b/src/unit/CMakeLists.txt
index 189c1cb1d9a..314da00e19b 100644
--- a/src/unit/CMakeLists.txt
+++ b/src/unit/CMakeLists.txt
@@ -23,7 +23,7 @@ add_library(valkeylib STATIC ${VALKEY_SERVER_SRCS})
 target_compile_options(valkeylib PRIVATE "${COMPILE_FLAGS}")
 target_compile_definitions(valkeylib PRIVATE "${COMPILE_DEFINITIONS}")
 
-add_executable(valkey-unit-tests ${UNIT_TEST_SRCS})
+add_executable(valkey-unit-tests ${UNIT_TEST_SRCS} ${BENCHMARK_DATASET_SRCS})
 target_compile_options(valkey-unit-tests PRIVATE "${COMPILE_FLAGS}")
 target_compile_definitions(valkey-unit-tests PRIVATE "${COMPILE_DEFINITIONS}")
 add_dependencies(valkey-unit-tests generate_test_files_h)
diff --git a/src/unit/test_dataset.c b/src/unit/test_dataset.c
new file mode 100644
index 00000000000..4893250ed04
--- /dev/null
+++ b/src/unit/test_dataset.c
@@ -0,0 +1,201 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "../valkey-benchmark-dataset.h"
+#include "../zmalloc.h"
+#include "../sds.h"
+#include "test_help.h"
+
+#define UNUSED(x) (void)(x)
+
+static char *create_test_file(const char *suffix, const char *content) {
+    static char filename[512];
+    snprintf(filename, sizeof(filename), "/tmp/test_dataset_%d_%s", getpid(), suffix);
+    FILE *f = fopen(filename, "w");
+    if (!f) return NULL;
+    if (content) fputs(content, f);
+    fclose(f);
+    return filename;
+}
+
+static void cleanup_test_file(const char *filename) {
+    if (filename) unlink(filename);
+}
+
+int test_field_mapping_optimization(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    const char *csv = "a,b,c,d,e,f,g,h\n1,2,3,4,5,6,7,8\n";
+    char *file = create_test_file("mapping.csv", csv);
+    TEST_ASSERT(file != NULL);
+
+    sds arg1 = sdsnew("HSET");
+    sds arg2 = sdsnew("key");
+    sds arg3 = sdsnew("b");
+    sds arg4 = sdsnew("__field:b__");
+    sds arg5 = sdsnew("f");
+    sds arg6 = sdsnew("__field:f__");
+    sds args[] = {arg1, arg2, arg3, arg4, arg5, arg6};
+    dataset *ds = datasetInit(file, NULL, 0, 1, args, 6);
+
+    TEST_ASSERT_MESSAGE("Dataset initialized", ds != NULL);
+    TEST_ASSERT_MESSAGE("Total fields discovered", ds->field_count == 8);
+    TEST_ASSERT_MESSAGE("Only used fields loaded", ds->used_field_count == 2);
+    TEST_ASSERT_MESSAGE("Correct values loaded", !strcmp(ds->records[0].fields[0], "2"));
+    TEST_ASSERT_MESSAGE("Correct values loaded", !strcmp(ds->records[0].fields[1], "6"));
+
+    datasetFree(ds);
+    sdsfree(arg1);
+    sdsfree(arg2);
+    sdsfree(arg3);
+    sdsfree(arg4);
+    sdsfree(arg5);
+    sdsfree(arg6);
+    cleanup_test_file(file);
+    return 0;
+}
+
+int test_csv_quoted_fields(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    const char *csv = "id,title,desc\n"
+                      "1,\"Book, Part 1\",\"Quote: \"\"Hello\"\"\"\n";
+    char *file = create_test_file("quoted.csv", csv);
+    TEST_ASSERT(file != NULL);
+
+    sds arg1 = sdsnew("SET");
+    sds arg2 = sdsnew("__field:title__");
+    sds args[] = {arg1, arg2};
+    dataset *ds = datasetInit(file, NULL, 0, 1, args, 2);
+
+    TEST_ASSERT_MESSAGE("Dataset initialized", ds != NULL);
+    TEST_ASSERT_MESSAGE("Quoted comma preserved", !strcmp(ds->records[0].fields[0], "Book, Part 1"));
+
+    datasetFree(ds);
+    sdsfree(arg1);
+    sdsfree(arg2);
+    cleanup_test_file(file);
+    return 0;
+}
+
+int test_field_count_correctness(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    const char *csv = "id,title,author\n1,Book,Alice\n";
+    char *file = create_test_file("fields.csv", csv);
+    TEST_ASSERT(file != NULL);
+
+    sds arg1 = sdsnew("GET");
+    sds arg2 = sdsnew("__field:title__");
+    sds args[] = {arg1, arg2};
+    dataset *ds = datasetInit(file, NULL, 0, 1, args, 2);
+
+    TEST_ASSERT_MESSAGE("Dataset initialized", ds != NULL);
+    TEST_ASSERT_MESSAGE("Correct field count", ds->field_count == 3);
+    TEST_ASSERT_MESSAGE("Correct record count", datasetGetRecordCount(ds) == 1);
+
+    datasetFree(ds);
+    sdsfree(arg1);
+    sdsfree(arg2);
+    cleanup_test_file(file);
+    return 0;
+}
+
+int test_excessive_field_name_length(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    /* Create a 513-character field name (over MAX_FIELD_NAME_LEN limit) */
+    char excessive_field[514];
+    memset(excessive_field, 'b', 513);
+    excessive_field[513] = '\0';
+
+    /* Build XML with the excessive field name */
+    sds xml = sdsempty();
+    xml = sdscat(xml, "<doc>\n");
+    xml = sdscat(xml, "  <title>ValidValue</title>\n");
+    xml = sdscat(xml, "  <");
+    xml = sdscat(xml, excessive_field);
+    xml = sdscat(xml, ">ExcessiveValue</");
+    xml = sdscat(xml, excessive_field);
+    xml = sdscat(xml, ">\n");
+    xml = sdscat(xml, "</doc>\n");
+
+    char *file = create_test_file("excessive_field.xml", xml);
+    sdsfree(xml);
+    TEST_ASSERT(file != NULL);
+
+    /* Initialize dataset - only the valid field should be loaded */
+    dataset *ds = datasetInit(file, "doc", 1, 0, NULL, 0);
+
+    TEST_ASSERT_MESSAGE("Dataset initialized", ds != NULL);
+    TEST_ASSERT_MESSAGE("Only valid field loaded (513-char field rejected)", ds->field_count == 1);
+    TEST_ASSERT_MESSAGE("Valid field is 'title'", !strcmp(ds->field_names[0], "title"));
+
+    datasetFree(ds);
+    cleanup_test_file(file);
+    return 0;
+}
+
+int test_max_dataset_fields(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    /* Build XML with exactly 1000 fields (MAX_DATASET_FIELDS limit) */
+    sds xml = sdsempty();
+    xml = sdscat(xml, "<doc>\n");
+    for (int i = 0; i < 1000; i++) {
+        xml = sdscatprintf(xml, "  <field%d>value%d</field%d>\n", i, i, i);
+    }
+    xml = sdscat(xml, "</doc>\n");
+
+    char *file = create_test_file("max_fields.xml", xml);
+    sdsfree(xml);
+    TEST_ASSERT(file != NULL);
+
+    dataset *ds = datasetInit(file, "doc", 1, 0, NULL, 0);
+
+    TEST_ASSERT_MESSAGE("Dataset initialized", ds != NULL);
+    TEST_ASSERT_MESSAGE("1000 fields accepted (at MAX_DATASET_FIELDS limit)", ds->field_count == 1000);
+    TEST_ASSERT_MESSAGE("First field correct", !strcmp(ds->field_names[0], "field0"));
+    TEST_ASSERT_MESSAGE("Last field correct", !strcmp(ds->field_names[999], "field999"));
+
+    datasetFree(ds);
+    cleanup_test_file(file);
+    return 0;
+}
+
+int test_excessive_dataset_fields(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    /* Build XML with 1001 fields (over MAX_DATASET_FIELDS limit) */
+    sds xml = sdsempty();
+    xml = sdscat(xml, "<doc>\n");
+    for (int i = 0; i < 1001; i++) {
+        xml = sdscatprintf(xml, "  <field%d>value%d</field%d>\n", i, i, i);
+    }
+    xml = sdscat(xml, "</doc>\n");
+
+    char *file = create_test_file("excessive_fields.xml", xml);
+    sdsfree(xml);
+    TEST_ASSERT(file != NULL);
+
+    /* Initialize dataset - should fail when exceeding MAX_DATASET_FIELDS */
+    dataset *ds = datasetInit(file, "doc", 1, 0, NULL, 0);
+
+    TEST_ASSERT_MESSAGE("Dataset initialization fails (1001 fields exceeds limit)", ds == NULL);
+
+    cleanup_test_file(file);
+    return 0;
+}
diff --git a/src/unit/test_files.h b/src/unit/test_files.h
index 30042dbf4e3..029f81ce480 100644
--- a/src/unit/test_files.h
+++ b/src/unit/test_files.h
@@ -9,6 +9,12 @@ typedef struct unitTest {
 int test_popcount(int argc, char **argv, int flags);
 int test_crc64(int argc, char **argv, int flags);
 int test_crc64combine(int argc, char **argv, int flags);
+int test_field_mapping_optimization(int argc, char **argv, int flags);
+int test_csv_quoted_fields(int argc, char **argv, int flags);
+int test_field_count_correctness(int argc, char **argv, int flags);
+int test_excessive_field_name_length(int argc, char **argv, int flags);
+int test_max_dataset_fields(int argc, char **argv, int flags);
+int test_excessive_dataset_fields(int argc, char **argv, int flags);
 int test_dictCreate(int argc, char **argv, int flags);
 int test_dictAdd16Keys(int argc, char **argv, int flags);
 int test_dictDisableResize(int argc, char **argv, int flags);
@@ -287,6 +293,7 @@ int test_zmallocAllocZeroByteAndFree(int argc, char **argv, int flags);
 unitTest __test_bitops_c[] = {{"test_popcount", test_popcount}, {NULL, NULL}};
 unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}};
 unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}};
+unitTest __test_dataset_c[] = {{"test_field_mapping_optimization", test_field_mapping_optimization}, {"test_csv_quoted_fields", test_csv_quoted_fields}, {"test_field_count_correctness", test_field_count_correctness}, {"test_excessive_field_name_length", test_excessive_field_name_length}, {"test_max_dataset_fields", test_max_dataset_fields}, {"test_excessive_dataset_fields", test_excessive_dataset_fields}, {NULL, NULL}};
 unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}};
 unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}};
 unitTest __test_entry_c[] = {{"test_entryCreate", test_entryCreate}, {"test_entryUpdate", test_entryUpdate}, {"test_entryHasexpiry_entrySetExpiry", test_entryHasexpiry_entrySetExpiry}, {"test_entryIsExpired", test_entryIsExpired}, {"test_entryMemUsage_entrySetExpiry_entryUpdate", test_entryMemUsage_entrySetExpiry_entryUpdate}, {"test_entryStringRef", test_entryStringRef}, {NULL, NULL}};
@@ -318,6 +325,7 @@ struct unitTestSuite {
     {"test_bitops.c", __test_bitops_c},
     {"test_crc64.c", __test_crc64_c},
     {"test_crc64combine.c", __test_crc64combine_c},
+    {"test_dataset.c", __test_dataset_c},
     {"test_dict.c", __test_dict_c},
     {"test_endianconv.c", __test_endianconv_c},
     {"test_entry.c", __test_entry_c},
diff --git a/src/valkey-benchmark-dataset.c b/src/valkey-benchmark-dataset.c
new file mode 100644
index 00000000000..0c58dad02ad
--- /dev/null
+++ b/src/valkey-benchmark-dataset.c
@@ -0,0 +1,782 @@
+/* Dataset support for valkey-benchmark
+ *
+ * Copyright Valkey Contributors.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "fmacros.h"
+
+#include "valkey-benchmark-dataset.h"
+#include "zmalloc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdatomic.h>
+#include <valkey/valkey.h>
+
+/* Internal constants */
+#define PLACEHOLDER_COUNT 10
+#define PLACEHOLDER_LEN 12
+#define XML_TAG_OVERHEAD 16 /* Space for XML tag syntax ("<", ">", "</%s>") and the null terminator */
+
+static const char *PLACEHOLDERS[PLACEHOLDER_COUNT] = {
+    "__rand_int__", "__rand_1st__", "__rand_2nd__", "__rand_3rd__", "__rand_4th__",
+    "__rand_5th__", "__rand_6th__", "__rand_7th__", "__rand_8th__", "__rand_9th__"};
+
+/* Forward declarations */
+static bool datasetBuildFieldMap(dataset *ds, sds *template_argv, int template_argc);
+static sds getFieldValue(const char *row, int column_index, char delimiter);
+static sds getXmlFieldValue(const char *xml_doc, const char *field_name);
+static sds formatBytes(size_t bytes);
+static bool csvDiscoverFields(dataset *ds);
+static bool scanXmlFieldsFromFile(dataset *ds, const char *xml_root_element);
+static bool scanXmlFields(const char *doc_start, const char *doc_end, dataset *ds, const char *start_root_tag, const char *end_root_tag);
+static bool loadXmlDataset(dataset *ds, const char *xml_root_element);
+static bool csvLoadDocuments(dataset *ds);
+static bool shouldStopLoading(dataset *ds);
+static int findFieldIndex(dataset *ds, const char *field_name, size_t field_name_len);
+static const char *extractDatasetFieldValue(dataset *ds, int field_idx, int record_index);
+static sds replaceOccurrence(sds processed_arg, const char *pos, const char *replacement);
+static sds processFieldsInArg(dataset *ds, sds arg, int record_index);
+static sds processRandPlaceholdersForDataSet(sds cmd, _Atomic uint64_t *seq_key, int replace_placeholders, int keyspacelen, int sequential_replacement);
+
+dataset *datasetInit(const char *filename, const char *xml_root_element, int max_documents, int has_field_placeholders, sds *template_argv, int template_argc) {
+    if (!filename) return NULL;
+
+    dataset *ds = zcalloc(sizeof(dataset));
+    if (!ds) return NULL;
+
+    ds->filename = filename;
+    ds->xml_root_element = xml_root_element;
+    ds->max_documents = max_documents;
+
+    /* Validate XML parameters */
+    if (strstr(filename, ".xml") && !xml_root_element) {
+        fprintf(stderr, "Error: XML dataset requires --xml-root-element parameter\n");
+        zfree(ds);
+        return NULL;
+    }
+
+    /* Detect format */
+    if (strstr(filename, ".csv")) {
+        ds->format = DATASET_FORMAT_CSV;
+        ds->delimiter = ',';
+    } else if (strstr(filename, ".tsv")) {
+        ds->format = DATASET_FORMAT_TSV;
+        ds->delimiter = '\t';
+    } else if (strstr(filename, ".xml")) {
+        ds->format = DATASET_FORMAT_XML;
+        ds->delimiter = 0;
+    } else {
+        ds->format = DATASET_FORMAT_CSV;
+        ds->delimiter = ',';
+    }
+
+    /* Discover fields */
+    if (ds->format == DATASET_FORMAT_XML) {
+        if (!scanXmlFieldsFromFile(ds, xml_root_element)) goto error;
+    } else {
+        if (!csvDiscoverFields(ds)) goto error;
+    }
+
+    /* Build field map if needed (BEFORE loading) */
+    if (has_field_placeholders && template_argv && template_argc > 0) {
+        if (!datasetBuildFieldMap(ds, template_argv, template_argc)) goto error;
+    } else {
+        ds->used_field_count = ds->field_count;
+    }
+
+    /* Load data with correct field count */
+    if (ds->format == DATASET_FORMAT_XML) {
+        if (!loadXmlDataset(ds, xml_root_element)) goto error;
+    } else {
+        if (!csvLoadDocuments(ds)) goto error;
+    }
+
+    return ds;
+
+error:
+    datasetFree(ds);
+    return NULL;
+}
+
+void datasetFree(dataset *ds) {
+    if (!ds) return;
+
+    if (ds->field_names) {
+        /* Unified memory management: all formats use zmalloc + individual sdsnew */
+        for (int i = 0; i < ds->field_count; i++) {
+            sdsfree(ds->field_names[i]);
+        }
+        zfree(ds->field_names);
+    }
+
+    if (ds->field_map) {
+        zfree(ds->field_map);
+    }
+
+    if (ds->records) {
+        for (size_t i = 0; i < ds->record_count; i++) {
+            if (ds->records[i].fields) {
+                for (int j = 0; j < ds->used_field_count; j++) {
+                    sdsfree(ds->records[i].fields[j]);
+                }
+                zfree(ds->records[i].fields);
+            }
+        }
+        zfree(ds->records);
+    }
+
+    zfree(ds);
+}
+
+bool datasetBuildFieldMap(dataset *ds, sds *template_argv, int template_argc) {
+    if (!ds) return false;
+
+    ds->field_map = zmalloc(ds->field_count * sizeof(int));
+    ds->used_field_count = 0;
+
+    for (int i = 0; i < ds->field_count; i++) {
+        ds->field_map[i] = -1;
+    }
+
+    for (int arg_idx = 0; arg_idx < template_argc; arg_idx++) {
+        const char *arg = template_argv[arg_idx];
+        const char *field_pos = strstr(arg, FIELD_PREFIX);
+
+        while (field_pos) {
+            const char *field_start = field_pos + FIELD_PREFIX_LEN;
+            const char *field_end = strstr(field_start, FIELD_SUFFIX);
+            if (!field_end) break;
+
+            size_t field_name_len = field_end - field_start;
+            sds field_name = sdsnewlen(field_start, field_name_len);
+
+            int field_idx = -1;
+            for (int k = 0; k < ds->field_count; k++) {
+                if (!strcmp(field_name, ds->field_names[k])) {
+                    field_idx = k;
+                    break;
+                }
+            }
+
+            if (field_idx == -1) {
+                fprintf(stderr, "Error: Field placeholder '__field:%s__' not found in dataset fields\n", field_name);
+                fprintf(stderr, "Available fields: ");
+                for (int j = 0; j < ds->field_count; j++) {
+                    fprintf(stderr, "%s%s", ds->field_names[j], (j < ds->field_count - 1) ? ", " : "\n");
+                }
+                sdsfree(field_name);
+                return false;
+            }
+
+            if (ds->field_map[field_idx] == -1) {
+                ds->field_map[field_idx] = ds->used_field_count++;
+            }
+
+            sdsfree(field_name);
+            field_pos = strstr(field_end + FIELD_SUFFIX_LEN, FIELD_PREFIX);
+        }
+    }
+
+    return true;
+}
+
+bool datasetLoad(dataset *ds, const char *xml_root_element) {
+    if (!ds) return false;
+    (void)xml_root_element; /* Use stored value in ds */
+
+    if (ds->format == DATASET_FORMAT_XML) {
+        return loadXmlDataset(ds, ds->xml_root_element);
+    } else {
+        return csvLoadDocuments(ds);
+    }
+}
+
+size_t datasetGetRecordCount(dataset *ds) {
+    return ds ? ds->record_count : 0;
+}
+
+void datasetReportMemory(dataset *ds) {
+    if (!ds) return;
+
+    size_t total_memory = 0;
+    for (size_t i = 0; i < ds->record_count; i++) {
+        for (int j = 0; j < ds->used_field_count; j++) {
+            total_memory += sdslen(ds->records[i].fields[j]);
+        }
+    }
+    sds size_str = formatBytes(total_memory);
+    printf("Dataset: %zu documents (%s)\n", ds->record_count, size_str);
+    sdsfree(size_str);
+}
+
+sds datasetGenerateCommand(dataset *ds, int record_index, sds *template_argv, int template_argc, _Atomic uint64_t *seq_key, int replace_placeholders, int keyspacelen, int sequential_replacement) {
+    if (!ds || !template_argv) return NULL;
+
+    sds *processed_argv = zmalloc(template_argc * sizeof(sds));
+    for (int i = 0; i < template_argc; i++) {
+        processed_argv[i] = processFieldsInArg(ds, sdsdup(template_argv[i]), record_index);
+    }
+
+    char *cmd;
+    int len = valkeyFormatCommandArgv(&cmd, template_argc, (const char **)processed_argv, NULL);
+    sds result = sdsnewlen(cmd, len);
+    free(cmd);
+
+    result = processRandPlaceholdersForDataSet(result, seq_key, replace_placeholders,
+                                               keyspacelen, sequential_replacement);
+
+    for (int i = 0; i < template_argc; i++) {
+        sdsfree(processed_argv[i]);
+    }
+    zfree(processed_argv);
+
+    return result;
+}
+
+static sds formatBytes(size_t bytes) {
+    if (bytes < 1024) {
+        return sdscatprintf(sdsempty(), "%zu bytes", bytes);
+    } else if (bytes < 1024 * 1024) {
+        return sdscatprintf(sdsempty(), "%.2f KB", bytes / 1024.0);
+    } else if (bytes < 1024 * 1024 * 1024) {
+        return sdscatprintf(sdsempty(), "%.2f MB", bytes / (1024.0 * 1024.0));
+    } else {
+        return sdscatprintf(sdsempty(), "%.2f GB", bytes / (1024.0 * 1024.0 * 1024.0));
+    }
+}
+
+static bool shouldStopLoading(dataset *ds) {
+    if (ds->max_documents > 0 && (int)ds->record_count >= ds->max_documents) {
+        return true;
+    }
+    return false;
+}
+
+static sds getFieldValue(const char *row, int column_index, char delimiter) {
+    int current_col = 0;
+    const char *start = row;
+    const char *p = row;
+    int in_quotes = 0;
+
+    while (*p) {
+        if (*p == '"') {
+            in_quotes = !in_quotes;
+        } else if (*p == delimiter && !in_quotes) {
+            if (current_col == column_index) {
+                size_t len = p - start;
+                if (len > 0 && start[0] == '"' && p[-1] == '"') {
+                    start++;
+                    len -= 2;
+                }
+                return sdsnewlen(start, len);
+            }
+            current_col++;
+            start = p + 1;
+        }
+        p++;
+    }
+
+    if (current_col == column_index) {
+        size_t len = p - start;
+        if (len > 0 && start[0] == '"' && p[-1] == '"') {
+            start++;
+            len -= 2;
+        }
+        return sdsnewlen(start, len);
+    }
+
+    return sdsempty();
+}
+
+static sds getXmlFieldValue(const char *xml_doc, const char *field_name) {
+    size_t field_len = strlen(field_name);
+    if (field_len > MAX_FIELD_NAME_LEN) {
+        return sdsempty();
+    }
+
+    char start_tag_prefix[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD], end_tag[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD];
+    snprintf(start_tag_prefix, sizeof(start_tag_prefix), "<%s", field_name);
+    snprintf(end_tag, sizeof(end_tag), "</%s>", field_name);
+
+    const char *tag_start = strstr(xml_doc, start_tag_prefix);
+    if (!tag_start) return sdsempty();
+
+    const char *tag_end = strchr(tag_start, '>');
+    if (!tag_end) return sdsempty();
+
+    if (tag_end > tag_start && tag_end[-1] == '/') {
+        return sdsempty();
+    }
+
+    const char *content_start = tag_end + 1;
+    const char *closing_tag = strstr(content_start, end_tag);
+    if (!closing_tag) return sdsempty();
+
+    size_t content_len = closing_tag - content_start;
+    return sdsnewlen(content_start, content_len);
+}
+
+static bool csvDiscoverFields(dataset *ds) {
+    FILE *fp = fopen(ds->filename, "r");
+    if (!fp) {
+        fprintf(stderr, "Cannot open dataset file: %s\n", ds->filename);
+        return false;
+    }
+
+    char *line = NULL;
+    size_t len = 0;
+    if (getline(&line, &len, fp) == -1) {
+        fprintf(stderr, "Cannot read header from dataset file\n");
+        free(line);
+        fclose(fp);
+        return false;
+    }
+
+    len = strlen(line);
+    if (len > 0 && line[len - 1] == '\n') line[len - 1] = '\0';
+    if (len > 1 && line[len - 2] == '\r') line[len - 2] = '\0';
+
+    int count;
+    char delim_str[2] = {ds->delimiter, '\0'};
+    sds *temp = sdssplitlen(line, strlen(line), delim_str, 1, &count);
+
+    ds->field_names = zmalloc(count * sizeof(sds));
+    for (int i = 0; i < count; i++) {
+        ds->field_names[i] = sdsdup(temp[i]);
+    }
+    sdsfreesplitres(temp, count);
+    ds->field_count = count;
+
+    free(line);
+    fclose(fp);
+    return true;
+}
+
+static bool scanXmlFields(const char *doc_start, const char *doc_end, dataset *ds, const char *start_root_tag, const char *end_root_tag) {
+    char field_names[MAX_DATASET_FIELDS][MAX_FIELD_NAME_LEN + 1];
+    int field_count = 0;
+    int root_start_tag_len = strlen(start_root_tag);
+    int root_end_tag_len = strlen(end_root_tag);
+
+    const char *current_pos = doc_start;
+    while ((current_pos = strchr(current_pos, '<')) != NULL && current_pos < doc_end) {
+        if (current_pos[1] == '/' || current_pos[1] == '!' ||
+            !strncmp(current_pos, start_root_tag, root_start_tag_len) ||
+            !strncmp(current_pos, end_root_tag, root_end_tag_len)) {
+            current_pos++;
+            continue;
+        }
+
+        const char *tag_end = strchr(current_pos, '>');
+        if (!tag_end || tag_end >= doc_end) break;
+
+        const char *field_start = current_pos + 1;
+        const char *field_name_end = field_start;
+
+        while (field_name_end < tag_end && *field_name_end != ' ' && *field_name_end != '\t') {
+            field_name_end++;
+        }
+
+        size_t field_name_len = field_name_end - field_start;
+
+        if (field_name_len == 0 || field_name_len > MAX_FIELD_NAME_LEN) {
+            current_pos = tag_end + 1;
+            continue;
+        }
+
+        int is_duplicate = 0;
+        for (int i = 0; i < field_count; i++) {
+            if (strlen(field_names[i]) == field_name_len &&
+                !memcmp(field_names[i], field_start, field_name_len)) {
+                is_duplicate = 1;
+                break;
+            }
+        }
+
+        if (!is_duplicate) {
+            if (field_count >= MAX_DATASET_FIELDS) {
+                fprintf(stderr, "Error: Dataset contains more than %d fields (limit exceeded)\n", MAX_DATASET_FIELDS);
+                return false;
+            }
+            memcpy(field_names[field_count], field_start, field_name_len);
+            field_names[field_count][field_name_len] = '\0';
+            field_count++;
+        }
+
+        current_pos = tag_end + 1;
+    }
+
+    if (field_count == 0) return false;
+
+    ds->field_names = zmalloc(field_count * sizeof(sds));
+    for (int i = 0; i < field_count; i++) {
+        ds->field_names[i] = sdsnew(field_names[i]);
+    }
+    ds->field_count = field_count;
+
+    return true;
+}
+
+static bool scanXmlFieldsFromFile(dataset *ds, const char *xml_root_element) {
+    FILE *fp = fopen(ds->filename, "r");
+    if (!fp) return false;
+
+    char start_tag_prefix[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD], start_tag[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD], end_tag[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD];
+    snprintf(start_tag_prefix, sizeof(start_tag_prefix), "<%s", xml_root_element);
+    snprintf(start_tag, sizeof(start_tag), "<%s>", xml_root_element);
+    snprintf(end_tag, sizeof(end_tag), "</%s>", xml_root_element);
+
+    char buffer[1024];
+    sds current_doc = sdsempty();
+
+    while (fgets(buffer, sizeof(buffer), fp)) {
+        current_doc = sdscat(current_doc, buffer);
+
+        const char *tag_prefix_pos = strstr(current_doc, start_tag_prefix);
+        if (!tag_prefix_pos) continue;
+
+        const char *tag_end_pos = strchr(tag_prefix_pos, '>');
+        if (!tag_end_pos) continue;
+
+        const char *doc_start = tag_prefix_pos;
+        const char *doc_end = strstr(tag_end_pos, end_tag);
+        if (!doc_end) continue;
+
+        doc_end += strlen(end_tag);
+
+        bool result = scanXmlFields(doc_start, doc_end, ds, start_tag, end_tag);
+        sdsfree(current_doc);
+        fclose(fp);
+        return result;
+    }
+
+    sdsfree(current_doc);
+    fclose(fp);
+    return false;
+}
+
+static bool loadXmlDataset(dataset *ds, const char *xml_root_element) {
+    FILE *fp = fopen(ds->filename, "r");
+    if (!fp) return false;
+
+    char start_tag_prefix[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD], start_tag[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD], end_tag[MAX_FIELD_NAME_LEN + XML_TAG_OVERHEAD];
+    size_t end_tag_len;
+    snprintf(start_tag_prefix, sizeof(start_tag_prefix), "<%s", xml_root_element);
+    snprintf(start_tag, sizeof(start_tag), "<%s>", xml_root_element);
+    snprintf(end_tag, sizeof(end_tag), "</%s>", xml_root_element);
+    end_tag_len = strlen(end_tag);
+
+    size_t buffer_capacity = 4 * 1024 * 1024;
+    char *buffer = zmalloc(buffer_capacity);
+    size_t buffer_used = 0;
+    bool fields_discovered = false;
+    size_t capacity = 1000;
+
+    ds->records = zmalloc(sizeof(datasetRecord) * capacity);
+
+    printf("Loading XML dataset from %s...\n", ds->filename);
+
+    if (ds->field_names && ds->field_count > 0) {
+        fields_discovered = true;
+        printf("Using %d fields: ", ds->field_count);
+        for (int i = 0; i < ds->field_count; i++) {
+            printf("%s%s", ds->field_names[i], (i < ds->field_count - 1) ? ", " : "\n");
+        }
+    }
+
+    while (!shouldStopLoading(ds)) {
+        size_t space_available = buffer_capacity - buffer_used;
+
+        if (space_available == 0) {
+            size_t new_capacity = buffer_capacity * 2;
+            buffer = zrealloc(buffer, new_capacity);
+            buffer_capacity = new_capacity;
+            space_available = buffer_capacity - buffer_used;
+        }
+
+        size_t bytes_read = fread(buffer + buffer_used, 1, space_available, fp);
+        buffer_used += bytes_read;
+
+        if (buffer_used == 0) break;
+
+        size_t scan_pos = 0;
+        while (scan_pos < buffer_used) {
+            const char *doc_start = NULL;
+            const char *tag_open_end = NULL;
+
+            for (size_t i = scan_pos; i < buffer_used; i++) {
+                if (buffer[i] == '<' && i + 1 < buffer_used &&
+                    strncmp(buffer + i, start_tag_prefix, strlen(start_tag_prefix)) == 0) {
+                    doc_start = buffer + i;
+
+                    for (size_t j = i + 1; j < buffer_used; j++) {
+                        if (buffer[j] == '>') {
+                            tag_open_end = buffer + j;
+                            break;
+                        }
+                    }
+                    break;
+                }
+            }
+
+            if (!doc_start || !tag_open_end) break;
+
+            const char *doc_end = NULL;
+            for (size_t i = (tag_open_end - buffer); i + end_tag_len <= buffer_used; i++) {
+                if (strncmp(buffer + i, end_tag, end_tag_len) == 0) {
+                    doc_end = buffer + i + end_tag_len;
+                    break;
+                }
+            }
+
+            if (!doc_end) break;
+
+            size_t doc_len = doc_end - doc_start;
+
+            if (!fields_discovered) {
+                if (!scanXmlFields(doc_start, doc_end, ds, start_tag, end_tag)) {
+                    fprintf(stderr, "No XML fields discovered\n");
+                    zfree(buffer);
+                    fclose(fp);
+                    return false;
+                }
+                fields_discovered = true;
+
+                printf("Discovered %d fields: ", ds->field_count);
+                for (int i = 0; i < ds->field_count; i++) {
+                    printf("%s%s", ds->field_names[i], (i < ds->field_count - 1) ? ", " : "\n");
+                }
+            }
+
+            if (ds->record_count >= capacity) {
+                capacity *= 2;
+                ds->records = zrealloc(ds->records, sizeof(datasetRecord) * capacity);
+            }
+
+            datasetRecord *record = &ds->records[ds->record_count];
+            record->fields = zmalloc(sizeof(sds) * ds->used_field_count);
+
+            sds doc_str = sdsnewlen(doc_start, doc_len);
+            if (ds->field_map) {
+                /* With field mapping - only load used fields */
+                for (int i = 0; i < ds->field_count; i++) {
+                    if (ds->field_map[i] >= 0) {
+                        record->fields[ds->field_map[i]] = getXmlFieldValue(doc_str, ds->field_names[i]);
+                    }
+                }
+            } else {
+                /* No field mapping - load all fields directly */
+                for (int i = 0; i < ds->field_count; i++) {
+                    record->fields[i] = getXmlFieldValue(doc_str, ds->field_names[i]);
+                }
+            }
+            sdsfree(doc_str);
+
+            ds->record_count++;
+
+            if (ds->record_count % 1000 == 0) {
+                printf("\rLoaded %zu documents...", ds->record_count);
+                fflush(stdout);
+            }
+
+            scan_pos = doc_end - buffer;
+        }
+
+        if (scan_pos > 0 && scan_pos < buffer_used) {
+            size_t remaining = buffer_used - scan_pos;
+            memmove(buffer, buffer + scan_pos, remaining);
+            buffer_used = remaining;
+        } else if (scan_pos == buffer_used) {
+            buffer_used = 0;
+        }
+
+        if (bytes_read == 0 && (buffer_used == 0 || scan_pos == 0)) {
+            break;
+        }
+    }
+
+    printf("\rLoaded %zu documents%*s\n", ds->record_count, 20, "");
+
+    zfree(buffer);
+    fclose(fp);
+    return true;
+}
+
+static bool csvLoadDocuments(dataset *ds) {
+    FILE *fp = fopen(ds->filename, "r");
+    if (!fp) return false;
+
+    char *line = NULL;
+    size_t len = 0;
+    if (getline(&line, &len, fp) == -1) {
+        fprintf(stderr, "Cannot read header from dataset file\n");
+        free(line);
+        fclose(fp);
+        return false;
+    }
+
+    size_t capacity = 1000;
+    ds->records = zmalloc(sizeof(datasetRecord) * capacity);
+
+    const char *format_name = (ds->format == DATASET_FORMAT_CSV) ? "csv"
+                              : (ds->format == DATASET_FORMAT_TSV) ? "tsv"
+                                                                   : "xml";
+    (void)format_name; /* Suppress output in unit tests */
+
+    int *load_indices = NULL;
+    int load_count = 0;
+    if (ds->field_map) {
+        load_indices = zmalloc(ds->used_field_count * sizeof(int));
+        for (int i = 0; i < ds->field_count; i++) {
+            if (ds->field_map[i] >= 0) {
+                load_indices[load_count++] = i;
+            }
+        }
+    }
+
+    while (getline(&line, &len, fp) != -1 && !shouldStopLoading(ds)) {
+        if (line[0] == '\0' || line[0] == '\n') continue;
+
+        size_t line_len = strlen(line);
+        if (line_len > 0 && line[line_len - 1] == '\n') line[line_len - 1] = '\0';
+        if (line_len > 1 && line[line_len - 2] == '\r') line[line_len - 2] = '\0';
+
+        if (ds->record_count >= capacity) {
+            capacity *= 2;
+            ds->records = zrealloc(ds->records, sizeof(datasetRecord) * capacity);
+        }
+
+        datasetRecord *record = &ds->records[ds->record_count];
+        record->fields = zmalloc(sizeof(sds) * ds->used_field_count);
+
+        if (ds->field_map) {
+            for (int j = 0; j < load_count; j++) {
+                int orig_idx = load_indices[j];
+                int mapped_idx = ds->field_map[orig_idx];
+                record->fields[mapped_idx] = getFieldValue(line, orig_idx, ds->delimiter);
+            }
+        } else {
+            for (int i = 0; i < ds->field_count; i++) {
+                record->fields[i] = getFieldValue(line, i, ds->delimiter);
+            }
+        }
+
+        ds->record_count++;
+    }
+
+    if (load_indices) zfree(load_indices);
+    free(line);
+    fclose(fp);
+    return true;
+}
+
+static int findFieldIndex(dataset *ds, const char *field_name, size_t field_name_len) {
+    for (int k = 0; k < ds->field_count; k++) {
+        if (strlen(ds->field_names[k]) == field_name_len &&
+            !memcmp(ds->field_names[k], field_name, field_name_len)) {
+            return ds->field_map ? ds->field_map[k] : k;
+        }
+    }
+    return -1;
+}
+
+static const char *extractDatasetFieldValue(dataset *ds, int field_idx, int record_index) {
+    return ds->records[record_index].fields[field_idx];
+}
+
+static sds replaceOccurrence(sds processed_arg, const char *pos, const char *replacement) {
+    size_t offset = pos - processed_arg;
+    size_t replacement_len = strlen(replacement);
+    size_t total_len = offset + replacement_len + (sdslen(processed_arg) - offset - PLACEHOLDER_LEN);
+
+    sds result = sdsnewlen(NULL, total_len);
+    char *p = result;
+
+    memcpy(p, processed_arg, offset);
+    p += offset;
+
+    memcpy(p, replacement, replacement_len);
+    p += replacement_len;
+
+    const char *after_start = pos + PLACEHOLDER_LEN;
+    size_t after_len = sdslen(processed_arg) - offset - PLACEHOLDER_LEN;
+    memcpy(p, after_start, after_len);
+
+    sdsfree(processed_arg);
+    return result;
+}
+
+static sds processFieldsInArg(dataset *ds, sds arg, int record_index) {
+    if (!strstr(arg, FIELD_PREFIX)) return arg;
+
+    /* Loop through all field placeholders in the argument */
+    while (strstr(arg, FIELD_PREFIX)) {
+        const char *field_pos = strstr(arg, FIELD_PREFIX);
+        const char *field_start = field_pos + FIELD_PREFIX_LEN;
+        const char *field_end = strstr(field_start, FIELD_SUFFIX);
+        if (!field_end) break;
+
+        size_t field_name_len = field_end - field_start;
+        int field_idx = findFieldIndex(ds, field_start, field_name_len);
+        if (field_idx == -1) break;
+
+        const char *field_value = extractDatasetFieldValue(ds, field_idx, record_index);
+        size_t before_len = field_pos - arg;
+        const char *after_start = field_end + FIELD_SUFFIX_LEN;
+
+        sds result = sdsnewlen(arg, before_len);
+        result = sdscat(result, field_value);
+        result = sdscat(result, after_start);
+
+        sdsfree(arg);
+        arg = result;
+    }
+
+    return arg;
+}
+
+static sds processRandPlaceholdersForDataSet(sds cmd, _Atomic uint64_t *seq_key, int replace_placeholders, int keyspacelen, int sequential_replacement) {
+    if (!replace_placeholders || keyspacelen == 0) return cmd;
+
+    for (int ph = 0; ph < PLACEHOLDER_COUNT; ph++) {
+        if (!strstr(cmd, PLACEHOLDERS[ph])) continue;
+
+        uint64_t shared_key = 0;
+        int generate_shared_key = (ph != 0);
+
+        if (generate_shared_key) {
+            if (sequential_replacement) {
+                shared_key = atomic_fetch_add_explicit(&seq_key[ph], 1, memory_order_relaxed);
+            } else {
+                shared_key = (uint64_t)random();
+            }
+            shared_key %= keyspacelen;
+        }
+
+        size_t search_offset = 0;
+        char *pos;
+        while ((pos = strstr(cmd + search_offset, PLACEHOLDERS[ph])) != NULL) {
+            uint64_t key = generate_shared_key ? shared_key : 0;
+
+            if (!generate_shared_key) {
+                if (sequential_replacement) {
+                    key = atomic_fetch_add_explicit(&seq_key[ph], 1, memory_order_relaxed);
+                } else {
+                    key = (uint64_t)random();
+                }
+                key %= keyspacelen;
+            }
+
+            char key_str[24];
+            snprintf(key_str, sizeof(key_str), "%012llu", (unsigned long long)key);
+
+            size_t offset = pos - cmd;
+            cmd = replaceOccurrence(cmd, pos, key_str);
+            search_offset = offset + PLACEHOLDER_LEN;
+        }
+    }
+
+    return cmd;
+}
diff --git a/src/valkey-benchmark-dataset.h b/src/valkey-benchmark-dataset.h
new file mode 100644
index 00000000000..86c9925cbd4
--- /dev/null
+++ b/src/valkey-benchmark-dataset.h
@@ -0,0 +1,65 @@
+/* Dataset support for valkey-benchmark
+ *
+ * Copyright Valkey Contributors.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef VALKEY_BENCHMARK_DATASET_H
+#define VALKEY_BENCHMARK_DATASET_H
+
+#include "sds.h"
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* Dataset constants */
+#define MAX_DATASET_FIELDS 1000
+#define MAX_FIELD_NAME_LEN 512
+#define FIELD_PREFIX "__field:"
+#define FIELD_PREFIX_LEN 8
+#define FIELD_SUFFIX "__"
+#define FIELD_SUFFIX_LEN 2
+
+/* Dataset format types */
+typedef enum datasetFormat {
+    DATASET_FORMAT_CSV = 0,
+    DATASET_FORMAT_TSV,
+    DATASET_FORMAT_XML
+} datasetFormat;
+
+/* Dataset structures */
+typedef struct datasetRecord {
+    sds *fields;
+} datasetRecord;
+
+typedef struct dataset {
+    datasetFormat format;
+    char delimiter;
+    sds *field_names;
+    int field_count;
+    int *field_map;
+    int used_field_count;
+    datasetRecord *records;
+    size_t record_count;
+    const char *filename;
+    const char *xml_root_element;
+    int max_documents;
+} dataset;
+
+/* Initialize dataset from file - returns NULL on error */
+dataset *datasetInit(const char *filename, const char *xml_root_element, int max_documents, int has_field_placeholders, sds *template_argv, int template_argc);
+
+/* Free dataset and all memory */
+void datasetFree(dataset *ds);
+
+/* Get number of records */
+size_t datasetGetRecordCount(dataset *ds);
+
+/* Report memory usage */
+void datasetReportMemory(dataset *ds);
+
+/* Generate complete command for given record index (caller must sdsfree) */
+sds datasetGenerateCommand(dataset *ds, int record_index, sds *template_argv, int template_argc, _Atomic uint64_t *seq_key, int replace_placeholders, int keyspacelen, int sequential_replacement);
+
+#endif /* VALKEY_BENCHMARK_DATASET_H */
diff --git a/src/valkey-benchmark.c b/src/valkey-benchmark.c
index a0738414a08..87b59ab303e 100644
--- a/src/valkey-benchmark.c
+++ b/src/valkey-benchmark.c
@@ -61,6 +61,7 @@
 #include "hdr_histogram.h"
 #include "cli_common.h"
 #include "mt19937-64.h"
+#include "valkey-benchmark-dataset.h"
 
 #define UNUSED(V) ((void)V)
 #define RANDPTR_INITIAL_SIZE 8
@@ -160,6 +161,15 @@ static struct config {
     atomic_uint_fast64_t last_time_ns;
     uint64_t time_per_token;
     uint64_t time_per_burst;
+    /* Dataset support */
+    sds dataset_file;
+    int max_documents;        /* Maximum documents to load from dataset */
+    sds xml_root_element;     /* XML root element name */
+    dataset *current_dataset; /* Current loaded dataset */
+    /* Command template for dataset mode */
+    int template_argc;
+    sds *template_argv;
+    int has_field_placeholders;
 } config;
 
 /* Locations of the placeholders __rand_int__, __rand_1st__,
@@ -171,6 +181,9 @@ static struct placeholders {
     size_t *index_data; /* allocation holding all index data */
 } placeholders;
 
+/* Sequence keys for dataset command generation */
+static _Atomic uint64_t dataset_seq_key[PLACEHOLDER_COUNT] = {0};
+
 typedef struct _client {
     valkeyContext *context;
     sds obuf;
@@ -237,6 +250,7 @@ static int fetchClusterSlotsConfiguration(client c);
 static void updateClusterSlotsConfiguration(void);
 static long long showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData);
 int runFuzzerClients(const char *host, int port, int max_commands, int parallel_clients, int cluster_mode, int num_keys, cliSSLconfig *ssl_config, const char *log_level, int fuzz_flags);
+static int parseCommandTemplate(int argc, char **argv);
 
 /* Dict callbacks */
 static uint64_t dictSdsHash(const void *key);
@@ -866,8 +880,28 @@ static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
         return;
     }
 
-    /* Really initialize: replace keys and set start time. */
-    if (config.replace_placeholders) replacePlaceholders(c->obuf + c->prefixlen, config.pipeline);
+    /* Dataset field access mode - completely independent command generation */
+    if (config.has_field_placeholders && config.current_dataset && config.current_dataset->record_count > 0) {
+        static _Atomic uint64_t record_counter = 0;
+
+        /* Generate complete pipeline commands for dataset placeholders */
+        sdssetlen(c->obuf, c->prefixlen);
+        for (int p = 0; p < config.pipeline; p++) {
+            uint64_t record_index = atomic_fetch_add_explicit(&record_counter, 1, memory_order_relaxed) % config.current_dataset->record_count;
+            sds complete_cmd = datasetGenerateCommand(config.current_dataset, record_index,
+                                                      config.template_argv, config.template_argc,
+                                                      dataset_seq_key, config.replace_placeholders,
+                                                      config.keyspacelen, config.sequential_replacement);
+            c->obuf = sdscatlen(c->obuf, complete_cmd, sdslen(complete_cmd));
+            sdsfree(complete_cmd);
+        }
+    } else {
+        /* Standard mode */
+        if (config.replace_placeholders) {
+            replacePlaceholders(c->obuf + c->prefixlen, config.pipeline);
+        }
+    }
+
     if (config.cluster_mode && c->staglen > 0) setClusterKeyHashTag(c);
     c->slots_last_update = atomic_load_explicit(&config.slots_last_update, memory_order_relaxed);
     c->start = ustime();
@@ -1614,6 +1648,55 @@ static void updateClusterSlotsConfiguration(void) {
     pthread_mutex_unlock(&config.is_updating_slots_mutex);
 }
 
+/* Free dataset memory */
+static void cleanupDataset(void) {
+    if (config.current_dataset) {
+        datasetFree(config.current_dataset);
+        config.current_dataset = NULL;
+    }
+}
+
+/* Add RESP command to sequence with repeat count */
+static void addRespCommandToSequence(sds *sds_args, size_t *argvlen, int start, int end, int repeat, sds *cmd_seq, int *seq_len) {
+    char *cmd = NULL;
+    int len = valkeyFormatCommandArgv(&cmd, end - start, (const char **)sds_args + start, argvlen + start);
+    for (int j = 0; j < repeat; j++) {
+        *cmd_seq = sdscatlen(*cmd_seq, cmd, len);
+    }
+    *seq_len += repeat;
+    free(cmd);
+}
+
+/* Parse and set up the command template for dataset field validation */
+static int parseCommandTemplate(int argc, char **argv) {
+    sds *sds_args = getSdsArrayFromArgv(argc, argv, 0);
+    if (!sds_args) {
+        fprintf(stderr, "Invalid quoted string\n");
+        return 0;
+    }
+
+    /* Detect field placeholders */
+    config.has_field_placeholders = 0;
+    for (int i = 0; i < argc; i++) {
+        if (strstr(sds_args[i], FIELD_PREFIX)) {
+            config.has_field_placeholders = 1;
+            break;
+        }
+    }
+
+    if (config.has_field_placeholders) {
+        config.template_argc = argc;
+        config.template_argv = zmalloc(argc * sizeof(sds));
+        for (int i = 0; i < argc; i++) {
+            config.template_argv[i] = sdsdup(sds_args[i]);
+        }
+    }
+
+    sdsfreesplitres(sds_args, argc);
+    return 1;
+}
+
+
 /* Generate random data for the benchmark. See #7196. */
 static void genBenchmarkRandomData(char *data, int count) {
     static uint32_t state = 1234;
@@ -1787,6 +1870,16 @@ int parseOptions(int argc, char **argv) {
             config.num_functions = atoi(argv[++i]);
         } else if (!strcmp(argv[i], "--num-keys-in-fcall")) {
             config.num_keys_in_fcall = atoi(argv[++i]);
+        } else if (!strcmp(argv[i], "--dataset")) {
+            if (lastarg) goto invalid;
+            config.dataset_file = sdsnew(argv[++i]);
+        } else if (!strcmp(argv[i], "--maxdocs")) {
+            if (lastarg) goto invalid;
+            config.max_documents = atoi(argv[++i]);
+            if (config.max_documents <= 0) config.max_documents = -1;
+        } else if (!strcmp(argv[i], "--xml-root-element")) {
+            if (lastarg) goto invalid;
+            config.xml_root_element = sdsnew(argv[++i]);
         } else if (!strcmp(argv[i], "--help")) {
             exit_status = 0;
             goto usage;
@@ -1917,6 +2010,8 @@ int parseOptions(int argc, char **argv) {
 " __rand_1st__       Like __rand_int__ but multiple occurrences will have the same\n"
 "                    value. __rand_2nd__ through __rand_9th__ are also available.\n"
 " __data__           Replaced with data of the size specified by the -d option.\n"
+" __field:name__     Replaced with data from the specified field/column in the\n"
+"                    dataset. Requires --dataset option.\n"
 " {tag}              Replaced with a tag that routes the command to each node in\n"
 "                    a cluster. Include this in key names when running in cluster\n"
 "                    mode.\n"
@@ -2002,7 +2097,13 @@ int parseOptions(int argc, char **argv) {
 "                    loaded when running the 'function_load' test. (default 10).\n"
 " --num-keys-in-fcall <num>\n"
 "                    Sets the number of keys passed to FCALL command when running\n"
-"                    the 'fcall' test. (default 1)\n",
+"                    the 'fcall' test. (default 1)\n"
+" --dataset <file>   Path to CSV/TSV/XML dataset file for field placeholder replacement.\n"
+"                    All fields auto-detected with natural content lengths.\n"
+" --maxdocs <count>  Maximum number of documents to load from dataset file.\n"
+"                    Default: unlimited.\n"
+" --xml-root-element <element>\n"
+"                    Root element name for XML dataset parsing. Required for XML files.\n",
     tls_usage,
     rdma_usage,
 " --mptcp            Enable an MPTCP connection.\n"
@@ -2220,12 +2321,44 @@ int main(int argc, char **argv) {
     config.num_functions = 10;
     config.num_keys_in_fcall = 1;
     config.resp3 = 0;
+    config.dataset_file = NULL;
+    config.max_documents = -1; /* -1 = unlimited */
+    config.xml_root_element = NULL;
+    config.current_dataset = NULL;
+    config.template_argc = 0;
+    config.template_argv = NULL;
+    config.has_field_placeholders = 0;
 
     resetPlaceholders();
 
     i = parseOptions(argc, argv);
     argc -= i;
     argv += i;
 
+    /* Setup dataset if specified */
+    if (config.dataset_file) {
+        if (argc == 0) {
+            fprintf(stderr, "Error: Dataset mode requires a command with field placeholders\n");
+            fprintf(stderr, "Example: SET doc:__rand_int__ \"__field:content__\"\n");
+            exit(1);
+        }
+
+        /* Parse command template and set up field placeholder detection */
+        if (!parseCommandTemplate(argc, argv)) {
+            exit(1);
+        }
+
+        /* Initialize dataset - a single call does everything atomically */
+        config.current_dataset = datasetInit(config.dataset_file, config.xml_root_element,
+                                             config.max_documents,
+                                             config.has_field_placeholders,
+                                             config.template_argv, config.template_argc);
+        if (!config.current_dataset) {
+            fprintf(stderr, "Failed to initialize dataset\n");
+            exit(1);
+        }
+
+        datasetReportMemory(config.current_dataset);
+    }
 
     /* Set default for requests if not specified */
     if (config.requests < 0) config.requests = 100000;
@@ -2384,18 +2517,15 @@ int main(int argc, char **argv) {
         } else if (i == argc || strcmp(";", sds_args[i]) == 0) {
             cmd = NULL;
             if (i == start) continue;
-            /* End of command. RESP-encode and append to sequence. */
-            len = valkeyFormatCommandArgv(&cmd, i - start,
-                                          (const char **)sds_args + start,
-                                          argvlen + start);
-            for (int j = 0; j < repeat; j++) {
-                cmd_seq = sdscatlen(cmd_seq, cmd, len);
-            }
-            seq_len += repeat;
-            free(cmd);
+
+            addRespCommandToSequence(sds_args, argvlen, start, i, repeat, &cmd_seq, &seq_len);
             start = i + 1;
             repeat = 1;
         } else if (strstr(sds_args[i], "__data__")) {
+            if (config.current_dataset) {
+                fprintf(stderr, "Error: __data__ placeholders cannot be used with --dataset option\n");
+                exit(1);
+            }
             /* Replace data placeholders with data of length given by -d. */
             int num_parts;
             sds *parts = sdssplitlen(sds_args[i], sdslen(sds_args[i]),
@@ -2414,6 +2544,7 @@ int main(int argc, char **argv) {
             sds_args[i] = newarg;
             argvlen[i] = sdslen(sds_args[i]);
         }
+        /* NOTE: Field placeholder processing is handled above in the command-level loop to ensure row consistency */
     }
     len = sdslen(cmd_seq); /* adjust the datasize to the parsed command */
@@ -2634,6 +2765,15 @@ int main(int argc, char **argv) {
     freeCliConnInfo(config.conn_info);
     if (config.server_config != NULL) freeServerConfig(config.server_config);
     resetPlaceholders();
+    cleanupDataset();
+
+    /* Clean up command template */
+    if (config.template_argv) {
+        for (int i = 0; i < config.template_argc; i++) {
+            sdsfree(config.template_argv[i]);
+        }
+        zfree(config.template_argv);
+    }
 
     return 0;
 }
diff --git a/tests/integration/valkey-benchmark.tcl b/tests/integration/valkey-benchmark.tcl
index 0cf2906c85e..5525dbfc19f 100644
--- a/tests/integration/valkey-benchmark.tcl
+++ b/tests/integration/valkey-benchmark.tcl
@@ -199,6 +199,246 @@ tags {"benchmark network external:skip logreqres:skip"} {
         assert {$different_count > 0}
     }
 
+    test {benchmark: dataset CSV with field placeholders} {
+        # Create test CSV dataset
+        set csv_data "title,content,author\nTest Title 1,Test Content 1,Author 1\nTest Title 2,Test Content 2,Author 2"
+        set csv_file [tmpfile "dataset.csv"]
+        set fd [open $csv_file w]
+        puts $fd $csv_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $csv_file -n 4 -r 10 -- HSET doc:__rand_int__ title \"__field:title__\" content \"__field:content__\""]
+        common_bench_setup $cmd
+        assert_match {*calls=4,*} [cmdstat hset]
+
+        # Verify field data was inserted correctly
+        set keys [r keys "doc:*"]
+        assert {[llength $keys] > 0}
+        set sample_key [lindex $keys 0]
+        set title [r hget $sample_key title]
+        set content [r hget $sample_key content]
+        assert {$title eq "Test Title 1" || $title eq "Test Title 2"}
+        assert {$content eq "Test Content 1" || $content eq "Test Content 2"}
+
+        file delete $csv_file
+    }
+
+    test {benchmark: dataset XML with field placeholders} {
+        # Create test XML dataset matching Wikipedia structure
+        set xml_data "
+<doc>
+<title>XML Title 1</title>
+<abstract>XML Abstract 1</abstract>
+<url>http://example1.com</url>
+<links>
+<sublink>
+<anchor>test1</anchor>
+<link>http://test1.com</link>
+</sublink>
+</links>
+</doc>
+<doc>
+<title>XML Title 2</title>
+<abstract>XML Abstract 2</abstract>
+<url>http://example2.com</url>
+<links>
+<sublink>
+<anchor>test2</anchor>
+<link>http://test2.com</link>
+</sublink>
+</links>
+</doc>"
+        set xml_file [tmpfile "dataset.xml"]
+        set fd [open $xml_file w]
+        puts $fd $xml_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $xml_file --xml-root-element doc -n 4 -r 10 -- HSET xml_doc:__rand_int__ title \"__field:title__\" abstract \"__field:abstract__\""]
+        common_bench_setup $cmd
+        assert_match {*calls=4,*} [cmdstat hset]
+
+        # Verify XML field data was inserted correctly
+        set keys [r keys "xml_doc:*"]
+        assert {[llength $keys] > 0}
+        set sample_key [lindex $keys 0]
+        set title [r hget $sample_key title]
+        set abstract [r hget $sample_key abstract]
+        assert {$title eq "XML Title 1" || $title eq "XML Title 2"}
+        assert {$abstract eq "XML Abstract 1" || $abstract eq "XML Abstract 2"}
+
+        file delete $xml_file
+    }
+
+    test {benchmark: dataset with maxdocs limit} {
+        # Create test dataset with multiple rows
+        set csv_data "name,value\nitem1,value1\nitem2,value2\nitem3,value3\nitem4,value4"
+        set csv_file [tmpfile "dataset.csv"]
+        set fd [open $csv_file w]
+        puts $fd $csv_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $csv_file --maxdocs 2 -n 4 -r 10 -- SET item:__rand_int__ \"__field:value__\""]
+        common_bench_setup $cmd
+        assert_match {*calls=4,*} [cmdstat set]
+
+        # Should only use first 2 documents due to maxdocs limit
+        set keys [r keys "item:*"]
+        assert {[llength $keys] > 0}
+
+        # Verify ALL keys only contain values from first 2 documents
+        set unique_values {}
+        foreach key $keys {
+            set value [r get $key]
+            assert {$value eq "value1" || $value eq "value2"}
+            if {[lsearch $unique_values $value] == -1} {
+                lappend unique_values $value
+            }
+        }
+
+        file delete $csv_file
+    }
+
+    test {benchmark: dataset error handling - invalid field} {
+        set csv_data "name,value\nitem1,value1"
+        set csv_file [tmpfile "dataset.csv"]
+        set fd [open $csv_file w]
+        puts $fd $csv_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $csv_file -n 1 -- SET item:__rand_int__ \"__field:invalid_field__\""]
+
+        # Should fail with invalid field error
+        if {[catch { exec {*}$cmd } error]} {
+            assert_match "*not found in dataset fields*" $error
+        } else {
+            fail "Expected error for invalid field placeholder"
+        }
+
+        file delete $csv_file
+    }
+
+    test {benchmark: dataset TSV with field placeholders} {
+        # Create test TSV dataset (tab-separated values)
+        set tsv_data "name\tvalue\tcount\nitem1\tvalue1\t100\nitem2\tvalue2\t200"
+        set tsv_file [tmpfile "dataset.tsv"]
+        set fd [open $tsv_file w]
+        puts $fd $tsv_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $tsv_file -n 4 -r 10 -- HSET tsv_doc:__rand_int__ name \"__field:name__\" value \"__field:value__\" count __field:count__"]
+        common_bench_setup $cmd
+        assert_match {*calls=4,*} [cmdstat hset]
+
+        # Verify TSV field data was inserted correctly
+        set keys [r keys "tsv_doc:*"]
+        assert {[llength $keys] > 0}
+        set sample_key [lindex $keys 0]
+        set name [r hget $sample_key name]
+        set value [r hget $sample_key value]
+        set count [r hget $sample_key count]
+        assert {$name eq "item1" || $name eq "item2"}
+        assert {$value eq "value1" || $value eq "value2"}
+        assert {$count eq "100" || $count eq "200"}
+
+        file delete $tsv_file
+    }
+
+    test {benchmark: XML dataset missing root element error} {
+        # Create test XML dataset
+        set xml_data "<doc><title>XML Title 1</title><abstract>XML Abstract 1</abstract></doc>"
+        set xml_file [tmpfile "dataset.xml"]
+        set fd [open $xml_file w]
+        puts $fd $xml_data
+        close $fd
+
+        # Should fail without --xml-root-element parameter
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $xml_file -n 1 -- SET xml:__rand_int__ \"__field:title__\""]
+
+        if {[catch { exec {*}$cmd } error]} {
+            assert_match "*XML dataset requires --xml-root-element parameter*" $error
+        } else {
+            fail "Expected error for XML dataset without --xml-root-element"
+        }
+
+        file delete $xml_file
+    }
+
+    test {benchmark: dataset with maxdocs larger than available documents} {
+        # Create test dataset with only 2 rows but request maxdocs=5
+        set csv_data "name,value\nitem1,value1\nitem2,value2"
+        set csv_file [tmpfile "dataset.csv"]
+        set fd [open $csv_file w]
+        puts $fd $csv_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $csv_file --maxdocs 5 -n 4 -r 10 -- SET item:__rand_int__ \"__field:value__\""]
+        common_bench_setup $cmd
+        assert_match {*calls=4,*} [cmdstat set]
+
+        # Should gracefully use all available documents (2), cycling through them
+        set keys [r keys "item:*"]
+        assert {[llength $keys] > 0}
+
+        # All values should still be only from available documents
+        foreach key $keys {
+            set value [r get $key]
+            assert {$value eq "value1" || $value eq "value2"}
+        }
+
+        file delete $csv_file
+    }
+
+    test {benchmark: mixed placeholders - dataset fields and rand placeholders} {
+        # Test combining __field:name__ with __rand_int__ placeholders
+        set csv_data "category,description\nuser,User Management\norder,Order Processing"
+        set csv_file [tmpfile "dataset.csv"]
+        set fd [open $csv_file w]
+        puts $fd $csv_data
+        close $fd
+
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $csv_file -n 6 -r 100 -- HSET mixed:__rand_int__ category \"__field:category__\" desc \"__field:description__\" score __rand_1st__"]
+        common_bench_setup $cmd
+        assert_match {*calls=6,*} [cmdstat hset]
+
+        # Verify both field and random placeholders work together
+        set keys [r keys "mixed:*"]
+        assert {[llength $keys] > 0}
+        set sample_key [lindex $keys 0]
+        set category [r hget $sample_key category]
+        set desc [r hget $sample_key desc]
+        set score [r hget $sample_key score]
+
+        # Field placeholders should contain dataset values
+        assert {$category eq "user" || $category eq "order"}
+        assert {$desc eq "User Management" || $desc eq "Order Processing"}
+
+        # Random placeholder should be a 12-digit number
+        assert {[string length $score] == 12}
+        assert {[string is digit $score]}
+
+        file delete $csv_file
+    }
+
+    test {benchmark: dataset mode requires field placeholders} {
+        set csv_data "name,value\nitem1,value1\nitem2,value2"
+        set csv_file [tmpfile "dataset.csv"]
+        set fd [open $csv_file w]
+        puts $fd $csv_data
+        close $fd
+
+        # Dataset mode should require field placeholders in the command
+        set cmd [valkeybenchmark $master_host $master_port "--dataset $csv_file -n 10 -r 10 -t set"]
+
+        # Should fail with error about missing field placeholders
+        if {[catch { exec {*}$cmd } error]} {
+            assert_match "*Dataset mode requires a command with field placeholders*" $error
+        } else {
+            fail "Expected error for dataset mode without field placeholders"
+        }
+
+        file delete $csv_file
+    }
+
     test {benchmark: sequential zadd results in expected number of keys} {
         set cmd [valkeybenchmark $master_host $master_port "-r 50 -n 50 --sequential -t zadd"]
         common_bench_setup $cmd