diff --git a/Cargo.lock b/Cargo.lock
index c85a093..c89d2ce 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -483,6 +483,12 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
 [[package]]
 name = "bitflags"
 version = "2.11.0"
@@ -784,6 +790,15 @@ version = "0.4.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d"
 
+[[package]]
+name = "conpty"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b72b06487a0d4683349ad74d62e87ad639b09667082b3c495c5b6bab7d84b3da"
+dependencies = [
+ "windows",
+]
+
 [[package]]
 name = "console"
 version = "0.16.2"
@@ -1752,7 +1767,7 @@ dependencies = [
 
 [[package]]
 name = "datu"
-version = "0.2.4"
+version = "0.3.0-alpha"
 dependencies = [
  "anyhow",
  "arrow",
@@ -1764,6 +1779,7 @@ dependencies = [
  "csv",
  "cucumber",
  "datafusion",
+ "expectrl",
  "flt",
  "futures",
  "gherkin",
@@ -1900,6 +1916,18 @@ version = "3.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"
 
+[[package]]
+name = "expectrl"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0e0706d01b4f43adaf7e0fb460e07477c36b74ae60fdeb1d045001bd77b4bd1"
+dependencies = [
+ "conpty",
+ "nix 0.26.4",
+ "ptyprocess",
+ "regex",
+]
+
 [[package]]
 name = "fallible-streaming-iterator"
 version = "0.1.9"
@@ -1941,7 +1969,7 @@ version = "25.12.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
  "rustc_version",
 ]
 
@@ -2173,7 +2201,7 @@ version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
  "ignore",
  "walkdir",
 ]
@@ -2718,6 +2746,15 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
+[[package]]
+name = "memoffset"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -2743,13 +2780,26 @@ dependencies = [
  "smallvec",
 ]
 
+[[package]]
+name = "nix"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
+dependencies = [
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+ "memoffset",
+ "pin-utils",
+]
+
 [[package]]
 name = "nix"
 version = "0.30.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
  "cfg-if",
  "cfg_aliases",
  "libc",
@@ -3110,6 +3160,12 @@ version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
 
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
 [[package]]
 name = "pkg-config"
 version = "0.3.32"
@@ -3229,6 +3285,15 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "ptyprocess"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "101be273c0b1680d7056afddbaa88f02b6e9f2dc161165c30bee9914b6025a79"
+dependencies = [
+ "nix 0.26.4",
+]
+
 [[package]]
 name = "quad-rand"
 version = "0.2.3"
@@ -3335,7 +3400,7 @@ version = "0.5.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
 ]
 
 [[package]]
@@ -3424,7 +3489,7 @@ version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -3443,7 +3508,7 @@ version = "17.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
  "cfg-if",
  "clipboard-win",
  "fd-lock",
@@ -3451,7 +3516,7 @@ dependencies = [
  "libc",
  "log",
  "memchr",
- "nix",
+ "nix 0.30.1",
  "radix_trie",
  "unicode-segmentation",
  "unicode-width",
@@ -4226,7 +4291,7 @@ version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
- "bitflags",
+ "bitflags 2.11.0",
  "hashbrown 0.15.5",
  "indexmap",
  "semver",
@@ -4261,6 +4326,15 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "windows"
+version = "0.44.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e745dab35a0c4c77aa3ce42d595e13d2003d6902d6b08c9ef5fc326d08da12b"
+dependencies = [
+ "windows-targets 0.42.2",
+]
+
 [[package]]
 name = "windows-core"
 version = "0.62.2"
@@ -4347,6 +4421,21 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "windows-targets"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
+dependencies = [
+ "windows_aarch64_gnullvm 0.42.2",
+ "windows_aarch64_msvc 0.42.2",
+ "windows_i686_gnu 0.42.2",
+ "windows_i686_msvc 0.42.2",
+ "windows_x86_64_gnu 0.42.2",
+ "windows_x86_64_gnullvm 0.42.2",
+ "windows_x86_64_msvc 0.42.2",
+]
+
 [[package]]
 name = "windows-targets"
 version = "0.52.6"
@@ -4380,6 +4469,12 @@ dependencies = [
  "windows_x86_64_msvc 0.53.1",
 ]
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
+
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.52.6"
@@ -4392,6 +4487,12 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.52.6"
@@ -4404,6 +4505,12 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.52.6"
@@ -4428,6 +4535,12 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.52.6"
@@ -4440,6 +4553,12 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.52.6"
@@ -4452,6 +4571,12 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.52.6"
@@ -4464,6 +4589,12 @@ version = "0.53.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.52.6"
@@ -4534,7 +4665,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
 dependencies = [
  "anyhow",
- "bitflags",
+ "bitflags 2.11.0",
  "indexmap",
  "log",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 947ac46..cce938b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "datu"
-version = "0.2.4"
+version = "0.3.0-alpha"
 edition = "2024"
 description = "datu - a data file utility"
 license = "MIT"
@@ -37,6 +37,7 @@ flt = { version = "0.0.2" }
 [dev-dependencies]
 criterion = "0.5"
 cucumber = "0.22.1"
+expectrl = "0.8"
 serde_yaml = "0.9"
 gherkin = "0.15"
 tempfile = "3"
@@ -48,3 +49,7 @@ harness = false
 [[test]]
 name = "features"
 harness = false
+
+[[test]]
+name = "repl"
+harness = false
diff --git a/README.md b/README.md
index 54138b1..3206c07 100644
--- a/README.md
+++ b/README.md
@@ -266,6 +266,81 @@ Print the installed `datu` version:
 datu version
 ```
 
+## Interactive Mode (REPL)
+
+Running `datu` without any command starts an interactive REPL (Read-Eval-Print Loop):
+
+```sh
+datu
+>
+```
+
+In the REPL, you compose data pipelines using the `|>` (pipe) operator to chain functions together. The general pattern is:
+
+```text
+read("input") |> ... |> write("output")
+```
+
+### Functions
+
+#### `read(path)`
+
+Read a data file. Supported formats: Parquet (`.parquet`, `.parq`), Avro (`.avro`), ORC (`.orc`).
+
+```text
+> read("data.parquet") |> write("data.csv")
+```
+
+#### `write(path)`
+
+Write data to a file. The output format is inferred from the file extension. Supported formats: CSV (`.csv`), JSON (`.json`), YAML (`.yaml`), Parquet (`.parquet`, `.parq`), Avro (`.avro`), ORC (`.orc`), XLSX (`.xlsx`).
+
+```text
+> read("data.parquet") |> write("output.json")
+```
+
+#### `select(columns...)`
+
+Select and reorder columns. Columns can be specified using symbol syntax (`:name`) or string syntax (`"name"`).
+
+```text
+> read("data.parquet") |> select(:id, :email) |> write("subset.csv")
+> read("data.parquet") |> select("id", "email") |> write("subset.csv")
+```
+
+Columns appear in the output in the order they are listed, so `select` can also be used to reorder columns:
+
+```text
+> read("data.parquet") |> select(:email, :id) |> write("reordered.csv")
+```
+
+#### `head(n)`
+
+Take the first _n_ rows.
+
+```text
+> read("data.parquet") |> head(10) |> write("first10.csv")
+```
+
+#### `tail(n)`
+
+Take the last _n_ rows.
+
+```text
+> read("data.parquet") |> tail(10) |> write("last10.csv")
+```
+
+### Composing Pipelines
+
+Functions can be chained in any order to build more complex pipelines:
+
+```text
+> read("users.avro") |> select(:id, :first_name, :email) |> head(5) |> write("top5.json")
+> read("data.parquet") |> select(:two, :one) |> tail(1) |> write("last_row.csv")
+```
+
+---
+
 ## How it Works Internally
 
 Internally, `datu` constructs a pipeline based on the command and arguments.
diff --git a/features/cli.feature b/features/cli/cli.feature
similarity index 95%
rename from features/cli.feature
rename to features/cli/cli.feature
index 9ad91db..1c05dfe 100644
--- a/features/cli.feature
+++ b/features/cli/cli.feature
@@ -2,7 +2,7 @@ Feature: CLI
 
   Scenario: Print version
     When I run `datu --version`
-    Then the output should contain "datu 0.2.4"
+    Then the first line of the output should be: datu 0.3.0-alpha
 
   Scenario: Print help with help subcommand
     When I run `datu help`
diff --git a/features/convert.feature b/features/cli/convert.feature
similarity index 100%
rename from features/convert.feature
rename to features/cli/convert.feature
diff --git a/features/count.feature b/features/cli/count.feature
similarity index 100%
rename from features/count.feature
rename to features/cli/count.feature
diff --git a/features/head.feature b/features/cli/head.feature
similarity index 85%
rename from features/head.feature
rename to features/cli/head.feature
index e21e873..5d62e09 100644
--- a/features/head.feature
+++ b/features/cli/head.feature
@@ -15,25 +15,25 @@ Feature: Head
     When I run `datu head fixtures/userdata5.avro`
     Then the command should succeed
     And the output should have a header and 10 lines
-    And the first line should be: registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments
+    And the first line of the output should be: registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments
 
   Scenario: Head Avro with -n 3
     When I run `datu head fixtures/userdata5.avro -n 3`
     Then the command should succeed
     And the output should have a header and 3 lines
-    And the first line should be: registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments
+    And the first line of the output should be: registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments
 
   Scenario: Head Parquet with --select
     When I run `datu head fixtures/userdata.parquet -n 2 --select id,last_name`
     Then the command should succeed
     And the output should have a header and 2 lines
-    And the first line should be: id,last_name
+    And the first line of the output should be: id,last_name
 
   Scenario: Head Avro with --select
     When I run `datu head fixtures/userdata5.avro -n 2 --select id,email`
     Then the command should succeed
     And the output should have a header and 2 lines
-    And the first line should be: id,email
+    And the first line of the output should be: id,email
 
   Scenario: Head ORC default (10 lines)
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
@@ -41,7 +41,7 @@ Feature: Head
     When I run `datu head $TEMPDIR/userdata5.orc`
     Then the command should succeed
     And the output should have a header and 10 lines
-    And the first line should be: id,first_name
+    And the first line of the output should be: id,first_name
 
   Scenario: Head ORC with -n 3
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
@@ -49,7 +49,7 @@ Feature: Head
     When I run `datu head $TEMPDIR/userdata5.orc -n 3`
     Then the command should succeed
     And the output should have a header and 3 lines
-    And the first line should be: id,first_name
+    And the first line of the output should be: id,first_name
 
   Scenario: Head ORC with --select
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
@@ -57,7 +57,7 @@ Feature: Head
     When I run `datu head $TEMPDIR/userdata5.orc -n 2 --select id,first_name`
     Then the command should succeed
     And the output should have a header and 2 lines
-    And the first line should be: id,first_name
+    And the first line of the output should be: id,first_name
 
   Scenario: Head ORC with --output csv
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
@@ -65,7 +65,7 @@ Feature: Head
     When I run `datu head $TEMPDIR/userdata5.orc -n 2 --output csv`
     Then the command should succeed
     And the output should have a header and 2 lines
-    And the first line should be: id,first_name
+    And the first line of the output should be: id,first_name
 
   Scenario: Head ORC with --output json
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
@@ -101,7 +101,7 @@ Feature: Head
     When I run `datu head fixtures/userdata5.avro -n 2 --output csv`
     Then the command should succeed
     And the output should have a header and 2 lines
-    And the first line should be: registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments
+    And the first line of the output should be: registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments
 
   Scenario: Head Avro with --output json
     When I run `datu head fixtures/userdata5.avro -n 2 --output json`
diff --git a/features/schema.feature b/features/cli/schema.feature
similarity index 100%
rename from features/schema.feature
rename to features/cli/schema.feature
diff --git a/features/tail.feature b/features/cli/tail.feature
similarity index 77%
rename from features/tail.feature
rename to features/cli/tail.feature
index 7c579e8..bbb826a 100644
--- a/features/tail.feature
+++ b/features/cli/tail.feature
@@ -4,67 +4,67 @@ Feature: Tail
   Scenario: Tail Parquet default (10 lines)
     When I run `datu tail fixtures/table.parquet`
     Then the command should succeed
-    And the first line should contain "one"
+    And the first line of the output should contain "one"
 
   Scenario: Tail Parquet with -n 2
     When I run `datu tail fixtures/table.parquet -n 2`
     Then the command should succeed
-    And the first line should contain "one"
-    And the first line should contain "two"
+    And the first line of the output should contain "one"
+    And the first line of the output should contain "two"
 
   Scenario: Tail Avro default (10 lines)
     When I run `datu tail fixtures/userdata5.avro`
     Then the command should succeed
-    And the first line should contain "id"
-    And the first line should contain "first_name"
+    And the first line of the output should contain "id"
+    And the first line of the output should contain "first_name"
 
   Scenario: Tail Avro with -n 3
     When I run `datu tail fixtures/userdata5.avro -n 3`
     Then the command should succeed
-    And the first line should contain "email"
+    And the first line of the output should contain "email"
 
   Scenario: Tail Parquet with --select
     When I run `datu tail fixtures/table.parquet -n 2 --select two,four`
     Then the command should succeed
-    And the first line should contain "two"
-    And the first line should contain "four"
+    And the first line of the output should contain "two"
+    And the first line of the output should contain "four"
 
   Scenario: Tail Avro with --select
     When I run `datu tail fixtures/userdata5.avro -n 2 --select id,email`
     Then the command should succeed
-    And the first line should contain "id"
-    And the first line should contain "email"
+    And the first line of the output should contain "id"
+    And the first line of the output should contain "email"
 
   Scenario: Tail ORC default (10 lines)
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
     Then the command should succeed
     When I run `datu tail $TEMPDIR/userdata5.orc`
     Then the command should succeed
-    And the first line should contain "id"
-    And the first line should contain "first_name"
+    And the first line of the output should contain "id"
+    And the first line of the output should contain "first_name"
 
   Scenario: Tail ORC with -n 3
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
     Then the command should succeed
     When I run `datu tail $TEMPDIR/userdata5.orc -n 3`
     Then the command should succeed
-    And the first line should contain "id"
+    And the first line of the output should contain "id"
 
   Scenario: Tail ORC with --select
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
     Then the command should succeed
     When I run `datu tail $TEMPDIR/userdata5.orc -n 2 --select id,first_name`
     Then the command should succeed
-    And the first line should contain "id"
-    And the first line should contain "first_name"
+    And the first line of the output should contain "id"
+    And the first line of the output should contain "first_name"
 
   Scenario: Tail ORC with --output csv
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
     Then the command should succeed
     When I run `datu tail $TEMPDIR/userdata5.orc -n 2 --output csv`
     Then the command should succeed
-    And the first line should contain "id"
-    And the first line should contain "first_name"
+    And the first line of the output should contain "id"
+    And the first line of the output should contain "first_name"
 
   Scenario: Tail ORC with --output json
     When I run `datu convert fixtures/userdata5.avro $TEMPDIR/userdata5.orc --select id,first_name --limit 10`
@@ -87,8 +87,8 @@ Feature: Tail
   Scenario: Tail Parquet with --output csv
     When I run `datu tail fixtures/table.parquet -n 2 --output csv`
     Then the command should succeed
-    And the first line should contain "one"
-    And the first line should contain "two"
+    And the first line of the output should contain "one"
+    And the first line of the output should contain "two"
 
   Scenario: Tail Parquet with --output json
     When I run `datu tail fixtures/table.parquet -n 2 --output json`
@@ -100,8 +100,8 @@ Feature: Tail
   Scenario: Tail Avro with --output csv
     When I run `datu tail fixtures/userdata5.avro -n 2 --output csv`
     Then the command should succeed
-    And the first line should contain "id"
-    And the first line should contain "first_name"
+    And the first line of the output should contain "id"
+    And the first line of the output should contain "first_name"
 
   Scenario: Tail Avro with --output json
     When I run `datu tail fixtures/userdata5.avro -n 2 --output json`
diff --git a/features/repl/conversion.feature b/features/repl/conversion.feature
new file mode 100644
index 0000000..fd5406c
--- /dev/null
+++ b/features/repl/conversion.feature
@@ -0,0 +1,196 @@
+Feature: Conversion
+
+  Scenario: Parquet to CSV
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.parquet") |> write("$TEMPDIR/userdata.csv")
+      ```
+    Then the file "$TEMPDIR/userdata.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "registration_dttm,id,first_name,last_name,email,gender,ip_address,cc,country,birthdate,salary,title,comments"
+
+  Scenario: Parquet to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> write("$TEMPDIR/table.json")
+      ```
+    Then the file "$TEMPDIR/table.json" should exist
+    And that file should be valid JSON
+    And that file should contain "two"
+    And that file should contain "foo"
+
+  Scenario: Parquet to YAML
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> write("$TEMPDIR/table.yaml")
+      ```
+    Then the file "$TEMPDIR/table.yaml" should exist
+    And that file should be valid YAML
+    And that file should contain "two:"
+    And that file should contain "foo"
+
+  Scenario: Parquet to Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> write("$TEMPDIR/table.avro")
+      ```
+    Then the file "$TEMPDIR/table.avro" should exist
+    And that file should be valid Avro
+
+  Scenario: Parquet to XLSX
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> write("$TEMPDIR/table.xlsx")
+      ```
+    Then the file "$TEMPDIR/table.xlsx" should exist
+    And that file should be valid XLSX
+
+  Scenario: Avro to CSV
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.csv")
+      ```
+    Then the file "$TEMPDIR/userdata5.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should contain "id"
+    And the first line of that file should contain "first_name"
+
+  Scenario: Avro to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.json")
+      ```
+    Then the file "$TEMPDIR/userdata5.json" should exist
+    And that file should be valid JSON
+
+  Scenario: Avro to YAML
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.yaml")
+      ```
+    Then the file "$TEMPDIR/userdata5.yaml" should exist
+    And that file should be valid YAML
+    And that file should contain "id:"
+    And that file should contain "first_name:"
+
+  Scenario: Avro to Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.parquet")
+      ```
+    Then the file "$TEMPDIR/userdata5.parquet" should exist
+    And that file should be valid Parquet
+
+  Scenario: Avro to ORC
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.orc")
+      ```
+    Then the file "$TEMPDIR/userdata5.orc" should exist
+    And that file should be valid ORC
+
+  Scenario: Avro to XLSX
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> write("$TEMPDIR/userdata5.xlsx")
+      ```
+    Then the file "$TEMPDIR/userdata5.xlsx" should exist
+    And that file should be valid XLSX
+
+  Scenario: ORC to CSV
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.csv")
+      ```
+    Then the file "$TEMPDIR/userdata_orc.csv" should exist
+    And that file should be a CSV file
+
+  Scenario: ORC to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.json")
+      ```
+    Then the file "$TEMPDIR/userdata_orc.json" should exist
+    And that file should be valid JSON
+
+  Scenario: ORC to YAML
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.yaml")
+      ```
+    Then the file "$TEMPDIR/userdata_orc.yaml" should exist
+    And that file should be valid YAML
+
+  Scenario: ORC to Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.parquet")
+      ```
+    Then the file "$TEMPDIR/userdata_orc.parquet" should exist
+    And that file should be valid Parquet
+
+  Scenario: ORC to Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.avro")
+      ```
+    Then the file "$TEMPDIR/userdata_orc.avro" should exist
+    And that file should be valid Avro
+
+  Scenario: ORC to XLSX
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> write("$TEMPDIR/userdata_orc.xlsx")
+      ```
+    Then the file "$TEMPDIR/userdata_orc.xlsx" should exist
+    And that file should be valid XLSX
+
+  Scenario: Parquet to CSV with select
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :four) |> write("$TEMPDIR/table_select.csv")
+      ```
+    Then the file "$TEMPDIR/table_select.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,four"
+    And that file should have 4 lines
+
+  Scenario: Parquet to CSV with head
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(2) |> write("$TEMPDIR/table_head.csv")
+      ```
+    Then the file "$TEMPDIR/table_head.csv" should exist
+    And that file should be a CSV file
+    And that file should have 3 lines
+
+  Scenario: Avro to JSON with select and head
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :first_name, :email) |> head(5) |> write("$TEMPDIR/userdata5_subset.json")
+      ```
+    Then the file "$TEMPDIR/userdata5_subset.json" should exist
+    And that file should be valid JSON
+    And that file should contain "id"
+    And that file should contain "first_name"
+    And that file should contain "email"
+
+  Scenario: Parquet to YAML with select
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :four) |> write("$TEMPDIR/table_select.yaml")
+      ```
+    Then the file "$TEMPDIR/table_select.yaml" should exist
+    And that file should be valid YAML
+    And that file should contain "two:"
+    And that file should contain "four:"
+
+  Scenario: Avro to CSV with head
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> head(10) |> write("$TEMPDIR/userdata5_head.csv")
+      ```
+    Then the file "$TEMPDIR/userdata5_head.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should contain "id"
+    And that file should have 11 lines
diff --git a/features/repl/head.feature b/features/repl/head.feature
new file mode 100644
index 0000000..7a5b33c
--- /dev/null
+++ b/features/repl/head.feature
@@ -0,0 +1,122 @@
+Feature: Head
+
+  Scenario: Head from Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(2) |> write("$TEMPDIR/head_parquet.csv")
+      ```
+    Then the file "$TEMPDIR/head_parquet.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "one,two,three,four,five,__index_level_0__"
+    And that file should have 3 lines
+    And that file should contain "foo"
+    And that file should contain "bar"
+
+  Scenario: Head a single row
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(1) |> write("$TEMPDIR/head_one.csv")
+      ```
+    Then the file "$TEMPDIR/head_one.csv" should exist
+    And that file should be a CSV file
+    And that file should have 2 lines
+    And that file should contain "foo"
+
+  Scenario: Head more rows than available
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(100) |> write("$TEMPDIR/head_all.csv")
+      ```
+    Then the file "$TEMPDIR/head_all.csv" should exist
+    And that file should be a CSV file
+    And that file should have 4 lines
+
+  Scenario: Head from Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> head(5) |> write("$TEMPDIR/head_avro.csv")
+      ```
+    Then the file "$TEMPDIR/head_avro.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should contain "id"
+    And the first line of that file should contain "first_name"
+    And that file should have 6 lines
+
+  Scenario: Head from ORC
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> head(3) |> write("$TEMPDIR/head_orc.csv")
+      ```
+    Then the file "$TEMPDIR/head_orc.csv" should exist
+    And that file should be a CSV file
+    And that file should have 4 lines
+
+  Scenario: Head with select
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :three) |> head(2) |> write("$TEMPDIR/head_select.csv")
+      ```
+    Then the file "$TEMPDIR/head_select.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,three"
+    And that file should have 3 lines
+
+  Scenario: Head to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(2) |> write("$TEMPDIR/head.json")
+      ```
+    Then the file "$TEMPDIR/head.json" should exist
+    And that file should be valid JSON
+    And that file should contain "foo"
+    And that file should contain "bar"
+    And that file should have 2 records
+
+  Scenario: Head to YAML
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(2) |> write("$TEMPDIR/head.yaml")
+      ```
+    Then the file "$TEMPDIR/head.yaml" should exist
+    And that file should be valid YAML
+    And that file should contain "two:"
+    And that file should contain "foo"
+
+  Scenario: Head to Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> head(10) |> write("$TEMPDIR/head.parquet")
+      ```
+    Then the file "$TEMPDIR/head.parquet" should exist
+    And that file should be valid Parquet
+    And that file should have 10 records
+
+  Scenario: Head to Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> head(2) |> write("$TEMPDIR/head.avro")
+      ```
+    Then the file "$TEMPDIR/head.avro" should exist
+    And that file should be valid Avro
+    And that file should have 2 records
+
+  Scenario: Head with select from Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :email) |> head(3) |> write("$TEMPDIR/head_select_avro.csv")
+      ```
+    Then the file "$TEMPDIR/head_select_avro.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "id,email"
+    And that file should have 4 lines
+
+  Scenario: Head with select to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :first_name) |> head(2) |> write("$TEMPDIR/head_select.json")
+      ```
+    Then the file "$TEMPDIR/head_select.json" should exist
+    And that file should be valid JSON
+    And that file should contain "id"
+    And that file should contain "first_name"
+    And that file should have 2 records
diff --git a/features/repl/repl.feature b/features/repl/repl.feature
new file mode 100644
index 0000000..69d1e0e
--- /dev/null
+++ b/features/repl/repl.feature
@@ -0,0 +1,8 @@
+Feature: REPL
+
+  Scenario:
+    When datu is ran without a command
+    Then the output should be:
+      ```
+      >
+      ```
diff --git a/features/repl/select.feature b/features/repl/select.feature
new file mode 100644
index 0000000..f7657ab
--- /dev/null
+++ b/features/repl/select.feature
@@ -0,0 +1,124 @@
+Feature: Select
+
+  Scenario: Select columns using symbol syntax
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :three) |> write("$TEMPDIR/select_symbols.csv")
+      ```
+    Then the file "$TEMPDIR/select_symbols.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,three"
+    And that file should have 4 lines
+
+  Scenario: Select columns using string syntax
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select("two", "three") |> write("$TEMPDIR/select_strings.csv")
+      ```
+    Then the file "$TEMPDIR/select_strings.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,three"
+    And that file should have 4 lines
+
+  Scenario: Select a single column
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two) |> write("$TEMPDIR/select_single.csv")
+      ```
+    Then the file "$TEMPDIR/select_single.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two"
+    And that file should have 4 lines
+
+  Scenario: Select reorders columns
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:three, :one) |> write("$TEMPDIR/select_reorder.csv")
+      ```
+    Then the file "$TEMPDIR/select_reorder.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "three,one"
+
+  Scenario: Select many columns
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:five, :four, :three, :two, :one) |> write("$TEMPDIR/select_many.csv")
+      ```
+    Then the file "$TEMPDIR/select_many.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "five,four,three,two,one"
+    And that file should have 4 lines
+
+  Scenario: Select from Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :email) |> write("$TEMPDIR/select_avro.csv")
+      ```
+    Then the file "$TEMPDIR/select_avro.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "id,email"
+
+  Scenario: Select from ORC
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> select(:_col1, :_col2) |> write("$TEMPDIR/select_orc.csv")
+      ```
+    Then the file "$TEMPDIR/select_orc.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "_col1,_col2"
+
+  Scenario: Select with head
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :first_name) |> head(5) |> write("$TEMPDIR/select_head.csv")
+      ```
+    Then the file "$TEMPDIR/select_head.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "id,first_name"
+    And that file should have 6 lines
+
+  Scenario: Select with tail
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :one) |> tail(1) |> write("$TEMPDIR/select_tail.csv")
+      ```
+    Then the file "$TEMPDIR/select_tail.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,one"
+    And that file should have 2 lines
+
+  Scenario: Select to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :three) |> write("$TEMPDIR/select.json")
+      ```
+    Then the file "$TEMPDIR/select.json" should exist
+    And that file should be valid JSON
+    And that file should contain "two"
+    And that file should contain "three"
+
+  Scenario: Select to YAML
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :three) |> write("$TEMPDIR/select.yaml")
+      ```
+    Then the file "$TEMPDIR/select.yaml" should exist
+    And that file should be valid YAML
+    And that file should contain "two:"
+    And that file should contain "three:"
+
+  Scenario: Select to Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:one, :two) |> write("$TEMPDIR/select.avro")
+      ```
+    Then the file "$TEMPDIR/select.avro" should exist
+    And that file should be valid Avro
+
+  Scenario: Select to Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :first_name, :email) |> write("$TEMPDIR/select.parquet")
+      ```
+    Then the file "$TEMPDIR/select.parquet" should exist
+    And that file should be valid Parquet
diff --git a/features/repl/tail.feature b/features/repl/tail.feature
new file mode 100644
index 0000000..6eff6ad
--- /dev/null
+++ b/features/repl/tail.feature
@@ -0,0 +1,118 @@
+Feature: Tail
+
+  Scenario: Tail from Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> tail(2) |> write("$TEMPDIR/tail_parquet.csv")
+      ```
+    Then the file "$TEMPDIR/tail_parquet.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "one,two,three,four,five,__index_level_0__"
+    And that file should have 3 lines
+    And that file should contain "bar"
+    And that file should contain "baz"
+
+  Scenario: Tail a single row
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> tail(1) |> write("$TEMPDIR/tail_one.csv")
+      ```
+    Then the file "$TEMPDIR/tail_one.csv" should exist
+    And that file should be a CSV file
+    And that file should have 2 lines
+    And that file should contain "baz"
+
+  Scenario: Tail more rows than available
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> tail(100) |> write("$TEMPDIR/tail_all.csv")
+      ```
+    Then the file "$TEMPDIR/tail_all.csv" should exist
+    And that file should be a CSV file
+    And that file should have 4 lines
+
+  Scenario: Tail from Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> tail(5) |> write("$TEMPDIR/tail_avro.csv")
+      ```
+    Then the file "$TEMPDIR/tail_avro.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should contain "id"
+    And the first line of that file should contain "first_name"
+    And that file should have 6 lines
+
+  Scenario: Tail from ORC
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata.orc") |> tail(3) |> write("$TEMPDIR/tail_orc.csv")
+      ```
+    Then the file "$TEMPDIR/tail_orc.csv" should exist
+    And that file should be a CSV file
+    And that file should have 4 lines
+
+  Scenario: Tail with select
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, :three) |> tail(2) |> write("$TEMPDIR/tail_select.csv")
+      ```
+    Then the file "$TEMPDIR/tail_select.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,three"
+    And that file should have 3 lines
+
+  Scenario: Tail to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> tail(2) |> write("$TEMPDIR/tail.json")
+      ```
+    Then the file "$TEMPDIR/tail.json" should exist
+    And that file should be valid JSON
+    And that file should contain "bar"
+    And that file should contain "baz"
+
+  Scenario: Tail to YAML
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> tail(2) |> write("$TEMPDIR/tail.yaml")
+      ```
+    Then the file "$TEMPDIR/tail.yaml" should exist
+    And that file should be valid YAML
+    And that file should contain "two:"
+    And that file should contain "baz"
+
+  Scenario: Tail to Parquet
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> tail(10) |> write("$TEMPDIR/tail.parquet")
+      ```
+    Then the file "$TEMPDIR/tail.parquet" should exist
+    And that file should be valid Parquet
+
+  Scenario: Tail to Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> tail(2) |> write("$TEMPDIR/tail.avro")
+      ```
+    Then the file "$TEMPDIR/tail.avro" should exist
+    And that file should be valid Avro
+
+  Scenario: Tail with select from Avro
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :email) |> tail(3) |> write("$TEMPDIR/tail_select_avro.csv")
+      ```
+    Then the file "$TEMPDIR/tail_select_avro.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "id,email"
+    And that file should have 4 lines
+
+  Scenario: Tail with select to JSON
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/userdata5.avro") |> select(:id, :first_name) |> tail(2) |> write("$TEMPDIR/tail_select.json")
+      ```
+    Then the file "$TEMPDIR/tail_select.json" should exist
+    And that file should be valid JSON
+    And that file should contain "id"
+    And that file should contain "first_name"
diff --git a/src/cli/repl.rs b/src/cli/repl.rs
index b96d260..e3ab2a0 100644
--- a/src/cli/repl.rs
+++ b/src/cli/repl.rs
@@ -18,6 +18,7 @@ enum PipelineStage {
     Select { columns: Vec<String> },
     Head { n: usize },
     Tail { n: usize },
+    Count,
     Write { path: String },
     Print,
 }
@@ -97,6 +98,7 @@ impl ReplPipelineBuilder {
             PipelineStage::Select { columns } => self.exec_select(&columns).await,
             PipelineStage::Head { n } => self.exec_head(n),
             PipelineStage::Tail { n } => self.exec_tail(n),
+            PipelineStage::Count => self.exec_count(),
             PipelineStage::Write { path } => self.exec_write(&path).await,
             PipelineStage::Print => self.print_batches(),
         }
@@ -172,6 +174,16 @@ impl ReplPipelineBuilder {
         Ok(())
     }
 
+    /// Counts the total number of rows across all batches and prints the result.
+    fn exec_count(&mut self) -> crate::Result<()> {
+        let batches = self.batches.take().ok_or_else(|| {
+            Error::GenericError("count requires a preceding read in the pipe".to_string())
+        })?;
+        let total: usize = batches.iter().map(|b| b.num_rows()).sum();
+        println!("{total}");
+        Ok(())
+    }
+
     /// Writes the batches in context to an output file.
     async fn exec_write(&mut self, output_path: &str) -> crate::Result<()> {
         let batches = self.batches.take().ok_or_else(|| {
@@ -267,6 +279,10 @@ impl ReplPipelineBuilder {
         let path = extract_path_from_args("write", &args)?;
         self.exec_write(&path).await
     }
+
+    fn eval_count(&mut self) -> crate::Result<()> {
+        self.exec_count()
+    }
 }
 
 #[cfg(test)]
@@ -305,6 +321,14 @@ fn plan_stage(expr: Expr) -> crate::Result<PipelineStage> {
                     let n = extract_tail_n(&args)?;
                     Ok(PipelineStage::Tail { n })
                 }
+                "count" => {
+                    if !args.is_empty() {
+                        return Err(Error::UnsupportedFunctionCall(
+                            "count takes no arguments".to_string(),
+                        ));
+                    }
+                    Ok(PipelineStage::Count)
+                }
                 "write" => {
                     let path = extract_path_from_args("write", &args)?;
                     Ok(PipelineStage::Write { path })
@@ -1177,6 +1201,96 @@ mod tests {
         assert!(ctx.batches.is_none(), "batches consumed by write");
     }
 
+    // ── plan_stage: count ────────────────────────────────────────
+
+    #[test]
+    fn test_plan_stage_count() {
+        let expr = parse("count()");
+        let stage = plan_stage(expr).unwrap();
+        assert_eq!(stage, PipelineStage::Count);
+    }
+
+    #[test]
+    fn test_plan_stage_count_rejects_args() {
+        let expr = Expr::FunctionCall(
+            Identifier("count".into()),
+            vec![Expr::Literal(Literal::String("extra".into()))],
+        );
+        let result = plan_stage(expr);
+        assert!(result.is_err());
+        assert!(matches!(
+            result.unwrap_err(),
+            Error::UnsupportedFunctionCall(_)
+        ));
+    }
+
+    // ── plan_pipeline: count does not auto-append print ─────────
+
+    #[test]
+    fn test_plan_pipeline_count_no_auto_print() {
+        let expr = parse(r#"read("a.parquet") |> count()"#);
+        let mut exprs = Vec::new();
+        collect_pipe_stages(expr, &mut exprs);
+        let pipeline = plan_pipeline(exprs).unwrap();
+        assert_eq!(pipeline.len(), 2);
+        assert_eq!(
+            pipeline[0],
+            PipelineStage::Read {
+                path: "a.parquet".to_string()
+            }
+        );
+        assert_eq!(pipeline[1], PipelineStage::Count);
+    }
+
+    // ── eval_count ─────────────────────────────────────────────
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_eval_count_success() {
+        let mut ctx = new_context();
+        ctx.eval_read(vec![Expr::Literal(Literal::String(
+            "fixtures/table.parquet".into(),
+        ))])
+        .await
+        .expect("read");
+
+        ctx.eval_count().expect("count");
+        assert!(ctx.batches.is_none(), "batches consumed by count");
+    }
+
+    #[test]
+    fn test_eval_count_no_preceding_read() {
+        let mut ctx = new_context();
+        let result = ctx.eval_count();
+        assert!(result.is_err());
+        assert!(matches!(result.unwrap_err(), Error::GenericError(_)));
+    }
+
+    // ── eval_count: pipeline integration ────────────────────────
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_repl_pipeline_read_count() {
+        let expr = parse(r#"read("fixtures/table.parquet") |> count()"#);
+        let mut ctx = new_context();
+        ctx.eval(expr).await.expect("eval");
+        assert!(ctx.batches.is_none(), "batches consumed by count");
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_repl_pipeline_read_select_count() {
+        let expr = parse(r#"read("fixtures/table.parquet") |> select(:one, :two) |> count()"#);
+        let mut ctx = new_context();
+        ctx.eval(expr).await.expect("eval");
+        assert!(ctx.batches.is_none(), "batches consumed by count");
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_repl_pipeline_read_head_count() {
+        let expr = parse(r#"read("fixtures/table.parquet") |> head(2) |> count()"#);
+        let mut ctx = new_context();
+        ctx.eval(expr).await.expect("eval");
+        assert!(ctx.batches.is_none(), "batches consumed by count");
+    }
+
     #[tokio::test(flavor = "multi_thread")]
     async fn test_repl_pipeline_read_select_tail_write() {
         let temp_dir = tempfile::tempdir().expect("tempdir");
diff --git a/tests/features.rs b/tests/features.rs
index 445fd3e..d321ef4 100644
--- a/tests/features.rs
+++ b/tests/features.rs
@@ -74,7 +74,7 @@ fn command_should_succeed(world: &mut CliWorld) {
     );
 }
 
-#[then(regex = r#"^the first line should be: (.+)$"#)]
+#[then(regex = r#"^the first line of the output should be: (.+)$"#)]
 fn first_line_should_be(world: &mut CliWorld, expected: String) {
     let output = world.output.as_ref().expect("No output captured");
     let stdout = String::from_utf8_lossy(&output.stdout);
@@ -87,6 +87,17 @@ fn first_line_should_be(world: &mut CliWorld, expected: String) {
     );
 }
 
+#[then(regex = r#"^the first line of the output should contain "(.+)"$"#)]
+fn first_line_should_contain(world: &mut CliWorld, expected: String) {
+    let output = world.output.as_ref().expect("No output captured");
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let first_line = stdout.lines().next().unwrap_or("");
+    assert!(
+        first_line.contains(&expected),
+        "Expected first line to contain '{expected}', but got: {first_line}"
+    );
+}
+
 #[then(regex = r#"^the output should be:$"#)]
 fn output_should_be_docstring(world: &mut CliWorld, step: &Step) {
     let expected = step
@@ -367,5 +378,5 @@ fn that_file_should_have_n_lines(world: &mut CliWorld, n: usize) {
 }
 
 fn main() {
-    futures::executor::block_on(CliWorld::run("features"));
+    futures::executor::block_on(CliWorld::run("features/cli"));
 }
diff --git a/tests/repl.rs b/tests/repl.rs
new file mode 100644
index 0000000..efb4127
--- /dev/null
+++ b/tests/repl.rs
@@ -0,0 +1,340 @@
+use std::io::BufRead;
+use std::io::BufReader;
+use std::io::Write;
+use std::path::Path;
+use std::process::Stdio;
+use std::time::Duration;
+
+use cucumber::World;
+use cucumber::then;
+use cucumber::when;
+use expectrl::Expect;
+use expectrl::session::OsSession;
+use gherkin::Step;
+use parquet::file::reader::FileReader;
+
+const TEMPDIR_PLACEHOLDER: &str = "$TEMPDIR";
+
+#[derive(Debug, Default, World)]
+pub struct ReplWorld {
+    session: Option<OsSession>,
+    temp_dir: Option<tempfile::TempDir>,
+    last_file: Option<String>,
+}
+
+fn replace_tempdir(s: &str, temp_path: &str) -> String {
+    s.replace(TEMPDIR_PLACEHOLDER, temp_path)
+}
+
+fn resolve_path(world: &ReplWorld, path: &str) -> String {
+    if let Some(ref temp_dir) = world.temp_dir {
+        let temp_path = temp_dir
+            .path()
+            .to_str()
+            .expect("Temp path is not valid UTF-8");
+        replace_tempdir(path, temp_path)
+    } else {
+        path.to_string()
+    }
+}
+
+fn ensure_temp_dir(world: &mut ReplWorld) -> String {
+    if world.temp_dir.is_none() {
+        world.temp_dir = Some(tempfile::tempdir().expect("Failed to create temp dir"));
+    }
+    world
+        .temp_dir
+        .as_ref()
+        .unwrap()
+        .path()
+        .to_str()
+        .expect("Temp path is not valid UTF-8")
+        .to_string()
+}
+
+#[when(regex = r#"^datu is ran without a command$"#)]
+fn run_datu_repl(world: &mut ReplWorld) {
+    let datu_path = std::env::var("CARGO_BIN_EXE_datu")
+        .expect("Environment variable 'CARGO_BIN_EXE_datu' not defined");
+    let mut session = expectrl::spawn(datu_path).expect("Failed to spawn REPL");
+    session.set_expect_timeout(Some(Duration::from_secs(5)));
+    world.session = Some(session);
+}
+
+#[when(regex = r#"^the REPL is ran and the user types:$"#)]
+fn repl_is_ran_and_user_types(world: &mut ReplWorld, step: &Step) {
+    let input = step.docstring.as_ref().expect("Step requires a docstring");
+    let temp_path = ensure_temp_dir(world);
+
+    let datu_path = std::env::var("CARGO_BIN_EXE_datu")
+        .expect("Environment variable 'CARGO_BIN_EXE_datu' not defined");
+
+    let mut child = std::process::Command::new(datu_path)
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .expect("Failed to spawn datu REPL");
+
+    {
+        let stdin = child.stdin.as_mut().expect("Failed to open stdin");
+        for line in input.trim().lines() {
+            let resolved = replace_tempdir(line.trim(), &temp_path);
+            writeln!(stdin, "{resolved}").expect("Failed to write to REPL stdin");
+        }
+    }
+    drop(child.stdin.take());
+
+    let output = child.wait_with_output().expect("Failed to wait for datu");
+    assert!(
+        output.status.success(),
+        "datu REPL exited with error.\nstdout: {}\nstderr: {}",
+        String::from_utf8_lossy(&output.stdout),
+        String::from_utf8_lossy(&output.stderr)
+    );
+}
+
+#[then(regex = r#"^the output should be:$"#)]
+fn the_output_should_be(world: &mut ReplWorld, step: &Step) {
+    let expected = step.docstring.as_ref().expect("Step requires a docstring");
+    let session = world.session.as_mut().expect("No session running");
+    let expected_trimmed = expected.trim();
+    session
+        .expect(expected_trimmed)
+        .unwrap_or_else(|e| panic!("Expected to find '{expected_trimmed}' in output: {e}"));
+}
+
+#[then(regex = r#"^the file "(.+)" should exist$"#)]
+fn file_should_exist(world: &mut ReplWorld, path: String) {
+    let path_resolved = resolve_path(world, &path);
+    assert!(
+        Path::new(&path_resolved).exists(),
+        "Expected file to exist: {path_resolved}"
+    );
+    world.last_file = Some(path_resolved);
+}
+
+#[then(regex = r#"^that file should be a CSV file$"#)]
+fn that_file_should_be_csv(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let mut reader = csv::Reader::from_path(path).expect("Failed to open file as CSV");
+    let headers = reader.headers().expect("Failed to read CSV headers");
+    assert!(
+        !headers.is_empty(),
+        "Expected CSV file to have headers, but got none"
+    );
+}
+
+#[then(regex = r#"^the first line of that file should be: "(.+)"$"#)]
+fn first_line_of_file_should_be(world: &mut ReplWorld, expected: String) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file = std::fs::File::open(path).expect("Failed to open file");
+    let first_line = std::io::BufReader::new(file)
+        .lines()
+        .next()
+        .expect("File is empty")
+        .expect("Failed to read line");
+    assert!(
+        first_line == expected,
+        "Expected first line to be '{expected}', but got: {first_line}"
+    );
+}
+
+#[then(regex = r#"^the first line of that file should contain "(.+)"$"#)]
+fn first_line_of_file_should_contain(world: &mut ReplWorld, expected: String) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file = std::fs::File::open(path).expect("Failed to open file");
+    let first_line = std::io::BufReader::new(file)
+        .lines()
+        .next()
+        .expect("File is empty")
+        .expect("Failed to read line");
+    assert!(
+        first_line.contains(&expected),
+        "Expected first line to contain '{expected}', but got: {first_line}"
+    );
+}
+
+#[then(regex = r#"^that file should be valid JSON$"#)]
+fn that_file_should_be_valid_json(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let content = std::fs::read_to_string(path).expect("Failed to read file");
+    serde_json::from_str::<serde_json::Value>(content.trim())
+        .expect("Expected file to contain valid JSON, but parsing failed");
+}
+
+#[then(regex = r#"^that file should be valid YAML$"#)]
+fn that_file_should_be_valid_yaml(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let content = std::fs::read_to_string(path).expect("Failed to read file");
+    serde_yaml::from_str::<serde_yaml::Value>(content.trim())
+        .expect("Expected file to contain valid YAML, but parsing failed");
+}
+
+#[then(regex = r#"^that file should be valid Avro$"#)]
+fn that_file_should_be_valid_avro(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file = std::fs::File::open(path).expect("Failed to open file");
+    let reader = arrow_avro::reader::ReaderBuilder::new()
+        .build(BufReader::new(file))
+        .expect("Expected file to be valid Avro, but reading failed");
+    let schema = reader.schema();
+    assert!(
+        !schema.fields().is_empty(),
+        "Expected Avro file to have at least one field"
+    );
+}
+
+#[then(regex = r#"^that file should be valid Parquet$"#)]
+fn that_file_should_be_valid_parquet(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file = std::fs::File::open(path).expect("Failed to open file");
+    let reader = parquet::file::reader::SerializedFileReader::new(file)
+        .expect("Expected file to be valid Parquet, but reading failed");
+    let metadata = reader.metadata();
+    assert!(
+        !metadata.file_metadata().schema().get_fields().is_empty(),
+        "Expected Parquet file to have at least one column"
+    );
+}
+
+#[then(regex = r#"^that file should be valid ORC$"#)]
+fn that_file_should_be_valid_orc(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file = std::fs::File::open(path).expect("Failed to open file");
+    let builder = orc_rust::arrow_reader::ArrowReaderBuilder::try_new(file)
+        .expect("Expected file to be valid ORC, but reading failed");
+    let schema = builder.schema();
+    assert!(
+        !schema.fields().is_empty(),
+        "Expected ORC file to have at least one column"
+    );
+}
+
+#[then(regex = r#"^that file should be valid XLSX$"#)]
+fn that_file_should_be_valid_xlsx(world: &mut ReplWorld) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let bytes = std::fs::read(path).expect("Failed to read file");
+    assert!(
+        bytes.len() >= 4 && bytes[..4] == [0x50, 0x4B, 0x03, 0x04],
+        "Expected file to be a valid XLSX (ZIP archive), but magic bytes did not match"
+    );
+}
+
+#[then(regex = r#"^that file should contain "(.+)"$"#)]
+fn that_file_should_contain(world: &mut ReplWorld, expected: String) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let content = std::fs::read_to_string(path).expect("Failed to read file");
+    assert!(
+        content.contains(&expected),
+        "Expected file {path} to contain '{expected}', but it did not"
+    );
+}
+
+#[then(regex = r#"^that file should have (\d+) lines$"#)]
+fn that_file_should_have_n_lines(world: &mut ReplWorld, n: usize) {
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file = std::fs::File::open(path).expect("Failed to open file");
+    let line_count = std::io::BufReader::new(file)
+        .lines()
+        .filter(|r| r.as_ref().is_ok_and(|s| !s.trim().is_empty()))
+        .count();
+    assert!(
+        line_count == n,
+        "Expected file {path} to have {n} lines, but got {line_count}"
+    );
+}
+
+#[then(regex = r#"^that file should have (\d+) records$"#)]
+fn that_file_should_have_n_records(world: &mut ReplWorld, n: usize) {
+    use datu::utils::FileType;
+
+    let path = world
+        .last_file
+        .as_ref()
+        .expect("No file has been set; use 'the file \"...\" should exist' first");
+    let file_type = FileType::try_from(path.as_str())
+        .unwrap_or_else(|e| panic!("Cannot determine file type for {path}: {e}"));
+    let row_count = match file_type {
+        FileType::Json => {
+            let content = std::fs::read_to_string(path).expect("Failed to read file");
+            let value: serde_json::Value =
+                serde_json::from_str(content.trim()).expect("Failed to parse JSON");
+            value
+                .as_array()
+                .expect("Expected JSON to be an array")
+                .len()
+        }
+        FileType::Parquet => {
+            let file = std::fs::File::open(path).expect("Failed to open file");
+            let reader = parquet::file::reader::SerializedFileReader::new(file)
+                .expect("Failed to read Parquet file");
+            reader
+                .metadata()
+                .row_groups()
+                .iter()
+                .map(|rg| rg.num_rows())
+                .sum::<i64>() as usize
+        }
+        FileType::Avro => {
+            let file = std::fs::File::open(path).expect("Failed to open file");
+            let reader = arrow_avro::reader::ReaderBuilder::new()
+                .build(BufReader::new(file))
+                .expect("Failed to read Avro file");
+            reader
+                .map(|batch| batch.expect("Failed to read Avro batch").num_rows())
+                .sum()
+        }
+        FileType::Orc => {
+            let file = std::fs::File::open(path).expect("Failed to open file");
+            let builder = orc_rust::arrow_reader::ArrowReaderBuilder::try_new(file)
+                .expect("Failed to read ORC file");
+            let reader = builder.build();
+            reader
+                .map(|batch| batch.expect("Failed to read ORC batch").num_rows())
+                .sum()
+        }
+        other => panic!("Unsupported format for record counting: {other:?}"),
+    };
+    assert!(
+        row_count == n,
+        "Expected file {path} to have {n} records, but got {row_count}"
+    );
+}
+
+fn main() {
+    futures::executor::block_on(ReplWorld::run("features/repl"));
+}