diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dcd71845..911489d2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -104,11 +104,11 @@ jobs:
           python -m venv env-plink
           source env-plink/bin/activate
           python -m pip install .
-          python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz > plink.txt 2>&1 || echo $? > plink_exit.txt
+          python -m bio2zarr plink2zarr convert tests/data/plink/example plink.vcz > plink.txt 2>&1 || echo $? > plink_exit.txt
           test "$(cat plink_exit.txt)" = "1"
           grep -q "This process requires the optional bed_reader module. Install it with: pip install bio2zarr\[plink\]" plink.txt
           python -m pip install '.[plink]'
-          python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz
+          python -m bio2zarr plink2zarr convert tests/data/plink/example plink.vcz
           deactivate
 
           python -m venv env-vcf
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8241ad4f..ab7fae43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,8 @@
 # 0.1.6 2025-0X-XX
 
-- Make format-specific dependencies optional (#385)
-
-- Add contigs to plink output (#344)
+- Initial version of supported plink2zarr (#390, #344, #382)
 
-- Add variant_length and indexing to plink output (#382)
+- Make format-specific dependencies optional (#385)
 
 Breaking changes
 
@@ -14,6 +12,8 @@ Breaking changes
 - Add dimensions and default compressor and filter settings to the schema.
   (#361)
 
+- Various changes to existing experimental plink encoding (#390)
+
 # 0.1.5 2025-03-31
 
 - Add support for merging contig IDs across multiple VCFs (#335)
diff --git a/bio2zarr/cli.py b/bio2zarr/cli.py
index a2ae88d3..6d41ed27 100644
--- a/bio2zarr/cli.py
+++ b/bio2zarr/cli.py
@@ -381,7 +381,7 @@ def encode(
     worker_processes,
 ):
     """
-    Convert intermediate columnar format to vcfzarr.
+    Convert intermediate columnar format to VCF Zarr.
     """
     setup_logging(verbose)
     check_overwrite_dir(zarr_path, force)
@@ -504,7 +504,7 @@ def convert_vcf(
     local_alleles,
 ):
     """
-    Convert input VCF(s) directly to vcfzarr (not recommended for large files).
+    Convert input VCF(s) directly to VCF Zarr (not recommended for large files).
     """
     setup_logging(verbose)
     check_overwrite_dir(zarr_path, force)
@@ -523,7 +523,7 @@ def convert_vcf(
 @click.group(cls=NaturalOrderGroup, name="vcf2zarr")
 def vcf2zarr_main():
     """
-    Convert VCF file(s) to the vcfzarr format.
+    Convert VCF file(s) to VCF Zarr format.
 
     See the online documentation at https://sgkit-dev.github.io/bio2zarr/
     for more information.
@@ -561,7 +561,9 @@ def convert_plink(
     samples_chunk_size,
 ):
     """
-    In development; DO NOT USE!
+    Convert plink fileset to VCF Zarr. Results are equivalent to
+    `plink1.9 --bfile prefix --keep-allele-order --recode vcf-iid --out tmp`
+    then running `vcf2zarr convert tmp.vcf zarr_path`
     """
     setup_logging(verbose)
     plink.convert(
@@ -577,6 +579,9 @@ def convert_plink(
 @version
 @click.group()
 def plink2zarr():
+    """
+    Convert plink fileset(s) to VCF Zarr format
+    """
     pass
 
 
@@ -676,6 +681,9 @@ def convert_tskit(
 @version
 @click.group()
 def tskit2zarr():
+    """
+    Convert tskit tree sequence(s) to VCF Zarr format
+    """
     pass
 
 
diff --git a/bio2zarr/plink.py b/bio2zarr/plink.py
index 69ffc863..e170ddea 100644
--- a/bio2zarr/plink.py
+++ b/bio2zarr/plink.py
@@ -1,25 +1,46 @@
+import dataclasses
 import logging
 import pathlib
 
 import numpy as np
-import zarr
 
 from bio2zarr import constants, core, vcz
 
 logger = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass
+class PlinkPaths:
+    bed_path: str
+    bim_path: str
+    fam_path: str
+
+
 class PlinkFormat(vcz.Source):
     @core.requires_optional_dependency("bed_reader", "plink")
-    def __init__(self, path):
+    def __init__(self, prefix):
         import bed_reader
 
-        self._path = pathlib.Path(path)
-        self.bed = bed_reader.open_bed(path, num_threads=1, count_A1=False)
+        # TODO we will need support multiple chromosomes here to join
+        # plinks into on big zarr. So, these will require multiple
+        # bed and bim files, but should share a .fam
+        self.prefix = str(prefix)
+        paths = PlinkPaths(
+            self.prefix + ".bed",
+            self.prefix + ".bim",
+            self.prefix + ".fam",
+        )
+        self.bed = bed_reader.open_bed(
+            paths.bed_path,
+            bim_location=paths.bim_path,
+            fam_location=paths.fam_path,
+            num_threads=1,
+            count_A1=True,
+        )
 
     @property
     def path(self):
-        return self._path
+        return self.prefix
 
     @property
     def num_records(self):
@@ -46,9 +67,12 @@ def iter_field(self, field_name, shape, start, stop):
         assert field_name == "position"  # Only position field is supported from plink
         yield from self.bed.bp_position[start:stop]
 
+    def iter_id(self, start, stop):
+        yield from self.bed.sid[start:stop]
+
     def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles):
-        ref_field = self.bed.allele_1
-        alt_field = self.bed.allele_2
+        alt_field = self.bed.allele_1
+        ref_field = self.bed.allele_2
         bed_chunk = self.bed.read(slice(start, stop), dtype=np.int8).T
         gt = np.zeros(shape, dtype=np.int8)
         phased = np.zeros(shape[:-1], dtype=bool)
@@ -61,9 +85,10 @@ def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles):
             gt[:] = 0
             gt[bed_chunk[i] == -127] = -1
             gt[bed_chunk[i] == 2] = 1
-            gt[bed_chunk[i] == 1, 0] = 1
+            gt[bed_chunk[i] == 1, 1] = 1
 
-            yield vcz.VariantData(max(len(a) for a in alleles), alleles, gt, phased)
+            # rlen is the length of the REF in PLINK as there's no END annotations
+            yield vcz.VariantData(len(alleles[0]), alleles, gt, phased)
 
     def generate_schema(
         self,
@@ -92,6 +117,9 @@ def generate_schema(
             f"variants={dimensions['variants'].chunk_size}, "
             f"samples={dimensions['samples'].chunk_size}"
         )
+        # If we don't have SVLEN or END annotations, the rlen field is defined
+        # as the length of the REF
+        max_len = self.bed.allele_2.itemsize
 
         array_specs = [
             vcz.ZarrArraySpec(
@@ -107,10 +135,22 @@ def generate_schema(
                 dimensions=["variants", "alleles"],
                 description=None,
             ),
+            vcz.ZarrArraySpec(
+                name="variant_id",
+                dtype="O",
+                dimensions=["variants"],
+                description=None,
+            ),
+            vcz.ZarrArraySpec(
+                name="variant_id_mask",
+                dtype="bool",
+                dimensions=["variants"],
+                description=None,
+            ),
             vcz.ZarrArraySpec(
                 source=None,
                 name="variant_length",
-                dtype="i4",
+                dtype=core.min_int_dtype(0, max_len),
                 dimensions=["variants"],
                 description="Length of each variant",
             ),
@@ -147,20 +187,20 @@ def generate_schema(
 
 
 def convert(
-    bed_path,
-    zarr_path,
+    prefix,
+    out,
     *,
     variants_chunk_size=None,
     samples_chunk_size=None,
     worker_processes=1,
     show_progress=False,
 ):
-    plink_format = PlinkFormat(bed_path)
+    plink_format = PlinkFormat(prefix)
     schema_instance = plink_format.generate_schema(
         variants_chunk_size=variants_chunk_size,
         samples_chunk_size=samples_chunk_size,
     )
-    zarr_path = pathlib.Path(zarr_path)
+    zarr_path = pathlib.Path(out)
     vzw = vcz.VcfZarrWriter(PlinkFormat, zarr_path)
     # Rough heuristic to split work up enough to keep utilisation high
     target_num_partitions = max(1, worker_processes * 4)
@@ -175,39 +215,3 @@ def convert(
     )
     vzw.finalise(show_progress)
     vzw.create_index()
-
-
-# FIXME do this more efficiently - currently reading the whole thing
-# in for convenience, and also comparing call-by-call
-@core.requires_optional_dependency("bed_reader", "plink")
-def validate(bed_path, zarr_path):
-    import bed_reader
-
-    root = zarr.open(store=zarr_path, mode="r")
-    call_genotype = root["call_genotype"][:]
-
-    bed = bed_reader.open_bed(bed_path, count_A1=False, num_threads=1)
-
-    assert call_genotype.shape[0] == bed.sid_count
-    assert call_genotype.shape[1] == bed.iid_count
-    bed_genotypes = bed.read(dtype="int8").T
-    assert call_genotype.shape[0] == bed_genotypes.shape[0]
-    assert call_genotype.shape[1] == bed_genotypes.shape[1]
-    assert call_genotype.shape[2] == 2
-
-    row_id = 0
-    for bed_row, zarr_row in zip(bed_genotypes, call_genotype):
-        # print("ROW", row_id)
-        # print(bed_row, zarr_row)
-        row_id += 1
-        for bed_call, zarr_call in zip(bed_row, zarr_row):
-            if bed_call == -127:
-                assert list(zarr_call) == [-1, -1]
-            elif bed_call == 0:
-                assert list(zarr_call) == [0, 0]
-            elif bed_call == 1:
-                assert list(zarr_call) == [1, 0]
-            elif bed_call == 2:
-                assert list(zarr_call) == [1, 1]
-            else:  # pragma no cover
-                raise AssertionError(f"Unexpected bed call {bed_call}")
diff --git a/docs/_toc.yml b/docs/_toc.yml
index abc53837..007cfea5 100644
--- a/docs/_toc.yml
+++ b/docs/_toc.yml
@@ -6,6 +6,9 @@ chapters:
   sections:
   - file: vcf2zarr/tutorial
   - file: vcf2zarr/cli_ref
+- file: plink2zarr/overview
+  sections:
+  - file: plink2zarr/cli_ref
 - file: vcfpartition/overview
   sections:
   - file: vcfpartition/cli_ref
diff --git a/docs/intro.md b/docs/intro.md
index eef9ea63..8b563fae 100644
--- a/docs/intro.md
+++ b/docs/intro.md
@@ -8,6 +8,9 @@
 - {ref}`sec-vcf2zarr` converts VCF data to
   [VCF Zarr](https://github.com/sgkit-dev/vcf-zarr-spec/) format.
 
+- {ref}`sec-plink2zarr` converts PLINK 1.0 data to
+  [VCF Zarr](https://github.com/sgkit-dev/vcf-zarr-spec/) format.
+
 - {ref}`sec-vcfpartition` is a utility to split an input
   VCF into a given number of partitions. This is useful for
   parallel processing of VCF data.
@@ -17,10 +20,8 @@
 `bio2zarr` is in development, contributions, feedback and issues are welcome
 at the [GitHub repository](https://github.com/sgkit-dev/bio2zarr).
 
-Support for converting PLINK data to VCF Zarr is partially implemented,
-and adding BGEN and [tskit](https://tskit.dev/) support is also planned.
 If you would like to see
-support for other formats (or an interested in helping with implementing),
+support for other formats such as BGEN (or an interested in helping with implementing),
 please open an [issue on Github](https://github.com/sgkit-dev/bio2zarr/issues)
 to discuss!
 
diff --git a/docs/plink2zarr/cli_ref.md b/docs/plink2zarr/cli_ref.md
new file mode 100644
index 00000000..2e13db25
--- /dev/null
+++ b/docs/plink2zarr/cli_ref.md
@@ -0,0 +1,17 @@
+(sec-plink2zarr-cli-ref)=
+# CLI Reference
+
+% A note on cross references... There's some weird long-standing problem with
+% cross referencing program values in Sphinx, which means that we can't use
+% the built-in labels generated by sphinx-click. We can make our own explicit
+% targets, but these have to have slightly weird names to avoid conflicting
+% with what sphinx-click is doing. So, hence the cmd- prefix.
+% Based on: https://github.com/skypilot-org/skypilot/pull/2834
+
+```{eval-rst}
+
+.. _cmd-plink2zarr-convert:
+.. click:: bio2zarr.cli:convert_plink
+   :prog: plink2zarr convert
+   :nested: full
+
diff --git a/docs/plink2zarr/overview.md b/docs/plink2zarr/overview.md
new file mode 100644
index 00000000..cb156fdd
--- /dev/null
+++ b/docs/plink2zarr/overview.md
@@ -0,0 +1,38 @@
+(sec-plink2zarr)=
+# plink2zarr
+
+Convert plink data to the
+[VCF Zarr specification](https://github.com/sgkit-dev/vcf-zarr-spec/)
+reliably in parallel.
+
+See {ref}`sec-plink2zarr-cli-ref` for detailed documentation on
+command line options.
+
+Conversion of the plink data model to VCF follows the semantics of plink1.9 as closely
+as possible. That is, given a binary plink fileset with prefix "fileset" (i.e.,
+fileset.bed, fileset.bim, fileset.fam), running
+```
+$ plink2zarr convert fileset out.vcz
+```
+should produce the same result in ``out.vcz`` as
+```
+$ plink1.9 --bfile fileset --keep-allele-order --recode vcf-iid --out tmp
+$ vcf2zarr convert tmp.vcf out.vcz
+```
+
+:::{warning}
+It is important to note that we follow the same conventions as plink 2.0
+where the A1 allele in the [bim file](https://www.cog-genomics.org/plink/2.0/formats#bim)
+is the VCF ALT and A2 is the REF.
+:::
+
+:::{note}
+Currently we only convert the basic VCF-like data from plink, and don't include
+phenotypes and pedigree information. These are planned as future enhancements.
+Please comment on [this issue](https://github.com/sgkit-dev/bio2zarr/issues/392)
+if you are interested in this functionality.
+:::
+
+
+
+
diff --git a/tests/data/plink/example.vcf b/tests/data/plink/example.vcf
new file mode 100644
index 00000000..719b9f7d
--- /dev/null
+++ b/tests/data/plink/example.vcf
@@ -0,0 +1,9 @@
+##fileformat=VCFv4.2
+##fileDate=20250521
+##source=PLINKv1.90
+##contig=<ID=1,length=21>
+##INFO=<ID=PR,Number=0,Type=Flag,Description="Provisional reference allele, may not be based on real reference genome">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ind0	ind1	ind2	ind3	ind4	ind5	ind6	ind7	ind8	ind9
+1	10	1_10	GG	A	.	.	PR	GT	1/1	1/1	1/1	0/0	0/0	0/0	0/0	0/0	0/0	0/0
+1	20	1_20	C	TTT	.	.	PR	GT	1/1	1/1	1/1	1/1	0/0	0/0	0/0	0/0	0/0	0/0
diff --git a/tests/data/plink/example_with_fam.vcf b/tests/data/plink/example_with_fam.vcf
new file mode 100644
index 00000000..9784ba22
--- /dev/null
+++ b/tests/data/plink/example_with_fam.vcf
@@ -0,0 +1,9 @@
+##fileformat=VCFv4.2
+##fileDate=20250521
+##source=PLINKv1.90
+##contig=<ID=1,length=21>
+##INFO=<ID=PR,Number=0,Type=Flag,Description="Provisional reference allele, may not be based on real reference genome">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ind0	ind1	ind2	ind3	ind4	ind5	ind6	ind7	ind8	ind9
+1	10	1_10	G	A	.	.	PR	GT	1/1	1/1	1/1	0/0	0/0	0/0	0/0	0/0	0/0	0/0
+1	20	1_20	C	T	.	.	PR	GT	1/1	1/1	1/1	1/1	0/0	0/0	0/0	0/0	0/0	0/0
diff --git a/tests/data/plink/plink_sim_10s_100v_10pmiss.vcf b/tests/data/plink/plink_sim_10s_100v_10pmiss.vcf
new file mode 100644
index 00000000..ac8bc4ce
--- /dev/null
+++ b/tests/data/plink/plink_sim_10s_100v_10pmiss.vcf
@@ -0,0 +1,107 @@
+##fileformat=VCFv4.2
+##fileDate=20250521
+##source=PLINKv1.90
+##contig=<ID=1,length=101>
+##INFO=<ID=PR,Number=0,Type=Flag,Description="Provisional reference allele, may not be based on real reference genome">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	000	001	002	003	004	005	006	007	008	009
+1	1	1:1:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	1/1	0/1	0/1	0/0	0/0	1/1	1/1	0/1	1/1
+1	2	1:2:ACT:G	ACT	G	.	.	PR	GT	0/1	0/0	1/1	0/1	0/0	0/1	0/1	0/1	0/1	1/1
+1	3	1:3:ACT:G	ACT	G	.	.	PR	GT	0/1	./.	1/1	1/1	1/1	0/0	0/1	0/1	0/1	0/1
+1	4	1:4:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/1	./.	0/0	0/1	1/1	1/1	0/1	1/1	0/1
+1	5	1:5:G:CGCGCG	G	CGCGCG	.	.	PR	GT	./.	0/0	1/1	1/1	0/1	1/1	./.	0/0	0/1	0/0
+1	6	1:6:ACT:G	ACT	G	.	.	PR	GT	0/0	1/1	0/0	0/0	1/1	0/1	0/1	1/1	0/1	1/1
+1	7	1:7:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/0	0/1	0/1	0/0	0/1	0/1	0/1	./.	0/0
+1	8	1:8:T:GTGG	T	GTGG	.	.	PR	GT	./.	1/1	0/1	1/1	1/1	1/1	./.	1/1	0/1	1/1
+1	9	1:9:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	1/1	1/1	1/1	1/1	1/1	./.	1/1	1/1
+1	10	1:10:A:C	A	C	.	.	PR	GT	0/1	./.	0/1	0/1	0/1	0/1	./.	0/1	0/1	1/1
+1	11	1:11:ACT:G	ACT	G	.	.	PR	GT	1/1	1/1	1/1	1/1	./.	1/1	1/1	./.	1/1	./.
+1	12	1:12:G:CGCGCG	G	CGCGCG	.	.	PR	GT	1/1	1/1	./.	0/0	0/1	1/1	0/1	./.	0/0	1/1
+1	13	1:13:G:CGCGCG	G	CGCGCG	.	.	PR	GT	1/1	1/1	1/1	1/1	1/1	0/1	1/1	1/1	1/1	1/1
+1	14	1:14:T:GTGG	T	GTGG	.	.	PR	GT	1/1	./.	0/1	1/1	0/1	1/1	./.	0/1	./.	./.
+1	15	1:15:ACT:G	ACT	G	.	.	PR	GT	0/1	1/1	0/0	1/1	1/1	1/1	0/0	1/1	0/1	0/1
+1	16	1:16:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	./.	0/0	0/0	0/0	0/0	0/0	0/0
+1	17	1:17:ACT:G	ACT	G	.	.	PR	GT	1/1	1/1	1/1	0/1	0/1	1/1	0/0	./.	1/1	1/1
+1	18	1:18:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	1/1	1/1	1/1	1/1	1/1	1/1	1/1	1/1
+1	19	1:19:A:C	A	C	.	.	PR	GT	0/1	0/1	0/0	0/1	0/1	./.	1/1	0/1	1/1	0/1
+1	20	1:20:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	0/0	0/0	0/0	0/0	./.	0/1	0/0
+1	21	1:21:T:GTGG	T	GTGG	.	.	PR	GT	1/1	0/1	1/1	1/1	1/1	1/1	0/1	0/1	1/1	1/1
+1	22	1:22:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/1	0/1	./.	1/1	1/1	./.	1/1	0/1	./.
+1	23	1:23:T:GTGG	T	GTGG	.	.	PR	GT	1/1	0/1	0/1	./.	./.	0/1	0/1	0/1	1/1	0/1
+1	24	1:24:A:C	A	C	.	.	PR	GT	0/0	0/0	0/1	0/0	0/1	0/1	0/0	0/0	0/0	0/0
+1	25	1:25:A:C	A	C	.	.	PR	GT	0/0	0/0	./.	./.	./.	./.	0/0	0/1	0/1	0/0
+1	26	1:26:ACT:G	ACT	G	.	.	PR	GT	0/1	1/1	1/1	0/1	1/1	./.	1/1	0/1	0/1	1/1
+1	27	1:27:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/0	0/0	0/0	./.	0/0	./.	0/1	0/0	0/0
+1	28	1:28:ACT:G	ACT	G	.	.	PR	GT	0/0	1/1	0/0	0/1	0/0	./.	0/1	0/0	0/1	1/1
+1	29	1:29:T:GTGG	T	GTGG	.	.	PR	GT	1/1	0/1	1/1	1/1	./.	./.	./.	1/1	0/1	1/1
+1	30	1:30:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	0/1	0/0	0/1	0/1	0/0	0/1	0/0
+1	31	1:31:T:GTGG	T	GTGG	.	.	PR	GT	0/0	1/1	1/1	1/1	1/1	0/1	1/1	./.	0/1	1/1
+1	32	1:32:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/1	0/0	0/0	./.	0/0	1/1	./.	0/0	0/1
+1	33	1:33:ACT:G	ACT	G	.	.	PR	GT	0/1	0/1	0/1	0/1	1/1	0/1	1/1	0/1	0/1	1/1
+1	34	1:34:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	1/1	0/1	./.	0/1	./.	0/1	0/0	0/0	./.
+1	35	1:35:A:C	A	C	.	.	PR	GT	0/0	0/1	0/0	0/0	0/0	0/0	0/0	0/0	./.	0/1
+1	36	1:36:G:CGCGCG	G	CGCGCG	.	.	PR	GT	./.	0/0	./.	1/1	1/1	1/1	1/1	1/1	0/1	1/1
+1	37	1:37:T:GTGG	T	GTGG	.	.	PR	GT	1/1	0/1	0/1	1/1	0/1	1/1	1/1	1/1	1/1	1/1
+1	38	1:38:A:C	A	C	.	.	PR	GT	0/1	0/0	0/0	./.	1/1	1/1	./.	0/0	0/1	0/0
+1	39	1:39:A:C	A	C	.	.	PR	GT	0/0	0/0	0/1	0/1	0/1	0/0	0/0	0/0	0/1	0/0
+1	40	1:40:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	0/0	0/1	./.	0/1	1/1	0/1	1/1	1/1
+1	41	1:41:A:C	A	C	.	.	PR	GT	0/1	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0
+1	42	1:42:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	./.	0/1	0/1	0/1	1/1	0/1	0/1	1/1	0/0
+1	43	1:43:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	1/1	1/1	./.	1/1	1/1	1/1	./.	1/1
+1	44	1:44:ACT:G	ACT	G	.	.	PR	GT	1/1	0/0	1/1	1/1	1/1	1/1	1/1	1/1	1/1	1/1
+1	45	1:45:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/0	1/1	1/1	1/1	0/1	1/1	1/1	./.	1/1	0/1
+1	46	1:46:ACT:G	ACT	G	.	.	PR	GT	1/1	1/1	1/1	0/1	0/1	1/1	0/1	1/1	0/1	1/1
+1	47	1:47:G:CGCGCG	G	CGCGCG	.	.	PR	GT	1/1	0/1	./.	0/1	1/1	1/1	0/1	0/1	0/1	0/0
+1	48	1:48:A:C	A	C	.	.	PR	GT	0/1	0/0	0/1	./.	0/0	0/0	0/1	0/0	0/0	0/0
+1	49	1:49:A:C	A	C	.	.	PR	GT	0/0	0/1	0/0	0/1	0/0	0/0	0/0	0/0	./.	0/0
+1	50	1:50:A:C	A	C	.	.	PR	GT	0/1	./.	./.	./.	0/0	0/0	./.	./.	0/0	0/1
+1	51	1:51:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/0	1/1	0/0	0/0	0/1	0/1	0/1	0/1	0/0
+1	52	1:52:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	0/1	0/0	0/0	0/1	./.	./.	0/1
+1	53	1:53:ACT:G	ACT	G	.	.	PR	GT	1/1	./.	0/1	1/1	1/1	./.	1/1	0/1	0/1	0/0
+1	54	1:54:A:C	A	C	.	.	PR	GT	0/1	./.	0/0	0/1	./.	./.	./.	1/1	0/0	0/0
+1	55	1:55:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	1/1	0/1	1/1	0/1	0/1	./.	0/1	1/1	0/1
+1	56	1:56:T:GTGG	T	GTGG	.	.	PR	GT	1/1	0/1	./.	0/1	1/1	0/1	0/1	1/1	0/0	./.
+1	57	1:57:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/0	1/1	0/1	0/1	./.	0/0	1/1	0/0	1/1
+1	58	1:58:A:C	A	C	.	.	PR	GT	1/1	1/1	0/1	1/1	1/1	0/1	0/0	0/1	1/1	0/1
+1	59	1:59:T:GTGG	T	GTGG	.	.	PR	GT	0/1	0/1	1/1	1/1	./.	0/1	0/1	./.	0/1	0/1
+1	60	1:60:G:CGCGCG	G	CGCGCG	.	.	PR	GT	1/1	0/1	0/1	0/1	0/0	1/1	0/0	0/1	0/1	1/1
+1	61	1:61:ACT:G	ACT	G	.	.	PR	GT	1/1	1/1	1/1	1/1	1/1	1/1	1/1	0/1	1/1	./.
+1	62	1:62:A:C	A	C	.	.	PR	GT	1/1	0/0	0/0	0/0	0/0	./.	0/0	0/1	0/1	./.
+1	63	1:63:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	./.	0/0	1/1	0/1	./.	0/1	1/1	0/1	1/1
+1	64	1:64:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	1/1	1/1	0/1	1/1	1/1	1/1	1/1	1/1
+1	65	1:65:T:GTGG	T	GTGG	.	.	PR	GT	0/1	1/1	1/1	1/1	0/1	0/1	1/1	0/1	1/1	1/1
+1	66	1:66:ACT:G	ACT	G	.	.	PR	GT	1/1	0/0	0/1	0/0	0/0	0/1	0/0	0/1	0/1	0/0
+1	67	1:67:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	1/1	1/1	1/1	./.	1/1	1/1	1/1	0/1
+1	68	1:68:ACT:G	ACT	G	.	.	PR	GT	0/1	1/1	1/1	./.	1/1	0/1	1/1	0/1	1/1	0/1
+1	69	1:69:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	0/0	0/1	0/0	0/1	0/0	0/0	0/1	0/1	0/0
+1	70	1:70:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/1	1/1	1/1	0/1	0/0	1/1	1/1	1/1	0/0	0/1
+1	71	1:71:ACT:G	ACT	G	.	.	PR	GT	0/1	1/1	0/1	0/0	0/0	0/1	1/1	1/1	1/1	0/1
+1	72	1:72:G:CGCGCG	G	CGCGCG	.	.	PR	GT	1/1	0/1	1/1	0/1	./.	./.	1/1	1/1	0/0	0/1
+1	73	1:73:A:C	A	C	.	.	PR	GT	0/0	./.	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0
+1	74	1:74:A:C	A	C	.	.	PR	GT	0/0	0/0	0/1	0/0	0/0	0/0	1/1	1/1	0/0	0/1
+1	75	1:75:T:GTGG	T	GTGG	.	.	PR	GT	1/1	0/1	0/1	1/1	1/1	1/1	0/1	1/1	1/1	1/1
+1	76	1:76:A:C	A	C	.	.	PR	GT	0/0	0/0	./.	0/0	0/0	0/0	0/0	0/0	0/0	0/0
+1	77	1:77:ACT:G	ACT	G	.	.	PR	GT	./.	1/1	1/1	1/1	0/1	0/1	./.	1/1	1/1	1/1
+1	78	1:78:ACT:G	ACT	G	.	.	PR	GT	1/1	0/1	1/1	./.	1/1	1/1	1/1	1/1	1/1	1/1
+1	79	1:79:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/1	0/0	0/1
+1	80	1:80:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	./.	0/0	0/0	0/1	0/0	./.	0/0
+1	81	1:81:A:C	A	C	.	.	PR	GT	0/0	0/1	./.	0/1	./.	0/0	0/0	1/1	0/1	0/1
+1	82	1:82:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	0/1	1/1	1/1	1/1	0/1	1/1	0/1	1/1
+1	83	1:83:A:C	A	C	.	.	PR	GT	0/0	0/1	0/0	0/0	1/1	./.	0/0	0/0	0/1	0/1
+1	84	1:84:ACT:G	ACT	G	.	.	PR	GT	1/1	./.	1/1	1/1	1/1	1/1	0/1	1/1	1/1	1/1
+1	85	1:85:A:C	A	C	.	.	PR	GT	0/0	0/1	0/0	0/1	0/0	0/0	0/0	0/0	0/0	0/0
+1	86	1:86:G:CGCGCG	G	CGCGCG	.	.	PR	GT	0/0	0/0	1/1	0/1	0/0	0/1	0/1	0/1	0/1	1/1
+1	87	1:87:ACT:G	ACT	G	.	.	PR	GT	0/1	1/1	1/1	1/1	0/1	1/1	1/1	0/1	1/1	1/1
+1	88	1:88:A:C	A	C	.	.	PR	GT	0/1	0/1	0/1	0/0	./.	./.	0/0	0/1	0/0	0/0
+1	89	1:89:A:C	A	C	.	.	PR	GT	0/0	0/0	0/1	0/0	0/0	./.	0/0	0/0	0/0	1/1
+1	90	1:90:T:GTGG	T	GTGG	.	.	PR	GT	0/1	0/1	0/0	0/1	./.	0/1	0/1	1/1	0/0	0/1
+1	91	1:91:T:GTGG	T	GTGG	.	.	PR	GT	0/1	1/1	1/1	0/1	1/1	1/1	1/1	./.	1/1	0/1
+1	92	1:92:T:GTGG	T	GTGG	.	.	PR	GT	./.	1/1	1/1	0/1	1/1	0/1	0/1	0/1	1/1	1/1
+1	93	1:93:A:C	A	C	.	.	PR	GT	0/1	0/1	0/1	0/1	0/0	0/0	./.	./.	0/0	0/0
+1	94	1:94:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	0/0	0/1	0/0	0/0	0/0	0/1	0/1
+1	95	1:95:A:C	A	C	.	.	PR	GT	0/1	0/0	0/1	0/1	0/0	0/0	0/0	0/1	0/1	0/0
+1	96	1:96:A:C	A	C	.	.	PR	GT	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	./.
+1	97	1:97:T:GTGG	T	GTGG	.	.	PR	GT	1/1	1/1	1/1	./.	1/1	1/1	1/1	1/1	./.	1/1
+1	98	1:98:ACT:G	ACT	G	.	.	PR	GT	1/1	0/1	0/0	0/0	1/1	0/1	0/1	0/1	0/1	1/1
+1	99	1:99:T:GTGG	T	GTGG	.	.	PR	GT	0/1	1/1	1/1	./.	1/1	1/1	0/1	1/1	0/1	1/1
+1	100	1:100:A:C	A	C	.	.	PR	GT	0/1	1/1	0/0	0/1	0/1	./.	./.	0/0	0/0	0/0
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 2cc133be..841db800 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1026,23 +1026,6 @@ def test_part_size_multiple_vcfs(self):
         ]
 
 
-@pytest.mark.parametrize(
-    "cmd",
-    [
-        main.bio2zarr,
-        cli.vcf2zarr_main,
-        cli.plink2zarr,
-        cli.vcfpartition,
-        cli.tskit2zarr,
-    ],
-)
-def test_version(cmd):
-    runner = ct.CliRunner()
-    result = runner.invoke(cmd, ["--version"], catch_exceptions=False)
-    s = f"version {provenance.__version__}\n"
-    assert result.stdout.endswith(s)
-
-
 class TestTskitEndToEnd:
     def test_convert(self, tmp_path):
         ts_path = "tests/data/ts/example.trees"
@@ -1060,3 +1043,39 @@ def test_convert(self, tmp_path):
         assert result.exit_code == 0
         # Arbitrary check
         assert "variant_position" in result.stdout
+
+
+class TestPlinkEndToEnd:
+    def test_convert(self, tmp_path):
+        ts_path = "tests/data/plink/example"
+        zarr_path = tmp_path / "zarr"
+        runner = ct.CliRunner()
+        result = runner.invoke(
+            cli.plink2zarr,
+            f"convert {ts_path} {zarr_path}",
+            catch_exceptions=False,
+        )
+        assert result.exit_code == 0
+        result = runner.invoke(
+            cli.vcf2zarr_main, f"inspect {zarr_path}", catch_exceptions=False
+        )
+        assert result.exit_code == 0
+        # Arbitrary check
+        assert "variant_position" in result.stdout
+
+
+@pytest.mark.parametrize(
+    "cmd",
+    [
+        main.bio2zarr,
+        cli.vcf2zarr_main,
+        cli.plink2zarr,
+        cli.vcfpartition,
+        cli.tskit2zarr,
+    ],
+)
+def test_version(cmd):
+    runner = ct.CliRunner()
+    result = runner.invoke(cmd, ["--version"], catch_exceptions=False)
+    s = f"version {provenance.__version__}\n"
+    assert result.stdout.endswith(s)
diff --git a/tests/test_core.py b/tests/test_core.py
index 55abc908..eac1fe62 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -237,7 +237,9 @@ def test_examples(self, chunk_size, size, start, stop):
         # It works in CI on Linux, but it'll probably break at some point.
         # It's also necessary to update these numbers each time a new data
         # file gets added
-        ("tests/data", 5045032),
+        # NOTE: removed this after additional plink files broke it in #390.
+        # It's not worth the trouble - see #330
+        # ("tests/data", 5053610),
         ("tests/data/vcf", 5018640),
         ("tests/data/vcf/sample.vcf.gz", 1089),
     ],
diff --git a/tests/test_plink.py b/tests/test_plink.py
index 6377776e..661b62f6 100644
--- a/tests/test_plink.py
+++ b/tests/test_plink.py
@@ -5,10 +5,26 @@
 import numpy.testing as nt
 import pytest
 import sgkit as sg
-import sgkit.io.plink
 import xarray.testing as xt
+import zarr
 
-from bio2zarr import plink
+from bio2zarr import plink, vcf
+
+
+def test_missing_dependency():
+    with mock.patch(
+        "importlib.import_module",
+        side_effect=ImportError("No module named 'bed_reader'"),
+    ):
+        with pytest.raises(ImportError) as exc_info:
+            plink.convert(
+                "UNUSED_PATH",
+                "UNUSED_PATH",
+            )
+        assert (
+            "This process requires the optional bed_reader module. "
+            "Install it with: pip install bio2zarr[plink]" in str(exc_info.value)
+        )
 
 
 class TestSmallExample:
@@ -17,8 +33,6 @@ def bed_path(self, tmp_path_factory):
         tmp_path = tmp_path_factory.mktemp("data")
         path = tmp_path / "example.bed"
         # 7 sites x 3 samples
-        # These are counts of allele 1, so we have to flip them
-        # to get the values we expect
         dosages = np.array(
             [
                 [0, 1, 2],
@@ -31,8 +45,17 @@ def bed_path(self, tmp_path_factory):
             ],
             dtype=np.int8,
         )
-        bed_reader.to_bed(path, dosages.T)
-        return path
+        m = 7
+        d = {
+            "chromosome": ["chr1"] * m,
+            "sid": [f"id{j}" for j in range(m)],
+            "bp_position": range(1, m + 1),
+            "allele_1": ["A"] * m,
+            "allele_2": ["T"] * m,
+            "iid": [f"s{j}" for j in range(3)],
+        }
+        bed_reader.to_bed(path, dosages.T, properties=d)
+        return path.with_suffix("")
 
     @pytest.fixture(scope="class")
     def ds(self, tmp_path_factory, bed_path):
@@ -47,47 +70,31 @@ def test_genotypes(self, ds):
         nt.assert_array_equal(
             call_genotype,
             [
-                [[1, 1], [1, 0], [0, 0]],
-                [[1, 0], [1, 1], [0, 0]],
-                [[1, 1], [1, 1], [1, 1]],
-                [[-1, -1], [1, 1], [1, 1]],
-                [[0, 0], [1, 1], [1, 1]],
-                [[-1, -1], [-1, -1], [-1, -1]],
+                [[0, 0], [0, 1], [1, 1]],
+                [[0, 1], [0, 0], [1, 1]],
                 [[0, 0], [0, 0], [0, 0]],
+                [[-1, -1], [0, 0], [0, 0]],
+                [[1, 1], [0, 0], [0, 0]],
+                [[-1, -1], [-1, -1], [-1, -1]],
+                [[1, 1], [1, 1], [1, 1]],
             ],
         )
 
-    def test_missing_dependency(self):
-        with mock.patch(
-            "importlib.import_module",
-            side_effect=ImportError("No module named 'bed_reader'"),
-        ):
-            with pytest.raises(ImportError) as exc_info:
-                plink.convert(
-                    "UNUSED_PATH",
-                    "UNUSED_PATH",
-                )
-            assert (
-                "This process requires the optional bed_reader module. "
-                "Install it with: pip install bio2zarr[plink]" in str(exc_info.value)
-            )
+    def test_variant_position(self, ds):
+        nt.assert_array_equal(ds["variant_position"], np.arange(1, 8))
+
+    def test_variant_contig(self, ds):
+        nt.assert_array_equal(ds["variant_contig"], np.zeros(7))
+        nt.assert_array_equal(ds["contig_id"], ["chr1"])
 
+    def test_variant_id(self, ds):
+        nt.assert_array_equal(ds["variant_id"], [f"id{j}" for j in range(7)])
 
-class TestEqualSgkit:
-    def test_simulated_example(self, tmp_path):
-        data_path = "tests/data/plink/"
-        bed_path = data_path + "plink_sim_10s_100v_10pmiss.bed"
-        fam_path = data_path + "plink_sim_10s_100v_10pmiss.fam"
-        bim_path = data_path + "plink_sim_10s_100v_10pmiss.bim"
-        # print(bed_path)
-        # print(fam_path)
-        sg_ds = sgkit.io.plink.read_plink(
-            bed_path=bed_path, fam_path=fam_path, bim_path=bim_path
-        )
-        out = tmp_path / "example.plink.zarr"
-        plink.convert(bed_path, out)
-        ds = sg.load_dataset(out)
-        nt.assert_array_equal(ds.call_genotype.values, sg_ds.call_genotype.values)
+    def test_variant_allele(self, ds):
+        nt.assert_array_equal(ds["variant_allele"], [["T", "A"] for _ in range(7)])
+
+    def test_sample_id(self, ds):
+        nt.assert_array_equal(ds["sample_id"], ["s0", "s1", "s2"])
 
 
 class TestExample:
@@ -105,11 +112,15 @@ class TestExample:
     Base-pair coordinate (1-based; limited to 231-2)
     Allele 1 (corresponding to clear bits in .bed; usually minor)
     Allele 2 (corresponding to set bits in .bed; usually major)
+
+
+    Note, the VCF interpretation here can be derived by running
+    plink1.9 --bfile tests/data/plink/example --export vcf
     """
 
     @pytest.fixture(scope="class")
     def ds(self, tmp_path_factory):
-        path = "tests/data/plink/example.bed"
+        path = "tests/data/plink/example"
         out = tmp_path_factory.mktemp("data") / "example.plink.zarr"
         plink.convert(path, out)
         return sg.load_dataset(out)
@@ -121,10 +132,10 @@ def test_variant_position(self, ds):
         nt.assert_array_equal(ds.variant_position, [10, 20])
 
     def test_variant_allele(self, ds):
-        nt.assert_array_equal(ds.variant_allele, [["A", "GG"], ["TTT", "C"]])
+        nt.assert_array_equal(ds.variant_allele, [["GG", "A"], ["C", "TTT"]])
 
     def test_variant_length(self, ds):
-        nt.assert_array_equal(ds.variant_length, [2, 3])
+        nt.assert_array_equal(ds.variant_length, [2, 1])
 
     def test_contig_id(self, ds):
         """Test that contig identifiers are correctly extracted and stored."""
@@ -143,47 +154,37 @@ def test_genotypes(self, ds):
             call_genotype,
             [
                 [
-                    [0, 0],
-                    [0, 0],
-                    [0, 0],
-                    [1, 1],
-                    [1, 1],
-                    [1, 1],
                     [1, 1],
                     [1, 1],
                     [1, 1],
-                    [1, 1],
-                ],
-                [
                     [0, 0],
                     [0, 0],
                     [0, 0],
                     [0, 0],
+                    [0, 0],
+                    [0, 0],
+                    [0, 0],
+                ],
+                [
                     [1, 1],
                     [1, 1],
                     [1, 1],
                     [1, 1],
-                    [1, 1],
-                    [1, 1],
+                    [0, 0],
+                    [0, 0],
+                    [0, 0],
+                    [0, 0],
+                    [0, 0],
+                    [0, 0],
                 ],
             ],
         )
 
-    def test_sgkit(self, ds):
-        path = "tests/data/plink/example"
-        sg_ds = sgkit.io.plink.read_plink(path=path)
-        # Can't compare the full dataset yet
-        nt.assert_array_equal(ds.call_genotype.values, sg_ds.call_genotype.values)
-        # https://github.com/pystatgen/sgkit/issues/1209
-        nt.assert_array_equal(ds.variant_allele, sg_ds.variant_allele.astype(str))
-        nt.assert_array_equal(ds.variant_position, sg_ds.variant_position)
-        nt.assert_array_equal(ds.sample_id, sg_ds.sample_id)
-
 
 class TestSimulatedExample:
     @pytest.fixture(scope="class")
     def ds(self, tmp_path_factory):
-        path = "tests/data/plink/plink_sim_10s_100v_10pmiss.bed"
+        path = "tests/data/plink/plink_sim_10s_100v_10pmiss"
         out = tmp_path_factory.mktemp("data") / "example.plink.zarr"
         plink.convert(path, out)
         return sg.load_dataset(out)
@@ -207,16 +208,16 @@ def test_genotypes(self, ds):
         )
         expected = np.array(
             [
-                [1, 0],
-                [1, 0],
-                [0, 0],
-                [0, 0],
-                [-1, -1],
+                [0, 1],
+                [0, 1],
                 [1, 1],
                 [1, 1],
+                [-1, -1],
+                [0, 0],
                 [0, 0],
                 [1, 1],
-                [1, 1],
+                [0, 0],
+                [0, 0],
             ]
         )
         gt = ds["call_genotype"].values
@@ -259,7 +260,7 @@ def test_contig_arrays(self, ds):
     def test_chunk_size(
         self, ds, tmp_path, variants_chunk_size, samples_chunk_size, worker_processes
     ):
-        path = "tests/data/plink/plink_sim_10s_100v_10pmiss.bed"
+        path = "tests/data/plink/plink_sim_10s_100v_10pmiss"
         out = tmp_path / "example.zarr"
         plink.convert(
             path,
@@ -276,6 +277,37 @@ def test_chunk_size(
         # TODO check array chunks
 
 
+def validate(bed_path, zarr_path):
+    root = zarr.open(store=zarr_path, mode="r")
+    call_genotype = root["call_genotype"][:]
+
+    bed = bed_reader.open_bed(bed_path + ".bed", count_A1=True, num_threads=1)
+
+    assert call_genotype.shape[0] == bed.sid_count
+    assert call_genotype.shape[1] == bed.iid_count
+    bed_genotypes = bed.read(dtype="int8").T
+    assert call_genotype.shape[0] == bed_genotypes.shape[0]
+    assert call_genotype.shape[1] == bed_genotypes.shape[1]
+    assert call_genotype.shape[2] == 2
+
+    row_id = 0
+    for bed_row, zarr_row in zip(bed_genotypes, call_genotype):
+        # print("ROW", row_id)
+        # print(bed_row, zarr_row)
+        row_id += 1
+        for bed_call, zarr_call in zip(bed_row, zarr_row):
+            if bed_call == -127:
+                assert list(zarr_call) == [-1, -1]
+            elif bed_call == 0:
+                assert list(zarr_call) == [0, 0]
+            elif bed_call == 1:
+                assert list(zarr_call) == [0, 1]
+            elif bed_call == 2:
+                assert list(zarr_call) == [1, 1]
+            else:  # pragma no cover
+                raise AssertionError(f"Unexpected bed call {bed_call}")
+
+
 @pytest.mark.parametrize(
     ("variants_chunk_size", "samples_chunk_size"),
     [
@@ -290,7 +322,7 @@ def test_chunk_size(
 def test_by_validating(
     tmp_path, variants_chunk_size, samples_chunk_size, worker_processes
 ):
-    path = "tests/data/plink/plink_sim_10s_100v_10pmiss.bed"
+    path = "tests/data/plink/plink_sim_10s_100v_10pmiss"
     out = tmp_path / "example.zarr"
     plink.convert(
         path,
@@ -299,7 +331,7 @@ def test_by_validating(
         samples_chunk_size=samples_chunk_size,
         worker_processes=worker_processes,
     )
-    plink.validate(path, out)
+    validate(path, out)
 
 
 class TestMultipleContigs:
@@ -314,7 +346,6 @@ def multi_contig_bed_path(self, tmp_path_factory):
         # - 1 variant on chromosome 2
         # - 2 variants on chromosome X
         # - 1 variant on chromosome Y
-        # values are counts of allele 1, will be flipped by the converter
         dosages = np.array(
             [
                 [0, 1, 2, 0],  # chr1
@@ -344,7 +375,7 @@ def multi_contig_bed_path(self, tmp_path_factory):
                 # Format: fam_id, ind_id, pat_id, mat_id, sex, phenotype
                 f.write(f"fam{i} ind{i} 0 0 0 -9\n")
 
-        return path
+        return path.with_suffix("")
 
     @pytest.fixture(scope="class")
     def ds(self, tmp_path_factory, multi_contig_bed_path):
@@ -356,6 +387,19 @@ def ds(self, tmp_path_factory, multi_contig_bed_path):
     def test_contig_ids(self, ds):
         nt.assert_array_equal(ds.contig_id, ["1", "2", "X", "Y"])
 
+    def test_variant_allele(self, ds):
+        nt.assert_array_equal(
+            ds.variant_allele,
+            [
+                ["G", "A"],
+                ["C", "T"],
+                ["G", "A"],
+                ["T", "C"],
+                ["A", "G"],
+                ["C", "T"],
+            ],
+        )
+
     def test_variant_contig(self, ds):
         nt.assert_array_equal(
             ds.variant_contig, [0, 0, 1, 2, 2, 3]
@@ -367,12 +411,12 @@ def test_genotypes(self, ds):
         nt.assert_array_equal(
             call_genotype,
             [
-                [[1, 1], [1, 0], [0, 0], [1, 1]],  # chr1
-                [[1, 0], [1, 1], [0, 0], [1, 0]],  # chr1
-                [[1, 1], [1, 1], [1, 1], [0, 0]],  # chr2
-                [[0, 0], [1, 1], [1, 0], [1, 1]],  # chrX
-                [[1, 0], [1, 0], [1, 0], [1, 0]],  # chrX
-                [[1, 1], [0, 0], [1, 1], [0, 0]],  # chrY
+                [[0, 0], [0, 1], [1, 1], [0, 0]],  # chr1
+                [[0, 1], [0, 0], [1, 1], [0, 1]],  # chr1
+                [[0, 0], [0, 0], [0, 0], [1, 1]],  # chr2
+                [[1, 1], [0, 0], [0, 1], [0, 0]],  # chrX
+                [[0, 1], [0, 1], [0, 1], [0, 1]],  # chrX
+                [[0, 0], [1, 1], [0, 0], [1, 1]],  # chrY
             ],
         )
 
@@ -384,3 +428,26 @@ def test_variant_length(self, ds):
             ds.variant_length,
             [1, 1, 1, 1, 1, 1],
         )
+
+
+@pytest.mark.parametrize(
+    "prefix",
+    [
+        "tests/data/plink/example",
+        "tests/data/plink/example_with_fam",
+        "tests/data/plink/plink_sim_10s_100v_10pmiss",
+    ],
+)
+def test_against_plinks_vcf_output(prefix, tmp_path):
+    vcf_path = prefix + ".vcf"
+    plink_zarr = tmp_path / "plink.zarr"
+    vcf_zarr = tmp_path / "vcf.zarr"
+    plink.convert(prefix, plink_zarr)
+    vcf.convert([vcf_path], vcf_zarr)
+    ds1 = sg.load_dataset(plink_zarr)
+    ds2 = (
+        sg.load_dataset(vcf_zarr)
+        .drop_dims("filters")
+        .drop_vars(["variant_quality", "variant_PR", "contig_length"])
+    )
+    xt.assert_equal(ds1, ds2)