diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index ad81339d..b413f323 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -110,6 +110,18 @@ def fine_tuning(ctx: click.Context) -> None:
     default="all-linear",
     help="Trainable modules for LoRA adapters. For example, 'all-linear', 'q_proj,v_proj'",
 )
+@click.option(
+    "--training-method",
+    type=click.Choice(["sft", "dpo"]),
+    default="sft",
+    help="Training method to use. Options: sft (supervised fine-tuning), dpo (Direct Preference Optimization)",
+)
+@click.option(
+    "--dpo-beta",
+    type=float,
+    default=0.1,
+    help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix", type=str, default=None, help="Suffix for the fine-tuned model name"
 )
@@ -166,6 +178,8 @@ def create(
     wandb_name: str,
     confirm: bool,
     train_on_inputs: bool | Literal["auto"],
+    training_method: str,
+    dpo_beta: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -195,6 +209,8 @@ def create(
         wandb_project_name=wandb_project_name,
         wandb_name=wandb_name,
         train_on_inputs=train_on_inputs,
+        training_method=training_method,
+        dpo_beta=dpo_beta,
         from_checkpoint=from_checkpoint,
     )
 
diff --git a/src/together/constants.py b/src/together/constants.py
index c64af326..99e27a4a 100644
--- a/src/together/constants.py
+++ b/src/together/constants.py
@@ -39,12 +39,18 @@ class DatasetFormat(enum.Enum):
     GENERAL = "general"
    CONVERSATION = "conversation"
     INSTRUCTION = "instruction"
+    PREFERENCE_OPENAI = "preference_openai"
 
 
 JSONL_REQUIRED_COLUMNS_MAP = {
     DatasetFormat.GENERAL: ["text"],
     DatasetFormat.CONVERSATION: ["messages"],
     DatasetFormat.INSTRUCTION: ["prompt", "completion"],
+    DatasetFormat.PREFERENCE_OPENAI: [
+        "input",
+        "preferred_output",
+        "non_preferred_output",
+    ],
 }
 REQUIRED_COLUMNS_MESSAGE = ["role", "content"]
 POSSIBLE_ROLES_CONVERSATION = ["system", "user", "assistant"]
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 11d445db..8cc48a17 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -23,6 +23,8 @@
     TrainingType,
     FinetuneLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    TrainingMethodDPO,
+    TrainingMethodSFT,
     FinetuneCheckpoint,
 )
 from together.types.finetune import (
@@ -39,6 +41,12 @@
 
 _FT_JOB_WITH_STEP_REGEX = r"^ft-[\dabcdef-]+:\d+$"
 
 
+AVAILABLE_TRAINING_METHODS = {
+    TrainingMethodSFT().method,
+    TrainingMethodDPO().method,
+}
+
+
 def createFinetuneRequest(
     model_limits: FinetuneTrainingLimits,
     training_file: str,
@@ -64,8 +72,11 @@ def createFinetuneRequest(
     wandb_project_name: str | None = None,
     wandb_name: str | None = None,
     train_on_inputs: bool | Literal["auto"] = "auto",
+    training_method: str = "sft",
+    dpo_beta: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -113,11 +124,20 @@ def createFinetuneRequest(
     if weight_decay is not None and (weight_decay < 0):
         raise ValueError("Weight decay should be non-negative")
 
+    if training_method not in AVAILABLE_TRAINING_METHODS:
+        raise ValueError(
+            f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
+        )
+
     lrScheduler = FinetuneLRScheduler(
         lr_scheduler_type="linear",
         lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
     )
 
+    training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
+    if training_method == "dpo":
+        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta)
+
     finetune_request = FinetuneRequest(
         model=model,
         training_file=training_file,
@@ -138,6 +158,7 @@ def createFinetuneRequest(
         wandb_project_name=wandb_project_name,
         wandb_name=wandb_name,
         train_on_inputs=train_on_inputs,
+        training_method=training_method_cls,
         from_checkpoint=from_checkpoint,
     )
 
@@ -240,6 +261,8 @@ def create(
         verbose: bool = False,
         model_limits: FinetuneTrainingLimits | None = None,
         train_on_inputs: bool | Literal["auto"] = "auto",
+        training_method: str = "sft",
+        dpo_beta: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -286,6 +309,9 @@ def create(
                 For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields
                 (Instruction format), inputs will be masked.
                 Defaults to "auto".
+            training_method (str, optional): Training method. Defaults to "sft".
+                Supported methods: "sft", "dpo".
+            dpo_beta (float, optional): Beta parameter for DPO training (used only when training_method is "dpo"). Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -300,7 +326,6 @@ def create(
 
         if model_limits is None:
             model_limits = self.get_model_limits(model=model)
-
         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
             training_file=training_file,
@@ -326,6 +351,8 @@ def create(
             wandb_project_name=wandb_project_name,
             wandb_name=wandb_name,
             train_on_inputs=train_on_inputs,
+            training_method=training_method,
+            dpo_beta=dpo_beta,
             from_checkpoint=from_checkpoint,
         )
 
@@ -344,7 +371,6 @@ def create(
             ),
             stream=False,
         )
-
         assert isinstance(response, TogetherResponse)
 
         return FinetuneResponse(**response.data)
@@ -608,6 +634,8 @@ async def create(
         verbose: bool = False,
         model_limits: FinetuneTrainingLimits | None = None,
         train_on_inputs: bool | Literal["auto"] = "auto",
+        training_method: str = "sft",
+        dpo_beta: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -654,6 +682,9 @@ async def create(
                 For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields
                 (Instruction format), inputs will be masked.
                 Defaults to "auto".
+            training_method (str, optional): Training method. Defaults to "sft".
+                Supported methods: "sft", "dpo".
+            dpo_beta (float, optional): Beta parameter for DPO training (used only when training_method is "dpo"). Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -694,6 +725,8 @@ async def create(
             wandb_project_name=wandb_project_name,
             wandb_name=wandb_name,
             train_on_inputs=train_on_inputs,
+            training_method=training_method,
+            dpo_beta=dpo_beta,
             from_checkpoint=from_checkpoint,
         )
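
Example usage of the new parameters (an illustrative sketch, not part of the diff; the file ID and model name below are placeholders):

    # Launch a DPO fine-tuning job with the options added above.
    # "file-xxxx" and the model name are placeholders.
    from together import Together

    client = Together()
    job = client.fine_tuning.create(
        training_file="file-xxxx",  # JSONL in the preference_openai format
        model="example-org/base-model",
        training_method="dpo",      # validated against AVAILABLE_TRAINING_METHODS
        dpo_beta=0.1,               # forwarded as TrainingMethodDPO.dpo_beta
    )

The same two knobs are exposed on the CLI as --training-method and --dpo-beta.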
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 1a7419a5..47fed22b 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -31,6 +31,8 @@
     FileType,
 )
 from together.types.finetune import (
+    TrainingMethodDPO,
+    TrainingMethodSFT,
     FinetuneCheckpoint,
     FinetuneDownloadResult,
     FinetuneLinearLRSchedulerArgs,
@@ -81,6 +83,8 @@
     "TrainingType",
     "FullTrainingType",
     "LoRATrainingType",
+    "TrainingMethodDPO",
+    "TrainingMethodSFT",
     "RerankRequest",
     "RerankResponse",
     "FinetuneTrainingLimits",
diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py
index e3811292..c41cbce2 100644
--- a/src/together/types/finetune.py
+++ b/src/together/types/finetune.py
@@ -135,6 +135,31 @@ class LoRATrainingType(TrainingType):
     type: str = "Lora"
 
 
+class TrainingMethod(BaseModel):
+    """
+    Training method type
+    """
+
+    method: str
+
+
+class TrainingMethodSFT(TrainingMethod):
+    """
+    Training method type for SFT training
+    """
+
+    method: Literal["sft"] = "sft"
+
+
+class TrainingMethodDPO(TrainingMethod):
+    """
+    Training method type for DPO training
+    """
+
+    method: Literal["dpo"] = "dpo"
+    dpo_beta: float | None = None
+
+
 class FinetuneRequest(BaseModel):
     """
     Fine-tune request type
@@ -178,6 +203,10 @@ class FinetuneRequest(BaseModel):
     training_type: FullTrainingType | LoRATrainingType | None = None
     # train on inputs
     train_on_inputs: StrictBool | Literal["auto"] = "auto"
+    # training method
+    training_method: TrainingMethodSFT | TrainingMethodDPO = Field(
+        default_factory=TrainingMethodSFT
+    )
     # from step
     from_checkpoint: str
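
For reference, a sketch of what the new training_method field contributes to the request payload (illustrative only; assumes these types are pydantic v2 BaseModels, so model_dump() is available):

    # How the discriminated training-method models serialize.
    from together.types import TrainingMethodDPO, TrainingMethodSFT

    TrainingMethodSFT().model_dump()              # -> {'method': 'sft'}
    TrainingMethodDPO(dpo_beta=0.1).model_dump()  # -> {'method': 'dpo', 'dpo_beta': 0.1}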
diff --git a/src/together/utils/files.py b/src/together/utils/files.py
index cc39fca0..e1e1d4ed 100644
--- a/src/together/utils/files.py
+++ b/src/together/utils/files.py
@@ -4,7 +4,7 @@
 import os
 from pathlib import Path
 from traceback import format_exc
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 from pyarrow import ArrowInvalid, parquet
 
@@ -96,6 +96,140 @@ def check_file(
     return report_dict
 
 
+def validate_messages(messages: List[Dict[str, Any]], idx: int) -> None:
+    """Validate the messages column."""
+    if not isinstance(messages, list):
+        raise InvalidFileFormatError(
+            message=f"Invalid format on line {idx + 1} of the input file. "
+            f"Expected a list of messages. Found {type(messages)}",
+            line_number=idx + 1,
+            error_source="key_value",
+        )
+    if not messages:
+        raise InvalidFileFormatError(
+            message=f"Invalid format on line {idx + 1} of the input file. "
+            f"Expected a non-empty list of messages. Found empty list",
+            line_number=idx + 1,
+            error_source="key_value",
+        )
+
+    has_weights = any(isinstance(m, dict) and "weight" in m for m in messages)
+
+    previous_role = None
+    for message in messages:
+        if not isinstance(message, dict):
+            raise InvalidFileFormatError(
+                message=f"Invalid format on line {idx + 1} of the input file. "
+                f"Expected a dictionary in the messages list. Found {type(message)}",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+        for column in REQUIRED_COLUMNS_MESSAGE:
+            if column not in message:
+                raise InvalidFileFormatError(
+                    message=f"Field `{column}` is missing for a turn `{message}` on line {idx + 1} "
+                    "of the input file.",
+                    line_number=idx + 1,
+                    error_source="key_value",
+                )
+            else:
+                if not isinstance(message[column], str):
+                    raise InvalidFileFormatError(
+                        message=f"Invalid format on line {idx + 1} in the column {column} for turn `{message}` "
+                        f"of the input file. Expected string. Found {type(message[column])}",
+                        line_number=idx + 1,
+                        error_source="text_field",
+                    )
+
+        if has_weights and "weight" in message:
+            weight = message["weight"]
+            if not isinstance(weight, int):
+                raise InvalidFileFormatError(
+                    message="Weight must be an integer",
+                    line_number=idx + 1,
+                    error_source="key_value",
+                )
+            if weight not in {0, 1}:
+                raise InvalidFileFormatError(
+                    message="Weight must be either 0 or 1",
+                    line_number=idx + 1,
+                    error_source="key_value",
+                )
+        if message["role"] not in POSSIBLE_ROLES_CONVERSATION:
+            raise InvalidFileFormatError(
+                message=f"Found invalid role `{message['role']}` in the messages on line {idx + 1}. "
+                f"Possible roles in the conversation are: {POSSIBLE_ROLES_CONVERSATION}",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+
+        if previous_role == message["role"]:
+            raise InvalidFileFormatError(
+                message=f"Invalid role turns on line {idx + 1} of the input file. "
+                "`user` and `assistant` roles must alternate user/assistant/user/assistant/...",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+        previous_role = message["role"]
+
+
+def validate_preference_openai(example: Dict[str, Any], idx: int = 0) -> None:
+    """Validate the OpenAI preference dataset format.
+
+    Args:
+        example (dict): Input entry to be checked.
+        idx (int): Line number in the file.
+
+    Raises:
+        InvalidFileFormatError: If the dataset format is invalid.
+    """
+    if not isinstance(example["input"], dict):
+        raise InvalidFileFormatError(
+            message="The dataset is malformed, the `input` field must be a dictionary.",
+            line_number=idx + 1,
+            error_source="key_value",
+        )
+
+    if "messages" not in example["input"]:
+        raise InvalidFileFormatError(
+            message="The dataset is malformed, the `input` dictionary must contain a `messages` field.",
+            line_number=idx + 1,
+            error_source="key_value",
+        )
+
+    validate_messages(example["input"]["messages"], idx)
+
+    for output_field in ["preferred_output", "non_preferred_output"]:
+        if not isinstance(example[output_field], list):
+            raise InvalidFileFormatError(
+                message=f"The dataset is malformed, the `{output_field}` field must be a list.",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+
+        if len(example[output_field]) != 1:
+            raise InvalidFileFormatError(
+                message=f"The dataset is malformed, the `{output_field}` list must contain exactly one message.",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+        if "role" not in example[output_field][0]:
+            raise InvalidFileFormatError(
+                message=f"The dataset is malformed, the `{output_field}` message is missing the `role` field.",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+        elif example[output_field][0]["role"] != "assistant":
+            raise InvalidFileFormatError(
+                message=f"The dataset is malformed, the `{output_field}` must contain an assistant message.",
+                line_number=idx + 1,
+                error_source="key_value",
+            )
+
+    validate_messages(example["preferred_output"], idx)
+    validate_messages(example["non_preferred_output"], idx)
+
+
 def _check_jsonl(file: Path) -> Dict[str, Any]:
     report_dict: Dict[str, Any] = {}
     # Check that the file is UTF-8 encoded. If not report where the error occurs.
@@ -164,74 +298,13 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
                     line_number=idx + 1,
                     error_source="format",
                 )
-
-            if current_format == DatasetFormat.CONVERSATION:
+            if current_format == DatasetFormat.PREFERENCE_OPENAI:
+                validate_preference_openai(json_line, idx)
+            elif current_format == DatasetFormat.CONVERSATION:
                 message_column = JSONL_REQUIRED_COLUMNS_MAP[
                     DatasetFormat.CONVERSATION
                 ][0]
-
-                if not isinstance(json_line[message_column], list):
-                    raise InvalidFileFormatError(
-                        message=f"Invalid format on line {idx + 1} of the input file. "
-                        f"Expected a list of messages. Found {type(json_line[message_column])}",
-                        line_number=idx + 1,
-                        error_source="key_value",
-                    )
-
-                if len(json_line[message_column]) == 0:
-                    raise InvalidFileFormatError(
-                        message=f"Invalid format on line {idx + 1} of the input file. "
-                        f"Expected a non-empty list of messages. Found empty list",
-                        line_number=idx + 1,
-                        error_source="key_value",
-                    )
-
-                for turn_id, turn in enumerate(json_line[message_column]):
-                    if not isinstance(turn, dict):
-                        raise InvalidFileFormatError(
-                            message=f"Invalid format on line {idx + 1} of the input file. "
-                            f"Expected a dictionary in the {turn_id + 1} turn. Found {type(turn)}",
-                            line_number=idx + 1,
-                            error_source="key_value",
-                        )
-
-                previous_role = None
-                for turn in json_line[message_column]:
-                    for column in REQUIRED_COLUMNS_MESSAGE:
-                        if column not in turn:
-                            raise InvalidFileFormatError(
-                                message=f"Field `{column}` is missing for a turn `{turn}` on line {idx + 1} "
-                                "of the the input file.",
-                                line_number=idx + 1,
-                                error_source="key_value",
-                            )
-                        else:
-                            if not isinstance(turn[column], str):
-                                raise InvalidFileFormatError(
-                                    message=f"Invalid format on line {idx + 1} in the column {column} for turn `{turn}` "
-                                    f"of the input file. Expected string. Found {type(turn[column])}",
-                                    line_number=idx + 1,
-                                    error_source="text_field",
-                                )
-
-                    role = turn["role"]
-
-                    if role not in POSSIBLE_ROLES_CONVERSATION:
-                        raise InvalidFileFormatError(
-                            message=f"Found invalid role `{role}` in the messages on the line {idx + 1}. "
-                            f"Possible roles in the conversation are: {POSSIBLE_ROLES_CONVERSATION}",
-                            line_number=idx + 1,
-                            error_source="key_value",
-                        )
-
-                    if previous_role == role:
-                        raise InvalidFileFormatError(
-                            message=f"Invalid role turns on line {idx + 1} of the input file. "
-                            "`user` and `assistant` roles must alternate user/assistant/user/assistant/...",
-                            line_number=idx + 1,
-                            error_source="key_value",
-                        )
-
-                    previous_role = role
-
+                validate_messages(json_line[message_column], idx)
             else:
                 for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]:
                     if not isinstance(json_line[column], str):
diff --git a/tests/unit/test_preference_openai.py b/tests/unit/test_preference_openai.py
new file mode 100644
index 00000000..3781c830
--- /dev/null
+++ b/tests/unit/test_preference_openai.py
@@ -0,0 +1,312 @@
+import json
+import pytest
+from pathlib import Path
+
+from together.constants import MIN_SAMPLES
+from together.utils.files import check_file
+
+
+_TEST_PREFERENCE_OPENAI_CONTENT = [
+    {
+        "input": {
+            "messages": [
+                {"role": "user", "content": "Hi there, I have a question."},
+                {"role": "assistant", "content": "Hello, how is your day going?"},
+                {
+                    "role": "user",
+                    "content": "Hello, can you tell me how cold San Francisco is today?",
+                },
+            ],
+        },
+        "preferred_output": [
+            {
+                "role": "assistant",
+                "content": "Today in San Francisco, it is not quite as cold as expected. Morning clouds will give way "
+                "to sunshine, with a high near 68°F (20°C) and a low around 57°F (14°C).",
+            }
+        ],
+        "non_preferred_output": [
+            {
+                "role": "assistant",
+                "content": "It is not particularly cold in San Francisco today.",
+            }
+        ],
+    },
+    {
+        "input": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "What's the best way to learn programming?",
+                },
+            ],
+        },
+        "preferred_output": [
+            {
+                "role": "assistant",
+                "content": "The best way to learn programming is through consistent practice, working on real projects, "
+                "and breaking down complex problems into smaller parts. Start with a beginner-friendly language like Python.",
+            }
+        ],
+        "non_preferred_output": [
+            {"role": "assistant", "content": "Just read some books and you'll be fine."}
+        ],
+    },
+]
+
+
+def test_check_jsonl_valid_preference_openai(tmp_path: Path):
+    """Test valid preference OpenAI format."""
+    file = tmp_path / "valid_preference_openai.jsonl"
+    content = _TEST_PREFERENCE_OPENAI_CONTENT
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert report["is_check_passed"]
+    assert report["utf8"]
+    assert report["num_samples"] == len(content)
+    assert report["has_min_samples"]
+
+
+MISSING_FIELDS_TEST_CASES = [
+    pytest.param("input", "Missing input field", id="missing_input"),
+    pytest.param(
+        "preferred_output",
+        "Missing preferred_output field",
+        id="missing_preferred_output",
+    ),
+    pytest.param(
+        "non_preferred_output",
+        "Missing non_preferred_output field",
+        id="missing_non_preferred_output",
+    ),
+]
+
+
+@pytest.mark.parametrize("field_to_remove, description", MISSING_FIELDS_TEST_CASES)
+def test_check_jsonl_invalid_preference_openai_missing_fields(
+    tmp_path: Path, field_to_remove, description
+):
+    """Test missing required fields in OpenAI preference format."""
+    file = tmp_path / f"invalid_preference_openai_missing_{field_to_remove}.jsonl"
+    content = [item.copy() for item in _TEST_PREFERENCE_OPENAI_CONTENT]
+
+    # Remove the specified field from the first item
+    del content[0][field_to_remove]
+
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"], f"Test should fail when {description}"
+
+
+STRUCTURAL_ISSUE_TEST_CASES = [
+    pytest.param(
+        "empty_messages",
+        lambda item: item.update({"input": {"messages": []}}),
+        "Empty messages array",
+        id="empty_messages",
+    ),
+    pytest.param(
+        "missing_role_preferred",
+        lambda item: item.update(
+            {"preferred_output": [{"content": "Missing role field"}]}
+        ),
+        "Missing role in preferred_output",
+        id="missing_role_preferred",
+    ),
+    pytest.param(
+        "missing_role_non_preferred",
+        lambda item: item.update(
+            {"non_preferred_output": [{"content": "Missing role field"}]}
+        ),
+        "Missing role in non_preferred_output",
+        id="missing_role_non_preferred",
+    ),
+    pytest.param(
+        "missing_content_preferred",
+        lambda item: item.update({"preferred_output": [{"role": "assistant"}]}),
+        "Missing content in preferred_output",
+        id="missing_content_preferred",
+    ),
+    pytest.param(
+        "missing_content_non_preferred",
+        lambda item: item.update({"non_preferred_output": [{"role": "assistant"}]}),
+        "Missing content in non_preferred_output",
+        id="missing_content_non_preferred",
+    ),
+    pytest.param(
+        "wrong_output_format_preferred",
+        lambda item: item.update({"preferred_output": "Not an array but a string"}),
+        "Wrong format for preferred_output",
+        id="wrong_output_format_preferred",
+    ),
+    pytest.param(
+        "wrong_output_format_non_preferred",
+        lambda item: item.update({"non_preferred_output": "Not an array but a string"}),
+        "Wrong format for non_preferred_output",
+        id="wrong_output_format_non_preferred",
+    ),
+    pytest.param(
+        "missing_content",
+        lambda item: item.update({"input": {"messages": [{"role": "user"}]}}),
+        "Missing content in messages",
+        id="missing_content",
+    ),
+    pytest.param(
+        "multiple_preferred_outputs",
+        lambda item: item.update(
+            {
+                "preferred_output": [
+                    {"role": "assistant", "content": "First response"},
+                    {"role": "assistant", "content": "Second response"},
+                ]
+            }
+        ),
+        "Multiple messages in preferred_output",
+        id="multiple_preferred_outputs",
+    ),
+    pytest.param(
+        "multiple_non_preferred_outputs",
+        lambda item: item.update(
+            {
+                "non_preferred_output": [
+                    {"role": "assistant", "content": "First response"},
+                    {"role": "assistant", "content": "Second response"},
+                ]
+            }
+        ),
+        "Multiple messages in non_preferred_output",
+        id="multiple_non_preferred_outputs",
+    ),
+    pytest.param(
+        "empty_preferred_output",
+        lambda item: item.update({"preferred_output": []}),
+        "Empty preferred_output array",
+        id="empty_preferred_output",
+    ),
+    pytest.param(
+        "empty_non_preferred_output",
+        lambda item: item.update({"non_preferred_output": []}),
+        "Empty non_preferred_output array",
+        id="empty_non_preferred_output",
+    ),
+    pytest.param(
+        "non_string_content_in_messages",
+        lambda item: item.update(
+            {"input": {"messages": [{"role": "user", "content": 123}]}}
+        ),
+        "Non-string content in messages",
+        id="non_string_content_in_messages",
+    ),
+    pytest.param(
+        "invalid_role_in_messages",
+        lambda item: item.update(
+            {"input": {"messages": [{"role": "invalid_role", "content": "Hello"}]}}
+        ),
+        "Invalid role in messages",
+        id="invalid_role_in_messages",
+    ),
+    pytest.param(
+        "non_alternating_roles",
+        lambda item: item.update(
+            {
+                "input": {
+                    "messages": [
+                        {"role": "user", "content": "Hello"},
+                        {"role": "user", "content": "How are you?"},
+                    ]
+                }
+            }
+        ),
+        "Non-alternating roles in messages",
+        id="non_alternating_roles",
+    ),
+    pytest.param(
+        "invalid_weight_type",
+        lambda item: item.update(
+            {
+                "input": {
+                    "messages": [
+                        {"role": "user", "content": "Hello", "weight": "not_an_integer"}
+                    ]
+                }
+            }
+        ),
+        "Invalid weight type",
+        id="invalid_weight_type",
+    ),
+    pytest.param(
+        "invalid_weight_value",
+        lambda item: item.update(
+            {"input": {"messages": [{"role": "user", "content": "Hello", "weight": 2}]}}
+        ),
+        "Invalid weight value",
+        id="invalid_weight_value",
+    ),
+    pytest.param(
+        "non_dict_message",
+        lambda item: item.update({"input": {"messages": ["Not a dictionary"]}}),
+        "Non-dictionary message",
+        id="non_dict_message",
+    ),
+    pytest.param(
+        "non_dict_input",
+        lambda item: item.update({"input": "Not a dictionary"}),
+        "Non-dictionary input",
+        id="non_dict_input",
+    ),
+    pytest.param(
+        "missing_messages_in_input",
+        lambda item: item.update({"input": {}}),
+        "Missing messages in input",
+        id="missing_messages_in_input",
+    ),
+    pytest.param(
+        "non_assistant_role_in_preferred",
+        lambda item: item.update(
+            {
+                "preferred_output": [
+                    {"role": "user", "content": "This should be assistant"}
+                ]
+            }
+        ),
+        "Non-assistant role in preferred output",
+        id="non_assistant_role_in_preferred",
+    ),
+    pytest.param(
+        "non_assistant_role_in_non_preferred",
+        lambda item: item.update(
+            {
+                "non_preferred_output": [
+                    {"role": "user", "content": "This should be assistant"}
+                ]
+            }
+        ),
+        "Non-assistant role in non-preferred output",
+        id="non_assistant_role_in_non_preferred",
+    ),
+]
+
+
+@pytest.mark.parametrize("name, modifier, description", STRUCTURAL_ISSUE_TEST_CASES)
+def test_check_jsonl_invalid_preference_openai_structural_issues(
+    tmp_path: Path, name, modifier, description
+):
+    """Test various structural issues in OpenAI preference format."""
+    file = tmp_path / f"invalid_preference_openai_{name}.jsonl"
+    content = [item.copy() for item in _TEST_PREFERENCE_OPENAI_CONTENT]
+
+    # Apply the modification to the first item
+    modifier(content[0])
+
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"], f"Test should fail with {description}"
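
To see the new validation path end to end, a minimal sketch using the helpers added above (the record is trimmed from the test fixture; the file path is arbitrary):

    # Write two minimal preference_openai records and validate the file.
    import json
    from pathlib import Path

    from together.utils.files import check_file

    record = {
        "input": {"messages": [{"role": "user", "content": "How cold is SF today?"}]},
        "preferred_output": [{"role": "assistant", "content": "Mild, around 57-68°F."}],
        "non_preferred_output": [{"role": "assistant", "content": "Not very."}],
    }
    path = Path("preference_sample.jsonl")
    path.write_text("\n".join(json.dumps(record) for _ in range(2)))
    assert check_file(path)["is_check_passed"]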