diff --git a/IRT/README.md b/IRT/README.md
new file mode 100644
index 0000000..f05b9b1
--- /dev/null
+++ b/IRT/README.md
@@ -0,0 +1,55 @@
+# IRT Analysis for ACE Evaluation Scores
+
+This module performs Item Response Theory (IRT) analysis on ACE evaluation scores using the
+[`girth`](https://pypi.org/project/girth/) library. It currently supports 1PL (Rasch), 2PL,
+and 3PL logistic models for item parameter estimation.
+
+## Installation
+
+Install the required IRT library:
+
+```bash
+pip install girth
+```
+
+## Usage
+
+Run the IRT analysis from the project root (running it as a module keeps the `IRT` package
+imports in `main.py` resolvable):
+
+```bash
+python -m IRT.main
+```
+
+## Configuration
+
+Edit `cfg/irt_config.yaml` to configure:
+- `data_cfg.scores_dir`: Path to the ACE scores directory containing JSON evaluation files
+- `output_cfg.output_dir`: Directory where results and plots will be written
+- `output_cfg.output_filename`: Name of the output JSON file
+- `irt_cfg.model_type`: IRT model type, one of `1PL`, `2PL`, or `3PL`
+- `irt_cfg.max_iterations`: Maximum number of MML iterations for GIRTH
+- `irt_cfg.quadrature_n`: Number of quadrature points used by GIRTH
+- `irt_cfg.tolerance`: Convergence tolerance (currently not passed to the GIRTH routines)
+
+## Output
+
+The analysis produces:
+- **Results JSON** (at `output_cfg.output_dir/output_cfg.output_filename`) containing:
+  - **IRT Parameters**: Item difficulty, discrimination, and guessing parameters
+    (guessing is fixed to 0 for 1PL/2PL; discrimination is fixed to 1 for 1PL)
+  - **Statistics**: Question-level and model-level descriptive statistics
+  - **Question Info**: Metadata for each question (task, ID, input, target)
+- **Item-parameter plots**: A PNG file `irt_item_parameters_distributions.png` in
+  `output_cfg.output_dir`, showing histograms of difficulty, discrimination, and guessing
+  for the chosen PL model.
+
+Note: Person abilities are estimated internally (for some diagnostics) but are **not**
+saved to disk or printed in the console output.
+
+## IRT Parameters Explained
+
+- **Difficulty (b)**: How difficult the question is (higher = more difficult).
+- **Discrimination (a)**: How well the question distinguishes between high- and low-ability models.
+- **Guessing (c)**: Lower asymptote (probability of a correct answer by guessing). This is fixed to 0
+  in 1PL and 2PL models, and estimated in 3PL models.
+
diff --git a/IRT/__init__.py b/IRT/__init__.py
new file mode 100644
index 0000000..19a3120
--- /dev/null
+++ b/IRT/__init__.py
@@ -0,0 +1,3 @@
+"""IRT analysis module for ACE evaluation scores."""
+
+
diff --git a/IRT/cfg/irt_config.yaml b/IRT/cfg/irt_config.yaml
new file mode 100644
index 0000000..53def27
--- /dev/null
+++ b/IRT/cfg/irt_config.yaml
@@ -0,0 +1,26 @@
+# Configuration for IRT analysis of ACE evaluation scores
+
+data_cfg:
+  # Path to the scores directory containing evaluation JSON files
+  scores_dir: /projects/aieng/public/ace/artifacts/negin_ace/scores
+
+output_cfg:
+  # Directory to save IRT analysis results
+  output_dir: /projects/DeepLesion/projects/automated_capability_evaluation/IRT/results
+  # Name of the output JSON file
+  output_filename: irt_results.json
+
+irt_cfg:
+  # IRT model type (1PL, 2PL, or 3PL)
+  model_type: 3PL
+  # Maximum iterations for model fitting
+  max_iterations: 1000
+  # Convergence tolerance (currently not used by the fitting code)
+  tolerance: 1e-6
+  # Number of quadrature points used in GIRTH for numerical integration
+  quadrature_n: 41
+
+defaults:
+  - _self_
+
+
diff --git a/IRT/irt_analysis.py b/IRT/irt_analysis.py
new file mode 100644
index 0000000..299b8f1
--- /dev/null
+++ b/IRT/irt_analysis.py
@@ -0,0 +1,202 @@
+"""IRT analysis of ACE evaluation scores using the girth library.
+
+This module uses girth's marginal maximum likelihood (MML) routines for 1PL (Rasch),
+2PL, and 3PL item parameter estimation.
+"""
+
+import logging
+from typing import Any, Dict, List
+
+import numpy as np
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+try:
+    from girth import ability_3pl_eap, rasch_mml, threepl_mml, twopl_mml
+    GIRTH_AVAILABLE = True
+except ImportError:
+    GIRTH_AVAILABLE = False
+    logger.warning("girth not available. Please install via: pip install girth")
+
+
+def fit_3pl_irt(response_matrix: List[List[int]],
+                question_ids: List[str],
+                model_names: List[str],
+                max_iterations: int = 2000,
+                quadrature_n: int = 41,
+                model_type: str = "3PL") -> Dict[str, Any]:
+    """
+    Fit a 1PL, 2PL, or 3PL IRT model using the 'girth' library.
+
+    Despite its name (kept for backward compatibility), this function supports all
+    three model types. For 1PL and 2PL, the corresponding girth MML routines
+    (rasch_mml, twopl_mml) are used directly. For 3PL, the three-parameter logistic
+    model is fit with the upper asymptote fixed at 1.0 (standard 3PL specification).
+
+    Args:
+        response_matrix: 2D list where rows = questions (items), columns = models (subjects)
+        question_ids: List of question IDs (row order)
+        model_names: List of model names (column order)
+        max_iterations: Maximum number of MML iterations passed to girth
+        quadrature_n: Number of quadrature points used by girth for numerical integration
+        model_type: One of "1PL", "2PL", or "3PL"
+
+    Returns:
+        Dictionary containing standardized IRT parameters.
+    """
+    if not GIRTH_AVAILABLE:
+        raise ImportError("The 'girth' library is required. Install it with: pip install girth")
+
+    model_type = (model_type or "3PL").upper()
+    if model_type not in {"1PL", "2PL", "3PL"}:
+        raise ValueError(
+            f"Unsupported IRT model_type '{model_type}'. "
+            "Supported values are '1PL', '2PL', and '3PL'."
+        )
+
+    data = np.array(response_matrix, dtype=int)
+
+    n_items, n_persons = data.shape
+    logger.info(f"Fitting {model_type} model via GIRTH on {n_items} items and {n_persons} models...")
+    print(f" -> Response matrix dimensions: {data.shape} (rows=questions, cols=models)")
+    print(f" -> Number of questions (items): {n_items}")
+    print(f" -> Number of models (persons): {n_persons}")
+    print(f" -> Fitting {model_type} IRT model via girth on {n_items} items and {n_persons} models...")
+
+    try:
+        print(" -> Estimating item parameters using Marginal Maximum Likelihood (MML)...")
+
+        if model_type == "1PL":
+            item_results = rasch_mml(
+                data,
+                options={
+                    'max_iteration': int(max_iterations),
+                    'quadrature_n': int(quadrature_n),
+                },
+            )
+            difficulty = item_results['Difficulty']
+            discrimination = np.ones_like(difficulty, dtype=float)
+            guessing = np.zeros_like(difficulty, dtype=float)
+
+        elif model_type == "2PL":
+            item_results = twopl_mml(
+                data,
+                options={
+                    'max_iteration': int(max_iterations),
+                    'quadrature_n': int(quadrature_n),
+                },
+            )
+            discrimination = item_results['Discrimination']
+            difficulty = item_results['Difficulty']
+            guessing = np.zeros_like(difficulty, dtype=float)
+
+        else:  # "3PL"
+            item_results = threepl_mml(
+                data,
+                options={
+                    'max_iteration': int(max_iterations),
+                    'quadrature_n': int(quadrature_n),
+                },
+            )
+            discrimination = item_results['Discrimination']
+            difficulty = item_results['Difficulty']
+            guessing = item_results.get('Guessing')
+            if guessing is None:
+                guessing = np.zeros_like(difficulty, dtype=float)
+
+        logger.info("Item parameters estimated successfully.")
+        print(" -> Item parameters estimated successfully")
+        print(f" -> Estimated parameters for {len(discrimination)} items")
+
+        print(f" -> Discrimination range: [{np.min(discrimination):.3f}, {np.max(discrimination):.3f}], mean: {np.mean(discrimination):.3f}")
+        print(f" -> Difficulty range: [{np.min(difficulty):.3f}, {np.max(difficulty):.3f}], mean: {np.mean(difficulty):.3f}")
+        print(f" -> Guessing range: [{np.min(guessing):.3f}, {np.max(guessing):.3f}], mean: {np.mean(guessing):.3f}")
+
+        # These checks only make sense for models that actually estimate the parameter:
+        # 1PL fixes discrimination to 1, and 1PL/2PL fix guessing to 0 by construction.
+        if model_type in {"2PL", "3PL"} and np.allclose(discrimination, 1.0, atol=0.01):
+            print(" -> WARNING: All discrimination values are ~1.0. This may indicate convergence issues.")
+        if model_type == "3PL" and np.allclose(guessing, 0.0, atol=0.01):
+            print(" -> WARNING: All guessing values are ~0.0. This may indicate convergence issues.")
+
+        # Estimate person abilities (theta) but do not log or return them
+        ability_3pl_eap(data, difficulty, discrimination, guessing)
+
+    except Exception as e:
+        logger.error(f"GIRTH estimation failed: {e}")
+        print(f" -> ERROR: GIRTH estimation failed: {e}")
+        raise RuntimeError(f"GIRTH estimation failed. Ensure data is not empty or all zeros. Error: {e}") from e
+
+    print(" -> Formatting results...")
+    if model_type == "3PL":
+        note = '3PL model fit with girth threepl_mml; upper asymptote fixed at 1.0 (not estimated).'
+    elif model_type == "2PL":
+        note = '2PL model fit with girth twopl_mml; guessing fixed to 0 and upper asymptote fixed at 1.0.'
+    else:  # "1PL"
+        note = (
+            '1PL (Rasch) model fit with girth rasch_mml; discrimination fixed to 1, '
+            'guessing fixed to 0, and upper asymptote fixed at 1.0.'
+        )
+
+    results = {
+        'item_parameters': {},
+        'model_info': {
+            'n_items': n_items,
+            'n_persons': n_persons,
+            'model_type': model_type,
+            'method': 'MML (Marginal Maximum Likelihood)',
+            'note': note,
+        },
+    }
+
+    print(f" -> Mapping item parameters for {len(question_ids)} questions...")
+    for idx, q_id in enumerate(question_ids):
+        if idx < len(discrimination):
+            results['item_parameters'][q_id] = {
+                'discrimination': float(discrimination[idx]),
+                'difficulty': float(difficulty[idx]),
+                'guessing': float(guessing[idx])
+            }
+    print(f" -> Mapped parameters for {len(results['item_parameters'])} items")
+
+    print(f" -> {model_type} IRT analysis completed successfully")
+    logger.info(f"{model_type} IRT analysis completed successfully.")
+    return results
+
+
+def calculate_statistics(response_matrix: List[List[int]],
+                         question_ids: List[str],
+                         model_names: List[str]) -> Dict:
+    """Calculate basic statistics for the response matrix."""
+    matrix = np.array(response_matrix)
+
+    stats = {
+        'question_statistics': {},
+        'model_statistics': {},
+        'overall': {
+            'total_responses': int(matrix.size),
+            'correct_responses': int(np.sum(matrix)),
+            'accuracy': float(np.mean(matrix)),
+            'n_questions': len(question_ids),
+            'n_models': len(model_names)
+        }
+    }
+
+    # Question-level statistics (rows are questions)
+    for idx, question_id in enumerate(question_ids):
+        question_scores = matrix[idx, :]
+        stats['question_statistics'][question_id] = {
+            'mean_score': float(np.mean(question_scores)),
+            'std_score': float(np.std(question_scores)),
+            'total_correct': int(np.sum(question_scores)),
+            'total_attempts': len(question_scores)
+        }
+
+    # Model-level statistics (columns are models)
+    for idx, model_name in enumerate(model_names):
+        model_scores = matrix[:, idx]
+        stats['model_statistics'][model_name] = {
+            'mean_score': float(np.mean(model_scores)),
+            'std_score': float(np.std(model_scores)),
+            'total_correct': int(np.sum(model_scores)),
+            'total_attempts': len(model_scores)
+        }
+
+    return stats
diff --git a/IRT/load_scores.py b/IRT/load_scores.py
new file mode 100644
index 0000000..7d47465
--- /dev/null
+++ b/IRT/load_scores.py
@@ -0,0 +1,199 @@
+"""Load and process score data from ACE evaluation outputs."""
+
+import glob
+import json
+import logging
+import os
+from collections import defaultdict
+from typing import Dict, List, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+def 
load_score_files(scores_dir: str) -> List[Dict]: + """Load all JSON score files from the scores directory. + + Args: + scores_dir: Path to the scores directory + + Returns: + List of loaded JSON data dictionaries + """ + if not os.path.exists(scores_dir): + print(f" -> ERROR: Directory does not exist: {scores_dir}") + logger.error(f"Directory does not exist: {scores_dir}") + return [] + + pattern = os.path.join(scores_dir, "**/*.json") + files = glob.glob(pattern, recursive=True) + print(f" -> Found {len(files)} JSON files") + logger.info(f"Found {len(files)} score files") + + data = [] + errors = 0 + for i, file_path in enumerate(files): + try: + with open(file_path, 'r') as f: + file_data = json.load(f) + file_data['_file_path'] = file_path + data.append(file_data) + if (i + 1) % 50 == 0: + print(f" -> Loaded {i + 1}/{len(files)} files...") + except Exception as e: + errors += 1 + logger.warning(f"Error loading {file_path}: {e}") + continue + + if errors > 0: + print(f" -> Warning: {errors} files failed to load") + print(f" -> Successfully loaded {len(data)} files") + + return data + + +def extract_question_responses(data: List[Dict]) -> Tuple[Dict[str, Dict], Dict[str, str]]: + """Extract question-response matrix from score data. + + Args: + data: List of score file data dictionaries + + Returns: + Tuple of (response_matrix, question_info) + - response_matrix: Dict mapping (model_name, question_id) -> score (0 or 1) + - question_info: Dict mapping question_id -> question metadata + """ + response_matrix = {} + question_info = {} + files_processed = 0 + samples_processed = 0 + + print(f" -> Processing {len(data)} score files...") + for file_idx, file_data in enumerate(data): + if 'samples' not in file_data: + continue + + # Extract model name from file path + file_path = file_data.get('_file_path', '') + model_name = extract_model_name(file_path) + + if not model_name: + continue + + # Extract capability/task name + eval_data = file_data.get('eval', {}) + task_name = eval_data.get('task', 'unknown') + + if 'samples' not in file_data: + continue + + files_processed += 1 + for sample in file_data['samples']: + samples_processed += 1 + question_id = sample.get('id', '') + if not question_id: + continue + + # Create unique question ID: task_name + question_id + unique_question_id = f"{task_name}_{question_id}" + + # Extract score (C = Correct = 1, others = 0) + scores = sample.get('scores', {}) + score_value = 0 + if 'custom_scorer' in scores: + scorer_result = scores['custom_scorer'] + if isinstance(scorer_result, dict): + value = scorer_result.get('value', '') + score_value = 1 if value == 'C' else 0 + elif scorer_result == 'C': + score_value = 1 + + # Store response + key = (model_name, unique_question_id) + response_matrix[key] = score_value + + # Store question info (only once per question) + if unique_question_id not in question_info: + question_info[unique_question_id] = { + 'task': task_name, + 'question_id': question_id, + 'input': sample.get('input', ''), + 'target': sample.get('target', '') + } + + if (file_idx + 1) % 20 == 0: + print(f" -> Processed {file_idx + 1}/{len(data)} files, {samples_processed} samples...") + + print(f" -> Processed {files_processed} files with samples") + print(f" -> Total samples processed: {samples_processed}") + logger.info(f"Extracted {len(question_info)} unique questions") + logger.info(f"Extracted {len(response_matrix)} model-question responses") + + return response_matrix, question_info + + +def extract_model_name(file_path: str) -> str: + 
"""Extract model name from file path. + + Args: + file_path: Full path to the score file + + Returns: + Model name or empty string + """ + parts = file_path.split('/') + # Look for model name in path (typically after 'scores/') + try: + scores_idx = parts.index('scores') + if scores_idx + 1 < len(parts): + return parts[scores_idx + 1] + except ValueError: + pass + + return '' + + +def create_response_matrix(response_data: Dict[Tuple[str, str], int]) -> Tuple[List[List[int]], List[str], List[str]]: + """Create a response matrix for IRT analysis. + + Args: + response_data: Dict mapping (model_name, question_id) -> score + + Returns: + Tuple of (response_matrix, model_names, question_ids) + - response_matrix: 2D list where rows are questions and columns are models + - model_names: List of model names (column order) + - question_ids: List of question IDs (row order) + """ + # Get unique models and questions + models = sorted(set(model for model, _ in response_data.keys())) + questions = sorted(set(qid for _, qid in response_data.keys())) + + # Create matrix: rows = questions, columns = models + matrix = [] + for question_id in questions: + row = [] + for model_name in models: + key = (model_name, question_id) + score = response_data.get(key, 0) # Default to 0 if missing + row.append(score) + matrix.append(row) + + logger.info(f"Created response matrix: {len(questions)} questions x {len(models)} models") + + return matrix, models, questions + + +def get_model_question_counts(response_data: Dict[Tuple[str, str], int]) -> Dict[str, int]: + """Get count of questions per model. + + Args: + response_data: Dict mapping (model_name, question_id) -> score + + Returns: + Dict mapping model_name -> question_count + """ + model_counts = defaultdict(int) + for model_name, _ in response_data.keys(): + model_counts[model_name] += 1 + return dict(model_counts) + diff --git a/IRT/main.py b/IRT/main.py new file mode 100644 index 0000000..c6b258b --- /dev/null +++ b/IRT/main.py @@ -0,0 +1,239 @@ +"""Main script for IRT analysis of ACE evaluation scores.""" + +import json +import logging +import os +from typing import Dict + +import hydra +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +from omegaconf import DictConfig + +from IRT.load_scores import ( + create_response_matrix, + extract_question_responses, + load_score_files, +) +from IRT.irt_analysis import calculate_statistics, fit_3pl_irt + +matplotlib.use("Agg") # Use non-interactive backend + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@hydra.main( + version_base=None, + config_path="cfg", + config_name="irt_config" +) +def main(cfg: DictConfig) -> None: + """Main function for IRT analysis.""" + print("="*80) + print("STARTING IRT ANALYSIS") + print("="*80) + logger.info("Starting IRT analysis...") + + # Load score files + scores_dir = cfg.data_cfg.scores_dir + print(f"\n[STEP 1] Loading score files from: {scores_dir}") + logger.info(f"Loading scores from: {scores_dir}") + + score_data = load_score_files(scores_dir) + print(f" -> Loaded {len(score_data)} score files") + if not score_data: + print(" -> ERROR: No score data loaded. Check the scores directory path.") + logger.error("No score data loaded. 
Check the scores directory path.") + return + + # Extract question responses + print(f"\n[STEP 2] Extracting question responses from {len(score_data)} files...") + response_data, question_info = extract_question_responses(score_data) + print(f" -> Extracted {len(question_info)} unique questions") + print(f" -> Extracted {len(response_data)} model-question responses") + if not response_data: + print(" -> ERROR: No response data extracted.") + logger.error("No response data extracted.") + return + + # Create response matrix + print(f"\n[STEP 3] Creating response matrix...") + response_matrix, model_names, question_ids = create_response_matrix(response_data) + print(f" -> Matrix shape: {len(question_ids)} questions x {len(model_names)} models") + print(f" -> Models: {', '.join(model_names)}") + + # Calculate basic statistics + print(f"\n[STEP 4] Calculating basic statistics...") + logger.info("Calculating statistics...") + stats = calculate_statistics(response_matrix, question_ids, model_names) + print(f" -> Overall accuracy: {stats['overall']['accuracy']:.3f}") + print(f" -> Total responses: {stats['overall']['total_responses']}") + print(f" -> Correct responses: {stats['overall']['correct_responses']}") + + # Fit IRT model + print(f"\n[STEP 5] Fitting IRT model (this may take a while)...") + logger.info("Fitting IRT model...") + try: + irt_results = fit_3pl_irt( + response_matrix, + question_ids, + model_names, + max_iterations=cfg.irt_cfg.max_iterations, + quadrature_n=cfg.irt_cfg.quadrature_n, + model_type=cfg.irt_cfg.model_type, + ) + print(f" -> IRT model fitting completed successfully") + print(f" -> Model type: {irt_results.get('model_info', {}).get('model_type', 'Unknown')}") + except Exception as e: + print(f" -> ERROR: Failed to fit IRT model: {e}") + logger.error(f"Error fitting IRT model: {e}") + logger.error("Make sure girth is installed: pip install girth") + import traceback + traceback.print_exc() + return + + # Combine results + results = { + 'irt_parameters': irt_results, + 'statistics': stats, + 'question_info': question_info, + 'model_names': model_names, + 'question_ids': question_ids, + } + + # Save results + print(f"\n[STEP 6] Saving results...") + output_dir = cfg.output_cfg.output_dir + os.makedirs(output_dir, exist_ok=True) + print(f" -> Output directory: {output_dir}") + + output_file = os.path.join(output_dir, cfg.output_cfg.output_filename) + print(f" -> Writing to: {output_file}") + with open(output_file, 'w') as f: + json.dump(results, f, indent=2) + + print(f" -> Results saved successfully!") + logger.info(f"Results saved to: {output_file}") + + # Create and save plots for item parameters + print(f"\n[STEP 7] Creating plots for item parameter distributions...") + try: + plot_file = os.path.join(output_dir, "irt_item_parameters_distributions.png") + create_item_parameter_plots(irt_results, plot_file) + print(f" -> Plots saved to: {plot_file}") + logger.info(f"Plots saved to: {plot_file}") + except Exception as e: + print(f" -> Warning: Failed to create plots: {e}") + logger.warning(f"Failed to create plots: {e}") + import traceback + traceback.print_exc() + + # Print summary + print("\n" + "="*80) + print("IRT ANALYSIS SUMMARY") + print("="*80) + print(f"Total questions: {len(question_ids)}") + print(f"Total models: {len(model_names)}") + print(f"Overall accuracy: {stats['overall']['accuracy']:.3f}") + print(f"Total responses: {stats['overall']['total_responses']}") + print(f"Correct responses: {stats['overall']['correct_responses']}") + print(f"\nModel names: 
{', '.join(model_names)}") + + item_params = irt_results.get('item_parameters', {}) + + print("\n" + "="*80) + print(f"Full results saved to: {output_file}") + plot_file = os.path.join(output_dir, "irt_item_parameters_distributions.png") + print(f"Item parameter plots saved to: {plot_file}") + print("="*80) + + +def create_item_parameter_plots(irt_results: Dict, output_path: str) -> None: + """Create histogram plots for item parameter distributions. + + Args: + irt_results: Dictionary containing IRT results with item_parameters + output_path: Path to save the plot file + """ + item_params = irt_results.get('item_parameters', {}) + model_info = irt_results.get('model_info', {}) + model_type = model_info.get('model_type', 'Unknown model') + + if not item_params: + logger.warning("No item parameters found for plotting") + return + + difficulties = [] + discriminations = [] + guessings = [] + + for question_id, params in item_params.items(): + diff = params.get("difficulty") + disc = params.get("discrimination") + guess = params.get("guessing") + + if diff is not None and not np.isnan(diff): + difficulties.append(diff) + if disc is not None and not np.isnan(disc): + discriminations.append(disc) + if guess is not None and not np.isnan(guess): + guessings.append(guess) + + fig, axes = plt.subplots(1, 3, figsize=(15, 5)) + fig.suptitle( + f"IRT Item Parameter Distributions ({model_type})", + fontsize=16, + fontweight="bold", + ) + + ax1 = axes[0] + if difficulties: + ax1.hist(difficulties, bins=50, edgecolor='black', alpha=0.7, color='skyblue') + ax1.set_xlabel('Difficulty (b)', fontsize=12) + ax1.set_ylabel('Frequency', fontsize=12) + ax1.set_title(f'Difficulty Distribution (n={len(difficulties)})', fontsize=12) + ax1.grid(True, alpha=0.3) + ax1.axvline(np.mean(difficulties), color='red', linestyle='--', linewidth=2, label=f'Mean: {np.mean(difficulties):.3f}') + ax1.legend() + else: + ax1.text(0.5, 0.5, 'No data available', ha='center', va='center', transform=ax1.transAxes) + ax1.set_title('Difficulty Distribution', fontsize=12) + + ax2 = axes[1] + if discriminations: + ax2.hist(discriminations, bins=50, edgecolor='black', alpha=0.7, color='lightgreen') + ax2.set_xlabel('Discrimination (a)', fontsize=12) + ax2.set_ylabel('Frequency', fontsize=12) + ax2.set_title(f'Discrimination Distribution (n={len(discriminations)})', fontsize=12) + ax2.grid(True, alpha=0.3) + ax2.axvline(np.mean(discriminations), color='red', linestyle='--', linewidth=2, label=f'Mean: {np.mean(discriminations):.3f}') + ax2.legend() + else: + ax2.text(0.5, 0.5, 'No data available', ha='center', va='center', transform=ax2.transAxes) + ax2.set_title('Discrimination Distribution', fontsize=12) + + ax3 = axes[2] + if guessings: + ax3.hist(guessings, bins=50, edgecolor='black', alpha=0.7, color='salmon') + ax3.set_xlabel('Guessing (c)', fontsize=12) + ax3.set_ylabel('Frequency', fontsize=12) + ax3.set_title(f'Guessing Parameter Distribution (n={len(guessings)})', fontsize=12) + ax3.grid(True, alpha=0.3) + ax3.axvline(np.mean(guessings), color='red', linestyle='--', linewidth=2, label=f'Mean: {np.mean(guessings):.3f}') + ax3.legend() + else: + ax3.text(0.5, 0.5, 'No data available', ha='center', va='center', transform=ax3.transAxes) + ax3.set_title('Guessing Parameter Distribution', fontsize=12) + + plt.tight_layout() + plt.savefig(output_path, dpi=300, bbox_inches='tight') + plt.close() + + logger.info(f"Item parameter plots saved to {output_path}") + + +if __name__ == "__main__": + main() +
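
For reviewers who want to sanity-check the output of `main.py`, a minimal sketch along these lines can load the results JSON and rank items by estimated difficulty. It is illustrative only and not part of this PR; `RESULTS_PATH` is a placeholder for whatever `output_cfg.output_dir` and `output_cfg.output_filename` resolve to, and the keys mirror the `results` dictionary written in STEP 6 of `main.py`.

```python
import json

# Placeholder path -- point this at output_cfg.output_dir/output_cfg.output_filename.
RESULTS_PATH = "results/irt_results.json"

with open(RESULTS_PATH) as f:
    results = json.load(f)

item_params = results["irt_parameters"]["item_parameters"]  # qid -> {difficulty, discrimination, guessing}
question_info = results["question_info"]                    # qid -> {task, question_id, input, target}

# Ten hardest items by the difficulty (b) parameter, hardest first.
hardest = sorted(item_params.items(), key=lambda kv: kv[1]["difficulty"], reverse=True)[:10]
for qid, params in hardest:
    task = question_info.get(qid, {}).get("task", "unknown")
    print(f"{qid} (task={task}): b={params['difficulty']:.2f}, "
          f"a={params['discrimination']:.2f}, c={params['guessing']:.2f}")
```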
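As a companion to the "IRT Parameters Explained" section of the README, the sketch below shows how the three item parameters combine in the standard 3PL item characteristic curve, P(correct) = c + (1 - c) / (1 + exp(-a * (theta - b))). The helper is purely illustrative and is not used anywhere in the module.

```python
import numpy as np


def p_correct(theta: float, a: float, b: float, c: float = 0.0) -> float:
    """Standard 3PL item characteristic curve.

    With c = 0 this reduces to the 2PL model; with a = 1 and c = 0 it is the 1PL (Rasch) model.
    """
    return float(c + (1.0 - c) / (1.0 + np.exp(-a * (theta - b))))


# Example: an average-ability model (theta = 0) on a moderately hard item.
print(p_correct(theta=0.0, a=1.2, b=0.8, c=0.1))  # ~0.35
```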