diff --git a/dockerfiles/pytorch/Dockerfile.inf2 b/dockerfiles/pytorch/Dockerfile.inf2
index d84c43ce..426e17cf 100644
--- a/dockerfiles/pytorch/Dockerfile.inf2
+++ b/dockerfiles/pytorch/Dockerfile.inf2
@@ -1,34 +1,23 @@
 # Build based on https://github.com/aws/deep-learning-containers/blob/master/huggingface/pytorch/inference/docker/2.1/py3/sdk2.18.0/Dockerfile.neuronx
-FROM ubuntu:20.04 as base
+FROM ubuntu:22.04 AS base
 LABEL maintainer="Hugging Face"
 
-ARG PYTHON=python3.10
-ARG PYTHON_VERSION=3.10.12
-ARG MAMBA_VERSION=23.1.0-4
-
-# Neuron SDK components version numbers
-# ARG NEURONX_FRAMEWORK_VERSION=2.1.2.2.1.0
-# ARG NEURONX_DISTRIBUTED_VERSION=0.7.0
-# ARG NEURONX_CC_VERSION=2.13.66.0
-ARG NEURONX_TRANSFORMERS_VERSION=0.12.313
 ARG NEURONX_COLLECTIVES_LIB_VERSION=2.22.33.0-d2128d1aa
 ARG NEURONX_RUNTIME_LIB_VERSION=2.22.19.0-5856c0b42
 ARG NEURONX_TOOLS_VERSION=2.19.0.0
-
 # HF ARGS
 ARG OPTIMUM_NEURON_VERSION=0.0.28
 
 # See http://bugs.python.org/issue19846
-ENV LANG C.UTF-8
-ENV LD_LIBRARY_PATH /opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
-ENV PATH /opt/conda/bin:/opt/aws/neuron/bin:$PATH
+ENV LANG=C.UTF-8
+ENV LD_LIBRARY_PATH=/opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
+ENV PATH=/opt/aws/neuron/bin:$PATH
 
 RUN apt-get update \
  && apt-get upgrade -y \
  && apt-get install -y --no-install-recommends software-properties-common \
- && add-apt-repository ppa:openjdk-r/ppa \
  && apt-get update \
  && apt-get install -y --no-install-recommends \
     build-essential \
@@ -36,25 +25,14 @@ RUN apt-get update \
     ca-certificates \
     cmake \
     curl \
-    emacs \
     git \
     jq \
-    libgl1-mesa-glx \
-    libsm6 \
-    libxext6 \
-    libxrender-dev \
-    openjdk-11-jdk \
-    vim \
     wget \
     unzip \
     zlib1g-dev \
-    libcap-dev \
-    gpg-agent \
-    && rm -rf /var/lib/apt/lists/* \
-    && rm -rf /tmp/tmp* \
-    && apt-get clean
+    gpg-agent
 
-RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
+RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list
 RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
 
 # Install Neuronx tools
@@ -62,55 +40,30 @@ RUN apt-get update \
  && apt-get install -y \
     aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
     aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
-    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
-    && rm -rf /var/lib/apt/lists/* \
+    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION
+
+RUN apt-get install -y \
+    python3 \
+    python3-pip \
+    python-is-python3
+
+RUN rm -rf /var/lib/apt/lists/* \
  && rm -rf /tmp/tmp* \
  && apt-get clean
 
-# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
-RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
-    mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
-    /var/lib/dpkg/info/ca-certificates-java.postinst configure;
-
-RUN curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \
-    && chmod +x ~/mambaforge.sh \
-    && ~/mambaforge.sh -b -p /opt/conda \
-    && rm ~/mambaforge.sh \
-    && /opt/conda/bin/conda update -y conda \
-    && /opt/conda/bin/conda install -c conda-forge -y \
-    python=$PYTHON_VERSION \
-    pyopenssl \
-    cython \
-    mkl-include \
-    mkl \
-    botocore \
-    parso \
-    scipy \
-    typing \
-    # Below 2 are included in miniconda base, but not mamba so need to install
-    conda-content-trust \
-    charset-normalizer \
-    && /opt/conda/bin/conda update -y conda \
-    && /opt/conda/bin/conda clean -ya
-
-RUN conda install -c conda-forge \
-    scikit-learn \
-    h5py \
-    requests \
-    && conda clean -ya \
-    && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
-    && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
-    && pip install --no-cache-dir "protobuf>=3.18.3,<4" setuptools==69.5.1 packaging
-
+RUN pip install --no-cache-dir "protobuf>=3.18.3,<4" setuptools==69.5.1 packaging
+
 WORKDIR /
 
 # install Hugging Face libraries and its dependencies
-RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com --no-cache-dir optimum-neuron[neuronx]==${OPTIMUM_NEURON_VERSION} \
+RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com --no-cache-dir optimum-neuron[neuronx]==${OPTIMUM_NEURON_VERSION} \
  && pip install --no-deps --no-cache-dir -U torchvision==0.16.*
+# FIXME
+RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com git+https://github.com/huggingface/optimum-neuron.git@5237fb0ada643ba471f60ed3a5d2eef3b66e8e59
 
 COPY . .
-# install wheel and setuptools
+
 RUN pip install --no-cache-dir -U pip ".[st]"
 
 # copy application
@@ -119,5 +72,7 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle
 
 # copy entrypoint and change permissions
 COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh
+COPY --chmod=0755 scripts/inf2_env.py inf2_env.py
+COPY --chmod=0755 scripts/inf2_entrypoint.sh inf2_entrypoint.sh
 
-ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
+ENTRYPOINT ["bash", "-c", "./inf2_entrypoint.sh"]
diff --git a/scripts/inf2_entrypoint.sh b/scripts/inf2_entrypoint.sh
new file mode 100644
index 00000000..650633c6
--- /dev/null
+++ b/scripts/inf2_entrypoint.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -e -o pipefail -u
+
+export ENV_FILEPATH=$(mktemp)
+
+trap "rm -f ${ENV_FILEPATH}" EXIT
+
+touch $ENV_FILEPATH
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+${SCRIPT_DIR}/inf2_env.py $@
+
+source $ENV_FILEPATH
+
+rm -f $ENV_FILEPATH
+
+exec ${SCRIPT_DIR}/entrypoint.sh $@
\ No newline at end of file
diff --git a/scripts/inf2_env.py b/scripts/inf2_env.py
new file mode 100644
index 00000000..da69b5a2
--- /dev/null
+++ b/scripts/inf2_env.py
@@ -0,0 +1,235 @@
#!/usr/bin/env python

"""
This script is here to specify all missing environment variables that would be required to run some encoder models on
inferentia2.

It inspects the target model's neuron config (either embedded in the model config, or looked up in the Hugging Face
neuron cache) and writes the matching `export VAR=value` statements into the file named by $ENV_FILEPATH, so the
calling shell wrapper can source them before starting the real entrypoint.
"""

import argparse
import logging
import os
import sys
from typing import Any, Dict, List, Optional

from huggingface_hub import constants
from transformers import AutoConfig

from optimum.neuron.utils import get_hub_cached_entries
from optimum.neuron.utils.version_utils import get_neuronxcc_version

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', force=True)
logger = logging.getLogger(__name__)

# Pairs of (environment variable, neuron config key) that must agree with each other.
env_config_peering = [
    ("HF_BATCH_SIZE", "static_batch_size"),
    ("HF_OPTIMUM_SEQUENCE_LENGTH", "static_sequence_length"),
]

# By the end of this script all env vars should be specified properly
env_vars = [env_var for env_var, _ in env_config_peering]

# Currently not used for encoder models
# available_cores = get_available_cores()

neuronxcc_version = get_neuronxcc_version()


def parse_cmdline_and_set_env(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the TGI-style cmdline params intercepted here and mirror them into env vars.

    Unknown arguments are ignored (they belong to the wrapped entrypoint).

    Raises:
        Exception: if no model id is provided via cmdline or environment.
    """
    parser = argparse.ArgumentParser()
    if not argv:
        # Skip argv[0] (the program name): parse_known_args expects arguments only.
        argv = sys.argv[1:]
    # All these are params passed to tgi and intercepted here
    parser.add_argument(
        "--batch-size",
        type=int,
        default=os.getenv("HF_BATCH_SIZE", os.getenv("BATCH_SIZE", 0)),
    )
    parser.add_argument(
        "--sequence-length",
        type=int,
        default=os.getenv("HF_OPTIMUM_SEQUENCE_LENGTH", os.getenv("SEQUENCE_LENGTH", 0)),
    )
    parser.add_argument("--model-id", type=str, default=os.getenv("HF_MODEL_ID", os.getenv("HF_MODEL_DIR")))
    parser.add_argument("--revision", type=str, default=os.getenv("REVISION"))

    args = parser.parse_known_args(argv)[0]

    if not args.model_id:
        raise Exception(
            "No model id provided ! Either specify it using the --model-id cmdline argument or the HF_MODEL_ID env var"
        )

    # Override env with cmdline params
    os.environ["MODEL_ID"] = args.model_id

    # Set all tgi router and tgi server values to consistent values as early as possible
    # from the order of the parser defaults, the tgi router value can override the tgi server ones
    if args.batch_size > 0:
        os.environ["HF_BATCH_SIZE"] = str(args.batch_size)

    if args.sequence_length > 0:
        os.environ["HF_OPTIMUM_SEQUENCE_LENGTH"] = str(args.sequence_length)

    if args.revision:
        os.environ["REVISION"] = str(args.revision)

    return args


def neuron_config_to_env(neuron_config):
    """Write `export VAR=value` lines for every peered env var into $ENV_FILEPATH."""
    with open(os.environ["ENV_FILEPATH"], "w") as f:
        for env_var, config_key in env_config_peering:
            f.write("export {}={}\n".format(env_var, neuron_config[config_key]))


def sort_neuron_configs(dictionary):
    """Sort key: prefer entries with the larger static batch size (descending order)."""
    return -dictionary["static_batch_size"]


def lookup_compatible_cached_model(
    model_id: str, revision: Optional[str]
) -> Optional[Dict[str, Any]]:
    """Return the best cached neuron config compatible with the local setup, or None.

    Reuses the same mechanic as the one in use to configure the tgi server part.
    The only difference here is that we stay as flexible as possible on the
    compatibility part.
    """
    entries = get_hub_cached_entries(model_id, "inference")

    logger.debug(
        "Found %d cached entries for model %s, revision %s",
        len(entries),
        model_id,
        revision,
    )

    all_compatible = [
        entry
        for entry in entries
        if check_env_and_neuron_config_compatibility(entry, check_compiler_version=True)
    ]

    if not all_compatible:
        logger.debug(
            "No compatible cached entry found for model %s, env %s, neuronxcc version %s",
            model_id,
            get_env_dict(),
            neuronxcc_version,
        )
        return None

    logger.info("%d compatible neuron cached models found", len(all_compatible))

    # Pick the entry with the largest static batch size.
    all_compatible = sorted(all_compatible, key=sort_neuron_configs)

    entry = all_compatible[0]

    logger.info("Selected entry %s", entry)

    return entry


def check_env_and_neuron_config_compatibility(
    neuron_config: Dict[str, Any], check_compiler_version: bool
) -> bool:
    """Check that the neuron config agrees with the local compiler and the current env vars.

    An env var that is unset counts as compatible (it will be derived from the config).
    """
    logger.debug(
        "Checking the provided neuron config %s is compatible with the local setup and provided environment",
        neuron_config,
    )

    # Local setup compat checks
    # if neuron_config["num_cores"] > available_cores:
    #     logger.debug(
    #         "Not enough neuron cores available to run the provided neuron config"
    #     )
    #     return False

    if (
        check_compiler_version
        and neuron_config["compiler_version"] != neuronxcc_version
    ):
        logger.debug(
            "Compiler version conflict, the local one (%s) differs from the one used to compile the model (%s)",
            neuronxcc_version,
            neuron_config["compiler_version"],
        )
        return False

    for env_var, config_key in env_config_peering:
        try:
            neuron_config_value = str(neuron_config[config_key])
        except KeyError:
            logger.debug("No key %s found in neuron config %s", config_key, neuron_config)
            return False
        # Unset env vars default to the config value, i.e. they are always compatible.
        env_value = os.getenv(env_var, str(neuron_config_value))
        if env_value != neuron_config_value:
            logger.debug(
                "The provided env var '%s' and the neuron config '%s' param differ (%s != %s)",
                env_var,
                config_key,
                env_value,
                neuron_config_value,
            )
            return False

    return True


def get_env_dict() -> Dict[str, str]:
    """Snapshot of the peered env vars (values may be None when unset)."""
    return {k: os.getenv(k) for k in env_vars}


def main():
    """
    This script determines proper default TGI env variables for the neuron precompiled models to
    work properly
    :return:
    """
    args = parse_cmdline_and_set_env()

    for env_var in env_vars:
        if not os.getenv(env_var):
            break
    else:
        logger.info(
            "All env vars %s already set, skipping, user knows what they are doing",
            env_vars,
        )
        sys.exit(0)

    cache_dir = constants.HF_HUB_CACHE

    logger.info("Cache dir %s, model %s", cache_dir, args.model_id)

    config = AutoConfig.from_pretrained(args.model_id, revision=args.revision)
    neuron_config = getattr(config, "neuron", None)
    if neuron_config is not None:
        # The model embeds its own neuron config: it must match the current env.
        compatible = check_env_and_neuron_config_compatibility(
            neuron_config, check_compiler_version=False
        )
        if not compatible:
            env_dict = get_env_dict()
            msg = (
                "Invalid neuron config and env. Config {}, env {}, neuronxcc version {}"
            ).format(neuron_config, env_dict, neuronxcc_version)
            logger.error(msg)
            raise Exception(msg)
    else:
        neuron_config = lookup_compatible_cached_model(args.model_id, args.revision)

    if not neuron_config:
        neuron_config = {"static_batch_size": 1, "static_sequence_length": 128}
        # Third placeholder shows the chosen default config in the log message.
        msg = (
            "No compatible neuron config found. Provided env {}, neuronxcc version {}. Falling back to default {}"
        ).format(get_env_dict(), neuronxcc_version, neuron_config)
        logger.info(msg)

    logger.info("Final neuron config %s", neuron_config)

    neuron_config_to_env(neuron_config)


if __name__ == "__main__":
    main()