Skip to content

Commit d76001a

Browse files
Update prompts and HC version for 4b/8b VLM modelcards (#2096)
* Update prompts and HC version for 4b/8b VLM modelcards
* updated prompts to be shorter
1 parent 5d43f87 commit d76001a

6 files changed

+330
-201
lines changed

docs/_posts/AbdullahMubeenAnwar/2026-01-08-jsl_meds_vlm_4b_q16_v1_en.md

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ date: 2026-01-08
77
tags: [medical, clinical, vlm, q16, 4b, en, licensed, llamacpp]
88
task: [Summarization, Question Answering]
99
language: en
10-
edition: Healthcare NLP 6.2.0
10+
edition: Healthcare NLP 6.3.0
1111
spark_version: 3.4
1212
supported: true
1313
engine: llamacpp
@@ -39,10 +39,16 @@ from sparknlp_jsl.annotator import *
3939
from sparknlp_jsl.utils import *
4040
from pyspark.ml import Pipeline
4141

42-
prompt = """Based only on the information visible in this document, identify the patient’s primary diagnoses,
43-
current presenting symptoms, and the complete treatment plan.
44-
For the treatment plan, list each medication with its dose, frequency, timing in relation to meals, and duration if specified.
45-
Also state the consulting department, consulting doctor, and the recommended follow-up interval."""
42+
prompt = """
43+
Extract from the document and return strictly as JSON:
44+
45+
{
46+
"patient": {"name": string, "age": string, "sex": string, "hospital_no": string, "episode_no": string, "episode_date": string},
47+
"diagnoses": [string],
48+
"symptoms": [string],
49+
"treatment": [{"med": string, "dose": string, "freq": string}]
50+
}
51+
"""
4652

4753
input_df = vision_llm_preprocessor(
4854
spark=spark,
@@ -59,7 +65,7 @@ image_assembler = ImageAssembler() \
5965
.setInputCol("image") \
6066
.setOutputCol("image_assembler")
6167

62-
medicalVisionLLM = MedicalVisionLLM.load("jsl_meds_vlm_4b_q16_v1", "en", "clinical/models") \
68+
medicalVisionLLM = MedicalVisionLLM.pretrained("jsl_meds_vlm_4b_q16_v1", "en", "clinical/models") \
6369
.setInputCols(["caption_document", "image_assembler"]) \
6470
.setOutputCol("completions")
6571

@@ -78,10 +84,16 @@ result = model.transform(input_df)
7884
from johnsnowlabs import nlp, medical
7985
from sparknlp_jsl.utils import vision_llm_preprocessor
8086

81-
prompt = """Based only on the information visible in this document, identify the patient’s primary diagnoses,
82-
current presenting symptoms, and the complete treatment plan.
83-
For the treatment plan, list each medication with its dose, frequency, timing in relation to meals, and duration if specified.
84-
Also state the consulting department, consulting doctor, and the recommended follow-up interval."""
87+
prompt = """
88+
Extract from the document and return strictly as JSON:
89+
90+
{
91+
"patient": {"name": string, "age": string, "sex": string, "hospital_no": string, "episode_no": string, "episode_date": string},
92+
"diagnoses": [string],
93+
"symptoms": [string],
94+
"treatment": [{"med": string, "dose": string, "freq": string}]
95+
}
96+
"""
8597

8698
input_df = vision_llm_preprocessor(
8799
spark=spark,
@@ -98,7 +110,7 @@ image_assembler = nlp.ImageAssembler() \
98110
.setInputCol("image") \
99111
.setOutputCol("image_assembler")
100112

101-
medicalVisionLLM = medical.MedicalVisionLLM.load("jsl_meds_vlm_4b_q16_v1", "en", "clinical/models") \
113+
medicalVisionLLM = medical.MedicalVisionLLM.pretrained("jsl_meds_vlm_4b_q16_v1", "en", "clinical/models") \
102114
.setInputCols(["caption_document", "image_assembler"]) \
103115
.setOutputCol("completions")
104116

@@ -117,10 +129,16 @@ import com.johnsnowlabs.nlp.annotators._
117129
import com.johnsnowlabs.nlp.pretrained._
118130
import org.apache.spark.ml.Pipeline
119131

120-
val prompt = """Based only on the information visible in this document, identify the patient’s primary diagnoses,
121-
current presenting symptoms, and the complete treatment plan.
122-
For the treatment plan, list each medication with its dose, frequency, timing in relation to meals, and duration if specified.
123-
Also state the consulting department, consulting doctor, and the recommended follow-up interval."""
132+
val prompt = """
133+
Extract from the document and return strictly as JSON:
134+
135+
{
136+
"patient": {"name": string, "age": string, "sex": string, "hospital_no": string, "episode_no": string, "episode_date": string},
137+
"diagnoses": [string],
138+
"symptoms": [string],
139+
"treatment": [{"med": string, "dose": string, "freq": string}]
140+
}
141+
"""
124142

125143
val inputDf = VisionLLMPreprocessor(
126144
spark = spark,
@@ -138,7 +156,7 @@ val imageAssembler = new ImageAssembler()
138156
.setOutputCol("image_assembler")
139157

140158
val medicalVisionLLM = MedicalVisionLLM
141-
.load("jsl_meds_vlm_4b_q16_v1", "en", "clinical/models")
159+
.pretrained("jsl_meds_vlm_4b_q16_v1", "en", "clinical/models")
142160
.setInputCols(Array("caption_document", "image_assembler"))
143161
.setOutputCol("completions")
144162

@@ -156,7 +174,31 @@ val result = model.transform(inputDf)
156174
## Results
157175

158176
```json
159-
Based solely on the information visible in this document, the patient is a known case of systemic lupus erythematosus with scleroderma overlap and associated interstitial lung disease, presenting with tightness of the skin of the fists and ulcers on the pulp of the fingers, and has been advised treatment with linezolid 600 mg twice daily for 5 days if finger ulcers do not heal, clopidogrel 75 mg once daily after meals, amlodipine 5 mg once daily, domperidone 10 mg twice daily before meals, omeprazole 20 mg twice daily before meals, bosentan 62.5 mg twice daily after meals, sildenafil citrate 0.5 mg twice daily after meals, prednisolone 5 mg once daily after breakfast, mycophenolate mofetil 500 mg two tablets twice daily, L-methylfolate calcium 400 µg once daily, and ciprofloxacin 250 mg twice daily, with consultation by the Department of Rheumatology under Dr. Darshan Singh Bhakuni and a recommended review after 4 weeks.
177+
{
178+
"patient": {
179+
"name": "Ms RUKHSANA SHAHEEN",
180+
"age": "56 yrs",
181+
"sex": "Female",
182+
"hospital_no": "MH005990453",
183+
"episode_no": "030000528270",
184+
"episode_date": "02/07/2021 08:31AM"
185+
},
186+
"diagnoses": ["systemic lupus erythematosus", "scleroderma overlap", "interstitial lung disease"],
187+
"symptoms": ["tightness of skin of the fists", "ulcers on the pulp of the fingers"],
188+
"treatment": [
189+
{"med": "Linezolid", "dose": "600 mg", "freq": "twice a day for 5 Days"},
190+
{"med": "Clopidogrel", "dose": "75 mg", "freq": "once a day after meals"},
191+
{"med": "Amlodipine", "dose": "5 mg", "freq": "once a day"},
192+
{"med": "Domperidone", "dose": "10 mg", "freq": "twice a day before meals"},
193+
{"med": "Omeprazole", "dose": "20 Mg", "freq": "Twice a Day before Meal"},
194+
{"med": "Bosentan", "dose": "62.5 mg", "freq": "twice a day after meals"},
195+
{"med": "Sildenafil Citrate", "dose": "0.5 mg", "freq": "twice a day after meals"},
196+
{"med": "Prednisolone", "dose": "5 mg", "freq": "once a day after breakfast"},
197+
{"med": "Mycophenolate mofetil", "dose": "500 mg 2 tablets", "freq": "twice a day"},
198+
{"med": "L-methylfolate calcium", "dose": "400 µg 1 tablet", "freq": "once a day"},
199+
{"med": "ciprofloxacin", "dose": "250 mg", "freq": "twice a day"}
200+
]
201+
}
160202
```
161203

162204
{:.model-param}
@@ -165,7 +207,7 @@ Based solely on the information visible in this document, the patient is a known
165207
{:.table-model}
166208
|---|---|
167209
|Model Name:|jsl_meds_vlm_4b_q16_v1|
168-
|Compatibility:|Healthcare NLP 6.2.0+|
210+
|Compatibility:|Healthcare NLP 6.3.0+|
169211
|License:|Licensed|
170212
|Edition:|Official|
171213
|Input Labels:|[image, document]|

docs/_posts/AbdullahMubeenAnwar/2026-01-08-jsl_meds_vlm_4b_q4_v1_en.md

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ date: 2026-01-08
77
tags: [medical, clinical, vlm, q4, 4b, en, licensed, llamacpp]
88
task: [Summarization, Question Answering]
99
language: en
10-
edition: Healthcare NLP 6.2.0
10+
edition: Healthcare NLP 6.3.0
1111
spark_version: 3.4
1212
supported: true
1313
engine: llamacpp
@@ -39,10 +39,16 @@ from sparknlp_jsl.annotator import *
3939
from sparknlp_jsl.utils import *
4040
from pyspark.ml import Pipeline
4141

42-
prompt = """Based only on the information visible in this document, identify the patient’s primary diagnoses,
43-
current presenting symptoms, and the complete treatment plan.
44-
For the treatment plan, list each medication with its dose, frequency, timing in relation to meals, and duration if specified.
45-
Also state the consulting department, consulting doctor, and the recommended follow-up interval."""
42+
prompt = """
43+
Extract from the document and return strictly as JSON:
44+
45+
{
46+
"patient": {"name": string, "age": string, "sex": string, "hospital_no": string, "episode_no": string, "episode_date": string},
47+
"diagnoses": [string],
48+
"symptoms": [string],
49+
"treatment": [{"med": string, "dose": string, "freq": string}]
50+
}
51+
"""
4652

4753
input_df = vision_llm_preprocessor(
4854
spark=spark,
@@ -59,7 +65,7 @@ image_assembler = ImageAssembler() \
5965
.setInputCol("image") \
6066
.setOutputCol("image_assembler")
6167

62-
medicalVisionLLM = MedicalVisionLLM.load("jsl_meds_vlm_4b_q4_v1", "en", "clinical/models") \
68+
medicalVisionLLM = MedicalVisionLLM.pretrained("jsl_meds_vlm_4b_q4_v1", "en", "clinical/models") \
6369
.setInputCols(["caption_document", "image_assembler"]) \
6470
.setOutputCol("completions")
6571

@@ -78,10 +84,16 @@ result = model.transform(input_df)
7884
from johnsnowlabs import nlp, medical
7985
from sparknlp_jsl.utils import vision_llm_preprocessor
8086

81-
prompt = """Based only on the information visible in this document, identify the patient’s primary diagnoses,
82-
current presenting symptoms, and the complete treatment plan.
83-
For the treatment plan, list each medication with its dose, frequency, timing in relation to meals, and duration if specified.
84-
Also state the consulting department, consulting doctor, and the recommended follow-up interval."""
87+
prompt = """
88+
Extract from the document and return strictly as JSON:
89+
90+
{
91+
"patient": {"name": string, "age": string, "sex": string, "hospital_no": string, "episode_no": string, "episode_date": string},
92+
"diagnoses": [string],
93+
"symptoms": [string],
94+
"treatment": [{"med": string, "dose": string, "freq": string}]
95+
}
96+
"""
8597

8698
input_df = vision_llm_preprocessor(
8799
spark=spark,
@@ -98,7 +110,7 @@ image_assembler = nlp.ImageAssembler() \
98110
.setInputCol("image") \
99111
.setOutputCol("image_assembler")
100112

101-
medicalVisionLLM = medical.MedicalVisionLLM.load("jsl_meds_vlm_4b_q4_v1", "en", "clinical/models") \
113+
medicalVisionLLM = medical.MedicalVisionLLM.pretrained("jsl_meds_vlm_4b_q4_v1", "en", "clinical/models") \
102114
.setInputCols(["caption_document", "image_assembler"]) \
103115
.setOutputCol("completions")
104116

@@ -117,10 +129,16 @@ import com.johnsnowlabs.nlp.annotators._
117129
import com.johnsnowlabs.nlp.pretrained._
118130
import org.apache.spark.ml.Pipeline
119131

120-
val prompt = """Based only on the information visible in this document, identify the patient’s primary diagnoses,
121-
current presenting symptoms, and the complete treatment plan.
122-
For the treatment plan, list each medication with its dose, frequency, timing in relation to meals, and duration if specified.
123-
Also state the consulting department, consulting doctor, and the recommended follow-up interval."""
132+
val prompt = """
133+
Extract from the document and return strictly as JSON:
134+
135+
{
136+
"patient": {"name": string, "age": string, "sex": string, "hospital_no": string, "episode_no": string, "episode_date": string},
137+
"diagnoses": [string],
138+
"symptoms": [string],
139+
"treatment": [{"med": string, "dose": string, "freq": string}]
140+
}
141+
"""
124142

125143
val inputDf = VisionLLMPreprocessor(
126144
spark = spark,
@@ -138,7 +156,7 @@ val imageAssembler = new ImageAssembler()
138156
.setOutputCol("image_assembler")
139157

140158
val medicalVisionLLM = MedicalVisionLLM
141-
.load("jsl_meds_vlm_4b_q4_v1", "en", "clinical/models")
159+
.pretrained("jsl_meds_vlm_4b_q4_v1", "en", "clinical/models")
142160
.setInputCols(Array("caption_document", "image_assembler"))
143161
.setOutputCol("completions")
144162

@@ -156,7 +174,31 @@ val result = model.transform(inputDf)
156174
## Results
157175

158176
```json
159-
Based on the information visible in the document, the patient has systemic lupus erythematosus with scleroderma overlap and associated interstitial lung disease, is presenting with tightness of the skin of the fists and ulcers on the pulp of the fingers, and has been advised a treatment plan that includes a short 5 day course of linezolid 600 mg twice daily if the finger ulcers do not heal, along with ongoing medications comprising clopidogrel 75 mg once daily after meals, amlodipine 5 mg once daily, domperidone 10 mg twice daily before meals, omeprazole 20 mg twice daily before meals, bosentan 62.5 mg twice daily after meals, sildenafil citrate 0.5 mg twice daily after meals, prednisolone 5 mg once daily after breakfast, mycophenolate mofetil 500 mg two tablets twice daily, L-methylfolate calcium 400 µg once daily, and ciprofloxacin 250 mg twice daily, with care provided by the Rheumatology department under Dr. Darshan Singh Bhakuni and a recommended follow up after four weeks.
177+
{
178+
"patient": {
179+
"name": "Ms RUKHSANA SHAHEEN",
180+
"age": "56 yrs",
181+
"sex": "Female",
182+
"hospital_no": "MH005990453",
183+
"episode_no": "030000528270",
184+
"episode_date": "02/07/2021 08:31AM"
185+
},
186+
"diagnoses": ["systemic lupus erythematosus", "scleroderma overlap", "interstitial lung disease"],
187+
"symptoms": ["tightness of skin of the fists", "ulcers on the pulp of the fingers"],
188+
"treatment": [
189+
{"med": "Linezolid", "dose": "600 mg", "freq": "twice a day for 5 Days"},
190+
{"med": "Clopidogrel", "dose": "75 mg", "freq": "once a day after meals"},
191+
{"med": "Amlodipine", "dose": "5 mg", "freq": "once a day"},
192+
{"med": "Domperidone", "dose": "10 mg", "freq": "twice a day before meals"},
193+
{"med": "Omeprazole", "dose": "20 Mg", "freq": "Twice a Day before Meal"},
194+
{"med": "Bosentan", "dose": "62.5 mg", "freq": "twice a day after meals"},
195+
{"med": "Sildenafil Citrate", "dose": "0.5 mg", "freq": "twice a day after meals"},
196+
{"med": "Prednisolone", "dose": "5 mg", "freq": "once a day after breakfast"},
197+
{"med": "Mycophenolate mofetil", "dose": "500 mg 2 tablets", "freq": "twice a day"},
198+
{"med": "L-methylfolate calcium", "dose": "400 µg 1 tablet", "freq": "once a day"},
199+
{"med": "ciprofloxacin", "dose": "250 mg", "freq": "twice a day"}
200+
]
201+
}
160202
```
161203

162204
{:.model-param}
@@ -165,7 +207,7 @@ Based on the information visible in the document, the patient has systemic lupus
165207
{:.table-model}
166208
|---|---|
167209
|Model Name:|jsl_meds_vlm_4b_q4_v1|
168-
|Compatibility:|Healthcare NLP 6.2.0+|
210+
|Compatibility:|Healthcare NLP 6.3.0+|
169211
|License:|Licensed|
170212
|Edition:|Official|
171213
|Input Labels:|[image, document]|

0 commit comments

Comments
 (0)