-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
933 lines (779 loc) · 77.5 KB
/
index.html
File metadata and controls
933 lines (779 loc) · 77.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Descriptive title and meta description: the old title ("Image Classification") was generic -->
<meta name="description" content="Research on advanced image classification techniques in computer vision: multi-label chest X-ray diagnosis using CNNs and transfer learning (VGG16, ResNet50, InceptionV3).">
<title>Advanced Image Classification Techniques in Computer Vision</title>
<style>
/* ---- Base layout: centered serif column on a light background ---- */
body {
font-family: 'Georgia', 'Times New Roman', serif;
line-height: 1.6;
max-width: 60%;
margin: 0 auto;
padding: 20px;
background-color: #f9f9f9;
}
/* ---- Page banner ---- */
.header {
text-align: center;
border-bottom: 3px solid #2c3e50;
padding-bottom: 20px;
margin-bottom: 30px;
}
.main-title {
color: #2c3e50;
font-size: 2.5em;
margin-bottom: 10px;
font-weight: bold;
}
.subtitle {
color: #7f8c8d;
font-size: 1.2em;
font-style: italic;
}
/* ---- Article "card" containers (default blue accent on the left edge) ---- */
article {
background: white;
margin: 25px 0;
padding: 25px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
border-left: 4px solid #3498db;
}
/* ---- Heading hierarchy ---- */
h1 {
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 10px;
margin-top: 0;
}
h2 {
color: #34495e;
margin-top: 25px;
}
h3 {
color: #5d6d7e;
}
/* ---- Callout panels: abstract, keywords, author info ---- */
.abstract {
background: #ecf0f1;
padding: 20px;
border-radius: 5px;
font-style: italic;
margin: 20px 0;
}
.keywords {
background: #e8f4f8;
padding: 15px;
border-radius: 5px;
margin: 15px 0;
}
.author-info {
text-align: center;
background: #f8f9fa;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}
/* ---- Table of contents; .subsection entries are indented second-level items ---- */
.table-of-contents {
background: #f4f4f4;
padding: 20px;
border-radius: 5px;
margin: 20px 0;
}
.table-of-contents ul {
list-style-type: none;
padding-left: 0;
}
.table-of-contents li {
margin: 8px 0;
padding-left: 20px;
}
.table-of-contents li.subsection {
margin: 4px 0;
padding-left: 40px;
font-size: 0.95em;
color: #5d6d7e;
}
/* ---- Per-section accent colors (override the default article border-left) ---- */
.methodology {
border-left: 4px solid #e74c3c;
}
.results {
border-left: 4px solid #27ae60;
}
.conclusion {
border-left: 4px solid #f39c12;
}
.references {
border-left: 4px solid #9b59b6;
}
/* ---- Inline code and highlight boxes ---- */
code {
background: #f8f8f8;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
}
.highlight {
background: #fff3cd;
padding: 15px;
border-radius: 5px;
border-left: 4px solid #ffc107;
margin: 15px 0;
}
.footer {
text-align: center;
margin-top: 40px;
padding: 20px;
border-top: 2px solid #ecf0f1;
color: #7f8c8d;
}
/* Back to Top Button Styles */
/* Hidden by default; the .show class fades it in (presumably toggled by scroll JS — not visible in this chunk) */
.back-to-top {
position: fixed;
bottom: 30px;
right: 30px;
width: 50px;
height: 50px;
background: #3498db;
color: white;
border: none;
border-radius: 50%;
font-size: 20px;
cursor: pointer;
opacity: 0;
visibility: hidden;
transition: all 0.3s ease;
z-index: 1000;
box-shadow: 0 4px 12px rgba(52, 152, 219, 0.3);
}
.back-to-top:hover {
background: #2980b9;
transform: translateY(-2px);
box-shadow: 0 6px 16px rgba(52, 152, 219, 0.4);
}
.back-to-top.show {
opacity: 1;
visibility: visible;
}
/* Arrow glyph supplied via CSS so the button markup can stay empty */
.back-to-top::before {
content: '↑';
font-weight: bold;
}
/* Mobile and Tablet Responsive Styles */
@media screen and (max-width: 1024px) {
/* Tablet View */
body {
max-width: 85%;
padding: 15px;
font-size: 16px;
}
.main-title {
font-size: 2.2em;
}
.subtitle {
font-size: 1.1em;
}
article {
padding: 20px;
margin: 20px 0;
}
.table-of-contents {
padding: 15px;
}
.table-of-contents li {
padding-left: 15px;
font-size: 0.95em;
}
.table-of-contents li.subsection {
padding-left: 30px;
font-size: 0.9em;
}
/* Adjust confusion matrix layout for tablets */
.confusion-matrix-grid {
flex-direction: column;
align-items: center;
}
.confusion-matrix-grid > div {
max-width: 80%;
margin: 15px 0;
}
}
@media screen and (max-width: 768px) {
/* Mobile View */
body {
max-width: 95%;
padding: 10px;
font-size: 15px;
line-height: 1.5;
}
.header {
padding-bottom: 15px;
margin-bottom: 20px;
}
.main-title {
font-size: 1.8em;
margin-bottom: 8px;
line-height: 1.2;
}
.subtitle {
font-size: 1em;
line-height: 1.3;
}
article {
padding: 15px;
margin: 15px 0;
border-radius: 6px;
}
h1 {
font-size: 1.6em;
margin-bottom: 15px;
}
h2 {
font-size: 1.4em;
margin-top: 20px;
margin-bottom: 12px;
}
h3 {
font-size: 1.2em;
margin-top: 15px;
margin-bottom: 10px;
}
/* Table of Contents Mobile Optimization */
.table-of-contents {
padding: 12px;
margin: 15px 0;
}
.table-of-contents ul {
padding-left: 0;
}
.table-of-contents li {
margin: 6px 0;
padding-left: 10px;
font-size: 0.9em;
line-height: 1.4;
}
.table-of-contents li.subsection {
padding-left: 25px;
font-size: 0.85em;
}
/* NOTE: the [style*=...] attribute selectors below deliberately match inline
   styles written in the markup (e.g. third-level TOC items with
   style="padding-left: 60px"); keep them in sync with those style attributes */
.table-of-contents li.subsection[style*="60px"] {
padding-left: 40px !important;
font-size: 0.8em !important;
}
/* Lists Mobile Optimization */
ul[style*="padding-left: 10%"] {
padding-left: 5% !important;
}
li[style*="padding-left: 20px"] {
padding-left: 15px !important;
font-size: 0.9em;
line-height: 1.4;
}
/* Image and Figure Mobile Optimization */
div[style*="display: flex"] img {
max-width: 100% !important;
height: auto !important;
margin: 10px 0;
}
div[style*="display: flex"] {
flex-direction: column !important;
align-items: center !important;
}
div[style*="text-align: center"] {
margin: 10px 0 !important;
}
/* Mathematical Formulas Mobile */
div[style*="background-color: #f8f9fa"] {
padding: 15px !important;
margin: 15px 0 !important;
}
span[style*="font-size: 1.2em"] {
font-size: 1em !important;
display: block !important;
margin: 8px 0 !important;
}
/* Back to Top Button Mobile */
.back-to-top {
bottom: 20px;
right: 20px;
width: 45px;
height: 45px;
font-size: 18px;
}
/* Author Info Mobile */
.author-info {
padding: 12px;
font-size: 0.9em;
}
/* Keywords Mobile */
.keywords {
padding: 12px;
font-size: 0.9em;
}
/* Abstract Mobile */
.abstract {
padding: 15px;
font-size: 0.95em;
line-height: 1.5;
}
/* Highlight Box Mobile */
.highlight {
padding: 12px;
margin: 12px 0;
font-size: 0.9em;
}
/* Footer Mobile */
.footer {
margin-top: 30px;
padding: 15px;
font-size: 0.85em;
}
}
@media screen and (max-width: 480px) {
/* Small Mobile View */
body {
max-width: 98%;
padding: 8px;
font-size: 14px;
}
.main-title {
font-size: 1.5em;
line-height: 1.1;
}
.subtitle {
font-size: 0.9em;
}
article {
padding: 12px;
margin: 12px 0;
}
h1 {
font-size: 1.4em;
}
h2 {
font-size: 1.2em;
}
h3 {
font-size: 1.1em;
}
.table-of-contents li {
font-size: 0.85em;
margin: 4px 0;
}
.table-of-contents li.subsection {
font-size: 0.8em;
padding-left: 20px;
}
.back-to-top {
bottom: 15px;
right: 15px;
width: 40px;
height: 40px;
font-size: 16px;
}
/* Ultra-compact lists for small screens */
li[style*="padding-left: 15px"] {
padding-left: 10px !important;
font-size: 0.85em !important;
}
/* Smaller mathematical formulas */
div[style*="background-color: #f8f9fa"] {
padding: 10px !important;
margin: 10px 0 !important;
}
}
</style>
</head>
<body>
<!-- Page banner: semantic <header> landmark; class kept so the existing .header CSS still applies -->
<header class="header">
<h1 class="main-title">Advanced Image Classification Techniques in Computer Vision</h1>
<p class="subtitle">A Comprehensive Research on Image Classification</p>
</header>
<div class="author-info">
<p>Department of Computer Science | Advanced Machine Learning Research</p>
<p>Author: Muhammad Asif Nawaz</p>
</div>
<article>
<h1>Abstract</h1>
<div class="abstract">
<p>This dissertation presents a comprehensive study on advanced image classification techniques in computer vision, exploring state-of-the-art deep learning methodologies and their applications in real-world scenarios. The research investigates various neural network architectures, including Convolutional Neural Networks (CNNs), Vision Transformers, and hybrid models, analyzing their performance across different datasets and classification tasks.</p>
<p>Through extensive experimentation and comparative analysis, this work demonstrates the effectiveness of modern image classification approaches and proposes novel improvements to existing methodologies. The findings contribute to the advancement of computer vision research and provide practical insights for implementing robust image classification systems.</p>
</div>
<div class="keywords">
<strong>Keywords:</strong> Image Classification, Computer Vision, Deep Learning, Convolutional Neural Networks, Machine Learning, Artificial Intelligence, Pattern Recognition
</div>
</article>
<article>
<h1>Table of Contents</h1>
<div class="table-of-contents">
<ul>
<li><strong><a href="#introduction" style="text-decoration: none; color: inherit;">1. Introduction</a></strong></li>
<li class="subsection"><a href="#background-context" style="text-decoration: none; color: inherit;">1.1 Background and Context</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#problem-statement" style="text-decoration: none; color: inherit;">1.1.1 Problem Statement</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#research-rationale" style="text-decoration: none; color: inherit;">1.1.2 Research Rationale</a></li>
<li class="subsection"><a href="#scope-objectives" style="text-decoration: none; color: inherit;">1.2 Scope and Objectives</a></li>
<li class="subsection"><a href="#achievements" style="text-decoration: none; color: inherit;">1.3 Achievements</a></li>
<li class="subsection"><a href="#overview-research" style="text-decoration: none; color: inherit;">1.4 Overview of Research</a></li>
<li><strong><a href="#state-of-art" style="text-decoration: none; color: inherit;">2. State-of-The-Art</a></strong></li>
<li><strong><a href="#methodology" style="text-decoration: none; color: inherit;">3. Methodology</a></strong></li>
<li class="subsection"><a href="#data-understanding" style="text-decoration: none; color: inherit;">3.1 Data Understanding</a></li>
<li class="subsection"><a href="#data-preparation" style="text-decoration: none; color: inherit;">3.2 Data Preparation</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#data-augmentation" style="text-decoration: none; color: inherit;">3.2.1 Data Augmentation</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#data-visualization" style="text-decoration: none; color: inherit;">3.2.2 Data Visualization</a></li>
<li class="subsection"><a href="#model-building" style="text-decoration: none; color: inherit;">3.3 Model Building</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#best-parameters" style="text-decoration: none; color: inherit;">3.3.1 Searching Best Parameters</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#cnn-architecture" style="text-decoration: none; color: inherit;">3.3.2 Convolutional Neural Network (CNN) Architecture</a></li>
<li class="subsection" style="padding-left: 60px; font-size: 0.9em;"><a href="#pretrained-models" style="text-decoration: none; color: inherit;">3.3.3 Pre-Trained Models Architecture</a></li>
<li class="subsection"><a href="#model-evaluation" style="text-decoration: none; color: inherit;">3.4 Model Evaluation</a></li>
<li class="subsection"><a href="#deployment" style="text-decoration: none; color: inherit;">3.5 Deployment</a></li>
<li><strong><a href="#conclusion" style="text-decoration: none; color: inherit;">4. Conclusion</a></strong></li>
<li class="subsection"><a href="#summary-findings" style="text-decoration: none; color: inherit;">4.1 Summary of Findings</a></li>
<li class="subsection"><a href="#evaluation" style="text-decoration: none; color: inherit;">4.2 Evaluation</a></li>
<li class="subsection"><a href="#future-work" style="text-decoration: none; color: inherit;">4.3 Future Work</a></li>
<li><strong><a href="#references" style="text-decoration: none; color: inherit;">5. References</a></strong></li>
<li><strong><a href="#appendices" style="text-decoration: none; color: inherit;">6. Appendices</a></strong></li>
<li class="subsection"><a href="#appendix-a" style="text-decoration: none; color: inherit;">Appendix A: Technical Implementation Details</a></li>
<!-- <li class="subsection"><a href="#appendix-b" style="text-decoration: none; color: inherit;">Appendix B: Additional Experimental Results</a></li> -->
<li class="subsection"><a href="#appendix-b" style="text-decoration: none; color: inherit;">Appendix B: Code Repository</a></li>
</ul>
</div>
</article>
<article>
<h1>Overview</h1>
<p>This comprehensive research document presents an in-depth analysis of advanced image classification techniques in computer vision. The study encompasses both theoretical foundations and practical implementations, providing valuable insights into the current state of the field and future research directions.</p>
<p>The research methodology combines rigorous theoretical analysis with extensive experimental validation across multiple datasets and architectural approaches. Through systematic evaluation of various deep learning models, this work contributes to the understanding of optimal strategies for implementing robust image classification systems.</p>
<div class="highlight">
<strong>Research Scope:</strong> This study covers traditional computer vision approaches, modern deep learning architectures, and hybrid methodologies, offering a comprehensive perspective on image classification techniques and their real-world applications.
</div>
<p>The document is structured to provide both academic researchers and industry practitioners with actionable insights, technical specifications, and performance benchmarks essential for advancing the field of computer vision.</p>
</article>
<article>
<h1>Research Content</h1>
<h2 id="introduction">1. Introduction</h2>
<p>The research main goal is to create an image classification system using machine learning to help with the diagnosis of various diseases, including pneumonia, cardiomegaly, emphysema, diffusion, mass, and other conditions that are scanned on chest X-rays. Convolutional neural networks (CNNs) are utilized by the system to extract features, taking advantage of their capacity to capture spatial hierarchies of images by means of multiple convolutional and pooling layers. Using transfer learning, pre-trained models like VGG16, ResNet50, and InceptionV3 are compared and optimized for best performance alongside CNN architecture.</p>
<p>Deep learning is a subset of machine learning that has transformed classification tasks across multiple domains. Convolutional neural networks have become the standard architecture in medical diagnosis because of their capacity to extract features from images. Multiple layers of convolutional and pooling operations make up CNNs, enabling the network to recognize complex patterns in images. Deep learning is especially useful for image recognition in machine learning across different mediums.</p>
<p>This is stated by Igor Kononenko [1]: “Medical datasets were the initial source of design and application for machine learning algorithms. These days, machine learning offers a number of essential instruments for astute data analysis. The digital revolution has made it relatively cheap and accessible to gather and store data, especially in the last few years. Large information systems collect and share data, and modern hospitals are well-equipped with monitoring and other data collection tools. Medical data analysis is currently a good fit for machine learning technology, and in particular, a lot of work has been done in the area of medical diagnosis in small, specialized diagnostic problems.”</p>
<p>Data preprocessing techniques, including image augmentation (rotation, scaling, flipping and colour adjustments), are applied to improve the robustness of the model. The model performance is evaluated using metrics like precision, F1-score, accuracy, recall, and the confusion matrix, providing a comprehensive assessment of its effectiveness. CNN-based methods have various strategies to increase the performance of image classification on datasets [2]; one method used in this project is data augmentation via ImageDataGenerator with configurations. Convolutional neural networks are obtained by stacking one or more computational layers. Traditional transform-based data augmentation has better performance than generative adversarial networks and other neural network-based methods. However, there are certain things that should be noted. [2] A limitation of Kermany’s research is that it uses the InceptionV3 model and stops retraining the convolutional layers of InceptionV3 because of overfitting. Therefore, the effects of retraining the convolutional layers of these models (convolutional neural network (CNN), VGG16, ResNet50 and InceptionV3) will be evaluated in this project, along with an examination of how the CNN transfer method performed on the small chest X-ray dataset, to determine how data augmentation, network complexity, fine-tuned convolutional layers, and other overfitting-prevention mechanisms affected the classification.</p>
<p>By integrating the chest X-ray dataset, with almost 11 million X-ray images for multiple patients who have fourteen text-mined disease image labels (where each image can have multiple labels). Atelectasis, Pneumothorax, Infiltration, Edema, Consolidation, Fibrosis, Emphysema, Effusion, Nodule, Pneumonia, Pleural thickening, Cardiomegaly, Mass, and Hernia are among the fourteen common diseases, and all the models will predict multiple labels with data augmentation via the rotation range, scaling and other configurations.</p>
<p>Deep learning needs a huge amount of data to get better result. Especially on medical problem to get and annotate the data is very important and time-consuming process. There are some solutions to solve the problem, one already little bit discuss on above is data augmentation which is prevent the overfitting and improves the accuracy on training data and another method for boosting the performance of in deep learning particularly CNNs which is named as transfer learning. So, in this project we will be utilize data augmentation method.</p>
<p>Here is the discussion on feature extraction using convolution, for all the models will be use IMAGE_SIZE variable (which represent the image pixels) which is [128,128] that will be used as input to standard feed-forward neural networks to solve image classification.</p>
<h3 id="background-context">1.1 Background and Context</h3>
<p>In this project, utilizing the machine learning model convolutional neural networks (CNNs) and pre-trained models such as InceptionV3, VGG16 and ResNet50 with ImageNet weights, excluding the top layers, typically involves configuring various components like weights, include_top and input shape.</p>
<p>So, the study focuses on the use of convolutional neural networks to improve image classification for chest X-rays used in medical diagnosis, drawing on CNNs' ability to precisely extract complex patterns through deep learning. This has helped in diagnosing such conditions. The research builds on data augmentation and deep learning to improve model performance and identify the challenges.</p>
<h3 id="problem-statement">1.1.1 Problem Statement</h3>
<p>For thoracic diseases to be effectively treated and patient outcomes to be improved, an early and accurate diagnosis is essential. The most widely used imaging technique for diagnosing diseases like Atelectasis, Consolidation, and Infiltration is a chest X-ray. Nevertheless, the interpretation of these images is difficult and frequently arbitrary, greatly depending on radiologists' experience. This may result in inconsistent diagnosis, particularly in settings with limited resources where skilled radiologists might not be available.</p>
<p>Radiologists are under a lot of pressure due to the growing number of chest X-rays that require analysis, which increases the risk of diagnostic mistakes and delays. Furthermore, a lot of thoracic diseases share similar visual features and symptoms, making it challenging to differentiate between them just by manual interpretation. This intricacy highlights the requirement for an automated, trustworthy instrument that can help with the precise categorization of thoracic diseases.</p>
<p>The goal of this project is to create a deep learning model that can use chest X-ray images to automatically classify a variety of thoracic diseases. With just one image, the model will be trained to distinguish between different conditions and produce multi-label predictions. The model can potentially outperform conventional diagnostic techniques by learning to recognize patterns and anomalies linked to specific diseases by utilizing extensive datasets like ChestX-ray. The implementation of this model aims to enhance diagnostic accuracy, reduce the workload on radiologists, and improve the overall efficiency of the healthcare system.</p>
<h3 id="research-rationale">1.1.2 Research Rationale</h3>
<p>Is it possible to create a multi-label classification model using deep learning that can effectively identify and categorize various medical conditions from chest X-ray images, thereby enhancing diagnostic accuracy/precision and enhancing patient outcomes?</p>
<ul style="list-style-type: none; padding-left: 3%;">
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
to create a deep learning model that can identify various medical conditions from chest X-ray images.
</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
To assess the suggested model's effectiveness in terms of recall, accuracy, precision, F1-score, and confusion matrices.
</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
to assess how well the suggested model performs in comparison to current CAD (computer-aided diagnosis technology) systems and radiologists.
</li>
</ul>
<h3 id="scope-objectives">1.2 Scope and Objectives</h3>
<p>Develop a robust and accurate deep learning model to automatically classify medical diseases from chest X-ray images. The model should be able to detect multiple diseases from a single X-ray image, providing a reliable tool that can assist in medically diagnosing conditions such as Atelectasis, Consolidation, Infiltration, Pneumothorax, and Hernia.</p>
<p>The robustness of the model refers to the ability to perform well across a variety of conditions, including noisy data, variation in image quality and different patient demographics. A model should generalize well to new, unseen data, meaning it should make accurate predictions even on X-ray images that vary from those in the training set. So, in medical diagnostics, precision is essential. To reduce false positives—identifying a disease when one is not present—and false negatives—failing to detect a disease when one is present—the model needs to have high precision and recall rates.</p>
<p>In this case, models are not predicting well due to the dataset, because (generating training and validation data using the image data generator with augmentation and fit that training and validation dataset on models each epochs going to be overfitting), are all models giving the low accuracy on different neuron layers as well as rest of the metrics is not having best results.</p>
<p>Massive amounts of data are required for deep learning to produce dependable results. On the other hand, certain challenges might not have adequate data. Obtaining and annotating the data is an expensive and time-consuming operation, particularly for medical conditions. Thankfully, there are a few ways to handle this issue. Among these is data augmentation, which increases accuracy and prevents overfitting [3]. The training time data augmentation strategy was applied in this work. Various augmentation techniques were employed, including shifting, zooming, flipping, and rotating at 32-degree angles. Some images have labels, which means they show a disease, and some have no label, which means no disease is present.</p>
<div class="highlight">
<strong>Image Augmentation Examples:</strong> The following three examples demonstrate the data augmentation techniques applied to chest X-ray images to improve model robustness and prevent overfitting.
</div>
<!-- Augmented X-ray examples. The flex container's inline styles are load-bearing:
     mobile media queries target div[style*="display: flex"] to stack the images.
     Images are below the fold, so lazy loading and async decoding are safe wins. -->
<div style="display: flex; justify-content: space-around; margin: 20px 0; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<img src="Picture1.png" alt="Original Chest X-ray Image" style="max-width: 200px; height: auto;" loading="lazy" decoding="async">
</div>
<div style="text-align: center; margin: 10px;">
<img src="Picture2.png" alt="Rotated and Flipped Chest X-ray" style="max-width: 200px; height: auto;" loading="lazy" decoding="async">
</div>
<div style="text-align: center; margin: 10px;">
<img src="Picture3.png" alt="Zoomed and Brightness Adjusted X-ray" style="max-width: 200px; height: auto;" loading="lazy" decoding="async">
</div>
</div>
<p>For multi-disease detection from a single X-ray image: this objective requires the models to handle cases where multiple diseases might be present simultaneously in a single X-ray image. For example, a patient could have Infiltration and Hernia, and the model should correctly predict both conditions.</p>
<p>The Scope of the project will primarily be utilizing chest X-ray dataset which is a large collection of X-ray images labelled with 14 various diseases and added extra column “FilePath” in dataset for append path of the images in that column based on the Image Index column. I’m exploring the dataset to understand its structure, labels distribution, and image characteristics. This include identifying missing data or inconsistencies with the dataset.</p>
<p>Data Preprocessing: Splitting the dataset different training, validation and testing sets to ensure unbiased model evaluation. Apply a variety of augmentation technique (like rotation, sample wise standard normalization, shear range, width & height shift range, horizontal & vertical flip brightness range, rescale and fill model) to enhance training dataset, improving the model ability to generalize to unseen data.</p>
<h3 id="achievements">1.3 Achievements</h3>
<p>We evaluated four models using deep learning convolutional layers, with the architecture of all models defined step by step in the methodology. The deep learning models are capable of accurately classifying multiple thoracic diseases from chest X-ray images. The models effectively address complex challenges such as multi-label classification, class imbalance and the high dimensionality of medical images. We implemented techniques such as class weighting, data augmentation and sample weighting, which ensured that the model performed well across both common and rare thoracic diseases.</p>
<p>Performance of the models which is not well on training data because when we fit training dataset on model that is going overfit due to the dataset. The reason behind this for the identification of the label is 0 or 1. So, the most of the datapoint is 0 which mean no diseases at all, the datapoint is 1 mean have disease on that label. It could be the reason training data on the model is going to overfitting. Also is not evaluating well on unseen dataset.</p>
<p>Thus, the best model with the highest accuracy among the four models is displayed overall in this project. We show the results for that best model—accuracy, recall, F1-score and precision—and the classification report as well, which shows the result for label support. We also show the confusion matrices for all predicted labels, and at the end deploy that model for integration.</p>
<h3 id="overview-research">1.4 Overview of Research</h3>
<p>This project evaluates the x-ray image dataset using machine learning models, which are briefly described in the project methodology section and already mentioned in the introduction. This dissertation focuses on using deep learning models to classify images. Focusing on multi-label classification, where a single image may have several associated disease labels—is crucial. The following are crucial steps: understanding the data, building a model with hyperparameter tuning, enhancing and visualizing the data, and evaluating the performance of the model. Therefore, augmentation with different configurations such as range rotation, shear range, flipping, and others is used to prevent overfitting on the models.</p>
<p>I am writing this section for the overview of models. In this article, all predicted models adhered to the same format. Like how to construct the first method that searches for the optimal parameters across batches and epochs, another method searches for the optimal parameters across learning rate, initialization mode, activation for dense layers, and dropout rate for the optimizer. Thus, as I indicated, there are two ways to look for the ideal parameters for a given model. Prior to fitting the training data to the model, the best fit will be assigned to a particular model and that model will be predicted on unseen data. Subsequently, the model is fitted using training data, and a report on model history visualization is displayed. After fitting the model and displaying the Roc report for the same data before and after training, predict the model once more on unseen data. The same processes are then taken for the other models, until the best accuracy model is obtained. The classification report for that model is then displayed, along with the confusion metrics, before the model is saved and deployed for use. So, the conclusion of the overview is that building this project for multi labels classification in medical field. It will be overcome burden of radiologists.</p>
<!-- Start the second section which is the State-of-The-Art -->
<h2 id="state-of-art">2. State-of-The-Art</h2>
<p>The goal of this project is to develop a deep learning system that can classify chest X-ray images using multiple labels and identify several diseases from a single image. For example, the deep learning model will identify the diseased image with the help of the labels present in the dataset. The methodology includes various techniques for data understanding, data preparation, data augmentation, model building, fine-tuning hyperparameters, and assessing performance. Rotation, zoom, shear, channel shift range, brightness range, rescale, fill mode and flipping are examples of data augmentation techniques (we are using sample-wise normalization for the training data augmentation and feature-wise normalization for validation and test data augmentation) that are used to enhance model robustness and avoid overfitting.</p>
<p>Before training the models, we optimize the hyperparameters using the random grid search technique, wrapping the model in the Keras classifier wrapper class and then assigning it to the random grid search function with the iteration count and cross-validation step. We have two methods for fine-tuning the hyperparameters of the models. One of the methods tunes batch size and epochs, and the other method tunes optimizer, learning rate, init mode, activation and dropout rate using the best batch size and epochs found. This procedure is iterated over multiple models in order to determine which has the best accuracy. After that, the training data generator is fitted on the model, and the model is used to predict and is evaluated on unseen data to obtain the test accuracy and loss.</p>
<p>Finally, we have all models with the training results such as test accuracy and loss, as well as the ROC report from before and after fitting the model on the training generator. We compare all the models based on the test accuracy and loss results and show the model comparison graph. We evaluate the model with the higher accuracy on unseen data, as well as measuring metrics like recall, precision, F1 score and the classification report. Lastly, the confusion matrix is shown for all predicted features with correct and incorrect predictions. The best model is saved for deployment and integration into an X-ray type application.</p>
<p>So, the conclusion of this project in the literature review, as per the above discussion we are building four models. The inception model is superior to other all models with higher accuracy.</p>
<p>So, the statement according to <strong>Yadav</strong> and <strong>Jadhav</strong> is that, [4] In order to improve the performance of image classification on small datasets, this research uses a variety of machine learning techniques, including methods based on convolutional neural networks and multiple strategies. One approach is data augmentation, which performs better than neural network- and generative adversarial network (GAN)-based techniques. Data augmentation is an alternative to traditional transform-based methods. Transfer learning is an additional technique that, according to Kermany's research, produced 92% accuracy on a small dataset of pneumonia X-ray images. But there are a few gaps that should be noted. One drawback of Kermany's work is that they employ the InceptionV3 model, but they don't retrain the InceptionV3 convolutional layer because of overfitting. Thus, this study will assess alternative models as well as the results of retraining the convolutional layer. Furthermore, they did not contrast the capsule network's performance with that of other techniques. Thus, the following are the contributions of this report.</p>
<ul style="list-style-type: none; padding-left: 3%;">
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
Performance comparison of three distinct classification methods: training capsule network from scratch, transfer learning of VGG16 and InceptionV3, and SVM classifier with oriented fast and rotated binary robust independent elementary features (ORB).
</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
An examination of how the CNN transfer learning method performed on the small chest X-ray dataset to determine how data augmentation, network complexity, optimized convolutional layers, and other anti-overfitting mechanisms affected the classification.
</li>
</ul>
<p>“Medical image classification is a sub-field of image classification. It can also be applied with a variety of image classification techniques. For example, many image-enhanced techniques to improve the distinguishable characteristics for categorization. Convolutional neural network, however, is an end-to-end image classification solution, so it will learn the feature by own. Consequently, there will be no review of the literature regarding feature selection and enhancement in the medical image. The review primarily concentrates on the use of conventional techniques and transfer learning based on CNN. Additionally, on the capsule network in papers pertaining to medical images, to look into the gaps in their work and the aspects of those models that are crucial to the outcome.”</p>
<p>So, the conclusion of this research is that, out of the three approaches, CNN-based transfer learning is the most effective. The capsule network is superior to the ORB and SVM classifier. In general, CNN-based methods are better than traditional methods due to their ability to automatically learn and select features. The best results, slightly better than the state-of-the-art result, are obtained from the transfer learning of VGG16 with one retrained convolutional layer. The feature can be learned from the new dataset using the unfrozen convolutional layer. As a result, the specific feature is crucial for increasing accuracy. A model's expressive power and overfitting must be balanced.</p>
<p>The statement according to <strong>D Zhang, F Ren, Y Li, L Na, Y Ma</strong> is that [5], Many techniques, especially some deep learning techniques, have been described in the past year for the diagnosis of pneumonia from chest X-ray images. Thus, deep learning has been effectively used to enhance the capabilities of computer-aided diagnosis (CAD), especially in the scope of medical imaging, image augmentation, and image reconstruction. This research proposed a classical deep learning network named DenseNet-121, which has 121 convolutional layers. Compared to physicians with more experience, the framework received a higher F1 score. Additionally, the team introduced Weighted Binary Cross-Entropy loss to lessen the impact of imbalanced classes. The difference between the Binary Cross-Entropy loss and the weights of the imbalanced classes was determined by the number of each label.</p>
<p>Several data pre-processing and augmentation techniques were applied, including random image translation and rotation in both horizontal and vertical directions, to increase the dataset's size and enhance the CNN model's representational ability. Baltruschat compared the performance of popular CNN models in classifying pneumonia X-ray images using the same hyperparameter settings and image pre-processing methods in order to summarize the pneumonia classification task. The methodology proposed a two-channel CNN architecture. The first channel processed the images whose contrast was increased by the CLAHE method, while the second channel processed the images whose edges were enhanced by the Canny method. Following that, these images were used to train a multichannel CNN model to ascertain whether the patients had pneumonia.</p>
<p>D Zhang only predicted one feature, whose label is pneumonia, using the CNN-based model. So, this model is used to diagnose pneumonia on a chest X-ray dataset. The following is a list of contributions made to this paper. To improve the contrast of the image, we first applied the Dynamic Histogram Equalization (DHE) technique. With this method, contrast in images can be improved without looking washed out or creating issues like checkerboard effects. Then, using only six layers that combined the ReLU activation function, drop operation, and max-pooling layers, we created a straightforward VGG-based CNN model to extract the features from the original images or earlier feature maps. The 96.07% accuracy rate and 94.41% precision rate that we were able to obtain demonstrate how well our suggested model outperforms the most advanced CNN model architectures. We included multiple comparisons of various input shapes and loss functions to show how well our suggested model performed.</p>
<!-- Start the third section which is the Methodology -->
<h2 id="methodology">3. Methodology</h2>
<p>This deep learning project involving chest X-ray image classification using models such as (CNNs, VGG16, InceptionV3 and ResNet50), specifically targeting medical imaging data based on the disease labels. Here are dealing with several key components and methodologies, including data preprocessing, data augmentation, searching best parameters for models, models architecture definition, training process and performance evaluation.</p>
<p>I will be utilizing ‘NumPy’, ‘pandas’, ‘seaborn’ and matplotlib for data manipulation, analysis and visualization.</p>
<p>Importing various modules like ‘scikit-learn’ for model selection, evaluation and preprocessing as well. ‘TensorFlow’ and ‘Keras’ for building and training deep learning models, and ImageDataGenerator for image augmentation and normalization.</p>
<p>For the model evaluation, utilizing metrics such as ROC-AUC, F1 score, precision, recall and confusion matrices to evaluate model performance.</p>
<h3 id="data-understanding">3.1 Data Understanding</h3>
<p>I have all the necessary data organized in one location, with a CSV file named ‘chest_X_ray_data’ and an accompanying images folder. The CSV file contains columns for the (Image Index, Patient ID, Atelectasis, Consolidation, Infiltration, Pneumothorax, Edema, Emphysema, Fibrosis, Effusion, Pneumonia, Pleural thickening, Cardiomegaly, Nodule, Mass and Hernia) labels, which will serve as features for evaluation except image index and patient id. I plan to enhance this data frame by adding a new column for the file path of each corresponding image index.</p>
<h3 id="data-preparation">3.2 Data Preparation</h3>
<p>Data preparation is the process of preparing data which is suitable for further processing and analysis on models. Key steps including collecting data, cleaning and extracting features which is suitable for machine learning algorithms:</p>
<ul style="list-style-type: none; padding-left: 3%;">
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
Loading the dataset from CSV file named 'chest_X_ray_data' from the directory. The nrows parameter is set to selected number of rows to read from the file. So, the resulting dataframe is stored in train_df_main.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
Drop the column named 'No Finding' from train_df_main and extract all predicted labels which are used for image classification. Utilizing the 'Glob' library for getting the image files from the directory, so the list of image paths is stored in the 'image_paths' variable</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
Split the dataset in various part train_set, valid_set and test_set. The train_test_split function is used to split the data into training, validation and testing sets with a test size of 0.2. The 'random_state' parameter is set to RANDOM_STATE variable</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
Added 'FilePath' column in all datasets based on the Image index because we have image name and image Index value is same in file and directory. Also remove the duplicate rows from split datasets</li>
</ul>
<h3 id="data-augmentation">3.2.1 Data Augmentation</h3>
<p>The process of artificially creating new data from preexisting data, known as data augmentation. Modern machine learning models are very powerful, so this is a crucial step in the dataset building process. If these models are given too small datasets, they may begin to "overfit," which is a problem where the models simply memorize the mappings between their inputs and expected outputs.</p>
<p>In the code I have two functions for data augmentation: ‘get_train_generator’ gives an augmented generator for the training dataset, and ‘get_test_and_valid_generator’ gives augmented generators for the validation and testing datasets. Most importantly, augmented data is generated based on flow_from_dataframe. Here are the configurations I am using in ImageDataGenerator for the training data:</p>
<ul style="list-style-type: none; padding-left: 3%;">
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>samplewise_center = true: </strong>utilizing the center option, each image is centered by subtracting the mean pixel value from each pixel. So rather than doing this for the whole dataset, it is done for each individual image.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>samplewise_std_normalization= true: </strong>utilizing this option for standardization of images is achieved by subtracting the mean pixel value and dividing the result by the standard deviation of the pixel values.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>rotation_range= 32: </strong>this parameter specifies the maximum rotation angle (in degrees) that will be applied. In this project I'm using maximum rotation angle is 32 degrees.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>shear_range= 0.1: </strong>parameter will apply random shear transformation on the images. So the maximum angle of radians that will be applied. Here the maximum shear angle is 0.1 radians.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>zoom_range= 0.15: </strong>parameter will apply random zoom transformation on the images. So the parameter specifies the maximum zoom factor that will be applied. In this case the maximum zoom factor is 0.15, which mean that the image will be zoomed in or out by up to 15%.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>width_shift_range=0.1: </strong>parameter specifies the maximum horizontal shift as a fraction of the image width that will be applied. In this case the maximum horizontal shift is 0.1, which means that the image will be shifted by up to 10% of its width.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>height_shift_range=0.05: </strong>parameter specifies the maximum vertical shift as a fraction of the image height that will be applied. In this case the maximum vertical shift is 0.05, which means that the image will be shifted by up to 5% of its height.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>rescale=1./255: </strong>So the rescale parameter specifies the scaling factor that will be applied to the pixel values.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>brightness_range=(0.8, 1.2): </strong>this parameter specifies the minimum and maximum brightness multiplier that will be applied. In this case the brightness will be adjusted by a factor between 0.8 and 1.2.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>horizontal_flip=True: </strong>parameter applies a random horizontal flip to the images. If True, image will be flipped horizontally with a probability of 0.5.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>vertical_flip=False: </strong>this parameter applies a random vertical flip to the images. So, in the case vertical flipping is disabled.</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
<strong>fill_mode=reflect: </strong>parameter mean image will be reflected at the boundary.</li>
</ul>
<h3 id="data-visualization">3.2.2 Data Visualization</h3>
<p>Data visualization is the process of graphical representation of data. At this stage we have train, valid and test generator batch datasets with various configurations produced by the image data generator. For multi-label image classification, a single X-ray image may show multiple diseases that will be predicted at the end of evaluation. So, this figure shows the labels on each image that represent the diseases on that X-ray image. For example, a patient could have Infiltration and Hernia, and the model should correctly predict both conditions.</p>
<div style="display: flex; justify-content: space-around; margin: 20px 0; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<img src="Visualization_1.png" alt="Result Chest X-ray Image" style="max-width: 600px; height: auto;">
</div>
</div>
<p>So, this image visualization develops a better understanding of the relationship between the images and their labels that can ultimately lead to better model performance and more accurate predictions.</p>
<p>This plot shows us how the prevalence of positive cases varies greatly amongst the various pathologies. (These trends also correspond with those found in the entire dataset.) We can see the label ‘Hernia’ has the greatest imbalance, with the proportion of positive training cases being about 0.25%. Similarly, the label ‘Infiltration’, which has the least amount of imbalance, has only 17% of the training cases positive.</p>
<div style="display: flex; justify-content: center; margin: 20px 0; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<img src="Visualization_Trained_graph.png" alt="Trained Model Performance Graph" style="max-width: 600px; height: auto; border: 2px solid #3498db; border-radius: 5px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
<p style="margin-top: 10px; font-weight: bold; color: #2c3e50; font-size: 1.1em;"><strong>Figure 1:</strong> Percentage of Positive Labels</p>
</div>
</div>
<p>As we can see in the Figure 2, the contribution of the positive cases is significantly less than that of the negative cases in training dataset.</p>
<div style="display: flex; justify-content: center; margin: 20px 0; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<img src="Positive_and_negative_graph.png" alt="Trained Model Performance Graph" style="max-width: 600px; height: auto; border: 2px solid #3498db; border-radius: 5px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
<p style="margin-top: 10px; font-weight: bold; color: #2c3e50; font-size: 1.1em;"><strong>Figure 2:</strong> Percentage of Positive and Negative Labels</p>
</div>
</div>
<h3 id="model-building">3.3 Model Building</h3>
<p>Utilizing machine learning model convolutional neural Networks (CNNs) and pre trained models such as (InceptionV3, VGG16 and ResNet50) imageNet weights, excluding the top layers typically involves various components like weights, include top and input shape</p>
<h3 id="best-parameters">3.3.1 Searching Best Parameters</h3>
<p>Before fitting the model on the training and validation datasets, it's important to determine the best parameters using RandomizedSearchCV along with a Keras classifier. The following steps outline the process of finding the optimal parameters using this approach.</p>
<p>I created two methods for finding the best parameters on the train generator dataset. The first method, named (get_batch_parameter), feeds the model with default values to search for the best batch size and epochs on one augmented batch, which is 32 by default. Similarly, the second searching method, named (get_parameters_using_bacth), feeds the model with default values plus the batch size and epochs obtained from the first method. This method gives the best optimizer, learning rate, init mode, activation and dropout rate for that model.</p>
<p>We defined the array of all parameters and build dictionary for all parameters. Pass the keras model to random search class as well as grid search dictionary and iteration step with cross validation score.</p>
<h3 id="cnn-architecture">3.3.2 Convolutional Neural Network (CNN) Architecture</h3>
<p>I wrap this model in function having parameters with default values, and that parameters is using in function body:</p>
<p><strong>Input Layer:</strong> Defined the sequential model then add the input layer with convolutional layer that has 64 filter, a 3*3 kernel size, and use the activation parameter with default value is relu and also add kernel initializer function with assign the default value of init_mode. So the input shape matches the size of the images, and padding is assign to same to preserve the spatial dimensions.</p>
<p><strong>Conv2D Layers:</strong> Convolutional layers with 32, 64, and 128 filters are gradually added by the model. These layers capture various level of features, so from the low level features like edges and textures in earlier layers to more complex patterns in deeper layers.</p>
<p><strong>MaxPooling2D Layers:</strong> Pooling layers follow every set of convolutional layers, which overcomes the spatial dimensions and helping to generalize the learned features. This down sampling overcomes the computational load and helps prevent overfitting.</p>
<p><strong>Dropout Layers:</strong> After every pooling layer, set the dropout layer with the parameter drop-out_rate default value is 0.5 where randomly deactivate neurons during training time, which is also help to overcomes overfitting.</p>
<p><strong>Fully Connected Layer:</strong> The fully connected layer begins by flattening the output from the last convolutional layer into a one-dimensional vector. A dense layer with 1024 units and default ReLU activation follows, enabling the model to learn complex patterns from the extracted features. To mitigate overfitting, a dropout layer with a specified rate is applied after this dense layer. The output layer, designed for multi-label classification, has units equal to the number of labels and uses a sigmoid default activation function to generate probability scores for each label.</p>
<p><strong>Compilation:</strong> For the CNN model compilation, binary_crossentropy is employed as the loss function, suitable for independent label predictions, with default Adam as the optimizer set to a 0.01 default learning rate, and accuracy as the metric to evaluate performance.</p>
<h3 id="pretrained-models">3.3.3 Pre-Trained Models Architecture</h3>
<p>In this project we are using three pre-trained models (ResNet50, VGG16 and InceptionV3); these models have the same architecture except for the base layer. Every model is wrapped in an individual function with parameters having default values; these default values are used at the time of best-parameter searching. Every model is reinitialized after finding the best parameter values, and then those values are fed to the specific model. Now defining the layers that are common to every model.</p>
<p><strong>Input Layer:</strong> defined the input layer with the shape of image size is 128*128 and colour channel is 3 (which is in the RGB).</p>
<p><strong>Base Model:</strong> Defined the pre-trained model architecture, which is exclude the fully connected layers at the top of the model network, keeping only the convolutional base layer. Also initializ-es the base model with pre trained weights with ImageNet dataset. So, at the end of this layer using inputs which is defined in the previous step.</p>
<p><strong>Freezing Base Layer:</strong> Freeze the layers of the base model, which prevents them from being updated during training. This allows the model to use only the pre-trained features.</p>
<p><strong>Dense and Dropout Layers:</strong> dense layers are fully connected that help the model to learn on complex patterns. So, for the dense layer filters are gradually added in convolutional layers which is 64, 128 and 512, and for the output connected convolutional layers is 1024. Dropout rate is defined in parameter which is 0.5.</p>
<p><strong>Fully Connected Layer:</strong> the first layer is flattening the output of the previous layer into a one-dimensional vector. So the forth layer as mentioned in connected layer is dense with the filter is 1024. Also added the dropout rate layer with default value of dropout_rate parameter. Add the next layer is dense with the units are equal to the length of the predicted labels with searched activation function.</p>
<p><strong>Compilation:</strong> In the model compilation, utilizing the ‘binary_crossentropy’ loss which is suitable for multi-label classification where each label is treated independently. For the model default optimizer function using Adam with the default learning rate is 0.01. Next, last and third parameter is metrics which is accuracy to evaluate the model performance.</p>
<p>I'm writing the definition of each of the models. To find the best parameters, I first assign the model to the Keras classifier using default values, as I previously discussed in the model architecture. Subsequently, the parameter values will be assigned to the respective model, and the model summary will be displayed using the optimal parameter values. Next, before fitting the model, make predictions on unseen data. Run the model fit on a data generator for both training and validation. We will obtain the historical data on model fitting in order to display the loss and accuracy graph visualization. Next, test the model on unseen data to determine its accuracy and loss, then store them in a model-specific variable to determine which model performs the best. After fitting the training data, predict the model on unseen data once more. The ROC report for that particular model will then be displayed. The same process is implemented for all models.</p>
<h3 id="model-evaluation">3.4 Model Evaluation</h3>
<p>In the phase of the model evaluation, we rank the highest performing model according to accuracy. As previously mentioned, we store the accuracy and loss of each model in the corresponding model variables, create a new array based on the model’s variables and display visualization graph for all models.</p>
<div style="display: flex; justify-content: center; margin: 20px 0; flex-wrap: wrap;">
<div style="text-align: center; margin: 10px;">
<img src="Models_comparison.png" alt="Trained Model Performance Graph" style="max-width: 600px; height: auto; border: 2px solid #3498db; border-radius: 5px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
<p style="margin-top: 10px; font-weight: bold; color: #2c3e50; font-size: 1.1em;"><strong>Figure 3:</strong> Models Comparison</p>
</div>
</div>
<p>Finally, the best model based on accuracy is InceptionV3, with 12% accuracy. Predicting that model on unseen data, we also obtain metrics like accuracy, F1-score, recall, precision, and the classification report. As described in the best model's outputs, True Positive, True Negative, False Positive and False Negative are used to analyze and identify the performance of the model.</p>
<div style="margin: 20px 0; padding: 20px; background-color: #f8f9fa; border-radius: 8px; border: 1px solid #e9ecef;">
<div style="text-align: center; margin: 15px 0;">
<strong>Accuracy:</strong><br>
<span style="font-size: 1.2em; margin: 10px 0; display: inline-block;">
$$accuracy = \frac{TP + TN}{TP + TN + FP + FN}$$
</span>
</div>
<div style="text-align: center; margin: 15px 0;">
<strong>Precision:</strong><br>
<span style="font-size: 1.2em; margin: 10px 0; display: inline-block;">
$$precision = \frac{TP}{TP + FP}$$
</span>
</div>
<div style="text-align: center; margin: 15px 0;">
<strong>Recall:</strong><br>
<span style="font-size: 1.2em; margin: 10px 0; display: inline-block;">
$$recall = \frac{TP}{TP + FN}$$
</span>
</div>
<div style="text-align: center; margin: 15px 0;">
<strong>F1 Score:</strong><br>
<span style="font-size: 1.2em; margin: 10px 0; display: inline-block;">
$$F1 = 2 \times \frac{precision \times recall}{precision + recall}$$
</span>
</div>
</div>
<p>Also showing the confusion metric for all the predicted labels for the InceptionV3 model. So, the confusion matrix is a performance measurement for machine learning classification problem, particularly for binary classification. We have confusion matrix for each individual label.</p>
<ul style="list-style-type: none; padding-left: 10%;">
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
True Negative cases where the absence of the condition was correctly predicted.
</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
False Positive cases where the condition was incorrectly predicted as present.
</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
False Negative cases where the condition was incorrectly predicted as absent.
</li>
<li style="margin: 8px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0; color: #2c3e50; font-weight: bold;">❖</span>
True Positive cases where the condition was correctly predicted.
</li>
</ul>
<div style="text-align: center; margin: 10px;">
<img src="Matrix_1.png" alt="Confusion Matrix" style="max-width: 100%; height: auto;">
</div>
<div style="text-align: center; margin: 10px;">
<img src="Matrix_2.png" alt="Confusion Matrix" style="max-width: 100%; height: auto;">
</div>
<div style="text-align: center; margin: 10px;">
<img src="Matrix_3.png" alt="Confusion Matrix" style="max-width: 100%; height: auto;">
</div>
<div style="text-align: center; margin: 10px;">
<img src="Matrix_4.png" alt="Confusion Matrix" style="max-width: 100%; height: auto;">
</div>
<div style="text-align: center; margin: 10px;">
<img src="Matrix_5.png" alt="Confusion Matrix" style="max-width: 100%; height: auto;">
<p style="margin-top: 10px; font-weight: bold; color: #2c3e50; font-size: 1.1em;"><strong>Figure 4:</strong> Confusion Matrices</p>
</div>
<h3 id="deployment">3.5 Deployment</h3>
<p>In the deployment phase, the best-performing model, identified as <strong>InceptionV3</strong> with the highest accuracy, is saved for future use. This model is prepared for integration into a production environment where it can be utilized for real-time chest X-ray image classification. The deployment process includes finalizing the model's architecture, ensuring that it can efficiently handle unseen data, and exporting it in a format compatible with deployment platforms. The deployed model is expected to aid in medical diagnostics by accurately predicting multiple diseases from chest X-ray images.</p>
<!-- Start the fourth section which is the Conclusion -->
<h2 id="conclusion">4. Conclusion</h2>
<h3 id="summary-findings">4.1 Summary of Findings</h3>
<p>This project focuses on the multi-label classification of chest X-ray images using deep learning models, aiming to predict multiple diseases from a single image. The methodology involves several key steps, including understanding the data, preparation of the data, data augmentation, model building with hyperparameter tuning, and performance evaluation. Data augmentation techniques like rotation, shear, and flipping are employed to prevent overfitting and enhance model robustness.</p>
<p>The dissertation outlines a systematic approach to finding the optimal hyperparameters for the models. Two methods are used: one for searching the best parameters across batches and epochs, and another for tuning learning rates, initialization modes, activation functions, and dropout rates. Once the best parameters are identified, they are applied to the models, which are then trained on the data. After training, the models are evaluated on unseen data, and performance is visualized through metrics like ROC curves.</p>
<p>The process is repeated across different models, with the goal of identifying the model that achieves the highest accuracy. The best-performing model is then subjected to further evalua-tion, including the generation of confusion matrices, F1-score, recall, precision and classification reports. Finally, this model is saved and prepared for deployment, where it can be utilized to assist radiologists by automating the detection of multiple diseases from chest X-rays, thereby reducing their workload and improving diagnostic efficiency.</p>
<h3 id="evaluation">4.2 Evaluation</h3>
<p>The objective of this project was to develop robust and accurate deep learning models capable of classifying multiple diseases from chest X-ray images. The models aimed to assist in the medical diagnosis of conditions such as Atelectasis, Consolidation, Infiltration, Pneumothorax, Edema, Emphysema, Fibrosis, Effusion, Pneumonia, Pleural thickening, Cardiomegaly, Nodule, Mass and Hernia, even in the presence of unseen data, varying image quality, and diverse patient demographics. The project assessed four deep learning models for the classification of several thoracic diseases from chest X-ray images. The models' low accuracy and performance metrics across various neuron layers were caused by overfitting, despite the fact that they were intended to reduce false positives and negatives and improve generalization.</p>
<h3 id="future-work">4.3 Future Work</h3>
<p>One limitation faced during the project is the model performance on the training data. As mentioned earlier in the achievements section, fitting the training and validation generator data on the model leads to overfitting during the training process due to the dataset. Currently, the image file paths are obtained from the dataset; a future improvement is to read images directly from the directory when generating data via the augmentation process. To improve model performance, a folder will be created with the name of each label, and within every label sub-directory separate train and test folders will be created; the images placed there will be used at data-augmentation time to produce the training and test generators. Those generator datasets will then be used for searching the best parameters and fitting the model.</p>
<p>The next step is to improve the search for the best parameters via Random Search. Currently, when searching for the best parameters for a specific model, only the first batch is fitted in the grid search, because passing all the batches (or the full generated dataset) to the grid search takes a long time and sometimes raises errors due to memory pressure (an out-of-range error or a memory leak).</p>
<!-- Start the fifth section which is the References and Appendices -->
<h2 id="references">References</h2>
<p>The following references represent key contributions to the field of image classification and computer vision:</p>
<ol>
<li>Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. <em>Advances in neural information processing systems</em>, 25.</li>
<li>He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. <em>Proceedings of the IEEE conference on computer vision and pattern recognition</em>, 770-778.</li>
<li>Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., ... & Houlsby, N. (2020). An image is worth 16x16 words: Transformers for image recognition at scale. <em>arXiv preprint arXiv:2010.11929</em>.</li>
<li>Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. <em>arXiv preprint arXiv:1409.1556</em>.</li>
<li>Tan, M., & Le, Q. (2019). EfficientNet: Rethinking model scaling for convolutional neural networks. <em>International conference on machine learning</em>, 6105-6114.</li>
</ol>
<p><em>Note: This is a representative selection of references. The complete bibliography contains additional sources covering theoretical foundations, methodological approaches, and recent advances in image classification research.</em></p>
<h2 id="appendices">Appendices</h2>
<h3 id="appendix-a">Appendix A: Technical Implementation Details</h3>
<p>Review the methodology section for detailed implementation specifications, including hyperparameter configurations, training procedures, and computational requirements for reproducing the experimental results.</p>
<h3 id="appendix-b">Appendix B: Code Repository</h3>
<p>Visit <a href="https://github.com/Asif-Nawaz27/ML-image-classification" target="_blank" rel="noopener noreferrer">this GitHub repository (opens in a new tab)</a> for access to the codebase, including implementation details, trained models, and experimental scripts.</p>
</article>
<!-- Back to Top Button -->
<button class="back-to-top" id="backToTopBtn" type="button" aria-label="Back to top" title="Back to Top"></button>
<div class="footer">
<p>© 2025 Advanced Image Classification Research | Department of Computer Science</p>
<p><em>This represents original research in computer vision and machine learning</em></p>
</div>
<script>
// Back to Top Button Functionality
window.addEventListener('scroll', function() {
const backToTopBtn = document.getElementById('backToTopBtn');
// Show button when user scrolls down 300px
if (window.pageYOffset > 300) {
backToTopBtn.classList.add('show');
} else {
backToTopBtn.classList.remove('show');
}
});
// Smooth scroll to top when button is clicked
document.getElementById('backToTopBtn').addEventListener('click', function() {
window.scrollTo({
top: 0,
behavior: 'smooth'
});
});
// Add smooth scrolling to all anchor links
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
anchor.addEventListener('click', function (e) {
e.preventDefault();
const target = document.querySelector(this.getAttribute('href'));
if (target) {
target.scrollIntoView({
behavior: 'smooth',
block: 'start'
});
}
});
});
</script>
</body>
</html>