@@ -449,103 +449,103 @@ def accuracy(output, target, topk=(1,)):
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, prefetch_factor=2, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, prefetch_factor=2, drop_last=True)

- model = model.to(device)
-
- grad_clip = 1.0  # setting gradient clipping to 1.0
-
- for e in range(epoch):
-     print(f'Training Epoch : {e}')
-     total_loss = 0
-     val_iter = iter(val_loader)
-     train_acc = [0, 0]
-     train_num = 0
-
-     total_acc = [0, 0]
-     count = 0
-     for i, data in enumerate(train_loader):
-
-
-         model.train()
-         img, label = data
-         img, label = img.to(device, non_blocking=True), label.to(device, non_blocking=True)
-
-         output = model(img)
-
-         loss = criterion(output, label) / accum_step
-
-         temp_output, temp_label = output.detach().to('cpu'), label.detach().to('cpu')
-         temp_acc = accuracy(temp_output, temp_label, (1, 5))
-         train_acc = [train_acc[0] + temp_acc[0], train_acc[1] + temp_acc[1]]
-         train_num += batch_size
-         temp_output, temp_label, temp_acc = None, None, None
-
-         loss.backward()
-         total_loss += loss.detach().to('cpu')
-         img, label = None, None
-         torch.cuda.empty_cache()
-         if i > 0 and i % update_count == 0:
-             print(f'Training steps : {i} parameter update loss :{total_loss}')
-             if grad_clip is not None:
-                 torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
-             optimizer.step()
-             optimizer.zero_grad(set_to_none=True)
-
-             if total_loss < 7.0:
-                 # print(f"train loss {total_loss}less than 7.0 ,set grad clip to {clip}")
-                 grad_clip = clip
-             if i % eval_step != 0:
-                 total_loss = 0
-
-             output, loss = None, None
+ model = model.to(device)
+
+ grad_clip = 1.0  # setting gradient clipping to 1.0
+
+ for e in range(epoch):
+     print(f'Training Epoch : {e}')
+     total_loss = 0
+     val_iter = iter(val_loader)
+     train_acc = [0, 0]
+     train_num = 0
+
+     total_acc = [0, 0]
+     count = 0
+     for i, data in enumerate(train_loader):
+
+
+         model.train()
+         img, label = data
+         img, label = img.to(device, non_blocking=True), label.to(device, non_blocking=True)
+
+         output = model(img)
+
+         loss = criterion(output, label) / accum_step
+
+         temp_output, temp_label = output.detach().to('cpu'), label.detach().to('cpu')
+         temp_acc = accuracy(temp_output, temp_label, (1, 5))
+         train_acc = [train_acc[0] + temp_acc[0], train_acc[1] + temp_acc[1]]
+         train_num += batch_size
+         temp_output, temp_label, temp_acc = None, None, None
+
+         loss.backward()
+         total_loss += loss.detach().to('cpu')
+         img, label = None, None
        torch.cuda.empty_cache()
-         if i > 0 and i % eval_step == 0:
-
-             print(f'train losss :{total_loss}')
-             temp_loss = total_loss
-             total_loss = 0
-
-             val_loss = 0
-             torch.cuda.empty_cache()
-
-             for j in range(update_count):
-                 loss = None
-                 print(f'Evaluation Steps Start')
-                 try:
-                     img, label = next(val_iter)
-                 except StopIteration:
-                     val_iter = iter(val_loader)
-                     img, label = next(val_iter)
-                 with torch.no_grad():
-                     model.eval()
-
-                     img, label = img.to(device, non_blocking=True), label.to(device, non_blocking=True)
-                     output = model(img)
-                     temp_output, temp_label = output.detach().to('cpu'), label.detach().to('cpu')
-                     temp_acc = accuracy(temp_output, temp_label, (1, 5))
-                     total_acc = [total_acc[0] + temp_acc[0], total_acc[1] + temp_acc[1]]
-                     count += batch_size
-
-                     loss = criterion(output, label) / accum_step
-                     val_loss += loss.detach().to('cpu')
-                     # loss.backward()
-                     torch.cuda.empty_cache()
-
-
-                 img, label, output, loss = None, None, None, None
-
-
-
+         if i > 0 and i % update_count == 0:
+             print(f'Training steps : {i} parameter update loss :{total_loss}')
+             if grad_clip is not None:
+                 torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
+             optimizer.step()
+             optimizer.zero_grad(set_to_none=True)
+
+             if total_loss < 7.0:
+                 # print(f"train loss {total_loss}less than 7.0 ,set grad clip to {clip}")
+                 grad_clip = clip
+             if i % eval_step != 0:
+                 total_loss = 0
+
+             output, loss = None, None
            torch.cuda.empty_cache()
-
-             if abs(val_loss - temp_loss) > 0.03:
-                 grad_clip = clip
-                 # print(f"val_loss {val_loss} - train_loss {temp_loss} = {abs(val_loss-temp_loss)} > 0.3")
-                 # print(f"set grad clip to {grad_clip}")
-
-             best_val_loss = val_loss
-
-             val_loss = None
-             img, label, output = None, None, None
+         if i > 0 and i % eval_step == 0:
+
+             print(f'train losss :{total_loss}')
+             temp_loss = total_loss
+             total_loss = 0
+
+             val_loss = 0
+             torch.cuda.empty_cache()
+
+             for j in range(update_count):
+                 loss = None
+                 print(f'Evaluation Steps Start')
+                 try:
+                     img, label = next(val_iter)
+                 except StopIteration:
+                     val_iter = iter(val_loader)
+                     img, label = next(val_iter)
+                 with torch.no_grad():
+                     model.eval()
+
+                     img, label = img.to(device, non_blocking=True), label.to(device, non_blocking=True)
+                     output = model(img)
+                     temp_output, temp_label = output.detach().to('cpu'), label.detach().to('cpu')
+                     temp_acc = accuracy(temp_output, temp_label, (1, 5))
+                     total_acc = [total_acc[0] + temp_acc[0], total_acc[1] + temp_acc[1]]
+                     count += batch_size
+
+                     loss = criterion(output, label) / accum_step
+                     val_loss += loss.detach().to('cpu')
+                     # loss.backward()
+                     torch.cuda.empty_cache()
+
+
+                 img, label, output, loss = None, None, None, None
+
+
+
+             torch.cuda.empty_cache()
+
+             if abs(val_loss - temp_loss) > 0.03:
+                 grad_clip = clip
+                 # print(f"val_loss {val_loss} - train_loss {temp_loss} = {abs(val_loss-temp_loss)} > 0.3")
+                 # print(f"set grad clip to {grad_clip}")
+
+             best_val_loss = val_loss
+
+             val_loss = None
+             img, label, output = None, None, None
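
The training half of this hunk follows a standard gradient-accumulation pattern: each mini-batch loss is divided by the accumulation factor, gradients build up across batches, and the optimizer steps only every few iterations, with the gradient norm clipped just before the step. Below is a minimal standalone sketch of that pattern, assuming accum_step and update_count are the same value; the wrapper name train_one_epoch and its argument list are placeholders, not code from this repository.

import torch

def train_one_epoch(model, criterion, optimizer, train_loader, device,
                    accum_step=4, grad_clip=1.0):
    # Gradient accumulation: backward() every batch, optimizer.step() every accum_step batches.
    model.train()
    optimizer.zero_grad(set_to_none=True)
    running_loss = 0.0
    for i, (img, label) in enumerate(train_loader, start=1):
        img = img.to(device, non_blocking=True)
        label = label.to(device, non_blocking=True)
        output = model(img)
        # Scale the loss so the accumulated gradient averages over the virtual batch.
        loss = criterion(output, label) / accum_step
        loss.backward()
        running_loss += loss.item()
        if i % accum_step == 0:
            if grad_clip is not None:
                # Clip the global gradient norm right before the parameter update.
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad(set_to_none=True)
            print(f'step {i}: accumulated loss {running_loss:.4f}')
            running_loss = 0.0

Dividing the loss by accum_step keeps the size of each parameter update roughly independent of how many batches are accumulated per step.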
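
The evaluation half keeps one persistent iterator over val_loader and pulls a fixed number of batches from it at each checkpoint, re-creating the iterator when the loader runs out, with model.eval() and torch.no_grad() active for the forward passes. A hedged sketch of that pattern, using the hypothetical helper name run_eval_steps and simplified bookkeeping:

import torch

def run_eval_steps(model, criterion, val_loader, val_iter, device,
                   num_steps, accum_step=4):
    # Pull num_steps batches from a persistent validation iterator,
    # restarting it whenever the loader is exhausted.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for _ in range(num_steps):
            try:
                img, label = next(val_iter)
            except StopIteration:
                # The validation pass ran out of data; start a fresh one.
                val_iter = iter(val_loader)
                img, label = next(val_iter)
            img = img.to(device, non_blocking=True)
            label = label.to(device, non_blocking=True)
            output = model(img)
            # Same scaling as the training loss so the two values are comparable.
            val_loss += (criterion(output, label) / accum_step).item()
    model.train()  # restore training mode for the caller
    return val_loss, val_iter

Keeping the iterator outside the call avoids restarting the validation DataLoader (and its worker processes) at every checkpoint.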
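
The loop sums whatever accuracy(output, target, (1, 5)) returns and tracks a running sample count, which suggests the helper named in the hunk header (its definition sits above this hunk and is not shown here) yields raw top-1 and top-5 correct counts per batch. A typical counter of that kind, offered only as an assumption about its behaviour, could look like this:

import torch

def topk_correct(output, target, topk=(1,)):
    # Count, for each requested k, how many samples have their true label among
    # the k highest-scoring predictions. Returning counts rather than ratios lets
    # the caller sum over batches and divide by a running sample count.
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # (batch, maxk)
    correct = pred.eq(target.view(-1, 1).expand_as(pred))          # bool (batch, maxk)
    return [correct[:, :k].any(dim=1).sum().item() for k in topk]

Called as topk_correct(output.cpu(), label.cpu(), (1, 5)), it would slot into the running train_acc and total_acc sums maintained in the loop above.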