diff --git a/src/deepgraphpose/models/fitdgp.py b/src/deepgraphpose/models/fitdgp.py
index fccbb7e..e0362f3 100644
--- a/src/deepgraphpose/models/fitdgp.py
+++ b/src/deepgraphpose/models/fitdgp.py
@@ -476,6 +476,13 @@ def fit_dgp_labeledonly(
     all_data_batch_ids = []
     all_joint_locs = []
     video_names = []
+    all_visible_frames = []
+    all_hidden_frames = []
+    all_wt_batch_mask = []
+    all_visible_marker = []
+    all_hidden_marker = []
+    all_visible_marker_in_targets = []
+
     for dataset_id in range(len(data_batcher.datasets)):
         (visible_frame, hidden_frame, _, all_data_batch, joint_loc, wt_batch_mask,
          all_marker_batch, addn_batch_info), d = \
@@ -485,33 +492,45 @@ def fit_dgp_labeledonly(
         all_joint_locs.append(joint_loc)
         # add the corresponding name of the view to a list of video_names (important that these are added at the same time to their respective lists to preserve ordering)
         video_names.append(data_batcher.datasets[dataset_id].video_name)
+        all_visible_frames.append(visible_frame)
+        all_hidden_frames.append(hidden_frame)
+        all_wt_batch_mask.append(wt_batch_mask)
+
+        visible_marker, hidden_marker, visible_marker_in_targets = addn_batch_info
+        all_visible_marker.append(visible_marker)
+        all_hidden_marker.append(hidden_marker)
+        all_visible_marker_in_targets.append(visible_marker_in_targets)
 
     # make all_data_batch_ids a single ndarray
     all_data_batch_ids = np.concatenate(all_data_batch_ids)
     all_joint_locs = np.concatenate(all_joint_locs)
-
-    nt_batch = len(visible_frame) + len(hidden_frame)
-    visible_marker, hidden_marker, visible_marker_in_targets = addn_batch_info
-    all_frame = np.sort(list(visible_frame) + list(hidden_frame))
-    visible_frame_within_batch = [np.where(all_frame == i)[0][0] for i in visible_frame]
+    all_hidden_frames = np.concatenate(all_hidden_frames)
+    all_visible_frames = np.concatenate(all_visible_frames)
+    all_wt_batch_mask = np.concatenate(all_wt_batch_mask)
+    all_visible_marker = np.concatenate(all_visible_marker)
+    all_hidden_marker = np.concatenate(all_hidden_marker)
+    all_visible_marker_in_targets = np.concatenate(all_visible_marker_in_targets)
+
+    nt_batch = len(all_visible_frames) + len(all_hidden_frames)
+    all_frame = np.sort(list(all_visible_frames) + list(all_hidden_frames))
+    visible_frame_within_batch = [np.where(all_frame == i)[0][0] for i in all_visible_frames]
 
     # batch data for placeholders
     if dgp_cfg.wt > 0:
-        vector_field = learn_wt(all_data_batch)  # vector field from optical flow
+        vector_field = learn_wt(all_data_batch_ids)  # vector field from optical flow
     else:
         vector_field = np.zeros((1,1,1))
     wt_batch = np.ones(nt_batch - 1, ) * dgp_cfg.wt
 
     # data augmentation for visible frames
     if dgp_cfg.aug and dgp_cfg.wt == 0:
-        all_data_batch, joint_loc = data_aug(all_data_batch, visible_frame_within_batch, joint_loc, pipeline, dgp_cfg)
+        all_data_batch_ids, all_joint_locs = data_aug(all_data_batch_ids, visible_frame_within_batch, all_joint_locs, pipeline, dgp_cfg)
 
-    locref_targets_batch, locref_mask_batch = coord2map(pdata, joint_loc, nx_out, ny_out, nj)
+    locref_targets_batch, locref_mask_batch = coord2map(pdata, all_joint_locs, nx_out, ny_out, nj)
     if locref_mask_batch.shape[0] != 0:
         locref_targets_all_batch = np.zeros(
             (len(all_frame), nx_out, ny_out, nj * 2))
-        locref_targets_all_batch[
-        visible_frame_within_batch, :, :, :] = locref_targets_batch
+        locref_targets_all_batch[visible_frame_within_batch, :, :, :] = locref_targets_batch
         locref_mask_all_batch = np.zeros(
             (len(all_frame), nx_out, ny_out, nj * 2))
         locref_mask_all_batch[visible_frame_within_batch, :, :, :] = locref_mask_batch
@@ -531,10 +550,10 @@ def fit_dgp_labeledonly(
         placeholders['targets']: all_joint_locs,
         placeholders['locref_map']: locref_targets_all_batch,
         placeholders['locref_mask']: locref_mask_all_batch,
-        placeholders['visible_marker_pl']: visible_marker,
-        placeholders['hidden_marker_pl']: hidden_marker,
-        placeholders['visible_marker_in_targets_pl']: visible_marker_in_targets,
-        placeholders['wt_batch_mask_pl']: wt_batch_mask,
+        placeholders['visible_marker_pl']: all_visible_marker,
+        placeholders['hidden_marker_pl']: all_hidden_marker,
+        placeholders['visible_marker_in_targets_pl']: all_visible_marker_in_targets,
+        placeholders['wt_batch_mask_pl']: all_wt_batch_mask,
         placeholders['vector_field_tf']: vector_field,
         placeholders['nt_batch_pl']: nt_batch,
         placeholders['wt_batch_pl']: wt_batch,
@@ -1118,21 +1137,22 @@ def dgp_loss(data_batcher, dgp_cfg, placeholders):
     if data_batcher.multiview:
         F_dict = data_batcher.fundamental_mat_dict
         num_pts_per_frame = targets_pred.shape[1]
-        num_pts_per_view = tf.dtypes.cast(num_pts_per_frame * nt_batch_pl, tf.int64)  # need to cast this as an int64 for some reason or it breaks
+        num_pts_per_view = tf.dtypes.cast(num_pts_per_frame * nt_batch_pl / len(data_batcher.datasets), tf.int64)  # cast to int64 so it can be combined with the int64 indices returned by tf.where below
         loss['epipolar_loss'] = 0
         for key, F in F_dict.items():
             v1_name, v2_name = key.split(data_batcher.F_dict_key_delim)
             # get coordinates of predictions for video 1
             name1_idx = tf.where(tf.equal(video_names, v1_name))[0][0]
-            v1_pts = targets_pred_marker[name1_idx * num_pts_per_view:name1_idx * num_pts_per_view + num_pts_per_view]
+            v1_pts = targets_pred_marker[(name1_idx * num_pts_per_view):(name1_idx * num_pts_per_view + num_pts_per_view)]
             # get coordinates of predictions for video 2
             name2_idx = tf.where(tf.equal(video_names, v2_name))[0][0]
-            v2_pts = targets_pred_marker[name2_idx * num_pts_per_view:name2_idx * num_pts_per_view + num_pts_per_view]
+            v2_pts = targets_pred_marker[(name2_idx * num_pts_per_view):(name2_idx * num_pts_per_view + num_pts_per_view)]
             # compute epipolar loss. (every point in v1_pts should correspond to the same point in space as the point at
             # the same index in v2_pts. I.e. v1_pts[n] and v2_pts[n] correspond to the same point in space)
             epipolar_loss = compute_epipolar_loss(v1_pts, v2_pts, F)
             loss['epipolar_loss'] += dgp_cfg.epipolar_wt * epipolar_loss
+        total_loss += loss['epipolar_loss']
@@ -1270,9 +1290,12 @@ def compute_epipolar_loss(v1_pts, v2_pts, F):
     -------
     scalar loss value of ||x'Fx||, the magnitude of the vector v2_pts•F•v1_pts
     """
+
+    # convert to homogeneous coordinates
     ones = tf.ones_like(v1_pts)[:,0]
     ones = tf.expand_dims(ones, axis=1)
+
     im1_pts_hom = tf.concat([v1_pts, ones], axis=1)
     im2_pts_hom = tf.concat([v2_pts, ones], axis=1)
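
For intuition, the epipolar term the last two hunks wire up rests on a single identity: if F is the fundamental matrix relating view 1 to view 2, then corresponding points in homogeneous coordinates satisfy x2' F x1 = 0, so the magnitude |x2' F x1| (the ||x'Fx|| in the docstring above) is a natural per-pair residual. The following is a minimal NumPy sketch of that computation, not the repository's TF implementation; epipolar_residuals is an illustrative name, and the real compute_epipolar_loss may aggregate the per-pair values differently (e.g. a sum or mean over pairs).

import numpy as np

def epipolar_residuals(v1_pts, v2_pts, F):
    # v1_pts, v2_pts: (N, 2) arrays of corresponding (x, y) predictions, one row per point
    # F: (3, 3) fundamental matrix relating view 1 to view 2
    ones = np.ones((v1_pts.shape[0], 1))
    x1 = np.concatenate([v1_pts, ones], axis=1)  # (N, 3) homogeneous coordinates
    x2 = np.concatenate([v2_pts, ones], axis=1)
    # per-pair residual x2[n]' F x1[n]; zero exactly when x2[n] lies on the
    # epipolar line F @ x1[n] induced by its partner point
    return np.abs(np.einsum('ni,ij,nj->n', x2, F, x1))

Driving these residuals toward zero pulls each predicted marker onto the epipolar line induced by its partner view, which is exactly what the dgp_cfg.epipolar_wt-weighted term contributes to total_loss.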