[main] Added functions

cthacker-udel · cthacker-udel · commit 9587ac7d5099 · 2022-11-16T23:55:45.000-05:00
diff --git a/pythonProblems/dp/boxes.py b/pythonProblems/dp/boxes.py
@@ -0,0 +1,53 @@
+"""Box-stacking problem solved with dynamic programming."""
+
+from typing import List
+
+
+class Box:
+    """A cuboid described by its width, height and depth."""
+
+    def __init__(self, width, height, depth):
+        self.width = width
+        self.height = height
+        self.depth = depth
+
+
+def boxes(stack: List[Box]) -> int:
+    """Return the height of the tallest tower that can be built from ``stack``.
+
+    Every box may be rotated so that any of its three dimensions is the
+    height, and (as in the classic formulation) each box type may be used
+    more than once.  A box can rest on another only when both its base
+    dimensions are strictly smaller.  An empty ``stack`` yields 0.
+    """
+    # Each box contributes three orientations, one per choice of height.  The
+    # base is always stored with width <= depth so that bases of different
+    # orientations can be compared dimension by dimension.
+    orientations: List[Box] = []
+    for box in stack:
+        dims = (box.width, box.height, box.depth)
+        for axis, height in enumerate(dims):
+            base = dims[:axis] + dims[axis + 1:]
+            orientations.append(Box(min(base), height, max(base)))
+    if not orientations:
+        return 0
+
+    # A strictly smaller base implies a strictly smaller base area, so after
+    # sorting by area (largest first) any valid tower appears in list order.
+    orientations.sort(key=lambda b: b.depth * b.width, reverse=True)
+
+    # msh[i] = maximum stack height of a tower whose top is orientations[i].
+    msh = [b.height for b in orientations]
+    for i, top in enumerate(orientations):
+        for j in range(i):
+            below = orientations[j]
+            if top.width < below.width and top.depth < below.depth:
+                msh[i] = max(msh[i], msh[j] + top.height)
+
+    # The tallest tower may end at any orientation, not just the last one.
+    return max(msh)
+
+
+if __name__ == '__main__':
+    arr = [Box(6, 4, 7), Box(2, 1, 3), Box(5, 4, 6), Box(12, 10, 32)]
+    print(boxes(arr))
diff --git a/pythonProblems/dp/firstMissingPositive.py b/pythonProblems/dp/firstMissingPositive.py
@@ -1,22 +1,40 @@
+import random
 from typing import List
 
 
 class Solution:
     def firstMissingPositive(self, nums: List[int]) -> int:
-        min_number = -1
-        max_number = -1
-        if 1 not in nums:
-            return 1
-        for eachnumber in nums:
-            if eachnumber < 0:
-                continue
-            else:
-                min_number = eachnumber if min_number == -1 else min(min_number, eachnumber)
-                max_number = eachnumber if max_number == -1 else max(max_number, eachnumber)
-        return max_number + 1 if min_number <= 1 else min_number - 1
+        """Return the smallest positive integer that does not occur in ``nums``.
+
+        Zero, negative numbers and duplicates are ignored; an empty list
+        yields 1.  ``nums`` is not modified.
+        """
+        # Tracking only min/max/"gap" while scanning cannot work for unsorted
+        # input, so remember every positive value and probe upwards from 1.
+        positives = {value for value in nums if value > 0}
+        candidate = 1
+        while candidate in positives:
+            candidate += 1
+        return candidate
 
 
 if __name__ == '__main__':
     sol = Solution()
-    nums = [3, 4, -1, 1]
-    print(sol.firstMissingPositive(nums))
+    arrs = [
+        [1,2,0],
+        [3,4,-1,1],
+        [7,8,9,11,12],
+        [1, 2, 3, 50, 48, 5214, 32, 6324, 234, -32, -1231, -432134, -5123, -412243, -44, -324, 0],
+        [1, 2, 3, 6, 7, 4],
+        [40, 33, 50, -11, 0, 12, 50, 1, 33, -1, -3, 27, 29, -19],
+        [1,2,6,3,5,4]
+    ]
+    # for i in range(5):
+    #     new_arr = []
+    #     for j in range(random.randint(10, 30)):
+    #         new_arr.append(random.randint(-20, 50))
+    #     arrs.append(new_arr)
+    #     new_arr = []
+    for eacharr in arrs:
+        print('testing {}'.format(eacharr))
+        print(sol.firstMissingPositive(eacharr))
diff --git a/pythonProblems/dp/lcs.py b/pythonProblems/dp/lcs.py
@@ -0,0 +1,38 @@
+from pprint import pprint
+from typing import List
+
+
+def find_subsequence_len(str1: str, str2: str, max_length=0) -> int:
+    """Return the LCS length of ``str1`` and ``str2``, floored at ``max_length``.
+
+    ``max_length`` is a lower bound for the result (default 0).  The work is
+    delegated to :func:`lcs`, so the running time is polynomial in the
+    lengths of the two strings.
+    """
+    return max(max_length, lcs(str1, str2))
+
+
+def lcs(substr1: str, substr2: str) -> int:
+    """Return the length of the longest common subsequence of two strings.
+
+    Bottom-up dynamic programming in O(len(substr1) * len(substr2)) time and
+    O(len(substr2)) extra space; empty input yields 0.
+    """
+    # previous[col] = LCS length of the processed prefix of substr1 and
+    # substr2[:col]; only the previous row of the table is ever needed.
+    previous = [0] * (len(substr2) + 1)
+    for char1 in substr1:
+        current = [0]
+        for col, char2 in enumerate(substr2, start=1):
+            if char1 == char2:
+                current.append(previous[col - 1] + 1)
+            else:
+                current.append(max(previous[col], current[col - 1]))
+        previous = current
+    return previous[-1]
+
+
+if __name__ == '__main__':
+    x = 'abcbdab'
+    y = 'bdcaba'
+    print(lcs(x, y))
diff --git a/pythonProblems/dp/needleman.py b/pythonProblems/dp/needleman.py
@@ -0,0 +1,212 @@
+from random import randint
+
+# Constant values to be used for the scoring grid in the Needleman-Wunsch algorithm
+IDENTITY = 4
+TRANSITION = -2
+TRANSVERSION = -3
+GAP = -8
+SCORES = { # scoring matrix for the Needleman-Wunsch algorithm
+    "A" : {
+        "A" : IDENTITY,
+        "T" : TRANSVERSION,
+        "C" : TRANSVERSION,
+        "G" : TRANSITION
+    },
+    "T" : {
+        "A" : TRANSVERSION,
+        "T" : IDENTITY,
+        "C" : TRANSITION,
+        "G" : TRANSVERSION
+    },
+    "C" : {
+        "A" : TRANSVERSION,
+        "T" : TRANSITION,
+        "C" : IDENTITY,
+        "G" : TRANSVERSION
+    },
+    "G" : {
+        "A" : TRANSITION,
+        "T" : TRANSVERSION,
+        "C" : TRANSVERSION,
+        "G" : IDENTITY
+    }
+}
+
+# Traceback directions, listed in the order fill_matrix prefers them on ties:
+# 0 = diagonal (align two bases), 1 = left (gap in seq 1), 2 = up (gap in seq 2)
+DIRECTIONS = ("DIAG", "LEFT", "UP")
+
+def get_seq() -> tuple[str, str]:
+    """Prompt for a FASTA-style file and return its first two sequences.
+
+    Returns a tuple ``(seq_1, seq_2)`` of upper-cased strings with line breaks
+    and surrounding whitespace removed.  When the file is missing or has fewer
+    than two ``>`` header lines, a message is printed and both sequences are
+    returned as empty strings.
+    """
+    path = input("Please enter a filepath containing two sequences:\n").strip()
+
+    try:
+        with open(path, "r", encoding='utf-8') as f:
+            lines = f.readlines()
+    except FileNotFoundError as e:
+        print("ERROR READING FILEPATH: ", e)
+        return "", ""
+
+    # indices of the first two header lines; a record is the text after its header
+    headers = [i for i, line in enumerate(lines) if line.startswith(">")][:2]
+    if len(headers) < 2:
+        print("File must be in the form of\n>SEQUENCE 1 NAME\nAGTC ... GTA\n>SEQUENCE 2 NAME\nTGAT ... CCA")
+        return "", ""
+
+    # strip() also drops "\r", so files with Windows line endings still parse
+    idx_1, idx_2 = headers
+    seq_1 = "".join(line.strip() for line in lines[idx_1+1:idx_2]).upper()
+    seq_2 = "".join(line.strip() for line in lines[idx_2+1:]).upper()
+    return seq_1, seq_2
+
+def make_matrix(seq_1:str, seq_2:str, make_dir:bool) -> list[list]:
+    """Create one of the two grids used by the Needleman-Wunsch algorithm.
+
+    The matrix is in the form of:
+    [
+        [_, _, A, T, A, ..., G]
+        [_, 0, _, _, _, ..., _]
+        ...
+        [A, _, _, _, _, ..., _]
+    ]
+    with seq_1 labelling the rows (from row 2) and seq_2 the columns (from column 2).
+
+    If make_dir is True the direction matrix is built: position [1][1] holds
+    'DONE' to halt the traceback, border row 1 holds 'LEFT' and border column 1
+    holds 'UP', because along a border the only possible move is another gap.
+
+    Otherwise the value matrix is built: position [1][1] holds 0 and the border
+    row/column hold successive multiples of the GAP penalty.
+    """
+    # initialize empty array
+    mat = [[None] * (len(seq_2) + 2) for _ in range(len(seq_1) + 2)]
+
+    # seq 1 runs down the y axis (rows); moving along its border column means going up
+    for i, base in enumerate(seq_1):
+        mat[i+2][0] = base
+        mat[i+2][1] = DIRECTIONS[2] if make_dir else GAP*(i+1)
+
+    # seq 2 runs along the x axis (cols); moving along its border row means going left
+    for i, base in enumerate(seq_2):
+        mat[0][i+2] = base
+        mat[1][i+2] = DIRECTIONS[1] if make_dir else GAP*(i+1)
+
+    mat[1][1] = "DONE" if make_dir else 0
+
+    return mat
+
+def fill_matrix(mat:list[list], mat_dir:list[list], x:int, y:int, max_x:int, max_y:int) -> None:
+    """Fill the value and direction grids in place, row by row.
+
+    Cells from column x to max_x and row y to max_y are visited; the first call
+    is expected to start at (2, 2), the first cell after the borders.  Iterating
+    instead of recursing keeps long sequences clear of Python's recursion limit.
+    """
+    if mat is None:
+        return
+
+    for row in range(y, max_y + 1):
+        for col in range(x, max_x + 1):
+            # skip cells already filled, or whose upper/left neighbour is still empty
+            if mat[row][col] is not None or mat[row-1][col] is None or mat[row][col-1] is None:
+                continue
+
+            # find the score that corresponds to the row/column bases of this cell
+            score = SCORES[mat[row][0]][mat[0][col]]
+
+            # the 3 candidate values, in DIRECTIONS order
+            values = [
+                mat[row-1][col-1] + score,  # value coming from diagonal
+                mat[row][col-1] + GAP,      # value coming from left
+                mat[row-1][col] + GAP,      # value coming from up
+            ]
+
+            max_val = max(values)
+            max_idx = values.index(max_val)  # first maximum wins, so diagonal is preferred
+            if max_idx > 0 and values[1] == values[2]:
+                max_idx = randint(1, 2)  # tie between left and up: randomly pick one
+
+            mat[row][col] = max_val  # best scoring outcome for this cell
+            mat_dir[row][col] = DIRECTIONS[max_idx]  # move to take during traceback
+
+def get_alignment(mat:list[list], mat_dir:list[list]) -> tuple[str, str, int]:
+    """Trace back through a filled direction matrix to recover an optimal alignment.
+
+    The walk starts in the bottom-right cell and follows the stored directions
+    until it reaches the 'DONE' cell at [1][1].
+
+    Returns a tuple containing the aligned seq 1 (rows), the aligned seq 2
+    (columns) and the alignment score read from the bottom-right cell of mat.
+    """
+    x = len(mat[0]) - 1
+    y = len(mat) - 1
+    best_score = mat[y][x]
+
+    # characters are collected end-to-start and reversed once when joining
+    align_1 = []  # vertical seq
+    align_2 = []  # horizontal seq
+    while mat_dir[y][x] != "DONE":
+        direction = mat_dir[y][x]
+
+        if direction == "DIAG":
+            # both sequences consume a base
+            align_1.append(mat_dir[y][0])
+            align_2.append(mat_dir[0][x])
+            x -= 1
+            y -= 1
+        elif direction == "LEFT":
+            # gap in the vertical sequence
+            align_1.append("-")
+            align_2.append(mat_dir[0][x])
+            x -= 1
+        else:
+            # gap in the horizontal sequence
+            align_1.append(mat_dir[y][0])
+            align_2.append("-")
+            y -= 1
+
+    return ("".join(reversed(align_1)), "".join(reversed(align_2)), best_score)
+
+def print_align_results(align_1:str, align_2:str, score:int) -> None:
+    """Takes the aligned sequences strings and best score, then prints out the results"""
+    print(f"SEQUENCE 1:\t{align_1}")
+    print(f"SEQUENCE 2:\t{align_2}")
+    print("Alignment Score:", score)
+
+def print_mat(mat:list[list]) -> None:
+    """Print a matrix tab-separated, one row per paragraph, showing empty cells as '_'."""
+    for row in mat:
+        for i in row:
+            print("_" if i is None else i, end="\t")
+        print("\n")
+
+if __name__ == "__main__":
+    while True:
+        seq1, seq2 = get_seq()
+        if seq1 == "" or seq2 == "":
+            print("Inputs contain invalid sequences, please check your FASTA files.")
+        else:
+            mat = make_matrix(seq1, seq2, make_dir=False)
+            mat_dir = make_matrix(seq1, seq2, make_dir=True)
+
+            fill_matrix(mat, mat_dir, 2, 2, len(mat[0])-1, len(mat)-1)
+
+            align_1, align_2, score = get_alignment(mat, mat_dir)
+
+            print("############ DYNAMIC PROGRAMMING TABLE ############\n")
+            print_mat(mat)
+            print_mat(mat_dir)
+
+            print("############ ALIGNMENT RESULTS ############\n")
+            print_align_results(align_1, align_2, score)
+
+        if input("Would you like to align sequences again? (y/n)").strip().lower() == "y":
+            continue
+        else:
+            break
diff --git a/pythonProblems/dp/test.txt b/pythonProblems/dp/test.txt
@@ -0,0 +1,4 @@
+>Sequence 1
+taccagta
+>Sequence 2
+aggcat
diff --git a/pythonProblems/dp/test2.txt b/pythonProblems/dp/test2.txt
@@ -0,0 +1,4 @@
+>Sequence 1
+atcg
+>Sequence 2
+aattccgg