Merge pull request #327 from Song2017/master

wangzheng0822 · web-flow · commit 8d005f42e109 · 2019-06-07T07:54:32.000+08:00
LRUCache&min_heap
diff --git a/python/06_linkedlist/LRUCache.py b/python/06_linkedlist/LRUCache.py
@@ -0,0 +1,107 @@
+# Definition for singly-linked list.
+class DbListNode(object):
+    def __init__(self, x, y):
+        self.key = x
+        self.val = y
+        self.next = None
+        self.prev = None
+
+
+class LRUCache:
+    '''
+    leet code: 146
+        运用你所掌握的数据结构，设计和实现一个  LRU (最近最少使用) 缓存机制。
+        它应该支持以下操作： 获取数据 get 和 写入数据 put 。
+        获取数据 get(key) - 如果密钥 (key) 存在于缓存中，则获取密钥的值（总是正数），否则返回 -1。
+        写入数据 put(key, value) - 如果密钥不存在，则写入其数据值。
+            当缓存容量达到上限时，它应该在写入新数据之前删除最近最少使用的数据值，从而为新的数据值留出空间
+
+    哈希表+双向链表
+    哈希表: 查询 O(1)
+    双向链表: 有序, 增删操作 O(1)
+
+    Author: Ben
+    '''
+
+    def __init__(self, capacity: int):
+        self.cap = capacity
+        self.hkeys = {}
+        # self.top和self.tail作为哨兵节点, 避免越界
+        self.top = DbListNode(None, -1)
+        self.tail = DbListNode(None, -1)
+        self.top.next = self.tail
+        self.tail.prev = self.top
+
+    def get(self, key: int) -> int:
+
+        if key in self.hkeys.keys():
+            # 更新结点顺序
+            cur = self.hkeys[key]
+            # 跳出原位置
+            cur.next.prev = cur.prev
+            cur.prev.next = cur.next
+            # 最近用过的置于链表首部
+            top_node = self.top.next
+            self.top.next = cur
+            cur.prev = self.top
+            cur.next = top_node
+            top_node.prev = cur
+
+            return self.hkeys[key].val
+        return -1
+
+    def put(self, key: int, value: int) -> None:
+        if key in self.hkeys.keys():
+            cur = self.hkeys[key]
+            cur.val = value
+            # 跳出原位置
+            cur.prev.next = cur.next
+            cur.next.prev = cur.prev
+
+            # 最近用过的置于链表首部
+            top_node = self.top.next
+            self.top.next = cur
+            cur.prev = self.top
+            cur.next = top_node
+            top_node.prev = cur
+        else:
+            # 增加新结点至首部
+            cur = DbListNode(key, value)
+            self.hkeys[key] = cur
+            # 最近用过的置于链表首部
+            top_node = self.top.next
+            self.top.next = cur
+            cur.prev = self.top
+            cur.next = top_node
+            top_node.prev = cur
+            if len(self.hkeys.keys()) > self.cap:
+                self.hkeys.pop(self.tail.prev.key)
+                # 去掉原尾结点
+                self.tail.prev.prev.next = self.tail
+                self.tail.prev = self.tail.prev.prev
+
+    def __repr__(self):
+        vals = []
+        p = self.top.next
+        while p.next:
+            vals.append(str(p.val))
+            p = p.next
+        return '->'.join(vals)
+
+
+if __name__ == '__main__':
+    cache = LRUCache(2)
+    cache.put(1, 1)
+    cache.put(2, 2)
+    print(cache)
+    cache.get(1)  # 返回  1
+    cache.put(3, 3)  # 该操作会使得密钥 2 作废
+    print(cache)
+    cache.get(2)  # 返回 -1 (未找到)
+    cache.put(4, 4)  # 该操作会使得密钥 1 作废
+    print(cache)
+    cache.get(1)  # 返回 -1 (未找到)
+    cache.get(3)  # 返回  3
+    print(cache)
+    cache.get(4)  # 返回  4
+    print(cache)
diff --git a/python/28_heap/min_heap.py b/python/28_heap/min_heap.py
@@ -0,0 +1,108 @@
+class Heap(object):
+    '''
+    索引从0开始的小顶堆
+    参考: https://github.com/python/cpython/blob/master/Lib/heapq.py
+
+    author: Ben
+    '''
+
+    def __init__(self, nums):
+        self._heap = nums
+
+    def _siftup(self, pos):
+        '''
+        从上向下的堆化
+        将pos节点的子节点中的最值提升到pos位置
+        '''
+        start = pos
+        startval = self._heap[pos]
+        n = len(self._heap)
+        # 完全二叉树特性
+        child = pos * 2 + 1
+        # 比较叶子节点
+        while child < n:
+            right = child + 1
+            # 平衡二叉树的特性, 大的都在右边
+            if right < n and not self._heap[right] > self._heap[child]:
+                child = right
+            self._heap[pos] = self._heap[child]
+            pos = child
+            child = pos * 2 + 1
+        self._heap[pos] = startval
+
+        # 此时只有pos是不确定的
+        self._siftdown(start, pos)
+
+    def _siftdown(self, start, pos):
+        '''
+        最小堆: 大于start的节点, 除pos外已经是最小堆
+        以pos为叶子节点, start为根节点之间的元素进行排序. 将pos叶子节点交换到正确的排序位置
+        操作: 从叶子节点开始, 当父节点的值大于子节点时, 父节点的值降低到子节点
+        '''
+        startval = self._heap[pos]
+        while pos > start:
+            parent = (pos - 1) >> 1
+            parentval = self._heap[parent]
+            if parentval > startval:
+                self._heap[pos] = parentval
+                pos = parent
+                continue
+            break
+        self._heap[pos] = startval
+
+    def heapify(self):
+        '''
+        堆化: 从后向前(从下向上)的方式堆化, _siftup中pos节点的子树已经是有序的,
+        这样要排序的节点在慢慢减少
+        1. 因为n/2+1到n的节点是叶子节点(完全二叉树的特性), 它们没有子节点,
+        所以, 只需要堆化n/2到0的节点, 以对应的父节点为根节点, 将最值向上筛选,
+        然后交换对应的根节点和查找到的最值
+        2. 因为开始时待排序树的根节点还没有排序, 为了保证根节点的有序,
+        需要将子树中根节点交换到正确顺序
+        '''
+        n = len(self._heap)
+        for i in reversed(range(n // 2)):
+            self._siftup(i)
+
+    def heappop(self):
+        '''
+        弹出堆首的最值 O(logn)
+        '''
+        tail = self._heap.pop()
+        # 为避免破环完全二叉树特性, 将堆尾元素填充到堆首
+        # 此时, 只有堆首是未排序的, 只需要一次从上向下的堆化
+        if self._heap:
+            peak = self._heap[0]
+            self._heap[0] = tail
+            self._siftup(0)
+            return peak
+        return tail
+
+    def heappush(self, val):
+        '''
+        添加元素到堆尾 O(logn)
+        '''
+        n = len(self._heap)
+        self._heap.append(val)
+        # 此时只有堆尾的节点是未排序的, 将添加的节点迭代到正确的位置
+        self._siftdown(0, n)
+
+    def __repr__(self):
+        vals = [str(i) for i in self._heap]
+        return '>'.join(vals)
+
+
+if __name__ == '__main__':
+    h = Heap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
+    h.heapify()
+    print(h)
+    print(h.heappop())
+    print(h)
+    h.heappush(3.5)
+    print(h)
+    h.heappush(0.1)
+    print(h)
+    h.heappush(0.5)
+    print(h)
+    print(h.heappop())
+    print(h)