  * have room for two bits at least.
  */
 #define OBJ_ALLOCATED_TAG 1
-#define OBJ_TAG_BITS 1
+
+#ifdef CONFIG_ZPOOL
+/*
+ * The second least-significant bit in the object's header identifies if the
+ * value stored at the header is a deferred handle from the last reclaim
+ * attempt.
+ *
+ * As noted above, this is valid because we have room for two bits.
+ */
+#define OBJ_DEFERRED_HANDLE_TAG 2
+#define OBJ_TAG_BITS 2
+#define OBJ_TAG_MASK (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG)
+#else
+#define OBJ_TAG_BITS 1
+#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
+#endif /* CONFIG_ZPOOL */
+
 #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
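[Editor's note] A minimal standalone sketch of the header-word encoding the hunk above introduces, assuming only what the surrounding comment states: a handle is a 4-byte-aligned address, so its two low bits are free to carry tags. The macro values mirror the patch; the sample handle value and the test harness are illustrative.

    #include <assert.h>

    #define OBJ_ALLOCATED_TAG       1UL
    #define OBJ_DEFERRED_HANDLE_TAG 2UL
    #define OBJ_TAG_MASK (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG)

    int main(void)
    {
            unsigned long handle = 0x1000; /* 4-byte-aligned address */
            unsigned long hdr;

            /* allocated object: header stores handle | OBJ_ALLOCATED_TAG */
            hdr = handle | OBJ_ALLOCATED_TAG;
            assert(hdr & OBJ_ALLOCATED_TAG);
            assert((hdr & ~OBJ_TAG_MASK) == handle); /* tags strip cleanly */

            /* reclaimed object: allocated bit cleared, deferred bit set */
            hdr = (hdr & ~OBJ_ALLOCATED_TAG) | OBJ_DEFERRED_HANDLE_TAG;
            assert(!(hdr & OBJ_ALLOCATED_TAG));
            assert((hdr & ~OBJ_TAG_MASK) == handle);
            return 0;
    }

Stripping with ~OBJ_TAG_MASK rather than ~OBJ_ALLOCATED_TAG is what lets obj_tagged() below return a clean handle regardless of which tag was set.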
@@ -222,6 +238,12 @@ struct link_free {
                  * Handle of allocated object.
                  */
                 unsigned long handle;
+#ifdef CONFIG_ZPOOL
+                /*
+                 * Deferred handle of a reclaimed object.
+                 */
+                unsigned long deferred_handle;
+#endif
         };
 };
 
@@ -272,8 +294,6 @@ struct zspage {
         /* links the zspage to the lru list in the pool */
         struct list_head lru;
         bool under_reclaim;
-        /* list of unfreed handles whose objects have been reclaimed */
-        unsigned long *deferred_handles;
 #endif
 
         struct zs_pool *pool;
@@ -897,7 +917,8 @@ static unsigned long handle_to_obj(unsigned long handle)
         return *(unsigned long *)handle;
 }
 
-static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+static bool obj_tagged(struct page *page, void *obj, unsigned long *phandle,
+                int tag)
 {
         unsigned long handle;
         struct zspage *zspage = get_zspage(page);
@@ -908,13 +929,27 @@ static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
         } else
                 handle = *(unsigned long *)obj;
 
-        if (!(handle & OBJ_ALLOCATED_TAG))
+        if (!(handle & tag))
                 return false;
 
-        *phandle = handle & ~OBJ_ALLOCATED_TAG;
+        /* Clear all tags before returning the handle */
+        *phandle = handle & ~OBJ_TAG_MASK;
         return true;
 }
 
+static inline bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+{
+        return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
+}
+
+#ifdef CONFIG_ZPOOL
+static bool obj_stores_deferred_handle(struct page *page, void *obj,
+                unsigned long *phandle)
+{
+        return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
+}
+#endif
+
 static void reset_page(struct page *page)
 {
         __ClearPageMovable(page);
@@ -946,22 +981,36 @@ static int trylock_zspage(struct zspage *zspage)
 }
 
 #ifdef CONFIG_ZPOOL
+static unsigned long find_deferred_handle_obj(struct size_class *class,
+                struct page *page, int *obj_idx);
+
 /*
  * Free all the deferred handles whose objects are freed in zs_free.
  */
-static void free_handles(struct zs_pool *pool, struct zspage *zspage)
+static void free_handles(struct zs_pool *pool, struct size_class *class,
+                struct zspage *zspage)
 {
-        unsigned long handle = (unsigned long)zspage->deferred_handles;
+        int obj_idx = 0;
+        struct page *page = get_first_page(zspage);
+        unsigned long handle;
 
-        while (handle) {
-                unsigned long nxt_handle = handle_to_obj(handle);
+        while (1) {
+                handle = find_deferred_handle_obj(class, page, &obj_idx);
+                if (!handle) {
+                        page = get_next_page(page);
+                        if (!page)
+                                break;
+                        obj_idx = 0;
+                        continue;
+                }
 
                 cache_free_handle(pool, handle);
-                handle = nxt_handle;
+                obj_idx++;
         }
 }
 #else
-static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {}
+static inline void free_handles(struct zs_pool *pool, struct size_class *class,
+                struct zspage *zspage) {}
 #endif
 
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
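[Editor's note] The rewritten free_handles() no longer chases a linked list threaded through the handles' own memory; it scans every subpage for objects tagged with a deferred handle. A toy, self-contained simulation of that scan shape, with made-up data and hypothetical helper names (find_deferred stands in for find_deferred_handle_obj, the printf for cache_free_handle):

    #include <stdio.h>

    #define NR_PAGES 2
    #define SLOTS    4

    /* fake per-page object headers; nonzero = stored deferred handle */
    static unsigned long slots[NR_PAGES][SLOTS] = {
            { 0, 0x1004, 0, 0x2008 },
            { 0x300c, 0, 0, 0 },
    };

    /* scan forward from *obj_idx; return next handle, or 0 if page is done */
    static unsigned long find_deferred(int page, int *obj_idx)
    {
            for (; *obj_idx < SLOTS; (*obj_idx)++)
                    if (slots[page][*obj_idx])
                            return slots[page][*obj_idx];
            return 0;
    }

    int main(void)
    {
            for (int page = 0; page < NR_PAGES; page++)
                    /* obj_idx restarts at 0 on every page boundary */
                    for (int obj_idx = 0; ; obj_idx++) {
                            unsigned long handle = find_deferred(page, &obj_idx);
                            if (!handle)
                                    break;
                            printf("freeing deferred handle %#lx\n", handle);
                    }
            return 0;
    }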
@@ -979,7 +1028,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
         VM_BUG_ON(fg != ZS_EMPTY);
 
         /* Free all deferred handles from zs_free */
-        free_handles(pool, zspage);
+        free_handles(pool, class, zspage);
 
         next = page = get_first_page(zspage);
         do {
@@ -1067,7 +1116,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
 #ifdef CONFIG_ZPOOL
         INIT_LIST_HEAD(&zspage->lru);
         zspage->under_reclaim = false;
-        zspage->deferred_handles = NULL;
 #endif
 
         set_freeobj(zspage, 0);
@@ -1568,7 +1616,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(zs_malloc);
 
-static void obj_free(int class_size, unsigned long obj)
+static void obj_free(int class_size, unsigned long obj, unsigned long *handle)
 {
         struct link_free *link;
         struct zspage *zspage;
@@ -1582,15 +1630,29 @@ static void obj_free(int class_size, unsigned long obj)
         zspage = get_zspage(f_page);
 
         vaddr = kmap_atomic(f_page);
-
-        /* Insert this object in containing zspage's freelist */
         link = (struct link_free *)(vaddr + f_offset);
-        if (likely(!ZsHugePage(zspage)))
-                link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
-        else
-                f_page->index = 0;
+
+        if (handle) {
+#ifdef CONFIG_ZPOOL
+                /* Stores the (deferred) handle in the object's header */
+                *handle |= OBJ_DEFERRED_HANDLE_TAG;
+                *handle &= ~OBJ_ALLOCATED_TAG;
+
+                if (likely(!ZsHugePage(zspage)))
+                        link->deferred_handle = *handle;
+                else
+                        f_page->index = *handle;
+#endif
+        } else {
+                /* Insert this object in containing zspage's freelist */
+                if (likely(!ZsHugePage(zspage)))
+                        link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+                else
+                        f_page->index = 0;
+                set_freeobj(zspage, f_objidx);
+        }
+
         kunmap_atomic(vaddr);
-        set_freeobj(zspage, f_objidx);
         mod_zspage_inuse(zspage, -1);
 }
 
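[Editor's note] After this hunk the first word of an object can be read three ways, mirroring the union added to struct link_free earlier in the patch. An illustrative declaration (the union name is ours, not the kernel's):

    union header_word {
            unsigned long next;            /* free slot: next free index << OBJ_TAG_BITS */
            unsigned long handle;          /* allocated: handle | OBJ_ALLOCATED_TAG */
            unsigned long deferred_handle; /* reclaimed: handle | OBJ_DEFERRED_HANDLE_TAG */
    };

Note that obj_free() with a non-NULL handle deliberately skips set_freeobj(): a slot holding a deferred handle must stay off the freelist until that handle is freed.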
@@ -1615,7 +1677,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
         zspage = get_zspage(f_page);
         class = zspage_class(pool, zspage);
 
-        obj_free(class->size, obj);
         class_stat_dec(class, OBJ_USED, 1);
 
 #ifdef CONFIG_ZPOOL
@@ -1624,15 +1685,15 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
                  * Reclaim needs the handles during writeback. It'll free
                  * them along with the zspage when it's done with them.
                  *
-                 * Record current deferred handle at the memory location
-                 * whose address is given by handle.
+                 * Record current deferred handle in the object's header.
                  */
-                record_obj(handle, (unsigned long)zspage->deferred_handles);
-                zspage->deferred_handles = (unsigned long *)handle;
+                obj_free(class->size, obj, &handle);
                 spin_unlock(&pool->lock);
                 return;
         }
 #endif
+        obj_free(class->size, obj, NULL);
+
         fullness = fix_fullness_group(class, zspage);
         if (fullness == ZS_EMPTY)
                 free_zspage(pool, class, zspage);
@@ -1713,11 +1774,11 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
 }
 
 /*
- * Find alloced object in zspage from index object and
+ * Find object with a certain tag in zspage from index object and
  * return handle.
  */
-static unsigned long find_alloced_obj(struct size_class *class,
-                                struct page *page, int *obj_idx)
+static unsigned long find_tagged_obj(struct size_class *class,
+                                struct page *page, int *obj_idx, int tag)
 {
         unsigned int offset;
         int index = *obj_idx;
@@ -1728,7 +1789,7 @@ static unsigned long find_alloced_obj(struct size_class *class,
         offset += class->size * index;
 
         while (offset < PAGE_SIZE) {
-                if (obj_allocated(page, addr + offset, &handle))
+                if (obj_tagged(page, addr + offset, &handle, tag))
                         break;
 
                 offset += class->size;
@@ -1742,6 +1803,28 @@ static unsigned long find_alloced_obj(struct size_class *class,
         return handle;
 }
 
+/*
+ * Find alloced object in zspage from index object and
+ * return handle.
+ */
+static unsigned long find_alloced_obj(struct size_class *class,
+                                struct page *page, int *obj_idx)
+{
+        return find_tagged_obj(class, page, obj_idx, OBJ_ALLOCATED_TAG);
+}
+
+#ifdef CONFIG_ZPOOL
+/*
+ * Find object storing a deferred handle in header in zspage from index object
+ * and return handle.
+ */
+static unsigned long find_deferred_handle_obj(struct size_class *class,
+                struct page *page, int *obj_idx)
+{
+        return find_tagged_obj(class, page, obj_idx, OBJ_DEFERRED_HANDLE_TAG);
+}
+#endif
+
 struct zs_compact_control {
         /* Source spage for migration which could be a subpage of zspage */
         struct page *s_page;
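[Editor's note] A worked example of the scan arithmetic find_tagged_obj() relies on, with made-up numbers (a 48-byte class whose first object starts at offset 16 in the page; PAGE_SIZE as on most architectures):

    #include <assert.h>

    #define PAGE_SIZE 4096

    int main(void)
    {
            unsigned int class_size = 48, first_off = 16;
            int index = 3; /* resume from *obj_idx == 3 */
            unsigned int offset = first_off + class_size * index;

            assert(offset == 160); /* object 3 starts 160 bytes into the page */
            /* the while loop stops once a start offset reaches PAGE_SIZE: */
            assert(first_off + class_size * 84 < PAGE_SIZE);  /* slot 84 is scanned */
            assert(first_off + class_size * 85 >= PAGE_SIZE); /* slot 85 is not */
            return 0;
    }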
@@ -1784,7 +1867,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
                 zs_object_copy(class, free_obj, used_obj);
                 obj_idx++;
                 record_obj(handle, free_obj);
-                obj_free(class->size, used_obj);
+                obj_free(class->size, used_obj, NULL);
         }
 
         /* Remember last position in this iteration */
@@ -2478,6 +2561,90 @@ void zs_destroy_pool(struct zs_pool *pool)
 EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
 #ifdef CONFIG_ZPOOL
+static void restore_freelist(struct zs_pool *pool, struct size_class *class,
+                struct zspage *zspage)
+{
+        unsigned int obj_idx = 0;
+        unsigned long handle, off = 0; /* off is within-page offset */
+        struct page *page = get_first_page(zspage);
+        struct link_free *prev_free = NULL;
+        void *prev_page_vaddr = NULL;
+
+        /* in case no free object found */
+        set_freeobj(zspage, (unsigned int)(-1UL));
+
+        while (page) {
+                void *vaddr = kmap_atomic(page);
+                struct page *next_page;
+
+                while (off < PAGE_SIZE) {
+                        void *obj_addr = vaddr + off;
+
+                        /* skip allocated object */
+                        if (obj_allocated(page, obj_addr, &handle)) {
+                                obj_idx++;
+                                off += class->size;
+                                continue;
+                        }
+
+                        /* free deferred handle from reclaim attempt */
+                        if (obj_stores_deferred_handle(page, obj_addr, &handle))
+                                cache_free_handle(pool, handle);
+
+                        if (prev_free)
+                                prev_free->next = obj_idx << OBJ_TAG_BITS;
+                        else /* first free object found */
+                                set_freeobj(zspage, obj_idx);
+
+                        prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
+                        /* if last free object in a previous page, need to unmap */
+                        if (prev_page_vaddr) {
+                                kunmap_atomic(prev_page_vaddr);
+                                prev_page_vaddr = NULL;
+                        }
+
+                        obj_idx++;
+                        off += class->size;
+                }
+
+                /*
+                 * Handle the last (full or partial) object on this page.
+                 */
+                next_page = get_next_page(page);
+                if (next_page) {
+                        if (!prev_free || prev_page_vaddr) {
+                                /*
+                                 * There is no free object in this page, so we can safely
+                                 * unmap it.
+                                 */
+                                kunmap_atomic(vaddr);
+                        } else {
+                                /* update prev_page_vaddr since prev_free is on this page */
+                                prev_page_vaddr = vaddr;
+                        }
+                } else { /* this is the last page */
+                        if (prev_free) {
+                                /*
+                                 * Reset OBJ_TAG_BITS bit to last link to tell
+                                 * whether it's allocated object or not.
+                                 */
+                                prev_free->next = -1UL << OBJ_TAG_BITS;
+                        }
+
+                        /* unmap previous page (if not done yet) */
+                        if (prev_page_vaddr) {
+                                kunmap_atomic(prev_page_vaddr);
+                                prev_page_vaddr = NULL;
+                        }
+
+                        kunmap_atomic(vaddr);
+                }
+
+                page = next_page;
+                off %= PAGE_SIZE;
+        }
+}
+
 static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
 {
         int i, obj_idx, ret = 0;
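[Editor's note] restore_freelist() above rebuilds the freelist encoding that init_zspage() normally sets up. A small self-contained check of that encoding, assuming the CONFIG_ZPOOL values from the top of the patch (OBJ_TAG_BITS == 2):

    #include <assert.h>

    #define OBJ_ALLOCATED_TAG 1UL
    #define OBJ_TAG_BITS      2

    int main(void)
    {
            /* a free slot's header: the next free object's index, shifted
             * so the tag bits stay zero (free slots carry no tag) */
            unsigned long link = 7UL << OBJ_TAG_BITS;
            /* the terminator written into the last link */
            unsigned long end = -1UL << OBJ_TAG_BITS;

            assert(!(link & OBJ_ALLOCATED_TAG)); /* reads as "not allocated" */
            assert(link >> OBJ_TAG_BITS == 7);   /* decodes back to index 7 */
            assert(!(end & OBJ_ALLOCATED_TAG));  /* terminator also untagged */
            return 0;
    }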
@@ -2561,6 +2728,12 @@ static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
                 return 0;
         }
 
+        /*
+         * Eviction fails on one of the handles, so we need to restore zspage.
+         * We need to rebuild its freelist (and free stored deferred handles),
+         * put it back to the correct size class, and add it to the LRU list.
+         */
+        restore_freelist(pool, class, zspage);
         putback_zspage(class, zspage);
         list_add(&zspage->lru, &pool->lru);
         unlock_zspage(zspage);