88import time
99import six
1010import tempfile
11+ import struct
1112
1213from pwnlib .context import context
1314from pwnlib .elf import ELF
1415from pwnlib .filesystem .path import Path
1516from pwnlib .log import getLogger
1617from pwnlib .tubes .process import process
17- from pwnlib .util .fiddling import enhex
18+ from pwnlib .util .fiddling import enhex , unhex
1819from pwnlib .util .hashes import sha1filehex , sha256filehex , md5filehex
1920from pwnlib .util .misc import read
2021from pwnlib .util .misc import which
2324
2425log = getLogger (__name__ )
2526
26- HASHES = {
27- 'build_id' : lambda path : enhex (ELF (path , checksec = False ).buildid or b'' ),
27+
def _turbofast_extract_build_id(path):
    """
    Return the hex-encoded GNU build ID of the file at ``path``.

    Fast path: only the leading 0x1000 bytes requested from ``read`` are
    scanned for the build-id note, which avoids a full ELF parse.  Whenever
    the note cannot be located or does not look sane inside that window, the
    function falls back to ``ELF(path, checksec=False).buildid``.

    Elf_External_Note:

    0x00 +--------+
         | namesz | <- Size of entry's owner string
    0x04 +--------+
         | descsz | <- Size of the note descriptor
    0x08 +--------+
         |  type  | <- Interpretation of the descriptor
    0x0c +--------+
         |  name  | <- Start of the name+desc data
    ...  +--------+
         |  desc  |
    ...  +--------+

    Arguments:
        path: Path of the file to inspect.

    Returns:
        The build ID as a hex string; an empty string when the fallback
        parser reports no build ID.
    """
    # Little-endian NT_GNU_BUILD_ID (3) directly followed by the owner name
    # b"GNU\x00", i.e. the `type` and `name` fields of the note.
    marker = b"\x03\x00\x00\x00GNU\x00"
    data = read(path, 0x1000)

    idx = data.find(marker)
    # `namesz` and `descsz` occupy the 8 bytes in front of `type`, so a match
    # closer than that to the start of the buffer cannot be a real note.
    if idx >= 8:
        namesz, descsz = struct.unpack_from("<LL", data, idx - 8)
        desc_start = idx + len(marker)
        desc_end = desc_start + descsz
        # Only trust the fast path when the owner-name size matches "GNU\0"
        # and the whole descriptor lies inside the bytes we actually read;
        # otherwise the slice would silently yield a truncated build ID.
        if namesz == 4 and desc_end <= len(data):
            return enhex(data[desc_start:desc_end])

    # Slow but authoritative path: let the ELF parser locate the build ID.
    return enhex(ELF(path, checksec=False).buildid or b'')
51+
52+
# Supported search types, each mapped to the callable that computes that
# identifier for a file on disk.
TYPES = {
    # 'libs_id' has no extractor function; it is registered here only so it
    # counts as a valid search type.
    'libs_id': None,
    'build_id': _turbofast_extract_build_id,
    'sha1': sha1filehex,
    'sha256': sha256filehex,
    'md5': md5filehex,
}
60+
# Translates this module's search-type names into the key names used in
# search results (the same naming as libc.rip).
MAP_TYPES = {
    'libs_id': 'id',
    'build_id': 'buildid',
}
66+
3267DEBUGINFOD_SERVERS = [
3368 'https://debuginfod.elfutils.org/' ,
3469]
4277
4378# https://gitlab.com/libcdb/libcdb wasn't updated after 2019,
4479# but still is a massive database of older libc binaries.
45- def provider_libcdb (hex_encoded_id , hash_type ):
80+ def provider_libcdb (hex_encoded_id , search_type ):
81+ if search_type == 'libs_id' :
82+ return None
83+
4684 # Deferred import because it's slow
4785 import requests
4886 from six .moves import urllib
4987
5088 # Build the URL using the requested hash type
51- url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
89+ url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % search_type
5290 url = urllib .parse .urljoin (url_base , hex_encoded_id )
5391
5492 data = b""
@@ -58,15 +96,15 @@ def provider_libcdb(hex_encoded_id, hash_type):
5896 data = wget (url , timeout = 20 )
5997
6098 if not data :
61- log .warn_once ("Could not fetch libc for %s %s from libcdb" , hash_type , hex_encoded_id )
99+ log .warn_once ("Could not fetch libc for %s %s from libcdb" , search_type , hex_encoded_id )
62100 break
63101
64102 # GitLab serves up symlinks with
65103 if data .startswith (b'..' ):
66104 url = os .path .dirname (url ) + '/'
67105 url = urllib .parse .urljoin (url .encode ('utf-8' ), data )
68106 except requests .RequestException as e :
69- log .warn_once ("Failed to fetch libc for %s %s from libcdb: %s" , hash_type , hex_encoded_id , e )
107+ log .warn_once ("Failed to fetch libc for %s %s from libcdb: %s" , search_type , hex_encoded_id , e )
70108 return data
71109
72110def query_libc_rip (params ):
@@ -86,16 +124,17 @@ def query_libc_rip(params):
86124 return None
87125
88126# https://libc.rip/
89- def provider_libc_rip (hex_encoded_id , hash_type ):
127+ def provider_libc_rip (search_target , search_type ):
90128 # Build the request for the hash type
91129 # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml
92- if hash_type == 'build_id' :
93- hash_type = 'buildid'
94- params = {hash_type : hex_encoded_id }
130+ if search_type in MAP_TYPES .keys ():
131+ search_type = MAP_TYPES [search_type ]
132+
133+ params = {search_type : search_target }
95134
96135 libc_match = query_libc_rip (params )
97136 if not libc_match :
98- log .warn_once ("Could not find libc info for %s %s on libc.rip" , hash_type , hex_encoded_id )
137+ log .warn_once ("Could not find libc info for %s %s on libc.rip" , search_type , search_target )
99138 return None
100139
101140 if len (libc_match ) > 1 :
@@ -107,13 +146,13 @@ def provider_libc_rip(hex_encoded_id, hash_type):
107146 data = wget (url , timeout = 20 )
108147
109148 if not data :
110- log .warn_once ("Could not fetch libc binary for %s %s from libc.rip" , hash_type , hex_encoded_id )
149+ log .warn_once ("Could not fetch libc binary for %s %s from libc.rip" , search_type , search_target )
111150 return None
112151 return data
113152
114153# Check if the local system libc matches the requested hash.
115- def provider_local_system (hex_encoded_id , hash_type ):
116- if hash_type == 'id ' :
154+ def provider_local_system (hex_encoded_id , search_type ):
155+ if search_type == 'libs_id ' :
117156 return None
118157 shell_path = os .environ .get ('SHELL' , None ) or '/bin/sh'
119158 if not os .path .exists (shell_path ):
@@ -123,22 +162,29 @@ def provider_local_system(hex_encoded_id, hash_type):
123162 if not local_libc :
124163 log .debug ('Cannot lookup libc from shell %r. Skipping local system libc matching.' , shell_path )
125164 return None
126- if HASHES [ hash_type ](local_libc .path ) == hex_encoded_id :
165+ if TYPES [ search_type ](local_libc .path ) == hex_encoded_id :
127166 return local_libc .data
128167 return None
129168
130169# Offline search https://github.com/niklasb/libc-database for hash type
def provider_local_database(search_target, search_type):
    """
    Look for a matching libc inside the local libc database directory.

    Arguments:
        search_target(str):
            Value to look for: a library name when ``search_type`` is
            ``'libs_id'``, otherwise the identifier computed by the
            ``TYPES[search_type]`` function.
        search_type(str):
            Kind of lookup to perform; a key of ``TYPES``.

    Returns:
        The contents of the first matching ``.so`` file as returned by
        ``read``, or :const:`None` when ``context.local_libcdb`` is unset,
        is not a directory, or nothing matches.
    """
    if not context.local_libcdb:
        return None

    localdb = Path(context.local_libcdb)
    if not localdb.is_dir():
        return None

    # A 'libs_id' is matched against the file name, not the file contents.
    if search_type == 'libs_id':
        # Only the first hit is used, so stop walking the tree as soon as one
        # is found instead of collecting every match into a list.
        libc_path = next(iter(localdb.rglob("%s.so" % search_target)), None)
        if libc_path is None:
            return None
        return read(libc_path)

    log.debug("Searching local libc database, %s: %s", search_type, search_target)
    for libc_path in localdb.rglob("*.so"):
        if search_target == TYPES[search_type](libc_path):
            return read(libc_path)

    return None
@@ -185,11 +231,28 @@ def query_local_database(params):
185231 "online" : [provider_libcdb , provider_libc_rip ]
186232}
187233
188- def search_by_hash (hex_encoded_id , hash_type = 'build_id' , unstrip = True , offline_only = False ):
189- assert hash_type in HASHES , hash_type
234+ def search_by_hash (search_target , search_type = 'build_id' , unstrip = True , offline_only = False ):
235+ """search_by_hash(str, str, bool, bool) -> str
236+ Arguments:
237+ search_target(str):
238+ Use for searching the libc. This could be a hex encoded ID (`hex_encoded_id`) or a library
239+ name (`libs_id`). Depending on `search_type`, this can represent different types of encoded
240+ values or names.
241+ search_type(str):
242+ The type of the search to be performed, it should be one of the keys in the `TYPES` dictionary.
243+ unstrip(bool):
244+ Try to fetch debug info for the libc and apply it to the downloaded file.
245+ offline_only(bool):
246+ If True, restricts the search to offline providers only (local database). If False, it will also
247+ search online providers. Default is False.
248+
249+ Returns:
250+ The path to the cached directory containing the downloaded libraries.
251+ """
252+ assert search_type in TYPES , search_type
190253
191254 # Ensure that the libcdb cache directory exists
192- cache , cache_valid = _check_elf_cache ('libcdb' , hex_encoded_id , hash_type )
255+ cache , cache_valid = _check_elf_cache ('libcdb' , search_target , search_type )
193256 if cache_valid :
194257 return cache
195258
@@ -203,12 +266,12 @@ def search_by_hash(hex_encoded_id, hash_type='build_id', unstrip=True, offline_o
203266
204267 # Run through all available libc database providers to see if we have a match.
205268 for provider in providers :
206- data = provider (hex_encoded_id , hash_type )
269+ data = provider (search_target , search_type )
207270 if data and data .startswith (b'\x7F ELF' ):
208271 break
209272
210273 if not data :
211- log .warn_once ("Could not find libc for %s %s anywhere" , hash_type , hex_encoded_id )
274+ log .warn_once ("Could not find libc for %s %s anywhere" , search_type , search_target )
212275
213276 # Save whatever we got to the cache
214277 write (cache , data or b'' )
@@ -257,7 +320,7 @@ def _search_debuginfo_by_hash(base_url, hex_encoded_id):
257320
258321 return cache
259322
260- def _check_elf_cache (cache_type , hex_encoded_id , hash_type ):
323+ def _check_elf_cache (cache_type , search_target , search_type ):
261324 """
262325 Check if there already is an ELF file for this hash in the cache.
263326
@@ -270,14 +333,14 @@ def _check_elf_cache(cache_type, hex_encoded_id, hash_type):
270333 True
271334 """
272335 # Ensure that the cache directory exists
273- cache_dir = os .path .join (context .cache_dir , cache_type , hash_type )
336+ cache_dir = os .path .join (context .cache_dir , cache_type , search_type )
274337
275338 if not os .path .isdir (cache_dir ):
276339 os .makedirs (cache_dir )
277340
278341 # If we already downloaded the file, and it looks even passingly like
279342 # a valid ELF file, return it.
280- cache = os .path .join (cache_dir , hex_encoded_id )
343+ cache = os .path .join (cache_dir , search_target )
281344
282345 if not os .path .exists (cache ):
283346 return cache , False
@@ -289,7 +352,7 @@ def _check_elf_cache(cache_type, hex_encoded_id, hash_type):
289352 # Retry failed lookups after some time
290353 if time .time () > os .path .getmtime (cache ) + NEGATIVE_CACHE_EXPIRY :
291354 return cache , False
292- log .info_once ("Skipping invalid cached ELF %s" , hex_encoded_id )
355+ log .info_once ("Skipping invalid cached ELF %s" , search_target )
293356 return None , False
294357
295358 log .info_once ("Using cached data from %r" , cache )
@@ -583,7 +646,7 @@ def _handle_multiple_matching_libcs(matching_libcs):
583646 selected_index = options ("Select the libc version to use:" , [libc ['id' ] for libc in matching_libcs ])
584647 return matching_libcs [selected_index ]
585648
586- def search_by_symbol_offsets (symbols , select_index = None , unstrip = True , return_as_list = False , offline_only = False ):
649+ def search_by_symbol_offsets (symbols , select_index = None , unstrip = True , return_as_list = False , offline_only = False , search_type = 'build_id' ):
587650 """
588651 Lookup possible matching libc versions based on leaked function addresses.
589652
@@ -608,6 +671,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
608671 offline_only(bool):
609672 When pass `offline_only=True`, restricts search mode to offline sources only,
610673 disable online lookup. Defaults to `False`, and enable both offline and online providers.
674+ search_type(str):
675+ An option to select searched hash.
611676
612677 Returns:
613678 Path to the downloaded library on disk, or :const:`None`.
@@ -626,6 +691,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
626691 >>> for buildid in matched_libcs: # doctest +SKIP
627692 ... libc = ELF(search_by_build_id(buildid)) # doctest +SKIP
628693 """
694+ assert search_type in TYPES , search_type
695+
629696 for symbol , address in symbols .items ():
630697 if isinstance (address , int ):
631698 symbols [symbol ] = hex (address )
@@ -661,21 +728,49 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
661728 if return_as_list :
662729 return [libc ['buildid' ] for libc in matching_list ]
663730
731+ mapped_type = MAP_TYPES .get (search_type , search_type )
732+
664733 # If there's only one match, return it directly
665734 if len (matching_list ) == 1 :
666- return search_by_build_id (matching_list [0 ]['buildid' ] , unstrip = unstrip , offline_only = offline_only )
735+ return search_by_hash (matching_list [0 ][mapped_type ], search_type = search_type , unstrip = unstrip , offline_only = offline_only )
667736
668737 # If a specific index is provided, validate it and return the selected libc
669738 if select_index is not None :
670739 if select_index > 0 and select_index <= len (matching_list ):
671- return search_by_build_id (matching_list [select_index - 1 ]['buildid' ] , unstrip = unstrip , offline_only = offline_only )
740+ return search_by_hash (matching_list [select_index - 1 ][mapped_type ], search_type = search_type , unstrip = unstrip , offline_only = offline_only )
672741 else :
673742 log .error ('Invalid selected libc index. %d is not in the range of 1-%d.' , select_index , len (matching_list ))
674743 return None
675744
676745 # Handle multiple matches interactively if no index is specified
677746 selected_libc = _handle_multiple_matching_libcs (matching_list )
678- return search_by_build_id (selected_libc ['buildid' ], unstrip = unstrip , offline_only = offline_only )
747+ return search_by_hash (selected_libc [mapped_type ], search_type = search_type , unstrip = unstrip , offline_only = offline_only )
748+
def search_by_libs_id(libs_id, unstrip=True, offline_only=False):
    """
    Look up a libc by its Libs ID.

    This is a convenience wrapper that forwards to :func:`search_by_hash`
    with the search type fixed to ``'libs_id'``.

    Arguments:
        libs_id(str):
            Libs ID (e.g. 'libc6_...') of the library
        unstrip(bool):
            Forwarded to :func:`search_by_hash`; try to fetch debug info for
            the libc and apply it to the downloaded file.
        offline_only(bool):
            Forwarded to :func:`search_by_hash`; when ``True`` only offline
            sources are searched and online lookup is disabled. Defaults to
            ``False``, which enables both offline and online providers.

    Returns:
        Whatever :func:`search_by_hash` returns for this ID: a path to the
        library on disk, or :const:`None`.

    Examples:

        >>> None == search_by_libs_id('XX')
        True
        >>> filename = search_by_libs_id('libc6_2.31-3_amd64')
        >>> hex(ELF(filename).symbols.read)
        '0xeef40'
    """
    return search_by_hash(
        libs_id,
        search_type='libs_id',
        unstrip=unstrip,
        offline_only=offline_only,
    )
679774
680775def search_by_build_id (hex_encoded_id , unstrip = True , offline_only = False ):
681776 """
@@ -819,9 +914,16 @@ def _pack_libs_info(path, libs_id, libs_url, syms):
819914 info ["libs_url" ] = libs_url
820915 info ["download_url" ] = ""
821916
822- for hash_type , hash_func in HASHES .items ():
823- # replace 'build_id' to 'buildid'
824- info [hash_type .replace ("_" , "" )] = hash_func (path )
917+ for search_type , hash_func in TYPES .items ():
918+ # pass libs_id
919+ if search_type == 'libs_id' :
920+ continue
921+
922+ # replace search_type
923+ if search_type in MAP_TYPES .keys ():
924+ search_type = MAP_TYPES [search_type ]
925+
926+ info [search_type ] = hash_func (path )
825927
826928 default_symbol_list = [
827929 "__libc_start_main_ret" , "dup2" , "printf" , "puts" , "read" , "system" , "str_bin_sh"
@@ -886,4 +988,4 @@ def get_build_id_offsets():
886988 }.get (context .arch , [])
887989
888990
# Public API of this module.  Every entry must be the exact attribute name
# (no surrounding whitespace), otherwise `from <module> import *` fails with
# AttributeError.
__all__ = [
    'get_build_id_offsets',
    'search_by_build_id',
    'search_by_sha1',
    'search_by_sha256',
    'search_by_md5',
    'search_by_libs_id',
    'unstrip_libc',
    'search_by_symbol_offsets',
    'download_libraries',
]
0 commit comments