1
1
import importlib .machinery
2
2
import importlib .util
3
3
import logging
4
+ import marshal
5
+ import os
4
6
import os .path
5
7
import sys
6
8
import types
7
9
from importlib .abc import MetaPathFinder , SourceLoader
8
- from typing import Optional
10
+ from typing import Optional , List , Dict
9
11
10
12
import basilisp .compiler as compiler
11
13
import basilisp .lang .runtime as runtime
12
14
import basilisp .reader as reader
13
15
from basilisp .lang .util import demunge
16
+ from basilisp .util import timed
17
+
18
# Leading bytes of every Basilisp bytecode cache file. The value must be exactly
# 4 bytes long: the cache reader compares it against the first 4 bytes of the
# file and expects the timestamp and size fields to follow at offsets 4 and 8.
MAGIC_NUMBER = (1149).to_bytes(2, 'little') + b'\r\n'
14
19
15
20
logger = logging .getLogger (__name__ )
16
21
17
22
23
+ def _r_long (int_bytes : bytes ) -> int :
24
+ """Convert 4 bytes in little-endian to an integer."""
25
+ return int .from_bytes (int_bytes , 'little' )
26
+
27
+
28
+ def _w_long (x : int ) -> bytes :
29
+ """Convert a 32-bit integer to little-endian."""
30
+ return (int (x ) & 0xFFFFFFFF ).to_bytes (4 , 'little' )
31
+
32
+
33
def _basilisp_bytecode(mtime: int,
                       source_size: int,
                       code: List[types.CodeType]) -> bytes:
    """Return the bytes for a Basilisp bytecode cache file.

    The layout is: MAGIC_NUMBER, then `mtime` and `source_size` each encoded
    by `_w_long` (32-bit little-endian), then the marshalled `code` list.

    The result is an immutable `bytes` object, as the return annotation
    promises."""
    return b''.join((
        MAGIC_NUMBER,
        _w_long(mtime),
        _w_long(source_size),
        marshal.dumps(code),  # type: ignore
    ))
42
+
43
+
44
def _get_basilisp_bytecode(fullname: str,
                           mtime: int,
                           source_size: int,
                           cache_data: bytes) -> List[types.CodeType]:
    """Unmarshal the bytes from a Basilisp bytecode cache file, validating the
    file header prior to returning.

    The first 12 bytes of `cache_data` are treated as the header: bytes 0-3
    are compared to MAGIC_NUMBER, bytes 4-7 are decoded with `_r_long` and
    compared to `mtime`, and bytes 8-11 are decoded with `_r_long` and
    compared to `source_size`. Everything after the header is unmarshalled
    and returned.

    Raises ImportError (with `name` set to `fullname`) if the magic number,
    timestamp, or source size does not match, or if the payload is not valid
    marshal data. Raises EOFError if the timestamp or size field is truncated;
    `marshal.loads` may also raise EOFError for a truncated payload."""
    exc_details = {'name': fullname}
    magic = cache_data[:4]
    raw_timestamp = cache_data[4:8]
    raw_size = cache_data[8:12]

    if magic != MAGIC_NUMBER:
        message = f"Incorrect magic number ({ magic } ) in { fullname } ; expected { MAGIC_NUMBER } "
        logger.debug(message)
        raise ImportError(message, **exc_details)  # type: ignore

    if len(raw_timestamp) != 4:
        message = f"Reached EOF while reading timestamp in { fullname } "
        logger.debug(message)
        raise EOFError(message)

    if _r_long(raw_timestamp) != mtime:
        message = f"Non-matching timestamp ({ _r_long (raw_timestamp )} ) in { fullname } bytecode cache; expected { mtime } "
        logger.debug(message)
        raise ImportError(message, **exc_details)  # type: ignore

    if len(raw_size) != 4:
        message = f"Reached EOF while reading size of source in { fullname } "
        logger.debug(message)
        raise EOFError(message)

    if _r_long(raw_size) != source_size:
        message = f"Non-matching filesize ({ _r_long (raw_size )} ) in { fullname } bytecode cache; expected { source_size } "
        logger.debug(message)
        raise ImportError(message, **exc_details)  # type: ignore

    # NOTE(review): marshal is not hardened against maliciously crafted input;
    # the cache file is trusted to the same degree as the source file.
    try:
        return marshal.loads(cache_data[12:])  # type: ignore
    except (ValueError, TypeError) as e:
        # A corrupt payload is reported as ImportError, like every other
        # invalid-cache condition, so the caller can treat the cache as
        # unusable instead of seeing an unexpected exception type.
        message = f"Corrupt bytecode cache data in {fullname}: {e}"
        logger.debug(message)
        raise ImportError(message, **exc_details) from e  # type: ignore
77
+
78
+
79
+ def _cache_from_source (path : str ) -> str :
80
+ """Return the path to the cached file for the given path. The original path
81
+ does not have to exist."""
82
+ cache_path , cache_file = os .path .split (importlib .util .cache_from_source (path ))
83
+ filename , _ = os .path .splitext (cache_file )
84
+ return os .path .join (cache_path , filename + '.lpyc' )
85
+
86
+
18
87
class BasilispImporter (MetaPathFinder , SourceLoader ):
19
88
"""Python import hook to allow directly loading Basilisp code within
20
89
Python."""
@@ -41,7 +110,13 @@ def find_spec(self,
41
110
f"{ os .path .join (entry , * module_name )} .lpy" ]
42
111
for filename in filenames :
43
112
if os .path .exists (filename ):
44
- state = {'fullname' : fullname , "filename" : filename , 'path' : entry , 'target' : target }
113
+ state = {
114
+ 'fullname' : fullname ,
115
+ 'filename' : filename ,
116
+ 'path' : entry ,
117
+ 'target' : target ,
118
+ 'cache_filename' : _cache_from_source (filename )
119
+ }
45
120
logger .debug (f"Found potential Basilisp module '{ fullname } ' in file '{ filename } '" )
46
121
return importlib .machinery .ModuleSpec (fullname , self , origin = filename , loader_state = state )
47
122
return None
@@ -50,10 +125,22 @@ def invalidate_caches(self):
50
125
super ().invalidate_caches ()
51
126
self ._cache = {}
52
127
53
- def get_data (self , path ) -> bytes :
128
def _cache_bytecode(self, source_path, cache_path, data):  # pylint: disable=unused-argument
    """Store the cache file contents `data` at `cache_path`.

    This only delegates to `set_data`; `source_path` is accepted but not
    used."""
    self.set_data(cache_path, data)
130
+
131
def path_stats(self, path):
    """Return a dict describing the file at `path`.

    'mtime' is the modification time truncated to an integer and 'size' is
    the file size as reported by `os.stat`. Any error from `os.stat`
    propagates to the caller."""
    file_info = os.stat(path)
    modified = int(file_info.st_mtime)
    return {'mtime': modified, 'size': file_info.st_size}
134
+
135
def get_data(self, path) -> bytes:
    """Return the raw bytes stored in the file at `path`.

    The file is opened read-only, so sources and cache files that the
    current user cannot write to can still be loaded. Any OSError raised
    while opening or reading propagates to the caller."""
    with open(path, mode='rb') as f:
        return f.read()
56
138
139
def set_data(self, path, data):
    """Write `data` to the file at `path`, creating parent directories first.

    Writing is best-effort: if the directory cannot be created or the file
    cannot be written (for example on a read-only location), the failure is
    logged at debug level and swallowed rather than raised."""
    try:
        parent = os.path.dirname(path)
        # os.makedirs('') raises, so skip it when `path` is a bare file name.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(path, mode='wb') as f:
            f.write(data)
    except OSError as e:
        logger.debug(f"Could not write '{path}': {e}")
143
+
57
144
def get_filename (self , fullname : str ) -> str :
58
145
try :
59
146
cached = self ._cache [fullname ]
@@ -72,6 +159,50 @@ def create_module(self, spec: importlib.machinery.ModuleSpec):
72
159
self ._cache [spec .name ] = {"spec" : spec }
73
160
return mod
74
161
162
def _exec_cached_module(self,
                        fullname: str,
                        loader_state: Dict[str, str],
                        path_stats: Dict[str, int],
                        module: types.ModuleType):
    """Execute `module` from its bytecode cache file.

    The cache file named by `loader_state["cache_filename"]` is read with
    `get_data`, validated and unmarshalled by `_get_basilisp_bytecode`
    against the 'mtime' and 'size' entries of `path_stats`, and the
    resulting code is handed to `compiler.compile_bytecode`.

    Nothing is caught here: errors from reading or validating the cache
    propagate to the caller."""
    source_path = loader_state["filename"]
    cache_path = loader_state["cache_filename"]

    def log_elapsed(duration):
        # Receives the elapsed duration from `timed`; the division by
        # 1000000 suggests nanoseconds in, milliseconds out — TODO confirm.
        logger.debug(f"Loaded cached Basilisp module '{ fullname } ' in { duration / 1000000 } ms")

    with timed(log_elapsed):
        logger.debug(f"Checking for cached Basilisp module '{ fullname } ''")
        raw_cache = self.get_data(cache_path)
        code_objects = _get_basilisp_bytecode(
            fullname, path_stats['mtime'], path_stats['size'], raw_cache)
        compiler.compile_bytecode(code_objects, compiler.CompilerContext(), module, source_path)
177
+
178
def _exec_module(self,
                 fullname: str,
                 loader_state: Dict[str, str],
                 path_stats: Dict[str, int],
                 module: types.ModuleType):
    """Compile and execute `module` from source, then write its bytecode cache.

    The source file named by `loader_state["filename"]` is read with
    `reader.read_file` and compiled into `module` by
    `compiler.compile_module`. Every code object the compiler reports is
    collected, packed by `_basilisp_bytecode` together with the 'mtime' and
    'size' entries of `path_stats`, and stored at
    `loader_state["cache_filename"]`."""
    source_path = loader_state["filename"]
    cache_path = loader_state["cache_filename"]

    # Code objects reported by the compiler, in the order it reports them.
    # They become the payload of the .lpyc cache file written below.
    collected = []

    def collect(bytecode: types.CodeType):
        collected.append(bytecode)

    def log_elapsed(duration):
        logger.debug(f"Loaded Basilisp module '{ fullname } ' in { duration / 1000000 } ms")

    with timed(log_elapsed):
        logger.debug(f"Reading and compiling Basilisp module '{ fullname } '")
        forms = reader.read_file(source_path, resolver=runtime.resolve_alias)
        compiler.compile_module(  # pylint: disable=unexpected-keyword-arg
            forms, compiler.CompilerContext(), module, source_path, collect_bytecode=collect)

    # Persist everything gathered during the compilation run.
    payload = _basilisp_bytecode(path_stats['mtime'], path_stats['size'], collected)
    self._cache_bytecode(source_path, cache_path, payload)
205
+
75
206
def exec_module (self , module ):
76
207
"""Compile the Basilisp module into Python code.
77
208
@@ -84,6 +215,7 @@ def exec_module(self, module):
84
215
cached ["module" ] = module
85
216
spec = cached ["spec" ]
86
217
filename = spec .loader_state ["filename" ]
218
+ path_stats = self .path_stats (filename )
87
219
88
220
# During the bootstrapping process, the 'basilisp.core namespace is created with
89
221
# a blank module. If we do not replace the module here with the module we are
@@ -93,10 +225,13 @@ def exec_module(self, module):
93
225
ns : runtime .Namespace = runtime .set_current_ns (ns_name ).value
94
226
ns .module = module
95
227
96
- logger .debug (f"Reading and compiling Basilisp module '{ fullname } ''" )
97
- forms = reader .read_file (filename , resolver = runtime .resolve_alias )
98
- compiler .compile_module (forms , compiler .CompilerContext (), module , filename )
99
- logger .debug (f"Loaded Basilisp module '{ fullname } ''" )
228
+ # Check if a valid, cached version of this Basilisp namespace exists and, if so,
229
+ # load it and bypass the expensive compilation process below.
230
+ try :
231
+ self ._exec_cached_module (fullname , spec .loader_state , path_stats , module )
232
+ except (EOFError , ImportError , IOError , OSError ) as e :
233
+ logger .debug (f"Failed to load cached Basilisp module: { e } " )
234
+ self ._exec_module (fullname , spec .loader_state , path_stats , module )
100
235
101
236
# Because we want to (by default) add 'basilisp.core into every namespace by default,
102
237
# we want to make sure we don't try to add 'basilisp.core into itself, causing a
0 commit comments