@@ -67,7 +67,7 @@ tf.serialization.registerClass(LogLayer)
6767
6868type CausalSelfAttentionConfig =
6969 ConstructorParameters < typeof tf . layers . Layer > [ 0 ]
70- & Record < 'blockSize ' | 'nHead' | 'nEmbd' | 'dropout' | 'nLayer' | 'seed' , number >
70+ & Record < 'contextLength ' | 'nHead' | 'nEmbd' | 'dropout' | 'nLayer' | 'seed' , number >
7171
7272class CausalSelfAttention extends tf . layers . Layer {
7373 static readonly className = 'CausalSelfAttention'
@@ -97,7 +97,7 @@ class CausalSelfAttention extends tf.layers.Layer {
9797 // mask is a lower triangular matrix filled with ones
9898 // calling bandPart zeroes out the upper triangular part of the all-ones matrix
9999 // from the doc: tf.linalg.band_part(input, -1, 0) ==> Lower triangular part
100- this . mask = tf . linalg . bandPart ( tf . ones ( [ config . blockSize , config . blockSize ] ) , - 1 , 0 )
100+ this . mask = tf . linalg . bandPart ( tf . ones ( [ config . contextLength , config . contextLength ] ) , - 1 , 0 )
101101 }
102102
103103 override build ( ) : void {
@@ -266,15 +266,15 @@ class GELU extends tf.layers.Layer {
266266tf . serialization . registerClass ( GELU )
267267
268268type MLPConfig = ConstructorParameters < typeof tf . layers . Layer > [ 0 ] &
269- Required < ModelSize > & Record < 'blockSize ' | 'residDrop' | 'nLayer' | 'seed' , number >
269+ Required < ModelSize > & Record < 'contextLength ' | 'residDrop' | 'nLayer' | 'seed' , number >
270270
271271function MLP ( config : MLPConfig ) : tf . LayersModel {
272272 return tf . sequential ( { layers : [
273273 tf . layers . dense ( {
274274 name : config . name + `.mlp.c_fc` ,
275275 units : 4 * config . nEmbd ,
276276 inputDim : config . nEmbd ,
277- inputShape : [ config . blockSize , config . nEmbd ] ,
277+ inputShape : [ config . contextLength , config . nEmbd ] ,
278278 kernelInitializer : tf . initializers . randomNormal ( {
279279 mean : 0 , stddev : 0.02 , seed : config . seed
280280 } ) ,
@@ -284,7 +284,7 @@ function MLP(config: MLPConfig): tf.LayersModel {
284284 name : config . name + '.mlp.c_proj' ,
285285 units : config . nEmbd ,
286286 inputDim : 4 * config . nEmbd ,
287- inputShape : [ config . blockSize , 4 * config . nEmbd ] ,
287+ inputShape : [ config . contextLength , 4 * config . nEmbd ] ,
288288 kernelInitializer : tf . initializers . randomNormal ( {
289289 mean : 0 , stddev : 0.02 * Math . sqrt ( 2 * config . nLayer ) , seed : config . seed
290290 } ) ,
@@ -306,7 +306,7 @@ type BlockConfig = CausalSelfAttentionConfig & MLPConfig & { debug: boolean }
306306 */
307307function TransformerBlock ( conf : BlockConfig ) : tf . LayersModel {
308308 const config = Object . assign ( { name : '.h' } , conf )
309- const inputs = tf . input ( { shape : [ config . blockSize , config . nEmbd ] } )
309+ const inputs = tf . input ( { shape : [ config . contextLength , config . nEmbd ] } )
310310 let x1 , x2
311311 // input normalization
312312 x1 = tf . layers . layerNormalization ( {
@@ -469,7 +469,7 @@ export function GPTArchitecture(config: Required<GPTConfig>): tf.LayersModel {
469469 const range = new Range ( { } ) . apply ( inputs )
470470 let posEmb = tf . layers . embedding ( {
471471 name : config . name + '.wpe' ,
472- inputDim : config . blockSize ,
472+ inputDim : config . contextLength ,
473473 outputDim : config . nEmbd ,
474474 embeddingsInitializer : tf . initializers . randomNormal ( {
475475 mean : 0 , stddev : 0.02 , seed : config . seed
0 commit comments