@@ -23,45 +23,9 @@
 
 switch nvp.Model
     case "japanese-base"
-        zipFilePath = bert.internal.getSupportFilePath("japanese-base", "bert-base-japanese.zip");
-        modelDir = fullfile(fileparts(zipFilePath), "bert-base-japanese");
-        unzip(zipFilePath, modelDir);
-        % Build the tokenizer
-        btok = bert.tokenizer.internal.TokenizedDocumentTokenizer("Language","ja","TokenizeMethod","mecab",IgnoreCase=false);
-        vocabFile = fullfile(modelDir, "vocab.txt");
-        ftok = bert.tokenizer.internal.FullTokenizer(vocabFile,BasicTokenizer=btok);
-        tok = bert.tokenizer.BERTTokenizer(vocabFile,FullTokenizer=ftok);
-        % Build the model
-        params.Weights = load(fullfile(modelDir, "weights.mat"));
-        params.Weights = dlupdate(@dlarray,params.Weights);
-        params.Hyperparameters = struct(...
-            NumHeads=12,...
-            NumLayers=12,...
-            NumContext=512,...
-            HiddenSize=768);
-        mdl = struct(...
-            Tokenizer=tok,...
-            Parameters=params);
+        mdl = iJapaneseBERTModel("japanese-base", "bert-base-japanese.zip");
     case "japanese-base-wwm"
-        zipFilePath = bert.internal.getSupportFilePath("japanese-base", "bert-base-japanese-whole-word-masking.zip");
-        modelDir = fullfile(fileparts(zipFilePath), "bert-base-japanese-whole-word-masking");
-        unzip(zipFilePath, modelDir);
-        % Build the tokenizer
-        btok = bert.tokenizer.internal.TokenizedDocumentTokenizer("Language","ja","TokenizeMethod","mecab",IgnoreCase=false);
-        vocabFile = fullfile(modelDir, "vocab.txt");
-        ftok = bert.tokenizer.internal.FullTokenizer(vocabFile,BasicTokenizer=btok);
-        tok = bert.tokenizer.BERTTokenizer(vocabFile,FullTokenizer=ftok);
-        % Build the model
-        params.Weights = load(fullfile(modelDir, "weights.mat"));
-        params.Weights = dlupdate(@dlarray,params.Weights);
-        params.Hyperparameters = struct(...
-            NumHeads=12,...
-            NumLayers=12,...
-            NumContext=512,...
-            HiddenSize=768);
-        mdl = struct(...
-            Tokenizer=tok,...
-            Parameters=params);
+        mdl = iJapaneseBERTModel("japanese-base-wwm", "bert-base-japanese-whole-word-masking.zip");
     otherwise
         % Download the license file
         bert.internal.getSupportFilePath(nvp.Model,"bert.RIGHTS");
@@ -76,4 +40,26 @@
             'Tokenizer',bert.tokenizer.BERTTokenizer(vocabFile,'IgnoreCase',ignoreCase),...
             'Parameters',params);
 end
+end
+
+function mdl = iJapaneseBERTModel(modelName, zipFileName)
+zipFilePath = bert.internal.getSupportFilePath(modelName, zipFileName);
+modelDir = fullfile(fileparts(zipFilePath), replace(zipFileName, ".zip", ""));
+unzip(zipFilePath, modelDir);
+% Build the tokenizer
+btok = bert.tokenizer.internal.TokenizedDocumentTokenizer("Language","ja","TokenizeMethod","mecab",IgnoreCase=false);
+vocabFile = fullfile(modelDir, "vocab.txt");
+ftok = bert.tokenizer.internal.FullTokenizer(vocabFile,BasicTokenizer=btok);
+tok = bert.tokenizer.BERTTokenizer(vocabFile,FullTokenizer=ftok);
+% Build the model
+params.Weights = load(fullfile(modelDir, "weights.mat"));
+params.Weights = dlupdate(@dlarray,params.Weights);
+params.Hyperparameters = struct(...
+    NumHeads=12,...
+    NumLayers=12,...
+    NumContext=512,...
+    HiddenSize=768);
+mdl = struct(...
+    Tokenizer=tok,...
+    Parameters=params);
 end
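
A minimal usage sketch of the refactored code path, not part of the diff: it loads both Japanese variants through the public bert entry point. The name-value call form bert(Model=...) is assumed from the nvp.Model switch above (the full signature is not shown in this hunk); the struct field names Tokenizer and Parameters.Hyperparameters come from the struct built in iJapaneseBERTModel.

% Sketch only: exercise both Japanese code paths via the bert entry point.
mdlBase = bert(Model="japanese-base");
mdlWWM  = bert(Model="japanese-base-wwm");

% Both variants are BERT-Base sized, so they report identical hyperparameters.
disp(mdlBase.Parameters.Hyperparameters)   % NumHeads: 12, NumLayers: 12, NumContext: 512, HiddenSize: 768
disp(class(mdlWWM.Tokenizer))              % bert.tokenizer.BERTTokenizer

Because the two case branches differed only in the model name and zip file, extracting iJapaneseBERTModel removes the duplicated tokenizer and weight-loading logic without changing what either branch returns.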