Fix indentation of help comments in FullTokenizer and BERTTokenizer.

debymf · debymf · commit cb9738f4ff8b · 2023-04-11T14:53:14.000+01:00
diff --git a/+bert/+tokenizer/+internal/FullTokenizer.m b/+bert/+tokenizer/+internal/FullTokenizer.m
@@ -1,41 +1,41 @@
 classdef FullTokenizer < bert.tokenizer.internal.Tokenizer
-   % FullTokenizer   A tokenizer based on word-piece tokenization.
-   %
-   %   tokenizer = FullTokenizer(vocabFile) constructs a FullTokenizer
-   %   using the vocabulary specified in the newline delimited txt file
-   %   vocabFile.
-   %
-   %   tokenizer = FullTokenizer(vocabFile,'PARAM1', VAL1, 'PARAM2', VAL2, ...)
-   %   specifies the optional parameter name/value pairs:
-   %
-   %   'BasicTokenizer'       - Tokenizer used to split text into words.
-   %                            If not specified, a default
-   %                            BasicTokenizer is constructed.
-   %
-   %   'IgnoreCase'           - A logical value to control if the
-   %                            FullTokenizer is case sensitive or not.
-   %                            The default value is true.
-   %
-   %   FullTokenizer methods:
-   %     tokenize - tokenize text
-   %     encode   - encode tokens
-   %     decode   - decode encoded tokens
-   %
-   % Example:
-   %   % Save a file named fakeVocab.txt with the text on the next 3 lines:
-   %   fake
-   %   vo
-   %   ##cab
-   %
-   %   % Now create a FullTokenizer
-   %   tokenizer = bert.tokenizer.internal.FullTokenizer('fakeVocab.txt');
-   %   tokens = tokenizer.tokenize("This tokenizer has a fake vocab")
-   %   % Note that most tokens are unknown as they are not in the
-   %   % vocabulary and neither are any sub-tokens. However "fake" is
-   %   % detected and "vocab" is split into "vo" and "##cab".
-   %   tokenizer.encode(tokens)
-   %   % This returns the encoded form of the tokens - each token is
-   %   % replaced by its corresponding line number in the fakeVocab.txt
+    % FullTokenizer   A tokenizer based on word-piece tokenization.
+    %
+    %   tokenizer = FullTokenizer(vocabFile) constructs a FullTokenizer
+    %   using the vocabulary specified in the newline delimited txt file
+    %   vocabFile.
+    %
+    %   tokenizer = FullTokenizer(vocabFile,'PARAM1', VAL1, 'PARAM2', VAL2, ...)
+    %   specifies the optional parameter name/value pairs:
+    %
+    %   'BasicTokenizer'       - Tokenizer used to split text into words.
+    %                            If not specified, a default
+    %                            BasicTokenizer is constructed.
+    %
+    %   'IgnoreCase'           - A logical value that controls whether the
+    %                            FullTokenizer ignores case (true) or is
+    %                            case-sensitive (false). The default is true.
+    %
+    %   FullTokenizer methods:
+    %     tokenize - tokenize text
+    %     encode   - encode tokens
+    %     decode   - decode encoded tokens
+    %
+    % Example:
+    %   % Save a file named fakeVocab.txt with the text on the next 3 lines:
+    %   fake
+    %   vo
+    %   ##cab
+    %
+    %   % Now create a FullTokenizer
+    %   tokenizer = bert.tokenizer.internal.FullTokenizer('fakeVocab.txt');
+    %   tokens = tokenizer.tokenize("This tokenizer has a fake vocab")
+    %   % Note that most tokens are unknown as they are not in the
+    %   % vocabulary and neither are any sub-tokens. However "fake" is
+    %   % detected and "vocab" is split into "vo" and "##cab".
+    %   tokenizer.encode(tokens)
+    %   % This returns the encoded form of the tokens - each token is
+    %   % replaced by its corresponding line number in the fakeVocab.txt
     
     % Copyright 2021-2023 The MathWorks, Inc.
     
diff --git a/+bert/+tokenizer/BERTTokenizer.m b/+bert/+tokenizer/BERTTokenizer.m
@@ -1,45 +1,45 @@
 classdef BERTTokenizer
-   % BERTTokenizer   Construct a tokenizer to use with BERT
-   % models.
-   %
-   %   tokenizer = BERTTokenizer()   Constructs a case-insensitive
-   %   BERTTokenizer using the BERT-Base vocabulary file.
-   %
-   %   tokenizer = BERTTokenizer(vocabFile)   Constructs a
-   %   case-insensitive BERTTokenizer using the file vocabFile as
-   %   the vocabulary.
-   %
-   %   tokenizer = BERTTokenizer(vocabFile,'PARAM1', VAL1, 'PARAM2', VAL2, ...) 
-   %   specifies the optional parameter name/value pairs:
-   %
-   %   'IgnoreCase'           - A logical value to control if the
-   %                            BERTTokenizer is case sensitive or not.
-   %                            The default value is true.
-   %
-   %   'FullTokenizer'        - The underlying word-piece tokenizer.
-   %                            If not specified, a default
-   %                            FullTokenizer is constructed.
-   %
-   %   BERTTokenizer properties:
-   %     FullTokenizer  - The underlying word-piece tokenizer.
-   %     PaddingToken   - The string "[PAD]"
-   %     StartToken     - The string "[CLS]"
-   %     SeparatorToken - The string "[SEP]"
-   %     MaskToken      - The string "[MASK]"
-   %     PaddingCode    - The encoded PaddingToken
-   %     StartCode      - The encoded StartToken
-   %     SeparatorCode  - The encoded SeparatorToken
-   %     MaskCode       - The encoded MaskToken
-   %
-   %   BERTTokenizer methods:
-   %     tokenize     - Tokenize strings
-   %     encode       - Tokenize and encode strings
-   %     encodeTokens - Encode pre-tokenized token sequences
-   %     decode       - Decode an encoded sequence to string
-   %
-   % Example:
-   %   tokenizer = bert.tokenizer.BERTTokenizer();
-   %   sequences = tokenizer.encode("Hello World!")
+    % BERTTokenizer   Construct a tokenizer to use with BERT
+    % models.
+    %
+    %   tokenizer = BERTTokenizer()   Constructs a case-insensitive
+    %   BERTTokenizer using the BERT-Base vocabulary file.
+    %
+    %   tokenizer = BERTTokenizer(vocabFile)   Constructs a
+    %   case-insensitive BERTTokenizer using the file vocabFile as
+    %   the vocabulary.
+    %
+    %   tokenizer = BERTTokenizer(vocabFile,'PARAM1', VAL1, 'PARAM2', VAL2, ...) 
+    %   specifies the optional parameter name/value pairs:
+    %
+    %   'IgnoreCase'           - A logical value that controls whether the
+    %                            BERTTokenizer ignores case (true) or is
+    %                            case-sensitive (false). The default is true.
+    %
+    %   'FullTokenizer'        - The underlying word-piece tokenizer.
+    %                            If not specified, a default
+    %                            FullTokenizer is constructed.
+    %
+    %   BERTTokenizer properties:
+    %     FullTokenizer  - The underlying word-piece tokenizer.
+    %     PaddingToken   - The string "[PAD]"
+    %     StartToken     - The string "[CLS]"
+    %     SeparatorToken - The string "[SEP]"
+    %     MaskToken      - The string "[MASK]"
+    %     PaddingCode    - The encoded PaddingToken
+    %     StartCode      - The encoded StartToken
+    %     SeparatorCode  - The encoded SeparatorToken
+    %     MaskCode       - The encoded MaskToken
+    %
+    %   BERTTokenizer methods:
+    %     tokenize     - Tokenize strings
+    %     encode       - Tokenize and encode strings
+    %     encodeTokens - Encode pre-tokenized token sequences
+    %     decode       - Decode an encoded sequence to string
+    %
+    % Example:
+    %   tokenizer = bert.tokenizer.BERTTokenizer();
+    %   sequences = tokenizer.encode("Hello World!")
     
     % Copyright 2021-2023 The MathWorks, Inc.
     
@@ -60,47 +60,47 @@
     
     methods
         function this = BERTTokenizer(vocabFile,nvp)
-           % BERTTokenizer   Construct a tokenizer to use with BERT
-           % models.
-           %
-           %   tokenizer = BERTTokenizer()   Constructs a case-insensitive
-           %   BERTTokenizer using the BERT-Base vocabulary file.
-           %
-           %   tokenizer = BERTTokenizer(vocabFile)   Constructs a
-           %   case-insensitive BERTTokenizer using the file vocabFile as
-           %   the vocabulary.
-           %
-           %   tokenizer = BERTTokenizer(vocabFile,'PARAM1', VAL1, 'PARAM2', VAL2, ...) 
-           %   specifies the optional parameter name/value pairs:
-           %
-           %   'IgnoreCase'           - A logical value to control if the
-           %                            BERTTokenizer is case sensitive or not.
-           %                            The default value is true.
-           %
-           %   'FullTokenizer'        - The underlying word-piece tokenizer.
-           %                            If not specified, a default
-           %                            FullTokenizer is constructed.
-           %
-           %   BERTTokenizer properties:
-           %     FullTokenizer  - The underlying word-piece tokenizer.
-           %     PaddingToken   - The string "[PAD]"
-           %     StartToken     - The string "[CLS]"
-           %     SeparatorToken - The string "[SEP]"
-           %     MaskToken      - The string "[MASK]"
-           %     PaddingCode    - The encoded PaddingToken
-           %     StartCode      - The encoded StartToken
-           %     SeparatorCode  - The encoded SeparatorToken
-           %     MaskCode       - The encoded MaskToken
-           %
-           %   BERTTokenizer methods:
-           %     tokenize     - Tokenize strings
-           %     encode       - Tokenize and encode strings
-           %     encodeTokens - Encode pre-tokenized token sequences
-           %     decode       - Decode an encoded sequence to string
-           %
-           % Example:
-           %   tokenizer = bert.tokenizer.BERTTokenizer();
-           %   sequences = tokenizer.encode("Hello World!")
+            % BERTTokenizer   Construct a tokenizer to use with BERT
+            % models.
+            %
+            %   tokenizer = BERTTokenizer()   Constructs a case-insensitive
+            %   BERTTokenizer using the BERT-Base vocabulary file.
+            %
+            %   tokenizer = BERTTokenizer(vocabFile)   Constructs a
+            %   case-insensitive BERTTokenizer using the file vocabFile as
+            %   the vocabulary.
+            %
+            %   tokenizer = BERTTokenizer(vocabFile,'PARAM1', VAL1, 'PARAM2', VAL2, ...) 
+            %   specifies the optional parameter name/value pairs:
+            %
+            %   'IgnoreCase'           - A logical value that controls whether the
+            %                            BERTTokenizer ignores case (true) or is
+            %                            case-sensitive (false). The default is true.
+            %
+            %   'FullTokenizer'        - The underlying word-piece tokenizer.
+            %                            If not specified, a default
+            %                            FullTokenizer is constructed.
+            %
+            %   BERTTokenizer properties:
+            %     FullTokenizer  - The underlying word-piece tokenizer.
+            %     PaddingToken   - The string "[PAD]"
+            %     StartToken     - The string "[CLS]"
+            %     SeparatorToken - The string "[SEP]"
+            %     MaskToken      - The string "[MASK]"
+            %     PaddingCode    - The encoded PaddingToken
+            %     StartCode      - The encoded StartToken
+            %     SeparatorCode  - The encoded SeparatorToken
+            %     MaskCode       - The encoded MaskToken
+            %
+            %   BERTTokenizer methods:
+            %     tokenize     - Tokenize strings
+            %     encode       - Tokenize and encode strings
+            %     encodeTokens - Encode pre-tokenized token sequences
+            %     decode       - Decode an encoded sequence to string
+            %
+            % Example:
+            %   tokenizer = bert.tokenizer.BERTTokenizer();
+            %   sequences = tokenizer.encode("Hello World!")
             arguments
                 vocabFile (1,1) string {mustBeFile} = bert.internal.getSupportFilePath("base","vocab.txt")
                 nvp.IgnoreCase (1,1) logical = true