[Doc] Encourage use of encoding constants

byroot · byroot · commit e2f1f7c56716 · 2025-01-12T11:48:01.000+01:00
Lots of documentation examples still use encoding APIs with encoding names
rather than encoding constants. I think it would be preferable to direct
users toward constants as it can help with auto-completion, static analysis
and such.
diff --git a/doc/encodings.rdoc b/doc/encodings.rdoc
@@ -12,9 +12,9 @@ Some character sets contain only 1-byte characters;
 {US-ASCII}[https://en.wikipedia.org/wiki/ASCII], for example, has 128 1-byte characters.
 This string, encoded in US-ASCII, has six characters that are stored as six bytes:
 
-  s = 'Hello!'.encode('US-ASCII')  # => "Hello!"
-  s.encoding                       # => #<Encoding:US-ASCII>
-  s.bytes                          # => [72, 101, 108, 108, 111, 33]
+  s = 'Hello!'.encode(Encoding::US_ASCII)  # => "Hello!"
+  s.encoding                               # => #<Encoding:US-ASCII>
+  s.bytes                                  # => [72, 101, 108, 108, 111, 33]
 
 Other encodings may involve multi-byte characters.
 {UTF-8}[https://en.wikipedia.org/wiki/UTF-8], for example,
@@ -106,17 +106,17 @@ Method Encoding.default_external returns the default external \Encoding:
 
 Method Encoding.default_external= sets that value:
 
-  Encoding.default_external = 'US-ASCII' # => "US-ASCII"
-  Encoding.default_external              # => #<Encoding:US-ASCII>
+  Encoding.default_external = Encoding::US_ASCII # => #<Encoding:US-ASCII>
+  Encoding.default_external                      # => #<Encoding:US-ASCII>
 
 Method Encoding.default_internal returns the default internal \Encoding:
 
   Encoding.default_internal # => nil
 
 Method Encoding.default_internal= sets the default internal \Encoding:
 
-  Encoding.default_internal = 'US-ASCII' # => "US-ASCII"
-  Encoding.default_internal              # => #<Encoding:US-ASCII>
+  Encoding.default_internal = Encoding::US_ASCII # => #<Encoding:US-ASCII>
+  Encoding.default_internal                      # => #<Encoding:US-ASCII>
 
 === Compatible Encodings
 
@@ -128,9 +128,9 @@ returns the \Encoding of the concatenated string, or +nil+ if incompatible:
   eng = 'text'
   Encoding.compatible?(rus, eng) # => #<Encoding:UTF-8>
 
-  s0 = "\xa1\xa1".force_encoding('iso-8859-1') # => "\xA1\xA1"
-  s1 = "\xa1\xa1".force_encoding('euc-jp')     # => "\x{A1A1}"
-  Encoding.compatible?(s0, s1)                 # => nil
+  s0 = "\xa1\xa1".force_encoding(Encoding::ISO_8859_1) # => "\xA1\xA1"
+  s1 = "\xa1\xa1".force_encoding(Encoding::EUC_JP)     # => "\x{A1A1}"
+  Encoding.compatible?(s0, s1)                         # => nil
 
 == \String \Encoding
 
@@ -151,38 +151,38 @@ The default encoding for a string created with method String.new is:
 
 In either case, any encoding may be specified:
 
-  s = String.new(encoding: 'UTF-8')             # => ""
-  s.encoding                                    # => #<Encoding:UTF-8>
-  s = String.new('foo', encoding: 'ASCII-8BIT') # => "foo"
-  s.encoding                                    # => #<Encoding:ASCII-8BIT>
+  s = String.new(encoding: Encoding::UTF_8)         # => ""
+  s.encoding                                        # => #<Encoding:UTF-8>
+  s = String.new('foo', encoding: Encoding::BINARY) # => "foo"
+  s.encoding                                        # => #<Encoding:BINARY (ASCII-8BIT)>
 
 The encoding for a string may be changed:
 
-  s = "R\xC3\xA9sum\xC3\xA9"     # => "Résumé"
-  s.encoding                     # => #<Encoding:UTF-8>
-  s.force_encoding('ISO-8859-1') # => "R\xC3\xA9sum\xC3\xA9"
-  s.encoding                     # => #<Encoding:ISO-8859-1>
+  s = "R\xC3\xA9sum\xC3\xA9"              # => "Résumé"
+  s.encoding                              # => #<Encoding:UTF-8>
+  s.force_encoding(Encoding::ISO_8859_1)  # => "R\xC3\xA9sum\xC3\xA9"
+  s.encoding                              # => #<Encoding:ISO-8859-1>
 
 Changing the assigned encoding does not alter the content of the string;
 it changes only the way the content is to be interpreted:
 
-  s                         # => "R\xC3\xA9sum\xC3\xA9"
-  s.force_encoding('UTF-8') # => "Résumé"
+  s                                 # => "R\xC3\xA9sum\xC3\xA9"
+  s.force_encoding(Encoding::UTF_8) # => "Résumé"
 
 The actual content of a string may also be altered;
 see {Transcoding a String}[#label-Transcoding+a+String].
 
 Here are a couple of useful query methods:
 
-  s = "abc".force_encoding("UTF-8")         # => "abc"
-  s.ascii_only?                             # => true
-  s = "abc\u{6666}".force_encoding("UTF-8") # => "abc晦"
-  s.ascii_only?                             # => false
+  s = "abc".force_encoding(Encoding::UTF_8)         # => "abc"
+  s.ascii_only?                                     # => true
+  s = "abc\u{6666}".force_encoding(Encoding::UTF_8) # => "abc晦"
+  s.ascii_only?                                     # => false
 
-  s = "\xc2\xa1".force_encoding("UTF-8") # => "¡"
-  s.valid_encoding?                      # => true
-  s = "\xc2".force_encoding("UTF-8")     # => "\xC2"
-  s.valid_encoding?                      # => false
+  s = "\xc2\xa1".force_encoding(Encoding::UTF_8)    # => "¡"
+  s.valid_encoding?                                 # => true
+  s = "\xc2".force_encoding(Encoding::UTF_8)        # => "\xC2"
+  s.valid_encoding?                                 # => false
 
 == \Symbol and \Regexp Encodings
 
@@ -337,8 +337,8 @@ then reads the file into a new string, encoding it as UTF-8:
 
   s = "R\u00E9sum\u00E9"
   path = 't.tmp'
-  ext_enc = 'ISO-8859-1'
-  int_enc = 'UTF-8'
+  ext_enc = Encoding::ISO_8859_1
+  int_enc = Encoding::UTF_8
 
   File.write(path, s, external_encoding: ext_enc)
   raw_text = File.binread(path)
@@ -373,8 +373,8 @@ These keyword-value pairs specify encoding options:
   Examples:
 
     s = "\x80foo\x80"
-    s.encode('ISO-8859-3') # Raises Encoding::InvalidByteSequenceError.
-    s.encode('ISO-8859-3', invalid: :replace) # => "?foo?"
+    s.encode(Encoding::ISO_8859_3) # Raises Encoding::InvalidByteSequenceError.
+    s.encode(Encoding::ISO_8859_3, invalid: :replace) # => "?foo?"
 
 - For an undefined character:
 
@@ -385,8 +385,8 @@ These keyword-value pairs specify encoding options:
   Examples:
 
     s = "\x80foo\x80"
-    "\x80".encode('UTF-8', 'ASCII-8BIT') # Raises Encoding::UndefinedConversionError.
-    s.encode('UTF-8', 'ASCII-8BIT', undef: :replace) # => "�foo�"
+    "\x80".encode(Encoding::UTF_8, Encoding::BINARY) # Raises Encoding::UndefinedConversionError.
+    s.encode(Encoding::UTF_8, Encoding::BINARY, undef: :replace) # => "�foo�"
 
 
 - Replacement string:
@@ -400,7 +400,7 @@ These keyword-value pairs specify encoding options:
 
     s = "\xA5foo\xA5"
     options = {:undef => :replace, :replace => 'xyzzy'}
-    s.encode('UTF-8', 'ISO-8859-3', **options) # => "xyzzyfooxyzzy"
+    s.encode(Encoding::UTF_8, Encoding::ISO_8859_3, **options) # => "xyzzyfooxyzzy"
 
 - Replacement fallback:
 
@@ -420,12 +420,12 @@ These keyword-value pairs specify encoding options:
 
     hash = {"\u3042" => 'xyzzy'}
     hash.default = 'XYZZY'
-    s.encode('ASCII', fallback: hash) # => "xyzzyfooXYZZY"
+    s.encode(Encoding::US_ASCII, fallback: hash) # => "xyzzyfooXYZZY"
 
     def (fallback = "U+%.4X").escape(x)
       self % x.unpack("U")
     end
-    "\u{3042}".encode("US-ASCII", fallback: fallback.method(:escape)) # => "U+3042"
+    "\u{3042}".encode(Encoding::US_ASCII, fallback: fallback.method(:escape)) # => "U+3042"
 
     proc = Proc.new {|x| x == "\u3042" ? 'xyzzy' : 'XYZZY' }
     s.encode('ASCII', fallback: proc) # => "XYZZYfooXYZZY"
@@ -458,8 +458,8 @@ These keyword-value pairs specify encoding options:
   Examples:
 
     s = 'foo"<&>"bar' + "\u3042"
-    s.encode('ASCII', xml: :text) # => "foo\"&lt;&amp;&gt;\"bar&#x3042;"
-    s.encode('ASCII', xml: :attr) # => "\"foo&quot;&lt;&amp;&gt;&quot;bar&#x3042;\""
+    s.encode(Encoding::US_ASCII, xml: :text) # => "foo\"&lt;&amp;&gt;\"bar&#x3042;"
+    s.encode(Encoding::US_ASCII, xml: :attr) # => "\"foo&quot;&lt;&amp;&gt;&quot;bar&#x3042;\""
 
 
 - Newlines:
@@ -476,7 +476,7 @@ These keyword-value pairs specify encoding options:
 
   Examples:
 
-    s = "\n \r \r\n"                           # => "\n \r \r\n"
-    s.encode('ASCII', cr_newline: true)        # => "\r \r \r\r"
-    s.encode('ASCII', crlf_newline: true)      # => "\r\n \r \r\r\n"
-    s.encode('ASCII', universal_newline: true) # => "\n \n \n"
+    s = "\n \r \r\n"                                      # => "\n \r \r\n"
+    s.encode(Encoding::US_ASCII, cr_newline: true)        # => "\r \r \r\r"
+    s.encode(Encoding::US_ASCII, crlf_newline: true)      # => "\r\n \r \r\r\n"
+    s.encode(Encoding::US_ASCII, universal_newline: true) # => "\n \n \n"
diff --git a/string.c b/string.c
@@ -3902,13 +3902,13 @@ rb_str_append_as_bytes(int argc, VALUE *argv, VALUE str)
  *    s = 'foo'
  *    s.encoding              # => #<Encoding:UTF-8>
  *    s << 0x00110000         # 1114112 out of char range (RangeError)
- *    s = 'foo'.encode('EUC-JP')
+ *    s = 'foo'.encode(Encoding::EUC_JP)
  *    s << 0x00800080         # invalid codepoint 0x800080 in EUC-JP (RangeError)
  *
  *  If the encoding is US-ASCII and the codepoint is 0..0xff, _string_
  *  is automatically promoted to ASCII-8BIT.
  *
- *    s = 'foo'.encode('US-ASCII')
+ *    s = 'foo'.encode(Encoding::US_ASCII)
  *    s << 0xff
  *    s.encoding              # => #<Encoding:BINARY (ASCII-8BIT)>
  *
@@ -4137,7 +4137,7 @@ rb_str_cmp(VALUE str1, VALUE str2)
  *    s == 'FOO' # => false
  *
  *  Returns +false+ if the two strings' encodings are not compatible:
- *    "\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false
+ *    "\u{e4 f6 fc}".encode(Encoding::ISO_8859_1) == ("\u{c4 d6 dc}") # => false
  *
  *  If +object+ is not an instance of +String+ but responds to +to_str+, then the
  *  two strings are compared using <code>object.==</code>.
@@ -4170,7 +4170,7 @@ rb_str_equal(VALUE str1, VALUE str2)
  *
  *  Returns +false+ if the two strings' encodings are not compatible:
  *
- *    "\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}") # => false
+ *    "\u{e4 f6 fc}".encode(Encoding::ISO_8859_1).eql?("\u{c4 d6 dc}") # => false
  *
  */
 
@@ -11412,9 +11412,9 @@ rb_str_b(VALUE str)
  *
  *  Returns +true+ if +self+ is encoded correctly, +false+ otherwise:
  *
- *    "\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true
- *    "\xc2".force_encoding("UTF-8").valid_encoding?     # => false
- *    "\x80".force_encoding("UTF-8").valid_encoding?     # => false
+ *    "\xc2\xa1".force_encoding(Encoding::UTF_8).valid_encoding? # => true
+ *    "\xc2".force_encoding(Encoding::UTF_8).valid_encoding?     # => false
+ *    "\x80".force_encoding(Encoding::UTF_8).valid_encoding?     # => false
  */
 
 static VALUE
@@ -11889,7 +11889,7 @@ rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
  *
  *  Raises an exception if +self+ is not in a Unicode encoding:
  *
- *    s = "\xE0".force_encoding('ISO-8859-1')
+ *    s = "\xE0".force_encoding(Encoding::ISO_8859_1)
  *    s.unicode_normalized? # Raises Encoding::CompatibilityError.
  *
  *  Related: String#unicode_normalize, String#unicode_normalize!.

Original file line number	Diff line number	Diff line change
`@@ -3902,13 +3902,13 @@ rb_str_append_as_bytes(int argc, VALUE *argv, VALUE str)`
`3902`	`3902`	`* s = 'foo'`
`3903`	`3903`	`* s.encoding # => #<Encoding:UTF-8>`
`3904`	`3904`	`* s << 0x00110000 # 1114112 out of char range (RangeError)`
`3905`		`- * s = 'foo'.encode('EUC-JP')`
	`3905`	`+ * s = 'foo'.encode(Encoding::EUC_JP)`
`3906`	`3906`	`* s << 0x00800080 # invalid codepoint 0x800080 in EUC-JP (RangeError)`
`3907`	`3907`	`*`
`3908`	`3908`	`* If the encoding is US-ASCII and the codepoint is 0..0xff, _string_`
`3909`	`3909`	`* is automatically promoted to ASCII-8BIT.`
`3910`	`3910`	`*`
`3911`		`- * s = 'foo'.encode('US-ASCII')`
	`3911`	`+ * s = 'foo'.encode(Encoding::US_ASCII)`
`3912`	`3912`	`* s << 0xff`
`3913`	`3913`	`* s.encoding # => #<Encoding:BINARY (ASCII-8BIT)>`
`3914`	`3914`	`*`
`@@ -4137,7 +4137,7 @@ rb_str_cmp(VALUE str1, VALUE str2)`
`4137`	`4137`	`* s == 'FOO' # => false`
`4138`	`4138`	`*`
`4139`	`4139`	`* Returns +false+ if the two strings' encodings are not compatible:`
`4140`		`- * "\u{e4 f6 fc}".encode("ISO-8859-1") == ("\u{c4 d6 dc}") # => false`
	`4140`	`+ * "\u{e4 f6 fc}".encode(Encoding::ISO_8859_1) == ("\u{c4 d6 dc}") # => false`
`4141`	`4141`	`*`
`4142`	`4142`	`* If +object+ is not an instance of +String+ but responds to +to_str+, then the`
`4143`	`4143`	`* two strings are compared using <code>object.==</code>.`
`@@ -4170,7 +4170,7 @@ rb_str_equal(VALUE str1, VALUE str2)`
`4170`	`4170`	`*`
`4171`	`4171`	`* Returns +false+ if the two strings' encodings are not compatible:`
`4172`	`4172`	`*`
`4173`		`- * "\u{e4 f6 fc}".encode("ISO-8859-1").eql?("\u{c4 d6 dc}") # => false`
	`4173`	`+ * "\u{e4 f6 fc}".encode(Encoding::ISO_8859_1).eql?("\u{c4 d6 dc}") # => false`
`4174`	`4174`	`*`
`4175`	`4175`	`*/`
`4176`	`4176`
`@@ -11412,9 +11412,9 @@ rb_str_b(VALUE str)`
`11412`	`11412`	`*`
`11413`	`11413`	`* Returns +true+ if +self+ is encoded correctly, +false+ otherwise:`
`11414`	`11414`	`*`
`11415`		`- * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true`
`11416`		`- * "\xc2".force_encoding("UTF-8").valid_encoding? # => false`
`11417`		`- * "\x80".force_encoding("UTF-8").valid_encoding? # => false`
	`11415`	`+ * "\xc2\xa1".force_encoding(Encoding::UTF_8).valid_encoding? # => true`
	`11416`	`+ * "\xc2".force_encoding(Encoding::UTF_8).valid_encoding? # => false`
	`11417`	`+ * "\x80".force_encoding(Encoding::UTF_8).valid_encoding? # => false`
`11418`	`11418`	`*/`
`11419`	`11419`
`11420`	`11420`	`static VALUE`
`@@ -11889,7 +11889,7 @@ rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)`
`11889`	`11889`	`*`
`11890`	`11890`	`* Raises an exception if +self+ is not in a Unicode encoding:`
`11891`	`11891`	`*`
`11892`		`- * s = "\xE0".force_encoding('ISO-8859-1')`
	`11892`	`+ * s = "\xE0".force_encoding(Encoding::ISO_8859_1)`
`11893`	`11893`	`* s.unicode_normalized? # Raises Encoding::CompatibilityError.`
`11894`	`11894`	`*`
`11895`	`11895`	`* Related: String#unicode_normalize, String#unicode_normalize!.`