Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions unicodetools/data/linkification/dev/LinkEmail.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# LinkEmail.txt
# Date: 2026-01-31, 12:27:25 GMT
# © 2026 Unicode®, Inc.
# Date: 2026-02-03, 13:35:49 GMT
# © 2025 Unicode®, Inc.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should definitely write 2026 now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we should make this change consistently and globally by updating MakeUnicodeFiles.txt. This will rewrite the whole UCD. Let’s do that in another PR.

# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
Expand Down
4 changes: 2 additions & 2 deletions unicodetools/data/linkification/dev/LinkTerm.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# LinkTerm.txt
# Date: 2026-02-03, 12:09:16 GMT
# © 2026 Unicode®, Inc.
# Date: 2026-02-03, 19:23:30 GMT
# © 2025 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.unicode.text.utility.Settings;
import org.unicode.text.utility.UnicodeDataFile;
import org.unicode.text.utility.Utility;
import org.unicode.tools.GenerateLinkData;
import org.unicode.tools.Segmenter;

public class MakeUnicodeFiles {
Expand Down Expand Up @@ -634,6 +635,21 @@ public static void generateFile(String filename) throws IOException {
case "DoNotEmit":
generateDoNotEmit(filename);
break;
case "LinkEmail":
GenerateLinkData.generateLinkEmail(Default.getYear());
break;
case "LinkTerm":
GenerateLinkData.generateLinkTerm(Default.getYear());
break;
case "LinkBracket":
GenerateLinkData.generateLinkBracket(Default.getYear());
break;
case "LinkDetectionTest":
GenerateLinkData.generateDetectionTestData(Default.getYear());
break;
case "LinkFormattingTest":
GenerateLinkData.generateFormattingTestData(Default.getYear());
break;
default:
generatePropertyFile(filename);
break;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.unicode.tools.emoji;
package org.unicode.text.utility;

import com.ibm.icu.util.ICUUncheckedIOException;
import java.io.File;
Expand All @@ -7,18 +7,17 @@
import java.io.Writer;
import java.util.Random;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.text.utility.Utility;

public class TempPrintWriter extends Writer {
final PrintWriter tempPrintWriter;
public class DiffingPrintWriter extends Writer {
public final PrintWriter tempPrintWriter;
final String tempName;
final String filename;

public TempPrintWriter(String dir, String filename) {
public DiffingPrintWriter(String dir, String filename) {
this(new File(dir, filename));
}

public TempPrintWriter(File file) {
public DiffingPrintWriter(File file) {
super();
final String parentFile = file.getParent();
this.filename = file.toString();
Expand Down
119 changes: 75 additions & 44 deletions unicodetools/src/main/java/org/unicode/tools/GenerateLinkData.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.unicode.props.BagFormatter;
import org.unicode.props.UcdProperty;
import org.unicode.props.UcdPropertyValues;
import org.unicode.text.utility.DiffingPrintWriter;
import org.unicode.utilities.LinkUtilities;
import org.unicode.utilities.LinkUtilities.LinkTermination;
import org.unicode.utilities.LinkUtilities.Part;
Expand All @@ -45,7 +46,7 @@
*
* @throws IOException
*/
class GenerateLinkData {
public class GenerateLinkData {

private static final Transliterator FIX_ODD =
Transliterator.createFromRules(
Expand Down Expand Up @@ -75,9 +76,13 @@ class GenerateLinkData {
"");

public static void main(String[] args) throws IOException {
generatePropertyData();
generateDetectionTestData();
generateFormattingTestData();
System.out.println("TLDs=\t" + Joiner.on(' ').join(LinkUtilities.TLDS));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is LinkUtilities.TLDS?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven’t the faintest idea; this print statement was at the beginning of generatePropertyData and wasn’t part of the generation of any of the three files, so I lifted it here to preserve the behaviour of this tool.

I guess it has something to do with top-level domains?

final String copyrightYear = dty.format(now);
generateLinkTerm(copyrightYear);
generateLinkEmail(copyrightYear);
generateLinkBracket(copyrightYear);
generateDetectionTestData(copyrightYear);
generateFormattingTestData(copyrightYear);
}

static final Instant now = Instant.now();
Expand Down Expand Up @@ -213,21 +218,24 @@ static void writePropHeader(
SimpleFormatter simpleFormatter,
String filename,
String propertyName,
String missingValue) {
String missingValue,
String copyrightYear) {
out.println(
simpleFormatter.format(
filename, dt.format(now), dty.format(now), propertyName, missingValue));
filename, dt.format(now), copyrightYear, propertyName, missingValue));
}

static void writeTestHeader(
PrintWriter out, SimpleFormatter simpleFormatter, String filename, String testName) {
out.println(simpleFormatter.format(filename, dt.format(now), dty.format(now), testName));
PrintWriter out,
SimpleFormatter simpleFormatter,
String filename,
String testName,
String copyrightYear) {
out.println(simpleFormatter.format(filename, dt.format(now), copyrightYear, testName));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious / probably for later: are dt.format(now) and copyrightYear used for the same output?

Copy link
Member Author

@eggrobin eggrobin Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dt.format(now) is for the Date: line; that one is actually the time of regeneration, but it is ignored for diffing.

copyrightYear is not ignored for diffing, so when using MakeUnicodeFiles it is taken from MakeUnicodeFiles.txt, so that we don’t break on the 1st of January.

(Of course the emoji do break on the 1st of January; see #1273. I should fix that; time bombs in CI are annoying.)

}

/** Generate property data for the UTS */
static void generatePropertyData() {
System.out.println("TLDs=\t" + Joiner.on(' ').join(LinkUtilities.TLDS));

public static void generateLinkTerm(String copyrightYear) {
BagFormatter bf = new BagFormatter(LinkUtilities.IUP).setLineSeparator("\n");
bf.setShowLiteral(FIX_ODD);

Expand All @@ -236,62 +244,81 @@ static void generatePropertyData() {
bf.setValueSource(LinkTermination.PROPERTY);
bf.setLabelSource(LinkUtilities.IUP.getProperty(UcdProperty.Age));

try (final PrintWriter out =
FileUtilities.openUTF8Writer(LinkUtilities.DATA_DIR_DEV, "LinkTerm.txt"); ) {
writePropHeader(out, HEADER_PROP_TERM, "LinkTerm", "Link_Term", "Hard");
try (final var out = new DiffingPrintWriter(LinkUtilities.DATA_DIR_DEV, "LinkTerm.txt"); ) {
writePropHeader(
out.tempPrintWriter,
HEADER_PROP_TERM,
"LinkTerm",
"Link_Term",
"Hard",
copyrightYear);
for (LinkTermination propValue : LinkTermination.NON_MISSING) {
bf.showSetNames(out, propValue.base);
bf.showSetNames(out.tempPrintWriter, propValue.base);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When do we use out.tempPrintWriter vs. just out?

out.println("");
out.flush();
System.out.println(propValue + "=\t" + propValue.base.toPattern(false));
}

} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

public static void generateLinkEmail(String copyrightYear) {
BagFormatter bf = new BagFormatter(LinkUtilities.IUP).setLineSeparator("\n");
bf.setShowLiteral(FIX_ODD);
bf.setLabelSource(LinkUtilities.IUP.getProperty(UcdProperty.Age));
// LinkEmail.txt
bf.setValueSource(UnicodeLabel.NULL);
try (final PrintWriter out =
FileUtilities.openUTF8Writer(LinkUtilities.DATA_DIR_DEV, "LinkEmail.txt"); ) {
try (final var out =
new DiffingPrintWriter(LinkUtilities.DATA_DIR_DEV, "LinkEmail.txt"); ) {
writePropHeader(
out,
out.tempPrintWriter,
HEADER_PROP_BINARY,
"LinkEmail",
"Link_Email",
UcdPropertyValues.Binary.No.toString());
UcdPropertyValues.Binary.No.toString(),
copyrightYear);
UnicodeSet linkEmailSet = LinkUtilities.LinkEmail.getSet(UcdPropertyValues.Binary.Yes);
bf.showSetNames(out, linkEmailSet);
bf.showSetNames(out.tempPrintWriter, linkEmailSet);
System.out.println("LinkEmail=\t" + linkEmailSet.toPattern(false));
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

public static void generateLinkBracket(String copyrightYear) {
BagFormatter bf = new BagFormatter(LinkUtilities.IUP).setLineSeparator("\n");
bf.setShowLiteral(FIX_ODD);
bf.setLabelSource(LinkUtilities.IUP.getProperty(UcdProperty.Age));
// LinkBracket.txt
bf.setValueSource(LinkUtilities.getLinkBracket());
bf.setHexValue(true);
bf.setShowDehexedValue(true);
try (final PrintWriter out =
FileUtilities.openUTF8Writer(LinkUtilities.DATA_DIR_DEV, "LinkBracket.txt"); ) {
writePropHeader(out, HEADER_PROP_STRING, "LinkBracket", "Link_Bracket", "<none>");
bf.showSetNames(out, LinkTermination.Close.base);
} catch (IOException e) {
throw new UncheckedIOException(e);
try (final var out =
new DiffingPrintWriter(LinkUtilities.DATA_DIR_DEV, "LinkBracket.txt"); ) {
writePropHeader(
out.tempPrintWriter,
HEADER_PROP_STRING,
"LinkBracket",
"Link_Bracket",
"<none>",
copyrightYear);
bf.showSetNames(out.tempPrintWriter, LinkTermination.Close.base);
}
}

/**
* The format of the test file sources is: source<TAB>expected OR just source. If there is an
* expected value, then it is checked against what is generated.
*/
static void generateDetectionTestData() {
public static void generateDetectionTestData(String copyrightYear) {

OutputInt errorCount = new OutputInt();

try (final PrintWriter out =
FileUtilities.openUTF8Writer(
LinkUtilities.DATA_DIR_DEV, "LinkDetectionTest.txt"); ) {
writeTestHeader(out, HEADER_DETECT_TEST, "LinkDetectionTest", "LinkDetectionTest");
try (final var out =
new DiffingPrintWriter(LinkUtilities.DATA_DIR_DEV, "LinkDetectionTest.txt"); ) {
writeTestHeader(
out.tempPrintWriter,
HEADER_DETECT_TEST,
"LinkDetectionTest",
"LinkDetectionTest",
copyrightYear);

out.println("\n# Misc. test cases\n");

Expand Down Expand Up @@ -364,16 +391,20 @@ static void generateDetectionTestData() {
}
}

static void generateFormattingTestData() {
public static void generateFormattingTestData(String copyrightYear) {

OutputInt errorCount = new OutputInt();

try (final PrintWriter out =
FileUtilities.openUTF8Writer(
LinkUtilities.DATA_DIR_DEV, "LinkFormattingTest.txt"); ) {
writeTestHeader(out, HEADER_FORMAT_TEST, "LinkFormattingTest", "LinkFormattingTest");
try (final var out =
new DiffingPrintWriter(LinkUtilities.DATA_DIR_DEV, "LinkFormattingTest.txt"); ) {
writeTestHeader(
out.tempPrintWriter,
HEADER_FORMAT_TEST,
"LinkFormattingTest",
"LinkFormattingTest",
copyrightYear);

out.println("\n# Selected test cases\n");
out.tempPrintWriter.println("\n# Selected test cases\n");

List<String> comments = new ArrayList<>();
Output<Integer> lineCount = new Output<>(0);
Expand Down Expand Up @@ -439,7 +470,7 @@ static void generateFormattingTestData() {
comments.clear();
return;
}
outputTestCase(out, comments, internals, actual);
outputTestCase(out.tempPrintWriter, comments, internals, actual);
});

out.println("\n# Wikipedia test cases\n");
Expand Down Expand Up @@ -480,7 +511,7 @@ static void generateFormattingTestData() {

String actual = internals.minimalEscape(true, null);

outputTestCase(out, comments, internals, actual);
outputTestCase(out.tempPrintWriter, comments, internals, actual);
});
} catch (IOException e) {
throw new UncheckedIOException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;
import org.unicode.text.utility.DiffingPrintWriter;
import org.unicode.text.utility.Utility;
import org.unicode.tools.emoji.EmojiOrder.MajorGroup;

Expand Down Expand Up @@ -203,7 +204,7 @@ public UnicodeSet getMultiPersonGroupings() {
*
* @param reformatted
*/
public void showOrderingInterleaved(TempPrintWriter reformatted) {
public void showOrderingInterleaved(DiffingPrintWriter reformatted) {
showOrderingInterleaved(30, reformatted);
}

Expand Down
Loading
Loading