diff --git a/doc/release-notes/11485-mpconfig-personororg.md b/doc/release-notes/11485-mpconfig-personororg.md new file mode 100644 index 00000000000..c30ef3829c1 --- /dev/null +++ b/doc/release-notes/11485-mpconfig-personororg.md @@ -0,0 +1,7 @@ +The settings `dataverse.personOrOrg.assumeCommaInPersonName` and `dataverse.personOrOrg.orgPhraseArray` now support configuration via MicroProfile Config. + +They have been renamed to `dataverse.person-or-org.assume-comma-in-person-name` and `dataverse.person-or-org.org-phrase-array` for consistency with naming conventions. + +In addition to the existing `asadmin` JVM option method, any [supported MicroProfile Config API source](https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html) can now be used to set their values. + +For backwards compatibility, `dataverse.personOrOrg.assumeCommaInPersonName` is still supported. However, `dataverse.personOrOrg.orgPhraseArray` is not, due to a change in the expected value format. `dataverse.person-or-org.org-phrase-array` now expects a comma-separated list of phrases as a value instead of a JsonArray of strings. Please update both the name and value format if using the old setting. \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 200c3a3e342..97baf3e0c8e 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -65,5 +65,5 @@ Two exporters - Schema.org JSONLD and OpenAire - use an algorithm to determine w The Dataverse software implements two jvm-options that can be used to tune the algorithm: -- :ref:`dataverse.personOrOrg.assumeCommaInPersonName` - boolean, default false. If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. 
-- :ref:`dataverse.personOrOrg.orgPhraseArray` - a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. +- :ref:`dataverse.person-or-org.assume-comma-in-person-name` - boolean, default false. If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. +- :ref:`dataverse.person-or-org.org-phrase-array` - a comma-separated list of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ac6b9e48347..16d96e42114 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3124,27 +3124,36 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_VIA", "REMOTE_ADDR" -.. _dataverse.personOrOrg.assumeCommaInPersonName: +.. _dataverse.person-or-org.assume-comma-in-person-name: -dataverse.personOrOrg.assumeCommaInPersonName -+++++++++++++++++++++++++++++++++++++++++++++ +dataverse.person-or-org.assume-comma-in-person-name ++++++++++++++++++++++++++++++++++++++++++++++++++++ Please note that this setting is experimental. The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. -.. 
_dataverse.personOrOrg.orgPhraseArray: +``./asadmin create-jvm-options '-Ddataverse.person-or-org.assume-comma-in-person-name=true'`` -dataverse.personOrOrg.orgPhraseArray -++++++++++++++++++++++++++++++++++++ +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_PERSON_OR_ORG_ASSUME_COMMA_IN_PERSON_NAME``. + +**Note:** This setting was previously called ``dataverse.personOrOrg.assumeCommaInPersonName``, which is still available as an alias for backwards compatibility. + +.. _dataverse.person-or-org.org-phrase-array: + +dataverse.person-or-org.org-phrase-array +++++++++++++++++++++++++++++++++++++++++ Please note that this setting is experimental. The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you have examples where an orgization name is being inferred to belong to a person, you can use this setting to force it to be recognized as an organization. -The value is expected to be a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. +The value is expected to be a comma-separated list of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_PERSON_OR_ORG_ORG_PHRASE_ARRAY``. +**Note:** This setting was previously called ``dataverse.personOrOrg.orgPhraseArray`` and expected a JsonArray of strings. Please update both the name and value format if using the old setting. .. 
_dataverse.api.signature-secret: diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 07de576a0eb..0578dc4297b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -74,7 +74,7 @@ public enum JvmSettings { // INDEX CONCURENCY SCOPE_SOLR_CONCURENCY(SCOPE_SOLR, "concurrency"), MAX_ASYNC_INDEXES(SCOPE_SOLR_CONCURENCY, "max-async-indexes"), - + // RSERVE CONNECTION SCOPE_RSERVE(PREFIX, "rserve"), RSERVE_HOST(SCOPE_RSERVE, "host"), @@ -271,7 +271,12 @@ public enum JvmSettings { //CSL CITATION SETTINGS SCOPE_CSL(PREFIX, "csl"), CSL_COMMON_STYLES(SCOPE_CSL, "common-styles"), - + + // PersonOrOrgUtil SETTINGS + SCOPE_PERSONORORG(PREFIX, "person-or-org"), + ASSUME_COMMA_IN_PERSON_NAME(SCOPE_PERSONORORG, "assume-comma-in-person-name", "dataverse.personOrOrg.assumeCommaInPersonName"), + ORG_PHRASE_ARRAY(SCOPE_PERSONORORG, "org-phrase-array"), + // CORS SETTINGS SCOPE_CORS(PREFIX, "cors"), CORS_ORIGIN(SCOPE_CORS, "origin"), diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 80e32184731..a3039d99ff7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -4,12 +4,10 @@ import java.util.List; import java.util.logging.Logger; -import jakarta.json.JsonArray; +import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonString; -import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; /** @@ -42,8 +40,8 @@ public class PersonOrOrgUtil { static List orgPhrases; static { - 
setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); - setOrgPhraseArray(System.getProperty("dataverse.personOrOrg.orgPhraseArray", null)); + setAssumeCommaInPersonName(JvmSettings.ASSUME_COMMA_IN_PERSON_NAME.lookupOptional(Boolean.class).orElse(false)); + setOrgPhraseArray(JvmSettings.ORG_PHRASE_ARRAY.lookupOptional(String[].class).orElse(new String[]{})); } /** @@ -137,25 +135,16 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } // Public for testing - public static void setOrgPhraseArray(String phraseArray) { - orgPhrases = new ArrayList(); - if (!StringUtil.isEmpty(phraseArray)) { - try { - JsonArray phrases = JsonUtil.getJsonArray(phraseArray); - phrases.forEach(val -> { - JsonString strVal = (JsonString) val; - orgPhrases.add(strVal.getString()); - }); - } catch (Exception e) { - logger.warning("Could not parse Org phrase list"); - } + public static void setOrgPhraseArray(String[] phraseArray) { + if (phraseArray == null) { + orgPhrases = new ArrayList<>(); + } else { + orgPhrases = List.of(phraseArray); } - } // Public for testing public static void setAssumeCommaInPersonName(boolean assume) { assumeCommaInPersonName = assume; } - } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index d772ba2b9da..ea0c9756418 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -1,13 +1,17 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static 
org.junit.jupiter.api.Assertions.*; import jakarta.json.JsonObject; +@LocalJvmSettings public class PersonOrOrgUtilTest { public PersonOrOrgUtilTest() { @@ -26,27 +30,41 @@ public void testOrganizationCOMPLEXName() { verifyIsOrganization("The Ford Foundation"); verifyIsOrganization("United Nations Economic and Social Commission for Asia and the Pacific (UNESCAP)"); verifyIsOrganization("Michael J. Fox Foundation for Parkinson's Research"); - // The next example is one known to be asserted to be a Person without an entry - // in the OrgWordArray - // So we test with it in the array and then when the array is empty to verify - // the array works, resetting the array works, and the problem still exists in + // The next examples are known to be asserted to be a Person without an entry in the OrgWordArray + // So we test when no array is set via JvmSetting to verify the problem still exists in // the underlying algorithm - PersonOrOrgUtil.setOrgPhraseArray("[\"Portable\"]"); - verifyIsOrganization("Portable Antiquities of the Netherlands"); - PersonOrOrgUtil.setOrgPhraseArray(null); JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization("Portable Antiquities of the Netherlands", false, false); assertTrue(obj.getBoolean("isPerson")); + JsonObject obj2 = PersonOrOrgUtil.getPersonOrOrganization("Max Mustermann GmbH", false, false); + assertTrue(obj2.getBoolean("isPerson")); + } + + @Test + public void testOrganizationWithOrgPhraseArray() { + PersonOrOrgUtil.setOrgPhraseArray(new String[]{"Portable", "GmbH"}); + // The next examples are known to be asserted to be a Person without an entry in the OrgWordArray + // So we test with the array set via JvmSetting to verify the array works + verifyIsOrganization("Portable Antiquities of the Netherlands"); + verifyIsOrganization("Max Mustermann GmbH"); + PersonOrOrgUtil.setOrgPhraseArray(null); } @Test public void testOrganizationAcademicName() { + verifyIsOrganization("John Smith Center"); + verifyIsOrganization("John Smith 
Group"); + // An example the base algorithm doesn't handle: + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization("John Smith Project", false, false); + assertTrue(obj.getBoolean("isPerson")); + } - verifyIsOrganization("John Smith Center"); - verifyIsOrganization("John Smith Group"); - //An example the base algorithm doesn't handle: - PersonOrOrgUtil.setAssumeCommaInPersonName(true); - verifyIsOrganization("John Smith Project"); - PersonOrOrgUtil.setAssumeCommaInPersonName(false); + @Test + public void testOrganizationAcademicNameWithAssumeComma() { + PersonOrOrgUtil.setAssumeCommaInPersonName(true); + verifyIsOrganization("John Smith Center"); + verifyIsOrganization("John Smith Group"); + verifyIsOrganization("John Smith Project"); + PersonOrOrgUtil.setAssumeCommaInPersonName(false); }