You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: example-schemas/transcription_metadata_v1.3_RG101B0000 - names, ages only.csvs
+6-5Lines changed: 6 additions & 5 deletions
Original file line number
Diff line number
Diff line change
@@ -19,7 +19,7 @@ last_date_day:
19
19
last_date_month:
20
20
last_date_year:
21
21
county:
22
-
ed_letter_code:
22
+
ED_letter_code:
23
23
borough:
24
24
registration_district:
25
25
sub_district:
@@ -29,19 +29,20 @@ street_name:
29
29
schedule_no:
30
30
sub_schedule_no:
31
31
surname: (regex("^((((([dDL][\?aeiou]([- ]?))|([dDAL](e)?\')|([dD]e([- ]?)[lL]a([- ]?))|(St(e?[- ]?))|([Vv][\?ao]n( ?)([Dd]e( ?))))|(M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15})([- ](((([dDL][\?aeiou]([- ])?)|([dDAL]\')|([dD]e([- ])?[lL]a([- ])?)|(St(e?[- ]?))|([Vv][\?ao]n( )?([Dd]e( ?))))|((M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15}))){0,1})$") and regex("^(.)*\S$")) or is("???") or is("*") @warning @optional
32
-
surname_other: (regex("^((((([dDL][\?aeiou]([- ]?))|([dDAL](e?)\')|([dD]e([- ]?)[lL]a([- ]?))|(St(e?[- ]?))|([Vv][\?ao]n( ?)([Dd]e( ?))))|(M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15})([-, ](((([dDL][\?aeiou]([- ])?)|([dDAL]')|([dD]e([- ])?[lL]a([- ])?)|(St(e?[- ]?))|([Vv][\?ao]n( )?([Dd]e( ?))))|((M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15}))){0,3})$") and regex("^(.)*\S$")) or is("???") or is("*") @warning @optional
32
+
//ORIGINAL CODE surname_other: (regex("^((((([dDL][\?aeiou]([- ]?))|([dDAL](e?)\')|([dD]e([- ]?)[lL]a([- ]?))|(St(e?[- ]?))|([Vv][\?ao]n( ?)([Dd]e( ?))))|(M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15})([-, ](((([dDL][\?aeiou]([- ])?)|([dDAL]')|([dD]e([- ])?[lL]a([- ])?)|(St(e?[- ]?))|([Vv][\?ao]n( )?([Dd]e( ?))))|((M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15}))){0,3})$") and regex("^(.)*\S$")) or is("???") or is("*") @warning @optional
33
+
surname_other: (regex("^((((([dDL][\?aeiou]([- ]?))|([dDAL](e?)\')|([dD]e([- ]?)[lL]a([- ]?))|(St(e?[- ]?))|([Vv][\?ao]n( ?)([Dd]e( ?))))|(M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15})([-, ,\,](((([dDL][\?aeiou]([- ])?)|([dDAL]')|([dD]e([- ])?[lL]a([- ])?)|(St(e?[- ]?))|([Vv][\?ao]n( )?([Dd]e( ?))))|((M[\?a]?[\?c]|M\'|O\'))?[\?A-Z][\?a-z]{2,15}))){0,3})$") and regex("^(.)*\S$")) or is("???") or is("*") @warning @optional //added acceptance of , between surnames : [-, ,\,]
33
34
//The regex used for both surname fields is identical, both are set as optional, so the check only applies if something is in the field, the other schema checks that the field is only filled when it should be
34
35
//The surname is divided into an optional prefix, eg Mc, Mac, De, De La, with varied capitalisation, with or without space or hyphen to separate it from the mandatory part of the name which must begin with a capital letter, and then be lowercase only, then we allow for a single additional "barrel" (with identical formatting rules), separated by a space or hyphen. Names with more barrels are sufficiently uncommon that it's probably worth double checking that one part isn't actually a middle name, or an amendment that's not been properly captured
35
36
forenames: regex("^(Rev\: )?(M[\?a]?[\?c]|M\'|O\')?[\?A-Z][\?a-z]{0,15}([- ]((M[\?a]?[\?c]|M\'|O\')?[\?A-Zdv][\?a-z]{0,15}))*( M\.A\.| B\.A\.)?$") or is("???") or is("*") @optional @warning
36
37
forenames_other: regex("^(Rev\: )?(M[\?a]?[\?c]|M\'|O\')?[\?A-Z][\?a-z]{0,15}([-, ]((M[\?a]?[\?c]|M\'|O\')?[\?A-Zdv][\?a-z]{0,15}))*( M\.A\.| B\.A\.)?$") or is("???") or is("*") @optional @warning
37
-
ovspi:
38
+
OVSPI:
38
39
gender:
39
40
birth_date_day:
40
41
birth_date_month:
41
-
birth_date_year: if(positiveInteger,if($marital_status/is("*") or $marital_status/is("?") or $marital_status/is("single"),range(1845,1939),if($marital_status/is("married") or $marital_status/is("divorced") or $marital_status/is("widowed"),range(1845,1923))))
42
+
birth_date_year: if(positiveInteger,if($marital_status/is("*") or $marital_status/is("?") or $marital_status/is("single"),range(1845,1939),if($marital_status/is("married") or $marital_status/is("divorced") or $marital_status/is("widowed"),range(1840,1923)))) //changed from range(1845,1923)
metadata_type: is("PIECE") or is("ITEM") or is("ITEM_TITLEPAGE") or is("ITEM_MAINPAGE") or is("ITEM_CONPAGE") or is("SUBITEM_NAME") or is("SUBITEM_CONNAME") or is("SUBITEM_VACANT") or is("SUBITEM_BLANK") or is("SUBITEM_REMOVED") or is("SUBITEM_QADDRESS")
26
26
//This check prescribes the acceptable values for metadata_type (this value is then used by most of the rest of the checks to define acceptable values for that type of metadata row)
27
27
file_path: if($metadata_type/starts("ITEM_"), uri and regex("^file:\/\/\/RG_101\/[0-9]{1,5}[A-Z]\/[0-9]{1,3}\/[0-9]{1,5}[A-Z]_[0-9]{1,3}_[0-9]{1,4}.+\.jp2$") and unique fileExists, is(""))
28
-
//For item rows we expect to have the JP2 image, this should be given in the form of a URI, which is of the form file:///RG_101/{piece}/{item}/{piece}_{item}_0001.jp2 (values in {} are those for the appropriate field of the same name), the file_path must also be unique, and we check that the file actually exists (so a path substitution will be required)
28
+
//For item rows we expect to have the JP2 image, this should be given in the form of a URI, which is of the form file:///RG_101/{piece}/{item}/{piece}_{item}_0001.jp2 (values in {} are those for the appropriate field of the same name), the file_path must also be unique
29
29
page_number: if($metadata_type/regex("(ITEM_MAINPAGE)|(ITEM_CONPAGE)"), regex("^([0-9a-zA-Z]{1,4})$") or is("missing"), is(""))
30
30
//For actual register pages with entries on them (as opposed to cover pages), we should have a page_number, transcribed from the top right hand corner of the page (lost in redaction), 1-4 alphanumeric characters are allowed (as often additional pages were added in later and so on), or the value "missing" if the page number was not present for some reason
31
31
volume_number: if($metadata_type/is("PIECE"), regex("^[A-Z]{4}\s\-\s[A-Z]{4}$") or regex("^[A-Z]{4}\s\-\s[A-Z]{3}$") or regex("^[A-Z]{2}\s\([A-Z]{2}\)$") or regex("^[0-9]{0,5}(\s){0,1}[A-Z]{2}(\s){0,1}\([A-Z]{2}(\s){0,1}\-(\s){0,1}[A-Z]{2,3}\)$") or regex("^[0-9]{0,5}(\s)?[A-Z]{2}\([A-Z]{2}\)(\s)?\-(\s)?[A-Z]{2}\([A-Z]{2}\)$"), is(""))
//The various date fields allow us to construct a covering date for this information - we are using just the date on which the original register was compiled, and not making any attempt to capture when the data was subsequently updated when the register was in use
//County is data derived by DCTFH based on the enumeration district. We are merely checking that there are at least 3 alphanumeric characters here, we are not attempting to introduce a controlled vocab
//Like house_number, for sub_item rows other than SUBITEM_BLANK, at least one of house_number and house_name must be populated (having both is permitted). The only real restriction on a name is that it must have at least two characters
//Again for sub_items other than SUBITEM_BLANK this field must have at least three chracters of any sort (though in fact it's optional, as not everywhere has named streets, so only a field with only 1 or 2 characters would actually produce an error!)
59
+
//Again for sub_items other than SUBITEM_BLANK this field must have at least three characters of any sort (though in fact it's optional, as not everywhere has named streets, so only a field with only 1 or 2 characters would actually produce an error!)
//Within each enumeration district the enumerator numbered the schedules issued to each individual household beginning at 1. Occasionally duplicate schedule numbers were accidentally created, these were corrected at the time by adding an alphabetic suffix, or some have only just been found where we distinguish by adding DUPnn (where nn is a numeric with 1 or 2 digits)
62
62
sub_schedule_no: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_QADDRESS)"), range(1,613) and unique($piece,$schedule_no,$sub_schedule_no), if($metadata_type/is("SUBITEM_CONNAME"),range(1,613),is("")))
63
-
//Within each individual household, each individual person was given a sub_schedule_no - this only applied to SUBITEM types where individuals actaully lived there, so if the property was vacant etc, this field will be blank. Normally we expect that the combination of piece, schedule_no and sub_schedule_no should be unique, but if it's a continuation sub_item then by definition that combination exists elsewhere
63
+
//Within each individual household, each individual person was given a sub_schedule_no - this only applied to SUBITEM types where individuals actually lived there, so if the property was vacant etc, this field will be blank. Normally we expect that the combination of piece, schedule_no and sub_schedule_no should be unique, but if it's a continuation sub_item then by definition that combination exists elsewhere
64
64
surname: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("*") or is("???") or length(1,*), is(""))
65
65
//Here we just check that something is filled in for surname for relevant SUBITEM types - see separate schema for some more detailed checking
66
66
surname_other: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("*") or is("???") or length(1,*), is("")) @optional
//Here we just check that something is filled in for forenames for relevant SUBITEM types - see separate schema for some more detailed checking
70
70
forenames_other: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("*") or is("???") or length(1,*), is("")) @optional
71
71
//Here if forenames have been amended at some point this field should be populated
72
-
ovspi: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("Officer") or is("Visitor") or is("Servant") or is("Patient") or is("Inmate") or is("?") or is("*"), is(""))
72
+
OVSPI: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("Officer") or is("Visitor") or is("Servant") or is("Patient") or is("Inmate") or is("?") or is("*"), is(""))
73
73
//Records the data recorded in one of the original form columns, expanded from the single character originally used according to the detailed instructions to enumerators, again only required for appropriate surname types
74
74
gender: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("male") or is("female") or is("*"))
75
75
birth_date_day: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"), is("")) //validator will fail if you open csv template as expecting single digit days to have a leading 0 and Excel removes it
//Like OVSPI, expanded from the single character used in the original register to the full word the character represents
81
81
occupation: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("*") or length(3,*), is(""))
82
82
//a fairly freeform field as no sort of controlled vocab was used in the register
83
-
instructions: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("*") or length(3,*) or is("continuation") or regex("^([0-9a-zA-Z]{1,4})$") , is("")) @optional
83
+
refers_to: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("continuation") or regex("^([0-9a-zA-Z]{1,4})$") , is("")) @optional
84
+
//range(1,200) replaced by the regex to allow refs to pages in other registers, this field also records membership of Home Guard, Auxiliary Fire Service, ARP etc - added by AG 8th Oct 2015
85
+
//code for instructions: if($metadata_type/regex("(SUBITEM_NAME)|(SUBITEM_CONNAME)|(SUBITEM_QADDRESS)"), is("*") or length(3,*) or is("continuation") or regex("^([0-9a-zA-Z]{1,4})$") , is("")) @optional
84
86
//range(1,200) replaced by the regex to allow refs to pages in other registers, this field also records membership of Home Guard, Auxiliary Fire Service, ARP etc
//It was believed that there was potential for some records to show that a person had been adopted, if this does happen, it should be recorded in this field and the relevant line redacted in perpertuity
88
+
//It was believed that there was potential for some records to show that a person had been adopted, if this does happen, it should be recorded in this field and the relevant line redacted in perpetuity
0 commit comments