Skip to content

UTF-8 and CJK error  #26

@nosgnoh

Description

@nosgnoh

Hi Kripken,

I have used your library in my project and ran into an issue, but I don't know whether it belongs to your library or to my code, so I am logging it here:

When I validate my XML file (encoded as UTF-8) against an XSD schema, validation fails if the XML file contains some CJK characters. I have researched ways to resolve this but have no ideas. This is my schema and XML file:

`

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:usdm="http://usdm.asia/usdm">

<xs:simpleType name="idType">
    <xs:annotation>
        <xs:documentation>
            Identifier value: a string of one or more characters, none of
            which is an uppercase ASCII letter (A-Z).
        </xs:documentation>
    </xs:annotation>
    <xs:restriction base="xs:string">
        <xs:pattern value="[^A-Z]+"/>
    </xs:restriction>
</xs:simpleType>

<xs:simpleType name="sortType">
    <xs:annotation>
        <xs:documentation>
            Sort value: a string of one or more characters, none of which
            is an uppercase ASCII letter (A-Z).
        </xs:documentation>
    </xs:annotation>
    <xs:restriction base="xs:string">
        <xs:pattern value="[^A-Z]+"/>
    </xs:restriction>
</xs:simpleType>

<xs:simpleType name="NOType">
    <xs:annotation>
        <xs:documentation>
            NO value: a string, possibly empty, that contains no lowercase
            ASCII letter (a-z).
        </xs:documentation>
    </xs:annotation>
    <xs:restriction base="xs:string">
        <xs:pattern value="[^a-z]*"/>
    </xs:restriction>
</xs:simpleType>

<xs:simpleType name="richcontentType">
    <xs:annotation>
        <xs:documentation>
            Text content of a richcontent element: an xs:string with no
            additional facets.
        </xs:documentation>
    </xs:annotation>
    <xs:restriction base="xs:string"/>
</xs:simpleType>

<xs:complexType name="reasonType">
    <xs:annotation>
        <xs:documentation>
            Content of a reason element: at most one richcontent child,
            plus the required id, sort and NO attributes.
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:element name="richcontent" type="richcontentType" minOccurs="0"/>
    </xs:sequence>
    <xs:attribute name="id" type="idType" use="required"/>
    <xs:attribute name="sort" type="sortType" use="required"/>
    <xs:attribute name="NO" type="NOType" use="required"/>
</xs:complexType>

<xs:complexType name="descType">
    <xs:annotation>
        <xs:documentation>
            Content of a desc element: at most one richcontent child,
            plus the required id, sort and NO attributes.
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:element name="richcontent" type="richcontentType" minOccurs="0"/>
    </xs:sequence>
    <xs:attribute name="id" type="idType" use="required"/>
    <xs:attribute name="sort" type="sortType" use="required"/>
    <xs:attribute name="NO" type="NOType" use="required"/>
</xs:complexType>

<xs:complexType name="reqspecType">
    <xs:annotation>
        <xs:documentation>
            Content of a reqspec element: any number of group, reqspec,
            reason and desc children in any order, with at most one
            richcontent child anywhere among them, plus the required id,
            sort and NO attributes.
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:choice minOccurs="0" maxOccurs="unbounded">
            <xs:element name="group" type="groupType"/>
            <xs:element name="reqspec" type="reqspecType"/>
            <xs:element name="reason" type="reasonType"/>
            <xs:element name="desc" type="descType"/>
        </xs:choice>
        <!--
            The optional part is the whole trailing sequence, not the
            richcontent element itself. With an optional richcontent between
            two repeating choices, a group/reqspec/reason/desc child could
            match either choice, which violates the XSD 1.0 Unique Particle
            Attribution constraint; validators that enforce it reject such a
            schema. This form accepts exactly the same documents: the second
            choice is reachable only after a richcontent has been seen.
        -->
        <xs:sequence minOccurs="0">
            <xs:element name="richcontent" type="richcontentType"/>
            <xs:choice minOccurs="0" maxOccurs="unbounded">
                <xs:element name="group" type="groupType"/>
                <xs:element name="reqspec" type="reqspecType"/>
                <xs:element name="reason" type="reasonType"/>
                <xs:element name="desc" type="descType"/>
            </xs:choice>
        </xs:sequence>
    </xs:sequence>
    <xs:attribute name="id" type="idType" use="required"/>
    <xs:attribute name="sort" type="sortType" use="required"/>
    <xs:attribute name="NO" type="NOType" use="required"/>
</xs:complexType>

<xs:complexType name="groupType">
    <xs:annotation>
        <xs:documentation>
            Content of a group element: any number of group and reqspec
            children in any order, with at most one richcontent child
            anywhere among them, plus the required id, sort and NO
            attributes.
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:choice minOccurs="0" maxOccurs="unbounded">
            <xs:element name="group" type="groupType"/>
            <xs:element name="reqspec" type="reqspecType"/>
        </xs:choice>
        <!--
            The optional part is the whole trailing sequence, not the
            richcontent element itself. With an optional richcontent between
            two repeating choices, a group/reqspec child could match either
            choice, which violates the XSD 1.0 Unique Particle Attribution
            constraint; validators that enforce it reject such a schema.
            This form accepts exactly the same documents: the second choice
            is reachable only after a richcontent has been seen.
        -->
        <xs:sequence minOccurs="0">
            <xs:element name="richcontent" type="richcontentType"/>
            <xs:choice minOccurs="0" maxOccurs="unbounded">
                <xs:element name="group" type="groupType"/>
                <xs:element name="reqspec" type="reqspecType"/>
            </xs:choice>
        </xs:sequence>
    </xs:sequence>
    <xs:attribute name="id" type="idType" use="required"/>
    <xs:attribute name="sort" type="sortType" use="required"/>
    <xs:attribute name="NO" type="NOType" use="required"/>
</xs:complexType>

<xs:complexType name="usdmType">
    <xs:annotation>
        <xs:documentation>
            Content of the usdm element: at most one group child and a
            required version attribute of type xs:string.
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:element name="group" type="groupType" minOccurs="0"/>
    </xs:sequence>
    <xs:attribute name="version" type="xs:string" use="required"/>
</xs:complexType>

<xs:element name="usdm" type="usdmType">
    <xs:annotation>
        <xs:documentation>
            The only global element declared by this schema; its content
            is described by usdmType.
        </xs:documentation>
    </xs:annotation>
</xs:element>

</xs:schema>
`

xml :
<?xml version="1.0" encoding="utf-8"?>
<!-- Minimal instance document: a single group whose richcontent holds one hiragana character (U+3092). -->
<usdm version="0.0.0" xmlns:usdm="http://usdm.asia/usdm">
    <group id="0" sort="0" NO="ROOT.0">
        <richcontent>を</richcontent>
    </group>
</usdm>

I realized from this page https://www.utf8-chartable.de/unicode-utf8-table.pl?start=12288&number=512&names=- that every character from
U+3081 | め | e3 82 81
to the end fails with UTF-8.

Thanks for your attention!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions