diff --git a/src/main/java/de/uni_hamburg/corpora/validation/quest/XsdChecker.java b/src/main/java/de/uni_hamburg/corpora/validation/quest/XsdChecker.java index c49846ea58b6aa4ae148790f54889a27f8839f2b..2296818afe45c2be054d4705a0030f75ac6c262c 100644 --- a/src/main/java/de/uni_hamburg/corpora/validation/quest/XsdChecker.java +++ b/src/main/java/de/uni_hamburg/corpora/validation/quest/XsdChecker.java @@ -11,13 +11,15 @@ import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; +import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; +import javax.xml.transform.stream.StreamSource; +import javax.xml.validation.SchemaFactory; import javax.xml.xpath.XPathExpressionException; -import java.io.File; -import java.io.IOException; +import java.io.*; import java.lang.reflect.Modifier; import java.net.URISyntaxException; import java.security.NoSuchAlgorithmException; @@ -29,6 +31,8 @@ public class XsdChecker extends Checker implements CorpusFunction { private final Logger logger = Logger.getLogger(getFunction()); + private Map<String,String> schemas = new HashMap<>(); + static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; static final String W3C_XML_SCHEMA = @@ -36,6 +40,10 @@ public class XsdChecker extends Checker implements CorpusFunction { public XsdChecker(Properties properties) { super(false, properties); + // Map for external schema files needed if the schema is not linked in the file format + schemas.put(ELANData.class.getSimpleName(),"xsd/eaf.xsd"); + schemas.put(EXMARaLDATranscriptionData.class.getSimpleName(),"xsd/exmaralda_exb.xsd"); + schemas.put(EXMARaLDASegmentedTranscriptionData.class.getSimpleName(),"xsd/exmaralda_exs.xsd"); } @Override @@ -48,9 +56,20 @@ public class XsdChecker extends Checker 
implements CorpusFunction { Report report = new Report(); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); - dbf.setValidating(true); try { - dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); + // Add external schema if necessary + if (schemas.containsKey(cd.getClass().getSimpleName())) { + logger.info(schemas.get(cd.getClass().getSimpleName())); + InputStream is = + this.getClass().getClassLoader().getResourceAsStream(schemas.get(cd.getClass().getSimpleName())); + dbf.setSchema(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI) + .newSchema(new StreamSource(is))); + } + else { + // Otherwise set the schema language + dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); + dbf.setValidating(true); + } DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(new ErrorHandler() { @Override diff --git a/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/coma.xsd b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/coma.xsd new file mode 100644 index 0000000000000000000000000000000000000000..559f75a6ec1d7a1ed5ae4da0880fe23d0390756c --- /dev/null +++ b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/coma.xsd @@ -0,0 +1,383 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- edited with XMLSPY v2004 rel. 3 U (http://www.xmlspy.com) by eval (eval) --> +<!-- schema version 0.5a040130 --> +<!--changes: + 2.7_20111114: AsocFileType also for speakers and corpora. + 2.6_20110125: AsocFileType. Für Hanna. + 2.5_20101222: replaced all file/URL/path references with FileType (HH) + 2.1_20101216: moved Availability from Recording to Media (HH), renamed "Filestores" to "Mirrors" + 2.0: added "role" as replacement for speaker-linking via "setting" + 1.6_20100412: added FileType for attaching files + 1.6_20090326: elements below personType do not need a specific order anymore. 
removed "related person"-element, made PeriodExact optional + 1.5_20081215: changed schema so Communication and Speakers can appear in any sequence + changed RecordingDuration to milliseconds (xs:long type) + 1.1_20080603: added "Annotation" and "AnnotationType" to "Transcription" to use it with "sextant" + 1.0_20080402: changed "known human" from mandatory to optional since nobody understands it anyway + 0.9_20070320: changed CorpusData from xs:sequence to xs:choice maxOccurs="unbounded" to allow speaker / comm-childs unordered + 0.8_20070320: added attribute "uniqueSpeakerDistinction" to CorpusType for usage with the PartiturEditor + 0.7_20070222: added attribute "Type" to Languagetype + 0.6_20070222: added attribute "Type" to Location-Complextype + 0.5a_20040130 - initial +--> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" attributeFormDefault="qualified"> + <xs:element name="Corpus"> + <xs:annotation> + <xs:documentation>Mother Of All Metadata Elements (MOAME(tm))</xs:documentation> + </xs:annotation> + <xs:complexType> + <xs:complexContent> + <xs:restriction base="CorpusType"> + <xs:sequence> + <xs:element name="DBNode" minOccurs="0"> + <xs:annotation> + <xs:documentation>deprecated: forgot its purpose</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Description" type="DescriptionType" minOccurs="0"> + <xs:annotation> + <xs:documentation>description of the (sub)corpus</xs:documentation> + </xs:annotation> + </xs:element> + <xs:choice minOccurs="0"> + <xs:element name="CorpusData" type="CorpusData"/> + <xs:element name="Corpus" type="CorpusType" maxOccurs="unbounded"/> + </xs:choice> + </xs:sequence> + </xs:restriction> + </xs:complexContent> + </xs:complexType> + </xs:element> + <xs:complexType name="LocationType"> + <xs:annotation> + <xs:documentation>type for storing addresses</xs:documentation> + </xs:annotation> + <xs:all> + <xs:element name="Street" type="xs:string" minOccurs="0"/> + 
<xs:element name="City" type="xs:string" minOccurs="0"/> + <xs:element name="PostalCode" type="xs:string" minOccurs="0"/> + <xs:element name="Country" type="xs:string" minOccurs="0"/> + <xs:element name="Period" type="PeriodType" minOccurs="0"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + </xs:all> + <xs:attribute name="Type" type="xs:string" use="optional"/> + </xs:complexType> + <xs:simpleType name="non-empty-string"> + <xs:annotation> + <xs:documentation>should not be empty ;)</xs:documentation> + </xs:annotation> + <xs:restriction base="xs:string"> + <xs:minLength value="1"/> + </xs:restriction> + </xs:simpleType> + <xs:complexType name="CorpusType"> + <xs:annotation> + <xs:documentation>Type for storing corpus information</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:element name="DBNode" minOccurs="0"> + <xs:annotation> + <xs:documentation>link to the node containing the corpus</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Description" type="DescriptionType" minOccurs="0"> + <xs:annotation> + <xs:documentation>description of the (sub)corpus</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Mirrors" type="DescriptionType" minOccurs="0" maxOccurs="unbounded"> + <xs:annotation> + <xs:documentation>description can hold links to mirrored versions of the + corpus</xs:documentation> + </xs:annotation> + </xs:element> + <xs:choice minOccurs="0"> + <xs:element name="CorpusData" type="CorpusData"/> + <xs:element name="Corpus" type="CorpusType" maxOccurs="unbounded"/> + </xs:choice> + <xs:element name="AsocFile" type="AsocFileType" minOccurs="0" maxOccurs="unbounded"/> + </xs:sequence> + <xs:attribute name="Name" type="non-empty-string" use="required"/> + <xs:attribute name="Id" type="xs:ID" use="required"/> + <xs:attribute name="Parent" type="xs:IDREF" use="optional"/> + <xs:attribute name="uniqueSpeakerDistinction" type="xs:string" use="optional"> </xs:attribute> + 
<xs:attribute name="schemaVersion" type="xs:string" use="required"/> + </xs:complexType> + <xs:complexType name="RecordingType"> + <xs:annotation> + <xs:documentation>type for storing information about recordings</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:element name="Name" type="xs:string" minOccurs="0"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="Media" type="MediaType" minOccurs="0" maxOccurs="unbounded"> + <xs:annotation> + <xs:documentation>deprecated: use File Element instead</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="File" type="FileType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="RecordingDateTime" type="xs:dateTime" minOccurs="0"/> + <xs:element name="RecordingDuration" type="xs:long" minOccurs="0"/> + <xs:element name="Availablilty" type="AvailabilityType" minOccurs="0"> + <xs:annotation> + <xs:documentation>deprecated: exists inside File</xs:documentation> + </xs:annotation> + </xs:element> + </xs:sequence> + <xs:attribute name="Id" type="xs:ID" use="required"/> + </xs:complexType> + <xs:complexType name="AvailabilityType"> + <xs:annotation> + <xs:documentation>type for storing copyright/obtaining data</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:element name="Available" type="xs:boolean"/> + <xs:element name="URL" type="xs:anyURI" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="Copyright" type="xs:string" minOccurs="0"/> + <xs:element name="ObtainingInformation" type="DescriptionType" minOccurs="0"/> + </xs:sequence> + </xs:complexType> + <xs:complexType name="DescriptionType"> + <xs:annotation> + <xs:documentation>type for storing descriptions for various elements</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:element name="Key" type="KeyType" minOccurs="0" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + <xs:complexType name="MediaType"> + <xs:annotation> + <xs:documentation>deprecated: 
use File type instead</xs:documentation> + </xs:annotation> + <xs:all> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="FileStore" type="xs:string" minOccurs="0"/> + <xs:element name="Filename" minOccurs="0"/> + <xs:element name="NSLink" type="xs:anyURI" minOccurs="0"/> + <xs:element name="LastBackup" type="xs:date" minOccurs="0"/> + <xs:element name="Availability" type="AvailabilityType" minOccurs="0"/> + </xs:all> + <xs:attribute name="Id" type="xs:ID" use="required"/> + </xs:complexType> + <xs:complexType name="FileType"> + <xs:annotation> + <xs:documentation>a file in the filesystem</xs:documentation> + </xs:annotation> + <xs:all> + <xs:element name="Availability" type="AvailabilityType"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="filename" type="xs:string" minOccurs="0"/> + <xs:element name="mimetype" type="xs:string" minOccurs="0"/> + <xs:element name="relPath" type="xs:string" minOccurs="0"/> + <xs:element name="absPath" type="xs:anyURI" minOccurs="0"/> + <xs:element name="URL" type="xs:anyURI" minOccurs="0"/> + </xs:all> + <xs:attribute name="Id" type="xs:ID" use="required"> + <xs:annotation> + <xs:documentation>hanna wants to get rid of this...</xs:documentation> + </xs:annotation> + </xs:attribute> + </xs:complexType> + <xs:complexType name="PersonType"> + <xs:annotation> + <xs:documentation>type for storing speaker descriptions</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:choice maxOccurs="unbounded"> + <xs:element name="Sigle" type="xs:string" minOccurs="1" maxOccurs="1"/> + <xs:element name="KnownHuman" type="xs:boolean" default="true" minOccurs="0" maxOccurs="1"> + <xs:annotation> + <xs:documentation>deprecated: didn't turn out to be + useful</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Pseudo" type="xs:string" minOccurs="0" maxOccurs="1"> + <xs:annotation> + <xs:documentation>Can hold names, but names should always 
be + pseudos</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Sex" type="xs:anySimpleType" minOccurs="0" maxOccurs="1"/> + <xs:element name="Location" type="LocationType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0" maxOccurs="1"/> + <xs:element name="Language" type="LanguageType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="role" type="roleType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="AsocFile" type="AsocFileType" minOccurs="0" maxOccurs="unbounded"/> + </xs:choice> + </xs:sequence> + <xs:attribute name="Id" type="xs:ID" use="required"/> + </xs:complexType> + + <xs:complexType name="AsocFileType"> + <xs:annotation> + <xs:documentation>type for storing any associated file</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:element name="Name"/> + <xs:element name="File" type="FileType"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + </xs:sequence> + <xs:attribute name="Id" type="xs:ID" use="required"/> + </xs:complexType> + + <xs:complexType name="TranscriptionType"> + <xs:annotation> + <xs:documentation>type for storing transcript-information</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:element name="Name"/> + <xs:element name="File" type="FileType"/> + <xs:element name="FileStore" type="xs:string" minOccurs="0"> + <xs:annotation> + <xs:documentation>deprecated: forgot its purpose</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Filename"> + <xs:annotation> + <xs:documentation>deprecated: now uses File type</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="NSLink" type="xs:anyURI"> + <xs:annotation> + <xs:documentation>deprecated: now uses File type</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="Availability" type="AvailabilityType" minOccurs="0"> + 
<xs:annotation> + <xs:documentation>deprecated: now uses File type</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Annotation" type="AnnotationType" minOccurs="0"/> + </xs:sequence> + <xs:attribute name="Id" type="xs:ID" use="required"/> + </xs:complexType> + <xs:complexType name="AnnotationType"> + <xs:annotation> + <xs:documentation>type for linking to annotation files</xs:documentation> + </xs:annotation> + <xs:all> + <xs:element name="Description" type="DescriptionType"/> + <xs:element name="File" type="FileType"/> + </xs:all> + <xs:attribute name="Id" type="xs:ID" use="required"/> + <xs:attribute name="Name" type="xs:string"/> + </xs:complexType> + <xs:complexType name="LanguageType"> + <xs:annotation> + <xs:documentation>type for storing languages</xs:documentation> + </xs:annotation> + <xs:all> + <xs:element name="LanguageCode" type="xs:string"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + </xs:all> + <xs:attribute name="Type" type="xs:string" use="optional"/> + </xs:complexType> + <xs:complexType name="CommunicationType"> + <xs:annotation> + <xs:documentation>type for storing Session Data. 
[06.10.03] Media deleted, Object + added</xs:documentation> + </xs:annotation> + <xs:sequence maxOccurs="unbounded"> + <xs:choice> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="Setting"> + <xs:complexType> + <xs:complexContent> + <xs:extension base="SettingType"/> + </xs:complexContent> + </xs:complexType> + </xs:element> + <xs:element name="Recording" type="RecordingType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="Transcription" type="TranscriptionType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="Location" type="LocationType" minOccurs="0"/> + <xs:element name="Language" type="LanguageType" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="File" type="FileType" minOccurs="0" maxOccurs="unbounded"> + <xs:annotation> + <xs:documentation>deprecated: now uses AsocFile</xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="AsocFile" type="AsocFileType" minOccurs="0" maxOccurs="unbounded"/> + </xs:choice> + </xs:sequence> + <xs:attribute name="Id" type="xs:ID" use="required"/> + <xs:attribute name="Name" type="xs:string" use="required"/> + </xs:complexType> + <xs:complexType name="CorpusData"> + <xs:annotation> + <xs:documentation>actual corpus data</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:choice maxOccurs="unbounded"> + <xs:element name="Communication" type="CommunicationType" minOccurs="0" maxOccurs="unbounded"> + <xs:annotation> + <xs:documentation>=Session=Discourse (im Moment am ehesten IMDI, im Grunde + nur Location Data) </xs:documentation> + </xs:annotation> + </xs:element> + <xs:element name="Speaker" type="PersonType" minOccurs="0" maxOccurs="unbounded"/> + </xs:choice> + </xs:sequence> + </xs:complexType> + <xs:complexType name="KeyType"> + <xs:annotation> + <xs:documentation>key/value pairs for storing all kinds of + information</xs:documentation> + </xs:annotation> + <xs:simpleContent> + <xs:extension base="xs:string"> + <xs:attribute 
name="Name" type="xs:string" use="required"/> + </xs:extension> + </xs:simpleContent> + </xs:complexType> + <xs:complexType name="ObjectType"> + <xs:annotation> + <xs:documentation>Objects used/present in a communication</xs:documentation> + </xs:annotation> + <xs:all> + <xs:element name="Name" type="xs:string"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="Availability" type="AvailabilityType" minOccurs="0"/> + </xs:all> + <xs:attribute name="Id" type="xs:ID" use="required"/> + </xs:complexType> + <xs:complexType name="SettingType"> + <xs:annotation> + <xs:documentation>Setting of a recording (communication?)</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:choice maxOccurs="unbounded"> + <xs:element name="Person" type="xs:IDREF" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + <xs:element name="Object" type="ObjectType" minOccurs="0" maxOccurs="unbounded"/> + </xs:choice> + </xs:sequence> + </xs:complexType> + <xs:complexType name="PeriodType"> + <xs:annotation> + <xs:documentation>marks a period of time</xs:documentation> + </xs:annotation> + <xs:all minOccurs="0"> + <xs:element name="PeriodStart" type="xs:dateTime" minOccurs="0"/> + <xs:element name="PeriodExact" type="xs:boolean" minOccurs="0"/> + <xs:element name="PeriodDuration" type="xs:long" minOccurs="0"/> + </xs:all> + </xs:complexType> + <xs:complexType name="roleType"> + <!-- + role types starting with a '#' are used coma-internal + + pre-defined role types: + #participant = speaker participating in a communication; replaces linking in settings + --> + <xs:annotation> + <xs:documentation>Role of Speakers (and potentially other datatypes)</xs:documentation> + </xs:annotation> + <xs:sequence> + <xs:choice maxOccurs="unbounded"> + <xs:element name="Description" type="DescriptionType" minOccurs="0"/> + </xs:choice> + </xs:sequence> + <xs:attribute name="Type" type="xs:string" 
use="optional"/> + <xs:attribute name="target" type="xs:IDREF" use="required"/> + </xs:complexType> +</xs:schema> \ No newline at end of file diff --git a/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/eaf.xsd b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/eaf.xsd new file mode 100644 index 0000000000000000000000000000000000000000..fdc05e23729a50ed20343acb1527a57c50a0cbb7 --- /dev/null +++ b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/eaf.xsd @@ -0,0 +1,816 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + VERSION 3.0 + DATE December 2016 + - added two referential link elements, CROSS_REF_LINK and GROUP_REF_LINK, contained in + sets of such elements, REF_LINK_SET. + + VERSION 2.8 + DATE April 2014 + - changes that add support for multilingual controlled vocabularies and for associating tiers + and annotations with a specific language + - added new element LANGUAGE + - changed the structure of CONTROLLED_VOCABULARY and CV_ENTRY elements + - a cv entry can now have multiple CVE_VALUE child nodes with a language reference + - introduction of a LICENSE element + - added an EXT_REF attribute on the TIER level, so that e.g. a data category reference can be specified on + the tier level (overriding the one specified on the TYPE level) + + VERSION 2.7 + DATE December 2010 + - new elements and attributes were added in relation to + - support for externally defined controlled vocabularies. A new possible root element CV_RESOURCE + has been added for such vocabularies in an eaf like xml file. Annotations can hold a reference + to the id of an entry in an external CV. + - a new element for storing information about a lexicon and about a link to an entry or a field + in a lexicon has been added. 
A linguistic type can be associated with a lexicon or a field / + data category in a lexicon + + VERSION 2.6 + DATE May 2008 + - added elements and attributes for references to concepts defined in the ISO Data Category Registry + and possibly/eventually other external resources. + - attribute EXT_REF added to type annotationAttribute, to elements CV_ENTRY and LINGUISTIC_TYPE + - element EXTERNAL_REF with attributes EXT_REF_ID, TYPE and VALUE + + DATE November 2007 + - added optional attributes: RELATIVE_MEDIA_URL to MEDIA_DESCRIPTOR and RELATIVE_LINK_URL to + LINKED_FILE_DESCRIPTOR for storage of relative url's + - changed the FORMAT from fixed to default, and from 2.4. to 2.5 + + DATE December 2006 + - added attribute: ANNOTATOR to element TIER + - added element: PROPERTY to element HEADER + - changed the type of attribute SVG_REF of ALIGNABLE_ANNOTATION to xsd:string since + it does not refer to an ID in the same file + - changed the type of the TIME_ALIGNABLE and GRAPHIC_REFERENCES attributes of the LINGUISTIC_TYPE + element to type="xsd:boolean" (was xsd:string) + - changed the ID/IDREF mechanism for the combinations of: + - TIER/TIER_ID and TIER/PARENT_REF + - LINGUISTIC_TYPE/LINGUISTIC_TYPE_ID and TIER/LINGUISTIC_TYPE_REF + - CONTROLLED_VOCABULARY/CV_ID and LINGUISTIC_TYPE/CONTROLLED_VOCABULARY_REF + into pairs of xsd:key and xsd:keyref elements. + The advantage is that the ID's only have to be unique per element type (e.g. TIER_ID's + should be unique within the TIER elements but can be the same as a LINGUISTIC_TYPE_ID) + and that there are no constraints on characters that can be used in id's/names. 
+--> +<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + ELAN Annotation Format + version 3.0 + December 2016 + Schema by Alexander Klassmann 17/01/03 + Adapted by Hennie Brugman, Han Sloetjes, Micha Hulsbosch + </xsd:documentation> + </xsd:annotation> + + <xsd:element name="ANNOTATION_DOCUMENT"> + <xsd:complexType> + <xsd:sequence> + <xsd:element name="LICENSE" type="licenseType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="HEADER" type="headType"/> + <xsd:element name="TIME_ORDER" type="timeType"/> + <xsd:element name="TIER" type="tierType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="LINGUISTIC_TYPE" type="lingType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="LOCALE" type="localeType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="LANGUAGE" type="langType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="CONSTRAINT" type="constraintType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="CONTROLLED_VOCABULARY" type="convocType" minOccurs="0" maxOccurs="unbounded"> + <xsd:key name="cvEntryKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The entry id should be unique within the collection of entry elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CV_ENTRY_ML"/> + <xsd:field xpath="@CVE_ID"/> + </xsd:key> + </xsd:element> + <xsd:element name="LEXICON_REF" type="lexRefType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="REF_LINK_SET" type="refLinksType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="EXTERNAL_REF" type="extRefType" minOccurs="0" maxOccurs="unbounded"/> + </xsd:sequence> + <xsd:attribute name="DATE" type="xsd:dateTime" use="required"/> + <xsd:attribute name="AUTHOR" type="xsd:string" use="required"/> + <xsd:attribute name="VERSION" type="xsd:string" use="required"/> + <xsd:attribute name="FORMAT" type="xsd:string" use="optional" default="3.0"/> + 
</xsd:complexType> + + <!-- define key - keyref pairs --> + <xsd:key name="tierNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The Tier name/id should be unique within the collection + of Tier elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER"/> + <xsd:field xpath="@TIER_ID"/> + </xsd:key> + <xsd:keyref name="tierNameRef" refer="tierNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + A Tier can be associated with a parent Tier by referring to an existing Tier id. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER"/> + <xsd:field xpath="@PARENT_REF"/> + </xsd:keyref> + + <xsd:key name="linTypeNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The Linguistic Type name/id should be unique within the collection + of Linguistic Type elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="LINGUISTIC_TYPE"/> + <xsd:field xpath="@LINGUISTIC_TYPE_ID"/> + </xsd:key> + <xsd:keyref name="linTypeNameRef" refer="linTypeNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + A Tier must refer to an existing Linguistic Type id. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER"/> + <xsd:field xpath="@LINGUISTIC_TYPE_REF"/> + </xsd:keyref> + + <xsd:key name="cvNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The Controlled Vocabulary name/id should be unique within the + collection of Controlled Vocabulary elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CONTROLLED_VOCABULARY"/> + <xsd:field xpath="@CV_ID"/> + </xsd:key> + <xsd:keyref name="cvNameRef" refer="cvNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + A Linguistic Type can be associated with a Controlled Vocabulary by + referring to an existing Controlled Vocabulary id. 
+ </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="LINGUISTIC_TYPE"/> + <xsd:field xpath="@CONTROLLED_VOCABULARY_REF"/> + </xsd:keyref> + + <xsd:key name="lexNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The Lexicon Service name/id should be unique within the + collection of Lexicon Service elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="LEXICON_REF"/> + <xsd:field xpath="@LEX_REF_ID"/> + </xsd:key> + <xsd:keyref name="lexNameRef" refer="lexNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + A Linguistic Type can be associated with a Lexicon Service by + referring to an existing Lexicon Service id. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="LINGUISTIC_TYPE"/> + <xsd:field xpath="@LEXICON_REF"/> + </xsd:keyref> + + <!-- added in 2.8 but unrelated to the introduction of new elements and attributes --> + <!-- previous annotation reference --> + <xsd:key name="prevAnnoKey"> + <xsd:annotation> + <xsd:documentation> + A key and keyref pair to enforce that a previous annotation idref at least refers + to an annotation id of a reference annotation. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER/ANNOTATION/REF_ANNOTATION"/> + <xsd:field xpath="@ANNOTATION_ID"/> + </xsd:key> + <xsd:keyref name="prevAnnoRef" refer="prevAnnoKey"> + <xsd:selector xpath="TIER/ANNOTATION/REF_ANNOTATION"/> + <xsd:field xpath="@PREVIOUS_ANNOTATION"/> + </xsd:keyref> + <!-- time slot references --> + <xsd:key name="timeSlotKey"> + <xsd:annotation> + <xsd:documentation> + Two key-keyref pairs to enforce that time slot references refer to the id of a time slot. 
+ </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIME_ORDER/TIME_SLOT"/> + <xsd:field xpath="@TIME_SLOT_ID"/> + </xsd:key> + <xsd:keyref name="timeSlotRef1" refer="timeSlotKey"> + <xsd:selector xpath="TIER/ANNOTATION/ALIGNABLE_ANNOTATION"/> + <xsd:field xpath="@TIME_SLOT_REF1"/> + </xsd:keyref> + <xsd:keyref name="timeSlotRef2" refer="timeSlotKey"> + <xsd:selector xpath="TIER/ANNOTATION/ALIGNABLE_ANNOTATION"/> + <xsd:field xpath="@TIME_SLOT_REF2"/> + </xsd:keyref> + + <!-- introduced in 2.8 --> + <xsd:key name="langIdKey"> + <xsd:annotation> + <xsd:documentation> + The ID of a language identifier, can be referred to by any element that + needs a reference to a language identifier. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="LANGUAGE"/> + <xsd:field xpath="@LANG_ID"/> + </xsd:key> + <xsd:keyref name="cvValueLangRef" refer="langIdKey"> + <xsd:annotation> + <xsd:documentation> + Reference from a value in a multilingual CV to a language identifier. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CONTROLLED_VOCABULARY/CV_ENTRY_ML/CVE_VALUE"/> + <xsd:field xpath="@LANG_REF"/> + </xsd:keyref> + <xsd:keyref name="cvDescLangRef" refer="langIdKey"> + <xsd:annotation> + <xsd:documentation> + Reference from a description in a multilingual CV to a language identifier. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CONTROLLED_VOCABULARY/DESCRIPTION"/> + <xsd:field xpath="@LANG_REF"/> + </xsd:keyref> + <xsd:keyref name="tierLangRef" refer="langIdKey"> + <xsd:annotation> + <xsd:documentation> + Reference from a tier to a language identifier, to indicate the (main) language recorded + on that tier. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER"/> + <xsd:field xpath="@LANG_REF"/> + </xsd:keyref> + <xsd:keyref name="annoAlignLangRef" refer="langIdKey"> + <xsd:annotation> + <xsd:documentation> + Reference from an individual alignable annotation to a language identifier. 
+ </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER/ANNOTATION/ALIGNABLE_ANNOTATION"/> + <xsd:field xpath="@LANG_REF"/> + </xsd:keyref> + <xsd:keyref name="annoRefLangRef" refer="langIdKey"> + <xsd:annotation> + <xsd:documentation> + Reference from an individual reference annotation to a language identifier. + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="TIER/ANNOTATION/REF_ANNOTATION"/> + <xsd:field xpath="@LANG_REF"/> + </xsd:keyref> + <!-- + Since we try to describe that the @CVE_IDs are unique within the CONTROLLED_VOCABULARY, + the xsd:key element must be located just inside the CONTROLLED_VOCABULARY. + <xsd:key name="cvEntryKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The entry id should be unique within the collection of entry elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CV_ENTRY_ML"/> + <xsd:field xpath="@CVE_ID"/> + </xsd:key> + --> + <!-- + Getting from the CVE_REF to the appropriately matching CVE_ID isn't so simple! + It probably can't be done in XPath, never mind the more restricted version that is + allowed here. + http://www.w3.org/TR/2004/PER-xmlschema-1-20040318/structures.html#coss-identity-constraint + + TIER/ALIGNABLE_ANNOTATION/@CVE_REF/../../@LINGUISTIC_TYPE_REF => call this value x + search for a value equal to x in + LINGUISTIC_TYPE/@LINGUISTIC_TYPE_ID . When found, take (relative to that) + ../@CONTROLLED_VOCABULARY_REF => call this value y + search for a value equal to y in + CONTROLLED_VOCABULARY/@CV_ID and this is the CONTROLLED_VOCABULARY which should + contain (in CVE_ENTRY_ML/@CVE_ID) the value from @CVE_REF. + + + A weaker check could just try to find any matching CONTROLLED_VOCABULARY/CVE_ENTRY_ML/@CVE_ID, + without checking if this is in the correct CONTROLLED_VOCABULARY. 
+ + According to http://docstore.mik.ua/orelly/xml/schema/ch09_02.htm, putting a keyref + in a parent node of some key definition creates an extra uniqueness constraint on + the key values. That is not desired here. + (The validator that we use doesn't seem to check that but gives other, strange, error + messages) + <xsd:keyref name="cvEntryAlignRef" refer="cvEntryKey"> + <xsd:selector xpath="TIER/ANNOTATION/ALIGNABLE_ANNOTATION"/> + <xsd:field xpath="@CVE_REF"/> + </xsd:keyref> + <xsd:keyref name="cvEntryRefRef" refer="cvEntryKey"> + <xsd:selector xpath="TIER/ANNOTATION/REF_ANNOTATION"/> + <xsd:field xpath="@CVE_REF"/> + </xsd:keyref> + --> + <xsd:key name="alignAnnotationIdKey"> + <xsd:selector xpath="TIER/ANNOTATION/ALIGNABLE_ANNOTATION"/> + <xsd:field xpath="@ANNOTATION_ID"/> + </xsd:key> + <xsd:key name="refAnnotationIdKey"> + <xsd:selector xpath="TIER/ANNOTATION/REF_ANNOTATION"/> + <xsd:field xpath="@ANNOTATION_ID"/> + </xsd:key> + <!-- set of key and key refs for referential links + 4 keys for links to refer to: alignable and reference annotation id's and cross link and group link id. + 2 x 4 keyrefs for the cross link ref1 and ref2 idrefs to one of the 4 keys and + 1 x 4 keyrefs for the group link refs idrefs to one of the 4 keys. 
+ --> + <xsd:key name="crossRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="@REF_LINK_ID"/> + </xsd:key> + <xsd:key name="groupRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/GROUP_REF_LINK"/> + <xsd:field xpath="@REF_LINK_ID"/> + </xsd:key> + + <xsd:keyref name="crossLinkRef1AlignAnnoKeyRef" refer="alignAnnotationIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF1"/> + </xsd:keyref> + <xsd:keyref name="crossLinkRef1RefAnnoKeyRef" refer="refAnnotationIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF1"/> + </xsd:keyref> + <xsd:keyref name="crossLinkRef1CrossLinkKeyRef" refer="crossRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF1"/> + </xsd:keyref> + <xsd:keyref name="crossLinkRef1GroupLinkKeyRef" refer="groupRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF1"/> + </xsd:keyref> + + <xsd:keyref name="crossLinkRef2AlignAnnoKeyRef" refer="alignAnnotationIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF2"/> + </xsd:keyref> + <xsd:keyref name="crossLinkRef2RefAnnoKeyRef" refer="refAnnotationIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF2"/> + </xsd:keyref> + <xsd:keyref name="crossLinkRef2CrossLinkKeyRef" refer="crossRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF2"/> + </xsd:keyref> + <xsd:keyref name="crossLinkRef2GroupLinkKeyRef" refer="groupRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/CROSS_REF_LINK"/> + <xsd:field xpath="REF2"/> + </xsd:keyref> + + + <xsd:keyref name="groupLinkRefsAlignAnnoKeyRef" refer="alignAnnotationIdKey"> + <xsd:selector xpath="REF_LINK_SET/GROUP_REF_LINK"/> + <xsd:field xpath="REFS"/> + </xsd:keyref> + <xsd:keyref name="groupLinkRefsRefAnnoKeyRef" refer="refAnnotationIdKey"> + <xsd:selector 
xpath="REF_LINK_SET/GROUP_REF_LINK"/> + <xsd:field xpath="REFS"/> + </xsd:keyref> + <xsd:keyref name="groupLinkRefsCrossLinkKeyRef" refer="crossRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/GROUP_REF_LINK"/> + <xsd:field xpath="REFS"/> + </xsd:keyref> + <xsd:keyref name="groupLinkRefsGroupLinkKeyRef" refer="groupRefLinkIdKey"> + <xsd:selector xpath="REF_LINK_SET/GROUP_REF_LINK"/> + <xsd:field xpath="REFS"/> + </xsd:keyref> + <!-- end of key - keyref pairs --> + </xsd:element> + + <xsd:complexType name="headType"> + <xsd:sequence> + <xsd:element name="MEDIA_DESCRIPTOR" minOccurs="0" maxOccurs="unbounded"> + <xsd:complexType> + <xsd:attribute name="MEDIA_URL" type="xsd:anyURI" use="required"/> + <xsd:attribute name="RELATIVE_MEDIA_URL" type="xsd:anyURI" use="optional"/> + <xsd:attribute name="MIME_TYPE" type="xsd:string" use="required"/> + <xsd:attribute name="TIME_ORIGIN" type="xsd:long" use="optional"/> + <xsd:attribute name="EXTRACTED_FROM" type="xsd:anyURI" use="optional"/> + </xsd:complexType> + </xsd:element> + <xsd:element name="LINKED_FILE_DESCRIPTOR" minOccurs="0" maxOccurs="unbounded"> + <xsd:complexType> + <xsd:attribute name="LINK_URL" type="xsd:anyURI" use="required"/> + <xsd:attribute name="RELATIVE_LINK_URL" type="xsd:anyURI" use="optional"/> + <xsd:attribute name="MIME_TYPE" type="xsd:string" use="required"/> + <xsd:attribute name="TIME_ORIGIN" type="xsd:long" use="optional"/> + <xsd:attribute name="ASSOCIATED_WITH" type="xsd:anyURI" use="optional"/> + </xsd:complexType> + </xsd:element> + <xsd:element name="PROPERTY" type="propType" minOccurs="0" maxOccurs="unbounded"/> + </xsd:sequence> + <xsd:attribute name="MEDIA_FILE" use="optional" type="xsd:string"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + This attribute is deprecated. Use MEDIA_DESCRIPTOR elements instead. 
+ </xsd:documentation> + <xsd:appinfo>Ignore</xsd:appinfo> + </xsd:annotation> + </xsd:attribute> + <xsd:attribute name="TIME_UNITS" use="optional" default="milliseconds"> + <xsd:simpleType> + <xsd:restriction base="xsd:string"> + <xsd:enumeration value="NTSC-frames"/> + <xsd:enumeration value="PAL-frames"/> + <xsd:enumeration value="milliseconds"/> + </xsd:restriction> + </xsd:simpleType> + </xsd:attribute> + </xsd:complexType> + + <xsd:complexType name="timeType"> + <xsd:sequence> + <xsd:element name="TIME_SLOT" minOccurs="0" maxOccurs="unbounded"> + <xsd:complexType> + <xsd:attribute name="TIME_SLOT_ID" type="xsd:ID" use="required"/> + <xsd:attribute name="TIME_VALUE" type="xsd:unsignedInt" use="optional"/> + </xsd:complexType> + </xsd:element> + </xsd:sequence> + </xsd:complexType> + + <xsd:complexType name="tierType"> + <xsd:sequence> + <xsd:element name="ANNOTATION" type="annotationType" minOccurs="0" maxOccurs="unbounded"/> + </xsd:sequence> + <xsd:attribute name="TIER_ID" type="xsd:string" use="required"/> + <xsd:attribute name="PARTICIPANT" type="xsd:string" use="optional"/> + <xsd:attribute name="ANNOTATOR" type="xsd:string" use="optional"/> + <xsd:attribute name="LINGUISTIC_TYPE_REF" type="xsd:string" use="required"/> + <xsd:attribute name="DEFAULT_LOCALE" type="xsd:IDREF" use="optional"/> + <xsd:attribute name="PARENT_REF" type="xsd:string" use="optional"/> + <!-- since 2.8, to overrule an EXT_REF on the type level --> + <xsd:attribute name="EXT_REF" type="xsd:IDREF" use="optional"/> + <!-- since 2.8 --> + <xsd:attribute name="LANG_REF" type="xsd:IDREF" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="annotationType"> + <xsd:choice> + <xsd:element name="ALIGNABLE_ANNOTATION" type="alignableType"/> + <xsd:element name="REF_ANNOTATION" type="refAnnoType"/> + </xsd:choice> + </xsd:complexType> + + <xsd:complexType name="alignableType"> + <xsd:sequence> + <xsd:element name="ANNOTATION_VALUE" type="xsd:string"/> + </xsd:sequence> + 
<xsd:attributeGroup ref="annotationAttribute"/> + <xsd:attribute name="TIME_SLOT_REF1" type="xsd:IDREF" use="required"/> + <xsd:attribute name="TIME_SLOT_REF2" type="xsd:IDREF" use="required"/> + <xsd:attribute name="SVG_REF" type="xsd:string" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="refAnnoType"> + <xsd:sequence> + <xsd:element name="ANNOTATION_VALUE" type="xsd:string"/> + </xsd:sequence> + <xsd:attributeGroup ref="annotationAttribute"/> + <xsd:attribute name="ANNOTATION_REF" type="xsd:IDREF" use="required"> + <xsd:annotation> + <xsd:documentation> + This is in fact a reference to the parent annotation. + </xsd:documentation> + </xsd:annotation> + </xsd:attribute> + <xsd:attribute name="PREVIOUS_ANNOTATION" type="xsd:IDREF" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="lingType"> + <xsd:attribute name="LINGUISTIC_TYPE_ID" type="xsd:string" use="required"/> + <xsd:attribute name="TIME_ALIGNABLE" type="xsd:boolean" use="optional"/> + <xsd:attribute name="CONSTRAINTS" type="xsd:IDREF" use="optional"/> + <xsd:attribute name="GRAPHIC_REFERENCES" type="xsd:boolean" use="optional"/> + <xsd:attribute name="CONTROLLED_VOCABULARY_REF" type="xsd:string" use="optional"/> + <xsd:attribute name="EXT_REF" type="xsd:IDREF" use="optional"/> + <xsd:attribute name="LEXICON_REF" type="xsd:IDREF" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="localeType"> + <xsd:attribute name="LANGUAGE_CODE" type="xsd:ID" use="required"/> + <xsd:attribute name="COUNTRY_CODE" type="xsd:string" use="optional"/> + <xsd:attribute name="VARIANT" type="xsd:string" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="constraintType"> + <xsd:attribute name="STEREOTYPE" type="xsd:ID" use="required"/> + <xsd:attribute name="DESCRIPTION" type="xsd:string" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="convocType"> + <!-- change in 2.8, now it contains + a list of multilingual entries plus possible multiple description 
elements --> + <xsd:sequence> + <xsd:element name="DESCRIPTION" type="descMultiLangType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="CV_ENTRY_ML" type="cventryType" minOccurs="0" maxOccurs="unbounded"/> + </xsd:sequence> + + <xsd:attribute name="CV_ID" type="xsd:string" use="required"/> + <xsd:attribute name="EXT_REF" type="xsd:IDREF" use="optional"> + <xsd:annotation> + <xsd:documentation> + A reference to the URL of an external Controlled Vocabulary. + It is intended to be mutually exclusive with a sequence of CV_ENTRY_ML elements. + </xsd:documentation> + </xsd:annotation> + </xsd:attribute> + </xsd:complexType> + + <!-- introduced in 2.8, modification that breaks compatibility with previous version --> + <xsd:complexType name="cventryType"> + <xsd:annotation> + <xsd:documentation> + An entry in a multilingual controlled vocabulary, containing the values and the descriptions + in multiple languages. + </xsd:documentation> + </xsd:annotation> + <xsd:sequence> + <xsd:element name="CVE_VALUE" type="cveValueType" maxOccurs="unbounded"/> + </xsd:sequence> + <xsd:attribute name="CVE_ID" type="xsd:string" use="required"/><!-- in 2.8 moved from ecventry to cv entry --> + <xsd:attribute name="EXT_REF" type="xsd:IDREF" use="optional"/> + </xsd:complexType> + + <!-- introduced in 2.8 --> + <xsd:complexType name="cveValueType"> + <xsd:annotation> + <xsd:documentation> + A controlled vocabulary entry value with a language attribute. + This allows multilingual controlled vocabularies. It adds a language reference attribute + compared to the mono-lingual cv entry element. 
+ </xsd:documentation> + </xsd:annotation> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <xsd:attribute name="LANG_REF" type="xsd:IDREF" use="required"/> + <xsd:attribute name="DESCRIPTION" type="xsd:string" use="optional"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + + <!-- introduced in 2.8 --> + <xsd:complexType name="descMultiLangType"> + <xsd:annotation> + <xsd:documentation> + A description element with a language reference attribute. + </xsd:documentation> + </xsd:annotation> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <xsd:attribute name="LANG_REF" type="xsd:IDREF" use="required"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + + <xsd:complexType name="propType"> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <xsd:attribute name="NAME" type="xsd:string" use="optional"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + + <xsd:complexType name="extRefType"> + <xsd:attribute name="EXT_REF_ID" type="xsd:ID" use="required"/> + <xsd:attribute name="TYPE" use="required"> + <xsd:simpleType> + <xsd:restriction base="xsd:string"> + <xsd:enumeration value="iso12620"> + <xsd:annotation> + <xsd:documentation> + A reference to the id of an ISO Data Category (url including id). + </xsd:documentation> + </xsd:annotation> + </xsd:enumeration> + <xsd:enumeration value="ecv"> + <xsd:annotation> + <xsd:documentation> + A reference to an external (closed) Controlled Vocabulary (url). + </xsd:documentation> + </xsd:annotation> + </xsd:enumeration> + <xsd:enumeration value="cve_id"> + <xsd:annotation> + <xsd:documentation> + A reference to the id of an Entry in an external Controlled Vocabulary (id). 
+ </xsd:documentation> + </xsd:annotation> + </xsd:enumeration> + <xsd:enumeration value="lexen_id"> + <xsd:annotation> + <xsd:documentation> + A reference to the id of an entry in a lexicon (url, url+id or id) + </xsd:documentation> + </xsd:annotation> + </xsd:enumeration> + <xsd:enumeration value="resource_url"> + <xsd:annotation> + <xsd:documentation> + A reference or hyperlink to any type of document (url) + </xsd:documentation> + </xsd:annotation> + </xsd:enumeration> + <!-- other external reference types can be added later --> + </xsd:restriction> + </xsd:simpleType> + </xsd:attribute> + <xsd:attribute name="VALUE" type="xsd:string" use="required"/> + </xsd:complexType> + + <xsd:complexType name="lexRefType"> + <xsd:attribute name="LEX_REF_ID" type="xsd:ID" use="required"/> + <xsd:attribute name="NAME" type="xsd:string" use="required"/> + <xsd:attribute name="TYPE" type="xsd:string" use="required"/> + <xsd:attribute name="URL" type="xsd:string" use="required"/> + <xsd:attribute name="LEXICON_ID" type="xsd:string" use="required"/> + <xsd:attribute name="LEXICON_NAME" type="xsd:string" use="required"/> + <xsd:attribute name="DATCAT_ID" type="xsd:string" use="optional"/> + <xsd:attribute name="DATCAT_NAME" type="xsd:string" use="optional"/> + </xsd:complexType> + + <xsd:complexType name="langType"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The Language element containing a reference to a language name or (if possible persistent) definition. + </xsd:documentation> + </xsd:annotation> + <xsd:attribute name="LANG_ID" type="xsd:ID" use="required"/> + <!-- definition is optional so that user defined languages are easy to add --> + <xsd:attribute name="LANG_DEF" type="xsd:string" use="optional"> + <xsd:annotation><xsd:documentation> + ISO-639-3 still seems to be the best choice for language codes and closest to persistent language IDs + seem to be the http://cdb.iso.org/lg/... 
identifiers also used by the iso-language-639-3 component in + the CLARIN ComponentRegistry? + </xsd:documentation></xsd:annotation> + </xsd:attribute> + <xsd:attribute name="LANG_LABEL" type="xsd:string" use="optional"/> + </xsd:complexType> + <!-- since 2.8 --> + <xsd:complexType name="licenseType"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The license element can be used to include license information in the eaf file itself. + </xsd:documentation> + </xsd:annotation> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <xsd:attribute name="LICENSE_URL" type="xsd:anyURI" use="optional"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + + <!-- introduced in 3.0 --> + <xsd:complexType name="refLinksType"> + <xsd:annotation> + <xsd:documentation> + A set containing referential links. + A set can contain both cross-references and grouping referential links. + Apart from an ID the set can have a meaningful, "friendly" name. + A set can have an external reference, a language and a CV reference. + </xsd:documentation> + </xsd:annotation> + <xsd:sequence> + <xsd:choice minOccurs="0" maxOccurs="unbounded"> + <xsd:element name="CROSS_REF_LINK" type="crossLinkType"/> + <xsd:element name="GROUP_REF_LINK" type="groupLinkType"/> + </xsd:choice> + </xsd:sequence> + <xsd:attribute name="LINK_SET_ID" type="xsd:ID" use="required"/> + <xsd:attribute name="LINK_SET_NAME" type="xsd:string" use="optional"/> + <xsd:attribute name="EXT_REF" type="xsd:IDREFS" use="optional"/> + <xsd:attribute name="LANG_REF" type="xsd:IDREF" use="optional"/> + <xsd:attribute name="CV_REF" type="xsd:string" use="optional"/> + </xsd:complexType> + <!-- introduced in 3.0 --> + <!-- a cross reference element --> + <xsd:complexType name="crossLinkType"> + <xsd:annotation> + <xsd:documentation> + A cross reference is a referential link between two existing elements (REF1 and REF2). + Each of these elements can be either an annotation or a referential link. 
+ Optionally the direction of the link can be specified. + </xsd:documentation> + </xsd:annotation> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <!-- refers to the ID of an annotation or a reference link --> + <xsd:attribute name="REF1" type="xsd:IDREF" use="required"/> + <xsd:attribute name="REF2" type="xsd:IDREF" use="required"/> + <xsd:attribute name="DIRECTIONALITY" use="optional"> + <xsd:simpleType> + <xsd:restriction base="xsd:string"> + <xsd:enumeration value="undirected"/> + <xsd:enumeration value="unidirectional"/> + <xsd:enumeration value="bidirectional"/> + </xsd:restriction> + </xsd:simpleType> + </xsd:attribute> + + <xsd:attributeGroup ref="refLinkAttribute"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + <!-- a grouping reference element --> + <xsd:complexType name="groupLinkType"> + <xsd:annotation> + <xsd:documentation> + A referential element for grouping any number of existing elements (the REFS). + Each element can be an annotation or a referential link. + </xsd:documentation> + </xsd:annotation> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <xsd:attribute name="REFS" type="xsd:IDREFS" use="required"/> + <xsd:attributeGroup ref="refLinkAttribute"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + <!-- attributes shared by reference link elements --> + <xsd:attributeGroup name="refLinkAttribute"> + <xsd:annotation> + <xsd:documentation> + Attributes common for both cross- and group references. + Apart from an ID it is possible to associate a meaningful, "friendly" + name to the link. Furthermore a link can have an external reference, a language and a + CV entry reference and a type attribute. 
+ </xsd:documentation> + </xsd:annotation> + <xsd:attribute name="REF_LINK_ID" type="xsd:ID" use="required"/> + <xsd:attribute name="REF_LINK_NAME" type="xsd:string" use="optional"/> + <xsd:attribute name="EXT_REF" type="xsd:IDREFS" use="optional"/> + <xsd:attribute name="LANG_REF" type="xsd:IDREF" use="optional"/> + <xsd:attribute name="CVE_REF" type="xsd:string" use="optional"/> + <xsd:attribute name="REF_TYPE" type="xsd:string" use="optional"> + <xsd:annotation> + <xsd:documentation> + An attribute that allows specifying the type of the cross- or group reference/link. + </xsd:documentation> + </xsd:annotation> + </xsd:attribute> + </xsd:attributeGroup> + <!-- end of new in 3.0 --> + + <xsd:attributeGroup name="annotationAttribute"> + <xsd:attribute name="ANNOTATION_ID" type="xsd:ID" use="required"/> + <xsd:attribute name="EXT_REF" type="xsd:IDREFS" use="optional"/> + <xsd:attribute name="LANG_REF" type="xsd:IDREF" use="optional"/><!-- since 2.8 --> + <xsd:attribute name="CVE_REF" type="xsd:string" use="optional"/><!-- since 2.8 --> + </xsd:attributeGroup> + + + <!-- Start of CV_RESOURCE part, an alternative root element --> + <xsd:element name="CV_RESOURCE"> + <xsd:complexType> + <xsd:sequence> + <xsd:element name="LANGUAGE" type="langType" minOccurs="0" maxOccurs="unbounded"/> + <xsd:element name="CONTROLLED_VOCABULARY" type="convocType" minOccurs="1" maxOccurs="unbounded"> + <xsd:key name="cvEntryKey2"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The entry id should be unique within the + collection of entry elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CV_ENTRY_ML"/> + <xsd:field xpath="@CVE_ID"/> + </xsd:key> + + </xsd:element> + <xsd:element name="EXTERNAL_REF" type="extRefType" minOccurs="0" maxOccurs="unbounded"/> + </xsd:sequence> + <xsd:attribute name="DATE" type="xsd:dateTime" use="optional"/> + <xsd:attribute name="AUTHOR" type="xsd:string" use="optional"/> + <xsd:attribute name="VERSION" type="xsd:string" 
use="optional"/> + </xsd:complexType> + <!-- define key - keyref pairs --> + <!-- If not commented out, this is considered a duplicate global definition of cvNameKey --> + <!-- <xsd:key name="cvNameKey"> + <xsd:annotation> + <xsd:documentation xml:lang="en"> + The Controlled Vocabulary name/id should be unique within the + collection of Controlled Vocabulary elements + </xsd:documentation> + </xsd:annotation> + <xsd:selector xpath="CONTROLLED_VOCABULARY"/> + <xsd:field xpath="@CV_ID"/> + </xsd:key>--> + </xsd:element> + +</xsd:schema> \ No newline at end of file diff --git a/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/exmaralda_exb.xsd b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/exmaralda_exb.xsd new file mode 100644 index 0000000000000000000000000000000000000000..ae380f4661c409158aba48a52c7a8792da08d722 --- /dev/null +++ b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/exmaralda_exb.xsd @@ -0,0 +1,418 @@ +<?xml version="1.0" encoding="UTF-8"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:element name="basic-transcription"> + <xs:complexType> + <xs:sequence> + <xs:element ref="head"/> + <xs:element ref="basic-body"/> + <xs:sequence minOccurs="0"> + <xs:element ref="tierformat-table"/> + </xs:sequence> + </xs:sequence> + <xs:attribute name="Id" type="xs:string"/> + </xs:complexType> + </xs:element> + + <xs:element name="head"> + <xs:complexType> + <xs:sequence> + <xs:element ref="meta-information"/> + <xs:element ref="speakertable"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="basic-body"> + <xs:complexType> + <xs:sequence> + <xs:element ref="common-timeline"/> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="tier"/> + </xs:sequence> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="tierformat-table"> + <xs:complexType> + <xs:sequence> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element 
ref="referenced-file"/> + </xs:sequence> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="timeline-item-format"/> + </xs:sequence> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="tier-format"/> + </xs:sequence> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="meta-information"> + <xs:complexType> + <xs:sequence> + <xs:element ref="project-name"/> + <xs:element ref="transcription-name"/> + <xs:sequence maxOccurs="unbounded"> + <xs:element ref="referenced-file"/> + </xs:sequence> + <xs:element ref="ud-meta-information"/> + <xs:element ref="comment"/> + <xs:element ref="transcription-convention"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="speakertable"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="speaker"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="project-name" type="xs:string"/> + + <xs:element name="transcription-name" type="xs:string"/> + + <xs:element name="referenced-file"> + <xs:complexType> + <xs:attribute name="url" type="xs:string" use="required"/> + </xs:complexType> + </xs:element> + + <xs:element name="ud-meta-information"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="ud-information"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="comment" type="xs:string"/> + + <xs:element name="transcription-convention" type="xs:string"/> + + <xs:element name="ud-information"> + <xs:complexType> + <xs:attribute name="attribute-name" type="xs:string" use="required"/> + </xs:complexType> + </xs:element> + + <xs:element name="speaker"> + <xs:complexType> + <xs:sequence> + <xs:element ref="abbreviation"/> + <xs:element ref="sex"/> + <xs:element ref="languages-used"/> + <xs:element ref="l1"/> + <xs:element ref="l2"/> + <xs:element ref="ud-speaker-information"/> + <xs:element ref="comment"/> + </xs:sequence> + <xs:attribute 
name="id" type="xs:ID" use="required"/> + </xs:complexType> + </xs:element> + + <xs:element name="abbreviation" type="xs:string"/> + + <xs:element name="sex"> + <xs:complexType> + <xs:attribute name="value" use="required"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="m"/> + <xs:enumeration value="f"/> + <xs:enumeration value="u"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + </xs:complexType> + </xs:element> + + <xs:element name="languages-used"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="language"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="l1"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="language"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="l2"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="language"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="ud-speaker-information"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="ud-information"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="language"> + <xs:complexType> + <xs:attribute name="lang" type="xs:NMTOKEN" use="required"/> + </xs:complexType> + </xs:element> + + <xs:element name="common-timeline"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="tli"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="tier"> + <xs:complexType> + <xs:sequence> + <xs:sequence minOccurs="0"> + <xs:element ref="ud-tier-information"/> + </xs:sequence> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="event"/> + </xs:sequence> + </xs:sequence> + <xs:attribute name="id" type="xs:ID" use="required"/> + <xs:attribute name="speaker" type="xs:IDREF"/> + <xs:attribute name="category" type="xs:string" 
use="required"/> + <xs:attribute name="display-name" type="xs:string"/> + <xs:attribute name="type" use="required"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="t"/> + <xs:enumeration value="d"/> + <xs:enumeration value="a"/> + <xs:enumeration value="l"/> + <xs:enumeration value="u"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + </xs:complexType> + </xs:element> + + <xs:element name="tli"> + <xs:complexType> + <xs:attribute name="id" type="xs:ID" use="required"/> + <xs:attribute name="time" type="xs:string"/> + <xs:attribute name="type"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="unsp"/> + <xs:enumeration value="user"/> + <xs:enumeration value="appl"/> + <xs:enumeration value="intp"/> + <xs:enumeration value="othr"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="bookmark" type="xs:string"/> + </xs:complexType> + </xs:element> + + <xs:element name="ud-tier-information"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:element ref="ud-information"/> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="event"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:choice> + <xs:element ref="ud-information"/> + </xs:choice> + </xs:sequence> + <xs:attribute name="start" type="xs:IDREF" use="required"/> + <xs:attribute name="end" type="xs:IDREF" use="required"/> + <xs:attribute name="medium"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="aud"/> + <xs:enumeration value="vid"/> + <xs:enumeration value="img"/> + <xs:enumeration value="txt"/> + <xs:enumeration value="oth"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="url" type="xs:string"/> + </xs:complexType> + </xs:element> + + <xs:element name="timeline-item-format"> + <xs:complexType> + <xs:attribute name="show-every-nth-numbering" type="xs:string"/> + <xs:attribute 
name="show-every-nth-absolute" type="xs:string"/> + <xs:attribute name="absolute-time-format"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="decimal"/> + <xs:enumeration value="time"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="miliseconds-digits" type="xs:string"/> + </xs:complexType> + </xs:element> + + <xs:element name="tier-format"> + <xs:complexType> + <xs:sequence minOccurs="0" maxOccurs="unbounded"> + <xs:choice> + <xs:element ref="property"/> + </xs:choice> + </xs:sequence> + <xs:attribute name="tierref" type="xs:string" use="required"/> + <xs:attribute name="style-name"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="Plain"/> + <xs:enumeration value="Bold"/> + <xs:enumeration value="Italic"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="size"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="1"/> + <xs:enumeration value="2"/> + <xs:enumeration value="3"/> + <xs:enumeration value="4"/> + <xs:enumeration value="5"/> + <xs:enumeration value="6"/> + <xs:enumeration value="7"/> + <xs:enumeration value="8"/> + <xs:enumeration value="9"/> + <xs:enumeration value="10"/> + <xs:enumeration value="11"/> + <xs:enumeration value="12"/> + <xs:enumeration value="13"/> + <xs:enumeration value="14"/> + <xs:enumeration value="15"/> + <xs:enumeration value="16"/> + <xs:enumeration value="17"/> + <xs:enumeration value="18"/> + <xs:enumeration value="19"/> + <xs:enumeration value="20"/> + <xs:enumeration value="21"/> + <xs:enumeration value="22"/> + <xs:enumeration value="23"/> + <xs:enumeration value="24"/> + <xs:enumeration value="25"/> + <xs:enumeration value="26"/> + <xs:enumeration value="27"/> + <xs:enumeration value="28"/> + <xs:enumeration value="29"/> + <xs:enumeration value="30"/> + <xs:enumeration value="31"/> + <xs:enumeration value="32"/> + <xs:enumeration value="33"/> + <xs:enumeration value="34"/> + 
<xs:enumeration value="35"/> + <xs:enumeration value="36"/> + <xs:enumeration value="37"/> + <xs:enumeration value="38"/> + <xs:enumeration value="39"/> + <xs:enumeration value="40"/> + <xs:enumeration value="41"/> + <xs:enumeration value="42"/> + <xs:enumeration value="43"/> + <xs:enumeration value="44"/> + <xs:enumeration value="45"/> + <xs:enumeration value="46"/> + <xs:enumeration value="47"/> + <xs:enumeration value="48"/> + <xs:enumeration value="72"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="alignment-name"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="Left"/> + <xs:enumeration value="Right"/> + <xs:enumeration value="Center"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="textcolor-name"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="white"/> + <xs:enumeration value="lightGray"/> + <xs:enumeration value="darkGray"/> + <xs:enumeration value="black"/> + <xs:enumeration value="red"/> + <xs:enumeration value="pink"/> + <xs:enumeration value="orange"/> + <xs:enumeration value="yellow"/> + <xs:enumeration value="green"/> + <xs:enumeration value="magenta"/> + <xs:enumeration value="cyan"/> + <xs:enumeration value="blue"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="bgcolor-name"> + <xs:simpleType> + <xs:restriction base="xs:string"> + <xs:enumeration value="white"/> + <xs:enumeration value="lightGray"/> + <xs:enumeration value="darkGray"/> + <xs:enumeration value="black"/> + <xs:enumeration value="red"/> + <xs:enumeration value="pink"/> + <xs:enumeration value="orange"/> + <xs:enumeration value="yellow"/> + <xs:enumeration value="green"/> + <xs:enumeration value="magenta"/> + <xs:enumeration value="cyan"/> + <xs:enumeration value="blue"/> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + </xs:complexType> + </xs:element> + + <xs:element name="property"> + <xs:complexType> + 
<xs:attribute name="name" use="required">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="font-color"/>
+            <xs:enumeration value="bg-color"/>
+            <xs:enumeration value="font-size"/>
+            <xs:enumeration value="font-name"/>
+            <xs:enumeration value="font-face"/>
+            <xs:enumeration value="chunk-border"/>
+            <xs:enumeration value="chunk-border-color"/>
+            <xs:enumeration value="chunk-border-style"/>
+            <xs:enumeration value="text-alignment"/>
+            <xs:enumeration value="row-height-calculation"/>
+            <xs:enumeration value="fixed-row-height"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+</xs:schema>
diff --git a/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/exmaralda_exs.xsd b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/exmaralda_exs.xsd
new file mode 100644
index 0000000000000000000000000000000000000000..812d7bd7bbd54a1c3e11c34f3c75776a3aad3fc6
--- /dev/null
+++ b/src/main/java/de/uni_hamburg/corpora/validation/quest/resources/xsd/exmaralda_exs.xsd
@@ -0,0 +1,365 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+  <!-- Root element: a head, a segmented-body and an optional conversion-info, in that order. -->
+  <xs:element name="segmented-transcription">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="head"/>
+        <xs:element ref="segmented-body"/>
+        <xs:sequence minOccurs="0">
+          <xs:element ref="conversion-info"/>
+        </xs:sequence>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- head: exactly one meta-information followed by exactly one speakertable. -->
+  <xs:element name="head">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="meta-information"/>
+        <xs:element ref="speakertable"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- segmented-body: one common-timeline followed by any number of segmented-tier elements. -->
+  <xs:element name="segmented-body">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="common-timeline"/>
+        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+          <xs:element ref="segmented-tier"/>
+        </xs:sequence>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- conversion-info: optionally wraps one basic-transcription-conversion-info. -->
+  <xs:element name="conversion-info">
+    <xs:complexType>
+      <xs:sequence minOccurs="0">
+        <xs:element ref="basic-transcription-conversion-info"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- meta-information: fixed child order; at least one referenced-file is required. -->
+  <xs:element name="meta-information">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="project-name"/>
+        <xs:element ref="transcription-name"/>
+        <xs:sequence maxOccurs="unbounded">
+          <xs:element ref="referenced-file"/>
+        </xs:sequence>
+        <xs:element ref="ud-meta-information"/>
+        <xs:element ref="comment"/>
+        <xs:element ref="transcription-convention"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- speakertable: zero or more speaker elements. -->
+  <xs:element name="speakertable">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="speaker"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="project-name" type="xs:string"/>
+
+  <xs:element name="transcription-name" type="xs:string"/>
+  <!-- referenced-file: empty element carrying a required url attribute. -->
+  <xs:element name="referenced-file">
+    <xs:complexType>
+      <xs:attribute name="url" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- ud-meta-information: zero or more ud-information entries. -->
+  <xs:element name="ud-meta-information">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="ud-information"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="comment" type="xs:string"/>
+
+  <xs:element name="transcription-convention" type="xs:string"/>
+  <!-- ud-information: mixed="true" so the element may hold its value as text; without it the content type is empty and any text is rejected. -->
+  <xs:element name="ud-information">
+    <xs:complexType mixed="true">
+      <xs:attribute name="attribute-name" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- speaker: fixed child order plus a required document-unique id (xs:ID). -->
+  <xs:element name="speaker">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="abbreviation"/>
+        <xs:element ref="sex"/>
+        <xs:element ref="languages-used"/>
+        <xs:element ref="l1"/>
+        <xs:element ref="l2"/>
+        <xs:element ref="ud-speaker-information"/>
+        <xs:element ref="comment"/>
+      </xs:sequence>
+      <xs:attribute name="id" type="xs:ID" use="required"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="abbreviation" type="xs:string"/>
+  <!-- sex: empty element; required value attribute restricted to m, f or u. -->
+  <xs:element name="sex">
+    <xs:complexType>
+      <xs:attribute name="value" use="required">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="m"/>
+            <xs:enumeration value="f"/>
+            <xs:enumeration value="u"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+  <!-- languages-used, l1 and l2 share one shape: zero or more language elements. -->
+  <xs:element name="languages-used">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="language"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="l1">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="language"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="l2">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="language"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- ud-speaker-information: zero or more ud-information entries. -->
+  <xs:element name="ud-speaker-information">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="ud-information"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- language: empty element with a required lang attribute (xs:NMTOKEN). -->
+  <xs:element name="language">
+    <xs:complexType>
+      <xs:attribute name="lang" type="xs:NMTOKEN" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- common-timeline: zero or more tli elements. -->
+  <xs:element name="common-timeline">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="tli"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- segmented-tier: optional timeline-forks, then one or more segmentations, then optional annotations. -->
+  <xs:element name="segmented-tier">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+          <xs:element ref="timeline-fork"/>
+        </xs:sequence>
+        <xs:sequence maxOccurs="unbounded">
+          <xs:element ref="segmentation"/>
+        </xs:sequence>
+        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+          <xs:element ref="annotation"/>
+        </xs:sequence>
+      </xs:sequence>
+      <xs:attribute name="id" type="xs:ID" use="required"/>
+      <xs:attribute name="speaker" type="xs:IDREF"/>
+      <xs:attribute name="category" type="xs:string" use="required"/>
+      <xs:attribute name="display-name" type="xs:string"/>
+      <xs:attribute name="type" use="required">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="t"/>
+            <xs:enumeration value="d"/>
+            <xs:enumeration value="a"/>
+            <xs:enumeration value="l"/>
+            <xs:enumeration value="u"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+  <!-- tli: empty element; id is required, while time, type and bookmark are optional. -->
+  <xs:element name="tli">
+    <xs:complexType>
+      <xs:attribute name="id" type="xs:ID" use="required"/>
+      <xs:attribute name="time" type="xs:string"/>
+      <xs:attribute name="type">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="unsp"/>
+            <xs:enumeration value="user"/>
+            <xs:enumeration value="appl"/>
+            <xs:enumeration value="intp"/>
+            <xs:enumeration value="othr"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+      <xs:attribute name="bookmark" type="xs:string"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- timeline-fork: one or more tli elements; start and end must reference existing IDs. -->
+  <xs:element name="timeline-fork">
+    <xs:complexType>
+      <xs:sequence maxOccurs="unbounded">
+        <xs:element ref="tli"/>
+      </xs:sequence>
+      <xs:attribute name="start" type="xs:IDREF" use="required"/>
+      <xs:attribute name="end" type="xs:IDREF" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- segmentation: any mix of ts and ats children; name is required, tierref optional. -->
+  <xs:element name="segmentation">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:choice>
+          <xs:element ref="ts"/>
+          <xs:element ref="ats"/>
+        </xs:choice>
+      </xs:sequence>
+      <xs:attribute name="name" type="xs:string" use="required"/>
+      <xs:attribute name="tierref" type="xs:string"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- annotation: zero or more ta children; name is required, tierref optional. -->
+  <xs:element name="annotation">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="ta"/>
+      </xs:sequence>
+      <xs:attribute name="name" type="xs:string" use="required"/>
+      <xs:attribute name="tierref" type="xs:string"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- ts: mixed="true" so text may be interleaved with nested ts/ats/nts; an element-only type would reject the segment text. -->
+  <xs:element name="ts">
+    <xs:complexType mixed="true">
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:choice>
+          <xs:element ref="ts"/>
+          <xs:element ref="ats"/>
+          <xs:element ref="nts"/>
+        </xs:choice>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="id" type="xs:ID"/>
+      <xs:attribute name="s" type="xs:IDREF" use="required"/>
+      <xs:attribute name="e" type="xs:IDREF" use="required"/>
+      <xs:attribute name="medium">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="aud"/>
+            <xs:enumeration value="vid"/>
+            <xs:enumeration value="img"/>
+            <xs:enumeration value="txt"/>
+            <xs:enumeration value="oth"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+      <xs:attribute name="url" type="xs:string"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- ats: same attributes as ts but no child elements; mixed="true" permits its text content. -->
+  <xs:element name="ats">
+    <xs:complexType mixed="true">
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="id" type="xs:ID"/>
+      <xs:attribute name="s" type="xs:IDREF" use="required"/>
+      <xs:attribute name="e" type="xs:IDREF" use="required"/>
+      <xs:attribute name="medium">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="aud"/>
+            <xs:enumeration value="vid"/>
+            <xs:enumeration value="img"/>
+            <xs:enumeration value="txt"/>
+            <xs:enumeration value="oth"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+      <xs:attribute name="url" type="xs:string"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- nts: has no s/e references, only n and an optional id; mixed="true" permits its text content. -->
+  <xs:element name="nts">
+    <xs:complexType mixed="true">
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="id" type="xs:ID"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- ta: s and e are required IDREFs; mixed="true" permits the annotation value as text. -->
+  <xs:element name="ta">
+    <xs:complexType mixed="true">
+      <xs:attribute name="n" type="xs:string"/>
+      <xs:attribute name="id" type="xs:ID"/>
+      <xs:attribute name="s" type="xs:IDREF" use="required"/>
+      <xs:attribute name="e" type="xs:IDREF" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  <!-- basic-transcription-conversion-info: one conversion-timeline, then zero or more conversion-tier elements. -->
+  <xs:element name="basic-transcription-conversion-info">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="conversion-timeline"/>
+        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+          <xs:element ref="conversion-tier"/>
+        </xs:sequence>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- conversion-timeline: zero or more conversion-tli elements. -->
+  <xs:element name="conversion-timeline">
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="conversion-tli"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <!-- conversion-tier: empty element; all five attributes are required and segmented-tier-id must reference an existing ID. -->
+  <xs:element name="conversion-tier">
+    <xs:complexType>
+      <xs:attribute name="segmented-tier-id" type="xs:IDREF" use="required"/>
+      <xs:attribute name="name" type="xs:string" use="required"/>
+      <xs:attribute name="category" type="xs:string" use="required"/>
+      <xs:attribute name="display-name" type="xs:string" use="required"/>
+      <xs:attribute name="type" use="required">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="t"/>
+            <xs:enumeration value="a"/>
+            <xs:enumeration value="d"/>
+            <xs:enumeration value="l"/>
+            <xs:enumeration value="u"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+  <!-- conversion-tli: its id is an IDREF (a reference to an existing ID), not a new ID. -->
+  <xs:element name="conversion-tli">
+    <xs:complexType>
+      <xs:attribute name="id" type="xs:IDREF" use="required"/>
+    </xs:complexType>
+  </xs:element>
+
+</xs:schema>