Skip to content
Snippets Groups Projects
Select Git revision
  • a5edc6a4de932d0ca8c185ccc7e21451072c7c45
  • main default
2 results

extract-utterances-from-tei.xsl

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    extract-utterances-from-tei.xsl 1.60 KiB
    <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:string="https://inel.corpora.uni-hamburg.de/xmlns/string"
        exclude-result-prefixes="xs string"
        version="2.0">
        
        <!-- Serialisation settings: XML output, UTF-8 encoded. -->
        <xsl:output encoding="UTF-8" media-type="text/xml" method="xml"/>

        <!-- Global parameters -->
        <!-- URI prefix that each corpus directory name is appended to. -->
        <xsl:param name="base-directory" as="xs:string" select="'file:/E:/emnlp2020/corpora/'"/>
        <!-- One or more corpus directory names; one result document is written per name. -->
        <xsl:param name="corpus-directory" as="xs:string+" select="('selkup-0.1', 'dolgan-1.0', 'kamas-1.0')"/>
        <!-- File name pattern used to select the input files inside a corpus directory. -->
        <xsl:param name="file-pattern" as="xs:string" select="'*_tei.xml'"/>
        <!-- Value of spanGrp/@type identifying the tier whose spans are extracted. -->
        <xsl:param name="utterance-tier-category" as="xs:string" select="'fe'"/>
        
        
        <!-- Templates -->
        <!--
            Entry template. For every name in $corpus-directory it writes one
            result document "{name}-utterances.xml" containing a <corpus> element
            with one <u> child per span of the selected tier.

            Each <u> carries:
              file-ref       last path segment of the base URI of the source file
              tier-category  the value of $utterance-tier-category
              speaker        @who of the grandparent element of the span
              from / to      copied from the span's own @from / @to
            and, as content, the span text with parenthesised stretches and
            square bracket characters removed.
        -->
        <xsl:template match="/">
            <xsl:for-each select="$corpus-directory">
                <!-- Inside this loop the context item is the corpus name (a string);
                     bind it to variables before the context changes to span nodes. -->
                <xsl:variable name="corpus-name" select="." as="xs:string"/>
                <xsl:variable name="corpus-uri" select="concat($base-directory, $corpus-name)" as="xs:string"/>
                <!-- NOTE(review): the "?select=...;recurse=yes" query looks like Saxon's
                     directory collection syntax; confirm for the processor in use. -->
                <xsl:variable name="collection-uri" select="concat($corpus-uri, '/', '?select=', $file-pattern, ';recurse=yes')" as="xs:string"/>
                <xsl:result-document href="{$corpus-name}-utterances.xml">
                    <corpus name="{$corpus-name}" dir="{$corpus-uri}">
                        <xsl:for-each select="collection($collection-uri)//*:spanGrp[@type = $utterance-tier-category]/*:span">
                            <!-- tier-category: the predicate above already guarantees that the
                                 parent's @type equals the parameter, so the parameter is used directly. -->
                            <u file-ref="{tokenize(base-uri(), '/')[last()]}"
                               tier-category="{$utterance-tier-category}"
                               speaker="{../../@who}"
                               from="{@from}"
                               to="{@to}">
                                <!-- replace() accepts at most one string as input, so the direct
                                     text nodes are joined first; a span with several text nodes
                                     would otherwise raise a type error and abort the run.
                                     NOTE(review): [^\(]* is greedy and only stops at an opening
                                     parenthesis, so a later stray closing parenthesis extends
                                     the removed stretch; confirm this is intended. -->
                                <xsl:value-of select="replace(string-join(text(), ''), '(\([^\(]*\)|\[|\])', '')"/>
                            </u>
                        </xsl:for-each>
                    </corpus>
                </xsl:result-document>
            </xsl:for-each>
        </xsl:template>
        
    </xsl:stylesheet>