Select Git revision
extract-utterances-from-tei.xsl
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
extract-utterances-from-tei.xsl 1.60 KiB
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:string="https://inel.corpora.uni-hamburg.de/xmlns/string"
exclude-result-prefixes="xs string"
version="2.0">
<xsl:output method="xml" media-type="text/xml" encoding="UTF-8"/>
<!-- Stylesheet parameters; every value below is only a default. -->

<!-- URI prefix that each corpus directory name is appended to; the default ends with '/'. -->
<xsl:param name="base-directory"
           as="xs:string"
           select="'file:/E:/emnlp2020/corpora/'"/>

<!-- One or more corpus directory names; each one is processed separately by the root template. -->
<xsl:param name="corpus-directory"
           as="xs:string+"
           select="('selkup-0.1', 'dolgan-1.0', 'kamas-1.0')"/>

<!-- File name pattern placed in the select= part of the collection URI. -->
<xsl:param name="file-pattern"
           as="xs:string"
           select="'*_tei.xml'"/>

<!-- Value the @type attribute of a spanGrp must have for its spans to be extracted. -->
<xsl:param name="utterance-tier-category"
           as="xs:string"
           select="'fe'"/>
<!-- Templates -->
<!--
    Entry point. For every name in $corpus-directory this writes one result document
    called "{name}-utterances.xml" whose root is a corpus element (attributes: name, dir).
    The documents to read are obtained with collection() from a URI built out of the base
    directory, the corpus name and $file-pattern. Inside them, every span child of a spanGrp
    whose @type equals $utterance-tier-category becomes one u element carrying:
      file-ref       last '/'-separated segment of the span's base URI
      tier-category  the value of $utterance-tier-category
      speaker        @who of the spanGrp's parent element
      from, to       copied from the span
    The content of u is the span's own text with parenthesised passages and square
    brackets removed.
    NOTE(review): the "?select=...;recurse=yes" query syntax is not defined by XPath itself;
    it looks like the Saxon directory-collection convention. Confirm the target processor.
-->
<xsl:template match="/">
    <!-- Tolerate a base directory supplied without its trailing slash. -->
    <xsl:variable name="base" as="xs:string"
                  select="if (ends-with($base-directory, '/')) then $base-directory else concat($base-directory, '/')"/>
    <xsl:for-each select="$corpus-directory">
        <!-- Bind the corpus name: the inner for-each changes the context item to a span node. -->
        <xsl:variable name="corpus-name" as="xs:string" select="."/>
        <xsl:variable name="corpus-path" as="xs:string" select="concat($base, $corpus-name)"/>
        <xsl:result-document href="{$corpus-name}-utterances.xml">
            <corpus name="{$corpus-name}" dir="{$corpus-path}">
                <xsl:for-each select="collection(concat($corpus-path, '/', '?select=', $file-pattern, ';recurse=yes'))//*:spanGrp[@type = $utterance-tier-category]/*:span">
                    <!-- The predicate above guarantees the tier category equals the parameter,
                         so the parameter is written out directly. -->
                    <u file-ref="{tokenize(base-uri(), '/')[last()]}"
                       tier-category="{$utterance-tier-category}"
                       speaker="{../../@who}"
                       from="{@from}"
                       to="{@to}">
                        <!-- string-join() keeps replace() from failing with a type error when the
                             span has more than one text node child; zero or one text node gives
                             the same result as passing text() directly. -->
                        <xsl:value-of select="replace(string-join(text(), ''), '(\([^\(]*\)|\[|\])', '')"/>
                    </u>
                </xsl:for-each>
            </corpus>
        </xsl:result-document>
    </xsl:for-each>
</xsl:template>
</xsl:stylesheet>