Skip to content
Snippets Groups Projects
Commit b9ae6295 authored by Ferger, Anne's avatar Ferger, Anne :bat:
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
# Audio Wordclouds
Creating word clouds with audio functionality from INEL corpora.
\ No newline at end of file
This diff is collapsed.
<!doctype html>
<!--
  Audio word cloud page.

  Loads word entries from dataDolganWords.json, renders them as an AnyChart
  tag cloud and, when a word is clicked, plays the section of the entry's
  audio file between its "audio_start" and "audio_end" values.
-->
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Audio Word Cloud</title>
  <script src="https://cdn.anychart.com/releases/v8/js/anychart-base.min.js"></script>
  <script src="https://cdn.anychart.com/releases/v8/js/anychart-ui.min.js"></script>
  <script src="https://cdn.anychart.com/releases/v8/js/anychart-exports.min.js"></script>
  <script src="https://cdn.anychart.com/releases/v8/js/anychart-tag-cloud.min.js"></script>
  <script src="https://cdn.anychart.com/releases/v8/js/anychart-data-adapter.min.js"></script>
  <link href="https://cdn.anychart.com/releases/v8/css/anychart-ui.min.css" rel="stylesheet">
  <link href="https://cdn.anychart.com/releases/v8/fonts/css/anychart-font.min.css" rel="stylesheet">
  <style>
    html, body, #container {
      width: 100%;
      height: 100%;
      margin: 0;
      padding: 0;
    }
  </style>
</head>
<body>
  <div id="container"></div>
  <script>
    anychart.onDocumentReady(function () {
      anychart.data.loadJsonFile('dataDolganWords.json', function (data) {
        var dataSet = anychart.data.set(data);

        // Create the tag (word) cloud chart and give it a title.
        var chart = anychart.tagCloud();
        chart.title('Most frequent words in corpus with their sound');
        chart.data(dataSet);

        // Lay out every word horizontally.
        chart.angles([0]);

        // Show a color range occupying 80% of the available length.
        chart.colorRange(true);
        chart.colorRange().length('80%');

        // Tooltip text built from the fields of each data entry.
        var formatter = "English translation: {%eng_transl}\nFrequency in corpus: {%value}\nFile for Audio: {%audio_filename}";
        chart.tooltip().format(formatter);

        // No legend is enabled: clicking anything on the chart plays audio.

        chart.container("container");
        chart.draw();

        // Playback state shared between clicks, so that a new click can stop
        // the clip (and its end-of-clip poller) started by the previous one.
        var currentAudio = null;
        var stopTimer = null;

        // Stops the running clip, if any, and cancels its poller.
        function stopPlayback() {
          if (stopTimer !== null) {
            clearInterval(stopTimer);
            stopTimer = null;
          }
          if (currentAudio !== null) {
            currentAudio.pause();
            currentAudio = null;
          }
        }

        // Play the clicked word's audio snippet.
        chart.listen("pointClick", function (e) {
          var audioUrl = e.point.get("audio_file");
          // The JSON stores the times as strings; convert them once here.
          var startTime = Number(e.point.get("audio_start"));
          var endTime = Number(e.point.get("audio_end"));

          // Skip entries without usable audio data instead of throwing when
          // a non-finite value is assigned to currentTime.
          if (!audioUrl || !isFinite(startTime) || !isFinite(endTime)) {
            return;
          }

          stopPlayback();

          var audio = new Audio(audioUrl);
          audio.currentTime = startTime;

          // Poll the playback position and pause once the snippet's end is
          // passed. The timer id is kept in a local variable so each poller
          // always clears its own interval.
          var timer = setInterval(function () {
            if (audio.ended || audio.currentTime > endTime) {
              audio.pause();
              clearInterval(timer);
              if (stopTimer === timer) {
                stopTimer = null;
                currentAudio = null;
              }
            }
          }, 3);

          currentAudio = audio;
          stopTimer = timer;

          // play() returns a promise in current browsers; if playback is
          // rejected (e.g. the file cannot be loaded), cancel the poller.
          var playResult = audio.play();
          if (playResult && typeof playResult.catch === "function") {
            playResult.catch(function () {
              clearInterval(timer);
              if (stopTimer === timer) {
                stopTimer = null;
                currentAudio = null;
              }
            });
          }
        });
      });
    });
  </script>
</body>
</html>
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">
<!-- The result is serialized as plain text; the templates below write JSON by hand. -->
<xsl:output method="text" omit-xml-declaration="yes"/>
<!-- Required parameter: base path passed to collection() to locate the input *.xml files. -->
<xsl:param name="tei-base-path" required="yes" as="xs:string"/>
<!-- Indexes x elements by "text#category". NOTE(review): no key() call is visible in this stylesheet - confirm whether this key is still needed. -->
<xsl:key name="x-by-name-and-category" match="x" use="concat(text(), '#', @category)"/>
<!-- A single line feed character (U+000A), used to break lines in the text output. -->
<xsl:variable name="NEWLINE" as="xs:string">
<xsl:text>&#xa;</xsl:text>
</xsl:variable>
<!-- Entry point: wraps the word entries written by "getmostfrequentwordsfromcorpus" in a JSON array. -->
<xsl:template match="/">
    <!-- opening bracket followed by a line break -->
    <xsl:value-of select="concat('[', $NEWLINE)"/>
    <xsl:call-template name="getmostfrequentwordsfromcorpus"/>
    <!-- line break followed by the closing bracket -->
    <xsl:value-of select="concat($NEWLINE, ']')"/>
</xsl:template>
<!--
    Writes the most frequent morphemes of the corpus as comma-separated JSON objects
    (the surrounding "[" and "]" are written by the caller).

    Steps:
      1. $allwords: one <x> element per selected morpheme span in every document under
         $tei-base-path, carrying category, translation, audio URL, audio file name and
         start/end time as attributes.
      2. $allwordsvalues: one <x> per distinct (text, category) pair, taken from the first
         occurrence that has audio file name, start and end, plus a "value" attribute
         holding the number of occurrences of that pair.
      3. Output: the entries sorted by descending "value", limited to $max-words.
-->
<xsl:template name="getmostfrequentwordsfromcorpus">
    <!-- @type values of the annotation tiers (spanGrp elements) that are read -->
    <xsl:variable name="tier1" select="'ps'"/>
    <xsl:variable name="tier2" select="'ge'"/>
    <xsl:variable name="tier3" select="'mb'"/>
    <xsl:variable name="tier4" select="'BOR'"/>
    <!-- TO DO: hard-coded corpus version used to build the audio URL -->
    <xsl:variable name="corpus-version-name" select="'dolgan-1.0'"/>
    <!-- maximum number of entries written to the output -->
    <xsl:variable name="max-words" select="99"/>
    <xsl:variable name="allwords">
        <xsl:for-each
            select="collection(concat($tei-base-path, '?select=*.xml;recurse=yes;on-error=warning'))">
            <xsl:variable name="DOC" select="."/>
            <xsl:for-each select="$DOC//*:spanGrp[@type = $tier3]/*:span/*:span">
                <xsl:variable name="mid" select="@xml:id"/>
                <xsl:variable name="wid" select="../@from"/>
                <!-- @synch of the anchors directly before and after the word this span belongs to -->
                <xsl:variable name="TstartTime"
                    select="../../../*:u/*:seg/*:w[@xml:id = $wid]/preceding-sibling::*:anchor[1]/@synch"/>
                <xsl:variable name="TendTime"
                    select="../../../*:u/*:seg/*:w[@xml:id = $wid]/following-sibling::*:anchor[1]/@synch"/>
                <!-- true unless the corresponding ge gloss contains two consecutive characters out of A-Z, 0-9, '.', ':' and '\' -->
                <xsl:variable name="gloss"
                    select="not(../../../*:spanGrp[@type = $tier2]/*:span/*:span[@from = $mid]/matches(text(), '[A-Z0-9\.:\\]{2}'))"/>
                <!-- true if there is no BOR annotation for the word -->
                <xsl:variable name="BOR"
                    select="empty(../../../*:spanGrp[@type = $tier4]/*:span[@from = $wid])"/>
                <!-- @url of the recording's media element whose URL ends in .mp3 (case-insensitive) -->
                <xsl:variable name="audio-filename">
                    <xsl:value-of
                        select="../../../../../../*:teiHeader/*:fileDesc/*:sourceDesc/*:recordingStmt/*:recording/*:media[ends-with(lower-case(@url), '.mp3')]/@url"
                    />
                </xsl:variable>
                <xsl:if test="$BOR and $gloss">
                    <x>
                        <!-- Example of the fields collected here:
                             "category": "v",
                             "eng_transl": "like",
                             "audio_file": "audio_snippets/AkEE_19900810_GirlAnys_flk.mp3",
                             "audio_filename" : "AkEE_19900810_GirlAnys_flk",
                             "audio_start": "1.275",
                             "audio_end": "2.55" -->
                        <xsl:attribute name="category">
                            <xsl:value-of
                                select="../../../*:spanGrp[@type = $tier1]/*:span[@from = $wid]"
                            />
                        </xsl:attribute>
                        <xsl:attribute name="eng_transl">
                            <xsl:value-of
                                select="../../../*:spanGrp[@type = $tier2]/*:span/*:span[@from = $mid]"
                            />
                        </xsl:attribute>
                        <xsl:attribute name="audio_file">
                            <!-- https://corpora.uni-hamburg.de/hzsk/de/islandora/object/recording:dolgan-1.0_PoNA_200X_GirlFromTundra_nar/datastream/MP3/PoNA_200X_GirlFromTundra_nar.mp3 -->
                            <!-- Strip the extension with an escaped, anchored, case-insensitive pattern:
                                 an unescaped '.' matches any character, and the media element above is
                                 selected case-insensitively. -->
                            <xsl:value-of select="concat('https://corpora.uni-hamburg.de/hzsk/de/islandora/object/recording:', $corpus-version-name, '_', replace($audio-filename, '\.mp3$', '', 'i'), '/datastream/MP3/', $audio-filename)"/>
                        </xsl:attribute>
                        <xsl:attribute name="audio_filename">
                            <xsl:value-of select="$audio-filename"/>
                        </xsl:attribute>
                        <xsl:attribute name="audio_start">
                            <xsl:value-of
                                select="../../../../../*:timeline/*:when[@xml:id = $TstartTime]/@interval"
                            />
                        </xsl:attribute>
                        <xsl:attribute name="audio_end">
                            <xsl:value-of
                                select="../../../../../*:timeline/*:when[@xml:id = $TendTime]/@interval"
                            />
                        </xsl:attribute>
                        <xsl:value-of select="text()"/>
                    </x>
                </xsl:if>
            </xsl:for-each>
        </xsl:for-each>
    </xsl:variable>
    <!-- add the value number and only have one instance of each word -->
    <xsl:variable name="allwordsvalues">
        <xsl:for-each-group select="$allwords/x" group-by="text()">
            <xsl:for-each-group select="current-group()" group-by="@category">
                <!-- The inner group holds exactly the entries sharing this text and category,
                     so its size is the frequency; no rescan of $allwords is needed. -->
                <xsl:variable name="occurrences" select="count(current-group())"/>
                <!-- The audio data must not be empty: take the first occurrence that has all of it. -->
                <xsl:for-each
                    select="(current-group()[not(@audio_filename = '')][not(@audio_start = '')][not(@audio_end = '')])[1]">
                    <x>
                        <xsl:copy-of select="@*"/>
                        <xsl:attribute name="value">
                            <xsl:value-of select="$occurrences"/>
                        </xsl:attribute>
                        <xsl:value-of select="."/>
                    </x>
                </xsl:for-each>
            </xsl:for-each-group>
        </xsl:for-each-group>
    </xsl:variable>
    <!-- now write the $max-words most frequent entries -->
    <xsl:for-each select="$allwordsvalues/x">
        <xsl:sort select="@value" data-type="number" order="descending"/>
        <xsl:if test="position() &lt;= $max-words">
            <xsl:value-of select="$NEWLINE"/>
            <xsl:text> {</xsl:text>
            <xsl:value-of select="$NEWLINE"/>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'x'"/>
                <xsl:with-param name="value" select="."/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'category'"/>
                <xsl:with-param name="value" select="@category"/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'eng_transl'"/>
                <xsl:with-param name="value" select="@eng_transl"/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'audio_file'"/>
                <xsl:with-param name="value" select="@audio_file"/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'audio_filename'"/>
                <xsl:with-param name="value" select="@audio_filename"/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'audio_start'"/>
                <xsl:with-param name="value" select="@audio_start"/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'audio_end'"/>
                <xsl:with-param name="value" select="@audio_end"/>
            </xsl:call-template>
            <xsl:call-template name="json-string-member">
                <xsl:with-param name="name" select="'value'"/>
                <xsl:with-param name="value" select="@value"/>
                <xsl:with-param name="last" select="true()"/>
            </xsl:call-template>
            <xsl:text> }</xsl:text>
            <!-- No comma after the last object written: that is either the entry at the
                 limit or, when there are fewer entries than the limit, the final entry. -->
            <xsl:if test="position() != $max-words and position() != last()">
                <xsl:text>,</xsl:text>
            </xsl:if>
        </xsl:if>
    </xsl:for-each>
</xsl:template>
<!--
    Helper: writes one JSON object member with a string value, followed by a line break.
      name  - member name (written as is)
      value - member value; backslash, double quote, line feed, carriage return and tab
              are escaped so the output remains valid JSON
      last  - true() for the final member of an object (no trailing comma is written)
-->
<xsl:template name="json-string-member">
    <xsl:param name="name" as="xs:string" required="yes"/>
    <xsl:param name="value" as="xs:string?" required="yes"/>
    <xsl:param name="last" as="xs:boolean" select="false()"/>
    <!-- the backslash must be escaped first so the escapes added afterwards are not doubled -->
    <xsl:variable name="escaped"
        select="replace(replace(replace(replace(replace(string($value), '\\', '\\\\'), '&quot;', '\\&quot;'), '\n', '\\n'), '\r', '\\r'), '\t', '\\t')"/>
    <xsl:value-of select="concat(' &quot;', $name, '&quot;: &quot;', $escaped, '&quot;')"/>
    <xsl:if test="not($last)">
        <xsl:text>,</xsl:text>
    </xsl:if>
    <xsl:value-of select="$NEWLINE"/>
</xsl:template>
</xsl:stylesheet>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment