Skip to content
Snippets Groups Projects
Commit e10216b9 authored by Anne Ferger's avatar Anne Ferger
Browse files

initial commit for release

parents
No related branches found
No related tags found
No related merge requests found
Pipeline #4591 passed
Showing
with 1606 additions and 0 deletions
*
!*/
!.gitignore
!*.java
!*.form
!*.xsl
!*.xml
!*.html
!*.css
!*.png
!*.sh
!exmaralda
!LICENSE
!README.md
target/
doc/
compile_withmaven:
image: maven:latest
script:
- mvn install:install-file -Dfile=lib/EXMARaLDA.jar -DgroupId=org.exmaralda -DartifactId=exmaralda -Dversion=1.10 -Dpackaging=jar -DgeneratePom=true
- mvn clean compile assembly:single
artifacts:
expire_in: 5 week
paths:
- target/hzsk-corpus-services-0.2-jar-with-dependencies.jar
[submodule "exmaralda"]
path = exmaralda
url = https://github.com/EXMARaLDA/exmaralda.git
This diff is collapsed.
# Introduction
The HZSK Corpus Services project bundles functionality used for maintenance, curation, conversion, and visualization of corpus data at the HZSK and in the project INEL.
# Gitlab artifacts
The latest compiled .jar can be found here:
https://gitlab.rrz.uni-hamburg.de/hzsk-open-access/hzsk-corpus-services-release/-/jobs/artifacts/release/browse?job=compile_withmaven
# Compilation
To use the validator for HZSK corpora, compile it using `mvn clean compile assembly:single`,
or use a pregenerated artifact from GitLab (see Gitlab artifacts).
# Usage
The usable functions can be found in the help output:
`java -jar hzsk-corpus-services-0.2-jar-with-dependencies.jar -h`
\ No newline at end of file
Subproject commit f59456aaf784545c9b869ab11c2c0220e0430973
#!/bin/bash
# GIT Helper (German user interface), version 1.1.
#
# Interactive menu that wraps the git commands needed to inspect, update and
# publish a local repository. Git commands act on the current working
# directory; the pretty printer is run on the directory containing this script.
# Messages about conflicts are appended to "<epoch>-git-conflict.txt" in the
# current working directory.
echo "
________._________________ ___ .__
/ _____/| \__ ___/ | \ ____ | | ______ ___________
/ \ ___| | | | / ~ \_/ __ \| | \____ \_/ __ \_ __ \
\ \_\ \ | | | \ Y /\ ___/| |_| |_> > ___/| | \/
\______ /___| |____| \___|_ / \___ >____/ __/ \___ >__|
\/ \/ \/ |__| \/
Welcome to GIT
"
today=$(date)
timestamp=$(date +%s)
remote="origin"
branch="master"
conflictPath="${timestamp}-git-conflict.txt"
# Quoted so that a script location containing spaces is resolved correctly.
SCRIPT=$(realpath "$0")
directory=$(dirname "$SCRIPT")
# Relative to the current working directory, not to $directory.
corpusServicesJar="../hzsk-corpus-services-0.2-jar-with-dependencies.jar"
#options=("Viewing your current GIT configuration." "Setting up your GIT configuration." "See the current state of your local GIT repository." "See the changes in the files of your local GIT repository." "Update your local GIT repository." "Save all your changes, add a message, publish your changes to the main GIT repository and update your local GIT repository." "Help!" "Quit")
options=("Aktuellen Stand des lokalen GIT repository anschauen." "Lokales GIT repository auf den neuesten Stand bringen." "Alle ausgeführten Änderungen speichern, eine Nachricht hinzufügen, diese Änderungen beim main GIT repository veröffentlichen und das lokale GIT repository auf den neuesten Stand bringen." "Die aktuelle GIT Konfiguration ansehen." "Die GIT Konfiguration ändern." "Hilfe!" "Beenden")
PS3="
Bitte wähle eine Option (1-${#options[@]}) oder drücke ENTER um das Menü anzuzeigen: "
select opt in "${options[@]}"
do
  case "$opt" in
    "Die aktuelle GIT Konfiguration ansehen.")
      echo "Name:"
      git config user.name
      echo "Email-Adresse:"
      git config user.email
      ;;
    "Die GIT Konfiguration ändern.")
      # The values are passed as-is; wrapping them in escaped quotes would
      # store the quote characters as part of the name / address.
      read -r -p "Vorname Nachname eingeben: " usrname
      git config --global user.name "$usrname"
      read -r -p "Email-Adresse eingeben: " usrmail
      git config --global user.email "$usrmail"
      ;;
    "Aktuellen Stand des lokalen GIT repository anschauen.")
      git status
      ;;
    #"See the changes in the files of your local GIT repository.")
    #  if [[ $(git diff) ]]; then
    #    echo "To close the following list of differences press 'q'"
    #    git diff
    #  else
    #    echo "There are no changes in your local files."
    #  fi
    #  read -n 1 -s -r -p "Press any key to continue"
    #  ;;
    "Lokales GIT repository auf den neuesten Stand bringen.")
      # `git ls-files -u` lists unmerged index entries; any line means an
      # unresolved conflict is already present.
      CONFLICTS=$(git ls-files -u | wc -l)
      if [ "$CONFLICTS" -gt 0 ]
      then
        # tee: show the reason on the terminal as well as logging it,
        # otherwise the user only sees a silent prompt before the exit.
        {
          echo "Das lokale GIT repository kann nicht auf den neuesten Stand gebracht werden, weil ein GIT-Konflikt vorliegt."
          echo "Der GIT-Konflikt muss manuell gelöst werden."
        } | tee -a "$conflictPath"
        read -r
        exit 1
      else
        if [ -z "$(git status --porcelain)" ]
        then
          echo "Es liegen keine lokalen Änderungen vor, das updaten wird vorbereitet."
          git fetch
          git pull "$remote" "$branch"
          CONFLICTS=$(git ls-files -u | wc -l)
          if [ "$CONFLICTS" -gt 0 ]
          then
            echo "Es liegt ein GIT-Konflikt vor. Der Vorgang wird abgebrochen."
            git merge --abort
            echo "Der GIT-Konflikt muss manuell gelöst werden in $directory." >> "$conflictPath"
            read -r
          else
            echo "Es liegt kein GIT-Konflikt vor, das GIT repository wird auf den neuesten Stand gebracht."
          fi
          if [ -z "$(git status --porcelain)" ]
          then
            echo "Der Vorgang war erfolgreich."
          else
            git status
            echo "Achtung: Der pull konnte nicht durchgeführt werden. "
            git status >> "$conflictPath"
            echo "Achtung: Der pull konnte nicht durchgeführt werden. " >> "$conflictPath"
            read -r
          fi
        else
          git status
          echo "Es liegen lokale Änderungen vor. Diese müssen zuerst entfernt oder mithilfe des GIT Assistenten gespeichert werden, bevor das GIT repository auf den neuesten Stand gebracht werden kann."
          read -r
        fi
      fi
      ;;
    "Alle ausgeführten Änderungen speichern, eine Nachricht hinzufügen, diese Änderungen beim main GIT repository veröffentlichen und das lokale GIT repository auf den neuesten Stand bringen.")
      CONFLICTS=$(git ls-files -u | wc -l)
      if [ "$CONFLICTS" -gt 0 ]
      then
        # Shown on the terminal and logged (see the update option above).
        echo "Der GIT-Konflikt muss manuell gelöst werden." | tee -a "$conflictPath"
        read -r
        exit 1
      else
        echo "No merge conflict to begin with."
        # Show all changed files and ask whether they should be published.
        echo "The files that are changed are:"
        git status
        read -r -p "Sollen folgende geänderte Dateien im main GIT repository veröffentlicht werden? (j/n)" yn
        case "$yn" in
          [YyJj]* )
            # Ask for the commit message until the user confirms it.
            while true; do
              read -r -p "Commit Nachricht eingeben: " message
              echo "Die Commit Nachricht ist: $message"
              read -r -p "Ist die Nachricht korrekt? (j/n)" yn2
              case "$yn2" in
                [YyJj]* ) break;;
                [Nn]* ) echo "Bitte Nachricht erneut eingeben";;
                * ) echo "Bitte mit j (ja) oder n (nein) antworten.";;
              esac
            done
            git add -A
            git commit -m "$message"
            # Pretty printing runs without a further confirmation prompt.
            java -Xmx3g -jar "$corpusServicesJar" -i "$directory" -o "$directory/prettyprint-output.html" -c PrettyPrintData -f
            git add -A
            # Keep the error report out of the automatic commit and restore
            # its committed state.
            git reset -- curation/CorpusServices_Errors.xml
            git checkout curation/CorpusServices_Errors.xml
            git commit -am "Automatically pretty printed on $today"
            git fetch
            git pull "$remote" "$branch"
            CONFLICTS=$(git ls-files -u | wc -l)
            if [ "$CONFLICTS" -gt 0 ]
            then
              echo "Es liegt ein GIT-Konflikt vor. Der Vorgang wird abgebrochen."
              git merge --abort
              echo "Der GIT-Konflikt muss manuell gelöst werden in $directory." >> "$conflictPath"
              echo "Der Vorgang wurde abgebrochen und der GIT Assistent wird geschlossen."
              read -r
              # Non-zero status, consistent with the other conflict exits.
              exit 1
            else
              echo "Merging war erfolgreich bzw. nicht nötig."
              git push "$remote" "$branch"
            fi
            if [ -z "$(git status --porcelain)" ]
            then
              echo "Der Vorgang war erfolgreich."
              read -r
            else
              git status
              echo "Achtung: Der pull konnte nicht durchgeführt werden. "
              git status >> "$conflictPath"
              echo "Achtung: Der pull konnte nicht durchgeführt werden. " >> "$conflictPath"
              read -r
            fi
            ;;
          [Nn]* ) echo "Der Vorgang wurde abgebrochen.";;
          * ) echo "Bitte mit j (ja) oder n (nein) antworten.";;
        esac
      fi
      ;;
    "Hilfe!")
      clear
      echo "Dieses Skript kann genutzt werden, um Änderungen, die lokal gemacht wurden, dem main GIT repository hinzuzufügen, sodass alle, die mit den Daten arbeiten, sie sehen und nutzen können. "
      echo "Bitte ENTER drücken um das Menü anzuzeigen und die Nummer des Vorgangs, der ausgeführt werden soll, eingeben und mit ENTER bestätigen. "
      echo "Wenn ein GIT-Konflikt oder ein Fehler auftritt bitte beim technischen Team melden. "
      echo " "
      echo "GitHelper Version: 1.1"
      echo "Git Version:"
      git --version
      echo "Java Version:"
      java -version
      echo "Corpus-Services version: " "$corpusServicesJar"
      if [ -f "$corpusServicesJar" ]; then
        echo "$corpusServicesJar wurde gefunden. "
      else
        echo "FEHLER: $corpusServicesJar wurde nicht gefunden. "
      fi
      ;;
    "Beenden")
      clear
      break
      ;;
    *) echo "Die Option $REPLY ist nicht verfügbar.";;
  esac
done
#!/bin/bash
# GIT Helper (English user interface), version 1.1.
#
# Interactive menu that wraps the git commands needed to inspect, update and
# publish a local repository. Git commands act on the current working
# directory; the pretty printer is run on the directory containing this script.
# Messages about conflicts are appended to "<epoch>-git-conflict.txt" in the
# current working directory.
echo "
________._________________ ___ .__
/ _____/| \__ ___/ | \ ____ | | ______ ___________
/ \ ___| | | | / ~ \_/ __ \| | \____ \_/ __ \_ __ \
\ \_\ \ | | | \ Y /\ ___/| |_| |_> > ___/| | \/
\______ /___| |____| \___|_ / \___ >____/ __/ \___ >__|
\/ \/ \/ |__| \/
Welcome to GIT
"
today=$(date)
timestamp=$(date +%s)
remote="origin"
branch="master"
conflictPath="${timestamp}-git-conflict.txt"
# Quoted so that a script location containing spaces is resolved correctly.
SCRIPT=$(realpath "$0")
directory=$(dirname "$SCRIPT")
# Relative to the current working directory, not to $directory.
corpusServicesJar="../hzsk-corpus-services-0.2-jar-with-dependencies.jar"
#options=("Viewing your current GIT configuration." "Setting up your GIT configuration." "See the current state of your local GIT repository." "See the changes in the files of your local GIT repository." "Update your local GIT repository." "Save all your changes, add a message, publish your changes to the main GIT repository and update your local GIT repository." "Help!" "Quit")
options=("See the current state of your local GIT repository." "Update your local GIT repository." "Save all your changes, add a message, publish your changes to the main GIT repository and update your local GIT repository." "Viewing your current GIT configuration." "Setting up your GIT configuration." "Help!" "Quit")
PS3="
Please choose an option (1-${#options[@]}) or press ENTER to display menu: "
select opt in "${options[@]}"
do
  case "$opt" in
    "Viewing your current GIT configuration.")
      echo "Your username is:"
      git config user.name
      echo "Your email is:"
      git config user.email
      ;;
    "Setting up your GIT configuration.")
      # The values are passed as-is; wrapping them in escaped quotes would
      # store the quote characters as part of the name / address.
      read -r -p "Enter your user name: " usrname
      git config --global user.name "$usrname"
      read -r -p "Enter your email: " usrmail
      git config --global user.email "$usrmail"
      ;;
    "See the current state of your local GIT repository.")
      git status
      ;;
    #"See the changes in the files of your local GIT repository.")
    #  if [[ $(git diff) ]]; then
    #    echo "To close the following list of differences press 'q'"
    #    git diff
    #  else
    #    echo "There are no changes in your local files."
    #  fi
    #  read -n 1 -s -r -p "Press any key to continue"
    #  ;;
    "Update your local GIT repository.")
      # `git ls-files -u` lists unmerged index entries; any line means an
      # unresolved conflict is already present.
      CONFLICTS=$(git ls-files -u | wc -l)
      if [ "$CONFLICTS" -gt 0 ]
      then
        # tee: show the reason on the terminal as well as logging it,
        # otherwise the user only sees a silent prompt before the exit.
        {
          echo "The local GIT repository cannot be updated because of a GIT conflict."
          echo "Please resolve merge conflict manually."
        } | tee -a "$conflictPath"
        read -r
        exit 1
      else
        if [ -z "$(git status --porcelain)" ]
        then
          echo "There are no local changes, updating will be started."
          git fetch
          git pull "$remote" "$branch"
          CONFLICTS=$(git ls-files -u | wc -l)
          if [ "$CONFLICTS" -gt 0 ]
          then
            echo "There is a merge conflict. Aborting"
            git merge --abort
            echo "Please resolve merge conflict manually in $directory." >> "$conflictPath"
            read -r
          else
            echo "There are no merge conflicts, updating will be carried out."
          fi
          if [ -z "$(git status --porcelain)" ]
          then
            echo "Everything was successful."
          else
            git status
            echo "The pull was faulty. Please fix it. "
            git status >> "$conflictPath"
            echo "The pull was faulty. Please fix it. " >> "$conflictPath"
            read -r
          fi
        else
          git status
          echo "There are local changes. Please remove or save them with the GIT Helper before updating."
          read -r
        fi
      fi
      ;;
    "Save all your changes, add a message, publish your changes to the main GIT repository and update your local GIT repository.")
      CONFLICTS=$(git ls-files -u | wc -l)
      if [ "$CONFLICTS" -gt 0 ]
      then
        # Shown on the terminal and logged (see the update option above).
        echo "Please resolve merge conflict manually." | tee -a "$conflictPath"
        read -r
        exit 1
      else
        echo "No merge conflict to begin with."
        # Show all changed files and ask whether they should be published.
        echo "The files that are changed are:"
        git status
        read -r -p "Do you want to add these changes to the main GIT repository? (y/n)" yn
        case "$yn" in
          [Yy]* )
            # Ask for the commit message until the user confirms it.
            while true; do
              read -r -p "Enter your commit message: " message
              echo "Your commit message is: $message"
              read -r -p "Is the message correct? (y/n)" yn2
              case "$yn2" in
                [YyJj]* ) break;;
                [Nn]* ) echo "Please enter the message again";;
                * ) echo "Please answer yes or no.";;
              esac
            done
            git add -A
            git commit -m "$message"
            # Pretty printing runs without a further confirmation prompt.
            java -Xmx3g -jar "$corpusServicesJar" -i "$directory" -o "$directory/prettyprint-output.html" -c PrettyPrintData -f
            git add -A
            # Keep the error report out of the automatic commit and restore
            # its committed state.
            git reset -- curation/CorpusServices_Errors.xml
            git checkout curation/CorpusServices_Errors.xml
            git commit -am "Automatically pretty printed on $today"
            git fetch
            git pull "$remote" "$branch"
            CONFLICTS=$(git ls-files -u | wc -l)
            if [ "$CONFLICTS" -gt 0 ]
            then
              echo "There is a merge conflict. Aborting"
              git merge --abort
              echo "Please resolve merge conflict manually in $directory." >> "$conflictPath"
              echo "The process was stopped and GIT Helper will be closed."
              read -r
              # Non-zero status, consistent with the other conflict exits.
              exit 1
            else
              echo "Merging was successful or not needed."
              git push "$remote" "$branch"
            fi
            if [ -z "$(git status --porcelain)" ]
            then
              echo "Everything was successful."
              read -r
            else
              git status
              echo "The pull was faulty. Please fix it. "
              git status >> "$conflictPath"
              echo "The pull was faulty. Please fix it. " >> "$conflictPath"
              read -r
            fi
            ;;
          [Nn]* ) echo "The process was stopped.";;
          * ) echo "Please answer yes or no.";;
        esac
      fi
      ;;
    "Help!")
      clear
      echo "This script can be used to add changes you made to the main GIT repository so everyone working with the data can see them."
      echo "Please press ENTER to display the menu and enter the number of the option you want to use."
      echo "If there is a conflict or something goes wrong, please contact the technical team."
      echo " "
      echo "GitHelper version: 1.1"
      echo "Git version:"
      git --version
      echo "Java version:"
      java -version
      echo "corpus-services version: " "$corpusServicesJar"
      if [ -f "$corpusServicesJar" ]; then
        echo "$corpusServicesJar exists"
      else
        echo "ERROR: $corpusServicesJar does not exist"
      fi
      ;;
    "Quit")
      clear
      break
      ;;
    *) echo "The option $REPLY is not available";;
  esac
done
images/CorpusServicesBackendNeu.png

175 KiB

File added
pom.xml 0 → 100644
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>de.uni_hamburg.corpora</groupId>
<artifactId>hzsk-corpus-services</artifactId>
<version>0.2</version>
<packaging>jar</packaging>
<name>hzsk-corpus-services</name>
<url>http://maven.apache.org</url>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.2</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<!--
https://stackoverflow.com/questions/574594/how-can-i-create-an-executable-jar-with-dependencies-using-maven
-->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>de.uni_hamburg.corpora.CorpusMagician</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
<!-- Doxygen plugin -->
<plugin>
  <groupId>com.soebes.maven.plugins</groupId>
  <artifactId>doxygen-maven-plugin</artifactId>
  <version>1.1.0</version>
  <configuration>
    <quiet>false</quiet>
    <projectName>${project.artifactId}</projectName>
    <projectNumber>${project.version}</projectNumber>
    <optimizeOutputJava>true</optimizeOutputJava>
    <extractAll>true</extractAll>
    <extractStatic>true</extractStatic>
    <!-- "recursive" is set once; the duplicate entry was removed. -->
    <recursive>true</recursive>
    <exclude>.git</exclude>
    <umlLook>true</umlLook>
    <haveGraph>true</haveGraph>
    <generateLatex>false</generateLatex>
    <generateHtml>true</generateHtml>
    <outputDirectory>${basedir}/doc</outputDirectory>
  </configuration>
  <executions>
    <!-- Runs the "report" goal during the compile phase. -->
    <execution>
      <phase>compile</phase>
      <goals>
        <goal>report</goal>
      </goals>
    </execution>
  </executions>
</plugin>
</plugins>
<resources>
<resource>
<directory>src/main/java/de/uni_hamburg/corpora/validation/resources</directory>
</resource>
<resource>
<directory>src/main/java/de/uni_hamburg/corpora/visualization/resources</directory>
</resource>
<resource>
<directory>src/main/java/de/uni_hamburg/corpora/conversion/resources</directory>
</resource>
<resource>
<directory>src/test/java/de/uni_hamburg/corpora/utilities/resources</directory>
</resource>
<resource>
<directory>src/test/java/de/uni_hamburg/corpora/resources</directory>
</resource>
</resources>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.exmaralda</groupId>
<artifactId>exmaralda</artifactId>
<version>1.10</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>1.1.3</version>
</dependency>
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
<version>9.8.0-6</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
<version>1.1.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli for validator CLI -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.3.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-lang/commons-lang -->
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.ini4j/ini4j testing using ini as configuration file format-->
<dependency>
<groupId>org.ini4j</groupId>
<artifactId>ini4j</artifactId>
<version>0.5.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api
for validator servlet e.g. -->
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>3.1.0</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.reflections/reflections -->
<dependency>
<groupId>org.reflections</groupId>
<artifactId>reflections</artifactId>
<version>0.9.11</version>
</dependency>
<!-- https://github.com/phax/ph-schematron -->
<dependency>
<groupId>com.helger</groupId>
<artifactId>ph-schematron</artifactId>
<version>5.0.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jdom/jdom
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>2.0.2</version>
</dependency>
-->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>4.2</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.languagetool</groupId>
<artifactId>language-de</artifactId>
<version>4.2</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
<type>jar</type>
</dependency>
</dependencies>
</project>
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
/**
*
* @author fsnv625
*/
class AdditionalData {
    // Package-private class that declares no fields or methods.
    // NOTE(review): looks like a placeholder for a future data type - confirm it is still needed.
}
package de.uni_hamburg.corpora;
import de.uni_hamburg.corpora.utilities.PrettyPrinter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.io.FilenameUtils;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.xml.sax.SAXException;
/**
*
* @author fsnv625
*/
class AnnotationSpecification implements CorpusData, XMLData {

    /** File content decoded as UTF-8; replaced by {@link #updateUnformattedString(String)}. */
    String originalstring;
    /** Document parsed from the URL given to the constructor. */
    Document jdom;
    /** Location this object was loaded from. */
    URL url;
    /** Result of resolving "." (or ".." if the path ends in "/") against the file URI. */
    URL parenturl;
    /** Last path segment of the URL, including the extension. */
    String filename;
    /** Last path segment of the URL without the extension. */
    String filenamewithoutending;

    /**
     * Parses the XML at {@code url}, reads the same file as UTF-8 text and
     * derives parent URL and file names from the URL.
     *
     * Failures are logged at SEVERE level and not rethrown, so fields assigned
     * after the failing step remain {@code null}.
     *
     * @param url location of the file; must be convertible to a file system
     *            path because the content is read through {@code Paths.get}
     */
    public AnnotationSpecification(URL url) {
        try {
            this.url = url;
            SAXBuilder builder = new SAXBuilder();
            jdom = builder.build(url);
            // Convert once and reuse for reading the file and for the parent lookup.
            URI uri = url.toURI();
            originalstring = new String(Files.readAllBytes(Paths.get(uri)), "UTF-8");
            URI parentURI = uri.getPath().endsWith("/") ? uri.resolve("..") : uri.resolve(".");
            parenturl = parentURI.toURL();
            filename = FilenameUtils.getName(url.getPath());
            filenamewithoutending = FilenameUtils.getBaseName(url.getPath());
        } catch (JDOMException | IOException | URISyntaxException ex) {
            // Log under this class' own logger (previously attributed to CmdiData).
            Logger.getLogger(AnnotationSpecification.class.getName())
                    .log(Level.SEVERE, "Could not load annotation specification from " + url, ex);
        }
    }

    @Override
    public URL getURL() {
        return url;
    }

    /**
     * @return the pretty printed form of the current unformatted string
     */
    @Override
    public String toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        return toPrettyPrintedXML();
    }

    @Override
    public String toUnformattedString() {
        return originalstring;
    }

    /**
     * Passes the unformatted string to {@code PrettyPrinter.indent} together
     * with the literal argument "event"; the meaning of that argument is
     * defined by PrettyPrinter.
     */
    private String toPrettyPrintedXML() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        PrettyPrinter pp = new PrettyPrinter();
        return pp.indent(toUnformattedString(), "event");
    }

    /** Replaces the stored text; the parsed document is not updated. */
    @Override
    public void updateUnformattedString(String newUnformattedString) {
        originalstring = newUnformattedString;
    }

    @Override
    public URL getParentURL() {
        return parenturl;
    }

    @Override
    public void setURL(URL nurl) {
        url = nurl;
    }

    @Override
    public void setParentURL(URL url) {
        parenturl = url;
    }

    @Override
    public String getFilename() {
        return filename;
    }

    @Override
    public void setFilename(String s) {
        filename = s;
    }

    @Override
    public String getFilenameWithoutFileEnding() {
        return filenamewithoutending;
    }

    @Override
    public void setFilenameWithoutFileEnding(String s) {
        filenamewithoutending = s;
    }

    @Override
    public Document getJdom() {
        return jdom;
    }

    @Override
    public void setJdom(Document njdom) {
        jdom = njdom;
    }
}
/**
* @file BasicTranscriptionData.java
*
* Connects BasicTranscription from Exmaralda to HZSK corpus services.
*
* @author Tommi A Pirinen <tommi.antero.pirinen@uni-hamburg.de>
* @author HZSK
*/
package de.uni_hamburg.corpora;
import de.uni_hamburg.corpora.utilities.PrettyPrinter;
import org.exmaralda.partitureditor.jexmaralda.BasicTranscription;
import java.io.File;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.jdom.Document;
import org.jdom.input.SAXBuilder;
import org.xml.sax.SAXException;
import org.jdom.JDOMException;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.io.FilenameUtils;
import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
/**
* Provides access to basic transcriptions as a data type that can be read and
* written HZSK corpus services. Naming might change, depending on what it ends
* up being implemented as. It seems to me like a bridge now, or just aggregate.
*/
public class BasicTranscriptionData implements CorpusData, ContentData, XMLData {

    /** EXMARaLDA object model of the transcription; set by {@link #loadFile(File)}. */
    private BasicTranscription bt;
    /** Location of the transcription file. */
    URL url;
    /** JDOM view of the transcription; starts as an empty document. */
    Document jdom = new Document();
    /** File content decoded as UTF-8; this is what gets pretty printed. */
    String originalstring;
    /** Result of resolving "." (or ".." if the path ends in "/") against the file URI. */
    URL parenturl;
    /** Last path segment of the URL, including the extension. */
    String filename;
    /** Last path segment of the URL without the extension. */
    String filenamewithoutending;

    /** Creates an empty instance; use {@link #loadFile(File)} and the setters to fill it. */
    public BasicTranscriptionData() {
    }

    /**
     * Parses the file at {@code url} with JDOM and with the EXMARaLDA reader,
     * reads it as UTF-8 text and derives parent URL and file names.
     *
     * Failures are logged at SEVERE level and not rethrown, so fields assigned
     * after the failing step keep their initial values.
     *
     * @param url location of the transcription; must be convertible to a File
     */
    public BasicTranscriptionData(URL url) {
        try {
            this.url = url;
            SAXBuilder builder = new SAXBuilder();
            jdom = builder.build(url);
            // Convert once and reuse for all file based lookups below.
            URI uri = url.toURI();
            loadFile(new File(uri));
            originalstring = new String(Files.readAllBytes(Paths.get(uri)), "UTF-8");
            URI parentURI = uri.getPath().endsWith("/") ? uri.resolve("..") : uri.resolve(".");
            parenturl = parentURI.toURL();
            filename = FilenameUtils.getName(url.getPath());
            filenamewithoutending = FilenameUtils.getBaseName(url.getPath());
        } catch (JDOMException | IOException | URISyntaxException | SAXException ex) {
            // Log under this class' own logger (previously attributed to UnspecifiedXMLData).
            Logger.getLogger(BasicTranscriptionData.class.getName())
                    .log(Level.SEVERE, "Could not load basic transcription from " + url, ex);
        } catch (JexmaraldaException ex) {
            Logger.getLogger(BasicTranscriptionData.class.getName())
                    .log(Level.SEVERE, "Could not load basic transcription from " + url, ex);
        }
    }

    /**
     * Loads the basic transcription from a file and sets {@code url} to the
     * file's URL. With some versions of EXMARaLDA this emits a harmless
     * message to stdout.
     *
     * Only the EXMARaLDA model and the URL are updated; the unformatted string
     * and the JDOM document are left as they are.
     */
    public void loadFile(File f) throws SAXException, JexmaraldaException, MalformedURLException {
        // The SAX reader is used instead of new BasicTranscription(path)
        // because the transcription should be read as it is, without
        // resolving the paths.
        org.exmaralda.partitureditor.jexmaralda.sax.BasicTranscriptionSaxReader reader = new org.exmaralda.partitureditor.jexmaralda.sax.BasicTranscriptionSaxReader();
        bt = reader.readFromFile(f.getAbsolutePath());
        url = f.toURI().toURL();
    }

    /**
     * Rebuilds the JDOM document from the XML serialization of the EXMARaLDA
     * basic transcription.
     */
    public void updateJdomDoc() throws SAXException, JexmaraldaException, MalformedURLException, JDOMException, IOException {
        String xmlString = bt.toXML();
        SAXBuilder builder = new SAXBuilder();
        // build(String) interprets its argument as a system id (URI), so the
        // XML text has to be supplied through a Reader.
        jdom = builder.build(new java.io.StringReader(xmlString));
    }

    /**
     * Pretty prints the unformatted string with the shared PrettyPrinter so
     * that all files are formatted the same way. The literal "event" is passed
     * through to {@code PrettyPrinter.indent}; its meaning is defined there.
     */
    private String toPrettyPrintedXML() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        PrettyPrinter pp = new PrettyPrinter();
        return pp.indent(toUnformattedString(), "event");
    }

    /**
     * @return the pretty printed form of the current unformatted string
     */
    public String toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        return toPrettyPrintedXML();
    }

    /**
     * Command line entry point: pretty prints INPUT to OUTPUT, or in place
     * when OUTPUT is omitted. Exits with status 1 on wrong usage or on error.
     *
     * @param args INPUT [OUTPUT]
     */
    public static void main(String[] args) {
        if ((args.length != 2) && (args.length != 1)) {
            System.out.println("Usage: "
                    + BasicTranscriptionData.class.getName()
                    + " INPUT [OUTPUT]");
            System.exit(1);
        }
        try {
            BasicTranscriptionData btd = new BasicTranscriptionData();
            File input = new File(args[0]);
            btd.loadFile(input);
            // loadFile does not populate the unformatted string, which is the
            // text that toSaveableString() pretty prints; read it here.
            btd.setOriginalString(new String(Files.readAllBytes(input.toPath()), "UTF-8"));
            String prettyXML = btd.toSaveableString();
            boolean emplace = (args.length == 1);
            // In-place mode writes to a temporary file first and then
            // replaces the input with it.
            String target = emplace ? "tempfile.exb" : args[1];
            // UTF-8 matches the encoding used for reading; the writer is
            // closed even if printing fails.
            try (PrintWriter output = new PrintWriter(target, "UTF-8")) {
                output.print(prettyXML);
            }
            if (emplace) {
                Files.move(Paths.get("tempfile.exb"), Paths.get(args[0]),
                        java.nio.file.StandardCopyOption.REPLACE_EXISTING);
            }
        } catch (SAXException | IOException | TransformerException
                | ParserConfigurationException | XPathExpressionException ex) {
            ex.printStackTrace();
            System.exit(1);
        } catch (JexmaraldaException je) {
            je.printStackTrace();
            System.exit(1);
        }
    }

    @Override
    public URL getURL() {
        return url;
    }

    /** @return the JDOM document (same object as {@link #getJdom()}) */
    public Document getReadbtasjdom() {
        return jdom;
    }

    @Override
    public String toUnformattedString() {
        return originalstring;
    }

    /** Replaces the stored text; neither the JDOM document nor the EXMARaLDA model is updated. */
    @Override
    public void updateUnformattedString(String newUnformattedString) {
        originalstring = newUnformattedString;
    }

    public BasicTranscription getEXMARaLDAbt() {
        return bt;
    }

    public void setEXMARaLDAbt(BasicTranscription btn) {
        bt = btn;
    }

    public void setOriginalString(String s) {
        originalstring = s;
    }

    @Override
    public Document getJdom() {
        return getReadbtasjdom();
    }

    @Override
    public void setJdom(Document doc) {
        jdom = doc;
    }

    public void setReadbtasjdom(Document doc) {
        setJdom(doc);
    }

    @Override
    public URL getParentURL() {
        return parenturl;
    }

    @Override
    public void setURL(URL nurl) {
        url = nurl;
    }

    @Override
    public void setParentURL(URL url) {
        parenturl = url;
    }

    @Override
    public String getFilename() {
        return filename;
    }

    @Override
    public void setFilename(String s) {
        filename = s;
    }

    @Override
    public String getFilenameWithoutFileEnding() {
        return filenamewithoutending;
    }

    @Override
    public void setFilenameWithoutFileEnding(String s) {
        filenamewithoutending = s;
    }
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
import de.uni_hamburg.corpora.utilities.PrettyPrinter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.io.FilenameUtils;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.xml.sax.SAXException;
/**
*
* @author Ozzy
*/
public class CmdiData implements CorpusData, XMLData, Metadata {

    /** Document parsed from the URL given to the constructor. */
    Document jdom;
    /** Location this object was loaded from. */
    URL url;
    /** File content decoded as UTF-8. */
    String originalstring;
    /** Result of resolving "." (or ".." if the path ends in "/") against the file URI. */
    URL parenturl;
    /** Last path segment of the URL, including the extension. */
    String filename;
    /** Last path segment of the URL without the extension. */
    String filenamewithoutending;

    /**
     * Parses the XML at the given location, reads the same file as UTF-8 text
     * and derives parent URL and file names from the URL. Failures are logged
     * at SEVERE level and not rethrown.
     *
     * @param url location of the file to load
     */
    public CmdiData(URL url) {
        try {
            this.url = url;
            final SAXBuilder saxBuilder = new SAXBuilder();
            this.jdom = saxBuilder.build(url);
            final URI location = url.toURI();
            final byte[] rawContent = Files.readAllBytes(Paths.get(location));
            this.originalstring = new String(rawContent, "UTF-8");
            final URI parentLocation;
            if (location.getPath().endsWith("/")) {
                parentLocation = location.resolve("..");
            } else {
                parentLocation = location.resolve(".");
            }
            this.parenturl = parentLocation.toURL();
            final String path = url.getPath();
            this.filename = FilenameUtils.getName(path);
            this.filenamewithoutending = FilenameUtils.getBaseName(path);
        } catch (JDOMException | IOException | URISyntaxException ex) {
            Logger.getLogger(CmdiData.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    @Override
    public URL getURL() {
        return this.url;
    }

    /**
     * @return the pretty printed form of the current unformatted string
     */
    @Override
    public String toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        return toPrettyPrintedXML();
    }

    @Override
    public String toUnformattedString() {
        return this.originalstring;
    }

    /**
     * Hands the unformatted string to {@code PrettyPrinter.indent} together
     * with the literal argument "event".
     */
    private String toPrettyPrintedXML() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        final PrettyPrinter printer = new PrettyPrinter();
        return printer.indent(toUnformattedString(), "event");
    }

    /** Replaces the stored text; the parsed document is not touched. */
    @Override
    public void updateUnformattedString(String newUnformattedString) {
        this.originalstring = newUnformattedString;
    }

    @Override
    public URL getParentURL() {
        return this.parenturl;
    }

    @Override
    public Document getJdom() {
        return this.jdom;
    }

    @Override
    public void setJdom(Document doc) {
        this.jdom = doc;
    }

    @Override
    public void setURL(URL nurl) {
        this.url = nurl;
    }

    @Override
    public void setParentURL(URL url) {
        this.parenturl = url;
    }

    @Override
    public String getFilename() {
        return this.filename;
    }

    @Override
    public void setFilename(String s) {
        this.filename = s;
    }

    @Override
    public String getFilenameWithoutFileEnding() {
        return this.filenamewithoutending;
    }

    @Override
    public void setFilenameWithoutFileEnding(String s) {
        this.filenamewithoutending = s;
    }

    /** Not implemented: always throws {@link UnsupportedOperationException}. */
    @Override
    public Collection<URL> getReferencedCorpusDataURLs() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
import de.uni_hamburg.corpora.utilities.PrettyPrinter;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collection;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.xml.sax.SAXException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.jdom.Element;
import org.jdom.xpath.XPath;
import org.apache.commons.io.FilenameUtils;
/**
 * {@link CorpusData} implementation for a Coma metadata file, held both as a
 * parsed JDOM document and as its raw text.
 * <p>
 * Besides the generic accessors it can list the transcriptions referenced by
 * the {@code NSLink} elements of the document (see
 * {@link #BASIC_FILE_XPATH}).
 * <p>
 * The URL constructor does not propagate checked exceptions: parse, I/O and
 * URI problems are logged at SEVERE level and the fields that had not been
 * assigned before the failure keep their initial values.
 *
 * @author fsnv625
 */
public class ComaData implements Metadata, CorpusData, XMLData {

    //TODO
    //private Coma coma;
    //TODO change exceptions to adding ReportItems
    /** Location the data was read from. */
    URL url;
    /** Parsed form of the file; an empty document until a file has been read. */
    Document readcomaasjdom = new Document();
    /** File content decoded as UTF-8, exactly as read. */
    String originalstring;
    /** Last path segment of {@link #url}. */
    String filename;
    /** {@link #filename} as returned by {@code FilenameUtils.getBaseName}. */
    String filenamewithoutending;
    /** Directory URL that relative {@code NSLink} values are appended to. */
    public URL CORPUS_BASEDIRECTORY;
    public static String SEGMENTED_FILE_XPATH = "//Transcription[Description/Key[@Name='segmented']/text()='true']/NSLink";
    public static String BASIC_FILE_XPATH = "//Transcription[Description/Key[@Name='segmented']/text()='false']/NSLink";
    public static String ALL_FILE_XPATH = "//Transcription/NSLink";
    /** Never assigned by this class; stays {@code null} unless set from outside. */
    public ArrayList<URL> referencedCorpusDataURLs;

    /** Creates an instance with an empty document and no location. */
    public ComaData() {
    }

    /**
     * Reads and parses the file behind {@code url} and derives the base
     * directory and file names from it.
     *
     * @param url location of the file; must be convertible to a
     *            {@link java.nio.file.Path} for the raw text to be read
     */
    public ComaData(URL url) {
        this.url = url;
        try {
            readcomaasjdom = new SAXBuilder().build(url);
            URI uri = url.toURI();
            originalstring = new String(Files.readAllBytes(Paths.get(uri)),
                    java.nio.charset.StandardCharsets.UTF_8);
            //loadFile(f);
            CORPUS_BASEDIRECTORY = parentDirectoryOf(uri);
            filename = FilenameUtils.getName(url.getPath());
            filenamewithoutending = FilenameUtils.getBaseName(url.getPath());
        } catch (JDOMException | IOException | URISyntaxException ex) {
            // Log under this class, not under the classes this code was copied from.
            Logger.getLogger(ComaData.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    //TODO
    // public void loadFile(File f) throws SAXException, JexmaraldaException, MalformedURLException {
    // coma = new BasicTranscription(f.getAbsolutePath());
    // url = f.toURI().toURL();
    // }
    //TODO
    /*
    public void updateReadcomaasjdom() throws SAXException, JexmaraldaException, MalformedURLException, JDOMException, IOException {
    String xmlString = bt.toXML();
    SAXBuilder builder = new SAXBuilder();
    readbtasjdom = builder.build(xmlString);
    }
     */

    /**
     * Resolves the directory a URI lives in.
     *
     * @param uri the location of a file or directory
     * @return for a URI whose path ends in "/" the parent of that directory,
     *         otherwise the directory containing the file
     */
    private static URL parentDirectoryOf(URI uri) throws MalformedURLException {
        URI parentURI = uri.getPath().endsWith("/") ? uri.resolve("..") : uri.resolve(".");
        return parentURI.toURL();
    }

    /**
     * Collects the text of every node selected by {@link #BASIC_FILE_XPATH}
     * in the current document, in document order.
     */
    private List<String> selectBasicTranscriptionLinks() throws JDOMException {
        List<String> links = new ArrayList<>();
        List<?> nodes = XPath.newInstance(BASIC_FILE_XPATH).selectNodes(readcomaasjdom);
        for (Object node : nodes) {
            links.add(((Element) node).getText());
        }
        return links;
    }

    /** @return the location this object was created from or last set to */
    @Override
    public URL getURL() {
        return url;
    }

    /**
     * Runs the text returned by {@link #toUnformattedString()} through
     * {@code PrettyPrinter.indent} with the argument {@code "event"}.
     *
     * @return the result of the pretty printer
     */
    @Override
    public String toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        return toPrettyPrintedXML();
    }

    private String toPrettyPrintedXML() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        PrettyPrinter pp = new PrettyPrinter();
        //String prettyCorpusData = pp.indent(bt.toXML(bt.getTierFormatTable()), "event");
        return pp.indent(toUnformattedString(), "event");
    }

    /** @return the raw text as read from the file or last updated */
    @Override
    public String toUnformattedString() {
        return originalstring;
    }

    /**
     * Returns {@link #referencedCorpusDataURLs} as is.
     *
     * @return the stored list, which is {@code null} unless it was assigned
     *         from outside this class
     */
    //TODO!
    @Override
    public Collection<URL> getReferencedCorpusDataURLs() {
        //now read the NSLinks and add the URLs from the files
        //we need to have different ArrayLists for exb, exs, audio, pdf
        //TODO!
        return referencedCorpusDataURLs;
    }

    /**
     * Builds one URL per node selected by {@link #BASIC_FILE_XPATH} by
     * appending the node text to {@link #CORPUS_BASEDIRECTORY}.
     *
     * @return the URLs in document order, or {@code null} if the XPath could
     *         not be evaluated (the failure is logged)
     * @throws MalformedURLException if a concatenated string is not a valid URL
     * @throws URISyntaxException declared for compatibility with existing callers
     */
    public ArrayList<URL> getAllBasicTranscriptionURLs() throws MalformedURLException, URISyntaxException {
        try {
            ArrayList<URL> resulturls = new ArrayList<>();
            for (String link : selectBasicTranscriptionLinks()) {
                resulturls.add(new URL(CORPUS_BASEDIRECTORY + link));
            }
            return resulturls;
        } catch (JDOMException ex) {
            Logger.getLogger(ComaData.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        }
    }

    /**
     * Lists the text of every node selected by {@link #BASIC_FILE_XPATH}.
     *
     * @return the node texts in document order, or {@code null} if the XPath
     *         could not be evaluated (the failure is logged)
     */
    public ArrayList<String> getAllBasicTranscriptionFilenames() {
        try {
            return new ArrayList<>(selectBasicTranscriptionLinks());
        } catch (JDOMException ex) {
            Logger.getLogger(ComaData.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        }
    }

    /** @param newUnformattedString replacement for the stored raw text */
    @Override
    public void updateUnformattedString(String newUnformattedString) {
        originalstring = newUnformattedString;
    }

    /** @param url the new base directory */
    public void setBaseDirectory(URL url) {
        CORPUS_BASEDIRECTORY = url;
    }

    /**
     * Recomputes the base directory from the current URL, stores it in
     * {@link #CORPUS_BASEDIRECTORY} (overwriting any value set before) and
     * returns it.
     *
     * @return the freshly computed base directory
     */
    public URL getBasedirectory() throws URISyntaxException, MalformedURLException {
        CORPUS_BASEDIRECTORY = parentDirectoryOf(url.toURI());
        return CORPUS_BASEDIRECTORY;
    }

    /** @return the stored base directory, without recomputing it */
    @Override
    public URL getParentURL() {
        return CORPUS_BASEDIRECTORY;
    }

    /** @param nurl the new location; no data is re-read */
    @Override
    public void setURL(URL nurl) {
        url = nurl;
    }

    /** @param url the new base directory */
    @Override
    public void setParentURL(URL url) {
        CORPUS_BASEDIRECTORY = url;
    }

    /** @return the stored file name */
    @Override
    public String getFilename() {
        return filename;
    }

    /** @param s the new file name */
    @Override
    public void setFilename(String s) {
        filename = s;
    }

    /** @return the stored file name without its ending */
    @Override
    public String getFilenameWithoutFileEnding() {
        return filenamewithoutending;
    }

    /** @param s the new file name without its ending */
    @Override
    public void setFilenameWithoutFileEnding(String s) {
        filenamewithoutending = s;
    }

    /** @return the parsed JDOM document */
    @Override
    public Document getJdom() {
        return readcomaasjdom;
    }

    /** @param jdom the JDOM document to store; the raw text is not updated */
    @Override
    public void setJdom(Document jdom) {
        readcomaasjdom = jdom;
    }
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
/**
 * Empty, package-private class; it currently declares no fields or methods.
 * <p>
 * NOTE(review): presumably a placeholder for configuration parameters of a
 * corpus - confirm the intended content before relying on it.
 *
 * @author fsnv625
 */
class ConfigParameters {
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
/**
 * Package-private marker interface: it extends {@link CorpusData} without
 * adding any members, so it only serves to tag implementations.
 * <p>
 * NOTE(review): presumably marks the actual corpus content (such as
 * transcriptions) as opposed to metadata - confirm against the implementers.
 *
 * @author fsnv625
 */
interface ContentData extends CorpusData{
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
import java.net.URL;
import java.util.Collection;
import de.uni_hamburg.corpora.CorpusData;
import de.uni_hamburg.corpora.CorpusIO;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
import org.xml.sax.SAXException;
/**
 * In-memory view of a corpus: all {@link CorpusData} objects read from a
 * location, additionally sorted into one collection per kind of data.
 * <p>
 * Every object lands in the first category it matches, in the order content,
 * recording, additional data, metadata, annotation specification,
 * configuration parameters. {@link CmdiData} is itself {@link Metadata}, so
 * such objects are listed in both the metadata and the CMDI collection.
 *
 * @author fsnv625
 */
public class Corpus {

    //only the metadata file, coma or cmdi in most cases, or a list of files
    Collection<Metadata> metadata = new ArrayList<>();
    //the transcriptions
    Collection<ContentData> contentdata = new ArrayList<>();
    Collection<Recording> recording = new ArrayList<>();
    Collection<AdditionalData> additionaldata = new ArrayList<>();
    Collection<AnnotationSpecification> annotationspecification = new ArrayList<>();
    Collection<ConfigParameters> configparameters = new ArrayList<>();
    private Collection<CmdiData> cmdidata = new ArrayList<>();
    //all the data together; stays null when the no-argument constructor is used
    Collection<CorpusData> cdc;

    /** Creates an empty corpus; the per-kind collections are empty. */
    public Corpus() {
    }

    /**
     * Reads everything {@code CorpusIO.read} finds at {@code url} and sorts
     * the resulting objects into the per-kind collections.
     *
     * @param url location handed to {@code CorpusIO.read}
     */
    public Corpus(URL url) throws MalformedURLException, SAXException, JexmaraldaException, URISyntaxException, IOException {
        CorpusIO cio = new CorpusIO();
        cdc = cio.read(url);
        for (CorpusData cd : cdc) {
            if (cd instanceof ContentData) {
                contentdata.add((ContentData) cd);
            } else if (cd instanceof Recording) {
                recording.add((Recording) cd);
            } else if (cd instanceof AdditionalData) {
                additionaldata.add((AdditionalData) cd);
            } else if (cd instanceof Metadata) {
                metadata.add((Metadata) cd);
                // CmdiData implements Metadata, so a separate else-if branch for
                // it after this one could never be reached; record it here.
                if (cd instanceof CmdiData) {
                    cmdidata.add((CmdiData) cd);
                }
            } else if (cd instanceof AnnotationSpecification) {
                annotationspecification.add((AnnotationSpecification) cd);
            } else if (cd instanceof ConfigParameters) {
                configparameters.add((ConfigParameters) cd);
            }
        }
        //and also the other collections maybe
    }

    /** @return all data objects, or {@code null} if none were read or set */
    public Collection<CorpusData> getCorpusData() {
        return cdc;
    }

    /** @return the metadata objects */
    public Collection<Metadata> getMetadata() {
        return metadata;
    }

    /** @return the content data objects */
    public Collection<ContentData> getContentdata() {
        return contentdata;
    }

    /** @return the recording objects */
    public Collection<Recording> getRecording() {
        return recording;
    }

    /** @return the additional data objects */
    public Collection<AdditionalData> getAdditionaldata() {
        return additionaldata;
    }

    /** @return the annotation specification objects */
    public Collection<AnnotationSpecification> getAnnotationspecification() {
        return annotationspecification;
    }

    /** @return the configuration parameter objects */
    public Collection<ConfigParameters> getConfigparameters() {
        return configparameters;
    }

    /** @return the CMDI metadata objects */
    public Collection<CmdiData> getCmdidata() {
        return cmdidata;
    }

    /** @param metadata replacement for the metadata collection */
    public void setMetadata(Collection<Metadata> metadata) {
        this.metadata = metadata;
    }

    /** @param contentdata replacement for the content data collection */
    public void setContentdata(Collection<ContentData> contentdata) {
        this.contentdata = contentdata;
    }

    /** @param recording replacement for the recording collection */
    public void setRecording(Collection<Recording> recording) {
        this.recording = recording;
    }

    /** @param additionaldata replacement for the additional data collection */
    public void setAdditionaldata(Collection<AdditionalData> additionaldata) {
        this.additionaldata = additionaldata;
    }

    /** @param annotationspecification replacement for the annotation specification collection */
    public void setAnnotationspecification(Collection<AnnotationSpecification> annotationspecification) {
        this.annotationspecification = annotationspecification;
    }

    /** @param configparameters replacement for the configuration parameter collection */
    public void setConfigparameters(Collection<ConfigParameters> configparameters) {
        this.configparameters = configparameters;
    }

    /** @param cdc replacement for the collection of all data; the per-kind collections are not re-sorted */
    public void setCdc(Collection<CorpusData> cdc) {
        this.cdc = cdc;
    }

    /** @param cmdidata replacement for the CMDI metadata collection */
    public void setCmdidata(Collection<CmdiData> cmdidata) {
        this.cmdidata = cmdidata;
    }
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package de.uni_hamburg.corpora;
import java.io.IOException;
import java.net.URL;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.xml.sax.SAXException;
/**
 * Common contract of every piece of corpus data: where it lives, what it is
 * called, and how its textual content is obtained and replaced.
 *
 * @author fsnv625
 */
public interface CorpusData {

    /** @return the location of this data */
    URL getURL();

    /** @param url the new location of this data */
    void setURL(URL url);

    /** @return the parent location of this data */
    URL getParentURL();

    /** @param url the new parent location of this data */
    void setParentURL(URL url);

    /** @return the file name of this data */
    String getFilename();

    /** @param s the new file name */
    void setFilename(String s);

    /** @return the file name without its ending */
    String getFilenameWithoutFileEnding();

    /** @param s the new file name without its ending */
    void setFilenameWithoutFileEnding(String s);

    /** @return the content in the form that is meant to be written back to disk */
    String toSaveableString() throws TransformerException, ParserConfigurationException, SAXException, IOException, XPathExpressionException;

    /** @return the content without any formatting applied */
    String toUnformattedString();

    /**
     * Needed if there were changes to the file so they are represented in the
     * object too.
     *
     * @param newUnformattedString the new unformatted content
     */
    void updateUnformattedString(String newUnformattedString);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment