Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
corpus-services
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lange, Dr. Herbert
corpus-services
Commits
91ba13fd
Commit
91ba13fd
authored
3 years ago
by
Lange, Dr. Herbert
Browse files
Options
Downloads
Patches
Plain Diff
add more transcription standards
parent
14aa8d8d
No related branches found
No related tags found
1 merge request
!4
Feature/new checkers
Pipeline
#11550
passed
3 years ago
Stage: test
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/main/java/de/uni_hamburg/corpora/validation/quest/TranscriptionChecker.java
+55
-3
55 additions, 3 deletions
...amburg/corpora/validation/quest/TranscriptionChecker.java
with
55 additions
and
3 deletions
src/main/java/de/uni_hamburg/corpora/validation/quest/TranscriptionChecker.java
+
55
−
3
View file @
91ba13fd
...
@@ -75,7 +75,38 @@ abstract class TranscriptionChecker extends Checker implements CorpusFunction {
...
@@ -75,7 +75,38 @@ abstract class TranscriptionChecker extends Checker implements CorpusFunction {
// "))" // doppelte runde schließende Klammer
// "))" // doppelte runde schließende Klammer
}).
map
((
c
)
->
String
.
valueOf
(
Character
.
toChars
(
Integer
.
decode
(
c
.
replace
(
"U+"
,
"0x"
)))))
}).
map
((
c
)
->
String
.
valueOf
(
Character
.
toChars
(
Integer
.
decode
(
c
.
replace
(
"U+"
,
"0x"
)))))
.
collect
(
Collectors
.
toList
()));
.
collect
(
Collectors
.
toList
()));
private
final
Set
<
String
>
didaSpecial
=
new
HashSet
<>(
Arrays
.
asList
(
"←"
,
"→"
,
"*"
)
);
private
final
Set
<
String
>
gatSpecial
=
new
HashSet
<>(
Arrays
.
asList
(
"("
,
"."
,
")"
,
"("
,
"-"
,
")"
,
"("
,
"-"
,
"-"
,
")"
,
"("
,
"-"
,
"-"
,
"-"
,
")"
,
"'"
,
"?"
,
","
,
"-"
,
";"
,
"."
,
"↑"
,
"↓"
,
"ˋ"
,
"ˊ"
,
"ˉ"
,
"ˆ"
,
"ˇ"
,
"<"
,
">"
)
);
private
final
Set
<
String
>
ipaSpecial
=
new
HashSet
<>(
Arrays
.
asList
(
"ɐ"
,
"ɑ"
,
"ɒ"
,
"ɓ"
,
"ɔ"
,
"ɕ"
,
"ɖ"
,
"ɗ"
,
"ɘ"
,
"ə"
,
"ɚ"
,
"ɛ"
,
"ɜ"
,
"ɝ"
,
"ɞ"
,
"ɟ"
,
"ɠ"
,
"ɡ"
,
"ɢ"
,
"ɣ"
,
"ɤ"
,
"ɥ"
,
"ɦ"
,
"ɧ"
,
"ɨ"
,
"ɩ"
,
"ɪ"
,
"ɫ"
,
"ɬ"
,
"ɭ"
,
"ɮ"
,
"ɯ"
,
"ɰ"
,
"ɱ"
,
"ɲ"
,
"ɳ"
,
"ɴ"
,
"ɵ"
,
"ɶ"
,
"ɷ"
,
"ɸ"
,
"ɹ"
,
"ɺ"
,
"ɻ"
,
"ɼ"
,
"ɽ"
,
"ɾ"
,
"ɿ"
,
"ʀ"
,
"ʁ"
,
"ʂ"
,
"ʃ"
,
"ʄ"
,
"ʅ"
,
"ʆ"
,
"ʇ"
,
"ʈ"
,
"ʉ"
,
"ʊ"
,
"ʋ"
,
"ʌ"
,
"ʍ"
,
"ʎ"
,
"ʏ"
,
"ʐ"
,
"ʑ"
,
"ʒ"
,
"ʓ"
,
"ʔ"
,
"ʕ"
,
"ʖ"
,
"ʗ"
,
"ʘ"
,
"ʙ"
,
"ʚ"
,
"ʛ"
,
"ʜ"
,
"ʝ"
,
"ʞ"
,
"ʟ"
,
"ʠ"
,
"ʡ"
,
"ʢ"
,
"ʣ"
,
"ʤ"
,
"ʥ"
,
"ʦ"
,
"ʧ"
,
"ʨ"
));
/**
/**
* Function to enumerate all alphabetic characters
* Function to enumerate all alphabetic characters
* @return all alphabetic characters in the unicode standard
* @return all alphabetic characters in the unicode standard
...
@@ -100,13 +131,33 @@ abstract class TranscriptionChecker extends Checker implements CorpusFunction {
...
@@ -100,13 +131,33 @@ abstract class TranscriptionChecker extends Checker implements CorpusFunction {
knownGraphemes
.
addAll
(
digitChars
);
knownGraphemes
.
addAll
(
digitChars
);
knownGraphemes
.
addAll
(
hiatSpecial
);
knownGraphemes
.
addAll
(
hiatSpecial
);
}
}
else
if
(
properties
.
getProperty
(
"transcription-method"
).
equalsIgnoreCase
(
"dida"
))
{
knownGraphemes
.
addAll
(
alphaChars
);
knownGraphemes
.
addAll
(
digitChars
);
knownGraphemes
.
addAll
(
didaSpecial
);
}
else
if
(
properties
.
getProperty
(
"transcription-method"
).
equalsIgnoreCase
(
"gat"
))
{
knownGraphemes
.
addAll
(
alphaChars
);
knownGraphemes
.
addAll
(
digitChars
);
knownGraphemes
.
addAll
(
gatSpecial
);
}
else
if
(
properties
.
getProperty
(
"transcription-method"
).
equalsIgnoreCase
(
"ipa"
))
{
knownGraphemes
.
addAll
(
ipaSpecial
);
knownGraphemes
.
addAll
(
Arrays
.
asList
(
"abcdefghijklmnopqrstuvwzyz"
.
split
(
""
))
);
knownGraphemes
.
addAll
(
Arrays
.
asList
(
"abcdefghijklmnopqrstuvwzyz"
.
toUpperCase
().
split
(
""
))
);
}
}
}
}
}
@Override
@Override
public
Report
function
(
CorpusData
cd
,
Boolean
fix
)
throws
NoSuchAlgorithmException
,
ClassNotFoundException
,
FSMException
,
URISyntaxException
,
SAXException
,
IOException
,
ParserConfigurationException
,
JexmaraldaException
,
TransformerException
,
XPathExpressionException
,
JDOMException
{
public
Report
function
(
CorpusData
cd
,
Boolean
fix
)
throws
NoSuchAlgorithmException
,
ClassNotFoundException
,
FSMException
,
URISyntaxException
,
SAXException
,
IOException
,
ParserConfigurationException
,
JexmaraldaException
,
TransformerException
,
XPathExpressionException
,
JDOMException
{
Report
report
=
new
Report
();
Report
report
=
new
Report
();
logger
.
info
(
"Checking: "
+
cd
.
getURL
());
try
{
try
{
// Find transcription tiers
// Find transcription tiers
List
<
Element
>
transcriptionTiers
=
getTranscriptionTiers
(
cd
);
List
<
Element
>
transcriptionTiers
=
getTranscriptionTiers
(
cd
);
...
@@ -175,7 +226,8 @@ abstract class TranscriptionChecker extends Checker implements CorpusFunction {
...
@@ -175,7 +226,8 @@ abstract class TranscriptionChecker extends Checker implements CorpusFunction {
public
Map
<
String
,
String
>
getParameters
()
{
public
Map
<
String
,
String
>
getParameters
()
{
Map
<
String
,
String
>
params
=
super
.
getParameters
();
Map
<
String
,
String
>
params
=
super
.
getParameters
();
params
.
put
(
"transcription-graphemes"
,
"List of transcription graphemes, separated by commas"
);
params
.
put
(
"transcription-graphemes"
,
"List of transcription graphemes, separated by commas"
);
params
.
put
(
"transcription-method"
,
"Standard transcription method used, if any. Currently only HIAT"
);
params
.
put
(
"transcription-method"
,
"Standard transcription method used, if any. Currently HIAT, DIDA, GAT and"
+
" IPA"
);
return
params
;
return
params
;
}
}
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment