Skip to content
Snippets Groups Projects
Commit 45d619ad authored by Peukert, Dr. Hagen's avatar Peukert, Dr. Hagen
Browse files

Lemmatization implemented

parent d986a052
No related branches found
No related tags found
No related merge requests found
...@@ -92,7 +92,10 @@ public class Init ...@@ -92,7 +92,10 @@ public class Init
// normalizedWords.add("dignitary"); // normalizedWords.add("dignitary");
// normalizedWords.add("proposition"); // normalizedWords.add("proposition");
// normalizedWords.add("daskommtnichtvor"); // normalizedWords.add("daskommtnichtvor");
// // normalizedWords.add("annoyaunce");
// normalizedWords.add("assygnement");
// normalizedWords.add("daskommtnichtvor");
frame.setMessage("All words of type " + wordclass + " selected\n"); frame.setMessage("All words of type " + wordclass + " selected\n");
//detect affixes in word list as a pre-processing and countercheck these with OED REST API //detect affixes in word list as a pre-processing and countercheck these with OED REST API
......
...@@ -54,6 +54,14 @@ public class OED ...@@ -54,6 +54,14 @@ public class OED
this.resultPath = resultPath; this.resultPath = resultPath;
} }
/*
 * Builds the request URL for the lemmatize endpoint of the OED REST API.
 * This method only assembles the URL string; it does not send the request.
 *
 * The word is URL-encoded (UTF-8) before it is placed into the "form" query
 * parameter, so that spaces, reserved characters such as '&' or '#', and
 * non-ASCII letters cannot produce a malformed URL or be misread as extra
 * query parameters. Plain ASCII letters and digits are left unchanged.
 *
 * @param word the word form to look up
 * @return the complete request URL as a string
 */
private String getRESTAPILemmatizedWord(String word)
{
String encodedWord;
try
{
encodedWord = java.net.URLEncoder.encode(word, "UTF-8");
}
catch (java.io.UnsupportedEncodingException e)
{
// cannot happen in practice: every Java platform is required to support UTF-8
throw new IllegalStateException("UTF-8 encoding is not available", e);
}
return "https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/lemmatize/?form=" + encodedWord;// + "&part_of_speech=" + wordclass;
}
/* /*
* gets word representation of OED REST API as JSON object * gets word representation of OED REST API as JSON object
*/ */
...@@ -80,9 +88,9 @@ public class OED ...@@ -80,9 +88,9 @@ public class OED
JSONArray arr = obj.getJSONArray("data"); JSONArray arr = obj.getJSONArray("data");
for (int i = 0; i < arr.length(); i++) for (int i = 0; i < arr.length(); i++)
{ {
wordid = arr.getJSONObject(i).getString("id"); wordid = arr.getJSONObject(i).getJSONObject("word").getString("id");
//System.out.println("Wort-ID:" + wordid); //System.out.println("Wort-ID:" + wordid);
if (wordid.equals(word + wordclass)) break; //words may be part of several word classes if (wordid.endsWith(wordclass)) break; //words may be part of several word classes
} }
return wordid; return wordid;
...@@ -181,7 +189,8 @@ public class OED ...@@ -181,7 +189,8 @@ public class OED
{ {
//Map<String, Integer> oedData = new HashMap<String,Integer>(); //Map<String, Integer> oedData = new HashMap<String,Integer>();
Boolean entryAvailable = false; Boolean entryAvailable = false;
String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase()); //String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
String wordJSON = getRESTAPILemmatizedWord(word.toLowerCase());
JSONObject jo = getJSonResponse(wordJSON); JSONObject jo = getJSonResponse(wordJSON);
String id = processJSonWordID(jo); String id = processJSonWordID(jo);
if (!id.isEmpty()) if (!id.isEmpty())
...@@ -199,7 +208,8 @@ public class OED ...@@ -199,7 +208,8 @@ public class OED
public Set<String> getMorphemesOEDRequest() public Set<String> getMorphemesOEDRequest()
{ {
String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase()); //String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
String wordJSON = getRESTAPILemmatizedWord(word.toLowerCase());
JSONObject jo = getJSonResponse(wordJSON); JSONObject jo = getJSonResponse(wordJSON);
String id = processJSonWordID(jo); String id = processJSonWordID(jo);
Set<String> affixes = new HashSet<String>(); Set<String> affixes = new HashSet<String>();
......
...@@ -88,9 +88,9 @@ public enum SuffixEnum { ...@@ -88,9 +88,9 @@ public enum SuffixEnum {
ward("ward"), wards("wards"), ware("ware"), uaeras("ware"), uaras("ware"), uaro("ware"), waeras("ware"), wara("ware"), waran("ware"), ward("ward"), wards("wards"), ware("ware"), uaeras("ware"), uaras("ware"), uaro("ware"), waeras("ware"), wara("ware"), waran("ware"),
waras("ware"), waru("ware"), wearan("ware"), waeren("ware"), warae("ware"), wick("wick"), y("y"), ig("y"), ye("y"), igan("y"), izen("y"), waras("ware"), waru("ware"), wearan("ware"), waeren("ware"), warae("ware"), wick("wick"), y("y"), ig("y"), ye("y"), igan("y"), izen("y"),
ezen("y"), yen("y"), ey("y"), yl("yl"), yne("yne"); ezen("y"), yen("y"), ey("y"), yl("yl"), yne("yne");
private final String morpheme;
//constructor private String morpheme;
SuffixEnum(String morpheme) { SuffixEnum(String morpheme) {
this.morpheme = morpheme; this.morpheme = morpheme;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment