From 45d619ad3434607481de500bcec7bcb2419c88ff Mon Sep 17 00:00:00 2001
From: Hagen Peukert <hagen.peukert@uni-hamburg.de>
Date: Fri, 1 Apr 2022 10:28:30 +0200
Subject: [PATCH] Lemmatization implemented

---
 Morphochron/src/Init.java       |  5 ++++-
 Morphochron/src/OED.java        | 20 +++++++++++++++-----
 Morphochron/src/SuffixEnum.java |  6 +++---
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/Morphochron/src/Init.java b/Morphochron/src/Init.java
index f1a3941..226c6ab 100644
--- a/Morphochron/src/Init.java
+++ b/Morphochron/src/Init.java
@@ -92,7 +92,10 @@ public class Init
 //									normalizedWords.add("dignitary");
 //									normalizedWords.add("proposition");
 //									normalizedWords.add("daskommtnichtvor");
-//									
+//									normalizedWords.add("annoyaunce");
+//									normalizedWords.add("assygnement");
+//									normalizedWords.add("daskommtnichtvor");
+									
 								
 									frame.setMessage("All words of type " + wordclass + " selected\n");
 									//detect affixes in word list as a pre-processing and countercheck these with OED REST API
diff --git a/Morphochron/src/OED.java b/Morphochron/src/OED.java
index 7369afc..52ea05f 100644
--- a/Morphochron/src/OED.java
+++ b/Morphochron/src/OED.java
@@ -54,6 +54,14 @@ public class OED
 		this.resultPath = resultPath;
 	}
 	
+	/*
+	 * builds the request URL (a String, not the JSON response itself) for the lemmatize endpoint of the OED REST API; the word form is appended as-is to the "form" parameter and the part_of_speech filter is currently disabled
+	 */
+	private String getRESTAPILemmatizedWord(String word)
+	{
+		return "https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/lemmatize/?form=" + word;// + "&part_of_speech=" + wordclass;
+	}
+	
 	/*
 	 * gets word representation of OED REST API as JSON object
 	 */
@@ -80,11 +88,11 @@ public class OED
 		JSONArray arr = obj.getJSONArray("data"); 
 		for (int i = 0; i < arr.length(); i++)
 		{
-		    wordid = arr.getJSONObject(i).getString("id");
+		    wordid = arr.getJSONObject(i).getJSONObject("word").getString("id");
 		    //System.out.println("Wort-ID:" + wordid);
-		    if (wordid.equals(word + wordclass)) break; //words may be part of several word classes
+		    if (wordid.endsWith(wordclass)) break; //words may be part of several word classes
 		}
-		
+
 		return wordid;
 	}
 	
@@ -181,7 +189,8 @@ public class OED
 	{
 		//Map<String, Integer> oedData = new HashMap<String,Integer>();
 		Boolean entryAvailable = false;
-		String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
+		//String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
+		String wordJSON = getRESTAPILemmatizedWord(word.toLowerCase());
 		JSONObject jo = getJSonResponse(wordJSON);
 		String id = processJSonWordID(jo);
 		if (!id.isEmpty())
@@ -199,7 +208,8 @@ public class OED
 	
 	public Set<String> getMorphemesOEDRequest()
 	{
-		String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
+		//String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
+		String wordJSON = getRESTAPILemmatizedWord(word.toLowerCase());
 		JSONObject jo = getJSonResponse(wordJSON);
 		String id = processJSonWordID(jo);
 		Set<String> affixes = new HashSet<String>();
diff --git a/Morphochron/src/SuffixEnum.java b/Morphochron/src/SuffixEnum.java
index 2093588..8106fb2 100644
--- a/Morphochron/src/SuffixEnum.java
+++ b/Morphochron/src/SuffixEnum.java
@@ -88,9 +88,9 @@ public enum SuffixEnum {
     ward("ward"), wards("wards"), ware("ware"), uaeras("ware"), uaras("ware"), uaro("ware"), waeras("ware"), wara("ware"), waran("ware"),
     waras("ware"), waru("ware"), wearan("ware"), waeren("ware"), warae("ware"), wick("wick"), y("y"), ig("y"), ye("y"), igan("y"), izen("y"),
     ezen("y"), yen("y"), ey("y"), yl("yl"), yne("yne");
-    private final String morpheme;
-
-    //constructor
+	
+	private String morpheme;
+	
     SuffixEnum(String morpheme) {
         this.morpheme = morpheme;
     }
-- 
GitLab