Select Git revision
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Affix.java 3.64 KiB
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Groups the words of a corpus by the affix morphemes they contain.
 *
 * <p>Every distinct word is first looked up through {@code OED}. If that lookup
 * yields morphemes, the word is recorded in {@link #getMorphemeWordList()}.
 * Otherwise the morphemes are taken from {@code AffixStripper} and the word is
 * recorded in {@link #getNotInOEDWordList()}.
 *
 * <p>All processing happens in the constructor; the two getters expose the results.
 */
public class Affix
{
    /** Morpheme -> words for which the OED lookup returned that morpheme. */
    private Map<String, ArrayList<String>> morphemeWordList = new HashMap<String, ArrayList<String>>();
    /** Morpheme -> words for which the OED lookup returned nothing and AffixStripper supplied the morpheme. */
    private Map<String, ArrayList<String>> notInOEDWordList = new HashMap<String, ArrayList<String>>();
    private ArrayList<String> filteredWords = new ArrayList<String>();
    private String wordclass = "";
    private String affixtype = "";
    private int startdate = 0;
    private int enddate = 0;
    private String app_key = "";
    private String app_id = "";
    private String resultPath = "";
    private Corpus corpus;
    private String corpusName = "";
    private String corpusPeriod = "";

    /**
     * Stores the configuration and immediately maps every distinct word in
     * {@code filteredWords} to its morphemes.
     *
     * @param filteredWords words to analyse; duplicates are collapsed before processing
     * @param corpus        corpus the words come from; also passed on to each {@code OED} lookup
     * @param wordclass     word class passed on to each {@code OED} lookup
     * @param affixtype     {@code "_su01"} selects suffixes and {@code "_pr01"} prefixes for the
     *                      {@code AffixStripper} fallback; also passed on to each {@code OED} lookup
     * @param app_key       passed on to each {@code OED} lookup
     * @param app_id        passed on to each {@code OED} lookup
     * @param resultPath    passed on to each {@code OED} lookup
     */
    public Affix(ArrayList<String> filteredWords, Corpus corpus, String wordclass, String affixtype, String app_key, String app_id, String resultPath)
    {
        this.corpus = corpus;
        this.filteredWords = filteredWords;
        this.affixtype = affixtype;
        this.wordclass = wordclass;
        this.startdate = corpus.getStartDate();
        this.enddate = corpus.getEndDate();
        this.app_key = app_key;
        this.app_id = app_id;
        this.resultPath = resultPath;
        this.corpusName = corpus.getCorpusName();
        this.corpusPeriod = corpus.getPeriod();
        processMorphemes();
    }

    /**
     * @return the live internal map from morpheme to the words for which the
     *         {@code OED} lookup returned that morpheme
     */
    public Map<String, ArrayList<String>> getMorphemeWordList()
    {
        return morphemeWordList;
    }

    /**
     * @return the live internal map from morpheme to the words for which the
     *         {@code OED} lookup returned no morphemes and {@code AffixStripper}
     *         supplied the morpheme instead
     */
    public Map<String, ArrayList<String>> getNotInOEDWordList()
    {
        return notInOEDWordList;
    }

    /**
     * Looks up every distinct word and files it under each of its morphemes,
     * printing a progress line per word.
     */
    private void processMorphemes()
    {
        // De-duplicate first so each word is looked up, and recorded, only once.
        Set<String> wordTypes = new HashSet<String>(filteredWords);
        // These counters are only used for the progress output below.
        int numberOfWordTypes = wordTypes.size();
        int lookups = 0;
        for (String word : wordTypes)
        {
            OED ox = new OED(word, "", wordclass, affixtype, corpus, app_key, app_id, resultPath);
            Set<String> affixInWord = ox.getMorphemesOEDRequest();
            lookups++;
            if (affixInWord.size() > 0)
            {
                for (String morpheme : affixInWord)
                {
                    addWord(morphemeWordList, morpheme, word);
                }
            }
            else
            {
                System.out.println("No Morpheme Representation in OED: " + word);
                // Iterating an empty key set is a no-op, so no separate emptiness check is needed.
                for (String morpheme : stripAffixes(word).keySet())
                {
                    addWord(notInOEDWordList, morpheme, word);
                }
            }
            System.out.println("Word " + lookups + " from " + numberOfWordTypes + " mapped.");
        }
    }

    /**
     * Asks {@code AffixStripper} for the morphemes of {@code word} that match the
     * configured affix type.
     *
     * @return the stripper's result for suffixes or prefixes, or an empty map
     *         (after printing a notice) when the affix type is neither
     */
    private Map<String, Integer> stripAffixes(String word)
    {
        AffixStripper as = new AffixStripper(word);
        // Constant-first equals keeps a null affixtype from throwing here.
        if ("_su01".equals(affixtype))
        {
            return as.getSuffixMorphem();
        }
        if ("_pr01".equals(affixtype))
        {
            return as.getPrefixMorphem();
        }
        System.out.println("Affixtype not known");
        return new HashMap<String, Integer>();
    }

    /**
     * Appends {@code word} to the list stored under {@code morpheme}, creating the
     * list on first use.
     *
     * <p>No duplicate check is done: callers iterate a set of distinct words and,
     * per word, a set of distinct morphemes, so a (morpheme, word) pair is added
     * at most once.
     */
    private static void addWord(Map<String, ArrayList<String>> index, String morpheme, String word)
    {
        ArrayList<String> words = index.get(morpheme);
        if (words == null)
        {
            words = new ArrayList<String>();
            index.put(morpheme, words);
        }
        words.add(word);
    }
}