// Result.java — computes morphological productivity measures per morpheme and writes them to CSV.
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Computes morphological productivity measures for a set of morphemes found
 * in a corpus — word types, token counts, hapax legomena, and the
 * productivity value P = hapax types / affix tokens — and writes the results
 * to two CSV files: an aggregated measures file and a post-processing file
 * for the words not found in the OED.
 */
public class Result
{
    private Corpus cp;
    private String affixtype = "";
    private String wordclass = "";
    private String postprocessingfile = "";
    private String finalresultsfile = "";
    private String resultPath = "";
    private ArrayList<String> filteredWords = new ArrayList<String>();
    private ArrayList<String> allWordsOfCorpus = new ArrayList<String>();
    private Map<String, ArrayList<String>> morphemeWordList = new HashMap<String, ArrayList<String>>();
    private Map<String, ArrayList<String>> notInOEDWordList = new HashMap<String, ArrayList<String>>();
    // Token frequency of each word type in filteredWords, computed once in the
    // constructor via countTokens(). Replaces repeated Collections.frequency()
    // scans that made findHapaxes/calculateNumberOfAffixes O(n^2) per morpheme.
    private Map<String, Integer> filteredWordFrequency = new HashMap<String, Integer>();

    /**
     * Builds the result and immediately writes both CSV output files.
     *
     * @param morphemeWordList mapping from each morpheme to all word types it is contained in
     * @param notInOEDWordList mapping for words not found in the OED (written to the post-processing file)
     * @param cp               the corpus the words were taken from
     * @param filteredWords    all word tokens of the respective word class (without tags)
     * @param wordtype         word-class code, e.g. "_nn01", "_vb01", "_jj01"
     * @param affixtype        affix-type code, e.g. "_su01" or "_pr01"
     * @param resultPath       directory the CSV files are written to
     * @throws IOException if appending to the measures file fails
     */
    public Result(Map<String, ArrayList<String>> morphemeWordList, Map<String, ArrayList<String>> notInOEDWordList, Corpus cp, ArrayList<String> filteredWords, String wordtype, String affixtype, String resultPath) throws IOException
    {
        this.morphemeWordList = morphemeWordList; // the mapping between the morpheme and all word types it is contained in
        this.notInOEDWordList = notInOEDWordList;
        this.filteredWords = filteredWords; // all the words of the respective word class (without tags)
        this.affixtype = affixtype;
        this.wordclass = wordtype;
        this.cp = cp;
        this.allWordsOfCorpus = cp.getCorpus(); // all words of the corpus irrespective of the word class (words contain tags!)
        this.resultPath = resultPath;
        // Precompute token frequencies once; reused by findHapaxes and
        // calculateNumberOfAffixes for every morpheme.
        this.filteredWordFrequency = countTokens(filteredWords);
        setFileNames();
        writeAllMeasuresFile();
        writePostProcessingFile();
    }

    /**
     * Returns the word types among the given ones that occur exactly once
     * (hapax legomena) in the filtered word list.
     */
    private ArrayList<String> findHapaxes(ArrayList<String> allWordsContainingAffix)
    {
        ArrayList<String> hapaxes = new ArrayList<String>();
        for (String wordContainingAffix : allWordsContainingAffix)
        {
            Integer frequency = filteredWordFrequency.get(wordContainingAffix);
            if (frequency != null && frequency.intValue() == 1)
            {
                hapaxes.add(wordContainingAffix);
            }
        }
        return hapaxes;
    }

    /**
     * Sums the token frequencies of all word types containing the affix,
     * i.e. how often the affix occurs among the word-class tokens overall.
     */
    private int calculateNumberOfAffixes(ArrayList<String> allWordsContainingAffix)
    {
        int numberOfAffixInCorpus = 0;
        for (String wordContainingAffix : allWordsContainingAffix)
        {
            Integer frequency = filteredWordFrequency.get(wordContainingAffix);
            if (frequency != null) // word absent from filteredWords contributes 0, as before
            {
                numberOfAffixInCorpus += frequency.intValue();
            }
        }
        return numberOfAffixInCorpus;
    }

    /**
     * Productivity measure P = hapax types / affix tokens.
     * Returns 0.0 when the affix does not occur at all (avoids division by zero).
     */
    private double calculateP_Value(int hapaxtypes, int numberOfAffixes)
    {
        if (numberOfAffixes == 0)
        {
            return 0.0;
        }
        return (double) hapaxtypes / (double) numberOfAffixes;
    }

    /**
     * Counts how often each distinct word type occurs in the given token list.
     */
    private Map<String, Integer> countTokens(ArrayList<String> words)
    {
        Map<String, Integer> frequencyWords = new HashMap<String, Integer>();
        for (String key : new HashSet<String>(words))
        {
            frequencyWords.put(key, Collections.frequency(words, key));
        }
        return frequencyWords;
    }

    /**
     * Returns the distinct word types of the given token list.
     */
    private Set<String> setWordTypes(ArrayList<String> words)
    {
        return new HashSet<String>(words);
    }

    /*
     * Defines the header as it appears as a heading in the CSV file.
     * Uses local copies of the affix/word-class codes for the readable names,
     * so the fields keep their raw code values (the previous version mutated
     * the fields here, which would have broken a second call and any later
     * use of the codes).
     */
    private String createHeader()
    {
        String affixName = affixtype;
        if (affixName.equals("_su01")) affixName = "Suffixes";
        if (affixName.equals("_pr01")) affixName = "Prefixes";
        String className = wordclass;
        if (className.equals("_nn01")) className = "Nouns";
        if (className.equals("_vb01")) className = "Verbs";
        if (className.equals("_jj01")) className = "Adjectives";
        return affixName + ": " + className + " in " + cp.getCorpusName() + "/" + cp.getPeriod() + " (" + cp.getStartDate() + "-" + cp.getEndDate() + ")\n" +
            "Total Types (" + className + "): " + setWordTypes(filteredWords).size() + " of " +
            filteredWords.size() + " " + className + " and of " + allWordsOfCorpus.size() +
            " words in total\n\n" + "Morpheme;Contained in Words;Hapaxes;Types (V);Tokens;No Hapaxes;P\n";
    }

    /*
     * Generates the data string written to the CSV result file: one line per
     * morpheme with its word list, hapaxes, type/token counts and P value.
     * Built with a StringBuilder instead of repeated String concatenation.
     */
    private String generateDataSet(Map<String, ArrayList<String>> list)
    {
        StringBuilder data = new StringBuilder(createHeader());
        for (String key : list.keySet())
        {
            // all word types containing the morpheme 'key'
            ArrayList<String> allWordsContainingAffix = list.get(key);
            ArrayList<String> hapaxes = findHapaxes(allWordsContainingAffix);
            int affixfrequencyForAllWordTokens = calculateNumberOfAffixes(allWordsContainingAffix);
            int affixfrequencyForAllWordTypes = allWordsContainingAffix.size();
            data.append(key).append(";").append(allWordsContainingAffix).append(";").append(hapaxes).append(";")
                .append(affixfrequencyForAllWordTypes).append(";").append(affixfrequencyForAllWordTokens).append(";")
                .append(hapaxes.size()).append(";").append(calculateP_Value(hapaxes.size(), affixfrequencyForAllWordTokens)).append("\n");
        }
        return data.append("\n\n").toString();
    }

    /**
     * Derives both output file locations from the result path, word-class and
     * affix-type codes, and the corpus metadata.
     */
    private void setFileNames()
    {
        // location of the post-processing file
        postprocessingfile = resultPath + File.separator + "postprocessingfile" + wordclass + affixtype + "_" + cp.getPeriod() + "-" + cp.getCorpusName() + ".csv";
        // location of the final result file
        finalresultsfile = resultPath + File.separator + "resultsMorphochron.csv";
    }

    /**
     * Writes the CSV file used to manually post-process the words not found
     * in the OED.
     */
    private void writePostProcessingFile()
    {
        IO io = new IO();
        io.writeMorphemeWordListToCSVFile(postprocessingfile, notInOEDWordList);
    }

    /**
     * Appends all computed measures for this word class / affix type to the
     * shared results CSV file.
     */
    private void writeAllMeasuresFile() throws IOException
    {
        IO io = new IO();
        io.appendResultsToCSVFile(finalresultsfile, generateDataSet(morphemeWordList));
    }
}