Skip to content
Snippets Groups Projects
Select Git revision
  • 0d577bccdc21dc9309d51088452138880c4f2dab
  • master default protected
2 results

Result.java

Blame
  • Hagen Peukert's avatar
    Peukert, Dr. Hagen authored
    TODOs cosmetic details left, ppc check, postprocessingfile include
    0d577bcc
    History
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    Result.java 5.87 KiB
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * Computes per-morpheme measures for one word class of a corpus and writes them to CSV files.
     * <p>
     * For every morpheme in the morpheme/word mapping the class reports the words containing it,
     * the hapaxes among these words (words occurring exactly once in the filtered word list),
     * the number of types, the number of tokens, the number of hapaxes and the value P
     * (number of hapaxes divided by number of tokens).
     * <p>
     * All work, including the file output, is triggered by the constructor.
     */
    public class Result 
    {
    	
    	private final Corpus cp;
    	private final String affixtype;
    	private final String wordclass;
    	private String postprocessingfile = "";
    	private String finalresultsfile = "";
    	private final String resultPath;
    	private final ArrayList<String> filteredWords;
    	private final ArrayList<String> allWordsOfCorpus;
    	private final Map<String, ArrayList<String>> morphemeWordList;
    	private final Map<String, ArrayList<String>> notInOEDWordList;
    	// word -> number of occurrences in filteredWords; built once so that the
    	// per-morpheme measures do not have to rescan the whole token list for every word
    	private final Map<String, Integer> tokenFrequencies;
    	
    	/**
    	 * Stores the input data, derives the output file names and writes both result files.
    	 *
    	 * @param morphemeWordList the mapping between a morpheme and all word types it is contained in
    	 * @param notInOEDWordList the mapping that is written to the postprocessing file
    	 * @param cp the corpus; supplies the complete word list as well as name, period and dates for the header
    	 * @param filteredWords all word tokens of the respective word class (without tags)
    	 * @param wordtype the word class code (e.g. "_nn01", "_vb01", "_jj01")
    	 * @param affixtype the affix type code (e.g. "_su01", "_pr01")
    	 * @param resultPath the directory the result files are written to
    	 * @throws IOException if appending to the final results file fails
    	 */
    	public Result(Map<String, ArrayList<String>> morphemeWordList, Map<String, ArrayList<String>> notInOEDWordList, Corpus cp, ArrayList<String> filteredWords, String wordtype, String affixtype, String resultPath) throws IOException
    	{
    		this.morphemeWordList = morphemeWordList;
    		this.notInOEDWordList = notInOEDWordList;
    		this.filteredWords = filteredWords;
    		this.affixtype = affixtype;
    		this.wordclass = wordtype;
    		this.cp = cp;
    		this.allWordsOfCorpus = cp.getCorpus(); //all words of the corpus irrespective of the word class (words contain tags!)
    		this.resultPath = resultPath;
    		this.tokenFrequencies = countTokens(filteredWords);
    		setFileNames();
    		writeAllMeasuresFile();
    		writePostProcessingFile();
    	}
    	
    	/**
    	 * Returns how often the given word occurs in the filtered word list (0 if it does not occur).
    	 */
    	private int frequencyOf(String word)
    	{
    		Integer count = tokenFrequencies.get(word);
    		return count == null ? 0 : count.intValue();
    	}
    	
    	/**
    	 * Collects those of the given words that occur exactly once in the filtered word list.
    	 *
    	 * @param allWordsContainingAffix the words to check
    	 * @return the hapaxes, in the order of the input list
    	 */
    	private ArrayList<String> findHapaxes(ArrayList<String> allWordsContainingAffix)
    	{
    		ArrayList<String> hapaxes = new ArrayList<String>();
    		
    		for (String wordContainingAffix : allWordsContainingAffix) 
    		{
    			if (frequencyOf(wordContainingAffix) == 1)
    			{
    				hapaxes.add(wordContainingAffix);
    			}
    		}
    	
    		return hapaxes;
    	}
    	
    	/**
    	 * Sums the occurrences in the filtered word list over all given words.
    	 *
    	 * @param allWordsContainingAffix the words whose occurrences are added up
    	 * @return the total number of tokens of these words
    	 */
    	private int calculateNumberOfAffixes(ArrayList<String> allWordsContainingAffix)
    	{
    		int numberOfAffixInCorpus = 0;
    		for (String wordContainingAffix : allWordsContainingAffix) 
    		{
    			numberOfAffixInCorpus += frequencyOf(wordContainingAffix);
    		}
    	 	
    		return numberOfAffixInCorpus;
    	}
    	
    	/**
    	 * Divides the number of hapaxes by the number of tokens.
    	 *
    	 * @return the quotient, or 0.0 if numberOfAffixes is 0
    	 */
    	private double calculateP_Value(int hapaxtypes, int numberOfAffixes)
    	{
    		if (numberOfAffixes == 0)
    		{
    			return 0.0;
    		}
    		return (double)hapaxtypes / (double)numberOfAffixes;
    	}
    	
    	/**
    	 * Counts in a single pass how often every distinct word occurs in the given list.
    	 *
    	 * @param words the word tokens
    	 * @return a map from each distinct word to its number of occurrences
    	 */
    	private Map<String, Integer> countTokens(ArrayList<String> words)
    	{
    		Map<String, Integer> frequencyWords = new HashMap<String,Integer>();
    		
    		for (String word : words) 
    		{
    			Integer seen = frequencyWords.get(word);
    			frequencyWords.put(word, seen == null ? 1 : seen + 1);
    		}
    		
    		return frequencyWords;
    	}
    	
    	/*
    	 * Defines Header as it appears as a heading in the CSV file
    	 */
    	private String createHeader()
    	{
    		// more appropriate naming for the header output; kept in locals so that
    		// the raw codes stored in the fields are not overwritten
    		String affixLabel = affixtype;
    		if (affixtype.equals("_su01"))
    		{
    			affixLabel = "Suffixes";
    		}
    		else if (affixtype.equals("_pr01"))
    		{
    			affixLabel = "Prefixes";
    		}
    		
    		String wordclassLabel = wordclass;
    		if (wordclass.equals("_nn01"))
    		{
    			wordclassLabel = "Nouns";
    		}
    		else if (wordclass.equals("_vb01"))
    		{
    			wordclassLabel = "Verbs";
    		}
    		else if (wordclass.equals("_jj01"))
    		{
    			wordclassLabel = "Adjectives";
    		}
    				
    		// the number of keys of the frequency table equals the number of distinct words (types)
    		return  affixLabel + ": " + wordclassLabel + " in " + cp.getCorpusName() + "/" + cp.getPeriod() + " (" + cp.getStartDate() + "-" + cp.getEndDate() + ")\n" +
    				"Total Types (" + wordclassLabel + "): " + tokenFrequencies.size() + " of " + 
    				filteredWords.size() + " " + wordclassLabel + " and of " + allWordsOfCorpus.size() + 
    				" words in total\n\n" + "Morpheme;Contained in Words;Hapaxes;Types (V);Tokens;No Hapaxes;P\n";
    	}
    	
    	/*
    	 * Generates the Data string written to the CSV result file:
    	 * the header followed by one semicolon-separated line per morpheme
    	 */
    	private String generateDataSet(Map<String, ArrayList<String>> list) 
    	{
    		StringBuilder data = new StringBuilder(createHeader());
    
    		for (Map.Entry<String, ArrayList<String>> entry : list.entrySet())
    		{
    			// allWordsContainingAffix is a list with all word types containing the affix entry.getKey()
    			ArrayList<String> allWordsContainingAffix = entry.getValue();
    			ArrayList<String> hapaxes = findHapaxes(allWordsContainingAffix);
    			int affixfrequencyForAllWordTokens = calculateNumberOfAffixes(allWordsContainingAffix);
    			int affixfrequencyForAllWordTypes = allWordsContainingAffix.size();
    			
    			data.append(entry.getKey()).append(";")
    				.append(allWordsContainingAffix).append(";")
    				.append(hapaxes).append(";")
    				.append(affixfrequencyForAllWordTypes).append(";")
    				.append(affixfrequencyForAllWordTokens).append(";")
    				.append(hapaxes.size()).append(";")
    				.append(calculateP_Value(hapaxes.size(), affixfrequencyForAllWordTokens))
    				.append("\n");
    		}
    		  
    		return data.append("\n\n").toString();
    	}
    			
    	/*
    	 * Derives the locations of both output files from the result path
    	 */
    	private void setFileNames()
    	{
    		// location postprocessing file
    		postprocessingfile = resultPath + File.separator + "postprocessingfile" + wordclass + "" + affixtype + "_" + cp.getPeriod() + "-" + cp.getCorpusName() + ".csv";
    		// location final result file
    		finalresultsfile = resultPath + File.separator + "resultsMorphochron.csv";
    	}
    	
    	/*
    	 * Writes the csv file used to manually postprocess the data
    	 */
    	private void writePostProcessingFile()
    	{
    		IO io = new IO();
    		io.writeMorphemeWordListToCSVFile(postprocessingfile, notInOEDWordList);
    	}
    			
    	/*
    	 * Appends all measures to the final results CSV file
    	 */
    	private void writeAllMeasuresFile() throws IOException
    	{
    		IO io = new IO();
    		io.appendResultsToCSVFile(finalresultsfile, generateDataSet(morphemeWordList));
    	}
    }