Skip to content
Snippets Groups Projects
Commit 0d577bcc authored by Peukert, Dr. Hagen's avatar Peukert, Dr. Hagen
Browse files

GUI implemented, OS independence

TODOs cosmetic details left, ppc check, postprocessingfile include
parent dac27b04
No related branches found
No related tags found
No related merge requests found
......@@ -17,5 +17,6 @@
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="lib" path="jgoodies-forms-1.8.0.jar" sourcepath="jgoodies-forms-1.8.0-sources.jar"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
......@@ -15,9 +15,15 @@
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>net.sourceforge.metrics.builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>net.sourceforge.metrics.nature</nature>
</natures>
</projectDescription>
File added
File added
......@@ -10,11 +10,12 @@ public class Adjective implements WordClass{
{
for (String word : allWordsOfCorpus)
{
if (word.endsWith("ADJ") || word.endsWith("ADJR") ||
word.endsWith("ADJS") || word.endsWith("ADV") ||
word.endsWith("ADVR") || word.endsWith("ADVS"))
//the inflections -er/est and -ier/iest are left because inflected word 1. is found in OED and 2. is monomorphemic
if (word.matches("[a-zA-Z]+[_|/](ADVR|ADJ|ADJR|ADJS|ADV|ADVS)"))
{
allAdjectivesOfCorpus.add(word);
word = word.replaceAll("[_|/](ADVR|ADJR|ADJS|ADVS)", "");
word = word.replaceAll("[_|/](ADJ|ADV)", "");
allAdjectivesOfCorpus.add(word.toLowerCase());
}
}
}
......@@ -22,6 +23,7 @@ public class Adjective implements WordClass{
/**
 * Stores the given word list as this instance's corpus word list and
 * immediately runs {@code deleteInflections()} on it.
 *
 * @param al the words of the corpus; the list is kept by reference, not copied
 */
public void setWords(ArrayList<String> al)
{
this.allWordsOfCorpus = al;
// NOTE(review): presumably this fills the normalized adjective list from the tagged words - confirm in deleteInflections()
deleteInflections();
}
public ArrayList<String> getNormalizedWords()
......
......@@ -7,19 +7,32 @@ import java.util.Set;
public class Affix
{
private Map<String, ArrayList<String>> morphemeWordList = new HashMap<String,ArrayList<String>>();
private Map<String, ArrayList<String>> notInOEDWordList = new HashMap<String,ArrayList<String>>();
private ArrayList<String> filteredWords = new ArrayList<String>();
private String wordclass = "";
private String affixtype = "";
private int startdate = 0;
private int enddate = 0;
private String app_key = "";
private String app_id = "";
private String resultPath = "";
private Corpus corpus;
private String corpusName = "";
private String corpusPeriod = "";
public Affix(ArrayList<String> filteredWords, int startdate, int enddate, String wordclass, String affixtype)
public Affix(ArrayList<String> filteredWords, Corpus corpus, String wordclass, String affixtype, String app_key, String app_id, String resultPath)
{
this.corpus = corpus;
this.filteredWords = filteredWords;
this.affixtype = affixtype;
this.wordclass = wordclass;
this.startdate = startdate;
this.enddate = enddate;
this.startdate = corpus.getStartDate();
this.enddate = corpus.getEndDate();
this.app_key = app_key;
this.app_id = app_id;
this.resultPath = resultPath;
this.corpusName = corpus.getCorpusName();
this.corpusPeriod = corpus.getPeriod();
processMorphemes();
}
......@@ -28,19 +41,54 @@ public class Affix
return morphemeWordList;
}
/**
 * Returns the {@code notInOEDWordList} map held by this instance.
 * The map itself is returned, not a copy, so changes made by the caller
 * are visible to this object.
 *
 * @return map from a morpheme to the list of words recorded under it
 *         (presumably words for which the OED lookup yielded no morphemes - confirm in processMorphemes)
 */
public Map<String, ArrayList<String>> getNotInOEDWordList()
{
return notInOEDWordList;
}
private void processMorphemes()
{
Set<String> wordTypes = new HashSet<String>(filteredWords);
Map<String, Integer> affixMorpheme = new HashMap<String,Integer>();
//Map<String, Integer> affixMorpheme = new HashMap<String,Integer>();
//Map<String, ArrayList<String>> morphemeWordList = new HashMap<String,ArrayList<String>>();
//these ints are only for feedback in the System.output
int numberOfWordTypes = wordTypes.size();
int lookups = 0;
for (String word : wordTypes)
{
Map<String,Integer> affixMorpheme = new HashMap<String,Integer>();
OED ox = new OED(word, "", wordclass, affixtype, corpus, app_key, app_id, resultPath);
Set<String> affixInWord = ox.getMorphemesOEDRequest();
lookups++;
if (affixInWord.size() > 0)
{
for (String morpheme : affixInWord)
{
//System.out.println(morpheme);
if (morphemeWordList.containsKey(morpheme))
{
morphemeWordList.get(morpheme).add(word);
}
else
{
ArrayList<String> wordsWithAffix = new ArrayList<String>();
wordsWithAffix.add(word);
morphemeWordList.put(morpheme, wordsWithAffix);
}
}
}
else
{
System.out.println("No Morpheme Representation in OED: " + word);
AffixStripper as = new AffixStripper(word);
if (affixtype.equals("_su01"))
{
//System.out.println("Suffix morpheme list will be generated");
affixMorpheme = as.getSuffixMorphem(); //contains all suffix morphemes found in noun
}
else if (affixtype.equals("_pr01"))
......@@ -53,32 +101,25 @@ public class Affix
}
if (!affixMorpheme.isEmpty())
{
for (String morpheme : affixMorpheme.keySet())
{
ArrayList<String> wordsWithAffix = new ArrayList<String>();
if (morphemeWordList.get(morpheme)!=null)//only for the first iteration when the morphemeWordList does not contain any data
if (notInOEDWordList.containsKey(morpheme) && !notInOEDWordList.get(morpheme).equals(word))
{
// keep the values of morphemeWordList that were written to it previously
wordsWithAffix = morphemeWordList.get(morpheme);
//System.out.println("First Iteration: " + morphemeWordList.get(morpheme));
notInOEDWordList.get(morpheme).add(word);
}
//System.out.println(word + " " + morpheme);
//call the Oxford class and check if the morpheme occurs in the noun
OED ox = new OED(word, morpheme, wordclass, affixtype, startdate, enddate);
if (ox.processOEDRequest())
else
{
wordsWithAffix.add(word);
morphemeWordList.put(morpheme, wordsWithAffix);
//System.out.println("when OED was consulted: " + word + ": " + morpheme);
ArrayList<String> wordsWithAffixEstimate = new ArrayList<String>(); //this list contains only one word
wordsWithAffixEstimate.add(word);
notInOEDWordList.put(morpheme, wordsWithAffixEstimate);
}
//if (number_of_queries == 1000) break;
}
//System.out.println("Outside the second for-loop: " + word + ": " + affixMorpheme.keySet());
}
}
System.out.println("Word " + lookups + " from " + numberOfWordTypes + " mapped.");
}
}
}
......@@ -12,11 +12,12 @@ public class Corpus
private int enddate = 0;
private ArrayList<String> allWordsOfCorpus = new ArrayList<String>();
public Corpus(String corpusname, String period)
public Corpus(String corpusname, String period, String directory)
{
this.corpusname = corpusname;
this.period = period;
setCorpusDirectory();
this.directory = directory;
setCorpusDates();
readCorpus();
}
......@@ -46,24 +47,24 @@ public class Corpus
}
// location directory of corpus
private void setCorpusDirectory()
private void setCorpusDates()
{
switch (corpusname)
{
case "ppcmbe":
startdate = 1700;
enddate = 1914;
directory = "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA\\PPCMBE-RELEASE-1\\corpus\\pos";
//directory = "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA\\PPCMBE-RELEASE-1\\corpus\\pos";
break;
case "ppceme":
startdate = 1500;
enddate = 1710;
directory = "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA\\PPCEME-RELEASE-2\\corpus\\pos\\penn2";
//directory = "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA\\PPCEME-RELEASE-2\\corpus\\pos\\penn2";
break;
case "ppcme2":
startdate = 1150;
enddate = 1500;
directory = "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA\\PPCME2-RELEASE-3\\corpus\\pos";
//directory = "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA\\PPCME2-RELEASE-3\\corpus\\pos";
break;
}
//System.out.println(directory);
......
import java.awt.BorderLayout;
import java.awt.EventQueue;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.border.EmptyBorder;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JTextField;
import javax.swing.JButton;
import javax.swing.border.LineBorder;
import javax.swing.filechooser.FileNameExtensionFilter;
import java.awt.Color;
import javax.swing.JDesktopPane;
import javax.swing.JFileChooser;
import java.awt.GridLayout;
import java.awt.GridBagLayout;
import java.awt.GridBagConstraints;
import java.awt.Insets;
import java.awt.FlowLayout;
import javax.swing.GroupLayout;
import javax.swing.GroupLayout.Alignment;
import javax.swing.BoxLayout;
import javax.swing.ButtonGroup;
import com.jgoodies.forms.layout.FormLayout;
import com.jgoodies.forms.layout.ColumnSpec;
import com.jgoodies.forms.layout.FormSpecs;
import com.jgoodies.forms.layout.RowSpec;
import java.awt.event.ActionListener;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.awt.event.ActionEvent;
import javax.swing.JCheckBox;
import javax.swing.JRadioButton;
import javax.swing.JTextArea;
import javax.swing.JTextPane;
import javax.swing.JScrollBar;
import javax.swing.JScrollPane;
public class GUI_mainMenu extends JFrame {
private JPanel contentPane;
private JTextField textFieldCorpusDir;
private JTextField textFieldResultDir;
private javax.swing.JFileChooser selectCorpus;
private javax.swing.JFileChooser selectResultDir;
private JTextArea resultTextArea;
private JTextField textFieldID;
private JTextField textFieldKey;
private javax.swing.JFileChooser selectCredentialsID;
private javax.swing.JFileChooser selectCredentialsKey;
private JCheckBox chckbxM1;
private JCheckBox chckbxM2;
private JCheckBox chckbxM3;
private JCheckBox chckbxM4;
private JCheckBox chckbxE1;
private JCheckBox chckbxE2;
private JCheckBox chckbxE3;
private JCheckBox chckbxEmod1;
private JCheckBox chckbxEmod2;
private JCheckBox chckbxEmod3;
private JRadioButton rdbtnPrefix;
private JRadioButton rdbtnSuffix;
private JRadioButton rdbtnAdjective;
private JRadioButton rdbtnNoun;
private JRadioButton rdbtnVerb;
private String corpus = "";
private String wordclass ="";
private String affixtype = "";
private String corpusPath = "";
private String resultPath = "";
private String app_id = "";
private String app_key = "";
private Set<String> period = new HashSet<String>();
private JButton btnRun = new JButton("Run");
private ButtonGroup affixGroup;
private ButtonGroup wordclassGroup;
/**
* Launch the application.
*/
// public static void main(String[] args) {
// EventQueue.invokeLater(new Runnable() {
// public void run() {
// try {
// GUI_mainMenu frame = new GUI_mainMenu();
// frame.setVisible(true);
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
// });
// }
/**
 * Appends the given text to the read-only result text area.
 * No line break is added; callers supply their own.
 *
 * @param msg text to append
 */
public void setMessage(String msg)
{
resultTextArea.append(msg);
}
/**
 * @return the value of the {@code app_id} field (the OED application ID)
 */
public String getAppID()
{
return app_id;
}
/**
 * @return the value of the {@code app_key} field (the OED application key)
 */
public String getAppKey()
{
return app_key;
}
/**
 * @return the word class code set by the word class radio buttons
 *         ("_nn01", "_vb01" or "_jj01"), or "" if none was chosen
 */
public String getWordClass()
{
return wordclass;
}
/**
 * @return the affix type code set by the affix radio buttons
 *         ("_pr01" or "_su01"), or "" if none was chosen
 */
public String getAffixType()
{
return affixtype;
}
/**
 * @return the set of period names maintained by the period check boxes;
 *         this is the internal set, not a copy
 */
public Set<String> getPeriods()
{
return period;
}
/**
 * @return the corpus name derived from the selected corpus path, or "" if none was set
 */
public String getCorpus()
{
return corpus;
}
/**
 * @return the current text of the result directory text field
 */
public String getResultPath()
{
return textFieldResultDir.getText();
}
/**
 * @return the current text of the corpus directory text field
 */
public String getCorpusPath()
{
return textFieldCorpusDir.getText();
}
/**
 * @return the "Run" button, so that callers can attach their own action listener
 */
public JButton getRunButton()
{
return btnRun;
}
/**
 * Returns the directory in which the corpus file chooser starts.
 *
 * @return the project's corpus folder when the "os.name" system property
 *         starts with "Windows", otherwise the file system root "/"
 */
private String getOSPathCorpora()
{
    final boolean runsOnWindows = System.getProperty("os.name").startsWith("Windows");
    return runsOnWindows
            ? "C:\\Users\\Peukert\\Corpora\\PENN Corpus\\PENN-CORPORA"
            : "/";
}
/**
 * Returns the directory in which the result and credential file choosers start.
 *
 * @return the project's Morphochron folder when the "os.name" system property
 *         starts with "Windows", otherwise the file system root "/"
 */
private String getOSPathID()
{
    final boolean runsOnWindows = System.getProperty("os.name").startsWith("Windows");
    return runsOnWindows
            ? "C:\\Users\\Peukert\\Documents\\Morphochron"
            : "/";
}
/**
 * Create the frame.
 *
 * Builds the main window with absolute positioning (null layout): corpus and
 * result directory selection, OED credential entry, period check boxes,
 * word class and affix type radio buttons, Cancel/Run buttons and the
 * read-only result area. All period check boxes start disabled and are
 * enabled once a matching corpus directory has been chosen.
 */
public GUI_mainMenu() {
    setTitle("Morphochron");
    setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    setBounds(200, 200, 650, 600);

    contentPane = new JPanel();
    contentPane.setBorder(new LineBorder(new Color(0, 0, 0)));
    setContentPane(contentPane);
    // Absolute positioning: every component below carries explicit bounds.
    contentPane.setLayout(null);

    selectCorpus = new JFileChooser();
    selectResultDir = new JFileChooser();
    selectCredentialsID = new JFileChooser();
    selectCredentialsKey = new JFileChooser();

    // ----- corpus and result directory -----
    textFieldCorpusDir = createTextField(22, 46);
    textFieldResultDir = createTextField(22, 96);

    JButton btnSelectCorpusDirectory = new JButton("Select Corpus");
    btnSelectCorpusDirectory.setBounds(527, 45, 99, 23);
    btnSelectCorpusDirectory.addActionListener(e -> chooseCorpusDirectory(btnSelectCorpusDirectory));

    JButton btnSelectResultDirectory = new JButton("Select Result");
    btnSelectResultDirectory.setBounds(527, 95, 99, 23);
    btnSelectResultDirectory.addActionListener(e -> {
        File location = chooseFile(selectResultDir, getOSPathID(), btnSelectResultDirectory);
        if (location != null) {
            resultPath = location.getAbsolutePath();
            textFieldResultDir.setText(resultPath);
        }
    });

    contentPane.add(btnSelectCorpusDirectory);
    contentPane.add(textFieldCorpusDir);
    contentPane.add(textFieldResultDir);
    contentPane.add(createLabel("Select Corpus Directory", 22, 27, 116));
    contentPane.add(createLabel("Select Result Directory", 25, 77, 113));
    contentPane.add(btnSelectResultDirectory);

    // ----- OED credentials -----
    contentPane.add(createLabel("Enter OED ID or load from file", 28, 130, 269));
    textFieldID = createTextField(22, 152);
    contentPane.add(textFieldID);

    JButton btnCredentialsID = new JButton("Load ID");
    btnCredentialsID.setBounds(527, 151, 99, 23);
    btnCredentialsID.addActionListener(e -> {
        File location = chooseFile(selectCredentialsID, getOSPathID(), btnCredentialsID);
        if (location != null) {
            app_id = new IO().readFile(location.getAbsolutePath(), false);
            textFieldID.setText(app_id);
        }
    });
    contentPane.add(btnCredentialsID);

    textFieldKey = createTextField(22, 205);
    contentPane.add(textFieldKey);
    contentPane.add(createLabel("Enter OED key or load from file", 22, 183, 269));

    JButton btnCredentialsKey = new JButton("Load Key");
    btnCredentialsKey.setBounds(527, 204, 99, 23);
    btnCredentialsKey.addActionListener(e -> {
        File location = chooseFile(selectCredentialsKey, getOSPathID(), btnCredentialsKey);
        if (location != null) {
            app_key = new IO().readFile(location.getAbsolutePath(), false);
            textFieldKey.setText(app_key);
        }
    });
    contentPane.add(btnCredentialsKey);

    // ----- period check boxes (one column per corpus) -----
    chckbxM1 = createPeriodCheckBox("M1", 22, 264, 49);
    chckbxM2 = createPeriodCheckBox("M2", 22, 289, 54);
    chckbxM3 = createPeriodCheckBox("M3", 22, 317, 49);
    chckbxM4 = createPeriodCheckBox("M4", 22, 343, 49);
    chckbxE1 = createPeriodCheckBox("E1", 99, 264, 49);
    chckbxE2 = createPeriodCheckBox("E2", 99, 289, 41);
    chckbxE3 = createPeriodCheckBox("E3", 99, 317, 41);
    chckbxEmod1 = createPeriodCheckBox("Emod1", 171, 264, 59);
    chckbxEmod2 = createPeriodCheckBox("Emod2", 171, 289, 59);
    chckbxEmod3 = createPeriodCheckBox("Emod3", 171, 317, 59);

    // ----- affix type -----
    // Assigned to the fields (not to shadowing locals) so the fields are never null.
    rdbtnPrefix = createRadioButton("Prefix", 416, 264, e -> affixtype = "_pr01");
    contentPane.add(rdbtnPrefix);
    rdbtnSuffix = createRadioButton("Suffix", 416, 289, e -> affixtype = "_su01");
    contentPane.add(rdbtnSuffix);
    affixGroup = new ButtonGroup();
    affixGroup.add(rdbtnPrefix);
    affixGroup.add(rdbtnSuffix);

    // ----- Cancel / Run -----
    JButton btnCancel = new JButton("Cancel");
    btnCancel.addActionListener(e -> System.exit(0));
    btnCancel.setBounds(438, 343, 89, 23);
    contentPane.add(btnCancel);
    btnRun.setBounds(537, 343, 89, 23);
    contentPane.add(btnRun);

    // ----- column headings -----
    contentPane.add(createLabel("PPCME2", 22, 243, 59));
    contentPane.add(createLabel("PPCEME", 99, 243, 62));
    contentPane.add(createLabel("PPCMBE", 171, 243, 59));
    contentPane.add(createLabel("Affix Type", 418, 243, 49));
    contentPane.add(createLabel("Results", 22, 371, 49));
    contentPane.add(createLabel("Word Class", 284, 243, 99));

    // ----- word class -----
    rdbtnNoun = createRadioButton("Noun", 291, 264, e -> wordclass = "_nn01");
    contentPane.add(rdbtnNoun);
    rdbtnVerb = createRadioButton("Verb", 291, 289, e -> wordclass = "_vb01");
    contentPane.add(rdbtnVerb);
    rdbtnAdjective = createRadioButton("Adjective", 291, 317, e -> wordclass = "_jj01");
    contentPane.add(rdbtnAdjective);
    wordclassGroup = new ButtonGroup();
    wordclassGroup.add(rdbtnNoun);
    wordclassGroup.add(rdbtnVerb);
    wordclassGroup.add(rdbtnAdjective);

    // ----- result output -----
    JScrollPane scrollPane = new JScrollPane();
    scrollPane.setBounds(25, 396, 601, 140);
    contentPane.add(scrollPane);
    resultTextArea = new JTextArea();
    resultTextArea.setEditable(false);
    scrollPane.setViewportView(resultTextArea);
    resultTextArea.setAutoscrolls(true);
}

/** Creates a 498x20 text field with 10 columns at the given position. */
private JTextField createTextField(int x, int y) {
    JTextField field = new JTextField();
    field.setBounds(x, y, 498, 20);
    field.setColumns(10);
    return field;
}

/** Creates a label of height 14 with the given text, position and width. */
private JLabel createLabel(String text, int x, int y, int width) {
    JLabel label = new JLabel(text);
    label.setBounds(x, y, width, 14);
    return label;
}

/** Creates a radio button (111x23) at the given position with the given listener attached. */
private JRadioButton createRadioButton(String text, int x, int y, ActionListener listener) {
    JRadioButton button = new JRadioButton(text);
    button.addActionListener(listener);
    button.setBounds(x, y, 111, 23);
    return button;
}

/**
 * Creates an initially disabled period check box, adds it to the content pane
 * and keeps the {@code period} set in sync with the box's own selection state.
 */
private JCheckBox createPeriodCheckBox(String periodName, int x, int y, int width) {
    JCheckBox box = new JCheckBox(periodName);
    box.addActionListener(e -> {
        // Each box consults its OWN state (the M2 box used to consult M1).
        if (box.isSelected()) {
            period.add(periodName);
        } else {
            period.remove(periodName);
        }
    });
    box.setBounds(x, y, width, 23);
    box.setEnabled(false);
    contentPane.add(box);
    return box;
}

/**
 * Shows the given chooser (files and directories selectable), starting in
 * {@code startDirectory}.
 *
 * @return the selected file, or {@code null} when the dialog was not approved
 */
private File chooseFile(JFileChooser chooser, String startDirectory, JButton parent) {
    chooser.setCurrentDirectory(new File(startDirectory));
    chooser.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
    if (chooser.showOpenDialog(parent) != JFileChooser.APPROVE_OPTION) {
        return null;
    }
    return chooser.getSelectedFile();
}

/**
 * Lets the user pick the corpus location and updates the path field, the corpus
 * name and the period check boxes. A cancelled dialog leaves everything untouched.
 */
private void chooseCorpusDirectory(JButton parent) {
    File location = chooseFile(selectCorpus, getOSPathCorpora(), parent);
    if (location == null) {
        return;
    }
    // A chosen file stands for its containing directory; a chosen directory is
    // used as is. getParent() is null for a file system root, so fall back to
    // the selection itself instead of storing null.
    String parentDirectory = location.getParent();
    if (location.isDirectory() || parentDirectory == null) {
        corpusPath = location.getAbsolutePath();
    } else {
        corpusPath = parentDirectory;
    }
    textFieldCorpusDir.setText(corpusPath);
    applyCorpusPath(corpusPath);
}

/**
 * Derives the corpus name from the release folder contained in the path and
 * enables only the period check boxes that belong to that corpus.
 */
private void applyCorpusPath(String path) {
    if (path.contains("PPCMBE-RELEASE-1")) {
        corpus = "ppcmbe";
        setPeriodGroupsEnabled(false, false, true);
    } else if (path.contains("PPCEME-RELEASE-2")) {
        corpus = "ppceme";
        setPeriodGroupsEnabled(false, true, false);
    } else if (path.contains("PPCME2-RELEASE-3")) {
        // Must be "ppcme2": that is the name the Corpus class switches on.
        corpus = "ppcme2";
        setPeriodGroupsEnabled(true, false, false);
    } else {
        // Not a recognised PENN corpus: forget any previously detected corpus.
        corpus = "";
        setPeriodGroupsEnabled(false, false, false);
    }
}

/**
 * Deselects every period check box, clears the {@code period} set and enables
 * the three check box groups as requested.
 *
 * @param me2 enable M1-M4 (PPCME2)
 * @param eme enable E1-E3 (PPCEME)
 * @param mbe enable Emod1-Emod3 (PPCMBE)
 */
private void setPeriodGroupsEnabled(boolean me2, boolean eme, boolean mbe) {
    for (JCheckBox box : new JCheckBox[] {chckbxM1, chckbxM2, chckbxM3, chckbxM4}) {
        box.setEnabled(me2);
        box.setSelected(false);
    }
    for (JCheckBox box : new JCheckBox[] {chckbxE1, chckbxE2, chckbxE3}) {
        box.setEnabled(eme);
        box.setSelected(false);
    }
    for (JCheckBox box : new JCheckBox[] {chckbxEmod1, chckbxEmod2, chckbxEmod3}) {
        box.setEnabled(mbe);
        box.setSelected(false);
    }
    // setSelected(false) fires no ActionEvent, so the set is emptied explicitly.
    period.clear();
}
/**
 * Checks that the form is complete and that the OED credentials are accepted.
 *
 * The checks run in this order and stop at the first failure, which is
 * reported in an error dialog: corpus directory, result directory, OED ID,
 * OED key, affix type, word class, and finally a test request against the
 * OED API. Before the test request, {@code app_id} and {@code app_key} are
 * taken from the text fields, so credentials typed by hand are used in the
 * same way as credentials loaded from a file.
 *
 * @return {@code true} if every check passed, {@code false} otherwise
 */
public boolean validateForm() {
    // Confirm mandatory fields are filled out
    if (textFieldCorpusDir.getText().trim().isEmpty()) {
        textFieldCorpusDir.requestFocusInWindow();
        return reportInvalid("- Please enter a PPC directory.\n", "Directory not specified!");
    }
    if (textFieldResultDir.getText().trim().isEmpty()) {
        textFieldResultDir.requestFocusInWindow();
        return reportInvalid("- Please enter a result directory.\n", "Directory not specified!");
    }
    if (textFieldID.getText().trim().isEmpty()) {
        textFieldID.requestFocusInWindow();
        return reportInvalid("- Please enter a valid OED ID.\n", "No OED ID available!");
    }
    if (textFieldKey.getText().trim().isEmpty()) {
        textFieldKey.requestFocusInWindow();
        return reportInvalid("- Please enter a valid OED key.\n", "No OED key available!");
    }
    if (affixGroup.getSelection() == null) {
        return reportInvalid("- Please select the affix type.\n", "No Affix type available!");
    }
    if (wordclassGroup.getSelection() == null) {
        return reportInvalid("- Please select the word class.\n", "No word class available!");
    }

    // The text fields are what the user sees and may have edited; make them
    // the single source of the credentials handed out by getAppID()/getAppKey().
    app_id = textFieldID.getText().trim();
    app_key = textFieldKey.getText().trim();

    // An empty response to a harmless lookup is treated as rejected credentials.
    IO testapi = new IO();
    if (testapi.requestRESTfulAPI("https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/words/?lemma=test", app_id, app_key).isEmpty()) {
        return reportInvalid("- Your OED credentials are invalid.\n", "No valid OED access!");
    }
    return true;
}

/** Shows an error dialog with the given message and title; always returns {@code false}. */
private boolean reportInvalid(String message, String title) {
    JOptionPane.showMessageDialog(null, message, title, JOptionPane.ERROR_MESSAGE);
    return false;
}
}
import java.awt.EventQueue;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
public class Init {
public class Init
{
/**
* Short description of the algorithm given a PENN tagged text corpus
* loop through list: for each word do:
......@@ -15,6 +20,8 @@ public class Init {
* 5. write all words that contain affix to list
* 6. Check with Token-Hashmap if word in 5 is a hapax legomenon
*/
public static void main(String[] args) throws IOException
{
/*
......@@ -29,53 +36,88 @@ public class Init {
* Affix Type: _su01, _pr01
*
* TODO:
* 1. implement GUI to have properties selected
* 2. include credentials and directories as selection
* 1. implement GUI to have properties selected -- Done
* 2. include credentials and directories as selection -- Done
* 3. check if selected corpus is indeed ppc
* 4. procedure to incorporate postprocessingfiles in the result (i.e. merge list from postprocessing in morphemeWordList
*/
String corpus = "ppcmbe";
String period = "Emod2";
String wordclass ="_nn01";
String affixtype = "_su01";
EventQueue.invokeLater(new Runnable() {
public void run() {
GUI_mainMenu frame = new GUI_mainMenu();
frame.setVisible(true);
frame.getRunButton().addActionListener(new ActionListener()
{
public void actionPerformed(ActionEvent e)
{
if (frame.validateForm())
{
String corpus = frame.getCorpus();
Set<String> periods = frame.getPeriods();
String wordclass = frame.getWordClass();
String affixtype = frame.getAffixType();
String app_key = frame.getAppKey();
String app_id = frame.getAppID();
String corpusPath = frame.getCorpusPath();
String resultPath = frame.getResultPath();
System.out.println(
for (String period : periods)
{
frame.setMessage(
"Selection made\ncorpus: " + corpus + "\nperiod: " + period +
"\nword class: " + wordclass + "\naffixtype: " + affixtype);
"\nword class: " + wordclass + "\naffixtype: " + affixtype + "\n");
Corpus cp = new Corpus(corpus, period);
Corpus cp = new Corpus(corpus, period, corpusPath);
ArrayList<String> allWordsOfCorpus = new ArrayList<String>();
allWordsOfCorpus = cp.getCorpus();
System.out.println("Corpus read completely and normalized");
frame.setMessage("Corpus read completely and normalized\n");
//System.getProperties().list(System.out);
//create normalized word lists (factory pattern)
WordClassFactory wordClassFactory = new WordClassFactory();
WordClass wc = wordClassFactory.normalizeWords(wordclass, allWordsOfCorpus);
ArrayList<String> normalizedWords = new ArrayList<String>();
// normalizedWords = wc.getNormalizedWords();
normalizedWords = wc.getNormalizedWords();
// for (String word : normalizedWords)
// {
// System.out.println(word);
// }
normalizedWords.add("mountainousness");
normalizedWords.add("mountainous");
normalizedWords.add("counterargument");
normalizedWords.add("precondition");
normalizedWords.add("reanimation");
normalizedWords.add("degeneration");
normalizedWords.add("proposition");
// normalizedWords.add("mountainousness");
// normalizedWords.add("mountainous");
// normalizedWords.add("consideration");
// normalizedWords.add("precondition");
// normalizedWords.add("restlessness");
// normalizedWords.add("dignitary");
// normalizedWords.add("proposition");
// normalizedWords.add("daskommtnichtvor");
System.out.println("All words of type " + wordclass + " selected");
frame.setMessage("All words of type " + wordclass + " selected\n");
//detect affixes in word list as a pre-processing and countercheck these with OED REST API
Affix aff = new Affix(normalizedWords, cp.getStartDate(), cp.getEndDate(), wordclass, affixtype);
Affix aff = new Affix(normalizedWords, cp, wordclass, affixtype, app_key, app_id, resultPath);
Map<String, ArrayList<String>> morphemeWordList = new HashMap<String,ArrayList<String>>();
Map<String, ArrayList<String>> notInOEDWordList = new HashMap<String,ArrayList<String>>();
morphemeWordList = aff.getMorphemeWordList();
notInOEDWordList = aff.getNotInOEDWordList();
System.out.println("Affixes parsed and validated in OED");
frame.setMessage("Affixes parsed and validated in OED\n");
System.out.println("Writing results to file");
frame.setMessage("Writing results to file\n");
//calculate results and write them to file
Result rs = new Result(morphemeWordList, cp, normalizedWords, wordclass, affixtype);
System.out.println("Done!");
try
{
Result rs = new Result(morphemeWordList, notInOEDWordList, cp, normalizedWords, wordclass, affixtype, resultPath);
} catch (Exception ex) {
ex.printStackTrace();
}
frame.setMessage("Done!\n");
}
}
}
});
}
});
}
}
import javax.net.ssl.HttpsURLConnection;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.json.JSONObject;
import org.json.JSONArray;
......@@ -32,18 +36,22 @@ public class OED
private final String app_id;
private final String app_key;
private IO restapi = new IO();
private String resultPath = "";
private Corpus corpus;
//for each look up the word and one of its contained affixes is needed
/*
 * Creates a lookup for one word and one of the affixes it contains.
 *
 * word       - the word that is looked up
 * morpheme   - the affix candidate contained in the word
 * wordclass  - word class label; also becomes part of result file names
 * affixtype  - affix type tag (this class compares it against "_su01" and "_pr01")
 * corpus     - corpus that supplies start date, end date, period and name
 * app_key    - application key for the REST API
 * app_id     - application id for the REST API
 * resultPath - directory in which result files are written
 *
 * The credentials are passed in by the caller; they are no longer read from
 * a machine-specific file path, and the corpus dates are taken from the
 * corpus object instead of separate int parameters.
 */
public OED(String word, String morpheme, String wordclass, String affixtype, Corpus corpus, String app_key, String app_id, String resultPath)
{
    this.corpus = corpus;
    this.app_id = app_id;
    this.app_key = app_key;
    this.word = word;
    this.morpheme = morpheme;
    this.wordclass = wordclass;
    this.affixtype = affixtype;
    this.startdateCorpus = corpus.getStartDate();
    this.enddateCorpus = corpus.getEndDate();
    this.resultPath = resultPath;
}
/*
......@@ -80,7 +88,7 @@ public class OED
return wordid;
}
/*
/* @deprecated
* checks if a word has an extra entry for the given morpheme
* checks if field daterange.obsolete = false and daterange.end is null
*/
......@@ -108,6 +116,7 @@ public class OED
JSONArray arr = obj.getJSONArray("data");
for (int i = 0; i < arr.length(); i++)
{
//System.out.println("The following morpheme is checked for existence: " + morpheme);
occurredIn = arr.getJSONObject(i).getString("lemma").equals(affix);
if (occurredIn)
......@@ -119,10 +128,36 @@ public class OED
break;
}
}
// the obsolete flag is not checked because it seems to reflect today's perspective on obsolescence
return (occurredIn && startyearOED < startdateCorpus && enddateCorpus < endyearOED);
}
/*
 * Collects the affixes found in the "data" array of the given JSON object.
 *
 * An entry is considered only if its "lemma" starts with "-" while affixtype
 * is "_su01", or ends with "-" while affixtype is "_pr01". Of those, an entry
 * is kept when its date range strictly encloses the corpus period
 * (start < startdateCorpus and enddateCorpus < end). A missing end year
 * counts as 10000 and a missing start year as 0.
 *
 * Returns the kept lemmas with every "-" removed.
 */
private Set<String> getAllAffixesFromJSonRoot(JSONObject obj)
{
    Set<String> matches = new HashSet<String>();
    JSONArray entries = obj.getJSONArray("data");
    for (int idx = 0; idx < entries.length(); idx++)
    {
        JSONObject entry = entries.getJSONObject(idx);
        String lemma = entry.getString("lemma");

        boolean wantedSuffix = lemma.startsWith("-") && affixtype.equals("_su01");
        boolean wantedPrefix = !wantedSuffix && lemma.endsWith("-") && affixtype.equals("_pr01");
        if (!wantedSuffix && !wantedPrefix)
        {
            continue;
        }

        JSONObject range = entry.getJSONObject("daterange");
        // lots of end dates are null, i.e. not set
        int lastYear = range.optInt("end", 10000);
        int firstYear = range.optInt("start", 0);
        if (firstYear < startdateCorpus && enddateCorpus < lastYear)
        {
            matches.add(lemma.replace("-", ""));
        }
    }
    return matches;
}
/*
* processes OED API queries
*/
......@@ -155,13 +190,40 @@ public class OED
JSONObject o = getJSonResponse(s);
entryAvailable = processJSonRoot(o);
}
// else
// {
// entryAvailable = false;
// }
return entryAvailable;
}
public Set<String> getMorphemesOEDRequest()
{
String wordJSON = getRESTAPIWordRepresentation(word.toLowerCase());
JSONObject jo = getJSonResponse(wordJSON);
String id = processJSonWordID(jo);
Set<String> affixes = new HashSet<String>();
if (!id.isEmpty())
{
String s = getRESTAPIRootRepresentation(id);
JSONObject o = getJSonResponse(s);
affixes = getAllAffixesFromJSonRoot(o);
}
else
{
System.out.println("Word does not exist in OED");
System.out.println("Word does not exist in OED: " + word);
String file = resultPath + File.separator + "wordsNotExistentInOED" + wordclass + "" + affixtype + "_" + corpus.getPeriod() + "-" + corpus.getCorpusName() + ".csv";
try
{
restapi.appendResultsToCSVFile(file, word);
}
return entryAvailable;
catch (IOException e)
{
System.out.println(e.getMessage());
}
}
return affixes;
}
/*
* delete the main method once the program is finished
* exists only for test purposes
......
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
......@@ -14,21 +15,26 @@ public class Result
private String wordclass = "";
private String postprocessingfile = "";
private String finalresultsfile = "";
private String resultPath = "";
private ArrayList<String> filteredWords = new ArrayList<String>();
private ArrayList<String> allWordsOfCorpus = new ArrayList<String>();
private Map<String, ArrayList<String>> morphemeWordList = new HashMap<String,ArrayList<String>>();
private Map<String, ArrayList<String>> notInOEDWordList = new HashMap<String,ArrayList<String>>();
/*
 * Stores the given word lists and settings, takes the full tagged word list
 * from the corpus, sets the output file names and writes the result files.
 * Declared to throw IOException.
 *
 * NOTE(review): two constructor signatures follow each other here, and the
 * body assigns notInOEDWordList and resultPath, which only the second
 * signature declares - this looks like an old and a new revision merged;
 * confirm that only the second signature is current.
 */
public Result(Map<String, ArrayList<String>> morphemeWordList, Corpus cp, ArrayList<String> filteredWords, String wordtype, String affixtype) throws IOException
public Result(Map<String, ArrayList<String>> morphemeWordList, Map<String, ArrayList<String>> notInOEDWordList, Corpus cp, ArrayList<String> filteredWords, String wordtype, String affixtype, String resultPath) throws IOException
{
this.morphemeWordList = morphemeWordList; //the mapping between the morpheme and all word types it is contained in
this.notInOEDWordList = notInOEDWordList;
this.filteredWords = filteredWords; //these are all the words of the respective word class (without tags)
this.affixtype = affixtype;
this.wordclass = wordtype;
this.cp = cp;
this.allWordsOfCorpus = cp.getCorpus(); //all words of the corpus irrespective of the word class (words contain tags!)
this.resultPath = resultPath;
//file names are built from resultPath, wordclass, affixtype and cp, so they must be set first
setFileNames();
//NOTE(review): the return value of this call is discarded - confirm whether the call is still needed
generateDataSet();
//generateDataSet(morphemeWordList);
writeAllMeasuresFile();
writePostProcessingFile();
}
private ArrayList<String> findHapaxes(ArrayList<String> allWordsContainingAffix)
......@@ -115,15 +121,15 @@ public class Result
/*
* Generates the Data string written to the CSV result file
*/
private String generateDataSet()
private String generateDataSet(Map<String, ArrayList<String>> list)
{
String data = createHeader();
for (String s : morphemeWordList.keySet())
for (String s : list.keySet())
{
String key = s.toString();
// allWordsContainingAffix is a list with all wordtypes containing one affix s.toString
ArrayList<String> allWordsContainingAffix = morphemeWordList.get(s);
ArrayList<String> allWordsContainingAffix = list.get(s);
ArrayList<String> hapaxes = findHapaxes(allWordsContainingAffix);
int affixfrequencyForAllWordTokens = calculateNumberOfAffixes(allWordsContainingAffix);
int affixfrequencyForAllWordTypes = allWordsContainingAffix.size();
......@@ -139,22 +145,22 @@ public class Result
/*
 * Builds the locations of both output files below resultPath.
 * The previous hard-coded, machine-specific absolute paths were overwritten
 * right away and have been removed.
 */
private void setFileNames()
{
    // location postprocessing file: name carries word class, affix type, period and corpus name
    postprocessingfile = resultPath + File.separator + "postprocessingfile" + wordclass + affixtype + "_" + cp.getPeriod() + "-" + cp.getCorpusName() + ".csv";
    // location final result file: fixed name, does not vary with word class or corpus
    finalresultsfile = resultPath + File.separator + "resultsMorphochron.csv";
}
/*
 * Writes word lists to the post-processing CSV file (postprocessingfile)
 * so that the data can be post-processed manually.
 *
 * NOTE(review): both calls below target the same file, first with
 * morphemeWordList and then with notInOEDWordList. Whether the second call
 * overwrites or extends the first depends on
 * IO.writeMorphemeWordListToCSVFile, which is not visible here - confirm
 * that both calls are intended.
 */
private void writePostProcessingFile()
{
IO io = new IO();
//write csv file to manually postprocess the data
io.writeMorphemeWordListToCSVFile(postprocessingfile, morphemeWordList);
io.writeMorphemeWordListToCSVFile(postprocessingfile, notInOEDWordList);
}
/*
 * Appends the generated data set to the final results CSV file
 * (finalresultsfile). Declared to throw IOException.
 *
 * NOTE(review): the data set is appended twice, once via generateDataSet()
 * and once via generateDataSet(morphemeWordList) - this looks like an old
 * and a new revision merged; confirm which call is current.
 */
private void writeAllMeasuresFile() throws IOException
{
IO io = new IO();
// write all results to CSV file
io.appendResultsToCSVFile(finalresultsfile, generateDataSet());
io.appendResultsToCSVFile(finalresultsfile, generateDataSet(morphemeWordList));
}
}
......@@ -9,13 +9,13 @@ public class Verb implements WordClass{
{
for (String word : allWordsOfCorpus)
{
if (word.endsWith("MD") || word.endsWith("MD0") ||
word.endsWith("VAG") || word.endsWith("VAN") ||
word.endsWith("VB") || word.endsWith("VBI") ||
word.endsWith("VBD") || word.endsWith("VBN") ||
word.endsWith("VBP"))
//inflections are left in place since the words are found in the OED and are monomorphemic, except 3rd person singular, which is not marked in PENN
//[a-zA-Z]+[_|/](MD|MD0|VAG|VAN|VB|VBI|VBD|VBN|VBP)
if (word.matches("[a-zA-Z]+[_|/](VBN)"))
{
allVerbsOfCorpus.add(word);
word = word.replaceAll("[_|/](MD0|VAG|VAN|VBI|VBD|VBN|VBP)","");
word = word.replaceAll("[_|/](MD|VB)","");
allVerbsOfCorpus.add(word.toLowerCase());
}
}
}
......@@ -23,6 +23,7 @@ public class Verb implements WordClass{
/*
 * Takes over the given word list as the corpus words of this word class
 * and then calls deleteInflections() on it right away.
 */
public void setWords(ArrayList<String> al)
{
    allWordsOfCorpus = al;
    deleteInflections();
}
public ArrayList<String> getNormalizedWords()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment