import andyr.jtokeniser.Tokeniser;
import andyr.jtokeniser.WhiteSpaceTokeniser;
import andyr.jtokeniser.StringTokeniser;
import andyr.jtokeniser.RegexTokeniser;
import andyr.jtokeniser.BreakIteratorTokeniser;

/**
 * An example file showing how to use the various tokenisers.
 *
 * <p>Essentially, one or more tokenisers are initialised for each of
 * the possible classes. Each tokeniser is then drained with a while loop
 * and every token is output on a new line. Run this example to see how
 * each tokeniser deals with the string: He said, "Shut-up!"
 *
 * @author Andrew Roberts
 * @version 1.0 (11-Mar-2005)
 */
public class JTokeniserExample {

    /** Sample sentence that is fed to every tokeniser. */
    private static final String TEST_STRING = "He said, \"Shut-up!\"";

    /** Line printed after each demonstration to separate the outputs. */
    private static final String SEPARATOR = "------";

    /**
     * Runs every example in turn: white-space, string, regex and break-iterator.
     *
     * <p>Note that the example methods are public and invoked from this
     * constructor, so an overriding subclass method would run before the
     * subclass has finished initialising.
     */
    public JTokeniserExample() {
        whiteSpaceTokeniserExample();
        stringTokeniserExample();
        regexTokeniserExample();
        breakIteratorExample();
    }

    /**
     * Prints the tokens a {@link WhiteSpaceTokeniser} produces for the sample string.
     */
    public void whiteSpaceTokeniserExample() {
        printTokens(
                "WhiteSpaceTokeniser(" + TEST_STRING + ")",
                new WhiteSpaceTokeniser(TEST_STRING));
        System.out.println(SEPARATOR);
    }

    /**
     * Prints the tokens a {@link StringTokeniser} produces for the sample string
     * using three constructor variants: text only, text plus a delimiter set,
     * and text plus a delimiter set plus a {@code true} flag.
     */
    public void stringTokeniserExample() {
        printTokens(
                "StringTokeniser(" + TEST_STRING + ")",
                new StringTokeniser(TEST_STRING));
        System.out.println(SEPARATOR);

        // Space, comma, double quote, exclamation mark and hyphen.
        String delimiters = " ,\"!-";

        printTokens(
                "StringTokeniser(" + TEST_STRING + ", " + delimiters + ")",
                new StringTokeniser(TEST_STRING, delimiters));
        System.out.println(SEPARATOR);

        // NOTE(review): the boolean presumably asks for delimiters to be returned
        // as tokens, as with java.util.StringTokenizer - confirm against the library.
        printTokens(
                "StringTokeniser(" + TEST_STRING + ", " + delimiters + ", true)",
                new StringTokeniser(TEST_STRING, delimiters, true));
        System.out.println(SEPARATOR);
    }

    /**
     * Prints the tokens a {@link RegexTokeniser} produces for the sample string
     * with its default settings and with two explicit regular expressions.
     */
    public void regexTokeniserExample() {
        // One or more word characters.
        String wordRegex = "\\w+";
        // Word characters, optionally followed by a hyphen, optional white space
        // and further word characters (e.g. "Shut-up").
        String hyphenatedWordRegex = "(\\w+(-\\s*\\w+)?)";

        printTokens(
                "RegexTokeniser(" + TEST_STRING + ") // default breaks on whitespace",
                new RegexTokeniser(TEST_STRING));
        System.out.println(SEPARATOR);

        printTokens(
                "RegexTokeniser(" + TEST_STRING + ", " + wordRegex + ")",
                new RegexTokeniser(TEST_STRING, wordRegex));
        System.out.println(SEPARATOR);

        printTokens(
                "RegexTokeniser(" + TEST_STRING + ", " + hyphenatedWordRegex + ")",
                new RegexTokeniser(TEST_STRING, hyphenatedWordRegex));
        System.out.println(SEPARATOR);
    }

    /**
     * Prints the tokens a {@link BreakIteratorTokeniser} produces for the sample
     * string and for a second string that contains a non-ASCII letter.
     */
    public void breakIteratorExample() {
        // "Hullo world!" with a u-umlaut (U+00FC) as the second character.
        String umlautString = "H\u00fcllo world!";

        printTokens(
                "BreakIteratorTokeniser(" + TEST_STRING + ")",
                new BreakIteratorTokeniser(TEST_STRING));
        System.out.println(SEPARATOR);

        printTokens(
                "BreakIteratorTokeniser(" + umlautString + ")",
                new BreakIteratorTokeniser(umlautString));
        System.out.println("\nNB A RegexTokeniser would struggle with this string.");
        System.out.println(SEPARATOR);
    }

    /**
     * Prints a header line followed by every remaining token of the given
     * tokeniser, one token per line.
     *
     * <p>Both arguments are evaluated before this method runs, so the tokeniser
     * is always constructed before the header is printed.
     *
     * @param header    line describing the tokeniser call being demonstrated
     * @param tokeniser tokeniser to drain; it has no tokens left afterwards
     */
    private static void printTokens(String header, Tokeniser tokeniser) {
        System.out.println(header);
        while (tokeniser.hasMoreTokens()) {
            System.out.println(tokeniser.nextToken());
        }
    }

    /**
     * Entry point: runs all examples and reports any exception on standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            new JTokeniserExample();
        } catch (Exception e) {
            // println rather than print so the error report ends with a line break.
            System.err.println(e);
        }
    }
}