import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import java.util.*;

/**
 * A small playground that reads a text, splits it into lower-case words and
 * prints a handful of statistics about them (word count, vocabulary size,
 * a random sample, reversible words, a sorted dictionary and word frequencies).
 */
public class WordFun {

    /**
     * Read everything from a reader and split it into lower-case words.
     *
     * <p>Words are the maximal runs of characters not matched by the regular
     * expression {@code \W+}. Empty tokens (produced by {@code split} when the
     * text is empty or starts with a separator) are dropped, so they are never
     * counted as words.
     *
     * @param r the reader to consume; it is read to the end but not closed
     * @return a new, modifiable list of the words in reading order
     * @throws IOException if reading from {@code r} fails
     */
    public static List<String> slurp(BufferedReader r) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buf = new char[1024];
        int count;
        while ((count = r.read(buf)) > 0) {
            text.append(buf, 0, count);
        }
        // Lower-case the complete text once, with a fixed locale, so the result
        // depends neither on the platform locale nor on where a chunk ended.
        String lowered = text.toString().toLowerCase(Locale.ROOT);
        List<String> words = new ArrayList<>();
        for (String token : lowered.split("\\W+")) {
            if (!token.isEmpty()) {
                words.add(token);
            }
        }
        return words;
    }

    /**
     * Reverse a string.
     *
     * @param s the string to reverse
     * @return the characters of {@code s} in reverse order
     */
    static String reverse(String s) {
        return new StringBuilder(s).reverse().toString();
    }

    /**
     * Get the first n elements from a collection.
     *
     * @param c the collection to take elements from, in its iteration order
     * @param n the maximum number of elements to take
     * @return a new list holding at most {@code n} elements; empty if
     *         {@code n <= 0} or {@code c} is empty
     */
    static <T> List<T> first_n(Collection<? extends T> c, int n) {
        List<T> head = new ArrayList<>();
        Iterator<? extends T> it = c.iterator();
        for (int i = 0; i < n && it.hasNext(); i++) {
            head.add(it.next());
        }
        return head;
    }

    /**
     * Playground: read all words from {@code r} and print statistics to {@code w}.
     *
     * @param r the reader to read from
     * @param w the writer to write to; it is not flushed or closed here
     * @throws IOException if reading from {@code r} fails
     */
    public static void doIt(BufferedReader r, PrintWriter w) throws IOException {
        // gulp down the input and split it into a list of words
        List<String> wordList = slurp(r);
        w.println(first_n(wordList, 20));

        // count the number of words
        w.println("# of words = " + wordList.size());

        // count the number of distinct words (vocabulary)
        Set<String> wordSet = new HashSet<>(wordList);
        w.println("# of distinct words (vocabular size)= " + wordSet.size());

        // a random sample of 50 words (shuffles wordList in place; the
        // frequency counts below do not depend on its order)
        Collections.shuffle(wordList);
        w.println(first_n(wordList, 50));

        // find all the ananyms: words whose reversal is also a word in the
        // text (this includes palindromes, which are their own reversal)
        for (String s : wordSet) {
            if (wordSet.contains(reverse(s))) {
                w.println(s);
            }
        }

        // make a dictionary (a sorted set of distinct words)
        SortedSet<String> sortedWordSet = new TreeSet<>(wordSet);
        w.println(first_n(sortedWordSet, 20));

        // look up a specific prefix: the first words that sort at or after it
        SortedSet<String> ts1 = sortedWordSet.tailSet("a");
        w.println(first_n(ts1, 20));
        SortedSet<String> ts2 = sortedWordSet.tailSet("fre");
        w.println(first_n(ts2, 20));

        // count how often each word occurs
        Map<String, Integer> freq = new HashMap<>();
        for (String s : wordList) {
            Integer seen = freq.get(s);
            freq.put(s, seen == null ? 1 : seen + 1);
        }

        // find the most frequently occurring word
        int besti = 0;
        String bests = "";
        for (Map.Entry<String, Integer> e : freq.entrySet()) {
            if (e.getValue() > besti) {
                besti = e.getValue();
                bests = e.getKey();
            }
        }
        w.println(bests + " (" + besti + ")");

        // find the top k most frequently occurring words
        List<Map.Entry<String, Integer>> entryList = new ArrayList<>(freq.entrySet());
        Collections.sort(entryList, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                // descending by count; Integer.compare avoids subtraction overflow
                return Integer.compare(b.getValue(), a.getValue());
            }
        });
        w.println(first_n(entryList, 20));
    }

    /**
     * Open the input: the file named by {@code args[0]} if present, otherwise System.in.
     */
    private static BufferedReader openInput(String[] args) throws IOException {
        if (args.length == 0) {
            return new BufferedReader(new InputStreamReader(System.in));
        }
        return new BufferedReader(new FileReader(args[0]));
    }

    /**
     * Open the output: the file named by {@code args[1]} if present, otherwise System.out.
     */
    private static PrintWriter openOutput(String[] args) throws IOException {
        if (args.length < 2) {
            return new PrintWriter(System.out);
        }
        return new PrintWriter(new FileWriter(args[1]));
    }

    /**
     * The driver. Open a BufferedReader and a PrintWriter, either from System.in
     * and System.out or from filenames specified on the command line, then call
     * doIt and report the elapsed time on System.out.
     *
     * <p>On an I/O error the exception is printed to System.err and the JVM
     * exits with status -1.
     *
     * @param args optional input filename, optionally followed by an output filename
     */
    public static void main(String[] args) {
        // Both streams are closed when the try block ends. When they wrap
        // System.in / System.out this closes the standard streams too, which
        // is why the timing line is printed before leaving the block.
        try (BufferedReader r = openInput(args);
             PrintWriter w = openOutput(args)) {
            long start = System.nanoTime();
            doIt(r, w);
            w.flush();
            long stop = System.nanoTime();
            System.out.println("Execution time: " + 1e-9 * (stop - start));
        } catch (IOException e) {
            System.err.println(e);
            System.exit(-1);
        }
    }
}