
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import java.util.*;

/**
 * A small playground of word statistics computed with the Java collections
 * framework: word counts, vocabulary size, random samples, reversible words,
 * a sorted dictionary, and word frequencies.
 */
public class WordFun {

	/**
	 * Reads all remaining text from a reader and splits it into lower-case words.
	 *
	 * Words are maximal runs of regex word characters ({@code \w}); everything
	 * else is treated as a separator. Empty tokens are never returned, so empty
	 * input yields an empty list and leading separators do not produce a bogus
	 * empty "word".
	 *
	 * @param r the reader to consume; it is read to end-of-stream but not closed
	 * @return a new, mutable list of the words in the order they were read
	 * @throws IOException if reading from {@code r} fails
	 */
	public static List<String> slurp(BufferedReader r) throws IOException {
		StringBuilder sb = new StringBuilder();
		char[] buf = new char[1024];
		int c;
		while ((c = r.read(buf)) != -1) {
			sb.append(buf, 0, c);
		}
		// Lower-case the whole text once, with a fixed locale, so the result
		// does not depend on chunk boundaries or on the platform's default locale.
		String text = sb.toString().toLowerCase(Locale.ROOT);
		List<String> words = new ArrayList<>();
		for (String token : text.split("\\W+")) {
			// split() yields "" for empty input and for text that starts with a
			// separator; those are not words.
			if (!token.isEmpty()) {
				words.add(token);
			}
		}
		return words;
	}

	/**
	 * Returns the characters of a string in reverse order.
	 *
	 * @param s the string to reverse
	 * @return the reversed string
	 */
	static String reverse(String s) {
		return new StringBuilder(s).reverse().toString();
	}

	/**
	 * Gets the first n elements from a collection, in iteration order.
	 *
	 * @param c the collection to take elements from
	 * @param n the maximum number of elements to take; values of zero or less
	 *          give an empty list
	 * @return a new list holding at most {@code n} elements of {@code c}
	 */
	static <T> List<T> first_n(Collection<T> c, int n) {
		List<T> ell = new ArrayList<T>();
		Iterator<T> it = c.iterator();
		while (ell.size() < n && it.hasNext()) {
			ell.add(it.next());
		}
		return ell;
	}

	/**
	 * Playground: reads all words from {@code r} and writes a series of
	 * statistics about them to {@code w}.
	 *
	 * @param r the reader to read from
	 * @param w the writer to write to; it is not flushed or closed here
	 * @throws IOException if reading from {@code r} fails
	 */
	public static void doIt(BufferedReader r, PrintWriter w) throws IOException {
		// gulp down the file and splice it into a list of words
		List<String> wordList = slurp(r);

		w.println(first_n(wordList, 20));

		// count the number of words
		w.println("# of words = " + wordList.size());

		// count the number of distinct words (vocabulary)
		Set<String> wordSet = new HashSet<>(wordList);
		w.println("# of distinct words (vocabular size)= " + wordSet.size());

		// a random sample of 50 words (shuffles wordList in place; the
		// frequency counts below do not depend on word order)
		Collections.shuffle(wordList);
		w.println(first_n(wordList, 50));

		// find all the ananyms: words whose reversal is also in the vocabulary
		// (a palindrome is its own reversal, so it is listed too)
		for (String s : wordSet) {
			if (wordSet.contains(reverse(s))) {
				w.println(s);
			}
		}

		// make a dictionary (a sorted list of distinct words)
		SortedSet<String> sortedWordSet = new TreeSet<>(wordSet);
		w.println(first_n(sortedWordSet, 20));

		// lookup a specific word: tailSet gives every entry >= the probe
		SortedSet<String> ts1 = sortedWordSet.tailSet("a");
		w.println(first_n(ts1, 20));
		SortedSet<String> ts2 = sortedWordSet.tailSet("fre");
		w.println(first_n(ts2, 20));

		// count how often each word occurs
		Map<String,Integer> freq = new HashMap<>();
		for (String s : wordList) {
			Integer seen = freq.get(s);
			freq.put(s, seen == null ? 1 : seen + 1);
		}

		// find the most frequently occurring word
		int besti = 0;
		String bests = "";
		for (Map.Entry<String,Integer> me : freq.entrySet()) {
			if (me.getValue() > besti) {
				besti = me.getValue();
				bests = me.getKey();
			}
		}
		w.println(bests + " (" + besti + ")");

		// find the top k most frequently occurring words
		List<Map.Entry<String,Integer>> entryList = new ArrayList<>(freq.entrySet());
		Collections.sort(entryList, new Comparator<Map.Entry<String,Integer>>() {
			@Override
			public int compare(Map.Entry<String,Integer> a, Map.Entry<String,Integer> b) {
				// descending by count; Integer.compare avoids subtraction overflow
				return Integer.compare(b.getValue(), a.getValue());
			}
		});
		w.println(first_n(entryList, 20));
	}

	/**
	 * The driver.  Open a BufferedReader and a PrintWriter, either from System.in
	 * and System.out or from filenames specified on the command line, then call doIt.
	 * Prints the elapsed time to System.out, and exits with status -1 after
	 * reporting the error on System.err if an I/O error occurs.
	 *
	 * @param args optional input filename, optionally followed by an output filename
	 */
	public static void main(String[] args) {
		BufferedReader r = null;
		PrintWriter w = null;
		boolean failed = false;
		try {
			if (args.length == 0) {
				r = new BufferedReader(new InputStreamReader(System.in));
			} else {
				r = new BufferedReader(new FileReader(args[0]));
			}
			if (args.length < 2) {
				w = new PrintWriter(System.out);
			} else {
				w = new PrintWriter(new FileWriter(args[1]));
			}
			long start = System.nanoTime();
			doIt(r, w);
			w.flush();
			long stop = System.nanoTime();
			System.out.println("Execution time: " + 1e-9 * (stop-start));
		} catch (IOException e) {
			System.err.println(e);
			failed = true;
		} finally {
			// Close only the streams backed by files; System.in and System.out
			// are left open for the rest of the process.
			if (args.length >= 1 && r != null) {
				try {
					r.close();
				} catch (IOException ignored) {
					// nothing useful can be done about a failed close of an input file
				}
			}
			if (args.length >= 2 && w != null) {
				w.close();
			}
		}
		if (failed) {
			System.exit(-1);
		}
	}
}
