/*
 * Ken's word-counter in C++
 */
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <condition_variable>
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include <fcntl.h>
#include <unistd.h>
#include "filenames.hpp"

// C++20 ships std::counting_semaphore in <semaphore>; this small class provides
// the same wait/notify behaviour for builds targeting an older standard.
class Semaphore {
	public:
		// initial: number of units available before any notify() call.
		Semaphore (int initial = 0)
			: available_(initial)
		{
		}

		// Add one unit and wake a single thread blocked in wait(), if any.
		inline void notify() {
			std::lock_guard<std::mutex> guard(mutex_);
			++available_;
			ready_.notify_one();
		}

		// Block while no unit is available, then take one.
		inline void wait() {
			std::unique_lock<std::mutex> guard(mutex_);
			// The predicate form re-checks the counter after every wake-up,
			// so spurious wake-ups are harmless.
			ready_.wait(guard, [this] { return available_ != 0; });
			--available_;
		}
	private:
		std::mutex mutex_;
		std::condition_variable ready_;
		int available_;
};

// Upper bound on worker threads; also the number of slots in the fdescs ring.
constexpr int MAXTHREADS = 64;
// Number of bytes requested by each read() call in wcounter.
constexpr int BLOCKSIZE = 16*1024;
// token_chars[c] == 1 marks byte value c as part of a word; main() fills it in.
char	token_chars[512] = {};
// Ring of descriptors handed from fopener to wcounter; -2 is the stop marker.
int	fdescs[MAXTHREADS] = {};
// Counts the ring slots fopener may still fill.
Semaphore	fcount(MAXTHREADS);
// Counts the ring entries that have been published but not yet claimed.
Semaphore	opencount(0);
// Running (un-wrapped) write position in fdescs, advanced by fopener.
int	free_ptr = 0;
// Running (un-wrapped) read position in fdescs, advanced by wcounter.
int	next_ptr = 0;
// Maps a word to its number of occurrences.
using   WC = std::map<std::string, int>;
// Not referenced anywhere else in the visible source.
WC   	total_count;

/*
 * Producer thread body: opens each of the first nfiles paths in file_list and
 * publishes the resulting descriptors to the wcounter threads through the
 * fdescs ring.
 *
 *   nfiles    - number of entries of file_list to open; values <= 0 open nothing
 *   file_list - array of NUL-terminated path names
 *
 * A ring slot is reserved with fcount.wait() before each open(). A path that
 * cannot be opened is reported on stdout and its reservation is handed back
 * without touching the ring, so the published entries stay in step with the
 * opencount notifications. After the last file, MAXTHREADS stop markers (-2)
 * are published so every possible worker receives one.
 */
void fopener(int nfiles, char **file_list)
{
	for(; nfiles > 0; nfiles--, file_list++)
	{
		fcount.wait();
		const int fd = open(*file_list, O_RDONLY);
		if(fd == -1)
		{
			printf("Unable to open file: %s\n", *file_list);
			// Nothing was published: give the reserved slot back instead
			// of leaving a hole in the ring that a worker would later read.
			fcount.notify();
			continue;
		}
		fdescs[free_ptr++ % MAXTHREADS] = fd;
		opencount.notify();
	}
	// wcounter returns on a stop marker without releasing its slot, so each
	// marker permanently uses one of the MAXTHREADS reservations.
	for(int n = 0; n < MAXTHREADS; n++)
	{
		fcount.wait();
		fdescs[free_ptr++ % MAXTHREADS] = -2;
		opencount.notify();
	}
}

// File-scope mutex, presumably meant to serialize the wcounter threads while
// they advance next_ptr.
// NOTE(review): confirm wcounter actually acquires this mutex when claiming a slot.
std::mutex lock;

// One private word tally per worker: found() updates sub_count[tn] for thread
// tn, and main() merges the tallies after the workers have been joined.
WC	sub_count[MAXTHREADS];

inline void found(int& tn, char*& word)
{
	auto sword = std::string(word);
	sub_count[tn][sword]++;
}

// Record one occurrence of the word formed by joining two NUL-terminated
// pieces (a word split across two read blocks) for worker thread tn.
// A combined length of 1023 or more is reported on stdout and not counted.
inline void found(int& tn, char*& prefix, char*& suffix)
{
	const int head_len = strlen(prefix);
	const int tail_len = strlen(suffix);
	const int total_len = head_len + tail_len;
	if(total_len < 1023)
	{
		char joined[1024];
		memcpy(joined, prefix, head_len);
		// The +1 carries the suffix's terminating NUL along.
		memcpy(joined + head_len, suffix, tail_len + 1);
		char* joined_ptr = joined;
		found(tn, joined_ptr);
	}
	else
	{
		printf("Word is unreasonably long! %s + %s (len %d)\n", prefix, suffix, total_len);
	}
}

void wcounter(int n)
{
	char prefix_copy[1024];
	int my_count = 0;
	int fdesc;
	while(true)
	{
		{
			opencount.wait();
			std::unique_lock<std::mutex> lock;
			fdesc = fdescs[next_ptr++ % MAXTHREADS];
		}
		if(fdesc == -2)
		{
			return;
		}
		char buffer[BLOCKSIZE+1024];
		buffer[BLOCKSIZE] = 0;
		int nbytes;
		char *prefix = nullptr;
		int sptr = -1;
		while((nbytes = read(fdesc, buffer, BLOCKSIZE)) > 0)
		{
			int cptr = 0;
			while(cptr < nbytes)
			{
				if(token_chars[0xFF & (unsigned)buffer[cptr]] == 1)
				{
					if(sptr == -1)
					{
						sptr = cptr;
					}
					cptr++;
					continue;
				}
				buffer[cptr++] = 0;
				if(prefix != nullptr)
				{
					if(sptr != -1)
					{
						char* sbptr = buffer+sptr;
						found(n, prefix, sbptr);
						++my_count;
					}
					else
					{
						found(n, prefix);
						++my_count;
					}
					prefix = nullptr;
				}
				else if(sptr != -1)
				{
					char* sbptr = buffer+sptr;
					found(n, sbptr);
					++my_count;
				}
				sptr = -1;
			}
			if(cptr == nbytes && sptr != -1)
			{
				int len = nbytes-sptr;
				prefix = prefix_copy;
				memcpy(prefix, buffer+sptr, len);
				prefix[len] = 0;
				sptr = -1;
			}
		}
		if(prefix != nullptr)
		{
			if(sptr != -1)
			{
				char* sbptr = buffer+sptr;
				found(n, prefix, sbptr);
			}
			else
			{
				found(n, prefix);
			}
			prefix = nullptr;
		}
		else if(sptr != -1)
		{
			char* sbptr = buffer+sptr;
			found(n, sbptr);
		}
		close(fdesc);
		fcount.notify();
	}
}


// Strict weak ordering over (count, word) pairs: larger counts come first and
// equal counts are ordered by ascending word.
// Deliberately not derived from std::binary_function, which was removed from
// the standard library in C++17; std::map only needs the call operator.
struct SortOrder
{
    bool operator()(const std::pair<int, std::string>& lhs, const std::pair<int, std::string>& rhs) const
    {
        if(lhs.first != rhs.first)
        {
            return lhs.first > rhs.first;
        }
        return lhs.second < rhs.second;
    }
};

// Map keyed by (count, word) and iterated in SortOrder, i.e. most frequent first.
using SO = std::map<std::pair<int, std::string>, int, SortOrder>;

/*
 * Entry point. Recognised options:
 *   -n#  number of worker threads (default 1)
 *   -f#  number of entries of file_names to process (default: all of them)
 * Any other option prints the usage line and returns 1.
 *
 * Both values are clamped before use: the thread count to 1..MAXTHREADS
 * (the size of my_threads and sub_count; with no worker the opener thread
 * would block forever), the file count to 0..number of entries in file_names.
 *
 * Starts the opener thread and the workers, joins them, merges the per-thread
 * tallies and prints the words ordered by descending count, then by word.
 */
int main(int argc, char **argv)
{
	const int available = sizeof(file_names)/sizeof(*file_names);
	int ncores = 1;
	int nfiles = available;

	while(--argc && **(++argv) == '-')
	{
		switch(argv[0][1]) {
			case 'n':
				ncores = atoi(*argv+2);
				break;
			case 'f':
				nfiles = atoi(*argv+2);
				break;
			default:
				printf("Usage: fast-wc [-n#] file0 file1...\n");
				return 1;
		}
	}
	// Keep the user-supplied values inside the bounds of the fixed-size arrays.
	if(ncores < 1)
	{
		ncores = 1;
	}
	else if(ncores > MAXTHREADS)
	{
		ncores = MAXTHREADS;
	}
	if(nfiles < 0)
	{
		nfiles = 0;
	}
	else if(nfiles > available)
	{
		nfiles = available;
	}
	printf("fast-wc with %d cores, %d files\n", ncores, nfiles);
	// Flag every byte value that may appear inside a word.
	auto str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
	while(*str)
	{
		token_chars[(int)*str++] = 1;
	}
	std::thread my_threads[MAXTHREADS];
	auto fot = std::thread(fopener, nfiles, (char**)file_names);
	for(int n = 0; n < ncores; n++)
	{
		my_threads[n] = std::thread(wcounter, n);
	}
	for(int n = 0; n < ncores; n++)
		my_threads[n].join();
	fot.join();
	// Merge the per-thread tallies into a single word -> count map.
	WC totals;
	for(int n = 0; n < ncores; n++)
	{
		for(const auto& wc: sub_count[n])
		{
			totals[wc.first] += wc.second;
		}
	}
	// Re-key by (count, word) so that iteration follows SortOrder.
	SO sorted_totals;
	for(const auto& wc: totals)
	{
		std::pair<int,std::string> new_pair(wc.second, wc.first);
		sorted_totals[new_pair] = wc.second;
	}
	for(const auto& wc: sorted_totals)
	{
		printf("%32s   | %8d\n", wc.first.second.c_str(), wc.second);
	}
	return 0;
}
