
#include <iostream>
#include <sstream>
#include <vector>
#include <string>
#include <map>


///////////////////////////////////////////////////////////
// parseCmdline.h

#include <vector>
#include <string>

// Copies the command-line arguments into a vector of strings.
//
//   argc  number of entries in argv that are copied
//   argv  array of C strings, as received by main()
//   args  output: any previous content is discarded, then one
//         std::string per argv entry is appended in order
//         (so args[0] corresponds to argv[0])
void parseCmdline(
	int argc,
	char **argv,
	std::vector<std::string> &args );


///////////////////////////////////////////////////////////
// parseCmdline.C

// #include "parseCmdline.h" // but not here

// Converts the C-style argument array into a vector of std::string.
//
//   argc  number of entries of argv to copy
//   argv  array of C strings, as received by main()
//   args  output: previous content is dropped; afterwards args[i]
//         holds a copy of argv[i] for every i in [0, argc)
void parseCmdline(
	int argc,
	char **argv,
	std::vector<std::string> &args )
{
	args.clear();
	// reserve the final size up front so the insert below never reallocates
	args.reserve(argc);
	// the range insert builds one std::string from each char* in turn
	args.insert(args.end(), argv, argv + argc);
}


///////////////////////////////////////////////////////////
// readwriteNumbers.h

#include <vector>

// Reads numbers from a text file.
//
//   filename  path of the file to read
//   values    output: cleared first, then filled with every double that
//             could be extracted from the file, in file order; reading
//             stops at the first extraction that fails (end of file or
//             a token that is not a number)
//
// No error is reported if the file cannot be opened; values is simply
// left empty in that case.
void readNumbers(
	const std::string & filename,
	std::vector<double> &values );

// Writes numbers to a text file, one value per line.
//
//   filename  path of the file to write (opened with std::ofstream)
//   values    the numbers to write, in order
//
// No error is reported if the file cannot be opened or written.
void writeNumbers(
	const std::string & filename,
	const std::vector<double> &values );


///////////////////////////////////////////////////////////
// readwriteNumbers.C

// #include "readwriteNumbers.h" // but not here
#include <fstream>

// Loads the leading run of numbers stored in a text file.
//
//   filename  path of the file to read
//   values    output: emptied first, then receives every double that
//             can be extracted from the file, in file order
//
// Extraction ends at the first failure (end of file, or a token that is
// not a number). A file that cannot be opened is not reported as an
// error: the very first extraction fails and values stays empty.
void readNumbers(
	const std::string & filename,
	std::vector<double> &values )
{
	values.clear();

	std::ifstream source(filename.c_str());
	// the extraction itself is the loop condition: it turns false as soon
	// as the stream enters a failed state
	for ( double parsed = 0; source >> parsed; )
	{
		values.push_back(parsed);
	}
}

// Stores a sequence of numbers in a text file, one value per line.
//
//   filename  path of the file to write (opened with std::ofstream)
//   values    the numbers to write, in order
//
// Values are formatted with the stream's default settings. Failures to
// open or write the file are not reported.
void writeNumbers(
	const std::string & filename,
	const std::vector<double> &values )
{
	typedef std::vector<double>::size_type index_type;

	std::ofstream sink(filename.c_str());
	const index_type count = values.size();
	for ( index_type idx = 0; idx < count; ++idx )
	{
		// plain '\n' instead of std::endl: no flush after every line
		sink << values[idx] << '\n';
	}
	// everything is written, push the buffered data out once
	sink.flush();
}


///////////////////////////////////////////////////////////
// removeOutliers.h

#include <vector>

// Filters a data set, keeping only the values close to its mean.
//
//   data    the input values (not modified)
//   result  output: cleared first, then receives, in their original
//           order, all values of data whose absolute distance from the
//           mean of data is at most one standard deviation (computed
//           with the n-1 denominator when data has more than one value)
void removeOutliers(
	const std::vector<double> & data,
	std::vector<double> & result );


///////////////////////////////////////////////////////////
// removeOutliers.C

// #include "removeOutliers.h" // but not here
#include <cmath>

// Filters a data set, keeping only the values close to its mean.
//
//   data    the input values (not modified)
//   result  output: cleared first, then receives, in their original
//           order, all values of data whose absolute distance from the
//           mean of data is at most one standard deviation
//
// The standard deviation is the sample standard deviation (sum of squared
// deviations divided by n-1). For a single value it is zero, so that value
// is always kept. Empty input yields an empty result.
void removeOutliers(
	const std::vector<double> & data,
	std::vector<double> & result )
{
	// index with the vector's own unsigned size type: an int index would be
	// compared signed-vs-unsigned and could overflow on very large inputs
	typedef std::vector<double>::size_type index_type;

	result.clear();

	const index_type count = data.size();
	if ( count == 0 )
	{
		// nothing to filter; also avoids computing the mean as 0/0
		return;
	}

	double sum = 0;
	for ( index_type i = 0; i < count; ++i )
	{
		sum += data[i];
	}
	const double mean = sum / count;

	double squared_deviations = 0;
	for ( index_type i = 0; i < count; ++i )
	{
		const double delta = data[i] - mean;
		squared_deviations += delta * delta;
	}

	// sample variance (n-1 denominator); with one value the sum is already
	// zero and dividing by n-1 == 0 must be skipped
	double variance = squared_deviations;
	if ( count > 1 )
	{
		variance /= count - 1;
	}
	const double stdev = std::sqrt(variance);

	for ( index_type i = 0; i < count; ++i )
	{
		if ( std::fabs(data[i] - mean) <= stdev )
		{
			result.push_back(data[i]);
		}
	}
}


///////////////////////////////////////////////////////////
// and now the actual main() program...

// Adds every entry of values to histogram.
//
// Each value is rounded down to the lower edge of its bin (a multiple of
// bin_width) and that edge is used as the map key. operator[] creates a
// missing counter as zero before it is incremented.
// bin_width must be a positive number; the caller checks this.
static void accumulateHistogram(
	const std::vector<double> & values,
	double bin_width,
	std::map<double,int> & histogram )
{
	for ( std::vector<double>::size_type i = 0; i < values.size(); ++i )
	{
		++histogram[std::floor(values[i]/bin_width)*bin_width];
	}
}

// Writes header followed by one "bin_edge count" line per histogram entry
// to the file filename. The map is traversed with iterators, so the lines
// come out ordered by bin edge.
static void writeHistogram(
	const std::string & filename,
	const char * header,
	const std::map<double,int> & histogram )
{
	std::ofstream os(filename.c_str());
	os << header;
	for ( std::map<double,int>::const_iterator ci = histogram.begin();
				ci != histogram.end();
				++ci
			)
	{
		// first is key (bin edge), second is value (count)
		os << ci->first << ' ' << ci->second << '\n';
	}
	os.close();
}

// Program entry point.
//
// Expected arguments:
//   input_data output_data bin_width input_histogram output_histogram
//
// Reads the numbers in input_data, writes those that survive
// removeOutliers() to output_data, and writes a histogram with bins of
// width bin_width for the original data (input_histogram) and for the
// filtered data (output_histogram).
//
// Returns 0 on success and 1 if arguments are missing or bin_width is
// not a positive number.
int main ( int argc, char * * argv )
{
	std::vector<std::string> args;

	parseCmdline( argc, argv, args );

	if ( args.size() < 6 )
	{
		// args can be empty when the program is started with argc == 0,
		// so args[0] must not be touched unconditionally
		const std::string program = args.empty() ? std::string("program") : args[0];
		std::cerr << "usage: " << program << " input_data output_data bin_width input_histogram output_histogram" << std::endl;
		return 1;
	}

	// validate bin_width before doing any work: a missing, zero or negative
	// width would produce NaN/inf bin edges, and NaN keys break the
	// ordering that std::map relies on
	double bin_width = 0;
	std::istringstream iss(args[3]);
	if ( !(iss >> bin_width) || !(bin_width > 0) )
	{
		std::cerr << "error: bin_width must be a positive number, got '" << args[3] << "'" << std::endl;
		return 1;
	}

	std::vector<double> input;
	readNumbers( args[1], input );

	std::vector<double> output;
	removeOutliers( input, output );

	writeNumbers( args[2], output );

	std::map<double,int> histogram_input;
	std::map<double,int> histogram_output;

	accumulateHistogram( input, bin_width, histogram_input );
	accumulateHistogram( output, bin_width, histogram_output );

	writeHistogram( args[4], "# histogram_input:\n", histogram_input );
	writeHistogram( args[5], "# histogram_output:\n", histogram_output );

	return 0;
}

