// Clemens Groepl, 2009-10-06
// This will split a FastA file with many proteins into individual files.
// The comment lines are expected to have the following form, e.g.:
// >B. subtilis 168|BG12556|AapA: amino acid permease
// In this instance, output goes to the file BG12556.fasta .

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

// Derives the output file name from a FastA header line.
// The identifier is the text between the first and the second '|' (or up to
// the end of the line if there is only one '|'); ".fasta" is appended to it.
// Returns an empty string if the header contains no '|', if the identifier is
// empty, or if it contains a path separator (which would direct the output
// outside the current directory).
static std::string output_filename_for( std::string const & header )
{
	std::string::size_type const first_bar = header.find('|');
	if ( first_bar == std::string::npos )
	{
		return std::string();
	}
	std::string::size_type const begin = first_bar + 1;
	std::string::size_type const end = header.find('|', begin);
	std::string const id = header.substr(
		begin, end == std::string::npos ? std::string::npos : end - begin );
	if ( id.empty() || id.find_first_of("/\\") != std::string::npos )
	{
		return std::string();
	}
	return id + ".fasta";
}

// Announces `filename` on stdout and writes `record`, followed by one extra
// newline, to that file. An existing file of that name is overwritten.
// Returns false (after reporting on stderr) if the file cannot be opened or
// written.
static bool write_record( std::string const & filename, std::string const & record )
{
	std::cout << filename << std::endl;
	std::ofstream single_output( filename.c_str() );
	if ( !single_output )
	{
		std::cerr << "error: cannot open output file '" << filename << "'" << std::endl;
		return false;
	}
	single_output << record << '\n';
	single_output.flush(); // flush here so that write errors are visible below
	if ( single_output.fail() )
	{
		std::cerr << "error: failed writing output file '" << filename << "'" << std::endl;
		return false;
	}
	return true;
}

// Splits the FastA file named by argv[1] into one file per record.
// Each record starts at a line beginning with '>' and extends up to the next
// such line (or the end of the input). The name of every file written is
// printed to stdout; problems are reported on stderr.
// Returns 0 if every record was written, 1 on usage errors, unreadable input,
// skipped records or failed output files.
int main ( int argc, char ** argv )
{

	if ( argc != 2 )
	{
		// argv[0] may be null when argc is 0, so guard the program name.
		std::cerr << "usage: " << ( argc > 0 ? argv[0] : "fasta_split" )
			<< " fasta_input_file" << std::endl;
		return 1;
	}

	// Read-only stream: a plain std::fstream opens for reading AND writing and
	// therefore fails on input files that are not writable.
	std::ifstream input( argv[1] );
	if ( !input )
	{
		std::cerr << "error: cannot open input file '" << argv[1] << "'" << std::endl;
		return 1;
	}

	std::string single_fasta;    // text of the record currently being collected
	std::string single_filename; // target of that record; empty = nothing to write
	std::string line;
	unsigned long line_number = 0;
	bool seen_header = false;
	bool ok = true;

	while ( std::getline(input,line) )
	{
		++line_number;
		if ( !line.empty() && line[0] == '>' )
		{
			// A new header terminates the previous record: write it out.
			if ( !single_filename.empty() && !write_record(single_filename, single_fasta) )
			{
				ok = false;
			}
			single_fasta.clear();
			seen_header = true;
			single_filename = output_filename_for(line);
			if ( single_filename.empty() )
			{
				std::cerr << argv[1] << ":" << line_number
					<< ": cannot derive a file name from header, skipping record: "
					<< line << std::endl;
				ok = false;
			}
		}
		else if ( !seen_header && !line.empty() )
		{
			// Text before the first header belongs to no record.
			std::cerr << argv[1] << ":" << line_number
				<< ": ignoring text before the first header line" << std::endl;
			ok = false;
		}
		if ( !single_filename.empty() )
		{
			single_fasta += line;
			single_fasta += '\n';
		}
	}

	if ( input.bad() )
	{
		std::cerr << "error: failed reading input file '" << argv[1] << "'" << std::endl;
		ok = false;
	}

	// The last record is terminated by the end of the input.
	if ( !single_filename.empty() && !write_record(single_filename, single_fasta) )
	{
		ok = false;
	}

	return ok ? 0 : 1;
}



