#! /usr/bin/perl

use strict;
use Pod::Usage;
use Getopt::Long;
use File::Find;

# define  the line width for the output
my $linesize       = 40;

# define a megabyte:
my $Mb = 1024*1024;

# some predifined sizes the user can use to set the size to fill
my %predef_sizes = (
	kb		=> 1024,
	mb		=> $Mb,
	gb		=> 1024*$Mb,
	cd		=> 700*$Mb,
	dvd		=> 4481*$Mb,
	dvdram	=> 4579655680,
	floppy	=> 1.44*$Mb,
	);

my @files 		= ();					# the files to pack
my $medium_size	= $predef_sizes{dvd};	# default medium is a dvd
my $tolerance	= -1;					# initialize at -1 to check if has been defined by user

#process options from the command line
my ($help,$verbose,$quiet,$units);
GetOptions (
	"medium-size=s"		=> \$medium_size,	# string
	"help"				=> \$help,			# switch
	"verbose"			=> \$verbose,		# switch
	"quiet"				=> \$quiet,			# switch
	"tolerance=s"		=> \$tolerance,		# string
	"units"				=> \$units			#switch
			) or pod2usage({-exitval => 1, -verbose => 1});
			
pod2usage({-exitval => 2, -verbose => 2}) if($help);

list_units() if($units);

my $max = parse_size($medium_size);				# $max will be the target size 
# $tol will be tolerance on the result: the program will stop as soon as
# a size between $max-$tol and $max is reached. By default, the tolerance is 
# set to 0.01% of the target size
$tolerance = 0.0001*$max unless($tolerance>=0);
my $tol = parse_size($tolerance);

# after the options are stripped from the command line, what is left
# tells us what to pack
if($#ARGV<0)		# no argument => all the files in the current directory
{
	@files		= glob("*");
}
elsif($#ARGV==0)	# 1 argument: the directory in which the files to pack are
{
	$ARGV[0]	=~ s![/\\]$!!;	# strip the trailing '/' or '\'
    if(! -d $ARGV[0])
    {
        print $ARGV[0]." is not a directory\n";
        pod2usage({-exitval => 4, -verbose => 2})
    }
	@files		= glob($ARGV[0]."/*");
}
else				# more than 1 argument => those are the files to pack
{
	@files		= @ARGV;	
}

# initializations:
my $totalsize      = 0;			# the total size of the selected files in the current iteration
my $minrest        = $max;
my @sizes;						# the size of each file
my @selected;					# the status (selected or not) of each file in the current iteration
my $i;

# loop thru all the files, print if the user asked for verbosity,
# and calculates the size of each one
print "All files:\n" if($verbose);
for($i=0; $i<=$#files; ++$i)
{
	if(-d $files[$i])   #it's a directory => calculate its size by recurrence
	{
		$sizes[$i]	= 0;
		find sub{$sizes[$i] += -s $_; }, $files[$i] ;
		$files[$i]	.= " (DIR)";
	}
	else
	{
		$sizes[$i]	= -s $files[$i];
	} 
	
	$selected[$i]	= 0;				#initialize the selection array to 0
	$totalsize		+= $sizes[$i];
	if($verbose)
	{
		my $out		= "  $files[$i]";
		print $out;
		print " " x ($linesize-length($out));
		printf( ": %5d Mb\n",int($sizes[$i]/$Mb+0.5));
	}
}

print "-" x ($linesize+11)."\n" if($verbose);

#print some info unless the user asked for quiet
if(!$quiet)
{
	my $out	= "  TOTAL (".($#files+1)." files)" ;
	print $out. " " x ($linesize-length($out));
	printf(": %5d Mb\n",int($totalsize/$Mb+0.5));
	
	$out	= "  MAXIMUM SIZE TO FILL" ;
	print $out. " " x ($linesize-length($out));
	printf(": %5d Mb\n",int($max/$Mb+0.5));
	print "\n  Nb combinaisons: ".2**($#files+1)."\n\n";
}

my @best =  @selected;		# the best selection so far

if($totalsize<$max) #all the files don't fill the medium => nothing to do
{
	print "All the selected files amount to less than the medium size.\n";
	print "You can add all of them to the medium without getting over\n";
	print "the size limit. No optimization to be done.\n";
	#select all the files
	@best = map { 1 } @selected;
	#display result on screen and exit
	finish();
}

# catch Ctrl-C to finish the search and display the best so far
$SIG{INT} = \&finish;

#start the first iteration:
print "Press Ctrl-C when the solution is satisfactory\n" unless $quiet;
$|=1;   # we want the real-time results displayed on the screen
iter(0,$max);
finish();

#should never reach this point, finish calls the exit function

################################################################################
#
################################################################################
sub showbest
{
	return if($quiet);
	print "\b"x50;  # backspace characters to go back to the beginning of the line
	if($minrest>$Mb)
	{
		printf("Best solution so far:%8.3f Mb       ",$minrest/$Mb);
	}
	elsif($minrest>1024)
	{
		printf("Best solution so far:%8.3f Kb       ",$minrest/1024);
	}
	else
	{
		printf("Best solution so far:%4d bytes       ",$minrest);
	}
}

sub finish
{
	# called when we found the best solution
	my $totalsize		= 0;
	my $nbfiles			= 0;
    my $out;
	$| = 0;  # we don't need the autoflush anymore
	print "\b"x55 . "RESULTS:". " "x43 ."\n";
	for(my $i=0; $i<=$#files; ++$i)
	{
		if($best[$i])
		{
			$out          = "  $files[$i]";
			print $out . " " x ($linesize-length($out));
			printf( ": %5d Mb\n",int($sizes[$i]/$Mb+0.5));
			$totalsize    += $sizes[$i];
			++$nbfiles;
		}
	}
	
	print "-" x ($linesize+11)."\n";
	
	my $out	= "  TOTAL ($nbfiles files)" ;
	print $out. " " x ($linesize-length($out));
	printf(": %5d Mb\n",int($totalsize/$Mb+0.5));
	
	$out	= "  MAXIMUM SIZE TO FILL" ;
	print $out. " " x ($linesize-length($out));
	printf(": %5d Mb\n",int($max/$Mb+0.5));
	
	$out          = "  UNUSED " ;
	print $out. " " x ($linesize-length($out));
	printf(": %5d Mb\n",int($max/$Mb+0.5)-int($totalsize/$Mb+0.5));
	
	exit;
}

sub iter
{
	# The search is done by iteration on the depth. We have to explore
	# a tree, each file has 2 possible status: packed or not packed.
	# So, at each level (file) each tree branch is divided in 2.
	# Each node on the tree corresponds to a certain number of
	# files being packed and hence to a certain size.
	# Obviously, we stop exploring branches where the size is already
	# larger that the maximum allowed
	my $i         = shift;	# current file number or depth
	my $maxsize   = shift;	# the size of the medium left to fill
	if($i>$#files)	# no more files to add
	{
		# since there are no more files, maxsize is here also the
		# size of the space left on the medium in this node
		if($maxsize < $minrest && $maxsize > 0)
		{
			# we have a new best solution!
			$minrest  = $maxsize;
			@best     = @selected;
			showbest();
            finish() if($minrest<=$tol);
		}
		return 1;
	}
	
	# we now have to test 2 options: pack this file or not:
	
	# 1. let's try taking the file i into the package
	if($sizes[$i]< $maxsize)
	{
		$selected[$i] = 1;
		# we continue to explore the tree of solution with the next file,
		# but this time the size available is decreased since we just included
		# a file in the package 
		iter($i+1, $maxsize-$sizes[$i]);
	}
	else
	{
		# we have reached the maximum, adding the file would make 
		# the package too big. Test if this solution is better than
		# what we had this far:
		if($maxsize < $minrest && $maxsize > 0)
		{
			# we have a new best solution
			$minrest  = $maxsize;
			@best     = @selected;
			showbest();
            finish() if($minrest<=$tol);
		}
	}

	# 2. let's try the other alternative: we don't take the file i
	#    in the package
	$selected[$i] = 0;
	# we continue to explore the tree of solution with the next file 
	iter($i+1, $maxsize);
}

################################################################################
#
################################################################################
sub parse_size
{
	# allows the use of units pre defined in %predef_sizes
	# ex: 2.5Gb, dvd, cd, 35Mb,...
	
	my $size = shift;		# string to decode
	
	# we are going to decompose the string into a numerical factor (=1 if absent)
	# and a unit (=1 byte if absent).
	my ($factor,$unit);
	if($size =~ m/^(\d+\.\d*|\.?\d+)(.*)/)	# if $max starts with a number
	{
		$factor		= $1;
		$unit	 	= $2;
	}
	else	# just the unit without multiplying factor, ex: 'cd', 'dvd'
	{
		$factor		= 1;
		$unit		= $size;
	}
	
	return $factor if($unit eq "");		# no unit, just a number!
	return $factor * $predef_sizes{lc($unit)} 
		if(exists($predef_sizes{lc($unit)}));
	# error: should never reach this point
	return -1;
}

sub list_units
{
	my ($unit,$value);
	print "The use of units is NOT case-sensitive.\n";
	print "The pre-defined units are:\n";
	while(my($unit, $value) = each(%predef_sizes)) 
	{
		printf("%10s: %10.0f (%7.2f Mb)\n",$unit,$value,$value/$Mb);
		#print	$unit . " "x(10-length($unit)) .": $value
	}
	exit;
}

__END__

=pod 

=head1 NAME

packit.pl - chooses files to pack to avoid wasting space on cd,dvd,...

=head1 SYNOPSIS

packit.pl [options] [directory]

packit.pl [options] file/directory file/directory ... 

=head1 ARGUMENTS

=over 2

=item B<[directory]>

The directory in which the files to pack are. If omitted, the
current directory is used. The subdirectories are considered as
un breakable blocks. To make sure a group of files will be on the 
same medium, place them in a subdirectory (ex: MyBook-Tome1.pdf,
MyBook-Tome2.pdf,...)

=item B<file/directory>

If more than 1 argument is passed, each one is assumed to be a file or
a directory (considered 1 piece) to pack.

=back

=head1 OPTIONS

=over 2

=item B<-h> or B<--help>

Produces full documentation.

=item B<-m size> or B<--medium-size size>

Sets the size of the medium to fill. packit will try to reach this
target size without overshooting. The default is the size of a dvd.
The format supports the usage of 
units (run B<packit -u> to see a list of supported units). If no
unit is specified, bytes is assumed.

ex: 700Mb, dvd, floppy, 3Gb, 12345

=item B<-q> or B<--quiet>

Reduces the programs output.

=item B<-t size> or B<--tolerance size>

The tolerance on the result. For the possible formats of size, see
B<--medium-size size>. The tolerance is set by default to 0.01% of
the medium-size. 

For example, if you have a lot of files, the program can take a 
very long time to test all the possible solutions. When the solution 
is close enough, it's is not worth waiting a long time just to 
improve by a few bytes. So you can set the tolerance to e.g. 10Kb,
if you think that a solution within 10Kb is good enough. 

=item B<-u> or B<--units>

Display the list of supported units and exit.

=item B<-v> or B<--verbose>

Prints more information about the set of files we start with.

=back

=head1 EXAMPLES

=over 2

=item packit -t 25kb -m dvd

Will try to group some files or directories from the current directory so 
as to fill a dvd with as little as possible space left on the dvd. The 
program will stop and  display the solution as soon as it finds one 
leaving less than 25 kilobytes free on the dvd.

=head1 BUGS

Could in principle get into trouble if the number of files is very large 
since the program uses recurrence. I never had that happening in practice 
yet. Try increasing the tolerance if it does.

=head1 COPYRIGHT

Copyright Thomas Materna <tmaterna@tmaterna.com> (http://www.tmaterna.com)

Use and redistribution is allowed provided that this copyright and disclaimer
is reproduced.

This code is provided ``as is'', without B<ANY> express or implied warranties.
In no event should the author be liable for any consequence of any type of use
of this software.

=head1 AUTHORS

Thomas Materna <tmaterna@tmaterna.com> (L<http://www.tmaterna.com>)

=head1 README

B<packit.pl> helps saving space when burning CDs, DVDs,... It computes
which files/directories to burn on a medium in order to make the 
unused space on the medium as little as possible.
The current best solution is displayed in real time (unless B<--quiet> is 
used), the user can stop the program at any time when satisfied by hitting
Ctrl-C. The program will then display the detail of the best solution and 
exit.

=head1 PREREQUISITES

C<strict>,
C<Pod::Usage> for the documentation,
C<Getopt::Long> for the comman line parsing,
C<File::Find> to compute a directory's size

=head1 OSNAMES

any

=head1 SCRIPT CATEGORIES

CPAN
Win32/Utilities

=cut



