#!/usr/bin/perl -WT
# (C) Stephan Beyer, GPL
# reformat -h or see the pod (at the bottom of the file, or 'man reformat')

# Option defaults (package globals, read by the subs further down).
# The boolean switches are counters: a switch is on while ($counter % 2)
# is true, so every repetition of the switch toggles it.
our $width   = 72;	# -w: text width in columns
our $justify = 0;	# -j (boolean)
our $lmargin = "";	# -l: the left margin, kept as a string of spaces
our $allpar  = 0;	# -p (boolean)

# Program state.
our $version = "20040319";
our $newline = 0;	# 0: the left margin of the current line is not printed yet
our $lines   = 0;	# incremented by newline() for every finished line

# options

# Usage: parse_count (STRING) -> STRING as a non-negative integer, or
# nothing (undef) if STRING is missing or is not a plain decimal number
sub parse_count {
	my ($value) = @_;
	# Taking the digits from the capture group also untaints them (-T).
	if (defined($value) and $value =~ m/^(\d+)\z/) {
		return int($1);
	}
	return;
}

# Usage: usage (); -> prints the help text to stdout; the values in
# brackets are the option values at the moment of the call
sub usage {
	print <<STOP
reformat  $version     by sbeyer
(C) Stephan Beyer <s-beyer\@gmx.net>, GPL
http://www.s-beyer.de/

Usage: $0 [options] [<inputfile] [>outputfile]

"reformat" only handles with stdin and stdout (and sometimes stderr).
Pipe as much as you want :)

Options (alphabetically):
  -h          Displays this help.
  -j          Switches hyphenless justification. [$justify]
  -l margin   Sets a left margin (numeric). [$lmargin]
  -p          Also reformat lines beginning with a whitespace. [$allpar]
  -w width    Sets a width (numeric) for the text. [$width]

Have fun!
STOP
	;
}

# Walk over the command line.  -j and -p toggle their counters, -l and -w
# consume the following argument, which has to be a non-negative number.
# Anything else prints the help and ends the program: status 0 for -h,
# status 1 for an unknown option or a bad option argument.
for (my $i = 0; $i < @ARGV; $i++) {
	my $opt = $ARGV[$i];
	if ($opt eq "-j") {
		$justify++;
	} elsif ($opt eq "-p") {
		$allpar++;
	} elsif ($opt eq "-l" or $opt eq "-w") {
		# A missing or non-numeric argument used to be taken as 0 silently.
		my $count = parse_count($ARGV[++$i]);
		if (!defined($count)) {
			print STDERR "Option ".$opt." needs a numeric argument.\n";
			usage();
			exit(1);
		}
		if ($opt eq "-l") {
			$lmargin = " " x $count;
		} else {
			$width = $count;
		}
	} else {
		if ($opt ne "-h") {
			print STDERR "Unknown option: ".$opt."\n";
		}
		usage();
		exit($opt eq "-h" ? 0 : 1);
	}
}

# Usage: prepare (STRING) -> STRING as a flat list of words: every run of
# CR, LF, tab and space characters becomes one single space, and a space
# at the very beginning or end is dropped
sub prepare {
	my ($text) = @_;
	$text =~ s/[\r\n\t ]+/ /g;	# squeeze all whitespace runs
	$text =~ s/^ | $//g;		# trim both ends
	return $text;
}

# Usage: round (FLOAT) -> the integer part of FLOAT, plus one if the
# fractional part is 0.5 or more
sub round {
	my ($value) = @_;
	my $whole = int($value);
	return ($value - $whole >= 0.5) ? $whole + 1 : $whole;
}

# Usage: splen (STRING) -> columns STRING takes up together with the
# space that follows it, i.e. its length plus one
sub splen {
	my ($word) = @_;
	return 1 + length($word);
}

# Usage: mindiff (INTEGER, ARRAY OF INTEGERS)
# -> returns the value of the ARRAY with the smallest absolute
#    difference to INTEGER; on a tie the earlier value wins, and an
#    empty ARRAY gives undef
sub mindiff {
	my ($target, @candidates) = @_;
	my ($closest, $smallest);
	foreach my $candidate (@candidates) {
		my $distance = abs($candidate - $target);
		# only a strictly smaller distance replaces the current choice
		next if defined($smallest) and $distance >= $smallest;
		$smallest = $distance;
		$closest = $candidate;
	}
	return $closest;
}

# Usage: newline; -> ends the current output line: prints the line feed,
# marks the left margin of the next line as not printed yet ($newline)
# and counts the finished line ($lines)
sub newline {
	$newline = 0;
	print("\n");
	return $lines++;
}

# almost MAIN:
# Usage: reformat (STRING) -> reformat string
#
# Splits STRING into words (see prepare) and prints them as lines of at
# most $width columns, each line preceded by $lmargin.  With -j set, every
# line of more than one word except the last line of the paragraph gets
# extra spaces between its words until it fills $width columns.
# Prints an error to stderr and exits with status 2 if a single word is
# longer than $width.
sub reformat {
	my ($str) = @_;
	my @words = split(/ /, prepare($str));
	my $wc = @words;	# word count

	my $lc = $width + 1;	# length count: columns still free on this line
	my $last = 0;		# index of the first word that is not printed yet

	# The extra round ($i == $wc) flushes the last line of the paragraph.
	for my $i (0 .. $wc) {
		# The margin is printed even if no word follows; the caller
		# relies on that for the untouched lines it prints itself.
		if ($newline == 0) {
			print $lmargin;
			$newline = 1;
		}
		if ($i < $wc) {
			# is our mission impossible?
			if (length($words[$i]) > $width) {
				print STDERR ("ERROR: word is too long to fit in width ($width).\n");
				print STDERR ("(\"".$words[$i]."\")\n");
				print STDERR ("Aborting.\n");
				exit(2);
			}

			# subtract current word
			$lc -= splen($words[$i]);
		}

		if ($lc < 0) {
			# The current word does not fit any more: give its columns
			# back, so $lc is the number of free columns on the line
			# made of the words $last .. $i-1.
			$lc += splen($words[$i]);

			# Landmarks are the positions (columns counted from the start
			# of the line) after which one extra space is printed.  They
			# are lexical to this line: the former global arrays were
			# "emptied" with pop() inside a for loop over themselves,
			# which removes only about half of the entries, so leftovers
			# of one line disturbed the justification of the next one.
			my @landmarks;
			if ($justify % 2) {
				# positions of the gaps between the words of this line
				my @spaces;
				my $pos = 0;
				for my $ii ($last .. $i-2) {
					$pos += splen($words[$ii]);
					push(@spaces, $pos);
				}

				# A line of a single word has no gap to widen.
				if (@spaces) {
					# LandMark Factor  *  {1..lc}  =  ideal, evenly spread
					# positions; each one is moved to the nearest gap.
					# TODO not very correct because the length changes
					# when adding a space
					my $lmf = $width / ($lc + 1);
					for my $ii (1 .. $lc) {
						push(@landmarks, mindiff(round($lmf*$ii), @spaces));
					}
				}
			}

			my $pos = 0;
			my $lmhc = 0;	# landmark help counter: next landmark to place
			for my $ii ($last .. $i-1) {
				print $words[$ii];

				# @landmarks is empty unless we justify
				$pos += splen($words[$ii]);
				while (($lmhc < @landmarks) and ($pos == $landmarks[$lmhc])) {
					$lmhc++;
					print " ";
				}

				if ($ii < $i-1) {
					print " ";
				} else {
					newline();
				}
			}
			$last = $i;
			$lc = $width - length($words[$i]);
		}

		# end of paragraph - print as usual (word1 word2 word3.\n)
		if (($i == $wc) and ($last < $wc)) {
			print join(" ", @words[$last .. $wc-1]);
			newline();
		}
	}
}

# Usage: unformatted (STRING) -> prints STRING untouched, followed by a
# registered new line (see newline)
# The line ending of STRING (LF, CR or CRLF) is removed first.  This is
# done on a copy, so the caller's variable stays as it is.
sub unformatted {
	my ($line) = @_;
	# Strip the whole line ending: removing just one character left the
	# LF of a CRLF pair in place, which doubled the line break.
	$line =~ s/[\r\n]+\z//;
	print $line;
	newline();
}

############
### MAIN ###
############

# Lines of the paragraph that is being collected.  A paragraph ends at a
# line that starts with whitespace (this includes empty lines); such a
# line is passed through untouched.
my $cache = "";
while (defined(my $line = <STDIN>)) {
	if ($allpar % 2) { # -p set: indented lines are paragraph text, too
		$line =~ s/^[\t ]*//;
	}
	if ($line =~ m/^[\r\n\t ]/) { # ignore those lines
		# reformat cache; this is done for an empty cache as well,
		# because reformat prints the left margin the ignored line
		# is written after
		reformat($cache);
		# print ignored line
		unformatted($line);
		# clear cache
		$cache = "";
	} else {
		$cache .= $line;
	}
}
# Flush the last paragraph.  With nothing left, reformat would only print
# the left margin without a line break, i.e. dangling spaces at the very
# end of the output.
if ($cache ne "") {
	reformat($cache);
}

exit(0);

=head1 NAME

reformat - simple tool to reformat plain ASCII texts

=head1 SYNOPSIS

B<reformat> [B<-h>] [B<-j>] [B<-l> I<margin>] [B<-p>] [B<-w> I<width>]

=head1 DESCRIPTION

B<reformat> is a simple tool to reformat plain texts. reformat reads
from F<stdin> and writes to F<stdout>.

Available options are:

=over

=item B<-h>

prints usage information

=item B<-j>

switch justify mode: Each line of a paragraph will have the same width (see
B<-w> option). To reach this, spaces (' ') will be added between words.
Default: disabled

=item B<-l> I<left-margin>

Sets the left margin to I<left-margin>. The margin is produced by 
I<left-margin> spaces (' '), no tabs will be used. Default: 0

=item B<-p>

Accept lines beginning with a whitespace as usual paragraphs, too.

=item B<-w> I<width>

Sets the paragraph width to I<width>. No reformatted line will be longer
than I<width> (plus the defined left margin). Default: 72

=back

=head1 LIMITATIONS

=over

=item

B<reformat> isn't an intelligent program. It just reads a whole paragraph
into a buffer and then reformats it. The end of a paragraph is indicated by
an empty line (which may also contain spaces or tabs) or by a line beginning
with whitespace (if you don't use the B<-p> option).

Lines beginning with whitespaces are lines to keep untouched. Nothing happens
with them, unless you use B<-p> option (as just mentioned).

=item

B<reformat> doesn't look for hyphenation and hyphens at all. It won't
break a line at a hyphen. B<reformat> works word-by-word.

=item

B<reformat> doesn't detect 'small paragraphs' (paragraphs without an
empty line).

=item

Check for errors! If B<reformat> detects a word with a length greater
than the specified width, it will abort.

=item

B<reformat> has problems with control characters. Some text
documents contain the B<^L> character (0x0c), for example.

=back

=head1 TODO

Planned features are:

=over

=item

Fix I<some> problems, see L</LIMITATIONS>.

=item

Add an option to declare a string that indicates "don't reformat" in
the text. This would be nice for reformatting emails without touching the
quotes ('> '-lines).

=item

Add an option (e.g. B<-i>) to keep indenting.

=back

=head1 EXAMPLES

=over

=item reformat < foo > bar

Reads text from F<foo>, reformats and writes to F<bar>.

=item reformat -l 15 -j -w 50 < foo

Nice justified, centered text from file F<foo> on an 80x25 terminal.

=back

=head1 SEE ALSO

L<fold(1)>

=head1 AUTHOR AND COPYRIGHT

(C) Stephan Beyer E<lt>s-beyer@gmx.netE<gt>, 2003-2004, GPL

=cut
