#! /usr/bin/perl -w
#
# Usage: ./urlbst.pl [input-file [output-file]]
#
# If either input-file or output-file is omitted, they are replaced by
# stdin or stdout respectively.
#
# $Id: urlbst.pl,v 1.3 2002/04/22 15:49:06 norman Exp $

$version = '0.1';		# %%VERSION%%
($progname = $0) =~ s/.*\///;
$mymarker = "% $progname";

$infile  = ($#ARGV >= 0 ? $ARGV[0] : '-');
$outfile = ($#ARGV >= 1 ? $ARGV[1] : '-');

$exitstatus = 0;		# success status

open (IN, "<$infile") || die "Can't open $infile to read";
open (OUT, ">$outfile") || die "Can't open $outfile to write";

# We have to make certain assumptions about the source files, in order
# to patch them at the correct places.  Specifically, we assume that
#
#   - there's a function init.state.consts
#
#   - ...and an output.nonnull which does the actual outputting, which
#         has the `usual' interface.
#
#   - we can replace
#           note output
#       by
#           output.url    % the function which formats and displays any URL
#           new.block
#           note output
#
#   - there is a function which handles the `article' entry type (this
#         will always be true)
#
#   - there is a function output.bibitem which is called at the
#         beginning of each entry type
#   - ...and one called fin.entry which is called at the end
#
# If the functions format.date, format.title or new.block are not defined (the
# former is not in apalike, for example, and the last is not in the
# AMS styles), then replacements areincluded in the output.
#
# All these assumptions are true of the standard files and, since most
# style files derive from them more or less directly, are true of most (?)
# other style files, too.
#
# There's some rather ugly Perl down here.  The parsing for
# brace-matching could probably do with being rewritten in places, to
# make it less ugly, and more robust.

print OUT "%%% Modification of BibTeX style file ", ($infile eq '-' ? '<stdin>' : $infile), "\n";
print OUT "%%% to add webpage entry type, and url and lastchecked fields\n%%% Edits by $progname, version $version\n%%% (marked with $mymarker)\n";
print OUT "%%% Original headers follow...\n";

$found{initconsts} = 0;
$found{outputnonnull} = 0;
$found{article} = 0;
$found{outputbibitem} = 0;
$found{finentry} = 0;
$found{formatdate} = 0;
$found{formattitle} = 0;
$found{newblock} = 0;

while (<IN>) {
    /^ *ENTRY/ && do {
	# Work through the list of entry types, finding what ones are there.
	# If we find a URL entry there already, object, since these edit
	# will mess things up.
	$line = $_;
	until ($line =~ /\{\s*(\w*)/) {
	    $line .= <IN>;
	}
	$bracematchtotal = 0;	# reset
	bracematcher($line);
	$line =~ /\{\s*(\w*)/;
	$found{'entry'.$1} = 1;
	print OUT $line;
	$line = <IN>;
	until (bracematcher($line) == 0) {
	    # XXX deal with multiple entries on one line
	    ($line =~ /^\s*(\w*)/) && ($found{'entry'.$1} = 1);
	    print OUT $line;
	    $line = <IN>;
	}
	if (defined($found{entryurl})) {
	    print STDERR "$progname: style file $infile already has URL entry!\n";
	    # print out the rest of the file, and give up
	    while (<IN>) {
		print OUT;
	    }
	    $exitstatus = 1;
	    last;
	} else {
	    print OUT "    url $mymarker\n    lastchecked\n";
	}
	print OUT $line;
	next;
    };

    /^ *FUNCTION *\{init\.state\.consts\}/ && do {
	print OUT "INTEGERS { bracket.state outside.brackets open.brackets within.brackets close.brackets } $mymarker\n";
	$line = $_;
	until ($line =~ /\{.*\}.*\{/s) {
	    $line .= <IN>;
	}
	$line =~ s/(\{.*?\}.*?\{)/$1 #0 'outside.brackets := $mymarker
  #1 'open.brackets :=
  #2 'within.brackets :=
  #3 'close.brackets :=

  /s;
	print OUT $line;
	$found{initconsts} = 1;
	next;
    };

    /^ *FUNCTION *{output\.nonnull}/ && do {
	print OUT "$mymarker\n";
	print OUT "FUNCTION {output.nonnull.original}\n";
	$bracematchtotal = 0;
	while (defined ($line = <IN>) && bracematcher($line) > 0) {
	    print OUT $line;
	}
	print OUT "$line\n";	# print out terminating \} (assumed
                                # alone on the line)
	print_output_functions();
	$found{outputnonnull} = 1;
	next;
    };

    /note *output/ && do {
	print OUT "  output.url $mymarker\n  new.block\n";
	print OUT;
	next;
    };

    /^ *FUNCTION *{format\.date}/ && do {
	$found{formatdate} = 1;
	print OUT;
	next;
    };

    /^ *FUNCTION *{format\.title}/ && do {
	$found{formattitle} = 1;
	print OUT;
	next;
    };

    /^ *FUNCTION *{new.block}/ && do {
	$found{newblock} = 1;
	print OUT;
	next;
    };

    /^ *FUNCTION *\{article\}/ && do {

	print_missing_functions();
	print_webpage_def();

	print OUT;
	$found{article} = 1;
	next;
    };

    /FUNCTION *\{output.bibitem\}/ && do {
	$line = $_;
	until ($line =~ /\{.*\}.*\{/s) {
	    $line .= <IN>;
	}
	$line =~ s/(\{.*?\}.*?\{)/$1 outside.brackets 'bracket.state := $mymarker
  /s;
	print OUT $line;
	$found{outputbibitem} = 1;
	next;
    };

    /FUNCTION *\{fin\.entry\}/ && do {
	$line = $_;
	until ($line =~ /\{.*\}.*\{/s) {
	    $line .= <IN>;
	}
	$line =~ s/(\{.*?\}.*?\{)/$1 
  bracket.state close.brackets = $mymarker
    \{ "]" * \}
    'skip\$
  if\$
  /s;
        print OUT $line;
	$found{finentry} = 1;
	next;
    };

    print OUT;    
}

foreach $k (keys %found) {
    if ($found{$k} == 0) {
	print STDERR "$progname: $infile: failed to find feature $k\n";
    }
}

close (IN);
close (OUT);

exit $exitstatus;;






sub print_output_functions {
    print OUT "$mymarker...\n";
    print OUT <<'EOD';
FUNCTION {output.nonnull}
{ % Save the thing we've been asked to output
  's :=
  % If the bracket-state is close.brackets, then add a close-bracket to what's
  % currently at the top of the stack, and set bracket.state to outside.brackets
  bracket.state close.brackets =
    { "]" *
      outside.brackets 'bracket.state :=
    }
    'skip$
  if$
  bracket.state outside.brackets =
    { % We're outside all brackets -- this is the normal situation.
      % Write out what's currently at the top of the stack, using the
      % original output.nonnull function.
      s
      output.nonnull.original
    }
    { % Still in brackets.  Add open-bracket or (continuation) comma, add the
      % new text (in s) to the top of the stack, and move to the close-brackets
      % state, ready for next time (unless inbrackets resets it).  If we come
      % into this branch, then output.state is carefully undisturbed.
      bracket.state open.brackets =
        { " [" * }
        { ", " * } % bracket.state will be within.brackets
      if$ 
      s * 
      close.brackets 'bracket.state :=
    }
  if$
}

% Call this function just before adding something which should be presented in 
% brackets.  bracket.state is handled specially within output.nonnull.
FUNCTION {inbrackets}
{ bracket.state close.brackets =
    { within.brackets 'bracket.state := } % reset the state: not open nor closed
    { open.brackets 'bracket.state := }
  if$
}

FUNCTION {format.url}
{ url empty$
    { "" }
    { "Available from World Wide Web: \url{" url * "}" * }
  if$
}

FUNCTION {format.lastchecked}
{ lastchecked empty$
    { "" }
    { inbrackets "cited " lastchecked * }
  if$
}
EOD
    print OUT "%  ...$mymarker\n";
}

sub print_webpage_def {
    print OUT "$mymarker...\n";
    print OUT <<'EOD';
% Output a URL.  We can't use the more normal idiom (something like
% `format.url output'), because the `inbrackets' within
% format.lastchecked applies to everything between calls to `output',
% so that `format.url format.lastchecked * output' ends up with both
% the URL and the lastchecked in brackets.
FUNCTION {output.url}
{ url empty$
    'skip$ 
    { new.block 
      format.url output
      format.lastchecked output 
    }
  if$
}

% Webpage entry type.
% Title and url fields required;
% author, note, year, month, and lastchecked fields optional
% See references 
%   ISO 690-2 http://www.nlc-bnc.ca/iso/tc46sc9/standard/690-2e.htm
%   http://www.classroom.net/classroom/CitingNetResources.html
%   http://neal.ctstateu.edu/history/cite.html
%   http://www.cas.usf.edu/english/walker/mla.html
% for citation formats for web pages.
FUNCTION {webpage}
{ output.bibitem
  author empty$
    'skip$
    { format.authors output.nonnull }
  if$
  new.block
  format.title "title" output.check
  inbrackets "online" output
  new.block
  year empty$
    'skip$
    { format.date "year" output.check }
  if$
  lastchecked empty$
    'skip$
    { format.lastchecked output }
  if$
  new.block
  format.url "url" output.check
  new.block
  note output
  fin.entry
}
EOD
    print OUT "%   ...$mymarker\n\n\n";
}


sub print_missing_functions {
    # We've got to the bit of the file which handles the entry
    # types, so write out the webpage entry handler.  This uses
    # the format.date function, which which many but not all
    # bst files have (for example, apalike doesn't).  So
    # check that we either have found this function already, or
    # add it.
    if (! $found{formatdate}) {
	if ($found{entrymonth}) {
	    print OUT <<'EOD';
FUNCTION {format.date}
{ year empty$
    { month empty$
	{ "" }
	{ "there's a month but no year in " cite$ * warning$
	  month
	}
      if$
    }
    { month empty$
	'year
	{ month " " * year * }
      if$
    }
  if$
}
EOD
	} else {
	    print OUT <<'EOD';
FUNCTION {format.date}
{ year empty$
    'skip$
    { %write$
      "(" year * ")" *
    }
  if$
}
EOD
  	}
	$found{formatdate} = 1;
    }

    if (! $found{formattitle}) {
	print OUT <<'EOD';
FUNCTION {format.title}
{ title empty$
    { "" }
    { title "t" change.case$ }
  if$
}
EOD
    }

    # The same, for the new.block function
    if (! $found{newblock}) {
	# No new.block function defined.  Odd (AMS).  So define dummy
	print OUT 'FUNCTION {new.block} { skip$ }'."\n";
	$found{newblock} = 1;
    }
}

# Keep track of open and close braces in the string argument.
# Keep state in $matchtotal, return the current value.
sub bracematcher {
    my $s = shift;
    $s =~ s/[^\{\}]//g;
    #print "s=$s\n";
    foreach my $c (split (//, $s)) {
	$bracematchtotal += ($c eq '{' ? 1 : -1);
    }
    return $bracematchtotal;
}
