#!/usr/bin/perl

# marc2xhtml.pl - output bunches o' HTML based on sets of MARC records
# Eric Lease Morgan <eric_morgan@infomotions.com>

# May 27, 2004 - getting ready for Shining a LAMP on XML 'n Monterey


######################################################
# no configuration should be necessary below this line


# use the necessary modules
use File::Find;
use MARC::Batch;
use strict;

# get the input: the directory to search for .marc files and the
# directory the generated HTML files are written to
my $in_dir  = shift @ARGV;
my $out_dir = shift @ARGV;

# check for input; test for defined/non-empty rather than truth so that a
# directory literally named "0" is still accepted
if (! defined $in_dir or $in_dir eq '' or ! defined $out_dir or $out_dir eq '') {

	# print help text and quit; a usage error goes to STDERR and exits
	# with a non-zero status so calling scripts can detect the failure
	print STDERR "Usage: $0 [full path to input directory] [full path to output directory]\n";
	exit 1;
	
}

# slurp up the template; called with parentheses rather than "&name;",
# since the latter form implicitly hands the current @_ to the subroutine
my $template = html_template();

# process every file in the defined directory; process_files is called
# once for each file and directory found below $in_dir
find (\&process_files, $in_dir);

# done
exit;


# process each found file
#
# Callback for File::Find::find. Ignores anything whose name does not end
# in ".marc"; for every other file it reads each MARC record, fills in a
# copy of the file-scoped $template, and writes the result to a file named
# after the record's 099 field inside the file-scoped $out_dir.
#
# Takes no arguments (the file name comes from $File::Find::name) and
# returns nothing useful. Records lacking a 099 field, and records whose
# output file cannot be opened, are reported with warn and skipped.
sub process_files {

	# get the name of the found file; File::Find changes directory while it
	# walks, so this name only resolves when the input directory was given
	# as a full path, as the usage text asks
	my $file = $File::Find::name;
	
	# make sure it has the correct extension; a File::Find callback is an
	# ordinary subroutine, so leave it with return, not next
	return if ($file !~ m/\.marc$/);
	
	# loaded here so this subroutine does not depend on who else loaded it
	require File::Spec;
	
	# echo progress
	print "Processing $file...\n";
	
	# create a batch object from the file
	my $batch = MARC::Batch->new('USMARC', $file);
	
	# be lenient
	$batch->strict_off;
	
	# process every record in the object
	RECORD: while (my $record = $batch->next()) {
		
		# extract the key; without it there is no file name to write to
		my $id_field = $record->field('099');
		if (! defined $id_field) {
			
			warn "Skipping a record in $file: it has no 099 field to use as a key\n";
			next RECORD;
			
		}
		my $id = $id_field->as_string;
		
		# create a brief description
		my $brief = escape_entities($record->title);
		
		# extract the author, falling back on the added entries
		my $author = $record->author;
		foreach my $tag ('700', '710') {
			
			last if (defined $author and $author ne '');
			$author = $record->subfield($tag, 'a');
			
		}
		if (! defined $author or $author eq '') { $author = '[anonymous]' }
		$author = escape_entities($author);
		
		# extract the title and munge it for display and sorting
		my $title = $record->title_proper;
		if (! defined $title) { $title = '' }
		$title =~ s/\W$//;
		$title =~ s/\s$//;
		my @words = split / /, $title;
		my $first_word = @words ? $words[0] : '';
		if ($first_word eq 'The' || $first_word eq 'A' || $first_word eq 'An') {
			
			# drop the leading article and re-capitalize what is left
			shift @words;
			$title = join ' ', @words;
			$title =~ s/(^.)/uc($1)/e;
			
		}
		$title = escape_entities($title);
		
		# extract the publisher
		my $publisher = _field_text($record, 260);
		
		# extract the year, as well as I can: the digits found in the
		# first space-delimited word of 260 $c
		my $date = $record->subfield(260, 'c');
		my @date_words = defined $date ? split(/ /, $date) : ();
		my $year = @date_words ? $date_words[0] : '';
		$year =~ s/\D//g;
		
		# extract the pagination
		my $pagination = _field_text($record, 300);
		
		# extract the notes and format them
		my $notes = escape_entities(join ' ', map { $_->as_string } $record->field('5..'));
		
		# extract the subjects and format them
		my $subjects = escape_entities(join ' ', map { $_->as_string } $record->field('6..'));
		
		# get all fields
		my $details = '';
		foreach my $field ($record->fields) {
			
			# format the details: tag, indicators (when defined), content
			$details .= $field->tag . ' ';
			foreach my $position (1, 2) {
				
				my $indicator = $field->indicator($position);
				if (defined $indicator) { $details .= $indicator }
				
			}
			$details .= ' ' . $field->as_string . "\n";
			
		}
		$details = escape_entities($details);
		
		# map each macro to its value; everything but the all-digit year has
		# been escaped, and the id is escaped here because it lands in an
		# attribute value (the unescaped id is still used for the file name)
		my %value_of = (
			AUTHOR      => $author,
			BRIEF       => $brief,
			YEAR        => $year,
			DETAILS     => $details,
			ID          => escape_entities($id),
			NOTES       => $notes,
			PAGINATION  => $pagination,
			PUBLISHER   => $publisher,
			SUBJECTS    => $subjects,
			TITLE       => $title,
			TITLEPROPER => $title,
		);
		
		# initialize the html and process every macro in a single pass, so
		# macro-like text inside the record's own data is never expanded;
		# unknown macros are left untouched
		my $html = $template;
		$html =~ s/##([A-Z]+)##/exists $value_of{$1} ? $value_of{$1} : "##$1##"/ge;
		
		# save the resulting HTML; catfile supplies the path separator
		# whether or not $out_dir ends with one
		my $out_file = File::Spec->catfile($out_dir, "$id.html");
		my $out;
		if (! open $out, '>', $out_file) {
			
			warn "Cannot write $out_file: $!\n";
			next RECORD;
			
		}
		print {$out} $html;
		close $out or warn "Cannot finish writing $out_file: $!\n";
		
	}
	
}


# return the escaped text of the record's first field with the given tag,
# or an empty string when the record has no such field
sub _field_text {

	my ($record, $tag) = @_;
	
	my $field = $record->field($tag);
	return '' if (! defined $field);
	return escape_entities($field->as_string);
	
}


# escape_entities - make a string safe for XHTML text and attribute values
#
# Takes one string and returns a copy in which &, <, >, " and ' have been
# replaced by character references. An undefined value is returned as an
# empty string. The apostrophe is written as the numeric reference &#39;
# rather than &apos;, because &apos; is defined by XML but not by HTML 4
# and so is not understood when the output is read as plain HTML.
sub escape_entities {

	# get the input
	my $s = shift;
	return '' if (! defined $s);
	
	# escape; a single pass means the ampersands introduced by the
	# replacements themselves are never escaped a second time
	my %reference_for = (
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		'"' => '&quot;',
		"'" => '&#39;',
	);
	$s =~ s/([&<>"'])/$reference_for{$1}/g;

	# done
	return $s;
	
}


# html_template - supply the skeleton of an XHTML file
#
# Takes no arguments. Returns one string containing the page skeleton, in
# which each ##NAME## token marks a spot to be filled in by the caller.
sub html_template {

	# the skeleton is kept verbatim; nothing in it is interpolated
	my $skeleton = <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
	<title>##TITLEPROPER##</title> 
	<meta name="id"         content="##ID##"/>
	<meta name="brief"      content="##BRIEF##"/>
	<meta name="author"     content="##AUTHOR##"/>
	<meta name="year"       content="##YEAR##"/>
	<meta name="title"      content="##TITLE##"/>
	<meta name="publisher"  content="##PUBLISHER##"/>
	<meta name="pagination" content="##PAGINATION##"/>
	<meta name="note"       content="##NOTES##"/>
	<meta name="subject"    content="##SUBJECTS##"/>
</head>
<body>
<h1>##BRIEF##</h1>
<pre>##DETAILS##</pre>
</body>
</html>
EOF

	# hand the skeleton back to the caller
	return $skeleton;

}



