#!/usr/bin/perl

# title-index.pl - read bunches o' TEI to create a title index

# Eric Lease Morgan <eric_morgan@infomotions.com>

# July 8, 2004 - Gettin' ready 4 LAMP


######################################################
# no configuration should be necessary below this line


# require the necessary modules
use File::Basename;
use File::Find;
use strict;
use XML::LibXML;

# get the input; the first command-line argument is the directory to scan
my $xml_dir = $ARGV[0];

# check for input; test for "defined and non-empty" rather than plain truth
# so that a directory literally named "0" is still accepted
if (! defined($xml_dir) || $xml_dir eq '') {

	print "Usage: $0 /full/path/to/directory/\n";
	exit;

}

# initialize; @all_items is filled by process_files, one hash reference
# (keys: author, title, file) per indexed document, and $parser is shared
# by every call to process_files
our @all_items;
my $parser = XML::LibXML->new;

# process every file in the XML directory
find (\&process_files, $xml_dir);

# process each item, in the order the files were found
foreach my $item (@all_items) {

	# display each item
	print $item->{title} . " by " . $item->{author} . " - " . $item->{file} . "\n";

}

# done
exit;


# do the work; this is the File::Find callback, called once for every entry
# found below the XML directory.  For each regular file ending in .xml it
# parses the document, reads the author and title from the teiHeader, and
# appends a {author, title, file} hash reference to @all_items.  Files that
# cannot be parsed, or that have no teiHeader child of the root element, are
# reported on STDERR and skipped.
sub process_files {

	# get the name of the found file; $File::Find::name is the path used for
	# display, while $_ is the name File::Find says to use for opening the
	# file (correct whether or not File::Find has changed directory)
	my $file  = $File::Find::name;
	my $entry = $_;

	# make sure it has the correct extension; return, not next, because this
	# is a subroutine and not the body of a loop
	return if ($file !~ m/\.xml$/);

	# skip directories and other non-files that merely end in .xml
	return if (! -f $entry);

	# parse the file and extract the necessary data;  s o   s l o w !
	print "Processing $file... \n";

	# parse_file dies on malformed XML; trap that so one bad file does not
	# abort the whole index
	my $doc = eval { $parser->parse_file($entry) };
	if (! $doc) {

		warn "Skipping $file: $@\n";
		return;

	}

	# find the header; it must be a direct child of the root element
	my $root     = $doc->getDocumentElement;
	my ($header) = $root->findnodes('teiHeader');
	if (! $header) {

		warn "Skipping $file: no teiHeader element found\n";
		return;

	}

	# extract the desired data
	my $author = $header->findvalue('fileDesc/titleStmt/author');
	my $title  = $header->findvalue('fileDesc/titleStmt/title');

	# save it
	push @all_items, ({author=>$author, title=>$title, file=>$file});

	return;

}
