#!/usr/bin/perl

# doaj2mylibrary.pl - harvest DOAJ metadata and import it into MyLibrary

# Eric Lease Morgan <emorgan@nd.edu>
# 2006-01-12 - added pod
# 2004-10-12 - first cut


=head1 NAME

doaj2mylibrary.pl - harvest DOAJ metadata and import it into MyLibrary

=head1 DESCRIPTION

The purpose of this program is to populate your MyLibrary database with records harvested from the OAI-PMH data repository called the Directory of Open Access Journals (DOAJ).

If it doesn't already exist, the program will automatically create a facet called Subjects.

The script will then get a list of OAI sets supported by the repository and create facet/term combinations in the form of Subjects/foobar where foobar is the name of each set.

If it doesn't already exist, the program will automatically create a facet/term combination called Formats/Journals.

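Each set will then be harvested and each record in each set will be used to create a MyLibrary resource. Only very basic metadata is recorded: title, publisher, language, and identifier (the Dublin Core identifier is saved as the resource's location, and the OAI identifier as its foreign key).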

=head1 AUTHOR

Eric Lease Morgan

=cut


# include the necessary modules/subroutines
use lib '../lib/';
use MyLibrary::Facet;
use MyLibrary::Resource;
use MyLibrary::Term;
use Net::OAI::Harvester;
use strict;
require 'subroutines.pl';

# site-specific settings:
#   DOAJ         - the address of the OAI interface to DOAJ
#   LOCATIONTYPE - the value passed as location_type for each harvested URL
use constant {
	DOAJ         => 'http://www.doaj.org/oai',
	LOCATIONTYPE => 20,
};

# start with a clean screen (clearScreen is not defined in this file;
# presumably it comes from subroutines.pl), then explain what is about
# to happen
clearScreen();
print <<'END_INTRODUCTION';

If they don't exist, this program will first create a 'Subjects' facet.
It will then create a set of subject terms based on the OAI sets defined
by the Directory of Open Access Journals. It will then create a Formats
facet, if it doesn't exist and create a related term called Journals.
Finally, it will harvest the Directory's metadata and create MyLibrary
records accordingly.

END_INTRODUCTION

# wait for the user; whatever is typed is read and thrown away
print "Press enter (or return) to begin. ";
<STDIN>;

# make sure a facet named Subjects exists and remember its id; the terms
# made from the DOAJ sets are attached to this facet
my $facet;
if (MyLibrary::Facet->get_facets(value => 'Subjects', field => 'name')) {

	# found it; load the existing facet
	$facet = MyLibrary::Facet->new(name => 'Subjects');
	print "\nThe facet Subjects already exists.\n";

}

else {

	# not found; build a new facet and save it
	$facet = MyLibrary::Facet->new;
	$facet->facet_name('Subjects');
	$facet->facet_note('Here you will find lists of terms describing the aboutness of things');
	$facet->commit;
	print "\nThe facet Subjects was created.\n";

}
my $facetID = $facet->facet_id;

# ask DOAJ for its OAI sets; each set name becomes a term in the Subjects facet
my $harvester = Net::OAI::Harvester->new('baseURL' => DOAJ);
my $sets = $harvester->listSets;

# stop now if the request failed; without the list of sets there is
# nothing to create here and nothing to harvest later
if ($sets->errorCode) {
	die 'Unable to list the sets at ', DOAJ, ': ', $sets->errorString, "\n";
}

# a named loop variable is used instead of $_ so that none of the methods
# called inside the loop can clobber the current set specification
foreach my $setSpec ($sets->setSpecs) {

	# look up the display name of this set once; assigning it to a scalar
	# also keeps it a single value inside the named-argument lists below
	my $setName = $sets->setName($setSpec);

	# check for this particular term
	if (! MyLibrary::Term->get_terms(value => $setName, field => 'name')) {

		# create it
		my $term = MyLibrary::Term->new;
		$term->term_name($setName);
		$term->term_note('This term comes from the DOAJ.');
		$term->facet_id($facetID);
		$term->commit;
		print 'The term ', $setName, " was created.\n";

	}

	# it already exists
	else { print 'The term ', $setName, " already exists.\n" }

}


# make sure a facet named Formats exists and remember its id; the
# variable $facet is reused to hold it
if (MyLibrary::Facet->get_facets(value => 'Formats', field => 'name')) {

	# found it; load the existing facet
	$facet = MyLibrary::Facet->new(name => 'Formats');
	print "The facet Formats already exists.\n";

}

else {

	# not found; build a new facet and save it
	$facet = MyLibrary::Facet->new;
	$facet->facet_name('Formats');
	$facet->facet_note('This is a list of physical formats for information resources.');
	$facet->commit;
	print "The facet Formats was created.\n";

}
my $formatID = $facet->facet_id;

# make sure a term named Journals exists and remember its id; when it has
# to be created it is filed under the Formats facet
my $term;
if (MyLibrary::Term->get_terms(value => 'Journals', field => 'name')) {

	# found it; load the existing term
	$term = MyLibrary::Term->new(name => 'Journals');
	print "The term Journals already exists.\n";

}

else {

	# not found; build a new term and save it
	$term = MyLibrary::Term->new;
	$term->term_name('Journals');
	$term->term_note('These are scholarly serial publications.');
	$term->facet_id($formatID);
	$term->commit;
	print "The term Journals was created.\n";

}
my $journalTermID = $term->term_id;

# loop through each OAI set from DOAJ, turning its records into MyLibrary
# resources; a named loop variable is used instead of $_ so that none of
# the methods called below can clobber the current set specification
foreach my $setSpec ($sets->setSpecs) {

	# find the subject term named after this set
	print "\n$setSpec\n";
	my $term = MyLibrary::Term->new(name => $sets->setName($setSpec));
	my $termID = $term->term_id;

	# get the records in this set
	my $records = $harvester->listAllRecords(metadataPrefix => 'oai_dc', set => $setSpec);

	# report a failed request instead of silently harvesting nothing
	if ($records->errorCode) {
		warn "Unable to harvest the set $setSpec: ", $records->errorString, "\n";
	}

	# process each record
	while (my $record = $records->next) {

		# the OAI identifier is used as the resource's foreign key
		my $header = $record->header;
		my $FKey   = $header->identifier;

		# a deleted record has a header but no metadata, so there is
		# nothing to import
		if (($header->status || '') eq 'deleted') {
			print "$FKey...deleted at DOAJ. Skipped.\n";
			next;
		}

		# extract the metadata; each accessor is called in scalar context
		my $metadata  = $record->metadata;
		my $name      = $metadata->title;
		my $publisher = $metadata->publisher;
		my $language  = $metadata->language;
		my $location  = $metadata->identifier;
		print "$name...";

		# look the resource up once; a false value means it does not exist yet
		my $resource = MyLibrary::Resource->new(fkey => $FKey);
		if (! $resource) {

			# create it, related to both Journals and this set's subject term
			$resource = MyLibrary::Resource->new;
			$resource->name($name);
			$resource->publisher($publisher);
			$resource->language($language);
			$resource->fkey($FKey);
			$resource->related_terms(new => [$journalTermID, $termID]);
			$resource->add_location(location => $location, location_type => LOCATIONTYPE);
			$resource->commit;
			print "added (", $resource->id, ").\n";

		}

		else {

			# update it; a journal listed in several sets gets one
			# subject term per set
			$resource->related_terms(new => [$termID]);
			$resource->commit;
			print "already exists. Updated.\n";

		}

	}

}

# all sets have been processed; say so and end with a success status
print "\nDone\n";
exit 0;
