#!/usr/local/bin/perl -w

###############################################################################
# Program     : validateMzML.pl
# Author      : Eric Deutsch <edeutsch@systemsbiology.org>
# $Id$
#
# Description : This script reads an mzML 0.93 file and applies
#               some basic validation to it
#
###############################################################################


###############################################################################
   # Generic SBEAMS setup for all the needed modules and objects
###############################################################################
use strict;
use Getopt::Long;
use FindBin;
use XML::Xerces;

#use lib "/net/db/projects/PSI/dataXML/validator";
use mzMLContentHandler;

#### Package globals shared with the subroutines defined further down.
#### In the code visible in this file, $QUIET and $USAGE are read by main()
#### and $VERBOSE and $USAGE by validateMzML(); $TESTONLY is parsed and
#### echoed under --debug but not otherwise consulted.
our ($PROG_NAME, $USAGE, %OPTIONS, $QUIET, $VERBOSE, $DEBUG, $TESTONLY);


###############################################################################
# Set program name and usage banner for command line use
###############################################################################
$PROG_NAME = $FindBin::Script;
$USAGE = <<EOU;
Usage: $PROG_NAME [OPTIONS] mzML_inputfile
Options:
  --verbose n                 Set verbosity level.  default is 0
  --quiet                     Set flag to print nothing at all except errors
  --debug n                   Set debug flag
  --testonly                  If set, rows in the database are not changed or added

 e.g.: $PROG_NAME --verbose 1 tiny1.mzML0.93.xml
EOU

#### Parse the command line into %OPTIONS.  If GetOptions reports a problem
#### (it returns false), abort with the usage banner.
GetOptions(\%OPTIONS, 'verbose:s', 'quiet', 'debug:s', 'testonly')
    or die "\n$USAGE";


#### Copy the parsed options into the globals; anything absent or false
#### (including --verbose / --debug given without a value) becomes 0.
($VERBOSE, $QUIET, $DEBUG, $TESTONLY) =
    map { $OPTIONS{$_} || 0 } qw(verbose quiet debug testonly);

#### With --debug, echo the effective settings
if ($DEBUG) {
    my @settings = (
        [ 'VERBOSE',  $VERBOSE ],
        [ 'QUIET',    $QUIET ],
        [ 'DEBUG',    $DEBUG ],
        [ 'TESTONLY', $TESTONLY ],
    );

    print "Options settings:\n";
    foreach my $setting (@settings) {
        my ($label, $value) = @{$setting};
        print "  $label = $value\n";
    }
}



###############################################################################
# Run main() and then exit with status 0
###############################################################################

main();
exit(0);



###############################################################################
# main: Main Function
#
# Runs validateMzML() on every file named on the command line (@ARGV), in
# order.
#
# Reads the file-level globals $QUIET, $USAGE and %OPTIONS.  The 'validate',
# 'namespaces' and 'schemas' entries of %OPTIONS are forwarded unchanged;
# the GetOptions call in this file does not define those options, so they
# are normally undef and validateMzML() falls back to its own defaults.
#
# Problems found here (no files given, a named file that does not exist) are
# printed to STDOUT; a missing file does not stop the remaining files from
# being processed.  The return value carries no information.
###############################################################################
sub main {

  #### Check to see if there is an input file to work on.  The argument
  #### count is tested rather than the truth of $ARGV[0], so that a first
  #### argument that is literally "0" is still treated as a file name.
  unless ( @ARGV ) {
    print "ERROR: No files specified\n";
    print $USAGE;
    return;
  }

  #### Process all files
  foreach my $file ( @ARGV ) {
    print "INFO: Validating file '$file'\n" unless ($QUIET);

    #### Report a missing file and move on to the next one
    unless ( -e $file ) {
      print "ERROR: File '$file' does not exist\n";
      next;
    }

    validateMzML(
      inputfile => $file,
      validate => $OPTIONS{validate},
      namespaces => $OPTIONS{namespaces},
      schemas => $OPTIONS{schemas}
    );
  }

  return;

} # end main



#######################################################################
# validateMzML - uses SAX Content handler to parse mzML file
#
# Named arguments:
#   inputfile  => path of the file to parse (required)
#   validate   => 'always', 'never' or 'auto' (case-insensitive);
#                 defaults to 'always' when absent or false
#   namespaces => value for the SAX2 "namespaces" feature; defaults to 1
#                 when undefined, an explicit 0 is passed through
#   schemas    => value for the Xerces "validation/schema" feature;
#                 defaults to 1 when undefined, an explicit 0 is passed
#                 through
#
# Reads the file-level globals $VERBOSE and $USAGE.
#
# Dies if inputfile is missing/empty or if validate is not one of the
# three recognised words; both checks happen before the parser is created.
# Returns 1 once parse() has returned.
# NOTE(review): how parse errors surface is decided by
# XML::Xerces::PerlErrorHandler, which is not visible here - confirm.
#######################################################################
sub validateMzML {
  my %args = @_;

  #### A defined, non-empty name is required.  Truth is deliberately not
  #### tested, so that a file named "0" is accepted.
  my $inputfile = $args{'inputfile'};
  die("ERROR: inputfile not passed")
    unless (defined($inputfile) && length($inputfile));

  #### Process parser options.  namespaces/schemas only fall back to 1 when
  #### the caller left them undefined, so a caller can turn them off with 0.
  my $validate = $args{'validate'} || 'always';
  my $namespaces = defined($args{'namespaces'}) ? $args{'namespaces'} : 1;
  my $schemas = defined($args{'schemas'}) ? $args{'schemas'} : 1;

  #### Feature settings applied for each validation mode, in order.
  #### 'NEVER' leaves the "dynamic" feature untouched.
  my $validation_feature = "http://xml.org/sax/features/validation";
  my $dynamic_feature = "http://apache.org/xml/features/validation/dynamic";
  my %features_for_mode = (
    ALWAYS => [ [ $validation_feature, 1 ], [ $dynamic_feature, 0 ] ],
    NEVER  => [ [ $validation_feature, 0 ] ],
    AUTO   => [ [ $validation_feature, 1 ], [ $dynamic_feature, 1 ] ],
  );

  #### NOTE(review): the message says "-v", but the GetOptions call in this
  #### file defines no such option; the value comes from the 'validate'
  #### argument.  The text is left as it was.
  my $mode_features = $features_for_mode{uc($validate)}
    or die("Unknown value for -v: $validate\n$USAGE");

  #### Set up the Xerces parser
  my $parser = XML::Xerces::XMLReaderFactory::createXMLReader();
  $parser->setFeature("http://xml.org/sax/features/namespaces", $namespaces);

  foreach my $feature_setting (@{$mode_features}) {
    my ($feature, $value) = @{$feature_setting};
    $parser->setFeature($feature, $value);
  }

  $parser->setFeature("http://apache.org/xml/features/validation/schema",
    $schemas);


  #### Create the error handler and content handler
  my $error_handler = XML::Xerces::PerlErrorHandler->new();
  $parser->setErrorHandler($error_handler);

  my $content_handler = mzMLContentHandler->new();
  $parser->setContentHandler($content_handler);

  $content_handler->setVerbosity($VERBOSE);
  $content_handler->{counter} = 0;

  #### Parse the file; the handlers registered above receive the events
  $parser->parse(XML::Xerces::LocalFileInputSource->new($inputfile));
  print "\n" if ($VERBOSE);

  #### In verbose mode, report the handler's spectrum_counter field.
  #### Presumably mzMLContentHandler maintains it during the parse - verify
  #### against that module.
  if ($VERBOSE) {
    if ($content_handler->{spectrum_counter}) {
      print "Counted $content_handler->{spectrum_counter} spectra\n";
    } else {
      print "ERROR: No spectra found\n";
    }
  }

  return(1);

}
