#!/usr/bin/perl 


# Copyright (C) 2003 Rainer Typke
#pae2xml is licensed under the terms of the GNU General Public License Version
#2 as published by the <a href="http://www.fsf.org/" target="_top">Free Software Foundation</a>.
#This gives you legal permission to copy, distribute and/or modify <em>pae2xml</em> under
#certain conditions. Read 
#the <a href="http://www.gnu.org/copyleft/gpl.html" target="_top">online version of the license</a>
#for more details. pae2xml is provided AS IS with NO WARRANTY OF ANY KIND,
#INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.



$divisions = 960;
$old_duration = $divisions;
$old_octave = 4;

($mday, $mon, $year) = (localtime()) [3..5];
$encoding_date = sprintf("%4d-%02d-%02d", $year + 1900, $mon+1, $mday);

$TIE = 0;

foreach $a (@ARGV) {
  $p = read_file($a);
  $toprint = "";
  $p =~ s/\s*\=\=+\s*(.*?)\s*\=\=+\s*/$1/sg;
  $p =~ s/\s*included.*?-------------*\s*(.*?)\s*/$1/s;

  ($q, $r) = ($p, $p);
  if ($q !~ /^.*1\.1\.1.*$/gsx && $r =~ /^.*plain.*$/gsx) {
    print_error("$a contains 'plain', but not 1.1.1!\n");
  } else {
    if ($p =~ /^\s*([^\n]+)\n(.*?)\n((\d+\.\d+\.\d.*?plain.*?\n)+)(.*?)\n?([^\n]+)\n([^\n]+)\s*$/gs)    {
      my ($comp, $title, $incipits, $sonst, $libsig, $rismsig) = ($1, $2, $3, $5, $6, $7);

      $toprint .= "
COMPOSER:   $comp
TITLE:      $title
INCIPIT(S): $incipits
OTHER INFO: $sonst
LIB. SIGN.: $libsig
RISM SIGN.: $rismsig\n\n";
      parse_incipits($incipits, $comp, $title, $sonst, $libsig, $rismsig);
    }
    else { if (index($p,"plain&easy") > -1) {
      print_error("Ignoring the following text:\n\n\n$p\n\n\n");
           }
	 }
  }
}


sub parse_incipits {
  my ($incipits, $comp, $title, $sonst, $libsig, $rismsig) = @_;

  $toprint .= "parsing:   $incipits\n";

  while ($incipits =~ /^(\d+\.\d+\..+?)(\d+\.\d+\..*)$/gs) {
    my ($inc1) = $1;
    $incipits = $2;
    parse_pe($inc1, $comp, $title, $sonst, $libsig, $rismsig);
  }
  parse_pe($incipits, $comp, $title, $sonst, $libsig, $rismsig);
}

sub parse_pe {
  my ($pe, $comp, $title, $sonst, $libsig, $rismsig) = @_;

  $pe =~ s/@�/@0�/gs; # make missing time signature explicit
  while ($pe =~ s/([^\-])(\d+)(\'|\,)(A|B|C|D|E|F|G)/$1$3$2$4/gs) {};  # octave first, then duration. Truly global.

  if ($pe =~ /^\s*(\d+\.\d+\.\d)(\.|:)\s*(.*?)\nplain&easy:\s*(%([\w\-\d]+))?(@([\d\w\/]+))?\s*&?\s*(\$([^�]+))?�(.*)$/gs) {
    my ($inr, $instr, $clef, $timesig, $keysig, $rest) = ($1, $3, $5, $7, $9, $10);

    my $filename="$rismsig-$inr.xml";
    $filename =~ s/RISM\s*A\/II\s*:?\s*//gs;
    print "Writing $filename...\n";

    open(OUT, ">$filename");

    if ($clef =~ /^(\w)\-(\d)$/) {
      ($clefsign, $clefline) = ($1, $2);
    } else {
      ($clefsign, $clefline) = ("G", 2);
    }

    $timesig = timesignature($timesig);

    my %fif=("", 0, "xF", 1, "xFC", 2, "xFCG",3, "xFCGD",4, "xFCGDA",5, "xFCGDAE",6, "xFCGDAEB",7, "bB",-1, "bBE",-2, "bBEA",-3, "bBEAD",-4, "bBEADG",-5, "bBEADGC",-6, "bBEADGCF",-7);
    $keysig =~ s/(\s+)|&//gs;  # it is unclear what the & means, so we'll ignore it for now.
    $keysig =~ s/\[|\]//gs; # IGNORING brackets around a key sig.
    $fifths = $fif{$keysig};
    if ($fifths eq "") { $fifths = "0"; 
		       print_error("Strange key signature '$keysig'.\n");}
    
    foreach $_ ($rismsig,$title,$inr,$instr,$comp,$encoding_date,$libsig,$sonst)
    {
        s/
//;
    }
    print OUT '<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE score-partwise PUBLIC "-//Recordare//DTD MusicXML 0.6 Partwise//EN" "file:/c:/Program Files/MusicXML/partwise.dtd">
<score-partwise>
	<work>
                <work-number>'.$rismsig.'</work-number>
		<work-title>'.$title.'</work-title>
	</work>
        <movement-number>'.$inr.'</movement-number>
        <movement-title>'.$instr.'</movement-title>
	<identification>
		<creator type="composer">'.$comp.'</creator>
		<encoding>
			<software>pae2xml by R. Typke</software>
			<encoding-date>'.$encoding_date.'</encoding-date>
		</encoding>
                <source>'.$libsig.'</source>
	</identification>
	<part-list>
		<score-part id="P1">
			<part-name>'.$sonst.'</part-name>
		</score-part>
	</part-list>
	<part id="P1">
		<measure number="1">
			<attributes>
				<divisions>'.$divisions.'</divisions>
				<key>
					<fifths>'.$fifths.'</fifths>
				</key>
				'.$timesig.'
				<clef>
					<sign>'.$clefsign.'</sign>
					<line>'.$clefline.'</line>
				</clef>
			</attributes>
';


    $toprint .= "
INCIPIT NO.:  $inr
INSTR.:       $instr
CLEF:         $clef
KEY SIG.:     $keysig
TIME SIG.:    $timesig
REST:         $rest\n";
    parse_notes($rest, $keysig);
  }
  else { print_error("could not parse $pe\n"); }
  print OUT "   </part>
</score-partwise>\n";
  close OUT;
}


sub parse_notes {
  my ($notes, $keysig) = @_;
  my $qq = 0; # in group of cue notes

  my $meas = 2;   # measure number
  my $mopen = 1;  # measure tag still open

  if ($notes =~ /^\s*(.*?)\s*$/) {
    $notes = $1;
  }

  $notes =~ s/!([^!]*)!/$1$1/gs;  # write out repetitions
  $notes =~ s/\{([^\}]*)\}/$1/gs; # ignore beamings

  $notes =~ s/(\d+)\(([^;]+\))/\($1$2/gs;       # pull note lengths into fermatas or triplets
  $notes =~ s/(xx|x|bb|b|n)\(/\($1/gs;       # pull accidentals into tuplets or fermatas:
  $notes =~ s/(\d+)(xx|x|bb|b|n)(A|B|C|D|E|F|G)/$2$1$3/gs;  # accidentals first, then duration

#  $notes =~ s/x\(/\(x/gs;       # pull accidentals into tuplets or fermatas
#  $notes =~ s/bb\(/\(bb/gs;       # pull accidentals into tuplets or fermatas
#  $notes =~ s/b\(/\(b/gs;       # pull accidentals into tuplets or fermatas
#  $notes =~ s/n\(/\(n/gs;       # pull accidentals into tuplets or fermatas
  #  $notes =~ s/(\'+|\,+)\(/\($1/g;       # pull octave marks into tuplets or fermatas

  $notes =~ s/(\.|\d|\,|\')qq/qq$1/gs; # pull beginning mark of group of grace notes in front of corresponding notes
  $notes =~ s/(xx|x|bb|b|n)qq/qq$1/gs;  # qq first, then parts of notes

  $notes =~ s/\=(\d)/$1/gs; # replace multibar rests #n with just n

  while ($notes ne "") {
    if ($notes =~ /^(\'+|\,+)(.*)$/) {
      ($oct, $notes) = ($1, $2);
      octave($oct);
    } elsif ($notes =~ /^qq(.*)$/) {
      $notes = $1;
      $qq = 1;
    } elsif ($notes =~ /^r(.*)$/) {
      $notes = $1;
      $qq = 0;
    } elsif ($notes =~ /^(\d+|\=)(\/.*)$/) {
      $measrest = $1;
      $notes = $2;
      if ($measrest eq '=') {
	$measrest = 1;
      }
      $toprint .= "$measrest measures of rest.\n";
      for $n (1..$measrest) {
	print OUT '			<note>
				<rest />
				<duration>'.($beats*$divisions*4/$beattype).'</duration>
'.#				<type>quarter</type>
'
			</note>
';
	if ($n < $measrest) {
	  print OUT "		</measure>\n";
	  if ($notes ne "") {
	    print OUT '		<measure number="'.$meas.'">
';
	    $meas++;
	  }
	}
      }
    } elsif ($notes =~ /^((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)(.*)$/) {  # a note
      ($note, $notes) = ($1,$6);
      parse_note($note, $keysig, "", "", $qq);
    } elsif ($notes =~ /^(\((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?\))(.*)$/) {  # one note with a fermata
      ($note, $notes) = ($1,$6);
      parse_note($note, $keysig, "", "", $qq);
    } elsif ($notes =~ /^(\(((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?){3}\))(.*)$/) {  # a triplet
      ($triplet, $notes) = ($1,$7);
      #      print "TRIPLET: ".$triplet." -> ";
      $triplet =~ /^\(((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)(.*)\)$/gs;
      ($note, $triplet) = ($1,$6);
      #print "$note $triplet\n";
      parse_note($note, $keysig, '<tuplet type="start"/>', '				<time-modification>
					<actual-notes>3</actual-notes>
					<normal-notes>2</normal-notes>
				</time-modification>', $qq);
      $triplet =~ /^((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)(.*)$/gs;
      ($note, $triplet) = ($1,$6);
      #print "$note $triplet\n";
      parse_note($note, $keysig,  '', '				<time-modification>
					<actual-notes>3</actual-notes>
					<normal-notes>2</normal-notes>
				</time-modification>', $qq);
      parse_note($triplet,  $keysig, '<tuplet type="stop"/>',  '				<time-modification>
					<actual-notes>3</actual-notes>
					<normal-notes>2</normal-notes>
				</time-modification>', $qq);
    } elsif ($notes =~ /^((\d+)\(((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)+\;(\d+)\))(.*)$/)  {  # an n-tuplet
      ($tuplet, $notes) = ($1,$9);
      #      print "N-TUPLET: ".$tuplet." -> ";
      $tuplet =~ /^(\d+)\(((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)(.*);(\d)\)$/gs;
      ($combdur, $note, $tuplet, $numval) = ($1,$2,$7,$8);
      #print "i=$combdur, n=$numval; $note / $tuplet\n";
      my $ind_dur = duration($combdur)/$numval;
      # my $norm_notes = 
      my $act_notes = $numval;
      parse_note($note, $keysig, '<tuplet type="start"/>', '				<time-modification>
					<actual-notes>'.$act_notes.'</actual-notes>
					<normal-notes>1</normal-notes>
				</time-modification>', $qq);
      while ($tuplet =~ /^((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)(.+)$/gs) {
	($note, $tuplet) = ($1,$6);
	#print "$note / $tuplet\n";
	parse_note($note, $keysig,  '', '				<time-modification>
					<actual-notes>'.$act_notes.'</actual-notes>
					<normal-notes>1</normal-notes>
				</time-modification>', $qq);
      }
      parse_note($tuplet,  $keysig, '<tuplet type="stop"/>',  '				<time-modification>
					<actual-notes>'.$act_notes.'</actual-notes>
					<normal-notes>1</normal-notes>
				</time-modification>', $qq);
    } elsif ($notes =~ /^(%\w-\d)(.*)$/) {
      ($clef,$notes) = ($1,$2);
      $clef =~ /^%(\w)\-(\d)$/;
      ($clefsign, $clefline) = ($1, $2);
      print OUT '			<attributes>
				<clef>
					<sign>'.$clefsign.'</sign>
					<line>'.$clefline.'</line>
				</clef>
			</attributes>
';
    } elsif ($notes =~ /^@(\d\/\d|c\/?)\s*(.*)$/) {
      #	print "$notes\n";
      ($timesig,$notes) = ($1,$2);
      #print "-> $timesig / $notes\n"; exit;
      $timesig = timesignature($timesig);
      print OUT "			<attributes>\n$timesig
			</attributes>\n";
    } elsif ($notes =~ /^\/(.*)$/) {
      $notes = $1;
      if ($notes =~ /^\/(.*)$/) {
	$notes = $1;
	print OUT '       <barline location="right">
				<bar-style>light-light</bar-style>

			</barline>
';
      }
      print OUT "		</measure>\n";
      if ($notes ne "") {
	print OUT '		<measure number="'.$meas.'">
'.$clefattr;
	$meas++;
      } else {
	$mopen = 0;
      }
      $toprint .= "bar line\n";
    } #elsif ($notes =~ /^(\d*\.*\-)(.*)$/) {
      #($rst, $notes) = ($1, $2);
      #$toprint .= "rest: $rst\n";
      #$rst =~ /^(\d*)(\.*)\-$/;
      #($rst, $dots) =($1,$2);
      #print OUT '			<note>
	#			<rest />
	#			<duration>'.duration($rst, $dots).'</duration>
#'.#				<type>quarter</type>
#'
#			</note>
#';
    elsif ($notes =~ /^\((\=)\)(.*)$/) {  # a bar of rest with a fermata
      ($rst, $notes) = ($1, $2);
      $toprint .= "rest: $rst\n";
      print OUT '			<note>
				<rest />
				<duration>'.($beats*$divisions*4/$beattype).'</duration>
'.#				<type>quarter</type>
'				<notations>
					<fermata type="upright"/>
				</notations>
			</note>
';
    }
    elsif ($notes =~ s/(\d+\.*)\(((\,|\')*(x|xx|b|bb|n)?\d*\.*(g|q)?(\-|A|B|C|D|E|F|G)t?\+?)\)/\($1$2\)/gs) { # pull duration into fermata parentheses
      #      print "after replacement: $notes\n"; exit;
    }
    else {
      print_error("got stuck with $notes\n");
      $notes = "";
    }
  }
  if ($mopen) {
    print OUT "		</measure>\n";
  }
}


sub parse_note {
  my($note, $keysig, $notation, $addition, $in_qq_group) = @_;

  my ($fermata) = (0);
  my ($actualnotes, $normalnotes) = (1,1);

  if ($addition =~ /^\s*<time-modification>\s*<actual-notes>\s*(\d+)\s*<\/actual-notes>\s*<normal-notes>\s*(\d+)\s*<\/normal-notes>\s*<\/time-modification>\s*$/) {
    ($actualnotes, $normalnotes) = ($1, $2);
  }

  if ($note =~ /^\((.*)\)$/) {
    $note = $1;
    $fermata = 1;
  }

  $note =~ /^((\,|\')*)(x|xx|b|bb|n)?(\d*)(\.*)(g|q)?(\-|A|B|C|D|E|F|G)(t?)(\+?)$/;
  my ($oct, $acc, $dur, $dot, $gracecue, $pitch, $trill, $tie) = ($1, $3, $4, $5, $6, $7, $8, $9);

  print OUT '			<note>
';
  if ($gracecue eq "g") {
    print OUT '    	<grace steal-time-following="33"/>
';
  }
  if ($gracecue eq "q" || $in_qq_group) {
    print OUT '    	<cue/>
';
  }
  if ($pitch eq "-") {
    print OUT "         	<rest />\n";
  } else {
    print OUT '				<pitch>
					<step>'.$pitch.'</step>
'.
   alter($pitch, $acc, $keysig).'
					<octave>'.octave($oct).'</octave>
				</pitch>
';
  }
  if ($gracecue ne "g") {
    print OUT '				<duration>'.(duration($dur, $dot)*$normalnotes/$actualnotes).'</duration>
';
  }
#				<type>quarter</type>

  if ($tie eq "+") {
    if (!$TIE) {
      $TIE = 1;
      print OUT '<tie type="start"/>
';
    }
  } else {
    if ($TIE) {
      print OUT '<tie type="stop"/>
';
      $TIE = 0;
    }
  }

  print OUT $addition;

  my $notationbracket = $fermata || ($trill eq "t") || ($notation ne "");
  if ($notationbracket) {
    print OUT "				<notations>\n";
  }
  if ($fermata) { print OUT '
					<fermata type="upright"/>'."\n"; }
  if ($trill eq "t") { print OUT '					<ornaments>
						<trill-mark/>
					</ornaments>
';
		     }
  if ($notation ne "") {
    print OUT "                         $notation\n";
  }
  if ($notationbracket) {
    print OUT "				</notations>\n";
  }

  print OUT '			</note>
';

  $toprint .= "note: oct. $oct/acc. $acc/dur. $dur/dots $dot/grace,cue $gracecue/pitch $pitch\n";
}

sub alter {
  my ($pitch, $acc, $keysig) = @_;

  my $alt = 0;

  if (index ($keysig,$pitch) > -1) {
    $keysig =~ /^(.).*$/gs;
    if ($1 eq 'x') {
      $alt = 1;
    } else {$alt = -1;}
  }

  my %acc_alt = ("n", 0, "b", -1, "bb", -2, "x", 1, "xx", 2);
  if ($acc_alt{$acc} ne "") {
    $alt = $acc_alt{$acc};
  }

  if ($alt != 0) {
    return "<alter>$alt</alter>\n";
  }
  return "";
}

sub duration {
  my ($duration, $dots) = @_;

  if ($duration.$dots ne "") {
    my %du=("1",4*$divisions,"2",2*$divisions,"4",$divisions,
            "8",$divisions/2,"6",$divisions/4,"3",$divisions/8,
            "5",$divisions/16,"7",$divisions/32,
            "9",$divisions*8,"0",$divisions*16); # breve/long
    $old_duration = $du{$duration};
    if ($old_duration eq "") {
      print_error("strange duration '$duration'\n");
    }
    my $add = $old_duration;
    while ($dots ne "") {
      $add /= 2;
      $old_duration += $add;
      $dots =~ /^.(.*)$/gs;
      $dots = $1;
    }
  }
  return $old_duration;
}

sub octave {
  my ($octave) = @_;

  if ($octave ne "") {
    $octave =~ /^(.)(.*)$/gs;
    if ($1 eq ",") {
      $old_octave = 4 - length $octave;
    } else {
      $old_octave = 3 + length $octave;
    }
  }
  return $old_octave;
}

sub timesignature {
  my ($timesig) = @_;

  if ($timesig eq "c3") {
    $timesig = "3/2";  # it would be better to display it as "C". Example: 451.023.814
  }
  if ($timesig =~ /^c(\d+)\/(\d+)$/gs) {
    $timesig = "$1/$2";  # it would be better to show the "C"
  }

  if ($timesig eq "0" || $timesig eq "") {  # unclear how to handle absence of time signature.
    $timesig ='<time symbol="common">
      <beats>4</beats>
      <beat-type>4</beat-type>
    </time>
';  # using 4/4 for now.
    ($beats, $beattype) = (4,4);
  } elsif ($timesig =~ /^c(\/?)$/gi) {
    if ($1 eq "/") {
      $timesig = '<time symbol="cut">
      <beats>2</beats>
      <beat-type>2</beat-type>
    </time>
';
      ($beats, $beattype) = (2,2);
    } else {
      $timesig = '<time symbol="common">
      <beats>4</beats>
      <beat-type>4</beat-type>
    </time>
';
      ($beats, $beattype) = (4,4);
    }
  } elsif ($timesig =~ /^(\d+)\/(\d+)$/gs) {
    ($beats, $beattype) = ($1, $2);
    $timesig = '<time>
      <beats>'.$beats.'</beats>
      <beat-type>'.$beattype.'</beat-type>
    </time>
';
  } else {
    print_error("Time signature '$timesig' looks strange.\n");
    #	$timesig = "";   we assume 4/4 just to  get something legible:
    ($beats, $beattype) = (4,4);
    $timesig = '<time>
      <beats>'.$beats.'</beats>
      <beat-type>'.$beattype.'</beat-type>
    </time>
';
  }
  return $timesig;
}

sub print_error {
  my ($msg) = @_;

  print "\nAn error occurred; context:\n\n$toprint\n
Error: $msg\n";
}

sub read_file {
  my ($fn) = @_;
  if (!(open FH, $fn))
    {
      return "";
    }
  my $res = "";
  while (<FH>) { $res .= $_; } # read all lines
  close (FH);
  return $res;
}