diff --git a/pae2xml.pl b/pae2xml.pl index fc7c595b88571537f81de1c58a1a9626a0541982..67332f0730d6c66c73ccc3897a65d3529615da88 100755 --- a/pae2xml.pl +++ b/pae2xml.pl @@ -18,6 +18,9 @@ our $version = "1.0"; # import module use Getopt::Long; +use File::Basename; + +our $format = 'rism'; sub print_help { print 'pae2xml.pl [OPTIONS...] FILE @@ -26,6 +29,9 @@ sub print_help { Possible options: -h, --help Prints out this help message + --format=FORMAT, -f FORMAT + Interprets input file as FORMAT. Possible values are + rism, pae (default: rism) -v, -- version Prints out version information '; } @@ -38,7 +44,7 @@ sub print_version { sub handle_options { my $print_help = 0; my $print_version = 0; - my $result = GetOptions ("help|h" => \$print_help, "version|v" => \$print_version); + my $result = GetOptions ("f|format=s" => \$format, "help|h" => \$print_help, "version|v" => \$print_version); if ($print_version) { print_version (); exit 0; @@ -64,36 +70,49 @@ $TIE = 0; foreach $a (@ARGV) { $p = read_file($a); $toprint = ""; - $p =~ s/\s*\=\=+\s*(.*?)\s*\=\=+\s*/$1/sg; - $p =~ s/\s*included.*?-------------*\s*(.*?)\s*/$1/s; + if ($format eq "rism") { + $p =~ s/\s*\=\=+\s*(.*?)\s*\=\=+\s*/$1/sg; + $p =~ s/\s*included.*?-------------*\s*(.*?)\s*/$1/s; - ($q, $r) = ($p, $p); - if ($q !~ /^.*1\.1\.1.*$/gsx && $r =~ /^.*plain.*$/gsx) { - print_error("$a contains 'plain', but not 1.1.1!\n"); - } else { - if ($p =~ /^\s*([^\n]+)\n(.*?)\n((\d+\.\d+\.\d.*?plain.*?\n)+)(.*?)\n?([^\n]+)\n([^\n]+)\s*$/gs) { - my ($comp, $title, $incipits, $sonst, $libsig, $rismsig) = ($1, $2, $3, $5, $6, $7); + ($q, $r) = ($p, $p); + if ($q !~ /^.*1\.1\.1.*$/gsx && $r =~ /^.*plain.*$/gsx) { + print_error("$a contains 'plain', but not 1.1.1!\n"); + } else { + if ($p =~ /^\s*([^\n]+)\n(.*?)\n((\d+\.\d+\.\d.*?plain.*?\n)+)(.*?)\n?([^\n]+)\n([^\n]+)\s*$/gs) { + my ($comp, $title, $incipits, $sonst, $libsig, $rismsig) = ($1, $2, $3, $5, $6, $7); - $toprint .= " + $toprint .= " COMPOSER: $comp TITLE: $title INCIPIT(S): $incipits OTHER INFO: $sonst LIB. SIGN.: $libsig RISM SIGN.: $rismsig\n\n"; - parse_incipits($incipits, $comp, $title, $sonst, $libsig, $rismsig); - } - else { - if (index($p,"plain&easy") > -1) { - print_error("Ignoring the following text:\n\n\n$p\n\n\n"); + parse_rism_incipits($incipits, $comp, $title, $sonst, $libsig, $rismsig); } + else { + if (index($p,"plain&easy") > -1) { + print_error("Ignoring the following text:\n\n\n$p\n\n\n"); + } + } + } + } else { + # Just a plaine & easie snippet, without any further RISM fields + if ($a eq "-") { + $filename = "out.xml"; + } else { + $filename = basename ($a, ".pae") . ".xml"; } + parse_pe ($filename, $p, "", "", "", "", "", "", ""); } } +############################################################################## +### RISM file parsing +############################################################################## -sub parse_incipits { +sub parse_rism_incipits { my ($incipits, $comp, $title, $sonst, $libsig, $rismsig) = @_; $toprint .= "parsing: $incipits\n"; @@ -101,24 +120,50 @@ sub parse_incipits { while ($incipits =~ /^(\d+\.\d+\..+?)(\d+\.\d+\..*)$/gs) { my ($inc1) = $1; $incipits = $2; - parse_pe($inc1, $comp, $title, $sonst, $libsig, $rismsig); + parse_rism_incipit($inc1, $comp, $title, $sonst, $libsig, $rismsig); } - parse_pe($incipits, $comp, $title, $sonst, $libsig, $rismsig); + parse_rism_incipit($incipits, $comp, $title, $sonst, $libsig, $rismsig); } -sub parse_pe { +sub parse_rism_incipit { my ($pe, $comp, $title, $sonst, $libsig, $rismsig) = @_; + if ($pe =~ /^\s*(\d+\.\d+\.\d)(\.|:)\s*(.*?)\nplain&easy:\s*(.*)$/gs) { + my ($inr, $instr, $pecode) = ($1, $3, $4); + + my $filename="$rismsig-$inr.xml"; + $filename =~ s/RISM\s*A\/II\s*:?\s*//gs; + + foreach $_ ($rismsig,$title,$inr,$instr,$comp,$libsig,$sonst) + { + s/ +//; + } + $toprint .= " +INCIPIT NO.: $inr +INSTR.: $instr\n"; + parse_pe ($filename, $pecode, $inr, $instr, $comp, $title, $sonst, $libsig, $rismsig); + + } else { + print_error("could not parse $pe\n"); + } +} + + +############################################################################## +### pure Plaine & Easie data parsing +############################################################################## + +sub parse_pe { + my ($filename, $pe, $inr, $instr, $comp, $title, $sonst, $libsig, $rismsig) = @_; + $pe =~ s/@�/@0�/gs; # make missing time signature explicit while ($pe =~ s/([^\-])(\d+)(\'|\,)(A|B|C|D|E|F|G)/$1$3$2$4/gs) {}; # octave first, then duration. Truly global. - if ($pe =~ /^\s*(\d+\.\d+\.\d)(\.|:)\s*(.*?)\nplain&easy:\s*(%([\w\-\d]+))?(@([\d\w\/]+))?\s*&?\s*(\$([^�]+))?�(.*)$/gs) { - my ($inr, $instr, $clef, $timesig, $keysig, $rest) = ($1, $3, $5, $7, $9, $10); + if ($pe =~ /\s*(%([\w\-\d]+))?(@([\d\w\/]+))?\s*&?\s*(\$([^�]+))?�(.*)$/gs) { + my ($clef, $timesig, $keysig, $rest) = ($2, $4, $6, $7); - my $filename="$rismsig-$inr.xml"; - $filename =~ s/RISM\s*A\/II\s*:?\s*//gs; print "Writing $filename...\n"; - open(OUT, ">$filename"); if ($clef =~ /^(\w)\-(\d)$/) { @@ -138,28 +183,25 @@ sub parse_pe { print_error("Strange key signature '$keysig'.\n"); } - foreach $_ ($rismsig,$title,$inr,$instr,$comp,$encoding_date,$libsig,$sonst) - { - s/ -//; - } print OUT '<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> <!DOCTYPE score-partwise PUBLIC "-//Recordare//DTD MusicXML 2.0 Partwise//EN" "http://www.musicxml.org/dtds/partwise.dtd"> <score-partwise> - <work> - <work-number>'.$rismsig.'</work-number> - <work-title>'.$title.'</work-title> - </work> - <movement-number>'.$inr.'</movement-number> - <movement-title>'.$instr.'</movement-title> - <identification> - <creator type="composer">'.$comp.'</creator> - <encoding> +'; + print OUT " <work>\n" if ($rismsig || $title); + print OUT " <work-number>$rismsig</work-number>\n" if ($rismsig); + print OUT " <work-title>$title</work-title>\n" if ($title); + print OUT " </work>\n" if ($rismsig || $title); + print OUT " <movement-number>$inr</movement-number>\n" if ($inr); + print OUT " <movement-title>$instr</movement-title>\n" if ($instr); + print OUT " <identification>\n"; + print OUT " <creator type=\"composer\">$comp</creator>\n" if ($comp); + print OUT ' <encoding> <software>pae2xml by R. Typke</software> <encoding-date>'.$encoding_date.'</encoding-date> </encoding> - <source>'.$libsig.'</source> - </identification> +'; + print OUT " <source>$libsig</source>\n" if ($libsig); + print OUT ' </identification> <part-list> <score-part id="P1"> <part-name>'.$sonst.'</part-name> @@ -182,8 +224,6 @@ sub parse_pe { $toprint .= " -INCIPIT NO.: $inr -INSTR.: $instr CLEF: $clef KEY SIG.: $keysig TIME SIG.: $timesig