#!/usr/bin/perl -w use strict; package MasonBook::ToHTML; use Pod::Simple 0.96; use base qw( Pod::Simple ); use File::Basename; use File::Spec; use HTML::Entities; use Image::Size qw( html_imgsize ); use Text::Wrap; use URI; use URI::Heuristic qw( uf_uristr ); $Text::Wrap::columns = 80; sub new { my $class = shift; my %p = @_; my $self = $class->SUPER::new(%p); $self->accept_code( qw( A G H K M N Q R T U ) ); $self->accept_targets( qw( figure listing table ) ); $self->accept_targets_as_text( qw( blockquote note ) ); $self->accept_directive_as_processed( qw( head0 headrow row cell bodyrows ) ); $self->merge_text(1); # set to 0 for debugging POD errors $self->no_errata_section(1); $self->{index} = $p{index}; $self->{toc} = $p{toc}; $self->{state} = { stack => [], text => '', last => '', ext => $p{ext}, target => undef, table_data => undef, toc_anchor_count => 0, current_file => $p{current_file}, chapter_number => $p{chapter_number}, listing => 1, figure => 1, table => 1, last_index_anchor => '', last_a_link => '', save_for_toc => 0, item_count => 0, chapter_name => $p{chapter_name}, in_footnote => 0, footnote_buffer => '', footnotes => [], }; return $self; } # why did ORA use Z<> when it could have used something not in use? I # dunno. But Pod::Simple simply drops Z<> on the floor normally, so # this hack undoes that. sub _treat_Zs {} my @actions = ( [ qr/^head(?:0|1|2)$/ => '_toc_flag_on', '_toc_flag_off' ], [ qr/^Para$/ => '_para_begin', '_para_end' ], [ qr/^over-text$/ => '_over_text_begin', '_over_text_end' ], [ qr/^item-text$/ => '_item_text_begin', '_item_text_end' ], [ qr/^over-number$/ => '_over_number_begin', '_over_number_end' ], [ qr/^item-number$/ => '_item_number_begin', undef ], [ qr/^over-bullet$/ => '_over_bullet_begin', '_over_bullet_end' ], [ qr/^item-bullet$/ => '_item_bullet_begin', undef ], [ qr/^N$/ => '_N_begin', '_N_end' ], [ qr/^for$/ => '_for_begin', '_for_end' ], [ qr/^Document$/ => undef, '_append_footnotes' ], [ qr/^headrow$/ => '_headrow_begin', undef ], [ qr/^bodyrows$/ => '_bodyrows_begin', undef ], [ qr/^row$/ => '_row_begin', undef ], [ qr/^cell$/ => '_cell_begin', undef ], ); sub _handle_element_start { my ( $self, $elt, $data ) = @_; $self->_push_elt_stack($elt); foreach my $a (@actions) { if ( $elt =~ /$a->[0]/ && $a->[1] ) { my $m = $a->[1]; $self->$m($elt, $data); } } $self->{state}{text} = ''; } sub _toc_flag_on { my ($level) = ($_[1] =~ /(\d+)/); $_[0]->{state}{save_for_toc} = $level + 1; } sub _toc_flag_off { $_[0]->{state}{save_for_toc} = 0 } sub _para_begin { my $class = $_[0]->{state}{target} ? $_[0]->{state}{target}{name} : 'content'; $_[0]->_out( qq|
\n| ) } sub _para_end { $_[0]->_out( "\n
\n" ) } sub _over_text_begin { $_[0]->_reset_item_count; $_[0]->_out( qq|\n| ); } elsif ( $target eq 'blockquote' ) { $self->_out( qq|
\n| ); } elsif ( $target eq 'table' ) { $self->_out( qq|\n\n| ); } } sub _for_end { my $self = shift; my $target = $self->{state}{target}{name}; if ( $target eq 'listing' || $target eq 'figure' ) { $self->_out( "\n\n" ); } elsif ( $target eq 'blockquote' ) { $self->_out( "\n\n\n" ); } elsif ( $target eq 'table' ) { $self->_out( " \n \n
\n" ); $self->_out( ' \n" ) if $self->{state}{target}{caption}; } $self->{state}{target} = undef; } sub _handle_text { my ( $self, $text ) = @_; if ( $self->{state}{target} ) { if ( $self->{state}{target}{name} eq 'listing' || $self->{state}{target}{name} eq 'figure' || $self->{state}{target}{name} eq 'table' ) { unless ( $self->{state}{target}{caption} ) { my $thing = ( $self->{state}{target}{name} eq 'listing' ? 'Example' : $self->{state}{target}{name} eq 'figure' ? 'Figure' : 'Table' ); my $number = $self->{state}{ $self->{state}{target}{name} }++; if ( $self->{state}{target}{name} eq 'table' ) { $text =~ s/\s*picture//; } $self->{state}{target}{caption} = "$thing $self->{state}{chapter_number}-$number. $text"; return; } if ( $text =~ s/Z<([^>]+)>// ) { local $self->{state}{target} = undef; # no need to rewrite Z handling code $self->_push_elt_stack('Z'); $self->_handle_text($1); $self->_pop_elt_stack; } return unless length $text; if ( $self->{state}{target}{name} eq 'listing' ) { $self->_out( qq|\n\n" ); return; } elsif ( $self->{state}{target}{name} eq 'figure' ) { my ($image) = $text =~ /F<([^>]+)/; my $hw = html_imgsize( File::Spec->catfile( 'figures', $image ) ); $self->_out( qq| \n" ); local $self->{state}{target} = undef; # see above $self->_push_elt_stack('Verbatim'); $self->_handle_text($text); $self->_pop_elt_stack; $self->_out( "\n
\n
\n| . ' \n" ); return; } } } $text = "$self->{state}{chapter_name}: $text" if $self->_current_elt eq 'head0' && length $self->{state}{chapter_name}; if ( $self->{state}{save_for_toc} && $self->{toc} ) { my $anchor = "TOC-ANCHOR-" . $self->{state}{toc_anchor_count}++; $self->_add_to_toc($text, $anchor); $self->_out( qq|\n| ); } if ( $self->_current_elt eq 'A' ) { $self->{state}{last_a_link} = $text; return; } if ( $self->_last_elt eq 'A' && $self->_parent_elt ne 'X' ) { return if $self->_handle_A_link($text); } if ( $self->_current_elt eq 'U' ) { my $uri = uf_uristr($text); $text = encode_entities($text); $self->_out( qq|$text| ); return; } if ( $self->_current_elt eq 'X' ) { return unless $self->{index} && $self->_parent_elt eq 'Z'; $self->_remember_for_index($text); return; } return unless $text =~ /\S/; $text = encode_entities($text); if ( $self->_current_elt eq 'Z' ) { $self->_out( qq|\n| ); if ( $self->{do_index} ) { $self->{state}{last_index_anchor} = $text; } return; } # ORA apparently put a space between the parens, which looks good # in the book, but not so good online. if ( $self->_current_elt eq 'C' ) { $text =~ s/(\w+)\( \)/$1()/g; } my @text = $self->_current_elt eq 'Verbatim' ? $text : wrap( '', '', $text ); $self->_out( "\n
\n" ) if $self->_current_elt =~ /^head/ && $self->_last_elt eq 'Verbatim'; my ( $start, $end ) = $self->_tag( $self->_current_elt ); $self->_out( $start, @text, $end ); } sub _add_to_toc { my ( $self, $text, $anchor ) = @_; my $link = $self->{state}{current_file}; $link .= "#$anchor" unless $self->{state}{save_for_toc} == 1; push @{ $self->{toc} }, { level => $self->{state}{save_for_toc}, text => $text, link => $link, }; } sub _handle_A_link { my ( $self, $text ) = @_; return unless $self->{state}{last_a_link} && $self->{state}{last_a_link} =~ /^(?:(CHP-(\d\d?))|(APP-([ABCD])))(-?)/; my $url = $1 ? "chapter-$2$self->{state}{ext}" : "appendix-\L$4\E$self->{state}{ext}"; $url .= "#$self->{state}{last_a_link}" if $5; $text =~ s/("[^"]+?"|\S+\s+(?:[^,.:\s]+))//s; my $link_text = encode_entities($1); my $href = qq|$link_text|; $self->_out( $href, encode_entities($text) ); $self->{state}{last_a_link} = ''; return 1; } sub _remember_for_index { my ( $self, $text ) = @_; my @pieces; foreach my $piece ( split /\s*;\s*/, $text ) { # split on single colons but not double colons! my @p = split /(? $term, sort_as => $sort_as }; } my $heading = ( substr( $pieces[0]{sort_as}, 0, 1 ) =~ /[a-z]/i ? uc substr( $pieces[0]{sort_as}, 0, 1 ) : 'Symbols' ); push @{ $self->{index} }, { pieces => \@pieces, anchor => $self->{state}{last_index_anchor}, heading => $heading, }; } sub _out { my $self = shift; if ( $self->{state}{in_footnote} ) { $self->{state}{footnote_buffer} .= join '', @_; } else { print { $self->{output_fh} } @_; } } my %tags = ( head0 => 'h1', head1 => 'h2', head2 => 'h3', head3 => 'h4', head4 => 'h5', 'B' => 'strong', 'C' => 'code', 'F' => 'u', 'I' => 'em', 'R' => 'em', 'T' => 'em', # URLs are handled specially 'U' => '', 'X' => '', 'Z' => '', 'Verbatim' => [ qq|\n| ], ); sub _tag { my ( $self, $elt ) = @_; my $tag = $tags{$elt}; if ( ref $tag ) { return @$tag; } elsif ( $tag ) { return "<$tag>", "$tag>"; } # handle specially return '', ''; } sub _handle_element_end { my ( $self, $elt ) = @_; $self->_pop_elt_stack($elt); foreach my $a (@actions) { if ( $elt =~ /$a->[0]/ && $a->[2] ) { my $m = $a->[2]; $self->$m($elt); } } $self->{state}{last} = $elt; } sub _append_footnotes { my $self = shift; return unless @{ $self->{state}{footnotes} }; $self->_out( "\n|, qq|\nFootnotes
\n" ); my $x = 1; foreach my $note ( @{ $self->{state}{footnotes} } ) { $self->_out( qq|\n| . qq|\n$x. $note| . qq| -- Return.\n| . "
\n" ); $x++; } } sub _current_elt { $_[0]->{state}{stack}[-1] } sub _elt_at { $_[0]->{state}{stack}[ $_[1] ] } sub _parent_elt { $_[0]->{state}{stack}[-2] || '' } sub _last_elt { $_[0]->{state}{last} || '' } sub _push_elt_stack { push @{ $_[0]->{state}{stack} }, $_[1] } sub _pop_elt_stack { pop @{ $_[0]->{state}{stack} } } sub _item_count { $_[0]->{state}{item_count} } sub _reset_item_count { $_[0]->{state}{item_count} = 0; } sub _increment_item_count { $_[0]->{state}{item_count}++ } package main; use File::Basename; use File::Copy; use File::Path; use File::Spec; use Getopt::Long; use HTML::Entities; my %opts; GetOptions( 'index' => \$opts{index}, 'toc' => \$opts{toc}, 'all' => \$opts{all}, 'target=s' => \$opts{target}, 'ext=s' => \$opts{ext}, 'help' => \$opts{help}, ); unless ( $opts{target} ) { warn "Must provide a --target directory.\n"; exit 1; } $opts{ext} ||= '.mhtml'; mkpath( $opts{target}, 1, 0755 ); mkpath( File::Spec->catdir( $opts{target}, 'figures' ), 1, 0755 ); foreach my $fig ( glob File::Spec->catfile( 'figures', 'mas*.png' ) ) { my $to = File::Spec->catfile( $opts{target}, 'figures', basename($fig) ); copy $fig => $to or die "Cannot copy $fig to $to: $!"; } { my $to = File::Spec->catfile( $opts{target}, basename($0) ); $to =~ s/\.pl$//; copy $0 => $to or die "Cannot copy $0 to $to: $!"; } if ( $opts{all} ) { $opts{index} = $opts{toc} = 1; my @chapters = map { "ch$_.pod" } 1..12; my @apps = map { "appendix-$_.pod" } 'a'..'d'; @ARGV = ( 'foreword.pod', 'preface.pod', @chapters, @apps, 'glossary.pod', 'colophon.pod', 'copyright.pod' ); } my (@toc, @index); foreach my $file (@ARGV) { my $target = $file; $target =~ s/^ch/chapter-/; $target =~ s/\.pod/$opts{ext}/; $target = File::Spec->catfile( $opts{target}, $target ); my $chapter_name = ( $file =~ /^ch(\d+)/ ? "Chapter $1" : $file =~ /^appendix-(\w)/ ? "Appendix \U$1" : '' ); my ($chapter_number) = ($file =~ /^(?:ch|appendix-)([\dabcd]+)/); my $p = MasonBook::ToHTML->new( %opts, ext => $opts{ext}, current_file => basename($target), chapter_name => $chapter_name, chapter_number => uc $chapter_number, $opts{toc} ? ( toc => \@toc ) : (), $opts{index} ? ( index => \@index ) : (), ); $p->output_fh(*FH); open IN, "<$file" or die "Cannot read $file: $!"; my $data = join '',; # needed so Pod::Simple allows these as real =begin/=end constructs $data =~ s/=begin\s+(\S+)\s+(\S+)(.+?)=end(?!\s+\1)/=begin $1\n\n$2\n$3=end $1\n/sg; warn "$file => $target\n"; open FH, ">$target" or die "Cannot write to $target: $!"; print FH <<'EOF'; <& book_menu.mas, top => 1 &>
<%text> EOF $p->parse_string_document($data); print FH <<'EOF'; %text>
<& book_menu.mas, bottom => 1 &> EOF } if ( $opts{toc} ) { my $toc = File::Spec->catfile( $opts{target}, "index$opts{ext}" ); warn "Writing TOC in $toc\n"; open *FH, ">$toc" or die "Cannot write toc: $!"; print FH (toc_as_html(@toc)); } if ( $opts{index} ) { my $index = File::Spec->catfile( $opts{target}, "the-index$opts{ext}" ); warn "Writing index in $index\n"; open *FH, ">$index" or die "Cannot write index: $!"; print FH (index_as_html(@index)); } sub toc_as_html { my $last_level = 0; my $html = qq|<& book_menu.mas, top => 1 &>\n
\nTable of Contents
\n|; $html .= "<%text>\n"; foreach my $item (@_) { if ( $item->{level} > $last_level ) { until ( $last_level == $item->{level} ) { $html .= "\n\n"; $last_level++; } } elsif ( $item->{level} < $last_level ) { until ( $last_level == $item->{level} ) { $html .= "\n
\n"; $last_level--; } } $html .= qq||; $html .= encode_entities( $item->{text} ); $html .= " \n"; $last_level = $item->{level}; } while ( $last_level-- ) { $html .= "\n\n"; } $html .= "%text>\n"; $html .= qq|\n
\n<& book_menu.mas, bottom => 1 &>|; return $html; } sub index_as_html { return ''; }