| Server IP : 170.10.162.208 / Your IP : 216.73.216.181 Web Server : LiteSpeed System : Linux altar19.supremepanel19.com 4.18.0-553.69.1.lve.el8.x86_64 #1 SMP Wed Aug 13 19:53:59 UTC 2025 x86_64 User : deltahospital ( 1806) PHP Version : 7.4.33 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : OFF | Pkexec : OFF Directory : /var/tmp/ |
Upload File : |
Functions.pm 0000644 00000034036 15051126136 0007060 0 ustar 00 package Pod::Functions;
use strict;
=head1 NAME
Pod::Functions - Group Perl's functions a la perlfunc.pod
=head1 SYNOPSIS
use Pod::Functions;
my @misc_ops = @{ $Kinds{ 'Misc' } };
my $misc_dsc = $Type_Description{ 'Misc' };
or
perl /path/to/lib/Pod/Functions.pm
This will print a grouped list of Perl's functions, like the
L<perlfunc/"Perl Functions by Category"> section.
=head1 DESCRIPTION
It exports the following variables:
=over 4
=item %Kinds
This holds a hash-of-lists. Each list contains the functions in the category
the key denotes.
=item %Type
In this hash each key represents a function and the value is the category.
The category can be a comma separated list.
=item %Flavor
In this hash each key represents a function and the value is a short
description of that function.
=item %Type_Description
In this hash each key represents a category of functions and the value is
a short description of that category.
=item @Type_Order
This list of categories is used to produce the same order as the
L<perlfunc/"Perl Functions by Category"> section.
=back
=cut
our $VERSION = '1.11';
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(%Kinds %Type %Flavor %Type_Description @Type_Order);
our(%Kinds, %Type, %Flavor, %Type_Description, @Type_Order);
# Category table.  Each entry pairs a category key with its one-line
# description; the entries are listed in the order in which they are
# appended to @Type_Order.
for my $entry (
    [ String    => 'Functions for SCALARs or strings' ],
    [ Regexp    => 'Regular expressions and pattern matching' ],
    [ Math      => 'Numeric functions' ],
    [ ARRAY     => 'Functions for real @ARRAYs' ],
    [ LIST      => 'Functions for list data' ],
    [ HASH      => 'Functions for real %HASHes' ],
    [ 'I/O'     => 'Input and output functions' ],
    [ Binary    => 'Functions for fixed-length data or records' ],
    [ File      => 'Functions for filehandles, files, or directories' ],
    [ Flow      => 'Keywords related to the control flow of your Perl program' ],
    [ Namespace => 'Keywords related to scoping' ],
    [ Misc      => 'Miscellaneous functions' ],
    [ Process   => 'Functions for processes and process groups' ],
    [ Modules   => 'Keywords related to Perl modules' ],
    [ Objects   => 'Keywords related to classes and object-orientation' ],
    [ Socket    => 'Low-level socket functions' ],
    [ SysV      => 'System V interprocess communication functions' ],
    [ User      => 'Fetching user and group info' ],
    [ Network   => 'Fetching network info' ],
    [ Time      => 'Time-related functions' ],
) {
    my ($category, $description) = @{$entry};

    # Remember the display order and the description of this category.
    push @Type_Order, $category;
    $Type_Description{$category} = $description;
}
# Parse the function table stored in this file's __DATA__ section.
#
# Each non-comment record is split on tabs: the first field is the
# function name, the last field is its short description (stored in
# %Flavor) and every field in between is a category.  The categories are
# stored comma-joined in %Type, and the function name is appended to the
# list kept in %Kinds for each category.
#
# A named lexical is used instead of the implicit $_ because
# "while (<FH>)" does not localise $_: loading this module from inside a
# for/map block would otherwise overwrite (or fail to modify) the
# caller's aliased $_.
while (my $line = <DATA>) {
    chomp $line;
    $line =~ s/^#.*//;      # a line starting with '#' is a comment
    next unless $line;      # skip comment and empty lines

    my ($name, @data) = split "\t", $line;
    $Flavor{$name} = pop @data;
    $Type{$name}   = join ',', @data;
    for my $t (@data) {
        push @{ $Kinds{$t} }, $name;
    }
}
close DATA;
# Scratch variables that are filled in here and referenced by the picture
# lines of the format declared below.
my( $typedesc, $list );
# Script mode: when caller() returns nothing at file level, print every
# category description followed by the sorted list of its functions.
unless (caller) {
foreach my $type ( @Type_Order ) {
$list = join(", ", sort @{$Kinds{$type}});
$typedesc = $Type_Description{$type} . ":";
# Emit one record through the format below.
write;
}
}
format =
^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$typedesc
~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$typedesc
~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$list
.
1;
__DATA__
-X File a file test (-r, -x, etc)
abs Math absolute value function
accept Socket accept an incoming socket connect
alarm Process schedule a SIGALRM
atan2 Math arctangent of Y/X in the range -PI to PI
bind Socket binds an address to a socket
binmode I/O prepare binary files for I/O
bless Objects create an object
break Flow break out of a C<given> block
caller Flow Namespace get context of the current subroutine call
chdir File change your current working directory
chmod File changes the permissions on a list of files
chomp String remove a trailing record separator from a string
chop String remove the last character from a string
chown File change the ownership on a list of files
chr String get character this number represents
chroot File make directory new root for path lookups
close I/O close file (or pipe or socket) handle
closedir I/O close directory handle
connect Socket connect to a remote socket
continue Flow optional trailing block in a while or foreach
cos Math cosine function
crypt String one-way passwd-style encryption
dbmclose I/O Objects breaks binding on a tied dbm file
dbmopen I/O Objects create binding on a tied dbm file
defined Misc test whether a value, variable, or function is defined
delete HASH deletes a value from a hash
die Flow I/O raise an exception or bail out
do Flow Modules turn a BLOCK into a TERM
dump Flow create an immediate core dump
each ARRAY HASH retrieve the next key/value pair from a hash
endgrent User be done using group file
endhostent User be done using hosts file
endnetent User be done using networks file
endprotoent Network be done using protocols file
endpwent User be done using passwd file
endservent Network be done using services file
eof I/O test a filehandle for its end
eval Flow catch exceptions or compile and run code
evalbytes Flow similar to string eval, but intend to parse a bytestream
exec Process abandon this program to run another
exists HASH test whether a hash key is present
exit Flow terminate this program
exp Math raise I<e> to a power
fc String return casefolded version of a string
fcntl File file control system call
__FILE__ Flow the name of the current source file
fileno I/O return file descriptor from filehandle
flock I/O lock an entire file with an advisory lock
fork Process create a new process just like this one
format I/O declare a picture format with use by the write() function
formline Misc internal function used for formats
getc I/O get the next character from the filehandle
getgrent User get next group record
getgrgid User get group record given group user ID
getgrnam User get group record given group name
gethostbyaddr Network get host record given its address
gethostbyname Network get host record given name
gethostent Network get next hosts record
getlogin User return who logged in at this tty
getnetbyaddr Network get network record given its address
getnetbyname Network get networks record given name
getnetent Network get next networks record
getpeername Socket find the other end of a socket connection
getpgrp Process get process group
getppid Process get parent process ID
getpriority Process get current nice value
getprotobyname Network get protocol record given name
getprotobynumber Network get protocol record numeric protocol
getprotoent Network get next protocols record
getpwent User get next passwd record
getpwnam User get passwd record given user login name
getpwuid User get passwd record given user ID
getservbyname Network get services record given its name
getservbyport Network get services record given numeric port
getservent Network get next services record
getsockname Socket retrieve the sockaddr for a given socket
getsockopt Socket get socket options on a given socket
glob File expand filenames using wildcards
gmtime Time convert UNIX time into record or string using Greenwich time
goto Flow create spaghetti code
grep LIST locate elements in a list test true against a given criterion
hex Math String convert a hexadecimal string to a number
import Modules Namespace patch a module's namespace into your own
index String find a substring within a string
int Math get the integer portion of a number
ioctl File system-dependent device control system call
join LIST join a list into a string using a separator
keys ARRAY HASH retrieve list of indices from a hash
kill Process send a signal to a process or process group
last Flow exit a block prematurely
lc String return lower-case version of a string
lcfirst String return a string with just the next letter in lower case
length String return the number of characters in a string
__LINE__ Flow the current source line number
link File create a hard link in the filesystem
listen Socket register your socket as a server
local Namespace create a temporary value for a global variable (dynamic scoping)
localtime Time convert UNIX time into record or string using local time
lock Misc get a thread lock on a variable, subroutine, or method
log Math retrieve the natural logarithm for a number
lstat File stat a symbolic link
m// Regexp match a string with a regular expression pattern
map LIST apply a change to a list to get back a new list with the changes
mkdir File create a directory
msgctl SysV SysV IPC message control operations
msgget SysV get SysV IPC message queue
msgrcv SysV receive a SysV IPC message from a message queue
msgsnd SysV send a SysV IPC message to a message queue
my Namespace declare and assign a local variable (lexical scoping)
next Flow iterate a block prematurely
no Modules unimport some module symbols or semantics at compile time
oct Math String convert a string to an octal number
open File open a file, pipe, or descriptor
opendir File open a directory
ord String find a character's numeric representation
our Namespace declare and assign a package variable (lexical scoping)
pack Binary String convert a list into a binary representation
package Modules Namespace Objects declare a separate global namespace
__PACKAGE__ Flow the current package
pipe Process open a pair of connected filehandles
pop ARRAY remove the last element from an array and return it
pos Regexp find or set the offset for the last/next m//g search
print I/O output a list to a filehandle
printf I/O output a formatted list to a filehandle
prototype Misc get the prototype (if any) of a subroutine
push ARRAY append one or more elements to an array
q/STRING/ String singly quote a string
qq/STRING/ String doubly quote a string
qr/STRING/ Regexp compile pattern
quotemeta Regexp quote regular expression magic characters
qw/STRING/ LIST quote a list of words
qx/STRING/ Process backquote quote a string
rand Math retrieve the next pseudorandom number
read Binary I/O fixed-length buffered input from a filehandle
readdir I/O get a directory from a directory handle
readline I/O fetch a record from a file
readlink File determine where a symbolic link is pointing
readpipe Process execute a system command and collect standard output
recv Socket receive a message over a Socket
redo Flow start this loop iteration over again
ref Objects find out the type of thing being referenced
rename File change a filename
require Modules load in external functions from a library at runtime
reset Misc clear all variables of a given name
return Flow get out of a function early
reverse LIST String flip a string or a list
rewinddir I/O reset directory handle
rindex String right-to-left substring search
rmdir File remove a directory
s/// Regexp replace a pattern with a string
say I/O output a list to a filehandle, appending a newline
scalar Misc force a scalar context
seek I/O reposition file pointer for random-access I/O
seekdir I/O reposition directory pointer
select File I/O reset default output or do I/O multiplexing
semctl SysV SysV semaphore control operations
semget SysV get set of SysV semaphores
semop SysV SysV semaphore operations
send Socket send a message over a socket
setgrent User prepare group file for use
sethostent Network prepare hosts file for use
setnetent Network prepare networks file for use
setpgrp Process set the process group of a process
setpriority Process set a process's nice value
setprotoent Network prepare protocols file for use
setpwent User prepare passwd file for use
setservent Network prepare services file for use
setsockopt Socket set some socket options
shift ARRAY remove the first element of an array, and return it
shmctl SysV SysV shared memory operations
shmget SysV get SysV shared memory segment identifier
shmread SysV read SysV shared memory
shmwrite SysV write SysV shared memory
shutdown Socket close down just half of a socket connection
sin Math return the sine of a number
sleep Process block for some number of seconds
socket Socket create a socket
socketpair Socket create a pair of sockets
sort LIST sort a list of values
splice ARRAY add or remove elements anywhere in an array
split Regexp split up a string using a regexp delimiter
sprintf String formatted print into a string
sqrt Math square root function
srand Math seed the random number generator
stat File get a file's status information
state Namespace declare and assign a persistent lexical variable
study Regexp no-op, formerly optimized input data for repeated searches
sub Flow declare a subroutine, possibly anonymously
__SUB__ Flow the current subroutine, or C<undef> if not in a subroutine
substr String get or alter a portion of a string
symlink File create a symbolic link to a file
syscall Binary I/O execute an arbitrary system call
sysopen File open a file, pipe, or descriptor
sysread Binary I/O fixed-length unbuffered input from a filehandle
sysseek Binary I/O position I/O pointer on handle used with sysread and syswrite
system Process run a separate program
syswrite Binary I/O fixed-length unbuffered output to a filehandle
tell I/O get current seekpointer on a filehandle
telldir I/O get current seekpointer on a directory handle
tie Objects bind a variable to an object class
tied Objects get a reference to the object underlying a tied variable
time Time return number of seconds since 1970
times Process Time return elapsed time for self and child processes
tr/// String transliterate a string
truncate I/O shorten a file
uc String return upper-case version of a string
ucfirst String return a string with just the next letter in upper case
umask File set file creation mode mask
undef Misc remove a variable or function definition
unlink File remove one link to a file
unpack Binary LIST convert binary structure into normal perl variables
unshift ARRAY prepend more elements to the beginning of a list
untie Objects break a tie binding to a variable
use Modules Namespace Objects load in a module at compile time and import its namespace
utime File set a file's last access and modify times
values ARRAY HASH return a list of the values in a hash
vec Binary test or set particular bits in a string
wait Process wait for any child process to die
waitpid Process wait for a particular child process to die
wantarray Flow get void vs scalar vs list context of current subroutine call
warn I/O print debugging info
write I/O print a picture record
y/// String transliterate a string
Html.pm 0000644 00000057507 15051126136 0006024 0 ustar 00 package Pod::Html;
use strict;
require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION = 1.2202;
@ISA = qw(Exporter);
@EXPORT = qw(pod2html htmlify);
@EXPORT_OK = qw(anchorify);
use Carp;
use Config;
use Cwd;
use File::Basename;
use File::Spec;
use File::Spec::Unix;
use Getopt::Long;
use Pod::Simple::Search;
use locale; # make \w work right in non-ASCII lands
=head1 NAME
Pod::Html - module to convert pod files to HTML
=head1 SYNOPSIS
use Pod::Html;
pod2html([options]);
=head1 DESCRIPTION
Converts files from pod format (see L<perlpod>) to HTML format. It
can automatically generate indexes and cross-references, and it keeps
a cache of things it knows how to cross-reference.
=head1 FUNCTIONS
=head2 pod2html
pod2html("pod2html",
"--podpath=lib:ext:pod:vms",
"--podroot=/usr/src/perl",
"--htmlroot=/perl/nmanual",
"--recurse",
"--infile=foo.pod",
"--outfile=/perl/nmanual/foo.html");
pod2html takes the following arguments:
=over 4
=item backlink
--backlink
Turns every C<head1> heading into a link back to the top of the page.
By default, no backlinks are generated.
=item cachedir
--cachedir=name
Creates the directory cache in the given directory.
=item css
--css=stylesheet
Specify the URL of a cascading style sheet. Also disables all HTML/CSS
C<style> attributes that are output by default (to avoid conflicts).
=item flush
--flush
Flushes the directory cache.
=item header
--header
--noheader
Creates header and footer blocks containing the text of the C<NAME>
section. By default, no headers are generated.
=item help
--help
Displays the usage message.
=item htmldir
--htmldir=name
Sets the directory to which all cross references in the resulting
html file will be relative. Not passing this causes all links to be
absolute since this is the value that tells Pod::Html the root of the
documentation tree.
Do not use this and --htmlroot in the same call to pod2html; they are
mutually exclusive.
=item htmlroot
--htmlroot=name
Sets the base URL for the HTML files. When cross-references are made,
the HTML root is prepended to the URL.
Do not use this if relative links are desired: use --htmldir instead.
Do not pass both this and --htmldir to pod2html; they are mutually
exclusive.
=item index
--index
--noindex
Generate an index at the top of the HTML file. This is the default
behaviour.
=item infile
--infile=name
Specify the pod file to convert. Input is taken from STDIN if no
infile is specified.
=item outfile
--outfile=name
Specify the HTML file to create. Output goes to STDOUT if no outfile
is specified.
=item poderrors
--poderrors
--nopoderrors
Include a "POD ERRORS" section in the outfile if there were any POD
errors in the infile. This section is included by default.
=item podpath
--podpath=name:...:name
Specify which subdirectories of the podroot contain pod files whose
HTML converted forms can be linked to in cross references.
=item podroot
--podroot=name
Specify the base directory for finding library pods. Default is the
current working directory.
=item quiet
--quiet
--noquiet
Don't display I<mostly harmless> warning messages. These messages
will be displayed by default. But this is not the same as C<verbose>
mode.
=item recurse
--recurse
--norecurse
Recurse into subdirectories specified in podpath (default behaviour).
=item title
--title=title
Specify the title of the resulting HTML file.
=item verbose
--verbose
--noverbose
Display progress messages. By default, they won't be displayed.
=back
=head2 htmlify
htmlify($heading);
Converts a pod section specification to a suitable section specification
for HTML. Note that we keep spaces and special characters except
C<", ?> (Netscape problem) and the hyphen (writer's problem...).
=head2 anchorify
anchorify(@heading);
Similar to C<htmlify()>, but turns non-alphanumerics into underscores. Note
that C<anchorify()> is not exported by default.
=head1 ENVIRONMENT
Uses C<$Config{pod2html}> to set up default options.
=head1 AUTHOR
Marc Green, E<lt>marcgreen@cpan.orgE<gt>.
Original version by Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
=head1 SEE ALSO
L<perlpod>
=head1 COPYRIGHT
This program is distributed under the Artistic License.
=cut
# File-scoped option state shared by the subs below.  Every variable
# except %Pages and $Curdir is (re)assigned its default by init_globals().
my $Cachedir;
my $Dircache;
my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
my($Podfile, @Podpath, $Podroot);
my $Poderrors;
my $Css;
my $Recurse;
my $Quiet;
my $Verbose;
my $Doindex;
my $Backlink;
my($Title, $Header);
my %Pages = (); # associative array used to find the location
# of pages referenced by L<> links.
my $Curdir = File::Spec->curdir;
# Give every option its default value once, when the file is loaded.
init_globals();
# init_globals - reset every file-scoped option to its built-in default.
# Takes no arguments; called once at load time and again at the start of
# each pod2html() call.
sub init_globals {
    # Directory cache: the directory the cache is written to and the
    # name of the cache file.
    $Cachedir = '.';
    $Dircache = 'pod2htmd.tmp';

    # Input side.  An empty $Podfile means "read from stdin".  @Podpath
    # lists the directories containing library pods and $Podroot is the
    # filesystem base directory their relative paths stem from.
    $Podfile = '';
    @Podpath = ();
    $Podroot = $Curdir;

    # Output side.  $Htmlroot is the http-server base directory,
    # $Htmldir the directory the pages will eventually be written to, an
    # empty $Htmlfile means "write to stdout", and $Htmlfileurl is the
    # url other files would use to refer to this file (only used to
    # build relative urls).
    $Htmlroot    = '/';
    $Htmldir     = '';
    $Htmlfile    = '';
    $Htmlfileurl = '';

    # Switches that are on by default: report POD errors, recurse into
    # the podpath subdirectories, generate an index.
    $Poderrors = $Recurse = $Doindex = 1;

    # Switches that are off by default: quiet, verbose, backlinks and
    # the block header/footer.
    $Quiet = $Verbose = $Backlink = $Header = 0;

    # No stylesheet and no explicit title.
    $Css = $Title = '';
}
# pod2html - convert one POD document to HTML.
#
# Arguments are command-line style option strings; they are copied into a
# localised @ARGV and parsed by parse_command_line().  The page index
# (%Pages) is loaded from the directory cache or rebuilt by surveying the
# podpath, and the cache file is then rewritten.  Input comes from the
# first remaining argument, --infile, or stdin; output goes to --outfile
# or stdout.  Dies when a directory change or a cache/output file
# operation fails.
sub pod2html {
local(@ARGV) = @_;
local $_;
init_globals();
parse_command_line();
# prevent '//' in urls
$Htmlroot = "" if $Htmlroot eq "/";
$Htmldir =~ s#/\z##;
if ( $Htmlroot eq ''
&& defined( $Htmldir )
&& $Htmldir ne ''
&& substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
) {
# Set the 'base' url for this file, so that we can use it
# as the location from which to calculate relative links
# to other files. If this is '', then absolute links will
# be used throughout.
#$Htmlfileurl = "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
# Is the above not just "$Htmlfileurl = $Htmlfile"?
$Htmlfileurl = Pod::Html::_unixify($Htmlfile);
}
# load or generate/cache %Pages
unless (get_cache($Dircache, \@Podpath, $Podroot, $Recurse)) {
# generate %Pages
my $pwd = getcwd();
chdir($Podroot) ||
die "$0: error changing to directory $Podroot: $!\n";
# find all pod modules/pages in podpath, store in %Pages
# - callback used to remove Podroot and extension from each file
# - laborious to allow '.' in dirnames (e.g., /usr/share/perl/5.14.1)
Pod::Simple::Search->new->inc(0)->verbose($Verbose)->laborious(1)
->callback(\&_save_page)->recurse($Recurse)->survey(@Podpath);
chdir($pwd) || die "$0: error changing to directory $pwd: $!\n";
# cache the directory list for later use
warn "caching directories for later use\n" if $Verbose;
open my $cache, '>', $Dircache
or die "$0: error open $Dircache for writing: $!\n";
# The first two cache lines (podpath, podroot) are what load_cache()
# compares against to decide whether the cache is still valid.
print $cache join(":", @Podpath) . "\n$Podroot\n";
# True when $Podroot contains '..' and consists only of dots, slashes
# and backslashes.
my $_updirs_only = ($Podroot =~ /\.\./) && !($Podroot =~ /[^\.\\\/]/);
foreach my $key (keys %Pages) {
if($_updirs_only) {
my $_dirlevel = $Podroot;
# Strip one leading path component from the page path for every
# '..' found in $Podroot.
while($_dirlevel =~ /\.\./) {
$_dirlevel =~ s/\.\.//;
# Assume $Pages{$key} has '/' separators (html dir separators).
$Pages{$key} =~ s/^[\w\s\-\.]+\///;
}
}
print $cache "$key $Pages{$key}\n";
}
close $cache or die "error closing $Dircache: $!";
}
# set options for the parser
my $parser = Pod::Simple::XHTML::LocalPodLinks->new();
$parser->codes_in_verbatim(0);
$parser->anchor_items(1); # the old Pod::Html always did
$parser->backlink($Backlink); # linkify =head1 directives
$parser->htmldir($Htmldir);
$parser->htmlfileurl($Htmlfileurl);
$parser->htmlroot($Htmlroot);
$parser->index($Doindex);
$parser->no_errata_section(!$Poderrors); # note the inverse
$parser->output_string(\my $output); # written to file later
$parser->pages(\%Pages);
$parser->quiet($Quiet);
$parser->verbose($Verbose);
# XXX: implement default title generator in pod::simple::xhtml
# copy the way the old Pod::Html did it
$Title = html_escape($Title);
# We need to add this ourselves because we use our own header, not
# ::XHTML's header. We need to set $parser->backlink to linkify
# the =head1 directives
my $bodyid = $Backlink ? ' id="_podtop_"' : '';
my $csslink = '';
my $tdstyle = ' style="background-color: #cccccc; color: #000"';
# When a stylesheet is given, link to it and drop the inline style.
if ($Css) {
$csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
$csslink =~ s,\\,/,g;
$csslink =~ s,(/.):,$1|,;
$tdstyle= '';
}
# header/footer block
my $block = $Header ? <<END_OF_BLOCK : '';
<table border="0" width="100%" cellspacing="0" cellpadding="3">
<tr><td class="_podblock_"$tdstyle valign="middle">
<big><strong><span class="_podblock_"> $Title</span></strong></big>
</td></tr>
</table>
END_OF_BLOCK
# create own header/footer because of --header
$parser->html_header(<<"HTMLHEAD");
<?xml version="1.0" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$Title</title>$csslink
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<link rev="made" href="mailto:$Config{perladmin}" />
</head>
<body$bodyid>
$block
HTMLHEAD
$parser->html_footer(<<"HTMLFOOT");
$block
</body>
</html>
HTMLFOOT
# Choose the input: with no true first argument left in @ARGV, use the
# --infile value or '-'; otherwise pass the ARGV glob to the parser.
my $input;
unless (@ARGV && $ARGV[0]) {
if ($Podfile and $Podfile ne '-') {
$input = $Podfile;
} else {
$input = '-'; # XXX: make a test case for this
}
} else {
$Podfile = $ARGV[0];
$input = *ARGV;
}
warn "Converting input file $Podfile\n" if $Verbose;
$parser->parse_file($input);
# Write output to file
$Htmlfile = "-" unless $Htmlfile; # stdout
my $fhout;
if($Htmlfile and $Htmlfile ne '-') {
open $fhout, ">", $Htmlfile
or die "$0: cannot open $Htmlfile file for output: $!\n";
} else {
# NOTE(review): the result of this open is not checked.
open $fhout, ">-";
}
binmode $fhout, ":utf8";
print $fhout $output;
close $fhout or die "Failed to close $Htmlfile: $!";
# Set mode 0644 on a real output file (not on stdout).
chmod 0644, $Htmlfile unless $Htmlfile eq '-';
}
##############################################################################
# usage - print an optional diagnostic and the usage text, then die.
#
# The first argument is a file name (callers in this file pass "-"); any
# further arguments are joined into a message that is warned as
# "$0: <file>: <message>" before the usage text is raised with die.
# This sub never returns normally.
sub usage {
my $podfile = shift;
warn "$0: $podfile: @_\n" if @_;
die <<END_OF_USAGE;
Usage: $0 --help --htmldir=<name> --htmlroot=<URL>
--infile=<name> --outfile=<name>
--podpath=<name>:...:<name> --podroot=<name>
--cachedir=<name> --flush --recurse --norecurse
--quiet --noquiet --verbose --noverbose
--index --noindex --backlink --nobacklink
--header --noheader --poderrors --nopoderrors
--css=<URL> --title=<name>
--[no]backlink - turn =head1 directives into links pointing to the top of
the page (off by default).
--cachedir - directory for the directory cache files.
--css - stylesheet URL
--flush - flushes the directory cache.
--[no]header - produce block header/footer (default is no headers).
--help - prints this message.
--htmldir - directory for resulting HTML files.
--htmlroot - http-server base directory from which all relative paths
in podpath stem (default is /).
--[no]index - generate an index at the top of the resulting html
(default behaviour).
--infile - filename for the pod to convert (input taken from stdin
by default).
--outfile - filename for the resulting html file (output sent to
stdout by default).
--[no]poderrors - include a POD ERRORS section in the output if there were
any POD errors in the input (default behavior).
--podpath - colon-separated list of directories containing library
pods (empty by default).
--podroot - filesystem base directory from which all relative paths
in podpath stem (default is .).
--[no]quiet - suppress some benign warning messages (default is off).
--[no]recurse - recurse on those subdirectories listed in podpath
(default behaviour).
--title - title that will appear in resulting html file.
--[no]verbose - self-explanatory (off by default).
END_OF_USAGE
}
# parse_command_line - read the options in @ARGV into the file-scoped
# option variables.
#
# Options found in $Config{pod2html} are placed ahead of the caller's
# arguments before parsing.  Calls usage() (which dies) on invalid
# parameters or when --help is given.  Only options that were actually
# supplied overwrite the current values.  Finally the cache file name is
# derived from the cache directory and, with --flush, the cache file is
# removed.
sub parse_command_line {
    my %opt;

    unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};

    my $parsed_ok = GetOptions(
        'backlink!'  => \$opt{backlink},
        'cachedir=s' => \$opt{cachedir},
        'css=s'      => \$opt{css},
        'flush'      => \$opt{flush},
        'help'       => \$opt{help},
        'header!'    => \$opt{header},
        'htmldir=s'  => \$opt{htmldir},
        'htmlroot=s' => \$opt{htmlroot},
        'index!'     => \$opt{index},
        'infile=s'   => \$opt{infile},
        'outfile=s'  => \$opt{outfile},
        'poderrors!' => \$opt{poderrors},
        'podpath=s'  => \$opt{podpath},
        'podroot=s'  => \$opt{podroot},
        'quiet!'     => \$opt{quiet},
        'recurse!'   => \$opt{recurse},
        'title=s'    => \$opt{title},
        'verbose!'   => \$opt{verbose},
    );
    usage("-", "invalid parameters") unless $parsed_ok;
    usage("-") if defined $opt{help};    # see if the user asked for help

    @Podpath = split(":", $opt{podpath}) if defined $opt{podpath};

    # Options whose value is stored unchanged.
    my @verbatim = (
        [ backlink  => \$Backlink ],
        [ css       => \$Css ],
        [ header    => \$Header ],
        [ index     => \$Doindex ],
        [ poderrors => \$Poderrors ],
        [ quiet     => \$Quiet ],
        [ recurse   => \$Recurse ],
        [ title     => \$Title ],
        [ verbose   => \$Verbose ],
    );
    for my $pair (@verbatim) {
        my ($name, $target) = @{$pair};
        ${$target} = $opt{$name} if defined $opt{$name};
    }

    # Options holding a path, which is passed through _unixify() first.
    my @paths = (
        [ cachedir => \$Cachedir ],
        [ htmldir  => \$Htmldir ],
        [ htmlroot => \$Htmlroot ],
        [ infile   => \$Podfile ],
        [ outfile  => \$Htmlfile ],
        [ podroot  => \$Podroot ],
    );
    for my $pair (@paths) {
        my ($name, $target) = @{$pair};
        ${$target} = _unixify($opt{$name}) if defined $opt{$name};
    }

    warn "Flushing directory caches\n"
        if $opt{verbose} && defined $opt{flush};

    # Must follow the $Cachedir assignment above.
    $Dircache = "$Cachedir/pod2htmd.tmp";
    if (defined $opt{flush}) {
        1 while unlink($Dircache);
    }
}
my $Saved_Cache_Key;
# get_cache - make sure %Pages is populated from the directory cache.
#
# Arguments: cache file name, array ref of podpath entries, podroot and
# the recurse flag.  Returns true when the pages are usable as they are:
# either the cache key equals the one saved by the previous call, or
# load_cache() accepted the cache file.  Returns 0 when the cache file
# does not exist, otherwise whatever load_cache() returned.
sub get_cache {
    my ($dircache, $podpath, $podroot, $recurse) = @_;

    # First-level cache: skip reading the cache file when nothing that
    # feeds the key has changed since the last call.
    my $current_key = cache_key($dircache, $podpath, $podroot, $recurse);
    if ($Saved_Cache_Key and $current_key eq $Saved_Cache_Key) {
        return 1;
    }
    $Saved_Cache_Key = $current_key;

    # No cache file means nothing to load.
    return 0 unless -f $dircache;

    warn "scanning for directory cache\n" if $Verbose;
    my $loaded = load_cache($dircache, $podpath, $podroot);
    return $loaded;
}
# cache_key - build a string identifying the current cache inputs.
#
# Joins, with '!', the cache file name, the recurse flag, every podpath
# entry, the podroot and the stat() fields of the cache file (no fields
# at all when stat fails, e.g. because the file does not exist).
sub cache_key {
    my ($dircache, $podpath, $podroot, $recurse) = @_;

    my @parts = ($dircache, $recurse, @{$podpath}, $podroot, stat($dircache));
    return join '!', @parts;
}
#
# load_cache - tries to find if the cache stored in $dircache is a valid
# cache of %Pages. if so, it loads them and returns a non-zero value.
#
# Arguments: cache file name, array ref of podpath entries, podroot.
# The first line of the cache file must equal the colon-joined podpath
# and the second line must equal the podroot; otherwise 0 is returned and
# %Pages is left untouched.  Every following "name path" line is stored
# in %Pages.  Dies when the cache file cannot be opened.
#
sub load_cache {
    my ($dircache, $podpath, $podroot) = @_;
    local $_;
    # Read line by line whatever the caller's setting is; localised so
    # the caller's input record separator is restored on return.
    local $/ = "\n";

    warn "scanning for directory cache\n" if $Verbose;
    open(my $cachefh, '<', $dircache) ||
        die "$0: error opening $dircache for reading: $!\n";

    # The two header lines identify what the cache was built from.
    my $cached_podpath = <$cachefh>;
    my $cached_podroot = <$cachefh>;

    # A truncated file (missing header lines) is never a valid cache.
    my $valid = defined $cached_podpath && defined $cached_podroot;
    if ($valid) {
        chomp($cached_podpath, $cached_podroot);
        $valid = join(":", @$podpath) eq $cached_podpath    # same podpath?
              && $podroot eq $cached_podroot;               # same podroot?
    }
    unless ($valid) {
        close($cachefh);
        return 0;
    }

    # load the cache if it's good
    warn "loading directory cache\n" if $Verbose;
    while (my $line = <$cachefh>) {
        # Skip malformed lines instead of reusing stale captures.
        next unless $line =~ /(.*?) (.*)$/;
        $Pages{$1} = $2;
    }
    close($cachefh);
    return 1;
}
#
# html_escape: make text safe for HTML
#
# Takes one string and returns a copy in which &, <, > and " are replaced
# by the corresponding character entities.  The argument itself is not
# modified.
#
sub html_escape {
    my $rest = $_[0];
    # The ampersand must be handled first so that the entities produced
    # by the following substitutions are not escaped a second time.
    $rest =~ s/&/&amp;/g;
    $rest =~ s/</&lt;/g;
    $rest =~ s/>/&gt;/g;
    $rest =~ s/"/&quot;/g;
    # &apos; is only in XHTML, not HTML4. Be conservative
    #$rest =~ s/'/&apos;/g;
    return $rest;
}
#
# htmlify - turn a pod section specification into the form used for HTML
# section names: runs of whitespace are collapsed to a single space,
# leading and trailing whitespace is removed, double quotes and question
# marks are dropped (hyphens are kept), and the result is lower-cased.
#
sub htmlify {
    my ($heading) = @_;

    for ($heading) {
        s/\s+/ /g;      # collapse every whitespace run
        s/\A\s+//;      # trim the front ...
        s/\s+\z//;      # ... and the back
        tr/"?//d;       # drop quotes and question marks
    }
    return lc $heading;
}
#
# anchorify - like htmlify, but every non-word character of the result
# is additionally replaced by an underscore.
#
sub anchorify {
    my ($heading) = @_;

    (my $anchor = htmlify($heading)) =~ s/\W/_/g;
    return $anchor;
}
#
# _save_page - record one POD file in %Pages.
#
# Receives the file path and the module name (it is installed as the
# Pod::Simple::Search callback by pod2html).  The path is made relative
# to $Podroot, converted to a unix-style path and stripped of its
# extension before being stored under the module name.
#
sub _save_page {
    my ($modspec, $modname) = @_;

    # Remove Podroot from the path.
    my $relative;
    if ($Podroot eq File::Spec->curdir) {
        $relative = File::Spec->abs2rel($modspec);
    }
    else {
        $relative = File::Spec->abs2rel($modspec,
                                        File::Spec->canonpath($Podroot));
    }

    # Convert to a unix style path, then split off the extension.
    my $unix_path = Pod::Html::_unixify($relative);
    my ($file, $dir) = fileparse($unix_path, qr/\.[^.]*/);

    $Pages{$modname} = $dir . $file;
}
# _unixify - convert a native path into a unix-style path.
#
# Returns '' for a false argument and '/' unchanged.  Otherwise the path
# is split with File::Spec and reassembled with File::Spec::Unix; a
# volume, when present, becomes the leading path component.
sub _unixify {
    my $full_path = shift;
    return '' unless $full_path;
    return $full_path if $full_path eq '/';

    my ($vol, $dirs, $file) = File::Spec->splitpath($full_path);

    my @segments;
    if ($dirs eq File::Spec->curdir()) {
        @segments = (File::Spec::Unix->curdir());
    }
    else {
        @segments = File::Spec->splitdir($dirs);
    }

    if ($vol) {
        $vol =~ s/:$// if $^O eq 'VMS';
        $vol = uc $vol if $^O eq 'MSWin32';

        # The volume replaces an empty leading segment, or is put in
        # front of a non-empty one.
        if ($segments[0]) {
            unshift @segments, $vol;
        }
        else {
            $segments[0] = $vol;
        }
    }

    # An empty leading segment makes the rebuilt path absolute.
    unshift @segments, '' if File::Spec->file_name_is_absolute($full_path);

    return $file unless @segments;

    my $unix_path = File::Spec::Unix->catfile(
        File::Spec::Unix->catdir(@segments), $file);
    $unix_path =~ s|^\/|| if $^O eq 'MSWin32'; # C:/foo works, /C:/foo doesn't
    $unix_path =~ s/\^\././g if $^O eq 'VMS'; # unescape dots
    return $unix_path;
}
package Pod::Simple::XHTML::LocalPodLinks;
use strict;
use warnings;
use parent 'Pod::Simple::XHTML';
use File::Spec;
use File::Spec::Unix;
# Declare the per-object settings of this class. resolve_pod_page_link reads
# most of them as methods ($self->htmldir, $self->htmlfileurl,
# $self->htmlroot, $self->pages, $self->quiet).
# NOTE(review): _accessorize is presumably inherited through the
# Pod::Simple::XHTML parent and generates those accessors -- confirm.
__PACKAGE__->_accessorize(
'htmldir',
'htmlfileurl',
'htmlroot',
'pages', # Page name => relative/path/to/page from root POD dir
'quiet',
'verbose',
);
sub resolve_pod_page_link {
    # Build the URL for a link to POD page $to and/or section $section.
    # Returns undef when neither is defined, '#id' for a section on the
    # current page, '' when no page can be found for $to, and otherwise
    # the page URL followed by ".html" and the optional '#id' fragment.
    my ($self, $to, $section) = @_;
    return undef unless defined $to || defined $section;

    my $fragment = '';
    if (defined $section) {
        $fragment = '#' . $self->idify($section, 1);
        return $fragment unless defined $to;
    }

    my $path; # path to $to according to the pages map
    if (exists $self->pages->{$to}) {
        $path = $self->pages->{$to};
    }
    else {
        # Try to find a POD that ends with $to and use that.
        # e.g., given L<XHTML>, if there is no $Podpath/XHTML in the map,
        # look for $Podpath/*/XHTML, with * being any path,
        # as a substitute (e.g., $Podpath/Pod/Simple/XHTML)
        my @matches = grep { /::\Q$to\E\z/ } keys %{$self->pages};

        if (!@matches) {
            warn "Cannot find \"$to\" in podpath: " .
                 "cannot find suitable replacement path, cannot resolve link\n"
                unless $self->quiet;
            return '';
        }
        elsif (@matches == 1) {
            warn "Cannot find \"$to\" in podpath: " .
                 "using $matches[0] as replacement path to $to\n"
                unless $self->quiet;
            $path = $self->pages->{$matches[0]};
        }
        else {
            warn "Cannot find \"$to\" in podpath: " .
                 "more than one possible replacement path to $to, " .
                 "using $matches[-1]\n" unless $self->quiet;
            # Use [-1] so newer (higher numbered) perl PODs are used
            $path = $self->pages->{$matches[-1]};
        }
    }

    my $url = File::Spec::Unix->catfile(
        Pod::Html::_unixify($self->htmlroot), $path
    );

    if ($self->htmlfileurl ne '') {
        # then $self->htmlroot eq '' (by definition of htmlfileurl) so
        # $self->htmldir needs to be prepended to link to get the absolute
        # path that will be relativized
        my $absolute = File::Spec::Unix->catdir(
            Pod::Html::_unixify($self->htmldir), $url
        );
        $url = relativize_url(
            $absolute,
            $self->htmlfileurl # already unixified
        );
    }

    return $url . '.html' . $fragment;
}
#
# relativize_url - convert an absolute URL to one relative to a base URL.
# Assumes both end in a filename.
#
sub relativize_url {
    # Express the URL $dest relative to the URL $source. Both are treated
    # as unix-style paths ending in a file name; the file part of $dest is
    # re-attached to the relative directory.
    my ($dest, $source) = @_;

    # Split off the file component of each URL.
    my ($dest_vol, $dest_dir, $dest_name) = File::Spec::Unix->splitpath($dest);
    my ($src_vol, $src_dir) = File::Spec::Unix->splitpath($source);

    my $dest_base = File::Spec::Unix->catpath($dest_vol, $dest_dir, '');
    my $src_base  = File::Spec::Unix->catpath($src_vol, $src_dir, '');

    my $relative = $dest_base ne ''
        ? File::Spec::Unix->abs2rel($dest_base, $src_base)
        : '';

    # Insert a separator only after a non-empty directory part that does
    # not already end in one.
    my $needs_slash = $relative ne '' && substr($relative, -1) ne '/';
    return $relative . ($needs_slash ? '/' : '') . $dest_name;
}
1;
Perldoc/GetOptsOO.pm 0000644 00000007460 15051135563 0010330 0 ustar 00 package Pod::Perldoc::GetOptsOO;
use strict;
use vars qw($VERSION);
$VERSION = '3.28';
BEGIN { # Make a DEBUG constant ASAP
# Alias DEBUG to Pod::Perldoc::DEBUG when that sub is already defined at
# this point of compilation; otherwise install a constant sub returning 10.
*DEBUG = defined( &Pod::Perldoc::DEBUG )
? \&Pod::Perldoc::DEBUG
: sub(){10};
}
sub getopts {
    # getopts( $target, \@args, $truth )
    #
    # Consume the leading switches of @$args (default: @ARGV) and dispatch
    # each one to a method of $target:
    #   -X with opt_X_with defined  => $target->opt_X_with(ARGUMENT)
    #   -X with opt_X defined       => $target->opt_X($truth)
    #   otherwise                   => $target->handle_unknown_option('X')
    #                                  if available, else a warning
    # $truth defaults to 1 when not passed. Processing stops at "--" (which
    # is consumed) or at the first argument that is not a switch.
    #
    # Returns true when no errors were counted. Note that it returns false
    # (bare return) when the argument list is empty to begin with.
    my($target, $args, $truth) = @_;
    $args ||= \@ARGV;
    $target->aside(
        "Starting switch processing. Scanning arguments [@$args]\n"
    ) if $target->can('aside');
    return unless @$args;
    $truth = 1 unless @_ > 2;
    DEBUG > 3 and print " Truth is $truth\n";
    my $error_count = 0;
    while( @$args ) {
        # Inspect the argument through a lexical rather than assigning to
        # the global $_, which would clobber the caller's $_.
        my $arg = $args->[0];
        last unless defined $arg and $arg =~ m/^-(.)(.*)/s;
        my($first,$rest) = ($1,$2);
        if ($arg eq '--') { # early exit if "--"
            shift @$args;
            last;
        }
        if ($first eq '-' and $rest) { # GNU style long param names
            ($first, $rest) = split '=', $rest, 2;
        }
        my $method = "opt_${first}_with";
        if( $target->can($method) ) { # it's argumental
            if($rest eq '') { # like -f bar
                shift @$args;
                $target->warn( "Option $first needs a following argument!\n" ) unless @$args;
                $rest = shift @$args;
            } else { # like -fbar (== -f bar)
                shift @$args;
            }
            DEBUG > 3 and print " $method => $rest\n";
            $target->$method( $rest );
        # Otherwise, it's not argumental...
        } else {
            if( $target->can( $method = "opt_$first" ) ) {
                DEBUG > 3 and print " $method is true ($truth)\n";
                $target->$method( $truth );
            # Otherwise it's an unknown option...
            } elsif( $target->can('handle_unknown_option') ) {
                DEBUG > 3
                    and print " calling handle_unknown_option('$first')\n";
                $error_count += (
                    $target->handle_unknown_option( $first ) || 0
                );
            } else {
                ++$error_count;
                $target->warn( "Unknown option: $first\n" );
            }
            if($rest eq '') { # like -f
                shift @$args
            } else { # like -fbar (== -f -bar )
                DEBUG > 2 and print " Setting args->[0] to \"-$rest\"\n";
                $args->[0] = "-$rest";
            }
        }
    }
    $target->aside(
        "Ending switch processing. Args are [@$args] with $error_count errors.\n"
    ) if $target->can('aside');
    $error_count == 0;
}
1;
__END__
=head1 NAME
Pod::Perldoc::GetOptsOO - Customized option parser for Pod::Perldoc
=head1 SYNOPSIS
use Pod::Perldoc::GetOptsOO ();
Pod::Perldoc::GetOptsOO::getopts( $obj, \@args, $truth )
or die "wrong usage";
=head1 DESCRIPTION
Implements a customized option parser used for
L<Pod::Perldoc>.
Rather like Getopt::Std's getopts:
=over
=item Call Pod::Perldoc::GetOptsOO::getopts($object, \@ARGV, $truth)
=item Given -n, if there's an opt_n_with, it'll call $object->opt_n_with( ARGUMENT )
(e.g., "-n foo" => $object->opt_n_with('foo'). Ditto "-nfoo")
=item Otherwise (given -n) if there's an opt_n, we'll call it $object->opt_n($truth)
(Truth defaults to 1)
=item Otherwise we try calling $object->handle_unknown_option('n')
(and we increment the error count by the return value of it)
=item If there's no handle_unknown_option, then we just warn, and then increment
the error counter
=back
The return value of Pod::Perldoc::GetOptsOO::getopts is true if no errors,
otherwise it's false.
=head1 SEE ALSO
L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002-2007 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToChecker.pm 0000644 00000003233 15051135563 0010346 0 ustar 00 package Pod::Perldoc::ToChecker;
use strict;
use warnings;
use vars qw(@ISA);
use vars qw($VERSION);
$VERSION = '3.28';
# Pick our superclass: Pod::Simple::Checker when it can be loaded,
# otherwise Pod::Checker.
#
if (eval { require Pod::Simple::Checker; 1 }) {
    @ISA = ('Pod::Simple::Checker');
}
else {
    require Pod::Checker;
    @ISA = ('Pod::Checker');
}
# Formatter properties: is_pageable is true, write_with_binmode is false,
# and output_extension is 'txt'.
sub is_pageable        { return 1 }
sub write_with_binmode { return 0 }
sub output_extension   { return 'txt' }

# Receives ($self, $file, $tmp, $tmpfd) and prints a message saying that
# $file has no Pod errors.
sub if_zero_length {
    my (undef, $file) = @_;
    print "No Pod errors in $file\n";
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToChecker - let Perldoc check Pod for errors
=head1 SYNOPSIS
% perldoc -o checker SomeFile.pod
No Pod errors in SomeFile.pod
(or an error report)
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Simple::Checker as a "formatter" class (or if that is
not available, then Pod::Checker), to check for errors in a given
Pod file.
This is actually a Pod::Simple::Checker (or Pod::Checker) subclass, and
inherits all its options.
=head1 SEE ALSO
L<Pod::Simple::Checker>, L<Pod::Simple>, L<Pod::Checker>, L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToNroff.pm 0000644 00000005336 15051135563 0010062 0 ustar 00 package Pod::Perldoc::ToNroff;
use strict;
use warnings;
use parent qw(Pod::Perldoc::BaseTo);
use vars qw($VERSION);
$VERSION = '3.28';
# This is unlike ToMan.pm in that it emits the raw nroff source!
# Formatter properties: pageable, not written with binmode, 'man' extension.
sub is_pageable        { return 1 } # well, if you ask for it...
sub write_with_binmode { return 0 }
sub output_extension   { return 'man' }

use Pod::Man ();

# Option accessors: each one gets or sets the same-named element of the
# object by delegating to _perldoc_elem.
sub center          { my $self = shift; return $self->_perldoc_elem('center',          @_) }
sub date            { my $self = shift; return $self->_perldoc_elem('date',            @_) }
sub fixed           { my $self = shift; return $self->_perldoc_elem('fixed',           @_) }
sub fixedbold       { my $self = shift; return $self->_perldoc_elem('fixedbold',       @_) }
sub fixeditalic     { my $self = shift; return $self->_perldoc_elem('fixeditalic',     @_) }
sub fixedbolditalic { my $self = shift; return $self->_perldoc_elem('fixedbolditalic', @_) }
sub quotes          { my $self = shift; return $self->_perldoc_elem('quotes',          @_) }
sub release         { my $self = shift; return $self->_perldoc_elem('release',         @_) }
sub section         { my $self = shift; return $self->_perldoc_elem('section',         @_) }

# Constructor: an empty hash blessed into the class of the invocant
# (class name or existing object).
sub new {
    my ($invocant) = @_;
    return bless {}, ref($invocant) || $invocant;
}
sub parse_from_file {
    # Format a file with Pod::Man. Every element of the object whose key
    # does not start with an underscore is passed to Pod::Man->new as an
    # option; the remaining arguments go to its parse_from_file.
    my $self = shift;

    my @options;
    for my $key (keys %$self) {
        next if $key =~ m/^_/s;
        push @options, $key, $self->{$key};
    }

    if (defined(&Pod::Perldoc::DEBUG) and Pod::Perldoc::DEBUG()) {
        print "About to call new Pod::Man ",
            $Pod::Man::VERSION ? "(v$Pod::Man::VERSION) " : '',
            "with options: ",
            @options ? "[@options]" : "(nil)", "\n";
    }

    return Pod::Man->new(@options)->parse_from_file(@_);
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToNroff - let Perldoc convert Pod to nroff
=head1 SYNOPSIS
perldoc -o nroff -d something.3 Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Man as a formatter class.
The following options are supported: center, date, fixed, fixedbold,
fixeditalic, fixedbolditalic, quotes, release, section
Those options are explained in L<Pod::Man>.
For example:
perldoc -o nroff -w center:Pod -d something.3 Some::Modulename
=head1 CAVEAT
This module may change to use a different pod-to-nroff formatter class
in the future, and this may change what options are supported.
=head1 SEE ALSO
L<Pod::Man>, L<Pod::Perldoc>, L<Pod::Perldoc::ToMan>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToPod.pm 0000644 00000003647 15051135563 0007535 0 ustar 00 package Pod::Perldoc::ToPod;
use strict;
use warnings;
use parent qw(Pod::Perldoc::BaseTo);
use vars qw($VERSION);
$VERSION = '3.28';
# Formatter properties: pageable, not written with binmode, 'pod' extension.
sub is_pageable        { return 1 }
sub write_with_binmode { return 0 }
sub output_extension   { return 'pod' }

# Constructor: an empty hash blessed into the class of the invocant
# (class name or existing object).
sub new {
    my ($invocant) = @_;
    return bless {}, ref($invocant) || $invocant;
}
sub parse_from_file {
    # Copy the Pod portions of file $in to the filehandle $outfh.
    #
    # Everything from a line starting with "=word" up to and including the
    # next "=cut" is copied; each "=cut" line is emitted with a blank line
    # before and after it. Text outside those regions is dropped.
    # Errors opening, printing or closing are reported via $self->die.
    # Returns nothing.
    my( $self, $in, $outfh ) = @_;

    # A lexical handle (instead of the global bareword IN) so that a handle
    # of that name owned by the caller is never re-opened or closed here.
    open(my $in_fh, "<", $in)
        or $self->die( "Can't read-open $in: $!\nAborting" );

    my $cut_mode = 1;
    # A hack for finding things between =foo and =cut, inclusive
    while (defined(my $line = <$in_fh>)) {
        if( $line =~ m/^=(\w+)/s ) {
            if($cut_mode = ($1 eq 'cut')) {
                print $outfh "\n=cut\n\n";
                # Pass thru the =cut line with some harmless
                # (and occasionally helpful) padding
            }
        }
        next if $cut_mode;
        print $outfh $line or $self->die( "Can't print to $outfh: $!" );
    }
    close $in_fh or $self->die( "Can't close $in: $!" );
    return;
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToPod - let Perldoc render Pod as ... Pod!
=head1 SYNOPSIS
perldoc -opod Some::Modulename
(That's currently the same as the following:)
perldoc -u Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to display Pod source as
itself! Pretty Zen, huh?
Currently this class works by just filtering out the non-Pod stuff from
a given input file.
=head1 SEE ALSO
L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/BaseTo.pm 0000644 00000007065 15051135563 0007663 0 ustar 00 package Pod::Perldoc::BaseTo;
use strict;
use warnings;
use vars qw($VERSION);
$VERSION = '3.28';
use Carp qw(croak carp);
use Config qw(%Config);
use File::Spec::Functions qw(catfile);
# Default formatter properties: not pageable (empty string), written with
# binmode, 'txt' extension.
sub is_pageable        { return '' }
sub write_with_binmode { return 1 }
sub output_extension   { return 'txt' } # override in subclass!
# sub new { my $self = shift; ... }
# sub parse_from_file( my($class, $in, $out) = ...; ... }
#sub new { return bless {}, ref($_[0]) || $_[0] }
# this is also in Perldoc.pm, but why look there when you're a
# subclass of this?
# Truth helpers with empty prototypes: TRUE returns 1, FALSE does a bare
# return.
sub TRUE () {1}
sub FALSE () {return}
# At compile time, install one is_<platform> sub per known platform, aliased
# to TRUE or FALSE according to $^O. A sub of that name that is already
# defined is left untouched.
BEGIN {
# These compare $^O for exact equality.
*is_vms = $^O eq 'VMS' ? \&TRUE : \&FALSE unless defined &is_vms;
*is_mswin32 = $^O eq 'MSWin32' ? \&TRUE : \&FALSE unless defined &is_mswin32;
*is_dos = $^O eq 'dos' ? \&TRUE : \&FALSE unless defined &is_dos;
*is_os2 = $^O eq 'os2' ? \&TRUE : \&FALSE unless defined &is_os2;
*is_cygwin = $^O eq 'cygwin' ? \&TRUE : \&FALSE unless defined &is_cygwin;
*is_linux = $^O eq 'linux' ? \&TRUE : \&FALSE unless defined &is_linux;
# These match the name anywhere inside $^O.
*is_hpux = $^O =~ m/hpux/ ? \&TRUE : \&FALSE unless defined &is_hpux;
*is_openbsd = $^O =~ m/openbsd/ ? \&TRUE : \&FALSE unless defined &is_openbsd;
*is_freebsd = $^O =~ m/freebsd/ ? \&TRUE : \&FALSE unless defined &is_freebsd;
*is_bitrig = $^O =~ m/bitrig/ ? \&TRUE : \&FALSE unless defined &is_bitrig;
}
sub _perldoc_elem {
    # Generic get/set for one element of the object hash.
    #   _perldoc_elem($name)         returns $self->{$name}
    #   _perldoc_elem($name, $value) stores $value and returns it
    my ($self, $name, @values) = @_;
    return $self->{$name} = $values[0] if @values;
    return $self->{$name};
}
# True when Pod::Perldoc::DEBUG is defined and returns a true value.
sub debugging {
    my ($self) = @_;
    return (defined(&Pod::Perldoc::DEBUG) && &Pod::Perldoc::DEBUG());
}

# While debugging, print each message to STDERR prefixed with "DEBUG ".
sub debug {
    my ($self, @messages) = @_;
    $self->debugging or return;
    my @lines = map { "DEBUG $_" } @messages;
    print STDERR @lines;
}

# carp with the messages joined (and terminated) by newlines.
sub warn {
    my ($self, @messages) = @_;
    my $text = join "\n", @messages, '';
    carp $text;
}

# croak with the messages joined (and terminated) by newlines.
sub die {
    my ($self, @messages) = @_;
    my $text = join "\n", @messages, '';
    croak $text;
}
sub _get_path_components {
    # Return the directories listed in $ENV{PATH}, split on the platform's
    # path separator ($Config{path_sep}). An unset PATH yields an empty
    # list instead of an "uninitialized value" warning from split.
    my( $self ) = @_;
    my @paths = defined $ENV{PATH}
        ? split( /\Q$Config{path_sep}/, $ENV{PATH} )
        : ();
    return @paths;
}
sub _find_executable_in_path {
    # Look for $program in every directory returned by
    # _get_path_components and return, in that order, the full paths that
    # exist and are executable. Existing but non-executable files are
    # reported with a warning and skipped.
    my ($self, $program) = @_;
    my @executables;
    for my $dir ($self->_get_path_components) {
        my $candidate = catfile($dir, $program);
        $self->debug("Looking for $candidate\n");
        next unless -e $candidate;
        if (-x $candidate) {
            $self->debug("Found $candidate\n");
            push @executables, $candidate;
        }
        else {
            $self->warn("Found $candidate but it's not executable. Skipping.\n");
        }
    }
    return @executables;
}
1;
__END__
=head1 NAME
Pod::Perldoc::BaseTo - Base for Pod::Perldoc formatters
=head1 SYNOPSIS
package Pod::Perldoc::ToMyFormat;
use parent qw( Pod::Perldoc::BaseTo );
...
=head1 DESCRIPTION
This package is meant as a base of Pod::Perldoc formatters,
like L<Pod::Perldoc::ToText>, L<Pod::Perldoc::ToMan>, etc.
It provides default implementations for the methods
is_pageable
write_with_binmode
output_extension
_perldoc_elem
The concrete formatter must implement
new
parse_from_file
=head1 SEE ALSO
L<perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002-2007 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToText.pm 0000644 00000004332 15051135563 0007727 0 ustar 00 package Pod::Perldoc::ToText;
use strict;
use warnings;
use vars qw($VERSION);
$VERSION = '3.28';
use parent qw(Pod::Perldoc::BaseTo);
# Formatter properties: pageable, not written with binmode, 'txt' extension.
sub is_pageable        { return 1 }
sub write_with_binmode { return 0 }
sub output_extension   { return 'txt' }

use Pod::Text ();

# Option accessors: each one gets or sets the same-named element of the
# object by delegating to _perldoc_elem.
sub alt      { my $self = shift; return $self->_perldoc_elem('alt',      @_) }
sub indent   { my $self = shift; return $self->_perldoc_elem('indent',   @_) }
sub loose    { my $self = shift; return $self->_perldoc_elem('loose',    @_) }
sub quotes   { my $self = shift; return $self->_perldoc_elem('quotes',   @_) }
sub sentence { my $self = shift; return $self->_perldoc_elem('sentence', @_) }
sub width    { my $self = shift; return $self->_perldoc_elem('width',    @_) }

# Constructor: an empty hash blessed into the class of the invocant
# (class name or existing object).
sub new {
    my ($invocant) = @_;
    return bless {}, ref($invocant) || $invocant;
}
sub parse_from_file {
    # Format a file with Pod::Text. Every element of the object whose key
    # does not start with an underscore is passed to Pod::Text->new as an
    # option; the remaining arguments go to its parse_from_file.
    my $self = shift;

    my @options;
    for my $key (keys %$self) {
        next if $key =~ m/^_/s;
        push @options, $key, $self->{$key};
    }

    if (defined(&Pod::Perldoc::DEBUG) and Pod::Perldoc::DEBUG()) {
        print "About to call new Pod::Text ",
            $Pod::Text::VERSION ? "(v$Pod::Text::VERSION) " : '',
            "with options: ",
            @options ? "[@options]" : "(nil)", "\n";
    }

    return Pod::Text->new(@options)->parse_from_file(@_);
}
1;
=head1 NAME
Pod::Perldoc::ToText - let Perldoc render Pod as plaintext
=head1 SYNOPSIS
perldoc -o text Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Text as a formatter class.
It supports the following options, which are explained in
L<Pod::Text>: alt, indent, loose, quotes, sentence, width
For example:
perldoc -o text -w indent:5 Some::Modulename
=head1 CAVEAT
This module may change to use a different text formatter class in the
future, and this may change what options are supported.
=head1 SEE ALSO
L<Pod::Text>, L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToTk.pm 0000644 00000007640 15051135563 0007366 0 ustar 00 package Pod::Perldoc::ToTk;
use strict;
use warnings;
use vars qw($VERSION);
$VERSION = '3.28';
use parent qw(Pod::Perldoc::BaseTo);
# Formatter properties: pageable, not written with binmode, 'txt' extension.
sub is_pageable        { return 1 }
sub write_with_binmode { return 0 }
sub output_extension   { return 'txt' } # doesn't matter
sub if_zero_length     { return }       # because it will be 0-length!

# Constructor: an empty hash blessed into the class of the invocant
# (class name or existing object).
sub new {
    my ($invocant) = @_;
    return bless {}, ref($invocant) || $invocant;
}

# TODO: document these and their meanings...
# Each one gets or sets the same-named element through _perldoc_elem.
sub tree   { my $self = shift; return $self->_perldoc_elem('tree',   @_) }
sub tk_opt { my $self = shift; return $self->_perldoc_elem('tk_opt', @_) }
sub forky  { my $self = shift; return $self->_perldoc_elem('forky',  @_) }
use Pod::Perldoc ();
use File::Spec::Functions qw(catfile);
# Load Tk at compile time; abort through the class's die method with an
# explanatory message when it cannot be loaded, or when its version is
# exactly '800.003'.
BEGIN{ # Tk is not core, but this is
eval { require Tk } ||
__PACKAGE__->die( <<"HERE" );
You must have the Tk module to use Pod::Perldoc::ToTk.
If you have it installed, ensure it's in your Perl library
path.
HERE
__PACKAGE__->die(
__PACKAGE__,
" doesn't work nice with Tk.pm version $Tk::VERSION"
) if $Tk::VERSION eq '800.003';
}
# Tk::FcyEntry is optional: a failure to load it is ignored.
BEGIN { eval { require Tk::FcyEntry; }; };
# Tk::Pod is required: abort with an explanatory message when it cannot be
# loaded.
BEGIN{ # Tk::Pod is not core, but this is
eval { require Tk::Pod } ||
__PACKAGE__->die( <<"HERE" );
You must have the Tk::Pod module to use Pod::Perldoc::ToTk.
If you have it installed, ensure it's in your Perl library
path.
HERE
}
# The following was adapted from "tkpod" in the Tk-Pod dist.
# Display $Input_File in a Tk::Pod window and run the Tk main loop until
# that window is gone.
#   $self->{forky}  - when true, fork first: the parent returns at once and
#                     the child shows the window, then exits.
#   $self->{tk_opt} - when true, existing 'Tk' subdirectories of @INC are
#                     appended to @INC.
#   $self->{tree}   - passed as -tree when Tk::Pod's version is at least 4.
# Returns nothing.
sub parse_from_file {
my($self, $Input_File) = @_;
if($self->{'forky'}) {
# NOTE(review): fork returns undef on failure; in that case this process
# carries on as if it were the child and exits after MainLoop -- confirm
# that this is acceptable.
return if fork; # i.e., parent process returns
}
# Use forward slashes in the file name on Windows and DOS.
$Input_File =~ s{\\}{/}g
if $self->is_mswin32 or $self->is_dos
# and maybe OS/2
;
my($tk_opt, $tree);
$tree = $self->{'tree' };
$tk_opt = $self->{'tk_opt'};
#require Tk::ErrorDialog;
# Add 'Tk' subdirectories to search path so, e.g.,
# 'Scrolled' will find doc in 'Tk/Scrolled'
if( $tk_opt ) {
push @INC, grep -d $_, map catfile($_,'Tk'), @INC;
}
# The main window itself stays hidden; only the Pod viewer is shown.
my $mw = MainWindow->new();
#eval 'use blib "/home/e/eserte/src/perl/Tk-App";require Tk::App::Debug';
$mw->withdraw;
# CDE use Font Settings if available
my $ufont = $mw->optionGet('userFont','UserFont'); # fixed width
my $sfont = $mw->optionGet('systemFont','SystemFont'); # proportional
if (defined($ufont) and defined($sfont)) {
# Strip a trailing colon from both font names before using them.
foreach ($ufont, $sfont) { s/:$//; };
$mw->optionAdd('*Font', $sfont);
$mw->optionAdd('*Entry.Font', $ufont);
$mw->optionAdd('*Text.Font', $ufont);
}
# Tear-off menus are enabled everywhere except on MSWin32.
$mw->optionAdd('*Menu.tearOff', $Tk::platform ne 'MSWin32' ? 1 : 0);
$mw->Pod(
'-file' => $Input_File,
(($Tk::Pod::VERSION >= 4) ? ('-tree' => $tree) : ())
)->focusNext;
# xxx dirty but it works. A simple $mw->destroy if $mw->children
# does not work because Tk::ErrorDialogs could be created.
# (they are withdrawn after Ok instead of destroyed, I guess)
if ($mw->children) {
# Poll every 1000 ms; destroy the main window once none of its children
# stringifies to something starting with "Tk::Pod".
$mw->repeat(1000, sub {
# ErrorDialog is withdrawn not deleted :-(
foreach ($mw->children) {
return if "$_" =~ /^Tk::Pod/ # ->isa('Tk::Pod')
}
$mw->destroy;
});
} else {
$mw->destroy;
}
#$mw->WidgetDump;
MainLoop();
exit if $self->{'forky'}; # we were the child! so exit now!
return;
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToTk - let Perldoc use Tk::Pod to render Pod
=head1 SYNOPSIS
perldoc -o tk Some::Modulename &
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Tk::Pod as a formatter class.
You have to have installed Tk::Pod first, or this class won't load.
=head1 SEE ALSO
L<Tk::Pod>, L<Pod::Perldoc>
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>;
Sean M. Burke C<< <sburke@cpan.org> >>;
significant portions copied from
F<tkpod> in the Tk::Pod dist, by Nick Ing-Simmons, Slaven Rezic, et al.
=cut
Perldoc/ToTerm.pm 0000644 00000007350 15051135563 0007715 0 ustar 00 package Pod::Perldoc::ToTerm;
use strict;
use warnings;
use vars qw($VERSION);
$VERSION = '3.28';
use parent qw(Pod::Perldoc::BaseTo);
# Formatter properties: pageable, not written with binmode, 'txt' extension.
sub is_pageable        { return 1 }
sub write_with_binmode { return 0 }
sub output_extension   { return 'txt' }

use Pod::Text::Termcap ();

# Option accessors: each one gets or sets the same-named element of the
# object by delegating to _perldoc_elem.
sub alt      { my $self = shift; return $self->_perldoc_elem('alt',      @_) }
sub indent   { my $self = shift; return $self->_perldoc_elem('indent',   @_) }
sub loose    { my $self = shift; return $self->_perldoc_elem('loose',    @_) }
sub quotes   { my $self = shift; return $self->_perldoc_elem('quotes',   @_) }
sub sentence { my $self = shift; return $self->_perldoc_elem('sentence', @_) }
sub width {
    # Get or set the output width. The result is the first true value
    # among: the stored 'width' element (after an optional set), MANWIDTH,
    # the column count reported by stty, and the built-in default.
    my $self = shift;

    my $explicit = $self->_perldoc_elem('width', @_);
    return $explicit if $explicit;

    my $from_env = $self->_get_columns_from_manwidth;
    return $from_env if $from_env;

    my $from_tty = $self->_get_columns_from_stty;
    return $from_tty if $from_tty;

    return $self->_get_default_width;
}
sub pager_configuration {
    # Prepare the environment for the pager named by $pager: a name
    # containing "less" selects the LESS variable, otherwise one containing
    # "more" selects MORE. Nothing is touched on Windows or DOS (as
    # reported by $perldoc). Returns nothing.
    my ($self, $pager, $perldoc) = @_;

    # do not modify anything on Windows or DOS
    return if $perldoc->is_mswin32 || $perldoc->is_dos;

    my $variable = $pager =~ /less/ ? 'LESS'
                 : $pager =~ /more/ ? 'MORE'
                 :                    undef;
    $self->_maybe_modify_environment($variable) if defined $variable;
    return;
}
sub _maybe_modify_environment {
    # Give the environment variable $name the value "-R", but only when it
    # is not set at all; an existing value is never modified.
    my ($self, $name) = @_;
    $ENV{$name} = "-R" if !defined $ENV{$name};
}
# Output of `stty -a`.
sub _get_stty { `stty -a` }

# Extract the terminal width from the text returned by _get_stty.
# Recognizes both "columns N" and "; N columns;". Returns 0 when no width
# is found, including when _get_stty produced no output at all (backticks
# can yield undef in scalar context when the command cannot be started).
sub _get_columns_from_stty {
    my $output = $_[0]->_get_stty;
    return 0 unless defined $output;
    if( $output =~ /\bcolumns\s+(\d+)/ ) { return $1; }
    elsif( $output =~ /;\s*(\d+)\s+columns;/ ) { return $1; }
    else { return 0 }
}
sub _get_columns_from_manwidth {
    # Return the width requested through the MANWIDTH environment
    # variable, or 0 when it is unset, not purely numeric, or zero (the
    # last two cases are reported with a warning).
    my ($self) = @_;
    my $requested = $ENV{MANWIDTH};
    return 0 if !defined $requested;

    if ($requested !~ m/\A\d+\z/) {
        $self->warn( "Ignoring non-numeric MANWIDTH ($ENV{MANWIDTH})\n" );
        return 0;
    }
    if ($requested == 0) {
        $self->warn( "Ignoring MANWIDTH of 0. Really? Why even run the program? :)\n" );
        return 0;
    }

    my ($columns) = $requested =~ m/\A(\d+)\z/;
    return defined $columns ? $columns : 0;
}
# Width used when neither the options, MANWIDTH nor stty provide one.
sub _get_default_width {
    return 76;
}

# Constructor: an empty hash blessed into the class of the invocant
# (class name or existing object).
sub new {
    my ($invocant) = @_;
    return bless {}, ref($invocant) || $invocant;
}
sub parse_from_file {
    # Format a file with Pod::Text::Termcap. The effective width is
    # resolved first and stored in the object; then every element whose
    # key does not start with an underscore is passed to
    # Pod::Text::Termcap->new as an option. The remaining arguments go to
    # its parse_from_file.
    my $self = shift;
    $self->{width} = $self->width();
    my @options =
        map {; $_, $self->{$_} }
        grep !m/^_/s,
        keys %$self
    ;
    # The version shown in the debug line is guarded by the same variable
    # that is interpolated (Pod::Text::Termcap's, not Pod::Text's).
    defined(&Pod::Perldoc::DEBUG)
        and Pod::Perldoc::DEBUG()
        and print "About to call new Pod::Text::Termcap ",
            $Pod::Text::Termcap::VERSION ? "(v$Pod::Text::Termcap::VERSION) " : '',
            "with options: ",
            @options ? "[@options]" : "(nil)", "\n";
    ;
    Pod::Text::Termcap->new(@options)->parse_from_file(@_);
}
1;
=head1 NAME
Pod::Perldoc::ToTerm - render Pod with terminal escapes
=head1 SYNOPSIS
perldoc -o term Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Text as a formatter class.
It supports the following options, which are explained in
L<Pod::Text>: alt, indent, loose, quotes, sentence, width
For example:
perldoc -o term -w indent:5 Some::Modulename
=head1 PAGER FORMATTING
Depending on the platform, and because this class emits terminal escapes it
will attempt to set the C<-R> flag on your pager by injecting the flag into
your environment variable for C<less> or C<more>.
On Windows and DOS, this class will not modify any environment variables.
=head1 CAVEAT
This module may change to use a different text formatter class in the
future, and this may change what options are supported.
=head1 SEE ALSO
L<Pod::Text>, L<Pod::Text::Termcap>, L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2017 Mark Allen.
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.
=head1 AUTHOR
Mark Allen C<< <mallen@cpan.org> >>
=cut
Perldoc/ToMan.pm 0000644 00000033650 15051135563 0007523 0 ustar 00 require 5.006;
package Pod::Perldoc::ToMan;
use strict;
use warnings;
use parent qw(Pod::Perldoc::BaseTo);
use vars qw($VERSION);
$VERSION = '3.28';
use File::Spec::Functions qw(catfile);
use Pod::Man 2.18;
# This class is unlike ToText.pm et al, because we're NOT paging thru
# the output in our particular format -- we make the output and
# then we run nroff (or whatever) on it, and then page thru the
# (plaintext) output of THAT!
# Status values: SUCCESS is 1 and FAILED is 0. _parse_with_pod_man and
# _filter_through_nroff return them, and parse_from_file compares against
# SUCCESS.
sub SUCCESS () { 1 }
sub FAILED () { 0 }
# Formatter properties: pageable, not written with binmode, 'txt' extension.
sub is_pageable { 1 }
sub write_with_binmode { 0 }
sub output_extension { 'txt' }
# Accessors: each one gets or sets the same-named element of the object by
# delegating to _perldoc_elem. Names starting with an underscore are
# skipped by _get_podman_switches, so only the others reach Pod::Man.
sub __filter_nroff  { my $self = shift; return $self->_perldoc_elem('__filter_nroff',  @_) }
sub __nroffer       { my $self = shift; return $self->_perldoc_elem('__nroffer',       @_) }
sub __bindir        { my $self = shift; return $self->_perldoc_elem('__bindir',        @_) }
sub __pod2man       { my $self = shift; return $self->_perldoc_elem('__pod2man',       @_) }
sub __output_file   { my $self = shift; return $self->_perldoc_elem('__output_file',   @_) }
sub center          { my $self = shift; return $self->_perldoc_elem('center',          @_) }
sub date            { my $self = shift; return $self->_perldoc_elem('date',            @_) }
sub fixed           { my $self = shift; return $self->_perldoc_elem('fixed',           @_) }
sub fixedbold       { my $self = shift; return $self->_perldoc_elem('fixedbold',       @_) }
sub fixeditalic     { my $self = shift; return $self->_perldoc_elem('fixeditalic',     @_) }
sub fixedbolditalic { my $self = shift; return $self->_perldoc_elem('fixedbolditalic', @_) }
sub name            { my $self = shift; return $self->_perldoc_elem('name',            @_) }
sub quotes          { my $self = shift; return $self->_perldoc_elem('quotes',          @_) }
sub release         { my $self = shift; return $self->_perldoc_elem('release',         @_) }
sub section         { my $self = shift; return $self->_perldoc_elem('section',         @_) }
sub new {
    # Constructor: bless an empty hash into the class of the invocant
    # (class name or existing object), pass the remaining arguments to
    # init, and return the object.
    my ($invocant, @args) = @_;
    my $class = ref($invocant) || $invocant;
    my $self = bless {}, $class;
    $self->init(@args);
    return $self;
}
sub init {
    # Make sure a roff formatter is configured: keep a preset __nroffer,
    # otherwise store the first executable found among the candidates.
    # Returns the result of _check_nroffer.
    my ($self) = @_;
    if ($self->__nroffer) {
        $self->debug( "__nroffer is " . $self->__nroffer() . "\n" );
    }
    else {
        my $roffer = $self->_find_roffer( $self->_roffer_candidates );
        $self->debug( "Using $roffer\n" );
        $self->__nroffer( $roffer );
    }
    return $self->_check_nroffer;
}
sub _roffer_candidates {
    # Formatter names to look for, in order of preference: mandoc first on
    # OpenBSD, FreeBSD and Bitrig, groff first everywhere else.
    my ($self) = @_;
    my $prefers_mandoc =
        $self->is_openbsd || $self->is_freebsd || $self->is_bitrig;
    return qw( mandoc groff nroff ) if $prefers_mandoc;
    return qw( groff nroff mandoc );
}
sub _find_roffer {
    # Search the PATH for each candidate, in the order given. In list
    # context returns every executable found; in scalar context only the
    # first one (undef when there is none).
    my ($self, @candidates) = @_;
    my @found = map { $self->_find_executable_in_path($_) } @candidates;
    return wantarray ? @found : $found[0];
}
sub _check_nroffer {
# Placeholder: no validation is performed and the result is always 1. The
# questions after the return list checks that are not implemented.
return 1;
# where is it in the PATH?
# is it executable?
# what is its real name?
# what is its version?
# does it support the flags we need?
# is it good enough for us?
}
# Output of `stty -a`.
sub _get_stty { `stty -a` }

# Extract the terminal width from the text returned by _get_stty.
# Recognizes both "columns N" and "; N columns;". Returns 0 when no width
# is found, including when _get_stty produced no output at all (backticks
# can yield undef in scalar context when the command cannot be started).
sub _get_columns_from_stty {
    my $output = $_[0]->_get_stty;
    return 0 unless defined $output;
    if( $output =~ /\bcolumns\s+(\d+)/ ) { return $1 }
    elsif( $output =~ /;\s*(\d+)\s+columns;/ ) { return $1 }
    else { return 0 }
}
sub _get_columns_from_manwidth {
    # Return the width requested through the MANWIDTH environment
    # variable, or 0 when it is unset, not purely numeric, or zero (the
    # last two cases are reported with a warning).
    my ($self) = @_;
    my $requested = $ENV{MANWIDTH};
    return 0 if !defined $requested;

    if ($requested !~ m/\A\d+\z/) {
        $self->warn( "Ignoring non-numeric MANWIDTH ($ENV{MANWIDTH})\n" );
        return 0;
    }
    if ($requested == 0) {
        $self->warn( "Ignoring MANWIDTH of 0. Really? Why even run the program? :)\n" );
        return 0;
    }

    my ($columns) = $requested =~ m/\A(\d+)\z/;
    return defined $columns ? $columns : 0;
}
# Width used when neither MANWIDTH nor stty provide one.
sub _get_default_width {
    return 73;
}

# Terminal width: the first true value among MANWIDTH, the column count
# reported by stty, and the default width.
sub _get_columns {
    my ($self) = @_;

    my $from_env = $self->_get_columns_from_manwidth;
    return $from_env if $from_env;

    my $from_tty = $self->_get_columns_from_stty;
    return $from_tty if $from_tty;

    return $self->_get_default_width;
}
sub _get_podman_switches {
    # Build the option list for Pod::Man->new: a flat key/value list of
    # every element of the object whose key does not start with an
    # underscore.
    my ($self) = @_;

    my @switches;
    for my $option (keys %$self) {
        next if $option =~ m/^_/s;
        push @switches, $option, $self->{$option};
    }

    # There needs to be a cleaner way to handle setting
    # the UTF-8 flag, but for now, it is deliberately not
    # added here because it often does the wrong thing.
    #
    # See RT #77465
    #
    #push @switches, 'utf8' => 1;
    $self->debug( "Pod::Man switches are [@switches]\n" );
    return @switches;
}
sub _parse_with_pod_man {
    # Run Pod::Man over $file and capture what it prints in the scalar
    # referenced by $self->{_text_ref}. Dies (via $self->die) when the
    # capture buffer cannot be opened or when Pod::Man produced no output.
    # While debugging, the captured text is also saved to a file.
    # Returns SUCCESS.
    my( $self, $file ) = @_;
    #->output_fh and ->output_string from Pod::Simple aren't
    # working, apparently, so there's this ugly hack:
    local *STDOUT;
    open STDOUT, '>', $self->{_text_ref}
        or $self->die( "Can't redirect STDOUT to the output buffer: $!\n" );
    my $parser = Pod::Man->new( $self->_get_podman_switches );
    $self->debug( "Parsing $file\n" );
    $parser->parse_from_file( $file );
    $self->debug( "Done parsing $file\n" );
    close STDOUT;
    # Measure the captured text itself: the length of the reference (its
    # stringified address) is never zero, so testing it can never detect
    # empty output.
    $self->die( "No output from Pod::Man!\n" )
        unless defined( ${ $self->{_text_ref} } )
           and length( ${ $self->{_text_ref} } );
    $self->_save_pod_man_output if $self->debugging;
    return SUCCESS;
}
sub _save_pod_man_output {
    # Print the captured Pod::Man text to $fh. Without a handle, the text
    # goes to a new file named "podman.out.<pid>.txt" in the current
    # directory.
    my ($self, $fh) = @_;
    if (!$fh) {
        my $file = "podman.out.$$.txt";
        $self->debug( "Writing $file with Pod::Man output\n" );
        open my $dump_fh, '>', $file;
        $fh = $dump_fh;
    }
    print { $fh } ${ $self->{_text_ref} };
}
sub _have_groff_with_utf8 {
    # True when the configured formatter is groff and its version, as
    # printed by "<formatter> -v", is at least 1.20.1. Returns 0 when the
    # formatter is not groff. An older (or undeterminable) version is
    # reported with a warning and yields false.
    my( $self ) = @_;
    return 0 unless $self->_is_groff;
    my $roffer = $self->__nroffer;
    my $minimum_groff_version = '1.20.1';
    my $version_string = `$roffer -v`;
    my( $version ) = defined $version_string
        ? $version_string =~ /\(?groff\)? version (\d+\.\d+(?:\.\d+)?)/
        : ();
    if( defined $version ) {
        $self->debug( "Found groff $version\n" );
    }
    else {
        $self->debug( "Could not determine the groff version\n" );
        $version = '0';
    }

    # Compare the dotted versions component by component, numerically; a
    # plain string comparison ranks e.g. 1.9 above 1.20.1. Missing
    # components count as 0.
    my @have = split /\./, $version;
    my @need = split /\./, $minimum_groff_version;
    my $order = 0;
    while( !$order and ( @have or @need ) ) {
        my $have_part = @have ? shift @have : 0;
        my $need_part = @need ? shift @need : 0;
        $order = $have_part <=> $need_part;
    }

    if( $order < 0 ) {
        $self->warn(
            "You have an old groff." .
            " Update to version $minimum_groff_version for good Unicode support.\n" .
            "If you don't upgrade, wide characters may come out oddly.\n"
        );
    }
    return $order >= 0;
}
sub _have_mandoc_with_utf8 {
    # True when the configured formatter is mandoc and the command
    # "mandoc -Tlocale -V" exits with status 0. The command is not run at
    # all when the formatter is not mandoc.
    my ($self) = @_;
    my $is_mandoc = $self->_is_mandoc;
    return $is_mandoc unless $is_mandoc;
    return !system('mandoc -Tlocale -V > /dev/null 2>&1');
}
sub _collect_nroff_switches {
    # Assemble the command-line switches for the formatter: always -man
    # plus the device switches; for nroff/groff writing to a terminal
    # possibly a line-length register; and -c for nroff/groff on Cygwin.
    my ($self) = @_;
    my @switches = ('-man', $self->_get_device_switches);

    # Thanks to Brendan O'Dea for contributing the following block
    if ($self->_is_roff and -t STDOUT) {
        if (my ($cols) = $self->_get_columns) {
            my $line_length = $cols * 39 / 40;
            my $usable = $line_length > $cols - 2 ? $line_length : $cols - 2;
            push @switches, '-rLL=' . (int $line_length) . 'n' if $usable > 80;
        }
    }

    # I hear persistent reports that adding a -c switch to $render
    # solves many people's problems. But I also hear that some mans
    # don't have a -c switch, so that unconditionally adding it here
    # would presumably be a Bad Thing -- sburke@cpan.org
    push @switches, '-c' if $self->_is_roff and $self->is_cygwin;

    return @switches;
}
sub _get_device_switches {
    # Output-device switches for the formatter. The first matching case
    # wins: nroff (none), groff with UTF-8 support, EBCDIC, mandoc with
    # locale support, plain mandoc (none), and Latin-1 as the last resort.
    my ($self) = @_;
    return ()                   if $self->_is_nroff;
    return qw(-Kutf8 -Tutf8)    if $self->_have_groff_with_utf8;
    return qw(-Tcp1047)         if $self->_is_ebcdic;
    return qw(-Tlocale)         if $self->_have_mandoc_with_utf8;
    return ()                   if $self->_is_mandoc;
    return qw(-Tlatin1);
}
# True when the configured formatter is nroff or groff.
sub _is_roff {
    my ($self) = @_;
    return ($self->_is_nroff or $self->_is_groff);
}

# True when the configured formatter command contains the word "nroff".
sub _is_nroff {
    my ($self) = @_;
    return $self->__nroffer =~ /\bnroff\b/;
}

# True when the configured formatter command contains the word "groff".
sub _is_groff {
    my ($self) = @_;
    return $self->__nroffer =~ /\bgroff\b/;
}

# True when the configured formatter command contains the word "mandoc".
sub _is_mandoc {
    my ($self) = @_;
    return $self->__nroffer =~ /\bmandoc\b/;
}

# EBCDIC detection is not implemented: always 0.
sub _is_ebcdic {
    my ($self) = @_;
    return 0;
}
sub _filter_through_nroff {
    # Pipe the text in ${ $self->{_text_ref} } through the configured roff
    # formatter and replace it with the formatter's output.
    #
    # The formatter command comes from __nroffer; anything after the
    # program name is treated as extra switches and appended to those from
    # _collect_nroff_switches. The text is written in chunks, draining
    # available output between chunks.
    #
    # Returns SUCCESS when the formatter produced output and FAILED when
    # it produced none (which includes the case of empty input). When $?
    # is non-zero after the run, the result of _fallback_to_pod is
    # returned instead. Dies (via $self->die) when no program name can be
    # extracted or when a write to the formatter fails.
    my( $self ) = shift;
    $self->debug( "Filtering through " . $self->__nroffer() . "\n" );
    # Maybe someone set rendering switches as part of the opt_n value
    # Deal with that here.
    my ($render, $switches) = $self->__nroffer() =~ /\A([\/a-zA-Z0-9_\.-]+)\b(.+)?\z/;
    $self->die("no nroffer!?") unless $render;
    my @render_switches = $self->_collect_nroff_switches;
    if ( $switches ) {
        # Eliminate whitespace
        $switches =~ s/\s//g;
        # Then separate the switches with a zero-width positive
        # lookahead on the dash.
        #
        # See:
        # http://www.effectiveperlprogramming.com/blog/1411
        # for a good discussion of this technique
        push @render_switches, split(/(?=-)/, $switches);
    }
    $self->debug( "render is $render\n" );
    $self->debug( "render options are @render_switches\n" );
    require Symbol;
    require IPC::Open3;
    require IO::Handle;
    my $pid = IPC::Open3::open3(
        my $writer,
        my $reader,
        my $err = Symbol::gensym(),
        $render,
        @render_switches
    );
    $reader->autoflush(1);
    use IO::Select;
    my $selector = IO::Select->new( $reader );
    $self->debug( "Writing to pipe to $render\n" );
    my $offset = 0;
    my $chunk_size = 4096;
    my $length = length( ${ $self->{_text_ref} } ) || 0;
    my $chunks = $length / $chunk_size;
    my $done;
    my $buffer;
    # Stop as soon as everything has been written: a further syswrite at
    # $offset == $length writes zero bytes and returns 0, which would be
    # mistaken for a failure whenever the length is a multiple of the
    # chunk size.
    while( $offset < $length ) {
        $self->debug( "Writing chunk $chunks\n" ); $chunks++;
        my $written = syswrite( $writer, ${ $self->{_text_ref} }, $chunk_size, $offset );
        $self->die( $! ) unless $written;
        # Advance by what was actually written so that a partial write
        # does not skip part of the text.
        $offset += $written;
        $self->debug( "Checking read\n" );
        READ: {
            last READ unless $selector->can_read( 0.01 );
            $self->debug( "Reading\n" );
            my $bytes = sysread $reader, $buffer, 4096;
            $self->debug( "Read $bytes bytes\n" );
            $done .= $buffer;
            $self->debug( sprintf "Output is %d bytes\n",
                length $done
            );
            next READ;
        }
    }
    close $writer;
    $self->debug( "Done writing\n" );
    # read any leftovers
    $done .= do { local $/; <$reader> };
    $self->debug( sprintf "Done reading. Output is %d bytes\n",
        length $done
    );
    # NOTE(review): $? is consulted here and below without a waitpid on
    # $pid; IPC::Open3 does not reap the child itself, so this may be the
    # status of an earlier command -- confirm before relying on it.
    if( $? ) {
        $self->warn( "Error from pipe to $render!\n" );
        $self->debug( 'Error: ' . do { local $/; <$err> } );
    }
    close $reader;
    if( my $err = $? ) {
        $self->debug(
            "Nonzero exit ($?) while running `$render @render_switches`.\n" .
            "Falling back to Pod::Perldoc::ToPod\n"
        );
        return $self->_fallback_to_pod( @_ );
    }
    $self->debug( "Output:\n----\n$done\n----\n" );
    ${ $self->{_text_ref} } = $done;
    return length ${ $self->{_text_ref} } ? SUCCESS : FAILED;
}
# Render $file as a man page and print the result to $outfh.
#
# The text travels through a pipeline of filters that all operate on the
# scalar referenced by $self->{_text_ref}: Pod::Man conversion, the external
# renderer, then post-processing.  If the renderer step does not report
# SUCCESS the whole job is handed to the Pod fallback instead.
sub parse_from_file {
    my( $self, $file, $outfh ) = @_;

    my $output;
    $self->{_text_ref} = \$output;

    $self->_parse_with_pod_man( $file );

    # so far, nroff is an external command so we ensure it worked
    my $result = $self->_filter_through_nroff;
    if( $result != SUCCESS ) {
        return $self->_fallback_to_pod( @_ );
    }

    $self->_post_nroff_processing;

    unless( print { $outfh } $output ) {
        $self->die( "Can't print to $$self{__output_file}: $!" );
    }

    return;
}
# Give up on man-page rendering and emit the document as plain Pod via
# Pod::Perldoc::ToPod.
#
# Expects the arguments for ToPod's parse_from_file: ( $file, $outfh ).
# Callers in this class forward their entire @_, which starts with this
# object again; such leading copies of the invocant are dropped so that ToPod
# receives the file name first rather than an object.
#
# Returns whatever Pod::Perldoc::ToPod->parse_from_file returns.
sub _fallback_to_pod {
    my( $self, @args ) = @_;
    $self->warn( "Falling back to Pod because there was a problem!\n" );

    require Scalar::Util;
    my $self_addr = Scalar::Util::refaddr( $self );
    while( @args and ref $args[0] and defined $self_addr ) {
        my $addr = Scalar::Util::refaddr( $args[0] );
        last unless defined $addr and $addr == $self_addr;
        shift @args;
    }

    require Pod::Perldoc::ToPod;
    return Pod::Perldoc::ToPod->new->parse_from_file( @args );
}
# maybe there's a user setting we should check?
# Number of spaces a tab is expanded to; currently a fixed value.
sub _get_tab_width {
    return 4;
}
# Replace every tab in the working text with a fixed run of spaces whose
# length comes from _get_tab_width.  This is a plain substitution; it does
# not align to tab stops.
sub _expand_tabs {
    my $self = shift;
    my $padding  = ' ' x $self->_get_tab_width;
    my $text_ref = $self->{_text_ref};
    ${ $text_ref } =~ s/\t/$padding/g;
}
# Clean up the renderer's output: expand tabs on HP-UX, optionally strip
# the nroff header and footer, then run the Unicode handling step.
# Always returns 1.
sub _post_nroff_processing {
    my $self = shift;

    if( $self->is_hpux ) {
        $self->debug( "On HP-UX, I'm going to expand tabs for you\n" );
        # this used to be a pipe to `col -x` for HP-UX
        $self->_expand_tabs;
    }

    unless( $self->{'__filter_nroff'} ) {
        $self->debug( "filter_nroff is not set, so not filtering\n" );
    }
    else {
        $self->debug( "filter_nroff is set, so filtering\n" );
        $self->_remove_nroff_header;
        $self->_remove_nroff_footer;
    }

    $self->_handle_unicode;

    return 1;
}
# I don't think this does anything since there aren't two consecutive
# newlines in the Pod::Man output
# Placeholder for stripping the header that nroff adds.  It only logs a
# debug message and returns 1; the old implementation is kept below for
# reference.
sub _remove_nroff_header {
    my $self = shift;
    $self->debug( "_remove_nroff_header is still a stub!\n" );
    return 1;

    # my @data = split /\n{2,}/, shift;
    # shift @data while @data and $data[0] !~ /\S/; # Go to header
    # shift @data if @data and $data[0] =~ /Contributed\s+Perl/; # Skip header
}
# I don't think this does anything since there aren't two consecutive
# newlines in the Pod::Man output
# Placeholder for stripping the footer that nroff adds.  It only logs a
# debug message and returns 1.
#
# A substitution used to sit after the return statement where it could never
# run; it is preserved here, with the older split-based approach, purely as
# reference for whoever implements this:
#
#   ${ $self->{_text_ref} } =~ s/\n\n+.*\w.*\Z//m;
#
#   my @data = split /\n{2,}/, shift;
#   pop @data if @data and $data[-1] =~ /^\w/; # Skip footer, like
#   28/Jan/99 perl 5.005, patch 53 1
sub _remove_nroff_footer {
    my( $self ) = @_;
    $self->debug( "_remove_nroff_footer is still a stub!\n" );
    return 1;
}
# Report whether Unicode needs no further work from us.  The groff check is
# consulted first, but the result falls back to 1, so this is always true:
# there is currently no case that needs _handle_unicode.
sub _unicode_already_handled {
    my $self = shift;
    my $groff_copes = $self->_have_groff_with_utf8;
    return $groff_copes || 1;
}
# Rewrite non-ASCII characters in the working text as groff \[uXXXX]
# escapes.  This is normally preconv's job and is skipped entirely when
# _unicode_already_handled says so (groff 1.20 and later do not need it).
sub _handle_unicode {
    my $self = shift;
    return 1 if $self->_unicode_already_handled;

    require Encode;

    # The buffer holds UTF-8 bytes; decode so that we work on characters.
    my $chars = Encode::decode( 'UTF-8', ${ $self->{_text_ref} } );

    # Background reading:
    # http://www.mail-archive.com/groff@gnu.org/msg01378.html
    # http://linux.die.net/man/7/groff_char
    # http://www.gnu.org/software/groff/manual/html_node/Using-Symbols.html
    # http://lists.gnu.org/archive/html/groff/2011-05/msg00007.html
    # http://www.simplicidade.org/notes/archives/2009/05/fixing_the_pod.html
    # http://lists.freebsd.org/pipermail/freebsd-questions/2011-July/232239.html
    $chars =~ s/(\P{ASCII})/sprintf( '\\[u%04X]', ord $1 )/eg;

    # should we encode?
    ${ $self->{_text_ref} } = $chars;
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToMan - let Perldoc render Pod as man pages
=head1 SYNOPSIS
perldoc -o man Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Man and C<groff> for reading Pod pages.
The following options are supported: center, date, fixed, fixedbold,
fixeditalic, fixedbolditalic, quotes, release, section
(Those options are explained in L<Pod::Man>.)
For example:
perldoc -o man -w center:Pod Some::Modulename
=head1 CAVEAT
This module may change to use a different pod-to-nroff formatter class
in the future, and this may change what options are supported.
=head1 SEE ALSO
L<Pod::Man>, L<Pod::Perldoc>, L<Pod::Perldoc::ToNroff>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2011 brian d foy. All rights reserved.
Copyright (c) 2002,3,4 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToANSI.pm 0000644 00000004377 15051135563 0007546 0 ustar 00 package Pod::Perldoc::ToANSI;
use strict;
use warnings;
use parent qw(Pod::Perldoc::BaseTo);
use vars qw($VERSION);
$VERSION = '3.28';
# Formatter traits consulted by Perldoc: output goes through a pager, the
# output handle is not put into binmode, and saved files end in ".txt".
sub is_pageable {
    return 1;
}

sub write_with_binmode {
    return 0;
}

sub output_extension {
    return 'txt';
}
use Pod::Text::Color ();
# Option accessors.  Each one hands its option name, plus any arguments, to
# _perldoc_elem and returns whatever that returns.
sub alt {
    my $self = shift;
    return $self->_perldoc_elem('alt', @_);
}

sub indent {
    my $self = shift;
    return $self->_perldoc_elem('indent', @_);
}

sub loose {
    my $self = shift;
    return $self->_perldoc_elem('loose', @_);
}

sub quotes {
    my $self = shift;
    return $self->_perldoc_elem('quotes', @_);
}

sub sentence {
    my $self = shift;
    return $self->_perldoc_elem('sentence', @_);
}

sub width {
    my $self = shift;
    return $self->_perldoc_elem('width', @_);
}
# Constructor: returns a fresh, empty object.  Works as a class method or
# as an instance method (the new object then shares the instance's class).
sub new {
    my $invocant = $_[0];
    my $class = ref($invocant) || $invocant;
    return bless {}, $class;
}
# Format a document with Pod::Text::Color.  Every key of this object that
# does not start with an underscore is passed to the Pod::Text::Color
# constructor as an option; the remaining arguments go straight on to its
# parse_from_file.
sub parse_from_file {
    my $self = shift;

    my @options;
    for my $key ( grep { !m/^_/s } keys %$self ) {
        push @options, $key, $self->{$key};
    }

    if( defined(&Pod::Perldoc::DEBUG) and Pod::Perldoc::DEBUG() ) {
        print "About to call new Pod::Text::Color ",
            $Pod::Text::VERSION ? "(v$Pod::Text::VERSION) " : '',
            "with options: ",
            @options ? "[@options]" : "(nil)", "\n";
    }

    Pod::Text::Color->new(@options)->parse_from_file(@_);
}
1;
=head1 NAME
Pod::Perldoc::ToANSI - render Pod with ANSI color escapes
=head1 SYNOPSIS
perldoc -o ansi Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Text::Color as a formatter class.
It supports the following options, which are explained in
L<Pod::Text>: alt, indent, loose, quotes, sentence, width
For example:
perldoc -o ansi -w indent:5 Some::Modulename
=head1 CAVEAT
This module may change to use a different text formatter class in the
future, and this may change what options are supported.
=head1 SEE ALSO
L<Pod::Text>, L<Pod::Text::Color>, L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2011 Mark Allen. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToRtf.pm 0000644 00000004145 15051135563 0007540 0 ustar 00 package Pod::Perldoc::ToRtf;
use strict;
use warnings;
use parent qw( Pod::Simple::RTF );
use vars qw($VERSION);
$VERSION = '3.28';
# Formatter traits consulted by Perldoc: RTF output is not paged, the
# output handle is not put into binmode, and saved files end in ".rtf".
sub is_pageable {
    return 0;
}

sub write_with_binmode {
    return 0;
}

sub output_extension {
    return 'rtf';
}
# Open the generated RTF file in a viewer.  Only acts when the Perldoc
# object reports MSWin32; otherwise returns nothing so the caller can
# handle the file itself.  The viewer comes from $ENV{RTFREADER}, defaulting
# to write.exe.  Returns 1 if the viewer ran successfully, 0 if not.
sub page_for_perldoc {
    my($self, $tempfile, $perldoc) = @_;
    return unless $perldoc->IS_MSWin32;

    my $rtf_pager = $ENV{'RTFREADER'} || 'write.exe';
    $perldoc->aside( "About to launch <\"$rtf_pager\" \"$tempfile\">\n" );

    my $status = system( qq{"$rtf_pager"}, qq{"$tempfile"} );
    return $status == 0 ? 1 : 0;
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToRtf - let Perldoc render Pod as RTF
=head1 SYNOPSIS
perldoc -o rtf Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Simple::RTF as a formatter class.
This is actually a Pod::Simple::RTF subclass, and inherits
all its options.
You have to have Pod::Simple::RTF installed (from the Pod::Simple dist),
or this module won't work.
If Perldoc is running under MSWin and uses this class as a formatter,
the output will be opened with F<write.exe> or whatever program is
specified in the environment variable C<RTFREADER>. For example, to
specify that RTF files should be opened the same as they are when you
double-click them, you would do C<set RTFREADER=start.exe> in your
F<autoexec.bat>.
Handy tip: put C<set PERLDOC=-ortf> in your F<autoexec.bat>
and that will set this class as the default formatter to run when
you do C<perldoc whatever>.
=head1 SEE ALSO
L<Pod::Simple::RTF>, L<Pod::Simple>, L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Perldoc/ToXml.pm 0000644 00000002577 15051135563 0007554 0 ustar 00 package Pod::Perldoc::ToXml;
use strict;
use warnings;
use vars qw($VERSION);
use parent qw( Pod::Simple::XMLOutStream );
use vars qw($VERSION);
$VERSION = '3.28';
# Formatter traits consulted by Perldoc: XML output is not paged, the
# output handle is not put into binmode, and saved files end in ".xml".
sub is_pageable {
    return 0;
}

sub write_with_binmode {
    return 0;
}

sub output_extension {
    return 'xml';
}
1;
__END__
=head1 NAME
Pod::Perldoc::ToXml - let Perldoc render Pod as XML
=head1 SYNOPSIS
perldoc -o xml -d out.xml Some::Modulename
=head1 DESCRIPTION
This is a "plug-in" class that allows Perldoc to use
Pod::Simple::XMLOutStream as a formatter class.
This is actually a Pod::Simple::XMLOutStream subclass, and inherits
all its options.
You have to have installed Pod::Simple::XMLOutStream (from the Pod::Simple
dist), or this class won't work.
=head1 SEE ALSO
L<Pod::Simple::XMLOutStream>, L<Pod::Simple>, L<Pod::Perldoc>
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Current maintainer: Mark Allen C<< <mallen@cpan.org> >>
Past contributions from:
brian d foy C<< <bdfoy@cpan.org> >>
Adriano R. Ferreira C<< <ferreira@cpan.org> >>,
Sean M. Burke C<< <sburke@cpan.org> >>
=cut
Text.pm 0000644 00000107145 15051135563 0006042 0 ustar 00 # Convert POD data to formatted text.
#
# This module converts POD to formatted text. It replaces the old Pod::Text
# module that came with versions of Perl prior to 5.6.0 and attempts to match
# its output except for some specific circumstances where other decisions
# seemed to produce better output. It uses Pod::Simple and is designed to be
# very easy to subclass.
#
# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
##############################################################################
# Modules and declarations
##############################################################################
package Pod::Text;
use 5.006;
use strict;
use warnings;
use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
use Carp qw(carp croak);
use Encode qw(encode);
use Exporter ();
use Pod::Simple ();
@ISA = qw(Pod::Simple Exporter);
# We have to export pod2text for backward compatibility.
@EXPORT = qw(pod2text);
$VERSION = '4.11';
# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available. Code
# taken from Pod::Simple 3.32, but was only added in 3.30.
# File-scoped holders for the non-breaking space and soft hyphen characters.
my ($NBSP, $SHY);
# NOTE(review): "ge" is a string comparison, so both version tests below
# compare text rather than numbers -- confirm this is intended.
if ($Pod::Simple::VERSION ge 3.30) {
# Pod::Simple provides the characters itself; reuse them.
$NBSP = $Pod::Simple::nbsp;
$SHY = $Pod::Simple::shy;
} else {
if ($] ge 5.007_003) {
# Map the Unicode code points U+00A0 and U+00AD to native characters.
$NBSP = chr utf8::unicode_to_native(0xA0);
$SHY = chr utf8::unicode_to_native(0xAD);
} elsif (Pod::Simple::ASCII) {
# Use the code points 0xA0 and 0xAD directly.
$NBSP = "\xA0";
$SHY = "\xAD";
} else {
# Presumably the EBCDIC code points for the same two characters --
# TODO confirm against Pod::Simple.
$NBSP = "\x41";
$SHY = "\xCA";
}
}
##############################################################################
# Initialization
##############################################################################
# This function handles code blocks. It's registered as a callback to
# Pod::Simple and therefore doesn't work as a regular method call, but all it
# does is call output_code with the line.
# Code-line callback: receives the line, its number, and the parser object,
# and forwards the line (with a newline added) to the parser's output_code.
# The line number is not used.
sub handle_code {
    my ($line, undef, $parser) = @_;
    return $parser->output_code ($line . "\n");
}
# Initialize the object and set various Pod::Simple options that we need.
# Here, we also process any additional options passed to the constructor or
# set up defaults if none were given. Note that all internal object keys are
# in all-caps, reserving all lower-case object keys for Pod::Simple and user
# arguments.
# Construct a formatter object.
#
# Arguments: a list of option => value pairs.  Each option is stored in the
# object under the key "opt_<option>".  Options given defaults here: alt,
# indent, margin, loose, sentence, width, quotes, errors.  The stderr option
# is folded into errors and then discarded.
#
# Croaks on an unrecognised errors setting or on a quotes specification that
# is neither "none", a single character, nor of even length.
#
# Returns the new object.
sub new {
    my $class = shift;
    my $self = $class->SUPER::new;

    # Tell Pod::Simple to handle S<> by automatically inserting
    # non-breaking spaces.
    $self->nbsp_for_S (1);

    # Tell Pod::Simple to keep whitespace whenever possible.
    if ($self->can ('preserve_whitespace')) {
        $self->preserve_whitespace (1);
    } else {
        $self->fullstop_space_harden (1);
    }

    # The =for and =begin targets that we accept.
    $self->accept_targets (qw/text TEXT/);

    # Ensure that contiguous blocks of code are merged together.  Otherwise,
    # some of the guesswork heuristics don't work right.
    $self->merge_text (1);

    # Pod::Simple doesn't do anything useful with our arguments, but we want
    # to put them in our object as hash keys and values.  This could cause
    # problems if we ever clash with Pod::Simple's own internal class
    # variables.
    my %opts = @_;
    my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
    %$self = (%$self, @opts);

    # Send errors to stderr if requested.
    if ($$self{opt_stderr} and not $$self{opt_errors}) {
        $$self{opt_errors} = 'stderr';
    }
    delete $$self{opt_stderr};

    # Validate the errors parameter and act on it.
    if (not defined $$self{opt_errors}) {
        $$self{opt_errors} = 'pod';
    }
    if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') {
        $self->no_errata_section (1);
        $self->complain_stderr (1);
        if ($$self{opt_errors} eq 'die') {
            $$self{complain_die} = 1;
        }
    } elsif ($$self{opt_errors} eq 'pod') {
        $self->no_errata_section (0);
        $self->complain_stderr (0);
    } elsif ($$self{opt_errors} eq 'none') {
        $self->no_errata_section (1);
        $self->no_whining (1);
    } else {
        # The option lives under opt_errors; report that value so the
        # message actually shows what the caller passed.
        croak (qq(Invalid errors setting: "$$self{opt_errors}"));
    }
    delete $$self{errors};

    # Initialize various things from our parameters.
    $$self{opt_alt} = 0 unless defined $$self{opt_alt};
    $$self{opt_indent} = 4 unless defined $$self{opt_indent};
    $$self{opt_margin} = 0 unless defined $$self{opt_margin};
    $$self{opt_loose} = 0 unless defined $$self{opt_loose};
    $$self{opt_sentence} = 0 unless defined $$self{opt_sentence};
    $$self{opt_width} = 76 unless defined $$self{opt_width};

    # Figure out what quotes we'll be using for C<> text.  An even-length
    # specification is split in half into left and right quotes.
    $$self{opt_quotes} ||= '"';
    if ($$self{opt_quotes} eq 'none') {
        $$self{LQUOTE} = $$self{RQUOTE} = '';
    } elsif (length ($$self{opt_quotes}) == 1) {
        $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
    } elsif (length ($$self{opt_quotes}) % 2 == 0) {
        my $length = length ($$self{opt_quotes}) / 2;
        $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length);
        $$self{RQUOTE} = substr ($$self{opt_quotes}, $length);
    } else {
        croak qq(Invalid quote specification "$$self{opt_quotes}");
    }

    # If requested, do something with the non-POD text.
    $self->code_handler (\&handle_code) if $$self{opt_code};

    # Return the created object.
    return $self;
}
##############################################################################
# Core parsing
##############################################################################
# This is the glue that connects the code below with Pod::Simple itself. The
# goal is to convert the event stream coming from the POD parser into method
# calls to handlers once the complete content of a tag has been seen. Each
# paragraph or POD command will have textual content associated with it, and
# as soon as all of a paragraph or POD command has been seen, that content
# will be passed in to the corresponding method for handling that type of
# object. The exceptions are handlers for lists, which have opening tag
# handlers and closing tag handlers that will be called right away.
#
# The internal hash key PENDING is used to store the contents of a tag until
# all of it has been seen. It holds a stack of open tags, each one
# represented by a tuple of the attributes hash for the tag and the contents
# of the tag.
# Add a block of text to the contents of the current node, formatting it
# according to the current formatting instructions as we do.
# Append a run of text to the contents of the innermost open tag, i.e. the
# last entry on the PENDING stack.
sub _handle_text {
    my ($self, $text) = @_;
    my $current = $$self{PENDING}[-1];
    $current->[1] .= $text;
}
# Given an element name, get the corresponding method name.
# Turn an element name into the method-name suffix used for dispatch:
# dashes become underscores, ASCII upper case becomes lower case, and any
# character other than underscore, a-z or 0-9 is dropped.
sub method_for_element {
    my ($self, $element) = @_;
    $element =~ tr/A-Z\-/a-z_/;
    $element =~ tr/_a-z0-9//cd;
    return $element;
}
# Handle the start of a new element. If cmd_element is defined, assume that
# we need to collect the entire tree for this element before passing it to the
# element method, and create a new tree into which we'll collect blocks of
# text and nested elements. Otherwise, if start_element is defined, call it.
# Called when an element opens.  If the object has a cmd_ handler for the
# element, push a fresh [ attributes, '' ] entry onto PENDING so that the
# element's contents can be collected.  Otherwise, if it has a start_
# handler, call that with the attributes and an empty string.
sub _handle_element_start {
    my ($self, $element, $attrs) = @_;
    my $name = $self->method_for_element ($element);
    my $starter = 'start_' . $name;

    if ($self->can ("cmd_$name")) {
        push (@{ $$self{PENDING} }, [ $attrs, '' ]);
    } elsif ($self->can ($starter)) {
        $self->$starter ($attrs, '');
    }
}
# Handle the end of an element. If we had a cmd_ method for this element,
# this is where we pass along the text that we've accumulated. Otherwise, if
# we have an end_ method for the element, call that.
# Called when an element closes.  With a cmd_ handler, pop the element's
# collected [ attributes, text ] off PENDING and pass them to the handler;
# a defined result is appended to the enclosing element's text when more
# than one entry remains on PENDING, and sent to output otherwise.  Without
# a cmd_ handler, call the end_ handler if there is one.
sub _handle_element_end {
    my ($self, $element) = @_;
    my $name = $self->method_for_element ($element);
    my $command = 'cmd_' . $name;
    my $closer = 'end_' . $name;

    if ($self->can ($command)) {
        my $tag = pop @{ $$self{PENDING} };
        my $text = $self->$command (@$tag);
        if (defined $text) {
            if (@{ $$self{PENDING} } > 1) {
                $$self{PENDING}[-1][1] .= $text;
            } else {
                $self->output ($text);
            }
        }
    } elsif ($self->can ($closer)) {
        $self->$closer ();
    }
}
##############################################################################
# Output formatting
##############################################################################
# Wrap a line, indenting by the current left margin. We can't use Text::Wrap
# because it plays games with tabs. We can't use formline, even though we'd
# really like to, because it screws up non-printing characters. So we have to
# do the wrapping ourselves.
# Wrap text to the available width (opt_width minus the current MARGIN),
# prefixing every line with MARGIN spaces.  Lines break at whitespace when
# possible and are cut at the width when not.  Trailing whitespace on the
# result is replaced by two newlines.  Returns the wrapped text.
sub wrap {
    my ($self, $text) = @_;
    my $indent = ' ' x $$self{MARGIN};
    my $width = $$self{opt_width} - $$self{MARGIN};
    my $wrapped = '';

    while (length ($text) > $width) {
        last unless $text =~ s/^([^\n]{0,$width})\s+//
                 || $text =~ s/^([^\n]{$width})//;
        $wrapped .= $indent . $1 . "\n";
    }
    $wrapped .= $indent . $text;
    $wrapped =~ s/\s+$/\n\n/;
    return $wrapped;
}
# Reformat a paragraph of text for the current margin. Takes the text to
# reformat and returns the formatted text.
# Normalise the whitespace in a paragraph and hand it to wrap.  With the
# sentence option set, line ends are tidied and newlines become spaces
# using the substitutions below; otherwise every run of whitespace becomes
# a single space.  Returns what wrap returns.
sub reformat {
    my ($self, $text) = @_;

    if ($$self{opt_sentence}) {
        $text =~ s/ +$//mg;
        $text =~ s/\.\n/. \n/g;
        $text =~ s/\n/ /g;
        $text =~ s/ +/ /g;
    } else {
        $text =~ s/\s+/ /g;
    }
    return $self->wrap ($text);
}
# Output text to the output device. Replace non-breaking spaces with spaces
# and soft hyphens with nothing, and then try to fix the output encoding if
# necessary to match the input encoding unless UTF-8 output is forced. This
# preserves the traditional pass-through behavior of Pod::Text.
# Print text to the output file handle.  The pieces are joined, non-breaking
# spaces become ordinary spaces, and soft hyphens are removed.  Unless the
# utf8 option is set, a change in the document's declared encoding is
# applied to the output handle as an :encoding layer.  When ENCODE is set
# the text is encoded to UTF-8 before printing.
sub output {
    my ($self, @pieces) = @_;
    my $text = join ('', @pieces);

    $text =~ s/$NBSP/ /g if $NBSP;
    $text =~ s/$SHY//g   if $SHY;

    if (!$$self{opt_utf8}) {
        my $encoding = $$self{encoding} || '';
        if ($encoding && $encoding ne $$self{ENCODING}) {
            $$self{ENCODING} = $encoding;
            # Best effort: a failure to set the layer is ignored.
            eval { binmode ($$self{output_fh}, ":encoding($encoding)") };
        }
    }

    my $fh = $$self{output_fh};
    if ($$self{ENCODE}) {
        print { $fh } encode ('UTF-8', $text);
    } else {
        print { $fh } $text;
    }
}
# Output a block of code (something that isn't part of the POD text). Called
# by preprocess_paragraph only if we were given the code option. Exists here
# only so that it can be overridden by subclasses.
# Emit a block of non-POD code by passing it to output.  Kept as a
# separate method so that subclasses can override it.
sub output_code {
    my ($self, $code) = @_;
    return $self->output ($code);
}
##############################################################################
# Document initialization
##############################################################################
# Set up various things that have to be initialized on a per-document basis.
# Reset the per-document state: the CONTENTLESS flag, the indentation stack,
# the left margin (indent plus margin options), the pending-output stack,
# and the encoding bookkeeping.  Returns the empty string.
sub start_document {
    my ($self, $attrs) = @_;

    my $contentless = $$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING};
    if ($contentless) {
        $$self{CONTENTLESS} = 1;
    } else {
        delete $$self{CONTENTLESS};
    }

    # Initialize a few per-document variables.
    $$self{INDENTS} = [];                                     # Stack of indentations.
    $$self{MARGIN}  = $$self{opt_indent} + $$self{opt_margin}; # Default left margin.
    $$self{PENDING} = [[]];                                   # Pending output.

    # We have to redo encoding handling for each document.
    $$self{ENCODING} = '';

    # With UTF-8 output requested we normally encode text ourselves before
    # printing.  If the output handle already carries a UTF-8 PerlIO layer,
    # leave encoding to the layer instead.  The check runs inside eval so
    # that it is harmless on a Perl without PerlIO.
    $$self{ENCODE} = 0;
    if ($$self{opt_utf8}) {
        $$self{ENCODE} = 1;
        eval {
            my @options = (output => 1, details => 1);
            my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1];
            if ($flag & PerlIO::F_UTF8 ()) {
                $$self{ENCODE} = 0;
                $$self{ENCODING} = 'UTF-8';
            }
        };
    }

    return '';
}
# Handle the end of the document. The only thing we do is handle dying on POD
# errors, since Pod::Parser currently doesn't.
# At the end of a document, croak if the object was configured to die on
# POD errors (complain_die) and the parser reports that errors were seen.
sub end_document {
    my $self = $_[0];
    croak ("POD document had syntax errors")
        if $$self{complain_die} && $self->errors_seen;
}
##############################################################################
# Text blocks
##############################################################################
# Intended for subclasses to override, this method returns text with any
# non-printing formatting codes stripped out so that length() correctly
# returns the length of the text. For basic Pod::Text, it does nothing.
# Hook for subclasses that add non-printing formatting codes: return the
# string with those codes removed so its length can be measured.  The base
# class adds none, so the string comes back unchanged.
sub strip_format {
    my (undef, $string) = @_;
    return $string;
}
# This method is called whenever an =item command is complete (in other words,
# we've seen its associated paragraph or know for certain that it doesn't have
# one). It gets the paragraph associated with the item as an argument. If
# that argument is empty, just output the item tag; if it contains a newline,
# output the item tag followed by the newline. Otherwise, see if there's
# enough room for us to output the item tag in the margin of the text or if we
# have to put it on a separate line.
# Output a pending =item tag, optionally together with its paragraph text.
# Takes the paragraph text (may be empty or undefined).  Reads and clears
# $$self{ITEM}; carps and returns if no tag is pending.
sub item {
my ($self, $text) = @_;
my $tag = $$self{ITEM};
unless (defined $tag) {
carp "Item called without tag";
return;
}
undef $$self{ITEM};
# Calculate the indentation and margin. $fits is set to true if the tag
# will fit into the margin of the paragraph given our indentation level.
# The indentation is the most recently saved margin on the INDENTS stack,
# falling back to the indent option when the stack is empty.
my $indent = $$self{INDENTS}[-1];
$indent = $$self{opt_indent} unless defined $indent;
my $margin = ' ' x $$self{opt_margin};
# Measure the tag without any formatting codes a subclass may have added.
my $tag_length = length ($self->strip_format ($tag));
my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1);
# If the tag doesn't fit, or if we have no associated text, print out the
# tag separately. Otherwise, put the tag in the margin of the paragraph.
if (!$text || $text =~ /^\s+$/ || !$fits) {
# Temporarily move the margin out to the item's indentation so that the
# tag itself is formatted there; it is restored below.
my $realindent = $$self{MARGIN};
$$self{MARGIN} = $indent;
my $output = $self->reformat ($tag);
$output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
$output =~ s/\n*$/\n/;
# If the text is just whitespace, we have an empty item paragraph;
# this can result from =over/=item/=back without any intermixed
# paragraphs. Insert some whitespace to keep the =item from merging
# into the next paragraph.
$output .= "\n" if $text && $text =~ /^\s*$/;
$self->output ($output);
$$self{MARGIN} = $realindent;
$self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
} else {
# The tag fits: format the paragraph first, then overwrite the leading
# blanks of its first line with the tag.
my $space = ' ' x $indent;
$space =~ s/^$margin /$margin:/ if $$self{opt_alt};
$text = $self->reformat ($text);
$text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
my $tagspace = ' ' x $tag_length;
$text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
$self->output ($text);
}
}
# Handle a basic block of text. The only tricky thing here is that if there
# is a pending item tag, we need to format this as an item paragraph.
# Handle an ordinary paragraph.  Trailing whitespace is reduced to a single
# newline and one more newline is added; the result is formatted as the
# body of a pending =item if there is one, otherwise reformatted and
# output directly.  Returns the empty string.
sub cmd_para {
    my ($self, $attrs, $text) = @_;
    $text =~ s/\s+$/\n/;
    my $paragraph = $text . "\n";

    if (defined $$self{ITEM}) {
        $self->item ($paragraph);
    } else {
        $self->output ($self->reformat ($paragraph));
    }
    return '';
}
# Handle a verbatim paragraph. Just print it out, but indent it according to
# our margin.
# Handle a verbatim paragraph.  Any pending =item is flushed first.  A
# paragraph that is only whitespace is dropped.  Otherwise each line with
# content is prefixed with MARGIN spaces, trailing whitespace becomes two
# newlines, and the result is output.  Returns the empty string (or
# nothing for a whitespace-only paragraph).
sub cmd_verbatim {
    my ($self, $attrs, $text) = @_;
    $self->item if defined $$self{ITEM};
    return if $text =~ /^\s*$/;

    my $indent = ' ' x $$self{MARGIN};
    $text =~ s/^(\n*)([ \t]*\S+)/$1$indent$2/gm;
    $text =~ s/\s*$/\n\n/;
    $self->output ($text);
    return '';
}
# Handle literal text (produced by =for and similar constructs). Just output
# it with the minimum of changes.
# Handle literal data (from =for, =begin and the like).  Leading newlines
# are removed and up to two trailing newlines are replaced by exactly one
# before the data is output.  Returns the empty string.
sub cmd_data {
    my ($self, $attrs, $data) = @_;
    for ($data) {
        s/^\n+//;
        s/\n{0,2}$/\n/;
    }
    $self->output ($data);
    return '';
}
##############################################################################
# Headings
##############################################################################
# The common code for handling all headers. Takes the header text, the
# indentation, and the surrounding marker for the alt formatting method.
# Shared implementation for all heading levels.  Takes the heading text, the
# indentation for the normal format, and the marker string for the alt
# format.  Any pending =item is flushed first.  In alt format the heading is
# framed by the marker and its mirror image; otherwise it is indented by
# margin plus indentation.  Returns the empty string.
sub heading {
    my ($self, $text, $indent, $marker) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $text =~ s/\s+$//;

    unless ($$self{opt_alt}) {
        $text .= "\n" if $$self{opt_loose};
        my $margin = ' ' x ($$self{opt_margin} + $indent);
        $self->output ($margin . $text . "\n");
        return '';
    }

    my $closemark = scalar reverse ($marker);
    my $margin = ' ' x $$self{opt_margin};
    $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
    return '';
}
# First level heading.
# First level heading: no indentation.
sub cmd_head1 {
    my ($self, undef, $text) = @_;
    return $self->heading ($text, 0, '====');
}

# Second level heading: indented by half the indent option.
sub cmd_head2 {
    my ($self, undef, $text) = @_;
    my $indent = $$self{opt_indent} / 2;
    return $self->heading ($text, $indent, '== ');
}

# Third level heading: indented by two thirds of the indent option, plus a
# half.
sub cmd_head3 {
    my ($self, undef, $text) = @_;
    my $indent = $$self{opt_indent} * 2 / 3 + 0.5;
    return $self->heading ($text, $indent, '= ');
}

# Fourth level heading: indented by three quarters of the indent option,
# plus a half.
sub cmd_head4 {
    my ($self, undef, $text) = @_;
    my $indent = $$self{opt_indent} * 3 / 4 + 0.5;
    return $self->heading ($text, $indent, '- ');
}
##############################################################################
# List handling
##############################################################################
# Handle the beginning of an =over block. Takes the type of the block as the
# first argument, and then the attr hash. This is called by the handlers for
# the four different types of lists (bullet, number, text, and block).
# Begin an =over block.  Takes the block's attribute hash.  Any pending
# =item is flushed, the current margin is saved on the INDENTS stack, and
# the margin grows by the block's indent attribute -- or by the indent
# option when the attribute is missing or is not a short integer.
# Returns the empty string.
sub over_common_start {
    my ($self, $attrs) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    # Find the indentation level.
    my $indent = $$attrs{indent};
    my $usable = defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/;
    $indent = $$self{opt_indent} unless $usable;

    # Add this to our stack of indents and increase our current margin.
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($indent + 0);
    return '';
}
# End an =over block. Takes no options other than the class pointer. Output
# any pending items and then pop one level of indentation.
# End an =over block: flush any pending =item, then restore the margin that
# was saved on the INDENTS stack when the block began.  Returns the empty
# string.
sub over_common_end {
    my $self = $_[0];
    $self->item ("\n\n") if defined $$self{ITEM};
    my $saved = pop @{ $$self{INDENTS} };
    $$self{MARGIN} = $saved;
    return '';
}
# Dispatch the start and end calls as appropriate.
# All four kinds of =over block open the same way: pass the attribute hash
# to over_common_start.
sub start_over_bullet {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub start_over_number {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub start_over_text {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub start_over_block {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

# ...and close the same way, through over_common_end.
sub end_over_bullet {
    my ($self) = @_;
    return $self->over_common_end;
}

sub end_over_number {
    my ($self) = @_;
    return $self->over_common_end;
}

sub end_over_text {
    my ($self) = @_;
    return $self->over_common_end;
}

sub end_over_block {
    my ($self) = @_;
    return $self->over_common_end;
}
# The common handler for all item commands. Takes the type of the item, the
# attributes, and then the text of the item.
# Shared handler for every kind of =item.  Takes the item type ('bullet',
# 'number', or anything else for text/block items), the attribute hash, and
# the item's text.
#
# Any previously pending item is flushed first.  The new tag is stored in
# $$self{ITEM}: '*' for bullets, the original number text for numbered
# items, and the whitespace-normalised item text otherwise.  For bullet and
# numbered items, text that came with the item is output immediately as the
# item's body.  Returns the empty string.
sub item_common {
    my ($self, $type, $attrs, $text) = @_;
    $self->item if defined $$self{ITEM};

    # Clean up the text.  We want to end up with two variables, one ($text)
    # which contains any body text after taking out the item portion, and
    # another ($item) which contains the actual item text.  Note the use of
    # the internal Pod::Simple attribute here; that's a potential land mine.
    $text =~ s/\s+$//;
    my $item;
    if ($type eq 'bullet') {
        $item = '*';
    } elsif ($type eq 'number') {
        $item = $$attrs{'~orig_content'};
    } else {
        # The whole text is the tag; there is no separate body.
        $item = $text;
        $item =~ s/\s*\n\s*/ /g;
        $text = '';
    }
    $$self{ITEM} = $item;

    # If body text for this item was included, go ahead and output that now.
    if ($text) {
        $text =~ s/\s*$/\n/;
        $self->item ($text);
    }
    return '';
}
# Dispatch the item commands to the appropriate place.
# Each kind of =item goes to item_common with its type name in front of
# the original arguments.
sub cmd_item_bullet {
    my ($self, @rest) = @_;
    return $self->item_common ('bullet', @rest);
}

sub cmd_item_number {
    my ($self, @rest) = @_;
    return $self->item_common ('number', @rest);
}

sub cmd_item_text {
    my ($self, @rest) = @_;
    return $self->item_common ('text', @rest);
}

sub cmd_item_block {
    my ($self, @rest) = @_;
    return $self->item_common ('block', @rest);
}
##############################################################################
# Formatting codes
##############################################################################
# The simple ones.
# Simple formatting codes.  Each receives ($self, $attrs, $text) and
# returns the formatted text.
#
# B<> and F<> are decorated only in the alternate output format.  The
# constructor stores that option as opt_alt, so that is the key checked
# here; a directly set "alt" key is still honoured for compatibility.
sub cmd_b { return ($_[0]{opt_alt} || $_[0]{alt}) ? "``$_[2]''" : $_[2] }
sub cmd_f { return ($_[0]{opt_alt} || $_[0]{alt}) ? "\"$_[2]\"" : $_[2] }

# I<> is always wrapped in asterisks; X<> (index entries) produces nothing.
sub cmd_i { return '*' . $_[2] . '*' }
sub cmd_x { return '' }
# Apply a whole bunch of messy heuristics to not quote things that don't
# benefit from being quoted. These originally come from Barrie Slaymaker and
# largely duplicate code in Pod::Man.
# Format C<> text.  Text that already looks quoted, or that looks like a
# variable, a function call, or a number, is returned as it is; everything
# else is wrapped in quotes (``...'' in alt format, otherwise the
# configured LQUOTE/RQUOTE).
sub cmd_c {
    my ($self, $attrs, $text) = @_;

    # Matches the optional array or hash subscript on a variable reference.
    # Kept in a variable because the pattern below uses it twice.
    my $index = '(?: \[.*\] | \{.*\} )?';

    # Things that gain nothing from being quoted.
    if ($text =~ m{
          ^\s*
          (?:
             ( [\'\`\"] ) .* \1                             # already quoted
           | \` .* \'                                       # `quoted'
           | \$+ [\#^]? \S $index                           # special ($^Foo, $")
           | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
           | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
           | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )? # a number
           | 0x [a-fA-F\d]+                                 # a hex constant
          )
          \s*\z
        }xo) {
        return $text;
    }

    # Everything else gets quoted.
    if ($$self{opt_alt}) {
        return "``$text''";
    }
    return "$$self{LQUOTE}$text$$self{RQUOTE}";
}
# L<> (link).  Non-URL links collapse to their text.  For URL links:
#
#   * no target, or target identical to the text: "<text>"
#   * anchor text given and opt_nourls set:       "text"
#   * anchor text given otherwise:                "text <target>"
#
# Arguments: the formatter object, the element attributes (type and to are
# consulted), and the formatted link text.
sub cmd_l {
    my ($self, $attrs, $text) = @_;
    return $text unless $$attrs{type} eq 'url';

    my $target = $$attrs{to};
    return "<$text>" if !defined ($target) || $target eq $text;
    return $text     if $$self{opt_nourls};
    return "$text <$target>";
}
##############################################################################
# Backwards compatibility
##############################################################################
# Legacy entry point mirroring the pod2text() function of the old Pod::Text
# module.
#
# Usage: pod2text ([-a] [-<width>] [INPUT [, OUTPUT_FH]])
#
#   -a          build the parser with alt => 1
#   -<number>   build the parser with width => <number>
#
# With two arguments, INPUT is opened for reading and output goes to
# OUTPUT_FH, which is closed afterwards.  Otherwise output goes to STDOUT and
# the remaining arguments are handed to parse_file().  Returns whatever
# parse_file() returns.  Croaks if INPUT cannot be opened.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.  The defined() guard keeps
    # a call with no arguments from matching against undef.
    while (defined ($_[0]) && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt => 1) }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we need to turn the first argument into a file
    # handle as well.
    if (defined $_[1]) {
        my @fhs = @_;
        local *IN;

        # NOTE(review): the two-argument open is deliberate.  Magic open is
        # what makes the legacy "<&STDIN" input form work, so do not convert
        # this to three-argument open.  It does mean the input name must come
        # from a trusted caller.
        open (IN, $fhs[0])
            or croak ("Can't open $fhs[0] for reading: $!\n");
        $fhs[0] = \*IN;
        $parser->output_fh ($fhs[1]);
        my $retval = $parser->parse_file ($fhs[0]);
        my $fh = $parser->output_fh ();
        close $fh;
        return $retval;
    } else {
        $parser->output_fh (\*STDOUT);
        return $parser->parse_file (@_);
    }
}
# Convert one POD source, resetting the parser first so that a single object
# can be reused for several documents.
#
# An optional leading hash reference is accepted for compatibility with the
# old Pod::Parser interface; the only key examined is -cutting.  The remaining
# arguments are passed to Pod::Simple's parse_from_file, whose return value is
# returned.
sub parse_from_file {
    my $self = shift;
    $self->reinit;

    # Fake the old cutting option to Pod::Parser: a defined but false
    # -cutting means "start out inside POD".  This pokes at internal
    # Pod::Simple state and is quite ugly; we need a better approach.
    my $opts = ref ($_[0]) eq 'HASH' ? shift @_ : undef;
    if ($opts && defined ($$opts{-cutting}) && !$$opts{-cutting}) {
        $$self{in_pod}         = 1;
        $$self{last_was_blank} = 1;
    }

    # Do the work.
    my $retval = $self->Pod::Simple::parse_from_file (@_);

    # Pod::Simple doesn't flush the output, so do it here: briefly turn on
    # autoflush for the output handle and print nothing.  $| applies to the
    # currently selected handle, hence the select dance; the previous
    # autoflush setting and selected handle are both restored.  Ideally we
    # would also close the file descriptor if we had to open one, but we
    # can't easily figure that out.
    my $out       = $self->output_fh ();
    my $selected  = select $out;
    my $autoflush = $|;
    $| = 1;
    print $out '';
    $| = $autoflush;
    select $selected;

    return $retval;
}
# Backward compatibility shim that Pod::Simple does not provide.  Since
# parse_from_file accepts file handles as input, this simply forwards every
# argument to it and returns its result.
sub parse_from_filehandle {
    my $self = shift;
    return $self->parse_from_file (@_);
}
# Wrapper around the parent class's parse_file.  Our documentation has always
# said that output defaults to STDOUT, but Pod::Simple's parse_file doesn't
# set output_fh, so do that here unless the caller has already chosen one.
sub parse_file {
    my ($self, $in) = @_;
    $self->output_fh (\*STDOUT) if !defined $$self{output_fh};
    return $self->SUPER::parse_file ($in);
}
# Wrapper around the parent class's parse_lines that defaults the output to
# STDOUT when no output handle has been set.  Pod::Simple's man page implies
# that the caller is responsible for setting this, but there is no reason not
# to provide a default.
sub parse_lines {
    my ($self, @lines) = @_;
    $self->output_fh (\*STDOUT) if !defined $$self{output_fh};
    return $self->SUPER::parse_lines (@lines);
}
# Wrapper around the parent class's parse_string_document that defaults the
# output to STDOUT when no output handle has been set.
sub parse_string_document {
    my ($self, $doc) = @_;
    $self->output_fh (\*STDOUT) if !defined $$self{output_fh};
    return $self->SUPER::parse_string_document ($doc);
}
##############################################################################
# Module return value and documentation
##############################################################################
1;
__END__
=for stopwords
alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls
parsers
=head1 NAME
Pod::Text - Convert POD data to formatted text
=head1 SYNOPSIS
use Pod::Text;
my $parser = Pod::Text->new (sentence => 1, width => 78);
# Read POD from STDIN and write to STDOUT.
$parser->parse_from_filehandle;
# Read POD from file.pod and write to file.txt.
$parser->parse_from_file ('file.pod', 'file.txt');
=head1 DESCRIPTION
Pod::Text is a module that can convert documentation in the POD format
(the preferred language for documenting Perl) into formatted text. It
uses no special formatting controls or codes whatsoever, and its output is
therefore suitable for nearly any device.
As a derived class from Pod::Simple, Pod::Text supports the same methods and
interfaces. See L<Pod::Simple> for all the details; briefly, one creates a
new parser with C<< Pod::Text->new() >> and then normally calls parse_file().
new() can take options, in the form of key/value pairs, that control the
behavior of the parser. The currently recognized options are:
=over 4
=item alt
If set to a true value, selects an alternate output format that, among other
things, uses a different heading style and marks C<=item> entries with a
colon in the left margin. Defaults to false.
=item code
If set to a true value, the non-POD parts of the input file will be included
in the output. Useful for viewing code documented with POD blocks with the
POD rendered and the code left intact.
=item errors
How to report errors. C<die> says to throw an exception on any POD
formatting error. C<stderr> says to report errors on standard error, but
not to throw an exception. C<pod> says to include a POD ERRORS section
in the resulting documentation summarizing the errors. C<none> ignores
POD errors entirely, as much as possible.
The default is C<pod>.
=item indent
The number of spaces to indent regular text, and the default indentation for
C<=over> blocks. Defaults to 4.
=item loose
If set to a true value, a blank line is printed after a C<=head1> heading.
If set to false (the default), no blank line is printed after C<=head1>,
although one is still printed after C<=head2>. This is the default because
it's the expected formatting for manual pages; if you're formatting
arbitrary text documents, setting this to true may result in more pleasing
output.
=item margin
The width of the left margin in spaces. Defaults to 0. This is the margin
for all text, including headings, not the amount by which regular text is
indented; for the latter, see the I<indent> option. To set the right
margin, see the I<width> option.
=item nourls
Normally, LZ<><> formatting codes that have both a URL and anchor text are formatted
to show both the anchor text and the URL. In other words:
L<foo|http://example.com/>
is formatted as:
foo <http://example.com/>
This option, if set to a true value, suppresses the URL when anchor text
is given, so this example would be formatted as just C<foo>. This can
produce less cluttered output in cases where the URLs are not particularly
important.
=item quotes
Sets the quote marks used to surround CE<lt>> text. If the value is a
single character, it is used as both the left and right quote. Otherwise,
it is split in half, and the first half of the string is used as the left
quote and the second is used as the right quote.
This may also be set to the special value C<none>, in which case no quote
marks are added around CE<lt>> text.
=item sentence
If set to a true value, Pod::Text will assume that each sentence ends in two
spaces, and will try to preserve that spacing. If set to false, all
consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to false.
=item stderr
Send error messages about invalid POD to standard error instead of
appending a POD ERRORS section to the generated output. This is
equivalent to setting C<errors> to C<stderr> if C<errors> is not already
set. It is supported for backward compatibility.
=item utf8
By default, Pod::Text uses the same output encoding as the input encoding
of the POD source (provided that Perl was built with PerlIO; otherwise, it
doesn't encode its output). If this option is given, the output encoding
is forced to UTF-8.
Be aware that, when using this option, the input encoding of your POD
source should be properly declared unless it's US-ASCII. Pod::Simple will
attempt to guess the encoding and may be successful if it's Latin-1 or
UTF-8, but it will produce warnings. Use the C<=encoding> command to
declare the encoding. See L<perlpod(1)> for more information.
=item width
The column at which to wrap text on the right-hand side. Defaults to 76.
=back
The standard Pod::Simple method parse_file() takes one argument naming the
POD file to read from. By default, the output is sent to C<STDOUT>, but
this can be changed with the output_fh() method.
The standard Pod::Simple method parse_from_file() takes up to two
arguments, the first being the input file to read POD from and the second
being the file to write the formatted output to.
You can also call parse_lines() to parse an array of lines or
parse_string_document() to parse a document already in memory. As with
parse_file(), parse_lines() and parse_string_document() default to sending
their output to C<STDOUT> unless changed with the output_fh() method.
To put the output from any parse method into a string instead of a file
handle, call the output_string() method instead of output_fh().
See L<Pod::Simple> for more specific details on the methods available to
all derived parsers.
=head1 DIAGNOSTICS
=over 4
=item Bizarre space in item
=item Item called without tag
(W) Something has gone wrong in internal C<=item> processing. These
messages indicate a bug in Pod::Text; you should never see them.
=item Can't open %s for reading: %s
(F) Pod::Text was invoked via the compatibility mode pod2text() interface
and the input file it was given could not be opened.
=item Invalid errors setting "%s"
(F) The C<errors> parameter to the constructor was set to an unknown value.
=item Invalid quote specification "%s"
(F) The quote specification given (the C<quotes> option to the
constructor) was invalid. A quote specification must be either one
character long or an even number (greater than one) of characters long.
=item POD document had syntax errors
(F) The POD document being formatted had syntax errors and the C<errors>
option was set to C<die>.
=back
=head1 BUGS
Encoding handling assumes that PerlIO is available and does not work
properly if it isn't. The C<utf8> option is therefore not supported
unless Perl is built with PerlIO support.
=head1 CAVEATS
If Pod::Text is given the C<utf8> option, the encoding of its output file
handle will be forced to UTF-8 if possible, overriding any existing
encoding. This will be done even if the file handle is not created by
Pod::Text and was passed in from outside. This maintains consistency
regardless of PERL_UNICODE and other settings.
If the C<utf8> option is not given, the encoding of its output file handle
will be forced to the detected encoding of the input POD, which preserves
whatever the input text is. This ensures backward compatibility with
earlier, pre-Unicode versions of this module, without large numbers of
Perl warnings.
This is not ideal, but it seems to be the best compromise. If it doesn't
work for you, please let me know the details of how it broke.
=head1 NOTES
This is a replacement for an earlier Pod::Text module written by Tom
Christiansen. It has a revamped interface, since it now uses Pod::Simple,
but an interface roughly compatible with the old Pod::Text::pod2text()
function is still available. Please change to the new calling convention,
though.
The original Pod::Text contained code to do formatting via termcap
sequences, although it wasn't turned on by default and it was problematic to
get it to work at all. This rewrite doesn't even try to do that, but a
subclass of it does. Look for L<Pod::Text::Termcap>.
=head1 AUTHOR
Russ Allbery <rra@cpan.org>, based I<very> heavily on the original
Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
Pod::Parser by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial
conversion of Pod::Man to use Pod::Simple provided much-needed guidance on
how to use Pod::Simple.
=head1 COPYRIGHT AND LICENSE
Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018 Russ Allbery
<rra@cpan.org>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=head1 SEE ALSO
L<Pod::Simple>, L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)>
The current version of this module is always available from its web site at
L<https://www.eyrie.org/~eagle/software/podlators/>. It is also part of the
Perl core distribution as of 5.6.0.
=cut
# Local Variables:
# copyright-at-end-flag: t
# End:
Checker.pm 0000644 00000077362 15051135563 0006471 0 ustar 00 #############################################################################
# Pod/Checker.pm -- check pod documents for syntax errors
#
# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
# This is free software; you can redistribute it and/or modify it under the
# same terms as Perl itself.
#############################################################################
package Pod::Checker;
use strict;
use warnings;
our $VERSION = '1.73'; ## Current version of this package
=head1 NAME
Pod::Checker - check pod documents for syntax errors
=head1 SYNOPSIS
use Pod::Checker;
$syntax_okay = podchecker($filepath, $outputpath, %options);
my $checker = Pod::Checker->new(%options);
$checker->parse_from_file($filepath, \*STDERR);
=head1 OPTIONS/ARGUMENTS
C<$filepath> is the input POD to read and C<$outputpath> is
where to write POD syntax error messages. Either argument may be a scalar
indicating a file-path, or else a reference to an open filehandle.
If unspecified, the input-file defaults to C<\*STDIN>, and
the output-file defaults to C<\*STDERR>.
=head2 podchecker()
This function can take a hash of options:
=over 4
=item B<-warnings> =E<gt> I<val>
Turn warnings on/off. I<val> is usually 1 for on, but higher values
trigger additional warnings. See L<"Warnings">.
=item B<-quiet> =E<gt> I<val>
If C<val> is true, do not print any errors/warnings.
=back
=head1 DESCRIPTION
B<podchecker> will perform syntax checking of Perl5 POD format documentation.
Curious/ambitious users are welcome to propose additional features they wish
to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
consistent with L<perlpod>.
The following checks are currently performed:
=over 4
=item *
Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
and unterminated interior sequences.
=item *
Check for proper balancing of C<=begin> and C<=end>. The contents of such
a block are generally ignored, i.e. no syntax checks are performed.
=item *
Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
=item *
Check for same nested interior-sequences (e.g.
C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
=item *
Check for malformed or non-existing entities C<EE<lt>...E<gt>>.
=item *
Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
for details.
=item *
Check for unresolved document-internal links. This check may also reveal
misspelled links that seem to be internal links but should be links
to something else.
=back
=head1 DIAGNOSTICS
=head2 Errors
=over 4
=item * empty =headn
A heading (C<=head1> or C<=head2>) without any text? That ain't no
heading!
=item * =over on line I<N> without closing =back
=item * You forgot a '=back' before '=headI<N>'
=item * =over is the last thing in the document?!
The C<=over> command does not have a corresponding C<=back> before the
next heading (C<=head1> or C<=head2>) or the end of the file.
=item * '=item' outside of any '=over'
=item * =back without =over
An C<=item> or C<=back> command has been found outside a
C<=over>/C<=back> block.
=item * Can't have a 0 in =over I<N>
You need to indent a strictly positive number of spaces, not 0.
=item * =over should be: '=over' or '=over positive_number'
Either have an argumentless =over, or have its argument a strictly positive number.
=item * =begin I<TARGET> without matching =end I<TARGET>
A C<=begin> command was found that has no matching =end command.
=item * =begin without a target?
A C<=begin> command was found that is not followed by the formatter
specification.
=item * =end I<TARGET> without matching =begin.
A standalone C<=end> command was found.
=item * '=end' without a target?
'=end' directives need to have a target, just like =begin directives.
=item * '=end I<TARGET>' is invalid.
I<TARGET> needs to be one word
=item * =end I<CONTENT> doesn't match =begin I<TARGET>
I<CONTENT> needs to match =begin's I<TARGET>.
=item * =for without a target?
There is no specification of the formatter after the C<=for> command.
=item * unresolved internal link I<NAME>
The given link to I<NAME> does not have a matching node in the current
POD. This also happens when a single word node name is not enclosed in
C<"">.
=item * Unknown directive: I<CMD>
An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
C<=for>, C<=pod>, C<=cut>
=item * Deleting unknown formatting code I<SEQ>
An invalid markup command has been encountered. Valid are:
C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
C<ZE<lt>E<gt>>
=item * Unterminated I<SEQ>E<lt>E<gt> sequence
An unclosed formatting code
=item * An EE<lt>...E<gt> surrounding strange content
The I<STRING> found cannot be interpreted as a character entity.
=item * An empty EE<lt>E<gt>
=item * An empty C<< LE<lt>E<gt> >>
=item * An empty XE<lt>E<gt>
There needs to be content inside E, L, and X formatting codes.
=item * A non-empty ZE<lt>E<gt>
The C<ZE<lt>E<gt>> sequence is supposed to be empty.
=item * Spurious text after =pod / =cut
The commands C<=pod> and C<=cut> do not take any arguments.
=item * =back doesn't take any parameters, but you said =back I<ARGUMENT>
The C<=back> command does not take any arguments.
=item * =pod directives shouldn't be over one line long! Ignoring all I<N> lines of content
Self explanatory
=item * =cut found outside a pod block.
A '=cut' directive found in the middle of non-POD
=item * Invalid =encoding syntax: I<CONTENT>
Syntax error in =encoding directive
=back
=head2 Warnings
These may not necessarily cause trouble, but indicate mediocre style.
=over 4
=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
Two nested identical markup commands have been found. Generally this
does not make sense.
=item * multiple occurrences (I<N>) of link target I<name>
The POD file has some C<=item> and/or C<=head> commands that have
the same text. Potential hyperlinks to such a text cannot be unique then.
This warning is printed only with warning level greater than one.
=item * line containing nothing but whitespace in paragraph
There is some whitespace on a seemingly empty line. POD is very sensitive
to such things, so this is flagged. B<vi> users switch on the B<list>
option to avoid this problem.
=item * =item has no contents
There is a list C<=item> that has no text contents. You probably want to delete
empty items.
=item * You can't have =items (as at line I<N>) unless the first thing after the =over is an =item
A list introduced by C<=over> starts with a text or verbatim paragraph,
but continues with C<=item>s. Move the non-item paragraph out of the
C<=over>/C<=back> block.
=item * Expected '=item I<EXPECTED VALUE>'
=item * Expected '=item *'
=item * Possible =item type mismatch: 'I<x>' found leading a supposed definition =item
A list started with e.g. a bullet-like C<=item> and continued with a
numbered one. This is obviously inconsistent. For most translators the
type of the I<first> C<=item> determines the type of the list.
=item * You have '=item x' instead of the expected '=item I<N>'
Erroneous numbering of =item numbers; they need to ascend consecutively.
=item * Unknown E content in EE<lt>I<CONTENT>E<gt>
A character entity was found that does not belong to the standard
ISO set or the POD specials C<verbar> and C<sol>. I<Currently, this warning
only appears if a character entity was found that does not have a Unicode
character. This should be fixed to adhere to the original warning.>
=item * empty =over/=back block
The list opened with C<=over> does not contain anything.
=item * empty section in previous paragraph
The previous section (introduced by a C<=head> command) does not contain
any valid content. This usually indicates that something is missing. Note: A
C<=head1> followed immediately by C<=head2> does not trigger this warning.
=item * Verbatim paragraph in NAME section
The NAME section (C<=head1 NAME>) should consist of a single paragraph
with the script/module name, followed by a dash `-' and a very short
description of what the thing is good for.
=item * =headI<n> without preceding higher level
For example if there is a C<=head2> in the POD file prior to a
C<=head1>.
=back
=head2 Hyperlinks
There are some warnings with respect to malformed hyperlinks:
=over 4
=item * ignoring leading/trailing whitespace in link
There is whitespace at the beginning or the end of the contents of
LE<lt>...E<gt>.
=item * alternative text/node '%s' contains non-escaped | or /
The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
Although the hyperlink parser does its best to determine which "/" is
text and which is a delimiter in case of doubt, one ought to escape
these literal characters like this:
/ E<sol>
| E<verbar>
=back
Note that the line number of the error/warning may refer to the line number of
the start of the paragraph in which the error/warning exists, not the line
number that the error/warning is on. This bug is present in errors/warnings
related to formatting codes. I<This should be fixed.>
=head1 RETURN VALUE
B<podchecker> returns the number of POD syntax errors found or -1 if
there were no POD commands at all found in the file.
=head1 EXAMPLES
See L</SYNOPSIS>
=head1 SCRIPTS
The B<podchecker> script that comes with this distribution is a lean wrapper
around this module. See the online manual with
podchecker -help
podchecker -man
=head1 INTERFACE
While checking, this module collects document properties, e.g. the nodes
for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
POD translators can use this feature to syntax-check and get the nodes in
a first pass before actually starting to convert. This is expensive in terms
of execution time, but allows for very robust conversions.
Since v1.24 the B<Pod::Checker> module uses only the B<poderror>
method to print errors and warnings. The summary output (e.g.
"Pod syntax OK") has been dropped from the module and has been included in
B<podchecker> (the script). This allows users of B<Pod::Checker> to
control completely the output behavior. Users of B<podchecker> (the script)
get the well-known behavior.
v1.45 inherits from Pod::Simple as opposed to all previous versions
inheriting from Pod::Parser. Do B<not> use Pod::Simple's interface when
using Pod::Checker unless it is documented somewhere on this page. I
repeat, DO B<NOT> USE POD::SIMPLE'S INTERFACE.
=cut
#############################################################################
#use diagnostics;
use Carp qw(croak);
use Exporter 'import';
use base qw/Pod::Simple::Methody/;
our @EXPORT = qw(&podchecker);
##---------------------------------
## Function definitions begin here
##---------------------------------
# Functional interface: check one POD document.
#
#   $infile   input path or open filehandle; a false value means \*STDIN
#   $outfile  where messages go, path or filehandle; a false value means
#             \*STDERR
#   %options  passed straight to Pod::Checker->new
#
# Returns the checker's error count (num_errors) after parsing.
sub podchecker {
    my ($infile, $outfile, %options) = @_;
    local $_;

    ## Fall back to the standard streams
    $infile  = \*STDIN  unless $infile;
    $outfile = \*STDERR unless $outfile;

    ## Build a checker, run it over the document, and report the tally
    my $checker = Pod::Checker->new(%options);
    $checker->parse_from_file($infile, $outfile);
    return $checker->num_errors();
}
##---------------------------------------------------------------------------
##-------------------------------
## Method definitions begin here
##-------------------------------
##################################
=over 4
=item C<Pod::Checker-E<gt>new( %options )>
Return a reference to a new Pod::Checker object that inherits from
Pod::Simple and is used for calling the required methods later. The
following options are recognized:
C<-warnings =E<gt> num>
Print warnings if C<num> is true. The higher the value of C<num>,
the more warnings are printed. Currently there are only levels 1 and 2.
C<-quiet =E<gt> num>
If C<num> is true, do not print any errors/warnings. This is useful
when Pod::Checker is used to munge POD code into plain text from within
POD formatters.
=cut
# Constructor.  Builds the object via the parent class, records the -warnings
# and -quiet options, resets all per-document bookkeeping, and registers the
# handlers this checker relies on.
sub new {
    my $class = shift;
    my $new   = $class->SUPER::new(@_);
    $new->{'output_fh'} ||= *STDERR{IO};

    # Caller-supplied options: warnings default to on, quiet defaults to off
    my %opts = @_;
    $new->{'-warnings'} = defined $opts{'-warnings'} ? $opts{'-warnings'} : 1;
    $new->{'-quiet'}    = $opts{'-quiet'} || 0;

    # Bookkeeping, all starting out empty.  'current' also means 'most
    # recent' in the following comments.
    my %state = (
        '_NUM_ERRORS'      => 0,   # errors reported so far
        '_NUM_WARNINGS'    => 0,   # warnings reported so far
        '_thispara'        => '',  # current POD paragraph
        '_line'            => 0,   # current line number
        '_head_num'        => 0,   # current =head level (0 to make logic
                                   # easier down the road)
        '_cmds_since_head' => 0,   # num of POD directives since prev. =headN
        '_nodes'           => [],  # stack for =head/=item nodes
        '_fcode_stack'     => [],  # stack for nested formatting codes
        '_fcode_pos'       => [],  # stack for position in paragraph of fcodes
        '_begin_stack'     => [],  # stack for =begins: [line #, target]
        '_links'           => [],  # stack for hyperlinks to external entities
        '_internal_links'  => [],  # set of linked-to internal sections
        '_index'           => [],  # stack for text in X<>s
    );
    @{$new}{keys %state} = values %state;

    $new->accept_targets('*');                      # check all =begin/=for blocks
    $new->cut_handler( \&handle_pod_and_cut );      # warn if text after =cut
    $new->pod_handler( \&handle_pod_and_cut );      # warn if text after =pod
    $new->whiteline_handler( \&handle_whiteline );  # warn if whiteline
    $new->parse_empty_lists(1);                     # warn if they are empty

    return $new;
}
##################################
=item C<$checker-E<gt>poderror( @args )>
=item C<$checker-E<gt>poderror( {%opts}, @args )>
Internal method for printing errors and warnings. If no options are given,
simply prints "@_". The following options are recognized and used to form
the output:
-msg
A message to print prior to C<@args>.
-line
The line number the error occurred in.
-file
The file (name) the error occurred in. Defaults to the name of the current
file being processed.
-severity
The error level, should be 'WARNING' or 'ERROR'.
=cut
# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
#
# Formats one diagnostic, updates the error/warning counters, and prints the
# message unless the object is in -quiet mode.
#
# Options (all optional): -msg (text placed before "@args"), -line, -file
# (defaults to the object's source_filename, or "???"), and -severity.  When
# no -severity is given, a leading "SEVERITY: " prefix (three or more capital
# letters) is stripped from the message and used instead.
#
# Counting: a call with no options and no severity prefix bumps both
# counters; otherwise the counter matching an 'ERROR' or 'WARNING' severity
# is bumped.  Warnings are printed only when -warnings is true.
sub poderror {
    my $self = shift;
    my %opts = (ref $_[0]) ? %{shift()} : ();

    ## Retrieve options
    chomp( my $msg  = ($opts{'-msg'} || '')."@_" );
    my $line = (exists $opts{'-line'}) ? " at line $opts{'-line'}" : '';
    my $file = ' in file ' . ((exists $opts{'-file'})
                              ? $opts{'-file'}
                              : ((defined $self->source_filename)
                                 ? $self->source_filename
                                 : "???"));
    unless (exists $opts{'-severity'}) {
       ## See if can find severity in message prefix
       $opts{'-severity'} = $1  if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
    }
    my $severity = (exists $opts{'-severity'}) ? "*** $opts{-severity}: " : '';

    ## Severity used for the comparisons below.  Options may be given without
    ## any severity (e.g. only -line), so fall back to the empty string
    ## rather than comparing undef against 'WARNING'.
    my $level = defined $opts{'-severity'} ? $opts{'-severity'} : '';

    ## Increment error count and print message
    ++($self->{'_NUM_ERRORS'})
        if(!%opts || $level eq 'ERROR');
    ++($self->{'_NUM_WARNINGS'})
        if(!%opts || $level eq 'WARNING');
    unless($self->{'-quiet'}) {
      my $out_fh = $self->{'output_fh'} || \*STDERR;
      print $out_fh ($severity, $msg, $line, $file, "\n")
        if($self->{'-warnings'} || !%opts || $level ne 'WARNING');
    }
}
##################################
=item C<$checker-E<gt>num_errors()>
Set (if argument specified) and retrieve the number of errors found.
=cut
# Combined getter/setter for the error count.  Any second argument, even an
# undefined one, replaces the stored count; the (possibly new) count is
# returned.
sub num_errors {
    my ($self, @value) = @_;
    $self->{'_NUM_ERRORS'} = $value[0] if @value;
    return $self->{'_NUM_ERRORS'};
}
##################################
=item C<$checker-E<gt>num_warnings()>
Set (if argument specified) and retrieve the number of warnings found.
=cut
# Combined getter/setter for the warning count.  Any second argument, even an
# undefined one, replaces the stored count; the (possibly new) count is
# returned.
sub num_warnings {
    my ($self, @value) = @_;
    $self->{'_NUM_WARNINGS'} = $value[0] if @value;
    return $self->{'_NUM_WARNINGS'};
}
##################################
=item C<$checker-E<gt>name()>
Set (if argument specified) and retrieve the canonical name of POD as
found in the C<=head1 NAME> section.
=cut
# Combined getter/setter for the POD's canonical name.  The stored name is
# replaced only when a true second argument is supplied; a false or missing
# argument just reads the current value.
sub name {
    my ($self, $value) = @_;
    $self->{'_pod_name'} = $value if @_ > 1 && $value;
    return $self->{'_pod_name'};
}
##################################
=item C<$checker-E<gt>node()>
Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
and C<=item>) of the current POD. The nodes are returned in the order of
their occurrence. They consist of plain text, each piece of whitespace is
collapsed to a single blank.
=cut
# With a defined argument: normalize the text (trailing whitespace removed,
# whitespace runs collapsed to one blank), append it to the node list, count
# it in the uniqueness table unless it is blank, and return the normalized
# text.  Without one: return the list of nodes recorded so far.
sub node {
    my ($self, $text) = @_;
    return @{$self->{'_nodes'}} unless defined $text;

    $text =~ s/\s+$//s;     # strip trailing whitespace
    $text =~ s/\s+/ /gs;    # collapse whitespace

    # Nodes are kept in order of occurrence.
    push @{$self->{'_nodes'}}, $text;

    # Blank nodes are recorded above but never counted here.
    $self->{'_unique_nodes'}->{$text}++ unless $text =~ /^\s*$/s;
    return $text;
}
##################################
=item C<$checker-E<gt>idx()>
Add (if argument specified) and retrieve the index entries (as defined by
C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
of whitespace is collapsed to a single blank.
=cut
# With a defined argument: normalize the text (trailing whitespace removed,
# whitespace runs collapsed to one blank), append it to the index list, count
# it in the shared '_unique_nodes' table unless it is blank, and return the
# normalized text.  Without one: return the index entries recorded so far.
sub idx {
    my ($self, $text) = @_;
    return @{$self->{'_index'}} unless defined $text;

    $text =~ s/\s+$//s;     # strip trailing whitespace
    $text =~ s/\s+/ /gs;    # collapse whitespace

    # Index entries are kept in order of occurrence.
    push @{$self->{'_index'}}, $text;

    # Blank entries are recorded above but never counted here.
    $self->{'_unique_nodes'}->{$text}++ unless $text =~ /^\s*$/s;
    return $text;
}
##################################
# Record one hyperlink in the list kept for the current POD.  Returns the
# link that was just added (not the list).
sub hyperlink {
    my ($self, $link) = @_;
    push @{$self->{'_links'}}, $link;
    return $link;
}
=item C<$checker-E<gt>hyperlinks()>
Retrieve an array containing the hyperlinks to things outside
the current POD (as defined by C<LE<lt>E<gt>>).
Each is an instance of a class with the following methods:
=cut
# Return everything recorded through hyperlink(), in insertion order.
sub hyperlinks {
    my ($self) = @_;
    return @{$self->{'_links'}};
}
##################################
# override Pod::Simple's whine() and scream() to use poderror()
# Note:
# Ignore $self->{'no_whining'} b/c $self->{'quiet'} takes care of it in poderror
# Don't bother incrementing $self->{'errors_seen'} -- it's not used
# Don't bother pushing to $self->{'errata'} b/c poderror() outputs immediately
# We don't need to set $self->no_errata_section(1) b/c of these overrides
# Report $complaint for $line through poderror() with severity ERROR.
# Always returns 1.
sub whine {
    my ($self, $line, $complaint) = @_;
    my %report = (
        -line     => $line,
        -severity => 'ERROR',
        -msg      => $complaint,
    );
    if (0) {
        # Deliberately disabled (never executed), kept for reference.
        # XXX: Let's standardize what's a warning and what's an error. Let's not
        # move stuff up and down the severity tree. -- rjbs, 2013-04-12
        # Convert errors in Pod::Simple that are warnings in Pod::Checker
        # XXX Do differently so the $complaint can be reworded without this breaking
        $report{-severity} = 'WARNING' if
            $complaint =~ /^Expected '=item .+?'$/ ||
            $complaint =~ /^You can't have =items \(as at line .+?\) unless the first thing after the =over is an =item$/ ||
            $complaint =~ /^You have '=item .+?' instead of the expected '=item .+?'$/;
    }
    $self->poderror(\%report);
    return 1; # assume everything is peachy keen
}
# Report $complaint for $line through poderror() with severity ERROR.
# Always returns 1.
sub scream {
    my ($self, $line, $complaint) = @_;
    my %report = (
        -line     => $line,
        -severity => 'ERROR', # consider making severity 'FATAL'
        -msg      => $complaint,
    );
    $self->poderror(\%report);
    return 1;
}
##################################
# Some helper subroutines
# Bookkeeping done at the start of most events: empty the paragraph text
# buffer, remember the event's 'start_line' as the current line, and count
# one more command since the last heading.
sub _init_event {
    my ($self, $flags) = @_;
    $self->{'_thispara'} = '';
    $self->{'_line'}     = $flags->{'start_line'};
    $self->{'_cmds_since_head'}++;
}
# Warn when formatting code $inner is nested inside another occurrence of
# the same code; $outers is an array ref of the enclosing codes.
# XXX the reported line is the start of the enclosing paragraph, not the
# line the nesting is actually on.
sub _check_fcode {
    my ($self, $inner, $outers) = @_;

    # Later versions of Pod::Simple forbid nested L<>'s, so nothing to do
    # for links there (version compared as a string).
    my $is_link = $inner eq 'L';
    return if $is_link && $Pod::Simple::VERSION ge '3.33';

    my $nested = grep { $_ eq $inner } @{$outers};
    $self->poderror({ -line     => $self->{'_line'},
                      -severity => 'WARNING',
                      -msg      => "nested commands $inner<...$inner<...>...>" })
        if $nested;
}
##################################
# Append a piece of text to the buffer of the paragraph being collected.
sub handle_text {
    my ($self, $text) = @_;
    return $self->{'_thispara'} .= $text;
}
# whiteline is a seemingly blank line that matches /[^\S\r\n]/
# Warn about a whitespace-only line inside a paragraph. Arguments are the
# line text (unused), its line number, and the checker object.
sub handle_whiteline {
    my (undef, $line_n, $self) = @_;
    my %report = (
        -line     => $line_n,
        -severity => 'WARNING',
        -msg      => 'line containing nothing but whitespace in paragraph',
    );
    return $self->poderror(\%report);
}
######## Directives
# Handle a =pod or =cut line: count it as a command since the last
# heading, and report an error if the directive is followed by text.
# Arguments are the line text, its line number, and the checker object.
sub handle_pod_and_cut {
    my ($line, $line_n, $self) = @_;
    $self->{'_cmds_since_head'}++;
    if (my ($directive) = $line =~ /=(pod|cut)\s+\S/) {
        $self->poderror({ -line     => $line_n,
                          -severity => 'ERROR',
                          -msg      => "Spurious text after =$directive" });
    }
}
# Start of an ordinary paragraph: just run the common event bookkeeping.
sub start_Para {
    my ($self, @event_args) = @_;
    return $self->_init_event(@event_args);
}
# End of an ordinary paragraph. Inside the "=head1 NAME" section, the
# first whitespace-free token of the paragraph that is followed by a
# comma or dash becomes the POD name, unless a name is already set.
sub end_Para {
    my ($self) = @_;
    my $in_name_section = $self->{'_head_num'} == 1
                       && $self->{'_head_text'} eq 'NAME';
    if ($in_name_section && !defined $self->{'_pod_name'}) {
        my ($candidate) = $self->{'_thispara'} =~ /^\s*(\S+?)\s*[,-]/;
        $self->{'_pod_name'} = $candidate if defined $candidate;
    }
}
# Start of a verbatim paragraph: run the common bookkeeping, then warn if
# the paragraph sits inside the "=head1 NAME" section.
sub start_Verbatim {
    my ($self, @event_args) = @_;
    $self->_init_event(@event_args);
    my $in_name_section = $self->{'_head_num'} == 1
                       && $self->{'_head_text'} eq 'NAME';
    $self->poderror({ -line     => $self->{'_line'},
                      -severity => 'WARNING',
                      -msg      => 'Verbatim paragraph in NAME section' })
        if $in_name_section;
}
# Don't need an end_Verbatim
# Do I need to do anything else with this?
# Start of a data paragraph: run the common event bookkeeping.
# The event arguments are forwarded, as every other start_* handler does,
# so that _init_event() can record the paragraph's 'start_line' instead
# of resetting the current line to undef.
sub start_Data {
    my ($self, @event_args) = @_;
    return $self->_init_event(@event_args);
}
# =head1 .. =head4 all funnel into start_head() with their level number
# placed in front of the event arguments.
sub start_head1 { my $self = shift; return $self->start_head(1, @_) }
sub start_head2 { my $self = shift; return $self->start_head(2, @_) }
sub start_head3 { my $self = shift; return $self->start_head(3, @_) }
sub start_head4 { my $self = shift; return $self->start_head(4, @_) }
# Common start handler for =headN. $h is the heading level; the remaining
# arguments are passed to _init_event(). Records the new level, counts the
# heading per level, and warns when
#   - a level above 1 appears before any heading of the level above it, or
#   - this heading is the first command after a previous heading of the
#     same or a deeper level (i.e. that section was empty).
sub start_head {
    my ($self, $h, @event_args) = @_;
    $self->_init_event(@event_args);

    my $prev_h = $self->{'_head_num'};
    $self->{'_head_num'} = $h;
    $self->{"_count_head$h"}++;

    my $parent_missing = $h > 1 && !$self->{'_count_head' . ($h - 1)};
    $self->poderror({ -line     => $self->{'_line'},
                      -severity => 'WARNING',
                      -msg      => "=head$h without preceding higher level" })
        if $parent_missing;

    # If this is the first =head of the doc, $prev_h is 0, thus less than $h
    my $previous_empty = $self->{'_cmds_since_head'} == 1 && $prev_h >= $h;
    $self->poderror({ -line     => $self->{'_line'},
                      -severity => 'WARNING',
                      -msg      => 'empty section in previous paragraph' })
        if $previous_empty;
}
# The end of every heading level is handled by the shared end_head().
sub end_head1 { my $self = shift; return $self->end_head(@_) }
sub end_head2 { my $self = shift; return $self->end_head(@_) }
sub end_head3 { my $self = shift; return $self->end_head(@_) }
sub end_head4 { my $self = shift; return $self->end_head(@_) }
# Common end handler for =headN. The collected paragraph text, minus
# trailing whitespace, becomes the current heading text and is recorded as
# a node; the commands-since-heading counter restarts. An empty heading is
# reported as an error.
sub end_head {
    my ($self) = @_;
    (my $title = $self->{'_thispara'}) =~ s/\s+$//;
    $self->{'_head_text'}       = $title;
    $self->{'_cmds_since_head'} = 0;
    my $level = $self->{'_head_num'};
    $self->node($title); # remember this node
    $self->poderror({ -line     => $self->{'_line'},
                      -severity => 'ERROR',
                      -msg      => "empty =head$level" })
        if $title eq '';
}
# Each kind of =over block calls start_over() with the event arguments
# followed by a label naming the kind of list.
sub start_over_bullet { my $self = shift; return $self->start_over(@_, 'bullet') }
sub start_over_number { my $self = shift; return $self->start_over(@_, 'number') }
sub start_over_text   { my $self = shift; return $self->start_over(@_, 'definition') }
sub start_over_block  { my $self = shift; return $self->start_over(@_, 'block') }
# An =over block without any content: do the normal start_over() work
# (labelled 'empty') and then warn about the empty block.
sub start_over_empty {
    my ($self, @event_args) = @_;
    $self->start_over(@event_args, 'empty');
    return $self->poderror({ -line     => $self->{'_line'},
                             -severity => 'WARNING',
                             -msg      => 'empty =over/=back block' });
}
# Shared start handler for =over blocks. The trailing list-type label is
# removed (it is not used here) and the remaining event arguments go to
# the common bookkeeping.
sub start_over {
    my ($self, @event_args) = @_;
    pop @event_args;    # drop the list-type label
    return $self->_init_event(@event_args);
}
# Item starts only need the common event bookkeeping; item ends go to
# end_item() with a label for the kind of item (event arguments are not
# passed on).
sub start_item_bullet { my $self = shift; return $self->_init_event(@_) }
sub start_item_number { my $self = shift; return $self->_init_event(@_) }
sub start_item_text   { my $self = shift; return $self->_init_event(@_) }
sub end_item_bullet   { my $self = shift; return $self->end_item('bullet') }
sub end_item_number   { my $self = shift; return $self->end_item('number') }
sub end_item_text     { my $self = shift; return $self->end_item('definition') }
# Shared end handler for =item. Warns when the item has no text, unless
# the next pending paragraph is a verbatim one, and then records the
# item text as a node. $type (the kind of item) is accepted but unused.
sub end_item {
    my ($self, $type) = @_;

    # If there is verbatim text in this item, it will show up as part of
    # 'paras', and not part of '_thispara'. If the first para after this is
    # a verbatim one, it actually will be (part of) the contents for this
    # item.
    if ($self->{'_thispara'} eq '') {
        my $verbatim_follows = @{$self->{'paras'}}
                            && $self->{'paras'}[0][0] =~ /Verbatim/i;
        $self->poderror({ -line     => $self->{'_line'},
                          -severity => 'WARNING',
                          -msg      => '=item has no contents' })
            unless $verbatim_follows;
    }
    return $self->node($self->{'_thispara'}); # remember this node
}
# Start of a =for / =begin region: after the common bookkeeping, remember
# where the region started and which target it names, so that end_for()
# can report on it.
sub start_for { # =for and =begin directives
    my ($self, $flags) = @_;
    $self->_init_event($flags);
    my @opened = ($self->{'_line'}, $flags->{'target'});
    push @{$self->{'_begin_stack'}}, \@opened;
}
# End of a =for / =begin region: take the matching start record off the
# stack and, if the closing event carries the 'fake-closer' flag (meaning
# Pod::Simple generated this =end), report the unmatched =begin.
sub end_for {
    my ($self, $flags) = @_;
    my $opened = pop @{$self->{'_begin_stack'}};
    my ($line, $target) = @{$opened};
    $self->poderror({ -line     => $line,
                      -severity => 'ERROR',
                      -msg      => "=begin $target without matching =end $target" })
        if $flags->{'fake-closer'};
}
# Final checks once the whole document has been seen:
#   - if no POD content was seen, set the error count to -1 and stop;
#   - report internal links that match neither a node, the first word of
#     a multi-word node, nor an index entry;
#   - when the '-warnings' level is above 1, report link targets
#     (=headX, =item, X<...>) that occur more than once.
sub end_Document {
    my $self = shift;

    # no POD found here
    unless ($self->content_seen) {
        $self->num_errors(-1) and return;
    }

    my %nodes;
    for my $node ($self->node()) {
        $nodes{$node} = 1;
        # we have more than one word. Use the first as a node, too.
        # This is used heavily in perlfunc.pod
        if (my ($first_word) = $node =~ /^(\S+)\s+\S/) {
            $nodes{$first_word} ||= 2; # derived node
        }
    }
    $nodes{$_} = 3 for $self->idx(); # index node

    # XXX update unresolved internal link POD -- single word not enclosed in ""?
    # I don't know what I was thinking when I made the above TODO, and I don't
    # know what it means...
    for my $link (@{ $self->{'_internal_links'} }) {
        my ($name, $line) = @{$link};
        next if $nodes{$name};
        $self->poderror({ -line     => $line,
                          -severity => 'ERROR',
                          -msg      => "unresolved internal link '$name'" });
    }

    # check the internal nodes for uniqueness. This pertains to
    # =headX, =item and X<...>
    if ($self->{'-warnings'} > 1) {
        for my $node (sort keys %{ $self->{'_unique_nodes'} }) {
            my $count = $self->{'_unique_nodes'}{$node};
            next unless $count > 1; # unique, nothing to say
            $self->poderror({
                -line     => '-',
                -severity => 'WARNING',
                -msg      => "multiple occurrences ($count) of link target " .
                             "'$node'" });
        }
    }
}
######## Formatting codes
# The simple formatting codes only need to be pushed on the code stack;
# start_fcode() is told which code it is.
sub start_B { my $self = shift; return $self->start_fcode('B') }
sub start_C { my $self = shift; return $self->start_fcode('C') }
sub start_F { my $self = shift; return $self->start_fcode('F') }
sub start_I { my $self = shift; return $self->start_fcode('I') }
sub start_S { my $self = shift; return $self->start_fcode('S') }
# Put a newly opened formatting code at the front of '_fcode_stack', so
# the innermost open code is always the first element.
sub start_fcode {
    my $self  = shift;
    my $fcode = shift;
    return unshift @{ $self->{'_fcode_stack'} }, $fcode;
}
# Closing any of the simple formatting codes is handled by end_fcode().
sub end_B { my $self = shift; return $self->end_fcode() }
sub end_C { my $self = shift; return $self->end_fcode() }
sub end_F { my $self = shift; return $self->end_fcode() }
sub end_I { my $self = shift; return $self->end_fcode() }
sub end_S { my $self = shift; return $self->end_fcode() }
# Close the innermost formatting code: take it off the front of the code
# stack and let _check_fcode() compare it with the codes still open.
sub end_fcode {
    my $self    = shift;
    my $closing = shift @{ $self->{'_fcode_stack'} };   # current fcode removed
    return $self->_check_fcode($closing, $self->{'_fcode_stack'}); # previous fcodes
}
# Start of an L<> code: push it on the code stack and build a
# Pod::Checker::Hyperlink from the event flags. If no link object comes
# back nothing else happens. A 'pod' link with a node and either no page
# or a page equal to our own NAME is queued as an internal link (raw node
# text plus line); every other link is recorded via hyperlink().
sub start_L {
    my ($self, $flags) = @_;
    $self->start_fcode('L');

    my $link = Pod::Checker::Hyperlink->new($flags, $self);
    return unless $link;

    # It's an internal-to-this-page link if no page is given, or
    # if the given one is to our NAME.
    my $is_internal = $link->type eq 'pod'
                   && $link->node
                   && (   !$link->page
                       || (   $self->{'_pod_name'}
                           && $link->page eq $self->{'_pod_name'}));
    if ($is_internal) {
        push @{ $self->{'_internal_links'} }, [ $link->{'-raw_node'}, $link->line ];
    }
    else {
        $self->hyperlink($link);
    }
}
# End of an L<> code: nothing link-specific, just close the code.
sub end_L {
    my ($self) = @_;
    return $self->end_fcode();
}
# Start of an X<> code: push it on the code stack and note how long the
# paragraph text is right now, i.e. where the X<> contents will begin.
# The positions form a stack so nested X<>s are handled correctly.
sub start_X {
    my ($self) = @_;
    $self->start_fcode('X');
    my $offset = length $self->{'_thispara'};
    push @{$self->{'_fcode_pos'}}, $offset;
}
# End of an X<> code: cut the text collected since the matching start_X()
# out of the paragraph buffer, complain if it is empty, record it as an
# index entry, and close the code.
sub end_X {
    my ($self) = @_;
    my $offset = pop @{$self->{'_fcode_pos'}};               # where X<> began
    my $length = length($self->{'_thispara'}) - $offset;     # up to the end
    my $entry  = substr($self->{'_thispara'}, $offset, $length, '');
    $self->poderror({ -line     => $self->{'_line'},
                      -severity => 'ERROR',
                      -msg      => "An empty X<>" })
        if $entry eq "";
    $self->idx($entry); # remember this node
    $self->end_fcode();
}
package Pod::Checker::Hyperlink;
# This class is used to represent L<> link structures, so that the individual
# elements are easily accessible. It is based on code in Pod::Hyperlink
# Build a link object from a link structure.
#   $class        class to bless into
#   $simple_link  the link structure returned by Pod::Simple; its 'type',
#                 'to' and 'section' entries are read
#   $caller       the calling checker; supplies '_line' and poderror()
# Returns the new object, or nothing (after a warning through the caller)
# when both page and node are empty.
sub new {
    my ($class, $simple_link, $caller) = @_;

    # Force stringification of page and node. (This expands any E<>.)
    my ($page, $node) = map {
        exists $simple_link->{$_} ? "$simple_link->{$_}" : ""
    } qw(to section);

    my $self = bless {
        '-line' => $caller->{'_line'},
        '-type' => $simple_link->{'type'},
        # Save the unmodified node text, as the .t files are expecting the
        # message for internal link failures to include it (hence this
        # preserves backward compatibility).
        '-raw_node' => $node,
    }, $class;

    # Remove leading/trailing white space. Pod::Simple already warns about
    # these, so if the only error is this, and the link is otherwise
    # correct, only the Pod::Simple warning will be output, avoiding
    # unnecessary confusion.
    for ($page, $node) {
        s/ ^ \s+ //x;
        s/ \s+ $ //x;
    }
    @{$self}{ '-page', '-node' } = ($page, $node);

    # Pod::Simple warns about L<> and L< >, but not L</>
    if ($page eq "" && $node eq "") {
        $caller->poderror({ -line     => $caller->{'_line'},
                            -severity => 'WARNING',
                            -msg      => 'empty link' });
        return;
    }
    return $self;
}
=item line()
Returns the approximate line number in which the link was encountered
=cut
# Accessor: the line number stored for this link.
sub line {
    my ($self) = @_;
    return $self->{'-line'};
}
=item type()
Returns the type of the link; one of:
C<"url"> for things like
C<http://www.foo>, C<"man"> for man pages, or C<"pod">.
=cut
# Accessor: the stored link type.
sub type {
    my ($self) = @_;
    return $self->{'-type'};
}
=item page()
Returns the linked-to page or url.
=cut
# Accessor: the stored page (or url) this link points to.
sub page {
    my ($self) = @_;
    return $self->{'-page'};
}
=item node()
Returns the anchor or node within the linked-to page, or an empty string
(C<"">) if none appears in the link.
=back
=cut
# Accessor: the stored node (anchor) of this link.
sub node {
    my ($self) = @_;
    return $self->{'-node'};
}
=head1 AUTHOR
Please report bugs using L<http://rt.cpan.org>.
Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
Marek Rouchal E<lt>marekr@cpan.orgE<gt>,
Marc Green E<lt>marcgreen@cpan.orgE<gt> (port to Pod::Simple)
Ricardo Signes E<lt>rjbs@cpan.orgE<gt> (more porting to Pod::Simple)
Karl Williamson E<lt>khw@cpan.orgE<gt> (more porting to Pod::Simple)
Based on code for B<Pod::Text::pod2text()> written by
Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
=cut
1
Select.pm 0000644 00000057672 15051135563 0006346 0 ustar 00 #############################################################################
# Pod/Select.pm -- function to select portions of POD docs
#
# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
#############################################################################
package Pod::Select;
use strict;
use vars qw($VERSION @ISA @EXPORT $MAX_HEADING_LEVEL %myData @section_headings @selected_sections);
$VERSION = '1.63'; ## Current version of this package
require 5.005; ## requires this Perl version or later
#############################################################################
=head1 NAME
Pod::Select, podselect() - extract selected sections of POD from input
=head1 SYNOPSIS
use Pod::Select;
## Select all the POD sections for each file in @filelist
## and print the result on standard output.
podselect(@filelist);
## Same as above, but write to tmp.out
podselect({-output => "tmp.out"}, @filelist);
## Select from the given filelist, only those POD sections that are
## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist);
## Select the "DESCRIPTION" section of the PODs from STDIN and write
## the result to STDERR.
podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN);
or
use Pod::Select;
## Create a parser object for selecting POD sections from the input
$parser = new Pod::Select();
## Select all the POD sections for each file in @filelist
## and print the result to tmp.out.
$parser->parse_from_file("<&STDIN", "tmp.out");
## Select from the given filelist, only those POD sections that are
## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
$parser->select("NAME|SYNOPSIS", "OPTIONS");
for (@filelist) { $parser->parse_from_file($_); }
## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from
## STDIN and write the result to STDERR.
$parser->select("DESCRIPTION");
$parser->add_selection("SEE ALSO");
$parser->parse_from_filehandle(\*STDIN, \*STDERR);
=head1 REQUIRES
perl5.005, Pod::Parser, Exporter, Carp
=head1 EXPORTS
podselect()
=head1 DESCRIPTION
B<NOTE: This module is considered legacy; modern Perl releases (5.18 and
higher) are going to remove Pod-Parser from core and use L<Pod::Simple>
for all things POD.>
B<podselect()> is a function which will extract specified sections of
pod documentation from an input stream. This ability is provided by the
B<Pod::Select> module which is a subclass of B<Pod::Parser>.
B<Pod::Select> provides a method named B<select()> to specify the set of
POD sections to select for processing/printing. B<podselect()> merely
creates a B<Pod::Select> object and then invokes its B<select()> method
followed by B<parse_from_file()>.
=head1 SECTION SPECIFICATIONS
B<podselect()> and B<Pod::Select::select()> may be given one or more
"section specifications" to restrict the text processed to only the
desired set of sections and their corresponding subsections. A section
specification is a string containing one or more Perl-style regular
expressions separated by forward slashes ("/"). If you need to use a
forward slash literally within a section title you can escape it with a
backslash ("\/").
The formal syntax of a section specification is:
=over 4
=item *
I<head1-title-regex>/I<head2-title-regex>/...
=back
Any omitted or empty regular expressions will default to ".*".
Please note that each regular expression given is implicitly
anchored by adding "^" and "$" to the beginning and end. Also, if a
given regular expression starts with a "!" character, then the
expression is I<negated> (so C<!foo> would match anything I<except>
C<foo>).
Some example section specifications follow.
=over 4
=item *
Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections:
C<NAME|SYNOPSIS>
=item *
Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION>
section:
C<DESCRIPTION/Question|Answer>
=item *
Match the C<Comments> subsection of I<all> sections:
C</Comments>
=item *
Match all subsections of C<DESCRIPTION> I<except> for C<Comments>:
C<DESCRIPTION/!Comments>
=item *
Match the C<DESCRIPTION> section but do I<not> match any of its subsections:
C<DESCRIPTION/!.+>
=item *
Match all top level sections but none of their subsections:
C</!.+>
=back
=begin _NOT_IMPLEMENTED_
=head1 RANGE SPECIFICATIONS
B<podselect()> and B<Pod::Select::select()> may be given one or more
"range specifications" to restrict the text processed to only the
desired ranges of paragraphs in the desired set of sections. A range
specification is a string containing a single Perl-style regular
expression (a regex), or else two Perl-style regular expressions
(regexs) separated by a ".." (Perl's "range" operator is "..").
The regexs in a range specification are delimited by forward slashes
("/"). If you need to use a forward slash literally within a regex you
can escape it with a backslash ("\/").
The formal syntax of a range specification is:
=over 4
=item *
/I<start-range-regex>/[../I<end-range-regex>/]
=back
Where the item inside square brackets (the ".." followed by the
end-range-regex) is optional. Each "range-regex" is of the form:
=cmd-expr text-expr
Where I<cmd-expr> is intended to match the name of one or more POD
commands, and I<text-expr> is intended to match the paragraph text for
the command. If a range-regex is supposed to match a POD command, then
the first character of the regex (the one after the initial '/')
absolutely I<must> be a single '=' character; it may not be anything
else (not even a regex meta-character) if it is supposed to match
against the name of a POD command.
If no I<=cmd-expr> is given then the text-expr will be matched against
plain textblocks unless it is preceded by a space, in which case it is
matched against verbatim text-blocks. If no I<text-expr> is given then
only the command-portion of the paragraph is matched against.
Note that these two expressions are each implicitly anchored. This
means that when matching against the command-name, there will be an
implicit '^' and '$' around the given I<=cmd-expr>; and when matching
against the paragraph text there will be an implicit '\A' and '\Z'
around the given I<text-expr>.
Unlike with section-specs, the '!' character does I<not> have any special
meaning (negation or otherwise) at the beginning of a range-spec!
Some example range specifications follow.
=over 4
=item
Match all C<=for html> paragraphs:
C</=for html/>
=item
Match all paragraphs between C<=begin html> and C<=end html>
(note that this will I<not> work correctly if such sections
are nested):
C</=begin html/../=end html/>
=item
Match all paragraphs between the given C<=item> name until the end of the
current section:
C</=item mine/../=head\d/>
=item
Match all paragraphs between the given C<=item> until the next item, or
until the end of the itemized list (note that this will I<not> work as
desired if the item contains an itemized list nested within it):
C</=item mine/../=(item|back)/>
=back
=end _NOT_IMPLEMENTED_
=cut
#############################################################################
#use diagnostics;
use Carp;
use Pod::Parser 1.04;
## Pod::Select is a subclass of Pod::Parser
@ISA = qw(Pod::Parser);
## podselect() is exported to the caller by default
@EXPORT = qw(&podselect);
## Maximum number of heading levels supported for '=headN' directives
## ($MAX_HEADING_LEVEL is aliased to the literal 3 through its typeglob)
*MAX_HEADING_LEVEL = \3;
#############################################################################
=head1 OBJECT METHODS
The following methods are provided in this module. Each one takes a
reference to the object itself as an implicit first parameter.
=cut
##---------------------------------------------------------------------------
## =begin _PRIVATE_
##
## =head1 B<_init_headings()>
##
## Initialize the current set of active section headings.
##
## =cut
##
## =end _PRIVATE_
## Make sure the object has a list of current section headings: when
## {_SECTION_HEADINGS} is not defined yet, it is set to a new array
## holding one empty title per supported heading level.
sub _init_headings {
    my ($self) = @_;
    ## Nothing to do when the headings were initialized earlier
    return if (defined $self->{_SECTION_HEADINGS});
    $self->{_SECTION_HEADINGS} = [ ('') x $MAX_HEADING_LEVEL ];
    return;
}
##---------------------------------------------------------------------------
=head1 B<curr_headings()>
($head1, $head2, $head3, ...) = $parser->curr_headings();
$head1 = $parser->curr_headings(1);
This method returns a list of the currently active section headings and
subheadings in the document being parsed. The list of headings returned
corresponds to the most recently parsed paragraph of the input.
If an argument is given, it must correspond to the desired section
heading number, in which case only the specified section heading is
returned. If there is no current section heading at the specified
level, then C<undef> is returned.
=cut
## Return the currently active section headings.
##   - no argument (or one that is not a string of digits): the whole
##     list of headings, outermost level first;
##   - a heading number N (1-based): only the heading at that level, or
##     undef when there is none.
## Level 0 is not a valid heading number. It is answered with undef
## rather than being turned into index -1, which would silently return
## the deepest heading instead.
sub curr_headings {
    my $self = shift;
    $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS});
    my @headings = @{ $self->{_SECTION_HEADINGS} };

    ## Anything but a plain heading number selects the full list
    return @headings
        unless (@_ > 0 and defined $_[0] and $_[0] =~ /^\d+$/);

    my $level = $_[0];
    return ($level >= 1) ? $headings[$level - 1] : undef;
}
##---------------------------------------------------------------------------
=head1 B<select()>
$parser->select($section_spec1,$section_spec2,...);
This method is used to select the particular sections and subsections of
POD documentation that are to be printed and/or processed. The existing
set of selected sections is I<replaced> with the given set of sections.
See B<add_selection()> for adding to the current set of selected
sections.
Each of the C<$section_spec> arguments should be a section specification
as described in L<"SECTION SPECIFICATIONS">. The section specifications
are parsed by this method and the resulting regular expressions are
stored in the invoking object.
If no C<$section_spec> arguments are given, then the existing set of
selected sections is cleared out (which means C<all> sections will be
processed).
This method should I<not> normally be overridden by subclasses.
=cut
## Replace (or, with a leading '+', extend) the set of selected sections.
##   @sections  section specifications; each one is compiled with
##              _compile_section_spec() and stored in
##              {_SELECTED_SECTIONS}. Specs that fail to compile are
##              skipped with a warning (carp).
## Called without specifications, the current selection is removed
## (unless the call was an "add", which then changes nothing).
## Returns nothing.
sub select {
    my ($self, @sections) = @_;

    ### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?)

    ##---------------------------------------------------------------------
    ## The following is a blatant hack for backward compatibility, and for
    ## implementing add_selection(). If the *first* *argument* is the
    ## string "+", then the remaining section specifications are *added*
    ## to the current set of selections; otherwise the given section
    ## specifications will *replace* the current set of selections.
    ##
    ## This should probably be fixed someday, but for the present time,
    ## it seems incredibly unlikely that "+" would ever correspond to
    ## a legitimate section heading
    ##---------------------------------------------------------------------
    ## Guard the comparison: with no arguments at all (clear_selections)
    ## there is no first element to compare against.
    my $add = (@sections && defined $sections[0] && $sections[0] eq '+')
            ? shift(@sections) : '';

    ## Reset the set of sections to use
    unless (@sections) {
        delete $self->{_SELECTED_SECTIONS} unless ($add);
        return;
    }

    $self->{_SELECTED_SECTIONS} = []
        unless ($add && exists $self->{_SELECTED_SECTIONS});

    ## Compile each spec
    for my $spec (@sections) {
        my $compiled = _compile_section_spec($spec);
        if (defined $compiled) {
            ## Store them in our sections array
            push(@{ $self->{_SELECTED_SECTIONS} }, $compiled);
        }
        else {
            carp qq{Ignoring section spec "$spec"!\n};
        }
    }
    return;
}
##---------------------------------------------------------------------------
=head1 B<add_selection()>
$parser->add_selection($section_spec1,$section_spec2,...);
This method is used to add to the currently selected sections and
subsections of POD documentation that are to be printed and/or
processed. See B<select()> for replacing the currently selected sections.
Each of the C<$section_spec> arguments should be a section specification
as described in L<"SECTION SPECIFICATIONS">. The section specifications
are parsed by this method and the resulting regular expressions are
stored in the invoking object.
This method should I<not> normally be overridden by subclasses.
=cut
## Add the given section specifications to the current selection. This is
## select() called with the special leading '+' argument.
sub add_selection {
    my ($self, @section_specs) = @_;
    return $self->select('+', @section_specs);
}
##---------------------------------------------------------------------------
=head1 B<clear_selections()>
$parser->clear_selections();
This method takes no arguments, it has the exact same effect as invoking
B<select()> with no arguments.
=cut
## Remove the current selection: select() called without any section
## specifications. Arguments other than the object are ignored.
sub clear_selections {
    my ($self) = @_;
    return $self->select();
}
##---------------------------------------------------------------------------
=head1 B<match_section()>
$boolean = $parser->match_section($heading1,$heading2,...);
Returns a value of true if the given section and subsection heading
titles match any of the currently selected section specifications in
effect from prior calls to B<select()> and B<add_selection()> (or if
there are no explicitly selected/deselected sections).
The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of
the corresponding sections, subsections, etc. to try and match. If
C<$headingN> is omitted then it defaults to the current corresponding
section heading title in the input.
This method should I<not> normally be overridden by subclasses.
=cut
## Decide whether the given heading titles are covered by the current
## selection.
##   @headings  heading titles by level; undefined ones default to the
##              parser's current heading of that level
## Returns 1 when nothing is selected at all or when at least one stored
## section spec matches at every heading level, 0 otherwise.
sub match_section {
    my ($self, @headings) = @_;

    ## Return true if no restrictions were explicitly specified
    my $selections = (exists $self->{_SELECTED_SECTIONS})
                   ? $self->{_SELECTED_SECTIONS} : undef;
    return 1 unless ((defined $selections) && @{$selections});

    ## Default any unspecified sections to the current one
    my @current_headings = $self->curr_headings();
    for my $level (0 .. $MAX_HEADING_LEVEL - 1) {
        $headings[$level] = $current_headings[$level]
            unless (defined $headings[$level]);
    }

    ## Look for a match against the specified section expressions.
    ## A spec matches only if every one of its levels matches, so the
    ## first level that fails ends the examination of that spec.
    for my $section_spec ( @{$selections} ) {
        my $matched = 1;
        for my $level (0 .. $MAX_HEADING_LEVEL - 1) {
            my $pattern = $section_spec->[$level];
            ## A leading '!' marks a negated expression
            my $negated = ($pattern =~ s/^\!//);
            my $hit     = ($headings[$level] =~ /${pattern}/);
            if ($negated ? $hit : !$hit) {
                $matched = 0;
                last;
            }
        }
        return 1 if ($matched);
    }
    return 0; ## no match
}
##---------------------------------------------------------------------------
=head1 B<is_selected()>
$boolean = $parser->is_selected($paragraph);
This method is used to determine if the block of text given in
C<$paragraph> falls within the currently selected set of POD sections
and subsections to be printed or processed. This method is also
responsible for keeping track of the current input section and
subsections. It is assumed that C<$paragraph> is the most recently read
(but not yet processed) input paragraph.
The value returned will be true if the C<$paragraph> and the rest of the
text in the same section as C<$paragraph> should be selected (included)
for processing; otherwise a false value is returned.
=cut
## Track the current section headings and report whether the paragraph
## falls inside the selection.
##   $paragraph  the paragraph text most recently read
## If the paragraph is a heading command, its title is stored at its
## level and the titles of all deeper levels are emptied. The result is
## whatever match_section() says about the current headings.
sub is_selected {
    my ($self, $paragraph) = @_;
    $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS});

    ## Keep track of current sections levels and headings
    if (my ($subs, $number, $heading) =
            $paragraph =~ /^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*?)\s*$/)
    {
        ## This is a section heading command; without a level number, or
        ## with "sub" prefixes, the level comes from the number of "sub"s
        my $level = $number;
        $level = 1 + (length($subs) / 3)
            if ((! length $level) || (length $subs));

        ## Reset the current section heading at this level
        $self->{_SECTION_HEADINGS}->[$level - 1] = $heading;

        ## Reset subsection headings of this one to empty
        for my $deeper ($level .. $MAX_HEADING_LEVEL - 1) {
            $self->{_SECTION_HEADINGS}->[$deeper] = '';
        }
    }
    return $self->match_section();
}
#############################################################################
=head1 EXPORTED FUNCTIONS
The following functions are exported by this module. Please note that
these are functions (not methods) and therefore C<do not> take an
implicit first argument.
=cut
##---------------------------------------------------------------------------
=head1 B<podselect()>
podselect(\%options,@filelist);
B<podselect> will print the raw (untranslated) POD paragraphs of all
POD sections in the given input files specified by C<@filelist>
according to the options given in C<\%options>.
If any argument to B<podselect> is a reference to a hash
(associative array) then the values with the following keys are
processed as follows:
=over 4
=item B<-output>
A string corresponding to the desired output file (or ">&STDOUT"
or ">&STDERR"), or a filehandle to write on. The default is to use
standard output.
=item B<-sections>
A reference to an array of sections specifications (as described in
L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD
sections and subsections to be selected from input. If no section
specifications are given, then all sections of the PODs are used.
=begin _NOT_IMPLEMENTED_
=item B<-ranges>
A reference to an array of range specifications (as described in
L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD
paragraphs to be selected from the desired input sections. If no range
specifications are given, then all paragraphs of the desired sections
are used.
=end _NOT_IMPLEMENTED_
=back
All other arguments are optional and should correspond to filehandles to
read from or the names of input files containing POD sections. A file name
of "", "-" or "<&STDIN" will be interpreted to mean standard input (which
is the default if no arguments are given).
=cut
## Print the selected raw POD of the given inputs.
##   @argv  a mix of
##          - hash references with options: '-output' (where to write)
##            and '-sections' (array ref of section specifications).
##            Option names are case-insensitive, the leading '-' may be
##            omitted, and anything starting with "sec" or "sel" counts
##            as '-sections';
##          - file names or GLOB references to read from.
## Arguments are processed in order, so options only affect the inputs
## that come after them. Without any input, standard input ('-') is
## read. Any other kind of reference is a fatal error (croak).
sub podselect {
    my(@argv) = @_;
    my %defaults = ();
    my $pod_parser = Pod::Select->new(%defaults);
    my $num_inputs = 0;
    my $output = '>&STDOUT';

    for my $arg (@argv) {
        my $ref = ref($arg);
        if ($ref && $ref eq 'HASH') {
            my %given = (%defaults, %{$arg});

            ##-------------------------------------------------------------
            ## Need this for backward compatibility since we formerly used
            ## options that were all uppercase words rather than ones that
            ## looked like Unix command-line options.
            ##-------------------------------------------------------------
            my %opts = map {
                my ($key, $val) = (lc $_, $given{$_});
                $key =~ s/^(?=\w)/-/;
                $key =~ /^-se[cl]/ and $key = '-sections';
                #! $key eq '-range' and $key .= 's';
                ($key => $val);
            } (keys %given);

            ## Process the options
            (exists $opts{'-output'}) and $output = $opts{'-output'};

            ## Select the desired sections
            $pod_parser->select(@{ $opts{'-sections'} })
                if ( (defined $opts{'-sections'})
                     && ((ref $opts{'-sections'}) eq 'ARRAY') );

            #! ## Select the desired paragraph ranges
            #! $pod_parser->select(@{ $opts{'-ranges'} })
            #!     if ( (defined $opts{'-ranges'})
            #!          && ((ref $opts{'-ranges'}) eq 'ARRAY') );
        }
        elsif(!$ref || $ref eq 'GLOB') {
            $pod_parser->parse_from_file($arg, $output);
            ++$num_inputs;
        }
        else {
            croak "Input from $ref reference not supported!\n";
        }
    }

    ## No inputs given: read standard input, still honouring '-output'
    $pod_parser->parse_from_file('-', $output) unless ($num_inputs > 0);
}
#############################################################################
=head1 PRIVATE METHODS AND DATA
B<Pod::Select> makes use of a number of internal methods and data fields
which clients should not need to see or use. For the sake of avoiding
name collisions with client data and methods, these methods and fields
are briefly discussed here. Determined hackers may obtain further
information about them by reading the B<Pod::Select> source code.
Private data fields are stored in the hash-object whose reference is
returned by the B<new()> constructor for this class. The names of all
private methods and data-fields used by B<Pod::Select> begin with a
prefix of "_" and match the regular expression C</^_\w+$/>.
=cut
##---------------------------------------------------------------------------
=begin _PRIVATE_
=head1 B<_compile_section_spec()>
$listref = $parser->_compile_section_spec($section_spec);
This function (note it is a function and I<not> a method) takes a
section specification (as described in L<"SECTION SPECIFICATIONS">)
given in C<$section_spec>, and compiles it into a list of regular
expressions. If C<$section_spec> has no syntax errors, then a reference
to the list (array) of corresponding regular expressions is returned;
otherwise C<undef> is returned and an error message is printed (using
B<carp>) for each invalid regex.
=end _PRIVATE_
=cut
## Compile a section specification ("head1-regex/head2-regex/...") into a
## reference to a list of anchored regex strings, one per heading level.
## Omitted levels default to '.*'; a leading "!" marks a negated regex and
## is kept as a prefix of the returned string.  Returns undef, after
## carping once per offending regex, if any component fails to compile.
sub _compile_section_spec {
    my ($section_spec) = @_;
    my @regexs;

    ## Compile the spec into a list of regexs
    local $_ = $section_spec;
    s{\\\\}{\001}g;  ## handle escaped backward slashes
    s{\\/}{\002}g;   ## handle escaped forward slashes

    ## Parse the regexs for the heading titles
    @regexs = split(/\//, $_, $MAX_HEADING_LEVEL);

    ## Set default regex for omitted levels
    for my $i (0 .. $MAX_HEADING_LEVEL - 1) {
        $regexs[$i] = '.*'  unless ((defined $regexs[$i])
                                    && (length $regexs[$i]));
    }

    ## Modify the regexs as needed and validate their syntax
    my $bad_regexs = 0;
    for (@regexs) {
        $_ .= '.+'  if ($_ eq '!');
        s{\001}{\\\\}g;       ## restore escaped backward slashes
        s{\002}{\\/}g;        ## restore escaped forward slashes
        my $negated = s/^\!//;   ## check for negation

        ## Check the regex syntax by compiling it as data.  The spec is
        ## never evaluated as Perl source, so it cannot smuggle in code,
        ## and a literal "}" in the pattern is not mistaken for a
        ## delimiter.
        my $compiled = eval { qr{$_} };
        if (!defined $compiled) {
            ++$bad_regexs;
            carp qq{Bad regular expression /$_/ in "$section_spec": $@\n};
        }
        else {
            ## Add the forward and rear anchors (and put the negator back)
            $_ = '^' . $_  unless (/^\^/);
            $_ = $_ . '$'  unless (/\$$/);
            $_ = '!' . $_  if ($negated);
        }
    }
    return  (! $bad_regexs) ? [ @regexs ] : undef;
}
##---------------------------------------------------------------------------
=begin _PRIVATE_
=head2 $self->{_SECTION_HEADINGS}
A reference to an array of the current section heading titles for each
heading level (note that the first heading level title is at index 0).
=end _PRIVATE_
=cut
##---------------------------------------------------------------------------
=begin _PRIVATE_
=head2 $self->{_SELECTED_SECTIONS}
A reference to an array of references to arrays. Each subarray is a list
of anchored regular expressions (preceded by a "!" if the expression is to
be negated). The index of the expression in the subarray should correspond
to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}>
that it is to be matched against.
=end _PRIVATE_
=cut
#############################################################################
=head1 SEE ALSO
L<Pod::Parser>
=head1 AUTHOR
Please report bugs using L<http://rt.cpan.org>.
Brad Appleton E<lt>bradapp@enteract.comE<gt>
Based on code for B<pod2text> written by
Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
B<Pod::Select> is part of the L<Pod::Parser> distribution.
=cut
1;
# vim: ts=4 sw=4 et
PlainText.pm 0000644 00000061760 15051135563 0007030 0 ustar 00 # Pod::PlainText -- Convert POD data to formatted ASCII text.
# $Id: Text.pm,v 2.1 1999/09/20 11:53:33 eagle Exp $
#
# Copyright 1999-2000 by Russ Allbery <rra@stanford.edu>
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
#
# This module is intended to be a replacement for Pod::Text, and attempts to
# match its output except for some specific circumstances where other
# decisions seemed to produce better output. It uses Pod::Parser and is
# designed to be very easy to subclass.
############################################################################
# Modules and declarations
############################################################################
package Pod::PlainText;
use strict;
require 5.005;
use Carp qw(carp croak);
use Pod::Select ();
use vars qw(@ISA %ESCAPES $VERSION);
# We inherit from Pod::Select instead of Pod::Parser so that we can be used
# by Pod::Usage.
@ISA = qw(Pod::Select);
$VERSION = '2.07';
BEGIN {
if ($] < 5.006) {
require Symbol;
import Symbol;
}
}
############################################################################
# Table of supported E<> escapes
############################################################################
# This table is taken near verbatim from Pod::PlainText in Pod::Parser,
# which got it near verbatim from the original Pod::Text.  It is therefore
# credited to Tom Christiansen, and I'm glad I didn't have to write it.  :)
#
# Keys are the names accepted inside E<>; values are the corresponding
# single characters, given as ISO 8859-1 (Latin-1) code points.
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote

    "Aacute"    =>    "\xC1",   # capital A, acute accent
    "aacute"    =>    "\xE1",   # small a, acute accent
    "Acirc"     =>    "\xC2",   # capital A, circumflex accent
    "acirc"     =>    "\xE2",   # small a, circumflex accent
    "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
    "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
    "Agrave"    =>    "\xC0",   # capital A, grave accent
    "agrave"    =>    "\xE0",   # small a, grave accent
    "Aring"     =>    "\xC5",   # capital A, ring
    "aring"     =>    "\xE5",   # small a, ring
    "Atilde"    =>    "\xC3",   # capital A, tilde
    "atilde"    =>    "\xE3",   # small a, tilde
    "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
    "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
    "Ccedil"    =>    "\xC7",   # capital C, cedilla
    "ccedil"    =>    "\xE7",   # small c, cedilla
    "Eacute"    =>    "\xC9",   # capital E, acute accent
    "eacute"    =>    "\xE9",   # small e, acute accent
    "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
    "ecirc"     =>    "\xEA",   # small e, circumflex accent
    "Egrave"    =>    "\xC8",   # capital E, grave accent
    "egrave"    =>    "\xE8",   # small e, grave accent
    "ETH"       =>    "\xD0",   # capital Eth, Icelandic
    "eth"       =>    "\xF0",   # small eth, Icelandic
    "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
    "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
    "Iacute"    =>    "\xCD",   # capital I, acute accent
    "iacute"    =>    "\xED",   # small i, acute accent
    "Icirc"     =>    "\xCE",   # capital I, circumflex accent
    "icirc"     =>    "\xEE",   # small i, circumflex accent
    "Igrave"    =>    "\xCC",   # capital I, grave accent
    "igrave"    =>    "\xEC",   # small i, grave accent
    "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
    "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
    "Ntilde"    =>    "\xD1",   # capital N, tilde
    "ntilde"    =>    "\xF1",   # small n, tilde
    "Oacute"    =>    "\xD3",   # capital O, acute accent
    "oacute"    =>    "\xF3",   # small o, acute accent
    "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
    "ocirc"     =>    "\xF4",   # small o, circumflex accent
    "Ograve"    =>    "\xD2",   # capital O, grave accent
    "ograve"    =>    "\xF2",   # small o, grave accent
    "Oslash"    =>    "\xD8",   # capital O, slash
    "oslash"    =>    "\xF8",   # small o, slash
    "Otilde"    =>    "\xD5",   # capital O, tilde
    "otilde"    =>    "\xF5",   # small o, tilde
    "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
    "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
    "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
    "THORN"     =>    "\xDE",   # capital THORN, Icelandic
    "thorn"     =>    "\xFE",   # small thorn, Icelandic
    "Uacute"    =>    "\xDA",   # capital U, acute accent
    "uacute"    =>    "\xFA",   # small u, acute accent
    "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
    "ucirc"     =>    "\xFB",   # small u, circumflex accent
    "Ugrave"    =>    "\xD9",   # capital U, grave accent
    "ugrave"    =>    "\xF9",   # small u, grave accent
    "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
    "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
    "Yacute"    =>    "\xDD",   # capital Y, acute accent
    "yacute"    =>    "\xFD",   # small y, acute accent
    "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark

    "lchevron"  =>    "\xAB",   # left chevron (double less than)
    "rchevron"  =>    "\xBB",   # right chevron (double greater than)
);
############################################################################
# Initialization
############################################################################
# Set up a freshly constructed formatter: supply a default for every
# formatting option the caller did not pass, reset the indentation state,
# and then hand over to the parent class initializer.
sub initialize {
    my ($self) = @_;

    my %fallback = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %fallback) {
        next if defined $self->{$option};
        $self->{$option} = $fallback{$option};
    }

    $self->{INDENTS} = [];                  # Stack of indentations.
    $self->{MARGIN}  = $self->{indent};     # Current left margin in spaces.

    return $self->SUPER::initialize;
}
############################################################################
# Core overrides
############################################################################
# Entry point for every command paragraph.  Receives the command name
# followed by the remaining arguments, which are passed through untouched
# to a method called cmd_<command>.  "=pod" is ignored, and while an
# excluded =begin block is open everything except "=end" is ignored.  Any
# pending =item tag is flushed before the command is dispatched.
sub command {
    my $self = shift;
    my $name = shift;

    return if $name eq 'pod';
    return if $$self{EXCLUDE} and $name ne 'end';

    if (defined $$self{ITEM}) {
        $self->item ("\n");
        if ($name eq 'back') {
            # Pass a variable rather than a literal: output() rewrites
            # its argument in place.
            my $blank = "\n";
            $self->output ($blank);
        }
    }

    my $handler = 'cmd_' . $name;
    return $self->$handler (@_);
}
# Handle a verbatim paragraph.  Receives the paragraph text (further
# arguments are ignored).  Nothing is emitted inside an excluded block or
# for a whitespace-only paragraph; otherwise each line that carries text
# is pushed right by the current margin and the result is sent to
# output().  A pending =item tag is flushed first.
sub verbatim {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->item if defined $$self{ITEM};

    my $text = shift;
    return if $text =~ /^\s*$/;

    my $pad = ' ' x $$self{MARGIN};
    $text =~ s/^(\s*\S+)/$pad$1/gm;
    return $self->output ($text);
}
# Called for a regular text block. Gets the paragraph, the line number, and
# a Pod::Paragraph object. Perform interpolation and output the results.
#
# Nothing is emitted while an excluded =begin block is open. Inside a
# "=begin text" block (VERBATIM set) the paragraph is written out exactly as
# received, without interpolation or reformatting. Otherwise the text is
# interpolated and then either attached to the pending =item tag or
# reformatted and written out.
sub textblock {
my $self = shift;
return if $$self{EXCLUDE};
if($$self{VERBATIM}) {
# $_[0] is now the paragraph text, since $self has been shifted off.
$self->output($_[0]);
return;
}
local $_ = shift;
my $line = shift;
# Perform a little magic to collapse multiple L<> references. This is
# here mostly for backwards-compatibility. We'll just rewrite the whole
# thing into actual text at this part, bypassing the whole internal
# sequence parsing thing.
#
# The pattern matches two or more L</name> links separated by commas
# and/or "and"; the replacement code strips the L</...> wrappers and joins
# the names into "the a, b, and c entries elsewhere in this document".
s{
(
L< # A link of the form L</something>.
/
(
[:\w]+ # The item has to be a simple word...
(\(\))? # ...or simple function.
)
>
(
,?\s+(and\s+)? # Allow lots of them, conjuncted.
L<
/
(
[:\w]+
(\(\))?
)
>
)+
)
} {
local $_ = $1;
s%L</([^>]+)>%$1%g;
my @items = split /(?:,?\s+(?:and\s+)?)/;
my $string = "the ";
my $i;
for ($i = 0; $i < @items; $i++) {
$string .= $items[$i];
$string .= ", " if @items > 2 && $i != $#items;
$string .= " and " if ($i == $#items - 1);
}
$string .= " entries elsewhere in this document";
$string;
}gex;
# Now actually interpolate and output the paragraph.
$_ = $self->interpolate ($_, $line);
# Normalize whatever trailing whitespace is left to a single newline.
s/\s*$/\n/s;
if (defined $$self{ITEM}) {
# An =item tag is waiting: let item() lay the tag and this text out together.
$self->item ($_ . "\n");
} else {
$self->output ($self->reformat ($_ . "\n"));
}
}
# Called for an interior sequence.  Gets the command letter, the argument
# text, and a Pod::InteriorSequence object, and returns the text that
# should replace the sequence.
#
#   X<>, Z<>         produce nothing.
#   E<name>          is looked up in %ESCAPES; an unknown name is carped
#                    about and passed through as the literal "E<name>".
#   S<text>          has runs of whitespace squeezed to one space and every
#                    space mapped to \01, which output() maps back later.
#   B<> C<> F<> I<> L<>
#                    are delegated to seq_b, seq_c, seq_f, seq_i and seq_l.
#
# Any other command letter is carped about and passed through literally as
# "X<text>", mirroring the handling of unknown escapes, so that a
# diagnostic return value never ends up in the formatted document.
sub interior_sequence {
    my $self    = shift;
    my $command = shift;
    local $_    = shift;
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, carping if invalid.
    if ($command eq 'E') {
        return $ESCAPES{$_} if defined $ESCAPES{$_};
        carp "Unknown escape: E<$_>";
        return "E<$_>";
    }

    # For all the other sequences, empty content produces no output.
    return if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s{2,}/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    if    ($command eq 'B') { return $self->seq_b ($_) }
    elsif ($command eq 'C') { return $self->seq_c ($_) }
    elsif ($command eq 'F') { return $self->seq_f ($_) }
    elsif ($command eq 'I') { return $self->seq_i ($_) }
    elsif ($command eq 'L') { return $self->seq_l ($_) }

    # Unknown formatting code: warn, but keep the author's text.
    carp "Unknown sequence $command<$_>";
    return "$command<$_>";
}
# Hook run on every paragraph that belongs to the POD.  Used here to
# untabify the input: each run of tabs is replaced by enough spaces to
# reach the next multiple-of-eight column for the first tab, plus eight
# more for every further tab in the run.  Returns the expanded text.
sub preprocess_paragraph {
    my ($self, $para) = @_;

    # Each pass expands the first remaining run of tabs; repeat until the
    # paragraph holds no tab at all.
    while ($para =~ s{^(.*?)(\t+)}{$1 . ' ' x (length ($2) * 8 - length ($1) % 8)}me) {
    }
    return $para;
}
############################################################################
# Command paragraphs
############################################################################
# All command paragraphs take the paragraph and the line number.
# First level heading.  Takes the heading text and the line number.  The
# interpolated title is written flush left, or framed as "==== title ===="
# in the alternate format.  With the loose option an extra blank line
# follows the plain form.
sub cmd_head1 {
    my ($self, $title, $line_number) = @_;
    $title =~ s/\s+$//s;
    $title = $self->interpolate ($title, $line_number);

    return $self->output ("\n==== $title ====\n\n") if $$self{alt};

    $title .= "\n" if $$self{loose};
    return $self->output ($title . "\n");
}
# Second level heading.  Takes the heading text and the line number.  The
# interpolated title is indented by half the configured indent, or framed
# as "== title ==" in the alternate format.  With the loose option an
# extra blank line follows the plain form.
sub cmd_head2 {
    my ($self, $title, $line_number) = @_;
    $title =~ s/\s+$//s;
    $title = $self->interpolate ($title, $line_number);

    return $self->output ("\n== $title ==\n\n") if $$self{alt};

    $title .= "\n" if $$self{loose};
    my $lead = ' ' x ($$self{indent} / 2);
    return $self->output ($lead . $title . "\n");
}
# Third level heading - not strictly perlpodspec compliant.  Takes the
# heading text and the line number.  The interpolated title is indented by
# the full configured indent, or framed as "= title =" in the alternate
# format.  With the loose option an extra blank line follows the plain
# form.
sub cmd_head3 {
    my ($self, $title, $line_number) = @_;
    $title =~ s/\s+$//s;
    $title = $self->interpolate ($title, $line_number);

    return $self->output ("\n= $title =\n") if $$self{alt};

    $title .= "\n" if $$self{loose};
    my $lead = ' ' x ($$self{indent});
    return $self->output ($lead . $title . "\n");
}

# Fourth level heading - not strictly perlpodspec compliant either; it is
# rendered exactly like a third level heading.
*cmd_head4 = \&cmd_head3;
# Start a list.  Takes the text following "=over".  If that text is an
# optionally signed integer (trailing whitespace allowed but not required)
# it is used as the indentation for the list; anything else selects the
# configured default indent.  The current margin is saved on the INDENTS
# stack and then widened by the chosen amount.  Returns the new margin.
sub cmd_over {
    my ($self, $amount) = @_;

    # Accept "=over 8" even when nothing, not even a newline, follows the
    # number, as happens when the command is the last thing in the input.
    unless (defined $amount && $amount =~ /^[-+]?\d+\s*$/) {
        $amount = $$self{indent};
    }

    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    return $$self{MARGIN} += ($amount + 0);
}
# End a list: restore the margin saved by the matching =over.  When the
# INDENTS stack is already empty, complain about the stray =back and fall
# back to the configured default indent.
sub cmd_back {
    my ($self) = @_;
    $self->{MARGIN} = pop @{ $self->{INDENTS} };
    if (!defined $self->{MARGIN}) {
        carp 'Unmatched =back';
        $self->{MARGIN} = $self->{indent};
    }
}
# An individual list item.  Flushes any tag still pending from a previous
# =item, then stores the interpolated tag (trailing whitespace removed) in
# ITEM, to be laid out once the item's paragraph is known.
sub cmd_item {
    my ($self, $tag) = @_;
    $self->item if defined $$self{ITEM};
    $tag =~ s/\s+$//s;
    $$self{ITEM} = $self->interpolate ($tag);
}
# Begin a block for a particular translator.  The first word of the
# paragraph names the target format: "text" sets VERBATIM, which triggers
# special handling in textblock(); any other format sets EXCLUDE.  A
# paragraph that does not start with a word is ignored.
sub cmd_begin {
    my ($self, $para) = @_;
    return unless $para =~ /^(\S+)/;
    my $format = $1;
    my $flag   = ($format eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $$self{$flag} = 1;
}
# End a block for a particular translator by clearing both block flags.
# We assume that all =begin/=end pairs are properly closed.
sub cmd_end {
    my ($self) = @_;
    $self->{EXCLUDE} = 0;
    return $self->{VERBATIM} = 0;
}
# One paragraph for a particular translator.  Takes the paragraph and the
# line number.  Ignored unless it is addressed to "text", in which case
# the format word is stripped and the remainder is handled as a verbatim
# text block.
sub cmd_for {
    my ($self, $para, $line_number) = @_;
    return unless $para =~ s/^text\b[ \t]*\r?\n?//;
    $self->verbatim ($para, $line_number);
}
# =encoding is accepted but deliberately ignored for the time being.
sub cmd_encoding { return }
############################################################################
# Interior sequences
############################################################################
# The simple formatting codes.  Each takes the formatter and the already
# formatted content and returns the text to emit.  They exist mostly so
# that subclasses can override them and do more complicated things.

# B<>: unchanged normally, ``quoted'' in the alternate format.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return "``$text''";
}

# C<>: `quoted' normally, ``quoted'' in the alternate format.
sub seq_c {
    my ($self, $text) = @_;
    return "`$text'" unless $self->{alt};
    return "``$text''";
}

# F<>: unchanged normally, "quoted" in the alternate format.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return '"' . $text . '"';
}

# I<>: always wrapped in asterisks.
sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
# The complicated one.  Handle links.  Since this is plain text, we can't
# actually make any real links, so all this does is work out which words
# to print for the contents of an L<> sequence.
sub seq_l {
    my ($self, $link) = @_;

    # Smash whitespace in case we were split across multiple lines.
    $link =~ s/\s+/ /g;

    # Explicit text before a "|" wins outright.
    return $1 if $link =~ /^([^|]+)\|/;

    # Leading and trailing whitespace isn't important from here on.
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # URLs are printed as they stand.
    return $link if $link =~ /^(?:https?|ftp|news):/;

    # By default the whole link is a section name.  L<manpage/> forces a
    # manpage interpretation, as does anything that looks like
    # L<manpage(section)>.
    my $manpage = '';
    my $section = $link;
    if ($link =~ /^"\s*(.*?)\s*"$/) {
        $section = '"' . $1 . '"';
    }
    elsif ($link =~ m/^[-:.\w]+(?:\(\S+\))?$/) {
        $manpage = $link;
        $section = '';
    }
    elsif ($link =~ m{/}) {
        ($manpage, $section) = split (/\s*\/\s*/, $link, 2);
    }

    # Now build the actual output text.
    my $where = (length $manpage) ? " in the $manpage manpage" : '';

    if (!length $section) {
        return (length $manpage) ? "the $manpage manpage" : '';
    }

    if ($section =~ /^[:\w]+(?:\(\))?/) {
        $where = ' elsewhere in this document' unless length $manpage;
        return 'the ' . $section . ' entry' . $where;
    }

    $section =~ s/^\"\s*//;
    $section =~ s/\s*\"$//;
    return 'the section on "' . $section . '"' . $where;
}
############################################################################
# List handling
############################################################################
# This method is called whenever an =item command is complete (in other
# words, we've seen its associated paragraph or know for certain that it
# doesn't have one). It gets the paragraph associated with the item as an
# argument. If that argument is empty, just output the item tag; if it
# contains a newline, output the item tag followed by the newline.
# Otherwise, see if there's enough room for us to output the item tag in the
# margin of the text or if we have to put it on a separate line.
#
# The pending tag is read from, and then cleared in, $$self{ITEM}. Carps
# and returns without output if no tag is pending.
sub item {
my $self = shift;
local $_ = shift;
my $tag = $$self{ITEM};
unless (defined $tag) {
carp 'item called without tag';
return;
}
undef $$self{ITEM};
# $indent is the margin saved by the innermost =over (top of the INDENTS
# stack), or the default indent when no list is open.
my $indent = $$self{INDENTS}[-1];
unless (defined $indent) { $indent = $$self{indent} }
my $space = ' ' x $indent;
# In the alternate format the first column of the tag line carries a colon.
$space =~ s/^ /:/ if $$self{alt};
# Tag on a line of its own when there is no paragraph text (false or
# whitespace only), or when the gap between $indent and the current margin
# cannot hold the tag plus one separating space.
if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
# Temporarily drop the margin back to $indent so the tag is formatted at
# list level, then restore it for the paragraph text.
my $margin = $$self{MARGIN};
$$self{MARGIN} = $indent;
my $output = $self->reformat ($tag);
# Reduce whatever line breaks end the formatted tag to exactly one newline.
$output =~ s/[\r\n]*$/\n/;
$self->output ($output);
$$self{MARGIN} = $margin;
$self->output ($self->reformat ($_)) if /\S/;
} else {
# The tag fits: format the paragraph at the current margin, then overwrite
# the leading blanks just after $indent with the tag itself.
$_ = $self->reformat ($_);
s/^ /:/ if ($$self{alt} && $indent > 0);
my $tagspace = ' ' x length $tag;
s/^($space)$tagspace/$1$tag/ or carp 'Bizarre space in item';
$self->output ($_);
}
}
############################################################################
# Output formatting
############################################################################
# Wrap a line, indenting by the current left margin.  We can't use
# Text::Wrap because it plays games with tabs.  We can't use formline, even
# though we'd really like to, because it screws up non-printing characters.
# So we have to do the wrapping ourselves.
#
# Takes the text to wrap and returns it broken into lines no wider than
# width minus MARGIN, each prefixed with MARGIN spaces.  Any trailing
# whitespace in the result is replaced by a blank line.
sub wrap {
    my ($self, $text) = @_;
    my $indent = ' ' x $$self{MARGIN};
    my $room   = $$self{width} - $$self{MARGIN};

    my @lines;
    while (length ($text) > $room) {
        # Break at the last whitespace that fits; failing that, chop a
        # full-width piece out of an over-long word.
        last unless $text =~ s/^([^\r\n]{0,$room})\s+//
                 || $text =~ s/^([^\r\n]{$room})//;
        push (@lines, $indent . $1 . "\n");
    }

    my $wrapped = join ('', @lines) . $indent . $text;
    $wrapped =~ s/\s+$/\n\n/;
    return $wrapped;
}
# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text as produced by wrap().
#
# With the sentence option set, the two-space gap after the end of a
# sentence is preserved: a period at the end of an input line is followed
# by two spaces once lines are joined, and only runs of three or more
# spaces are squeezed, down to two.  Without it, every run of whitespace
# becomes a single space.
sub reformat {
    my ($self, $text) = @_;

    if ($$self{sentence}) {
        $text =~ s/ +$//mg;              # drop trailing blanks on each line
        $text =~ s/\.\r?\n/.  \n/g;      # sentence ended at a line break
        $text =~ s/[\r\n]+/ /g;          # join the lines
        $text =~ s/   +/  /g;            # keep at most two spaces in a row
    } else {
        $text =~ s/\s+/ /g;
    }
    return $self->wrap ($text);
}
# Output text to the output device, mapping the \01 placeholders produced
# for S<> back to spaces.  The text is copied first, so the caller's
# variable is left alone and read-only values such as string literals can
# be passed safely.  Returns the result of print.
sub output {
    my ($self, $text) = @_;
    $text =~ tr/\01/ /;
    print { $self->output_handle } $text;
}
############################################################################
# Backwards compatibility
############################################################################
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.
#
# Usage: pod2text ([-a,] [-NUMBER,] [INPUT [, OUTPUT_HANDLE]])
#
# Leading "-a" selects the alternate format and "-NUMBER" sets the width.
# With two remaining arguments the first is opened for reading and parsed
# to the handle given as the second; otherwise the remaining arguments are
# passed to parse_from_file().  Croaks if the input cannot be opened.
sub pod2text {
    # Work on a private copy: @_ aliases the caller's variables, and a
    # literal file name in @_ is read-only.
    my @files = @_;
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.
    while (defined $files[0] && $files[0] =~ /^-/) {
        my $flag = shift @files;
        if    ($flag eq '-a')       { push (@args, alt => 1)    }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@files, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::PlainText->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we want to call parse_from_filehandle(), which
    # means we need to turn the first argument into a file handle.
    if (defined $files[1]) {
        my $infh;
        if ($] < 5.006) {
            $infh = gensym();
        }

        # NOTE(review): two-argument "magic" open is kept on purpose so
        # that legacy inputs such as "<&STDIN" keep working; do not pass
        # untrusted strings as the input name.
        unless (open ($infh, $files[0])) {
            croak ("Can't open $files[0] for reading: $!\n");
        }
        $files[0] = $infh;
        return $parser->parse_from_filehandle (@files);
    } else {
        return $parser->parse_from_file (@files);
    }
}
############################################################################
# Module return value and documentation
############################################################################
1;
__END__
=head1 NAME
Pod::PlainText - Convert POD data to formatted ASCII text
=head1 SYNOPSIS
use Pod::PlainText;
my $parser = Pod::PlainText->new (sentence => 0, width => 78);
# Read POD from STDIN and write to STDOUT.
$parser->parse_from_filehandle;
# Read POD from file.pod and write to file.txt.
$parser->parse_from_file ('file.pod', 'file.txt');
=head1 DESCRIPTION
B<NOTE: This module is considered legacy; modern Perl releases (5.18 and
higher) are going to remove Pod-Parser from core and use L<Pod-Simple>
for all things POD.>
Pod::PlainText is a module that can convert documentation in the POD format (the
preferred language for documenting Perl) into formatted ASCII. It uses no
special formatting controls or codes whatsoever, and its output is therefore
suitable for nearly any device.
As a derived class from Pod::Parser, Pod::PlainText supports the same methods and
interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
new parser with C<Pod::PlainText-E<gt>new()> and then calls either
parse_from_filehandle() or parse_from_file().
new() can take options, in the form of key/value pairs, that control the
behavior of the parser. The currently recognized options are:
=over 4
=item alt
If set to a true value, selects an alternate output format that, among other
things, uses a different heading style and marks C<=item> entries with a
colon in the left margin. Defaults to false.
=item indent
The number of spaces to indent regular text, and the default indentation for
C<=over> blocks. Defaults to 4.
=item loose
If set to a true value, a blank line is printed after C<=headN> headings.
If set to false (the default), no blank line is printed after C<=headN>.
This is the default because it's the expected formatting for manual pages;
if you're formatting arbitrary text documents, setting this to true may
result in more pleasing output.
=item sentence
If set to a true value, Pod::PlainText will assume that each sentence ends in two
spaces, and will try to preserve that spacing. If set to false, all
consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to false.
=item width
The column at which to wrap text on the right-hand side. Defaults to 76.
=back
The standard Pod::Parser method parse_from_filehandle() takes up to two
arguments, the first being the file handle to read POD from and the second
being the file handle to write the formatted output to. The first defaults
to STDIN if not given, and the second defaults to STDOUT. The method
parse_from_file() is almost identical, except that its two arguments are the
input and output disk files instead. See L<Pod::Parser> for the specific
details.
=head1 DIAGNOSTICS
=over 4
=item Bizarre space in item
(W) Something has gone wrong in internal C<=item> processing. This message
indicates a bug in Pod::PlainText; you should never see it.
=item Can't open %s for reading: %s
(F) Pod::PlainText was invoked via the compatibility mode pod2text() interface
and the input file it was given could not be opened.
=item Unknown escape: %s
(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::PlainText didn't
know about.
=item Unknown sequence %s
(W) The POD source contained a non-standard internal sequence (something of
the form C<XE<lt>E<gt>>) that Pod::PlainText didn't know about.
=item Unmatched =back
(W) Pod::PlainText encountered a C<=back> command that didn't correspond to an
C<=over> command.
=back
=head1 RESTRICTIONS
Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
output, due to an internal implementation detail.
=head1 NOTES
This is a replacement for an earlier Pod::Text module written by Tom
Christiansen. It has a revamped interface, since it now uses Pod::Parser,
but an interface roughly compatible with the old Pod::Text::pod2text()
function is still available. Please change to the new calling convention,
though.
The original Pod::Text contained code to do formatting via termcap
sequences, although it wasn't turned on by default and it was problematic to
get it to work at all. This rewrite doesn't even try to do that, but a
subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
=head1 SEE ALSO
B<Pod::PlainText> is part of the L<Pod::Parser> distribution.
L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
pod2text(1)
=head1 AUTHOR
Please report bugs using L<http://rt.cpan.org>.
Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
its conversion to Pod::Parser by Brad Appleton
E<lt>bradapp@enteract.comE<gt>.
=cut
ParseLink.pm 0000644 00000014373 15051135563 0007006 0 ustar 00 # Parse an L<> formatting code in POD text.
#
# This module implements parsing of the text of an L<> formatting code as
# defined in perlpodspec. It should be suitable for any POD formatter. It
# exports only one function, parselink(), which returns the five-item parse
# defined in perlpodspec.
#
# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
##############################################################################
# Modules and declarations
##############################################################################
package Pod::ParseLink;
use 5.006;
use strict;
use warnings;
use vars qw(@EXPORT @ISA $VERSION);
use Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(parselink);
$VERSION = '4.11';
##############################################################################
# Implementation
##############################################################################
# Split the target of a link into a page name and a section and return them
# as a two-element list; a part that is missing or false comes back as
# undef.
sub _parse_section {
    my ($link) = @_;
    for ($link) {
        s/^\s+//;
        s/\s+$//;
    }

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    if ($link =~ /^"\s*(.*?)\s*"$/) {
        return (undef, $1);
    }

    # Split into page and section on slash, and then clean up quoting in the
    # section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if $section;

    # If there is no section and the name contains spaces, guess that it's
    # an old-style section link.
    my $old_style = $page && $page =~ / / && !defined ($section);
    return (undef, $page) if $old_style;

    return ($page || undef, $section || undef);
}
# Infer link text from the page and section: the page name alone, the
# quoted section alone, or '"section" in page' when both are present.
# Returns undef when neither is set.
sub _infer_text {
    my ($page, $section) = @_;
    my $inferred = !$section ? ($page ? $page : undef)
                 : !$page    ? qq{"$section"}
                 :             qq{"$section" in $page};
    return $inferred;
}
# Given the contents of an L<> formatting code, parse it and return a
# five-element list: the link text as written (undef if none), the link
# text to display (the explicit text if non-empty, otherwise inferred),
# the name or URL, the section, and the type of link (pod, man, or url).
sub parselink {
    my ($link) = @_;
    $link =~ s/\s+/ /g;

    # Anything before the first "|" is the explicit anchor text.
    my $text;
    ($text, $link) = split (/\|/, $link, 2) if $link =~ /\|/;
    my $has_text = defined ($text) && length ($text) > 0;

    # URLs have no section, and the URL itself serves as the default text.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        return ($text, ($has_text ? $text : $link), $link, undef, 'url');
    }

    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);

    # A name carrying a parenthesized section, as in ls(1), is a manual page.
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
##############################################################################
# Module return value and documentation
##############################################################################
# Ensure we evaluate to true.
1;
__END__
=for stopwords
markup Allbery URL
=head1 NAME
Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text
=head1 SYNOPSIS
use Pod::ParseLink;
my $link = get_link();
my ($text, $inferred, $name, $section, $type) = parselink($link);
=head1 DESCRIPTION
This module only provides a single function, parselink(), which takes the
text of an LE<lt>E<gt> formatting code and parses it. It returns the
anchor text for the link (if any was given), the anchor text possibly
inferred from the name and section, the name or URL, the section if any,
and the type of link. The type will be one of C<url>, C<pod>, or C<man>,
indicating a URL, a link to a POD page, or a link to a Unix manual page.
Parsing is implemented per L<perlpodspec>. For backward compatibility,
links where there is no section and name contains spaces, or links where the
entirety of the link (except for the anchor text if given) is enclosed in
double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>).
The inferred anchor text is implemented per L<perlpodspec>:
L<name> => L<name|name>
L</section> => L<"section"|/section>
L<name/section> => L<"section" in name|name/section>
The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes,
and the section, anchor text, and inferred anchor text may contain any
formatting codes. Any double quotes around the section are removed as part
of the parsing, as is any leading or trailing whitespace.
If the text of the LE<lt>E<gt> escape is entirely enclosed in double
quotes, it's interpreted as a link to a section for backward
compatibility.
No attempt is made to resolve formatting codes. This must be done after
calling parselink(), since EE<lt>E<gt> formatting codes can be used to
escape characters that would otherwise be significant to the parser and
resolving them before parsing would result in an incorrect parse of a
formatting code like:
L<verticalE<verbar>barE<sol>slash>
which should be interpreted as a link to the C<vertical|bar/slash> POD page
and not as a link to the C<slash> section of the C<bar> POD page with an
anchor text of C<vertical>. Note that not only the anchor text will need to
have formatting codes expanded, but so will the target of the link (to deal
with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of
the section may be necessary depending on whether the translator wants to
consider markup in sections to be significant when resolving links. See
L<perlpodspec> for more information.
=head1 AUTHOR
Russ Allbery <rra@cpan.org>.
=head1 COPYRIGHT AND LICENSE
Copyright 2001, 2008, 2009, 2014, 2018 Russ Allbery <rra@cpan.org>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
=head1 SEE ALSO
L<Pod::Parser>
The current version of this module is always available from its web site at
L<https://www.eyrie.org/~eagle/software/podlators/>.
=cut
# Local Variables:
# copyright-at-end-flag: t
# End:
InputObjects.pm 0000644 00000065572 15051135563 0007536 0 ustar 00 #############################################################################
# Pod/InputObjects.pm -- package which defines objects for input streams
# and paragraphs and commands when parsing POD docs.
#
# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
#############################################################################
package Pod::InputObjects;
use strict;
use vars qw($VERSION);
$VERSION = '1.63'; ## Current version of this package
require 5.005; ## requires this Perl version or later
#############################################################################
=head1 NAME
Pod::InputObjects - objects representing POD input paragraphs, commands, etc.
=head1 SYNOPSIS
use Pod::InputObjects;
=head1 REQUIRES
perl5.005, Carp
=head1 EXPORTS
Nothing.
=head1 DESCRIPTION
B<NOTE: This module is considered legacy; modern Perl releases (5.18 and
higher) are going to remove Pod-Parser from core and use L<Pod::Simple>
for all things POD.>
This module defines some basic input objects used by B<Pod::Parser> when
reading and parsing POD text from an input source. The following objects
are defined:
=begin __PRIVATE__
=over 4
=item package B<Pod::InputSource>
An object corresponding to a source of POD input text. It is mostly a
wrapper around a filehandle or C<IO::Handle>-type object (or anything
that implements the C<getline()> method) which keeps track of some
additional information relevant to the parsing of PODs.
=back
=end __PRIVATE__
=over 4
=item package B<Pod::Paragraph>
An object corresponding to a paragraph of POD input text. It may be a
plain paragraph, a verbatim paragraph, or a command paragraph (see
L<perlpod>).
=item package B<Pod::InteriorSequence>
An object corresponding to an interior sequence command from the POD
input text (see L<perlpod>).
=item package B<Pod::ParseTree>
An object corresponding to a tree of parsed POD text. Each "node" in
a parse-tree (or I<ptree>) is either a text-string or a reference to
a B<Pod::InteriorSequence> object. The nodes appear in the parse-tree
in the order in which they were parsed from left-to-right.
=back
Each of these input objects is described in further detail in the
sections which follow.
=cut
#############################################################################
package Pod::InputSource;
##---------------------------------------------------------------------------
=begin __PRIVATE__
=head1 B<Pod::InputSource>
This object corresponds to an input source or stream of POD
documentation. When parsing PODs, it is necessary to associate and store
certain context information with each input source. All of this
information is kept together with the stream itself in one of these
C<Pod::InputSource> objects. Each such object is merely a wrapper around
an C<IO::Handle> object of some kind (or at least something that
implements the C<getline()> method). They have the following
methods/attributes:
=end __PRIVATE__
=cut
##---------------------------------------------------------------------------
=begin __PRIVATE__
=head2 B<new()>
my $pod_input1 = Pod::InputSource->new(-handle => $filehandle);
my $pod_input2 = new Pod::InputSource(-handle => $filehandle,
-name => $name);
my $pod_input3 = new Pod::InputSource(-handle => \*STDIN);
my $pod_input4 = Pod::InputSource->new(-handle => \*STDIN,
-name => "(STDIN)");
This is a class method that constructs a C<Pod::InputSource> object and
returns a reference to the new input source object. It takes one or more
keyword arguments in the form of a hash. The keyword C<-handle> is
required and designates the corresponding input handle. The keyword
C<-name> is optional and specifies the name associated with the input
handle (typically a file name).
=end __PRIVATE__
=cut
sub new {
    ## Constructor. May be invoked on a class name or on an existing
    ## object (in which case the new object joins that object's class).
    ## Arguments are keyword/value pairs such as -handle and -name.
    my ($invocant, @overrides) = @_;
    my $class = ref($invocant) || $invocant;

    ## Defaults are listed first; any pair supplied by the caller comes
    ## later in the list and therefore replaces the default.
    my %attr = (
        '-name'        => '(unknown)',
        '-handle'      => undef,
        '-was_cutting' => 0,
        @overrides,
    );

    ## Hand back the attribute hash blessed into the requested class
    return bless \%attr, $class;
}
##---------------------------------------------------------------------------
=begin __PRIVATE__
=head2 B<name()>
my $filename = $pod_input->name();
$pod_input->name($new_filename_to_use);
This method gets/sets the name of the input source (usually a filename).
If no argument is given, it returns a string containing the name of
the input source; otherwise it sets the name of the input source to the
contents of the given argument.
=end __PRIVATE__
=cut
sub name {
    ## Combined getter/setter for the '-name' attribute: with an
    ## argument the name is replaced first; the current name is
    ## always returned.
    my $self = shift;
    $self->{'-name'} = shift if @_;
    return $self->{'-name'};
}
## 'filename' is simply another way to spell 'name'
*filename = \&name;
##---------------------------------------------------------------------------
=begin __PRIVATE__
=head2 B<handle()>
my $handle = $pod_input->handle();
Returns a reference to the handle object from which input is read (the
one used to construct this input source object).
=end __PRIVATE__
=cut
sub handle {
    ## Read-only accessor: returns whatever is stored under '-handle'.
    my ($self) = @_;
    return $self->{'-handle'};
}
##---------------------------------------------------------------------------
=begin __PRIVATE__
=head2 B<was_cutting()>
print "Yes.\n" if ($pod_input->was_cutting());
The value of the C<cutting> state (that the B<cutting()> method would
have returned) immediately before any input was read from this input
stream. After all input from this stream has been read, the C<cutting>
state is restored to this value.
=end __PRIVATE__
=cut
sub was_cutting {
    ## Combined getter/setter for the saved 'cutting' state. A second
    ## argument, when present, is stored before the value is returned.
    my ($self, @value) = @_;
    if (@value) {
        $self->{'-was_cutting'} = $value[0];
    }
    return $self->{'-was_cutting'};
}
##---------------------------------------------------------------------------
#############################################################################
package Pod::Paragraph;
##---------------------------------------------------------------------------
=head1 B<Pod::Paragraph>
An object representing a paragraph of POD input text.
It has the following methods/attributes:
=cut
##---------------------------------------------------------------------------
=head2 Pod::Paragraph-E<gt>B<new()>
my $pod_para1 = Pod::Paragraph->new(-text => $text);
my $pod_para2 = Pod::Paragraph->new(-name => $cmd,
-text => $text);
my $pod_para3 = new Pod::Paragraph(-text => $text);
my $pod_para4 = new Pod::Paragraph(-name => $cmd,
-text => $text);
my $pod_para5 = Pod::Paragraph->new(-name => $cmd,
-text => $text,
-file => $filename,
-line => $line_number);
This is a class method that constructs a C<Pod::Paragraph> object and
returns a reference to the new paragraph object. It may be given one or
two keyword arguments. The C<-text> keyword indicates the corresponding
text of the POD paragraph. The C<-name> keyword indicates the name of
the corresponding POD command, such as C<head1> or C<item> (it should
I<not> contain the C<=> prefix); this is needed only if the POD
paragraph corresponds to a command paragraph. The C<-file> and C<-line>
keywords indicate the filename and line number corresponding to the
beginning of the paragraph.
=cut
sub new {
    ## Constructor. Callable on a class name or on an existing object.
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant;

    ## A single leftover argument is shorthand for the paragraph text;
    ## it is consumed here so that only keyword pairs remain in @_.
    my $text = (@_ == 1) ? shift(@_) : undef;

    ## Defaults first, then the caller's keyword/value pairs, which
    ## replace any default that shares their key.
    my %attr = (
        '-name'      => undef,
        '-text'      => $text,
        '-file'      => '<unknown-file>',
        '-line'      => 0,
        '-prefix'    => '=',
        '-separator' => ' ',
        '-ptree'     => [],
        @_,
    );

    ## Hand back the attribute hash blessed into the requested class
    return bless \%attr, $class;
}
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<cmd_name()>
my $para_cmd = $pod_para->cmd_name();
If this paragraph is a command paragraph, then this method will return
the name of the command (I<without> any leading C<=> prefix).
=cut
sub cmd_name {
    ## Combined getter/setter for the command name ('-name'). The name
    ## is replaced when an argument is supplied and returned either way.
    my ($self, @value) = @_;
    $self->{'-name'} = $value[0] if @value;
    return $self->{'-name'};
}
## name() is provided as a shorter synonym for cmd_name()
*name = \&cmd_name;
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<text()>
my $para_text = $pod_para->text();
This method will return the corresponding text of the paragraph.
=cut
sub text {
    ## Combined getter/setter for the paragraph text ('-text').
    my $self = shift;
    if (@_) {
        $self->{'-text'} = $_[0];
    }
    return $self->{'-text'};
}
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<raw_text()>
my $raw_pod_para = $pod_para->raw_text();
This method will return the I<raw> text of the POD paragraph, exactly
as it appeared in the input.
=cut
sub raw_text {
    ## Rebuilds the paragraph as text. Without a command name the
    ## stored text is returned as is; otherwise the result is the
    ## prefix, command name, separator and text joined in that order.
    my ($self) = @_;
    if (defined $self->{'-name'}) {
        return join '', @{$self}{ '-prefix', '-name', '-separator', '-text' };
    }
    return $self->{'-text'};
}
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<cmd_prefix()>
my $prefix = $pod_para->cmd_prefix();
If this paragraph is a command paragraph, then this method will return
the prefix used to denote the command (which should be the string "="
or "==").
=cut
sub cmd_prefix {
    ## Read-only accessor for the stored command prefix ('-prefix').
    my ($self) = @_;
    return $self->{'-prefix'};
}
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<cmd_separator()>
my $separator = $pod_para->cmd_separator();
If this paragraph is a command paragraph, then this method will return
the text used to separate the command name from the rest of the
paragraph (if any).
=cut
sub cmd_separator {
    ## Read-only accessor for the text stored under '-separator'.
    my ($self) = @_;
    return $self->{'-separator'};
}
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<parse_tree()>
my $ptree = $pod_parser->parse_text( $pod_para->text() );
$pod_para->parse_tree( $ptree );
$ptree = $pod_para->parse_tree();
This method will get/set the corresponding parse-tree of the paragraph's text.
=cut
sub parse_tree {
    ## Combined getter/setter for the paragraph's parse-tree ('-ptree').
    my $self = shift;
    $self->{'-ptree'} = shift if @_;
    return $self->{'-ptree'};
}
## ptree() is provided as a shorter synonym for parse_tree()
*ptree = \&parse_tree;
##---------------------------------------------------------------------------
=head2 $pod_para-E<gt>B<file_line()>
my ($filename, $line_number) = $pod_para->file_line();
my $position = $pod_para->file_line();
Returns the current filename and line number for the paragraph
object. If called in a list context, it returns a list of two
elements: first the filename, then the line number. If called in
a scalar context, it returns a string containing the filename, followed
by a colon (':'), followed by the line number.
=cut
sub file_line {
    ## Reports where the paragraph starts. List context yields
    ## (filename, line); scalar context yields "filename:line".
    ## False or missing values fall back to '<unknown-file>' and 0.
    my ($self) = @_;
    my $file = $self->{'-file'} || '<unknown-file>';
    my $line = $self->{'-line'} || 0;
    return wantarray ? ($file, $line) : "$file:$line";
}
##---------------------------------------------------------------------------
#############################################################################
package Pod::InteriorSequence;
##---------------------------------------------------------------------------
=head1 B<Pod::InteriorSequence>
An object representing a POD interior sequence command.
It has the following methods/attributes:
=cut
##---------------------------------------------------------------------------
=head2 Pod::InteriorSequence-E<gt>B<new()>
my $pod_seq1 = Pod::InteriorSequence->new(-name => $cmd,
-ldelim => $delimiter);
my $pod_seq2 = new Pod::InteriorSequence(-name => $cmd,
-ldelim => $delimiter);
my $pod_seq3 = new Pod::InteriorSequence(-name => $cmd,
-ldelim => $delimiter,
-file => $filename,
-line => $line_number);
my $pod_seq4 = new Pod::InteriorSequence(-name => $cmd, $ptree);
my $pod_seq5 = new Pod::InteriorSequence($cmd, $ptree);
This is a class method that constructs a C<Pod::InteriorSequence> object
and returns a reference to the new interior sequence object. It should
be given two keyword arguments. The C<-ldelim> keyword indicates the
corresponding left-delimiter of the interior sequence (e.g. 'E<lt>').
The C<-name> keyword indicates the name of the corresponding interior
sequence command, such as C<I> or C<B> or C<C>. The C<-file> and
C<-line> keywords indicate the filename and line number corresponding
to the beginning of the interior sequence. If the C<$ptree> argument is
given, it must be the last argument, and it must be either a string, or
else an array-ref suitable for passing to B<Pod::ParseTree::new> (or
it may be a reference to a Pod::ParseTree object).
=cut
sub new {
    ## Constructor for an interior sequence.
    ##
    ## Accepted argument shapes (after the invocant):
    ##   (-name => $cmd, -ldelim => $d, ...)   keyword pairs only
    ##   (-name => $cmd, ..., $ptree)          keyword pairs + trailing ptree
    ##   ($cmd)  or  ($cmd, $ptree)            positional shorthand
    ## $ptree may be a plain string, an array-ref of nodes, or an
    ## already constructed parse-tree object.
    ##
    ## Returns the new object blessed into the invocant's class; its
    ## '-ptree' attribute always ends up holding a parse-tree object.

    ## Determine if we were called via an object-ref or a classname
    my $this  = shift;
    my $class = ref($this) || $this;

    ## See if first argument has no keyword. With no arguments at all
    ## there is nothing to label, so leave the list empty (otherwise the
    ## literal string '-name' would end up being used as the parse-tree).
    if (@_ and ((@_ <= 2) or (@_ % 2))
           and (!defined $_[0] or $_[0] !~ /^-\w/))
    {
        ## Yup - need an implicit '-name' before first parameter
        unshift @_, '-name';
    }

    ## See if odd number of args
    if ((@_ % 2) != 0) {
        ## Yup - need an implicit '-ptree' before the last parameter
        splice @_, $#_, 0, '-ptree';
    }

    ## Any remaining arguments are treated as initial values for the
    ## hash that is used to represent this object. Defaults are listed
    ## *before* the arguments passed so that the arguments override them.
    my $self = {
        -name   => undef,
        -file   => '<unknown-file>',
        -line   => 0,
        -ldelim => '<',
        -rdelim => '>',
        @_
    };

    ## Normalise the contents into a parse-tree object. ref() is called
    ## with explicit parentheses: "ref $x =~ /.../" would parse as
    ## "ref($x =~ /.../)", which is never true.
    my $ptree = $self->{'-ptree'};
    my $type  = ref($ptree);
    if ($type eq 'ARRAY') {
        ## An array-ref of nodes is handed to the constructor directly
        $ptree = Pod::ParseTree->new($ptree);
    }
    elsif ($type eq '') {
        ## A plain scalar becomes the single text node of a new tree
        ## (the string "0" is legitimate text); undef or the empty
        ## string yields an empty tree.
        my $has_text = (defined $ptree and length $ptree);
        $ptree = Pod::ParseTree->new($has_text ? [$ptree] : []);
    }
    ## Any other kind of reference is assumed to be a parse-tree object
    ## already and is stored untouched.
    $self->{'-ptree'} = $ptree;

    ## Bless ourselves into the desired class and perform any initialization
    bless $self, $class;
    return $self;
}
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<cmd_name()>
my $seq_cmd = $pod_seq->cmd_name();
The name of the interior sequence command.
=cut
sub cmd_name {
    ## Combined getter/setter for the sequence's command name ('-name').
    my $self = shift;
    if (@_) {
        $self->{'-name'} = $_[0];
    }
    return $self->{'-name'};
}
## name() is provided as a shorter synonym for cmd_name()
*name = \&cmd_name;
##---------------------------------------------------------------------------
## Private subroutine to set the parent pointer of all the given
## children that are interior-sequences to be $self
sub _set_child2parent_links {
    ## Private helper: records $self as the parent of each given child
    ## that is an interior sequence (or otherwise offers a nested()
    ## method). Text nodes and scalar references are passed over.
    my $self = shift;
    foreach my $child (@_) {
        ## Skip empty values, plain strings and references to scalars
        next if !length($child) or !ref($child) or ref($child) eq 'SCALAR';
        next unless UNIVERSAL::isa($child, 'Pod::InteriorSequence')
                 or UNIVERSAL::can($child, 'nested');
        $child->nested($self);
    }
}
## Private subroutine to unset child->parent links
sub _unset_child2parent_links {
    ## Private helper: clears this sequence's own parent pointer and
    ## then does the same, recursively, for every interior sequence
    ## found among its parse-tree nodes.
    my ($self) = @_;
    $self->{'-parent_sequence'} = undef;

    my $nodes = $self->{'-ptree'};
    foreach my $node (@{$nodes}) {
        ## Only non-scalar references can be sequences
        next if !length($node) or !ref($node) or ref($node) eq 'SCALAR';
        if (UNIVERSAL::isa($node, 'Pod::InteriorSequence')) {
            $node->_unset_child2parent_links();
        }
    }
}
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<prepend()>
$pod_seq->prepend($text);
$pod_seq1->prepend($pod_seq2);
Prepends the given string or parse-tree or sequence object to the parse-tree
of this interior sequence.
=cut
sub prepend {
    ## Puts the given items at the front of this sequence's parse-tree,
    ## marks this sequence as parent of any sequence among them, and
    ## returns the sequence itself.
    my ($self, @items) = @_;
    my $tree = $self->{'-ptree'};
    $tree->prepend(@items);
    _set_child2parent_links($self, @items);
    return $self;
}
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<append()>
$pod_seq->append($text);
$pod_seq1->append($pod_seq2);
Appends the given string or parse-tree or sequence object to the parse-tree
of this interior sequence.
=cut
sub append {
    ## Adds the given items to the end of this sequence's parse-tree,
    ## marks this sequence as parent of any sequence among them, and
    ## returns the sequence itself.
    my ($self, @items) = @_;
    my $tree = $self->{'-ptree'};
    $tree->append(@items);
    _set_child2parent_links($self, @items);
    return $self;
}
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<nested()>
$outer_seq = $pod_seq->nested || print "not nested";
If this interior sequence is nested inside of another interior
sequence, then the outer/parent sequence that contains it is
returned. Otherwise C<undef> is returned.
=cut
sub nested {
    ## Getter/setter for the enclosing (parent) sequence. The parent is
    ## replaced only when exactly one argument is supplied. A false
    ## stored value is reported as undef.
    my ($self, @args) = @_;
    if (@args == 1) {
        $self->{'-parent_sequence'} = $args[0];
    }
    my $parent = $self->{'-parent_sequence'};
    return $parent ? $parent : undef;
}
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<raw_text()>
my $seq_raw_text = $pod_seq->raw_text();
This method will return the I<raw> text of the POD interior sequence,
exactly as it appeared in the input.
=cut
sub raw_text {
    ## Rebuilds the sequence as text: command name, left delimiter,
    ## the raw text of every child node, then the right delimiter.
    my ($self) = @_;
    my @pieces = ($self->{'-name'}, $self->{'-ldelim'});
    foreach my $node ($self->{'-ptree'}->children) {
        ## Reference nodes render themselves; strings are used verbatim
        push @pieces, ref($node) ? scalar($node->raw_text) : $node;
    }
    push @pieces, $self->{'-rdelim'};
    return join '', @pieces;
}
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<left_delimiter()>
my $ldelim = $pod_seq->left_delimiter();
The leftmost delimiter beginning the argument text to the interior
sequence (should be "<").
=cut
sub left_delimiter {
    ## Combined getter/setter for the opening delimiter ('-ldelim').
    my ($self, @value) = @_;
    $self->{'-ldelim'} = $value[0] if @value;
    return $self->{'-ldelim'};
}
## ldelim() is provided as a shorter synonym for left_delimiter()
*ldelim = \&left_delimiter;
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<right_delimiter()>
The rightmost delimiter ending the argument text to the interior
sequence (should be ">").
=cut
sub right_delimiter {
    ## Combined getter/setter for the closing delimiter ('-rdelim').
    my ($self, @value) = @_;
    $self->{'-rdelim'} = $value[0] if @value;
    return $self->{'-rdelim'};
}
## rdelim() is provided as a shorter synonym for right_delimiter()
*rdelim = \&right_delimiter;
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<parse_tree()>
my $ptree = $pod_parser->parse_text($paragraph_text);
$pod_seq->parse_tree( $ptree );
$ptree = $pod_seq->parse_tree();
This method will get/set the corresponding parse-tree of the interior
sequence's text.
=cut
sub parse_tree {
    ## Combined getter/setter for the sequence's parse-tree ('-ptree').
    my $self = shift;
    if (@_) {
        $self->{'-ptree'} = $_[0];
    }
    return $self->{'-ptree'};
}
## ptree() is provided as a shorter synonym for parse_tree()
*ptree = \&parse_tree;
##---------------------------------------------------------------------------
=head2 $pod_seq-E<gt>B<file_line()>
my ($filename, $line_number) = $pod_seq->file_line();
my $position = $pod_seq->file_line();
Returns the current filename and line number for the interior sequence
object. If called in a list context, it returns a list of two
elements: first the filename, then the line number. If called in
a scalar context, it returns a string containing the filename, followed
by a colon (':'), followed by the line number.
=cut
sub file_line {
    ## Reports where the sequence starts. List context yields
    ## (filename, line); scalar context yields "filename:line".
    ## False or missing values fall back to '<unknown-file>' and 0.
    my ($self) = @_;
    my $file = $self->{'-file'} || '<unknown-file>';
    my $line = $self->{'-line'} || 0;
    if (wantarray) {
        return ($file, $line);
    }
    return $file . ':' . $line;
}
##---------------------------------------------------------------------------
=head2 Pod::InteriorSequence::B<DESTROY()>
This method performs any necessary cleanup for the interior-sequence.
If you override this method then it is B<imperative> that you invoke
the parent method from within your own method, otherwise
I<interior-sequence storage will not be reclaimed upon destruction!>
=cut
sub DESTROY {
    ## Destructor. Clears the child->parent pointers held throughout
    ## this sequence's tree so that reference counts can drop to zero
    ## and the nodes can be reclaimed.
    my ($self) = @_;
    _unset_child2parent_links($self);
}
##---------------------------------------------------------------------------
#############################################################################
package Pod::ParseTree;
##---------------------------------------------------------------------------
=head1 B<Pod::ParseTree>
This object corresponds to a tree of parsed POD text. As POD text is
scanned from left to right, it is parsed into an ordered list of
text-strings and B<Pod::InteriorSequence> objects (in order of
appearance). A B<Pod::ParseTree> object corresponds to this list of
strings and sequences. Each interior sequence in the parse-tree may
itself contain a parse-tree (since interior sequences may be nested).
=cut
##---------------------------------------------------------------------------
=head2 Pod::ParseTree-E<gt>B<new()>
my $ptree1 = Pod::ParseTree->new;
my $ptree2 = new Pod::ParseTree;
my $ptree4 = Pod::ParseTree->new($array_ref);
my $ptree3 = new Pod::ParseTree($array_ref);
This is a class method that constructs a C<Pod::ParseTree> object and
returns a reference to the new parse-tree. If a single-argument is given,
it must be a reference to an array, and is used to initialize the root
(top) of the parse tree.
=cut
sub new {
    ## Constructor. Callable on a class name or on an existing object.
    ## When exactly one argument is given and it is a reference, that
    ## very reference becomes the tree; in every other case the tree
    ## starts out as a fresh, empty array.
    my ($invocant, @args) = @_;
    my $class = ref($invocant) || $invocant;

    my $nodes = [];
    if (@args == 1 and ref $args[0]) {
        $nodes = $args[0];
    }

    ## Bless the node list itself; the tree object *is* the array
    return bless $nodes, $class;
}
##---------------------------------------------------------------------------
=head2 $ptree-E<gt>B<top()>
my $top_node = $ptree->top();
$ptree->top( $top_node );
$ptree->top( @children );
This method gets/sets the top node of the parse-tree. If no arguments are
given, it returns the topmost node in the tree (the root), which is also
a B<Pod::ParseTree>. If it is given a single argument that is a reference,
then the reference is assumed to be a parse-tree and becomes the new top node.
Otherwise, if arguments are given, they are treated as the new list of
children for the top node.
=cut
sub top {
    ## Gets/sets the top node of the parse-tree.
    ##
    ## With no arguments the tree is left alone. A single argument that
    ## is an array-based reference (an array-ref or another parse-tree)
    ## has its elements copied in as the new children. Any other
    ## argument list is itself taken as the new list of children.
    ##
    ## Always returns the tree object.
    my $self = shift;
    if (@_ > 0) {
        ## The lone array-based argument must be dereferenced as an array.
        ## The previous "${ @_ }" was a symbolic scalar dereference of
        ## the string "1" and died under "use strict". A lone reference
        ## that is not array-based (e.g. a single interior sequence) is
        ## stored as the only child rather than being dereferenced.
        @{ $self } = (@_ == 1 and UNIVERSAL::isa($_[0], 'ARRAY'))
                   ? @{ $_[0] }
                   : @_;
    }
    return $self;
}
## let parse_tree() & ptree() be aliases for the 'top' method
*parse_tree = *ptree = \&top;
##---------------------------------------------------------------------------
=head2 $ptree-E<gt>B<children()>
This method gets/sets the children of the top node in the parse-tree.
If no arguments are given, it returns the list (array) of children
(each of which should be either a string or a B<Pod::InteriorSequence>).
Otherwise, if arguments are given, they are treated as the new list of
children for the top node.
=cut
sub children {
    ## Gets/sets the children of the top node.
    ##
    ## With arguments the children are replaced first: a single
    ## array-based reference (an array-ref or another parse-tree) has
    ## its elements copied in; any other argument list is itself taken
    ## as the new children.
    ##
    ## Always returns the (possibly updated) list of children.
    my $self = shift;
    if (@_ > 0) {
        ## The lone array-based argument must be dereferenced as an array.
        ## The previous "${ @_ }" was a symbolic scalar dereference of
        ## the string "1" and died under "use strict". A lone reference
        ## that is not array-based (e.g. a single interior sequence) is
        ## stored as the only child rather than being dereferenced.
        @{ $self } = (@_ == 1 and UNIVERSAL::isa($_[0], 'ARRAY'))
                   ? @{ $_[0] }
                   : @_;
    }
    return @{ $self };
}
##---------------------------------------------------------------------------
=head2 $ptree-E<gt>B<prepend()>
This method prepends the given text or parse-tree to the current parse-tree.
If the first item on the parse-tree is text and the argument is also text,
then the text is prepended to the first item (not added as a separate string).
Otherwise the argument is added as a new string or parse-tree I<before>
the current one.
=cut
use vars qw(@ptree); ## an alias used for performance reasons
sub prepend {
    ## Places each given item at the front of the tree, one after the
    ## other. Text is glued onto a leading text node when there is one;
    ## anything else becomes a new first node. Empty items are skipped.
    my ($self, @items) = @_;
    foreach my $item (@items) {
        next unless length $item;
        my $merge = @{$self} && !ref($self->[0]) && !ref($item);
        if ($merge) {
            ## text in front of text: extend the existing first node
            $self->[0] = $item . $self->[0];
        }
        else {
            unshift @{$self}, $item;
        }
    }
}
##---------------------------------------------------------------------------
=head2 $ptree-E<gt>B<append()>
This method appends the given text or parse-tree to the current parse-tree.
If the last item on the parse-tree is text and the argument is also text,
then the text is appended to the last item (not added as a separate string).
Otherwise the argument is added as a new string or parse-tree I<after>
the current one.
=cut
sub append {
    ## Adds each given item to the end of the tree.
    ##
    ## References (sequences, sub-trees) always become a new last node.
    ## Undefined or empty text is skipped. Other text is concatenated
    ## onto the last node when that node is itself text, and otherwise
    ## becomes a new last node.
    my $self = shift;
    for my $item (@_) {
        if (ref $item) {
            push @{$self}, $item;
            next;
        }
        next unless defined $item and length $item;

        ## Whether text can be merged is decided per item against the
        ## *current* last node. Deciding once before the loop would let
        ## text that follows a freshly pushed reference be concatenated
        ## onto that reference, turning it into a plain string.
        if (@{$self} and !ref($self->[-1])) {
            $self->[-1] .= $item;
        }
        else {
            push @{$self}, $item;
        }
    }
}
=head2 $ptree-E<gt>B<raw_text()>
my $ptree_raw_text = $ptree->raw_text();
This method will return the I<raw> text of the POD parse-tree
exactly as it appeared in the input.
=cut
sub raw_text {
    ## Rebuilds the tree as text by concatenating every node in order:
    ## reference nodes contribute their own raw_text(), strings are
    ## used verbatim. An empty tree gives the empty string.
    my ($self) = @_;
    return join '', map { ref($_) ? scalar($_->raw_text) : $_ } @{$self};
}
##---------------------------------------------------------------------------
## Private routines to set/unset child->parent links
sub _unset_child2parent_links {
    ## Private helper: asks every interior sequence held directly in
    ## this tree to drop its parent pointer (each sequence then takes
    ## care of its own children).
    my ($self) = @_;
    foreach my $node (@{$self}) {
        ## Only defined, non-scalar references can be sequences
        next if !defined($node) or !length($node);
        next if !ref($node) or ref($node) eq 'SCALAR';
        if (UNIVERSAL::isa($node, 'Pod::InteriorSequence')) {
            $node->_unset_child2parent_links();
        }
    }
}
sub _set_child2parent_links {
    ## Deliberately does nothing: a Pod::ParseTree cannot have parent
    ## pointers.
    return;
}
=head2 Pod::ParseTree::B<DESTROY()>
This method performs any necessary cleanup for the parse-tree.
If you override this method then it is B<imperative>
that you invoke the parent method from within your own method,
otherwise I<parse-tree storage will not be reclaimed upon destruction!>
=cut
sub DESTROY {
    ## Destructor. Clears the child->parent pointers held throughout
    ## this tree so that reference counts can drop to zero and the
    ## nodes can be reclaimed.
    my ($self) = @_;
    _unset_child2parent_links($self);
}
#############################################################################
=head1 SEE ALSO
B<Pod::InputObjects> is part of the L<Pod::Parser> distribution.
See L<Pod::Parser>, L<Pod::Select>
=head1 AUTHOR
Please report bugs using L<http://rt.cpan.org>.
Brad Appleton E<lt>bradapp@enteract.comE<gt>
=cut
1;
Simple.pod 0000644 00000033622 15051135563 0006513 0 ustar 00
=head1 NAME
Pod::Simple - framework for parsing Pod
=head1 SYNOPSIS
TODO
=head1 DESCRIPTION
Pod::Simple is a Perl library for parsing text in the Pod ("plain old
documentation") markup language that is typically used for writing
documentation for Perl and for Perl modules. The Pod format is explained
in L<perlpod>; the most common formatter is called C<perldoc>.
Be sure to read L</ENCODING> if your Pod contains non-ASCII characters.
Pod formatters can use Pod::Simple to parse Pod documents and render them into
plain text, HTML, or any number of other formats. Typically, such formatters
will be subclasses of Pod::Simple, and so they will inherit its methods, like
C<parse_file>.
If you're reading this document just because you have a Pod-processing
subclass that you want to use, this document (plus the documentation for the
subclass) is probably all you need to read.
If you're reading this document because you want to write a formatter
subclass, continue reading it and then read L<Pod::Simple::Subclassing>, and
then possibly even read L<perlpodspec> (some of which is for parser-writers,
but much of which is notes to formatter-writers).
=head1 MAIN METHODS
=over
=item C<< $parser = I<SomeClass>->new(); >>
This returns a new parser object, where I<C<SomeClass>> is a subclass
of Pod::Simple.
=item C<< $parser->output_fh( *OUT ); >>
This sets the filehandle that C<$parser>'s output will be written to.
You can pass C<*STDOUT> or C<*STDERR>, otherwise you should probably do
something like this:
my $outfile = "output.txt";
open TXTOUT, ">$outfile" or die "Can't write to $outfile: $!";
$parser->output_fh(*TXTOUT);
...before you call one of the C<< $parser->parse_I<whatever> >> methods.
=item C<< $parser->output_string( \$somestring ); >>
This sets the string that C<$parser>'s output will be sent to,
instead of any filehandle.
=item C<< $parser->parse_file( I<$some_filename> ); >>
=item C<< $parser->parse_file( *INPUT_FH ); >>
This reads the Pod content of the file (or filehandle) that you specify,
and processes it with that C<$parser> object, according to however
C<$parser>'s class works, and according to whatever parser options you
have set up for this C<$parser> object.
=item C<< $parser->parse_string_document( I<$all_content> ); >>
This works just like C<parse_file> except that it reads the Pod
content not from a file, but from a string that you have already
in memory.
=item C<< $parser->parse_lines( I<...@lines...>, undef ); >>
This processes the lines in C<@lines> (where each list item must be a
defined value, and must contain exactly one line of content -- so no
items like C<"foo\nbar"> are allowed). The final C<undef> is used to
indicate the end of document being parsed.
The other C<parse_I<whatever>> methods are meant to be called only once
per C<$parser> object; but C<parse_lines> can be called as many times per
C<$parser> object as you want, as long as the last call (and only
the last call) ends with an C<undef> value.
=item C<< $parser->content_seen >>
This returns true only if there has been any real content seen for this
document. Returns false in cases where the document contains content,
but does not make use of any Pod markup.
=item C<< I<SomeClass>->filter( I<$filename> ); >>
=item C<< I<SomeClass>->filter( I<*INPUT_FH> ); >>
=item C<< I<SomeClass>->filter( I<\$document_content> ); >>
This is a shortcut method for creating a new parser object, setting the
output handle to STDOUT, and then processing the specified file (or
filehandle, or in-memory document). This is handy for one-liners like
this:
perl -MPod::Simple::Text -e "Pod::Simple::Text->filter('thingy.pod')"
=back
=head1 SECONDARY METHODS
Some of these methods might be of interest to general users, as
well as of interest to formatter-writers.
Note that the general pattern here is that the accessor-methods
read the attribute's value with C<< $value = $parser->I<attribute> >>
and set the attribute's value with
C<< $parser->I<attribute>(I<newvalue>) >>. For each accessor, I typically
only mention one syntax or another, based on which I think you are actually
most likely to use.
=over
=item C<< $parser->parse_characters( I<SOMEVALUE> ) >>
The Pod parser normally expects to read octets and to convert those octets
to characters based on the C<=encoding> declaration in the Pod source. Set
this option to a true value to indicate that the Pod source is already a Perl
character stream. This tells the parser to ignore any C<=encoding> command
and to skip all the code paths involving decoding octets.
=item C<< $parser->no_whining( I<SOMEVALUE> ) >>
If you set this attribute to a true value, you will suppress the
parser's complaints about irregularities in the Pod coding. By default,
this attribute's value is false, meaning that irregularities will
be reported.
Note that turning this attribute to true won't suppress one or two kinds
of complaints about rarely occurring unrecoverable errors.
=item C<< $parser->no_errata_section( I<SOMEVALUE> ) >>
If you set this attribute to a true value, you will stop the parser from
generating a "POD ERRORS" section at the end of the document. By
default, this attribute's value is false, meaning that an errata section
will be generated, as necessary.
=item C<< $parser->complain_stderr( I<SOMEVALUE> ) >>
If you set this attribute to a true value, it will send reports of
parsing errors to STDERR. By default, this attribute's value is false,
meaning that no output is sent to STDERR.
Setting C<complain_stderr> also sets C<no_errata_section>.
=item C<< $parser->source_filename >>
This returns the filename that this parser object was set to read from.
=item C<< $parser->doc_has_started >>
This returns true if C<$parser> has read from a source, and has seen
Pod content in it.
=item C<< $parser->source_dead >>
This returns true if C<$parser> has read from a source, and come to the
end of that source.
=item C<< $parser->strip_verbatim_indent( I<SOMEVALUE> ) >>
The perlpod spec for a Verbatim paragraph is "It should be reproduced
exactly...", which means that the whitespace you've used to indent your
verbatim blocks will be preserved in the output. This can be annoying for
outputs such as HTML, where that whitespace will remain in front of every
line. It's an unfortunate case where syntax is turned into semantics.
If the POD you're parsing adheres to a consistent indentation policy, you can
have such indentation stripped from the beginning of every line of your
verbatim blocks. This method tells Pod::Simple what to strip. For two-space
indents, you'd use:
$parser->strip_verbatim_indent(' ');
For tab indents, you'd use a tab character:
$parser->strip_verbatim_indent("\t");
If the POD is inconsistent about the indentation of verbatim blocks, but you
have figured out a heuristic to determine how much a particular verbatim block
is indented, you can pass a code reference instead. The code reference will be
executed with one argument, an array reference of all the lines in the
verbatim block, and should return the value to be stripped from each line. For
example, if you decide that you're fine to use the first line of the verbatim
block to set the standard for indentation of the rest of the block, you can
look at the first line and return the appropriate value, like so:
$new->strip_verbatim_indent(sub {
my $lines = shift;
(my $indent = $lines->[0]) =~ s/\S.*//;
return $indent;
});
If you'd rather treat each line individually, you can do that, too, by just
transforming them in-place in the code reference and returning C<undef>. Say
that you don't want I<any> lines indented. You can do something like this:
$new->strip_verbatim_indent(sub {
my $lines = shift;
s/^\s+// for @{ $lines };
return undef;
});
=back
=head1 TERTIARY METHODS
=over
=item C<< $parser->abandon_output_fh() >>X<abandon_output_fh>
Cancel output to the file handle. Any POD read by the C<$parser> is not
affected.
=item C<< $parser->abandon_output_string() >>X<abandon_output_string>
Cancel output to the output string. Any POD read by the C<$parser> is not
affected.
=item C<< $parser->accept_code( @codes ) >>X<accept_code>
Alias for L<< accept_codes >>.
=item C<< $parser->accept_codes( @codes ) >>X<accept_codes>
Allows C<$parser> to accept a list of L<perlpod/Formatting Codes>. This can be
used to implement user-defined codes.
=item C<< $parser->accept_directive_as_data( @directives ) >>X<accept_directive_as_data>
Allows C<$parser> to accept a list of directives for data paragraphs. A
directive is the label of a L<perlpod/Command Paragraph>. A data paragraph is
one delimited by C<< =begin/=for/=end >> directives. This can be used to
implement user-defined directives.
=item C<< $parser->accept_directive_as_processed( @directives ) >>X<accept_directive_as_processed>
Allows C<$parser> to accept a list of directives for processed paragraphs. A
directive is the label of a L<perlpod/Command Paragraph>. A processed
paragraph is also known as L<perlpod/Ordinary Paragraph>. This can be used to
implement user-defined directives.
=item C<< $parser->accept_directive_as_verbatim( @directives ) >>X<accept_directive_as_verbatim>
Allows C<$parser> to accept a list of directives for L<perlpod/Verbatim
Paragraph>. A directive is the label of a L<perlpod/Command Paragraph>. This
can be used to implement user-defined directives.
=item C<< $parser->accept_target( @targets ) >>X<accept_target>
Alias for L<< accept_targets >>.
=item C<< $parser->accept_target_as_text( @targets ) >>X<accept_target_as_text>
Alias for L<< accept_targets_as_text >>.
=item C<< $parser->accept_targets( @targets ) >>X<accept_targets>
Accepts targets for C<< =begin/=for/=end >> sections of the POD.
=item C<< $parser->accept_targets_as_text( @targets ) >>X<accept_targets_as_text>
Accepts targets for C<< =begin/=for/=end >> sections that should be parsed as
POD. For details, see L<< perlpodspec/About Data Paragraphs >>.
=item C<< $parser->any_errata_seen() >>X<any_errata_seen>
Used to check if any errata was seen.
I<Example:>
die "too many errors\n" if $parser->any_errata_seen();
=item C<< $parser->errata_seen() >>X<errata_seen>
Returns a hash reference of all errata seen, both whines and screams. The hash reference's keys are the line number and the value is an array reference of the errors for that line.
I<Example:>
if ( $parser->any_errata_seen() ) {
$logger->log( $parser->errata_seen() );
}
=item C<< $parser->detected_encoding() >>X<detected_encoding>
Return the encoding corresponding to C<< =encoding >>, but only if the
encoding was recognized and handled.
=item C<< $parser->encoding() >>X<encoding>
Return encoding of the document, even if the encoding is not correctly
handled.
=item C<< $parser->parse_from_file( $source, $to ) >>X<parse_from_file>
Parses from C<$source> file to C<$to> file. Similar to L<<
Pod::Parser/parse_from_file >>.
=item C<< $parser->scream( @error_messages ) >>X<scream>
Log an error that can't be ignored.
=item C<< $parser->unaccept_code( @codes ) >>X<unaccept_code>
Alias for L<< unaccept_codes >>.
=item C<< $parser->unaccept_codes( @codes ) >>X<unaccept_codes>
Removes C<< @codes >> as valid codes for the parse.
=item C<< $parser->unaccept_directive( @directives ) >>X<unaccept_directive>
Alias for L<< unaccept_directives >>.
=item C<< $parser->unaccept_directives( @directives ) >>X<unaccept_directives>
Removes C<< @directives >> as valid directives for the parse.
=item C<< $parser->unaccept_target( @targets ) >>X<unaccept_target>
Alias for L<< unaccept_targets >>.
=item C<< $parser->unaccept_targets( @targets ) >>X<unaccept_targets>
Removes C<< @targets >> as valid targets for the parse.
=item C<< $parser->version_report() >>X<version_report>
Returns a string describing the version.
=item C<< $parser->whine( @error_messages ) >>X<whine>
Log an error unless C<< $parser->no_whining( TRUE ); >>.
=back
=head1 ENCODING
The Pod::Simple parser expects to read B<octets>. The parser will decode the
octets into Perl's internal character string representation using the value of
the C<=encoding> declaration in the POD source.
If the POD source does not include an C<=encoding> declaration, the parser will
attempt to guess the encoding (selecting one of UTF-8 or CP 1252) by examining
the first non-ASCII bytes and applying the heuristic described in
L<perlpodspec>. (If the POD source contains only ASCII bytes, the
encoding is assumed to be ASCII.)
If you set the C<parse_characters> option to a true value the parser will
expect characters rather than octets; will ignore any C<=encoding>; and will
make no attempt to decode the input.
=head1 SEE ALSO
L<Pod::Simple::Subclassing>
L<perlpod|perlpod>
L<perlpodspec|perlpodspec>
L<Pod::Escapes|Pod::Escapes>
L<perldoc>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
Documentation has been contributed by:
=over
=item * Gabor Szabo C<szabgab@gmail.com>
=item * Shawn H Corey C<SHCOREY at cpan.org>
=back
=cut
Simple/RTF.pm 0000644 00000053724 15051135563 0007005 0 ustar 00
require 5;
package Pod::Simple::RTF;
#sub DEBUG () {4};
#sub Pod::Simple::DEBUG () {4};
#sub Pod::Simple::PullParser::DEBUG () {4};
use strict;
use vars qw($VERSION @ISA %Escape $WRAP %Tagmap);
$VERSION = '3.35';
use Pod::Simple::PullParser ();
BEGIN {@ISA = ('Pod::Simple::PullParser')}
use Carp ();
BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
$WRAP = 1 unless defined $WRAP;
# These are broken for early Perls on EBCDIC; they could be fixed to work
# better there, but not worth it. These are part of a larger [...] class, so
# are just the strings to substitute into it, as opposed to compiled patterns.
#
# Each string below is interpolated *inside* a bracketed character class by
# rtf_esc() and rtf_esc_codely() (and $not_ascii also by do_middle()).
# The POSIX class name is tried first; the string eval compiles a pattern
# using it, and if that eval yields a false value (e.g. the pattern failed
# to compile) an explicit code-point range is used instead.
my $cntrl = '[:cntrl:]';
$cntrl = '\x00-\x1F\x7F' unless eval "qr/[$cntrl]/";
my $not_ascii = '[:^ascii:]';
$not_ascii = '\x80-\xFF' unless eval "qr/[$not_ascii]/";
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Expand a list of 'name=rtfcode' specs into Tagmap entries.
#
# Each spec 'foo=bar' contributes four list items:
#   'foo'  => "{\bar\n"   (text emitted when the element opens)
#   '/foo' => "}"         (text emitted when the element closes)
# Dies with "what's <spec>?" if a spec does not have the expected shape.
sub _openclose {
  my @entries;
  foreach my $spec (@_) {
    $spec =~ m/^([-A-Za-z]+)=(\w[^\=]*)$/s or die "what's <$spec>?";
    my ($name, $rtf_code) = ($1, $2);
    push @entries,
      $name,    "{\\$rtf_code\n",
      "/$name", "}";
  }
  return @entries;
}
# Names of the extra formatting codes listed in the qw[] below (underline,
# smallcaps, ...). They are collected as a side effect of the map() inside
# the %Tagmap initializer; new() later hands them to accept_codes().
my @_to_accept;
# %Tagmap maps an element name to the RTF text emitted when that element
# starts, and "/name" to the text emitted when it ends. new() copies this
# hash into each object as $self->{'Tagmap'}.
# Text of the form #key# inside a value is a placeholder: do_middle()
# replaces it with $self->{key} just before printing (e.g. #rtfindent#,
# #rtfkeep#, #rtfitemkeepn#, #head1_halfpoint_size#).
%Tagmap = (
# 'foo=bar' means ('foo' => '{\bar'."\n", '/foo' => '}')
_openclose(
'B=cs18\b',
'I=cs16\i',
'C=cs19\f1\lang1024\noproof',
'F=cs17\i\lang1024\noproof',
'VerbatimI=cs26\i',
'VerbatimB=cs27\b',
'VerbatimBI=cs28\b\i',
map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
qw[
underline=ul smallcaps=scaps shadow=shad
superscript=super subscript=sub strikethrough=strike
outline=outl emboss=embo engrave=impr
dotted-underline=uld dash-underline=uldash
dot-dash-underline=uldashd dot-dot-dash-underline=uldashdd
double-underline=uldb thick-underline=ulth
word-underline=ulw wave-underline=ulwave
]
# But no double-strikethrough, because MSWord can't agree with the
# RTF spec on whether it's supposed to be \strikedl or \striked1 (!!!)
),
# Bit of a hack here:
# do_middle() rewrites the tag name 'L' to "L=<type>" (defaulting to
# 'pod') before looking it up, so these three keys select the link style.
'L=pod' => '{\cs22\i'."\n",
'L=url' => '{\cs23\i'."\n",
'L=man' => '{\cs24\i'."\n",
'/L' => '}',
'Data' => "\n",
'/Data' => "\n",
'Verbatim' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n",
'/Verbatim' => "\n\\par}\n",
'VerbatimFormatted' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n",
'/VerbatimFormatted' => "\n\\par}\n",
'Para' => "\n{\\pard\\li#rtfindent#\\sa180\n",
'/Para' => "\n\\par}\n",
'head1' => "\n{\\pard\\li#rtfindent#\\s31\\keepn\\sb90\\sa180\\f2\\fs#head1_halfpoint_size#\\ul{\n",
'/head1' => "\n}\\par}\n",
'head2' => "\n{\\pard\\li#rtfindent#\\s32\\keepn\\sb90\\sa180\\f2\\fs#head2_halfpoint_size#\\ul{\n",
'/head2' => "\n}\\par}\n",
'head3' => "\n{\\pard\\li#rtfindent#\\s33\\keepn\\sb90\\sa180\\f2\\fs#head3_halfpoint_size#\\ul{\n",
'/head3' => "\n}\\par}\n",
'head4' => "\n{\\pard\\li#rtfindent#\\s34\\keepn\\sb90\\sa180\\f2\\fs#head4_halfpoint_size#\\ul{\n",
'/head4' => "\n}\\par}\n",
# wordpad borks on \tc\tcl1, or I'd put that in =head1 and =head2
'item-bullet' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
'/item-bullet' => "\n\\par}\n",
'item-number' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
'/item-number' => "\n\\par}\n",
'item-text' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n",
'/item-text' => "\n\\par}\n",
# we don't need any styles for over-* and /over-*
);
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Constructor: builds the parent-class object, then applies the RTF
# formatter's defaults.
#
# The document language comes from the RTFDEFLANG environment variable,
# accepted as decimal digits, as hex with an "x"/"0x" prefix, or as exactly
# four bare hex digits; anything else (or unset) gives '1033'.
sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);

  $self->nix_X_codes(1);
  $self->nbsp_for_S(1);
  $self->accept_targets( 'rtf', 'RTF' );

  # Per-object copy, so changes to one object's map don't leak into %Tagmap.
  $self->{'Tagmap'} = { %Tagmap };

  $self->accept_codes(@_to_accept);
  $self->accept_codes('VerbatimFormatted');
  DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";

  my $env_lang = $ENV{'RTFDEFLANG'} || '';
  my $lang_code;
  if ( $env_lang =~ m/^(\d{1,10})$/s ) {
    $lang_code = $1;
  }
  elsif ( $env_lang =~ m/^0?x([a-fA-F0-9]{1,10})$/s ) {
    $lang_code = hex($1);      # yes, tolerate hex!
  }
  elsif ( $env_lang =~ m/^([a-fA-F0-9]{4})$/s ) {
    $lang_code = hex($1);      # yes, tolerate even more hex!
  }
  else {
    $lang_code = '1033';
  }
  $self->doc_lang($lang_code);

  # Default font sizes, in half-points, applied in this order.
  my @size_defaults = (
    head1_halfpoint_size     => 32,
    head2_halfpoint_size     => 28,
    head3_halfpoint_size     => 25,
    head4_halfpoint_size     => 22,
    codeblock_halfpoint_size => 18,
    header_halfpoint_size    => 17,
    normal_halfpoint_size    => 25,
  );
  while ( my ($setter, $halfpoints) = splice @size_defaults, 0, 2 ) {
    $self->$setter($halfpoints);
  }

  return $self;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Register this class's attribute names with the inherited _accessorize().
# new() and the formatting subs call each of these names as a method
# (e.g. $self->doc_lang, $self->head1_halfpoint_size(32)); presumably
# _accessorize is what creates those methods -- it is defined elsewhere.
__PACKAGE__->_accessorize( qw(
  doc_lang
  head1_halfpoint_size
  head2_halfpoint_size
  head3_halfpoint_size
  head4_halfpoint_size
  codeblock_halfpoint_size
  header_halfpoint_size
  normal_halfpoint_size
  no_proofing_exemptions
) );
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Produce the output document.
#
# With bare_output set, only the body (do_middle) is written. Otherwise the
# preamble, body and closing brace are written in turn, stopping at the
# first step that returns false. Returns the value of the last step run.
sub run {
  my ($self) = @_;

  if ( $self->bare_output ) {
    return $self->do_middle;
  }

  return $self->do_beginning && $self->do_middle && $self->do_end;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main conversion loop: pulls tokens from $self->get_token until it returns
# a false value, and writes the RTF for each one to $self->{'output_fh'}.
#
#  * text tokens are escaped (rtf_esc_codely inside verbatim blocks,
#    rtf_esc elsewhere) and, outside verbatim, optionally marked up as
#    "not for spell-checking" and soft-wrapped when $WRAP is true;
#  * start/end tokens are looked up in $self->{'Tagmap'} (end tags under
#    "/name"); any #key# placeholder in the template is replaced with
#    $self->{key} before printing. Tags with no Tagmap entry print nothing.
#
# State kept on the object while running: rtfindent (current left indent,
# in twips), rtfverbatim (nesting depth of verbatim blocks), rtfkeep and
# rtfitemkeepn (either '' or '\keepn').
#
# Always returns 1.
sub do_middle {      # the main work
  my $self = $_[0];
  my $fh = $self->{'output_fh'};

  my($token, $type, $tagname, $scratch);
  my @stack;          # kind of each currently open over-* block
  my @indent_stack;   # twips each open over-* block added to rtfindent
  $self->{'rtfindent'} = 0 unless defined $self->{'rtfindent'};

  while($token = $self->get_token) {

    if( ($type = $token->type) eq 'text' ) {
      if( $self->{'rtfverbatim'} ) {
        DEBUG > 1 and print STDERR " $type " , $token->text, " in verbatim!\n";
        # Void-context call: escapes $scratch in place.
        rtf_esc_codely($scratch = $token->text);
        print $fh $scratch;
        next;
      }

      DEBUG > 1 and print STDERR " $type " , $token->text, "\n";

      $scratch = $token->text;
      # Tabs become spaces; any literal \cb / \cc are deleted so that those
      # two characters can be used as private markers just below.
      $scratch =~ tr/\t\cb\cc/ /d;

      # Wrap anything that looks like a Perl token in \cb ... \cc; %Escape
      # turns those markers into a "\cs21\lang1024\noproof" group.
      $self->{'no_proofing_exemptions'} or $scratch =~
       s/(?:
           ^
           |
           (?<=[\r\n\t "\[\<\(])
         )   # start on whitespace, sequence-start, or quote
         ( # something looking like a Perl token:
          (?:
           [\$\@\:\<\*\\_]\S+  # either starting with a sigil, etc.
          )
          |
          # or starting alpha, but containing anything strange:
          (?:
           [a-zA-Z'${not_ascii}]+[\$\@\:_<>\(\\\*]\S+
          )
         )
        /\cb$1\cc/xsg
      ;

      rtf_esc($scratch);
      $scratch =~
         s/(
            [^\r\n]{65}        # Snare 65 characters from a line
            [^\r\n ]{0,50}     # and finish any current word
           )
           (\ {1,10})(?![\r\n]) # capture some spaces not at line-end
          /$1$2\n/gx     # and put a NL before those spaces
        if $WRAP;
        # This may wrap at well past the 65th column, but not past the 120th.

      print $fh $scratch;

    } elsif( $type eq 'start' ) {
      DEBUG > 1 and print STDERR " +$type ",$token->tagname,
        " (", map("<$_> ", %{$token->attr_hash}), ")\n";

      if( ($tagname = $token->tagname) eq 'Verbatim'
       or $tagname eq 'VerbatimFormatted'
      ) {
        ++$self->{'rtfverbatim'};
        # Peek at the first token of the block to count its lines: blocks
        # of up to 15 lines get \keepn, longer ones do not.
        my $next = $self->get_token;
        next unless defined $next;
        my $line_count = 1;
        if($next->type eq 'text') {
          my $t = $next->text_r;
          while( $$t =~ m/$/mg ) {
            last if  ++$line_count  > 15; # no point in counting further
          }
          DEBUG > 3 and print STDERR " verbatim line count: $line_count\n";
        }
        $self->unget_token($next);
        $self->{'rtfkeep'} = ($line_count > 15) ? '' : '\keepn' ;

      } elsif( $tagname =~ m/^item-/s ) {
        my @to_unget;
        my $text_count_here = 0;
        $self->{'rtfitemkeepn'} = '';
        # Some heuristics to stop item-*'s functioning as subheadings
        # from getting split from the things they're subheadings for.
        #
        # It's not terribly pretty, but it really does make things pretty.
        #
        while(1) {
          push @to_unget, $self->get_token;
          pop(@to_unget), last unless defined $to_unget[-1];
           # Erroneously used to be "unshift" instead of pop!  Adds instead
           # of removes, and operates on the beginning instead of the end!

          if($to_unget[-1]->type eq 'text') {
            if( ($text_count_here += length ${$to_unget[-1]->text_r}) > 150 ){
              DEBUG > 1 and print STDERR " item-* is too long to be keepn'd.\n";
              last;
            }
          } elsif (@to_unget > 1 and
            $to_unget[-2]->type eq 'end' and
            $to_unget[-2]->tagname =~ m/^item-/s
          ) {
            # Bail out here, after setting rtfitemkeepn yea or nay.
            $self->{'rtfitemkeepn'} = '\keepn' if
              $to_unget[-1]->type eq 'start' and
              $to_unget[-1]->tagname eq 'Para';

            DEBUG > 1 and printf STDERR " item-* before %s(%s) %s keepn'd.\n",
              $to_unget[-1]->type,
              $to_unget[-1]->can('tagname') ? $to_unget[-1]->tagname : '',
              $self->{'rtfitemkeepn'} ? "gets" : "doesn't get";
            last;
          } elsif (@to_unget > 40) {
            DEBUG > 1 and print STDERR " item-* now has too many tokens (",
              scalar(@to_unget),
              (DEBUG > 4) ? (q<: >, map($_->dump, @to_unget)) : (),
              ") to be keepn'd.\n";
            last; # give up
          }
          # else keep while'ing along
        }
        # Now put it aaaaall back...
        $self->unget_token(@to_unget);

      } elsif( $tagname =~ m/^over-(.*)/s ) {
        # The pattern now captures the over-* kind. Previously there was no
        # capture group, so $1 here was whatever an earlier match had left.
        push @stack, $1;
        push @indent_stack,
         int($token->attr('indent') * 4 * $self->normal_halfpoint_size);
        DEBUG and print STDERR "Indenting over $indent_stack[-1] twips.\n";
        $self->{'rtfindent'} += $indent_stack[-1];

      } elsif ($tagname eq 'L') {
        # Selects one of the 'L=pod' / 'L=url' / 'L=man' Tagmap entries.
        $tagname .= '=' . ($token->attr('type') || 'pod');

      } elsif ($tagname eq 'Data') {
        my $next = $self->get_token;
        next unless defined $next;
        unless( $next->type eq 'text' ) {
          $self->unget_token($next);
          next;
        }
        DEBUG and print STDERR " raw text ", $next->text, "\n";
        # Raw data is passed through unescaped. This must be print, not
        # printf: the text is document content, not a format string, and
        # printf would mangle any '%' in it.
        print $fh "\n", $next->text, "\n";
        next;
      }

      defined($scratch = $self->{'Tagmap'}{$tagname}) or next;
      $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate
      print $fh $scratch;

      if ($tagname eq 'item-number') {
        print $fh $token->attr('number'), ". \n";
      } elsif ($tagname eq 'item-bullet') {
        # Emits \' followed by the *decimal* value of ord("_"), i.e. \'95
        # on ASCII platforms.
        # NOTE(review): presumably intended as the RTF hex escape for byte
        # 0x95 (the bullet in Windows-1252) -- confirm before changing.
        print $fh "\\'", ord("_"), "\n";
        #for funky testing: print $fh '', rtf_esc("\x{4E4B}\x{9053}");
      }

    } elsif( $type eq 'end' ) {
      DEBUG > 1 and print STDERR " -$type ",$token->tagname,"\n";
      if( ($tagname = $token->tagname) =~ m/^over-/s ) {
        DEBUG and print STDERR "Indenting back $indent_stack[-1] twips.\n";
        $self->{'rtfindent'} -= pop @indent_stack;
        pop @stack;
      } elsif( $tagname eq 'Verbatim' or $tagname eq 'VerbatimFormatted') {
        --$self->{'rtfverbatim'};
      }
      defined($scratch = $self->{'Tagmap'}{"/$tagname"}) or next;
      $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate
      print $fh $scratch;
    }
  }
  return 1;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Write the document preamble to the output filehandle: the results of
# doc_init, font_table, stylesheet, color_table, doc_info and doc_start,
# in that order, followed by a newline. Returns print's result.
sub do_beginning {
  my ($self) = @_;
  my $out = $self->{'output_fh'};

  my $preamble = join '',
    $self->doc_init,
    $self->font_table,
    $self->stylesheet,
    $self->color_table,
    $self->doc_info,
    $self->doc_start,
    "\n";

  return print {$out} $preamble;
}
# Write the closing brace that ends the RTF document.
# Returns print's result.
sub do_end {
  my ($self) = @_;
  my $out = $self->{'output_fh'};
  return print {$out} '}';   # that should do it
}
###########################################################################
# Return the RTF {\stylesheet ...} group as a string.
#
# The template has five %s slots for font sizes in half-points, filled from
# the object's codeblock_halfpoint_size and head1..head4_halfpoint_size
# accessors, in that order.
sub stylesheet {
  my ($self) = @_;

  my $template = <<'END';
{\stylesheet
{\snext0 Normal;}
{\*\cs10 \additive Default Paragraph Font;}
{\*\cs16 \additive \i \sbasedon10 pod-I;}
{\*\cs17 \additive \i\lang1024\noproof \sbasedon10 pod-F;}
{\*\cs18 \additive \b \sbasedon10 pod-B;}
{\*\cs19 \additive \f1\lang1024\noproof\sbasedon10 pod-C;}
{\s20\ql \li0\ri0\sa180\widctlpar\f1\fs%s\lang1024\noproof\sbasedon0 \snext0 pod-codeblock;}
{\*\cs21 \additive \lang1024\noproof \sbasedon10 pod-computerese;}
{\*\cs22 \additive \i\lang1024\noproof\sbasedon10 pod-L-pod;}
{\*\cs23 \additive \i\lang1024\noproof\sbasedon10 pod-L-url;}
{\*\cs24 \additive \i\lang1024\noproof\sbasedon10 pod-L-man;}
{\*\cs25 \additive \f1\lang1024\noproof\sbasedon0 pod-codelbock-plain;}
{\*\cs26 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-ital;}
{\*\cs27 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold;}
{\*\cs28 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold-ital;}
{\s31\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head1;}
{\s32\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head2;}
{\s33\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head3;}
{\s34\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head4;}
}
END

  return sprintf $template,
    $self->codeblock_halfpoint_size(),
    $self->head1_halfpoint_size(),
    $self->head2_halfpoint_size(),
    $self->head3_halfpoint_size(),
    $self->head4_halfpoint_size();
}
###########################################################################
# Override these as necessary for further customization
# Return the RTF {\fonttbl ...} group. Font f0 is the text font, f1 the
# code font and f2 the heading font.
sub font_table {
  my $fonts = <<'END';
{\fonttbl
{\f0\froman Times New Roman;}
{\f1\fmodern Courier New;}
{\f2\fswiss Arial;}
}
END
  return $fonts;
}
# Return the opening of the RTF document. The brace opened here is the one
# that do_end() closes.
sub doc_init {
  my $opening = <<'END';
{\rtf1\ansi\deff0
END
  return $opening;
}
# Return the RTF {\colortbl ...} group: the default colour, then red, then
# blue.
sub color_table {
  my $colors = <<'END';
{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
END
  return $colors;
}
# Return the RTF {\info ...} group, whose \doccomm records what generated
# the document: this package and version (prefixed by the subclass name and
# version when called on a subclass), the parent class and its version, the
# perl version, and the current GMT time.
sub doc_info {
  my ($self) = @_;
  my $class     = ref($self) || $self;
  my $generator = __PACKAGE__ . ' ' . $VERSION;

  my $tag;
  if ( $class eq __PACKAGE__ ) {
    $tag = $generator;
  }
  else {
    # "Sub::Class v1.2 (Pod::Simple::RTF 3.35)"; the " vN" part is left
    # out when the subclass reports no version.
    my $subclass_version =
      defined $self->VERSION ? " v" . $self->VERSION : '';
    $tag = $class . $subclass_version . " ($generator)";
  }

  my $template = <<'END';
{\info{\doccomm
%s
using %s v%s
under Perl v%s at %s GMT}
{\author [see doc]}{\company [see doc]}{\operator [see doc]}
}
END

  # None of the following things should need escaping, I dare say!
  return sprintf $template,
    $tag,
    $ISA[0], $ISA[0]->VERSION(),
    $], scalar(gmtime);
}
# Return the RTF that follows the document tables: the default-language
# declaration, the page header (short title plus page number) and the font
# size for normal text.
#
# The short title is normalised to end in ", " (or left empty). A title
# that is a single word containing "::" is treated as a module name and is
# marked \lang1024\noproof so that it is not spell-checked.
sub doc_start {
  my $self = $_[0];
  my $title = $self->get_short_title();
  DEBUG and print STDERR "Short Title: <$title>\n";
  $title .= ' ' if length $title;

  $title =~ s/ *$/ /s;
  $title =~ s/^ //s;
  $title =~ s/ $/, /s;
   # make sure it ends in a comma and a space, unless it's 0-length

  # By now a non-empty title always ends in ", ", so the pattern has to
  # allow for that suffix. The earlier test, m/^\S+$/, could never match
  # such a title, which left the noproof prefix below unreachable.
  my $is_obviously_module_name;
  $is_obviously_module_name = 1
   if $title =~ m/^\S+, $/s and $title =~ m/::/s;
    # catches the most common case, at least

  DEBUG and print STDERR "Title0: <$title>\n";
  $title = rtf_esc($title);
  DEBUG and print STDERR "Title1: <$title>\n";
  $title = '\lang1024\noproof ' . $title
   if $is_obviously_module_name;

  return sprintf <<'END',
\deflang%s\plain\lang%s\widowctrl
{\header\pard\qr\plain\f2\fs%s
%s
p.\chpgn\par}
\fs%s
END
    ($self->doc_lang) x 2,
    $self->header_halfpoint_size,
    $title,
    $self->normal_halfpoint_size,
  ;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#-------------------------------------------------------------------------
use integer;
# Escape text for RTF body text.
#
# Two passes are applied to each string: first every character matched by
# the ESCAPER class ('F', control characters, '-', '\', '{', '}' and
# non-ASCII characters) is replaced by its %Escape entry; then any remaining
# character above 0xFF is written as \uc1\uN? with N as a signed 16-bit
# style number.
#
# The calling context selects the behaviour:
#   void   - the arguments are modified in place;
#   list   - an escaped copy of each argument is returned;
#   scalar - the arguments are joined and one escaped string is returned.
sub rtf_esc {
  my $context = wantarray;

  if ( !defined $context ) {        # void context: alter in-place!
    foreach my $text (@_) {         # $text aliases the caller's value
      $text =~ s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g;  # ESCAPER
      $text =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
    }
    return;
  }

  if ( $context ) {                 # return an array
    my @escaped;
    foreach my $original (@_) {
      my $copy = $original;
      $copy =~ s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g;  # ESCAPER
      $copy =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
      push @escaped, $copy;
    }
    return @escaped;
  }

  # return a single scalar
  my $joined = (@_ == 1) ? $_[0] : join '', @_;
  $joined =~ s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g;  # ESCAPER
   # Escape \, {, }, -, control chars, and 7f-ff.
  $joined =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
  return $joined;
}
# Escape text for RTF verbatim (code) blocks.
#
# Same as rtf_esc() except that '-' is NOT in the ESCAPER class, so it is
# not turned into a hard hyphen: code pasted out of the document must still
# run, and a hard hyphen looks just like a normal dash while not being one.
#
# The calling context selects the behaviour:
#   void   - the arguments are modified in place;
#   list   - an escaped copy of each argument is returned;
#   scalar - the arguments are joined and one escaped string is returned.
sub rtf_esc_codely {
  my $context = wantarray;

  if ( !defined $context ) {        # void context: alter in-place!
    foreach my $text (@_) {         # $text aliases the caller's value
      $text =~ s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g;  # ESCAPER
      $text =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
    }
    return;
  }

  if ( $context ) {                 # return an array
    my @escaped;
    foreach my $original (@_) {
      my $copy = $original;
      $copy =~ s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g;  # ESCAPER
      $copy =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
      push @escaped, $copy;
    }
    return @escaped;
  }

  # return a single scalar
  my $joined = (@_ == 1) ? $_[0] : join '', @_;
  $joined =~ s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g;  # ESCAPER
   # Escape \, {, }, control chars, and 7f-ff (but not "-").
  $joined =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
  return $joined;
}
# %Escape maps one character to the RTF text that replaces it; rtf_esc()
# and rtf_esc_codely() look up $Escape{$1} for every character their
# ESCAPER pattern matches.
#
# The table is built in layers. Later pairs in the list override earlier
# ones with the same key, so the specific entries further down (and, last
# of all, those under "CRAZY HACKS") win over the generic \'xx hex escapes
# generated by the map() calls.
%Escape = (
(($] lt 5.007_003) # Broken for non-ASCII on early Perls
? (map( (chr($_),chr($_)), # things not apparently needing escaping
0x20 .. 0x7E ),
map( (chr($_),sprintf("\\'%02x", $_)), # apparently escapeworthy things
0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46))
: (map( (chr(utf8::unicode_to_native($_)),chr(utf8::unicode_to_native($_))),
0x20 .. 0x7E ),
map( (chr($_),sprintf("\\'%02x", utf8::unicode_to_native($_))),
0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46))),
# We get to escape out 'F' so that we can send RTF files thru the mail
# without the slightest worry that paragraphs beginning with "From"
# will get munged.
# And some refinements:
"\r" => "\n",
"\cj" => "\n",
"\n" => "\n\\line ",
"\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay)
"\f" => "\n\\page\n", # Formfeed
"-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen
$Pod::Simple::nbsp => "\\~", # Latin-1 non-breaking space
$Pod::Simple::shy => "\\-", # Latin-1 soft (optional) hyphen
# CRAZY HACKS:
# These come last, so for "\n" and "\r" they replace the entries above.
"\n" => "\\line\n",
"\r" => "\n",
# \cb and \cc are the private markers do_middle() puts around text that
# looks like a Perl token; they become a no-proofing character-style group.
"\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1
"\cc" => "}",
);
1;
__END__
=head1 NAME
Pod::Simple::RTF -- format Pod as RTF
=head1 SYNOPSIS
perl -MPod::Simple::RTF -e \
"exit Pod::Simple::RTF->filter(shift)->any_errata_seen" \
thingy.pod > thingy.rtf
=head1 DESCRIPTION
This class is a formatter that takes Pod and renders it as RTF, good for
viewing/printing in MSWord, WordPad/write.exe, TextEdit, etc.
This is a subclass of L<Pod::Simple> and inherits all its methods.
=head1 FORMAT CONTROL ATTRIBUTES
You can set these attributes on the parser object before you
call C<parse_file> (or a similar method) on it:
=over
=item $parser->head1_halfpoint_size( I<halfpoint_integer> );
=item $parser->head2_halfpoint_size( I<halfpoint_integer> );
=item $parser->head3_halfpoint_size( I<halfpoint_integer> );
=item $parser->head4_halfpoint_size( I<halfpoint_integer> );
These methods set the size (in half-points, like 52 for 26-point)
that these heading levels will appear as.
=item $parser->codeblock_halfpoint_size( I<halfpoint_integer> );
This method sets the size (in half-points, like 21 for 10.5-point)
that codeblocks ("verbatim sections") will appear as.
=item $parser->header_halfpoint_size( I<halfpoint_integer> );
This method sets the size (in half-points, like 15 for 7.5-point)
that the header on each page will appear in. The header
is usually just "I<modulename> p. I<pagenumber>".
=item $parser->normal_halfpoint_size( I<halfpoint_integer> );
This method sets the size (in half-points, like 26 for 13-point)
that normal paragraphic text will appear in.
=item $parser->no_proofing_exemptions( I<true_or_false> );
Set this value to true if you don't want the formatter to try
putting a hidden code on all Perl symbols (as best as it can
notice them) that labels them as being not in English, and
so not worth spellchecking.
=item $parser->doc_lang( I<microsoft_decimal_language_code> )
This sets the language code to tag this document as being in. By
default, it is currently the value of the environment variable
C<RTFDEFLANG>, or if that's not set, then the value
1033 (for US English).
Setting this appropriately is useful if you want to use the RTF
to spellcheck, and/or if you want it to hyphenate right.
Here are some notable values:
1033 US English
2057 UK English
3081 Australia English
4105 Canada English
1034 Spain Spanish
2058 Mexico Spanish
1031 Germany German
1036 France French
3084 Canada French
1035 Finnish
1044 Norwegian (Bokmal)
2068 Norwegian (Nynorsk)
=back
If you are particularly interested in customizing this module's output
even more, see the source and/or write to me.
=head1 SEE ALSO
L<Pod::Simple>, L<RTF::Writer>, L<RTF::Cookbook>, L<RTF::Document>,
L<RTF::Generator>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/PullParser.pm 0000644 00000062201 15051135563 0010431 0 ustar 00 require 5;
package Pod::Simple::PullParser;
$VERSION = '3.35';
use Pod::Simple ();
BEGIN {@ISA = ('Pod::Simple')}
use strict;
use Carp ();
use Pod::Simple::PullParserStartToken;
use Pod::Simple::PullParserEndToken;
use Pod::Simple::PullParserTextToken;
BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
# Register the attribute names for the three kinds of input source:
#   source_fh          - the filehandle we're reading from
#   source_scalar_ref  - the scalarref we're reading from
#   source_arrayref    - the arrayref we're reading from
__PACKAGE__->_accessorize( qw(
  source_fh
  source_scalar_ref
  source_arrayref
) );
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#
# And here is how we implement a pull-parser on top of a push-parser...
# Convenience entry point: read Pod from $source (STDIN when undefined),
# write the result to STDOUT, and return the parser object.
#
# May be called on a class, in which case a new object is constructed
# first. Relies on run(), so it is only usable on subclasses that define it.
sub filter {
  my $parser = shift;
  my $input  = shift;

  $parser = $parser->new  if !ref $parser;
  $input  = *STDIN{IO}    if !defined $input;

  $parser->set_source($input);
  $parser->output_fh(*STDOUT{IO});
  $parser->run; # define run() in a subclass if you want to use filter()!

  return $parser;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Parse Pod held in a string: sets the source to a reference to the
# caller's string and returns whatever run() returns.
sub parse_string_document {
  my ($parser) = @_;
  # Take the reference from @_ directly so that it points at the caller's
  # own scalar rather than at a copy.
  $parser->set_source( \ $_[1] );
  return $parser->run;
}
# Parse Pod from a file: sets the source to $filename and returns whatever
# run() returns.
sub parse_file {
  my $parser = shift;
  my $path   = shift;
  $parser->set_source($path);
  return $parser->run;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# In case anyone tries to use them:
# Placeholder run(): always croaks, because a pull-parser only becomes
# runnable once a subclass supplies its own run().
#
# The message depends on how it was reached:
#   - called on this package itself (class name or object of this class):
#     "You can call run() only on subclasses of <this package>";
#   - inherited by a subclass that did not define run():
#     "You can't call run() because <class> didn't define a run() method".
sub run {
  use Carp ();
  # Class of the invocant, whether an object or a class-name string.
  # The parentheses matter: "A eq ref($x) || $x" parses as
  # "(A eq ref($x)) || $x", which is true for any invocant and so could
  # never reach the second message.
  my $class = ref($_[0]) || $_[0];
  $class = '' unless defined $class;

  if( __PACKAGE__ eq $class ) {  # I'm not being subclassed!
    Carp::croak "You can call run() only on subclasses of "
     . __PACKAGE__;
  } else {
    Carp::croak join '',
      "You can't call run() because ",
      $class, " didn't define a run() method";
  }
}
# Push-parser entry point, deliberately disabled for pull-parsers: always
# croaks, telling the caller to use set_source instead.
sub parse_lines {
  use Carp ();
  my $message = join '',
    "Use set_source with ", __PACKAGE__,
    " and subclasses, not parse_lines";
  Carp::croak $message;
}
# Push-parser entry point, deliberately disabled for pull-parsers: always
# croaks, telling the caller to use set_source instead.
sub parse_line {
  use Carp ();
  my $message = join '',
    "Use set_source with ", __PACKAGE__,
    " and subclasses, not parse_line";
  Carp::croak $message;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Constructor: builds the parent-class object (dying if that yields a false
# value) and fills in the pull-parser defaults -- an empty token buffer and
# the three token classes -- wherever the object has no true value already.
sub new {
  my $class = shift;
  my $parser = $class->SUPER::new(@_);
  die "Couldn't construct for $class" unless $parser;

  my @defaults = (
    [ 'token_buffer'      => [] ],
    [ 'start_token_class' => 'Pod::Simple::PullParserStartToken' ],
    [ 'text_token_class'  => 'Pod::Simple::PullParserTextToken' ],
    [ 'end_token_class'   => 'Pod::Simple::PullParserEndToken' ],
  );
  foreach my $pair (@defaults) {
    my ($key, $value) = @$pair;
    $parser->{$key} = $value unless $parser->{$key};
  }

  DEBUG > 1 and print STDERR "New pullparser object: $parser\n";
  return $parser;
}
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
# Return the next token from the front of $self->{'token_buffer'}.
#
# While the buffer is empty, more input is fed to the parent class's
# parse_lines(). NOTE(review): the handlers that turn parsed lines into
# buffered tokens are not in this sub -- presumably parse_lines ends up
# pushing onto token_buffer; confirm against the rest of the class.
#
# Input comes from whichever source key exists on the object:
#   source_fh         - up to Pod::Simple::MANY_LINES lines are read per
#                       pass; the undef read at end-of-file is passed on too;
#   source_arrayref   - up to MANY_LINES items are spliced off per pass,
#                       then an explicit undef once the array is empty;
#   source_scalar_ref - one line per pass, located with a /g match on the
#                       string, then an undef once the match fails.
# Each source key is deleted when it is exhausted. If source_dead is set,
# an undef is buffered, so the caller gets undef as the end-of-stream
# signal. Dies with "What source??" when no source is configured.
sub get_token {
  my $self = shift;
  DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n";
  DEBUG > 2 and print STDERR " Items in token-buffer (",
   scalar( @{ $self->{'token_buffer'} } ) ,
   ") :\n", map(
     " " . $_->dump . "\n", @{ $self->{'token_buffer'} }
   ),
   @{ $self->{'token_buffer'} } ? '' : ' (no tokens)',
   "\n"
  ;

  until( @{ $self->{'token_buffer'} } ) {
    DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n";
    if($self->{'source_dead'}) {
      DEBUG and print STDERR "$self 's source is dead.\n";
      push @{ $self->{'token_buffer'} }, undef;
    } elsif(exists $self->{'source_fh'}) {
      my @lines;
      my $fh = $self->{'source_fh'}
       || Carp::croak('You have to call set_source before you can call get_token');

      DEBUG and print STDERR "$self 's source is filehandle $fh.\n";
      # Read those many lines at a time
      for(my $i = Pod::Simple::MANY_LINES; $i--;) {
        DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n";
        local $/ = $Pod::Simple::NL;
        push @lines, scalar(<$fh>); # readline
        DEBUG > 3 and print STDERR " Line is: ",
          defined($lines[-1]) ? $lines[-1] : "<undef>\n";
        unless( defined $lines[-1] ) {
          DEBUG and print STDERR "That's it for that source fh! Killing.\n";
          delete $self->{'source_fh'}; # so it can be GC'd
          last;
        }
         # but pass thru the undef, which will set source_dead to true

        # TODO: look to see if $lines[-1] is =encoding, and if so,
        # do horribly magic things

      }

      if(DEBUG > 8) {
        print STDERR "* I've gotten ", scalar(@lines), " lines:\n";
        foreach my $l (@lines) {
          if(defined $l) {
            print STDERR " line {$l}\n";
          } else {
            print STDERR " line undef\n";
          }
        }
        print STDERR "* end of ", scalar(@lines), " lines\n";
      }

      $self->SUPER::parse_lines(@lines);

    } elsif(exists $self->{'source_arrayref'}) {
      DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ",
       scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";

      DEBUG > 3 and print STDERR " Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
      $self->SUPER::parse_lines(
        splice @{ $self->{'source_arrayref'} },
        0,
        Pod::Simple::MANY_LINES
      );
      unless( @{ $self->{'source_arrayref'} } ) {
        DEBUG and print STDERR "That's it for that source arrayref! Killing.\n";
        $self->SUPER::parse_lines(undef);
        delete $self->{'source_arrayref'}; # so it can be GC'd
      }
       # to make sure that an undef is always sent to signal end-of-stream

    } elsif(exists $self->{'source_scalar_ref'}) {

      DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
        length(${ $self->{'source_scalar_ref'} }) -
        (pos(${ $self->{'source_scalar_ref'} }) || 0),
        " characters left to parse.\n";

      DEBUG > 3 and print STDERR " Fetching a line from source-string...\n";
      # The /g match in scalar context resumes from the string's pos(), so
      # each pass picks up the next line.
      if( ${ $self->{'source_scalar_ref'} } =~
        m/([^\n\r]*)((?:\r?\n)?)/g
      ) {
        #print(">> $1\n"),
        $self->SUPER::parse_lines($1)
         if length($1) or length($2)
          or pos(     ${ $self->{'source_scalar_ref'} })
           != length( ${ $self->{'source_scalar_ref'} });
         # I.e., unless it's a zero-length "empty line" at the very
         #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
      } else { # that's the end.  Byebye
        $self->SUPER::parse_lines(undef);
        delete $self->{'source_scalar_ref'};
        DEBUG and print STDERR "That's it for that source scalarref! Killing.\n";
      }

    } else {
      die "What source??";
    }
  }
  # Report the element that is actually about to be returned: the shift
  # below takes the FIRST buffered token, so the debug line shows [0]
  # (it used to show [-1], the last one).
  DEBUG and print STDERR "get_token about to return ",
   Pod::Simple::pretty( @{$self->{'token_buffer'}}
     ? $self->{'token_buffer'}[0] : undef
   ), "\n";
  return shift @{$self->{'token_buffer'}}; # that's an undef if empty
}
# Push one or more token objects back onto the front of the token buffer,
# so that the next get_token calls hand them out again in the given order.
# Croaks if any argument is undef, a plain string, or a reference that
# cannot do ->type.  Returns nothing.
sub unget_token {
  my($parser, @tokens) = @_;

  if(DEBUG) {
    print STDERR "Ungetting ", scalar(@tokens), " tokens: ",
      (@tokens ? "@tokens\n" : "().\n");
  }

  # Validate everything first: nothing is buffered if any item is bad.
  for my $candidate (@tokens) {
    if(!defined $candidate) {
      Carp::croak("Can't unget that, because it's not a token -- it's undef!");
    }
    elsif(!ref $candidate) {
      Carp::croak("Can't unget $candidate, because it's not a token -- it's a string!");
    }
    elsif(!UNIVERSAL::can($candidate, 'type')) {
      Carp::croak("Can't unget $candidate, because it's not a token object!");
    }
  }

  unshift @{ $parser->{'token_buffer'} }, @tokens;

  if(DEBUG > 1) {
    print STDERR "Token buffer now has ",
      scalar(@{ $parser->{'token_buffer'} }), " items in it.\n";
  }
  return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# $self->{'source_filename'} = $source;
# Attach the Pod source this pull parser will read from.
#
# With no argument, acts as a getter and returns the current 'source_fh'.
# With one argument, the source may be:
#   - a glob (e.g. *STDIN) or any other non-SCALAR/non-ARRAY reference,
#     which is used as the filehandle;
#   - a reference to a scalar holding the whole document;
#   - a reference to an array of lines;
#   - a non-empty string, taken as the name of a file to open for reading.
#
# Croaks if a source has already been assigned, if the argument is undef
# or the empty string, or if the named file cannot be opened.
#
# Returns 1 for filehandle and filename sources; returns nothing for
# scalar-ref and array-ref sources.
sub set_source {
  my $self = shift @_;
  return $self->{'source_fh'} unless @_;

  Carp::croak("Cannot assign new source to pull parser; create a new instance, instead")
    if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'};

  my $handle;
  if(!defined $_[0]) {
    # This branch used to claim the argument was an empty string.
    Carp::croak("Can't use undef as a source for set_source");
  } elsif(ref(\( $_[0] )) eq 'GLOB') {
    $self->{'source_filename'} = '' . ($handle = $_[0]);
    DEBUG and print STDERR "$self 's source is glob $_[0]\n";
    # and fall thru
  } elsif(ref( $_[0] ) eq 'SCALAR') {
    $self->{'source_scalar_ref'} = $_[0];
    DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n";
    return;
  } elsif(ref( $_[0] ) eq 'ARRAY') {
    $self->{'source_arrayref'} = $_[0];
    DEBUG and print STDERR "$self 's source is array ref $_[0]\n";
    return;
  } elsif(ref $_[0]) {
    $self->{'source_filename'} = '' . ($handle = $_[0]);
    DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n";
  } elsif(!length $_[0]) {
    Carp::croak("Can't use empty-string as a source for set_source");
  } else {  # It's a filename!
    DEBUG and print STDERR "$self 's source is filename $_[0]\n";
    {
      local *PODSOURCE;
      # Three-argument open: the name is used literally, so surrounding
      # whitespace and characters such as '|', '>' or '&' in a filename
      # can no longer change what gets opened.  Callers wanting standard
      # input should pass a handle (e.g. \*STDIN) rather than "-".
      open(PODSOURCE, '<', $_[0]) || Carp::croak("Can't open $_[0]: $!");
      # Keep only the IO slot so the handle outlives the localized glob.
      $handle = *PODSOURCE{IO};
    }
    $self->{'source_filename'} = $_[0];
    DEBUG and print STDERR "  Its name is $_[0].\n";

    # TODO: file-discipline things here!
  }

  $self->{'source_fh'} = $handle;
  DEBUG and print STDERR "  Its handle is $handle\n";
  return 1;
}
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
# Alias: forwards all arguments to get_short_title.
sub get_title_short {
  my $parser = shift;
  return $parser->get_short_title(@_);
}
# Like get_title, but when the title looks like "Foo::Bar -- bars for your
# foo" (up to 60 non-space characters, then "-" or "--" set off by
# whitespace, then more text) only the leading "Foo::Bar" part is returned.
# Any other title is returned unchanged.
sub get_short_title {
  my $parser     = shift;
  my $full_title = $parser->get_title(@_);

  if($full_title =~ m/^(\S{1,60})\s+--?\s+./s) {
    my $leading_word = $1;
    return $leading_word;
  }
  return $full_title;
}
# Return the content of the NAME section via _get_titled_section, looking
# at no more than 50 tokens and with the 'desperate' fallback enabled.
# Extra arguments are passed along as further options.
sub get_title {
  my $parser = shift;
  return $parser->_get_titled_section(
    'NAME',
    max_token => 50,
    desperate => 1,
    @_,
  );
}
# Return the content of the VERSION section via _get_titled_section.
# Verbatim blocks are accepted as content, up to 400 tokens are examined,
# and content longer than 3000 characters is rejected.  Extra arguments
# are passed along as further options.
sub get_version {
  my $parser = shift;
  my @section_options = (
    max_token          => 400,
    accept_verbatim    => 1,
    max_content_length => 3_000,
  );
  return $parser->_get_titled_section('VERSION', @section_options, @_);
}
# Return the content of the DESCRIPTION section via _get_titled_section,
# examining up to 400 tokens and rejecting content longer than 3000
# characters.  Extra arguments are passed along as further options.
sub get_description {
  my $parser = shift;
  my @section_options = (
    max_token          => 400,
    max_content_length => 3_000,
  );
  return $parser->_get_titled_section('DESCRIPTION', @section_options, @_);
}
# A harmless alias: forwards all arguments to get_author.
sub get_authors {
  my $parser = shift;
  return $parser->get_author(@_);
}
# Return the content of the AUTHOR section, or failing that (a false
# result) the content of the AUTHORS section.  Extra arguments are passed
# along as further options to both lookups.
sub get_author {
  my $parser = shift;

  # Max_token is so high because these are
  #  typically at the end of the document:
  my $singular = $parser->_get_titled_section('AUTHOR', max_token => 10_000, @_);
  return $singular if $singular;

  return $parser->_get_titled_section('AUTHORS', max_token => 10_000, @_);
}
#--------------------------------------------------------------------------
# Look ahead in the token stream for a "=head1 <titlename>" heading and
# return the text of the paragraph that immediately follows it.  Every
# token read along the way is pushed back with unget_token, so the caller
# can still process the stream from where it was.
#
# Arguments: ($self, $titlename, %options).  $titlename must consist of
# 1 to 60 characters from A-Z and space, else this croaks.  Options:
#   max_token          - stop after reading this many tokens
#                        (a false value means 1_000_000)
#   desperate          - if a =head1 is not the wanted one but looks like a
#                        title rather than a standard section name, return
#                        that heading's own text
#   accept_verbatim    - also accept a Verbatim or VerbatimFormatted block
#                        as the content following the heading
#   max_content_length - longest acceptable content, in characters
#                        (default 120; 0 disables the check)
#   nocase             - uppercase the heading text before comparing
# Any other option croaks.
#
# A heading matches if its text equals $titlename or contains
# "($titlename)".  Text inside X<> codes in the heading is ignored.
#
# Returns the content with leading whitespace removed, or '' if nothing
# acceptable was found.
sub _get_titled_section {
  # Based on a get_title originally contributed by Graham Barr
  my($self, $titlename, %options) = (@_);

  my $max_token            = delete $options{'max_token'};
  my $desperate_for_title  = delete $options{'desperate'};
  my $accept_verbatim      = delete $options{'accept_verbatim'};
  my $max_content_length   = delete $options{'max_content_length'};
  my $nocase               = delete $options{'nocase'};
  $max_content_length = 120 unless defined $max_content_length;

  # Whatever is left over was not a recognized option.
  Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
    . join " ", map "[$_]", sort keys %options
  )
   if keys %options;

  # Element names whose text may serve as the section's content.
  my %content_containers;
  $content_containers{'Para'} = 1;
  if($accept_verbatim) {
    $content_containers{'Verbatim'} = 1;
    $content_containers{'VerbatimFormatted'} = 1;
  }

  my $token_count = 0;
  my $title;
  my @to_unget;
  # States: 0 = seeking a =head1; 1 = collecting the head1's text;
  #         2 = expecting the start of a content element;
  #         3 = collecting the content's text.
  my $state = 0;
  my $depth = 0;

  Carp::croak("What kind of titlename is \"$titlename\"?!") unless
   defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
  my $titlename_re = quotemeta($titlename);

  my $head1_text_content;
  my $para_text_content;
  my $skipX;   # true while inside an X<> code within the heading

  while(
    ++$token_count <= ($max_token || 1_000_000)
    and defined(my $token = $self->get_token)
  ) {
    push @to_unget, $token;

    if ($state == 0) { # seeking =head1
      if( $token->is_start and $token->tagname eq 'head1' ) {
        DEBUG and print STDERR "  Found head1.  Seeking content...\n";
        ++$state;
        $head1_text_content = '';
      }
    }

    elsif($state == 1) { # accumulating text until end of head1
      if( $token->is_text ) {
        unless ($skipX) {
          DEBUG and print STDERR "   Adding \"", $token->text, "\" to head1-content.\n";
          $head1_text_content .= $token->text;
        }
      } elsif( $token->is_tagname('X') ) {
        # We're going to want to ignore X<> stuff.
        $skipX = $token->is_start;
        DEBUG and print STDERR +($skipX ? 'Start' : 'End'), "ing ignoring of X<> tag\n";
      } elsif( $token->is_end and $token->tagname eq 'head1' ) {
        DEBUG and print STDERR "  Found end of head1.  Considering content...\n";
        $head1_text_content = uc $head1_text_content if $nocase;
        if($head1_text_content eq $titlename
          or $head1_text_content =~ m/\($titlename_re\)/s
          # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
        ) {
          DEBUG and print STDERR "  Yup, it was $titlename.  Seeking next para-content...\n";
          ++$state;
        } elsif(
          $desperate_for_title
           # if we're so desperate we'll take the first
           #  =head1's content as a title
          and $head1_text_content =~ m/\S/
          and $head1_text_content !~ m/^[ A-Z]+$/s
          and $head1_text_content !~
            m/\((?:
             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
            )\)/sx
            # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
          and ($max_content_length
            ? (length($head1_text_content) <= $max_content_length) # sanity
            : 1)
        ) {
          # Looks good; trim it
          ($title = $head1_text_content) =~ s/\s+$//;
          DEBUG and print STDERR "  It looks titular: \"$title\".\n\n  Using that.\n";
          last;
        } else {
          --$state;
          DEBUG and print STDERR "  Didn't look titular ($head1_text_content).\n",
            "\n  Dropping back to seeking-head1-content mode...\n";
        }
      }
    }

    elsif($state == 2) {
      # seeking start of para (which must immediately follow)
      if($token->is_start and $content_containers{ $token->tagname }) {
        DEBUG and print STDERR "  Found start of Para.  Accumulating content...\n";
        $para_text_content = '';
        ++$state;
      } else {
        # Debug tracing belongs on STDERR like every other trace here;
        # this one used to go to the currently selected output handle.
        DEBUG and print STDERR
         "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
        $state = 0;
      }
    }

    elsif($state == 3) {
      # accumulating text until end of Para
      if( $token->is_text ) {
        DEBUG and print STDERR "   Adding \"", $token->text, "\" to para-content.\n";
        $para_text_content .= $token->text;
        # and keep looking

      } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
        DEBUG and print STDERR "  Found end of Para.  Considering content: ",
          $para_text_content, "\n";

        if( $para_text_content =~ m/\S/
          and ($max_content_length
           ? (length($para_text_content) <= $max_content_length)
           : 1)
        ) {
          # Some minimal sanity constraints, I think.
          DEBUG and print STDERR "  It looks contentworthy, I guess.  Using it.\n";
          $title = $para_text_content;
          last;
        } else {
          DEBUG and print STDERR "  Doesn't look at all contentworthy!\n  Giving up.\n";
          undef $title;
          last;
        }
      }
    }

    else {
      die "IMPOSSIBLE STATE $state!\n";  # should never happen
    }

  }

  # Put it all back!
  $self->unget_token(@to_unget);

  if(DEBUG) {
    if(defined $title) { print STDERR "  Returning title <$title>\n" }
    else { print STDERR "Returning title <>\n" }
  }

  return '' unless defined $title;
  $title =~ s/^\s+//;
  return $title;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#
# Methods that actually do work at parse-time:
# Parse-time callback: wrap ($element_name, $attr_hash_r) in a new object
# of the configured 'start_token_class' and append it to the token buffer.
sub _handle_element_start {
  my $parser = shift;   # @_ is now ($element_name, $attr_hash_r)

  if(DEBUG > 2) {
    print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
  }

  my @fresh = $parser->{'start_token_class'}->new(@_);
  push @{ $parser->{'token_buffer'} }, @fresh;
  return;
}
# Parse-time callback: wrap ($text) in a new object of the configured
# 'text_token_class' and append it to the token buffer.
sub _handle_text {
  my $parser = shift;   # @_ is now ($text)

  if(DEBUG > 2) {
    print STDERR "== $_[0]\n";
  }

  my @fresh = $parser->{'text_token_class'}->new(@_);
  push @{ $parser->{'token_buffer'} }, @fresh;
  return;
}
# Parse-time callback: wrap ($element_name) in a new object of the
# configured 'end_token_class' and append it to the token buffer.
sub _handle_element_end {
  my $parser = shift;   # @_ is now ($element_name)

  if(DEBUG > 2) {
    print STDERR "-- $_[0]\n";
  }

  my @fresh = $parser->{'end_token_class'}->new(@_);
  push @{ $parser->{'token_buffer'} }, @fresh;
  return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
__END__
=head1 NAME
Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
=head1 SYNOPSIS
my $parser = SomePodProcessor->new;
$parser->set_source( "whatever.pod" );
$parser->run;
Or:
my $parser = SomePodProcessor->new;
$parser->set_source( $some_filehandle_object );
$parser->run;
Or:
my $parser = SomePodProcessor->new;
$parser->set_source( \$document_source );
$parser->run;
Or:
my $parser = SomePodProcessor->new;
$parser->set_source( \@document_lines );
$parser->run;
And elsewhere:
require 5;
package SomePodProcessor;
use strict;
use base qw(Pod::Simple::PullParser);
sub run {
my $self = shift;
Token:
while(my $token = $self->get_token) {
...process each token...
}
}
=head1 DESCRIPTION
This class is for using Pod::Simple to build a Pod processor -- but
one that uses an interface based on a stream of token objects,
instead of based on events.
This is a subclass of L<Pod::Simple> and inherits all its methods.
A subclass of Pod::Simple::PullParser should define a C<run> method
that calls C<< $token = $parser->get_token >> to pull tokens.
See the source for Pod::Simple::RTF for an example of a formatter
that uses Pod::Simple::PullParser.
=head1 METHODS
=over
=item my $token = $parser->get_token
This returns the next token object (which will be of a subclass of
L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
the end of the document.
=item $parser->unget_token( $token )
=item $parser->unget_token( $token1, $token2, ... )
This restores the token object(s) to the front of the parser stream.
=back
The source has to be set before you can parse anything. The lowest-level
way is to call C<set_source>:
=over
=item $parser->set_source( $filename )
=item $parser->set_source( $filehandle_object )
=item $parser->set_source( \$document_source )
=item $parser->set_source( \@document_lines )
=back
Or you can call these methods, which Pod::Simple::PullParser has defined
to work just like Pod::Simple's same-named methods:
=over
=item $parser->parse_file(...)
=item $parser->parse_string_document(...)
=item $parser->filter(...)
=item $parser->parse_from_file(...)
=back
For those to work, the Pod-processing subclass of
Pod::Simple::PullParser has to have defined a $parser->run method --
so it is advised that all Pod::Simple::PullParser subclasses do so.
See the Synopsis above, or the source for Pod::Simple::RTF.
Authors of formatter subclasses might find these methods useful to
call on a parser object that you haven't started pulling tokens
from yet:
=over
=item my $title_string = $parser->get_title
This tries to get the title string out of $parser, by getting some tokens,
and scanning them for the title, and then ungetting them so that you can
process the token-stream from the beginning.
For example, suppose you have a document that starts out:
=head1 NAME
Hoo::Boy::Wowza -- Stuff B<wow> yeah!
$parser->get_title on that document will return "Hoo::Boy::Wowza --
Stuff wow yeah!". If the document starts with:
=head1 Name
Hoo::Boy::W00t -- Stuff B<w00t> yeah!
Then you'll need to pass the C<nocase> option in order to recognize "Name":
$parser->get_title(nocase => 1);
In cases where get_title can't find the title, it will return empty-string
("").
=item my $title_string = $parser->get_short_title
This is just like get_title, except that it returns just the modulename, if
the title seems to be of the form "SomeModuleName -- description".
For example, suppose you have a document that starts out:
=head1 NAME
Hoo::Boy::Wowza -- Stuff B<wow> yeah!
then $parser->get_short_title on that document will return
"Hoo::Boy::Wowza".
But if the document starts out:
=head1 NAME
Hooboy, stuff B<wow> yeah!
then $parser->get_short_title on that document will return "Hooboy,
stuff wow yeah!". If the document starts with:
=head1 Name
Hoo::Boy::W00t -- Stuff B<w00t> yeah!
Then you'll need to pass the C<nocase> option in order to recognize "Name":
$parser->get_short_title(nocase => 1);
If the title can't be found, then get_short_title returns empty-string
("").
=item $author_name = $parser->get_author
This works like get_title except that it returns the contents of the
"=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n"
section, pass the C<nocase> option:
$parser->get_author(nocase => 1);
(This method tolerates "AUTHORS" instead of "AUTHOR" too.)
=item $description_name = $parser->get_description
This works like get_title except that it returns the contents of the
"=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section
isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n"
section, pass the C<nocase> option:
$parser->get_description(nocase => 1);
=item $version_block = $parser->get_version
This works like get_title except that it returns the contents of
the "=head1 VERSION\n\n[BIG BLOCK]\n" block. Note that this does NOT
return the module's C<$VERSION>!! To recognize a
"=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> option:
$parser->get_version(nocase => 1);
=back
=head1 NOTE
You don't actually I<have> to define a C<run> method. If you're
writing a Pod-formatter class, you should define a C<run> just so
that users can call C<parse_file> etc, but you don't I<have> to.
And if you're not writing a formatter class, but are instead just
writing a program that does something simple with a Pod::Simple::PullParser
object (and not an object of a subclass), then there's no reason to
bother subclassing to add a C<run> method.
=head1 SEE ALSO
L<Pod::Simple>
L<Pod::Simple::PullParserToken> -- and its subclasses
L<Pod::Simple::PullParserStartToken>,
L<Pod::Simple::PullParserTextToken>, and
L<Pod::Simple::PullParserEndToken>.
L<HTML::TokeParser>, which inspired this.
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
JUNK:
# NOTE(review): everything from here on follows this module's __END__
# marker and its final =cut, so perl never compiles it; it appears to be
# kept only as a record of an earlier title-finding approach.
#
# _old_get_title($self): read up to about 25 tokens, render them as a tag
# pattern string, and look for either "=head1 NAME" followed by a Para, or
# any other headN, taking the title text from the matching tokens.  All
# tokens read are pushed back with unget_token.  Returns the title, or ''
# if none was found.
sub _old_get_title { # some witchery in here
my $self = $_[0];
my $title;
my @to_unget;
while(1) {
push @to_unget, $self->get_token;
unless(defined $to_unget[-1]) { # whoops, short doc!
pop @to_unget;
last;
}
DEBUG and print STDERR "-Got token ", $to_unget[-1]->dump, "\n";
(DEBUG and print STDERR "Too much in the buffer.\n"),
last if @to_unget > 25; # sanity
# Each token becomes "<tag>", "</tag>", its own text if that is all
# capital letters, or "X" for any other text.
my $pattern = '';
if( #$to_unget[-1]->type eq 'end'
#and $to_unget[-1]->tagname eq 'Para'
#and
($pattern = join('',
map {;
($_->type eq 'start') ? ("<" . $_->tagname .">")
: ($_->type eq 'end' ) ? ("</". $_->tagname .">")
: ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
: "BLORP"
} @to_unget
)) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
) {
# Whee, it fits the pattern
DEBUG and print STDERR "Seems to match =head1 NAME pattern.\n";
$title = '';
# Walk backwards, gathering text until the Para's start token.
foreach my $t (reverse @to_unget) {
last if $t->type eq 'start' and $t->tagname eq 'Para';
$title = $t->text . $title if $t->type eq 'text';
}
undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
last;
} elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
and !( $1 eq '1' and $2 eq 'NAME' )
) {
# Well, it fits a fallback pattern
DEBUG and print STDERR "Seems to match NAMEless pattern.\n";
$title = '';
# Walk backwards, gathering text until the heading's start token.
foreach my $t (reverse @to_unget) {
last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
$title = $t->text . $title if $t->type eq 'text';
}
undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
last;
} else {
DEBUG and $pattern and print STDERR "Leading pattern: $pattern\n";
}
}
# Put it all back:
$self->unget_token(@to_unget);
if(DEBUG) {
if(defined $title) { print STDERR " Returning title <$title>\n" }
else { print STDERR "Returning title <>\n" }
}
return '' unless defined $title;
return $title;
}
Simple/LinkSection.pm 0000644 00000010370 15051135563 0010562 0 ustar 00
# Package preamble for Pod::Simple::LinkSection: version, strictures, and
# the operator overloads that make treelet objects behave like strings.
require 5;
package Pod::Simple::LinkSection;
# Based somewhat dimly on Array::Autojoin
# NOTE(review): $VERSION is declared and assigned twice in this preamble
# (here and again after "use strict"), both times with the same value.
use vars qw($VERSION );
$VERSION = '3.35';
use strict;
# Expected to supply stringify_lol, which the overloads below and the
# as_string/stringify methods refer to.
use Pod::Simple::BlackBox;
use vars qw($VERSION );
$VERSION = '3.35';
# Both stringification and boolean tests of an object go through
# stringify_lol; the '.=' overload (tack_on) is left disabled.
use overload( # So it'll stringify nice
'""' => \&Pod::Simple::BlackBox::stringify_lol,
'bool' => \&Pod::Simple::BlackBox::stringify_lol,
# '.=' => \&tack_on, # grudgingly support
'fallback' => 1, # turn on cleverness
);
# Append $_[1] to the string form of $_[0].  Note that this REPLACES the
# caller's first argument (through the @_ alias) with a fresh, unblessed
# treelet ['', {}, $text], and returns the combined text.
sub tack_on {
  my $flattened = "$_[0]";
  $_[0] = ['', {}, $flattened];
  $_[0][2] .= $_[1];
  return $_[0][2];
}
# Return the text content of this treelet, as computed by
# Pod::Simple::BlackBox::stringify_lol from the same arguments.
sub as_string {
  return Pod::Simple::BlackBox::stringify_lol(@_);
}
# Same as as_string: return the text content of this treelet, as computed
# by Pod::Simple::BlackBox::stringify_lol from the same arguments.
sub stringify {
  return Pod::Simple::BlackBox::stringify_lol(@_);
}
# Construct a treelet object: a blessed array of the form
# [ $name, \%attributes, @children ].
#
# Called with one argument:
#   - a non-reference (or any false value): returns [ '', {}, $value ];
#   - an unblessed array ref: shallow-copies it, then clones its parts;
#   - any other reference: croaks.
# Called with zero or several arguments: they become the children of a new
# nameless treelet, [ '', {}, @_ ].
#
# Nested array refs are turned into objects of the same class, and hash
# refs are shallow-copied, so the result does not share those containers
# with the input.  May be called on a class or on an existing object.
sub new {
  my $class = shift;
  $class = ref($class) || $class;
  my $new;
  if(@_ == 1) {
    if (!ref($_[0] || '')) { # most common case: one bare string
      return bless ['', {}, $_[0] ], $class;
    } elsif( ref($_[0] || '') eq 'ARRAY') {
      $new = [ @{ $_[0] } ];
    } else {
      # Report the argument we were given; $new is still undef here.
      Carp::croak( "$class new() doesn't know to clone $_[0]" );
    }
  } else { # misc stuff
    $new = [ '', {}, @_ ];
  }

  # By now it's a treelet:  [ 'foo', {}, ... ]
  foreach my $x (@$new) {
    if(ref($x || '') eq 'ARRAY') {
      $x = $class->new($x); # recurse
    } elsif(ref($x || '') eq 'HASH') {
      $x = { %$x };
    }
     # otherwise leave it.
  }

  return bless $new, $class;
}
# Not much in this class is likely to be link-section specific --
# but it just so happens that link-sections are about the only treelets
# that are exposed to the user.
1;
__END__
# TODO: let it be an option whether a given subclass even wants little treelets?
__END__
=head1 NAME
Pod::Simple::LinkSection -- represent "section" attributes of L codes
=head1 SYNOPSIS
# a long story
=head1 DESCRIPTION
This class is not of interest to general users.
Pod::Simple uses this class for representing the value of the
"section" attribute of "L" start-element events. Most applications
can just use the normal stringification of objects of this class;
they stringify to just the text content of the section,
such as "foo" for
C<< LZ<><Stuff/foo> >>, and "bar" for
C<< LZ<><Stuff/bIZ<><ar>> >>.
However, anyone particularly interested in getting the full value of
the treelet can just traverse the content of the treelet,
@$treelet_object. To wit:
% perl -MData::Dumper -e
"use base qw(Pod::Simple::Methody);
sub start_L { print Dumper($_[1]{'section'} ) }
__PACKAGE__->new->parse_string_document('=head1 L<Foo/bI<ar>baz>>')
"
Output:
$VAR1 = bless( [
'',
{},
'b',
bless( [
'I',
{},
'ar'
], 'Pod::Simple::LinkSection' ),
'baz'
], 'Pod::Simple::LinkSection' );
But stringify it and you get just the text content:
% perl -MData::Dumper -e
"use base qw(Pod::Simple::Methody);
sub start_L { print Dumper( '' . $_[1]{'section'} ) }
__PACKAGE__->new->parse_string_document('=head1 L<Foo/bI<ar>baz>>')
"
Output:
$VAR1 = 'barbaz';
=head1 SEE ALSO
L<Pod::Simple>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2004 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/XMLOutStream.pm 0000644 00000011072 15051135563 0010644 0 ustar 00
# Package preamble for Pod::Simple::XMLOutStream: inheritance, the DEBUG
# alias, and the two package-level output settings.
require 5;
package Pod::Simple::XMLOutStream;
use strict;
use Carp ();
use Pod::Simple ();
use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS);
$VERSION = '3.35';
BEGIN {
@ISA = ('Pod::Simple');
# Reuse Pod::Simple's DEBUG unless this package already has its own.
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG;
}
# Printed in front of every attribute by _handle_element_start; a value
# set before this file is loaded is kept.
$ATTR_PAD = "\n" unless defined $ATTR_PAD;
# Don't mess with this unless you know what you're doing.
# When true, _handle_element_start emits attributes in sorted key order
# instead of hash-iteration order; a value set beforehand is kept.
$SORT_ATTRS = 0 unless defined $SORT_ATTRS;
# Construct the parser through the superclass, default its 'output_fh' to
# STDOUT when none (or a false one) is set, and turn on
# keep_encoding_directive.
sub new {
  my $class  = shift;
  my $parser = $class->SUPER::new(@_);

  $parser->{'output_fh'} = *STDOUT{IO} unless $parser->{'output_fh'};
  $parser->keep_encoding_directive(1);
  #$parser->accept_codes('VerbatimFormatted');
  return $parser;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Write an element's start tag to the output filehandle.
# Called as ($self, $element_name, $attr_hash_r).
#
# Each attribute is written as $ATTR_PAD, then name="value" with the value
# passed through _xml_escape.  Attributes whose names begin with "~" are
# skipped, as is 'start_line' when the parser's 'hide_line_numbers' is
# set.  Attributes come out in sorted order if $SORT_ATTRS is true,
# otherwise in hash-iteration order.
sub _handle_element_start {
  my $out = $_[0]{'output_fh'};
  DEBUG and print STDERR "++ $_[1]\n";

  print $out "<", $_[1];

  if($SORT_ATTRS) {
    for my $name (sort keys %{$_[2]}) {
      next if $name =~ m/^~/s;
      next if $name eq 'start_line' and $_[0]{'hide_line_numbers'};
      my $escaped = $_[2]{$name};   # copy, so the attribute hash is untouched
      _xml_escape($escaped);
      print $out $ATTR_PAD, $name, '="', $escaped, '"';
    }
  }
  else {  # faster
    while(my($name, $escaped) = each %{$_[2]}) {
      next if $name =~ m/^~/s;
      next if $name eq 'start_line' and $_[0]{'hide_line_numbers'};
      _xml_escape($escaped);
      print $out $ATTR_PAD, $name, '="', $escaped, '"';
    }
  }

  print $out ">";
  return;
}
# Write a run of text to the output filehandle, passed through
# _xml_escape.  Called as ($self, $text); empty text writes nothing.
sub _handle_text {
  DEBUG and print STDERR "== \"$_[1]\"\n";
  return unless length $_[1];

  my $escaped = $_[1];   # copy, so the caller's text is not modified
  _xml_escape($escaped);
  print {$_[0]{'output_fh'}} $escaped;
  return;
}
# Write an element's end tag, "</name>", to the output filehandle.
# Called as ($self, $element_name).
sub _handle_element_end {
  my($parser, $element_name) = @_;
  DEBUG and print STDERR "-- $element_name\n";

  my $out = $parser->{'output_fh'};
  print {$out} "</", $element_name, ">";
  return;
}
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Escape each argument IN PLACE for use in XML: every character outside a
# conservative safe list (letters, digits, newline, tab, space and some
# punctuation) is replaced by a decimal character reference, "&#NNN;".
# Returns nothing.
sub _xml_escape {
  # On perls from 5.7.3 on, the native code point is mapped to Unicode;
  # the plain ord() fallback is broken for non-ASCII platforms on early
  # perls.
  my $can_map_to_unicode = $] ge 5.007_003;

  # Escape things very cautiously.
  # Yes, stipulate the list without a range, so that this can work right on
  #  all charsets that this module happens to run under.
  for my $text (@_) {
    $text =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.($can_map_to_unicode ? utf8::native_to_unicode(ord($1)) : ord($1)).';'/eg;
  }
  return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
__END__
=head1 NAME
Pod::Simple::XMLOutStream -- turn Pod into XML
=head1 SYNOPSIS
perl -MPod::Simple::XMLOutStream -e \
"exit Pod::Simple::XMLOutStream->filter(shift)->any_errata_seen" \
thingy.pod
=head1 DESCRIPTION
Pod::Simple::XMLOutStream is a subclass of L<Pod::Simple> that parses
Pod and turns it into XML.
Pod::Simple::XMLOutStream inherits methods from
L<Pod::Simple>.
=head1 SEE ALSO
L<Pod::Simple::DumpAsXML> is rather like this class; see its
documentation for a discussion of the differences.
L<Pod::Simple>, L<Pod::Simple::DumpAsXML>, L<Pod::SAX>
L<Pod::Simple::Subclassing>
The older (and possibly obsolete) libraries L<Pod::PXML>, L<Pod::XML>
=head1 ABOUT EXTENDING POD
TODO: An example or two of =extend, then point to Pod::Simple::Subclassing
=head1 SEE ALSO
L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002-2004 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/BlackBox.pm 0000644 00000215626 15051135563 0010040 0 ustar 00 package Pod::Simple::BlackBox;
#
# "What's in the box?" "Pain."
#
###########################################################################
#
# This is where all the scary things happen: parsing lines into
# paragraphs; and then into directives, verbatims, and then also
# turning formatting sequences into treelets.
#
# Are you really sure you want to read this code?
#
#-----------------------------------------------------------------------------
#
# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
# to call the proper callbacks on the treelets.
#
# Every node in a treelet is a ['name', {attrhash}, ...children...]
# Pragmas, the DEBUG alias, and two file-scoped values used by parse_lines
# for encoding detection: $non_ascii_re and $utf8_bom.
use integer; # vroom!
use strict;
use Carp ();
use vars qw($VERSION );
$VERSION = '3.35';
#use constant DEBUG => 7;
BEGIN {
require Pod::Simple;
# Reuse Pod::Simple's DEBUG unless this package already has its own.
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
}
# Matches a character iff the character will have a different meaning
# if we choose CP1252 vs UTF-8 if there is no =encoding line.
# This is broken for early Perls on non-ASCII platforms.
# The POSIX-class form is compiled via string eval so that a perl which
# rejects it yields undef, and the byte-range fallback below is used.
my $non_ascii_re = eval "qr/[[:^ascii:]]/";
$non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re;
# The UTF-8 byte sequence for U+FEFF; parse_lines strips it from the
# start of the first line and then treats the input as UTF-8.
my $utf8_bom;
if (($] ge 5.007_003)) {
$utf8_bom = "\x{FEFF}";
utf8::encode($utf8_bom);
} else {
$utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls.
}
# Alias: forwards all arguments to parse_lines.
sub parse_line {
  my $parser = shift;
  return $parser->parse_lines(@_);
}
# - - - Turn back now! Run away! - - -
sub parse_lines { # Usage: $parser->parse_lines(@lines)
# an undef means end-of-stream
my $self = shift;
my $code_handler = $self->{'code_handler'};
my $cut_handler = $self->{'cut_handler'};
my $wl_handler = $self->{'whiteline_handler'};
$self->{'line_count'} ||= 0;
my $scratch;
DEBUG > 4 and
print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";
DEBUG > 5 and
print STDERR "# About to parse lines: ",
join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
my $paras = ($self->{'paras'} ||= []);
# paragraph buffer. Because we need to defer processing of =over
# directives and verbatim paragraphs. We call _ponder_paragraph_buffer
# to process this.
$self->{'pod_para_count'} ||= 0;
my $line;
foreach my $source_line (@_) {
if( $self->{'source_dead'} ) {
DEBUG > 4 and print STDERR "# Source is dead.\n";
last;
}
unless( defined $source_line ) {
DEBUG > 4 and print STDERR "# Undef-line seen.\n";
push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
push @$paras, $paras->[-1], $paras->[-1];
# So that it definitely fills the buffer.
$self->{'source_dead'} = 1;
$self->_ponder_paragraph_buffer;
next;
}
if( $self->{'line_count'}++ ) {
($line = $source_line) =~ tr/\n\r//d;
# If we don't have two vars, we'll end up with that there
# tr/// modding the (potentially read-only) original source line!
} else {
DEBUG > 2 and print STDERR "First line: [$source_line]\n";
if( ($line = $source_line) =~ s/^$utf8_bom//s ) {
DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
$self->_handle_encoding_line( "=encoding utf8" );
delete $self->{'_processed_encoding'};
$line =~ tr/\n\r//d;
} elsif( $line =~ s/^\xFE\xFF//s ) {
DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
$self->scream(
$self->{'line_count'},
"UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
);
splice @_;
push @_, undef;
next;
# TODO: implement somehow?
} elsif( $line =~ s/^\xFF\xFE//s ) {
DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
$self->scream(
$self->{'line_count'},
"UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
);
splice @_;
push @_, undef;
next;
# TODO: implement somehow?
} else {
DEBUG > 2 and print STDERR "First line is BOM-less.\n";
($line = $source_line) =~ tr/\n\r//d;
}
}
if(!$self->{'parse_characters'} && !$self->{'encoding'}
&& ($self->{'in_pod'} || $line =~ /^=/s)
&& $line =~ /$non_ascii_re/
) {
my $encoding;
# No =encoding line, and we are at the first line in the input that
# contains a non-ascii byte, that is one whose meaning varies depending
# on whether the file is encoded in UTF-8 or CP1252, which are the two
# possibilities permitted by the pod spec. (ASCII is assumed if the
# file only contains ASCII bytes.) In order to process this line, we
# need to figure out what encoding we will use for the file.
#
# Strictly speaking ISO 8859-1 (Latin 1) refers to the code points
# 160-255, but it is used here, as it often colloquially is, to refer to
# the complete set of code points 0-255, including ASCII (0-127), the C1
# controls (128-159), and strict Latin 1 (160-255).
#
# CP1252 is effectively a superset of Latin 1, because it differs only
# from colloquial 8859-1 in the C1 controls, which are very unlikely to
# actually be present in 8859-1 files, so can be used for other purposes
# without conflict. CP 1252 uses most of them for graphic characters.
#
# Note that all ASCII-range bytes represent their corresponding code
# points in CP1252 and UTF-8. In ASCII platform UTF-8 all other code
# points require multiple (non-ASCII) bytes to represent. (A separate
# paragraph for EBCDIC is below.) The multi-byte representation is
# quite structured. If we find an isolated byte that requires multiple
# bytes to represent in UTF-8, we know that the encoding is not UTF-8.
# If we find a sequence of bytes that violates the UTF-8 structure, we
# also can presume the encoding isn't UTF-8, and hence must be 1252.
#
# But there are ambiguous cases where we could guess wrong. If so, the
# user will end up having to supply an =encoding line. We use all
# readily available information to improve our chances of guessing
# right. The odds of something not being UTF-8, but still passing a
# UTF-8 validity test go down very rapidly with increasing length of the
# sequence. Therefore we look at all the maximal length non-ascii
# sequences on the line. If any of the sequences can't be UTF-8, we
# quit there and choose CP1252. If all could be UTF-8, we guess UTF-8.
#
# On EBCDIC platforms, the situation is somewhat different. In
# UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
# but so do the bytes that are for the C1 controls. Recall that these
# correspond to the unused portion of 8859-1 that 1252 mostly takes
# over. That means that there are fewer code points that are
# represented by multi-bytes. But, note that the these controls are
# very unlikely to be in pod text. So if we encounter one of them, it
# means that it is quite likely CP1252 and not UTF-8. The net result is
# the same code below is used for both platforms.
while ($line =~ m/($non_ascii_re+)/g) {
my $non_ascii_seq = $1;
if (length $non_ascii_seq == 1) {
$encoding = 'CP1252';
goto guessed;
} elsif ($] ge 5.007_003) {
# On Perls that have this function, we can see if the sequence is
# valid UTF-8 or not.
my $is_utf8;
{
no warnings 'utf8';
$is_utf8 = utf8::decode($non_ascii_seq);
}
if (! $is_utf8) {
$encoding = 'CP1252';
goto guessed;
}
} elsif (ord("A") == 65) { # An early Perl, ASCII platform
# Without utf8::decode, it's a lot harder to do a rigorous check
# (though some early releases had a different function that
# accomplished the same thing). Since these are ancient Perls, not
# likely to be in use today, we take the easy way out, and look at
# just the first two bytes of the sequence to see if they are the
# start of a UTF-8 character. In ASCII UTF-8, continuation bytes
# must be between 0x80 and 0xBF. Start bytes can range from 0xC2
# through 0xFF, but anything above 0xF4 is not Unicode, and hence
# extremely unlikely to be in a pod.
if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) {
$encoding = 'CP1252';
goto guessed;
}
# We don't bother doing anything special for EBCDIC on early Perls.
# If there is a solitary variant, CP1252 will be chosen; otherwise
# UTF-8.
}
} # End of loop through all variant sequences on the line
# All sequences in the line could be UTF-8. Guess that.
$encoding = 'UTF-8';
guessed:
$self->_handle_encoding_line( "=encoding $encoding" );
delete $self->{'_processed_encoding'};
$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
my ($word) = $line =~ /(\S*$non_ascii_re\S*)/;
$self->whine(
$self->{'line_count'},
"Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
);
}
DEBUG > 5 and print STDERR "# Parsing line: [$line]\n";
if(!$self->{'in_pod'}) {
if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) {
if($1 eq 'cut') {
$self->scream(
$self->{'line_count'},
"=cut found outside a pod block. Skipping to next block."
);
## Before there were errata sections in the world, it was
## least-pessimal to abort processing the file. But now we can
## just barrel on thru (but still not start a pod block).
#splice @_;
#push @_, undef;
next;
} else {
$self->{'in_pod'} = $self->{'start_of_pod_block'}
= $self->{'last_was_blank'} = 1;
# And fall thru to the pod-mode block further down
}
} else {
DEBUG > 5 and print STDERR "# It's a code-line.\n";
$code_handler->(map $_, $line, $self->{'line_count'}, $self)
if $code_handler;
# Note: this may cause code to be processed out of order relative
# to pods, but in order relative to cuts.
# Note also that we haven't yet applied the transcoding to $line
# by time we call $code_handler!
if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
# That RE is from perlsyn, section "Plain Old Comments (Not!)",
#$fname = $2 if defined $2;
#DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n";
DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
$self->{'line_count'} = $1 - 1;
}
next;
}
}
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# Else we're in pod mode:
# Apply any necessary transcoding:
$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
# HERE WE CATCH =encoding EARLY!
if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
next if $self->parse_characters; # Ignore this line
$line = $self->_handle_encoding_line( $line );
}
if($line =~ m/^=cut/s) {
# here ends the pod block, and therefore the previous pod para
DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n";
$self->{'in_pod'} = 0;
# ++$self->{'pod_para_count'};
$self->_ponder_paragraph_buffer();
# by now it's safe to consider the previous paragraph as done.
$cut_handler->(map $_, $line, $self->{'line_count'}, $self)
if $cut_handler;
# TODO: add to docs: Note: this may cause cuts to be processed out
# of order relative to pods, but in order relative to code.
} elsif($line =~ m/^(\s*)$/s) { # it's a blank line
if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
$wl_handler->(map $_, $line, $self->{'line_count'}, $self)
if $wl_handler;
}
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n";
push @{$paras->[-1]}, $line;
} # otherwise it's not interesting
if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n";
}
$self->{'last_was_blank'} = 1;
} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
# Note that in "=head1 foo", the WS is lost.
# Example: ['=head1', {'start_line' => 123}, ' foo']
++$self->{'pod_para_count'};
$self->_ponder_paragraph_buffer();
# by now it's safe to consider the previous paragraph as done.
push @$paras, $new; # the new incipient paragraph
DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
} elsif($line =~ m/^\s/s) {
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n";
push @{$paras->[-1]}, $line;
} else {
++$self->{'pod_para_count'};
$self->_ponder_paragraph_buffer();
# by now it's safe to consider the previous paragraph as done.
DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n";
push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
}
} else {
++$self->{'pod_para_count'};
$self->_ponder_paragraph_buffer();
# by now it's safe to consider the previous paragraph as done.
push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n";
}
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
} else {
# It's a non-blank line /continuing/ the current para
if(@$paras) {
DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n";
push @{$paras->[-1]}, $line;
} else {
# Unexpected case!
die "Continuing a paragraph but \@\$paras is empty?";
}
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
}
} # ends the big while loop
DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
return $self;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sub _handle_encoding_line {
  # Act on an "=encoding NAME" line.
  #
  # Records the request in $self->{'encoding_command_reqs'}, installs a
  # transcoder in $self->{'_transcoder'} when the encoding is available,
  # and pushes the outcome onto $self->{'encoding_command_statuses'}
  # (undef = fine, '' = harmless repeat, any other string = the problem).
  #
  # Returns nothing when the parser is in parse_characters mode; otherwise
  # returns $line unchanged, whether or not it was an =encoding line.
  my($self, $line) = @_;
  return if $self->parse_characters;

  return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
  my $requested = $1;
  DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $requested\"\n";

  push @{ $self->{'encoding_command_reqs'} }, "=encoding $requested";

  # Cf.   perldoc Encode   and   perldoc Encode::Supported
  require Pod::Simple::Transcode;

  # Encoding names are compared case-insensitively, ignoring "-" and "_".
  my $fold = sub {
    my $name = lc $_[0];
    $name =~ tr/-_//d;
    return $name;
  };

  my $enc_error;
  if( my $current = $self->{'encoding'} ) {
    # An encoding is already in force: the new request is either a
    # harmless repeat of it or a conflict.
    my $norm_current = $fold->($current);
    my $norm_e       = $fold->($requested);
    if($norm_current ne $norm_e) {
      $enc_error = "Encoding is already set to " . $current;
      DEBUG > 1 and print STDERR $enc_error;
    } else {
      DEBUG > 1 and print STDERR "The '=encoding $requested' line is ",
        "redundant. ($norm_current eq $norm_e). Ignoring.\n";
      $enc_error = '';
      # But that doesn't necessarily mean that the earlier one went okay
    }
  } else {
    # OK, let's turn on the encoding
    DEBUG > 1 and print STDERR " Setting encoding to $requested\n";
    $self->{'encoding'} = $requested;

    if( $requested eq 'HACKRAW' ) {
      DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n";

    } elsif( Pod::Simple::Transcode::->encoding_is_available($requested) ) {
      die($enc_error = "WHAT? _transcoder is already set?!")
        if $self->{'_transcoder'};   # should never happen

      $self->{'_transcoder'} =
        Pod::Simple::Transcode::->make_transcoder($requested);

      # Smoke-test the new transcoder on throwaway data before trusting it.
      eval {
        my @x = ('', "abc", "123");
        $self->{'_transcoder'}->(@x);
      };
      if($@) {
        die( $enc_error =
          "Really unexpected error setting up encoding $requested: $@\nAborting"
        );
      }
      $self->{'detected_encoding'} = $requested;

    } else {
      # Note unsupported, and complain
      my @supported = Pod::Simple::Transcode::->all_encodings;
      DEBUG and print STDERR " Encoding [$requested] is unsupported.",
        "\nSupporteds: @supported\n";

      # Look for a near match:
      my $wanted = $fold->($requested);
      my ($near_match) = grep { $fold->($_) eq $wanted } @supported;
      my $suggestion = defined $near_match
        ? " (Maybe \"$requested\" should be \"$near_match\"?)"
        : '';

      my $encmodver = Pod::Simple::Transcode::->encmodver;
      $enc_error =
          "This document probably does not appear as it should, because its "
        . "\"=encoding $requested\" line calls for an unsupported encoding."
        . $suggestion
        . " [${encmodver}'s supported encodings are: @supported]";

      $self->scream( $self->{'line_count'}, $enc_error );
    }
  }

  push @{ $self->{'encoding_command_statuses'} }, $enc_error;

  if( defined $self->{'_processed_encoding'} ) {
    # Double declaration.
    $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives');
  }
  $self->{'_processed_encoding'} = $requested;

  return $line;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub _handle_encoding_second_level {
  # Second look at an =encoding paragraph, made while pondering the
  # paragraph buffer.  By the time this is called, the encoding (if the
  # line was well formed) will already have been acted on by
  # _handle_encoding_line; here we only report what went wrong, if anything.
  #
  # $para is the directive paragraph: [name, attrs, content...].
  # Always returns nothing.
  my($self, $para) = @_;

  my (undef, undef, @pieces) = @$para;
  my $content = join ' ', @pieces;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;

  DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";

  # No pending first-level result means the line never matched the
  # "=encoding NAME" shape, so it's a syntax error.
  unless( defined $self->{'_processed_encoding'} ) {
    $self->whine( $para->[1]{'start_line'},
      "Invalid =encoding syntax: $content"
    );
    return;
  }

  delete $self->{'_processed_encoding'};

  # It's already been handled.  Check for errors.
  my $statuses = $self->{'encoding_command_statuses'};
  if( !$statuses ) {
    DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n";
  } elsif( $statuses->[-1] ) {
    my $report = sprintf "Couldn't do %s: %s",
      $self->{'encoding_command_reqs'}[-1],
      $statuses->[-1];
    $self->whine( $para->[1]{'start_line'}, $report );
  } else {
    DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n";
  }

  return;
}
#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
{
  my $m = -321;   # magic line number

  sub _gen_errata {
    # Return 0 or more fake-o paragraphs explaining the accumulated
    # errors on this document: a "POD ERRORS" heading, an intro paragraph,
    # and an =over/=back list with one =item per offending line number
    # (in ascending numeric order), each followed by its messages.
    my $self = $_[0];

    my $errata = $self->{'errata'};
    return() unless $errata and keys %$errata;

    my @out = (
      ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
      ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
       "Hey! ",
       ['B', {},
        'The above document had some coding errors, which are explained below:'
       ]
      ],
      ['=over', {'start_line' => $m, 'errata' => 1}, ''],
    );

    foreach my $line (sort {$a <=> $b} keys %$errata) {
      push @out, ['=item', {'start_line' => $m}, "Around line $line:"];
      foreach my $complaint (@{ $errata->{$line} }) {
        push @out,
          ['~Para', {'start_line' => $m, '~cooked' => 1}, $complaint];
      }
    }

    # TODO: report of unknown entities? unrenderable characters?

    push @out, ['=back', {'start_line' => $m, 'errata' => 1}, ''];

    DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n";

    return @out;
  }
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
##############################################################################
##
## stop reading now stop reading now stop reading now stop reading now stop
##
## HERE IT BECOMES REALLY SCARY
##
## stop reading now stop reading now stop reading now stop reading now stop
##
##############################################################################
sub _ponder_paragraph_buffer {
# Drain the paragraph buffer in $self->{'paras'}, turning each buffered
# paragraph into element events. Does nothing if the buffer is empty.
# On the first call with a non-empty buffer it fires the Document start
# event. Paragraphs are shifted off the front one at a time; several
# branches push a (possibly rewritten) paragraph back onto the front with
# unshift and then 'next', so that it gets reconsidered on the following
# pass of the loop. Always returns nothing.
#
# Para-token types as found in the buffer.
# ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
# =over, =back, =item
# and the null =pod (to be complained about if over one line)
#
# "~data" paragraphs are something we generate at this level, depending on
# a currently open =over region
# Events fired: Begin and end for:
# directivename (like head1 .. head4), item, extend,
# for (from =begin...=end, =for),
# over-bullet, over-number, over-text, over-block,
# item-bullet, item-number, item-text,
# Document,
# Data, Para, Verbatim
# B, C, longdirname (TODO -- wha?), etc. for all directives
#
my $self = $_[0];
my $paras;
return unless @{$paras = $self->{'paras'}};
# The stack of currently open regions (=over and =for entries), created on
# first use and kept on $self between calls.
my $curr_open = ($self->{'curr_open'} ||= []);
my $scratch;
DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n";
# We have something in our buffer. So apparently the document has started.
unless($self->{'doc_has_started'}) {
$self->{'doc_has_started'} = 1;
my $starting_contentless;
$starting_contentless =
(
!@$curr_open
and @$paras and ! grep $_->[0] ne '~end', @$paras
# i.e., if the paras is all ~ends
)
;
DEBUG and print STDERR "# Starting ",
$starting_contentless ? 'contentless' : 'contentful',
" document\n"
;
$self->_handle_element_start(
($scratch = 'Document'),
{
'start_line' => $paras->[0][1]{'start_line'},
$starting_contentless ? ( 'contentless' => 1 ) : (),
},
);
}
my($para, $para_type);
while(@$paras) {
# Leave a lone lookahead-requiring paragraph in the buffer until the
# paragraph after it has arrived.
last if @$paras == 1 and
( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
or $paras->[0][0] eq '=item' )
;
# Those're the three kinds of paragraphs that require lookahead.
# Actually, an "=item Foo" inside an <over type=text> region
# and any =item inside an <over type=block> region (rare)
# don't require any lookahead, but all others (bullets
# and numbers) do.
# TODO: whinge about many kinds of directives in non-resolving =for regions?
# TODO: many? like what? =head1 etc?
$para = shift @$paras;
$para_type = $para->[0];
DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (",
$self->_dump_curr_open(), ")\n";
# Region-control paragraphs are handled before the ignore check below.
# Each sub-ponderer returns true when the paragraph needs no further
# processing here.
if($para_type eq '=for') {
next if $self->_ponder_for($para,$curr_open,$paras);
} elsif($para_type eq '=begin') {
next if $self->_ponder_begin($para,$curr_open,$paras);
} elsif($para_type eq '=end') {
next if $self->_ponder_end($para,$curr_open,$paras);
} elsif($para_type eq '~end') { # The virtual end-document signal
next if $self->_ponder_doc_end($para,$curr_open,$paras);
}
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
# If any open region on the stack is flagged '~ignore', drop the paragraph.
if(grep $_->[1]{'~ignore'}, @$curr_open) {
DEBUG > 1 and
print STDERR "Skipping $para_type paragraph because in ignore mode.\n";
next;
}
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
if($para_type eq '=pod') {
$self->_ponder_pod($para,$curr_open,$paras);
} elsif($para_type eq '=over') {
next if $self->_ponder_over($para,$curr_open,$paras);
} elsif($para_type eq '=back') {
next if $self->_ponder_back($para,$curr_open,$paras);
} else {
# All non-magical codes!!!
# Here we start using $para_type for our own twisted purposes, to
# mean how it should get treated, not as what the element name
# should be.
DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n";
my $i;
# Enforce some =headN discipline
if($para_type =~ m/^=head\d$/s
and ! $self->{'accept_heads_anywhere'}
and @$curr_open
and $curr_open->[-1][0] eq '=over'
) {
DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n";
$self->whine(
$para->[1]{'start_line'},
"You forgot a '=back' before '$para_type'"
);
unshift @$paras, ['=back', {}, ''], $para; # close the =over
next;
}
if($para_type eq '=item') {
# Find the innermost open =over; an =item with none gets a synthetic
# =over queued in front of it and is then reconsidered.
my $over;
unless(@$curr_open and
$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
$self->whine(
$para->[1]{'start_line'},
"'=item' outside of any '=over'"
);
unshift @$paras,
['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
$para
;
next;
}
my $over_type = $over->[1]{'~type'};
if(!$over_type) {
# Shouldn't happen!
die "Typeless over in stack, starting at line "
. $over->[1]{'start_line'};
} elsif($over_type eq 'block') {
# Complain only once per region; '~bitched_about' remembers that we did.
unless($curr_open->[-1][1]{'~bitched_about'}) {
$curr_open->[-1][1]{'~bitched_about'} = 1;
$self->whine(
$curr_open->[-1][1]{'start_line'},
"You can't have =items (as at line "
. $para->[1]{'start_line'}
. ") unless the first thing after the =over is an =item"
);
}
# Just turn it into a paragraph and reconsider it
$para->[0] = '~Para';
unshift @$paras, $para;
next;
} elsif($over_type eq 'text') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
if($item_type eq 'text') {
# Nothing special needs doing for 'text'
} elsif($item_type eq 'number' or $item_type eq 'bullet') {
$self->whine(
$para->[1]{'start_line'},
"Expected text after =item, not a $item_type"
);
# Undo our clobbering:
push @$para, $para->[1]{'~orig_content'};
delete $para->[1]{'number'};
# Only a PROPER item-number element is allowed
# to have a number attribute.
} else {
die "Unhandled item type $item_type"; # should never happen
}
# =item-text thingies don't need any assimilation, it seems.
} elsif($over_type eq 'number') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
# The counter is bumped for every item in the list, whatever its type.
my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
if($item_type eq 'bullet') {
# Hm, it's not numeric. Correct for this.
$para->[1]{'number'} = $expected_value;
$self->whine(
$para->[1]{'start_line'},
"Expected '=item $expected_value'"
);
push @$para, $para->[1]{'~orig_content'};
# restore the bullet, blocking the assimilation of next para
} elsif($item_type eq 'text') {
# Hm, it's not numeric. Correct for this.
$para->[1]{'number'} = $expected_value;
$self->whine(
$para->[1]{'start_line'},
"Expected '=item $expected_value'"
);
# Text content will still be there and will block next ~Para
} elsif($item_type ne 'number') {
die "Unknown item type $item_type"; # should never happen
} elsif($expected_value == $para->[1]{'number'}) {
DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
} else {
DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
" instead of the expected value of $expected_value\n";
$self->whine(
$para->[1]{'start_line'},
"You have '=item " . $para->[1]{'number'} .
"' instead of the expected '=item $expected_value'"
);
$para->[1]{'number'} = $expected_value; # correcting!!
}
if(@$para == 2) {
# For the cases where we /didn't/ push to @$para
if($paras->[0][0] eq '~Para') {
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
push @$para, splice @{shift @$paras},2;
} else {
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
push @$para, ''; # Just so it's not contentless
}
}
} elsif($over_type eq 'bullet') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
if($item_type eq 'bullet') {
# as expected!
if( $para->[1]{'~_freaky_para_hack'} ) {
DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
push @$para, delete $para->[1]{'~_freaky_para_hack'};
}
} elsif($item_type eq 'number') {
$self->whine(
$para->[1]{'start_line'},
"Expected '=item *'"
);
push @$para, $para->[1]{'~orig_content'};
# and block assimilation of the next paragraph
delete $para->[1]{'number'};
# Only a PROPER item-number element is allowed
# to have a number attribute.
} elsif($item_type eq 'text') {
$self->whine(
$para->[1]{'start_line'},
"Expected '=item *'"
);
# But doesn't need processing. But it'll block assimilation
# of the next para.
} else {
die "Unhandled item type $item_type"; # should never happen
}
if(@$para == 2) {
# For the cases where we /didn't/ push to @$para
if($paras->[0][0] eq '~Para') {
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
push @$para, splice @{shift @$paras},2;
} else {
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
push @$para, ''; # Just so it's not contentless
}
}
} else {
die "Unhandled =over type \"$over_type\"?";
# Shouldn't happen!
}
# The element name becomes "=item-" plus the list type; the leading "="
# is stripped further down, just before the treelet is traversed.
$para_type = 'Plain';
$para->[0] .= '-' . $over_type;
# Whew. Now fall thru and process it.
} elsif($para_type eq '=extend') {
# Well, might as well implement it here.
$self->_ponder_extend($para);
next; # and skip
} elsif($para_type eq '=encoding') {
# Not actually acted on here, but we catch errors here.
$self->_handle_encoding_second_level($para);
next unless $self->keep_encoding_directive;
$para_type = 'Plain';
} elsif($para_type eq '~Verbatim') {
$para->[0] = 'Verbatim';
$para_type = '?Verbatim';
} elsif($para_type eq '~Para') {
$para->[0] = 'Para';
$para_type = '?Plain';
} elsif($para_type eq 'Data') {
$para->[0] = 'Data';
$para_type = '?Data';
} elsif( $para_type =~ s/^=//s
and defined( $para_type = $self->{'accept_directives'}{$para_type} )
) {
# The leading "=" has been stripped and $para_type replaced by whatever
# value accept_directives holds for that directive name.
DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n";
} else {
# An unknown directive!
DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n",
$para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
;
$self->whine(
$para->[1]{'start_line'},
"Unknown directive: $para->[0]"
);
# And maybe treat it as text instead of just letting it go?
next;
}
# A leading "?" marks a tentative treatment that the innermost enclosing
# =for region (if any) may override, depending on its '~resolve' flag.
if($para_type =~ s/^\?//s) {
if(! @$curr_open) { # usual case
DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n";
} else {
my @fors = grep $_->[0] eq '=for', @$curr_open;
DEBUG > 1 and print STDERR "Containing fors: ",
join(',', map $_->[1]{'target'}, @fors), "\n";
if(! @fors) {
DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n";
#} elsif(grep $_->[1]{'~resolve'}, @fors) {
#} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
} elsif( $fors[-1][1]{'~resolve'} ) {
# Look to the immediately containing for
if($para_type eq 'Data') {
DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
$para->[0] = 'Para';
$para_type = 'Plain';
} else {
DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
}
} else {
DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
$para->[0] = $para_type = 'Data';
}
}
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hand the paragraph to the ponderer matching its final treatment type.
if($para_type eq 'Plain') {
$self->_ponder_Plain($para);
} elsif($para_type eq 'Verbatim') {
$self->_ponder_Verbatim($para);
} elsif($para_type eq 'Data') {
$self->_ponder_Data($para);
} else {
die "\$para type is $para_type -- how did that happen?";
# Shouldn't happen.
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Strip the leading "~" or "=" marker from the element name, if present.
$para->[0] =~ s/^[~=]//s;
DEBUG and print STDERR "\n", pretty($para), "\n";
# traverse the treelet (which might well be just one string scalar)
$self->{'content_seen'} ||= 1;
$self->_traverse_treelet_bit(@$para);
}
}
return;
}
###########################################################################
# The sub-ponderers...
sub _ponder_for {
  # Rewrite an "=for TARGET text..." paragraph as the equivalent
  # "=begin TARGET" / Data paragraph / "=end TARGET" triple, queued at the
  # front of @$paras for the main loop to process.
  #
  # Always returns 1: the original paragraph needs no further handling.
  my ($self,$para,$curr_open,$paras) = @_;

  if(grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print STDERR "Ignoring ignorable =for\n";
    return 1;
  }

  # The target is the first non-whitespace word found in the content
  # chunks; it is removed from the chunk it came from.
  my $target;
  foreach my $idx (2 .. $#$para) {
    next unless $para->[$idx] =~ s/^\s*(\S+)\s*//s;
    $target = $1;
    last;
  }

  my $start_line = $para->[1]{'start_line'};

  if(!defined $target) {
    $self->whine( $start_line, "=for without a target?" );
    return 1;
  }

  DEBUG > 1 and
    print STDERR "Faking out a =for $target as a =begin $target / =end $target\n";

  # Fake it out as a begin/end
  $para->[0] = 'Data';
  my $opener = ['=begin',
    {'start_line' => $start_line, '~really' => '=for'},
    $target,
  ];
  my $closer = ['=end',
    {'start_line' => $start_line, '~really' => '=for'},
    $target,
  ];
  unshift @$paras, $opener, $para, $closer;

  return 1;
}
sub _ponder_begin {
  # Open a "=begin TARGET [TITLE]" region.
  #
  # Parses the target (with its optional leading "!" negation and ":"
  # resolve markers), decides from $self->{'accept_targets'} whether the
  # region's contents are to be ignored and whether formatting codes in it
  # are to be resolved, records all that in the paragraph's attribute hash,
  # and pushes the paragraph onto @$curr_open as a '=for' stack entry.
  # Fires the 'for' element-start event unless the region (or any region
  # enclosing it) is being ignored.
  #
  # Always returns 1: the paragraph needs no further handling by the caller.
  my ($self,$para,$curr_open,$paras) = @_;
  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  unless(length($content)) {
    $self->whine(
      $para->[1]{'start_line'},
      "=begin without a target?"
    );
    DEBUG and print STDERR "Ignoring targetless =begin\n";
    return 1;
  }

  # /s for consistency with the other patterns here, so that the split
  # cannot fail (leaving $target undef) should the content hold a newline.
  my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/s;
  # Test length, not truth: a title of "0" is still a title.
  $para->[1]{'title'} = $title if defined $title and length $title;
  $para->[1]{'target'} = $target;  # without any ':'
  $content = $target; # strip off the title

  $content =~ s/^:!/!:/s;   # normalize ":!" so the "!" is seen first
  my $neg;  # whether this is a negation-match
  $neg = 1        if $content =~ s/^!//s;
  my $to_resolve;  # whether to process formatting codes
  $to_resolve = 1 if $content =~ s/^://s;

  my $dont_ignore; # whether this target matches us

  # A non-negated =begin also gets tried against the wildcard target '*'.
  foreach my $target_name (
    split(',', $content, -1),
    $neg ? () : '*'
  ) {
    DEBUG > 2 and
     print STDERR " Considering whether =begin $content matches $target_name\n";
    next unless $self->{'accept_targets'}{$target_name};

    DEBUG > 2 and
     print STDERR " It DOES match the acceptable target $target_name!\n";
    $to_resolve = 1
      if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
    $dont_ignore = 1;
    $para->[1]{'target_matching'} = $target_name;
    last; # stop looking at other target names
  }

  if($neg) {
    if( $dont_ignore ) {
      $dont_ignore = '';
      delete $para->[1]{'target_matching'};
      DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n";
    } else {
      $dont_ignore = 1;
      $para->[1]{'target_matching'} = '!';
      DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n";
    }
  }

  $para->[0] = '=for';  # Just what we happen to call these, internally
  $para->[1]{'~really'} ||= '=begin';
  $para->[1]{'~ignore'}   = (! $dont_ignore) || 0;
  $para->[1]{'~resolve'}  = $to_resolve || 0;

  DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '',
    "ignore contents of this region\n";
  DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ",
    ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
  DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n";

  push @$curr_open, $para;
  if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n";
  } else {
    $self->{'content_seen'} ||= 1;
    $self->_handle_element_start((my $scratch='for'), $para->[1]);
  }

  return 1;
}
sub _ponder_end {
  # Close the innermost open region in response to "=end TARGET".
  #
  # Complains (and leaves the stack untouched) when the target is missing,
  # is more than one word, has no open =begin to match, or differs from the
  # target of the innermost open region.  Otherwise fires the 'for'
  # element-end event -- unless some open region is being ignored -- and
  # pops the region off @$curr_open.
  #
  # Always returns 1: the paragraph needs no further handling by the caller.
  my ($self,$para,$curr_open,$paras) = @_;
  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  DEBUG and print STDERR "Ogling '=end $content' directive\n";

  # Is the innermost open region one that an =end can close?
  my $closing_for = (@$curr_open and $curr_open->[-1][0] eq '=for');

  # Work out what, if anything, is wrong with this =end.
  my ($complaint, $note);
  if(!length($content)) {
    $complaint = "'=end' without a target?";
    $complaint .= " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")'
      if $closing_for;
    $note = "Ignoring targetless =end\n";
  } elsif($content !~ m/^\S+$/) {  # i.e., it's not just one word
    $complaint = "'=end $content' is invalid. (Stack: "
      . $self->_dump_curr_open() . ')';
    $note = "Ignoring mistargetted =end $content\n";
  } elsif(!$closing_for) {
    $complaint = "=end $content without matching =begin. (Stack: "
      . $self->_dump_curr_open() . ')';
    $note = "Ignoring mistargetted =end $content\n";
  } elsif($content ne $curr_open->[-1][1]{'target'}) {
    $complaint = "=end $content doesn't match =begin "
      . $curr_open->[-1][1]{'target'}
      . ". (Stack: "
      . $self->_dump_curr_open() . ')';
    $note = "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
  }

  if(defined $complaint) {
    $self->whine( $para->[1]{'start_line'}, $complaint );
    DEBUG and print STDERR $note;
    return 1;
  }

  # Else it's okay to close...
  my $ignoring = grep $_->[1]{'~ignore'}, @$curr_open;
  if(!$ignoring) {
    $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
      # what's that for?
    $self->{'content_seen'} ||= 1;
    $self->_handle_element_end( my $scratch = 'for', $para->[1]);
  } else {
    DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n";
    # And that may be because of this to-be-closed =for region, or some
    # other one, but it doesn't matter.
  }

  DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
  pop @$curr_open;

  return 1;
}
sub _ponder_doc_end {
  # Handle the virtual end-of-document paragraph ("~end").
  #
  # Works in up to three passes through the caller's loop:
  #   1. if regions are still open, queue closers for them and re-queue
  #      the ~end marker;
  #   2. once the stack is empty, try (once only) to generate an errata
  #      section and queue it;
  #   3. finally empty the buffer and fire the Document end event.
  #
  # Always returns 1.
  my ($self,$para,$curr_open,$paras) = @_;

  if(@$curr_open) { # Deal with things left open
    DEBUG and print STDERR "Stack is nonempty at end-document: (",
      $self->_dump_curr_open(), ")\n";
    DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";

    my @closers = $self->_closers_for_all_curr_open;
    # Drop every existing ~end, then put exactly two at the tail.
    # We need two -- once for the next cycle where we
    # generate errata, and then another to be at the end
    # when that loop back around to process the errata.
    @$paras = (
      (grep $_->[0] ne '~end', @closers, @$paras),
      $para, $para,
    );
    return 1;
  }

  DEBUG and print STDERR "Okay, stack is empty now.\n";

  # Try generating errata section, if applicable
  if(!$self->{'~tried_gen_errata'}) {
    $self->{'~tried_gen_errata'} = 1;
    if(my @extras = $self->_gen_errata()) {
      unshift @$paras, @extras;
      DEBUG and print STDERR "Generated errata... relooping...\n";
      return 1;  # I.e., loop around again to process these fake-o paragraphs
    }
  }

  @$paras = (); # Well, that's that for this paragraph buffer.
  DEBUG and print STDERR "Throwing end-document event.\n";

  $self->_handle_element_end( my $scratch = 'Document' );
  return 1; # Hasta la byebye
}
sub _ponder_pod {
  # Handle a "=pod" paragraph: complain if it carries more than one line
  # of content, and pass it to $self->{'pod_handler'} when one is set.
  # Returns nothing.
  my ($self,$para,$curr_open,$paras) = @_;

  if(@$para > 3) {
    $self->whine(
      $para->[1]{'start_line'},
      "=pod directives shouldn't be over one line long! Ignoring all "
       . (@$para - 2) . " lines of content"
    );
  }

  # Content ignored unless 'pod_handler' is set
  my $pod_handler = $self->{'pod_handler'};
  if($pod_handler) {
    # Hand the callback copies, so that it cannot alter the paragraph
    # through its argument list.
    my $line_num = $para->[1]{'start_line'};
    my $text     = $para->[2];
    my $line     = $text eq '' ? "=pod" : "=pod $text"; # imitate cut_handler output
    $pod_handler->($line, $line_num, $self);
  }

  # The surrounding methods set content_seen, so let us remain consistent.
  # I do not know why it was not here before -- should it not be here?
  # $self->{'content_seen'} ||= 1;

  return;
}
sub _ponder_over {
  # Open an =over region.
  #
  # The list type is decided by the paragraph that follows the =over:
  # an =item gives whatever _get_initial_item_type reports for it, an
  # immediate =back gives 'empty' (only when parse_empty_lists is set;
  # otherwise the =over/=back pair is dropped), and anything else gives
  # 'block'.  The =over paragraph itself is pushed onto @$curr_open and the
  # "over-TYPE" element-start event is fired.
  #
  # Returns 1 when the =over was dropped or ignored, nothing otherwise.
  my ($self,$para,$curr_open,$paras) = @_;
  return 1 unless @$paras;

  my $following = $paras->[0][0];
  my $list_type;
  if($following eq '=item') { # most common case
    $list_type = $self->_get_initial_item_type($paras->[0]);

  } elsif($following eq '=back') {
    # Ignore empty lists by default
    unless($self->{'parse_empty_lists'}) {
      shift @$paras;
      return 1;
    }
    $list_type = 'empty';

  } elsif($following eq '~end') {
    $self->whine(
      $para->[1]{'start_line'},
      "=over is the last thing in the document?!"
    );
    return 1; # But feh, ignore it.

  } else {
    $list_type = 'block';
  }

  my $attrs = $para->[1];
  $attrs->{'~type'} = $list_type;
  push @$curr_open, $para;
   # yes, we reuse the paragraph as a stack item

  my $content = join ' ', splice @$para, 2;

  # Indent defaults to 4 for an empty, zero or malformed argument.
  my $indent = 4;
  if($content =~ m/^\s*$/s) {
    # no argument given: keep the default
  } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
    no integer;
    if($1 == 0) {
      $self->whine(
        $attrs->{'start_line'},
        "Can't have a 0 in =over $content"
      );
    } else {
      $indent = $1;
    }
  } else {
    $self->whine(
      $attrs->{'start_line'},
      "=over should be: '=over' or '=over positive_number'"
    );
  }
  $attrs->{'indent'} = $indent;

  DEBUG > 1 and print STDERR "=over found of type $list_type\n";

  $self->{'content_seen'} ||= 1;
  $self->_handle_element_start((my $scratch = 'over-' . $list_type), $attrs);

  return;
}
sub _ponder_back {
  # Close the innermost open =over region in response to a =back.
  #
  # Complains if the =back carries any content, or if the innermost open
  # region is not an =over.  On success, pops the region off @$curr_open
  # and fires the matching "over-TYPE" element-end event.
  #
  # Returns 1 when the =back had nothing to close, and nothing on success.
  # (The success path used to fall off the end of the sub, so the caller
  # saw whatever _handle_element_end happened to return.)
  my ($self,$para,$curr_open,$paras) = @_;
  # TODO: fire off </item-number> or </item-bullet> or </item-text> ??

  my $content = join ' ', splice @$para, 2;
  if($content =~ m/\S/) {
    $self->whine(
      $para->[1]{'start_line'},
      "=back doesn't take any parameters, but you said =back $content"
    );
  }

  unless(@$curr_open and $curr_open->[-1][0] eq '=over') {
    DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (",
        join(', ', map $_->[0], @$curr_open), ").\n";
    $self->whine(
      $para->[1]{'start_line'},
      '=back without =over'
    );
    return 1; # and ignore it
  }

  DEBUG > 1 and print STDERR "=back happily closes matching =over\n";
  # Expected case: we're closing the most recently opened thing
  $self->{'content_seen'} ||= 1;
  my $over = pop @$curr_open;
  $self->_handle_element_end(
    my $scratch = 'over-' . $over->[1]{'~type'}, $para->[1]
  );

  return;
}
sub _ponder_item {
# Process one '=item' paragraph against the most recently opened '=over'.
#
# $para      - the item paragraph: [ '=item', \%attrs, content... ]
# $curr_open - stack of open regions; the last '=over' in it decides
#              which list type this item is checked against
# $paras     - queue of paragraphs still to be processed; this routine may
#              push paragraphs back onto its front or absorb the next one
#
# Returns 1 after re-queuing work onto @$paras (a synthetic '=over' plus
# this item, or this item demoted to '~Para').  Otherwise appends
# '-' . list type to $para->[0] (e.g. '=item-bullet') and returns empty.
# Dies on list or item types it does not know about.
my ($self,$para,$curr_open,$paras) = @_;
my $over;
# Look for the last '=over' on the stack; it need not be the top element.
unless(@$curr_open and
$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
$self->whine(
$para->[1]{'start_line'},
"'=item' outside of any '=over'"
);
# Recover: queue a synthetic '=over' followed by this same item.
unshift @$paras,
['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
$para
;
return 1;
}
my $over_type = $over->[1]{'~type'};
# NOTE(review): the list type comes from $over, but the '~bitched_about'
# flag and '~counter' below live on $curr_open->[-1].  Those are the same
# element only when the '=over' is the innermost open region -- confirm.
if(!$over_type) {
# Shouldn't happen!
die "Typeless over in stack, starting at line "
. $over->[1]{'start_line'};
} elsif($over_type eq 'block') {
# Complain only once per region, however many stray =items it holds.
unless($curr_open->[-1][1]{'~bitched_about'}) {
$curr_open->[-1][1]{'~bitched_about'} = 1;
$self->whine(
$curr_open->[-1][1]{'start_line'},
"You can't have =items (as at line "
. $para->[1]{'start_line'}
. ") unless the first thing after the =over is an =item"
);
}
# Just turn it into a paragraph and reconsider it
$para->[0] = '~Para';
unshift @$paras, $para;
return 1;
} elsif($over_type eq 'text') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
if($item_type eq 'text') {
# Nothing special needs doing for 'text'
} elsif($item_type eq 'number' or $item_type eq 'bullet') {
$self->whine(
$para->[1]{'start_line'},
"Expected text after =item, not a $item_type"
);
# Undo our clobbering:
push @$para, $para->[1]{'~orig_content'};
delete $para->[1]{'number'};
# Only a PROPER item-number element is allowed
# to have a number attribute.
} else {
die "Unhandled item type $item_type"; # should never happen
}
# =item-text thingies don't need any assimilation, it seems.
} elsif($over_type eq 'number') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
# The counter is bumped for every item, whatever its own type turns out to be.
my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
if($item_type eq 'bullet') {
# Hm, it's not numeric. Correct for this.
$para->[1]{'number'} = $expected_value;
$self->whine(
$para->[1]{'start_line'},
"Expected '=item $expected_value'"
);
push @$para, $para->[1]{'~orig_content'};
# restore the bullet, blocking the assimilation of next para
} elsif($item_type eq 'text') {
# Hm, it's not numeric. Correct for this.
$para->[1]{'number'} = $expected_value;
$self->whine(
$para->[1]{'start_line'},
"Expected '=item $expected_value'"
);
# Text content will still be there and will block next ~Para
} elsif($item_type ne 'number') {
die "Unknown item type $item_type"; # should never happen
} elsif($expected_value == $para->[1]{'number'}) {
DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
} else {
DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
" instead of the expected value of $expected_value\n";
$self->whine(
$para->[1]{'start_line'},
"You have '=item " . $para->[1]{'number'} .
"' instead of the expected '=item $expected_value'"
);
$para->[1]{'number'} = $expected_value; # correcting!!
}
# Two elements left means the item has no content of its own.
if(@$para == 2) {
# For the cases where we /didn't/ push to @$para
if($paras->[0][0] eq '~Para') {
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
push @$para, splice @{shift @$paras},2;
} else {
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
push @$para, ''; # Just so it's not contentless
}
}
} elsif($over_type eq 'bullet') {
my $item_type = $self->_get_item_type($para);
# That kills the content of the item if it's a number or bullet.
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
if($item_type eq 'bullet') {
# as expected!
if( $para->[1]{'~_freaky_para_hack'} ) {
DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
push @$para, delete $para->[1]{'~_freaky_para_hack'};
}
} elsif($item_type eq 'number') {
$self->whine(
$para->[1]{'start_line'},
"Expected '=item *'"
);
push @$para, $para->[1]{'~orig_content'};
# and block assimilation of the next paragraph
delete $para->[1]{'number'};
# Only a PROPER item-number element is allowed
# to have a number attribute.
} elsif($item_type eq 'text') {
$self->whine(
$para->[1]{'start_line'},
"Expected '=item *'"
);
# But doesn't need processing. But it'll block assimilation
# of the next para.
} else {
die "Unhandled item type $item_type"; # should never happen
}
# Same absorption rule as for numbered items above.
if(@$para == 2) {
# For the cases where we /didn't/ push to @$para
if($paras->[0][0] eq '~Para') {
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
push @$para, splice @{shift @$paras},2;
} else {
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
push @$para, ''; # Just so it's not contentless
}
}
} else {
die "Unhandled =over type \"$over_type\"?";
# Shouldn't happen!
}
# Tag the paragraph with the list type it was validated against.
$para->[0] .= '-' . $over_type;
return;
}
sub _ponder_Plain {
    # Replace the raw text lines of an ordinary paragraph with the treelet
    # that _make_treelet builds from them.  Paragraphs that are empty, or
    # that are flagged '~cooked' (they already carry a treelet), are left
    # untouched.
    my ($self, $para) = @_;
    DEBUG and print STDERR " giving plain treatment...\n";

    # Empty paragraphs don't need a treelet for any reason I can see.
    my $is_empty = @$para == 2 || ( @$para == 3 && $para->[2] eq '' );
    return if $is_empty or $para->[1]{'~cooked'};

    my $text = join "\n", splice(@$para, 2);
    push @$para, @{ $self->_make_treelet($text, $para->[1]{'start_line'}) };
    return;
}
sub _ponder_Verbatim {
    # Prepare a verbatim paragraph: mark it whitespace-preserving, strip the
    # configured indent, expand tabs, then either run the VerbatimFormatted
    # pass, parse formatting codes, or simply join the lines into one string.
    my ($self, $para) = @_;
    DEBUG and print STDERR " giving verbatim treatment...\n";

    $para->[1]{'xml:space'} = 'preserve';

    my $indent = $self->strip_verbatim_indent;
    if ($indent && ref $indent eq 'CODE') {
        # Temporarily take the element name and attribute hash off the
        # front so the callback is handed the remaining elements only.
        my $name  = shift @{$para};
        my $attrs = shift @{$para};
        $indent = $indent->($para);
        unshift @{$para}, $name, $attrs;
    }

    my $want_formatted = $self->{'accept_codes'}
        && $self->{'accept_codes'}{'VerbatimFormatted'};
    my $strip_indent = $indent && !$want_formatted;

    # The slice elements are aliases, so the lines are edited in place.
    foreach my $line ( @{$para}[ 2 .. $#$para ] ) {
        # Strip indentation.
        $line =~ s/^\Q$indent// if $strip_indent;

        # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
        # tabs are at every EIGHTH column. For portability, it has to be
        # one setting everywhere, and 8th wins.
        1 while $line =~
            s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e;

        # TODO: whinge about (or otherwise treat) unindented or overlong lines
    }

    # Now the VerbatimFormatted hoodoo...
    if ($want_formatted) {
        # Kill any number of terminal newlines
        pop @$para while @$para > 3 and $para->[-1] !~ m/\S/;
        $self->_verbatim_format($para);
    }
    elsif ($self->{'codes_in_verbatim'}) {
        my $text = join "\n", splice(@$para, 2);
        push @$para,
            @{ $self->_make_treelet(
                $text,
                $para->[1]{'start_line'}, $para->[1]{'xml:space'}
            ) };
        $para->[-1] =~ s/\n+$//s;    # Kill any number of terminal newlines
    }
    else {
        if (@$para > 3) {
            my $text = join "\n", splice(@$para, 2);
            push @$para, $text;
        }
        $para->[-1] =~ s/\n+$//s;    # Kill any number of terminal newlines
    }
    return;
}
sub _ponder_Data {
    # Mark a data paragraph as whitespace-preserving and, when it holds
    # more than one line, fold the lines into one newline-joined string.
    my ($self, $para) = @_;
    DEBUG and print STDERR " giving data treatment...\n";

    $para->[1]{'xml:space'} = 'preserve';

    if (@$para > 3) {
        my @lines = splice @$para, 2;
        push @$para, join("\n", @lines);
    }
    return;
}
###########################################################################
sub _traverse_treelet_bit {    # for use only by the routine above
    # Walk one treelet node given as a flat list ($self, name, attrs,
    # children...): fire a start event, then a text event for each run of
    # adjacent strings (concatenated) or a recursive walk for each child
    # node, then an end event.
    my $self = shift;
    my $name = shift;

    # Handlers are handed a scratch copy of the name, never $name itself.
    my $scratch = $name;
    $self->_handle_element_start($scratch, shift @_);

    while (@_) {
        my $child = shift;
        if (ref $child) {
            _traverse_treelet_bit($self, @$child);
            next;
        }
        # Glue any directly following plain strings onto this one.
        while (@_ and not ref $_[0]) {
            $child .= shift;
        }
        $self->_handle_text($child);
    }

    $scratch = $name;
    $self->_handle_element_end($scratch);
    return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sub _closers_for_all_curr_open {
    # Build fake closing paragraphs for every region still open, returned
    # innermost-first.  Each is a copy of its opener with the command
    # switched to '=end' or '=back', a copied attribute hash flagged
    # 'fake-closer', and at least one content element.  Returns nothing
    # when there is no open-region stack; dies on an unknown region kind.
    my ($self) = @_;
    my $open_regions = $self->{'curr_open'} || return;

    my @closers;
    for my $region (@$open_regions) {
        my @closer = @$region;
        $closer[1] = { %{ $closer[1] } };    # don't scribble on the opener's attrs
        #$closer[1]{'start_line'} = -1;

        my $opener = $closer[0];
        if ($opener eq '=over') {
            $self->whine(
                $region->[1]{start_line},
                "=over without closing =back"
            );
            $closer[0] = '=back';
        }
        elsif ($opener eq '=for') {
            $closer[0] = '=end';
        }
        else {
            die "I don't know how to auto-close an open $opener region";
        }

        if (@closer <= 2) {
            # since =over's don't have targets
            my $target = $closer[1]{'target'};
            push @closer, defined $target ? $target : '';
        }

        $closer[1]{'fake-closer'} = 1;
        DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@closer), "\n";
        unshift @closers, \@closer;
    }
    return @closers;
}
#--------------------------------------------------------------------------
sub _verbatim_format {
    # Convert a Verbatim paragraph in place into a 'VerbatimFormatted' one.
    #
    #   $it - the invocant (unused here)
    #   $p  - the paragraph: [ name, \%attrs, line, line, ... ]
    #
    # A line starting with "#:" is a formatting line for the line above it:
    # "^" marks bold (VerbatimB), "/" italic (VerbatimI) and "%" bold
    # italic (VerbatimBI).  Each such pair of lines is replaced by a
    # sequence of plain strings and [ 'VerbatimX', {}, text ] nodes.
    #
    # Fixes relative to the previous version:
    #  * the formatting mask is padded with TWO spaces, one for each column
    #    of the "#:" marker, so codes line up with the text above them;
    #  * the "collapse adjacent text nodes" loop used "$i > $#$p" as its
    #    condition and therefore never ran for any real paragraph.
    my ($it, $p) = @_;

    my $formatting;

    for (my $i = 2; $i < @$p; $i++) {    # work forwards over the lines
        DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n";
        $p->[$i] .= "\n";
        # Unlike with simple Verbatim blocks, we don't end up just doing
        # a join("\n", ...) on the contents, so we have to append a
        # newline to every line, and then nix the last one later.
    }

    if (DEBUG > 4) {
        print STDERR "<<\n";
        for (my $i = $#$p; $i >= 2; $i--) {    # work backwards over the lines
            print STDERR "_verbatim_format $i: $p->[$i]";
        }
        print STDERR ">>\n";
    }

    for (my $i = $#$p; $i > 2; $i--) {
        # work backwards over the lines, except the first (#2)

        # look at a formatty line preceding a nonformatty one
        DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n";
        if ($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
            DEBUG > 5 and print STDERR " It's a formatty line. ",
                "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";

            if ($p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s) {
                DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n";
                next;
            }
            else {
                DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n";
            }
        }
        else {
            DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n";
            next;
        }

        # A formatty line has to have #: in the first two columns, and uses
        # "^" to mean bold, "/" to mean italic, and "%" to mean bold italic.
        # Example:
        #   What do you want?  i like pie. [or whatever]
        # #:^^^^^^^^^^^^^^^^^              /////////////

        DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";

        # $1 is still the capture from the formatty-line match above (the
        # failed peek at the previous line does not reset it).  The two
        # spaces stand in for the two columns taken up by "#:".
        $formatting = (' ' x 2) . $1;
        $formatting =~ s/\s+$//s;    # nix trailing whitespace
        unless (length $formatting and $p->[$i-1] =~ m/\S/) {    # no-op
            splice @$p, $i, 1;    # remove this line
            $i--;                 # don't consider next line
            next;
        }

        if (length($formatting) >= length($p->[$i-1])) {
            $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
        }
        else {
            $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
        }
        # Make $formatting and the previous line be exactly the same length,
        # with $formatting having a " " as the last character.

        DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n";

        my @new_line;
        while ($formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g) {
            #print STDERR "Format matches $1\n";
            if ($2) {
                #print STDERR "SKIPPING <$2>\n";
                push @new_line,
                    substr($p->[$i-1], pos($formatting)-length($1), length($1));
            }
            else {
                #print STDERR "SNARING $+\n";
                push @new_line, [
                    (
                        $3 ? 'VerbatimB'  :
                        $4 ? 'VerbatimI'  :
                        $5 ? 'VerbatimBI' : die("Should never get called")
                    ), {},
                    substr($p->[$i-1], pos($formatting)-length($1), length($1))
                ];
                #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
            }
        }
        my @nixed =
            splice @$p, $i-1, 2, @new_line;    # replace myself and the next line
        DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";

        DEBUG > 6 and print STDERR "New version of the above line is these tokens (",
            scalar(@new_line), "):",
            map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
        $i--;    # So the next line we scrutinize is the line before the one
                 # that we just went and formatted
    }

    $p->[0] = 'VerbatimFormatted';

    # Collapse adjacent text nodes, just for kicks.
    for (my $i = 2; $i < $#$p; $i++) {    # work forwards over the tokens except for the last
        next if ref($p->[$i]) or ref($p->[$i + 1]);
        DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
        $p->[$i] .= splice @$p, $i+1, 1;    # merge
        --$i;                               # and back up, to retry against the new neighbor
    }

    # Now look for the last text token, and remove the terminal newline
    for (my $i = $#$p; $i >= 2; $i--) {
        # work backwards over the tokens, even the first
        if (!ref($p->[$i])) {
            if ($p->[$i] =~ s/\n$//s) {
                DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
            }
            else {
                DEBUG > 5 and print STDERR
                    "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
            }
            last;    # we only want the next one
        }
    }

    return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sub _treelet_from_formatting_codes {
# Given a paragraph, returns a treelet. Full of scary tokenizing code.
# Like [ '~Top', {'start_line' => $start_line},
# "I like ",
# [ 'B', {}, "pie" ],
# "!"
# ]
#
# Arguments:
#   $para           - the paragraph text to tokenize
#   $start_line     - stored in the '~Top' attributes and used for the
#                     "Unterminated ... sequence" complaint
#   $preserve_space - when true (or when $self->{'preserve_whitespace'}
#                     is true), whitespace is left exactly as given
#
# Unclosed codes are closed implicitly at the end of the paragraph and
# reported through whine(); the treelet is returned either way.
my($self, $para, $start_line, $preserve_space) = @_;
my $treelet = ['~Top', {'start_line' => $start_line},];
unless ($preserve_space || $self->{'preserve_whitespace'}) {
$para =~ s/\s+/ /g; # collapse and trim all whitespace first.
$para =~ s/ $//;
$para =~ s/^ //;
}
# The only apparent problem with the above code is that a run of
# whitespace inside a code (as in N<<   >>) is collapsed to one space.
# But then, word wrapping does that too! So don't do that!
# @stack holds, per open code, the number of '>' needed to close it
# (0 meaning a simple one-bracket code); @lineage holds the matching
# chain of nodes, innermost last, starting from the treelet itself.
my @stack;
my @lineage = ($treelet);
my $raw = ''; # raw content of L<> fcode before splitting/processing
# XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
# into just 1 ' '. Is this the regex's doing or 'raw's?
my $inL = 0;
DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";
# Here begins our frightening tokenizer RE. The following regex matches
# text in four main parts:
#
# * Start-codes. The first alternative matches C< or C<<, the latter
# followed by some whitespace. $1 will hold the entire start code
# (including any space following a multiple-angle-bracket delimiter),
# and $2 will hold only the additional brackets past the first in a
# multiple-bracket delimiter. length($2) + 1 will be the number of
# closing brackets we have to find.
#
# * Closing brackets. Match some amount of whitespace followed by
# multiple close brackets. The logic to see if this closes anything
# is down below. Note that in order to parse C<< >> correctly, we
# have to use look-behind (?<=\s\s), since the match of the starting
# code will have consumed the whitespace.
#
# * A single closing bracket, to close a simple code like C<>.
#
# * Something that isn't a start or end code. We have to be careful
# about accepting whitespace, since perlpodspec says that any whitespace
# before a multiple-bracket closing delimiter should be ignored.
#
while($para =~
m/\G
(?:
# Match starting codes, including the whitespace following a
# multiple-delimiter start code. $1 gets the whole start code and
# $2 gets all but one of the <s in the multiple-bracket case.
([A-Z]<(?:(<+)\s+)?)
|
# Match multiple-bracket end codes. $3 gets the whitespace that
# should be discarded before an end bracket but kept in other cases
# and $4 gets the end brackets themselves.
(\s+|(?<=\s\s))(>{2,})
|
(\s?>) # $5: simple end-codes
|
( # $6: stuff containing no start-codes or end-codes
(?:
[^A-Z\s>]
|
(?:
[A-Z](?!<)
)
|
# whitespace is ok, but we don't want to eat the whitespace before
# a multiple-bracket end code.
# NOTE: we may still have problems with e.g. S<< >>
(?:
\s(?!\s*>{2,})
)
)+
)
)
/xgo
) {
DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
if(defined $1) {
# A start code: open a new node under the current innermost one.
if(defined $2) {
DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
push @stack, length($2) + 1;
# length of the necessary complex end-code string
} else {
DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
push @stack, 0; # signal that we're looking for simple
}
push @lineage, [ substr($1,0,1), {}, ]; # new node object
push @{ $lineage[-2] }, $lineage[-1];
if ('L' eq substr($1,0,1)) {
$raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
$inL = 1;
} else {
$raw .= $1 if $inL;
}
} elsif(defined $4) {
DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n";
# This is where it gets messy...
if(! @stack) {
# We saw " >>>>" but needed nothing. This is ALL just stuff then.
DEBUG > 4 and print STDERR " But it's really just stuff.\n";
push @{ $lineage[-1] }, $3, $4;
next;
} elsif(!$stack[-1]) {
# We saw " >>>>" but needed only ">". Back pos up.
DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n";
push @{ $lineage[-1] }, $3; # That was a for-real space, too.
pos($para) = pos($para) - length($4) + 1;
} elsif($stack[-1] == length($4)) {
# We found " >>>>", and it was exactly what we needed. Commonest case.
DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n";
} elsif($stack[-1] < length($4)) {
# We saw " >>>>" but needed only " >>". Back pos up.
DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n";
pos($para) = pos($para) - length($4) + $stack[-1];
} else {
# We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n";
push @{ $lineage[-1] }, $3, $4;
next;
}
#print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
# Keep the element from being childless
pop @stack;
pop @lineage;
unless (@stack) { # not in an L if there are no open fcodes
$inL = 0;
# Attach the accumulated raw text to the L node that just closed.
if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
$lineage[-1][-1][1]{'raw'} = $raw
}
}
$raw .= $3.$4 if $inL;
} elsif(defined $5) {
DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n";
if(@stack and ! $stack[-1]) {
# We're indeed expecting a simple end-code
DEBUG > 4 and print STDERR " It's indeed an end-code.\n";
if(length($5) == 2) { # There was a space there: " >"
push @{ $lineage[-1] }, ' ';
} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
push @{ $lineage[-1] }, ''; # keep it from being really childless
}
pop @stack;
pop @lineage;
} else {
DEBUG > 4 and print STDERR " It's just stuff.\n";
push @{ $lineage[-1] }, $5;
}
unless (@stack) { # not in an L if there are no open fcodes
$inL = 0;
if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
$lineage[-1][-1][1]{'raw'} = $raw
}
}
$raw .= $5 if $inL;
} elsif(defined $6) {
DEBUG > 3 and print STDERR "Found stuff \"$6\"\n";
push @{ $lineage[-1] }, $6;
$raw .= $6 if $inL;
# XXX does not capture multiple whitespace characters -- 'raw' ends up
# with at most 1 leading/trailing whitespace, why not all of it?
} else {
# should never ever ever ever happen
DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n";
die "SPORK 512512!";
}
}
if(@stack) { # Uhoh, some sequences weren't closed.
# Unwind innermost-first, building a picture such as "B<I<...>>"
# of what was left open for the complaint below.
my $x= "...";
while(@stack) {
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
# Hmmmmm!
my $code = (pop @lineage)->[0];
my $ender_length = pop @stack;
if($ender_length) {
--$ender_length;
$x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
} else {
$x = $code . "<$x>";
}
}
DEBUG > 1 and print STDERR "Unterminated $x sequence\n";
$self->whine($start_line,
"Unterminated $x sequence",
);
}
return $treelet;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sub text_content_of_treelet {    # method: $parser->text_content_of_treelet($lol)
    # Method form of stringify_lol(): the invocant is ignored and the
    # concatenated text of the given treelet is returned.
    my ($self, $lol) = @_;
    return stringify_lol($lol);
}
sub stringify_lol {    # function: stringify_lol($lol)
    # Return every text leaf of a list-of-lists treelet, concatenated in
    # document order.
    my ($lol) = @_;
    my $buffer = '';
    _stringify_lol($lol, \$buffer);
    return $buffer;
}
sub _stringify_lol {    # the real recursor
    # Append the text of treelet $lol to the string referenced by $to.
    # Elements 0 and 1 (name and attributes) are skipped; array children
    # are descended into and everything else is appended as-is.
    my ($lol, $to) = @_;
    foreach my $idx (2 .. scalar(@$lol) - 1) {
        my $child = $lol->[$idx];
        if (ref($child || '') and UNIVERSAL::isa($child, 'ARRAY')) {
            _stringify_lol($child, $to);    # recurse!
        }
        else {
            $$to .= $child;
        }
    }
    return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sub _dump_curr_open {    # return a string representation of the stack
    # Each open region is shown by its command name.  '=for' regions are
    # shown as their '~really' attribute (falling back to '=over')
    # followed by their target.  An empty stack yields '[empty]'.
    my ($self) = @_;
    my $curr_open = $self->{'curr_open'};
    return '[empty]' unless @$curr_open;

    my @labels;
    for my $region (@$curr_open) {
        if ($region->[0] eq '=for') {
            my $command = $region->[1]{'~really'} || '=over';
            push @labels, $command . ' ' . $region->[1]{'target'};
        }
        else {
            push @labels, $region->[0];
        }
    }
    return join '; ', @labels;
}
###########################################################################
# Escape table used by pretty(): maps a single character to the
# backslashed form printed for it.  When two keys are the same character
# the later entry wins, so the order of "\cm", "\cj" and "\n" matters.
my %pretty_form = (
"\a" => '\a', # ding!
"\b" => '\b', # BS
"\e" => '\e', # ESC
"\f" => '\f', # FF
"\t" => '\t', # tab
"\cm" => '\cm',
"\cj" => '\cj',
"\n" => '\n', # probably overrides one of either \cm or \cj
'"' => '\"',
'\\' => '\\\\',
'$' => '\\$',
'@' => '\\@',
'%' => '\\%',
'#' => '\\#',
);
sub pretty {    # adopted from Class::Classless
    # Render the arguments as a comma-separated, Perl-ish literal string
    # for debugging output.  Arrays, scalar refs and hashes (keys sorted)
    # are rendered recursively, plain numbers bare, and other values as
    # double-quoted strings with unusual characters escaped.
    #
    # Not the most brilliant routine, but passable.
    # Don't give it a cyclic data structure!
    my @stuff = @_;    # copy, since string items are escaped in place below

    my @rendered;
    foreach my $item (@stuff) {
        if (!defined $item) {
            push @rendered, "undef";
            next;
        }

        my $type = ref $item;
        if ($type eq 'ARRAY' or $type eq 'Pod::Simple::LinkSection') {
            push @rendered, "[ " . pretty(@$item) . " ]";
        }
        elsif ($type eq 'SCALAR') {
            push @rendered, "\\" . pretty($$item);
        }
        elsif ($type eq 'HASH') {
            my @pairs;
            foreach my $key (sort keys %$item) {
                push @pairs, pretty($key) . '=>' . pretty($item->{$key});
            }
            push @rendered, "{" . join(", ", @pairs) . "}";
        }
        elsif (!length $item) {
            push @rendered, q{''};    # empty string
        }
        elsif (
            $item eq '0'              # very common case
            or (
                $item =~ m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
                and $item ne '-0'     # the strange case that RE lets thru
            )
        ) {
            push @rendered, $item;
        }
        else {
            # Yes, explicitly name every character desired. There are
            # shortcuts one could make, but I (Karl Williamson) was afraid
            # that some Perl releases would have bugs in some of them. For
            # example [A-Z] works even on EBCDIC platforms to match exactly
            # the 26 uppercase English letters, but I don't know if it has
            # always worked without bugs. It seemed safest just to list the
            # characters.
            $item =~
                s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
                 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
            push @rendered, qq{"$item"};
        }
    }

    return join ", ", @rendered;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# A rather unsubtle method of blowing away all the state information
# from a parser object so it can be reused. Provided as a utility for
# backward compatibility in Pod::Man, etc. but not recommended for
# general use.
sub reinit {
    # Forget all per-document parsing state so the object can be fed
    # another document.  Only the keys listed here are removed; anything
    # else stored in the object is kept.
    my ($self) = @_;
    my @state_keys = qw(source_dead source_filename doc_has_started
        start_of_pod_block content_seen last_was_blank paras curr_open
        line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen
        Title);
    delete @{$self}{@state_keys};
    return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
Simple/HTML.pm 0000644 00000103466 15051135563 0007115 0 ustar 00 require 5;
package Pod::Simple::HTML;
use strict;
use Pod::Simple::PullParser ();
use vars qw(
@ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
$Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix
$Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
$Doctype_decl $Content_decl
);
@ISA = ('Pod::Simple::PullParser');
$VERSION = '3.35';
BEGIN {
    # Make sure a DEBUG sub exists in this package at compile time: keep
    # one that is already defined, else borrow Pod::Simple's, else fall
    # back to a constant 0.
    unless (defined &DEBUG) {
        *DEBUG = defined &Pod::Simple::DEBUG
            ? \&Pod::Simple::DEBUG
            : sub () {0};
    }
}
# Package-level defaults.  Each scalar is assigned only when the caller has
# not already given it a value before this file is loaded.  ($Doctype_decl
# and $Content_decl use ||=, so an empty preset is replaced too.)
$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
# qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
# "http://www.w3.org/TR/html4/loose.dtd">\n};
$Content_decl ||=
q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
$Computerese = "" unless defined $Computerese;
$LamePad = '' unless defined $LamePad;
$Linearization_Limit = 120 unless defined $Linearization_Limit;
# headings/items longer than that won't get an <a name="...">
$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?'
unless defined $Perldoc_URL_Prefix;
$Perldoc_URL_Postfix = ''
unless defined $Perldoc_URL_Postfix;
# These two used to be assigned unconditionally, which silently discarded
# a value preset by the caller; they now follow the same rule as the rest.
$Man_URL_Prefix = 'http://man.he.net/man'
unless defined $Man_URL_Prefix;
$Man_URL_Postfix = ''
unless defined $Man_URL_Postfix;
$Title_Prefix = '' unless defined $Title_Prefix;
$Title_Postfix = '' unless defined $Title_Postfix;
# Unlike the scalars above, %ToIndex is always reset here.
%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
# 'item-text' stuff in the index doesn't quite work, and may
# not be a good idea anyhow.
# Hand the list of per-object attribute names to _accessorize.  The
# comments interleaved below describe what each attribute holds.
# NOTE(review): presumably _accessorize (defined outside this section)
# generates a get/set method per name -- confirm there.
__PACKAGE__->_accessorize(
'perldoc_url_prefix',
# In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
# to put before the "Foo%3a%3aBar".
# (for singleton mode only?)
'perldoc_url_postfix',
# what to put after "Foo%3a%3aBar" in the URL. Normally "".
'man_url_prefix',
# In turning L<crontab(5)> into http://whatever/man/1/crontab, what
# to put before the "1/crontab".
'man_url_postfix',
# what to put after the "1/crontab" in the URL. Normally "".
'batch_mode', # whether we're in batch mode
'batch_mode_current_level',
# When in batch mode, how deep the current module is: 1 for "LWP",
# 2 for "LWP::Protocol", 3 for "LWP::Protocol::GHTTP", etc
'title_prefix', 'title_postfix',
# What to put before and after the title in the head.
# Should already be &-escaped
'html_h_level',
'html_header_before_title',
'html_header_after_title',
'html_footer',
'top_anchor',
'index', # whether to add an index at the top of each page
# (actually it's a table-of-contents, but we'll call it an index,
# out of apparently longstanding habit)
'html_css', # URL of CSS file to point to
'html_javascript', # URL of Javascript file to point to
'force_title', # should already be &-escaped
'default_title', # should already be &-escaped
);
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Names of the extra formatting codes listed in the changes2() call below;
# filled in as a side effect of building %Tagmap and later passed to
# accept_codes() by new().
my @_to_accept;
# Maps an element name to the HTML emitted when it opens, and the same
# name prefixed with '/' to the HTML emitted when it closes.  This is a
# flat key/value list, so a later entry for the same key replaces an
# earlier one: the explicit 'B', 'I' and '/item-*' entries near the end
# override what changes() generated for them.
%Tagmap = (
'Verbatim' => "\n<pre$Computerese>",
'/Verbatim' => "</pre>\n",
'VerbatimFormatted' => "\n<pre$Computerese>",
'/VerbatimFormatted' => "</pre>\n",
'VerbatimB' => "<b>",
'/VerbatimB' => "</b>",
'VerbatimI' => "<i>",
'/VerbatimI' => "</i>",
'VerbatimBI' => "<b><i>",
'/VerbatimBI' => "</i></b>",
'Data' => "\n",
'/Data' => "\n",
'head1' => "\n<h1>", # And also stick in an <a name="...">
'head2' => "\n<h2>", # ''
'head3' => "\n<h3>", # ''
'head4' => "\n<h4>", # ''
'/head1' => "</a></h1>\n",
'/head2' => "</a></h2>\n",
'/head3' => "</a></h3>\n",
'/head4' => "</a></h4>\n",
'X' => "<!--\n\tINDEX: ",
'/X' => "\n-->",
# Block-level elements: changes() wraps each tag in newlines.
changes(qw(
Para=p
B=b I=i
over-bullet=ul
over-number=ol
over-text=dl
over-block=blockquote
item-bullet=li
item-number=li
item-text=dt
)),
# Inline elements: changes2() adds no newlines.  The map also records
# each element name in @_to_accept as it passes the spec through.
changes2(
map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
qw[
sample=samp
definition=dfn
keyboard=kbd
variable=var
citation=cite
abbreviation=abbr
acronym=acronym
subscript=sub
superscript=sup
big=big
small=small
underline=u
strikethrough=s
preformat=pre
teletype=tt
] # no point in providing a way to get <q>...</q>, I think
),
'/item-bullet' => "</li>$LamePad\n",
'/item-number' => "</li>$LamePad\n",
'/item-text' => "</a></dt>$LamePad\n",
'item-body' => "\n<dd>",
'/item-body' => "</dd>\n",
'B' => "<b>", '/B' => "</b>",
'I' => "<i>", '/I' => "</i>",
'F' => "<em$Computerese>", '/F' => "</em>",
'C' => "<code$Computerese>", '/C' => "</code>",
'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
'/L' => "</a>",
);
sub changes {
    # Expand each "element=tag" spec into four list items suitable for a
    # tag map: element => "\n<tag>", "/element" => "</tag>\n".
    # Dies with "Funky SPEC" on a spec that does not have that shape.
    my @pairs;
    foreach my $spec (@_) {
        $spec =~ m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
            or die "Funky $spec";
        push @pairs, $1, "\n<$2>", "/$1", "</$2>\n";
    }
    return @pairs;
}
sub changes2 {
    # Like changes(), but for inline elements: no newlines are added
    # around the tags.  Each "element=tag" spec becomes
    # element => "<tag>", "/element" => "</tag>".
    # Dies with "Funky SPEC" on a spec that does not have that shape.
    my @pairs;
    foreach my $spec (@_) {
        $spec =~ m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
            or die "Funky $spec";
        push @pairs, $1, "<$2>", "/$1", "</$2>";
    }
    return @pairs;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub go {
    # Command-line entry point: convert using the arguments in @ARGV,
    # then exit with status 0.
    Pod::Simple::HTML->parse_from_file(@ARGV);
    exit 0;
}
# Just so we can run from the command line. No options.
# For that, use perldoc!
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub new {
    # Construct a parser through the parent class, then load it with this
    # module's defaults: accepted targets and codes, URL and title
    # affixes taken from the package variables, the HTML header and
    # footer text, the top anchor, and a private copy of %Tagmap.
    my $invocant = shift;
    my $parser   = $invocant->SUPER::new(@_);

    #$parser->nix_X_codes(1);
    $parser->nbsp_for_S(1);
    $parser->accept_targets( 'html', 'HTML' );
    $parser->accept_codes('VerbatimFormatted');
    $parser->accept_codes(@_to_accept);
    DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";

    $parser->perldoc_url_prefix(  $Perldoc_URL_Prefix  );
    $parser->perldoc_url_postfix( $Perldoc_URL_Postfix );
    $parser->man_url_prefix(      $Man_URL_Prefix      );
    $parser->man_url_postfix(     $Man_URL_Postfix     );
    $parser->title_prefix(        $Title_Prefix        );
    $parser->title_postfix(       $Title_Postfix       );

    $parser->html_header_before_title(
        qq[$Doctype_decl<html><head><title>]
    );

    my @after_title = (
        "</title>",
        $Content_decl,
        "</head>\n<body class='pod'>",
        $parser->version_tag_comment,
        "<!-- start doc -->\n",
    );
    $parser->html_header_after_title( join "\n", @after_title );

    $parser->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
    $parser->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" );

    # Each object gets its own copy so per-object edits stay private.
    $parser->{'Tagmap'} = {%Tagmap};
    return $parser;
}
sub __adjust_html_h_levels {
    # Shift the <hN> tags in this object's Tagmap so that head1 is emitted
    # at the level given by html_h_level (head2 one deeper, and so on).
    #
    # Does nothing when html_h_level is undefined or equals the level
    # already applied.  The applied level is recorded in
    # $self->{'Adjusted_html_h_levels'}.  Previously that key was read but
    # never written, and the substitution always looked for the unshifted
    # digit, so changing html_h_level between runs left stale tags.
    my ($self) = @_;
    my $Tagmap = $self->{'Tagmap'};

    my $level = $self->html_h_level;
    return unless defined $level;
    return if ($self->{'Adjusted_html_h_levels'} || 0) == $level;

    # Level 1 is the unshifted state the Tagmap starts out in.
    my $applied = $self->{'Adjusted_html_h_levels'} || 1;

    for my $n (1 .. 4) {
        my $from = $n + $applied - 1;    # digit currently in the tag
        my $to   = $n + $level - 1;      # digit it should become
        $Tagmap->{"head$n"}  =~ s/$from/$to/;
        $Tagmap->{"/head$n"} =~ s/$from/$to/;
    }

    $self->{'Adjusted_html_h_levels'} = $level;
    return;
}
sub batch_mode_page_object_init {
    # Put this object into batch mode and record the depth of the module
    # being converted.  The other arguments are used for debug output
    # only.  Returns the object itself.
    my ($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;

    if (DEBUG) {
        print STDERR "Initting $self\n for $module\n",
            " in $infile\n out $outfile\n depth $depth\n";
    }

    $self->batch_mode(1);
    $self->batch_mode_current_level($depth);
    return $self;
}
sub run {
    # Produce the output.  With bare_output set, only the middle part is
    # emitted.  Otherwise beginning, middle and end run in that order,
    # stopping at the first step that returns false; the value of the
    # last step run is returned.
    my ($self) = @_;

    if ($self->bare_output) {
        return $self->do_middle;
    }

    my $began = $self->do_beginning;
    return $began unless $began;

    my $did_middle = $self->do_middle;
    return $did_middle unless $did_middle;

    return $self->do_end;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sub do_beginning {
    # Print everything up to the document body: the header before the
    # title, the (already escaped) title, and the header after the title
    # with any CSS link, top anchor and Javascript link worked in.
    #
    # Returns 1 on success.  Returns empty, printing nothing, when no
    # title was forced and no content was seen while looking for one.
    my ($self) = @_;

    my $title;
    if (!defined $self->force_title) {
        # Actually try looking for the title in the document:
        $title = $self->get_short_title();
        if (!$self->content_seen) {
            DEBUG and print STDERR "No content seen in search for title.\n";
            return;
        }
        $self->{'Title'} = $title;

        if (!defined $title or $title !~ m/\S/) {
            $title = $self->default_title;
            $title = '' unless defined $title;
            DEBUG and print STDERR "Title defaults to $title\n";
        }
        else {
            $title = $self->title_prefix . esc($title) . $self->title_postfix;
        }
    }
    else {
        $title = $self->force_title;
        DEBUG and print STDERR "Forcing title to be $title\n";
    }

    my $after = $self->html_header_after_title || '';

    if ($self->html_css) {
        my $link;
        if ($self->html_css =~ m/</) {
            # It's a big blob of markup, let's drop it in
            $link = $self->html_css;
        }
        else {
            # It's just a URL, so let's wrap it up
            $link = sprintf(
                qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
                $self->html_css,
            );
        }
        $after =~ s{(</head>)}{$link\n$1}i;    # otherwise nevermind
    }

    $self->_add_top_anchor(\$after);

    if ($self->html_javascript) {
        my $link;
        if ($self->html_javascript =~ m/</) {
            # It's a big blob of markup, let's drop it in
            $link = $self->html_javascript;
        }
        else {
            # It's just a URL, so let's wrap it up
            $link = sprintf(
                qq[<script type="text/javascript" src="%s"></script>\n],
                $self->html_javascript,
            );
        }
        $after =~ s{(</head>)}{$link\n$1}i;    # otherwise nevermind
    }

    my $out_fh = $self->{'output_fh'};
    print {$out_fh}
        $self->html_header_before_title || '',
        $title,    # already escaped
        $after,
    ;

    DEBUG and print STDERR "Returning from do_beginning...\n";
    return 1;
}
sub _add_top_anchor {
    # Append the object's top anchor to the string referenced by $text_r,
    # unless that string already contains a name='___top' attribute.
    my ($self, $text_r) = @_;

    # a hack: detect an existing anchor by its name attribute
    return if $$text_r and $$text_r =~ m/name=['"]___top['"]/;

    $$text_r .= $self->top_anchor || '';
    return;
}
sub version_tag_comment {
    # Build the HTML comment that records which class and version
    # generated the page, its parent class and version, the Perl version,
    # the current GMT time, and the advisory note from _modnote().
    my ($self) = @_;

    my $parent = $ISA[0];
    my @fields = esc(
        ref($self), $self->VERSION(), $parent, $parent->VERSION(),
        $], scalar(gmtime),
    );

    my $template =
        "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n";
    return sprintf $template, @fields, $self->_modnote();
}
sub _modnote {
    # Return the advisory note embedded in generated pages, naming the
    # invocant's class.  Blank lines are dropped and the remaining lines
    # are joined with a newline plus one space.
    my ($invocant) = @_;
    my $class = ref($invocant) || $invocant;

    my $blurb = qq{
If you want to change this HTML document, you probably shouldn't do that
by changing it directly. Instead, see about changing the calling options
to $class, and/or subclassing $class,
then reconverting this document from the Pod source.
When in doubt, email the author of $class for advice.
See 'perldoc $class' for more info.
};

    my @lines = grep { m/\S/ } split "\n", $blurb;
    return join "\n ", @lines;
}
sub do_end {
    # Print the HTML footer (or nothing when none is set) to the output
    # filehandle.  Always returns 1.
    my ($self) = @_;
    my $out_fh = $self->{'output_fh'};
    my $footer = $self->html_footer || '';
    print {$out_fh} $footer;
    return 1;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Normally this would just be a call to _do_middle_main_loop -- but we
# have to do some elaborate things to emit all the content and then
# summarize it and output it /before/ the content that it's a summary of.
sub do_middle {
    # Emit the document body.  Without an index this is just the main
    # loop.  With one, the index can only be built after the body has
    # been generated, yet must appear before it, so the body is either
    # patched afterwards (string output) or buffered (filehandle output).
    # Returns 1 in the index case, else whatever the main loop returns.
    my ($self) = @_;
    return $self->_do_middle_main_loop unless $self->index;

    if ($self->output_string) {
        # An efficiency hack: drop a marker into the output string now and
        # swap it for the real index once the body has been written.
        my $out    = $self->output_string;    #it's a reference to it
        my $marker = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
        $$out .= $marker;

        $self->_do_middle_main_loop;

        my $index = $self->index_as_html();
        if ($$out =~ s/\Q$marker\E/$index/s) {
            # Expected case
            DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
        }
        else {
            DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
            # I don't think this should ever happen.
        }
        return 1;
    }

    unless ($self->output_fh) {
        require Carp;
        Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
    }

    # If we get here, we're outputting to a FH. So we need to do some magic.
    # Namely, divert all content to a string, which we output after the index.
    my $fh   = $self->output_fh;
    my $body = '';
    {
        # Our horrible bait and switch:
        $self->output_string( \$body );
        $self->_do_middle_main_loop;
        $self->abandon_output_string();
        $self->output_fh($fh);
    }
    print $fh $self->index_as_html();
    print $fh $body;
    return 1;
}
###########################################################################
sub index_as_html {
    # Render the collected index points as nested <ul> lists inside a
    # <div class='indexgroup'>.  With fewer than two points, an empty
    # placeholder div is returned instead.
    #
    # This is meant to be called AFTER the input document has been parsed!
    my ($self) = @_;

    my $points = $self->{'PSHTML_index_points'} || [];

    # There's no point in having a 0-item or 1-item index, I dare say.
    return qq[<div class='indexgroupEmpty'></div>\n] unless @$points > 1;

    my @html  = (qq{\n<div class='indexgroup'>});
    my $depth = 0;
    my $prev_tag;

    # The trailing 'head0' sentinel forces every open list to be closed.
    for my $point (@$points, ['head0', '(end)']) {
        my ($tag, $label) = @$point;
        my $anchor = $self->section_escape($label);

        my $wanted;
        if ($tag =~ m{^head(\d+)$}) {
            $wanted = 0 + $1;
        }
        elsif ($prev_tag =~ m{^head\d+$}) {    # must be some kinda list item
            $wanted = $depth + 1;
        }
        else {
            $wanted = $depth;    # no change needed
        }

        # Get to the wanted depth by opening or closing ULs
        while ($depth > $wanted) {
            --$depth;
            push @html, (" " x $depth) . "</ul>";
        }
        while ($depth < $wanted) {
            ++$depth;
            push @html, (" " x ($depth-1))
                . "<ul class='indexList indexList$depth'>";
        }

        $prev_tag = $tag;
        next unless $depth;

        my $pad = ' ' x $depth;
        push @html, sprintf
            "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
            $pad, $depth, esc($anchor), esc($label)
        ;
    }

    push @html, "</div>\n";
    return join "\n", @html;
}
###########################################################################
# The main rendering loop: pull tokens from the parser one at a time and
# print the corresponding HTML to the output filehandle.
#
#  * start tokens: L<> links become <a href=...>; headings and text items
#    get an <a name=...> anchor derived from their linearized content (and
#    are recorded as index points when %ToIndex says so); Data sections are
#    passed through raw; everything else is looked up in the Tagmap.
#  * end tokens: closing tags come from the Tagmap under "/tagname"; list
#    items defer their closing tag on @stack until the next item or the end
#    of the enclosing =over.
#  * text tokens: entity-escaped, and (outside of no-wrap regions) a
#    newline is substituted for the space after sentence punctuation.
#
# Tags with no Tagmap entry are skipped via "|| next".
# Returns 1 when the token stream is exhausted.
sub _do_middle_main_loop {
    my $self = $_[0];
    my $fh = $self->{'output_fh'};
    my $tagmap = $self->{'Tagmap'};

    $self->__adjust_html_h_levels;

    my($token, $type, $tagname, $linkto, $linktype);
    my @stack;          # per open =over: the pending close tag of the current item
    my $dont_wrap = 0;  # >0 while inside Verbatim / VerbatimFormatted / X

    while($token = $self->get_token) {

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if( ($type = $token->type) eq 'start' ) {
            if(($tagname = $token->tagname) eq 'L') {
                $linktype = $token->attr('type') || 'insane';

                $linkto = $self->do_link($token);

                if(defined $linkto and length $linkto) {
                    esc($linkto);   # void context: escapes $linkto in place
                    # (Yes, SGML-escaping applies on top of %-escaping!
                    # But it's rarely noticeable in practice.)
                    print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
                } else {
                    print $fh "<a>"; # Yes, an 'a' element with no attributes!
                }

            } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
                print $fh $tagmap->{$tagname} || next;

                # Read ahead to the matching end token so the element's
                # content can be turned into an anchor name; the tokens are
                # pushed back afterwards so they still get rendered normally.
                my @to_unget;
                while(1) {
                    push @to_unget, $self->get_token;
                    last if $to_unget[-1]->is_end
                        and $to_unget[-1]->tagname eq $tagname;

                    # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
                }

                my $name = $self->linearize_tokens(@to_unget);
                $name = $self->do_section($name, $token) if defined $name;

                print $fh "<a ";
                if ($tagname =~ m/^head\d$/s) {
                    print $fh "class='u'", $self->index
                        ? " href='#___top' title='click to go to top of document'\n"
                        : "\n";
                }

                if(defined $name) {
                    my $esc = esc( $self->section_name_tidy( $name ) );
                    print $fh qq[name="$esc"];
                    DEBUG and print STDERR "Linearized ", scalar(@to_unget),
                        " tokens as \"$name\".\n";
                    push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
                        if $ToIndex{ $tagname };
                    # Obviously, this discards all formatting codes (saving
                    # just their content), but ahwell.

                } else { # ludicrously long, so nevermind
                    DEBUG and print STDERR "Linearized ", scalar(@to_unget),
                        " tokens, but it was too long, so nevermind.\n";
                }
                print $fh "\n>";
                $self->unget_token(@to_unget);

            } elsif ($tagname eq 'Data') {
                my $next = $self->get_token;
                next unless defined $next;
                unless( $next->type eq 'text' ) {
                    $self->unget_token($next);
                    next;
                }
                DEBUG and print STDERR " raw text ", $next->text, "\n";
                # The parser sometimes preserves newlines and sometimes doesn't!
                (my $text = $next->text) =~ s/\n\z//;
                print $fh $text, "\n";
                next;

            } else {
                if( $tagname =~ m/^over-/s ) {
                    push @stack, '';
                } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
                    # a new item begins: close the previous one first
                    print $fh $stack[-1];
                    $stack[-1] = '';
                }
                print $fh $tagmap->{$tagname} || next;
                ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
                    or $tagname eq 'X';
            }

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        } elsif( $type eq 'end' ) {
            if( ($tagname = $token->tagname) =~ m/^over-/s ) {
                if( my $end = pop @stack ) {
                    print $fh $end;
                }
            } elsif( $tagname =~ m/^item-/s and @stack) {
                # Defer the item's closing tag until we know what follows.
                $stack[-1] = $tagmap->{"/$tagname"};
                if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
                    $self->unget_token($next);
                    if( $next->type eq 'start' ) {
                        print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
                        $stack[-1] = $tagmap->{"/item-body"};
                    }
                }
                next;
            }
            print $fh $tagmap->{"/$tagname"} || next;
            # Must mirror the increment in the 'start' branch exactly.  The
            # VerbatimFormatted case used to be missing here, which left
            # $dont_wrap stuck above zero (and so disabled the punctuation
            # wrapping below) for the remainder of the document.
            --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
                or $tagname eq 'X';

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        } elsif( $type eq 'text' ) {
            esc($type = $token->text);  # reuse $type, why not
            $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
            print $fh $type;
        }

    }
    return 1;
}
###########################################################################
#
# Hook called with the linearized name of a heading / text item and the
# start token it came from.  This implementation ignores the token and
# hands the name back unchanged.
sub do_section {
    my (undef, $section_name) = @_;     # invocant and token are not used here
    return $section_name;
}
# Dispatch an L<> start token to the resolver for its link type and return
# what that resolver returns.
#
# The token's 'type' attribute selects do_pod_link, do_url_link or
# do_man_link.  A missing or unrecognized type is reported through
# $self->whine (with the token's 'start_line') and yields the placeholder
# string 'FNORG'.
sub do_link {
    my ($self, $token) = @_;
    my $kind = $token->attr('type');

    unless (defined $kind) {
        $self->whine("Typeless L!?", $token->attr('start_line'));
        return 'FNORG';     # should never get called
    }

    return $self->do_pod_link($token) if $kind eq 'pod';
    return $self->do_url_link($token) if $kind eq 'url';
    return $self->do_man_link($token) if $kind eq 'man';

    $self->whine("L of unknown type $kind!?", $token->attr('start_line'));
    return 'FNORG';         # should never get called
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Resolve an L<> of type 'url': the link target is simply the token's
# 'to' attribute.
sub do_url_link {
    my (undef, $link) = @_;     # the invocant is not needed
    return $link->attr('to');
}
# Resolve an L<> of type 'man'.  Reads the token's 'to' (page) and
# 'section' (fragment) attributes, escapes the fragment with
# section_escape when it is non-empty, and delegates to
# resolve_man_page_link.  Returns undef when there is no page name.
sub do_man_link {
    my ($self, $link) = @_;
    my $page     = $link->attr('to');
    my $fragment = $link->attr('section');

    return undef unless defined $page and length $page;    # should never happen

    # Appending '' forces the fragment to a plain string before it is
    # measured and escaped.
    if (defined $fragment and length($fragment .= '')) {
        $fragment = $self->section_escape($fragment);
    }

    DEBUG and print STDERR "Resolving \"$page/$fragment\"\n\n";

    return $self->resolve_man_page_link($page, $fragment);
}
# Resolve an L<> of type 'pod' to a URL string, or undef when no link can
# be made.
#
# Inputs are the token's 'to' (page name) and 'section' attributes; at
# least one must be non-empty.  Resolution order:
#   1. resolve_pod_link_by_table(to, section) -- a complete URL wins outright
#   2. resolve_pod_link_by_table(to)          -- page-only table entry
#   3. resolve_pod_page_link(to, section)     -- general page resolution
# The (escaped) section, if any, is then appended after a '#'.
sub do_pod_link {
# And now things get really messy...
my($self, $link) = @_;
my $to = $link->attr('to');
my $section = $link->attr('section');
return undef unless( # should never happen
(defined $to and length $to) or
(defined $section and length $section)
);
# .= '' stringifies the section before it is measured and escaped
$section = $self->section_escape($section)
if defined $section and length($section .= ''); # (stringify)
DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
$to || "(nil)", $section || "(nil)";
{
# An early hack: a table entry for the page+section pair is taken as a
# complete URL and returned as-is.
my $complete_url = $self->resolve_pod_link_by_table($to, $section);
if( $complete_url ) {
DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
$complete_url, "\n (Returning that.)\n";
return $complete_url;
} else {
DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
" didn't return anything interesting.\n";
}
}
if(defined $to and length $to) {
# Give this routine first hack again
my $there = $self->resolve_pod_link_by_table($to);
if(defined $there and length $there) {
DEBUG > 1
and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
} else {
$there =
$self->resolve_pod_page_link($to, $section);
# (I pass it the section value, but I don't see a
# particular reason it'd use it.)
DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
unless( defined $there and length $there ) {
DEBUG and print STDERR "Can't resolve $to\n";
return undef;
}
# resolve_pod_page_link returning undef is how it
# can signal that it gives up on making a link
}
# from here on $to holds the resolved page URL, not the page name
$to = $there;
}
#DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
# Assemble "page#section"; either part may be absent.
my $out = (defined $to and length $to) ? $to : '';
$out .= "#" . $section if defined $section and length $section;
unless(length $out) { # sanity check
DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
$to || "(nil)", $section || "(nil)";
return undef;
}
DEBUG and print STDERR "Resolved to $out\n";
return $out;
}
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# Turn a section name into a URL fragment: tidy it with section_name_tidy,
# then URL-escape the result with section_url_escape.
sub section_escape {
    my ($self, $section) = @_;
    my @tidied = $self->section_name_tidy($section);
    return $self->section_url_escape(@tidied);
}
# Normalize a section name for use as an anchor name: strip leading and
# trailing whitespace, turn spaces into underscores, drop control and
# non-ASCII characters, and run the result through unicode_escape_url.
# An empty result becomes '_'.
sub section_name_tidy {
    my ($self, $section) = @_;

    for ($section) {
        s/^\s+//;
        s/\s+$//;
        tr/ /_/;
    }

    if ($] ge 5.006) {
        $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
    }
    elsif ('A' eq chr(65)) { # But not on early EBCDIC
        $section =~ tr/\x00-\x1F\x80-\x9F//d;
    }

    $section = $self->unicode_escape_url($section);
    return length $section ? $section : '_';
}
# Three escaping entry points (section fragments, page paths, man page
# names).  Each one simply forwards its arguments to general_url_escape.
sub section_url_escape {
    my $self = shift;
    return $self->general_url_escape(@_);
}
sub pagepath_url_escape {
    my $self = shift;
    return $self->general_url_escape(@_);
}
sub manpage_url_escape {
    my $self = shift;
    return $self->general_url_escape(@_);
}
# Percent-escape a string for use in a URL and return the escaped copy.
#
# Pass 1 rewrites every character above 0xFF as a run of %XX escapes
# (intent per the original author: express Unicode things as
# urlencode(utf(orig))).
# Pass 2 replaces every character that is not in a small safe set
# (letters, digits and -_.!~*() ) with %XX.  This is a pretty conservative
# escaping, behoovey even for query components of a URL (see RFC 2396).
#
# NOTE(review): '%' is not in the safe set, so the escapes produced by
# pass 1 look like they get escaped again by pass 2 -- confirm whether
# that is intended.
# NOTE(review): pass 1 relies on unpack 'C*' yielding the UTF-8 octets of
# the character; verify that holds on the perl versions in use.
sub general_url_escape {
    my ($self, $text) = @_;

    $text =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;

    # Early perls have no utf8::native_to_unicode, so plain ord() is used
    # there (which is broken for non-ASCII platforms).
    my $codepoint_of = ($] ge 5.007_003)
        ? sub { utf8::native_to_unicode(ord($_[0])) }
        : sub { ord($_[0]) };

    # Yes, stipulate the list without a range, so that this can work right on
    # all charsets that this module happens to run under.
    $text =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',$codepoint_of->($1))/eg;

    return $text;
}
#--------------------------------------------------------------------------
#
# Oh look, a yawning portal to Hell! Let's play touch football right by it!
#
# Resolve a Pod page name to a URL by delegating to the batch-mode or the
# singleton-mode resolver, depending on $self->batch_mode().
# resolve_pod_page_link must return a properly escaped URL.
sub resolve_pod_page_link {
    my $self = shift;
    if ($self->batch_mode()) {
        return $self->resolve_pod_page_link_batch_mode(@_);
    }
    return $self->resolve_pod_page_link_singleton_mode(@_);
}
# Build the URL for a single Pod page: perldoc_url_prefix, the escaped page
# name, then perldoc_url_postfix.  Returns undef for an empty page name or
# an empty escaped result.
sub resolve_pod_page_link_singleton_mode {
    my ($self, $page) = @_;
    return undef unless defined $page and length $page;

    my $escaped = $self->pagepath_url_escape($page);
    $escaped =~ s{::$}{}s;      # probably never comes up anyway

    # Only turn '::' into '/' when the prefix is not a query-style URL
    # (i.e. contains no '?').  sane DWIM?
    unless ($self->perldoc_url_prefix =~ m/\?/s) {
        $escaped =~ s{::}{/}g;
    }

    return undef unless length $escaped;
    return $self->perldoc_url_prefix . $escaped . $self->perldoc_url_postfix;
}
# Batch-mode page resolution: split the page name on '::', let
# batch_mode_rectify_path prepend the relative-path prefix, escape every
# component, join them with '/' and append $HTML_EXTENSION.
# Returns undef when the name has no non-empty components.
sub resolve_pod_page_link_batch_mode {
    my ($self, $to) = @_;
    DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";

    my @parts = grep { length $_ } split(m/::/s, $to, -1);
    unless (@parts) { # sanity
        DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
        return undef;
    }

    $self->batch_mode_rectify_path(\@parts);    # modifies @parts in place

    my @escaped = map { $self->pagepath_url_escape($_) } @parts;
    my $out = join('/', @escaped) . $HTML_EXTENSION;
    DEBUG > 1 and print STDERR " => $out\n";
    return $out;
}
# Prepend a relative-path prefix to the array referenced by $pathbits (the
# array is modified in place; nothing is returned).  The prefix is one
# '..' per level between the current batch level and the root, or a single
# '.' when there is nothing to climb.
sub batch_mode_rectify_path {
    my ($self, $pathbits) = @_;

    # how many levels up to go to get to the root
    my $levels_up = $self->batch_mode_current_level - 1;

    my @prefix = $levels_up < 1
        ? ('.')                     # just to be pretty
        : (('..') x $levels_up);
    unshift @$pathbits, @prefix;
    return;
}
# Build the URL for a man page reference such as "crontab(5)" or "ls".
# The optional parenthesized part must be all digits; it defaults to 1.
# The result is man_url_prefix . "SECTION/" . escaped page . man_url_postfix.
# Returns undef when $to does not have the expected shape.  $frag is
# accepted but not used here.
sub resolve_man_page_link {
    my ($self, $to, $frag) = @_;

    # On a successful match the page part is necessarily non-empty.
    return undef unless $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
    my ($page, $volume) = ($1, $2);
    $volume ||= 1;

    my $url = $self->man_url_prefix . "$volume/";
    $url .= $self->manpage_url_escape($page);
    $url .= $self->man_url_postfix;
    return $url;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# A crazy hack to allow specifying custom L<foo> => URL mappings.
# Looks the link up in the hash stored under $self->{'podhtml_LOT'}:
# keyed by "PAGE#SECTION" when a section is given (PAGE may be empty),
# otherwise by PAGE alone.  Returns the table value (quite possibly
# undef), or nothing at all when no table is installed.
# TODO: add a method that actually populates podhtml_LOT from a file?
sub resolve_pod_link_by_table {
    my ($self, $to, $section) = @_;

    my $table = $self->{'podhtml_LOT'};
    return unless $table;   # An optimizy shortcut

    return $table->{$to} unless defined $section;

    $to = '' unless defined $to and length $to;
    return $table->{"$to#$section"};
}
###########################################################################
# Flatten a list of tokens into a single plain string.
#
# Arguments after the invocant are consumed left to right:
#   * plain strings (or anything without an is_text method) are appended
#     as they are;
#   * text tokens contribute their ->text;
#   * an X<...> sequence is skipped entirely, including nested X<>s;
#   * every other start/end token contributes nothing.
# The loop stops at the first false item in the list.
#
# Returns the accumulated string, or undef when it is longer than
# $Linearization_Limit.
sub linearize_tokens { # self, tokens
my $self = shift;
my $out = '';
my $t;
while($t = shift @_) {
if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
$out .= $t; # a string, or some insane thing
} elsif($t->is_text) {
$out .= $t->text;
} elsif($t->is_start and $t->tag eq 'X') {
# Ignore until the end of this X<...> sequence:
# $x_open counts how many X<> starts are still unclosed.
my $x_open = 1;
while($x_open) {
# text inside X<> is discarded; note this keeps shifting from the same
# @_ as the outer loop, so the skipped tokens are consumed for good
next if( ($t = shift @_)->is_text );
if( $t->is_start and $t->tag eq 'X') { ++$x_open }
elsif($t->is_end and $t->tag eq 'X') { --$x_open }
}
}
}
# too long to be useful as a name
return undef if length $out > $Linearization_Limit;
return $out;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Replace every character above 0xFF with its decimal code point in
# parentheses, e.g. char 1234 becomes "(1234)".  Returns the new string.
sub unicode_escape_url {
    my (undef, $text) = @_;     # the invocant is not needed
    $text =~ s{([^\x00-\xFF])}{ sprintf '(%d)', ord($1) }eg;
    return $text;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# A function (not a method): replace every character outside a small safe
# set with a numeric character reference (&#NNN;).
#
# What it does depends on the calling context:
#   scalar - escapes a copy of the first argument and returns it;
#   list   - escapes copies of all arguments and returns them;
#   void   - escapes the arguments in place (they alias the caller's
#            variables).
# Undefined values are left alone in list and void context.
sub esc { # a function.
    my $context = wantarray;

    # Escapes $_[0] in place.  Escape things very cautiously:
    # Leave out "- so that "--" won't make it thru in X-generated comments
    # with text in them.
    # Yes, stipulate the list without a range, so that this can work right on
    # all charsets that this module happens to run under.
    my $escape_in_place = sub {
        if ($] ge 5.007_003) {
            $_[0] =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
        } else { # Is broken for non-ASCII platforms on early perls
            $_[0] =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
        }
        return;
    };

    if (defined $context and not $context) {
        # scalar context: work on a copy of the first argument only
        my $copy = shift;
        $escape_in_place->($copy);
        return $copy;
    }

    @_ = splice @_ if $context;     # list context: break aliasing

    for my $item (@_) {
        $escape_in_place->($item) if defined $item;
    }
    return @_;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1;
__END__
=head1 NAME
Pod::Simple::HTML - convert Pod to HTML
=head1 SYNOPSIS
perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
=head1 DESCRIPTION
This class is for making an HTML rendering of a Pod document.
This is a subclass of L<Pod::Simple::PullParser> and inherits all its
methods (and options).
Note that if you want to do a batch conversion of a lot of Pod
documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
=head1 CALLING FROM THE COMMAND LINE
TODO
perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
=head1 CALLING FROM PERL
=head2 Minimal code
use Pod::Simple::HTML;
my $p = Pod::Simple::HTML->new;
$p->output_string(\my $html);
$p->parse_file('path/to/Module/Name.pm');
open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
print $out $html;
=head2 More detailed example
use Pod::Simple::HTML;
Set the content type:
$Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
my $p = Pod::Simple::HTML->new;
Include a single javascript source:
$p->html_javascript('http://abc.com/a.js');
Or insert multiple javascript source in the header
(or for that matter include anything, though this is not recommended)
$p->html_javascript('
<script type="text/javascript" src="http://abc.com/b.js"></script>
<script type="text/javascript" src="http://abc.com/c.js"></script>');
Include a single css source in the header:
$p->html_css('/style.css');
or insert multiple css sources:
$p->html_css('
<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css">
<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">');
Tell the parser where the output should go. In this case it will be placed in the $html variable:
my $html;
$p->output_string(\$html);
Parse and process a file with pod in it:
$p->parse_file('path/to/Module/Name.pm');
=head1 METHODS
TODO
all (most?) accessorized methods
The following variables need to be set B<before> the call to the ->new constructor.
Set the string that is included before the opening <html> tag:
$Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">\n};
Set the content-type in the HTML head: (defaults to ISO-8859-1)
$Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
Set the value that will be embedded in the opening tags of F, C tags and verbatim text.
F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "")
$Pod::Simple::HTML::Computerese = ' class="some_class_name"';
=head2 html_css
=head2 html_javascript
=head2 title_prefix
=head2 title_postfix
=head2 html_header_before_title
This includes everything before the <title> opening tag including the Document type
and including the opening <title> tag. The following call will set it to be a simple HTML
file:
$p->html_header_before_title('<html><head><title>');
=head2 top_anchor
By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML.
You can change it by calling
$p->top_anchor('<a name="zz" >');
=head2 html_h_level
Normally =head1 will become <h1>, =head2 will become <h2> etc.
Using the html_h_level method will change these levels setting the h level
of =head1 tags:
$p->html_h_level(3);
Will make sure that =head1 will become <h3> and =head2 will become <h4> etc...
=head2 index
Set it to some true value if you want to have an index (in reality a table of contents)
to be added at the top of the generated HTML.
$p->index(1);
=head2 html_header_after_title
Includes the closing tag of </title> and through the rest of the head
till the opening of the body
$p->html_header_after_title('</title>...</head><body id="my_id">');
=head2 html_footer
The very end of the document:
$p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
=head1 SUBCLASSING
Can use any of the methods described above but for further customization
one needs to override some of the methods:
package My::Pod;
use strict;
use warnings;
use base 'Pod::Simple::HTML';
# needs to return a URL string such as
# http://some.other.com/page.html
# #anchor_in_the_same_file
# /internal/ref.html
sub do_pod_link {
# My::Pod object and Pod::Simple::PullParserStartToken object
my ($self, $link) = @_;
say $link->tagname; # will be L for links
say $link->attr('to'); #
say $link->attr('type'); # will be 'pod' always
say $link->attr('section');
# Links local to our web site
if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') {
my $to = $link->attr('to');
if ($to =~ /^Padre::/) {
$to =~ s{::}{/}g;
return "/docs/Padre/$to.html";
}
}
# all other links are generated by the parent class
my $ret = $self->SUPER::do_pod_link($link);
return $ret;
}
1;
Meanwhile in script.pl:
use My::Pod;
my $p = My::Pod->new;
my $html;
$p->output_string(\$html);
$p->parse_file('path/to/Module/Name.pm');
open my $out, '>', 'out.html' or die;
print $out $html;
TODO
maybe override do_beginning do_end
=head1 SEE ALSO
L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
TODO: a corpus of sample Pod input and HTML output? Or common
idioms?
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002-2004 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 ACKNOWLEDGEMENTS
Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
L<Linux man pages online|http://man.he.net/> site for man page links.
Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
site for Perl module links.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/Text.pm 0000644 00000011757 15051135563 0007276 0 ustar 00
require 5;
package Pod::Simple::Text;
use strict;
use Carp ();
use Pod::Simple::Methody ();
use Pod::Simple ();
use vars qw( @ISA $VERSION $FREAKYMODE);
$VERSION = '3.35';
@ISA = ('Pod::Simple::Methody');
BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
? \&Pod::Simple::DEBUG
: sub() {0}
}
use Text::Wrap 98.112902 ();
$Text::Wrap::huge = 'overflow';
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Constructor: build the object through the parent class, then set up the
# plain-text formatter's defaults -- output to STDOUT unless a filehandle
# is already set, accept the text/plaintext/plain targets as text, enable
# the nix_X_codes and nbsp_for_S options, and start with an empty
# paragraph buffer at indent level 0.
sub new {
    my $class = shift;
    my $formatter = $class->SUPER::new(@_);

    $formatter->{'output_fh'} ||= *STDOUT{IO};
    $formatter->accept_target_as_text(qw( text plaintext plain ));
    $formatter->nix_X_codes(1);
    $formatter->nbsp_for_S(1);

    @{$formatter}{'Thispara', 'Indent', 'Indentstring'} = ('', 0, ' ');
    return $formatter;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Text events append to the paragraph buffer ('Thispara'); the start of
# each block-level element below resets that buffer to the empty string.
sub handle_text {
    my ($self, $text) = @_;
    $self->{'Thispara'} .= $text;
}
sub start_Para {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_head1 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_head2 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_head3 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_head4 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_Verbatim {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
# List items start the paragraph buffer with their marker: "* " for
# bullets, "N. " for numbered items (N is the 'number' attribute), nothing
# for text items.  When $FREAKYMODE is true the marker is suppressed.
sub start_item_bullet {
    my ($self) = @_;
    $self->{'Thispara'} = $FREAKYMODE ? '' : '* ';
}
sub start_item_number {
    my ($self, $attrs) = @_;
    $self->{'Thispara'} = $FREAKYMODE ? '' : "$attrs->{'number'}. ";
}
sub start_item_text {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
# Every kind of =over block deepens the indent level by one on entry and
# restores it on exit.
sub start_over_bullet {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub start_over_number {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub start_over_text {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub start_over_block {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub end_over_bullet {
    my ($self) = @_;
    return --$self->{'Indent'};
}
sub end_over_number {
    my ($self) = @_;
    return --$self->{'Indent'};
}
sub end_over_text {
    my ($self) = @_;
    return --$self->{'Indent'};
}
sub end_over_block {
    my ($self) = @_;
    return --$self->{'Indent'};
}
# . . . . . Now the actual formatters:
# End-of-element formatters: each flushes the paragraph buffer through
# emit_par, passing an indent tweak (headings and text items are
# outdented relative to ordinary paragraphs).
sub end_head1 {
    my ($self) = @_;
    return $self->emit_par(-4);
}
sub end_head2 {
    my ($self) = @_;
    return $self->emit_par(-3);
}
sub end_head3 {
    my ($self) = @_;
    return $self->emit_par(-2);
}
sub end_head4 {
    my ($self) = @_;
    return $self->emit_par(-1);
}
sub end_Para {
    my ($self) = @_;
    return $self->emit_par(0);
}
sub end_item_bullet {
    my ($self) = @_;
    return $self->emit_par(0);
}
sub end_item_number {
    my ($self) = @_;
    return $self->emit_par(0);
}
sub end_item_text {
    my ($self) = @_;
    return $self->emit_par(-2);
}
# URL links: remember the link's attributes when an L<> of type 'url'
# opens, and when the link closes append " <URL>" to the paragraph buffer
# unless the URL already appears there.
sub start_L {
    my ($self, $attrs) = @_;
    $self->{'Link'} = $attrs if $attrs->{type} eq 'url';
}
sub end_L {
    my ($self) = @_;
    if (my $link = delete $self->{'Link'}) {
        # Append the URL to the output unless it's already present.
        $self->{'Thispara'} .= " <$link->{to}>"
            unless $self->{'Thispara'} =~ /\b\Q$link->{to}/;
    }
}
# Flush the paragraph buffer: strip soft hyphens, wrap the text with
# Text::Wrap at an indent derived from the current list depth plus
# $tweak_indent, turn non-breaking spaces into ordinary spaces, print the
# result followed by a blank line, and clear the buffer.
sub emit_par {
    my $self         = shift;
    my $tweak_indent = shift;

    # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0
    my $margin = ' ' x ( 2 * $self->{'Indent'} + 4 + ($tweak_indent||0) );

    $self->{'Thispara'} =~ s/$Pod::Simple::shy//g;
    $self->{'Thispara'} .= "\n";

    my $wrapped = Text::Wrap::wrap($margin, $margin, $self->{'Thispara'});
    $wrapped =~ s/$Pod::Simple::nbsp/ /g;

    print { $self->{'output_fh'} } $wrapped, "\n";
    $self->{'Thispara'} = '';
    return;
}
# . . . . . . . . . . And then off by its lonesome:
# Flush a verbatim block: non-breaking spaces become ordinary spaces, soft
# hyphens are dropped, every line is indented according to the current
# list depth (no wrapping), and the block is printed followed by a blank
# line.  The paragraph buffer is cleared afterwards.
sub end_Verbatim {
    my ($self) = @_;
    my $margin = ' ' x ( 2 * $self->{'Indent'} + 4);

    for ($self->{'Thispara'}) {
        s/$Pod::Simple::nbsp/ /g;
        s/$Pod::Simple::shy//g;
        s/^/$margin/mg;
    }

    print { $self->{'output_fh'} } '', $self->{'Thispara'}, "\n\n";
    $self->{'Thispara'} = '';
    return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
__END__
=head1 NAME
Pod::Simple::Text -- format Pod as plaintext
=head1 SYNOPSIS
perl -MPod::Simple::Text -e \
"exit Pod::Simple::Text->filter(shift)->any_errata_seen" \
thingy.pod
=head1 DESCRIPTION
This class is a formatter that takes Pod and renders it as
wrapped plaintext.
Its wrapping is done by L<Text::Wrap>, so you can change
C<$Text::Wrap::columns> as you like.
This is a subclass of L<Pod::Simple> and inherits all its methods.
=head1 SEE ALSO
L<Pod::Simple>, L<Pod::Simple::TextContent>, L<Pod::Text>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/Checker.pm 0000644 00000012333 15051135563 0007705 0 ustar 00
# A quite dimwitted pod2plaintext that need only know how to format whatever
# text comes out of Pod::BlackBox's _gen_errata
require 5;
package Pod::Simple::Checker;
use strict;
use Carp ();
use Pod::Simple::Methody ();
use Pod::Simple ();
use vars qw( @ISA $VERSION );
$VERSION = '3.35';
@ISA = ('Pod::Simple::Methody');
BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
? \&Pod::Simple::DEBUG
: sub() {0}
}
use Text::Wrap 98.112902 (); # was 2001.0131, but I don't think we need that
# Let words longer than a line overflow the margin instead of being broken
# mid-word.  The Text::Wrap setting that controls this is $huge; assigning
# to $Text::Wrap::wrap (as this line used to) only created an unrelated
# variable and had no effect on wrapping.
$Text::Wrap::huge = 'overflow';
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Read-only accessor: true once the start of a "Pod Errors" (errata)
# section has been seen by start_head1, false before that.
#
# Called as a method ($checker->any_errata_seen), so the object is the
# first argument.  Reading the second argument instead, as this used to,
# meant the flag on the object was never consulted.
sub any_errata_seen {
    my ($self) = @_;
    return $self->{'Errata_seen'};
}
# Constructor: build the object through the parent class, then set up the
# checker's defaults -- output to STDOUT unless a filehandle is already
# set, enable the nix_X_codes and nbsp_for_S options, and start with an
# empty paragraph buffer, indent level 0 and no errata seen yet.
sub new {
    my $class = shift;
    my $checker = $class->SUPER::new(@_);

    $checker->{'output_fh'} ||= *STDOUT{IO};
    $checker->nix_X_codes(1);
    $checker->nbsp_for_S(1);

    @{$checker}{'Thispara', 'Indent', 'Indentstring', 'Errata_seen'}
        = ('', 0, ' ', 0);
    return $checker;
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Text is only collected once the errata section has started; a new
# paragraph always resets the buffer.
sub handle_text {
    my ($self, $text) = @_;
    $self->{'Errata_seen'} and $self->{'Thispara'} .= $text;
}
sub start_Para {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
# A =head1 whose attributes carry a true 'errata' flag marks the start of
# the errata section: from then on output is collected, and that first
# heading is prefixed with "FILENAME -- " when the source filename is
# known.  Later =head1s just reset the paragraph buffer.
sub start_head1 {
    my ($self, $attrs) = @_;
    if ($self->{'Errata_seen'}) {
        $self->{'Thispara'} = '';
    }
    elsif ($attrs->{'errata'}) {    # start of errata!
        $self->{'Errata_seen'} = 1;
        $self->{'Thispara'} = $self->{'source_filename'}
            ? "$self->{'source_filename'} -- "
            : '';
    }
}
# The start of each block-level element resets the paragraph buffer; list
# items start it with their marker ("* " for bullets, "N. " for numbered
# items where N is the 'number' attribute).
sub start_head2 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_head3 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_head4 {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_Verbatim {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
sub start_item_bullet {
    my ($self) = @_;
    $self->{'Thispara'} = '* ';
}
sub start_item_number {
    my ($self, $attrs) = @_;
    $self->{'Thispara'} = "$attrs->{'number'}. ";
}
sub start_item_text {
    my ($self) = @_;
    $self->{'Thispara'} = '';
}
# Every kind of =over block deepens the indent level by one on entry and
# restores it on exit.
sub start_over_bullet {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub start_over_number {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub start_over_text {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub start_over_block {
    my ($self) = @_;
    return ++$self->{'Indent'};
}
sub end_over_bullet {
    my ($self) = @_;
    return --$self->{'Indent'};
}
sub end_over_number {
    my ($self) = @_;
    return --$self->{'Indent'};
}
sub end_over_text {
    my ($self) = @_;
    return --$self->{'Indent'};
}
sub end_over_block {
    my ($self) = @_;
    return --$self->{'Indent'};
}
# . . . . . Now the actual formatters:
# End-of-element formatters: each flushes the paragraph buffer through
# emit_par, passing an indent tweak (headings and text items are
# outdented relative to ordinary paragraphs).
sub end_head1 {
    my ($self) = @_;
    return $self->emit_par(-4);
}
sub end_head2 {
    my ($self) = @_;
    return $self->emit_par(-3);
}
sub end_head3 {
    my ($self) = @_;
    return $self->emit_par(-2);
}
sub end_head4 {
    my ($self) = @_;
    return $self->emit_par(-1);
}
sub end_Para {
    my ($self) = @_;
    return $self->emit_par(0);
}
sub end_item_bullet {
    my ($self) = @_;
    return $self->emit_par(0);
}
sub end_item_number {
    my ($self) = @_;
    return $self->emit_par(0);
}
sub end_item_text {
    my ($self) = @_;
    return $self->emit_par(-2);
}
# Flush the paragraph buffer, but only once the errata section has begun
# (before that this does nothing).  Soft hyphens are stripped, the text is
# wrapped with Text::Wrap at an indent derived from the list depth plus
# $tweak_indent, non-breaking spaces become ordinary spaces, and the
# result is printed without an extra blank line.  The buffer is then
# cleared.
sub emit_par {
    my $self         = shift;
    my $tweak_indent = shift;
    return unless $self->{'Errata_seen'};

    # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0
    my $margin = ' ' x ( 2 * $self->{'Indent'} + ($tweak_indent||0) );

    $self->{'Thispara'} =~ s/$Pod::Simple::shy//g;
    $self->{'Thispara'} .= "\n";

    my $wrapped = Text::Wrap::wrap($margin, $margin, $self->{'Thispara'});
    $wrapped =~ s/$Pod::Simple::nbsp/ /g;

    print { $self->{'output_fh'} } $wrapped;
    $self->{'Thispara'} = '';
    return;
}
# . . . . . . . . . . And then off by its lonesome:
# Flush a verbatim block, but only once the errata section has begun.
# Non-breaking spaces become ordinary spaces, soft hyphens are dropped,
# every line is indented according to the current list depth (no
# wrapping), and the block is printed followed by a blank line.  The
# paragraph buffer is cleared afterwards.
sub end_Verbatim {
    my ($self) = @_;
    return unless $self->{'Errata_seen'};

    my $margin = ' ' x ( 2 * $self->{'Indent'} + 4);

    for ($self->{'Thispara'}) {
        s/$Pod::Simple::nbsp/ /g;
        s/$Pod::Simple::shy//g;
        s/^/$margin/mg;
    }

    print { $self->{'output_fh'} } '', $self->{'Thispara'}, "\n\n";
    $self->{'Thispara'} = '';
    return;
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
__END__
=head1 NAME
Pod::Simple::Checker -- check the Pod syntax of a document
=head1 SYNOPSIS
perl -MPod::Simple::Checker -e \
"exit Pod::Simple::Checker->filter(shift)->any_errata_seen" \
thingy.pod
=head1 DESCRIPTION
This class is for checking the syntactic validity of Pod.
It works by basically acting like a simple-minded version of
L<Pod::Simple::Text> that formats only the "Pod Errors" section
(if Pod::Simple even generates one for the given document).
This is a subclass of L<Pod::Simple> and inherits all its methods.
=head1 SEE ALSO
L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Checker>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/Subclassing.pod 0000644 00000101016 15051135563 0010761 0 ustar 00 =head1 NAME
Pod::Simple::Subclassing -- write a formatter as a Pod::Simple subclass
=head1 SYNOPSIS
package Pod::SomeFormatter;
use Pod::Simple;
@ISA = qw(Pod::Simple);
$VERSION = '1.01';
use strict;
sub _handle_element_start {
my($parser, $element_name, $attr_hash_r) = @_;
...
}
sub _handle_element_end {
my($parser, $element_name, $attr_hash_r) = @_;
# NOTE: $attr_hash_r is only present when $element_name is "over" or "begin"
# The remaining code excerpts will mostly ignore this $attr_hash_r, as it is
# mostly useless. It is documented where "over-*" and "begin" events are
# documented.
...
}
sub _handle_text {
my($parser, $text) = @_;
...
}
1;
=head1 DESCRIPTION
This document is about using Pod::Simple to write a Pod processor,
generally a Pod formatter. If you just want to know about using an
existing Pod formatter, instead see its documentation and see also the
docs in L<Pod::Simple>.
B<The zeroeth step> in writing a Pod formatter is to make sure that there
isn't already a decent one in CPAN. See L<http://search.cpan.org/>, and
run a search on the name of the format you want to render to. Also
consider joining the Pod People list
L<http://lists.perl.org/showlist.cgi?name=pod-people> and asking whether
anyone has a formatter for that format -- maybe someone cobbled one
together but just hasn't released it.
B<The first step> in writing a Pod processor is to read L<perlpodspec>,
which contains information on writing a Pod parser (which has been
largely taken care of by Pod::Simple), but also a lot of requirements
and recommendations for writing a formatter.
B<The second step> is to actually learn the format you're planning to
format to -- or at least as much as you need to know to represent Pod,
which probably isn't much.
B<The third step> is to pick which of Pod::Simple's interfaces you want to
use:
=over
=item Pod::Simple
The basic L<Pod::Simple> interface that uses C<_handle_element_start()>,
C<_handle_element_end()> and C<_handle_text()>.
=item Pod::Simple::Methody
The L<Pod::Simple::Methody> interface is event-based, similar to that of
L<HTML::Parser> or L<XML::Parser>'s "Handlers".
=item Pod::Simple::PullParser
L<Pod::Simple::PullParser> provides a token-stream interface, sort of
like L<HTML::TokeParser>'s interface.
=item Pod::Simple::SimpleTree
L<Pod::Simple::SimpleTree> provides a simple tree interface, rather like
L<XML::Parser>'s "Tree" interface. Users familiar with XML handling will
be comfortable with this interface. Users interested in outputting XML
should look into the modules that produce an XML representation of the
Pod stream, notably L<Pod::Simple::XMLOutStream>; you can feed the output
of such a class to whatever XML parsing system you are most at home with.
=back
B<The last step> is to write your code based on how the events (or tokens,
or tree-nodes, or the XML, or however you're parsing) will map to
constructs in the output format. Also be sure to consider how to escape
text nodes containing arbitrary text, and what to do with text
nodes that represent preformatted text (from verbatim sections).
=head1 Events
TODO intro... mention that events are supplied for implicits, like for
missing >'s
In the following section, we use XML to represent the event structure
associated with a particular construct. That is, TODO
=over
=item C<< $parser->_handle_element_start( I<element_name>, I<attr_hashref> ) >>
=item C<< $parser->_handle_element_end( I<element_name> ) >>
=item C<< $parser->_handle_text( I<text_string> ) >>
=back
TODO describe
=over
=item events with an element_name of Document
Parsing a document produces this event structure:
<Document start_line="543">
...all events...
</Document>
The value of the I<start_line> attribute will be the line number of the first
Pod directive in the document.
If there is no Pod in the given document, then the
event structure will be this:
<Document contentless="1" start_line="543">
</Document>
In that case, the value of the I<start_line> attribute will not be meaningful;
under current implementations, it will probably be the line number of the
last line in the file.
=item events with an element_name of Para
Parsing a plain (non-verbatim, non-directive, non-data) paragraph in
a Pod document produces this event structure:
<Para start_line="543">
...all events in this paragraph...
</Para>
The value of the I<start_line> attribute will be the line number of the start
of the paragraph.
For example, parsing this paragraph of Pod:
The value of the I<start_line> attribute will be the
line number of the start of the paragraph.
produces this event structure:
<Para start_line="129">
The value of the
<I>
start_line
</I>
attribute will be the line number of the start
of the paragraph.
</Para>
=item events with an element_name of B, C, F, or I.
Parsing a BE<lt>...E<gt> formatting code (or of course any of its
semantically identical syntactic variants
S<BE<lt>E<lt> ... E<gt>E<gt>>,
or S<BE<lt>E<lt>E<lt>E<lt> ... E<gt>E<gt>E<gt>E<gt>>, etc.)
produces this event structure:
<B>
...stuff...
</B>
Currently, there are no attributes conveyed.
Parsing C, F, or I codes produces the same structure, with only a
different element name.
If your parser object has been set to accept other formatting codes,
then they will be presented like these B/C/F/I codes -- i.e., without
any attributes.
=item events with an element_name of S
Normally, parsing an SE<lt>...E<gt> sequence produces this event
structure, just as if it were a B/C/F/I code:
<S>
...stuff...
</S>
However, Pod::Simple (and presumably all derived parsers) offers the
C<nbsp_for_S> option which, if enabled, will suppress all S events, and
instead change all spaces in the content to non-breaking spaces. This is
intended for formatters that output to a format that has no code that
means the same as SE<lt>...E<gt>, but which has a code/character that
means non-breaking space.
=item events with an element_name of X
Normally, parsing an XE<lt>...E<gt> sequence produces this event
structure, just as if it were a B/C/F/I code:
<X>
...stuff...
</X>
However, Pod::Simple (and presumably all derived parsers) offers the
C<nix_X_codes> option which, if enabled, will suppress all X events
and ignore their content. For formatters/processors that don't use
X events, this is presumably quite useful.
=item events with an element_name of L
Because the LE<lt>...E<gt> is the most complex construct in the
language, it should not surprise you that the events it generates are
the most complex in the language. Most of the complexity is hidden away in
the attribute values, so for those of you writing a Pod formatter that
produces a non-hypertextual format, you can just ignore the attributes
and treat an L event structure like a formatting element that
(presumably) doesn't actually produce a change in formatting. That is,
the content of the L event structure (as opposed to its
attributes) is always what text should be displayed.
There are, at first glance, three kinds of L links: URL, man, and pod.
When a LE<lt>I<some_url>E<gt> code is parsed, it produces this event
structure:
<L content-implicit="yes" raw="that_url" to="that_url" type="url">
that_url
</L>
The C<type="url"> attribute is always specified for this type of
L code.
For example, this Pod source:
L<http://www.perl.com/CPAN/authors/>
produces this event structure:
<L content-implicit="yes" raw="http://www.perl.com/CPAN/authors/" to="http://www.perl.com/CPAN/authors/" type="url">
http://www.perl.com/CPAN/authors/
</L>
When a LE<lt>I<manpage(section)>E<gt> code is parsed (and these are
fairly rare and not terribly useful), it produces this event structure:
<L content-implicit="yes" raw="manpage(section)" to="manpage(section)" type="man">
manpage(section)
</L>
The C<type="man"> attribute is always specified for this type of
L code.
For example, this Pod source:
L<crontab(5)>
produces this event structure:
<L content-implicit="yes" raw="crontab(5)" to="crontab(5)" type="man">
crontab(5)
</L>
In the rare cases where a man page link has a section specified, that text appears
in a I<section> attribute. For example, this Pod source:
L<crontab(5)/"ENVIRONMENT">
will produce this event structure:
<L content-implicit="yes" raw="crontab(5)/"ENVIRONMENT"" section="ENVIRONMENT" to="crontab(5)" type="man">
"ENVIRONMENT" in crontab(5)
</L>
In the rare case where the Pod document has code like
LE<lt>I<sometext>|I<manpage(section)>E<gt>, then the I<sometext> will appear
as the content of the element, the I<manpage(section)> text will appear
only as the value of the I<to> attribute, and there will be no
C<content-implicit="yes"> attribute (whose presence means that the Pod parser
had to infer what text should appear as the link text -- as opposed to
cases where that attribute is absent, which means that the Pod parser did
I<not> have to infer the link text, because that L code explicitly specified
some link text.)
For example, this Pod source:
L<hell itself!|crontab(5)>
will produce this event structure:
<L raw="hell itself!|crontab(5)" to="crontab(5)" type="man">
hell itself!
</L>
The last type of L structure is for links to/within Pod documents. It is
the most complex because it can have a I<to> attribute, I<or> a
I<section> attribute, or both. The C<type="pod"> attribute is always
specified for this type of L code.
In the most common case, the simple case of a LE<lt>podpageE<gt> code
produces this event structure:
<L content-implicit="yes" raw="podpage" to="podpage" type="pod">
podpage
</L>
For example, this Pod source:
L<Net::Ping>
produces this event structure:
<L content-implicit="yes" raw="Net::Ping" to="Net::Ping" type="pod">
Net::Ping
</L>
In cases where there is link-text explicitly specified, it
is to be found in the content of the element (and not the
attributes), just as with the LE<lt>I<sometext>|I<manpage(section)>E<gt>
case discussed above. For example, this Pod source:
L<Perl Error Messages|perldiag>
produces this event structure:
<L raw="Perl Error Messages|perldiag" to="perldiag" type="pod">
Perl Error Messages
</L>
In cases of links to a section in the current Pod document,
there is a I<section> attribute instead of a I<to> attribute.
For example, this Pod source:
L</"Member Data">
produces this event structure:
<L content-implicit="yes" raw="/"Member Data"" section="Member Data" type="pod">
"Member Data"
</L>
As another example, this Pod source:
L<the various attributes|/"Member Data">
produces this event structure:
<L raw="the various attributes|/"Member Data"" section="Member Data" type="pod">
the various attributes
</L>
In cases of links to a section in a different Pod document,
there are both a I<section> attribute and a I<to> attribute.
For example, this Pod source:
L<perlsyn/"Basic BLOCKs and Switch Statements">
produces this event structure:
<L content-implicit="yes" raw="perlsyn/"Basic BLOCKs and Switch Statements"" section="Basic BLOCKs and Switch Statements" to="perlsyn" type="pod">
"Basic BLOCKs and Switch Statements" in perlsyn
</L>
As another example, this Pod source:
L<SWITCH statements|perlsyn/"Basic BLOCKs and Switch Statements">
produces this event structure:
<L raw="SWITCH statements|perlsyn/"Basic BLOCKs and Switch Statements"" section="Basic BLOCKs and Switch Statements" to="perlsyn" type="pod">
SWITCH statements
</L>
Incidentally, note that we do not distinguish between these syntaxes:
L</"Member Data">
L<"Member Data">
L</Member Data>
L<Member Data> [deprecated syntax]
That is, they all produce the same event structure (for the most part), namely:
<L content-implicit="yes" raw="$depends_on_syntax" section="Member Data" type="pod">
"Member Data"
</L>
The I<raw> attribute depends on what the raw content of the C<LE<lt>E<gt>> is,
so that is why the event structure is the same "for the most part".
If you have not guessed it yet, the I<raw> attribute contains the raw,
original, unescaped content of the C<LE<lt>E<gt>> formatting code. In addition
to the examples above, take notice of the following event structure produced
by the following C<LE<lt>E<gt>> formatting code.
L<click B<here>|page/About the C<-M> switch>
<L raw="click B<here>|page/About the C<-M> switch" section="About the -M switch" to="page" type="pod">
click B<here>
</L>
Specifically, notice that the formatting codes are present and unescaped
in I<raw>.
There is a known bug in the I<raw> attribute where any surrounding whitespace
is condensed into a single ' '. For example, given LE<60> linkE<62>, I<raw>
will be " link".
=item events with an element_name of E or Z
While there are Pod codes EE<lt>...E<gt> and ZE<lt>E<gt>, these
I<do not> produce any E or Z events -- that is, there are no such
events as E or Z.
=item events with an element_name of Verbatim
When a Pod verbatim paragraph (AKA "codeblock") is parsed, it
produces this event structure:
<Verbatim start_line="543" xml:space="preserve">
...text...
</Verbatim>
The value of the I<start_line> attribute will be the line number of the
first line of this verbatim block. The I<xml:space> attribute is always
present, and always has the value "preserve".
The text content will have tabs already expanded.
=item events with an element_name of head1 .. head4
When a "=head1 ..." directive is parsed, it produces this event
structure:
<head1>
...stuff...
</head1>
For example, a directive consisting of this:
=head1 Options to C<new> et al.
will produce this event structure:
<head1 start_line="543">
Options to
<C>
new
</C>
et al.
</head1>
"=head2" through "=head4" directives are the same, except for the element
names in the event structure.
=item events with an element_name of encoding
In the default case, the events corresponding to C<=encoding> directives
are not emitted. They are emitted if C<keep_encoding_directive> is true.
In that case they produce event structures like
L</"events with an element_name of head1 .. head4"> above.
=item events with an element_name of over-bullet
When an "=over ... Z<>=back" block is parsed where the items are
a bulleted list, it will produce this event structure:
<over-bullet indent="4" start_line="543">
<item-bullet start_line="545">
...Stuff...
</item-bullet>
...more item-bullets...
</over-bullet fake-closer="1">
The attribute I<fake-closer> is only present if it is a true value; it is not
present if it is a false value. It is shown in the above example to illustrate
where the attribute is (in the B<closing> tag). It signifies that the C<=over>
did not have a matching C<=back>, and thus Pod::Simple had to create a fake
closer.
For example, this Pod source:
=over
=item *
Something
=back
Would produce an event structure that does B<not> have the I<fake-closer>
attribute, whereas this Pod source:
=over
=item *
Gasp! An unclosed =over block!
would. The rest of the over-* examples will not demonstrate this attribute,
but they all can have it. See L<Pod::Checker>'s source for an example of this
attribute being used.
The value of the I<indent> attribute is whatever value is after the
"=over" directive, as in "=over 8". If no such value is specified
in the directive, then the I<indent> attribute has the value "4".
For example, this Pod source:
=over
=item *
Stuff
=item *
Bar I<baz>!
=back
produces this event structure:
<over-bullet indent="4" start_line="10">
<item-bullet start_line="12">
Stuff
</item-bullet>
<item-bullet start_line="14">
Bar <I>baz</I>!
</item-bullet>
</over-bullet>
=item events with an element_name of over-number
When an "=over ... Z<>=back" block is parsed where the items are
a numbered list, it will produce this event structure:
<over-number indent="4" start_line="543">
<item-number number="1" start_line="545">
...Stuff...
</item-number>
...more item-number...
</over-number>
This is like the "over-bullet" event structure; but note that the contents
are "item-number" instead of "item-bullet", and note that they will have
a "number" attribute, which some formatters/processors may ignore
(since, for example, there's no need for it in HTML when producing
an "<OL><LI>...</LI>...</OL>" structure), but which any processor may use.
Note that the values for the I<number> attributes of "item-number"
elements in a given "over-number" area I<will> start at 1 and go up by
one each time. If the Pod source doesn't follow that order (even though
it really should!), whatever numbers it has will be ignored (with
the correct values being put in the I<number> attributes), and an error
message might be issued to the user.
=item events with an element_name of over-text
These events are somewhat unlike the other over-*
structures, as far as what their contents are. When
an "=over ... Z<>=back" block is parsed where the items are
a list of text "subheadings", it will produce this event structure:
<over-text indent="4" start_line="543">
<item-text>
...stuff...
</item-text>
...stuff (generally Para or Verbatim elements)...
<item-text>
...more item-text and/or stuff...
</over-text>
The I<indent> and I<fake-closer> attributes are as with the other over-* events.
For example, this Pod source:
=over
=item Foo
Stuff
=item Bar I<baz>!
Quux
=back
produces this event structure:
<over-text indent="4" start_line="20">
<item-text start_line="22">
Foo
</item-text>
<Para start_line="24">
Stuff
</Para>
<item-text start_line="26">
Bar
<I>
baz
</I>
!
</item-text>
<Para start_line="28">
Quux
</Para>
</over-text>
=item events with an element_name of over-block
These events are somewhat unlike the other over-*
structures, as far as what their contents are. When
an "=over ... Z<>=back" block is parsed where there are no items,
it will produce this event structure:
<over-block indent="4" start_line="543">
...stuff (generally Para or Verbatim elements)...
</over-block>
The I<indent> and I<fake-closer> attributes are as with the other over-* events.
For example, this Pod source:
=over
For cutting off our trade with all parts of the world
For transporting us beyond seas to be tried for pretended offenses
He is at this time transporting large armies of foreign mercenaries to
complete the works of death, desolation and tyranny, already begun with
circumstances of cruelty and perfidy scarcely paralleled in the most
barbarous ages, and totally unworthy the head of a civilized nation.
=back
will produce this event structure:
<over-block indent="4" start_line="2">
<Para start_line="4">
For cutting off our trade with all parts of the world
</Para>
<Para start_line="6">
For transporting us beyond seas to be tried for pretended offenses
</Para>
<Para start_line="8">
He is at this time transporting large armies of [...more text...]
</Para>
</over-block>
=item events with an element_name of over-empty
B<Note: These events are only triggered if C<parse_empty_lists()> is set to a
true value.>
These events are somewhat unlike the other over-* structures, as far as what
their contents are. When an "=over ... Z<>=back" block is parsed where there
is no content, it will produce this event structure:
<over-empty indent="4" start_line="543">
</over-empty>
The I<indent> and I<fake-closer> attributes are as with the other over-* events.
For example, this Pod source:
=over
=over
=back
=back
will produce this event structure:
<over-block indent="4" start_line="1">
<over-empty indent="4" start_line="3">
</over-empty>
</over-block>
Note that the outer C<=over> is a block because it has no C<=item>s but still
has content: the inner C<=over>. The inner C<=over>, in turn, is completely
empty, and is treated as such.
=item events with an element_name of item-bullet
See L</"events with an element_name of over-bullet">, above.
=item events with an element_name of item-number
See L</"events with an element_name of over-number">, above.
=item events with an element_name of item-text
See L</"events with an element_name of over-text">, above.
=item events with an element_name of for
TODO...
=item events with an element_name of Data
TODO...
=back
=head1 More Pod::Simple Methods
Pod::Simple provides a lot of methods that aren't generally interesting
to the end user of an existing Pod formatter, but some of which you
might find useful in writing a Pod formatter. They are listed below. The
first several methods (the accept_* methods) are for declaring the
capabilities of your parser, notably what C<=for I<targetname>> sections
it's interested in, and what extra NE<lt>...E<gt> codes it accepts beyond
the ones described in L<perlpod>.
=over
=item C<< $parser->accept_targets( I<SOMEVALUE> ) >>
As the parser sees sections like:
=for html <img src="fig1.jpg">
or
=begin html
<img src="fig1.jpg">
=end html
...the parser will ignore these sections unless your subclass has
specified that it wants to see sections targeted to "html" (or whatever
the formatter name is).
If you want to process all sections, even if they're not targeted for you,
call this before you start parsing:
$parser->accept_targets('*');
=item C<< $parser->accept_targets_as_text( I<SOMEVALUE> ) >>
This is like accept_targets, except that it specifies also that the
content of sections for this target should be treated as Pod text even
if the target name in "=for I<targetname>" doesn't start with a ":".
At time of writing, I don't think you'll need to use this.
=item C<< $parser->accept_codes( I<Codename>, I<Codename>... ) >>
This tells the parser that you accept additional formatting codes,
beyond just the standard ones (I B C L F S X, plus the two weird ones
you don't actually see in the parse tree, Z and E). For example, to also
accept codes "N", "R", and "W":
$parser->accept_codes( qw( N R W ) );
B<TODO: document how this interacts with =extend, and long element names>
=item C<< $parser->accept_directive_as_data( I<directive_name> ) >>
=item C<< $parser->accept_directive_as_verbatim( I<directive_name> ) >>
=item C<< $parser->accept_directive_as_processed( I<directive_name> ) >>
In the unlikely situation that you need to tell the parser that you will
accept additional directives ("=foo" things), you need to first set the
parser to treat its content as data (i.e., not really processed at
all), or as verbatim (mostly just expanding tabs), or as processed text
(parsing formatting codes like BE<lt>...E<gt>).
For example, to accept a new directive "=method", you'd presumably
use:
$parser->accept_directive_as_processed("method");
so that you could have Pod lines like:
=method I<$whatever> thing B<um>
Making up your own directives breaks compatibility with other Pod
formatters, in a way that using "=for I<target> ..." lines doesn't;
however, you may find this useful if you're making a Pod superset
format where you don't need to worry about compatibility.
=item C<< $parser->nbsp_for_S( I<BOOLEAN> ); >>
Setting this attribute to a true value (and by default it is false) will
turn "SE<lt>...E<gt>" sequences into sequences of words separated by
C<\xA0> (non-breaking space) characters. For example, it will take this:
I like S<Dutch apple pie>, don't you?
and treat it as if it were:
I like DutchE<nbsp>appleE<nbsp>pie, don't you?
This is handy for output formats that don't have anything quite like an
"SE<lt>...E<gt>" code, but which do have a code for non-breaking space.
There is currently no method for going the other way; but I can
probably provide one upon request.
=item C<< $parser->version_report() >>
This returns a string reporting the $VERSION value from your module (and
its classname) as well as the $VERSION value of Pod::Simple. Note that
L<perlpodspec> requires output formats (wherever possible) to note
this detail in a comment in the output format. For example, for
some kind of SGML output format:
print OUT "<!-- \n", $parser->version_report, "\n -->";
=item C<< $parser->pod_para_count() >>
This returns the count of Pod paragraphs seen so far.
=item C<< $parser->line_count() >>
This is the current line number being parsed. But you might find the
"start_line" event attribute more accurate, when it is present.
=item C<< $parser->nix_X_codes( I<SOMEVALUE> ) >>
This attribute, when set to a true value (and it is false by default)
ignores any "XE<lt>...E<gt>" sequences in the document being parsed.
Many formats don't actually use the content of these codes, so have
no reason to process them.
=item C<< $parser->keep_encoding_directive( I<SOMEVALUE> ) >>
This attribute, when set to a true value (it is false by default)
will keep C<=encoding> and its content in the event structure. Most
formats don't actually need to process the content of an C<=encoding>
directive, even when this directive sets the encoding and the
processor makes use of the encoding information. Indeed, it is
possible to know the encoding without processing the directive
content.
=item C<< $parser->merge_text( I<SOMEVALUE> ) >>
This attribute, when set to a true value (and it is false by default)
makes sure that only one event (or token, or node) will be created
for any single contiguous sequence of text. For example, consider
this somewhat contrived example:
I just LOVE Z<>hotE<32>apple pie!
When that is parsed and events are about to be called on it, it may
actually seem to be four different text events, one right after another:
one event for "I just LOVE ", one for "hot", one for " ", and one for
"apple pie!". But if you have merge_text on, then you're guaranteed
that it will be fired as one text event: "I just LOVE hot apple pie!".
=item C<< $parser->code_handler( I<CODE_REF> ) >>
This specifies code that should be called when a code line is seen
(i.e., a line outside of the Pod). Normally this is undef, meaning
that no code should be called. If you provide a routine, it should
start out like this:
sub get_code_line { # or whatever you'll call it
my($line, $line_number, $parser) = @_;
...
}
Note, however, that sometimes the Pod events aren't processed in exactly
the same order as the code lines are -- i.e., if you have a file with
Pod, then code, then more Pod, sometimes the code will be processed (via
whatever you have code_handler call) before all of the preceding Pod
has been processed.
=item C<< $parser->cut_handler( I<CODE_REF> ) >>
This is just like the code_handler attribute, except that it's for
"=cut" lines, not code lines. The same caveats apply. "=cut" lines are
unlikely to be interesting, but this is included for completeness.
=item C<< $parser->pod_handler( I<CODE_REF> ) >>
This is just like the code_handler attribute, except that it's for
"=pod" lines, not code lines. The same caveats apply. "=pod" lines are
unlikely to be interesting, but this is included for completeness.
=item C<< $parser->whiteline_handler( I<CODE_REF> ) >>
This is just like the code_handler attribute, except that it's for
lines that are seemingly blank but have whitespace (" " and/or "\t") on them,
not code lines. The same caveats apply. These lines are unlikely to be
interesting, but this is included for completeness.
=item C<< $parser->whine( I<linenumber>, I<complaint string> ) >>
This notes a problem in the Pod, which will be reported in the "Pod
Errors" section of the document and/or sent to STDERR, depending on the
values of the attributes C<no_whining>, C<no_errata_section>, and
C<complain_stderr>.
=item C<< $parser->scream( I<linenumber>, I<complaint string> ) >>
This notes an error like C<whine> does, except that it is not
suppressible with C<no_whining>. This should be used only for very
serious errors.
=item C<< $parser->source_dead(1) >>
This aborts parsing of the current document, by switching on the flag
that indicates that EOF has been seen. In particularly drastic cases,
you might want to do this. It's rather nicer than just calling
C<die>!
=item C<< $parser->hide_line_numbers( I<SOMEVALUE> ) >>
Some subclasses that indiscriminately dump event attributes (well,
except for ones beginning with "~") can use this object attribute to
refrain from dumping the "start_line" attribute.
=item C<< $parser->no_whining( I<SOMEVALUE> ) >>
This attribute, if set to true, will suppress reports of non-fatal
error messages. The default value is false, meaning that complaints
I<are> reported. How they get reported depends on the values of
the attributes C<no_errata_section> and C<complain_stderr>.
=item C<< $parser->no_errata_section( I<SOMEVALUE> ) >>
This attribute, if set to true, will suppress generation of an errata
section. The default value is false -- i.e., an errata section will be
generated.
=item C<< $parser->complain_stderr( I<SOMEVALUE> ) >>
This attribute, if set to true, will send complaints to STDERR. The
default value is false -- i.e., complaints do not go to STDERR.
=item C<< $parser->bare_output( I<SOMEVALUE> ) >>
Some formatter subclasses use this as a flag for whether output should
have prologue and epilogue code omitted. For example, setting this to
true for an HTML formatter class should omit the
"<html><head><title>...</title><body>..." prologue and the
"</body></html>" epilogue.
If you want to set this to true, you should probably also set
C<no_whining> or at least C<no_errata_section> to true.
=item C<< $parser->preserve_whitespace( I<SOMEVALUE> ) >>
If you set this attribute to a true value, the parser will try to
preserve whitespace in the output. This means that such formatting
conventions as two spaces after periods will be preserved by the parser.
This is primarily useful for output formats that treat whitespace as
significant (such as text or *roff, but not HTML).
=item C<< $parser->parse_empty_lists( I<SOMEVALUE> ) >>
If this attribute is set to true, the parser will not ignore empty
C<=over>/C<=back> blocks. The type of C<=over> will be I<empty>, documented
above, in L</"events with an element_name of over-empty">.
=back
=head1 SEE ALSO
L<Pod::Simple> -- event-based Pod-parsing framework
L<Pod::Simple::Methody> -- like Pod::Simple, but each sort of event
calls its own method (like C<start_head3>)
L<Pod::Simple::PullParser> -- a Pod-parsing framework like Pod::Simple,
but with a token-stream interface
L<Pod::Simple::SimpleTree> -- a Pod-parsing framework like Pod::Simple,
but with a tree interface
L<Pod::Simple::Checker> -- a simple Pod::Simple subclass that reads
documents, and then makes a plaintext report of any errors found in the
document
L<Pod::Simple::DumpAsXML> -- for dumping Pod documents as tidily
indented XML, showing each event on its own line
L<Pod::Simple::XMLOutStream> -- dumps a Pod document as XML (without
introducing extra whitespace as Pod::Simple::DumpAsXML does).
L<Pod::Simple::DumpAsText> -- for dumping Pod documents as tidily
indented text, showing each event on its own line
L<Pod::Simple::LinkSection> -- class for objects representing the values
of the TODO and TODO attributes of LE<lt>...E<gt> elements
L<Pod::Escapes> -- the module that Pod::Simple uses for evaluating
EE<lt>...E<gt> content
L<Pod::Simple::Text> -- a simple plaintext formatter for Pod
L<Pod::Simple::TextContent> -- like Pod::Simple::Text, but
makes no effort to indent or wrap the text being formatted
L<Pod::Simple::HTML> -- a simple HTML formatter for Pod
L<perlpod|perlpod>
L<perlpodspec|perlpodspec>
L<perldoc>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=for notes
Hm, my old podchecker version (1.2) says:
*** WARNING: node 'http://search.cpan.org/' contains non-escaped | or / at line 38 in file Subclassing.pod
*** WARNING: node 'http://lists.perl.org/showlist.cgi?name=pod-people' contains non-escaped | or / at line 41 in file Subclassing.pod
Yes, L<...> is hard.
=cut
Simple/PullParserEndToken.pm 0000644 00000005504 15051135563 0012064 0 ustar 00
require 5;
package Pod::Simple::PullParserEndToken;

# End-token objects for Pod::Simple::PullParser.
#
# Each object is a blessed array reference laid out as
#   [ 'end', TAGNAME, ...any further constructor arguments... ]
# so slot 0 always holds the literal string 'end' and slot 1 holds the
# tag name that the accessors below read and write.

use strict;
use Pod::Simple::PullParserToken ();
use vars qw($VERSION @ISA);

$VERSION = '3.35';
@ISA     = ('Pod::Simple::PullParserToken');

# Constructor: Class->new(tagname) or $token->new(tagname).
# When called on an existing object, the new token is blessed into that
# object's class. All arguments after the invocant are stored, in order,
# after the leading 'end' marker.
sub new {
  my ($invocant, @fields) = @_;
  my $class = ref($invocant) || $invocant;
  return bless ['end', @fields], $class;
}

# Combined getter/setter for the tag name (slot 1).
# With exactly one argument after the invocant, stores it and returns the
# stored value; with any other argument count, returns the current value.
sub tagname {
  my $token = $_[0];
  if (@_ == 2) {
    return $token->[1] = $_[1];
  }
  return $token->[1];
}

# Alias for tagname(); forwards every argument unchanged.
sub tag {
  my $token = shift;
  return $token->tagname(@_);
}

# True when the stored tag name is string-equal to the given name.
sub is_tagname {
  my ($token, $wanted) = @_;
  return $token->[1] eq $wanted;
}

# Alias for is_tagname(); forwards every argument unchanged.
sub is_tag {
  my $token = shift;
  return $token->is_tagname(@_);
}

1;
__END__
=head1 NAME
Pod::Simple::PullParserEndToken -- end-tokens from Pod::Simple::PullParser
=head1 SYNOPSIS
(See L<Pod::Simple::PullParser>)
=head1 DESCRIPTION
When you do $parser->get_token on a L<Pod::Simple::PullParser>, you might
get an object of this class.
This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods,
and adds these methods:
=over
=item $token->tagname
This returns the tagname for this end-token object.
For example, parsing a "=head1 ..." line will give you
a start-token with the tagname of "head1", token(s) for its
content, and then an end-token with the tagname of "head1".
=item $token->tagname(I<somestring>)
This changes the tagname for this end-token object.
You probably won't need to do this.
=item $token->tag(...)
A shortcut for $token->tagname(...)
=item $token->is_tag(I<somestring>) or $token->is_tagname(I<somestring>)
These are shortcuts for C<< $token->tag() eq I<somestring> >>
=back
You're unlikely to ever need to construct an object of this class for
yourself, but if you want to, call
C<<
Pod::Simple::PullParserEndToken->new( I<tagname> )
>>
=head1 SEE ALSO
L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing>
=head1 SUPPORT
Questions or discussion about POD and Pod::Simple should be sent to the
pod-people@perl.org mail list. Send an empty email to
pod-people-subscribe@perl.org to subscribe.
This module is managed in an open GitHub repository,
L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
Patches against Pod::Simple are welcome. Please send bug reports to
<bug-pod-simple@rt.cpan.org>.
=head1 COPYRIGHT AND DISCLAIMERS
Copyright (c) 2002 Sean M. Burke.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=head1 AUTHOR
Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
But don't bother him, he's retired.
Pod::Simple is maintained by:
=over
=item * Allison Randal C<allison@perl.org>
=item * Hans Dieter Pearcey C<hdp@cpan.org>
=item * David E. Wheeler C<dwheeler@cpan.org>
=back
=cut
Simple/TiedOutFH.pm 0000644 00000005277 15051135563 0010145 0 ustar 00
use strict;
package Pod::Simple::TiedOutFH;
use Symbol ('gensym');
use Carp ();

use vars qw($VERSION );
$VERSION = '3.35';

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Class->handle_on($somescalar) or Class->handle_on(\$somescalar)
# Returns a fresh glob tied to this class; whatever is printed to it is
# appended to the target scalar. Croaks with a usage message when called
# without an argument.
sub handle_on {
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant;
    Carp::croak "Usage: ${class}->handle_on(\$somescalar)" unless @_;

    # $_[0] is deliberately never copied: it aliases the caller's variable,
    # so a reference taken to it lets us write into the caller's own scalar
    # when a plain (non-reference) value was passed.
    my $target_ref;
    if (defined($_[0]) and ref($_[0])) {
        $target_ref = $_[0];
    }
    else {
        $target_ref = \$_[0];
    }

    $$target_ref = '' unless defined $$target_ref;
    #Pod::Simple::DEBUG and print STDERR "New $class handle on $target_ref = \"$$target_ref\"\n";

    my $glob = gensym();
    tie *$glob, $class, $target_ref;
    return $glob;
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# tie() constructor. The object is a blessed reference to the scalar
# reference it was given, so the output buffer is reached as $$$self.
sub TIEHANDLE {
    my ($invocant, $buffer_ref) = @_;
    $$buffer_ref = '' unless defined $$buffer_ref;
    my $class = ref($invocant) || $invocant;
    return bless \$buffer_ref, $class;
}

# print() on the tied handle: append each argument to the buffer, in order.
sub PRINT {
    my $self = shift;
    $$$self .= $_ for @_;
    #Pod::Simple::DEBUG > 10 and print STDERR " appended to $$self = \"$$$self\"\n";
    return 1;
}

# Returns the scalar reference held by the object (not the buffer contents).
sub FETCH {
    my ($self) = @_;
    return $$self;
}

# printf() on the tied handle: format the arguments and append the result.
sub PRINTF {
    my ($self, $template, @values) = @_;
    $$$self .= sprintf($template, @values);
    return 1;
}

# fileno() on the tied handle: the inner reference used as a number, plus
# 100 -- just to produce SOME number.
sub FILENO {
    my ($self) = @_;
    return $$self + 100;
}

# close() on the tied handle always reports success.
sub CLOSE {
    return 1;
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1;
__END__
Chole
* 1 large red onion
* 2 tomatillos
* 4 or 5 roma tomatoes (optionally with the pulp discarded)
* 1 tablespoon chopped ginger root (or more, to taste)
* 2 tablespoons canola oil (or vegetable oil)
* 1 tablespoon garam masala
* 1/2 teaspoon red chili powder, or to taste
* Salt, to taste (probably quite a bit)
* 2 (15-ounce) cans chick peas or garbanzo beans, drained and rinsed
* juice of one smallish lime
* a dash of balsamic vinegar (to taste)
* cooked rice, preferably long-grain white rice (whether plain,
basmati rice, jasmine rice, or even a mild pilaf)
In a blender or food processor, puree the onions, tomatoes, tomatillos,
and ginger root. You can even do it with a Braun hand "mixer", if you
chop things finer to start with, and work at it.
In a saucepan set over moderate heat, warm the oil until hot.
Add the puree and the balsamic vinegar, and cook, stirring occasionally,
for 20 to 40 minutes. (Cooking it longer will make it sweeter.)
Add the garam masala and chili powder, and cook, stirring occasionally, for
5 minutes.
Add the salt and chick peas and cook, stirring, until heated through.
Stir in the lime juice, and optionally one or two teaspoons of tahini.
You can let it simmer longer, depending on how much softer you want the
garbanzos to get.
Serve over rice, like a curry.
Yields 5 to 7 servings.