Sisyphus repositório
Última atualização: 1 outubro 2023 | SRPMs: 18631 | Visitas: 37865635
en ru br
ALT Linux repositórios
S:0.59-alt2
5.0: 0.55-alt2
4.1: 0.55-alt1

Group :: Desenvolvimento/Perl
RPM: perl-HTML-WikiConverter-MediaWiki

 Main   Changelog   Spec   Patches   Sources   Download   Gear   Bugs e FR  Repocop 

HTML-WikiConverter-MediaWiki-0.59/000075500000000000000000000000001147254762400166575ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/Changes000064400000000000000000000030361147254762400201540ustar00rootroot00000000000000# Change log for HTML::WikiConverter::MediaWiki

version: 0.59
date: 2009-05-29
changes:
- (bug #46453) fix bug in which <nowiki> was triggered too often
- update readme documentation

version: 0.58
date: 2009-03-06
changes:
- fix manifest

version: 0.57
date: 2009-03-06
changes:
- no longer pad section headings by default
- add new span/font tests, marked as TODO
- add perl license to Makefile.PL

version: 0.56
date: 2008-11-11
changes:
- (bug #28402) add tbody, thead, font to passthrough_naked_tags
- preserve image width in [[Image:]] markup
- update 'table w/ blocks' test now that H::WC 0.63 properly supports nested blocks
- now requires H::WC 0.63 (for the above test)
- add author/license to META.yml

version: 0.55
date: Sun Sep 17 11:00:00 EST 2006
changes:
- (bug #21531) Added 'preserve_nowiki' attribute
- Added 'preserve_templates' attribute (for the TinyMCE folks; see http://meta.wikimedia.org/wiki/TinyMCE)

version: 0.54
date: Thu Sep 1 00:00:00 EST 2006
changes:
- Added alternate text for [[Image:]] markup

version: 0.53
date: Thu Jul 20 19:00:00 EST 2006
changes:
- <hr> within <td> is now on own line (reported on [[wp:User_talk:Diberri]])
- require H::WC 0.60

version: 0.52
date: Wed June 07 16:00:00 EST 2006
changes:
- (bug #19046) allow lone '0' in text
- add pad_headings attribute
- update to require H::WC 0.54

version: 0.51
date: Fri Mar 03 2006
changes:
- update to require H::WC 0.52

version: 0.50
date: Tue Jan 10 2006
changes:
- branched from main HTML::WikiConverter codebase.
HTML-WikiConverter-MediaWiki-0.59/MANIFEST000064400000000000000000000003351147254762400200110ustar00rootroot00000000000000Changes
MANIFEST
META.yml # Will be created by "make dist"
Makefile.PL
README
lib/HTML/WikiConverter/MediaWiki.pm
t/00-load.t
t/01-mediawiki.t
t/boilerplate.t
t/mediawiki.preserve.t
t/pod-coverage.t
t/pod.t
t/runtests.pl
HTML-WikiConverter-MediaWiki-0.59/META.yml000064400000000000000000000011211147254762400201230ustar00rootroot00000000000000--- #YAML:1.0
name: HTML-WikiConverter-MediaWiki
version: 0.59
abstract: Convert HTML to MediaWiki markup
author:
- David J. Iberri <diberri@cpan.org>
license: perl
distribution_type: module
configure_requires:
ExtUtils::MakeMaker: 0
requires:
HTML::WikiConverter: 0.63
Test::More: 0
URI: 1.35
no_index:
directory:
- t
- inc
generated_by: ExtUtils::MakeMaker version 6.48
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
HTML-WikiConverter-MediaWiki-0.59/Makefile.PL000064400000000000000000000012601147254762400206300ustar00rootroot00000000000000use strict;
use warnings;
use ExtUtils::MakeMaker;

# Generate the Makefile for the HTML::WikiConverter::MediaWiki distribution.
WriteMakefile(
NAME => 'HTML::WikiConverter::MediaWiki',
AUTHOR => 'David J. Iberri <diberri@cpan.org>',
# Version and abstract are both read from the module file itself.
VERSION_FROM => 'lib/HTML/WikiConverter/MediaWiki.pm',
ABSTRACT_FROM => 'lib/HTML/WikiConverter/MediaWiki.pm',
LICENSE => 'perl',
# Empty on purpose: no *.PL scripts are registered for processing.
PL_FILES => {},
# Minimum versions of the modules this distribution depends on.
PREREQ_PM => {
'Test::More' => 0,
'URI' => 1.35,
'HTML::WikiConverter' => 0.63, # for the 'table w/ blocks' test
},
# "make dist" tarballs are compressed with gzip and get a .gz suffix.
dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
# "make clean" also removes built distribution directories/tarballs.
clean => { FILES => 'HTML-WikiConverter-MediaWiki-*' },
);
HTML-WikiConverter-MediaWiki-0.59/README000064400000000000000000000026311147254762400175410ustar00rootroot00000000000000HTML::WikiConverter::MediaWiki
==============================

HTML::WikiConverter::MediaWiki adds the MediaWiki dialect to
HTML::WikiConverter allowing conversion from HTML to MediaWiki markup.

SYNOPSIS

Converting HTML to wiki markup is easy:

use HTML::WikiConverter;
my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' );
print $wc->html2wiki( $html );

Or from the command line:

% html2wiki --dialect MediaWiki input.html > output.wiki

There's also a web interface if you're so inclined:

http://toolserver.org/~diberri/cgi-bin/html2wiki/

INSTALLATION

To install this module, run the following commands:

perl Makefile.PL
make
make test
make install

SUPPORT AND DOCUMENTATION

After installing, you can find documentation for this module with the
perldoc command.

perldoc HTML::WikiConverter::MediaWiki

You can also look for information at:

Search CPAN
http://search.cpan.org/dist/HTML-WikiConverter-MediaWiki

CPAN Request Tracker:
http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-MediaWiki

AnnoCPAN, annotated CPAN documentation:
http://annocpan.org/dist/HTML-WikiConverter-MediaWiki

CPAN Ratings:
http://cpanratings.perl.org/d/HTML-WikiConverter-MediaWiki

COPYRIGHT AND LICENCE

Copyright (c) David J. Iberri

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
HTML-WikiConverter-MediaWiki-0.59/lib/000075500000000000000000000000001147254762400174255ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/lib/HTML/000075500000000000000000000000001147254762400201715ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/lib/HTML/WikiConverter/000075500000000000000000000000001147254762400227645ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/lib/HTML/WikiConverter/MediaWiki.pm000064400000000000000000000342321147254762400251710ustar00rootroot00000000000000package HTML::WikiConverter::MediaWiki;
use base 'HTML::WikiConverter';

use warnings;
use strict;

use URI;
use File::Basename;
use HTML::Tagset;
our $VERSION = '0.59';

=head1 NAME

HTML::WikiConverter::MediaWiki - Convert HTML to MediaWiki markup

=head1 SYNOPSIS

use HTML::WikiConverter;
my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' );
print $wc->html2wiki( $html );

=head1 DESCRIPTION

This module contains rules for converting HTML into MediaWiki
markup. See L<HTML::WikiConverter> for additional usage details.

=head1 ATTRIBUTES

In addition to the regular set of attributes recognized by the
L<HTML::WikiConverter> constructor, this dialect also accepts the
following attributes:

=head2 preserve_bold

Boolean indicating whether bold HTML elements should be preserved as
HTML in the wiki output rather than being converted into MediaWiki
markup.

By default, E<lt>bE<gt> and E<lt>strongE<gt> elements are converted to
wiki markup identically. But sometimes you may wish E<lt>bE<gt> tags
in the HTML to be preserved in the resulting MediaWiki markup. This
attribute allows this.

For example, if C<preserve_bold> is enabled, HTML like

<ul>
<li> <b>Bold</b>
<li> <strong>Strong</strong>
</ul>

will be converted to

* <b>Bold</b>
* '''Strong'''

When disabled (the default), the preceding HTML markup would be
converted into

* '''Bold'''
* '''Strong'''

=head2 preserve_italic

Boolean indicating whether italic HTML elements should be preserved as
HTML in the wiki output rather than being converted into MediaWiki
markup.

For example, if C<preserve_italic> is enabled, HTML like

<ul>
<li> <i>Italic</i>
<li> <em>Emphasized</em>
</ul>

will be converted to

* <i>Italic</i>
* ''Emphasized''

When disabled (the default), the preceding HTML markup would be
converted into

* ''Italic''
* ''Emphasized''

=head2 preserve_templates

Boolean indicating whether C<{{template}}> calls found in HTML should
be preserved in the wiki markup. If disabled (the default), template
calls will be wrapped in C<E<lt>nowikiE<gt>> tags.

=head2 preserve_nowiki

Boolean indicating whether C<E<lt>nowikiE<gt>> tags found in HTML
should be preserved in the wiki markup. If disabled (the default),
nowiki tags will be replaced with their content.

=head2 pad_headings

Boolean indicating whether section headings should be padded with
spaces (eg, "== Section ==" instead of "==Section=="). Default is
false (ie, not to pad).

=cut

# Attribute whitelists. The rules and the *_start helpers in this file
# hand these lists to the 'attributes' rule option or to get_attr_str()
# to choose which HTML attributes are carried over into the output.
my @common_attrs = qw/ id class lang dir title style /;
# Common attributes plus 'align'; used for div and span in rules().
my @block_attrs = ( @common_attrs, 'align' );
# Alignment attributes, used for table rows (_tr_start) and cells (_td_start).
my @tablealign_attrs = qw/ align char charoff valign /;
# Attributes that are only whitelisted on table cells (_td_start).
my @tablecell_attrs = qw(
abbr axis headers scope rowspan
colspan nowrap width height bgcolor
);

# Fix for bug 14527: placeholder string used as the line_prefix of <pre>
# in rules(). postprocess_output() replaces every occurrence of it in the
# output with a single space.
my $pre_prefix = '[jsmckaoqkjgbhazkfpwijhkixh]';

# Build the conversion rule table for the MediaWiki dialect.
#
# Returns a reference to a hash keyed by HTML tag name. Each value is a
# rule hash (replace/start/end/alias/preserve/... options). The set of
# elements preserved as raw HTML depends on the preserve_italic,
# preserve_bold and preserve_nowiki attributes of the converter.
sub rules {
    my $self = shift;

    my %rules = (
        # Inline markup and simple blocks
        hr     => { replace => "\n----\n" },
        br     => { preserve => 1, empty => 1, attributes => [ qw/id class title style clear/ ] },
        p      => { block => 1, trim => 'both', line_format => 'single' },
        em     => { start => "''", end => "''", line_format => 'single' },
        strong => { start => "'''", end => "'''", line_format => 'single' },
        i      => { alias => 'em' },
        b      => { alias => 'strong' },

        # Every line of a <pre> block is prefixed with a placeholder that
        # postprocess_output() turns into a single space.
        pre => { line_prefix => $pre_prefix, block => 1 },

        # Tables
        table   => { start => \&_table_start, end => "|}", block => 1, line_format => 'blocks' },
        tr      => { start => \&_tr_start },
        td      => { start => \&_td_start, end => "\n", trim => 'both', line_format => 'blocks' },
        th      => { start => \&_td_start, end => "\n", trim => 'both', line_format => 'single' },
        caption => { start => \&_caption_start, end => "\n", line_format => 'single' },

        # Images and links
        img => { replace => \&_image },
        a   => { replace => \&_link },

        # Lists: ol and dl share the ul rule, dt and dd share the li rule
        ul => { line_format => 'multi', block => 1 },
        ol => { alias => 'ul' },
        dl => { alias => 'ul' },
        li => { start => \&_li_start, trim => 'leading' },
        dt => { alias => 'li' },
        dd => { alias => 'li' },

        # Preserved elements, from MediaWiki's Sanitizer.php (http://tinyurl.com/dzj6o)
        div        => { preserve => 1, attributes => \@block_attrs },
        span       => { preserve => 1, attributes => \@block_attrs },
        blockquote => { preserve => 1, attributes => [ @common_attrs, qw/ cite / ] },
        del        => { preserve => 1, attributes => [ @common_attrs, qw/ cite datetime / ] },
        ins        => { preserve => 1, attributes => [ @common_attrs, qw/ cite datetime / ] },
        font       => { preserve => 1, attributes => [ @common_attrs, qw/ size color face / ] },
    );

    # Headings (h1-h6) all use the same rule; each tag gets its own copy.
    foreach my $level ( 1 .. 6 ) {
        $rules{"h$level"} = {
            start       => \&_hr_start,
            end         => \&_hr_end,
            block       => 1,
            trim        => 'both',
            line_format => 'single',
        };
    }

    # Elements passed through as HTML with only the common attributes.
    # Adding i/b here replaces their alias rules defined above.
    my @preserved = qw/ center cite code var sup sub tt big small strike s u ruby rb rt rp /;
    if ( $self->preserve_italic ) {
        push @preserved, 'i';
    }
    if ( $self->preserve_bold ) {
        push @preserved, 'b';
    }
    if ( $self->preserve_nowiki ) {
        push @preserved, 'nowiki';
    }

    foreach my $tag (@preserved) {
        $rules{$tag} = { preserve => 1, attributes => \@common_attrs };
    }

    return \%rules;
}

# Declare the dialect attributes and their default values.
#
# Returns a fresh hash reference of the form
# { attribute_name => { default => VALUE }, ... } on every call.
sub attributes {
    my %default_for = (
        preserve_italic    => 0,
        preserve_bold      => 0,
        strip_tags         => [ qw/ head style script ~comment title meta link object / ],
        pad_headings       => 0,
        preserve_templates => 0,
        preserve_nowiki    => 0,

        # see bug #28402
        # (an earlier default was: tbody thead font)
        passthrough_naked_tags => [ qw/ tbody thead font span / ],
    );

    my %spec;
    foreach my $name ( keys %default_for ) {
        $spec{$name} = { default => $default_for{$name} };
    }

    return \%spec;
}

# Opening markup for a heading element (h1-h6).
#
# $wc is the converter, $node the heading element; $subrules is unused.
# Returns one '=' per heading level, followed by a space when the
# pad_headings attribute is enabled.
sub _hr_start {
    my ( $wc, $node, $subrules ) = @_;

    # The level is whatever digits the tag name contains ("h3" -> 3).
    my $level = $node->tag;
    $level =~ s/\D//g;

    my $markup = '=' x $level;
    $markup .= ' ' if $wc->pad_headings;
    return $markup;
}

# Closing markup for a heading element (h1-h6).
#
# $wc is the converter, $node the heading element; $subrules is unused.
# Returns one '=' per heading level, preceded by a space when the
# pad_headings attribute is enabled.
sub _hr_end {
    my ( $wc, $node, $subrules ) = @_;

    # The level is whatever digits the tag name contains ("h3" -> 3).
    my $level = $node->tag;
    $level =~ s/\D//g;

    my $markup = '=' x $level;
    $markup = " $markup" if $wc->pad_headings;
    return $markup;
}

# Replace every <pre> line-prefix placeholder in the output with a single
# space. $outref is a reference to the output string, which is modified
# in place.
sub postprocess_output {
    my ( $self, $outref ) = @_;
    my $placeholder = quotemeta $pre_prefix;
    ${$outref} =~ s/$placeholder/ /g;
}

# Computes the wiki prefix placed in front of a list item (li, dt or dd).
#
# Every enclosing list found by look_up() contributes one character:
# '*' for ul, '#' for ol and ':' for dl (';' instead when the item itself
# is a dt). The character of the outermost list comes first. Returns a
# newline, the prefix and a trailing space.
sub _li_start {
    my ( $self, $node, $rules ) = @_;

    my %bullet_for = ( ul => '*', ol => '#', dl => ':' );

    my $prefix = '';
    foreach my $list ( $node->look_up( _tag => qr/ul|ol|dl/ ) ) {
        my $list_tag = $list->tag;
        my $bullet = exists $bullet_for{$list_tag} ? $bullet_for{$list_tag} : '';
        $bullet = ';' if $list_tag eq 'dl' and $node->tag eq 'dt';

        # look_up() yields the nearest list first, so prepend.
        $prefix = $bullet . $prefix;
    }

    return "\n$prefix ";
}

# Converts an <a> element into wiki link markup.
#
# $self is the converter and $node the anchor element; $rules is unused.
# Returns:
#   - "[[Title]]", "[[text]]" or "[[Title|text]]" when get_wiki_page()
#     recognises the href as a page of the wiki,
#   - the link text alone when the anchor has no (or an empty) href,
#   - the bare URL when the link text equals the URL,
#   - "[url text]" for every other external link.
sub _link {
    my ( $self, $node, $rules ) = @_;
    my $href = $node->attr('href');
    my $url  = defined $href ? $href : '';
    my $text = $self->get_elem_contents($node);

    # Handle internal links
    if ( my $title = $self->get_wiki_page($url) ) {
        $title =~ s/_/ /g;
        return "[[$title]]" if $text eq $title;            # no difference between link text and page title
        return "[[$text]]"  if $text eq lcfirst $title;    # differ by 1st char. capitalization
        return "[[$title|$text]]";                         # completely different
    }

    # An anchor without a target cannot become a link; emitting "[ text]"
    # here would put stray brackets into the page, so keep only its text.
    return $text unless length $url;

    # Treat them as external links
    return $url if $url eq $text;
    return "[$url $text]";
}

# Converts an <img> element into [[Image:...]] markup.
#
# $self is the converter and $node the image element; $rules is unused.
# The image name is the basename of the path component of the src URL.
# Returns '' when the element has no src. Otherwise returns
# "[[Image:name]]" with up to two optional "|"-separated parts, in this
# order:
#   - "<N>px" when the width attribute is a positive pixel count
#     ("200" or "200px"); other widths such as "50%" are ignored because
#     they cannot be expressed as "<N>px",
#   - the alt text when it is not empty.
sub _image {
    my ( $self, $node, $rules ) = @_;

    my $src = $node->attr('src');
    return '' unless $src;

    my $img = basename( URI->new($src)->path );

    my $alt = $node->attr('alt');
    $alt = '' unless defined $alt;

    # Accept plain pixel counts only; "50%" must not end up as "50%px".
    my $width  = $node->attr('width');
    my $pixels = '';
    if ( defined $width and $width =~ /\A\s*(\d+)\s*(?:px)?\s*\z/i ) {
        $pixels = $1;
    }

    my @parts = ("Image:$img");
    push @parts, "${pixels}px" if $pixels;       # width is kept even without alt text
    push @parts, $alt          if length $alt;   # keeps an alt text of "0"

    return '[[' . join( '|', @parts ) . ']]';
}

# Opening markup for a <table> element.
#
# $self is the converter and $node the table element; $rules is unused.
# Returns "{|", followed by a space and the whitelisted attributes when
# get_attr_str() yields any, followed by a newline.
sub _table_start {
    my ( $self, $node, $rules ) = @_;

    # Each attribute name is listed exactly once: the list is passed to
    # get_attr_str() as is, so a repeated name risks a repeated attribute
    # in the generated markup.
    my @table_attrs = (
        @common_attrs,
        qw/ summary width border frame rules cellspacing cellpadding align bgcolor /
    );

    my $attrs  = $self->get_attr_str( $node, @table_attrs );
    my $prefix = '{|';
    $prefix .= ' ' . $attrs if $attrs;

    return $prefix . "\n";
}

# Opening markup for a <tr> element.
#
# $self is the converter and $node the row element; $rules is unused.
# Returns "|-" plus any whitelisted attributes and a newline. A row that
# has no attributes and no left sibling produces the empty string.
sub _tr_start {
    my ( $self, $node, $rules ) = @_;

    my $attr_str = $self->get_attr_str( $node, @common_attrs, 'bgcolor', @tablealign_attrs );

    # Scalar context: the immediate left sibling, if there is one.
    my $left_sibling = $node->left;
    if ( !$left_sibling and !$attr_str ) {
        return '';
    }

    my $markup = '|-';
    if ($attr_str) {
        $markup .= " $attr_str";
    }
    return "$markup\n";
}

# List of tags (and pseudo-tags, in the case of '~text') that are
# considered phrasal elements. Any table cells that contain only these
# elements will be placed on a single line.
my @td_phrasals = qw/ i em b strong u tt code span font sup sub br ~text s strike del ins /;
# Lookup form of the list above (tag name => 1), consulted by _td_start().
my %td_phrasals = map { $_ => 1 } @td_phrasals;

# Opening markup for a table cell (<td> or <th>).
#
# $self is the converter and $node the cell element; $rules is unused.
# Returns "!" for th or "|" for td, then " attrs |" when the cell has
# whitelisted attributes, then either a space or a newline.
sub _td_start {
    my ( $self, $node, $rules ) = @_;

    my $markup = '|';
    $markup = '!' if $node->tag eq 'th';

    my $attr_str = $self->get_attr_str( $node, @common_attrs, @tablecell_attrs, @tablealign_attrs );
    $markup .= " $attr_str |" if $attr_str;

    # Content starts on its own line as soon as the cell holds a child
    # that is not one of the phrasal elements; otherwise it follows the
    # cell marker after a single space.
    my $non_phrasal_children = grep { !$td_phrasals{ $_->tag } } $node->content_list;

    return $markup . ( $non_phrasal_children ? "\n" : ' ' );
}

# Opening markup for a table <caption>.
#
# $self is the converter and $node the caption element; $rules is unused.
# Returns "|+ ", followed by "attrs |" when the caption has whitelisted
# attributes.
sub _caption_start {
    my ( $self, $node, $rules ) = @_;

    my $attr_str = $self->get_attr_str( $node, @common_attrs, 'align' );

    return $attr_str ? '|+ ' . $attr_str . ' |' : '|+ ';
}

# Per-node hook: cleans up a node before its rule is applied.
#
#   - <a> elements are handed to strip_aname(),
#   - every node is checked by _strip_extra(),
#   - '~text' nodes are handed to _nowiki_text().
sub preprocess_node {
    my ( $self, $node ) = @_;

    my $tag = $node->tag;
    $tag = '' unless defined $tag;

    if ( $tag eq 'a' ) {
        $self->strip_aname($node);
    }

    $self->_strip_extra($node);

    # XXX font-to-span conversion is disabled for now:
    #   $node->tag('span') if $tag eq 'font';

    $self->_nowiki_text($node) if $tag eq '~text';
}

# Regex source fragments used by _nowiki_text() to spot text that looks
# like a bracketed external link, e.g. "[http://example.org label]".

# Alternation of the URL schemes that may follow the opening bracket.
my $URL_PROTOCOLS = 'http|https|ftp|irc|gopher|news|mailto';
# Characters allowed in the URL part: anything except ']', '<', '>', '"',
# the bytes 0x00-0x20 and 0x7F.
my $EXT_LINK_URL_CLASS = '[^]<>"\\x00-\\x20\\x7F]';
# Characters allowed in the link text: anything except ']', the bytes
# 0x00-0x1F and 0x7F.
my $EXT_LINK_TEXT_CLASS = '[^\]\\x00-\\x1F\\x7F]';

# Text nodes matching one or more of these patterns will be enveloped
# in <nowiki> and </nowiki>

# Returns the patterns that mark text as needing <nowiki> protection.
#
# The result is a hash reference: name => { pattern => qr/.../,
# location => 'anywhere' | 'start_of_line' }. 'anywhere' patterns are
# checked against every text node, 'start_of_line' patterns only against
# text that begins a block (see _nowiki_text). The 'template' entry is
# left out when the preserve_templates attribute is enabled.
sub _wikitext_patterns {
    my $self = shift;

    # the caret in "qr/^/" seems redundant with "start_of_line" but both
    # are necessary
    my %wikitext_patterns = (
        misc   => { pattern => qr/^[*#;:=!|]/m, location => 'start_of_line' },
        italic => { pattern => qr/''/,          location => 'anywhere' },
        rule   => { pattern => qr/^----/m,      location => 'start_of_line' },
        table  => { pattern => qr/^\{\|/m,      location => 'start_of_line' },
        link   => { pattern => qr/\[\[/m,       location => 'anywhere' },

        # The braces are escaped: an unescaped literal "{" in a pattern is
        # deprecated or rejected by newer perls.
        template => { pattern => qr/\{\{/m, location => 'anywhere' },
    );

    delete $wikitext_patterns{template} if $self->preserve_templates;
    return \%wikitext_patterns;
}

# Protects the text of a '~text' node that would otherwise be read as
# wiki markup.
#
# The whole text is wrapped in <nowiki>...</nowiki> when
#   - it matches one of the 'anywhere' patterns, or
#   - the node is the leftmost text node of its nearest block-level
#     ancestor and it matches one of the 'start_of_line' patterns.
# Otherwise only substrings that look like bracketed external links are
# wrapped. The node's 'text' attribute is updated in place; nothing is
# done for empty text.
sub _nowiki_text {
    my ( $self, $node ) = @_;

    my $text = $node->attr('text');
    $text = '' unless defined $text;
    return unless $text;

    # Sort the patterns by the location they apply to.
    my %patterns_at = ( anywhere => [], start_of_line => [] );
    foreach my $entry ( values %{ $self->_wikitext_patterns } ) {
        push @{ $patterns_at{ $entry->{location} } }, $entry->{pattern};
    }

    my $needs_nowiki = $self->_match( $text, $patterns_at{anywhere} );

    if ( !$needs_nowiki ) {

        # find closest parent that is a block-level node
        my $block = $self->elem_search_lineage( $node, { block => 1 } );

        if ($block) {
            my $first_text = $self->_get_leftmostish_text_node($block);

            # Start-of-line markup only matters for the text that opens
            # the block.
            if ( $first_text and $node == $first_text ) {
                $needs_nowiki = $self->_match( $text, $patterns_at{start_of_line} );
            }
        }
    }

    if ($needs_nowiki) {
        $text = "<nowiki>$text</nowiki>";
    }
    else {
        $text =~ s~(\[\b(?:$URL_PROTOCOLS):$EXT_LINK_URL_CLASS+ *$EXT_LINK_TEXT_CLASS*?\])~<nowiki>$1</nowiki>~go;
    }

    $node->attr( text => $text );
}

# Follows the chain of first children starting at $node and returns the
# first '~text' node found on it ($node itself included). Returns nothing
# when the chain ends without reaching a text node.
sub _get_leftmostish_text_node {
    my ( $self, $node ) = @_;

    my $current = $node;
    while ($current) {
        return $current if $current->tag eq '~text';
        $current = ( $current->content_list )[0];
    }

    return;
}

# Returns 1 when $text matches at least one of the patterns in the array
# reference $patterns, 0 otherwise.
sub _match {
    my ( $self, $text, $patterns ) = @_;

    foreach my $pattern ( @{$patterns} ) {
        return 1 if $text =~ $pattern;
    }

    return 0;
}

# Attribute name => pattern. _strip_extra() removes every node whose
# attribute of that name matches the pattern. The patterns are not
# anchored, so a match anywhere inside the attribute value is enough.
my %extra = (
id => qr/catlinks/,
class => qr/urlexpansion|printfooter|editsection/
);

# Delete <span class="urlexpansion">...</span> et al
# Removes nodes that carry one of the id/class values listed in %extra.
#
# $self is the converter, $node the node to inspect. For the first
# attribute in %extra whose value on $node matches its pattern, the node
# is detached from the tree and deleted. Returns nothing in either case.
sub _strip_extra {
    my ( $self, $node ) = @_;

    foreach my $att_name ( keys %extra ) {
        my $att_value = $node->attr($att_name);
        next unless defined $att_value;
        next unless $att_value =~ $extra{$att_name};

        $node->detach();
        $node->delete();
        return;
    }

    return;
}

=head1 AUTHOR

David J. Iberri, C<< <diberri at cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to
C<bug-html-wikiconverter-mediawiki at rt.cpan.org>, or through the web
interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=HTML-WikiConverter-MediaWiki>.
I will be notified, and then you'll automatically be notified of
progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

perldoc HTML::WikiConverter::MediaWiki

You can also look for information at:

=over 4

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/HTML-WikiConverter-MediaWiki>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/HTML-WikiConverter-MediaWiki>

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-MediaWiki>

=item * Search CPAN

L<http://search.cpan.org/dist/HTML-WikiConverter-MediaWiki>

=back

=head1 COPYRIGHT & LICENSE

Copyright 2006 David J. Iberri, all rights reserved.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

1;
HTML-WikiConverter-MediaWiki-0.59/t/000075500000000000000000000000001147254762400171225ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/t/00-load.t000064400000000000000000000003101147254762400204350ustar00rootroot00000000000000#!perl -T

# Smoke test: the module must load. Exactly one test is planned.
use Test::More tests => 1;

# Load the module at compile time so a failure to load is reported as a
# test failure.
BEGIN {
use_ok( 'HTML::WikiConverter::MediaWiki' );
}

# Report the module version, the perl version ($]) and the perl binary ($^X).
diag( "Testing HTML::WikiConverter::MediaWiki $HTML::WikiConverter::MediaWiki::VERSION, Perl $], $^X" );
HTML-WikiConverter-MediaWiki-0.59/t/01-mediawiki.t000064400000000000000000000327441147254762400215020ustar00rootroot00000000000000local $/;
# runtests() comes from the shared driver t/runtests.pl (not part of this
# file). The __DATA__ section below holds the test cases: a name line,
# HTML after __H__, the expected wiki markup after __W__, with cases
# separated by __NEXT__.
require 't/runtests.pl';
# $/ was undefined above, so <DATA> yields the whole data section at once.
runtests( data => <DATA>, dialect => 'MediaWiki', wiki_uri => 'http://www.test.com/wiki/' );
close DATA;

__DATA__
external link
__H__
<p><a href="http://example.com">[http://example.com]</a></p>
__W__
[http://example.com <nowiki>[http://example.com]</nowiki>]
__NEXT__
nowiki template
__H__
<p>mark stubs with {{stub}}</p>
__W__
<nowiki>mark stubs with {{stub}}</nowiki>
__NEXT__
nowiki quoted
__H__
<p>what happens to 'quoted text'?</p>
__W__
what happens to 'quoted text'?
__NEXT__
nowiki doubly quoted
__H__
<p>how about ''doubly quoted''?</p>
__W__
<nowiki>how about ''doubly quoted''?</nowiki>
__NEXT__
nowiki triply quoted
__H__
<p>and '''triply quoted'''?</p>
__W__
<nowiki>and '''triply quoted'''?</nowiki>
__NEXT__
nowiki hr
__H__
<p>----</p>
__W__
<nowiki>----</nowiki>
__NEXT__
nowiki ul
__H__
<p>* ul</p>
__W__
<nowiki>* ul</nowiki>
__NEXT__
nowiki ol
__H__
<p># ol</p>
__W__
<nowiki># ol</nowiki>
__NEXT__
nowiki def
__H__
<p>; def</p>
__W__
<nowiki>; def</nowiki>
__NEXT__
nowiki indent
__H__
<p>: indent</p>
__W__
<nowiki>: indent</nowiki>
__NEXT__
nowiki internal links
__H__
<p>an [[internal]] link</p>
__W__
<nowiki>an [[internal]] link</nowiki>
__NEXT__
nowiki table markup
__H__
<p>{|<br />
| table<br />
|}</p>
__W__
<nowiki>{|</nowiki><br /> | table<br /> |}
__NEXT__
nowiki ext link
__H__
<p>[http://example.com]</p>
__W__
<nowiki>[http://example.com]</nowiki>
__NEXT__
(bug #46453) triggering <nowiki> too often
__H__
<em>x</em>:bla
__W__
''x'':bla
__NEXT__
do not add a <nowiki> tag only if offending character(s) occur at the beginning of text node
__H__
<p>text <strong>*</strong>
<p>text <strong>#</strong>
<p>text <strong>;</strong>
<p>text <strong>:</strong>
<p>text <strong>=</strong>
<p>text <strong>!</strong>
<p>text <strong>|</strong>
<p>text <strong>----</strong>
<p>text <strong>{|</strong>
__W__
text '''*'''

text '''#'''

text ''';'''

text ''':'''

text '''='''

text '''!'''

text '''|'''

text '''----'''

text '''{|'''
__NEXT__
tr attributes
__H__
<html><table><tr align="left" valign="top"><td>ok</td></tr></table></html>
__W__
{|
|- align="left" valign="top"
| ok
|}
__NEXT__
preserve cite
__H__
<html><cite id="good">text</cite></html>
__W__
<cite id="good">text</cite>
__NEXT__
preserve var
__H__
<html><var id="good">text</var></html>
__W__
<var id="good">text</var>
__NEXT__
preserve blockquote
__H__
<html><blockquote cite="something" onclick="alert('hello')">text</blockquote></html>
__W__
<blockquote cite="something">text</blockquote>
__NEXT__
preserve ruby
__H__
<html><ruby>text</ruby></html>
__W__
<ruby>text</ruby>
__NEXT__
preserve rb
__H__
<html><rb id="ok">text</rb></html>
__W__
<rb id="ok">text</rb>
__NEXT__
preserve rt
__H__
<html><rt id="ok" blah="blah">text</rt></html>
__W__
<rt id="ok">text</rt>
__NEXT__
preserve rp
__H__
<html><rp id="ok" something="ok" bad="good" class="stuff">text</rp></html>
__W__
<rp id="ok" class="stuff">text</rp>
__NEXT__
preserve div
__H__
<html><div id="thing" align="left" bad="good">ok</div></html>
__W__
<div id="thing" align="left">ok</div>
__NEXT__
empty line break
__H__
<html><br id="thing"></br></html>
__W__
<br id="thing" />
__NEXT__
br attribs
__H__
<html>ok<br id="stuff" class="things" title="ok" style="clear:both" clear="both"></html>
__W__
ok<br id="stuff" class="things" title="ok" style="clear: both" clear="both" />
__NEXT__
wrap in html
__H__
<a href="http://google.com">GOOGLE</a><br/>
NewLine
__W__
[http://google.com GOOGLE]<br /> NewLine
__NEXT__
bold
__H__
<html><b>bold</b></html>
__W__
'''bold'''
__NEXT__
italics
__H__
<html><i>italics</i></html>
__W__
''italics''
__NEXT__
bold and italics
__H__
<html><b>bold</b> and <i>italics</i></html>
__W__
'''bold''' and ''italics''
__NEXT__
bold-italics nested
__H__
<html><b><i>bold-italics</i> nested</b></html>
__W__
'''''bold-italics'' nested'''
__NEXT__
strong
__H__
<html><strong>strong</strong></html>
__W__
'''strong'''
__NEXT__
emphasized
__H__
<html><em>emphasized</em></html>
__W__
''emphasized''
__NEXT__
underlined
__H__
<html><u>underlined</u></html>
__W__
<u>underlined</u>
__NEXT__
strikethrough
__H__
<html><s>strike</s></html>
__W__
<s>strike</s>
__NEXT__
deleted
__H__
<html><del>deleted text</del></html>
__W__
<del>deleted text</del>
__NEXT__
inserted
__H__
<html><ins>inserted</ins></html>
__W__
<ins>inserted</ins>
__NEXT__
span tags removed if naked (ie, have no attribs)
__H__
<html><span>text here</span></html>
__W__
text here
__NEXT__
strip aname
__H__
<html><a name="thing"></a></html>
__W__

__NEXT__
one-line phrasals
__H__
<html><i>phrasals
in one line</i></html>
__W__
''phrasals in one line''
__NEXT__
paragraph blocking
__H__
<html><p>p1</p><p>p2</p></html>
__W__
p1

p2
__NEXT__
lists
__H__
<html><ul><li>1</li><li>2</li></ul></html>
__W__
* 1
* 2
__NEXT__
nested lists
__H__
<html><ul><li>1<ul><li>1a</li><li>1b</li></ul></li><li>2</li></ul>
__W__
* 1
** 1a
** 1b
* 2
__NEXT__
nested lists (different types)
__H__
<html><ul><li>1<ul><li>a<ol><li>i</li></ol></li><li>b</li></ul></li><li>2<dl><dd>indented</dd></dl></li></ul></html>
__W__
* 1
** a
**# i
** b
* 2
*: indented
__NEXT__
hr
__H__
<html><hr /></html>
__W__
----
__NEXT__
br
__H__
<html><p>stuff<br />stuff two</p></html>
__W__
stuff<br />stuff two
__NEXT__
div
__H__
<html><div>thing</div></html>
__W__
<div>thing</div>
__NEXT__
div w/ attrs
__H__
<html><div id="name" class="panel" onclick="popup()">thing</div></html>
__W__
<div id="name" class="panel">thing</div>
__NEXT__
sub
__H__
<html><p>H<sub>2</sub>O</p></html>
__W__
H<sub>2</sub>O
__NEXT__
sup
__H__
<html><p>x<sup>2</sup></p></html>
__W__
x<sup>2</sup>
__NEXT__
center
__H__
<html><center>centered text</center></html>
__W__
<center>centered text</center>
__NEXT__
small
__H__
<html><small>small text</small></html>
__W__
<small>small text</small>
__NEXT__
code
__H__
<html><code>$name = 'stan';</code></html>
__W__
<code>$name = 'stan';</code>
__NEXT__
tt
__H__
<html><tt>tt text</tt></html>
__W__
<tt>tt text</tt>
__NEXT__
font-to-span conversion ::TODO("HTML::WikiConverter::Normalizer not doing font-to-span conversion yet")
__H__
<html><font color="blue" face="Arial" size="+2">font</font></html>
__W__
<span style="font-size:+2; color:blue; font-family:Arial">font</span>
__NEXT__
font
__H__
<html><font color="blue" face="Arial" size="+2">font</font></html>
__W__
<font size="+2" color="blue" face="Arial">font</font>
__NEXT__
pre
__H__
<html><pre>this
is
preformatted
text</pre></html>
__W__
this
is
preformatted
text
__NEXT__
indent
__H__
<html><dl><dd>indented text</dd></dl></html>
__W__
: indented text
__NEXT__
nested indent
__H__
<html><dl><dd>stuff<dl><dd>double-indented</dd></dl></dd></dl></html>
__W__
: stuff
:: double-indented
__NEXT__
h1
__H__
<h1>h1</h1>
__W__
=h1=
__NEXT__
h2
__H__
<h2>h2</h2>
__W__
==h2==
__NEXT__
h3
__H__
<h3>h3</h3>
__W__
===h3===
__NEXT__
h4
__H__
<h4>h4</h4>
__W__
====h4====
__NEXT__
h5
__H__
<h5>h5</h5>
__W__
=====h5=====
__NEXT__
h6
__H__
<h6>h6</h6>
__W__
======h6======
__NEXT__
img
__H__
<html><img src="thing.gif" /></html>
__W__
[[Image:thing.gif]]
__NEXT__
table
__H__
<table>
<caption>Stuff</caption>
<tr>
<th> Name </th> <td> David </td>
</tr>
<tr>
<th> Age </th> <td> 24 </td>
</tr>
<tr>
<th> Height </th> <td> 6' </td>
</tr>
<tr>
<td>
<table>
<tr>
<td> Nested </td>
<td> tables </td>
</tr>
<tr>
<td> are </td>
<td> fun </td>
</tr>
</table>
</td>
</tr>
</table>
__W__
{|
|+ Stuff
|-
! Name
| David
|-
! Age
| 24
|-
! Height
| 6'
|-
|
{|
| Nested
| tables
|-
| are
| fun
|}
|}
__NEXT__
table w/ attrs
__H__
<table border=1 cellpadding=3 bgcolor=#ffffff onclick='alert("alert!")'>
<caption>Stuff</caption>
<tr id="first" class="unselected">
<th id=thing bgcolor=black> Name </th> <td> Foo </td>
</tr>
<tr class="selected">
<th> Age </th> <td>24</td>
</tr>
<tr class="unselected">
<th> <u>Height</u> </th> <td> 6' </td>
</tr>
</table>
__W__
{| border="1" cellpadding="3" bgcolor="#ffffff"
|+ Stuff
|- id="first" class="unselected"
! id="thing" bgcolor="black" | Name
| Foo
|- class="selected"
! Age
| 24
|- class="unselected"
! <u>Height</u>
| 6'
|}
__NEXT__
table w/ blocks
__H__
<table>
<tr>
<td align=center>
<p>Paragraph 1</p>
<p>Paragraph 2</p>
</td>
</tr>
</table>
__W__
{|
| align="center" |
Paragraph 1

Paragraph 2
|}
__NEXT__
strip empty aname
__H__
<html><a name="thing"></a> some text</html>
__W__
some text
__NEXT__
wiki link (text == title)
__H__
<html><a href="/wiki/Some_wiki_page">Some wiki page</a></html>
__W__
[[Some wiki page]]
__NEXT__
wiki link (text case != title case)
__H__
<html><a href="/wiki/Another_page">another page</a></html>
__W__
[[another page]]
__NEXT__
wiki link (text != title)
__H__
<html><a href="/wiki/Another_page">some text</a></html>
__W__
[[Another page|some text]]
__NEXT__
external links
__H__
<html><a href="http://www.test.com">thing</a></html>
__W__
[http://www.test.com thing]
__NEXT__
external links (rel2abs)
__H__
<html><a href="thing.html">thing</a></html>
__W__
[http://www.test.com/thing.html thing]
__NEXT__
strip urlexpansion
__H__
<html><a href="http://www.google.com">Google</a> <span class=" urlexpansion ">(http://www.google.com)</span></html>
__W__
[http://www.google.com Google]
__NEXT__
strip printfooter
__H__
<html><div class="printfooter">Retrieved from blah blah</div></html>
__W__

__NEXT__
strip catlinks
__H__
<html><div id="catlinks"><p>Categories: ...</p></div></html>
__W__

__NEXT__
strip editsection
__H__
<html>This is <div class="editsection"><a href="?action=edit&section=1">edit</a></div> great</html>
__W__
This is

great
__NEXT__
escape bracketed urls
__H__
<html><p>This is a text node with what looks like an ext. link [http://example.org].</p></html>
__W__
This is a text node with what looks like an ext. link <nowiki>[http://example.org]</nowiki>.
__NEXT__
line with vertical bar
__H__
<html><p>| a line with a vertical bar</p></html>
__W__
<nowiki>| a line with a vertical bar</nowiki>
__NEXT__
line that starts with a bang
__H__
<html><p>! a line that starts with a bang</p></html>
__W__
<nowiki>! a line that starts with a bang</nowiki>
__NEXT__
line that looks like a section
__H__
<html><p>= a line that looks like a section</p></html>
__W__
<nowiki>= a line that looks like a section</nowiki>
__NEXT__
pre-many (bug #14527)
__H__
<html><pre>preformatted text

with spaces

should produce only one

pre-block</pre></html>
__W__
preformatted text

with spaces

should produce only one

pre-block
__NEXT__
pre following pre
__H__
<html><pre>preformatted text</pre>
<pre>more preformatted text</pre>
<pre>once again</pre></html>
__W__
preformatted text

more preformatted text

once again
__NEXT__
preserve ::preserve_bold(1)
__H__
<b>hello</b>
__W__
<b>hello</b>
__NEXT__
hr under td
__H__
<table><tr><td><hr></td></tr></table>
__W__
{|
|
----
|}
__NEXT__
img alt
__H__
<img src="thing.gif" alt="Just a test" />
__W__
[[Image:thing.gif|Just a test]]
__NEXT__
no preserve templates ::preserve_templates(0)
__H__
{{template}}
__W__
<nowiki>{{template}}</nowiki>
__NEXT__
preserve templates ::preserve_templates(1)
{{template}}
__W__
{{template}}
__NEXT__
no preserve nowiki ::preserve_nowiki(0)
__H__
<nowiki>hey</nowiki>
__W__
hey
__NEXT__
preserve nowiki ::preserve_nowiki(1)
__H__
<nowiki>hey</nowiki>
__W__
<nowiki>hey</nowiki>
__NEXT__
preserve image width
__H__
<img src="thing.jpg" width="200" height="400" alt="The Thing" />
__W__
[[Image:thing.jpg|200px|The Thing]]
__NEXT__
tbody and thead fixes (bug #28402)
__H__
<table border="1">
<colgroup>
<col />
<col />
<col />
</colgroup>
<thead>
<tr>
<th>heading col 1</th>
<th>heading col 2</th>
<th>heading last col</th>
</tr>
</thead>
<tbody>
<tr>
<td>data first col first row</td>
<td>data c2 r1</td>
<td>data c3 r1</td>
</tr>
<tr>
<td>data c1 r2</td>
<td>data c2 r2</td>
<td>data c3 r2</td>
</tr>
<tr>
<td>data c1 r3</td>
<td>data c2 r3</td>
<td>data c3 r3</td>
</tr>
</tbody>
</table>
__W__
{| border="1"
|-
! heading col 1
! heading col 2
! heading last col
|-
| data first col first row
| data c2 r1
| data c3 r1
|-
| data c1 r2
| data c2 r2
| data c3 r2
|-
| data c1 r3
| data c2 r3
| data c3 r3
|}
__NEXT__
don't pad headings ::pad_headings(0)
__H__
<h2>Heading</h2>
__W__
==Heading==
__NEXT__
table with zeros
__H__
<table>
<tr><td>0</td></tr>
<tr><td>1</td></tr>
<tr><td>0</td></tr>
<tr><td>1</td></tr>
</table>
__W__
{|
| 0
|-
| 1
|-
| 0
|-
| 1
|}
__NEXT__
(bug #40845) internal links, without wiki_uri
__H__
<a href='class_browser.html'>Class Browser</a>
__W__
[http://www.test.com/class_browser.html Class Browser]
__NEXT__
(bug #40845) internal links, with wiki_uri=base_uri ::wiki_uri('http://www.test.com/')
__H__
<a href='class_browser.html'>Class Browser</a>
__W__
[[class browser.html|Class Browser]]
__NEXT__
(bug #40845) broken links with anchors, without wiki_uri
__H__
<a href='#Adding'>adding</a>
__W__
[http://www.test.com#Adding adding]
__NEXT__
(bug #40845) links with anchors, with wiki_uri ::wiki_uri('http://www.test.com/') ::TODO('wiki_uri not working with an ending slash')
__H__
<a href='#Adding'>adding</a>
__W__
[[#Adding|adding]]
__NEXT__
(bug #24745) font/span weirdness ::TODO("HTML::WikiConverter::Normalizer doesn't handle this yet");
__H__
<p><span style='font-size:40.0pt; font-family:"ArialNarrow"'>The Test Header</span></p>
__W__
<span style="font-size:40pt; font-family:ArialNarrow">The Test Header</span>
__NEXT__
(bug #29342) Tag attributes with 0 ::TODO("this is actually an H::WC-specific bug")
__H__
<table cellspacing="0" cellpadding="3" border="1">
<tr><td>Hello</td><td>World</td></tr>
</table>
__W__
{| border="1" cellpadding="3" cellspacing="0"
| Hello
| World
|}
HTML-WikiConverter-MediaWiki-0.59/t/boilerplate.t000064400000000000000000000023431147254762400216130ustar00rootroot00000000000000#!perl -T

use strict;
use warnings;
use Test::More tests => 3;

# Report, through Test::More, whether a file is free of unwanted text.
#
#   $filename - path of the file to scan
#   %regex    - description => compiled pattern; every line of the file is
#               matched against every pattern
#
# Emits one passing test when nothing matches. Otherwise emits one failing
# test followed by a diagnostic per description listing the matching line
# numbers. Dies if the file cannot be opened.
sub not_in_file_ok {
    my ($filename, %regex) = @_;

    open my $fh, "<", $filename
        or die "couldn't open $filename for reading: $!";

    # description => [ line numbers whose text matched that pattern ]
    my %violated;

    while (defined(my $line = <$fh>)) {
        for my $desc (keys %regex) {
            next unless $line =~ $regex{$desc};
            push @{ $violated{$desc} }, $.;    # $. is the current line number of $fh
        }
    }

    if (%violated) {
        fail("$filename contains boilerplate text");
        for my $desc (keys %violated) {
            diag "$desc appears on lines @{$violated{$desc}}";
        }
    }
    else {
        pass("$filename contains no boilerplate text");
    }
}

# README must not contain either of these boilerplate phrases.
not_in_file_ok(
    'README',
    "The README is used..."      => qr/The README is used/,
    "'version information here'" => qr/to provide version information/,
);

# Changes must not contain the placeholder date text.
not_in_file_ok(
    'Changes',
    "placeholder date/time" => qr(Date/time),
);

# Check one module source file for the stub name line, stub description and
# stub function names, delegating the scan and reporting to not_in_file_ok().
#
#   $module - path of the module file to scan
sub module_boilerplate_ok {
    my $module = shift;

    # description => pattern pairs, kept as a list so the order handed to
    # not_in_file_ok() stays fixed
    my @stub_checks = (
        'the great new $MODULENAME' => qr/ - The great new /,
        'boilerplate description'   => qr/Quick summary of what the module/,
        'stub function definition'  => qr/function[12]/,
    );

    not_in_file_ok($module, @stub_checks);
}

# Third planned test: the dialect module itself must be free of stub text.
module_boilerplate_ok('lib/HTML/WikiConverter/MediaWiki.pm');
HTML-WikiConverter-MediaWiki-0.59/t/mediawiki.preserve.t000064400000000000000000000020001147254762400230740ustar00rootroot00000000000000local $/;
# Load the shared runtests() helper.
require 't/runtests.pl';
# <DATA> sits in list context inside the argument list; because $/ was
# undefined by the preceding `local $/`, it yields the whole __DATA__ section
# as a single string, so the key/value pairs that follow stay aligned.
# minimal => 1 keeps runtests() from appending its shared extra tests.
runtests( data => <DATA>, dialect => 'MediaWiki', minimal => 1, preserve_italic => 1, preserve_bold => 1 );
close DATA;

__DATA__
preserve bold
__H__
<b>bold</b>
__W__
<b>bold</b>
__NEXT__
preserve bold w/ attrs
__H__
<b id="this">this</b>
__W__
<b id="this">this</b>
__NEXT__
preserve bold w/ bad attrs
__H__
<b onclick="takeOverBrowser()">clickme</b>
__W__
<b>clickme</b>
__NEXT__
convert strong
__H__
<strong>strong</strong>
__W__
'''strong'''
__NEXT__
both strong/b
__H__
<ul>
<li> <b>bold</b>
<li> <strong>strong</strong>
</ul>
__W__
* <b>bold</b>
* '''strong'''
__NEXT__
preserve italic
__H__
<i>italic</i>
__W__
<i>italic</i>
__NEXT__
preserve italic w/ attrs
__H__
<i id="it">italic</i>
__W__
<i id="it">italic</i>
__NEXT__
preserve italic w/ bad attrs
__H__
<i onclick="alert('bad!')">clickme</i>
__W__
<i>clickme</i>
__NEXT__
convert em
__H__
<em>em</em>
__W__
''em''
__NEXT__
both em/i
__H__
<ul>
<li> <i>italic</i>
<li> <em>em</em>
</ul>
__W__
* <i>italic</i>
* ''em''
HTML-WikiConverter-MediaWiki-0.59/t/pod-coverage.t000064400000000000000000000010551147254762400216630ustar00rootroot00000000000000#!perl -T

use Test::More;
# Load the optional dependency at run time so its absence skips the test
# instead of aborting compilation.
eval "use Test::Pod::Coverage 1.04";
plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
# Check POD coverage of the distribution's modules, exempting the method
# names listed below from the "must be documented" requirement.
# NOTE(review): the pattern is not anchored, so presumably any method whose
# name merely contains one of these alternatives is exempted as well (e.g.
# 'rules' and 'attributes' are already covered by 'rule' and 'attribute') --
# confirm against Test::Pod::Coverage before tightening it.
all_pod_coverage_ok( { also_private => [
# These methods are documented in HTML::WikiConverter::Dialects
qr/
get_elem_contents
|get_wiki_page
|get_attr_str
|elem_within_block
|is_camel_case
|rule
|rules
|attribute
|attributes
|preprocess_tree
|preprocess_node
|postprocess_output
|caption2para
|strip_aname
|base_url
|wiki_url
/x
] } );
HTML-WikiConverter-MediaWiki-0.59/t/pod.t000064400000000000000000000002141147254762400200660ustar00rootroot00000000000000#!perl -T

use Test::More;
# Load the optional dependency at run time so its absence skips the test
# instead of aborting compilation.
eval "use Test::Pod 1.14";
plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
# Called with no arguments; Test::Pod chooses which files to check.
all_pod_files_ok();
HTML-WikiConverter-MediaWiki-0.59/t/runtests.pl000064400000000000000000000066251147254762400213570ustar00rootroot00000000000000#!/usr/bin/perl
use warnings;
use strict;

use Test::More;
use File::Spec;
use HTML::Entities;
use HTML::WikiConverter;
*e = \&encode_entities;

# Extra test cases, in the same __NEXT__ / __H__ / __W__ layout as the
# callers' data, that runtests() appends to the supplied data unless the
# 'minimal' option is set.
my $more_tests = <<END_TESTS;
__NEXT__
entities (1)
__H__
To enter a '&lt;' in your input, use "&amp;lt;"
__W__
To enter a '&lt;' in your input, use "&amp;lt;"
__NEXT__
entities (2)
__H__
To enter a '<' in your input, use "&amp;lt;"
__W__
To enter a '&lt;' in your input, use "&amp;lt;"
__NEXT__
strip comments
__H__
A <!-- stripped --> comment
__W__
A comment
__NEXT__
strip head
__H__
<html>
<head><title>fun stuff</title></head>
<body>
<p>Crazy stuff here</p>
</body>
</html>
__W__
Crazy stuff here
__NEXT__
strip scripts
__H__
<html>
<head><script>bogus stuff</script></head>
<body>
<script>maliciousCode()</script>
<p>benevolent text</p>
</body>
</html>
__W__
benevolent text
END_TESTS

# Run a batch of HTML-to-wiki conversion tests described in a text blob.
#
# Named arguments:
#   data    - test descriptions separated by "__NEXT__\n"; each description
#             is a name line, then the HTML input introduced by "__H__\n",
#             then the expected wiki markup introduced by "__W__\n"
#   minimal - when true, the shared tests from entity_tests() and
#             $more_tests are NOT appended to data
# Every other argument is handed to the HTML::WikiConverter constructor;
# wrap_in_html is forced to 1 and base_uri defaults to 'http://www.test.com'.
#
# A test name may carry "::word(args)" suffixes. "::TODO('reason')" marks the
# test as TODO; any other suffix is executed as a method call on the
# converter before the test runs. The same converter object is reused for
# every test, so presumably such a call stays in effect for the tests that
# follow it -- verify against HTML::WikiConverter.
#
# Declares the test plan itself (one test per description) and dies if a
# "::method(args)" call fails.
sub runtests {
    my %arg = @_;

    $arg{wrap_in_html} = 1;
    $arg{base_uri} ||= 'http://www.test.com';
    my $minimal = $arg{minimal} || 0;

    my $data = $arg{data} || '';
    $data .= entity_tests() . $more_tests unless $minimal;

    my @tests = split /__NEXT__\n/, $data;
    my $numtests = @tests;
    #$numtests += 1 unless $minimal; # file test
    plan tests => $numtests;

    # Delete unrecognized HTML::WikiConverter options
    delete $arg{$_} for qw/ data minimal /;

    my $wc = HTML::WikiConverter->new(%arg);
    foreach my $test ( @tests ) {
        # The first line of a description is its name. Only read $1 when the
        # substitution actually matched, otherwise a stale capture from an
        # earlier match would be used as the name.
        my $name = ( $test =~ s/^(.*?)\n// ) ? $1 : '';
        my( $html, $wiki ) = split /__W__\n/, $test;
        $html =~ s/__H__\n//;

        # Strip every "::word(args)" suffix from the name, acting on each.
        my( $todo, $todo_reason );
        $name =~ s{\s*\:\:(\w+\([^\)]*?\))}{
            my $keyword = $1;
            if( $keyword =~ /TODO\((\"|\')(.*?)\1/ ) {
                $todo = 1;
                $todo_reason = $2;
            } else {
                my $method_call = $keyword;
                # NOTE: string eval of text taken from the test data; only
                # trusted test files must ever be passed to this helper.
                eval "\$wc->$method_call;";
                die "Failed test call ($name): $@" if $@;
            }
            '';
        }ge;

        # Surrounding blank lines are not significant in either section.
        for( $html, $wiki ) { s/^\n+//; s/\n+$// }

        if( $todo ) {
            TODO: {
                local $TODO = $todo_reason;
                is( $wc->html2wiki($html), $wiki, $name );
            }
        } else {
            is( $wc->html2wiki($html), $wiki, $name );
        }
    }

    #file_test($wc) unless $minimal;
}

# Build test descriptions for the characters '<', '>' and '&'.
#
# For each character three descriptions are produced, named after its entity
# (e.g. "lt"):
#   literal - raw character in,          entity expected out
#   encode  - entity in,                 same entity expected out
#   meta    - doubly encoded entity in,  same text expected out
#
# Returns one string in which every description starts with "__NEXT__\n" and
# ends with a newline. Encoding is done with e(), the file-level alias for
# encode_entities.
sub entity_tests {
    my $tmpl = "__NEXT__\n%s\n__H__\n%s\n__W__\n%s\n"; # test-name, html-input, expected-wiki-output

    my $data = '';
    my @chars = ( '<', '>', '&' );
    foreach my $char ( @chars ) {
        ( my $charname = e($char) ) =~ s/[&;]//g;

        # sprintf is a list operator: without parentheses each later call is
        # swallowed into the last argument of the one before it, which drops
        # the newline after the expected output and piles stray newlines up
        # at the end. Parenthesise every call so the three are concatenated.
        $data .= sprintf( $tmpl, "literal ($charname)", $char, e($char) )
               . sprintf( $tmpl, "encode ($charname)", e($char), e($char) )
               . sprintf( $tmpl, "meta ($charname)", e(e($char)), e(e($char)) );
    }

    return $data;
}

# Read a whole file and return its contents as one string.
#
#   $path - file to read
#
# Dies with "couldn't open ..." if the file cannot be opened.
sub _slurp {
    my $path = shift;

    # Three-argument open with an explicit read mode: the path is taken
    # literally, so leading/trailing whitespace or mode characters in it are
    # never interpreted. The lexical handle avoids the global bareword H.
    open my $fh, '<', $path or die "couldn't open $path: $!";

    local $/;    # undefined record separator: one read returns everything
    my $c = <$fh>;
    close $fh;
    return $c;
}

# Convert t/complete.html with the given converter and compare the result
# with the stored expectation in t/complete.<lowercased dialect name>.
#
#   $wc - converter object; must provide dialect() and html2wiki()
#
# Counts as one test, which is skipped when either file is missing. The call
# to this sub in runtests() is commented out at present.
sub file_test {
    my ($wc) = @_;

    my $suffix    = lc $wc->dialect;
    my $html_path = File::Spec->catfile( 't', 'complete.html' );
    my $wiki_path = File::Spec->catfile( 't', "complete.$suffix" );

    SKIP: {
        skip "Couldn't find $html_path (ignore this)", 1 if !-e $html_path;
        skip "Couldn't find $wiki_path (ignore this)", 1 if !-e $wiki_path;

        # Both calls share one list assignment, so html2wiki() is invoked in
        # list context.
        my ( $actual, $wanted ) = (
            $wc->html2wiki( file => $html_path, slurp => 1 ),
            _slurp($wiki_path),
        );

        # Leading and trailing blank lines are not significant.
        foreach my $text ( $actual, $wanted ) {
            $text =~ s/^\n+//;
            $text =~ s/\n+$//;
        }

        is( $actual, $wanted, 'read from file' );
    };
}

1;
 
projeto & código: Vladimir Lettiev aka crux © 2004-2005, Andrew Avramenko aka liks © 2007-2008
mantenedor atual: Michael Shigorin
mantenedor da tradução: Fernando Martini aka fmartini © 2009