ALT Linux Sisyphus - Пакет perl-Jevix

perl-Jevix-0.9.7/000075500000000000000000000000001133231612600135505ustar00rootroot00000000000000perl-Jevix-0.9.7/Jevix/000075500000000000000000000000001133231612600146355ustar00rootroot00000000000000perl-Jevix-0.9.7/Jevix/Makeup.pm000064400000000000000000000635441133231612600164310ustar00rootroot00000000000000package Jevix::Makeup;

# ==========================================
#
#  Jevix Version 0.9.7 (windows-1251)
#
#  Developed by Igor Askarov
# 
#  Please send all suggestions to
#  Igor Askarov <juks@juks.ru>
#  http://www.jevix.ru/
#
#  Release date: 20/12/2008
#
# === Methods list==========================
#
#  new							the constructor
#  process						entry sub
#  setConf						setting up the configuration
#  preset						presets selector
#  makeup						makeup the text
#  quotes						quotes processor
#  cuttags						tags processor
#  tagEnd						looking for tag end
#  plantTags						sub to bring the tags back
#  vanish						sub to remove all the stuff and bring the text to plain mode
#  parseTagsAllowString					parse the tagsAllow string to hash
#  parseTagsDenyString					parse the tagsDeny string to hash
#  getConf						return configuration hash
#
# ==========================================

use strict;
use warnings;

# Length, in characters, of one tag marker inside $strip
# ("\x00\x0F" + four flag characters + "\x0F\x00", as built by cuttags)
my $markLength = 8;
# Working text: the input with every kept tag replaced by a marker
my $strip;
# Result hash handed back by process(): keys error, errorLog, text
my $result;
# Array reference of the tags cut out by cuttags ({name, body, content}), in order of appearance
my $tags;
# Stack of currently open (non-single) tag names, maintained by cuttags
my @tagsOpen;
# Active configuration hash (set by setConf / preset)
my $conf;

# Tags treated as single (no closing tag expected)
my @singleTags = qw/link input spacer img br hr/;
# Tags flagged as "breaking"; they also reset the <nobr> / <a> open state in cuttags
my @breakingTags = qw/p td div hr/;
# Tags flagged as "space" tags
my @spaceTags = qw/br/;
# Tags whose content, up to the matching closing tag, is stored verbatim with the tag
my @tagsToEat = qw/script style pre code/;

# ==The constructor
# No object is built here: the invocant (normally the package name) is handed
# straight back, and all working state lives in the file-level lexicals.
sub new {
	my ($class) = @_;

	return $class;
}

# ==Here we've got the input
# Entry point.
#   $class    - invocant
#   $text     - reference to the string to process
#   $userConf - optional configuration hash; when given it replaces the
#               current configuration through setConf()
# Returns the result hash: {error, errorLog, text}.
sub process($$$) {
	my $class = shift;
	my ($text, $userConf) = @_;

	# A configuration passed along with the text takes effect first
	if($userConf) { $class->setConf($userConf); }

	# Fresh working state for this run
	($strip, $tags) = ("", []);
	@tagsOpen = ();
	$result = {error => 0, errorLog => []};

	# Plain-text mode: drop every tag, flatten typographic entities, and stop
	if($conf->{vanish}) {
		$class->cuttags($text, {tagsDenyAll=>1}, $result);
		$class->vanish(\$strip);
		$result->{text} = $strip;

		return $result;
	}

	# Regular mode: HTML input has its tags replaced by markers first
	if($conf->{isHTML}) {
		$class->cuttags($text, $conf, $result);
	} else {
		$strip = $$text;
	}

	$class->quotes($conf) if($conf->{quotes});
	$class->makeup($conf);

	# HTML gets its tags planted back; plain text is returned as processed
	if($conf->{isHTML}) {
		$result->{text} = "";
		$class->plantTags($result);
	} else {
		$result->{text} = $strip;
	}

	return $result;
}

# ==Setting the configuration
# Makes $userConf the active configuration (or a bare {presetBasic=>1} hash
# when none is given) and lets preset() complete it.
sub setConf($$) {
	my $class = shift;
	my $userConf = shift;

	if($userConf) {
		$conf = $userConf;
	} else {
		$conf = {presetBasic=>1};
	}

	$class->preset();
}

# ==Choosing default setup when necessary
# Works on the file-level $conf. When there is no configuration yet, or it
# asks for presetBasic, every basic option the caller left undefined receives
# its default value. Afterwards tagsAllow / tagsDeny values that came as plain
# strings are parsed into their hash form.
sub preset($$) {
	my $class = shift;

	if(!$conf || $conf->{presetBasic}) {
		# option => default, listed in pairs
		my @basicDefaults = (
			isHTML => 1,					# HTML mode
			lineBreaks => 1,				# Linebreaks to <br/>
			paragraphs => 0,				# Paragraphs
			dashes => 1,					# Replace hyphens with dashes when necessary
			dots => 1,						# Replace 3 dots with a symbol
			edgeSpaces => 1,				# Wipe edge space characters
			multiSpaces => 1,				# Wipe multispaces
			redundantSpaces => 1,			# Wipe redundant spaces
			compositeWordsLength => 10,		# The maximum length of composite word to be put inside <nobr>
			tagLf => 1,						# Wipe crs and lfs after dropped tag
			nbsp => 1,						# Insert non-breaking spaces
			quotes => 1,					# Makeup quotes
			qaType => 0,					# Main quotes type
			qbType => 2,					# Nested quotes type
			misc => 1,						# Misc substitutions
			codeMode => 2,					# The way jevix should represent html special characters
		);

		while(my ($option, $default) = splice(@basicDefaults, 0, 2)) {
			next if(defined($conf->{$option}));
			$conf->{$option} = $default;
		}
	}

	# tagsAllow given as a string: parse it into the hash form
	my $allow = $conf->{tagsAllow};
	if(defined($allow) && !ref($allow)) {
		my $parsed = $class->parseTagsAllowString($allow);
		$conf->{tagsAllow} = $parsed->{tagsAllow};
		$conf->{tagsDenyAllAttributes} = $parsed->{tagsDenyAllAttributes};
	}

	# tagsDeny given as a string: parse it into the hash form
	if(defined($conf->{tagsDeny}) && !ref($conf->{tagsDeny})) {
		$conf->{tagsDeny} = $class->parseTagsDenyString($conf->{tagsDeny});
	}
}

# ==Imposing clear text
# Applies the typographic substitutions enabled in $conf to the file-level
# $strip, in place. In HTML mode $strip holds no tags: cuttags has replaced
# each kept tag with an 8-character marker
#   \x00 \x0F <a-state> <space/breaking> <p/br> <nobr/tag-start> \x0F \x00
# and the patterns below match those markers instead of real tags.
#
#   $class - invocant (not used)
#   $conf  - configuration hash (shadows the file-level $conf inside this sub)
#
# The return value is not meaningful.
#
# NOTE(review): the non-ASCII literals below are stored in the file's own
# single-byte encoding (the file header says windows-1251); keep them
# byte-for-byte when editing.
sub makeup($$) {
	my ($class, $conf) = @_;

	# ==Misc
	# Prepositions
	my $prp_rus = "Ю|АЕГ|АЕГН|Б|БМЕ|БН|ДЮ|ДКЪ|ДН|ГЮ|Х|ХГ|ХГН|ХКХ|Й|ЙЮЙ|МЮ|МЮД|МЮДН|МЕ|МХ|МН|Н|НА|НАН|НЙНКН|НР|НРН|ОН|ОНД|ОНДН|ОПХ|ОПН|Я|ЯЙБНГЭ|ЯН|С|ВЕПЕГ";
	my $prp_eng = "aboard|about|above|absent|across|after|against|along|alongside|amid|amidst|among|amongst|around|as|astride|at|atop|before|behind|below|beneath|beside|besides|between|beyond|but|by|despite|down|during|except|following|for|from|in|inside|into|like|mid|minus|near|nearest|notwithstanding|of|off|on|onto|opposite|out|outside|over|past|re|round|save|since|than|through|throughout|till|to|toward|towards|under|underneath|unlike|until|up|upon|via|with|within|without";
	my $prp = "$prp_rus|$prp_eng";

	my $letters = "A-Za-zю-ъЮ-Ъ╗╦иИ";				 # Characters
	my $cap_letters = "A-Zю-ъ╗╦";								 # Capital characters

	my $sp = " \xA0\t";														   # space class
	my $rt = "\r?\n";																 # cr class

	# Marker patterns; the four "." / [..] positions are the four flag characters
	my $br = "\x00\x0F..[\x01\x03].\x0F\x00";				 # br tag
	my $pt = "\x00\x0F..[\x02].\x0F\x00";						 # Paragraph tag
	my $ps = "\x00\x0F..[\x02][\x01\x03]\x0F\x00";		# Paragraph start
	my $pe = "\x00\x0F..[\x02][\x02\x00]\x0F\x00";		# Paragraph end
	my $to = "\x00\x0F...[\x03\x01]\x0F\x00";				 # Opening tag
	my $tc = "\x00\x0F...[\x02\x00]\x0F\x00";				 # Closing tag
	my $bb = "\x00\x0F...[\x02\x03]\x0F\x00";				 # Tag where <nobr> is open
	my $nb = "\x00\x0F...[\x01\x00]\x0F\x00";				 # Tag where no <nobr> is open
	my $ao = "\x00\x0F[\x01]...\x0F\x00";						# Tag where <a> is open
	my $ac = "\x00\x0F[\x00]...\x0F\x00";						# Tag where no <a> is open				
	my $ts = "\x00\x0F";														  # Tag start
	my $te = "\x0F\x00";														  # Tag end

	my $brt = "<br *\/?>";														# br tag in text mode
	my $pst = "<p>";
	my $pet = "</p>";

	# Codes, metasymbols or what ever?
	# codeMode selects the output form: false = raw characters,
	# 1 = numeric entities, anything else = named entities
	my ($cdash, $cnbsp, $cdots, $cfracs, $ccopy, $creg);

	if(!$conf->{codeMode}) {
		($cdash, $cnbsp, $cdots, $ccopy, $creg) = ("≈", "═", "┘", "╘", "╝");
		$cfracs = {'1/4'=>"?", '1/2'=>"?", '3/4'=>"?"};
	} elsif($conf->{codeMode} == 1) {
		($cdash, $cnbsp, $cdots, $ccopy, $creg) = ("&#151;", "&#160;", "&#133;", "&#169;", "&#174;");
		$cfracs = {'1/4'=>"&#188;", '1/2'=>"&#189;", '3/4'=>"&#190;"};
	} else {
		($cdash, $cnbsp, $cdots, $ccopy, $creg) = ("&mdash;", "&nbsp;", "&hellip;", "&copy;", "&reg;");
		$cfracs = {'1/4'=>"&frac14;", '1/2'=>"&frac12;", '3/4'=>"&frac34;"};
	}

	# Wiping edge spaces
	if($conf->{edgeSpaces}) { $strip =~ s/^[$sp\r\n]*(.+?)[$sp\r\n]*$/$1/isg; }

	# Wiping spaces between tags (</td> </tr>)
	if($conf->{tagSpaces}) { $strip =~ s/($tc)[$sp]($tc)/$1$2/isg; }

	# Wiping multispaces
	if($conf->{multiSpaces}) { $strip =~ s/([$sp]){2,}/$1/ig; }

	# Wiping redundant spaces
	# (before punctuation and after "(", leaving smiley-like sequences alone)
	if($conf->{redundantSpaces}) { $strip =~ s{([$sp]+(?![:;]-[)(])([;:,.)?!]))|(\()(?<![:%;]-\()[$sp]+}{$1 ? $2 : $3}eig; } 

	if($conf->{nbsp}) {
		# Prepositions with &nbsp;
		$strip =~ s/(^|\x00|[$sp])($prp)[$sp]([0-9$letters])/$1$2$cnbsp$3/gm;

		# &nbsp; with digits
		# (only in stretches of text that are not inside <nobr>)
		$strip =~ s{($nb|^)(.*?)($bb|$)}{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/([0-9]+)([$sp]+|&nbsp;|&#160;)(?:(?=[0-9]{2,})|(?=%))/$1$cnbsp/ig; "$a$b$c"; }eisg;
	}		

	# Put composite words inside <nobr>
	# ($f is always empty: the inner pattern has only two capture groups)
	if($conf->{compositeWords}) { $strip =~ s{($nb|^)(.*?)($bb|$)}{ my ($a, $b, $c) = ($1, $2, $3);
											$b =~ s{(^|[$sp\x00]|&nbsp;)([$letters]+(?:-[$letters]+)+)(?=$|[$sp\x00])}{
															my $d = !defined($1) ? "" : $1; my $e = !defined($2) ? "" : $2; my $f = !defined($3) ? "" : $3;
															if(length($e) <= $conf->{compositeWordsLength}) { "$d<nobr>$e<\/nobr>" } else {"$d$e$f"}
													}eig; "$a$b$c";
											}eisg; }

	# Links
	if($conf->{links}) {
		my $lAttributes = '';

		# Building <a> attributes string
		if($conf->{linksAttributes}) {
			my $q = !$conf->{tagUnQuoteValues} ? '"' : '';

			while (my ($attr, $value) = each(%{$conf->{linksAttributes}})) {
				$lAttributes .= ' ' if($lAttributes);
				$lAttributes .= "$attr=$q$value$q";
			}

			$lAttributes = ' ' . $lAttributes if($lAttributes);
		}

		# Wrap bare http:// URLs that are not already inside an <a>
		$strip =~ s{(^|$ac|(?<=</a>))([^\x00]*?)(http://[^ \x00]+)(?<![,.!?])}{$1$2<a href="$3"$lAttributes>$3</a>}ig;
	}

	# Dots
	if($conf->{dots}) { $strip =~ s/\.{3}|┘|&hellip;/$cdots/ig; }

	# Dashes
	if($conf->{dashes}) {
		# Hyphen
		$strip =~ s/([^$sp])([$sp]|&#160;|&nbsp;)(-{1,2}|≈|&mdash;|&#151;)/$1$cnbsp$cdash/ig;
		# "Speech" hyphen
		$strip =~ s/((?:^|$ps|$br|$brt(?:$rt)*|[$rt]))[$sp]*(?:&nbsp;)*(-{1,2}|≈|&mdash;|&#151;)[$sp]*(?:&nbsp;)*(.)/$1$cdash$cnbsp$3/ig;
	}

	# Misc stuff
	if($conf->{misc}) {
		# Fracs
		$strip =~ s{(?:(?<=[$sp\x00])|(?<=^))([13])/([24])(?:(?=[$sp\x00])|(?=$))}{if(defined($cfracs->{"$1/$2"})) { $cfracs->{"$1/$2"} } else { "$1/$2" } }esg;
		# Copyright & registered
		# The pattern lists every spelling the replacement code tests for
		# (including "&reg;" and the registered-sign character), and the
		# fallback hands back the matched text itself.
		$strip =~ s{(?:(?<=[$sp\x00])|(?<=^))(\([cr]\)|&copy;|╘|&reg;|╝)(?:(?=[$sp\x00?!;.,])|(?=$))}{ if((lc($1) eq "(c)") || (lc($1) eq "&copy;") || ($1 eq "╘")) {$ccopy} elsif((lc($1) eq "(r)") || (lc($1) eq "&reg;") || ($1 eq "╝")) {$creg} else { $1 } }eig;
	}

	# Paragraphs
	if($conf->{paragraphs}) { $strip =~ s{(^|$pe(?:$rt$rt)?|$rt$rt)(?!$ps)(.+?)($br)?($brt)?(?<!$pe)(?:(?=$)|(?=$rt$rt)|(?=$ps))}{ my ($a, $b, $c) = ($1,$2,$3||""); (($b =~ /^[ \r\n]+$/) || ($b =~ /^(<br *\/?>|$br)+$/)) ? "$a$b$c" : "$a<p>$b</p>";}eisg; }

	# Line break
	# (a <br /> is put before each line break unless a paragraph / br marker or
	# tag immediately precedes it, or a <br> or any marker immediately follows)
	if($conf->{lineBreaks}) { $strip =~ s/(?<!$pt)(?<!$br)(?<!$br\r)(?<!$pe\r\n\r\n)(?<!$pe\n\n)(?<!$pe\r\n)(?<!$pe\n)(?<!$pe\r)(?<!$pe)(?<!$pet\r\n\r\n)(?<!$pet\r\n\r)(?<!$pet\n\n)(?<!$pet\r\n)(?<!$pet\n)(?<!$pet\r)(?<!$pet)(?<!$pst)($rt)(?!$brt)(?!$ts)/<br \/>$1/isg; }
}

# ==impose quotes
# Rewrites the quotation marks in the file-level $strip, in place.
# Every known quote form (raw character, numeric entity, named entity) that
# stands at a word edge is first reset to a plain <">; the text is then walked
# character by character and each <"> becomes an opening or closing quote of
# the main type (qaType) or, inside an open main quote, of the nested type
# (qbType). Tag markers planted by cuttags are stepped over.
#
#   $class - invocant (not used)
#   $conf  - configuration hash; codeMode, qaType, qbType and logErrors are
#            read, and qaType / qbType are normalised in place
#
# Quote mismatches are reported through the file-level $result when
# $conf->{logErrors} is set.
sub quotes($$) {
	my ($class, $conf) = @_;

	my $i;
	my ($a_open, $b_open) = (0,0);
	my ($cp, $c, $cn, $cn_is_sp, $cp_is_sp) = ('', '', '', 0, 0);
	my ($qaStart, $qaEnd, $qbStart, $qbEnd);
	my (@qs, @qe, @qs_ansi, @qe_ansi, @qs_html, @qe_html, @qs_ent, @qe_ent,);

	# space class
	my $sp =" \t\xA0";

	# characters
	my $letters = "A-Za-zю-ъЮ-Ъ╗╦иИ";

	# Opening (@qs_*) and closing (@qe_*) quotes, indexed by quote type 0..5
	@qs_ansi = ("╚", "⌠", "└", "▒", "┌", '"');		
	@qe_ansi = ("╩", "■", "⌠", "▓", "▒", '"');
	@qs_html = ("&#171;", "&#147;", "&#132;", "&#145;", "&#130;", "&#34;");
	@qe_html = ("&#187;", "&#148;", "&#147;", "&#146;", "&#145;", "&#34;");
	#				<<				``				..				`				.				"
	@qs_ent = ("&laquo;",		"&ldquo;",		"&bdquo;",		"&lsquo;",		"&sbquo;",		"&quot;");		
	#				>>				''				''				'				`				"
	@qe_ent = ("&raquo;", 		"&rdquo;", 		"&ldquo;", 		"&rsquo;", 		"&lsquo;", 		"&quot;");

	# Quotes collection
	if(!$conf->{codeMode}) {
		@qs = @qs_ansi; @qe = @qe_ansi;
	} elsif ($conf->{codeMode} == 1) {
		@qs = @qs_html; @qe = @qe_html;
	} else {
			@qs = @qs_ent; @qe = @qe_ent;
	}

	# Getting configuration setting
	# (because of ||=, a qbType of 0 is replaced by 1 here)
	$conf->{qaType} ||= 0;
	$conf->{qbType} ||= 1;
	$conf->{qaType} = ($conf->{qaType} >= 0 && $conf->{qaType} <= 5) ? $conf->{qaType} : 0;
	$conf->{qbType} = ($conf->{qbType} >= 0 && $conf->{qbType} <= 5) ? $conf->{qbType} : 1;

	# Selecting quotes as requested by user
	($qaStart, $qaEnd) = ($qs[$conf->{qaType}], $qe[$conf->{qaType}]);
	($qbStart, $qbEnd) = ($qs[$conf->{qbType}], $qe[$conf->{qbType}]);

	# Resetting all the quotes inside text to <">
	my $qa = join('|', @qs_ansi) . '|' . join('|', @qe_ansi) . '|' . join('|', @qs_html) . '|' . join('|', @qe_html) . '|' . join('|', @qs_ent) . '|' . join('|', @qe_ent);
	$strip =~ s/(?:(?:(?<=[^$letters])|(?<=^))($qa))|(?:($qa)(?:(?=[^$letters])|(?=$)))/\"/ig;

	# @st is a three-slot window: [0] previous, [1] current, [2] next character.
	# @space / @break carry, per slot, the flags of the markers skipped just
	# before that character.
	my $spread = 1;
	my $mv = 0;
	my $mvn = 0;
	my @st;
	$i = 0;
	my $skip = 0;
	my @space;				  # Space tags flag
	my @break;				  # Text break flags

	$st[$_] = '' foreach(0..$spread + 1);
	$space[$_] = 0 foreach(0 + 1..$spread + 1);
	$break[$_] = 0 foreach(0 + 1..$spread + 1);
	$space[0] = 1;
	$break[0] = 1;

	while(1) {
		# Skipping tags
		foreach(0..$spread) {
			do {
				$skip = 0;
				if($i + $_ + $mv <= length($strip)) {
					if($i + $_ + $mv + 1 < length($strip)) {
						if((substr($strip, $i + $_ + $mv, 1) eq "\x00") && (substr($strip, $i + $_ + $mv + 1, 1) eq "\x0F")) {
							# The isSpace / isBreaking flags sit in the second flag
							# character of a marker, i.e. at offset 3 from its start
							# (offset 2 holds the <a> state).
							$space[$_ + 1] |= (ord(substr($strip, $i + $_ + $mv + 3, 1)) & 2) >> 1;
							$break[$_ + 1] |= ord(substr($strip, $i + $_ + $mv + 3, 1)) & 1;
							$mv += $markLength;
							if(!$_) { $mvn = $mv; }
							$st[$_ + 1] = "";
							$skip = 1;
						}
					}
					if(!$skip) { $st[$_ + 1] = substr($strip, $i + $_ + $mv, 1); }
				} 
			} while($skip);
		}

		# Step over the markers that preceded the current character
		$i += $mvn;
		$mv = 0;
		$mvn = 0;

		($cp, $c, $cn) = ($st[0], $st[1], $st[2]);
		$cp_is_sp = (($cp =~ /[^0-9$letters]/) || $space[0] || $space[1] || $break[0] || !$i) ? 1 : 0;
		$cn_is_sp = (($cn =~ /[^0-9$letters]/) || $space[2] || $break[2] || $cn eq '') ? 1 : 0;

		# Reset state if breaking tag appears
		if($break[1] || $i == length($strip)) {
			if($a_open || $b_open) {
				# Log quote error if appears
				if($conf->{logErrors}) {
					my $quoteErrSampleLength = 100;
					my $z = $i - 1;
					my $y;
					# Walk back to the start of the text; $y ends up at the earliest
					# space that is still within the sample length
					while(1) {
						if(substr($strip, $z, 1) eq " " || substr($strip, $z, 1) eq "\xA0" || !$z) { if($i-$z <= $quoteErrSampleLength) {$y = $z}}
						last if(!$z);
						$z--;
					}
					# No space within reach: fall back to a sample of the maximum length
					$y = $i - $quoteErrSampleLength if(!defined($y));
					my $sample = substr($strip, $y, ($i - $y));
					$sample =~ s/\x00\x0F[^\x0F]+\x0F\x00//g;
					$sample =~ s/<\/?[a-z]+.*?>//g;
					push(@{$result->{errorLog}}, {type=>"Quote_error", message=>"Quote mismatch near [$sample]<--"});
					$result->{error} = 1;
				}
			}

			$a_open = 0;
			$b_open = 0;
		}

	# A plain quote: decide which of the four typographic quotes it becomes
	if($c eq '"') {
		if(!$a_open) {
			$a_open = 1;
			substr($strip, $i, 1) = $qaStart;
			$i += length($qaStart) - 1;
		} elsif ($a_open && (($i == length($strip) - 1) || (!$b_open && $cn_is_sp))) {
			$a_open = 0;
			substr($strip, $i, 1) = $qaEnd;
			$i += length($qaEnd) - 1;
		} elsif ($a_open && !$b_open) {
			$b_open = 1;
			substr($strip, $i, 1) = $qbStart;
			$i += length($qbStart) - 1;
		} elsif ($a_open && $b_open) {
			$b_open = 0;
			substr($strip, $i, 1) = $qbEnd;
			$i += length($qbEnd) - 1;
		}
	}

		last if($i == length($strip));

		# Slide the window one character forward
		$st[0] = $st[1];
		$space[0] = $space[1];
		$break[0] = $break[1];
		$space[$_] = 0 foreach(0 + 1..$spread + 1);
		$break[$_] = 0 foreach(0 + 1..$spread + 1);
		$i++;
	}
}

# ==Cutting the tags away
# Scans $$text for tags and appends the text to the file-level $strip, with
# every kept tag replaced by an 8-character marker. Kept tags are saved, in
# order, in the file-level $tags so that plantTags can restore them. Dropped
# tags leave no marker.
#
#   $class  - invocant (not used)
#   $text   - reference to the input string
#   $conf   - configuration hash that governs dropping and tag clean-up
#   $result - result hash; error / errorLog are updated on HTML problems
#
# A "<" that does not start a recognisable, terminated tag is either copied
# as text (when not followed by a letter, "!" or "/") or skipped.
sub cuttags($$$$) {
		my($class, $text, $conf, $result) = @_;

		# loop counter
		my $i = 0;
		# Jump length
		my $hop;
		# current & next character
		my ($c, $cn);
		# tag length, tag dimensions, tag name, tag body text, single tag flag, content inside the tag
		my ($tl, $ts, $te, $cl, $tagName, $tagBody, $tagContent);
		# some useful flags
		my ($isTag, $isTagStart, $isSingle, $isSingleClosed, $isSpace, $isBreaking, $nobrIsOpen, $aIsOpen, $flagSet3, $flagSet2, $flagSet1, $flagSet0);

		# space class
		my $sp =" \t\xA0";

		while(1) {
			# Copy everything up to the next "<" as plain text
			$hop = index($$text, "<", $i);

			if($hop < 0) {
				$strip .= substr($$text, $i, length($$text) - $i);
				last;
			} elsif($hop > 0) {
				$strip .= substr($$text, $i, $hop - $i);
				$i = $hop;
			}

			($c, $cn) = unpack("aa", substr($$text, $i, 2));

			$isTag = 0;

			# =If tag opens
			$isTagStart = ($cn =~ /!|[a-z]/i) ? 1 : 0;
			if($isTagStart || ($cn eq "/")) { $isTag = 1; }

			if($isTag) {
				$ts = $i;																		# Tag start position 
				$te = $isTagStart ? tagEnd($text, $ts) : index($$text, ">", $ts);				# Tag end position
				# index() signals "not found" with -1, which is true in boolean
				# context; normalise it to 0 like tagEnd() does. Otherwise an
				# unterminated closing tag moves $i to -1 and the scan restarts
				# from the beginning of the text, never finishing.
				$te = 0 if($te < 0);

				if($te) {
					$tagBody = substr($$text, $ts, $te - $ts + 1);
					$tagName = $isTagStart ? ($tagBody =~ m/^<([a-z0-9]+)/i)[0] : ($tagBody =~ m/^<\/\s*([a-z]+)/i)[0];
					# No name is found for things like comments or a doctype
					$tagName =~ tr/A-Z/a-z/ if(defined($tagName));
				}

				if($te && $tagName) {
					# =Flags
					# Detecting whether the tag is single (self-closing) or double
					$isSingleClosed = 0;
					$isSingle = 0;

					if($isTagStart) {
						if(grep{$tagName eq $_} @singleTags) {
							$isSingle = 1;
						} elsif (substr($tagBody, length($tagBody) - 2, 1) eq "/") {
							$isSingle = 1;
							$isSingleClosed = 1;
						}
					}

					# Detecting whether this is space tag or not
					$isSpace = (grep{$tagName eq $_} @spaceTags) ? 1 : 0;

					# Detecting whether this is breaking tag or not
					$isBreaking = (grep{$tagName eq $_} @breakingTags) ? 1 : 0;

					# Tag Length
					$tl = $te - $ts + 1;

					# Updating the status for tags open
					if(($conf->{checkHTML} || $conf->{tagCloseOpen}) && !$isSingle) {
						if($isTagStart) {
							push(@tagsOpen, $tagName);
						} else {
							# Innermost open tag, or '' when nothing is open
							my $lastOpen = scalar(@tagsOpen) ? $tagsOpen[$#tagsOpen] : '';

							if($lastOpen ne $tagName) {
								# HTML error
								$result->{error} = 1;
								if($conf->{logErrors}) { push(@{$result->{errorLog}}, {type=>"HTML_Parse", position=>$i, message=>"Found closing tag <$tagName> while waiting tag <" . $lastOpen . "> to close!"}); }
							} else {
								pop(@tagsOpen);
							}
						}
					}

					# Eating tag content for some tags like <script>
					$tagContent = "";
					$cl = 0;
					if((grep{$tagName eq $_} @tagsToEat) && $isTagStart) {
						$cl = index($$text, "</$tagName>", $ts + $tl) - $ts - $tl;
						if($cl > 0) {
							$tagContent = substr($$text, $ts + $tl, $cl);
						} else {
							$cl = 0;
							$result->{error} = 1;

							if($conf->{logErrors}) { push(@{$result->{errorLog}}, {type=>"HTML_Parse", position=>$i, message=>"Can't find <$tagName> end!"}); }
						}
					}

					# Should I drop all the tags by default?
					my $dropTag = 0;
					if($conf->{tagsDenyAll} || $conf->{simpleXSS} && $tagName eq 'script') { $dropTag = 1; }

					# Checking deny list
					if(!$dropTag && defined($conf->{tagsDeny})) {
						if($conf->{tagsDeny}->{$tagName}) { $dropTag = 1; }
					}

					# Checking allow list
					if(defined($conf->{tagsAllow}) && $dropTag) {
						if($conf->{tagsAllow}->{$tagName}) { $dropTag = 0; }
					}

					# Nobr tag status
					if($tagName eq "nobr" && $isTagStart) {
						$nobrIsOpen = 1;
					} elsif(($tagName eq "nobr" && !$isTagStart) || (grep{$tagName eq $_} @breakingTags)) {
						$nobrIsOpen = 0;
					}

					# A tag status
					if($tagName eq "a" && $isTagStart) {
						$aIsOpen = 1;
					} elsif(($tagName eq "a" && !$isTagStart) || (grep{$tagName eq $_} @breakingTags)) {
						$aIsOpen = 0;
					}

					# =Final part
					if(!$dropTag) {
						# =Processing tags
						# Tag name to lower case
						if($conf->{tagNamesToLower}) {
							if($isTagStart) { $tagBody = "<" . $tagName . substr($tagBody, length($tagName) + 1, length($tagBody) - length($tagName) - 1); }
							else { $tagBody =~ tr/A-Z/a-z/; }
						}

						# Tag name to upper case
						if($conf->{tagNamesToUpper}) {
							if($isTagStart) { $tagBody = "<" . uc($tagName) . substr($tagBody, length($tagName) + 1, length($tagBody) - length($tagName) - 1); }
							else { $tagBody =~ tr/a-z/A-Z/; }
						}

						# =Tag parameters to lower or upper case
						if($isTagStart && ($conf->{tagAttributesToLower} || $conf->{tagAttributesToUpper})) {
							# Regular parameters
							# NOTE(review): $tmp is built here but never written back to
							# $tagBody, and the pattern has no fifth group ($5) — this
							# loop looks like it has no effect; confirm the intent.
							my $tmp = "";

							while ($tagBody =~ m/([^\s]*\s*)(?:([a-z\r]+)(\s*)(?==)(=\s*))?/ig ) {
								$tmp .= $1 if ($1); if($conf->{tagAttributesToLower}) { if($2) { $tmp .= lc($2); } } else { if($2) { $tmp .= uc($2); } } $tmp .= $3 if ($3); $tmp .= $4 if ($4); $tmp .= $5 if ($5);
							}

							# Single parameters (like <checked>)
							if($conf->{tagAttributesToLower}) { $tagBody =~ s{(?<!=)( +([a-z]+))}{lc($1)}eig; }
							elsif($conf->{tagAttributesToUpper}) { $tagBody =~ s{(?<!=)( +([a-z]+))}{uc($1)}eig; }
						}

						# Simple XSS & tag attributes protection
						# Each name=value pair is either kept as it is or removed
						if($isTagStart && ($conf->{simpleXSS} || $conf->{tagsAllow}->{$tagName}->{validAttributes} || $conf->{tagsAllow}->{$tagName}->{invalidAttributes} || $conf->{tagsAllow}->{$tagName}->{denyAllAttributes} || $conf->{tagsDenyAllAttributes})) {
							$tagBody =~ s{(?<!<)(\s*)([a-z]+)([$sp]*=[$sp]*)("[^"]+"|[^$sp/>]+)} {
								my ($a, $b, $c, $d) = ($1||'', lc($2), $3, $4);
								if($conf->{simpleXSS} && ($b =~ /^on/ig || $d =~ /javascript|expression/ig)) {
								'';
								} elsif(($conf->{tagsDenyAllAttributes} || $conf->{tagsAllow}->{$tagName}->{denyAllAttributes} || ($conf->{tagsAllow}->{$tagName}->{invalidAttributes} && $conf->{tagsAllow}->{$tagName}->{invalidAttributes}->{$b}))
																		&& !(($conf->{tagsAllow}->{$tagName}->{validAttributes} && $conf->{tagsAllow}->{$tagName}->{validAttributes}->{$b})
																		|| $conf->{tagsAllow}->{$tagName}->{allowAllAttributes})
										) {
								'';
														} elsif($conf->{tagsAllow}->{$tagName}->{validAttributes} && !$conf->{tagsAllow}->{$tagName}->{validAttributes}->{$b}) {
								'';
								} else {
								$a . $b . $c . $d;
								}
							}eig;						
					}

					# Close single tag
					if($conf->{tagCloseSingle} && $isSingle && !$isSingleClosed) {
						if(substr($tagBody, length($tagBody) - 2, 1) ne "/") {
							if(substr($tagBody, length($tagBody) - 2, 1) ne " ") { substr($tagBody, length($tagBody) - 2, 1) .= " /"; } else { substr($tagBody, length($tagBody) - 2, 1) .= "/"; }
						}
					}

					# Quote attribute values
					if($conf->{tagQuoteValues} && $isTagStart) {
						my $tmp = "";
						#						1			   23  4		5		6
						while($tagBody =~ m/([<a-z0-9 >]+)?((=)(\s*)([^ >]+)([ >]+))?/ig) {
							$tmp .= $1 if($1);
							if($2) {
								$tmp .= $3 if($3);
								$tmp .= $4 if($4);
								if($5 && substr($5, 0, 1) ne '"' && substr($5, length($5) - 1, 1) ne '"') { $tmp .= "\"$5\""; } else { $tmp .= $5; }
								$tmp .= $6 if($6);
							}
						}

						$tagBody = $tmp;
					}

					# Unquote attribute values
					if($conf->{tagUnQuoteValues}) {
						$tagBody =~ s/([a-z]+)(\s*)(=)(\s*)"([^\=\s">]+)"/$1$2$3$4$5/ig;   #"
					}

					# Saving the tag
					push(@$tags, {name=>$tagName, body=>$tagBody, content=>$tagContent});

					# Forming flagSet
					#
					# byte3: _ _ _ _ _ _ _ isHref | byte2: _ _ _ _ _ _ isSpace isBreaking| byte1: _ _ _ _ _ p br| byte0: _ _ _ _ nobr isTagStart
					$flagSet3 = 0;
					if($aIsOpen) { $flagSet3 |= 1; }
					$flagSet2 = 0;
					if($isSpace) { $flagSet2 |= 2; }
					if($isBreaking) { $flagSet2 |= 1; }
					$flagSet1 = 0;
					if($tagName eq "br") { $flagSet1 |= 1; }
					if($tagName eq "p") { $flagSet1 |= 2; }
					$flagSet0 = 0;
					if($isTagStart) { $flagSet0 |= 1; }
					if($nobrIsOpen) { $flagSet0 |= 2; }

					# Planting the marker
					$strip .= "\x00\x0F" . chr($flagSet3) . chr($flagSet2) . chr($flagSet1) . chr($flagSet0) . "\x0F\x00";
				}

				# Moving the pointer (tag end position + content length)
				$i = $te + $cl;

				# Eating crs & lfs after dropped tag
				if($conf->{tagLf} && $dropTag) {
					while(1) {
						if(substr($$text, $i + 1, 1) eq "\r") { $i++; } elsif(substr($$text, $i + 1, 1) eq "\n") { $i++; last; } else { last }
					}
				}
			}
		} else {
			# This is not a tag, just add the "<" to result
			$strip .= $c;
		}

		last if($i == length($$text));
		$i++;
	}

	# Need to close all the open tags in the order of appearance
	# NOTE(review): the loop body is empty, so with 'closeOpenTags' set this only
	# empties @tagsOpen; plantTags does the actual closing under the
	# 'tagCloseOpen' option. Confirm whether this block is still wanted.
	if($conf->{'closeOpenTags'} && scalar @tagsOpen) {
		while(my $tag = pop @tagsOpen) {

		}
	}
}

# ==Find where tag ends
# Plain function (not a method).
#   $text - reference to the string being scanned
#   $i    - position to start scanning from
# Returns the position of the first ">" at or after $i that is not enclosed
# in double quotes, or 0 when the end of the string is reached first.
# A ">" at position 0 is not reported, because 0 doubles as "not found".
sub tagEnd($$$) {
		my ($text, $i) = @_;

		my $insideQuotes = 0;

		$i |= 0;

		while (1) {
			my $char = substr($$text, $i, 1);

			# Each double quote flips the "inside an attribute value" state
			if ($char eq '"') { $insideQuotes = $insideQuotes ? 0 : 1; }

			return $i if ($char eq '>' && !$insideQuotes && $i);
			return 0 if ($i == length($$text));

			$i++;
		}
}

# ==Bring everything back to HTML
# Rebuilds the final text in $result->{text} from the file-level $strip:
# plain text is copied through, and each tag marker is replaced by the next
# saved tag from the file-level $tags (followed by its stored content, if any).
# With $conf->{tagCloseOpen} set, closing tags for everything still on the
# @tagsOpen stack are appended, innermost first.
#
#   $class  - invocant (not used)
#   $result - result hash; its text entry is appended to
sub plantTags($$) {
	my ($class, $result) = @_;
	my $marker = "\x00\x0F";
	my $total = length($strip);
	my $pos = 0;
	my $tagNo = 0;

	while (1) {
		# A marker is only looked for while more than two characters remain
		my $roomForMarker = ($pos < $total - 2) ? 1 : 0;

		if($roomForMarker && substr($strip, $pos, 2) eq $marker) {
			# Marker: emit the saved tag and step over the marker
			$result->{text} .= $$tags[$tagNo]->{body};
			$result->{text} .= $$tags[$tagNo]->{content} if($$tags[$tagNo]->{content});
			$pos += $markLength;
			$tagNo++;
		} else {
			# Plain text: copy up to the next marker, or up to the end
			my $chunk = $roomForMarker ? index($strip, $marker, $pos) - $pos : $total - $pos;
			$chunk = $total - $pos if($chunk < 0);

			if($chunk >= 0) {
				$result->{text} .= substr($strip, $pos, $chunk);
				$pos += $chunk;
			}
		}

		last if($pos == $total);
	}

	# Should we close open tags?
	if($conf->{tagCloseOpen} && scalar @tagsOpen) {
		my $closers;

		while(my $name = pop @tagsOpen) {
			$name = uc($name) if($conf->{tagNamesToUpper});
			$closers .= "</$name>";
		}

		$result->{text} .= $closers if($closers);
	}
}

# ==Bring the text to plain mode==
# Replaces typographic characters and entities in $$text, in place, with their
# plain equivalents: quotes -> ", non-breaking space -> space, dashes -> -,
# ellipsis -> ..., copyright -> (c), registered -> (r), fractions -> n/m.
#
#   $class - invocant (not used)
#   $text  - reference to the string to flatten
#
# Returns what the last substitution returned (callers in this file ignore it).
sub vanish($$) {
	my($class, $text) = @_;

	# [pattern, plain replacement]; applied in this order
	my @rules = (
		[qr/&laquo;|&ldquo;|&bdquo;|&lsquo;|&sbquo;|&quot;|&raquo;|&rdquo;|&ldquo;|&rsquo;|&#171;|&#147;|&#132;|&#145;|&#130;|&#34;|&#187;|&#148;|&#146;|╚|⌠|└|▒|┌|"|╩|■|▓/i, '"'],
		[qr/&nbsp;|&#160;|═/i, ' '],
		[qr/&mdash;|&ndash;|&#151;|&#150;|≈|√/i, '-'],
		[qr/&hellip;|&#133;|┘/i, '...'],
		[qr/&copy;|&#169;|╘/i, '(c)'],
		[qr/&reg;|&#174;|╝/i, '(r)'],
		[qr/&frac14;|&#188;/i, '1/4'],
		[qr/&frac12;|&#189;/i, '1/2'],
		[qr/&frac34;|&#190;/i, '3/4'],
	);

	my $replaced;
	foreach my $rule (@rules) {
		my ($pattern, $plain) = @$rule;
		$replaced = ($$text =~ s/$pattern/$plain/g);
	}

	return $replaced;
}

# ==Parse the tagsAllow string advanced format==
# Turns a tagsAllow configuration string into its hash form.
#
# Format, as read by the code below:
#   - a leading "|" means "deny all attributes by default"
#   - the rest is a list of tokens made of letters, ":" and "|"; any other
#     character separates tokens
#   - "tag"        allow the tag
#   - "tag|"       allow the tag, deny all of its attributes
#   - "tag:"       allow the tag with all of its attributes
#   - "tag:attr"   mark attr as valid for the tag (repeatable)
#   - "tag|attr"   mark attr as invalid for the tag, unless it is also listed
#                  as valid (repeatable)
# Tag and attribute names are stored in lower case. Parsing stops at the first
# token that does not begin with a letter.
#
#   $class  - invocant (not used)
#   $string - the configuration string
#
# Returns {tagsAllow => {...}, tagsDenyAllAttributes => 0|1}.
sub parseTagsAllowString($$) {
	my($class, $string) = @_;

	return {tagsAllow=>{}, tagsDenyAllAttributes=>0} if(!$string);

	my $tagsAllow = {};
	my $tagsDenyAllAttributes = 0;

	# Should I deny all tag attributes by default?
	if(substr($string,0,1) eq '|') {
		$tagsDenyAllAttributes = 1;
		substr($string,0,1) = '';
	}

	# Parsing the Configuration String
	while($string =~ /([a-z:|]+)/ig) {
		my $tBody = $1;
		my ($rawName) = ($tBody =~ /^([a-z]+)/i);

		last if(!$rawName);

		my $tagName = lc($rawName);
		$tagsAllow->{$tagName}->{val}=1;

		if($tBody =~ /^$tagName\|$/i) {
			$tagsAllow->{$tagName}->{denyAllAttributes}=1;
		} elsif($tBody =~ /^$tagName\:$/i) {
			$tagsAllow->{$tagName}->{allowAllAttributes}=1;
		} else {
			while($tBody =~ /:([a-z]+)/ig) {
				$tagsAllow->{$tagName}->{validAttributes}->{lc($1)}=1;
			}

			while($tBody =~ /\|([a-z]+)/ig) {
				my $attr = lc($1);
				# Look at validAttributes without creating it: an empty
				# validAttributes hash would be taken by cuttags as a whitelist
				# with no entries and every attribute of the tag would be removed.
				my $valid = $tagsAllow->{$tagName}->{validAttributes};

				if(!$valid || !$valid->{$attr}) {
					$tagsAllow->{$tagName}->{invalidAttributes}->{$attr}=1;
				}
			}
		}
	}

	return {tagsAllow=>$tagsAllow, tagsDenyAllAttributes=>$tagsDenyAllAttributes};
}

# ==Parse the tagsDeny string==
# Turns a tagsDeny configuration string into its hash form. Every run of
# letters in the string is taken as a tag name; anything else separates names.
# Names are stored in lower case, because cuttags looks tags up by their
# lower-cased name.
#
#   $class  - invocant (not used)
#   $string - the configuration string
#
# Returns a hash reference {tagname => {val => 1}, ...} (empty for an empty
# string).
sub parseTagsDenyString($$) {
	my($class, $string) = @_;

	return {} if(!$string);

	my $tagsDeny = {};
	while($string =~ /([a-z]+)/ig) {
			$tagsDeny->{lc($1)}->{val}=1;
	}

	return $tagsDeny;
}

# ==Return the configuration hash==
# Hands back the active configuration (the file-level $conf) as it is; the
# caller receives the same hash reference, not a copy of the hash.
sub getConf($) {
	my $current = $conf;

	return $current;
}

return 1;perl-Jevix-0.9.7/Jevix/MakeupUtf.pm000064400000000000000000000642751133231612600171120ustar00rootroot00000000000000package Jevix::MakeupUtf;

# ==========================================
#
#  Jevix Version 0.9.7 (utf-8)
#
#  Developed by Igor Askarov
# 
#  Please send all suggestions to
#  Igor Askarov <juks@juks.ru>
#  http://www.jevix.ru/
#
#  Release date: 20/12/2008
#
# === Methods list==========================
#
#  new							the constructor
#  process						entry sub
#  setConf						setting up the configuration
#  preset						presets selector
#  makeup						makeup the text
#  quotes						quotes processor
#  cuttags						tags processor
#  tagEnd						looking for tag end
#  plantTags						sub to bring the tags back
#  vanish						sub to remove all the stuff and bring the text to plain mode
#  parseTagsAllowString					parse the tagsAllow string to hash
#  parseTagsDenyString					parse the tagsDeny string to hash
#  getConf						return configuration hash
#
# ==========================================

use strict;
use warnings;

#UTFMODE#
use Encode;
use utf8;
#UTFMODE#

my $markLength = 8;
my $strip;
my $result;
my $tags;
my @tagsOpen;
my $conf;

my @singleTags = qw/link input spacer img br hr/;
my @breakingTags = qw/p td div hr/;
my @spaceTags = qw/br/;
my @tagsToEat = qw/script style pre code/;

# ==The constructor
#
# No object is built here: the invocant (normally the class name) is handed
# back unchanged, so every later call made on the result is a class-method
# call and all working state stays in the file-level variables.
sub new {
	my ($class) = @_;

	return $class;
}

# ==Here we've got the input
#
# Entry point of the module: takes UTF-8 encoded text and returns the
# processed text together with the error state.
#
# Parameters:
#   $class    - invocant
#   $text     - REFERENCE to the UTF-8 encoded input string
#   $userConf - optional configuration hash reference; when given it replaces
#               the stored configuration (see setConf)
#
# Returns a hash reference with the keys:
#   text     - the processed text, as UTF-8 bytes
#   error    - 0, or 1 when a problem was flagged during processing
#   errorLog - array reference collecting the logged problems
#
# The input is decoded into a private copy. The string behind $text is left
# untouched, so the same variable can be passed to process() again.
sub process($$$) {
	my($class, $text, $userConf) = @_;

	#UTFMODE#
	# Decoding through the caller's reference would turn the caller's bytes into
	# characters, and a second call on the same variable would then try to
	# decode already decoded text.
	my $decoded = decode("utf8", $$text);
	$text = \$decoded;
	#UTFMODE#

	# If there is a configuration given we set it here
	$class->setConf($userConf) if($userConf);

	# Reset the file-level working state left over from a previous run
	$strip = "";
	$tags = [];
	@tagsOpen = ();

	$result = {};
	$result->{error} = 0;
	$result->{errorLog} = [];

	if($conf->{vanish}) {
		# Plain-text mode: drop every tag, then flatten typographic characters
		$class->cuttags($text, {tagsDenyAll=>1}, $result);
		$class->vanish(\$strip);
		$result->{text} = $strip;
	} else {
		# In HTML mode the tags are cut out of the text and replaced by markers first
		if(!$conf->{isHTML}) { $strip = $$text; } else { $class->cuttags($text, $conf, $result); }
		if($conf->{quotes}) { $class->quotes($conf); }
		$class->makeup($conf);

		# ... and planted back once the text itself has been processed
		$result->{text} = "";
		if($conf->{isHTML}) { $class->plantTags($result); } else { $result->{text} = $strip; }
	}

	#UTFMODE#
	Encode::_utf8_off($result->{text});
	#UTFMODE#

	return $result;
}

# ==Setting the configuration
#
# Stores $userConf as the module-wide configuration; when no configuration is
# passed a fresh one asking for the basic preset is stored instead. preset()
# is then run to fill in defaults and to parse string-form tag lists.
#
# Parameters:
#   $class    - invocant
#   $userConf - configuration hash reference (optional)
sub setConf($$) {
	my $class = shift;
	my $userConf = shift;

	if($userConf) {
		$conf = $userConf;
	} else {
		$conf = {presetBasic=>1};
	}

	$class->preset();
}

# ==Choosing default setup when necessary
#
# Works on the module-wide configuration ($conf):
#   * when there is no configuration yet, or it asks for "presetBasic", every
#     option of the basic preset that is still undefined gets its default;
#   * a "tagsAllow" given as a string is parsed into its hash form (this also
#     sets "tagsDenyAllAttributes");
#   * a "tagsDeny" given as a string is parsed into its hash form.
#
# Parameters:
#   $class - invocant
sub preset($$) {
	my ($class) = @_;

	if(!$conf || $conf->{presetBasic}) {
		# The basic preset. Options already defined by the user are left alone.
		my %basicDefaults = (
			isHTML               => 1,		# HTML mode
			lineBreaks           => 1,		# Linebreaks to <br/>
			paragraphs           => 0,		# Paragraphs
			dashes               => 1,		# Replace hyphens with dashes when necessary
			dots                 => 1,		# Replace 3 dots with a symbol
			edgeSpaces           => 1,		# Wipe edge space characters
			multiSpaces          => 1,		# Wipe multispaces
			redundantSpaces      => 1,		# Wipe redundant spaces
			compositeWordsLength => 10,		# The maximum length of a composite word to be put inside <nobr>
			tagLf                => 1,		# Wipe crs and lfs after a dropped tag
			nbsp                 => 1,		# Insert non-breaking spaces
			quotes               => 1,		# Makeup quotes
			qaType               => 0,		# Main quotes type
			qbType               => 2,		# Nested quotes type
			misc                 => 1,		# Misc substitutions
			codeMode             => 2,		# The way jevix should represent html special characters
		);

		foreach my $option (keys %basicDefaults) {
			next if(defined($conf->{$option}));
			$conf->{$option} = $basicDefaults{$option};
		}
	}

	# If tagsAllow came as a string
	if(defined($conf->{tagsAllow}) && !ref($conf->{tagsAllow})) {
		my $parsed = $class->parseTagsAllowString($conf->{tagsAllow});

		foreach my $key (qw/tagsAllow tagsDenyAllAttributes/) {
			$conf->{$key} = $parsed->{$key};
		}
	}

	# If tagsDeny came as a string
	if(defined($conf->{tagsDeny}) && !ref($conf->{tagsDeny})) {
		$conf->{tagsDeny} = $class->parseTagsDenyString($conf->{tagsDeny});
	}
}

# ==Imposing clear text
#
# Runs the typographic clean-up over the file-level buffer $strip, in place.
# Every step is a regex substitution switched on by a key of $conf:
# edgeSpaces, tagSpaces, multiSpaces, redundantSpaces, nbsp, compositeWords,
# links, dots, dashes, misc, paragraphs and lineBreaks, applied in that order.
#
# In HTML mode $strip holds 8-character markers in place of the tags:
# "\x00\x0F", four flag characters, "\x0F\x00" (written by cuttags). The
# marker patterns declared below select markers by those flag characters.
#
# Parameters:
#   $class - invocant (not used here)
#   $conf  - configuration hash reference
#
# There is no explicit return value; the result is left in $strip.
sub makeup($$) {
	my ($class, $conf) = @_;

	# ==Misc
	# Prepositions
	my $prp_rus = "п╟|п╠п╣п╥|п╠п╣п╥п╬|п╡|п╡п╫п╣|п╡п╬|п╢п╟|п╢п╩я▐|п╢п╬|п╥п╟|п╦|п╦п╥|п╦п╥п╬|п╦п╩п╦|п╨|п╨п╟п╨|п╫п╟|п╫п╟п╢|п╫п╟п╢п╬|п╫п╣|п╫п╦|п╫п╬|п╬|п╬п╠|п╬п╠п╬|п╬п╨п╬п╩п╬|п╬я┌|п╬я┌п╬|п©п╬|п©п╬п╢|п©п╬п╢п╬|п©я─п╦|п©я─п╬|я│|я│п╨п╡п╬п╥я▄|я│п╬|я┐|я┤п╣я─п╣п╥";
	my $prp_eng = "aboard|about|above|absent|across|after|against|along|alongside|amid|amidst|among|amongst|around|as|astride|at|atop|before|behind|below|beneath|beside|besides|between|beyond|but|by|despite|down|during|except|following|for|from|in|inside|into|like|mid|minus|near|nearest|notwithstanding|of|off|on|onto|opposite|out|outside|over|past|re|round|save|since|than|through|throughout|till|to|toward|towards|under|underneath|unlike|until|up|upon|via|with|within|without";
	my $prp = "$prp_rus|$prp_eng";

	my $letters = "A-Za-zп░-п╞п╟-я▐п│я▒п≥п╧";				 # Characters
	my $cap_letters = "A-Zп░-п╞п│я▒";								 # Capital characters (not referenced below)

	my $sp = " \xA0\t";														   # space class
	my $rt = "\r?\n";																 # cr class

	# Marker patterns. The four characters between "\x00\x0F" and "\x0F\x00"
	# are, in order: the <a> flag, the space/breaking flags, the p/br flags and
	# the nobr/opening-tag flags.
	my $br = "\x00\x0F..[\x01\x03].\x0F\x00";				 # br tag
	my $pt = "\x00\x0F..[\x02].\x0F\x00";						 # Paragraph tag
	my $ps = "\x00\x0F..[\x02][\x01\x03]\x0F\x00";		# Paragraph start
	my $pe = "\x00\x0F..[\x02][\x02\x00]\x0F\x00";		# Paragraph end
	my $to = "\x00\x0F...[\x03\x01]\x0F\x00";				 # Opening tag (not referenced below)
	my $tc = "\x00\x0F...[\x02\x00]\x0F\x00";				 # Closing tag
	my $bb = "\x00\x0F...[\x02\x03]\x0F\x00";				 # Tag where <nobr> is open
	my $nb = "\x00\x0F...[\x01\x00]\x0F\x00";				 # Tag where no <nobr> is open
	my $ao = "\x00\x0F[\x01]...\x0F\x00";						# Tag where <a> is open (not referenced below)
	my $ac = "\x00\x0F[\x00]...\x0F\x00";						# Tag where no <a> is open
	my $ts = "\x00\x0F";														  # Tag start
	my $te = "\x0F\x00";														  # Tag end (not referenced below)

	my $brt = "<br *\/?>";														# br tag in text mode
	my $pst = "<p>";
	my $pet = "</p>";

	# Codes, metasymbols or what ever?
	# codeMode false: literal characters; 1: numeric entities; anything else: named entities
	my ($cdash, $cnbsp, $cdots, $cfracs, $ccopy, $creg);

	if(!$conf->{codeMode}) {
		($cdash, $cnbsp, $cdots, $ccopy, $creg) = ("Б─■", "б═", "Б─╕", "б╘", "б╝");
		$cfracs = {'1/4'=>"б╪", '1/2'=>"б╫", '3/4'=>"б╬"};
	} elsif($conf->{codeMode} == 1) {
		($cdash, $cnbsp, $cdots, $ccopy, $creg) = ("&#151;", "&#160;", "&#133;", "&#169;", "&#174;");
		$cfracs = {'1/4'=>"&#188;", '1/2'=>"&#189;", '3/4'=>"&#190;"};
	} else {
		($cdash, $cnbsp, $cdots, $ccopy, $creg) = ("&mdash;", "&nbsp;", "&hellip;", "&copy;", "&reg;");
		$cfracs = {'1/4'=>"&frac14;", '1/2'=>"&frac12;", '3/4'=>"&frac34;"};
	}

	# Wiping edge spaces
	if($conf->{edgeSpaces}) { $strip =~ s/^[$sp\r\n]*(.+?)[$sp\r\n]*$/$1/isg; }

	# Wiping spaces between tags (</td> </tr>)
	if($conf->{tagSpaces}) { $strip =~ s/($tc)[$sp]($tc)/$1$2/isg; }

	# Wiping multispaces
	if($conf->{multiSpaces}) { $strip =~ s/([$sp]){2,}/$1/ig; }

	# Wiping redundant spaces
	if($conf->{redundantSpaces}) { $strip =~ s{([$sp]+(?![:;]-[)(])([;:,.)?!]))|(\()(?<![:%;]-\()[$sp]+}{$1 ? $2 : $3}eig; }

	if($conf->{nbsp}) {
		# Prepositions with &nbsp;
		$strip =~ s/(^|\x00|[$sp])($prp)[$sp]([0-9$letters])/$1$2$cnbsp$3/gm;

		# &nbsp; with digits
		$strip =~ s{($nb|^)(.*?)($bb|$)}{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/([0-9]+)([$sp]+|&nbsp;|&#160;)(?:(?=[0-9]{2,})|(?=%))/$1$cnbsp/ig; "$a$b$c"; }eisg;
	}

	# Put composite words inside <nobr>
	if($conf->{compositeWords}) { $strip =~ s{($nb|^)(.*?)($bb|$)}{ my ($a, $b, $c) = ($1, $2, $3);
											$b =~ s{(^|[$sp\x00]|&nbsp;)([$letters]+(?:-[$letters]+)+)(?=$|[$sp\x00])}{
															my $d = !defined($1) ? "" : $1; my $e = !defined($2) ? "" : $2; my $f = !defined($3) ? "" : $3;
															if(length($e) <= $conf->{compositeWordsLength}) { "$d<nobr>$e<\/nobr>" } else {"$d$e$f"}
													}eig; "$a$b$c";
											}eisg; }

	# Links
	if($conf->{links}) {
		my $lAttributes = '';

		# Building <a> attributes string
		if($conf->{linksAttributes}) {
			my $q = !$conf->{tagUnQuoteValues} ? '"' : '';

			while (my ($attr, $value) = each(%{$conf->{linksAttributes}})) {
				$lAttributes .= ' ' if($lAttributes);
				$lAttributes .= "$attr=$q$value$q";
			}

			$lAttributes = ' ' . $lAttributes if($lAttributes);
		}

		$strip =~ s{(^|$ac|(?<=</a>))([^\x00]*?)(http://[^ \x00]+)(?<![,.!?])}{$1$2<a href="$3"$lAttributes>$3</a>}ig;
	}

	# Dots
	if($conf->{dots}) { $strip =~ s/\.{3}|Б─╕|&hellip;/$cdots/ig; }

	# Dashes
	if($conf->{dashes}) {
		# Hyphen
		$strip =~ s/([^$sp])([$sp]|&#160;|&nbsp;)(-{1,2}|Б─■|&mdash;|&#151;)/$1$cnbsp$cdash/ig;
		# "Speech" hyphen
		$strip =~ s/((?:^|$ps|$br|$brt(?:$rt)*|[$rt]))[$sp]*(?:&nbsp;)*(-{1,2}|Б─■|&mdash;|&#151;)[$sp]*(?:&nbsp;)*(.)/$1$cdash$cnbsp$3/ig;
	}

	# Misc stuff
	if($conf->{misc}) {
		# Fracs
		$strip =~ s{(?:(?<=[$sp\x00])|(?<=^))([13])/([24])(?:(?=[$sp\x00])|(?=$))}{if(defined($cfracs->{"$1/$2"})) { $cfracs->{"$1/$2"} } else { "$1/$2" } }esg;
		# Copyright & registered
		# The pattern lists the "registered" entity and sign as well, so that
		# the branch below which handles them can actually be reached. The
		# fallback hands back the matched text itself.
		$strip =~ s{(?:(?<=[$sp\x00])|(?<=^))(\([cr]\)|&copy;|б╘|&reg;|б╝)(?:(?=[$sp\x00?!;.,])|(?=$))}{ if((lc($1) eq "(c)") || (lc($1) eq "&copy;") || ($1 eq "б╘")) {$ccopy} elsif((lc($1) eq "(r)") || (lc($1) eq "&reg;") || ($1 eq "б╝")) {$creg} else { $1 } }eig;
	}

	# Paragraphs
	if($conf->{paragraphs}) { $strip =~ s{(^|$pe(?:$rt$rt)?|$rt$rt)(?!$ps)(.+?)($br)?($brt)?(?<!$pe)(?:(?=$)|(?=$rt$rt)|(?=$ps))}{ my ($a, $b, $c) = ($1,$2,$3||""); (($b =~ /^[ \r\n]+$/) || ($b =~ /^(<br *\/?>|$br)+$/)) ? "$a$b$c" : "$a<p>$b</p>";}eisg; }

	# Line break
	if($conf->{lineBreaks}) { $strip =~ s/(?<!$pt)(?<!$br)(?<!$br\r)(?<!$pe\r\n\r\n)(?<!$pe\n\n)(?<!$pe\r\n)(?<!$pe\n)(?<!$pe\r)(?<!$pe)(?<!$pet\r\n\r\n)(?<!$pet\r\n\r)(?<!$pet\n\n)(?<!$pet\r\n)(?<!$pet\n)(?<!$pet\r)(?<!$pet)(?<!$pst)($rt)(?!$brt)(?!$ts)/<br \/>$1/isg; }
}

# ==impose quotes
#
# Rewrites the quotation marks in the file-level buffer $strip, in place.
#
# First every known quote form (literal characters, numeric and named
# entities) that touches a non-letter or an edge of the string is reset to a
# plain <">. The buffer is then walked one character at a time, tag markers
# being stepped over, and each <"> is replaced by the opening or closing quote
# of the outer (qaType) or nested (qbType) style, depending on which levels
# are open at that point.
#
# A marker of a breaking tag, or the end of the buffer, closes both levels.
# When a level was still open there and $conf->{logErrors} is set, a
# "Quote_error" entry is pushed to the file-level $result and its error flag
# is raised.
#
# Parameters:
#   $class - invocant (not used here)
#   $conf  - configuration hash reference; codeMode selects the quote
#            representation, qaType/qbType (0..5) the quote styles. Note that
#            a false qbType is replaced by 1 and both values are written back.
sub quotes($$) {
	my ($class, $conf) = @_;

	my $i;
	my ($a_open, $b_open) = (0,0);
	my ($cp, $c, $cn, $cn_is_sp, $cp_is_sp) = ('', '', '', 0, 0);
	my ($qaStart, $qaEnd, $qbStart, $qbEnd);
	my (@qs, @qe, @qs_ansi, @qe_ansi, @qs_html, @qe_html, @qs_ent, @qe_ent,);

	# space class
	my $sp =" \t\xA0";

	# characters
	my $letters = "A-Za-zп░-п╞п╟-я▐п│я▒п≥п╧";

	@qs_ansi = ("б╚", "Б─°", "Б─·", "Б─≤", "Б─ ", '"');
	@qe_ansi = ("б╩", "Б─²", "Б─°", "Б─≥", "Б─≤", '"');
	@qs_html = ("&#171;", "&#147;", "&#132;", "&#145;", "&#130;", "&#34;");
	@qe_html = ("&#187;", "&#148;", "&#147;", "&#146;", "&#145;", "&#34;");
	#				<<				``				..				`				.				"
	@qs_ent = ("&laquo;",		"&ldquo;",		"&bdquo;",		"&lsquo;",		"&sbquo;",		"&quot;");
	#				>>				''				''				'				`				"
	@qe_ent = ("&raquo;", 		"&rdquo;", 		"&ldquo;", 		"&rsquo;", 		"&lsquo;", 		"&quot;");

	# Quotes collection
	if(!$conf->{codeMode}) {
		@qs = @qs_ansi; @qe = @qe_ansi;
	} elsif ($conf->{codeMode} == 1) {
		@qs = @qs_html; @qe = @qe_html;
	} else {
		@qs = @qs_ent; @qe = @qe_ent;
	}

	# Getting configuration setting
	$conf->{qaType} ||= 0;
	$conf->{qbType} ||= 1;
	$conf->{qaType} = ($conf->{qaType} >= 0 && $conf->{qaType} <= 5) ? $conf->{qaType} : 0;
	$conf->{qbType} = ($conf->{qbType} >= 0 && $conf->{qbType} <= 5) ? $conf->{qbType} : 1;

	# Selecting quotes as requested by user
	($qaStart, $qaEnd) = ($qs[$conf->{qaType}], $qe[$conf->{qaType}]);
	($qbStart, $qbEnd) = ($qs[$conf->{qbType}], $qe[$conf->{qbType}]);

	# Resetting all the quotes inside text to <">
	my $qa = join('|', @qs_ansi) . '|' . join('|', @qe_ansi) . '|' . join('|', @qs_html) . '|' . join('|', @qe_html) . '|' . join('|', @qs_ent) . '|' . join('|', @qe_ent);
	$strip =~ s/(?:(?:(?<=[^$letters])|(?<=^))($qa))|(?:($qa)(?:(?=[^$letters])|(?=$)))/\"/ig;

	# @st is a sliding window of three characters: previous, current, next.
	# @space and @break carry, for the same three slots, the flags of the tag
	# markers that were stepped over right before the character.
	my $spread = 1;
	my $mv = 0;
	my $mvn = 0;
	my @st;
	$i = 0;
	my $skip = 0;
	my @space;				  # Space tags flag
	my @break;				  # Text break flags

	$st[$_] = '' foreach(0..$spread + 1);
	$space[$_] = 0 foreach(0 + 1..$spread + 1);
	$break[$_] = 0 foreach(0 + 1..$spread + 1);
	$space[0] = 1;
	$break[0] = 1;

	while(1) {
		# Skipping tags
		foreach(0..$spread) {
			do {
				$skip = 0;
				if($i + $_ + $mv <= length($strip)) {
					if($i + $_ + $mv + 1 < length($strip)) {
						if((substr($strip, $i + $_ + $mv, 1) eq "\x00") && (substr($strip, $i + $_ + $mv + 1, 1) eq "\x0F")) {
							# A marker is "\x00\x0F", then the <a> flag character,
							# then the isSpace/isBreaking flag character. The
							# flags wanted here are in the second flag character,
							# i.e. at offset 3 of the marker. Offset 2 is the <a>
							# flag, and reading it reset the quote state on every
							# tag seen while a link was open.
							$space[$_ + 1] |= (ord(substr($strip, $i + $_ + $mv + 3, 1)) & 2) >> 1;
							$break[$_ + 1] |= ord(substr($strip, $i + $_ + $mv + 3, 1)) & 1;
							$mv += $markLength;
							if(!$_) { $mvn = $mv; }
							$st[$_ + 1] = "";
							$skip = 1;
						}
					}
					if(!$skip) { $st[$_ + 1] = substr($strip, $i + $_ + $mv, 1); }
				}
			} while($skip);
		}

		$i += $mvn;
		$mv = 0;
		$mvn = 0;

		($cp, $c, $cn) = ($st[0], $st[1], $st[2]);
		$cp_is_sp = (($cp =~ /[^0-9$letters]/) || $space[0] || $space[1] || $break[0] || !$i) ? 1 : 0;
		$cn_is_sp = (($cn =~ /[^0-9$letters]/) || $space[2] || $break[2] || $cn eq '') ? 1 : 0;

		# Reset state if breaking tag appears
		if($break[1] || $i == length($strip)) {
			if($a_open || $b_open) {
				# Log quote error if appears
				if($conf->{logErrors}) {
					my $quoteErrSampleLength = 100;
					my $z = $i - 1;
					# Start of the quoted sample: the leftmost space within the
					# last $quoteErrSampleLength characters. It defaults to the
					# start of that window so that $y is defined even when the
					# window holds no space at all.
					my $y = ($i > $quoteErrSampleLength) ? $i - $quoteErrSampleLength : 0;
					while(1) {
						if(substr($strip, $z, 1) eq " " || substr($strip, $z, 1) eq "\xA0" || !$z) { if($i-$z <= $quoteErrSampleLength) {$y = $z}}
						last if(!$z);
						$z--;
					}
					my $sample = substr($strip, $y, ($i - $y));
					$sample =~ s/\x00\x0F[^\x0F]+\x0F\x00//g;
					$sample =~ s/<\/?[a-z]+.*?>//g;
					push(@{$result->{errorLog}}, {type=>"Quote_error", message=>"Quote mismatch near [$sample]<--"});
					$result->{error} = 1;
				}
			}

			$a_open = 0;
			$b_open = 0;
		}

		# The replacement may be longer than one character, so $i is moved to
		# the last character of whatever was inserted.
		if($c eq '"') {
			if(!$a_open) {
				$a_open = 1;
				substr($strip, $i, 1) = $qaStart;
				$i += length($qaStart) - 1;
			} elsif ($a_open && (($i == length($strip) - 1) || (!$b_open && $cn_is_sp))) {
				$a_open = 0;
				substr($strip, $i, 1) = $qaEnd;
				$i += length($qaEnd) - 1;
			} elsif ($a_open && !$b_open) {
				$b_open = 1;
				substr($strip, $i, 1) = $qbStart;
				$i += length($qbStart) - 1;
			} elsif ($a_open && $b_open) {
				$b_open = 0;
				substr($strip, $i, 1) = $qbEnd;
				$i += length($qbEnd) - 1;
			}
		}

		last if($i == length($strip));

		# Slide the window: the current character becomes the previous one
		$st[0] = $st[1];
		$space[0] = $space[1];
		$break[0] = $break[1];
		$space[$_] = 0 foreach(0 + 1..$spread + 1);
		$break[$_] = 0 foreach(0 + 1..$spread + 1);
		$i++;
	}
}

# ==Cutting the tags away
#
# Splits the HTML in $$text into plain text and tags. The text is appended to
# the file-level buffer $strip; every tag that is kept is pushed to the
# file-level list $tags (name, filtered body, swallowed content) and is
# represented in $strip by an 8-character marker carrying its flags. Dropped
# tags leave nothing behind.
#
# Parameters:
#   $class  - invocant (not used here)
#   $text   - reference to the input string
#   $conf   - configuration hash reference (tag filter and tag rewriting keys)
#   $result - result hash reference; "error" is raised and, with
#             $conf->{logErrors}, "errorLog" is extended on parse problems
#
# The names of tags opened but not closed are kept in the file-level @tagsOpen
# when checkHTML or tagCloseOpen is set.
sub cuttags($$$$) {
		my($class, $text, $conf, $result) = @_;

		# loop counter
		my $i = 0;
		# Jump length
		my $hop;
		# current & next character
		my ($c, $cn);
		# tag length, tag dimensions, tag name, tag body text, single tag flag, content inside the tag
		my ($tl, $ts, $te, $cl, $tagName, $tagBody, $tagContent);
		# some useful flags
		my ($isTag, $isTagStart, $isSingle, $isSingleClosed, $isSpace, $isBreaking, $nobrIsOpen, $aIsOpen, $flagSet3, $flagSet2, $flagSet1, $flagSet0);

		# space class
		my $sp =" \t\xA0";

		while(1) {
			# Everything up to the next "<" is plain text
			$hop = index($$text, "<", $i);

			if($hop < 0) {
				$strip .= substr($$text, $i, length($$text) - $i);
				last;
			} elsif($hop > 0) {
				$strip .= substr($$text, $i, $hop - $i);
				$i = $hop;
			}

			($c, $cn) = unpack("aa", substr($$text, $i, 2));

			$isTag = 0;

			# =If tag opens
			$isTagStart = ($cn =~ /!|[a-z]/i) ? 1 : 0;
			if($isTagStart || ($cn eq "/")) { $isTag = 1; }

			if($isTag) {
				$ts = $i;																		# Tag start position
				$te = $isTagStart ? tagEnd($text, $ts) : index($$text, ">", $ts);				# Tag end position

				# index() answers -1 when there is no ">" at all. -1 is true, so
				# it used to be taken for a tag end: the pointer was then set to
				# -1 and the scan restarted from the beginning of the text over
				# and over. Bring it to the "not found" value tagEnd() uses.
				$te = 0 if($te < 0);

				if($te) {
					$tagBody = substr($$text, $ts, $te - $ts + 1);
					$tagName = $isTagStart ? ($tagBody =~ m/^<([a-z0-9]+)/i)[0] : ($tagBody =~ m/^<\/\s*([a-z]+)/i)[0];
					$tagName =~ tr/A-Z/a-z/ if(defined($tagName));
				}

				if($te && $tagName) {
					# =Flags
					# Detecting whether the tag is single (self-closing) or double
					$isSingleClosed = 0;
					$isSingle = 0;

					if($isTagStart) {
						if(grep{$tagName eq $_} @singleTags) {
							$isSingle = 1;
						} elsif (substr($tagBody, length($tagBody) - 2, 1) eq "/") {
							$isSingle = 1;
							$isSingleClosed = 1;
						}
					}

					# Detecting whether this is space tag or not
					$isSpace = (grep{$tagName eq $_} @spaceTags) ? 1 : 0;

					# Detecting whether this is breaking tag or not
					$isBreaking = (grep{$tagName eq $_} @breakingTags) ? 1 : 0;

					# Tag Length
					$tl = $te - $ts + 1;

					# Updating the status for tags open
					if(($conf->{checkHTML} || $conf->{tagCloseOpen}) && !$isSingle) {
						if($isTagStart) {
							push(@tagsOpen, $tagName);
						} else {
							if($tagsOpen[$#tagsOpen] ne $tagName) {
								# HTML error
								$result->{error} = 1;
								if($conf->{logErrors}) { push(@{$result->{errorLog}}, {type=>"HTML_Parse", position=>$i, message=>"Found closing tag <$tagName> while waiting tag <" . $tagsOpen[$#tagsOpen] . "> to close!"}); }
							} else {
								pop(@tagsOpen);
							}
						}
					}

					# Eating tag content for some tags like <script>
					$tagContent = "";
					$cl = 0;
					if((grep{$tagName eq $_} @tagsToEat) && $isTagStart) {
						$cl = index($$text, "</$tagName>", $ts + $tl) - $ts - $tl;
						if($cl > 0) {
							$tagContent = substr($$text, $ts + $tl, $cl);
						} else {
							$cl = 0;
							$result->{error} = 1;

							if($conf->{logErrors}) { push(@{$result->{errorLog}}, {type=>"HTML_Parse", position=>$i, message=>"Can't find <$tagName> end!"}); }
						}
					}

					# Should I drop all the tags by default?
					my $dropTag = 0;
					if($conf->{tagsDenyAll} || $conf->{simpleXSS} && $tagName eq 'script') { $dropTag = 1; }

					# Checking deny list
					if(!$dropTag && defined($conf->{tagsDeny})) {
						if($conf->{tagsDeny}->{$tagName}) { $dropTag = 1; }
					}

					# Checking allow list
					if(defined($conf->{tagsAllow}) && $dropTag) {
						if($conf->{tagsAllow}->{$tagName}) { $dropTag = 0; }
					}

					# Nobr tag status
					if($tagName eq "nobr" && $isTagStart) {
						$nobrIsOpen = 1;
					} elsif(($tagName eq "nobr" && !$isTagStart) || (grep{$tagName eq $_} @breakingTags)) {
						$nobrIsOpen = 0;
					}

					# A tag status
					if($tagName eq "a" && $isTagStart) {
						$aIsOpen = 1;
					} elsif(($tagName eq "a" && !$isTagStart) || (grep{$tagName eq $_} @breakingTags)) {
						$aIsOpen = 0;
					}

					# =Final part
					if(!$dropTag) {
						# =Processing tags
						# Tag name to lower case
						if($conf->{tagNamesToLower}) {
							if($isTagStart) { $tagBody = "<" . $tagName . substr($tagBody, length($tagName) + 1, length($tagBody) - length($tagName) - 1); }
							else { $tagBody =~ tr/A-Z/a-z/; }
						}

						# Tag name to upper case
						if($conf->{tagNamesToUpper}) {
							if($isTagStart) { $tagBody = "<" . uc($tagName) . substr($tagBody, length($tagName) + 1, length($tagBody) - length($tagName) - 1); }
							else { $tagBody =~ tr/a-z/A-Z/; }
						}

						# =Tag parameters to lower or upper case
						if($isTagStart && ($conf->{tagAttributesToLower} || $conf->{tagAttributesToUpper})) {
							# Regular parameters
							# NOTE(review): $tmp is built here but is not written back to $tagBody
							my $tmp = "";

							while ($tagBody =~ m/([^\s]*\s*)(?:([a-z\r]+)(\s*)(?==)(=\s*))?/ig ) {
								$tmp .= $1 if ($1); if($conf->{tagAttributesToLower}) { if($2) { $tmp .= lc($2); } } else { if($2) { $tmp .= uc($2); } } $tmp .= $3 if ($3); $tmp .= $4 if ($4); $tmp .= $5 if ($5);
							}

							# Single parameters (like <checked>)
							if($conf->{tagAttributesToLower}) { $tagBody =~ s{(?<!=)( +([a-z]+))}{lc($1)}eig; }
							elsif($conf->{tagAttributesToUpper}) { $tagBody =~ s{(?<!=)( +([a-z]+))}{uc($1)}eig; }
						}

						# Simple XSS & tag attributes protection
						if($isTagStart && ($conf->{simpleXSS} || $conf->{tagsAllow}->{$tagName}->{validAttributes} || $conf->{tagsAllow}->{$tagName}->{invalidAttributes} || $conf->{tagsAllow}->{$tagName}->{denyAllAttributes} || $conf->{tagsDenyAllAttributes})) {
							$tagBody =~ s{(?<!<)(\s*)([a-z]+)([$sp]*=[$sp]*)("[^"]+"|[^$sp/>]+)} {
								my ($a, $b, $c, $d) = ($1||'', lc($2), $3, $4);
								if($conf->{simpleXSS} && ($b =~ /^on/ig || $d =~ /javascript|expression/ig)) {
								'';
								} elsif(($conf->{tagsDenyAllAttributes} || $conf->{tagsAllow}->{$tagName}->{denyAllAttributes} || ($conf->{tagsAllow}->{$tagName}->{invalidAttributes} && $conf->{tagsAllow}->{$tagName}->{invalidAttributes}->{$b}))
																		&& !(($conf->{tagsAllow}->{$tagName}->{validAttributes} && $conf->{tagsAllow}->{$tagName}->{validAttributes}->{$b})
																		|| $conf->{tagsAllow}->{$tagName}->{allowAllAttributes})
										) {
								'';
														} elsif($conf->{tagsAllow}->{$tagName}->{validAttributes} && !$conf->{tagsAllow}->{$tagName}->{validAttributes}->{$b}) {
								'';
								} else {
								$a . $b . $c . $d;
								}
							}eig;
						}

						# Close single tag
						if($conf->{tagCloseSingle} && $isSingle && !$isSingleClosed) {
							if(substr($tagBody, length($tagBody) - 2, 1) ne "/") {
								if(substr($tagBody, length($tagBody) - 2, 1) ne " ") { substr($tagBody, length($tagBody) - 2, 1) .= " /"; } else { substr($tagBody, length($tagBody) - 2, 1) .= "/"; }
							}
						}

						# Quote attribute values
						if($conf->{tagQuoteValues} && $isTagStart) {
							my $tmp = "";
							#						1			   23  4		5		6
							while($tagBody =~ m/([<a-z0-9 >]+)?((=)(\s*)([^ >]+)([ >]+))?/ig) {
								$tmp .= $1 if($1);
								if($2) {
									$tmp .= $3 if($3);
									$tmp .= $4 if($4);
									if($5 && substr($5, 0, 1) ne '"' && substr($5, length($5) - 1, 1) ne '"') { $tmp .= "\"$5\""; } else { $tmp .= $5; }
									$tmp .= $6 if($6);
								}
							}

							$tagBody = $tmp;
						}

						# Unquote attribute values
						if($conf->{tagUnQuoteValues}) {
							$tagBody =~ s/([a-z]+)(\s*)(=)(\s*)"([^\=\s">]+)"/$1$2$3$4$5/ig;   #"
						}

						# Saving the tag
						push(@$tags, {name=>$tagName, body=>$tagBody, content=>$tagContent});

						# Forming flagSet
						#
						# byte3: _ _ _ _ _ _ _ isHref | byte2: _ _ _ _ _ _ isSpace isBreaking| byte1: _ _ _ _ _ p br| byte0: _ _ _ _ nobr isTagStart
						$flagSet3 = 0;
						if($aIsOpen) { $flagSet3 |= 1; }
						$flagSet2 = 0;
						if($isSpace) { $flagSet2 |= 2; }
						if($isBreaking) { $flagSet2 |= 1; }
						$flagSet1 = 0;
						if($tagName eq "br") { $flagSet1 |= 1; }
						if($tagName eq "p") { $flagSet1 |= 2; }
						$flagSet0 = 0;
						if($isTagStart) { $flagSet0 |= 1; }
						if($nobrIsOpen) { $flagSet0 |= 2; }

						# Planting the marker
						$strip .= "\x00\x0F" . chr($flagSet3) . chr($flagSet2) . chr($flagSet1) . chr($flagSet0) . "\x0F\x00";
					}

					# Moving the pointer (tag end position + content length)
					$i = $te + $cl;

					# Eating crs & lfs after dropped tag
					if($conf->{tagLf} && $dropTag) {
						while(1) {
							if(substr($$text, $i + 1, 1) eq "\r") { $i++; } elsif(substr($$text, $i + 1, 1) eq "\n") { $i++; last; } else { last }
						}
					}
				}
			} else {
				# This is not a tag, just add the "<" to result
				$strip .= $c;
			}

			last if($i == length($$text));
			$i++;
		}

		# Need to close all the open tags in the order of appearance
		# NOTE(review): nothing is done with the popped names, so with the
		# "closeOpenTags" key set this only empties @tagsOpen.
		if($conf->{'closeOpenTags'} && scalar @tagsOpen) {
			while(my $tag = pop @tagsOpen) {

			}
		}
}

# ==Find where tag ends
#
# Looks for the ">" that ends the tag starting at offset $i of $$text. A ">"
# found between a pair of double quotes (an attribute value) does not count.
#
# Parameters:
#   $text - reference to the string to scan
#   $i    - offset to start scanning at (a missing or negative value is
#           treated as 0)
#
# Returns the offset of the closing ">", or 0 when there is none. Offset 0
# itself is never reported, because 0 is the "not found" value.
#
# Declared without a prototype: the former ($$$) asked for three arguments
# while the sub takes two.
sub tagEnd {
	my ($text, $i) = @_;

	$i = 0 if(!defined($i) || $i < 0);
	$i = int($i);

	my $length = length($$text);
	my $inQuotes = 0;

	# Bounded by the text length, so an offset beyond the end simply finds nothing
	while ($i < $length) {
		my $char = substr($$text, $i, 1);

		if ($char eq '"') {
			$inQuotes ^= 1;
		} elsif ($char eq '>' && !$inQuotes && $i) {
			return $i;
		}

		$i++;
	}

	return 0;
}

# ==Bring everything back to HTML
#
# Rebuilds the output in $result->{text} from the file-level buffer $strip:
# plain text is copied over, and every tag marker ("\x00\x0F" followed by the
# rest of the $markLength characters) is replaced by the next saved tag from
# the file-level list $tags - its body and, when present, its swallowed
# content. With $conf->{tagCloseOpen} set, closing tags for the names still
# left in @tagsOpen are appended, last opened first.
#
# Parameters:
#   $class  - invocant (not used here)
#   $result - result hash reference; its "text" entry is appended to
#
# The loop ends as soon as the position reaches OR passes the end of $strip.
# A marker start close to the end of the buffer moves the position past the
# end, and the former "equal to the end" test never became true in that case.
sub plantTags($$) {
	my ($class, $result) = @_;
	my $i = 0;
	my $max = length($strip);
	my $ctag = 0;

	$result->{text} = "" if(!defined($result->{text}));

	while ($i < $max) {
		if($i < $max - 2 && substr($strip, $i, 2) eq "\x00\x0F") {
			# A marker with no saved tag behind it has nothing to plant
			my $saved = $$tags[$ctag];
			if(defined($saved)) {
				$result->{text} .= $saved->{body};
				if($saved->{content}) { $result->{text} .= $saved->{content}; }
			}
			$i += $markLength;
			$ctag++;
		} else {
			# Copy the text up to the next marker, or up to the end when there is none
			my $next = ($i < $max - 2) ? index($strip, "\x00\x0F", $i) : -1;
			my $step = ($next < 0) ? $max - $i : $next - $i;

			$result->{text} .= substr($strip, $i, $step);
			$i += $step;
		}
	}

	# Should we close open tags?
	if($conf->{tagCloseOpen} && scalar @tagsOpen) {
		my $closeString;

		while(my $tag = pop @tagsOpen) {
			if($conf->{tagNamesToUpper}) { $tag = uc($tag); }
			$closeString .= '</' . $tag .'>';
		}

		if($closeString) { $result->{text} .= $closeString; }
	}
}

# ==Bring the text to plain mode==
#
# Replaces typographic characters and their HTML entities in $$text, in place,
# by plain equivalents: every quote form by <">, non-breaking spaces by a
# space, dashes by "-", the ellipsis by "...", the copyright and registered
# signs by "(c)" and "(r)", and the fractions by "1/4", "1/2" and "3/4".
# Matching is case-insensitive.
#
# Parameters:
#   $class - invocant (not used)
#   $text  - reference to the string to rewrite
#
# Returns what the last substitution (the 3/4 one) returned.
sub vanish($$) {
	my($class, $text) = @_;

	# Each rule: what to look for, and the plain text to put instead
	my @rules = (
		[ qr/&laquo;|&ldquo;|&bdquo;|&lsquo;|&sbquo;|&quot;|&raquo;|&rdquo;|&ldquo;|&rsquo;|&#171;|&#147;|&#132;|&#145;|&#130;|&#34;|&#187;|&#148;|&#146;|б╚|Б─°|Б─·|Б─≤|Б─ |"|б╩|Б─²|Б─≥/i, '"' ],
		[ qr/&nbsp;|&#160;|б═/i, ' ' ],
		[ qr/&mdash;|&ndash;|&#151;|&#150;|Б─■|Б─⌠/i, '-' ],
		[ qr/&hellip;|&#133;|Б─╕/i, '...' ],
		[ qr/&copy;|&#169;|б╘/i, '(c)' ],
		[ qr/&reg;|&#174;|б╝/i, '(r)' ],
		[ qr/&frac14;|&#188;|б╪/i, '1/4' ],
		[ qr/&frac12;|&#189;|б╫/i, '1/2' ],
		[ qr/&frac34;|&#190;|б╬/i, '3/4' ],
	);

	my $outcome;
	foreach my $rule (@rules) {
		my ($pattern, $plain) = @$rule;
		$outcome = ($$text =~ s/$pattern/$plain/g);
	}

	return $outcome;
}

# ==Parse the tagsAllow string advanced format==
#
# Turns the string form of the "tagsAllow" option into its hash form.
#
# The string is a list of tokens made of latin letters, ":" and "|"; anything
# else separates tokens. A token starts with a tag name, which may be followed
# by attribute rules:
#   tag          the tag is allowed
#   tag:         the tag is allowed with all of its attributes
#   tag|         the tag is allowed with none of its attributes
#   tag:a:b|c    attributes "a" and "b" are valid, attribute "c" is invalid
# A "|" as the very first character of the string denies all attributes of
# all tags by default. Tag and attribute names are stored in lower case.
# Parsing stops at the first token that does not start with a tag name.
#
# Parameters:
#   $class  - invocant (not used)
#   $string - the tagsAllow configuration string
#
# Returns { tagsAllow => { tag => {...} }, tagsDenyAllAttributes => 0|1 }.
# The "validAttributes" and "invalidAttributes" entries of a tag exist only
# when the token really named such attributes: an existing "validAttributes"
# hash, even an empty one, is read by the tag filter as a whitelist.
sub parseTagsAllowString($$) {
	my($class, $string) = @_;

	return {tagsAllow=>{}, tagsDenyAllAttributes=>0} if(!$string);

	my $tagsAllow = {};
	my $tagsDenyAllAttributes = 0;

	# Should I deny all tag attributes by default?
	if(substr($string,0,1) eq '|') {
		$tagsDenyAllAttributes = 1;
		substr($string,0,1) = '';
	}

	# Parsing the Configuration String
	while($string =~ /([a-z:|]+)/ig) {
		my $tBody = $1;

		# No tag name at the start of the token: stop here
		last if($tBody !~ /^([a-z]+)/i);
		my $tagName = lc($1);

		my $rule = ($tagsAllow->{$tagName} ||= {});
		$rule->{val}=1;

		if($tBody =~ /^$tagName\|$/i) {
			$rule->{denyAllAttributes}=1;
		} elsif($tBody =~ /^$tagName\:$/i) {
			$rule->{allowAllAttributes}=1;
		} else {
			while($tBody =~ /:([a-z]+)/ig) {
				$rule->{validAttributes}->{lc($1)}=1;
			}

			while($tBody =~ /\|([a-z]+)/ig) {
				my $attribute = lc($1);

				# Look the attribute up only in an existing whitelist. Reading
				# through a missing "validAttributes" would create it as an empty
				# hash, i.e. a whitelist that accepts nothing.
				next if($rule->{validAttributes} && $rule->{validAttributes}->{$attribute});

				$rule->{invalidAttributes}->{$attribute}=1;
			}
		}
	}

	return {tagsAllow=>$tagsAllow, tagsDenyAllAttributes=>$tagsDenyAllAttributes};
}

# ==Parse the tagsDeny string==
#
# Turns a free-form list of tag names into the hash form
# { tagname => { val => 1 }, ... }. Every run of latin letters in $string is
# taken as one tag name; anything else acts as a separator.
#
# Tag names are stored in lower case. cuttags lower-cases the name of every
# tag before looking it up in the deny list, so a list written as "SCRIPT"
# has to end up under the key "script" to take effect.
#
# Parameters:
#   $class  - invocant (not used)
#   $string - the tagsDeny configuration string
#
# Returns a hash reference; an empty one when $string is empty or undefined.
sub parseTagsDenyString($$) {
	my($class, $string) = @_;

	return {} if(!$string);

	my $tagsDeny = {};
	while($string =~ /([a-z]+)/ig) {
		$tagsDeny->{lc($1)}->{val}=1;
	}

	return $tagsDeny;
}

# ==Return the configuration hash==
#
# Hands back the module-wide configuration reference ($conf) as currently
# stored. The invocant is not consulted, and the caller receives the stored
# reference itself rather than a copy.
sub getConf($) {
	my $currentConf = $conf;

	return $currentConf;
}

return 1;perl-Jevix-0.9.7/changelog.txt000064400000000000000000000047321133231612600162460ustar00rootroot00000000000000
  Jevix Version 0.9.7 (utf-8 & windows1251)

  Developed by Igor Askarov

  Please send all bug reports and suggestions to
  Igor Askarov <juks@juks.ru>
  http://www.jevix.ru/

  Release date: 20/02/2008

  Changelog:
  [0.9.7]
	  18.02.2009: now we are able to close unclosed tags
	  19.01.2009: tags "pre" and "code" now remain untouched
	  19.01.2009: "tagQuoteValues" bug fixed processing tags like "<h1>"
          23.12.2008: configuration key for using "vanish" function added
  [0.9.6] 
	  20.12.2008: dashes bugfix by Andrew Hitrov (Rambler)
          20.12.2008: configuration methods improved
          20.12.2008: test script is now the part of the distribution
          20.12.2008: improved implementation of <a> tags
	  20.12.2008: major tag & tag properties filter bugs fixed
	  04.03.2008: automatic implementation of <a> tags added
  [0.9.5]
          20.01.2008: tag & tag properties filter logic has been improved 
	  20.01.2008: advanced configuration string parser added to base class
          15.01.2008: minor bugs fixed
          20.12.2007: XSS protection and tag & tag properties filter added
  [0.9.4]
          10.05.2007: improved <br> markup
          07.05.2007: improved quotes markup
          07.05.2007: improved paragraph markup
          03.05.2007: UTF-8 and Windows-1251 versions are now available
          24.04.2007: added "vanish" function (brings the text to plain mode)
          19.04.2007: fixed several bugs in quotes function
          17.04.2007: improved paragraph markup
          12.04.2007: several minor bugs fixed
          10.04.2007: added "quote mismatch" logging
                      improved quote mismatch processing
  [0.9.3]
          05.04.2007: improved non-breaking spaces markup logic (between digits)
          03.04.2007: improved hyphen markup logic
          02.04.2007: added misc substitutions (fracs, (c) -> &copy;, etc.)
  [0.9.2]
          27.03.2007: added capability for putting composite words inside <nobr> tag
          23.03.2007: added removal for crs & lfs after dropped tags
                      added removal for "between-tags" spaces (like </td> </tr>)
          22.03.2007: fixed bug: br markup (\n right after tag)
                      fixed bug: script-like tags processing
  [0.9.1]
          20.03.2007: improved cuttags function performance
                      improved paragraph markup rules
                      added removal for edge spaces
          16.03.2007: improved quotes logic for a case like: "hello "world" "
perl-Jevix-0.9.7/readme.txt000064400000000000000000000077751133231612600155660ustar00rootroot00000000000000    бЕПЯХЪ 0.9.7 perl-ЛНДСКЪ Jevix::Makeup

    Содержание:
    -----------
		[1] Чем является Jevix и чем он не является?
		[2] История Jevix
		[3] Состав пакета
		[4] Документация
		[5] Планы
		[6] Авторские права

[1] Чем является Jevix и чем он не является?
    ----------------------------------------

    дЮБМН УНРЕКНЯЭ БМЕЯРХ ЪЯМНЯРЭ Б ЩРНЛ БНОПНЯЕ, ПЮЯЯЙЮГЮРЭ ГЮВЕЛ АШК
    ОПХДСЛЮМ Jevix Х ЙЮЙНБН ЕЦН ЛЕЯРН ЯПЕДХ ДПСЦХУ ОНУНФХУ ОПНЕЙРНБ. хРЮЙ:

    > Jevix ЪБКЪЕРЯЪ ЛНЫМШЛ Х СДНАМШЛ Б ОПХЛЕМЕМХХ ЯПЕДЯРБНЛ ДКЪ ПЮАНРШ
    Б ЯНЯРЮБЕ ЙПСОМШУ ХМРЕПМЕР-ОПНЕЙРНБ, ОПЕДНЯРЮБКЪЧЫХУ ДНЯРСО ЬХПНЙНИ
    ЮСДХРНПХХ Й РЕЙЯРНБШЛ ЛЮРЕПХЮКЮЛ, ОПНЯЛЮРПХБЮЕЛШЛ ХКХ ПЕДЮЙРХПСЕЛШЛ
    МЕНЦПЮМХВЕММШЛ ЙПСЦНЛ ОНКЭГНБЮРЕКЕИ.

    > Jevix ЪБКЪЕРЯЪ ЯПЕДЯРБНЛ ДКЪ ОПХЛЕМЕМХЪ ОПЮБХК МЮАНПЮ (РХОНЦПЮТХЙХ)
    РЕЙЯРНБ

    > Jevix ЪБКЪЕРЯЪ ЯПЕДЯРБНЛ АНПЭАШ Я XSS-ЮРЮЙЮЛХ, ЯЙПШРШЛХ Б HTML-ЙНДЕ

    > Jevix ЪБКЪЕРЯЪ ОПНЕЙРНЛ, ЙНРНПШИ ОНЯРНЪММН ПЮГБХБЮЕРЯЪ Х ЯРЮМНБХРЯЪ
    КСВЬЕ

    > Jevix ОНКМНЯРЭЧ ТСМЙЖХНМЮКЕМ ОПХ ПЮАНРЕ ЙЮЙ Я ОПНЯРШЛХ РЕЙЯРЮЛХ, РЮЙ
    Х c HTML-ДНЙСЛЕМРЮЛХ

    > Jevix МЕ ЪБКЪЕРЯЪ ОЮМЮЖЕЕИ ДКЪ ЩЯРЕРНБ. оПНЕЙР МХЙНЦДЮ МЕ ОНГХЖХНМХ-
    ПНБЮКЯЪ ЙЮЙ ЯПЕДЯРБН СДНБКЕРБНПЕМХЪ КЧДЕИ, НЯМНБМНЕ ФХГМЕММНЕ
    ОПЕДМЮГМЮВЕМХЕ ЙНРНПШУ, Б ЯХКС ЯНБПЕЛЕММШУ РЕМДЕМЖХИ, ГЮЙКЧВЮЕРЯЪ
    Б ДНЯЙНМЮКЭМН ОПЮБХКЭМНЛ НТНПЛКЕМХХ ЛЮРЕПХЮКНБ ЯБНЕЦН СЛЯРБЕММНЦН РПСДЮ,
    МЕФЕКХ Б МЮОНКМЕМХХ ЩРХУ ЯЮЛШУ ЛЮРЕПХЮКНБ ЙЮЙХЛ-КХАН ЯЛШЯКНЛ,
    ХМРЕПЕЯМШЛ НЙПСФЮЧЫХЛ.

    Jevix МЕ ОШРЮЕРЯЪ ГЮЛЕМХРЭ КЧАШЕ ЩКЕЛЕМРШ РЕЙЯРЮ, ЯЙНКЭ СЦНДМН ДЮКЕЙН
    МЮОНЛХМЮЧЫХЕ РХОНЦПЮТХЙС МЮ ХУ ХЯРХММШЕ ЩЙБХБЮКЕМРШ:
    ОПЮЙРХЙЮ ЩЙЯОКСЮРЮЖХХ Б ЯНЯРЮБЕ ПЕЮКЭМШУ ОПНЕЙРНБ ОНЙЮГЮКЮ, ВРН ЩРН
    ЛНФЕР ОПХБЕЯРХ Й ЮАЯСПДС. рЕЛ МЕ ЛЕМЕЕ, ОПЮБХКЮ ОПХЛЕМЕМХЪ РХОНЦПЮТХЙХ
    АСДСР ЯНБЕПЬЕМЯРБНБЮРЭЯЪ НР БЕПЯХХ Й БЕПЯХХ.

    > Jevix МЕ ЪБКЪЕРЯЪ НАПЮГЖНЛ ЛЕУЮМХГЛЮ БШЯНЙНИ ОПНХГБНДХРЕКЭМНЯРХ.
    й ЯНФЮКЕМХЧ, ОПНЕЙР ГЮРЕБЮКЯЪ ДКЪ АНКЕЕ ЯЙПНЛМШУ ГЮДЮВ МЕФЕКХ РЕ,
    ЙНРНПШЕ НМ ПЕЬЮЕР ЯЕИВЮЯ. нДМЮЙН, ГЮ БЯ╦ БПЕЛЪ ЩЙЯОКСЮРЮЖХХ ОПНЕЙРЮ
    МХЙЮЙХУ ФЮКНА МЮ ОПНХГБНДХРЕКЭМНЯРЭ МЕ ОНЯРСОЮКН.

[2] История Jevix
    -------------

    хДЕЪ ПЮГПЮАНРЮРЭ ЯБН╦ ЯПЕДЯРБН ДКЪ ОПХЛЕМЕМХЪ ОПЮБХК МЮАНПЮ РЕЙЯРНБ Х
    ТХКЭРПЮЖХХ HTML-РЕЦНБ ОНЪБХКЮЯЭ С ЛЕМЪ Б 2004 ЦНДС, БН БПЕЛЪ ПЮАНРШ
    МЮД ОПНЕЙРНЛ Luminescene (http://luminescene.ru/, http://luminescene.com).

    пЮГПЮАНРЙЮ БЕКЮЯЭ МЮ php, ОЕПБНЕ ОНПНФДЕМХЕ Jevix ОНКСВХКНЯЭ ДНБНКЭМН
    ЙНПЪБШЛ Х ЛЮКНТСМЙЖХНМЮКЭМШЛ.

    бЕПМСРЭЯЪ Й ЯРЮПНИ ХДЕЕ Ъ ПЕЬХК Б 2007 ЦНДЮ, БН БПЕЛЪ ПЮАНРШ МЮД ОПНЕЙРНЛ
    мНБНЯРХ@mail.ru (http://news.mail.ru/). щРНР ОПНЕЙР ОНКСВЮЕР МНБНЯРМШЕ
    ГЮЛЕРЙХ ХГ ЛМНФЕЯРБЮ ХЯРНВМХЙНБ ЯПЕДЯРБЮЛХ RSS. оНЪБХКНЯЭ ФЕКЮМХЕ
    ОПХБЕЯРХ БЯЕ РЕЙЯРШ Й ЕДМНЛС БХДС, ХГАЕФЮРЭ МЕФЕКЮРЕКЭМНИ HTML-ПЮГЛЕРЙХ,
    БПЕЛЪ НР БПЕЛЕМХ ОПНМХЙЮЧЫЕИ Б РЕЙЯРШ. рЮЙХЛ НАПЮГНЛ, ОЕПБЮЪ ОНКМНЖЕММЮЪ
    БЕПЯХЪ Jevix АШКЮ ПЮГПЮАНРЮМЮ МЮ Perl.

    тСМЙЖХНМЮК ДКЪ ТХКЭРПЮЖХХ ЮРПХАСРНБ РЕЦНБ Х ОПЕДНРБПЮЫЕМХЪ XSS-ЮРЮЙ
    АШК ПЮГПЮАНРЮМ Б ОЕПХНД ХЯОНКЭГНБЮМХЪ Jevix МЮ ОПНЕЙРЕ
    юБРНЙЮДЮАПЮ (http://autokadabra.ru/), ЙНЦДЮ МНБЮЪ php-БЕПЯХЪ АШКЮ Б
    ПЮГПЮАНРЙЕ, Ю МЕАУНДХЛНЯРЭ ЙНМРПНКХПНБЮРЭ БЕЯЭ ОНКЭГНБЮРЕКЭЯЙХИ ББНД
    СФЕ АШКЮ ЮЙРСЮКЭМНИ.

[3] Состав пакета
    -------------

    В состав пакета входят версии в двух кодировках:
    * windows-1251(Jevix::Makeup)
    * utf-8(Jevix::MakeupUtf)

    Файл sample.pl содержит примеры использования модуля.

    Файл testUtf.pl производит базовое тестирование класса на предмет
    корректной его работы, основываясь на наборе тестовых шаблонов

[4] Документация
    ------------

    Постараюсь в будущем должным образом составить документацию. Настало
    время с чего-то начать. На данный момент, единственным руководством
    по использованию Jevix является файл jevix_sample.pl.

[5] Планы
    -----

    Все планы зависят от ваших откликов и пожеланий

[6] Авторские права
    ---------------

    Пользоваться средством можно в любых целях, без каких-либо ограничений.
    Если Вы пожелаете поддержать проект, просто упомяните его ссылкой или
    словом. Спасибо.

    ==
    Игорь Аскаров <support@jevix.ru>
    http://jevix.ru/
perl-Jevix-0.9.7/sample.pl000064400000000000000000000123421133231612600153700ustar00rootroot00000000000000#!/usr/bin/perl -w

# Примеры использования модуля Jevix::Makeup

# нАПЮРХРЕ БМХЛЮМХЕ! дЮММЮЪ БЕПЯХЪ ЛНДСКЪ ОПХМХЛЮЕР РНКЭЙН СЙЮГЮРЕКХ Х БНГБПЮЫЮЕР СЙЮГЮРЕКЭ МЮ УЕЬ Я ПЕГСКЭРЮРЮЛХ ПЮАНРШ.
# бНГБПЮЫЮЕЛШИ УЕЬ ЯНДЕПФХР РПХ ЙКЧВЮ:
#    text - НАПЮАНРЮММШИ РЕЙЯР
#    error - ОПХГМЮЙ НАМЮПСФЕМХЪ НЬХАНЙ Б HTML-ТЮИКЕ
#    errorLog - ЯЯШКЙЮ МЮ ЛЮЯЯХБ Я НОХЯЮМХЕЛ НАМЮПСФЕММШУ НЬХАНЙ

use strict;
use warnings;
use Jevix::Makeup;
use Data::Dumper;

# пСЯЯЙЮЪ КНЙЮКЭ=================================================
use POSIX;
POSIX::setlocale (&POSIX::LC_CTYPE, "ru");
use locale;
# ===============================================================

# Sample input: Russian text (presumably stored in windows-1251, the encoding
# Jevix::Makeup is documented for) containing a hyphen used as a dash, straight
# double quotes, doubled spaces, a styled <b> tag and a trailing "...".
my $text = 'щРН - ОПНБЕПЙЮ. "мЮ,  ЦНБНПХР, БНР РЕАЕ  РБНХ ПСАЮУХ Х ОНПРЙХ, Ю Ъ ОНИДС Я <b style="color: red">"бЮМЭЙНИ"</b>, НМ ЙСДПЪБЕИ  РЕАЪ"...';

# Example 1: pass a reference to a hash that contains every parameter
my $conf = {
                isHTML=>1,                  # Work in hypertext mode (plain-text mode runs faster)
                lineBreaks=>1,              # Insert line breaks <br />
                paragraphs=>1,              # Mark up paragraphs with <p>
                dashes=>1,                  # Dashes
                dots=>1,                    # Ellipses
                edgeSpaces=>1,              # Strip whitespace at the beginning and end of a line
                tagSpaces=>1,               # Remove spaces between tags (</td>  <td>)
                multiSpaces=>1,             # Collapse multiple spaces into a single one
                redundantSpaces=>1,         # Remove spaces where they do not belong
                compositeWords=>0,          # Wrap compound words in a <nobr> tag
                compositeWordsLength=>10,   # Maximum length of a compound word that gets wrapped in <nobr>
                nbsp=>1,                    # Insert non-breaking spaces
                quotes=>1,                  # Typeset quotation marks
                qaType=>0,                  # Outer quote type (see the settings of the debug interface at http://jevix.ru/)
                qbType=>1,                  # Nested quote type
                misc=>1,                    # Miscellaneous (&copy, fractions and so on)
                codeMode=>2,                # How special characters are encoded (0: ANSI <...>, 1: HTML code <&#133;>, 2: HTML entities <&hellip;>)
                tagsDenyAll=>0,             # Reject all tags by default
                tagsDeny=>'',               # List of denied tags
                tagsAllow=>'',              # List of allowed tags (exempts them when deny-all is set)
                tagCloseSingle=>0,          # Close single tags when they are not closed
                tagCloseOpen=>0,            # Close tags still open at the end of the document
                tagNamesToLower=>0,         # Convert tag names to lower case
                tagNamesToUpper=>0,         # Convert tag names to upper case
                tagAttributesToLower=>0,    # Convert tag attribute names to lower case
                tagAttributesToUpper=>0,    # Convert tag attribute names to upper case
                tagQuoteValues=>0,          # Put tag attribute values in quotes
                tagUnQuoteValues=>0,        # Remove quotes around tag attribute values
                links=>1,                   # Automatically wrap links in an <a> tag
                linksAttributes=>0,         # Hash of attributes for all newly created links
                simpleXSS=>0,               # Remove possible XSS attacks from the document code
                checkHTML=>0,               # Check HTML integrity
                logErrors=>0                # Keep an error log
           };

# The text is passed by reference; the configuration hash is the second argument.
my $result = Jevix::Makeup->process(\$text, $conf);

# Simpler examples
#
# Example 2: select the "Basic" preset, disable insertion of non-breaking spaces, set the outer quote type, switch HTML mode off
# -----------------------------------------------------------------------------------------------------------------------------------
#
#my $result = Jevix::Makeup->process(\$text, {presetBasic=>1, nbsp=>0, qaType=>1, isHTML=>0});

# Example 3: process the text with the "default" parameters (basic)
# ---------------------------------------------------------------
#
#my $result = Jevix::Makeup->process(\$text);

# A few words about the tagsAllow and tagsDeny parameters
# -----------------------------------------
#
# tagsAllow and tagsDeny are easiest to pass as strings, although they may also be passed as hashes (undocumented in this version)
#
# tagsAllow may be given either in the simple form 'a,br,div' or in the extended form.
#
# Examples:
#         '|a,br,b' - allow the tags 'a', 'br', 'b', but forbid any attributes on these tags
#         '|a,br:class,b' - allow the tags 'a', 'br', 'b', but forbid any attributes on all tags except the class attribute on 'br'
#         'a:href:title,br,b' - allow the tags 'a', 'br', 'b'; any attributes are allowed on 'br' and 'b', but on 'a' only
#                               the attributes 'href' and 'title' are allowed
#
# tagsDeny is given as a simple string:
#         'script, object' - forbid the use of the tags 'script' and 'object'
# 

print Dumper $result;perl-Jevix-0.9.7/testsUtf.pl000064400000000000000000000236041133231612600157330ustar00rootroot00000000000000#!/usr/bin/perl -w

# Test suite for the Jevix::MakeupUtf module

use strict;
use warnings;
use Jevix::MakeupUtf;
use Data::Dumper;

# Russian locale ==================================================
use POSIX;
# The constant is called as an ordinary function; the legacy "&name" form
# bypasses prototypes and is easy to misread. The return value of setlocale
# is not checked, so a missing "ru" locale is silently ignored.
POSIX::setlocale(POSIX::LC_CTYPE(), "ru");
use locale;
# ===============================================================

# Question/answer test list. Each entry holds the input text (q), the expected
# output (a) and, optionally, configuration overrides (addConf) that the runner
# merges into the shared configuration before processing the entry.
my @tests = (
		# Dashes, redundant spaces and an ellipsis
		{ 
			q=>'п÷я─п╬п╡п╣я─я▐п╪ я┌п╦я─п╣, п╩п╦я┬п╫п╦п╣ п©я─п╬п╠п╣п╩я▀, п╪п╫п╬пЁп╬я┌п╬я┤п╦п╣ ... п║п╩п╬п╡п╬   - п╢п╣п╩п╬',
			a=>'п÷я─п╬п╡п╣я─я▐п╪ я┌п╦я─п╣, п╩п╦я┬п╫п╦п╣ п©я─п╬п╠п╣п╩я▀, п╪п╫п╬пЁп╬я┌п╬я┤п╦п╣&hellip; п║п╩п╬п╡п╬&nbsp;&mdash; п╢п╣п╩п╬'
		},

		# Outer quotes and quotes nested inside them
		{
			q=>'п÷я─п╬п╡п╣я─я▐п╣п╪ "п╨п╟п╡я▀я┤п╨п╦ п╡п╫п╣я┬п╫п╦п╣". п÷п╬я│п╩п╣ я█я┌п╬пЁп╬ п╬п╫ я│п╨п╟п╥п╟п╩: "п²п╣ п╪п╬я▒ я█я┌п╬ "п╢п╣п╩п╬""',
			a=>'п÷я─п╬п╡п╣я─я▐п╣п╪ &laquo;п╨п╟п╡я▀я┤п╨п╦ п╡п╫п╣я┬п╫п╦п╣&raquo;. п÷п╬я│п╩п╣ я█я┌п╬пЁп╬ п╬п╫ я│п╨п╟п╥п╟п╩: &laquo;п²п╣ п╪п╬я▒ я█я┌п╬ &ldquo;п╢п╣п╩п╬&rdquo;&raquo;'
		},

		# Quotes combined with tags
		{
			q=>'"п╨п╟п╡я▀я┤п╨п╦ я│ <b>"я┌п╣пЁп╟п╪п╦"</b>"',
			a=>'&laquo;п╨п╟п╡я▀я┤п╨п╦ я│ <b>&ldquo;я┌п╣пЁп╟п╪п╦&rdquo;</b>&raquo;'
		},

		# A hyphenated compound word is wrapped in <nobr>
		{
			q=>'п║п╬я│я┌п╟п╡п╫п╬п╣ я│п╩п╬п╡п╬ п╨п╟п╨-я┌п╬ я┌п╟п╨',
			a=>'п║п╬я│я┌п╟п╡п╫п╬п╣ я│п╩п╬п╡п╬ <nobr>п╨п╟п╨-я┌п╬</nobr> я┌п╟п╨'
		},

		# Paragraph markup: a blank line separates <p> blocks, a single newline gets <br />
		{
			addConf=>{'paragraphs'=>1},
			q=>"п©п╟я─п╟пЁя─п╟я└1\n\nп©п╟я─п╟пЁя─п╟я└2\nп╫п╬п╡п╟я▐ я│я┌я─п╬п╨п╟",
			a=>"<p>п©п╟я─п╟пЁя─п╟я└1</p>\n\n<p>п©п╟я─п╟пЁя─п╟я└2<br />\nп╫п╬п╡п╟я▐ я│я┌я─п╬п╨п╟</p>"
		},

		# Three-line text with paragraphs switched back off: the leading "--" is expected
		# as &mdash;&nbsp;, the ellipsis character as &hellip; and each newline gets <br />
		{
			addConf=>{'paragraphs'=>0},
			q=>"-- п■п╬ я│п╡п╦п╢п╟п╫я▄я▐, п╡п╫я┐я┤п╣п╨, п╢п╬ я│п╡п╦п╢п╟п╫я▄я▐! Б─⌠ п╥п╟п╨п╦п╡п╟п╩п╟ п╠п╟п╠п╨п╟, Б─⌠ п▓п╣п╤п╩п╦п╡я▀п╧ п╨п╟п╨п╬п╧, я│п╣п╧я┤п╟я│ я┌п╟п╨п╦я┘ п╫п╣я┌. п▓п╬я┌, п╫п╟п©я─п╦п╪п╣я─, п÷п╣я┌я─п╬п╡п╦я┤ Б─⌠ п©п╬п╪п╫я▌, п©п╬п╪п╫я▌, п╪п╬п╩п╬п╢п╬п╧ п╠я▀п╩, пЁп╟п╩п╟п╫я┌п╫я▀п╧, п╡п╬я│п©п╦я┌п╟п╫п╫я▀п╧Б─╕\n-- п■п╬ я│п╡п╦п╢п╟п╫я▄я▐, п╡п╫я┐я┤п╣п╨, п╢п╬ я│п╡п╦п╢п╟п╫я▄я▐! Б─⌠ п╥п╟п╨п╦п╡п╟п╩п╟ п╠п╟п╠п╨п╟, Б─⌠ п▓п╣п╤п╩п╦п╡я▀п╧ п╨п╟п╨п╬п╧, я│п╣п╧я┤п╟я│ я┌п╟п╨п╦я┘ п╫п╣я┌. п▓п╬я┌, п╫п╟п©я─п╦п╪п╣я─, п÷п╣я┌я─п╬п╡п╦я┤ Б─⌠ п©п╬п╪п╫я▌, п©п╬п╪п╫я▌, п╪п╬п╩п╬п╢п╬п╧ п╠я▀п╩, пЁп╟п╩п╟п╫я┌п╫я▀п╧, п╡п╬я│п©п╦я┌п╟п╫п╫я▀п╧Б─╕\n-- п■п╬ я│п╡п╦п╢п╟п╫я▄я▐, п╡п╫я┐я┤п╣п╨, п╢п╬ я│п╡п╦п╢п╟п╫я▄я▐! Б─⌠ п╥п╟п╨п╦п╡п╟п╩п╟ п╠п╟п╠п╨п╟, Б─⌠ п▓п╣п╤п╩п╦п╡я▀п╧ п╨п╟п╨п╬п╧, я│п╣п╧я┤п╟я│ я┌п╟п╨п╦я┘ п╫п╣я┌. п▓п╬я┌, п╫п╟п©я─п╦п╪п╣я─, п÷п╣я┌я─п╬п╡п╦я┤ Б─⌠ п©п╬п╪п╫я▌, п©п╬п╪п╫я▌, п╪п╬п╩п╬п╢п╬п╧ п╠я▀п╩, пЁп╟п╩п╟п╫я┌п╫я▀п╧, п╡п╬я│п©п╦я┌п╟п╫п╫я▀п╧Б─╕",
			a=>"&mdash;&nbsp;п■п╬ я│п╡п╦п╢п╟п╫я▄я▐, п╡п╫я┐я┤п╣п╨, п╢п╬&nbsp;я│п╡п╦п╢п╟п╫я▄я▐! Б─⌠ п╥п╟п╨п╦п╡п╟п╩п╟ п╠п╟п╠п╨п╟, Б─⌠ п▓п╣п╤п╩п╦п╡я▀п╧ п╨п╟п╨п╬п╧, я│п╣п╧я┤п╟я│ я┌п╟п╨п╦я┘ п╫п╣я┌. п▓п╬я┌, п╫п╟п©я─п╦п╪п╣я─, п÷п╣я┌я─п╬п╡п╦я┤ Б─⌠ п©п╬п╪п╫я▌, п©п╬п╪п╫я▌, п╪п╬п╩п╬п╢п╬п╧ п╠я▀п╩, пЁп╟п╩п╟п╫я┌п╫я▀п╧, п╡п╬я│п©п╦я┌п╟п╫п╫я▀п╧&hellip;<br />\n&mdash;&nbsp;п■п╬ я│п╡п╦п╢п╟п╫я▄я▐, п╡п╫я┐я┤п╣п╨, п╢п╬&nbsp;я│п╡п╦п╢п╟п╫я▄я▐! Б─⌠ п╥п╟п╨п╦п╡п╟п╩п╟ п╠п╟п╠п╨п╟, Б─⌠ п▓п╣п╤п╩п╦п╡я▀п╧ п╨п╟п╨п╬п╧, я│п╣п╧я┤п╟я│ я┌п╟п╨п╦я┘ п╫п╣я┌. п▓п╬я┌, п╫п╟п©я─п╦п╪п╣я─, п÷п╣я┌я─п╬п╡п╦я┤ Б─⌠ п©п╬п╪п╫я▌, п©п╬п╪п╫я▌, п╪п╬п╩п╬п╢п╬п╧ п╠я▀п╩, пЁп╟п╩п╟п╫я┌п╫я▀п╧, п╡п╬я│п©п╦я┌п╟п╫п╫я▀п╧&hellip;<br />\n&mdash;&nbsp;п■п╬ я│п╡п╦п╢п╟п╫я▄я▐, п╡п╫я┐я┤п╣п╨, п╢п╬&nbsp;я│п╡п╦п╢п╟п╫я▄я▐! Б─⌠ п╥п╟п╨п╦п╡п╟п╩п╟ п╠п╟п╠п╨п╟, Б─⌠ п▓п╣п╤п╩п╦п╡я▀п╧ п╨п╟п╨п╬п╧, я│п╣п╧я┤п╟я│ я┌п╟п╨п╦я┘ п╫п╣я┌. п▓п╬я┌, п╫п╟п©я─п╦п╪п╣я─, п÷п╣я┌я─п╬п╡п╦я┤ Б─⌠ п©п╬п╪п╫я▌, п©п╬п╪п╫я▌, п╪п╬п╩п╬п╢п╬п╧ п╠я▀п╩, пЁп╟п╩п╟п╫я┌п╫я▀п╧, п╡п╬я│п©п╦я┌п╟п╫п╫я▀п╧&hellip;"
		},

		# Tag/attribute filtering: onload and the cLaSs attribute are expected to be
		# dropped while href, title and style are kept
		{
			q=>'п╒п╣я│я┌ <a href="http://jevix.ru" title=hello onload="alert(1)">п╢п╬п©я┐я│я┌п╦п╪я▀я┘</a> я┌п╣пЁп╬п╡ п╦ <b     cLaSs= my style="wow">п╟я┌я┌я─п╦п╠я┐я┌п╬п╡</b>',
			a=>'п╒п╣я│я┌ <a href="http://jevix.ru" title=hello>п╢п╬п©я┐я│я┌п╦п╪я▀я┘</a> я┌п╣пЁп╬п╡ п╦ <b style="wow">п╟я┌я┌я─п╦п╠я┐я┌п╬п╡</b>'
		},

		# Auto-linking: a bare URL is wrapped in <a ... target="_blank">, a URL that
		# already sits inside an <a> element is left alone
		{
			q=>'п≈п╢п╣я│я▄ п╢п╬п╩п╤п╫п╟ п╠я▀я┌я▄ я│я│я▀п╩п╨п╟: http://jevix.ru, <a href="http://jevix.ru">п╟ п╥п╢п╣я│я▄ - п╫п╣я┌ http://jevix.ru</a>',
			a=>'п≈п╢п╣я│я▄ п╢п╬п╩п╤п╫п╟ п╠я▀я┌я▄ я│я│я▀п╩п╨п╟: <a href="http://jevix.ru" target="_blank">http://jevix.ru</a>, <a href="http://jevix.ru">п╟&nbsp;п╥п╢п╣я│я▄&nbsp;&mdash; п╫п╣я┌ http://jevix.ru</a>'
		},

		# Lower-casing: <B> and HREF are expected in lower case
		{
			addConf=>{'tagNamesToLower'=>1},
			q=>'п÷я─п╦п╡п╬п╢п╦п╪ п╡я│я▒ п╨ <B>п╫п╦п╤п╫п╣п╪я┐</b> <a HREF="http://jevix.ru">я─п╣пЁп╦я│я┌я─я┐</a>',
			a=>'п÷я─п╦п╡п╬п╢п╦п╪ п╡я│я▒ п╨ <b>п╫п╦п╤п╫п╣п╪я┐</b> <a href="http://jevix.ru">я─п╣пЁп╦я│я┌я─я┐</a>'
		},

		# XSS: the <script> element and the onmouseOVER handler are expected to be removed
		{
			addConf=>{'tagsAllow'=>'a,br,b'},
			q=>'XSS test <script>alert(1)</script>. <a href="http://jevix.ru" onmouseOVER="alert(1)">hey!</a>',
			a=>'XSS test. <a href="http://jevix.ru">hey!</a>'
		},

		# Unquoted attribute values are expected to come back quoted
		{
			addConf=>{'tagQuoteValues'=>1},
			q=>'<h1>this is test</h1><a href=jevix.ru title = link class="my">try jevix</a>',
			a=>'<h1>this is test</h1><a href="jevix.ru" title = "link" class="my">try jevix</a>'
		},

		# Tags left open at the end of the text are expected to be closed
		{
			addConf=>{'tagCloseOpen'=>1},
			q=>'<p><h1>this is test',
			a=>'<p><h1>this is test</h1></p>'
		},

	      );

# Base configuration the tests are run with (individual tests may override
# single keys through their addConf entry)
my $conf = {
               isHTML=>1,							# Work in hypertext mode (plain-text mode runs faster)
               vanish=>0,							# Convert the source text to plain text (ignores all other settings)
               lineBreaks=>1,							# Insert line breaks <br />
               paragraphs=>0,							# Mark up paragraphs with <p>
               dashes=>1,							# Dashes
               dots=>1,								# Ellipses
               edgeSpaces=>1,							# Strip whitespace at the beginning and end of a line
               tagSpaces=>1,							# Remove spaces between tags (</td>  <td>)
               multiSpaces=>1,							# Collapse multiple spaces into a single one
               redundantSpaces=>1,						# Remove spaces where they do not belong
               compositeWords=>1,						# Wrap compound words in a <nobr> tag
               compositeWordsLength=>10,					# Maximum length of a compound word that gets wrapped in <nobr>
               nbsp=>1,								# Insert non-breaking spaces
               quotes=>1,							# Typeset quotation marks
               qaType=>0,							# Outer quote type (see the settings of the debug interface at http://jevix.ru/)
               qbType=>1,							# Nested quote type
               misc=>1,								# Miscellaneous (&copy, fractions and so on)
               codeMode=>2,							# How special characters are encoded (0: ANSI <...>, 1: HTML code <&#133;>, 2: HTML entities <&hellip;>)
               tagsDenyAll=>0,							# Reject all tags by default
               tagsDeny=>'',							# List of denied tags
               tagsAllow=>'|A:href:title,br,B:STYLE',				# List of allowed tags (exempts them when deny-all is set)
               tagCloseSingle=>0,						# Close single tags when they are not closed
	       tagCloseOpen=>0,							# Close tags still open at the end of the document
               tagNamesToLower=>0,						# Convert tag names to lower case
               tagNamesToUpper=>0,						# Convert tag names to upper case
               tagAttributesToLower=>0,						# Convert tag attribute names to lower case
               tagAttributesToUpper=>0,						# Convert tag attribute names to upper case
               tagQuoteValues=>0,						# Put tag attribute values in quotes
               tagUnQuoteValues=>0,						# Remove quotes around tag attribute values
               links=>1,							# Automatically wrap links in an <a> tag
               linksAttributes=>{target=>'_blank'},				# Hash of attributes for all newly created links
               simpleXSS=>1,							# Remove possible XSS attacks from the document code
               checkHTML=>0,							# Check HTML integrity
               logErrors=>0							# Keep an error log
};

# Test runner.
# First checks the parser of the tagsAllow settings string, then feeds every
# entry of @tests through the module and compares the produced text with the
# expected one. Each failure is reported on STDOUT, followed by a summary.
# The script exits with status 1 when at least one check failed, 0 otherwise.
my $testsCount = 0;
my $errCount = 0;

# Direct class-method call; the indirect-object form "new Jevix::MakeupUtf"
# is ambiguous to the parser and discouraged.
my $jevix = Jevix::MakeupUtf->new;

print "\n\nTesting Jevix Class...\n\n";

# Check the parser of the HTML filter settings string
$testsCount++;
$jevix->setConf($conf);

my $pTest = $jevix->getConf();
$pTest = $pTest->{tagsAllow};

# The base configuration allows '|A:href:title,br,B:STYLE'; the parsed form is
# expected to have lower-case tag keys and per-tag validAttributes entries.
if(!($pTest->{br} && $pTest->{b} && $pTest->{a} && $pTest->{a}->{validAttributes}->{href} && $pTest->{a}->{validAttributes}->{title})) {
	print "Allowed tags string parser failure\n\n";
	$errCount++;
}

# Run the text tests
foreach my $test (@tests) {
	$testsCount++;
	my $text = $test->{q};

	# Configuration change. The overrides are merged into the shared $conf
	# and are NOT rolled back afterwards, so they stay in effect for all
	# following tests (which is why one test explicitly sets paragraphs
	# back to 0). NOTE(review): later expectations appear to rely on this
	# carry-over - do not isolate the tests without re-checking them.
	if($test->{addConf}) {
		foreach my $key (keys %{$test->{addConf}}) {
			$conf->{$key} = $test->{addConf}->{$key};
		}

		$jevix->setConf($conf);
	}

	my $result = $jevix->process(\$text);

	# Treat a missing result text as an empty string so that the comparison
	# and the report below do not emit "uninitialized value" warnings.
	my $got = defined $result->{text} ? $result->{text} : '';

	if($test->{a} ne $got) {
		print 'Test ' . $testsCount . ' failed: [' . $text . "]\n";
		print 'Expected: [' . $test->{a} . "]\n";
		print 'We have:  [' . $got . "]\n\n";
		$errCount++;
	}
}

# Statistics
print "Tests Performed: " . $testsCount . "\n";
print "Tests Failed: " . $errCount . "\n";
print "Well Done!\n\n" if(!$errCount);

# Let callers (shell scripts, package build checks) detect failures.
exit($errCount ? 1 : 0);