pax_global_header00006660000000000000000000000064117404657160014525gustar00rootroot0000000000000052 comment=57d078f2d606acd053cd731b74c517f23cf14f86 python3-module-sgmllib-1.0.0/000075500000000000000000000000001174046571600160375ustar00rootroot00000000000000python3-module-sgmllib-1.0.0/.gear/000075500000000000000000000000001174046571600170335ustar00rootroot00000000000000python3-module-sgmllib-1.0.0/.gear/merge000064400000000000000000000000271174046571600200540ustar00rootroot00000000000000merge: upstream master python3-module-sgmllib-1.0.0/.gear/rules000064400000000000000000000001141174046571600201040ustar00rootroot00000000000000spec: .gear/sgmllib.spec tar: . name=@name@-@version@ base=@name@-@version@ python3-module-sgmllib-1.0.0/.gear/sgmllib.spec000064400000000000000000000017651174046571600213510ustar00rootroot00000000000000%define oname sgmllib Name: python3-module-%oname Version: 1.0.0 Release: alt1.hg20100824 Summary: Py3k port of the old stdlib module License: BSD Group: Development/Python3 Url: http://hg.hardcoded.net/sgmllib Packager: Eugeny A. Rostovtsev (REAL) # hg clone https://bitbucket.org/hsoft/sgmllib Source: %name-%version.tar BuildArch: noarch BuildRequires(pre): rpm-build-python3 BuildPreReq: python3-devel python3-module-distribute %description sgmllib was dropped in Python 3. For those depending on it, that's somewhat unfortunate. This is a quick and dirty port of this old module. I just ran 2to3 on it and published it. I don't intend to maintain it, so it might be a good idea to eventually think about finding another module to use. %prep %setup %build %python3_build %install %python3_install %files %doc CHANGES README %python3_sitelibdir/* %changelog * Mon Apr 09 2012 Eugeny A. Rostovtsev (REAL) 1.0.0-alt1.hg20100824 - Initial build for Sisyphus python3-module-sgmllib-1.0.0/.hgignore000064400000000000000000000000351174046571600176400ustar00rootroot00000000000000syntax: glob .DS_Store *.pycpython3-module-sgmllib-1.0.0/CHANGES000064400000000000000000000001121174046571600170240ustar00rootroot00000000000000Version 1.0.0 -- 2010/08/24 --------------------------- * Initial Releasepython3-module-sgmllib-1.0.0/LICENSE000064400000000000000000000027711174046571600170530ustar00rootroot00000000000000Copyright (c) 2010, Hardcoded Software Inc., http://www.hardcoded.net All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Hardcoded Software Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.python3-module-sgmllib-1.0.0/README000064400000000000000000000006761174046571600167300ustar00rootroot00000000000000================================================== sgmllib3k -- Py3k port of the old stdlib module ================================================== sgmllib was dropped in Python 3. For those depending on it, that's somewhat unfortunate. This is a quick and dirty port of this old module. I just ran 2to3 on it and published it. I don't intend to maintain it, so it might be a good idea to eventually think about finding another module to use. python3-module-sgmllib-1.0.0/setup.py000064400000000000000000000011431174046571600175500ustar00rootroot00000000000000from setuptools import setup CLASSIFIERS = [ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 3', ] setup( name='sgmllib3k', version='1.0.0', author='Hardcoded Software', author_email='hsoft@hardcoded.net', py_modules=['sgmllib'], scripts=[], url='http://hg.hardcoded.net/sgmllib', license='BSD License', description='Py3k port of sgmllib.', long_description=open('README').read(), classifiers=CLASSIFIERS, test_suite='test_sgmllib', )python3-module-sgmllib-1.0.0/sgmllib.py000064400000000000000000000425741174046571600200560ustar00rootroot00000000000000"""A parser for SGML, using the derived class as a static DTD.""" # XXX This only supports those SGML features used by HTML. # XXX There should be a way to distinguish between PCDATA (parsed # character data -- the normal case), RCDATA (replaceable character # data -- only char and entity references and end tags are special) # and CDATA (character data -- only end tags are special). RCDATA is # not supported at all. import _markupbase import re __all__ = ["SGMLParser", "SGMLParseError"] # Regular expressions used for parsing interesting = re.compile('[&<]') incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' '<([a-zA-Z][^<>]*|' '/([a-zA-Z][^<>]*)?|' '![^<>]*)?') entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#([0-9]+)[^0-9]') starttagopen = re.compile('<[>a-zA-Z]') shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') piclose = re.compile('>') endbracket = re.compile('[<>]') tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') attrfind = re.compile( r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') class SGMLParseError(RuntimeError): """Exception raised for all parse errors.""" pass # SGML parser base class -- find tags and call handler functions. # Usage: p = SGMLParser(); p.feed(data); ...; p.close(). # The dtd is defined by deriving a class which defines methods # with special names to handle tags: start_foo and end_foo to handle # and , respectively, or do_foo to handle by itself. # (Tags are converted to lower case for this purpose.) The data # between tags is passed to the parser by calling self.handle_data() # with some data as argument (the data may be split up in arbitrary # chunks). Entity references are passed by calling # self.handle_entityref() with the entity reference as argument. class SGMLParser(_markupbase.ParserBase): # Definition of entities -- derived classes may override entity_or_charref = re.compile('&(?:' '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' ')(;?)') def __init__(self, verbose=0): """Initialize and reset this instance.""" self.verbose = verbose self.reset() def reset(self): """Reset this instance. Loses all unprocessed data.""" self.__starttag_text = None self.rawdata = '' self.stack = [] self.lasttag = '???' self.nomoretags = 0 self.literal = 0 _markupbase.ParserBase.reset(self) def setnomoretags(self): """Enter literal mode (CDATA) till EOF. Intended for derived classes only. """ self.nomoretags = self.literal = 1 def setliteral(self, *args): """Enter literal mode (CDATA). Intended for derived classes only. """ self.literal = 1 def feed(self, data): """Feed some data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). (This just saves the text, all the processing is done by goahead().) """ self.rawdata = self.rawdata + data self.goahead(0) def close(self): """Handle the remaining data.""" self.goahead(1) def error(self, message): raise SGMLParseError(message) # Internal -- handle data as far as reasonable. May leave state # and data to be processed by a subsequent call. If 'end' is # true, force handling all data as if followed by EOF marker. def goahead(self, end): rawdata = self.rawdata i = 0 n = len(rawdata) while i < n: if self.nomoretags: self.handle_data(rawdata[i:n]) i = n break match = interesting.search(rawdata, i) if match: j = match.start() else: j = n if i < j: self.handle_data(rawdata[i:j]) i = j if i == n: break if rawdata[i] == '<': if starttagopen.match(rawdata, i): if self.literal: self.handle_data(rawdata[i]) i = i+1 continue k = self.parse_starttag(i) if k < 0: break i = k continue if rawdata.startswith(" (i + 1): self.handle_data("<") i = i+1 else: # incomplete break continue if rawdata.startswith(" ]""" self.check_events(["" % inside], [ ("decl", inside), ]) def test_doctype_decl_external(self): inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'" self.check_events("" % inside, [ ("decl", inside), ]) def test_underscore_in_attrname(self): # SF bug #436621 """Make sure attribute names with underscores are accepted""" self.check_events("", [ ("starttag", "a", [("has_under", "has_under"), ("_under", "_under")]), ]) def test_underscore_in_tagname(self): # SF bug #436621 """Make sure tag names with underscores are accepted""" self.check_events("", [ ("starttag", "has_under", []), ("endtag", "has_under"), ]) def test_quotes_in_unquoted_attrs(self): # SF bug #436621 """Be sure quotes in unquoted attributes are made part of the value""" self.check_events("", [ ("starttag", "a", [("href", "foo'bar\"baz")]), ]) def test_xhtml_empty_tag(self): """Handling of XHTML-style empty start tags""" self.check_events("
text", [ ("starttag", "br", []), ("data", "text"), ("starttag", "i", []), ("endtag", "i"), ]) def test_processing_instruction_only(self): self.check_events("", [ ("pi", "processing instruction"), ]) def test_bad_nesting(self): self.check_events("
", [ ("starttag", "a", []), ("starttag", "b", []), ("endtag", "a"), ("endtag", "b"), ]) def test_bare_ampersands(self): self.check_events("this text & contains & ampersands &", [ ("data", "this text & contains & ampersands &"), ]) def test_bare_pointy_brackets(self): self.check_events("this < text > contains < bare>pointy< brackets", [ ("data", "this < text > contains < bare>pointy< brackets"), ]) def test_attr_syntax(self): output = [ ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")]) ] self.check_events("""""", output) self.check_events("""""", output) self.check_events("""""", output) self.check_events("""""", output) def test_attr_values(self): self.check_events("""""", [("starttag", "a", [("b", "xxx\n\txxx"), ("c", "yyy\t\nyyy"), ("d", "\txyz\n")]) ]) self.check_events("""""", [ ("starttag", "a", [("b", ""), ("c", "")]), ]) # URL construction stuff from RFC 1808: safe = "$-_.+" extra = "!*'()," reserved = ";/?:@&=" url = "http://example.com:8080/path/to/file?%s%s%s" % ( safe, extra, reserved) self.check_events("""""" % url, [ ("starttag", "e", [("a", url)]), ]) # Regression test for SF patch #669683. self.check_events("", [ ("starttag", "e", [("a", "rgb(1,2,3)")]), ]) def test_attr_values_entities(self): """Substitution of entities and charrefs in attribute values""" # SF bug #1452246 self.check_events("""""", [("starttag", "a", [("b", "<"), ("c", "<>"), ("d", "<->"), ("e", "< "), ("f", "&xxx;"), ("g", " !"), ("h", "Ǵ"), ("i", "x?a=b&c=d;"), ("j", "*"), ("k", "*"), ])]) def test_convert_overrides(self): # This checks that the character and entity reference # conversion helpers are called at the documented times. No # attempt is made to really change what the parser accepts. # self.collector = HTMLEntityCollector self.check_events(('foo' '&foobar;*'), [ ('entityref', 'convert', 'ldquo'), ('charref', 'convert', 'x201d'), ('starttag', 'a', [('title', '“test”')]), ('data', 'foo'), ('endtag', 'a'), ('entityref', 'foobar'), ('entityref', 'convert', 'foobar'), ('charref', '42'), ('charref', 'convert', '42'), ('codepoint', 'convert', 42), ]) def test_attr_funky_names(self): self.check_events("""""", [ ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]), ]) def test_attr_value_ip6_url(self): # http://www.python.org/sf/853506 self.check_events(("" ""), [ ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]), ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]), ]) def test_weird_starttags(self): self.check_events("", [ ("starttag", "a", []), ("starttag", "a", []), ]) self.check_events("", [ ("endtag", "a"), ("starttag", "a", []), ]) def test_declaration_junk_chars(self): self.check_parse_error("") def test_get_starttag_text(self): s = """""" self.check_events(s, [ ("starttag", "foobar", [("one", "1"), ("two", "2")]), ]) def test_cdata_content(self): s = (" ¬-an-entity-ref; " " ") self.collector = CDATAEventCollector self.check_events(s, [ ("starttag", "cdata", []), ("data", " ¬-an-entity-ref; "), ("endtag", "cdata"), ("starttag", "notcdata", []), ("data", " "), ("comment", " comment "), ("data", " "), ("endtag", "notcdata"), ]) s = """ """ self.check_events(s, [ ("starttag", "cdata", []), ("data", " "), ("endtag", "cdata"), ]) def test_illegal_declarations(self): s = 'abcdef' self.check_events(s, [ ("data", "abc"), ("unknown decl", 'spacer type="block" height="25"'), ("data", "def"), ]) def test_enumerated_attr_type(self): s = "]>" self.check_events(s, [ ('decl', 'DOCTYPE doc []'), ]) def test_only_decode_ascii(self): # SF bug #1651995, make sure non-ascii character references are not decoded s = '' self.check_events(s, [ ('starttag', 'signs', [('exclamation', '!'), ('copyright', '©'), ('quoteleft', '‘')]), ]) # XXX These tests have been disabled by prefixing their names with # an underscore. The first two exercise outstanding bugs in the # sgmllib module, and the third exhibits questionable behavior # that needs to be carefully considered before changing it. def _test_starttag_end_boundary(self): self.check_events("", [("starttag", "a", [("b", "<")])]) self.check_events("", [("starttag", "a", [("b", ">")])]) def _test_buffer_artefacts(self): output = [("starttag", "a", [("b", "<")])] self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) output = [("starttag", "a", [("b", ">")])] self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events(["'>"], output) self.check_events([""], output) self.check_events([""], output) output = [("comment", "abc")] self.check_events(["", ""], output) self.check_events(["<", "!--abc-->"], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events([""], output) self.check_events(["", ""], output) def _test_starttag_junk_chars(self): self.check_parse_error("<") self.check_parse_error("<>") self.check_parse_error("") self.check_parse_error("") self.check_parse_error("") self.check_parse_error("'") self.check_parse_error("