Primer commit del proyecto RSS

This commit is contained in:
jlimolina 2025-05-24 14:37:58 +02:00
commit 27c9515d29
1568 changed files with 252311 additions and 0 deletions

View file

@ -0,0 +1,506 @@
# Support for the Atom, RSS, RDF, and CDF feed formats
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import copy
from ..datetimes import _parse_date
from ..urls import make_safe_absolute_uri
from ..util import FeedParserDict
class Namespace(object):
    """Element handlers for the Atom, RSS, RDF, and CDF feed formats.

    These formats share many element names, some of which have conflicting
    interpretations, so all of the base feed format support is collected in
    this one class.  The handlers use parser state and helpers (``push``,
    ``pop``, ``push_content``, ``_get_context``, ``_save``, ...) that are
    not defined in this class; they must be provided by the object the
    handlers are bound to.
    """

    # Namespace URIs handled by this class; every one maps to the empty string.
    supported_namespaces = {
        '': '',
        'http://backend.userland.com/rss': '',
        'http://blogs.law.harvard.edu/tech/rss': '',
        'http://purl.org/rss/1.0/': '',
        'http://my.netscape.com/rdf/simple/0.9/': '',
        'http://example.com/newformat#': '',
        'http://example.com/necho': '',
        'http://purl.org/echo/': '',
        'uri/of/echo/namespace#': '',
        'http://purl.org/pie/': '',
        'http://purl.org/atom/ns#': '',
        'http://www.w3.org/2005/Atom': '',
        'http://purl.org/rss/1.0/modules/rss091#': '',
    }

    def _start_rss(self, attrs_d):
        """Set ``self.version`` from the ``version`` attribute of ``<rss>``.

        Reaching this handler means the document is an RSS feed, so a
        missing version, or one that does not start with ``'rss'``, is
        treated as a mistake and replaced.
        """
        if self.version and self.version.startswith('rss'):
            return
        declared = attrs_d.get('version', '')
        known = {
            '0.91': 'rss091u',
            '0.92': 'rss092',
            '0.93': 'rss093',
            '0.94': 'rss094',
        }.get(declared)
        if known:
            self.version = known
        elif declared.startswith('2.'):
            self.version = 'rss20'
        else:
            self.version = 'rss'

    def _start_channel(self, attrs_d):
        """Mark the parser as inside the feed and apply the CDF attributes."""
        self.infeed = 1
        self._cdf_common(attrs_d)

    def _cdf_common(self, attrs_d):
        """Treat ``lastmod`` / ``href`` attributes as modified / link elements.

        For each attribute present, the matching start handler is called
        with no attributes, the attribute value is written into the newest
        ``elementstack`` entry, and the matching end handler is called.
        """
        if 'lastmod' in attrs_d:
            self._start_modified({})
            # Overwrites the last slot of the newest stack entry
            # (presumably the element's collected text -- confirm in push/pop).
            self.elementstack[-1][-1] = attrs_d['lastmod']
            self._end_modified()
        if 'href' in attrs_d:
            self._start_link({})
            self.elementstack[-1][-1] = attrs_d['href']
            self._end_link()

    def _start_feed(self, attrs_d):
        """Mark the parser as inside the feed; pick an Atom version if unset."""
        self.infeed = 1
        if not self.version:
            atom_versions = {
                '0.1': 'atom01',
                '0.2': 'atom02',
                '0.3': 'atom03',
            }
            # Unknown or missing version attributes fall back to plain 'atom'.
            self.version = atom_versions.get(attrs_d.get('version')) or 'atom'

    def _end_channel(self):
        """Mark the parser as no longer inside the feed."""
        self.infeed = 0
    _end_feed = _end_channel

    def _start_image(self, attrs_d):
        """Enter an image element, creating ``image`` unless inside an entry."""
        context = self._get_context()
        if not self.inentry:
            context.setdefault('image', FeedParserDict())
        self.inimage = 1
        self.title_depth = -1
        self.push('image', 0)

    def _end_image(self):
        """Leave the image element."""
        self.pop('image')
        self.inimage = 0

    def _start_textinput(self, attrs_d):
        """Enter a textinput element, creating ``textinput`` in the context."""
        context = self._get_context()
        context.setdefault('textinput', FeedParserDict())
        self.intextinput = 1
        self.title_depth = -1
        self.push('textinput', 0)
    _start_textInput = _start_textinput

    def _end_textinput(self):
        """Leave the textinput element."""
        self.pop('textinput')
        self.intextinput = 0
    _end_textInput = _end_textinput

    def _start_author(self, attrs_d):
        """Enter an author element and append a fresh dict to ``authors``."""
        self.inauthor = 1
        self.push('author', 1)
        # A new, empty FeedParserDict is appended for every author element.
        self._get_context().setdefault('authors', []).append(FeedParserDict())
    _start_managingeditor = _start_author

    def _end_author(self):
        """Leave the author element and synchronise the author detail."""
        self.pop('author')
        self.inauthor = 0
        self._sync_author_detail()
    _end_managingeditor = _end_author

    def _start_contributor(self, attrs_d):
        """Enter a contributor element and append a dict to ``contributors``."""
        self.incontributor = 1
        self._get_context().setdefault('contributors', []).append(FeedParserDict())
        self.push('contributor', 0)

    def _end_contributor(self):
        """Leave the contributor element."""
        self.pop('contributor')
        self.incontributor = 0

    def _start_name(self, attrs_d):
        """Begin collecting a ``name`` element."""
        self.push('name', 0)

    def _end_name(self):
        """Store the collected name on whichever parent is currently open.

        Precedence is publisher, author, contributor, then textinput; the
        name is discarded when none of them is open.
        """
        name = self.pop('name')
        if self.inpublisher:
            self._save_author('name', name, 'publisher')
        elif self.inauthor:
            self._save_author('name', name)
        elif self.incontributor:
            self._save_contributor('name', name)
        elif self.intextinput:
            self._get_context()['name'] = name

    def _start_width(self, attrs_d):
        """Begin collecting a ``width`` element."""
        self.push('width', 0)

    def _end_width(self):
        """Store the width as an int (0 if not numeric) when inside an image."""
        raw = self.pop('width')
        try:
            width = int(raw)
        except ValueError:
            width = 0
        if self.inimage:
            self._get_context()['width'] = width

    def _start_height(self, attrs_d):
        """Begin collecting a ``height`` element."""
        self.push('height', 0)

    def _end_height(self):
        """Store the height as an int (0 if not numeric) when inside an image."""
        raw = self.pop('height')
        try:
            height = int(raw)
        except ValueError:
            height = 0
        if self.inimage:
            self._get_context()['height'] = height

    def _start_url(self, attrs_d):
        """Begin collecting a URL-like element under the name ``href``."""
        self.push('href', 1)
    _start_homepage = _start_url
    _start_uri = _start_url

    def _end_url(self):
        """Store the collected URL on the open author or contributor."""
        href = self.pop('href')
        if self.inauthor:
            self._save_author('href', href)
        elif self.incontributor:
            self._save_contributor('href', href)
    _end_homepage = _end_url
    _end_uri = _end_url

    def _start_email(self, attrs_d):
        """Begin collecting an ``email`` element."""
        self.push('email', 0)

    def _end_email(self):
        """Store the collected email on the open publisher/author/contributor."""
        email = self.pop('email')
        if self.inpublisher:
            self._save_author('email', email, 'publisher')
        elif self.inauthor:
            self._save_author('email', email)
        elif self.incontributor:
            self._save_contributor('email', email)

    def _start_subtitle(self, attrs_d):
        """Begin ``subtitle`` content with a default type of text/plain."""
        self.push_content('subtitle', attrs_d, 'text/plain', 1)
    _start_tagline = _start_subtitle

    def _end_subtitle(self):
        """Finish ``subtitle`` content."""
        self.pop_content('subtitle')
    _end_tagline = _end_subtitle

    def _start_rights(self, attrs_d):
        """Begin ``rights`` content with a default type of text/plain."""
        self.push_content('rights', attrs_d, 'text/plain', 1)
    _start_copyright = _start_rights

    def _end_rights(self):
        """Finish ``rights`` content."""
        self.pop_content('rights')
    _end_copyright = _end_rights

    def _start_item(self, attrs_d):
        """Open a new entry and reset the per-entry parser state.

        An ``rdf:about`` attribute, when present, becomes the entry ``id``.
        """
        self.entries.append(FeedParserDict())
        self.push('item', 0)
        self.inentry = 1
        self.guidislink = 0
        self.title_depth = -1
        about = self._get_attribute(attrs_d, 'rdf:about')
        if about:
            self._get_context()['id'] = about
        self._cdf_common(attrs_d)
    _start_entry = _start_item

    def _end_item(self):
        """Close the current entry."""
        self.pop('item')
        self.inentry = 0
        self.hasContent = 0
    _end_entry = _end_item

    def _start_language(self, attrs_d):
        """Begin collecting a ``language`` element."""
        self.push('language', 1)

    def _end_language(self):
        """Record the collected language as ``self.lang``."""
        self.lang = self.pop('language')

    def _start_webmaster(self, attrs_d):
        """Begin collecting a webmaster element under the name ``publisher``."""
        self.push('publisher', 1)

    def _end_webmaster(self):
        """Finish the publisher element and synchronise its detail."""
        self.pop('publisher')
        self._sync_author_detail('publisher')

    def _start_published(self, attrs_d):
        """Begin collecting a ``published`` date."""
        self.push('published', 1)
    _start_issued = _start_published
    _start_pubdate = _start_published

    def _end_published(self):
        """Save the parsed form of the collected ``published`` date."""
        text = self.pop('published')
        self._save('published_parsed', _parse_date(text), overwrite=True)
    _end_issued = _end_published
    _end_pubdate = _end_published

    def _start_updated(self, attrs_d):
        """Begin collecting an ``updated`` date."""
        self.push('updated', 1)
    _start_modified = _start_updated
    _start_lastbuilddate = _start_updated

    def _end_updated(self):
        """Save the parsed form of the collected ``updated`` date."""
        text = self.pop('updated')
        self._save('updated_parsed', _parse_date(text), overwrite=True)
    _end_modified = _end_updated
    _end_lastbuilddate = _end_updated

    def _start_created(self, attrs_d):
        """Begin collecting a ``created`` date."""
        self.push('created', 1)

    def _end_created(self):
        """Save the parsed form of the collected ``created`` date."""
        text = self.pop('created')
        self._save('created_parsed', _parse_date(text), overwrite=True)

    def _start_expirationdate(self, attrs_d):
        """Begin collecting an expiration date under the name ``expired``."""
        self.push('expired', 1)

    def _end_expirationdate(self):
        """Save the parsed form of the collected ``expired`` date."""
        text = self.pop('expired')
        self._save('expired_parsed', _parse_date(text), overwrite=True)

    def _start_category(self, attrs_d):
        """Add a tag built from the element's attributes, then collect its text.

        ``scheme`` falls back to the ``domain`` attribute when absent.
        """
        scheme = attrs_d.get('scheme', attrs_d.get('domain'))
        self._add_tag(attrs_d.get('term'), scheme, attrs_d.get('label'))
        self.push('category', 1)
    _start_keywords = _start_category

    def _end_category(self):
        """Use the element text as a tag term.

        The text fills in the most recent tag if that tag has no term yet;
        otherwise it is added as a new tag.  Empty text is ignored.
        """
        term = self.pop('category')
        if not term:
            return
        tags = self._get_context()['tags']
        if tags and not tags[-1]['term']:
            tags[-1]['term'] = term
        else:
            self._add_tag(term, None, None)
    _end_keywords = _end_category

    def _start_cloud(self, attrs_d):
        """Store the element's attributes as ``cloud`` in the context."""
        self._get_context()['cloud'] = FeedParserDict(attrs_d)

    def _start_link(self, attrs_d):
        """Record a link element.

        ``rel`` defaults to ``'alternate'``; ``type`` defaults to
        ``'application/atom+xml'`` for ``rel='self'`` and ``'text/html'``
        otherwise.  The link is appended to ``links`` unless the parser is
        inside an image within an entry.  A link carrying an href sets
        ``link`` when it is an alternate of an HTML type; a link without
        an href has its text collected instead.
        """
        attrs_d.setdefault('rel', 'alternate')
        default_type = 'application/atom+xml' if attrs_d['rel'] == 'self' else 'text/html'
        attrs_d.setdefault('type', default_type)
        context = self._get_context()
        attrs_d = self._enforce_href(attrs_d)
        if 'href' in attrs_d:
            attrs_d['href'] = self.resolve_uri(attrs_d['href'])
        expecting_text = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        if not self.inentry or not self.inimage:
            context['links'].append(FeedParserDict(attrs_d))
        if 'href' not in attrs_d:
            self.push('link', expecting_text)
        elif (
            attrs_d.get('rel') == 'alternate'
            and self.map_content_type(attrs_d.get('type')) in self.html_types
        ):
            context['link'] = attrs_d['href']

    def _end_link(self):
        """Finish the link element."""
        self.pop('link')

    def _start_guid(self, attrs_d):
        """Begin collecting an id; note whether it is flagged as a permalink."""
        self.guidislink = (attrs_d.get('ispermalink', 'true') == 'true')
        self.push('id', 1)
    _start_id = _start_guid

    def _end_guid(self):
        """Save ``guidislink`` and, for permalink guids, the link itself."""
        guid = self.pop('id')
        acts_as_link = self.guidislink and 'link' not in self._get_context()
        self._save('guidislink', acts_as_link)
        if self.guidislink:
            # guid acts as link, but only if 'ispermalink' is not present or
            # is 'true', and only if the item doesn't already have a link
            self._save('link', guid)
    _end_id = _end_guid

    def _start_title(self, attrs_d):
        """Begin ``title`` content, or pass it through as an unknown tag for SVG."""
        if self.svgOK:
            return self.unknown_starttag('title', list(attrs_d.items()))
        expecting_text = self.infeed or self.inentry or self.insource
        self.push_content('title', attrs_d, 'text/plain', expecting_text)

    def _end_title(self):
        """Finish ``title`` content and remember the depth of a non-empty title."""
        if self.svgOK:
            return
        if self.pop_content('title'):
            self.title_depth = self.depth

    def _start_description(self, attrs_d):
        """Begin a description.

        When the context already has a summary and no content has been
        seen, the description is handled as content instead.
        """
        context = self._get_context()
        if 'summary' in context and not self.hasContent:
            self._summaryKey = 'content'
            self._start_content(attrs_d)
        else:
            expecting_text = self.infeed or self.inentry or self.insource
            self.push_content('description', attrs_d, 'text/html', expecting_text)

    def _start_abstract(self, attrs_d):
        """Begin an abstract as plain-text ``description`` content."""
        expecting_text = self.infeed or self.inentry or self.insource
        self.push_content('description', attrs_d, 'text/plain', expecting_text)

    def _end_description(self):
        """Finish a description, mirroring the choice made when it started."""
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.pop_content('description')
        self._summaryKey = None
    _end_abstract = _end_description

    def _start_info(self, attrs_d):
        """Begin ``info`` content with a default type of text/plain."""
        self.push_content('info', attrs_d, 'text/plain', 1)
    _start_feedburner_browserfriendly = _start_info

    def _end_info(self):
        """Finish ``info`` content."""
        self.pop_content('info')
    _end_feedburner_browserfriendly = _end_info

    def _start_generator(self, attrs_d):
        """Store the generator attributes as ``generator_detail``; collect text."""
        if attrs_d:
            attrs_d = self._enforce_href(attrs_d)
            if 'href' in attrs_d:
                attrs_d['href'] = self.resolve_uri(attrs_d['href'])
        self._get_context()['generator_detail'] = FeedParserDict(attrs_d)
        self.push('generator', 1)

    def _end_generator(self):
        """Copy the generator text into ``generator_detail['name']``."""
        name = self.pop('generator')
        context = self._get_context()
        if 'generator_detail' in context:
            context['generator_detail']['name'] = name

    def _start_summary(self, attrs_d):
        """Begin a summary.

        When the context already has a summary and no content has been
        seen, this element is handled as content instead.
        """
        context = self._get_context()
        if 'summary' in context and not self.hasContent:
            self._summaryKey = 'content'
            self._start_content(attrs_d)
        else:
            self._summaryKey = 'summary'
            self.push_content(self._summaryKey, attrs_d, 'text/plain', 1)

    def _end_summary(self):
        """Finish a summary, mirroring the choice made when it started."""
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.pop_content(self._summaryKey or 'summary')
        self._summaryKey = None

    def _start_enclosure(self, attrs_d):
        """Append the enclosure to ``links`` with ``rel='enclosure'``."""
        attrs_d = self._enforce_href(attrs_d)
        context = self._get_context()
        attrs_d['rel'] = 'enclosure'
        context.setdefault('links', []).append(FeedParserDict(attrs_d))

    def _start_source(self, attrs_d):
        """Enter a source element, recording its ``url`` attribute if any."""
        if 'url' in attrs_d:
            # This means that we're processing a source element from an
            # RSS 2.0 feed
            self.sourcedata['href'] = attrs_d['url']
        self.push('source', 1)
        self.insource = 1
        self.title_depth = -1

    def _end_source(self):
        """Store a deep copy of the source data in the context, then reset it."""
        self.insource = 0
        title = self.pop('source')
        if title:
            self.sourcedata['title'] = title
        self._get_context()['source'] = copy.deepcopy(self.sourcedata)
        self.sourcedata.clear()

    def _start_content(self, attrs_d):
        """Begin ``content``, remembering a ``src`` attribute when present."""
        self.hasContent = 1
        self.push_content('content', attrs_d, 'text/plain', 1)
        src = attrs_d.get('src')
        if src:
            self.contentparams['src'] = src
        # NOTE(review): 'content' is pushed again right after push_content();
        # confirm against push()/pop() that this second push is intended.
        self.push('content', 1)

    def _start_body(self, attrs_d):
        """Begin ``content`` with a default type of application/xhtml+xml."""
        self.push_content('content', attrs_d, 'application/xhtml+xml', 1)
    _start_xhtml_body = _start_body

    def _start_content_encoded(self, attrs_d):
        """Begin ``content`` with a default type of text/html."""
        self.hasContent = 1
        self.push_content('content', attrs_d, 'text/html', 1)
    _start_fullitem = _start_content_encoded

    def _end_content(self):
        """Finish ``content``; plain-text or HTML content is also saved as summary."""
        # The content type has to be inspected before pop_content() runs.
        content_type = self.map_content_type(self.contentparams.get('type'))
        copy_to_summary = content_type in ({'text/plain'} | self.html_types)
        value = self.pop_content('content')
        if copy_to_summary:
            self._save('summary', value)
    _end_body = _end_content
    _end_xhtml_body = _end_content
    _end_content_encoded = _end_content
    _end_fullitem = _end_content

    def _start_newlocation(self, attrs_d):
        """Begin collecting a ``newlocation`` element."""
        self.push('newlocation', 1)

    def _end_newlocation(self):
        """Store the new location, made absolute, on the feed-level data only."""
        url = self.pop('newlocation')
        context = self._get_context()
        # don't set newlocation if the context isn't right
        if context is self.feeddata:
            context['newlocation'] = make_safe_absolute_uri(self.baseuri, url.strip())

View file

@ -0,0 +1,53 @@
# Support for the administrative elements extension
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from ..util import FeedParserDict
class Namespace(object):
    """Handlers for the RSS 1.0 administrative elements extension.

    RDF Site Summary 1.0 Modules: Administrative
    http://web.resource.org/rss/1.0/modules/admin/
    """

    supported_namespaces = {
        'http://webns.net/mvcb/': 'admin',
    }

    def _start_admin_generatoragent(self, attrs_d):
        """Record the ``rdf:resource`` attribute as the generator.

        The attribute value is pushed and popped as the content of a
        ``generator`` element, and ``generator_detail`` is set to a dict
        whose ``href`` is that value (``href`` holds whatever
        ``_get_attribute`` returned, even when it is empty).
        """
        self.push('generator', 1)
        resource = self._get_attribute(attrs_d, 'rdf:resource')
        if resource:
            # Feed the attribute value in as if it were the element's text.
            self.elementstack[-1][2].append(resource)
        self.pop('generator')
        detail = FeedParserDict({'href': resource})
        self._get_context()['generator_detail'] = detail

    def _start_admin_errorreportsto(self, attrs_d):
        """Record the ``rdf:resource`` attribute as ``errorreportsto`` content."""
        self.push('errorreportsto', 1)
        resource = self._get_attribute(attrs_d, 'rdf:resource')
        if resource:
            self.elementstack[-1][2].append(resource)
        self.pop('errorreportsto')

View file

@ -0,0 +1,69 @@
# Support for the Creative Commons licensing extensions
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from ..util import FeedParserDict
class Namespace(object):
    """Handlers for the Creative Commons licensing extensions.

    Both handler families record a license by appending a dict with
    ``rel='license'`` (plus ``href`` when a value is available) to the
    ``links`` list of the current context.
    """

    supported_namespaces = {
        # RDF-based namespace
        'http://creativecommons.org/ns#license': 'cc',

        # Old RDF-based namespace
        'http://web.resource.org/cc/': 'cc',

        # RSS-based namespace
        'http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html': 'creativecommons',

        # Old RSS-based namespace
        'http://backend.userland.com/creativeCommonsRssModule': 'creativecommons',
    }

    def _start_cc_license(self, attrs_d):
        """Append a license link taken from the ``rdf:resource`` attribute.

        :param attrs_d: attributes of the element; only ``rdf:resource``
            is read.
        """
        context = self._get_context()
        value = self._get_attribute(attrs_d, 'rdf:resource')
        link = FeedParserDict()
        link['rel'] = 'license'
        if value:
            link['href'] = value
        context.setdefault('links', []).append(link)

    def _start_creativecommons_license(self, attrs_d):
        """Begin collecting the text of a ``license`` element."""
        self.push('license', 1)
    _start_creativeCommons_license = _start_creativecommons_license

    def _end_creativecommons_license(self):
        """Append a license link built from the element text.

        The raw ``license`` key is then removed from the context, since the
        license is exposed through ``links``.
        """
        value = self.pop('license')
        context = self._get_context()
        link = FeedParserDict()
        link['rel'] = 'license'
        if value:
            link['href'] = value
        context.setdefault('links', []).append(link)
        # pop() is presumed to store the text under 'license' (confirm in
        # pop()); remove it only if it is there so that a context without
        # the key no longer raises KeyError.
        context.pop('license', None)
    _end_creativeCommons_license = _end_creativecommons_license

View file

@ -0,0 +1,134 @@
# Support for the Dublin Core metadata extensions
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from ..datetimes import _parse_date
from ..util import FeedParserDict
class Namespace(object):
    """Handlers for the Dublin Core metadata extensions.

    Most handlers simply delegate to the handler of the equivalent core
    element (for example ``dc:creator`` to the author handlers).  Those
    core handlers are not defined in this class and must be provided by
    the object these methods are bound to.
    """

    supported_namespaces = {
        'http://purl.org/dc/elements/1.1/': 'dc',
        'http://purl.org/dc/terms/': 'dcterms',
    }

    def _end_dc_author(self):
        """Delegate to ``_end_author``."""
        self._end_author()

    def _end_dc_creator(self):
        """Delegate to ``_end_author``."""
        self._end_author()

    def _end_dc_date(self):
        """Delegate to ``_end_updated``."""
        self._end_updated()

    def _end_dc_description(self):
        """Delegate to ``_end_description``."""
        self._end_description()

    def _end_dc_language(self):
        """Delegate to ``_end_language``."""
        self._end_language()

    def _end_dc_publisher(self):
        """Delegate to ``_end_webmaster``."""
        self._end_webmaster()

    def _end_dc_rights(self):
        """Delegate to ``_end_rights``."""
        self._end_rights()

    def _end_dc_subject(self):
        """Delegate to ``_end_category``."""
        self._end_category()

    def _end_dc_title(self):
        """Delegate to ``_end_title``."""
        self._end_title()

    def _end_dcterms_created(self):
        """Delegate to ``_end_created``."""
        self._end_created()

    def _end_dcterms_issued(self):
        """Delegate to ``_end_published``."""
        self._end_published()

    def _end_dcterms_modified(self):
        """Delegate to ``_end_updated``."""
        self._end_updated()

    def _start_dc_author(self, attrs_d):
        """Delegate to ``_start_author``."""
        self._start_author(attrs_d)

    def _start_dc_creator(self, attrs_d):
        """Delegate to ``_start_author``."""
        self._start_author(attrs_d)

    def _start_dc_date(self, attrs_d):
        """Delegate to ``_start_updated``."""
        self._start_updated(attrs_d)

    def _start_dc_description(self, attrs_d):
        """Delegate to ``_start_description``."""
        self._start_description(attrs_d)

    def _start_dc_language(self, attrs_d):
        """Delegate to ``_start_language``."""
        self._start_language(attrs_d)

    def _start_dc_publisher(self, attrs_d):
        """Delegate to ``_start_webmaster``."""
        self._start_webmaster(attrs_d)

    def _start_dc_rights(self, attrs_d):
        """Delegate to ``_start_rights``."""
        self._start_rights(attrs_d)

    def _start_dc_subject(self, attrs_d):
        """Delegate to ``_start_category``."""
        self._start_category(attrs_d)

    def _start_dc_title(self, attrs_d):
        """Delegate to ``_start_title``."""
        self._start_title(attrs_d)

    def _start_dcterms_created(self, attrs_d):
        """Delegate to ``_start_created``."""
        self._start_created(attrs_d)

    def _start_dcterms_issued(self, attrs_d):
        """Delegate to ``_start_published``."""
        self._start_published(attrs_d)

    def _start_dcterms_modified(self, attrs_d):
        """Delegate to ``_start_updated``."""
        self._start_updated(attrs_d)

    def _start_dcterms_valid(self, attrs_d):
        """Begin collecting a validity period under the name ``validity``."""
        self.push('validity', 1)

    def _end_dcterms_valid(self):
        """Save the ``start`` and ``end`` components of the validity period.

        The collected text is a ``;``-separated list of ``key=value``
        components.  ``start`` and ``end`` are each saved both verbatim
        (``validity_start`` / ``validity_end``) and parsed
        (``validity_start_parsed`` / ``validity_end_parsed``); every other
        component is ignored.
        """
        for validity_detail in self.pop('validity').split(';'):
            if '=' not in validity_detail:
                continue
            key, value = validity_detail.split('=', 1)
            # Components are commonly written as "start=...; end=...", so
            # the key can carry whitespace left over from the separator.
            key = key.strip()
            if key == 'start':
                self._save('validity_start', value, overwrite=True)
                self._save('validity_start_parsed', _parse_date(value), overwrite=True)
            elif key == 'end':
                self._save('validity_end', value, overwrite=True)
                self._save('validity_end_parsed', _parse_date(value), overwrite=True)

    def _start_dc_contributor(self, attrs_d):
        """Enter a contributor whose element text is the contributor's name.

        A fresh dict is appended to ``contributors`` and the element is
        pushed as ``name`` so that ``_end_name`` stores its text.
        """
        self.incontributor = 1
        context = self._get_context()
        context.setdefault('contributors', [])
        context['contributors'].append(FeedParserDict())
        self.push('name', 0)

    def _end_dc_contributor(self):
        """Store the contributor name via ``_end_name`` and leave the element."""
        self._end_name()
        self.incontributor = 0

View file

@ -0,0 +1,278 @@
# Support for the GeoRSS format
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Required for Python 3.6 compatibility.
from __future__ import generator_stop
from ..util import FeedParserDict
class Namespace(object):
    """Handlers for GeoRSS and GML geometry elements.

    Parsed geometries are merged into the ``where`` dict of the current
    context.  The handlers use parser helpers (``push``, ``pop``,
    ``_get_context``) that are not defined in this class.
    """

    supported_namespaces = {
        'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
        'http://www.georss.org/georss': 'georss',
        'http://www.opengis.net/gml': 'gml',
    }

    def __init__(self):
        # Kind of GML geometry currently open: 0 (none), 1 (point),
        # 'linestring' or 'polygon'.
        self.ingeometry = 0
        super(Namespace, self).__init__()

    def _start_georssgeom(self, attrs_d):
        """Begin a GeoRSS-Simple geometry and reset ``where`` to an empty dict."""
        self.push('geometry', 0)
        context = self._get_context()
        context['where'] = FeedParserDict()
    _start_georss_point = _start_georssgeom
    _start_georss_line = _start_georssgeom
    _start_georss_polygon = _start_georssgeom
    _start_georss_box = _start_georssgeom

    def _save_where(self, geometry):
        """Merge *geometry* into the ``where`` dict of the current context."""
        context = self._get_context()
        context['where'].update(geometry)

    def _end_georss_point(self):
        """Parse the collected text as a point and save it if valid."""
        geometry = _parse_georss_point(self.pop('geometry'))
        if geometry:
            self._save_where(geometry)

    def _end_georss_line(self):
        """Parse the collected text as a line and save it if valid."""
        geometry = _parse_georss_line(self.pop('geometry'))
        if geometry:
            self._save_where(geometry)

    def _end_georss_polygon(self):
        """Parse the collected text as a polygon and save it if valid."""
        geometry = _parse_georss_polygon(self.pop('geometry'))
        if geometry:
            self._save_where(geometry)

    def _end_georss_box(self):
        """Parse the collected text as a box and save it if valid."""
        geometry = _parse_georss_box(self.pop('geometry'))
        if geometry:
            self._save_where(geometry)

    def _start_where(self, attrs_d):
        """Begin a ``where`` element and reset ``where`` to an empty dict."""
        self.push('where', 0)
        context = self._get_context()
        context['where'] = FeedParserDict()
    _start_georss_where = _start_where

    def _parse_srs_attrs(self, attrs_d):
        """Record ``srsName`` and ``srsDimension`` in the ``where`` dict.

        ``srsdimension`` defaults to 2, also when it is not an integer.
        ``where`` is created when the context does not have one yet.
        """
        srs_name = attrs_d.get('srsname')
        try:
            srs_dimension = int(attrs_d.get('srsdimension', '2'))
        except ValueError:
            srs_dimension = 2
        context = self._get_context()
        if 'where' not in context:
            context['where'] = {}
        context['where']['srsName'] = srs_name
        context['where']['srsDimension'] = srs_dimension

    def _start_gml_point(self, attrs_d):
        """Begin a GML point."""
        self._parse_srs_attrs(attrs_d)
        self.ingeometry = 1
        self.push('geometry', 0)

    def _start_gml_linestring(self, attrs_d):
        """Begin a GML line string."""
        self._parse_srs_attrs(attrs_d)
        self.ingeometry = 'linestring'
        self.push('geometry', 0)

    def _start_gml_polygon(self, attrs_d):
        """Begin a GML polygon."""
        self._parse_srs_attrs(attrs_d)
        self.push('geometry', 0)

    def _start_gml_exterior(self, attrs_d):
        """Begin the exterior of a GML polygon."""
        self.push('geometry', 0)

    def _start_gml_linearring(self, attrs_d):
        """Begin a GML linear ring; its positions are read as a polygon."""
        self.ingeometry = 'polygon'
        self.push('geometry', 0)

    @staticmethod
    def _gml_swap_axes(srs_name):
        """Return whether the first two values of each position are swapped.

        The answer is ``True`` unless *srs_name* mentions ``EPSG`` and its
        final ``:``-separated component is an integer code that is absent
        from ``_geogCS``.  A component that is not an integer used to raise
        ``ValueError``; it is now treated like a missing srsName.
        """
        if not srs_name or "EPSG" not in srs_name:
            return True
        try:
            epsg = int(srs_name.split(":")[-1])
        except ValueError:
            return True
        return epsg in _geogCS

    def _start_gml_pos(self, attrs_d):
        """Begin collecting a single GML position."""
        self.push('pos', 0)

    def _end_gml_pos(self):
        """Parse the collected position as a point and save it if valid."""
        this = self.pop('pos')
        where = self._get_context()['where']
        geometry = _parse_georss_point(
            this,
            swap=self._gml_swap_axes(where.get('srsName')),
            dims=where.get('srsDimension', 2),
        )
        if geometry:
            self._save_where(geometry)

    def _start_gml_poslist(self, attrs_d):
        """Begin collecting a GML position list."""
        self.push('pos', 0)

    def _end_gml_poslist(self):
        """Parse the collected positions for the open geometry kind and save them."""
        this = self.pop('pos')
        where = self._get_context()['where']
        geometry = _parse_poslist(
            this,
            self.ingeometry,
            swap=self._gml_swap_axes(where.get('srsName')),
            dims=where.get('srsDimension', 2),
        )
        if geometry:
            self._save_where(geometry)

    def _end_geom(self):
        """Leave a GML geometry element and clear the geometry kind."""
        self.ingeometry = 0
        self.pop('geometry')
    _end_gml_point = _end_geom
    _end_gml_linestring = _end_geom
    _end_gml_linearring = _end_geom
    _end_gml_exterior = _end_geom
    _end_gml_polygon = _end_geom

    def _end_where(self):
        """Leave the ``where`` element."""
        self.pop('where')
    _end_georss_where = _end_where
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
# items, or None in the case of a parsing error.
def _parse_poslist(value, geom_type, swap=True, dims=2):
if geom_type == 'linestring':
return _parse_georss_line(value, swap, dims)
elif geom_type == 'polygon':
ring = _parse_georss_line(value, swap, dims)
return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
else:
return None
def _gen_georss_coords(value, swap=True, dims=2):
# A generator of (lon, lat) pairs from a string of encoded GeoRSS
# coordinates. Converts to floats and swaps order.
latlons = (float(ll) for ll in value.replace(',', ' ').split())
while True:
try:
t = [next(latlons), next(latlons)][::swap and -1 or 1]
if dims == 3:
t.append(next(latlons))
yield tuple(t)
except StopIteration:
return
def _parse_georss_point(value, swap=True, dims=2):
    """Parse a GeoRSS point.

    A point contains a single latitude-longitude pair, separated by
    whitespace; comma separators are handled as well.  Only the first
    parsed coordinate is used.  Returns a ``Point`` dict, or ``None`` when
    no coordinate is present or a value is not a number.
    """
    try:
        points = list(_gen_georss_coords(value, swap, dims))
        first = points[0]
    except (IndexError, ValueError):
        return None
    return {'type': 'Point', 'coordinates': first}
def _parse_georss_line(value, swap=True, dims=2):
    """Parse a GeoRSS line.

    A line contains a space separated list of latitude-longitude pairs in
    the WGS84 coordinate reference system, with each pair separated by
    whitespace.  Returns a ``LineString`` dict holding every parsed
    coordinate, or ``None`` when a value is not a number.

    NOTE(review): a line is supposed to have at least two pairs, but the
    number of pairs is not checked here.
    """
    try:
        points = list(_gen_georss_coords(value, swap, dims))
    except (IndexError, ValueError):
        return None
    return {'type': 'LineString', 'coordinates': points}
def _parse_georss_polygon(value, swap=True, dims=2):
    """Parse a GeoRSS polygon.

    A polygon contains a space separated list of latitude-longitude pairs,
    with each pair separated by whitespace.  There must be at least four
    pairs, with the last being identical to the first (so a polygon has a
    minimum of three actual points).  Only the number of pairs is checked
    here.  Returns a ``Polygon`` dict with a single ring, or ``None`` when
    there are fewer than four pairs or a value is not a number.
    """
    try:
        ring = list(_gen_georss_coords(value, swap, dims))
    except (IndexError, ValueError):
        return None
    return {'type': 'Polygon', 'coordinates': (ring,)} if len(ring) >= 4 else None
def _parse_georss_box(value, swap=True, dims=2):
    """Parse a GeoRSS bounding box.

    A bounding box is a rectangular region, often used to define the
    extents of a map or a rough area of interest.  A box contains two
    space-separated latitude-longitude pairs; the first pair is the lower
    corner, the second is the upper corner.  Returns a ``Box`` dict whose
    coordinates are a tuple of every parsed pair, or ``None`` when a value
    is not a number.

    NOTE(review): the number of pairs is not checked here.
    """
    try:
        corners = tuple(_gen_georss_coords(value, swap, dims))
    except (IndexError, ValueError):
        return None
    return {'type': 'Box', 'coordinates': corners}
# EPSG codes of geographic (latitude/longitude) coordinate systems, used to
# support decoding of GeoRSS GML profiles.  The GML position handlers look
# the EPSG code of an srsName up in this list to decide whether the first
# two values of each position are swapped.
_geogCS = [
3819, 3821, 3824, 3889, 3906, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008,
4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4018, 4019, 4020, 4021, 4022,
4023, 4024, 4025, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036,
4041, 4042, 4043, 4044, 4045, 4046, 4047, 4052, 4053, 4054, 4055, 4075, 4081,
4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132,
4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145,
4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158,
4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171,
4172, 4173, 4174, 4175, 4176, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185,
4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200,
4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213,
4214, 4215, 4216, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227,
4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240,
4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253,
4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266,
4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279,
4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4291, 4292, 4293,
4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4306, 4307,
4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4322,
4324, 4326, 4463, 4470, 4475, 4483, 4490, 4555, 4558, 4600, 4601, 4602, 4603,
4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616,
4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629,
4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642,
4643, 4644, 4645, 4646, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665,
4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678,
4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691,
4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704,
4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717,
4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, 4728, 4729, 4730,
4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743,
4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756,
4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4801, 4802, 4803, 4804,
4805, 4806, 4807, 4808, 4809, 4810, 4811, 4813, 4814, 4815, 4816, 4817, 4818,
4819, 4820, 4821, 4823, 4824, 4901, 4902, 4903, 4904, 4979,
]

View file

@ -0,0 +1,109 @@
# Support for the iTunes format
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from ..util import FeedParserDict
class Namespace(object):
    """Handlers for elements in the iTunes podcast namespace.

    This class is a mixin. Its handlers call ``push``, ``pop``,
    ``_get_context``, ``_add_tag``, ``_sync_author_detail`` and a number of
    core ``_start_*``/``_end_*`` handlers that are not defined here and must
    be supplied by the object this class is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
        # Extra namespace
        'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes',
    }

    # The handlers below forward iTunes elements to the handler of the
    # equivalent core element.

    def _start_itunes_author(self, attrs_d):
        """Treat ``itunes:author`` like ``author``."""
        self._start_author(attrs_d)

    def _end_itunes_author(self):
        """Close ``itunes:author`` through the ``author`` handler."""
        self._end_author()

    def _end_itunes_category(self):
        """Close ``itunes:category`` through the ``category`` handler."""
        self._end_category()

    def _start_itunes_name(self, attrs_d):
        """Treat ``itunes:name`` like ``name``."""
        self._start_name(attrs_d)

    def _end_itunes_name(self):
        """Close ``itunes:name`` through the ``name`` handler."""
        self._end_name()

    def _start_itunes_email(self, attrs_d):
        """Treat ``itunes:email`` like ``email``."""
        self._start_email(attrs_d)

    def _end_itunes_email(self):
        """Close ``itunes:email`` through the ``email`` handler."""
        self._end_email()

    def _start_itunes_subtitle(self, attrs_d):
        """Treat ``itunes:subtitle`` like ``subtitle``."""
        self._start_subtitle(attrs_d)

    def _end_itunes_subtitle(self):
        """Close ``itunes:subtitle`` through the ``subtitle`` handler."""
        self._end_subtitle()

    def _start_itunes_summary(self, attrs_d):
        """Treat ``itunes:summary`` like ``summary``."""
        self._start_summary(attrs_d)

    def _end_itunes_summary(self):
        """Close ``itunes:summary`` through the ``summary`` handler."""
        self._end_summary()

    def _start_itunes_owner(self, attrs_d):
        """Open ``itunes:owner``; its content is recorded as the publisher."""
        self.inpublisher = 1
        self.push('publisher', 0)

    def _end_itunes_owner(self):
        """Close ``itunes:owner`` and sync the publisher's author detail."""
        self.pop('publisher')
        self.inpublisher = 0
        self._sync_author_detail('publisher')

    def _end_itunes_keywords(self):
        """Add one tag per non-blank, comma-separated keyword."""
        keywords = self.pop('itunes_keywords')
        if not keywords:
            # Nothing was collected for the element (None or empty text):
            # there is nothing to split, so no tag is added.
            return
        for term in keywords.split(','):
            term = term.strip()
            if term:
                self._add_tag(term, 'http://www.itunes.com/', None)

    def _start_itunes_category(self, attrs_d):
        """Record the ``text`` attribute as a tag and open a category."""
        self._add_tag(attrs_d.get('text'), 'http://www.itunes.com/', None)
        self.push('category', 1)

    def _start_itunes_image(self, attrs_d):
        """Store the image location found in ``href`` (preferred) or ``url``."""
        self.push('itunes_image', 0)
        href = attrs_d.get('href') or attrs_d.get('url')
        if href:
            self._get_context()['image'] = FeedParserDict({'href': href})

    # ``itunes:link`` is processed exactly like ``itunes:image``.
    _start_itunes_link = _start_itunes_image

    def _end_itunes_block(self):
        """Store ``itunes_block`` as 1 for 'yes'/'Yes' and 0 otherwise."""
        value = self.pop('itunes_block', 0)
        self._get_context()['itunes_block'] = 1 if value in ('yes', 'Yes') else 0

    def _end_itunes_explicit(self):
        """Store ``itunes_explicit`` as True, False or None.

        'yes' maps to True, 'clean' to False, and any other value to None.
        False and None both evaluate as false, so the difference can be
        ignored by applications that only need to know whether the content
        is explicit.
        """
        value = self.pop('itunes_explicit', 0)
        if value == 'yes':
            explicit = True
        elif value == 'clean':
            explicit = False
        else:
            explicit = None
        self._get_context()['itunes_explicit'] = explicit

View file

@ -0,0 +1,141 @@
# Support for the Media RSS format
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from ..util import FeedParserDict
class Namespace(object):
    """Handlers for elements in the Media RSS (``media``) namespace.

    This class is a mixin. Its handlers call ``push``, ``pop``,
    ``_get_context``, ``_add_tag`` and several core ``_start_*``/``_end_*``
    handlers that are not defined here and must be supplied by the object
    this class is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        'http://search.yahoo.com/mrss/': 'media',
        # Old namespace (no trailing slash)
        'http://search.yahoo.com/mrss': 'media',
    }

    def _start_media_category(self, attrs_d):
        """Open a category, defaulting ``scheme`` to the Media RSS schema."""
        attrs_d.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
        self._start_category(attrs_d)

    def _end_media_category(self):
        """Close ``media:category`` through the ``category`` handler."""
        self._end_category()

    def _end_media_keywords(self):
        """Add one tag per non-blank, comma-separated keyword."""
        keywords = self.pop('media_keywords')
        if not keywords:
            # Same guard the other text handlers of this class apply:
            # a missing (None) or empty value yields no tags.
            return
        for term in keywords.split(','):
            term = term.strip()
            if term:
                self._add_tag(term, None, None)

    def _start_media_title(self, attrs_d):
        """Treat ``media:title`` like ``title``."""
        self._start_title(attrs_d)

    def _end_media_title(self):
        """Close the title while leaving ``title_depth`` as it was."""
        title_depth = self.title_depth
        self._end_title()
        # Restore the depth captured before delegating to the title handler.
        self.title_depth = title_depth

    def _start_media_group(self, attrs_d):
        """Accept ``media:group`` without recording anything."""
        # don't do anything, but don't break the enclosed tags either
        pass

    def _start_media_rating(self, attrs_d):
        """Keep the first rating's attributes and start collecting its text."""
        context = self._get_context()
        context.setdefault('media_rating', attrs_d)
        self.push('rating', 1)

    def _end_media_rating(self):
        """Attach non-blank rating text as ``content``."""
        rating = self.pop('rating')
        if rating is not None and rating.strip():
            context = self._get_context()
            context['media_rating']['content'] = rating

    def _start_media_credit(self, attrs_d):
        """Append the credit's attributes and start collecting its text."""
        context = self._get_context()
        context.setdefault('media_credit', [])
        context['media_credit'].append(attrs_d)
        self.push('credit', 1)

    def _end_media_credit(self):
        """Attach non-blank credit text to the most recent credit."""
        credit = self.pop('credit')
        if credit is not None and credit.strip():
            context = self._get_context()
            context['media_credit'][-1]['content'] = credit

    def _start_media_description(self, attrs_d):
        """Treat ``media:description`` like ``description``."""
        self._start_description(attrs_d)

    def _end_media_description(self):
        """Close ``media:description`` through the ``description`` handler."""
        self._end_description()

    def _start_media_restriction(self, attrs_d):
        """Keep the first restriction's attributes and collect its text."""
        context = self._get_context()
        context.setdefault('media_restriction', attrs_d)
        self.push('restriction', 1)

    def _end_media_restriction(self):
        """Store the restriction text as a list of lower-cased codes."""
        restriction = self.pop('restriction')
        if restriction is not None and restriction.strip():
            context = self._get_context()
            # Split on any run of whitespace so repeated spaces, tabs or
            # newlines between codes never yield empty or glued entries.
            context['media_restriction']['content'] = [
                code.lower() for code in restriction.split()
            ]

    def _start_media_license(self, attrs_d):
        """Keep the first license's attributes and start collecting its text."""
        context = self._get_context()
        context.setdefault('media_license', attrs_d)
        self.push('license', 1)

    def _end_media_license(self):
        """Attach non-blank license text as ``content``."""
        license_ = self.pop('license')
        if license_ is not None and license_.strip():
            context = self._get_context()
            context['media_license']['content'] = license_

    def _start_media_content(self, attrs_d):
        """Append the attributes of a ``media:content`` element."""
        context = self._get_context()
        context.setdefault('media_content', [])
        context['media_content'].append(attrs_d)

    def _start_media_thumbnail(self, attrs_d):
        """Append the thumbnail's attributes and collect any element text."""
        context = self._get_context()
        context.setdefault('media_thumbnail', [])
        self.push('url', 1)  # new
        context['media_thumbnail'].append(attrs_d)

    def _end_media_thumbnail(self):
        """Use the element text as ``url`` when no ``url`` attribute exists."""
        url = self.pop('url')
        context = self._get_context()
        if url is not None and url.strip():
            thumbnail = context['media_thumbnail'][-1]
            # An explicit ``url`` attribute takes precedence over the text.
            if 'url' not in thumbnail:
                thumbnail['url'] = url

    def _start_media_player(self, attrs_d):
        """Store the player's attributes and start collecting its text."""
        self.push('media_player', 0)
        self._get_context()['media_player'] = FeedParserDict(attrs_d)

    def _end_media_player(self):
        """Attach the collected player text as ``content``."""
        value = self.pop('media_player')
        context = self._get_context()
        context['media_player']['content'] = value

View file

@ -0,0 +1,74 @@
# Support for the Podlove Simple Chapters format
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import datetime
import re
from .. import util
class Namespace(object):
    """Handlers for the Podlove Simple Chapters (``psc``) namespace.

    This class is a mixin. ``_get_context`` and ``_get_attribute`` are not
    defined here and must be supplied by the object this class is combined
    with.
    """

    supported_namespaces = {
        'http://podlove.org/simple-chapters': 'psc',
    }

    def __init__(self):
        # Chapters are recorded only while this flag is True; it is raised
        # by _start_psc_chapters and lowered by _end_psc_chapters.
        self.psc_chapters_flag = False
        super(Namespace, self).__init__()

    def _start_psc_chapters(self, attrs_d):
        """Create the ``psc_chapters`` entry unless the context has one."""
        context = self._get_context()
        if 'psc_chapters' in context:
            # An entry already exists: leave it alone and do not enable
            # chapter capturing for this element.
            return
        self.psc_chapters_flag = True
        attrs_d['chapters'] = []
        context['psc_chapters'] = util.FeedParserDict(attrs_d)

    def _end_psc_chapters(self):
        """Stop recording chapters."""
        self.psc_chapters_flag = False

    def _start_psc_chapter(self, attrs_d):
        """Record one chapter, adding its parsed start as ``start_parsed``."""
        if not self.psc_chapters_flag:
            return
        raw_start = self._get_attribute(attrs_d, 'start')
        attrs_d['start_parsed'] = _parse_psc_chapter_start(raw_start)
        chapters_entry = self._get_context()['psc_chapters']
        chapters_entry['chapters'].append(util.FeedParserDict(attrs_d))
format_ = re.compile(r'^((\d{2}):)?(\d{2}):(\d{2})(\.(\d{3}))?$')
def _parse_psc_chapter_start(start):
m = format_.match(start)
if m is None:
return None
_, h, m, s, _, ms = m.groups()
h, m, s, ms = (int(h or 0), int(m), int(s), int(ms or 0))
return datetime.timedelta(0, h*60*60 + m*60 + s, ms*1000)