Primer commit del proyecto RSS
This commit is contained in:
commit
27c9515d29
1568 changed files with 252311 additions and 0 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
506
venv/lib/python3.12/site-packages/feedparser/namespaces/_base.py
Normal file
506
venv/lib/python3.12/site-packages/feedparser/namespaces/_base.py
Normal file
|
|
@ -0,0 +1,506 @@
|
|||
# Support for the Atom, RSS, RDF, and CDF feed formats
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import copy
|
||||
|
||||
from ..datetimes import _parse_date
|
||||
from ..urls import make_safe_absolute_uri
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace:
    """Support for the Atom, RSS, RDF, and CDF feed formats.

    The feed formats all share common elements, some of which have conflicting
    interpretations. For simplicity, all of the base feed format support is
    collected here.

    Every ``_start_*`` method receives the element's attribute mapping and
    every ``_end_*`` method takes no arguments. The methods rely on state
    and helpers (``push``, ``pop``, ``push_content``, ``pop_content``,
    ``_get_context``, ``_save`` and others) that are not defined in this
    class; presumably the parser class that mixes this in provides them.
    """

    supported_namespaces = {
        '': '',
        'http://backend.userland.com/rss': '',
        'http://blogs.law.harvard.edu/tech/rss': '',
        'http://purl.org/rss/1.0/': '',
        'http://my.netscape.com/rdf/simple/0.9/': '',
        'http://example.com/newformat#': '',
        'http://example.com/necho': '',
        'http://purl.org/echo/': '',
        'uri/of/echo/namespace#': '',
        'http://purl.org/pie/': '',
        'http://purl.org/atom/ns#': '',
        'http://www.w3.org/2005/Atom': '',
        'http://purl.org/rss/1.0/modules/rss091#': '',
    }

    def _start_rss(self, attrs_d):
        """Set ``self.version`` from the ``version`` attribute of an rss element."""
        versionmap = {
            '0.91': 'rss091u',
            '0.92': 'rss092',
            '0.93': 'rss093',
            '0.94': 'rss094',
        }

        # If we're here then this is an RSS feed.
        # If we don't have a version or have a version that starts with something
        # other than RSS then there's been a mistake. Correct it.
        if not self.version or not self.version.startswith('rss'):
            attr_version = attrs_d.get('version', '')
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            elif attr_version.startswith('2.'):
                self.version = 'rss20'
            else:
                self.version = 'rss'

    def _start_channel(self, attrs_d):
        """Flag that the parser is inside the feed and apply CDF attributes."""
        self.infeed = 1
        self._cdf_common(attrs_d)

    def _cdf_common(self, attrs_d):
        """Replay the ``lastmod`` and ``href`` attributes as child elements.

        ``lastmod`` is fed through the ``modified`` handlers and ``href``
        through the ``link`` handlers by writing the attribute value into
        the last slot of the top element-stack entry.
        """
        if 'lastmod' in attrs_d:
            self._start_modified({})
            self.elementstack[-1][-1] = attrs_d['lastmod']
            self._end_modified()
        if 'href' in attrs_d:
            self._start_link({})
            self.elementstack[-1][-1] = attrs_d['href']
            self._end_link()

    def _start_feed(self, attrs_d):
        """Flag that the parser is inside the feed and set the Atom version."""
        self.infeed = 1
        versionmap = {'0.1': 'atom01',
                      '0.2': 'atom02',
                      '0.3': 'atom03'}
        if not self.version:
            attr_version = attrs_d.get('version')
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            else:
                self.version = 'atom'

    def _end_channel(self):
        """Flag that the parser has left the feed element."""
        self.infeed = 0
    _end_feed = _end_channel

    def _start_image(self, attrs_d):
        """Begin an image element; outside entries, ensure ``image`` exists."""
        context = self._get_context()
        if not self.inentry:
            context.setdefault('image', FeedParserDict())
        self.inimage = 1
        self.title_depth = -1
        self.push('image', 0)

    def _end_image(self):
        """Finish an image element and clear the ``inimage`` flag."""
        self.pop('image')
        self.inimage = 0

    def _start_textinput(self, attrs_d):
        """Begin a textinput element and ensure ``textinput`` exists."""
        context = self._get_context()
        context.setdefault('textinput', FeedParserDict())
        self.intextinput = 1
        self.title_depth = -1
        self.push('textinput', 0)
    _start_textInput = _start_textinput

    def _end_textinput(self):
        """Finish a textinput element and clear the ``intextinput`` flag."""
        self.pop('textinput')
        self.intextinput = 0
    _end_textInput = _end_textinput

    def _start_author(self, attrs_d):
        """Begin an author element and append an empty entry to ``authors``."""
        self.inauthor = 1
        self.push('author', 1)
        # Append a new FeedParserDict when expecting an author
        context = self._get_context()
        context.setdefault('authors', [])
        context['authors'].append(FeedParserDict())
    _start_managingeditor = _start_author

    def _end_author(self):
        """Finish an author element and synchronise the author detail."""
        self.pop('author')
        self.inauthor = 0
        self._sync_author_detail()
    _end_managingeditor = _end_author

    def _start_contributor(self, attrs_d):
        """Begin a contributor element and append an empty entry to ``contributors``."""
        self.incontributor = 1
        context = self._get_context()
        context.setdefault('contributors', [])
        context['contributors'].append(FeedParserDict())
        self.push('contributor', 0)

    def _end_contributor(self):
        """Finish a contributor element and clear the ``incontributor`` flag."""
        self.pop('contributor')
        self.incontributor = 0

    def _start_name(self, attrs_d):
        """Begin a name element."""
        self.push('name', 0)

    def _end_name(self):
        """Store the name on whichever parent is currently open.

        Publisher takes precedence over author, then contributor, then
        textinput; outside all of these the value is discarded.
        """
        value = self.pop('name')
        if self.inpublisher:
            self._save_author('name', value, 'publisher')
        elif self.inauthor:
            self._save_author('name', value)
        elif self.incontributor:
            self._save_contributor('name', value)
        elif self.intextinput:
            context = self._get_context()
            context['name'] = value

    @staticmethod
    def _parse_dimension(value):
        """Return ``value`` converted to ``int``, or 0 when it is not convertible.

        ``TypeError`` is handled alongside ``ValueError`` so that a
        non-string value (for example ``None``) cannot abort parsing.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def _start_width(self, attrs_d):
        """Begin a width element."""
        self.push('width', 0)

    def _end_width(self):
        """Store the integer width on the context when inside an image."""
        value = self._parse_dimension(self.pop('width'))
        if self.inimage:
            context = self._get_context()
            context['width'] = value

    def _start_height(self, attrs_d):
        """Begin a height element."""
        self.push('height', 0)

    def _end_height(self):
        """Store the integer height on the context when inside an image."""
        value = self._parse_dimension(self.pop('height'))
        if self.inimage:
            context = self._get_context()
            context['height'] = value

    def _start_url(self, attrs_d):
        """Begin a url/homepage/uri element, tracked under the name ``href``."""
        self.push('href', 1)
    _start_homepage = _start_url
    _start_uri = _start_url

    def _end_url(self):
        """Store the href on the open author or contributor, if any."""
        value = self.pop('href')
        if self.inauthor:
            self._save_author('href', value)
        elif self.incontributor:
            self._save_contributor('href', value)
    _end_homepage = _end_url
    _end_uri = _end_url

    def _start_email(self, attrs_d):
        """Begin an email element."""
        self.push('email', 0)

    def _end_email(self):
        """Store the email on the open publisher, author, or contributor."""
        value = self.pop('email')
        if self.inpublisher:
            self._save_author('email', value, 'publisher')
        elif self.inauthor:
            self._save_author('email', value)
        elif self.incontributor:
            self._save_contributor('email', value)

    def _start_subtitle(self, attrs_d):
        """Begin subtitle content with a default type of ``text/plain``."""
        self.push_content('subtitle', attrs_d, 'text/plain', 1)
    _start_tagline = _start_subtitle

    def _end_subtitle(self):
        """Finish subtitle content."""
        self.pop_content('subtitle')
    _end_tagline = _end_subtitle

    def _start_rights(self, attrs_d):
        """Begin rights content with a default type of ``text/plain``."""
        self.push_content('rights', attrs_d, 'text/plain', 1)
    _start_copyright = _start_rights

    def _end_rights(self):
        """Finish rights content."""
        self.pop_content('rights')
    _end_copyright = _end_rights

    def _start_item(self, attrs_d):
        """Begin a new entry and reset the per-entry parsing state.

        When the element carries an ``rdf:about`` attribute its value
        becomes the entry ``id``.
        """
        self.entries.append(FeedParserDict())
        self.push('item', 0)
        self.inentry = 1
        self.guidislink = 0
        self.title_depth = -1
        # Named ``entry_id`` so the builtin ``id`` is not shadowed.
        entry_id = self._get_attribute(attrs_d, 'rdf:about')
        if entry_id:
            context = self._get_context()
            context['id'] = entry_id
        self._cdf_common(attrs_d)
    _start_entry = _start_item

    def _end_item(self):
        """Finish the entry and clear the ``inentry`` and ``hasContent`` flags."""
        self.pop('item')
        self.inentry = 0
        self.hasContent = 0
    _end_entry = _end_item

    def _start_language(self, attrs_d):
        """Begin a language element."""
        self.push('language', 1)

    def _end_language(self):
        """Store the popped language value in ``self.lang``."""
        self.lang = self.pop('language')

    def _start_webmaster(self, attrs_d):
        """Begin a webmaster element, tracked under the name ``publisher``."""
        self.push('publisher', 1)

    def _end_webmaster(self):
        """Finish the publisher element and synchronise its author detail."""
        self.pop('publisher')
        self._sync_author_detail('publisher')

    def _start_published(self, attrs_d):
        """Begin a published/issued/pubdate element."""
        self.push('published', 1)
    _start_issued = _start_published
    _start_pubdate = _start_published

    def _end_published(self):
        """Save the parsed publication date as ``published_parsed``."""
        value = self.pop('published')
        self._save('published_parsed', _parse_date(value), overwrite=True)
    _end_issued = _end_published
    _end_pubdate = _end_published

    def _start_updated(self, attrs_d):
        """Begin an updated/modified/lastbuilddate element."""
        self.push('updated', 1)
    _start_modified = _start_updated
    _start_lastbuilddate = _start_updated

    def _end_updated(self):
        """Save the parsed update date as ``updated_parsed``."""
        value = self.pop('updated')
        parsed_value = _parse_date(value)
        self._save('updated_parsed', parsed_value, overwrite=True)
    _end_modified = _end_updated
    _end_lastbuilddate = _end_updated

    def _start_created(self, attrs_d):
        """Begin a created element."""
        self.push('created', 1)

    def _end_created(self):
        """Save the parsed creation date as ``created_parsed``."""
        value = self.pop('created')
        self._save('created_parsed', _parse_date(value), overwrite=True)

    def _start_expirationdate(self, attrs_d):
        """Begin an expirationdate element, tracked under the name ``expired``."""
        self.push('expired', 1)

    def _end_expirationdate(self):
        """Save the parsed expiration date as ``expired_parsed``."""
        self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True)

    def _start_category(self, attrs_d):
        """Register a tag from the element's attributes and begin the element.

        ``scheme`` falls back to the ``domain`` attribute when absent.
        """
        term = attrs_d.get('term')
        scheme = attrs_d.get('scheme', attrs_d.get('domain'))
        label = attrs_d.get('label')
        self._add_tag(term, scheme, label)
        self.push('category', 1)
    _start_keywords = _start_category

    def _end_category(self):
        """Use the element text as a tag term.

        The text fills in the term of the most recent tag when that tag has
        none; otherwise a new tag is added with the text as its term. An
        empty text is ignored.
        """
        value = self.pop('category')
        if not value:
            return
        context = self._get_context()
        tags = context['tags']
        if tags and not tags[-1]['term']:
            tags[-1]['term'] = value
        else:
            self._add_tag(value, None, None)
    _end_keywords = _end_category

    def _start_cloud(self, attrs_d):
        """Store the cloud element's attributes on the context."""
        self._get_context()['cloud'] = FeedParserDict(attrs_d)

    def _start_link(self, attrs_d):
        """Record a link and, where appropriate, the context's primary link.

        ``rel`` defaults to ``alternate``; ``type`` defaults to
        ``application/atom+xml`` for ``rel="self"`` and ``text/html``
        otherwise. Note that these defaults are written into ``attrs_d``
        itself. A link without an ``href`` is pushed so that its text
        content can be collected instead.
        """
        attrs_d.setdefault('rel', 'alternate')
        if attrs_d['rel'] == 'self':
            attrs_d.setdefault('type', 'application/atom+xml')
        else:
            attrs_d.setdefault('type', 'text/html')
        context = self._get_context()
        attrs_d = self._enforce_href(attrs_d)
        if 'href' in attrs_d:
            attrs_d['href'] = self.resolve_uri(attrs_d['href'])
        expecting_text = self.infeed or self.inentry or self.insource
        context.setdefault('links', [])
        # Links of an image nested inside an entry are not recorded.
        if not (self.inentry and self.inimage):
            context['links'].append(FeedParserDict(attrs_d))
        if 'href' in attrs_d:
            if (
                attrs_d.get('rel') == 'alternate'
                and self.map_content_type(attrs_d.get('type')) in self.html_types
            ):
                context['link'] = attrs_d['href']
        else:
            self.push('link', expecting_text)

    def _end_link(self):
        """Finish a link element."""
        self.pop('link')

    def _start_guid(self, attrs_d):
        """Begin a guid/id element and note whether it is a permalink.

        A missing ``ispermalink`` attribute is treated as ``'true'``.
        """
        self.guidislink = (attrs_d.get('ispermalink', 'true') == 'true')
        self.push('id', 1)
    _start_id = _start_guid

    def _end_guid(self):
        """Save ``guidislink`` and, for permalink guids, the link itself."""
        value = self.pop('id')
        self._save('guidislink', self.guidislink and 'link' not in self._get_context())
        if self.guidislink:
            # guid acts as link, but only if 'ispermalink' is not present or is 'true',
            # and only if the item doesn't already have a link element
            self._save('link', value)
    _end_id = _end_guid

    def _start_title(self, attrs_d):
        """Begin title content, unless the title belongs to inline SVG."""
        if self.svgOK:
            return self.unknown_starttag('title', list(attrs_d.items()))
        self.push_content('title', attrs_d, 'text/plain', self.infeed or self.inentry or self.insource)

    def _end_title(self):
        """Finish title content and remember the depth of a non-empty title."""
        if self.svgOK:
            return
        value = self.pop_content('title')
        if not value:
            return
        self.title_depth = self.depth

    def _start_description(self, attrs_d):
        """Begin a description, treating it as content if a summary exists.

        When the context already has a ``summary`` and no content has been
        seen, the description is routed through the content handlers.
        """
        context = self._get_context()
        if 'summary' in context and not self.hasContent:
            self._summaryKey = 'content'
            self._start_content(attrs_d)
        else:
            self.push_content('description', attrs_d, 'text/html', self.infeed or self.inentry or self.insource)

    def _start_abstract(self, attrs_d):
        """Begin an abstract as ``description`` content of type ``text/plain``."""
        self.push_content('description', attrs_d, 'text/plain', self.infeed or self.inentry or self.insource)

    def _end_description(self):
        """Finish a description via whichever handler started it."""
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.pop_content('description')
        self._summaryKey = None
    _end_abstract = _end_description

    def _start_info(self, attrs_d):
        """Begin info content with a default type of ``text/plain``."""
        self.push_content('info', attrs_d, 'text/plain', 1)
    _start_feedburner_browserfriendly = _start_info

    def _end_info(self):
        """Finish info content."""
        self.pop_content('info')
    _end_feedburner_browserfriendly = _end_info

    def _start_generator(self, attrs_d):
        """Store the generator's attributes as ``generator_detail``.

        When attributes are present, the href is enforced and resolved
        before they are stored.
        """
        if attrs_d:
            attrs_d = self._enforce_href(attrs_d)
            if 'href' in attrs_d:
                attrs_d['href'] = self.resolve_uri(attrs_d['href'])
        self._get_context()['generator_detail'] = FeedParserDict(attrs_d)
        self.push('generator', 1)

    def _end_generator(self):
        """Copy the generator text into ``generator_detail['name']``."""
        value = self.pop('generator')
        context = self._get_context()
        if 'generator_detail' in context:
            context['generator_detail']['name'] = value

    def _start_summary(self, attrs_d):
        """Begin a summary, treating it as content if a summary exists.

        When the context already has a ``summary`` and no content has been
        seen, the element is routed through the content handlers.
        """
        context = self._get_context()
        if 'summary' in context and not self.hasContent:
            self._summaryKey = 'content'
            self._start_content(attrs_d)
        else:
            self._summaryKey = 'summary'
            self.push_content(self._summaryKey, attrs_d, 'text/plain', 1)

    def _end_summary(self):
        """Finish a summary via whichever handler started it."""
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.pop_content(self._summaryKey or 'summary')
        self._summaryKey = None

    def _start_enclosure(self, attrs_d):
        """Record an enclosure as a link with ``rel`` set to ``enclosure``."""
        attrs_d = self._enforce_href(attrs_d)
        context = self._get_context()
        attrs_d['rel'] = 'enclosure'
        context.setdefault('links', []).append(FeedParserDict(attrs_d))

    def _start_source(self, attrs_d):
        """Begin a source element, capturing an RSS 2.0 ``url`` attribute."""
        if 'url' in attrs_d:
            # This means that we're processing a source element from an RSS 2.0 feed
            self.sourcedata['href'] = attrs_d['url']
        self.push('source', 1)
        self.insource = 1
        self.title_depth = -1

    def _end_source(self):
        """Store a deep copy of the collected source data, then reset it."""
        self.insource = 0
        value = self.pop('source')
        if value:
            self.sourcedata['title'] = value
        self._get_context()['source'] = copy.deepcopy(self.sourcedata)
        self.sourcedata.clear()

    def _start_content(self, attrs_d):
        """Begin content with a default type of ``text/plain``.

        A ``src`` attribute, when present, is recorded in
        ``self.contentparams``.
        """
        self.hasContent = 1
        self.push_content('content', attrs_d, 'text/plain', 1)
        src = attrs_d.get('src')
        if src:
            self.contentparams['src'] = src
        self.push('content', 1)

    def _start_body(self, attrs_d):
        """Begin body content with a default type of ``application/xhtml+xml``."""
        self.push_content('content', attrs_d, 'application/xhtml+xml', 1)
    _start_xhtml_body = _start_body

    def _start_content_encoded(self, attrs_d):
        """Begin encoded content with a default type of ``text/html``."""
        self.hasContent = 1
        self.push_content('content', attrs_d, 'text/html', 1)
    _start_fullitem = _start_content_encoded

    def _end_content(self):
        """Finish content, also saving plain-text or HTML content as summary."""
        # Evaluated before pop_content(), exactly as in the original ordering.
        copyToSummary = self.map_content_type(self.contentparams.get('type')) in ({'text/plain'} | self.html_types)
        value = self.pop_content('content')
        if copyToSummary:
            self._save('summary', value)

    _end_body = _end_content
    _end_xhtml_body = _end_content
    _end_content_encoded = _end_content
    _end_fullitem = _end_content

    def _start_newlocation(self, attrs_d):
        """Begin a newlocation element."""
        self.push('newlocation', 1)

    def _end_newlocation(self):
        """Store the new location as a safe absolute URI on the feed data."""
        url = self.pop('newlocation')
        context = self._get_context()
        # don't set newlocation if the context isn't right
        if context is not self.feeddata:
            return
        context['newlocation'] = make_safe_absolute_uri(self.baseuri, url.strip())
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
# Support for the administrative elements extension
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace:
    """Support for the RSS 1.0 administrative elements extension.

    RDF Site Summary 1.0 Modules: Administrative
    http://web.resource.org/rss/1.0/modules/admin/
    """

    supported_namespaces = {
        'http://webns.net/mvcb/': 'admin',
    }

    def _record_rdf_resource(self, element, attrs_d):
        """Push ``element``, feed it the ``rdf:resource`` value, and pop it.

        Returns the attribute value as reported by ``_get_attribute``.
        """
        self.push(element, 1)
        resource = self._get_attribute(attrs_d, 'rdf:resource')
        if resource:
            # Index 2 of the top stack entry is the list the value is added to.
            self.elementstack[-1][2].append(resource)
        self.pop(element)
        return resource

    def _start_admin_generatoragent(self, attrs_d):
        """Record the generator agent and store its href as ``generator_detail``."""
        href = self._record_rdf_resource('generator', attrs_d)
        self._get_context()['generator_detail'] = FeedParserDict({'href': href})

    def _start_admin_errorreportsto(self, attrs_d):
        """Record the error-reporting address from ``rdf:resource``."""
        self._record_rdf_resource('errorreportsto', attrs_d)
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
# Support for the Creative Commons licensing extensions
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace:
    """Support for the Creative Commons licensing extensions.

    Licenses are recorded as entries with ``rel`` set to ``license`` in the
    current context's ``links`` list.
    """

    supported_namespaces = {
        # RDF-based namespace
        'http://creativecommons.org/ns#license': 'cc',

        # Old RDF-based namespace
        'http://web.resource.org/cc/': 'cc',

        # RSS-based namespace
        'http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html': 'creativecommons',

        # Old RSS-based namespace
        'http://backend.userland.com/creativeCommonsRssModule': 'creativecommons',
    }

    def _append_license_link(self, href):
        """Append a license link to the current context and return the context.

        The link always has ``rel`` set to ``license``; ``href`` is only
        included when it is truthy.
        """
        context = self._get_context()
        link = FeedParserDict()
        link['rel'] = 'license'
        if href:
            link['href'] = href
        context.setdefault('links', []).append(link)
        return context

    def _start_cc_license(self, attrs_d):
        """Record the license named by the element's ``rdf:resource`` attribute."""
        self._append_license_link(self._get_attribute(attrs_d, 'rdf:resource'))

    def _start_creativecommons_license(self, attrs_d):
        """Begin a license element whose text is the license URL."""
        self.push('license', 1)
    _start_creativeCommons_license = _start_creativecommons_license

    def _end_creativecommons_license(self):
        """Record the license from the element text as a link.

        The ``license`` key is removed from the context afterwards so the
        value is only exposed through ``links``.
        """
        value = self.pop('license')
        context = self._append_license_link(value)
        # Tolerate a context that never received a 'license' key instead of
        # raising KeyError as a bare ``del`` would.
        context.pop('license', None)
    _end_creativeCommons_license = _end_creativecommons_license
|
||||
134
venv/lib/python3.12/site-packages/feedparser/namespaces/dc.py
Normal file
134
venv/lib/python3.12/site-packages/feedparser/namespaces/dc.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# Support for the Dublin Core metadata extensions
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from ..datetimes import _parse_date
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace:
    """Support for the Dublin Core metadata extensions.

    Most handlers simply delegate to the handler of the equivalent base
    element (for example ``dc:creator`` to ``author``). Those base handlers
    are not defined in this class.
    """

    supported_namespaces = {
        'http://purl.org/dc/elements/1.1/': 'dc',
        'http://purl.org/dc/terms/': 'dcterms',
    }

    def _end_dc_author(self):
        """Delegate to ``_end_author``."""
        self._end_author()

    def _end_dc_creator(self):
        """Delegate to ``_end_author``."""
        self._end_author()

    def _end_dc_date(self):
        """Delegate to ``_end_updated``."""
        self._end_updated()

    def _end_dc_description(self):
        """Delegate to ``_end_description``."""
        self._end_description()

    def _end_dc_language(self):
        """Delegate to ``_end_language``."""
        self._end_language()

    def _end_dc_publisher(self):
        """Delegate to ``_end_webmaster``."""
        self._end_webmaster()

    def _end_dc_rights(self):
        """Delegate to ``_end_rights``."""
        self._end_rights()

    def _end_dc_subject(self):
        """Delegate to ``_end_category``."""
        self._end_category()

    def _end_dc_title(self):
        """Delegate to ``_end_title``."""
        self._end_title()

    def _end_dcterms_created(self):
        """Delegate to ``_end_created``."""
        self._end_created()

    def _end_dcterms_issued(self):
        """Delegate to ``_end_published``."""
        self._end_published()

    def _end_dcterms_modified(self):
        """Delegate to ``_end_updated``."""
        self._end_updated()

    def _start_dc_author(self, attrs_d):
        """Delegate to ``_start_author``."""
        self._start_author(attrs_d)

    def _start_dc_creator(self, attrs_d):
        """Delegate to ``_start_author``."""
        self._start_author(attrs_d)

    def _start_dc_date(self, attrs_d):
        """Delegate to ``_start_updated``."""
        self._start_updated(attrs_d)

    def _start_dc_description(self, attrs_d):
        """Delegate to ``_start_description``."""
        self._start_description(attrs_d)

    def _start_dc_language(self, attrs_d):
        """Delegate to ``_start_language``."""
        self._start_language(attrs_d)

    def _start_dc_publisher(self, attrs_d):
        """Delegate to ``_start_webmaster``."""
        self._start_webmaster(attrs_d)

    def _start_dc_rights(self, attrs_d):
        """Delegate to ``_start_rights``."""
        self._start_rights(attrs_d)

    def _start_dc_subject(self, attrs_d):
        """Delegate to ``_start_category``."""
        self._start_category(attrs_d)

    def _start_dc_title(self, attrs_d):
        """Delegate to ``_start_title``."""
        self._start_title(attrs_d)

    def _start_dcterms_created(self, attrs_d):
        """Delegate to ``_start_created``."""
        self._start_created(attrs_d)

    def _start_dcterms_issued(self, attrs_d):
        """Delegate to ``_start_published``."""
        self._start_published(attrs_d)

    def _start_dcterms_modified(self, attrs_d):
        """Delegate to ``_start_updated``."""
        self._start_updated(attrs_d)

    def _start_dcterms_valid(self, attrs_d):
        """Begin a dcterms:valid element, tracked under the name ``validity``."""
        self.push('validity', 1)

    def _end_dcterms_valid(self):
        """Save the ``start`` and ``end`` components of a validity period.

        The element text is a ``;``-separated list of ``key=value`` pairs.
        Whitespace around keys and values is ignored so that the spaced
        form ``start=...; end=...`` is recognised as well as the compact
        one. Components other than ``start`` and ``end`` are skipped.
        """
        # ``or ''`` guards against a missing value so ``split`` cannot fail.
        for validity_detail in (self.pop('validity') or '').split(';'):
            if '=' not in validity_detail:
                continue
            key, value = validity_detail.split('=', 1)
            key = key.strip()
            value = value.strip()
            if key == 'start':
                self._save('validity_start', value, overwrite=True)
                self._save('validity_start_parsed', _parse_date(value), overwrite=True)
            elif key == 'end':
                self._save('validity_end', value, overwrite=True)
                self._save('validity_end_parsed', _parse_date(value), overwrite=True)

    def _start_dc_contributor(self, attrs_d):
        """Begin a contributor whose element text is the contributor's name."""
        self.incontributor = 1
        context = self._get_context()
        context.setdefault('contributors', [])
        context['contributors'].append(FeedParserDict())
        self.push('name', 0)

    def _end_dc_contributor(self):
        """Finish the contributor name and clear the ``incontributor`` flag."""
        self._end_name()
        self.incontributor = 0
|
||||
|
|
@ -0,0 +1,278 @@
|
|||
# Support for the GeoRSS format
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Required for Python 3.6 compatibility.
|
||||
from __future__ import generator_stop
|
||||
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace(object):
|
||||
supported_namespaces = {
|
||||
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
|
||||
'http://www.georss.org/georss': 'georss',
|
||||
'http://www.opengis.net/gml': 'gml',
|
||||
}
|
||||
|
||||
def __init__(self):
    """Reset the geometry-tracking flag and continue initialisation up the MRO."""
    # The flag is assigned before delegating, matching the original order.
    self.ingeometry = 0
    super(Namespace, self).__init__()
|
||||
|
||||
def _start_georssgeom(self, attrs_d):
    """Begin a simple GeoRSS geometry and reset ``where`` on the context."""
    self.push('geometry', 0)
    ctx = self._get_context()
    ctx['where'] = FeedParserDict()


# All simple GeoRSS geometry elements share the same start handler.
_start_georss_point = _start_georssgeom
_start_georss_line = _start_georssgeom
_start_georss_polygon = _start_georssgeom
_start_georss_box = _start_georssgeom
|
||||
|
||||
def _save_where(self, geometry):
    """Merge ``geometry`` into the ``where`` mapping of the current context."""
    where = self._get_context()['where']
    where.update(geometry)
|
||||
|
||||
def _end_georss_point(self):
    """Parse the collected text as a GeoRSS point and save a truthy result."""
    raw = self.pop('geometry')
    parsed = _parse_georss_point(raw)
    if not parsed:
        return
    self._save_where(parsed)
|
||||
|
||||
def _end_georss_line(self):
    """Parse the collected text as a GeoRSS line and save a truthy result."""
    raw = self.pop('geometry')
    parsed = _parse_georss_line(raw)
    if not parsed:
        return
    self._save_where(parsed)
|
||||
|
||||
def _end_georss_polygon(self):
    """Parse the collected text as a GeoRSS polygon and save a truthy result."""
    parsed = _parse_georss_polygon(self.pop('geometry'))
    if not parsed:
        return
    self._save_where(parsed)
|
||||
|
||||
def _end_georss_box(self):
    """Parse the collected text as a GeoRSS box and save a truthy result."""
    raw = self.pop('geometry')
    parsed = _parse_georss_box(raw)
    if not parsed:
        return
    self._save_where(parsed)
|
||||
|
||||
def _start_where(self, attrs_d):
|
||||
self.push('where', 0)
|
||||
context = self._get_context()
|
||||
context['where'] = FeedParserDict()
|
||||
_start_georss_where = _start_where
|
||||
|
||||
def _parse_srs_attrs(self, attrs_d):
|
||||
srs_name = attrs_d.get('srsname')
|
||||
try:
|
||||
srs_dimension = int(attrs_d.get('srsdimension', '2'))
|
||||
except ValueError:
|
||||
srs_dimension = 2
|
||||
context = self._get_context()
|
||||
if 'where' not in context:
|
||||
context['where'] = {}
|
||||
context['where']['srsName'] = srs_name
|
||||
context['where']['srsDimension'] = srs_dimension
|
||||
|
||||
def _start_gml_point(self, attrs_d):
|
||||
self._parse_srs_attrs(attrs_d)
|
||||
self.ingeometry = 1
|
||||
self.push('geometry', 0)
|
||||
|
||||
def _start_gml_linestring(self, attrs_d):
|
||||
self._parse_srs_attrs(attrs_d)
|
||||
self.ingeometry = 'linestring'
|
||||
self.push('geometry', 0)
|
||||
|
||||
def _start_gml_polygon(self, attrs_d):
|
||||
self._parse_srs_attrs(attrs_d)
|
||||
self.push('geometry', 0)
|
||||
|
||||
def _start_gml_exterior(self, attrs_d):
|
||||
self.push('geometry', 0)
|
||||
|
||||
def _start_gml_linearring(self, attrs_d):
|
||||
self.ingeometry = 'polygon'
|
||||
self.push('geometry', 0)
|
||||
|
||||
def _start_gml_pos(self, attrs_d):
|
||||
self.push('pos', 0)
|
||||
|
||||
def _end_gml_pos(self):
|
||||
this = self.pop('pos')
|
||||
context = self._get_context()
|
||||
srs_name = context['where'].get('srsName')
|
||||
srs_dimension = context['where'].get('srsDimension', 2)
|
||||
swap = True
|
||||
if srs_name and "EPSG" in srs_name:
|
||||
epsg = int(srs_name.split(":")[-1])
|
||||
swap = bool(epsg in _geogCS)
|
||||
geometry = _parse_georss_point(this, swap=swap, dims=srs_dimension)
|
||||
if geometry:
|
||||
self._save_where(geometry)
|
||||
|
||||
def _start_gml_poslist(self, attrs_d):
|
||||
self.push('pos', 0)
|
||||
|
||||
def _end_gml_poslist(self):
|
||||
this = self.pop('pos')
|
||||
context = self._get_context()
|
||||
srs_name = context['where'].get('srsName')
|
||||
srs_dimension = context['where'].get('srsDimension', 2)
|
||||
swap = True
|
||||
if srs_name and "EPSG" in srs_name:
|
||||
epsg = int(srs_name.split(":")[-1])
|
||||
swap = bool(epsg in _geogCS)
|
||||
geometry = _parse_poslist(
|
||||
this, self.ingeometry, swap=swap, dims=srs_dimension)
|
||||
if geometry:
|
||||
self._save_where(geometry)
|
||||
|
||||
def _end_geom(self):
|
||||
self.ingeometry = 0
|
||||
self.pop('geometry')
|
||||
_end_gml_point = _end_geom
|
||||
_end_gml_linestring = _end_geom
|
||||
_end_gml_linearring = _end_geom
|
||||
_end_gml_exterior = _end_geom
|
||||
_end_gml_polygon = _end_geom
|
||||
|
||||
def _end_where(self):
|
||||
self.pop('where')
|
||||
_end_georss_where = _end_where
|
||||
|
||||
|
||||
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
|
||||
# items, or None in the case of a parsing error.
|
||||
|
||||
def _parse_poslist(value, geom_type, swap=True, dims=2):
|
||||
if geom_type == 'linestring':
|
||||
return _parse_georss_line(value, swap, dims)
|
||||
elif geom_type == 'polygon':
|
||||
ring = _parse_georss_line(value, swap, dims)
|
||||
return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def _gen_georss_coords(value, swap=True, dims=2):
|
||||
# A generator of (lon, lat) pairs from a string of encoded GeoRSS
|
||||
# coordinates. Converts to floats and swaps order.
|
||||
latlons = (float(ll) for ll in value.replace(',', ' ').split())
|
||||
while True:
|
||||
try:
|
||||
t = [next(latlons), next(latlons)][::swap and -1 or 1]
|
||||
if dims == 3:
|
||||
t.append(next(latlons))
|
||||
yield tuple(t)
|
||||
except StopIteration:
|
||||
return
|
||||
|
||||
|
||||
def _parse_georss_point(value, swap=True, dims=2):
    """Parse a point: one coordinate pair separated by whitespace or commas.

    The whole string is decoded; the first decoded tuple becomes the point.

    :returns: ``{'type': 'Point', 'coordinates': <tuple>}``, or None when the
        string holds no complete tuple or contains a non-numeric value.
    """
    try:
        points = list(_gen_georss_coords(value, swap, dims))
    except ValueError:
        return None
    if not points:
        return None
    return {'type': 'Point', 'coordinates': points[0]}
|
||||
|
||||
|
||||
def _parse_georss_line(value, swap=True, dims=2):
    """Parse a line: a whitespace-separated list of coordinate pairs.

    GeoRSS expects at least two pairs in the WGS84 reference system; the
    number of pairs is not checked here.

    :returns: ``{'type': 'LineString', 'coordinates': <list of tuples>}``,
        or None when a value cannot be converted to a number.
    """
    try:
        points = [point for point in _gen_georss_coords(value, swap, dims)]
    except (IndexError, ValueError):
        return None
    return {'type': 'LineString', 'coordinates': points}
|
||||
|
||||
|
||||
def _parse_georss_polygon(value, swap=True, dims=2):
    """Parse a polygon: a whitespace-separated list of coordinate pairs.

    A polygon needs at least four pairs, the last repeating the first (so
    at least three distinct points).  Only the count is checked here.

    :returns: ``{'type': 'Polygon', 'coordinates': (<list of tuples>,)}``, or
        None when there are fewer than four pairs or a value is not numeric.
    """
    try:
        ring = [point for point in _gen_georss_coords(value, swap, dims)]
    except (IndexError, ValueError):
        return None
    if len(ring) >= 4:
        return {'type': 'Polygon', 'coordinates': (ring,)}
    return None
|
||||
|
||||
|
||||
def _parse_georss_box(value, swap=True, dims=2):
    """Parse a bounding box: two whitespace-separated coordinate pairs.

    A bounding box is a rectangular region, often used to define the extents
    of a map or a rough area of interest.  The first pair is the lower
    corner, the second is the upper corner.

    :returns: ``{'type': 'Box', 'coordinates': (<lower>, <upper>)}``, or None
        when a value is not numeric or the string does not decode to exactly
        two pairs.
    """
    try:
        corners = tuple(_gen_georss_coords(value, swap, dims))
    except (IndexError, ValueError):
        return None
    # Anything other than a lower and an upper corner is not a box; report
    # it as a parsing error instead of storing a malformed geometry.
    if len(corners) != 2:
        return None
    return {'type': 'Box', 'coordinates': corners}
|
||||
|
||||
|
||||
# The list of EPSG codes for geographic (latitude/longitude) coordinate
|
||||
# systems to support decoding of GeoRSS GML profiles.
|
||||
_geogCS = [
|
||||
3819, 3821, 3824, 3889, 3906, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008,
|
||||
4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4018, 4019, 4020, 4021, 4022,
|
||||
4023, 4024, 4025, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036,
|
||||
4041, 4042, 4043, 4044, 4045, 4046, 4047, 4052, 4053, 4054, 4055, 4075, 4081,
|
||||
4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132,
|
||||
4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145,
|
||||
4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158,
|
||||
4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171,
|
||||
4172, 4173, 4174, 4175, 4176, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185,
|
||||
4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200,
|
||||
4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213,
|
||||
4214, 4215, 4216, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227,
|
||||
4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240,
|
||||
4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253,
|
||||
4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266,
|
||||
4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279,
|
||||
4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4291, 4292, 4293,
|
||||
4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4306, 4307,
|
||||
4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4322,
|
||||
4324, 4326, 4463, 4470, 4475, 4483, 4490, 4555, 4558, 4600, 4601, 4602, 4603,
|
||||
4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616,
|
||||
4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629,
|
||||
4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642,
|
||||
4643, 4644, 4645, 4646, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665,
|
||||
4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678,
|
||||
4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691,
|
||||
4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704,
|
||||
4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717,
|
||||
4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, 4728, 4729, 4730,
|
||||
4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743,
|
||||
4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756,
|
||||
4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4801, 4802, 4803, 4804,
|
||||
4805, 4806, 4807, 4808, 4809, 4810, 4811, 4813, 4814, 4815, 4816, 4817, 4818,
|
||||
4819, 4820, 4821, 4823, 4824, 4901, 4902, 4903, 4904, 4979,
|
||||
]
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
# Support for the iTunes format
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace(object):
    """Element handlers for the iTunes podcast namespace.

    Most handlers delegate to the generic ``_start_*`` / ``_end_*`` handlers
    of the same element name.  Those, together with ``push``, ``pop``,
    ``_get_context``, ``_add_tag`` and ``_sync_author_detail``, are not
    defined here and are expected to come from the class this one is
    combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',

        # Extra namespace
        'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes',
    }

    def _start_itunes_author(self, attrs_d):
        self._start_author(attrs_d)

    def _end_itunes_author(self):
        self._end_author()

    def _end_itunes_category(self):
        self._end_category()

    def _start_itunes_name(self, attrs_d):
        self._start_name(attrs_d)

    def _end_itunes_name(self):
        self._end_name()

    def _start_itunes_email(self, attrs_d):
        self._start_email(attrs_d)

    def _end_itunes_email(self):
        self._end_email()

    def _start_itunes_subtitle(self, attrs_d):
        self._start_subtitle(attrs_d)

    def _end_itunes_subtitle(self):
        self._end_subtitle()

    def _start_itunes_summary(self, attrs_d):
        self._start_summary(attrs_d)

    def _end_itunes_summary(self):
        self._end_summary()

    def _start_itunes_owner(self, attrs_d):
        self.inpublisher = 1
        self.push('publisher', 0)

    def _end_itunes_owner(self):
        self.pop('publisher')
        self.inpublisher = 0
        self._sync_author_detail('publisher')

    def _end_itunes_keywords(self):
        """Add every non-empty comma-separated keyword as a tag."""
        keywords = self.pop('itunes_keywords')
        # Nothing to split if pop() produced no text for this element.
        if not keywords:
            return
        for term in keywords.split(','):
            term = term.strip()
            if term:
                self._add_tag(term, 'http://www.itunes.com/', None)

    def _start_itunes_category(self, attrs_d):
        self._add_tag(attrs_d.get('text'), 'http://www.itunes.com/', None)
        self.push('category', 1)

    def _start_itunes_image(self, attrs_d):
        """Store the image URL, preferring ``href`` over ``url``."""
        self.push('itunes_image', 0)
        href = attrs_d.get('href') or attrs_d.get('url')
        if href:
            self._get_context()['image'] = FeedParserDict({'href': href})

    _start_itunes_link = _start_itunes_image

    def _end_itunes_block(self):
        """Store 1 when the element text is 'yes' or 'Yes', otherwise 0."""
        value = self.pop('itunes_block', 0)
        self._get_context()['itunes_block'] = 1 if value in ('yes', 'Yes') else 0

    def _end_itunes_explicit(self):
        """Store True for 'yes', False for 'clean' and None otherwise.

        False and None both evaluate as False, so the difference can be
        ignored by applications that only need to know if the content is
        explicit.
        """
        value = self.pop('itunes_explicit', 0)
        if value == 'yes':
            explicit = True
        elif value == 'clean':
            explicit = False
        else:
            explicit = None
        self._get_context()['itunes_explicit'] = explicit
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
# Support for the Media RSS format
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from ..util import FeedParserDict
|
||||
|
||||
|
||||
class Namespace(object):
    """Element handlers for the Media RSS namespace.

    ``push``, ``pop``, ``_get_context``, ``_add_tag`` and the generic
    ``_start_*`` / ``_end_*`` handlers called below are not defined here and
    are expected to come from the class this one is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        'http://search.yahoo.com/mrss/': 'media',

        # Old namespace (no trailing slash)
        'http://search.yahoo.com/mrss': 'media',
    }

    def _start_media_category(self, attrs_d):
        attrs_d.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
        self._start_category(attrs_d)

    def _end_media_category(self):
        self._end_category()

    def _end_media_keywords(self):
        """Add every non-empty comma-separated keyword as a tag."""
        keywords = self.pop('media_keywords')
        # Same None guard the other _end_media_* handlers apply to pop().
        if not keywords:
            return
        for term in keywords.split(','):
            term = term.strip()
            if term:
                self._add_tag(term, None, None)

    def _start_media_title(self, attrs_d):
        self._start_title(attrs_d)

    def _end_media_title(self):
        # Restore title_depth to the value it had before _end_title() ran.
        title_depth = self.title_depth
        self._end_title()
        self.title_depth = title_depth

    def _start_media_group(self, attrs_d):
        # don't do anything, but don't break the enclosed tags either
        pass

    def _start_media_rating(self, attrs_d):
        context = self._get_context()
        context.setdefault('media_rating', attrs_d)
        self.push('rating', 1)

    def _end_media_rating(self):
        rating = self.pop('rating')
        if rating is not None and rating.strip():
            context = self._get_context()
            context['media_rating']['content'] = rating

    def _start_media_credit(self, attrs_d):
        context = self._get_context()
        context.setdefault('media_credit', [])
        context['media_credit'].append(attrs_d)
        self.push('credit', 1)

    def _end_media_credit(self):
        credit = self.pop('credit')
        if credit is not None and credit.strip():
            context = self._get_context()
            context['media_credit'][-1]['content'] = credit

    def _start_media_description(self, attrs_d):
        self._start_description(attrs_d)

    def _end_media_description(self):
        self._end_description()

    def _start_media_restriction(self, attrs_d):
        context = self._get_context()
        context.setdefault('media_restriction', attrs_d)
        self.push('restriction', 1)

    def _end_media_restriction(self):
        """Store the restriction text as a list of lower-cased codes."""
        restriction = self.pop('restriction')
        if restriction is not None and restriction.strip():
            context = self._get_context()
            # split() with no argument treats any run of whitespace as one
            # separator, so newlines or repeated spaces between codes do not
            # produce empty or merged entries.
            context['media_restriction']['content'] = [cc.lower() for cc in restriction.split()]

    def _start_media_license(self, attrs_d):
        context = self._get_context()
        context.setdefault('media_license', attrs_d)
        self.push('license', 1)

    def _end_media_license(self):
        license_ = self.pop('license')
        if license_ is not None and license_.strip():
            context = self._get_context()
            context['media_license']['content'] = license_

    def _start_media_content(self, attrs_d):
        context = self._get_context()
        context.setdefault('media_content', [])
        context['media_content'].append(attrs_d)

    def _start_media_thumbnail(self, attrs_d):
        context = self._get_context()
        context.setdefault('media_thumbnail', [])
        self.push('url', 1)
        context['media_thumbnail'].append(attrs_d)

    def _end_media_thumbnail(self):
        """Use the element text as the URL only if no ``url`` attribute was given."""
        url = self.pop('url')
        context = self._get_context()
        if url is not None and url.strip():
            thumbnail = context['media_thumbnail'][-1]
            if 'url' not in thumbnail:
                thumbnail['url'] = url

    def _start_media_player(self, attrs_d):
        self.push('media_player', 0)
        self._get_context()['media_player'] = FeedParserDict(attrs_d)

    def _end_media_player(self):
        value = self.pop('media_player')
        context = self._get_context()
        context['media_player']['content'] = value
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
# Support for the Podlove Simple Chapters format
|
||||
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
||||
# Copyright 2002-2008 Mark Pilgrim
|
||||
# All rights reserved.
|
||||
#
|
||||
# This file is a part of feedparser.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .. import util
|
||||
|
||||
|
||||
class Namespace(object):
    """Element handlers for the Podlove Simple Chapters namespace.

    ``_get_context`` and ``_get_attribute`` are not defined here and are
    expected to come from the class this one is combined with.
    """

    supported_namespaces = {
        'http://podlove.org/simple-chapters': 'psc',
    }

    def __init__(self):
        # Chapters are only recorded while this flag is True.
        self.psc_chapters_flag = False
        super(Namespace, self).__init__()

    def _start_psc_chapters(self, attrs_d):
        """Start collecting chapters unless the context already has some."""
        context = self._get_context()
        if 'psc_chapters' in context:
            return
        self.psc_chapters_flag = True
        attrs_d['chapters'] = []
        context['psc_chapters'] = util.FeedParserDict(attrs_d)

    def _end_psc_chapters(self):
        self.psc_chapters_flag = False

    def _start_psc_chapter(self, attrs_d):
        """Append one chapter, adding its parsed start time as ``start_parsed``."""
        if not self.psc_chapters_flag:
            return
        raw_start = self._get_attribute(attrs_d, 'start')
        attrs_d['start_parsed'] = _parse_psc_chapter_start(raw_start)

        chapters = self._get_context()['psc_chapters']['chapters']
        chapters.append(util.FeedParserDict(attrs_d))
|
||||
|
||||
|
||||
format_ = re.compile(r'^((\d{2}):)?(\d{2}):(\d{2})(\.(\d{3}))?$')
|
||||
|
||||
|
||||
def _parse_psc_chapter_start(start):
|
||||
m = format_.match(start)
|
||||
if m is None:
|
||||
return None
|
||||
|
||||
_, h, m, s, _, ms = m.groups()
|
||||
h, m, s, ms = (int(h or 0), int(m), int(s), int(ms or 0))
|
||||
return datetime.timedelta(0, h*60*60 + m*60 + s, ms*1000)
|
||||
Loading…
Add table
Add a link
Reference in a new issue