Today’s Calibre recipe is ‘La Gazzetta dello Sport’.

La Gazzetta is the most popular daily Italian newspaper dedicated to sports. It is highly recognizable because it is printed on pink paper, and it can be found in most bars and cafes in Italy. It is published by RCS MediaGroup.

Click here to view the code

#!/usr/bin/env  python
# Recipe metadata: licence, authorship, version and a short description.
__license__     = 'GPL v3'
__author__      = 'Lorenzo Vigentini'
__copyright__   = '2009, Lorenzo Vigentini '
__version__     = 'v1.02'
__date__        = '10, January 2010'
# Same text as the `description` attribute of the recipe class below.
__description__ = 'Sport news from the most read sport newspaper in Italy'

# Bare string naming the source site. It follows the assignments above, so
# Python evaluates it as a no-op expression rather than the module docstring.
'''www.gazzetta.it'''

from calibre.web.feeds.news import BasicNewsRecipe

class laGazzetta(BasicNewsRecipe):
    """Calibre news recipe for La Gazzetta dello Sport (www.gazzetta.it).

    The feeds, clean-up rules and styling are declared as class attributes.
    ``print_version`` maps each article URL to its ``_print.html`` variant.
    """

    # --- Publication metadata ---
    author = 'Lorenzo Vigentini'
    description = 'Sport news from the most read sport newspaper in Italy'

    cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
    title = 'La Gazzetta dello Sport '
    publisher = 'RCS Digital'
    category = 'Sport News'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download limits ---
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 10

    # --- Page clean-up ---
    remove_javascript = True
    no_stylesheets = True

    # The article body is expected inside <div id="articolo">.
    keep_only_tags = [dict(name='div', attrs={'id': 'articolo'})]

    # Navigation, advertising, comment links and embedded media to drop.
    remove_tags = [
        dict(name='ul', attrs={'id': ['service-toolbar', 'sections-menu']}),
        dict(name='div', attrs={'id': ['header', 'rightcol', 'sponsored',
                                       'vxFlashPlayer', 'footer', 'print-box']}),
        dict(name='iframe', attrs={'id': 'mirago-feed'}),
        dict(name='a', attrs={'id': 'commenta-up'}),
        dict(name='cite', attrs={'class': ['signature', 'parag-title']}),
        dict(name='a', attrs={'class': ['last-comment', 'button-bold2']}),
        dict(name=['base', 'object', 'link', 'a', 'script', 'noscript']),
    ]

    extra_css      = '''
                        h1 {font: sans-serif large;}
                        h2 {font: sans-serif medium;}
                        h3 {font: sans-serif small;}
                        h4 {font: sans-serif bold small;}
                        p  {font:10pt helvetica}
                        dd {font:8pt helvetica}
                      '''

    # (section title, RSS URL) pairs, one per sport section.
    feeds = [
        (u'Calcio', u'http://www.gazzetta.it/rss/Calcio.xml'),
        (u'Formula 1', u'http://www.gazzetta.it/rss/Formula1.xml'),
        # Title corrected from 'Motomodiale' to match the feed's own name.
        (u'Motomondiale', u'http://www.gazzetta.it/rss/Motomondiale.xml'),
        (u'Motori', u'http://www.gazzetta.it/rss/Motori.xml'),
        (u'Ciclismo', u'http://www.gazzetta.it/rss/Ciclismo.xml'),
        (u'Basket', u'http://www.gazzetta.it/rss/Basket.xml'),
        (u'Tennis', u'http://www.gazzetta.it/rss/Tennis.xml'),
        (u'Pallavolo', u'http://www.gazzetta.it/rss/Pallavolo.xml'),
        (u'Vela', u'http://www.gazzetta.it/rss/Vela.xml'),
        (u'Atletica', u'http://www.gazzetta.it/rss/Atletica.xml'),
        (u'Altri Sport', u'http://www.gazzetta.it/rss/Sport_Vari.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        The result keeps the scheme and host, followed by path segments
        3 to 6 of ``url``. The final segment (for example ``name.shtml``)
        is replaced by ``name_print.html``.

        :param url: article URL, e.g.
            ``http://www.gazzetta.it/A/B/C/D/name.shtml``
        :return: the matching ``..._print.html`` URL as a string
        """
        segments = url.split('/')
        # 'http:', '' and the host make up the first three segments.
        base = '/'.join(segments[:3]) + '/'
        # NOTE(review): assumes exactly four directory levels precede the
        # article file; deeper levels are dropped -- confirm against the site.
        sub_path = '/'.join(segments[3:7]) + '/'
        # Take the text before the first dot of the last segment. For
        # '.shtml' names this equals the old fixed six-character strip, and
        # it no longer truncates names that carry another extension.
        article = segments[-1].split('.')[0]
        print_url = base + sub_path + article + '_print.html'
        # Call form works on Python 2 and 3; the old print statement is a
        # SyntaxError on Python 3.
        print('this is the url: ' + print_url)
        return print_url

Or download the file here: Calibre recipe – LaGazzetta



Be Sociable, Share!
Calibre Recipe: la Gazzetta dello Sport
Tagged on: