From: Antonio Ospite Date: Wed, 6 Jun 2018 14:08:18 +0000 (+0200) Subject: Merge tag 'v1.3.0' into debian/master X-Git-Tag: debian/1.3.0-1~2 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/d6d4c8b5e434a06555e3117550ab27b5bdba2e2e?hp=ba79b94e1ff55db1f0423941c5fdb83f3ae9ad32 Merge tag 'v1.3.0' into debian/master Release v1.3.0 --- diff --git a/INSTALL b/INSTALL index 6c19099..7c9ccc1 100644 --- a/INSTALL +++ b/INSTALL @@ -2,11 +2,17 @@ The recommended way to install tweeper globally is to install all its files under /usr/share/php/tweeper and then make a symlink to the wrapper script "tweeper" under /usr/bin +NOTES FOR PACKAGERS + +Even though the php json extensions are used, namely json_decode(), a direct +dependency on php-json is not usually strictly necessary, because (at least on +Debian) php-cli already depends on it. + Tweeper depends on php-symfony-serializer which is used to convert json to xml -for some sites which provide the timeline data in json rather than in usable -html. +for some sites which provide the timeline data in json rather than in directly +transformable html. -NOTE: Tweeper also depends indirectly on php-symfony-property-access because -the code relies on the ObjectNormalizer class which requires the -PropertyAccess component, see +Tweeper also depends (indirectly) on php-symfony-property-access because the +code relies on the ObjectNormalizer class which requires the PropertyAccess +component, see http://symfony.com/doc/current/components/serializer.html#installation diff --git a/NEWS b/NEWS index cd5a3bc..33d3163 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,16 @@ +News for v1.3.0: +================ + + * Fix scraping instagram.com + * Fix scraping twitter.com + * Improve scraping twitter.com hashtag pages, like for example + https://twitter.com/hashtag/tweeper + * Fix getting the channel logo URL for identi.ca/pump.io + * Add support for scraping Instagram hashtag pages, like for example + https://www.instagram.com/explore/tags/marechiaro + * Make the RSS feed for twitter.com hashtag pages validate with + feedvalidator.org + News for v1.2.0: ================ diff --git a/TODO b/TODO index 51b294b..7b72745 100644 --- a/TODO +++ b/TODO @@ -12,5 +12,3 @@ - The dependencies on the symphony components in composer.json could be more relaxed like ">=2.7.0", but for now sticking to "2.7.*" is good enough. - -- Add support for instagram tags diff --git a/autoload.php b/autoload.php index 4ba7832..d3ebc5a 100644 --- a/autoload.php +++ b/autoload.php @@ -3,7 +3,7 @@ * @file * Tweeper - some logic to allow tweeper to run with or without composer. * - * Copyright (C) 2016 Antonio Ospite + * Copyright (C) 2016-2018 Antonio Ospite * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/Tweeper.php b/src/Tweeper.php index 566decb..50ff148 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -6,7 +6,7 @@ namespace Tweeper; * @file * Tweeper - a Twitter to RSS web scraper. * - * Copyright (C) 2013-2016 Antonio Ospite + * Copyright (C) 2013-2018 Antonio Ospite * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,7 +36,7 @@ date_default_timezone_set('UTC'); */ class Tweeper { - private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0"; + private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0"; /** * Constructor sets up {@link $generate_enclosure}. diff --git a/src/rss_converter_dilbert.com.xsl b/src/rss_converter_dilbert.com.xsl index d340183..dcc56af 100644 --- a/src/rss_converter_dilbert.com.xsl +++ b/src/rss_converter_dilbert.com.xsl @@ -1,7 +1,7 @@ - + - + + + - - - + + + + @@ -60,14 +63,14 @@ - - - - + + + + <xsl:variable name="title-length" select="140"/> - <xsl:variable name="item-content-title" select="normalize-space(concat($user-name, ': ', $item-content-caption))"/> + <xsl:variable name="item-content-title" select="normalize-space(concat($screen-name, ': ', $item-content-caption))"/> <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 --> <xsl:choose> <xsl:when test="string-length($item-content-title) > $title-length"> @@ -87,7 +90,7 @@ <xsl:value-of select="$item-permalink"/> </guid> <pubDate> - <xsl:variable name="timestamp" select="./date"/> + <xsl:variable name="timestamp" select="./taken_at_timestamp"/> <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', $timestamp)"/> </pubDate> <description> @@ -113,15 +116,18 @@ <xsl:variable name="channel-link"> <xsl:choose> <xsl:when test="$location-name != ''"> - <xsl:variable name="location-id" select="//LocationsPage/location/id"/> + <xsl:variable name="location-id" select="//LocationsPage/graphql/location/id"/> <xsl:value-of select="concat($BaseURL, '/explore/locations/', $location-id)"/> </xsl:when> + <xsl:when test="$hashtag-name != ''"> + <xsl:value-of select="concat($BaseURL, '/explore/tags/', $hashtag-name)"/> + </xsl:when> <xsl:otherwise> <xsl:value-of select="concat($BaseURL, '/', $user-name)"/> </xsl:otherwise> </xsl:choose> </xsl:variable> - <xsl:variable name="channel-image" select="//ProfilePage/user/profile_pic_url"/> + <xsl:variable name="channel-image" select="//profile_pic_url"/> <rss version="2.0"> <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute> @@ -135,11 +141,23 @@ </link> <description> <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text> - <xsl:value-of select="normalize-space(concat($screen-name, '. ', //user/biography))"/> - <xsl:variable name="external-url" select="//user/external_url"/> - <xsl:if test="$external-url != ''"> - <xsl:text> </xsl:text><a href="{$external-url}"><xsl:value-of select="$external-url"/></a> - </xsl:if> + <xsl:choose> + <xsl:when test="$location-name != ''"> + <xsl:variable name="location-latitude" select="//LocationsPage/graphql/location/lat"/> + <xsl:variable name="location-longitude" select="//LocationsPage/graphql/location/lng"/> + <xsl:value-of select="concat($location-name, ' (', $location-latitude, ', ', $location-longitude, ')')"/> + </xsl:when> + <xsl:when test="$hashtag-name != ''"> + <xsl:value-of select="concat('#', $hashtag-name)"/> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="normalize-space(concat($screen-name, '. ', //user/biography))"/> + <xsl:variable name="external-url" select="//user/external_url"/> + <xsl:if test="$external-url != ''"> + <xsl:text> </xsl:text><a href="{$external-url}"><xsl:value-of select="$external-url"/></a> + </xsl:if> + </xsl:otherwise> + </xsl:choose> <xsl:text disable-output-escaping="yes">]]></xsl:text> </description> <xsl:if test="$channel-image != ''"> @@ -155,7 +173,7 @@ </url> </image> </xsl:if> - <xsl:apply-templates select="//ProfilePage/user/media/nodes|//LocationsPage/location/media/nodes"/> + <xsl:apply-templates select="//ProfilePage/graphql/user/edge_owner_to_timeline_media/edges/node|//LocationsPage/graphql/location/edge_location_to_media/edges/node|//TagPage/graphql/hashtag/edge_hashtag_to_media/edges/node"/> </channel> </rss> </xsl:template> diff --git a/src/rss_converter_pump.io.xsl b/src/rss_converter_pump.io.xsl index bf9f674..66e73cd 100644 --- a/src/rss_converter_pump.io.xsl +++ b/src/rss_converter_pump.io.xsl @@ -1,7 +1,7 @@ <!-- Stylesheet to convert Pump.io activity streams to RSS. - Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it> + Copyright (C) 2013-2018 Antonio Ospite <ao2@ao2.it> This file is part of tweeper. @@ -89,7 +89,7 @@ <xsl:value-of select="$channel-link"/> </link> <url> - <xsl:value-of select="//div[@id='profile-block']/span/img[@class='img-rounded media-object']/@src"/> + <xsl:value-of select="//div[@id='profile-block']/span/img[contains(@class, 'img-rounded media-object')]/@src"/> </url> </image> <xsl:apply-templates select="//div[@id='user-content-activities']//ul[@id='major-stream']/li"/> diff --git a/src/rss_converter_twitter.com.xsl b/src/rss_converter_twitter.com.xsl index 44a0416..d1514c5 100644 --- a/src/rss_converter_twitter.com.xsl +++ b/src/rss_converter_twitter.com.xsl @@ -1,7 +1,7 @@ <!-- Stylesheet to convert Twitter user timelines to RSS. - Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it> + Copyright (C) 2013-2018 Antonio Ospite <ao2@ao2.it> This file is part of tweeper. @@ -149,8 +149,10 @@ </pubDate> <description> <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text> + <xsl:value-of select="concat($user-name, ':')"/> + <xsl:element name="br"/> <xsl:if test="$item-has-video"> - <xsl:text>(Video)</xsl:text> + <xsl:text> (Video)</xsl:text> <xsl:element name="br"/> </xsl:if> <xsl:element name="span"> @@ -179,6 +181,7 @@ </xsl:choose> </xsl:variable> <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/> + <xsl:variable name="channel-image" select="//a[contains(@class, 'profile-picture')]/@href"/> <rss version="2.0"> <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute> @@ -192,18 +195,22 @@ </link> <description> <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/> + <!-- The following rule should only match on hashtag URLs --> + <xsl:value-of select="normalize-space(//div[@class='SearchNavigation-textContainer'])"/> </description> - <image> - <title> - <xsl:value-of select="$channel-title"/> - - - - - - - - + + + + <xsl:value-of select="$channel-title"/> + + + + + + + + + diff --git a/tweeper.1.asciidoc b/tweeper.1.asciidoc index 019df14..82b3a43 100644 --- a/tweeper.1.asciidoc +++ b/tweeper.1.asciidoc @@ -107,7 +107,7 @@ Main web site: COPYING ------- -Copyright \(C) 2013-2016 Antonio Ospite +Copyright \(C) 2013-2018 Antonio Ospite This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/tweeper.php b/tweeper.php index ff98ab7..b1dd021 100644 --- a/tweeper.php +++ b/tweeper.php @@ -3,7 +3,7 @@ * @file * Tweeper - a Twitter to RSS web scraper. * - * Copyright (C) 2013-2016 Antonio Ospite + * Copyright (C) 2013-2018 Antonio Ospite * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by