Merge tag 'v1.4.0' into debian/master
authorAntonio Ospite <ao2@ao2.it>
Fri, 16 Nov 2018 22:21:33 +0000 (23:21 +0100)
committerAntonio Ospite <ao2@ao2.it>
Fri, 16 Nov 2018 22:21:33 +0000 (23:21 +0100)
Release v1.4.0

NEWS
TODO
autoload.php
src/Tweeper.php
src/rss_converter_dilbert.com.xsl
src/rss_converter_facebook.com.xsl
src/rss_converter_instagram.com.xsl
src/rss_converter_pump.io.xsl
src/rss_converter_twitter.com.xsl
tweeper.1.asciidoc
tweeper.php

diff --git a/NEWS b/NEWS
index 33d3163..29ec569 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,17 @@
+News for v1.4.0:
+================
+
+  * Make the images adapt to the screen width in feed readers which render the
+    HTML data in the description.
+  * Indicate if there is a GIF image in a tweet.
+  * Add option to enable or disable showing usernames in RSS items.
+  * Retry multiple times to retrieve a resource before giving up.
+  * Fix coding style.
+  * Add option to enable or disable showing multimedia content in RSS items.
+  * Fix generating enclosures for Dilbert.com
+  * Make enclosure elements validate with feedvalidator.org when the server
+    does not provide a Content-Length header.
+
 News for v1.3.0:
 ================
 
diff --git a/TODO b/TODO
index 7b72745..3c71811 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,7 +1,3 @@
-- re-evaluate the use of trigger_error() or use a custom error handler,
-  because right now the code exists as soon as trigger_error() gets called and
-  any following code is ignored.
-
 - write better XSL stylesheets? I am not an XSL expert
 - evaluate the use of the <ttl/> RSS element
 - show cards directly in RSS items for twitter.com
index d3ebc5a..d366bbb 100644 (file)
@@ -1,4 +1,5 @@
 <?php
+
 /**
  * @file
  * Tweeper - some logic to allow tweeper to run with or without composer.
index 50ff148..09bd7cc 100644 (file)
@@ -36,13 +36,28 @@ date_default_timezone_set('UTC');
  */
 class Tweeper {
 
-  private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0";
+  private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0";
+  private static $maxConnectionTimeout = 5;
+  private static $maxConnectionRetries = 5;
 
   /**
-   * Constructor sets up {@link $generate_enclosure}.
+   * Create a new Tweeper object controlling optional settings.
+   *
+   * @param bool $generate_enclosure
+   *   Enables the creation of <enclosure/> elements (disabled by default).
+   * @param bool $show_usernames
+   *   Enables showing the username in front of the content for multi-user
+   *   sites (enabled by default). Only some stylesheets support this
+   *   functionality (twitter, instagram, pump.io).
+   * @param bool $show_multimedia
+   *   Enables showing multimedia content (images, videos) directly in the
+   *   item description (enabled by default). Only some stylesheets support
+   *   this functionality (twitter, instagram, dilbert).
    */
-  public function __construct($generate_enclosure = FALSE) {
+  public function __construct($generate_enclosure = FALSE, $show_usernames = TRUE, $show_multimedia = TRUE) {
     $this->generate_enclosure = $generate_enclosure;
+    $this->show_usernames = $show_usernames;
+    $this->show_multimedia = $show_multimedia;
   }
 
   /**
@@ -78,12 +93,32 @@ class Tweeper {
   }
 
   /**
+   * Perform a cURL session multiple times when it fails with a timeout.
+   *
+   * @param resource $ch
+   *   a cURL session handle.
+   */
+  private static function curlExec($ch) {
+    $ret = FALSE;
+    $attempt = 0;
+    do {
+      $ret = curl_exec($ch);
+      if (FALSE === $ret) {
+        trigger_error(curl_error($ch), E_USER_WARNING);
+      }
+    } while (curl_errno($ch) == CURLE_OPERATION_TIMEDOUT && ++$attempt < Tweeper::$maxConnectionRetries);
+
+    return $ret;
+  }
+
+  /**
    * Get the contents from a URL.
    */
   private static function getUrlContents($url) {
     $ch = curl_init($url);
     curl_setopt_array($ch, array(
       CURLOPT_HEADER => FALSE,
+      CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout,
       // Follow http redirects to get the real URL.
       CURLOPT_FOLLOWLOCATION => TRUE,
       CURLOPT_RETURNTRANSFER => TRUE,
@@ -92,10 +127,7 @@ class Tweeper {
       CURLOPT_HTTPHEADER => array('Accept-language: en'),
       CURLOPT_USERAGENT => Tweeper::$userAgent,
     ));
-    $contents = curl_exec($ch);
-    if (FALSE === $contents) {
-      trigger_error(curl_error($ch));
-    }
+    $contents = Tweeper::curlExec($ch);
     curl_close($ch);
 
     return $contents;
@@ -109,6 +141,7 @@ class Tweeper {
     curl_setopt_array($ch, array(
       CURLOPT_HEADER => TRUE,
       CURLOPT_NOBODY => TRUE,
+      CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout,
       // Follow http redirects to get the real URL.
       CURLOPT_FOLLOWLOCATION => TRUE,
       CURLOPT_RETURNTRANSFER => TRUE,
@@ -116,10 +149,16 @@ class Tweeper {
       CURLOPT_SSL_VERIFYPEER => FALSE,
       CURLOPT_USERAGENT => Tweeper::$userAgent,
     ));
-    curl_exec($ch);
+
+    $ret = Tweeper::curlExec($ch);
+    if (FALSE === $ret) {
+      curl_close($ch);
+      return FALSE;
+    }
+
     $url_info = curl_getinfo($ch);
     if (FALSE === $url_info) {
-      trigger_error(curl_error($ch));
+      trigger_error(curl_error($ch), E_USER_WARNING);
     }
     curl_close($ch);
 
@@ -152,10 +191,14 @@ class Tweeper {
     );
 
     $url_info = Tweeper::getUrlInfo($url);
+    if (FALSE === $url_info) {
+      trigger_error("Failed to retrieve info for URL: " . $url, E_USER_WARNING);
+      return '';
+    }
 
     $supported = in_array($url_info['content_type'], $supported_content_types);
     if (!$supported) {
-      error_log("Unsupported enclosure content type \"" . $url_info['content_type'] . "\" for URL: " . $url_info['url']);
+      trigger_error("Unsupported enclosure content type \"" . $url_info['content_type'] . "\" for URL: " . $url_info['url'], E_USER_WARNING);
       return '';
     }
 
@@ -163,10 +206,17 @@ class Tweeper {
     // See http://sourceforge.net/p/feedvalidator/bugs/72/
     $http_url = preg_replace("/^https/", "http", $url_info['url']);
 
+    // When the server does not provide a Content-Length header,
+    // curl_getinfo() would return a negative value for
+    // "download_content_length", however RSS recommends to use 0 when the
+    // enclosure's size cannot be determined.
+    // See: https://www.feedvalidator.org/docs/error/UseZeroForUnknown.html
+    $length = max($url_info['download_content_length'], 0);
+
     $dom = new DOMDocument();
     $enc = $dom->createElement('enclosure');
     $enc->setAttribute('url', $http_url);
-    $enc->setAttribute('length', $url_info['download_content_length']);
+    $enc->setAttribute('length', $length);
     $enc->setAttribute('type', $url_info['content_type']);
 
     return $enc;
@@ -203,7 +253,7 @@ class Tweeper {
 
     $output .= " line $error->line";
 
-    error_log($output);
+    trigger_error($output, E_USER_WARNING);
   }
 
   /**
@@ -230,7 +280,7 @@ class Tweeper {
 
     $xml_data = $serializer->serialize($data, 'xml', $serializer_options);
     if (!$xml_data) {
-      trigger_error("Cannot serialize data", E_USER_ERROR);
+      trigger_error("Cannot serialize data", E_USER_WARNING);
       return NULL;
     }
 
@@ -245,14 +295,20 @@ class Tweeper {
     $json_match_expr = '/window._sharedData = (.*);/';
     $ret = preg_match($json_match_expr, $html, $matches);
     if ($ret !== 1) {
-      trigger_error("Cannot match expression: $json_match_expr\n", E_USER_ERROR);
+      trigger_error("Cannot match expression: $json_match_expr\n", E_USER_WARNING);
       return NULL;
     }
 
+    $data = json_decode($matches[1], $assoc = TRUE);
+
     // The "qe" object contains elements which will result in invalid XML
     // element names, so remove it.
-    $data = json_decode($matches[1], $assoc = TRUE);
     unset($data["qe"]);
+
+    // The "knobs" object contains elements with undefined namespaces, so
+    // remove it to silence an error message.
+    unset($data["knobs"]);
+
     $json = json_encode($data);
 
     return Tweeper::jsonToXml($json, 'instagram');
@@ -301,11 +357,15 @@ class Tweeper {
   private function loadStylesheet($host) {
     $stylesheet = "file://" . __DIR__ . "/rss_converter_" . $host . ".xsl";
     if (FALSE === file_exists($stylesheet)) {
-      trigger_error("Conversion to RSS not supported for $host ($stylesheet not found)", E_USER_ERROR);
+      trigger_error("Conversion to RSS not supported for $host ($stylesheet not found)", E_USER_WARNING);
       return NULL;
     }
 
     $stylesheet_contents = Tweeper::getUrlContents($stylesheet);
+    if (FALSE === $stylesheet_contents) {
+      trigger_error("Cannot open $stylesheet", E_USER_WARNING);
+      return NULL;
+    }
 
     $xslDoc = new DOMDocument();
     $xslDoc->loadXML($stylesheet_contents);
@@ -313,6 +373,8 @@ class Tweeper {
     $xsltProcessor = new XSLTProcessor();
     $xsltProcessor->registerPHPFunctions();
     $xsltProcessor->setParameter('', 'generate-enclosure', $this->generate_enclosure);
+    $xsltProcessor->setParameter('', 'show-usernames', $this->show_usernames);
+    $xsltProcessor->setParameter('', 'show-multimedia', $this->show_multimedia);
     $xsltProcessor->importStylesheet($xslDoc);
 
     return $xsltProcessor;
@@ -321,25 +383,25 @@ class Tweeper {
   /**
    * Convert the site content to RSS.
    */
-  public function tweep($src_url, $host=NULL, $validate_scheme=TRUE) {
+  public function tweep($src_url, $host = NULL, $validate_scheme = TRUE) {
     $url = parse_url($src_url);
     if (FALSE === $url) {
-      trigger_error("Invalid URL: $src_url", E_USER_ERROR);
+      trigger_error("Invalid URL: $src_url", E_USER_WARNING);
       return NULL;
     }
 
     if (TRUE === $validate_scheme) {
       $scheme = $url["scheme"];
       if (!in_array($scheme, array("http", "https"))) {
-        trigger_error("unsupported scheme: $scheme", E_USER_ERROR);
+        trigger_error("unsupported scheme: $scheme", E_USER_WARNING);
         return NULL;
       }
     }
 
-    // if the host is not given derive it from the URL
+    // If the host is not given derive it from the URL.
     if (NULL === $host) {
       if (empty($url["host"])) {
-        trigger_error("Invalid host in URL: $src_url", E_USER_ERROR);
+        trigger_error("Invalid host in URL: $src_url", E_USER_WARNING);
         return NULL;
       }
       // Strip the leading www. to be more forgiving on input URLs.
@@ -353,6 +415,7 @@ class Tweeper {
 
     $html = Tweeper::getUrlContents($src_url);
     if (FALSE === $html) {
+      trigger_error("Failed to retrieve $src_url", E_USER_WARNING);
       return NULL;
     }
 
@@ -367,11 +430,11 @@ class Tweeper {
     }
 
     $output = $xsltProcessor->transformToXML($xmlDoc);
-
     if (FALSE === $output) {
-      trigger_error('XSL transformation failed.', E_USER_ERROR);
+      trigger_error('XSL transformation failed.', E_USER_WARNING);
       return NULL;
     }
+
     return $output;
   }
 
index dcc56af..94d7fef 100644 (file)
 <xsl:stylesheet version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:php="http://php.net/xsl"
-    xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
+    <xsl:param name="generate-enclosure"/>
+    <xsl:param name="show-multimedia"/>
+
     <xsl:output method="xml" indent="yes"/>
 
     <xsl:variable name="BaseURL" select="//meta[@property='og:url']/@content"/>
             </pubDate>
             <description>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
-                <img src="{$picture-url}" alt="{$picture-title}"/>
+                <xsl:if test="$show-multimedia = 1">
+                    <img src="{$picture-url}" alt="{$picture-title}"/>
+                </xsl:if>
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
             <xsl:if test="$generate-enclosure = 1">
-                <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', $picture-url)"/>
+                <!--
+                    Dilbert.com uses protocol-relative urls for pictures but
+                    generateEnclosure() relies on curl which cannot work
+                    without a scheme.
+
+                    Use http as protocol because curl gives some error when
+                    attempting TLS negotiation with the server where
+                    Dilbert.com assets are.
+                -->
+                <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', concat('http:', $picture-url))"/>
             </xsl:if>
         </item>
     </xsl:template>
index a735cf6..7ead3ef 100644 (file)
@@ -33,7 +33,6 @@
 <xsl:stylesheet version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:php="http://php.net/xsl"
-    xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
     <xsl:output method="xml" indent="yes"/>
index c714b1b..855ce0b 100644 (file)
 <xsl:stylesheet version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:php="http://php.net/xsl"
-    xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
     <xsl:param name="generate-enclosure"/>
+    <xsl:param name="show-usernames"/>
+    <xsl:param name="show-multimedia"/>
 
     <xsl:output method="xml" indent="yes"/>
 
         <item>
             <title>
                 <xsl:variable name="title-length" select="140"/>
-                <xsl:variable name="item-content-title" select="normalize-space(concat($screen-name, ': ', $item-content-caption))"/>
+                <xsl:variable name="item-content-title">
+                    <xsl:if test="$show-usernames = 1">
+                        <xsl:value-of select="concat($screen-name, ': ')"/>
+                    </xsl:if>
+                    <xsl:value-of select="normalize-space($item-content-caption)"/>
+                </xsl:variable>
                 <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
                 <xsl:choose>
                     <xsl:when test="string-length($item-content-title) > $title-length">
                     </xsl:if>
                     <xsl:value-of select="$item-content-caption"/>
                 </p><br />
-                <a href="{$item-permalink}"><img src="{$item-content-image}" style="max-width: 100%"/></a>
+                <xsl:if test="$show-multimedia = 1">
+                    <a href="{$item-permalink}"><img src="{$item-content-image}" style="max-width: 100%"/></a>
+                </xsl:if>
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
             <xsl:if test="$generate-enclosure = 1">
index 66e73cd..42f8ac0 100644 (file)
 <xsl:stylesheet version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:php="http://php.net/xsl"
-    xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
+    <xsl:param name="generate-enclosure"/>
+    <xsl:param name="show-usernames"/>
+
     <xsl:output method="xml" indent="yes"/>
 
     <xsl:variable name="domain-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, '@')"/>
         <xsl:variable name="item-permalink" select=".//p[@class='muted']/small/a/@href"/>
         <item>
             <title>
-                <xsl:value-of select="concat($user-name, ': ', normalize-space($item-content))"/>
+                <xsl:if test="$show-usernames = 1">
+                    <xsl:value-of select="concat($user-name, ': ')"/>
+                </xsl:if>
+                <xsl:value-of select="normalize-space($item-content)"/>
             </title>
             <link>
                 <xsl:value-of select="$item-permalink"/>
                 <xsl:value-of select="php:functionString('Tweeper\Tweeper::strToRssDate', .//abbr[@class='easydate']/@title)"/>
             </pubDate>
             <description>
-                <xsl:value-of select="concat($user-name, ': ')"/>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+                <xsl:if test="$show-usernames = 1">
+                    <xsl:value-of select="concat($user-name, ': ')"/>
+                </xsl:if>
                 <xsl:copy-of select="$item-content/node()"/>
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
index d1514c5..1c20e70 100644 (file)
 <xsl:stylesheet version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:php="http://php.net/xsl"
-    xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
     <xsl:param name="generate-enclosure"/>
+    <xsl:param name="show-usernames"/>
+    <xsl:param name="show-multimedia"/>
 
     <xsl:output method="xml" indent="yes"/>
 
     <!-- Identity transform -->
     <xsl:template match="@*|node()">
         <xsl:copy>
+            <!--
+                Strip the style attribute while copying elements because it may be
+                dangerous, see:
+                https://validator.w3.org/feed/docs/warning/DangerousStyleAttr.html
+            -->
             <xsl:apply-templates select="@*[not(name() = 'style')]|node()"/>
         </xsl:copy>
     </xsl:template>
          too instead of the t.co redirections.
     -->
     <xsl:template match="a[@data-pre-embedded='true']">
-        <!-- Prepend and append a white space for aestethic reasons -->
-        <xsl:text> </xsl:text>
-        <a>
-            <xsl:attribute name="href">
+        <xsl:if test="$show-multimedia = 1">
+            <!-- Prepend and append a white space for aesthetic reasons -->
+            <xsl:text> </xsl:text>
+            <a>
+                <xsl:attribute name="href">
+                    <xsl:value-of select="concat('https://', .)"/>
+                </xsl:attribute>
                 <xsl:value-of select="concat('https://', .)"/>
-            </xsl:attribute>
-            <xsl:value-of select="concat('https://', .)"/>
-        </a>
-        <xsl:text> </xsl:text>
+            </a>
+            <xsl:text> </xsl:text>
+        </xsl:if>
     </xsl:template>
 
     <!-- Present images in a more convenient way -->
@@ -84,7 +92,7 @@
             <xsl:attribute name="href">
                 <xsl:value-of select="concat(@data-image-url, ':orig')"/>
             </xsl:attribute>
-            <img>
+            <img style="max-width: 100%">
                 <xsl:attribute name="src">
                     <xsl:value-of select="@data-image-url"/>
                 </xsl:attribute>
         <xsl:copy>
             <xsl:apply-templates select="@*"/>
             <xsl:attribute name="style">
-                <xsl:value-of select="concat(@style, '; background-repeat: no-repeat')"/>
+                <xsl:value-of select="concat(@style, '; background-repeat: no-repeat; background-size: 100% auto')"/>
             </xsl:attribute>
             <xsl:apply-templates select="node()"/>
         </xsl:copy>
         <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
 
         <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
+        <xsl:variable name="item-has-gif" select="$item-media//*[contains(@class, 'PlayableMedia--gif')]"/>
         <item>
             <title>
-                <xsl:value-of select="concat($user-name, ': ')"/>
+                <xsl:if test="($show-usernames = 1) or ($screen-name != $user-name)">
+                    <xsl:value-of select="concat($user-name, ': ')"/>
+                </xsl:if>
                 <xsl:if test="$item-has-video">
                     <xsl:text>(Video) </xsl:text>
                 </xsl:if>
             </pubDate>
             <description>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
-                <xsl:value-of select="concat($user-name, ':')"/>
-                <xsl:element name="br"/>
+                <xsl:if test="($show-usernames = 1) or ($screen-name != $user-name)">
+                    <xsl:value-of select="concat($user-name, ':')"/>
+                    <xsl:element name="br"/>
+                </xsl:if>
                 <xsl:if test="$item-has-video">
                     <xsl:text> (Video)</xsl:text>
                     <xsl:element name="br"/>
                 </xsl:if>
+                <xsl:if test="$item-has-gif">
+                    <xsl:text> (GIF)</xsl:text>
+                    <xsl:element name="br"/>
+                </xsl:if>
                 <xsl:element name="span">
                     <xsl:attribute name="style">white-space: pre-wrap;</xsl:attribute>
                     <xsl:apply-templates select="$item-content/node()"/>
                 </xsl:element>
-                <xsl:apply-templates select="$item-media/node()"/>
+                <xsl:if test="$show-multimedia = 1">
+                    <xsl:apply-templates select="$item-media/node()"/>
+                </xsl:if>
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
             <xsl:if test="$generate-enclosure = 1">
index 82b3a43..37e885a 100644 (file)
@@ -45,6 +45,14 @@ OPTIONS
 *-e*::
     show links to supported media files in the RSS <enclosure/> element
 
+*-m <0|1>*::
+    enable or disable showing multimedia content (e.g. Twitter or Instagram
+    pictures) directly inside the item description. Default is 1 (enable).
+
+*-u <0|1>*::
+    enable or disable showing usernames in front of the item for hosts which
+    support it (Twitter.com/Instagram.com/Pump.io). Default is 1 (enable).
+
 *-h, --help*::
     show the help message
 
index b1dd021..2d5017e 100644 (file)
@@ -1,4 +1,5 @@
 <?php
+
 /**
  * @file
  * Tweeper - a Twitter to RSS web scraper.
@@ -37,10 +38,10 @@ function is_cli() {
  */
 function usage($argv) {
   if (is_cli()) {
-    $usage = "{$argv[0]} [-e|-h|--help] <src_url>\n";
+    $usage = "{$argv[0]} [-e|-m <0|1>|-u <0|1>|-h|--help] <src_url>\n";
   }
   else {
-    $usage = htmlentities("{$_SERVER['SCRIPT_NAME']}?src_url=<src_url>&generate_enclosure=<0|1>");
+    $usage = htmlentities("{$_SERVER['SCRIPT_NAME']}?src_url=<src_url>&generate_enclosure=<0|1>&show_usernames=<0|1>&show_multimedia=<0|1>");
   }
 
   return "usage: $usage";
@@ -52,19 +53,41 @@ function usage($argv) {
 function parse_options_cli($argv, $argc) {
   $options = array(
     'generate_enclosure' => FALSE,
+    'show_usernames' => TRUE,
+    'show_multimedia' => TRUE,
   );
 
   if ($argc < 2) {
     return $options;
   }
 
-  $cli_options = getopt("eh", array("help"));
+  $cli_options = getopt("em:u:h", array("help"));
   foreach ($cli_options as $opt => $val) {
     switch ($opt) {
       case 'e':
         $options['generate_enclosure'] = TRUE;
         break;
 
+      case 'm':
+        $ret = filter_var($val, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
+        if (NULL === $ret) {
+          fwrite(STDERR, "Invalid argument for the -m option.\n");
+          fwrite(STDERR, usage($argv));
+          exit(1);
+        }
+        $options['show_multimedia'] = $val;
+        break;
+
+      case 'u':
+        $ret = filter_var($val, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
+        if (NULL === $ret) {
+          fwrite(STDERR, "Invalid argument for the -u option.\n");
+          fwrite(STDERR, usage($argv));
+          exit(1);
+        }
+        $options['show_usernames'] = $val;
+        break;
+
       case 'h':
       case 'help':
         echo usage($argv);
@@ -76,7 +99,9 @@ function parse_options_cli($argv, $argc) {
     }
   }
 
-  $options['src_url'] = $argv[count($cli_options) + 1];
+  // For now assume that the URL is the last argument, in the future we could
+  // switch to PHP >= 7.1 and use the $optind argument of getopt().
+  $options['src_url'] = array_pop($argv);
 
   return $options;
 }
@@ -87,6 +112,8 @@ function parse_options_cli($argv, $argc) {
 function parse_options_query_string() {
   $options = array(
     'generate_enclosure' => FALSE,
+    'show_usernames' => TRUE,
+    'show_multimedia' => TRUE,
   );
 
   if (isset($_GET['src_url'])) {
@@ -97,6 +124,14 @@ function parse_options_query_string() {
     $options['generate_enclosure'] = $_GET['generate_enclosure'] == 1;
   }
 
+  if (isset($_GET['show_multimedia'])) {
+    $options['show_multimedia'] = $_GET['show_multimedia'] != 0;
+  }
+
+  if (isset($_GET['show_usernames'])) {
+    $options['show_usernames'] = $_GET['show_usernames'] != 0;
+  }
+
   return $options;
 }
 
@@ -114,7 +149,7 @@ if (!isset($options['src_url'])) {
   exit(1);
 }
 
-$tweeper = new Tweeper($options['generate_enclosure']);
+$tweeper = new Tweeper($options['generate_enclosure'], $options['show_usernames'], $options['show_multimedia']);
 $output = $tweeper->tweep($options['src_url']);
 if (is_null($output)) {
   exit(1);