Code coverage for /20080809/modules/filter/filter.module

Line #Times calledCode
1
<?php
2
// $Id: filter.module,v 1.217 2008/07/24 16:25:17 dries Exp $
3
4
/**
5
 * @file
6
 * Framework for handling filtering of content.
7
 */
8
9
/**
10
 * Special format ID which means "use the default format".
11
 *
12
 * This value can be passed to the filter APIs as a format ID: this is
13
 * equivalent to not passing an explicit format at all.
14
 */
152027
// filter_resolve_format() replaces this placeholder with the value of the
// 'filter_default_format' variable.
define('FILTER_FORMAT_DEFAULT', 0);
16
17
/**
 * Implementation of hook_help().
 *
 * Produces the help text shown on the filter administration pages.
 *
 * @param $path
 *   The path for which help is requested.
 * @param $arg
 *   The components of the current path. $arg[3] is used to build links on
 *   the per-format pages.
 * @return
 *   An HTML string made of one or more paragraphs for the paths handled
 *   below; nothing for any other path.
 */
function filter_help($path, $arg) {
  // Each entry becomes one <p>…</p> paragraph of the returned help text.
  $paragraphs = array();

  switch ($path) {
    case 'admin/help#filter':
      $paragraphs[] = t("The filter module allows administrators to
configure text input formats for use on your site. An input format defines
the HTML tags, codes, and other input allowed in both content and comments,
and is a key feature in guarding against potentially damaging input from
malicious users. Two input formats included by default are <em>Filtered
HTML</em> (which allows only an administrator-approved subset of HTML tags)
and <em>Full HTML</em> (which allows the full set of HTML tags). Additional
input formats may be created by an administrator.");
      $paragraphs[] = t('Each input format uses filters to manipulate
text, and most input formats apply several different filters to text in a
specific order. Each filter is designed for a specific purpose, and
generally either adds, removes or transforms elements within user-entered
text before it is displayed. A filter does not change the actual content of
a post, but instead, modifies it temporarily before it is displayed. A
filter may remove unapproved HTML tags, for instance, while another
automatically adds HTML to make links referenced in text clickable.');
      $paragraphs[] = t('Users with access to more than one input format
can use the <em>Input format</em> fieldset to choose between available
input formats when creating or editing multi-line content. Administrators
determine the input formats available to each user role, select a default
input format, and control the order of formats listed in the <em>Input
format</em> fieldset.');
      $paragraphs[] = t('For more information, see the online handbook
entry for <a href="@filter">Filter module</a>.', array('@filter' => 'http://drupal.org/handbook/modules/filter/'));
      break;

    case 'admin/settings/filters':
      $paragraphs[] = t('Use the list below to review the input formats
available to each user role, to select a default input format, and to
control the order of formats listed in the <em>Input format</em> fieldset.
(The <em>Input format</em> fieldset is displayed below textareas when users
with access to more than one input format create multi-line content.) The
input format selected as <em>Default</em> is available to all users and,
unless another format is selected, is applied to all content. All input
formats are available to users in roles with the "administer filters"
permission.');
      $paragraphs[] = t('Since input formats, if available, are
presented in the same order as the list below, it may be helpful to arrange
the formats in descending order of your preference for their use. To change
the order of an input format, grab a drag-and-drop handle under the
<em>Name</em> column and drag to a new location in the list. (Grab a handle
by clicking and holding the mouse while hovering over a handle icon.)
Remember that your changes will not be saved until you click the <em>Save
changes</em> button at the bottom of the page.');
      break;

    case 'admin/settings/filters/%':
      $rearrange_url = url('admin/settings/filters/' . $arg[3] . '/order');
      $paragraphs[] = t('Every <em>filter</em> performs one particular
change on the user input, for example stripping out malicious HTML or
making URLs clickable. Choose which filters you want to apply to text in
this input format. If you notice some filters are causing conflicts in the
output, you can <a href="@rearrange">rearrange them</a>.', array('@rearrange' => $rearrange_url));
      break;

    case 'admin/settings/filters/%/configure':
      $edit_url = url('admin/settings/filters/' . $arg[3]);
      $paragraphs[] = t('If you cannot find the settings for a certain
filter, make sure you have enabled it on the <a href="@url">edit tab</a>
first.', array('@url' => $edit_url));
      break;

    case 'admin/settings/filters/%/order':
      $paragraphs[] = t('Because of the flexible filtering system, you
might encounter a situation where one filter prevents another from doing
its job. For example: a word in an URL gets converted into a glossary term,
before the URL can be converted to a clickable link. When this happens,
rearrange the order of the filters.');
      $paragraphs[] = t("Filters are executed from top-to-bottom. To
change the order of the filters, modify the values in the <em>Weight</em>
column or grab a drag-and-drop handle under the <em>Name</em> column and
drag filters to new locations in the list. (Grab a handle by clicking and
holding the mouse while hovering over a handle icon.) Remember that your
changes will not be saved until you click the <em>Save configuration</em>
button at the bottom of the page.");
      break;

    default:
      // No help text is provided for any other path.
      return;
  }

  return '<p>' . implode('</p><p>', $paragraphs) . '</p>';
}
42
43
/**
 * Implementation of hook_theme().
 *
 * @return
 *   An array keyed by theme hook name. Each entry lists the hook's
 *   'arguments' with their defaults and, where given, the include 'file'
 *   that holds the implementation.
 */
function filter_theme() {
  // Both admin theme hooks take a single form argument.
  $form_argument = array('form' => NULL);

  $hooks = array();
  $hooks['filter_admin_overview'] = array(
    'arguments' => $form_argument,
    'file' => 'filter.admin.inc',
  );
  $hooks['filter_admin_order'] = array(
    'arguments' => $form_argument,
    'file' => 'filter.admin.inc',
  );
  $hooks['filter_tips'] = array(
    'arguments' => array('tips' => NULL, 'long' => FALSE, 'extra' => ''),
    'file' => 'filter.pages.inc',
  );
  $hooks['filter_tips_more_info'] = array(
    'arguments' => array(),
  );

  return $hooks;
}
65
66
/**
 * Implementation of hook_menu().
 *
 * @return
 *   An array of menu item definitions keyed by path: the input format
 *   overview with its list/add/delete pages, the per-format edit, configure
 *   and rearrange tabs, and the public 'filter/tips' page.
 */
function filter_menu() {
  // All administrative pages share this prefix and this permission.
  $root = 'admin/settings/filters';
  $per_format = $root . '/%filter_format';
  $admin_only = array('administer filters');

  $items = array();

  $items[$root] = array(
    'title' => 'Input formats',
    'description' => 'Configure how content input by users is filtered,
including allowed HTML tags. Also allows enabling of module-provided
filters.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_overview'),
    'access arguments' => $admin_only,
  );
  $items[$root . '/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items[$root . '/add'] = array(
    'title' => 'Add input format',
    'page callback' => 'filter_admin_format_page',
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items[$root . '/delete'] = array(
    'title' => 'Delete input format',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_delete'),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
  );

  // The long filter tips page is open to everyone.
  $items['filter/tips'] = array(
    'title' => 'Compose tips',
    'page callback' => 'filter_tips_long',
    'access callback' => TRUE,
    'type' => MENU_SUGGESTED_ITEM,
  );

  // Per-format pages; path component 3 is passed to the callbacks.
  $items[$per_format] = array(
    'type' => MENU_CALLBACK,
    'title callback' => 'filter_admin_format_title',
    'title arguments' => array(3),
    'page callback' => 'filter_admin_format_page',
    'page arguments' => array(3),
    'access arguments' => $admin_only,
  );
  $items[$per_format . '/edit'] = array(
    'title' => 'Edit',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => 0,
  );
  $items[$per_format . '/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'filter_admin_configure_page',
    'page arguments' => array(3),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items[$per_format . '/order'] = array(
    'title' => 'Rearrange',
    'page callback' => 'filter_admin_order_page',
    'page arguments' => array(3),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 2,
  );

  return $items;
}
132
1332027
/**
 * Load an input format by ID.
 *
 * Thin wrapper around filter_formats(); presumably the loader the menu
 * system uses for '%filter_format' path components (verify against the menu
 * documentation).
 *
 * @param $arg
 *   The format ID to look up.
 * @return
 *   Whatever filter_formats() returns for $arg: the format object, or FALSE
 *   when the format is not in the list.
 */
function filter_format_load($arg) {
  $format = filter_formats($arg);
  return $format;
}
136
137
/**
 * Display a filter format form title.
 *
 * @param $format
 *   An input format object.
 * @return
 *   The value of the format's name property.
 */
function filter_admin_format_title($format) {
  $title = $format->name;
  return $title;
}
143
144
/**
 * Implementation of hook_perm().
 *
 * @return
 *   An array mapping the 'administer filters' permission to its translated
 *   description, which embeds a translated security warning.
 */
function filter_perm() {
  $warning = t('Warning: Give
to trusted roles only; this permission has security implications.');

  $permissions = array();
  $permissions['administer filters'] = t('Manage input formats and filters, and select
which roles may use them. %warning', array('%warning' => $warning));

  return $permissions;
}
152
153
/**
 * Implementation of hook_cron().
 *
 * Expires outdated filter cache entries by calling cache_clear_all() with a
 * NULL cache ID on the filter cache table.
 */
function filter_cron() {
  $table = 'cache_filter';
  cache_clear_all(NULL, $table);
}
161
162
/**
 * Implementation of hook_filter_tips().
 *
 * @param $delta
 *   Which of this module's filters to describe: 0 gives the allowed HTML
 *   tags tip, 1 the line break tip, 2 the URL tip and 4 the "no HTML" tip.
 * @param $format
 *   The input format ID; it selects the per-format "allowed_html_" and
 *   "filter_html_help_" variables.
 * @param $long
 *   FALSE (default) for the one-line tip, TRUE for the extended help.
 * @return
 *   The tip as a string (HTML for the long HTML-filter help), or nothing
 *   when no tip applies.
 */
function filter_filter_tips($delta, $format, $long = FALSE) {
  global $base_url;

  switch ($delta) {
    case 0:
      if ($allowed_html = variable_get("allowed_html_$format", '<a> <em>
<strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
        switch ($long) {
          case 0:
            return t('Allowed HTML tags: @tags', array('@tags' => $allowed_html));

          case 1:
            $output = '<p>' . t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)) . '</p>';
            // The per-tag help table can be switched off per format.
            if (!variable_get("filter_html_help_$format", 1)) {
              return $output;
            }

            $output .= '<p>' . t('This site allows HTML content. While
learning all of HTML may feel intimidating, learning how to use a very
small number of the most basic HTML "tags" is very easy. This table
provides examples for each tag that is enabled on this site.') . '</p>';
            $output .= '<p>' . t('For more information see W3C\'s <a
href="@html-specifications">HTML Specifications</a> or use your favorite
search engine to find other sites that explain HTML.', array('@html-specifications' => 'http://www.w3.org/TR/html/')) . '</p>';

            // Tag name => array(description, example markup). A NULL entry
            // marks a tag that is covered by another tag's row.
            $tips = array(
              'a' => array(t('Anchors are used to make links to other
pages.'), '<a href="' . $base_url . '">' . variable_get('site_name', 'Drupal') . '</a>'),
              'br' => array(t('By default line break tags are
automatically added, so use this tag to add additional ones. Use of this
tag is different because it is not used with an open/close pair like all
the others. Use the extra " /" inside the tag to maintain XHTML 1.0
compatibility'), t('Text with <br />line break')),
              'p' => array(t('By default paragraph tags are automatically
added, so use this tag to add additional ones.'), '<p>' . t('Paragraph
one.') . '</p> <p>' . t('Paragraph two.') . '</p>'),
              'strong' => array(t('Strong'), '<strong>' . t('Strong') . '</strong>'),
              'em' => array(t('Emphasized'), '<em>' . t('Emphasized') . '</em>'),
              'cite' => array(t('Cited'), '<cite>' . t('Cited') . '</cite>'),
              'code' => array(t('Coded text used to show programming
source code'), '<code>' . t('Coded') . '</code>'),
              'b' => array(t('Bolded'), '<b>' . t('Bolded') . '</b>'),
              'u' => array(t('Underlined'), '<u>' . t('Underlined') . '</u>'),
              'i' => array(t('Italicized'), '<i>' . t('Italicized') . '</i>'),
              'sup' => array(t('Superscripted'), t('<sup>Super</sup>scripted')),
              'sub' => array(t('Subscripted'), t('<sub>Sub</sub>scripted')),
              'pre' => array(t('Preformatted'), '<pre>' . t('Preformatted') . '</pre>'),
              'abbr' => array(t('Abbreviation'), t('<abbr
title="Abbreviation">Abbrev.</abbr>')),
              'acronym' => array(t('Acronym'), t('<acronym
title="Three-Letter Acronym">TLA</acronym>')),
              'blockquote' => array(t('Block quoted'), '<blockquote>' . t('Block quoted') . '</blockquote>'),
              'q' => array(t('Quoted inline'), '<q>' . t('Quoted inline') . '</q>'),
              // Assumes and describes tr, td, th.
              'table' => array(t('Table'), '<table> <tr><th>' . t('Table
header') . '</th></tr> <tr><td>' . t('Table cell') . '</td></tr>
</table>'),
              'tr' => NULL, 'td' => NULL, 'th' => NULL,
              'del' => array(t('Deleted'), '<del>' . t('Deleted') . '</del>'),
              'ins' => array(t('Inserted'), '<ins>' . t('Inserted') . '</ins>'),
              // Assumes and describes li.
              'ol' => array(t('Ordered list - use the &lt;li&gt; to begin
each list item'), '<ol> <li>' . t('First item') . '</li> <li>' . t('Second
item') . '</li> </ol>'),
              'ul' => array(t('Unordered list - use the &lt;li&gt; to
begin each list item'), '<ul> <li>' . t('First item') . '</li> <li>' . t('Second item') . '</li> </ul>'),
              'li' => NULL,
              // Assumes and describes dt and dd.
              'dl' => array(t('Definition lists are similar to other HTML
lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the
definition term and &lt;dd&gt; begins the definition description.'), '<dl>
<dt>' . t('First term') . '</dt> <dd>' . t('First definition') . '</dd>
<dt>' . t('Second term') . '</dt> <dd>' . t('Second definition') . '</dd>
</dl>'),
              'dt' => NULL, 'dd' => NULL,
              'h1' => array(t('Heading'), '<h1>' . t('Title') . '</h1>'),
              'h2' => array(t('Heading'), '<h2>' . t('Subtitle') . '</h2>'),
              'h3' => array(t('Heading'), '<h3>' . t('Subtitle three') . '</h3>'),
              'h4' => array(t('Heading'), '<h4>' . t('Subtitle four') . '</h4>'),
              'h5' => array(t('Heading'), '<h5>' . t('Subtitle five') . '</h5>'),
              'h6' => array(t('Heading'), '<h6>' . t('Subtitle six') . '</h6>')
            );
            $header = array(t('Tag Description'), t('You Type'), t('You
Get'));
            // Start from an empty list: the setting may contain no <tag>
            // at all, in which case the loop below adds nothing.
            $rows = array();
            preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
            foreach ($out[1] as $tag) {
              if (array_key_exists($tag, $tips)) {
                if ($tips[$tag]) {
                  $rows[] = array(
                    array('data' => $tips[$tag][0], 'class' => 'description'),
                    // Escaped copy shows what to type; raw copy shows the result.
                    array('data' => '<code>' . check_plain($tips[$tag][1]) . '</code>', 'class' => 'type'),
                    array('data' => $tips[$tag][1], 'class' => 'get')
                  );
                }
              }
              else {
                $rows[] = array(
                  array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3),
                );
              }
            }
            $output .= theme('table', $header, $rows);

            $output .= '<p>' . t('Most unusual characters can be directly
entered without any problems.') . '</p>';
            $output .= '<p>' . t('If you do encounter problems, try using
HTML character entities. A common example looks like &amp;amp; for an
ampersand &amp; character. For a full list of entities see HTML\'s <a
href="@html-entities">entities</a> page. Some of the available characters
include:', array('@html-entities' => 'http://www.w3.org/TR/html4/sgml/entities.html')) . '</p>';

            $entities = array(
              array(t('Ampersand'), '&amp;'),
              array(t('Greater than'), '&gt;'),
              array(t('Less than'), '&lt;'),
              array(t('Quotation mark'), '&quot;'),
            );
            $header = array(t('Character Description'), t('You Type'), t('You Get'));
            // Second table: discard the tag rows collected above.
            $rows = array();
            foreach ($entities as $entity) {
              $rows[] = array(
                array('data' => $entity[0], 'class' => 'description'),
                array('data' => '<code>' . check_plain($entity[1]) . '</code>', 'class' => 'type'),
                array('data' => $entity[1], 'class' => 'get')
              );
            }
            $output .= theme('table', $header, $rows);
            return $output;
        }
      }
      break;

    case 1:
      switch ($long) {
        case 0:
          return t('Lines and paragraphs break automatically.');
        case 1:
          return t('Lines and paragraphs are automatically recognized. The
&lt;br /&gt; line break, &lt;p&gt; paragraph and &lt;/p&gt; close paragraph
tags are inserted automatically. If paragraphs are not recognized simply
add a couple blank lines.');
      }
      break;

    case 2:
      return t('Web page addresses and e-mail addresses turn into links
automatically.');

    case 4:
      return t('No HTML tags allowed');
  }
}
281
282
/**
 * Retrieve a list of input formats.
 *
 * The list is built once per request and kept in a static variable. Users
 * with the 'administer filters' permission get every format; everybody else
 * gets the formats whose roles column matches one of their role IDs, plus
 * the format stored in the 'filter_default_format' variable.
 *
 * @param $index
 *   Optional format ID. When given, only that format is returned.
 * @return
 *   Without $index: an array of format objects keyed by format ID, ordered
 *   by weight. With $index: the matching format object, or FALSE if it is
 *   not in the list.
 */
function filter_formats($index = NULL) {
  global $user;
  static $formats;

  // Administrators can always use all input formats.
  $is_admin = user_access('administer filters');

  if (!isset($formats)) {
    $formats = array();
    $sql = 'SELECT * FROM {filter_formats}';
    $placeholders = array();

    if (!$is_admin) {
      // One LIKE condition per role of the current user.
      $conditions = array();
      foreach (array_keys($user->roles) as $rid) {
        $conditions[] = "roles LIKE '%%,%d,%%'";
        $placeholders[] = $rid;
      }
      $sql .= ' WHERE ' . implode(' OR ', $conditions) . ' OR format = %d';
      $placeholders[] = variable_get('filter_default_format', 1);
    }

    $result = db_query($sql . ' ORDER by weight', $placeholders);
    while ($row = db_fetch_object($result)) {
      $formats[$row->format] = $row;
    }
  }

  if (!isset($index)) {
    return $formats;
  }
  if (isset($formats[$index])) {
    return $formats[$index];
  }
  return FALSE;
}
319
320
/**
 * Build a list of all filters.
 *
 * Invokes hook_filter('list') on every module returned by module_list().
 *
 * @return
 *   An array of filter objects (module, delta, name) keyed by
 *   "module/delta" and sorted by filter name.
 */
function filter_list_all() {
  $filters = array();

  foreach (module_list() as $module) {
    $names = module_invoke($module, 'filter', 'list');
    // Modules that return nothing usable contribute no filters.
    if (!isset($names) || !is_array($names)) {
      continue;
    }
    foreach ($names as $delta => $name) {
      $filter = new stdClass();
      $filter->module = $module;
      $filter->delta = $delta;
      $filter->name = $name;
      $filters[$module . '/' . $delta] = $filter;
    }
  }

  uasort($filters, '_filter_list_cmp');

  return $filters;
}
339
340
/**
 * Helper function for sorting the filter list by filter name.
 *
 * @param $a
 *   First filter object.
 * @param $b
 *   Second filter object.
 * @return
 *   The result of strcmp() on the two name properties: negative, zero or
 *   positive.
 */
function _filter_list_cmp($a, $b) {
  $left = $a->name;
  $right = $b->name;
  return strcmp($left, $right);
}
346
347
/**
 * Resolve a format id, including the default format.
 *
 * @param $format
 *   A format ID, possibly FILTER_FORMAT_DEFAULT.
 * @return
 *   The 'filter_default_format' variable when $format loosely equals
 *   FILTER_FORMAT_DEFAULT; otherwise $format unchanged.
 */
function filter_resolve_format($format) {
  if ($format == FILTER_FORMAT_DEFAULT) {
    return variable_get('filter_default_format', 1);
  }
  return $format;
}
353
/**
 * Check if text in a certain input format is allowed to be cached.
 *
 * The answer is read from the cache column of {filter_formats} and
 * remembered in a static variable for the rest of the request.
 *
 * @param $format
 *   A format ID; FILTER_FORMAT_DEFAULT is resolved first.
 * @return
 *   The value of the format's cache column.
 */
function filter_format_allowcache($format) {
  static $cache = array();

  $format = filter_resolve_format($format);
  if (isset($cache[$format])) {
    return $cache[$format];
  }

  $result = db_query('SELECT cache FROM
{filter_formats} WHERE format = %d', $format);
  $cache[$format] = db_result($result);
  return $cache[$format];
}
364
365
/**
 * Retrieve a list of filters for a certain format.
 *
 * Rows of {filters} whose module no longer lists the filter's delta in
 * hook_filter('list') are left out. Results are kept in a static variable
 * per format.
 *
 * @param $format
 *   The format ID.
 * @return
 *   An array of filter objects keyed by "module/delta", ordered by weight,
 *   module and delta, each with its name property filled in.
 */
function filter_list_format($format) {
  static $filters = array();

  if (isset($filters[$format])) {
    return $filters[$format];
  }

  $filters[$format] = array();
  $result = db_query("SELECT * FROM {filters} WHERE format = %d ORDER BY
weight, module, delta", $format);
  while ($row = db_fetch_object($result)) {
    $names = module_invoke($row->module, 'filter', 'list');
    if (isset($names) && is_array($names) && isset($names[$row->delta])) {
      $row->name = $names[$row->delta];
      $key = $row->module . '/' . $row->delta;
      $filters[$format][$key] = $row;
    }
  }

  return $filters[$format];
}
385
386
/**
387
 * @name Filtering functions
388
 * @{
389
 * Modules which need to have content filtered can use these functions to
390
 * interact with the filter system.
391
 *
392
 * For more info, see the hook_filter() documentation.
393
 *
394
 * Note: because filters can inject JavaScript or execute PHP code,
security is
395
 * vital here. When a user supplies a $format, you should validate it with
396
 * filter_access($format) before accepting/using it. This is normally done
in
397
 * the validation stage of the node system. You should for example never
make a
398
 * preview of content in a disallowed format.
399
 */
400
401
/**
 * Run all the enabled filters on a piece of text.
 *
 * @param $text
 *    The text to be filtered.
 * @param $format
 *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT
 *    for the default format.
 * @param $check
 *    Whether to check the $format with filter_access() first. Defaults to
 *    TRUE. Note that this will check the permissions of the current user, so
 *    you should specify $check = FALSE when viewing other people's content.
 *    When showing content that is not (yet) stored in the database (eg. upon
 *    preview), set to TRUE so the user's permissions are checked.
 * @return
 *    The filtered text, taken from the filter cache when available. The
 *    translated string 'n/a' is returned when $text is not set or the
 *    access check fails.
 */
function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $check = TRUE) {
  // Nothing to filter, or (when $check = TRUE) the format is not allowed.
  if (!isset($text) || ($check && !filter_access($format))) {
    return t('n/a');
  }

  $format = filter_resolve_format($format);

  // Check for a cached version of this piece of text.
  $cache_id = $format . ':' . md5($text);
  $cached = cache_get($cache_id, 'cache_filter');
  if ($cached) {
    return $cached->data;
  }

  // See if caching is allowed for this format.
  $may_cache = filter_format_allowcache($format);

  // Convert all Windows and Mac newlines to a single newline,
  // so filters only need to deal with one possibility.
  $text = str_replace(array("\r\n", "\r"), "\n", $text);

  // Get a complete list of filters, ordered properly.
  $filters = filter_list_format($format);

  // First pass ('prepare') gives filters the chance to escape HTML-like data
  // such as code or formulas; second pass ('process') does the filtering.
  foreach (array('prepare', 'process') as $op) {
    foreach ($filters as $filter) {
      $text = module_invoke($filter->module, 'filter', $op, $filter->delta, $format, $text, $cache_id);
    }
  }

  // Store in cache with a minimum expiration time of 1 day.
  if ($may_cache) {
    $one_day = 60 * 60 * 24;
    cache_set($cache_id, $text, 'cache_filter', time() + $one_day);
  }

  return $text;
}
458
459
/**
 * Generate a selector for choosing a format in a form.
 *
 * With more than one format available the result is a collapsed fieldset of
 * radio buttons, each described by its filter tips. With a single format
 * the result holds a 'value' element plus the formatting guidelines.
 *
 * @ingroup forms
 * @see filter_form_validate()
 * @param $value
 *   The ID of the format that is currently selected.
 * @param $weight
 *   The weight of the input format.
 * @param $parents
 *   Required when defining multiple input formats on a single node or
 *   having a different parent than 'format'.
 * @return
 *   A form array for the format selector.
 */
function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL, $parents = array('format')) {
  $value = filter_resolve_format($value);
  $formats = filter_formats();

  $extra = theme('filter_tips_more_info');

  if (count($formats) > 1) {
    // Multiple formats available: display radio buttons with tips.
    $form = array(
      '#type' => 'fieldset',
      '#title' => t('Input format'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#weight' => $weight,
      '#element_validate' => array('filter_form_validate'),
    );
    foreach ($formats as $format) {
      // Generate the parents as the autogenerator does, so we will have a
      // unique id for each radio button.
      $id_parts = $parents;
      $id_parts[] = $format->format;

      $radio = array('#type' => 'radio');
      $radio['#title'] = $format->name;
      $radio['#default_value'] = $value;
      $radio['#return_value'] = $format->format;
      $radio['#parents'] = $parents;
      $radio['#description'] = theme('filter_tips', _filter_tips($format->format, FALSE));
      $radio['#id'] = form_clean_id('edit-' . implode('-', $id_parts));

      $form[$format->format] = $radio;
    }
  }
  else {
    // Only one format available: use a hidden form item and only show tips.
    $format = array_shift($formats);
    $form[$format->format] = array(
      '#type' => 'value',
      '#value' => $format->format,
      '#parents' => $parents,
    );
    $tips = _filter_tips(variable_get('filter_default_format', 1), FALSE);
    $form['format']['guidelines'] = array(
      '#title' => t('Formatting guidelines'),
      '#markup' => theme('filter_tips', $tips, FALSE, $extra),
    );
  }

  // The "more information" link closes the element in both cases.
  $form[] = array('#markup' => $extra);
  return $form;
}
517
5182027
/**
 * Validate the input format selector built by filter_form().
 *
 * The submitted value is accepted only if it equals the '#return_value' of
 * one of the radio buttons in the element. Otherwise a form error is set
 * and the attempt is logged.
 *
 * @param $form
 *   The fieldset element produced by filter_form().
 */
function filter_form_validate($form) {
  $submitted = NULL;

  foreach (element_children($form) as $key) {
    $child = $form[$key];
    // Only the radio buttons carry a '#return_value'. Other children, such
    // as the markup element filter_form() appends, must not take part:
    // comparing their two unset keys would be loosely TRUE and would accept
    // any submission.
    if (!isset($child['#return_value']) || !isset($child['#value'])) {
      continue;
    }
    $submitted = $child['#value'];
    if ($child['#value'] == $child['#return_value']) {
      return;
    }
  }

  form_error($form, t('An illegal choice has been detected. Please contact
the site administrator.'));
  $name = empty($form['#title']) ? $form['#parents'][0] : $form['#title'];
  // $submitted stays NULL when no radio button carried a value at all.
  watchdog('form', 'Illegal choice %choice in %name element.', array('%choice' => isset($submitted) ? $submitted : '', '%name' => $name), WATCHDOG_ERROR);
}
527
528
/**
 * Returns TRUE if the user is allowed to access this format.
 *
 * @param $format
 *   A format ID; FILTER_FORMAT_DEFAULT is resolved first.
 * @return
 *   TRUE for users with the 'administer filters' permission, for the
 *   default format, and for any format present in filter_formats();
 *   FALSE otherwise.
 */
function filter_access($format) {
  $format = filter_resolve_format($format);

  if (user_access('administer filters')) {
    return TRUE;
  }
  if ($format == variable_get('filter_default_format', 1)) {
    return TRUE;
  }

  $allowed = filter_formats();
  return isset($allowed[$format]);
}
541
542
/**
543
 * @} End of "Filtering functions".
544
 */
545
546
547
/**
 * Helper function for fetching filter tips.
 *
 * @param $format
 *   A format ID, or -1 to collect tips for every format returned by
 *   filter_formats().
 * @param $long
 *   Passed through to hook_filter_tips(); FALSE by default.
 * @return
 *   An array keyed by format name. Each value is a list of arrays with the
 *   keys 'tip' (the tip text) and 'id' (the filter's "module/delta" key).
 *   Filters that return no tip are left out.
 */
function _filter_tips($format, $long = FALSE) {
  if ($format == -1) {
    $formats = filter_formats();
  }
  else {
    $result = db_query("SELECT * FROM
{filter_formats} WHERE format = %d", $format);
    $formats = array(db_fetch_object($result));
  }

  $tips = array();

  foreach ($formats as $record) {
    $format_tips = array();
    foreach (filter_list_format($record->format) as $id => $filter) {
      $tip = module_invoke($filter->module, 'filter_tips', $filter->delta, $record->format, $long);
      if ($tip) {
        $format_tips[] = array('tip' => $tip, 'id' => $id);
      }
    }
    $tips[$record->name] = $format_tips;
  }

  return $tips;
}
573
574
575
/**
 * Format a link to the more extensive filter tips.
 *
 * @ingroup themeable
 * @return
 *   A paragraph containing a link to the 'filter/tips' page.
 */
function theme_filter_tips_more_info() {
  $link = l(t('More information about formatting options'), 'filter/tips');
  return '<p>' . $link . '</p>';
}
583
584
/**
585
 * @name Standard filters
586
 * @{
587
 * Filters implemented by the filter.module.
588
 */
589
590
/**
 * Implementation of hook_filter(). Contains a basic set of essential
 * filters.
 * - HTML filter (delta 0):
 *     Validates user-supplied HTML, transforming it as necessary.
 * - Line break converter (delta 1):
 *     Converts newlines into paragraph and break tags.
 * - URL and e-mail address filter (delta 2):
 *     Turns web and e-mail addresses into clickable links.
 * - HTML corrector (delta 3):
 *     Corrects faulty and chopped off HTML.
 * - HTML escaper (delta 4):
 *     Escapes all HTML tags so they are displayed rather than interpreted.
 *
 * @param $op
 *   The operation: 'list', 'description', 'process' or 'settings'. Any
 *   other operation returns $text unchanged.
 * @param $delta
 *   Which of the filters above the operation applies to.
 * @param $format
 *   The input format ID.
 * @param $text
 *   The text to filter.
 * @return
 *   Depends on $op: the list of filter names, a description string, the
 *   processed text, a settings form, or $text itself.
 */
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
  if ($op == 'list') {
    $names = array();
    $names[0] = t('Limit allowed HTML tags');
    $names[1] = t('Convert line
breaks');
    $names[2] = t('Convert URLs into links');
    $names[3] = t('Correct broken HTML');
    $names[4] = t('Escape all HTML');
    return $names;
  }

  if ($op == 'description') {
    switch ($delta) {
      case 0:
        return t('Allows you to restrict the HTML tags the user can use.
It will also remove harmful content such as JavaScript events, JavaScript
URLs and CSS styles from those tags that are not removed.');
      case 1:
        return t('Converts line breaks into HTML (i.e. &lt;br&gt; and
&lt;p&gt;) tags.');
      case 2:
        return t('Turns web and e-mail addresses into clickable
links.');
      case 3:
        return t('Corrects faulty and chopped off HTML in postings.');
      case 4:
        return t('Escapes all HTML tags, so they will be visible instead
of being effective.');
      default:
        return;
    }
  }

  if ($op == 'process') {
    switch ($delta) {
      case 0:
        return _filter_html($text, $format);
      case 1:
        return _filter_autop($text);
      case 2:
        return _filter_url($text, $format);
      case 3:
        return _filter_htmlcorrector($text);
      case 4:
        return trim(check_plain($text));
      default:
        return $text;
    }
  }

  if ($op == 'settings') {
    // Only the HTML filter and the URL filter have settings.
    switch ($delta) {
      case 0:
        return _filter_html_settings($format);
      case 2:
        return _filter_url_settings($format);
      default:
        return;
    }
  }

  // Every other operation leaves the text untouched.
  return $text;
}
650
651
/**
652
 * Settings for the HTML filter.
653
 */
6542027
function _filter_html_settings($format) {
  // Every setting is stored per input format, so the format ID is part of
  // each variable name (and therefore of each form element key).
  $allowed_key = "allowed_html_$format";
  $help_key = "filter_html_help_$format";
  $nofollow_key = "filter_html_nofollow_$format";

  // Collapsible fieldset wrapping all HTML filter settings.
  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('HTML filter'),
    '#collapsible' => TRUE,
  );

  // Whitelist of tags that survive filtering.
  $fieldset[$allowed_key] = array(
    '#type' => 'textfield',
    '#title' => t('Allowed HTML tags'),
    '#default_value' => variable_get($allowed_key, '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'),
    '#size' => 64,
    '#maxlength' => 255,
    '#description' => t('Specify a list of tags which should not be stripped. (Note that JavaScript event attributes are always stripped.)'),
  );

  // Toggle for the HTML help in the long filter tips.
  $fieldset[$help_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Display HTML help'),
    '#default_value' => variable_get($help_key, 1),
    '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'),
  );

  // Toggle for adding rel="nofollow" to links.
  $fieldset[$nofollow_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Spam link deterrent'),
    '#default_value' => variable_get($nofollow_key, FALSE),
    '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'),
  );

  return array('filter_html' => $fieldset);
}
682
683
/**
684
 * HTML filter. Provides filtering of input into accepted HTML.
685
 */
6862027
function _filter_html($text, $format) {
  // The setting is stored as "<a> <em> ..."; splitting on whitespace and
  // angle brackets leaves just the bare tag names.
  $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
  $text = filter_xss($text, $allowed_tags);

  if (variable_get("filter_html_nofollow_$format", FALSE)) {
    // Require whitespace right after "<a" so that only anchor tags are
    // touched. Without it, tags that merely start with the letter "a"
    // (<abbr title="...">, <acronym ...>, <address ...>) would also receive
    // a rel="nofollow" attribute.
    $text = preg_replace('/<a(\s[^>]*)>/i', '<a\\1 rel="nofollow">', $text);
  }

  return trim($text);
}
696
697
/**
698
 * Settings for URL filter.
699
 */
7002027
function _filter_url_settings($format) {
  // The maximum caption length is stored per input format.
  $length_key = 'filter_url_length_' . $format;

  // Collapsible fieldset wrapping the URL filter settings.
  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('URL filter'),
    '#collapsible' => TRUE,
  );

  $fieldset[$length_key] = array(
    '#type' => 'textfield',
    '#title' => t('Maximum link text length'),
    '#default_value' => variable_get($length_key, 72),
    '#maxlength' => 4,
    '#description' => t('URLs longer than this number of characters will be truncated to prevent long strings that break formatting. The link itself will be retained; just the text portion of the link will be truncated.'),
  );

  return array('filter_urlfilter' => $fieldset);
}
715
716
/**
717
 * URL filter. Automatically converts text web addresses (URLs, e-mail
addresses,
718
 * ftp links, etc.) into hyperlinks.
719
 */
7202027
function _filter_url($text, $format) {
  // Pass the configured maximum caption length to the regexp callbacks.
  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));

  // Pad with spaces so that an address at the very start or end of the text
  // still has a delimiter on both sides; the padding is removed again below.
  $text = ' ' . $text . ' ';

  // Delimiter that must precede an address (capture group 1). All three
  // patterns share it; previously the "www." pattern lacked the <br> case,
  // so "<br />www.example.com" was not turned into a link.
  $lead = "(<p>|<li>|<br\s*/?>|[ \n\r\t\(])";
  // Optional trailing punctuation (captured, kept outside the link) followed
  // by a lookahead for the closing delimiter.
  $trail = "([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))";

  // Match absolute URLs.
  $text = preg_replace_callback("`" . $lead . "((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))" . $trail . "`i", '_filter_url_parse_full_links', $text);

  // Match e-mail addresses.
  $text = preg_replace("`" . $lead . "([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})" . $trail . "`i", '\1<a href="mailto:\2">\2</a>\3', $text);

  // Match www domains/addresses.
  $text = preg_replace_callback("`" . $lead . "(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])" . $trail . "`i", '_filter_url_parse_partial_links', $text);

  // Strip the padding added above.
  $text = substr($text, 1, -1);

  return $text;
}
738
739
/**
740
 * Scan input and make sure that all HTML tags are properly closed and
nested.
741
 */
7422027
function _filter_htmlcorrector($text) {
  // Prepare tag lists once per request; they are only used for isset()
  // lookups, so the values do not matter.
  static $no_nesting, $single_use;
  if (!isset($no_nesting)) {
    // Tags which cannot be nested but are typically left unclosed.
    $no_nesting = array_fill_keys(array('li', 'p'), TRUE);

    // Single use tags in HTML4.
    $single_use = array_fill_keys(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'), TRUE);
  }

  // Properly entify angles: a "<" that is not followed by a letter or a
  // slash cannot start a tag.
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);

  // Split tags from text. With PREG_SPLIT_DELIM_CAPTURE the result
  // alternates between literal text and tag contents (without the angle
  // brackets), beginning and ending with literal text.
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

  // Odd/even toggle: TRUE while $value holds the contents of a tag.
  $tag = FALSE;
  // Currently open tags, innermost first.
  $stack = array();
  $output = '';
  foreach ($split as $value) {
    if ($tag) {
      $lower = strtolower($value);
      $closing = ($lower[0] == '/');
      if ($closing) {
        $lower = substr($lower, 1);
      }
      // The tag name ends at the first whitespace character or slash. Cutting
      // only at a literal space would turn "<br/>" into the unknown tag
      // "br/" and would swallow attributes separated by a tab or newline.
      $tagname = (string) substr($lower, 0, strcspn($lower, " \t\r\n\f/"));

      if ($closing) {
        // Discard XHTML closing tags for single use tags.
        if (!isset($single_use[$tagname])) {
          // See if we possibly have a matching opening tag on the stack.
          if (in_array($tagname, $stack, TRUE)) {
            // Close other tags lingering first.
            do {
              $output .= '</' . $stack[0] . '>';
            } while (array_shift($stack) !== $tagname);
          }
          // Otherwise, discard it.
        }
      }
      else {
        // See if we have an identical 'no nesting' tag already open and
        // close it if found.
        if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
          $output .= '</' . array_shift($stack) . '>';
        }
        // Push non-single-use tags onto the stack.
        if (!isset($single_use[$tagname])) {
          array_unshift($stack, $tagname);
        }
        // Add trailing slash to single-use tags as per X(HT)ML.
        else {
          $value = rtrim($value, ' /') . ' /';
        }
        $output .= '<' . $value . '>';
      }
    }
    else {
      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }

  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</' . array_shift($stack) . '>';
  }
  return $output;
}
812
813
/**
814
 * Make links out of absolute URLs.
815
 */
8162027
function _filter_url_parse_full_links($match) {
  // $match[2] is the complete URL including its scheme.
  $url = decode_entities($match[2]);
  // Visible link text: shortened, then escaped.
  $caption = check_plain(_filter_url_trim($url));
  // Sanitized URL, used for both the href and the title attribute.
  $href = check_url($url);

  // $match[1] is the delimiter preceding the URL, $match[5] any trailing
  // punctuation that is kept outside the link.
  return "{$match[1]}<a href=\"$href\" title=\"$href\">$caption</a>{$match[5]}";
}
822
823
/**
824
 * Make links out of domain names starting with "www."
825
 */
8262027
function _filter_url_parse_partial_links($match) {
  // $match[2] is the address starting with "www.", without a scheme.
  $address = decode_entities($match[2]);
  // Visible link text: shortened, then escaped.
  $caption = check_plain(_filter_url_trim($address));
  // Escaped address, used in the href (with "http://" prepended) and title.
  $address = check_plain($address);

  // $match[1] is the delimiter preceding the address, $match[3] any trailing
  // punctuation that is kept outside the link.
  return "{$match[1]}<a href=\"http://$address\" title=\"$address\">$caption</a>{$match[3]}";
}
832
833
/**
834
 * Shortens long URLs to http://www.example.com/long/url...
835
 */
8362027
function _filter_url_trim($text, $length = NULL) {
  // Maximum caption length, remembered between calls. A call with a non-NULL
  // $length stores a new limit, so _filter_url_trim(NULL, $n) is a setter.
  static $_length;
  if ($length !== NULL) {
    $_length = (int) $length;
  }

  // Nothing to shorten when there is no text (setter call), or when no
  // positive limit has been stored. Without this guard an unset limit would
  // reduce every string longer than three bytes to just '...'.
  if ($text === NULL || !($_length > 0)) {
    return $text;
  }

  // Use +3 for '...' string length: only shorten when the result is really
  // shorter than the input. Note that lengths are measured in bytes.
  if (strlen($text) > $_length + 3) {
    $text = substr($text, 0, $_length) . '...';
  }

  return $text;
}
849
850
/**
851
 * Convert line breaks into <p> and <br> in an intelligent fashion.
852
 * Based on: http://photomatt.net/scripts/autop
853
 */
8542027
function _filter_autop($text) {
  // All block level tags.
  $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';

  // Split at <pre>, <script>, <style>, <object> and their closing tags.
  // We don't apply any processing to the contents of these tags to avoid
  // messing up code. We look for matched pairs and allow basic nesting. For
  // example:
  // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
  $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // With PREG_SPLIT_DELIM_CAPTURE the array alternates between literal text
  // (even indexes) and the matched tags (odd indexes), and begins and ends
  // with literal text.
  $ignore = FALSE;
  $ignoretag = '';
  $output = '';
  foreach ($chunks as $i => $chunk) {
    if ($i % 2) {
      // Opening or closing tag?
      $open = ($chunk[1] != '/');
      // Skip "<" (or "</") and cut the name at the first space or ">".
      // preg_split() replaces split(), which no longer exists in PHP 7+.
      // The name is lower-cased because the splitter above is
      // case-insensitive: "<PRE>" must be closable by "</pre>".
      list($tag) = preg_split('/[ >]/', substr($chunk, $open ? 1 : 2), 2);
      $tag = strtolower($tag);
      if (!$ignore) {
        if ($open) {
          $ignore = TRUE;
          $ignoretag = $tag;
        }
      }
      // Only allow a matching tag to close it.
      elseif (!$open && $ignoretag == $tag) {
        $ignore = FALSE;
        $ignoretag = '';
      }
    }
    elseif (!$ignore) {
      // Just to make things a little easier, pad the end.
      $chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n";
      $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
      // Space things out a little.
      $chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk);
      $chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk);
      // Take care of duplicates.
      $chunk = preg_replace("/\n\n+/", "\n\n", $chunk);
      // Make paragraphs, including one at the end.
      $chunk = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $chunk);
      // Under certain strange conditions it could create a P of entirely
      // whitespace.
      $chunk = preg_replace('|<p>\s*</p>\n|', '', $chunk);
      // Problem with nested lists.
      $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk);
      $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
      $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
      // Remove paragraph tags wrapped directly around block level tags.
      $chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1", $chunk);
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1", $chunk);
      // Make line breaks.
      $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk);
      // Drop the breaks just inserted next to block level tags.
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1", $chunk);
      $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
      // Escape ampersands that do not start an entity.
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
    }
    $output .= $chunk;
  }
  return $output;
}
907
908
/**
909
 * Very permissive XSS/HTML filter for admin-only use.
910
 *
911
 * Use only for fields where it is impractical to use the
912
 * whole filter system, but where some (mainly inline) mark-up
913
 * is desired (so check_plain() is not acceptable).
914
 *
915
 * Allows all tags that can be used inside an HTML body, save
916
 * for scripts and styles.
917
 */
9182027
function filter_xss_admin($string) {
  // Tags permitted for administrator-supplied markup; everything else is
  // removed by filter_xss().
  $admin_tags = array(
    'a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br',
    'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div',
    'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
    'ins', 'kbd', 'li', 'ol', 'p', 'param', 'pre', 'q', 'samp', 'small',
    'span', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr', 'tt', 'ul', 'var',
  );

  return filter_xss($string, $admin_tags);
}
921
922
/**
923
 * Filters XSS. Based on kses by Ulf Harnhammar, see
924
 * http://sourceforge.net/projects/kses
925
 *
926
 * For examples of various XSS attacks, see:
927
 * http://ha.ckers.org/xss.html
928
 *
929
 * This code does four things:
930
 * - Removes characters and constructs that can trick browsers
931
 * - Makes sure all HTML entities are well-formed
932
 * - Makes sure all HTML tags and attributes are well-formed
933
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
javascript:)
934
 *
935
 * @param $string
936
 *   The string with raw HTML in it. It will be stripped of everything that
can cause
937
 *   an XSS attack.
938
 * @param $allowed_tags
939
 *   An array of allowed tags.
940
 * @return
941
 *   The filtered string, or an empty string if $string is not valid UTF-8.
942
 */
9432027
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!drupal_validate_utf8($string)) {
    return '';
  }

  // Hand the list of allowed tags to the callback, which keeps it in a
  // static variable until the next call.
  _filter_xss_split($allowed_tags, TRUE);

  // Remove NUL characters (ignored by some browsers).
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities.
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities...
  $string = str_replace('&', '&amp;', $string);
  // ...then change back only the well-formed ones. The patterns are applied
  // one after the other, in the order listed.
  $entity_patterns = array(
    // Named entities.
    '/&amp;([A-Za-z][A-Za-z0-9]*;)/',
    // Decimal numeric entities.
    '/&amp;#([0-9]+;)/',
    // Hexadecimal numeric entities.
    '/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/',
  );
  $entity_replacements = array('&\1', '&#\1', '&#x\1');
  $string = preg_replace($entity_patterns, $entity_replacements, $string);

  // Everything that looks like a tag, plus any stray angle bracket, is passed
  // to _filter_xss_split() for cleaning.
  $tag_pattern = '%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <[^>]*.(>|$)      # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x';

  return preg_replace_callback($tag_pattern, '_filter_xss_split', $string);
}
975
976
/**
977
 * Processes an HTML tag.
978
 *
979
 * @param $m
980
 *   An array with various meaning depending on the value of $store.
981
 *   If $store is TRUE then the array contains the allowed tags.
982
 *   If $store is FALSE then the array has one element, the HTML tag to
process.
983
 * @param $store
984
 *   Whether to store $m.
985
 * @return
986
 *   If the element isn't allowed, an empty string. Otherwise, the cleaned
up
987
 *   version of the HTML element.
988
 */
9892027
function _filter_xss_split($m, $store = FALSE) {
  // Allowed tag names as array keys, remembered between calls.
  static $allowed_html;

  // Storage mode: $m is the list of allowed tags; nothing is returned.
  if ($store) {
    $allowed_html = array_flip($m);
    return;
  }

  // Callback mode: $m[1] is the text matched by the pattern in filter_xss().
  $string = $m[1];

  // We matched a lone ">" character.
  if (substr($string, 0, 1) != '<') {
    return '&gt;';
  }
  // We matched a lone "<" character.
  if (strlen($string) == 1) {
    return '&lt;';
  }

  // Take the tag apart: optional closing slash, element name, attributes.
  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
    // Seriously malformed.
    return '';
  }
  $slash = trim($matches[1]);
  $elem = $matches[2];
  $attrlist = $matches[3];

  // Disallowed HTML element.
  if (!isset($allowed_html[strtolower($elem)])) {
    return '';
  }

  // Closing tags are rebuilt without any attributes.
  if ($slash != '') {
    return "</$elem>";
  }

  // Is there a closing XHTML slash at the end of the attributes? Strip it
  // and let the replacement count tell whether one was present.
  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $slash_count);
  $xhtml_slash = $slash_count ? ' /' : '';

  // Clean up attributes.
  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
  $attr2 = preg_replace('/[<>]/', '', $attr2);
  if (strlen($attr2)) {
    $attr2 = ' ' . $attr2;
  }
  else {
    $attr2 = '';
  }

  return "<$elem$attr2$xhtml_slash>";
}
1038
1039
/**
1040
 * Processes a string of HTML attributes.
1041
 *
1042
 * @return
1043
 *   Cleaned up version of the HTML attributes.
1044
 */
10452027
function _filter_xss_attributes($attr) {
  // Cleaned attributes collected so far.
  $attrarr = array();
  // Parser state: 0 = expecting an attribute name, 1 = expecting "=" or the
  // end of a valueless attribute, 2 = expecting an attribute value.
  $mode = 0;
  $attrname = '';
  // Whether the attribute being parsed must be dropped ("style" and
  // everything starting with "on").
  $skip = FALSE;

  while (strlen($attr) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance.
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
          $attrname = strtolower($match[1]);
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
          $working = $mode = 1;
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        }
        break;

      case 1:
        // Equals sign or valueless ("selected").
        if (preg_match('/^\s*=\s*/', $attr)) {
          $working = 1;
          $mode = 2;
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
          break;
        }

        if (preg_match('/^\s+/', $attr)) {
          $working = 1;
          $mode = 0;
          if (!$skip) {
            $attrarr[] = $attrname;
          }
          $attr = preg_replace('/^\s+/', '', $attr);
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance.
        // Double-quoted value.
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
          break;
        }

        // Single-quoted value.
        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname='$thisval'";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
          break;
        }

        // Unquoted value; it is emitted with double quotes.
        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        }
        break;
    }

    if ($working == 0) {
      // Not well formed, remove and try again.
      // NOTE(review): the second alternative below ends with a stray "|",
      // which adds an empty alternative to the group. The pattern is left
      // untouched here so that the clean-up behaviour does not change.
      $attr = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attr);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  // It is subject to the same skip rule as every other attribute, so a
  // trailing "style" or "on*" name is dropped as well.
  if ($mode == 1 && !$skip) {
    $attrarr[] = $attrname;
  }
  return $attrarr;
}
1145
1146
/**
1147
 * Processes an HTML attribute value and ensures it does not contain an
URL
1148
 * with a disallowed protocol (e.g. javascript:)
1149
 *
1150
 * @param $string
1151
 *   The string with the attribute value.
1152
 * @param $decode
1153
 *   Whether to decode entities in the $string. Set to FALSE if the
$string
1154
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
1155
 * @return
1156
 *   Cleaned up and HTML-escaped version of $string.
1157
 */
11582027
function filter_xss_bad_protocol($string, $decode = TRUE) {
  // Allowed protocol names as array keys, built on first use.
  static $allowed_protocols;
  if (!isset($allowed_protocols)) {
    $defaults = array('ftp', 'http', 'https', 'irc', 'mailto', 'news', 'nntp', 'rtsp', 'sftp', 'ssh', 'telnet', 'webcal');
    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', $defaults));
  }

  // Get the plain text representation of the attribute value (i.e. its
  // meaning).
  if ($decode) {
    $string = decode_entities($string);
  }

  // Iteratively remove any invalid protocol found: keep stripping a leading
  // "something:" until what remains is acceptable.
  while (TRUE) {
    $colonpos = strpos($string, ':');
    // No colon, or a colon in first position: there is no protocol.
    if (!($colonpos > 0)) {
      break;
    }

    // We found a colon, possibly a protocol. Verify.
    $protocol = substr($string, 0, $colonpos);

    // If a colon is preceded by a slash, question mark or hash, it cannot
    // possibly be part of the URL scheme. This must be a relative URL, which
    // inherits the (safe) protocol of the base document.
    if (preg_match('![/?#]!', $protocol)) {
      break;
    }

    // Scheme comparison must be case-insensitive. An allowed protocol ends
    // the search.
    if (isset($allowed_protocols[strtolower($protocol)])) {
      break;
    }

    // Disallowed protocol: drop it together with its colon and look again.
    $string = substr($string, $colonpos + 1);
  }

  return check_plain($string);
}
1192
1193
/**
1194
 * @} End of "Standard filters".
1195
 */
11962027