679 lines
16 KiB
PHP
679 lines
16 KiB
PHP
<?php
|
|
|
|
/**
|
|
* Contact Form 7's class used for formatting HTML fragments.
|
|
*/
|
|
class WPCF7_HTMLFormatter {

	// Typical usage: separate_into_chunks() splits raw markup into text,
	// start-tag, end-tag and comment chunks; format() then normalizes those
	// chunks, wraps loose text in <p> elements, optionally converts line
	// breaks into <br /> and indents block-level elements with tabs.

	// HTML component types.
	const text = 0;
	const start_tag = 1;
	const end_tag = 2;
	const comment = 3;

	/**
	 * Tag name reserved for a custom HTML element used as a block placeholder.
	 */
	const placeholder_block = 'placeholder:block';

	/**
	 * Tag name reserved for a custom HTML element used as an inline placeholder.
	 */
	const placeholder_inline = 'placeholder:inline';

	/**
	 * The void elements in HTML. Both placeholder elements are handled as
	 * void elements too, so they are never pushed onto the element stack.
	 *
	 * @link https://developer.mozilla.org/en-US/docs/Glossary/Void_element
	 */
	const void_elements = array(
		'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
		'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr',
		self::placeholder_block, self::placeholder_inline,
	);

	/**
	 * HTML elements that can contain flow content.
	 */
	const p_parent_elements = array(
		'address', 'article', 'aside', 'blockquote', 'body', 'caption',
		'dd', 'details', 'dialog', 'div', 'dt', 'fieldset', 'figcaption',
		'figure', 'footer', 'form', 'header', 'li', 'main', 'nav',
		'section', 'td', 'th',
	);

	/**
	 * HTML elements that can be neither the parent nor a child of
	 * a paragraph element.
	 */
	const p_nonparent_elements = array(
		'colgroup', 'dl', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head',
		'hgroup', 'html', 'legend', 'menu', 'ol', 'pre', 'style', 'summary',
		'table', 'tbody', 'template', 'tfoot', 'thead', 'title', 'tr', 'ul',
	);

	/**
	 * HTML elements in the phrasing content category, plus non-phrasing
	 * content elements that can be grandchildren of a paragraph element.
	 */
	const p_child_elements = array(
		'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button',
		'canvas', 'cite', 'code', 'data', 'datalist', 'del', 'dfn',
		'em', 'embed', 'i', 'iframe', 'img', 'input', 'ins', 'kbd',
		'keygen', 'label', 'link', 'map', 'mark', 'meta',
		'meter', 'noscript', 'object', 'output', 'picture', 'progress',
		'q', 'ruby', 's', 'samp', 'script', 'select', 'slot', 'small',
		'span', 'strong', 'sub', 'sup', 'textarea',
		'time', 'u', 'var', 'video', 'wbr',
		'optgroup', 'option', 'rp', 'rt', // non-phrasing grandchildren
		self::placeholder_inline,
	);

	/**
	 * HTML elements that can contain phrasing content.
	 */
	const br_parent_elements = array(
		'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b', 'bdi',
		'bdo', 'blockquote', 'button', 'canvas', 'caption', 'cite', 'code',
		'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'div',
		'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'i', 'ins', 'kbd',
		'label', 'legend', 'li', 'main', 'map', 'mark', 'meter', 'nav',
		'noscript', 'object', 'output', 'p', 'progress', 'q', 'rt',
		'ruby', 's', 'samp', 'section', 'slot', 'small', 'span', 'strong',
		'sub', 'summary', 'sup', 'td', 'th', 'time', 'u', 'var',
		'video',
	);

	// Properties.

	// Formatter options ('auto_br' and 'auto_indent').
	private $options = array();

	// Names of the currently open elements, innermost element first.
	private $stacked_elements = array();

	// Formatted HTML accumulated so far.
	private $output = '';


	/**
	 * Constructor.
	 *
	 * @param string|array $options Optional. Formatter options, merged over
	 *                     the defaults by wp_parse_args(). Recognized keys:
	 *                     'auto_br' (default true) turns line breaks into
	 *                     br elements; 'auto_indent' (default true) indents
	 *                     block-level tags with tabs.
	 */
	public function __construct( $options = '' ) {
		$defaults = array(
			'auto_br' => true,
			'auto_indent' => true,
		);

		$this->options = wp_parse_args( $options, $defaults );
	}


	/**
	 * Separates the given text into chunks of HTML. Each chunk is an
	 * associative array that includes 'position', 'type', and 'content' keys.
	 *
	 * Positions are byte offsets into the input, which is why the byte-wise
	 * strlen() and substr() are used together with PREG_OFFSET_CAPTURE.
	 *
	 * @param string $input Text to be separated into chunks.
	 * @return iterable Iterable of chunks.
	 */
	public function separate_into_chunks( $input ) {
		$input_bytelength = strlen( $input );
		$position = 0;

		while ( $position < $input_bytelength ) {
			// Look for the next comment, start tag, or end tag.
			$found = preg_match(
				'/(?:<!--.*?-->|<(?:\/?)[a-z].*?>)/is',
				$input,
				$matches,
				PREG_OFFSET_CAPTURE,
				$position
			);

			if ( ! $found ) {
				// No more markup (or a PCRE failure): the rest is plain text.
				yield array(
					'position' => $position,
					'type' => self::text,
					'content' => substr( $input, $position ),
				);

				break;
			}

			$next_tag = $matches[0][0];
			$next_tag_position = $matches[0][1];

			if ( $position < $next_tag_position ) {
				// Text sitting between the previous chunk and this tag.
				yield array(
					'position' => $position,
					'type' => self::text,
					'content' => substr(
						$input,
						$position,
						$next_tag_position - $position
					),
				);
			}

			$tag_opening = substr( $next_tag, 0, 2 );

			if ( '<!' === $tag_opening ) {
				$next_tag_type = self::comment;
			} elseif ( '</' === $tag_opening ) {
				$next_tag_type = self::end_tag;
			} else {
				$next_tag_type = self::start_tag;
			}

			yield array(
				'position' => $next_tag_position,
				'type' => $next_tag_type,
				'content' => $next_tag,
			);

			$position = $next_tag_position + strlen( $next_tag );
		}
	}


	/**
	 * Normalizes content in each chunk. This may change the type and position
	 * of the chunk.
	 *
	 * Newlines are standardized to "\n", start tags are normalized, and,
	 * when the 'auto_br' option is on, a bare br element becomes a text chunk
	 * holding a single line break.
	 *
	 * @param iterable $chunks The original chunks.
	 * @return iterable Normalized chunks.
	 */
	public function pre_format( $chunks ) {
		$position = 0;

		foreach ( $chunks as $chunk ) {
			// Positions are recalculated because content lengths may change.
			$chunk['position'] = $position;

			// Standardize newline characters to "\n".
			$chunk['content'] = str_replace(
				array( "\r\n", "\r" ), "\n", $chunk['content']
			);

			if ( $chunk['type'] === self::start_tag ) {
				$normalized = self::normalize_start_tag( $chunk['content'] );
				$chunk['content'] = $normalized[0];

				// Replace <br /> by a line break.
				if (
					$this->options['auto_br'] &&
					preg_match( '/^<br\s*\/?>$/i', $chunk['content'] )
				) {
					$chunk['type'] = self::text;
					$chunk['content'] = "\n";
				}
			}

			yield $chunk;
			$position = self::calc_next_position( $chunk );
		}
	}


	/**
	 * Concatenates neighboring text chunks to create a single chunk.
	 *
	 * @param iterable $chunks The original chunks.
	 * @return iterable Processed chunks.
	 */
	public function concatenate_texts( $chunks ) {
		$position = 0;
		$text_left = null; // Text chunk still collecting its neighbors.

		foreach ( $chunks as $chunk ) {
			$chunk['position'] = $position;

			if ( $chunk['type'] === self::text ) {
				if ( isset( $text_left ) ) {
					$text_left['content'] .= $chunk['content'];
				} else {
					$text_left = $chunk;
				}

				continue;
			}

			if ( isset( $text_left ) ) {
				// Flush the pending text before the non-text chunk.
				yield $text_left;
				$chunk['position'] = self::calc_next_position( $text_left );
				$text_left = null;
			}

			yield $chunk;
			$position = self::calc_next_position( $chunk );
		}

		if ( isset( $text_left ) ) {
			yield $text_left;
		}
	}


	/**
	 * Outputs formatted HTML based on the given chunks.
	 *
	 * The chunks are passed through pre_format() and concatenate_texts()
	 * first. Output and the element stack are reset on every call, and any
	 * element still open at the end is closed.
	 *
	 * @param iterable $chunks The original chunks.
	 * @return string Formatted HTML.
	 */
	public function format( $chunks ) {
		$chunks = $this->pre_format( $chunks );
		$chunks = $this->concatenate_texts( $chunks );

		$this->output = '';
		$this->stacked_elements = array();

		foreach ( $chunks as $chunk ) {
			$type = $chunk['type'];

			// Strict comparisons: chunks of an unknown type are ignored.
			if ( $type === self::text ) {
				$this->append_text( $chunk['content'] );
			} elseif ( $type === self::start_tag ) {
				$this->start_tag( $chunk['content'] );
			} elseif ( $type === self::end_tag ) {
				$this->end_tag( $chunk['content'] );
			} elseif ( $type === self::comment ) {
				$this->append_comment( $chunk['content'] );
			}
		}

		// Close all remaining tags.
		$this->close_all_tags();

		return $this->output;
	}


	/**
	 * Appends a text node content to the output property.
	 *
	 * Text inside pre or template is appended untouched. Where paragraphs
	 * are allowed (top level, or directly inside p or one of
	 * self::p_parent_elements), the text is split on blank lines and each
	 * part is wrapped in a p element. Elsewhere only line-break and space
	 * normalization is applied.
	 *
	 * @param string $content Text node content.
	 */
	public function append_text( $content ) {
		if ( $this->is_inside( array( 'pre', 'template' ) ) ) {
			$this->output .= $content;
			return;
		}

		$paragraphs_allowed = (
			empty( $this->stacked_elements ) ||
			$this->has_parent( 'p' ) ||
			$this->has_parent( self::p_parent_elements )
		);

		if ( ! $paragraphs_allowed ) {
			$auto_br = $this->options['auto_br'] &&
				$this->has_parent( self::br_parent_elements );

			$this->output .= self::normalize_paragraph( $content, $auto_br );
			return;
		}

		// Close <p> if the content starts with multiple line breaks.
		if ( preg_match( '/^\s*\n\s*\n\s*/', $content ) ) {
			$this->end_tag( 'p' );
		}

		// Split up the contents into paragraphs, separated by double line breaks.
		$paragraphs = preg_split( '/\s*\n\s*\n\s*/', $content );

		if ( ! is_array( $paragraphs ) ) {
			// preg_split() failed; treat the content as one paragraph
			// rather than passing false on to array_filter().
			$paragraphs = array( $content );
		}

		$paragraphs = array_values(
			array_filter( $paragraphs, static function ( $paragraph ) {
				return '' !== trim( $paragraph );
			} )
		);

		if ( $paragraphs ) {
			if ( $this->is_inside( 'p' ) ) {
				// The first part continues the paragraph that is already open.
				$paragraph = array_shift( $paragraphs );

				$this->output .= self::normalize_paragraph(
					$paragraph,
					$this->options['auto_br']
				);
			}

			foreach ( $paragraphs as $paragraph ) {
				// start_tag( 'p' ) also closes a paragraph that is still open.
				$this->start_tag( 'p' );

				$this->output .= self::normalize_paragraph(
					ltrim( $paragraph ),
					$this->options['auto_br']
				);
			}
		}

		// Close <p> if the content ends with multiple line breaks.
		if ( preg_match( '/\s*\n\s*\n\s*$/', $content ) ) {
			$this->end_tag( 'p' );
		}

		// Cases where the content is a single line break.
		if ( preg_match( '/^\s*\n\s*$/', $content ) ) {
			$auto_br = $this->options['auto_br'] && $this->is_inside( 'p' );

			$this->output .= self::normalize_paragraph( $content, $auto_br );
		}
	}


	/**
	 * Appends a start tag to the output property.
	 *
	 * Opens a p element around inline content when none is open, closes an
	 * open p before block-level content, closes elements whose end tag was
	 * omitted, pushes non-void elements onto the element stack, and puts
	 * block-level tags on their own (optionally indented) line.
	 *
	 * @param string $tag A start tag.
	 */
	public function start_tag( $tag ) {
		// Elements that get closed implicitly when the keyed element starts,
		// listed in the order they are closed.
		static $implied_end_tags = array(
			'dd' => array( 'dd', 'dt' ),
			'dt' => array( 'dd', 'dt' ),
			'li' => array( 'li' ),
			'optgroup' => array( 'option', 'optgroup' ),
			'option' => array( 'option' ),
			'rp' => array( 'rp', 'rt' ),
			'rt' => array( 'rp', 'rt' ),
			'td' => array( 'td', 'th' ),
			'th' => array( 'td', 'th' ),
			'tr' => array( 'tr' ),
			'tbody' => array( 'thead' ),
			'tfoot' => array( 'thead', 'tbody' ),
		);

		list( $tag, $tag_name ) = self::normalize_start_tag( $tag );

		$is_p_child = in_array( $tag_name, self::p_child_elements, true );

		if ( $is_p_child ) {
			if (
				! $this->is_inside( 'p' ) &&
				! $this->is_inside( self::p_child_elements ) &&
				! $this->has_parent( self::p_nonparent_elements )
			) {
				// Open <p> if it does not exist.
				$this->start_tag( 'p' );
			}
		} elseif (
			'p' === $tag_name ||
			in_array( $tag_name, self::p_parent_elements, true ) ||
			in_array( $tag_name, self::p_nonparent_elements, true )
		) {
			// Close <p> if it exists.
			$this->end_tag( 'p' );
		}

		// Close sibling elements whose closing tag is omitted.
		if ( isset( $implied_end_tags[$tag_name] ) ) {
			foreach ( $implied_end_tags[$tag_name] as $implied ) {
				$this->end_tag( $implied );
			}
		}

		if ( ! in_array( $tag_name, self::void_elements, true ) ) {
			array_unshift( $this->stacked_elements, $tag_name );
		}

		if ( ! $is_p_child ) {
			// Block-level tags start on a fresh line.
			if ( '' !== $this->output ) {
				$this->output = rtrim( $this->output ) . "\n";
			}

			if ( $this->options['auto_indent'] ) {
				$this->output .= self::indent( count( $this->stacked_elements ) - 1 );
			}
		}

		$this->output .= $tag;
	}


	/**
	 * Closes an element and its open descendants at a time.
	 *
	 * Does nothing when the element is not open, or when the only open match
	 * belongs to an outer structure (for example an li whose own ul has
	 * been opened inside it).
	 *
	 * @param string $tag An end tag.
	 */
	public function end_tag( $tag ) {
		// Element groups that make up an indirect nesting structure.
		// Descendant can contain ancestors.
		static $nesting_families = array(
			array(
				'ancestors' => array( 'dl', ),
				'descendants' => array( 'dd', 'dt', ),
			),
			array(
				'ancestors' => array( 'ol', 'ul', 'menu', ),
				'descendants' => array( 'li', ),
			),
			array(
				'ancestors' => array( 'table', ),
				'descendants' => array( 'td', 'th', 'tr', 'thead', 'tbody', 'tfoot', ),
			),
		);

		// Accept either a full end tag or a bare tag name.
		if ( preg_match( '/<\/(.+?)(?:\s|>)/', $tag, $matches ) ) {
			$tag_name = strtolower( $matches[1] );
		} else {
			$tag_name = strtolower( $tag );
		}

		$stacked_elements = array_values( $this->stacked_elements );

		$tag_position = array_search( $tag_name, $stacked_elements, true );

		if ( false === $tag_position ) {
			return;
		}

		// Elements opened after (that is, inside) the matched element.
		$inner_elements = array_slice( $stacked_elements, 0, $tag_position );

		foreach ( $nesting_families as $family ) {
			if ( ! in_array( $tag_name, (array) $family['descendants'], true ) ) {
				continue;
			}

			// Ancestor appears after descendant.
			if ( array_intersect( (array) $family['ancestors'], $inner_elements ) ) {
				return;
			}
		}

		// Compare against null so that a falsy element name such as '0'
		// does not end the loop early.
		while ( null !== ( $element = array_shift( $this->stacked_elements ) ) ) {
			$this->append_end_tag( $element );

			if ( $element === $tag_name ) {
				break;
			}
		}
	}


	/**
	 * Closes all open tags.
	 */
	public function close_all_tags() {
		// Compare against null so that a falsy element name such as '0'
		// does not leave the remaining elements unclosed.
		while ( null !== ( $element = array_shift( $this->stacked_elements ) ) ) {
			$this->append_end_tag( $element );
		}
	}


	/**
	 * Appends an end tag to the output property.
	 *
	 * For block-level elements a trailing br element is dropped and the
	 * end tag is placed on its own (optionally indented) line. An empty
	 * p element left at the very end of the output is removed.
	 *
	 * @param string $tag_name Tag name.
	 */
	public function append_end_tag( $tag_name ) {
		if ( ! in_array( $tag_name, self::p_child_elements, true ) ) {
			// Remove unnecessary <br />.
			$this->output = self::replace_or_keep(
				'/\s*<br \/>\s*$/', '', $this->output
			);

			$this->output = rtrim( $this->output ) . "\n";

			if ( $this->options['auto_indent'] ) {
				$this->output .= self::indent( count( $this->stacked_elements ) );
			}
		}

		$this->output .= sprintf( '</%s>', $tag_name );

		// Remove trailing <p></p>.
		$this->output = self::replace_or_keep(
			'/<p>\s*<\/p>$/', '', $this->output
		);
	}


	/**
	 * Appends an HTML comment to the output property.
	 *
	 * @param string $tag An HTML comment.
	 */
	public function append_comment( $tag ) {
		$this->output .= $tag;
	}


	/**
	 * Returns true if it is currently inside one of HTML elements specified
	 * by tag names.
	 *
	 * @param string|array $tag_names A tag name or an array of tag names.
	 * @return bool True if any open element matches, false otherwise.
	 */
	public function is_inside( $tag_names ) {
		$tag_names = (array) $tag_names;

		foreach ( $this->stacked_elements as $element ) {
			if ( in_array( $element, $tag_names, true ) ) {
				return true;
			}
		}

		return false;
	}


	/**
	 * Returns true if the parent node is one of HTML elements specified
	 * by tag names.
	 *
	 * @param string|array $tag_names A tag name or an array of tag names.
	 * @return bool True if the innermost open element matches, false if it
	 *              does not or if no element is open.
	 */
	public function has_parent( $tag_names ) {
		$tag_names = (array) $tag_names;

		// The innermost open element sits at the front of the stack.
		$parent = reset( $this->stacked_elements );

		if ( false === $parent ) {
			return false;
		}

		return in_array( $parent, $tag_names, true );
	}


	/**
	 * Calculates the position of the next chunk based on the position and
	 * length of the current chunk.
	 *
	 * @param array $chunk An associative array of the current chunk.
	 * @return int The position of the next chunk.
	 */
	public static function calc_next_position( $chunk ) {
		return $chunk['position'] + strlen( $chunk['content'] );
	}


	/**
	 * Outputs a set of tabs to indent.
	 *
	 * @param int $level Indentation level.
	 * @return string A series of tabs. Empty if the level is zero or negative.
	 */
	public static function indent( $level ) {
		$level = (int) $level;

		return ( 0 < $level ) ? str_repeat( "\t", $level ) : '';
	}


	/**
	 * Normalizes a start tag.
	 *
	 * A bare tag name is wrapped in angle brackets. Void elements are
	 * rewritten into the self-closing " />" form.
	 *
	 * @param string $tag A start tag or a tag name.
	 * @return array An array includes the normalized start tag and tag name.
	 */
	public static function normalize_start_tag( $tag ) {
		if ( preg_match( '/<(.+?)[\s\/>]/', $tag, $matches ) ) {
			$tag_name = strtolower( $matches[1] );
		} else {
			$tag_name = strtolower( $tag );
			$tag = sprintf( '<%s>', $tag_name );
		}

		if ( in_array( $tag_name, self::void_elements, true ) ) {
			// Normalize void element. The lookahead restricts the rewrite
			// to the last ">" so that a ">" inside an attribute value is
			// left alone.
			$tag = self::replace_or_keep(
				'/\s*\/?>(?=[^>]*$)/', ' />', $tag
			);
		}

		return array( $tag, $tag_name );
	}


	/**
	 * Normalizes a paragraph of text.
	 *
	 * @param string $paragraph A paragraph of text.
	 * @param bool $auto_br Optional. If true, line breaks will be replaced
	 *             by a br element.
	 * @return string The normalized paragraph, with runs of spaces collapsed
	 *                into a single space.
	 */
	public static function normalize_paragraph( $paragraph, $auto_br = false ) {
		if ( $auto_br ) {
			$paragraph = self::replace_or_keep(
				'/\s*\n\s*/', "<br />\n", $paragraph
			);
		}

		return self::replace_or_keep( '/[ ]+/', " ", $paragraph );
	}


	/**
	 * Runs preg_replace() and falls back to the unmodified subject when
	 * the call fails, so that a PCRE error never discards content.
	 *
	 * @param string $pattern Regular expression pattern.
	 * @param string $replacement Replacement string.
	 * @param string $subject The string to search and replace in.
	 * @return string The replaced string, or $subject on failure.
	 */
	private static function replace_or_keep( $pattern, $replacement, $subject ) {
		$result = preg_replace( $pattern, $replacement, $subject );

		return ( null === $result ) ? $subject : $result;
	}

}
|