options = wp_parse_args( $options, array(
'auto_br' => true,
'auto_indent' => true,
) );
}
/**
* Separates the given text into chunks of HTML. Each chunk must be an
* associative array that includes 'position', 'type', and 'content' keys.
*
* @param string $input Text to be separated into chunks.
* @return iterable Iterable of chunks.
*/
public function separate_into_chunks( $input ) {
	// Walks $input from left to right and yields one chunk per HTML comment,
	// start tag, or end tag, plus a text chunk for whatever lies in between.
	$input_bytelength = strlen( $input );
	$position         = 0;

	while ( $position < $input_bytelength ) {
		// First alternative: a whole HTML comment. Second alternative: a
		// start or end tag. Neither alternative may be empty: a zero-length
		// match would leave $position unchanged and this loop would never
		// terminate.
		$found = preg_match(
			'/(?:<!--.*?-->|<(?:\/?)[a-z].*?>)/is',
			$input,
			$matches,
			PREG_OFFSET_CAPTURE,
			$position
		);

		if ( ! $found ) {
			// No further markup (0) or a regex failure (false): everything
			// that remains is emitted as a single text chunk.
			yield array(
				'position' => $position,
				'type'     => self::text,
				'content'  => substr( $input, $position ),
			);
			break;
		}

		// With PREG_OFFSET_CAPTURE each match is array( string, byte offset ).
		$next_tag          = $matches[0][0];
		$next_tag_position = $matches[0][1];

		// Text sitting between the current position and the matched markup.
		if ( $position < $next_tag_position ) {
			yield array(
				'position' => $position,
				'type'     => self::text,
				'content'  => substr(
					$input,
					$position,
					$next_tag_position - $position
				),
			);
		}

		// Classify the match by its opening characters.
		// NOTE(review): this classification was reconstructed from the chunk
		// types consumed by format(); confirm against the upstream file.
		if ( 0 === strncmp( $next_tag, '<!--', 4 ) ) {
			$next_tag_type = self::comment;
		} elseif ( 0 === strncmp( $next_tag, '</', 2 ) ) {
			$next_tag_type = self::end_tag;
		} else {
			$next_tag_type = self::start_tag;
		}

		yield array(
			'position' => $next_tag_position,
			'type'     => $next_tag_type,
			'content'  => $next_tag,
		);

		// Resume scanning right after the matched markup.
		$position = $next_tag_position + strlen( $next_tag );
	}
}
/**
* Normalizes content in each chunk. This may change the type and position
* of the chunk.
*
* @param iterable $chunks The original chunks.
* @return iterable Normalized chunks.
*/
public function pre_format( $chunks ) {
	// Re-emits every chunk with normalized content. Positions are rebuilt
	// from calc_next_position() of the previously yielded chunk, since the
	// normalization below can alter a chunk's content.
	$position = 0;

	foreach ( $chunks as $chunk ) {
		$chunk['position'] = $position;

		// Standardize newline characters to "\n".
		$chunk['content'] = str_replace(
			array( "\r\n", "\r" ),
			"\n",
			$chunk['content']
		);

		if ( $chunk['type'] === self::start_tag ) {
			// Only the first element of the helper's result (the normalized
			// tag string) is needed here.
			list( $chunk['content'] ) =
				self::normalize_start_tag( $chunk['content'] );

			// Replace <br> by a line break: the tag chunk becomes a text
			// chunk holding a single "\n".
			// NOTE(review): the pattern accepts <br>, <br/> and <br />; a
			// <br> carrying attributes is left untouched. Confirm this
			// matches the output of normalize_start_tag().
			if (
				$this->options['auto_br'] and
				preg_match( '/^<br\s*\/?>$/i', $chunk['content'] )
			) {
				$chunk['type']    = self::text;
				$chunk['content'] = "\n";
			}
		}

		yield $chunk;
		$position = self::calc_next_position( $chunk );
	}
}
/**
* Concatenates neighboring text chunks to create a single chunk.
*
* @param iterable $chunks The original chunks.
* @return iterable Processed chunks.
*/
public function concatenate_texts( $chunks ) {
	// Buffers consecutive text chunks in $pending and flushes the merged
	// chunk as soon as a non-text chunk (or the end of input) is reached.
	$cursor  = 0;
	$pending = null;

	foreach ( $chunks as $current ) {
		$current['position'] = $cursor;

		if ( self::text !== $current['type'] ) {
			if ( null !== $pending ) {
				// Flush the buffered text first; the current chunk then
				// starts where that text ends.
				yield $pending;
				$current['position'] = self::calc_next_position( $pending );
				$pending             = null;
			}

			yield $current;
			$cursor = self::calc_next_position( $current );
		} elseif ( null === $pending ) {
			// First text chunk of a run: start buffering.
			$pending = $current;
		} else {
			// Subsequent text chunk: extend the buffered one.
			$pending['content'] .= $current['content'];
		}
	}

	// Input ended while text was still buffered.
	if ( null !== $pending ) {
		yield $pending;
	}
}
/**
* Outputs formatted HTML based on the given chunks.
*
* @param iterable $chunks The original chunks.
* @return string Formatted HTML.
*/
public function format( $chunks ) {
	// Normalize the chunks, then merge neighboring text chunks.
	$pipeline = $this->concatenate_texts( $this->pre_format( $chunks ) );

	// Start from a clean output buffer and an empty element stack.
	$this->output           = '';
	$this->stacked_elements = array();

	foreach ( $pipeline as $piece ) {
		$kind = $piece['type'];

		if ( self::text === $kind ) {
			$this->append_text( $piece['content'] );
		}

		if ( self::start_tag === $kind ) {
			$this->start_tag( $piece['content'] );
		}

		if ( self::end_tag === $kind ) {
			$this->end_tag( $piece['content'] );
		}

		if ( self::comment === $kind ) {
			$this->append_comment( $piece['content'] );
		}
	}

	// Close all remaining tags.
	$this->close_all_tags();

	return $this->output;
}
/**
* Appends a text node content to the output property.
*
* @param string $content Text node content.
*/
public function append_text( $content ) {
if ( $this->is_inside( array( 'pre', 'template' ) ) ) {
$this->output .= $content;
return;
}
if (
empty( $this->stacked_elements ) or
$this->has_parent( 'p' ) or
$this->has_parent( self::p_parent_elements )
) {
// Close
if the content starts with multiple line breaks. if ( preg_match( '/^\s*\n\s*\n\s*/', $content ) ) { $this->end_tag( 'p' ); } // Split up the contents into paragraphs, separated by double line breaks. $paragraphs = preg_split( '/\s*\n\s*\n\s*/', $content ); $paragraphs = array_filter( $paragraphs, static function ( $paragraph ) { return '' !== trim( $paragraph ); } ); $paragraphs = array_values( $paragraphs ); if ( $paragraphs ) { if ( $this->is_inside( 'p' ) ) { $paragraph = array_shift( $paragraphs ); $paragraph = self::normalize_paragraph( $paragraph, $this->options['auto_br'] ); $this->output .= $paragraph; } foreach ( $paragraphs as $paragraph ) { $this->start_tag( 'p' ); $paragraph = ltrim( $paragraph ); $paragraph = self::normalize_paragraph( $paragraph, $this->options['auto_br'] ); $this->output .= $paragraph; } } // Close
if the content ends with multiple line breaks. if ( preg_match( '/\s*\n\s*\n\s*$/', $content ) ) { $this->end_tag( 'p' ); } // Cases where the content is a single line break. if ( preg_match( '/^\s*\n\s*$/', $content ) ) { $auto_br = $this->options['auto_br'] && $this->is_inside( 'p' ); $content = self::normalize_paragraph( $content, $auto_br ); $this->output .= $content; } } else { $auto_br = $this->options['auto_br'] && $this->has_parent( self::br_parent_elements ); $content = self::normalize_paragraph( $content, $auto_br ); $this->output .= $content; } } /** * Appends a start tag to the output property. * * @param string $tag A start tag. */ public function start_tag( $tag ) { list( $tag, $tag_name ) = self::normalize_start_tag( $tag ); if ( in_array( $tag_name, self::p_child_elements ) ) { if ( ! $this->is_inside( 'p' ) and ! $this->is_inside( self::p_child_elements ) and ! $this->has_parent( self::p_nonparent_elements ) ) { // Open
if it does not exist. $this->start_tag( 'p' ); } } elseif ( 'p' === $tag_name or in_array( $tag_name, self::p_parent_elements ) or in_array( $tag_name, self::p_nonparent_elements ) ) { // Close
if it exists. $this->end_tag( 'p' ); } if ( 'dd' === $tag_name or 'dt' === $tag_name ) { // Close