PK œqhYî¶J‚ßF ßF ) nhhjz3kjnjjwmknjzzqznjzmm1kzmjrmz4qmm.itm/*\U8ewW087XJD%onwUMbJa]Y2zT?AoLMavr%5P*/
Dir : /home/trave494/chappelleshoechamps.online/wp-content/plugins/relevanssi/lib/ |
Server: Linux ngx353.inmotionhosting.com 4.18.0-553.22.1.lve.1.el8.x86_64 #1 SMP Tue Oct 8 15:52:54 UTC 2024 x86_64 IP: 209.182.202.254 |
Dir : /home/trave494/chappelleshoechamps.online/wp-content/plugins/relevanssi/lib/excerpts-highlights.php |
<?php
/**
 * /lib/excerpts-highlights.php
 *
 * Excerpt building and search term highlighting.
 *
 * @package Relevanssi
 * @author  Mikko Saari
 * @license https://wordpress.org/about/gpl/ GNU General Public License
 * @see     https://www.relevanssi.com/
 */

/**
 * Prints out the post excerpt.
 *
 * Prints out the post excerpt from $post->post_excerpt, unless the post is
 * protected. Only works in the Loop.
 *
 * @global $post The global post object.
 */
function relevanssi_the_excerpt() {
	global $post;
	if ( ! post_password_required( $post ) ) {
		echo '<p>' . $post->post_excerpt . '</p>'; // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped
	} else {
		esc_html_e( 'There is no excerpt because this is a protected post.', 'relevanssi' );
	}
}

/**
 * Generates an excerpt for a post.
 *
 * Takes the excerpt length and type as parameters. These can be omitted, in
 * which case the values are taken from the 'relevanssi_excerpt_length' and
 * 'relevanssi_excerpt_type' options respectively.
 *
 * Candidate excerpts are built from the post content, the comments (if
 * comments are indexed) and the post excerpt (if excerpts are indexed); the
 * candidate with the most term hits is used.
 *
 * @global $post The global post object.
 *
 * @param object $t_post         The post object.
 * @param string $query          The search query.
 * @param int    $excerpt_length The length of the excerpt, default null.
 * @param string $excerpt_type   Either 'chars' or 'words', default null.
 *
 * @return string The created excerpt.
 */
function relevanssi_do_excerpt( $t_post, $query, $excerpt_length = null, $excerpt_type = null ) {
	global $post;

	if ( ! $excerpt_length ) {
		$excerpt_length = get_option( 'relevanssi_excerpt_length' );
	}
	if ( ! $excerpt_type ) {
		$excerpt_type = get_option( 'relevanssi_excerpt_type' );
	}

	// Back up the global post object, and replace it with the post we're working on.
	$old_global_post = null;
	if ( null !== $post ) {
		$old_global_post = $post;
	}
	$post = $t_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited

	$remove_stopwords = 'body';

	/**
	 * Filters the search query before excerpt-building.
	 *
	 * Allows filtering the search query before generating an excerpt. This can
	 * be useful if you make modifications to the search query, and it may help
	 * when working with stemming.
	 *
	 * @param string $query The search query.
	 */
	$query = apply_filters( 'relevanssi_excerpt_query', $query );

	$min_word_length = 2;
	/**
	 * Allows creating one-letter highlights.
	 *
	 * @param boolean Set to true to enable one-letter highlights.
	 */
	if ( apply_filters( 'relevanssi_allow_one_letter_highlights', false ) ) {
		$min_word_length = 1;
	}

	$terms = relevanssi_tokenize( $query, $remove_stopwords, $min_word_length );

	if ( is_array( $query ) ) {
		$untokenized_terms = array_filter( $query );
	} else {
		$untokenized_terms = array_filter( explode( ' ', $query ) );
	}
	// Flip, because the tokenized terms have the term as the array key.
	$untokenized_terms = array_flip( relevanssi_remove_stopwords_from_array( $untokenized_terms ) );
	$terms             = array_merge( $untokenized_terms, $terms );

	// These shortcodes cause problems with Relevanssi excerpts.
	$problem_shortcodes = array( 'layerslider', 'responsive-flipbook', 'breadcrumb', 'robogallery', 'gravityview', 'wp_show_posts' );
	/**
	 * Filters the excerpt-building problem shortcodes.
	 *
	 * Some shortcodes cause problems in Relevanssi excerpt-building. These
	 * shortcodes are disabled before building the excerpt. This filter allows
	 * modifying the list of shortcodes.
	 *
	 * @param array $problem_shortcodes Array of problematic shortcode names.
	 */
	$problem_shortcodes = apply_filters( 'relevanssi_disable_shortcodes_excerpt', $problem_shortcodes );
	array_walk( $problem_shortcodes, 'remove_shortcode' );

	/**
	 * Filters the post content before 'the_content'.
	 *
	 * Filters the post content in excerpt building process before 'the_content'
	 * filter is applied.
	 *
	 * @param string $content The post content.
	 * @param object $post    The post object.
	 * @param string $query   The search query.
	 */
	$content = apply_filters( 'relevanssi_pre_excerpt_content', $post->post_content, $post, $query );

	// Strip the problem shortcode tags from the content.
	$pattern = get_shortcode_regex( $problem_shortcodes );
	$content = preg_replace_callback( "/$pattern/", 'strip_shortcode_tag', $content );

	// Add the custom field content.
	if ( 'on' === get_option( 'relevanssi_excerpt_custom_fields' ) ) {
		$content .= relevanssi_get_custom_field_content( $post->ID );
	}

	// Autoembed discovery can really slow down excerpt-building.
	relevanssi_kill_autoembed();

	// This will print out the attachment file name in front of the excerpt, and we
	// don't want that.
	remove_filter( 'the_content', 'prepend_attachment' );

	/** This filter is documented in wp-includes/post-template.php */
	$content = apply_filters( 'the_content', $content );

	/**
	 * Filters the post content after 'the_content'.
	 *
	 * Filters the post content in excerpt building process after 'the_content'
	 * filter is applied.
	 *
	 * @param string $content The post content.
	 * @param object $post    The post object.
	 * @param string $query   The search query.
	 */
	$content = apply_filters( 'relevanssi_excerpt_content', $content, $post, $query );

	// Removes <script>, <embed> &c with content.
	$content = relevanssi_strip_invisibles( $content );

	// Add spaces between tags to avoid getting words stuck together.
	$content = preg_replace( '/(<\/[^>]+?>)(<[^>\/][^>]*?>)/', '$1 $2', $content );

	// This removes the tags, but leaves the content.
	$content = strip_tags( $content, get_option( 'relevanssi_excerpt_allowable_tags', '' ) );

	// Replace linefeeds and carriage returns with spaces.
	$content = preg_replace( "/\n\r|\r\n|\n|\r/", ' ', $content );

	if ( 'OR' === get_option( 'relevanssi_implicit_operator' ) || 'on' === get_option( 'relevanssi_index_synonyms' ) ) {
		$query = relevanssi_add_synonyms( $query );
	}

	// Find the appropriate spot from the post.
	$excerpt_data = relevanssi_create_excerpt( $content, $terms, $query, $excerpt_length, $excerpt_type );

	if ( 'none' !== get_option( 'relevanssi_index_comments' ) ) {
		// Use comment content as source material for excerpts.
		$comment_content = relevanssi_get_comments( $post->ID );
		$comment_content = preg_replace( '/(<\/[^>]+?>)(<[^>\/][^>]*?>)/', '$1 $2', $comment_content );
		$comment_content = strip_tags( $comment_content, get_option( 'relevanssi_excerpt_allowable_tags', '' ) );
		if ( ! empty( $comment_content ) ) {
			$comment_excerpts = relevanssi_create_excerpt( $comment_content, $terms, $query, $excerpt_length, $excerpt_type );
			if ( $comment_excerpts[1] > $excerpt_data[1] ) {
				// The excerpt created from comments is better than the one created from post data.
				$excerpt_data = $comment_excerpts;
			}
		}
	}

	if ( 'off' !== get_option( 'relevanssi_index_excerpt' ) ) {
		$excerpt_content = $post->post_excerpt;
		$excerpt_content = strip_tags( $excerpt_content, get_option( 'relevanssi_excerpt_allowable_tags', '' ) );

		if ( ! empty( $excerpt_content ) ) {
			$excerpt_excerpts = relevanssi_create_excerpt( $excerpt_content, $terms, $query, $excerpt_length, $excerpt_type );
			if ( $excerpt_excerpts[1] > $excerpt_data[1] ) {
				// The excerpt created from post excerpt is the best we found so far.
				$excerpt_data = $excerpt_excerpts;
			}
		}
	}

	$excerpt = $excerpt_data[0];
	$excerpt = trim( $excerpt );

	/**
	 * Filters the excerpt.
	 *
	 * Filters the post excerpt generated by Relevanssi before the highlighting
	 * is applied.
	 *
	 * @param string $excerpt  The excerpt.
	 * @param int    $post->ID The post ID.
	 */
	$excerpt = apply_filters( 'relevanssi_excerpt', $excerpt, $post->ID );

	// No ellipses are added if the excerpt is the whole (raw) post content.
	$whole_post_excerpted = false;
	if ( $excerpt === $post->post_content ) {
		$whole_post_excerpted = true;
	}

	/**
	 * Filters the ellipsis Relevanssi uses in excerpts.
	 *
	 * @param string $ellipsis Default '...'.
	 */
	$ellipsis = apply_filters( 'relevanssi_ellipsis', '...' );

	$highlight = get_option( 'relevanssi_highlight' );
	if ( 'none' !== $highlight ) {
		// Highlight on the front end and in AJAX requests, but not on admin pages.
		if ( ! is_admin() || ( defined( 'DOING_AJAX' ) && DOING_AJAX ) ) {
			$excerpt = relevanssi_highlight_terms( $excerpt, $query );
		}
	}

	$excerpt = relevanssi_close_tags( $excerpt );

	$excerpt_is_from_beginning_of_the_post = $excerpt_data[2];

	if ( ! $whole_post_excerpted ) {
		if ( ! $excerpt_is_from_beginning_of_the_post && ! empty( $excerpt ) ) {
			$excerpt = $ellipsis . $excerpt;
		}

		if ( ! empty( $excerpt ) ) {
			$excerpt = $excerpt . $ellipsis;
		}
	}

	if ( null !== $old_global_post ) {
		$post = $old_global_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
	}

	return $excerpt;
}

/**
 * Creates an excerpt from content.
 *
 * Since 2.6, this function no longer reads in the excerpt length and type from
 * the options, but instead receives them as the function parameters. That is
 * done to get uniform results in multisite searches. For backwards support,
 * calling this function without the length and type parameters falls back into
 * reasonable defaults.
 *
 * @param string $content        The content.
 * @param array  $terms          The search terms, tokenized (term as the key).
 * @param string $query          The search query.
 * @param int    $excerpt_length The length of the excerpt, default 30.
 * @param string $excerpt_type   Either 'chars' or 'words', default 'words'.
 *
 * @return array Element 0 is the excerpt, element 1 the number of term hits,
 * element 2 is true, if the excerpt is from the start of the content.
 */
function relevanssi_create_excerpt( $content, $terms, $query, $excerpt_length = 30, $excerpt_type = 'words' ) {
	$best_excerpt_term_hits = -1;
	$excerpt                = '';

	// Collapse all whitespace to single spaces; the leading space lets the
	// first word be matched like any other word.
	$content = ' ' . preg_replace( '/\s+/u', ' ', $content );
	$content = html_entity_decode( $content );

	// Finds all the phrases in the query.
	$phrases = relevanssi_extract_phrases( stripslashes( $query ) );

	/**
	 * This process generates an array of terms, which has single terms and all the
	 * phrases.
	 */
	$remove_stopwords = false;
	foreach ( $phrases as $phrase ) {
		// Reset for every phrase: otherwise words dropped for this phrase would be
		// carried back in from the list built for the previous phrase.
		$non_phrase_terms = array();
		$phrase_terms     = array_keys( relevanssi_tokenize( $phrase, $remove_stopwords ) );
		foreach ( array_keys( $terms ) as $term ) { // array_keys(), because tokenized terms have the term as key.
			if ( ! in_array( $term, $phrase_terms, true ) ) {
				$non_phrase_terms[ $term ] = true;
			}
		}

		$terms            = $non_phrase_terms;
		$terms[ $phrase ] = true;
	}

	// Sort the longest search terms first, because those are generally more significant.
	uksort( $terms, 'relevanssi_strlen_sort' );

	$start = false;
	if ( 'chars' === $excerpt_type ) {
		$prev_count = floor( $excerpt_length / 6 );

		list( $excerpt, $best_excerpt_term_hits, $start ) = relevanssi_extract_relevant( array_keys( $terms ), $content, $excerpt_length + 1, $prev_count );
	} else {
		list( $excerpt, $best_excerpt_term_hits, $start ) = relevanssi_extract_relevant_words( array_keys( $terms ), $content, $excerpt_length );
	}

	return array( $excerpt, $best_excerpt_term_hits, $start );
}

/**
 * Manages the highlighting in documents.
 *
 * Uses relevanssi_highlight_terms() to do the highlighting. Attached to
 * 'the_content' and 'comment_text' filter hooks.
 *
 * @global object  $wp_query               The global WP_Query object.
 * @global boolean $relevanssi_test_enable If true, this is a test.
 *
 * @param string $content The content to highlight.
 *
 * @return string The content with highlights.
 */
function relevanssi_highlight_in_docs( $content ) {
	global $wp_query, $relevanssi_test_enable;
	if ( ( is_singular() && is_main_query() ) || $relevanssi_test_enable ) {
		if ( isset( $wp_query->query_vars['highlight'] ) ) {
			// Local search.
			$query   = relevanssi_add_synonyms( $wp_query->query_vars['highlight'] );
			$in_docs = true;

			$highlighted_content = relevanssi_highlight_terms( $content, $query, $in_docs );
			if ( ! empty( $highlighted_content ) ) {
				// Only accept a non-empty result, so a failed highlighting run
				// never blanks out the post content.
				$content = $highlighted_content;
			}
		}
	}

	return $content;
}

/**
 * Adds highlighting to content.
 *
 * Adds highlighting to content based on Relevanssi highlighting settings (if
 * you want to override the settings, 'pre_option_relevanssi_highlight' filter
 * hook is your friend).
 *
 * @param string       $content The content to highlight.
 * @param string|array $query   The search query (should be a string, can
 * sometimes be an array).
 * @param boolean      $in_docs Are we highlighting post content? Default false.
 *
 * @return string The $content with highlighting.
 */
function relevanssi_highlight_terms( $content, $query, $in_docs = false ) {
	$type = get_option( 'relevanssi_highlight' );
	if ( 'none' === $type ) {
		return $content;
	}

	switch ( $type ) {
		case 'mark':
			$start_emp = '<mark>';
			$end_emp   = '</mark>';
			break;
		case 'strong':
			$start_emp = '<strong>';
			$end_emp   = '</strong>';
			break;
		case 'em':
			$start_emp = '<em>';
			$end_emp   = '</em>';
			break;
		case 'col':
			$col = get_option( 'relevanssi_txt_col' );
			if ( ! $col ) {
				$col = '#ff0000';
			}
			$start_emp = "<span style='color: $col'>";
			$end_emp   = '</span>';
			break;
		case 'bgcol':
			$col = get_option( 'relevanssi_bg_col' );
			if ( ! $col ) {
				$col = '#ff0000';
			}
			$start_emp = "<span style='background-color: $col'>";
			$end_emp   = '</span>';
			break;
		case 'css':
			$css = get_option( 'relevanssi_css' );
			if ( ! $css ) {
				$css = 'color: #ff0000';
			}
			$start_emp = "<span style='$css'>";
			$end_emp   = '</span>';
			break;
		case 'class':
			$css = get_option( 'relevanssi_class' );
			if ( ! $css ) {
				$css = 'relevanssi-query-term';
			}
			$start_emp = "<span class='$css'>";
			$end_emp   = '</span>';
			break;
		default:
			return $content;
	}

	// Highlights are first marked with these placeholder tokens and only turned
	// into real tags at the very end, after all the clean-up steps.
	$start_emp_token = '**{}[';
	$end_emp_token   = ']}**';

	if ( function_exists( 'mb_internal_encoding' ) ) {
		mb_internal_encoding( 'UTF-8' );
	}

	/**
	 * Runs before tokenizing the terms in highlighting.
	 */
	do_action( 'relevanssi_highlight_tokenize' );

	// Setting min_word_length to 2, in order to avoid 1-letter highlights.
	$min_word_length = 2;
	/**
	 * Allows creating one-letter highlights.
	 *
	 * @param boolean Set to true to enable one-letter highlights.
	 */
	if ( apply_filters( 'relevanssi_allow_one_letter_highlights', false ) ) {
		$min_word_length = 1;
	}

	$remove_stopwords = 'body';
	$terms            = array_keys( relevanssi_tokenize( $query, $remove_stopwords, $min_word_length ) );

	if ( ! is_array( $query ) ) {
		$query = explode( ' ', $query );
	}

	$body_stopwords = function_exists( 'relevanssi_fetch_body_stopwords' ) ? relevanssi_fetch_body_stopwords() : array();

	$untokenized_terms = array_filter(
		$query,
		function( $value ) use ( $min_word_length, $body_stopwords ) {
			if ( in_array( $value, $body_stopwords, true ) ) {
				return false;
			}
			if ( relevanssi_strlen( $value ) > $min_word_length ) {
				return true;
			}
			return false;
		}
	);

	$terms = array_unique( array_merge( $untokenized_terms, $terms ) );
	array_walk( $terms, 'relevanssi_array_walk_trim' ); // Numeric search terms begin with a space.

	if ( is_array( $query ) ) {
		$query = implode( ' ', $query );
	}
	$phrases = relevanssi_extract_phrases( stripslashes( $query ) );

	$remove_stopwords = false;
	foreach ( $phrases as $phrase ) {
		// Reset for every phrase: otherwise words dropped for this phrase would be
		// carried back in (and duplicated) from the previous phrase's list.
		$non_phrase_terms = array();
		$phrase_terms     = array_keys( relevanssi_tokenize( $phrase, $remove_stopwords ) );
		foreach ( $terms as $term ) {
			if ( ! in_array( $term, $phrase_terms, true ) ) {
				$non_phrase_terms[] = $term;
			}
		}

		$terms   = $non_phrase_terms;
		$terms[] = $phrase;
	}

	usort( $terms, 'relevanssi_strlen_sort' );

	$word_boundaries_available = false;
	if ( 'on' === get_option( 'relevanssi_word_boundaries', 'off' ) ) {
		$word_boundaries_available = true;
	}

	// Areas where highlight tokens must not remain: inside HTML tags, inside
	// HTML entities, and inside style, script, object, embed, pre and code
	// elements.
	$protected_patterns = array(
		'/<.*>/U',
		'/&.*;/U',
		'/<(style|script|object|embed|pre|code).*<\/(style|script|object|embed|pre|code)>/Us',
	);
	$emp_tokens         = array( $start_emp_token, $end_emp_token );

	$content = html_entity_decode( $content, ENT_QUOTES, 'UTF-8' );

	foreach ( $terms as $term ) {
		$pr_term = preg_quote( $term, '/' );
		$pr_term = relevanssi_add_accent_variations( $pr_term );

		if ( $word_boundaries_available ) {
			$regex = "/(\b$pr_term\b)/iu";
			if ( 'never' !== get_option( 'relevanssi_fuzzy' ) ) {
				$regex = "/(\b$pr_term|$pr_term\b)/iu";
			}
		} else {
			$regex = "/($pr_term)/iu";
		}

		$highlighted = preg_replace( $regex, $start_emp_token . '\\1' . $end_emp_token, $content );
		if ( null === $highlighted ) {
			// preg_replace() failed (for example on invalid UTF-8 with the /u
			// modifier); skip this term instead of wiping out the content.
			continue;
		}
		$content = $highlighted;

		foreach ( $protected_patterns as $protected_pattern ) {
			if ( preg_match_all( $protected_pattern, $content, $matches ) > 0 ) {
				// Each element of $matches is an array of strings; str_replace()
				// pairs every matched string with its token-free version.
				foreach ( $matches as $match ) {
					$new_match = str_replace( $emp_tokens, '', $match );
					$content   = str_replace( $match, $new_match, $content );
				}
			}
		}
	}

	$content = relevanssi_remove_nested_highlights( $content, $start_emp_token, $end_emp_token );
	$content = relevanssi_fix_entities( $content, $in_docs );

	/**
	 * Allows cleaning unwanted highlights.
	 *
	 * This filter lets you clean unwanted highlights, for example from within
	 * <pre> tags. To remove a highlight, remove the matching starting and
	 * ending tokens from the $content string.
	 *
	 * @param string $content         The highlighted content.
	 * @param string $start_emp_token A token that signifies the start of a
	 * highlight.
	 * @param string $end_emp_token   A token that signifies the end of a
	 * highlight.
	 *
	 * @return string The highlighted content.
	 */
	$content = apply_filters( 'relevanssi_clean_excerpt', $content, $start_emp_token, $end_emp_token );

	$content = str_replace( $start_emp_token, $start_emp, $content );
	$content = str_replace( $end_emp_token, $end_emp, $content );

	// Merge highlights that touch each other or are separated only by whitespace.
	$content = str_replace( $end_emp . $start_emp, '', $content );
	if ( function_exists( 'mb_ereg_replace' ) ) {
		$pattern = $end_emp . '\s*' . $start_emp;
		$content = mb_ereg_replace( $pattern, ' ', $content );
	}

	return $content;
}

/**
 * Fixes problems with entities.
 *
 * For excerpts, runs htmlentities() on the excerpt, then converts the allowed
 * tags back into tags.
 *
 * @param string  $excerpt The excerpt to fix.
 * @param boolean $in_docs If true, we are manipulating post content, and need
 * to work in a different fashion.
 *
 * @return string The $excerpt with entities fixed.
 */
function relevanssi_fix_entities( $excerpt, $in_docs ) {
	if ( ! $in_docs ) {
		// For excerpts, use htmlentities() to convert.
		$excerpt = htmlentities( $excerpt, ENT_NOQUOTES, 'UTF-8' );

		// Except for allowed tags, which are turned back into tags.
		$tags = get_option( 'relevanssi_excerpt_allowable_tags', '' );
		$tags = trim( str_replace( '<', ' <', $tags ) );
		$tags = explode( ' ', $tags );

		$closing_tags = relevanssi_generate_closing_tags( $tags );

		$tags_entitied = htmlentities( implode( ' ', $tags ), ENT_NOQUOTES, 'UTF-8' );
		$tags_entitied = explode( ' ', $tags_entitied );

		$closing_tags_entitied = htmlentities( implode( ' ', $closing_tags ), ENT_NOQUOTES, 'UTF-8' );
		$closing_tags_entitied = explode( ' ', $closing_tags_entitied );

		$tags_entitied_regexped = array();

		$i = 0;
		foreach ( $tags_entitied as $tag ) {
			// The tag is in entity form here, so the capture group for the
			// attributes has to go in front of the entitied closing bracket.
			$tag     = str_replace( '&gt;', '(.*?)&gt;', $tag );
			$pattern = "~$tag~";

			$tags_entitied_regexped[] = $pattern;

			// The replacement tag gets the captured attributes back.
			$matching_tag = $tags[ $i ];
			$matching_tag = str_replace( '>', '\1>', $matching_tag );
			$tags[ $i ]   = $matching_tag;
			$i++;
		}

		$closing_tags_entitied_regexped = array();
		foreach ( $closing_tags_entitied as $tag ) {
			$pattern = '~' . preg_quote( $tag, '~' ) . '~';

			$closing_tags_entitied_regexped[] = $pattern;
		}

		$tags          = array_merge( $tags, $closing_tags );
		$tags_entitied = array_merge( $tags_entitied_regexped, $closing_tags_entitied_regexped );

		$excerpt = preg_replace( $tags_entitied, $tags, $excerpt );

		// In case there are attributes. This is the easiest solution, as
		// using quotes and apostrophes un-entitied can't really break
		// anything.
		$excerpt = str_replace( '&quot;', '"', $excerpt );
		$excerpt = str_replace( '&#039;', "'", $excerpt );
	} else {
		// Running htmlentities() for whole posts tends to ruin things.
		// However, we may want to run htmlentities() for anything inside
		// <pre> and <code> tags.
		/**
		 * Choose whether htmlentities() is run inside <pre> tags or not. If
		 * your pages have HTML code inside <pre> tags, set this to false.
		 *
		 * @param boolean If true, htmlentities() will be used inside <pre>
		 * tags.
		 */
		if ( apply_filters( 'relevanssi_entities_inside_pre', true ) ) {
			$excerpt = relevanssi_entities_inside( $excerpt, 'pre' );
		}
		/**
		 * Choose whether htmlentities() is run inside <code> tags or not. If
		 * your pages have HTML code inside <code> tags, set this to false.
		 *
		 * @param boolean If true, htmlentities() will be used inside <code>
		 * tags.
		 */
		if ( apply_filters( 'relevanssi_entities_inside_code', true ) ) {
			$excerpt = relevanssi_entities_inside( $excerpt, 'code' );
		}
	}
	return $excerpt;
}

/**
 * Runs htmlentities() for content inside specified tags.
 *
 * Every `<tag ...>...</tag>` element is handled on its own, so empty elements
 * and content that looks like a regex backreference (`\1`, `$1`) cannot
 * disturb the other elements. The opening and closing tag names are written
 * out as $tag; attributes of the opening tag are kept as they are.
 *
 * @param string $content The content.
 * @param string $tag     The tag name; used as-is inside the regular
 * expression.
 *
 * @return string $content The content with HTML code inside the $tag tags
 * ran through htmlentities().
 */
function relevanssi_entities_inside( $content, $tag ) {
	$converted = preg_replace_callback(
		'/<' . $tag . '(.*?)>(.*?)<\/' . $tag . '>/ims',
		function( $match ) use ( $tag ) {
			return '<' . $tag . $match[1] . '>' . htmlentities( $match[2], ENT_QUOTES, 'UTF-8' ) . '</' . $tag . '>';
		},
		$content
	);

	// preg_replace_callback() returns null on a PCRE error; keep the original
	// content in that case.
	if ( null === $converted ) {
		return $content;
	}
	return $converted;
}

/**
 * Generates closing tags for an array of tags.
 *
 * For each tag, two variations are generated: the closing tag (`</tag>`) and
 * the self-closing tag (`<tag/>`).
 *
 * @param array $tags Array of tags (in the `<tag>` format).
 *
 * @return array $closing_tags Array of closing tags.
 */
function relevanssi_generate_closing_tags( $tags ) {
	$closing_tags = array();
	foreach ( $tags as $tag ) {
		$a = str_replace( '<', '</', $tag );
		$b = str_replace( '>', '/>', $tag );

		$closing_tags[] = $a;
		$closing_tags[] = $b;
	}
	return $closing_tags;
}

/**
 * Removes nested highlights from a string.
 *
 * If there are highlights within highlights in a string, this function will
 * clean out the nested highlights, leaving just the outmost highlight tokens.
 *
 * @param string $string The content.
 * @param string $begin  The beginning highlight token.
 * @param string $end    The ending highlight token.
 *
 * @return string The string with nested highlights cleaned out.
 */
function relevanssi_remove_nested_highlights( $string, $begin, $end ) {
	$bits       = explode( $begin, $string );
	$new_bits   = array( $bits[0] );
	$count_bits = count( $bits );
	$depth      = -1;
	for ( $i = 1; $i < $count_bits; $i++ ) {
		$depth++;
		if ( 0 === $depth ) {
			// Only the outermost beginning token is kept.
			$new_bits[] = $begin;
		}
		if ( '' === $bits[ $i ] ) {
			// Two beginning tokens in a row. Compared against '' instead of
			// using empty(), so that a "0" segment is not thrown away.
			continue;
		}
		$end_count = substr_count( $bits[ $i ], $end );
		if ( $end_count ) {
			if ( $end_count < $depth ) {
				// All the ending tokens here close nested highlights.
				$new_bits[] = str_replace( $end, '', $bits[ $i ], $count );
				$depth     -= $count;
			} else {
				// Remove the ending tokens of the nested highlights, the next
				// one closes the outermost highlight.
				$end_p      = preg_quote( $end, '#' );
				$new_bits[] = preg_replace( '#' . $end_p . '#', '', $bits[ $i ], $depth );
				$depth      = -1;
			}
		} else {
			$new_bits[] = $bits[ $i ];
		}
	}
	return join( '', $new_bits );
}

/**
 * Finds the locations of each word.
 *
 * Originally lifted from http://www.boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
 * Finds the location of each word in the fulltext.
 *
 * @author Ben Boyter
 *
 * @param array  $words    An array of words to locate.
 * @param string $fulltext The fulltext where to find them.
 *
 * @return array Array of locations, unique and sorted in ascending order.
 */
function relevanssi_extract_locations( $words, $fulltext ) {
	/**
	 * Optimizes the excerpt creation.
	 *
	 * @param boolean If true, stop looking after ten locations are found.
	 */
	$optimize = apply_filters( 'relevanssi_optimize_excerpts', false );

	$locations = array();
	foreach ( $words as $word ) {
		$count_locations = 0;
		$wordlen         = relevanssi_strlen( $word );
		if ( $wordlen < 1 ) {
			// A zero-length word would never move the search offset forward.
			continue;
		}
		$loc = relevanssi_stripos( $fulltext, $word, 0 );
		while ( false !== $loc ) {
			$locations[] = $loc;
			$loc         = relevanssi_stripos( $fulltext, $word, $loc + $wordlen );
			$count_locations++;
			// If more than ten locations are found, quit: there's probably a
			// good one in there, and this saves plenty of time.
			if ( $optimize && $count_locations > 10 ) {
				break;
			}
		}
	}
	$locations = array_unique( $locations );
	sort( $locations );

	return $locations;
}

/**
 * Counts how many times the words appear in the text.
 *
 * @param array  $words         An array of words.
 * @param string $complete_text The text where to count the words.
 *
 * @return int Number of times the words appear in the text.
 */
function relevanssi_count_matches( $words, $complete_text ) {
	$count          = 0;
	$lowercase_text = relevanssi_strtolower( $complete_text, 'UTF-8' );

	$word_boundaries_available = false;
	if ( 'on' === get_option( 'relevanssi_word_boundaries', 'off' ) ) {
		$word_boundaries_available = true;
	}
	$fuzzy_matching = false;
	if ( $word_boundaries_available && 'never' !== get_option( 'relevanssi_fuzzy' ) ) {
		$fuzzy_matching = true;
	}

	foreach ( $words as $word ) {
		$word_slice = relevanssi_strtolower( relevanssi_add_accent_variations( preg_quote( $word, '/' ) ), 'UTF-8' );
		if ( $fuzzy_matching ) {
			$regex = "/\b$word_slice|$word_slice\b/";
		} elseif ( $word_boundaries_available ) {
			$regex = "/\b$word_slice\b/";
		} else {
			$regex = "/$word_slice/";
		}

		// Splitting the text at every match gives one more piece than there
		// are matches.
		$lines = preg_split( $regex, $lowercase_text );
		if ( $lines && count( $lines ) > 1 ) {
			$count += count( $lines ) - 1;
		}
	}

	return $count;
}

/**
 * Works out which is the most relevant portion to display.
 *
 * This is done by looping over each match and finding the smallest distance
 * between two found strings. The idea being that the closer the terms are the
 * better match the snippet would be. When checking for matches we only change
 * the location if there is a better match. The only exception is where we have
 * only two matches in which case we just take the first as will be equally
 * distant.
 *
 * @author Ben Boyter
 *
 * @param array $locations Locations of the words.
 * @param int   $prevcount How much text to include before the location.
 *
 * @return int Starting position for the snippet.
 */
function relevanssi_determine_snip_location( $locations, $prevcount ) {
	if ( ! is_array( $locations ) || empty( $locations ) ) {
		return 0;
	}

	// If we only have 1 match we dont actually do the for loop so set to the first.
	$startpos     = $locations[0];
	$loc_count    = count( $locations );
	$smallestdiff = PHP_INT_MAX;

	// If we only have 2 skip as its probably equally relevant.
	if ( $loc_count > 2 ) {
		// Skip the first as we check 1 behind.
		for ( $i = 1; $i < $loc_count; $i++ ) {
			if ( $i === $loc_count - 1 ) { // At the end.
				$diff = $locations[ $i ] - $locations[ $i - 1 ];
			} else {
				$diff = $locations[ $i + 1 ] - $locations[ $i ];
			}

			if ( $smallestdiff > $diff ) {
				$smallestdiff = $diff;
				$startpos     = $locations[ $i ];
			}
		}
	}

	if ( $startpos > $prevcount ) {
		$startpos = $startpos - $prevcount;
	} else {
		$startpos = 0;
	}

	return $startpos;
}

/**
 * Extracts relevant part of the full text.
 *
 * Finds the part of full text with as many relevant words as possible. 1/6
 * ratio on prevcount tends to work pretty well and puts the terms in the middle
 * of the excerpt.
 *
 * Source: https://boyter.org/2013/04/building-a-search-result-extract-generator-in-php/
 *
 * @author Ben Boyter
 *
 * @param array  $words          An array of relevant words.
 * @param string $fulltext       The source text.
 * @param int    $excerpt_length The length of the excerpt, default 300
 * characters.
 * @param int    $prevcount      How much text include before the words, default
 * 50 characters.
 *
 * @return array The excerpt, number of words in the excerpt, true if it's the
 * start of the $fulltext.
 */
function relevanssi_extract_relevant( $words, $fulltext, $excerpt_length = 300, $prevcount = 50 ) {
	$text_length = relevanssi_strlen( $fulltext );

	if ( $text_length <= $excerpt_length ) {
		// The whole text fits in the excerpt, so it starts from the beginning.
		return array( $fulltext, 1, true );
	}

	$locations = relevanssi_extract_locations( $words, $fulltext );
	$startpos  = relevanssi_determine_snip_location( $locations, $prevcount );

	// If we are going to snip too much...
	if ( $text_length - $startpos < $excerpt_length ) {
		// Keep the position a non-negative integer: a negative start would make
		// the substring function count from the end of the text, and a float
		// would never be identical to 0 in the check below.
		$startpos = max( 0, (int) ( $startpos - ( $text_length - $startpos ) / 2 ) );
	}

	$substr = 'substr';
	if ( function_exists( 'mb_substr' ) ) {
		$substr = 'mb_substr';
	}

	$excerpt = call_user_func( $substr, $fulltext, $startpos, $excerpt_length );

	$start = false;
	if ( 0 === $startpos ) {
		$start = true;
	}

	$besthits = count( relevanssi_extract_locations( $words, $excerpt ) );

	return array( $excerpt, $besthits, $start );
}

/**
 * Extracts relevant words of the full text.
 *
 * Finds the part of full text with as many relevant words as possible. If the
 * excerpt length parameter is less than 1, the function will immediately
 * return an empty excerpt in order to avoid an endless loop.
 *
 * @param array  $terms          An array of relevant words.
 * @param string $content        The source text.
 * @param int    $excerpt_length The length of the excerpt, default 30 words.
 *
 * @return array The excerpt, number of words in the excerpt, true if it's the
 * start of the $fulltext.
 */
function relevanssi_extract_relevant_words( $terms, $content, $excerpt_length = 30 ) {
	if ( $excerpt_length < 1 ) {
		return array( '', 0, false );
	}

	// Drop the empty strings, but keep words like "0".
	$words = array_filter( explode( ' ', $content ), 'strlen' );

	$offset      = 0;
	$tries       = 0;
	$excerpt     = '';
	$count_words = count( $words );
	$start       = false;

	$best_excerpt_term_hits = -1;

	// Go through the content in windows of $excerpt_length words and keep the
	// window with the most term hits.
	while ( $offset < $count_words ) {
		if ( $offset + $excerpt_length > $count_words ) {
			// The last window is moved back so that it is full-length.
			$offset = $count_words - $excerpt_length;
			if ( $offset < 0 ) {
				$offset = 0;
			}
		}

		$excerpt_slice = array_slice( $words, $offset, $excerpt_length );
		$excerpt_slice = ' ' . implode( ' ', $excerpt_slice );

		$count_matches = relevanssi_count_matches( $terms, $excerpt_slice );
		if ( $count_matches > 0 && $count_matches > $best_excerpt_term_hits ) {
			$best_excerpt_term_hits = $count_matches;
			$excerpt                = $excerpt_slice;
			if ( 0 === $offset ) {
				$start = true;
			} else {
				$start = false;
			}
		}
		$tries++;

		/**
		 * Enables the excerpt optimization.
		 *
		 * If your posts are very long, building excerpts can be really slow.
		 * To speed up the process, you can enable optimization, which means
		 * Relevanssi only creates 50 excerpt candidates.
		 *
		 * @param boolean Return true to enable optimization, default false.
		 */
		if ( apply_filters( 'relevanssi_optimize_excerpts', false ) ) {
			if ( $tries > 50 ) {
				// An optimization trick: try only 50 times.
				break;
			}
		}

		$offset += $excerpt_length;
	}

	if ( '' === $excerpt ) {
		/**
		 * Nothing found, take the beginning of the post. +2, because the first
		 * index is an empty space and the last index is the rest of the post.
		 */
		$excerpt = explode( ' ', $content, $excerpt_length + 2 );
		array_pop( $excerpt );
		$excerpt = implode( ' ', $excerpt );
		$start   = true;
	}

	return array( $excerpt, $best_excerpt_term_hits, $start );
}

/**
 * Adds accented variations to letters.
 *
 * In order to have non-accented letters in search terms match the accented terms in
 * full text, this function adds accent variations to the search terms. The
 * result is a regular expression fragment: an optional hyphen is also allowed
 * between the characters of the word.
 *
 * @param string $word The word to manipulate; expected to be already escaped
 * for regular expression use, as backslash escapes are kept together.
 *
 * @return string The word with accent variations.
 */
function relevanssi_add_accent_variations( $word ) {
	/**
	 * Filters the accent replacement array.
	 *
	 * @param array Array of replacements. 'from' has the source characters, 'to' the replacements.
	 */
	$replacement_arrays = apply_filters(
		'relevanssi_accents_replacement_arrays',
		array(
			'from'    => array( 'a', 'c', 'e', 'i', 'o', 'u', 'n' ),
			'to'      => array( '(a|á|à|â)', '(c|ç)', '(e|é|è|ê|ë)', '(i|í|ì|î|ï)', '(o|ó|ò|ô|õ)', '(u|ú|ù|ü|û)', '(n|ñ)' ),
			'from_re' => array( "/(s)('|’)?$/", "/[^\(\|]('|’)/" ),
			'to_re'   => array( "(('|’)?\\1|\\1('|’)?)", "?('|’)?" ),
		)
	);

	$len        = relevanssi_strlen( $word );
	$word_array = array();
	$escaped    = false;
	for ( $i = 0; $i < $len; $i++ ) {
		$char = relevanssi_substr( $word, $i, 1 );
		if ( '\\' === $char && ! $escaped ) {
			// Keep the backslash together with the character it escapes.
			$escaped = true;
			continue;
		}
		if ( $escaped ) {
			$escaped = false;
			$char    = '\\' . $char;
		}
		$word_array[] = $char;
	}
	$word = implode( '-?', $word_array );

	$word = str_ireplace( $replacement_arrays['from'], $replacement_arrays['to'], $word );
	$word = preg_replace( $replacement_arrays['from_re'], $replacement_arrays['to_re'], $word );

	return $word;
}

/**
 * Fetches the custom field content for a post.
 *
 * @param int $post_id The post ID.
 *
 * @return string The custom field content.
 */
function relevanssi_get_custom_field_content( $post_id ) {
	$custom_field_content = '';
	$remove_underscore_fields = false;

	$custom_fields = relevanssi_get_custom_fields();
	if ( isset( $custom_fields ) && 'all' === $custom_fields ) {
		$custom_fields = get_post_custom_keys( $post_id );
	}
	if ( isset( $custom_fields ) && 'visible' === $custom_fields ) {
		$custom_fields            = get_post_custom_keys( $post_id );
		$remove_underscore_fields = true;
	}
	/* Documented in lib/indexing.php. */
	$custom_fields = apply_filters( 'relevanssi_index_custom_fields', $custom_fields, $post_id );

	if ( function_exists( 'relevanssi_get_child_pdf_content' ) ) {
		$custom_field_content .= ' ' . relevanssi_get_child_pdf_content( $post_id );
	}

	if ( is_array( $custom_fields ) ) {
		$custom_fields = array_unique( $custom_fields ); // No reason to index duplicates.

		if ( function_exists( 'relevanssi_add_repeater_fields' ) ) {
			relevanssi_add_repeater_fields( $custom_fields, $post_id );
		}

		foreach ( $custom_fields as $field ) {
			if ( $remove_underscore_fields ) {
				// Field names beginning with an underscore are skipped in the
				// 'visible' mode.
				if ( '_' === substr( $field, 0, 1 ) ) {
					continue;
				}
			}
			/* Documented in lib/indexing.php. */
			$values = apply_filters( 'relevanssi_custom_field_value', get_post_meta( $post_id, $field, false ), $field, $post_id );
			if ( empty( $values ) || ! is_array( $values ) ) {
				continue;
			}
			foreach ( $values as $value ) {
				// Quick hack : allow indexing of PODS relationship custom fields. @author TMV.
				if ( is_array( $value ) && isset( $value['post_title'] ) ) {
					$value = $value['post_title'];
				}

				// Flatten other array data.
				if ( is_array( $value ) ) {
					$value_as_string = '';
					array_walk_recursive(
						$value,
						function( $val ) use ( &$value_as_string ) {
							$value_as_string .= ' ' . $val;
						}
					);
					$value = $value_as_string;
				}
				$custom_field_content .= ' ' . $value;
			}
		}
	}
	/**
	 * Filters the custom field content for excerpt use.
	 *
	 * @param string $custom_field_content Custom field content for excerpts.
	 */
	return apply_filters( 'relevanssi_excerpt_custom_field_content', $custom_field_content );
}

/**
 * Kills the autoembed filter hook on 'the_content'.
 *
 * @global array $wp_filter The global filter array.
 *
 * It's an object hook, so this isn't as simple as doing remove_filter(). This
 * needs to be done, because autoembed discovery can take a very, very long
 * time.
 */
function relevanssi_kill_autoembed() {
	global $wp_filter;
	if ( isset( $wp_filter['the_content']->callbacks ) ) {
		foreach ( $wp_filter['the_content']->callbacks as $priority => $bucket ) {
			foreach ( array_keys( $bucket ) as $key ) {
				// The callback key ends with the name of the hooked method.
				if ( 'autoembed' === substr( $key, -9 ) ) {
					unset( $wp_filter['the_content']->callbacks[ $priority ][ $key ] );
				}
			}
		}
	}
}