\]]|^)($)?([\w]+?://(?:[\w\\x80-\\xff\#$%&~/=?@\[\](+-]|[.,;:](?![\s<]|($)?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is'; //Used by the edit and unlink callbacks var $old_url = ''; var $new_url = ''; /** * Parse a string for plaintext URLs * * @param string $content The text to parse. * @param string $base_url The base URL. Ignored. * @param string $default_link_text Default link text. * @return array An array of new blcLinkInstance objects. */ function parse($content, $base_url = '', $default_link_text = ''){ //Don't want to detect URLs inside links or tag attributes - //there are already other parsers for that. //Avoid http://... $content = preg_replace('#]*>.*?#si', '', $content); //HTML tags are treated as natural boundaries for plaintext URLs //(since we strip tags, we must place another boundary char where they were). //The closing tag of [shortcodes] is also treated as a boundary. $content = str_replace(array('<', '>', '[/'), array("\n<", ">\n", "\n[/"), $content); //Finally, kill all tags. $content = strip_tags($content); //Find all URLs $found = preg_match_all( $this->url_regexp, $content, $matches ); $instances = array(); if ( $found ){ //Create a new instance for each match foreach($matches[2] as $match){ $url = $this->validate_url(trim($match)); if ( $url == false ) { continue; } //Create a new link instance. $instance = new blcLinkInstance(); $instance->set_parser($this); $instance->raw_url = $match; $instance->link_text = $match; $link_obj = new blcLink($url); //Creates or loads the link $instance->set_link($link_obj); $instances[] = $instance; } } return $instances; } /** * Validate and sanitize a URL. * * @param string $url * @return bool|string A valid URL, or false if the URL is not valid. */ protected function validate_url($url) { //Do a little bit of validation $url = esc_url_raw($url); if ( empty($url) ){ return false; } if ( function_exists('filter_var') ){ //Note: filter_var() is no panacea as it accepts many invalid URLs if ( !filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_HOST_REQUIRED) ){ return false; } } $parts = @parse_url($url); if ( empty($parts['host']) || !strpos($parts['host'], '.') ){ return false; } return $url; } /** * Change all occurrences of a given plaintext URLs to a new URL. * * @param string $content Look for URLs in this string. * @param string $new_url Change them to this URL. * @param string $old_url The URL to look for. * @param string $old_raw_url The raw, not-normalized URL. Optional. * * @return array|WP_Error If successful, the return value will be an associative array with two * keys : 'content' - the modified content, and 'raw_url' - the new raw, non-normalized URL used * for the modified links. In most cases, the returned raw_url will be equal to the new_url. */ function edit($content, $new_url, $old_url, $old_raw_url = ''){ $this->new_url = $new_url; if ( empty($old_raw_url) ){ $this->old_url = $old_url; } else { $this->old_url = $old_raw_url; } return array( 'content' => preg_replace_callback($this->url_regexp, array(&$this, 'edit_callback'), $content), 'raw_url' => $new_url, 'link_text' => $new_url, ); } function edit_callback($match){ if ( $match[2] == $this->old_url ){ return $this->new_url; } else { return $match[0]; } } /** * Remove all occurrences of a specific plaintext URL. * * @param string $content Look for URLs in this string. * @param string $url The URL to look for. * @param string $raw_url The raw, non-normalized version of the URL to look for. Optional. * @return string Input string with all matching plaintext URLs removed. */ function unlink($content, $url, $raw_url = ''){ if ( empty($raw_url) ){ $this->old_url = $url; } else { $this->old_url = $raw_url; } return preg_replace_callback($this->url_regexp, array(&$this, 'unlink_callback'), $content); } function unlink_callback($match){ if ( $match[2] == $this->old_url ){ return ''; } else { return $match[0]; } } }