Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
90.52% |
105 / 116 |
|
50.00% |
5 / 10 |
CRAP | |
0.00% |
0 / 1 |
| Parsed_Link | |
90.52% |
105 / 116 |
|
50.00% |
5 / 10 |
44.58 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| parse_html_string | |
90.00% |
27 / 30 |
|
0.00% |
0 / 1 |
9.08 | |||
| has_internal_url | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
| has_external_url | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
4.07 | |||
| is_external_url | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| is_empty | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| is_contains_unwanted_terms | |
96.30% |
26 / 27 |
|
0.00% |
0 / 1 |
7 | |||
| get_cleaned_link | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
5 | |||
| get_type | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
| replace_text_with_icons | |
54.55% |
6 / 11 |
|
0.00% |
0 / 1 |
7.35 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Class with utility functions for each action and meta link. |
| 4 | * |
| 5 | * @package brianhenryie/bh-wp-plugins-page |
| 6 | */ |
| 7 | |
| 8 | namespace BrianHenryIE\WP_Plugins_Page\API; |
| 9 | |
| 10 | use DOMDocument; |
| 11 | use DOMElement; |
| 12 | use DOMNode; |
| 13 | |
/**
 * A single action or meta link from a plugins.php row, parsed for inspection and clean-up.
 *
 * Uses DOMDocument to extract the anchors, their URLs and the bare text from the link's HTML, and provides
 * utility functions for classifying and sanitising that content.
 */
class Parsed_Link {

	/**
	 * The link's original array key. Only string keys are retained; integer keys leave this null.
	 *
	 * @var string|null
	 */
	protected ?string $key = null;

	/**
	 * The original HTML, exactly as passed to the constructor.
	 *
	 * @var string
	 */
	protected string $original = '';

	/**
	 * The parsed HTML. This may be updated from the original.
	 *
	 * Left uninitialised when the HTML was empty or could not be parsed, so check with isset() before use.
	 *
	 * @var DOMDocument
	 */
	protected DOMDocument $dom_document;

	/**
	 * The bare text from the HTML.
	 *
	 * @var string
	 */
	protected string $text = '';

	/**
	 * Indicator if the HTML is only one link, or has additional text or links too.
	 *
	 * Null until HTML has been successfully parsed.
	 *
	 * @var bool|null
	 */
	protected ?bool $is_only_link = null;

	/**
	 * All the HTML anchor elements found in the string, indexed by their position in the document.
	 *
	 * @var array<int,DOMElement> $anchors
	 */
	protected array $anchors = array();

	/**
	 * All non-empty URLs found in the HTML string, indexed by the position of the anchor they came from.
	 *
	 * @var array<int,string>
	 */
	protected array $urls = array();

	/**
	 * A representation of the HTML in a plugins.php meta or action link.
	 *
	 * @param int|string $key The original array key.
	 * @param string     $value The html string.
	 */
	public function __construct( $key, string $value ) {

		if ( is_string( $key ) ) {
			$this->key = $key;
		}

		$this->original = $value;

		$this->parse_html_string( $value );
	}

	/**
	 * Pick out the text, the links, and determine if this link contains only an anchor element.
	 *
	 * Also empties the contents of any script elements in the parsed document.
	 *
	 * NOTE(review): DOMDocument::loadHTML() is documented as assuming ISO-8859-1 when the markup declares no
	 * charset; confirm that non-ASCII link text survives parsing as expected.
	 *
	 * @param string $html_string A HTML string we want to analyse.
	 *
	 * phpcs:disable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	 * phpcs:disable WordPress.PHP.NoSilencedErrors.Discouraged
	 */
	protected function parse_html_string( string $html_string ): void {

		// Nothing to parse. Whitespace-only input is skipped too since it cannot contain a link.
		if ( '' === trim( $html_string ) ) {
			return;
		}

		$dom_document = new DOMDocument();

		// Buffer libxml's parse errors rather than emitting a warning for every piece of imperfect markup.
		$previous_internal_errors_value = libxml_use_internal_errors( true );
		$bool_result                    = @$dom_document->loadHTML( $html_string );
		if ( false === $previous_internal_errors_value ) {
			// Buffering was off before this call, so release what was collected rather than let it accumulate.
			libxml_clear_errors();
		}
		libxml_use_internal_errors( $previous_internal_errors_value );

		if ( false === $bool_result ) {
			return;
		}

		$this->dom_document = $dom_document;

		$this->text = $dom_document->textContent;

		$body_tag      = $this->find_body_element( $dom_document );
		$first_element = null === $body_tag ? null : $body_tag->firstElementChild;

		// "Only a link" means the body holds exactly one node and that node is an anchor.
		$this->is_only_link = null !== $body_tag
			&& null !== $first_element
			&& 1 === $body_tag->childNodes->length
			&& 'a' === $first_element->tagName;

		/**
		 * The list returned for a tag name contains only elements.
		 *
		 * @var int        $item_index
		 * @var DOMElement $anchor_node
		 */
		foreach ( $dom_document->getElementsByTagName( 'a' ) as $item_index => $anchor_node ) {

			$this->anchors[ $item_index ] = $anchor_node;

			// An absent href attribute is returned as an empty string.
			$url_string = $anchor_node->getAttribute( 'href' );

			if ( '' !== $url_string ) {
				$this->urls[ $item_index ] = $url_string;
			}
		}

		/**
		 * The list returned for a tag name contains only elements.
		 *
		 * @var DOMElement $script_node
		 */
		foreach ( $dom_document->getElementsByTagName( 'script' ) as $script_node ) {
			// We empty the script tag contents here and wp_kses() will remove the tag itself later.
			$script_node->nodeValue = '';
		}
	}

	/**
	 * Finds the body element of a parsed document.
	 *
	 * Looked up by tag name rather than by position, because body is not guaranteed to be the first child of
	 * the html element (the parser may create a head element before it) and may not exist at all.
	 *
	 * @param DOMDocument $dom_document The parsed document to search.
	 *
	 * @return DOMElement|null The first body element, or null when the document has none.
	 */
	protected function find_body_element( DOMDocument $dom_document ): ?DOMElement {

		$body_tag = $dom_document->getElementsByTagName( 'body' )->item( 0 );

		return $body_tag instanceof DOMElement ? $body_tag : null;
	}

	/**
	 * Checks all URLs in this link to see are any links to pages inside this site.
	 *
	 * NB: Has internal is not the inverse of has external since some have no link at all.
	 *
	 * @return bool True when at least one URL is not classed as external; false when there are no URLs.
	 */
	public function has_internal_url(): bool {

		foreach ( $this->urls as $url ) {
			if ( ! $this->is_external_url( $url ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Checks all URLs in this link to see are any linking away from this site.
	 *
	 * @return bool True when at least one URL is classed as external; false when there are no URLs.
	 */
	public function has_external_url(): bool {

		foreach ( $this->urls as $url ) {
			if ( $this->is_external_url( $url ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Checks a bare url to see does it have a scheme and not contain this site's URL.
	 *
	 * Scheme-less (relative) URLs are therefore never classed as external.
	 *
	 * NOTE(review): the site URL is matched anywhere in the string, so an off-site URL that merely embeds this
	 * site's URL (e.g. in a query parameter) is classed as internal; confirm that is acceptable.
	 *
	 * @param string $url The URL to check.
	 * @return bool
	 */
	protected function is_external_url( string $url ): bool {

		$has_scheme = ! is_null( wp_parse_url( $url, PHP_URL_SCHEME ) );

		return $has_scheme && false === stripos( $url, get_site_url() );
	}

	/**
	 * Checks if the link contains no text at all once its tags are stripped.
	 *
	 * Compared against the empty string so that a link whose only text is "0" is not reported as empty.
	 *
	 * @return bool
	 */
	public function is_empty(): bool {
		return '' === wp_strip_all_tags( $this->original );
	}

	/**
	 * Used to filter to remove upsells and marketing links.
	 * Flags external "pro" and licence links.
	 *
	 * "Donate" links are not flagged.
	 *
	 * NOTE(review): the first list is matched as case-insensitive substrings, so e.g. "rate" also matches inside
	 * "Integrate" and "review" inside "Preview"; confirm whether that is intended.
	 *
	 * @return bool True if the link text contains an unwanted term (i.e. it should be removed), false otherwise.
	 */
	public function is_contains_unwanted_terms(): bool {

		if ( '' === $this->text ) {
			return false;
		}

		$definitely_unwanted_terms = array(
			'opt in',
			'opt-in',
			'add on',
			'add-on',
			'free',
			'upgrade',
			'trial',
			'review',
			'rate',
			'Uninstall',
		);

		foreach ( $definitely_unwanted_terms as $term ) {
			if ( false !== stripos( $this->text, $term ) ) {
				return true;
			}
		}

		// These terms are acceptable for internal links, but not for external links. Whole words only.
		$probably_unwanted_terms = '/\b(?:pro|premium|licence|license)\b/i';

		// The text is tested first so the URLs are only classified when a term was actually found.
		return 1 === preg_match( $probably_unwanted_terms, $this->text ) && $this->has_external_url();
	}

	/**
	 * Run wp_kses to strip unwanted styles etc. from links.
	 * Removes the HTML `class` attribute on Deactivate links.
	 * Returns "View details" links untouched.
	 *
	 * When the HTML was parsed, the markup cleaned is the first element inside the document body (which
	 * includes any in-place edits made to the document); otherwise the original string is cleaned.
	 *
	 * NOTE(review): only the first element of the body is serialised, so text or elements that follow it are
	 * dropped; confirm that is intended for links made of several sibling nodes.
	 *
	 * @see wp_kses()
	 *
	 * TODO: Are there CSS classes that need to be removed still? YES!
	 *
	 * @return string The sanitised, trimmed HTML.
	 */
	public function get_cleaned_link(): string {

		$type = $this->get_type();

		if ( 'view-details' === $type ) {
			return $this->original;
		}

		$allowed_html = array(
			'a' => array(
				'href'       => array(),
				'target'     => array(),
				'class'      => array(),
				'aria-label' => array(),
				'title'      => array(),
				'data-title' => array(),
			),
		);

		if ( 'deactivate' === $type ) {
			unset( $allowed_html['a']['class'] );
		}

		$unclean = '';
		if ( isset( $this->dom_document ) ) {
			$body_tag = $this->find_body_element( $this->dom_document );

			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$first_element = null === $body_tag ? null : $body_tag->firstElementChild;

			if ( null !== $first_element ) {
				$serialised = $this->dom_document->saveHTML( $first_element );
				$unclean    = is_string( $serialised ) ? $serialised : '';
			}
		}

		// Nothing usable came out of the parsed document, so clean the original string instead.
		if ( '' === $unclean ) {
			$unclean = $this->original;
		}

		return trim( wp_kses( $unclean, $allowed_html ) );
	}

	/**
	 * Some links are special cases.
	 * E.g. "View details" is a special case where we want to keep the internal URL in the middle column.
	 * The link type is used for sorting in the first column (settings first, deactivate last...).
	 * Default type is "links".
	 *
	 * The link text is searched, case-insensitively, for each needle in the order listed; the first match wins.
	 *
	 * @return string One of `view-details`, `settings`, `log`, `deactivate`, `github`, `author-link`, `links`.
	 */
	public function get_type(): string {

		// Type slug => text to look for.
		$types = array(
			'view-details' => 'View details',
			'settings'     => 'settings',
			'log'          => 'log',
			'deactivate'   => 'deactivate',
			'github'       => 'github',
			'author-link'  => 'By ',
		);

		foreach ( $types as $type => $needle ) {
			if ( false !== stripos( $this->text, $needle ) ) {
				return $type;
			}
		}

		return 'links';
	}

	/**
	 * Looks for GitHub links and replaces them with the GitHub icon.
	 *
	 * The anchor's text is emptied, its `class` attribute is set to the icon class, and the original text is
	 * added as a mouseover hint via the `title` attribute.
	 *
	 * TODO: Pulling the icon straight from GitHub probably isn't best practice.
	 * TODO: WordPress.org links with WordPress icon?
	 *
	 * Edits the DOMDocument in place.
	 */
	public function replace_text_with_icons(): void {

		// Match github.com / (something other than "sponsors" ) / (anything up to maybe a final / ).
		$match_github_repo_links = '/^https?:\/\/github\.com\/(?!sponsors\/)[^\/]*\/[^\/]*\/?$/i';

		foreach ( $this->anchors as $anchor_node ) {

			// An absent href attribute is returned as an empty string, which never matches.
			$url_string = $anchor_node->getAttribute( 'href' );

			if ( 1 !== preg_match( $match_github_repo_links, $url_string ) ) {
				continue;
			}

			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$old_text = $anchor_node->nodeValue;

			$anchor_node->setAttribute( 'class', 'bh-wp-plugins-page-github-icon' );
			$anchor_node->setAttribute( 'title', (string) $old_text );

			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$anchor_node->nodeValue = '';
		}
	}

}