Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.52% |
105 / 116 |
|
50.00% |
5 / 10 |
CRAP | |
0.00% |
0 / 1 |
Parsed_Link | |
90.52% |
105 / 116 |
|
50.00% |
5 / 10 |
44.58 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
parse_html_string | |
90.00% |
27 / 30 |
|
0.00% |
0 / 1 |
9.08 | |||
has_internal_url | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
has_external_url | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
4.07 | |||
is_external_url | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
is_empty | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
is_contains_unwanted_terms | |
96.30% |
26 / 27 |
|
0.00% |
0 / 1 |
7 | |||
get_cleaned_link | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
5 | |||
get_type | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
replace_text_with_icons | |
54.55% |
6 / 11 |
|
0.00% |
0 / 1 |
7.35 |
1 | <?php |
2 | /** |
3 | * Class with utility functions for each action and meta link. |
4 | * |
5 | * @package brianhenryie/bh-wp-plugins-page |
6 | */ |
7 | |
8 | namespace BrianHenryIE\WP_Plugins_Page\API; |
9 | |
10 | use DOMDocument; |
11 | use DOMElement; |
12 | use DOMNode; |
13 | |
/**
 * Uses DOMDocument to extract links from the text, the bare text, and provides utility functions for classifying the
 * text content.
 *
 * One instance represents one entry of a plugins.php action-links or meta-links array.
 */
class Parsed_Link {

	/**
	 * The link's original array key, when that key was a string.
	 *
	 * @var string|null
	 */
	protected ?string $key = null;

	/**
	 * The original HTML, exactly as passed to the constructor.
	 *
	 * @var string
	 */
	protected string $original = '';

	/**
	 * The parsed HTML. This may be updated from the original.
	 *
	 * Left uninitialised when the HTML was empty or could not be parsed, so always check with isset() before use.
	 *
	 * @var DOMDocument
	 */
	protected DOMDocument $dom_document;

	/**
	 * The bare text from the HTML.
	 *
	 * @var string
	 */
	protected string $text = '';

	/**
	 * Indicator if the HTML is only one link, or has additional text or links too.
	 *
	 * Null until the HTML has been successfully parsed.
	 *
	 * @var bool|null
	 */
	protected ?bool $is_only_link = null;

	/**
	 * All the HTML anchor elements found in the string, indexed by their position in the document.
	 *
	 * @var array<int,DOMElement> $anchors
	 */
	protected array $anchors = array();

	/**
	 * All URLs found in the HTML string, indexed by the position of the anchor they came from.
	 * Anchors without a `href` leave a gap in the indexes.
	 *
	 * @var array<int,string>
	 */
	protected array $urls = array();

	/**
	 * A representation of the HTML in a plugins.php meta or action link.
	 *
	 * @param int|string $key   The original array key. Only string keys are remembered.
	 * @param string     $value The html string.
	 */
	public function __construct( $key, string $value ) {

		$this->original = $value;
		$this->key      = is_string( $key ) ? $key : $this->key;

		$this->parse_html_string( $this->original );
	}

	/**
	 * Pick out the text, the links, and determine if this link contains only an anchor element.
	 *
	 * Populates `$dom_document`, `$text`, `$is_only_link`, `$anchors` and `$urls`. Returns without touching them
	 * when the string is blank or cannot be parsed.
	 *
	 * @param string $html_string A HTML string we want to analyse.
	 *
	 * phpcs:disable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	 * phpcs:disable WordPress.PHP.NoSilencedErrors.Discouraged
	 */
	protected function parse_html_string( string $html_string ): void {

		// Not empty(): that would also discard the legitimate text "0".
		if ( '' === trim( $html_string ) ) {
			return;
		}

		$dom_document = new DOMDocument();

		$previous_internal_errors_value = libxml_use_internal_errors( true );
		$bool_result                    = @$dom_document->loadHTML( $html_string );
		if ( ! $previous_internal_errors_value ) {
			// We switched buffering on, so the buffered parse errors are ours to discard; otherwise they accumulate.
			libxml_clear_errors();
		}
		libxml_use_internal_errors( $previous_internal_errors_value );

		if ( false === $bool_result ) {
			return;
		}

		$this->dom_document = $dom_document;
		$this->text         = (string) $dom_document->textContent;

		// For a HTML fragment libxml normally builds <html><body>…</body></html>, so this is usually the body element.
		$html_tag = $dom_document->firstElementChild;
		$body_tag = $html_tag?->firstElementChild;

		if ( null === $body_tag ) {
			// E.g. the input was only a HTML comment: there is no element tree to inspect.
			return;
		}

		$first_element = $body_tag->firstElementChild;

		$this->is_only_link = 1 === $body_tag->childNodes->length
			&& null !== $first_element
			&& 'a' === $first_element->tagName;

		/**
		 * The node list only ever contains elements because it was queried by tag name.
		 *
		 * @var array<int,DOMElement> $anchor_nodes
		 */
		$anchor_nodes = iterator_to_array( $dom_document->getElementsByTagName( 'a' ), false );

		foreach ( $anchor_nodes as $item_index => $anchor_node ) {

			$this->anchors[ $item_index ] = $anchor_node;

			// getAttribute() returns an empty string when the attribute is absent.
			$url_string = $anchor_node->getAttribute( 'href' );

			if ( '' !== $url_string ) {
				$this->urls[ $item_index ] = $url_string;
			}
		}

		/**
		 * Snapshot the live node list before editing the nodes.
		 *
		 * @var array<int,DOMElement> $script_nodes
		 */
		$script_nodes = iterator_to_array( $dom_document->getElementsByTagName( 'script' ), false );

		foreach ( $script_nodes as $script_node ) {
			// We empty the script tag contents here and wp_kses() will remove the tag itself later.
			$script_node->nodeValue = '';
		}
	}

	/**
	 * Checks all URLs in this link to see are any links to pages inside this site.
	 *
	 * NB: Has internal is not the inverse of has external since some have no link at all.
	 *
	 * @return bool True when at least one URL is not external; false when there are no URLs.
	 */
	public function has_internal_url(): bool {

		foreach ( $this->urls as $url ) {
			if ( ! $this->is_external_url( $url ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Checks all URLs in this link to see are any linking away from this site.
	 *
	 * @return bool True when at least one URL is external; false when there are no URLs.
	 */
	public function has_external_url(): bool {

		foreach ( $this->urls as $url ) {
			if ( $this->is_external_url( $url ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Checks a bare url to see does it have a scheme and a domain other than this site's domain.
	 *
	 * The hosts are compared, rather than searching for the site URL as a substring, so that:
	 * - an external URL which merely mentions this site (e.g. in its query string) is still external;
	 * - a http/https mismatch does not turn an internal URL into an external one.
	 *
	 * @param string $url The URL to check.
	 * @return bool
	 */
	protected function is_external_url( string $url ): bool {

		// No scheme means a relative URL, which can only point at this site.
		if ( is_null( wp_parse_url( $url, PHP_URL_SCHEME ) ) ) {
			return false;
		}

		$url_host  = wp_parse_url( $url, PHP_URL_HOST );
		$site_host = wp_parse_url( get_site_url(), PHP_URL_HOST );

		if ( is_string( $url_host ) && is_string( $site_host ) ) {
			return 0 !== strcasecmp( $url_host, $site_host );
		}

		// A scheme but no comparable host (mailto:, tel:, malformed URLs): it does not lead into this site.
		return true;
	}

	/**
	 * Checks if the link contains no text at all.
	 *
	 * @return bool
	 */
	public function is_empty(): bool {
		// Not empty(): that would also report the visible text "0" as empty.
		return '' === wp_strip_all_tags( $this->original );
	}

	/**
	 * Used to filter to remove upsells and marketing links.
	 * Removes external "pro" and licence links.
	 *
	 * "Donate" links are not removed.
	 *
	 * @return bool True if the link text contains unwanted terms (i.e. the link should be removed), false otherwise.
	 */
	public function is_contains_unwanted_terms(): bool {

		if ( '' === $this->text ) {
			return false;
		}

		// Matched case-insensitively anywhere in the text, regardless of where the link points.
		$definitely_unwanted_terms = array(
			'opt in',
			'opt-in',
			'add on',
			'add-on',
			'free',
			'upgrade',
			'trial',
			'review',
			'rate',
			'Uninstall',
		);

		foreach ( $definitely_unwanted_terms as $term ) {
			if ( false !== stripos( $this->text, $term ) ) {
				return true;
			}
		}

		// These terms are acceptable for internal links, but not for external links. Whole words only.
		$probably_unwanted_pattern = '/\b(?:pro|premium|licence|license)\b/i';

		// The URL check only runs when one of the terms is actually present.
		return 1 === preg_match( $probably_unwanted_pattern, $this->text )
			&& $this->has_external_url();
	}

	/**
	 * Run wp_kses to strip unwanted styles etc. from links.
	 * Removes the HTML `class` attribute on Deactivate links.
	 * Returns "View details" links untouched.
	 *
	 * @see wp_kses()
	 *
	 * TODO: Are there CSS classes that need to be removed still? YES!
	 *
	 * @return string The link HTML reduced to anchor elements with a small set of allowed attributes.
	 */
	public function get_cleaned_link(): string {

		$type = $this->get_type();

		if ( 'view-details' === $type ) {
			return $this->original;
		}

		$allowed_html = array(
			'a' => array(
				'href'       => array(),
				'target'     => array(),
				'class'      => array(),
				'aria-label' => array(),
				'title'      => array(),
				'data-title' => array(),
			),
		);

		if ( 'deactivate' === $type ) {
			unset( $allowed_html['a']['class'] );
		}

		$unclean = '';
		if ( isset( $this->dom_document ) ) {
			// html > (normally) body > first element. Passing null to saveHTML() would dump the whole document instead.
			$first_element = $this->dom_document->firstElementChild?->firstElementChild?->firstElementChild;

			if ( null !== $first_element ) {
				$unclean = $this->dom_document->saveHTML( $first_element );
			}
		}

		// Nothing parsed, or nothing serialised: clean the original string instead.
		if ( empty( $unclean ) ) {
			$unclean = $this->original;
		}

		return trim( wp_kses( $unclean, $allowed_html ) );
	}

	/**
	 * Some links are special cases.
	 * E.g. "View details" is a special case where we want to keep the internal URL in the middle column.
	 * The link type is used for sorting in the first column (settings first, deactivate last...).
	 * Default type is "links".
	 *
	 * The first entry whose text is found (case-insensitively) in the link text wins.
	 *
	 * @return string The entry's string key where it has one, otherwise its sanitize_title() form; "links" if none match.
	 */
	public function get_type(): string {

		$types = array(
			'View details',
			'settings',
			'log',
			'deactivate',
			'github',
			'author-link' => 'By ',
		);

		foreach ( $types as $key => $type ) {

			if ( false === stripos( $this->text, $type ) ) {
				continue;
			}

			return is_string( $key ) ? $key : sanitize_title( $type );
		}

		return 'links';
	}

	/**
	 * Looks for GitHub links and replaces them with the GitHub icon.
	 *
	 * Adds the original text as a mouseover hint.
	 *
	 * TODO: Pulling the icon straight from GitHub probably isn't best practice.
	 * TODO: WordPress.org links with WordPress icon?
	 *
	 * Edits the DOMDocument in place.
	 */
	public function replace_text_with_icons(): void {

		// Match github.com / (an owner, but not the "sponsors" section) / (a repo) with an optional final slash.
		$match_github_repo_links = '/^https?:\/\/github\.com\/(?!sponsors\/)[^\/]+\/[^\/]+\/?$/i';

		foreach ( $this->anchors as $anchor_node ) {

			// getAttribute() returns an empty string for anchors without a href, which cannot match.
			if ( 1 !== preg_match( $match_github_repo_links, $anchor_node->getAttribute( 'href' ) ) ) {
				continue;
			}

			$old_text = (string) $anchor_node->nodeValue;

			$anchor_node->setAttribute( 'class', 'bh-wp-plugins-page-github-icon' );
			$anchor_node->setAttribute( 'title', $old_text );

			// The icon comes from the CSS class, so the visible text is removed.
			$anchor_node->nodeValue = '';
		}
	}

}