1- import cheerio , { AnyNode , Cheerio } from "cheerio" ;
1+ import { AnyNode , Cheerio , load } from "cheerio" ;
22import { PageOptions } from "../../../lib/entities" ;
33import { excludeNonMainTags } from "./excludeTags" ;
44
55export const removeUnwantedElements = (
66 html : string ,
7- pageOptions : PageOptions
7+ pageOptions : PageOptions ,
88) => {
9- const soup = cheerio . load ( html ) ;
9+ let soup = load ( html ) ;
1010
1111 if (
1212 pageOptions . onlyIncludeTags &&
1313 pageOptions . onlyIncludeTags . length > 0 &&
14- pageOptions . onlyIncludeTags [ 0 ] !== ''
14+ pageOptions . onlyIncludeTags [ 0 ] !== ""
1515 ) {
1616 if ( typeof pageOptions . onlyIncludeTags === "string" ) {
1717 pageOptions . onlyIncludeTags = [ pageOptions . onlyIncludeTags ] ;
1818 }
1919 if ( pageOptions . onlyIncludeTags . length !== 0 ) {
2020 // Create a new root element to hold the tags to keep
21- const newRoot = cheerio . load ( "<div></div>" ) ( "div" ) ;
21+ const newRoot = load ( "<div></div>" ) ( "div" ) ;
2222 pageOptions . onlyIncludeTags . forEach ( ( tag ) => {
2323 soup ( tag ) . each ( ( index , element ) => {
2424 newRoot . append ( soup ( element ) . clone ( ) ) ;
2525 } ) ;
2626 } ) ;
27- return newRoot . html ( ) ;
27+
28+ soup = load ( newRoot . html ( ) ) ;
2829 }
2930 }
3031
@@ -33,7 +34,7 @@ export const removeUnwantedElements = (
3334 if (
3435 pageOptions . removeTags &&
3536 pageOptions . removeTags . length > 0 &&
36- pageOptions . removeTags [ 0 ] !== ''
37+ pageOptions . removeTags [ 0 ] !== ""
3738 ) {
3839 if ( typeof pageOptions . removeTags === "string" ) {
3940 pageOptions . removeTags = [ pageOptions . removeTags ] ;
@@ -51,11 +52,11 @@ export const removeUnwantedElements = (
5152 const attributes = element . attribs ;
5253 const tagNameMatches = regexPattern . test ( element . name ) ;
5354 const attributesMatch = Object . keys ( attributes ) . some ( ( attr ) =>
54- regexPattern . test ( `${ attr } ="${ attributes [ attr ] } "` )
55+ regexPattern . test ( `${ attr } ="${ attributes [ attr ] } "` ) ,
5556 ) ;
5657 if ( tag . startsWith ( "*." ) ) {
5758 classMatch = Object . keys ( attributes ) . some ( ( attr ) =>
58- regexPattern . test ( `class="${ attributes [ attr ] } "` )
59+ regexPattern . test ( `class="${ attributes [ attr ] } "` ) ,
5960 ) ;
6061 }
6162 return tagNameMatches || attributesMatch || classMatch ;
0 commit comments