1010typedef BOOL (^HTMLSelectorPredicate)(HTMLElement *node);
1111typedef HTMLSelectorPredicate HTMLSelectorPredicateGen;
1212
13+ static HTMLSelectorPredicate ScanSelectorPredicate (NSScanner *scanner, NSError **error);
1314static HTMLSelectorPredicate SelectorFunctionForString (NSString *selectorString, NSError **error);
1415
1516static NSError * ParseError (NSString *reason, NSString *string, NSUInteger position)
@@ -67,6 +68,18 @@ HTMLSelectorPredicateGen bothCombinatorPredicate(HTMLSelectorPredicate a, HTMLSe
6768 };
6869}
6970
/// Builds a predicate that matches a node when `a` or `b` (or both) match it.
/// `b` is only evaluated when `a` does not match.
///
/// @param a The first predicate to try.
/// @param b The second predicate to try.
/// @return The combined predicate, or nil when either operand is nil.
HTMLSelectorPredicateGen eitherCombinatorPredicate(HTMLSelectorPredicate a, HTMLSelectorPredicate b)
{
    // A nil operand means there was probably an error somewhere else in
    // parsing. Invoking a nil block crashes, so both operands must be present
    // before a combined predicate is built; otherwise return nil here.
    if (!a || !b) return nil;

    return ^BOOL(HTMLElement *node) {
        return a(node) || b(node);
    };
}
82+
7083HTMLSelectorPredicateGen andCombinatorPredicate (NSArray *predicates)
7184{
7285 return ^(HTMLElement *node) {
@@ -316,6 +329,73 @@ HTMLSelectorPredicateGen isLastChildOfTypePredicate(HTMLSelectorPredicate typePr
316329 return isNthChildOfTypePredicate (HTMLNthExpressionMake (0 , 1 ), typePredicate, YES );
317330}
318331
/// Builds a predicate that is true for elements whose `textContent`
/// contains `str` as a substring.
///
/// @param str The substring to look for.
/// @return A predicate testing the element's text content against `str`.
HTMLSelectorPredicateGen isContainsStringPredicate(NSString *str)
{
    HTMLSelectorPredicate textContainsString = ^BOOL(HTMLElement *element) {
        NSString *text = [element textContent];
        return [text containsString:str];
    };
    return textContainsString;
}
338+
/// Builds a predicate that is true for elements having at least one direct
/// child element (from `childElementNodes`) that matches `insideAfter`.
///
/// @param insideAfter The predicate applied to each child element.
/// @return The predicate, or nil when `insideAfter` is nil.
HTMLSelectorPredicateGen isAfterTagPredicate(HTMLSelectorPredicate insideAfter)
{
    // A nil predicate means the inner selector failed to parse. Invoking a
    // nil block crashes, so propagate the failure instead of building a
    // predicate around it.
    if (!insideAfter) return nil;

    return ^BOOL(HTMLElement *node) {
        for (HTMLElement *child in node.childElementNodes) {
            if (insideAfter(child)) {
                return YES;
            }
        }
        return NO;
    };
}
352+
/// Builds a predicate that is true for elements having a direct child element
/// matching `left` followed, later in `childElementNodes`, by a child element
/// matching `right`.
///
/// A child that matches `left` is never also tested against `right`.
///
/// @param left  The predicate identifying the opening child.
/// @param right The predicate identifying the closing child.
/// @return The predicate, or nil when either `left` or `right` is nil.
HTMLSelectorPredicateGen isBetweenTagPredicate(HTMLSelectorPredicate left, HTMLSelectorPredicate right)
{
    // A nil predicate means one of the inner selectors failed to parse;
    // invoking a nil block crashes, so propagate the failure instead.
    if (!left || !right) return nil;

    return ^BOOL(HTMLElement *node) {
        // Must start out NO: a `right` match only counts once a `left` match
        // has been seen earlier among the children.
        BOOL sawLeft = NO;
        for (HTMLElement *child in node.childElementNodes) {
            if (left(child)) {
                sawLeft = YES;
                continue;
            }
            if (sawLeft && right(child)) {
                return YES;
            }
        }
        return NO;
    };
}
370+
/// Builds a predicate that is true for elements having any descendant element
/// matching `childPredicate`.
///
/// Descendants are visited breadth-first, starting with the element's own
/// `childElementNodes`. The children of a matching element are never visited
/// because the search stops at the first match.
///
/// @param childPredicate The predicate applied to each descendant element.
/// @return The predicate, or nil when `childPredicate` is nil.
HTMLSelectorPredicateGen hasPredicate(HTMLSelectorPredicate childPredicate)
{
    if (!childPredicate) return nil;

    return ^BOOL(HTMLElement *node) {
        NSMutableArray *pending = [node.childElementNodes mutableCopy];

        // Walk the queue with a cursor instead of removing the head element:
        // removing index 0 shifts the whole array on every step, which makes
        // the traversal quadratic. The visit order is unchanged.
        for (NSUInteger cursor = 0; cursor < pending.count; cursor++) {
            HTMLElement *candidate = pending[cursor];
            if (childPredicate(candidate)) {
                return YES;
            }
            [pending addObjectsFromArray:candidate.childElementNodes];
        }

        return NO;
    };
}
398+
319399#pragma mark Attribute Helpers
320400
321401HTMLSelectorPredicateGen isKindOfClassPredicate (NSString *classname)
@@ -457,6 +537,20 @@ HTMLSelectorPredicateGen isRootPredicate(void)
457537 return nil ;
458538 }
459539
540+ // TODO: Handle nested brackets more robustly.
541+ // Get the nested interior if there are nested brackets ..(..(..)..)..
542+ // Count the number of ( between the first ( and the first ) and scan the same number of )
543+ NSInteger times = [[interior componentsSeparatedByString: @" (" ] count ]-1 ;
544+ while (times > 0 && scanner.scanLocation )
545+ {
546+ [scanner scanString: @" )" intoString: nil ];
547+ NSString *interior2 = nil ;
548+ [scanner scanUpToString: @" )" intoString: &interior2];
549+ interior = [interior stringByAppendingString: @" )" ];
550+ interior = interior2 != nil ? [interior stringByAppendingString: interior2] : interior;
551+ times--;
552+ }
553+
460554 [scanner scanString: @" )" intoString: nil ];
461555 return interior;
462556}
@@ -541,11 +635,51 @@ static HTMLSelectorPredicateGen scanPredicateFromPseudoClass(NSScanner *scanner,
541635 return isNthChildOfTypePredicate (nth, typePredicate, YES );
542636 }
543637 }
638+ else if ([pseudo isEqualToString: @" contains" ]){
639+ NSString *interior = scanFunctionInterior (scanner, error);
640+
641+ if (!interior) return nil ;
642+ NSString *str = [interior stringByTrimmingCharactersInSet: [NSCharacterSet characterSetWithCharactersInString: @" '" ]];
643+ return isContainsStringPredicate (str);
644+ }
544645 else if ([pseudo isEqualToString: @" not" ]) {
545646 NSString *toNegateString = scanFunctionInterior (scanner, error);
546647 HTMLSelectorPredicate toNegate = SelectorFunctionForString (toNegateString, error);
547648 return negatePredicate (toNegate);
548649 }
650+ else if ([pseudo isEqualToString: @" has" ]) {
651+ NSString *interior = scanFunctionInterior (scanner, error);
652+ if (!interior) return nil ;
653+ HTMLSelectorPredicate insideHas = SelectorFunctionForString (interior, error);
654+
655+ return hasPredicate (insideHas);
656+ }
657+ else if ([pseudo isEqualToString: @" after" ] || [pseudo isEqualToString: @" before" ]) {
658+ NSString *interior = scanFunctionInterior (scanner, error);
659+ if (!interior) return nil ;
660+ HTMLSelectorPredicate insideAfter = SelectorFunctionForString (interior, error);
661+
662+ return isAfterTagPredicate (insideAfter);
663+ }
664+ else if ([pseudo isEqualToString: @" between" ]) {
665+ NSString *interior = scanFunctionInterior (scanner, error);
666+ if (!interior) return nil ;
667+ NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet ];
668+ NSArray *valueSplit = [[interior stringByTrimmingCharactersInSet: whitespace] componentsSeparatedByCharactersInSet: [NSCharacterSet characterSetWithCharactersInString: @" ;" ]];
669+
670+ if (valueSplit.count != 2 )
671+ {
672+
673+ NSLog (@" INFO: Number of strings after split by ';' is not 2 : %lu " , (unsigned long )valueSplit.count );
674+ return neverPredicate ();
675+ }
676+
677+ HTMLSelectorPredicate left = SelectorFunctionForString (valueSplit[0 ] , error);
678+
679+ HTMLSelectorPredicate right = SelectorFunctionForString (valueSplit[1 ] , error);
680+
681+ return isBetweenTagPredicate (left, right);
682+ }
549683
550684 *error = ParseError (@" Unrecognized pseudo class" , scanner.string , scanner.scanLocation );
551685 return nil ;
@@ -577,7 +711,7 @@ static HTMLSelectorPredicateGen scanPredicateFromPseudoClass(NSScanner *scanner,
577711 static dispatch_once_t onceToken;
578712 dispatch_once (&onceToken, ^{
579713 // Combinators are: whitespace, "greater-than sign" (U+003E, >), "plus sign" (U+002B, +) and "tilde" (U+007E, ~)
580- NSMutableCharacterSet *set = [NSMutableCharacterSet characterSetWithCharactersInString: @" >+~" ];
714+ NSMutableCharacterSet *set = [NSMutableCharacterSet characterSetWithCharactersInString: @" , >+~" ];
581715 [set formUnionWithCharacterSet: HTMLSelectorWhitespaceCharacterSet ()];
582716 frozenSet = [set copy ];
583717 });
@@ -733,7 +867,9 @@ HTMLSelectorPredicateGen scanPredicate(NSScanner *scanner, HTMLSelectorPredicate
733867 // Whitespace combinator
734868 // y descendant of an x
735869 return descendantOfPredicate (inputPredicate);
736- } else if ([combinator isEqualToString: @" >" ]) {
870+ } else if ([combinator isEqualToString: @" ," ]) {
871+ return eitherCombinatorPredicate (inputPredicate, ScanSelectorPredicate (scanner, error));
872+ } else if ([combinator isEqualToString: @" >" ]) {
737873 return childOfOtherPredicatePredicate (inputPredicate);
738874 } else if ([combinator isEqualToString: @" +" ]) {
739875 return adjacentSiblingPredicate (inputPredicate);
@@ -750,6 +886,20 @@ HTMLSelectorPredicateGen scanPredicate(NSScanner *scanner, HTMLSelectorPredicate
750886 }
751887}
752888
/// Repeatedly calls scanPredicate on `scanner`, feeding each result back in as
/// the input predicate for the next call, and returns the last predicate
/// produced.
///
/// Scanning stops as soon as scanPredicate returns nil, the scanner reaches
/// the end of its string, or `*error` has been set.
///
/// @param scanner The scanner positioned at the selector text to parse.
/// @param error   Out-parameter for a parse error; it is dereferenced
///                unconditionally, so it must not be NULL.
/// @return The last predicate produced, or nil if scanning failed.
static HTMLSelectorPredicate ScanSelectorPredicate(NSScanner *scanner, NSError **error)
{
    HTMLSelectorPredicate combined = nil;

    for (;;) {
        combined = scanPredicate(scanner, combined, error);

        if (!combined) break;
        if ([scanner isAtEnd]) break;
        if (*error) break;
    }

    NSCAssert(combined || *error, @" Need either a predicate or error at this point");

    return combined;
}
902+
753903static HTMLSelectorPredicate SelectorFunctionForString (NSString *selectorString, NSError **error)
754904{
755905 // Trim non-functional whitespace
@@ -765,16 +915,7 @@ static HTMLSelectorPredicate SelectorFunctionForString(NSString *selectorString,
765915 scanner.caseSensitive = NO ; // Section 3 states that in HTML parsing, selectors are case-insensitive
766916 scanner.charactersToBeSkipped = nil ;
767917
768- // Scan out predicate parts and combine them
769- HTMLSelectorPredicate lastPredicate = nil ;
770-
771- do {
772- lastPredicate = scanPredicate (scanner, lastPredicate, error);
773- } while (lastPredicate && ![scanner isAtEnd ] && !*error);
774-
775- NSCAssert (lastPredicate || *error, @" Need either a predicate or error at this point" );
776-
777- return lastPredicate;
918+ return ScanSelectorPredicate (scanner, error);
778919}
779920
780921@interface HTMLSelector ()
@@ -858,12 +999,87 @@ - (HTMLElement *)firstNodeMatchingParsedSelector:(HTMLSelector *)selector
858999
8591000 for (HTMLElement *node in self.treeEnumerator ) {
8601001 if ([node isKindOfClass: [HTMLElement class ]] && [selector matchesElement: node]) {
1002+ // Return children before the predicate inside before(...)
1003+ if ([selector.string containsString: @" :before(" ]) {
1004+ NSString *interior = [self stringBetweenString: @" :before(" andString: @" )" withString: selector.string];
1005+ NSError *error;
1006+ HTMLSelectorPredicate predicate = SelectorFunctionForString (interior, &error);
1007+ HTMLElement *mutableNode = [node copy ];
1008+ for (HTMLNode *mNode in node.children ) {
1009+ if ([mNode isKindOfClass: [HTMLElement class ]] && predicate ((HTMLElement *)mNode)) {
1010+ break ;
1011+ }
1012+ [mutableNode.mutableChildren addObject: mNode];
1013+ }
1014+ return mutableNode;
1015+ }
1016+
1017+ // Return children after the predicate inside after(...)
1018+ if ([selector.string containsString: @" :after(" ]) {
1019+ NSString *interior = [self stringBetweenString: @" :after(" andString: @" )" withString: selector.string];
1020+ NSError *error;
1021+ HTMLSelectorPredicate predicate = SelectorFunctionForString (interior, &error);
1022+ HTMLElement *mutableNode = [node copy ];
1023+ BOOL shouldAdd = NO ;
1024+ for (HTMLNode *mNode in node.children ) {
1025+ if (shouldAdd) {
1026+ [mutableNode.mutableChildren addObject: mNode];
1027+ continue ;
1028+ }
1029+ if ([mNode isKindOfClass: [HTMLElement class ]] && predicate ((HTMLElement *)mNode)) {
1030+ shouldAdd = YES ;
1031+ }
1032+ }
1033+ return mutableNode;
1034+ }
1035+
1036+ // Return children between the predicate inside between(...)
1037+ if ([selector.string containsString: @" :between(" ]) {
1038+ NSString *interior = [self stringBetweenString: @" :between(" andString: @" )" withString: selector.string];
1039+ NSError *error;
1040+
1041+ NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet ];
1042+ NSArray *valueSplit = [[interior stringByTrimmingCharactersInSet: whitespace] componentsSeparatedByCharactersInSet: [NSCharacterSet characterSetWithCharactersInString: @" ;" ]];
1043+
1044+ HTMLSelectorPredicate left = SelectorFunctionForString (valueSplit[0 ] , &error);
1045+ HTMLSelectorPredicate right = SelectorFunctionForString (valueSplit[1 ] , &error);
1046+ HTMLElement *mutableNode = [node copy ];
1047+ BOOL shouldAdd = NO ;
1048+ for (HTMLNode *mNode in node.children ) {
1049+ if (shouldAdd && [mNode isKindOfClass: [HTMLElement class ]] && right ((HTMLElement *)mNode)) {
1050+ break ;
1051+ }
1052+ if (shouldAdd) {
1053+ [mutableNode.mutableChildren addObject: mNode];
1054+ continue ;
1055+ }
1056+ if ([mNode isKindOfClass: [HTMLElement class ]] && left ((HTMLElement *)mNode)) {
1057+ shouldAdd = YES ;
1058+ }
1059+ }
1060+ return mutableNode;
1061+ }
1062+
8611063 return node;
8621064 }
8631065 }
8641066 return nil ;
8651067}
8661068
/// Extracts the text of `str` that lies after the first occurrence of `start`
/// and before the next occurrence of `end`.
///
/// @param start The marker that precedes the wanted text.
/// @param end   The marker that terminates the wanted text.
/// @param str   The string to search.
/// @return The text between the markers, or nil when `start` is not found or
///         no characters could be scanned after it.
- (NSString *)stringBetweenString:(NSString *)start andString:(NSString *)end withString:(NSString *)str
{
    NSScanner *scanner = [NSScanner scannerWithString:str];
    // Do not skip whitespace, so the scanned text is taken verbatim.
    scanner.charactersToBeSkipped = nil;

    // Move up to the start marker (the skipped text is discarded), then
    // consume the marker itself. If it cannot be consumed, it is absent.
    [scanner scanUpToString:start intoString:NULL];
    if (![scanner scanString:start intoString:NULL]) {
        return nil;
    }

    NSString *between = nil;
    BOOL scannedSomething = [scanner scanUpToString:end intoString:&between];
    return scannedSomething ? between : nil;
}
1082+
8671083@end
8681084
8691085HTMLNthExpression HTMLNthExpressionMake (NSInteger n, NSInteger c)
0 commit comments