regular expression to read the string between <title> and </title>
Posted
by user262325
on Stack Overflow
See other posts from Stack Overflow
or by user262325
Published on 2010-03-30T14:36:49Z
Indexed on
2010/03/30
14:43 UTC
Read the original article
Hit count: 427
iphone
Hello everyone,
I hope to read the contents between <title> and </title> in an HTML string.
I think the pattern should look like this in Objective-C:
@"<title([\\s\\S]*)</title>"
Below is the code that I rewrote for regular-expression support:
//source of NSStringCategory.h
#import <Foundation/Foundation.h>
#import <regex.h>
// Objective-C wrapper around a single compiled POSIX regular expression.
// The pattern is compiled once in -initWithPattern:options: and released
// again in -dealloc.
@interface NSStringCategory : NSObject
{
    regex_t preg; // compiled pattern handed to regexec()/regfree()
}

// Runs once per class; creates the shared placeholder string returned by +null.
+ (void)initialize;

// The placeholder string stored in -capturedSubstringsOfString: results for
// capture groups that did not take part in the match.
+ (NSString *)null;

// Autoreleased convenience constructors. The one-argument form passes 0 as
// the options value.
+ (NSStringCategory *)regexWithPattern:(NSString *)pattern;
+ (NSStringCategory *)regexWithPattern:(NSString *)pattern options:(int)options;

// Compiles `pattern` with regcomp(); `options` are regcomp() flags and
// REG_EXTENDED is always added. Raises CSRegexException if compilation fails.
- (id)initWithPattern:(NSString *)pattern options:(int)options;

// Frees the compiled pattern.
- (void)dealloc;

// YES when the pattern matches anywhere in `string`.
- (BOOL)matchesString:(NSString *)string;

// The part of `string` covered by the first match, or nil when nothing matches.
- (NSString *)matchedSubstringOfString:(NSString *)string;

// Index 0 holds the whole match, indexes 1..n the capture groups.
// Returns nil when nothing matches.
- (NSArray *)capturedSubstringsOfString:(NSString *)string;

@end
// Convenience regex methods on NSString. Each call builds a temporary
// NSStringCategory regex from `pattern` and applies it to the receiver.
// The variants without `options:` pass 0.
@interface NSString (NSStringCategory)

// YES when `pattern` matches somewhere in the receiver.
- (BOOL)matchedByPattern:(NSString *)pattern;
- (BOOL)matchedByPattern:(NSString *)pattern options:(int)options;

// The matched part of the receiver, or nil when `pattern` does not match.
- (NSString *)substringMatchedByPattern:(NSString *)pattern;
- (NSString *)substringMatchedByPattern:(NSString *)pattern options:(int)options;

// Whole match followed by the capture groups, or nil when `pattern` does not match.
- (NSArray *)substringsCapturedByPattern:(NSString *)pattern;
- (NSArray *)substringsCapturedByPattern:(NSString *)pattern options:(int)options;

// Copy of the receiver with the regex metacharacters
// ^ . [ $ ( ) | * + ? { and \ each preceded by a backslash.
- (NSString *)escapedPattern;

@end
And the .m file:
#import "NSStringCategory.h"
// Shared placeholder string, set to @"" in +[NSStringCategory initialize].
// It stands in for capture groups that did not take part in a match and is
// what +[NSStringCategory null] returns.
static NSString *nullstring=nil;
// Wrapper around a POSIX regex_t: the pattern is compiled once in
// -initWithPattern:options:, used by the matching methods through regexec(),
// and freed in -dealloc. Manual retain/release is used throughout.
@implementation NSStringCategory

// Compiles `pattern` as a POSIX extended regular expression.
//   pattern - the regex source; must not be nil.
//   options - regcomp() flags; REG_EXTENDED is always OR-ed in.
// Raises CSRegexException when `pattern` is nil or regcomp() rejects it.
// NOTE(review): the instance is not released before the exception is raised
// (calling regfree() on a pattern that failed to compile would not be safe),
// so a caller that catches the exception leaks one object.
-(id)initWithPattern:(NSString *)pattern options:(int)options
{
    if(self=[super init])
    {
        // -UTF8String on nil yields NULL, which regcomp() must never receive.
        const char *cpattern=[pattern UTF8String];
        if(!cpattern)
        {
            [NSException raise:@"CSRegexException"
            format:@"Could not compile regex: pattern is nil"];
        }

        int err=regcomp(&preg,cpattern,options|REG_EXTENDED);
        if(err)
        {
            char errbuf[256];
            regerror(err,&preg,errbuf,sizeof(errbuf));
            [NSException raise:@"CSRegexException"
            format:@"Could not compile regex \"%@\": %s",pattern,errbuf];
        }
    }
    return self;
}

// Frees the compiled pattern.
-(void)dealloc
{
    regfree(&preg);
    [super dealloc];
}

// Returns YES when the pattern matches anywhere in `string`.
// A nil `string` never matches.
-(BOOL)matchesString:(NSString *)string
{
    const char *cstr=[string UTF8String];
    if(!cstr) return NO;
    return regexec(&preg,cstr,0,NULL,0)==0;
}

// Returns the part of `string` covered by the first match, or nil when
// `string` is nil, nothing matches, or no match offsets are available.
-(NSString *)matchedSubstringOfString:(NSString *)string
{
    const char *cstr=[string UTF8String];
    if(!cstr) return nil;

    // Pre-set the offsets: a regex compiled with REG_NOSUB leaves them
    // untouched, and they must not be read uninitialised in that case.
    regmatch_t match;
    match.rm_so=-1;
    match.rm_eo=-1;

    if(regexec(&preg,cstr,1,&match,0)!=0) return nil;
    if(match.rm_so<0||match.rm_eo<match.rm_so) return nil;

    return [[[NSString alloc] initWithBytes:cstr+match.rm_so
    length:(NSUInteger)(match.rm_eo-match.rm_so) encoding:NSUTF8StringEncoding] autorelease];
}

// Returns an array whose element 0 is the whole match and whose elements
// 1..re_nsub are the capture groups, or nil when `string` is nil, nothing
// matches, or the offset buffer cannot be allocated. Groups that did not
// take part in the match (or whose bytes cannot be decoded as UTF-8) are
// represented by the +null placeholder so that indexes stay aligned.
-(NSArray *)capturedSubstringsOfString:(NSString *)string
{
    const char *cstr=[string UTF8String];
    if(!cstr) return nil;

    size_t num=preg.re_nsub+1;
    regmatch_t *matches=calloc(num,sizeof(regmatch_t));
    if(!matches) return nil;

    NSMutableArray *array=nil;
    if(regexec(&preg,cstr,num,matches,0)==0)
    {
        array=[NSMutableArray arrayWithCapacity:num];
        for(size_t i=0;i<num;i++)
        {
            NSString *str=nil;
            if(matches[i].rm_so>=0&&matches[i].rm_eo>=matches[i].rm_so)
            {
                str=[[[NSString alloc] initWithBytes:cstr+matches[i].rm_so
                length:(NSUInteger)(matches[i].rm_eo-matches[i].rm_so)
                encoding:NSUTF8StringEncoding] autorelease];
            }
            // -addObject: raises on nil, so fall back to the placeholder.
            [array addObject:(str?str:nullstring)];
        }
    }

    // Single release point for the offset buffer on every path.
    free(matches);
    return array?[NSArray arrayWithArray:array]:nil;
}

// Autoreleased regex compiled from `pattern` with the given regcomp() flags.
+(NSStringCategory *)regexWithPattern:(NSString *)pattern options:(int)options
{
    return [[[NSStringCategory alloc] initWithPattern:pattern options:options] autorelease];
}

// Autoreleased regex compiled from `pattern` with no extra flags.
+(NSStringCategory *)regexWithPattern:(NSString *)pattern
{
    return [[[NSStringCategory alloc] initWithPattern:pattern options:0] autorelease];
}

// The placeholder string used for capture groups that did not match.
+(NSString *)null { return nullstring; }

// Creates the placeholder string once; the guard keeps repeated
// +initialize calls from replacing it.
+(void)initialize
{
    if(!nullstring) nullstring=[[NSString alloc] initWithString:@""];
}

@end
// NSString convenience layer: every method builds a temporary
// NSStringCategory regex from `pattern` and applies it to the receiver.
@implementation NSString (NSStringCategory)

// Option-less variants forward to the full versions with options 0.
-(BOOL)matchedByPattern:(NSString *)pattern
{
    return [self matchedByPattern:pattern options:0];
}

-(NSString *)substringMatchedByPattern:(NSString *)pattern
{
    return [self substringMatchedByPattern:pattern options:0];
}

-(NSArray *)substringsCapturedByPattern:(NSString *)pattern
{
    return [self substringsCapturedByPattern:pattern options:0];
}

// YES when `pattern` matches somewhere in the receiver. REG_NOSUB is added
// because only a yes/no answer is needed.
-(BOOL)matchedByPattern:(NSString *)pattern options:(int)options
{
    int flags=options|REG_NOSUB;
    NSStringCategory *regex=[NSStringCategory regexWithPattern:pattern options:flags];
    BOOL found=[regex matchesString:self];
    return found;
}

// The matched part of the receiver, or nil when `pattern` does not match.
-(NSString *)substringMatchedByPattern:(NSString *)pattern options:(int)options
{
    return [[NSStringCategory regexWithPattern:pattern options:options]
            matchedSubstringOfString:self];
}

// Whole match plus capture groups, or nil when `pattern` does not match.
-(NSArray *)substringsCapturedByPattern:(NSString *)pattern options:(int)options
{
    return [[NSStringCategory regexWithPattern:pattern options:options]
            capturedSubstringsOfString:self];
}

// Copy of the receiver in which each regex metacharacter is preceded by a
// backslash, so the result can be used as a literal inside a pattern.
-(NSString *)escapedPattern
{
    int total=[self length];
    NSMutableString *result=[NSMutableString stringWithCapacity:total];
    int pos=0;
    while(pos<total)
    {
        unichar ch=[self characterAtIndex:pos];
        switch(ch)
        {
            case '^': case '.': case '[': case '$':
            case '(': case ')': case '|': case '*':
            case '+': case '?': case '{': case '\\':
                [result appendFormat:@"\\%C",ch];
                break;
            default:
                [result appendFormat:@"%C",ch];
                break;
        }
        pos++;
    }
    return [NSString stringWithString:result];
}

@end
I use the code below to get the string between "<title>" and "</title>":
// NOTE(review): "\s" and "\S" are not C escape sequences, so this literal does
// not deliver the backslashes to regcomp() (compilers typically warn and keep
// only the letters, leaving the class "[sS]"). Even when written as
// "\\s\\S", a POSIX bracket expression treats the backslash as an ordinary
// character and has no \s/\S shorthand, so the class cannot mean "any
// character". Presumably this is why no match is found; confirm, and consider
// a pattern such as "<title[^>]*>([^<]*)</title>" instead.
NSStringCategory *a=[[NSStringCategory alloc] initWithPattern:@"<title([\s\S]*)</title>" options:0];//
Unfortunately, [a matchedSubstringOfString:response] always returns nil.
I do not know whether the regular expression is wrong or whether there is some other reason.
Any comments are welcome.
Thanks
interdev
© Stack Overflow or respective owner