Projects Publications Resume Contact About Youtube |
|
char *strstr(const char *haystack, const char *needle);
char *strcasestr(const char *haystack, const char *needle);
My implementation tries to reproduce the behavior of these two functions as closely as possible using regular expressions.
/*
 * Search haystack for the first match of needle, where needle is compiled
 * as a POSIX extended regular expression and matched case-sensitively.
 *
 * Returns a pointer to the first character of the match inside haystack, or
 * NULL when haystack is NULL, when needle fails to compile, or when nothing
 * matches. *end receives a pointer to the character just past the match, or
 * NULL whenever NULL is returned.
 */
extern char *regex_strstr(const char *haystack, const char *needle, char **end);

/*
 * Same contract as regex_strstr(), except that the regular expression is
 * matched without regard to case.
 */
extern char *regex_strcasestr(const char *haystack, const char *needle, char **end);
#include <sys/types.h> #include <regex.h> #include <stdlib.h> #include "regex_strstr.h" static char *regex_search(const char *haystack, const char *needle, char **end, int cflags) { char *start = NULL; *end = NULL; regex_t preg; regmatch_t pmatch; if (!haystack) return NULL; if (regcomp(&preg, needle, cflags)) { regfree(&preg); return NULL; } if (!regexec(&preg, haystack, 1, &pmatch, 0)) if ((haystack + pmatch.rm_so) != NULL) { start = (char*)haystack + pmatch.rm_so; *end = (char*)haystack + pmatch.rm_eo; } regfree(&preg); return start; } char *regex_strstr(const char *haystack, const char *needle, char **end) { int cflags = REG_EXTENDED; return regex_search(haystack, needle, end, cflags); } char *regex_strcasestr(const char *haystack, const char *needle, char **end) { int cflags = REG_ICASE | REG_EXTENDED; return regex_search(haystack, needle, end, cflags); }
hay
<needle>
hay
< neEdle >
hay
Now consider the following needle regular expression:
<[ \t]*needle[ \t]*>
The following code in test.c will find both instances:
#include <stdio.h> #include <stdlib.h> #include "regex_strstr.h" int main() { char *haystack = "hay\n<needle>\nhay\n< neEdle >\nhay"; char *start = haystack; char *end = NULL; char *needle = "<[ \t]*needle[ \t]*>"; while(start = regex_strcasestr(start, needle, &end)) { printf("needle %.*s with %d characters found at offset %d", end-start, start, end-start, start-haystack); printf(" with start[0]:%c and end[0]:%c\n", start[0], end[0]); start = end; } return 0; }The code will produce the following output:
needle <needle> with 8 characters found at offset 4 with start[0]:< and end[0]:\n
needle < neEdle > with 10 characters found at offset 17 with start[0]:< and end[0]:\n