Main Page   Compound List   File List   Compound Members   File Members  

pcre++.cc

Go to the documentation of this file.
00001 /*
00002  *
00003  *  $Id: pcre++.cc,v 1.2 2002/01/02 01:25:30 zarahg Exp $
00004  * 
00005  *  This file  is part of the  NABOU  Intrusion Detection System.
00006  *
00007  *  By  accessing  this software,  NABOU, you  are  duly informed
00008  *  of and agree to be  bound by the  conditions  described below
00009  *  in this notice:
00010  *
00011  *  This software product,  NABOU,  is developed by Thomas Linden
00012  *  and   copyrighted (C) 1999-2002   by  Thomas Linden, with all
00013  *  rights reserved.
00014  *
00015  *  There  is no charge for NABOU software.  You can redistribute
00016  *  it and/or modify it under the terms of the GNU General Public
00017  *  License, which is incorporated by reference herein.
00018  *
00019  *  NABOU is distributed WITHOUT ANY WARRANTY, IMPLIED OR EXPRESS,
00020  *  OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE or that
00021  *  the use of it will not infringe on any third party's intellec-
00022  *  tual property rights.
00023  *
00024  *  You should have received a copy of the GNU General Public
00025  *  License along with NABOU.  Copies can also be obtained from:
00026  *
00027  *    http://www.gnu.org/copyleft/gpl.html
00028  *
00029  *  or by writing to:
00030  *
00031  *  Free Software Foundation, Inc.
00032  *  59 Temple Place, Suite 330
00033  *  Boston, MA 02111-1307
00034  *  USA
00035  *
00036  *  Or contact:
00037  *
00038  *   "Thomas Linden" <tom@nabou.org>
00039  *
00040  *
00041  */
00042 
00043 #include "pcre++.h"
00044 
00045 Pcre::Pcre(const string& expression) {
00046   _expression   = expression;
00047   _flags        = 0;
00048   case_t = global_t = false;
00049   Compile(0);
00050 }
00051 
00052 Pcre::Pcre(const string& expression, const string& flags) {
00053   _expression   = expression;
00054   unsigned int FLAG = 0;
00055 
00056   for(unsigned int flag=0; flag<flags.length(); flag++) {
00057     switch(flags[flag]) {
00058     case 'i': FLAG |= PCRE_CASELESS;  case_t = true;   break;
00059     case 'm': FLAG |= PCRE_MULTILINE;                  break;
00060     case 's': FLAG |= PCRE_DOTALL;                     break;
00061     case 'x': FLAG |= PCRE_EXTENDED;                   break;
00062     case 'g':                         global_t = true; break;
00063     }
00064   }
00065 
00066   _flags = FLAG;
00067 
00068   Compile(FLAG);
00069 }
00070 
00071 Pcre::Pcre(Pcre &P) {
00072   _expression = P._expression;
00073   _flags      = P._flags;
00074   case_t      = P.case_t;
00075   global_t    = P.global_t;
00076   Compile(_flags);
00077 }
00078 
00079 void Pcre::Compile(int flags) {
00080   /* what happens if p_pcre is already allocated? hm ... */
00081   p_pcre_extra = NULL;
00082   p_pcre       = NULL;
00083   p_pcre       = pcre_compile((char *)_expression.c_str(), flags,
00084                               (const char **)(&err_str), &erroffset, NULL);
00085 
00086   if(p_pcre == NULL) {
00087     /* umh, that's odd, the parser should not fail at all */
00088     string Error = err_str;
00089     throw exception("pcre_compile(..) failed: " + Error);
00090   }
00091 
00092   /* calculate the number of substrings we are willing to catch */
00093   int where;
00094   int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00095   if(info == 0) {
00096     sub_len = (where +2) * 3; /* see "man pcre" for the exact formula */
00097   }
00098   else {
00099     throw exception(info);
00100   }
00101   did_match = false;
00102   num_matches = -1;
00103 }
00104 
00105 const Pcre& Pcre::operator = (const string& expression) {
00106   reset();
00107   Pcre *pcre = new Pcre(expression);
00108   return *pcre;
00109 }
00110 
00111 Pcre::~Pcre() {
00112   pcre_free(p_pcre);
00113   pcre_free(p_pcre_extra);
00114   delete sub_vec;
00115   if(num_matches > 0) /* avoid deleting of uninitialized pointer */
00116     delete resultset;
00117 }
00118 
00119 void Pcre::reset() {
00120   did_match   = false;
00121   num_matches = -1;
00122 }
00123 
00124 bool Pcre::search(const string& stuff, int OffSet) {
00125   return dosearch(stuff, OffSet);
00126 }
00127 
00128 bool Pcre::search(const string& stuff) {
00129   return dosearch(stuff, 0);
00130 }
00131 
00132 bool Pcre::dosearch(const string& stuff, int OffSet) {
00133   reset();
00134   sub_vec = new int[sub_len];
00135   int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00136                         (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00137 
00138   if(num < 0) {
00139     /* no match at all */
00140     return false;
00141   }
00142   else if(num == 0) {
00143     /* vector too small, there were too many substrings in stuff */
00144     return false;
00145   }
00146   else if(num == 1) {
00147     /* we had a match, but without substrings */
00148     did_match = true;
00149     num_matches = 0;
00150     return true;
00151   }
00152   else if(num > 1) {
00153     /* we had matching substrings */
00154     resultset = new Array;
00155     const char **stringlist;
00156     did_match = true;
00157     num_matches = num - 1;
00158 
00159     int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00160     if(res == 0) {
00161       for(int i=1; i<num; i++) {
00162         resultset->push_back(stringlist[i]);
00163       }
00164       pcre_free_substring_list(stringlist);
00165     }
00166     else {
00167       throw exception(res);
00168     }
00169     return true;
00170   }
00171   else {
00172     /* some other uncommon error occured */
00173     return false;
00174   }
00175 }
00176 
00177 Array* Pcre::get_sub_strings() {
00178   if(resultset != NULL)
00179     return resultset;
00180   else
00181     return NULL;
00182 }
00183 
00184 string Pcre::get_match(int pos) {
00185   if(pos >= 0 && pos < num_matches) {
00186     ArrayIterator P = resultset->begin() + pos;
00187     return *P;
00188   }
00189   else {
00190     throw exception("out of range");
00191   }
00192 }
00193 
00194 int Pcre::get_match_start(int pos) {
00195   if(pos >= 0 && pos <= num_matches) {
00196     /*
00197      * sub_vec[0] and [1] is the start/end of the entire string.
00198      */
00199     return sub_vec[ (++pos) * 2 ];
00200   }
00201   else {
00202     throw exception("out of range");
00203   }  
00204 }
00205 
00206 int Pcre::get_match_end(int pos) {
00207   if(pos >= 0 && pos <= num_matches) {
00208     /*
00209      * the end offset of a subpattern points to
00210      * the first offset of the next substring,
00211      * therefore -1
00212      */
00213     return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00214   }
00215   else {
00216     throw exception("out of range");
00217   }
00218 }
00219 
00220 size_t Pcre::get_match_length(int pos) {
00221   if(pos >= 0 && pos < num_matches) {
00222     ArrayIterator P = resultset->begin() + pos;
00223     return P->length();
00224   }
00225   else {
00226     throw exception("out of range");
00227   }
00228 }
00229 
00230 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00231   Array Splitted;
00232   /* _expression will be used as delimiter */
00233   if(_expression.length() == 1) {
00234     /* use the plain c++ way, ignore the pre-compiled p_pcre */
00235     string buffer, _delimiter, _piece;
00236     char z;
00237     if(case_t) {
00238       z = toupper(_expression[0]);
00239       for(size_t pos=0; pos < piece.length(); pos++) {
00240         _piece += (char)toupper(piece[pos]);
00241       }
00242     }
00243     else {
00244       z = _expression[0];
00245       _piece = piece;
00246     }
00247     for(size_t pos=0; pos<piece.length(); pos++) {
00248       if(_piece[pos] == z) {
00249         Splitted.push_back(buffer);
00250         buffer = "";
00251       }
00252       else {
00253         buffer += piece[pos];
00254       }
00255     }
00256     if(buffer != "") {
00257       Splitted.push_back(buffer);
00258     }
00259   }
00260   else {
00261     /* use the regex way */
00262     if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00263       /* oh, oh - the pre-compiled expression does not contain brackets */
00264       pcre_free(p_pcre);
00265       pcre_free(p_pcre_extra);
00266       
00267       pcre       *_p = NULL;
00268       pcre_extra *_e = NULL;;
00269 
00270       p_pcre = _p;
00271       p_pcre_extra = _e;
00272 
00273       _expression = "(" + _expression + ")";
00274       Compile(_flags);
00275     }
00276     int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00277     for(;;) {
00278       if(search(piece, pos) == true) {
00279         if(matches() > 0) {
00280           piece_end   = get_match_start(0) - 1;
00281           piece_start = pos;
00282           pos = piece_end + 1 + get_match_length(0);
00283           string junk(piece, piece_start, (piece_end - piece_start)+1);
00284           num_pieces++;
00285           if( (limit != 0 && num_pieces < limit) || limit == 0) {
00286             if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00287               if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00288                 /* we are within the allowed range, so just add the grab */
00289                 Splitted.push_back(junk);
00290               }
00291             }
00292           }
00293         }
00294       }
00295       else {
00296         /* the rest of the string, there are no more delimiters */
00297         string junk(piece, pos, (piece.length() - pos));
00298         num_pieces++;
00299         if( (limit != 0 && num_pieces < limit) || limit == 0) {
00300           if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00301             if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00302               /* we are within the allowed range, so just add the grab */
00303               Splitted.push_back(junk);
00304             }
00305           }
00306         }
00307         break;
00308       }
00309     } // for()
00310   } // if(_expression.length()
00311   return Splitted;
00312 }
00313 
00314 Array Pcre::split(const string& piece) {
00315   return _split(piece, 0, 0, 0);
00316 }
00317 
00318 Array Pcre::split(const string& piece, int limit) {
00319   return _split(piece, limit, 0, 0);
00320 }
00321 
00322 Array Pcre::split(const string& piece, int limit, int start_offset) {
00323   return _split(piece, limit, start_offset, 0);
00324 }
00325 
00326 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00327   return _split(piece, limit, start_offset, end_offset);
00328 }
00329 
00330 Array Pcre::split(const string& piece, vector<int> positions) {
00331   Array PreSplitted = _split(piece, 0, 0, 0);
00332   Array Splitted;
00333   for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00334     Splitted.push_back(PreSplitted[*vecIt]);
00335   }
00336   return Splitted;
00337 }
00338 
00339 
00340 string Pcre::replace(const string& piece, const string& with) {
00341   string Replaced(piece);
00342 
00343   if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00344     /* oh, oh - the pre-compiled expression does not contain brackets */
00345     _expression = "(" + _expression + ")";
00346     /* recreate the p_pcre* objects to avoid memory leaks */
00347     pcre_free(p_pcre);
00348     pcre_free(p_pcre_extra);
00349       
00350     pcre       *_p = NULL;
00351     pcre_extra *_e = NULL;;
00352 
00353     p_pcre = _p;
00354     p_pcre_extra = _e;
00355 
00356     _expression = "(" + _expression + ")";
00357     Compile(_flags);
00358   }
00359 
00360   if(search(piece)) {
00361     /* we found at least one match */
00362     string use_with = _replace_vars(with);
00363     if(!global_t) {
00364       /* only once */
00365       if(matched() && matches() >= 1) {
00366         Replaced.replace(get_match_start(0), (get_match_end(0) - get_match_start(0)) + 1, use_with);
00367       }
00368     }
00369     else {
00370       /* multiple times */
00371       Array Splitted = split(piece);
00372       Replaced = "";
00373       for(size_t pos=0; pos < Splitted.size(); pos++) {
00374         if(pos == (Splitted.size() - 1))
00375           Replaced += Splitted[pos];
00376         else
00377           Replaced += Splitted[pos] + with;
00378       }
00379     }
00380   }
00381   return Replaced;
00382 }
00383 
00384 string Pcre::_replace_vars(const string& piece) {
00385   Pcre dollar("\\$[0-9]+");
00386   string with = piece;
00387   if(dollar.search(with)) {
00388     for(int index=0; index < num_matches; index++) {
00389       /* do it for each existing sub string */
00390       string sub   = get_match(index); // what "$1" resulted
00391       ostringstream num(index+1);
00392       Pcre subsplit(string("(\\$") + num.str() + ")"); // "\\$1"
00393       Array splitted = subsplit.split(with); // normally 2 (or more) parts, the one in front of and the other one after "$1"
00394       string Replaced;
00395       for(size_t pos=0; pos < splitted.size(); pos++) {
00396         if(pos == (splitted.size() - 1))
00397           Replaced += splitted[pos];
00398         else
00399           Replaced += splitted[pos] + sub;
00400       }
00401       with = Replaced; // well, one part is done
00402     }
00403     return with;
00404   }
00405   else {
00406     /* hm, no $[0-9]+ stuff, so just return it untouched */
00407     return with;
00408   }
00409 }

Generated on Sun Jan 6 16:23:44 2002 for PCRE++ by doxygen1.2.13.1 written by Dimitri van Heesch, © 1997-2001