00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #include "pcre++.h"
00045
00046 Pcre::Pcre(const string& expression) {
00047 _expression = expression;
00048 _flags = 0;
00049 case_t = global_t = false;
00050 Compile(0);
00051 }
00052
00053 Pcre::Pcre(const string& expression, const string& flags) {
00054 _expression = expression;
00055 unsigned int FLAG = 0;
00056
00057 for(unsigned int flag=0; flag<flags.length(); flag++) {
00058 switch(flags[flag]) {
00059 case 'i': FLAG |= PCRE_CASELESS; case_t = true; break;
00060 case 'm': FLAG |= PCRE_MULTILINE; break;
00061 case 's': FLAG |= PCRE_DOTALL; break;
00062 case 'x': FLAG |= PCRE_EXTENDED; break;
00063 case 'g': global_t = true; break;
00064 }
00065 }
00066
00067 _flags = FLAG;
00068
00069 Compile(FLAG);
00070 }
00071
00072 Pcre::Pcre(Pcre &P) {
00073 _expression = P._expression;
00074 _flags = P._flags;
00075 case_t = P.case_t;
00076 global_t = P.global_t;
00077 Compile(_flags);
00078 }
00079
00080 void Pcre::Compile(int flags) {
00081
00082 p_pcre_extra = NULL;
00083 p_pcre = NULL;
00084 p_pcre = pcre_compile((char *)_expression.c_str(), flags,
00085 (const char **)(&err_str), &erroffset, NULL);
00086
00087 if(p_pcre == NULL) {
00088
00089 string Error = err_str;
00090 throw exception("pcre_compile(..) failed: " + Error);
00091 }
00092
00093
00094 int where;
00095 int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00096 if(info == 0) {
00097 sub_len = (where +2) * 3;
00098 }
00099 else {
00100 throw exception(info);
00101 }
00102 did_match = false;
00103 num_matches = -1;
00104 }
00105
00106 const Pcre& Pcre::operator = (const string& expression) {
00107 reset();
00108 Pcre *pcre = new Pcre(expression);
00109 return *pcre;
00110 }
00111
00112 Pcre::~Pcre() {
00113 pcre_free(p_pcre);
00114 pcre_free(p_pcre_extra);
00115 delete sub_vec;
00116 if(num_matches > 0)
00117 delete resultset;
00118 }
00119
00120 void Pcre::reset() {
00121 did_match = false;
00122 num_matches = -1;
00123 }
00124
00125 bool Pcre::search(const string& stuff, int OffSet) {
00126 return dosearch(stuff, OffSet);
00127 }
00128
00129 bool Pcre::search(const string& stuff) {
00130 return dosearch(stuff, 0);
00131 }
00132
00133 bool Pcre::dosearch(const string& stuff, int OffSet) {
00134 reset();
00135 sub_vec = new int[sub_len];
00136 int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00137 (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00138
00139 if(num < 0) {
00140
00141 return false;
00142 }
00143 else if(num == 0) {
00144
00145 return false;
00146 }
00147 else if(num == 1) {
00148
00149 did_match = true;
00150 num_matches = 0;
00151 return true;
00152 }
00153 else if(num > 1) {
00154
00155 resultset = new Array;
00156 const char **stringlist;
00157 did_match = true;
00158 num_matches = num - 1;
00159
00160 int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00161 if(res == 0) {
00162 for(int i=1; i<num; i++) {
00163 resultset->push_back(stringlist[i]);
00164 }
00165 pcre_free_substring_list(stringlist);
00166 }
00167 else {
00168 throw exception(res);
00169 }
00170 return true;
00171 }
00172 else {
00173
00174 return false;
00175 }
00176 }
00177
00178 Array* Pcre::get_sub_strings() {
00179 if(resultset != NULL)
00180 return resultset;
00181 else
00182 return NULL;
00183 }
00184
00185 string Pcre::get_match(int pos) {
00186 if(pos >= 0 && pos < num_matches) {
00187 ArrayIterator P = resultset->begin() + pos;
00188 return *P;
00189 }
00190 else {
00191 throw exception("out of range");
00192 }
00193 }
00194
00195 int Pcre::get_match_start(int pos) {
00196 if(pos >= 0 && pos <= num_matches) {
00197
00198
00199
00200 return sub_vec[ (++pos) * 2 ];
00201 }
00202 else {
00203 throw exception("out of range");
00204 }
00205 }
00206
00207 int Pcre::get_match_end(int pos) {
00208 if(pos >= 0 && pos <= num_matches) {
00209
00210
00211
00212
00213
00214 return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00215 }
00216 else {
00217 throw exception("out of range");
00218 }
00219 }
00220
00221 size_t Pcre::get_match_length(int pos) {
00222 if(pos >= 0 && pos < num_matches) {
00223 ArrayIterator P = resultset->begin() + pos;
00224 return P->length();
00225 }
00226 else {
00227 throw exception("out of range");
00228 }
00229 }
00230
00231 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00232 Array Splitted;
00233
00234 if(_expression.length() == 1) {
00235
00236 string buffer, _delimiter, _piece;
00237 char z;
00238 if(case_t) {
00239 z = toupper(_expression[0]);
00240 for(size_t pos=0; pos < piece.length(); pos++) {
00241 _piece += (char)toupper(piece[pos]);
00242 }
00243 }
00244 else {
00245 z = _expression[0];
00246 _piece = piece;
00247 }
00248 for(size_t pos=0; pos<piece.length(); pos++) {
00249 if(_piece[pos] == z) {
00250 Splitted.push_back(buffer);
00251 buffer = "";
00252 }
00253 else {
00254 buffer += piece[pos];
00255 }
00256 }
00257 if(buffer != "") {
00258 Splitted.push_back(buffer);
00259 }
00260 }
00261 else {
00262
00263 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00264
00265 pcre_free(p_pcre);
00266 pcre_free(p_pcre_extra);
00267
00268 pcre *_p = NULL;
00269 pcre_extra *_e = NULL;;
00270
00271 p_pcre = _p;
00272 p_pcre_extra = _e;
00273
00274 _expression = "(" + _expression + ")";
00275 Compile(_flags);
00276 }
00277 int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00278 for(;;) {
00279 if(search(piece, pos) == true) {
00280 if(matches() > 0) {
00281 piece_end = get_match_start(0) - 1;
00282 piece_start = pos;
00283 pos = piece_end + 1 + get_match_length(0);
00284 string junk(piece, piece_start, (piece_end - piece_start)+1);
00285 num_pieces++;
00286 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00287 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00288 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00289
00290 Splitted.push_back(junk);
00291 }
00292 }
00293 }
00294 }
00295 }
00296 else {
00297
00298 string junk(piece, pos, (piece.length() - pos));
00299 num_pieces++;
00300 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00301 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00302 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00303
00304 Splitted.push_back(junk);
00305 }
00306 }
00307 }
00308 break;
00309 }
00310 }
00311 }
00312 return Splitted;
00313 }
00314
00315 Array Pcre::split(const string& piece) {
00316 return _split(piece, 0, 0, 0);
00317 }
00318
00319 Array Pcre::split(const string& piece, int limit) {
00320 return _split(piece, limit, 0, 0);
00321 }
00322
00323 Array Pcre::split(const string& piece, int limit, int start_offset) {
00324 return _split(piece, limit, start_offset, 0);
00325 }
00326
00327 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00328 return _split(piece, limit, start_offset, end_offset);
00329 }
00330
00331 Array Pcre::split(const string& piece, vector<int> positions) {
00332 Array PreSplitted = _split(piece, 0, 0, 0);
00333 Array Splitted;
00334 for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00335 Splitted.push_back(PreSplitted[*vecIt]);
00336 }
00337 return Splitted;
00338 }
00339
00340
00341 string Pcre::replace(const string& piece, const string& with) {
00342 string Replaced(piece);
00343
00344 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00345
00346 _expression = "(" + _expression + ")";
00347
00348 pcre_free(p_pcre);
00349 pcre_free(p_pcre_extra);
00350
00351 pcre *_p = NULL;
00352 pcre_extra *_e = NULL;;
00353
00354 p_pcre = _p;
00355 p_pcre_extra = _e;
00356
00357 _expression = "(" + _expression + ")";
00358 Compile(_flags);
00359 }
00360
00361 if(search(piece)) {
00362
00363 string use_with = _replace_vars(with);
00364 if(!global_t) {
00365
00366 if(matched() && matches() >= 1) {
00367 Replaced.replace(get_match_start(0), (get_match_end(0) - get_match_start(0)) + 1, use_with);
00368 }
00369 }
00370 else {
00371
00372 Array Splitted = split(piece);
00373 Replaced = "";
00374 for(size_t pos=0; pos < Splitted.size(); pos++) {
00375 if(pos == (Splitted.size() - 1))
00376 Replaced += Splitted[pos];
00377 else
00378 Replaced += Splitted[pos] + with;
00379 }
00380 }
00381 }
00382 return Replaced;
00383 }
00384
00385 string Pcre::_replace_vars(const string& piece) {
00386 Pcre dollar("\\$[0-9]+");
00387 string with = piece;
00388 if(dollar.search(with)) {
00389 for(int index=0; index < num_matches; index++) {
00390
00391 string sub = get_match(index);
00392 ostringstream num(index+1);
00393 Pcre subsplit(string("(\\$") + num.str() + ")");
00394 Array splitted = subsplit.split(with);
00395 string Replaced;
00396 for(size_t pos=0; pos < splitted.size(); pos++) {
00397 if(pos == (splitted.size() - 1))
00398 Replaced += splitted[pos];
00399 else
00400 Replaced += splitted[pos] + sub;
00401 }
00402 with = Replaced;
00403 }
00404 return with;
00405 }
00406 else {
00407
00408 return with;
00409 }
00410 }