00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #include "pcre++.h"
00043
00044
00045
00046
00047
00048 Pcre::Pcre(const string& expression) {
00049 _expression = expression;
00050 _flags = 0;
00051 case_t = global_t = false;
00052 zero();
00053 Compile(0);
00054 }
00055
00056 Pcre::Pcre(const string& expression, const string& flags) {
00057 _expression = expression;
00058 unsigned int FLAG = 0;
00059
00060 for(unsigned int flag=0; flag<flags.length(); flag++) {
00061 switch(flags[flag]) {
00062 case 'i': FLAG |= PCRE_CASELESS; case_t = true; break;
00063 case 'm': FLAG |= PCRE_MULTILINE; break;
00064 case 's': FLAG |= PCRE_DOTALL; break;
00065 case 'x': FLAG |= PCRE_EXTENDED; break;
00066 case 'g': global_t = true; break;
00067 }
00068 }
00069
00070 _flags = FLAG;
00071
00072 zero();
00073 Compile(FLAG);
00074 }
00075
00076 Pcre::Pcre(const Pcre &P) {
00077 _expression = P._expression;
00078 _flags = P._flags;
00079 case_t = P.case_t;
00080 global_t = P.global_t;
00081 zero();
00082 Compile(_flags);
00083 }
00084
00085 Pcre::Pcre() {
00086 zero();
00087 }
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098 Pcre::~Pcre() {
00099
00100 if (p_pcre != NULL) {
00101 pcre_free(p_pcre);
00102 }
00103 if (p_pcre_extra != NULL) {
00104 pcre_free(p_pcre_extra);
00105 }
00106 if(sub_vec != NULL) {
00107 delete[] sub_vec;
00108 }
00109 if(num_matches > 0) {
00110 delete resultset;
00111 }
00112 }
00113
00114
00115
00116
00117
00118
00119
00120 const Pcre& Pcre::operator = (const string& expression) {
00121
00122 reset();
00123 _expression = expression;
00124 _flags = 0;
00125 case_t = global_t = false;
00126 Compile(0);
00127 return *this;
00128 }
00129
00130
00131 const Pcre& Pcre::operator = (const Pcre &P) {
00132 reset();
00133 _expression = P._expression;
00134 _flags = P._flags;
00135 case_t = P.case_t;
00136 global_t = P.global_t;
00137 zero();
00138 Compile(_flags);
00139 return *this;
00140 }
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150 void Pcre::zero() {
00151
00152 p_pcre_extra = NULL;
00153 p_pcre = NULL;
00154 sub_vec = NULL;
00155 resultset = NULL;
00156 err_str = NULL;
00157 num_matches = -1;
00158 }
00159
00160 void Pcre::reset() {
00161 did_match = false;
00162 num_matches = -1;
00163 }
00164
00165
00166
00167
00168
00169
00170
00171
00172 void Pcre::Compile(int flags) {
00173 p_pcre = pcre_compile((char *)_expression.c_str(), flags,
00174 (const char **)(&err_str), &erroffset, NULL);
00175
00176 if(p_pcre == NULL) {
00177
00178 string Error = err_str;
00179 throw exception("pcre_compile(..) failed: " + Error);
00180 }
00181
00182
00183 int where;
00184 int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00185 if(info == 0) {
00186 sub_len = (where +2) * 3;
00187 }
00188 else {
00189 throw exception(info);
00190 }
00191 reset();
00192 }
00193
00194
00195
00196
00197
00198
00199
00200 bool Pcre::search(const string& stuff, int OffSet) {
00201 return dosearch(stuff, OffSet);
00202 }
00203
00204 bool Pcre::search(const string& stuff) {
00205 return dosearch(stuff, 0);
00206 }
00207
00208 bool Pcre::dosearch(const string& stuff, int OffSet) {
00209 reset();
00210 if (sub_vec != NULL)
00211 delete sub_vec;
00212
00213 sub_vec = new int[sub_len];
00214 int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00215 (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00216
00217 if(num < 0) {
00218
00219 return false;
00220 }
00221 else if(num == 0) {
00222
00223 return false;
00224 }
00225 else if(num == 1) {
00226
00227 did_match = true;
00228 num_matches = 0;
00229 return true;
00230 }
00231 else if(num > 1) {
00232
00233 if (resultset != NULL)
00234 delete resultset;
00235 resultset = new Array;
00236 const char **stringlist;
00237 did_match = true;
00238 num_matches = num - 1;
00239
00240 int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00241 if(res == 0) {
00242 for(int i=1; i<num; i++) {
00243 resultset->push_back(stringlist[i]);
00244 }
00245 pcre_free_substring_list(stringlist);
00246 }
00247 else {
00248 throw exception(res);
00249 }
00250 return true;
00251 }
00252 else {
00253
00254 return false;
00255 }
00256 }
00257
00258 Array* Pcre::get_sub_strings() {
00259 if(resultset != NULL)
00260 return resultset;
00261 else
00262 return NULL;
00263 }
00264
00265 string Pcre::get_match(int pos) {
00266 if(pos >= 0 && pos < num_matches) {
00267 ArrayIterator P = resultset->begin() + pos;
00268 return *P;
00269 }
00270 else {
00271 throw exception("out of range");
00272 }
00273 }
00274
00275 int Pcre::get_match_start() {
00276 if (sub_vec)
00277 return sub_vec[0];
00278 else
00279 return -1;
00280 }
00281
00282 int Pcre::get_match_end() {
00283 if (sub_vec)
00284 return sub_vec[1] - 1;
00285 else
00286 return -1;
00287 }
00288
00289 int Pcre::get_match_start(int pos) {
00290 if(pos >= 0 && pos <= num_matches) {
00291
00292
00293
00294 return sub_vec[ (++pos) * 2 ];
00295 }
00296 else {
00297 throw exception("out of range");
00298 }
00299 }
00300
00301 int Pcre::get_match_end(int pos) {
00302 if(pos >= 0 && pos <= num_matches) {
00303
00304
00305
00306
00307
00308 return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00309 }
00310 else {
00311 throw exception("out of range");
00312 }
00313 }
00314
00315 size_t Pcre::get_match_length(int pos) {
00316 if(pos >= 0 && pos < num_matches) {
00317 ArrayIterator P = resultset->begin() + pos;
00318 return P->length();
00319 }
00320 else {
00321 throw exception("out of range");
00322 }
00323 }
00324
00325 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00326 Array Splitted;
00327
00328 if(_expression.length() == 1) {
00329
00330 string buffer, _delimiter, _piece;
00331 char z;
00332 if(case_t) {
00333 z = toupper(_expression[0]);
00334 for(size_t pos=0; pos < piece.length(); pos++) {
00335 _piece += (char)toupper(piece[pos]);
00336 }
00337 }
00338 else {
00339 z = _expression[0];
00340 _piece = piece;
00341 }
00342 for(size_t pos=0; pos<piece.length(); pos++) {
00343 if(_piece[pos] == z) {
00344 Splitted.push_back(buffer);
00345 buffer = "";
00346 }
00347 else {
00348 buffer += piece[pos];
00349 }
00350 }
00351 if(buffer != "") {
00352 Splitted.push_back(buffer);
00353 }
00354 }
00355 else {
00356
00357 if(_expression[0] != '(' && _expression[ _expression.length() - 1 ] != ')' ) {
00358
00359 pcre_free(p_pcre);
00360 pcre_free(p_pcre_extra);
00361
00362 pcre *_p = NULL;
00363 pcre_extra *_e = NULL;;
00364
00365 p_pcre = _p;
00366 p_pcre_extra = _e;
00367
00368 _expression = "(" + _expression + ")";
00369 Compile(_flags);
00370 }
00371 int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00372 for(;;) {
00373 if(search(piece, pos) == true) {
00374 if(matches() > 0) {
00375 piece_end = get_match_start(0) - 1;
00376 piece_start = pos;
00377 pos = piece_end + 1 + get_match_length(0);
00378 string junk(piece, piece_start, (piece_end - piece_start)+1);
00379 num_pieces++;
00380 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00381 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00382 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00383
00384 Splitted.push_back(junk);
00385 }
00386 }
00387 }
00388 }
00389 }
00390 else {
00391
00392 string junk(piece, pos, (piece.length() - pos));
00393 num_pieces++;
00394 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00395 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00396 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00397
00398 Splitted.push_back(junk);
00399 }
00400 }
00401 }
00402 break;
00403 }
00404 }
00405 }
00406 return Splitted;
00407 }
00408
00409 Array Pcre::split(const string& piece) {
00410 return _split(piece, 0, 0, 0);
00411 }
00412
00413 Array Pcre::split(const string& piece, int limit) {
00414 return _split(piece, limit, 0, 0);
00415 }
00416
00417 Array Pcre::split(const string& piece, int limit, int start_offset) {
00418 return _split(piece, limit, start_offset, 0);
00419 }
00420
00421 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00422 return _split(piece, limit, start_offset, end_offset);
00423 }
00424
00425 Array Pcre::split(const string& piece, vector<int> positions) {
00426 Array PreSplitted = _split(piece, 0, 0, 0);
00427 Array Splitted;
00428 for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00429 Splitted.push_back(PreSplitted[*vecIt]);
00430 }
00431 return Splitted;
00432 }
00433
00434
00435
00436 string Pcre::replace(const string& piece, const string& with) {
00437 string Replaced(piece);
00438
00439
00440
00441
00442
00443 Pcre braces("[^\\\\]\\(.*[^\\\\]\\)");
00444 if(! braces.search(_expression)) {
00445
00446
00447
00448
00449 pcre_free(p_pcre);
00450 pcre_free(p_pcre_extra);
00451
00452 pcre *_p = NULL;
00453 pcre_extra *_e = NULL;;
00454
00455 p_pcre = _p;
00456 p_pcre_extra = _e;
00457
00458 _expression = "(" + _expression + ")";
00459 Compile(_flags);
00460 }
00461
00462 if(search(piece)) {
00463
00464 string use_with = _replace_vars(with);
00465 if(!global_t) {
00466
00467
00468
00469
00470 if(matched() && matches() >= 1) {
00471 int len = get_match_end() - get_match_start() + 1;
00472 Replaced.replace(get_match_start(0), len, use_with);
00473 }
00474 }
00475 else {
00476
00477
00478
00479
00480
00481
00482
00483
00484 string sLeftOver = Replaced;
00485 int iCurPosition = 0;
00486 while( search( sLeftOver ) ) {
00487 if( matched() && matches() >= 1 ) {
00488 int len = 0;
00489 string lookfor;
00490 lookfor.erase();
00491 int match_pos;
00492 for (match_pos = 0; match_pos < matches(); match_pos++) {
00493 len += ((get_match_end(match_pos) - get_match_start(match_pos)) + 1);
00494 lookfor += get_match(match_pos);
00495 }
00496 match_pos = Replaced.find( lookfor, iCurPosition );
00497 Replaced.replace(match_pos, len, use_with);
00498 iCurPosition = ( match_pos + use_with.length() );
00499 sLeftOver = Replaced.substr( iCurPosition, string::npos );
00500 }
00501 }
00502 }
00503 }
00504 return Replaced;
00505 }
00506
00507
00508
00509 string Pcre::_replace_vars(const string& piece) {
00510 Pcre dollar("\\$[0-9]+");
00511 string with = piece;
00512 if(dollar.search(with)) {
00513 for(int index=0; index < num_matches; index++) {
00514
00515 string sub = get_match(index);
00516 ostringstream num;
00517 num << index+1;
00518 string dollar_num = "(\\$" + num.str() + ")";
00519 Pcre subsplit(dollar_num);
00520
00521 Array splitted = subsplit.split(with);
00522 string Replaced;
00523 for(size_t pos=0; pos < splitted.size(); pos++) {
00524 if(pos == (splitted.size() - 1))
00525 Replaced += splitted[pos];
00526 else
00527 Replaced += splitted[pos] + sub;
00528 }
00529 with = Replaced;
00530 }
00531 return with;
00532 }
00533 else {
00534
00535 return with;
00536 }
00537 }