00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #include "pcre++.h"
00045
00046
00047
00048
00049
00050 Pcre::Pcre(const string& expression) {
00051 _expression = expression;
00052 _flags = 0;
00053 case_t = global_t = false;
00054 zero();
00055 Compile(0);
00056 }
00057
00058 Pcre::Pcre(const string& expression, const string& flags) {
00059 _expression = expression;
00060 unsigned int FLAG = 0;
00061
00062 for(unsigned int flag=0; flag<flags.length(); flag++) {
00063 switch(flags[flag]) {
00064 case 'i': FLAG |= PCRE_CASELESS; case_t = true; break;
00065 case 'm': FLAG |= PCRE_MULTILINE; break;
00066 case 's': FLAG |= PCRE_DOTALL; break;
00067 case 'x': FLAG |= PCRE_EXTENDED; break;
00068 case 'g': global_t = true; break;
00069 }
00070 }
00071
00072 _flags = FLAG;
00073
00074 zero();
00075 Compile(FLAG);
00076 }
00077
00078 Pcre::Pcre(const Pcre &P) {
00079 _expression = P._expression;
00080 _flags = P._flags;
00081 case_t = P.case_t;
00082 global_t = P.global_t;
00083 zero();
00084 Compile(_flags);
00085 }
00086
00087 Pcre::Pcre() {
00088 zero();
00089 }
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 Pcre::~Pcre() {
00101
00102 if (p_pcre != NULL) {
00103 pcre_free(p_pcre);
00104 }
00105 if (p_pcre_extra != NULL) {
00106 pcre_free(p_pcre_extra);
00107 }
00108 if(sub_vec != NULL) {
00109 delete[] sub_vec;
00110 }
00111 if(num_matches > 0) {
00112 delete resultset;
00113 }
00114 if(err_str != NULL) {
00115 delete err_str;
00116 }
00117 }
00118
00119
00120
00121
00122
00123
00124
00125 const Pcre& Pcre::operator = (const string& expression) {
00126
00127 reset();
00128 _expression = expression;
00129 _flags = 0;
00130 case_t = global_t = false;
00131 Compile(0);
00132 return *this;
00133 }
00134
00135
00136 const Pcre& Pcre::operator = (const Pcre &P) {
00137 reset();
00138 _expression = P._expression;
00139 _flags = P._flags;
00140 case_t = P.case_t;
00141 global_t = P.global_t;
00142 zero();
00143 Compile(_flags);
00144 return *this;
00145 }
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155 void Pcre::zero() {
00156
00157 p_pcre_extra = NULL;
00158 p_pcre = NULL;
00159 sub_vec = NULL;
00160 resultset = NULL;
00161 err_str = NULL;
00162 num_matches = -1;
00163 }
00164
00165 void Pcre::reset() {
00166 did_match = false;
00167 num_matches = -1;
00168 }
00169
00170
00171
00172
00173
00174
00175
00176
00177 void Pcre::Compile(int flags) {
00178 p_pcre = pcre_compile((char *)_expression.c_str(), flags,
00179 (const char **)(&err_str), &erroffset, NULL);
00180
00181 if(p_pcre == NULL) {
00182
00183 string Error = err_str;
00184 throw exception("pcre_compile(..) failed: " + Error);
00185 }
00186
00187
00188 int where;
00189 int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00190 if(info == 0) {
00191 sub_len = (where +2) * 3;
00192 }
00193 else {
00194 throw exception(info);
00195 }
00196 reset();
00197 }
00198
00199
00200
00201
00202
00203
00204
00205 bool Pcre::search(const string& stuff, int OffSet) {
00206 return dosearch(stuff, OffSet);
00207 }
00208
00209 bool Pcre::search(const string& stuff) {
00210 return dosearch(stuff, 0);
00211 }
00212
00213 bool Pcre::dosearch(const string& stuff, int OffSet) {
00214 reset();
00215 if (sub_vec != NULL)
00216 delete sub_vec;
00217
00218 sub_vec = new int[sub_len];
00219 int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00220 (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00221
00222 if(num < 0) {
00223
00224 return false;
00225 }
00226 else if(num == 0) {
00227
00228 return false;
00229 }
00230 else if(num == 1) {
00231
00232 did_match = true;
00233 num_matches = 0;
00234 return true;
00235 }
00236 else if(num > 1) {
00237
00238 if (resultset != NULL)
00239 delete resultset;
00240 resultset = new Array;
00241 const char **stringlist;
00242 did_match = true;
00243 num_matches = num - 1;
00244
00245 int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00246 if(res == 0) {
00247 for(int i=1; i<num; i++) {
00248 resultset->push_back(stringlist[i]);
00249 }
00250 pcre_free_substring_list(stringlist);
00251 }
00252 else {
00253 throw exception(res);
00254 }
00255 return true;
00256 }
00257 else {
00258
00259 return false;
00260 }
00261 }
00262
00263 Array* Pcre::get_sub_strings() {
00264 if(resultset != NULL)
00265 return resultset;
00266 else
00267 return NULL;
00268 }
00269
00270 string Pcre::get_match(int pos) {
00271 if(pos >= 0 && pos < num_matches) {
00272 ArrayIterator P = resultset->begin() + pos;
00273 return *P;
00274 }
00275 else {
00276 throw exception("out of range");
00277 }
00278 }
00279
00280 int Pcre::get_match_start() {
00281 if (sub_vec)
00282 return sub_vec[0];
00283 else
00284 return -1;
00285 }
00286
00287 int Pcre::get_match_end() {
00288 if (sub_vec)
00289 return sub_vec[1] - 1;
00290 else
00291 return -1;
00292 }
00293
00294 int Pcre::get_match_start(int pos) {
00295 if(pos >= 0 && pos <= num_matches) {
00296
00297
00298
00299 return sub_vec[ (++pos) * 2 ];
00300 }
00301 else {
00302 throw exception("out of range");
00303 }
00304 }
00305
00306 int Pcre::get_match_end(int pos) {
00307 if(pos >= 0 && pos <= num_matches) {
00308
00309
00310
00311
00312
00313 return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00314 }
00315 else {
00316 throw exception("out of range");
00317 }
00318 }
00319
00320 size_t Pcre::get_match_length(int pos) {
00321 if(pos >= 0 && pos < num_matches) {
00322 ArrayIterator P = resultset->begin() + pos;
00323 return P->length();
00324 }
00325 else {
00326 throw exception("out of range");
00327 }
00328 }
00329
00330 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00331 Array Splitted;
00332
00333 if(_expression.length() == 1) {
00334
00335 string buffer, _delimiter, _piece;
00336 char z;
00337 if(case_t) {
00338 z = toupper(_expression[0]);
00339 for(size_t pos=0; pos < piece.length(); pos++) {
00340 _piece += (char)toupper(piece[pos]);
00341 }
00342 }
00343 else {
00344 z = _expression[0];
00345 _piece = piece;
00346 }
00347 for(size_t pos=0; pos<piece.length(); pos++) {
00348 if(_piece[pos] == z) {
00349 Splitted.push_back(buffer);
00350 buffer = "";
00351 }
00352 else {
00353 buffer += piece[pos];
00354 }
00355 }
00356 if(buffer != "") {
00357 Splitted.push_back(buffer);
00358 }
00359 }
00360 else {
00361
00362 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00363
00364 pcre_free(p_pcre);
00365 pcre_free(p_pcre_extra);
00366
00367 pcre *_p = NULL;
00368 pcre_extra *_e = NULL;;
00369
00370 p_pcre = _p;
00371 p_pcre_extra = _e;
00372
00373 _expression = "(" + _expression + ")";
00374 Compile(_flags);
00375 }
00376 int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00377 for(;;) {
00378 if(search(piece, pos) == true) {
00379 if(matches() > 0) {
00380 piece_end = get_match_start(0) - 1;
00381 piece_start = pos;
00382 pos = piece_end + 1 + get_match_length(0);
00383 string junk(piece, piece_start, (piece_end - piece_start)+1);
00384 num_pieces++;
00385 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00386 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00387 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00388
00389 Splitted.push_back(junk);
00390 }
00391 }
00392 }
00393 }
00394 }
00395 else {
00396
00397 string junk(piece, pos, (piece.length() - pos));
00398 num_pieces++;
00399 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00400 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00401 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00402
00403 Splitted.push_back(junk);
00404 }
00405 }
00406 }
00407 break;
00408 }
00409 }
00410 }
00411 return Splitted;
00412 }
00413
00414 Array Pcre::split(const string& piece) {
00415 return _split(piece, 0, 0, 0);
00416 }
00417
00418 Array Pcre::split(const string& piece, int limit) {
00419 return _split(piece, limit, 0, 0);
00420 }
00421
00422 Array Pcre::split(const string& piece, int limit, int start_offset) {
00423 return _split(piece, limit, start_offset, 0);
00424 }
00425
00426 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00427 return _split(piece, limit, start_offset, end_offset);
00428 }
00429
00430 Array Pcre::split(const string& piece, vector<int> positions) {
00431 Array PreSplitted = _split(piece, 0, 0, 0);
00432 Array Splitted;
00433 for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00434 Splitted.push_back(PreSplitted[*vecIt]);
00435 }
00436 return Splitted;
00437 }
00438
00439
00440
00441 string Pcre::replace(const string& piece, const string& with) {
00442 string Replaced(piece);
00443
00444
00445
00446
00447
00448 Pcre braces("[^\\\\]\\(.*[^\\\\]\\)");
00449 if(! braces.search(_expression)) {
00450
00451
00452
00453
00454 pcre_free(p_pcre);
00455 pcre_free(p_pcre_extra);
00456
00457 pcre *_p = NULL;
00458 pcre_extra *_e = NULL;;
00459
00460 p_pcre = _p;
00461 p_pcre_extra = _e;
00462
00463 _expression = "(" + _expression + ")";
00464 Compile(_flags);
00465 }
00466
00467 if(search(piece)) {
00468
00469 string use_with = _replace_vars(with);
00470 if(!global_t) {
00471
00472
00473
00474
00475 if(matched() && matches() >= 1) {
00476 int len = get_match_end() - get_match_start() + 1;
00477 Replaced.replace(get_match_start(0), len, use_with);
00478 }
00479 }
00480 else {
00481
00482
00483
00484
00485
00486
00487
00488
00489 string sLeftOver = Replaced;
00490 int iCurPosition = 0;
00491 while( search( sLeftOver ) ) {
00492 if( matched() && matches() >= 1 ) {
00493 int len = 0;
00494 string lookfor;
00495 lookfor.erase();
00496 int match_pos;
00497 for (match_pos = 0; match_pos < matches(); match_pos++) {
00498 len += ((get_match_end(match_pos) - get_match_start(match_pos)) + 1);
00499 lookfor += get_match(match_pos);
00500 }
00501 match_pos = Replaced.find( lookfor, iCurPosition );
00502 Replaced.replace(match_pos, len, use_with);
00503 iCurPosition = ( match_pos + use_with.length() );
00504 sLeftOver = Replaced.substr( iCurPosition, string::npos );
00505 }
00506 }
00507 }
00508 }
00509 return Replaced;
00510 }
00511
00512
00513
00514 string Pcre::_replace_vars(const string& piece) {
00515 Pcre dollar("\\$[0-9]+");
00516 string with = piece;
00517 if(dollar.search(with)) {
00518 for(int index=0; index < num_matches; index++) {
00519
00520 string sub = get_match(index);
00521 ostringstream num;
00522 num << index+1;
00523 string dollar_num = "(\\$" + num.str() + ")";
00524 Pcre subsplit(dollar_num);
00525
00526 Array splitted = subsplit.split(with);
00527 string Replaced;
00528 for(size_t pos=0; pos < splitted.size(); pos++) {
00529 if(pos == (splitted.size() - 1))
00530 Replaced += splitted[pos];
00531 else
00532 Replaced += splitted[pos] + sub;
00533 }
00534 with = Replaced;
00535 }
00536 return with;
00537 }
00538 else {
00539
00540 return with;
00541 }
00542 }