00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "pcre++.h"
00044
00045 Pcre::Pcre(const string& expression) {
00046 _expression = expression;
00047 _flags = 0;
00048 case_t = global_t = false;
00049 Compile(0);
00050 }
00051
00052 Pcre::Pcre(const string& expression, const string& flags) {
00053 _expression = expression;
00054 unsigned int FLAG = 0;
00055
00056 for(unsigned int flag=0; flag<flags.length(); flag++) {
00057 switch(flags[flag]) {
00058 case 'i': FLAG |= PCRE_CASELESS; case_t = true; break;
00059 case 'm': FLAG |= PCRE_MULTILINE; break;
00060 case 's': FLAG |= PCRE_DOTALL; break;
00061 case 'x': FLAG |= PCRE_EXTENDED; break;
00062 case 'g': global_t = true; break;
00063 }
00064 }
00065
00066 _flags = FLAG;
00067
00068 Compile(FLAG);
00069 }
00070
00071 Pcre::Pcre(Pcre &P) {
00072 _expression = P._expression;
00073 _flags = P._flags;
00074 case_t = P.case_t;
00075 global_t = P.global_t;
00076 Compile(_flags);
00077 }
00078
00079 void Pcre::Compile(int flags) {
00080
00081 p_pcre_extra = NULL;
00082 p_pcre = NULL;
00083 p_pcre = pcre_compile((char *)_expression.c_str(), flags,
00084 (const char **)(&err_str), &erroffset, NULL);
00085
00086 if(p_pcre == NULL) {
00087
00088 string Error = err_str;
00089 throw exception("pcre_compile(..) failed: " + Error);
00090 }
00091
00092
00093 int where;
00094 int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00095 if(info == 0) {
00096 sub_len = (where +2) * 3;
00097 }
00098 else {
00099 throw exception(info);
00100 }
00101 did_match = false;
00102 num_matches = -1;
00103 }
00104
00105 const Pcre& Pcre::operator = (const string& expression) {
00106 reset();
00107 Pcre *pcre = new Pcre(expression);
00108 return *pcre;
00109 }
00110
00111 Pcre::~Pcre() {
00112 pcre_free(p_pcre);
00113 pcre_free(p_pcre_extra);
00114 delete sub_vec;
00115 if(num_matches > 0)
00116 delete resultset;
00117 }
00118
00119 void Pcre::reset() {
00120 did_match = false;
00121 num_matches = -1;
00122 }
00123
00124 bool Pcre::search(const string& stuff, int OffSet) {
00125 return dosearch(stuff, OffSet);
00126 }
00127
00128 bool Pcre::search(const string& stuff) {
00129 return dosearch(stuff, 0);
00130 }
00131
00132 bool Pcre::dosearch(const string& stuff, int OffSet) {
00133 reset();
00134 sub_vec = new int[sub_len];
00135 int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00136 (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00137
00138 if(num < 0) {
00139
00140 return false;
00141 }
00142 else if(num == 0) {
00143
00144 return false;
00145 }
00146 else if(num == 1) {
00147
00148 did_match = true;
00149 num_matches = 0;
00150 return true;
00151 }
00152 else if(num > 1) {
00153
00154 resultset = new Array;
00155 const char **stringlist;
00156 did_match = true;
00157 num_matches = num - 1;
00158
00159 int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00160 if(res == 0) {
00161 for(int i=1; i<num; i++) {
00162 resultset->push_back(stringlist[i]);
00163 }
00164 pcre_free_substring_list(stringlist);
00165 }
00166 else {
00167 throw exception(res);
00168 }
00169 return true;
00170 }
00171 else {
00172
00173 return false;
00174 }
00175 }
00176
00177 Array* Pcre::get_sub_strings() {
00178 if(resultset != NULL)
00179 return resultset;
00180 else
00181 return NULL;
00182 }
00183
00184 string Pcre::get_match(int pos) {
00185 if(pos >= 0 && pos < num_matches) {
00186 ArrayIterator P = resultset->begin() + pos;
00187 return *P;
00188 }
00189 else {
00190 throw exception("out of range");
00191 }
00192 }
00193
00194 int Pcre::get_match_start(int pos) {
00195 if(pos >= 0 && pos <= num_matches) {
00196
00197
00198
00199 return sub_vec[ (++pos) * 2 ];
00200 }
00201 else {
00202 throw exception("out of range");
00203 }
00204 }
00205
00206 int Pcre::get_match_end(int pos) {
00207 if(pos >= 0 && pos <= num_matches) {
00208
00209
00210
00211
00212
00213 return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00214 }
00215 else {
00216 throw exception("out of range");
00217 }
00218 }
00219
00220 size_t Pcre::get_match_length(int pos) {
00221 if(pos >= 0 && pos < num_matches) {
00222 ArrayIterator P = resultset->begin() + pos;
00223 return P->length();
00224 }
00225 else {
00226 throw exception("out of range");
00227 }
00228 }
00229
00230 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00231 Array Splitted;
00232
00233 if(_expression.length() == 1) {
00234
00235 string buffer, _delimiter, _piece;
00236 char z;
00237 if(case_t) {
00238 z = toupper(_expression[0]);
00239 for(size_t pos=0; pos < piece.length(); pos++) {
00240 _piece += (char)toupper(piece[pos]);
00241 }
00242 }
00243 else {
00244 z = _expression[0];
00245 _piece = piece;
00246 }
00247 for(size_t pos=0; pos<piece.length(); pos++) {
00248 if(_piece[pos] == z) {
00249 Splitted.push_back(buffer);
00250 buffer = "";
00251 }
00252 else {
00253 buffer += piece[pos];
00254 }
00255 }
00256 if(buffer != "") {
00257 Splitted.push_back(buffer);
00258 }
00259 }
00260 else {
00261
00262 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00263
00264 pcre_free(p_pcre);
00265 pcre_free(p_pcre_extra);
00266
00267 pcre *_p = NULL;
00268 pcre_extra *_e = NULL;;
00269
00270 p_pcre = _p;
00271 p_pcre_extra = _e;
00272
00273 _expression = "(" + _expression + ")";
00274 Compile(_flags);
00275 }
00276 int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00277 for(;;) {
00278 if(search(piece, pos) == true) {
00279 if(matches() > 0) {
00280 piece_end = get_match_start(0) - 1;
00281 piece_start = pos;
00282 pos = piece_end + 1 + get_match_length(0);
00283 string junk(piece, piece_start, (piece_end - piece_start)+1);
00284 num_pieces++;
00285 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00286 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00287 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00288
00289 Splitted.push_back(junk);
00290 }
00291 }
00292 }
00293 }
00294 }
00295 else {
00296
00297 string junk(piece, pos, (piece.length() - pos));
00298 num_pieces++;
00299 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00300 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00301 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00302
00303 Splitted.push_back(junk);
00304 }
00305 }
00306 }
00307 break;
00308 }
00309 }
00310 }
00311 return Splitted;
00312 }
00313
00314 Array Pcre::split(const string& piece) {
00315 return _split(piece, 0, 0, 0);
00316 }
00317
00318 Array Pcre::split(const string& piece, int limit) {
00319 return _split(piece, limit, 0, 0);
00320 }
00321
00322 Array Pcre::split(const string& piece, int limit, int start_offset) {
00323 return _split(piece, limit, start_offset, 0);
00324 }
00325
00326 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00327 return _split(piece, limit, start_offset, end_offset);
00328 }
00329
00330 Array Pcre::split(const string& piece, vector<int> positions) {
00331 Array PreSplitted = _split(piece, 0, 0, 0);
00332 Array Splitted;
00333 for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00334 Splitted.push_back(PreSplitted[*vecIt]);
00335 }
00336 return Splitted;
00337 }
00338
00339
00340 string Pcre::replace(const string& piece, const string& with) {
00341 string Replaced(piece);
00342
00343 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00344
00345 _expression = "(" + _expression + ")";
00346
00347 pcre_free(p_pcre);
00348 pcre_free(p_pcre_extra);
00349
00350 pcre *_p = NULL;
00351 pcre_extra *_e = NULL;;
00352
00353 p_pcre = _p;
00354 p_pcre_extra = _e;
00355
00356 _expression = "(" + _expression + ")";
00357 Compile(_flags);
00358 }
00359
00360 if(search(piece)) {
00361
00362 string use_with = _replace_vars(with);
00363 if(!global_t) {
00364
00365 if(matched() && matches() >= 1) {
00366 Replaced.replace(get_match_start(0), (get_match_end(0) - get_match_start(0)) + 1, use_with);
00367 }
00368 }
00369 else {
00370
00371 Array Splitted = split(piece);
00372 Replaced = "";
00373 for(size_t pos=0; pos < Splitted.size(); pos++) {
00374 if(pos == (Splitted.size() - 1))
00375 Replaced += Splitted[pos];
00376 else
00377 Replaced += Splitted[pos] + with;
00378 }
00379 }
00380 }
00381 return Replaced;
00382 }
00383
00384 string Pcre::_replace_vars(const string& piece) {
00385 Pcre dollar("\\$[0-9]+");
00386 string with = piece;
00387 if(dollar.search(with)) {
00388 for(int index=0; index < num_matches; index++) {
00389
00390 string sub = get_match(index);
00391 ostringstream num(index+1);
00392 Pcre subsplit(string("(\\$") + num.str() + ")");
00393 Array splitted = subsplit.split(with);
00394 string Replaced;
00395 for(size_t pos=0; pos < splitted.size(); pos++) {
00396 if(pos == (splitted.size() - 1))
00397 Replaced += splitted[pos];
00398 else
00399 Replaced += splitted[pos] + sub;
00400 }
00401 with = Replaced;
00402 }
00403 return with;
00404 }
00405 else {
00406
00407 return with;
00408 }
00409 }