From 0205b69ee1f12f5cbd4a6deda3b2b78228262827 Mon Sep 17 00:00:00 2001 From: Fons Rademakers <Fons.Rademakers@cern.ch> Date: Fri, 2 Dec 2005 16:17:48 +0000 Subject: [PATCH] Several new Core features: - new TPRegexp class by Eddy Offermann using the PCRE library for powerful regexp matching. TPRegexp is fully integrated in TString and can be used like TRegExp. For more see the TPRegexp clas description. - Extended TString::Atoi(), Atof(), IsDigit() and new IsFloat() by John Frankland. For more see the method description. git-svn-id: http://root.cern.ch/svn/root/trunk@13456 27541ba8-7e3a-0410-8455-c3a389f83636 --- Makefile | 12 +- base/Module.mk | 11 +- base/inc/LinkDef2.h | 3 +- base/inc/TPRegexp.h | 85 ++++++++ base/inc/TString.h | 20 +- base/src/TPRegexp.cxx | 470 ++++++++++++++++++++++++++++++++++++++++++ base/src/TString.cxx | 118 ++++++++++- config/Makefile.in | 4 + configure | 44 +++- test/tstring.cxx | 18 +- 10 files changed, 758 insertions(+), 27 deletions(-) create mode 100644 base/inc/TPRegexp.h create mode 100644 base/src/TPRegexp.cxx diff --git a/Makefile b/Makefile index 73c6e9e6451..55e4beea83f 100644 --- a/Makefile +++ b/Makefile @@ -42,9 +42,9 @@ endif ##### Modules to build ##### -MODULES = build cint metautils utils base cont meta net auth zip clib \ - matrix newdelete hist tree freetype graf g3d gpad gui minuit \ - histpainter treeplayer treeviewer physics postscript \ +MODULES = build cint metautils pcre utils base cont meta net auth zip \ + clib matrix newdelete hist tree freetype graf g3d gpad gui \ + minuit histpainter treeplayer treeviewer physics postscript \ rint html eg geom geompainter vmc fumili mlp gedold ged quadp \ guibuilder xml foam splot smatrix @@ -441,15 +441,15 @@ G__%.d: G__%.cxx $(RMKDEP) %.d: %.cxx $(RMKDEP) $(MAKEDEP) $@ "$(CXXFLAGS)" $< > $@ -$(CORELIB): $(COREO) $(COREDO) $(CINTLIB) $(CORELIBDEP) +$(CORELIB): $(COREO) $(COREDO) $(CINTLIB) $(PCREDEP) $(CORELIBDEP) ifneq ($(ARCH),alphacxx6) @$(MAKELIB) $(PLATFORM) $(LD) 
"$(LDFLAGS)" \ "$(SOFLAGS)" libCore.$(SOEXT) $@ "$(COREO) $(COREDO)" \ - "$(CORELIBEXTRA) $(CRYPTLIBS)" + "$(CORELIBEXTRA) $(PCRELDFLAGS) $(PCRELIB) $(CRYPTLIBS)" else @$(MAKELIB) $(PLATFORM) $(LD) "$(CORELDFLAGS)" \ "$(SOFLAGS)" libCore.$(SOEXT) $@ "$(COREO) $(COREDO)" \ - "$(CORELIBEXTRA) $(CRYPTLIBS)" + "$(CORELIBEXTRA) $(PCRELDFLAGS) $(PCRELIB) $(CRYPTLIBS)" endif map:: $(RLIBMAP) diff --git a/base/Module.mk b/base/Module.mk index e2aab2a616e..d323925b8d6 100644 --- a/base/Module.mk +++ b/base/Module.mk @@ -79,10 +79,10 @@ $(BASEDS4): @echo "Generating dictionary $@..." $(ROOTCINTTMP) -f $@ -c $(BASEH4) $(BASEL4) -$(BASEDO1): $(BASEDS1) - $(CXX) $(NOOPT) $(CXXFLAGS) -I. -o $@ -c $< -$(BASEDO2): $(BASEDS2) - $(CXX) $(NOOPT) $(CXXFLAGS) -I. -o $@ -c $< +$(BASEDO1): $(BASEDS1) $(PCREDEP) + $(CXX) $(NOOPT) $(PCREINC) $(CXXFLAGS) -I. -o $@ -c $< +$(BASEDO2): $(BASEDS2) $(PCREDEP) + $(CXX) $(NOOPT) $(PCREINC) $(CXXFLAGS) -I. -o $@ -c $< ifeq ($(ARCH),linuxicc) $(BASEDO3): $(BASEDS3) $(CXX) $(NOOPT) $(CXXFLAGS) -wd191 -I. -o $@ -c $< @@ -108,6 +108,9 @@ distclean-base: clean-base distclean:: distclean-base ##### extra rules ###### +base/src/TPRegexp.o: base/src/TPRegexp.cxx $(PCREDEP) + $(CXX) $(OPT) $(PCREINC) $(CXXFLAGS) -o $@ -c $< + ifeq ($(ARCH),alphacxx6) $(BASEDIRS)/TRandom.o: $(BASEDIRS)/TRandom.cxx $(CXX) $(NOOPT) $(CXXFLAGS) -o $@ -c $< diff --git a/base/inc/LinkDef2.h b/base/inc/LinkDef2.h index 6a1b4a5d727..dbbc99c33a6 100644 --- a/base/inc/LinkDef2.h +++ b/base/inc/LinkDef2.h @@ -1,4 +1,4 @@ -/* @(#)root/base:$Name: $:$Id: LinkDef2.h,v 1.29 2004/11/03 11:05:12 rdm Exp $ */ +/* @(#)root/base:$Name: $:$Id: LinkDef2.h,v 1.30 2005/08/16 12:57:57 brun Exp $ */ /************************************************************************* * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. 
* @@ -102,6 +102,7 @@ #pragma link C++ class TROOT; #pragma link C++ class TRealData+; #pragma link C++ class TRegexp; +#pragma link C++ class TPRegexp; #pragma link C++ class TRefCnt; #pragma link C++ class TSignalHandler; #pragma link C++ class TStopwatch+; diff --git a/base/inc/TPRegexp.h b/base/inc/TPRegexp.h new file mode 100644 index 00000000000..7d5a08a5c48 --- /dev/null +++ b/base/inc/TPRegexp.h @@ -0,0 +1,85 @@ +// @(#)root/base:$Name: $:$Id: TRegexp.h,v 1.1.1.1 2000/05/16 17:00:39 rdm Exp $ +// Author: Eddy Offermann 24/06/05 + +/************************************************************************* + * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. * + * All rights reserved. * + * * + * For the licensing terms see $ROOTSYS/LICENSE. * + * For the list of contributors see $ROOTSYS/README/CREDITS. * + *************************************************************************/ + +#ifndef ROOT_TPRegexp +#define ROOT_TPRegexp + +////////////////////////////////////////////////////////////////////////// +// // +// TPRegexp // +// // +// C++ Wrapper for the "Perl Compatible Regular Expressions" library // +// The PCRE lib can be found at: // +// http://www.pcre.org/ // +// // +// Extensive documentation about Regular expressions in Perl can be // +// found at : // +// http://perldoc.perl.org/perlre.html // +// // +////////////////////////////////////////////////////////////////////////// + +#ifndef ROOT_Rtypes +#include "Rtypes.h" +#endif +#ifndef ROOT_TString +#include "TString.h" +#endif +#ifndef ROOT_TArrayI +#include "TArrayI.h" +#endif + +struct PCREPriv_t; + + +class TPRegexp { + +private: + enum { + kPCRE_GLOBAL = 0x80000000, + kPCRE_OPTIMIZE = 0x40000000, + kPCRE_DEBUG_MSGS = 0x20000000, + kPCRE_INTMASK = 0x0FFF, + }; + + TString fPattern; + PCREPriv_t *fPriv; + UInt_t fPCREOpts; + + void Compile(); + void Optimize(); + UInt_t ParseMods(const TString &mods) const; + Int_t ReplaceSubs(const TString &s, TString &final, + const TString 
&replacePattern, + Int_t *ovec, Int_t nmatch) const; + +public: + TPRegexp(); + TPRegexp(const TString &pat); + TPRegexp(const TPRegexp &p); + virtual ~TPRegexp(); + + Int_t Match(const TString &s, const TString &mods="", + Int_t offset=0, Int_t nMatchMax=30, TArrayI *pos=0); + TObjArray *MatchS(const TString &s, const TString &mods="", + Int_t offset=0, Int_t nMaxMatch=30); + Bool_t MatchB(const TString &s, const TString &mods="", + Int_t offset=0, Int_t nMaxMatch=30) { + return (Match(s,mods,offset,nMaxMatch) > 0); } + Int_t Substitute(TString &s, const TString &replace, + const TString &mods="", Int_t offset=0, + Int_t nMatchMax=30); + + TPRegexp &operator=(const TPRegexp &p); + + ClassDef(TPRegexp,0) // Perl Compatible Regular Expression Class +}; + +#endif diff --git a/base/inc/TString.h b/base/inc/TString.h index 39394d5b735..c70e1a15cba 100644 --- a/base/inc/TString.h +++ b/base/inc/TString.h @@ -1,4 +1,4 @@ -// @(#)root/base:$Name: $:$Id: TString.h,v 1.40 2005/08/15 21:21:46 pcanal Exp $ +// @(#)root/base:$Name: $:$Id: TString.h,v 1.41 2005/11/21 11:17:18 rdm Exp $ // Author: Fons Rademakers 04/08/95 /************************************************************************* @@ -47,6 +47,7 @@ namespace std { using ::string; } #endif class TRegexp; +class TPRegexp; class TString; class TSubString; class TObjArray; @@ -272,6 +273,8 @@ public: TSubString operator()(Ssiz_t start, Ssiz_t len); // Sub-string operator TSubString operator()(const TRegexp &re); // Match the RE TSubString operator()(const TRegexp &re, Ssiz_t start); + TSubString operator()(TPRegexp &re); // Match the Perl compatible Regular Expression + TSubString operator()(TPRegexp &re, Ssiz_t start); TSubString SubString(const char *pat, Ssiz_t start = 0, ECaseCompare cmp = kExact); char operator[](Ssiz_t i) const; @@ -279,6 +282,8 @@ public: TSubString operator()(Ssiz_t start, Ssiz_t len) const; TSubString operator()(const TRegexp &re) const; // Match the RE TSubString operator()(const TRegexp 
&re, Ssiz_t start) const; + TSubString operator()(TPRegexp &re) const; // Match the Perl compatible Regular Expression + TSubString operator()(TPRegexp &re, Ssiz_t start) const; TSubString SubString(const char *pat, Ssiz_t start = 0, ECaseCompare cmp = kExact) const; @@ -300,6 +305,7 @@ public: Bool_t Contains(const char *pat, ECaseCompare cmp = kExact) const; Bool_t Contains(const TString &pat, ECaseCompare cmp = kExact) const; Bool_t Contains(const TRegexp &pat) const; + Bool_t Contains(TPRegexp &pat) const; Int_t CountChar(Int_t c) const; TString Copy() const; const char *Data() const { return fData; } @@ -318,6 +324,8 @@ public: ECaseCompare cmp) const; Ssiz_t Index(const TRegexp &pat, Ssiz_t i = 0) const; Ssiz_t Index(const TRegexp &pat, Ssiz_t *ext, Ssiz_t i = 0) const; + Ssiz_t Index(TPRegexp &pat, Ssiz_t i = 0) const; + Ssiz_t Index(TPRegexp &pat, Ssiz_t *ext, Ssiz_t i = 0) const; TString &Insert(Ssiz_t pos, const char *s); TString &Insert(Ssiz_t pos, const char *s, Ssiz_t extent); TString &Insert(Ssiz_t pos, const TString &s); @@ -326,6 +334,7 @@ public: Bool_t IsAlpha() const; Bool_t IsAlnum() const; Bool_t IsDigit() const; + Bool_t IsFloat() const; Bool_t IsHex() const; Bool_t IsNull() const { return Pref()->fNchars == 0; } Ssiz_t Last(char c) const { return Pref()->Last(c); } @@ -415,12 +424,6 @@ extern int strncasecmp(const char *str1, const char *str2, Ssiz_t n); inline void TStringRef::UnLink() { if (RemoveReference() == 0) delete [] (char*)this; } -inline Int_t TString::Atoi() const -{ return atoi(fData); } - -inline Double_t TString::Atof() const -{ return atof(fData); } - inline void TString::Cow() { if (Pref()->References() > 1) Clone(); } @@ -493,6 +496,9 @@ inline Bool_t TString::Contains(const char *s, ECaseCompare cmp) const inline Bool_t TString::Contains(const TRegexp &pat) const { return Index(pat, (Ssiz_t)0) != kNPOS; } +inline Bool_t TString::Contains(TPRegexp &pat) const +{ return Index(pat, (Ssiz_t)0) != kNPOS; } + inline Ssiz_t 
TString::Index(const char *s, Ssiz_t i, ECaseCompare cmp) const { return Index(s, s ? strlen(s) : 0, i, cmp); } diff --git a/base/src/TPRegexp.cxx b/base/src/TPRegexp.cxx new file mode 100644 index 00000000000..1456973ac3b --- /dev/null +++ b/base/src/TPRegexp.cxx @@ -0,0 +1,470 @@ +// @(#)root/base:$Name: $:$Id: TRegexp.h,v 1.1.1.1 2000/05/16 17:00:39 rdm Exp $ +// Author: Eddy Offermann 24/06/05 + +/************************************************************************* + * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. * + * All rights reserved. * + * * + * For the licensing terms see $ROOTSYS/LICENSE. * + * For the list of contributors see $ROOTSYS/README/CREDITS. * + *************************************************************************/ + +////////////////////////////////////////////////////////////////////////// +// // +// TPRegexp // +// // +// C++ Wrapper for the "Perl Compatible Regular Expressions" library // +// The PCRE lib can be found at: // +// http://www.pcre.org/ // +// // +// Extensive documentation about Regular expressions in Perl can be // +// found at : // +// http://perldoc.perl.org/perlre.html // +// // +////////////////////////////////////////////////////////////////////////// + +#include "Riostream.h" +#include "TPRegexp.h" +#include "TObjArray.h" +#include "TObjString.h" +#include "TError.h" + +#include <pcre.h> + + +struct PCREPriv_t { + pcre *fPCRE; + pcre_extra *fPCREExtra; + + PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; } +}; + + +ClassImp(TPRegexp) + +//______________________________________________________________________________ +TPRegexp::TPRegexp() +{ + // Default ctor. + + fPriv = new PCREPriv_t; + fPCREOpts = 0; +} + +//______________________________________________________________________________ +TPRegexp::TPRegexp(const TString &pat) +{ + // Create and initialize with pat. 
+ + fPattern = pat; + fPriv = new PCREPriv_t; + fPCREOpts = 0; +} + +//______________________________________________________________________________ +TPRegexp::TPRegexp(const TPRegexp &p) +{ + // Copy ctor. + + fPattern = p.fPattern; + fPriv = new PCREPriv_t; + fPCREOpts = p.fPCREOpts; +} + +//______________________________________________________________________________ +TPRegexp::~TPRegexp() +{ + // Cleanup. + + if (fPriv->fPCRE) + pcre_free(fPriv->fPCRE); + if (fPriv->fPCREExtra) + pcre_free(fPriv->fPCREExtra); + delete fPriv; +} + +//______________________________________________________________________________ +TPRegexp &TPRegexp::operator=(const TPRegexp &p) +{ + // Assignement operator. + + if (this != &p) { + fPattern = p.fPattern; + if (fPriv->fPCRE) + pcre_free(fPriv->fPCRE); + fPriv->fPCRE = 0; + if (fPriv->fPCREExtra) + pcre_free(fPriv->fPCREExtra); + fPriv->fPCREExtra = 0; + fPCREOpts = p.fPCREOpts; + } + return *this; +} + +//______________________________________________________________________________ +UInt_t TPRegexp::ParseMods(const TString &modStr) const +{ + // Translate Perl modifier flags into pcre flags. + + UInt_t opts = 0; + + if (modStr.Length() <= 0) + return fPCREOpts; + + //translate perl flags into pcre flags + const char *m = modStr; + while (*m) { + switch (*m) { + case 'g': + opts |= kPCRE_GLOBAL; + break; + case 'i': + opts |= PCRE_CASELESS; + break; + case 'm': + opts |= PCRE_MULTILINE; + break; + case 'o': + opts |= kPCRE_OPTIMIZE; + break; + case 's': + opts |= PCRE_DOTALL; + break; + case 'x': + opts |= PCRE_EXTENDED; + break; + case 'd': // special flag to enable debug printing (not Perl compat.) + opts |= kPCRE_DEBUG_MSGS; + break; + default: + Error("ParseMods", "illegal pattern modifier: %c", *m); + opts = 0; + } + ++m; + } + return opts; +} + +//______________________________________________________________________________ +void TPRegexp::Compile() +{ + // Compile the fPattern. 
+ + if (fPriv->fPCRE) + pcre_free(fPriv->fPCRE); + + if (fPCREOpts & kPCRE_DEBUG_MSGS) + Info("Compile", "PREGEX compiling %s", fPattern.Data()); + + const char *errstr; + Int_t patIndex; + fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK, + &errstr, &patIndex, 0); + + if (!fPriv->fPCRE) { + Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s", + fPattern.Data(), patIndex, errstr); + } + + if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE)) + Optimize(); +} + +//______________________________________________________________________________ +void TPRegexp::Optimize() +{ + // Send the compiled pattern through pcre_study() and keep the result in fPriv->fPCREExtra. + + if (fPriv->fPCREExtra) + pcre_free(fPriv->fPCREExtra); + + if (fPCREOpts & kPCRE_DEBUG_MSGS) + Info("Optimize", "PREGEX studying %s", fPattern.Data()); + + const char *errstr; + fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr); // study options must be 0: the compile-time bits in fPCREOpts are not valid here + + if (!fPriv->fPCREExtra && errstr) { + Error("Optimize", "Optimization of TPRegexp(%s) failed: %s", + fPattern.Data(), errstr); + } +} + +//______________________________________________________________________________ +Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final, + const TString &replacePattern, + Int_t *offVec, Int_t nrMatch) const +{ + // Return the number of substitutions. 
+ + Int_t nrSubs = 0; + const char *p = replacePattern; + + Int_t state = 0; + Int_t subnum = 0; + while (state != -1) { + switch (state) { + case 0: + if (!*p) { + state = -1; + break; + } + if (*p == '$') { + state = 1; + subnum = 0; + if (p[1] == '&') { + p++; + if (isdigit(p[1])) + p++; + } else if (!isdigit(p[1])) { + Error("ReplaceSubs", "badly formed replacement pattern: %s", + replacePattern.Data()); + } + } else + final += *p; + break; + case 1: + if (isdigit(*p)) { + subnum *= 10; + subnum += (*p)-'0'; + } else { + if (fPCREOpts & kPCRE_DEBUG_MSGS) + Info("ReplaceSubs", "PREGEX appending substr #%d", subnum); + if (subnum < 0 || subnum > nrMatch-1) { + Error("ReplaceSubs","bad string number :%d",subnum); + } else { // only index offVec for groups that were actually reported, never past them + final += s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]); + nrSubs++; + } + + state = 0; + continue; // send char to start state + } + } + p++; + } + return nrSubs; +} + +//______________________________________________________________________________ +Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start, + Int_t nMaxMatch, TArrayI *pos) +{ + // The number of matches is returned, this equals the full match + + // sub-pattern matches. + // nMaxmatch is the maximum allowed number of matches. + // pos contains the string indices of the matches. Its usage is + // shown in the routine MatchS. 
+ + UInt_t opts = ParseMods(mods); + + if (!fPriv->fPCRE || opts != fPCREOpts) { + fPCREOpts = opts; + Compile(); + } + + Int_t *offVec = new Int_t[nMaxMatch]; + Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(), + s.Length(), start, fPCREOpts & kPCRE_INTMASK, + offVec, nMaxMatch); + + if (nrMatch == PCRE_ERROR_NOMATCH) + nrMatch = 0; + else if (nrMatch <= 0) { + Error("Match","pcre_exec error = %d", nrMatch); + delete [] offVec; + return 0; + } + + if (pos) + pos->Adopt(2*nrMatch, offVec); + else + delete [] offVec; + + return nrMatch; +} + +//______________________________________________________________________________ +TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods, + Int_t start, Int_t nMaxMatch) +{ + // Returns a TObjArray of matched substrings as TObjString's. + // The TObjArray is owner of the objects. The first entry is the full + // matched pattern, followed by the subpatterns. + // If a pattern was not matched, it will return an empty substring: + // + // TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc"); + // for (Int_t i = 0; i < subStrL->GetLast()+1; i++) { + // const TString subStr = ((TObjString *)subStrL->At(i))->GetString(); + // cout << "\"" << subStr << "\" "; + // } + // cout << subStr << endl; + // + // produces: "abc" "a" "" "bc" + + TArrayI pos; + Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos); + + TObjArray *subStrL = new TObjArray(); + subStrL->SetOwner(); + + for (Int_t i = 0; i < nrMatch; i++) { + Int_t start = pos[2*i]; + Int_t stop = pos[2*i+1]; + if (start >= 0 && stop >= 0) { + const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]); + subStrL->Add(new TObjString(subStr)); + } else + subStrL->Add(new TObjString()); + } + + return subStrL; +} + +//______________________________________________________________________________ +Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern, + const TString &mods, Int_t start, Int_t nMaxMatch) +{ + // Substitute replaces the 
string s by a new string in which matching + // patterns are replaced by the replacePattern string. The number of + // substitutions are returned. + // + // TString s("aap noot mies"); + // const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1"); + // cout << nrSub << " \"" << s << "\"" <<endl; + // + // produces: 2 "mies noot aap" + + UInt_t opts = ParseMods(mods); + Int_t nrSubs = 0; + TString final; + + if (!fPriv->fPCRE || opts != fPCREOpts) { + fPCREOpts = opts; + Compile(); + } + + Int_t *offVec = new Int_t[nMaxMatch]; + + Int_t offset = start; + Int_t last = 0; + + while (kTRUE) { + + // find next matching subs + Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(), + s.Length(), offset, fPCREOpts & kPCRE_INTMASK, + offVec, nMaxMatch); + + // a negative result (no match, or a real failure) leaves offVec unset: stop scanning + if (nrMatch <= 0 && nrMatch != PCRE_ERROR_NOMATCH) + Error("Substitute", "pcre_exec error = %d", nrMatch); + if (nrMatch < 0) + break; + + // append anything previously unmatched, but not substituted + if (last <= offVec[0]) { + final += s(last,offVec[0]-last); + last = offVec[1]; + } + + // replace stuff in s + nrSubs += ReplaceSubs(s, final, replacePattern, offVec, nrMatch); + + // if global gotta check match at every pos + if (!(fPCREOpts & kPCRE_GLOBAL)) + break; + + if (offVec[0] != offVec[1]) + offset = offVec[1]; + else { + // matched empty string + if (offVec[1] == s.Length()) + break; + offset = offVec[1]+1; + } + } + + delete [] offVec; + + final += s(last,s.Length()-last); + s = final; + + return nrSubs; +} + + +////////////////////////////////////////////////////////////////////////// +// // +// TString member functions, put here so the linker will include // +// them only if regular expressions are used. 
// +// // +////////////////////////////////////////////////////////////////////////// + +//______________________________________________________________________________ +Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const +{ + // Find the first occurrence of the regexp in string and return the position. + // Start is the offset at which the search should start. + + TArrayI pos; + Int_t nrMatch = r.Match(*this,"",start,30,&pos); + if (nrMatch > 0) + return pos[0]; + else + return -1; +} + +//______________________________________________________________________________ +Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const +{ + // Find the first occurrence of the regexp in string and return the position. + // Extent is length of the matched string and start is the offset at which + // the matching should start. + + TArrayI pos; + const Int_t nrMatch = r.Match(*this,"",start,30,&pos); + if (nrMatch > 0) { + *extent = pos[1]-pos[0]; + return pos[0]; + } else { + *extent = 0; + return -1; + } +} + +//______________________________________________________________________________ +TSubString TString::operator()(TPRegexp& r, Ssiz_t start) +{ + // Return the substring found by applying the regexp starting at start. + + Ssiz_t len; + Ssiz_t begin = Index(r, &len, start); + return TSubString(*this, begin, len); +} + +//______________________________________________________________________________ +TSubString TString::operator()(TPRegexp& r) +{ + // Return the substring found by applying the regexp. + + return (*this)(r,0); +} + +//______________________________________________________________________________ +TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const +{ + // Return the substring found by applying the regexp starting at start. 
+ + Ssiz_t len; + Ssiz_t begin = Index(r, &len, start); + return TSubString(*this, begin, len); +} + +//______________________________________________________________________________ +TSubString TString::operator()(TPRegexp& r) const +{ + // Return the substring found by applying the regexp. + + return (*this)(r, 0); +} diff --git a/base/src/TString.cxx b/base/src/TString.cxx index 85093f4790a..91150a4c6d6 100644 --- a/base/src/TString.cxx +++ b/base/src/TString.cxx @@ -1,4 +1,4 @@ -// @(#)root/base:$Name: $:$Id: TString.cxx,v 1.44 2005/11/16 20:04:11 pcanal Exp $ +// @(#)root/base:$Name: $:$Id: TString.cxx,v 1.45 2005/11/21 11:17:18 rdm Exp $ // Author: Fons Rademakers 04/08/95 /************************************************************************* @@ -1489,18 +1489,62 @@ Bool_t TString::IsAlnum() const //______________________________________________________________________________ Bool_t TString::IsDigit() const { - // Returns true if all characters in string are digits (0-9). - // Returns false in case string length is 0. + // Returns true if all characters in string are digits (0-9) or whitespaces, + // i.e. "123456" and "123 456" are both valid integer strings. + // Returns false in case string length is 0 or string contains other + // characters. const char *cp = Data(); Ssiz_t len = Length(); if (len == 0) return kFALSE; - for (Ssiz_t i = 0; i < len; ++i) - if (!isdigit(cp[i])) - return kFALSE; + for (Ssiz_t i = 0; i < len; ++i){ + if (cp[i]!=' ' && !isdigit(cp[i])) return kFALSE; + } return kTRUE; } +//______________________________________________________________________________ +Bool_t TString::IsFloat() const +{ + // Returns kTRUE if string contains a floating point or integer number. 
+ // Examples of valid formats are: + // 64320 + // 64 320 + // 6 4 3 2 0 + // 6.4320 6,4320 + // 6.43e20 6.43E20 6,43e20 + // 6.43e-20 6.43E-20 6,43e-20 + + //we first check if we have an integer, in this case, IsDigit() will be true straight away + if (IsDigit()) return kTRUE; + + TString tmp = *this; + //now we look for occurrences of '.', ',', e', 'E', '+', '-' and replace each + //with '0'. if it is a floating point, IsDigit() will then return kTRUE + Int_t i_dot, i_e, i_plus, i_minus, i_comma; + i_dot = i_e = i_plus = i_minus = i_comma = -1; + + i_dot = tmp.First('.'); + if (i_dot > -1) tmp.Replace(i_dot, 1, "0", 1); + i_comma = tmp.First(','); + if (i_comma > -1) tmp.Replace(i_comma, 1, "0", 1); + i_e = tmp.First('e'); + if (i_e > -1) + tmp.Replace(i_e, 1, "0", 1); + else { + //try for a capital "E" + i_e = tmp.First('E'); + if (i_e > -1) tmp.Replace(i_e, 1, "0", 1); + } + i_plus = tmp.First('+'); + if (i_plus > -1) tmp.Replace(i_plus, 1, "0", 1); + i_minus = tmp.First('-'); + if (i_minus > -1) tmp.Replace(i_minus, 1, "0", 1); + + //test if it is now uniquely composed of numbers + return tmp.IsDigit(); +} + //______________________________________________________________________________ Bool_t TString::IsHex() const { @@ -1517,6 +1561,68 @@ Bool_t TString::IsHex() const return kTRUE; } +//______________________________________________________________________________ +Int_t TString::Atoi() const +{ + // Return integer value of string. + // Valid strings include only digits and whitespace (see IsDigit()), + // i.e. "123456", "123 456" and "1 2 3 4 56" are all valid + // integer strings whose Atoi() value is 123456. + + //any whitespace ? 
+ Int_t end = Index(" "); + //if no whitespaces in string, just use atoi() + if (end == -1) return atoi(Data()); + //make temporary string, removing whitespace + Int_t start = 0; + TString tmp; + //loop over all whitespace + while (end > -1) { + tmp += (*this)(start, end-start); + start = end+1; end = Index(" ", start); + } + //finally add part from last whitespace to end of string + end = Length(); + tmp += (*this)(start, end-start); + return atoi(tmp.Data()); +} + +//______________________________________________________________________________ +Double_t TString::Atof() const +{ + // Return floating-point value contained in string. + // Examples of valid strings are: + // 64320 + // 64 320 + // 6 4 3 2 0 + // 6.4320 6,4320 + // 6.43e20 6.43E20 6,43e20 + // 6.43e-20 6.43E-20 6,43e-20 + + //look for a comma and some whitespace + Int_t comma = Index(","); + Int_t end = Index(" "); + //if no commas & no whitespace in string, just use atof() + if (comma == -1 && end == -1) return atof(Data()); + TString tmp = *this; + if (comma > -1) { + //replace comma with decimal point + tmp.Replace(comma, 1, "."); + } + //no whitespace ? 
+ if (end == -1) return atof(tmp.Data()); + //remove whitespace + Int_t start = 0; + TString tmp2; + while (end > -1) { + tmp2 += tmp(start, end-start); + start = end+1; end = tmp.Index(" ", start); + } + end = tmp.Length(); + tmp2 += tmp(start, end-start); + return atof(tmp2.Data()); +} + //______________________________________________________________________________ Bool_t TString::EndsWith(const char *s, ECaseCompare cmp) const { diff --git a/config/Makefile.in b/config/Makefile.in index 5e6b91a4e89..ca7a5b51ecd 100644 --- a/config/Makefile.in +++ b/config/Makefile.in @@ -46,6 +46,8 @@ OSTHREADLIB := @threadlib@ BUILTINFREETYPE:= @builtinfreetype@ +BUILTINPCRE := @builtinpcre@ + BUILDGL := @buildgl@ OPENGLLIBDIR := @opengllibdir@ OPENGLULIB := @openglulib@ @@ -211,6 +213,8 @@ CLARENSLIBS := @clarenslibs@ BUILDPEAC := @buildpeac@ +GCCXML := @gccxml@ + INSTALL := cp -dpR INSTALLDATA := cp -dpR INSTALLDIR := mkdir -p diff --git a/configure b/configure index 9d310651ddb..467c3236509 100755 --- a/configure +++ b/configure @@ -27,6 +27,7 @@ options=" \ enable_asimage \ enable_builtin_afterimage \ enable_builtin_freetype \ + enable_builtin_pcre \ enable_cern \ enable_chirp \ enable_cintex \ @@ -719,6 +720,7 @@ enable/disable options, prefix with either --enable- or --disable- asimage Image processing support, requires libAfterImage builtin-afterimage Built included libAfterImage, or use system libAfterImage builtin-freetype Built included libfreetype, or use system libfreetype + builtin-pcre Built included libpcre, or use system libpcre cern CERNLIB usage, build h2root and g2root chirp Chirp support (Condor remote I/O), requires libchirp_client cintex Build the libCintex Reflex interface library @@ -769,6 +771,7 @@ with options, prefix with --with-, enables corresponding support dcap-incdir dCache support, location of dcap.h dcap-libdir dCache support, location of libdcap globus Globus support, path to what should be GLOBUS_LOCATION + gccxml Gccxml support, directory 
of the gccxml installation krb5 Kerberos5 support, location of Kerberos distribution krb5-incdir Kerberos5 support, location of krb5.h krb5-libdir Kerberos5 support, location of libkrb5 @@ -920,6 +923,7 @@ if test $# -gt 0 ; then --with-dcap-incdir=*) dcapincdir=$optarg ; enable_dcache="yes" ;; --with-dcap-libdir=*) dcaplibdir=$optarg ; enable_dcache="yes" ;; --with-globus=*) globusdir=$optarg ; enable_globus="yes" ;; + --with-gccxml=*) gccxml=$optarg ;; --with-krb5=*) krb5dir=$optarg ; enable_krb5="yes" ;; --with-krb5-incdir=*) krb5incdir=$optarg ; enable_krb5="yes" ;; --with-krb5-libdir=*) krb5libdir=$optarg ; enable_krb5="yes" ;; @@ -1171,9 +1175,9 @@ fi ###################################################################### # -### echo %%% libfreetype (builtin or system) - Mandatory lib on Unix +### echo %%% libfreetype (builtin or system) - Mandatory lib # -# Mandatory test, must succeed +# Mandatory test, must succeed (see http://www.freetype.org/) # if test "x$enable_builtin_freetype" = "x" || \ test "x$enable_builtin_freetype" = "xno" ; then @@ -1203,6 +1207,39 @@ fi message "Checking whether to build included libfreetype6" echo "$enable_builtin_freetype" +###################################################################### +# +### echo %%% libpcre (builtin or system) - Mandatory lib +# +# Mandatory test, must succeed (see http://www.pcre.org/) +# +if test "x$enable_builtin_pcre" = "x" || \ + test "x$enable_builtin_pcre" = "xno" ; then + + checking_msg "pcre-config" + if `which pcre-config > /dev/null 2>&1` ; then + which pcre-config + + message "Checking for libpcre version >= 3.9" + pcre_version=`pcre-config --version | \ + tr '.' 
' ' | \ + awk 'BEGIN { FS = " "; } \ + { printf "%d", ($''1 * 1000 + $''2) * 1000 + $''3;}'` + # First good version is 3.9 + if test $pcre_version -lt 3009000 ; then + echo "no (`pcre-config --version`)" + enable_builtin_pcre="yes" + else + echo "ok" + enable_builtin_pcre="no" + fi + else + enable_builtin_pcre="yes" # no system libpcre found: libpcre is mandatory, so build the included copy + echo "not found" + fi +fi +message "Checking whether to build included libpcre" +echo "$enable_builtin_pcre" ###################################################################### # @@ -3258,6 +3295,7 @@ sed -e "s|@aclocaldir@|$aclocaldir|" \ -e "s|@buildasimage@|$enable_asimage|" \ -e "s|@builtinafterimage@|$enable_builtin_afterimage|" \ -e "s|@builtinfreetype@|$enable_builtin_freetype|" \ + -e "s|@builtinpcre@|$enable_builtin_pcre|" \ -e "s|@buildpeac@|$buildpeac|" \ -e "s|@buildxrd@|$buildxrd|" \ -e "s|@cernlibdir@|$cernlibdir|" \ @@ -3294,6 +3332,7 @@ sed -e "s|@aclocaldir@|$aclocaldir|" \ -e "s|@globuslibdir@|$globuslibdir|" \ -e "s|@glbextracflags@|$glbextracflags|" \ -e "s|@glbpatchcflags@|$glbpatchcflags|" \ + -e "s|@gccxml@|$gccxml|" \ -e "s|@iconpath@|$iconpath|" \ -e "s|@incdir@|$incdir|" \ -e "s|@krb5incdir@|$krb5incdir|" \ @@ -3470,6 +3509,7 @@ sed -e "s|@architecture@|$arch|" \ -e "s|@libdir@|$libdir2|" \ -e "s|@incdir@|$incdir2|" \ -e "s|@features@|$features|" \ + -e "s|@configargs@|$configargs|" \ < root-config.tmp > $RCONFOUT rm -f root-config.tmp chmod 755 $RCONFOUT diff --git a/test/tstring.cxx b/test/tstring.cxx index e8b5d1a0511..dce92c27dcc 100644 --- a/test/tstring.cxx +++ b/test/tstring.cxx @@ -1,4 +1,4 @@ -// @(#)root/test:$Name: $:$Id: tstring.cxx,v 1.3 2002/01/23 17:52:51 rdm Exp $ +// @(#)root/test:$Name: $:$Id: tstring.cxx,v 1.4 2002/01/24 11:39:31 rdm Exp $ // Author: Fons Rademakers 19/08/96 #include <stdlib.h> @@ -6,6 +6,7 @@ #include "Riostream.h" #include "TString.h" #include "TRegexp.h" +#include "TPRegexp.h" void Ok(int i, int b) @@ -129,6 +130,21 @@ int main() Ok(22, !s10.IsAscii()); Ok(23, s.IsAscii()); + // some 
exercises with the Perl Compatible Regular Expressions + TString s11("Food is on the foo table."); + TPRegexp("\\b(foo)\\s+(\\w+)").Substitute(s11, "round $2"); + Ok(24, s11=="Food is on the round table."); + + TString s12("pepernotenkoek"); + TPRegexp("peper(.*)koek").Substitute(s12, "wal$1boom"); + Ok(25, s12=="walnotenboom"); + + TString s13("hihi haha"); + TPRegexp("^([^ ]*) *([^ ]*)").Substitute(s13, "$2 $1"); + Ok(26, s13=="haha hihi"); + + Ok(27, TPRegexp("^(\\w+) *(\\w+)").Match(s13) == 3); + + // test Resize and Strip s9.Prepend(" "); cout << s9 << endl; -- GitLab