From 0205b69ee1f12f5cbd4a6deda3b2b78228262827 Mon Sep 17 00:00:00 2001
From: Fons Rademakers <Fons.Rademakers@cern.ch>
Date: Fri, 2 Dec 2005 16:17:48 +0000
Subject: [PATCH] Several new Core features: - new TPRegexp class by Eddy
 Offermann using the PCRE library for powerful   regexp matching. TPRegexp is
 fully integrated in TString and can be   used like TRegExp. For more see the
 TPRegexp class description. - Extended TString::Atoi(), Atof(), IsDigit() and
 new IsFloat() by   John Frankland. For more see the method description.

git-svn-id: http://root.cern.ch/svn/root/trunk@13456 27541ba8-7e3a-0410-8455-c3a389f83636
---
 Makefile              |  12 +-
 base/Module.mk        |  11 +-
 base/inc/LinkDef2.h   |   3 +-
 base/inc/TPRegexp.h   |  85 ++++++++
 base/inc/TString.h    |  20 +-
 base/src/TPRegexp.cxx | 470 ++++++++++++++++++++++++++++++++++++++++++
 base/src/TString.cxx  | 118 ++++++++++-
 config/Makefile.in    |   4 +
 configure             |  44 +++-
 test/tstring.cxx      |  18 +-
 10 files changed, 758 insertions(+), 27 deletions(-)
 create mode 100644 base/inc/TPRegexp.h
 create mode 100644 base/src/TPRegexp.cxx

diff --git a/Makefile b/Makefile
index 73c6e9e6451..55e4beea83f 100644
--- a/Makefile
+++ b/Makefile
@@ -42,9 +42,9 @@ endif
 
 ##### Modules to build #####
 
-MODULES       = build cint metautils utils base cont meta net auth zip clib \
-                matrix newdelete hist tree freetype graf g3d gpad gui minuit \
-                histpainter treeplayer treeviewer physics postscript \
+MODULES       = build cint metautils pcre utils base cont meta net auth zip \
+                clib matrix newdelete hist tree freetype graf g3d gpad gui \
+                minuit histpainter treeplayer treeviewer physics postscript \
                 rint html eg geom geompainter vmc fumili mlp gedold ged quadp \
                 guibuilder xml foam splot smatrix
 
@@ -441,15 +441,15 @@ G__%.d: G__%.cxx $(RMKDEP)
 %.d: %.cxx $(RMKDEP)
 	$(MAKEDEP) $@ "$(CXXFLAGS)" $< > $@
 
-$(CORELIB): $(COREO) $(COREDO) $(CINTLIB) $(CORELIBDEP)
+$(CORELIB): $(COREO) $(COREDO) $(CINTLIB) $(PCREDEP) $(CORELIBDEP)
 ifneq ($(ARCH),alphacxx6)
 	@$(MAKELIB) $(PLATFORM) $(LD) "$(LDFLAGS)" \
 	   "$(SOFLAGS)" libCore.$(SOEXT) $@ "$(COREO) $(COREDO)" \
-	   "$(CORELIBEXTRA) $(CRYPTLIBS)"
+	   "$(CORELIBEXTRA) $(PCRELDFLAGS) $(PCRELIB) $(CRYPTLIBS)"
 else
 	@$(MAKELIB) $(PLATFORM) $(LD) "$(CORELDFLAGS)" \
 	   "$(SOFLAGS)" libCore.$(SOEXT) $@ "$(COREO) $(COREDO)" \
-	   "$(CORELIBEXTRA) $(CRYPTLIBS)"
+	   "$(CORELIBEXTRA) $(PCRELDFLAGS) $(PCRELIB) $(CRYPTLIBS)"
 endif
 
 map::   $(RLIBMAP)
diff --git a/base/Module.mk b/base/Module.mk
index e2aab2a616e..d323925b8d6 100644
--- a/base/Module.mk
+++ b/base/Module.mk
@@ -79,10 +79,10 @@ $(BASEDS4):
 		@echo "Generating dictionary $@..."
 		$(ROOTCINTTMP) -f $@ -c $(BASEH4) $(BASEL4)
 
-$(BASEDO1):     $(BASEDS1)
-		$(CXX) $(NOOPT) $(CXXFLAGS) -I. -o $@ -c $<
-$(BASEDO2):     $(BASEDS2)
-		$(CXX) $(NOOPT) $(CXXFLAGS) -I. -o $@ -c $<
+$(BASEDO1):     $(BASEDS1) $(PCREDEP)
+		$(CXX) $(NOOPT) $(PCREINC) $(CXXFLAGS) -I. -o $@ -c $<
+$(BASEDO2):     $(BASEDS2) $(PCREDEP)
+		$(CXX) $(NOOPT) $(PCREINC) $(CXXFLAGS) -I. -o $@ -c $<
 ifeq ($(ARCH),linuxicc)
 $(BASEDO3):     $(BASEDS3)
 		$(CXX) $(NOOPT) $(CXXFLAGS) -wd191 -I. -o $@ -c $<
@@ -108,6 +108,9 @@ distclean-base: clean-base
 distclean::     distclean-base
 
 ##### extra rules ######
+base/src/TPRegexp.o: base/src/TPRegexp.cxx $(PCREDEP)
+	$(CXX) $(OPT) $(PCREINC) $(CXXFLAGS) -o $@ -c $<
+
 ifeq ($(ARCH),alphacxx6)
 $(BASEDIRS)/TRandom.o: $(BASEDIRS)/TRandom.cxx
 	$(CXX) $(NOOPT) $(CXXFLAGS) -o $@ -c $<
diff --git a/base/inc/LinkDef2.h b/base/inc/LinkDef2.h
index 6a1b4a5d727..dbbc99c33a6 100644
--- a/base/inc/LinkDef2.h
+++ b/base/inc/LinkDef2.h
@@ -1,4 +1,4 @@
-/* @(#)root/base:$Name:  $:$Id: LinkDef2.h,v 1.29 2004/11/03 11:05:12 rdm Exp $ */
+/* @(#)root/base:$Name:  $:$Id: LinkDef2.h,v 1.30 2005/08/16 12:57:57 brun Exp $ */
 
 /*************************************************************************
  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers.               *
@@ -102,6 +102,7 @@
 #pragma link C++ class TROOT;
 #pragma link C++ class TRealData+;
 #pragma link C++ class TRegexp;
+#pragma link C++ class TPRegexp;
 #pragma link C++ class TRefCnt;
 #pragma link C++ class TSignalHandler;
 #pragma link C++ class TStopwatch+;
diff --git a/base/inc/TPRegexp.h b/base/inc/TPRegexp.h
new file mode 100644
index 00000000000..7d5a08a5c48
--- /dev/null
+++ b/base/inc/TPRegexp.h
@@ -0,0 +1,85 @@
+// @(#)root/base:$Name:  $:$Id: TRegexp.h,v 1.1.1.1 2000/05/16 17:00:39 rdm Exp $
+// Author: Eddy Offermann   24/06/05
+
+/*************************************************************************
+ * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers.               *
+ * All rights reserved.                                                  *
+ *                                                                       *
+ * For the licensing terms see $ROOTSYS/LICENSE.                         *
+ * For the list of contributors see $ROOTSYS/README/CREDITS.             *
+ *************************************************************************/
+
+#ifndef ROOT_TPRegexp
+#define ROOT_TPRegexp
+
+//////////////////////////////////////////////////////////////////////////
+//                                                                      //
+// TPRegexp                                                             //
+//                                                                      //
+// C++ Wrapper for the "Perl Compatible Regular Expressions" library    //
+//  The PCRE lib can be found at:                                       //
+//              http://www.pcre.org/                                    //
+//                                                                      //
+// Extensive documentation about Regular expressions in Perl can be     //
+// found at :                                                           //
+//              http://perldoc.perl.org/perlre.html                     //
+//                                                                      //
+//////////////////////////////////////////////////////////////////////////
+
+#ifndef ROOT_Rtypes
+#include "Rtypes.h"
+#endif
+#ifndef ROOT_TString
+#include "TString.h"
+#endif
+#ifndef ROOT_TArrayI
+#include "TArrayI.h"
+#endif
+
+struct PCREPriv_t;
+
+
+class TPRegexp {
+
+private:
+   enum {
+      kPCRE_GLOBAL     = 0x80000000,   // 'g' modifier, evaluated in Substitute()
+      kPCRE_OPTIMIZE   = 0x40000000,   // 'o' modifier, makes Compile() call Optimize()
+      kPCRE_DEBUG_MSGS = 0x20000000,   // 'd' modifier, enables Info() messages
+      kPCRE_INTMASK    = 0x0FFF        // mask for the option bits passed on to PCRE
+   };
+
+   TString     fPattern;    // the regular expression as text
+   PCREPriv_t *fPriv;       // holder of the compiled PCRE data (see TPRegexp.cxx)
+   UInt_t      fPCREOpts;   // options as produced by ParseMods()
+
+   void     Compile();
+   void     Optimize();
+   UInt_t   ParseMods(const TString &mods) const;
+   Int_t    ReplaceSubs(const TString &s, TString &final,
+                        const TString &replacePattern,
+                        Int_t *ovec, Int_t nmatch) const;
+
+public:
+   TPRegexp();
+   TPRegexp(const TString &pat);
+   TPRegexp(const TPRegexp &p);
+   virtual ~TPRegexp();
+
+   Int_t      Match(const TString &s, const TString &mods="",
+                    Int_t offset=0, Int_t nMatchMax=30, TArrayI *pos=0);
+   TObjArray *MatchS(const TString &s, const TString &mods="",
+                     Int_t offset=0, Int_t nMaxMatch=30);
+   Bool_t     MatchB(const TString &s, const TString &mods="",
+                     Int_t offset=0, Int_t nMaxMatch=30) {
+                           return (Match(s,mods,offset,nMaxMatch) > 0); }
+   Int_t      Substitute(TString &s, const TString &replace,
+                         const TString &mods="", Int_t offset=0,
+                         Int_t nMatchMax=30);
+
+   TPRegexp &operator=(const TPRegexp &p);
+
+   ClassDef(TPRegexp,0)  // Perl Compatible Regular Expression Class
+};
+
+#endif
diff --git a/base/inc/TString.h b/base/inc/TString.h
index 39394d5b735..c70e1a15cba 100644
--- a/base/inc/TString.h
+++ b/base/inc/TString.h
@@ -1,4 +1,4 @@
-// @(#)root/base:$Name:  $:$Id: TString.h,v 1.40 2005/08/15 21:21:46 pcanal Exp $
+// @(#)root/base:$Name:  $:$Id: TString.h,v 1.41 2005/11/21 11:17:18 rdm Exp $
 // Author: Fons Rademakers   04/08/95
 
 /*************************************************************************
@@ -47,6 +47,7 @@ namespace std { using ::string; }
 #endif
 
 class TRegexp;
+class TPRegexp;
 class TString;
 class TSubString;
 class TObjArray;
@@ -272,6 +273,8 @@ public:
    TSubString    operator()(Ssiz_t start, Ssiz_t len);   // Sub-string operator
    TSubString    operator()(const TRegexp &re);          // Match the RE
    TSubString    operator()(const TRegexp &re, Ssiz_t start);
+   TSubString    operator()(TPRegexp &re);               // Match the Perl compatible Regular Expression
+   TSubString    operator()(TPRegexp &re, Ssiz_t start);
    TSubString    SubString(const char *pat, Ssiz_t start = 0,
                            ECaseCompare cmp = kExact);
    char          operator[](Ssiz_t i) const;
@@ -279,6 +282,8 @@ public:
    TSubString    operator()(Ssiz_t start, Ssiz_t len) const;
    TSubString    operator()(const TRegexp &re) const;   // Match the RE
    TSubString    operator()(const TRegexp &re, Ssiz_t start) const;
+   TSubString    operator()(TPRegexp &re) const;        // Match the Perl compatible Regular Expression
+   TSubString    operator()(TPRegexp &re, Ssiz_t start) const;
    TSubString    SubString(const char *pat, Ssiz_t start = 0,
                            ECaseCompare cmp = kExact) const;
 
@@ -300,6 +305,7 @@ public:
    Bool_t       Contains(const char *pat,    ECaseCompare cmp = kExact) const;
    Bool_t       Contains(const TString &pat, ECaseCompare cmp = kExact) const;
    Bool_t       Contains(const TRegexp &pat) const;
+   Bool_t       Contains(TPRegexp &pat) const;
    Int_t        CountChar(Int_t c) const;
    TString      Copy() const;
    const char  *Data() const                 { return fData; }
@@ -318,6 +324,8 @@ public:
                       ECaseCompare cmp) const;
    Ssiz_t       Index(const TRegexp &pat, Ssiz_t i = 0) const;
    Ssiz_t       Index(const TRegexp &pat, Ssiz_t *ext, Ssiz_t i = 0) const;
+   Ssiz_t       Index(TPRegexp &pat, Ssiz_t i = 0) const;
+   Ssiz_t       Index(TPRegexp &pat, Ssiz_t *ext, Ssiz_t i = 0) const;
    TString     &Insert(Ssiz_t pos, const char *s);
    TString     &Insert(Ssiz_t pos, const char *s, Ssiz_t extent);
    TString     &Insert(Ssiz_t pos, const TString &s);
@@ -326,6 +334,7 @@ public:
    Bool_t       IsAlpha() const;
    Bool_t       IsAlnum() const;
    Bool_t       IsDigit() const;
+   Bool_t       IsFloat() const;
    Bool_t       IsHex() const;
    Bool_t       IsNull() const              { return Pref()->fNchars == 0; }
    Ssiz_t       Last(char c) const          { return Pref()->Last(c); }
@@ -415,12 +424,6 @@ extern int strncasecmp(const char *str1, const char *str2, Ssiz_t n);
 inline void TStringRef::UnLink()
 { if (RemoveReference() == 0) delete [] (char*)this; }
 
-inline Int_t TString::Atoi() const
-{ return atoi(fData); }
-
-inline Double_t TString::Atof() const
-{ return atof(fData); }
-
 inline void TString::Cow()
 { if (Pref()->References() > 1) Clone(); }
 
@@ -493,6 +496,9 @@ inline Bool_t TString::Contains(const char *s, ECaseCompare cmp) const
 inline Bool_t TString::Contains(const TRegexp &pat) const
 { return Index(pat, (Ssiz_t)0) != kNPOS; }
 
+inline Bool_t TString::Contains(TPRegexp &pat) const
+{ return Index(pat, (Ssiz_t)0) != kNPOS; }
+
 inline Ssiz_t TString::Index(const char *s, Ssiz_t i, ECaseCompare cmp) const
 { return Index(s, s ? strlen(s) : 0, i, cmp); }
 
diff --git a/base/src/TPRegexp.cxx b/base/src/TPRegexp.cxx
new file mode 100644
index 00000000000..1456973ac3b
--- /dev/null
+++ b/base/src/TPRegexp.cxx
@@ -0,0 +1,470 @@
+// @(#)root/base:$Name:  $:$Id: TRegexp.h,v 1.1.1.1 2000/05/16 17:00:39 rdm Exp $
+// Author: Eddy Offermann   24/06/05
+
+/*************************************************************************
+ * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers.               *
+ * All rights reserved.                                                  *
+ *                                                                       *
+ * For the licensing terms see $ROOTSYS/LICENSE.                         *
+ * For the list of contributors see $ROOTSYS/README/CREDITS.             *
+ *************************************************************************/
+
+//////////////////////////////////////////////////////////////////////////
+//                                                                      //
+// TPRegexp                                                             //
+//                                                                      //
+// C++ Wrapper for the "Perl Compatible Regular Expressions" library    //
+//  The PCRE lib can be found at:                                       //
+//              http://www.pcre.org/                                    //
+//                                                                      //
+// Extensive documentation about Regular expressions in Perl can be     //
+// found at :                                                           //
+//              http://perldoc.perl.org/perlre.html                     //
+//                                                                      //
+//////////////////////////////////////////////////////////////////////////
+
+#include "Riostream.h"
+#include "TPRegexp.h"
+#include "TObjArray.h"
+#include "TObjString.h"
+#include "TError.h"
+
+#include <pcre.h>
+
+
+struct PCREPriv_t {
+   pcre       *fPCRE;        // compiled pattern, set by TPRegexp::Compile()
+   pcre_extra *fPCREExtra;   // study data, set by TPRegexp::Optimize()
+
+   PCREPriv_t() : fPCRE(0), fPCREExtra(0) { }
+};
+
+
+ClassImp(TPRegexp)
+
+//______________________________________________________________________________
+TPRegexp::TPRegexp() :
+   fPattern(),
+   fPriv(new PCREPriv_t),
+   fPCREOpts(0)
+{
+   // Default ctor: empty pattern, no options, nothing compiled yet.
+}
+
+//______________________________________________________________________________
+TPRegexp::TPRegexp(const TString &pat) :
+   fPattern(pat),
+   fPriv(new PCREPriv_t),
+   fPCREOpts(0)
+{
+   // Create and initialize with pat. The pattern is not compiled here,
+   // Match() and Substitute() call Compile() when needed.
+}
+
+//______________________________________________________________________________
+TPRegexp::TPRegexp(const TPRegexp &p) :
+   fPattern(p.fPattern),
+   fPriv(new PCREPriv_t),
+   fPCREOpts(p.fPCREOpts)
+{
+   // Copy ctor. Pattern and options are copied, the compiled PCRE data is
+   // not shared: the copy starts with an empty PCREPriv_t of its own.
+}
+
+//______________________________________________________________________________
+TPRegexp::~TPRegexp()
+{
+   // Cleanup: free the study data, the compiled pattern and their holder.
+
+   if (fPriv->fPCREExtra)
+      pcre_free(fPriv->fPCREExtra);
+   if (fPriv->fPCRE)
+      pcre_free(fPriv->fPCRE);
+   delete fPriv;
+}
+
+//______________________________________________________________________________
+TPRegexp &TPRegexp::operator=(const TPRegexp &p)
+{
+   // Assignment operator. Takes over pattern and options of p and frees the
+   // PCRE data held so far, so that the next use compiles the new pattern.
+   if (this == &p)
+      return *this;
+   fPattern  = p.fPattern;
+   fPCREOpts = p.fPCREOpts;
+   if (fPriv->fPCRE)
+      pcre_free(fPriv->fPCRE);
+   fPriv->fPCRE = 0;
+   if (fPriv->fPCREExtra)
+      pcre_free(fPriv->fPCREExtra);
+   fPriv->fPCREExtra = 0;
+   return *this;
+}
+
+//______________________________________________________________________________
+UInt_t TPRegexp::ParseMods(const TString &modStr) const
+{
+   // Translate Perl modifier flags into pcre flags.
+   // An empty modifier string returns the options currently stored.
+
+   if (modStr.Length() <= 0)
+      return fPCREOpts;
+
+   UInt_t opts = 0;
+
+   //translate perl flags into pcre flags, one character at a time
+   for (const char *m = modStr; *m; ++m) {
+      switch (*m) {
+         case 'g':
+            opts |= kPCRE_GLOBAL;
+            break;
+         case 'i':
+            opts |= PCRE_CASELESS;
+            break;
+         case 'm':
+            opts |= PCRE_MULTILINE;
+            break;
+         case 'o':
+            opts |= kPCRE_OPTIMIZE;
+            break;
+         case 's':
+            opts |= PCRE_DOTALL;
+            break;
+         case 'x':
+            opts |= PCRE_EXTENDED;
+            break;
+         case 'd': // special flag to enable debug printing (not Perl compat.)
+            opts |= kPCRE_DEBUG_MSGS;
+            break;
+         default:  // unknown flag: report it and drop the flags seen so far
+            Error("ParseMods", "illegal pattern modifier: %c", *m);
+            opts = 0;
+            break;
+      }
+   }
+   return opts;
+}
+
+//______________________________________________________________________________
+void TPRegexp::Compile()
+{
+   // Compile fPattern, replacing a previously compiled pattern if present.
+
+   if (fPriv->fPCRE)
+      pcre_free(fPriv->fPCRE);
+
+   if (fPCREOpts & kPCRE_DEBUG_MSGS)
+      Info("Compile", "PREGEX compiling %s", fPattern.Data());
+
+   const char *errMsg;
+   Int_t       errPos;
+   const Int_t pcreOpts = fPCREOpts & kPCRE_INTMASK;
+   fPriv->fPCRE = pcre_compile(fPattern.Data(), pcreOpts, &errMsg, &errPos, 0);
+
+   if (!fPriv->fPCRE)
+      Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
+            fPattern.Data(), errPos, errMsg);
+
+   // (re)study the pattern if it was studied before or if 'o' was requested
+   if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
+      Optimize();
+}
+
+//______________________________________________________________________________
+void TPRegexp::Optimize()
+{
+   // Send the pattern through the optimizer, replacing earlier study data.
+
+   if (fPriv->fPCREExtra)
+      pcre_free(fPriv->fPCREExtra);
+
+   if (fPCREOpts & kPCRE_DEBUG_MSGS)
+      Info("Optimize", "PREGEX studying %s", fPattern.Data());
+
+   // pcre_study() has its own option set, pcre_compile() bits must not be passed
+   const char *errstr;
+   fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);
+
+   if (!fPriv->fPCREExtra && errstr)
+      Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
+            fPattern.Data(), errstr);
+}
+
+//______________________________________________________________________________
+Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final,
+                            const TString &replacePattern,
+                            Int_t *offVec, Int_t nrMatch) const
+{
+   // Append replacePattern to final, replacing each $N by the N-th match of s
+   // as given by offVec. Return the number of substitutions.
+   Int_t nrSubs = 0;
+   const char *p = replacePattern;
+
+   Int_t state = 0;    // 0: copying plain text, 1: reading the digits of $N
+   Int_t subnum = 0;   // N of the $N reference being read
+   while (state != -1) {
+      switch (state) {
+         case 0:
+            if (!*p) {
+               state = -1;
+               break;
+            }
+            if (*p == '$') {
+               state = 1;
+               subnum = 0;
+               if (p[1] == '&') {   // $& leaves subnum at 0, i.e. the full match
+                  p++;
+                  if (isdigit(p[1]))
+                     p++;
+               } else if (!isdigit(p[1])) {
+                  Error("ReplaceSubs", "badly formed replacement pattern: %s",
+                        replacePattern.Data());
+               }
+            } else
+               final += *p;
+            break;
+         case 1:
+            if (isdigit(*p)) {
+               subnum *= 10;
+               subnum += (*p)-'0';
+            } else {
+               if (fPCREOpts & kPCRE_DEBUG_MSGS)
+                  Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
+               if (subnum < 0 || subnum > nrMatch-1) {
+                  Error("ReplaceSubs","bad string number :%d",subnum);
+               } else {   // only index offVec for references that exist
+                  final += s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
+                  nrSubs++;
+               }
+
+               state = 0;
+               continue;  // send char to start state
+            }
+      }
+      p++;
+   }
+   return nrSubs;
+}
+
+//______________________________________________________________________________
+Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
+                      Int_t nMaxMatch, TArrayI *pos)
+{
+   // The number of matches is returned, this equals the full match +
+   // sub-pattern matches. 0 is returned if there is no match or on error.
+   // nMaxMatch is the maximum allowed number of matches.
+   // pos contains the string indices of the matches. Its usage is
+   // shown in the routine MatchS.
+
+   UInt_t opts = ParseMods(mods);
+
+   if (!fPriv->fPCRE || opts != fPCREOpts) {
+      fPCREOpts = opts;
+      Compile();
+   }
+   // pcre_exec() needs 3 ints per (sub)match: an offset pair plus workspace
+   Int_t *offVec = new Int_t[3*nMaxMatch];
+   Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
+                             s.Length(), start, 0,   // no pcre_compile() bits here
+                             offVec, 3*nMaxMatch);
+
+   if (nrMatch == PCRE_ERROR_NOMATCH)
+      nrMatch = 0;
+   else if (nrMatch <= 0) {
+      Error("Match","pcre_exec error = %d", nrMatch);
+      delete [] offVec;
+      return 0;
+   }
+
+   if (pos)
+      pos->Adopt(2*nrMatch, offVec);   // offVec is handed over to pos
+   else
+      delete [] offVec;
+
+   return nrMatch;
+}
+
+//______________________________________________________________________________
+TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods,
+                            Int_t start, Int_t nMaxMatch)
+{
+   // Returns a TObjArray of matched substrings as TObjString's.
+   // The TObjArray is owner of the objects. The first entry is the full
+   // matched pattern, followed by the subpatterns.
+   // If a pattern was not matched, it will return an empty substring:
+   //
+   // TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
+   // for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
+   //    const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
+   //    cout << "\"" << subStr << "\" ";
+   // }
+   // cout << endl;
+   //
+   // produces:  "abc" "a" "" "bc"
+
+   TArrayI pos;
+   const Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
+
+   TObjArray *subStrL = new TObjArray();
+   subStrL->SetOwner();
+
+   for (Int_t i = 0; i < nrMatch; i++) {
+      const Int_t first = pos[2*i];
+      const Int_t last  = pos[2*i+1];
+      if (first >= 0 && last >= 0) {
+         const TString subStr = s(first, last-first);
+         subStrL->Add(new TObjString(subStr));
+      } else   // negative offsets: this subpattern was not matched
+         subStrL->Add(new TObjString());
+   }
+
+   return subStrL;
+}
+
+//______________________________________________________________________________
+Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
+                           const TString &mods, Int_t start, Int_t nMaxMatch)
+{
+   // Substitute replaces the string s by a new string in which matching
+   // patterns are replaced by the replacePattern string. The number of
+   // substitutions is returned.
+   //
+   // TString s("aap noot mies");
+   // const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
+   // cout << nrSub << " \"" << s << "\"" <<endl;
+   //
+   // produces: 2 "mies noot aap"
+
+   UInt_t opts = ParseMods(mods);
+   Int_t nrSubs = 0;
+   TString final;
+
+   if (!fPriv->fPCRE || opts != fPCREOpts) {
+      fPCREOpts = opts;
+      Compile();
+   }
+
+   Int_t *offVec = new Int_t[3*nMaxMatch];   // pcre_exec() needs 3 ints per match
+
+   Int_t offset = start;
+   Int_t last = 0;
+
+   while (kTRUE) {
+
+      // find next matching subs (pcre_compile() option bits are not valid here)
+      Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
+                                s.Length(), offset, 0,
+                                offVec, 3*nMaxMatch);
+
+      if (nrMatch <= 0) {
+         if (nrMatch != PCRE_ERROR_NOMATCH)
+            Error("Substitute", "pcre_exec error = %d", nrMatch);
+         break;   // no further match, or offVec holds no usable offsets
+      }
+
+      // append anything previously unmatched, but not substituted
+      if (last <= offVec[0]) {
+         final += s(last,offVec[0]-last);
+         last = offVec[1];
+      }
+
+      // replace stuff in s
+      nrSubs += ReplaceSubs(s, final, replacePattern, offVec, nrMatch);
+
+      // if global gotta check match at every pos
+      if (!(fPCREOpts & kPCRE_GLOBAL))
+         break;
+
+      if (offVec[0] != offVec[1])
+         offset = offVec[1];
+      else {
+         // matched empty string
+         if (offVec[1] == s.Length())
+            break;
+         offset = offVec[1]+1;
+      }
+   }
+
+   delete [] offVec;
+
+   final += s(last,s.Length()-last);
+   s = final;
+
+   return nrSubs;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+//                                                                      //
+// TString member functions, put here so the linker will include        //
+// them only if regular expressions are used.                           //
+//                                                                      //
+//////////////////////////////////////////////////////////////////////////
+
+//______________________________________________________________________________
+Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const
+{
+   // Find the first occurrence of the regexp in string and return the position.
+   // Start is the offset at which the search should start. If the regexp
+   // does not match, -1 is returned.
+
+   TArrayI pos;
+   const Int_t nrMatch = r.Match(*this,"",start,30,&pos);
+   if (nrMatch <= 0)
+      return -1;
+   return pos[0];
+}
+
+//______________________________________________________________________________
+Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const
+{
+   // Find the first occurrence of the regexp in string and return the position.
+   // Extent is length of the matched string and start is the offset at which
+   // the matching should start. Without match -1 is returned and extent is 0.
+
+   TArrayI pos;
+   const Int_t nrMatch = r.Match(*this,"",start,30,&pos);
+   if (nrMatch <= 0) {
+      *extent = 0;
+      return -1;
+   }
+   const Ssiz_t begin = pos[0];
+   *extent = pos[1]-begin;
+   return begin;
+}
+
+//______________________________________________________________________________
+TSubString TString::operator()(TPRegexp& r, Ssiz_t start)
+{
+   // Return the substring matched by the regexp, searching from start on.
+
+   Ssiz_t extent;
+   const Ssiz_t first = Index(r, &extent, start);
+   return TSubString(*this, first, extent);
+}
+
+//______________________________________________________________________________
+TSubString TString::operator()(TPRegexp& r)
+{
+   // Return the substring matched by the regexp, searching from offset 0.
+
+   return this->operator()(r, 0);
+}
+
+//______________________________________________________________________________
+TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const
+{
+   // Return the substring matched by the regexp, searching from start on.
+
+   Ssiz_t extent;
+   const Ssiz_t first = Index(r, &extent, start);
+   return TSubString(*this, first, extent);
+}
+
+//______________________________________________________________________________
+TSubString TString::operator()(TPRegexp& r) const
+{
+   // Return the substring matched by the regexp, searching from offset 0.
+
+   return this->operator()(r, 0);
+}
diff --git a/base/src/TString.cxx b/base/src/TString.cxx
index 85093f4790a..91150a4c6d6 100644
--- a/base/src/TString.cxx
+++ b/base/src/TString.cxx
@@ -1,4 +1,4 @@
-// @(#)root/base:$Name:  $:$Id: TString.cxx,v 1.44 2005/11/16 20:04:11 pcanal Exp $
+// @(#)root/base:$Name:  $:$Id: TString.cxx,v 1.45 2005/11/21 11:17:18 rdm Exp $
 // Author: Fons Rademakers   04/08/95
 
 /*************************************************************************
@@ -1489,18 +1489,62 @@ Bool_t TString::IsAlnum() const
 //______________________________________________________________________________
 Bool_t TString::IsDigit() const
 {
-   // Returns true if all characters in string are digits (0-9).
-   // Returns false in case string length is 0.
+   // Returns true if all characters in string are digits (0-9) or blanks (' '),
+   // i.e. "123456" and "123 456" are both valid integer strings.
+   // Returns false in case string length is 0 or string contains other
+   // characters. Note: a non-empty string of blanks only also returns true.
 
    const char *cp = Data();
    Ssiz_t len = Length();
    if (len == 0) return kFALSE;
-   for (Ssiz_t i = 0; i < len; ++i)
-      if (!isdigit(cp[i]))
-         return kFALSE;
+   for (Ssiz_t i = 0; i < len; ++i){
+      if (cp[i]!=' ' && !isdigit(cp[i])) return kFALSE;
+   }
    return kTRUE;
 }
 
+//______________________________________________________________________________
+Bool_t TString::IsFloat() const
+{
+   // Returns kTRUE if string contains a floating point or integer number.
+   // Examples of valid formats are:
+   //    64320
+   //    64 320
+   //    6 4 3 2 0
+   //    6.4320     6,4320
+   //    6.43e20   6.43E20    6,43e20
+   //    6.43e-20  6.43E-20   6,43e-20
+   // The test is permissive: the first occurrence of each of '.', ',', 'e' (or
+   // 'E'), '+' and '-' simply counts as a digit, wherever it stands, so
+   // strings like "1e" or a lone "-" are accepted as well.
+
+   //an integer string is accepted straight away by IsDigit()
+   if (IsDigit()) return kTRUE;
+
+   //work on a copy in which the first occurrence of each of '.', ',', '+'
+   //and '-' is replaced with '0'; if the string is a floating point number,
+   //IsDigit() will return kTRUE afterwards
+   TString tmp = *this;
+
+   static const char kMarks[] = ".,+-";
+   for (const char *mark = kMarks; *mark; ++mark) {
+      const Ssiz_t idx = tmp.First(*mark);
+      if (idx > -1)
+         tmp.Replace(idx, 1, "0", 1);
+   }
+
+   //same for the exponent character: the first 'e' or, only if there is
+   //no 'e' at all, the first capital 'E'
+   Ssiz_t iExp = tmp.First('e');
+   if (iExp < 0)
+      iExp = tmp.First('E');
+   if (iExp > -1)
+      tmp.Replace(iExp, 1, "0", 1);
+
+   //test if it is now uniquely composed of numbers
+   return tmp.IsDigit();
+}
+
 //______________________________________________________________________________
 Bool_t TString::IsHex() const
 {
@@ -1517,6 +1561,68 @@ Bool_t TString::IsHex() const
    return kTRUE;
 }
 
+//______________________________________________________________________________
+Int_t TString::Atoi() const
+{
+   // Return integer value of string.
+   // Valid strings include only digits and whitespace (see IsDigit()),
+   // i.e. "123456", "123 456" and "1 2 3 4        56" are all valid
+   // integer strings whose Atoi() value is 123456.
+   // The conversion is done by atoi() after all blanks have been removed.
+
+   //no blanks in string: just use atoi()
+   if (Index(" ") == -1)
+      return atoi(Data());
+
+   //otherwise copy the string, skipping every blank ...
+   const char *cp  = Data();
+   const Ssiz_t len = Length();
+   TString tmp;
+   for (Ssiz_t i = 0; i < len; ++i) {
+      if (cp[i] != ' ')
+         tmp += cp[i];
+   }
+
+   //... and convert the copy
+   return atoi(tmp.Data());
+}
+
+//______________________________________________________________________________
+Double_t TString::Atof() const
+{
+   // Return floating-point value contained in string.
+   // Examples of valid strings are:
+   //    64320
+   //    64 320
+   //    6 4 3 2 0
+   //    6.4320     6,4320
+   //    6.43e20   6.43E20    6,43e20
+   //    6.43e-20  6.43E-20   6,43e-20
+   // Only the first comma is turned into a decimal point; the conversion
+   // itself is done by atof().
+
+   //look for a comma and some whitespace
+   const Ssiz_t comma    = Index(",");
+   const Bool_t hasBlank = (Index(" ") != -1);
+
+   //if no commas & no whitespace in string, just use atof()
+   if (comma == -1 && !hasBlank)
+      return atof(Data());
+
+   //build a copy in which the first comma has become a decimal point and
+   //from which all blanks have been removed
+   const char *cp  = Data();
+   const Ssiz_t len = Length();
+   TString tmp;
+   for (Ssiz_t i = 0; i < len; ++i) {
+      if (cp[i] == ' ')
+         continue;
+      tmp += (i == comma) ? '.' : cp[i];
+   }
+
+   return atof(tmp.Data());
+}
+
 //______________________________________________________________________________
 Bool_t TString::EndsWith(const char *s, ECaseCompare cmp) const
 {
diff --git a/config/Makefile.in b/config/Makefile.in
index 5e6b91a4e89..ca7a5b51ecd 100644
--- a/config/Makefile.in
+++ b/config/Makefile.in
@@ -46,6 +46,8 @@ OSTHREADLIB    := @threadlib@
 
 BUILTINFREETYPE:= @builtinfreetype@
 
+BUILTINPCRE    := @builtinpcre@
+
 BUILDGL        := @buildgl@
 OPENGLLIBDIR   := @opengllibdir@
 OPENGLULIB     := @openglulib@
@@ -211,6 +213,8 @@ CLARENSLIBS    := @clarenslibs@
 
 BUILDPEAC      := @buildpeac@
 
+GCCXML         := @gccxml@
+
 INSTALL        := cp -dpR
 INSTALLDATA    := cp -dpR
 INSTALLDIR     := mkdir -p
diff --git a/configure b/configure
index 9d310651ddb..467c3236509 100755
--- a/configure
+++ b/configure
@@ -27,6 +27,7 @@ options="                    \
    enable_asimage            \
    enable_builtin_afterimage \
    enable_builtin_freetype   \
+   enable_builtin_pcre       \
    enable_cern               \
    enable_chirp              \
    enable_cintex             \
@@ -719,6 +720,7 @@ enable/disable options, prefix with either --enable- or --disable-
   asimage            Image processing support, requires libAfterImage
   builtin-afterimage Built included libAfterImage, or use system libAfterImage
   builtin-freetype   Built included libfreetype, or use system libfreetype
+  builtin-pcre       Built included libpcre, or use system libpcre
   cern               CERNLIB usage, build h2root and g2root
   chirp              Chirp support (Condor remote I/O), requires libchirp_client
   cintex             Build the libCintex Reflex interface library
@@ -769,6 +771,7 @@ with options, prefix with --with-, enables corresponding support
   dcap-incdir        dCache support, location of dcap.h
   dcap-libdir        dCache support, location of libdcap
   globus             Globus support, path to what should be GLOBUS_LOCATION
+  gccxml             Gccxml support, directory of the gccxml installation
   krb5               Kerberos5 support, location of Kerberos distribution
   krb5-incdir        Kerberos5 support, location of krb5.h
   krb5-libdir        Kerberos5 support, location of libkrb5
@@ -920,6 +923,7 @@ if test $# -gt 0 ; then
       --with-dcap-incdir=*)    dcapincdir=$optarg    ; enable_dcache="yes"  ;;
       --with-dcap-libdir=*)    dcaplibdir=$optarg    ; enable_dcache="yes"  ;;
       --with-globus=*)         globusdir=$optarg     ; enable_globus="yes"  ;;
+      --with-gccxml=*)         gccxml=$optarg        ;;
       --with-krb5=*)           krb5dir=$optarg       ; enable_krb5="yes"    ;;
       --with-krb5-incdir=*)    krb5incdir=$optarg    ; enable_krb5="yes"    ;;
       --with-krb5-libdir=*)    krb5libdir=$optarg    ; enable_krb5="yes"    ;;
@@ -1171,9 +1175,9 @@ fi
 
 ######################################################################
 #
-### echo %%% libfreetype (builtin or system) - Mandatory lib on Unix
+### echo %%% libfreetype (builtin or system) - Mandatory lib
 #
-# Mandatory test, must succeed
+# Mandatory test, must succeed (see http://www.freetype.org/)
 #
 if test "x$enable_builtin_freetype" = "x" || \
    test "x$enable_builtin_freetype" = "xno" ; then
@@ -1203,6 +1207,39 @@ fi
 message "Checking whether to build included libfreetype6"
 echo "$enable_builtin_freetype"
 
+######################################################################
+#
+### echo %%% libpcre (builtin or system) - Mandatory lib
+#
+# Mandatory test, must succeed (see http://www.pcre.org/)
+#
+if test "x$enable_builtin_pcre" = "x" || \
+   test "x$enable_builtin_pcre" = "xno" ; then
+   # use the system libpcre when usable, otherwise fall back to the builtin one
+   checking_msg "pcre-config"
+   if `which pcre-config > /dev/null 2>&1` ; then
+      which pcre-config
+      message "Checking for libpcre version >= 3.9"
+      pcre_version=`pcre-config --version | \
+          tr '.' ' ' | \
+          awk 'BEGIN { FS = " "; } \
+              { printf "%d", ($''1 * 1000 + $''2) * 1000 + $''3;}'`
+      # version is encoded as (major*1000 + minor)*1000 + patch, 3.9 -> 3009000
+      if test $pcre_version -lt 3009000 ; then
+         echo "no (`pcre-config --version`)"
+         enable_builtin_pcre="yes"
+      else
+         echo "ok"
+         enable_builtin_pcre="no"
+      fi
+   else
+      # no system libpcre found: the mandatory lib must come from the builtin copy
+      enable_builtin_pcre="yes"
+      echo "not found"
+   fi
+fi
+message "Checking whether to build included libpcre"
+echo "$enable_builtin_pcre"
 
 ######################################################################
 #
@@ -3258,6 +3295,7 @@ sed -e "s|@aclocaldir@|$aclocaldir|"            \
     -e "s|@buildasimage@|$enable_asimage|"      \
     -e "s|@builtinafterimage@|$enable_builtin_afterimage|"  \
     -e "s|@builtinfreetype@|$enable_builtin_freetype|"      \
+    -e "s|@builtinpcre@|$enable_builtin_pcre|"  \
     -e "s|@buildpeac@|$buildpeac|"              \
     -e "s|@buildxrd@|$buildxrd|"                \
     -e "s|@cernlibdir@|$cernlibdir|"            \
@@ -3294,6 +3332,7 @@ sed -e "s|@aclocaldir@|$aclocaldir|"            \
     -e "s|@globuslibdir@|$globuslibdir|"        \
     -e "s|@glbextracflags@|$glbextracflags|"    \
     -e "s|@glbpatchcflags@|$glbpatchcflags|"    \
+    -e "s|@gccxml@|$gccxml|"                    \
     -e "s|@iconpath@|$iconpath|"                \
     -e "s|@incdir@|$incdir|"                    \
     -e "s|@krb5incdir@|$krb5incdir|"            \
@@ -3470,6 +3509,7 @@ sed -e "s|@architecture@|$arch|"           \
     -e "s|@libdir@|$libdir2|"              \
     -e "s|@incdir@|$incdir2|"              \
     -e "s|@features@|$features|"           \
+    -e "s|@configargs@|$configargs|"       \
    < root-config.tmp > $RCONFOUT
 rm -f root-config.tmp
 chmod 755 $RCONFOUT
diff --git a/test/tstring.cxx b/test/tstring.cxx
index e8b5d1a0511..dce92c27dcc 100644
--- a/test/tstring.cxx
+++ b/test/tstring.cxx
@@ -1,4 +1,4 @@
-// @(#)root/test:$Name:  $:$Id: tstring.cxx,v 1.3 2002/01/23 17:52:51 rdm Exp $
+// @(#)root/test:$Name:  $:$Id: tstring.cxx,v 1.4 2002/01/24 11:39:31 rdm Exp $
 // Author: Fons Rademakers   19/08/96
 
 #include <stdlib.h>
@@ -6,6 +6,7 @@
 #include "Riostream.h"
 #include "TString.h"
 #include "TRegexp.h"
+#include "TPRegexp.h"
 
 
 void Ok(int i, int b)
@@ -129,6 +130,21 @@ int main()
    Ok(22, !s10.IsAscii());
    Ok(23, s.IsAscii());
 
+   // some exercises with the Perl Compatible Regular Expressions
+   TString s11("Food is on the foo table.");
+   TPRegexp("\\b(foo)\\s+(\\w+)").Substitute(s11, "round $2");
+   Ok(24, s11=="Food is on the round table.");
+
+   TString s12("pepernotenkoek");
+   TPRegexp("peper(.*)koek").Substitute(s12, "wal$1boom");
+   Ok(25, s12=="walnotenboom");
+
+   TString s13("hihi haha");
+   TPRegexp("^([^ ]*) *([^ ]*)").Substitute(s13, "$2 $1");
+   Ok(26, s13=="haha hihi");
+
+   Ok(27, TPRegexp("^(\\w+) *(\\w+)").Match(s13) == 3);
+
    // test Resize and Strip
    s9.Prepend("   ");
    cout << s9 << endl;
-- 
GitLab