OpenADFortTk (basic)
src/lib/support/CmdLineParser.cxx
Go to the documentation of this file.
00001 // ##########################################################
00002 // # This file is part of OpenADFortTk.                     #
00003 // # The full COPYRIGHT notice can be found in the top      #
00004 // # level directory of the OpenADFortTk source tree.       #
00005 // # For more information visit                             #
00006 // # http://www.mcs.anl.gov/openad                          #
00007 // ##########################################################
00008 
00009 #include <stdlib.h> // <cstdlib> // for strtol
00010 #include <string.h> // <cstring>
00011 #include <errno.h>  // <cerrno>
00012 
00013 #include <algorithm> // for sort
00014 
00015 #include "CmdLineParser.h"
00016 
using std::string;

// Fixed prefixes for the ParseError messages raised while parsing.  They
// are never modified, so they are declared const.
static const string MISSING_SWITCH = "Missing switch after -";
static const string UNKNOWN_SWITCH = "Unknown option switch: ";
static const string MISSING_ARG = "Missing argument for switch: ";
00022 
00023 // lt_OptArgDesc: Used to sort CmdLineParser::OptArgDesc
00024 struct lt_OptArgDesc
00025 {
00026   // return true if x1 < x2; false otherwise
00027   bool operator()(const CmdLineParser::OptArgDesc& x1, 
00028                   const CmdLineParser::OptArgDesc& x2) const
00029   {
00030     // There are three possibilities, listed in order of preference:
00031     //   - both long switches are present 
00032     //   - both short switches are present
00033     //   - one short and one long switch
00034     if (x1.swLong && x2.swLong) {
00035       return (strcmp(x1.swLong, x2.swLong) < 0);
00036     }
00037     else if (x1.swShort != 0 && x2.swShort != 0) {
00038       return (x1.swShort < x2.swShort);
00039     } 
00040     else {
00041       if (x1.swLong && x2.swShort != 0) {
00042         return (x1.swLong[0] < x2.swShort);
00043       } 
00044       else {
00045         return (x1.swShort < x2.swLong[0]);
00046       }
00047     }
00048   }
00049 
00050 private:
00051 
00052 };
00053 
00054 
// IsDashDash: true iff 'str' is exactly the two-character string "--".
// Assumes str is non-NULL.
static inline bool
IsDashDash(const char* str)
{
  return (str[0] == '-' && str[1] == '-' && str[2] == '\0');
}
00058 
00059 
// IsSwitch, IsLongSwitch, IsShortSwitch: All assume str is non-NULL.
// The short-switch test only looks at the first character, so it also
// accepts long switches; callers must test for a long switch first.
//
// IsLongSwitch: true iff 'str' begins with "--".
static inline bool
IsLongSwitch(const char* str)
{
  return (str[0] == '-' && str[1] == '-');
}
00065 
// IsShortSwitch: true iff the first character of 'str' is '-'.
static inline bool
IsShortSwitch(const char* str)
{
  return (str[0] == '-');
}
00068 
00069 static inline bool
00070 IsSwitch(const char* str) { return (IsLongSwitch(str) || IsShortSwitch(str)); }
00071 
00072 
00073 // IsArg: Verifies that we should interpret 'str' as an argument.
00074 // Should be non-NULL;
00075 static inline bool
00076 IsArg(const char* str) { return (!IsSwitch(str) && !IsDashDash(str)); }
00077 
00078 
00079 //****************************************************************************
00080 
00081 //****************************************************************************
00082 // CmdLineParser
00083 //****************************************************************************
00084 
00085 CmdLineParser::OptArgDesc CmdLineParser::OptArgDesc_NULL = 
00086   CmdLineParser_OptArgDesc_NULL_MACRO;
00087 
00088 
00089 CmdLineParser::CmdLineParser() 
00090 {
00091   Ctor();
00092 }
00093 
00094 CmdLineParser::CmdLineParser(const OptArgDesc* optArgDescs, 
00095                              int argc, const char* const argv[])
00096 {
00097   Ctor();
00098   Parse(optArgDescs, argc, argv);
00099 }
00100 
00101 void
00102 CmdLineParser::Ctor() 
00103 {
00104   // nothing to do
00105 }
00106 
00107 
00108 
00109 CmdLineParser::~CmdLineParser() 
00110 { 
00111   Reset();
00112 }
00113 
00114 
00115 void
00116 CmdLineParser::Parse(const OptArgDesc* optArgDescsOrig, 
00117                      int argc, const char* const argv[])
00118 { 
00119   Reset();
00120   command = argv[0]; // always do first so it will be available after errors
00121   
00122   CheckForErrors(optArgDescsOrig);  
00123   const OptArgDesc* optArgDescs = CreateSortedCopy(optArgDescsOrig);
00124   
00125   bool endOfOpts = false;  // are we at end of optional args?
00126   
00127   for (int i = 1; i < argc; ++i) {
00128     const char* str = argv[i];
00129     
00130     // -------------------------------------------------------
00131     // Bypass special option values
00132     // -------------------------------------------------------
00133     if (str == NULL || *str == '\0') {
00134       continue; // should never happen, but we ignore
00135     }
00136     
00137     // A '--' signifies end of optional arguments
00138     if (IsDashDash(str)) {
00139       endOfOpts = true;
00140       continue;
00141     }
00142     
00143     if (!endOfOpts && IsSwitch(str)) {
00144       // -------------------------------------------------------
00145       // An option switch (possibly needing an argument)
00146       // -------------------------------------------------------
00147       // Note: The argument may be appended to the switch or it may be
00148       // the next element of argv.
00149       
00150       // 1. Separate switch from any argument embedded within
00151       SwDesc swdesc = MakeSwitchDesc(str);
00152       if (swdesc.sw.empty()) {
00153         throw ParseError(MISSING_SWITCH); // must have been '-'
00154       }
00155       
00156       // 2. Find option descriptor from switch (checks for duplicate matches)
00157       const OptArgDesc* d = FindOptDesc(optArgDescs, swdesc);
00158       if (!d) {
00159         throw ParseError(UNKNOWN_SWITCH + swdesc.sw);
00160       }
00161       
00162       // 3. Find argument for switch (if any) [N.B. may advance iteration!]
00163       if (d->kind == ARG_NONE) {
00164         if (!swdesc.arg.empty()) {
00165           string msg = "Invalid argument `" + swdesc.arg + "' to switch `" 
00166             + swdesc.sw + "'";
00167           throw ParseError(msg);
00168         }
00169       } else if (d->kind == ARG_REQ || d->kind == ARG_OPT) {
00170         if (swdesc.arg.empty()) {
00171           int nexti = i + 1;
00172           if (nexti < argc && argv[nexti] && IsArg(argv[nexti])) {
00173             swdesc.arg = argv[nexti];
00174             i = nexti; // increment iteration
00175           }
00176         } 
00177         if (swdesc.arg.empty() && d->kind == ARG_REQ) {
00178           throw ParseError(MISSING_ARG + swdesc.sw);
00179         }
00180       }
00181       
00182       // 4. Add option switch and any argument to map
00183       AddOption(*d, swdesc);
00184     }
00185     else { 
00186       // -------------------------------------------------------
00187       // A regular argument
00188       // -------------------------------------------------------
00189       arguments.push_back(string(str));
00190     } 
00191   } 
00192   
00193   delete[] optArgDescs;
00194 }
00195 
00196 
00197 //****************************************************************************
00198 
00199 const string& 
00200 CmdLineParser::GetCmd() const
00201 {
00202   return command;
00203 }
00204 
00205 
00206 // IsOpt:
00207 bool 
00208 CmdLineParser::IsOpt(const char swShort) const
00209 {
00210   string sw(1, swShort);
00211   return IsOpt(sw);
00212 }
00213 
00214 bool 
00215 CmdLineParser::IsOpt(const char* swLong) const
00216 {
00217   string sw(swLong);
00218   return IsOpt(sw);
00219 }
00220 
00221 bool 
00222 CmdLineParser::IsOpt(const string& sw) const
00223 {
00224   SwitchToArgMap::const_iterator it = switchToArgMap.find(sw);
00225   return (it != switchToArgMap.end());
00226 }
00227 
00228 
00229 // IsOptArg:
00230 bool 
00231 CmdLineParser::IsOptArg(const char swShort) const
00232 {
00233   string sw(1, swShort);
00234   return IsOptArg(sw);
00235 }
00236 
00237 bool 
00238 CmdLineParser::IsOptArg(const char* swLong) const
00239 {
00240   string sw(swLong);
00241   return IsOptArg(sw);
00242 }
00243 
00244 bool 
00245 CmdLineParser::IsOptArg(const string& sw) const
00246 {
00247   SwitchToArgMap::const_iterator it = switchToArgMap.find(sw);
00248   if ((it != switchToArgMap.end()) && ((*it).second != NULL)) {
00249     return true;
00250   }
00251   return false;
00252 }
00253 
00254 
00255 // GetOptArg:
00256 const string&
00257 CmdLineParser::GetOptArg(const char swShort) const
00258 {
00259   string sw(1, swShort);
00260   return GetOptArg(sw);
00261 }
00262 
00263 const string&
00264 CmdLineParser::GetOptArg(const char* swLong) const
00265 {
00266   string sw(swLong);
00267   return GetOptArg(sw);
00268 }
00269 
00270 const string&
00271 CmdLineParser::GetOptArg(const string& sw) const
00272 {
00273   SwitchToArgMap::const_iterator it = switchToArgMap.find(sw);
00274   if (it == switchToArgMap.end()) {
00275     // FIXME: ERROR
00276   }
00277   string* arg = (*it).second;
00278   if (!arg) {
00279     // FIXME: ERROR
00280   }
00281   return *arg;
00282 }
00283 
00284 
00285 unsigned int 
00286 CmdLineParser::GetNumArgs() const
00287 { 
00288   return arguments.size(); 
00289 }
00290 
00291 const string& 
00292 CmdLineParser::GetArg(unsigned int i) const
00293 {
00294   return arguments[i];
00295 }
00296 
00297 
00298 //****************************************************************************
00299 
00300 long
00301 CmdLineParser::ToLong(const string& str)
00302 {
00303   long value = 0;
00304   if (str.empty()) { throw InternalError("ToLong"); }
00305   
00306   errno = 0;
00307   char* endptr = NULL;
00308   value = strtol(str.c_str(), &endptr, 0);
00309   if (errno || (endptr && strlen(endptr) > 0)) {
00310     string msg = "Argument `" + str 
00311       + "' cannot be converted to integral value.";
00312     if (errno) { // not always set
00313       msg += " ";
00314       msg += strerror(errno);
00315     }
00316     throw ParseError(msg);
00317   } 
00318   return value;
00319 }
00320 
00321 
00322 uint64_t
00323 CmdLineParser::ToUInt64(const string& str)
00324 {
00325   uint64_t value = 0;
00326   if (str.empty()) { throw InternalError("ToUInt64"); }
00327   
00328   errno = 0;
00329   char* endptr = NULL;
00330   value = strtoul(str.c_str(), &endptr, 0);
00331   if (errno || (endptr && strlen(endptr) > 0)) {
00332     string msg = "Argument `" + str 
00333       + " cannot be converted to integral value.";
00334     if (errno) { // not always set
00335       msg += " ";
00336       msg += strerror(errno);
00337     }
00338     throw ParseError(msg);
00339   } 
00340   return value;
00341 }
00342 
00343 
00344 double   
00345 CmdLineParser::ToDbl(const string& str)
00346 {
00347   double value = 0;
00348   if (str.empty()) { throw InternalError("ToDbl"); }
00349   
00350   errno = 0;
00351   char* endptr = NULL;
00352   value = strtod(str.c_str(), &endptr);
00353   if (errno || (endptr && strlen(endptr) > 0)) {
00354     string msg = "Argument `" + str + "' cannot be converted to real value.";
00355     if (errno) { // not always set
00356       msg += " ";
00357       msg += strerror(errno);
00358     }
00359     throw ParseError(msg);
00360   } 
00361   return value;
00362 }
00363 
00364 
00365 //****************************************************************************
00366 
00367 void 
00368 CmdLineParser::Dump(std::ostream& os) const
00369 {
00370   os << "Command: `" << GetCmd() << "'" << std::endl;
00371   
00372   os << "Switch to Argument map:" << std::endl;
00373   for (SwitchToArgMap::const_iterator it = switchToArgMap.begin();
00374        it != switchToArgMap.end(); ++it) {
00375     const string& sw = (*it).first;
00376     const string* arg = (*it).second;
00377     os << "  " << sw << " --> " << ((arg) ? *arg : "<>") << std::endl;
00378   }
00379   
00380   os << "Regular arguments:" << std::endl;
00381   for (unsigned int i = 0; i < arguments.size(); ++i) {
00382     os << "  " << arguments[i] << std::endl;
00383   }
00384 }
00385 
00386 
00387 void 
00388 CmdLineParser::DDump() const
00389 {
00390   Dump(std::cerr);
00391 }
00392 
00393 
00394 //****************************************************************************
00395 
00396 // Reset: Clear data to prepare for parsing
00397 void
00398 CmdLineParser::Reset()
00399 {
00400   for (SwitchToArgMap::iterator it = switchToArgMap.begin();
00401        it != switchToArgMap.end(); ++it) {
00402     string* arg = (*it).second;
00403     delete arg;
00404   }
00405   switchToArgMap.clear();
00406   arguments.clear();
00407 }
00408 
00409 
00410 // CreateSortedCopy: create a sorted NULL-terminated copy of
00411 // 'optArgDescs'.  WARNING: the OptArgDesc objects are bitwise-copied.
00412 const CmdLineParser::OptArgDesc* 
00413 CmdLineParser::CreateSortedCopy(const OptArgDesc* optArgDescs)
00414 {
00415   // Find the size, not including the NULL-terminator
00416   unsigned int sz = 0; 
00417   for (const OptArgDesc* p = optArgDescs; *p != OptArgDesc_NULL; ++p) { ++sz; }
00418   
00419   // Make a copy of 'optArgDescs'
00420   OptArgDesc* copy = new OptArgDesc[sz+1];
00421   unsigned int i = 0; 
00422   for (const OptArgDesc* p = optArgDescs; *p != OptArgDesc_NULL; ++p, ++i) {
00423     copy[i] = *p; // bitwise copy is ok
00424   }
00425   copy[sz] = OptArgDesc_NULL; // add the NULL-terminator
00426   
00427   // Sort
00428   if (sz > 1) {
00429     std::sort(&copy[0], &copy[sz-1], lt_OptArgDesc());
00430   }
00431   
00432   return copy;
00433 }
00434 
00435 
00436 // CheckForErrors: Checks argument descriptor for errors
00437 void
00438 CmdLineParser::CheckForErrors(const OptArgDesc* optArgDescs)
00439 {
00440   // FIXME
00441   //   - detect duplicate option entries.  Not pressing because
00442   //   FindOptDesc() will effectively do this.
00443   
00444   // Check individual descriptors
00445   string msg;
00446   string sw;
00447   for (const OptArgDesc* p = optArgDescs; *p != OptArgDesc_NULL; ++p) {
00448     // Verify that at least one switch is present
00449     if (p->swShort == 0 && !p->swLong) {
00450       throw InternalError("Arg descriptor is missing a switch!");
00451     }
00452 
00453     if (p->swLong) {
00454       sw = p->swLong; 
00455     } else {
00456       sw = p->swShort;
00457     }
00458     
00459     // Verify that the kind is valid
00460     if (p->kind == ARG_NULL) {
00461       msg = "OptArgDesc.kind is invalid for: " + sw;
00462       throw InternalError(msg);
00463     }
00464     
00465     // Verify that dupKind is valid
00466     if (p->dupKind == DUPOPT_NULL) {
00467       msg = "OptArgDesc.dupKind is invalid for: " + sw;
00468       throw InternalError(msg);
00469     }
00470     
00471     // Verify that if dupKind == DUPOPT_CAT, dupArgSep is valid
00472     if (p->dupKind == DUPOPT_CAT && !p->dupArgSep) {
00473       msg = "OptArgDesc.dupArgSep is invalid for: " + sw;
00474       throw InternalError(msg);
00475     }
00476   }
00477 }
00478 
00479 
00480 // MakeSwitchDesc: Given an option string from argv (potentially
00481 // containing both an option and an argument), create a SwDesc,
00482 // separating switch text from any argument text.
00483 CmdLineParser::SwDesc
00484 CmdLineParser::MakeSwitchDesc(const char* str)
00485 {
00486   // 1. Find pointers for begin/end of switch and argument
00487   unsigned int len = strlen(str);
00488   const char* strEnd = str + len;
00489   const char* begSw = NULL, *endSw = NULL;   // end pointers are inclusive!
00490   const char* begArg = NULL, *endArg = NULL;
00491   bool isLong = false;
00492   if (IsLongSwitch(str)) {
00493     // test for --foo=arg
00494     begArg = strchr(str, '=');
00495     if (begArg) {
00496       begArg++;            // starts after the '='
00497       endArg = strEnd - 1; // ends right before '\0'
00498     }
00499     begSw = str + 2;       // bump past '--'
00500     endSw = (begArg) ? (begArg - 2) : (strEnd - 1);
00501     isLong = true;
00502   } 
00503   else if (IsShortSwitch(str)) {
00504     // test for -f[arg]
00505     begArg = (len > 2) ? (str + 2) : NULL;   // starts after '-f'
00506     endArg = (begArg) ? (strEnd - 1) : NULL; // ends right before '\0'
00507     begSw  = (len > 1) ? (str + 1) : NULL;   // starts after '-'
00508     endSw  = begSw;                               // single character
00509   } 
00510   else {
00511     throw InternalError("Programming Error!");
00512   }
00513   
00514   // 2. Copy switch and argument substrings
00515   SwDesc swdesc;
00516   swdesc.isLong = isLong;
00517   for (const char* p = begSw; p && p <= endSw; ++p) { swdesc.sw += *p; }
00518   for (const char* p = begArg; p && p <= endArg; ++p) { swdesc.arg += *p; }
00519   
00520   return swdesc;
00521 }
00522 
00523 
00524 // FindOptDesc: Given a *sorted* NULL-terminated array of OptArgDesc and
00525 // an option switch, return a reference to the appropriate OptArgDesc.
00526 // If 'errOnMultipleMatches' is true, checks to make sure we don't
00527 // match more than one descriptor (useful for testing long argument
00528 // abbreviation).
00529 const CmdLineParser::OptArgDesc*
00530 CmdLineParser::FindOptDesc(const OptArgDesc* optArgDescs, const SwDesc& swdesc,
00531                            bool errOnMultipleMatches)
00532 {
00533   // Note: Because there will never be very many options, we simply
00534   //   use a linear search.
00535   // Note: A long option may be a substring of another long option!
00536   //   Because 'optArgDescs' will be sorted, any options that are
00537   //   substrings of other options will be ordered so that they appear
00538   //   before the option that contains them, e.g. 'xx', 'xxx', 'xxxx',
00539   //   'xxxxx'.
00540   
00541   // Try to find a matching descriptor
00542   unsigned int swLen = swdesc.sw.length();
00543   const OptArgDesc* odesc = NULL;
00544   for (const OptArgDesc* p = optArgDescs; *p != OptArgDesc_NULL; ++p) {
00545     if (swdesc.isLong) {
00546       if (p->swLong && strncmp(p->swLong, swdesc.sw.c_str(), swLen) == 0) {
00547         odesc = p;
00548         break;
00549       }
00550     } else {
00551       if (p->swShort != 0 && p->swShort == swdesc.sw[0]) {
00552         odesc = p;
00553         break;
00554       }
00555     }
00556   }
00557   if (!odesc) { return NULL; }
00558   
00559   // We have a match. Check for more matches ==> ambiguity.
00560   const OptArgDesc* m = NULL;
00561   if (errOnMultipleMatches && (m = FindOptDesc((odesc+1), swdesc, false))) {
00562     // Special case to handle a long option that is a substring of
00563     // another. If the long option switch exactly matches 'odesc' and
00564     // it is different than 'm' then we do not want to generate an
00565     // ambiguous option error.
00566     bool ok = (swdesc.isLong && (strcmp(odesc->swLong, swdesc.sw.c_str()) == 0)
00567                && (strcmp(odesc->swLong, m->swLong) != 0));
00568     if (!ok) {
00569       string msg = "Switch `"; 
00570       msg += swdesc.sw; msg += "' matches two different options: ";
00571       if (swdesc.isLong) {
00572         msg += odesc->swLong; msg += ", "; msg += m->swLong;
00573       } else {
00574         msg += odesc->swShort; msg += ", "; msg += m->swShort;
00575       }
00576       throw ParseError(msg);
00577     } 
00578   }
00579   
00580   return odesc;
00581 }
00582 
00583 
00584 // AddOption: Records the option switch and its (possibly optional)
00585 // argument in the switch->argument map.  In order to support easy
00586 // lookup, both the *canonical* long and short form of the switches
00587 // are entered in the map.
00588 void
00589 CmdLineParser::AddOption(const OptArgDesc& odesc, const SwDesc& swdesc)
00590 {
00591   if (odesc.swShort != 0) {
00592     string swShort(1, odesc.swShort);
00593     AddOption(odesc, swShort, swdesc.arg);
00594   }
00595   if (odesc.swLong) {
00596     string swLong(odesc.swLong);
00597     AddOption(odesc, swLong, swdesc.arg);
00598   }
00599 }
00600 
00601 
00602 // AddOption: Records the option switch and its (possibly optional)
00603 // argument in the switch->argument map.  If the switch is not in the
00604 // map, it is inserted with the available argument or NULL.  If it is
00605 // already the map, the option descriptor defines how to handle
00606 // duplicates.
00607 void
00608 CmdLineParser::AddOption(const OptArgDesc& odesc,
00609                          const string& sw, const string& arg)
00610 {
00611   SwitchToArgMap::iterator it = switchToArgMap.find(sw);
00612   if (it == switchToArgMap.end()) {
00613     // Insert in map
00614     string* theArg = (arg.empty()) ? NULL : new string(arg);
00615     switchToArgMap.insert(SwitchToArgMap::value_type(sw, theArg));
00616   } else {
00617     // Handle duplicates
00618     string* theArg = (*it).second;
00619     
00620     if (odesc.dupKind == DUPOPT_ERR) {
00621       throw ParseError("Duplicate switch: " + sw);
00622     }
00623     
00624     if (!arg.empty()) {
00625       if (!theArg) {
00626         theArg = new string(arg);
00627       } else {
00628         if (odesc.dupKind == DUPOPT_CLOB) {
00629           *theArg = arg;
00630         } else if (odesc.dupKind == DUPOPT_CAT) {
00631           *theArg += odesc.dupArgSep + arg;
00632         } 
00633       }
00634     }
00635   }
00636 }
00637 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines