/*
 * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/** \file
    \brief Various definitions for the utility programs ilmtp, machar,
    symutil, and symini.

    If compiled as C++ code this file provides a definition of the base
    class for various utility programs that generate documentation and
    header files for compilers in the build process.

    Define the preprocessor symbol USE_OLD_C_UTILS *before* including this
    file if you need the old C interfaces.  The NroffInStream, NroffMap, and
    NroffTokenStream interfaces are always provided to C++ code.
 */

#ifndef _UTILS_UTILS_H
#define _UTILS_UTILS_H

#include "universal.h"

#if defined(__cplusplus)

// NOTE(review): the #include directives in this file carry no header names
// and several declarations below (std::list, std::vector, std::map) carry no
// template arguments.  The text looks as if every <...> sequence was
// stripped; restore them from the upstream copy of this header.
#include
#include
#include
#include
#include
#include
#include
#include

void collectIncludePaths(int argc, char *argv[]);

extern std::list includePaths;

/// Types of errors printError can recognize.
enum ErrorSeverity { INFO = 1, WARN, SEVERE, FATAL };

/** \brief Similar to an std::ifstream, but intended for reading nroff files.

    Self-closing when end of input is reached.  Nroff include directives
    (.so "foo.n") are handled automatically.
 */
class NroffInStream {
  /* Context used for processing an input file. */
  struct context {
    std::ifstream *file;  /**< input stream, allocated with new by the
                               constructor; presumably released by
                               pop_file() -- verify against its definition */
    std::string filename; /**< file name */
    int lineno;           /**< file line number, starts at 0 */
    context(const char *filename_)
        : file(new std::ifstream(filename_)), filename(filename_), lineno(0) {
    }
  };

  //! Stack of open files.
  std::vector stack;

  //! Top of stack.  The stack must not be empty when this is called.
  context &
  tos() {
    return stack.back();
  }

  //! Open \a filename and make it the innermost file.
  void push_file(const char *filename) {
    stack.push_back(context(filename));
  }

  void pop_file();

public:
  /** Open file.  Use operator!() to determine if file was successfully
      opened.  Asserts that no file is currently open. */
  void open(const char *filename) {
    assert(stack.empty());
    push_file(filename);
  }

  /** Close all files.  No-op if file is not open. */
  void close();

  /** \brief Print an error message with a line number if non-zero and the
      error severity level.  Exit if the error is FATAL.

      \param sev  error severity;
      \param txt1 error text to be written;
      \param txt2 additional text to be appended to the error message.
      \return this function doesn't return if the error severity is FATAL,
      instead it calls exit() to terminate the utility program.
   */
  void printError(ErrorSeverity sev, const char *txt1, const char *txt2 = "");

  /** True if error occurred for innermost file.  This method exists to
      simplify replacing uses of std::ifstream with NroffInStream.
      It reads the top of the file stack, so a file must have been opened. */
  bool operator!() {
    return tos().file->operator!();
  }

  friend bool getline(NroffInStream &f, std::string &s);

  /** Closes any files that are still open. */
  ~NroffInStream() {
    close();
  }
};

//! Get line of input from the given stream.
bool getline(NroffInStream &f, std::string &s);

/** \brief Map from nroff commands to integers.

    This class is used in conjunction with class NroffTokenStream.
 */
class NroffMap {
  /** Note: with C++11, a std::unordered_map could be used instead,
      but the time savings are probably negligible. */
  std::map myMap;

public:
  /** Helper class that enables using subscript syntax to initialize
      an NroffMap.  Clients should not mention the name 'proxy'. */
  class proxy {
    NroffMap *map;      //!< map that receives the assignment
    const char *string; //!< nroff primitive being assigned a code
    proxy(NroffMap *map_, const char *string_) : map(map_), string(string_) {
    }
    friend class NroffMap;

  public:
    //! See comments for NroffMap::operator[]
    void operator=(int code);
  };

  /** Return proxy that can be used to assign a code to an nroffPrimitive
      in an NroffMap.  E.g.:

          NroffMap m;
          m[".xy"] = 42;

      The string must have three characters, and the code must be non-zero.
      These restrictions are checked by proxy::operator=.
   */
  proxy operator[](const char *nroffPrimitive) {
    return proxy(this, nroffPrimitive);
  }

  /** If current line begins with one of the strings in this map,
      return the corresponding code.  Otherwise return 0. */
  int match(const std::string &line) const;

  /** Given a code, find the corresponding string.
      Return nullptr if the code is not found. */
  const char *stringOf(int code);
};

/** Class for reading nroff tokens from a file.
    Like NroffInStream, it is self-closing. */
class NroffTokenStream {
  NroffInStream ifs;
  std::string buf;
  const char *pos; //!< pointer into buf

public:
  //! Create NroffTokenStream for a file.
  NroffTokenStream(const char *filename) : pos(nullptr) {
    ifs.open(filename);
  }

  /** Advance to next line that has a map code.  Return 0 if none found. */
  int get_line(const NroffMap &map);

  /** Get next token on current line. */
  bool get_token(std::string &tok);

  //! Report an error through the underlying input stream.
  void printError(ErrorSeverity sev, const char *txt1) {
    ifs.printError(sev, txt1);
  }
};

#endif /* defined(__cplusplus) */

#if defined(__cplusplus) && !defined(USE_OLD_C_UTILS)

#include
#include
#include
#include
#include

#define LT_UNK -1
#define LT_EOF 0

/** \class SphinxConverter
    \brief Manages conversion of NROFF input to Sphinx format.
 */
class SphinxConverter {

  typedef std::vector cell_type;      //!< the text lines of one table cell
  typedef std::vector table_row_type; //!< the cells of one table row

  static const bool DEBUG = false; //!< when true, trace output is written
                                   //!< into the generated RST

  /// Sphinx convention specifies which character is used for underlining
  /// headers of various levels. This array maps header levels to the specific
  /// character.
  char heading[6];
  std::stringstream source; ///< entire input file read into the stream buffer.
  std::stringstream target; ///< a buffer to hold the generated RST.
  std::ofstream ofs;        ///< Sphinx output file stream.
  std::vector tokens;       ///< tokens of the line last passed to tokenize().
  std::vector listStack;    ///< tags of the lists currently being parsed.
  bool op_started;          ///< true while consecutive .OP requests are being
                            ///< collected into one code block.
  int lineno;               ///< current line number within source.
  bool genRST; ///< If set, generate RST. Otherwise, just eat lines.
public: /** \brief Initializes the internal data structures. */ SphinxConverter() { heading[0] = '#'; heading[1] = '*'; heading[2] = '='; heading[3] = '-'; heading[4] = '^'; heading[5] = '"'; genRST = true; } /** \brief Write the last RST file. */ ~SphinxConverter() { if (ofs.is_open()) { generateRST(); ofs.close(); } } /** \brief Create the Sphinx output file. */ void setOutputFile(const std::string &s) { if (ofs.is_open()) { generateRST(); ofs.close(); } ofs.open(s.c_str()); if (!ofs) { std::cerr << "Can't create file " << s << '\n'; exit(1); } source.str(""); source.clear(); target.str(""); target.clear(); listStack.clear(); op_started = false; lineno = 0; } /** \brief Collect a single line of an input file and store in a string stream for running the conversion process later on the entire source file. */ void process(const std::string &s) { int startPos = 0; int len, dirLen; dirLen = chkCommentString(s); if (dirLen || !genRST) { len = s.length(); if (dirLen == 2 && len >= 12 && ((s.substr(2, 9)).compare("RST_ONLY ")) == 0) { /// found \#RST_ONLY directive startPos = 11; } else if (dirLen == 4 && len >= 13 && ((s.substr(4, 9)).compare("RST_ONLY ")) == 0) { /// found .\"#RST_ONLY directive startPos = 13; } else { return; } } auto r = (startPos == 0) ? escapeInlineMarkup(cutTrailingSpace(s)) : escapeInlineMarkup(cutTrailingSpace(s.substr(startPos))); source << r // join lines if new line is escaped with backslash << (r.empty() || r[r.length() - 1] != '\\' ? '\n' : ' '); } private: /** \brief Check to see if an input line is a special directive that begins with either a \# NROFF comment or an NROFF comment that begins with a .\" followed a # character. The \#NO_RST directive is used to turn off RST generation and the \#END_NO_RST directive is used to reenable RST generation. The .\"#NO_RST and .\"#END_NO_RST directives are also recognized. \param s is the string to process \returns 2 for \# style directive, 4 for .\"# style directive, 0 for no directive. 
*/ int chkCommentString(const std::string &s) { int len, pos, dirLen; bool toggleGenRST; len = s.length(); if (len >= 2 && s[0] == '\\' && s[1] == '#') { dirLen = 2; } else if (len >= 3 && s[0] == '.' && s[1] == '\\' && s[2] == '\"') { dirLen = 4; } else { dirLen = 0; } if (dirLen > 0) { /// We have an NROFF \# or .\" comment if (dirLen == 4 && len >= 4 && s[3] == '#') { pos = 2; } else { pos = 0; } if (len >= 8) { /// Check for special \#NO_RST or \#END_NO_RST directive in comment /// or .\"#NO_RST or .\"#END_NO_RST directive in comment` if (len >= 12 && s[dirLen] == 'E' && s[dirLen + 1] == 'N' && s[dirLen + 2] == 'D' && s[dirLen + 3] == '_') { pos += 6; toggleGenRST = true; } else { pos += 2; toggleGenRST = false; } std::string str2 = s.substr(pos, 6); if (str2.compare("NO_RST") == 0) { genRST = toggleGenRST; } } } return dirLen; } /** \brief Generate an output file in RST format. */ void generateRST() { if (!ofs.is_open()) { return; } expandDefinedStrings(); for (std::string s; std::getline(source, s); ++lineno) { parseOneLineRequest(s, source, target, true); } ofs << target.str(); } /** \brief Convert a line in NROFF input to a Sphinx formatted line. 
*/ void parseOneLineRequest(const std::string &s, std::stringstream &src, std::stringstream &tgt, bool newline = false) { if (DEBUG) tgt << "\n..\n Line " << lineno << " <" << s << ">\n"; if (s.empty()) { if (DEBUG) tgt << "\n..\n EMPTY\n"; return; } // comments if (s.substr(0, 2) == "\\\"" || s.substr(0, 3) == ".\\\"") { if (DEBUG) tgt << "\n..\n COMMENT\n"; return; } if (s.substr(0, 2) == "\\&") { tgt << s.substr(2) << '\n'; return; } if (s[0] != '.') { if (op_started) { tgt << '\n'; op_started = false; } tgt << s << '\n'; return; } tokenize(s); if (tokens.empty()) { tgt << s; return; } auto tag = tokens[0]; if (DEBUG) tgt << "\n..\n Tag <" << tag << ">\n"; if (tag == ".de") { for (std::string s; std::getline(src, s) && s.substr(0, 2) != ".."; ++lineno) { } ++lineno; return; } if (tag != ".OP") { op_started = false; } if (tag == ".NS") { auto title = tokens.size() > 3 ? tokens[3] + tokens[2] : tokens[2]; std::string lining(title.length(), '*'); tgt << "\n\n" << lining << '\n' << title << '\n' << lining << '\n'; return; } if (tag == ".sh") { auto level = atoi(tokens[1].c_str()); std::string lining(tokens[2].length(), heading[level]); tgt << "\n\n" << lining << '\n' << tokens[2] << '\n' << lining << '\n'; return; } if (tag == ".SM") { if (tokens[1] == "E") { return; } std::ostringstream ss; ss << tokens[1]; for (std::vector::const_iterator it = tokens.begin() + 2, E = tokens.end(); it != E; ++it) { ss << ", " << *it; } std::string lining(ss.str().length(), heading[4]); tgt << "\n\n" << lining << '\n' << ss.str() << '\n' << lining << '\n'; return; } if (tag == ".OP") { if (!op_started) { op_started = true; tgt << "\n.. code-block:: none\n\n"; } tgt << " "; for (std::vector::const_iterator it = tokens.begin() + 1, E = tokens.end(); it != E; ++it) { tgt << " " << *it; } tgt << "\n"; return; } // a list begins if (tag == ".ba" || // augments the base indent by n. 
tag == ".ip" || tag == ".np" || tag == ".BL" || tag == ".CL" || tag == ".CP" || tag == ".DE" || tag == ".FL" || tag == ".GN" || tag == ".H1" || tag == ".H2" || tag == ".H3" || tag == ".H4" || tag == ".H5" || tag == ".H6" || tag == ".H7" || tag == ".H8" || tag == ".H9" || tag == ".Ik" || tag == ".IL" || tag == ".IN" || tag == ".IP" || tag == ".OL" || tag == ".OC" || tag == ".OV" || tag == ".PD" || tag == ".Sc" || tag == ".SE" || tag == ".SF" || tag == ".ST" || tag == ".TY" || tag == ".XF") { src.seekg(-s.length() - 1, std::ios_base::cur); parseList(src, tgt); return; } // a table begins if (tag == ".TS") { parseTable(src, tgt); return; } // a code block begins if (tag == ".CS") { parseCodeBlock(src, tgt); return; } // a verbatim block begins if (tag == ".nf") { parseLineBlock(src, tgt); return; } if (tag == ".us" || tag == ".US") { if (tokens.size() > 1) { tgt << "\n*" << tokens[1]; for (std::vector::const_iterator it = tokens.begin() + 2, E = tokens.end(); it != E; ++it) { tgt << ' ' << *it; } tgt << "* --- "; } return; } if (tag == ".XB") { if (tokens.size() > 1) { tgt << "\n**" << tokens[1]; for (std::vector::const_iterator it = tokens.begin() + 2, E = tokens.end(); it != E; ++it) { tgt << ' ' << *it; } tgt << "**\n"; } return; } if (tag == ".b") { generateHighlightedItem("**", tgt); return; } if (tag == ".cw" || tag == ".MA" || tag == ".NM") { generateHighlightedItem("``", tgt); return; } if (tag == ".i") { generateHighlightedItem("*", tgt); return; } if (tag == ".q") { generateHighlightedItem("\"", tgt); return; } if (tag == ".DN") { if (tokens.size() > 1) { tgt << "\n``" << tokens[1] << "``\n"; } return; } if (tag == ".DA") { if (tokens.size() > 1) { tgt << " ``" << tokens[1] << "``\n"; } return; } if (tag == ".AT") { if (tokens.size() > 1) { tgt << "\n*Attributes*:"; for (std::vector::const_iterator it = tokens.begin() + 1, E = tokens.end(); it != E; ++it) { tgt << " " << *it; } tgt << "\n"; } return; } if (tag == ".SI") { if (tokens.size() > 1) { tgt << "*" 
<< tokens[1] << "*"; for (std::vector::const_iterator it = tokens.begin() + 2, E = tokens.end(); it != E; ++it) { tgt << " " << *it; } tgt << "\n\n"; } return; } if (tag == ".ul") { std::string t; if (std::getline(src, t)) { tgt << '*' << t << "*\n"; } return; } if (tag == ".lp" || tag == ".br") { tgt << '\n'; return; } if (tag == ".(b" || tag == ".)b") { // FIXME: Couldn't find what .(b .)b pair stands for. // It's not defined in groff_me macro package. // tgt << '\n'; return; } if (tag == ".(z" || tag == ".)z") { // FIXME: .(z .)z means floating in groff_me. // How to represent this in RST? return; } if (tag == ".bp" || // begin new page tag == ".ce" || // FIXME: center next n lines. Not needed. tag == ".EP" || // tag == ".ft" || // font tag == ".hl" || // FIXME: horizontal line. Not needed. tag == ".ne" || // FIXME: can't find what this request does tag == ".nr" || // tag == ".re" || // Reset tabs to default values. tag == ".sp" || // tag == ".sz" || // Augment the point size by n points. tag == ".ta") { // return; } tgt << s << (newline ? "\n" : ""); } void parseList(std::stringstream &src, std::stringstream &tgt) { std::string s; if (!std::getline(src, s)) { return; } tokenize(s); auto tag = tokens[0]; if (tag == ".FL" || tag == ".CL" || tag == ".OL") { listStack.push_back(".IL"); } else { listStack.push_back(tag); } // format the list item header std::stringstream ss; if (DEBUG) ss << "\n..\n Start list <" << s << ">\n"; if (tag == ".BL") { ss << "* "; } else if (tag == ".OV") { if (tokens.size() > 2) { ss << "``" << tokens[1] << " (" << tokens[2] << ")``\n"; } } else if (tag == ".CL" || tag == ".FL" || tag == ".IL" || tag == ".OL" || tag == ".CP") { if (tokens.size() < 4) { ss << "``" << tokens[1] << "``\n"; } else if (tokens.size() > 3) { ss << "#. 
**" << tokens[1] << "**"; for (std::vector::size_type it = 3; it < tokens.size(); ++it) { ss << " " << tokens[it]; } ss << " *Type*: *" << tokens[2] << "*\n\n"; } } else if (tag == ".TY" || tag == ".PD") { if (tokens.size() > 1 && tokens[1] != "B" && tokens[1] != "E") { std::vector::const_iterator it = tokens.begin() + 1, E = tokens.end(); ss << "``" << *it++ << "``"; if (it != E) { ss << " ``" << *it++ << "``"; for (; it != E; ++it) { ss << " " << *it; } } } ss << '\n'; } else if (tag == ".IN" || tag == ".GN") { if (tokens.size() > 1) { for (std::vector::const_iterator it = tokens.begin() + 1, E = tokens.end(); it != E; ++it) { ss << "``" << *it << "`` "; } } } else if (tag == ".ip") { if (tokens.size() > 1) { ss << tokens[1] << '\n'; } else { ss << "* "; } } else if (tag == ".np") { ss << "#. "; } else if (tag == ".ba") { ss << '\n'; } else if (!(tokens.size() > 1 && (tag == ".Ik" || tag == ".OC" || tag == ".Ik" || tag == ".ST" || tag == ".SF" || tag == ".Sc") && (tokens[1] == "B" || tokens[1] == "E"))) { if (tokens.size() > 1) { ss << "``" << tokens[1] << "``\n"; } } // consume the lines up to the next list item, possibly nested while (std::getline(src, s)) { if (s.empty()) { ss << '\n'; continue; } tokenize(s); auto tag = tokens[0]; if (tag == ".sh" || tag == ".lp" || tag == ".SM") { src.seekg(-s.length() - 1, std::ios_base::cur); generateListItem(ss, tgt); listStack.clear(); return; } else if (tag == ".ip" || tag == ".np" || tag == ".BL" || tag == ".CL" || tag == ".CP" || tag == ".DE" || tag == ".FL" || tag == ".GN" || tag == ".H1" || tag == ".H2" || tag == ".H3" || tag == ".H4" || tag == ".H5" || tag == ".H6" || tag == ".H7" || tag == ".H8" || tag == ".H9" || tag == ".Ik" || tag == ".IL" || tag == ".IN" || tag == ".IP" || tag == ".OL" || tag == ".OC" || tag == ".OV" || tag == ".PD" || tag == ".Sc" || tag == ".SE" || tag == ".SF" || tag == ".ST" || tag == ".TY" || tag == ".XF") { if (tag == ".FL" || tag == ".CL" || tag == ".OL") { tag = ".IL"; } auto it = 
listStack.begin(); for (auto E = listStack.end(); it != E; ++it) { if (tag == *it) { listStack.erase(it, E); src.seekg(-s.length() - 1, std::ios_base::cur); generateListItem(ss, tgt); return; } } listStack.push_back(tag); ss << '\n'; src.seekg(-s.length() - 1, std::ios_base::cur); parseList(src, ss); ss << '\n'; } else if (tag == ".ba") { if (tokens.size() > 1) { if (tokens[1][0] == '+') { src.seekg(-s.length() - 1, std::ios_base::cur); parseList(src, ss); } else { generateListItem(ss, tgt); return; } } ss << '\n'; } else { parseOneLineRequest(s, src, ss); } } generateListItem(ss, tgt); } void parseTable(std::stringstream &src, std::stringstream &tgt) { std::ostringstream oss; for (std::string s; std::getline(src, s);) { auto pos = s.find_first_of(" \t"); auto tag = s.substr(0, pos); if (tag == ".TE") { if (DEBUG) tgt << "\n..\n END OF TABLE\n"; generateTable(oss.str(), tgt); return; } else { if (DEBUG) tgt << "\n..\n STORE THE LINE FOR FUTURE\n"; if (!s.empty()) { oss << s << '\n'; } } } } void parseCodeBlock(std::stringstream &src, std::stringstream &tgt) { tgt << "\n.. code-block:: none\n\n"; for (std::string s; getline(src, s);) { if (s == ".CE") { tgt << '\n'; return; } if (s.empty()) { tgt << '\n'; } else if (s.substr(0, 2) == "\\&") { tgt << " " << s.substr(2) << '\n'; } else { tgt << " " << s << '\n'; } } } void parseLineBlock(std::stringstream &src, std::stringstream &tgt) { tgt << "\n.. line-block::\n"; for (std::string s; std::getline(src, s);) { auto pos = s.find_first_of(" \t"); auto tag = s.substr(0, pos); if (tag == ".fi") { tgt << '\n'; return; } else { if (!s.empty()) { tgt << " " << s; } tgt << '\n'; } } } /** \brief Create a new string without any trailing white space from the given input string. \param s the input string. \return a copy of \a s without any trailing space. 
*/ std::string cutTrailingSpace(const std::string &s) const { if (s.empty()) { return s; } auto pos = s.find_last_not_of(" \t"); // s is not empty therefore it must contain only white space if (pos == std::string::npos) { return std::string(); } // anything after pos, if exists, must be white space, so cut it. return s.substr(0, pos + 1); } std::string escapeInlineMarkup(const std::string &s) const { if (s.empty()) { return s; } std::string r; for (std::string::const_iterator c = s.begin(), E = s.end(); c != E; ++c) { if (*c == '*' || *c == '`' || (*c == '_' && (c + 1 == E || *(c + 1) == '\'' || *(c + 1) == '\t' || *(c + 1) == ' ' || *(c + 1) == '"' || *(c + 1) == ',' || *(c + 1) == '.' || *(c + 1) == ';' || *(c + 1) == ':' || *(c + 1) == ')'))) { r.append(1, '\\'); } else if (*c == '\\' && c + 1 != E && *(c + 1) == ' ') { continue; } r.append(1, *c); } return r; } /** \brief Find and convert strings defined with .ds requests. */ void expandDefinedStrings() { std::ostringstream os; lineno = 1; for (std::string s; getline(source, s);) { s = expandStringInline(s, "\\(bu", "*"); s = expandStringInline(s, "\\(em", "---"); s = expandStringInline(s, "\\\\*(SC", "**Flang**"); s = expandPairedString(s, "\\f(CW", "``", "\\fP", "``"); s = expandPairedString(s, "\\\\*(cf", "``", "\\fP", "``"); s = expandPairedString(s, "\\\\*(cf", "``", "\\\\*(rf", "``"); s = expandPairedString(s, "\\\\*(cr", "``", "\\\\*(rf", "``"); s = expandPairedString(s, "\\\\*(mf", "``", "\\\\*(rf", "``"); s = expandPairedString(s, "\\\\*(ff", "*", "\\\\*(rf", "*"); s = expandPairedString(s, "\\\\*(tf", "*", "\\\\*(rf", "*"); s = expandPairedString(s, "\\fI", "*", "\\fP", "*"); os << s << '\n'; } if (DEBUG) target << "\n..\n TOTAL LINES: " << lineno << '\n'; source.str(os.str()); source.clear(); lineno = 1; } /** \brief Do simple one to one .ds string expansions for a single line. 
*/ std::string expandStringInline(const std::string &line, const std::string &s, const std::string &r) const { if (line.empty()) { return line; } auto result = line; auto len = s.length(); std::string::size_type pos = 0; while (1) { pos = result.find(s, pos); if (pos == std::string::npos) { break; } result.replace(pos, len, r); } return result; } /** \brief Do paired replacement of markup encoded with .ds strings. For example, fixed font or code is expressed as ``code`` constructs in RST, but Nroff explicitly sets and resets font for a substring of interest, e.g. \*(cfcode\*(rf, where cf and rf are names defined previous with .ds requests. */ std::string expandPairedString(const std::string &line, const std::string &sl, const std::string &rl, const std::string &sr, const std::string &rr) const { if (line.empty()) { return line; } auto result = line; auto lenl = sl.length(); auto lenr = sr.length(); std::string::size_type posl = 0; while (1) { posl = result.find(sl, posl); if (posl == std::string::npos) { break; } auto posr = result.find(sr, posl); if (posr == std::string::npos) { break; } // order of the following two statements is important, DO NOT change. result.replace(posr, lenr, rr); result.replace(posl, lenl, rl); // posr is invalid after this replacement } return result; } void tokenize(const std::string &s, const std::string &separator = std::string(" \t"), const bool allow_empty = false) { tokens.clear(); for (std::string::const_iterator head = s.begin(), B = s.begin(), E = s.end(); head != E;) { for (; head != E && separator.find_first_of(*head) != std::string::npos; ++head) { if (allow_empty) { tokens.push_back(std::string()); } } auto tail = head != E ? head + 1 : E; for (; tail != E; ++tail) { if (*head == '"') { if (*tail == '"') break; } else if (separator.find_first_of(*tail) != std::string::npos) { break; } } if (head != E && *head == '"') ++head; tokens.push_back(head == tail ? std::string() : s.substr(head - B, tail - head)); head = tail == E ? 
tail : tail + 1; if (allow_empty && head == E && tail != E) { tokens.push_back(std::string()); } } } void generateTable(const std::string &s, std::stringstream &tgt) { std::string token_separator = "\t"; std::istringstream iss(s); std::vector formatting; std::vector widths; std::vector table; table_row_type::size_type columns; table_row_type table_row; cell_type cell; bool is_multiline = false; bool end_of_options = false; // read table formatting options for (std::string s; std::getline(iss, s);) { if (DEBUG) tgt << "\n..\n OPTIONS Line <" << s << ">\n"; tokenize(s); if (tokens.empty()) { continue; } auto &last_token = tokens.back(); if (last_token.end()[-1] == ';') { for (std::vector::const_iterator it = tokens.begin(), E = tokens.end(); it != E; ++it) { if (it->find_first_of('%') != std::string::npos) { token_separator = '%'; } } continue; } // remove '|' tokens std::vector refined_tokens; for (std::vector::iterator it = tokens.begin(); it != tokens.end(); ++it) { if (DEBUG) tgt << "\n..\n TOKEN " << it - tokens.begin() << " <" << *it << ">\n"; if (*it != "|" && *it != "." && *it != "|.") { refined_tokens.push_back(*it); } else if (*it == "." 
|| *it == "|.") { end_of_options = true; } } formatting.push_back(refined_tokens); auto &last_refined_token = refined_tokens.back(); if (end_of_options || last_refined_token.end()[-1] == '.') { if (DEBUG) { tgt << "\n..\n FORMATTING SPEC:\n"; for (std::vector::size_type it = 0; it < formatting.size(); ++it) { tgt << " " << it; for (cell_type::const_iterator ci = formatting[it].begin(), E = formatting[it].end(); ci != E; ++ci) { tgt << " " << *ci; } tgt << '\n'; } } break; } } // read table contents for (std::string s; std::getline(iss, s);) { if (DEBUG) tgt << "\n..\n TBLDATA Line <" << s << ">\n"; if (s == ".TH") { continue; // skip the running header request } if (s == ".T&") { std::getline(iss, s); continue; // skip table continue command and the following format opts } tokenize(s, token_separator, true); auto it = tokens.begin(); if (is_multiline) { if (tokens.empty() || tokens[0] != "T}") { cell.push_back(s); continue; } table_row.push_back(cell); is_multiline = false; cell.clear(); ++it; } for (auto E = tokens.end(); it != E; ++it) { if (*it == "T{") { is_multiline = true; } else { cell.push_back(*it); table_row.push_back(cell); cell.clear(); } } if (is_multiline) { continue; } table.push_back(table_row); table_row.clear(); } // check options consistency columns = 0; if (!formatting.empty()) { for (std::vector::const_iterator it = formatting.begin(), E = formatting.end(); it != E; ++it) { if (it->size() > columns) { columns = it->size(); } } } if (DEBUG) tgt << "\n..\n #COLUMNS " << columns << '\n'; // calculate each column width int row_counter = 0; bool do_corrections = false; for (std::vector::const_iterator r = table.begin(), rE = table.end(); r != rE; ++r, ++row_counter) { if (DEBUG) tgt << "\n..\n ROW " << std::setw(3) << row_counter; if (r->size() < columns) { do_corrections = true; if (DEBUG) tgt << '\n'; continue; } std::string::size_type column = 0; for (table_row_type::const_iterator c = r->begin(), cE = r->end(); c != cE; ++c, ++column) { 
std::string::size_type width = 0; for (cell_type::const_iterator s = c->begin(), sE = c->end(); s != sE; ++s) { if (s->length() > width) { width = s->length(); } } if (DEBUG) { tgt << " COL " << column << " (" << std::setw(2) << width << ")"; } if (column == widths.size()) { widths.push_back(width); } else if (widths[column] < width) { widths[column] = width; } } if (DEBUG) tgt << '\n'; } if (DEBUG) tgt << "\n..\n WIDTHS SIZE " << widths.size() << '\n'; std::string::size_type table_width = 0; for (std::vector::iterator it = widths.begin(), E = widths.end(); it != E; ++it) { if (*it > 0) { table_width += *it + 1; } } --table_width; if (do_corrections) { for (std::vector::const_iterator r = table.begin(), rE = table.end(); r != rE; ++r, ++row_counter) { if (r->size() == columns) { continue; } std::string::size_type column = 0; std::string::size_type cumulative_width = 0; for (table_row_type::const_iterator c = r->begin(), cE = r->end(); c != cE; ++c, ++column) { std::string::size_type width = 0; for (cell_type::const_iterator s = c->begin(), sE = c->end(); s != sE; ++s) { if (s->length() > width) { width = s->length(); } } if (c + 1 != cE) { if (widths[column] < width) { table_width += width - widths[column]; widths[column] = width; } cumulative_width += widths[column] + 1; } else if (cumulative_width + width > table_width) { table_width += width - widths.back(); widths.back() = width; } } } } if (DEBUG) { tgt << "\n..\n WIDTHS:"; for (std::vector::const_iterator it = widths.begin(), E = widths.end(); it != E; ++it) { tgt << " " << *it; } tgt << '\n'; } assert(!widths.empty()); // fix up standalone cells with width 0 for (std::vector::iterator r = table.begin(), rE = table.end(); r != rE; ++r) { std::vector::const_iterator w = widths.begin(); for (table_row_type::iterator c = r->begin(), cE = r->end(); c != cE; ++c, ++w) { if (*w == 0 && w + 1 != widths.end() && c + 1 == cE && !c->empty() && c->at(0).length() > 1) { r->push_back(*c); break; } } } // output the 
formatted table char row_separator = '-'; if (DEBUG) tgt << "\n..\n TABLE WIDTH " << table_width << '\n'; tgt << '\n'; for (std::vector::const_iterator r = table.begin(), rE = table.end(); r != rE; ++r) { if (r->size() == 1 && (r->at(0)[0] == "=" || r->at(0)[0] == "_")) { if (r->at(0)[0] == "=") { row_separator = '='; } continue; } for (std::vector::const_iterator it = widths.begin(), E = widths.end(); it != E; ++it) { if (*it > 0) { tgt << "+" << std::string(*it, row_separator); } } row_separator = '-'; // reset separator regardless of its value. tgt << "+\n"; cell_type::size_type lines = 1; for (table_row_type::const_iterator c = r->begin(), cE = r->end(); c != cE; ++c) { if (c->size() > lines) { lines = c->size(); } } for (cell_type::size_type line = 0; line < lines; ++line) { std::vector::const_iterator w = widths.begin(); std::string::size_type width_so_far = 0; for (table_row_type::const_iterator c = r->begin(), cE = r->end(); c != cE; ++c, ++w) { if (*w == 0) { continue; } tgt << '|'; if (line < c->size()) { tgt << c->at(line); width_so_far += c->at(line).length(); if (c + 1 != cE) { tgt << std::string(*w - c->at(line).length(), ' '); width_so_far += *w - c->at(line).length() + 1; } } else { tgt << std::string(*w, ' '); width_so_far += *w + 1; } } if (width_so_far < table_width) { if (width_so_far == 0) { --width_so_far; tgt << '|'; } tgt << std::string(table_width - width_so_far, ' '); } tgt << "|\n"; } } for (std::vector::const_iterator it = widths.begin(), E = widths.end(); it != E; ++it) { if (*it > 0) { tgt << "+" << std::string(*it, '-'); } } tgt << "+\n\n"; } void generateListItem(std::stringstream &src, std::stringstream &tgt) { src.seekg(0); std::string s; std::getline(src, s); tgt << '\n'; if (!s.empty()) { tgt << s; } tgt << '\n'; while (std::getline(src, s)) { if (!s.empty()) { tgt << " " << s; } tgt << '\n'; } } void generateHighlightedItem(const std::string &markup, std::stringstream &tgt) { if (tokens.size() > 1) { tgt << markup << tokens[1] 
<< markup; for (std::vector::const_iterator it = tokens.begin() + 2, E = tokens.end(); it != E; ++it) { tgt << *it; if (it + 1 != E) { tgt << ' '; } } tgt << '\n'; } } }; /** \class is the base class for the implementations of utility programs ilmtp, machar, symutil, and symini. It provides the methods common to all utility derived classes, such as reading lines from an input file, splitting a line to tokens, identifying the type of an NROFF directive represented by a line, and reporting errors with corresponding line numbers in the error messages. */ class UtilityApplication { protected: SphinxConverter sphinx; ///< sphinx output generator std::vector tokens; ///< array of tokens in the last line std::vector::const_iterator token; ///< tokens array iterator NroffInStream ifs; ///< input file stream std::string line; ///< the current line read from ifs void printError(ErrorSeverity sev, const char *txt1, const char *txt2 = "") { ifs.printError(sev, txt1, txt2); } /** \brief Create a string that is a result of converting every character of the given string to the lower case. \param s is the input string to be converted. \return a converted copy of the input string \a s. */ std::string makeLower(const std::string &s) { std::string result; for (auto c = s.begin(), E = s.end(); c != E; ++c) { result.push_back(isupper(*c) ? tolower(*c) : *c); } return result; } /** \brief Read a line from the input stream and tokenize the string. \param elt map of line type strings to numeric type codes. \param os a pointer to an output stream. \return the type of line if found one of the lines with type in elt, otherwise LT_EOF when the entire input stream is read. If os is not nullptr, output every line that is discarded to the stream pointed by os. 
*/ int getLineAndTokenize(NroffMap &elt, std::ostream *os = nullptr) { tokens.clear(); int result = LT_EOF; while (getline(ifs, line)) { sphinx.process(line); if (int item = elt.match(line)) { // collect tokens skipping the nroff macro at the line's head // which is the 3 first characters for (std::string::const_iterator s = line.begin() + 3, B = line.begin(), E = line.end(); s != E;) { for (; *s == ' '; ++s) { } // skip over initial spaces // 'e' will point to one character after the end of a token auto e = s != E ? s + 1 : E; // next after s or end of line // move to next pairing \" or space if didn't start with \" for (; e != E && *e != (*s == '"' ? '"' : ' '); ++e) { } if (s != E && *s == '"') { ++s; // exclude quotes if (e == E) { // validate quotes are paired printError(SEVERE, "double quote missing from end of string"); } } tokens.push_back(s == e ? std::string() : line.substr(s - B, e - s)); s = e == E ? e : e + 1; } result = item; break; } if (os) { *os << line << '\n'; } } token = tokens.begin(); return result; } /** \brief return the token currently pointed to by the token iterator and advance the iterator to the next token. If no more tokens in the sequence, return an empty string. */ std::string getToken() { return token != tokens.end() ? *token++ : std::string(); } /** \brief output the comment asking not to modify the file produced by an utility application. */ void outputNotModifyComment(FILE *out, const std::string &filename, const std::string &progname, bool is_nroff) { auto pos = progname.find_last_of('/'); std::string s = (pos == std::string::npos) ? progname : progname.substr(pos + 1); fputs(is_nroff ? ".\\\" " : "/* ", out); fprintf(out, "%s - This file written by utility program %s. Do not modify.", filename.c_str(), s.c_str()); fputs(is_nroff ? "\n" : " */\n", out); } /** \brief output the comment asking not to modify the file produced by an utility application. 
*/ void outputNotModifyComment(std::ostream *out, const std::string &filename, const std::string &progname, bool is_nroff) { auto pos = progname.find_last_of('/'); std::string s = (pos == std::string::npos) ? progname : progname.substr(pos + 1); if (is_nroff) *out << ".\\\" "; else *out << "/* "; *out << filename << " - This file written by utility program " << s << ". Do not modify."; if (is_nroff) *out << "\n"; else *out << " */\n"; } }; #else BEGIN_DECL_WITH_C_LINKAGE #define MAXLINE 300 #define FUNCTION #define SUBROUTINE void #define LOOP while (1) #define INT int #define UINT unsigned int #define UCHAR char #define LT_EOF 0 #define LT_UNK -1 #ifndef TRUE #define TRUE 1 #define FALSE 0 #endif typedef struct { const char *str; char ltnum; } LT; extern FILE *infile[10], *outfile[10]; #define IN1 infile[0] #define IN2 infile[1] #define IN3 infile[2] #define IN4 infile[3] #define OUT1 outfile[0] #define OUT2 outfile[1] #define OUT3 outfile[2] #define OUT4 outfile[3] #define OUT5 outfile[4] #define put_error put_err void put_error(int sev, const char *txt); void open_files(int argc, char **argv); int get_line(FILE *funit, LT ltypes[]); void get_token(char out[], int *len); int get_line1(FILE *funit, LT ltypes[], FILE *outf); void flush_line(FILE *outf); void output_off(void); void output_on(void); void ili_op(void); void init_ili(LT *elt); int get_ili(char *ilin); END_DECL_WITH_C_LINKAGE #endif #endif // _UTILS_UTILS_H /* Local Variables: mode: c++ End: */