retdec
string.h
Go to the documentation of this file.
1 
7 #ifndef RETDEC_UTILS_STRING_H
8 #define RETDEC_UTILS_STRING_H
9 
10 #include <algorithm>
11 #include <limits>
12 #include <map>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 #include <set>
17 
18 namespace retdec {
19 namespace utils {
20 
21 // We assume that the largest supported character size is 32 bits.
22 using WideCharType = std::uint32_t;
23 
24 // Strings may have different character sizes, so we need to use a generic
25 // basic_string instead of std::string/std::wstring.
26 using WideStringType = std::basic_string<WideCharType>;
27 
28 bool hasOnlyDecimalDigits(const std::string &str);
29 
30 bool hasOnlyHexadecimalDigits(const std::string &str);
31 
32 bool hasNonprintableChars(const std::string &str);
33 bool hasNonasciiChars(const std::string &str);
34 
35 bool isLowerThanCaseInsensitive(const std::string &str1,
36  const std::string &str2);
37 
38 bool areEqualCaseInsensitive(const std::string &str1, const std::string &str2);
39 
40 bool isShorterPrefixOfCaseInsensitive(const std::string &str1,
41  const std::string &str2);
42 
43 bool contains(const std::string &str, const std::string &sub);
44 bool containsAny(const std::string &str, const std::vector<std::string> &subs);
45 
46 bool containsCaseInsensitive(const std::string &str, const std::string &sub);
47 
48 bool containsAnyOfChars(const std::string &str, const std::string &chars);
49 bool containsAnyOfChars(const std::string &str, std::string::value_type c);
50 
51 std::string toLower(std::string str);
52 std::string toUpper(std::string str);
53 
54 std::string toWide(const std::string &str, std::string::size_type length);
55 std::string unicodeToAscii(const std::uint8_t *bytes, std::size_t nBytes);
56 std::string unicodeToAscii(const std::uint8_t *bytes, std::size_t nBytes, std::size_t &nRead);
57 std::string readNullTerminatedAscii(const std::uint8_t *bytes, std::size_t bytesLen,
58  std::size_t offset = 0, std::size_t maxBytes = 0, bool failOnExceed = false);
59 
60 std::string trim(std::string str, const std::string &toTrim = " \t\r\n\v");
61 
62 std::vector<std::string> split(const std::string &str, char sep = ',',
63  bool trimWhitespace = true);
64 
65 std::string unifyLineEnds(const std::string &str);
66 
/**
* @brief Joins all the strings in @a strings into a single string.
*
* @param[in] strings Container of strings to join; iterated in its own order.
* @param[in] separator Text inserted between every two consecutive elements.
*
* @return All elements of @a strings concatenated, with @a separator placed
*         between neighbouring elements. An empty container yields an empty
*         string.
*
* @tparam Container Any type usable in a range-based for loop whose elements
*                   can be appended to @c std::string via @c operator+=.
*/
template<typename Container>
std::string joinStrings(const Container &strings,
		const std::string &separator = ", ") {
	std::string joined;
	// Track the position explicitly instead of testing joined.empty():
	// with the latter, leading empty elements would silently lose their
	// separator ({"", "a"} would give "a" instead of ", a").
	bool first = true;
	for (const auto &s : strings) {
		if (!first) {
			joined += separator;
		}
		joined += s;
		first = false;
	}
	return joined;
}
90 
91 // The number 4 below is needed because of the null byte.
92 std::string addSlashes(const std::string &str,
93  const std::string &toBackslash = std::string("\"'\\\0", 4));
94 
95 std::string replaceCharsWithStrings(const std::string &str, char what,
96  const std::string &withWhat);
97 
/**
* @brief Returns @c true if @a str starts with the prefix @a withWhat,
*        @c false otherwise.
*
* @param[in] str String whose beginning is inspected.
* @param[in] withWhat Prefix to look for.
*
* @tparam String Any type accepted by @c std::string::rfind() as the searched
*                value (e.g. @c std::string, a C string, or a single @c char).
*/
template<typename String>
bool startsWith(const std::string &str, const String &withWhat) {
	// rfind() with a start position of 0 can only match at index 0, so a
	// mismatch is detected without scanning the rest of str (unlike find()).
	return str.rfind(withWhat, 0) == 0;
}
108 
109 bool endsWith(const std::string &str, const std::string &withWhat);
110 bool endsWith(const std::string &str, char withWhat);
111 bool endsWith(const std::string &str, const std::set<std::string>& withWhat);
112 bool hasSubstringOnPosition(const std::string &str,
113  const std::string &withWhat,
114  std::string::size_type position);
115 bool hasSubstringInArea(const std::string &str, const std::string &withWhat,
116  std::string::size_type start, std::string::size_type stop);
117 
118 bool isComposedOnlyOfChars(const std::string &str, const std::string &chars);
119 bool isComposedOnlyOfChars(const std::string &str, std::string::value_type c);
120 
121 bool isComposedOnlyOfStrings(const std::string &str, const std::string &ss);
122 
123 std::string stripDirs(const std::string &path);
124 
125 std::string replaceAll(const std::string &str, const std::string &from,
126  const std::string &to);
127 
128 std::string replaceNonprintableChars(const std::string &str);
129 std::string replaceNonasciiChars(const std::string &str);
130 std::string replaceNonalnumCharsWith(const std::string &str, std::string::value_type c);
131 
132 std::string removeWhitespace(std::string s);
133 
134 std::pair<std::size_t, std::size_t> getLineAndColumnFromPosition(
135  const std::string &json, std::size_t position);
136 
137 bool isNumber(const std::string &str);
138 bool isIdentifier(const std::string &str);
139 bool isPrintable(const std::string &str);
140 
141 std::string removeLeadingCharacter(
142  const std::string &s,
143  char leading,
144  std::size_t n = std::numeric_limits<std::size_t>::max());
145 
// NOTE(review): the identifier is spelled "Contol" (sic). It is kept as-is
// because renaming it would break existing callers.
bool isContolCharacter(char c);
bool isNiceCharacter(unsigned char c);
/// Does the provided string seem nice? The decision involves the ratio of
/// printable characters and escape sequences in @a str and @a maxRatio
/// (NOTE(review): exact rule is in the implementation — confirm there).
bool isNiceString(const std::string &str, double maxRatio = 2.0/3);
bool isNiceAsciiWideCharacter(unsigned long long c);
/// Does the provided wide string consist only of ASCII characters and is it
/// nice? (NOTE(review): how @a minRatio is applied is defined by the
/// implementation — confirm there.)
bool isNiceAsciiWideString(
		const std::vector<unsigned long long> &str,
		double minRatio = 1.0);
153 
154 std::string getIndentation(std::size_t count, char c = '\t');
155 
156 void appendHex(std::string &n, const long long a);
157 void appendDec(std::string &n, const long long a);
158 std::string appendHexRet(const std::string &n, const long long a);
159 std::string appendDecRet(const std::string &n, const long long a);
160 void removeSuffix(std::string &n, const std::string &suffix = "_");
161 std::string removeSuffixRet(const std::string &n,
162  const std::string &suffix = "_");
163 
164 std::string normalizeName(const std::string &name);
165 std::string normalizeNamePrefix(const std::string &name);
166 
167 bool findFirstInEmbeddedLists(std::size_t &pos, const std::string &str,
168  char c, const std::vector<std::pair<char, char>> &pairs);
169 
170 std::string removeConsecutiveSpaces(const std::string& str);
171 
172 std::string asEscapedCString(const WideStringType& value, std::size_t charSize);
173 
174 std::string removeComments(const std::string& str, char commentChar);
175 
176 std::string extractVersion(const std::string& input);
177 
178 } // namespace utils
179 } // namespace retdec
180 
181 #endif
std::string removeComments(const std::string &str, char commentChar)
Definition: string.cpp:1364
bool isShorterPrefixOfCaseInsensitive(const std::string &str1, const std::string &str2)
Checks if the shorter string of str1 and str2 is a case-insensitive prefix of the longer string.
Definition: string.cpp:302
void removeSuffix(std::string &n, const std::string &suffix="_")
Finds the last occurrence of the specified suffix and removes everything from its start to the end.
Definition: string.cpp:1112
bool isNiceAsciiWideCharacter(unsigned long long c)
Definition: string.cpp:1022
bool startsWith(const std::string &str, const String &withWhat)
Returns true if str starts with the prefix withWhat, false otherwise.
Definition: string.h:105
void appendDec(std::string &n, const long long a)
Appends decimal address to string (typically object name).
Definition: string.cpp:1071
std::string toWide(const std::string &str, std::string::size_type length)
Converts str to wide string.
Definition: string.cpp:398
std::string normalizeName(const std::string &name)
Replaces all special symbols by their normalized equivalent.
Definition: string.cpp:1144
bool hasSubstringOnPosition(const std::string &str, const std::string &withWhat, std::string::size_type position)
Returns true if str has substring withWhat on index position.
Definition: string.cpp:703
bool isNiceCharacter(unsigned char c)
Definition: string.cpp:992
std::string replaceNonprintableChars(const std::string &str)
Replaces non-printable characters in str with their hexadecimal values.
Definition: string.cpp:831
bool contains(const std::string &str, const std::string &sub)
Checks if str contains sub.
Definition: string.cpp:311
bool areEqualCaseInsensitive(const std::string &str1, const std::string &str2)
Checks if str1 == str2 (case-insensitively).
Definition: string.cpp:283
std::string joinStrings(const Container &strings, const std::string &separator=", ")
Joins all the strings in strings into a single string.
Definition: string.h:79
std::string asEscapedCString(const WideStringType &value, std::size_t charSize)
Returns the constant's value as an escaped C string.
Definition: string.cpp:1325
std::string toUpper(std::string str)
Converts all characters in str to upper case.
Definition: string.cpp:383
bool hasSubstringInArea(const std::string &str, const std::string &withWhat, std::string::size_type start, std::string::size_type stop)
Returns true if str has substring withWhat in area bordered by offsets start and stop.
Definition: string.cpp:713
std::string unicodeToAscii(const std::uint8_t *bytes, std::size_t nBytes)
Converts unicode bytes to ASCII string.
Definition: string.cpp:420
bool hasOnlyDecimalDigits(const std::string &str)
Returns true if the given string is formed only by decimal digits.
Definition: string.cpp:227
bool hasOnlyHexadecimalDigits(const std::string &str)
Returns true if the given string is formed only by hexadecimal digits.
Definition: string.cpp:238
std::string unifyLineEnds(const std::string &str)
Unifies line ends in the given string to LF.
Definition: string.cpp:623
std::string replaceCharsWithStrings(const std::string &str, char what, const std::string &withWhat)
Replaces all occurrences of what with withWhat in str and returns the resulting string.
Definition: string.cpp:652
bool hasNonprintableChars(const std::string &str)
Returns true if the given string contains at least one non-printable character.
Definition: string.cpp:247
std::string getIndentation(std::size_t count, char c='\t')
Returns an indentation string containing the specified number of characters.
Definition: string.cpp:1049
bool findFirstInEmbeddedLists(std::size_t &pos, const std::string &str, char c, const std::vector< std::pair< char, char >> &pairs)
Finds the first occurrence of c character in string str that is outside of embedded lists delimited b...
Definition: string.cpp:1257
bool hasNonasciiChars(const std::string &str)
Returns true if the given string contains at least one non-ASCII character.
Definition: string.cpp:256
bool isNumber(const std::string &str)
Checks if the string is a number.
Definition: string.cpp:899
std::string replaceNonalnumCharsWith(const std::string &str, std::string::value_type c)
Replaces non-alphanumeric characters in str with c.
Definition: string.cpp:846
std::string removeLeadingCharacter(const std::string &s, char leading, std::size_t n=std::numeric_limits< std::size_t >::max())
Removes n leading characters from the given string s and returns the result.
Definition: string.cpp:965
std::string appendDecRet(const std::string &n, const long long a)
Appends decimal address to string and returns the new string.
Definition: string.cpp:1099
bool isNiceAsciiWideString(const std::vector< unsigned long long > &str, double minRatio=1.0)
Does the provided wide string consist only of ASCII characters and is it nice? Nice strings have a ratio...
Definition: string.cpp:1034
std::string trim(std::string str, const std::string &toTrim=" \t\r\n\v")
Trims the given string.
Definition: string.cpp:566
std::string stripDirs(const std::string &path)
Strips all directories from the given path.
Definition: string.cpp:799
std::string extractVersion(const std::string &input)
Definition: string.cpp:1401
bool isComposedOnlyOfStrings(const std::string &str, const std::string &ss)
Returns true if str is composed solely of strings ss, false otherwise.
Definition: string.cpp:758
bool containsAny(const std::string &str, const std::vector< std::string > &subs)
Check if at least one string from subs is contained in str.
Definition: string.cpp:318
bool containsAnyOfChars(const std::string &str, const std::string &chars)
Returns true if str contains at least one character from chars, false otherwise.
Definition: string.cpp:356
bool endsWith(const std::string &str, const std::string &withWhat)
Returns true if str ends with the suffix withWhat, false otherwise.
Definition: string.cpp:669
std::basic_string< WideCharType > WideStringType
Definition: string.h:26
std::string readNullTerminatedAscii(const std::uint8_t *bytes, std::size_t bytesLen, std::size_t offset=0, std::size_t maxBytes=0, bool failOnExceed=false)
Read up to maxBytes bytes as ASCII string.
Definition: string.cpp:513
std::string replaceAll(const std::string &str, const std::string &from, const std::string &to)
Replaces all occurrences of from in str with to and returns the string obtained in this way.
Definition: string.cpp:813
std::string appendHexRet(const std::string &n, const long long a)
Appends hexadecimal address to string and return new string.
Definition: string.cpp:1085
std::pair< std::size_t, std::size_t > getLineAndColumnFromPosition(const std::string &json, std::size_t position)
Transform position in json into line and column location.
Definition: string.cpp:871
bool isIdentifier(const std::string &str)
Checks if the string is a valid C language identifier.
Definition: string.cpp:920
std::string removeSuffixRet(const std::string &n, const std::string &suffix="_")
Finds the last occurrence of the specified suffix and removes everything from its start to the end.
Definition: string.cpp:1128
std::string toLower(std::string str)
Converts all characters in str to lower case.
Definition: string.cpp:372
std::uint32_t WideCharType
Definition: string.h:22
bool isContolCharacter(char c)
Definition: string.cpp:984
std::string removeConsecutiveSpaces(const std::string &str)
Definition: string.cpp:1312
bool isComposedOnlyOfChars(const std::string &str, const std::string &chars)
Returns true if str is composed solely of chars in chars, false otherwise.
Definition: string.cpp:732
bool containsCaseInsensitive(const std::string &str, const std::string &sub)
Find out if string contains another string, no matter the case.
Definition: string.cpp:335
std::string replaceNonasciiChars(const std::string &str)
Replaces non-ASCII characters in str with their hexadecimal values.
Definition: string.cpp:839
std::string removeWhitespace(std::string s)
Removes all whitespace from the given string.
Definition: string.cpp:857
void appendHex(std::string &n, const long long a)
Appends hexadecimal address to string (typically object name).
Definition: string.cpp:1059
std::vector< std::string > split(const std::string &str, char sep=',', bool trimWhitespace=true)
Splits the given string by a separator.
Definition: string.cpp:599
std::string addSlashes(const std::string &str, const std::string &toBackslash=std::string("\"'\\\0", 4))
Returns str with backslashes before characters that need to be quoted, specified in toBackslash.
Definition: string.cpp:637
bool isNiceString(const std::string &str, double maxRatio=2.0/3)
Does the provided string seem nice, i.e. ratio of printable characters and escape sequences in the str...
Definition: string.cpp:1007
std::string normalizeNamePrefix(const std::string &name)
Definition: string.cpp:1219
bool isLowerThanCaseInsensitive(const std::string &str1, const std::string &str2)
Checks if str1 < str2 (case-insensitively).
Definition: string.cpp:266
bool isPrintable(const std::string &str)
Checks if the string is printable.
Definition: string.cpp:945
Definition: archive_wrapper.h:19