jak-project/common/util/string_util.cpp

212 lines
5.6 KiB
C++
Raw Normal View History

#include "string_util.h"
#include <iomanip>
#include <random>
#include <regex>
#include <sstream>
#include "common/util/diff.h"
namespace str_util {
// Set of characters the trim helpers in this file (ltrim, rtrim, ltrim_newlines) treat as
// whitespace: space, newline, carriage return, tab, form feed and vertical tab.
const std::string WHITESPACE = " \n\r\t\f\v";
// Reports whether `substr` occurs anywhere inside `s`.
// An empty `substr` is found in every string, including the empty one.
bool contains(const std::string& s, const std::string& substr) {
  const bool missing = (s.find(substr) == std::string::npos);
  return !missing;
}
// Reports whether `s` begins with `prefix`. An empty `prefix` always matches.
bool starts_with(const std::string& s, const std::string& prefix) {
  const auto prefix_len = prefix.size();
  // A prefix longer than the string can never match.
  if (prefix_len > s.size()) {
    return false;
  }
  return s.compare(0, prefix_len, prefix) == 0;
}
// Reports whether `s` finishes with `suffix`. An empty `suffix` always matches.
bool ends_with(const std::string& s, const std::string& suffix) {
  const auto suffix_len = suffix.size();
  // A suffix longer than the string can never match (and would underflow the offset below).
  if (s.size() < suffix_len) {
    return false;
  }
  const auto tail_start = s.size() - suffix_len;
  return s.compare(tail_start, suffix_len, suffix) == 0;
}
// Left-trims any leading whitespace up to and including the final leading newline.
// Indentation that follows that last leading newline is preserved.
// For example:
// " \n\n hello world" => " hello world"
//
// - A string made up solely of whitespace (or an empty string) yields "".
// - If the leading whitespace contains no newline, the string is returned unchanged.
std::string ltrim_newlines(const std::string& s) {
  const size_t first_content = s.find_first_not_of(WHITESPACE);
  if (first_content == std::string::npos) {
    // Nothing but whitespace -- there is no content to keep.
    return "";
  }
  if (first_content == 0) {
    // No leading whitespace at all.
    return s;
  }
  // Find the last newline inside the leading whitespace run; everything after it is kept.
  // Staying in size_t avoids narrowing the index to int.
  const size_t last_newline = s.rfind('\n', first_content - 1);
  return (last_newline == std::string::npos) ? s : s.substr(last_newline + 1);
}
// Returns a copy of `s` with every leading WHITESPACE character removed.
// Yields "" when `s` is empty or consists only of whitespace.
std::string ltrim(const std::string& s) {
  const size_t first_kept = s.find_first_not_of(WHITESPACE);
  if (first_kept == std::string::npos) {
    return "";
  }
  return s.substr(first_kept);
}
// Returns a copy of `s` with every trailing WHITESPACE character removed.
// Yields "" when `s` is empty or consists only of whitespace.
std::string rtrim(const std::string& s) {
  const size_t last_kept = s.find_last_not_of(WHITESPACE);
  if (last_kept == std::string::npos) {
    return "";
  }
  return s.substr(0, last_kept + 1);
}
// Returns a copy of `s` with whitespace stripped from both ends
// (leading side first via ltrim, then trailing side via rtrim).
std::string trim(const std::string& s) {
  const std::string without_leading = ltrim(s);
  return rtrim(without_leading);
}
// Strips the leading whitespace from every line of `s`.
// The text is split on '\n', each piece is passed through ltrim, and the pieces are
// re-joined with "\n".
std::string trim_newline_indents(const std::string& s) {
  std::vector<std::string> stripped;
  for (const std::string& line : split(s, '\n')) {
    stripped.push_back(ltrim(line));
  }
  return join(stripped, "\n");
}
// Concatenates all elements of `strs`, placing `join_with` between consecutive elements.
// No separator is emitted before the first or after the last element; an empty vector
// produces an empty string.
std::string join(const std::vector<std::string>& strs, const std::string& join_with) {
  std::string joined;
  bool is_first = true;
  for (const std::string& piece : strs) {
    if (!is_first) {
      joined += join_with;
    }
    joined += piece;
    is_first = false;
  }
  return joined;
}
// Counts the '\n' characters in `str`.
// Note this is the number of newline characters, so text without a trailing newline
// reports one less than its number of visual lines.
int line_count(const std::string& str) {
  return static_cast<int>(std::count(str.begin(), str.end(), '\n'));
}
// Checks whether `regex` can be compiled as a std::regex (default grammar/flags).
// Returns false if construction throws std::regex_error, true otherwise.
// NOTE - this won't work running within gk.exe!
bool valid_regex(const std::string& regex) {
  bool compiles = true;
  try {
    // Construction alone performs the syntax validation; the object itself is unused.
    const std::regex probe(regex);
  } catch (const std::regex_error&) {
    compiles = false;
  }
  return compiles;
}
// Returns whatever google_diff::diff_strings produces for `lhs` versus `rhs`.
// NOTE(review): the output format is defined by common/util/diff.h, which is not visible
// here -- presumably a human-readable diff; confirm before relying on its shape.
std::string diff(const std::string& lhs, const std::string& rhs) {
return google_diff::diff_strings(lhs, rhs);
}
/// Splits `str` into pieces at each `delimiter` by delegating to google_diff::split_string.
/// Default splits on \n characters
/// NOTE(review): the default argument is not visible in this file -- presumably declared in
/// string_util.h; handling of empty pieces is whatever split_string implements.
std::vector<std::string> split(const ::std::string& str, char delimiter) {
return google_diff::split_string(str, delimiter);
}
docs: Automatically generate documentation from goal_src code (#2214) This automatically generates documentation from goal_src docstrings, think doxygen/java-docs/rust docs/etc. It mostly supports everything already, but here are the following things that aren't yet complete: - file descriptions - high-level documentation to go along with this (think pure markdown docs describing overall systems that would be co-located in goal_src for organizational purposes) - enums - states - std-lib functions (all have empty strings right now for docs anyway) The job of the new `gen-docs` function is solely to generate a bunch of JSON data which should give you everything you need to generate some decent documentation (outputting markdown/html/pdf/etc). It is not its responsibility to do that nice formatting -- this is by design to intentionally delegate that responsibility elsewhere. Side-note, this is about 12-15MB of minified json for jak 2 so far :) In our normal "goal_src has changed" action -- we will generate this data, and the website can download it -- use the information to generate the documentation at build time -- and it will be included in the site. Likewise, if we wanted to include docs along with releases for offline viewing, we could do so in a similar fashion (just write a formatting script to generate said documentation). Lastly this work somewhat paves the way for doing more interesting things in the LSP like: - what's the docstring for this symbol? - autocompleting function arguments - type checking function arguments - where is this symbol defined? - etc Fixes #2215
2023-02-20 19:49:37 -05:00
// Searches `str` for the first match of `regex` and returns the text of each capture group
// (group 1 onwards -- the whole-match group 0 is not included).
// Returns an empty vector when there is no match or the pattern has no capture groups.
// Propagates std::regex_error if `regex` is not a valid pattern.
std::vector<std::string> regex_get_capture_groups(const std::string& str,
                                                  const std::string& regex) {
  std::vector<std::string> captures;
  std::smatch match;
  const std::regex pattern(regex);
  if (!std::regex_search(str, match, pattern)) {
    return captures;
  }
  // A successful match always has at least the whole-match entry at index 0.
  captures.reserve(match.size() - 1);
  for (size_t group_idx = 1; group_idx < match.size(); ++group_idx) {
    captures.push_back(match[group_idx].str());
  }
  return captures;
}
// Replaces the FIRST occurrence of `from` inside `str` with `to`, modifying `str` in place.
// Returns true if a replacement happened, false if `from` was not found (in which case
// `str` is left untouched). Later occurrences are not touched.
bool replace(std::string& str, const std::string& from, const std::string& to) {
  const size_t hit = str.find(from);
  const bool found = (hit != std::string::npos);
  if (found) {
    str.replace(hit, from.length(), to);
  }
  return found;
}
// Returns a copy of `str` with every character passed through tolower (current C locale).
// Bytes that have no lowercase mapping are copied through unchanged.
std::string lower(const std::string& str) {
  std::string res;
  res.reserve(str.size());
  for (const char c : str) {
    // tolower requires a value representable as unsigned char (or EOF); passing a negative
    // plain char (e.g. a UTF-8 continuation byte) is undefined behaviour, so convert first.
    res.push_back(static_cast<char>(::tolower(static_cast<unsigned char>(c))));
  }
  return res;
}
// Produces a random identifier laid out like a UUID: 32 lowercase hex digits grouped
// 8-4-4-4-12 and separated by dashes (36 characters total).
// Every digit is drawn uniformly at random; no version/variant bits are set.
// The generator is a function-local static std::mt19937 seeded once from std::random_device.
std::string uuid() {
  static std::random_device seed_source;
  static std::mt19937 engine(seed_source());
  std::uniform_int_distribution<int> nibble(0, 15);
  const char* hex_digits = "0123456789abcdef";
  std::string id;
  for (int byte_idx = 0; byte_idx < 16; ++byte_idx) {
    // A dash precedes bytes 4, 6, 8 and 10, yielding the 8-4-4-4-12 grouping.
    const bool starts_group = byte_idx == 4 || byte_idx == 6 || byte_idx == 8 || byte_idx == 10;
    if (starts_group) {
      id += "-";
    }
    // Two hex digits per byte.
    id += hex_digits[nibble(engine)];
    id += hex_digits[nibble(engine)];
  }
  return id;
}
// Returns `str` concatenated with itself `n` times.
// `n == 0` or an empty `str` yields an empty string.
// The general case grows the buffer by repeated doubling and then tops it up with the
// remaining copies, so only O(log n) append calls are needed.
std::string repeat(size_t n, const std::string& str) {
  if (str.empty() || n == 0) {
    return {};
  }
  if (n == 1) {
    return str;
  }
  const auto unit_len = str.size();
  if (unit_len == 1) {
    // Single character: the fill constructor does the job directly.
    return std::string(n, str.front());
  }
  std::string out(str);
  out.reserve(unit_len * n);
  // `copies` tracks how many repetitions `out` currently holds; double while that stays < n.
  std::size_t copies = 1;
  while (copies * 2 < n) {
    out += out;
    copies *= 2;
  }
  // Top up with the leading (n - copies) repetitions already present in the buffer.
  // The capacity reserved above means this self-append does not reallocate.
  out.append(out.c_str(), (n - copies) * unit_len);
  return out;
}
// Formats the current wall-clock time in the local time zone as "YYYY-MM-DDTHH:MM:SS".
// Uses std::localtime, which returns a pointer to shared static storage; the result is
// copied immediately.
std::string current_local_timestamp() {
  const std::time_t current = std::time(nullptr);
  const std::tm broken_down = *std::localtime(&current);
  std::ostringstream formatted;
  formatted << std::put_time(&broken_down, "%Y-%m-%dT%H:%M:%S");
  return formatted.str();
}
// Formats the current wall-clock time in the local time zone as "YYYY-MM-DDTHH-MM-SS",
// i.e. the same layout as current_local_timestamp but with dashes instead of colons in
// the time part.
std::string current_local_timestamp_no_colons() {
  const std::time_t current = std::time(nullptr);
  const std::tm broken_down = *std::localtime(&current);
  std::ostringstream formatted;
  formatted << std::put_time(&broken_down, "%Y-%m-%dT%H-%M-%S");
  return formatted.str();
}
// Formats the current time in UTC as "YYYY-MM-DDTHH:MM:SSZ".
// Uses std::gmtime, which returns a pointer to shared static storage; the result is
// copied immediately.
std::string current_isotimestamp() {
  const std::time_t current = std::time(nullptr);
  const std::tm broken_down_utc = *std::gmtime(&current);
  std::ostringstream formatted;
  formatted << std::put_time(&broken_down_utc, "%Y-%m-%dT%H:%M:%SZ");
  return formatted.str();
}
// Returns a copy of `str` with every character passed through toupper (current C locale).
// Bytes that have no uppercase mapping are copied through unchanged.
std::string to_upper(const std::string& str) {
  std::string new_str(str.size(), ' ');
  // The lambda takes unsigned char on purpose: toupper has undefined behaviour for negative
  // values, which plain char produces for non-ASCII bytes on signed-char platforms.
  std::transform(str.begin(), str.end(), new_str.begin(),
                 [](unsigned char c) { return static_cast<char>(::toupper(c)); });
  return new_str;
}
// Returns a copy of `str` with every character passed through tolower (current C locale).
// Bytes that have no lowercase mapping are copied through unchanged.
std::string to_lower(const std::string& str) {
  std::string new_str(str.size(), ' ');
  // The lambda takes unsigned char on purpose: tolower has undefined behaviour for negative
  // values, which plain char produces for non-ASCII bytes on signed-char platforms.
  std::transform(str.begin(), str.end(), new_str.begin(),
                 [](unsigned char c) { return static_cast<char>(::tolower(c)); });
  return new_str;
}
i18n: subtitle code cleanup and update new subtitle JSON files to be compatible with Crowdin (#2802) The main thing that was done here was to slightly modify the new subtitle-v2 JSON schema to be more similar to the existing one so that it can properly be used in Crowdin. Draft while I double-check the diff myself Along the way the following was also done (among other things): - got rid of as much duplication as was feasible in the serialization and editor code - separated the text serialization code from the subtitle code for better organization - simplified "base language" in the editor. The new subtitle format has built-in support for defining a base language so the editor doesn't have to be used as a crutch. Also, cutscenes only defined in the base come first in the list now as that is generally the order you'd work from (what you haven't done first) - got rid of the GOAL subtitle format code completely - switched jak 2 text translations to the JSON format as well - found a few mistakes in the jak 1 subtitle metadata files - added a couple minor features to the editor - consolidated and removed complexity, ie. recently all jak 1 hints were forced to the `named` type, so I got rid of the two types as there isn't a need anymore. - removed subtitle editor groups for jak 1, the only reason they existed was so when the GOAL file was manually written out they were somewhat organized, the editor has a decent filter control, there's no need for them. - removed the GOAL -> JSON python script helper, it's been a month or so and no one has come forward with existing translations that they need help with migrating. If they do need it, the script will be in the git history. I did some reasonably thorough testing in Jak1/Jak 2 and everything seemed to work. But more testing is always a good idea. --------- Co-authored-by: ManDude <7569514+ManDude@users.noreply.github.com>
2023-07-08 21:53:39 -04:00
// Reports whether `c` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
bool hex_char(char c) {
  const bool is_decimal = (c >= '0' && c <= '9');
  const bool is_lower_hex = (c >= 'a' && c <= 'f');
  const bool is_upper_hex = (c >= 'A' && c <= 'F');
  return is_decimal || is_lower_hex || is_upper_hex;
}
} // namespace str_util