Translations - Rdatatable/data.table GitHub Wiki
There are two roles which are related to translations:
-
Translation Manager: responsible for reviewing translation-related PRs, including creating CI for checking, as in PR#6358. Should be familiar with
data.table
internals, and will need to understand the basics of gettext()
and its interface through R, including possibly through the potools
package. -
Translator: a project member involved with translating vignettes, messages, etc., from English to another language. Translators are encouraged to offer feedback on the quality and user-friendliness of English-language messages. They don't necessarily have to understand any
data.table
internals, though of course familiarity with the package will help lead to higher-quality translations.
The best way to communicate with the data.table developers is to create an issue: https://github.com/Rdatatable/data.table/issues/new
There are several teams that can be mentioned in issues:
- Collective: https://github.com/orgs/Rdatatable/teams/translators (all translators; mention with
@Rdatatable/translators
) - Language-specific: https://github.com/orgs/Rdatatable/teams/translators/teams (mention with
@Rdatatable/<Language>
, e.g. @Rdatatable/French
)
Translating the data.table
messages from scratch can be easier if data.table.pot
and R-data.table.pot
are split into multiple smaller files. To do that, place split.R
and combine.R
in an empty directory created beside the data.table source code (so that, with the working directory set to that new directory, ../data.table
points to the source code), set MY_LOCALE
to something like "zh_CN"
or "es"
, and make sure you have installed the {potools}
package and the gettext
utilities. Run split.R
to create the split PO files, and then translate all of them. After that, and making sure the POT files in ../data.table/po/
are current, run combine.R
to create the two combined PO files, which you can then move to ../data.table/po/
. Check the beginning of the PO files (the part before the first translated message) with a text editor and make any necessary adjustments. Disclaimer: the scripts were tested on Linux only, and depend on a hardcoded list of source files with no messages. They were created for the Brazilian Portuguese translation and later made more general, but were attached to the wiki before being tested by other translation teams.
The {potools}
website lists applications for translating PO files, among other software packages created for otherwise manipulating them.
TODO add details about what software tools were used by the various translation teams, to make it easier to generate the translated files.
TODO is it possible to join forces with base R's weblate? See discussions in https://github.com/Rdatatable/data.table/issues/6370 and (older) https://contributor.r-project.org/translations/ https://github.com/Rdatatable/data.table/pull/6199#issuecomment-2259220924
Please check https://contributor.r-project.org/translations/Conventions_for_Languages/#languages-and-contributions to see if your language already has translation guidelines in base R.
The wiki doesn't permit attaching R files, so the scripts are here for now.
split.R:
#!/usr/bin/env Rscript
# MY_LOCALE <- NULL # for example, "pt_BR", "fr" etc.
# Refuse to run until MY_LOCALE is a single, non-empty locale string.
# `||` short-circuits, so an unset MY_LOCALE reaches the message below instead
# of failing with "object 'MY_LOCALE' not found" while evaluating is.null().
if (!exists("MY_LOCALE") || is.null(MY_LOCALE) || !is.character(MY_LOCALE) ||
    length(MY_LOCALE) != 1 || is.na(MY_LOCALE) || !nzchar(MY_LOCALE)) {
  stop("Please set MY_LOCALE in the script to something like \"pt_BR\" or \"es\"")
}
# The data.table sources are expected next to the working directory.
DT_SRC <- "../data.table"
if (!dir.exists(DT_SRC)) {
  # getwd() must be called: passing the function object itself to sprintf()
  # raises an error instead of printing the directory.
  stop(sprintf("Could not find %s. Is %s the intended working directory?", DT_SRC, getwd()))
}
library(potools)
# Paths of the PO files that may already exist in the data.table sources
# for this locale (R-level messages and C-level messages).
previous_po_R <- file.path(DT_SRC, "po", sprintf("R-%s.po", MY_LOCALE))
previous_po_C <- file.path(DT_SRC, "po", sprintf("%s.po", MY_LOCALE))
# Either both files exist or neither does; a lone file is not handled.
if (file.exists(previous_po_C) != file.exists(previous_po_R)) {
  stop("Why does one PO file already exists but not the other?!")
}
any_previous_po <- any(file.exists(c(previous_po_C, previous_po_R))) # Will be reused
if (any_previous_po) {
  warning("This script is more useful when there is no or little previous translation")
}
# Base names (extension stripped) of every R and C source file in data.table.
# A name appears once even when both <name>.R and <name>.c exist.
source_names <-
  list.files(file.path(DT_SRC, c("R", "src")), "\\.(R|c)$") |>
  sub(pattern = "\\.(R|c)$", replacement = "") |>
  unique() |>
  sort()
# Message-emitting wrappers that po_extract() is told to treat as translation
# functions for R code; the same for every source file, so defined once.
functions <- c('catf:fmt|1', 'stopf:fmt|1', 'warningf:fmt|1', 'messagef:fmt|1', 'packageStartupMessagef:fmt|1')
# For each source name, build a throwaway one-file "package" in the working
# directory and run po_extract() on it.
for (nm in source_names) {
  if (nm == "negate") next # In this case po_extract() fails because there is no message to translate
  for (d in file.path(nm, c("", "R", "src"))) {
    if (!dir.exists(d)) dir.create(d)
  }
  original_R <- sprintf("%s/R/%s.R", DT_SRC, nm)
  copy_R <- sprintf("%s/R/%s.R", nm, nm)
  if (file.exists(original_R)) {
    success <- file.copy(original_R, copy_R, copy.date = TRUE)
    # stop() pastes its arguments together; it does not interpret "%s",
    # so the message has to be formatted with sprintf() first.
    if (!isTRUE(success)) stop(sprintf("Could not copy from %s to %s", original_R, copy_R))
  }
  original_C <- sprintf("%s/src/%s.c", DT_SRC, nm)
  copy_C <- sprintf("%s/src/%s.c", nm, nm)
  if (file.exists(original_C)) {
    success <- file.copy(original_C, copy_C, copy.date = TRUE)
    # Report the C paths here (not the R ones) so the failing file is identified.
    if (!isTRUE(success)) stop(sprintf("Could not copy from %s to %s", original_C, copy_C))
  }
  # Minimal DESCRIPTION so that the directory looks like a package named `nm`.
  writeLines(c(sprintf("Package: %s", nm), "Version: 0.0"), file.path(nm, "DESCRIPTION"))
  po_extract(dir = nm, style = "explicit", custom_translation_functions = list(R = functions, src = NULL))
}
# Gather the POT files found in each ./<name>/po directory into a single
# ./po directory, then remove the temporary per-file directories.
if (!dir.exists("po")) dir.create("po")
pot_files_from <- list.dirs(recursive = TRUE) |>
  grep(pattern = "^\\./[^/]+/po$", value = TRUE) |>
  list.files(pattern = "\\.pot$", full.names = TRUE)
pot_files_to <- file.path("po", basename(pot_files_from))
success <- file.copy(pot_files_from, pot_files_to, overwrite = TRUE)
# stopf() is not defined in this script (nor attached by library(potools)
# as far as this file shows), so use base stop() to report the failure.
if (!all(success %in% TRUE)) stop("Could not copy some files")
unlink(source_names, recursive = TRUE)
# Anchor and escape the extension so only files ending in ".pot" are listed.
pot_files <- list.files("po", "\\.pot$", full.names = TRUE)
# Name of the locale-specific PO file that corresponds to each POT file,
# e.g. po/R-foo.pot -> po/R-foo-<MY_LOCALE>.po
po_files <- sub("\\.pot$", sprintf("-%s.po", MY_LOCALE), pot_files)
# Produce one PO file per split POT file. When a translation for this locale
# already exists, each split PO file starts as a copy of the matching existing
# PO file (R or C) and is then merged against its POT file; otherwise it is
# initialised from the POT file alone.
if (any_previous_po) {
  merge_notice <- sprintf("Existing %s translation was detected, and will be included in the split PO files", MY_LOCALE)
  message(merge_notice)
  # POT and PO lists must agree on which entries are R-level files.
  r_in_pot <- grepl("po/R-.*", pot_files)
  r_in_po <- grepl("po/R-.*", po_files)
  stopifnot(identical(r_in_pot, r_in_po))
  source_is_R <- grepl("po/R-.*.pot", pot_files)
  for (idx in seq_along(po_files)) {
    if (source_is_R[idx]) {
      seed_po <- previous_po_R
    } else {
      seed_po <- previous_po_C
    }
    file.copy(from = seed_po, to = po_files[idx], copy.date = TRUE)
    potools:::run_msgmerge(po_files[idx], pot_files[idx])
  }
} else {
  init_notice <- sprintf("Creating PO files for locale %s", MY_LOCALE)
  message(init_notice)
  for (idx in seq_along(pot_files)) {
    potools:::run_msginit(po_files[idx], pot_files[idx], MY_LOCALE)
  }
}
output_dir <- file.path(getwd(), "po")
message(sprintf("The PO files can be found in %s", output_dir))
combine.R:
#!/usr/bin/env Rscript
# MY_LOCALE <- NULL # for example, "pt_BR", "fr" etc.
# Refuse to run until MY_LOCALE is a single, non-empty locale string.
# `||` short-circuits, so an unset MY_LOCALE reaches the message below instead
# of failing with "object 'MY_LOCALE' not found" while evaluating is.null().
if (!exists("MY_LOCALE") || is.null(MY_LOCALE) || !is.character(MY_LOCALE) ||
    length(MY_LOCALE) != 1 || is.na(MY_LOCALE) || !nzchar(MY_LOCALE)) {
  stop("Please set MY_LOCALE in the script to something like \"pt_BR\" or \"es\"")
}
# The data.table sources are expected next to the working directory.
DT_SRC <- "../data.table"
if (!dir.exists(DT_SRC)) {
  stop(sprintf("Could not find %s. Is %s the intended working directory?", DT_SRC, getwd()))
}
library(potools)
# Analogous to the potools:::run_msg* functions
#
# Concatenate several PO files into one with the gettext `msgcat` utility and
# then validate the result with tools::checkPoFile().
#
# input_files: character vector of PO file paths to concatenate.
# output_file: path of the combined PO file to write (msgcat's -o argument).
# verbose:     if TRUE, echo the command line and msgcat's output as messages.
#
# Returns NULL invisibly. Problems are reported as warnings, not errors.
run_msgcat <- function(input_files, output_file, verbose = TRUE) {
  args <- c(shQuote(path.expand(input_files)), "-o", shQuote(path.expand(output_file)))
  if (verbose) message("Running system command msgcat ", paste(args, collapse = " "), "...")
  val <- system2("msgcat", args, stdout = TRUE, stderr = TRUE)
  # With stdout = TRUE, system2() attaches a "status" attribute to the captured
  # output only when the command exits with a non-zero status.
  failed <- !is.null(attr(val, "status", exact = TRUE))
  if (failed) {
    warning(sprintf("Running msgcat for '%s' failed:\n %s", basename(output_file), paste(val, collapse = "\n")))
  } else if (verbose) {
    message(paste(val, collapse = "\n"))
  }
  # Nothing to validate if msgcat did not produce the output file.
  if (!file.exists(output_file)) return(invisible())
  res <- tools::checkPoFile(output_file, strictPlural = TRUE)
  if (nrow(res)) {
    # Name the file that was actually checked; `po_file` does not exist here.
    warning(sprintf("tools::checkPoFile() found some issues in %s", output_file))
    print(res)
  }
  invisible()
}
# Current POT templates in the data.table sources; both must be present.
pot_R <- file.path(DT_SRC, "po", "R-data.table.pot")
pot_C <- file.path(DT_SRC, "po", "data.table.pot")
stopifnot(file.exists(pot_R), file.exists(pot_C))
# Combined PO files are written to the working directory under these names.
new_po_R <- sprintf("R-%s.po", MY_LOCALE)
new_po_C <- sprintf("%s.po", MY_LOCALE)
previous_po_R <- file.path(DT_SRC, "po", sprintf("R-%s.po", MY_LOCALE))
previous_po_C <- file.path(DT_SRC, "po", sprintf("%s.po", MY_LOCALE))
if (file.exists(previous_po_C) != file.exists(previous_po_R)) {
  stop("Why does one PO file already exists but not the other?!")
}
# Seed the combined files from the existing translation, or initialise them
# from the POT templates when there is none.
# NOTE(review): run_msgcat() below writes to these same paths via `-o`, which
# presumably replaces whatever is created here - confirm whether this seeding
# step is still needed.
if (file.exists(previous_po_R)) {
  success <- file.copy(c(previous_po_R, previous_po_C), c(new_po_R, new_po_C), overwrite = TRUE, copy.date = TRUE)
  if (!all(success)) stop("Could not copy one or more previous PO files to new location")
} else {
  potools:::run_msginit(new_po_R, pot_R, MY_LOCALE)
  potools:::run_msginit(new_po_C, pot_C, MY_LOCALE)
}
# Split PO files are named <something>-<MY_LOCALE>.po. Escape the dot and
# anchor the end so that backups such as "*.po~" or "*.po.bak" are not picked up.
po_files <- list.files("po", sprintf("-%s\\.po$", MY_LOCALE), full.names = TRUE)
if (length(po_files) == 0L) {
  # Stop early rather than calling msgcat with no input files
  # (presumably it would then read standard input - not verified here).
  stop(sprintf("No split PO files for locale %s were found in %s", MY_LOCALE, file.path(getwd(), "po")))
}
# R-level files are the ones whose file name starts with "R-"; testing the
# base name avoids depending on the path separator.
source_is_R <- startsWith(basename(po_files), "R-")
run_msgcat(po_files[source_is_R], new_po_R)
run_msgcat(po_files[!source_is_R], new_po_C)
# Bring the combined files in line with the current POT templates.
potools:::run_msgmerge(new_po_R, pot_R)
potools:::run_msgmerge(new_po_C, pot_C)
message(sprintf("Files %s and %s can be found in %s", new_po_R, new_po_C, getwd()))
message("You will have to adjust the beginning of the PO files, that is, the settings in the translation of msgid \"\" and the corresponding comments.")