Translations - Rdatatable/data.table GitHub Wiki

Role Definition

There are two roles which are related to translations:

  • Translation Manager: responsible for reviewing translation-related PRs, including creating CI for checking, as in PR#6358. The Translation Manager should be familiar with data.table internals, and will need to understand the basics of gettext() and its interface through R, including possibly through the potools package.
  • Translator: a project member involved with translating vignettes, messages, etc., from English to another language. Translators are encouraged to offer feedback on the quality and user-friendliness of English-language messages. They don't necessarily have to understand any data.table internals, though of course familiarity with the package will help lead to higher-quality translations.

Communication between devs and translation teams

The best way to communicate with the data.table devs is to create an issue: https://github.com/Rdatatable/data.table/issues/new

There are several teams that can be mentioned in issues:

Software Tools

Translating the data.table messages from scratch can be easier if data.table.pot and R-data.table.pot are split into multiple, smaller files. To do that, place split.R and combine.R in an empty directory created beside the data.table source code (so that, with that new directory as the working directory, ../data.table points to the source code), set MY_LOCALE in the scripts to something like "zh_CN" or "es", and make sure you have installed the {potools} package and the gettext utilities. Run split.R to create the split PO files, and then translate all of them. After that, and after making sure the POT files in ../data.table/po/ are current, run combine.R to create the two combined PO files, which you can then move to ../data.table/po/. Check the beginning of the PO files (the part before the first translated message) with a text editor and make any necessary adjustments. Disclaimer: the scripts were tested on Linux only, and depend on a hardcoded list of source files with no messages. They were created for the Brazilian Portuguese translation and later made more general, but were attached to the wiki before being tested by other translation teams.

The {potools} website lists applications for translating PO files, among other software packages created for manipulating them.

TODO add details about what software tools were used by the various translation teams, to make it easier to generate the translated files.

TODO is it possible to join forces with base R's weblate? See discussions in https://github.com/Rdatatable/data.table/issues/6370 and (older) https://contributor.r-project.org/translations/ https://github.com/Rdatatable/data.table/pull/6199#issuecomment-2259220924

Please check https://contributor.r-project.org/translations/Conventions_for_Languages/#languages-and-contributions to see if your language already has translation guidelines in base R.

Addendum

The wiki doesn't permit attaching R files, so the scripts are here for now.

split.R:

#!/usr/bin/env Rscript

# Uncomment the next line and set it to your locale before running the script.
# MY_LOCALE <- NULL # for example, "pt_BR", "fr" etc.

# `||` short-circuits, so an undefined MY_LOCALE reaches the stop() below
# instead of failing with "object 'MY_LOCALE' not found" inside is.null().
if (!exists("MY_LOCALE") || is.null(MY_LOCALE) || !is.character(MY_LOCALE) || length(MY_LOCALE) != 1) {
  stop("Please set MY_LOCALE in the script to something like \"pt_BR\" or \"es\"")
}

# The data.table sources are looked up relative to the working directory.
DT_SRC <- "../data.table"
if (!dir.exists(DT_SRC)) {
  # getwd() must be called; passing the function itself makes sprintf() fail.
  stop(sprintf("Could not find %s. Is %s the intended working directory?", DT_SRC, getwd()))
}

library(potools)

# Paths where combined translations for MY_LOCALE would already live inside
# the data.table sources (one file for R messages, one for C messages).
previous_po_R <- file.path(DT_SRC, "po", sprintf("R-%s.po", MY_LOCALE))
previous_po_C <- file.path(DT_SRC, "po", sprintf("%s.po", MY_LOCALE))

# Either both files exist or neither does; a lone file is treated as an error.
if (xor(file.exists(previous_po_C), file.exists(previous_po_R))) {
  stop("Why does one PO file already exists but not the other?!")
}

# Remembered because the PO-creation step later in the script branches on it.
any_previous_po <- file.exists(previous_po_C) || file.exists(previous_po_R)
if (any_previous_po) {
  warning("This script is more useful when there is no or little previous translation")
}

# Base names (extension removed) of every .R and .c source file, deduplicated
# and sorted; a name present in both R/ and src/ appears only once.
source_names <- sort(unique(sub(
  "(.*)\\.(R|c)",
  "\\1",
  list.files(file.path(DT_SRC, c("R", "src")), ".*\\.R$|.*\\.c$")
)))

# For every source name, build a throwaway one-source "package" in the working
# directory (DESCRIPTION plus R/ and src/ holding copies of the matching
# files) and run po_extract() on it, so each source gets its own POT file(s).
for (nm in source_names) {
  if (nm == "negate") next # In this case po_extract() fails because there is no message to translate
  for (dir in file.path(nm, c("", "R", "src"))) {
    if (!dir.exists(dir)) dir.create(dir)
  }
  original_R <- sprintf("%s/R/%s.R", DT_SRC, nm)
  copy_R <- sprintf("%s/R/%s.R", nm, nm)
  if (file.exists(original_R)) {
    success <- file.copy(original_R, copy_R, copy.date = TRUE)
    # stop() does not interpolate format strings, so format explicitly.
    if (!isTRUE(success)) stop(sprintf("Could not copy from %s to %s", original_R, copy_R))
  }
  original_C <- sprintf("%s/src/%s.c", DT_SRC, nm)
  copy_C <- sprintf("%s/src/%s.c", nm, nm)
  if (file.exists(original_C)) {
    success <- file.copy(original_C, copy_C, copy.date = TRUE)
    # Report the C paths here (the R paths were previously reported by mistake).
    if (!isTRUE(success)) stop(sprintf("Could not copy from %s to %s", original_C, copy_C))
  }
  writeLines(c(sprintf("Package: %s", nm), "Version: 0.0"), file.path(nm, "DESCRIPTION"))
  # Custom translation functions that po_extract() should scan for in R code.
  functions <- c('catf:fmt|1', 'stopf:fmt|1', 'warningf:fmt|1', 'messagef:fmt|1', 'packageStartupMessagef:fmt|1')
  po_extract(dir = nm, style = "explicit", custom_translation_functions = list(R = functions, src = NULL))
}

# Gather the POT files generated in each temporary <source>/po directory into
# a single ./po directory, then delete the temporary per-source directories.
if (!dir.exists("po")) dir.create("po")
pot_files_from <- list.dirs(recursive = TRUE) |>
                  grep(pattern = "^\\./[^/]+/po$", value = TRUE) |>
                  list.files(pattern = "\\.pot$", full.names = TRUE)
pot_files_to <- file.path("po", basename(pot_files_from))
success <- file.copy(pot_files_from, pot_files_to, overwrite = TRUE)
# stopf() is internal to data.table and not defined in this script; use stop().
if (!all(success)) stop("Could not copy some files")
unlink(source_names, recursive = TRUE)

# One PO file per POT file, named <pot base name>-<locale>.po.
# The pattern is escaped and anchored so only files ending in ".pot" match.
pot_files <- list.files("po", "\\.pot$", full.names = TRUE)
po_files <- sub("(.*)\\.pot$", sprintf("\\1-%s.po", MY_LOCALE), pot_files)

# The case where only one of the two previous PO files exists was already
# rejected earlier in the script, so only "none" and "both" remain here.
if (!any_previous_po) {
  message(sprintf("Creating PO files for locale %s", MY_LOCALE))
  for (i in seq_along(pot_files)) {
    potools:::run_msginit(po_files[i], pot_files[i], MY_LOCALE)
  }
} else {
  message(sprintf("Existing %s translation was detected, and will be included in the split PO files", MY_LOCALE))
  stopifnot(identical(grepl("po/R-.*", pot_files), grepl("po/R-.*", po_files)))
  source_is_R <- grepl("po/R-.*\\.pot$", pot_files)
  for (i in seq_along(po_files)) {
    # Seed each split PO file with the matching combined translation (R or C),
    # then let msgmerge update it against that split's own POT file.
    previous_po <- if (source_is_R[i]) previous_po_R else previous_po_C
    file.copy(from = previous_po, to = po_files[i], copy.date = TRUE)
    potools:::run_msgmerge(po_files[i], pot_files[i])
  }
}

message(sprintf("The PO files can be found in %s", file.path(getwd(), "po")))

combine.R:

#!/usr/bin/env Rscript

# Uncomment the next line and set it to your locale before running the script.
# MY_LOCALE <- NULL # for example, "pt_BR", "fr" etc.

# `||` short-circuits, so an undefined MY_LOCALE reaches the stop() below
# instead of failing with "object 'MY_LOCALE' not found" inside is.null().
if (!exists("MY_LOCALE") || is.null(MY_LOCALE) || !is.character(MY_LOCALE) || length(MY_LOCALE) != 1) {
  stop("Please set MY_LOCALE in the script to something like \"pt_BR\" or \"es\"")
}

# The data.table sources are looked up relative to the working directory.
DT_SRC <- "../data.table"
if (!dir.exists(DT_SRC)) {
  stop(sprintf("Could not find %s. Is %s the intended working directory?", DT_SRC, getwd()))
}

library(potools)

# Analogous to the potools:::run_msg* functions.
#
# Concatenates PO files with the gettext `msgcat` command-line tool and then
# checks the result with tools::checkPoFile().
#
# input_files: character vector of PO file paths to concatenate.
# output_file: path of the PO file that msgcat writes (via `-o`).
# verbose:     if TRUE, announce the command and echo its output.
#
# Returns NULL invisibly. A failed msgcat run or issues reported by
# checkPoFile() are signalled as warnings, not errors.
run_msgcat <- function(input_files, output_file, verbose = TRUE) {
  args <- c(shQuote(path.expand(input_files)), "-o", shQuote(path.expand(output_file)))
  if (verbose) message("Running system command msgcat ", paste(args, collapse = " "), "...")
  val <- system2("msgcat", args, stdout = TRUE, stderr = TRUE)
  # With stdout = TRUE, system2() attaches a "status" attribute only when the
  # command exits with a non-zero status.
  if (!is.null(attr(val, "status", exact = TRUE))) {
    warning(sprintf("Running msgcat for '%s' failed:\n  %s", basename(output_file), paste(val, collapse = "\n")))
  } else if (verbose) {
    message(paste(val, collapse = "\n"))
  }
  res <- tools::checkPoFile(output_file, strictPlural = TRUE)
  if (nrow(res)) {
    # Name the file that was checked; the former `po_file` was never defined.
    warning(sprintf("tools::checkPoFile() found some issues in %s", output_file))
    print(res)
  }
  invisible(NULL)
}

# Current POT templates of data.table; the combined PO files are merged
# against them at the end.
pot_R <- file.path(DT_SRC, "po", "R-data.table.pot")
pot_C <- file.path(DT_SRC, "po", "data.table.pot")
stopifnot(file.exists(pot_R), file.exists(pot_C))

# Combined PO files are written to the working directory.
new_po_R <- sprintf("R-%s.po", MY_LOCALE)
new_po_C <- sprintf("%s.po", MY_LOCALE)

previous_po_R <- file.path(DT_SRC, "po", sprintf("R-%s.po", MY_LOCALE))
previous_po_C <- file.path(DT_SRC, "po", sprintf("%s.po", MY_LOCALE))
if (file.exists(previous_po_C) != file.exists(previous_po_R)) {
  stop("Why does one PO file already exists but not the other?!")
}

# Start from the existing combined translations when there are any,
# otherwise initialise fresh PO files from the templates.
if (file.exists(previous_po_R)) {
  success <- file.copy(c(previous_po_R, previous_po_C), c(new_po_R, new_po_C), overwrite = TRUE, copy.date = TRUE)
  if (!all(success)) stop("Could not copy one or more previous PO files to new location")
} else {
  potools:::run_msginit(new_po_R, pot_R, MY_LOCALE)
  potools:::run_msginit(new_po_C, pot_C, MY_LOCALE)
}

# Split PO files for this locale. The pattern is escaped and anchored so that
# editor backups such as "foo-pt_BR.po~" are not passed to msgcat.
po_files <- list.files("po", sprintf(".*-%s\\.po$", MY_LOCALE), full.names = TRUE)
if (length(po_files) == 0L) {
  stop(sprintf("No split PO files for locale %s were found in %s", MY_LOCALE, file.path(getwd(), "po")))
}
source_is_R <- grepl("po/R-.*\\.po$", po_files)

run_msgcat(po_files[source_is_R], new_po_R)
run_msgcat(po_files[!source_is_R], new_po_C)

potools:::run_msgmerge(new_po_R, pot_R)
potools:::run_msgmerge(new_po_C, pot_C)

message(sprintf("Files %s and %s can be found in %s", new_po_R, new_po_C, getwd()))
message("You will have to adjust the beginning of the PO files, that is, the settings in the translation of msgid \"\" and the corresponding comments.")
⚠️ **GitHub.com Fallback** ⚠️