[Doc] Translation container (#49935)
Replace the translate GitHub workflow with a Docker container. To translate an English doc: Setup the environment There are three environment variables that need to be set in the file starrocks/docs/translation/.env: OPENAI_API_KEY WANDB_API_KEY GIT_PYTHON_REFRESH GIT_PYTHON_REFRESH should be set to quiet because we are not interacting with Git within the container. The other two environment variables will be provided by the Documentation Team leader. These should be set in the file in starrocks/docs/translation/.env This is the format: OPENAI_API_KEY=sk-proj-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa WANDB_API_KEY=bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb GIT_PYTHON_REFRESH=quiet Files to translate Provide the paths of the files to translate in the file starrocks/docs/translation/files.txt The entries in the file should be relative to the starrocks/docs/translation/ folder, for example: ../en/quick_start/quick_start.md ../en/deployment/helm.md Build the Docker image This probably only needs to be done once unless the folks from Weights and Biases modify the Python package gpt_translate. cd docs/translation docker build -f translation.Dockerfile -t translate . Translate the docs Change dir back up to the starrocks folder so that you can mount the docs/ folder in the container. cd ../../ Translate the files: docker run -v ./docs:/docs \ --env-file ./docs/translation/.env \ translate \ bash /docs/translation/scripts/translate.sh Check the files Once the translation is complete the container will exit. Check the status with git status and check the translated file(s). Signed-off-by: DanRoscigno <dan@roscigno.com>
This commit is contained in:
parent
d7c4fedda8
commit
3c8d1ca67a
|
|
@ -41,13 +41,3 @@ If yes, please specify the type of change:
|
|||
- [ ] 3.1
|
||||
- [ ] 3.0
|
||||
- [ ] 2.5
|
||||
|
||||
## Documentation PRs only:
|
||||
|
||||
If you are submitting a PR that adds or changes English documentation and have not
|
||||
included Chinese documentation, then you can check the box to request GPT to translate the
|
||||
English doc to Chinese. Be sure to uncheck the **Do not translate** box if translation is needed.
|
||||
The workflow will generate a new PR with the Chinese translation after this PR is merged.
|
||||
|
||||
- [ ] Yes, translate English markdown files with GPT
|
||||
- [x] Do not translate
|
||||
|
|
|
|||
|
|
@ -1,96 +0,0 @@
|
|||
# Translates English docs changed by a merged pull request into Chinese with
# gpt_translate and opens a follow-up pull request containing the docs/zh output.
name: Translate changes to Chinese

on:
  pull_request:
    branches:
      - main
    types:
      - closed
    paths:
      - 'docs/en/**'

defaults:
  run:
    # Naming bash explicitly makes each `run` step execute with
    # `bash --noprofile --norc -eo pipefail`, so a failing command
    # (including one inside a pipeline) fails the step.
    shell: bash

jobs:
  # --------------------------------------------------------------
  # Event `pull_request`: Returns all changed pull request files.
  # --------------------------------------------------------------
  changed_files:
    # NOTE:
    # This workflow will only translate docs if:
    # - the PR is merged
    # - the `Yes, translate...` box is checked
    # - the `Do not translate` box is unchecked
    if: >-
      github.event.pull_request.merged == true
      && contains(toJson(github.event.pull_request.body), '[x] Yes, translate English markdown files with GPT')
      && contains(toJson(github.event.pull_request.body), '[ ] Do not translate')
    runs-on: ubuntu-latest  # windows-latest || macos-latest
    name: Test changed-files
    permissions:
      pull-requests: write

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install gpt_translate

      # NOTE(review): third-party actions are referenced by mutable tag here
      # and in the "Create Pull Request" step; consider pinning each to a
      # full commit SHA.
      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v44
        with:
          files: docs/en/**/*.{md,mdx}
          output_dir: '.github/outputs'  # this is the default dir
          write_output_files: 'true'

      - name: List all changed files
        env:
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          for file in $ALL_CHANGED_FILES; do
            echo "$file"
          done
          echo "also cat the generated file all_changed_files.txt"
          cat ./.github/outputs/all_changed_files.txt

      - name: Translate files
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
        run: |
          cp -r docs/translation/configs .  # ugh, only works if configs is in cwd
          gpt_translate.files \
            --input_file ./.github/outputs/all_changed_files.txt \
            --config_folder ./configs
          rm -rf configs

      # Rewrites the English paths to their docs/zh counterparts, then switches
      # the `displayed_sidebar` frontmatter of each translated file from
      # English to Chinese. This step reads the generated list file only, so
      # it needs no env block.
      - name: Fix sidebar display
        run: |
          sed "s#docs/en#docs/zh#g" ./.github/outputs/all_changed_files.txt > ./.github/outputs/new_files.txt
          cat ./.github/outputs/new_files.txt
          while IFS="" read -r translated || [ -n "$translated" ]
          do
            sed -i'' '/displayed_sidebar:/s/English/Chinese/' "$translated"
          done < ./.github/outputs/new_files.txt

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.TRANSLATE_PAT }}
          commit-message: Translated Docs
          title: Automatic translation
          # `#<number>` is auto-linked by GitHub wherever the body is rendered;
          # a bare number used as a markdown link target is a relative URL
          # that only resolves from some pages.
          body: |
            This PR was automatically created by the translate-action when merging PR #${{ github.event.pull_request.number }}
            Please review the changes and merge if they are correct.
          branch: translate-pr-${{ github.event.pull_request.number }}
          base: main
          delete-branch: true
          labels: translation
          add-paths: |
            docs/zh
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
# Environment file holding the API keys passed to the translation container
.env

# List of English docs to translate
files.txt

# Temporary list of translated paths written by translate.sh
new_files.txt
|
||||
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
# Translating from English to Chinese with GPT
|
||||
|
||||
To translate an English doc:
|
||||
|
||||
## Set up the environment
|
||||
|
||||
Three environment variables must be set in the file `starrocks/docs/translation/.env`:
|
||||
|
||||
- OPENAI_API_KEY
|
||||
- WANDB_API_KEY
|
||||
- GIT_PYTHON_REFRESH
|
||||
|
||||
`GIT_PYTHON_REFRESH` should be set to `quiet` because we are not interacting with Git within the container. The other two environment variables will be provided by the Documentation Team leader.
|
||||
|
||||
Set all three in the file `starrocks/docs/translation/.env`.
|
||||
|
||||
This is the format:
|
||||
|
||||
```bash
|
||||
OPENAI_API_KEY=sk-proj-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
WANDB_API_KEY=bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
|
||||
GIT_PYTHON_REFRESH=quiet
|
||||
```
|
||||
|
||||
## Files to translate
|
||||
|
||||
Provide the paths of the files to translate in the file `starrocks/docs/translation/files.txt`.
|
||||
|
||||
The entries in the file should be relative to the `starrocks/docs/translation/` folder, for example:
|
||||
|
||||
```text
|
||||
../en/quick_start/quick_start.md
|
||||
../en/deployment/helm.md
|
||||
```
|
||||
|
||||
## Build the Docker image
|
||||
|
||||
The image only needs to be built once; rebuild it if Weights & Biases updates the `gpt_translate` Python package.
|
||||
|
||||
```bash
|
||||
cd docs/translation
|
||||
docker build -f translation.Dockerfile -t translate .
|
||||
```
|
||||
|
||||
## Translate the docs
|
||||
|
||||
Change directory back up to the `starrocks` folder so that you can mount the `docs/` folder in the container.
|
||||
|
||||
```bash
|
||||
cd ../../
|
||||
```
|
||||
|
||||
Translate the files:
|
||||
|
||||
```bash
|
||||
docker run -v ./docs:/docs \
|
||||
--env-file ./docs/translation/.env \
|
||||
translate \
|
||||
bash /docs/translation/scripts/translate.sh
|
||||
```
|
||||
## Check the files
|
||||
|
||||
Once the translation is complete, the container will exit. Check the status with `git status` and review the translated file(s).
|
||||
|
||||
|
|
@ -14,8 +14,8 @@ max_openai_concurrent_calls: 7 # Max number of concurrent calls to OpenAI
|
|||
# Files:
|
||||
input_file: "docs/intro.md" # File to translate
|
||||
out_file: " intro_ja.md" # File to save the translated file to
|
||||
input_folder: ./docs/en # Folder to translate
|
||||
out_folder: ./docs/zh # Folder to save the translated files to
|
||||
input_folder: ../en # Folder to translate
|
||||
out_folder: ../zh # Folder to save the translated files to
|
||||
|
||||
limit: null # Limit number of files to translate (useful for testing)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env bash
# Translate the English docs listed in files.txt into Chinese with
# gpt_translate, then switch the `displayed_sidebar` frontmatter of each
# translated file from English to Chinese.
#
# Expects the docs folder to be mounted at /docs and the entries in
# files.txt to be paths relative to /docs/translation/ (for example
# ../en/quick_start/quick_start.md).

# Stop on the first failing command so that a failed `cd` or a failed
# translation run does not go on to edit files.
set -euo pipefail

cd /docs/translation/

gpt_translate.files --input_file ./files.txt --config_folder ./configs

# Map each English path to its Chinese counterpart. The dots are escaped so
# that only a literal leading "../en" is rewritten.
sed 's#^\.\./en#../zh#' ./files.txt > ./new_files.txt

echo "Fixing the frontmatter for the displayed sidebar"
# `|| [ -n ... ]` also processes a final line that lacks a trailing newline.
while IFS="" read -r translated || [ -n "$translated" ]; do
  # Ignore blank lines in the list.
  [ -n "$translated" ] || continue
  if [ ! -f "$translated" ]; then
    # Keep going so one missing file does not block the remaining ones.
    echo "Warning: translated file not found: $translated" >&2
    continue
  fi
  sed -i '/displayed_sidebar:/s/English/Chinese/' "$translated"
done < ./new_files.txt

echo "Removing new_files.txt"
rm -f ./new_files.txt
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# Image used to run gpt_translate against a mounted docs/ folder.
FROM python:3.11.9-slim-bookworm

WORKDIR /app

# NOTE(review): neovim is presumably included for editing files inside the
# container - confirm it is still needed.
# apt-get is used because apt's command-line interface is not stable for
# scripted use; the package lists are removed in the same layer to keep the
# image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends neovim \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir gpt_translate
|
||||
|
||||
Loading…
Reference in New Issue