Skip to content

Commit cfc835d

Browse files
docs-botCopilotheiskr
authored
fix: add new translation corruption patterns for es, zh, de, ru (#60975)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Kevin Heis <heiskr@users.noreply.github.com>
1 parent 828791d commit cfc835d

2 files changed

Lines changed: 98 additions & 1 deletion

File tree

src/languages/lib/correct-translation-content.ts

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,26 @@ export function correctTranslatedContentStrings(
8181
content = content.replaceAll('{% datos variables', '{% data variables')
8282
content = content.replaceAll('{% de datos variables', '{% data variables')
8383
content = content.replaceAll('{% datos reusables', '{% data reusables')
84+
// `{% WORD de datos variables.` — extra Spanish word before "de datos variables"
85+
// e.g. `{% uso de datos variables.` ("use of data variables") or
86+
// `{% análisis de datos variables.` ("data analysis variables").
87+
// Unicode-aware character class so accented translator words match.
88+
content = content.replace(
89+
/\{%(-?)\s*[\p{L}\p{M}]+\s+de datos (variables|reusables)\./gu,
90+
'{%$1 data $2.',
91+
)
92+
// `{% de datos WORD variables.` — adjective inserted between "de datos" and path
93+
// e.g. `{% de datos específico variables.` ("specific data variables")
94+
content = content.replace(
95+
/\{%(-?)\s*de datos [\p{L}\p{M}]+ (variables|reusables)\./gu,
96+
'{%$1 data $2.',
97+
)
98+
// `{% WORD de variables.` — word + "de variables" (missing "datos" keyword)
99+
// e.g. `{% alerta de variables.product.X %}` ("alert of variables")
100+
content = content.replace(
101+
/\{%(-?)\s*[\p{L}\p{M}]+\s+de\s+(variables|reusables)\./gu,
102+
'{%$1 data $2.',
103+
)
84104
content = content.replaceAll('{% data reutilizables.', '{% data reusables.')
85105
// `{% datos reutilizables.` — fully translated "data reusables" path
86106
content = content.replaceAll('{% datos reutilizables.', '{% data reusables.')
@@ -552,8 +572,11 @@ export function correctTranslatedContentStrings(
552572
// `{% 行标题 %}` — "row headers" = rowheaders
553573
content = content.replaceAll('{% 行标题 %}', '{% rowheaders %}')
554574
content = content.replaceAll('{%- 行标题 %}', '{%- rowheaders %}')
555-
// `{% 数据变量.` — "data variables" = data variables
575+
// `{% 数据变量.` — "data variables" = data variables (with a space after `{%`)
556576
content = content.replaceAll('{% 数据变量.', '{% data variables.')
577+
// `{%数据变量.` — same but no space between `{%` and 数据变量 (e.g. `{%数据变量.enterprise.management_console%}`)
578+
content = content.replaceAll('{%数据变量.', '{% data variables.')
579+
content = content.replaceAll('{%-数据变量.', '{%- data variables.')
557580
// `{% Windows 操作系统 %}` — "Windows OS" = windows platform tag
558581
content = content.replaceAll('{% Windows 操作系统 %}', '{% windows %}')
559582
content = content.replaceAll('{%- Windows 操作系统 %}', '{%- windows %}')
@@ -610,6 +633,9 @@ export function correctTranslatedContentStrings(
610633
if (context.code === 'ru') {
611634
content = content.replaceAll('[«AUTOTITLE»](', '[AUTOTITLE](')
612635
content = content.replaceAll('[АВТОЗАГОЛОВОК](', '[AUTOTITLE](')
636+
// `[{% autoTITLE](url)` — AUTOTITLE link anchor with a stray Liquid opener and partly lowercased
637+
// text (translator wrote `[{% autoTITLE](` instead of the plain `[AUTOTITLE](url)` anchor)
638+
content = content.replaceAll('[{% autoTITLE](', '[AUTOTITLE](')
613639
content = content.replaceAll('{% данных variables', '{% data variables')
614640
content = content.replaceAll('{% данных, variables', '{% data variables')
615641
content = content.replaceAll('{% данными variables', '{% data variables')
@@ -1122,6 +1148,10 @@ export function correctTranslatedContentStrings(
11221148
content = content.replaceAll('{%- Datenvariablen.', '{%- data variables.')
11231149
content = content.replaceAll('{%-Daten variables', '{%- data variables')
11241150
content = content.replaceAll('{%-Daten-variables', '{%- data variables')
1151+
// `{%-DatenXxx variables` — compound "Daten..." word immediately after `{%-` (no space)
1152+
// e.g. `{%-Datenpaket variables.`, `{%-Dateninstanz variables.`, `{%-Dateneinstellungen variables.`
1153+
// The existing `{%- DatenXxx variables` rules (with a space after `{%-`) don't catch this no-space variant. Note: `[A-Za-z]+` is ASCII-only, so compounds containing ä/ö/ü/ß are not matched.
1154+
content = content.replace(/\{%-(Daten[A-Za-z]+)\s+(variables|reusables)/g, '{%- data $2')
11251155
content = content.replaceAll('{%- ifversion fpt oder ghec %}', '{%- ifversion fpt or ghec %}')
11261156
content = content.replaceAll('{% ifversion fpt oder ghec %}', '{% ifversion fpt or ghec %}')
11271157
// Catch remaining "oder" between any plan names in ifversion/elsif/if tags
@@ -1138,6 +1168,15 @@ export function correctTranslatedContentStrings(
11381168
content = content.replaceAll('{% Tipp %}', '{% tip %}')
11391169
content = content.replaceAll('{%- Tipp %}', '{%- tip %}')
11401170
content = content.replaceAll('{%- Tipp -%}', '{%- tip -%}')
1171+
// `{% Codespaces %}` — translator capitalized the platform tag
1172+
content = content.replaceAll('{% Codespaces %}', '{% codespaces %}')
1173+
content = content.replaceAll('{%- Codespaces %}', '{%- codespaces %}')
1174+
// `{% Aufforderung %}` — German "Aufforderung" (prompt/instruction) = prompt
1175+
content = content.replaceAll('{% Aufforderung %}', '{% prompt %}')
1176+
content = content.replaceAll('{%- Aufforderung %}', '{%- prompt %}')
1177+
// `{% Endprompt %}` — mix of German "End" and English "prompt" = endprompt
1178+
content = content.replaceAll('{% Endprompt %}', '{% endprompt %}')
1179+
content = content.replaceAll('{%- Endprompt %}', '{%- endprompt %}')
11411180
// Translated for-loop keywords: `für VARNAME in COLLECTION`
11421181
content = content.replace(/\{%-? für (\w+) in /g, (match) => {
11431182
return match.replace('für', 'for')

src/languages/tests/correct-translation-content.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,30 @@ describe('correctTranslatedContentStrings', () => {
3333
expect(fix('{% data reutilizables.foo.bar %}', 'es')).toBe('{% data reusables.foo.bar %}')
3434
})
3535

36+
test('fixes extra Spanish word inserted around "de datos" and "de variables"', () => {
37+
// `{% WORD de datos variables.` — leading translator word
38+
expect(fix('{% uso de datos variables.product.github %}', 'es')).toBe(
39+
'{% data variables.product.github %}',
40+
)
41+
// Unicode-aware: accented words must also match
42+
expect(fix('{% análisis de datos variables.product.github %}', 'es')).toBe(
43+
'{% data variables.product.github %}',
44+
)
45+
expect(fix('{%- uso de datos reusables.foo.bar %}', 'es')).toBe(
46+
'{%- data reusables.foo.bar %}',
47+
)
48+
49+
// `{% de datos WORD variables.` — adjective inserted after "de datos"
50+
expect(fix('{% de datos específico variables.product.github %}', 'es')).toBe(
51+
'{% data variables.product.github %}',
52+
)
53+
54+
// `{% WORD de variables.` — missing "datos" keyword
55+
expect(fix('{% alerta de variables.product.github %}', 'es')).toBe(
56+
'{% data variables.product.github %}',
57+
)
58+
})
59+
3660
test('fixes translated comment keyword', () => {
3761
expect(fix('{% comentario %}', 'es')).toBe('{% comment %}')
3862
expect(fix('{%- comentario %}', 'es')).toBe('{%- comment %}')
@@ -502,6 +526,15 @@ describe('correctTranslatedContentStrings', () => {
502526
test('fixes 数据变量 → data variables', () => {
503527
expect(fix('{% 数据变量.product.github %}', 'zh')).toBe('{% data variables.product.github %}')
504528
})
529+
530+
test('fixes 数据变量 with no leading space (`{%数据变量.`)', () => {
531+
expect(fix('{%数据变量.enterprise.management_console%}', 'zh')).toBe(
532+
'{% data variables.enterprise.management_console%}',
533+
)
534+
expect(fix('{%-数据变量.product.github %}', 'zh')).toBe(
535+
'{%- data variables.product.github %}',
536+
)
537+
})
505538
})
506539

507540
// ─── RUSSIAN (ru) ──────────────────────────────────────────────────
@@ -515,6 +548,10 @@ describe('correctTranslatedContentStrings', () => {
515548
expect(fix('[АВТОЗАГОЛОВОК](/path/to/article)', 'ru')).toBe('[AUTOTITLE](/path/to/article)')
516549
})
517550

551+
test('fixes Liquid-embedded lowercase autotitle anchor (`[{% autoTITLE](`)', () => {
552+
expect(fix('[{% autoTITLE](/path/to/article)', 'ru')).toBe('[AUTOTITLE](/path/to/article)')
553+
})
554+
518555
test('fixes translated data tag variants', () => {
519556
expect(fix('{% данных variables.product.github %}', 'ru')).toBe(
520557
'{% data variables.product.github %}',
@@ -983,6 +1020,27 @@ describe('correctTranslatedContentStrings', () => {
9831020
expect(fix('{%- Tipp -%}', 'de')).toBe('{%- tip -%}')
9841021
})
9851022

1023+
test('fixes capitalized Codespaces platform tag', () => {
1024+
expect(fix('{% Codespaces %}', 'de')).toBe('{% codespaces %}')
1025+
expect(fix('{%- Codespaces %}', 'de')).toBe('{%- codespaces %}')
1026+
})
1027+
1028+
test('fixes translated prompt/endprompt keywords', () => {
1029+
expect(fix('{% Aufforderung %}', 'de')).toBe('{% prompt %}')
1030+
expect(fix('{%- Aufforderung %}', 'de')).toBe('{%- prompt %}')
1031+
expect(fix('{% Endprompt %}', 'de')).toBe('{% endprompt %}')
1032+
expect(fix('{%- Endprompt %}', 'de')).toBe('{%- endprompt %}')
1033+
})
1034+
1035+
test('fixes `{%-DatenXxx variables` no-space compound German "Daten" tags', () => {
1036+
expect(fix('{%-Datenpaket variables.product.github %}', 'de')).toBe(
1037+
'{%- data variables.product.github %}',
1038+
)
1039+
expect(fix('{%-Dateneinstellungen reusables.foo.bar %}', 'de')).toBe(
1040+
'{%- data reusables.foo.bar %}',
1041+
)
1042+
})
1043+
9861044
test('fixes für → for in for-loops', () => {
9871045
expect(fix('{%- für version in tables.copilot.ides -%}', 'de')).toBe(
9881046
'{%- for version in tables.copilot.ides -%}',

0 commit comments

Comments
 (0)