diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..6e87a00
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,13 @@
+# Editor configuration, see http://editorconfig.org
+root = true
+
+[*]
+charset = utf-8
+indent_style = space
+indent_size = 2
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.md]
+max_line_length = off
+trim_trailing_whitespace = false
diff --git a/locales.ts b/locales.ts
new file mode 100644
index 0000000..0752599
--- /dev/null
+++ b/locales.ts
@@ -0,0 +1,33 @@
+/**
+ * A locale the docs are translated into. `docusaurus` is the name of the
+ * locale's directory under i18n/; `crowdin` is presumably the code Crowdin
+ * uses for the same language (TODO confirm against update.mts).
+ */
+export interface HelpLocales {
+  docusaurus: string;
+  crowdin: string;
+}
+
+/** The locales shared by update.mts and validate_docs.mts. */
+export const helpLocales: HelpLocales[] = [
+  {
+    docusaurus: "de",
+    crowdin: "de",
+  },
+  {
+    docusaurus: "es",
+    crowdin: "es-ES",
+  },
+  {
+    docusaurus: "fr",
+    crowdin: "fr",
+  },
+  {
+    docusaurus: "pt-BR",
+    crowdin: "pt-BR",
+  },
+  {
+    docusaurus: "id",
+    crowdin: "id",
+  },
+];
diff --git a/regex_validation.ts b/regex_validation.ts
new file mode 100644
index 0000000..e445ac4
--- /dev/null
+++ b/regex_validation.ts
@@ -0,0 +1,29 @@
+/** A regular-expression check applied to an original doc and its translation. */
+export interface RegexValidation {
+  /** Pattern whose matches are compared between the two documents. */
+  expression: RegExp;
+  /** Label for what is matched, used in log messages. */
+  message: string;
+  /** Paths, relative to the docs directory, that are exempt from this check. */
+  filesToSkip?: string[];
+}
+
+// NOTE: validate_docs.mts reads these entries by index (0 = front matter,
+// 1 = anchor, 2 = image) and special-cases the "front matter" message, so
+// keep the order and that message in sync with it.
+export const regexValidations: RegexValidation[] = [
+  {
+    expression: /^---\n(.*?)\n---/s,
+    message: "front matter",
+  },
+  {
+    expression: /(?:(?=)({#[\d\w]+}))/gs,
+    message: "anchor",
+    filesToSkip: ["Community-Checking/enable-community-checking.md", "Draft-Generation/generating-a-draft.md"],
+  },
+  {
+    expression: /(?:(?=)(!\[\]\(\.\/[\d]+\.png\)))/gs,
+    message: "image",
+    filesToSkip: ["connect-paratext-project.md"],
+  },
+];
diff --git a/update.mts b/update.mts
index 22bd2c5..6b3a07f 100755
--- a/update.mts
+++ b/update.mts
@@ -8,7 +8,8 @@
 // This doesn't fully solve the problem though, because metadata in the translated files may be different from the English files.
 
 import { copy, readerFromStreamReader } from "jsr:@std/io";
-import { walk } from "jsr:@std/fs/walk";
+import { helpLocales } from "./locales.ts";
+import { runChecks } from "./validate_docs.mts";
 
 const projectId = Deno.env.get("CROWDIN_PROJECT_ID");
 const apiKey = Deno.env.get("CROWDIN_API_KEY");
@@ -112,29 +113,6 @@ async function saveLatestBuild() {
   }
 }
 
-const helpLocales = [
-  {
-    docusaurus: "de",
-    crowdin: "de",
-  },
-  {
-    docusaurus: "es",
-    crowdin: "es-ES",
-  },
-  {
-    docusaurus: "fr",
-    crowdin: "fr",
-  },
-  {
-    docusaurus: "pt-BR",
-    crowdin: "pt-BR",
-  },
-  {
-    docusaurus: "id",
-    crowdin: "id",
-  },
-] as const;
-
 async function copyDirContentsRecursive(
   source: string,
   dest: string
@@ -216,129 +194,6 @@ async function fetchNotionDocs() {
   }
 }
 
-async function runChecks() {
-  // Check for malformed links in the i18n files
-
-  for await (const dirEntry of walk(`${projectRoot}/i18n`)) {
-    if (dirEntry.isFile && dirEntry.name.endsWith(".md")) {
-      const file = await Deno.readTextFile(dirEntry.path);
-      for (const [index, line] of file.split("\n").entries()) {
-        // Look for Markdown links that had a space added between the closing parenthesis and the opening bracket
-        if (/\]\s+\(/.test(line)) {
-          console.log(
-            `%cFound malformed link in ${dirEntry.path} at line ${index + 1}`,
-            "color: red"
-          );
-          console.log(line);
-          console.log(
-            " ".repeat(line.indexOf("]") + 1) + "%c^ Erroneous space",
-            "color: blue"
-          );
-        }
-      }
-    }
-  }
-
-  // Check that all original docs have a corresponding translation, and vice versa
-  const originalDocs = new Map();
-  const docsDir = `${projectRoot}/docs`;
-  for await (const dirEntry of walk(docsDir)) {
-    if (dirEntry.isFile && dirEntry.name.endsWith(".md")) {
-      if (dirEntry.path.indexOf(docsDir) !== 0)
-        throw new Error("Unexpected path");
-      const relativePath = dirEntry.path.slice(docsDir.length + 1);
-      originalDocs.set(relativePath, await Deno.readTextFile(dirEntry.path));
-    }
-  }
-
-  for (const locale of helpLocales) {
-    const i18nDir = `${projectRoot}/i18n/${locale.docusaurus}/docusaurus-plugin-content-docs/current`;
-
-    const foundLocalizations = new Set();
-
-    for await (const dirEntry of walk(i18nDir)) {
-      if (dirEntry.isFile && dirEntry.name.endsWith(".md")) {
-        if (dirEntry.path.indexOf(i18nDir) !== 0)
-          throw new Error("Unexpected path");
-
-        const relativePath = dirEntry.path.slice(i18nDir.length + 1);
-
-        foundLocalizations.add(relativePath);
-
-        if (!originalDocs.has(relativePath)) {
-          console.log(
-            `%cNo original document found for i18n file ${dirEntry.path}`,
-            "color: red"
-          );
-        }
-      }
-    }
-    for (const [relativePath, content] of originalDocs) {
-      if (!foundLocalizations.has(relativePath)) {
-        console.log(
-          `%cNo translation file found for ${relativePath} in ${i18nDir}`,
-          "color: red"
-        );
-      } else {
-        // Check that the front matter matches
-        const translation = await Deno.readTextFile(
-          `${i18nDir}/${relativePath}`
-        );
-        const frontMatterRegex = /^---\n(.*?)\n---/s;
-        const originalFrontMatterMatch = content.match(frontMatterRegex);
-        const translationFrontMatterMatch = translation.match(frontMatterRegex);
-        if (originalFrontMatterMatch == null) {
-          console.log(
-            `%cNo front matter found in original document ${relativePath}`,
-            "color: red"
-          );
-        }
-        if (translationFrontMatterMatch == null) {
-          console.log(
-            `%cNo front matter found in translation ${relativePath}`,
-            "color: red"
-          );
-        }
-
-        if (
-          originalFrontMatterMatch == null ||
-          translationFrontMatterMatch == null
-        )
-          continue;
-
-        const originalFrontMatter = originalFrontMatterMatch[1].split("\n");
-        const translationFrontMatter =
-          translationFrontMatterMatch[1].split("\n");
-
-        if (originalFrontMatter.length !== translationFrontMatter.length) {
-          console.log(
-            `%cFront matter length mismatch in ${relativePath} for locale ${locale.docusaurus}`,
-            "color: red"
-          );
-        }
-
-        for (const [index, line] of originalFrontMatter.entries()) {
-          // the title can change; everything else should match
-          if (line.indexOf("title: ") === 0) continue;
-          if (line !== translationFrontMatter[index]) {
-            console.log(
-              `%cFront matter mismatch in ${relativePath} for locale ${
-                locale.docusaurus
-              } at line ${index + 1}`,
-              "color: red"
-            );
-            console.log(`%cOriginal: ${line}`, "color: blue");
-            console.log(
-              `%cTranslation: ${translationFrontMatter[index]}`,
-              "color: blue"
-            );
-          }
-        }
-      }
-    }
-  }
-}
-
 try {
   console.log("--- Deleting existing files ---");
   await deleteExistingFiles();
diff --git a/validate_docs.mts b/validate_docs.mts
new file mode 100644
index 0000000..f3b1a57
--- /dev/null
+++ b/validate_docs.mts
@@ -0,0 +1,191 @@
+// This script runs checks on the localization files in the i18n directory
+// and the original docs in the docs directory to ensure that links are well
+// formed and that front matter, anchors, and images match
+
+import { walk } from "jsr:@std/fs/walk";
+import { type HelpLocales, helpLocales } from "./locales.ts";
+import { type RegexValidation, regexValidations } from "./regex_validation.ts";
+
+const projectRoot = Deno.cwd();
+
+/**
+ * Runs the malformed-link check followed by the original/translation
+ * comparison. Findings are written to the console.
+ */
+export async function runChecks() {
+  // Check for malformed links in the i18n files
+  console.log("--- Checking for malformed links ---");
+  await checkMalformedLinks();
+  console.log();
+
+  // Check that all original docs have a corresponding translation with matching front matter, anchors, and images; and vice versa
+  console.log("--- Checking english and localization docs for missing or mismatched front matter, anchors, and images ---");
+  await checkDocsMatch();
+  console.log();
+}
+
+/**
+ * Compares every Markdown file under docs/ with its counterpart in each
+ * locale's i18n directory. Reports files that exist on only one side, and
+ * front matter, anchors, or images that differ between the two.
+ */
+async function checkDocsMatch() {
+  // Maps each original doc's path (relative to docs/) to its content
+  const originalDocs = new Map<string, string>();
+  const docsDir = `${projectRoot}/docs`;
+  for await (const dirEntry of walk(docsDir)) {
+    if (dirEntry.isFile && dirEntry.name.endsWith(".md")) {
+      if (dirEntry.path.indexOf(docsDir) !== 0)
+        throw new Error("Unexpected path");
+      const relativePath = dirEntry.path.slice(docsDir.length + 1);
+      originalDocs.set(relativePath, await Deno.readTextFile(dirEntry.path));
+    }
+  }
+
+  for (const locale of helpLocales) {
+    const i18nDir = `${projectRoot}/i18n/${locale.docusaurus}/docusaurus-plugin-content-docs/current`;
+
+    const foundLocalizations = new Set<string>();
+
+    for await (const dirEntry of walk(i18nDir)) {
+      if (dirEntry.isFile && dirEntry.name.endsWith(".md")) {
+        if (dirEntry.path.indexOf(i18nDir) !== 0)
+          throw new Error("Unexpected path");
+
+        const relativePath = dirEntry.path.slice(i18nDir.length + 1);
+
+        foundLocalizations.add(relativePath);
+
+        if (!originalDocs.has(relativePath)) {
+          console.log(
+            `%cNo original document found for i18n file ${dirEntry.path}`,
+            "color: red"
+          );
+        }
+      }
+    }
+    for (const [relativePath, content] of originalDocs) {
+      if (!foundLocalizations.has(relativePath)) {
+        console.log(
+          `%cNo translation file found for ${relativePath} in ${i18nDir}`,
+          "color: red"
+        );
+      } else {
+        const translation = await Deno.readTextFile(`${i18nDir}/${relativePath}`);
+        // The indices rely on the order of the entries in regex_validation.ts
+        const frontMatterRegex = regexValidations[0];
+        const anchorRegex = regexValidations[1];
+        const imageRegex = regexValidations[2];
+
+        // Check that the front matter matches
+        compareOriginalToTranslation(frontMatterRegex, content, translation, relativePath, locale);
+
+        // Check that the anchors match, except in the files listed in filesToSkip
+        if (!anchorRegex.filesToSkip?.includes(relativePath)) {
+          compareOriginalToTranslation(anchorRegex, content, translation, relativePath, locale);
+        }
+
+        // Check that the images match, except in the files listed in filesToSkip
+        if (!imageRegex.filesToSkip?.includes(relativePath)) {
+          compareOriginalToTranslation(imageRegex, content, translation, relativePath, locale);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Reports Markdown links in the i18n files that had whitespace inserted
+ * between the closing bracket and the opening parenthesis.
+ */
+async function checkMalformedLinks() {
+  for await (const dirEntry of walk(`${projectRoot}/i18n`)) {
+    if (dirEntry.isFile && dirEntry.name.endsWith(".md")) {
+      const file = await Deno.readTextFile(dirEntry.path);
+      for (const [index, line] of file.split("\n").entries()) {
+        // Look for Markdown links that had a space added between the closing bracket and the opening parenthesis
+        const malformedLink = /\]\s+\(/.exec(line);
+        if (malformedLink != null) {
+          console.log(
+            `%cFound malformed link in ${dirEntry.path} at line ${index + 1}`,
+            "color: red"
+          );
+          console.log(line);
+          // Point at the offending link, which is not necessarily the first "]" on the line
+          console.log(
+            " ".repeat(malformedLink.index + 1) + "%c^ Erroneous space",
+            "color: blue"
+          );
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Applies one validation regex to an original document and its translation
+ * and logs any difference between the matches.
+ *
+ * For "front matter" the captured block is compared line by line (lines
+ * starting with "title: " may differ); for every other validation the list
+ * of matches is compared item by item.
+ *
+ * @param regex The validation to apply; its message labels the log output.
+ * @param originalContent Content of the original document.
+ * @param translationContent Content of the translated document.
+ * @param relativePath Path of the document, used in log messages.
+ * @param locale Locale of the translation, used in log messages.
+ */
+function compareOriginalToTranslation(regex: RegexValidation, originalContent: string, translationContent: string, relativePath: string, locale: HelpLocales) {
+  const originalMatch = originalContent.match(regex.expression);
+  const translationMatch = translationContent.match(regex.expression);
+  const capitalizedRegexMessage = regex.message.charAt(0).toUpperCase() + regex.message.slice(1);
+  const isFrontMatter = regex.message === "front matter";
+
+  // A document without any anchors or images is fine as long as the
+  // translation has none either; only front matter is always required
+  if (!isFrontMatter && originalMatch == null && translationMatch == null) return;
+
+  if (originalMatch == null) {
+    console.log(
+      `%cNo ${regex.message} found in original document ${relativePath}`,
+      "color: red"
+    );
+  }
+  if (translationMatch == null) {
+    console.log(
+      `%cNo ${regex.message} found in translation ${relativePath}`,
+      "color: red"
+    );
+  }
+
+  if (originalMatch == null || translationMatch == null) return;
+
+  // Front matter is a single match whose first capture group holds the block;
+  // the other expressions are global, so match() returns every occurrence
+  const original: string[] = isFrontMatter ? originalMatch[1].split("\n") : originalMatch;
+  const translation: string[] = isFrontMatter ? translationMatch[1].split("\n") : translationMatch;
+  if (original.length !== translation.length) {
+    console.log(
+      `%c${capitalizedRegexMessage} length mismatch in ${relativePath} for locale ${locale.docusaurus}`,
+      "color: red"
+    );
+  }
+  for (const [index, line] of original.entries()) {
+    // the title can change; everything else should match
+    if (line.indexOf("title: ") === 0) continue;
+    if (line !== translation[index]) {
+      console.log(
+        `%c${capitalizedRegexMessage} mismatch in ${relativePath} for locale ${
+          locale.docusaurus
+        }`,
+        "color: red"
+      );
+      console.log(`%cOriginal: ${line}`, "color: blue");
+      console.log(
+        `%cTranslation: ${translation[index]}`,
+        "color: blue"
+      );
+    }
+  }
+}
+