diff --git a/SetUpPriorityLabels/.gitignore b/SetUpPriorityLabels/.gitignore new file mode 100644 index 0000000..4c2355e --- /dev/null +++ b/SetUpPriorityLabels/.gitignore @@ -0,0 +1,14 @@ +# Python +__pycache__/ +requirements.txt +venv/ + +# Data files +*.csv +*.log +*.po +*.po_* +*.resx +*.txt +*.xlf +*.xliff \ No newline at end of file diff --git a/SetUpPriorityLabels/README.md b/SetUpPriorityLabels/README.md new file mode 100644 index 0000000..5d6bb5a --- /dev/null +++ b/SetUpPriorityLabels/README.md @@ -0,0 +1,110 @@ +# FieldWorks Crowdin + +Python utilities for interacting with the Crowdin API and parsing PO translation files. + +## Project Structure + +- **`crowdin_utils.py`**: Core Crowdin API utilities (projects, files, labels, strings) +- **`test_crowdin_api.py`**: CLI script for printing projects and labels +- **`beth_po_extractor.py`**: PO file parser for extracting strings, file paths, and priorities + +## Requirements + +- Python 3.8+ +- See `requirements.txt` for dependencies + +## Setup + +### Create Virtual Environment + +```bash +# Create virtual environment +python -m venv venv + +# Activate virtual environment +# On Windows: +venv\Scripts\activate +# On macOS/Linux: +source venv/bin/activate +``` + +### Install Dependencies + +```bash +pip install -r requirements.txt +``` + +### Updating Dependencies + +To update `requirements.txt` from `requirements.in`: + +```bash +pip install pip-tools +pip-compile requirements.in +``` + +## Configuration + +Set your Crowdin API token in your system keyring: + +```bash +keyring set crowdin <token_name> +``` + +where `<token_name>` could be `read_all` or `write_projects`. 
+ +## Usage + +### Print Projects and Labels + +For a list of Crowdin API tests to run, execute + +```bash +python test_crowdin_api.py +``` + +### Parse PO Files + +```bash +python beth_po_extractor.py +``` + +Parses a hard-coded `.po_` file and displays: + +- Total number of entries +- String count grouped by priority level +- (Optional) Distinct file paths by priority +- (Optional) matching against Crowdin strings with or without file path verification + +## Crowdin API Functions + +### `crowdin_utils.py` + +- **`get_projects()`**: Return all projects sorted alphabetically by name +- **`select_project(project_name)`**: Select a project by name or prompt user with enumerated list +- **`select_and_fetch_files()`**: Prompt user to select a project and display file count and names +- **`export_files_to_csv()`**: Export all source files from a project to CSV +- **`get_file_path(project_id, file_id)`**: Fetch file path with folder structure (cached or API) +- **`get_project_labels(project_id)`**: Fetch all labels in a project +- **`fetch_matching_strings(search_string, options)`**: Fetch all strings matching search criteria with optional filters: + - `project_id`: Crowdin project ID + - `exact_match`: Match entire string only (default: False) + - `case_sensitive`: Case-sensitive search (default: False) + - `include_duplicates`: Include duplicate strings (default: False) + - **`add_priority_to_string(search_string, priority, project_id)`**: Assign priority label to all matching strings + +## PO File Parser + +### `beth_po_extractor.py` + +Parses `.po_` files and extracts: + +- **Paths**: File locations (starting with `/`, stripped of `::` identifiers) +- **Strings**: Translation source text (msgid field) +- **Priority**: Numeric priority from msgstr prefix (e.g., `^1^`, `^4^`) or None if not specified + +### Functions + +- **`parse_po_file(file_path)`**: Parse a PO file and return dict of POEntry objects keyed by lowercase string +- **`summarize_po_entries(entries, 
"""Parser for Beth's PO file format to extract strings, file paths, and priorities."""

from dataclasses import dataclass, field
from typing import Any, Optional
import re

# The Crowdin helpers are only used by get_matching_strings() and the CLI
# entry point; guard the import so the pure parsing API stays usable in
# environments without the Crowdin client or credentials installed.
try:
    from crowdin_utils import add_priority_to_string, fetch_matching_strings, fw_project_id, get_file_path
except ImportError:  # pragma: no cover - parsing-only usage
    add_priority_to_string = fetch_matching_strings = fw_project_id = get_file_path = None  # type: ignore[assignment]


@dataclass
class POEntry:
    """Represents a single entry from a PO file."""
    paths: list[str] = field(default_factory=list)
    string: str = ""
    priority: Optional[int] = None

    def __repr__(self) -> str:
        paths_str = ", ".join(self.paths) if self.paths else "No paths"
        return (
            f"POEntry(paths=[{paths_str}], "
            f"string={repr(self.string[:50])}, priority={self.priority})"
        )


@dataclass
class MatchingStringsResult:
    """Structured result for matching strings lookup."""

    string_matches: list[dict[str, Any]]
    string_ids_with_file_matches: list[str]


# Escaped characters that map to control characters; any other escaped
# character (e.g. \" or \\) unescapes to the character itself.
_PO_ESCAPES = {'n': '\n', 't': '\t'}


def _unescape_po(text: str) -> str:
    """Unescape PO escape sequences in a single pass.

    A single regex pass is required for correctness: chained str.replace
    calls corrupt an escaped backslash that is followed by 'n' or 't' (the
    pair would first be misread as an escaped newline/tab).
    """
    return re.sub(r'\\(.)', lambda m: _PO_ESCAPES.get(m.group(1), m.group(1)), text)


def parse_po_file(file_path: str) -> dict[str, POEntry]:
    """Parse a PO file and extract entries with paths, strings, and priorities.

    Reads a `.po_` file and extracts file paths from comment lines (starting with /),
    strings from msgid fields, and priorities from msgstr fields (e.g., ^1^).
    Entries are deduplicated by lowercase string, keeping the highest priority
    (lowest number).

    Args:
        file_path: Path to the .po_ file to parse.

    Returns:
        Dictionary of POEntry objects keyed by lowercase string, with merged
        entries and deduplicated paths.
    """
    entries: dict[str, POEntry] = {}

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split by double newlines to separate entries
    blocks = content.split('\n\n')

    for block in blocks:
        block = block.strip()
        if not block:
            continue

        entry = POEntry()
        lines = block.split('\n')

        i = 0
        # Extract file paths from comment lines
        while i < len(lines) and lines[i].startswith('#.'):
            path_line = lines[i][3:].strip()  # Remove '#. ' prefix
            # Only include paths that start with '/' (skip descriptive comments)
            if path_line.startswith('/'):
                # Strip everything after '::' (e.g., resource identifier)
                path = path_line.split('::')[0]
                # Replace /| with the full path prefix
                if path.startswith('/|'):
                    path = path.replace('/|', '/DistFiles/Language Explorer/Configuration/', 1)
                entry.paths.append(path)
            i += 1

        # Skip other comment lines (metadata, etc.)
        while i < len(lines) and lines[i].startswith('#'):
            i += 1

        # Extract msgid (the string to translate)
        msgid_lines: list[str] = []
        if i < len(lines) and lines[i].startswith('msgid'):
            line = lines[i]
            i += 1

            # Handle empty msgid (multiline string follows)
            if line == 'msgid ""':
                # Collect subsequent quoted lines until msgstr
                while i < len(lines) and lines[i].startswith('"'):
                    msgid_lines.append(lines[i][1:-1])  # Remove surrounding quotes
                    i += 1
            else:
                # Single line msgid: msgid "content"
                match = re.match(r'msgid "(.*)"', line)
                if match:
                    msgid_lines.append(match.group(1))

        # Join multiline strings, then unescape once (see _unescape_po for
        # why a single pass matters).
        msgid = _unescape_po(''.join(msgid_lines))
        if not msgid.strip():
            continue  # Skip entries with empty msgid
        entry.string = msgid

        # Extract priority from msgstr (e.g., "^1^" or "^4^")
        if i < len(lines) and lines[i].startswith('msgstr'):
            line = lines[i]

            if line == 'msgstr ""':
                # Multiline msgstr - check first quoted line for priority
                if i + 1 < len(lines) and lines[i + 1].startswith('"'):
                    first_line = lines[i + 1][1:-1]  # Remove surrounding quotes
                    priority_match = re.match(r'\^(\d+)\^', first_line)
                    if priority_match:
                        entry.priority = int(priority_match.group(1))
            else:
                # Single line msgstr
                match = re.match(r'msgstr "(.*)"', line)
                if match:
                    priority_match = re.match(r'\^(\d+)\^', match.group(1))
                    if priority_match:
                        entry.priority = int(priority_match.group(1))

        if entry.string or entry.paths:
            key = entry.string.lower()
            if key in entries:
                existing = entries[key]
                # Replace if new entry has higher priority (lower number)
                if entry.priority is not None and (existing.priority is None or entry.priority < existing.priority):
                    # Merge paths and update entry
                    entry.paths = list(set(existing.paths + entry.paths))
                    entries[key] = entry
                else:
                    # Keep existing, but merge paths
                    existing.paths = list(set(existing.paths + entry.paths))
            else:
                entries[key] = entry

    return entries
+ """ + print(f'{len(entries)} entries\n') + + # Count strings and distinct paths by priority + priority_counts: dict[Optional[int], int] = {} + priority_paths: dict[Optional[int], set[str]] = {} + path_priority_counts: dict[str, dict[Optional[int], int]] = {} + match_counts: dict[Optional[int], dict[str, int]] = {} + + for entry in entries: + priority = entry.priority + priority_counts[priority] = priority_counts.get(priority, 0) + 1 + if match_files: + if priority not in priority_paths: + priority_paths[priority] = set() + priority_paths[priority].update(entry.paths) + + for path in entry.paths: + path_counts = path_priority_counts.setdefault(path, {}) + path_counts[priority] = path_counts.get(priority, 0) + 1 + + if entry.priority is not None and match_strings: + string_match_result = get_matching_strings(entry, match_files=match_files) + if match_files: + if priority not in match_counts: + match_counts[priority] = {'no_match': 0, 'weak_match': 0, 'strong_match': 0} + if string_match_result.string_ids_with_file_matches: + match_counts[priority]['strong_match'] += 1 + elif string_match_result.string_matches: + match_counts[priority]['weak_match'] += 1 + else: + print(f'No match for priority {priority} string: {entry.string}') + match_counts[priority]['no_match'] += 1 + else: + if priority not in match_counts: + match_counts[priority] = {'no_match': 0, 'match': 0} + if string_match_result.string_matches: + match_counts[priority]['match'] += 1 + else: + print(f'No match for priority {priority} string: {entry.string}') + match_counts[priority]['no_match'] += 1 + + # Print summary + print('String counts by priority:') + for priority in sorted(priority_counts.keys(), key=lambda x: (x is None, x)): + count = priority_counts[priority] + priority_label = f'Priority {priority}' if priority is not None else 'No priority' + if match_files: + distinct_paths = len(priority_paths[priority]) + print(f' {priority_label}: {count} strings from {distinct_paths} distinct files') + 
else: + print(f' {priority_label}: {count} strings') + + if match_files: + print('\nPriority counts by path:') + path_items = list(path_priority_counts.items()) + path_items.sort(key=lambda item: (-sum(count for p, count in item[1].items() if p is not None), item[0])) + + for path, counts in path_items[:3]: + parts = [] + for priority, count in sorted(counts.items(), key=lambda x: (x[0] is None, x[0])): + label = f'P{priority}' if priority is not None else 'None' + parts.append(f'{label}:{count}') + print(f' {path}: ' + ', '.join(parts)) + + if match_strings: + print('\nMatch summary by priority:') + for priority in sorted(match_counts.keys(), key=lambda x: (x is None, x)): + counts = match_counts[priority] + priority_label = f'Priority {priority}' if priority is not None else 'No priority' + if match_files: + print(f' {priority_label}: ' + f"{counts['no_match']} no match, " + f"{counts['weak_match']} weak match, " + f"{counts['strong_match']} strong match") + else: + print(f' {priority_label}: ' + f"{counts['no_match']} no match, " + f"{counts['match']} match") + + +def get_matching_strings(entry: POEntry, match_files: bool = False) -> MatchingStringsResult: + """Search Crowdin for matching strings and optionally verify file path matches. + + Searches the Crowdin API for exact matches of the entry's string using exact_match + filter. If match_files is True, verifies each match against the entry's file paths. + + Args: + entry: POEntry object containing the string to search for and file paths. + match_files: If True, filter results to only include strings matching entry's + file paths. + + Returns: + MatchingStringsResult containing all string matches and subset with matching + file paths. 
+ """ + #print(f'\nSearching for priority {entry.priority} string (from {len(entry.paths)} files):\n{entry.string}') + string_matches = fetch_matching_strings( + entry.string, + {'project_id': fw_project_id, 'exact_match': True} + ) + string_ids_with_file_matches: list[str] = [] + if match_files: + for string in string_matches: + file_id: str = string.get('fileId', 'N/A') + file_path: str = get_file_path(fw_project_id, file_id) if file_id != 'N/A' else 'N/A' + if file_path in entry.paths: + string_ids_with_file_matches.append(string.get('id', 'N/A')) + #print(f'Found {len(string_matches)} matching strings ({len(string_ids_with_file_matches)} with matching file)') + return MatchingStringsResult( + string_matches=string_matches, + string_ids_with_file_matches=string_ids_with_file_matches, + ) + + +if __name__ == '__main__': + # Example usage + po_file = 'messages.en-ca.po_' + entries = parse_po_file(po_file) + + entries_list = list(entries.values()) + summarize_po_entries(entries_list) + + start = next((i for i, entry in enumerate(entries_list) if entry.string.startswith('make-it-exit') ), None) + + if start is None: + exit('Starting string not found in entries.') + + number_to_process = 0 + + with open('po-no-match.txt', 'a', encoding='utf-8') as no_match_file: + for entry in entries_list[start:start+number_to_process]: + if entry.string == 'Choose writing system(s) of translated lists:': + # This throws a permission-denied error when updating in Crowdin via API; added manually + continue + + if entry.string == '&Writing System(s):': + # This throws a permission-denied error when updating in Crowdin via API; added manually + continue + + if entry.string == 'Word': + # Too many non-exact matches to fetch and sort through; added manually + continue + + if entry.priority: + update_count = add_priority_to_string(search_string=entry.string, priority=entry.priority) + if update_count is None: + no_match_file.write(f'{entry.priority}\t' + entry.string.replace('\n', 
"""Utility functions for interacting with the Crowdin API."""

from crowdin_api import CrowdinClient
import keyring
import csv
import os
from typing import Any, Callable, TypedDict


class Pagination(TypedDict):
    offset: int
    limit: int


class CrowdinItem(TypedDict):
    data: dict[str, Any]


class CrowdinListResponse(TypedDict):
    data: list[CrowdinItem]
    pagination: Pagination


class SearchOptions(TypedDict, total=False):
    project_id: str
    case_sensitive: bool
    exact_match: bool
    include_duplicates: bool


fw_project_name: str = "Fieldworks"
fw_project_id: int = 379603
fw_priority_label_ids: dict[int, int] = { 1: 11, 2: 9, 3: 7, 4: 5, 5: 3 }

# Swap in the 'read_all' credential for safer, read-only testing.
token: str | None = keyring.get_password('crowdin', 'write_projects')

client: CrowdinClient = CrowdinClient(token=token)

# Cache of (project_id, file_id) -> path so repeated lookups skip the API.
_file_path_cache: dict[tuple[int, int], str] = {}


def _load_file_path_cache_from_csv(csv_filename: str = f'{fw_project_name}_files.csv') -> None:
    """Seed the file-path cache from a previously exported CSV, if present."""
    if not os.path.exists(csv_filename):
        return

    with open(csv_filename, newline='', encoding='utf-8') as handle:
        for row in csv.DictReader(handle):
            try:
                file_id = int(row.get('file_id', ''))
            except ValueError:
                # Skip rows whose file_id is missing or malformed.
                continue
            _file_path_cache[(fw_project_id, file_id)] = row.get('file_path', '')


def paginate_list(fetch_page: Callable[[int, int], CrowdinListResponse], page_limit: int = 100) -> list[dict[str, Any]]:
    """Fetch every page from a Crowdin list endpoint and flatten the results.

    Args:
        fetch_page: Callable that accepts (limit, offset) and returns one page
            of a Crowdin list response.
        page_limit: Page size to request from the API.

    Returns:
        Aggregated list of Crowdin items across all pages, in order.
    """
    collected: list[dict[str, Any]] = []
    offset = 0
    while True:
        page = fetch_page(page_limit, offset)
        chunk = [wrapper['data'] for wrapper in page['data']]
        collected.extend(chunk)
        # A short page means the collection is exhausted.
        if len(chunk) < page_limit:
            return collected
        offset += page_limit


def get_projects() -> list[dict[str, Any]]:
    """Return all Crowdin projects sorted alphabetically by name."""
    # https://crowdin.github.io/crowdin-api-client-python/api_resources/projects/resource.html#crowdin_api.api_resources.projects.resource.ProjectsResource.list_projects
    # https://support.crowdin.com/developer/api/v2/#operation/api.projects.getMany
    response: CrowdinListResponse = client.projects.list_projects()
    unsorted_projects = [wrapper['data'] for wrapper in response['data']]
    return sorted(unsorted_projects, key=lambda project: project['name'])


def select_project(project_name: str | None = None) -> dict[str, Any]:
    """Select and return a project.

    If project_name is given and found, return it directly; otherwise show an
    enumerated list and ask the user to pick one by number.

    Args:
        project_name: Optional project name to search for.

    Returns:
        The selected project dictionary.
    """
    projects = get_projects()

    if project_name:
        found = next((p for p in projects if p['name'] == project_name), None)
        if found is not None:
            return found
        print(f'Project "{project_name}" not found.')

    print(f'\n{len(projects)} projects:\n')
    for number, project in enumerate(projects, start=1):
        print(f'{number}. {project["name"]}')

    choice: str = input('\nSelect project (enter number): ')
    return projects[int(choice) - 1]


def list_files_lambda(project_id: int = fw_project_id) -> Callable[[int, int], CrowdinListResponse]:
    """Return a pager callable that lists files for the given project ID."""
    # https://crowdin.github.io/crowdin-api-client-python/api_resources/source_files/resource.html#crowdin_api.api_resources.source_files.resource.SourceFilesResource.list_files
    # https://support.crowdin.com/developer/api/v2/#operation/api.projects.files.getMany
    return lambda limit, offset: client.source_files.list_files(project_id, limit=limit, offset=offset)


def select_and_fetch_files() -> None:
    """Prompt for a project, then report its source file count and names.

    Shows an enumerated project list, fetches all source files via paginated
    API calls (100 per page), and prints the total plus the first ten file
    names (with an ellipsis when more exist).
    """
    chosen = select_project()
    files = paginate_list(list_files_lambda(chosen['id']))

    print(f'\n{chosen["name"]} has {len(files)} source file(s):')
    for source_file in files[:10]:
        print(source_file['name'])
    if len(files) > 10:
        print('...')
+ """ + selected_project: dict[str, Any] = select_project() + + source_files: list[dict[str, Any]] = paginate_list(list_files_lambda(selected_project['id'])) + + csv_filename: str = f"{selected_project['name']}_files.csv" + + with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['file_id', 'file_path']) + + for file in source_files: + file_id: int = file.get('id', '') + file_path: str = file.get('path', file.get('name', '')) + writer.writerow([file_id, file_path]) + + print(f'\nExported {len(source_files)} files to {csv_filename}') + + +def get_file_path(project_id: int, file_id: Any) -> str: + """Fetch the file path for a given file ID from a project. + + Checks local cache first, then queries the Crowdin API if not found. + + Args: + project_id: The Crowdin project ID. + file_id: The Crowdin file ID (int or convertible to int). + + Returns: + The file path with folder structure, or 'Unknown' if not found. + """ + if not _file_path_cache and project_id == fw_project_id: + _load_file_path_cache_from_csv() + + file_id_int: int = int(file_id) + project_id_int: int = int(project_id) + + cache_key: tuple[int, int] = (project_id_int, file_id_int) + + # Check cache first + if cache_key in _file_path_cache: + return _file_path_cache[cache_key] + + #return KeyError('File ID not found in cache, would have to fetch from API.') + + # https://crowdin.github.io/crowdin-api-client-python/api_resources/source_files/resource.html#crowdin_api.api_resources.source_files.resource.SourceFilesResource.get_file + # https://support.crowdin.com/developer/api/v2/#tag/Source-Files/operation/api.projects.files.get + file_response: dict[str, Any] = client.source_files.get_file(file_id_int, project_id_int) + file_data: dict[str, Any] = file_response.get('data', {}) + path: str = file_data.get('path', file_data.get('name', 'Unknown')) + + # Cache the result + _file_path_cache[cache_key] = path + return path + + +def 
def list_labels_lambda(project_id: int = fw_project_id) -> Callable[[int, int], CrowdinListResponse]:
    """Return a lambda function to list labels for a given project ID."""
    # https://crowdin.github.io/crowdin-api-client-python/api_resources/labels/resource.html#crowdin_api.api_resources.labels.resource.LabelsResource.list_labels
    # https://support.crowdin.com/developer/api/v2/#operation/api.projects.labels.getMany
    return lambda limit, offset: client.labels.list_labels(project_id, limit=limit, offset=offset)


def get_project_labels(project_id: int) -> list[dict[str, Any]]:
    """Fetch all labels for a given project.

    Paginates through all labels using the Crowdin API, loading all pages
    (100 items per page).

    Args:
        project_id: The Crowdin project ID.

    Returns:
        List of all label dictionaries for the project.
    """
    return paginate_list(list_labels_lambda(project_id))


def list_strings_lambda(filter: str, project_id: int = fw_project_id) -> Callable[[int, int], CrowdinListResponse]:
    """Return a lambda function for paginated string list queries with text scope filter."""
    # https://crowdin.github.io/crowdin-api-client-python/api_resources/source_strings/resource.html#crowdin_api.api_resources.source_strings.resource.SourceStringsResource.list_strings
    # https://support.crowdin.com/developer/api/v2/#operation/api.projects.strings.getMany
    return lambda limit, offset: client.source_strings.list_strings(
        project_id,
        filter=filter,
        limit=limit,
        offset=offset,
        scope="text",
    )


def fetch_matching_strings(search_string: str, options: SearchOptions | None = None) -> list[dict[str, Any]]:
    """Fetch all strings matching the given search string from a project.

    Retrieves all strings matching the search term (text scope), paginating
    through all results (100 per page), then filters locally by case
    sensitivity, exact match, and duplicate status. If no project_id is
    supplied in options, the user is prompted to select a project.

    Args:
        search_string: The string to search for.
        options: Optional search options dict with keys:
            - project_id: Crowdin project ID. If not provided, user is prompted.
            - case_sensitive: Whether search is case-sensitive (default: False).
            - exact_match: Whether to match entire string only (default: False).
            - include_duplicates: Whether to include strings marked as duplicates (default: False).

    Returns:
        A list of all matching string dictionaries.
    """
    if options is None:
        options = {}

    case_sensitive: bool = options.get('case_sensitive', False)
    exact_match: bool = options.get('exact_match', False)
    include_duplicates: bool = options.get('include_duplicates', False)

    project_id = options.get('project_id')
    if project_id is None:
        # No project supplied: let the user pick one interactively.
        project_id = select_project()['id']

    candidates: list[dict[str, Any]] = paginate_list(list_strings_lambda(search_string, project_id))

    # Crowdin stores some strings with HTML-escaped angle brackets. If the raw
    # search found nothing, retry with '<'/'>' encoded as HTML entities.
    # (The previous code replaced the characters with themselves, a no-op.)
    if not candidates and ('<' in search_string or '>' in search_string):
        search_string = search_string.replace('<', '&lt;').replace('>', '&gt;')
        candidates = paginate_list(list_strings_lambda(search_string, project_id))

    matching_strings: list[dict[str, Any]] = []
    # Hoisted out of the loop: the search term does not change per item.
    search_text: str = search_string if case_sensitive else search_string.lower()

    for item in candidates:
        if item.get('isDuplicate', False) and not include_duplicates:
            continue

        text: str = item.get('text', '')
        compare_text: str = text if case_sensitive else text.lower()

        if exact_match:
            if search_text == compare_text:
                matching_strings.append(item)
        elif search_text in compare_text:
            matching_strings.append(item)

    return matching_strings
def add_priority_to_string(search_string: str = '', priority: int | None = None, project_id: int | None = fw_project_id) -> int | None:
    """Assign a priority label to all strings matching the search criteria.

    Searches the project for exact matches of the search string and applies a
    priority label (1-5). Prompts for search_string and priority if not
    provided. Uses `fw_priority_label_ids` mapping to resolve Crowdin label IDs.

    Args:
        search_string: String to search for. If empty, user is prompted.
        priority: Desired priority (1-5). If invalid/None, user is prompted.
        project_id: Crowdin project ID (default: fw_project_id).

    Returns:
        Number of strings successfully labeled, or None if no matches found.
    """
    if not search_string:
        search_string = input('\nEnter search string: ')

    # Hoisted out of the f-string: backslashes inside f-string expressions are
    # a SyntaxError before Python 3.12, and this project targets 3.8+.
    display_string = search_string.replace('\n', '\\n')
    print(f'Labeling strings matching: {display_string}')

    matching_strings = fetch_matching_strings(
        search_string,
        {'project_id': project_id, 'exact_match': True}
    )

    if not matching_strings:
        print('\tNo matching strings found.')
        return None

    # Prompt until a valid priority (1-5) is provided
    while priority is None or priority not in fw_priority_label_ids:
        try:
            entered = input('Enter priority (1-5): ')
            priority = int(entered)
        except ValueError:
            print('Invalid number. Please enter 1-5.')
            continue
        if priority not in fw_priority_label_ids:
            print('Priority must be 1-5.')

    label_id = fw_priority_label_ids[priority]

    updated_count = 0
    already_labeled_count = 0
    operations: list[dict[str, Any]] = []

    for item in matching_strings:
        string_id = item.get('id')
        if string_id is None:
            continue

        current_labels: list[int] = item.get('labelIds') or []
        if label_id in current_labels:
            already_labeled_count += 1
            continue

        operations.append({
            'op': 'replace',
            'path': f'/{string_id}/labelIds',
            'value': current_labels + [label_id],
        })

    if operations:
        try:
            # https://support.crowdin.com/developer/api/v2/#tag/Source-Strings/operation/api.projects.strings.batchPatch
            # Patch the same project the strings were fetched from (this was
            # hard-coded to fw_project_id, which broke non-default projects).
            client.source_strings.string_batch_operation(projectId=project_id, data=operations)
            updated_count = len(operations)
        except Exception as e:
            print(f'Failed to batch update strings: {e}')

    print(f'\tAdded priority {priority} to {updated_count} of {len(matching_strings)} matching strings.')
    if already_labeled_count:
        print(f'\t\t{already_labeled_count} strings were already labeled.')

    return updated_count
def print_projects() -> None:
    """Display all projects (sorted by name) with their IDs."""
    projects = get_projects()
    print(f'{len(projects)} projects:')
    for project in projects:
        print(project['id'], project['name'])


def print_project_labels(project_name: str | None = None) -> None:
    """Print all labels for the selected project.

    If `project_name` is provided, selects that project by name; otherwise,
    prompts the user to choose from an enumerated list. Prints the total
    label count and each label's ID and title.
    """
    project = select_project(project_name)
    labels = get_project_labels(project['id'])

    print(f"\nProject {project['name']} has {len(labels)} label(s):")
    for label in labels:
        print(f"{label.get('id', 'Unknown')}: {label.get('title', 'Unknown')}")


def search_matching_strings(search_string: str = '') -> None:
    """Search the FieldWorks project for exact matches and print the first ten."""
    if not search_string:
        search_string = input('\nEnter search string: ')

    matches = fetch_matching_strings(
        search_string,
        {'project_id': fw_project_id, 'exact_match': True}
    )

    print(f'\nFound {len(matches)} matching string(s):')
    for match in matches[:10]:
        file_id = match.get('fileId', 'N/A')
        location = get_file_path(fw_project_id, file_id) if file_id != 'N/A' else 'N/A'
        print(f"[{location}] {match.get('text', 'N/A')}")
    if len(matches) > 10:
        print('...')


if __name__ == '__main__':
    print('\nAvailable tests:\n')
    print('1. print_projects() - Display all projects with IDs')
    print('2. select_and_fetch_files() - Show files for selected project')
    print('3. search_matching_strings() - Search strings in Fieldworks project')
    print('4. print_project_labels() - Display labels for Fieldworks project')
    print('5. export_files_to_csv() - Export project files to CSV')
    print('6. add_priority_to_string() - Add priority label to matching strings')

    selection: str = input('\nSelect test (enter number 1-6): ')

    # Dispatch table replaces the match statement; behavior is unchanged.
    tests = {
        '1': print_projects,
        '2': select_and_fetch_files,
        '3': search_matching_strings,
        '4': lambda: print_project_labels(fw_project_name),
        '5': export_files_to_csv,
        '6': add_priority_to_string,
    }
    action = tests.get(selection)
    if action is None:
        print('Invalid selection.')
    else:
        action()