# ch592-projects/translate-comments.py

# Translate the comments in GB18030-encoded .c and .h source files into English
# and rewrite the files as UTF-8.
# Handles both single-line (//) and multi-line (/* ... */) comments.
# Processes the specified directory recursively.
# Requires the deep-translator Python module (available on PyPI).
# Usage: python3 ./translate-comments.py <source code directory>

import os
import argparse
from deep_translator import GoogleTranslator


def _translate_keep_whitespace(text, translator):
    """Translate the non-blank core of ``text``, keeping surrounding whitespace.

    Blank text is returned unchanged without calling the translator. If the
    translator returns ``None`` the original text is kept, so a comment is
    never erased just because no translation came back.
    """
    core = text.strip()
    if not core:
        return text
    translated = translator.translate(core)
    if translated is None:
        return text
    leading = text[:len(text) - len(text.lstrip())]
    trailing = text[len(text.rstrip()):]
    return f"{leading}{translated}{trailing}"


def _translate_line_comment(text, translator):
    """Translate the ``//`` comment contained in ``text``, if there is one.

    Returns ``text`` unchanged when it has no ``//`` or the comment is blank.
    Otherwise the result is ``<code>// <translation>`` followed by a newline.
    """
    code, marker, comment = text.partition('//')
    comment_text = comment.strip()
    if not marker or not comment_text:
        return text
    translated = translator.translate(comment_text)
    if translated is None:
        # No translation available: keep the original wording.
        translated = comment_text
    return f"{code}// {translated}\n"


def translate_comments_in_file(file_path, translator):
    """Translate the comments of one C source file in place.

    The file is read as GB18030, its ``//`` and ``/* ... */`` comments are
    passed to ``translator.translate`` piece by piece, and the result is
    written back to the same path as UTF-8. The file is only rewritten after
    every line has been processed.

    Args:
        file_path: Path of the file to read and overwrite.
        translator: Any object with a ``translate(text)`` method returning the
            translated string (or ``None``).

    Notes:
        * The comment marker that appears first on a line decides how the
          line is handled, so ``/* see http://x */`` is a block comment.
        * A line holding more than one ``/* ... */`` comment is left
          untouched.
        * Comment markers are located by plain text search; markers inside
          string literals are not recognised as such.
    """
    with open(file_path, 'r', encoding='GB18030') as f:
        content = f.readlines()

    translated_content = []
    in_multiline_comment = False
    # Raw pieces of the currently open block comment (text after "/*").
    multiline_comment_buffer = []

    for line in content:
        if in_multiline_comment:
            comment_end_idx = line.find('*/')
            if comment_end_idx == -1:
                multiline_comment_buffer.append(line)
                continue

            # Comment closes here: translate everything buffered so far.
            # Each piece keeps its own indentation and line ending.
            multiline_comment_buffer.append(line[:comment_end_idx])
            translated_content.extend(
                _translate_keep_whitespace(piece, translator)
                for piece in multiline_comment_buffer
            )
            # "*/" and whatever follows it are kept verbatim.
            translated_content.append(line[comment_end_idx:])
            in_multiline_comment = False
            multiline_comment_buffer = []
            continue

        line_comment_idx = line.find('//')
        block_comment_idx = line.find('/*')
        block_comes_first = block_comment_idx != -1 and (
            line_comment_idx == -1 or block_comment_idx < line_comment_idx
        )

        if block_comes_first:
            body_start = block_comment_idx + 2
            # Search after the opener so "/*/" is not read as "/*" + "*/".
            comment_end_idx = line.find('*/', body_start)

            if comment_end_idx == -1:
                # Start of a multi-line comment.
                translated_content.append(line[:body_start])
                multiline_comment_buffer.append(line[body_start:])
                in_multiline_comment = True
                continue

            after_comment = line[comment_end_idx + 2:]
            if '/*' in after_comment:
                # More than one block comment on the line - don't touch it.
                translated_content.append(line)
                continue

            translated_content.append(line[:body_start])
            translated_content.append(
                _translate_keep_whitespace(line[body_start:comment_end_idx], translator)
            )
            # A trailing "// ..." after the block comment is translated too.
            translated_content.append(
                '*/' + _translate_line_comment(after_comment, translator)
            )
        elif line_comment_idx != -1:
            translated_content.append(_translate_line_comment(line, translator))
        else:
            translated_content.append(line)  # No comment, retain original line

    # A block comment still open at end of file: write its text back
    # untranslated instead of silently dropping it.
    translated_content.extend(multiline_comment_buffer)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(translated_content)


def process_directory(directory, translator):
    """Translate the comments of every .c and .h file found under ``directory``.

    The tree is walked recursively with ``os.walk``; each matching file is
    announced on stdout and then handed to ``translate_comments_in_file``
    together with ``translator``.
    """
    for dirpath, _subdirs, filenames in os.walk(directory):
        c_sources = (name for name in filenames if name.endswith(('.c', '.h')))
        for name in c_sources:
            file_path = os.path.join(dirpath, name)
            print(f"Translating comments in {file_path}")
            translate_comments_in_file(file_path, translator)


def main():
    """Command-line entry point.

    Reads the single positional ``directory`` argument, builds a
    ``GoogleTranslator`` with auto-detected source language and English as
    the target, and processes the directory with it.
    """
    cli = argparse.ArgumentParser(description='Translate comments in C/C++ source files')
    cli.add_argument('directory', type=str, help='The directory containing .c and .h files')
    options = cli.parse_args()

    # Source language is auto-detected; the target language is English.
    google = GoogleTranslator(source='auto', target='en')
    process_directory(options.directory, google)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()