# Translate comments of .c & .h source files 'GB18030' encoded into English 'utf-8'
# operates on single and multi-line comments
# operates recursively in the specified directory
# depends on deep-translator python module via pypi
# Usage: python3 ./translate-comments.py <directory>

import os
import argparse

try:
    from deep_translator import GoogleTranslator
except ImportError:
    # Only main() needs the real translator; the helpers below accept any
    # object exposing translate(str), so keep the module importable and let
    # main() report the missing dependency with an actionable message.
    GoogleTranslator = None


def _translate_text(text, translator):
    """Translate *text*, keeping its leading/trailing whitespace untouched.

    Whitespace-only text is returned unchanged without calling the
    translator. A ``None`` result from the translator is treated as an
    empty translation so the literal string "None" is never written out.
    """
    stripped = text.strip()
    if not stripped:
        return text
    translated = translator.translate(stripped)
    if translated is None:
        translated = ''
    lead = text[:len(text) - len(text.lstrip())]
    trail = text[len(text.rstrip()):]
    return f"{lead}{translated}{trail}"


def _translate_lines(lines, translator):
    """Return *lines* with every ``//`` and ``/* ... */`` comment translated.

    Each line is scanned left to right, so whichever comment opener comes
    first wins and several comments on one line are all handled. Code
    outside comments, comment delimiters and whitespace are copied verbatim.

    Limitation: string literals are not recognised, so ``//`` or ``/*``
    inside a C string is still treated as a comment opener.
    """
    output = []
    pending = []      # raw pieces of a block comment that is still open
    in_block = False

    for line in lines:
        pos = 0
        while pos < len(line):
            if in_block:
                end = line.find('*/', pos)
                if end == -1:
                    pending.append(line[pos:])
                    break
                pending.append(line[pos:end])
                output.extend(_translate_text(piece, translator) for piece in pending)
                output.append('*/')
                pending = []
                in_block = False
                pos = end + 2
                continue

            line_pos = line.find('//', pos)
            block_pos = line.find('/*', pos)

            if line_pos != -1 and (block_pos == -1 or line_pos < block_pos):
                # Line comment: everything after '//' is comment text.
                output.append(line[pos:line_pos + 2])
                output.append(_translate_text(line[line_pos + 2:], translator))
                break

            if block_pos == -1:
                # No further comment on this line; keep the code as is.
                output.append(line[pos:])
                break

            # Block comment opener. Searching for '*/' resumes *after* the
            # opener so that '/*/' is not mistaken for an empty comment.
            output.append(line[pos:block_pos + 2])
            in_block = True
            pos = block_pos + 2

    # A comment that never closes is most likely a mis-detected opener;
    # emit its text untouched instead of dropping the rest of the file.
    output.extend(pending)
    return output


def translate_comments_in_file(file_path, translator):
    """Translate the comments of one C source/header file in place.

    The file is read as GB18030 and rewritten as UTF-8. Line endings are
    preserved exactly (``newline=''``), and the file is only written after
    every comment has been translated, so a failure while reading or
    translating leaves the original file untouched.

    Args:
        file_path: Path of the file to rewrite.
        translator: Object with a ``translate(str)`` method returning the
            translated string (or ``None``).

    Raises:
        UnicodeDecodeError: If the file is not valid GB18030.
    """
    with open(file_path, 'r', encoding='GB18030', newline='') as f:
        content = f.readlines()

    translated_content = _translate_lines(content, translator)

    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        f.writelines(translated_content)


def process_directory(directory, translator):
    """Translate comments in every ``.c`` / ``.h`` file below *directory*.

    Files that cannot be decoded as GB18030 are reported and skipped so a
    single odd file does not abort the whole run.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.c', '.h')):
                continue
            file_path = os.path.join(root, file)
            print(f"Translating comments in {file_path}")
            try:
                translate_comments_in_file(file_path, translator)
            except UnicodeDecodeError as err:
                print(f"Skipping {file_path}: not valid GB18030 ({err})")


def main():
    """Parse the command line and translate the given directory tree."""
    parser = argparse.ArgumentParser(description='Translate comments in C/C++ source files')
    parser.add_argument('directory', type=str, help='The directory containing .c and .h files')
    args = parser.parse_args()

    if not os.path.isdir(args.directory):
        # os.walk() silently yields nothing for a missing path.
        parser.error(f"not a directory: {args.directory}")
    if GoogleTranslator is None:
        raise SystemExit("deep-translator is not installed; run: pip install deep-translator")

    translator = GoogleTranslator(source='auto', target='en')  # Set the target language
    process_directory(args.directory, translator)


if __name__ == '__main__':
    main()