ch592-projects/translate-comments.py

113 lines
4.9 KiB
Python

# Translate the comments in GB18030-encoded .c and .h source files into English,
# rewriting each file in place as UTF-8.
# Handles both single-line (//) and multi-line (/* ... */) comments.
# Recurses through every subdirectory of the specified directory.
# Depends on the deep-translator Python module (available from PyPI).
# Usage: python3 ./translate-comments.py <source code directory>
import os
import argparse
from deep_translator import GoogleTranslator
def _find_comment_start(line):
    """Locate the first C comment opener in *line* that is outside a literal.

    Returns ``(index, marker)`` where *marker* is ``'//'`` or ``'/*'``, or
    ``(-1, None)`` when the line holds no comment opener.  Openers inside
    string or character literals (e.g. ``"http://host"``) are skipped so that
    code is never mistaken for a comment.
    """
    quote = None  # quote character of the literal currently being scanned
    i = 0
    length = len(line)
    while i < length:
        ch = line[i]
        if quote is not None:
            if ch == '\\':
                i += 2  # skip the escaped character, e.g. \" inside a string
                continue
            if ch == quote:
                quote = None
        elif ch in ('"', "'"):
            quote = ch
        elif ch == '/' and line[i + 1:i + 2] in ('/', '*'):
            return i, line[i:i + 2]
        i += 1
    return -1, None


def _translate_stripped(translator, text):
    """Translate ``text.strip()``; never returns ``None``.

    Empty text is returned as ``''`` without calling the translator.  If the
    translator yields ``None`` the stripped source text is kept, so a failed
    translation never erases a comment or writes the word "None".
    """
    core = text.strip()
    if not core:
        return ''
    translated = translator.translate(core)
    return core if translated is None else translated


def _translate_padded(translator, text):
    """Translate one piece of a block comment, keeping its surrounding spaces.

    The trailing line break is dropped (the caller re-inserts line breaks);
    leading and trailing blanks are preserved so that ``/* foo */`` does not
    collapse to ``/*Foo*/`` and `` * `` continuation lines keep their indent.
    """
    body = text.rstrip('\r\n')
    core = body.strip()
    if not core:
        return body
    lead = body[:len(body) - len(body.lstrip())]
    trail = body[len(body.rstrip()):]
    return lead + _translate_stripped(translator, core) + trail


def _translate_line_comment(translator, code, comment):
    """Rebuild a line as ``<code>// <translated comment>`` plus a newline."""
    return f"{code}// {_translate_stripped(translator, comment)}\n"


def translate_comments_in_file(file_path, translator):
    """Translate the C comments of one source file and rewrite it in place.

    The file is read as GB18030 and written back as UTF-8.  Code outside
    comments is copied through unchanged.

    Args:
        file_path: Path of the .c/.h file to process; it is overwritten.
        translator: Object exposing ``translate(text)`` that returns the
            translated string (or ``None``, in which case the source text is
            kept).

    Handling rules:
        * ``// ...`` comments are rewritten as ``// <translation>``.
        * ``/* ... */`` comments are translated line by line, whether they
          close on the same line or several lines later.
        * A line holding more than one ``/* */`` comment is left untouched.
        * Whichever comment opener comes first on a line wins, and openers
          inside string/char literals are ignored.
        * An unterminated block comment at end of file is written back
          verbatim instead of being dropped.
        * Text following the ``*/`` that closes a multi-line comment is
          copied as is (comments in that tail are not translated).
    """
    with open(file_path, 'r', encoding='GB18030') as f:
        lines = f.readlines()

    output = []
    pending = []      # raw pieces of the block comment currently open
    in_block = False  # True while inside a multi-line /* ... */ comment

    for line in lines:
        if in_block:
            end = line.find('*/')
            if end == -1:
                pending.append(line)
                continue
            # The comment closes here: translate every buffered piece and
            # re-join them with the line breaks that separated them.
            pending.append(line[:end])
            output.append('\n'.join(
                _translate_padded(translator, piece) for piece in pending))
            output.append(line[end:])  # '*/' plus whatever follows it
            pending = []
            in_block = False
            continue

        start, marker = _find_comment_start(line)
        if marker is None:
            output.append(line)  # no comment, keep the line as is
            continue

        if marker == '//':
            output.append(_translate_line_comment(
                translator, line[:start], line[start + 2:]))
            continue

        # marker == '/*': look for the terminator only *after* the opener so
        # that '/*/' is not mistaken for an already-closed comment.
        body_start = start + 2
        end = line.find('*/', body_start)
        if end == -1:
            output.append(line[:body_start])
            pending.append(line[body_start:])
            in_block = True
            continue

        rest = line[end + 2:]
        rest_start, rest_marker = _find_comment_start(rest)
        if rest_marker == '/*':
            # More than one block comment on a single line - don't touch it.
            output.append(line)
            continue

        translated_block = (line[:body_start]
                            + _translate_padded(translator, line[body_start:end])
                            + '*/')
        if rest_marker == '//':
            # A trailing line comment after the block comment.
            output.append(_translate_line_comment(
                translator,
                translated_block + rest[:rest_start],
                rest[rest_start + 2:]))
        else:
            output.append(translated_block + rest)

    if in_block:
        # Unterminated block comment: restore its text verbatim rather than
        # silently losing the buffered lines.
        output.extend(pending)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(output)
def process_directory(directory, translator):
    """Walk *directory* recursively and translate every .c and .h file found.

    Each matching file is announced on stdout and then handed, together with
    *translator*, to ``translate_comments_in_file``.
    """
    source_suffixes = ('.c', '.h')
    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            # Only C sources and headers are processed.
            if not name.endswith(source_suffixes):
                continue
            file_path = os.path.join(current_dir, name)
            print(f"Translating comments in {file_path}")
            translate_comments_in_file(file_path, translator)
def main():
    """Command-line entry point: parse the directory argument and translate it."""
    arg_parser = argparse.ArgumentParser(
        description='Translate comments in C/C++ source files')
    arg_parser.add_argument(
        'directory',
        type=str,
        help='The directory containing .c and .h files')
    options = arg_parser.parse_args()

    # The source language is auto-detected; the target language is English.
    google = GoogleTranslator(source='auto', target='en')
    process_directory(options.directory, google)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()