113 lines
4.9 KiB
Python
113 lines
4.9 KiB
Python
# Translate the comments in GB18030-encoded .c and .h source files into English,
# rewriting each file in place as UTF-8.
# Handles both single-line (//) and multi-line (/* ... */) comments.
# Recurses through every subdirectory of the specified directory.
# Depends on the deep-translator Python module (available from PyPI).
# Usage: python3 ./translate-comments.py <source code directory>
|
|
|
|
import os
|
|
import argparse
|
|
from deep_translator import GoogleTranslator
|
|
|
|
|
|
def _translate_fragment(text, translator):
    """Translate *text* while keeping its leading/trailing whitespace intact.

    Whitespace-only text is returned unchanged without calling the
    translator.  If the translator returns ``None`` the source text is kept,
    so a failed translation never erases a comment or writes "None".
    """
    core = text.strip()
    if not core:
        return text
    translated = translator.translate(core)
    if translated is None:
        translated = core
    lead = text[:len(text) - len(text.lstrip())]
    trail = text[len(text.rstrip()):]
    return f"{lead}{translated}{trail}"


def _find_comment_start(line, pos):
    """Return the index of the next ``//`` or ``/*`` at or after *pos*, or -1.

    String and character literals are skipped so that markers inside them
    (for example ``"http://host"``) are not mistaken for comments.  An
    unterminated literal swallows the rest of the line, which leaves that
    text untouched rather than risking a rewrite of code.
    """
    end = len(line)
    while pos < end:
        ch = line[pos]
        if ch in ('"', "'"):
            # Skip to the matching quote, stepping over backslash escapes.
            pos += 1
            while pos < end and line[pos] != ch:
                pos += 2 if line[pos] == '\\' else 1
            pos += 1
        elif ch == '/' and line[pos + 1:pos + 2] in ('/', '*'):
            return pos
        else:
            pos += 1
    return -1


def _translate_line(line, translator, in_block):
    """Translate every comment on one physical line.

    *in_block* tells whether the line starts inside a ``/* ... */`` comment.
    Returns ``(new_line, in_block)`` where the flag reflects the state at the
    end of the line.  Code outside comments, the comment delimiters and the
    line terminator are copied through verbatim.
    """
    pieces = []
    pos = 0
    end = len(line)
    while pos < end:
        if in_block:
            close = line.find('*/', pos)
            if close == -1:
                # Comment continues on the next line.
                pieces.append(_translate_fragment(line[pos:], translator))
                pos = end
            else:
                pieces.append(_translate_fragment(line[pos:close], translator))
                pieces.append('*/')
                pos = close + 2
                in_block = False
            continue

        start = _find_comment_start(line, pos)
        if start == -1:
            pieces.append(line[pos:])
            pos = end
        elif line.startswith('//', start):
            # A line comment runs to the end of the line.
            pieces.append(line[pos:start + 2])
            pieces.append(_translate_fragment(line[start + 2:], translator))
            pos = end
        else:
            # Opening of a block comment; its body is handled by the branch above.
            pieces.append(line[pos:start + 2])
            pos = start + 2
            in_block = True
    return ''.join(pieces), in_block


def translate_comments_in_file(file_path, translator):
    """Translate the comments of one C source/header file in place.

    The file is read as GB18030, every ``//`` and ``/* ... */`` comment is
    passed through ``translator.translate`` one line at a time, and the
    result is written back to the same path as UTF-8.

    Args:
        file_path: Path of the file to rewrite.
        translator: Object exposing ``translate(text)`` that returns the
            translated string (or ``None``, in which case the original text
            is kept).

    Notes:
        * Whichever comment marker appears first on a line wins, so ``//``
          inside a block comment or ``/*`` inside a line comment is treated
          as ordinary comment text.
        * Markers inside string/character literals are ignored.
        * Indentation, spacing around the comment text and the original line
          terminators (including a missing final newline) are preserved.
        * An unterminated block comment at end of file is still written out.
    """
    # newline='' keeps the file's own line terminators instead of normalising them.
    with open(file_path, 'r', encoding='GB18030', newline='') as f:
        source_lines = f.readlines()

    translated_content = []
    in_block = False
    for line in source_lines:
        new_line, in_block = _translate_line(line, translator, in_block)
        translated_content.append(new_line)

    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        f.writelines(translated_content)
|
|
|
|
|
|
def process_directory(directory, translator):
    """Walk *directory* recursively and translate every ``.c`` / ``.h`` file found.

    Each matching path is printed before it is handed, together with
    *translator*, to ``translate_comments_in_file``.
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # Only C sources and headers are of interest.
            if not name.endswith(('.c', '.h')):
                continue
            target = os.path.join(folder, name)
            print(f"Translating comments in {target}")
            translate_comments_in_file(target, translator)
|
|
|
|
|
|
def main():
    """Command-line entry point: translate the comments under the given directory."""
    cli = argparse.ArgumentParser(description='Translate comments in C/C++ source files')
    cli.add_argument('directory', type=str, help='The directory containing .c and .h files')
    options = cli.parse_args()

    # Source language is left to the translator ('auto'); the target is English.
    google = GoogleTranslator(source='auto', target='en')
    process_directory(options.directory, google)
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
|