add python translate script
This commit is contained in:
parent
034f12a602
commit
905a7c35a3
1 changed files with 113 additions and 0 deletions
113
translate-comments.py
Normal file
113
translate-comments.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# Translate the comments in GB18030-encoded .c and .h source files into English and save the files as UTF-8
|
||||
# operates on single and multi-line comments
|
||||
# operates recursively in the specified directory
|
||||
# depends on the deep-translator Python module, available from PyPI
|
||||
# Usage: python3 ./translate-comments.py <source code directory>
|
||||
|
||||
import os
|
||||
import argparse
|
||||
from deep_translator import GoogleTranslator
|
||||
|
||||
|
||||
def _find_code_comment(text, start):
    """Find the first comment opener in *text* at or after index *start*.

    Openers that appear inside a string or character literal (for example the
    ``//`` in ``"http://host"``) are skipped. Literal tracking is per call, so
    a literal is assumed not to span lines.

    Returns:
        ``(index, opener)`` where *opener* is ``'//'`` or ``'/*'``, or
        ``(-1, None)`` when the rest of the text contains no comment.
    """
    quote = None  # the quote character of the literal we are inside, if any
    i = start
    end = len(text)
    while i < end:
        ch = text[i]
        if quote is not None:
            if ch == '\\':
                i += 2  # skip the escaped character, e.g. \" or \'
                continue
            if ch == quote:
                quote = None
        elif ch == '"' or ch == "'":
            quote = ch
        elif ch == '/' and i + 1 < end and text[i + 1] in '/*':
            return i, text[i:i + 2]
        i += 1
    return -1, None


def _translate_keeping_whitespace(text, translator):
    """Translate *text* while keeping its leading and trailing whitespace.

    Whitespace-only text is returned untouched without calling the translator.
    If the translator returns ``None`` the original text is kept, so a comment
    is never replaced by nothing (or by the string ``"None"``).
    """
    core = text.strip()
    if not core:
        return text
    lead = text[:len(text) - len(text.lstrip())]
    trail = text[len(text.rstrip()):]  # includes the line's newline, if any
    translated = translator.translate(core)
    if translated is None:
        translated = core
    return f"{lead}{translated}{trail}"


def _translate_line(line, translator, in_block_comment):
    """Translate every comment fragment found in one source line.

    Args:
        line: The source line, including its trailing newline if present.
        translator: Object exposing ``translate(str)``.
        in_block_comment: True when the line starts inside a ``/* ... */``
            comment opened on an earlier line.

    Returns:
        ``(new_line, in_block_comment)`` - the rewritten line and whether a
        block comment is still open at the end of it.
    """
    pieces = []
    pos = 0
    while pos < len(line):
        if in_block_comment:
            close = line.find('*/', pos)
            if close == -1:
                # The comment continues on the next line.
                pieces.append(_translate_keeping_whitespace(line[pos:], translator))
                pos = len(line)
            else:
                pieces.append(_translate_keeping_whitespace(line[pos:close], translator))
                pieces.append('*/')
                pos = close + 2
                in_block_comment = False
            continue

        start, opener = _find_code_comment(line, pos)
        if start == -1:
            pieces.append(line[pos:])  # plain code, keep verbatim
            break

        # Keep the code before the comment and the opener itself untouched.
        pieces.append(line[pos:start + 2])
        pos = start + 2
        if opener == '/*':
            in_block_comment = True
        else:
            # A '//' comment runs to the end of the line.
            pieces.append(_translate_keeping_whitespace(line[pos:], translator))
            pos = len(line)
    return ''.join(pieces), in_block_comment


def translate_comments_in_file(file_path, translator):
    """Translate the C comments of one file in place.

    The file is read as GB18030, each ``//`` and ``/* ... */`` comment is
    passed through ``translator.translate`` (block comments one line at a
    time), and the result is written back to the same path as UTF-8. The file
    is only rewritten after every line has been processed, so an exception
    raised by the translator leaves the file untouched.

    Code outside comments, comment delimiters, the whitespace around comment
    text and comment openers inside string/character literals are preserved.
    A block comment that is still open at end of file is translated and kept
    rather than dropped.

    Args:
        file_path: Path of the source file to rewrite.
        translator: Object exposing ``translate(str) -> str | None``.
    """
    with open(file_path, 'r', encoding='GB18030') as f:
        source_lines = f.readlines()

    translated_lines = []
    in_block_comment = False
    for line in source_lines:
        new_line, in_block_comment = _translate_line(line, translator, in_block_comment)
        translated_lines.append(new_line)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(translated_lines)
|
||||
|
||||
|
||||
def process_directory(directory, translator):
    """Translate the comments of every .c and .h file below *directory*.

    The tree is walked recursively with ``os.walk``; each matching file is
    announced on stdout and then handed to ``translate_comments_in_file``
    together with *translator*.
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # Only C sources and headers are processed; skip everything else.
            if not name.endswith(('.c', '.h')):
                continue
            file_path = os.path.join(folder, name)
            print(f"Translating comments in {file_path}")
            translate_comments_in_file(file_path, translator)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Reads the single positional ``directory`` argument, builds a
    ``GoogleTranslator`` targeting English and runs ``process_directory``
    on that directory.
    """
    cli = argparse.ArgumentParser(description='Translate comments in C/C++ source files')
    cli.add_argument('directory', type=str, help='The directory containing .c and .h files')
    options = cli.parse_args()

    # source='auto' is passed so the source language is not fixed here;
    # target='en' selects English output.
    google = GoogleTranslator(source='auto', target='en')
    process_directory(options.directory, google)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue