from dotenv import load_dotenv
import pdfplumber
import docx
def get_pdf_content(filepath, page_number):
    """Return the extracted text of one page of a PDF file.

    Args:
        filepath: Path of the PDF file to open.
        page_number: Index into ``pdf.pages`` of the page to read.

    Returns:
        The text extracted from the page, or an empty string when the
        extraction yields nothing.

    Raises:
        IndexError: If ``page_number`` is outside the document's page list.
    """
    # The context manager closes the underlying file handle even if the
    # page lookup or the text extraction raises.
    with pdfplumber.open(filepath) as pdf:
        page = pdf.pages[page_number]
        # Normalise a falsy extraction result so callers always get a str.
        return page.extract_text() or ""
def translate_text_with_model(target, text, model="nmt"):
    """Translate text into the target language.

    Make sure your project is allowlisted.
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages

    Args:
        target: Language to translate into; must be an ISO 639-1 language
            code.
        text: The text to translate. ``bytes`` are decoded as UTF-8. A list
            of strings is also accepted.
        model: Translation model name passed to the service.

    Returns:
        The translated string, or a list of translated strings when ``text``
        is a list. Empty input is returned unchanged without contacting the
        service.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # Nothing to translate: skip the network round trip entirely.
    if not text:
        return text

    # Imported lazily so the module can be loaded without the Google client.
    from google.cloud import translate_v2 as translate

    translate_client = translate.Client()

    # The input is plain text, so request plain-text handling; otherwise the
    # service treats it as HTML and may return HTML entities in the output.
    result = translate_client.translate(
        text,
        target_language=target,
        model=model,
        format_="text",
    )

    # A list of inputs yields a list of result dictionaries.
    if isinstance(result, list):
        return [item["translatedText"] for item in result]
    return result["translatedText"]
def write_docx(filepath, text, heading="第一章 aaaa\n"):
    """Write text into a new Word document and save it.

    Args:
        filepath: Path the ``.docx`` file is saved to.
        text: Body text, appended as a run to the paragraph that starts
            with ``heading``.
        heading: Text that opens the first paragraph. Defaults to the
            previously hard-coded chapter title.
    """
    document = docx.Document()
    paragraph = document.add_paragraph(heading)
    paragraph.add_run(text)
    document.save(filepath)
if __name__ == '__main__':
    # Load settings from a .env file into the environment
    # (presumably the Google credentials -- TODO confirm).
    load_dotenv()

    source_filepath = r"/home/amos/文件/kivy.pdf"  # path of the foreign-language PDF
    target_filepath = r"./test.docx"  # path of the translated Word file
    source = 'en'  # language of the original file (not used by the calls below)
    target = 'zh-TW'  # language to translate into
    page_number = 14  # which PDF page to read

    text = get_pdf_content(source_filepath, page_number)
    target_text = translate_text_with_model(target, text)
    write_docx(target_filepath, target_text)