This guide shows how to download `.html` files from a specific Google Drive folder directly to your AWS server using a Python script and a service account.
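Before running the script, you must have completed the earlier setup: a Python virtual environment at `~/gdrive-env` (activated below) and a service-account key file saved at: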
`/home/ec2-user/credentials/service-account.json`
<code bash> cd ~ </code>
<code bash> nano download_html.py </code>
<code python> import os import io from google.oauth2 import service_account from googleapiclient.discovery import build from googleapiclient.http import MediaIoBaseDownload
# Path to the service-account key file used to authenticate.
SERVICE_ACCOUNT_FILE = '/home/ec2-user/credentials/service-account.json'
# OAuth scope requested for the credentials (read-only Drive access).
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
# ID of the Drive folder to download from (placeholder value; replace it).
HTML_FOLDER_ID = 'YOUR_HTML_FOLDER_ID'
# Local directory, relative to the working directory, that receives the files.
DESTINATION_FOLDER = './downloaded_html_files'

# Build the Drive v3 client once at module level; the download function
# below uses this shared `service` object.
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)
service = build('drive', 'v3', credentials=credentials)
def _safe_local_name(drive_name, fallback):
    """Reduce a Drive file name to a single, safe local path component.

    Path separators are replaced with underscores so the name cannot point
    into another directory; names that are empty or refer to the current or
    parent directory are replaced by ``fallback``.
    """
    candidate = drive_name
    for separator in ('/', os.sep, os.altsep):
        if separator:
            candidate = candidate.replace(separator, '_')
    if candidate in ('', '.', '..'):
        return fallback
    return candidate


def download_files_from_folder(folder_id, destination_folder):
    """Download every HTML file directly inside a Drive folder to local disk.

    Uses the module-level ``service`` client to list the non-trashed files
    whose parent is ``folder_id`` and whose MIME type is ``text/html``, then
    streams each one into ``destination_folder``. Progress is printed to
    stdout.

    Args:
        folder_id: ID of the Google Drive folder to read from.
        destination_folder: Local directory that receives the files; it is
            created if it does not exist yet.

    Returns:
        None.
    """
    if not os.path.exists(destination_folder):
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(destination_folder, exist_ok=True)
        print(f"[+] Created folder: {destination_folder}")

    # Skip files sitting in the trash; files.list returns them otherwise.
    query = (
        f"'{folder_id}' in parents and mimeType='text/html' "
        "and trashed=false"
    )

    # files.list is paginated: keep following nextPageToken so folders with
    # more results than one page are downloaded completely.
    files = []
    page_token = None
    while True:
        results = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        files.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not files:
        print("[-] No HTML files found.")
        return

    for file in files:
        print(f"[~] Downloading {file['name']}")
        request = service.files().get_media(fileId=file['id'])
        # The remote name is untrusted input: keep it from escaping the
        # destination folder or pointing into a missing sub-directory.
        file_path = os.path.join(
            destination_folder,
            _safe_local_name(file['name'], file['id']),
        )
        with io.FileIO(file_path, 'wb') as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if status:
                    print(f" {int(status.progress() * 100)}% complete")
        print(f"[+] Downloaded: {file['name']}")
# Run the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    download_files_from_folder(
        folder_id=HTML_FOLDER_ID,
        destination_folder=DESTINATION_FOLDER,
    )
</code>
<code bash> source ~/gdrive-env/bin/activate </code>
<code bash> python download_html.py </code>
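All `.html` files from the specified Google Drive folder will be downloaded to a `downloaded_html_files` directory inside the directory you ran the script from. If you ran it from your home directory as shown above, that is: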
~/downloaded_html_files/
You'll see logs like:
[~] Downloading filename.html
100% complete
[+] Downloaded: filename.html
Once you're done, leave the virtual environment:
deactivate