This guide shows how to download `.html` files from a specific Google Drive folder directly to your AWS server using a Python script and a service account.
You must have completed:
`/home/ec2-user/credentials/service-account.json`
<code bash> cd ~ </code>
<code bash> nano download_html.py </code>
<code python> import os import io from google.oauth2 import service_account from googleapiclient.discovery import build from googleapiclient.http import MediaIoBaseDownload
# --- Configuration -----------------------------------------------------------

# Path to the service-account JSON key on this server.
SERVICE_ACCOUNT_FILE = '/home/ec2-user/credentials/service-account.json'

# OAuth scope requested for the credentials (read-only Drive access).
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

# ID of the Google Drive folder to download from; replace the placeholder.
HTML_FOLDER_ID = 'YOUR_HTML_FOLDER_ID'

# Local folder the files are written to, relative to the working directory.
DESTINATION_FOLDER = './downloaded_html_files'
# Load the service-account key and restrict it to the scopes listed in SCOPES.
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)

# Drive v3 API client; download_files_from_folder() uses this module-level name.
service = build('drive', 'v3', credentials=credentials)
def _safe_local_name(name, fallback):
    """Return *name* reduced to a single, safe local path component.

    Path separators are replaced with ``_`` so the result cannot point
    outside the destination folder; names that are empty or refer to the
    current/parent directory are replaced by *fallback*.
    """
    cleaned = name
    for sep in ('/', os.sep, os.altsep):
        if sep:
            cleaned = cleaned.replace(sep, '_')
    if cleaned in ('', '.', '..'):
        return fallback
    return cleaned


def download_files_from_folder(folder_id, destination_folder):
    """Download every HTML file in a Google Drive folder to a local folder.

    Uses the module-level ``service`` Drive client. Files are selected by
    the ``text/html`` MIME type, trashed files are skipped, and all result
    pages are fetched. Each file is written to *destination_folder* under
    its Drive name (made safe for the local filesystem); files that share
    the same name overwrite one another.

    Args:
        folder_id: ID of the Drive folder whose direct children are listed.
        destination_folder: Local directory to write into; created if missing.

    Returns:
        None. Progress is reported with ``print``.
    """
    if not os.path.exists(destination_folder):
        # exist_ok guards against the folder appearing between check and create.
        os.makedirs(destination_folder, exist_ok=True)
        print(f"[+] Created folder: {destination_folder}")

    query = (
        f"'{folder_id}' in parents and mimeType='text/html' "
        "and trashed = false"
    )

    # files().list returns one page at a time; follow nextPageToken so large
    # folders are not silently truncated.
    files = []
    page_token = None
    while True:
        results = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        files.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not files:
        print("[-] No HTML files found.")
        return

    for item in files:
        print(f"[~] Downloading {item['name']}")
        request = service.files().get_media(fileId=item['id'])
        # Drive names may contain '/' or be '..'; never let them escape the
        # destination folder.
        local_name = _safe_local_name(item['name'], f"{item['id']}.html")
        file_path = os.path.join(destination_folder, local_name)
        with io.FileIO(file_path, 'wb') as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if status:
                    print(f" {int(status.progress() * 100)}% complete")
        print(f"[+] Downloaded: {item['name']}")
# Run the download only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    download_files_from_folder(HTML_FOLDER_ID, DESTINATION_FOLDER)
</code>
<code bash> source ~/gdrive-env/bin/activate </code>
<code bash> python download_html.py </code>
All `.html` files from the specified Google Drive folder will be downloaded to:
~/downloaded_html_files/
You'll see logs like:
[~] Downloading filename.html 100% complete [+] Downloaded: filename.html
Once you're done:
deactivate