I am trying to build a backup system for storage accounts. Just to make the example as clear as possible, this is my storage account structure
storageA:
|---container1
| |---Blob1.txt
|---container2
| |---Blob2.txt
| |---Blob3.txt
| |---Blob4.txt
|---container3
|---Blob5.txt
I have a script to loop over the containers and blob and copy the same structure to another storage account. And the script is as follow.
from typing import Container
from azure.storage.blob import BlobClient, BlobServiceClient, ContainerClient
from azure.storage.blob import ResourceTypes, AccountSasPermissions
from azure.storage.blob import generate_account_sas
from datetime import *
today = str(datetime.now().date())
print(today)
#================================ SOURCE ===============================
# Source Client
connection_string = '' # The connection string for the source container
account_key = '' # The account key for the source container
source_container_name = 'newblob' # Name of container which has blob to be copied
# Create client
client = BlobServiceClient.from_connection_string(connection_string)
client = BlobServiceClient.from_connection_string(connection_string)
all_containers = client.list_containers(include_metadata=True)
for container in all_containers:
# Create sas token for blob
sas_token = generate_account_sas(
account_name = client.account_name,
account_key = account_key,
resource_types = ResourceTypes(object=True, container=True),
permission= AccountSasPermissions(read=True,list=True),
# start = datetime.now(),
expiry = datetime.utcnow() + timedelta(hours=4) # Token valid for 4 hours
)
print(container['name'], container['metadata'])
# print("==========================")
container_client = client.get_container_client(container.name)
# print(container_client)
blobs_list = container_client.list_blobs()
for blob in blobs_list:
# Create blob client for source blob
source_blob = BlobClient(
client.url,
container_name = container['name'],
blob_name = blob.name,
credential = sas_token
)
print(blob.name)
print("==========================")
# # ============================= TARGET =======================================
# Target Client
target_connection_string = ''
target_account_key = ''
source_container_name = source_container_name
target_blob_name = blob.name
target_destination_blob = container['name'] + today
print(target_destination_blob)
# Create target client
target_client = BlobServiceClient.from_connection_string(target_connection_string)
container = ContainerClient.from_connection_string(target_connection_string, target_destination_blob)
try:
container_client = target_client.create_container(target_destination_blob)
# Create new blob and start copy operation.
except:
new_blob = target_client.get_blob_client(target_destination_blob, target_blob_name)
new_blob.start_copy_from_url(source_blob.url)
# print(source_blob.url)
This script make a full copy of containers and blob without any error.
But when I. go to my target storage, I can see that I have the same containers, container 1 and 3 they have the correct blobs, but container 2 have only 2 blobs, and no matter if I try upload new files into the Source storage and run my script, but the new files never get copied over.
Can anyone please help me to understand this problem?Thank you very much
UPDATE: After some debugging, I found something interesting. In my block of code, I put some print statement to keep track of the loops happening, specifically when it comes to copying blobs.
This is the updated version of my code, to reproduce:
from typing import Container
from azure.storage.blob import BlobClient, BlobServiceClient, ContainerClient
from azure.storage.blob import ResourceTypes, AccountSasPermissions
from azure.storage.blob import generate_account_sas
from datetime import *
today = str(datetime.now().date())
print(today)
#================================ SOURCE ===============================
# Source Client
connection_string = '' # The connection string for the source container
account_key = '' # The account key for the source container
# source_container_name = 'newblob' # Name of container which has blob to be copied
# Create client
client = BlobServiceClient.from_connection_string(connection_string)
client = BlobServiceClient.from_connection_string(connection_string)
all_containers = client.list_containers(include_metadata=True)
for container in all_containers:
# Create sas token for blob
sas_token = generate_account_sas(
account_name = client.account_name,
account_key = account_key,
resource_types = ResourceTypes(object=True, container=True),
permission= AccountSasPermissions(read=True,list=True),
# start = datetime.now(),
expiry = datetime.utcnow() + timedelta(hours=4) # Token valid for 4 hours
)
print(container['name'], container['metadata'])
print("==========================")
# print("==========================")
container_client = client.get_container_client(container.name)
# print(container_client)
blobs_list = container_client.list_blobs()
for blob in blobs_list:
# Create blob client for source blob
source_blob = BlobClient(
client.url,
container_name = container['name'],
blob_name = blob.name,
credential = sas_token
)
print(blob.name)
# # ============================= TARGET =======================================
# Target Client
target_connection_string = ''
target_account_key = ''
source_container_name = container['name']
target_blob_name = blob.name
target_destination_blob = container['name'] + today
print(target_destination_blob)
# Create target client
target_client = BlobServiceClient.from_connection_string(target_connection_string)
container = ContainerClient.from_connection_string(target_connection_string, target_destination_blob)
try:
container_client = target_client.create_container(target_destination_blob)
new_blob = target_client.get_blob_client(target_destination_blob, target_blob_name)
new_blob.start_copy_from_url(source_blob.url)
print(f"TRY: saving blob {target_blob_name} into {target_destination_blob} ")
except:
# Create new blob and start copy operation.
new_blob = target_client.get_blob_client(target_destination_blob, target_blob_name)
new_blob.start_copy_from_url(source_blob.url)
print(f"TRY: saving blob {target_blob_name} into {target_destination_blob} ")
# print(source_blob.url)
Now when I run the code, I get this output:
==========================
blob1 {}
lastfile.txt
blob12021-09-22
COPY TO
EXCEPT: saving blob lastfile.txt into blob12021-09-22
==========================
blob2 {}
lastfile.txt
lastupdate.txt
newFile.txt
blob22021-09-22
COPY TO
EXCEPT: saving blob newFile.txt into blob22021-09-22
==========================
blob3 {}
lastupdate.txt
blob32021-09-22
COPY TO
EXCEPT: saving blob lastupdate.txt into blob32021-09-22
As I could see, the entire loop, is copying only the last file of the list. Here is where I get confused with multiple loop. Can please please anyone explain me what am I doing wrong, and how to make the loop target each file and copy it to the new storage?. Thank you so much for any help you can provide me with