AN889の日記

組込みエンジニアのブログ

【ZynqMP】FSBLのQSPIフラッシュS25Hx対応改造

XILINX製Zynq™ UltraScale+™ MPSoCのブートプログラムFSBL(First-Stage Boot Loader)をInfineon(旧Cypress)製QSPIフラッシュS25Hxに対応させました。
FSBLはOSSとして公開されています。
https://github.com/Xilinx/embeddedsw/tree/master/lib/sw_apps/zynqmp_fsbl

XILINXがサポートしているフラッシュデバイスは
https://support.xilinx.com/s/article/65463?language=ja
の通りです。
S25Hxはこの一覧に含まれていないため、FSBLも本品に対応していません。そのため、ブートデバイスにS25Hxを採用した場合、FSBLはS25Hxを認識できず、Second Stage Boot LoaderであるU-Bootをフラッシュから読み出せません。
そこで、次のように改造してS25Hxに対応させました。FSBLのリビジョンは2020.3です。
https://github.com/Xilinx/embeddedsw/tree/release-2020.3/lib/sw_apps/zynqmp_fsbl


diff --git a/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.c b/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.c
index 0b6aac4624..4557a6a344 100644
--- a/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.c
+++ b/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.c
@@ -1,5 +1,5 @@
 /******************************************************************************
-* Copyright (c) 2015 - 2020 Xilinx, Inc.  All rights reserved.
+* Copyright (c) 2015 - 2022 Xilinx, Inc.  All rights reserved.
 * SPDX-License-Identifier: MIT
 ******************************************************************************/
 
@@ -25,6 +25,12 @@
 *       sk   03/13/19 Added dual parallel support and QPI support for 24bit
 *                     boot mode for Macronix flash parts.
 * 5.0   bsv  11/15/20 Added Macronix 2G flash support
+* 6.0   bsv  07/29/21 Added Winbond 2G flash support
+*       bsv  09/08/21 Added MultiDie read support for Micron 2G flash part
+* 7.0   bsv  04/28/22 Fixed bug in Qspi copy when destination address is not
+*                     64 byte aligned
+*       bsv  05/03/22 Replace memcpy with Xil_MemCpy to avoid non-word aligned
+*                     access to memory
 *
 * </pre>
 *
@@ -47,6 +53,8 @@
  * change all the needed parameters in one place.
  */
 #define QSPI_DEVICE_ID		XPAR_XQSPIPSU_0_DEVICE_ID
+#define XFSBL_SIXTY_FOUR_BYTE_MASK	(0x3FU)
+#define XFSBL_SIXTY_FOUR_BYTE_VAL	(64U)
 
 /**************************** Type Definitions *******************************/
 
@@ -56,9 +64,11 @@
 static u32 FlashReadID(XQspiPsu *QspiPsuPtr);
 static u32 MacronixEnable4B(XQspiPsu *QspiPsuPtr);
 static u32 MacronixEnableQPIMode(XQspiPsu *QspiPsuPtr, int Enable);
+static u32 CypressEnable4B(XQspiPsu *QspiPsuPtr);
+static u32 CypressEnableQuadMode(XQspiPsu *QspiPsuPtr, u32 QspiBaseAddr);
 
 /************************** Variable Definitions *****************************/
-static XQspiPsu QspiPsuInstance;
+static XQspiPsu QspiPsuInstance __attribute__ ((aligned(64)));
 static u32 QspiFlashSize=0U;
 static u32 QspiFlashMake=0U;
 static u32 ReadCommand=0U;
@@ -69,6 +79,9 @@ static u8 TxBfrPtr __attribute__ ((aligned(32)));
 static u8 ReadBuffer[10] __attribute__ ((aligned(32)));
 static u8 WriteBuffer[10] __attribute__ ((aligned(32)));
 static u32 MacronixFlash = 0U;
+static u32 CypressFlash = 0U;
+u8 MultiDie = (u8)FALSE;
+
 /******************************************************************************
 *
 * This function reads serial FLASH ID connected to the SPI interface.
@@ -118,6 +131,8 @@ static u32 FlashReadID(XQspiPsu *QspiPsuPtr)
 	 * Deduce flash make
 	 */
 	MacronixFlash = 0U;
+	CypressFlash  = 0U;
+	
 	if (ReadBuffer[0] == MICRON_ID) {
 		QspiFlashMake = MICRON_ID;
 		XFsbl_Printf(DEBUG_INFO, "MICRON ");
@@ -134,6 +149,10 @@ static u32 FlashReadID(XQspiPsu *QspiPsuPtr)
 	} else if(ReadBuffer[0] == ISSI_ID) {
 		QspiFlashMake = ISSI_ID;
 		XFsbl_Printf(DEBUG_INFO, "ISSI ");
+	} else if(ReadBuffer[0] == CYPRESS_ID) {
+		QspiFlashMake = CYPRESS_ID;
+		XFsbl_Printf(DEBUG_INFO, "CYPRESS ");
+		CypressFlash = 1U;
 	} else {
 		UStatus = XFSBL_ERROR_UNSUPPORTED_QSPI;
 		XFsbl_Printf(DEBUG_GENERAL,"XFSBL_ERROR_UNSUPPORTED_QSPI\r\n");
@@ -175,8 +194,13 @@ static u32 FlashReadID(XQspiPsu *QspiPsuPtr)
 	} else if ((ReadBuffer[2] == FLASH_SIZE_ID_2G)
 			|| (ReadBuffer[2] == MACRONIX_FLASH_SIZE_ID_2G)
 			|| (ReadBuffer[2] == MACRONIX_FLASH_1_8_V_SIZE_ID_2G)) {
-                QspiFlashSize = FLASH_SIZE_2G;
-                XFsbl_Printf(DEBUG_INFO, "2G Bits\r\n");
+		QspiFlashSize = FLASH_SIZE_2G;
+		if ((QspiFlashMake == WINBOND_ID) ||
+			(QspiFlashMake == MICRON_ID)  ||
+			(QspiFlashMake == CYPRESS_ID)) {
+			MultiDie = (u8)TRUE;
+		}
+		XFsbl_Printf(DEBUG_INFO, "2G Bits\r\n");
 	}else {
 		UStatus = XFSBL_ERROR_UNSUPPORTED_QSPI;
 		XFsbl_Printf(DEBUG_GENERAL,"XFSBL_ERROR_UNSUPPORTED_QSPI\r\n");
@@ -361,7 +385,7 @@ END:
 *
 *
 ******************************************************************************/
-static u32 XFsbl_GetQspiAddr(u32 Address )
+static u32 XFsbl_GetQspiAddr(u32 Address)
 {
 	u32 RealAddr;
 
@@ -602,6 +626,9 @@ u32 XFsbl_Qspi24Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 	u32 BankSize;
 	u32 BankMask;
 	s32 SStatus;
+	u8 TempBuf[XFSBL_SIXTY_FOUR_BYTE_VAL] __attribute__ ((aligned(64)));
+	u32 UnalignedBytes = (u32)(DestAddress & XFSBL_SIXTY_FOUR_BYTE_MASK);
+	PTRSIZE DestAddr;
 
 	XFsbl_Printf(DEBUG_INFO,"QSPI Reading Src 0x%0lx, Dest %0lx, Length %0lx\r\n",
 			SrcAddress, DestAddress, Length);
@@ -642,6 +669,21 @@ u32 XFsbl_Qspi24Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 			TransferBytes = RemainingBytes;
 		}
 
+		/* Check for 64 byte alignment of DMA destination */
+		if (UnalignedBytes != 0U) {
+			UnalignedBytes = (u32)XFSBL_SIXTY_FOUR_BYTE_VAL -
+				UnalignedBytes;
+			if (UnalignedBytes > TransferBytes) {
+				UnalignedBytes = TransferBytes;
+			}
+			else {
+				TransferBytes = UnalignedBytes;
+			}
+			DestAddr = (PTRSIZE)TempBuf;
+		}
+		else {
+			DestAddr = DestAddress;
+		}
 		/**
 		 * Translate address based on type of connection
 		 * If stacked assert the slave select based on address
@@ -736,7 +778,7 @@ u32 XFsbl_Qspi24Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 
 			/*Data*/
 			FlashMsg[3].TxBfrPtr = NULL;
-			FlashMsg[3].RxBfrPtr = (u8 *)DestAddress;
+			FlashMsg[3].RxBfrPtr = (u8 *)DestAddr;
 			FlashMsg[3].ByteCount = TransferBytes;
 			FlashMsg[3].BusWidth = XQSPIPSU_SELECT_MODE_QUADSPI;
 			FlashMsg[3].Flags = XQSPIPSU_MSG_FLAG_RX;
@@ -812,7 +854,7 @@ u32 XFsbl_Qspi24Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 			}
 
 			FlashMsg[2].TxBfrPtr = NULL;
-			FlashMsg[2].RxBfrPtr = (u8 *)DestAddress;
+			FlashMsg[2].RxBfrPtr = (u8 *)DestAddr;
 			FlashMsg[2].ByteCount = TransferBytes;
 			FlashMsg[2].Flags = XQSPIPSU_MSG_FLAG_RX;
 
@@ -834,6 +876,10 @@ u32 XFsbl_Qspi24Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 			}
 		}
 
+		if (UnalignedBytes != 0U) {
+			Xil_MemCpy((void *)DestAddress, TempBuf, UnalignedBytes);
+			UnalignedBytes = 0U;
+		}
 		/**
 		 * Update the variables
 		 */
@@ -895,6 +941,8 @@ u32 XFsbl_Qspi32Init(u32 DeviceFlags)
 	s32 Status;
 	u32 QspiMode;
 	u32 UStatus;
+	u32 Addr;
+	u32 Size;
 
 
 
@@ -943,7 +991,6 @@ u32 XFsbl_Qspi32Init(u32 DeviceFlags)
 	 * Configure the qspi in linear mode if running in XIP
 	 * TBD
 	 */
-
 	switch ((u32)XPAR_PSU_QSPI_0_QSPI_MODE) {
 
 		case XQSPIPSU_CONNECTION_MODE_SINGLE:
@@ -1007,6 +1054,9 @@ u32 XFsbl_Qspi32Init(u32 DeviceFlags)
 		goto END;
 	}
 
+	/*
+	 * for Macronix
+	 */
 	if (MacronixFlash == 1U) {
 		if (QspiPsuInstance.Config.BusWidth == XFSBL_QSPI_BUSWIDTH_FOUR) {
 			ReadCommand = QUAD_READ_CMD_24BIT2;
@@ -1036,10 +1086,11 @@ u32 XFsbl_Qspi32Init(u32 DeviceFlags)
 			}
 		}
 	}
+
 	/**
 	 * add code: For a Stacked connection, read second Flash ID
 	 */
-	 QspiMode = QspiPsuInstance.Config.ConnectionMode;
+	QspiMode = QspiPsuInstance.Config.ConnectionMode;
 	if ((QspiMode ==
 			(s32)(XQSPIPSU_CONNECTION_MODE_PARALLEL)) ||
 			(QspiMode ==
@@ -1047,6 +1098,68 @@ u32 XFsbl_Qspi32Init(u32 DeviceFlags)
 		QspiFlashSize = 2 * QspiFlashSize;
 	}
 
+	/*
+	 * for Cypress
+	 */
+	if (CypressFlash == 1U) {
+		if (QspiPsuInstance.Config.ConnectionMode ==
+				XQSPIPSU_CONNECTION_MODE_PARALLEL) {
+			XQspiPsu_SelectFlash(&QspiPsuInstance,
+						XQSPIPSU_SELECT_FLASH_CS_BOTH,
+						XQSPIPSU_SELECT_FLASH_BUS_BOTH);
+			for (Addr = 0; Addr < QspiFlashSize; Addr += FLASH_SIZE_1G /* 128 MByte */) {
+				Status = CypressEnable4B(&QspiPsuInstance);
+				if (Status != XFSBL_SUCCESS) {
+					UStatus = XFSBL_FAILURE;
+					goto END;
+				}
+				Status = CypressEnableQuadMode(&QspiPsuInstance, Addr);
+				if (Status != XFSBL_SUCCESS) {
+					UStatus = XFSBL_FAILURE;
+					goto END;
+				}
+			}
+		} else {
+			XQspiPsu_SelectFlash(&QspiPsuInstance,
+						XQSPIPSU_SELECT_FLASH_CS_LOWER,
+						XQSPIPSU_SELECT_FLASH_BUS_LOWER);
+			if (QspiPsuInstance.Config.ConnectionMode == XQSPIPSU_CONNECTION_MODE_SINGLE)
+				Size = QspiFlashSize;
+			else if (QspiPsuInstance.Config.ConnectionMode == XQSPIPSU_CONNECTION_MODE_STACKED)
+				Size = QspiFlashSize / 2;
+			for (Addr = 0; Addr < Size; Addr += FLASH_SIZE_1G) {
+				Status = CypressEnable4B(&QspiPsuInstance);
+				if (Status != XFSBL_SUCCESS) {
+					UStatus = XFSBL_FAILURE;
+					goto END;
+				}
+				Status = CypressEnableQuadMode(&QspiPsuInstance, Addr);
+				if (Status != XFSBL_SUCCESS) {
+					UStatus = XFSBL_FAILURE;
+					goto END;
+				}
+			}
+			if (QspiPsuInstance.Config.ConnectionMode ==
+					XQSPIPSU_CONNECTION_MODE_STACKED) {
+				XQspiPsu_SelectFlash(&QspiPsuInstance,
+						XQSPIPSU_SELECT_FLASH_CS_UPPER,
+						XQSPIPSU_SELECT_FLASH_BUS_LOWER);
+				for (Addr = Size; Addr < QspiFlashSize; Addr += FLASH_SIZE_1G) {
+					Status = CypressEnable4B(&QspiPsuInstance);
+					if (Status != XFSBL_SUCCESS) {
+						UStatus = XFSBL_FAILURE;
+						goto END;
+					}
+					Status = CypressEnableQuadMode(&QspiPsuInstance, Addr);
+					if (Status != XFSBL_SUCCESS) {
+						UStatus = XFSBL_FAILURE;
+						goto END;
+					}
+				}
+			}
+		}
+	}
+
 END:
 	return UStatus;
 }
@@ -1167,6 +1280,168 @@ END:
 	return UStatus;
 }
 
+/******************************************************************************
+*
+* Static API used for Cypress flash to enable 4-byte addressing mode.
+* Sends the single command byte 0xB7 in SPI mode through the given instance.
+*
+* @param	QspiPsuPtr Pointer to QSPI instance.
+*
+* @return	XFSBL_SUCCESS if success, otherwise XFSBL_FAILURE.
+*
+* @note		Overwrites the shared TxBfrPtr and FlashMsg[0].
+*
+******************************************************************************/
+static u32 CypressEnable4B(XQspiPsu *QspiPsuPtr)
+{
+	s32 Status;
+	u32 UStatus;
+
+	XFsbl_Printf(DEBUG_GENERAL,"CYPRESS_FLASH_MODE\r\n");
+
+	/* Enable 4 byte mode: command byte only, no address or data phase */
+	TxBfrPtr = 0xB7U;
+	FlashMsg[0].TxBfrPtr = &TxBfrPtr;
+	FlashMsg[0].RxBfrPtr = NULL;
+	FlashMsg[0].ByteCount = 1U;
+	FlashMsg[0].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[0].Flags = XQSPIPSU_MSG_FLAG_TX;
+
+	Status = XQspiPsu_PolledTransfer(QspiPsuPtr, &FlashMsg[0], 1U);
+	if (Status != XFSBL_SUCCESS) {
+		UStatus = XFSBL_FAILURE;
+		goto END;
+	}
+	XFsbl_Printf(DEBUG_GENERAL,"CYPRESS_ENABLE_4BYTE_DONE\r\n");
+	UStatus = XFSBL_SUCCESS;
+
+END:
+	return UStatus;
+}
+
+#define CYPRESS_READ_AR_CMD		(0x65U) /* Read any register */
+#define CYPRESS_WRITE_AR_CMD	(0x71U) /* Write any register */
+#define CYPRESS_REG_ADDR_CFR1V	(0x00800002U) /* CFR1V register address */
+#define CYPRESS_QUAD_EN			(0x02U) /* Quad I/O enable bit */
+
+/******************************************************************************
+*
+* Static API used for Cypress flash to enable Quad mode. Reads CFR1V, sets the
+* Quad enable bit if it is clear, then reads the register back to verify.
+*
+* @param	QspiPsuPtr Pointer to QSPI instance.
+* @param	QspiBaseAddr Base address added to the CFR1V register address.
+*
+* @return	XFSBL_SUCCESS if success, otherwise XFSBL_FAILURE.
+*
+******************************************************************************/
+static u32 CypressEnableQuadMode(XQspiPsu *QspiPsuPtr, u32 QspiBaseAddr)
+{
+	s32 Status;
+	u32 UStatus;
+	u32 QspiAddr;
+
+	memset(ReadBuffer,  0, sizeof(ReadBuffer));
+	memset(WriteBuffer, 0, sizeof(WriteBuffer));
+
+	/* Check current Quad Enable bit value: command + 4 address bytes, 1 byte in */
+	QspiAddr = QspiBaseAddr + CYPRESS_REG_ADDR_CFR1V;
+	WriteBuffer[COMMAND_OFFSET]   = CYPRESS_READ_AR_CMD;
+	WriteBuffer[ADDRESS_1_OFFSET] = (u8)((QspiAddr & 0xFF000000U) >> 24);
+	WriteBuffer[ADDRESS_2_OFFSET] = (u8)((QspiAddr & 0xFF0000U) >> 16);
+	WriteBuffer[ADDRESS_3_OFFSET] = (u8)((QspiAddr & 0xFF00U) >> 8);
+	WriteBuffer[ADDRESS_4_OFFSET] = (u8)(QspiAddr & 0xFFU);
+	FlashMsg[0].TxBfrPtr = WriteBuffer;
+	FlashMsg[0].RxBfrPtr = NULL;
+	FlashMsg[0].ByteCount = 5;
+	FlashMsg[0].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[0].Flags = XQSPIPSU_MSG_FLAG_TX;
+	FlashMsg[1].TxBfrPtr = NULL;
+	FlashMsg[1].RxBfrPtr = ReadBuffer;
+	FlashMsg[1].ByteCount = 1;
+	FlashMsg[1].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[1].Flags = XQSPIPSU_MSG_FLAG_RX;
+	if (QspiPsuPtr->Config.ConnectionMode == XQSPIPSU_CONNECTION_MODE_PARALLEL) {
+		FlashMsg[1].Flags |= XQSPIPSU_MSG_FLAG_STRIPE;
+	}
+
+	Status = XQspiPsu_PolledTransfer(QspiPsuPtr, &FlashMsg[0], 2);
+	if (Status != XFSBL_SUCCESS) {
+		UStatus = XFSBL_FAILURE;
+		goto END;
+	}
+
+	if ((ReadBuffer[0] & CYPRESS_QUAD_EN) != 0U) {
+		UStatus = XFSBL_SUCCESS;
+		XFsbl_Printf(DEBUG_GENERAL, "CYPRESS_QUAD_BIT_ALREADY_SET at 0x%08x\n\r", QspiBaseAddr);
+		goto END;
+	}
+
+	/* Enable register write */
+	TxBfrPtr = WRITE_ENABLE_CMD;
+	FlashMsg[0].TxBfrPtr = &TxBfrPtr;
+	FlashMsg[0].RxBfrPtr = NULL;
+	FlashMsg[0].ByteCount = 1;
+	FlashMsg[0].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[0].Flags = XQSPIPSU_MSG_FLAG_TX;
+
+	Status = XQspiPsu_PolledTransfer(QspiPsuPtr, &FlashMsg[0], 1);
+	if (Status != XFSBL_SUCCESS) {
+		UStatus = XFSBL_FAILURE;
+		goto END;
+	}
+
+	/* Enable quad mode: address bytes set above are reused, byte 5 is the new value */
+	WriteBuffer[COMMAND_OFFSET] = CYPRESS_WRITE_AR_CMD;
+	WriteBuffer[5] = (u8)(ReadBuffer[0] | CYPRESS_QUAD_EN);
+	FlashMsg[0].TxBfrPtr = WriteBuffer;
+	FlashMsg[0].RxBfrPtr = NULL;
+	FlashMsg[0].ByteCount = 6;
+	FlashMsg[0].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[0].Flags = XQSPIPSU_MSG_FLAG_TX;
+
+	Status = XQspiPsu_PolledTransfer(QspiPsuPtr, &FlashMsg[0], 1);
+	if (Status != XFSBL_SUCCESS) {
+		UStatus = XFSBL_FAILURE;
+		goto END;
+	}
+
+	/* Read back and check it. */
+	WriteBuffer[COMMAND_OFFSET] = CYPRESS_READ_AR_CMD;
+	FlashMsg[0].TxBfrPtr = WriteBuffer;
+	FlashMsg[0].RxBfrPtr = NULL;
+	FlashMsg[0].ByteCount = 5;
+	FlashMsg[0].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[0].Flags = XQSPIPSU_MSG_FLAG_TX;
+	FlashMsg[1].TxBfrPtr = NULL;
+	FlashMsg[1].RxBfrPtr = ReadBuffer;
+	FlashMsg[1].ByteCount = 1;
+	FlashMsg[1].BusWidth = XQSPIPSU_SELECT_MODE_SPI;
+	FlashMsg[1].Flags = XQSPIPSU_MSG_FLAG_RX;
+	if (QspiPsuPtr->Config.ConnectionMode == XQSPIPSU_CONNECTION_MODE_PARALLEL) {
+		FlashMsg[1].Flags |= XQSPIPSU_MSG_FLAG_STRIPE;
+	}
+
+	Status = XQspiPsu_PolledTransfer(QspiPsuPtr, &FlashMsg[0], 2);
+	if (Status != XFSBL_SUCCESS) {
+		UStatus = XFSBL_FAILURE;
+		goto END;
+	}
+
+	if ((ReadBuffer[0] & CYPRESS_QUAD_EN) == 0U) {
+		UStatus = XFSBL_FAILURE;
+		XFsbl_Printf(DEBUG_GENERAL, "CYPRESS_QUAD_BIT_NOT_SET at 0x%08x\n\r", QspiBaseAddr);
+		goto END;
+	}
+
+	XFsbl_Printf(DEBUG_GENERAL, "CYPRESS_QUAD_BIT_SET at 0x%08x\n\r", QspiBaseAddr);
+
+	UStatus = XFSBL_SUCCESS;
+
+END:
+	return UStatus;
+}
+
 /*****************************************************************************/
 /**
  * This function is used to copy the data from QSPI flash to destination
@@ -1193,6 +1468,12 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 	u32 TransferBytes;
 	u32 DiscardByteCnt;
 	u32 UStatus;
+	u32 BankSize;
+	u32 BankMask;
+	u32 OrigAddr;
+	u8 TempBuf[XFSBL_SIXTY_FOUR_BYTE_VAL] __attribute__ ((aligned(64)));
+	u32 UnalignedBytes = (u32)(DestAddress & XFSBL_SIXTY_FOUR_BYTE_MASK);
+	PTRSIZE DestAddr;
 
 	XFsbl_Printf(DEBUG_INFO,"QSPI Reading Src 0x%0lx, Dest %0lx, Length %0lx\r\n",
 			SrcAddress, DestAddress, Length);
@@ -1207,12 +1488,24 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 		goto END;
 	}
 
+	if (MultiDie == (u8)TRUE) {
+		BankSize = BANKSIZE_64MB;
+		BankMask = BANKMASK_64MB;
+	}
+	else {
+		BankSize = BANKSIZE;
+		BankMask = BANKMASK;
+	}
 
+	if (QspiPsuInstance.Config.ConnectionMode ==
+	    XQSPIPSU_CONNECTION_MODE_PARALLEL){
+		BankSize *=  2U;
+		BankMask *=  2U;
+	}
 	/**
 	 * Update no of bytes to be copied
 	 */
 	RemainingBytes = Length;
-
 	while(RemainingBytes > 0U) {
 
 		if (RemainingBytes > DMA_DATA_TRAN_SIZE)
@@ -1222,24 +1515,62 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 			TransferBytes = RemainingBytes;
 		}
 
+		/* Check for 64 byte alignment of DMA destination */
+		if (UnalignedBytes != 0U) {
+			UnalignedBytes = (u32)XFSBL_SIXTY_FOUR_BYTE_VAL -
+				UnalignedBytes;
+			if (UnalignedBytes > TransferBytes) {
+				UnalignedBytes = TransferBytes;
+			}
+			else {
+				TransferBytes = UnalignedBytes;
+			}
+			DestAddr = (PTRSIZE)TempBuf;
+		}
+		else {
+			DestAddr = DestAddress;
+		}
+
 		/**
 		 * Translate address based on type of connection
 		 * If stacked assert the slave select based on address
 		 */
 		QspiAddr = XFsbl_GetQspiAddr((u32 )SrcAddress);
+		if (MultiDie == (u8)TRUE) {
+			/**
+			 * Multiply address by 2 in case of Dual Parallel
+			 * This address is used to calculate the bank crossing
+			 * condition
+			 */
+			if (QspiPsuInstance.Config.ConnectionMode ==
+				XQSPIPSU_CONNECTION_MODE_PARALLEL){
+				OrigAddr = QspiAddr * 2U;
+			} else {
+				OrigAddr = QspiAddr;
+			}
+			/**
+			 * If data to be read spans beyond the current die, then
+			 * calculate Transfer Bytes in current die. Else
+			 * transfer bytes are same
+			 */
+			if ((OrigAddr & BankMask) != ((OrigAddr + TransferBytes)
+					& BankMask)) {
+				TransferBytes = (OrigAddr & BankMask) + BankSize
+					- OrigAddr;
+			}
+		}
 
 		XFsbl_Printf(DEBUG_INFO,".");
 		XFsbl_Printf(DEBUG_DETAILED,
 					"QSPI Read Src 0x%0lx, Dest %0lx, Length %0lx\r\n",
-						QspiAddr, DestAddress, TransferBytes);
+						QspiAddr, DestAddr, TransferBytes);
 
 		/**
 		 * Setup the read command with the specified address and data for the
 		 * Flash
 		 */
 		if ((MacronixFlash == 1U) &&
-				(QspiPsuInstance.Config.BusWidth == XFSBL_QSPI_BUSWIDTH_FOUR)) {
-
+			(QspiPsuInstance.Config.BusWidth == XFSBL_QSPI_BUSWIDTH_FOUR)) {
 			/* Enable QPI mode */
 			Status = MacronixEnableQPIMode(&QspiPsuInstance, ENABLE_QPI);
 			if (Status != XFSBL_SUCCESS) {
@@ -1280,7 +1611,7 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 
 			/*Data*/
 			FlashMsg[3].TxBfrPtr = NULL;
-			FlashMsg[3].RxBfrPtr = (u8 *)DestAddress;
+			FlashMsg[3].RxBfrPtr = (u8 *)DestAddr;
 			FlashMsg[3].ByteCount = TransferBytes;
 			FlashMsg[3].BusWidth = XQSPIPSU_SELECT_MODE_QUADSPI;
 			FlashMsg[3].Flags = XQSPIPSU_MSG_FLAG_RX;
@@ -1303,7 +1634,6 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 				XFsbl_Printf(DEBUG_GENERAL,"XFSBL_ERROR_QSPI_READ\r\n");
 				goto END;
 			}
-
 		} else {
 			WriteBuffer[COMMAND_OFFSET]   = (u8)ReadCommand;
 			WriteBuffer[ADDRESS_1_OFFSET] = (u8)((QspiAddr & 0xFF000000U) >> 24);
@@ -1362,7 +1692,7 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 			}
 
 			FlashMsg[2].TxBfrPtr = NULL;
-			FlashMsg[2].RxBfrPtr = (u8 *)DestAddress;
+			FlashMsg[2].RxBfrPtr = (u8 *)DestAddr;
 			FlashMsg[2].ByteCount = TransferBytes;
 			FlashMsg[2].Flags = XQSPIPSU_MSG_FLAG_RX;
 
@@ -1383,6 +1713,10 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 				goto END;
 			}
 		}
+		if (UnalignedBytes != 0U) {
+			Xil_MemCpy((void *)DestAddress, TempBuf, TransferBytes);
+			UnalignedBytes = 0U;
+		}
 		/**
 		 * Update the variables
 		 */
@@ -1392,6 +1726,7 @@ u32 XFsbl_Qspi32Copy(u32 SrcAddress, PTRSIZE DestAddress, u32 Length)
 
 	}
 	UStatus = XFSBL_SUCCESS;
+
 END:
 	return UStatus;
 }
diff --git a/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.h b/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.h
index b37a657690..d1fc889bbc 100644
--- a/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.h
+++ b/lib/sw_apps/zynqmp_fsbl/src/xfsbl_qspi.h
@@ -1,5 +1,5 @@
 /******************************************************************************
-* Copyright (c) 2015 - 2020 Xilinx, Inc.  All rights reserved.
+* Copyright (c) 2015 - 2021 Xilinx, Inc.  All rights reserved.
 * SPDX-License-Identifier: MIT
 ******************************************************************************/
 
@@ -25,6 +25,8 @@
 *       sk   03/13/19 Added dual parallel support and QPI support for 24bit
 *                     boot mode for Macronix flash parts.
 * 5.0   bsv  11/15/20 Added Macronix 2G flash support
+* 6.0   bsv  07/29/21 Added Winbond 2G flash support
+*       bsv  09/08/21 Added MultiDie read support for Micron 2G flash part
 *
 * </pre>
 *
@@ -126,6 +128,8 @@ extern "C" {
 #define FLASH_SIZE_16MB			(0x1000000U)
 #define BANKSIZE			(FLASH_SIZE_16MB)
 #define SINGLEBANKSIZE			BANKSIZE
+#define BANKSIZE_64MB			(0x4000000U)
+#define BANKMASK_64MB			(~(BANKSIZE_64MB - 1U))
 
 /*
  * Bank mask
@@ -152,6 +156,7 @@ extern "C" {
 #define WINBOND_ID		(0xEFU)
 #define MACRONIX_ID		(0xC2U)
 #define ISSI_ID			(0x9DU)
+#define CYPRESS_ID		(0x34U)
 
 #define FLASH_SIZE_ID_8M		(0x14U)
 #define FLASH_SIZE_ID_16M		(0x15U)