dma: finalize dma_memset() and dma_memcpy()

Adds support for dma_memcpy(), and uses a proper ILRAM allocation scheme
(static linking here) for the temporary buffer in dma_memset().
This commit is contained in:
Lephe 2019-09-15 19:30:57 +02:00
parent 15558c8fb3
commit fc7aab6eba
No known key found for this signature in database
GPG key ID: 1BBA026E13FC0495
3 changed files with 37 additions and 5 deletions

View file

@ -83,16 +83,31 @@ void dma_transfer_noint(int channel, dma_size_t size, uint blocks,
//--- //---
/* dma_memset(): Fast 32-aligned memset /* dma_memset(): Fast 32-aligned memset
This function is your typical memset, except that the destination and size
This function is your typical memset(), except that the destination and size
must be 32-aligned, and that the pattern is 4 bytes instead of one. It is must be 32-aligned, and that the pattern is 4 bytes instead of one. It is
replicated to 32 bytes then used to fill the destination area. This 4-byte replicated to 32 bytes then used to fill the destination area. This 4-byte
fixed size may be lifted in future versions. fixed size may be lifted in future versions.
This function cannot be used with virtualized (P0) addresses.
@dst Destination address (32-aligned) @dst Destination address (32-aligned)
@pattern 4-byte pattern to fill @dst @pattern 4-byte pattern to fill @dst
@size Size of destination area (32-aligned) */ @size Size of destination area (32-aligned) */
void *dma_memset(void *dst, uint32_t pattern, size_t size); void *dma_memset(void *dst, uint32_t pattern, size_t size);
/* dma_memcpy(): Fast 32-aligned memcpy
This function works exactly like memcpy(), but it expects 32-aligned source,
destination, and size, and uses the DMA to efficiently copy.
This function cannot be used with virtualized (P0) addresses.
@dst Destination address (32-aligned)
@src Source address (32-aligned)
@size Size of region (32-aligned) */
void *dma_memcpy(void * restrict dst, const void * restrict src, size_t size);
#endif /* FXCG50 */ #endif /* FXCG50 */
#endif /* GINT_DMA */ #endif /* GINT_DMA */

10
src/dma/memcpy.c Normal file
View file

@ -0,0 +1,10 @@
#include <gint/dma.h>
/* dma_memcpy(): Fast 32-aligned memcpy
   Copies @size bytes from @src to @dst in 32-byte blocks through
   dma_transfer() on channel 1, waits for the transfer to complete, then
   returns @dst. The regions must not overlap (restrict-qualified).

   Only whole 32-byte blocks (size >> 5) are transferred; any remainder below
   32 bytes is not copied. If there is no whole block, nothing is transferred.

   @dst   Destination address (expected to be 32-aligned)
   @src   Source address (expected to be 32-aligned)
   @size  Size of region in bytes (expected to be a multiple of 32)
   Returns @dst. */
void *dma_memcpy(void * restrict dst, const void * restrict src, size_t size)
{
	/* Number of 32-byte DMA blocks covered by the region */
	size_t blocks = size >> 5;

	/* Do not program a zero-length transfer.
	   NOTE(review): on SH-4A-family DMACs a transfer count of 0 is
	   normally interpreted as the maximum count rather than "nothing";
	   confirm whether dma_transfer() already guards against this. */
	if(blocks == 0) return dst;

	dma_transfer(1, DMA_32B, blocks, src, DMA_INC, dst, DMA_INC);
	/* Block until channel 1 is done so that the copy is complete when the
	   caller regains control, as with a regular memcpy() */
	dma_transfer_wait(1);

	return dst;
}

View file

@ -1,13 +1,20 @@
#include <gint/dma.h> #include <gint/dma.h>
/* Allocate a 32-byte buffer in ILRAM */
GALIGNED(32) GILRAM static uint32_t ILbuf[8];
/* dma_memset(): Fast 32-aligned memset */ /* dma_memset(): Fast 32-aligned memset */
void *dma_memset(void *dst, uint32_t l, size_t size) void *dma_memset(void *dst, uint32_t l, size_t size)
{ {
/* TODO: Use a proper IL memory allocation scheme */ /* Prepare the ILRAM buffer. We need to use ILRAM because the DMA will
uint32_t *IL = (void *)0xe5200000; have to read the operand once per block, as opposed to an assembler
for(int i = 0; i < 8; i++) IL[i] = l; routine that would hold it in a register. If we place it in RAM, the
DMA will perform twice as many RAM accesses as the handwritten
assembler, which would be very slow. By using ILRAM we use two
different memory regions, making the DMA faster than the CPU. */
for(int i = 0; i < 8; i++) ILbuf[i] = l;
dma_transfer(1, DMA_32B, size >> 5, IL, DMA_FIXED, dst, DMA_INC); dma_transfer(1, DMA_32B, size >> 5, ILbuf, DMA_FIXED, dst, DMA_INC);
dma_transfer_wait(1); dma_transfer_wait(1);
return dst; return dst;
} }