svstep

SVL-Form

  • svstep RT,SVi,vf
  • svstep. RT,SVi,vf

Pseudo-code:

# svstep: obtain a step value from SVSTATE_NEXT, passing the SVi and vf
# operands through unchanged, and return it in RT.
step <- SVSTATE_NEXT(SVi, vf)
# result is step with 57 zero bits concatenated in front of it
RT <- [0]*57 || step

Special Registers Altered:

CR0                     (if Rc=1)

setvl

SVL-Form

  • setvl RT,RA,SVi,vf,vs,ms
  • setvl. RT,RA,SVi,vf,vs,ms

Pseudo-code:

# setvl: either report a step value, or establish MVL and VL in SVSTATE.
# The step-reporting path is taken only when vf=1 and both vs and ms are 0.
if (vf & (¬vs) & ¬(ms)) = 1 then
    step <- SVSTATE_NEXT(SVi, 0b0)
    # the result register is only written when the RT field is nonzero
    if _RT != 0b00000 then
       GPR(_RT) <- [0]*57 || step
else
    # the immediate encodes the requested length minus one
    VLimm <- SVi + 1
    # vs=1: set a new VL, from RA when the RA field is nonzero,
    # otherwise from the immediate.  vs=0: keep the current VL.
    if vs = 1 then
        if _RA != 0 then
            VL <- (RA|0)[57:63]
        else
            VL <- VLimm[0:6]
    else
        VL <- SVSTATE[7:13]
    # ms=1: set a new MVL from the immediate.  ms=0: keep the current MVL.
    if ms = 1 then
        MVL <- VLimm[0:6]
    else
        MVL <- SVSTATE[0:6]
    # clamp VL so that it never exceeds MVL
    # (this is an assignment: "<-", not the equality test "=")
    if VL > MVL then
        VL <- MVL
    # write MVL and the (possibly clamped) VL back to SVSTATE
    SVSTATE[0:6] <- MVL
    SVSTATE[7:13] <- VL
    # return the resulting VL, only when the RT field is nonzero
    if _RT != 0b00000 then
       GPR(_RT) <- [0]*57 || VL
    # set requested Vertical-First mode, clear persist
    SVSTATE[63] <- vf
    SVSTATE[62] <- 0b0

Special Registers Altered:

CR0                     (if Rc=1)

svremap

SVRM-Form

  • svremap SVme,mi0,mi1,mi2,mo0,mo1,pst

Pseudo-code:

# svremap: copy the instruction's operand fields into SVSTATE.
# mi0, mi1, mi2, mo0, mo1 are stored in five consecutive 2-bit slices,
# SVSTATE[32:41].  Per the original note these are the SVSHAPE0-3 indices
# for registers RA RB RC RT EA/FRS (presumably in that order - confirm).
SVSTATE[32:33] <- mi0
SVSTATE[34:35] <- mi1
SVSTATE[36:37] <- mi2
SVSTATE[38:39] <- mo0
SVSTATE[40:41] <- mo1
# enable bits for RA RB RC RT EA/FRS: SVme is stored in the 5-bit slice
SVSTATE[42:46] <- SVme
# persistence bit (applies to more than one instruction)
SVSTATE[62] <- pst

Special Registers Altered:

None

svshape

SVM-Form

  • svshape SVxd,SVyd,SVzd,SVRM,vf

Pseudo-code:

# svshape: clear SVSTATE[0:31] and all four SVSHAPEs, fill in the SVSHAPEs
# and a 7-bit length (vlen) according to the SVRM mode, then write vlen to
# both SVSTATE[0:6] and SVSTATE[7:13] and vf to SVSTATE[63].
# SVRM values not tested below leave vlen zero and the SVSHAPEs cleared.
# for convenience, VL to be calculated and stored in SVSTATE
vlen <- [0] * 7
itercount[0:6] <- [0] * 7
SVSTATE[0:31] <- [0] * 32
# only overwrite REMAP if "persistence" is zero
if (SVSTATE[62] = 0b0) then
    SVSTATE[32:33] <- 0b00
    SVSTATE[34:35] <- 0b00
    SVSTATE[36:37] <- 0b00
    SVSTATE[38:39] <- 0b00
    SVSTATE[40:41] <- 0b00
    SVSTATE[42:46] <- 0b00000
    # bit 62 is already zero on this path (it was just tested above)
    SVSTATE[62] <- 0b0
    # bit 63 is overwritten with vf at the end of this pseudo-code
    SVSTATE[63] <- 0b0
# clear out all SVSHAPEs
SVSHAPE0[0:31] <- [0] * 32
SVSHAPE1[0:31] <- [0] * 32
SVSHAPE2[0:31] <- [0] * 32
SVSHAPE3[0:31] <- [0] * 32
# set schedule up for multiply
if (SVRM = 0b0000) then
    # VL in Matrix Multiply is xd*yd*zd
    # each operand is widened by two zero bits before multiplying;
    # n[14:20] is then taken as the 7-bit length
    n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
    vlen[0:6] <- n[14:20]
    # set up template in SVSHAPE0, then copy to 1-3
    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
    SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
    SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
    # NOTE(review): 0b11 is labelled "skip z" here but "skip y" for
    # SVSHAPE2 below, and 0b01 is also labelled "skip z" - confirm labels
    SVSHAPE0[28:29] <- 0b11           # skip z
    # copy (SVSHAPE3 keeps the unmodified template)
    SVSHAPE1[0:31] <- SVSHAPE0[0:31]
    SVSHAPE2[0:31] <- SVSHAPE0[0:31]
    SVSHAPE3[0:31] <- SVSHAPE0[0:31]
    # set up FRA
    SVSHAPE1[18:20] <- 0b001          # permute x,z,y
    SVSHAPE1[28:29] <- 0b01           # skip z
    # FRC
    SVSHAPE2[18:20] <- 0b001          # permute x,z,y
    SVSHAPE2[28:29] <- 0b11           # skip y
# set schedule up for FFT butterfly
if (SVRM = 0b0001) then
    # calculate O(N log2 N)
    # n counts consecutive 1 bits of SVxd, starting at SVxd[4] and moving
    # to lower indices, stopping at the first 0 bit or after five bits
    n <- [0] * 3
    do while n < 5
       if SVxd[4-n] = 0 then
           leave
       n <- n + 1
    # length is (SVxd + 1) multiplied by that bit count
    n <- ((0b0 || SVxd) + 1) * n
    # NOTE(review): the slice [1:7] assumes a particular width for the
    # product above - confirm it selects the intended 7 bits
    vlen[0:6] <- n[1:7]
    # set up template in SVSHAPE0, then copy to 1-3
    # for FRA and FRT
    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
    SVSHAPE0[30:31] <- 0b01          # Butterfly mode
    # copy (SVSHAPE3 is not written in this mode and stays cleared)
    SVSHAPE1[0:31] <- SVSHAPE0[0:31]
    SVSHAPE2[0:31] <- SVSHAPE0[0:31]
    # set up FRB and FRS
    SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
    # FRC (coefficients)
    SVSHAPE2[28:29] <- 0b10           # k schedule
# set schedule up for DCT Inner butterfly
# SVRM Mode 2 is for pre-calculated coefficients,
# SVRM Mode 4 is for on-the-fly (Vertical-First Mode)
if (SVRM = 0b0010) | (SVRM = 0b0100) then
    # calculate O(N log2 N)
    # same length calculation as the FFT butterfly mode above
    n <- [0] * 3
    do while n < 5
       if SVxd[4-n] = 0 then
           leave
       n <- n + 1
    n <- ((0b0 || SVxd) + 1) * n
    # NOTE(review): same [1:7] slice as in the FFT mode - confirm
    vlen[0:6] <- n[1:7]
    # set up template in SVSHAPE0, then copy to 1-3
    # set up FRB and FRS
    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
    SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
    # the ydim field position (bits 6:11) carries the DCT sub-mode number
    if (SVRM = 0b0100) then
        SVSHAPE0[6:11] <- 0b000011       # DCT Inner Butterfly mode 4
    else
        SVSHAPE0[6:11] <- 0b000001       # DCT Inner Butterfly mode 2
    SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
    SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
    # copy; SVSHAPE3 is only used (and so only copied) outside mode 4
    SVSHAPE1[0:31] <- SVSHAPE0[0:31]
    SVSHAPE2[0:31] <- SVSHAPE0[0:31]
    if (SVRM != 0b0100) then
        SVSHAPE3[0:31] <- SVSHAPE0[0:31]
    # for FRA and FRT
    # (applied to SVSHAPE0 after the copies, so the copies are unaffected)
    SVSHAPE0[28:29] <- 0b01           # j+halfstep schedule
    # for cos coefficient
    SVSHAPE2[28:29] <- 0b10           # ci (k for mode 4) schedule
    if (SVRM != 0b0100) then
        SVSHAPE3[28:29] <- 0b11           # size schedule
# set schedule up for DCT Outer butterfly
if (SVRM = 0b0011) then
    # calculate O(N log2 N) number of outer butterfly overlapping adds
    vlen[0:6] <- [0] * 7
    n <- 0b000
    size <- 0b0000001
    # itercount starts at (SVxd + 1) moved one bit position towards the
    # high index (a zero enters at bit 0, bit 6 is dropped); assuming
    # MSB0 bit numbering this halves it
    itercount[0:6] <- (0b00 || SVxd) + 0b0000001
    itercount[0:6] <- (0b0 || itercount[0:5])
    # one pass per consecutive 1 bit of SVxd (same scan as the other modes)
    do while n < 5
       if SVxd[4-n] = 0 then
           leave
       n <- n + 1
       # accumulate (itercount - 1) * size into vlen
       count <- (itercount - 0b0000001) * size
       vlen[0:6] <- vlen + count[7:13]
       # size moves one bit towards the low index (doubles, assuming MSB0);
       # itercount moves the other way (halves, assuming MSB0)
       size[0:6] <- (size[1:6] || 0b0)
       itercount[0:6] <- (0b0 || itercount[0:5])
    # set up template in SVSHAPE0, then copy to 1-3
    # set up FRB and FRS
    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
    SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
    SVSHAPE0[6:11] <- 0b000010       # DCT Butterfly mode
    SVSHAPE0[18:20] <- 0b100         # DCT Outer Butterfly sub-mode
    # copy (SVSHAPE3 is not written in this mode and stays cleared)
    SVSHAPE1[0:31] <- SVSHAPE0[0:31]
    SVSHAPE2[0:31] <- SVSHAPE0[0:31]
    # for FRA and FRT
    SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
# set schedule up for DCT COS table generation
if (SVRM = 0b0101) then
    # calculate O(N log2 N)
    vlen[0:6] <- [0] * 7
    # itercount starts at (SVxd + 1) moved one bit position towards the
    # high index (halved, assuming MSB0 bit numbering)
    itercount[0:6] <- (0b00 || SVxd) + 0b0000001
    itercount[0:6] <- (0b0 || itercount[0:5])
    n <- [0] * 3
    # one pass per consecutive 1 bit of SVxd: add itercount to vlen, then
    # move itercount one more bit position (halve, assuming MSB0)
    do while n < 5
       if SVxd[4-n] = 0 then
           leave
       n <- n + 1
       vlen[0:6] <- vlen + itercount
       itercount[0:6] <- (0b0 || itercount[0:5])
    # set up template in SVSHAPE0, then copy to 1-3
    # set up FRB and FRS
    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
    SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
    SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
    SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
    # copy (SVSHAPE3 is not written in this mode and stays cleared)
    SVSHAPE1[0:31] <- SVSHAPE0[0:31]
    SVSHAPE2[0:31] <- SVSHAPE0[0:31]
    # for cos coefficient
    SVSHAPE1[28:29] <- 0b10           # ci schedule
    SVSHAPE2[28:29] <- 0b11           # size schedule
# set schedule up for DCT inverse of half-swapped ordering
if (SVRM = 0b0110) then
    # length is simply SVxd + 1; only SVSHAPE0 is set up in this mode
    vlen[0:6] <- (0b00 || SVxd) + 0b0000001
    # set up template in SVSHAPE0
    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
    SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
    SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
# set VL, MVL and Vertical-First
# (the same vlen is written to both the [0:6] and [7:13] fields)
SVSTATE[0:6] <- vlen
SVSTATE[7:13] <- vlen
SVSTATE[63] <- vf

Special Registers Altered:

None