New instructions for CR/INT predication

Mon May 29 13:27:25 2023 · Destination elwidth overrides still apply

    |0     |6   |9 |11|12   |16   |19  |22   |26   |31|
    | PO   | RT    |M |fmsk |BFA  |XO  |fmap | XO  |Rc|

    creg <- CR[4*BFA+32:4*BFA+35] 
    n <- (¬fmap ^ creg) & fmsk
    result <- (n != 0) if M else (n == fmsk)
    RT <- [0] * 63 || result
    if Rc then
        CR0 <- analyse(RT)

    CR0        (Rc=1)

    |0     |6   |9 |11|12   |16   |19  |22   |26   |31|
    | PO   | RT    |M |fmsk |BFA  |XO  |fmap | XO  |Rc|

    creg = CR[4*BFA+32:4*BFA+35]
    result = (¬fmap ^ creg) & fmsk
    RT = [0] * 60 || result
    if Rc:
        CR0 = analyse(RT)

    |0     |6   |9 |11|12   |16   |19  |22   |26   |31|
    | PO   | RA    |M |fmsk |BF   |XO  |fmap | XO     |
    | PO   | BT    |M |fmsk |BF   |XO  |fmap | XO     |
    | PO   | BF |  |M |fmsk |BF   |XO  |fmap | XO     |

    a = (RA|0)
    creg = a[60:63]
    result = (¬fmap ^ creg) & fmsk
    if M:
        result |= CR[4*BF+32:4*BF+35]  & ~fmsk
    CR[4*BF+32:4*BF+35]  = result

    CR Field BF

    |0     |6   |9 |11|12   |16   |19  |22   |26   |31|
    | PO   | RA    |M |fmsk |BF   |XO  |fmap | XO     |
    | PO   | BT    |M |fmsk |BF   |XO  |fmap | XO     |
    | PO   | BF |  |M |fmsk |BF   |XO  |fmap | XO     |

    reg = (RA|0)
    creg = reg[63] || reg[63] || reg[63] || reg[63]
    result = (¬fmap ^ creg) & fmsk
    if M:
        result |= CR[4*BF+32:4*BF+35] & ~fmsk
    CR[4*BF+32:4*BF+35]  = result

    CR Field BF

    |0     |6   |9 |11|12   |16   |19  |22   |26   |31|
    | PO   | BF |  |M |fmsk |BFA  |XO  |fmap | XO     |

    result = fmsk & CR[4*BFA+32:4*BFA+35] 
    if M:
        result |= CR[4*BF+32:4*BF+35]  & ~fmsk
    result ^= fmap
    CR[4*BF+32:4*BF+35]  = result

    CR Field BF

    |0     |6   |9 |11|12   |16   |19  |22   |26   |31|
    | PO   | BT    |M |fmsk |BFA  |XO  |fmap | XO     |

    creg = CR[4*BFA+32:4*BFA+35]
    n = (¬fmap ^ creg) & fmsk
    result = (n != 0) if M else (n == fmsk)
    CR[32+BT] = result

    CR[BT+32]

    mtcri BF, fmap    mtcrweird BF, r0, 0, 0b1111,~fmap
    mtcrset BF, fmsk  mtcrweird BF, r0, 1, fmsk,0b0000
    mtcrclr BF, fmsk  mtcrweird BF, r0, 1, fmsk,0b1111

    for i in range(VL):
        if BB.isvec: # Vector CR Field source?
            creg = CR{BB+i}
        else:
            creg = CR{BB}
        n = (¬fmap ^ creg) & fmsk
        result = (n != 0) if M else (n == fmsk)
        if RT.isvec:
            # TODO: the RT.elwidth override also needs to be added here
            # note, yes, really, the CR's elwidth field determines
            # the bit-packing into the INT!
            if BB.elwidth == 0b00:
                # pack 1 result into 64-bit registers
                iregs[RT+i][0..62] = 0
                iregs[RT+i][63] = result # sets LSB to result
            if BB.elwidth == 0b01:
                # pack 2 results sequentially into INT registers
                iregs[RT+i//2][0..61] = 0
                iregs[RT+i//2][63-(i%2)] = result
            if BB.elwidth == 0b10:
                # pack 4 results sequentially into INT registers
                iregs[RT+i//4][0..59] = 0
                iregs[RT+i//4][63-(i%4)] = result
            if BB.elwidth == 0b11:
                # pack 8 results sequentially into INT registers
                iregs[RT+i//8][0..55] = 0
                iregs[RT+i//8][63-(i%8)] = result
        else:
            # scalar RT destination: exceeding VL=64 is UNDEFINED
            iregs[RT][63-i] = result # results also in scalar INT
            # only mapreduce mode (/mr) allows continuation here
            if not SVRM.mapreduce: break

    for i in range(VL):
        if BB.isvec:
            creg = CR{BB+i}
        else:
            creg = CR{BB}
        result = (¬fmap ^ creg) & fmsk # 4-bit result
        if RT.isvec:
            # RT.elwidth override can affect the packing
            bwid = {0b00:64, 0b01:8, 0b10:16, 0b11:32}[RT.elwidth]
            t4, t8 = min(4, bwid//2), min(8, bwid//2)
            # yes, really, the CR's elwidth field determines
            # the bit-packing into the INT!
            if BB.elwidth == 0b00:
                # pack 1 result into 64-bit registers
                idx, boff = i, 0
            if BB.elwidth == 0b01:
                # pack 2 results sequentially into INT registers
                idx, boff = i//2, i%2
            if BB.elwidth == 0b10:
                # pack 4 results sequentially into INT registers
                idx, boff = i//t4, i%t4
            if BB.elwidth == 0b11:
                # pack 8 results sequentially into INT registers
                idx, boff = i//t8, i%t8
        else:
            # scalar RT destination: exceeding VL=16 is UNDEFINED
            idx, boff = 0, i
        # store 4-bit result in Vector starting from RT
        iregs[RT+idx][60-boff*4:63-boff*4] = result
        if not RT.isvec:
            # only mapreduce mode (/mr) allows continuation here
            if not SVRM.mapreduce: break

    r10 = 0b00010
    sv.mtcrweird/dm=r10/dz cr8.v, 0, 0b0011.0000

New instructions for CR/INT predication

crrweird

mfcrrweird

mtcrrweird

mtcrweird

mcrfm - Move CR Field, masked.

crweirder

Vectorized versions involving GPRs

Predication Examples