mv.swizzle

Mon May 29 13:27:25 2023 · different

dest[i] = src[swiz[i]]

0    1    2    3
X    Y    Z    W  source
     |         |     
     +----+    |
     .    |    |
+--------------+
|    .    |    .
W    .    Y    .  swizzle
|    .    |    .
|    Y    |    W  Y,W unmodified
|    .    |    .
W    Y    Y    W  dest

    # Split the 12-bit immediate into four 3-bit swizzle selectors,
    # one per lane: lane 0 = X, 1 = Y, 2 = Z, 3 = W.
    for lane in range(4):
        swiz[lane] = imm[3*lane:3*lane+3]
    # The implied destination subvector length comes from the first lane
    # whose selector is 0b001; if no lane holds 0b001 it stays at 4.
    # NOTE(review): the length includes the 0b001 lane itself (lane+1);
    # confirm this is intended, since that lane selects no source element.
    dst_subvl = next((lane+1 for lane in range(4) if swiz[lane] == 0b001), 4)

    def index_src():
        """Yield a (base, offset) pair for every active source swizzle lane.

        For each of the VL elements, the SUBVL swizzle selectors are
        examined in lane order.  ``base`` is the element's first
        sub-element index (element * SUBVL); ``offset`` is either the
        CONSTANT marker or the selector value minus 3.
        """
        for elem in range(VL):
            base = elem*SUBVL
            for lane in range(SUBVL):
                sel = swiz[lane]
                if sel == 0b001: # end: stop processing this element's lanes
                    break
                if sel == 0b000: # skip: this lane produces no source
                    continue
                if sel == 0b010 or sel == 0b011:
                    # NOTE(review): both constant selectors yield the same
                    # CONSTANT marker, so which one was chosen is not
                    # carried in the yielded pair - confirm.
                    yield (base, CONSTANT)
                else:
                    # NOTE(review): remaining selectors 0b100..0b111 give
                    # offsets 1..4 here - confirm whether sel-4 (0..3) was
                    # intended.
                    yield (base, sel-3)

    def index_dest():
        """Yield the destination index of every non-skipped swizzle lane.

        Elements are walked in order; within each element the first
        dst_subvl lanes are visited and any lane whose selector is 0b000
        (skip) is omitted, leaving that destination untouched.
        """
        for elem in range(VL):
            yield from (elem*dst_subvl + lane
                        for lane in range(dst_subvl)
                        if swiz[lane] != 0b000)

    # walk through both source and dest indices simultaneously: zip()
    # pairs the n-th source (base, offset) with the n-th destination
    # index and stops as soon as either generator is exhausted
    for (src_idx, offs), dst_idx in zip(index_src(), index_dest()):
        if offs == CONSTANT:
            # the swizzle selected a constant rather than a source element
            set(RT+dst_idx, CONSTANT)
        else:
            # copy the selected source sub-element into the destination
            move_operation(RT+dst_idx, RA+src_idx+offs)

    def index():
        """Yield elem*SUBVL + lane for each of VL elements, element-major.

        All SUBVL lanes of one element are produced before moving on to
        the next element.
        """
        yield from (elem*SUBVL + lane
                    for elem in range(VL)
                    for lane in range(SUBVL))

    # apply the operation to each flattened sub-element, offset from RA
    for elem_idx in index():
        operation_on(RA+elem_idx)

    # yield destination indices with the subvector (dst_subvl) loop either
    # outermost (outer true) or innermost inside the VL loop (outer false)
    def index_dest(outer):
        if not outer:
            # element-major: all dst_subvl lanes of one element are adjacent
            for elem in range(VL):
                yield from (elem*dst_subvl + lane
                            for lane in range(dst_subvl))
            return
        # lane-major: lane 0 of every element first, then lane 1, ...
        for lane in range(dst_subvl):
            yield from (lane*VL + elem for elem in range(VL))

    # yield source indices with the subvector (SUBVL) loop either
    # outermost (outer true) or innermost inside the VL loop (outer false)
    def index_src(outer):
        if not outer:
            # element-major: all SUBVL lanes of one element are adjacent
            for elem in range(VL):
                yield from (elem*SUBVL + lane for lane in range(SUBVL))
            return
        # lane-major: lane 0 of every element first, then lane 1, ...
        for lane in range(SUBVL):
            yield from (lane*VL + elem for elem in range(VL))

if VERTICAL_FIRST:
    # must run through SUBVL or dst_subvl elements, to keep
    # the subvector "together".  weirdness occurs due to
    # PACK_en/UNPACK_en
    num_runs = SUBVL # 1-4
    if PACK_en:
        num_runs = dst_subvl # destination still an inner loop
    if PACK_en and UNPACK_en:
        num_runs = 1 # both are outer loops
    for substep in num_runs:
        (src_idx, offs) = yield from index_src(PACK_en)
        dst_idx = yield from index_dst(UNPACK_en)
        move_operation(RT+dst_idx, RA+src_idx+offs)

0.5	6.10	11.15	16.27	28.31	name	Form
PO	RTp	RAp	imm	0011	mv.swiz	DQ-Form
PO	RTp	RAp	imm	1011	fmv.swiz	DQ-Form

imm	0.2	3.5	6.8	9.11
swizzle	X	Y	Z	W
pixel	R	G	B	A
index	0	1	2	3

swizzle name	source	dest	half
X	RA	RT	lo-half
Y	RA	RT	hi-half
Z	RA+1	RT+1	lo-half
W	RA+1	RT+1	hi-half

mv.swizzle

Format

Pack/Unpack Mode: