Permutation

Generated from include/msa.h. This page contains 72 intrinsics.

int __msa_copy_s_b (v16i8 a, int imm)

Synopsis

int __msa_copy_s_b (v16i8 a, int imm)
#include <msa.h>
Instruction: copy_s.b
Builtin: __builtin_msa_copy_s_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:437

Description

Extract one i8 lane from a, sign-extend it if needed, and return the scalar value.

Operation

return sign_extend(a.i8[imm]);

Header Mapping

#define __msa_copy_s_b __builtin_msa_copy_s_b

long long __msa_copy_s_d (v2i64 a, int imm)

Synopsis

long long __msa_copy_s_d (v2i64 a, int imm)
#include <msa.h>
Instruction: copy_s.d
Builtin: __builtin_msa_copy_s_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:440

Description

Extract one i64 lane from a, sign-extend it if needed, and return the scalar value.

Operation

return a.i64[imm];

Header Mapping

#define __msa_copy_s_d __builtin_msa_copy_s_d

int __msa_copy_s_h (v8i16 a, int imm)

Synopsis

int __msa_copy_s_h (v8i16 a, int imm)
#include <msa.h>
Instruction: copy_s.h
Builtin: __builtin_msa_copy_s_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:438

Description

Extract one i16 lane from a, sign-extend it if needed, and return the scalar value.

Operation

return sign_extend(a.i16[imm]);

Header Mapping

#define __msa_copy_s_h __builtin_msa_copy_s_h

int __msa_copy_s_w (v4i32 a, int imm)

Synopsis

int __msa_copy_s_w (v4i32 a, int imm)
#include <msa.h>
Instruction: copy_s.w
Builtin: __builtin_msa_copy_s_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:439

Description

Extract one i32 lane from a, sign-extend it if needed, and return the scalar value.

Operation

return sign_extend(a.i32[imm]);

Header Mapping

#define __msa_copy_s_w __builtin_msa_copy_s_w

unsigned int __msa_copy_u_b (v16i8 a, int imm)

Synopsis

unsigned int __msa_copy_u_b (v16i8 a, int imm)
#include <msa.h>
Instruction: copy_u.b
Builtin: __builtin_msa_copy_u_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:441

Description

Extract one u8 lane from a, zero-extend it if needed, and return the scalar value.

Operation

return zero_extend(a.u8[imm]);

Header Mapping

#define __msa_copy_u_b __builtin_msa_copy_u_b

unsigned long long __msa_copy_u_d (v2i64 a, int imm)

Synopsis

unsigned long long __msa_copy_u_d (v2i64 a, int imm)
#include <msa.h>
Instruction: copy_u.d
Builtin: __builtin_msa_copy_u_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:444

Description

Extract one u64 lane from a, zero-extend it if needed, and return the scalar value.

Operation

return a.u64[imm];

Header Mapping

#define __msa_copy_u_d __builtin_msa_copy_u_d

unsigned int __msa_copy_u_h (v8i16 a, int imm)

Synopsis

unsigned int __msa_copy_u_h (v8i16 a, int imm)
#include <msa.h>
Instruction: copy_u.h
Builtin: __builtin_msa_copy_u_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:442

Description

Extract one u16 lane from a, zero-extend it if needed, and return the scalar value.

Operation

return zero_extend(a.u16[imm]);

Header Mapping

#define __msa_copy_u_h __builtin_msa_copy_u_h

unsigned int __msa_copy_u_w (v4i32 a, int imm)

Synopsis

unsigned int __msa_copy_u_w (v4i32 a, int imm)
#include <msa.h>
Instruction: copy_u.w
Builtin: __builtin_msa_copy_u_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:443

Description

Extract one u32 lane from a, zero-extend it if needed, and return the scalar value.

Operation

return zero_extend(a.u32[imm]);

Header Mapping

#define __msa_copy_u_w __builtin_msa_copy_u_w

v16i8 __msa_fill_b (int imm)

Synopsis

v16i8 __msa_fill_b (int imm)
#include <msa.h>
Instruction: fill.b
Builtin: __builtin_msa_fill_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:421

Description

Replicate scalar imm into every u8 lane, creating a vector constant.

Operation

dst.u8[0] = truncate_or_extend(imm, 8);
dst.u8[1] = truncate_or_extend(imm, 8);
dst.u8[2] = truncate_or_extend(imm, 8);
dst.u8[3] = truncate_or_extend(imm, 8);
dst.u8[4] = truncate_or_extend(imm, 8);
dst.u8[5] = truncate_or_extend(imm, 8);
dst.u8[6] = truncate_or_extend(imm, 8);
dst.u8[7] = truncate_or_extend(imm, 8);
dst.u8[8] = truncate_or_extend(imm, 8);
dst.u8[9] = truncate_or_extend(imm, 8);
dst.u8[10] = truncate_or_extend(imm, 8);
dst.u8[11] = truncate_or_extend(imm, 8);
dst.u8[12] = truncate_or_extend(imm, 8);
dst.u8[13] = truncate_or_extend(imm, 8);
dst.u8[14] = truncate_or_extend(imm, 8);
dst.u8[15] = truncate_or_extend(imm, 8);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 2

Header Mapping

#define __msa_fill_b __builtin_msa_fill_b

v2i64 __msa_fill_d (long long imm)

Synopsis

v2i64 __msa_fill_d (long long imm)
#include <msa.h>
Instruction: fill.d
Builtin: __builtin_msa_fill_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:424

Description

Replicate scalar imm into every u64 lane, creating a vector constant.

Operation

dst.u64[0] = truncate_or_extend(imm, 64);
dst.u64[1] = truncate_or_extend(imm, 64);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 2

Header Mapping

#define __msa_fill_d __builtin_msa_fill_d

v8i16 __msa_fill_h (int imm)

Synopsis

v8i16 __msa_fill_h (int imm)
#include <msa.h>
Instruction: fill.h
Builtin: __builtin_msa_fill_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:422

Description

Replicate scalar imm into every u16 lane, creating a vector constant.

Operation

dst.u16[0] = truncate_or_extend(imm, 16);
dst.u16[1] = truncate_or_extend(imm, 16);
dst.u16[2] = truncate_or_extend(imm, 16);
dst.u16[3] = truncate_or_extend(imm, 16);
dst.u16[4] = truncate_or_extend(imm, 16);
dst.u16[5] = truncate_or_extend(imm, 16);
dst.u16[6] = truncate_or_extend(imm, 16);
dst.u16[7] = truncate_or_extend(imm, 16);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 2

Header Mapping

#define __msa_fill_h __builtin_msa_fill_h

v4i32 __msa_fill_w (int imm)

Synopsis

v4i32 __msa_fill_w (int imm)
#include <msa.h>
Instruction: fill.w
Builtin: __builtin_msa_fill_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:423

Description

Replicate scalar imm into every u32 lane, creating a vector constant.

Operation

dst.u32[0] = truncate_or_extend(imm, 32);
dst.u32[1] = truncate_or_extend(imm, 32);
dst.u32[2] = truncate_or_extend(imm, 32);
dst.u32[3] = truncate_or_extend(imm, 32);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 2

Header Mapping

#define __msa_fill_w __builtin_msa_fill_w

v16i8 __msa_ilvev_b (v16i8 a, v16i8 b)

Synopsis

v16i8 __msa_ilvev_b (v16i8 a, v16i8 b)
#include <msa.h>
Instruction: ilvev.b
Builtin: __builtin_msa_ilvev_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:390

Description

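Interleave the even-numbered u8 lanes of the two sources: each even destination lane receives the even lane of b at the same index, and the odd destination lane next to it receives that same even lane of a.

Operation

for (i = 0; i < 8; i++) {
    dst.u8[2*i]     = b.u8[2*i];
    dst.u8[2*i + 1] = a.u8[2*i];
}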

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvev_b __builtin_msa_ilvev_b

v2i64 __msa_ilvev_d (v2i64 a, v2i64 b)

Synopsis

v2i64 __msa_ilvev_d (v2i64 a, v2i64 b)
#include <msa.h>
Instruction: ilvev.d
Builtin: __builtin_msa_ilvev_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:393

Description

Interleave the even-numbered u64 lanes of the two sources: destination lane 0 receives lane 0 of b and destination lane 1 receives lane 0 of a.

Operation

dst.u64[0] = b.u64[0];
dst.u64[1] = a.u64[0];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvev_d __builtin_msa_ilvev_d

v8i16 __msa_ilvev_h (v8i16 a, v8i16 b)

Synopsis

v8i16 __msa_ilvev_h (v8i16 a, v8i16 b)
#include <msa.h>
Instruction: ilvev.h
Builtin: __builtin_msa_ilvev_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:391

Description

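Interleave the even-numbered u16 lanes of the two sources: each even destination lane receives the even lane of b at the same index, and the odd destination lane next to it receives that same even lane of a.

Operation

for (i = 0; i < 4; i++) {
    dst.u16[2*i]     = b.u16[2*i];
    dst.u16[2*i + 1] = a.u16[2*i];
}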

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvev_h __builtin_msa_ilvev_h

v4i32 __msa_ilvev_w (v4i32 a, v4i32 b)

Synopsis

v4i32 __msa_ilvev_w (v4i32 a, v4i32 b)
#include <msa.h>
Instruction: ilvev.w
Builtin: __builtin_msa_ilvev_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:392

Description

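Interleave the even-numbered u32 lanes of the two sources: each even destination lane receives the even lane of b at the same index, and the odd destination lane next to it receives that same even lane of a.

Operation

for (i = 0; i < 2; i++) {
    dst.u32[2*i]     = b.u32[2*i];
    dst.u32[2*i + 1] = a.u32[2*i];
}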

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvev_w __builtin_msa_ilvev_w

v16i8 __msa_ilvl_b (v16i8 a, v16i8 b)

Synopsis

v16i8 __msa_ilvl_b (v16i8 a, v16i8 b)
#include <msa.h>
Instruction: ilvl.b
Builtin: __builtin_msa_ilvl_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:382

Description

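Interleave the left (upper-half, high-index) u8 lanes of a and b, useful when expanding two packed streams. Even destination lanes come from b and odd destination lanes from a.

Operation

for (i = 0; i < 8; i++) {
    dst.u8[2*i]     = b.u8[8 + i];
    dst.u8[2*i + 1] = a.u8[8 + i];
}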

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvl_b __builtin_msa_ilvl_b

v2i64 __msa_ilvl_d (v2i64 a, v2i64 b)

Synopsis

v2i64 __msa_ilvl_d (v2i64 a, v2i64 b)
#include <msa.h>
Instruction: ilvl.d
Builtin: __builtin_msa_ilvl_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:385

Description

Interleave the left (upper-half, high-index) u64 lanes of a and b, useful when expanding two packed streams. Destination lane 0 comes from b and destination lane 1 from a.

Operation

dst.u64[0] = b.u64[1];
dst.u64[1] = a.u64[1];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvl_d __builtin_msa_ilvl_d

v8i16 __msa_ilvl_h (v8i16 a, v8i16 b)

Synopsis

v8i16 __msa_ilvl_h (v8i16 a, v8i16 b)
#include <msa.h>
Instruction: ilvl.h
Builtin: __builtin_msa_ilvl_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:383

Description

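Interleave the left (upper-half, high-index) u16 lanes of a and b, useful when expanding two packed streams. Even destination lanes come from b and odd destination lanes from a.

Operation

for (i = 0; i < 4; i++) {
    dst.u16[2*i]     = b.u16[4 + i];
    dst.u16[2*i + 1] = a.u16[4 + i];
}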

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvl_h __builtin_msa_ilvl_h

v4i32 __msa_ilvl_w (v4i32 a, v4i32 b)

Synopsis

v4i32 __msa_ilvl_w (v4i32 a, v4i32 b)
#include <msa.h>
Instruction: ilvl.w
Builtin: __builtin_msa_ilvl_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:384

Description

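Interleave the left (upper-half, high-index) u32 lanes of a and b, useful when expanding two packed streams. Even destination lanes come from b and odd destination lanes from a.

Operation

for (i = 0; i < 2; i++) {
    dst.u32[2*i]     = b.u32[2 + i];
    dst.u32[2*i + 1] = a.u32[2 + i];
}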

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvl_w __builtin_msa_ilvl_w

v16i8 __msa_ilvod_b (v16i8 a, v16i8 b)

Synopsis

v16i8 __msa_ilvod_b (v16i8 a, v16i8 b)
#include <msa.h>
Instruction: ilvod.b
Builtin: __builtin_msa_ilvod_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:394

Description

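Interleave the odd-numbered u8 lanes of the two sources: each even destination lane receives the odd lane of b just above it, and the odd destination lane receives the odd lane of a at the same index.

Operation

for (i = 0; i < 8; i++) {
    dst.u8[2*i]     = b.u8[2*i + 1];
    dst.u8[2*i + 1] = a.u8[2*i + 1];
}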

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvod_b __builtin_msa_ilvod_b

v2i64 __msa_ilvod_d (v2i64 a, v2i64 b)

Synopsis

v2i64 __msa_ilvod_d (v2i64 a, v2i64 b)
#include <msa.h>
Instruction: ilvod.d
Builtin: __builtin_msa_ilvod_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:397

Description

Interleave the odd-numbered u64 lanes of the two sources: destination lane 0 receives lane 1 of b and destination lane 1 receives lane 1 of a.

Operation

dst.u64[0] = b.u64[1];
dst.u64[1] = a.u64[1];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvod_d __builtin_msa_ilvod_d

v8i16 __msa_ilvod_h (v8i16 a, v8i16 b)

Synopsis

v8i16 __msa_ilvod_h (v8i16 a, v8i16 b)
#include <msa.h>
Instruction: ilvod.h
Builtin: __builtin_msa_ilvod_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:395

Description

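Interleave the odd-numbered u16 lanes of the two sources: each even destination lane receives the odd lane of b just above it, and the odd destination lane receives the odd lane of a at the same index.

Operation

for (i = 0; i < 4; i++) {
    dst.u16[2*i]     = b.u16[2*i + 1];
    dst.u16[2*i + 1] = a.u16[2*i + 1];
}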

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvod_h __builtin_msa_ilvod_h

v4i32 __msa_ilvod_w (v4i32 a, v4i32 b)

Synopsis

v4i32 __msa_ilvod_w (v4i32 a, v4i32 b)
#include <msa.h>
Instruction: ilvod.w
Builtin: __builtin_msa_ilvod_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:396

Description

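Interleave the odd-numbered u32 lanes of the two sources: each even destination lane receives the odd lane of b just above it, and the odd destination lane receives the odd lane of a at the same index.

Operation

for (i = 0; i < 2; i++) {
    dst.u32[2*i]     = b.u32[2*i + 1];
    dst.u32[2*i + 1] = a.u32[2*i + 1];
}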

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvod_w __builtin_msa_ilvod_w

v16i8 __msa_ilvr_b (v16i8 a, v16i8 b)

Synopsis

v16i8 __msa_ilvr_b (v16i8 a, v16i8 b)
#include <msa.h>
Instruction: ilvr.b
Builtin: __builtin_msa_ilvr_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:386

Description

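Interleave the right (lower-half, low-index) u8 lanes of a and b, useful when expanding two packed streams. Even destination lanes come from b and odd destination lanes from a.

Operation

for (i = 0; i < 8; i++) {
    dst.u8[2*i]     = b.u8[i];
    dst.u8[2*i + 1] = a.u8[i];
}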

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvr_b __builtin_msa_ilvr_b

v2i64 __msa_ilvr_d (v2i64 a, v2i64 b)

Synopsis

v2i64 __msa_ilvr_d (v2i64 a, v2i64 b)
#include <msa.h>
Instruction: ilvr.d
Builtin: __builtin_msa_ilvr_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:389

Description

Interleave the right (lower-half, low-index) u64 lanes of a and b, useful when expanding two packed streams. Destination lane 0 comes from b and destination lane 1 from a.

Operation

dst.u64[0] = b.u64[0];
dst.u64[1] = a.u64[0];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvr_d __builtin_msa_ilvr_d

v8i16 __msa_ilvr_h (v8i16 a, v8i16 b)

Synopsis

v8i16 __msa_ilvr_h (v8i16 a, v8i16 b)
#include <msa.h>
Instruction: ilvr.h
Builtin: __builtin_msa_ilvr_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:387

Description

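Interleave the right (lower-half, low-index) u16 lanes of a and b, useful when expanding two packed streams. Even destination lanes come from b and odd destination lanes from a.

Operation

for (i = 0; i < 4; i++) {
    dst.u16[2*i]     = b.u16[i];
    dst.u16[2*i + 1] = a.u16[i];
}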

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvr_h __builtin_msa_ilvr_h

v4i32 __msa_ilvr_w (v4i32 a, v4i32 b)

Synopsis

v4i32 __msa_ilvr_w (v4i32 a, v4i32 b)
#include <msa.h>
Instruction: ilvr.w
Builtin: __builtin_msa_ilvr_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:388

Description

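Interleave the right (lower-half, low-index) u32 lanes of a and b, useful when expanding two packed streams. Even destination lanes come from b and odd destination lanes from a.

Operation

for (i = 0; i < 2; i++) {
    dst.u32[2*i]     = b.u32[i];
    dst.u32[2*i + 1] = a.u32[i];
}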

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_ilvr_w __builtin_msa_ilvr_w

v16i8 __msa_insert_b (v16i8 a, int imm, int imm1)

Synopsis

v16i8 __msa_insert_b (v16i8 a, int imm, int imm1)
#include <msa.h>
Instruction: insert.b
Builtin: __builtin_msa_insert_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:445

Description

Start from a and replace the selected u8 lane with the scalar value argument.

Operation

dst = a;
dst.u8[imm] = imm1;

Header Mapping

#define __msa_insert_b __builtin_msa_insert_b

v2i64 __msa_insert_d (v2i64 a, int imm, long long imm1)

Synopsis

v2i64 __msa_insert_d (v2i64 a, int imm, long long imm1)
#include <msa.h>
Instruction: insert.d
Builtin: __builtin_msa_insert_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:448

Description

Start from a and replace the selected u64 lane with the scalar value argument.

Operation

dst = a;
dst.u64[imm] = imm1;

Header Mapping

#define __msa_insert_d __builtin_msa_insert_d

v8i16 __msa_insert_h (v8i16 a, int imm, int imm1)

Synopsis

v8i16 __msa_insert_h (v8i16 a, int imm, int imm1)
#include <msa.h>
Instruction: insert.h
Builtin: __builtin_msa_insert_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:446

Description

Start from a and replace the selected u16 lane with the scalar value argument.

Operation

dst = a;
dst.u16[imm] = imm1;

Header Mapping

#define __msa_insert_h __builtin_msa_insert_h

v4i32 __msa_insert_w (v4i32 a, int imm, int imm1)

Synopsis

v4i32 __msa_insert_w (v4i32 a, int imm, int imm1)
#include <msa.h>
Instruction: insert.w
Builtin: __builtin_msa_insert_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:447

Description

Start from a and replace the selected u32 lane with the scalar value argument.

Operation

dst = a;
dst.u32[imm] = imm1;

Header Mapping

#define __msa_insert_w __builtin_msa_insert_w

v16i8 __msa_insve_b (v16i8 a, int imm, v16i8 b)

Synopsis

v16i8 __msa_insve_b (v16i8 a, int imm, v16i8 b)
#include <msa.h>
Instruction: insve.b
Builtin: __builtin_msa_insve_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:449

Description

Start from a and replace the selected u8 lane with lane 0 from b.

Operation

dst = a;
dst.u8[imm] = b.u8[0];

Header Mapping

#define __msa_insve_b __builtin_msa_insve_b

v2i64 __msa_insve_d (v2i64 a, int imm, v2i64 b)

Synopsis

v2i64 __msa_insve_d (v2i64 a, int imm, v2i64 b)
#include <msa.h>
Instruction: insve.d
Builtin: __builtin_msa_insve_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:452

Description

Start from a and replace the selected u64 lane with lane 0 from b.

Operation

dst = a;
dst.u64[imm] = b.u64[0];

Header Mapping

#define __msa_insve_d __builtin_msa_insve_d

v8i16 __msa_insve_h (v8i16 a, int imm, v8i16 b)

Synopsis

v8i16 __msa_insve_h (v8i16 a, int imm, v8i16 b)
#include <msa.h>
Instruction: insve.h
Builtin: __builtin_msa_insve_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:450

Description

Start from a and replace the selected u16 lane with lane 0 from b.

Operation

dst = a;
dst.u16[imm] = b.u16[0];

Header Mapping

#define __msa_insve_h __builtin_msa_insve_h

v4i32 __msa_insve_w (v4i32 a, int imm, v4i32 b)

Synopsis

v4i32 __msa_insve_w (v4i32 a, int imm, v4i32 b)
#include <msa.h>
Instruction: insve.w
Builtin: __builtin_msa_insve_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:451

Description

Start from a and replace the selected u32 lane with lane 0 from b.

Operation

dst = a;
dst.u32[imm] = b.u32[0];

Header Mapping

#define __msa_insve_w __builtin_msa_insve_w

v16i8 __msa_ldi_b (int imm)

Synopsis

v16i8 __msa_ldi_b (int imm)
#include <msa.h>
Instruction: ldi.b
Builtin: __builtin_msa_ldi_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:461

Description

Fill every u8 lane with the signed 10-bit immediate imm (-512 to 511), keeping only its low 8 bits.

Operation

dst.u8[0] = sign_extend_or_zero_extend(imm, 8);
dst.u8[1] = sign_extend_or_zero_extend(imm, 8);
dst.u8[2] = sign_extend_or_zero_extend(imm, 8);
dst.u8[3] = sign_extend_or_zero_extend(imm, 8);
dst.u8[4] = sign_extend_or_zero_extend(imm, 8);
dst.u8[5] = sign_extend_or_zero_extend(imm, 8);
dst.u8[6] = sign_extend_or_zero_extend(imm, 8);
dst.u8[7] = sign_extend_or_zero_extend(imm, 8);
dst.u8[8] = sign_extend_or_zero_extend(imm, 8);
dst.u8[9] = sign_extend_or_zero_extend(imm, 8);
dst.u8[10] = sign_extend_or_zero_extend(imm, 8);
dst.u8[11] = sign_extend_or_zero_extend(imm, 8);
dst.u8[12] = sign_extend_or_zero_extend(imm, 8);
dst.u8[13] = sign_extend_or_zero_extend(imm, 8);
dst.u8[14] = sign_extend_or_zero_extend(imm, 8);
dst.u8[15] = sign_extend_or_zero_extend(imm, 8);

Header Mapping

#define __msa_ldi_b __builtin_msa_ldi_b

v2i64 __msa_ldi_d (int imm)

Synopsis

v2i64 __msa_ldi_d (int imm)
#include <msa.h>
Instruction: ldi.d
Builtin: __builtin_msa_ldi_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:464

Description

Fill every u64 lane with the signed 10-bit immediate imm (-512 to 511), sign-extended to 64 bits.

Operation

dst.u64[0] = sign_extend_or_zero_extend(imm, 64);
dst.u64[1] = sign_extend_or_zero_extend(imm, 64);

Header Mapping

#define __msa_ldi_d __builtin_msa_ldi_d

v8i16 __msa_ldi_h (int imm)

Synopsis

v8i16 __msa_ldi_h (int imm)
#include <msa.h>
Instruction: ldi.h
Builtin: __builtin_msa_ldi_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:462

Description

Fill every u16 lane with the signed 10-bit immediate imm (-512 to 511), sign-extended to 16 bits.

Operation

dst.u16[0] = sign_extend_or_zero_extend(imm, 16);
dst.u16[1] = sign_extend_or_zero_extend(imm, 16);
dst.u16[2] = sign_extend_or_zero_extend(imm, 16);
dst.u16[3] = sign_extend_or_zero_extend(imm, 16);
dst.u16[4] = sign_extend_or_zero_extend(imm, 16);
dst.u16[5] = sign_extend_or_zero_extend(imm, 16);
dst.u16[6] = sign_extend_or_zero_extend(imm, 16);
dst.u16[7] = sign_extend_or_zero_extend(imm, 16);

Header Mapping

#define __msa_ldi_h __builtin_msa_ldi_h

v4i32 __msa_ldi_w (int imm)

Synopsis

v4i32 __msa_ldi_w (int imm)
#include <msa.h>
Instruction: ldi.w
Builtin: __builtin_msa_ldi_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:463

Description

Fill every u32 lane with the signed 10-bit immediate imm (-512 to 511), sign-extended to 32 bits.

Operation

dst.u32[0] = sign_extend_or_zero_extend(imm, 32);
dst.u32[1] = sign_extend_or_zero_extend(imm, 32);
dst.u32[2] = sign_extend_or_zero_extend(imm, 32);
dst.u32[3] = sign_extend_or_zero_extend(imm, 32);

Header Mapping

#define __msa_ldi_w __builtin_msa_ldi_w

v16i8 __msa_move_v (v16i8 a)

Synopsis

v16i8 __msa_move_v (v16i8 a)
#include <msa.h>
Instruction: move.v
Builtin: __builtin_msa_move_v
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:580

Description

Return the source vector unchanged; this wrapper exposes the move/copy builtin form.

Operation

dst = a;

Header Mapping

#define __msa_move_v __builtin_msa_move_v

v16i8 __msa_pckev_b (v16i8 a, v16i8 b)

Synopsis

v16i8 __msa_pckev_b (v16i8 a, v16i8 b)
#include <msa.h>
Instruction: pckev.b
Builtin: __builtin_msa_pckev_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:374

Description

Pack the even-numbered u8 lanes of the two sources: the even lanes of b fill the lower half of the destination and the even lanes of a fill the upper half.

Operation

for (i = 0; i < 8; i++) {
    dst.u8[i]     = b.u8[2*i];
    dst.u8[8 + i] = a.u8[2*i];
}

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckev_b __builtin_msa_pckev_b

v2i64 __msa_pckev_d (v2i64 a, v2i64 b)

Synopsis

v2i64 __msa_pckev_d (v2i64 a, v2i64 b)
#include <msa.h>
Instruction: pckev.d
Builtin: __builtin_msa_pckev_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:377

Description

Pack the even-numbered u64 lanes of the two sources: lane 0 of b fills destination lane 0 and lane 0 of a fills destination lane 1.

Operation

dst.u64[0] = b.u64[0];
dst.u64[1] = a.u64[0];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckev_d __builtin_msa_pckev_d

v8i16 __msa_pckev_h (v8i16 a, v8i16 b)

Synopsis

v8i16 __msa_pckev_h (v8i16 a, v8i16 b)
#include <msa.h>
Instruction: pckev.h
Builtin: __builtin_msa_pckev_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:375

Description

Pack the even-numbered u16 lanes of the two sources: the even lanes of b fill the lower half of the destination and the even lanes of a fill the upper half.

Operation

for (i = 0; i < 4; i++) {
    dst.u16[i]     = b.u16[2*i];
    dst.u16[4 + i] = a.u16[2*i];
}

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckev_h __builtin_msa_pckev_h

v4i32 __msa_pckev_w (v4i32 a, v4i32 b)

Synopsis

v4i32 __msa_pckev_w (v4i32 a, v4i32 b)
#include <msa.h>
Instruction: pckev.w
Builtin: __builtin_msa_pckev_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:376

Description

Pack the even-numbered u32 lanes of the two sources: the even lanes of b fill the lower half of the destination and the even lanes of a fill the upper half.

Operation

for (i = 0; i < 2; i++) {
    dst.u32[i]     = b.u32[2*i];
    dst.u32[2 + i] = a.u32[2*i];
}

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckev_w __builtin_msa_pckev_w

v16i8 __msa_pckod_b (v16i8 a, v16i8 b)

Synopsis

v16i8 __msa_pckod_b (v16i8 a, v16i8 b)
#include <msa.h>
Instruction: pckod.b
Builtin: __builtin_msa_pckod_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:378

Description

Pack the odd-numbered u8 lanes of the two sources: the odd lanes of b fill the lower half of the destination and the odd lanes of a fill the upper half.

Operation

for (i = 0; i < 8; i++) {
    dst.u8[i]     = b.u8[2*i + 1];
    dst.u8[8 + i] = a.u8[2*i + 1];
}

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckod_b __builtin_msa_pckod_b

v2i64 __msa_pckod_d (v2i64 a, v2i64 b)

Synopsis

v2i64 __msa_pckod_d (v2i64 a, v2i64 b)
#include <msa.h>
Instruction: pckod.d
Builtin: __builtin_msa_pckod_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:381

Description

Pack the odd-numbered u64 lanes of the two sources: lane 1 of b fills destination lane 0 and lane 1 of a fills destination lane 1.

Operation

dst.u64[0] = b.u64[1];
dst.u64[1] = a.u64[1];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckod_d __builtin_msa_pckod_d

v8i16 __msa_pckod_h (v8i16 a, v8i16 b)

Synopsis

v8i16 __msa_pckod_h (v8i16 a, v8i16 b)
#include <msa.h>
Instruction: pckod.h
Builtin: __builtin_msa_pckod_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:379

Description

Pack the odd-numbered u16 lanes of the two sources: the odd lanes of b fill the lower half of the destination and the odd lanes of a fill the upper half.

Operation

for (i = 0; i < 4; i++) {
    dst.u16[i]     = b.u16[2*i + 1];
    dst.u16[4 + i] = a.u16[2*i + 1];
}

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckod_h __builtin_msa_pckod_h

v4i32 __msa_pckod_w (v4i32 a, v4i32 b)

Synopsis

v4i32 __msa_pckod_w (v4i32 a, v4i32 b)
#include <msa.h>
Instruction: pckod.w
Builtin: __builtin_msa_pckod_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:380

Description

Pack the odd-numbered u32 lanes of the two sources: the odd lanes of b fill the lower half of the destination and the odd lanes of a fill the upper half.

Operation

for (i = 0; i < 2; i++) {
    dst.u32[i]     = b.u32[2*i + 1];
    dst.u32[2 + i] = a.u32[2*i + 1];
}

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_pckod_w __builtin_msa_pckod_w

v16i8 __msa_shf_b (v16i8 a, int imm)

Synopsis

v16i8 __msa_shf_b (v16i8 a, int imm)
#include <msa.h>
Instruction: shf.b
Builtin: __builtin_msa_shf_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:416

Description

Use two-bit fields from the immediate to reorder each four-lane u8 group of a.

Operation

dst.u8[0] = a.u8[((imm >> 0) & 3)];
dst.u8[1] = a.u8[((imm >> 2) & 3)];
dst.u8[2] = a.u8[((imm >> 4) & 3)];
dst.u8[3] = a.u8[((imm >> 6) & 3)];
dst.u8[4] = a.u8[4 + ((imm >> 0) & 3)];
dst.u8[5] = a.u8[4 + ((imm >> 2) & 3)];
dst.u8[6] = a.u8[4 + ((imm >> 4) & 3)];
dst.u8[7] = a.u8[4 + ((imm >> 6) & 3)];
dst.u8[8] = a.u8[8 + ((imm >> 0) & 3)];
dst.u8[9] = a.u8[8 + ((imm >> 2) & 3)];
dst.u8[10] = a.u8[8 + ((imm >> 4) & 3)];
dst.u8[11] = a.u8[8 + ((imm >> 6) & 3)];
dst.u8[12] = a.u8[12 + ((imm >> 0) & 3)];
dst.u8[13] = a.u8[12 + ((imm >> 2) & 3)];
dst.u8[14] = a.u8[12 + ((imm >> 4) & 3)];
dst.u8[15] = a.u8[12 + ((imm >> 6) & 3)];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_shf_b __builtin_msa_shf_b

v8i16 __msa_shf_h (v8i16 a, int imm)

Synopsis

v8i16 __msa_shf_h (v8i16 a, int imm)
#include <msa.h>
Instruction: shf.h
Builtin: __builtin_msa_shf_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:417

Description

Use two-bit fields from the immediate to reorder each four-lane u16 group of a.

Operation

dst.u16[0] = a.u16[((imm >> 0) & 3)];
dst.u16[1] = a.u16[((imm >> 2) & 3)];
dst.u16[2] = a.u16[((imm >> 4) & 3)];
dst.u16[3] = a.u16[((imm >> 6) & 3)];
dst.u16[4] = a.u16[4 + ((imm >> 0) & 3)];
dst.u16[5] = a.u16[4 + ((imm >> 2) & 3)];
dst.u16[6] = a.u16[4 + ((imm >> 4) & 3)];
dst.u16[7] = a.u16[4 + ((imm >> 6) & 3)];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_shf_h __builtin_msa_shf_h

v4i32 __msa_shf_w (v4i32 a, int imm)

Synopsis

v4i32 __msa_shf_w (v4i32 a, int imm)
#include <msa.h>
Instruction: shf.w
Builtin: __builtin_msa_shf_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:418

Description

Use two-bit fields from the immediate to reorder each four-lane u32 group of a.

Operation

dst.u32[0] = a.u32[((imm >> 0) & 3)];
dst.u32[1] = a.u32[((imm >> 2) & 3)];
dst.u32[2] = a.u32[((imm >> 4) & 3)];
dst.u32[3] = a.u32[((imm >> 6) & 3)];

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_shf_w __builtin_msa_shf_w

v16i8 __msa_sld_b (v16i8 a, v16i8 b, int imm)

Synopsis

v16i8 __msa_sld_b (v16i8 a, v16i8 b, int imm)
#include <msa.h>
Instruction: sld.b
Builtin: __builtin_msa_sld_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:358

Description

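Slide elements from the concatenation of two source vectors into 16 x u8 lanes using an offset taken from a general-purpose register (despite its name, imm need not be a compile-time constant; sldi is the immediate form).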

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sld_b __builtin_msa_sld_b

v2i64 __msa_sld_d (v2i64 a, v2i64 b, int imm)

Synopsis

v2i64 __msa_sld_d (v2i64 a, v2i64 b, int imm)
#include <msa.h>
Instruction: sld.d
Builtin: __builtin_msa_sld_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:361

Description

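Slide elements from the concatenation of two source vectors into 2 x u64 lanes using an offset taken from a general-purpose register (despite its name, imm need not be a compile-time constant; sldi is the immediate form).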

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sld_d __builtin_msa_sld_d

v8i16 __msa_sld_h (v8i16 a, v8i16 b, int imm)

Synopsis

v8i16 __msa_sld_h (v8i16 a, v8i16 b, int imm)
#include <msa.h>
Instruction: sld.h
Builtin: __builtin_msa_sld_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:359

Description

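Slide elements from the concatenation of two source vectors into 8 x u16 lanes using an offset taken from a general-purpose register (despite its name, imm need not be a compile-time constant; sldi is the immediate form).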

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sld_h __builtin_msa_sld_h

v4i32 __msa_sld_w (v4i32 a, v4i32 b, int imm)

Synopsis

v4i32 __msa_sld_w (v4i32 a, v4i32 b, int imm)
#include <msa.h>
Instruction: sld.w
Builtin: __builtin_msa_sld_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:360

Description

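Slide elements from the concatenation of two source vectors into 4 x u32 lanes using an offset taken from a general-purpose register (despite its name, imm need not be a compile-time constant; sldi is the immediate form).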

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sld_w __builtin_msa_sld_w

v16i8 __msa_sldi_b (v16i8 a, v16i8 b, int imm)

Synopsis

v16i8 __msa_sldi_b (v16i8 a, v16i8 b, int imm)
#include <msa.h>
Instruction: sldi.b
Builtin: __builtin_msa_sldi_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:362

Description

Slide a lane window across the concatenation of b and a, controlled by the immediate offset.

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sldi_b __builtin_msa_sldi_b

v2i64 __msa_sldi_d (v2i64 a, v2i64 b, int imm)

Synopsis

v2i64 __msa_sldi_d (v2i64 a, v2i64 b, int imm)
#include <msa.h>
Instruction: sldi.d
Builtin: __builtin_msa_sldi_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:365

Description

Slide a lane window across the concatenation of b and a, controlled by the immediate offset.

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sldi_d __builtin_msa_sldi_d

v8i16 __msa_sldi_h (v8i16 a, v8i16 b, int imm)

Synopsis

v8i16 __msa_sldi_h (v8i16 a, v8i16 b, int imm)
#include <msa.h>
Instruction: sldi.h
Builtin: __builtin_msa_sldi_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:363

Description

Slide a lane window across the concatenation of b and a, controlled by the immediate offset.

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sldi_h __builtin_msa_sldi_h

v4i32 __msa_sldi_w (v4i32 a, v4i32 b, int imm)

Synopsis

v4i32 __msa_sldi_w (v4i32 a, v4i32 b, int imm)
#include <msa.h>
Instruction: sldi.w
Builtin: __builtin_msa_sldi_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:364

Description

Slide a lane window across the concatenation of b and a, controlled by the immediate offset.

Operation

dst = slide_lanes(concatenate(b, a), imm);

Header Mapping

#define __msa_sldi_w __builtin_msa_sldi_w

v16i8 __msa_splat_b (v16i8 a, int imm)

Synopsis

v16i8 __msa_splat_b (v16i8 a, int imm)
#include <msa.h>
Instruction: splat.b
Builtin: __builtin_msa_splat_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:366

Description

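Broadcast one u8 lane of a into every destination lane. Despite its name, the lane index imm is a run-time value (it is passed in a general-purpose register, not encoded as an immediate) and is interpreted modulo 16.

Operation

dst.u8[0] = a.u8[imm & 15];
dst.u8[1] = a.u8[imm & 15];
dst.u8[2] = a.u8[imm & 15];
dst.u8[3] = a.u8[imm & 15];
dst.u8[4] = a.u8[imm & 15];
dst.u8[5] = a.u8[imm & 15];
dst.u8[6] = a.u8[imm & 15];
dst.u8[7] = a.u8[imm & 15];
dst.u8[8] = a.u8[imm & 15];
dst.u8[9] = a.u8[imm & 15];
dst.u8[10] = a.u8[imm & 15];
dst.u8[11] = a.u8[imm & 15];
dst.u8[12] = a.u8[imm & 15];
dst.u8[13] = a.u8[imm & 15];
dst.u8[14] = a.u8[imm & 15];
dst.u8[15] = a.u8[imm & 15];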

Header Mapping

#define __msa_splat_b __builtin_msa_splat_b

v2i64 __msa_splat_d (v2i64 a, int imm)

Synopsis

v2i64 __msa_splat_d (v2i64 a, int imm)
#include <msa.h>
Instruction: splat.d
Builtin: __builtin_msa_splat_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:369

Description

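Broadcast one u64 lane of a into every destination lane. Despite its name, the lane index imm is a run-time value (it is passed in a general-purpose register, not encoded as an immediate) and is interpreted modulo 2.

Operation

dst.u64[0] = a.u64[imm & 1];
dst.u64[1] = a.u64[imm & 1];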

Header Mapping

#define __msa_splat_d __builtin_msa_splat_d

v8i16 __msa_splat_h (v8i16 a, int imm)

Synopsis

v8i16 __msa_splat_h (v8i16 a, int imm)
#include <msa.h>
Instruction: splat.h
Builtin: __builtin_msa_splat_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:367

Description

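Broadcast one u16 lane of a into every destination lane. Despite its name, the lane index imm is a run-time value (it is passed in a general-purpose register, not encoded as an immediate) and is interpreted modulo 8.

Operation

dst.u16[0] = a.u16[imm & 7];
dst.u16[1] = a.u16[imm & 7];
dst.u16[2] = a.u16[imm & 7];
dst.u16[3] = a.u16[imm & 7];
dst.u16[4] = a.u16[imm & 7];
dst.u16[5] = a.u16[imm & 7];
dst.u16[6] = a.u16[imm & 7];
dst.u16[7] = a.u16[imm & 7];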

Header Mapping

#define __msa_splat_h __builtin_msa_splat_h

v4i32 __msa_splat_w (v4i32 a, int imm)

Synopsis

v4i32 __msa_splat_w (v4i32 a, int imm)
#include <msa.h>
Instruction: splat.w
Builtin: __builtin_msa_splat_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:368

Description

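Broadcast one u32 lane of a into every destination lane. Despite its name, the lane index imm is a run-time value (it is passed in a general-purpose register, not encoded as an immediate) and is interpreted modulo 4.

Operation

dst.u32[0] = a.u32[imm & 3];
dst.u32[1] = a.u32[imm & 3];
dst.u32[2] = a.u32[imm & 3];
dst.u32[3] = a.u32[imm & 3];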

Header Mapping

#define __msa_splat_w __builtin_msa_splat_w

v16i8 __msa_splati_b (v16i8 a, int imm)

Synopsis

v16i8 __msa_splati_b (v16i8 a, int imm)
#include <msa.h>
Instruction: splati.b
Builtin: __builtin_msa_splati_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:370

Description

Broadcast the u8 lane of a selected by imm into every destination lane. imm must be a compile-time constant in the range 0 to 15.

Operation

dst.u8[0] = a.u8[imm];
dst.u8[1] = a.u8[imm];
dst.u8[2] = a.u8[imm];
dst.u8[3] = a.u8[imm];
dst.u8[4] = a.u8[imm];
dst.u8[5] = a.u8[imm];
dst.u8[6] = a.u8[imm];
dst.u8[7] = a.u8[imm];
dst.u8[8] = a.u8[imm];
dst.u8[9] = a.u8[imm];
dst.u8[10] = a.u8[imm];
dst.u8[11] = a.u8[imm];
dst.u8[12] = a.u8[imm];
dst.u8[13] = a.u8[imm];
dst.u8[14] = a.u8[imm];
dst.u8[15] = a.u8[imm];

Header Mapping

#define __msa_splati_b __builtin_msa_splati_b

v2i64 __msa_splati_d (v2i64 a, int imm)

Synopsis

v2i64 __msa_splati_d (v2i64 a, int imm)
#include <msa.h>
Instruction: splati.d
Builtin: __builtin_msa_splati_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:373

Description

Broadcast the u64 lane of a selected by imm into every destination lane. imm must be a compile-time constant in the range 0 to 1.

Operation

dst.u64[0] = a.u64[imm];
dst.u64[1] = a.u64[imm];

Header Mapping

#define __msa_splati_d __builtin_msa_splati_d

v8i16 __msa_splati_h (v8i16 a, int imm)

Synopsis

v8i16 __msa_splati_h (v8i16 a, int imm)
#include <msa.h>
Instruction: splati.h
Builtin: __builtin_msa_splati_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:371

Description

Broadcast the u16 lane of a selected by imm into every destination lane. imm must be a compile-time constant in the range 0 to 7.

Operation

dst.u16[0] = a.u16[imm];
dst.u16[1] = a.u16[imm];
dst.u16[2] = a.u16[imm];
dst.u16[3] = a.u16[imm];
dst.u16[4] = a.u16[imm];
dst.u16[5] = a.u16[imm];
dst.u16[6] = a.u16[imm];
dst.u16[7] = a.u16[imm];

Header Mapping

#define __msa_splati_h __builtin_msa_splati_h

v4i32 __msa_splati_w (v4i32 a, int imm)

Synopsis

v4i32 __msa_splati_w (v4i32 a, int imm)
#include <msa.h>
Instruction: splati.w
Builtin: __builtin_msa_splati_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:372

Description

Broadcast the u32 lane of a selected by imm into every destination lane. imm must be a compile-time constant in the range 0 to 3.

Operation

dst.u32[0] = a.u32[imm];
dst.u32[1] = a.u32[imm];
dst.u32[2] = a.u32[imm];
dst.u32[3] = a.u32[imm];

Header Mapping

#define __msa_splati_w __builtin_msa_splati_w

v16i8 __msa_vshf_b (v16i8 a, v16i8 b, v16i8 c)

Synopsis

v16i8 __msa_vshf_b (v16i8 a, v16i8 b, v16i8 c)
#include <msa.h>
Instruction: vshf.b
Builtin: __builtin_msa_vshf_b
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:398

Description

Use each control u8 lane from a to choose a u8 lane from b or c: bit 4 of the control value selects b (set) or c (clear), and the low 4 bits select the lane within that vector. Control values with bit 6 or bit 7 set produce zero. This is a table lookup across the full 128-bit vectors.

Operation

dst.u8[0] = (a.u8[0] & 0xC0) ? 0 : ((a.u8[0] & 0x10) ? b.u8[(a.u8[0] & 15)] : c.u8[(a.u8[0] & 15)]);
dst.u8[1] = (a.u8[1] & 0xC0) ? 0 : ((a.u8[1] & 0x10) ? b.u8[(a.u8[1] & 15)] : c.u8[(a.u8[1] & 15)]);
dst.u8[2] = (a.u8[2] & 0xC0) ? 0 : ((a.u8[2] & 0x10) ? b.u8[(a.u8[2] & 15)] : c.u8[(a.u8[2] & 15)]);
dst.u8[3] = (a.u8[3] & 0xC0) ? 0 : ((a.u8[3] & 0x10) ? b.u8[(a.u8[3] & 15)] : c.u8[(a.u8[3] & 15)]);
dst.u8[4] = (a.u8[4] & 0xC0) ? 0 : ((a.u8[4] & 0x10) ? b.u8[(a.u8[4] & 15)] : c.u8[(a.u8[4] & 15)]);
dst.u8[5] = (a.u8[5] & 0xC0) ? 0 : ((a.u8[5] & 0x10) ? b.u8[(a.u8[5] & 15)] : c.u8[(a.u8[5] & 15)]);
dst.u8[6] = (a.u8[6] & 0xC0) ? 0 : ((a.u8[6] & 0x10) ? b.u8[(a.u8[6] & 15)] : c.u8[(a.u8[6] & 15)]);
dst.u8[7] = (a.u8[7] & 0xC0) ? 0 : ((a.u8[7] & 0x10) ? b.u8[(a.u8[7] & 15)] : c.u8[(a.u8[7] & 15)]);
dst.u8[8] = (a.u8[8] & 0xC0) ? 0 : ((a.u8[8] & 0x10) ? b.u8[(a.u8[8] & 15)] : c.u8[(a.u8[8] & 15)]);
dst.u8[9] = (a.u8[9] & 0xC0) ? 0 : ((a.u8[9] & 0x10) ? b.u8[(a.u8[9] & 15)] : c.u8[(a.u8[9] & 15)]);
dst.u8[10] = (a.u8[10] & 0xC0) ? 0 : ((a.u8[10] & 0x10) ? b.u8[(a.u8[10] & 15)] : c.u8[(a.u8[10] & 15)]);
dst.u8[11] = (a.u8[11] & 0xC0) ? 0 : ((a.u8[11] & 0x10) ? b.u8[(a.u8[11] & 15)] : c.u8[(a.u8[11] & 15)]);
dst.u8[12] = (a.u8[12] & 0xC0) ? 0 : ((a.u8[12] & 0x10) ? b.u8[(a.u8[12] & 15)] : c.u8[(a.u8[12] & 15)]);
dst.u8[13] = (a.u8[13] & 0xC0) ? 0 : ((a.u8[13] & 0x10) ? b.u8[(a.u8[13] & 15)] : c.u8[(a.u8[13] & 15)]);
dst.u8[14] = (a.u8[14] & 0xC0) ? 0 : ((a.u8[14] & 0x10) ? b.u8[(a.u8[14] & 15)] : c.u8[(a.u8[14] & 15)]);
dst.u8[15] = (a.u8[15] & 0xC0) ? 0 : ((a.u8[15] & 0x10) ? b.u8[(a.u8[15] & 15)] : c.u8[(a.u8[15] & 15)]);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_vshf_b __builtin_msa_vshf_b

v2i64 __msa_vshf_d (v2i64 a, v2i64 b, v2i64 c)

Synopsis

v2i64 __msa_vshf_d (v2i64 a, v2i64 b, v2i64 c)
#include <msa.h>
Instruction: vshf.d
Builtin: __builtin_msa_vshf_d
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:401

Description

Use each control u64 lane from a to choose a u64 lane from b or c: bit 1 of the control value selects b (set) or c (clear), and bit 0 selects the lane within that vector. Control values with bit 6 or bit 7 set produce zero. This is a table lookup across the full 128-bit vectors.

Operation

dst.u64[0] = (a.u64[0] & 0xC0) ? 0 : ((a.u64[0] & 0x2) ? b.u64[(a.u64[0] & 1)] : c.u64[(a.u64[0] & 1)]);
dst.u64[1] = (a.u64[1] & 0xC0) ? 0 : ((a.u64[1] & 0x2) ? b.u64[(a.u64[1] & 1)] : c.u64[(a.u64[1] & 1)]);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_vshf_d __builtin_msa_vshf_d

v8i16 __msa_vshf_h (v8i16 a, v8i16 b, v8i16 c)

Synopsis

v8i16 __msa_vshf_h (v8i16 a, v8i16 b, v8i16 c)
#include <msa.h>
Instruction: vshf.h
Builtin: __builtin_msa_vshf_h
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:399

Description

Use each control u16 lane from a to choose a u16 lane from b or c: bit 3 of the control value selects b (set) or c (clear), and the low 3 bits select the lane within that vector. Control values with bit 6 or bit 7 set produce zero. This is a table lookup across the full 128-bit vectors.

Operation

dst.u16[0] = (a.u16[0] & 0xC0) ? 0 : ((a.u16[0] & 0x8) ? b.u16[(a.u16[0] & 7)] : c.u16[(a.u16[0] & 7)]);
dst.u16[1] = (a.u16[1] & 0xC0) ? 0 : ((a.u16[1] & 0x8) ? b.u16[(a.u16[1] & 7)] : c.u16[(a.u16[1] & 7)]);
dst.u16[2] = (a.u16[2] & 0xC0) ? 0 : ((a.u16[2] & 0x8) ? b.u16[(a.u16[2] & 7)] : c.u16[(a.u16[2] & 7)]);
dst.u16[3] = (a.u16[3] & 0xC0) ? 0 : ((a.u16[3] & 0x8) ? b.u16[(a.u16[3] & 7)] : c.u16[(a.u16[3] & 7)]);
dst.u16[4] = (a.u16[4] & 0xC0) ? 0 : ((a.u16[4] & 0x8) ? b.u16[(a.u16[4] & 7)] : c.u16[(a.u16[4] & 7)]);
dst.u16[5] = (a.u16[5] & 0xC0) ? 0 : ((a.u16[5] & 0x8) ? b.u16[(a.u16[5] & 7)] : c.u16[(a.u16[5] & 7)]);
dst.u16[6] = (a.u16[6] & 0xC0) ? 0 : ((a.u16[6] & 0x8) ? b.u16[(a.u16[6] & 7)] : c.u16[(a.u16[6] & 7)]);
dst.u16[7] = (a.u16[7] & 0xC0) ? 0 : ((a.u16[7] & 0x8) ? b.u16[(a.u16[7] & 7)] : c.u16[(a.u16[7] & 7)]);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_vshf_h __builtin_msa_vshf_h

v4i32 __msa_vshf_w (v4i32 a, v4i32 b, v4i32 c)

Synopsis

v4i32 __msa_vshf_w (v4i32 a, v4i32 b, v4i32 c)
#include <msa.h>
Instruction: vshf.w
Builtin: __builtin_msa_vshf_w
CPU Flags: __mips_msa
Kind: alias
Source: include/msa.h:400

Description

Use each control u32 lane from a to choose a u32 lane from b or c: bit 2 of the control value selects b (set) or c (clear), and the low 2 bits select the lane within that vector. Control values with bit 6 or bit 7 set produce zero. This is a table lookup across the full 128-bit vectors.

Operation

dst.u32[0] = (a.u32[0] & 0xC0) ? 0 : ((a.u32[0] & 0x4) ? b.u32[(a.u32[0] & 3)] : c.u32[(a.u32[0] & 3)]);
dst.u32[1] = (a.u32[1] & 0xC0) ? 0 : ((a.u32[1] & 0x4) ? b.u32[(a.u32[1] & 3)] : c.u32[(a.u32[1] & 3)]);
dst.u32[2] = (a.u32[2] & 0xC0) ? 0 : ((a.u32[2] & 0x4) ? b.u32[(a.u32[2] & 3)] : c.u32[(a.u32[2] & 3)]);
dst.u32[3] = (a.u32[3] & 0xC0) ? 0 : ((a.u32[3] & 0x4) ? b.u32[(a.u32[3] & 3)] : c.u32[(a.u32[3] & 3)]);

Latency and Throughput

CPU µarch Latency Throughput (IPC)
3A4000 GS464V 1 2

Header Mapping

#define __msa_vshf_w __builtin_msa_vshf_w