// LzmaDecOpt.S (ARM64 assembly; file is ~1488 lines / 37 KiB in total)
// LzmaDecOpt.S -- ARM64-ASM version of LzmaDec_DecodeReal_3() function
// 2021-04-25 : Igor Pavlov : Public domain

/*
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
; function for check at link time.
; That code is tightly coupled with LzmaDec_TryDummy()
; and with other functions in LzmaDec.c file.
; CLzmaDec structure, (probs) array layout, input and output of
; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
*/

#include "7zAsm.S"
|
||
|
|
||
|
// .arch armv8-a
// .file "LzmaDecOpt.c"

        .text
        .align 2
        .p2align 4,,15

// Apple Mach-O prepends an underscore to C-visible symbols; ELF does not.
#ifdef __APPLE__
        .globl _LzmaDec_DecodeReal_3
#else
        .global LzmaDec_DecodeReal_3
#endif
// .type LzmaDec_DecodeReal_3, %function

// Build-time configuration switches (all but one disabled by default):
// #define _LZMA_SIZE_OPT 1        // compact decode loops instead of unrolled sequences

#define LZMA_USE_4BYTES_FILL 1
// #define LZMA_USE_2BYTES_COPY 1  // 16-bit copy loop in the COPY MATCH path
// #define LZMA_USE_CMOV_LZ_WRAP 1 // cmov instead of branch for dictionary wrap-around
// #define _LZMA_PROB32 1          // 32-bit probability counters instead of 16-bit

// Alignment for hot entry points and loop heads
// (MY_ALIGN_16 / MY_ALIGN_32 presumably come from 7zAsm.S -- confirm there).
#define MY_ALIGN_FOR_ENTRY   MY_ALIGN_32
#define MY_ALIGN_FOR_LOOP    MY_ALIGN_32
#define MY_ALIGN_FOR_LOOP_16 MY_ALIGN_16
|
||
|
|
||
|
// ---------- Probability-counter (CLzmaProb) access helpers ----------
// Probs are 16-bit by default, 32-bit when _LZMA_PROB32 is defined.
// PSHIFT = log2(sizeof(prob)); PMULT = sizeof(prob) in bytes.  All byte
// offsets into the probs array in this file are scaled by PMULT.

#ifdef _LZMA_PROB32

.equ PSHIFT , 2

// PLOAD*: load one prob counter.  PSTORE*: store one back.
.macro PLOAD dest:req, mem:req
        ldr \dest, [\mem]
.endm

.macro PLOAD_PREINDEXED dest:req, mem:req, offset:req
        ldr \dest, [\mem, \offset]!
.endm

.macro PLOAD_2 dest:req, mem1:req, mem2:req
        ldr \dest, [\mem1, \mem2]
.endm

.macro PLOAD_LSL dest:req, mem1:req, mem2:req
        ldr \dest, [\mem1, \mem2, lsl #PSHIFT]
.endm

.macro PSTORE src:req, mem:req
        str \src, [\mem]
.endm

.macro PSTORE_2 src:req, mem1:req, mem2:req
        str \src, [\mem1, \mem2]
.endm

.macro PSTORE_LSL src:req, mem1:req, mem2:req
        str \src, [\mem1, \mem2, lsl #PSHIFT]
.endm

// Store at byte offset (\mem2 * 2); callers pass \mem2 = 2 * node_index,
// so this writes back the parent tree node (see BIT_1_R / BIT_2_R usage).
.macro PSTORE_LSL_M1 src:req, mem1:req, mem2:req, temp_reg:req
        // you must check that temp_reg is free register when macro is used
        add \temp_reg, \mem1, \mem2
        str \src, [\temp_reg, \mem2]
.endm

#else

// .equ PSHIFT , 1
#define PSHIFT 1

.macro PLOAD dest:req, mem:req
        ldrh \dest, [\mem]
.endm

.macro PLOAD_PREINDEXED dest:req, mem:req, offset:req
        ldrh \dest, [\mem, \offset]!
.endm

.macro PLOAD_2 dest:req, mem1:req, mem2:req
        ldrh \dest, [\mem1, \mem2]
.endm

.macro PLOAD_LSL dest:req, mem1:req, mem2:req
        ldrh \dest, [\mem1, \mem2, lsl #PSHIFT]
.endm

.macro PSTORE src:req, mem:req
        strh \src, [\mem]
.endm

.macro PSTORE_2 src:req, mem1:req, mem2:req
        strh \src, [\mem1, \mem2]
.endm

.macro PSTORE_LSL src:req, mem1:req, mem2:req
        strh \src, [\mem1, \mem2, lsl #PSHIFT]
.endm

// 16-bit mode: \mem2 = 2 * node_index is already the parent's byte offset
// (PMULT == 2), so no temp register is needed; \temp_reg is ignored.
.macro PSTORE_LSL_M1 src:req, mem1:req, mem2:req, temp_reg:req
        strh \src, [\mem1, \mem2]
.endm

#endif

.equ PMULT , (1 << PSHIFT)              // sizeof(CLzmaProb) in bytes
.equ PMULT_2 , (2 << PSHIFT)

// Marker OR-ed into (len) for corrupted input (must match LzmaDec.c).
.equ kMatchSpecLen_Error_Data , (1 << 9)
|
||
|
|
||
|
// ---------- Register allocation map ----------
// t0..t7 are scratch aliases layered onto w/x registers; the table below
// lists the role each one plays in the different decoder phases.
# x7 t0 : NORM_CALC : prob2 (IF_BIT_1)
# x6 t1 : NORM_CALC : probs_state
# x8 t2 : (LITM) temp : (TREE) temp
# x4 t3 : (LITM) bit : (TREE) temp : UPDATE_0/UPDATE_0 temp
# x10 t4 : (LITM) offs : (TREE) probs_PMULT : numBits
# x9 t5 : (LITM) match : sym2 (ShortDist)
# x1 t6 : (LITM) litm_prob : (TREE) prob_reg : pbPos
# x2 t7 : (LITM) prm : probBranch : cnt
# x3 sym : dist
# x12 len
# x0 range
# x5 cod

// rN names presumably alias xN via 7zAsm.S -- confirm there.
#define range w0

// t6
#define pbPos w1
#define pbPos_R r1
#define prob_reg w1
#define litm_prob prob_reg

// t7
#define probBranch w2
#define cnt w2
#define cnt_R r2
#define prm r2

#define sym w3
#define sym_R r3
#define dist sym

#define t3 w4
#define bit w4
#define bit_R r4
#define update_temp_reg r4

#define cod w5

#define t1 w6
#define t1_R r6
#define probs_state t1_R

#define t0 w7
#define t0_R r7
#define prob2 t0

#define t2 w8
#define t2_R r8

// t5
#define match w9
#define sym2 w9
#define sym2_R r9

#define t4 w10
#define t4_R r10

#define offs w10
#define offs_R r10

#define probs r11

#define len w12
#define len_R x12

#define state w13
#define state_R r13

// Long-lived decoder state kept in registers for the whole function:
#define dicPos r14
#define buf r15
#define bufLimit r16
#define dicBufSize r17

#define limit r19
#define rep0 w20
#define rep0_R r20
#define rep1 w21
#define rep2 w22
#define rep3 w23
#define dic r24
#define probs_IsMatch r25
#define probs_Spec r26
#define checkDicSize w27
#define processedPos w28
#define pbMask w29
#define lc2_lpMask w30

// Range-coder model parameters (must match LzmaDec.c):
.equ kNumBitModelTotalBits , 11
.equ kBitModelTotal , (1 << kNumBitModelTotalBits)
.equ kNumMoveBits , 5
.equ kBitModelOffset , (kBitModelTotal - (1 << kNumMoveBits) + 1)
|
||
|
|
||
|
// ---------- Range-coder normalization ----------
// NOTE(review): jnz/jz/jae/jb/cmovb/cmovae/shl/shr/imul/p2_* used below are
// x86-style mnemonic macros, presumably provided by 7zAsm.S -- confirm there.

// Unconditional normalize step: pull the next input byte into (cod) and
// scale (range):  range <<= 8;  cod = (cod << 8) | *buf++.
.macro NORM_2 macro
        ldrb t0, [buf], 1
        shl range, 8
        orr cod, t0, cod, lsl 8
/*
        mov t0, cod
        ldrb cod, [buf], 1
        shl range, 8
        bfi cod, t0, #8, #24
*/
.endm

// Sets flags: Z == 1 when range < (1 << 24), i.e. normalization is needed.
.macro TEST_HIGH_BYTE_range macro
        tst range, 0xFF000000
.endm

// Conditional normalize: refill only when the top byte of range is zero.
.macro NORM macro
        TEST_HIGH_BYTE_range
        jnz 1f
        NORM_2
1:
.endm
|
||
|
|
||
|
|
||
|
# ---------- Branch MACROS ----------
// Branch-style bit decode.  CMP_COD_* computes bound = (range >> 11) * prob
// and compares cod against it; UPDATE_0 / UPDATE_1 then adapt the prob
// counter and coder state for a decoded 0 / 1 bit.
// After CMP_COD_BASE:  range = bound,  prob2 = old range,
// flags: "b"/lo when cod < bound (bit == 0), "ae"/hs when bit == 1.

// prob2 = prob - kBitModelOffset (split out so callers can interleave it).
.macro UPDATE_0__0
        sub prob2, probBranch, kBitModelOffset
.endm

// bit==0 adaptation: prob -= (prob - kBitModelOffset) >> kNumMoveBits,
// moving prob toward kBitModelTotal (asr keeps the negative case exact).
.macro UPDATE_0__1
        sub probBranch, probBranch, prob2, asr #(kNumMoveBits)
.endm

// Store the updated prob; exactly one of probOffset / probDisp may be nonzero.
.macro UPDATE_0__2 probsArray:req, probOffset:req, probDisp:req
  .if \probDisp == 0
        PSTORE_2 probBranch, \probsArray, \probOffset
  .elseif \probOffset == 0
        PSTORE_2 probBranch, \probsArray, \probDisp * PMULT
  .else
        .error "unsupported"
        // add update_temp_reg, \probsArray, \probOffset
        PSTORE_2 probBranch, update_temp_reg, \probDisp * PMULT
  .endif
.endm

.macro UPDATE_0 probsArray:req, probOffset:req, probDisp:req
        UPDATE_0__0
        UPDATE_0__1
        UPDATE_0__2 \probsArray, \probOffset, \probDisp
.endm

// bit==1 path: cod -= bound; range = oldRange - bound;
// prob -= prob >> kNumMoveBits (adapt toward 0); store prob back.
.macro UPDATE_1 probsArray:req, probOffset:req, probDisp:req
        // sub cod, cod, prob2
        // sub range, range, prob2
        p2_sub cod, range
        sub range, prob2, range
        sub prob2, probBranch, probBranch, lsr #(kNumMoveBits)
  .if \probDisp == 0
        PSTORE_2 prob2, \probsArray, \probOffset
  .elseif \probOffset == 0
        PSTORE_2 prob2, \probsArray, \probDisp * PMULT
  .else
        .error "unsupported"
        // add update_temp_reg, \probsArray, \probOffset
        PSTORE_2 prob2, update_temp_reg, \probDisp * PMULT
  .endif
.endm

// Normalize, then:  prob2 = range;  range = (range >> 11) * prob (= bound);
// compare cod with bound.  Flags feed the jb/jae decisions below.
.macro CMP_COD_BASE
        NORM
        // lsr prob2, range, kNumBitModelTotalBits
        // imul prob2, probBranch
        // cmp cod, prob2
        mov prob2, range
        shr range, kNumBitModelTotalBits
        imul range, probBranch
        cmp cod, range
.endm

// Load prob from a plain pointer, then compare.
.macro CMP_COD_1 probsArray:req
        PLOAD probBranch, \probsArray
        CMP_COD_BASE
.endm

// Load prob from base + (register offset | constant displacement), then compare.
.macro CMP_COD_3 probsArray:req, probOffset:req, probDisp:req
  .if \probDisp == 0
        PLOAD_2 probBranch, \probsArray, \probOffset
  .elseif \probOffset == 0
        PLOAD_2 probBranch, \probsArray, \probDisp * PMULT
  .else
        .error "unsupported"
        add update_temp_reg, \probsArray, \probOffset
        PLOAD_2 probBranch, update_temp_reg, \probDisp * PMULT
  .endif
        CMP_COD_BASE
.endm

// Jump to \toLabel when the bit is 1 (cod >= bound); prob left un-updated.
.macro IF_BIT_1_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD_3 \probsArray, \probOffset, \probDisp
        jae \toLabel
.endm

// Jump when the bit is 1; on fall-through (bit 0) also apply the 0-update.
.macro IF_BIT_1 probsArray:req, probOffset:req, probDisp:req, toLabel:req
        IF_BIT_1_NOUP \probsArray, \probOffset, \probDisp, \toLabel
        UPDATE_0 \probsArray, \probOffset, \probDisp
.endm

// Jump to \toLabel when the bit is 0 (cod < bound); prob left un-updated.
.macro IF_BIT_0_NOUP probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD_3 \probsArray, \probOffset, \probDisp
        jb \toLabel
.endm

.macro IF_BIT_0_NOUP_1 probsArray:req, toLabel:req
        CMP_COD_1 \probsArray
        jb \toLabel
.endm
|
||
|
|
||
|
|
||
|
# ---------- CMOV MACROS ----------
// Branch-free bit decode.  After NORM_CALC:
//   t0 = bound = (range >> 11) * prob,   range = range - bound,
//   t1 = cod - bound,  flags from that subtraction:
//   "b"/lo  -> cod < bound  (bit 0),   "ae"/hs -> bit 1.
// All following cmov/csel/adc steps key off those flags.

.macro NORM_LSR
        NORM
        lsr t0, range, #kNumBitModelTotalBits
.endm

.macro COD_RANGE_SUB
        subs t1, cod, t0                // t1 = cod - bound; sets C for adc/csel users
        p2_sub range, t0                // range -= bound (bit-1 value; CMOV_range overrides for bit 0)
.endm

.macro RANGE_IMUL prob:req
        imul t0, \prob                  // t0 = (range >> 11) * prob = bound
.endm

.macro NORM_CALC prob:req
        NORM_LSR
        RANGE_IMUL \prob
        COD_RANGE_SUB
.endm

// bit 0: range = bound
.macro CMOV_range
        cmovb range, t0
.endm

// bit 1: cod -= bound
.macro CMOV_code
        cmovae cod, t1
.endm

// Prepare t0 for PUP_BASE_2: (prob - kBitModelOffset) for bit 0, plain
// prob for bit 1; also applies the bit-1 cod correction.
.macro CMOV_code_Model_Pre prob:req
        sub t0, \prob, kBitModelOffset
        CMOV_code
        cmovae t0, \prob
.endm

// dest = prob - (dest >> kNumMoveBits).  With dest preloaded as either
// (prob - kBitModelOffset) or prob, this yields the 0-bit or 1-bit update.
.macro PUP_BASE_2 prob:req, dest_reg:req
        # only sar works for both 16/32 bit prob modes
        sub \dest_reg, \prob, \dest_reg, asr #(kNumMoveBits)
.endm

// Adapt prob (t0 prepared by CMOV_code_Model_Pre) and store it back.
.macro PUP prob:req, probPtr:req, mem2:req
        PUP_BASE_2 \prob, t0
        PSTORE_2 t0, \probPtr, \mem2
.endm
|
||
|
|
||
|
|
||
|
|
||
|
// ---------- Forward bit-tree decode (literals, len, pos-slot) ----------
#define probs_PMULT t4_R

// probs_PMULT = probs + PMULT: second base pointer, so both children of a
// node can be fetched with the same scaled index (see BIT_1_R).
.macro BIT_01
        add probs_PMULT, probs, PMULT
.endm

// First tree bit (node index 1):  sym becomes 2 + bit via adc on the
// carry from COD_RANGE_SUB; both children's probs (nodes 2 and 3) are
// loaded speculatively and csel picks the one the decoded bit selects.
.macro BIT_0_R prob:req
        PLOAD_2 \prob, probs, 1 * PMULT
        NORM_LSR
        sub t3, \prob, kBitModelOffset
        RANGE_IMUL \prob
        PLOAD_2 t2, probs, 1 * PMULT_2          // child prob for bit 0 (node 2)
        COD_RANGE_SUB
        CMOV_range
        cmovae t3, \prob
        PLOAD_2 t0, probs, 1 * PMULT_2 + PMULT  // child prob for bit 1 (node 3)
        PUP_BASE_2 \prob, t3
        csel \prob, t2, t0, lo
        CMOV_code
        mov sym, 2
        PSTORE_2 t3, probs, 1 * PMULT           // write back updated node 1
        adc sym, sym, wzr                       // sym = 2 + decoded bit
        BIT_01
.endm

// Middle tree bits: sym = sym * 2 + bit; children of node (sym) are
// fetched through probs / probs_PMULT and selected with csel.
.macro BIT_1_R prob:req
        NORM_LSR
        p2_add sym, sym
        sub t3, \prob, kBitModelOffset
        RANGE_IMUL \prob
        PLOAD_LSL t2, probs, sym_R
        COD_RANGE_SUB
        CMOV_range
        cmovae t3, \prob
        PLOAD_LSL t0, probs_PMULT, sym_R
        PUP_BASE_2 \prob, t3
        csel \prob, t2, t0, lo
        CMOV_code
        PSTORE_LSL_M1 t3, probs, sym_R, t2_R    // store parent node (sym / 2)
        adc sym, sym, wzr
.endm

// Last tree bit: as BIT_1_R, but no next-level prob needs fetching.
.macro BIT_2_R prob:req
        NORM_LSR
        p2_add sym, sym
        sub t3, \prob, kBitModelOffset
        RANGE_IMUL \prob
        COD_RANGE_SUB
        CMOV_range
        cmovae t3, \prob
        CMOV_code
        PUP_BASE_2 \prob, t3
        PSTORE_LSL_M1 t3, probs, sym_R, t2_R
        adc sym, sym, wzr
.endm
|
||
|
|
||
|
|
||
|
# ---------- MATCHED LITERAL ----------
// Literal decode when the previous operation was a match: a bit taken from
// the match byte selects which half of the extended prob table is used,
// until the decoded bits diverge from the match byte.
// Register roles here:
//   match : match byte, pre-shifted left by (PSHIFT + 1), doubled each step
//   bit   : current match-byte bit selector (0 or 256*PMULT)
//   offs  : table-half selector, frozen by cmovae once bits diverge
//   prm   : pointer into the current prob sub-table

// First matched-literal bit; also initializes sym = 2 + bit.
.macro LITM_0 macro
        shl match, (PSHIFT + 1)
        and bit, match, 256 * PMULT
        add prm, probs, 256 * PMULT + 1 * PMULT
        p2_add match, match
        p2_add prm, bit_R
        eor offs, bit, 256 * PMULT
        PLOAD litm_prob, prm

        NORM_LSR
        sub t2, litm_prob, kBitModelOffset
        RANGE_IMUL litm_prob
        COD_RANGE_SUB
        cmovae offs, bit                // decoded bit != match bit -> freeze selector
        CMOV_range
        and bit, match, offs
        cmovae t2, litm_prob
        CMOV_code
        mov sym, 2
        PUP_BASE_2 litm_prob, t2
        PSTORE t2, prm
        add prm, probs, offs_R
        adc sym, sym, wzr               // sym = 2 + decoded bit
.endm

// Middle matched-literal bits: sym = sym * 2 + bit (final adc).
.macro LITM macro
        p2_add prm, bit_R
        xor offs, bit
        PLOAD_LSL litm_prob, prm, sym_R

        NORM_LSR
        p2_add match, match
        sub t2, litm_prob, kBitModelOffset
        RANGE_IMUL litm_prob
        COD_RANGE_SUB
        cmovae offs, bit
        CMOV_range
        and bit, match, offs
        cmovae t2, litm_prob
        CMOV_code
        PUP_BASE_2 litm_prob, t2
        PSTORE_LSL t2, prm, sym_R
        add prm, probs, offs_R
        adc sym, sym, sym               // sym = sym * 2 + decoded bit
.endm

// Last matched-literal bit: no selector maintenance for a next step.
.macro LITM_2 macro
        p2_add prm, bit_R
        PLOAD_LSL litm_prob, prm, sym_R

        NORM_LSR
        sub t2, litm_prob, kBitModelOffset
        RANGE_IMUL litm_prob
        COD_RANGE_SUB
        CMOV_range
        cmovae t2, litm_prob
        CMOV_code
        PUP_BASE_2 litm_prob, t2
        PSTORE_LSL t2, prm, sym_R
        adc sym, sym, sym
.endm
|
||
|
|
||
|
|
||
|
# ---------- REVERSE BITS ----------
// Reverse bit-tree decode (bits accumulate LSB-first): the tree is walked
// with a node POINTER (sym2_R) instead of an index, advancing it by
// (step * PMULT) whenever the decoded bit is 1.

// First reverse bit (node 1; children at nodes 2 / 3).
.macro REV_0 prob:req
        NORM_CALC \prob
        CMOV_range
        PLOAD t2, sym2_R
        PLOAD_2 t3, probs, 3 * PMULT
        CMOV_code_Model_Pre \prob
        add t1_R, probs, 3 * PMULT
        cmovae sym2_R, t1_R             // bit 1: advance node pointer
        PUP \prob, probs, 1 * PMULT
        csel \prob, t2, t3, lo          // prob for the selected child
.endm

// Middle reverse bit with compile-time \step (2, 4, ...).
.macro REV_1 prob:req, step:req
        NORM_LSR
        PLOAD_PREINDEXED t2, sym2_R, (\step * PMULT)
        RANGE_IMUL \prob
        COD_RANGE_SUB
        CMOV_range
        PLOAD_2 t3, sym2_R, (\step * PMULT)
        sub t0, \prob, kBitModelOffset
        CMOV_code
        add t1_R, sym2_R, \step * PMULT
        cmovae t0, \prob
        cmovae sym2_R, t1_R
        PUP_BASE_2 \prob, t0
        csel \prob, t2, t3, lo
        PSTORE_2 t0, t1_R, 0 - \step * PMULT_2  // write back the node just decoded
.endm

// Last reverse bit: recover the accumulated bits from the pointer walk
// (sym |= (sym2_R - probs) >> PSHIFT), then subtract \step when bit == 0.
.macro REV_2 prob:req, step:req
        sub t1_R, sym2_R, probs
        NORM_LSR
        orr sym, sym, t1, lsr #PSHIFT
        RANGE_IMUL \prob
        COD_RANGE_SUB
        sub t2, sym, \step
        CMOV_range
        cmovb sym, t2                   // bit 0: drop this step's contribution
        CMOV_code_Model_Pre \prob
        PUP \prob, sym2_R, 0
.endm

// One bit of a run-time-sized reverse tree: sym_R walks the tree and
// sym2 doubles every round as the step size.
.macro REV_1_VAR prob:req
        PLOAD \prob, sym_R
        mov probs, sym_R
        p2_add sym_R, sym2_R
        NORM_LSR
        add t2_R, sym_R, sym2_R
        RANGE_IMUL \prob
        COD_RANGE_SUB
        cmovae sym_R, t2_R
        CMOV_range
        CMOV_code_Model_Pre \prob
        p2_add sym2, sym2
        PUP \prob, probs, 0
.endm
|
||
|
|
||
|
|
||
|
// add_big / sub_big: add/subtract an assembly-time constant that may not
// fit the 12-bit immediate field of a single ARM64 add/sub.  Oversized
// values are split across two instructions; that can only legitimately
// happen in 32-bit-prob (_LZMA_PROB32) builds, so other configurations
// fail loudly at assembly time.
.macro add_big dest:req, src:req, param:req
  .if (\param) < (1 << 12)
        add \dest, \src, \param
  .else
#ifndef _LZMA_PROB32
        .error "unexpected add_big expansion"
#endif
        add \dest, \src, (\param) / 2
        add \dest, \dest, (\param) - (\param) / 2
  .endif
.endm

.macro sub_big dest:req, src:req, param:req
  .if (\param) < (1 << 12)
        sub \dest, \src, \param
  .else
#ifndef _LZMA_PROB32
        .error "unexpected sub_big expansion"
#endif
        sub \dest, \src, (\param) / 2
        sub \dest, \dest, (\param) - (\param) / 2
  .endif
.endm

// probs = &probs[\offset], computed relative to the cached probs_IsMatch
// base (a smaller displacement than from probs_Spec for most tables).
.macro SET_probs offset:req
        // add_big probs, probs_Spec, (\offset) * PMULT
        add probs, probs_IsMatch, ((\offset) - IsMatch) * PMULT
.endm
|
||
|
|
||
|
|
||
|
// Compute the literal prob-table pointer from processedPos (lp bits) and
// the previous byte held in sym (lc bits), interleaved with the deferred
// UPDATE_0 of the IsMatch prob (probs_state / pbPos were prepared by
// IsMatchBranch_Pre, probBranch by the preceding IF_BIT_0_NOUP).
// The shift amount and mask are pre-combined into lc2_lpMask at function
// entry; the variable shl uses only its low bits -- see the entry code.
.macro LIT_PROBS
        add sym, sym, processedPos, lsl 8
        inc processedPos
        UPDATE_0__0
        shl sym, lc2_lpMask
        SET_probs Literal
        p2_and sym, lc2_lpMask
        // p2_add probs_state, pbPos_R
        p2_add probs, sym_R
        UPDATE_0__1
        add probs, probs, sym_R, lsl 1
        UPDATE_0__2 probs_state, pbPos_R, 0
.endm
|
||
|
|
||
|
|
||
|
|
||
|
// ---------- LZMA model constants and probs-array layout ----------
// Must stay identical to the C implementation in LzmaDec.c / LzmaDec.h;
// the .error guards below catch layout drift at assembly time.

.equ kNumPosBitsMax , 4
.equ kNumPosStatesMax , (1 << kNumPosBitsMax)

.equ kLenNumLowBits , 3
.equ kLenNumLowSymbols , (1 << kLenNumLowBits)
.equ kLenNumHighBits , 8
.equ kLenNumHighSymbols , (1 << kLenNumHighBits)
.equ kNumLenProbs , (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)

.equ LenLow , 0
.equ LenChoice , LenLow
.equ LenChoice2 , (LenLow + kLenNumLowSymbols)
.equ LenHigh , (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)

.equ kNumStates , 12
.equ kNumStates2 , 16
.equ kNumLitStates , 7

.equ kStartPosModelIndex , 4
.equ kEndPosModelIndex , 14
.equ kNumFullDistances , (1 << (kEndPosModelIndex >> 1))

.equ kNumPosSlotBits , 6
.equ kNumLenToPosStates , 4

.equ kNumAlignBits , 4
.equ kAlignTableSize , (1 << kNumAlignBits)

.equ kMatchMinLen , 2
.equ kMatchSpecLenStart , (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)

// Sub-table offsets (in CLzmaProb units) inside the probs array:
// .equ kStartOffset , 1408
.equ kStartOffset , 0
.equ SpecPos , (-kStartOffset)
.equ IsRep0Long , (SpecPos + kNumFullDistances)
.equ RepLenCoder , (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
.equ LenCoder , (RepLenCoder + kNumLenProbs)
.equ IsMatch , (LenCoder + kNumLenProbs)
.equ kAlign , (IsMatch + (kNumStates2 << kNumPosBitsMax))
.equ IsRep , (kAlign + kAlignTableSize)
.equ IsRepG0 , (IsRep + kNumStates)
.equ IsRepG1 , (IsRepG0 + kNumStates)
.equ IsRepG2 , (IsRepG1 + kNumStates)
.equ PosSlot , (IsRepG2 + kNumStates)
.equ Literal , (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
.equ NUM_BASE_PROBS , (Literal + kStartOffset)

.if kStartOffset != 0 // && IsMatch != 0
  .error "Stop_Compiling_Bad_StartOffset"
.endif

.if NUM_BASE_PROBS != 1984
  .error "Stop_Compiling_Bad_LZMA_PROBS"
.endif

// Byte offsets of the fields inside the CLzmaDec structure:
.equ offset_lc , 0
.equ offset_lp , 1
.equ offset_pb , 2
.equ offset_dicSize , 4
.equ offset_probs , 4 + offset_dicSize
.equ offset_probs_1664 , 8 + offset_probs
.equ offset_dic , 8 + offset_probs_1664
.equ offset_dicBufSize , 8 + offset_dic
.equ offset_dicPos , 8 + offset_dicBufSize
.equ offset_buf , 8 + offset_dicPos
.equ offset_range , 8 + offset_buf
.equ offset_code , 4 + offset_range
.equ offset_processedPos , 4 + offset_code
.equ offset_checkDicSize , 4 + offset_processedPos
.equ offset_rep0 , 4 + offset_checkDicSize
.equ offset_rep1 , 4 + offset_rep0
.equ offset_rep2 , 4 + offset_rep1
.equ offset_rep3 , 4 + offset_rep2
.equ offset_state , 4 + offset_rep3
.equ offset_remainLen , 4 + offset_state
.equ offset_TOTAL_SIZE , 4 + offset_remainLen

.if offset_TOTAL_SIZE != 96
  .error "Incorrect offset_TOTAL_SIZE"
.endif
|
||
|
|
||
|
|
||
|
// Prepare the IsMatch[state][posState] address split across two registers:
// probs_state = probs_IsMatch + state (state is kept pre-scaled by PSHIFT),
// pbPos = pre-scaled posState, so [probs_state + pbPos] is the target prob.
.macro IsMatchBranch_Pre
        # prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
        and pbPos, pbMask, processedPos, lsl #(kLenNumLowBits + 1 + PSHIFT)
        add probs_state, probs_IsMatch, state_R
.endm

/*
.macro IsMatchBranch
        IsMatchBranch_Pre
        IF_BIT_1 probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
.endm
*/

// Leave the decode loop through the common exit (fin_OK) when either the
// input buffer or the dictionary window limit has been reached.
.macro CheckLimits
        cmp buf, bufLimit
        jae fin_OK
        cmp dicPos, limit
        jae fin_OK
.endm

// The literal path currently shares the same exit as the match path.
#define CheckLimits_lit CheckLimits
/*
.macro CheckLimits_lit
        cmp buf, bufLimit
        jae fin_OK_lit
        cmp dicPos, limit
        jae fin_OK_lit
.endm
*/
|
||
|
|
||
|
|
||
|
// Incoming arguments (AAPCS64 x0-x2 via 7zAsm.S REG_ABI_PARAM_*):
// p (CLzmaDec *), limit, bufLimit -- NOTE(review): confirm exact C
// signature against LzmaDec_DecodeReal_3 in LzmaDec.c.
#define PARAM_lzma REG_ABI_PARAM_0
#define PARAM_limit REG_ABI_PARAM_1
#define PARAM_bufLimit REG_ABI_PARAM_2

// Field loads from the CLzmaDec structure (offset_* constants above).
.macro LOAD_LZMA_VAR reg:req, struct_offs:req
        ldr \reg, [PARAM_lzma, \struct_offs]
.endm

.macro LOAD_LZMA_BYTE reg:req, struct_offs:req
        ldrb \reg, [PARAM_lzma, \struct_offs]
.endm

// Load two adjacent structure fields with a single ldp.
.macro LOAD_LZMA_PAIR reg0:req, reg1:req, struct_offs:req
        ldp \reg0, \reg1, [PARAM_lzma, \struct_offs]
.endm
|
||
|
|
||
|
|
||
|
LzmaDec_DecodeReal_3:
|
||
|
_LzmaDec_DecodeReal_3:
|
||
|
/*
|
||
|
.LFB0:
|
||
|
.cfi_startproc
|
||
|
*/
|
||
|
|
||
|
stp x19, x20, [sp, -128]!
|
||
|
stp x21, x22, [sp, 16]
|
||
|
stp x23, x24, [sp, 32]
|
||
|
stp x25, x26, [sp, 48]
|
||
|
stp x27, x28, [sp, 64]
|
||
|
stp x29, x30, [sp, 80]
|
||
|
|
||
|
str PARAM_lzma, [sp, 120]
|
||
|
|
||
|
mov bufLimit, PARAM_bufLimit
|
||
|
mov limit, PARAM_limit
|
||
|
|
||
|
LOAD_LZMA_PAIR dic, dicBufSize, offset_dic
|
||
|
LOAD_LZMA_PAIR dicPos, buf, offset_dicPos
|
||
|
LOAD_LZMA_PAIR rep0, rep1, offset_rep0
|
||
|
LOAD_LZMA_PAIR rep2, rep3, offset_rep2
|
||
|
|
||
|
mov t0, 1 << (kLenNumLowBits + 1 + PSHIFT)
|
||
|
LOAD_LZMA_BYTE pbMask, offset_pb
|
||
|
p2_add limit, dic
|
||
|
mov len, wzr // we can set it in all requiread branches instead
|
||
|
lsl pbMask, t0, pbMask
|
||
|
p2_add dicPos, dic
|
||
|
p2_sub pbMask, t0
|
||
|
|
||
|
LOAD_LZMA_BYTE lc2_lpMask, offset_lc
|
||
|
mov t0, 256 << PSHIFT
|
||
|
LOAD_LZMA_BYTE t1, offset_lp
|
||
|
p2_add t1, lc2_lpMask
|
||
|
p2_sub lc2_lpMask, (256 << PSHIFT) - PSHIFT
|
||
|
shl t0, t1
|
||
|
p2_add lc2_lpMask, t0
|
||
|
|
||
|
LOAD_LZMA_VAR probs_Spec, offset_probs
|
||
|
LOAD_LZMA_VAR checkDicSize, offset_checkDicSize
|
||
|
LOAD_LZMA_VAR processedPos, offset_processedPos
|
||
|
LOAD_LZMA_VAR state, offset_state
|
||
|
// range is r0 : this load must be last don't move
|
||
|
LOAD_LZMA_PAIR range, cod, offset_range
|
||
|
mov sym, wzr
|
||
|
shl state, PSHIFT
|
||
|
|
||
|
add_big probs_IsMatch, probs_Spec, ((IsMatch - SpecPos) << PSHIFT)
|
||
|
|
||
|
// if (processedPos != 0 || checkDicSize != 0)
|
||
|
orr t0, checkDicSize, processedPos
|
||
|
cbz t0, 1f
|
||
|
add t0_R, dicBufSize, dic
|
||
|
cmp dicPos, dic
|
||
|
cmovne t0_R, dicPos
|
||
|
ldrb sym, [t0_R, -1]
|
||
|
1:
|
||
|
IsMatchBranch_Pre
|
||
|
cmp state, 4 * PMULT
|
||
|
jb lit_end
|
||
|
cmp state, kNumLitStates * PMULT
|
||
|
jb lit_matched_end
|
||
|
jmp lz_end
|
||
|
|
||
|
|
||
|
|
||
|
#define BIT_0 BIT_0_R prob_reg
|
||
|
#define BIT_1 BIT_1_R prob_reg
|
||
|
#define BIT_2 BIT_2_R prob_reg
|
||
|
|
||
|
# ---------- LITERAL ----------
|
||
|
MY_ALIGN_64
|
||
|
lit_start:
|
||
|
mov state, wzr
|
||
|
lit_start_2:
|
||
|
LIT_PROBS
|
||
|
|
||
|
#ifdef _LZMA_SIZE_OPT
|
||
|
|
||
|
PLOAD_2 prob_reg, probs, 1 * PMULT
|
||
|
mov sym, 1
|
||
|
BIT_01
|
||
|
MY_ALIGN_FOR_LOOP
|
||
|
lit_loop:
|
||
|
BIT_1
|
||
|
tbz sym, 7, lit_loop
|
||
|
|
||
|
#else
|
||
|
|
||
|
BIT_0
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
|
||
|
#endif
|
||
|
|
||
|
BIT_2
|
||
|
IsMatchBranch_Pre
|
||
|
strb sym, [dicPos], 1
|
||
|
p2_and sym, 255
|
||
|
|
||
|
CheckLimits_lit
|
||
|
lit_end:
|
||
|
IF_BIT_0_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), lit_start
|
||
|
|
||
|
# jmp IsMatch_label
|
||
|
|
||
|
|
||
|
#define FLAG_STATE_BITS (4 + PSHIFT)
|
||
|
|
||
|
# ---------- MATCHES ----------
|
||
|
# MY_ALIGN_FOR_ENTRY
|
||
|
IsMatch_label:
|
||
|
UPDATE_1 probs_state, pbPos_R, (IsMatch - IsMatch)
|
||
|
IF_BIT_1 probs_state, 0, (IsRep - IsMatch), IsRep_label
|
||
|
|
||
|
SET_probs LenCoder
|
||
|
or state, (1 << FLAG_STATE_BITS)
|
||
|
|
||
|
# ---------- LEN DECODE ----------
|
||
|
len_decode:
|
||
|
mov len, 8 - kMatchMinLen
|
||
|
IF_BIT_0_NOUP_1 probs, len_mid_0
|
||
|
UPDATE_1 probs, 0, 0
|
||
|
p2_add probs, (1 << (kLenNumLowBits + PSHIFT))
|
||
|
mov len, 0 - kMatchMinLen
|
||
|
IF_BIT_0_NOUP_1 probs, len_mid_0
|
||
|
UPDATE_1 probs, 0, 0
|
||
|
p2_add probs, LenHigh * PMULT - (1 << (kLenNumLowBits + PSHIFT))
|
||
|
|
||
|
#if 0 == 1
|
||
|
BIT_0
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
#else
|
||
|
PLOAD_2 prob_reg, probs, 1 * PMULT
|
||
|
mov sym, 1
|
||
|
BIT_01
|
||
|
MY_ALIGN_FOR_LOOP
|
||
|
len8_loop:
|
||
|
BIT_1
|
||
|
tbz sym, 6, len8_loop
|
||
|
#endif
|
||
|
|
||
|
mov len, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - kMatchMinLen
|
||
|
jmp len_mid_2
|
||
|
|
||
|
MY_ALIGN_FOR_ENTRY
|
||
|
len_mid_0:
|
||
|
UPDATE_0 probs, 0, 0
|
||
|
p2_add probs, pbPos_R
|
||
|
BIT_0
|
||
|
len_mid_2:
|
||
|
BIT_1
|
||
|
BIT_2
|
||
|
sub len, sym, len
|
||
|
tbz state, FLAG_STATE_BITS, copy_match
|
||
|
|
||
|
# ---------- DECODE DISTANCE ----------
|
||
|
// probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
|
||
|
|
||
|
mov t0, 3 + kMatchMinLen
|
||
|
cmp len, 3 + kMatchMinLen
|
||
|
cmovb t0, len
|
||
|
SET_probs PosSlot - (kMatchMinLen << (kNumPosSlotBits))
|
||
|
add probs, probs, t0_R, lsl #(kNumPosSlotBits + PSHIFT)
|
||
|
|
||
|
#ifdef _LZMA_SIZE_OPT
|
||
|
|
||
|
PLOAD_2 prob_reg, probs, 1 * PMULT
|
||
|
mov sym, 1
|
||
|
BIT_01
|
||
|
MY_ALIGN_FOR_LOOP
|
||
|
slot_loop:
|
||
|
BIT_1
|
||
|
tbz sym, 5, slot_loop
|
||
|
|
||
|
#else
|
||
|
|
||
|
BIT_0
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
BIT_1
|
||
|
|
||
|
#endif
|
||
|
|
||
|
#define numBits t4
|
||
|
mov numBits, sym
|
||
|
BIT_2
|
||
|
// we need only low bits
|
||
|
p2_and sym, 3
|
||
|
cmp numBits, 32 + kEndPosModelIndex / 2
|
||
|
jb short_dist
|
||
|
|
||
|
SET_probs kAlign
|
||
|
|
||
|
# unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
|
||
|
p2_sub numBits, (32 + 1 + kNumAlignBits)
|
||
|
# distance = (2 | (distance & 1));
|
||
|
or sym, 2
|
||
|
PLOAD_2 prob_reg, probs, 1 * PMULT
|
||
|
add sym2_R, probs, 2 * PMULT
|
||
|
|
||
|
# ---------- DIRECT DISTANCE ----------
|
||
|
|
||
|
// Decode one direct (uncoded, probability-1/2) distance bit:
//   range >>= 1;  bit = (cod >= range);  if (bit) cod -= range;
//   sym = sym * 2 + bit;
// then decrement numBits and leave for direct_end when it reaches zero.
// The subs N flag ("mi" = cod < range) selects both results branch-free.
.macro DIRECT_1
        shr range, 1
        subs t0, cod, range
        p2_add sym, sym
        // add t1, sym, 1
        csel cod, cod, t0, mi           // bit 1: cod -= range
        csinc sym, sym, sym, mi         // bit 1: sym += 1
        // csel sym, t1, sym, pl
        // adc sym, sym, sym // not 100% compatible for "corrupted-allowed" LZMA streams
        dec_s numBits
        je direct_end
.endm
|
||
|
|
||
|
#ifdef _LZMA_SIZE_OPT
|
||
|
|
||
|
jmp direct_norm
|
||
|
MY_ALIGN_FOR_ENTRY
|
||
|
direct_loop:
|
||
|
DIRECT_1
|
||
|
direct_norm:
|
||
|
TEST_HIGH_BYTE_range
|
||
|
jnz direct_loop
|
||
|
NORM_2
|
||
|
jmp direct_loop
|
||
|
|
||
|
#else
|
||
|
|
||
|
// One direct bit with an inline normalization check: decode while the top
// byte of range is still nonzero; otherwise fall through to direct_unroll,
// which normalizes once and then decodes 8 bits without further checks.
.macro DIRECT_2
        TEST_HIGH_BYTE_range
        jz direct_unroll
        DIRECT_1
.endm
|
||
|
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
DIRECT_2
|
||
|
|
||
|
direct_unroll:
|
||
|
NORM_2
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
DIRECT_1
|
||
|
jmp direct_unroll
|
||
|
|
||
|
#endif
|
||
|
|
||
|
MY_ALIGN_FOR_ENTRY
|
||
|
direct_end:
|
||
|
shl sym, kNumAlignBits
|
||
|
REV_0 prob_reg
|
||
|
REV_1 prob_reg, 2
|
||
|
REV_1 prob_reg, 4
|
||
|
REV_2 prob_reg, 8
|
||
|
|
||
|
decode_dist_end:
|
||
|
|
||
|
// if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
|
||
|
|
||
|
tst checkDicSize, checkDicSize
|
||
|
csel t0, processedPos, checkDicSize, eq
|
||
|
cmp sym, t0
|
||
|
jae end_of_payload
|
||
|
// jmp end_of_payload # for debug
|
||
|
|
||
|
mov rep3, rep2
|
||
|
mov rep2, rep1
|
||
|
mov rep1, rep0
|
||
|
add rep0, sym, 1
|
||
|
|
||
|
// Branch-free state transition after a plain match.  (state is pre-scaled
// by PSHIFT and carries the (1 << FLAG_STATE_BITS) marker set on the
// match path, hence the adjusted comparison constant.)
.macro STATE_UPDATE_FOR_MATCH
        // state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        // cmp state, (kNumStates + kNumLitStates) * PMULT
        cmp state, kNumLitStates * PMULT + (1 << FLAG_STATE_BITS)
        mov state, kNumLitStates * PMULT
        mov t0, (kNumLitStates + 3) * PMULT
        cmovae state, t0
.endm
|
||
|
STATE_UPDATE_FOR_MATCH
|
||
|
|
||
|
# ---------- COPY MATCH ----------
|
||
|
copy_match:
|
||
|
|
||
|
// if ((rem = limit - dicPos) == 0) break // return SZ_ERROR_DATA;
|
||
|
subs cnt_R, limit, dicPos
|
||
|
// jz fin_dicPos_LIMIT
|
||
|
jz fin_OK
|
||
|
|
||
|
// curLen = ((rem < len) ? (unsigned)rem : len);
|
||
|
cmp cnt_R, len_R
|
||
|
cmovae cnt, len
|
||
|
|
||
|
sub t0_R, dicPos, dic
|
||
|
p2_add dicPos, cnt_R
|
||
|
p2_add processedPos, cnt
|
||
|
p2_sub len, cnt
|
||
|
|
||
|
// pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
|
||
|
p2_sub_s t0_R, rep0_R
|
||
|
jae 1f
|
||
|
|
||
|
cmn t0_R, cnt_R
|
||
|
p2_add t0_R, dicBufSize
|
||
|
ja copy_match_cross
|
||
|
1:
|
||
|
# ---------- COPY MATCH FAST ----------
|
||
|
# t0_R : src_pos
|
||
|
p2_add t0_R, dic
|
||
|
ldrb sym, [t0_R]
|
||
|
p2_add t0_R, cnt_R
|
||
|
p1_neg cnt_R
|
||
|
|
||
|
copy_common:
|
||
|
dec dicPos
|
||
|
|
||
|
# dicPos : (ptr_to_last_dest_BYTE)
|
||
|
# t0_R : (src_lim)
|
||
|
# cnt_R : (-curLen)
|
||
|
|
||
|
IsMatchBranch_Pre
|
||
|
|
||
|
inc_s cnt_R
|
||
|
jz copy_end
|
||
|
|
||
|
cmp rep0, 1
|
||
|
je copy_match_0
|
||
|
|
||
|
#ifdef LZMA_USE_2BYTES_COPY
|
||
|
strb sym, [dicPos, cnt_R]
|
||
|
dec dicPos
|
||
|
# dicPos : (ptr_to_last_dest_16bitWORD)
|
||
|
p2_and cnt_R, -2
|
||
|
ldrh sym, [t0_R, cnt_R]
|
||
|
adds cnt_R, cnt_R, 2
|
||
|
jz 2f
|
||
|
MY_ALIGN_FOR_LOOP
|
||
|
1:
|
||
|
/*
|
||
|
strh sym, [dicPos, cnt_R]
|
||
|
ldrh sym, [t0_R, cnt_R]
|
||
|
adds cnt_R, cnt_R, 2
|
||
|
jz 2f
|
||
|
*/
|
||
|
|
||
|
strh sym, [dicPos, cnt_R]
|
||
|
ldrh sym, [t0_R, cnt_R]
|
||
|
adds cnt_R, cnt_R, 2
|
||
|
jnz 1b
|
||
|
2:
|
||
|
|
||
|
/*
|
||
|
// for universal little/big endian code, but slow
|
||
|
strh sym, [dicPos]
|
||
|
inc dicPos
|
||
|
ldrb sym, [t0_R, -1]
|
||
|
*/
|
||
|
|
||
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||
|
// we must improve big-endian detection for another compilers
|
||
|
// for big-endian we need to revert bytes
|
||
|
rev16 sym, sym
|
||
|
#endif
|
||
|
|
||
|
// (sym) must represent as little-endian here:
|
||
|
strb sym, [dicPos], 1
|
||
|
shr sym, 8
|
||
|
|
||
|
#else
|
||
|
|
||
|
MY_ALIGN_FOR_LOOP
|
||
|
1:
|
||
|
strb sym, [dicPos, cnt_R]
|
||
|
ldrb sym, [t0_R, cnt_R]
|
||
|
inc_s cnt_R
|
||
|
jz copy_end
|
||
|
|
||
|
strb sym, [dicPos, cnt_R]
|
||
|
ldrb sym, [t0_R, cnt_R]
|
||
|
inc_s cnt_R
|
||
|
jnz 1b
|
||
|
#endif
|
||
|
|
||
|
copy_end:
|
||
|
lz_end_match:
|
||
|
strb sym, [dicPos], 1
|
||
|
|
||
|
# IsMatchBranch_Pre
|
||
|
CheckLimits
|
||
|
lz_end:
|
||
|
IF_BIT_1_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
|
||
|
|
||
|
|
||
|
|
||
|
# ---------- LITERAL MATCHED ----------
|
||
|
|
||
|
LIT_PROBS
|
||
|
|
||
|
// matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
|
||
|
|
||
|
sub t0_R, dicPos, dic
|
||
|
p2_sub_s t0_R, rep0_R
|
||
|
|
||
|
#ifdef LZMA_USE_CMOV_LZ_WRAP
|
||
|
add t1_R, t0_R, dicBufSize
|
||
|
cmovb t0_R, t1_R
|
||
|
#else
|
||
|
jae 1f
|
||
|
p2_add t0_R, dicBufSize
|
||
|
1:
|
||
|
#endif
|
||
|
|
||
|
ldrb match, [dic, t0_R]
|
||
|
|
||
|
// state -= (state < 10) ? 3 : 6;
|
||
|
sub sym, state, 6 * PMULT
|
||
|
cmp state, 10 * PMULT
|
||
|
p2_sub state, 3 * PMULT
|
||
|
cmovae state, sym
|
||
|
|
||
|
#ifdef _LZMA_SIZE_OPT
|
||
|
|
||
|
mov offs, 256 * PMULT
|
||
|
shl match, (PSHIFT + 1)
|
||
|
mov sym, 1
|
||
|
and bit, match, offs
|
||
|
add prm, probs, offs_R
|
||
|
|
||
|
MY_ALIGN_FOR_LOOP
|
||
|
litm_loop:
|
||
|
LITM
|
||
|
tbz sym, 8, litm_loop
|
||
|
|
||
|
#else
|
||
|
|
||
|
LITM_0
|
||
|
LITM
|
||
|
LITM
|
||
|
LITM
|
||
|
LITM
|
||
|
LITM
|
||
|
LITM
|
||
|
LITM_2
|
||
|
|
||
|
#endif
|
||
|
|
||
|
IsMatchBranch_Pre
|
||
|
strb sym, [dicPos], 1
|
||
|
p2_and sym, 255
|
||
|
|
||
|
// mov len, wzr // LITM uses same regisetr (len / offs). So we clear it
|
||
|
CheckLimits_lit
|
||
|
lit_matched_end:
|
||
|
IF_BIT_1_NOUP probs_state, pbPos_R, (IsMatch - IsMatch), IsMatch_label
|
||
|
# IsMatchBranch
|
||
|
p2_sub state, 3 * PMULT
|
||
|
jmp lit_start_2
|
||
|
|
||
|
|
||
|
|
||
|
# ---------- REP 0 LITERAL ----------
# Short-rep case: copy a single byte from distance rep0.
MY_ALIGN_FOR_ENTRY
IsRep0Short_label:
        UPDATE_0 probs_state, pbPos_R, 0

        // dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        sub     t0_R, dicPos, dic

        // state = state < kNumLitStates ? 9 : 11;
        or      state, 1 * PMULT

        # the caller doesn't allow (dicPos >= limit) case for REP_SHORT
        # so we don't need the following (dicPos == limit) check here:
        #       cmp dicPos, limit
        #       jae fin_dicPos_LIMIT_REP_SHORT
        # // jmp fin_dicPos_LIMIT_REP_SHORT // for testing/debug purposes

        inc     processedPos

        IsMatchBranch_Pre

        p2_sub_s t0_R, rep0_R           // flags: borrow => need dicBufSize wrap
#ifdef LZMA_USE_CMOV_LZ_WRAP
        add     sym_R, t0_R, dicBufSize
        cmovb   t0_R, sym_R
#else
        jae     1f
        p2_add  t0_R, dicBufSize
1:
#endif

        ldrb    sym, [dic, t0_R]        // fetch the repeated byte
        // mov len, wzr
        jmp     lz_end_match
// ---------- REP MATCH DISPATCH ----------
// Select which of rep0..rep3 is used; rotate the rep history accordingly.
MY_ALIGN_FOR_ENTRY
IsRep_label:
        UPDATE_1 probs_state, 0, (IsRep - IsMatch)

        # The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
        # So we don't check it here.

        # mov t0, processedPos
        # or  t0, checkDicSize
        # jz  fin_ERROR_2

        // state = state < kNumLitStates ? 8 : 11;
        cmp     state, kNumLitStates * PMULT
        mov     state, 8 * PMULT
        mov     probBranch, 11 * PMULT
        cmovae  state, probBranch

        SET_probs RepLenCoder

        IF_BIT_1 probs_state, 0, (IsRepG0 - IsMatch), IsRepG0_label
        sub_big probs_state, probs_state, (IsMatch - IsRep0Long) << PSHIFT
        IF_BIT_0_NOUP probs_state, pbPos_R, 0, IsRep0Short_label
        UPDATE_1 probs_state, pbPos_R, 0
        jmp     len_decode

MY_ALIGN_FOR_ENTRY
IsRepG0_label:
        UPDATE_1 probs_state, 0, (IsRepG0 - IsMatch)
        IF_BIT_1 probs_state, 0, (IsRepG1 - IsMatch), IsRepG1_label
        // use rep1: swap rep0 <-> rep1
        mov     dist, rep1
        mov     rep1, rep0
        mov     rep0, dist
        jmp     len_decode

# MY_ALIGN_FOR_ENTRY
IsRepG1_label:
        UPDATE_1 probs_state, 0, (IsRepG1 - IsMatch)
        IF_BIT_1 probs_state, 0, (IsRepG2 - IsMatch), IsRepG2_label
        // use rep2: rotate rep0..rep2
        mov     dist, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        mov     rep0, dist
        jmp     len_decode

# MY_ALIGN_FOR_ENTRY
IsRepG2_label:
        UPDATE_1 probs_state, 0, (IsRepG2 - IsMatch)
        // use rep3: rotate rep0..rep3
        mov     dist, rep3
        mov     rep3, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        mov     rep0, dist
        jmp     len_decode
# ---------- SPEC SHORT DISTANCE ----------
# Decode a short distance via the reverse bit-tree in the SpecPos probs.

MY_ALIGN_FOR_ENTRY
short_dist:
        p2_sub_s numBits, 32 + 1
        jbe     decode_dist_end
        or      sym, 2
        shl     sym, numBits
        // sym_R = &probs_Spec[SpecPos + sym + 1] (scaled by PSHIFT)
        add     sym_R, probs_Spec, sym_R, lsl #PSHIFT
        p2_add  sym_R, SpecPos * PMULT + 1 * PMULT
        mov     sym2, PMULT             // # step
MY_ALIGN_FOR_LOOP
spec_loop:
        REV_1_VAR prob_reg
        dec_s   numBits
        jnz     spec_loop

        // Convert final probs pointer back into the decoded index value.
        p2_add  sym2_R, probs_Spec
.if SpecPos != 0
        p2_add  sym2_R, SpecPos * PMULT
.endif
        p2_sub  sym_R, sym2_R
        shr     sym, PSHIFT

        jmp     decode_dist_end
# ---------- COPY MATCH 0 ----------
# Fill (match with distance covered by a single repeated byte in sym).
# cnt_R is a negative count that increments toward zero.
MY_ALIGN_FOR_ENTRY
copy_match_0:
#ifdef LZMA_USE_4BYTES_FILL
        // Head: store single bytes (up to 3) before switching to word fill.
        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        // Replicate the byte across a 32-bit word, align cnt_R to 4,
        // then fill 4 bytes per iteration.
        orr     t3, sym, sym, lsl 8
        p2_and  cnt_R, -4
        orr     t3, t3, t3, lsl 16
MY_ALIGN_FOR_LOOP_16
1:
        /*
        str     t3, [dicPos, cnt_R]
        adds    cnt_R, cnt_R, 4
        jz      2f
        */

        str     t3, [dicPos, cnt_R]
        adds    cnt_R, cnt_R, 4
        jnz     1b
2:
        // p2_and sym, 255
#else

        // Simple variant: two byte stores per iteration.
MY_ALIGN_FOR_LOOP
1:
        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jz      copy_end

        strb    sym, [dicPos, cnt_R]
        inc_s   cnt_R
        jnz     1b
#endif

        jmp     copy_end
# ---------- COPY MATCH CROSS ----------
# Match copy where the source wraps around the end of the circular
# dictionary buffer.
copy_match_cross:
        # t0_R - src pos
        # cnt_R - total copy len

        p1_neg  cnt_R
1:
        // Copy byte-by-byte until the source reaches dicBufSize (the wrap).
        ldrb    sym, [dic, t0_R]
        inc     t0_R
        strb    sym, [dicPos, cnt_R]
        inc     cnt_R
        cmp     t0_R, dicBufSize
        jne     1b

        // Source wrapped: continue the remainder from the start of dic.
        ldrb    sym, [dic]
        sub     t0_R, dic, cnt_R
        jmp     copy_common
/*
fin_dicPos_LIMIT_REP_SHORT:
        mov     len, 1
        jmp     fin_OK
*/

/*
fin_dicPos_LIMIT:
        jmp     fin_OK
        # For more strict mode we can stop decoding with error
        # mov sym, 1
        # jmp fin
*/

// Bad match distance: record the error in len/state, save reps, return 1.
fin_ERROR_MATCH_DIST:
        # rep0 = distance + 1;
        p2_add  len, kMatchSpecLen_Error_Data
        mov     rep3, rep2
        mov     rep2, rep1
        mov     rep1, rep0
        mov     rep0, sym
        STATE_UPDATE_FOR_MATCH
        # jmp fin_OK
        mov     sym, 1
        jmp     fin

// Distance == 0xFFFFFFFF marks the end-of-payload symbol.
end_of_payload:
        inc_s   sym
        jnz     fin_ERROR_MATCH_DIST

        mov     len, kMatchSpecLenStart
        xor     state, (1 << FLAG_STATE_BITS)
        jmp     fin_OK

/*
fin_OK_lit:
        mov     len, wzr
*/

fin_OK:
        mov     sym, wzr                // return value 0 = success

fin:
        NORM

// Write the decoder state back into the CLzmaDec structure.
#define fin_lzma_reg t0_R

.macro STORE_LZMA_VAR reg:req, struct_offs:req
        str     \reg, [fin_lzma_reg, \struct_offs]
.endm

.macro STORE_LZMA_PAIR reg0:req, reg1:req, struct_offs:req
        stp     \reg0, \reg1, [fin_lzma_reg, \struct_offs]
.endm

        ldr     fin_lzma_reg, [sp, 120] // CLzmaDec pointer saved at entry
        p2_sub  dicPos, dic             // dicPos back to offset form
        shr     state, PSHIFT

        STORE_LZMA_PAIR dicPos, buf, offset_dicPos
        STORE_LZMA_PAIR range, cod, offset_range
        STORE_LZMA_VAR  processedPos, offset_processedPos
        STORE_LZMA_PAIR rep0, rep1, offset_rep0
        STORE_LZMA_PAIR rep2, rep3, offset_rep2
        STORE_LZMA_PAIR state, len, offset_state

        mov     w0, sym                 // function result

        // Restore callee-saved registers and release the 128-byte frame.
        ldp     x29, x30, [sp, 80]
        ldp     x27, x28, [sp, 64]
        ldp     x25, x26, [sp, 48]
        ldp     x23, x24, [sp, 32]
        ldp     x21, x22, [sp, 16]
        ldp     x19, x20, [sp], 128

        ret

/*
        .cfi_endproc
.LFE0:
        .size   LzmaDec_DecodeReal_3, .-LzmaDec_DecodeReal_3
        .ident  "TAG_LZMA"
        .section        .note.GNU-stack,"",@progbits
*/