按原样从StackOverflow 重新发布。
我正在尝试从 ARMv5t 驱动的 Gameboy Advance ROM 中解压缩一个非常长的文本块,该文本块是使用某种自定义 LZ 式压缩算法进行压缩的。
解压算法,据我了解如下(别人算出来的):
Read data in 32 bit pieces
position = 0
output = initialized buffer
read 1 bit
if (bit == 1):
read next 7 bits, save at current position (ASCII character)
if (bit == 0):
read 2 bits:
if (bits == 00):
read 7 bits (X)
read 2 bits (Y)
take Y + 2 characters from (position - X)
write Y + 2 characters at current position to output
save X as save_X
if (bits == 01):
read 4 bits (X)
take the character at (position - X + 1)
write 1 character at current position to output
if (bits == 10):
read 3 bits (Y)
take Y characters at (position - save_X)
write Y characters at current position to output
if (bits == 11):
read 9 bits (X)
read 2 bits (Y)
Z = (Y + 1) * 2 + 1;
take Z characters at (position - X)
write Z characters at (position - X) to output
我的 C 实现对于前 157 个字符非常有效,其中 [bits == 11] 永远不会发生。然后,一旦这个写入完成,接下来的6位的目的对我来说是完全未知的,如果我跳过它们,下一个字符将正确显示,仅此而已,那就是胡言乱语。另外,可能值得注意的是,压缩字符串的前 17 位的来源似乎是无用的零,所以我什么也没做。
typedef struct tag_input_t {
uint32_t data;
FILE* file;
unsigned buffer;
int buffer_bits;
} input_t;
uint32_t data = get_next_block(file); // 32 bit block
input_t input; // input is a struct with a buffer, so that if I need to read X bits and there are only Y (Y<X) bits left, I can load the next 32 bit block.
init_input(&input, data, file);
new_read_bits(&input, 17);
int save_X = 0;
int position = 0;
while (position < 256) {
unsigned is_char = new_read_bits(&input, 1);
if (is_char) output[position++] = new_read_bits(&input, 7);
else {
unsigned mode = new_read_bits(&input, 2);
unsigned X, Y, Z;
switch (mode) {
case 0:
save_X = X = new_read_bits(&input, 7);
Y = new_read_bits(&input, 2);
memmove(output + position, output + position - X, Y + 2);
position += Y + 2;
break;
case 1:
X = new_read_bits(&input, 4);
output[position] = output[position - X + 1];
position++;
break;
case 2:
Y = new_read_bits(&input, 3);
memmove(output + position, output + position - save_X, Y + 1);
position += Y + 1;
break;
default: //case 3:
X = new_read_bits(&input, 9);
Y = new_read_bits(&input, 2);
Z = (Y + 1) * 2 + 1;
memmove(output + position, output + position - X, Z);
position += Z;
break;
}
}
}
压缩的字符串可以在这里找到(部分),我被困在 08D0CCD0。请记住,这些值需要读作 20006491 A023C247 等...
91640020 47C223A0 1E698F90 79443E92 8FE4E740 31501F79 C43D0736 25F4087D 203C0FE6 C0151332 3E1190C3 02ECF3E0 09584121 DCF3A03D A7C1261D 6B430698 E6422195 41701E0F 34BF0BF4 0EECA58E 3332407C 0BE49541 0262203D DEC7E15C 08D0CCD0 3C8D1C09 7004C4C5 66EEF099
因此,例如,如果我们取前 8 个字节 91640020 和 A023C247,我们将得到:
00100000000000000 11001001 0010001 10100000 0010001 11100001 0010001 11__
first 17 are ignored ^------- ^ ---- ^------- ^ ---- ^------- ^ ---- ^
I * (space) * a *
* - results in \0
^ - first read bit
The result will be I. a.d.m.i.r.e. .y.o.u.r. .e.n.t.h.u.s.i.a.s.m (. is 00)
ASM 中的例程可以在这里找到:
[0300:0040] e92d4ff0 stmfd sp!, {r4-r11,lr}
[0300:0044] e3a05008 mov r5, #0x8
[0300:0048] e3a0a000 mov r10, #0x0
[0300:004c] e3a0c001 mov r12, #0x1
[0300:0050] ea000003 b $03000064
[0300:0054] e1a08003 mov r8, r3
[0300:0058] eb00008b bl $0300028c
[0300:005c] e0877004 add r7, r7, r4
[0300:0060] e4c07001 strb r7, [r0], #0x1
[0300:0064] e25aa001 subs r10, r10, #0x1
[0300:0068] 44916004 ldrmi r6, [r1], #0x4
[0300:006c] 43a0a01f movmi r10, #0x1f
[0300:0070] e0966006 adds r6, r6, r6
[0300:0074] 2afffff6 bcs $03000054
[0300:0078] e25aa001 subs r10, r10, #0x1
[0300:007c] 44916004 ldrmi r6, [r1], #0x4
[0300:0080] 43a0a01f movmi r10, #0x1f
[0300:0084] e0966006 adds r6, r6, r6
[0300:0088] 2a00003b bcs $0300017c
[0300:008c] e25aa001 subs r10, r10, #0x1
[0300:0090] 44916004 ldrmi r6, [r1], #0x4
[0300:0094] 43a0a01f movmi r10, #0x1f
[0300:0098] e0966006 adds r6, r6, r6
[0300:009c] 3a000023 bcc $03000130
[0300:00a0] e3a08004 mov r8, #0x4
[0300:00a4] eb000078 bl $0300028c
[0300:00a8] e2577001 subs r7, r7, #0x1
[0300:00ac] 0affffeb beq $03000060
[0300:00b0] 5a00001c bpl $03000128
[0300:00b4] e25aa001 subs r10, r10, #0x1
[0300:00b8] 44916004 ldrmi r6, [r1], #0x4
[0300:00bc] 43a0a01f movmi r10, #0x1f
[0300:00c0] e0966006 adds r6, r6, r6
[0300:00c4] 3a00000b bcc $030000f8
[0300:00c8] e3a09f40 mov r9, #0x100
[0300:00cc] e3a08008 mov r8, #0x8
[0300:00d0] eb00006d bl $0300028c
[0300:00d4] e4c07001 strb r7, [r0], #0x1
[0300:00d8] e2599001 subs r9, r9, #0x1
[0300:00dc] 1afffffa bne $030000cc
[0300:00e0] e25aa001 subs r10, r10, #0x1
[0300:00e4] 44916004 ldrmi r6, [r1], #0x4
[0300:00e8] 43a0a01f movmi r10, #0x1f
[0300:00ec] e0966006 adds r6, r6, r6
[0300:00f0] 2afffff4 bcs $030000c8
[0300:00f4] eaffffda b $03000064
[0300:00f8] e3a04000 mov r4, #0x0
[0300:00fc] e25aa001 subs r10, r10, #0x1
[0300:0100] 44916004 ldrmi r6, [r1], #0x4
[0300:0104] 43a0a01f movmi r10, #0x1f
[0300:0108] e0966006 adds r6, r6, r6
[0300:010c] e2a43007 adc r3, r4, #0x7
[0300:0110] e3530008 cmps r3, #0x8
[0300:0114] 0affffd2 beq $03000064
[0300:0118] e3a08008 mov r8, #0x8
[0300:011C] eb00005a bl $0300028c
[0300:0120] e1a04007 mov r4, r7
[0300:0124] eaffffce b $03000064
[0300:0128] e7507007 ldrb r7, [r0, -r7]
[0300:012c] eaffffcb b $03000060
[0300:0130] e3a08007 mov r8, #0x7
[0300:0134] eb000054 bl $0300028c
[0300:0138] e3570000 cmps r7, #0x0
[0300:013c] 0a000006 beq $0300015c
[0300:0140] e1a0c007 mov r12, r7
[0300:0144] e3a08002 mov r8, #0x2
[0300:0148] eb00004f bl $0300028c
[0300:014C] e2879002 add r9, r7, #0x2
[0300:0150] e35c0001 cmps r12, #0x1
[0300:0154] 1a000042 bne $03000264
[0300:0158] ea000046 b $03000278
[0300:015c] e3a08002 mov r8, #0x2
[0300:0160] eb000049 bl $0300028c
[0300:0164] e3570000 cmps r7, #0x0
[0300:0168] 0a000059 beq $030002d4
[0300:016C] e2878003 add r8, r7, #0x03
[0300:0170] eb000045 bl $0300028c
[0300:0174] e1a05007 mov r5, r7
[0300:0178] eaffffb9 b $03000064
[0300:017c] e3a09001 mov r9, #0x1
[0300:0180] e25aa001 subs r10, r10, #0x1
[0300:0184] 44916004 ldrmi r6, [r1], #0x4
[0300:0188] 43a0a01f movmi r10, #0x1f
[0300:018c] e0966006 adds r6, r6, r6
[0300:0190] e0a99009 adc r9, r9, r9
[0300:0194] e25aa001 subs r10, r10, #0x1
[0300:0198] 44916004 ldrmi r6, [r1], #0x4
[0300:019c] 43a0a01f movmi r10, #0x1f
[0300:01a0] e0966006 adds r6, r6, r6
[0300:01a4] 2afffff5 bcs $03000180
[0300:01a8] e3590002 cmps r9, #0x2
[0300:01ac] 1a00000d bne $030001e8
[0300:01b0] e3a09001 mov r9, #0x1
[0300:01b4] e25aa001 subs r10, r10, #0x1
[0300:01b8] 44916004 ldrmi r6, [r1], #0x4
[0300:01bc] 43a0a01f movmi r10, #0x1f
[0300:01c0] e0966006 adds r6, r6, r6
[0300:01c4] e0a99009 adc r9, r9, r9
[0300:01c8] e25aa001 subs r10, r10, #0x1
[0300:01cc] 44916004 ldrmi r6, [r1], #0x4
[0300:01d0] 43a0a01f movmi r10, #0x1f
[0300:01d4] e0966006 adds r6, r6, r6
[0300:01d8] 2afffff5 bcs $030001b4
[0300:01dc] e35c0001 cmps r12, #0x1
[0300:01e0] 1a00001f bne $03000264
[0300:01e4] ea000023 b $03000278
[0300:01e8] e2499003 sub r9, r9, #0x3
[0300:01ec] e1a08005 mov r8, r5
[0300:01f0] eb000025 bl $0300028c
[0300:01f4] e087c519 add r12, r7, r9, lsl r5
[0300:01f8] e3a09001 mov r9, #0x1
[0300:01fc] e25aa001 subs r10, r10, #0x1
[0300:0200] 44916004 ldrmi r6, [r1], #0x4
[0300:0204] 43a0a01f movmi r10, #0x1f
[0300:0208] e0966006 adds r6, r6, r6
[0300:020c] e0a99009 adc r9, r9, r9
[0300:0210] e25aa001 subs r10, r10, #0x1
[0300:0214] 44916004 ldrmi r6, [r1], #0x4
[0300:0218] 43a0a01f movmi r10, #0x1f
[0300:021c] e0966006 adds r6, r6, r6
[0300:0220] 2afffff5 bcs $030001fc
[0300:0224] e35c0b40 cmps r12, #0x10000
[0300:0228] 22899003 addcs r9, r9, #0x3
[0300:022c] 2a00000c bcs $03000264
[0300:0230] e59f80f0 ldr r8, [$03000328] (=$000037ff)
[0300:0234] e15c0008 cmps r12, r8
[0300:0238] 22899002 addcs r9, r9, #0x2
[0300:023c] 2a000008 bcs $03000264
[0300:0240] e59f80e4 ldr r8, [$0300032c] (=$0000027f)
[0300:0244] e15c0008 cmps r12, r8
[0300:0248] 22899001 addcs r9, r9, #0x1
[0300:024c] 2a000004 bcs $03000264
[0300:0250] e35c007f cmps r12, #0x7f
[0300:0254] 8a000002 bhi $03000264
[0300:0258] e2899004 add r9, r9, #0x4
[0300:025c] e35c0001 cmps r12, #0x1
[0300:0260] 0a000004 beq $03000278
[0300:0264] e750800c ldrb r8, [r0, -r12]
[0300:0268] e4c08001 strb r8, [r0], #0x1
[0300:026c] e2599001 subs r9, r9, #0x1
[0300:0270] 1afffffb bne $03000264
[0300:0274] eaffff7a b $03000064
[0300:0278] e750800c ldrb r8, [r0, -r12]
[0300:027c] e4c08001 strb r8, [r0], #0x1
[0300:0280] e2599001 subs r9, r9, #0x1
[0300:0284] 1afffffc bne $0300027c
[0300:0288] eaffff75 b $03000064
[0300:028c] e35a0000 cmps r10, #0x0
[0300:0290] 04916004 ldreq r6, [r1], #0x4
[0300:0294] 03a0a020 moveq r10, #0x20
[0300:0298] e1a0b008 mov r11, r8
[0300:029c] e15b000a cmps r11, r10
[0300:02a0] 81a0b00a movhi r11, r10
[0300:02a4] e26b7020 rsb r7, r11, #0x20
[0300:02a8] e1a07736 mov r7, r6, lsr r7
[0300:02ac] e1a06b16 mov r6, r6, lsl r11
[0300:02b0] e04aa00b sub r10, r10, r11
[0300:02b4] e058800b subs r8, r8, r11
[0300:02b8] 012fff1e bxeq lr
[0300:02bc] e4916004 ldr r6, [r1], #0x4
[0300:02c0] e268a020 rsb r10, r8, #0x20
[0300:02c4] e1a0ba36 mov r11, r6, lsr r10
[0300:02c8] e08b7817 add r7, r11, r7, lsl r8
[0300:02cc] e1a06816 mov r6, r6, lsl r8
[0300:02d0] e12fff1e bx lr
[0300:02d4] e8bd4ff0 ldmfd sp!, {r4-r11,lr}
[0300:02d8] e12fff1e bx lr