/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2014-11-10 : Igor Pavlov : Public domain */
|
||
|
|
||
|
#include "Precomp.h"
|
||
|
|
||
|
/* #define SHOW_STAT */
|
||
|
|
||
|
#ifdef SHOW_STAT
|
||
|
#include <stdio.h>
|
||
|
#define PRF(x) x
|
||
|
#else
|
||
|
#define PRF(x)
|
||
|
#endif
|
||
|
|
||
|
#include <windows.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "Bcj2.h"
|
||
|
#include "CpuArch.h"
|
||
|
|
||
|
/* Type of one adaptive probability counter (elements of p->probs). */
#define CProb UInt16

/* Bit-model parameters: probabilities live on a scale of kBitModelTotal
   and are moved by 1 / (1 << kNumMoveBits) of the remaining distance
   after every coded bit. */
#define kNumModelBits 11
#define kBitModelTotal (1 << kNumModelBits)
#define kNumMoveBits 5

/* When p->range falls below this value, Bcj2Enc_Encode_2 shifts one byte
   out of the range coder and multiplies range by 256. */
#define kTopValue ((UInt32)1 << 24)
|
||
|
|
||
|
/*
  Bcj2Enc_Init: puts the encoder object into its initial state.

  Resets the state / finish mode, the range-coder registers, the address
  tracking fields, the temp-buffer and flush counters, and sets every
  probability counter to half of kBitModelTotal.

  This function does not assign src, srcLim, bufs, lims or tempTarget.
*/
void Bcj2Enc_Init(CBcj2Enc *p)
{
  CProb *prob = p->probs;
  CProb * const probsEnd = p->probs + sizeof(p->probs) / sizeof(p->probs[0]);

  /* all probability counters start at the midpoint of the model scale */
  while (prob != probsEnd)
    *prob++ = kBitModelTotal >> 1;

  /* state machine */
  p->state = BCJ2_ENC_STATE_OK;
  p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
  p->flushPos = 0;
  p->tempPos = 0;

  /* range coder registers */
  p->low = 0;
  p->range = 0xFFFFFFFF;
  p->cache = 0;
  p->cacheSize = 1;

  /* address tracking and conversion limits */
  p->ip = 0;
  p->fileIp = 0;
  p->fileSize = 0;
  p->relatLimit = BCJ2_RELAT_LIMIT;

  p->prevByte = 0;
}
|
||
|
|
||
|
/*
  RangeEnc_ShiftLow: shifts one byte out of the range coder's `low` register.

  Pending bytes are written to the BCJ2_STREAM_RC buffer only when the low
  32 bits of `low` are below 0xFF000000 or a carry is present above bit 31.
  In that case the cached byte and the following (cacheSize - 1) bytes of 0xFF
  are written, each with the carry byte added, and the cache is reloaded from
  bits 24..31 of `low`. Otherwise nothing is written and the byte is only
  counted in cacheSize.

  Returns True when the RC output buffer is full before all pending bytes
  were written: p->state is set to BCJ2_STREAM_RC, `low` is left unshifted and
  cache / cacheSize describe the bytes still pending.
  Returns False after `low` has been shifted left by 8 bits (truncated to 32 bits).
*/
static Bool MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
{
  if ((UInt32)(p->low >> 32) != 0 || (UInt32)p->low < (UInt32)0xFF000000)
  {
    Byte *out = p->bufs[BCJ2_STREAM_RC];
    const Byte carry = (Byte)(p->low >> 32);

    for (;;)
    {
      if (out == p->lims[BCJ2_STREAM_RC])
      {
        /* no room: remember how far we got and report the blocked stream */
        p->bufs[BCJ2_STREAM_RC] = out;
        p->state = BCJ2_STREAM_RC;
        return True;
      }

      *out++ = (Byte)(p->cache + carry);
      /* every pending byte after the first one is 0xFF (plus carry) */
      p->cache = 0xFF;

      if (--p->cacheSize == 0)
        break;
    }

    p->bufs[BCJ2_STREAM_RC] = out;
    p->cache = (Byte)((UInt32)p->low >> 24);
  }

  p->low = (UInt32)p->low << 8;
  p->cacheSize++;
  return False;
}
|
||
|
|
||
|
/*
  Bcj2Enc_Encode_2: encodes bytes from p->src .. p->srcLim until it has to stop.

  Input bytes are copied to the MAIN stream. The copy loop stops at a
  "marker" byte: E8 or E9, or a byte 8x that directly follows 0F.
  The marker byte is copied to MAIN as well, then one range-coded decision is
  made for it using the probability selected as:
      E8      -> probs[2 + previous byte]
      E9      -> probs[1]
      0F 8x   -> probs[0]
  If at least 4 more input bytes are available and the 32-bit value read with
  GetUi32 passes the fileSize / relatLimit checks, the value is converted:
  the 4 bytes are consumed, (ip after these 4 bytes) + value is stored with
  SetBe32 into the CALL stream (for E8) or the JUMP stream (otherwise).
  If not converted, the 4 bytes stay in the input and are processed as
  ordinary bytes.

  The function returns with p->state telling why it stopped:
    - a stream index (RC, MAIN, CALL, JUMP): that output buffer is full.
      For CALL / JUMP the value to write is kept in p->tempTarget and is
      written first on the next call.
    - BCJ2_ENC_STATE_ORIG: more input is needed (in CONTINUE mode the last
      4 or fewer bytes are never consumed), or input is exhausted and
      finishMode is not END_STREAM.
    - BCJ2_ENC_STATE_OK: finishMode is END_STREAM, all input is consumed and
      RangeEnc_ShiftLow has been called 5 times to flush the range coder.
*/
static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{
  /* Resume: the previous call stopped because a 32-bit stream had no room. */
  if (BCJ2_IS_32BIT_STREAM(p->state))
  {
    Byte *out32 = p->bufs[p->state];
    if (out32 == p->lims[p->state])
      return;
    SetBe32(out32, p->tempTarget);
    p->bufs[p->state] = out32 + 4;
  }

  p->state = BCJ2_ENC_STATE_ORIG;

  for (;;)
  {
    /* normalize the range coder before the next decision */
    if (p->range < kTopValue)
    {
      if (RangeEnc_ShiftLow(p))
        return;
      p->range <<= 8;
    }

    {
      const Byte *src = p->src;
      const Byte *scanEnd;
      Byte *dest;
      SizeT avail = p->srcLim - src;
      SizeT room;
      SizeT consumed;

      if (p->finishMode != BCJ2_ENC_FINISH_MODE_CONTINUE)
      {
        if (avail == 0)
          break;
      }
      else
      {
        /* keep 4 bytes of lookahead so that a marker's operand is always visible */
        if (avail <= 4)
          return;
        avail -= 4;
      }

      dest = p->bufs[BCJ2_STREAM_MAIN];
      room = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
      if (avail > room)
      {
        if (room == 0)
        {
          p->state = BCJ2_STREAM_MAIN;
          return;
        }
        avail = room;
      }

      scanEnd = src + avail;

      /* Copy bytes to MAIN until a marker byte is found or scanEnd is reached.
         On a marker, src / dest stay ON the marker (it is already stored at *dest). */
      if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
        *dest = src[0];
      else
      {
        for (;;)
        {
          const Byte b = *src;
          *dest = b;

          if (b == 0x0F)
          {
            dest++;
            if (++src == scanEnd)
              break;
            if ((*src & 0xF0) != 0x80)
              continue;
            /* 0F 8x pair: stop on the second byte */
            *dest = *src;
            break;
          }

          if ((b & 0xFE) == 0xE8)
            break;
          dest++;
          if (++src == scanEnd)
            break;
        }
      }

      consumed = (SizeT)(src - p->src);

      if (src == scanEnd)
      {
        /* no marker in this run: commit the plain copy and go on */
        p->src = src;
        p->ip += (UInt32)consumed;
        p->bufs[BCJ2_STREAM_MAIN] = dest;
        p->prevByte = src[-1];
        continue;
      }

      {
        Byte context;
        Byte b;
        Bool needConvert = False;
        UInt32 bound;
        unsigned ttt;
        unsigned probIndex;
        CProb *prob;

        /* byte that precedes the marker */
        if (consumed == 0)
          context = (Byte)p->prevByte;
        else
          context = src[-1];

        /* step over the marker byte (already stored in MAIN) */
        src++;
        b = src[-1];
        p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
        p->ip += (UInt32)consumed + 1;

        if ((SizeT)(p->srcLim - src) >= 4)
        {
          const UInt32 relatVal = GetUi32(src);
          if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
              && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
            needConvert = True;
        }

        if (b == 0xE8)
          probIndex = 2 + (unsigned)context;
        else if (b == 0xE9)
          probIndex = 1;
        else
          probIndex = 0;
        prob = p->probs + probIndex;

        ttt = *prob;
        bound = (p->range >> kNumModelBits) * ttt;

        if (!needConvert)
        {
          /* "not converted" takes the lower part of the range */
          *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
          p->range = bound;
          p->prevByte = b;
          p->src = src;
          continue;
        }

        /* "converted" takes the upper part of the range */
        *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
        p->low += bound;
        p->range -= bound;

        {
          const UInt32 relatVal = GetUi32(src);
          const unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
          Byte *out32 = p->bufs[cj];
          UInt32 absVal;

          /* the operand is consumed here; ip now points past it */
          p->ip += 4;
          absVal = p->ip + relatVal;
          p->prevByte = src[3];
          p->src = src + 4;

          if (out32 == p->lims[cj])
          {
            /* no room: keep the value, it is written at the start of the next call */
            p->tempTarget = absVal;
            p->state = cj;
            return;
          }
          SetBe32(out32, absVal);
          p->bufs[cj] = out32 + 4;
        }
      }
    }
  }

  if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
    return;

  /* flush the range coder; flushPos survives an early return so the flush can resume */
  while (p->flushPos < 5)
  {
    if (RangeEnc_ShiftLow(p))
      return;
    p->flushPos++;
  }
  p->state = BCJ2_ENC_STATE_OK;
}
|
||
|
|
||
|
|
||
|
/*
  Bcj2Enc_Encode: public entry point; encodes the input in p->src .. p->srcLim.

  Bytes that a previous call could not process (the unprocessed tail) are kept
  in p->temp (p->tempPos bytes). This function first drains that buffer:
    - Bcj2Enc_Encode_2 is run with src / srcLim pointing at p->temp. If the
      caller supplied new input, finishMode is temporarily forced to
      CONTINUE for that run.
    - Consumed bytes are removed from p->temp, and the caller's src / srcLim /
      finishMode are restored.
    - If the encoder is blocked (state != BCJ2_ENC_STATE_ORIG) or there is no
      new input, the function returns.
    - If every byte still in p->temp was borrowed from the new input during
      this call, src is rewound by that many bytes, p->temp is emptied and
      normal processing starts. Otherwise one more input byte is appended to
      p->temp and the run is repeated.

  Then Bcj2Enc_Encode_2 is run on the caller's input. If it stops in state
  BCJ2_ENC_STATE_ORIG, the remaining input bytes are moved to p->temp and
  p->src is advanced to p->srcLim.

  The SHOW_STAT trace output casts its arguments to unsigned and prints them
  with %u, so that the argument types match the conversion specifiers.
*/
void Bcj2Enc_Encode(CBcj2Enc *p)
{
  PRF(printf("\n"));
  PRF(printf("---- ip = %8u tempPos = %8u src = %8u\n",
      (unsigned)p->ip, (unsigned)p->tempPos, (unsigned)(p->srcLim - p->src)));

  if (p->tempPos != 0)
  {
    /* number of bytes taken from the caller's input into p->temp during this call */
    unsigned extra = 0;

    for (;;)
    {
      const Byte *src = p->src;
      const Byte *srcLim = p->srcLim;
      unsigned finishMode = p->finishMode;

      /* run the encoder on the temp buffer instead of the caller's input */
      p->src = p->temp;
      p->srcLim = p->temp + p->tempPos;
      if (src != srcLim)
        p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;

      PRF(printf(" ip = %8u tempPos = %8u src = %8u\n",
          (unsigned)p->ip, (unsigned)p->tempPos, (unsigned)(p->srcLim - p->src)));

      Bcj2Enc_Encode_2(p);

      {
        unsigned num = (unsigned)(p->src - p->temp);
        unsigned tempPos = p->tempPos - num;
        unsigned i;

        /* drop the consumed bytes from the front of p->temp */
        p->tempPos = tempPos;
        for (i = 0; i < tempPos; i++)
          p->temp[i] = p->temp[i + num];

        p->src = src;
        p->srcLim = srcLim;
        p->finishMode = finishMode;

        if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
          return;

        if (extra >= tempPos)
        {
          /* everything left in p->temp came from the caller's input:
             give it back and continue with the caller's buffer only */
          p->src = src - tempPos;
          p->tempPos = 0;
          break;
        }

        /* feed one more input byte into p->temp and try again */
        p->temp[tempPos] = src[0];
        p->tempPos = tempPos + 1;
        p->src = src + 1;
        extra++;
      }
    }
  }

  PRF(printf("++++ ip = %8u tempPos = %8u src = %8u\n",
      (unsigned)p->ip, (unsigned)p->tempPos, (unsigned)(p->srcLim - p->src)));

  Bcj2Enc_Encode_2(p);

  if (p->state == BCJ2_ENC_STATE_ORIG)
  {
    /* the encoder wants more input: keep the unprocessed tail for the next call */
    const Byte *src = p->src;
    unsigned rem = (unsigned)(p->srcLim - src);
    unsigned i;
    for (i = 0; i < rem; i++)
      p->temp[i] = src[i];
    p->tempPos = rem;
    p->src = src + rem;
  }
}
|