Repo for the search and displace ingest module that takes odf, docx and pdf and transforms it into .md to be used with search and displace operations
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

1116 lines
33 KiB

unit imjchuff;
{ This file contains Huffman entropy encoding routines.
Much of the complexity here has to do with supporting output suspension.
If the data destination module demands suspension, we want to be able to
back up to the start of the current MCU. To do this, we copy state
variables into local working storage, and update them back to the
permanent JPEG objects only upon successful completion of an MCU. }
{ Original: jchuff.c; Copyright (C) 1991-1997, Thomas G. Lane. }
interface
{$I imjconfig.inc}
uses
imjmorecfg, { longptr definition missing }
imjpeglib,
imjdeferr,
imjerror,
imjutils,
imjinclude,
imjcomapi;
{ The legal range of a DCT coefficient is
-1024 .. +1023 for 8-bit data;
-16384 .. +16383 for 12-bit data.
Hence the magnitude should always fit in 10 or 14 bits respectively. }
{$ifdef BITS_IN_JSAMPLE_IS_8}
const
MAX_COEF_BITS = 10;
{$else}
const
MAX_COEF_BITS = 14;
{$endif}
{ Derived data constructed for each Huffman table }
{ Declarations shared with jcphuff.c }
type
c_derived_tbl_ptr = ^c_derived_tbl;
c_derived_tbl = record
ehufco : array[0..256-1] of uInt; { code for each symbol }
ehufsi : array[0..256-1] of byte; { length of code for each symbol }
{ If no code has been allocated for a symbol S, ehufsi[S] contains 0 }
end;
{ for JCHUFF und JCPHUFF }
type
TLongTable = array[0..256] of long;
TLongTablePtr = ^TLongTable;
{ Compute the derived values for a Huffman table.
Note this is also used by jcphuff.c. }
{GLOBAL}
procedure jpeg_make_c_derived_tbl (cinfo : j_compress_ptr;
isDC : boolean;
tblno : int;
var pdtbl : c_derived_tbl_ptr);
{ Generate the optimal coding for the given counts, fill htbl.
Note this is also used by jcphuff.c. }
{GLOBAL}
procedure jpeg_gen_optimal_table (cinfo : j_compress_ptr;
htbl : JHUFF_TBL_PTR;
var freq : TLongTable); { Nomssi }
{ Module initialization routine for Huffman entropy encoding. }
{GLOBAL}
procedure jinit_huff_encoder (cinfo : j_compress_ptr);
implementation
{ Expanded entropy encoder object for Huffman encoding.
The savable_state subrecord contains fields that change within an MCU,
but must not be updated permanently until we complete the MCU. }
type
savable_state = record
put_buffer : INT32; { current bit-accumulation buffer }
put_bits : int; { # of bits now in it }
last_dc_val : array[0..MAX_COMPS_IN_SCAN-1] of int;
{ last DC coef for each component }
end;
type
huff_entropy_ptr = ^huff_entropy_encoder;
huff_entropy_encoder = record
pub : jpeg_entropy_encoder; { public fields }
saved : savable_state; { Bit buffer & DC state at start of MCU }
{ These fields are NOT loaded into local working state. }
restarts_to_go : uInt; { MCUs left in this restart interval }
next_restart_num : int; { next restart number to write (0-7) }
{ Pointers to derived tables (these workspaces have image lifespan) }
dc_derived_tbls : array[0..NUM_HUFF_TBLS-1] of c_derived_tbl_ptr;
ac_derived_tbls : array[0..NUM_HUFF_TBLS-1] of c_derived_tbl_ptr;
{$ifdef ENTROPY_OPT_SUPPORTED} { Statistics tables for optimization }
dc_count_ptrs : array[0..NUM_HUFF_TBLS-1] of TLongTablePtr;
ac_count_ptrs : array[0..NUM_HUFF_TBLS-1] of TLongTablePtr;
{$endif}
end;
{ Working state while writing an MCU.
This struct contains all the fields that are needed by subroutines. }
type
working_state = record
next_output_byte : JOCTETptr; { => next byte to write in buffer }
free_in_buffer : size_t; { # of byte spaces remaining in buffer }
cur : savable_state; { Current bit buffer & DC state }
cinfo : j_compress_ptr; { dump_buffer needs access to this }
end;
{ Forward declarations }
{METHODDEF}
function encode_mcu_huff (cinfo : j_compress_ptr;
const MCU_data : array of JBLOCKROW) : boolean;
forward;
{METHODDEF}
procedure finish_pass_huff (cinfo : j_compress_ptr); forward;
{$ifdef ENTROPY_OPT_SUPPORTED}
{METHODDEF}
function encode_mcu_gather (cinfo : j_compress_ptr;
const MCU_data: array of JBLOCKROW) : boolean;
forward;
{METHODDEF}
procedure finish_pass_gather (cinfo : j_compress_ptr); forward;
{$endif}
{ Initialize for a Huffman-compressed scan.
If gather_statistics is TRUE, we do not output anything during the scan,
just count the Huffman symbols used and generate Huffman code tables. }
{METHODDEF}
procedure start_pass_huff (cinfo : j_compress_ptr;
gather_statistics : boolean);
var
entropy : huff_entropy_ptr;
ci, dctbl, actbl : int;
compptr : jpeg_component_info_ptr;
begin
entropy := huff_entropy_ptr (cinfo^.entropy);
if (gather_statistics) then
begin
{$ifdef ENTROPY_OPT_SUPPORTED}
entropy^.pub.encode_mcu := encode_mcu_gather;
entropy^.pub.finish_pass := finish_pass_gather;
{$else}
ERREXIT(j_common_ptr(cinfo), JERR_NOT_COMPILED);
{$endif}
end
else
begin
entropy^.pub.encode_mcu := encode_mcu_huff;
entropy^.pub.finish_pass := finish_pass_huff;
end;
for ci := 0 to pred(cinfo^.comps_in_scan) do
begin
compptr := cinfo^.cur_comp_info[ci];
dctbl := compptr^.dc_tbl_no;
actbl := compptr^.ac_tbl_no;
if (gather_statistics) then
begin
{$ifdef ENTROPY_OPT_SUPPORTED}
{ Check for invalid table indexes }
{ (make_c_derived_tbl does this in the other path) }
if (dctbl < 0) or (dctbl >= NUM_HUFF_TBLS) then
ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, dctbl);
if (actbl < 0) or (actbl >= NUM_HUFF_TBLS) then
ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, actbl);
{ Allocate and zero the statistics tables }
{ Note that jpeg_gen_optimal_table expects 257 entries in each table! }
if (entropy^.dc_count_ptrs[dctbl] = NIL) then
entropy^.dc_count_ptrs[dctbl] := TLongTablePtr(
cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
257 * SIZEOF(long)) );
MEMZERO(entropy^.dc_count_ptrs[dctbl], 257 * SIZEOF(long));
if (entropy^.ac_count_ptrs[actbl] = NIL) then
entropy^.ac_count_ptrs[actbl] := TLongTablePtr(
cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
257 * SIZEOF(long)) );
MEMZERO(entropy^.ac_count_ptrs[actbl], 257 * SIZEOF(long));
{$endif}
end
else
begin
{ Compute derived values for Huffman tables }
{ We may do this more than once for a table, but it's not expensive }
jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
entropy^.dc_derived_tbls[dctbl]);
jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
entropy^.ac_derived_tbls[actbl]);
end;
{ Initialize DC predictions to 0 }
entropy^.saved.last_dc_val[ci] := 0;
end;
{ Initialize bit buffer to empty }
entropy^.saved.put_buffer := 0;
entropy^.saved.put_bits := 0;
{ Initialize restart stuff }
entropy^.restarts_to_go := cinfo^.restart_interval;
entropy^.next_restart_num := 0;
end;
{ Compute the derived values for a Huffman table.
This routine also performs some validation checks on the table.
Note this is also used by jcphuff.c. }
{GLOBAL}
procedure jpeg_make_c_derived_tbl (cinfo : j_compress_ptr;
isDC : boolean;
tblno : int;
var pdtbl : c_derived_tbl_ptr);
var
htbl : JHUFF_TBL_PTR;
dtbl : c_derived_tbl_ptr;
p, i, l, lastp, si, maxsymbol : int;
huffsize : array[0..257-1] of byte;
huffcode : array[0..257-1] of uInt;
code : uInt;
begin
{ Note that huffsize[] and huffcode[] are filled in code-length order,
paralleling the order of the symbols themselves in htbl->huffval[]. }
{ Find the input Huffman table }
if (tblno < 0) or (tblno >= NUM_HUFF_TBLS) then
ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, tblno);
if isDC then
htbl := cinfo^.dc_huff_tbl_ptrs[tblno]
else
htbl := cinfo^.ac_huff_tbl_ptrs[tblno];
if (htbl = NIL) then
ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, tblno);
{ Allocate a workspace if we haven't already done so. }
if (pdtbl = NIL) then
pdtbl := c_derived_tbl_ptr(
cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
SIZEOF(c_derived_tbl)) );
dtbl := pdtbl;
{ Figure C.1: make table of Huffman code length for each symbol }
p := 0;
for l := 1 to 16 do
begin
i := int(htbl^.bits[l]);
if (i < 0) and (p + i > 256) then { protect against table overrun }
ERREXIT(j_common_ptr(cinfo), JERR_BAD_HUFF_TABLE);
while (i > 0) do
begin
huffsize[p] := byte(l);
Inc(p);
Dec(i);
end;
end;
huffsize[p] := 0;
lastp := p;
{ Figure C.2: generate the codes themselves }
{ We also validate that the counts represent a legal Huffman code tree. }
code := 0;
si := huffsize[0];
p := 0;
while (huffsize[p] <> 0) do
begin
while (( int(huffsize[p]) ) = si) do
begin
huffcode[p] := code;
Inc(p);
Inc(code);
end;
{ code is now 1 more than the last code used for codelength si; but
it must still fit in si bits, since no code is allowed to be all ones. }
if (INT32(code) >= (INT32(1) shl si)) then
ERREXIT(j_common_ptr(cinfo), JERR_BAD_HUFF_TABLE);
code := code shl 1;
Inc(si);
end;
{ Figure C.3: generate encoding tables }
{ These are code and size indexed by symbol value }
{ Set all codeless symbols to have code length 0;
this lets us detect duplicate VAL entries here, and later
allows emit_bits to detect any attempt to emit such symbols. }
MEMZERO(@dtbl^.ehufsi, SIZEOF(dtbl^.ehufsi));
{ This is also a convenient place to check for out-of-range
and duplicated VAL entries. We allow 0..255 for AC symbols
but only 0..15 for DC. (We could constrain them further
based on data depth and mode, but this seems enough.) }
if isDC then
maxsymbol := 15
else
maxsymbol := 255;
for p := 0 to pred(lastp) do
begin
i := htbl^.huffval[p];
if (i < 0) or (i > maxsymbol) or (dtbl^.ehufsi[i] <> 0) then
ERREXIT(j_common_ptr(cinfo), JERR_BAD_HUFF_TABLE);
dtbl^.ehufco[i] := huffcode[p];
dtbl^.ehufsi[i] := huffsize[p];
end;
end;
{ Outputting bytes to the file }
{LOCAL}
function dump_buffer (var state : working_state) : boolean;
{ Empty the output buffer; return TRUE if successful, FALSE if must suspend }
var
dest : jpeg_destination_mgr_ptr;
begin
dest := state.cinfo^.dest;
if (not dest^.empty_output_buffer (state.cinfo)) then
begin
dump_buffer := FALSE;
exit;
end;
{ After a successful buffer dump, must reset buffer pointers }
state.next_output_byte := dest^.next_output_byte;
state.free_in_buffer := dest^.free_in_buffer;
dump_buffer := TRUE;
end;
{ Outputting bits to the file }
{ Only the right 24 bits of put_buffer are used; the valid bits are
left-justified in this part. At most 16 bits can be passed to emit_bits
in one call, and we never retain more than 7 bits in put_buffer
between calls, so 24 bits are sufficient. }
{LOCAL}
function emit_bits (var state : working_state;
code : uInt;
size : int) : boolean; {INLINE}
{ Emit some bits; return TRUE if successful, FALSE if must suspend }
var
{ This routine is heavily used, so it's worth coding tightly. }
{register} put_buffer : INT32;
{register} put_bits : int;
var
c : int;
begin
put_buffer := INT32 (code);
put_bits := state.cur.put_bits;
{ if size is 0, caller used an invalid Huffman table entry }
if (size = 0) then
ERREXIT(j_common_ptr(state.cinfo), JERR_HUFF_MISSING_CODE);
put_buffer := put_buffer and pred(INT32(1) shl size);
{ mask off any extra bits in code }
Inc(put_bits, size); { new number of bits in buffer }
put_buffer := put_buffer shl (24 - put_bits);
{ align incoming bits }
put_buffer := put_buffer or state.cur.put_buffer;
{ and merge with old buffer contents }
while (put_bits >= 8) do
begin
c := int ((put_buffer shr 16) and $FF);
{emit_byte(state, c, return FALSE);}
{ Emit a byte, return FALSE if must suspend. }
state.next_output_byte^ := JOCTET (c);
Inc(state.next_output_byte);
Dec(state.free_in_buffer);
if (state.free_in_buffer = 0) then
if not dump_buffer(state) then
begin
emit_bits := FALSE;
exit;
end;
if (c = $FF) then { need to stuff a zero byte? }
begin
{emit_byte(state, 0, return FALSE);}
state.next_output_byte^ := JOCTET (0);
Inc(state.next_output_byte);
Dec(state.free_in_buffer);
if (state.free_in_buffer = 0) then
if not dump_buffer(state) then
begin
emit_bits := FALSE;
exit;
end;
end;
put_buffer := put_buffer shl 8;
Dec(put_bits, 8);
end;
state.cur.put_buffer := put_buffer; { update state variables }
state.cur.put_bits := put_bits;
emit_bits := TRUE;
end;
{LOCAL}
function flush_bits (var state : working_state) : boolean;
begin
if (not emit_bits(state, $7F, 7)) then { fill any partial byte with ones }
begin
flush_bits := FALSE;
exit;
end;
state.cur.put_buffer := 0; { and reset bit-buffer to empty }
state.cur.put_bits := 0;
flush_bits := TRUE;
end;
{ Encode a single block's worth of coefficients }
{LOCAL}
function encode_one_block (var state : working_state;
const block : JBLOCK;
last_dc_val : int;
dctbl : c_derived_tbl_ptr;
actbl : c_derived_tbl_ptr) : boolean;
var
{register} temp, temp2 : int;
{register} nbits : int;
{register} k, r, i : int;
begin
{ Encode the DC coefficient difference per section F.1.2.1 }
temp2 := block[0] - last_dc_val;
temp := temp2;
if (temp < 0) then
begin
temp := -temp; { temp is abs value of input }
{ For a negative input, want temp2 := bitwise complement of abs(input) }
{ This code assumes we are on a two's complement machine }
Dec(temp2);
end;
{ Find the number of bits needed for the magnitude of the coefficient }
nbits := 0;
while (temp <> 0) do
begin
Inc(nbits);
temp := temp shr 1;
end;
{ Check for out-of-range coefficient values.
Since we're encoding a difference, the range limit is twice as much. }
if (nbits > MAX_COEF_BITS+1) then
ERREXIT(j_common_ptr(state.cinfo), JERR_BAD_DCT_COEF);
{ Emit the Huffman-coded symbol for the number of bits }
if not emit_bits(state, dctbl^.ehufco[nbits], dctbl^.ehufsi[nbits]) then
begin
encode_one_block := FALSE;
exit;
end;
{ Emit that number of bits of the value, if positive, }
{ or the complement of its magnitude, if negative. }
if (nbits <> 0) then { emit_bits rejects calls with size 0 }
if not emit_bits(state, uInt(temp2), nbits) then
begin
encode_one_block := FALSE;
exit;
end;
{ Encode the AC coefficients per section F.1.2.2 }
r := 0; { r := run length of zeros }
for k := 1 to pred(DCTSIZE2) do
begin
temp := block[jpeg_natural_order[k]];
if (temp = 0) then
begin
Inc(r);
end
else
begin
{ if run length > 15, must emit special run-length-16 codes ($F0) }
while (r > 15) do
begin
if not emit_bits(state, actbl^.ehufco[$F0], actbl^.ehufsi[$F0]) then
begin
encode_one_block := FALSE;
exit;
end;
Dec(r, 16);
end;
temp2 := temp;
if (temp < 0) then
begin
temp := -temp; { temp is abs value of input }
{ This code assumes we are on a two's complement machine }
Dec(temp2);
end;
{ Find the number of bits needed for the magnitude of the coefficient }
nbits := 0; { there must be at least one 1 bit }
repeat
Inc(nbits);
temp := temp shr 1;
until (temp = 0);
{ Check for out-of-range coefficient values }
if (nbits > MAX_COEF_BITS) then
ERREXIT(j_common_ptr(state.cinfo), JERR_BAD_DCT_COEF);
{ Emit Huffman symbol for run length / number of bits }
i := (r shl 4) + nbits;
if not emit_bits(state, actbl^.ehufco[i], actbl^.ehufsi[i]) then
begin
encode_one_block := FALSE;
exit;
end;
{ Emit that number of bits of the value, if positive, }
{ or the complement of its magnitude, if negative. }
if not emit_bits(state, uInt(temp2), nbits) then
begin
encode_one_block := FALSE;
exit;
end;
r := 0;
end;
end;
{ If the last coef(s) were zero, emit an end-of-block code }
if (r > 0) then
if not emit_bits(state, actbl^.ehufco[0], actbl^.ehufsi[0]) then
begin
encode_one_block := FALSE;
exit;
end;
encode_one_block := TRUE;
end;
{ Emit a restart marker & resynchronize predictions. }
{LOCAL}
function emit_restart (var state : working_state;
restart_num : int) : boolean;
var
ci : int;
begin
if (not flush_bits(state)) then
begin
emit_restart := FALSE;
exit;
end;
{emit_byte(state, $FF, return FALSE);}
{ Emit a byte, return FALSE if must suspend. }
state.next_output_byte^ := JOCTET ($FF);
Inc(state.next_output_byte);
Dec(state.free_in_buffer);
if (state.free_in_buffer = 0) then
if not dump_buffer(state) then
begin
emit_restart := FALSE;
exit;
end;
{emit_byte(state, JPEG_RST0 + restart_num, return FALSE);}
{ Emit a byte, return FALSE if must suspend. }
state.next_output_byte^ := JOCTET (JPEG_RST0 + restart_num);
Inc(state.next_output_byte);
Dec(state.free_in_buffer);
if (state.free_in_buffer = 0) then
if not dump_buffer(state) then
begin
emit_restart := FALSE;
exit;
end;
{ Re-initialize DC predictions to 0 }
for ci := 0 to pred(state.cinfo^.comps_in_scan) do
state.cur.last_dc_val[ci] := 0;
{ The restart counter is not updated until we successfully write the MCU. }
emit_restart := TRUE;
end;
{ Encode and output one MCU's worth of Huffman-compressed coefficients. }
{METHODDEF}
function encode_mcu_huff (cinfo : j_compress_ptr;
const MCU_data: array of JBLOCKROW) : boolean;
var
entropy : huff_entropy_ptr;
state : working_state;
blkn, ci : int;
compptr : jpeg_component_info_ptr;
begin
entropy := huff_entropy_ptr (cinfo^.entropy);
{ Load up working state }
state.next_output_byte := cinfo^.dest^.next_output_byte;
state.free_in_buffer := cinfo^.dest^.free_in_buffer;
{ASSIGN_STATE(state.cur, entropy^.saved);}
state.cur := entropy^.saved;
state.cinfo := cinfo;
{ Emit restart marker if needed }
if (cinfo^.restart_interval <> 0) then
begin
if (entropy^.restarts_to_go = 0) then
if not emit_restart(state, entropy^.next_restart_num) then
begin
encode_mcu_huff := FALSE;
exit;
end;
end;
{ Encode the MCU data blocks }
for blkn := 0 to pred(cinfo^.blocks_in_MCU) do
begin
ci := cinfo^.MCU_membership[blkn];
compptr := cinfo^.cur_comp_info[ci];
if not encode_one_block(state,
MCU_data[blkn]^[0],
state.cur.last_dc_val[ci],
entropy^.dc_derived_tbls[compptr^.dc_tbl_no],
entropy^.ac_derived_tbls[compptr^.ac_tbl_no]) then
begin
encode_mcu_huff := FALSE;
exit;
end;
{ Update last_dc_val }
state.cur.last_dc_val[ci] := MCU_data[blkn]^[0][0];
end;
{ Completed MCU, so update state }
cinfo^.dest^.next_output_byte := state.next_output_byte;
cinfo^.dest^.free_in_buffer := state.free_in_buffer;
{ASSIGN_STATE(entropy^.saved, state.cur);}
entropy^.saved := state.cur;
{ Update restart-interval state too }
if (cinfo^.restart_interval <> 0) then
begin
if (entropy^.restarts_to_go = 0) then
begin
entropy^.restarts_to_go := cinfo^.restart_interval;
Inc(entropy^.next_restart_num);
with entropy^ do
next_restart_num := next_restart_num and 7;
end;
Dec(entropy^.restarts_to_go);
end;
encode_mcu_huff := TRUE;
end;
{ Finish up at the end of a Huffman-compressed scan. }
{METHODDEF}
procedure finish_pass_huff (cinfo : j_compress_ptr);
var
entropy : huff_entropy_ptr;
state : working_state;
begin
entropy := huff_entropy_ptr (cinfo^.entropy);
{ Load up working state ... flush_bits needs it }
state.next_output_byte := cinfo^.dest^.next_output_byte;
state.free_in_buffer := cinfo^.dest^.free_in_buffer;
{ASSIGN_STATE(state.cur, entropy^.saved);}
state.cur := entropy^.saved;
state.cinfo := cinfo;
{ Flush out the last data }
if not flush_bits(state) then
ERREXIT(j_common_ptr(cinfo), JERR_CANT_SUSPEND);
{ Update state }
cinfo^.dest^.next_output_byte := state.next_output_byte;
cinfo^.dest^.free_in_buffer := state.free_in_buffer;
{ASSIGN_STATE(entropy^.saved, state.cur);}
entropy^.saved := state.cur;
end;
{ Huffman coding optimization.
We first scan the supplied data and count the number of uses of each symbol
that is to be Huffman-coded. (This process MUST agree with the code above.)
Then we build a Huffman coding tree for the observed counts.
Symbols which are not needed at all for the particular image are not
assigned any code, which saves space in the DHT marker as well as in
the compressed data. }
{$ifdef ENTROPY_OPT_SUPPORTED}
{ Process a single block's worth of coefficients }
{LOCAL}
procedure htest_one_block (cinfo : j_compress_ptr;
const block : JBLOCK;
last_dc_val : int;
dc_counts : TLongTablePtr;
ac_counts : TLongTablePtr);
var
{register} temp : int;
{register} nbits : int;
{register} k, r : int;
begin
{ Encode the DC coefficient difference per section F.1.2.1 }
temp := block[0] - last_dc_val;
if (temp < 0) then
temp := -temp;
{ Find the number of bits needed for the magnitude of the coefficient }
nbits := 0;
while (temp <> 0) do
begin
Inc(nbits);
temp := temp shr 1;
end;
{ Check for out-of-range coefficient values.
Since we're encoding a difference, the range limit is twice as much. }
if (nbits > MAX_COEF_BITS+1) then
ERREXIT(j_common_ptr(cinfo), JERR_BAD_DCT_COEF);
{ Count the Huffman symbol for the number of bits }
Inc(dc_counts^[nbits]);
{ Encode the AC coefficients per section F.1.2.2 }
r := 0; { r := run length of zeros }
for k := 1 to pred(DCTSIZE2) do
begin
temp := block[jpeg_natural_order[k]];
if (temp = 0) then
begin
Inc(r);
end
else
begin
{ if run length > 15, must emit special run-length-16 codes ($F0) }
while (r > 15) do
begin
Inc(ac_counts^[$F0]);
Dec(r, 16);
end;
{ Find the number of bits needed for the magnitude of the coefficient }
if (temp < 0) then
temp := -temp;
{ Find the number of bits needed for the magnitude of the coefficient }
nbits := 0; { there must be at least one 1 bit }
repeat
Inc(nbits);
temp := temp shr 1;
until (temp = 0);
{ Count Huffman symbol for run length / number of bits }
Inc(ac_counts^[(r shl 4) + nbits]);
r := 0;
end;
end;
{ If the last coef(s) were zero, emit an end-of-block code }
if (r > 0) then
Inc(ac_counts^[0]);
end;
{ Trial-encode one MCU's worth of Huffman-compressed coefficients.
No data is actually output, so no suspension return is possible. }
{METHODDEF}
function encode_mcu_gather (cinfo : j_compress_ptr;
const MCU_data: array of JBLOCKROW) : boolean;
var
entropy : huff_entropy_ptr;
blkn, ci : int;
compptr : jpeg_component_info_ptr;
begin
entropy := huff_entropy_ptr (cinfo^.entropy);
{ Take care of restart intervals if needed }
if (cinfo^.restart_interval <> 0) then
begin
if (entropy^.restarts_to_go = 0) then
begin
{ Re-initialize DC predictions to 0 }
for ci := 0 to pred(cinfo^.comps_in_scan) do
entropy^.saved.last_dc_val[ci] := 0;
{ Update restart state }
entropy^.restarts_to_go := cinfo^.restart_interval;
end;
Dec(entropy^.restarts_to_go);
end;
for blkn := 0 to pred(cinfo^.blocks_in_MCU) do
begin
ci := cinfo^.MCU_membership[blkn];
compptr := cinfo^.cur_comp_info[ci];
htest_one_block(cinfo, MCU_data[blkn]^[0],
entropy^.saved.last_dc_val[ci],
entropy^.dc_count_ptrs[compptr^.dc_tbl_no],
entropy^.ac_count_ptrs[compptr^.ac_tbl_no]);
entropy^.saved.last_dc_val[ci] := MCU_data[blkn]^[0][0];
end;
encode_mcu_gather := TRUE;
end;
{ Generate the best Huffman code table for the given counts, fill htbl.
Note this is also used by jcphuff.c.
The JPEG standard requires that no symbol be assigned a codeword of all
one bits (so that padding bits added at the end of a compressed segment
can't look like a valid code). Because of the canonical ordering of
codewords, this just means that there must be an unused slot in the
longest codeword length category. Section K.2 of the JPEG spec suggests
reserving such a slot by pretending that symbol 256 is a valid symbol
with count 1. In theory that's not optimal; giving it count zero but
including it in the symbol set anyway should give a better Huffman code.
But the theoretically better code actually seems to come out worse in
practice, because it produces more all-ones bytes (which incur stuffed
zero bytes in the final file). In any case the difference is tiny.
The JPEG standard requires Huffman codes to be no more than 16 bits long.
If some symbols have a very small but nonzero probability, the Huffman tree
must be adjusted to meet the code length restriction. We currently use
the adjustment method suggested in JPEG section K.2. This method is *not*
optimal; it may not choose the best possible limited-length code. But
typically only very-low-frequency symbols will be given less-than-optimal
lengths, so the code is almost optimal. Experimental comparisons against
an optimal limited-length-code algorithm indicate that the difference is
microscopic --- usually less than a hundredth of a percent of total size.
So the extra complexity of an optimal algorithm doesn't seem worthwhile. }
{GLOBAL}
procedure jpeg_gen_optimal_table (cinfo : j_compress_ptr;
htbl : JHUFF_TBL_PTR;
var freq : TLongTable);
const
MAX_CLEN = 32; { assumed maximum initial code length }
var
bits : array[0..MAX_CLEN+1-1] of UINT8; { bits[k] := # of symbols with code length k }
codesize : array[0..257-1] of int; { codesize[k] := code length of symbol k }
others : array[0..257-1] of int; { next symbol in current branch of tree }
c1, c2 : int;
p, i, j : int;
v : long;
begin
{ This algorithm is explained in section K.2 of the JPEG standard }
MEMZERO(@bits, SIZEOF(bits));
MEMZERO(@codesize, SIZEOF(codesize));
for i := 0 to 256 do
others[i] := -1; { init links to empty }
freq[256] := 1; { make sure 256 has a nonzero count }
{ Including the pseudo-symbol 256 in the Huffman procedure guarantees
that no real symbol is given code-value of all ones, because 256
will be placed last in the largest codeword category. }
{ Huffman's basic algorithm to assign optimal code lengths to symbols }
while TRUE do
begin
{ Find the smallest nonzero frequency, set c1 := its symbol }
{ In case of ties, take the larger symbol number }
c1 := -1;
v := long(1000000000);
for i := 0 to 256 do
begin
if (freq[i] <> 0) and (freq[i] <= v) then
begin
v := freq[i];
c1 := i;
end;
end;
{ Find the next smallest nonzero frequency, set c2 := its symbol }
{ In case of ties, take the larger symbol number }
c2 := -1;
v := long(1000000000);
for i := 0 to 256 do
begin
if (freq[i] <> 0) and (freq[i] <= v) and (i <> c1) then
begin
v := freq[i];
c2 := i;
end;
end;
{ Done if we've merged everything into one frequency }
if (c2 < 0) then
break;
{ Else merge the two counts/trees }
Inc(freq[c1], freq[c2]);
freq[c2] := 0;
{ Increment the codesize of everything in c1's tree branch }
Inc(codesize[c1]);
while (others[c1] >= 0) do
begin
c1 := others[c1];
Inc(codesize[c1]);
end;
others[c1] := c2; { chain c2 onto c1's tree branch }
{ Increment the codesize of everything in c2's tree branch }
Inc(codesize[c2]);
while (others[c2] >= 0) do
begin
c2 := others[c2];
Inc(codesize[c2]);
end;
end;
{ Now count the number of symbols of each code length }
for i := 0 to 256 do
begin
if (codesize[i]<>0) then
begin
{ The JPEG standard seems to think that this can't happen, }
{ but I'm paranoid... }
if (codesize[i] > MAX_CLEN) then
ERREXIT(j_common_ptr(cinfo), JERR_HUFF_CLEN_OVERFLOW);
Inc(bits[codesize[i]]);
end;
end;
{ JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
Huffman procedure assigned any such lengths, we must adjust the coding.
Here is what the JPEG spec says about how this next bit works:
Since symbols are paired for the longest Huffman code, the symbols are
removed from this length category two at a time. The prefix for the pair
(which is one bit shorter) is allocated to one of the pair; then,
skipping the BITS entry for that prefix length, a code word from the next
shortest nonzero BITS entry is converted into a prefix for two code words
one bit longer. }
for i := MAX_CLEN downto 17 do
begin
while (bits[i] > 0) do
begin
j := i - 2; { find length of new prefix to be used }
while (bits[j] = 0) do
Dec(j);
Dec(bits[i], 2); { remove two symbols }
Inc(bits[i-1]); { one goes in this length }
Inc(bits[j+1], 2); { two new symbols in this length }
Dec(bits[j]); { symbol of this length is now a prefix }
end;
end;
{ Delphi 2: FOR-loop variable 'i' may be undefined after loop }
i := 16; { Nomssi: work around }
{ Remove the count for the pseudo-symbol 256 from the largest codelength }
while (bits[i] = 0) do { find largest codelength still in use }
Dec(i);
Dec(bits[i]);
{ Return final symbol counts (only for lengths 0..16) }
MEMCOPY(@htbl^.bits, @bits, SIZEOF(htbl^.bits));
{ Return a list of the symbols sorted by code length }
{ It's not real clear to me why we don't need to consider the codelength
changes made above, but the JPEG spec seems to think this works. }
p := 0;
for i := 1 to MAX_CLEN do
begin
for j := 0 to 255 do
begin
if (codesize[j] = i) then
begin
htbl^.huffval[p] := UINT8 (j);
Inc(p);
end;
end;
end;
{ Set sent_table FALSE so updated table will be written to JPEG file. }
htbl^.sent_table := FALSE;
end;
{ Finish up a statistics-gathering pass and create the new Huffman tables. }
{METHODDEF}
procedure finish_pass_gather (cinfo : j_compress_ptr);
var
entropy : huff_entropy_ptr;
ci, dctbl, actbl : int;
compptr : jpeg_component_info_ptr;
htblptr : ^JHUFF_TBL_PTR;
did_dc : array[0..NUM_HUFF_TBLS-1] of boolean;
did_ac : array[0..NUM_HUFF_TBLS-1] of boolean;
begin
entropy := huff_entropy_ptr (cinfo^.entropy);
{ It's important not to apply jpeg_gen_optimal_table more than once
per table, because it clobbers the input frequency counts! }
MEMZERO(@did_dc, SIZEOF(did_dc));
MEMZERO(@did_ac, SIZEOF(did_ac));
for ci := 0 to pred(cinfo^.comps_in_scan) do
begin
compptr := cinfo^.cur_comp_info[ci];
dctbl := compptr^.dc_tbl_no;
actbl := compptr^.ac_tbl_no;
if (not did_dc[dctbl]) then
begin
htblptr := @(cinfo^.dc_huff_tbl_ptrs[dctbl]);
if ( htblptr^ = NIL) then
htblptr^ := jpeg_alloc_huff_table(j_common_ptr(cinfo));
jpeg_gen_optimal_table(cinfo, htblptr^, entropy^.dc_count_ptrs[dctbl]^);
did_dc[dctbl] := TRUE;
end;
if (not did_ac[actbl]) then
begin
htblptr := @(cinfo^.ac_huff_tbl_ptrs[actbl]);
if ( htblptr^ = NIL) then
htblptr^ := jpeg_alloc_huff_table(j_common_ptr(cinfo));
jpeg_gen_optimal_table(cinfo, htblptr^, entropy^.ac_count_ptrs[actbl]^);
did_ac[actbl] := TRUE;
end;
end;
end;
{$endif} { ENTROPY_OPT_SUPPORTED }
{ Module initialization routine for Huffman entropy encoding. }
{GLOBAL}
procedure jinit_huff_encoder (cinfo : j_compress_ptr);
var
entropy : huff_entropy_ptr;
i : int;
begin
entropy := huff_entropy_ptr(
cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
SIZEOF(huff_entropy_encoder)) );
cinfo^.entropy := jpeg_entropy_encoder_ptr (entropy);
entropy^.pub.start_pass := start_pass_huff;
{ Mark tables unallocated }
for i := 0 to pred(NUM_HUFF_TBLS) do
begin
entropy^.ac_derived_tbls[i] := NIL;
entropy^.dc_derived_tbls[i] := NIL;
{$ifdef ENTROPY_OPT_SUPPORTED}
entropy^.ac_count_ptrs[i] := NIL;
entropy^.dc_count_ptrs[i] := NIL;
{$endif}
end;
end;
end.