FUNCTION huffman,problist,symlist,$ Entropy=entropy,Avglen=avglen,Verbose=verbose,LATEX=latex ;+ ; C=Huffman(problist,symlist) makes a binary Huffman code ; ; symlist is a list of character symbols that corresponds to ; the list of probabilities. ; ; c=huffman(problist,symlist) returns an array of code strings ; associated with the symbols as determined by the Huffman coding ; algorithm. The code is returned in an array of structures sorted ; in order of descending probability. The structure structue tags ; are "codeword", "probability", "length" and "symbol". If the symlist is ; omitted then the symbol field is blank in each case. ; ; ;KEYWORDS ; ; ENTROPY ; c=huffman(problist,symlist,Entropy=entropy) causes a value to be ; returned for entropy of the code. ; ; AVGLEN ; c=huffman(problist,symlist,Entropy=entropy,AvgLen=avglen) causes a value to be ; returned for entropy and the average length of the code. ; ; VERBOSE ; Setting /VERBOSE causes the code progress to be printed out and the entropy and ; average codeword length to be reported at the end. ; ; LATEX ; Setting /LATEX causes a printout that can be cut and pasted into a LaTeX report. ; ; This version will only produce a binary code. ; ; ; EXAMPLE: ; p=[.2,.3,.1,.4] ; s=['a','b','c','d'] ; c=huffman(p,s) ; for n=0,3 DO print,format='(a2,2x,f3.1,1x,a4)',c[n].symbol,$ ; c[n].probability,c[n].codeword ; ; produces the printout shown below: ; ; d 0.4 0 ; b 0.3 10 ; a 0.2 111 ; c 0.1 110 ; ; HISTORY: ; Written by H. Rhody February, 1999 ; Modified March 2001 to make symlist a positional ; parameter and ENTROPY and AVGLEN keyword parameters ; and to include keyword VERBOSE. ; Modified May 2001 to add the length tag. ;- ; Determine the number of symbols in the code and ; set up the code structures. wcode is a working ; structure that we use in the Huffman algorithm and ; code is a structure that receives the code symbols. nwords=N_ELEMENTS(problist) a={probability:0.0,list:INTARR(nwords),nval:1} b={codeword:'',probability:0.0,symbol:'',length:0} code=REPLICATE(b,nwords) wcode=REPLICATE(a,nwords) ; Initialize the probability values wcode.probability=problist code.probability=problist ; Initialize the symbol list IF N_Params() GT 1 THEN code.symbol=symlist ; Arrange things in the order of increasing probability order=SORT(wcode.probability) wcode=wcode[order] code=code[order] ; Initialize the values in the tracking list FOR k=0,nwords-1 DO wcode[k].list[0]=k WHILE (N_ELEMENTS(wcode) GE 2) DO BEGIN If Keyword_Set(Verbose) THEN print,Format='(20(F5.3,2x))',Reverse(wcode.probability) ; Find the length of the tracking list for the ; two lowest probabilities. Then extract that ; section of each tracking list. A code symbol ; will be added to the words for these elements. n0=wcode[0].nval n1=wcode[1].nval c0=wcode[0].list[0:n0-1] c1=wcode[1].list[0:n1-1] ; Prepend a '0' to the codeword for each word in the ; lowest probability selection and a '1' for each word ; in the second lowest probability selection. code[c0].codeword='0'+code[c0].codeword code[c1].codeword='1'+code[c1].codeword ; Compute the sum of the two lowest probability values. ; We are going to merge these tracking lists. wcode[1].probability=TOTAL((wcode.probability)[0:1]) ; Concatenate the tracking lists for the two low probability ; items. clist=[wcode[0].list[0:n0-1],wcode[1].list[0:n1-1]] wcode[1].list[0:n0+n1-1]=clist ; Keep track of the number of elements in that list. wcode[1].nval=n0+n1 ; Drop the lowest probability element off the roster, now ; that it has been merged. wcode=wcode[1:N_ELEMENTS(wcode)-1] ; Reorder the new set of elements. ord=SORT(wcode.probability) wcode=wcode[ord] ENDWHILE ; ; Find the length of each codeword and put it in the length tag. ; For k=0,N_Elements(code)-1 DO code[k].length=StrLen(code[k].codeword) ; Make things pretty by putting the code in order of ; decreasing probabilities. This puts the short codes at the ; beginning. order=reverse(sort(code.probability)) ; Calculate the entropy p=code.probability ent=entropy(p) ; Calculate the average code length avglen=TOTAL(p*strlen(code.codeword)) IF KEYWORD_SET(Verbose) THEN BEGIN Print Print,'Average Codeword Length=',avglen Print,'Entropy=',ent Print,'Efficiency=',ent/avglen ENDIF IF KEYWORD_SET(Verbose) THEN BEGIN ;Print out a summary table PRINT FOR n=N_Elements(code)-1,0,-1 DO Print,$ format='(i2,1x,a2,1x,f5.3,1x,i2,1x,f5.2,1x,a10)',$ N_Elements(code)-n,code[n].symbol,code[n].probability,StrLen(code[n].codeword),$ StrLen(code[n].codeword)*code[n].probability,code[n].codeword ENDIF m=n_elements(code) IF KEYWORD_SET(LaTeX) THEN BEGIN Print,'\begin{tabular}{|l|l|l|l|l|}\hline' Print,'Event & $P(x_i)$ & Codeword & $l_i$ & $P(x_i)l_i$ \\ \hline' For n=N_Elements(code)-1,0,-1 DO Print,$ FORMAT='(A2,"$ & ",F5.3," & ",a10," & ",I2," & ",F6.3," \\")',$ code[n].symbol,code[n].probability,code[n].codeword,StrLen(code[n].codeword),$ StrLen(code[n].codeword)*code[n].probability Print,'\hline' Print,'\end{tabular}' ENDIF return,code[order] end