dslinux/user/pixil/packages/dvdview/dvdview/oldlibgfx/libvideogfx/graphics/lowlevel .cvsignore Makefile.am grey2rgb16mmx.cc grey2rgb16mmx.hh grey2rgb32mmx.cc grey2rgb32mmx.hh img2raw.cc img2raw.hh yuv2rgb16.cc yuv2rgb16.hh yuv2rgb16mmx.cc yuv2rgb16mmx.hh yuv2rgb32mmx.cc yuv2rgb32mmx.hh

amadeus dslinux_amadeus at user.in-berlin.de
Tue Oct 3 13:25:54 CEST 2006


Update of /cvsroot/dslinux/dslinux/user/pixil/packages/dvdview/dvdview/oldlibgfx/libvideogfx/graphics/lowlevel
In directory antilope:/tmp/cvs-serv11916/packages/dvdview/dvdview/oldlibgfx/libvideogfx/graphics/lowlevel

Added Files:
	.cvsignore Makefile.am grey2rgb16mmx.cc grey2rgb16mmx.hh 
	grey2rgb32mmx.cc grey2rgb32mmx.hh img2raw.cc img2raw.hh 
	yuv2rgb16.cc yuv2rgb16.hh yuv2rgb16mmx.cc yuv2rgb16mmx.hh 
	yuv2rgb32mmx.cc yuv2rgb32mmx.hh 
Log Message:
adding pristine copy of pixil to HEAD so I can branch from it

--- NEW FILE: .cvsignore ---
Makefile
Makefile.in
*.lo
_libs
.libs
.deps
libvideogfx-graphics-lowlevel.la
libvideogfx-graphics-lowlevel-mmx.la

--- NEW FILE: grey2rgb32mmx.hh ---
/*********************************************************************
  grey2rgb32mmx.hh

  purpose:

  notes:

  to do:

  author(s):
   - Dirk Farin, Kapellenweg 15, 72070 Tuebingen, Germany,
     email: farindk at trick.informatik.uni-stuttgart.de

  modifications:
   08/Mar/2000 - Dirk Farin - first implementation
 *********************************************************************/

#ifndef DISPLAY_GREY2RGB32MMX_HH
#define DISPLAY_GREY2RGB32MMX_HH

#include "img2raw.hh"


class i2r_grey_32bit_mmx : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_grey_32bit_mmx() { }

  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "grey to 32bit RGB, MMX accelerated"; }
};

#endif

--- NEW FILE: Makefile.am ---
## Makefile.am for libvideogfx/libvideogfx/graphics/lowlevel

## Convenience libraries (noinst): the portable low-level converters are
## always built; the MMX variants are added only when the ENABLE_MMX automake
## conditional is true.
if ENABLE_MMX
noinst_LTLIBRARIES = \
	libvideogfx-graphics-lowlevel.la	\
	libvideogfx-graphics-lowlevel-mmx.la
else
noinst_LTLIBRARIES = \
	libvideogfx-graphics-lowlevel.la
endif

## Sources of the portable library.
libvideogfx_graphics_lowlevel_la_SOURCES = \
	img2raw.cc		\
	img2raw.hh		\
	yuv2rgb16.cc		\
	yuv2rgb16.hh

## Sources of the MMX library (only defined when ENABLE_MMX is true).
if ENABLE_MMX
libvideogfx_graphics_lowlevel_mmx_la_SOURCES = \
	grey2rgb16mmx.cc	\
	grey2rgb16mmx.hh	\
	grey2rgb32mmx.cc	\
	grey2rgb32mmx.hh	\
	yuv2rgb16mmx.cc		\
	yuv2rgb16mmx.hh		\
	yuv2rgb32mmx.cc		\
	yuv2rgb32mmx.hh
endif

## Add the top of the source tree to the preprocessor include path.
INCLUDES = \
	-I$(top_srcdir)

.PHONY: files

## "make files": print every entry of $(DISTFILES) that exists in this
## directory, one per line (errors from ls for missing files are discarded).
files:
	@files=`ls $(DISTFILES) 2> /dev/null`; for p in $$files; do \
	  echo $$p; \
	done

--- NEW FILE: grey2rgb16mmx.hh ---
/*********************************************************************
  grey2rgb16mmx.hh

  purpose:

  notes:

  to do:

  author(s):
   - Dirk Farin, Kapellenweg 15, 72070 Tuebingen, Germany,
     email: farindk at trick.informatik.uni-stuttgart.de

  modifications:
   16/Jul/2000 - Dirk Farin - first implementation
 *********************************************************************/

#ifndef DISPLAY_GREY2RGB16MMX_HH
#define DISPLAY_GREY2RGB16MMX_HH

#include "img2raw.hh"


class i2r_grey_16bit_mmx : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_grey_16bit_mmx() { }

  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "grey to 16bit RGB, MMX accelerated"; }
};

#endif

--- NEW FILE: yuv2rgb16mmx.cc ---
/*
 *  yuv2rgb16mmx.cc
 */

#include <iostream.h>
#include <iomanip.h>

#include "yuv2rgb16mmx.hh"


bool i2r_16bit_mmx::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  if (spec.resize_to_fixed || spec.resize_with_factor) return false;
  if (spec.bits_per_pixel != 16) return false;
  if (!spec.little_endian) return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==true) return false;
  if (param.chroma !=Chroma420) return false;

  int w = (param.width+7) & ~7;
  if (spec.bytes_per_line < 2*w) return false;

  return true;
}


// Converts rows firstline..lastline of a 4:2:0 YUV image into 16-bit RGB
// pixels using MMX inline assembly.
//
//   img        source image; its chroma format is asserted to be Chroma420
//   mem        output buffer; the row pair starting at `firstline` is written
//              at `mem` and `mem + bytes_per_line`
//   firstline  first row to convert, asserted to be even
//   lastline   last row to convert (rows are processed in pairs)
//
// Each inner-loop iteration handles 8 horizontally adjacent pixels of two
// consecutive rows, which share 4 Cb and 4 Cr samples.  The first asm
// statement converts the upper row and stores the chroma contributions in
// scratch slots of constants[]; the second asm statement writes the upper row,
// then converts and writes the lower row using those saved contributions.
//
// NOTE(review): both asm statements list input operands only -- no outputs and
// no clobbers for the MMX registers or for memory -- although they write
// through the pointers and the second statement relies on mm5/mm7 left behind
// by the first.  Confirm this is safe with the compiler in use.
// NOTE(review): the inner loop advances the pointers in steps of 8 pixels,
// while yskip/cskip/mskip are computed from `w` itself; if `w` is not a
// multiple of 8 the row pointers appear to drift -- confirm.
void i2r_16bit_mmx::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  // Constant table and scratch area addressed by byte offset from the asm
  // code (operand %3); slot i lives at byte offset 8*i.
  uint64 constants[20];

  // The masks and the three shift values computed below are not referenced
  // again in this function.
  const uint32 rmask=d_spec.r_mask;
  const uint32 gmask=d_spec.g_mask;
  const uint32 bmask=d_spec.b_mask;

  uint32 rshift,gshift,bshift;

  rshift = d_spec.r_shift;  rshift -= 8-d_spec.r_bits;  rshift -= 8;  rshift = -rshift;
  gshift = d_spec.g_shift;  gshift -= 8-d_spec.g_bits;  gshift -= 8;  gshift = -gshift;
  bshift = d_spec.b_shift;  bshift -= 8-d_spec.b_bits;  bshift -= 8;  bshift = -bshift;


  // ---------------------------------------

  // Layout: slot, byte offset, contents.  The factors are the usual
  // 409/208/100/516/298 coefficients divided by 4.
  constants[0] = 0x0080008000800080LL;     //   0  4x  128   // chroma offset
  constants[1] = 0x1010101010101010LL;     //   8  8x   16   // Y offset
  constants[2] = 0x0066006600660066LL;     //  16  4x  102 =  409/4         // Cr -> R (multiplied with the saved Cr values)
  constants[3] = 0x0034001900340019LL;     //  24  2x (52 25) = 208/4 100/4 // Cr,Cb -> G (25*Cb + 52*Cr via pmaddwd)
  constants[4] = 0x0081008100810081LL;     //  32  4x  129 =  516/4         // Cb -> B
  constants[5] = 0x004A004A004A004ALL;     //  40  4x   74 =  298/4         // Y factor

  // Scratch slots written by the asm code:
  //  6  offset 48: Cr values (16 bit, offset removed)
  //  7  offset 56: 4 G impacts
  //  8  offset 64: 4 low  B impacts
  //  9  offset 72: 4 low  R impacts
  // 10  offset 80: 4 high R impacts
  // 11  offset 88: 4 high B impacts

  // Indexed by the number of bits of a colour component.  Adding the entry
  // with unsigned saturation and subtracting it again limits a byte to at
  // most 255 minus the entry (e.g. 0xe0 for 5 bits limits to 31).
  static uint64 bitsconsts[9] =
  {
    0,
    0xfefefefefefefefeLL,     // 1 bit mask
    0xfcfcfcfcfcfcfcfcLL,     // 2 bit mask
    0xf8f8f8f8f8f8f8f8LL,     // 3 bit mask
    0xf0f0f0f0f0f0f0f0LL,     // 4 bit mask
    0xe0e0e0e0e0e0e0e0LL,     // 5 bit mask
    0xc0c0c0c0c0c0c0c0LL,     // 6 bit mask
    0x8080808080808080LL,     // 7 bit mask
    0
  };

  constants[12] = bitsconsts[d_spec.r_bits];   //  96  R limit constant
  constants[13] = bitsconsts[d_spec.g_bits];   // 104  G limit constant
  constants[14] = bitsconsts[d_spec.b_bits];   // 112  B limit constant
  constants[15] = (8-d_spec.r_bits)+6;         // 120  right-shift count for R
  constants[16] = (8-d_spec.g_bits)+6;         // 128  right-shift count for G
  constants[17] = (8-d_spec.b_bits)+6;         // 136  right-shift count for B
  constants[18] = d_spec.r_shift-8;            // 144  left shift of R inside the high byte
  constants[19] = d_spec.g_shift;              // 152  left shift of G inside the 16-bit pixel



  // --------- TRANSFORM -----------

  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.chroma==Chroma420);

  assert((firstline%2)==0);
  //assert((lastline%2)==1);

  const Pixel*const* pix_y  = img.AskFrameY_const();
  const Pixel*const* pix_cb = img.AskFrameU_const();
  const Pixel*const* pix_cr = img.AskFrameV_const();

  // chr_w, chr_h and h are not used further down in this function.
  int chr_w, chr_h;

  param.GetChromaSizes(chr_w,chr_h);

  const int h = param.height;
  const int w = param.width;

  // Pointer increments applied after each row pair: luma and output pointers
  // move on by two rows, chroma pointers by one row, each minus `w` pixels
  // worth of data (see the review note in the function header).
  int yskip = 2*img.AskBitmap_const(Image<Pixel>::Bitmap_Y ).AskInternalWidth() - w;
  int cskip =   img.AskBitmap_const(Image<Pixel>::Bitmap_Cb).AskInternalWidth() - w/2;
  int mskip = 2*d_spec.bytes_per_line - 2*w;

  const uint8*  yptr1 = (uint8*)pix_y[firstline];
  const uint8*  yptr2 = (uint8*)pix_y[firstline+1];
  const uint8*  cbptr = (uint8*)pix_cb[firstline/2];
  const uint8*  crptr = (uint8*)pix_cr[firstline/2];
  uint8* membuf_a = ((uint8*)(mem));
  uint8* membuf_b = ((uint8*)(mem))+d_spec.bytes_per_line;

  for (int y=firstline;y<=lastline;y+=2)
    {
      for (int x=0;x<w;x+=8)
        {
          // Upper row: %0 = luma row, %1 = Cb row, %2 = Cr row, %3 = constants.
          __asm__ __volatile__
            (
             "movd        (%1),%%mm1\n\t"   // load 4 Cb values into mm1
             " pxor       %%mm0,%%mm0\n\t"  // mm0=0
             "movd        (%2),%%mm2\n\t"   // load 4 Cr values into mm2
             " punpcklbw  %%mm0,%%mm1\n\t"  // widen the Cb values in mm1 to 16 bit
             "psubw       (%3),%%mm1\n\t"   // subtract the offset 128 from the Cb values
             " punpcklbw  %%mm0,%%mm2\n\t"  // widen the Cr values in mm2 to 16 bit
             "psubw       (%3),%%mm2\n\t"   // subtract the offset 128 from the Cr values
             " movq       %%mm1,%%mm3\n\t"  // copy the Cb values to mm3
             "movq        %%mm1,%%mm5\n\t"  // ... and to mm5
             " punpcklwd  %%mm2,%%mm1\n\t"  // mm1 = low two Cb/Cr pairs interleaved
             "pmaddwd     24(%3),%%mm1\n\t" // multiply-add with the Cb/Cr factors -> 2 low G impacts
             " punpckhwd  %%mm2,%%mm3\n\t"  // mm3 = high two Cb/Cr pairs interleaved
             "pmaddwd     24(%3),%%mm3\n\t" // multiply-add with the Cb/Cr factors -> 2 high G impacts
             "movq        %%mm2,48(%3)\n\t" // save mm2 (Cr) in the scratch area
             "movq        (%0),%%mm6\n\t"   // load 8 Y pixels into mm6
             "psubusb     8(%3),%%mm6\n\t"  // Y -= 16 (unsigned saturation)
             " packssdw   %%mm3,%%mm1\n\t"  // mm1 now holds the 4 G impacts as words
             "movq        %%mm6,%%mm7\n\t"  // copy the Y pixels to mm7
             " punpcklbw  %%mm0,%%mm6\n\t"  // 4 low Y pixels into mm6
             "pmullw      40(%3),%%mm6\n\t" // ... multiply them by the Y factor
             " punpckhbw  %%mm0,%%mm7\n\t"  // 4 high Y pixels into mm7
             "pmullw      40(%3),%%mm7\n\t" // ... multiply them by the Y factor
             " movq       %%mm1,%%mm4\n\t"  // copy the G impacts to mm4
             "movq        %%mm1,56(%3)\n\t" // save the G impacts for the lower row
             " punpcklwd  %%mm1,%%mm1\n\t"  // duplicate both low G impacts (one per pixel)
             "movq        %%mm6,%%mm0\n\t"  // 4 low scaled Y into mm0
             " punpckhwd  %%mm4,%%mm4\n\t"  // duplicate both high G impacts
             "movq        %%mm7,%%mm3\n\t"  // 4 high scaled Y into mm3
             " psubw      %%mm1,%%mm6\n\t"  // compute 4 low G in mm6
             "psraw       128(%3),%%mm6\n\t"// shift the G values in mm6 down to their final width
             " psubw      %%mm4,%%mm7\n\t"  // compute 4 high G in mm7
             "movq        %%mm5,%%mm2\n\t"  // copy of the 4 Cb values (from mm5) into mm2
             " punpcklwd  %%mm5,%%mm5\n\t"  // duplicate both low Cb values
             "pmullw      32(%3),%%mm5\n\t" // compute 4 low B impacts
             " punpckhwd  %%mm2,%%mm2\n\t"  // duplicate both high Cb values
             "psraw       128(%3),%%mm7\n\t"// shift the G values in mm7 down to their final width
             " pmullw     32(%3),%%mm2\n\t" // compute 4 high B impacts
             "packuswb    %%mm7,%%mm6\n\t"  // pack the 8 G values into mm6
             "movq        %%mm5,64(%3)\n\t" // save the 4 low B impacts
             " paddw      %%mm0,%%mm5\n\t"  // compute 4 low B in mm5
             "movq        %%mm2,88(%3)\n\t" // save the 4 high B impacts
             " paddw      %%mm3,%%mm2\n\t"  // compute 4 high B in mm2
             "psraw       136(%3),%%mm5\n\t"// shift the B values down to their final width
             "psraw       136(%3),%%mm2\n\t"// shift the B values down to their final width
             "packuswb    %%mm2,%%mm5\n\t"  // pack the 8 B values into mm5

             "movq        48(%3),%%mm2\n\t" // reload the 4 Cr values into mm2
             "movq        %%mm2,%%mm7\n\t"  // copy the 4 Cr values to mm7
             " punpcklwd  %%mm2,%%mm2\n\t"  // duplicate the 2 low Cr values
             "pmullw      16(%3),%%mm2\n\t" // compute the 4 low R impacts
             " punpckhwd  %%mm7,%%mm7\n\t"  // duplicate the 2 high Cr values
             "pmullw      16(%3),%%mm7\n\t" // compute the 4 high R impacts
             "paddusb    112(%3),%%mm5\n\t" // limit B: saturating add of the limit constant ...
             "movq        %%mm2,72(%3)\n\t" // save the 4 low R impacts
             "paddw       %%mm0,%%mm2\n\t"  // compute 4 low R
             "psraw      120(%3),%%mm2\n\t" // shift 4 low R down to their final width
             " pxor       %%mm4,%%mm4\n\t"  // mm4=0
             "movq        %%mm7,80(%3)\n\t" // save the 4 high R impacts
             " paddw      %%mm3,%%mm7\n\t"  // compute 4 high R
             "psraw      120(%3),%%mm7\n\t" // shift 4 high R down to their final width
             "psubusb    112(%3),%%mm5\n\t" // ... limit B: saturating subtract of the same constant
             " packuswb   %%mm7,%%mm2\n\t"  // pack the 8 R values into mm2
             "paddusb    104(%3),%%mm6\n\t" // limit G (saturating add ...
             "psubusb    104(%3),%%mm6\n\t" // ... then saturating subtract)
             "paddusb     96(%3),%%mm2\n\t" // limit R (saturating add ...
             "psubusb     96(%3),%%mm2\n\t" // ... then saturating subtract)


             // Now convert to the actual display pixel format.

             "psllq      144(%3),%%mm2\n\t" // shift R to the left
             " movq      %%mm5,%%mm7\n\t"   // copy B to mm7
             "punpcklbw  %%mm2,%%mm5\n\t"   // interleave 4 low B and R bytes: B in the low, R in the high byte of each word
             " pxor      %%mm0,%%mm0\n\t"   // mm0=0
             "punpckhbw  %%mm2,%%mm7\n\t"   // interleave 4 high B and R bytes
             " movq      %%mm6,%%mm3\n\t"   // copy G to mm3
             "punpcklbw  %%mm0,%%mm6\n\t"   // 4 low G as words in mm6
             "psllw      152(%3),%%mm6\n\t" // move 4 low G into position
             "punpckhbw  %%mm0,%%mm3\n\t"   // 4 high G as words in mm3
             " por       %%mm6,%%mm5\n\t"   // 4 low RGB16 pixels in mm5
             "psllw      152(%3),%%mm3\n\t" // move 4 high G into position
             "por        %%mm3,%%mm7\n\t"   // 4 high RGB16 pixels in mm7

             : : "r" (yptr1), "r" (cbptr), "r" (crptr) , "r" (&constants[0])
             );


          // Lower row: %0 = luma row, %1 = upper output row, %2 = lower output
          // row, %3 = constants.  mm5/mm7 still hold the upper row's pixels.
          __asm__ __volatile__
            (
             "movq       %%mm5, (%1)\n\t"   // write the first 4 RGB16 pixels of the upper row

             // process the second of the two rows

             "movq       (%0),%%mm1\n\t"    // load 8 Y pixels into mm1
             " pxor      %%mm2,%%mm2\n\t"   // mm2=0
             "psubusb    8(%3),%%mm1\n\t"   // subtract the Y offset
             "movq       %%mm1,%%mm5\n\t"   // copy the 8 Y values to mm5
             " punpcklbw %%mm2,%%mm1\n\t"   // 4 low Y into mm1
             "pmullw     40(%3),%%mm1\n\t"  // multiply 4 low Y by the Y factor
             " punpckhbw %%mm2,%%mm5\n\t"   // 4 high Y into mm5
             "pmullw     40(%3),%%mm5\n\t"  // multiply 4 high Y by the Y factor
             "movq       %%mm7,8(%1)\n\t"   // write the second 4 RGB16 pixels of the upper row
             " movq      %%mm1,%%mm0\n\t"   // 4 low Y into mm0
             "paddw      72(%3),%%mm0\n\t"  // add the 4 low R impacts -> 4 low R in mm0
             " movq      %%mm5,%%mm6\n\t"   // 4 high Y into mm6
             "psraw      120(%3),%%mm0\n\t" // shift 4 low R down to their final width
             "paddw      80(%3),%%mm5\n\t"  // add the 4 high R impacts -> 4 high R in mm5
             " movq      %%mm1,%%mm2\n\t"   // 4 low Y into mm2
             "psraw      120(%3),%%mm5\n\t" // shift 4 high R down to their final width
             "paddw      64(%3),%%mm2\n\t"  // add the 4 low B impacts -> 4 low B in mm2
             " packuswb  %%mm5,%%mm0\n\t"   // pack the 8 R values into mm0
             "psraw      136(%3),%%mm2\n\t" // shift 4 low B down to their final width
             " movq      %%mm6,%%mm5\n\t"   // 4 high Y into mm5
             "paddw      88(%3),%%mm6\n\t"  // add the 4 high B impacts -> 4 high B in mm6
             "psraw      136(%3),%%mm6\n\t" // shift 4 high B down to their final width
             "movq       56(%3),%%mm3\n\t"  // reload the 4 G impacts into mm3
             "packuswb   %%mm6,%%mm2\n\t"   // pack the 8 B values into mm2
             " movq      %%mm3,%%mm4\n\t"   // copy the G impacts to mm4
             "punpcklwd  %%mm3,%%mm3\n\t"   // duplicate the low 2 G impacts
             "punpckhwd  %%mm4,%%mm4\n\t"   // duplicate the high 2 G impacts
             " psubw     %%mm3,%%mm1\n\t"   // compute 4 low G in mm1
             "psraw      128(%3),%%mm1\n\t" // shift 4 low G down to their final width
             " psubw     %%mm4,%%mm5\n\t"   // compute 4 high G in mm5
             "psraw      128(%3),%%mm5\n\t" // shift 4 high G down to their final width
             "paddusb    112(%3),%%mm2\n\t" // limit B: saturating add of the limit constant
             " packuswb  %%mm5,%%mm1\n\t"   // pack the 8 G values into mm1
             "psubusb    112(%3),%%mm2\n\t" // limit B: saturating subtract of the same constant
             "paddusb     96(%3),%%mm0\n\t" // limit R: saturating add
             "psubusb     96(%3),%%mm0\n\t" // limit R: saturating subtract
             "paddusb    104(%3),%%mm1\n\t" // limit G: saturating add
             "psubusb    104(%3),%%mm1\n\t" // limit G: saturating subtract

             // Now convert to the actual display pixel format.

             "psllq      144(%3),%%mm0\n\t" // shift R to the left
             " movq      %%mm2,%%mm7\n\t"   // copy B to mm7

             "punpcklbw  %%mm0,%%mm2\n\t"   // interleave 4 low B and R bytes: B in the low, R in the high byte of each word
             " pxor      %%mm4,%%mm4\n\t"   // mm4=0
             "movq       %%mm1,%%mm3\n\t"   // copy G to mm3
             " punpckhbw %%mm0,%%mm7\n\t"   // interleave 4 high B and R bytes
             "punpcklbw  %%mm4,%%mm1\n\t"   // 4 low G as words in mm1
             "punpckhbw  %%mm4,%%mm3\n\t"   // 4 high G as words in mm3
             "psllw      152(%3),%%mm1\n\t" // move 4 low G into position
             " por       %%mm1,%%mm2\n\t"   // 4 low RGB16 pixels in mm2
             "psllw      152(%3),%%mm3\n\t" // move 4 high G into position
             "por        %%mm3,%%mm7\n\t"   // 4 high RGB16 pixels in mm7

             "movq       %%mm2, (%2)\n\t"   // write the first 4 RGB16 pixels of the lower row
             "movq       %%mm7,8(%2)\n\t"   // write the second 4 RGB16 pixels of the lower row

             : : "r" (yptr2), "r" (membuf_a), "r" (membuf_b) , "r" (&constants[0])
             );

          // Advance by 8 luma pixels, 4 chroma samples and 8 output pixels
          // (16 bytes).
          yptr1+=8;
          yptr2+=8;
          cbptr+=4;
          crptr+=4;
          membuf_a+=16;
          membuf_b+=16;
        }

      // Move all pointers on to the next row pair.
      yptr1 += yskip;
      yptr2 += yskip;
      cbptr += cskip;
      crptr += cskip;
      membuf_a += mskip;
      membuf_b += mskip;
    }

  // Leave MMX state so that subsequent floating-point code works again.
  __asm__
    (
     "emms\n\t"
     );
}

--- NEW FILE: yuv2rgb16.cc ---
/*
 *  yuv2rgb16.cc
 */

#include <iostream.h>
#include <iomanip.h>

#include "yuv2rgb16.hh"


bool i2r_yuv_16bit::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  if (spec.resize_to_fixed || spec.resize_with_factor) return false;
  if (spec.bits_per_pixel != 16) return false;
  if (!spec.little_endian) return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==true) return false;
  if (param.chroma !=Chroma420) return false;

  return true;
}

// Converts rows firstline..lastline of a 4:2:0 YUV image into 16-bit RGB
// pixels (portable C++ version).
//
//   img        source image; its chroma format is asserted to be Chroma420
//   mem        output buffer; image row r is written at mem + r*bytes_per_line
//   firstline  first row to convert, asserted to be even
//   lastline   last row to convert (rows are processed in pairs)
//
// One chroma sample is shared by a 2x2 block of luma samples, so every
// inner-loop iteration emits two pixels in each of two consecutive rows.
// Intermediate colour values are scaled by 256: a value <= 0 yields no bits
// for the component, a value above 0xff00 yields the full component mask.
//
// Fix: the chroma width that bounds the inner loops used to be read
// uninitialized because the GetChromaSizes() call had been commented out.
//
// NOTE(review): this routine addresses output rows by absolute row index,
// whereas i2r_16bit_mmx::Transform starts writing at `mem` for `firstline`;
// confirm which convention callers expect.
void i2r_yuv_16bit::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  const uint32 rmask=d_spec.r_mask;
  const uint32 gmask=d_spec.g_mask;
  const uint32 bmask=d_spec.b_mask;

  // Right-shift counts that move the 16-bit intermediate value of each
  // component to its position in the output pixel:
  // shift = 16 - component_shift - component_bits.
  uint32 rshift,gshift,bshift;

  rshift = d_spec.r_shift;  rshift -= 8-d_spec.r_bits;  rshift -= 8;  rshift = -rshift;
  gshift = d_spec.g_shift;  gshift -= 8-d_spec.g_bits;  gshift -= 8;  gshift = -gshift;
  bshift = d_spec.b_shift;  bshift -= 8-d_spec.b_bits;  bshift -= 8;  bshift = -bshift;

  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.chroma==Chroma420);
  assert(firstline%2 == 0);

  const Pixel*const* yp = img.AskFrameY_const();
  const Pixel*const* up = img.AskFrameU_const();
  const Pixel*const* vp = img.AskFrameV_const();

  // Number of chroma samples per row; bounds the inner loops below.
  int chr_w, chr_h;

  param.GetChromaSizes(chr_w,chr_h);

  // NOTE(review): bshift is unsigned, so `bshift>=0` is always true and the
  // decision effectively depends on rshift alone.
  bool fastversion = (rshift==0 &&  // red does not need shifting
                      bshift>=0);   // blue will be shifted right -> no need to mask it out

  if (fastversion)
    {
      // Fast path: red is only masked (no shift), blue is shifted but not masked.
      for (int cy=firstline/2;cy<=lastline/2;cy++)
        {
          const Pixel*  yptr1 = yp[2*cy  ];
          const Pixel*  yptr2 = yp[2*cy+1];
          const Pixel*  uptr  = up[cy];
          const Pixel*  vptr  = vp[cy];
          uint16* membuf16a = ((uint16*)(mem+ 2*cy   *d_spec.bytes_per_line));
          uint16* membuf16b = ((uint16*)(mem+(2*cy+1)*d_spec.bytes_per_line));

          for (int cx=0;cx<chr_w;cx++)
            {
              int u=((int)*uptr++) -128;
              int v=((int)*vptr++) -128;

              // Chroma contributions, shared by the four pixels of this block.
              int r0 = (int)(         + 409*v);
              int g0 = (int)( - 100*u - 208*v);
              int b0 = (int)( + 516*u        );

              // Upper row, left pixel.
              int val;
              int yy=(((int)*yptr1++) -16)*298;

              int red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val = rmask; } else { val = (red)&rmask; }
              int green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              int blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift); }
              *membuf16a++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));

              // Upper row, right pixel.
              yy=(((int)*yptr1++) -16)*298;
              red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val = rmask; } else { val = (red)&rmask; }
              green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift); }
              *membuf16a++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));

              // Lower row, left pixel.
              yy=(((int)*yptr2++) -16)*298;
              red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val = rmask; } else { val = (red)&rmask; }
              green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift); }
              *membuf16b++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));

              // Lower row, right pixel.
              yy=(((int)*yptr2++) -16)*298;
              red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val = rmask; } else { val = (red)&rmask; }
              green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift); }
              *membuf16b++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));
            }
        }
    }
  else
    {
      // General path: every component is shifted and masked.
      for (int cy=firstline/2;cy<=lastline/2;cy++)
        {
          const Pixel*  yptr1 = yp[2*cy  ];
          const Pixel*  yptr2 = yp[2*cy+1];
          const Pixel*  uptr  = up[cy];
          const Pixel*  vptr  = vp[cy];
          uint16* membuf16a = ((uint16*)(mem+ 2*cy   *d_spec.bytes_per_line));
          uint16* membuf16b = ((uint16*)(mem+(2*cy+1)*d_spec.bytes_per_line));

          for (int cx=0;cx<chr_w;cx++)
            {
              int u=((int)*uptr++) -128;
              int v=((int)*vptr++) -128;

              // Chroma contributions, shared by the four pixels of this block.
              int r0 = (int)(         + 409*v);
              int g0 = (int)( - 100*u - 208*v);
              int b0 = (int)( + 516*u        );

              // Upper row, left pixel.
              int val;
              int yy=(((int)*yptr1++) -16)*298;
              int red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val=rmask; } else { val = (red>>rshift)&rmask; }
              int green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              int blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift)&bmask; }
              *membuf16a++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));

              // Upper row, right pixel.
              yy=(((int)*yptr1++) -16)*298;
              red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val=rmask; } else { val = (red>>rshift)&rmask; }
              green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift)&bmask; }
              *membuf16a++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));

              // Lower row, left pixel.
              yy=(((int)*yptr2++) -16)*298;
              red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val=rmask; } else { val = (red>>rshift)&rmask; }
              green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift)&bmask; }
              *membuf16b++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));

              // Lower row, right pixel.
              yy=(((int)*yptr2++) -16)*298;
              red   = r0+yy;
              if (red<=0) { val=0; } else if (red>0xff00) { val=rmask; } else { val = (red>>rshift)&rmask; }
              green = g0+yy;
              if (green<=0) { } else if (green>0xff00) { val|=gmask; } else { val |= (green>>gshift)&gmask; }
              blue  = b0+yy;
              if (blue<=0) { } else if (blue>=0xff00) { val |= bmask; } else { val |= (blue>>bshift)&bmask; }
              *membuf16b++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));
            }
        }
    }
}

--- NEW FILE: yuv2rgb32mmx.cc ---
/*
 *  yuv2rgb32mmx.cc
 */

#include <iostream.h>
#include <iomanip.h>

#include "yuv2rgb32mmx.hh"


// File-scope constants and scratch variables for the 32-bit MMX converter.
// The inline assembly in this file refers to them by symbol name inside its
// instruction strings (e.g. "psubw UVoffset,%%mm1"), so they must not be
// renamed.  The factors are the 409/208/100/516/298 coefficients divided by 4.
// NOTE(review): Cb2Rfact holds 409/4, which the portable 16-bit converter
// applies to the V (Cr) sample; the name presumably should read Cr2R -- confirm
// against the assembly before relying on the name.
static uint64 UVoffset   = 0x0080008000800080LL;     // 4x  128                      // chroma offset
static uint64 Yoffset    = 0x1010101010101010LL;     // 8x   16                      // Y offset
static uint64 Cb2Rfact   = 0x0066006600660066LL;     // 4x  102 =  409/4             // chroma -> R factor
static uint64 CbCr2Gfact = 0x0034001900340019LL;     // 2x (52 25) = 208/4 100/4     // Cb,Cr -> G factors
static uint64 Cb2Bfact   = 0x0081008100810081LL;     // 4x  129 =  516/4             // Cb -> B factor
static uint64 Yfact      = 0x004A004A004A004ALL;     // 4x   74 =  298/4             // Y factor
static uint64 shift6bit  = 0x0000000000000006LL;     // the value 6; presumably a shift count for the scaled results -- confirm

// Scratch storage written and read by the assembly (tmp_cr receives the Cr
// values; the others presumably hold the saved R/G/B impacts -- confirm).
static uint64 tmp_cr, tmp_rimpact, tmp_rimpact2, tmp_gimpact, tmp_bimpact, tmp_bimpact2;


bool i2r_32bit_BGR_mmx::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  if (spec.resize_to_fixed || spec.resize_with_factor) return false;
  if (spec.bits_per_pixel != 32) return false;
  if (spec.r_bits != 8 || spec.g_bits != 8 || spec.b_bits != 8) return false;
  if (!spec.little_endian) return false;
  if (spec.r_shift!=16 || spec.g_shift!= 8 || spec.b_shift!= 0) return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==true) return false;
  if (param.chroma !=Chroma420) return false;

  int w = (param.width+7) & ~7;
  if (spec.bytes_per_line < 4*w) return false;

  return true;
}

/* Convert source lines firstline..lastline of a 4:2:0 YUV image to 32 bit pixels with MMX.

   img        source image; must have 4:2:0 chroma (asserted)
   mem        start of the output buffer; source line y is written to
              mem + bytes_per_line*(y-firstline)
   firstline  first source line, must be even (asserted)
   lastline   last source line; lines are processed in pairs (y, y+1) that share chroma line y/2

   Each inner iteration converts 8 pixels of both lines and writes 32 bytes per output line;
   the pixel bytes are stored in the order B, G, R, 0.  The loop runs in steps of 8 up to the
   image width, so for widths that are not a multiple of 8 the last step reads and writes
   beyond the width.

   NOTE(review): the two asm statements declare no outputs or clobbers and hand values to each
   other in MMX registers and in the file-scope tmp_* variables; this relies on the compiler
   not touching the MMX registers in between.
 */
void i2r_32bit_BGR_mmx::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.chroma==Chroma420);
  assert((firstline%2) == 0);

  const Pixel*const* pix_y  = img.AskFrameY_const();
  const Pixel*const* pix_cb = img.AskFrameU_const();
  const Pixel*const* pix_cr = img.AskFrameV_const();

  const int h = param.height;   // not used below
  const int w = param.width;

  for (int y=firstline;y<=lastline;y+=2)
    {
      // Two luma lines share one chroma line.
      const uint8*  yptr1 = (uint8*)pix_y[y];
      const uint8*  yptr2 = (uint8*)pix_y[y+1];
      const uint8*  cbptr = (uint8*)pix_cb[y/2];
      const uint8*  crptr = (uint8*)pix_cr[y/2];

      // Output lines for the upper (a) and lower (b) line of the pair.
      uint8* membuf_a = ((uint8*)(mem))+d_spec.bytes_per_line*(y-firstline);
      uint8* membuf_b = membuf_a+d_spec.bytes_per_line;

      for (int x=0;x<w;x+=8)
        {
          // First statement: compute R, G, B for 8 pixels of the upper line.
          // On exit: mm2 = 8 R bytes, mm6 = 8 G bytes, mm5 = 8 B bytes.
          __asm__ __volatile__
            (
	     ".align 8 \n\t"
             "movd        (%1),%%mm1\n\t"   // 4 Cb values -> mm1
             " pxor       %%mm0,%%mm0\n\t"  // mm0=0
             "movd        (%2),%%mm2\n\t"   // 4 Cr values -> mm2
             " punpcklbw  %%mm0,%%mm1\n\t"  // widen the Cb values in mm1 to 16 bit
             "psubw       UVoffset,%%mm1\n\t"   // subtract the offset 128 from the Cb values
             " punpcklbw  %%mm0,%%mm2\n\t"  // widen the Cr values in mm2 to 16 bit
             "psubw       UVoffset,%%mm2\n\t"   // subtract the offset 128 from the Cr values

             " movq       %%mm1,%%mm3\n\t"  // copy of the Cb values -> mm3
             "movq        %%mm1,%%mm5\n\t"  // ... and -> mm5
             " punpcklwd  %%mm2,%%mm1\n\t"  // mm1 now interleaves the low two Cb/Cr pairs
             "pmaddwd     CbCr2Gfact,%%mm1\n\t" // multiply-add with the CbCr factors -> 2 low G impacts (32 bit)
             " punpckhwd  %%mm2,%%mm3\n\t"  // mm3 now interleaves the high two Cb/Cr pairs
             "pmaddwd     CbCr2Gfact,%%mm3\n\t" // multiply-add with the CbCr factors -> 2 high G impacts (32 bit)

             "movq        %%mm2,tmp_cr\n\t" // save mm2 (Cr)
             "movq        (%0),%%mm6\n\t"   // 8 Y pixels -> mm6
             "psubusb     Yoffset,%%mm6\n\t"  // Y -= 16 (unsigned saturating)
             " packssdw   %%mm3,%%mm1\n\t"  // mm1 now holds the 4 G impacts as 16 bit values
             "movq        %%mm6,%%mm7\n\t"  // copy the Y pixels to mm7
             " punpcklbw  %%mm0,%%mm6\n\t"  // 4 low Y pixels -> mm6 (16 bit)
             "pmullw      Yfact,%%mm6\n\t" // ... multiply them by Yfact
             " punpckhbw  %%mm0,%%mm7\n\t"  // 4 high Y pixels -> mm7 (16 bit)
             "pmullw      Yfact,%%mm7\n\t" // ... multiply them by Yfact
             " movq       %%mm1,%%mm4\n\t"  // G impacts -> mm4
             "movq        %%mm1,tmp_gimpact\n\t" // save the G impacts for the lower line
             " punpcklwd  %%mm1,%%mm1\n\t"  // duplicate the two low G impacts (one chroma sample covers 2 pixels)
             "movq        %%mm6,%%mm0\n\t"  // 4 low scaled Y -> mm0
             " punpckhwd  %%mm4,%%mm4\n\t"  // duplicate the two high G impacts
             "movq        %%mm7,%%mm3\n\t"  // 4 high scaled Y -> mm3
             " psubw      %%mm1,%%mm6\n\t"  // compute the 4 low G in mm6
               "psraw       shift6bit,%%mm6\n\t"// shift the G values in mm6 into their final range
             " psubw      %%mm4,%%mm7\n\t"  // compute the 4 high G in mm7
             "movq        %%mm5,%%mm2\n\t"  // copy of the 4 Cb values (mm5) -> mm2 (original comment said Cr)
             " punpcklwd  %%mm5,%%mm5\n\t"  // duplicate the two low Cb values
             "pmullw      Cb2Bfact,%%mm5\n\t" // compute the 4 low B impacts
             " punpckhwd  %%mm2,%%mm2\n\t"  // duplicate the two high Cb values
               "psraw       shift6bit,%%mm7\n\t"// shift the G values in mm7 into their final range
             " pmullw     Cb2Bfact,%%mm2\n\t" // compute the 4 high B impacts
             "packuswb    %%mm7,%%mm6\n\t"  // pack the 8 G values into mm6 (saturating to bytes)

             "movq        %%mm5,tmp_bimpact\n\t" // save the 4 low B impacts
             " paddw      %%mm0,%%mm5\n\t"  // compute the 4 low B in mm5
             "movq        %%mm2,tmp_bimpact2\n\t" // save the 4 high B impacts
             " paddw      %%mm3,%%mm2\n\t"  // compute the 4 high B in mm2
               "psraw       shift6bit,%%mm5\n\t"// shift the B values into their final range
               "psraw       shift6bit,%%mm2\n\t"// shift the B values into their final range
             "packuswb    %%mm2,%%mm5\n\t"  // pack the 8 B values into mm5

             "movq        tmp_cr,%%mm2\n\t" // 4 Cr values -> mm2
             "movq        %%mm2,%%mm7\n\t"  // copy the 4 Cr values to mm7
             " punpcklwd  %%mm2,%%mm2\n\t"  // duplicate the two low Cr values
             "pmullw      Cb2Rfact,%%mm2\n\t" // compute the low R impacts
             " punpckhwd  %%mm7,%%mm7\n\t"  // duplicate the two high Cr values
             "pmullw      Cb2Rfact,%%mm7\n\t" // compute the high R impacts
             "movq        %%mm2,tmp_rimpact\n\t" // save the 4 low R impacts
             "paddw       %%mm0,%%mm2\n\t"  // compute the 4 low R
               "psraw      shift6bit,%%mm2\n\t" // shift the 4 low R into their final range
             " pxor       %%mm4,%%mm4\n\t"  // mm4=0
             "movq        %%mm7,tmp_rimpact2\n\t" // save the 4 high R impacts
             " paddw      %%mm3,%%mm7\n\t"  // compute the 4 high R
               "psraw      shift6bit,%%mm7\n\t" // shift the 4 high R into their final range
             " packuswb   %%mm7,%%mm2\n\t"  // pack the 8 R values into mm2

	     //"movq        %%mm6,(%4)\n\t"


             // The conversion to the display format follows in the next asm statement.

             : : "r" (yptr1), "r" (cbptr), "r" (crptr) 
             );


          // Second statement: interleave and store the upper line, then convert and store
          // the lower line using the chroma impacts saved in the tmp_* variables.
          __asm__ __volatile__
            (
	     ".align 8 \n\t"
	     "movq       %%mm2,%%mm7\n\t" // copy of R for the high half (original comment labelled this G)
	     "movq       %%mm5,%%mm4\n\t" // B
	     "movq       %%mm6,%%mm3\n\t" // copy of G for the high half (original comment labelled this R)

	     "pxor       %%mm0,%%mm0\n\t"
	     "punpcklbw  %%mm0,%%mm2\n\t" // low 4 R bytes, each followed by a zero byte
	     "punpcklbw  %%mm6,%%mm5\n\t" // low 4 B bytes interleaved with the G bytes
	     "movq       %%mm5,%%mm1\n\t"
	     "punpcklwd  %%mm2,%%mm5\n\t" // B,G,R,0 for pixels 0 and 1
             "movq       %%mm5,  (%1)\n\t" // write the first pair of RGB32 pixels
	     "punpckhwd  %%mm2,%%mm1\n\t"
             "movq       %%mm1, 8(%1)\n\t" // write the second pair of RGB32 pixels

	     "pxor       %%mm0,%%mm0\n\t"
	     "punpckhbw  %%mm0,%%mm7\n\t" // high 4 R bytes, each followed by a zero byte
	     "punpckhbw  %%mm3,%%mm4\n\t" // high 4 B bytes interleaved with the G bytes
	     "movq       %%mm4,%%mm2\n\t"
	     "punpcklwd  %%mm7,%%mm4\n\t"
             "movq       %%mm4,16(%1)\n\t" // write the third pair of RGB32 pixels
	     "punpckhwd  %%mm7,%%mm2\n\t"
             "movq       %%mm2,24(%1)\n\t" // write the fourth pair of RGB32 pixels


             // process the second of the two lines

             "movq       (%0),%%mm1\n\t"    // 8 Y pixels -> mm1
             " pxor      %%mm2,%%mm2\n\t"   // mm2=0
             "psubusb    Yoffset,%%mm1\n\t"   // subtract the Y offset
             "movq       %%mm1,%%mm5\n\t"   // 8 Y -> mm5
             " punpcklbw %%mm2,%%mm1\n\t"   // 4 low Y -> mm1
             "pmullw     Yfact,%%mm1\n\t"  // multiply the 4 low Y by Yfact
             " punpckhbw %%mm2,%%mm5\n\t"   // 4 high Y -> mm5
             "pmullw     Yfact,%%mm5\n\t"  // multiply the 4 high Y by Yfact
             " movq      %%mm1,%%mm0\n\t"   // 4 low Y -> mm0
             "paddw      tmp_rimpact,%%mm0\n\t"  // add the 4 low R impacts -> 4 low R in mm0
             " movq      %%mm5,%%mm6\n\t"   // 4 high Y -> mm6
               "psraw      shift6bit,%%mm0\n\t" // shift the 4 low R into their final range
             "paddw      tmp_rimpact2,%%mm5\n\t"  // add the 4 high R impacts -> 4 high R in mm5
             " movq      %%mm1,%%mm2\n\t"   // 4 low Y -> mm2
               "psraw      shift6bit,%%mm5\n\t" // shift the 4 high R into their final range
             "paddw      tmp_bimpact,%%mm2\n\t"  // add the 4 low B impacts -> 4 low B in mm2
             " packuswb  %%mm5,%%mm0\n\t"   // pack the 8 R values into mm0
               "psraw      shift6bit,%%mm2\n\t" // shift the 4 low B into their final range
             " movq      %%mm6,%%mm5\n\t"   // 4 high Y -> mm5
             "paddw      tmp_bimpact2,%%mm6\n\t"  // add the 4 high B impacts -> 4 high B in mm6
               "psraw      shift6bit,%%mm6\n\t" // shift the 4 high B into their final range
             "movq       tmp_gimpact,%%mm3\n\t"  // the 4 saved G impacts -> mm3
             "packuswb   %%mm6,%%mm2\n\t"   // pack the 8 B values into mm2
             " movq      %%mm3,%%mm4\n\t"   // copy the G impacts to mm4
             "punpcklwd  %%mm3,%%mm3\n\t"   // duplicate the low 2 G impacts
             "punpckhwd  %%mm4,%%mm4\n\t"   // duplicate the high 2 G impacts
             " psubw     %%mm3,%%mm1\n\t"   // compute the 4 low G in mm1
               "psraw      shift6bit,%%mm1\n\t" // shift the 4 low G into their final range
             " psubw     %%mm4,%%mm5\n\t"   // compute the 4 high G in mm5
               "psraw      shift6bit,%%mm5\n\t" // shift the 4 high G into their final range
             " packuswb  %%mm5,%%mm1\n\t"   // pack the 8 G values into mm1

             // Now convert to the display format (here: mm0 = R, mm1 = G, mm2 = B).

	     /*
	       NOTE(review): register renaming table left by the original author;
	       presumably old register -> new register -- its exact meaning is not
	       evident from the code.
	       6->1
	       2->0
	       5->2
	       3->3
	       4->4
	       7->7
	       0->6
	       1->5
	     */

	     "movq       %%mm0,%%mm7\n\t" // R
	     "movq       %%mm2,%%mm4\n\t" // B
	     "movq       %%mm1,%%mm3\n\t" // G

	     "pxor       %%mm6,%%mm6\n\t"
	     "punpcklbw  %%mm6,%%mm0\n\t" // low 4 R bytes, each followed by a zero byte
	     "punpcklbw  %%mm1,%%mm2\n\t" // low 4 B bytes interleaved with the G bytes
	     "movq       %%mm2,%%mm5\n\t"
	     "punpcklwd  %%mm0,%%mm2\n\t"
             "movq       %%mm2,  (%2)\n\t" // write the first pair of RGB32 pixels
	     "punpckhwd  %%mm0,%%mm5\n\t"
             "movq       %%mm5, 8(%2)\n\t" // write the second pair of RGB32 pixels

	     "pxor       %%mm6,%%mm6\n\t"
	     "punpckhbw  %%mm6,%%mm7\n\t" // high 4 R bytes, each followed by a zero byte
	     "punpckhbw  %%mm3,%%mm4\n\t" // high 4 B bytes interleaved with the G bytes
	     "movq       %%mm4,%%mm0\n\t"
	     "punpcklwd  %%mm7,%%mm4\n\t"
             "movq       %%mm4,16(%2)\n\t" // write the third pair of RGB32 pixels
	     "punpckhwd  %%mm7,%%mm0\n\t"
             "movq       %%mm0,24(%2)\n\t" // write the fourth pair of RGB32 pixels

             : : "r" (yptr2), "r" (membuf_a), "r" (membuf_b) 
             );

          // Advance by 8 pixels: 8 luma bytes, 4 chroma bytes, 32 output bytes per line.
          yptr1+=8;
          yptr2+=8;
          cbptr+=4;
          crptr+=4;
          membuf_a+=32;
          membuf_b+=32;
        }
    }

  // Leave MMX state so that the FPU can be used again.
  __asm__
    (
     "emms\n\t"
     );
}

--- NEW FILE: img2raw.hh ---
/*********************************************************************
  libvideogfx/graphics/lowlevel/img2raw.hh

  purpose:
    Classes that allow image data stored in Image_*<Pixel> objects
    in either RGB or YUV format to be converted to a format the
    hardware directly understands. This can be (for example) the
    data ordering required for X11 XImages. Only RGB outputs
    are supported so far.

    Special hardware oriented conversion routines should be
    integrated here. The appropriate transformation is selected
    automagically.

  notes:

  to do:

  author(s):
   - Dirk Farin, farin at ti.uni-mannheim.de
     University Mannheim, Dept. Circuitry and Simulation
     B 6,26 EG, room 0.10 / D-68131 Mannheim / Germany

  modifications:
    18/Jul/2000 - Dirk Farin - complete reimplementation
    29/Jul/1999 - Dirk Farin - first implementation
 *********************************************************************/

#ifndef LIBVIDEOGFX_GRAPHICS_LOWLEVEL_IMG2RAW_HH
#define LIBVIDEOGFX_GRAPHICS_LOWLEVEL_IMG2RAW_HH

#include "libvideogfx/graphics/basic/image.hh"


/* Derive the number of bits and the shift of a channel bit mask.
   Example: mask=000011111000 -> bits=5; shift=3.
   The mask must not be zero (the definition asserts this). */
void CalcBitsShift(uint32 mask,int& bits,int& shift);

/** Image manipulation operators that act on the fly.
    None of them is supported yet.

    All members are initialised by the constructor: the flags are off, the
    target size is 0x0 and the resize factor is 1.0 (no scaling), so a freshly
    constructed object never exposes indeterminate values.
*/
struct RawImageOperator
{
  RawImageOperator()
    : resize_to_fixed(false),
      final_width(0),
      final_height(0),
      resize_with_factor(false),
      resize_factor(1.0),
      force_to_greyscale(false)
  { }

  bool resize_to_fixed;            // request scaling to final_width x final_height
  int  final_width,final_height;   // target size, only meaningful if resize_to_fixed is set

  bool   resize_with_factor;       // request scaling by resize_factor
  double resize_factor;            // scale factor, only meaningful if resize_with_factor is set

  bool   force_to_greyscale; // REDUNDANT ?
};


/** Description of the RGB pixel format and line layout of a raw output buffer.
 */
struct RawImageSpec_RGB : public RawImageOperator
{
  int  bytes_per_line;   // byte distance between the starts of two consecutive output lines
  int  bits_per_pixel;
  bool little_endian;

  // For every colour channel: bit mask, number of bits and position of the lowest bit.
  // example:   mask: 0xFF0000, bits: 8, shift = 16
  int r_mask,r_bits,r_shift;
  int g_mask,g_bits,g_shift;
  int b_mask,b_bits,b_shift;

  /** Store the three channel masks and derive bits/shift for each of them
      with CalcBitsShift(). */
  void SetRGBMasks(uint32 r,uint32 g,uint32 b)
    {
      r_mask = r;
      CalcBitsShift(r,r_bits,r_shift);

      g_mask = g;
      CalcBitsShift(g,g_bits,g_shift);

      b_mask = b;
      CalcBitsShift(b,b_bits,b_shift);
    }

  /** Print all fields to cout. */
  void Debug_ShowParam() const;
};


/** Front end that converts Image_RGB / Image_YUV images into raw pixel memory
    described by a RawImageSpec_RGB.
 */
class Image2Raw
{
public:
   Image2Raw();
  ~Image2Raw();

  /// Replace the whole output specification (this also overwrites the zoom and greyscale settings).
  void SetOutputSpec(const RawImageSpec_RGB& spec)
  {
    d_spec = spec;
  }

  /// Store the zoom factor; scaling by factor is requested for every value other than 1.0.
  void SetZoomFactor(double f=2)
  {
    d_spec.resize_with_factor = (f!=1.0);
    d_spec.resize_factor      = f;
  }

  /// Switch forced greyscale output on or off.
  void SetGrayscale(bool flag=true)
  {
    d_spec.force_to_greyscale = flag;
  }

  /// Convert an RGB image into mem (defaults: firstline=0, lastline=-1).
  void TransformRGB(const Image_RGB<Pixel>&,uint8* mem,int firstline=0,int lastline=-1);
  /// Convert a YUV image into mem (defaults: firstline=0, lastline=-1).
  void TransformYUV(const Image_YUV<Pixel>&,uint8* mem,int firstline=0,int lastline=-1);

private:
  RawImageSpec_RGB  d_spec;   // current output format

  // Transformation objects; judging by the names they hold the converters used last.
  class Image2Raw_TransformYUV* d_last_yuv_transform;
  class Image2Raw_TransformRGB* d_last_rgb_transform;
};


// ---------------------------------------- only for implementation use ----------------------


/* Base class of all converters that take an RGB image as input.
   Derived classes implement CanConvert(), Transform() and TransformationName().
 */
class Image2Raw_TransformRGB
{
public:
  virtual ~Image2Raw_TransformRGB()
  {
  }

  // True if this converter supports the given image / output format combination.
  virtual bool CanConvert(const Image_RGB<Pixel>&,const RawImageSpec_RGB&) = 0;

  // Remember the output format used by subsequent Transform() calls.
  void SetSpec(const RawImageSpec_RGB& spec)
  {
    d_spec = spec;
  }

  // Convert lines firstline..lastline into mem; the base implementation must never be reached.
  virtual void Transform(const Image_RGB<Pixel>&,uint8* mem,int firstline,int lastline)
  {
    assert(0);
  }

  // Human-readable name of the conversion.
  virtual const char* TransformationName() = 0;

  // Pick a converter for the given image / output format.
  static Image2Raw_TransformRGB* SelectTransform(const Image_RGB<Pixel>&,const RawImageSpec_RGB&);

protected:
  RawImageSpec_RGB d_spec;   // output format set through SetSpec()
};


/* Base class of all converters that take a YUV image as input.
   Derived classes implement CanConvert(), Transform() and TransformationName().
 */
class Image2Raw_TransformYUV
{
public:
  virtual ~Image2Raw_TransformYUV()
  {
  }

  // True if this converter supports the given image / output format combination.
  virtual bool CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&) = 0;

  // Remember the output format used by subsequent Transform() calls.
  void SetSpec(const RawImageSpec_RGB& spec)
  {
    d_spec = spec;
  }

  // Convert lines firstline..lastline into mem; the base implementation must never be reached.
  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline)
  {
    assert(0);
  }

  // Human-readable name of the conversion.
  virtual const char* TransformationName() = 0;

protected:
  RawImageSpec_RGB d_spec;   // output format set through SetSpec()
};

#endif

--- NEW FILE: grey2rgb16mmx.cc ---
/*
 *  grey2rgb16mmx.cc
 */

#include "grey2rgb16mmx.hh"


/* Report whether the MMX greyscale -> 16 bit converter can handle the given
   image / output format.

   Requirements:
    - no scaling, 16 bits per pixel, little endian
    - the red channel occupies the topmost bits of the 16 bit pixel
      (Transform() copies red without shifting and asserts exactly this)
    - the image is greyscale (nocolor set)
    - every output line has room for the width rounded up to a multiple of
      8 pixels at 2 bytes per pixel (Transform() writes 16 bytes per 8 pixels)
 */
bool i2r_grey_16bit_mmx::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  if (spec.resize_to_fixed || spec.resize_with_factor) return false;
  if (spec.bits_per_pixel != 16) return false;
  if (!spec.little_endian) return false;

  // Same condition that Transform() asserts: 16 - r_bits - r_shift == 0.
  if (spec.r_bits + spec.r_shift != 16) return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==false) return false;

  // Two bytes per output pixel; the width is padded to the 8 pixel block size.
  int w = (param.width+7) & ~7;
  if (spec.bytes_per_line < 2*w) return false;

  return true;
}

/* Convert source lines firstline..lastline of the luma plane to 16 bit RGB pixels with MMX.

   img        source image; only its Y plane is read
   mem        start of the output buffer; consecutive source lines are written
              d_spec.bytes_per_line bytes apart, starting at mem
   firstline  first source line
   lastline   last source line (inclusive)

   The image width must be a multiple of 8 (asserted) and the red channel must
   occupy the topmost bits of the 16 bit pixel (asserted).  Every inner iteration
   converts 8 source bytes into 16 output bytes.
 */
void i2r_grey_16bit_mmx::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  // Constant table read by the assembly through operand %2:
  //   [0..2]  right-shift counts that move a value from the top of a 16 bit word
  //           to the position of the R, G and B channel
  //   [3..5]  R, G and B masks, each replicated into all four 16 bit words
  uint64 constants[6];

  constants[0] = 16-d_spec.r_bits-d_spec.r_shift;
  constants[1] = 16-d_spec.g_bits-d_spec.g_shift;
  constants[2] = 16-d_spec.b_bits-d_spec.b_shift;

  assert(constants[0]==0); // Assume that R is aligned to the very left.

  uint64 mask;
  mask = d_spec.r_mask; constants[3] = mask | (mask<<16) | (mask<<32) | (mask<<48);
  mask = d_spec.g_mask; constants[4] = mask | (mask<<16) | (mask<<32) | (mask<<48);
  mask = d_spec.b_mask; constants[5] = mask | (mask<<16) | (mask<<32) | (mask<<48);


  ImageParam_YUV param;
  img.GetParam(param);

  //assert(param.nocolor);

  const Pixel*const* pix_y  = img.AskFrameY_const();

  const int h = param.height;   // not used below
  const int w = param.width;

        uint8* dp;        // current write position in the output line
  const uint8* sp;        // current read position in the source line

  uint8* dpstart=mem;     // start of the current output line

  assert(w%8 == 0);

  for (int y=firstline;y<=lastline;y++)
    {
      sp = pix_y[y];

      dp=dpstart;
      dpstart+=d_spec.bytes_per_line;

      for (int x=0;x<w;x+=8)
        {
          // The asm statement declares no outputs or clobbers; it writes through dp.
          __asm__ __volatile__
            (
	     "movq        (%0),%%mm1\n\t"   // 8 pixels -> mm1  (ABCDEFGH)
	     " pxor       %%mm2,%%mm2\n\t"
	     "movq        %%mm1,%%mm4\n\t"  //         and -> mm4
	     " punpckhbw  %%mm1,%%mm2\n\t"  // A0B0C0D0 in mm2,mm4,mm6 (pixel in the high byte of each word)
	     "pxor        %%mm3,%%mm3\n\t"
	     " punpcklbw  %%mm4,%%mm3\n\t"  // E0F0G0H0 in mm3,mm5,mm7 (pixel in the high byte of each word)
	     "movq        %%mm2,%%mm4\n\t"
	     " movq       %%mm3,%%mm5\n\t"
             "psrlq       1*8(%2),%%mm5\n\t" // shift right to the G position
	     " movq       %%mm2,%%mm6\n\t"
             "psrlq       1*8(%2),%%mm4\n\t" // shift right to the G position
	     " movq       %%mm3,%%mm7\n\t"
             "psrlq       2*8(%2),%%mm6\n\t" // shift right to the B position
	     " pand       3*8(%2),%%mm2\n\t" // keep only the R bits (no shift needed for R)
             "psrlq       2*8(%2),%%mm7\n\t" // shift right to the B position
	     " pand       3*8(%2),%%mm3\n\t" // keep only the R bits
	     "pand        4*8(%2),%%mm4\n\t" // keep only the G bits
	     " pand       4*8(%2),%%mm5\n\t"
	     "por         %%mm4,%%mm2\n\t"   // R | G
	     " pand       5*8(%2),%%mm6\n\t" // keep only the B bits
	     "por         %%mm5,%%mm3\n\t"   // R | G
	     " pand       5*8(%2),%%mm7\n\t"
	     "por         %%mm6,%%mm2\n\t"   // R | G | B
	     " movq       %%mm2,8(%1)\n\t"   // store the 4 pixels taken from the high source bytes
	     "por         %%mm7,%%mm3\n\t"   // R | G | B
	     " movq       %%mm3,(%1)\n\t"    // store the 4 pixels taken from the low source bytes
             : : "r" (sp), "r" (dp), "r" (&constants[0])
             );

	  sp += 8;
	  dp += 16;
        }
    }

  // Leave MMX state so that the FPU can be used again.
  __asm__
    (
     "emms\n\t"
     );
}

--- NEW FILE: img2raw.cc ---
/*
 *  img2raw.cc
 */

#include <iostream.h>

#include "img2raw.hh"
#include "yuv2rgb16.hh"

#if ENABLE_MMX
#include "yuv2rgb16mmx.hh"
#include "yuv2rgb32mmx.hh"
#include "grey2rgb32mmx.hh"
#include "grey2rgb16mmx.hh"
#endif


// Debug helper: writes the text of expression x, ": ", the value of x and finally y
// (a separator string or endl) to cout.
#define SHOWLINE(x,y) cout << #x ": " << x << y

/* Dump the complete output specification to cout: line/pixel layout, byte order,
   one line per colour channel and, if set, the image operators. */
void RawImageSpec_RGB::Debug_ShowParam() const
{
  cout << "bytes_per_line: " << bytes_per_line << endl;
  cout << "bits_per_pixel: " << bits_per_pixel << endl;

  const char* byte_order = (little_endian ? "little" : "big");
  cout << "little_endian: " << byte_order << endl;

  // One line per channel: mask, bits, shift.
  cout << "r_mask: " << r_mask << " ";
  cout << "r_bits: " << r_bits << " ";
  cout << "r_shift: " << r_shift << endl;

  cout << "g_mask: " << g_mask << " ";
  cout << "g_bits: " << g_bits << " ";
  cout << "g_shift: " << g_shift << endl;

  cout << "b_mask: " << b_mask << " ";
  cout << "b_bits: " << b_bits << " ";
  cout << "b_shift: " << b_shift << endl;

  // Optional operators are only mentioned when they are enabled.
  if (resize_to_fixed)
    {
      cout << "resize to fixed: " << final_width << "x" << final_height << endl;
    }

  if (resize_with_factor)
    {
      cout << "resize with factor: " << resize_factor << endl;
    }

  if (force_to_greyscale)
    {
      cout << "force to greyscale\n";
    }
}


// --------------------------------------------------------------------------------------------


/* Convert greyscale to RGB components in 32bit entities in arbitrary order.
 */
class i2r_grey_32bit : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_grey_32bit() { }
  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar grey to 32bit RGB"; }
};


/* Accept unscaled 32 bit output with three 8 bit channels on byte boundaries,
   and only for greyscale (nocolor) images. */
bool i2r_grey_32bit::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  const bool wants_scaling = (spec.resize_to_fixed || spec.resize_with_factor);
  if (wants_scaling || spec.bits_per_pixel != 32)
    return false;

  const bool all_8bit     = (spec.r_bits == 8 && spec.g_bits == 8 && spec.b_bits == 8);
  const bool byte_aligned = !(spec.r_shift%8 || spec.g_shift%8 || spec.b_shift%8);
  if (!all_8bit || !byte_aligned)
    return false;

  ImageParam_YUV param;
  img.GetParam(param);

  // Colour images are left to the YUV converters.
  return !(param.nocolor==false);
}

void i2r_grey_32bit::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  ImageParam_YUV param;
  img.GetParam(param);

  const Pixel*const * pix_y = img.AskFrameY_const();

  for (int y=firstline;y<=lastline;y++)
    {
      uint32* membuf = (uint32*)(mem + d_spec.bytes_per_line*(y-firstline));

      for (int x=0;x<param.width;x++)
        {
	  uint32 val = pix_y[y][x];
	  val |= val<<16;
	  val |= val<< 8;

          *membuf++ = val;
        }
    }
}


// --------------------------------------------------------------------------------------------


/* Convert YUV 4:2:0 and place RGB components in 32bit entities in arbitrary order.
 */
class i2r_yuv_32bit : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_yuv_32bit() { }
  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar YUV to 32bit RGB"; }
};

/* Accept unscaled 24 or 32 bit output with three 8 bit channels on byte
   boundaries, and only for colour images with 4:2:0 chroma. */
bool i2r_yuv_32bit::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  if (spec.resize_to_fixed || spec.resize_with_factor)
    return false;

  const bool depth_ok = (spec.bits_per_pixel == 32 || spec.bits_per_pixel == 24);
  if (!depth_ok)
    return false;

  const bool all_8bit     = (spec.r_bits == 8 && spec.g_bits == 8 && spec.b_bits == 8);
  const bool byte_aligned = !(spec.r_shift%8 || spec.g_shift%8 || spec.b_shift%8);
  if (!all_8bit || !byte_aligned)
    return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==true || param.chroma != Chroma420)
    return false;

  return true;
}

/* Clipping table: clip_0_255[i] is i limited to the range 0..255.

   The scalar YUV converters index the table with (chroma_term + luma_term)>>8.
   For full-range input this reaches 534 for blue (Y=255, Cb=255:
   (239*298 + 127*516)>>8) and -277 at the lower end, so the former range of
   -512..511 could be exceeded.  The table now covers -1024..1023.

   clip_0_255 points at the entry for index 0 and is set by constant
   initialisation; the table contents are filled in by the static InitClip
   object before main() runs.
 */
static const int s_clip_half_range = 1024;

static int  s_clip[2*s_clip_half_range];
static int* clip_0_255 = &s_clip[s_clip_half_range];


static struct InitClip
{
  InitClip()
    {
      for (int i=-s_clip_half_range;i<s_clip_half_range;i++)
        {
          if (i<0)
            {
              clip_0_255[i]=0;
            }
          else if (i>255)
            {
              clip_0_255[i]=255;
            }
          else
            {
              clip_0_255[i]=i;
            }
        }
    }
} dummy_23874678;


void i2r_yuv_32bit::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  int rpos,gpos,bpos;

  const int bytes_per_pixel = d_spec.bits_per_pixel/8;
  const int lastidx = bytes_per_pixel-1;

  rpos = lastidx-d_spec.r_shift/8;
  gpos = lastidx-d_spec.g_shift/8;
  bpos = lastidx-d_spec.b_shift/8;

  if (d_spec.little_endian)
    {
      rpos = lastidx-rpos;
      gpos = lastidx-gpos;
      bpos = lastidx-bpos;
    }


  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.chroma==Chroma420);

  const Pixel*const * pix_y = img.AskFrameY_const();
  const Pixel*const * pix_u = img.AskFrameU_const();
  const Pixel*const * pix_v = img.AskFrameV_const();

  for (int y=firstline;y<=lastline;y+=2)
    {
      uint8* membuf8a = mem + d_spec.bytes_per_line*(y-firstline);
      uint8* membuf8b = mem + d_spec.bytes_per_line*y +d_spec.bytes_per_line;

      const Pixel* cbp = pix_u[y/2];
      const Pixel* crp = pix_v[y/2];
      const Pixel* y0p = pix_y[y  ];
      const Pixel* y1p = pix_y[y+1];

      for (int x=0;x<param.width;x+=2)
        {
          int cb=((int)*cbp++) -128;
          int cr=((int)*crp++) -128;

          int r0 = (int)(          + 409*cr);
          int g0 = (int)( - 100*cb - 208*cr);
          int b0 = (int)( + 516*cb         );
          
          int yy=(((int)*y0p++) -16)*298;
          int red   = (r0+yy)>>8; red  = clip_0_255[red];
          int green = (g0+yy)>>8; green= clip_0_255[green];
          int blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8a[rpos] = red;
          membuf8a[gpos] = green;
          membuf8a[bpos] = blue;
          membuf8a+=bytes_per_pixel;

          yy=(((int)*y0p++) -16)*298;
          red   = (r0+yy)>>8; red  = clip_0_255[red];
          green = (g0+yy)>>8; green= clip_0_255[green];
          blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8a[rpos] = red;
          membuf8a[gpos] = green;
          membuf8a[bpos] = blue;
          membuf8a+=bytes_per_pixel;
          
          yy=(((int)*y1p++) -16)*298;
          red   = (r0+yy)>>8; red  = clip_0_255[red];
          green = (g0+yy)>>8; green= clip_0_255[green];
          blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8b[rpos] = red;
          membuf8b[gpos] = green;
          membuf8b[bpos] = blue;
          membuf8b+=bytes_per_pixel;
          
          yy=(((int)*y1p++) -16)*298;
          red   = (r0+yy)>>8; red  = clip_0_255[red];
          green = (g0+yy)>>8; green= clip_0_255[green];
          blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8b[rpos] = red;
          membuf8b[gpos] = green;
          membuf8b[bpos] = blue;
          membuf8b+=bytes_per_pixel;
        }
    }
}


// --------------------------------------------------------------------------------------------

/* Convert YUV 4:2:2 and place RGB components in 32bit entities in arbitrary order.
 */
class i2r_yuv422_32bit : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_yuv422_32bit() { }
  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar 4:2:2-YUV to 32bit RGB"; }
};

/* Accept unscaled 32 bit output with three 8 bit channels on byte boundaries,
   and only for colour images with 4:2:2 chroma. */
bool i2r_yuv422_32bit::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  const bool wants_scaling = (spec.resize_to_fixed || spec.resize_with_factor);
  if (wants_scaling || spec.bits_per_pixel != 32)
    return false;

  const bool all_8bit     = (spec.r_bits == 8 && spec.g_bits == 8 && spec.b_bits == 8);
  const bool byte_aligned = !(spec.r_shift%8 || spec.g_shift%8 || spec.b_shift%8);
  if (!all_8bit || !byte_aligned)
    return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==true || param.chroma != Chroma422)
    return false;

  return true;
}

void i2r_yuv422_32bit::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  int rpos,gpos,bpos;

  rpos = 3-d_spec.r_shift/8;
  gpos = 3-d_spec.g_shift/8;
  bpos = 3-d_spec.b_shift/8;

  if (d_spec.little_endian)
    {
      rpos = 3-rpos;
      gpos = 3-gpos;
      bpos = 3-bpos;
    }


  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.chroma==Chroma422);

  const Pixel*const * pix_y = img.AskFrameY_const();
  const Pixel*const * pix_u = img.AskFrameU_const();
  const Pixel*const * pix_v = img.AskFrameV_const();

  for (int y=firstline;y<=lastline;y++)
    {
      uint8* membuf8a = mem + d_spec.bytes_per_line*(y-firstline);

      const Pixel* cbp = pix_u[y];
      const Pixel* crp = pix_v[y];
      const Pixel* y0p = pix_y[y];

      for (int x=0;x<param.width;x+=2)
        {
          int cb=((int)*cbp++) -128;
          int cr=((int)*crp++) -128;

          int r0 = (int)(          + 409*cr);
          int g0 = (int)( - 100*cb - 208*cr);
          int b0 = (int)( + 516*cb         );
          
          int yy=(((int)*y0p++) -16)*298;
          int red   = (r0+yy)>>8; red  = clip_0_255[red];
          int green = (g0+yy)>>8; green= clip_0_255[green];
          int blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8a[rpos] = red;
          membuf8a[gpos] = green;
          membuf8a[bpos] = blue;
          membuf8a+=4;

          yy=(((int)*y0p++) -16)*298;
          red   = (r0+yy)>>8; red  = clip_0_255[red];
          green = (g0+yy)>>8; green= clip_0_255[green];
          blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8a[rpos] = red;
          membuf8a[gpos] = green;
          membuf8a[bpos] = blue;
          membuf8a+=4;
        }
    }
}

// --------------------------------------------------------------------------------------------

/* Convert YUV 4:4:4 and place RGB components in 32bit entities in arbitrary order.
 */
class i2r_yuv444_32bit : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_yuv444_32bit() { }
  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar 4:4:4-YUV to 32bit RGB"; }
};

/* Accept unscaled 32 bit output with three 8 bit channels on byte boundaries,
   and only for colour images with 4:4:4 chroma. */
bool i2r_yuv444_32bit::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  const bool wants_scaling = (spec.resize_to_fixed || spec.resize_with_factor);
  if (wants_scaling || spec.bits_per_pixel != 32)
    return false;

  const bool all_8bit     = (spec.r_bits == 8 && spec.g_bits == 8 && spec.b_bits == 8);
  const bool byte_aligned = !(spec.r_shift%8 || spec.g_shift%8 || spec.b_shift%8);
  if (!all_8bit || !byte_aligned)
    return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==true || param.chroma != Chroma444)
    return false;

  return true;
}

void i2r_yuv444_32bit::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  int rpos,gpos,bpos;

  rpos = 3-d_spec.r_shift/8;
  gpos = 3-d_spec.g_shift/8;
  bpos = 3-d_spec.b_shift/8;

  if (d_spec.little_endian)
    {
      rpos = 3-rpos;
      gpos = 3-gpos;
      bpos = 3-bpos;
    }


  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.chroma==Chroma444);

  const Pixel*const * pix_y = img.AskFrameY_const();
  const Pixel*const * pix_u = img.AskFrameU_const();
  const Pixel*const * pix_v = img.AskFrameV_const();

  for (int y=firstline;y<=lastline;y++)
    {
      uint8* membuf8a = mem + d_spec.bytes_per_line*(y-firstline);

      const Pixel* cbp = pix_u[y];
      const Pixel* crp = pix_v[y];
      const Pixel* y0p = pix_y[y];

      for (int x=0;x<param.width;x++)
        {
          int cb=((int)*cbp++) -128;
          int cr=((int)*crp++) -128;

          int r0 = (int)(          + 409*cr);
          int g0 = (int)( - 100*cb - 208*cr);
          int b0 = (int)( + 516*cb         );
          
          int yy=(((int)*y0p++) -16)*298;
          int red   = (r0+yy)>>8; red  = clip_0_255[red];
          int green = (g0+yy)>>8; green= clip_0_255[green];
          int blue  = (b0+yy)>>8; blue = clip_0_255[blue];
          membuf8a[rpos] = red;
          membuf8a[gpos] = green;
          membuf8a[bpos] = blue;
          membuf8a+=4;
        }
    }
}

// --------------------------------------------------------------------------------------------

/* Place RGB components in 32bit entities in arbitrary order.
   For the special cases that can be handled by the classes i2r_xrgb and i2r_xbgr you
   should use them as they are a bit faster.
 */
class i2r_32bit : public Image2Raw_TransformRGB
{
public:
  virtual ~i2r_32bit() { }
  virtual void Transform(const Image_RGB<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_RGB<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_RGB<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar 32bit RGB"; }
};

/* Accept unscaled 24 or 32 bit output with three 8 bit channels on byte
   boundaries.  The image itself is not inspected. */
bool i2r_32bit::s_CanConvert(const Image_RGB<Pixel>& img,const RawImageSpec_RGB& spec)
{
  const bool wants_scaling = (spec.resize_to_fixed || spec.resize_with_factor);
  const bool depth_ok      = (spec.bits_per_pixel == 32 || spec.bits_per_pixel == 24);
  const bool all_8bit      = (spec.r_bits == 8 && spec.g_bits == 8 && spec.b_bits == 8);
  const bool byte_aligned  = !(spec.r_shift%8 || spec.g_shift%8 || spec.b_shift%8);

  return !wants_scaling && depth_ok && all_8bit && byte_aligned;
}

void i2r_32bit::Transform(const Image_RGB<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  int rpos,gpos,bpos;

  const int bytes_per_pixel = d_spec.bits_per_pixel/8;
  const int lastidx = bytes_per_pixel-1;

  rpos = lastidx-d_spec.r_shift/8;
  gpos = lastidx-d_spec.g_shift/8;
  bpos = lastidx-d_spec.b_shift/8;

  if (d_spec.little_endian)
    {
      rpos = lastidx-rpos;
      gpos = lastidx-gpos;
      bpos = lastidx-bpos;
    }

  ImageParam param;
  img.GetParam(param);

  const Pixel*const* r = img.AskFrameR_const();
  const Pixel*const* g = img.AskFrameG_const();
  const Pixel*const* b = img.AskFrameB_const();

  for (int y=firstline;y<=lastline;y++)
    {
      uint8* p = mem+y*d_spec.bytes_per_line;

      int yy = y-firstline;

      for (int x=0;x<param.width;x++)
	{
	  p[rpos] = r[yy][x];
	  p[gpos] = g[yy][x];
	  p[bpos] = b[yy][x];
	  p+=bytes_per_pixel;
	}
    }
}

// --------------------------------------------------------------------------------------------

/* Place RGB components in 16bit entities in arbitrary order.
   For the special cases that can be handled by the classes i2r_xrgb and i2r_xbgr you
   should use them as they are a bit faster.
 */
class i2r_16bit : public Image2Raw_TransformRGB
{
public:
  virtual ~i2r_16bit() { }
  virtual void Transform(const Image_RGB<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_RGB<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_RGB<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar 16bit RGB"; }
};

/* Check whether i2r_16bit can produce the raw format described by 'spec'.

   Transform() moves the 8 bit components into place with
   "shift + (bits-8)": red and green are shifted left, blue is shifted right.
   Hence the red and green amounts must not be negative and the blue amount
   must not be positive. Additionally, no resizing may be requested and the
   format must have 16 bits per pixel.

   The image itself ('img') is not inspected.
 */
bool i2r_16bit::s_CanConvert(const Image_RGB<Pixel>& img,const RawImageSpec_RGB& spec)
{
  const int red_up   = spec.r_shift+(spec.r_bits-8);
  const int green_up = spec.g_shift+(spec.g_bits-8);
  const int blue_up  = spec.b_shift+(spec.b_bits-8);

  const bool shifts_ok  = (red_up>=0 && green_up>=0 && blue_up<=0);
  const bool no_resize  = !(spec.resize_to_fixed || spec.resize_with_factor);
  const bool is_16bit   = (spec.bits_per_pixel == 16);

  return shifts_ok && no_resize && is_16bit;
}

/* Pack the R,G,B planes of 'img' into 16 bit raw pixels.

   For every line y in firstline..lastline (inclusive) the destination row starts
   at mem + y*d_spec.bytes_per_line, while the source row index is y-firstline.
   Each pixel is assembled in a 16 bit value and stored in the byte order
   requested by d_spec.little_endian.

   The shift directions (red/green to the left, blue to the right) rely on the
   conditions checked in s_CanConvert().
 */
void i2r_16bit::Transform(const Image_RGB<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  ImageParam param;
  img.GetParam(param);

  const Pixel*const* r = img.AskFrameR_const();
  const Pixel*const* g = img.AskFrameG_const();
  const Pixel*const* b = img.AskFrameB_const();

  // Left shifts that move the 8 bit red/green values to their final position.
  const int rshift = d_spec.r_shift+(d_spec.r_bits-8);
  const int gshift = d_spec.g_shift+(d_spec.g_bits-8);

  // Blue is moved to the right, so the sign of the amount is flipped.
  const int bshift = -(d_spec.b_shift+(d_spec.b_bits-8));

  for (int y=firstline;y<=lastline;y++)
    {
      uint16* p = (uint16*)(mem+y*d_spec.bytes_per_line);

      int yy = y-firstline;

      for (int x=0;x<param.width;x++)
	{
	  uint16 val;

	  val  = (r[yy][x]<<rshift) & d_spec.r_mask;
	  val |= (g[yy][x]<<gshift) & d_spec.g_mask;

	  // The mask is required whenever b_shift>0: the shifted value then still
	  // carries low order bits below the blue field that must not reach the output.
	  val |= (b[yy][x]>>bshift) & d_spec.b_mask;

	  *p++ = (d_spec.little_endian ? ToLittleEndian((uint16)val) : ToBigEndian((uint16)val));
	}
    }
}

// --------------------------------------------------------------------------------------------


/* Determine position and width of the single run of 1-bits in 'mask'.

   mask  - bit mask of one colour component (e.g. 0xF800)
   bits  - receives the number of consecutive 1-bits
   shift - receives the number of 0-bits below the run

   'mask' must be non-zero and must contain exactly one contiguous run of
   1-bits; both conditions are checked with assert(). If assertions are
   compiled out, a zero mask yields bits==0 and shift==0.
 */
void CalcBitsShift(uint32 mask,int& bits,int& shift)
{
  assert(mask!=0);

  shift=0;
  bits=0;

  // Without this guard a zero mask would never leave the first loop
  // when assertions are disabled.
  if (mask==0) return;

  while ((mask&1)==0) { shift++; mask>>=1; }
  while (mask&1) { bits++; mask>>=1; }

  assert(mask==0); // This may fail if there is more than one continuous sequence of ones (like 000111000011100).
}


/* A new converter starts without any cached transformation object;
   the cache pointers are filled by TransformYUV() / TransformRGB().
 */
Image2Raw::Image2Raw()
  : d_last_yuv_transform(NULL), d_last_rgb_transform(NULL)
{
}


/* Release the cached transformation objects (if any). */
Image2Raw::~Image2Raw()
{
  // delete on a NULL pointer does nothing, so no explicit test is needed.
  delete d_last_yuv_transform;
  delete d_last_rgb_transform;
}


/* Convert the lines firstline..lastline of an RGB image into the raw format d_spec.

   img       - source image
   mem       - destination memory
   firstline - first line to convert
   lastline  - last line to convert (inclusive); -1 selects the last line of the image

   The transformation object used in the previous call is kept and reused as long
   as it can still handle the image and d_spec. Otherwise the first class in the
   list below whose s_CanConvert() accepts the combination is instantiated.

   Throws a string constant (const char*) if no transformation is suitable.
 */
void Image2Raw::TransformRGB(const Image_RGB<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  // Discard the cached transformation if it does not fit any more.
  if (d_last_rgb_transform && !d_last_rgb_transform->CanConvert(img,d_spec))
    {
      delete d_last_rgb_transform;
      d_last_rgb_transform=NULL;
    }

  if (d_last_rgb_transform==NULL)
    {
      //cout << "searching for transform to match:\n";
      //d_spec.Debug_ShowParam();

      // The new object is stored in the member right away. This way it is owned by
      // *this even if SetSpec() or Transform() below should throw.
      if      (i2r_32bit::s_CanConvert(img,d_spec)) { d_last_rgb_transform = new i2r_32bit; }
      else if (i2r_16bit::s_CanConvert(img,d_spec)) { d_last_rgb_transform = new i2r_16bit; }
      else
	{
	  throw "no suitable colorspace transformation found!\n";
	}

      //cout << "RGB transformation used: " << d_last_rgb_transform->TransformationName() << endl;
    }

  Image2Raw_TransformRGB* t = d_last_rgb_transform;
  t->SetSpec(d_spec);

  if (lastline==-1)
    {
      ImageParam param;
      img.GetParam(param);
      lastline=param.height-1;
    }

  t->Transform(img,mem,firstline,lastline);
}

/* Convert the lines firstline..lastline of a YUV image into the raw format d_spec.

   img       - source image
   mem       - destination memory
   firstline - first line to convert
   lastline  - last line to convert (inclusive); -1 selects the last line of the image

   The transformation object used in the previous call is kept and reused as long
   as it can still handle the image and d_spec. Otherwise the candidates below are
   tried in order and the first one whose s_CanConvert() accepts the combination
   is instantiated. The MMX classes are only considered if ENABLE_MMX is set and
   are then tried before the scalar ones.

   Throws a string constant (const char*) if no transformation is suitable.
 */
void Image2Raw::TransformYUV(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  // Discard the cached transformation if it does not fit any more.
  if (d_last_yuv_transform && !d_last_yuv_transform->CanConvert(img,d_spec))
    {
      delete d_last_yuv_transform;
      d_last_yuv_transform=NULL;
    }

  if (d_last_yuv_transform==NULL)
    {
      //cout << "searching for transform to match:\n";
      //d_spec.Debug_ShowParam();

      Image2Raw_TransformYUV* t=NULL;

#if ENABLE_MMX
      if (!t && i2r_32bit_BGR_mmx ::s_CanConvert(img,d_spec)) { t = new i2r_32bit_BGR_mmx;  }
      if (!t && i2r_grey_32bit_mmx::s_CanConvert(img,d_spec)) { t = new i2r_grey_32bit_mmx; }
      if (!t && i2r_16bit_mmx     ::s_CanConvert(img,d_spec)) { t = new i2r_16bit_mmx;      }
      if (!t && i2r_grey_16bit_mmx::s_CanConvert(img,d_spec)) { t = new i2r_grey_16bit_mmx; }
#endif

      if (!t && i2r_yuv_32bit   ::s_CanConvert(img,d_spec)) { t = new i2r_yuv_32bit;    }
      if (!t && i2r_grey_32bit  ::s_CanConvert(img,d_spec)) { t = new i2r_grey_32bit;   }
      if (!t && i2r_yuv422_32bit::s_CanConvert(img,d_spec)) { t = new i2r_yuv422_32bit; }
      if (!t && i2r_yuv444_32bit::s_CanConvert(img,d_spec)) { t = new i2r_yuv444_32bit; }
      if (!t && i2r_yuv_16bit   ::s_CanConvert(img,d_spec)) { t = new i2r_yuv_16bit;    }

      if (!t)
	{
	  throw "no suitable colorspace transformation found!\n";
	}

      //cout << "YUV transformation used: " << t->TransformationName() << endl;

      // Store the new object in the member right away. This way it is owned by
      // *this even if SetSpec() or Transform() below should throw.
      d_last_yuv_transform = t;
    }

  Image2Raw_TransformYUV* t = d_last_yuv_transform;
  t->SetSpec(d_spec);

  if (lastline==-1)
    {
      ImageParam_YUV param;
      img.GetParam(param);
      lastline=param.height-1;
    }

  t->Transform(img,mem,firstline,lastline);
}

--- NEW FILE: yuv2rgb32mmx.hh ---
/*********************************************************************
  yuv2rgb32mmx.hh

  purpose:

  notes:

  to do:

  author(s):
   - Dirk Farin, Kapellenweg 15, 72070 Tuebingen, Germany,
     email: farindk at trick.informatik.uni-stuttgart.de

  modifications:
   13/Apr/00 - Dirk Farin
     - first implementation based on 16bit version
 *********************************************************************/

#ifndef DISPLAY_YUV2RGB32MMX_HH
#define DISPLAY_YUV2RGB32MMX_HH

#include "img2raw.hh"


class i2r_32bit_BGR_mmx : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_32bit_BGR_mmx() { }

  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "YUV to 32bit RGB, (BGR format), MMX accelerated"; }

private:
};

#endif

--- NEW FILE: grey2rgb32mmx.cc ---
/*
 *  grey2rgb32mmx.cc
 */

#include "grey2rgb32mmx.hh"


/* Check whether i2r_grey_32bit_mmx can convert 'img' into the raw format 'spec'.

   Requirements:
   - no resizing requested,
   - 32 bits per pixel with three byte aligned 8 bit colour components,
   - the image is marked as greyscale (param.nocolor),
   - a raw line has room for the image width rounded up to a multiple of
     8 pixels, as Transform() always writes groups of 8 pixels.

   Returns true if the combination can be handled.
 */
bool i2r_grey_32bit_mmx::s_CanConvert(const Image_YUV<Pixel>& img,const RawImageSpec_RGB& spec)
{
  // Note: this function returns bool, so the rejections are 'false' (not NULL).
  if (spec.resize_to_fixed || spec.resize_with_factor) return false;
  if (spec.bits_per_pixel != 32) return false;
  if (spec.r_bits != 8 || spec.g_bits != 8 || spec.b_bits != 8) return false;
  if ((spec.r_shift%8) || (spec.g_shift%8) || (spec.b_shift%8)) return false;

  ImageParam_YUV param;
  img.GetParam(param);

  if (param.nocolor==false) return false;

  // Width rounded up to the next multiple of 8 pixels, 4 bytes per pixel.
  int w = (param.width+7) & ~7;

  if (spec.bytes_per_line < 4*w) return false;

  return true;
}

/* Expand the luminance plane of a greyscale image to 32 bit pixels using MMX.

   Every source byte is replicated into four consecutive destination bytes, so
   all bytes of an output pixel carry the grey value.

   Source rows are taken from pix_y[y] for y in firstline..lastline (inclusive).
   The destination is written consecutively starting at 'mem', advancing by
   d_spec.bytes_per_line per row (i.e. the row 'firstline' ends up at 'mem').

   Pixels are processed in groups of 8 (8 bytes read, 32 bytes written), so for
   widths that are not a multiple of 8 the last group extends beyond 'w'.
   s_CanConvert() checks that the destination line is long enough for this.
   NOTE(review): the source row is read up to the next multiple of 8 as well;
   confirm that the image rows provide this padding.
 */
void i2r_grey_32bit_mmx::Transform(const Image_YUV<Pixel>& img,uint8* mem,int firstline,int lastline)
{
  ImageParam_YUV param;
  img.GetParam(param);

  assert(param.nocolor);

  const Pixel*const* pix_y  = img.AskFrameY_const();

  // Note: 'h' is not used in the code below.
  const int h = param.height;
  const int w = param.width;

        uint8* dp;  // current write position in the destination row
  const uint8* sp;  // current read position in the source row

  uint8* dpstart=mem;  // start of the current destination row

  //assert(w%8 == 0);

  for (int y=firstline;y<=lastline;y++)
    {
      sp = pix_y[y];

      dp=dpstart;
      dpstart+=d_spec.bytes_per_line;

      for (int x=0;x<w;x+=8)
        {
          // %0 = sp (source), %1 = dp (destination).
          // NOTE(review): the asm statement declares neither a "memory" clobber
          // nor the MMX registers it modifies.
          __asm__ __volatile__
            (
	     "movq        (%0),%%mm0\n\t"   // 8 pixels to mm0  (ABCDEFGH)
	     "movq        %%mm0,%%mm4\n\t"  //         and to mm4
	     "punpckhbw   %%mm0,%%mm0\n\t"  // AABBCCDD in mm0,mm2
	     " punpcklbw  %%mm4,%%mm4\n\t"  // EEFFGGHH in mm4,mm6
	     "movq        %%mm0,%%mm2\n\t"  //
             " movq       %%mm4,%%mm6\n\t"  //
	     "punpckhbw   %%mm0,%%mm0\n\t"  // AAAABBBB in mm0
	     " punpckhbw  %%mm4,%%mm4\n\t"  // EEEEFFFF in mm4
	     "movq        %%mm4,8(%1)\n\t"  // EF->mem
             " punpcklbw  %%mm2,%%mm2\n\t"  // CCCCDDDD in mm2
	     "movq        %%mm0,24(%1)\n\t" // AB->mem
	     " punpcklbw  %%mm6,%%mm6\n\t"  // GGGGHHHH in mm6
	     "movq        %%mm2,16(%1)\n\t" // CD->mem
	     "movq        %%mm6,(%1)\n\t"   // GH->mem
             : : "r" (sp), "r" (dp)
             );

	  // 8 source bytes consumed, 8 pixels of 4 bytes each produced.
	  sp += 8;
	  dp += 32;
        }
    }

  // Leave the MMX state again after all rows have been processed.
  __asm__
    (
     "emms\n\t"
     );
}

--- NEW FILE: yuv2rgb16.hh ---
/*********************************************************************
  libvideogfx/graphics/lowlevel/yuv2rgb16.hh

  purpose:
    Transform YUV data into 16bit true color RGB raw data.
    Every bit organization in a 16bit field and endianness
    translation is supported.

  notes:

  to do:

  author(s):
   - Dirk Farin, farin at ti.uni-mannheim.de
     University Mannheim, Dept. Circuitry and Simulation
     B 6,26 EG, room 0.10 / D-68131 Mannheim / Germany

  modifications:
    08/Aug/1999 - Dirk Farin - code imported from DVDview and
                               slightly modified
 *********************************************************************/

#ifndef LIBVIDEOGFX_GRAPHICS_LOWLEVEL_YUV2RGB16_HH
#define LIBVIDEOGFX_GRAPHICS_LOWLEVEL_YUV2RGB16_HH

#include "img2raw.hh"


class i2r_yuv_16bit : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_yuv_16bit() { }

  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "scalar YUV to 16bit RGB"; }

private:
};

#endif

--- NEW FILE: yuv2rgb16mmx.hh ---
/*********************************************************************
  yuv2rgb16mmx.hh

  purpose:

  notes:

  to do:

  author(s):
   - Dirk Farin, Kapellenweg 15, 72070 Tuebingen, Germany,
     email: farindk at trick.informatik.uni-stuttgart.de

  modifications:
   01/Feb/99 - Dirk Farin
     - interface definition
 *********************************************************************/

#ifndef DISPLAY_YUV2RGB16MMX_HH
#define DISPLAY_YUV2RGB16MMX_HH

#include "img2raw.hh"


class i2r_16bit_mmx : public Image2Raw_TransformYUV
{
public:
  virtual ~i2r_16bit_mmx() { }

  virtual void Transform(const Image_YUV<Pixel>&,uint8* mem,int firstline,int lastline);

  static bool s_CanConvert(const Image_YUV<Pixel>&,const RawImageSpec_RGB&);
  virtual bool CanConvert(const Image_YUV<Pixel>& i,const RawImageSpec_RGB& s) { return s_CanConvert(i,s); }

  virtual const char* TransformationName() { return "YUV to 16bit RGB, MMX accelerated"; }
};

#endif




More information about the dslinux-commit mailing list