diff --git a/Engine/lib/libtheora/CHANGES b/Engine/lib/libtheora/CHANGES
index 74183d91b..b30327e63 100644
--- a/Engine/lib/libtheora/CHANGES
+++ b/Engine/lib/libtheora/CHANGES
@@ -1,3 +1,65 @@
+libtheora 1.1.1 (2009 October 1)
+
+ - Fix problems with MSVC inline assembly
+ - Add the missing encoder_disabled.c to the distribution
+ - build updates: autogen.sh should work better after switching systems
+ and the MSVC project now defaults to the dynamic runtime library
+ - Namespace some variables to avoid conflicts on WinCE.
+
+libtheora 1.1.0 (2009 September 24)
+
+ - Fix various small issues with the example and telemetry code
+ - Fix handling a zero-byte packet as the first frame
+ - Documentation cleanup
+ - Two minor build fixes
+
+libtheora 1.1beta3 (2009 August 22)
+
+ - Rate control fixes to smooth quality
+ - MSVC build now exports all of the 1.0 api
+ - Assorted small bug fixes
+
+libtheora 1.1beta2 (2009 August 12)
+
+ - Fix a rate control problem with difficult input
+ - Build fixes for OpenBSD and Apple Xcode
+ - Examples now all use the 1.0 api
+ - TH_ENCCTL_SET_SPLEVEL works again
+ - Various bug fixes and source tree rearrangement
+
+libtheora 1.1beta1 (2009 August 5)
+
+ - Support for two-pass encoding
+ - Performance optimization of both encoder and decoder
+ - Encoder supports dynamic adjustment of quality and
+ bitrate targets
+ - Encoder is generally more configurable, and all
+ rate control modes perform better
+ - Encoder now accepts 4:2:2 and 4:4:4 chroma sampling
+ - Decoder telemetry output shows quantization choice
+ and a breakdown of bitrate usage in the frame
+ - MSVC assembly optimizations up to date and functional
+
+libtheora 1.1alpha2 (2009 May 26)
+
+ - Reduce lambda for small quantizers.
+ - New encoder fDCT does better on smooth gradients
+ - Use SATD for mode decisions (1-2% bitrate reduction)
+ - Assembly rewrite for new features and general speed up
+ - Share code between the encoder and decoder for performance
+ - Fix 4:2:2 decoding and telemetry
+ - MSVC project files updated, but assembly is disabled.
+ - New configure option --disable-spec to work around toolchain
+ detection failures.
+ - Limit symbol exports on MacOS X.
+ - Port remaining unit tests from the 1.0 release.
+
+libtheora 1.1alpha1 (2009 March 27)
+
+ - Encoder rewrite with much improved vbr quality/bitrate and
+ better tracking of the target rate in cbr mode.
+ - MSVC project files do not work in this release.
+
libtheora 1.0 (2008 November 3)
- Merge x86 assembly for forward DCT from Thusnelda branch.
diff --git a/Engine/lib/libtheora/COPYING b/Engine/lib/libtheora/COPYING
index 5a711972d..c8ccce4ff 100644
--- a/Engine/lib/libtheora/COPYING
+++ b/Engine/lib/libtheora/COPYING
@@ -1,4 +1,4 @@
-Copyright (C) 2002-2008 Xiph.Org Foundation and contributors.
+Copyright (C) 2002-2009 Xiph.org Foundation
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -11,7 +11,7 @@ notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
-- Neither the name of the Xiph.Org Foundation nor the names of its
+- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/Engine/lib/libtheora/LICENSE b/Engine/lib/libtheora/LICENSE
new file mode 100644
index 000000000..5e5ec0846
--- /dev/null
+++ b/Engine/lib/libtheora/LICENSE
@@ -0,0 +1,18 @@
+Please see the file COPYING for the copyright license for this software.
+
+In addition to and irrespective of the copyright license associated
+with this software, On2 Technologies, Inc. makes the following statement
+regarding technology used in this software:
+
+ On2 represents and warrants that it shall not assert any rights
+ relating to infringement of On2's registered patents, nor initiate
+ any litigation asserting such rights, against any person who, or
+ entity which utilizes the On2 VP3 Codec Software, including any
+ use, distribution, and sale of said Software; which make changes,
+ modifications, and improvements in said Software; and to use,
+ distribute, and sell said changes as well as applications for other
+ fields of use.
+
+This reference implementation is originally derived from the On2 VP3
+Codec Software, and the Theora video format is essentially compatible
+with the VP3 video format, consisting of a backward-compatible superset.
diff --git a/Engine/lib/libtheora/include/Makefile.am b/Engine/lib/libtheora/include/Makefile.am
new file mode 100644
index 000000000..d5db4b40f
--- /dev/null
+++ b/Engine/lib/libtheora/include/Makefile.am
@@ -0,0 +1,3 @@
+## Process this file with automake to produce Makefile.in
+
+SUBDIRS = theora
diff --git a/Engine/lib/libtheora/include/Makefile.in b/Engine/lib/libtheora/include/Makefile.in
new file mode 100644
index 000000000..805e6c29e
--- /dev/null
+++ b/Engine/lib/libtheora/include/Makefile.in
@@ -0,0 +1,414 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AR = @AR@
+ARGZ_H = @ARGZ_H@
+AS = @AS@
+AWK = @AWK@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEBUG = @DEBUG@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+F77 = @F77@
+GCJ = @GCJ@
+GCJFLAGS = @GCJFLAGS@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLTDL = @INCLTDL@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LIBADD_DL = @LIBADD_DL@
+LIBADD_DLD_LINK = @LIBADD_DLD_LINK@
+LIBADD_DLOPEN = @LIBADD_DLOPEN@
+LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@
+LIBLTDL = @LIBLTDL@
+LIBM = @LIBM@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTDLDEPS = @LTDLDEPS@
+LTDLINCL = @LTDLINCL@
+LTDLOPEN = @LTDLOPEN@
+LT_CONFIG_H = @LT_CONFIG_H@
+LT_DLLOADERS = @LT_DLLOADERS@
+LT_DLPREOPEN = @LT_DLPREOPEN@
+MAINT = @MAINT@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+RC = @RC@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_CONFIG = @SDL_CONFIG@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+lt_ECHO = @lt_ECHO@
+ltdl_LIBOBJS = @ltdl_LIBOBJS@
+ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
+sys_symbol_underscore = @sys_symbol_underscore@
+
+SUBDIRS = theora
+subdir = include
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+DIST_SOURCES =
+
+RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \
+ uninstall-info-recursive all-recursive install-data-recursive \
+ install-exec-recursive installdirs-recursive install-recursive \
+ uninstall-recursive check-recursive installcheck-recursive
+DIST_COMMON = Makefile.am Makefile.in
+DIST_SUBDIRS = $(SUBDIRS)
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu include/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+ @set fnord $$MAKEFLAGS; amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $$MAKEFLAGS; amf=$$2; \
+ dot_seen=no; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ rev=''; for subdir in $$list; do \
+ if test "$$subdir" = "."; then :; else \
+ rev="$$subdir $$rev"; \
+ fi; \
+ done; \
+ rev="$$rev ."; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$(top_distdir)" \
+ distdir=../$(distdir)/$$subdir \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+
+distclean-am: clean-am distclean-generic distclean-libtool \
+ distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-exec-am:
+
+install-info: install-info-recursive
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+uninstall-am: uninstall-info-am
+
+uninstall-info: uninstall-info-recursive
+
+.PHONY: $(RECURSIVE_TARGETS) GTAGS all all-am check check-am clean \
+ clean-generic clean-libtool clean-recursive distclean \
+ distclean-generic distclean-libtool distclean-recursive \
+ distclean-tags distdir dvi dvi-am dvi-recursive info info-am \
+ info-recursive install install-am install-data install-data-am \
+ install-data-recursive install-exec install-exec-am \
+ install-exec-recursive install-info install-info-am \
+ install-info-recursive install-man install-recursive \
+ install-strip installcheck installcheck-am installdirs \
+ installdirs-am installdirs-recursive maintainer-clean \
+ maintainer-clean-generic maintainer-clean-recursive mostlyclean \
+ mostlyclean-generic mostlyclean-libtool mostlyclean-recursive \
+ tags tags-recursive uninstall uninstall-am uninstall-info-am \
+ uninstall-info-recursive uninstall-recursive
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/Engine/lib/libtheora/include/theora/Makefile.am b/Engine/lib/libtheora/include/theora/Makefile.am
new file mode 100644
index 000000000..5479e82a5
--- /dev/null
+++ b/Engine/lib/libtheora/include/theora/Makefile.am
@@ -0,0 +1,7 @@
+## Process this file with automake to produce Makefile.in
+
+theoraincludedir = $(includedir)/theora
+
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+
+noinst_HEADERS = codec.h theoradec.h
diff --git a/Engine/lib/libtheora/include/theora/Makefile.in b/Engine/lib/libtheora/include/theora/Makefile.in
new file mode 100644
index 000000000..d20e60ab4
--- /dev/null
+++ b/Engine/lib/libtheora/include/theora/Makefile.in
@@ -0,0 +1,355 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ../..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AR = @AR@
+ARGZ_H = @ARGZ_H@
+AS = @AS@
+AWK = @AWK@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEBUG = @DEBUG@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+F77 = @F77@
+GCJ = @GCJ@
+GCJFLAGS = @GCJFLAGS@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLTDL = @INCLTDL@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LIBADD_DL = @LIBADD_DL@
+LIBADD_DLD_LINK = @LIBADD_DLD_LINK@
+LIBADD_DLOPEN = @LIBADD_DLOPEN@
+LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@
+LIBLTDL = @LIBLTDL@
+LIBM = @LIBM@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTDLDEPS = @LTDLDEPS@
+LTDLINCL = @LTDLINCL@
+LTDLOPEN = @LTDLOPEN@
+LT_CONFIG_H = @LT_CONFIG_H@
+LT_DLLOADERS = @LT_DLLOADERS@
+LT_DLPREOPEN = @LT_DLPREOPEN@
+MAINT = @MAINT@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+RC = @RC@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_CONFIG = @SDL_CONFIG@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+lt_ECHO = @lt_ECHO@
+ltdl_LIBOBJS = @ltdl_LIBOBJS@
+ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
+sys_symbol_underscore = @sys_symbol_underscore@
+
+theoraincludedir = $(includedir)/theora
+
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+
+noinst_HEADERS = codec.h theoradec.h
+subdir = include/theora
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+DIST_SOURCES =
+HEADERS = $(noinst_HEADERS) $(theorainclude_HEADERS)
+
+DIST_COMMON = $(noinst_HEADERS) $(theorainclude_HEADERS) Makefile.am \
+ Makefile.in
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu include/theora/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+theoraincludeHEADERS_INSTALL = $(INSTALL_HEADER)
+install-theoraincludeHEADERS: $(theorainclude_HEADERS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(theoraincludedir)
+ @list='$(theorainclude_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(theoraincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(theoraincludedir)/$$f"; \
+ $(theoraincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(theoraincludedir)/$$f; \
+ done
+
+uninstall-theoraincludeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(theorainclude_HEADERS)'; for p in $$list; do \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " rm -f $(DESTDIR)$(theoraincludedir)/$$f"; \
+ rm -f $(DESTDIR)$(theoraincludedir)/$$f; \
+ done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ../..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(theoraincludedir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-generic distclean-libtool \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-theoraincludeHEADERS
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+uninstall-am: uninstall-info-am uninstall-theoraincludeHEADERS
+
+.PHONY: GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool distclean distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am info info-am install \
+ install-am install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am install-man \
+ install-strip install-theoraincludeHEADERS installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool tags uninstall uninstall-am \
+ uninstall-info-am uninstall-theoraincludeHEADERS
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/Engine/lib/libtheora/include/theora/codec.h b/Engine/lib/libtheora/include/theora/codec.h
index afdc1b0fa..5c2669630 100644
--- a/Engine/lib/libtheora/include/theora/codec.h
+++ b/Engine/lib/libtheora/include/theora/codec.h
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
@@ -24,10 +24,10 @@
* implementation for Theora, a free,
* patent-unencumbered video codec.
* Theora is derived from On2's VP3 codec with additional features and
- * integration for Ogg multimedia formats by
+ * integration with Ogg multimedia formats by
* the Xiph.Org Foundation.
* Complete documentation of the format itself is available in
- * the Theora
+ * the Theora
* specification.
*
* \subsection Organization
@@ -92,9 +92,9 @@ extern "C" {
/*@}*/
/**The currently defined color space tags.
- * See the Theora
- * specification, Chapter 4, for exact details on the meaning of each of
- * these color spaces.*/
+ * See the Theora
+ * specification, Chapter 4, for exact details on the meaning
+ * of each of these color spaces.*/
typedef enum{
/**The color space was not specified at the encoder.
It may be conveyed by an external means.*/
@@ -108,13 +108,13 @@ typedef enum{
}th_colorspace;
/**The currently defined pixel format tags.
- * See the Theora
+ * See the Theora
* specification, Section 4.4, for details on the precise sample
* locations.*/
typedef enum{
/**Chroma decimation by 2 in both the X and Y directions (4:2:0).
- The Cb and Cr chroma planes are half the width and half the height of the
- luma plane.*/
+ The Cb and Cr chroma planes are half the width and half the
+ height of the luma plane.*/
TH_PF_420,
/**Currently reserved.*/
TH_PF_RSVD,
@@ -133,11 +133,11 @@ typedef enum{
/**A buffer for a single color plane in an uncompressed image.
* This contains the image data in a left-to-right, top-down format.
- * Each row of pixels is stored contiguously in memory, but successive rows
- * need not be.
+ * Each row of pixels is stored contiguously in memory, but successive
+ * rows need not be.
* Use \a stride to compute the offset of the next row.
- * The encoder accepts both positive \a stride values (top-down in memory) and
- * negative (bottom-up in memory).
+ * The encoder accepts both positive \a stride values (top-down in memory)
+ * and negative (bottom-up in memory).
* The decoder currently always generates images with positive strides.*/
typedef struct{
/**The width of this plane.*/
@@ -151,18 +151,18 @@ typedef struct{
}th_img_plane;
/**A complete image buffer for an uncompressed frame.
- * The chroma planes may be decimated by a factor of two in either direction,
- * as indicated by th_info#pixel_fmt.
+ * The chroma planes may be decimated by a factor of two in either
+ * direction, as indicated by th_info#pixel_fmt.
* The width and height of the Y' plane must be multiples of 16.
- * They may need to be cropped for display, using the rectangle specified by
- * th_info#pic_x, th_info#pic_y, th_info#pic_width, and
- * th_info#pic_height.
+ * They may need to be cropped for display, using the rectangle
+ * specified by th_info#pic_x, th_info#pic_y, th_info#pic_width,
+ * and th_info#pic_height.
* All samples are 8 bits.
* \note The term YUV often used to describe a colorspace is ambiguous.
- * The exact parameters of the RGB to YUV conversion process aside, in many
- * contexts the U and V channels actually have opposite meanings.
- * To avoid this confusion, we are explicit: the name of the color channels are
- * Y'CbCr, and they appear in that order, always.
+ * The exact parameters of the RGB to YUV conversion process aside, in
+ * many contexts the U and V channels actually have opposite meanings.
+ * To avoid this confusion, we are explicit: the names of the color
+ * channels are Y'CbCr, and they appear in that order, always.
* The prime symbol denotes that the Y channel is non-linear.
* Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
typedef th_img_plane th_ycbcr_buffer[3];
@@ -192,7 +192,7 @@ typedef th_img_plane th_ycbcr_buffer[3];
*
* It is also generally recommended that the offsets and sizes should still be
* multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
- * See the Theora
+ * See the Theora
* specification, Section 4.4, for more details.
*
* Frame rate, in frames per second, is stored as a rational fraction, as is
@@ -230,8 +230,8 @@ typedef struct{
* #frame_height-#pic_height-#pic_y must be no larger than 255.
* This slightly funny restriction is due to the fact that the offset is
* specified from the top of the image for consistency with the standard
- * graphics left-handed coordinate system used throughout this API, while it
- * is stored in the encoded stream as an offset from the bottom.*/
+ * graphics left-handed coordinate system used throughout this API, while
+ * it is stored in the encoded stream as an offset from the bottom.*/
ogg_uint32_t pic_y;
/**\name Frame rate
* The frame rate, as a fraction.
@@ -259,9 +259,6 @@ typedef struct{
/**The target bit-rate in bits per second.
If initializing an encoder with this struct, set this field to a non-zero
value to activate CBR encoding by default.*/
- /*TODO: Current encoder does not support CBR mode, or anything like it.
- We also don't really know what nominal rate each quality level
- corresponds to yet.*/
int target_bitrate;
/**The target quality level.
Valid values range from 0 to 63, inclusive, with higher values giving
@@ -314,7 +311,7 @@ typedef struct{
* A particular tag may occur more than once, and order is significant.
* The character set encoding for the strings is always UTF-8, but the tag
* names are limited to ASCII, and treated as case-insensitive.
- * See the Theora
+ * See the Theora
* specification, Section 6.3.3 for details.
*
* In filling in this structure, th_decode_headerin() will null-terminate
diff --git a/Engine/lib/libtheora/include/theora/theora.h b/Engine/lib/libtheora/include/theora/theora.h
index dbef71675..af6eb6f38 100644
--- a/Engine/lib/libtheora/include/theora/theora.h
+++ b/Engine/lib/libtheora/include/theora/theora.h
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
@@ -27,11 +27,11 @@ extern "C"
#include
-/** \defgroup oldfuncs Legacy pre-1.0 C API */
-/* @{ */
-
-/** \mainpage
- *
+/** \file
+ * The libtheora pre-1.0 legacy C API.
+ *
+ * \ingroup oldfuncs
+ *
* \section intro Introduction
*
* This is the documentation for the libtheora legacy C API, declared in
@@ -42,7 +42,7 @@ extern "C"
*
* libtheora is the reference implementation for
* Theora, a free video codec.
- * Theora is derived from On2's VP3 codec with improved integration for
+ * Theora is derived from On2's VP3 codec with improved integration with
* Ogg multimedia formats by Xiph.Org.
*
* \section overview Overview
@@ -114,21 +114,11 @@ extern "C"
* checking beyond whether a header bit is present. Instead, use the
* theora_decode_header() function and check the return value; or examine the
* header bytes at the beginning of the Ogg page.
- *
- * \subsection example Example Decoder
- *
- * See
- * examples/dump_video.c for a simple decoder implementation.
- *
- * \section encoding Encoding Process
- *
- * See
- * examples/encoder_example.c for a simple encoder implementation.
*/
-/** \file
- * The libtheora pre-1.0 legacy C API.
- */
+
+/** \defgroup oldfuncs Legacy pre-1.0 C API */
+/* @{ */
/**
* A YUV buffer for passing uncompressed frames to and from the codec.
@@ -292,14 +282,21 @@ typedef struct theora_comment{
/**\name theora_control() codes */
-
-/**\anchor decctlcodes
+/* \anchor decctlcodes_old
* These are the available request codes for theora_control()
* when called with a decoder instance.
- * By convention, these are odd, to distinguish them from the
- * \ref encctlcodes "encoder control codes".
+ * By convention decoder control codes are odd, to distinguish
+ * them from \ref encctlcodes_old "encoder control codes" which
+ * are even.
+ *
+ * Note that since the 1.0 release, both the legacy and the final
+ * implementation accept all the same control codes, but only the
+ * final API declares the newer codes.
+ *
* Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+
/**Get the maximum post-processing level.
* The decoder supports a post-processing filter that can improve
* the appearance of the decoded images. This returns the highest
@@ -324,9 +321,9 @@ typedef struct theora_comment{
* \param[in] buf ogg_uint32_t: The maximum distance between key
* frames.
* \param[out] buf ogg_uint32_t: The actual maximum distance set.
- * \retval TH_FAULT \a theora_state or \a buf is NULL.
- * \retval TH_EINVAL \a buf_sz is not sizeof(ogg_uint32_t).
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval OC_FAULT \a theora_state or \a buf is NULL.
+ * \retval OC_EINVAL \a buf_sz is not sizeof(ogg_uint32_t).
+ * \retval OC_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
/**Set the granule position.
@@ -338,33 +335,23 @@ typedef struct theora_comment{
*/
#define TH_DECCTL_SET_GRANPOS (5)
+/**\anchor encctlcodes_old */
-/**\anchor encctlcodes
- * These are the available request codes for theora_control()
- * when called with an encoder instance.
- * By convention, these are even, to distinguish them from the
- * \ref decctlcodes "decoder control codes".
- * Keep any experimental or vendor-specific values above \c 0x8000.*/
-/*@{*/
/**Sets the quantization parameters to use.
* The parameters are copied, not stored by reference, so they can be freed
* after this call.
* NULL may be specified to revert to the default parameters.
- * For the current encoder, scale[ci!=0][qi] must be no greater than
- * scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must be no
- * greater than base[qti][pli][qi-1][ci].
- * These two conditions ensure that the actual quantizer for a given \a qti,
- * \a pli, and \a ci does not increase as \a qi increases.
*
* \param[in] buf #th_quant_info
- * \retval TH_FAULT \a theora_state is NULL.
- * \retval TH_EINVAL Encoding has already begun, the quantization parameters
- * do not meet one of the above stated conditions, \a buf
- * is NULL and \a buf_sz is not zero, or \a buf
- * is non-NULL and \a buf_sz is not
- * sizeof(#th_quant_info).
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval OC_FAULT \a theora_state is NULL.
+ * \retval OC_EINVAL Encoding has already begun, the quantization parameters
+ * are not acceptable to this version of the encoder,
+ * \a buf is NULL and \a buf_sz is not zero,
+ * or \a buf is non-NULL and \a buf_sz is
+ * not sizeof(#th_quant_info).
+ * \retval OC_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_QUANT_PARAMS (2)
+
/**Disables any encoder features that would prevent lossless transcoding back
* to VP3.
* This primarily means disabling block-level QI values and not using 4MV mode
@@ -389,10 +376,11 @@ typedef struct theora_comment{
* 4:2:0, the picture region is smaller than the full frame,
* or if encoding has begun, preventing the quantization
* tables and codebooks from being set.
- * \retval TH_FAULT \a theora_state or \a buf is NULL.
- * \retval TH_EINVAL \a buf_sz is not sizeof(int).
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval OC_FAULT \a theora_state or \a buf is NULL.
+ * \retval OC_EINVAL \a buf_sz is not sizeof(int).
+ * \retval OC_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+
/**Gets the maximum speed level.
* Higher speed levels favor quicker encoding over better quality per bit.
* Depending on the encoding mode, and the internal algorithms used, quality
@@ -402,25 +390,27 @@ typedef struct theora_comment{
* the current encoding mode (VBR vs. CQI, etc.).
*
* \param[out] buf int: The maximum encoding speed level.
- * \retval TH_FAULT \a theora_state or \a buf is NULL.
- * \retval TH_EINVAL \a buf_sz is not sizeof(int).
- * \retval TH_IMPL Not supported by this implementation in the current
+ * \retval OC_FAULT \a theora_state or \a buf is NULL.
+ * \retval OC_EINVAL \a buf_sz is not sizeof(int).
+ * \retval OC_IMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+
/**Sets the speed level.
* By default a speed value of 1 is used.
*
* \param[in] buf int: The new encoding speed level.
* 0 is slowest, larger values use less CPU.
- * \retval TH_FAULT \a theora_state or \a buf is NULL.
- * \retval TH_EINVAL \a buf_sz is not sizeof(int), or the
+ * \retval OC_FAULT \a theora_state or \a buf is NULL.
+ * \retval OC_EINVAL \a buf_sz is not sizeof(int), or the
* encoding speed level is out of bounds.
* The maximum encoding speed level may be
* implementation- and encoding mode-specific, and can be
* obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
- * \retval TH_IMPL Not supported by this implementation in the current
+ * \retval OC_IMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_SPLEVEL (14)
+
/*@}*/
#define OC_FAULT -1 /**< General failure */
@@ -779,8 +769,8 @@ extern void theora_comment_clear(theora_comment *tc);
* This is used to provide advanced control the encoding process.
* \param th A #theora_state handle.
* \param req The control code to process.
- * See \ref encctlcodes "the list of available control codes"
- * for details.
+ * See \ref encctlcodes_old "the list of available
+ * control codes" for details.
* \param buf The parameters for this control code.
* \param buf_sz The size of the parameter buffer.*/
extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);
diff --git a/Engine/lib/libtheora/include/theora/theoradec.h b/Engine/lib/libtheora/include/theora/theoradec.h
index 7c08caadf..b20f0e3a6 100644
--- a/Engine/lib/libtheora/include/theora/theoradec.h
+++ b/Engine/lib/libtheora/include/theora/theoradec.h
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
@@ -38,6 +38,10 @@ extern "C" {
* Keep any experimental or vendor-specific values above \c 0x8000.*/
/*@{*/
/**Gets the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
*
* \param[out] _buf int: The maximum post-processing level.
* \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL.
@@ -47,6 +51,10 @@ extern "C" {
/**Sets the post-processing level.
* By default, post-processing is disabled.
*
+ * Sets the level of post-processing to use when decoding the
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ *
* \param[in] _buf int: The new post-processing level.
* 0 to disable; larger values use more CPU.
* \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL.
@@ -83,6 +91,15 @@ extern "C" {
* \retval TH_EINVAL \a _buf_sz is not
* sizeof(th_stripe_callback).*/
#define TH_DECCTL_SET_STRIPE_CB (7)
+
+/**Enables telemetry and sets the macroblock display mode */
+#define TH_DECCTL_SET_TELEMETRY_MBMODE (9)
+/**Enables telemetry and sets the motion vector display mode */
+#define TH_DECCTL_SET_TELEMETRY_MV (11)
+/**Enables telemetry and sets the adaptive quantization display mode */
+#define TH_DECCTL_SET_TELEMETRY_QI (13)
+/**Enables telemetry and sets the bitstream breakdown visualization mode */
+#define TH_DECCTL_SET_TELEMETRY_BITS (15)
/*@}*/
@@ -289,6 +306,7 @@ extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
* It may be freed or overwritten without notification when
* subsequent frames are decoded.
* \retval 0 Success
+ * \retval TH_EFAULT \a _dec or \a _ycbcr was NULL.
*/
extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
th_ycbcr_buffer _ycbcr);
diff --git a/Engine/lib/libtheora/include/theora/theoraenc.h b/Engine/lib/libtheora/include/theora/theoraenc.h
index b98285862..fdf2ab21e 100644
--- a/Engine/lib/libtheora/include/theora/theoraenc.h
+++ b/Engine/lib/libtheora/include/theora/theoraenc.h
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
@@ -49,26 +49,20 @@ extern "C" {
* NULL and \a _buf_sz is not zero, or \a _buf is
* non-NULL and \a _buf_sz is not
* sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS.
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
/**Sets the quantization parameters to use.
* The parameters are copied, not stored by reference, so they can be freed
* after this call.
* NULL may be specified to revert to the default parameters.
- * For the current encoder, scale[ci!=0][qi] must be no greater than
- * scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must be no
- * greater than base[qti][pli][qi-1][ci].
- * These two conditions ensure that the actual quantizer for a given \a qti,
- * \a pli, and \a ci does not increase as \a qi increases.
*
* \param[in] _buf #th_quant_info
* \retval TH_EFAULT \a _enc_ctx is NULL.
- * \retval TH_EINVAL Encoding has already begun, the quantization parameters
- * do not meet one of the above stated conditions, \a _buf
- * is NULL and \a _buf_sz is not zero, or \a _buf
- * is non-NULL and \a _buf_sz is not
- * sizeof(#th_quant_info).
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval TH_EINVAL Encoding has already begun, \a _buf is
+ * NULL and \a _buf_sz is not zero,
+ * or \a _buf is non-NULL and
+ * \a _buf_sz is not sizeof(#th_quant_info).
+ * \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_QUANT_PARAMS (2)
/**Sets the maximum distance between key frames.
* This can be changed during an encode, but will be bounded by
@@ -81,12 +75,12 @@ extern "C" {
* \param[out] _buf ogg_uint32_t: The actual maximum distance set.
* \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
* \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_uint32_t).
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
/**Disables any encoder features that would prevent lossless transcoding back
* to VP3.
- * This primarily means disabling block-level QI values and not using 4MV mode
- * when any of the luma blocks in a macro block are not coded.
+ * This primarily means disabling block-adaptive quantization and always coding
+ * all four luma blocks in a macro block when 4MV is used.
* It also includes using the VP3 quantization tables and Huffman codes; if you
* set them explicitly after calling this function, the resulting stream will
* not be VP3-compatible.
@@ -109,7 +103,7 @@ extern "C" {
* tables and codebooks from being set.
* \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
* \retval TH_EINVAL \a _buf_sz is not sizeof(int).
- * \retval TH_IMPL Not supported by this implementation.*/
+ * \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
/**Gets the maximum speed level.
* Higher speed levels favor quicker encoding over better quality per bit.
@@ -117,28 +111,254 @@ extern "C" {
* may actually improve, but in this case bitrate will also likely increase.
* In any case, overall rate/distortion performance will probably decrease.
* The maximum value, and the meaning of each value, may change depending on
- * the current encoding mode (VBR vs. CQI, etc.).
+ * the current encoding mode (VBR vs. constant quality, etc.).
*
- * \param[out] _buf int: The maximum encoding speed level.
+ * \param[out] _buf int: The maximum encoding speed level.
* \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
* \retval TH_EINVAL \a _buf_sz is not sizeof(int).
- * \retval TH_IMPL Not supported by this implementation in the current
+ * \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
/**Sets the speed level.
- * By default, the slowest speed (0) is used.
+ * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL.
*
- * \param[in] _buf int: The new encoding speed level.
- * 0 is slowest, larger values use less CPU.
+ * \param[in] _buf int: The new encoding speed level.
+ * 0 is slowest, larger values use less CPU.
* \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
* \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the
* encoding speed level is out of bounds.
* The maximum encoding speed level may be
* implementation- and encoding mode-specific, and can be
* obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
- * \retval TH_IMPL Not supported by this implementation in the current
+ * \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_SPLEVEL (14)
+/**Gets the current speed level.
+ * The default speed level may vary according to encoder implementation, but if
+ * this control code is not supported (it returns #TH_EIMPL), the default may
+ * be assumed to be the slowest available speed (0).
+ * The maximum encoding speed level may be implementation- and encoding
+ * mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ *
+ * \param[out] _buf int: The current encoding speed level.
+ * 0 is slowest, larger values use less CPU.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(int).
+ * \retval TH_EIMPL Not supported by this implementation in the current
+ * encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL (16)
+/**Sets the number of duplicates of the next frame to produce.
+ * Although libtheora can encode duplicate frames very cheaply, it costs some
+ * amount of CPU to detect them, and a run of duplicates cannot span a
+ * keyframe boundary.
+ * This control code tells the encoder to produce the specified number of extra
+ * duplicates of the next frame.
+ * This allows the encoder to make smarter keyframe placement decisions and
+ * rate control decisions, and reduces CPU usage as well, when compared to
+ * just submitting the same frame for encoding multiple times.
+ * This setting only applies to the next frame submitted for encoding.
+ * You MUST call th_encode_packetout() repeatedly until it returns 0, or the
+ * extra duplicate frames will be lost.
+ *
+ * \param[in] _buf int: The number of duplicates to produce.
+ * If this is negative or zero, no duplicates will be produced.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the
+ * number of duplicates is greater than or equal to the
+ * maximum keyframe interval.
+ * In the latter case, NO duplicate frames will be produced.
+ * You must ensure that the maximum keyframe interval is set
+ * larger than the maximum number of duplicates you will
+ * ever wish to insert prior to encoding.
+ * \retval TH_EIMPL Not supported by this implementation in the current
+ * encoding mode.*/
+#define TH_ENCCTL_SET_DUP_COUNT (18)
+/**Modifies the default bitrate management behavior.
+ * Use to allow or disallow frame dropping, and to enable or disable capping
+ * bit reservoir overflows and underflows.
+ * See \ref ratectlflags "the list of available flags".
+ * The flags are set by default to
+ * #TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW.
+ *
+ * \param[in] _buf int: Any combination of
+ * \ref ratectlflags "the available flags":
+ * - #TH_RATECTL_DROP_FRAMES: Enable frame dropping.
+ * - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later
+ * use.
+ * - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls
+ * later.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(int) or rate control
+ * is not enabled.
+ * \retval TH_EIMPL Not supported by this implementation in the current
+ * encoding mode.*/
+#define TH_ENCCTL_SET_RATE_FLAGS (20)
+/**Sets the size of the bitrate management bit reservoir as a function
+ * of number of frames.
+ * The reservoir size affects how quickly bitrate management reacts to
+ * instantaneous changes in the video complexity.
+ * Larger reservoirs react more slowly, and provide better overall quality, but
+ * require more buffering by a client, adding more latency to live streams.
+ * By default, libtheora sets the reservoir to the maximum distance between
+ * keyframes, subject to a minimum and maximum limit.
+ * This call may be used to increase or decrease the reservoir, increasing or
+ * decreasing the allowed temporary variance in bitrate.
+ * An implementation may impose some limits on the size of a reservoir it can
+ * handle, in which case the actual reservoir size may not be exactly what was
+ * requested.
+ * The actual value set will be returned.
+ *
+ * \param[in] _buf int: Requested size of the reservoir measured in
+ * frames.
+ * \param[out] _buf int: The actual size of the reservoir set.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or rate control
+ * is not enabled. The buffer has an implementation
+ * defined minimum and maximum size and the value in _buf
+ * will be adjusted to match the actual value set.
+ * \retval TH_EIMPL Not supported by this implementation in the current
+ * encoding mode.*/
+#define TH_ENCCTL_SET_RATE_BUFFER (22)
+/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics.
+ * Pass 1 mode must be enabled before the first frame is encoded, and a target
+ * bitrate must have already been specified to the encoder.
+ * Although this does not have to be the exact rate that will be used in the
+ * second pass, closer values may produce better results.
+ * The first call returns the size of the two-pass header data, along with some
+ * placeholder content.
+ * This first call also sets the encoder to pass 1 mode implicitly.
+ * Then, a subsequent call must be made after each call to
+ * th_encode_ycbcr_in() to retrieve the metrics for that frame.
+ * An additional, final call must be made to retrieve the summary data,
+ * containing such information as the total number of frames, etc.
+ * This must be stored in place of the placeholder data that was returned
+ * in the first call, before the frame metrics data.
+ * All of this data must be presented back to the encoder during pass 2 using
+ * #TH_ENCCTL_2PASS_IN.
+ *
+ * \param[out] _buf char *: Returns a pointer to internal storage
+ * containing the two pass metrics data.
+ * This storage is only valid until the next call, or until the
+ * encoder context is freed, and must be copied by the
+ * application.
+ * \retval >=0 The number of bytes of metric data available in the
+ * returned buffer.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(char *), no target
+ * bitrate has been set, or the first call was made after
+ * the first frame was submitted for encoding.
+ * \retval TH_EIMPL Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_OUT (24)
+/**Submits two-pass encoding metric data collected during the first encoding
+ * pass to the second pass.
+ * The first call must be made before the first frame is encoded, and a target
+ * bitrate must have already been specified to the encoder.
+ * It sets the encoder to pass 2 mode implicitly; this cannot be disabled.
+ * The encoder may require reading data from some or all of the frames in
+ * advance, depending on, e.g., the reservoir size used in the second pass.
+ * You must call this function repeatedly before each frame to provide data
+ * until either a) it fails to consume all of the data presented or b) all of
+ * the pass 1 data has been consumed.
+ * In the first case, you must save the remaining data to be presented after
+ * the next frame.
+ * You can call this function with a NULL argument to get an upper bound on
+ * the number of bytes that will be required before the next frame.
+ *
+ * When pass 2 is first enabled, the default bit reservoir is set to the entire
+ * file; this gives maximum flexibility but can lead to very high peak rates.
+ * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER
+ * (e.g., to set it to the keyframe interval for non-live streaming), however,
+ * you may then need to provide more data before the next frame.
+ *
+ * \param[in] _buf char[]: A buffer containing the data returned by
+ * #TH_ENCCTL_2PASS_OUT in pass 1.
+ * You may pass NULL for \a _buf to return an upper
+ * bound on the number of additional bytes needed before the
+ * next frame.
+ * The summary data returned at the end of pass 1 must be at
+ * the head of the buffer on the first call with a
+ * non-NULL \a _buf, and the placeholder data
+ * returned at the start of pass 1 should be omitted.
+ * After each call you should advance this buffer by the number
+ * of bytes consumed.
+ * \retval >0 The number of bytes of metric data required/consumed.
+ * \retval 0 No more data is required before the next frame.
+ * \retval TH_EFAULT \a _enc_ctx is NULL.
+ * \retval TH_EINVAL No target bitrate has been set, or the first call was
+ * made after the first frame was submitted for
+ * encoding.
+ * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible
+ * implementation of this library.
+ * \retval TH_EBADHEADER The data was invalid; this may be returned when
+ * attempting to read an aborted pass 1 file that still
+ * has the placeholder data in place of the summary
+ * data.
+ * \retval TH_EIMPL Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_IN (26)
+/**Sets the current encoding quality.
+ * This is only valid so long as no bitrate has been specified, either through
+ * the #th_info struct used to initialize the encoder or through
+ * #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future
+ * version).
+ * If it is set before the headers are emitted, the target quality encoded in
+ * them will be updated.
+ *
+ * \param[in] _buf int: The new target quality, in the range 0...63,
+ * inclusive.
+ * \retval 0 Success.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL A target bitrate has already been specified, or the
+ * quality index was not in the range 0...63.
+ * \retval TH_EIMPL Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUALITY (28)
+/**Sets the current encoding bitrate.
+ * Once a bitrate is set, the encoder must use a rate-controlled mode for all
+ * future frames (this restriction may be relaxed in a future version).
+ * If it is set before the headers are emitted, the target bitrate encoded in
+ * them will be updated.
+ * Due to the buffer delay, the exact bitrate of each section of the encode is
+ * not guaranteed.
+ * The encoder may have already used more bits than allowed for the frames it
+ * has encoded, expecting to make them up in future frames, or it may have
+ * used fewer, holding the excess in reserve.
+ * The exact transition between the two bitrates is not well-defined by this
+ * API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS.
+ * After a number of frames equal to the buffer delay, one may expect further
+ * output to average at the target bitrate.
+ *
+ * \param[in] _buf long: The new target bitrate, in bits per second.
+ * \retval 0 Success.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL.
+ * \retval TH_EINVAL The target bitrate was not positive.
+ * \retval TH_EIMPL Not supported by this implementation.*/
+#define TH_ENCCTL_SET_BITRATE (30)
+
+/*@}*/
+
+
+/**\name TH_ENCCTL_SET_RATE_FLAGS flags
+ * \anchor ratectlflags
+ * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/
+/*@{*/
+/**Drop frames to keep within bitrate buffer constraints.
+ * This can have a severe impact on quality, but is the only way to ensure that
+ * bitrate targets are met at low rates during sudden bursts of activity.*/
+#define TH_RATECTL_DROP_FRAMES (0x1)
+/**Ignore bitrate buffer overflows.
+ * If the encoder uses so few bits that the reservoir of available bits
+ * overflows, ignore the excess.
+ * The encoder will not try to use these extra bits in future frames.
+ * At high rates this may cause the result to be undersized, but allows a
+ * client to play the stream using a finite buffer; it should normally be
+ * enabled.*/
+#define TH_RATECTL_CAP_OVERFLOW (0x2)
+/**Ignore bitrate buffer underflows.
+ * If the encoder uses so many bits that the reservoir of available bits
+ * underflows, ignore the deficit.
+ * The encoder will not try to make up these extra bits in future frames.
+ * At low rates this may cause the result to be oversized; it should normally
+ * be disabled.*/
+#define TH_RATECTL_CAP_UNDERFLOW (0x4)
/*@}*/
diff --git a/Engine/lib/libtheora/lib/Makefile.am b/Engine/lib/libtheora/lib/Makefile.am
new file mode 100644
index 000000000..89ce26120
--- /dev/null
+++ b/Engine/lib/libtheora/lib/Makefile.am
@@ -0,0 +1,173 @@
+INCLUDES = -I$(top_srcdir)/include
+AM_CFLAGS = $(OGG_CFLAGS) $(CAIRO_CFLAGS)
+
+EXTRA_DIST = \
+ cpu.c \
+ encoder_disabled.c \
+ x86/mmxencfrag.c \
+ x86/mmxfdct.c \
+ x86/sse2fdct.c \
+ x86/x86enc.c \
+ x86/x86enc.h \
+ x86/mmxfrag.c \
+ x86/mmxfrag.h \
+ x86/mmxidct.c \
+ x86/mmxloop.h \
+ x86/mmxstate.c \
+ x86/x86int.h \
+ x86/x86state.c \
+ x86_vc
+
+lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la
+
+if THEORA_DISABLE_ENCODE
+encoder_uniq_sources = \
+ encoder_disabled.c
+
+encoder_sources = \
+ $(encoder_uniq_sources)
+else
+encoder_uniq_x86_sources = \
+ x86/mmxencfrag.c \
+ x86/mmxfdct.c \
+ x86/x86enc.c
+
+encoder_uniq_x86_64_sources = \
+ x86/sse2fdct.c
+
+encoder_shared_x86_sources = \
+ x86/mmxfrag.c \
+ x86/mmxidct.c \
+ x86/mmxstate.c \
+ x86/x86state.c
+
+encoder_shared_x86_64_sources =
+
+if CPU_x86_64
+encoder_uniq_arch_sources = \
+ $(encoder_uniq_x86_sources) \
+ $(encoder_uniq_x86_64_sources)
+encoder_shared_arch_sources = \
+ $(encoder_shared_x86_sources) \
+ $(encoder_shared_x86_64_sources)
+else
+if CPU_x86_32
+encoder_uniq_arch_sources = $(encoder_uniq_x86_sources)
+encoder_shared_arch_sources = $(encoder_shared_x86_sources)
+else
+encoder_uniq_arch_sources =
+encoder_shared_arch_sources =
+endif
+endif
+
+encoder_uniq_sources = \
+ analyze.c \
+ fdct.c \
+ encfrag.c \
+ encapiwrapper.c \
+ encinfo.c \
+ encode.c \
+ enquant.c \
+ huffenc.c \
+ mathops.c \
+ mcenc.c \
+ rate.c \
+ tokenize.c \
+ $(encoder_uniq_arch_sources)
+
+encoder_sources = \
+ apiwrapper.c \
+ fragment.c \
+ idct.c \
+ internal.c \
+ state.c \
+ quant.c \
+ $(encoder_shared_arch_sources) \
+ $(encoder_uniq_sources)
+
+endif
+
+decoder_x86_sources = \
+ x86/mmxidct.c \
+ x86/mmxfrag.c \
+ x86/mmxstate.c \
+ x86/x86state.c
+if CPU_x86_64
+decoder_arch_sources = $(decoder_x86_sources)
+else
+if CPU_x86_32
+decoder_arch_sources = $(decoder_x86_sources)
+else
+decoder_arch_sources =
+endif
+endif
+
+decoder_sources = \
+ apiwrapper.c \
+ bitpack.c \
+ decapiwrapper.c \
+ decinfo.c \
+ decode.c \
+ dequant.c \
+ fragment.c \
+ huffdec.c \
+ idct.c \
+ info.c \
+ internal.c \
+ quant.c \
+ state.c \
+ $(decoder_arch_sources)
+
+noinst_HEADERS = \
+ cpu.h \
+ internal.h \
+ encint.h \
+ enquant.h \
+ huffenc.h \
+ mathops.h \
+ modedec.h \
+ x86/x86enc.h \
+ apiwrapper.h \
+ bitpack.h \
+ dct.h \
+ decint.h \
+ dequant.h \
+ huffdec.h \
+ huffman.h \
+ ocintrin.h \
+ quant.h \
+ x86/mmxfrag.h \
+ x86/mmxloop.h \
+ x86/x86int.h
+
+libtheoradec_la_SOURCES = \
+ $(decoder_sources) \
+ Version_script-dec theoradec.exp
+libtheoradec_la_LDFLAGS = \
+ -version-info @THDEC_LIB_CURRENT@:@THDEC_LIB_REVISION@:@THDEC_LIB_AGE@ \
+ @THEORADEC_LDFLAGS@ @CAIRO_LIBS@
+
+libtheoraenc_la_SOURCES = \
+ $(encoder_sources) \
+ Version_script-enc theoraenc.exp
+libtheoraenc_la_LDFLAGS = \
+ -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \
+ @THEORAENC_LDFLAGS@ $(OGG_LIBS)
+
+libtheora_la_SOURCES = \
+ $(decoder_sources) \
+ $(encoder_uniq_sources) \
+ Version_script theora.exp
+libtheora_la_LDFLAGS = \
+ -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ \
+ @THEORA_LDFLAGS@ @CAIRO_LIBS@ $(OGG_LIBS)
+
+debug:
+ $(MAKE) all CFLAGS="@DEBUG@"
+
+profile:
+ $(MAKE) all CFLAGS="@PROFILE@"
+
+# construct various symbol export list files
+.def.exp : defexp.awk
+ awk -f defexp.awk $< > $@
diff --git a/Engine/lib/libtheora/lib/Makefile.in b/Engine/lib/libtheora/lib/Makefile.in
new file mode 100644
index 000000000..f26ccdc0e
--- /dev/null
+++ b/Engine/lib/libtheora/lib/Makefile.in
@@ -0,0 +1,845 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AR = @AR@
+ARGZ_H = @ARGZ_H@
+AS = @AS@
+AWK = @AWK@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEBUG = @DEBUG@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+F77 = @F77@
+GCJ = @GCJ@
+GCJFLAGS = @GCJFLAGS@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLTDL = @INCLTDL@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LIBADD_DL = @LIBADD_DL@
+LIBADD_DLD_LINK = @LIBADD_DLD_LINK@
+LIBADD_DLOPEN = @LIBADD_DLOPEN@
+LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@
+LIBLTDL = @LIBLTDL@
+LIBM = @LIBM@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTDLDEPS = @LTDLDEPS@
+LTDLINCL = @LTDLINCL@
+LTDLOPEN = @LTDLOPEN@
+LT_CONFIG_H = @LT_CONFIG_H@
+LT_DLLOADERS = @LT_DLLOADERS@
+LT_DLPREOPEN = @LT_DLPREOPEN@
+MAINT = @MAINT@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+RC = @RC@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_CONFIG = @SDL_CONFIG@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+lt_ECHO = @lt_ECHO@
+ltdl_LIBOBJS = @ltdl_LIBOBJS@
+ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
+sys_symbol_underscore = @sys_symbol_underscore@
+INCLUDES = -I$(top_srcdir)/include
+AM_CFLAGS = $(OGG_CFLAGS) $(CAIRO_CFLAGS)
+
+EXTRA_DIST = \
+ cpu.c \
+ encoder_disabled.c \
+ x86/mmxencfrag.c \
+ x86/mmxfdct.c \
+ x86/sse2fdct.c \
+ x86/x86enc.c \
+ x86/x86enc.h \
+ x86/mmxfrag.c \
+ x86/mmxfrag.h \
+ x86/mmxidct.c \
+ x86/mmxloop.h \
+ x86/mmxstate.c \
+ x86/x86int.h \
+ x86/x86state.c \
+ x86_vc
+
+
+lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la
+
+@THEORA_DISABLE_ENCODE_TRUE@encoder_uniq_sources = \
+@THEORA_DISABLE_ENCODE_TRUE@ encoder_disabled.c
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@ analyze.c \
+@THEORA_DISABLE_ENCODE_FALSE@ fdct.c \
+@THEORA_DISABLE_ENCODE_FALSE@ encfrag.c \
+@THEORA_DISABLE_ENCODE_FALSE@ encapiwrapper.c \
+@THEORA_DISABLE_ENCODE_FALSE@ encinfo.c \
+@THEORA_DISABLE_ENCODE_FALSE@ encode.c \
+@THEORA_DISABLE_ENCODE_FALSE@ enquant.c \
+@THEORA_DISABLE_ENCODE_FALSE@ huffenc.c \
+@THEORA_DISABLE_ENCODE_FALSE@ mathops.c \
+@THEORA_DISABLE_ENCODE_FALSE@ mcenc.c \
+@THEORA_DISABLE_ENCODE_FALSE@ rate.c \
+@THEORA_DISABLE_ENCODE_FALSE@ tokenize.c \
+@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_arch_sources)
+
+
+@THEORA_DISABLE_ENCODE_TRUE@encoder_sources = \
+@THEORA_DISABLE_ENCODE_TRUE@ $(encoder_uniq_sources)
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@ apiwrapper.c \
+@THEORA_DISABLE_ENCODE_FALSE@ fragment.c \
+@THEORA_DISABLE_ENCODE_FALSE@ idct.c \
+@THEORA_DISABLE_ENCODE_FALSE@ internal.c \
+@THEORA_DISABLE_ENCODE_FALSE@ state.c \
+@THEORA_DISABLE_ENCODE_FALSE@ quant.c \
+@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_arch_sources) \
+@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_sources)
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_x86_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxencfrag.c \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxfdct.c \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/x86enc.c
+
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_x86_64_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/sse2fdct.c
+
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_x86_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxfrag.c \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxidct.c \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxstate.c \
+@THEORA_DISABLE_ENCODE_FALSE@ x86/x86state.c
+
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_x86_64_sources =
+
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = $(encoder_uniq_x86_sources)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_x86_sources) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_x86_64_sources)
+
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = $(encoder_shared_x86_sources)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_x86_sources) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_x86_64_sources)
+
+
+decoder_x86_sources = \
+ x86/mmxidct.c \
+ x86/mmxfrag.c \
+ x86/mmxstate.c \
+ x86/x86state.c
+
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@decoder_arch_sources =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@decoder_arch_sources = $(decoder_x86_sources)
+@CPU_x86_64_TRUE@decoder_arch_sources = $(decoder_x86_sources)
+
+decoder_sources = \
+ apiwrapper.c \
+ bitpack.c \
+ decapiwrapper.c \
+ decinfo.c \
+ decode.c \
+ dequant.c \
+ fragment.c \
+ huffdec.c \
+ idct.c \
+ info.c \
+ internal.c \
+ quant.c \
+ state.c \
+ $(decoder_arch_sources)
+
+
+noinst_HEADERS = \
+ cpu.h \
+ internal.h \
+ encint.h \
+ enquant.h \
+ huffenc.h \
+ mathops.h \
+ modedec.h \
+ x86/x86enc.h \
+ apiwrapper.h \
+ bitpack.h \
+ dct.h \
+ decint.h \
+ dequant.h \
+ huffdec.h \
+ huffman.h \
+ ocintrin.h \
+ quant.h \
+ x86/mmxfrag.h \
+ x86/mmxloop.h \
+ x86/x86int.h
+
+
+libtheoradec_la_SOURCES = \
+ $(decoder_sources) \
+ Version_script-dec theoradec.exp
+
+libtheoradec_la_LDFLAGS = \
+ -version-info @THDEC_LIB_CURRENT@:@THDEC_LIB_REVISION@:@THDEC_LIB_AGE@ \
+ @THEORADEC_LDFLAGS@ @CAIRO_LIBS@
+
+
+libtheoraenc_la_SOURCES = \
+ $(encoder_sources) \
+ Version_script-enc theoraenc.exp
+
+libtheoraenc_la_LDFLAGS = \
+ -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \
+ @THEORAENC_LDFLAGS@ $(OGG_LIBS)
+
+
+libtheora_la_SOURCES = \
+ $(decoder_sources) \
+ $(encoder_uniq_sources) \
+ Version_script theora.exp
+
+libtheora_la_LDFLAGS = \
+ -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ \
+ @THEORA_LDFLAGS@ @CAIRO_LIBS@ $(OGG_LIBS)
+
+subdir = lib
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+libtheora_la_LIBADD =
+am__objects_1 = mmxidct.lo mmxfrag.lo mmxstate.lo x86state.lo
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@am__objects_2 =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@am__objects_2 = $(am__objects_1)
+@CPU_x86_64_TRUE@am__objects_2 = $(am__objects_1)
+am__objects_3 = apiwrapper.lo bitpack.lo decapiwrapper.lo decinfo.lo \
+ decode.lo dequant.lo fragment.lo huffdec.lo idct.lo info.lo \
+ internal.lo quant.lo state.lo $(am__objects_2)
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_4 = mmxencfrag.lo mmxfdct.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ x86enc.lo
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_5 = sse2fdct.lo
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = \
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_4)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_4) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_5)
+@THEORA_DISABLE_ENCODE_TRUE@am__objects_7 = encoder_disabled.lo
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_7 = analyze.lo fdct.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ encfrag.lo encapiwrapper.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ encinfo.lo encode.lo enquant.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ huffenc.lo mathops.lo mcenc.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ rate.lo tokenize.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_6)
+am_libtheora_la_OBJECTS = $(am__objects_3) $(am__objects_7)
+libtheora_la_OBJECTS = $(am_libtheora_la_OBJECTS)
+libtheoradec_la_LIBADD =
+am_libtheoradec_la_OBJECTS = $(am__objects_3)
+libtheoradec_la_OBJECTS = $(am_libtheoradec_la_OBJECTS)
+libtheoraenc_la_LIBADD =
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_8 = mmxfrag.lo mmxidct.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ mmxstate.lo x86state.lo
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_9 =
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = \
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_8)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_8) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_9)
+@THEORA_DISABLE_ENCODE_TRUE@am__objects_11 = $(am__objects_7)
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_11 = apiwrapper.lo fragment.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ idct.lo internal.lo state.lo \
+@THEORA_DISABLE_ENCODE_FALSE@ quant.lo $(am__objects_10) \
+@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_7)
+am_libtheoraenc_la_OBJECTS = $(am__objects_11)
+libtheoraenc_la_OBJECTS = $(am_libtheoraenc_la_OBJECTS)
+
+DEFS = @DEFS@
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/analyze.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/apiwrapper.Plo ./$(DEPDIR)/bitpack.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/decapiwrapper.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/decinfo.Plo ./$(DEPDIR)/decode.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/dequant.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/encapiwrapper.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/encfrag.Plo ./$(DEPDIR)/encinfo.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/encode.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/encoder_disabled.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/enquant.Plo ./$(DEPDIR)/fdct.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/fragment.Plo ./$(DEPDIR)/huffdec.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/huffenc.Plo ./$(DEPDIR)/idct.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/info.Plo ./$(DEPDIR)/internal.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/mathops.Plo ./$(DEPDIR)/mcenc.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/mmxencfrag.Plo ./$(DEPDIR)/mmxfdct.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/mmxfrag.Plo ./$(DEPDIR)/mmxidct.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/mmxstate.Plo ./$(DEPDIR)/quant.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/rate.Plo ./$(DEPDIR)/sse2fdct.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/state.Plo ./$(DEPDIR)/tokenize.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/x86enc.Plo ./$(DEPDIR)/x86state.Plo
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+CFLAGS = @CFLAGS@
+DIST_SOURCES = $(libtheora_la_SOURCES) $(libtheoradec_la_SOURCES) \
+ $(libtheoraenc_la_SOURCES)
+HEADERS = $(noinst_HEADERS)
+
+DIST_COMMON = $(noinst_HEADERS) Makefile.am Makefile.in
+SOURCES = $(libtheora_la_SOURCES) $(libtheoradec_la_SOURCES) $(libtheoraenc_la_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .def .exp .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu lib/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+libLTLIBRARIES_INSTALL = $(INSTALL)
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \
+ $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+ done
+
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+mmxidct.lo: x86/mmxidct.c
+mmxfrag.lo: x86/mmxfrag.c
+mmxstate.lo: x86/mmxstate.c
+x86state.lo: x86/x86state.c
+mmxencfrag.lo: x86/mmxencfrag.c
+mmxfdct.lo: x86/mmxfdct.c
+x86enc.lo: x86/x86enc.c
+sse2fdct.lo: x86/sse2fdct.c
+libtheora.la: $(libtheora_la_OBJECTS) $(libtheora_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libtheora_la_LDFLAGS) $(libtheora_la_OBJECTS) $(libtheora_la_LIBADD) $(LIBS)
+libtheoradec.la: $(libtheoradec_la_OBJECTS) $(libtheoradec_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libtheoradec_la_LDFLAGS) $(libtheoradec_la_OBJECTS) $(libtheoradec_la_LIBADD) $(LIBS)
+libtheoraenc.la: $(libtheoraenc_la_OBJECTS) $(libtheoraenc_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libtheoraenc_la_LDFLAGS) $(libtheoraenc_la_OBJECTS) $(libtheoraenc_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/analyze.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apiwrapper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitpack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decapiwrapper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decinfo.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dequant.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encapiwrapper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encfrag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encinfo.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encoder_disabled.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/enquant.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fdct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fragment.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/huffdec.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/huffenc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/internal.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mathops.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mcenc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxencfrag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxfdct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxfrag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxidct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxstate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quant.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sse2fdct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/state.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tokenize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86enc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86state.Plo@am__quote@
+
+distclean-depend:
+ -rm -rf ./$(DEPDIR)
+
+.c.o:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(COMPILE) -c `cygpath -w $<`
+
+.c.lo:
+@AMDEP_TRUE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+mmxidct.o: x86/mmxidct.c
+@AMDEP_TRUE@ source='x86/mmxidct.c' object='mmxidct.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxidct.Po' tmpdepfile='$(DEPDIR)/mmxidct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.o `test -f 'x86/mmxidct.c' || echo '$(srcdir)/'`x86/mmxidct.c
+
+mmxidct.obj: x86/mmxidct.c
+@AMDEP_TRUE@ source='x86/mmxidct.c' object='mmxidct.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxidct.Po' tmpdepfile='$(DEPDIR)/mmxidct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.obj `cygpath -w x86/mmxidct.c`
+
+mmxidct.lo: x86/mmxidct.c
+@AMDEP_TRUE@ source='x86/mmxidct.c' object='mmxidct.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxidct.Plo' tmpdepfile='$(DEPDIR)/mmxidct.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.lo `test -f 'x86/mmxidct.c' || echo '$(srcdir)/'`x86/mmxidct.c
+
+mmxfrag.o: x86/mmxfrag.c
+@AMDEP_TRUE@ source='x86/mmxfrag.c' object='mmxfrag.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfrag.Po' tmpdepfile='$(DEPDIR)/mmxfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.o `test -f 'x86/mmxfrag.c' || echo '$(srcdir)/'`x86/mmxfrag.c
+
+mmxfrag.obj: x86/mmxfrag.c
+@AMDEP_TRUE@ source='x86/mmxfrag.c' object='mmxfrag.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfrag.Po' tmpdepfile='$(DEPDIR)/mmxfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.obj `cygpath -w x86/mmxfrag.c`
+
+mmxfrag.lo: x86/mmxfrag.c
+@AMDEP_TRUE@ source='x86/mmxfrag.c' object='mmxfrag.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfrag.Plo' tmpdepfile='$(DEPDIR)/mmxfrag.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.lo `test -f 'x86/mmxfrag.c' || echo '$(srcdir)/'`x86/mmxfrag.c
+
+mmxstate.o: x86/mmxstate.c
+@AMDEP_TRUE@ source='x86/mmxstate.c' object='mmxstate.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxstate.Po' tmpdepfile='$(DEPDIR)/mmxstate.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.o `test -f 'x86/mmxstate.c' || echo '$(srcdir)/'`x86/mmxstate.c
+
+mmxstate.obj: x86/mmxstate.c
+@AMDEP_TRUE@ source='x86/mmxstate.c' object='mmxstate.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxstate.Po' tmpdepfile='$(DEPDIR)/mmxstate.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.obj `cygpath -w x86/mmxstate.c`
+
+mmxstate.lo: x86/mmxstate.c
+@AMDEP_TRUE@ source='x86/mmxstate.c' object='mmxstate.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxstate.Plo' tmpdepfile='$(DEPDIR)/mmxstate.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.lo `test -f 'x86/mmxstate.c' || echo '$(srcdir)/'`x86/mmxstate.c
+
+x86state.o: x86/x86state.c
+@AMDEP_TRUE@ source='x86/x86state.c' object='x86state.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/x86state.Po' tmpdepfile='$(DEPDIR)/x86state.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.o `test -f 'x86/x86state.c' || echo '$(srcdir)/'`x86/x86state.c
+
+x86state.obj: x86/x86state.c
+@AMDEP_TRUE@ source='x86/x86state.c' object='x86state.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/x86state.Po' tmpdepfile='$(DEPDIR)/x86state.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.obj `cygpath -w x86/x86state.c`
+
+x86state.lo: x86/x86state.c
+@AMDEP_TRUE@ source='x86/x86state.c' object='x86state.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/x86state.Plo' tmpdepfile='$(DEPDIR)/x86state.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.lo `test -f 'x86/x86state.c' || echo '$(srcdir)/'`x86/x86state.c
+
+mmxencfrag.o: x86/mmxencfrag.c
+@AMDEP_TRUE@ source='x86/mmxencfrag.c' object='mmxencfrag.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxencfrag.Po' tmpdepfile='$(DEPDIR)/mmxencfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.o `test -f 'x86/mmxencfrag.c' || echo '$(srcdir)/'`x86/mmxencfrag.c
+
+mmxencfrag.obj: x86/mmxencfrag.c
+@AMDEP_TRUE@ source='x86/mmxencfrag.c' object='mmxencfrag.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxencfrag.Po' tmpdepfile='$(DEPDIR)/mmxencfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.obj `cygpath -w x86/mmxencfrag.c`
+
+mmxencfrag.lo: x86/mmxencfrag.c
+@AMDEP_TRUE@ source='x86/mmxencfrag.c' object='mmxencfrag.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxencfrag.Plo' tmpdepfile='$(DEPDIR)/mmxencfrag.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.lo `test -f 'x86/mmxencfrag.c' || echo '$(srcdir)/'`x86/mmxencfrag.c
+
+mmxfdct.o: x86/mmxfdct.c
+@AMDEP_TRUE@ source='x86/mmxfdct.c' object='mmxfdct.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfdct.Po' tmpdepfile='$(DEPDIR)/mmxfdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.o `test -f 'x86/mmxfdct.c' || echo '$(srcdir)/'`x86/mmxfdct.c
+
+mmxfdct.obj: x86/mmxfdct.c
+@AMDEP_TRUE@ source='x86/mmxfdct.c' object='mmxfdct.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfdct.Po' tmpdepfile='$(DEPDIR)/mmxfdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.obj `cygpath -w x86/mmxfdct.c`
+
+mmxfdct.lo: x86/mmxfdct.c
+@AMDEP_TRUE@ source='x86/mmxfdct.c' object='mmxfdct.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfdct.Plo' tmpdepfile='$(DEPDIR)/mmxfdct.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.lo `test -f 'x86/mmxfdct.c' || echo '$(srcdir)/'`x86/mmxfdct.c
+
+x86enc.o: x86/x86enc.c
+@AMDEP_TRUE@ source='x86/x86enc.c' object='x86enc.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/x86enc.Po' tmpdepfile='$(DEPDIR)/x86enc.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.o `test -f 'x86/x86enc.c' || echo '$(srcdir)/'`x86/x86enc.c
+
+x86enc.obj: x86/x86enc.c
+@AMDEP_TRUE@ source='x86/x86enc.c' object='x86enc.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/x86enc.Po' tmpdepfile='$(DEPDIR)/x86enc.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.obj `cygpath -w x86/x86enc.c`
+
+x86enc.lo: x86/x86enc.c
+@AMDEP_TRUE@ source='x86/x86enc.c' object='x86enc.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/x86enc.Plo' tmpdepfile='$(DEPDIR)/x86enc.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.lo `test -f 'x86/x86enc.c' || echo '$(srcdir)/'`x86/x86enc.c
+
+sse2fdct.o: x86/sse2fdct.c
+@AMDEP_TRUE@ source='x86/sse2fdct.c' object='sse2fdct.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/sse2fdct.Po' tmpdepfile='$(DEPDIR)/sse2fdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.o `test -f 'x86/sse2fdct.c' || echo '$(srcdir)/'`x86/sse2fdct.c
+
+sse2fdct.obj: x86/sse2fdct.c
+@AMDEP_TRUE@ source='x86/sse2fdct.c' object='sse2fdct.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/sse2fdct.Po' tmpdepfile='$(DEPDIR)/sse2fdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.obj `cygpath -w x86/sse2fdct.c`
+
+sse2fdct.lo: x86/sse2fdct.c
+@AMDEP_TRUE@ source='x86/sse2fdct.c' object='sse2fdct.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@ depfile='$(DEPDIR)/sse2fdct.Plo' tmpdepfile='$(DEPDIR)/sse2fdct.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.lo `test -f 'x86/sse2fdct.c' || echo '$(srcdir)/'`x86/sse2fdct.c
+CCDEPMODE = @CCDEPMODE@
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ $(mkinstalldirs) $(distdir)/x86
+ @list='$(DISTFILES)'; for file in $$list; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+ distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES
+
+.PHONY: GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool distclean distclean-compile \
+ distclean-depend distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am info info-am install \
+ install-am install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool tags uninstall \
+ uninstall-am uninstall-info-am uninstall-libLTLIBRARIES
+
+
+debug:
+ $(MAKE) all CFLAGS="@DEBUG@"
+
+profile:
+ $(MAKE) all CFLAGS="@PROFILE@"
+
+# construct various symbol export list files
+.def.exp : defexp.awk
+ awk -f defexp.awk $< > $@
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/Engine/lib/libtheora/lib/Version_script b/Engine/lib/libtheora/lib/Version_script
new file mode 100644
index 000000000..2ecb5e43a
--- /dev/null
+++ b/Engine/lib/libtheora/lib/Version_script
@@ -0,0 +1,53 @@
+#
+# Export file for libtheora
+#
+# Only the symbols listed in the global section will be callable from
+# applications linking to the libraries.
+#
+
+# We use something that looks like a versioned so filename here
+# to define the old API because of a historical confusion. This
+# label must be kept to maintain ABI compatibility.
+
+libtheora.so.1.0
+{
+ global:
+ theora_version_string;
+ theora_version_number;
+
+ theora_encode_init;
+ theora_encode_YUVin;
+ theora_encode_packetout;
+ theora_encode_header;
+ theora_encode_comment;
+ theora_encode_tables;
+
+ theora_decode_header;
+ theora_decode_init;
+ theora_decode_packetin;
+ theora_decode_YUVout;
+
+ theora_control;
+
+ theora_packet_isheader;
+ theora_packet_iskeyframe;
+
+ theora_granule_shift;
+ theora_granule_frame;
+ theora_granule_time;
+
+ theora_info_init;
+ theora_info_clear;
+
+ theora_clear;
+
+ theora_comment_init;
+ theora_comment_add;
+ theora_comment_add_tag;
+ theora_comment_query;
+ theora_comment_query_count;
+ theora_comment_clear;
+
+ local:
+ *;
+};
diff --git a/Engine/lib/libtheora/lib/Version_script-dec b/Engine/lib/libtheora/lib/Version_script-dec
new file mode 100644
index 000000000..cab368397
--- /dev/null
+++ b/Engine/lib/libtheora/lib/Version_script-dec
@@ -0,0 +1,82 @@
+#
+# Export file for libtheoradec
+#
+# Only the symbols listed in the global section will be callable from
+# applications linking to the libraries.
+#
+
+# The 1.x API
+libtheoradec_1.0
+{
+ global:
+ th_version_string;
+ th_version_number;
+
+ th_decode_headerin;
+ th_decode_alloc;
+ th_setup_free;
+ th_decode_ctl;
+ th_decode_packetin;
+ th_decode_ycbcr_out;
+ th_decode_free;
+
+ th_packet_isheader;
+ th_packet_iskeyframe;
+
+ th_granule_frame;
+ th_granule_time;
+
+ th_info_init;
+ th_info_clear;
+
+ th_comment_init;
+ th_comment_add;
+ th_comment_add_tag;
+ th_comment_query;
+ th_comment_query_count;
+ th_comment_clear;
+
+ local:
+ *;
+};
+
+# The deprecated legacy api from the libtheora alpha releases.
+# We use something that looks like a versioned so filename here
+# to define the old API because of a historical confusion. This
+# label must be kept to maintain ABI compatibility.
+
+libtheora.so.1.0
+{
+ global:
+ theora_version_string;
+ theora_version_number;
+
+ theora_decode_header;
+ theora_decode_init;
+ theora_decode_packetin;
+ theora_decode_YUVout;
+
+ theora_control;
+
+ theora_packet_isheader;
+ theora_packet_iskeyframe;
+
+ theora_granule_shift;
+ theora_granule_frame;
+ theora_granule_time;
+
+ theora_info_init;
+ theora_info_clear;
+
+ theora_clear;
+
+ theora_comment_init;
+ theora_comment_add;
+ theora_comment_add_tag;
+ theora_comment_query;
+ theora_comment_query_count;
+ theora_comment_clear;
+
+ local:
+ *;
+};
diff --git a/Engine/lib/libtheora/lib/Version_script-enc b/Engine/lib/libtheora/lib/Version_script-enc
new file mode 100644
index 000000000..37699edd6
--- /dev/null
+++ b/Engine/lib/libtheora/lib/Version_script-enc
@@ -0,0 +1,43 @@
+#
+# Export file for libtheoraenc
+#
+# Only the symbols listed in the global section will be callable from
+# applications linking to the libraries.
+#
+
+# The 1.x encoder API
+libtheoraenc_1.0
+{
+ global:
+ th_encode_alloc;
+ th_encode_ctl;
+ th_encode_flushheader;
+ th_encode_ycbcr_in;
+ th_encode_packetout;
+ th_encode_free;
+
+ TH_VP31_QUANT_INFO;
+ TH_VP31_HUFF_CODES;
+
+ local:
+ *;
+};
+
+# The encoder portion of the deprecated alpha release api.
+# We use something that looks like a versioned so filename here
+# to define the old API because of a historical confusion. This
+# label must be kept to maintain ABI compatibility.
+
+libtheora.so.1.0
+{
+ global:
+ theora_encode_init;
+ theora_encode_YUVin;
+ theora_encode_packetout;
+ theora_encode_header;
+ theora_encode_comment;
+ theora_encode_tables;
+
+ local:
+ *;
+};
diff --git a/Engine/lib/libtheora/lib/analyze.c b/Engine/lib/libtheora/lib/analyze.c
new file mode 100644
index 000000000..af01b60df
--- /dev/null
+++ b/Engine/lib/libtheora/lib/analyze.c
@@ -0,0 +1,2709 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: mode selection code
+ last mod: $Id$
+
+ ********************************************************************/
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+#include "modedec.h"
+
+
+
+typedef struct oc_fr_state oc_fr_state; /*Coded block flag cost tracker.*/
+typedef struct oc_qii_state oc_qii_state; /*Block quantizer index (qii) cost tracker.*/
+typedef struct oc_enc_pipeline_state oc_enc_pipeline_state; /*Temporary analysis pipeline state.*/
+typedef struct oc_rd_metric oc_rd_metric; /*Cost information for the coded blocks of a MB.*/
+typedef struct oc_mode_choice oc_mode_choice;
+
+
+
+/*There are 8 possible schemes used to encode macro block modes.
+ Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes.
+ The same set of Huffman codes is used for each of these 7 schemes, but the
+ mode assigned to each codeword varies.
+ Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream,
+ while schemes 1-6 have a fixed mapping.
+ Scheme 7 just encodes each mode directly in 3 bits.*/
+
+/*The mode orderings for the various mode coding schemes.
+ Scheme 0 uses a custom alphabet, which is not stored in this table.
+ This is the inverse of the equivalent table OC_MODE_ALPHABETS in the
+ decoder.
+ Row si-1 holds the ranks used by scheme si (1 through 7); each row is indexed
+ by macro block mode and yields the rank of that mode's codeword in the
+ scheme.*/
+static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={
+ /*Last MV dominates.*/
+ /*L P M N I G GM 4*/
+ {3,4,2,0,1,5,6,7},
+ /*L P N M I G GM 4*/
+ {2,4,3,0,1,5,6,7},
+ /*L M P N I G GM 4*/
+ {3,4,1,0,2,5,6,7},
+ /*L M N P I G GM 4*/
+ {2,4,1,0,3,5,6,7},
+ /*No MV dominates.*/
+ /*N L P M I G GM 4*/
+ {0,4,3,1,2,5,6,7},
+ /*N G L P M I GM 4*/
+ {0,5,4,2,3,1,6,7},
+ /*Default ordering.*/
+ /*N I M L P G GM 4*/
+ {0,1,2,3,4,5,6,7}
+};
+
+
+
+/*Set up the per-scheme rank tables of the mode scheme chooser.
+  Scheme 0 is pointed at the chooser's own (adaptive) rank list, while schemes
+   1 through 7 are pointed at the fixed rows of OC_MODE_RANKS.
+  This only has to be done once per encoder.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+  int scheme;
+  /*The fixed orderings: scheme N uses row N-1 of the table.*/
+  for(scheme=7;scheme>=1;scheme--){
+    _chooser->mode_ranks[scheme]=OC_MODE_RANKS[scheme-1];
+  }
+  /*The custom ordering maintained by the chooser itself.*/
+  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+}
+
+/*Return the mode scheme chooser to its start-of-frame state.
+  All mode counts and scheme bit totals are cleared (except for the fixed
+   header cost of scheme 0), and the scheme and scheme 0 mode lists are put
+   back in their initial order.
+  Call this once per frame, the first frame included.*/
+static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+  int i;
+  for(i=0;i<OC_NMODES;i++)_chooser->mode_counts[i]=0;
+  for(i=0;i<8;i++){
+    /*Scheme 0 starts with 24 bits to store the mode list in; every other
+       scheme starts out free.*/
+    _chooser->scheme_bits[i]=i==0?24:0;
+    /*Scheme 7 should always start first, and scheme 0 should always start
+       last.*/
+    _chooser->scheme_list[i]=7-i;
+    /*The scheme 0 alphabet starts out as the identity mapping.*/
+    _chooser->scheme0_ranks[i]=i;
+    _chooser->scheme0_list[i]=i;
+  }
+}
+
+
+/*This is the real purpose of this data structure: not actually selecting a
+   mode scheme, but estimating the cost of coding a given mode given all the
+   modes selected so far.
+  This is done via opportunity cost: the cost is defined as the number of bits
+   required to encode all the modes selected so far including the current one
+   using the best possible scheme, minus the number of bits required to encode
+   all the modes selected so far not including the current one using the best
+   possible scheme.
+  The computational expense of doing this probably makes it overkill.
+  Just be happy we take a greedy approach instead of trying to solve the
+   global mode-selection problem (which is NP-hard).
+  _chooser: The chooser state; it is only read here, never updated.
+  _mb_mode: The mode to determine the cost of.
+  Return: The number of bits required to code this mode.*/
+static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int scheme0;
+  int scheme1;
+  int best_bits;
+  int mode_bits;
+  int si;
+  int scheme_bits;
+  /*scheme_list is ordered by increasing bit cost, so entry 0 is the current
+     best scheme and entry 1 the runner up.*/
+  scheme0=_chooser->scheme_list[0];
+  scheme1=_chooser->scheme_list[1];
+  best_bits=_chooser->scheme_bits[scheme0];
+  /*(scheme+1)>>3 is 1 for scheme 7 only, selecting the second row of
+     OC_MODE_BITS for it; every other scheme uses the first row.*/
+  mode_bits=OC_MODE_BITS[(scheme0+1)>>3][
+   _chooser->mode_ranks[scheme0][_mb_mode]];
+  /*Typical case: If the difference between the best scheme and the next best
+     is greater than 6 bits, then adding just one mode cannot change which
+     scheme we use.*/
+  if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
+  /*Otherwise, check to see if adding this mode selects a different scheme as
+     the best.*/
+  si=1;
+  best_bits+=mode_bits;
+  do{
+    /*For any scheme except 0, we can just use the bit cost of the mode's rank
+       in that scheme.*/
+    if(scheme1!=0){
+      scheme_bits=_chooser->scheme_bits[scheme1]+
+       OC_MODE_BITS[(scheme1+1)>>3][_chooser->mode_ranks[scheme1][_mb_mode]];
+    }
+    else{
+      int ri;
+      /*For scheme 0, incrementing the mode count could potentially change the
+         mode's rank.
+        Find the index where the mode would be moved to in the optimal list,
+         and use its bit cost instead of the one for the mode's current
+         position in the list.*/
+      /*We don't recompute scheme bits; this is computing opportunity cost, not
+         an update.*/
+      for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0&&
+       _chooser->mode_counts[_mb_mode]>=
+       _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
+      scheme_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][ri];
+    }
+    /*Keep the cheapest total seen so far, then move on to the next scheme in
+       the list (if there is one).*/
+    if(scheme_bits<best_bits)best_bits=scheme_bits;
+    if(++si>=8)break;
+    scheme1=_chooser->scheme_list[si];
+  }
+  while(_chooser->scheme_bits[scheme1]-_chooser->scheme_bits[scheme0]<=6);
+  return best_bits-_chooser->scheme_bits[scheme0];
+}
+
+/*Incrementally update the mode counts and per-scheme bit counts and re-order
+ the scheme lists once a mode has been selected.
+ The scheme 0 mode list is kept ordered by decreasing mode count and the
+ scheme list by increasing bit total; both are repaired in place with
+ insertion-sort style passes.
+ _mb_mode: The mode that was chosen.*/
+static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+ int ri;
+ int si;
+ _chooser->mode_counts[_mb_mode]++;
+ /*Re-order the scheme0 mode list if necessary.
+ The chosen mode moves up past every mode whose count is now strictly
+ smaller than its own.*/
+ for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0;ri--){
+ int pmode;
+ pmode=_chooser->scheme0_list[ri-1];
+ if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mb_mode])break;
+ /*Reorder the mode ranking.*/
+ _chooser->scheme0_ranks[pmode]++;
+ _chooser->scheme0_list[ri]=pmode;
+ }
+ _chooser->scheme0_ranks[_mb_mode]=ri;
+ _chooser->scheme0_list[ri]=_mb_mode;
+ /*Now add the bit cost for the mode to each scheme.
+ mode_ranks[0] is set to scheme0_ranks by oc_mode_scheme_chooser_init(), so
+ scheme 0 is charged for the rank that was just updated above.*/
+ for(si=0;si<8;si++){
+ _chooser->scheme_bits[si]+=
+ OC_MODE_BITS[si+1>>3][_chooser->mode_ranks[si][_mb_mode]];
+ }
+ /*Finally, re-order the list of schemes.
+ Schemes with equal bit totals keep their relative order.*/
+ for(si=1;si<8;si++){
+ int sj;
+ int scheme0;
+ int bits0;
+ sj=si;
+ scheme0=_chooser->scheme_list[si];
+ bits0=_chooser->scheme_bits[scheme0];
+ do{
+ int scheme1;
+ scheme1=_chooser->scheme_list[sj-1];
+ if(bits0>=_chooser->scheme_bits[scheme1])break;
+ _chooser->scheme_list[sj]=scheme1;
+ }
+ while(--sj>0);
+ _chooser->scheme_list[sj]=scheme0;
+ }
+}
+
+
+
+/*Look up the size, in bits, of the code for a super block run.
+  The run length classes are scanned in order until the one containing
+   _run_count is found.
+  _run_count: The desired run count; must be positive and less than 4130.
+  Return: The number of bits used to code a run of that length.*/
+static int oc_sb_run_bits(int _run_count){
+  int codei;
+  codei=0;
+  /*Advance while the next class still starts at or below the run count.*/
+  while(OC_SB_RUN_VAL_MIN[codei+1]<=_run_count)codei++;
+  return OC_SB_RUN_CODE_NBITS[codei];
+}
+
+/*Look up the size, in bits, of the code for a block run.
+  _run_count: The desired run count; must be positive and less than 30.
+  Return: The number of bits used to code a run of that length.*/
+static int oc_block_run_bits(int _run_count){
+  /*The table is indexed by run length minus one.*/
+  const int tablei=_run_count-1;
+  return OC_BLOCK_RUN_CODE_NBITS[tablei];
+}
+
+
+
+/*State to track coded block flags and their bit cost.
+ bits: The running bit cost estimate for the flags seen so far.
+ sb_partial_count, sb_full_count: The lengths of the current runs of the
+ super block "partially coded" and "fully coded" flags.
+ b_coded_count: The length of the current run of block coded flags.
+ b_coded_count_prev: The block run length saved at the last super block
+ boundary; it is restored when a super block turns out to be fully coded or
+ fully uncoded and its block flags are rolled back.
+ b_count: The number of blocks seen so far in the current super block.
+ sb_partial, sb_full, b_coded, b_coded_prev: The most recent value of each
+ flag; oc_fr_state_init() sets them to -1, meaning no flag has been seen
+ yet.*/
+struct oc_fr_state{
+ ptrdiff_t bits;
+ unsigned sb_partial_count:16;
+ unsigned sb_full_count:16;
+ unsigned b_coded_count_prev:8;
+ unsigned b_coded_count:8;
+ unsigned b_count:8;
+ signed int sb_partial:2;
+ signed int sb_full:2;
+ signed int b_coded_prev:2;
+ signed int b_coded:2;
+};
+
+
+
+/*Put a coded block flag tracker in its initial, empty state: no bits spent,
+   all run lengths zero, and every "last flag" marked as not yet seen.*/
+static void oc_fr_state_init(oc_fr_state *_fr){
+  /*-1 never matches a real flag value (0 or 1), so the first flag of each
+     kind always starts a new run.*/
+  _fr->b_coded=_fr->b_coded_prev=-1;
+  _fr->sb_full=_fr->sb_partial=-1;
+  /*No blocks or runs have been counted yet.*/
+  _fr->b_count=0;
+  _fr->b_coded_count=_fr->b_coded_count_prev=0;
+  _fr->sb_full_count=_fr->sb_partial_count=0;
+  _fr->bits=0;
+}
+
+
+/*Account for the flags of one more super block in the bit cost estimate.
+  The "partially coded" flag is always run-length coded; the "fully coded"
+   flag is only coded (and its run only extended) for super blocks that are
+   not partially coded.
+  Extending a run replaces the cost of the old run length with that of the new
+   one; a run that has reached the maximum length of 4129 is instead closed
+   and a new one started, at the cost of one extra bit.
+  _sb_partial: Whether the super block is partially coded.
+  _sb_full:    Whether the super block is fully coded; ignored when
+                _sb_partial is non-zero.*/
+static void oc_fr_state_advance_sb(oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  ptrdiff_t bits;
+  int       sb_partial_count;
+  int       sb_full_count;
+  bits=_fr->bits;
+  /*Extend the sb_partial run, or start a new one.
+    The run length must come from sb_partial_count, not from the flag value
+     itself, exactly as is done for the sb_full run below.*/
+  sb_partial_count=_fr->sb_partial_count;
+  if(_fr->sb_partial==_sb_partial){
+    if(sb_partial_count>=4129){
+      bits++;
+      sb_partial_count=0;
+    }
+    else bits-=oc_sb_run_bits(sb_partial_count);
+  }
+  else sb_partial_count=0;
+  sb_partial_count++;
+  bits+=oc_sb_run_bits(sb_partial_count);
+  if(!_sb_partial){
+    /*Extend the sb_full run, or start a new one.*/
+    sb_full_count=_fr->sb_full_count;
+    if(_fr->sb_full==_sb_full){
+      if(sb_full_count>=4129){
+        bits++;
+        sb_full_count=0;
+      }
+      else bits-=oc_sb_run_bits(sb_full_count);
+    }
+    else sb_full_count=0;
+    sb_full_count++;
+    bits+=oc_sb_run_bits(sb_full_count);
+    _fr->sb_full=_sb_full;
+    _fr->sb_full_count=sb_full_count;
+  }
+  _fr->bits=bits;
+  _fr->sb_partial=_sb_partial;
+  _fr->sb_partial_count=sb_partial_count;
+}
+
+/*Flush any outstanding block flags for a SB (e.g., one with fewer than 16
+   blocks).
+  This does nothing if no blocks have been counted since the last super block
+   boundary.
+  Otherwise the super block is closed off as either fully coded/uncoded (in
+   which case the cost of its individual block flags is rolled back) or
+   partially coded, and the super block level flags are accounted for.*/
+static void oc_fr_state_flush_sb(oc_fr_state *_fr){
+  ptrdiff_t bits;
+  int       sb_partial;
+  int       sb_full;
+  int       b_coded_count;
+  int       b_coded;
+  int       b_count;
+  b_count=_fr->b_count;
+  if(b_count>0){
+    bits=_fr->bits;
+    b_coded=_fr->b_coded;
+    b_coded_count=_fr->b_coded_count;
+    /*sb_full is ignored for partially coded super blocks, but give it a
+       defined value so that nothing indeterminate is ever read.*/
+    sb_full=0;
+    if(b_coded_count>=b_count){
+      /*This SB was fully coded/uncoded; roll back the partial block flags.*/
+      bits-=oc_block_run_bits(b_coded_count);
+      /*The part of the run that precedes this SB still has to be paid for.*/
+      if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count);
+      sb_partial=0;
+      sb_full=b_coded;
+      b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      /*It was partially coded.*/
+      sb_partial=1;
+    }
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=b_coded;
+    _fr->b_coded_prev=b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+}
+
+/*Account for the coded flag of one more block in the bit cost estimate.
+  The block flag run is extended (or a new one started), and once 16 blocks
+   have been seen the super block is closed off: if all 16 shared the same
+   flag, the cost of the individual block flags is rolled back and the super
+   block is recorded as fully coded/uncoded, otherwise it is recorded as
+   partially coded.
+  _b_coded: Whether the block is coded (non-zero) or skipped (zero).*/
+static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){
+  ptrdiff_t bits;
+  int       b_coded_count;
+  int       b_count;
+  int       sb_partial;
+  int       sb_full;
+  bits=_fr->bits;
+  /*Extend the b_coded run, or start a new one.*/
+  b_coded_count=_fr->b_coded_count;
+  if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count);
+  else b_coded_count=0;
+  b_coded_count++;
+  b_count=_fr->b_count+1;
+  if(b_count>=16){
+    /*We finished a superblock.*/
+    /*sb_full is ignored for partially coded super blocks, but give it a
+       defined value so that nothing indeterminate is ever read.*/
+    sb_full=0;
+    if(b_coded_count>=16){
+      /*It was fully coded/uncoded; roll back the partial block flags.*/
+      /*The part of the run that precedes this SB still has to be paid for.*/
+      if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16);
+      sb_partial=0;
+      sb_full=_b_coded;
+      _b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      bits+=oc_block_run_bits(b_coded_count);
+      /*It was partially coded.*/
+      sb_partial=1;
+    }
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=_b_coded;
+    _fr->b_coded_prev=_b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+  else{
+    bits+=oc_block_run_bits(b_coded_count);
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_count=b_count;
+    _fr->b_coded=_b_coded;
+  }
+}
+
+/*Record one more skipped (uncoded) block in the coded block flag tracker.*/
+static void oc_fr_skip_block(oc_fr_state *_fr){
+  const int b_coded=0;
+  oc_fr_state_advance_block(_fr,b_coded);
+}
+
+/*Record one more coded block in the coded block flag tracker.*/
+static void oc_fr_code_block(oc_fr_state *_fr){
+  const int b_coded=1;
+  oc_fr_state_advance_block(_fr,b_coded);
+}
+
+/*Estimate the flag overhead of coding the next block instead of skipping it.
+  Both outcomes are simulated on private copies of the tracker, so _fr itself
+   is left untouched.
+  Return: The flag bits used when the block is coded minus the flag bits used
+   when it is skipped (which may be negative).*/
+static int oc_fr_cost1(const oc_fr_state *_fr){
+  oc_fr_state skipped;
+  oc_fr_state coded;
+  skipped=*_fr;
+  oc_fr_skip_block(&skipped);
+  coded=*_fr;
+  oc_fr_code_block(&coded);
+  return (int)(coded.bits-skipped.bits);
+}
+
+/*Estimate the flag overhead of a group of four blocks, relative to skipping
+   all four of them.
+  _pre:  The tracker state before the four blocks were processed.
+  _post: The tracker state after they were actually processed.
+  Return: The flag bits in _post minus the flag bits that skipping all four
+   blocks starting from _pre would have produced.*/
+static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){
+  oc_fr_state all_skipped;
+  int         bi;
+  all_skipped=*_pre;
+  for(bi=0;bi<4;bi++)oc_fr_skip_block(&all_skipped);
+  return (int)(_post->bits-all_skipped.bits);
+}
+
+
+
+struct oc_qii_state{/*State to track the bit cost of the per-block quantizer
+ index (qii) choices.
+ The choices are tracked as two flag runs, both priced with oc_sb_run_bits():
+ qi01 is (qii+1)>>1 and is updated for every block, while qi12 is qii>>1 and
+ is only updated for blocks with a non-zero qii.
+ The flag values start at -1, meaning no flag has been seen yet.*/
+ ptrdiff_t bits;
+ unsigned qi01_count:14;
+ signed int qi01:2;
+ unsigned qi12_count:14;
+ signed int qi12:2;
+};
+
+
+
+/*Put a quantizer index tracker in its initial, empty state: no bits spent,
+   both run lengths zero, and both "last flag" values marked as not yet
+   seen.*/
+static void oc_qii_state_init(oc_qii_state *_qs){
+  /*-1 never matches a real flag value, so the first block always starts new
+     runs.*/
+  _qs->qi01=_qs->qi12=-1;
+  _qs->qi01_count=_qs->qi12_count=0;
+  _qs->bits=0;
+}
+
+
+static void oc_qii_state_advance(oc_qii_state *_qd,
+ const oc_qii_state *_qs,int _qii){/*Compute the tracker state that results
+ from coding one more block with quantizer index _qii.
+ _qd: Receives the new state.
+ _qs: The state to start from.
+ Every field of _qs is read into a local before anything is written to _qd,
+ so the two may point at the same state (oc_enc_block_transform_quantize()
+ calls it that way).*/
+ ptrdiff_t bits;
+ int qi01;
+ int qi01_count;
+ int qi12;
+ int qi12_count;
+ bits=_qs->bits;
+ /*The first flag: 0 for qii 0, 1 for qii 1 or 2.*/
+ qi01=_qii+1>>1;
+ qi01_count=_qs->qi01_count;
+ if(qi01==_qs->qi01){
+ /*A run of the maximum length (4129) is closed and a new one started at the
+ cost of one extra bit; otherwise the cost of the old run length is
+ replaced by that of the new one.*/
+ if(qi01_count>=4129){
+ bits++;
+ qi01_count=0;
+ }
+ else bits-=oc_sb_run_bits(qi01_count);
+ }
+ else qi01_count=0;
+ qi01_count++;
+ bits+=oc_sb_run_bits(qi01_count);
+ qi12_count=_qs->qi12_count;
+ /*The second flag only exists for blocks with a non-zero qii; it is 0 for
+ qii 1 and 1 for qii 2.*/
+ if(_qii){
+ qi12=_qii>>1;
+ if(qi12==_qs->qi12){
+ if(qi12_count>=4129){
+ bits++;
+ qi12_count=0;
+ }
+ else bits-=oc_sb_run_bits(qi12_count);
+ }
+ else qi12_count=0;
+ qi12_count++;
+ bits+=oc_sb_run_bits(qi12_count);
+ }
+ else qi12=_qs->qi12;
+ _qd->bits=bits;
+ _qd->qi01=qi01;
+ _qd->qi01_count=qi01_count;
+ _qd->qi12=qi12;
+ _qd->qi12_count=qi12_count;
+}
+
+
+
+/*Temporary encoder state for the analysis pipeline.
+ Every array with a leading dimension of 3 has one entry per color plane.*/
+struct oc_enc_pipeline_state{
+ int bounding_values[256]; /*Filled in by oc_state_loop_filter_init().*/
+ oc_fr_state fr[3]; /*Coded block flag cost trackers.*/
+ oc_qii_state qs[3]; /*Block quantizer index cost trackers.*/
+ /*Condensed dequantization tables.
+ Indexed by plane, qii and quantizer type (qti), in that order.*/
+ const ogg_uint16_t *dequant[3][3][2];
+ /*Condensed quantization tables.
+ Indexed the same way as dequant.*/
+ const oc_iquant *enquant[3][3][2];
+ /*Skip SSD storage for the current MCU in each plane.*/
+ unsigned *skip_ssd[3];
+ /*Coded/uncoded fragment lists for each plane for the current MCU.
+ The coded list grows upwards from coded_fragis, while the uncoded list
+ grows downwards from uncoded_fragis.*/
+ ptrdiff_t *coded_fragis[3];
+ ptrdiff_t *uncoded_fragis[3];
+ ptrdiff_t ncoded_fragis[3];
+ ptrdiff_t nuncoded_fragis[3];
+ /*The starting fragment for the current MCU in each plane.*/
+ ptrdiff_t froffset[3];
+ /*The starting row for the current MCU in each plane.*/
+ int fragy0[3];
+ /*The ending row for the current MCU in each plane.*/
+ int fragy_end[3];
+ /*The starting superblock for the current MCU in each plane.*/
+ unsigned sbi0[3];
+ /*The ending superblock for the current MCU in each plane.*/
+ unsigned sbi_end[3];
+ /*The number of tokens for zzi=1 for each color plane.*/
+ int ndct_tokens1[3];
+ /*The outstanding eob_run count for zzi=1 for each color plane.*/
+ int eob_run1[3];
+ /*Whether or not the loop filter is enabled.*/
+ int loop_filter;
+};
+
+
+static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){/*
+ Prepare the pipeline state for analyzing a frame: reset the bit cost
+ trackers, set up the skip SSD and fragment list pointers, cache the
+ quantizer tables for the frame's quantizer indices, clear the tokenization
+ state and initialize the loop filter.*/
+ ptrdiff_t *coded_fragis;
+ unsigned mcu_nvsbs;
+ ptrdiff_t mcu_nfrags;
+ int hdec;
+ int vdec;
+ int pli;
+ int qii;
+ int qti;
+ /*Initialize the per-plane coded block flag trackers.
+ These are used for bit-estimation purposes only; the real flag bits span
+ all three planes, so we can't compute them in parallel.*/
+ for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli);
+ for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli);
+ /*Set up the per-plane skip SSD storage pointers.
+ The luma plane gets room for one MCU's worth of fragments (16 per super
+ block), and the first chroma plane that amount shifted right by the number
+ of decimated dimensions.*/
+ mcu_nvsbs=_enc->mcu_nvsbs;
+ mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16;
+ /*Bits 0 and 1 of the pixel format are clear when the chroma planes are
+ decimated horizontally and vertically, respectively.*/
+ hdec=!(_enc->state.info.pixel_fmt&1);
+ vdec=!(_enc->state.info.pixel_fmt&2);
+ _pipe->skip_ssd[0]=_enc->mcu_skip_ssd;
+ _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags;
+ _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec);
+ /*Set up per-plane pointers to the coded and uncoded fragments lists.
+ Unlike the decoder, each plane's coded and uncoded fragment list is kept
+ separate during the analysis stage; we only make the coded list for all
+ three planes contiguous right before the final packet is output
+ (destroying the uncoded lists, which are no longer needed).
+ The coded list starts at the beginning of the plane's region, and the
+ uncoded list pointer is set to the end of it.*/
+ coded_fragis=_enc->state.coded_fragis;
+ for(pli=0;pli<3;pli++){
+ _pipe->coded_fragis[pli]=coded_fragis;
+ coded_fragis+=_enc->state.fplanes[pli].nfrags;
+ _pipe->uncoded_fragis[pli]=coded_fragis;
+ }
+ memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis));
+ memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis));
+ /*Set up condensed quantizer tables.
+ Only the entries for the nqis quantizer indices used by this frame are
+ filled in.*/
+ for(pli=0;pli<3;pli++){
+ for(qii=0;qii<_enc->state.nqis;qii++){
+ int qi;
+ qi=_enc->state.qis[qii];
+ for(qti=0;qti<2;qti++){
+ _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti];
+ _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti];
+ }
+ }
+ }
+ /*Initialize the tokenization state.*/
+ for(pli=0;pli<3;pli++){
+ _pipe->ndct_tokens1[pli]=0;
+ _pipe->eob_run1[pli]=0;
+ }
+ /*Initialize the bounding value array for the loop filter.
+ The filter is treated as enabled when the initialization returns zero.*/
+ _pipe->loop_filter=!oc_state_loop_filter_init(&_enc->state,
+ _pipe->bounding_values);
+}
+
+/*Sets the current MCU stripe to super block row _sby.
+  For each plane this records the first and one-past-the-last super block and
+   fragment row of the stripe, and the fragment offset of its first row.
+  _sby: The (luma) super block row the stripe starts at.
+  Return: A non-zero value if more stripes remain after this one, or 0 if this
+   was the last MCU.*/
+static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _sby){
+  const oc_fragment_plane *fplane;
+  unsigned                 mcu_nvsbs;
+  int                      sby_end;
+  int                      notdone;
+  int                      vdec;
+  int                      pli;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  sby_end=_enc->state.fplanes[0].nvsbs;
+  /*The stripe is mcu_nvsbs super block rows tall, unless that would run past
+     the bottom of the frame.*/
+  notdone=_sby+mcu_nvsbs<sby_end;
+  if(notdone)sby_end=_sby+mcu_nvsbs;
+  /*The luma plane is never decimated.*/
+  vdec=0;
+  for(pli=0;pli<3;pli++){
+    fplane=_enc->state.fplanes+pli;
+    _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs;
+    /*There are four fragment rows per super block row.*/
+    _pipe->fragy0[pli]=_sby<<(2-vdec);
+    _pipe->froffset[pli]=fplane->froffset
+     +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags;
+    if(notdone){
+      _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs;
+      _pipe->fragy_end[pli]=sby_end<<(2-vdec);
+    }
+    else{
+      /*The last stripe runs to the end of the plane, whose final super block
+         row may be only partially filled with fragment rows.*/
+      _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs;
+      _pipe->fragy_end[pli]=fplane->nvfrags;
+    }
+    /*The remaining (chroma) planes are vertically decimated when bit 1 of
+       the pixel format is clear.*/
+    vdec=!(_enc->state.info.pixel_fmt&2);
+  }
+  return notdone;
+}
+
+static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){/*Finish
+ processing plane _pli of the current MCU: copy over the uncoded fragments,
+ run DC prediction and DC tokenization, advance the fragment lists, and then
+ apply the loop filter and fill the borders of the reconstructed frame.
+ _sdelay: The number of fragment rows to move the start of the loop filter
+ and border fill range back by.
+ _edelay: The number of fragment rows to move the end of that range back by.
+ Both delays are ignored when the loop filter is disabled.*/
+ int refi;
+ /*Copy over all the uncoded fragments from this plane and advance the uncoded
+ fragment list.
+ The list grows downwards, so moving the pointer down by the number of
+ entries both finds the start of this MCU's entries and advances the list
+ for the next MCU.*/
+ _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+ oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli],
+ _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+ _pipe->nuncoded_fragis[_pli]=0;
+ /*Perform DC prediction.*/
+ oc_enc_pred_dc_frag_rows(_enc,_pli,
+ _pipe->fragy0[_pli],_pipe->fragy_end[_pli]);
+ /*Finish DC tokenization.
+ The zzi=1 token count and EOB run are then saved for the next MCU.*/
+ oc_enc_tokenize_dc_frag_list(_enc,_pli,
+ _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli],
+ _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]);
+ _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1];
+ _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1];
+ /*And advance the coded fragment list.*/
+ _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+ _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+ _pipe->ncoded_fragis[_pli]=0;
+ /*Apply the loop filter if necessary.*/
+ refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+ if(_pipe->loop_filter){
+ oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values,
+ refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
+ }
+ else _sdelay=_edelay=0;
+ /*To fill borders, we have an additional two pixel delay, since a fragment
+ in the next row could filter its top edge, using two pixels from a
+ fragment in this row.
+ But there's no reason to delay a full fragment between the two.
+ The row arguments are in pixels: fragment rows times 8, minus two pixels
+ for each fragment row of delay.*/
+ oc_state_borders_fill_rows(&_enc->state,refi,_pli,
+ (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1),
+ (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1));
+}
+
+
+
+/*Cost information about the coded blocks in a MB.
+ The first three fields are totals accumulated over the blocks that were left
+ coded by oc_enc_block_transform_quantize().
+ uncoded_ac_ssd: The skip SSD those blocks would have had if left uncoded.
+ coded_ac_ssd: The AC-only SSD of the blocks as coded.
+ ac_bits: The bits estimated for the AC tokens of the blocks.
+ dc_flag: When set, oc_enc_mb_transform_quantize_luma() does not consider
+ skipping the MB as a whole.*/
+struct oc_rd_metric{
+ int uncoded_ac_ssd;
+ int coded_ac_ssd;
+ int ac_bits;
+ int dc_flag;
+};
+
+
+
+/*Transform, quantize, tokenize and reconstruct a single fragment, then decide
+   (in predicted frames) whether coding it was actually worth the bits.
+  If it was not, the tokens are rolled back and the fragment is marked as
+   uncoded.
+  _pli:           The color plane the fragment belongs to.
+  _fragi:         The index of the fragment.
+  _overhead_bits: The estimated number of extra flag bits needed to code the
+                   fragment instead of skipping it (callers pass
+                   oc_fr_cost1()); negative values are treated as 0.
+  _mo:            Accumulates the SSD and bit totals of the fragments that are
+                   left coded in predicted frames.
+  _stack:         The current position in the token checkpoint stack; it is
+                   advanced by tokenization and restored on a roll back.
+  Return: 1 if the fragment was coded, or 0 if it was skipped.*/
+static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits,
+ oc_rd_metric *_mo,oc_token_checkpoint **_stack){
+  OC_ALIGN16(ogg_int16_t dct[64]);
+  OC_ALIGN16(ogg_int16_t data[64]);
+  ogg_uint16_t dc_dequant;
+  const ogg_uint16_t *dequant;
+  const oc_iquant *enquant;
+  ptrdiff_t frag_offs;
+  int ystride;
+  const unsigned char *src;
+  const unsigned char *ref;
+  unsigned char *dst;
+  int frame_type;
+  int nonzero;
+  unsigned uncoded_ssd;
+  unsigned coded_ssd;
+  int coded_dc;
+  oc_token_checkpoint *checkpoint;
+  oc_fragment *frags;
+  int mb_mode;
+  int mv_offs[2];
+  int nmv_offs;
+  int ac_bits;
+  int borderi;
+  int qti;
+  int qii;
+  int pi;
+  int zzi;
+  int v;
+  int val;
+  int d;
+  int s;
+  int dc;
+  frags=_enc->state.frags;
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  ystride=_enc->state.ref_ystride[_pli];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs;
+  borderi=frags[_fragi].borderi;
+  qii=frags[_fragi].qii;
+  /*Any bits set outside the low two of qii mark the fragment as a candidate
+     for skipping.*/
+  if(qii&~3){
+#if !defined(OC_COLLECT_METRICS)
+    if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){
+      /*Enable early skip detection.*/
+      frags[_fragi].coded=0;
+      return 0;
+    }
+#endif
+    /*Try and code this block anyway.*/
+    qii&=3;
+    frags[_fragi].qii=qii;
+  }
+  mb_mode=frags[_fragi].mb_mode;
+  ref=_enc->state.ref_frame_data[
+   _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs;
+  dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]]
+   +frag_offs;
+  /*Motion compensation:*/
+  switch(mb_mode){
+    case OC_MODE_INTRA:{
+      nmv_offs=0;
+      oc_enc_frag_sub_128(_enc,data,src,ystride);
+    }break;
+    case OC_MODE_GOLDEN_NOMV:
+    case OC_MODE_INTER_NOMV:{
+      nmv_offs=1;
+      mv_offs[0]=0;
+      oc_enc_frag_sub(_enc,data,src,ref,ystride);
+    }break;
+    default:{
+      const oc_mv *frag_mvs;
+      frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
+      nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli,
+       frag_mvs[_fragi][0],frag_mvs[_fragi][1]);
+      if(nmv_offs>1){
+        /*With two offsets the predictor is first built in dst, which is then
+           also used as the reference for reconstruction below.*/
+        oc_enc_frag_copy2(_enc,dst,
+         ref+mv_offs[0],ref+mv_offs[1],ystride);
+        oc_enc_frag_sub(_enc,data,src,dst,ystride);
+      }
+      else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride);
+    }break;
+  }
+#if defined(OC_COLLECT_METRICS)
+  {
+    unsigned satd;
+    switch(nmv_offs){
+      case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break;
+      case 1:{
+        satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
+      }break;
+      default:{
+        satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX);
+      }
+    }
+    _enc->frag_satd[_fragi]=satd;
+  }
+#endif
+  /*Transform:*/
+  oc_enc_fdct8x8(_enc,dct,data);
+  /*Quantize the DC coefficient:*/
+  /*The DC coefficient always uses the tables for the first quantizer index.*/
+  qti=mb_mode!=OC_MODE_INTRA;
+  enquant=_pipe->enquant[_pli][0][qti];
+  dc_dequant=_pipe->dequant[_pli][0][qti][0];
+  v=dct[0];
+  val=v<<1;
+  s=OC_SIGNMASK(val);
+  val+=dc_dequant+s^s;
+  val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s;
+  dc=OC_CLAMPI(-580,val,580);
+  nonzero=0;
+  /*Quantize the AC coefficients:*/
+  dequant=_pipe->dequant[_pli][qii][qti];
+  enquant=_pipe->enquant[_pli][qii][qti];
+  for(zzi=1;zzi<64;zzi++){
+    v=dct[OC_FZIG_ZAG[zzi]];
+    d=dequant[zzi];
+    val=v<<1;
+    v=abs(val);
+    if(v>=d){
+      s=OC_SIGNMASK(val);
+      /*The bias added here rounds ties away from zero, since token
+         optimization can only decrease the magnitude of the quantized
+         value.*/
+      val+=d+s^s;
+      /*Note the arithmetic right shift is not guaranteed by ANSI C.
+        Hopefully no one still uses ones-complement architectures.*/
+      val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s;
+      data[zzi]=OC_CLAMPI(-580,val,580);
+      /*Track the index of the last non-zero coefficient.*/
+      nonzero=zzi;
+    }
+    else data[zzi]=0;
+  }
+  /*Tokenize.*/
+  checkpoint=*_stack;
+  ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
+   _stack,qti?0:3);
+  /*Reconstruct.
+    TODO: nonzero may need to be adjusted after tokenization.*/
+  if(nonzero==0){
+    ogg_int16_t p;
+    int         ci;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(ci=0;ci<64;ci++)data[ci]=p;
+  }
+  else{
+    data[0]=dc*dc_dequant;
+    oc_idct8x8(&_enc->state,data,nonzero+1);
+  }
+  if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data);
+  else{
+    oc_enc_frag_recon_inter(_enc,dst,
+     nmv_offs==1?ref+mv_offs[0]:dst,ystride,data);
+  }
+  frame_type=_enc->state.frame_type;
+#if !defined(OC_COLLECT_METRICS)
+  if(frame_type!=OC_INTRA_FRAME)
+#endif
+  {
+    /*In retrospect, should we have skipped this block?*/
+    oc_enc_frag_sub(_enc,data,src,dst,ystride);
+    coded_ssd=coded_dc=0;
+    if(borderi<0){
+      for(pi=0;pi<64;pi++){
+        coded_ssd+=data[pi]*data[pi];
+        coded_dc+=data[pi];
+      }
+    }
+    else{
+      ogg_int64_t mask;
+      /*Only the pixels selected by the border mask are counted.*/
+      mask=_enc->state.borders[borderi].mask;
+      for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+        coded_ssd+=data[pi]*data[pi];
+        coded_dc+=data[pi];
+      }
+    }
+    /*Scale to match DCT domain.*/
+    coded_ssd<<=4;
+    /*We actually only want the AC contribution to the SSD.*/
+    coded_ssd-=coded_dc*coded_dc>>2;
+#if defined(OC_COLLECT_METRICS)
+    _enc->frag_ssd[_fragi]=coded_ssd;
+  }
+  if(frame_type!=OC_INTRA_FRAME){
+#endif
+    uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]];
+    /*A skip SSD of UINT_MAX means the fragment may not be skipped.*/
+    if(uncoded_ssd<UINT_MAX){
+      /*Although the fragment coding overhead determination is accurate, it is
+         greedy, using very coarse-grained local information.
+        Allowing it to mildly discourage coding turns out to be beneficial, but
+         it's not clear that allowing it to encourage coding through negative
+         coding overhead deltas is useful.
+        For that reason, we disallow negative coding overheads.*/
+      if(_overhead_bits<0)_overhead_bits=0;
+      if(uncoded_ssd<=coded_ssd+(_overhead_bits+ac_bits)*_enc->lambda&&
+       /*Don't allow luma blocks to be skipped in 4MV mode when VP3
+          compatibility is enabled.*/
+       (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){
+        /*Hm, not worth it; roll back.*/
+        oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint);
+        *_stack=checkpoint;
+        frags[_fragi].coded=0;
+        return 0;
+      }
+    }
+    else _mo->dc_flag=1;
+    _mo->uncoded_ac_ssd+=uncoded_ssd;
+    _mo->coded_ac_ssd+=coded_ssd;
+    _mo->ac_bits+=ac_bits;
+  }
+  /*The fragment stays coded: account for its quantizer index.*/
+  oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii);
+  frags[_fragi].dc=dc;
+  frags[_fragi].coded=1;
+  return 1;
+}
+
+static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){/*Transform
+ and quantize the four luma blocks of macro block _mbi, then (in predicted
+ frames) decide whether the macro block as a whole is worth coding and fix
+ up its mode if few or none of its blocks ended up coded.
+ _mode_overhead: The bits charged to the macro block on top of its tokens
+ and coded block flags when making that decision.
+ Return: The number of luma blocks that were left coded.*/
+ /*Worst case token stack usage for 4 fragments.*/
+ oc_token_checkpoint stack[64*4];
+ oc_token_checkpoint *stackptr;
+ const oc_sb_map *sb_maps;
+ signed char *mb_modes;
+ oc_fragment *frags;
+ ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t *uncoded_fragis;
+ ptrdiff_t nuncoded_fragis;
+ oc_rd_metric mo;
+ oc_fr_state fr_checkpoint;
+ oc_qii_state qs_checkpoint;
+ int mb_mode;
+ int ncoded;
+ ptrdiff_t fragi;
+ int bi;
+ /*Save the luma cost trackers so they can be restored if the whole macro
+ block is skipped below.*/
+ *&fr_checkpoint=*(_pipe->fr+0);
+ *&qs_checkpoint=*(_pipe->qs+0);
+ sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+ mb_modes=_enc->state.mb_modes;
+ frags=_enc->state.frags;
+ coded_fragis=_pipe->coded_fragis[0];
+ ncoded_fragis=_pipe->ncoded_fragis[0];
+ uncoded_fragis=_pipe->uncoded_fragis[0];
+ nuncoded_fragis=_pipe->nuncoded_fragis[0];
+ mb_mode=mb_modes[_mbi];
+ ncoded=0;
+ stackptr=stack;
+ memset(&mo,0,sizeof(mo));
+ for(bi=0;bi<4;bi++){
+ fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+ frags[fragi].mb_mode=mb_mode;
+ if(oc_enc_block_transform_quantize(_enc,
+ _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&stackptr)){
+ oc_fr_code_block(_pipe->fr+0);
+ coded_fragis[ncoded_fragis++]=fragi;
+ ncoded++;
+ }
+ else{
+ /*The uncoded list grows downwards from uncoded_fragis.*/
+ *(uncoded_fragis-++nuncoded_fragis)=fragi;
+ oc_fr_skip_block(_pipe->fr+0);
+ }
+ }
+ if(_enc->state.frame_type!=OC_INTRA_FRAME){
+ if(ncoded>0&&!mo.dc_flag){
+ int cost;
+ /*Some individual blocks were worth coding.
+ See if that's still true when accounting for mode and MV overhead.*/
+ cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits
+ +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead);
+ if(mo.uncoded_ac_ssd<=cost){
+ /*Taking macroblock overhead into account, it is not worth coding this
+ MB.
+ Undo all of its tokens, restore the cost trackers, and move every
+ block that had been coded over to the uncoded list.*/
+ oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack);
+ *(_pipe->fr+0)=*&fr_checkpoint;
+ *(_pipe->qs+0)=*&qs_checkpoint;
+ for(bi=0;bi<4;bi++){
+ fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+ if(frags[fragi].coded){
+ *(uncoded_fragis-++nuncoded_fragis)=fragi;
+ frags[fragi].coded=0;
+ }
+ oc_fr_skip_block(_pipe->fr+0);
+ }
+ /*Drop the entries this MB appended to the coded list.*/
+ ncoded_fragis-=ncoded;
+ ncoded=0;
+ }
+ }
+ /*If no luma blocks coded, the mode is forced.*/
+ if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV;
+ /*Assume that a 1MV with a single coded block is always cheaper than a 4MV
+ with a single coded block.
+ This may not be strictly true: a 4MV computes chroma MVs using (0,0) for
+ skipped blocks, while a 1MV does not.*/
+ else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){
+ mb_modes[_mbi]=OC_MODE_INTER_MV;
+ }
+ }
+ _pipe->ncoded_fragis[0]=ncoded_fragis;
+ _pipe->nuncoded_fragis[0]=nuncoded_fragis;
+ return ncoded;
+}
+
+static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){/*
+ Transform and quantize every fragment of plane _pli in the super blocks
+ _sbi_start up to (but not including) _sbi_end, adding each fragment to the
+ plane's coded or uncoded list and recording the resulting coded flags of
+ each super block.*/
+ const oc_sb_map *sb_maps;
+ oc_sb_flags *sb_flags;
+ ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t *uncoded_fragis;
+ ptrdiff_t nuncoded_fragis;
+ int sbi;
+ sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+ sb_flags=_enc->state.sb_flags;
+ coded_fragis=_pipe->coded_fragis[_pli];
+ ncoded_fragis=_pipe->ncoded_fragis[_pli];
+ uncoded_fragis=_pipe->uncoded_fragis[_pli];
+ nuncoded_fragis=_pipe->nuncoded_fragis[_pli];
+ for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+ /*Worst case token stack usage for 1 fragment.*/
+ oc_token_checkpoint stack[64];
+ oc_rd_metric mo;
+ int quadi;
+ int bi;
+ memset(&mo,0,sizeof(mo));
+ for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){
+ ptrdiff_t fragi;
+ fragi=sb_maps[sbi][quadi][bi];
+ /*Negative entries in the super block map are skipped.*/
+ if(fragi>=0){
+ oc_token_checkpoint *stackptr;
+ /*The stack is restarted for every fragment, since only the current
+ fragment's tokens are ever rolled back here.*/
+ stackptr=stack;
+ if(oc_enc_block_transform_quantize(_enc,
+ _pipe,_pli,fragi,oc_fr_cost1(_pipe->fr+_pli),&mo,&stackptr)){
+ coded_fragis[ncoded_fragis++]=fragi;
+ oc_fr_code_block(_pipe->fr+_pli);
+ }
+ else{
+ /*The uncoded list grows downwards from uncoded_fragis.*/
+ *(uncoded_fragis-++nuncoded_fragis)=fragi;
+ oc_fr_skip_block(_pipe->fr+_pli);
+ }
+ }
+ }
+ /*Close off super blocks with fewer than 16 fragments, then copy the
+ tracker's current super block flags into the super block's own flags.*/
+ oc_fr_state_flush_sb(_pipe->fr+_pli);
+ sb_flags[sbi].coded_fully=_pipe->fr[_pli].sb_full;
+ sb_flags[sbi].coded_partially=_pipe->fr[_pli].sb_partial;
+ }
+ _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+ _pipe->nuncoded_fragis[_pli]=nuncoded_fragis;
+}
+
+/*Mode decision is done by exhaustively examining all potential choices.
+ Obviously, doing the motion compensation, fDCT, tokenization, and then
+ counting the bits each token uses is computationally expensive.
+ Theora's EOB runs can also split the cost of these tokens across multiple
+ fragments, and naturally we don't know what the optimal choice of Huffman
+ codes will be until we know all the tokens we're going to encode in all the
+ fragments.
+ So we use a simple approach to estimating the bit cost and distortion of each
+ mode based upon the SATD value of the residual before coding.
+ The mathematics behind the technique are outlined by Kim \cite{Kim03}, but
+ the process (modified somewhat from that of the paper) is very simple.
+ We build a non-linear regression of the mappings from
+ (pre-transform+quantization) SATD to (post-transform+quantization) bits and
+ SSD for each qi.
+ A separate set of mappings is kept for each quantization type and color
+ plane.
+ The mappings are constructed by partitioning the SATD values into a small
+ number of bins (currently 24) and using a linear regression in each bin
+ (as opposed to the 0th-order regression used by Kim).
+ The bit counts and SSD measurements are obtained by examining actual encoded
+ frames, with appropriate lambda values and optimal Huffman codes selected.
+ EOB bits are assigned to the fragment that started the EOB run (as opposed to
+ dividing them among all the blocks in the run; though the latter approach
+ seems more theoretically correct, Monty's testing showed a small improvement
+ with the former, though that may have been merely statistical noise).
+
+ @ARTICLE{Kim03,
+ author="Hyun Mun Kim",
+ title="Adaptive Rate Control Using Nonlinear Regression",
+ journal="IEEE Transactions on Circuits and Systems for Video Technology",
+ volume=13,
+ number=5,
+ pages="432--439",
+ month=May,
+ year=2003
+ }*/
+
+/*Computes (_ssd+_lambda*_rate)/(1<<OC_BIT_SCALE) with rounding, avoiding
+   overflow for large lambda values.
+  The high and low OC_BIT_SCALE bits of _ssd and _rate are handled separately:
+   the high parts are combined directly, and the low parts are combined,
+   rounded (by adding half of 1<<OC_BIT_SCALE), and scaled down afterwards.
+  Note that the expansion is not wrapped in outer parentheses and evaluates
+   _ssd, _rate, and _lambda more than once.*/
+#define OC_MODE_RD_COST(_ssd,_rate,_lambda) \
+ ((_ssd)>>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \
+ +(((_ssd)&(1<<OC_BIT_SCALE)-1)+((_rate)&(1<<OC_BIT_SCALE)-1)*(_lambda) \
+ +((1<<OC_BIT_SCALE)>>1)>>OC_BIT_SCALE)
+
+/*Estimate the R-D cost of the DCT coefficients given the SATD of a block after
+ prediction.
+ _ssd: Receives the distortion estimate, computed from rmse*rmse.
+ _qi, _pli, _qti: Passed by callers as a quantizer index, a plane index, and a
+ quantization type; only the use of _pli (to scale _satd) is visible below.
+ _satd: The SATD of the block.
+ Return: The rate estimate, clamped to be non-negative.*/
+static unsigned oc_dct_cost2(unsigned *_ssd,
+ int _qi,int _pli,int _qti,int _satd){
+ unsigned rmse;
+ int bin;
+ int dx;
+ int y0;
+ int z0;
+ int dy;
+ int dz;
+ /*SATD metrics for chroma planes vary much less than luma, so we scale them
+ by 4 to distribute them into the mode decision bins more evenly.*/
+ /*The shift amount parses as (_pli+1)&2: 0 for plane 0, 2 for planes 1 and 2.*/
+ _satd<<=_pli+1&2;
+ /*Clamp to OC_SAD_BINS-2, presumably so that bin+1 is still a valid bin.*/
+ bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2);
+ /*NOTE(review): the next line is not valid C as shown; text appears to have
+ been lost from it.
+ The assignments of y0, z0, dy, dz and rmse, which are used or declared here,
+ are not visible; restore them from the upstream source.*/
+ dx=_satd-(bin<>OC_SAD_SHIFT),0);
+ /*Parses as (rmse*rmse)>>(2*OC_RMSE_SCALE-OC_BIT_SCALE).*/
+ *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE;
+ return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0);
+}
+
+/*Select luma block-level quantizers for a MB in an INTRA frame.
+ The chosen quantizer index of each of the four luma blocks is written to
+ frags[fragi].qii by walking back through the prev[][] table from the last
+ block to the first.
+ _enc: The encoder context.
+ _qs: The qii coding state before this MB.
+ _mbi: The macro block index; _mbi>>2 selects the super block and _mbi&3 the
+ quadrant within it.
+ Return: The cost of the selected combination (best_cost).*/
+static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,unsigned _mbi){
+ const unsigned char *src;
+ const ptrdiff_t *frag_buf_offs;
+ const oc_sb_map *sb_maps;
+ oc_fragment *frags;
+ ptrdiff_t frag_offs;
+ ptrdiff_t fragi;
+ oc_qii_state qs[4][3];
+ unsigned cost[4][3];
+ unsigned ssd[4][3];
+ unsigned rate[4][3];
+ int prev[3][3];
+ unsigned satd;
+ unsigned best_cost;
+ unsigned best_ssd;
+ unsigned best_rate;
+ int best_qii;
+ int qii;
+ int lambda;
+ int ystride;
+ int nqis;
+ int bi;
+ frag_buf_offs=_enc->state.frag_buf_offs;
+ sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ystride=_enc->state.ref_ystride[0];
+ /*Measure the first luma block of the MB.*/
+ fragi=sb_maps[_mbi>>2][_mbi&3][0];
+ frag_offs=frag_buf_offs[fragi];
+ satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ nqis=_enc->state.nqis;
+ lambda=_enc->lambda;
+ /*NOTE(review): the lines from here to the assignment of frags are not valid
+ C as shown; text appears to have been lost from several of them (loop
+ headers, cost evaluation, and the updates of prev[][], best_cost and
+ best_qii are not visible).
+ Restore them from the upstream source.*/
+ for(qii=0;qiistate.qis[qii],0,0,satd)
+ +(qs[0][qii].bits-_qs->bits<>2][_mbi&3][bi];
+ frag_offs=frag_buf_offs[fragi];
+ satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ for(qii=0;qiistate.qis[qii],0,0,satd);
+ best_ssd=ssd[bi-1][0]+cur_ssd;
+ best_rate=rate[bi-1][0]+cur_rate
+ +(qt[0].bits-qs[bi-1][0].bits<state.frags;
+ /*Backtrack from block 3 to block 0, assigning each block its qii and
+ following prev[][] to the qii chosen for the preceding block.*/
+ for(bi=3;;){
+ fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+ frags[fragi].qii=best_qii;
+ if(bi--<=0)break;
+ best_qii=prev[bi][best_qii];
+ }
+ return best_cost;
+}
+
+/*Select a block-level quantizer for a single chroma block in an INTRA frame.
+ The selected index is stored in frags[_fragi].qii.
+ _enc: The encoder context.
+ _qs: The qii coding state before this block.
+ _pli: The plane containing the block.
+ _fragi: The index of the fragment to analyze.
+ Return: The cost of the selected quantizer (best_cost).*/
+static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){
+ const unsigned char *src;
+ oc_fragment *frags;
+ ptrdiff_t frag_offs;
+ oc_qii_state qt[3];
+ unsigned cost[3];
+ unsigned satd;
+ unsigned best_cost;
+ int best_qii;
+ int qii;
+ int lambda;
+ int ystride;
+ int nqis;
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ystride=_enc->state.ref_ystride[_pli];
+ frag_offs=_enc->state.frag_buf_offs[_fragi];
+ satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ nqis=_enc->state.nqis;
+ lambda=_enc->lambda;
+ best_qii=0;
+ /*NOTE(review): the next two lines are not valid C as shown; text appears to
+ have been lost from them (the loop header, the cost evaluation, and the
+ selection of best_cost/best_qii are not visible).
+ Restore them from the upstream source.*/
+ for(qii=0;qiistate.qis[qii],_pli,0,satd)
+ +(qt[qii].bits-_qs->bits<state.frags;
+ frags[_fragi].qii=best_qii;
+ return best_cost;
+}
+
+/*Codes the fragments of one chroma plane of an INTRA frame for the super
+   blocks in [_sbi_start,_sbi_end).
+  For every non-negative entry of the super block map, a block-level quantizer
+   is selected, the block is transformed and quantized, and the fragment is
+   appended to the plane's coded fragment list.
+  _enc:       The encoder context.
+  _pipe:      The pipeline state holding the per-plane coded list.
+  _pli:       The plane to process.
+  _sbi_start: The first super block index to process.
+  _sbi_end:   One past the last super block index to process.*/
+static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const oc_sb_map *sb_maps;
+  oc_sb_flags     *sb_flags;
+  ptrdiff_t       *coded_list;
+  ptrdiff_t        ncoded;
+  int              sbi;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  sb_flags=_enc->state.sb_flags;
+  coded_list=_pipe->coded_fragis[_pli];
+  ncoded=_pipe->ncoded_fragis[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Worst case token stack usage for 1 fragment.*/
+    oc_token_checkpoint stack[64];
+    int                 blocki;
+    /*Visit the 16 map slots of this super block: 4 quadrants of 4 blocks.*/
+    for(blocki=0;blocki<16;blocki++){
+      oc_token_checkpoint *stackptr;
+      ptrdiff_t            fragi;
+      fragi=sb_maps[sbi][blocki>>2][blocki&3];
+      /*Negative map entries are not processed.*/
+      if(fragi<0)continue;
+      oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi);
+      /*The token stack is restarted for every fragment.*/
+      stackptr=stack;
+      oc_enc_block_transform_quantize(_enc,
+       _pipe,_pli,fragi,0,NULL,&stackptr);
+      coded_list[ncoded++]=fragi;
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded;
+}
+
+/*Analysis stage for an INTRA frame.
+ The frame is processed one MCU stripe at a time: luma is analyzed and coded
+ per macro block, then both chroma planes are coded for the same stripe.
+ On return every fragment is counted as coded
+ (ntotal_coded_fragis is set to nfrags).
+ _enc: The encoder context.
+ _recode: NOTE(review): the only use of this flag falls in a line that has
+ lost text below; it appears to gate the motion search, confirm against the
+ upstream source.*/
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){
+ oc_enc_pipeline_state pipe;
+ const unsigned char *map_idxs;
+ int nmap_idxs;
+ oc_sb_flags *sb_flags;
+ signed char *mb_modes;
+ const oc_mb_map *mb_maps;
+ oc_mb_enc_info *embs;
+ oc_fragment *frags;
+ unsigned stripe_sby;
+ unsigned mcu_nvsbs;
+ int notstart;
+ int notdone;
+ int refi;
+ int pli;
+ _enc->state.frame_type=OC_INTRA_FRAME;
+ oc_enc_tokenize_start(_enc);
+ oc_enc_pipeline_init(_enc,&pipe);
+ /*Choose MVs and MB modes and quantize and code luma.
+ Must be done in Hilbert order.*/
+ map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+ nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+ _enc->state.ncoded_fragis[0]=0;
+ _enc->state.ncoded_fragis[1]=0;
+ _enc->state.ncoded_fragis[2]=0;
+ sb_flags=_enc->state.sb_flags;
+ mb_modes=_enc->state.mb_modes;
+ mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+ embs=_enc->mb_info;
+ frags=_enc->state.frags;
+ notstart=0;
+ notdone=1;
+ mcu_nvsbs=_enc->mcu_nvsbs;
+ /*Loop over MCU stripes until oc_enc_pipeline_set_stripe() reports that the
+ last one has been set up.*/
+ for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+ unsigned sbi;
+ unsigned sbi_end;
+ notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
+ sbi_end=pipe.sbi_end[0];
+ /*NOTE(review): the next line is not valid C as shown; text appears to have
+ been lost from it (the super block and quadrant loop headers and the
+ declaration and computation of mbi are not visible).
+ Restore it from the upstream source.*/
+ for(sbi=pipe.sbi0[0];sbistate.curframe_num>0)oc_mcenc_search(_enc,mbi);
+ oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi);
+ mb_modes[mbi]=OC_MODE_INTRA;
+ oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0);
+ /*Propagate final MB mode and MVs to the chroma blocks.*/
+ /*NOTE(review): the next line has also lost text (the loop header and the
+ assignments of mapi and pli are not visible).*/
+ for(mapii=4;mapii>2;
+ bi=mapi&3;
+ fragi=mb_maps[mbi][pli][bi];
+ frags[fragi].mb_mode=OC_MODE_INTRA;
+ }
+ }
+ }
+ oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
+ /*Code chroma planes.*/
+ for(pli=1;pli<3;pli++){
+ oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe,
+ pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
+ oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
+ }
+ /*Every stripe after the first is flagged as not being the start.*/
+ notstart=1;
+ }
+ /*Finish filling in the reference frame borders.*/
+ refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+ for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+ _enc->state.ntotal_coded_fragis=_enc->state.nfrags;
+}
+
+
+
+/*Cost information about a MB mode.*/
+struct oc_mode_choice{
+ /*The R-D cost; filled in by oc_mode_set_cost() from ssd, rate+overhead, and
+ lambda.*/
+ unsigned cost;
+ /*The accumulated distortion estimate for the blocks of the MB.*/
+ unsigned ssd;
+ /*The accumulated rate estimate for the blocks of the MB.*/
+ unsigned rate;
+ /*The rate estimate for mode, MV, and flag signaling added by the
+ oc_cost_*() routines.*/
+ unsigned overhead;
+ /*The quantizer index chosen for each of up to 12 blocks in the MB.
+ oc_cost_inter4mv() treats a value >=nqis as marking a skipped block.*/
+ unsigned char qii[12];
+};
+
+
+
+/*Fills in the cost field of a mode choice from its other fields.
+  _modec:  The mode choice; ssd, rate, and overhead are read, cost is written.
+  _lambda: The Lagrange multiplier applied to the total rate.*/
+static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){
+  unsigned total_rate;
+  /*Signaling overhead counts towards the rate.*/
+  total_rate=_modec->rate+_modec->overhead;
+  _modec->cost=OC_MODE_RD_COST(_modec->ssd,total_rate,_lambda);
+}
+
+/*Skip SSD values for all 12 blocks of a MB that disable early skipping:
+   every entry is UINT_MAX.*/
+static const unsigned OC_NOSKIP[12]={
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX
+};
+
+/*The estimated number of bits used by a coded chroma block to specify the AC
+   quantizer, in units of 1/(1<<OC_BIT_SCALE) bits.
+  0xCAE00D1D is 0.5*log2(3) scaled by 2**32; it is shifted down to one extra
+   fractional bit and then rounded to OC_BIT_SCALE fractional bits.
+  TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression);
+   measurements suggest this is in the right ballpark, but it varies somewhat
+   with lambda.*/
+#define OC_CHROMA_QII_RATE ((((0xCAE00D1DU)>>(31-OC_BIT_SCALE))+1)>>1)
+
+/*Estimates the rate, distortion, and flag overhead of the four luma blocks of
+ a MB for one candidate mode, choosing a quantizer index for each block
+ greedily.
+ _enc: The encoder context.
+ _modec: Receives the totals in ssd, rate, and overhead, and the per-block
+ choices in qii[0..3].
+ _fr: The coded-block flag run state before this MB; a copy is advanced here.
+ _qs: The qii coding state before this MB; a copy is advanced here.
+ _frag_satd: The SATD of each block; entries 0..3 are read here.
+ _skip_ssd: The SSD of skipping each block.
+ NOTE(review): its use falls in lines that have lost text below.
+ _qti: The quantization type passed through to oc_dct_cost2().*/
+static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
+ oc_fr_state fr;
+ oc_qii_state qs;
+ unsigned ssd;
+ unsigned rate;
+ int overhead;
+ unsigned satd;
+ unsigned best_ssd;
+ unsigned best_rate;
+ int best_overhead;
+ int best_fri;
+ int best_qii;
+ unsigned cur_cost;
+ unsigned cur_ssd;
+ unsigned cur_rate;
+ int cur_overhead;
+ int lambda;
+ int nqis;
+ int nskipped;
+ int bi;
+ int qii;
+ lambda=_enc->lambda;
+ nqis=_enc->state.nqis;
+ /*We could do a trellis optimization here, but we don't make final skip
+ decisions until after transform+quantization, so the result wouldn't be
+ optimal anyway.
+ Instead we just use a greedy approach; for most SATD values, the
+ differences between the qiis are large enough to drown out the cost to
+ code the flags, anyway.*/
+ *&fr=*_fr;
+ *&qs=*_qs;
+ ssd=rate=overhead=nskipped=0;
+ for(bi=0;bi<4;bi++){
+ oc_fr_state ft[2];
+ oc_qii_state qt[3];
+ unsigned best_cost;
+ satd=_frag_satd[bi];
+ /*Start from the candidate of coding the block with qii 0.*/
+ *(ft+0)=*&fr;
+ oc_fr_code_block(ft+0);
+ oc_qii_state_advance(qt+0,&qs,0);
+ /*NOTE(review): the next three lines are not valid C as shown; text appears
+ to have been lost from them (the evaluation of the remaining qii
+ candidates, the skip candidate, and the updates of the running totals are
+ not visible).
+ Restore them from the upstream source.*/
+ best_overhead=(ft[0].bits-fr.bits<state.qis[0],0,_qti,satd)
+ +(qt[0].bits-qs.bits<state.qis[qii],0,_qti,satd)
+ +(qt[qii].bits-qs.bits<qii[bi]=best_qii;
+ }
+ _modec->ssd=ssd;
+ _modec->rate=rate;
+ /*The signed overhead total is clamped so the stored value is non-negative.*/
+ _modec->overhead=OC_MAXI(overhead,0);
+}
+
+/*Adds the estimated rate and distortion of the chroma blocks of a MB to a mode
+ choice that already holds the luma totals, choosing a quantizer index for
+ each chroma block.
+ _enc: The encoder context.
+ _modec: ssd and rate are read as the starting totals and written back with
+ the chroma contribution added; qii[4..] receive the per-block choices.
+ _fr, _qs: Not referenced in the visible portion of this function.
+ _frag_satd: The SATD of each block; the chroma entries start at index 4.
+ _skip_ssd: The SSD of skipping each block.
+ NOTE(review): its use, if any, falls in lines that have lost text below.
+ _qti: The quantization type passed through to oc_dct_cost2().*/
+static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
+ unsigned ssd;
+ unsigned rate;
+ unsigned satd;
+ unsigned best_ssd;
+ unsigned best_rate;
+ int best_qii;
+ unsigned cur_cost;
+ unsigned cur_ssd;
+ unsigned cur_rate;
+ int lambda;
+ int nblocks;
+ int nqis;
+ int pli;
+ int bi;
+ int qii;
+ lambda=_enc->lambda;
+ nqis=_enc->state.nqis;
+ ssd=_modec->ssd;
+ rate=_modec->rate;
+ /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded
+ order, we assume a constant overhead for coded block and qii flags.*/
+ nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+ /*Parses as ((nblocks-4)>>1)+4: the end index of the first chroma plane's
+ blocks, given 4 luma blocks and the rest split evenly between two planes.*/
+ nblocks=(nblocks-4>>1)+4;
+ bi=4;
+ for(pli=1;pli<3;pli++){
+ /*NOTE(review): several of the following lines are not valid C as shown;
+ text appears to have been lost from them (the loop headers, the load of
+ satd, and the selection of the best candidate are not visible).
+ Restore them from the upstream source.*/
+ for(;bistate.qis[0],pli,_qti,satd)
+ +OC_CHROMA_QII_RATE;
+ best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
+ best_qii=0;
+ /*NOTE(review): the call below passes plane index 0, whereas the qii 0
+ estimate above passes pli; confirm whether that is intended.*/
+ for(qii=1;qiistate.qis[qii],0,_qti,satd)
+ +OC_CHROMA_QII_RATE;
+ cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda);
+ if(cur_costqii[bi]=best_qii;
+ }
+ /*Parses as ((nblocks-4)<<1)+4: extend the end index to cover the second
+ chroma plane's blocks.*/
+ nblocks=(nblocks-4<<1)+4;
+ }
+ _modec->ssd=ssd;
+ _modec->rate=rate;
+}
+
+/*Computes the distortion of leaving each block of a MB uncoded, measured
+ against the previous reference frame at the same position.
+ Each value is stored both in _ssd[] and in the pipeline's skip_ssd table.
+ A block whose DC difference is too large is given the value UINT_MAX.
+ _enc: The encoder context.
+ _pipe: The pipeline state; skip_ssd[pli][] is written, froffset[pli] is read.
+ _mbi: The macro block index.
+ _ssd: Receives one value per block: indices 0..3 for luma, 4 and up for
+ chroma.*/
+static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
+ unsigned _mbi,unsigned _ssd[12]){
+ OC_ALIGN16(ogg_int16_t buffer[64]);
+ const unsigned char *src;
+ const unsigned char *ref;
+ int ystride;
+ const oc_fragment *frags;
+ const ptrdiff_t *frag_buf_offs;
+ const ptrdiff_t *sb_map;
+ const oc_mb_map_plane *mb_map;
+ const unsigned char *map_idxs;
+ int map_nidxs;
+ ogg_int64_t mask;
+ unsigned uncoded_ssd;
+ int uncoded_dc;
+ unsigned dc_dequant;
+ int dc_flag;
+ int mapii;
+ int mapi;
+ int pli;
+ int bi;
+ ptrdiff_t fragi;
+ ptrdiff_t frag_offs;
+ int borderi;
+ int pi;
+ src=_enc->state.ref_frame_data[OC_FRAME_IO];
+ ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+ ystride=_enc->state.ref_ystride[0];
+ frags=_enc->state.frags;
+ frag_buf_offs=_enc->state.frag_buf_offs;
+ sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+ dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0];
+ /*Luma blocks.*/
+ for(bi=0;bi<4;bi++){
+ fragi=sb_map[bi];
+ frag_offs=frag_buf_offs[fragi];
+ oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,ystride);
+ borderi=frags[fragi].borderi;
+ uncoded_ssd=uncoded_dc=0;
+ /*A negative borderi means there is no border mask: use all 64 pixels.*/
+ if(borderi<0){
+ for(pi=0;pi<64;pi++){
+ uncoded_ssd+=buffer[pi]*buffer[pi];
+ uncoded_dc+=buffer[pi];
+ }
+ }
+ else{
+ /*Only accumulate the pixels whose bit is set in the border mask.
+ This declaration shadows the function-scope mask.*/
+ ogg_int64_t mask;
+ mask=_enc->state.borders[borderi].mask;
+ for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+ uncoded_ssd+=buffer[pi]*buffer[pi];
+ uncoded_dc+=buffer[pi];
+ }
+ }
+ /*Scale to match DCT domain.*/
+ uncoded_ssd<<=4;
+ /*We actually only want the AC contribution to the SSD.*/
+ uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
+ /*DC is a special case; if there's more than a full-quantizer improvement
+ in the effective DC component, always force-code the block.*/
+ dc_flag=abs(uncoded_dc)>dc_dequant<<1;
+ /*-dc_flag is all ones when the flag is set, forcing the value to
+ UINT_MAX.*/
+ uncoded_ssd|=-dc_flag;
+ _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd;
+ }
+ mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+ map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+ map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+ /*Parses as ((map_nidxs-4)>>1)+4: the end index of the first chroma plane's
+ blocks.*/
+ map_nidxs=(map_nidxs-4>>1)+4;
+ mapii=4;
+ /*Chroma blocks, one plane at a time.*/
+ for(pli=1;pli<3;pli++){
+ ystride=_enc->state.ref_ystride[pli];
+ dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0];
+ /*NOTE(review): the next line is not valid C as shown; text appears to have
+ been lost from it (the loop header, the fragment lookup, the subtraction,
+ and the unmasked accumulation branch are not visible).
+ Restore it from the upstream source.*/
+ for(;mapiistate.borders[borderi].mask;
+ for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+ uncoded_ssd+=buffer[pi]*buffer[pi];
+ uncoded_dc+=buffer[pi];
+ }
+ }
+ /*Scale to match DCT domain.*/
+ uncoded_ssd<<=4;
+ /*We actually only want the AC contribution to the SSD.*/
+ uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
+ /*DC is a special case; if there's more than a full-quantizer improvement
+ in the effective DC component, always force-code the block.*/
+ dc_flag=abs(uncoded_dc)>dc_dequant<<1;
+ uncoded_ssd|=-dc_flag;
+ _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd;
+ }
+ /*Parses as ((map_nidxs-4)<<1)+4: extend the end index to cover the second
+ chroma plane's blocks.*/
+ map_nidxs=(map_nidxs-4<<1)+4;
+ }
+}
+
+/*Computes the intra SATD of every block in a MB.
+  _enc:       The encoder context.
+  _mbi:       The macro block index; _mbi>>2 selects the super block and
+               _mbi&3 the quadrant within it.
+  _frag_satd: Receives one SATD per block: indices 0..3 hold the luma blocks
+               in super block map order, and indices 4 and up hold the chroma
+               blocks in OC_MB_MAP_IDXS order for the current pixel format.*/
+static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _frag_satd[12]){
+  const unsigned char   *src;
+  const ptrdiff_t       *frag_buf_offs;
+  const ptrdiff_t       *sb_map;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    mapii;
+  int                    mapi;
+  int                    ystride;
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ystride=_enc->state.ref_ystride[0];
+  for(bi=0;bi<4;bi++){
+    fragi=sb_map[bi];
+    frag_offs=frag_buf_offs[fragi];
+    _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+  }
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  for(mapii=4;mapii<map_nidxs;mapii++){
+    /*Each map index packs the plane in its upper bits and the block within
+       the plane in its low two bits.*/
+    mapi=map_idxs[mapii];
+    pli=mapi>>2;
+    bi=mapi&3;
+    fragi=mb_map[pli][bi];
+    frag_offs=frag_buf_offs[fragi];
+    _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+  }
+}
+
+/*Estimates the cost of coding a MB in INTRA mode.
+  The luma and chroma analyses fill in the rate, distortion, and per-block
+   quantizer choices; the mode signaling cost is then added to the overhead and
+   the final R-D cost is computed.
+  _enc:       The encoder context.
+  _modec:     Receives the cost information for this mode.
+  _mbi:       The macro block index (not used by this function).
+  _fr:        The coded-block flag run state before this MB.
+  _qs:        The qii coding state before this MB.
+  _frag_satd: The intra SATD of each block in the MB.
+  _skip_ssd:  The SSD of skipping each block in the MB.*/
+static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
+  /*The chooser cost is in whole bits; scale it to OC_BIT_SCALE fractional
+     bits to match the other rate terms.*/
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Estimates the cost of coding a MB in an inter mode that uses a single motion
+   vector for the whole MB.
+  The SATD of every luma and chroma block is measured against the reference
+   frame selected by _mb_mode, displaced by _mv; the luma and chroma analyses
+   then fill in the rate, distortion, and per-block quantizer choices, the mode
+   signaling cost is added to the overhead, and the final R-D cost is computed.
+  The cost of coding the motion vector itself is NOT included here.
+  _enc:      The encoder context.
+  _modec:    Receives the cost information for this mode.
+  _mbi:      The macro block index.
+  _mb_mode:  The candidate mode; it selects the reference frame through
+              OC_FRAME_FOR_MODE().
+  _mv:       The motion vector to evaluate: _mv[0] is dx and _mv[1] is dy.
+  _fr:       The coded-block flag run state before this MB.
+  _qs:       The qii coding state before this MB.
+  _skip_ssd: The SSD of skipping each block in the MB.*/
+static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const signed char *_mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
+  unsigned               frag_satd[12];
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const ptrdiff_t       *frag_buf_offs;
+  const ptrdiff_t       *sb_map;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    mapii;
+  int                    mapi;
+  int                    mv_offs[2];
+  int                    dx;
+  int                    dy;
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[
+   _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  dx=_mv[0];
+  dy=_mv[1];
+  _modec->rate=_modec->ssd=0;
+  /*Luma: a return value above 1 means two reference offsets were produced, so
+     the two-reference SATD is used.*/
+  if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
+    for(bi=0;bi<4;bi++){
+      fragi=sb_map[bi];
+      frag_offs=frag_buf_offs[fragi];
+      frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+  }
+  else{
+    for(bi=0;bi<4;bi++){
+      fragi=sb_map[bi];
+      frag_offs=frag_buf_offs[fragi];
+      frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+  }
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  /*Chroma: the offsets are computed once for plane 1 and used for both chroma
+     planes.*/
+  if(oc_state_get_mv_offsets(&_enc->state,mv_offs,1,dx,dy)>1){
+    for(mapii=4;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      pli=mapi>>2;
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      frag_offs=frag_buf_offs[fragi];
+      frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+  }
+  else{
+    for(mapii=4;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      pli=mapi>>2;
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      frag_offs=frag_buf_offs[fragi];
+      frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
+  /*The chooser cost is in whole bits; scale it to OC_BIT_SCALE fractional
+     bits to match the other rate terms.*/
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Estimates the cost of coding a MB in an inter mode with a (0,0) motion
+   vector, by forwarding to oc_cost_inter() with a zero vector.
+  All other arguments are passed through unchanged.*/
+static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12]){
+  static const oc_mv ZERO_MV={0};
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,ZERO_MV,_fr,_qs,_skip_ssd);
+}
+
+/*Estimates the cost of coding a MB in an inter mode with one explicitly coded
+   motion vector.
+  This is oc_cost_inter() plus the incremental cost of coding _mv: the
+   increase in the cheaper of the two running MV bit totals (mv_bits[0], to
+   which the OC_MV_BITS[0] cost of the vector is added, and mv_bits[1], to
+   which a fixed 12 bits are added).
+  _enc:      The encoder context.
+  _modec:    Receives the cost information for this mode.
+  _mbi:      The macro block index.
+  _mb_mode:  The candidate mode.
+  _mv:       The motion vector to evaluate; each component is offset by 31 to
+              index OC_MV_BITS[0].
+  _fr:       The coded-block flag run state before this MB.
+  _qs:       The qii coding state before this MB.
+  _skip_ssd: The SSD of skipping each block in the MB.
+  Return: The number of bits _mv costs according to OC_MV_BITS[0].*/
+static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const signed char *_mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
+  int bits0;
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd);
+  bits0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31];
+  /*The shift applies to the whole difference, converting it to OC_BIT_SCALE
+     fractional bits.*/
+  _modec->overhead+=OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+  /*Recompute the cost now that the MV overhead has been added.*/
+  oc_mode_set_cost(_modec,_enc->lambda);
+  return bits0;
+}
+
+/*Maps a block's position in oc_mb_map (raster) order to its position in
+   oc_sb_map (Hilbert) order.
+  The first index is the MB's quadrant within its super block (_mbi&3); the
+   second is the raster block index.*/
+static const unsigned char OC_MB_PHASE[4][4]={
+  {0,1,3,2},
+  {0,3,1,2},
+  {0,3,1,2},
+  {2,3,1,0}
+};
+
+/*Estimates the cost of coding a MB in 4MV mode, with one motion vector per
+   luma block.
+  The luma SATDs are measured with the per-block vectors and analyzed first;
+   blocks the luma analysis marks as skipped then get (0,0) vectors, the chroma
+   vectors are derived from the result, and the chroma SATDs are measured with
+   those.
+  The overhead includes the mode signaling cost and the incremental cost of
+   the vectors of the non-skipped blocks.
+  As a side effect, the candidate vectors are stored in the frame's frag_mvs[]
+   entries for the four luma blocks.
+  _enc:      The encoder context.
+  _modec:    Receives the cost information for this mode.
+  _mbi:      The macro block index.
+  _mv:       The four candidate luma block vectors, in oc_mb_map (raster)
+              order.
+  _fr:       The coded-block flag run state before this MB.
+  _qs:       The qii coding state before this MB.
+  _skip_ssd: The SSD of skipping each block in the MB.*/
+static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12]){
+  unsigned               frag_satd[12];
+  oc_mv                  lbmvs[4];
+  oc_mv                  cbmvs[4];
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const ptrdiff_t       *frag_buf_offs;
+  oc_mv                 *frag_mvs;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    nqis;
+  int                    mapii;
+  int                    mapi;
+  int                    mv_offs[2];
+  int                    dx;
+  int                    dy;
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  int                    bits0;
+  int                    bits1;
+  unsigned               satd;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  frag_mvs=_enc->state.frag_mvs;
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  _modec->rate=_modec->ssd=0;
+  for(bi=0;bi<4;bi++){
+    fragi=mb_map[0][bi];
+    dx=_mv[bi][0];
+    dy=_mv[bi][1];
+    /*Save the block MVs as the current ones while we're here; we'll replace
+       them if we don't ultimately choose 4MV mode.*/
+    frag_mvs[fragi][0]=(signed char)dx;
+    frag_mvs[fragi][1]=(signed char)dy;
+    frag_offs=frag_buf_offs[fragi];
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
+      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+    else{
+      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+    /*The analysis expects luma SATDs in super block (Hilbert) order.*/
+    frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd;
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
+   _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1);
+  /*Figure out which blocks are being skipped and give them (0,0) MVs.*/
+  bits0=0;
+  bits1=0;
+  nqis=_enc->state.nqis;
+  for(bi=0;bi<4;bi++){
+    /*A qii at or above nqis marks a block the luma analysis chose to skip.*/
+    if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){
+      memset(lbmvs+bi,0,sizeof(*lbmvs));
+    }
+    else{
+      memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs));
+      bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31];
+      bits1+=12;
+    }
+  }
+  (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,
+   (const oc_mv *)lbmvs);
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  for(mapii=4;mapii<map_nidxs;mapii++){
+    mapi=map_idxs[mapii];
+    pli=mapi>>2;
+    bi=mapi&3;
+    fragi=mb_map[pli][bi];
+    dx=cbmvs[bi][0];
+    dy=cbmvs[bi][1];
+    frag_offs=frag_buf_offs[fragi];
+    /*TODO: We could save half these calls by re-using the results for the Cb
+       and Cr planes; is it worth it?*/
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){
+      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+    else{
+      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+    frag_satd[mapii]=satd;
+  }
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
+  /*The shift applies to the whole sum, converting it to OC_BIT_SCALE
+     fractional bits.*/
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR)
+   +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Analysis stage for an INTER frame.
+ The frame is processed one MCU stripe at a time: for each macro block a mode
+ is selected and luma is coded, then both chroma planes are coded for the
+ same stripe.
+ Coded MBs are appended to the front of _enc->coded_mbis, and uncoded MBs are
+ stored downwards from coded_mbis+nmbs.
+ _enc: The encoder context.
+ _allow_keyframe: When non-zero, estimated inter and intra bit counts are
+ compared, and the function returns early with 1 if the inter estimate is
+ larger.
+ _recode: NOTE(review): any use of this flag falls in lines that have lost
+ text below; confirm against the upstream source.
+ Return: 1 if the frame looks cheaper to code as a key frame (only possible
+ when _allow_keyframe is set), or 0 once the analysis has completed.*/
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){
+ oc_set_chroma_mvs_func set_chroma_mvs;
+ oc_enc_pipeline_state pipe;
+ oc_qii_state intra_luma_qs;
+ oc_mv last_mv;
+ oc_mv prior_mv;
+ ogg_int64_t interbits;
+ ogg_int64_t intrabits;
+ const unsigned char *map_idxs;
+ int nmap_idxs;
+ unsigned *coded_mbis;
+ unsigned *uncoded_mbis;
+ size_t ncoded_mbis;
+ size_t nuncoded_mbis;
+ oc_sb_flags *sb_flags;
+ signed char *mb_modes;
+ const oc_sb_map *sb_maps;
+ const oc_mb_map *mb_maps;
+ oc_mb_enc_info *embs;
+ oc_fragment *frags;
+ oc_mv *frag_mvs;
+ int qi;
+ unsigned stripe_sby;
+ unsigned mcu_nvsbs;
+ int notstart;
+ int notdone;
+ int vdec;
+ unsigned sbi;
+ unsigned sbi_end;
+ int refi;
+ int pli;
+ set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+ _enc->state.frame_type=OC_INTER_FRAME;
+ oc_mode_scheme_chooser_reset(&_enc->chooser);
+ oc_enc_tokenize_start(_enc);
+ oc_enc_pipeline_init(_enc,&pipe);
+ if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs);
+ _enc->mv_bits[0]=_enc->mv_bits[1]=0;
+ interbits=intrabits=0;
+ last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0;
+ /*Choose MVs and MB modes and quantize and code luma.
+ Must be done in Hilbert order.*/
+ map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+ nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+ qi=_enc->state.qis[0];
+ coded_mbis=_enc->coded_mbis;
+ /*The uncoded list shares the buffer and grows downwards from its end.*/
+ uncoded_mbis=coded_mbis+_enc->state.nmbs;
+ ncoded_mbis=0;
+ nuncoded_mbis=0;
+ _enc->state.ncoded_fragis[0]=0;
+ _enc->state.ncoded_fragis[1]=0;
+ _enc->state.ncoded_fragis[2]=0;
+ sb_flags=_enc->state.sb_flags;
+ mb_modes=_enc->state.mb_modes;
+ sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+ mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+ embs=_enc->mb_info;
+ frags=_enc->state.frags;
+ frag_mvs=_enc->state.frag_mvs;
+ vdec=!(_enc->state.info.pixel_fmt&2);
+ notstart=0;
+ notdone=1;
+ mcu_nvsbs=_enc->mcu_nvsbs;
+ for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+ notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
+ sbi_end=pipe.sbi_end[0];
+ /*NOTE(review): the next two lines are not valid C as shown; a large amount
+ of text appears to have been lost from them (the super block and quadrant
+ loops, the motion search, the evaluation of the candidate modes into
+ modes[], and the selection of mb_mode are not visible).
+ Restore them from the upstream source.*/
+ for(sbi=pipe.sbi0[0];sbisp_levelsp_levellambda*3;
+ if(modes[OC_MODE_INTER_MV_FOUR].cost>2][mbi&3][bi];
+ frags[fragi].qii=modes[mb_mode].qii[bi];
+ }
+ /*A positive return value means at least one luma block was coded.*/
+ if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,
+ modes[mb_mode].overhead>>OC_BIT_SCALE)>0){
+ int orig_mb_mode;
+ orig_mb_mode=mb_mode;
+ /*Luma coding may have changed the mode; re-read it.*/
+ mb_mode=mb_modes[mbi];
+ /*Update the MV predictors and the running MV bit totals for the final
+ mode.*/
+ switch(mb_mode){
+ case OC_MODE_INTER_MV:{
+ memcpy(prior_mv,last_mv,sizeof(prior_mv));
+ /*If we're backing out from 4MV, find the MV we're actually
+ using.*/
+ if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
+ for(bi=0;;bi++){
+ fragi=mb_maps[mbi][0][bi];
+ if(frags[fragi].coded){
+ memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
+ dx=frag_mvs[fragi][0];
+ dy=frag_mvs[fragi][1];
+ break;
+ }
+ }
+ mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31];
+ }
+ /*Otherwise we used the original analysis MV.*/
+ else{
+ memcpy(last_mv,
+ embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv));
+ }
+ _enc->mv_bits[0]+=mb_mv_bits_0;
+ _enc->mv_bits[1]+=12;
+ }break;
+ case OC_MODE_INTER_MV_LAST2:{
+ /*Swap last_mv and prior_mv.*/
+ oc_mv tmp_mv;
+ memcpy(tmp_mv,prior_mv,sizeof(tmp_mv));
+ memcpy(prior_mv,last_mv,sizeof(prior_mv));
+ memcpy(last_mv,tmp_mv,sizeof(last_mv));
+ }break;
+ case OC_MODE_GOLDEN_MV:{
+ _enc->mv_bits[0]+=mb_gmv_bits_0;
+ _enc->mv_bits[1]+=12;
+ }break;
+ case OC_MODE_INTER_MV_FOUR:{
+ oc_mv lbmvs[4];
+ oc_mv cbmvs[4];
+ memcpy(prior_mv,last_mv,sizeof(prior_mv));
+ for(bi=0;bi<4;bi++){
+ fragi=mb_maps[mbi][0][bi];
+ if(frags[fragi].coded){
+ memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
+ memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi]));
+ _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31]
+ +OC_MV_BITS[0][frag_mvs[fragi][1]+31];
+ _enc->mv_bits[1]+=12;
+ }
+ /*Replace the block MVs for not-coded blocks with (0,0).*/
+ else memset(lbmvs[bi],0,sizeof(lbmvs[bi]));
+ }
+ (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+ /*NOTE(review): the next line has lost text (the loop header and the
+ assignments of mapi and pli are not visible).*/
+ for(mapii=4;mapii>2;
+ bi=mapi&3;
+ fragi=mb_maps[mbi][pli][bi];
+ frags[fragi].mb_mode=mb_mode;
+ frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
+ memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi]));
+ }
+ }break;
+ }
+ coded_mbis[ncoded_mbis++]=mbi;
+ oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode);
+ interbits+=modes[mb_mode].rate+modes[mb_mode].overhead;
+ }
+ else{
+ /*No luma block was coded: record the MB as uncoded with a zero MV.*/
+ *(uncoded_mbis-++nuncoded_mbis)=mbi;
+ mb_mode=OC_MODE_INTER_NOMV;
+ dx=dy=0;
+ }
+ /*Propagate final MB mode and MVs to the chroma blocks.
+ This has already been done for 4MV mode, since it requires individual
+ block motion vectors.*/
+ if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+ /*NOTE(review): the next line has lost text (the loop header and the
+ assignments of mapi and pli are not visible).*/
+ for(mapii=4;mapii>2;
+ bi=mapi&3;
+ fragi=mb_maps[mbi][pli][bi];
+ frags[fragi].mb_mode=mb_mode;
+ /*If we switched from 4MV mode to INTER_MV mode, then the qii
+ values won't have been chosen with the right MV, but it's
+ probably not worth re-estimating them.*/
+ frags[fragi].qii=modes[mb_mode].qii[mapii];
+ frag_mvs[fragi][0]=(signed char)dx;
+ frag_mvs[fragi][1]=(signed char)dy;
+ }
+ }
+ }
+ oc_fr_state_flush_sb(pipe.fr+0);
+ sb_flags[sbi].coded_fully=pipe.fr[0].sb_full;
+ sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial;
+ }
+ oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
+ /*Code chroma planes.*/
+ for(pli=1;pli<3;pli++){
+ oc_enc_sb_transform_quantize_chroma(_enc,&pipe,
+ pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
+ oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
+ }
+ notstart=1;
+ }
+ /*Finish filling in the reference frame borders.*/
+ refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+ for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+ /*Finish adding flagging overhead costs to inter bit counts to determine if
+ we should have coded a key frame instead.*/
+ if(_allow_keyframe){
+ if(interbits>intrabits)return 1;
+ /*Technically the chroma plane counts are over-estimations, because they
+ don't account for continuing runs from the luma planes, but the
+ inaccuracy is small.*/
+ /*NOTE(review): the next line is not valid C as shown; text appears to have
+ been lost from it (the scaling of each added term is not visible).
+ Restore it from the upstream source.*/
+ for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<mv_bits[0],_enc->mv_bits[1])<chooser.scheme_bits[_enc->chooser.scheme_list[0]]<intrabits)return 1;
+ }
+ _enc->ncoded_mbis=ncoded_mbis;
+ /*Compact the coded fragment list.*/
+ {
+ /*Move each chroma plane's coded list, which starts at that plane's
+ froffset, down so that it directly follows the previous plane's list.*/
+ ptrdiff_t ncoded_fragis;
+ ncoded_fragis=_enc->state.ncoded_fragis[0];
+ for(pli=1;pli<3;pli++){
+ memmove(_enc->state.coded_fragis+ncoded_fragis,
+ _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset,
+ _enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis));
+ ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+ }
+ _enc->state.ntotal_coded_fragis=ncoded_fragis;
+ }
+ return 0;
+}
+
+#if defined(OC_COLLECT_METRICS)
+/*Standard headers needed only by the metrics collection code: <math.h>
+   declares ldexp().*/
+# include <stdio.h>
+# include <math.h>
+
+/*TODO: It may be helpful (for block-level quantizers especially) to separate
+ out the contributions from AC and DC into separate tables.*/
+
+/*A weight threshold that oc_enc_mode_metrics_update() compares accumulated
+ fragment weights against.
+ NOTE(review): the comparison sits in a line that has lost text, so its exact
+ role should be confirmed against the upstream source.*/
+# define OC_ZWEIGHT (0.25)
+
+/*Adds one weighted sample to a set of running statistics.
+  The sums of squared deviations are updated incrementally from the deviation
+   of the new sample from the current weighted means, before the weighted sums
+   themselves are updated.
+  _metrics: The statistics to update.
+  _w:       The weight of the sample.
+  _satd:    The SATD of the sample.
+  _rate:    The rate of the sample, scaled by 1<<OC_BIT_SCALE.
+  _rmse:    The RMSE of the sample.*/
+static void oc_mode_metrics_add(oc_mode_metrics *_metrics,
+ double _w,int _satd,int _rate,double _rmse){
+  double rate;
+  /*Accumulate statistics without the scaling; this lets us change the scale
+     factor yet still use old data.*/
+  rate=ldexp(_rate,-OC_BIT_SCALE);
+  /*The second-order terms can only be updated once there is a mean to measure
+     the deviation from.*/
+  if(_metrics->fragw>0){
+    double dev_satd;
+    double dev_rate;
+    double dev_rmse;
+    double wcomb;
+    dev_satd=_satd-_metrics->satd/_metrics->fragw;
+    dev_rate=rate-_metrics->rate/_metrics->fragw;
+    dev_rmse=_rmse-_metrics->rmse/_metrics->fragw;
+    wcomb=_metrics->fragw*_w/(_metrics->fragw+_w);
+    _metrics->satd2+=dev_satd*dev_satd*wcomb;
+    _metrics->satdrate+=dev_satd*dev_rate*wcomb;
+    _metrics->rate2+=dev_rate*dev_rate*wcomb;
+    _metrics->satdrmse+=dev_satd*dev_rmse*wcomb;
+    _metrics->rmse2+=dev_rmse*dev_rmse*wcomb;
+  }
+  _metrics->fragw+=_w;
+  _metrics->satd+=_satd*_w;
+  _metrics->rate+=rate*_w;
+  _metrics->rmse+=_rmse*_w;
+}
+
+/*Combines several sets of statistics into one.
+  Sets with a non-positive total weight are ignored; if every set is ignored,
+   the destination is zeroed.
+  _dst: Receives the combined statistics.
+  _src: The array of statistics to combine.
+  _n:   The number of entries in _src.*/
+static void oc_mode_metrics_merge(oc_mode_metrics *_dst,
+ const oc_mode_metrics *_src,int _n){
+  int i;
+  /*Find a non-empty set of metrics.*/
+  i=0;
+  while(i<_n&&_src[i].fragw<=0)i++;
+  if(i>=_n){
+    memset(_dst,0,sizeof(*_dst));
+    return;
+  }
+  /*Start from the first non-empty set.*/
+  memcpy(_dst,_src+i,sizeof(*_dst));
+  /*And iterate over the remaining non-empty sets of metrics.*/
+  for(i++;i<_n;i++){
+    const oc_mode_metrics *cur;
+    double                 wa;
+    double                 wb;
+    double                 dev_satd;
+    double                 dev_rate;
+    double                 dev_rmse;
+    double                 wcomb;
+    cur=_src+i;
+    if(!(cur->fragw>0))continue;
+    wa=_dst->fragw;
+    wb=cur->fragw;
+    /*The differences between the two means drive the cross terms.*/
+    dev_satd=cur->satd/wb-_dst->satd/wa;
+    dev_rate=cur->rate/wb-_dst->rate/wa;
+    dev_rmse=cur->rmse/wb-_dst->rmse/wa;
+    wcomb=wa*wb/(wa+wb);
+    _dst->fragw+=cur->fragw;
+    _dst->satd+=cur->satd;
+    _dst->rate+=cur->rate;
+    _dst->rmse+=cur->rmse;
+    _dst->satd2+=cur->satd2+dev_satd*dev_satd*wcomb;
+    _dst->satdrate+=cur->satdrate+dev_satd*dev_rate*wcomb;
+    _dst->rate2+=cur->rate2+dev_rate*dev_rate*wcomb;
+    _dst->satdrmse+=cur->satdrmse+dev_satd*dev_rmse*wcomb;
+    _dst->rmse2+=cur->rmse2+dev_rmse*dev_rmse*wcomb;
+  }
+}
+
+/*Compile collected SATD/rate/RMSE metrics into a form that's immediately
+ useful for mode decision.*/
+static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){
+ int pli;
+ int qti;
+ oc_restore_fpu(&_enc->state);
+ /*Convert raw collected data into cleaned up sample points.*/
+ for(pli=0;pli<3;pli++){
+ for(qti=0;qti<2;qti++){
+ double fragw;
+ int bin0;
+ int bin1;
+ int bin;
+ fragw=0;
+ bin0=bin1=0;
+ for(bin=0;bin=OC_ZWEIGHT){
+ fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw;
+ }
+ /*Merge statistics and fit lines.*/
+ oc_mode_metrics_merge(&metrics,
+ OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0);
+ if(metrics.fragw>0&&metrics.satd2>0){
+ double a;
+ double b;
+ double msatd;
+ double mrate;
+ double mrmse;
+ double rate;
+ double rmse;
+ msatd=metrics.satd/metrics.fragw;
+ mrate=metrics.rate/metrics.fragw;
+ mrmse=metrics.rmse/metrics.fragw;
+ /*Compute the points on these lines corresponding to the actual bin
+ value.*/
+ b=metrics.satdrate/metrics.satd2;
+ a=mrate-b*msatd;
+ rate=ldexp(a+b*(bin<>1);
+ return -_extra_bits;
+}
+
+/*Handles the pure zero run tokens: returns _extra_bits+1; _token is unused.*/
+static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
+ return _extra_bits+1;
+}
+
+/*Handles a normal coefficient value token; both arguments are ignored.*/
+static ptrdiff_t oc_token_skip_val(int _token,int _extra_bits){
+  return 1;/*Two-int signature: matches how oc_dct_token_skip() calls it.*/
+}
+
+/*Handles a category 1A zero run/coefficient value combo token.*/
+static ptrdiff_t oc_token_skip_run_cat1a(int _token,int _extra_bits){
+  return _token-OC_DCT_RUN_CAT1A+2;/*_extra_bits is accepted but unused.*/
+}
+
+/*Skip count for the category 1b, 1c, 2a, and 2b run/value combo tokens.*/
+static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
+  int cat;
+  int mask;
+  int base;
+  cat=_token-OC_DCT_RUN_CAT1B;/*Category index relative to CAT1B.*/
+  base=OC_BYTE_TABLE32(7,11,2,3,cat);
+  mask=OC_BYTE_TABLE32(3,7,0,1,cat);
+  return base+(_extra_bits&mask);
+}
+
+/*A jump table for computing the number of coefficients or blocks to skip for
+ a given token value.
+ This reduces all the conditional branches, etc., needed to parse these token
+ values down to one indirect jump.*/
+static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
+ oc_token_skip_eob,/*Entries 0-5.*/
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob,
+ oc_token_skip_eob6,/*Entry 6.*/
+ oc_token_skip_zrl,/*Entries 7-8: pure zero runs.*/
+ oc_token_skip_zrl,
+ (oc_token_skip_func)oc_token_skip_val,/*Entries 9-22: coefficient values.*/
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_val,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,/*Entries 23-27: category 1A.*/
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ (oc_token_skip_func)oc_token_skip_run_cat1a,
+ oc_token_skip_run,/*Entries 28-31: categories 1b, 1c, 2a, and 2b.*/
+ oc_token_skip_run,
+ oc_token_skip_run,
+ oc_token_skip_run
+};
+
+/*Looks up how far a token advances the decoding position.
+  _token: The DCT token value; it indexes OC_TOKEN_SKIP_TABLE directly, with
+           no range check.
+  _extra_bits: The extra bits that accompany the token.
+  Return: When positive, the number of coefficients to skip in the current
+           block.
+          When negative, its magnitude is the number of blocks that are
+           ended.
+          Zero is never returned, so every token accounts for at least one
+           coefficient in one block.*/
+static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
+  return OC_TOKEN_SKIP_TABLE[_token](_token,_extra_bits);
+}
+
+
+
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
+ static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+ 0,16,16,16,16,16,32,32,
+ 32,32,32,32,32,32,32,48,
+ 48,48,48,48,48,48,48,48,
+ 48,48,48,48,64,64,64,64,
+ 64,64,64,64,64,64,64,64,
+ 64,64,64,64,64,64,64,64,
+ 64,64,64,64,64,64,64,64
+ };
+ const oc_fragment *frags;
+ const unsigned *frag_satd;
+ const unsigned *frag_ssd;
+ const ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t fragii;
+ double fragw;
+ int qti;
+ int qii;
+ int qi;
+ int pli;
+ int zzi;
+ int token;
+ int eb;
+ oc_restore_fpu(&_enc->state);
+ /*Load any existing mode metrics if we haven't already.*/
+ if(!oc_has_mode_metrics){
+ FILE *fmetrics;
+ memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS));
+ fmetrics=fopen("modedec.stats","rb");
+ if(fmetrics!=NULL){
+ fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics);
+ fclose(fmetrics);
+ }
+ for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
+ oc_has_mode_metrics=1;
+ }
+ qti=_enc->state.frame_type;
+ frags=_enc->state.frags;
+ frag_satd=_enc->frag_satd;
+ frag_ssd=_enc->frag_ssd;
+ coded_fragis=_enc->state.coded_fragis;
+ ncoded_fragis=fragii=0;
+ /*Weight the fragments by the inverse frame size; this prevents HD content
+ from dominating the statistics.*/
+ fragw=1.0/_enc->state.nfrags;
+ for(pli=0;pli<3;pli++){
+ ptrdiff_t ti[64];
+ int eob_token[64];
+ int eob_run[64];
+ /*Set up token indices and eob run counts.
+ We don't bother trying to figure out the real cost of the runs that span
+ coefficients; instead we use the costs that were available when R-D
+ token optimization was done.*/
+ for(zzi=0;zzi<64;zzi++){
+ ti[zzi]=_enc->dct_token_offs[pli][zzi];
+ if(ti[zzi]>0){
+ token=_enc->dct_tokens[pli][zzi][0];
+ eb=_enc->extra_bits[pli][zzi][0];
+ eob_token[zzi]=token;
+ eob_run[zzi]=-oc_dct_token_skip(token,eb);
+ }
+ else{
+ eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+ eob_run[zzi]=0;
+ }
+ }
+ /*Scan the list of coded fragments for this plane.*/
+ ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+ for(;fragii0){
+ /*We've reached the end of the block.*/
+ eob_run[zzi]--;
+ break;
+ }
+ huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
+ +OC_ZZI_HUFF_OFFSET[zzi];
+ if(eob_token[zzi]huff_codes[huffi][eob_token[zzi]].nbits
+ +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
+ eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+ }
+ token=_enc->dct_tokens[pli][zzi][ti[zzi]];
+ eb=_enc->extra_bits[pli][zzi][ti[zzi]];
+ ti[zzi]++;
+ skip=oc_dct_token_skip(token,eb);
+ if(skip<0){
+ eob_token[zzi]=token;
+ eob_run[zzi]=-skip;
+ }
+ else{
+ /*A regular DCT value token; accumulate the bits for it.*/
+ frag_bits+=_enc->huff_codes[huffi][token].nbits
+ +OC_DCT_TOKEN_EXTRA_BITS[token];
+ zzi+=skip;
+ }
+ }
+ mb_mode=frags[fragi].mb_mode;
+ qi=_enc->state.qis[frags[fragi].qii];
+ satd=frag_satd[fragi]<<(pli+1&2);
+ bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1);
+ oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin,
+ fragw,satd,frag_bits<state.nqis;qii++){
+ oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]);
+ }
+}
+
+void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){
+ FILE *fmetrics;
+ int qi;
+ /*Generate sample points for complete list of QI values.*/
+ for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
+ fmetrics=fopen("modedec.stats","wb");
+ if(fmetrics!=NULL){
+ fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics);
+ fclose(fmetrics);
+ }
+ fprintf(stdout,
+ "/*File generated by libtheora with OC_COLLECT_METRICS"
+ " defined at compile time.*/\n"
+ "#if !defined(_modedec_H)\n"
+ "# define _modedec_H (1)\n"
+ "\n"
+ "\n"
+ "\n"
+ "# if defined(OC_COLLECT_METRICS)\n"
+ "typedef struct oc_mode_metrics oc_mode_metrics;\n"
+ "# endif\n"
+ "typedef struct oc_mode_rd oc_mode_rd;\n"
+ "\n"
+ "\n"
+ "\n"
+ "/*The number of extra bits of precision at which to store rate"
+ " metrics.*/\n"
+ "# define OC_BIT_SCALE (%i)\n"
+ "/*The number of extra bits of precision at which to store RMSE metrics.\n"
+ " This must be at least half OC_BIT_SCALE (rounded up).*/\n"
+ "# define OC_RMSE_SCALE (%i)\n"
+ "/*The number of bins to partition statistics into.*/\n"
+ "# define OC_SAD_BINS (%i)\n"
+ "/*The number of bits of precision to drop"
+ " from SAD scores to assign them to a\n"
+ " bin.*/\n"
+ "# define OC_SAD_SHIFT (%i)\n"
+ "\n"
+ "\n"
+ "\n"
+ "# if defined(OC_COLLECT_METRICS)\n"
+ "struct oc_mode_metrics{\n"
+ " double fragw;\n"
+ " double satd;\n"
+ " double rate;\n"
+ " double rmse;\n"
+ " double satd2;\n"
+ " double satdrate;\n"
+ " double rate2;\n"
+ " double satdrmse;\n"
+ " double rmse2;\n"
+ "};\n"
+ "\n"
+ "\n"
+ "int oc_has_mode_metrics;\n"
+ "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n"
+ "# endif\n"
+ "\n"
+ "\n"
+ "\n"
+ "struct oc_mode_rd{\n"
+ " ogg_int16_t rate;\n"
+ " ogg_int16_t rmse;\n"
+ "};\n"
+ "\n"
+ "\n"
+ "# if !defined(OC_COLLECT_METRICS)\n"
+ "static const\n"
+ "# endif\n"
+ "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n",
+ OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT);
+ for(qi=0;qi<64;qi++){
+ int pli;
+ fprintf(stdout," {\n");
+ for(pli=0;pli<3;pli++){
+ int qti;
+ fprintf(stdout," {\n");
+ for(qti=0;qti<2;qti++){
+ int bin;
+ static const char *pl_names[3]={"Y'","Cb","Cr"};
+ static const char *qti_names[2]={"INTRA","INTER"};
+ fprintf(stdout," /*%s qi=%i %s*/\n",
+ pl_names[pli],qi,qti_names[qti]);
+ fprintf(stdout," {\n");
+ fprintf(stdout," ");
+ for(bin=0;bininternal_decode!=NULL){
- (*((oc_state_dispatch_vtbl *)_th->internal_decode)->clear)(_th);
+ (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th);
}
if(_th->internal_encode!=NULL){
- (*((oc_state_dispatch_vtbl *)_th->internal_encode)->clear)(_th);
+ (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th);
}
if(_th->i!=NULL)theora_info_clear(_th->i);
memset(_th,0,sizeof(*_th));
@@ -59,11 +59,11 @@ void theora_clear(theora_state *_th){
int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
/*Provide compatibility with mixed encoder and decoder shared lib versions.*/
if(_th->internal_decode!=NULL){
- return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->control)(_th,
+ return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th,
_req,_buf,_buf_sz);
}
else if(_th->internal_encode!=NULL){
- return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->control)(_th,
+ return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th,
_req,_buf,_buf_sz);
}
else return TH_EINVAL;
@@ -72,11 +72,11 @@ int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
/*Provide compatibility with mixed encoder and decoder shared lib versions.*/
if(_th->internal_decode!=NULL){
- return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_frame)(
+ return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)(
_th,_gp);
}
else if(_th->internal_encode!=NULL){
- return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_frame)(
+ return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)(
_th,_gp);
}
else return -1;
@@ -85,11 +85,11 @@ ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
/*Provide compatibility with mixed encoder and decoder shared lib versions.*/
if(_th->internal_decode!=NULL){
- return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_time)(
+ return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)(
_th,_gp);
}
else if(_th->internal_encode!=NULL){
- return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_time)(
+ return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)(
_th,_gp);
}
else return -1;
diff --git a/Engine/lib/libtheora/lib/dec/apiwrapper.h b/Engine/lib/libtheora/lib/apiwrapper.h
similarity index 92%
rename from Engine/lib/libtheora/lib/dec/apiwrapper.h
rename to Engine/lib/libtheora/lib/apiwrapper.h
index 211021fc0..93454d7bd 100644
--- a/Engine/lib/libtheora/lib/dec/apiwrapper.h
+++ b/Engine/lib/libtheora/lib/apiwrapper.h
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
@@ -20,9 +20,8 @@
# include
# include
# include "theora/theoradec.h"
-/*# include "theora/theoraenc.h"*/
-typedef struct th_enc_ctx th_enc_ctx;
-# include "../internal.h"
+# include "theora/theoraenc.h"
+# include "internal.h"
typedef struct th_api_wrapper th_api_wrapper;
typedef struct th_api_info th_api_info;
diff --git a/Engine/lib/libtheora/lib/bitpack.c b/Engine/lib/libtheora/lib/bitpack.c
new file mode 100644
index 000000000..8195003ba
--- /dev/null
+++ b/Engine/lib/libtheora/lib/bitpack.c
@@ -0,0 +1,111 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: packing variable sized words into an octet stream
+ last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include
+#include
+#include "bitpack.h"
+
+/*We're 'MSb' endian; if we write a word but read individual bits,
+ then we'll read the MSb first.*/
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
+  memset(_b,0,sizeof(*_b));/*Empty window, no buffered bits, eof clear.*/
+  _b->stop=_buf+_bytes;/*One past the last readable byte.*/
+  _b->ptr=_buf;/*Next byte to load.*/
+}
+
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+ const unsigned char *ptr;
+ const unsigned char *stop;
+ oc_pb_window window;
+ int available;
+ window=_b->window;
+ available=_b->bits;
+ ptr=_b->ptr;
+ stop=_b->stop;
+ while(available<=OC_PB_WINDOW_SIZE-8&&ptrptr=ptr;
+ if(_bits>available){
+ if(ptr>=stop){
+ _b->eof=1;
+ available=OC_LOTS_OF_BITS;
+ }
+ else window|=*ptr>>(available&7);
+ }
+ _b->bits=available;
+ return window;
+}
+
+int oc_pack_look1(oc_pack_buf *_b){
+  oc_pb_window win;
+  /*Peek at the next bit without consuming it; refill only an empty window.*/
+  if(_b->bits>=1)win=_b->window;
+  else _b->window=win=oc_pack_refill(_b,1);
+  /*The next bit to be read sits in the most significant position.*/
+  return (int)(win>>(OC_PB_WINDOW_SIZE-1));
+}
+
+void oc_pack_adv1(oc_pack_buf *_b){
+  _b->bits-=1;/*Count one bit as consumed (no refill happens here)...*/
+  _b->window=_b->window<<1;/*...and shift it out of the top of the window.*/
+}
+
+/*Reads and consumes the next _bits bits, most significant bit first.
+  Here we assume that 0<=_bits&&_bits<=32.*/
+long oc_pack_read(oc_pack_buf *_b,int _bits){
+  oc_pb_window win;
+  long value;
+  int nbits;
+  if(_bits==0)return 0;
+  win=_b->window;
+  nbits=_b->bits;
+  if(nbits<_bits){
+    win=oc_pack_refill(_b,_bits);
+    nbits=_b->bits;
+  }
+  value=win>>(OC_PB_WINDOW_SIZE-_bits);
+  /*Shift in two steps; presumably this keeps the shift count below the window
+     width when _bits equals OC_PB_WINDOW_SIZE.*/
+  _b->window=win<<1<<(_bits-1);
+  _b->bits=nbits-_bits;
+  return value;
+}
+
+int oc_pack_read1(oc_pack_buf *_b){
+  oc_pb_window win;
+  int nbits;
+  int bit;
+  /*Reads and consumes one bit, refilling the window only when it is empty.*/
+  win=_b->window;
+  nbits=_b->bits;
+  if(nbits<1){
+    win=oc_pack_refill(_b,1);
+    nbits=_b->bits;
+  }
+  /*The next bit is the window's most significant bit.*/
+  bit=(int)(win>>(OC_PB_WINDOW_SIZE-1));
+  _b->window=win<<1;
+  _b->bits=nbits-1;
+  return bit;
+}
+
+long oc_pack_bytes_left(oc_pack_buf *_b){
+  /*-1 after reading past the end, else unread plus whole buffered bytes.*/
+  return _b->eof?-1:_b->stop-_b->ptr+(_b->bits>>3);
+}
diff --git a/Engine/lib/libtheora/lib/dec/bitpack.h b/Engine/lib/libtheora/lib/bitpack.h
similarity index 51%
rename from Engine/lib/libtheora/lib/dec/bitpack.h
rename to Engine/lib/libtheora/lib/bitpack.h
index 1bff3fa50..a020a292f 100644
--- a/Engine/lib/libtheora/lib/dec/bitpack.h
+++ b/Engine/lib/libtheora/lib/bitpack.h
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
@@ -16,23 +16,44 @@
********************************************************************/
#if !defined(_bitpack_H)
# define _bitpack_H (1)
-# include
+# include
-void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes);
-int theorapackB_look1(oggpack_buffer *_b,long *_ret);
-void theorapackB_adv1(oggpack_buffer *_b);
+
+
+typedef unsigned long oc_pb_window;
+typedef struct oc_pack_buf oc_pack_buf;
+
+
+
+# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+ loaded as an immediate (on platforms like ARM, for example).
+ Even relatively modest values like 100 would work fine.*/
+# define OC_LOTS_OF_BITS (0x40000000)
+
+
+
+struct oc_pack_buf{
+ oc_pb_window window;/*Buffered bits; the next bit to read is the MSb.*/
+ const unsigned char *ptr;/*Next input byte to load into the window.*/
+ const unsigned char *stop;/*One past the end of the input buffer.*/
+ int bits;/*Bits available in window (OC_LOTS_OF_BITS once eof is set).*/
+ int eof;/*Set to 1 when a refill needed bytes at or beyond stop.*/
+};
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
+int oc_pack_look1(oc_pack_buf *_b);
+void oc_pack_adv1(oc_pack_buf *_b);
/*Here we assume 0<=_bits&&_bits<=32.*/
-int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret);
-int theorapackB_read1(oggpack_buffer *_b,long *_ret);
-long theorapackB_bytes(oggpack_buffer *_b);
-long theorapackB_bits(oggpack_buffer *_b);
-unsigned char *theorapackB_get_buffer(oggpack_buffer *_b);
+long oc_pack_read(oc_pack_buf *_b,int _bits);
+int oc_pack_read1(oc_pack_buf *_b);
+/* returns -1 for read beyond EOF, or the number of whole bytes available */
+long oc_pack_bytes_left(oc_pack_buf *_b);
/*These two functions are implemented locally in huffdec.c*/
/*Read in bits without advancing the bitptr.
Here we assume 0<=_bits&&_bits<=32.*/
-/*static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret);*/
-/*static void theorapackB_adv(oggpack_buffer *_b,int _bits);*/
-
+/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
+/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
#endif
diff --git a/Engine/lib/libtheora/lib/cpu.c b/Engine/lib/libtheora/lib/cpu.c
index 8da50d070..a863aad7f 100644
--- a/Engine/lib/libtheora/lib/cpu.c
+++ b/Engine/lib/libtheora/lib/cpu.c
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
@@ -14,13 +14,13 @@
Originally written by Rudolf Marek.
function:
- last mod: $Id: cpu.c 15427 2008-10-21 02:36:19Z xiphmont $
+ last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include "cpu.h"
-#if !defined(USE_ASM)
+#if !defined(OC_X86_ASM)
static ogg_uint32_t oc_cpu_flags_get(void){
return 0;
}
@@ -166,7 +166,7 @@ static ogg_uint32_t oc_cpu_flags_get(void){
/* D M A c i t n e h t u A*/
else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
/* C S N y b e d o e G*/
- ecx==0x43534E20&&edx==0x79622065&&ebx==0x646F6547){
+ ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
/*AMD, Geode:*/
cpuid(0x80000000,eax,ebx,ecx,edx);
if(eax<0x80000001)flags=0;
@@ -192,7 +192,6 @@ static ogg_uint32_t oc_cpu_flags_get(void){
The C3-2 (Nehemiah) cores appear to, as well.*/
cpuid(1,eax,ebx,ecx,edx);
flags=oc_parse_intel_flags(edx,ecx);
- cpuid(0x80000000,eax,ebx,ecx,edx);
if(eax>=0x80000001){
/*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
We need to check this even if the Intel test succeeds to pick up 3DNow!
diff --git a/Engine/lib/libtheora/lib/cpu.h b/Engine/lib/libtheora/lib/cpu.h
index efe5e9f52..a43c957a3 100644
--- a/Engine/lib/libtheora/lib/cpu.h
+++ b/Engine/lib/libtheora/lib/cpu.h
@@ -5,12 +5,12 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
- last mod: $Id: cpu.h 15430 2008-10-21 05:03:55Z giles $
+ last mod: $Id: cpu.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
diff --git a/Engine/lib/libtheora/lib/dec/dct.h b/Engine/lib/libtheora/lib/dct.h
similarity index 90%
rename from Engine/lib/libtheora/lib/dec/dct.h
rename to Engine/lib/libtheora/lib/dct.h
index 09043dc51..24ba6f111 100644
--- a/Engine/lib/libtheora/lib/dec/dct.h
+++ b/Engine/lib/libtheora/lib/dct.h
@@ -5,13 +5,13 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
- last mod: $Id: dct.h 15400 2008-10-15 12:10:58Z tterribe $
+ last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
diff --git a/Engine/lib/libtheora/lib/dec/bitpack.c b/Engine/lib/libtheora/lib/dec/bitpack.c
deleted file mode 100644
index 3836150c2..000000000
--- a/Engine/lib/libtheora/lib/dec/bitpack.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function: packing variable sized words into an octet stream
- last mod: $Id: bitpack.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*We're 'MSb' endian; if we write a word but read individual bits,
- then we'll read the MSb first.*/
-
-#include
-#include
-#include "bitpack.h"
-
-void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes){
- memset(_b,0,sizeof(*_b));
- _b->buffer=_b->ptr=_buf;
- _b->storage=_bytes;
-}
-
-int theorapackB_look1(oggpack_buffer *_b,long *_ret){
- if(_b->endbyte>=_b->storage){
- *_ret=0L;
- return -1;
- }
- *_ret=(_b->ptr[0]>>7-_b->endbit)&1;
- return 0;
-}
-
-void theorapackB_adv1(oggpack_buffer *_b){
- if(++(_b->endbit)>7){
- _b->endbit=0;
- _b->ptr++;
- _b->endbyte++;
- }
-}
-
-/*Here we assume that 0<=_bits&&_bits<=32.*/
-int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret){
- long ret;
- long m;
- long d;
- int fail;
- m=32-_bits;
- _bits+=_b->endbit;
- d=_b->storage-_b->endbyte;
- if(d<=4){
- /*Not the main path.*/
- if(d*8<_bits){
- *_ret=0L;
- fail=-1;
- goto overflow;
- }
- /*Special case to avoid reading _b->ptr[0], which might be past the end of
- the buffer; also skips some useless accounting.*/
- else if(!_bits){
- *_ret=0L;
- return 0;
- }
- }
- ret=_b->ptr[0]<<24+_b->endbit;
- if(_bits>8){
- ret|=_b->ptr[1]<<16+_b->endbit;
- if(_bits>16){
- ret|=_b->ptr[2]<<8+_b->endbit;
- if(_bits>24){
- ret|=_b->ptr[3]<<_b->endbit;
- if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit;
- }
- }
- }
- *_ret=((ret&0xFFFFFFFFUL)>>(m>>1))>>(m+1>>1);
- fail=0;
-overflow:
- _b->ptr+=_bits>>3;
- _b->endbyte+=_bits>>3;
- _b->endbit=_bits&7;
- return fail;
-}
-
-int theorapackB_read1(oggpack_buffer *_b,long *_ret){
- int fail;
- if(_b->endbyte>=_b->storage){
- /*Not the main path.*/
- *_ret=0L;
- fail=-1;
- }
- else{
- *_ret=(_b->ptr[0]>>7-_b->endbit)&1;
- fail=0;
- }
- _b->endbit++;
- if(_b->endbit>7){
- _b->endbit=0;
- _b->ptr++;
- _b->endbyte++;
- }
- return fail;
-}
-
-long theorapackB_bytes(oggpack_buffer *_b){
- return _b->endbyte+(_b->endbit+7>>3);
-}
-
-long theorapackB_bits(oggpack_buffer *_b){
- return _b->endbyte*8+_b->endbit;
-}
-
-unsigned char *theorapackB_get_buffer(oggpack_buffer *_b){
- return _b->buffer;
-}
diff --git a/Engine/lib/libtheora/lib/dec/decode.c b/Engine/lib/libtheora/lib/dec/decode.c
deleted file mode 100644
index 5804cf709..000000000
--- a/Engine/lib/libtheora/lib/dec/decode.c
+++ /dev/null
@@ -1,2057 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: decode.c 15403 2008-10-16 12:44:05Z tterribe $
-
- ********************************************************************/
-
-#include
-#include
-#include
-#include "decint.h"
-#if defined(OC_DUMP_IMAGES)
-# include
-# include "png.h"
-#endif
-
-/*No post-processing.*/
-#define OC_PP_LEVEL_DISABLED (0)
-/*Keep track of DC qi for each block only.*/
-#define OC_PP_LEVEL_TRACKDCQI (1)
-/*Deblock the luma plane.*/
-#define OC_PP_LEVEL_DEBLOCKY (2)
-/*Dering the luma plane.*/
-#define OC_PP_LEVEL_DERINGY (3)
-/*Stronger luma plane deringing.*/
-#define OC_PP_LEVEL_SDERINGY (4)
-/*Deblock the chroma planes.*/
-#define OC_PP_LEVEL_DEBLOCKC (5)
-/*Dering the chroma planes.*/
-#define OC_PP_LEVEL_DERINGC (6)
-/*Stronger chroma plane deringing.*/
-#define OC_PP_LEVEL_SDERINGC (7)
-/*Maximum valid post-processing level.*/
-#define OC_PP_LEVEL_MAX (7)
-
-
-
-/*The mode alphabets for the various mode coding schemes.
- Scheme 0 uses a custom alphabet, which is not stored in this table.*/
-static const int OC_MODE_ALPHABETS[7][OC_NMODES]={
- /*Last MV dominates */
- {
- OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
- OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
- OC_MODE_INTER_MV_FOUR
- },
- {
- OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
- OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
- OC_MODE_INTER_MV_FOUR
- },
- {
- OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
- OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
- OC_MODE_INTER_MV_FOUR
- },
- {
- OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
- OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
- OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
- },
- /*No MV dominates.*/
- {
- OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
- OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
- OC_MODE_INTER_MV_FOUR
- },
- {
- OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
- OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
- OC_MODE_INTER_MV_FOUR
- },
- /*Default ordering.*/
- {
- OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
- OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
- OC_MODE_INTER_MV_FOUR
- }
-};
-
-
-static int oc_sb_run_unpack(oggpack_buffer *_opb){
- long bits;
- int ret;
- /*Coding scheme:
- Codeword Run Length
- 0 1
- 10x 2-3
- 110x 4-5
- 1110xx 6-9
- 11110xxx 10-17
- 111110xxxx 18-33
- 111111xxxxxxxxxxxx 34-4129*/
- theorapackB_read1(_opb,&bits);
- if(bits==0)return 1;
- theorapackB_read(_opb,2,&bits);
- if((bits&2)==0)return 2+(int)bits;
- else if((bits&1)==0){
- theorapackB_read1(_opb,&bits);
- return 4+(int)bits;
- }
- theorapackB_read(_opb,3,&bits);
- if((bits&4)==0)return 6+(int)bits;
- else if((bits&2)==0){
- ret=10+((bits&1)<<2);
- theorapackB_read(_opb,2,&bits);
- return ret+(int)bits;
- }
- else if((bits&1)==0){
- theorapackB_read(_opb,4,&bits);
- return 18+(int)bits;
- }
- theorapackB_read(_opb,12,&bits);
- return 34+(int)bits;
-}
-
-static int oc_block_run_unpack(oggpack_buffer *_opb){
- long bits;
- long bits2;
- /*Coding scheme:
- Codeword Run Length
- 0x 1-2
- 10x 3-4
- 110x 5-6
- 1110xx 7-10
- 11110xx 11-14
- 11111xxxx 15-30*/
- theorapackB_read(_opb,2,&bits);
- if((bits&2)==0)return 1+(int)bits;
- else if((bits&1)==0){
- theorapackB_read1(_opb,&bits);
- return 3+(int)bits;
- }
- theorapackB_read(_opb,2,&bits);
- if((bits&2)==0)return 5+(int)bits;
- else if((bits&1)==0){
- theorapackB_read(_opb,2,&bits);
- return 7+(int)bits;
- }
- theorapackB_read(_opb,3,&bits);
- if((bits&4)==0)return 11+bits;
- theorapackB_read(_opb,2,&bits2);
- return 15+((bits&3)<<2)+bits2;
-}
-
-
-
-static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
- const th_setup_info *_setup){
- int qti;
- int pli;
- int qi;
- int ret;
- ret=oc_state_init(&_dec->state,_info);
- if(ret<0)return ret;
- oc_huff_trees_copy(_dec->huff_tables,
- (const oc_huff_node *const *)_setup->huff_tables);
- for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
- _dec->state.dequant_tables[qti][pli]=
- _dec->state.dequant_table_data[qti][pli];
- }
- oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
- &_setup->qinfo);
- for(qi=0;qi<64;qi++){
- int qsum;
- qsum=0;
- for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
- qsum+=_dec->state.dequant_tables[qti][pli][qi][18]+
- _dec->state.dequant_tables[qti][pli][qi][19]+
- _dec->state.dequant_tables[qti][pli][qi][26]+
- _dec->state.dequant_tables[qti][pli][qi][27]<<(pli==0);
- }
- _dec->pp_sharp_mod[qi]=-(qsum>>11);
- }
- _dec->dct_tokens=(unsigned char **)oc_calloc_2d(64,
- _dec->state.nfrags,sizeof(_dec->dct_tokens[0][0]));
- _dec->extra_bits=(ogg_uint16_t **)oc_calloc_2d(64,
- _dec->state.nfrags,sizeof(_dec->extra_bits[0][0]));
- memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
- sizeof(_dec->state.loop_filter_limits));
- _dec->pp_level=OC_PP_LEVEL_DISABLED;
- _dec->dc_qis=NULL;
- _dec->variances=NULL;
- _dec->pp_frame_data=NULL;
- _dec->stripe_cb.ctx=NULL;
- _dec->stripe_cb.stripe_decoded=NULL;
- return 0;
-}
-
-static void oc_dec_clear(oc_dec_ctx *_dec){
- _ogg_free(_dec->pp_frame_data);
- _ogg_free(_dec->variances);
- _ogg_free(_dec->dc_qis);
- oc_free_2d(_dec->extra_bits);
- oc_free_2d(_dec->dct_tokens);
- oc_huff_trees_clear(_dec->huff_tables);
- oc_state_clear(&_dec->state);
-}
-
-
-static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
- long val;
- /*Check to make sure this is a data packet.*/
- theorapackB_read1(&_dec->opb,&val);
- if(val!=0)return TH_EBADPACKET;
- /*Read in the frame type (I or P).*/
- theorapackB_read1(&_dec->opb,&val);
- _dec->state.frame_type=(int)val;
- /*Read in the current qi.*/
- theorapackB_read(&_dec->opb,6,&val);
- _dec->state.qis[0]=(int)val;
- theorapackB_read1(&_dec->opb,&val);
- if(!val)_dec->state.nqis=1;
- else{
- theorapackB_read(&_dec->opb,6,&val);
- _dec->state.qis[1]=(int)val;
- theorapackB_read1(&_dec->opb,&val);
- if(!val)_dec->state.nqis=2;
- else{
- theorapackB_read(&_dec->opb,6,&val);
- _dec->state.qis[2]=(int)val;
- _dec->state.nqis=3;
- }
- }
- if(_dec->state.frame_type==OC_INTRA_FRAME){
- /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
- Most of the other unused bits in the VP3 headers were eliminated.
- I don't know why these remain.*/
- /* I wanted to eliminate wasted bits, but not all config wiggle room --Monty */
- theorapackB_read(&_dec->opb,3,&val);
- if(val!=0)return TH_EIMPL;
- }
- return 0;
-}
-
-/*Mark all fragments as coded and in OC_MODE_INTRA.
- This also builds up the coded fragment list (in coded order), and clears the
- uncoded fragment list.
- It does not update the coded macro block list, as that is not used when
- decoding INTRA frames.*/
-static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
- oc_sb *sb;
- oc_sb *sb_end;
- int pli;
- int ncoded_fragis;
- int prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis=0;
- sb=sb_end=_dec->state.sbs;
- for(pli=0;pli<3;pli++){
- const oc_fragment_plane *fplane;
- fplane=_dec->state.fplanes+pli;
- sb_end+=fplane->nsbs;
- for(;sbquad_valid&1<map[quadi][bi];
- if(fragi>=0){
- oc_fragment *frag;
- frag=_dec->state.frags+fragi;
- frag->coded=1;
- frag->mbmode=OC_MODE_INTRA;
- _dec->state.coded_fragis[ncoded_fragis++]=fragi;
- }
- }
- }
- }
- _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis;
- _dec->state.nuncoded_fragis[pli]=0;
- }
-}
-
-/*Decodes the bit flags for whether or not each super block is partially coded
- or not.
- Return: The number of partially coded super blocks.*/
-static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
- oc_sb *sb;
- oc_sb *sb_end;
- long val;
- int flag;
- int npartial;
- int run_count;
- theorapackB_read1(&_dec->opb,&val);
- flag=(int)val;
- sb=_dec->state.sbs;
- sb_end=sb+_dec->state.nsbs;
- run_count=npartial=0;
- while(sbopb);
- full_run=run_count>=4129;
- do{
- sb->coded_partially=flag;
- sb->coded_fully=0;
- npartial+=flag;
- sb++;
- }
- while(--run_count>0&&sbopb,&val);
- flag=(int)val;
- }
- else flag=!flag;
- }
- /*TODO: run_count should be 0 here.
- If it's not, we should issue a warning of some kind.*/
- return npartial;
-}
-
-/*Decodes the bit flags for whether or not each non-partially-coded super
- block is fully coded or not.
- This function should only be called if there is at least one
- non-partially-coded super block.
- Return: The number of partially coded super blocks.*/
-static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
- oc_sb *sb;
- oc_sb *sb_end;
- long val;
- int flag;
- int run_count;
- sb=_dec->state.sbs;
- sb_end=sb+_dec->state.nsbs;
- /*Skip partially coded super blocks.*/
- for(;sb->coded_partially;sb++);
- theorapackB_read1(&_dec->opb,&val);
- flag=(int)val;
- while(sbopb);
- full_run=run_count>=4129;
- for(;sbcoded_partially)continue;
- if(run_count--<=0)break;
- sb->coded_fully=flag;
- }
- if(full_run&&sbopb,&val);
- flag=(int)val;
- }
- else flag=!flag;
- }
- /*TODO: run_count should be 0 here.
- If it's not, we should issue a warning of some kind.*/
-}
-
-static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
- oc_sb *sb;
- oc_sb *sb_end;
- long val;
- int npartial;
- int pli;
- int flag;
- int run_count;
- int ncoded_fragis;
- int prev_ncoded_fragis;
- int nuncoded_fragis;
- int prev_nuncoded_fragis;
- npartial=oc_dec_partial_sb_flags_unpack(_dec);
- if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
- if(npartial>0){
- theorapackB_read1(&_dec->opb,&val);
- flag=!(int)val;
- }
- else flag=0;
- run_count=0;
- prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
- sb=sb_end=_dec->state.sbs;
- for(pli=0;pli<3;pli++){
- const oc_fragment_plane *fplane;
- fplane=_dec->state.fplanes+pli;
- sb_end+=fplane->nsbs;
- for(;sbquad_valid&1<map[quadi][bi];
- if(fragi>=0){
- oc_fragment *frag;
- frag=_dec->state.frags+fragi;
- if(sb->coded_fully)frag->coded=1;
- else if(!sb->coded_partially)frag->coded=0;
- else{
- if(run_count<=0){
- run_count=oc_block_run_unpack(&_dec->opb);
- flag=!flag;
- }
- run_count--;
- frag->coded=flag;
- }
- if(frag->coded)_dec->state.coded_fragis[ncoded_fragis++]=fragi;
- else *(_dec->state.uncoded_fragis-++nuncoded_fragis)=fragi;
- }
- }
- }
- }
- _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
- prev_ncoded_fragis=ncoded_fragis;
- _dec->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
- prev_nuncoded_fragis=nuncoded_fragis;
- }
- /*TODO: run_count should be 0 here.
- If it's not, we should issue a warning of some kind.*/
-}
-
-
-
-typedef int (*oc_mode_unpack_func)(oggpack_buffer *_opb);
-
-static int oc_vlc_mode_unpack(oggpack_buffer *_opb){
- long val;
- int i;
- for(i=0;i<7;i++){
- theorapackB_read1(_opb,&val);
- if(!val)break;
- }
- return i;
-}
-
-static int oc_clc_mode_unpack(oggpack_buffer *_opb){
- long val;
- theorapackB_read(_opb,3,&val);
- return (int)val;
-}
-
-/*Unpacks the list of macro block modes for INTER frames.*/
-static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
- oc_mode_unpack_func mode_unpack;
- oc_mb *mb;
- oc_mb *mb_end;
- const int *alphabet;
- long val;
- int scheme0_alphabet[8];
- int mode_scheme;
- theorapackB_read(&_dec->opb,3,&val);
- mode_scheme=(int)val;
- if(mode_scheme==0){
- int mi;
- /*Just in case, initialize the modes to something.
- If the bitstream doesn't contain each index exactly once, it's likely
- corrupt and the rest of the packet is garbage anyway, but this way we
- won't crash, and we'll decode SOMETHING.*/
- /*LOOP VECTORIZES.*/
- for(mi=0;miopb,3,&val);
- scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
- }
- alphabet=scheme0_alphabet;
- }
- else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
- if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
- else mode_unpack=oc_vlc_mode_unpack;
- mb=_dec->state.mbs;
- mb_end=mb+_dec->state.nmbs;
- for(;mbmode!=OC_MODE_INVALID){
- int bi;
- for(bi=0;bi<4;bi++){
- int fragi;
- fragi=mb->map[0][bi];
- if(fragi>=0&&_dec->state.frags[fragi].coded)break;
- }
- if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
- else mb->mode=OC_MODE_INTER_NOMV;
- }
- }
-}
-
-
-
-typedef int (*oc_mv_comp_unpack_func)(oggpack_buffer *_opb);
-
-static int oc_vlc_mv_comp_unpack(oggpack_buffer *_opb){
- long bits;
- int mvsigned[2];
- theorapackB_read(_opb,3,&bits);
- switch(bits){
- case 0:return 0;
- case 1:return 1;
- case 2:return -1;
- case 3:
- case 4:{
- mvsigned[0]=(int)(bits-1);
- theorapackB_read1(_opb,&bits);
- }break;
- /*case 5:
- case 6:
- case 7:*/
- default:{
- mvsigned[0]=1<>1);
- bits&=1;
- }break;
- }
- mvsigned[1]=-mvsigned[0];
- return mvsigned[bits];
-}
-
-static int oc_clc_mv_comp_unpack(oggpack_buffer *_opb){
- long bits;
- int mvsigned[2];
- theorapackB_read(_opb,6,&bits);
- mvsigned[0]=bits>>1;
- mvsigned[1]=-mvsigned[0];
- return mvsigned[bits&1];
-}
-
-/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
- block modes and motion vectors to the individual fragments.*/
-static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
- oc_set_chroma_mvs_func set_chroma_mvs;
- oc_mv_comp_unpack_func mv_comp_unpack;
- oc_mb *mb;
- oc_mb *mb_end;
- const int *map_idxs;
- long val;
- int map_nidxs;
- oc_mv last_mv[2];
- oc_mv cbmvs[4];
- set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
- theorapackB_read1(&_dec->opb,&val);
- mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
- map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
- map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
- memset(last_mv,0,sizeof(last_mv));
- mb=_dec->state.mbs;
- mb_end=mb+_dec->state.nmbs;
- for(;mbmode!=OC_MODE_INVALID){
- oc_fragment *frag;
- oc_mv mbmv;
- int coded[13];
- int codedi;
- int ncoded;
- int mapi;
- int mapii;
- int fragi;
- int mb_mode;
- /*Search for at least one coded fragment.*/
- ncoded=mapii=0;
- do{
- mapi=map_idxs[mapii];
- fragi=mb->map[mapi>>2][mapi&3];
- if(fragi>=0&&_dec->state.frags[fragi].coded)coded[ncoded++]=mapi;
- }
- while(++mapiimode;
- switch(mb_mode){
- case OC_MODE_INTER_MV_FOUR:{
- oc_mv lbmvs[4];
- int bi;
- /*Mark the tail of the list, so we don't accidentally go past it.*/
- coded[ncoded]=-1;
- for(bi=codedi=0;bi<4;bi++){
- if(coded[codedi]==bi){
- codedi++;
- frag=_dec->state.frags+mb->map[0][bi];
- frag->mbmode=mb_mode;
- frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
- frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
- }
- else lbmvs[bi][0]=lbmvs[bi][1]=0;
- }
- if(codedi>0){
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- last_mv[0][0]=lbmvs[coded[codedi-1]][0];
- last_mv[0][1]=lbmvs[coded[codedi-1]][1];
- }
- if(codedistate.frags+mb->map[mapi>>2][bi];
- frag->mbmode=mb_mode;
- frag->mv[0]=cbmvs[bi][0];
- frag->mv[1]=cbmvs[bi][1];
- }
- }
- }break;
- case OC_MODE_INTER_MV:{
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
- mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
- }break;
- case OC_MODE_INTER_MV_LAST:{
- mbmv[0]=last_mv[0][0];
- mbmv[1]=last_mv[0][1];
- }break;
- case OC_MODE_INTER_MV_LAST2:{
- mbmv[0]=last_mv[1][0];
- mbmv[1]=last_mv[1][1];
- last_mv[1][0]=last_mv[0][0];
- last_mv[1][1]=last_mv[0][1];
- last_mv[0][0]=mbmv[0];
- last_mv[0][1]=mbmv[1];
- }break;
- case OC_MODE_GOLDEN_MV:{
- mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
- mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
- }break;
- default:mbmv[0]=mbmv[1]=0;break;
- }
- /*4MV mode fills in the fragments itself.
- For all other modes we can use this common code.*/
- if(mb_mode!=OC_MODE_INTER_MV_FOUR){
- for(codedi=0;codedimap[mapi>>2][mapi&3];
- frag=_dec->state.frags+fragi;
- frag->mbmode=mb_mode;
- frag->mv[0]=mbmv[0];
- frag->mv[1]=mbmv[1];
- }
- }
- }
-}
-
-static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
- oc_fragment *frag;
- int *coded_fragi;
- int *coded_fragi_end;
- int ncoded_fragis;
- ncoded_fragis=_dec->state.ncoded_fragis[0]+
- _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
- if(ncoded_fragis<=0)return;
- coded_fragi=_dec->state.coded_fragis;
- coded_fragi_end=coded_fragi+ncoded_fragis;
- if(_dec->state.nqis==1){
- /*If this frame has only a single qi value, then just set it in all coded
- fragments.*/
- while(coded_fragistate.frags[*coded_fragi++].qi=_dec->state.qis[0];
- }
- }
- else{
- long val;
- int flag;
- int nqi1;
- int run_count;
- /*Otherwise, we decode a qi index for each fragment, using two passes of
- the same binary RLE scheme used for super-block coded bits.
- The first pass marks each fragment as having a qii of 0 or greater than
- 0, and the second pass (if necessary), distinguishes between a qii of
- 1 and 2.
- At first we just store the qii in the fragment.
- After all the qii's are decoded, we make a final pass to replace them
- with the corresponding qi's for this frame.*/
- theorapackB_read1(&_dec->opb,&val);
- flag=(int)val;
- run_count=nqi1=0;
- while(coded_fragiopb);
- full_run=run_count>=4129;
- do{
- _dec->state.frags[*coded_fragi++].qi=flag;
- nqi1+=flag;
- }
- while(--run_count>0&&coded_fragiopb,&val);
- flag=(int)val;
- }
- else flag=!flag;
- }
- /*TODO: run_count should be 0 here.
- If it's not, we should issue a warning of some kind.*/
- /*If we have 3 different qi's for this frame, and there was at least one
- fragment with a non-zero qi, make the second pass.*/
- if(_dec->state.nqis==3&&nqi1>0){
- /*Skip qii==0 fragments.*/
- for(coded_fragi=_dec->state.coded_fragis;
- _dec->state.frags[*coded_fragi].qi==0;coded_fragi++);
- theorapackB_read1(&_dec->opb,&val);
- flag=(int)val;
- while(coded_fragiopb);
- full_run=run_count>=4129;
- for(;coded_fragistate.frags+*coded_fragi;
- if(frag->qi==0)continue;
- if(run_count--<=0)break;
- frag->qi+=flag;
- }
- if(full_run&&coded_fragiopb,&val);
- flag=(int)val;
- }
- else flag=!flag;
- }
- /*TODO: run_count should be 0 here.
- If it's not, we should issue a warning of some kind.*/
- }
- /*Finally, translate qii's to qi's.*/
- for(coded_fragi=_dec->state.coded_fragis;coded_fragistate.frags+*coded_fragi;
- frag->qi=_dec->state.qis[frag->qi];
- }
- }
-}
-
-
-
-/*Returns the decoded value of the given token.
- It CANNOT be called for any of the EOB tokens.
- _token: The token value to skip.
- _extra_bits: The extra bits attached to this token.
- Return: The decoded coefficient value.*/
-typedef int (*oc_token_dec1val_func)(int _token,int _extra_bits);
-
-/*Handles zero run tokens.*/
-static int oc_token_dec1val_zrl(void){
- return 0;
-}
-
-/*Handles 1, -1, 2 and -2 tokens.*/
-static int oc_token_dec1val_const(int _token){
- static const int CONST_VALS[4]={1,-1,2,-2};
- return CONST_VALS[_token-OC_NDCT_ZRL_TOKEN_MAX];
-}
-
-/*Handles DCT value tokens category 2.*/
-static int oc_token_dec1val_cat2(int _token,int _extra_bits){
- int valsigned[2];
- valsigned[0]=_token-OC_DCT_VAL_CAT2+3;
- valsigned[1]=-valsigned[0];
- return valsigned[_extra_bits];
-}
-
-/*Handles DCT value tokens categories 3 through 8.*/
-static int oc_token_dec1val_cati(int _token,int _extra_bits){
- static const int VAL_CAT_OFFS[6]={
- OC_NDCT_VAL_CAT2_SIZE+3,
- OC_NDCT_VAL_CAT2_SIZE+5,
- OC_NDCT_VAL_CAT2_SIZE+9,
- OC_NDCT_VAL_CAT2_SIZE+17,
- OC_NDCT_VAL_CAT2_SIZE+33,
- OC_NDCT_VAL_CAT2_SIZE+65
- };
- static const int VAL_CAT_MASKS[6]={
- 0x001,0x003,0x007,0x00F,0x01F,0x1FF
- };
- static const int VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
- int valsigned[2];
- int cati;
- cati=_token-OC_NDCT_VAL_CAT2_MAX;
- valsigned[0]=VAL_CAT_OFFS[cati]+(_extra_bits&VAL_CAT_MASKS[cati]);
- valsigned[1]=-valsigned[0];
- return valsigned[_extra_bits>>VAL_CAT_SHIFTS[cati]&1];
-}
-
-/*A jump table for compute the first coefficient value the given token value
- represents.*/
-static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS-
- OC_NDCT_EOB_TOKEN_MAX]={
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_const,
- (oc_token_dec1val_func)oc_token_dec1val_const,
- (oc_token_dec1val_func)oc_token_dec1val_const,
- (oc_token_dec1val_func)oc_token_dec1val_const,
- oc_token_dec1val_cat2,
- oc_token_dec1val_cat2,
- oc_token_dec1val_cat2,
- oc_token_dec1val_cat2,
- oc_token_dec1val_cati,
- oc_token_dec1val_cati,
- oc_token_dec1val_cati,
- oc_token_dec1val_cati,
- oc_token_dec1val_cati,
- oc_token_dec1val_cati,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl,
- (oc_token_dec1val_func)oc_token_dec1val_zrl
-};
-
-/*Returns the decoded value of the given token.
- It CANNOT be called for any of the EOB tokens.
- _token: The token value to skip.
- _extra_bits: The extra bits attached to this token.
- Return: The decoded coefficient value.*/
-static int oc_dct_token_dec1val(int _token,int _extra_bits){
- return (*OC_TOKEN_DEC1VAL_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
- _extra_bits);
-}
-
-/*Unpacks the DC coefficient tokens.
- Unlike when unpacking the AC coefficient tokens, we actually need to decode
- the DC coefficient values now so that we can do DC prediction.
- _huff_idx: The index of the Huffman table to use for each color plane.
- _ntoks_left: The number of tokens left to be decoded in each color plane for
- each coefficient.
- This is updated as EOB tokens and zero run tokens are decoded.
- Return: The length of any outstanding EOB run.*/
-static int oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[3],
- int _ntoks_left[3][64]){
- long val;
- int *coded_fragi;
- int *coded_fragi_end;
- int run_counts[64];
- int cfi;
- int eobi;
- int eobs;
- int ti;
- int ebi;
- int pli;
- int rli;
- eobs=0;
- ti=ebi=0;
- coded_fragi_end=coded_fragi=_dec->state.coded_fragis;
- for(pli=0;pli<3;pli++){
- coded_fragi_end+=_dec->state.ncoded_fragis[pli];
- memset(run_counts,0,sizeof(run_counts));
- _dec->eob_runs[pli][0]=eobs;
- /*Continue any previous EOB run, if there was one.*/
- for(eobi=eobs;eobi-->0&&coded_fragistate.frags[*coded_fragi++].dc=0;
- }
- cfi=0;
- while(eobs<_ntoks_left[pli][0]-cfi){
- int token;
- int neb;
- int eb;
- int skip;
- cfi+=eobs;
- run_counts[63]+=eobs;
- token=oc_huff_token_decode(&_dec->opb,
- _dec->huff_tables[_huff_idxs[pli]]);
- _dec->dct_tokens[0][ti++]=(unsigned char)token;
- neb=OC_DCT_TOKEN_EXTRA_BITS[token];
- if(neb){
- theorapackB_read(&_dec->opb,neb,&val);
- eb=(int)val;
- _dec->extra_bits[0][ebi++]=(ogg_uint16_t)eb;
- }
- else eb=0;
- skip=oc_dct_token_skip(token,eb);
- if(skip<0){
- eobs=eobi=-skip;
- while(eobi-->0&&coded_fragistate.frags[*coded_fragi++].dc=0;
- }
- }
- else{
- run_counts[skip-1]++;
- cfi++;
- eobs=0;
- _dec->state.frags[*coded_fragi++].dc=oc_dct_token_dec1val(token,eb);
- }
- }
- _dec->ti0[pli][0]=ti;
- _dec->ebi0[pli][0]=ebi;
- /*Set the EOB count to the portion of the last EOB run which extends past
- this coefficient.*/
- eobs=eobs+cfi-_ntoks_left[pli][0];
- /*Add the portion of the last EOB which was included in this coefficient to
- to the longest run length.*/
- run_counts[63]+=_ntoks_left[pli][0]-cfi;
- /*And convert the run_counts array to a moment table.*/
- for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
- /*Finally, subtract off the number of coefficients that have been
- accounted for by runs started in this coefficient.*/
- for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
- }
- return eobs;
-}
-
-/*Unpacks the AC coefficient tokens.
- This can completely discard coefficient values while unpacking, and so is
- somewhat simpler than unpacking the DC coefficient tokens.
- _huff_idx: The index of the Huffman table to use for each color plane.
- _ntoks_left: The number of tokens left to be decoded in each color plane for
- each coefficient.
- This is updated as EOB tokens and zero run tokens are decoded.
- _eobs: The length of any outstanding EOB run from previous
- coefficients.
- Return: The length of any outstanding EOB run.*/
-static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[3],
- int _ntoks_left[3][64],int _eobs){
- long val;
- int run_counts[64];
- int cfi;
- int ti;
- int ebi;
- int pli;
- int rli;
- ti=ebi=0;
- for(pli=0;pli<3;pli++){
- memset(run_counts,0,sizeof(run_counts));
- _dec->eob_runs[pli][_zzi]=_eobs;
- cfi=0;
- while(_eobs<_ntoks_left[pli][_zzi]-cfi){
- int token;
- int neb;
- int eb;
- int skip;
- cfi+=_eobs;
- run_counts[63]+=_eobs;
- token=oc_huff_token_decode(&_dec->opb,
- _dec->huff_tables[_huff_idxs[pli]]);
- _dec->dct_tokens[_zzi][ti++]=(unsigned char)token;
- neb=OC_DCT_TOKEN_EXTRA_BITS[token];
- if(neb){
- theorapackB_read(&_dec->opb,neb,&val);
- eb=(int)val;
- _dec->extra_bits[_zzi][ebi++]=(ogg_uint16_t)eb;
- }
- else eb=0;
- skip=oc_dct_token_skip(token,eb);
- if(skip<0)_eobs=-skip;
- else{
- run_counts[skip-1]++;
- cfi++;
- _eobs=0;
- }
- }
- _dec->ti0[pli][_zzi]=ti;
- _dec->ebi0[pli][_zzi]=ebi;
- /*Set the EOB count to the portion of the last EOB run which extends past
- this coefficient.*/
- _eobs=_eobs+cfi-_ntoks_left[pli][_zzi];
- /*Add the portion of the last EOB which was included in this coefficient to
- to the longest run length.*/
- run_counts[63]+=_ntoks_left[pli][_zzi]-cfi;
- /*And convert the run_counts array to a moment table.*/
- for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
- /*Finally, subtract off the number of coefficients that have been
- accounted for by runs started in this coefficient.*/
- for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
- }
- return _eobs;
-}
-
-/*Tokens describing the DCT coefficients that belong to each fragment are
- stored in the bitstream grouped by coefficient, not by fragment.
-
- This means that we either decode all the tokens in order, building up a
- separate coefficient list for each fragment as we go, and then go back and
- do the iDCT on each fragment, or we have to create separate lists of tokens
- for each coefficient, so that we can pull the next token required off the
- head of the appropriate list when decoding a specific fragment.
-
- The former was VP3's choice, and it meant 2*w*h extra storage for all the
- decoded coefficient values.
-
- We take the second option, which lets us store just one or three bytes per
- token (generally far fewer than the number of coefficients, due to EOB
- tokens and zero runs), and which requires us to only maintain a counter for
- each of the 64 coefficients, instead of a counter for every fragment to
- determine where the next token goes.
-
- Actually, we use 3 counters per coefficient, one for each color plane, so we
- can decode all color planes simultaneously.
-
- This lets color conversion, etc., be done as soon as a full MCU (one or
- two super block rows) is decoded, while the image data is still in cache.*/
-
-static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
- static const int OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
- long val;
- int ntoks_left[3][64];
- int huff_idxs[3];
- int pli;
- int zzi;
- int hgi;
- int huffi_y;
- int huffi_c;
- int eobs;
- for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
- ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
- }
- theorapackB_read(&_dec->opb,4,&val);
- huffi_y=(int)val;
- theorapackB_read(&_dec->opb,4,&val);
- huffi_c=(int)val;
- huff_idxs[0]=huffi_y;
- huff_idxs[1]=huff_idxs[2]=huffi_c;
- _dec->eob_runs[0][0]=0;
- eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
- theorapackB_read(&_dec->opb,4,&val);
- huffi_y=(int)val;
- theorapackB_read(&_dec->opb,4,&val);
- huffi_c=(int)val;
- zzi=1;
- for(hgi=1;hgi<5;hgi++){
- huff_idxs[0]=huffi_y+(hgi<<4);
- huff_idxs[1]=huff_idxs[2]=huffi_c+(hgi<<4);
- for(;zzi0);
- *_zzi=zzi;
-}
-
-/*Expands a constant, single-value token.*/
-static void oc_token_expand_const(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
- _dct_coeffs[(*_zzi)++]=(ogg_int16_t)oc_token_dec1val_const(_token);
-}
-
-/*Expands category 2 single-valued tokens.*/
-static void oc_token_expand_cat2(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
- _dct_coeffs[(*_zzi)++]=
- (ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
-}
-
-/*Expands category 3 through 8 single-valued tokens.*/
-static void oc_token_expand_cati(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
- _dct_coeffs[(*_zzi)++]=
- (ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
-}
-
-/*Expands a category 1a zero run/value combo token.*/
-static void oc_token_expand_run_cat1a(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
- int zzi;
- int rl;
- zzi=*_zzi;
- /*LOOP VECTORIZES.*/
- for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[zzi++]=0;
- _dct_coeffs[zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
- *_zzi=zzi;
-}
-
-/*Expands all other zero run/value combo tokens.*/
-static void oc_token_expand_run(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
- static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
- 6,10,1,2
- };
- static const int NZEROS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
- 3,7,0,1
- };
- static const int VALUE_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
- 0,0,0,1
- };
- static const int VALUE_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
- 0,0,1,1
- };
- static const int VALUE_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
- 1,1,2,2
- };
- static const int SIGN_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
- 2,3,1,2
- };
- int valsigned[2];
- int zzi;
- int rl;
- _token-=OC_DCT_RUN_CAT1B;
- rl=(_extra_bits&NZEROS_MASK[_token])+NZEROS_ADJUST[_token];
- zzi=*_zzi;
- /*LOOP VECTORIZES.*/
- while(rl-->0)_dct_coeffs[zzi++]=0;
- valsigned[0]=VALUE_ADJUST[_token]+
- (_extra_bits>>VALUE_SHIFT[_token]&VALUE_MASK[_token]);
- valsigned[1]=-valsigned[0];
- _dct_coeffs[zzi++]=(ogg_int16_t)valsigned[
- _extra_bits>>SIGN_SHIFT[_token]];
- *_zzi=zzi;
-}
-
-/*A jump table for expanding token values into coefficient values.
- This reduces all the conditional branches, etc., needed to parse these token
- values down to one indirect jump.*/
-static const oc_token_expand_func OC_TOKEN_EXPAND_TABLE[TH_NDCT_TOKENS-
- OC_NDCT_EOB_TOKEN_MAX]={
- oc_token_expand_zrl,
- oc_token_expand_zrl,
- oc_token_expand_const,
- oc_token_expand_const,
- oc_token_expand_const,
- oc_token_expand_const,
- oc_token_expand_cat2,
- oc_token_expand_cat2,
- oc_token_expand_cat2,
- oc_token_expand_cat2,
- oc_token_expand_cati,
- oc_token_expand_cati,
- oc_token_expand_cati,
- oc_token_expand_cati,
- oc_token_expand_cati,
- oc_token_expand_cati,
- oc_token_expand_run_cat1a,
- oc_token_expand_run_cat1a,
- oc_token_expand_run_cat1a,
- oc_token_expand_run_cat1a,
- oc_token_expand_run_cat1a,
- oc_token_expand_run,
- oc_token_expand_run,
- oc_token_expand_run,
- oc_token_expand_run
-};
-
-/*Expands a single token into the given coefficient list.
- This fills in the zeros for zero runs as well as coefficient values, and
- updates the index of the current coefficient.
- It CANNOT be called for any of the EOB tokens.
- _token: The token value to expand.
- _extra_bits: The extra bits associated with the token.
- _dct_coeffs: The current list of coefficients, in zig-zag order.
- _zzi: A pointer to the zig-zag index of the next coefficient to write
- to.
- This is updated before the function returns.*/
-static void oc_dct_token_expand(int _token,int _extra_bits,
- ogg_int16_t *_dct_coeffs,int *_zzi){
- (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
- _extra_bits,_dct_coeffs,_zzi);
-}
-
-
-
-static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
- /*pp_level 0: disabled; free any memory used and return*/
- if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
- if(_dec->dc_qis!=NULL){
- _ogg_free(_dec->dc_qis);
- _dec->dc_qis=NULL;
- _ogg_free(_dec->variances);
- _dec->variances=NULL;
- _ogg_free(_dec->pp_frame_data);
- _dec->pp_frame_data=NULL;
- }
- return 1;
- }
- if(_dec->dc_qis==NULL){
- /*If we haven't been tracking DC quantization indices, there's no point in
- starting now.*/
- if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
- _dec->dc_qis=(unsigned char *)_ogg_malloc(
- _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
- memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
- }
- else{
- int *coded_fragi;
- int *coded_fragi_end;
- unsigned char qi0;
- /*Update the DC quantization index of each coded block.*/
- qi0=(unsigned char)_dec->state.qis[0];
- coded_fragi_end=_dec->state.coded_fragis+_dec->state.ncoded_fragis[0]+
- _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
- for(coded_fragi=_dec->state.coded_fragis;coded_fragidc_qis[*coded_fragi]=qi0;
- }
- }
- /*pp_level 1: Stop after updating DC quantization indices.*/
- if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
- if(_dec->variances!=NULL){
- _ogg_free(_dec->variances);
- _dec->variances=NULL;
- _ogg_free(_dec->pp_frame_data);
- _dec->pp_frame_data=NULL;
- }
- return 1;
- }
- if(_dec->variances==NULL||
- _dec->pp_frame_has_chroma!=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
- size_t frame_sz;
- frame_sz=_dec->state.info.frame_width*_dec->state.info.frame_height;
- if(_dec->pp_levelvariances=(int *)_ogg_realloc(_dec->variances,
- _dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0]));
- _dec->pp_frame_data=(unsigned char *)_ogg_realloc(
- _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
- _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
- _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
- _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
- _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
- (1-_dec->pp_frame_buf[0].height)*_dec->pp_frame_buf[0].stride;
- }
- else{
- size_t y_sz;
- size_t c_sz;
- int c_w;
- int c_h;
- _dec->variances=(int *)_ogg_realloc(_dec->variances,
- _dec->state.nfrags*sizeof(_dec->variances[0]));
- y_sz=frame_sz;
- c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
- c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
- c_sz=c_w*c_h;
- frame_sz+=c_sz<<1;
- _dec->pp_frame_data=(unsigned char *)_ogg_realloc(
- _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
- _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
- _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
- _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
- _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
- _dec->pp_frame_buf[1].width=c_w;
- _dec->pp_frame_buf[1].height=c_h;
- _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
- _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
- _dec->pp_frame_buf[2].width=c_w;
- _dec->pp_frame_buf[2].height=c_h;
- _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
- _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
- oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
- }
- _dec->pp_frame_has_chroma=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
- }
- /*If we're not processing chroma, copy the reference frame's chroma planes.*/
- if(_dec->pp_levelpp_frame_buf+1,
- _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
- sizeof(_dec->pp_frame_buf[1])*2);
- }
- return 0;
-}
-
-
-
-typedef struct{
- int ti[3][64];
- int ebi[3][64];
- int eob_runs[3][64];
- int bounding_values[256];
- int *coded_fragis[3];
- int *uncoded_fragis[3];
- int fragy0[3];
- int fragy_end[3];
- int ncoded_fragis[3];
- int nuncoded_fragis[3];
- int pred_last[3][3];
- int mcu_nvfrags;
- int loop_filter;
- int pp_level;
-}oc_dec_pipeline_state;
-
-
-
-/*Initialize the main decoding pipeline.*/
-static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe){
- int *coded_fragi_end;
- int *uncoded_fragi_end;
- int pli;
- /*If chroma is sub-sampled in the vertical direction, we have to decode two
- super block rows of Y' for each super block row of Cb and Cr.*/
- _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2);
- /*Initialize the token and extra bits indices for each plane and
- coefficient.*/
- memset(_pipe->ti[0],0,sizeof(_pipe->ti[0]));
- memset(_pipe->ebi[0],0,sizeof(_pipe->ebi[0]));
- for(pli=1;pli<3;pli++){
- memcpy(_pipe->ti[pli],_dec->ti0[pli-1],sizeof(_pipe->ti[0]));
- memcpy(_pipe->ebi[pli],_dec->ebi0[pli-1],sizeof(_pipe->ebi[0]));
- }
- /*Also copy over the initial the EOB run counts.*/
- memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
- /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
- coded_fragi_end=_dec->state.coded_fragis;
- uncoded_fragi_end=_dec->state.uncoded_fragis;
- for(pli=0;pli<3;pli++){
- _pipe->coded_fragis[pli]=coded_fragi_end;
- _pipe->uncoded_fragis[pli]=uncoded_fragi_end;
- coded_fragi_end+=_dec->state.ncoded_fragis[pli];
- uncoded_fragi_end-=_dec->state.nuncoded_fragis[pli];
- }
- /*Set the previous DC predictor to 0 for all color planes and frame types.*/
- memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
- /*Initialize the bounding value array for the loop filter.*/
- _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
- _pipe->bounding_values);
- /*Initialize any buffers needed for post-processing.
- We also save the current post-processing level, to guard against the user
- changing it from a callback.*/
- if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
- /*If we don't have enough information to post-process, disable it, regardless
- of the user-requested level.*/
- else{
- _pipe->pp_level=OC_PP_LEVEL_DISABLED;
- memcpy(_dec->pp_frame_buf,
- _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
- sizeof(_dec->pp_frame_buf[0])*3);
- }
-}
-
-/*Undo the DC prediction in a single plane of an MCU (one or two super block
- rows).
- As a side effect, the number of coded and uncoded fragments in this plane of
- the MCU is also computed.*/
-static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli){
- /*Undo the DC prediction.*/
- oc_fragment_plane *fplane;
- oc_fragment *frag;
- int *pred_last;
- int ncoded_fragis;
- int fragx;
- int fragy;
- int fragy0;
- int fragy_end;
- /*Compute the first and last fragment row of the current MCU for this
- plane.*/
- fplane=_dec->state.fplanes+_pli;
- fragy0=_pipe->fragy0[_pli];
- fragy_end=_pipe->fragy_end[_pli];
- frag=_dec->state.frags+fplane->froffset+(fragy0*fplane->nhfrags);
- ncoded_fragis=0;
- pred_last=_pipe->pred_last[_pli];
- for(fragy=fragy0;fragynhfrags;fragx++,frag++){
- if(!frag->coded)continue;
- pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
- oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
- ncoded_fragis++;
- }
- }
- _pipe->ncoded_fragis[_pli]=ncoded_fragis;
- /*Also save the number of uncoded fragments so we know how many to copy.*/
- _pipe->nuncoded_fragis[_pli]=
- (fragy_end-fragy0)*fplane->nhfrags-ncoded_fragis;
-}
-
-/*Reconstructs all coded fragments in a single MCU (one or two super block
- rows).
- This requires that each coded fragment have a proper macro block mode and
- motion vector (if not in INTRA mode), and have it's DC value decoded, with
- the DC prediction process reversed, and the number of coded and uncoded
- fragments in this plane of the MCU be counted.
- The token lists for each color plane and coefficient should also be filled
- in, along with initial token offsets, extra bits offsets, and EOB run
- counts.*/
-static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli){
- /*Decode the AC coefficients.*/
- int *ti;
- int *ebi;
- int *eob_runs;
- int *coded_fragi;
- int *coded_fragi_end;
- ti=_pipe->ti[_pli];
- ebi=_pipe->ebi[_pli];
- eob_runs=_pipe->eob_runs[_pli];
- coded_fragi_end=coded_fragi=_pipe->coded_fragis[_pli];
- coded_fragi_end+=_pipe->ncoded_fragis[_pli];
- for(;coded_fragistate.frags+fragi;
- for(zzi=0;zzi<64;){
- int token;
- int eb;
- last_zzi=zzi;
- if(eob_runs[zzi]){
- eob_runs[zzi]--;
- break;
- }
- else{
- int ebflag;
- token=_dec->dct_tokens[zzi][ti[zzi]++];
- ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
- eb=_dec->extra_bits[zzi][ebi[zzi]]&-ebflag;
- ebi[zzi]+=ebflag;
- if(tokendc;
- iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
- /*last_zzi is always initialized.
- If your compiler thinks otherwise, it is dumb.*/
- oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi,
- iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
- }
- _pipe->coded_fragis[_pli]=coded_fragi;
- /*Right now the reconstructed MCU has only the coded blocks in it.*/
- /*TODO: We make the decision here to always copy the uncoded blocks into it
- from the reference frame.
- We could also copy the coded blocks back over the reference frame, if we
- wait for an additional MCU to be decoded, which might be faster if only a
- small number of blocks are coded.
- However, this introduces more latency, creating a larger cache footprint.
- It's unknown which decision is better, but this one results in simpler
- code, and the hard case (high bitrate, high resolution) is handled
- correctly.*/
- /*Copy the uncoded blocks from the previous reference frame.*/
- _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
- oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli],
- _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
-}
-
-/*Filter a horizontal block edge.*/
-static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
- int *_variance0,int *_variance1){
- unsigned char *rdst;
- const unsigned char *rsrc;
- unsigned char *cdst;
- const unsigned char *csrc;
- int r[10];
- int sum0;
- int sum1;
- int bx;
- int by;
- rdst=_dst;
- rsrc=_src;
- for(bx=0;bx<8;bx++){
- cdst=rdst;
- csrc=rsrc;
- for(by=0;by<10;by++){
- r[by]=*csrc;
- csrc+=_src_ystride;
- }
- sum0=sum1=0;
- for(by=0;by<4;by++){
- sum0+=abs(r[by+1]-r[by]);
- sum1+=abs(r[by+5]-r[by+6]);
- }
- *_variance0+=OC_MINI(255,sum0);
- *_variance1+=OC_MINI(255,sum1);
- if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
- *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
- cdst+=_dst_ystride;
- *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
- cdst+=_dst_ystride;
- for(by=0;by<4;by++){
- *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+
- r[by+4]+r[by+5]+r[by+6]+4>>3);
- cdst+=_dst_ystride;
- }
- *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
- cdst+=_dst_ystride;
- *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
- }
- else{
- for(by=1;by<=8;by++){
- *cdst=(unsigned char)r[by];
- cdst+=_dst_ystride;
- }
- }
- rdst++;
- rsrc++;
- }
-}
-
-/*Filter a vertical block edge.*/
-static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
- int _qstep,int _flimit,int *_variances){
- unsigned char *rdst;
- const unsigned char *rsrc;
- unsigned char *cdst;
- int r[10];
- int sum0;
- int sum1;
- int bx;
- int by;
- cdst=_dst;
- for(by=0;by<8;by++){
- rsrc=cdst-1;
- rdst=cdst;
- for(bx=0;bx<10;bx++)r[bx]=*rsrc++;
- sum0=sum1=0;
- for(bx=0;bx<4;bx++){
- sum0+=abs(r[bx+1]-r[bx]);
- sum1+=abs(r[bx+5]-r[bx+6]);
- }
- _variances[0]+=OC_MINI(255,sum0);
- _variances[1]+=OC_MINI(255,sum1);
- if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
- *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
- *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
- for(bx=0;bx<4;bx++){
- *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+
- r[bx+4]+r[bx+5]+r[bx+6]+4>>3);
- }
- *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
- *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
- }
- else for(bx=1;bx<=8;bx++)*rdst++=(unsigned char)r[bx];
- cdst+=_dst_ystride;
- }
-}
-
-static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
- th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
- int _fragy_end){
- oc_fragment_plane *fplane;
- int *variance;
- unsigned char *dc_qi;
- unsigned char *dst;
- const unsigned char *src;
- int notstart;
- int notdone;
- int froffset;
- int flimit;
- int qstep;
- int y_end;
- int y;
- int x;
- _dst+=_pli;
- _src+=_pli;
- fplane=_dec->state.fplanes+_pli;
- froffset=fplane->froffset+_fragy0*fplane->nhfrags;
- variance=_dec->variances+froffset;
- dc_qi=_dec->dc_qis+froffset;
- notstart=_fragy0>0;
- notdone=_fragy_endnvfrags;
- /*We want to clear an extra row of variances, except at the end.*/
- memset(variance+(fplane->nhfrags&-notstart),0,
- (_fragy_end+notdone-_fragy0-notstart)*fplane->nhfrags*sizeof(variance[0]));
- /*Except for the first time, we want to point to the middle of the row.*/
- y=(_fragy0<<3)+(notstart<<2);
- dst=_dst->data+y*_dst->stride;
- src=_src->data+y*_src->stride;
- for(;y<4;y++){
- memcpy(dst,src,_dst->width*sizeof(dst[0]));
- dst+=_dst->stride;
- src+=_src->stride;
- }
- /*We also want to skip the last row in the frame for this loop.*/
- y_end=_fragy_end-!notdone<<3;
- for(;ypp_dc_scale[*dc_qi];
- flimit=(qstep*3)>>2;
- oc_filter_hedge(dst,_dst->stride,src-_src->stride,_src->stride,
- qstep,flimit,variance,variance+fplane->nhfrags);
- variance++;
- dc_qi++;
- for(x=8;x<_dst->width;x+=8){
- qstep=_dec->pp_dc_scale[*dc_qi];
- flimit=(qstep*3)>>2;
- oc_filter_hedge(dst+x,_dst->stride,src+x-_src->stride,_src->stride,
- qstep,flimit,variance,variance+fplane->nhfrags);
- oc_filter_vedge(dst+x-(_dst->stride<<2)-4,_dst->stride,
- qstep,flimit,variance-1);
- variance++;
- dc_qi++;
- }
- dst+=_dst->stride<<3;
- src+=_src->stride<<3;
- }
- /*And finally, handle the last row in the frame, if it's in the range.*/
- if(!notdone){
- for(;y<_dst->height;y++){
- memcpy(dst,src,_dst->width*sizeof(dst[0]));
- dst+=_dst->stride;
- src+=_src->stride;
- }
- /*Filter the last row of vertical block edges.*/
- dc_qi++;
- for(x=8;x<_dst->width;x+=8){
- qstep=_dec->pp_dc_scale[*dc_qi++];
- flimit=(qstep*3)>>2;
- oc_filter_vedge(dst+x-(_dst->stride<<3)-4,_dst->stride,
- qstep,flimit,variance++);
- }
- }
-}
-
-static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
- int _dc_scale,int _sharp_mod,int _strong){
- static const int MOD_MAX[2]={24,32};
- static const int MOD_SHIFT[2]={1,0};
- const unsigned char *psrc;
- const unsigned char *src;
- const unsigned char *nsrc;
- unsigned char *dst;
- int vmod[72];
- int hmod[72];
- int mod_hi;
- int by;
- int bx;
- mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]);
- dst=_idata;
- src=dst;
- psrc=src-(_ystride&-!(_b&4));
- for(by=0;by<9;by++){
- for(bx=0;bx<8;bx++){
- int mod;
- mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7);
- for(bx=1;bx<7;bx++){
- a=128;
- b=64;
- w=hmod[(bx<<3)+by];
- a-=w;
- b+=w*src[bx-1];
- w=vmod[(by<<3)+bx];
- a-=w;
- b+=w*psrc[bx];
- w=vmod[(by+1<<3)+bx];
- a-=w;
- b+=w*nsrc[bx];
- w=hmod[(bx+1<<3)+by];
- a-=w;
- b+=w*src[bx+1];
- dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
- }
- a=128;
- b=64;
- w=hmod[(7<<3)+by];
- a-=w;
- b+=w*src[6];
- w=vmod[(by<<3)+7];
- a-=w;
- b+=w*psrc[7];
- w=vmod[(by+1<<3)+7];
- a-=w;
- b+=w*nsrc[7];
- w=hmod[(8<<3)+by];
- a-=w;
- b+=w*src[7+!(_b&2)];
- dst[7]=OC_CLAMP255(a*src[7]+b>>7);
- dst+=_ystride;
- psrc=src;
- src=nsrc;
- nsrc+=_ystride&-(!(_b&8)|by<6);
- }
-}
-
-#define OC_DERING_THRESH1 (384)
-#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
-#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
-#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
-
-static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
- int _pli,int _fragy0,int _fragy_end){
- th_img_plane *iplane;
- oc_fragment_plane *fplane;
- oc_fragment *frag;
- int *variance;
- unsigned char *idata;
- int sthresh;
- int strong;
- int froffset;
- int y_end;
- int y;
- int x;
- iplane=_img+_pli;
- fplane=_dec->state.fplanes+_pli;
- froffset=fplane->froffset+_fragy0*fplane->nhfrags;
- variance=_dec->variances+froffset;
- frag=_dec->state.frags+froffset;
- strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
- sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
- y=_fragy0<<3;
- idata=iplane->data+y*iplane->stride;
- y_end=_fragy_end<<3;
- for(;ywidth;x+=8){
- int b;
- int qi;
- int var;
- qi=frag->qi;
- var=*variance;
- b=(x<=0)|(x+8>=iplane->width)<<1|(y<=0)<<2|(y+8>=iplane->height)<<3;
- if(strong&&var>sthresh){
- oc_dering_block(idata+x,iplane->stride,b,
- _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
- if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
- !(b&2)&&variance[1]>OC_DERING_THRESH4||
- !(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4||
- !(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){
- oc_dering_block(idata+x,iplane->stride,b,
- _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
- oc_dering_block(idata+x,iplane->stride,b,
- _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
- }
- }
- else if(var>OC_DERING_THRESH2){
- oc_dering_block(idata+x,iplane->stride,b,
- _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
- }
- else if(var>OC_DERING_THRESH1){
- oc_dering_block(idata+x,iplane->stride,b,
- _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
- }
- frag++;
- variance++;
- }
- idata+=iplane->stride<<3;
- }
-}
-
-
-
-th_dec_ctx *th_decode_alloc(const th_info *_info,
- const th_setup_info *_setup){
- oc_dec_ctx *dec;
- if(_info==NULL||_setup==NULL)return NULL;
- dec=_ogg_malloc(sizeof(*dec));
- if(oc_dec_init(dec,_info,_setup)<0){
- _ogg_free(dec);
- return NULL;
- }
- dec->state.curframe_num=0;
- return dec;
-}
-
-void th_decode_free(th_dec_ctx *_dec){
- if(_dec!=NULL){
- oc_dec_clear(_dec);
- _ogg_free(_dec);
- }
-}
-
-int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
- size_t _buf_sz){
- switch(_req){
- case TH_DECCTL_GET_PPLEVEL_MAX:{
- if(_dec==NULL||_buf==NULL)return TH_EFAULT;
- if(_buf_sz!=sizeof(int))return TH_EINVAL;
- (*(int *)_buf)=OC_PP_LEVEL_MAX;
- return 0;
- }break;
- case TH_DECCTL_SET_PPLEVEL:{
- int pp_level;
- if(_dec==NULL||_buf==NULL)return TH_EFAULT;
- if(_buf_sz!=sizeof(int))return TH_EINVAL;
- pp_level=*(int *)_buf;
- if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
- _dec->pp_level=pp_level;
- return 0;
- }break;
- case TH_DECCTL_SET_GRANPOS:{
- ogg_int64_t granpos;
- if(_dec==NULL||_buf==NULL)return TH_EFAULT;
- if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
- granpos=*(ogg_int64_t *)_buf;
- if(granpos<0)return TH_EINVAL;
- _dec->state.granpos=granpos;
- _dec->state.keyframe_num=
- granpos>>_dec->state.info.keyframe_granule_shift;
- _dec->state.curframe_num=_dec->state.keyframe_num+
- (granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
- return 0;
- }break;
- case TH_DECCTL_SET_STRIPE_CB:{
- th_stripe_callback *cb;
- if(_dec==NULL||_buf==NULL)return TH_EFAULT;
- if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
- cb=(th_stripe_callback *)_buf;
- _dec->stripe_cb.ctx=cb->ctx;
- _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
- return 0;
- }break;
- default:return TH_EIMPL;
- }
-}
-
-int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
- ogg_int64_t *_granpos){
- int ret;
- if(_dec==NULL||_op==NULL)return TH_EFAULT;
- /*A completely empty packet indicates a dropped frame and is treated exactly
- like an inter frame with no coded blocks.
- Only proceed if we have a non-empty packet.*/
- if(_op->bytes!=0){
- oc_dec_pipeline_state pipe;
- th_ycbcr_buffer stripe_buf;
- int stripe_fragy;
- int refi;
- int pli;
- int notstart;
- int notdone;
- theorapackB_readinit(&_dec->opb,_op->packet,_op->bytes);
- ret=oc_dec_frame_header_unpack(_dec);
- if(ret<0)return ret;
- /*Select a free buffer to use for the reconstructed version of this
- frame.*/
- if(_dec->state.frame_type!=OC_INTRA_FRAME&&
- (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
- _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
- th_info *info;
- size_t yplane_sz;
- size_t cplane_sz;
- int yhstride;
- int yvstride;
- int chstride;
- int cvstride;
- /*We're decoding an INTER frame, but have no initialized reference
- buffers (i.e., decoding did not start on a key frame).
- We initialize them to a solid gray here.*/
- _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
- _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
- _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1;
- info=&_dec->state.info;
- yhstride=info->frame_width+2*OC_UMV_PADDING;
- yvstride=info->frame_height+2*OC_UMV_PADDING;
- chstride=yhstride>>!(info->pixel_fmt&1);
- cvstride=yvstride>>!(info->pixel_fmt&2);
- yplane_sz=(size_t)yhstride*yvstride;
- cplane_sz=(size_t)chstride*cvstride;
- memset(_dec->state.ref_frame_data,0x80,yplane_sz+2*cplane_sz);
- }
- else{
- for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
- refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
- _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
- }
- if(_dec->state.frame_type==OC_INTRA_FRAME){
- oc_dec_mark_all_intra(_dec);
- _dec->state.keyframe_num=_dec->state.curframe_num;
- }else{
- oc_dec_coded_flags_unpack(_dec);
- oc_dec_mb_modes_unpack(_dec);
- oc_dec_mv_unpack_and_frag_modes_fill(_dec);
- }
- oc_dec_block_qis_unpack(_dec);
- oc_dec_residual_tokens_unpack(_dec);
- /*Update granule position.
- This must be done before the striped decode callbacks so that the
- application knows what to do with the frame data.*/
- _dec->state.granpos=
- (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
- (_dec->state.curframe_num-_dec->state.keyframe_num);
- _dec->state.curframe_num++;
- if(_granpos!=NULL)*_granpos=_dec->state.granpos;
- /*All of the rest of the operations -- DC prediction reversal,
- reconstructing coded fragments, copying uncoded fragments, loop
- filtering, extending borders, and out-of-loop post-processing -- should
- be pipelined.
- I.e., DC prediction reversal, reconstruction, and uncoded fragment
- copying are done for one or two super block rows, then loop filtering is
- run as far as it can, then bordering copying, then post-processing.
- For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
- block rows, and one chroma.
- Otherwise, an MCU consists of one super block row from each plane.
- Inside each MCU, we perform all of the steps on one color plane before
- moving on to the next.
- After reconstruction, the additional filtering stages introduce a delay
- since they need some pixels from the next fragment row.
- Thus the actual number of decoded rows available is slightly smaller for
- the first MCU, and slightly larger for the last.
-
- This entire process allows us to operate on the data while it is still in
- cache, resulting in big performance improvements.
- An application callback allows further application processing (blitting
- to video memory, color conversion, etc.) to also use the data while it's
- in cache.*/
- oc_dec_pipeline_init(_dec,&pipe);
- oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
- notstart=0;
- notdone=1;
- for(stripe_fragy=notstart=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
- int avail_fragy0;
- int avail_fragy_end;
- avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
- notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli;
- /*Compute the first and last fragment row of the current MCU for this
- plane.*/
- frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
- pipe.fragy0[pli]=stripe_fragy>>frag_shift;
- pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
- pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
- oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
- oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
- sdelay=edelay=0;
- if(pipe.loop_filter){
- sdelay+=notstart;
- edelay+=notdone;
- oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
- refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
- }
- /*To fill the borders, we have an additional two pixel delay, since a
- fragment in the next row could filter its top edge, using two pixels
- from a fragment in this row.
- But there's no reason to delay a full fragment between the two.*/
- oc_state_borders_fill_rows(&_dec->state,refi,pli,
- (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
- (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
- /*Out-of-loop post-processing.*/
- pp_offset=3*(pli!=0);
- if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
- /*Perform de-blocking in one plane.*/
- sdelay+=notstart;
- edelay+=notdone;
- oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
- _dec->state.ref_frame_bufs[refi],pli,
- pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
- if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
- /*Perform de-ringing in one plane.*/
- sdelay+=notstart;
- edelay+=notdone;
- oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
- pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
- }
- }
- /*If no post-processing is done, we still need to delay a row for the
- loop filter, thanks to the strange filtering order VP3 chose.*/
- else if(pipe.loop_filter){
- sdelay+=notstart;
- edelay+=notdone;
- }
- /*Compute the intersection of the available rows in all planes.
- If chroma is sub-sampled, the effect of each of its delays is
- doubled, but luma might have more post-processing filters enabled
- than chroma, so we don't know up front which one is the limiting
- factor.*/
- avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){
- /*Make the callback, ensuring we flip the sense of the "start" and
- "end" of the available region upside down.*/
- (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
- _dec->state.fplanes[0].nvfrags-avail_fragy_end,
- _dec->state.fplanes[0].nvfrags-avail_fragy0);
- }
- notstart=1;
- }
- /*Finish filling in the reference frame borders.*/
- for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
- /*Update the reference frame indices.*/
- if(_dec->state.frame_type==OC_INTRA_FRAME){
- /*The new frame becomes both the previous and gold reference frames.*/
- _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
- _dec->state.ref_frame_idx[OC_FRAME_PREV]=
- _dec->state.ref_frame_idx[OC_FRAME_SELF];
- }
- else{
- /*Otherwise, just replace the previous reference frame.*/
- _dec->state.ref_frame_idx[OC_FRAME_PREV]=
- _dec->state.ref_frame_idx[OC_FRAME_SELF];
- }
-#if defined(OC_DUMP_IMAGES)
- /*Don't dump images for dropped frames.*/
- oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
-#endif
- return 0;
- }
- else{
- /*Just update the granule position and return.*/
- _dec->state.granpos=
- (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
- (_dec->state.curframe_num-_dec->state.keyframe_num);
- _dec->state.curframe_num++;
- if(_granpos!=NULL)*_granpos=_dec->state.granpos;
- return TH_DUPFRAME;
- }
-}
-
-int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
- oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
- return 0;
-}
diff --git a/Engine/lib/libtheora/lib/dec/fragment.c b/Engine/lib/libtheora/lib/dec/fragment.c
deleted file mode 100644
index 77f1c7f6b..000000000
--- a/Engine/lib/libtheora/lib/dec/fragment.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: fragment.c 15469 2008-10-30 12:49:42Z tterribe $
-
- ********************************************************************/
-
-#include "../internal.h"
-
-void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
- int _dst_ystride,const ogg_int16_t *_residue){
- _state->opt_vtable.frag_recon_intra(_dst,_dst_ystride,_residue);
-}
-
-void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue){
- int i;
- for(i=0;i<8;i++){
- int j;
- for(j=0;j<8;j++){
- int res;
- res=*_residue++;
- _dst[j]=OC_CLAMP255(res+128);
- }
- _dst+=_dst_ystride;
- }
-}
-
-void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
- int _dst_ystride,const unsigned char *_src,int _src_ystride,
- const ogg_int16_t *_residue){
- _state->opt_vtable.frag_recon_inter(_dst,_dst_ystride,_src,_src_ystride,
- _residue);
-}
-
-void oc_frag_recon_inter_c(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue){
- int i;
- for(i=0;i<8;i++){
- int j;
- for(j=0;j<8;j++){
- int res;
- res=*_residue++;
- _dst[j]=OC_CLAMP255(res+_src[j]);
- }
- _dst+=_dst_ystride;
- _src+=_src_ystride;
- }
-}
-
-void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
- int _dst_ystride,const unsigned char *_src1,int _src1_ystride,
- const unsigned char *_src2,int _src2_ystride,const ogg_int16_t *_residue){
- _state->opt_vtable.frag_recon_inter2(_dst,_dst_ystride,_src1,_src1_ystride,
- _src2,_src2_ystride,_residue);
-}
-
-void oc_frag_recon_inter2_c(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue){
- int i;
- for(i=0;i<8;i++){
- int j;
- for(j=0;j<8;j++){
- int res;
- res=*_residue++;
- _dst[j]=OC_CLAMP255(res+((int)_src1[j]+_src2[j]>>1));
- }
- _dst+=_dst_ystride;
- _src1+=_src1_ystride;
- _src2+=_src2_ystride;
- }
-}
-
-/*Computes the predicted DC value for the given fragment.
- This requires that the fully decoded DC values be available for the left,
- upper-left, upper, and upper-right fragments (if they exist).
- _frag: The fragment to predict the DC value for.
- _fplane: The fragment plane the fragment belongs to.
- _x: The x-coordinate of the fragment.
- _y: The y-coordinate of the fragment.
- _pred_last: The last fully-decoded DC value for each predictor frame
- (OC_FRAME_GOLD, OC_FRAME_PREV and OC_FRAME_SELF).
- This should be initialized to 0's for the first fragment in each
- color plane.
- Return: The predicted DC value for this fragment.*/
-int oc_frag_pred_dc(const oc_fragment *_frag,
- const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]){
- static const int PRED_SCALE[16][4]={
- /*0*/
- {0,0,0,0},
- /*OC_PL*/
- {1,0,0,0},
- /*OC_PUL*/
- {1,0,0,0},
- /*OC_PL|OC_PUL*/
- {1,0,0,0},
- /*OC_PU*/
- {1,0,0,0},
- /*OC_PL|OC_PU*/
- {1,1,0,0},
- /*OC_PUL|OC_PU*/
- {0,1,0,0},
- /*OC_PL|OC_PUL|PC_PU*/
- {29,-26,29,0},
- /*OC_PUR*/
- {1,0,0,0},
- /*OC_PL|OC_PUR*/
- {75,53,0,0},
- /*OC_PUL|OC_PUR*/
- {1,1,0,0},
- /*OC_PL|OC_PUL|OC_PUR*/
- {75,0,53,0},
- /*OC_PU|OC_PUR*/
- {1,0,0,0},
- /*OC_PL|OC_PU|OC_PUR*/
- {75,0,53,0},
- /*OC_PUL|OC_PU|OC_PUR*/
- {3,10,3,0},
- /*OC_PL|OC_PUL|OC_PU|OC_PUR*/
- {29,-26,29,0}
- };
- static const int PRED_SHIFT[16]={0,0,0,0,0,1,0,5,0,7,1,7,0,7,4,5};
- static const int PRED_RMASK[16]={0,0,0,0,0,1,0,31,0,127,1,127,0,127,15,31};
- static const int BC_MASK[8]={
- /*No boundary condition.*/
- OC_PL|OC_PUL|OC_PU|OC_PUR,
- /*Left column.*/
- OC_PU|OC_PUR,
- /*Top row.*/
- OC_PL,
- /*Top row, left column.*/
- 0,
- /*Right column.*/
- OC_PL|OC_PUL|OC_PU,
- /*Right and left column.*/
- OC_PU,
- /*Top row, right column.*/
- OC_PL,
- /*Top row, right and left column.*/
- 0
- };
- /*Predictor fragments, left, up-left, up, up-right.*/
- const oc_fragment *predfr[4];
- /*The frame used for prediction for this fragment.*/
- int pred_frame;
- /*The boundary condition flags.*/
- int bc;
- /*DC predictor values: left, up-left, up, up-right, missing values
- skipped.*/
- int p[4];
- /*Predictor count.*/
- int np;
- /*Which predictor constants to use.*/
- int pflags;
- /*The predicted DC value.*/
- int ret;
- int i;
- pred_frame=OC_FRAME_FOR_MODE[_frag->mbmode];
- bc=(_x==0)+((_y==0)<<1)+((_x+1==_fplane->nhfrags)<<2);
- predfr[0]=_frag-1;
- predfr[1]=_frag-_fplane->nhfrags-1;
- predfr[2]=predfr[1]+1;
- predfr[3]=predfr[2]+1;
- np=0;
- pflags=0;
- for(i=0;i<4;i++){
- int pflag;
- pflag=1<coded&&
- OC_FRAME_FOR_MODE[predfr[i]->mbmode]==pred_frame){
- p[np++]=predfr[i]->dc;
- pflags|=pflag;
- }
- }
- if(pflags==0)return _pred_last[pred_frame];
- else{
- ret=PRED_SCALE[pflags][0]*p[0];
- /*LOOP VECTORIZES.*/
- for(i=1;i128)ret=p[2];
- else if(abs(ret-p[0])>128)ret=p[0];
- else if(abs(ret-p[1])>128)ret=p[1];
- }
- return ret;
-}
diff --git a/Engine/lib/libtheora/lib/dec/huffdec.c b/Engine/lib/libtheora/lib/dec/huffdec.c
deleted file mode 100644
index 86c52b62f..000000000
--- a/Engine/lib/libtheora/lib/dec/huffdec.c
+++ /dev/null
@@ -1,325 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: huffdec.c 15431 2008-10-21 05:04:02Z giles $
-
- ********************************************************************/
-
-#include
-#include
-#include "huffdec.h"
-#include "decint.h"
-
-
-/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
-#define _ogg_offsetof(_type,_field)\
- ((size_t)((char *)&((_type *)0)->_field-(char *)0))
-
-/*These two functions are really part of the bitpack.c module, but
- they are only used here. Declaring local static versions so they
- can be inlined saves considerable function call overhead.*/
-
-/*Read in bits without advancing the bitptr.
- Here we assume 0<=_bits&&_bits<=32.*/
-static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret){
- long ret;
- long m;
- long d;
- m=32-_bits;
- _bits+=_b->endbit;
- d=_b->storage-_b->endbyte;
- if(d<=4){
- /*Not the main path.*/
- if(d<=0){
- *_ret=0L;
- return -(_bits>d*8);
- }
- /*If we have some bits left, but not enough, return the ones we have.*/
- if(d*8<_bits)_bits=d*8;
- }
- ret=_b->ptr[0]<<24+_b->endbit;
- if(_bits>8){
- ret|=_b->ptr[1]<<16+_b->endbit;
- if(_bits>16){
- ret|=_b->ptr[2]<<8+_b->endbit;
- if(_bits>24){
- ret|=_b->ptr[3]<<_b->endbit;
- if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit;
- }
- }
- }
- *_ret=((ret&0xFFFFFFFF)>>(m>>1))>>(m+1>>1);
- return 0;
-}
-
-/*advance the bitptr*/
-static void theorapackB_adv(oggpack_buffer *_b,int _bits){
- _bits+=_b->endbit;
- _b->ptr+=_bits>>3;
- _b->endbyte+=_bits>>3;
- _b->endbit=_bits&7;
-}
-
-
-/*The log_2 of the size of a lookup table is allowed to grow to relative to
- the number of unique nodes it contains.
- E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
- wasted (each node will have an amortized cost of at most 20 bytes when using
- 4-byte pointers).
- Larger numbers can decode tokens with fewer read operations, while smaller
- numbers may save more space (requiring as little as 8 bytes amortized per
- node, though there will be more nodes).
- With a sample file:
- 32233473 read calls are required when no tree collapsing is done (100.0%).
- 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
- 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
- 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
- 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
- Since a value of 1 gets us the vast majority of the speed-up with only a
- small amount of wasted memory, this is what we use.*/
-#define OC_HUFF_SLUSH (1)
-
-
-/*Allocates a Huffman tree node that represents a subtree of depth _nbits.
- _nbits: The depth of the subtree.
- If this is 0, the node is a leaf node.
- Otherwise 1<<_nbits pointers are allocated for children.
- Return: The newly allocated and fully initialized Huffman tree node.*/
-static oc_huff_node *oc_huff_node_alloc(int _nbits){
- oc_huff_node *ret;
- size_t size;
- size=_ogg_offsetof(oc_huff_node,nodes);
- if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits);
- ret=_ogg_calloc(1,size);
- ret->nbits=(unsigned char)_nbits;
- return ret;
-}
-
-/*Frees a Huffman tree node allocated with oc_huf_node_alloc.
- _node: The node to free.
- This may be NULL.*/
-static void oc_huff_node_free(oc_huff_node *_node){
- _ogg_free(_node);
-}
-
-/*Frees the memory used by a Huffman tree.
- _node: The Huffman tree to free.
- This may be NULL.*/
-static void oc_huff_tree_free(oc_huff_node *_node){
- if(_node==NULL)return;
- if(_node->nbits){
- int nchildren;
- int i;
- int inext;
- nchildren=1<<_node->nbits;
- for(i=0;inodes[i]!=NULL?1<<_node->nbits-_node->nodes[i]->depth:1);
- oc_huff_tree_free(_node->nodes[i]);
- }
- }
- oc_huff_node_free(_node);
-}
-
-/*Unpacks a sub-tree from the given buffer.
- _opb: The buffer to unpack from.
- _binode: The location to store a pointer to the sub-tree in.
- _depth: The current depth of the tree.
- This is used to prevent infinite recursion.
- Return: 0 on success, or a negative value on error.*/
-static int oc_huff_tree_unpack(oggpack_buffer *_opb,
- oc_huff_node **_binode,int _depth){
- oc_huff_node *binode;
- long bits;
- /*Prevent infinite recursion.*/
- if(++_depth>32)return TH_EBADHEADER;
- if(theorapackB_read1(_opb,&bits)<0)return TH_EBADHEADER;
- /*Read an internal node:*/
- if(!bits){
- int ret;
- binode=oc_huff_node_alloc(1);
- binode->depth=(unsigned char)(_depth>1);
- ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth);
- if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth);
- if(ret<0){
- oc_huff_tree_free(binode);
- *_binode=NULL;
- return ret;
- }
- }
- /*Read a leaf node:*/
- else{
- if(theorapackB_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return TH_EBADHEADER;
- binode=oc_huff_node_alloc(0);
- binode->depth=(unsigned char)(_depth>1);
- binode->token=(unsigned char)bits;
- }
- *_binode=binode;
- return 0;
-}
-
-/*Finds the depth of shortest branch of the given sub-tree.
- The tree must be binary.
- _binode: The root of the given sub-tree.
- _binode->nbits must be 0 or 1.
- Return: The smallest depth of a leaf node in this sub-tree.
- 0 indicates this sub-tree is a leaf node.*/
-static int oc_huff_tree_mindepth(oc_huff_node *_binode){
- int depth0;
- int depth1;
- if(_binode->nbits==0)return 0;
- depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
- depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
- return OC_MINI(depth0,depth1)+1;
-}
-
-/*Finds the number of internal nodes at a given depth, plus the number of
- leaves at that depth or shallower.
- The tree must be binary.
- _binode: The root of the given sub-tree.
- _binode->nbits must be 0 or 1.
- Return: The number of entries that would be contained in a jump table of the
- given depth.*/
-static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
- if(_binode->nbits==0||_depth<=0)return 1;
- else{
- return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
- oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
- }
-}
-
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode);
-
-/*Fills the given nodes table with all the children in the sub-tree at the
- given depth.
- The nodes in the sub-tree with a depth less than that stored in the table
- are freed.
- The sub-tree must be binary and complete up until the given depth.
- _nodes: The nodes table to fill.
- _binode: The root of the sub-tree to fill it with.
- _binode->nbits must be 0 or 1.
- _level: The current level in the table.
- 0 indicates that the current node should be stored, regardless of
- whether it is a leaf node or an internal node.
- _depth: The depth of the nodes to fill the table with, relative to their
- parent.*/
-static void oc_huff_node_fill(oc_huff_node **_nodes,
- oc_huff_node *_binode,int _level,int _depth){
- if(_level<=0||_binode->nbits==0){
- int i;
- _binode->depth=(unsigned char)(_depth-_level);
- _nodes[0]=oc_huff_tree_collapse(_binode);
- for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
- }
- else{
- _level--;
- oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth);
- oc_huff_node_fill(_nodes+(1<<_level),_binode->nodes[1],_level,_depth);
- oc_huff_node_free(_binode);
- }
-}
-
-/*Finds the largest complete sub-tree rooted at the current node and collapses
- it into a single node.
- This procedure is then applied recursively to all the children of that node.
- _binode: The root of the sub-tree to collapse.
- _binode->nbits must be 0 or 1.
- Return: The new root of the collapsed sub-tree.*/
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode){
- oc_huff_node *root;
- int mindepth;
- int depth;
- int loccupancy;
- int occupancy;
- depth=mindepth=oc_huff_tree_mindepth(_binode);
- occupancy=1<loccupancy&&occupancy>=1<depth=_binode->depth;
- oc_huff_node_fill(root->nodes,_binode,depth,depth);
- return root;
-}
-
-/*Makes a copy of the given Huffman tree.
- _node: The Huffman tree to copy.
- Return: The copy of the Huffman tree.*/
-static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node){
- oc_huff_node *ret;
- ret=oc_huff_node_alloc(_node->nbits);
- ret->depth=_node->depth;
- if(_node->nbits){
- int nchildren;
- int i;
- int inext;
- nchildren=1<<_node->nbits;
- for(i=0;inodes[i]=oc_huff_tree_copy(_node->nodes[i]);
- inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
- while(++inodes[i]=ret->nodes[i-1];
- }
- }
- else ret->token=_node->token;
- return ret;
-}
-
-/*Unpacks a set of Huffman trees, and reduces them to a collapsed
- representation.
- _opb: The buffer to unpack the trees from.
- _nodes: The table to fill with the Huffman trees.
- Return: 0 on success, or a negative value on error.*/
-int oc_huff_trees_unpack(oggpack_buffer *_opb,
- oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
- int i;
- for(i=0;inbits!=0){
- theorapackB_look(_opb,_node->nbits,&bits);
- _node=_node->nodes[bits];
- theorapackB_adv(_opb,_node->depth);
- }
- return _node->token;
-}
diff --git a/Engine/lib/libtheora/lib/dec/idct.h b/Engine/lib/libtheora/lib/dec/idct.h
deleted file mode 100644
index 3ee53712e..000000000
--- a/Engine/lib/libtheora/lib/dec/idct.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: idct.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*Inverse DCT transforms.*/
-#include
-#if !defined(_idct_H)
-# define _idct_H (1)
-
-void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
-void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/ocintrin.h b/Engine/lib/libtheora/lib/dec/ocintrin.h
deleted file mode 100644
index 317f5aeae..000000000
--- a/Engine/lib/libtheora/lib/dec/ocintrin.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: ocintrin.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*Some common macros for potential platform-specific optimization.*/
-#include
-#if !defined(_ocintrin_H)
-# define _ocintrin_H (1)
-
-/*Some specific platforms may have optimized intrinsic or inline assembly
- versions of these functions which can substantially improve performance.
- We define macros for them to allow easy incorporation of these non-ANSI
- features.*/
-
-/*Branchless, but not correct for differences larger than INT_MAX.
-static int oc_mini(int _a,int _b){
- int ambsign;
- ambsign=_a-_b>>sizeof(int)*8-1;
- return (_a&~ambsign)+(_b&ambsign);
-}*/
-
-
-#define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a))
-#define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a))
-/*Clamps an integer into the given range.
- If _a>_c, then the lower bound _a is respected over the upper bound _c (this
- behavior is required to meet our documented API behavior).
- _a: The lower bound.
- _b: The value to clamp.
- _c: The upper boud.*/
-#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
-#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
-/*Divides an integer by a power of two, truncating towards 0.
- _dividend: The integer to divide.
- _shift: The non-negative power of two to divide by.
- _rmask: (1<<_shift)-1*/
-#define OC_DIV_POW2(_dividend,_shift,_rmask)\
- ((_dividend)+(((_dividend)>>sizeof(_dividend)*8-1)&(_rmask))>>(_shift))
-/*Divides _x by 65536, truncating towards 0.*/
-#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
-/*Divides _x by 2, truncating towards 0.*/
-#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
-/*Divides _x by 8, truncating towards 0.*/
-#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
-/*Divides _x by 16, truncating towards 0.*/
-#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
-/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
- negative dividends first..
- When _rval is (1<<_shift-1), this is equivalent to division with rounding
- ties towards positive infinity.*/
-#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
- ((_dividend)+((_dividend)>>sizeof(_dividend)*8-1)+(_rval)>>(_shift))
-/*Swaps two integers _a and _b if _a>_b.*/
-#define OC_SORT2I(_a,_b)\
- if((_a)>(_b)){\
- int t__;\
- t__=(_a);\
- (_a)=(_b);\
- (_b)=t__;\
- }
-
-
-
-/*All of these macros should expect floats as arguments.*/
-#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a))
-#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a))
-#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
-#define OC_FABSF(_f) ((float)fabs(_f))
-#define OC_SQRTF(_f) ((float)sqrt(_f))
-#define OC_POWF(_b,_e) ((float)pow(_b,_e))
-#define OC_LOGF(_f) ((float)log(_f))
-#define OC_IFLOORF(_f) ((int)floor(_f))
-#define OC_ICEILF(_f) ((int)ceil(_f))
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/quant.c b/Engine/lib/libtheora/lib/dec/quant.c
deleted file mode 100644
index 5cb7784db..000000000
--- a/Engine/lib/libtheora/lib/dec/quant.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: quant.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include
-#include
-#include
-#include "quant.h"
-#include "decint.h"
-
-static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
-static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
-
-/*Initializes the dequantization tables from a set of quantizer info.
- Currently the dequantizer (and elsewhere enquantizer) tables are expected to
- be initialized as pointing to the storage reserved for them in the
- oc_theora_state (resp. oc_enc_ctx) structure.
- If some tables are duplicates of others, the pointers will be adjusted to
- point to a single copy of the tables, but the storage for them will not be
- freed.
- If you're concerned about the memory footprint, the obvious thing to do is
- to move the storage out of its fixed place in the structures and allocate
- it on demand.
- However, a much, much better option is to only store the quantization
- matrices being used for the current frame, and to recalculate these as the
- qi values change between frames (this is what VP3 did).*/
-void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
- int _pp_dc_scale[64],const th_quant_info *_qinfo){
- /*coding mode: intra or inter.*/
- int qti;
- /*Y', C_b, C_r*/
- int pli;
- for(qti=0;qti<2;qti++){
- for(pli=0;pli<3;pli++){
- oc_quant_tables stage;
- /*Quality index.*/
- int qi;
- /*Range iterator.*/
- int qri;
- for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){
- th_quant_base base;
- ogg_uint32_t q;
- int qi_start;
- int qi_end;
- int ci;
- memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
- sizeof(base));
- qi_start=qi;
- if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
- else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
- /*Iterate over quality indicies in this range.*/
- for(;;){
- ogg_uint32_t qfac;
- /*In the original VP3.2 code, the rounding offset and the size of the
- dead zone around 0 were controlled by a "sharpness" parameter.
- The size of our dead zone is now controlled by the per-coefficient
- quality thresholds returned by our HVS module.
- We round down from a more accurate value when the quality of the
- reconstruction does not fall below our threshold and it saves bits.
- Hence, all of that VP3.2 code is gone from here, and the remaining
- floating point code has been implemented as equivalent integer code
- with exact precision.*/
- qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
- /*For postprocessing, not dequantization.*/
- if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
- /*Scale DC the coefficient from the proper table.*/
- q=(qfac/100)<<2;
- q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
- stage[qi][0]=(ogg_uint16_t)q;
- /*Now scale AC coefficients from the proper table.*/
- for(ci=1;ci<64;ci++){
- q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
- q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
- stage[qi][ci]=(ogg_uint16_t)q;
- }
- if(++qi>=qi_end)break;
- /*Interpolate the next base matrix.*/
- for(ci=0;ci<64;ci++){
- base[ci]=(unsigned char)(
- (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
- (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
- +_qinfo->qi_ranges[qti][pli].sizes[qri])/
- (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
- }
- }
- }
- /*Staging matrices complete; commit to memory only if this isn't a
- duplicate of a preceeding plane.
- This simple check helps us improve cache coherency later.*/
- {
- int dupe;
- int qtj;
- int plj;
- dupe=0;
- for(qtj=0;qtj<=qti;qtj++){
- for(plj=0;plj<(qtj
-
-#if defined(USE_ASM)
-
-static const __attribute__((aligned(8),used)) int OC_FZIG_ZAGMMX[64]={
- 0, 8, 1, 2, 9,16,24,17,
- 10, 3,32,11,18,25, 4,12,
- 5,26,19,40,33,34,41,48,
- 27, 6,13,20,28,21,14, 7,
- 56,49,42,35,43,50,57,36,
- 15,22,29,30,23,44,37,58,
- 51,59,38,45,52,31,60,53,
- 46,39,47,54,61,62,55,63
-};
-
-
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
- ogg_int16_t __attribute__((aligned(8))) res_buf[64];
- int dst_framei;
- int dst_ystride;
- int zzi;
- /*_last_zzi is subtly different from an actual count of the number of
- coefficients we decoded for this block.
- It contains the value of zzi BEFORE the final token in the block was
- decoded.
- In most cases this is an EOB token (the continuation of an EOB run from a
- previous block counts), and so this is the same as the coefficient count.
- However, in the case that the last token was NOT an EOB token, but filled
- the block up with exactly 64 coefficients, _last_zzi will be less than 64.
- Provided the last token was not a pure zero run, the minimum value it can
- be is 46, and so that doesn't affect any of the cases in this routine.
- However, if the last token WAS a pure zero run of length 63, then _last_zzi
- will be 1 while the number of coefficients decoded is 64.
- Thus, we will trigger the following special case, where the real
- coefficient count would not.
- Note also that a zero run of length 64 will give _last_zzi a value of 0,
- but we still process the DC coefficient, which might have a non-zero value
- due to DC prediction.
- Although convoluted, this is arguably the correct behavior: it allows us to
- dequantize fewer coefficients and use a smaller transform when the block
- ends with a long zero run instead of a normal EOB token.
- It could be smarter... multiple separate zero runs at the end of a block
- will fool it, but an encoder that generates these really deserves what it
- gets.
- Needless to say we inherited this approach from VP3.*/
- /*Special case only having a DC component.*/
- if(_last_zzi<2){
- ogg_uint16_t p;
- /*Why is the iquant product rounded in this case and no others?
- Who knows.*/
- p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
- /*Fill res_buf with p.*/
- __asm__ __volatile__(
- /*mm0=0000 0000 0000 AAAA*/
- "movd %[p],%%mm0\n\t"
- /*mm1=0000 0000 0000 AAAA*/
- "movd %[p],%%mm1\n\t"
- /*mm0=0000 0000 AAAA 0000*/
- "pslld $16,%%mm0\n\t"
- /*mm0=0000 0000 AAAA AAAA*/
- "por %%mm1,%%mm0\n\t"
- /*mm0=AAAA AAAA AAAA AAAA*/
- "punpcklwd %%mm0,%%mm0\n\t"
- "movq %%mm0,(%[res_buf])\n\t"
- "movq %%mm0,8(%[res_buf])\n\t"
- "movq %%mm0,16(%[res_buf])\n\t"
- "movq %%mm0,24(%[res_buf])\n\t"
- "movq %%mm0,32(%[res_buf])\n\t"
- "movq %%mm0,40(%[res_buf])\n\t"
- "movq %%mm0,48(%[res_buf])\n\t"
- "movq %%mm0,56(%[res_buf])\n\t"
- "movq %%mm0,64(%[res_buf])\n\t"
- "movq %%mm0,72(%[res_buf])\n\t"
- "movq %%mm0,80(%[res_buf])\n\t"
- "movq %%mm0,88(%[res_buf])\n\t"
- "movq %%mm0,96(%[res_buf])\n\t"
- "movq %%mm0,104(%[res_buf])\n\t"
- "movq %%mm0,112(%[res_buf])\n\t"
- "movq %%mm0,120(%[res_buf])\n\t"
- :
- :[res_buf]"r"(res_buf),[p]"r"((unsigned)p)
- :"memory"
- );
- }
- else{
- /*Then, fill in the remainder of the coefficients with 0's, and perform
- the iDCT.*/
- /*First zero the buffer.*/
- /*On K7, etc., this could be replaced with movntq and sfence.*/
- __asm__ __volatile__(
- "pxor %%mm0,%%mm0\n\t"
- "movq %%mm0,(%[res_buf])\n\t"
- "movq %%mm0,8(%[res_buf])\n\t"
- "movq %%mm0,16(%[res_buf])\n\t"
- "movq %%mm0,24(%[res_buf])\n\t"
- "movq %%mm0,32(%[res_buf])\n\t"
- "movq %%mm0,40(%[res_buf])\n\t"
- "movq %%mm0,48(%[res_buf])\n\t"
- "movq %%mm0,56(%[res_buf])\n\t"
- "movq %%mm0,64(%[res_buf])\n\t"
- "movq %%mm0,72(%[res_buf])\n\t"
- "movq %%mm0,80(%[res_buf])\n\t"
- "movq %%mm0,88(%[res_buf])\n\t"
- "movq %%mm0,96(%[res_buf])\n\t"
- "movq %%mm0,104(%[res_buf])\n\t"
- "movq %%mm0,112(%[res_buf])\n\t"
- "movq %%mm0,120(%[res_buf])\n\t"
- :
- :[res_buf]"r"(res_buf)
- :"memory"
- );
- res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
- /*This is planned to be rewritten in MMX.*/
- for(zzi=1;zzi<_ncoefs;zzi++){
- int ci;
- ci=OC_FZIG_ZAG[zzi];
- res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
- _ac_iquant[ci]);
- }
- if(_last_zzi<10)oc_idct8x8_10_mmx(res_buf);
- else oc_idct8x8_mmx(res_buf);
- }
- /*Fill in the target buffer.*/
- dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
- /*For now ystride values in all ref frames assumed to be equal.*/
- if(_frag->mbmode==OC_MODE_INTRA){
- oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
- }
- else{
- int ref_framei;
- int ref_ystride;
- int mvoffsets[2];
- ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
- ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
- if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],_frag->mv[1],
- ref_ystride,_pli)>1){
- oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
- _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
- _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
- }
- else{
- oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
- _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
- }
- }
- oc_restore_fpu(_state);
-}
-
-/*Copies the fragments specified by the lists of fragment indices from one
- frame to another.
- _fragis: A pointer to a list of fragment indices.
- _nfragis: The number of fragment indices to copy.
- _dst_frame: The reference frame to copy to.
- _src_frame: The reference frame to copy from.
- _pli: The color plane the fragments lie in.*/
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli){
- const int *fragi;
- const int *fragi_end;
- int dst_framei;
- ptrdiff_t dst_ystride;
- int src_framei;
- ptrdiff_t src_ystride;
- dst_framei=_state->ref_frame_idx[_dst_frame];
- src_framei=_state->ref_frame_idx[_src_frame];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
- src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
- fragi_end=_fragis+_nfragis;
- for(fragi=_fragis;fragifrags+*fragi;
- dst=frag->buffer[dst_framei];
- src=frag->buffer[src_framei];
- __asm__ __volatile__(
- /*src+0*src_ystride*/
- "movq (%[src]),%%mm0\n\t"
- /*s=src_ystride*3*/
- "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
- /*src+1*src_ystride*/
- "movq (%[src],%[src_ystride]),%%mm1\n\t"
- /*src+2*src_ystride*/
- "movq (%[src],%[src_ystride],2),%%mm2\n\t"
- /*src+3*src_ystride*/
- "movq (%[src],%[s]),%%mm3\n\t"
- /*dst+0*dst_ystride*/
- "movq %%mm0,(%[dst])\n\t"
- /*s=dst_ystride*3*/
- "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
- /*dst+1*dst_ystride*/
- "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
- /*Pointer to next 4.*/
- "lea (%[src],%[src_ystride],4),%[src]\n\t"
- /*dst+2*dst_ystride*/
- "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
- /*dst+3*dst_ystride*/
- "movq %%mm3,(%[dst],%[s])\n\t"
- /*Pointer to next 4.*/
- "lea (%[dst],%[dst_ystride],4),%[dst]\n\t"
- /*src+0*src_ystride*/
- "movq (%[src]),%%mm0\n\t"
- /*s=src_ystride*3*/
- "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
- /*src+1*src_ystride*/
- "movq (%[src],%[src_ystride]),%%mm1\n\t"
- /*src+2*src_ystride*/
- "movq (%[src],%[src_ystride],2),%%mm2\n\t"
- /*src+3*src_ystride*/
- "movq (%[src],%[s]),%%mm3\n\t"
- /*dst+0*dst_ystride*/
- "movq %%mm0,(%[dst])\n\t"
- /*s=dst_ystride*3*/
- "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
- /*dst+1*dst_ystride*/
- "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
- /*dst+2*dst_ystride*/
- "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
- /*dst+3*dst_ystride*/
- "movq %%mm3,(%[dst],%[s])\n\t"
- :[s]"=&r"(s)
- :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride),
- [src_ystride]"r"(src_ystride)
- :"memory"
- );
- }
- /*This needs to be removed when decode specific functions are implemented:*/
- __asm__ __volatile__("emms\n\t");
-}
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- ptrdiff_t s;
- _pix-=_ystride*2;
- __asm__ __volatile__(
- /*mm0=0*/
- "pxor %%mm0,%%mm0\n\t"
- /*s=_ystride*3*/
- "lea (%[ystride],%[ystride],2),%[s]\n\t"
- /*mm7=_pix[0...8]*/
- "movq (%[pix]),%%mm7\n\t"
- /*mm4=_pix[0...8+_ystride*3]*/
- "movq (%[pix],%[s]),%%mm4\n\t"
- /*mm6=_pix[0...8]*/
- "movq %%mm7,%%mm6\n\t"
- /*Expand unsigned _pix[0...3] to 16 bits.*/
- "punpcklbw %%mm0,%%mm6\n\t"
- "movq %%mm4,%%mm5\n\t"
- /*Expand unsigned _pix[4...8] to 16 bits.*/
- "punpckhbw %%mm0,%%mm7\n\t"
- /*Expand other arrays too.*/
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm5\n\t"
- /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
- "psubw %%mm4,%%mm6\n\t"
- "psubw %%mm5,%%mm7\n\t"
- /*mm5=mm4=_pix[0...8+_ystride]*/
- "movq (%[pix],%[ystride]),%%mm4\n\t"
- /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/
- "movq (%[pix],%[ystride],2),%%mm2\n\t"
- "movq %%mm4,%%mm5\n\t"
- "movq %%mm2,%%mm3\n\t"
- "movq %%mm2,%%mm1\n\t"
- /*Expand these arrays.*/
- "punpckhbw %%mm0,%%mm5\n\t"
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm3\n\t"
- "punpcklbw %%mm0,%%mm2\n\t"
- /*mm0=3 3 3 3
- mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
- "pcmpeqw %%mm0,%%mm0\n\t"
- "psubw %%mm5,%%mm3\n\t"
- "psrlw $14,%%mm0\n\t"
- "psubw %%mm4,%%mm2\n\t"
- /*Scale by 3.*/
- "pmullw %%mm0,%%mm3\n\t"
- "pmullw %%mm0,%%mm2\n\t"
- /*mm0=4 4 4 4
- f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
- 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
- "psrlw $1,%%mm0\n\t"
- "paddw %%mm7,%%mm3\n\t"
- "psllw $2,%%mm0\n\t"
- "paddw %%mm6,%%mm2\n\t"
- /*Add 4.*/
- "paddw %%mm0,%%mm3\n\t"
- "paddw %%mm0,%%mm2\n\t"
- /*"Divide" by 8.*/
- "psraw $3,%%mm3\n\t"
- "psraw $3,%%mm2\n\t"
- /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
- /*Free up mm5.*/
- "packuswb %%mm5,%%mm4\n\t"
- /*mm0=L L L L*/
- "movq (%[ll]),%%mm0\n\t"
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- "movq %%mm2,%%mm5\n\t"
- "pxor %%mm6,%%mm6\n\t"
- "movq %%mm0,%%mm7\n\t"
- "psubw %%mm0,%%mm6\n\t"
- "psllw $1,%%mm7\n\t"
- "psllw $1,%%mm6\n\t"
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- "pcmpgtw %%mm2,%%mm7\n\t"
- "pcmpgtw %%mm6,%%mm5\n\t"
- "pand %%mm7,%%mm2\n\t"
- "movq %%mm0,%%mm7\n\t"
- "pand %%mm5,%%mm2\n\t"
- "psllw $1,%%mm7\n\t"
- "movq %%mm3,%%mm5\n\t"
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- "pcmpgtw %%mm3,%%mm7\n\t"
- "pcmpgtw %%mm6,%%mm5\n\t"
- "pand %%mm7,%%mm3\n\t"
- "movq %%mm0,%%mm7\n\t"
- "pand %%mm5,%%mm3\n\t"
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- "psraw $1,%%mm6\n\t"
- "movq %%mm2,%%mm5\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm5=R_i>L?FF:00*/
- "pcmpgtw %%mm0,%%mm5\n\t"
- /*mm6=-L>R_i?FF:00*/
- "pcmpgtw %%mm2,%%mm6\n\t"
- /*mm7=R_i>L?2L:0*/
- "pand %%mm5,%%mm7\n\t"
- /*mm2=R_i>L?R_i-2L:R_i*/
- "psubw %%mm7,%%mm2\n\t"
- "movq %%mm0,%%mm7\n\t"
- /*mm5=-L>R_i||R_i>L*/
- "por %%mm6,%%mm5\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm7=-L>R_i?2L:0*/
- "pand %%mm6,%%mm7\n\t"
- "pxor %%mm6,%%mm6\n\t"
- /*mm2=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm7,%%mm2\n\t"
- "psubw %%mm0,%%mm6\n\t"
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- "pand %%mm2,%%mm5\n\t"
- "movq %%mm0,%%mm7\n\t"
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm5,%%mm2\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm5,%%mm2\n\t"
- "movq %%mm3,%%mm5\n\t"
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm6=-L>R_i?FF:00*/
- "pcmpgtw %%mm3,%%mm6\n\t"
- /*mm5=R_i>L?FF:00*/
- "pcmpgtw %%mm0,%%mm5\n\t"
- /*mm7=R_i>L?2L:0*/
- "pand %%mm5,%%mm7\n\t"
- /*mm2=R_i>L?R_i-2L:R_i*/
- "psubw %%mm7,%%mm3\n\t"
- "psllw $1,%%mm0\n\t"
- /*mm5=-L>R_i||R_i>L*/
- "por %%mm6,%%mm5\n\t"
- /*mm0=-L>R_i?2L:0*/
- "pand %%mm6,%%mm0\n\t"
- /*mm3=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm0,%%mm3\n\t"
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- "pand %%mm3,%%mm5\n\t"
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm5,%%mm3\n\t"
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm5,%%mm3\n\t"
- /*Unfortunately, there's no unsigned byte+signed byte with unsigned
- saturation op code, so we have to promote things back 16 bits.*/
- "pxor %%mm0,%%mm0\n\t"
- "movq %%mm4,%%mm5\n\t"
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm5\n\t"
- "movq %%mm1,%%mm6\n\t"
- "punpcklbw %%mm0,%%mm1\n\t"
- "punpckhbw %%mm0,%%mm6\n\t"
- /*_pix[0...8+_ystride]+=R_i*/
- "paddw %%mm2,%%mm4\n\t"
- "paddw %%mm3,%%mm5\n\t"
- /*_pix[0...8+_ystride*2]-=R_i*/
- "psubw %%mm2,%%mm1\n\t"
- "psubw %%mm3,%%mm6\n\t"
- "packuswb %%mm5,%%mm4\n\t"
- "packuswb %%mm6,%%mm1\n\t"
- /*Write it back out.*/
- "movq %%mm4,(%[pix],%[ystride])\n\t"
- "movq %%mm1,(%[pix],%[ystride],2)\n\t"
- :[s]"=&r"(s)
- :[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll)
- :"memory"
- );
-}
-
-/*This code implements the bulk of loop_filter_h().
- Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
- four p0's to one register we must transpose the values in four mmx regs.
- When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride,
- const ogg_int16_t *_ll){
- ptrdiff_t s;
- /*d doesn't technically need to be 64-bit on x86-64, but making it so will
- help avoid partial register stalls.*/
- ptrdiff_t d;
- __asm__ __volatile__(
- /*x x x x 3 2 1 0*/
- "movd (%[pix]),%%mm0\n\t"
- /*s=_ystride*3*/
- "lea (%[ystride],%[ystride],2),%[s]\n\t"
- /*x x x x 7 6 5 4*/
- "movd (%[pix],%[ystride]),%%mm1\n\t"
- /*x x x x B A 9 8*/
- "movd (%[pix],%[ystride],2),%%mm2\n\t"
- /*x x x x F E D C*/
- "movd (%[pix],%[s]),%%mm3\n\t"
- /*mm0=7 3 6 2 5 1 4 0*/
- "punpcklbw %%mm1,%%mm0\n\t"
- /*mm2=F B E A D 9 C 8*/
- "punpcklbw %%mm3,%%mm2\n\t"
- /*mm1=7 3 6 2 5 1 4 0*/
- "movq %%mm0,%%mm1\n\t"
- /*mm0=F B 7 3 E A 6 2*/
- "punpckhwd %%mm2,%%mm0\n\t"
- /*mm1=D 9 5 1 C 8 4 0*/
- "punpcklwd %%mm2,%%mm1\n\t"
- "pxor %%mm7,%%mm7\n\t"
- /*mm5=D 9 5 1 C 8 4 0*/
- "movq %%mm1,%%mm5\n\t"
- /*mm1=x C x 8 x 4 x 0==pix[0]*/
- "punpcklbw %%mm7,%%mm1\n\t"
- /*mm5=x D x 9 x 5 x 1==pix[1]*/
- "punpckhbw %%mm7,%%mm5\n\t"
- /*mm3=F B 7 3 E A 6 2*/
- "movq %%mm0,%%mm3\n\t"
- /*mm0=x E x A x 6 x 2==pix[2]*/
- "punpcklbw %%mm7,%%mm0\n\t"
- /*mm3=x F x B x 7 x 3==pix[3]*/
- "punpckhbw %%mm7,%%mm3\n\t"
- /*mm1=mm1-mm3==pix[0]-pix[3]*/
- "psubw %%mm3,%%mm1\n\t"
- /*Save a copy of pix[2] for later.*/
- "movq %%mm0,%%mm4\n\t"
- /*mm2=3 3 3 3
- mm0=mm0-mm5==pix[2]-pix[1]*/
- "pcmpeqw %%mm2,%%mm2\n\t"
- "psubw %%mm5,%%mm0\n\t"
- "psrlw $14,%%mm2\n\t"
- /*Scale by 3.*/
- "pmullw %%mm2,%%mm0\n\t"
- /*mm2=4 4 4 4
- f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
- "psrlw $1,%%mm2\n\t"
- "paddw %%mm1,%%mm0\n\t"
- "psllw $2,%%mm2\n\t"
- /*Add 4.*/
- "paddw %%mm2,%%mm0\n\t"
- /*"Divide" by 8, producing the residuals R_i.*/
- "psraw $3,%%mm0\n\t"
- /*Now compute lflim of mm0 cf. Section 7.10 of the sepc.*/
- /*mm6=L L L L*/
- "movq (%[ll]),%%mm6\n\t"
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- "movq %%mm0,%%mm1\n\t"
- "pxor %%mm2,%%mm2\n\t"
- "movq %%mm6,%%mm3\n\t"
- "psubw %%mm6,%%mm2\n\t"
- "psllw $1,%%mm3\n\t"
- "psllw $1,%%mm2\n\t"
- /*mm0==R_3 R_2 R_1 R_0*/
- /*mm1==R_3 R_2 R_1 R_0*/
- /*mm2==-2L -2L -2L -2L*/
- /*mm3==2L 2L 2L 2L*/
- "pcmpgtw %%mm0,%%mm3\n\t"
- "pcmpgtw %%mm2,%%mm1\n\t"
- "pand %%mm3,%%mm0\n\t"
- "pand %%mm1,%%mm0\n\t"
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- "psraw $1,%%mm2\n\t"
- "movq %%mm0,%%mm1\n\t"
- "movq %%mm6,%%mm3\n\t"
- /*mm0==R_3 R_2 R_1 R_0*/
- /*mm1==R_3 R_2 R_1 R_0*/
- /*mm2==-L -L -L -L*/
- /*mm6==L L L L*/
- /*mm2=-L>R_i?FF:00*/
- "pcmpgtw %%mm0,%%mm2\n\t"
- /*mm1=R_i>L?FF:00*/
- "pcmpgtw %%mm6,%%mm1\n\t"
- /*mm3=2L 2L 2L 2L*/
- "psllw $1,%%mm3\n\t"
- /*mm6=2L 2L 2L 2L*/
- "psllw $1,%%mm6\n\t"
- /*mm3=R_i>L?2L:0*/
- "pand %%mm1,%%mm3\n\t"
- /*mm6=-L>R_i?2L:0*/
- "pand %%mm2,%%mm6\n\t"
- /*mm0=R_i>L?R_i-2L:R_i*/
- "psubw %%mm3,%%mm0\n\t"
- /*mm1=-L>R_i||R_i>L*/
- "por %%mm2,%%mm1\n\t"
- /*mm0=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm6,%%mm0\n\t"
- /*mm1=-L>R_i||R_i>L?R_i':0*/
- "pand %%mm0,%%mm1\n\t"
- /*mm0=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm1,%%mm0\n\t"
- /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm1,%%mm0\n\t"
- /*_pix[1]+=R_i;*/
- "paddw %%mm0,%%mm5\n\t"
- /*_pix[2]-=R_i;*/
- "psubw %%mm0,%%mm4\n\t"
- /*mm5=x x x x D 9 5 1*/
- "packuswb %%mm7,%%mm5\n\t"
- /*mm4=x x x x E A 6 2*/
- "packuswb %%mm7,%%mm4\n\t"
- /*mm5=E D A 9 6 5 2 1*/
- "punpcklbw %%mm4,%%mm5\n\t"
- /*d=6 5 2 1*/
- "movd %%mm5,%[d]\n\t"
- "movw %w[d],1(%[pix])\n\t"
- /*Why is there such a big stall here?*/
- "psrlq $32,%%mm5\n\t"
- "shr $16,%[d]\n\t"
- "movw %w[d],1(%[pix],%[ystride])\n\t"
- /*d=E D A 9*/
- "movd %%mm5,%[d]\n\t"
- "movw %w[d],1(%[pix],%[ystride],2)\n\t"
- "shr $16,%[d]\n\t"
- "movw %w[d],1(%[pix],%[s])\n\t"
- :[s]"=&r"(s),[d]"=&r"(d),
- [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
- :
- :"memory"
- );
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _pix-=2;
- loop_filter_h4(_pix,_ystride,_ll);
- loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-/*We copy the whole function because the MMX routines will be inlined 4 times,
- and we can do just a single emms call at the end this way.
- We also do not use the _bv lookup table, instead computing the values that
- would lie in it on the fly.*/
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
- The filter may be run on the bottom edge, affecting pixels in the next row of
- fragments, so this row also needs to be available.
- _bv: The bounding values array.
- _refi: The index of the frame buffer to filter.
- _pli: The color plane to filter.
- _fragy0: The Y coordinate of the first fragment row to filter.
- _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
- ogg_int16_t __attribute__((aligned(8))) ll[4];
- th_img_plane *iplane;
- oc_fragment_plane *fplane;
- oc_fragment *frag_top;
- oc_fragment *frag0;
- oc_fragment *frag;
- oc_fragment *frag_end;
- oc_fragment *frag0_end;
- oc_fragment *frag_bot;
- ll[0]=ll[1]=ll[2]=ll[3]=
- (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
- iplane=_state->ref_frame_bufs[_refi]+_pli;
- fplane=_state->fplanes+_pli;
- /*The following loops are constructed somewhat non-intuitively on purpose.
- The main idea is: if a block boundary has at least one coded fragment on
- it, the filter is applied to it.
- However, the order that the filters are applied in matters, and VP3 chose
- the somewhat strange ordering used below.*/
- frag_top=_state->frags+fplane->froffset;
- frag0=frag_top+_fragy0*fplane->nhfrags;
- frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
- frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
- while(frag0nhfrags;
- while(fragcoded){
- if(frag>frag0){
- loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
- }
- if(frag0>frag_top){
- loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
- }
- if(frag+1coded){
- loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
- }
- if(frag+fplane->nhfragsnhfrags)->coded){
- loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
- iplane->stride,ll);
- }
- }
- frag++;
- }
- frag0+=fplane->nhfrags;
- }
- /*This needs to be removed when decode specific functions are implemented:*/
- __asm__ __volatile__("emms\n\t");
-}
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/x86/x86int.h b/Engine/lib/libtheora/lib/dec/x86/x86int.h
deleted file mode 100644
index 05f9c57c1..000000000
--- a/Engine/lib/libtheora/lib/dec/x86/x86int.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_x86int_H)
-# define _x86_x86int_H (1)
-# include "../../internal.h"
-
-void oc_state_vtable_init_x86(oc_theora_state *_state);
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue);
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
-void oc_restore_fpu_mmx(void);
-void oc_idct8x8_mmx(ogg_int16_t _y[64]);
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
-void oc_fill_idct_constants_mmx(void);
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c
deleted file mode 100644
index e87e0640d..000000000
--- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-#include "../../internal.h"
-
-/* ------------------------------------------------------------------------
- MMX reconstruction fragment routines for Visual Studio.
- Tested with VS2005. Should compile for VS2003 and VC6 as well.
-
- Initial implementation 2007 by Nils Pipenbrinck.
- ---------------------------------------------------------------------*/
-
-#if defined(USE_ASM)
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue){
- /* ---------------------------------------------------------------------
- This function does the inter reconstruction step with 8 iterations
- unrolled. The iteration for each instruction is noted by the #id in the
- comments (in case you want to reconstruct it)
- --------------------------------------------------------------------- */
- _asm{
- mov edi, [_residue] /* load residue ptr */
- mov eax, 0x00800080 /* generate constant */
- mov ebx, [_dst_ystride] /* load dst-stride */
- mov edx, [_dst] /* load dest pointer */
-
- /* unrolled loop begins here */
-
- movd mm0, eax /* load constant */
- movq mm1, [edi+ 8*0] /* #1 load low residue */
- movq mm2, [edi+ 8*1] /* #1 load high residue */
- punpckldq mm0, mm0 /* build constant */
- movq mm3, [edi+ 8*2] /* #2 load low residue */
- movq mm4, [edi+ 8*3] /* #2 load high residue */
- movq mm5, [edi+ 8*4] /* #3 load low residue */
- movq mm6, [edi+ 8*5] /* #3 load high residue */
- paddsw mm1, mm0 /* #1 bias low residue */
- paddsw mm2, mm0 /* #1 bias high residue */
- packuswb mm1, mm2 /* #1 pack to byte */
- paddsw mm3, mm0 /* #2 bias low residue */
- paddsw mm4, mm0 /* #2 bias high residue */
- packuswb mm3, mm4 /* #2 pack to byte */
- paddsw mm5, mm0 /* #3 bias low residue */
- paddsw mm6, mm0 /* #3 bias high residue */
- packuswb mm5, mm6 /* #3 pack to byte */
- movq [edx], mm1 /* #1 write row */
- movq [edx + ebx], mm3 /* #2 write row */
- movq [edx + ebx*2], mm5 /* #3 write row */
- movq mm1, [edi+ 8*6] /* #4 load low residue */
- lea ecx, [ebx + ebx*2] /* make dst_ystride * 3 */
- movq mm2, [edi+ 8*7] /* #4 load high residue */
- movq mm3, [edi+ 8*8] /* #5 load low residue */
- lea esi, [ebx*4 + ebx] /* make dst_ystride * 5 */
- movq mm4, [edi+ 8*9] /* #5 load high residue */
- movq mm5, [edi+ 8*10] /* #6 load low residue */
- lea eax, [ecx*2 + ebx] /* make dst_ystride * 7 */
- movq mm6, [edi+ 8*11] /* #6 load high residue */
- paddsw mm1, mm0 /* #4 bias low residue */
- paddsw mm2, mm0 /* #4 bias high residue */
- packuswb mm1, mm2 /* #4 pack to byte */
- paddsw mm3, mm0 /* #5 bias low residue */
- paddsw mm4, mm0 /* #5 bias high residue */
- packuswb mm3, mm4 /* #5 pack to byte */
- paddsw mm5, mm0 /* #6 bias low residue */
- paddsw mm6, mm0 /* #6 bias high residue */
- packuswb mm5, mm6 /* #6 pack to byte */
- movq [edx + ecx], mm1 /* #4 write row */
- movq [edx + ebx*4], mm3 /* #5 write row */
- movq [edx + esi], mm5 /* #6 write row */
- movq mm1, [edi+ 8*12] /* #7 load low residue */
- movq mm2, [edi+ 8*13] /* #7 load high residue */
- movq mm3, [edi+ 8*14] /* #8 load low residue */
- movq mm4, [edi+ 8*15] /* #8 load high residue */
- paddsw mm1, mm0 /* #7 bias low residue */
- paddsw mm2, mm0 /* #7 bias high residue */
- packuswb mm1, mm2 /* #7 pack to byte */
- paddsw mm3, mm0 /* #8 bias low residue */
- paddsw mm4, mm0 /* #8 bias high residue */
- packuswb mm3, mm4 /* #8 pack to byte */
- movq [edx + ecx*2], mm1 /* #7 write row */
- movq [edx + eax], mm3 /* #8 write row */
- }
-}
-
-
-
-void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
- const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
- /* ---------------------------------------------------------------------
- This function does the inter reconstruction step with two iterations
- running in parallel to hide some load-latencies and break the dependency
- chains. The iteration for each instruction is noted by the #id in the
- comments (in case you want to reconstruct it)
- --------------------------------------------------------------------- */
- _asm{
- pxor mm0, mm0 /* generate constant 0 */
- mov esi, [_src]
- mov edi, [_residue]
- mov eax, [_src_ystride]
- mov edx, [_dst]
- mov ebx, [_dst_ystride]
- mov ecx, 4
-
- align 16
-
-nextchunk:
- movq mm3, [esi] /* #1 load source */
- movq mm1, [edi+0] /* #1 load residium low */
- movq mm2, [edi+8] /* #1 load residium high */
- movq mm7, [esi+eax] /* #2 load source */
- movq mm4, mm3 /* #1 get copy of src */
- movq mm5, [edi+16] /* #2 load residium low */
- punpckhbw mm4, mm0 /* #1 expand high source */
- movq mm6, [edi+24] /* #2 load residium high */
- punpcklbw mm3, mm0 /* #1 expand low source */
- paddsw mm4, mm2 /* #1 add residium high */
- movq mm2, mm7 /* #2 get copy of src */
- paddsw mm3, mm1 /* #1 add residium low */
- punpckhbw mm2, mm0 /* #2 expand high source */
- packuswb mm3, mm4 /* #1 final row pixels */
- punpcklbw mm7, mm0 /* #2 expand low source */
- movq [edx], mm3 /* #1 write row */
- paddsw mm2, mm6 /* #2 add residium high */
- add edi, 32 /* residue += 4 */
- paddsw mm7, mm5 /* #2 add residium low */
- sub ecx, 1 /* update loop counter */
- packuswb mm7, mm2 /* #2 final row */
- lea esi, [esi+eax*2] /* src += stride * 2 */
- movq [edx + ebx], mm7 /* #2 write row */
- lea edx, [edx+ebx*2] /* dst += stride * 2 */
- jne nextchunk
- }
-}
-
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
- const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue){
- /* ---------------------------------------------------------------------
- This function does the inter2 reconstruction step.The building of the
- average is done with a bit-twiddeling trick to avoid excessive register
- copy work during byte to word conversion.
-
- average = (a & b) + (((a ^ b) & 0xfe) >> 1);
-
- (shown for a single byte; it's done with 8 of them at a time)
-
- Slightly faster than the obvious method using add and shift, but not
- earthshaking improvement either.
-
- If anyone comes up with a way that produces bit-identical outputs
- using the pavgb instruction let me know and I'll do the 3dnow codepath.
- --------------------------------------------------------------------- */
- _asm{
- mov eax, 0xfefefefe
- mov esi, [_src1]
- mov edi, [_src2]
- movd mm1, eax
- mov ebx, [_residue]
- mov edx, [_dst]
- mov eax, [_dst_ystride]
- punpckldq mm1, mm1 /* replicate lsb32 */
- mov ecx, 8 /* init loop counter */
- pxor mm0, mm0 /* constant zero */
- sub edx, eax /* dst -= dst_stride */
-
- align 16
-
-nextrow:
- movq mm2, [esi] /* load source1 */
- movq mm3, [edi] /* load source2 */
- movq mm5, [ebx + 0] /* load lower residue */
- movq mm6, [ebx + 8] /* load higer residue */
- add esi, _src1_ystride /* src1 += src1_stride */
- add edi, _src2_ystride /* src2 += src1_stride */
- movq mm4, mm2 /* get copy of source1 */
- pand mm2, mm3 /* s1 & s2 (avg part) */
- pxor mm3, mm4 /* s1 ^ s2 (avg part) */
- add ebx, 16 /* residue++ */
- pand mm3, mm1 /* mask out low bits */
- psrlq mm3, 1 /* shift xor avg-part */
- paddd mm3, mm2 /* build final average */
- add edx, eax /* dst += dst_stride */
- movq mm2, mm3 /* get copy of average */
- punpckhbw mm3, mm0 /* average high */
- punpcklbw mm2, mm0 /* average low */
- paddsw mm3, mm6 /* high + residue */
- paddsw mm2, mm5 /* low + residue */
- sub ecx, 1 /* update loop counter */
- packuswb mm2, mm3 /* pack and saturate */
- movq [edx], mm2 /* write row */
- jne nextrow
- }
-}
-
-void oc_restore_fpu_mmx(void){
- _asm { emms }
-}
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c
deleted file mode 100644
index 2c171594f..000000000
--- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c
+++ /dev/null
@@ -1,1006 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-
-/* -------------------------------------------------------------------
- MMX based IDCT for the theora codec.
-
- Originally written by Rudolf Marek, based on code from On2's VP3.
- Converted to Visual Studio inline assembly by Nils Pipenbrinck.
-
- ---------------------------------------------------------------------*/
-#if defined(USE_ASM)
-
-#include
-#include "../dct.h"
-#include "../idct.h"
-#include "x86int.h"
-
-/*A table of constants used by the MMX routines.*/
-static const __declspec(align(16)) ogg_uint16_t
- OC_IDCT_CONSTS[(7+1)*4]={
- (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
- (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
- (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
- (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
- (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
- (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
- (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
- (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
- (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
- (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
- (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
- (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
- (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
- (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
- 8, 8, 8, 8
-};
-
-
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){
- _asm {
- mov edx, [_y]
- mov eax, offset OC_IDCT_CONSTS
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 18H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 38H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 28H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 08H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 20H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 10H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 08H], mm4
- punpckhwd mm0, mm7
- movq [edx + 18H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx]
- punpckldq mm1, mm0
- movq mm5, [edx + 10H]
- movq mm0, mm4
- movq [edx + 38H], mm6
- punpcklwd mm0, mm5
- movq [edx + 28H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx], mm0
- punpckhwd mm5, mm3
- movq [edx + 10H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 30H], mm4
- movq [edx + 20H], mm2
- movq mm2, [edx + 70H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 50H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 60H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 50H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 60H], mm6
- movq mm2, mm0
- movq mm6, [edx + 40H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 50H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 60H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 50H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx + 40H], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 48H], mm4
- punpckhwd mm0, mm7
- movq [edx + 58H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx + 40H]
- punpckldq mm1, mm0
- movq mm5, [edx + 50H]
- movq mm0, mm4
- movq [edx + 78H], mm6
- punpcklwd mm0, mm5
- movq [edx + 68H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx + 40H], mm0
- punpckhwd mm5, mm3
- movq [edx + 50H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 70H], mm4
- movq [edx + 60H], mm2
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 50H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 70H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 60H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 40H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 20H]
- paddw mm7, mm7
- movq [edx + 20H], mm2
- paddw mm7, mm4
- movq [edx + 10H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 40H], mm4
- psraw mm5, 4
- movq [edx + 30H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 60H], mm6
- psraw mm0, 4
- movq [edx + 50H], mm5
- movq [edx + 70H], mm7
- movq [edx], mm0
- movq mm2, [edx + 38H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 18H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 28H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 18H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 28H], mm6
- movq mm2, mm0
- movq mm6, [edx + 08H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 18H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 28H]
- paddw mm7, mm7
- movq [edx + 28H], mm2
- paddw mm7, mm4
- movq [edx + 18H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 48H], mm4
- psraw mm5, 4
- movq [edx + 38H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 68H], mm6
- psraw mm0, 4
- movq [edx + 58H], mm5
- movq [edx + 78H], mm7
- movq [edx + 08H], mm0
- /* emms */
- }
-}
-
-
-void oc_idct8x8_mmx(ogg_int16_t _y[64]){
- _asm {
- mov edx, [_y]
- mov eax, offset OC_IDCT_CONSTS
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 18H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 38H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 28H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 08H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 20H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 10H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 08H], mm4
- punpckhwd mm0, mm7
- movq [edx + 18H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx]
- punpckldq mm1, mm0
- movq mm5, [edx + 10H]
- movq mm0, mm4
- movq [edx + 38H], mm6
- punpcklwd mm0, mm5
- movq [edx + 28H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx], mm0
- punpckhwd mm5, mm3
- movq [edx + 10H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 30H], mm4
- movq [edx + 20H], mm2
- movq mm2, [edx + 70H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 50H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 60H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 50H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 60H], mm6
- movq mm2, mm0
- movq mm6, [edx + 40H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 50H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 60H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 50H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx + 40H], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 48H], mm4
- punpckhwd mm0, mm7
- movq [edx + 58H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx + 40H]
- punpckldq mm1, mm0
- movq mm5, [edx + 50H]
- movq mm0, mm4
- movq [edx + 78H], mm6
- punpcklwd mm0, mm5
- movq [edx + 68H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx + 40H], mm0
- punpckhwd mm5, mm3
- movq [edx + 50H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 70H], mm4
- movq [edx + 60H], mm2
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 50H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 70H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 60H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 40H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 20H]
- paddw mm7, mm7
- movq [edx + 20H], mm2
- paddw mm7, mm4
- movq [edx + 10H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 40H], mm4
- psraw mm5, 4
- movq [edx + 30H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 60H], mm6
- psraw mm0, 4
- movq [edx + 50H], mm5
- movq [edx + 70H], mm7
- movq [edx], mm0
- movq mm2, [edx + 38H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 18H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 28H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 18H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 28H], mm6
- movq mm2, mm0
- movq mm6, [edx + 08H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 18H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 28H]
- paddw mm7, mm7
- movq [edx + 28H], mm2
- paddw mm7, mm4
- movq [edx + 18H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 48H], mm4
- psraw mm5, 4
- movq [edx + 38H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 68H], mm6
- psraw mm0, 4
- movq [edx + 58H], mm5
- movq [edx + 78H], mm7
- movq [edx + 08H], mm0
- /* emms */
- }
-}
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c
deleted file mode 100644
index 62d06dc89..000000000
--- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-
-/* -------------------------------------------------------------------
- MMX based loop filter for the theora codec.
-
- Originally written by Rudolf Marek, based on code from On2's VP3.
- Converted to Visual Studio inline assembly by Nils Pipenbrinck.
-
- Note: I can't test these since my example files never get into the
- loop filters, but the code has been converted semi-automatic from
- the GCC sources, so it ought to work.
- ---------------------------------------------------------------------*/
-#include "../../internal.h"
-#include "x86int.h"
-#include
-
-#if defined(USE_ASM)
-
-
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _asm {
- mov eax, [_pix]
- mov edx, [_ystride]
- mov ebx, [_ll]
-
- /* _pix -= ystride */
- sub eax, edx
- /* mm0=0 */
- pxor mm0, mm0
- /* _pix -= ystride */
- sub eax, edx
- /* esi=_ystride*3 */
- lea esi, [edx + edx*2]
-
- /* mm7=_pix[0...8]*/
- movq mm7, [eax]
- /* mm4=_pix[0...8+_ystride*3]*/
- movq mm4, [eax + esi]
- /* mm6=_pix[0...8]*/
- movq mm6, mm7
- /* Expand unsigned _pix[0...3] to 16 bits.*/
- punpcklbw mm6, mm0
- movq mm5, mm4
- /* Expand unsigned _pix[4...7] to 16 bits.*/
- punpckhbw mm7, mm0
- punpcklbw mm4, mm0
- /* Expand other arrays too.*/
- punpckhbw mm5, mm0
- /*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/
- psubw mm6, mm4
- psubw mm7, mm5
- /*mm5=mm4=_pix[0...7+_ystride]*/
- movq mm4, [eax + edx]
- /*mm1=mm3=mm2=_pix[0..7]+_ystride*2]*/
- movq mm2, [eax + edx*2]
- movq mm5, mm4
- movq mm3, mm2
- movq mm1, mm2
- /*Expand these arrays.*/
- punpckhbw mm5, mm0
- punpcklbw mm4, mm0
- punpckhbw mm3, mm0
- punpcklbw mm2, mm0
- pcmpeqw mm0, mm0
- /*mm0=3 3 3 3
- mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
- psubw mm3, mm5
- psrlw mm0, 14
- psubw mm2, mm4
- /*Scale by 3.*/
- pmullw mm3, mm0
- pmullw mm2, mm0
- /*mm0=4 4 4 4
- f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
- 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
- psrlw mm0, 1
- paddw mm3, mm7
- psllw mm0, 2
- paddw mm2, mm6
- /*Add 4.*/
- paddw mm3, mm0
- paddw mm2, mm0
- /*"Divide" by 8.*/
- psraw mm3, 3
- psraw mm2, 3
- /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
- /*Free up mm5.*/
- packuswb mm4, mm5
- /*mm0=L L L L*/
- movq mm0, [ebx]
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- movq mm5, mm2
- pxor mm6, mm6
- movq mm7, mm0
- psubw mm6, mm0
- psllw mm7, 1
- psllw mm6, 1
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- pcmpgtw mm7, mm2
- pcmpgtw mm5, mm6
- pand mm2, mm7
- movq mm7, mm0
- pand mm2, mm5
- psllw mm7, 1
- movq mm5, mm3
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- pcmpgtw mm7, mm3
- pcmpgtw mm5, mm6
- pand mm3, mm7
- movq mm7, mm0
- pand mm3, mm5
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- psraw mm6, 1
- movq mm5, mm2
- psllw mm7, 1
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm5=R_i>L?FF:00*/
- pcmpgtw mm5, mm0
- /*mm6=-L>R_i?FF:00*/
- pcmpgtw mm6, mm2
- /*mm7=R_i>L?2L:0*/
- pand mm7, mm5
- /*mm2=R_i>L?R_i-2L:R_i*/
- psubw mm2, mm7
- movq mm7, mm0
- /*mm5=-L>R_i||R_i>L*/
- por mm5, mm6
- psllw mm7, 1
- /*mm7=-L>R_i?2L:0*/
- pand mm7, mm6
- pxor mm6, mm6
- /*mm2=-L>R_i?R_i+2L:R_i*/
- paddw mm2, mm7
- psubw mm6, mm0
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- pand mm5, mm2
- movq mm7, mm0
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- psubw mm2, mm5
- psllw mm7, 1
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- psubw mm2, mm5
- movq mm5, mm3
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm6=-L>R_i?FF:00*/
- pcmpgtw mm6, mm3
- /*mm5=R_i>L?FF:00*/
- pcmpgtw mm5, mm0
- /*mm7=R_i>L?2L:0*/
- pand mm7, mm5
- /*mm2=R_i>L?R_i-2L:R_i*/
- psubw mm3, mm7
- psllw mm0, 1
- /*mm5=-L>R_i||R_i>L*/
- por mm5, mm6
- /*mm0=-L>R_i?2L:0*/
- pand mm0, mm6
- /*mm3=-L>R_i?R_i+2L:R_i*/
- paddw mm3, mm0
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- pand mm5, mm3
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- psubw mm3, mm5
- /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
- psubw mm3, mm5
- /*Unfortunately, there's no unsigned byte+signed byte with unsigned
- saturation op code, so we have to promote things back 16 bits.*/
- pxor mm0, mm0
- movq mm5, mm4
- punpcklbw mm4, mm0
- punpckhbw mm5, mm0
- movq mm6, mm1
- punpcklbw mm1, mm0
- punpckhbw mm6, mm0
- /*_pix[0...8+_ystride]+=R_i*/
- paddw mm4, mm2
- paddw mm5, mm3
- /*_pix[0...8+_ystride*2]-=R_i*/
- psubw mm1, mm2
- psubw mm6, mm3
- packuswb mm4, mm5
- packuswb mm1, mm6
- /*Write it back out.*/
- movq [eax + edx], mm4
- movq [eax + edx*2], mm1
- }
-}
-
-/*This code implements the bulk of loop_filter_h().
- Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
- four p0's to one register we must transpose the values in four mmx regs.
- When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
- const ogg_int16_t *_ll){
- /* todo: merge the comments from the GCC sources */
- _asm {
- mov ecx, [_pix]
- mov edx, [_ystride]
- mov eax, [_ll]
- /*esi=_ystride*3*/
- lea esi, [edx + edx*2]
-
- movd mm0, dword ptr [ecx]
- movd mm1, dword ptr [ecx + edx]
- movd mm2, dword ptr [ecx + edx*2]
- movd mm3, dword ptr [ecx + esi]
- punpcklbw mm0, mm1
- punpcklbw mm2, mm3
- movq mm1, mm0
- punpckhwd mm0, mm2
- punpcklwd mm1, mm2
- pxor mm7, mm7
- movq mm5, mm1
- punpcklbw mm1, mm7
- punpckhbw mm5, mm7
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
- psubw mm1, mm3
- movq mm4, mm0
- pcmpeqw mm2, mm2
- psubw mm0, mm5
- psrlw mm2, 14
- pmullw mm0, mm2
- psrlw mm2, 1
- paddw mm0, mm1
- psllw mm2, 2
- paddw mm0, mm2
- psraw mm0, 3
- movq mm6, qword ptr [eax]
- movq mm1, mm0
- pxor mm2, mm2
- movq mm3, mm6
- psubw mm2, mm6
- psllw mm3, 1
- psllw mm2, 1
- pcmpgtw mm3, mm0
- pcmpgtw mm1, mm2
- pand mm0, mm3
- pand mm0, mm1
- psraw mm2, 1
- movq mm1, mm0
- movq mm3, mm6
- pcmpgtw mm2, mm0
- pcmpgtw mm1, mm6
- psllw mm3, 1
- psllw mm6, 1
- pand mm3, mm1
- pand mm6, mm2
- psubw mm0, mm3
- por mm1, mm2
- paddw mm0, mm6
- pand mm1, mm0
- psubw mm0, mm1
- psubw mm0, mm1
- paddw mm5, mm0
- psubw mm4, mm0
- packuswb mm5, mm7
- packuswb mm4, mm7
- punpcklbw mm5, mm4
- movd edi, mm5
- mov word ptr [ecx + 01H], di
- psrlq mm5, 32
- shr edi, 16
- mov word ptr [ecx + edx + 01H], di
- movd edi, mm5
- mov word ptr [ecx + edx*2 + 01H], di
- shr edi, 16
- mov word ptr [ecx + esi + 01H], di
- }
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _pix-=2;
- loop_filter_h4(_pix,_ystride,_ll);
- loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-
-/*We copy the whole function because the MMX routines will be inlined 4 times,
- and we can do just a single emms call at the end this way.
- We also do not use the _bv lookup table, instead computing the values that
- would lie in it on the fly.*/
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
- The filter may be run on the bottom edge, affecting pixels in the next row of
- fragments, so this row also needs to be available.
- _bv: The bounding values array.
- _refi: The index of the frame buffer to filter.
- _pli: The color plane to filter.
- _fragy0: The Y coordinate of the first fragment row to filter.
- _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
- ogg_int16_t __declspec(align(8)) ll[4];
- th_img_plane *iplane;
- oc_fragment_plane *fplane;
- oc_fragment *frag_top;
- oc_fragment *frag0;
- oc_fragment *frag;
- oc_fragment *frag_end;
- oc_fragment *frag0_end;
- oc_fragment *frag_bot;
- ll[0]=ll[1]=ll[2]=ll[3]=
- (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
- iplane=_state->ref_frame_bufs[_refi]+_pli;
- fplane=_state->fplanes+_pli;
- /*The following loops are constructed somewhat non-intuitively on purpose.
- The main idea is: if a block boundary has at least one coded fragment on
- it, the filter is applied to it.
- However, the order that the filters are applied in matters, and VP3 chose
- the somewhat strange ordering used below.*/
- frag_top=_state->frags+fplane->froffset;
- frag0=frag_top+_fragy0*fplane->nhfrags;
- frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
- frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
- while(frag0nhfrags;
- while(fragcoded){
- if(frag>frag0){
- loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
- }
- if(frag0>frag_top){
- loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
- }
- if(frag+1coded){
- loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
- }
- if(frag+fplane->nhfragsnhfrags)->coded){
- loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
- iplane->stride,ll);
- }
- }
- frag++;
- }
- frag0+=fplane->nhfrags;
- }
-
- /*This needs to be removed when decode specific functions are implemented:*/
- _mm_empty();
-}
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c
deleted file mode 100644
index 526ef53f3..000000000
--- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/* ------------------------------------------------------------------------
- MMX acceleration of complete fragment reconstruction algorithm.
- Originally written by Rudolf Marek.
-
- Conversion to MSC intrinsics by Nils Pipenbrinck.
- ---------------------------------------------------------------------*/
-#if defined(USE_ASM)
-
-#include "../../internal.h"
-#include "../idct.h"
-#include "x86int.h"
-#include
-
-static const unsigned char OC_FZIG_ZAGMMX[64]=
-{
- 0, 8, 1, 2, 9,16,24,17,
- 10, 3,32,11,18,25, 4,12,
- 5,26,19,40,33,34,41,48,
- 27, 6,13,20,28,21,14, 7,
- 56,49,42,35,43,50,57,36,
- 15,22,29,30,23,44,37,58,
- 51,59,38,45,52,31,60,53,
- 46,39,47,54,61,62,55,63
-};
-
-/* Fill a block with value */
-static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){
- __m64 t = _value;
- _dst[0] = t; _dst[1] = t; _dst[2] = t; _dst[3] = t;
- _dst[4] = t; _dst[5] = t; _dst[6] = t; _dst[7] = t;
- _dst[8] = t; _dst[9] = t; _dst[10] = t; _dst[11] = t;
- _dst[12] = t; _dst[13] = t; _dst[14] = t; _dst[15] = t;
-}
-
-/* copy a block of 8 byte elements using different strides */
-static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride,
- unsigned char * _src, int _src_ystride){
- __m64 a,b,c,d,e,f,g,h;
- a = *(__m64*)(_src + 0 * _src_ystride);
- b = *(__m64*)(_src + 1 * _src_ystride);
- c = *(__m64*)(_src + 2 * _src_ystride);
- d = *(__m64*)(_src + 3 * _src_ystride);
- e = *(__m64*)(_src + 4 * _src_ystride);
- f = *(__m64*)(_src + 5 * _src_ystride);
- g = *(__m64*)(_src + 6 * _src_ystride);
- h = *(__m64*)(_src + 7 * _src_ystride);
- *(__m64*)(_dst + 0 * _dst_ystride) = a;
- *(__m64*)(_dst + 1 * _dst_ystride) = b;
- *(__m64*)(_dst + 2 * _dst_ystride) = c;
- *(__m64*)(_dst + 3 * _dst_ystride) = d;
- *(__m64*)(_dst + 4 * _dst_ystride) = e;
- *(__m64*)(_dst + 5 * _dst_ystride) = f;
- *(__m64*)(_dst + 6 * _dst_ystride) = g;
- *(__m64*)(_dst + 7 * _dst_ystride) = h;
-}
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
- ogg_int16_t __declspec(align(16)) res_buf[64];
- int dst_framei;
- int dst_ystride;
- int zzi;
- /*_last_zzi is subtly different from an actual count of the number of
- coefficients we decoded for this block.
- It contains the value of zzi BEFORE the final token in the block was
- decoded.
- In most cases this is an EOB token (the continuation of an EOB run from a
- previous block counts), and so this is the same as the coefficient count.
- However, in the case that the last token was NOT an EOB token, but filled
- the block up with exactly 64 coefficients, _last_zzi will be less than 64.
- Provided the last token was not a pure zero run, the minimum value it can
- be is 46, and so that doesn't affect any of the cases in this routine.
- However, if the last token WAS a pure zero run of length 63, then _last_zzi
- will be 1 while the number of coefficients decoded is 64.
- Thus, we will trigger the following special case, where the real
- coefficient count would not.
- Note also that a zero run of length 64 will give _last_zzi a value of 0,
- but we still process the DC coefficient, which might have a non-zero value
- due to DC prediction.
- Although convoluted, this is arguably the correct behavior: it allows us to
- dequantize fewer coefficients and use a smaller transform when the block
- ends with a long zero run instead of a normal EOB token.
- It could be smarter... multiple separate zero runs at the end of a block
- will fool it, but an encoder that generates these really deserves what it
- gets.
- Needless to say we inherited this approach from VP3.*/
- /*Special case only having a DC component.*/
- if(_last_zzi<2){
- __m64 p;
- /*Why is the iquant product rounded in this case and no others? Who knows.*/
- p = _m_from_int((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
- /* broadcast 16 bits into all 4 mmx subregisters */
- p = _m_punpcklwd (p,p);
- p = _m_punpckldq (p,p);
- loc_fill_mmx_value ((__m64 *)res_buf, p);
- }
- else{
- /*Then, fill in the remainder of the coefficients with 0's, and perform
- the iDCT.*/
- /*First zero the buffer.*/
- /*On K7, etc., this could be replaced with movntq and sfence.*/
- loc_fill_mmx_value ((__m64 *)res_buf, _mm_setzero_si64());
-
- res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
- /*This is planned to be rewritten in MMX.*/
- for(zzi=1;zzi<_ncoefs;zzi++)
- {
- int ci;
- ci=OC_FZIG_ZAG[zzi];
- res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
- _ac_iquant[ci]);
- }
-
- if(_last_zzi<10){
- oc_idct8x8_10_mmx(res_buf);
- }
- else {
- oc_idct8x8_mmx(res_buf);
- }
- }
- /*Fill in the target buffer.*/
- dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
- /*For now ystride values in all ref frames assumed to be equal.*/
- if(_frag->mbmode==OC_MODE_INTRA){
- oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
- }
- else{
- int ref_framei;
- int ref_ystride;
- int mvoffsets[2];
- ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
- ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
- if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],
- _frag->mv[1],ref_ystride,_pli)>1){
- oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
- _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
- _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
- }
- else{
- oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
- _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
- }
- }
-
- _mm_empty();
-}
-
-
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli){
- const int *fragi;
- const int *fragi_end;
- int dst_framei;
- int dst_ystride;
- int src_framei;
- int src_ystride;
- dst_framei=_state->ref_frame_idx[_dst_frame];
- src_framei=_state->ref_frame_idx[_src_frame];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
- src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
- fragi_end=_fragis+_nfragis;
- for(fragi=_fragis;fragifrags+*fragi;
- loc_blockcopy_mmx (frag->buffer[dst_framei], dst_ystride,
- frag->buffer[src_framei], src_ystride);
- }
- _m_empty();
-}
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/x86int.h b/Engine/lib/libtheora/lib/dec/x86_vc/x86int.h
deleted file mode 100644
index be5016100..000000000
--- a/Engine/lib/libtheora/lib/dec/x86_vc/x86int.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_x86int_vc_H)
-# define _x86_x86int_vc_H (1)
-# include "../../internal.h"
-
-void oc_state_vtable_init_x86(oc_theora_state *_state);
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue);
-
-void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue);
-
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-
-void oc_restore_fpu_mmx(void);
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
-
-void oc_idct8x8_mmx(ogg_int16_t _y[64]);
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
-
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
-
-#endif
diff --git a/Engine/lib/libtheora/lib/dec/decapiwrapper.c b/Engine/lib/libtheora/lib/decapiwrapper.c
similarity index 95%
rename from Engine/lib/libtheora/lib/dec/decapiwrapper.c
rename to Engine/lib/libtheora/lib/decapiwrapper.c
index bceec6c26..12ea475d1 100644
--- a/Engine/lib/libtheora/lib/dec/decapiwrapper.c
+++ b/Engine/lib/libtheora/lib/decapiwrapper.c
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
@@ -19,6 +19,7 @@
#include
#include
#include "apiwrapper.h"
+#include "decint.h"
#include "theora/theoradec.h"
static void th_dec_api_clear(th_api_wrapper *_api){
@@ -47,7 +48,7 @@ static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
}
-static const oc_state_dispatch_vtbl OC_DEC_DISPATCH_VTBL={
+static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
(oc_state_clear_func)theora_decode_clear,
(oc_state_control_func)theora_decode_control,
(oc_state_granule_frame_func)theora_decode_granule_frame,
@@ -95,6 +96,7 @@ int theora_decode_init(theora_state *_td,theora_info *_ci){
This avoids having to figure out whether or not we need to free the info
struct in either theora_info_clear() or theora_clear().*/
apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
+ if(apiinfo==NULL)return OC_FAULT;
/*Make our own copy of the info struct, since its lifetime should be
independent of the one we were passed in.*/
*&apiinfo->info=*_ci;
@@ -130,6 +132,7 @@ int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
theora_info struct like the ones that are used in a theora_state struct.*/
if(api==NULL){
_ci->codec_setup=_ogg_calloc(1,sizeof(*api));
+ if(_ci->codec_setup==NULL)return OC_FAULT;
api=(th_api_wrapper *)_ci->codec_setup;
api->clear=(oc_setup_clear_func)th_dec_api_clear;
}
@@ -167,12 +170,14 @@ int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
th_api_wrapper *api;
+ th_dec_ctx *decode;
th_ycbcr_buffer buf;
int ret;
if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
api=(th_api_wrapper *)_td->i->codec_setup;
- if(!api->decode)return OC_FAULT;
- ret=th_decode_ycbcr_out(api->decode,buf);
+ decode=(th_dec_ctx *)api->decode;
+ if(!decode)return OC_FAULT;
+ ret=th_decode_ycbcr_out(decode,buf);
if(ret>=0){
_yuv->y_width=buf[0].width;
_yuv->y_height=buf[0].height;
diff --git a/Engine/lib/libtheora/lib/dec/decinfo.c b/Engine/lib/libtheora/lib/decinfo.c
similarity index 77%
rename from Engine/lib/libtheora/lib/dec/decinfo.c
rename to Engine/lib/libtheora/lib/decinfo.c
index 3c4ba868a..845eb1361 100644
--- a/Engine/lib/libtheora/lib/dec/decinfo.c
+++ b/Engine/lib/libtheora/lib/decinfo.c
@@ -5,13 +5,13 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
- last mod: $Id: decinfo.c 15400 2008-10-15 12:10:58Z tterribe $
+ last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
@@ -27,30 +27,30 @@
_opb: The pack buffer to read the octets from.
_buf: The byte array to store the unpacked bytes in.
_len: The number of octets to unpack.*/
-static void oc_unpack_octets(oggpack_buffer *_opb,char *_buf,size_t _len){
+static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
while(_len-->0){
long val;
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
*_buf++=(char)val;
}
}
/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/
-static long oc_unpack_length(oggpack_buffer *_opb){
+static long oc_unpack_length(oc_pack_buf *_opb){
long ret[4];
int i;
- for(i=0;i<4;i++)theorapackB_read(_opb,8,ret+i);
+ for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8);
return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24;
}
-static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
+static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
long val;
/*Check the codec bitstream version.*/
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
_info->version_major=(unsigned char)val;
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
_info->version_minor=(unsigned char)val;
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
_info->version_subminor=(unsigned char)val;
/*verify we can parse this bitstream version.
We accept earlier minors and all subminors, by spec*/
@@ -60,25 +60,21 @@ static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
return TH_EVERSION;
}
/*Read the encoded frame description.*/
- theorapackB_read(_opb,16,&val);
+ val=oc_pack_read(_opb,16);
_info->frame_width=(ogg_uint32_t)val<<4;
- theorapackB_read(_opb,16,&val);
+ val=oc_pack_read(_opb,16);
_info->frame_height=(ogg_uint32_t)val<<4;
- theorapackB_read(_opb,24,&val);
+ val=oc_pack_read(_opb,24);
_info->pic_width=(ogg_uint32_t)val;
- theorapackB_read(_opb,24,&val);
+ val=oc_pack_read(_opb,24);
_info->pic_height=(ogg_uint32_t)val;
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
_info->pic_x=(ogg_uint32_t)val;
- /*Note: The sense of pic_y is inverted in what we pass back to the
- application compared to how it is stored in the bitstream.
- This is because the bitstream uses a right-handed coordinate system, while
- applications expect a left-handed one.*/
- theorapackB_read(_opb,8,&val);
- _info->pic_y=_info->frame_height-_info->pic_height-(ogg_uint32_t)val;
- theorapackB_read(_opb,32,&val);
+ val=oc_pack_read(_opb,8);
+ _info->pic_y=(ogg_uint32_t)val;
+ val=oc_pack_read(_opb,32);
_info->fps_numerator=(ogg_uint32_t)val;
- theorapackB_read(_opb,32,&val);
+ val=oc_pack_read(_opb,32);
_info->fps_denominator=(ogg_uint32_t)val;
if(_info->frame_width==0||_info->frame_height==0||
_info->pic_width+_info->pic_x>_info->frame_width||
@@ -86,38 +82,46 @@ static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
_info->fps_numerator==0||_info->fps_denominator==0){
return TH_EBADHEADER;
}
- theorapackB_read(_opb,24,&val);
+ /*Note: The sense of pic_y is inverted in what we pass back to the
+ application compared to how it is stored in the bitstream.
+ This is because the bitstream uses a right-handed coordinate system, while
+ applications expect a left-handed one.*/
+ _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+ val=oc_pack_read(_opb,24);
_info->aspect_numerator=(ogg_uint32_t)val;
- theorapackB_read(_opb,24,&val);
+ val=oc_pack_read(_opb,24);
_info->aspect_denominator=(ogg_uint32_t)val;
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
_info->colorspace=(th_colorspace)val;
- theorapackB_read(_opb,24,&val);
+ val=oc_pack_read(_opb,24);
_info->target_bitrate=(int)val;
- theorapackB_read(_opb,6,&val);
+ val=oc_pack_read(_opb,6);
_info->quality=(int)val;
- theorapackB_read(_opb,5,&val);
+ val=oc_pack_read(_opb,5);
_info->keyframe_granule_shift=(int)val;
- theorapackB_read(_opb,2,&val);
+ val=oc_pack_read(_opb,2);
_info->pixel_fmt=(th_pixel_fmt)val;
if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
- if(theorapackB_read(_opb,3,&val)<0||val!=0)return TH_EBADHEADER;
+ val=oc_pack_read(_opb,3);
+ if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
return 0;
}
-static int oc_comment_unpack(oggpack_buffer *_opb,th_comment *_tc){
+static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
long len;
int i;
/*Read the vendor string.*/
len=oc_unpack_length(_opb);
- if(len<0||theorapackB_bytes(_opb)+len>_opb->storage)return TH_EBADHEADER;
+ if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
_tc->vendor=_ogg_malloc((size_t)len+1);
+ if(_tc->vendor==NULL)return TH_EFAULT;
oc_unpack_octets(_opb,_tc->vendor,len);
_tc->vendor[len]='\0';
/*Read the user comments.*/
_tc->comments=(int)oc_unpack_length(_opb);
- if(_tc->comments<0||_tc->comments>(LONG_MAX>>2)||
- theorapackB_bytes(_opb)+((long)_tc->comments<<2)>_opb->storage){
+ len=_tc->comments;
+ if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
+ _tc->comments=0;
return TH_EBADHEADER;
}
_tc->comment_lengths=(int *)_ogg_malloc(
@@ -126,19 +130,23 @@ static int oc_comment_unpack(oggpack_buffer *_opb,th_comment *_tc){
_tc->comments*sizeof(_tc->user_comments[0]));
for(i=0;i<_tc->comments;i++){
len=oc_unpack_length(_opb);
- if(len<0||theorapackB_bytes(_opb)+len>_opb->storage){
+ if(len<0||len>oc_pack_bytes_left(_opb)){
_tc->comments=i;
return TH_EBADHEADER;
}
_tc->comment_lengths[i]=len;
_tc->user_comments[i]=_ogg_malloc((size_t)len+1);
+ if(_tc->user_comments[i]==NULL){
+ _tc->comments=i;
+ return TH_EFAULT;
+ }
oc_unpack_octets(_opb,_tc->user_comments[i],len);
_tc->user_comments[i][len]='\0';
}
- return theorapackB_read(_opb,0,&len)<0?TH_EBADHEADER:0;
+ return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
}
-static int oc_setup_unpack(oggpack_buffer *_opb,th_setup_info *_setup){
+static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
int ret;
/*Read the quantizer tables.*/
ret=oc_quant_params_unpack(_opb,&_setup->qinfo);
@@ -152,13 +160,13 @@ static void oc_setup_clear(th_setup_info *_setup){
oc_huff_trees_clear(_setup->huff_tables);
}
-static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
+static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
char buffer[6];
long val;
int packtype;
int ret;
- theorapackB_read(_opb,8,&val);
+ val=oc_pack_read(_opb,8);
packtype=(int)val;
/*If we're at a data packet and we have received all three headers, we're
done.*/
@@ -198,6 +206,7 @@ static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
return TH_EBADHEADER;
}
setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
+ if(setup==NULL)return TH_EFAULT;
ret=oc_setup_unpack(_opb,setup);
if(ret<0){
oc_setup_clear(setup);
@@ -222,13 +231,11 @@ static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
stream until it returns 0.*/
int th_decode_headerin(th_info *_info,th_comment *_tc,
th_setup_info **_setup,ogg_packet *_op){
- oggpack_buffer opb;
- int ret;
+ oc_pack_buf opb;
if(_op==NULL)return TH_EBADHEADER;
if(_info==NULL)return TH_EFAULT;
- theorapackB_readinit(&opb,_op->packet,_op->bytes);
- ret=oc_dec_headerin(&opb,_info,_tc,_setup,_op);
- return ret;
+ oc_pack_readinit(&opb,_op->packet,_op->bytes);
+ return oc_dec_headerin(&opb,_info,_tc,_setup,_op);
}
void th_setup_free(th_setup_info *_setup){
diff --git a/Engine/lib/libtheora/lib/dec/decint.h b/Engine/lib/libtheora/lib/decint.h
similarity index 68%
rename from Engine/lib/libtheora/lib/dec/decint.h
rename to Engine/lib/libtheora/lib/decint.h
index 7924c0e0c..261b67631 100644
--- a/Engine/lib/libtheora/lib/dec/decint.h
+++ b/Engine/lib/libtheora/lib/decint.h
@@ -5,13 +5,13 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
- last mod: $Id: decint.h 15400 2008-10-15 12:10:58Z tterribe $
+ last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
@@ -19,13 +19,12 @@
#if !defined(_decint_H)
# define _decint_H (1)
# include "theora/theoradec.h"
-# include "../internal.h"
+# include "internal.h"
# include "bitpack.h"
typedef struct th_setup_info oc_setup_info;
typedef struct th_dec_ctx oc_dec_ctx;
-# include "idct.h"
# include "huffdec.h"
# include "dequant.h"
@@ -54,24 +53,20 @@ struct th_dec_ctx{
when a frame has been processed and a data packet is ready.*/
int packet_state;
/*Buffer in which to assemble packets.*/
- oggpack_buffer opb;
+ oc_pack_buf opb;
/*Huffman decode trees.*/
oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
- /*The index of one past the last token in each plane for each coefficient.
- The final entries are the total number of tokens for each coefficient.*/
- int ti0[3][64];
- /*The index of one past the last extra bits entry in each plane for each
- coefficient.
- The final entries are the total number of extra bits entries for each
- coefficient.*/
- int ebi0[3][64];
+ /*The index of the first token in each plane for each coefficient.*/
+ ptrdiff_t ti0[3][64];
/*The number of outstanding EOB runs at the start of each coefficient in each
plane.*/
- int eob_runs[3][64];
+ ptrdiff_t eob_runs[3][64];
/*The DCT token lists.*/
- unsigned char **dct_tokens;
+ unsigned char *dct_tokens;
/*The extra bits associated with DCT tokens.*/
- ogg_uint16_t **extra_bits;
+ unsigned char *extra_bits;
+ /*The number of dct tokens unpacked so far.*/
+ int dct_tokens_count;
/*The out-of-loop post-processing level.*/
int pp_level;
/*The DC scale used for out-of-loop deblocking.*/
@@ -85,11 +80,28 @@ struct th_dec_ctx{
/*The storage for the post-processed frame buffer.*/
unsigned char *pp_frame_data;
/*Whether or not the post-processsed frame buffer has space for chroma.*/
- int pp_frame_has_chroma;
- /*The buffer used for the post-processed frame.*/
+ int pp_frame_state;
+ /*The buffer used for the post-processed frame.
+ Note that this is _not_ guaranteed to have the same strides and offsets as
+ the reference frame buffers.*/
th_ycbcr_buffer pp_frame_buf;
/*The striped decode callback function.*/
th_stripe_callback stripe_cb;
+# if defined(HAVE_CAIRO)
+ /*Output metrics for debugging.*/
+ int telemetry;
+ int telemetry_mbmode;
+ int telemetry_mv;
+ int telemetry_qi;
+ int telemetry_bits;
+ int telemetry_frame_bytes;
+ int telemetry_coding_bytes;
+ int telemetry_mode_bytes;
+ int telemetry_mv_bytes;
+ int telemetry_qi_bytes;
+ int telemetry_dc_bytes;
+ unsigned char *telemetry_frame_data;
+# endif
};
#endif
diff --git a/Engine/lib/libtheora/lib/decode.c b/Engine/lib/libtheora/lib/decode.c
new file mode 100644
index 000000000..7be66463d
--- /dev/null
+++ b/Engine/lib/libtheora/lib/decode.c
@@ -0,0 +1,2943 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $
+
+ ********************************************************************/
+
+#include
+#include
+#include
+#include "decint.h"
+#if defined(OC_DUMP_IMAGES)
+# include
+# include "png.h"
+#endif
+#if defined(HAVE_CAIRO)
+# include
+#endif
+
+
+/*No post-processing.*/
+#define OC_PP_LEVEL_DISABLED (0)
+/*Keep track of DC qi for each block only.*/
+#define OC_PP_LEVEL_TRACKDCQI (1)
+/*Deblock the luma plane.*/
+#define OC_PP_LEVEL_DEBLOCKY (2)
+/*Dering the luma plane.*/
+#define OC_PP_LEVEL_DERINGY (3)
+/*Stronger luma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGY (4)
+/*Deblock the chroma planes.*/
+#define OC_PP_LEVEL_DEBLOCKC (5)
+/*Dering the chroma planes.*/
+#define OC_PP_LEVEL_DERINGC (6)
+/*Stronger chroma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGC (7)
+/*Maximum valid post-processing level.*/
+#define OC_PP_LEVEL_MAX (7)
+
+
+
+/*The mode alphabets for the various mode coding schemes.
+ Scheme 0 uses a custom alphabet, which is not stored in this table.*/
+static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
+ /*Last MV dominates */
+ {
+ OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
+ OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+ OC_MODE_INTER_MV_FOUR
+ },
+ {
+ OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
+ OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+ OC_MODE_INTER_MV_FOUR
+ },
+ {
+ OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
+ OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+ OC_MODE_INTER_MV_FOUR
+ },
+ {
+ OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
+ OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
+ OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
+ },
+ /*No MV dominates.*/
+ {
+ OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
+ OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+ OC_MODE_INTER_MV_FOUR
+ },
+ {
+ OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
+ OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
+ OC_MODE_INTER_MV_FOUR
+ },
+ /*Default ordering.*/
+ {
+ OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
+ OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+ OC_MODE_INTER_MV_FOUR
+ }
+};
+
+
+/*The original DCT tokens are extended and reordered during the construction of
+ the Huffman tables.
+ The extension means more bits can be read with fewer calls to the bitpacker
+ during the Huffman decoding process (at the cost of larger Huffman tables),
+ and fewer tokens require additional extra bits (reducing the average storage
+ per decoded token).
+ The revised ordering reveals essential information in the token value
+ itself; specifically, whether or not there are additional extra bits to read
+ and the parameter to which those extra bits are applied.
+ The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
+ The extra bits are added into code word at the bit position inferred from the
+ token value, giving the final code word from which all required parameters
+ are derived.
+ The number of EOBs and the leading zero run length can be extracted directly.
+ The coefficient magnitude is optionally negated before extraction, according
+ to a 'flip' bit.*/
+
+/*The number of additional extra bits that are decoded with each of the
+ internal DCT tokens.*/
+static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
+ 12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
+};
+
+/*Whether or not an internal token needs any additional extra bits.*/
+#define OC_DCT_TOKEN_NEEDS_MORE(token) \
+ (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
+ sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
+
+/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
+#define OC_DCT_TOKEN_FAT_EOB (0)
+
+/*The number of EOBs to use for an end-of-frame token.
+ Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
+ is not yet available everywhere; this should be equivalent.*/
+#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
+
+/*The location of the (6) run length bits in the code word.
+ These are placed at index 0 and given 8 bits (even though 6 would suffice)
+ because it may be faster to extract the lower byte on some platforms.*/
+#define OC_DCT_CW_RLEN_SHIFT (0)
+/*The location of the (12) EOB bits in the code word.*/
+#define OC_DCT_CW_EOB_SHIFT (8)
+/*The location of the (1) flip bit in the code word.
+ This must be right under the magnitude bits.*/
+#define OC_DCT_CW_FLIP_BIT (20)
+/*The location of the (11) token magnitude bits in the code word.
+ These must be last, and rely on a sign-extending right shift.*/
+#define OC_DCT_CW_MAG_SHIFT (21)
+
+/*Pack the given fields into a code word.*/
+#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
+ ((_eobs)<state,_info,3);
+ if(ret<0)return ret;
+ /*Copy the Huffman trees out of the setup info.
+ If that fails, release the state that was just initialized.*/
+ ret=oc_huff_trees_copy(_dec->huff_tables,
+ (const oc_huff_node *const *)_setup->huff_tables);
+ if(ret<0){
+ oc_state_clear(&_dec->state);
+ return ret;
+ }
+ /*For each fragment, allocate one byte for every DCT coefficient token, plus
+ one byte for extra-bits for each token, plus one more byte for the long
+ EOB run, just in case it's the very last token and has a run length of
+ one.*/
+ _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
+ _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
+ if(_dec->dct_tokens==NULL){
+ oc_huff_trees_clear(_dec->huff_tables);
+ oc_state_clear(&_dec->state);
+ return TH_EFAULT;
+ }
+ /*Point every dequantization table at its backing storage.
+ Both arrays are indexed [qi][pli][qti].*/
+ for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
+ _dec->state.dequant_tables[qi][pli][qti]=
+ _dec->state.dequant_table_data[qi][pli][qti];
+ }
+ oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
+ &_setup->qinfo);
+ /*Compute the post-processing sharpening modifier for each qi from entries
+ 12, 17, 18, and 24 of every table that belongs to that qi.
+ The tables must be addressed as [qi][pli][qti], matching the setup loop
+ above; the previous [qti][pli][qi] order read the wrong tables.*/
+ for(qi=0;qi<64;qi++){
+ int qsum;
+ qsum=0;
+ for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+ /*Note that + binds tighter than <<, so the whole four-term sum is doubled
+ for the luma plane (pli==0).*/
+ qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
+ _dec->state.dequant_tables[qi][pli][qti][17]+
+ _dec->state.dequant_tables[qi][pli][qti][18]+
+ _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
+ }
+ _dec->pp_sharp_mod[qi]=-(qsum>>11);
+ }
+ memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
+ sizeof(_dec->state.loop_filter_limits));
+ /*Post-processing starts out disabled, with none of its buffers allocated.*/
+ _dec->pp_level=OC_PP_LEVEL_DISABLED;
+ _dec->dc_qis=NULL;
+ _dec->variances=NULL;
+ _dec->pp_frame_data=NULL;
+ _dec->stripe_cb.ctx=NULL;
+ _dec->stripe_cb.stripe_decoded=NULL;
+#if defined(HAVE_CAIRO)
+ _dec->telemetry=0;
+ _dec->telemetry_bits=0;
+ _dec->telemetry_qi=0;
+ _dec->telemetry_mbmode=0;
+ _dec->telemetry_mv=0;
+ _dec->telemetry_frame_data=NULL;
+#endif
+ return 0;
+}
+
+static void oc_dec_clear(oc_dec_ctx *_dec){
+  /*Release the token storage and the post-processing buffers.*/
+  _ogg_free(_dec->dct_tokens);
+  _ogg_free(_dec->dc_qis);
+  _ogg_free(_dec->variances);
+  _ogg_free(_dec->pp_frame_data);
+#if defined(HAVE_CAIRO)
+  /*The telemetry frame only exists in builds with Cairo support.*/
+  _ogg_free(_dec->telemetry_frame_data);
+#endif
+  /*Then tear down the Huffman trees and the shared codec state.*/
+  oc_huff_trees_clear(_dec->huff_tables);
+  oc_state_clear(&_dec->state);
+}
+
+
+/*Unpacks the frame header from the start of a data packet.
+  This fills in the frame type, the qi list, and the number of qis in the
+   decoder state.
+  Return: 0 on success, TH_EBADPACKET if the packet is not a data packet, or
+   TH_EIMPL if a keyframe has any of its 3 configuration bits set.*/
+static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
+  oc_pack_buf *opb;
+  int          nqis;
+  opb=&_dec->opb;
+  /*The first bit must be 0 for this to be a data packet.*/
+  if(oc_pack_read1(opb)!=0)return TH_EBADPACKET;
+  /*The next bit is the frame type (I or P).*/
+  _dec->state.frame_type=(int)oc_pack_read1(opb);
+  /*The qi list holds between one and three 6-bit entries.
+    A 1 bit after the first or second entry means another entry follows; the
+     third entry is never followed by such a flag.*/
+  nqis=0;
+  for(;;){
+    _dec->state.qis[nqis++]=(unsigned char)oc_pack_read(opb,6);
+    if(nqis>=3||!oc_pack_read1(opb))break;
+  }
+  _dec->state.nqis=nqis;
+  /*Keyframes carry 3 unused configuration bits, holdovers from VP3.
+    They are only read for keyframes, and must all be zero.*/
+  if(_dec->state.frame_type==OC_INTRA_FRAME&&oc_pack_read(opb,3)!=0){
+    return TH_EIMPL;
+  }
+  return 0;
+}
+
+/*Mark all fragments as coded and in OC_MODE_INTRA.
+ This also builds up the coded fragment list (in coded order), and clears the
+ uncoded fragment list.
+ It does not update the coded macro block list nor the super block flags, as
+ those are not used when decoding INTRA frames.*/
+static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
+ const oc_sb_map *sb_maps;
+ const oc_sb_flags *sb_flags;
+ oc_fragment *frags;
+ ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t prev_ncoded_fragis;
+ unsigned nsbs;
+ unsigned sbi;
+ int pli;
+ coded_fragis=_dec->state.coded_fragis;
+ prev_ncoded_fragis=ncoded_fragis=0;
+ sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+ sb_flags=_dec->state.sb_flags;
+ frags=_dec->state.frags;
+ sbi=nsbs=0;
+ for(pli=0;pli<3;pli++){
+ nsbs+=_dec->state.fplanes[pli].nsbs;
+ for(;sbi=0){
+ frags[fragi].coded=1;
+ frags[fragi].mb_mode=OC_MODE_INTRA;
+ coded_fragis[ncoded_fragis++]=fragi;
+ }
+ }
+ }
+ }
+ _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+ prev_ncoded_fragis=ncoded_fragis;
+ }
+ _dec->state.ntotal_coded_fragis=ncoded_fragis;
+}
+
+/*Decodes the bit flags indicating whether each super block is partially coded
+ or not.
+ Return: The number of partially coded super blocks.*/
+static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
+ oc_sb_flags *sb_flags;
+ unsigned nsbs;
+ unsigned sbi;
+ unsigned npartial;
+ unsigned run_count;
+ long val;
+ int flag;
+ val=oc_pack_read1(&_dec->opb);
+ flag=(int)val;
+ sb_flags=_dec->state.sb_flags;
+ nsbs=_dec->state.nsbs;
+ sbi=npartial=0;
+ while(sbiopb);
+ full_run=run_count>=4129;
+ do{
+ sb_flags[sbi].coded_partially=flag;
+ sb_flags[sbi].coded_fully=0;
+ npartial+=flag;
+ sbi++;
+ }
+ while(--run_count>0&&sbiopb);
+ flag=(int)val;
+ }
+ else flag=!flag;
+ }
+ /*TODO: run_count should be 0 here.
+ If it's not, we should issue a warning of some kind.*/
+ return npartial;
+}
+
+/*Decodes the bit flags for whether or not each non-partially-coded super
+ block is fully coded or not.
+ This function should only be called if there is at least one
+ non-partially-coded super block.
+ This function does not return a value.*/
+static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
+ oc_sb_flags *sb_flags;
+ unsigned nsbs;
+ unsigned sbi;
+ unsigned run_count;
+ long val;
+ int flag;
+ sb_flags=_dec->state.sb_flags;
+ nsbs=_dec->state.nsbs;
+ /*Skip partially coded super blocks.*/
+ for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
+ val=oc_pack_read1(&_dec->opb);
+ flag=(int)val;
+ do{
+ int full_run;
+ run_count=oc_sb_run_unpack(&_dec->opb);
+ full_run=run_count>=4129;
+ for(;sbiopb);
+ flag=(int)val;
+ }
+ else flag=!flag;
+ }
+ while(sbistate.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
+ if(npartial>0){
+ val=oc_pack_read1(&_dec->opb);
+ flag=!(int)val;
+ }
+ else flag=0;
+ sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+ sb_flags=_dec->state.sb_flags;
+ frags=_dec->state.frags;
+ sbi=nsbs=run_count=0;
+ coded_fragis=_dec->state.coded_fragis;
+ uncoded_fragis=coded_fragis+_dec->state.nfrags;
+ prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
+ for(pli=0;pli<3;pli++){
+ nsbs+=_dec->state.fplanes[pli].nsbs;
+ for(;sbi=0){
+ int coded;
+ if(sb_flags[sbi].coded_fully)coded=1;
+ else if(!sb_flags[sbi].coded_partially)coded=0;
+ else{
+ if(run_count<=0){
+ run_count=oc_block_run_unpack(&_dec->opb);
+ flag=!flag;
+ }
+ run_count--;
+ coded=flag;
+ }
+ if(coded)coded_fragis[ncoded_fragis++]=fragi;
+ else *(uncoded_fragis-++nuncoded_fragis)=fragi;
+ frags[fragi].coded=coded;
+ }
+ }
+ }
+ }
+ _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+ prev_ncoded_fragis=ncoded_fragis;
+ }
+ _dec->state.ntotal_coded_fragis=ncoded_fragis;
+ /*TODO: run_count should be 0 here.
+ If it's not, we should issue a warning of some kind.*/
+}
+
+
+
+typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
+
+/*Unpacks a mode index stored as a unary code.
+  Return: The number of 1 bits read before the first 0 bit, capped at 7 (in
+   which case no terminating 0 bit is read).*/
+static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
+  int nones;
+  nones=0;
+  while(nones<7&&oc_pack_read1(_opb))nones++;
+  return nones;
+}
+
+/*Unpacks a mode index stored as a fixed-length, 3-bit code.
+  Return: The value of the 3 bits read.*/
+static int oc_clc_mode_unpack(oc_pack_buf *_opb){
+  return (int)oc_pack_read(_opb,3);
+}
+
+/*Unpacks the list of macro block modes for INTER frames.*/
+static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
+ const oc_mb_map *mb_maps;
+ signed char *mb_modes;
+ const oc_fragment *frags;
+ const unsigned char *alphabet;
+ unsigned char scheme0_alphabet[8];
+ oc_mode_unpack_func mode_unpack;
+ size_t nmbs;
+ size_t mbi;
+ long val;
+ int mode_scheme;
+ val=oc_pack_read(&_dec->opb,3);
+ mode_scheme=(int)val;
+ if(mode_scheme==0){
+ int mi;
+ /*Just in case, initialize the modes to something.
+ If the bitstream doesn't contain each index exactly once, it's likely
+ corrupt and the rest of the packet is garbage anyway, but this way we
+ won't crash, and we'll decode SOMETHING.*/
+ /*LOOP VECTORIZES*/
+ for(mi=0;miopb,3);
+ scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
+ }
+ alphabet=scheme0_alphabet;
+ }
+ else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
+ if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
+ else mode_unpack=oc_vlc_mode_unpack;
+ mb_modes=_dec->state.mb_modes;
+ mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+ nmbs=_dec->state.nmbs;
+ frags=_dec->state.frags;
+ for(mbi=0;mbiopb)];
+ /*There were none: INTER_NOMV is forced.*/
+ else mb_modes[mbi]=OC_MODE_INTER_NOMV;
+ }
+ }
+}
+
+
+
+typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
+
+/*Unpacks a single motion vector component stored with the variable-length
+ code.
+ A 3-bit prefix either gives the value directly (0, 1, or -1) or selects how
+ the magnitude and sign are read from the bits that follow.
+ Return: The signed motion vector component.*/
+static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
+ long bits;
+ int mask;
+ int mv;
+ bits=oc_pack_read(_opb,3);
+ switch(bits){
+ case 0:return 0;
+ case 1:return 1;
+ case 2:return -1;
+ case 3:
+ case 4:{
+ /*Prefixes 3 and 4 give magnitudes 2 and 3; one more bit holds the sign.*/
+ mv=(int)(bits-1);
+ bits=oc_pack_read1(_opb);
+ }break;
+ /*case 5:
+ case 6:
+ case 7:*/
+ default:{
+ /*NOTE(review): the next statement is damaged in this copy of the patch
+ (text is missing from it); confirm against the upstream file.
+ After it, only the low bit of bits is kept, for use as the sign.*/
+ mv=1<>1);
+ bits&=1;
+ }break;
+ }
+ /*mask is 0 for a clear sign bit and -1 for a set one.
+ (mv+mask)^mask then yields mv or -mv, respectively, without a branch.*/
+ mask=-(int)bits;
+ return mv+mask^mask;
+}
+
+/*Unpacks a single motion vector component stored as a fixed-length, 6-bit
+   code: the upper 5 bits hold the magnitude, and the low bit holds the sign.
+  Return: The signed motion vector component.*/
+static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
+  int code;
+  int mag;
+  code=(int)oc_pack_read(_opb,6);
+  mag=code>>1;
+  /*A set low bit means the component is negative.*/
+  return code&1?-mag:mag;
+}
+
+/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
+ block modes and motion vectors to the individual fragments.*/
+static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
+ const oc_mb_map *mb_maps;
+ const signed char *mb_modes;
+ oc_set_chroma_mvs_func set_chroma_mvs;
+ oc_mv_comp_unpack_func mv_comp_unpack;
+ oc_fragment *frags;
+ oc_mv *frag_mvs;
+ const unsigned char *map_idxs;
+ int map_nidxs;
+ oc_mv last_mv[2];
+ oc_mv cbmvs[4];
+ size_t nmbs;
+ size_t mbi;
+ long val;
+ set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
+ val=oc_pack_read1(&_dec->opb);
+ mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
+ map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
+ map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
+ memset(last_mv,0,sizeof(last_mv));
+ frags=_dec->state.frags;
+ frag_mvs=_dec->state.frag_mvs;
+ mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+ mb_modes=_dec->state.mb_modes;
+ nmbs=_dec->state.nmbs;
+ for(mbi=0;mbi>2][mapi&3];
+ if(frags[fragi].coded)coded[ncoded++]=mapi;
+ }
+ while(++mapiiopb);
+ lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+ memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
+ }
+ else lbmvs[bi][0]=lbmvs[bi][1]=0;
+ }
+ if(codedi>0){
+ memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+ memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
+ }
+ if(codedi>2][bi];
+ frags[fragi].mb_mode=mb_mode;
+ memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
+ }
+ }
+ }break;
+ case OC_MODE_INTER_MV:{
+ memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+ mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+ mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+ }break;
+ case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
+ case OC_MODE_INTER_MV_LAST2:{
+ memcpy(mbmv,last_mv[1],sizeof(mbmv));
+ memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+ memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
+ }break;
+ case OC_MODE_GOLDEN_MV:{
+ mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+ mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+ }break;
+ default:memset(mbmv,0,sizeof(mbmv));break;
+ }
+ /*4MV mode fills in the fragments itself.
+ For all other modes we can use this common code.*/
+ if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+ for(codedi=0;codedi>2][mapi&3];
+ frags[fragi].mb_mode=mb_mode;
+ memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
+ }
+ }
+ }
+ }
+}
+
+static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
+ oc_fragment *frags;
+ const ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t fragii;
+ ptrdiff_t fragi;
+ ncoded_fragis=_dec->state.ntotal_coded_fragis;
+ if(ncoded_fragis<=0)return;
+ frags=_dec->state.frags;
+ coded_fragis=_dec->state.coded_fragis;
+ if(_dec->state.nqis==1){
+ /*If this frame has only a single qi value, then just use it for all coded
+ fragments.*/
+ for(fragii=0;fragiiopb);
+ flag=(int)val;
+ nqi1=0;
+ fragii=0;
+ while(fragiiopb);
+ full_run=run_count>=4129;
+ do{
+ frags[coded_fragis[fragii++]].qii=flag;
+ nqi1+=flag;
+ }
+ while(--run_count>0&&fragiiopb);
+ flag=(int)val;
+ }
+ else flag=!flag;
+ }
+ /*TODO: run_count should be 0 here.
+ If it's not, we should issue a warning of some kind.*/
+ /*If we have 3 different qi's for this frame, and there was at least one
+ fragment with a non-zero qi, make the second pass.*/
+ if(_dec->state.nqis==3&&nqi1>0){
+ /*Skip qii==0 fragments.*/
+ for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
+ val=oc_pack_read1(&_dec->opb);
+ flag=(int)val;
+ do{
+ int full_run;
+ run_count=oc_sb_run_unpack(&_dec->opb);
+ full_run=run_count>=4129;
+ for(;fragiiopb);
+ flag=(int)val;
+ }
+ else flag=!flag;
+ }
+ while(fragiidct_tokens;
+ frags=_dec->state.frags;
+ coded_fragis=_dec->state.coded_fragis;
+ ncoded_fragis=fragii=eobs=ti=0;
+ for(pli=0;pli<3;pli++){
+ ptrdiff_t run_counts[64];
+ ptrdiff_t eob_count;
+ ptrdiff_t eobi;
+ int rli;
+ ncoded_fragis+=_dec->state.ncoded_fragis[pli];
+ memset(run_counts,0,sizeof(run_counts));
+ _dec->eob_runs[pli][0]=eobs;
+ _dec->ti0[pli][0]=ti;
+ /*Continue any previous EOB run, if there was one.*/
+ eobi=eobs;
+ if(ncoded_fragis-fragii0)frags[coded_fragis[fragii++]].dc=0;
+ while(fragiiopb,
+ _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+ dct_tokens[ti++]=(unsigned char)token;
+ if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+ eb=(int)oc_pack_read(&_dec->opb,
+ OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+ dct_tokens[ti++]=(unsigned char)eb;
+ if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+ eb<<=OC_DCT_TOKEN_EB_POS(token);
+ }
+ else eb=0;
+ cw=OC_DCT_CODE_WORD[token]+eb;
+ eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+ if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
+ if(eobs){
+ eobi=OC_MINI(eobs,ncoded_fragis-fragii);
+ eob_count+=eobi;
+ eobs-=eobi;
+ while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+ }
+ else{
+ int coeff;
+ skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+ cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT;
+ if(skip)coeff=0;
+ run_counts[skip]++;
+ frags[coded_fragis[fragii++]].dc=coeff;
+ }
+ }
+ /*Add the total EOB count to the longest run length.*/
+ run_counts[63]+=eob_count;
+ /*And convert the run_counts array to a moment table.*/
+ for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+ /*Finally, subtract off the number of coefficients that have been
+ accounted for by runs started in this coefficient.*/
+ for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
+ }
+ _dec->dct_tokens_count=ti;
+ return eobs;
+}
+
+/*Unpacks the AC coefficient tokens.
+ This can completely discard coefficient values while unpacking, and so is
+ somewhat simpler than unpacking the DC coefficient tokens.
+ _zzi: The index of the coefficient whose tokens are being unpacked.
+ This selects the entries of ti0, eob_runs, and _ntoks_left that
+ are read and updated.
+ _huff_idxs: The index of the Huffman table to use for each color plane.
+ Entry 0 is used for plane 0, and entry 1 for planes 1 and 2.
+ _ntoks_left: The number of tokens left to be decoded in each color plane for
+ each coefficient.
+ This is updated as EOB tokens and zero run tokens are decoded.
+ _eobs: The length of any outstanding EOB run from previous
+ coefficients.
+ Return: The length of any outstanding EOB run.
+ This is returned as a ptrdiff_t, the same type as _eobs, so that
+ a run of OC_DCT_EOB_FINISH is not narrowed on its way back to the
+ caller.*/
+static ptrdiff_t oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,
+ int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
+ unsigned char *dct_tokens;
+ ptrdiff_t ti;
+ int pli;
+ dct_tokens=_dec->dct_tokens;
+ /*Continue appending after the tokens unpacked for earlier coefficients.*/
+ ti=_dec->dct_tokens_count;
+ for(pli=0;pli<3;pli++){
+ ptrdiff_t run_counts[64];
+ ptrdiff_t eob_count;
+ size_t ntoks_left;
+ size_t ntoks;
+ int rli;
+ /*Record where this plane's tokens for this coefficient start, and how
+ much of an EOB run is carried into it.*/
+ _dec->eob_runs[pli][_zzi]=_eobs;
+ _dec->ti0[pli][_zzi]=ti;
+ ntoks_left=_ntoks_left[pli][_zzi];
+ memset(run_counts,0,sizeof(run_counts));
+ eob_count=0;
+ ntoks=0;
+ while(ntoks+_eobsopb,
+ _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+ /*Store the token, followed by one byte of extra bits if it has any (two
+ bytes for the fat EOB token, which has more than 8 extra bits).*/
+ dct_tokens[ti++]=(unsigned char)token;
+ if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+ eb=(int)oc_pack_read(&_dec->opb,
+ OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+ dct_tokens[ti++]=(unsigned char)eb;
+ if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+ eb<<=OC_DCT_TOKEN_EB_POS(token);
+ }
+ else eb=0;
+ /*Only the zero run length and the EOB count are extracted from the code
+ word here; the coefficient value itself is not needed.*/
+ cw=OC_DCT_CODE_WORD[token]+eb;
+ skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+ _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+ if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
+ if(_eobs==0){
+ run_counts[skip]++;
+ ntoks++;
+ }
+ }
+ /*Add the portion of the last EOB run actually used by this coefficient.*/
+ eob_count+=ntoks_left-ntoks;
+ /*And remove it from the remaining EOB count.*/
+ _eobs-=ntoks_left-ntoks;
+ /*Add the total EOB count to the longest run length.*/
+ run_counts[63]+=eob_count;
+ /*And convert the run_counts array to a moment table.*/
+ for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+ /*Finally, subtract off the number of coefficients that have been
+ accounted for by runs started in this coefficient.*/
+ for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
+ }
+ _dec->dct_tokens_count=ti;
+ return _eobs;
+}
+
+/*Tokens describing the DCT coefficients that belong to each fragment are
+ stored in the bitstream grouped by coefficient, not by fragment.
+
+ This means that we either decode all the tokens in order, building up a
+ separate coefficient list for each fragment as we go, and then go back and
+ do the iDCT on each fragment, or we have to create separate lists of tokens
+ for each coefficient, so that we can pull the next token required off the
+ head of the appropriate list when decoding a specific fragment.
+
+ The former was VP3's choice, and it meant 2*w*h extra storage for all the
+ decoded coefficient values.
+
+ We take the second option, which lets us store just one to three bytes per
+ token (generally far fewer than the number of coefficients, due to EOB
+ tokens and zero runs), and which requires us to only maintain a counter for
+ each of the 64 coefficients, instead of a counter for every fragment to
+ determine where the next token goes.
+
+ We actually use 3 counters per coefficient, one for each color plane, so we
+ can decode all color planes simultaneously.
+ This lets color conversion, etc., be done as soon as a full MCU (one or
+ two super block rows) is decoded, while the image data is still in cache.*/
+
+static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
+ static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
+ ptrdiff_t ntoks_left[3][64];
+ int huff_idxs[2];
+ ptrdiff_t eobs;
+ long val;
+ int pli;
+ int zzi;
+ int hgi;
+ for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
+ ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
+ }
+ val=oc_pack_read(&_dec->opb,4);
+ huff_idxs[0]=(int)val;
+ val=oc_pack_read(&_dec->opb,4);
+ huff_idxs[1]=(int)val;
+ _dec->eob_runs[0][0]=0;
+ eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
+#if defined(HAVE_CAIRO)
+ _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+ val=oc_pack_read(&_dec->opb,4);
+ huff_idxs[0]=(int)val;
+ val=oc_pack_read(&_dec->opb,4);
+ huff_idxs[1]=(int)val;
+ zzi=1;
+ for(hgi=1;hgi<5;hgi++){
+ huff_idxs[0]+=16;
+ huff_idxs[1]+=16;
+ for(;zzipp_level<=OC_PP_LEVEL_DISABLED){
+ if(_dec->dc_qis!=NULL){
+ _ogg_free(_dec->dc_qis);
+ _dec->dc_qis=NULL;
+ _ogg_free(_dec->variances);
+ _dec->variances=NULL;
+ _ogg_free(_dec->pp_frame_data);
+ _dec->pp_frame_data=NULL;
+ }
+ return 1;
+ }
+ if(_dec->dc_qis==NULL){
+ /*If we haven't been tracking DC quantization indices, there's no point in
+ starting now.*/
+ if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
+ _dec->dc_qis=(unsigned char *)_ogg_malloc(
+ _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
+ if(_dec->dc_qis==NULL)return 1;
+ memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
+ }
+ else{
+ unsigned char *dc_qis;
+ const ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t fragii;
+ unsigned char qi0;
+ /*Update the DC quantization index of each coded block.*/
+ dc_qis=_dec->dc_qis;
+ coded_fragis=_dec->state.coded_fragis;
+ ncoded_fragis=_dec->state.ncoded_fragis[0]+
+ _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
+ qi0=(unsigned char)_dec->state.qis[0];
+ for(fragii=0;fragiipp_level<=OC_PP_LEVEL_TRACKDCQI){
+ if(_dec->variances!=NULL){
+ _ogg_free(_dec->variances);
+ _dec->variances=NULL;
+ _ogg_free(_dec->pp_frame_data);
+ _dec->pp_frame_data=NULL;
+ }
+ return 1;
+ }
+ if(_dec->variances==NULL){
+ size_t frame_sz;
+ size_t c_sz;
+ int c_w;
+ int c_h;
+ frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+ c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+ c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+ c_sz=c_w*(size_t)c_h;
+ /*Allocate space for the chroma planes, even if we're not going to use
+ them; this simplifies allocation state management, though it may waste
+ memory on the few systems that don't overcommit pages.*/
+ frame_sz+=c_sz<<1;
+ _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
+ frame_sz*sizeof(_dec->pp_frame_data[0]));
+ _dec->variances=(int *)_ogg_malloc(
+ _dec->state.nfrags*sizeof(_dec->variances[0]));
+ if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
+ _ogg_free(_dec->pp_frame_data);
+ _dec->pp_frame_data=NULL;
+ _ogg_free(_dec->variances);
+ _dec->variances=NULL;
+ return 1;
+ }
+ /*Force an update of the PP buffer pointers.*/
+ _dec->pp_frame_state=0;
+ }
+ /*Update the PP buffer pointers if necessary.*/
+ if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
+ if(_dec->pp_levelpp_frame_buf[0].width=_dec->state.info.frame_width;
+ _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+ _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
+ _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
+ (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
+ }
+ else{
+ size_t y_sz;
+ size_t c_sz;
+ int c_w;
+ int c_h;
+ /*Otherwise, set up pointers to all three PP planes.*/
+ y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+ c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+ c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+ c_sz=c_w*(size_t)c_h;
+ _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+ _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+ _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
+ _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
+ _dec->pp_frame_buf[1].width=c_w;
+ _dec->pp_frame_buf[1].height=c_h;
+ _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
+ _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
+ _dec->pp_frame_buf[2].width=c_w;
+ _dec->pp_frame_buf[2].height=c_h;
+ _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
+ _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
+ oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
+ }
+ _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
+ }
+ /*If we're not processing chroma, copy the reference frame's chroma planes.*/
+ if(_dec->pp_levelpp_frame_buf+1,
+ _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
+ sizeof(_dec->pp_frame_buf[1])*2);
+ }
+ return 0;
+}
+
+
+
+/*The working state of the main decoding pipeline for a single frame.
+ It is filled in by oc_dec_pipeline_init().*/
+typedef struct{
+ /*The bounding value array for the loop filter.*/
+ int bounding_values[256];
+ /*The current token index for each plane and coefficient.
+ This starts out as a copy of the decoder's ti0 table.*/
+ ptrdiff_t ti[3][64];
+ /*The outstanding EOB run for each plane and coefficient.
+ This starts out as a copy of the decoder's eob_runs table.*/
+ ptrdiff_t eob_runs[3][64];
+ /*Per-plane pointers into the coded fragment list.*/
+ const ptrdiff_t *coded_fragis[3];
+ /*Per-plane pointers into the uncoded fragment list.*/
+ const ptrdiff_t *uncoded_fragis[3];
+ /*NOTE(review): presumably the number of coded and uncoded fragments of each
+ plane in the current MCU, as computed during DC unprediction; confirm.*/
+ ptrdiff_t ncoded_fragis[3];
+ ptrdiff_t nuncoded_fragis[3];
+ /*The condensed quantizer tables, indexed by [pli][qii][qti].*/
+ const ogg_uint16_t *dequant[3][3][2];
+ /*The first fragment row of the current MCU in each plane.*/
+ int fragy0[3];
+ /*The end fragment row of the current MCU in each plane.*/
+ int fragy_end[3];
+ /*The previous DC predictor for each color plane and frame type.*/
+ int pred_last[3][3];
+ /*NOTE(review): presumably the height of an MCU in luma fragment rows;
+ confirm against oc_dec_pipeline_init().*/
+ int mcu_nvfrags;
+ /*Non-zero when loop filter initialization returned 0.*/
+ int loop_filter;
+ /*The post-processing level saved when the pipeline was initialized.*/
+ int pp_level;
+}oc_dec_pipeline_state;
+
+
+
+/*Initialize the main decoding pipeline.
+ This copies the per-frame token indices and EOB run counts out of the
+ decoder, sets up the per-plane fragment list pointers and quantizer tables,
+ clears the DC predictors, and prepares the loop filter and post-processing
+ state.
+ _dec: The decoder context for the frame about to be reconstructed.
+ _pipe: The pipeline state to fill in.*/
+static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe){
+ const ptrdiff_t *coded_fragis;
+ const ptrdiff_t *uncoded_fragis;
+ int pli;
+ int qii;
+ int qti;
+ /*If chroma is sub-sampled in the vertical direction, we have to decode two
+ super block rows of Y' for each super block row of Cb and Cr.
+ NOTE(review): the next statement is damaged in this copy of the patch
+ (text is missing from it); confirm against the upstream file.*/
+ _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2);
+ /*Initialize the token indices for each plane and coefficient.*/
+ memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
+ /*Also copy over the initial EOB run counts.*/
+ memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
+ /*Set up per-plane pointers to the coded and uncoded fragments lists.
+ The uncoded list starts at the end of the same array, so its pointer
+ moves down by the number of uncoded fragments in each plane.*/
+ coded_fragis=_dec->state.coded_fragis;
+ uncoded_fragis=coded_fragis+_dec->state.nfrags;
+ for(pli=0;pli<3;pli++){
+ ptrdiff_t ncoded_fragis;
+ _pipe->coded_fragis[pli]=coded_fragis;
+ _pipe->uncoded_fragis[pli]=uncoded_fragis;
+ ncoded_fragis=_dec->state.ncoded_fragis[pli];
+ coded_fragis+=ncoded_fragis;
+ uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
+ }
+ /*Set up condensed quantizer tables.
+ Only the qis actually used by this frame are copied, so the tables can be
+ looked up by qii instead of by qi.*/
+ for(pli=0;pli<3;pli++){
+ for(qii=0;qii<_dec->state.nqis;qii++){
+ for(qti=0;qti<2;qti++){
+ _pipe->dequant[pli][qii][qti]=
+ _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
+ }
+ }
+ }
+ /*Set the previous DC predictor to 0 for all color planes and frame types.*/
+ memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
+ /*Initialize the bounding value array for the loop filter.
+ The loop filter is enabled when the initialization call returns 0.*/
+ _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
+ _pipe->bounding_values);
+ /*Initialize any buffers needed for post-processing.
+ We also save the current post-processing level, to guard against the user
+ changing it from a callback.*/
+ if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
+ /*If we don't have enough information to post-process, disable it, regardless
+ of the user-requested level.
+ The post-processing buffer then simply aliases all three planes of the
+ current reference frame.*/
+ else{
+ _pipe->pp_level=OC_PP_LEVEL_DISABLED;
+ memcpy(_dec->pp_frame_buf,
+ _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
+ sizeof(_dec->pp_frame_buf[0])*3);
+ }
+}
+
+ /*Undo the DC prediction in a single plane of an MCU (one or two super block
+ rows).
+ As a side effect, the number of coded and uncoded fragments in this plane of
+ the MCU is also computed.
+ _dec: The decoder context; the dc value of every coded fragment in the row
+ range is replaced by its value plus the prediction.
+ _pipe: The pipeline state: fragy0[_pli] and fragy_end[_pli] select the rows,
+ pred_last[_pli] is read and updated, and ncoded_fragis[_pli] and
+ nuncoded_fragis[_pli] receive the counts.
+ _pli: The color plane index.*/
+ static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+ const oc_fragment_plane *fplane;
+ oc_fragment *frags;
+ int *pred_last;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t fragi;
+ int fragx;
+ int fragy;
+ int fragy0;
+ int fragy_end;
+ int nhfrags;
+ /*Compute the first and last fragment row of the current MCU for this
+ plane.*/
+ fplane=_dec->state.fplanes+_pli;
+ fragy0=_pipe->fragy0[_pli];
+ fragy_end=_pipe->fragy_end[_pli];
+ nhfrags=fplane->nhfrags;
+ pred_last=_pipe->pred_last[_pli];
+ frags=_dec->state.frags;
+ ncoded_fragis=0;
+ fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
+ for(fragy=fragy0;fragy=nhfrags)ur_ref=-1; /*NOTE(review): this line appears
+ to be truncated (the text between a '<' and a later '>' is missing): the
+ loop conditions and the declarations of u_frags, l_ref, ul_ref, u_ref and
+ ur_ref used below are not visible here and must be restored before this
+ compiles.*/
+ else{
+ ur_ref=u_frags[fragi+1].coded?
+ OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
+ }
+ if(frags[fragi].coded){
+ int pred;
+ int ref;
+ ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+ /*We break out a separate case based on which of our neighbors use
+ the same reference frames.
+ This is somewhat faster than trying to make a generic case which
+ handles all of them, since it reduces lots of poorly predicted
+ jumps to one switch statement, and also lets a number of the
+ multiplications be optimized out by strength reduction.
+ The switch value is a bit mask: bit 0 is set when l_ref matches ref,
+ bit 1 when ul_ref matches, bit 2 when u_ref matches and bit 3 when
+ ur_ref matches.
+ When no bit is set, the last predictor for this reference frame is
+ used instead.*/
+ switch((l_ref==ref)|(ul_ref==ref)<<1|
+ (u_ref==ref)<<2|(ur_ref==ref)<<3){
+ default:pred=pred_last[ref];break;
+ case 1:
+ case 3:pred=frags[fragi-1].dc;break;
+ case 2:pred=u_frags[fragi-1].dc;break;
+ case 4:
+ case 6:
+ case 12:pred=u_frags[fragi].dc;break;
+ case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+ case 8:pred=u_frags[fragi+1].dc;break;
+ case 9:
+ case 11:
+ case 13:{
+ pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+ }break;
+ case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+ case 14:{
+ pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+ +10*u_frags[fragi].dc)/16;
+ }break;
+ case 7:
+ case 15:{
+ int p0;
+ int p1;
+ int p2;
+ p0=frags[fragi-1].dc;
+ p1=u_frags[fragi-1].dc;
+ p2=u_frags[fragi].dc;
+ pred=(29*(p0+p2)-26*p1)/32;
+ if(abs(pred-p2)>128)pred=p2; /*If the weighted value strays more than
+ 128 from a neighbor, that neighbor is used instead (up, then left, then
+ up-left).*/
+ else if(abs(pred-p0)>128)pred=p0;
+ else if(abs(pred-p1)>128)pred=p1;
+ }break;
+ }
+ pred_last[ref]=frags[fragi].dc+=pred;
+ ncoded_fragis++;
+ l_ref=ref;
+ }
+ else l_ref=-1;
+ ul_ref=u_ref;
+ u_ref=ur_ref;
+ }
+ }
+ }
+ _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+ /*Also save the number of uncoded fragments so we know how many to copy.*/
+ _pipe->nuncoded_fragis[_pli]=
+ (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
+ }
+
+ /*Reconstructs all coded fragments in a single MCU (one or two super block
+ rows).
+ This requires that each coded fragment have a proper macro block mode and
+ motion vector (if not in INTRA mode), and have its DC value decoded, with
+ the DC prediction process reversed, and the number of coded and uncoded
+ fragments in this plane of the MCU be counted.
+ The token lists for each color plane and coefficient should also be filled
+ in, along with initial token offsets, extra bits offsets, and EOB run
+ counts.
+ _dec: The decoder context (token buffer, fragment array, zig-zag table).
+ _pipe: The pipeline state; its per-plane token indices, EOB runs and coded
+ and uncoded fragment list pointers are read and advanced.
+ _pli: The color plane index.*/
+ static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+ unsigned char *dct_tokens;
+ const unsigned char *dct_fzig_zag;
+ ogg_uint16_t dc_quant[2];
+ const oc_fragment *frags;
+ const ptrdiff_t *coded_fragis;
+ ptrdiff_t ncoded_fragis;
+ ptrdiff_t fragii;
+ ptrdiff_t *ti;
+ ptrdiff_t *eob_runs;
+ int qti;
+ dct_tokens=_dec->dct_tokens;
+ dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
+ frags=_dec->state.frags;
+ coded_fragis=_pipe->coded_fragis[_pli];
+ ncoded_fragis=_pipe->ncoded_fragis[_pli];
+ ti=_pipe->ti[_pli];
+ eob_runs=_pipe->eob_runs[_pli];
+ for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
+ for(fragii=0;fragiidequant[_pli][frags[fragi].qii][qti]; /*NOTE(review): this
+ line appears to be truncated (the text between a '<' and a later '>' is
+ missing): the loop condition and the declarations of dct_coeffs, ac_quant,
+ fragi, last_zzi and zzi used below are not visible here and must be
+ restored before this compiles.*/
+ /*Decode the AC coefficients.*/
+ for(zzi=0;zzi<64;){
+ int token;
+ last_zzi=zzi;
+ if(eob_runs[zzi]){
+ eob_runs[zzi]--;
+ break;
+ }
+ else{
+ ptrdiff_t eob;
+ int cw;
+ int rlen;
+ int coeff;
+ int lti;
+ lti=ti[zzi];
+ token=dct_tokens[lti++];
+ cw=OC_DCT_CODE_WORD[token];
+ /*These parts could be done branchless, but the branches are fairly
+ predictable and the C code translates into more than a few
+ instructions, so it's worth it to avoid them.*/
+ if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+ cw+=dct_tokens[lti++]<>OC_DCT_CW_EOB_SHIFT&0xFFF; /*NOTE(review): truncated
+ line; the shift amount, the end of this if block and the assignment that
+ starts eob are missing.*/
+ if(token==OC_DCT_TOKEN_FAT_EOB){
+ eob+=dct_tokens[lti++]<<8;
+ if(eob==0)eob=OC_DCT_EOB_FINISH;
+ }
+ rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+ cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; /*NOTE(review): truncated line; the end of
+ the sign-flip expression and the assignment that sets coeff are missing.*/
+ eob_runs[zzi]=eob;
+ ti[zzi]=lti;
+ zzi+=rlen;
+ dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]);
+ zzi+=!eob;
+ }
+ }
+ /*TODO: zzi should be exactly 64 here.
+ If it's not, we should report some kind of warning.*/
+ zzi=OC_MINI(zzi,64);
+ dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
+ /*last_zzi is always initialized.
+ If your compiler thinks otherwise, it is dumb.*/
+ oc_state_frag_recon(&_dec->state,fragi,_pli,
+ dct_coeffs,last_zzi,dc_quant[qti]);
+ }
+ _pipe->coded_fragis[_pli]+=ncoded_fragis;
+ /*Right now the reconstructed MCU has only the coded blocks in it.*/
+ /*TODO: We make the decision here to always copy the uncoded blocks into it
+ from the reference frame.
+ We could also copy the coded blocks back over the reference frame, if we
+ wait for an additional MCU to be decoded, which might be faster if only a
+ small number of blocks are coded.
+ However, this introduces more latency, creating a larger cache footprint.
+ It's unknown which decision is better, but this one results in simpler
+ code, and the hard case (high bitrate, high resolution) is handled
+ correctly.*/
+ /*Copy the uncoded blocks from the previous reference frame.
+ The uncoded list pointer is first moved back by this MCU's uncoded count,
+ so the copy starts at the entries that belong to this MCU.*/
+ _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+ oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli],
+ _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+ }
+
+ /*Filter a horizontal block edge.
+   Each of the 8 columns is handled independently: 10 vertically adjacent
+    pixels are read from _src, the activity on either side of the edge is
+    added to *_variance0 and *_variance1 (capped at 255 per column), and 8
+    pixels are written to _dst.
+   The written pixels are low-pass filtered when both activities are below
+    _flimit and the step across the edge is smaller than _qstep; otherwise
+    they are the 8 interior source pixels, unmodified.*/
+ static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
+  const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
+  int *_variance0,int *_variance1){
+   int col;
+   for(col=0;col<8;col++){
+     const unsigned char *in;
+     unsigned char       *out;
+     int                  p[10];
+     int                  q[8];
+     int                  act0;
+     int                  act1;
+     int                  i;
+     /*Gather the column of pixels that straddles the edge.*/
+     in=_src+col;
+     for(i=0;i<10;i++){
+       p[i]=*in;
+       in+=_src_ystride;
+     }
+     /*Sum the absolute differences on each side of the edge.*/
+     act0=0;
+     act1=0;
+     for(i=0;i<4;i++){
+       act0+=abs(p[i+1]-p[i]);
+       act1+=abs(p[i+5]-p[i+6]);
+     }
+     *_variance0+=OC_MINI(255,act0);
+     *_variance1+=OC_MINI(255,act1);
+     if(act0<_flimit&&act1<_flimit&&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
+       /*Smooth across the edge; every tap set sums to 8.*/
+       q[0]=(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3;
+       q[1]=(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3;
+       for(i=0;i<4;i++){
+         q[i+2]=(p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4)>>3;
+       }
+       q[6]=(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3;
+       q[7]=(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3;
+     }
+     /*Otherwise pass the interior pixels through untouched.*/
+     else for(i=0;i<8;i++)q[i]=p[i+1];
+     out=_dst+col;
+     for(i=0;i<8;i++){
+       *out=(unsigned char)q[i];
+       out+=_dst_ystride;
+     }
+   }
+ }
+
+ /*Filter a vertical block edge.
+   Works in place on 8 rows of _dst: for each row, the 10 pixels starting one
+    to the left of _dst are examined, their activity on either side of the
+    edge is added to _variances[0] and _variances[1] (capped at 255 per row),
+    and the 8 pixels starting at _dst are replaced by a low-pass filtered
+    version when both activities are below _flimit and the step across the
+    edge is smaller than _qstep.
+   Rows that fail the test are left untouched.*/
+ static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
+  int _qstep,int _flimit,int *_variances){
+   unsigned char *row;
+   int            line;
+   row=_dst;
+   for(line=0;line<8;line++,row+=_dst_ystride){
+     int p[10];
+     int act0;
+     int act1;
+     int i;
+     /*Snapshot the row before any pixel is overwritten.*/
+     for(i=0;i<10;i++)p[i]=row[i-1];
+     act0=0;
+     act1=0;
+     for(i=0;i<4;i++){
+       act0+=abs(p[i+1]-p[i]);
+       act1+=abs(p[i+5]-p[i+6]);
+     }
+     _variances[0]+=OC_MINI(255,act0);
+     _variances[1]+=OC_MINI(255,act1);
+     /*Skip rows that are too active or whose edge step is too large.*/
+     if(act0>=_flimit||act1>=_flimit)continue;
+     if(p[5]-p[4]>=_qstep||p[4]-p[5]>=_qstep)continue;
+     row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
+     row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
+     for(i=0;i<4;i++){
+       row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
+        p[i+4]+p[i+5]+p[i+6]+4)>>3);
+     }
+     row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
+     row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
+   }
+ }
+
+ static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
+ th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
+ int _fragy_end){/*De-block a range of fragment rows of plane _pli, reading
+ from _src[_pli] and writing to _dst[_pli].
+ Block edges are filtered with oc_filter_hedge() (source to destination) and
+ oc_filter_vedge() (in place on the destination), using a step size taken
+ from _dec->pp_dc_scale, indexed by the per-fragment entries of
+ _dec->dc_qis, and a limit of 3/4 of that step.
+ The activity measured by the filters is accumulated in _dec->variances.*/
+ oc_fragment_plane *fplane;
+ int *variance;
+ unsigned char *dc_qi;
+ unsigned char *dst;
+ const unsigned char *src;
+ ptrdiff_t froffset;
+ int dst_ystride;
+ int src_ystride;
+ int nhfrags;
+ int width;
+ int notstart;
+ int notdone;
+ int flimit;
+ int qstep;
+ int y_end;
+ int y;
+ int x;
+ _dst+=_pli;
+ _src+=_pli;
+ fplane=_dec->state.fplanes+_pli;
+ nhfrags=fplane->nhfrags;
+ froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+ variance=_dec->variances+froffset;
+ dc_qi=_dec->dc_qis+froffset;
+ notstart=_fragy0>0;
+ notdone=_fragy_endnvfrags; /*NOTE(review): truncated line (the text between
+ a '<' and a later '>' is missing); presumably a comparison of _fragy_end
+ against fplane->nvfrags -- confirm against upstream.*/
+ /*We want to clear an extra row of variances, except at the end.*/
+ memset(variance+(nhfrags&-notstart),0,
+ (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
+ /*Except for the first time, we want to point to the middle of the row.*/
+ y=(_fragy0<<3)+(notstart<<2);
+ dst_ystride=_dst->stride;
+ src_ystride=_src->stride;
+ dst=_dst->data+y*(ptrdiff_t)dst_ystride;
+ src=_src->data+y*(ptrdiff_t)src_ystride;
+ width=_dst->width;
+ for(;y<4;y++){
+ memcpy(dst,src,width*sizeof(dst[0]));
+ dst+=dst_ystride;
+ src+=src_ystride;
+ }
+ /*We also want to skip the last row in the frame for this loop.*/
+ y_end=_fragy_end-!notdone<<3;
+ for(;ypp_dc_scale[*dc_qi]; /*NOTE(review): truncated line; the loop
+ condition and step and the start of the qstep assignment are missing.*/
+ flimit=(qstep*3)>>2;
+ oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
+ qstep,flimit,variance,variance+nhfrags);
+ variance++;
+ dc_qi++;
+ for(x=8;xpp_dc_scale[*dc_qi]; /*NOTE(review): truncated line; the loop
+ condition and step and the start of the qstep assignment are missing.*/
+ flimit=(qstep*3)>>2;
+ oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
+ qstep,flimit,variance,variance+nhfrags);
+ oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
+ qstep,flimit,variance-1);
+ variance++;
+ dc_qi++;
+ }
+ dst+=dst_ystride<<3;
+ src+=src_ystride<<3;
+ }
+ /*And finally, handle the last row in the frame, if it's in the range.*/
+ if(!notdone){
+ int height;
+ height=_dst->height;
+ for(;ypp_dc_scale[*dc_qi++]; /*NOTE(review): truncated line; everything
+ between the start of this loop and the qstep lookup is missing, including
+ the header of the x loop that the brace below closes.*/
+ flimit=(qstep*3)>>2;
+ oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
+ qstep,flimit,variance++);
+ }
+ }
+ }
+
+ static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
+ int _dc_scale,int _sharp_mod,int _strong){/*De-ring one block in place.
+ _idata points at the block's first pixel and _ystride is the row stride.
+ _b is a bit mask; as used below, bit value 4 keeps the row above from being
+ read, 2 keeps the pixel to the right of the block from being read, and 8
+ stops the next-row pointer from advancing past the block.
+ _strong indexes the OC_MOD_MAX and OC_MOD_SHIFT tables.
+ Each output pixel is (a*src+b)>>7 passed through OC_CLAMP255, where a is 128
+ minus the four neighbor weights and b is 64 plus the weighted neighbors.*/
+ static const unsigned char OC_MOD_MAX[2]={24,32};
+ static const unsigned char OC_MOD_SHIFT[2]={1,0};
+ const unsigned char *psrc;
+ const unsigned char *src;
+ const unsigned char *nsrc;
+ unsigned char *dst;
+ int vmod[72];
+ int hmod[72];
+ int mod_hi;
+ int by;
+ int bx;
+ mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]); /*Upper bound for a weight.*/
+ dst=_idata;
+ src=dst;
+ psrc=src-(_ystride&-!(_b&4)); /*Same row as src when bit value 4 is set.*/
+ for(by=0;by<9;by++){
+ for(bx=0;bx<8;bx++){
+ int mod;
+ mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7); /*NOTE(review): this line
+ appears to be truncated (the text between a '<' and a later '>' is
+ missing): the rest of the weight computation, the code that fills vmod and
+ hmod, the start of the per-row filtering loop and the declarations of a, b
+ and w used below are not visible here.*/
+ for(bx=1;bx<7;bx++){
+ a=128;
+ b=64;
+ w=hmod[(bx<<3)+by];
+ a-=w;
+ b+=w*src[bx-1];
+ w=vmod[(by<<3)+bx];
+ a-=w;
+ b+=w*psrc[bx];
+ w=vmod[(by+1<<3)+bx];
+ a-=w;
+ b+=w*nsrc[bx];
+ w=hmod[(bx+1<<3)+by];
+ a-=w;
+ b+=w*src[bx+1];
+ dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
+ }
+ a=128;
+ b=64;
+ w=hmod[(7<<3)+by];
+ a-=w;
+ b+=w*src[6];
+ w=vmod[(by<<3)+7];
+ a-=w;
+ b+=w*psrc[7];
+ w=vmod[(by+1<<3)+7];
+ a-=w;
+ b+=w*nsrc[7];
+ w=hmod[(8<<3)+by];
+ a-=w;
+ b+=w*src[7+!(_b&2)]; /*Reuses src[7] itself when bit value 2 is set.*/
+ dst[7]=OC_CLAMP255(a*src[7]+b>>7);
+ dst+=_ystride;
+ psrc=src;
+ src=nsrc;
+ nsrc+=_ystride&-(!(_b&8)|by<6); /*With bit value 8 set, stops advancing once
+ by reaches 6.*/
+ }
+ }
+
+ #define OC_DERING_THRESH1 (384) /*Variance above which one pass with _strong=0 is run.*/
+ #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) /*Variance above which one pass with _strong=1 is run.*/
+ #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) /*Luma (_pli==0) threshold used when strong de-ringing is enabled.*/
+ #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) /*Chroma threshold for strong de-ringing; also the neighbor variance that triggers two extra passes.*/
+
+ static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
+ int _pli,int _fragy0,int _fragy_end){/*De-ring a range of fragment rows of
+ plane _pli of _img in place.
+ For each fragment, the value stored in _dec->variances selects between no
+ filtering, one pass of oc_dering_block() in weak or strong mode, or (when
+ the strong level is enabled for this plane) one strong pass plus two more
+ for chroma planes or when a neighbor inside the image exceeds
+ OC_DERING_THRESH4.*/
+ th_img_plane *iplane;
+ oc_fragment_plane *fplane;
+ oc_fragment *frag;
+ int *variance;
+ unsigned char *idata;
+ ptrdiff_t froffset;
+ int ystride;
+ int nhfrags;
+ int sthresh;
+ int strong;
+ int y_end;
+ int width;
+ int height;
+ int y;
+ int x;
+ iplane=_img+_pli;
+ fplane=_dec->state.fplanes+_pli;
+ nhfrags=fplane->nhfrags;
+ froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+ variance=_dec->variances+froffset;
+ frag=_dec->state.frags+froffset;
+ strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
+ sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
+ y=_fragy0<<3;
+ ystride=iplane->stride;
+ idata=iplane->data+y*(ptrdiff_t)ystride;
+ y_end=_fragy_end<<3;
+ width=iplane->width;
+ height=iplane->height;
+ for(;ystate.qis[frag->qii]; /*NOTE(review): this line appears to be truncated
+ (the text between a '<' and a later '>' is missing): the row and column
+ loop headers and the declarations of b, qi and var used below are not
+ visible here.*/
+ var=*variance;
+ b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; /*Image edge flags:
+ left, right, top, bottom.*/
+ if(strong&&var>sthresh){
+ oc_dering_block(idata+x,ystride,b,
+ _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+ if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
+ !(b&2)&&variance[1]>OC_DERING_THRESH4||
+ !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
+ !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
+ oc_dering_block(idata+x,ystride,b,
+ _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+ oc_dering_block(idata+x,ystride,b,
+ _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+ }
+ }
+ else if(var>OC_DERING_THRESH2){
+ oc_dering_block(idata+x,ystride,b,
+ _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+ }
+ else if(var>OC_DERING_THRESH1){
+ oc_dering_block(idata+x,ystride,b,
+ _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
+ }
+ frag++;
+ variance++;
+ }
+ idata+=ystride<<3;
+ }
+ }
+
+
+
+ /*Allocate and initialize a decoder context.
+   _info:  The stream parameters; must not be NULL.
+   _setup: The decoder setup information; must not be NULL.
+   Return: The new context with its current frame number set to 0, or NULL if
+            an argument is NULL, allocation fails, or initialization fails.*/
+ th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
+   oc_dec_ctx *ctx;
+   if(_info==NULL||_setup==NULL)return NULL;
+   ctx=_ogg_malloc(sizeof(*ctx));
+   if(ctx!=NULL&&oc_dec_init(ctx,_info,_setup)>=0){
+     ctx->state.curframe_num=0;
+     return ctx;
+   }
+   /*Either the allocation or the initialization failed; release whatever we
+      got.*/
+   _ogg_free(ctx);
+   return NULL;
+ }
+
+ /*Clear and release a decoder context.
+   Passing NULL is allowed and does nothing.*/
+ void th_decode_free(th_dec_ctx *_dec){
+   if(_dec==NULL)return;
+   oc_dec_clear(_dec);
+   _ogg_free(_dec);
+ }
+
+ /*Get or set a decoder control parameter.
+   _dec:    The decoder context.
+   _req:    The TH_DECCTL_* request code.
+   _buf:    The request's argument or result buffer.
+   _buf_sz: The size of _buf in bytes; it must exactly match the size of the
+             type the request expects.
+   Return: 0 on success, TH_EFAULT if _dec or _buf is NULL for a recognized
+            request, TH_EINVAL if _buf_sz or the supplied value is invalid, or
+            TH_EIMPL if the request is not recognized.
+   The telemetry requests are only recognized when HAVE_CAIRO is defined.*/
+ int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+  size_t _buf_sz){
+   switch(_req){
+     case TH_DECCTL_GET_PPLEVEL_MAX:{
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(int))return TH_EINVAL;
+       (*(int *)_buf)=OC_PP_LEVEL_MAX;
+       return 0;
+     }break;
+     case TH_DECCTL_SET_PPLEVEL:{
+       int pp_level;
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(int))return TH_EINVAL;
+       pp_level=*(int *)_buf;
+       if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
+       _dec->pp_level=pp_level;
+       return 0;
+     }break;
+     case TH_DECCTL_SET_GRANPOS:{
+       ogg_int64_t granpos;
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
+       granpos=*(ogg_int64_t *)_buf;
+       if(granpos<0)return TH_EINVAL;
+       _dec->state.granpos=granpos;
+       /*The upper bits of the granule position hold the key frame number and
+          the low keyframe_granule_shift bits hold the offset from it.*/
+       _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
+        -_dec->state.granpos_bias;
+       /*Build the offset mask in 64 bits: with a plain int constant, a shift
+          of 31 would overflow a 32-bit signed int, which is undefined
+          behavior.*/
+       _dec->state.curframe_num=_dec->state.keyframe_num
+        +(granpos&(((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1));
+       return 0;
+     }break;
+     case TH_DECCTL_SET_STRIPE_CB:{
+       th_stripe_callback *cb;
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
+       cb=(th_stripe_callback *)_buf;
+       _dec->stripe_cb.ctx=cb->ctx;
+       _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
+       return 0;
+     }break;
+ #ifdef HAVE_CAIRO
+     /*Each telemetry request also switches telemetry output on.*/
+     case TH_DECCTL_SET_TELEMETRY_MBMODE:{
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(int))return TH_EINVAL;
+       _dec->telemetry=1;
+       _dec->telemetry_mbmode=*(int *)_buf;
+       return 0;
+     }break;
+     case TH_DECCTL_SET_TELEMETRY_MV:{
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(int))return TH_EINVAL;
+       _dec->telemetry=1;
+       _dec->telemetry_mv=*(int *)_buf;
+       return 0;
+     }break;
+     case TH_DECCTL_SET_TELEMETRY_QI:{
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(int))return TH_EINVAL;
+       _dec->telemetry=1;
+       _dec->telemetry_qi=*(int *)_buf;
+       return 0;
+     }break;
+     case TH_DECCTL_SET_TELEMETRY_BITS:{
+       if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+       if(_buf_sz!=sizeof(int))return TH_EINVAL;
+       _dec->telemetry=1;
+       _dec->telemetry_bits=*(int *)_buf;
+       return 0;
+     }break;
+ #endif
+     default:return TH_EIMPL;
+   }
+ }
+
+ /*Called when an INTER frame arrives before any reference buffer has been
+    initialized (i.e., decoding did not start on a key frame).
+   The golden and previous references are pointed at buffer 0, the current
+    frame at buffer 1, and the memory starting at ref_frame_data[0] is filled
+    with solid gray (0x80) for one padded luma plane plus two padded chroma
+    planes.*/
+ static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
+   const th_info *fmt;
+   size_t         luma_sz;
+   size_t         chroma_sz;
+   int            luma_w;
+   int            luma_h;
+   int            chroma_w;
+   int            chroma_h;
+   fmt=&_dec->state.info;
+   /*Plane dimensions include the padding on both sides.*/
+   luma_w=fmt->frame_width+2*OC_UMV_PADDING;
+   luma_h=fmt->frame_height+2*OC_UMV_PADDING;
+   /*Chroma is halved in a direction when the matching pixel_fmt bit is
+      clear.*/
+   chroma_w=luma_w>>!(fmt->pixel_fmt&1);
+   chroma_h=luma_h>>!(fmt->pixel_fmt&2);
+   luma_sz=luma_w*(size_t)luma_h;
+   chroma_sz=chroma_w*(size_t)chroma_h;
+   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
+   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
+   _dec->state.ref_frame_idx[OC_FRAME_SELF]=1;
+   memset(_dec->state.ref_frame_data[0],0x80,luma_sz+2*chroma_sz);
+ }
+
+ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos){/*Submit one packet to the decoder.
+ _dec: The decoder context.
+ _op: The packet to decode; a packet with zero bytes is a dropped frame.
+ _granpos: If not NULL, receives the granule position of the frame.
+ Return: 0 when a frame was decoded, TH_DUPFRAME for a zero-byte packet,
+ TH_EFAULT if _dec or _op is NULL, or the negative value returned by
+ oc_dec_frame_header_unpack() when the frame header cannot be unpacked.*/
+ int ret;
+ if(_dec==NULL||_op==NULL)return TH_EFAULT;
+ /*A completely empty packet indicates a dropped frame and is treated exactly
+ like an inter frame with no coded blocks.
+ Only proceed if we have a non-empty packet.*/
+ if(_op->bytes!=0){
+ oc_dec_pipeline_state pipe;
+ th_ycbcr_buffer stripe_buf;
+ int stripe_fragy;
+ int refi;
+ int pli;
+ int notstart;
+ int notdone;
+ oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+ #if defined(HAVE_CAIRO)
+ _dec->telemetry_frame_bytes=_op->bytes;
+ #endif
+ ret=oc_dec_frame_header_unpack(_dec);
+ if(ret<0)return ret;
+ /*Select a free buffer to use for the reconstructed version of this
+ frame.*/
+ if(_dec->state.frame_type!=OC_INTRA_FRAME&&
+ (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+ _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
+ /*No reference frames yet!*/
+ oc_dec_init_dummy_frame(_dec);
+ refi=_dec->state.ref_frame_idx[OC_FRAME_SELF];
+ }
+ else{
+ for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
+ refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+ _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+ }
+ if(_dec->state.frame_type==OC_INTRA_FRAME){
+ oc_dec_mark_all_intra(_dec);
+ _dec->state.keyframe_num=_dec->state.curframe_num;
+ #if defined(HAVE_CAIRO)
+ _dec->telemetry_coding_bytes=
+ _dec->telemetry_mode_bytes=
+ _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+ #endif
+ }
+ else{
+ oc_dec_coded_flags_unpack(_dec);
+ #if defined(HAVE_CAIRO)
+ _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
+ #endif
+ oc_dec_mb_modes_unpack(_dec);
+ #if defined(HAVE_CAIRO)
+ _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
+ #endif
+ oc_dec_mv_unpack_and_frag_modes_fill(_dec);
+ #if defined(HAVE_CAIRO)
+ _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+ #endif
+ }
+ oc_dec_block_qis_unpack(_dec);
+ #if defined(HAVE_CAIRO)
+ _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
+ #endif
+ oc_dec_residual_tokens_unpack(_dec);
+ /*Update granule position.
+ This must be done before the striped decode callbacks so that the
+ application knows what to do with the frame data.*/
+ _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+ _dec->state.info.keyframe_granule_shift)
+ +(_dec->state.curframe_num-_dec->state.keyframe_num);
+ _dec->state.curframe_num++;
+ if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+ /*All of the rest of the operations -- DC prediction reversal,
+ reconstructing coded fragments, copying uncoded fragments, loop
+ filtering, extending borders, and out-of-loop post-processing -- should
+ be pipelined.
+ I.e., DC prediction reversal, reconstruction, and uncoded fragment
+ copying are done for one or two super block rows, then loop filtering is
+ run as far as it can, then bordering copying, then post-processing.
+ For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
+ block rows, and one chroma.
+ Otherwise, an MCU consists of one super block row from each plane.
+ Inside each MCU, we perform all of the steps on one color plane before
+ moving on to the next.
+ After reconstruction, the additional filtering stages introduce a delay
+ since they need some pixels from the next fragment row.
+ Thus the actual number of decoded rows available is slightly smaller for
+ the first MCU, and slightly larger for the last.
+
+ This entire process allows us to operate on the data while it is still in
+ cache, resulting in big performance improvements.
+ An application callback allows further application processing (blitting
+ to video memory, color conversion, etc.) to also use the data while it's
+ in cache.*/
+ oc_dec_pipeline_init(_dec,&pipe);
+ oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
+ notstart=0;
+ notdone=1;
+ for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
+ int avail_fragy0;
+ int avail_fragy_end;
+ avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
+ notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli; /*NOTE(review): this
+ line appears to be truncated (the text between a '<' and a later '>' is
+ missing): the end of the notdone test, the per-plane loop header and the
+ declarations of fplane, frag_shift, sdelay, edelay and pp_offset used
+ below are not visible here.*/
+ /*Compute the first and last fragment row of the current MCU for this
+ plane.*/
+ frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+ pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+ pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+ pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
+ oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
+ oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
+ sdelay=edelay=0;
+ if(pipe.loop_filter){
+ sdelay+=notstart;
+ edelay+=notdone;
+ oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
+ refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+ }
+ /*To fill the borders, we have an additional two pixel delay, since a
+ fragment in the next row could filter its top edge, using two pixels
+ from a fragment in this row.
+ But there's no reason to delay a full fragment between the two.*/
+ oc_state_borders_fill_rows(&_dec->state,refi,pli,
+ (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
+ (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+ /*Out-of-loop post-processing.*/
+ pp_offset=3*(pli!=0);
+ if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+ /*Perform de-blocking in one plane.*/
+ sdelay+=notstart;
+ edelay+=notdone;
+ oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+ _dec->state.ref_frame_bufs[refi],pli,
+ pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+ if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+ /*Perform de-ringing in one plane.*/
+ sdelay+=notstart;
+ edelay+=notdone;
+ oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+ pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+ }
+ }
+ /*If no post-processing is done, we still need to delay a row for the
+ loop filter, thanks to the strange filtering order VP3 chose.*/
+ else if(pipe.loop_filter){
+ sdelay+=notstart;
+ edelay+=notdone;
+ }
+ /*Compute the intersection of the available rows in all planes.
+ If chroma is sub-sampled, the effect of each of its delays is
+ doubled, but luma might have more post-processing filters enabled
+ than chroma, so we don't know up front which one is the limiting
+ factor.*/
+ avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){ /*NOTE(review):
+ truncated line; the rest of the avail_fragy0 update, the avail_fragy_end
+ update, the end of the per-plane loop and the start of the callback test
+ are missing.*/
+ /*The callback might want to use the FPU, so let's make sure they can.
+ We violate all kinds of ABI restrictions by not doing this until
+ now, but none of them actually matter since we don't use floating
+ point ourselves.*/
+ oc_restore_fpu(&_dec->state);
+ /*Make the callback, ensuring we flip the sense of the "start" and
+ "end" of the available region upside down.*/
+ (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+ _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+ _dec->state.fplanes[0].nvfrags-avail_fragy0);
+ }
+ notstart=1;
+ }
+ /*Finish filling in the reference frame borders.*/
+ for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
+ /*Update the reference frame indices.*/
+ if(_dec->state.frame_type==OC_INTRA_FRAME){
+ /*The new frame becomes both the previous and gold reference frames.*/
+ _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
+ _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+ _dec->state.ref_frame_idx[OC_FRAME_SELF];
+ }
+ else{
+ /*Otherwise, just replace the previous reference frame.*/
+ _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+ _dec->state.ref_frame_idx[OC_FRAME_SELF];
+ }
+ /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
+ gamma values, if nothing else).*/
+ oc_restore_fpu(&_dec->state);
+ #if defined(OC_DUMP_IMAGES)
+ /*Don't dump images for dropped frames.*/
+ oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+ #endif
+ return 0;
+ }
+ else{
+ if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+ _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){
+ int refi;
+ /*No reference frames yet!
+ Use the gray dummy frame as the output, too.*/
+ oc_dec_init_dummy_frame(_dec);
+ refi=_dec->state.ref_frame_idx[OC_FRAME_PREV];
+ _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+ memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi],
+ sizeof(_dec->pp_frame_buf[0])*3);
+ }
+ /*Just update the granule position and return.*/
+ _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+ _dec->state.info.keyframe_granule_shift)
+ +(_dec->state.curframe_num-_dec->state.keyframe_num);
+ _dec->state.curframe_num++;
+ if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+ return TH_DUPFRAME;
+ }
+ }
+
+int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
+ if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
+ oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
+#if defined(HAVE_CAIRO)
+ /*If telemetry ioctls are active, we need to draw to the output buffer.
+ Stuff the plane into cairo.*/
+ if(_dec->telemetry){
+ cairo_surface_t *cs;
+ unsigned char *data;
+ unsigned char *y_row;
+ unsigned char *u_row;
+ unsigned char *v_row;
+ unsigned char *rgb_row;
+ int cstride;
+ int w;
+ int h;
+ int x;
+ int y;
+ int hdec;
+ int vdec;
+ w=_ycbcr[0].width;
+ h=_ycbcr[0].height;
+ hdec=!(_dec->state.info.pixel_fmt&1);
+ vdec=!(_dec->state.info.pixel_fmt&2);
+ /*Lazy data buffer init.
+ We could try to re-use the post-processing buffer, which would save
+ memory, but complicate the allocation logic there.
+ I don't think anyone cares about memory usage when using telemetry; it is
+ not meant for embedded devices.*/
+ if(_dec->telemetry_frame_data==NULL){
+ _dec->telemetry_frame_data=_ogg_malloc(
+ (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
+ if(_dec->telemetry_frame_data==NULL)return 0;
+ }
+ cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
+ /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
+ data=cairo_image_surface_get_data(cs);
+ if(data==NULL){
+ cairo_surface_destroy(cs);
+ return 0;
+ }
+ cstride=cairo_image_surface_get_stride(cs);
+ y_row=_ycbcr[0].data;
+ u_row=_ycbcr[1].data;
+ v_row=_ycbcr[2].data;
+ rgb_row=data;
+ for(y=0;y>hdec]-363703744)/1635200;
+ g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
+ -2672387*v_row[x>>hdec]+447306710)/3287200;
+ b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
+ rgb_row[4*x+0]=OC_CLAMP255(b);
+ rgb_row[4*x+1]=OC_CLAMP255(g);
+ rgb_row[4*x+2]=OC_CLAMP255(r);
+ }
+ y_row+=_ycbcr[0].stride;
+ u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
+ v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
+ rgb_row+=cstride;
+ }
+ /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
+ {
+ cairo_t *c;
+ const oc_fragment *frags;
+ oc_mv *frag_mvs;
+ const signed char *mb_modes;
+ oc_mb_map *mb_maps;
+ size_t nmbs;
+ size_t mbi;
+ int row2;
+ int col2;
+ int qim[3]={0,0,0};
+ if(_dec->state.nqis==2){
+ int bqi;
+ bqi=_dec->state.qis[0];
+ if(_dec->state.qis[1]>bqi)qim[1]=1;
+ if(_dec->state.qis[1]state.nqis==3){
+ int bqi;
+ int cqi;
+ int dqi;
+ bqi=_dec->state.qis[0];
+ cqi=_dec->state.qis[1];
+ dqi=_dec->state.qis[2];
+ if(cqi>bqi&&dqi>bqi){
+ if(dqi>cqi){
+ qim[1]=1;
+ qim[2]=2;
+ }
+ else{
+ qim[1]=2;
+ qim[2]=1;
+ }
+ }
+ else if(cqistate.frags;
+ frag_mvs=_dec->state.frag_mvs;
+ mb_modes=_dec->state.mb_modes;
+ mb_maps=_dec->state.mb_maps;
+ nmbs=_dec->state.nmbs;
+ row2=0;
+ col2=0;
+ for(mbi=0;mbi>1)&1))*16-16;
+ x=(col2>>1)*16;
+ cairo_set_line_width(c,1.);
+ /*Keyframe (all intra) red box.*/
+ if(_dec->state.frame_type==OC_INTRA_FRAME){
+ if(_dec->telemetry_mbmode&0x02){
+ cairo_set_source_rgba(c,1.,0,0,.5);
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,1.,0,0,.25);
+ cairo_fill(c);
+ }
+ }
+ else{
+ const signed char *frag_mv;
+ ptrdiff_t fragi;
+ for(bi=0;bi<4;bi++){
+ fragi=mb_maps[mbi][0][bi];
+ if(fragi>=0&&frags[fragi].coded){
+ frag_mv=frag_mvs[fragi];
+ break;
+ }
+ }
+ if(bi<4){
+ switch(mb_modes[mbi]){
+ case OC_MODE_INTRA:{
+ if(_dec->telemetry_mbmode&0x02){
+ cairo_set_source_rgba(c,1.,0,0,.5);
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,1.,0,0,.25);
+ cairo_fill(c);
+ }
+ }break;
+ case OC_MODE_INTER_NOMV:{
+ if(_dec->telemetry_mbmode&0x01){
+ cairo_set_source_rgba(c,0,0,1.,.5);
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,0,0,1.,.25);
+ cairo_fill(c);
+ }
+ }break;
+ case OC_MODE_INTER_MV:{
+ if(_dec->telemetry_mbmode&0x04){
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_set_source_rgba(c,0,1.,0,.5);
+ cairo_stroke(c);
+ }
+ if(_dec->telemetry_mv&0x04){
+ cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+8,y+8);
+ cairo_stroke(c);
+ }
+ }break;
+ case OC_MODE_INTER_MV_LAST:{
+ if(_dec->telemetry_mbmode&0x08){
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_set_source_rgba(c,0,1.,0,.5);
+ cairo_move_to(c,x+13.5,y+2.5);
+ cairo_line_to(c,x+2.5,y+8);
+ cairo_line_to(c,x+13.5,y+13.5);
+ cairo_stroke(c);
+ }
+ if(_dec->telemetry_mv&0x08){
+ cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+8,y+8);
+ cairo_stroke(c);
+ }
+ }break;
+ case OC_MODE_INTER_MV_LAST2:{
+ if(_dec->telemetry_mbmode&0x10){
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_set_source_rgba(c,0,1.,0,.5);
+ cairo_move_to(c,x+8,y+2.5);
+ cairo_line_to(c,x+2.5,y+8);
+ cairo_line_to(c,x+8,y+13.5);
+ cairo_move_to(c,x+13.5,y+2.5);
+ cairo_line_to(c,x+8,y+8);
+ cairo_line_to(c,x+13.5,y+13.5);
+ cairo_stroke(c);
+ }
+ if(_dec->telemetry_mv&0x10){
+ cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+8,y+8);
+ cairo_stroke(c);
+ }
+ }break;
+ case OC_MODE_GOLDEN_NOMV:{
+ if(_dec->telemetry_mbmode&0x20){
+ cairo_set_source_rgba(c,1.,1.,0,.5);
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,1.,1.,0,.25);
+ cairo_fill(c);
+ }
+ }break;
+ case OC_MODE_GOLDEN_MV:{
+ if(_dec->telemetry_mbmode&0x40){
+ cairo_rectangle(c,x+2.5,y+2.5,11,11);
+ cairo_set_source_rgba(c,1.,1.,0,.5);
+ cairo_stroke(c);
+ }
+ if(_dec->telemetry_mv&0x40){
+ cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+8,y+8);
+ cairo_stroke(c);
+ }
+ }break;
+ case OC_MODE_INTER_MV_FOUR:{
+ if(_dec->telemetry_mbmode&0x80){
+ cairo_rectangle(c,x+2.5,y+2.5,4,4);
+ cairo_rectangle(c,x+9.5,y+2.5,4,4);
+ cairo_rectangle(c,x+2.5,y+9.5,4,4);
+ cairo_rectangle(c,x+9.5,y+9.5,4,4);
+ cairo_set_source_rgba(c,0,1.,0,.5);
+ cairo_stroke(c);
+ }
+ /*4mv is odd, coded in raster order.*/
+ fragi=mb_maps[mbi][0][0];
+ if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+ frag_mv=frag_mvs[fragi];
+ cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+4,y+12);
+ cairo_stroke(c);
+ }
+ fragi=mb_maps[mbi][0][1];
+ if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+ frag_mv=frag_mvs[fragi];
+ cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+12,y+12);
+ cairo_stroke(c);
+ }
+ fragi=mb_maps[mbi][0][2];
+ if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+ frag_mv=frag_mvs[fragi];
+ cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+4,y+4);
+ cairo_stroke(c);
+ }
+ fragi=mb_maps[mbi][0][3];
+ if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+ frag_mv=frag_mvs[fragi];
+ cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]);
+ cairo_set_source_rgba(c,1.,1.,1.,.9);
+ cairo_set_line_width(c,3.);
+ cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,2.);
+ cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
+ cairo_stroke_preserve(c);
+ cairo_set_line_width(c,1.);
+ cairo_line_to(c,x+12,y+4);
+ cairo_stroke(c);
+ }
+ }break;
+ }
+ }
+ }
+ /*qii illustration.*/
+ if(_dec->telemetry_qi&0x2){
+ cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
+ for(bi=0;bi<4;bi++){
+ ptrdiff_t fragi;
+ int qiv;
+ int xp;
+ int yp;
+ xp=x+(bi&1)*8;
+ yp=y+8-(bi&2)*4;
+ fragi=mb_maps[mbi][0][bi];
+ if(fragi>=0&&frags[fragi].coded){
+ qiv=qim[frags[fragi].qii];
+ cairo_set_line_width(c,3.);
+ cairo_set_source_rgba(c,0.,0.,0.,.5);
+ switch(qiv){
+ /*Double plus:*/
+ case 2:{
+ if((bi&1)^((bi&2)>>1)){
+ cairo_move_to(c,xp+2.5,yp+1.5);
+ cairo_line_to(c,xp+2.5,yp+3.5);
+ cairo_move_to(c,xp+1.5,yp+2.5);
+ cairo_line_to(c,xp+3.5,yp+2.5);
+ cairo_move_to(c,xp+5.5,yp+4.5);
+ cairo_line_to(c,xp+5.5,yp+6.5);
+ cairo_move_to(c,xp+4.5,yp+5.5);
+ cairo_line_to(c,xp+6.5,yp+5.5);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,0.,1.,1.,1.);
+ }
+ else{
+ cairo_move_to(c,xp+5.5,yp+1.5);
+ cairo_line_to(c,xp+5.5,yp+3.5);
+ cairo_move_to(c,xp+4.5,yp+2.5);
+ cairo_line_to(c,xp+6.5,yp+2.5);
+ cairo_move_to(c,xp+2.5,yp+4.5);
+ cairo_line_to(c,xp+2.5,yp+6.5);
+ cairo_move_to(c,xp+1.5,yp+5.5);
+ cairo_line_to(c,xp+3.5,yp+5.5);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,0.,1.,1.,1.);
+ }
+ }break;
+ /*Double minus:*/
+ case -2:{
+ cairo_move_to(c,xp+2.5,yp+2.5);
+ cairo_line_to(c,xp+5.5,yp+2.5);
+ cairo_move_to(c,xp+2.5,yp+5.5);
+ cairo_line_to(c,xp+5.5,yp+5.5);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,1.,1.,1.,1.);
+ }break;
+ /*Plus (xp/yp shift by -2 when the matching bit of bi is clear):*/
+ case 1:{
+ if((bi&2)==0)yp-=2;
+ if((bi&1)==0)xp-=2;
+ cairo_move_to(c,xp+4.5,yp+2.5);
+ cairo_line_to(c,xp+4.5,yp+6.5);
+ cairo_move_to(c,xp+2.5,yp+4.5);
+ cairo_line_to(c,xp+6.5,yp+4.5);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,.1,1.,.3,1.);
+ break;
+ }
+ /*The plus case breaks above, so control never falls through to minus.*/
+ /*Minus:*/
+ case -1:{
+ cairo_move_to(c,xp+2.5,yp+4.5);
+ cairo_line_to(c,xp+6.5,yp+4.5);
+ cairo_stroke_preserve(c);
+ cairo_set_source_rgba(c,1.,.3,.1,1.);
+ }break;
+ default:continue;
+ }
+ cairo_set_line_width(c,1.);
+ cairo_stroke(c);
+ }
+ }
+ }
+ col2++;
+ if((col2>>1)>=_dec->state.nhmbs){
+ col2=0;
+ row2+=2;
+ }
+ }
+ /*Bit usage indicator[s]:*/
+ if(_dec->telemetry_bits){
+ int widths[6];
+ int fpsn;
+ int fpsd;
+ int mult;
+ int fullw;
+ int padw;
+ int i;
+ fpsn=_dec->state.info.fps_numerator;
+ fpsd=_dec->state.info.fps_denominator;
+ mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
+ fullw=250.f*h*fpsd*mult/fpsn;
+ padw=w-24;
+ /*Header and coded block bits.*/
+ if(_dec->telemetry_frame_bytes<0||
+ _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
+ _dec->telemetry_frame_bytes=0;
+ }
+ if(_dec->telemetry_coding_bytes<0||
+ _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
+ _dec->telemetry_coding_bytes=0;
+ }
+ if(_dec->telemetry_mode_bytes<0||
+ _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
+ _dec->telemetry_mode_bytes=0;
+ }
+ if(_dec->telemetry_mv_bytes<0||
+ _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
+ _dec->telemetry_mv_bytes=0;
+ }
+ if(_dec->telemetry_qi_bytes<0||
+ _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
+ _dec->telemetry_qi_bytes=0;
+ }
+ if(_dec->telemetry_dc_bytes<0||
+ _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
+ _dec->telemetry_dc_bytes=0;
+ }
+ widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
+ widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
+ widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
+ widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
+ widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
+ widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
+ for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
+ cairo_set_source_rgba(c,.0,.0,.0,.6);
+ cairo_rectangle(c,10,h-33,widths[0]+1,5);
+ cairo_rectangle(c,10,h-29,widths[1]+1,5);
+ cairo_rectangle(c,10,h-25,widths[2]+1,5);
+ cairo_rectangle(c,10,h-21,widths[3]+1,5);
+ cairo_rectangle(c,10,h-17,widths[4]+1,5);
+ cairo_rectangle(c,10,h-13,widths[5]+1,5);
+ cairo_fill(c);
+ cairo_set_source_rgb(c,1,0,0);
+ cairo_rectangle(c,10.5,h-32.5,widths[0],4);
+ cairo_fill(c);
+ cairo_set_source_rgb(c,0,1,0);
+ cairo_rectangle(c,10.5,h-28.5,widths[1],4);
+ cairo_fill(c);
+ cairo_set_source_rgb(c,0,0,1);
+ cairo_rectangle(c,10.5,h-24.5,widths[2],4);
+ cairo_fill(c);
+ cairo_set_source_rgb(c,.6,.4,.0);
+ cairo_rectangle(c,10.5,h-20.5,widths[3],4);
+ cairo_fill(c);
+ cairo_set_source_rgb(c,.3,.3,.3);
+ cairo_rectangle(c,10.5,h-16.5,widths[4],4);
+ cairo_fill(c);
+ cairo_set_source_rgb(c,.5,.5,.8);
+ cairo_rectangle(c,10.5,h-12.5,widths[5],4);
+ cairo_fill(c);
+ }
+ /*Master qi indicator[s]:*/
+ if(_dec->telemetry_qi&0x1){
+ cairo_text_extents_t extents;
+ char buffer[10];
+ int p;
+ int y;
+ p=0;
+ y=h-7.5;
+ if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
+ buffer[p++]=48+_dec->state.qis[0]%10;
+ if(_dec->state.nqis>=2){
+ buffer[p++]=' ';
+ if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
+ buffer[p++]=48+_dec->state.qis[1]%10;
+ }
+ if(_dec->state.nqis==3){
+ buffer[p++]=' ';
+ if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
+ buffer[p++]=48+_dec->state.qis[2]%10;
+ }
+ buffer[p++]='\0';
+ cairo_select_font_face(c,"sans",
+ CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
+ cairo_set_font_size(c,18);
+ cairo_text_extents(c,buffer,&extents);
+ cairo_set_source_rgb(c,1,1,1);
+ cairo_move_to(c,w-extents.x_advance-10,y);
+ cairo_show_text(c,buffer);
+ cairo_set_source_rgb(c,0,0,0);
+ cairo_move_to(c,w-extents.x_advance-10,y);
+ cairo_text_path(c,buffer);
+ cairo_set_line_width(c,.8);
+ cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
+ cairo_stroke(c);
+ }
+ cairo_destroy(c);
+ }
+ /*Out of the Cairo plane into the telemetry YUV buffer.*/
+ _ycbcr[0].data=_dec->telemetry_frame_data;
+ _ycbcr[0].stride=_ycbcr[0].width;
+ _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
+ _ycbcr[1].stride=_ycbcr[1].width;
+ _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
+ _ycbcr[2].stride=_ycbcr[2].width;
+ y_row=_ycbcr[0].data;
+ u_row=_ycbcr[1].data;
+ v_row=_ycbcr[2].data;
+ rgb_row=data;
+ /*This is one of the few places it's worth handling chroma on a
+ case-by-case basis.*/
+ switch(_dec->state.info.pixel_fmt){
+ case TH_PF_420:{
+ for(y=0;y>1]=OC_CLAMP255(u);
+ v_row[x>>1]=OC_CLAMP255(v);
+ }
+ y_row+=_ycbcr[0].stride<<1;
+ u_row+=_ycbcr[1].stride;
+ v_row+=_ycbcr[2].stride;
+ rgb_row+=cstride<<1;
+ }
+ }break;
+ case TH_PF_422:{
+ for(y=0;y>1]=OC_CLAMP255(u);
+ v_row[x>>1]=OC_CLAMP255(v);
+ }
+ y_row+=_ycbcr[0].stride;
+ u_row+=_ycbcr[1].stride;
+ v_row+=_ycbcr[2].stride;
+ rgb_row+=cstride;
+ }
+ }break;
+ /*case TH_PF_444:*/
+ default:{
+ for(y=0;yloop_filter_limits[qi]=(unsigned char)val;
}
- theorapackB_read(_opb,4,&val);
+ val=oc_pack_read(_opb,4);
nbits=(int)val+1;
for(qi=0;qi<64;qi++){
- theorapackB_read(_opb,nbits,&val);
+ val=oc_pack_read(_opb,nbits);
_qinfo->ac_scale[qi]=(ogg_uint16_t)val;
}
- theorapackB_read(_opb,4,&val);
+ val=oc_pack_read(_opb,4);
nbits=(int)val+1;
for(qi=0;qi<64;qi++){
- theorapackB_read(_opb,nbits,&val);
+ val=oc_pack_read(_opb,nbits);
_qinfo->dc_scale[qi]=(ogg_uint16_t)val;
}
- theorapackB_read(_opb,9,&val);
+ val=oc_pack_read(_opb,9);
nbase_mats=(int)val+1;
base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
+ if(base_mats==NULL)return TH_EFAULT;
for(bmi=0;bmiqi_ranges[qti]+pli;
if(i>0){
- theorapackB_read1(_opb,&val);
+ val=oc_pack_read1(_opb);
if(!val){
int qtj;
int plj;
if(qti>0){
- theorapackB_read1(_opb,&val);
+ val=oc_pack_read1(_opb);
if(val){
qtj=qti-1;
plj=pli;
@@ -95,13 +95,13 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
continue;
}
}
- theorapackB_read(_opb,nbits,&val);
+ val=oc_pack_read(_opb,nbits);
indices[0]=(int)val;
for(qi=qri=0;qi<63;){
- theorapackB_read(_opb,oc_ilog(62-qi),&val);
+ val=oc_pack_read(_opb,oc_ilog(62-qi));
sizes[qri]=(int)val+1;
qi+=(int)val+1;
- theorapackB_read(_opb,nbits,&val);
+ val=oc_pack_read(_opb,nbits);
indices[++qri]=(int)val;
}
/*Note: The caller is responsible for cleaning up any partially
@@ -112,8 +112,20 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
}
qranges->nranges=qri;
qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
+ if(qranges->sizes==NULL){
+ /*Note: The caller is responsible for cleaning up any partially
+ constructed qinfo.*/
+ _ogg_free(base_mats);
+ return TH_EFAULT;
+ }
memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
+ if(qrbms==NULL){
+ /*Note: The caller is responsible for cleaning up any partially
+ constructed qinfo.*/
+ _ogg_free(base_mats);
+ return TH_EFAULT;
+ }
qranges->base_matrices=(const th_quant_base *)qrbms;
do{
bmi=indices[qri];
diff --git a/Engine/lib/libtheora/lib/dec/dequant.h b/Engine/lib/libtheora/lib/dequant.h
similarity index 82%
rename from Engine/lib/libtheora/lib/dec/dequant.h
rename to Engine/lib/libtheora/lib/dequant.h
index 928b509e5..ef25838e3 100644
--- a/Engine/lib/libtheora/lib/dec/dequant.h
+++ b/Engine/lib/libtheora/lib/dequant.h
@@ -5,21 +5,22 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
- last mod: $Id: dequant.h 15400 2008-10-15 12:10:58Z tterribe $
+ last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_dequant_H)
# define _dequant_H (1)
# include "quant.h"
+# include "bitpack.h"
-int oc_quant_params_unpack(oggpack_buffer *_opb,
+int oc_quant_params_unpack(oc_pack_buf *_opb,
th_quant_info *_qinfo);
void oc_quant_params_clear(th_quant_info *_qinfo);
diff --git a/Engine/lib/libtheora/lib/enc/block_inline.h b/Engine/lib/libtheora/lib/enc/block_inline.h
deleted file mode 100644
index 008977095..000000000
--- a/Engine/lib/libtheora/lib/enc/block_inline.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: block_inline.h 14059 2007-10-28 23:43:27Z xiphmont $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static const ogg_int32_t MBOrderMap[4] = { 0, 2, 3, 1 };
-static const ogg_int32_t BlockOrderMap1[4][4] = {
- { 0, 1, 3, 2 },
- { 0, 2, 3, 1 },
- { 0, 2, 3, 1 },
- { 3, 2, 0, 1 }
-};
-
-static ogg_int32_t QuadMapToIndex1( ogg_int32_t (*BlockMap)[4][4],
- ogg_uint32_t SB, ogg_uint32_t MB,
- ogg_uint32_t B ){
- return BlockMap[SB][MBOrderMap[MB]][BlockOrderMap1[MB][B]];
-}
-
-static ogg_int32_t QuadMapToMBTopLeft( ogg_int32_t (*BlockMap)[4][4],
- ogg_uint32_t SB, ogg_uint32_t MB ){
- return BlockMap[SB][MBOrderMap[MB]][0];
-}
diff --git a/Engine/lib/libtheora/lib/enc/blockmap.c b/Engine/lib/libtheora/lib/enc/blockmap.c
deleted file mode 100644
index 5f3478fc2..000000000
--- a/Engine/lib/libtheora/lib/enc/blockmap.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: blockmap.c 14059 2007-10-28 23:43:27Z xiphmont $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static void CreateMapping ( ogg_int32_t (*BlockMap)[4][4],
- ogg_uint32_t FirstSB,
- ogg_uint32_t FirstFrag, ogg_uint32_t HFrags,
- ogg_uint32_t VFrags ){
- ogg_uint32_t i, j = 0;
- ogg_uint32_t xpos;
- ogg_uint32_t ypos;
- ogg_uint32_t SBrow, SBcol;
- ogg_uint32_t SBRows, SBCols;
- ogg_uint32_t MB, B;
-
- ogg_uint32_t SB=FirstSB;
- ogg_uint32_t FragIndex=FirstFrag;
-
- /* Set Super-Block dimensions */
- SBRows = VFrags/4 + ( VFrags%4 ? 1 : 0 );
- SBCols = HFrags/4 + ( HFrags%4 ? 1 : 0 );
-
- /* Map each Super-Block */
- for ( SBrow=0; SBrow
- FragIndex */
-
- /* Coded flag arrays and counters for them */
- unsigned char *SBCodedFlags;
- unsigned char *SBFullyFlags;
- unsigned char *MBCodedFlags;
- unsigned char *MBFullyFlags;
-
- /**********************************************************************/
- ogg_uint32_t EOB_Run;
-
- COORDINATE *FragCoordinates;
- MOTION_VECTOR MVector;
- ogg_int32_t ReconPtr2Offset; /* Offset for second reconstruction
- in half pixel MC */
- Q_LIST_ENTRY *quantized_list;
- ogg_int16_t *ReconDataBuffer;
- Q_LIST_ENTRY InvLastIntraDC;
- Q_LIST_ENTRY InvLastInterDC;
- Q_LIST_ENTRY LastIntraDC;
- Q_LIST_ENTRY LastInterDC;
-
- ogg_uint32_t BlocksToDecode; /* Blocks to be decoded this frame */
- ogg_uint32_t DcHuffChoice; /* Huffman table selection variables */
- unsigned char ACHuffChoice;
- ogg_uint32_t QuadMBListIndex;
-
- ogg_int32_t ByteCount;
-
- ogg_uint32_t bit_pattern;
- unsigned char bits_so_far;
- unsigned char NextBit;
- ogg_int32_t BitsLeft;
-
- ogg_int16_t *DequantBuffer;
-
- ogg_int32_t fp_quant_InterUV_coeffs[64];
- ogg_int32_t fp_quant_InterUV_round[64];
- ogg_int32_t fp_ZeroBinSize_InterUV[64];
-
- ogg_int16_t *TmpReconBuffer;
- ogg_int16_t *TmpDataBuffer;
-
- /* Loop filter bounding values */
- ogg_int16_t FiltBoundingValue[256];
-
- /* Naming convention for all quant matrices and related data structures:
- * Fields containing "Inter" in their name are for Inter frames, the
- * rest is Intra. */
-
- /* Dequantiser and rounding tables */
- ogg_uint16_t *QThreshTable;
- Q_LIST_ENTRY dequant_Y_coeffs[64];
- Q_LIST_ENTRY dequant_U_coeffs[64];
- Q_LIST_ENTRY dequant_V_coeffs[64];
- Q_LIST_ENTRY dequant_InterY_coeffs[64];
- Q_LIST_ENTRY dequant_InterU_coeffs[64];
- Q_LIST_ENTRY dequant_InterV_coeffs[64];
-
- Q_LIST_ENTRY *dequant_coeffs; /* currently active quantizer */
- unsigned int zigzag_index[64];
-
- HUFF_ENTRY *HuffRoot_VP3x[NUM_HUFF_TABLES];
- ogg_uint32_t *HuffCodeArray_VP3x[NUM_HUFF_TABLES];
- unsigned char *HuffCodeLengthArray_VP3x[NUM_HUFF_TABLES];
- const unsigned char *ExtraBitLengths_VP3x;
-
- th_quant_info quant_info;
- oc_quant_tables quant_tables[2][3];
-
- /* Quantiser and rounding tables */
- /* this is scheduled to be replaced a new mechanism
- that will simply reuse the dequantizer information. */
- ogg_int32_t fp_quant_Y_coeffs[64]; /* used in reiniting quantizers */
- ogg_int32_t fp_quant_U_coeffs[64];
- ogg_int32_t fp_quant_V_coeffs[64];
- ogg_int32_t fp_quant_Inter_Y_coeffs[64];
- ogg_int32_t fp_quant_Inter_U_coeffs[64];
- ogg_int32_t fp_quant_Inter_V_coeffs[64];
-
- ogg_int32_t fp_quant_Y_round[64];
- ogg_int32_t fp_quant_U_round[64];
- ogg_int32_t fp_quant_V_round[64];
- ogg_int32_t fp_quant_Inter_Y_round[64];
- ogg_int32_t fp_quant_Inter_U_round[64];
- ogg_int32_t fp_quant_Inter_V_round[64];
-
- ogg_int32_t fp_ZeroBinSize_Y[64];
- ogg_int32_t fp_ZeroBinSize_U[64];
- ogg_int32_t fp_ZeroBinSize_V[64];
- ogg_int32_t fp_ZeroBinSize_Inter_Y[64];
- ogg_int32_t fp_ZeroBinSize_Inter_U[64];
- ogg_int32_t fp_ZeroBinSize_Inter_V[64];
-
- ogg_int32_t *fquant_coeffs;
- ogg_int32_t *fquant_round;
- ogg_int32_t *fquant_ZbSize;
-
- /* Predictor used in choosing entropy table for decoding block patterns. */
- unsigned char BlockPatternPredictor;
-
- short Modifier[4][512];
- short *ModifierPointer[4];
-
- unsigned char *DataOutputInPtr;
-
- DspFunctions dsp; /* Selected functions for this platform */
-
-};
-
-/* Encoder (Compressor) instance -- installed in a theora_state */
-typedef struct CP_INSTANCE {
- /*This structure must be first.
- It contains entry points accessed by the decoder library's API wrapper, and
- is the only assumption that library makes about our internal format.*/
- oc_state_dispatch_vtbl dispatch_vtbl;
-
- /* Compressor Configuration */
- SCAN_CONFIG_DATA ScanConfig;
- CONFIG_TYPE2 Configuration;
- int GoldenFrameEnabled;
- int InterPrediction;
- int MotionCompensation;
-
- ogg_uint32_t LastKeyFrame ;
- ogg_int32_t DropCount ;
- ogg_int32_t MaxConsDroppedFrames ;
- ogg_int32_t DropFrameTriggerBytes;
- int DropFrameCandidate;
-
- /* Compressor Statistics */
- double TotErrScore;
- ogg_int64_t KeyFrameCount; /* Count of key frames. */
- ogg_int64_t TotKeyFrameBytes;
- ogg_uint32_t LastKeyFrameSize;
- ogg_uint32_t PriorKeyFrameSize[KEY_FRAME_CONTEXT];
- ogg_uint32_t PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
- ogg_int32_t FrameQuality[6];
- int DecoderErrorCode; /* Decoder error flag. */
- ogg_int32_t ThreshMapThreshold;
- ogg_int32_t TotalMotionScore;
- ogg_int64_t TotalByteCount;
- ogg_int32_t FixedQ;
-
- /* Frame Statistics */
- signed char InterCodeCount;
- ogg_int64_t CurrentFrame;
- ogg_int64_t CarryOver ;
- ogg_uint32_t LastFrameSize;
- ogg_uint32_t FrameBitCount;
- int ThisIsFirstFrame;
- int ThisIsKeyFrame;
-
- ogg_int32_t MotionScore;
- ogg_uint32_t RegulationBlocks;
- ogg_int32_t RecoveryMotionScore;
- int RecoveryBlocksAdded ;
- double ProportionRecBlocks;
- double MaxRecFactor ;
-
- /* Rate Targeting variables. */
- ogg_uint32_t ThisFrameTargetBytes;
- double BpbCorrectionFactor;
-
- /* Up regulation variables */
- ogg_uint32_t FinalPassLastPos; /* Used to regulate a final
- unrestricted high quality
- pass. */
- ogg_uint32_t LastEndSB; /* Where we were in the loop
- last time. */
- ogg_uint32_t ResidueLastEndSB; /* Where we were in the residue
- update loop last time. */
-
- /* Controlling Block Selection */
- ogg_uint32_t MVChangeFactor;
- ogg_uint32_t FourMvChangeFactor;
- ogg_uint32_t MinImprovementForNewMV;
- ogg_uint32_t ExhaustiveSearchThresh;
- ogg_uint32_t MinImprovementForFourMV;
- ogg_uint32_t FourMVThreshold;
-
- /* Module shared data structures. */
- ogg_int32_t frame_target_rate;
- ogg_int32_t BaseLineFrameTargetRate;
- ogg_int32_t min_blocks_per_frame;
- ogg_uint32_t tot_bytes_old;
-
- /*********************************************************************/
- /* Frames Used in the selecetive convolution filtering of the Y plane. */
- unsigned char *ConvDestBuffer;
- YUV_BUFFER_ENTRY *yuv0ptr;
- YUV_BUFFER_ENTRY *yuv1ptr;
- /*********************************************************************/
-
- /*********************************************************************/
- /* Token Buffers */
- ogg_uint32_t *OptimisedTokenListEb; /* Optimised token list extra bits */
- unsigned char *OptimisedTokenList; /* Optimised token list. */
- unsigned char *OptimisedTokenListHi; /* Optimised token list huffman
- table index */
-
- unsigned char *OptimisedTokenListPl; /* Plane to which the token
- belongs Y = 0 or UV = 1 */
- ogg_int32_t OptimisedTokenCount; /* Count of Optimized tokens */
- ogg_uint32_t RunHuffIndex; /* Huffman table in force at
- the start of a run */
- ogg_uint32_t RunPlaneIndex; /* The plane (Y=0 UV=1) to
- which the first token in
- an EOB run belonged. */
-
-
- ogg_uint32_t TotTokenCount;
- ogg_int32_t TokensToBeCoded;
- ogg_int32_t TokensCoded;
- /********************************************************************/
-
- /* SuperBlock, MacroBLock and Fragment Information */
- /* Coded flag arrays and counters for them */
- unsigned char *PartiallyCodedFlags;
- unsigned char *PartiallyCodedMbPatterns;
- unsigned char *UncodedMbFlags;
-
- unsigned char *extra_fragments; /* extra updates not
- recommended by pre-processor */
- ogg_int16_t *OriginalDC;
-
- ogg_uint32_t *FragmentLastQ; /* Array used to keep track of
- quality at which each
- fragment was last
- updated. */
- unsigned char *FragTokens;
- ogg_uint32_t *FragTokenCounts; /* Number of tokens per fragment */
-
- ogg_uint32_t *RunHuffIndices;
- ogg_uint32_t *LastCodedErrorScore;
- ogg_uint32_t *ModeList;
- MOTION_VECTOR *MVList;
-
- unsigned char *BlockCodedFlags;
-
- ogg_uint32_t MvListCount;
- ogg_uint32_t ModeListCount;
-
-
- unsigned char *DataOutputBuffer;
- /*********************************************************************/
-
- ogg_uint32_t RunLength;
- ogg_uint32_t MaxBitTarget; /* Cut off target for rate capping */
- double BitRateCapFactor; /* Factor relating delta frame target
- to cut off target. */
-
- unsigned char MBCodingMode; /* Coding mode flags */
-
- ogg_int32_t MVPixelOffsetY[MAX_SEARCH_SITES];
- ogg_uint32_t InterTripOutThresh;
- unsigned char MVEnabled;
- ogg_uint32_t MotionVectorSearchCount;
- ogg_uint32_t FrameMVSearcOunt;
- ogg_int32_t MVSearchSteps;
- ogg_int32_t MVOffsetX[MAX_SEARCH_SITES];
- ogg_int32_t MVOffsetY[MAX_SEARCH_SITES];
- ogg_int32_t HalfPixelRef2Offset[9]; /* Offsets for half pixel
- compensation */
- signed char HalfPixelXOffset[9]; /* Half pixel MV offsets for X */
- signed char HalfPixelYOffset[9]; /* Half pixel MV offsets for Y */
-
- ogg_uint32_t bit_pattern ;
- unsigned char bits_so_far ;
- ogg_uint32_t lastval ;
- ogg_uint32_t lastrun ;
-
- Q_LIST_ENTRY *quantized_list;
-
- MOTION_VECTOR MVector;
- ogg_uint32_t TempBitCount;
- ogg_int16_t *DCT_codes; /* Buffer that stores the result of
- Forward DCT */
- ogg_int16_t *DCTDataBuffer; /* Input data buffer for Forward DCT */
-
- /* Motion compensation related variables */
- ogg_uint32_t MvMaxExtent;
-
- double QTargetModifier[Q_TABLE_SIZE];
-
- /* instances (used for reconstructing buffers and to hold tokens etc.) */
- PP_INSTANCE pp; /* preprocessor */
- PB_INSTANCE pb; /* playback */
-
- /* ogg bitpacker for use in packet coding, other API state */
- oggpack_buffer *oggbuffer;
- int readyflag;
- int packetflag;
- int doneflag;
-
- DspFunctions dsp; /* Selected functions for this platform */
-
-} CP_INSTANCE;
-
-#define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255))))
-
-extern void ConfigurePP( PP_INSTANCE *ppi, int Level ) ;
-extern ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi,
- ogg_uint32_t * KFIndicator );
-
-extern void ClearPPInstance(PP_INSTANCE *ppi);
-extern void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs);
-extern void InitPBInstance(PB_INSTANCE *pbi);
-extern void ClearPBInstance(PB_INSTANCE *pbi);
-
-extern void IDct1( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData );
-
-extern void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr,
- ogg_int16_t * ChangePtr, ogg_uint32_t LineStep );
-
-extern void ReconInter( PB_INSTANCE *pbi, unsigned char * ReconPtr,
- unsigned char * RefPtr, ogg_int16_t * ChangePtr,
- ogg_uint32_t LineStep ) ;
-
-extern void ReconInterHalfPixel2( PB_INSTANCE *pbi, unsigned char * ReconPtr,
- unsigned char * RefPtr1,
- unsigned char * RefPtr2,
- ogg_int16_t * ChangePtr,
- ogg_uint32_t LineStep ) ;
-
-extern void SetupLoopFilter(PB_INSTANCE *pbi);
-extern void CopyBlock(unsigned char *src,
- unsigned char *dest,
- unsigned int srcstride);
-extern void LoopFilter(PB_INSTANCE *pbi);
-extern void ReconRefFrames (PB_INSTANCE *pbi);
-extern void ExpandToken( Q_LIST_ENTRY * ExpandedBlock,
- unsigned char * CoeffIndex, ogg_uint32_t Token,
- ogg_int32_t ExtraBits );
-extern void ClearDownQFragData(PB_INSTANCE *pbi);
-
-extern void select_quantiser (PB_INSTANCE *pbi, int type);
-
-extern void quantize( PB_INSTANCE *pbi,
- ogg_int16_t * DCT_block,
- Q_LIST_ENTRY * quantized_list);
-extern void UpdateQ( PB_INSTANCE *pbi, int NewQIndex );
-extern void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ );
-extern void fdct_short ( ogg_int16_t * InputData, ogg_int16_t * OutputData );
-extern ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi,
- ogg_int32_t FragIndex);
-extern void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex,
- ogg_uint32_t PixelsPerLine ) ;
-extern void ClearFragmentInfo(PB_INSTANCE * pbi);
-extern void InitFragmentInfo(PB_INSTANCE * pbi);
-extern void ClearFrameInfo(PB_INSTANCE * pbi);
-extern void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize);
-extern void InitializeFragCoordinates(PB_INSTANCE *pbi);
-extern void InitFrameDetails(PB_INSTANCE *pbi);
-extern void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer *opb);
-extern void InitQTables( PB_INSTANCE *pbi );
-extern void quant_tables_init( PB_INSTANCE *pbi, const th_quant_info *qinfo);
-extern void InitHuffmanSet( PB_INSTANCE *pbi );
-extern void ClearHuffmanSet( PB_INSTANCE *pbi );
-extern int ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb);
-extern void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
- oggpack_buffer *opb);
-extern void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci);
-extern void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]);
-extern int ReadFilterTables(codec_setup_info *ci, oggpack_buffer *opb);
-extern void QuadDecodeDisplayFragments ( PB_INSTANCE *pbi );
-extern void PackAndWriteDFArray( CP_INSTANCE *cpi );
-extern void UpdateFragQIndex(PB_INSTANCE *pbi);
-extern void PostProcess(PB_INSTANCE *pbi);
-extern void InitMotionCompensation ( CP_INSTANCE *cpi );
-extern ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine ) ;
-extern ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi,
- unsigned char * SrcPtr,
- unsigned char * RefPtr,
- ogg_uint32_t FragIndex,
- ogg_int32_t LastXMV,
- ogg_int32_t LastYMV,
- ogg_uint32_t PixelsPerLine ) ;
-extern void WriteFrameHeader( CP_INSTANCE *cpi) ;
-extern ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- ogg_int32_t *MVPixelOffset,
- MOTION_VECTOR *MV );
-extern ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- MOTION_VECTOR *MV );
-extern ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- MOTION_VECTOR *MV ) ;
-extern ogg_uint32_t EncodeData(CP_INSTANCE *cpi);
-extern ogg_uint32_t PickIntra( CP_INSTANCE *cpi,
- ogg_uint32_t SBRows,
- ogg_uint32_t SBCols);
-extern ogg_uint32_t PickModes(CP_INSTANCE *cpi,
- ogg_uint32_t SBRows,
- ogg_uint32_t SBCols,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t *InterError,
- ogg_uint32_t *IntraError);
-
-extern CODING_MODE FrArrayUnpackMode(PB_INSTANCE *pbi);
-extern void CreateBlockMapping ( ogg_int32_t (*BlockMap)[4][4],
- ogg_uint32_t YSuperBlocks,
- ogg_uint32_t UVSuperBlocks,
- ogg_uint32_t HFrags, ogg_uint32_t VFrags );
-extern void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
- ogg_int32_t RecoveryBlocks ) ;
-extern void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore );
-extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
-
-extern void UpdateUMVBorder( PB_INSTANCE *pbi,
- unsigned char * DestReconPtr );
-extern void PInitFrameInfo(PP_INSTANCE * ppi);
-
-extern double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ );
-extern void ClearTmpBuffers(PB_INSTANCE * pbi);
-extern void InitTmpBuffers(PB_INSTANCE * pbi);
-extern void ScanYUVInit( PP_INSTANCE * ppi,
- SCAN_CONFIG_DATA * ScanConfigPtr);
-
-#endif /* ENCODER_INTERNAL_H */
diff --git a/Engine/lib/libtheora/lib/enc/dct.c b/Engine/lib/libtheora/lib/enc/dct.c
deleted file mode 100644
index 29bf8f269..000000000
--- a/Engine/lib/libtheora/lib/enc/dct.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dct.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-#include "dsp.h"
-#include "../cpu.h"
-
-static ogg_int32_t xC1S7 = 64277;
-static ogg_int32_t xC2S6 = 60547;
-static ogg_int32_t xC3S5 = 54491;
-static ogg_int32_t xC4S4 = 46341;
-static ogg_int32_t xC5S3 = 36410;
-static ogg_int32_t xC6S2 = 25080;
-static ogg_int32_t xC7S1 = 12785;
-
-#define SIGNBITDUPPED(X) ((signed )(((X) & 0x80000000)) >> 31)
-#define DOROUND(X) ( (SIGNBITDUPPED(X) & (0xffff)) + (X) )
-
-static void fdct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ){
- int loop;
-
- ogg_int32_t is07, is12, is34, is56;
- ogg_int32_t is0734, is1256;
- ogg_int32_t id07, id12, id34, id56;
-
- ogg_int32_t irot_input_x, irot_input_y;
- ogg_int32_t icommon_product1; /* Re-used product (c4s4 * (s12 - s56)). */
- ogg_int32_t icommon_product2; /* Re-used product (c4s4 * (d12 + d56)). */
-
- ogg_int32_t temp1, temp2; /* intermediate variable for computation */
-
- ogg_int32_t InterData[64];
- ogg_int32_t *ip = InterData;
- ogg_int16_t * op = OutputData;
- for (loop = 0; loop < 8; loop++){
- /* Pre calculate some common sums and differences. */
- is07 = InputData[0] + InputData[7];
- is12 = InputData[1] + InputData[2];
- is34 = InputData[3] + InputData[4];
- is56 = InputData[5] + InputData[6];
-
- id07 = InputData[0] - InputData[7];
- id12 = InputData[1] - InputData[2];
- id34 = InputData[3] - InputData[4];
- id56 = InputData[5] - InputData[6];
-
- is0734 = is07 + is34;
- is1256 = is12 + is56;
-
- /* Pre-Calculate some common product terms. */
- icommon_product1 = xC4S4*(is12 - is56);
- icommon_product1 = DOROUND(icommon_product1);
- icommon_product1>>=16;
-
- icommon_product2 = xC4S4*(id12 + id56);
- icommon_product2 = DOROUND(icommon_product2);
- icommon_product2>>=16;
-
-
- ip[0] = (xC4S4*(is0734 + is1256));
- ip[0] = DOROUND(ip[0]);
- ip[0] >>= 16;
-
- ip[4] = (xC4S4*(is0734 - is1256));
- ip[4] = DOROUND(ip[4]);
- ip[4] >>= 16;
-
- /* Define inputs to rotation for outputs 2 and 6 */
- irot_input_x = id12 - id56;
- irot_input_y = is07 - is34;
-
- /* Apply rotation for outputs 2 and 6. */
- temp1=xC6S2*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC2S6*irot_input_y;
- temp2=DOROUND(temp2);
- temp2>>=16;
- ip[2] = temp1 + temp2;
-
- temp1=xC6S2*irot_input_y;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC2S6*irot_input_x ;
- temp2=DOROUND(temp2);
- temp2>>=16;
- ip[6] = temp1 -temp2 ;
-
- /* Define inputs to rotation for outputs 1 and 7 */
- irot_input_x = icommon_product1 + id07;
- irot_input_y = -( id34 + icommon_product2 );
-
- /* Apply rotation for outputs 1 and 7. */
-
- temp1=xC1S7*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC7S1*irot_input_y;
- temp2=DOROUND(temp2);
- temp2>>=16;
- ip[1] = temp1 - temp2;
-
- temp1=xC7S1*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC1S7*irot_input_y ;
- temp2=DOROUND(temp2);
- temp2>>=16;
- ip[7] = temp1 + temp2 ;
-
- /* Define inputs to rotation for outputs 3 and 5 */
- irot_input_x = id07 - icommon_product1;
- irot_input_y = id34 - icommon_product2;
-
- /* Apply rotation for outputs 3 and 5. */
- temp1=xC3S5*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC5S3*irot_input_y ;
- temp2=DOROUND(temp2);
- temp2>>=16;
- ip[3] = temp1 - temp2 ;
-
- temp1=xC5S3*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC3S5*irot_input_y;
- temp2=DOROUND(temp2);
- temp2>>=16;
- ip[5] = temp1 + temp2;
-
- /* Increment data pointer for next row. */
- InputData += 8 ;
- ip += 8; /* advance pointer to next row */
-
- }
-
-
- /* Performed DCT on rows, now transform the columns */
- ip = InterData;
- for (loop = 0; loop < 8; loop++){
- /* Pre calculate some common sums and differences. */
- is07 = ip[0 * 8] + ip[7 * 8];
- is12 = ip[1 * 8] + ip[2 * 8];
- is34 = ip[3 * 8] + ip[4 * 8];
- is56 = ip[5 * 8] + ip[6 * 8];
-
- id07 = ip[0 * 8] - ip[7 * 8];
- id12 = ip[1 * 8] - ip[2 * 8];
- id34 = ip[3 * 8] - ip[4 * 8];
- id56 = ip[5 * 8] - ip[6 * 8];
-
- is0734 = is07 + is34;
- is1256 = is12 + is56;
-
- /* Pre-Calculate some common product terms. */
- icommon_product1 = xC4S4*(is12 - is56) ;
- icommon_product2 = xC4S4*(id12 + id56) ;
- icommon_product1 = DOROUND(icommon_product1);
- icommon_product2 = DOROUND(icommon_product2);
- icommon_product1>>=16;
- icommon_product2>>=16;
-
-
- temp1 = xC4S4*(is0734 + is1256) ;
- temp2 = xC4S4*(is0734 - is1256) ;
- temp1 = DOROUND(temp1);
- temp2 = DOROUND(temp2);
- temp1>>=16;
- temp2>>=16;
- op[0*8] = (ogg_int16_t) temp1;
- op[4*8] = (ogg_int16_t) temp2;
-
- /* Define inputs to rotation for outputs 2 and 6 */
- irot_input_x = id12 - id56;
- irot_input_y = is07 - is34;
-
- /* Apply rotation for outputs 2 and 6. */
- temp1=xC6S2*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC2S6*irot_input_y;
- temp2=DOROUND(temp2);
- temp2>>=16;
- op[2*8] = (ogg_int16_t) (temp1 + temp2);
-
- temp1=xC6S2*irot_input_y;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC2S6*irot_input_x ;
- temp2=DOROUND(temp2);
- temp2>>=16;
- op[6*8] = (ogg_int16_t) (temp1 -temp2) ;
-
- /* Define inputs to rotation for outputs 1 and 7 */
- irot_input_x = icommon_product1 + id07;
- irot_input_y = -( id34 + icommon_product2 );
-
- /* Apply rotation for outputs 1 and 7. */
- temp1=xC1S7*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC7S1*irot_input_y;
- temp2=DOROUND(temp2);
- temp2>>=16;
- op[1*8] = (ogg_int16_t) (temp1 - temp2);
-
- temp1=xC7S1*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC1S7*irot_input_y ;
- temp2=DOROUND(temp2);
- temp2>>=16;
- op[7*8] = (ogg_int16_t) (temp1 + temp2);
-
- /* Define inputs to rotation for outputs 3 and 5 */
- irot_input_x = id07 - icommon_product1;
- irot_input_y = id34 - icommon_product2;
-
- /* Apply rotation for outputs 3 and 5. */
- temp1=xC3S5*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC5S3*irot_input_y ;
- temp2=DOROUND(temp2);
- temp2>>=16;
- op[3*8] = (ogg_int16_t) (temp1 - temp2) ;
-
- temp1=xC5S3*irot_input_x;
- temp1=DOROUND(temp1);
- temp1>>=16;
- temp2=xC3S5*irot_input_y;
- temp2=DOROUND(temp2);
- temp2>>=16;
- op[5*8] = (ogg_int16_t) (temp1 + temp2);
-
- /* Increment data pointer for next column. */
- ip ++;
- op ++;
- }
-}
-
-void dsp_dct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
- funcs->fdct_short = fdct_short__c;
- dsp_dct_decode_init(funcs, cpu_flags);
- dsp_idct_init(funcs, cpu_flags);
-#if defined(USE_ASM)
- if (cpu_flags & OC_CPU_X86_MMX) {
- dsp_mmx_fdct_init(funcs);
- }
-#endif
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/dct_decode.c b/Engine/lib/libtheora/lib/enc/dct_decode.c
deleted file mode 100644
index e27611610..000000000
--- a/Engine/lib/libtheora/lib/enc/dct_decode.c
+++ /dev/null
@@ -1,941 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dct_decode.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include
-#include
-#include "codec_internal.h"
-#include "quant_lookup.h"
-
-
-#define GOLDEN_FRAME_THRESH_Q 50
-#define PUR 8
-#define PU 4
-#define PUL 2
-#define PL 1
-#define HIGHBITDUPPED(X) (((signed short) X) >> 15)
-
-
-static const int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
-
-static void SetupBoundingValueArray_Generic(ogg_int16_t *BoundingValuePtr,
- ogg_int32_t FLimit){
-
- ogg_int32_t i;
-
- /* Set up the bounding value array. */
- memset ( BoundingValuePtr, 0, (256*sizeof(*BoundingValuePtr)) );
- for ( i = 0; i < FLimit; i++ ){
- BoundingValuePtr[127-i-FLimit] = (-FLimit+i);
- BoundingValuePtr[127-i] = -i;
- BoundingValuePtr[127+i] = i;
- BoundingValuePtr[127+i+FLimit] = FLimit-i;
- }
-}
-
-static void ExpandKFBlock ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber ){
- ogg_uint32_t ReconPixelsPerLine;
- ogg_int32_t ReconPixelIndex;
-
- /* Select the appropriate inverse Q matrix and line stride */
- if ( FragmentNumber<(ogg_int32_t)pbi->YPlaneFragments ){
- ReconPixelsPerLine = pbi->YStride;
- pbi->dequant_coeffs = pbi->dequant_Y_coeffs;
- }else if ( FragmentNumber<(ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) ){
- ReconPixelsPerLine = pbi->UVStride;
- pbi->dequant_coeffs = pbi->dequant_U_coeffs;
- }else{
- ReconPixelsPerLine = pbi->UVStride;
- pbi->dequant_coeffs = pbi->dequant_V_coeffs;
- }
-
- /* Set up pointer into the quantisation buffer. */
- pbi->quantized_list = &pbi->QFragData[FragmentNumber][0];
-
- /* Invert quantisation and DCT to get pixel data. */
- switch(pbi->FragCoefEOB[FragmentNumber]){
- case 0:case 1:
- IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- break;
- case 2: case 3:
- dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- break;
- case 4:case 5:case 6:case 7:case 8: case 9:case 10:
- dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- break;
- default:
- dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- }
-
- /* Convert fragment number to a pixel offset in a reconstruction buffer. */
- ReconPixelIndex = pbi->recon_pixel_index_table[FragmentNumber];
-
- /* Get the pixel index for the first pixel in the fragment. */
- dsp_recon_intra8x8 (pbi->dsp, (unsigned char *)(&pbi->ThisFrameRecon[ReconPixelIndex]),
- (ogg_int16_t *)pbi->ReconDataBuffer, ReconPixelsPerLine);
-}
-
-static void ExpandBlock ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber){
- unsigned char *LastFrameRecPtr; /* Pointer into previous frame
- reconstruction. */
- unsigned char *LastFrameRecPtr2; /* Pointer into previous frame
- reconstruction for 1/2 pixel MC. */
-
- ogg_uint32_t ReconPixelsPerLine; /* Pixels per line */
- ogg_int32_t ReconPixelIndex; /* Offset for block into a
- reconstruction buffer */
- ogg_int32_t ReconPtr2Offset; /* Offset for second
- reconstruction in half pixel
- MC */
- ogg_int32_t MVOffset; /* Baseline motion vector offset */
- ogg_int32_t MvShift ; /* Shift to correct to 1/2 or 1/4 pixel */
- ogg_int32_t MvModMask; /* Mask to determine whether 1/2
- pixel is used */
-
- /* Get coding mode for this block */
- if ( pbi->FrameType == KEY_FRAME ){
- pbi->CodingMode = CODE_INTRA;
- }else{
- /* Get Motion vector and mode for this block. */
- pbi->CodingMode = pbi->FragCodingMethod[FragmentNumber];
- }
-
- /* Select the appropriate inverse Q matrix and line stride */
- if ( FragmentNumber<(ogg_int32_t)pbi->YPlaneFragments ) {
- ReconPixelsPerLine = pbi->YStride;
- MvShift = 1;
- MvModMask = 0x00000001;
-
- /* Select appropriate dequantiser matrix. */
- if ( pbi->CodingMode == CODE_INTRA )
- pbi->dequant_coeffs = pbi->dequant_Y_coeffs;
- else
- pbi->dequant_coeffs = pbi->dequant_InterY_coeffs;
- }else{
- ReconPixelsPerLine = pbi->UVStride;
- MvShift = 2;
- MvModMask = 0x00000003;
-
- /* Select appropriate dequantiser matrix. */
- if ( pbi->CodingMode == CODE_INTRA )
- if ( FragmentNumber <
- (ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) )
- pbi->dequant_coeffs = pbi->dequant_U_coeffs;
- else
- pbi->dequant_coeffs = pbi->dequant_V_coeffs;
- else
- if ( FragmentNumber <
- (ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) )
- pbi->dequant_coeffs = pbi->dequant_InterU_coeffs;
- else
- pbi->dequant_coeffs = pbi->dequant_InterV_coeffs;
- }
-
- /* Set up pointer into the quantisation buffer. */
- pbi->quantized_list = &pbi->QFragData[FragmentNumber][0];
-
- /* Invert quantisation and DCT to get pixel data. */
- switch(pbi->FragCoefEOB[FragmentNumber]){
- case 0:case 1:
- IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- break;
- case 2: case 3:
- dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- break;
- case 4:case 5:case 6:case 7:case 8: case 9:case 10:
- dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- break;
- default:
- dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
- }
-
- /* Convert fragment number to a pixel offset in a reconstruction buffer. */
- ReconPixelIndex = pbi->recon_pixel_index_table[FragmentNumber];
-
- /* Action depends on decode mode. */
- if ( pbi->CodingMode == CODE_INTER_NO_MV ){
- /* Inter with no motion vector */
- /* Reconstruct the pixel data using the last frame reconstruction
- and change data when the motion vector is (0,0), the recon is
- based on the lastframe without loop filtering---- for testing */
- dsp_recon_inter8x8 (pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex],
- &pbi->LastFrameRecon[ReconPixelIndex],
- pbi->ReconDataBuffer, ReconPixelsPerLine);
- }else if ( ModeUsesMC[pbi->CodingMode] ) {
- /* The mode uses a motion vector. */
- /* Get vector from list */
- pbi->MVector.x = pbi->FragMVect[FragmentNumber].x;
- pbi->MVector.y = pbi->FragMVect[FragmentNumber].y;
-
- /* Work out the base motion vector offset and the 1/2 pixel offset
- if any. For the U and V planes the MV specifies 1/4 pixel
- accuracy. This is adjusted to 1/2 pixel as follows ( 0->0,
- 1/4->1/2, 1/2->1/2, 3/4->1/2 ). */
- MVOffset = 0;
- ReconPtr2Offset = 0;
- if ( pbi->MVector.x > 0 ){
- MVOffset = pbi->MVector.x >> MvShift;
- if ( pbi->MVector.x & MvModMask )
- ReconPtr2Offset += 1;
- } else if ( pbi->MVector.x < 0 ) {
- MVOffset -= (-pbi->MVector.x) >> MvShift;
- if ( (-pbi->MVector.x) & MvModMask )
- ReconPtr2Offset -= 1;
- }
-
- if ( pbi->MVector.y > 0 ){
- MVOffset += (pbi->MVector.y >> MvShift) * ReconPixelsPerLine;
- if ( pbi->MVector.y & MvModMask )
- ReconPtr2Offset += ReconPixelsPerLine;
- } else if ( pbi->MVector.y < 0 ){
- MVOffset -= ((-pbi->MVector.y) >> MvShift) * ReconPixelsPerLine;
- if ( (-pbi->MVector.y) & MvModMask )
- ReconPtr2Offset -= ReconPixelsPerLine;
- }
-
- /* Set up the first of the two reconstruction buffer pointers. */
- if ( pbi->CodingMode==CODE_GOLDEN_MV ) {
- LastFrameRecPtr = &pbi->GoldenFrame[ReconPixelIndex] + MVOffset;
- }else{
- LastFrameRecPtr = &pbi->LastFrameRecon[ReconPixelIndex] + MVOffset;
- }
-
- /* Set up the second of the two reconstruction pointers. */
- LastFrameRecPtr2 = LastFrameRecPtr + ReconPtr2Offset;
-
- /* Select the appropriate reconstruction function */
- if ( (int)(LastFrameRecPtr - LastFrameRecPtr2) == 0 ) {
- /* Reconstruct the pixel dats from the reference frame and change data
- (no half pixel in this case as the two references were the same. */
- dsp_recon_inter8x8 (pbi->dsp,
- &pbi->ThisFrameRecon[ReconPixelIndex],
- LastFrameRecPtr, pbi->ReconDataBuffer,
- ReconPixelsPerLine);
- }else{
- /* Fractional pixel reconstruction. */
- /* Note that we only use two pixels per reconstruction even for
- the diagonal. */
- dsp_recon_inter8x8_half(pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex],
- LastFrameRecPtr, LastFrameRecPtr2,
- pbi->ReconDataBuffer, ReconPixelsPerLine);
- }
- } else if ( pbi->CodingMode == CODE_USING_GOLDEN ){
- /* Golden frame with motion vector */
- /* Reconstruct the pixel data using the golden frame
- reconstruction and change data */
- dsp_recon_inter8x8 (pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex],
- &pbi->GoldenFrame[ ReconPixelIndex ],
- pbi->ReconDataBuffer, ReconPixelsPerLine);
- } else {
- /* Simple Intra coding */
- /* Get the pixel index for the first pixel in the fragment. */
- dsp_recon_intra8x8 (pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex],
- pbi->ReconDataBuffer, ReconPixelsPerLine);
- }
-}
-
-static void UpdateUMV_HBorders( PB_INSTANCE *pbi,
- unsigned char * DestReconPtr,
- ogg_uint32_t PlaneFragOffset ) {
- ogg_uint32_t i;
- ogg_uint32_t PixelIndex;
-
- ogg_uint32_t PlaneStride;
- ogg_uint32_t BlockVStep;
- ogg_uint32_t PlaneFragments;
- ogg_uint32_t LineFragments;
- ogg_uint32_t PlaneBorderWidth;
-
- unsigned char *SrcPtr1;
- unsigned char *SrcPtr2;
- unsigned char *DestPtr1;
- unsigned char *DestPtr2;
-
- /* Work out various plane specific values */
- if ( PlaneFragOffset == 0 ) {
- /* Y Plane */
- BlockVStep = (pbi->YStride *
- (VFRAGPIXELS - 1));
- PlaneStride = pbi->YStride;
- PlaneBorderWidth = UMV_BORDER;
- PlaneFragments = pbi->YPlaneFragments;
- LineFragments = pbi->HFragments;
- }else{
- /* U or V plane. */
- BlockVStep = (pbi->UVStride *
- (VFRAGPIXELS - 1));
- PlaneStride = pbi->UVStride;
- PlaneBorderWidth = UMV_BORDER / 2;
- PlaneFragments = pbi->UVPlaneFragments;
- LineFragments = pbi->HFragments / 2;
- }
-
- /* Setup the source and destination pointers for the top and bottom
- borders */
- PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset];
- SrcPtr1 = &DestReconPtr[ PixelIndex - PlaneBorderWidth ];
- DestPtr1 = SrcPtr1 - (PlaneBorderWidth * PlaneStride);
-
- PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset +
- PlaneFragments - LineFragments] +
- BlockVStep;
- SrcPtr2 = &DestReconPtr[ PixelIndex - PlaneBorderWidth];
- DestPtr2 = SrcPtr2 + PlaneStride;
-
- /* Now copy the top and bottom source lines into each line of the
- respective borders */
- for ( i = 0; i < PlaneBorderWidth; i++ ) {
- memcpy( DestPtr1, SrcPtr1, PlaneStride );
- memcpy( DestPtr2, SrcPtr2, PlaneStride );
- DestPtr1 += PlaneStride;
- DestPtr2 += PlaneStride;
- }
-}
-
-static void UpdateUMV_VBorders( PB_INSTANCE *pbi,
- unsigned char * DestReconPtr,
- ogg_uint32_t PlaneFragOffset ){
- ogg_uint32_t i;
- ogg_uint32_t PixelIndex;
-
- ogg_uint32_t PlaneStride;
- ogg_uint32_t LineFragments;
- ogg_uint32_t PlaneBorderWidth;
- ogg_uint32_t PlaneHeight;
-
- unsigned char *SrcPtr1;
- unsigned char *SrcPtr2;
- unsigned char *DestPtr1;
- unsigned char *DestPtr2;
-
- /* Work out various plane specific values */
- if ( PlaneFragOffset == 0 ) {
- /* Y Plane */
- PlaneStride = pbi->YStride;
- PlaneBorderWidth = UMV_BORDER;
- LineFragments = pbi->HFragments;
- PlaneHeight = pbi->info.height;
- }else{
- /* U or V plane. */
- PlaneStride = pbi->UVStride;
- PlaneBorderWidth = UMV_BORDER / 2;
- LineFragments = pbi->HFragments / 2;
- PlaneHeight = pbi->info.height / 2;
- }
-
- /* Setup the source data values and destination pointers for the
- left and right edge borders */
- PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset];
- SrcPtr1 = &DestReconPtr[ PixelIndex ];
- DestPtr1 = &DestReconPtr[ PixelIndex - PlaneBorderWidth ];
-
- PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset +
- LineFragments - 1] +
- (HFRAGPIXELS - 1);
- SrcPtr2 = &DestReconPtr[ PixelIndex ];
- DestPtr2 = &DestReconPtr[ PixelIndex + 1 ];
-
- /* Now copy the top and bottom source lines into each line of the
- respective borders */
- for ( i = 0; i < PlaneHeight; i++ ) {
- memset( DestPtr1, SrcPtr1[0], PlaneBorderWidth );
- memset( DestPtr2, SrcPtr2[0], PlaneBorderWidth );
- SrcPtr1 += PlaneStride;
- SrcPtr2 += PlaneStride;
- DestPtr1 += PlaneStride;
- DestPtr2 += PlaneStride;
- }
-}
-
-void UpdateUMVBorder( PB_INSTANCE *pbi,
- unsigned char * DestReconPtr ) {
- ogg_uint32_t PlaneFragOffset;
-
- /* Y plane */
- PlaneFragOffset = 0;
- UpdateUMV_VBorders( pbi, DestReconPtr, PlaneFragOffset );
- UpdateUMV_HBorders( pbi, DestReconPtr, PlaneFragOffset );
-
- /* Then the U and V Planes */
- PlaneFragOffset = pbi->YPlaneFragments;
- UpdateUMV_VBorders( pbi, DestReconPtr, PlaneFragOffset );
- UpdateUMV_HBorders( pbi, DestReconPtr, PlaneFragOffset );
-
- PlaneFragOffset = pbi->YPlaneFragments + pbi->UVPlaneFragments;
- UpdateUMV_VBorders( pbi, DestReconPtr, PlaneFragOffset );
- UpdateUMV_HBorders( pbi, DestReconPtr, PlaneFragOffset );
-}
-
-static void CopyRecon( PB_INSTANCE *pbi, unsigned char * DestReconPtr,
- unsigned char * SrcReconPtr ) {
- ogg_uint32_t i;
- ogg_uint32_t PlaneLineStep; /* Pixels per line */
- ogg_uint32_t PixelIndex;
-
- unsigned char *SrcPtr; /* Pointer to line of source image data */
- unsigned char *DestPtr; /* Pointer to line of destination image data */
-
- /* Copy over only updated blocks.*/
-
- /* First Y plane */
- PlaneLineStep = pbi->YStride;
- for ( i = 0; i < pbi->YPlaneFragments; i++ ) {
- if ( pbi->display_fragments[i] ) {
- PixelIndex = pbi->recon_pixel_index_table[i];
- SrcPtr = &SrcReconPtr[ PixelIndex ];
- DestPtr = &DestReconPtr[ PixelIndex ];
-
- dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep);
- }
- }
-
- /* Then U and V */
- PlaneLineStep = pbi->UVStride;
- for ( i = pbi->YPlaneFragments; i < pbi->UnitFragments; i++ ) {
- if ( pbi->display_fragments[i] ) {
- PixelIndex = pbi->recon_pixel_index_table[i];
- SrcPtr = &SrcReconPtr[ PixelIndex ];
- DestPtr = &DestReconPtr[ PixelIndex ];
-
- dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep);
-
- }
- }
-}
-
-static void CopyNotRecon( PB_INSTANCE *pbi, unsigned char * DestReconPtr,
- unsigned char * SrcReconPtr ) {
- ogg_uint32_t i;
- ogg_uint32_t PlaneLineStep; /* Pixels per line */
- ogg_uint32_t PixelIndex;
-
- unsigned char *SrcPtr; /* Pointer to line of source image data */
- unsigned char *DestPtr; /* Pointer to line of destination image data*/
-
- /* Copy over only updated blocks. */
-
- /* First Y plane */
- PlaneLineStep = pbi->YStride;
- for ( i = 0; i < pbi->YPlaneFragments; i++ ) {
- if ( !pbi->display_fragments[i] ) {
- PixelIndex = pbi->recon_pixel_index_table[i];
- SrcPtr = &SrcReconPtr[ PixelIndex ];
- DestPtr = &DestReconPtr[ PixelIndex ];
-
- dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep);
- }
- }
-
- /* Then U and V */
- PlaneLineStep = pbi->UVStride;
- for ( i = pbi->YPlaneFragments; i < pbi->UnitFragments; i++ ) {
- if ( !pbi->display_fragments[i] ) {
- PixelIndex = pbi->recon_pixel_index_table[i];
- SrcPtr = &SrcReconPtr[ PixelIndex ];
- DestPtr = &DestReconPtr[ PixelIndex ];
-
- dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep);
-
- }
- }
-}
-
-void ExpandToken( Q_LIST_ENTRY * ExpandedBlock,
- unsigned char * CoeffIndex, ogg_uint32_t Token,
- ogg_int32_t ExtraBits ){
- /* Is the token is a combination run and value token. */
- if ( Token >= DCT_RUN_CATEGORY1 ){
- /* Expand the token and additional bits to a zero run length and
- data value. */
- if ( Token < DCT_RUN_CATEGORY2 ) {
- /* Decoding method depends on token */
- if ( Token < DCT_RUN_CATEGORY1B ) {
- /* Step on by the zero run length */
- *CoeffIndex += (unsigned char)((Token - DCT_RUN_CATEGORY1) + 1);
-
- /* The extra bit determines the sign. */
- if ( ExtraBits & 0x01 )
- ExpandedBlock[*CoeffIndex] = -1;
- else
- ExpandedBlock[*CoeffIndex] = 1;
- } else if ( Token == DCT_RUN_CATEGORY1B ) {
- /* Bits 0-1 determines the zero run length */
- *CoeffIndex += (6 + (ExtraBits & 0x03));
-
- /* Bit 2 determines the sign */
- if ( ExtraBits & 0x04 )
- ExpandedBlock[*CoeffIndex] = -1;
- else
- ExpandedBlock[*CoeffIndex] = 1;
- }else{
- /* Bits 0-2 determines the zero run length */
- *CoeffIndex += (10 + (ExtraBits & 0x07));
-
- /* Bit 3 determines the sign */
- if ( ExtraBits & 0x08 )
- ExpandedBlock[*CoeffIndex] = -1;
- else
- ExpandedBlock[*CoeffIndex] = 1;
- }
- }else{
- /* If token == DCT_RUN_CATEGORY2 we have a single 0 followed by
- a value */
- if ( Token == DCT_RUN_CATEGORY2 ){
- /* Step on by the zero run length */
- *CoeffIndex += 1;
-
- /* Bit 1 determines sign, bit 0 the value */
- if ( ExtraBits & 0x02 )
- ExpandedBlock[*CoeffIndex] = -(2 + (ExtraBits & 0x01));
- else
- ExpandedBlock[*CoeffIndex] = 2 + (ExtraBits & 0x01);
- }else{
- /* else we have 2->3 zeros followed by a value */
- /* Bit 0 determines the zero run length */
- *CoeffIndex += 2 + (ExtraBits & 0x01);
-
- /* Bit 2 determines the sign, bit 1 the value */
- if ( ExtraBits & 0x04 )
- ExpandedBlock[*CoeffIndex] = -(2 + ((ExtraBits & 0x02) >> 1));
- else
- ExpandedBlock[*CoeffIndex] = 2 + ((ExtraBits & 0x02) >> 1);
- }
- }
-
- /* Step on over value */
- *CoeffIndex += 1;
-
- } else if ( Token == DCT_SHORT_ZRL_TOKEN ) {
- /* Token is a ZRL token so step on by the appropriate number of zeros */
- *CoeffIndex += ExtraBits + 1;
- } else if ( Token == DCT_ZRL_TOKEN ) {
- /* Token is a ZRL token so step on by the appropriate number of zeros */
- *CoeffIndex += ExtraBits + 1;
- } else if ( Token < LOW_VAL_TOKENS ) {
- /* Token is a small single value token. */
- switch ( Token ) {
- case ONE_TOKEN:
- ExpandedBlock[*CoeffIndex] = 1;
- break;
- case MINUS_ONE_TOKEN:
- ExpandedBlock[*CoeffIndex] = -1;
- break;
- case TWO_TOKEN:
- ExpandedBlock[*CoeffIndex] = 2;
- break;
- case MINUS_TWO_TOKEN:
- ExpandedBlock[*CoeffIndex] = -2;
- break;
- }
-
- /* Step on the coefficient index. */
- *CoeffIndex += 1;
- }else{
- /* Token is a larger single value token */
- /* Expand the token and additional bits to a data value. */
- if ( Token < DCT_VAL_CATEGORY3 ) {
- /* Offset from LOW_VAL_TOKENS determines value */
- Token = Token - LOW_VAL_TOKENS;
-
- /* Extra bit determines sign */
- if ( ExtraBits )
- ExpandedBlock[*CoeffIndex] =
- -((Q_LIST_ENTRY)(Token + DCT_VAL_CAT2_MIN));
- else
- ExpandedBlock[*CoeffIndex] =
- (Q_LIST_ENTRY)(Token + DCT_VAL_CAT2_MIN);
- } else if ( Token == DCT_VAL_CATEGORY3 ) {
- /* Bit 1 determines sign, Bit 0 the value */
- if ( ExtraBits & 0x02 )
- ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT3_MIN + (ExtraBits & 0x01));
- else
- ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT3_MIN + (ExtraBits & 0x01);
- } else if ( Token == DCT_VAL_CATEGORY4 ) {
- /* Bit 2 determines sign, Bit 0-1 the value */
- if ( ExtraBits & 0x04 )
- ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT4_MIN + (ExtraBits & 0x03));
- else
- ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT4_MIN + (ExtraBits & 0x03);
- } else if ( Token == DCT_VAL_CATEGORY5 ) {
- /* Bit 3 determines sign, Bit 0-2 the value */
- if ( ExtraBits & 0x08 )
- ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT5_MIN + (ExtraBits & 0x07));
- else
- ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT5_MIN + (ExtraBits & 0x07);
- } else if ( Token == DCT_VAL_CATEGORY6 ) {
- /* Bit 4 determines sign, Bit 0-3 the value */
- if ( ExtraBits & 0x10 )
- ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT6_MIN + (ExtraBits & 0x0F));
- else
- ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT6_MIN + (ExtraBits & 0x0F);
- } else if ( Token == DCT_VAL_CATEGORY7 ) {
- /* Bit 5 determines sign, Bit 0-4 the value */
- if ( ExtraBits & 0x20 )
- ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT7_MIN + (ExtraBits & 0x1F));
- else
- ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT7_MIN + (ExtraBits & 0x1F);
- } else if ( Token == DCT_VAL_CATEGORY8 ) {
- /* Bit 9 determines sign, Bit 0-8 the value */
- if ( ExtraBits & 0x200 )
- ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT8_MIN + (ExtraBits & 0x1FF));
- else
- ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT8_MIN + (ExtraBits & 0x1FF);
- }
-
- /* Step on the coefficient index. */
- *CoeffIndex += 1;
- }
-}
-
-void ClearDownQFragData(PB_INSTANCE *pbi){
- ogg_int32_t i;
- Q_LIST_ENTRY * QFragPtr;
-
- for ( i = 0; i < pbi->CodedBlockIndex; i++ ) {
- /* Get the linear index for the current fragment. */
- QFragPtr = pbi->QFragData[pbi->CodedBlockList[i]];
- memset(QFragPtr, 0, 64*sizeof(Q_LIST_ENTRY));
- }
-}
-
-static void loop_filter_h(unsigned char * PixelPtr,
- ogg_int32_t LineLength,
- ogg_int16_t *BoundingValuePtr){
- ogg_int32_t j;
- ogg_int32_t FiltVal;
- PixelPtr-=2;
-
- for ( j = 0; j < 8; j++ ){
- FiltVal =
- ( PixelPtr[0] ) -
- ( PixelPtr[1] * 3 ) +
- ( PixelPtr[2] * 3 ) -
- ( PixelPtr[3] );
-
- FiltVal = *(BoundingValuePtr+((FiltVal + 4) >> 3));
-
- PixelPtr[1] = clamp255(PixelPtr[1] + FiltVal);
- PixelPtr[2] = clamp255(PixelPtr[2] - FiltVal);
-
- PixelPtr += LineLength;
- }
-}
-
-static void loop_filter_v(unsigned char * PixelPtr,
- ogg_int32_t LineLength,
- ogg_int16_t *BoundingValuePtr){
- ogg_int32_t j;
- ogg_int32_t FiltVal;
- PixelPtr -= 2*LineLength;
-
- for ( j = 0; j < 8; j++ ) {
- FiltVal = ( (ogg_int32_t)PixelPtr[0] ) -
- ( (ogg_int32_t)PixelPtr[LineLength] * 3 ) +
- ( (ogg_int32_t)PixelPtr[2 * LineLength] * 3 ) -
- ( (ogg_int32_t)PixelPtr[3 * LineLength] );
-
- FiltVal = *(BoundingValuePtr+((FiltVal + 4) >> 3));
-
- PixelPtr[LineLength] = clamp255(PixelPtr[LineLength] + FiltVal);
- PixelPtr[2 * LineLength] = clamp255(PixelPtr[2*LineLength] - FiltVal);
-
- PixelPtr ++;
- }
-}
-
-static void LoopFilter__c(PB_INSTANCE *pbi, int FLimit){
-
- int j;
- ogg_int16_t BoundingValues[256];
- ogg_int16_t *bvp = BoundingValues+127;
- unsigned char *cp = pbi->display_fragments;
- ogg_uint32_t *bp = pbi->recon_pixel_index_table;
-
- if ( FLimit == 0 ) return;
- SetupBoundingValueArray_Generic(BoundingValues, FLimit);
-
- for ( j = 0; j < 3 ; j++){
- ogg_uint32_t *bp_begin = bp;
- ogg_uint32_t *bp_end;
- int stride;
- int h;
-
- switch(j) {
- case 0: /* y */
- bp_end = bp + pbi->YPlaneFragments;
- h = pbi->HFragments;
- stride = pbi->YStride;
- break;
- default: /* u,v, 4:20 specific */
- bp_end = bp + pbi->UVPlaneFragments;
- h = pbi->HFragments >> 1;
- stride = pbi->UVStride;
- break;
- }
-
- while(bpbp_left)
- loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,bvp);
- if(bp_left>bp_begin)
- loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,bvp);
- if(bp+1LastFrameRecon[bp[0]]+8,stride,bvp);
- if(bp+hLastFrameRecon[bp[h]],stride,bvp);
- }
- bp++;
- cp++;
- }
- }
- }
-}
-
-void ReconRefFrames (PB_INSTANCE *pbi){
- ogg_int32_t i;
- unsigned char *SwapReconBuffersTemp;
-
- /* predictor multiplier up-left, up, up-right,left, shift
- Entries are packed in the order L, UL, U, UR, with missing entries
- moved to the end (before the shift parameters). */
- static const ogg_int16_t pc[16][6]={
- {0,0,0,0,0,0},
- {1,0,0,0,0,0}, /* PL */
- {1,0,0,0,0,0}, /* PUL */
- {1,0,0,0,0,0}, /* PUL|PL */
- {1,0,0,0,0,0}, /* PU */
- {1,1,0,0,1,1}, /* PU|PL */
- {0,1,0,0,0,0}, /* PU|PUL */
- {29,-26,29,0,5,31}, /* PU|PUL|PL */
- {1,0,0,0,0,0}, /* PUR */
- {75,53,0,0,7,127}, /* PUR|PL */
- {1,1,0,0,1,1}, /* PUR|PUL */
- {75,0,53,0,7,127}, /* PUR|PUL|PL */
- {1,0,0,0,0,0}, /* PUR|PU */
- {75,0,53,0,7,127}, /* PUR|PU|PL */
- {3,10,3,0,4,15}, /* PUR|PU|PUL */
- {29,-26,29,0,5,31} /* PUR|PU|PUL|PL */
- };
-
- /* boundary case bit masks. */
- static const int bc_mask[8]={
- /* normal case no boundary condition */
- PUR|PU|PUL|PL,
- /* left column */
- PUR|PU,
- /* top row */
- PL,
- /* top row, left column */
- 0,
- /* right column */
- PU|PUL|PL,
- /* right and left column */
- PU,
- /* top row, right column */
- PL,
- /* top row, right and left column */
- 0
- };
-
- /* value left value up-left, value up, value up-right, missing
- values skipped. */
- int v[4];
-
- /* fragment number left, up-left, up, up-right */
- int fn[4];
-
- /* predictor count. */
- int pcount;
-
- short wpc;
- static const short Mode2Frame[] = {
- 1, /* CODE_INTER_NO_MV 0 => Encoded diff from same MB last frame */
- 0, /* CODE_INTRA 1 => DCT Encoded Block */
- 1, /* CODE_INTER_PLUS_MV 2 => Encoded diff from included MV MB last frame */
- 1, /* CODE_INTER_LAST_MV 3 => Encoded diff from MRU MV MB last frame */
- 1, /* CODE_INTER_PRIOR_MV 4 => Encoded diff from included 4 separate MV blocks */
- 2, /* CODE_USING_GOLDEN 5 => Encoded diff from same MB golden frame */
- 2, /* CODE_GOLDEN_MV 6 => Encoded diff from included MV MB golden frame */
- 1 /* CODE_INTER_FOUR_MV 7 => Encoded diff from included 4 separate MV blocks */
- };
- short Last[3];
- short PredictedDC;
- int FragsAcross=pbi->HFragments;
- int FromFragment,ToFragment;
- int FragsDown = pbi->VFragments;
-
- int WhichFrame;
- int WhichCase;
- int j,k,m,n;
-
- void (*ExpandBlockA) ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber );
-
- if ( pbi->FrameType == KEY_FRAME )
- ExpandBlockA=ExpandKFBlock;
- else
- ExpandBlockA=ExpandBlock;
-
- /* for y,u,v */
- for ( j = 0; j < 3 ; j++) {
- /* pick which fragments based on Y, U, V */
- switch(j){
- case 0: /* y */
- FromFragment = 0;
- ToFragment = pbi->YPlaneFragments;
- FragsAcross = pbi->HFragments;
- FragsDown = pbi->VFragments;
- break;
- case 1: /* u */
- FromFragment = pbi->YPlaneFragments;
- ToFragment = pbi->YPlaneFragments + pbi->UVPlaneFragments ;
- FragsAcross = pbi->HFragments >> 1;
- FragsDown = pbi->VFragments >> 1;
- break;
- /*case 2: v */
- default:
- FromFragment = pbi->YPlaneFragments + pbi->UVPlaneFragments;
- ToFragment = pbi->YPlaneFragments + (2 * pbi->UVPlaneFragments) ;
- FragsAcross = pbi->HFragments >> 1;
- FragsDown = pbi->VFragments >> 1;
- break;
- }
-
- /* initialize our array of last used DC Components */
- for(k=0;k<3;k++)
- Last[k]=0;
-
- i=FromFragment;
-
- /* do prediction on all of Y, U or V */
- for ( m = 0 ; m < FragsDown ; m++) {
- for ( n = 0 ; n < FragsAcross ; n++, i++){
-
- /* only do 2 prediction if fragment coded and on non intra or
- if all fragments are intra */
- if( pbi->display_fragments[i] || (pbi->FrameType == KEY_FRAME) ){
- /* Type of Fragment */
- WhichFrame = Mode2Frame[pbi->FragCodingMethod[i]];
-
- /* Check Borderline Cases */
- WhichCase = (n==0) + ((m==0) << 1) + ((n+1 == FragsAcross) << 2);
-
- fn[0]=i-1;
- fn[1]=i-FragsAcross-1;
- fn[2]=i-FragsAcross;
- fn[3]=i-FragsAcross+1;
-
- /* fragment valid for prediction use if coded and it comes
- from same frame as the one we are predicting */
- for(k=pcount=wpc=0; k<4; k++) {
- int pflag;
- pflag=1<display_fragments[fn[k]] &&
- (Mode2Frame[pbi->FragCodingMethod[fn[k]]] == WhichFrame)){
- v[pcount]=pbi->QFragData[fn[k]][0];
- wpc|=pflag;
- pcount++;
- }
- }
-
- if(wpc==0){
- /* fall back to the last coded fragment */
- pbi->QFragData[i][0] += Last[WhichFrame];
-
- }else{
-
- /* don't do divide if divisor is 1 or 0 */
- PredictedDC = pc[wpc][0]*v[0];
- for(k=1; k>= pc[wpc][4];
- }
-
- /* check for outranging on the two predictors that can outrange */
- if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){
- if( abs(PredictedDC - v[2]) > 128) {
- PredictedDC = v[2];
- } else if( abs(PredictedDC - v[0]) > 128) {
- PredictedDC = v[0];
- } else if( abs(PredictedDC - v[1]) > 128) {
- PredictedDC = v[1];
- }
- }
-
- pbi->QFragData[i][0] += PredictedDC;
-
- }
-
- /* Save the last fragment coded for whatever frame we are
- predicting from */
- Last[WhichFrame] = pbi->QFragData[i][0];
-
- /* Inverse DCT and reconstitute buffer in thisframe */
- ExpandBlockA( pbi, i );
- }
- }
- }
- }
-
- /* Copy the current reconstruction back to the last frame recon buffer. */
- if(pbi->CodedBlockIndex > (ogg_int32_t) (pbi->UnitFragments >> 1)){
- SwapReconBuffersTemp = pbi->ThisFrameRecon;
- pbi->ThisFrameRecon = pbi->LastFrameRecon;
- pbi->LastFrameRecon = SwapReconBuffersTemp;
- CopyNotRecon( pbi, pbi->LastFrameRecon, pbi->ThisFrameRecon );
- }else{
- CopyRecon( pbi, pbi->LastFrameRecon, pbi->ThisFrameRecon );
- }
-
- /* Apply a loop filter to edge pixels of updated blocks */
- dsp_LoopFilter(pbi->dsp, pbi, pbi->quant_info.loop_filter_limits[pbi->FrameQIndex]);
-
- /* We may need to update the UMV border */
- UpdateUMVBorder(pbi, pbi->LastFrameRecon);
-
- /* Reconstruct the golden frame if necessary.
- For VFW codec only on key frames */
- if ( pbi->FrameType == KEY_FRAME ){
- CopyRecon( pbi, pbi->GoldenFrame, pbi->LastFrameRecon );
- /* We may need to update the UMV border */
- UpdateUMVBorder(pbi, pbi->GoldenFrame);
- }
-}
-
-void dsp_dct_decode_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
- funcs->LoopFilter = LoopFilter__c;
-#if defined(USE_ASM)
- // Todo: Port the dct for MSC one day.
-#if !defined (_MSC_VER)
- if (cpu_flags & OC_CPU_X86_MMX) {
- dsp_mmx_dct_decode_init(funcs);
- }
-#endif
-#endif
-}
diff --git a/Engine/lib/libtheora/lib/enc/dct_encode.c b/Engine/lib/libtheora/lib/enc/dct_encode.c
deleted file mode 100644
index 3a3c47778..000000000
--- a/Engine/lib/libtheora/lib/enc/dct_encode.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dct_encode.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include
-#include "codec_internal.h"
-#include "dsp.h"
-#include "quant_lookup.h"
-
-
-static int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
-
-static unsigned char TokenizeDctValue (ogg_int16_t DataValue,
- ogg_uint32_t * TokenListPtr ){
- unsigned char tokens_added = 0;
- ogg_uint32_t AbsDataVal = abs( (ogg_int32_t)DataValue );
-
- /* Values are tokenised as category value and a number of additional
- bits that define the position within the category. */
-
- if ( DataValue == 0 ) return 0;
-
- if ( AbsDataVal == 1 ){
- if ( DataValue == 1 )
- TokenListPtr[0] = ONE_TOKEN;
- else
- TokenListPtr[0] = MINUS_ONE_TOKEN;
- tokens_added = 1;
- } else if ( AbsDataVal == 2 ) {
- if ( DataValue == 2 )
- TokenListPtr[0] = TWO_TOKEN;
- else
- TokenListPtr[0] = MINUS_TWO_TOKEN;
- tokens_added = 1;
- } else if ( AbsDataVal <= MAX_SINGLE_TOKEN_VALUE ) {
- TokenListPtr[0] = LOW_VAL_TOKENS + (AbsDataVal - DCT_VAL_CAT2_MIN);
- if ( DataValue > 0 )
- TokenListPtr[1] = 0;
- else
- TokenListPtr[1] = 1;
- tokens_added = 2;
- } else if ( AbsDataVal <= 8 ) {
- /* Bit 1 determines sign, Bit 0 the value */
- TokenListPtr[0] = DCT_VAL_CATEGORY3;
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT3_MIN);
- else
- TokenListPtr[1] = (0x02) + (AbsDataVal - DCT_VAL_CAT3_MIN);
- tokens_added = 2;
- } else if ( AbsDataVal <= 12 ) {
- /* Bit 2 determines sign, Bit 0-2 the value */
- TokenListPtr[0] = DCT_VAL_CATEGORY4;
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT4_MIN);
- else
- TokenListPtr[1] = (0x04) + (AbsDataVal - DCT_VAL_CAT4_MIN);
- tokens_added = 2;
- } else if ( AbsDataVal <= 20 ) {
- /* Bit 3 determines sign, Bit 0-2 the value */
- TokenListPtr[0] = DCT_VAL_CATEGORY5;
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT5_MIN);
- else
- TokenListPtr[1] = (0x08) + (AbsDataVal - DCT_VAL_CAT5_MIN);
- tokens_added = 2;
- } else if ( AbsDataVal <= 36 ) {
- /* Bit 4 determines sign, Bit 0-3 the value */
- TokenListPtr[0] = DCT_VAL_CATEGORY6;
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT6_MIN);
- else
- TokenListPtr[1] = (0x010) + (AbsDataVal - DCT_VAL_CAT6_MIN);
- tokens_added = 2;
- } else if ( AbsDataVal <= 68 ) {
- /* Bit 5 determines sign, Bit 0-4 the value */
- TokenListPtr[0] = DCT_VAL_CATEGORY7;
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT7_MIN);
- else
- TokenListPtr[1] = (0x20) + (AbsDataVal - DCT_VAL_CAT7_MIN);
- tokens_added = 2;
- } else if ( AbsDataVal <= 511 ) {
- /* Bit 9 determines sign, Bit 0-8 the value */
- TokenListPtr[0] = DCT_VAL_CATEGORY8;
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT8_MIN);
- else
- TokenListPtr[1] = (0x200) + (AbsDataVal - DCT_VAL_CAT8_MIN);
- tokens_added = 2;
- } else {
- TokenListPtr[0] = DCT_VAL_CATEGORY8;
- if ( DataValue > 0 )
- TokenListPtr[1] = (511 - DCT_VAL_CAT8_MIN);
- else
- TokenListPtr[1] = (0x200) + (511 - DCT_VAL_CAT8_MIN);
- tokens_added = 2;
- }
-
- /* Return the total number of tokens added */
- return tokens_added;
-}
-
-static unsigned char TokenizeDctRunValue (unsigned char RunLength,
- ogg_int16_t DataValue,
- ogg_uint32_t * TokenListPtr ){
- unsigned char tokens_added = 0;
- ogg_uint32_t AbsDataVal = abs( (ogg_int32_t)DataValue );
-
- /* Values are tokenised as category value and a number of additional
- bits that define the category. */
- if ( DataValue == 0 ) return 0;
- if ( AbsDataVal == 1 ) {
- /* Zero runs of 1-5 */
- if ( RunLength <= 5 ) {
- TokenListPtr[0] = DCT_RUN_CATEGORY1 + (RunLength - 1);
- if ( DataValue > 0 )
- TokenListPtr[1] = 0;
- else
- TokenListPtr[1] = 1;
- } else if ( RunLength <= 9 ) {
- /* Zero runs of 6-9 */
- TokenListPtr[0] = DCT_RUN_CATEGORY1B;
- if ( DataValue > 0 )
- TokenListPtr[1] = (RunLength - 6);
- else
- TokenListPtr[1] = 0x04 + (RunLength - 6);
- } else {
- /* Zero runs of 10-17 */
- TokenListPtr[0] = DCT_RUN_CATEGORY1C;
- if ( DataValue > 0 )
- TokenListPtr[1] = (RunLength - 10);
- else
- TokenListPtr[1] = 0x08 + (RunLength - 10);
- }
- tokens_added = 2;
- } else if ( AbsDataVal <= 3 ) {
- if ( RunLength == 1 ) {
- TokenListPtr[0] = DCT_RUN_CATEGORY2;
-
- /* Extra bits token bit 1 indicates sign, bit 0 indicates value */
- if ( DataValue > 0 )
- TokenListPtr[1] = (AbsDataVal - 2);
- else
- TokenListPtr[1] = (0x02) + (AbsDataVal - 2);
- tokens_added = 2;
- }else{
- TokenListPtr[0] = DCT_RUN_CATEGORY2 + 1;
-
- /* Extra bits token. */
- /* bit 2 indicates sign, bit 1 indicates value, bit 0 indicates
- run length */
- if ( DataValue > 0 )
- TokenListPtr[1] = ((AbsDataVal - 2) << 1) + (RunLength - 2);
- else
- TokenListPtr[1] = (0x04) + ((AbsDataVal - 2) << 1) + (RunLength - 2);
- tokens_added = 2;
- }
- } else {
- tokens_added = 2; /* ERROR */
- /*IssueWarning( "Bad Input to TokenizeDctRunValue" );*/
- }
-
- /* Return the total number of tokens added */
- return tokens_added;
-}
-
-static unsigned char TokenizeDctBlock (ogg_int16_t * RawData,
- ogg_uint32_t * TokenListPtr ) {
- ogg_uint32_t i;
- unsigned char run_count;
- unsigned char token_count = 0; /* Number of tokens crated. */
- ogg_uint32_t AbsData;
-
-
- /* Tokenize the block */
- for( i = 0; i < BLOCK_SIZE; i++ ){
- run_count = 0;
-
- /* Look for a zero run. */
- /* NOTE the use of & instead of && which is faster (and
- equivalent) in this instance. */
- /* NO, NO IT ISN'T --Monty */
- while( (i < BLOCK_SIZE) && (!RawData[i]) ){
- run_count++;
- i++;
- }
-
- /* If we have reached the end of the block then code EOB */
- if ( i == BLOCK_SIZE ){
- TokenListPtr[token_count] = DCT_EOB_TOKEN;
- token_count++;
- }else{
- /* If we have a short zero run followed by a low data value code
- the two as a composite token. */
- if ( run_count ){
- AbsData = abs(RawData[i]);
-
- if ( ((AbsData == 1) && (run_count <= 17)) ||
- ((AbsData <= 3) && (run_count <= 3)) ) {
- /* Tokenise the run and subsequent value combination value */
- token_count += TokenizeDctRunValue( run_count,
- RawData[i],
- &TokenListPtr[token_count] );
- }else{
-
- /* Else if we have a long non-EOB run or a run followed by a
- value token > MAX_RUN_VAL then code the run and token
- seperately */
- if ( run_count <= 8 )
- TokenListPtr[token_count] = DCT_SHORT_ZRL_TOKEN;
- else
- TokenListPtr[token_count] = DCT_ZRL_TOKEN;
-
- token_count++;
- TokenListPtr[token_count] = run_count - 1;
- token_count++;
-
- /* Now tokenize the value */
- token_count += TokenizeDctValue( RawData[i],
- &TokenListPtr[token_count] );
- }
- }else{
- /* Else there was NO zero run. */
- /* Tokenise the value */
- token_count += TokenizeDctValue( RawData[i],
- &TokenListPtr[token_count] );
- }
- }
- }
-
- /* Return the total number of tokens (including additional bits
- tokens) used. */
- return token_count;
-}
-
-ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi,
- ogg_int32_t FragIndex){
- ogg_uint32_t token_count;
-
- if ( cpi->pb.FrameType == KEY_FRAME ){
- /* Key frame so code block in INTRA mode. */
- cpi->pb.CodingMode = CODE_INTRA;
- }else{
- /* Get Motion vector and mode for this block. */
- cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex];
- }
-
- /* Tokenise the dct data. */
- token_count = TokenizeDctBlock( cpi->pb.QFragData[FragIndex],
- cpi->pb.TokenList[FragIndex] );
-
- cpi->FragTokenCounts[FragIndex] = token_count;
- cpi->TotTokenCount += token_count;
-
- /* Return number of pixels coded (i.e. 8x8). */
- return BLOCK_SIZE;
-}
-
-static int AllZeroDctData( Q_LIST_ENTRY * QuantList ){
- ogg_uint32_t i;
-
- for ( i = 0; i < 64; i ++ )
- if ( QuantList[i] != 0 )
- return 0;
-
- return 1;
-}
-
-static void MotionBlockDifference (CP_INSTANCE * cpi, unsigned char * FiltPtr,
- ogg_int16_t *DctInputPtr, ogg_int32_t MvDevisor,
- unsigned char* old_ptr1, unsigned char* new_ptr1,
- ogg_uint32_t FragIndex,ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine) {
-
- ogg_int32_t MvShift;
- ogg_int32_t MvModMask;
- ogg_int32_t AbsRefOffset;
- ogg_int32_t AbsXOffset;
- ogg_int32_t AbsYOffset;
- ogg_int32_t MVOffset; /* Baseline motion vector offset */
- ogg_int32_t ReconPtr2Offset; /* Offset for second reconstruction in
- half pixel MC */
- unsigned char *ReconPtr1; /* DCT reconstructed image pointers */
- unsigned char *ReconPtr2; /* Pointer used in half pixel MC */
-
- switch(MvDevisor) {
- case 2:
- MvShift = 1;
- MvModMask = 1;
- break;
- case 4:
- MvShift = 2;
- MvModMask = 3;
- break;
- default:
- break;
- }
-
- cpi->MVector.x = cpi->pb.FragMVect[FragIndex].x;
- cpi->MVector.y = cpi->pb.FragMVect[FragIndex].y;
-
- /* Set up the baseline offset for the motion vector. */
- MVOffset = ((cpi->MVector.y / MvDevisor) * ReconPixelsPerLine) +
- (cpi->MVector.x / MvDevisor);
-
- /* Work out the offset of the second reference position for 1/2
- pixel interpolation. For the U and V planes the MV specifies 1/4
- pixel accuracy. This is adjusted to 1/2 pixel as follows ( 0->0,
- 1/4->1/2, 1/2->1/2, 3/4->1/2 ). */
- ReconPtr2Offset = 0;
- AbsXOffset = cpi->MVector.x % MvDevisor;
- AbsYOffset = cpi->MVector.y % MvDevisor;
-
- if ( AbsXOffset ) {
- if ( cpi->MVector.x > 0 )
- ReconPtr2Offset += 1;
- else
- ReconPtr2Offset -= 1;
- }
-
- if ( AbsYOffset ) {
- if ( cpi->MVector.y > 0 )
- ReconPtr2Offset += ReconPixelsPerLine;
- else
- ReconPtr2Offset -= ReconPixelsPerLine;
- }
-
- if ( cpi->pb.CodingMode==CODE_GOLDEN_MV ) {
- ReconPtr1 = &cpi->
- pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]];
- } else {
- ReconPtr1 = &cpi->
- pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]];
- }
-
- ReconPtr1 += MVOffset;
- ReconPtr2 = ReconPtr1 + ReconPtr2Offset;
-
- AbsRefOffset = abs((int)(ReconPtr1 - ReconPtr2));
-
- /* Is the MV offset exactly pixel alligned */
- if ( AbsRefOffset == 0 ){
- dsp_sub8x8(cpi->dsp, FiltPtr, ReconPtr1, DctInputPtr,
- PixelsPerLine, ReconPixelsPerLine);
- dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
- } else {
- /* Fractional pixel MVs. */
- /* Note that we only use two pixel values even for the diagonal */
- dsp_sub8x8avg2(cpi->dsp, FiltPtr, ReconPtr1,ReconPtr2,DctInputPtr,
- PixelsPerLine, ReconPixelsPerLine);
- dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
- }
-}
-
-void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex,
- ogg_uint32_t PixelsPerLine) {
- unsigned char *new_ptr1; /* Pointers into current frame */
- unsigned char *old_ptr1; /* Pointers into old frame */
- unsigned char *FiltPtr; /* Pointers to srf filtered pixels */
- ogg_int16_t *DctInputPtr; /* Pointer into buffer containing input to DCT */
- int LeftEdge; /* Flag if block at left edge of component */
- ogg_uint32_t ReconPixelsPerLine; /* Line length for recon buffers. */
-
- unsigned char *ReconPtr1; /* DCT reconstructed image pointers */
- ogg_int32_t MvDevisor; /* Defines MV resolution (2 = 1/2
- pixel for Y or 4 = 1/4 for UV) */
-
- new_ptr1 = &cpi->yuv1ptr[cpi->pb.pixel_index_table[FragIndex]];
- old_ptr1 = &cpi->yuv0ptr[cpi->pb.pixel_index_table[FragIndex]];
- DctInputPtr = cpi->DCTDataBuffer;
-
- /* Set plane specific values */
- if (FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments){
- ReconPixelsPerLine = cpi->pb.YStride;
- MvDevisor = 2; /* 1/2 pixel accuracy in Y */
- }else{
- ReconPixelsPerLine = cpi->pb.UVStride;
- MvDevisor = 4; /* UV planes at 1/2 resolution of Y */
- }
-
- /* adjusted / filtered pointers */
- FiltPtr = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
-
- if ( cpi->pb.FrameType == KEY_FRAME ) {
- /* Key frame so code block in INTRA mode. */
- cpi->pb.CodingMode = CODE_INTRA;
- }else{
- /* Get Motion vector and mode for this block. */
- cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex];
- }
-
- /* Selection of Quantiser matrix and set other plane related values. */
- if ( FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments ){
- LeftEdge = !(FragIndex%cpi->pb.HFragments);
-
- /* Select the appropriate Y quantiser matrix */
- if ( cpi->pb.CodingMode == CODE_INTRA )
- select_quantiser(&cpi->pb, BLOCK_Y);
- else
- select_quantiser(&cpi->pb, BLOCK_INTER_Y);
- } else {
- LeftEdge = !((FragIndex-cpi->pb.YPlaneFragments)%(cpi->pb.HFragments>>1));
-
- if(FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments + (ogg_int32_t)cpi->pb.UVPlaneFragments) {
- /* U plane */
- if ( cpi->pb.CodingMode == CODE_INTRA )
- select_quantiser(&cpi->pb, BLOCK_U);
- else
- select_quantiser(&cpi->pb, BLOCK_INTER_U);
- } else {
- /* V plane */
- if ( cpi->pb.CodingMode == CODE_INTRA )
- select_quantiser(&cpi->pb, BLOCK_V);
- else
- select_quantiser(&cpi->pb, BLOCK_INTER_V);
- }
- }
-
- if ( ModeUsesMC[cpi->pb.CodingMode] ){
-
- MotionBlockDifference(cpi, FiltPtr, DctInputPtr, MvDevisor,
- old_ptr1, new_ptr1, FragIndex, PixelsPerLine,
- ReconPixelsPerLine);
-
- } else if ( (cpi->pb.CodingMode==CODE_INTER_NO_MV ) ||
- ( cpi->pb.CodingMode==CODE_USING_GOLDEN ) ) {
- if ( cpi->pb.CodingMode==CODE_INTER_NO_MV ) {
- ReconPtr1 = &cpi->
- pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]];
- } else {
- ReconPtr1 = &cpi->
- pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]];
- }
-
- dsp_sub8x8(cpi->dsp, FiltPtr, ReconPtr1, DctInputPtr,
- PixelsPerLine, ReconPixelsPerLine);
- dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
- } else if ( cpi->pb.CodingMode==CODE_INTRA ) {
- dsp_sub8x8_128(cpi->dsp, FiltPtr, DctInputPtr, PixelsPerLine);
- dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
- }
-
- /* Proceed to encode the data into the encode buffer if the encoder
- is enabled. */
- /* Perform a 2D DCT transform on the data. */
- dsp_fdct_short(cpi->dsp, cpi->DCTDataBuffer, cpi->DCT_codes );
-
- /* Quantize that transform data. */
- quantize ( &cpi->pb, cpi->DCT_codes, cpi->pb.QFragData[FragIndex] );
-
- if ( (cpi->pb.CodingMode == CODE_INTER_NO_MV) &&
- ( AllZeroDctData(cpi->pb.QFragData[FragIndex]) ) ) {
- cpi->pb.display_fragments[FragIndex] = 0;
- }
-
-}
diff --git a/Engine/lib/libtheora/lib/enc/dsp.c b/Engine/lib/libtheora/lib/enc/dsp.c
deleted file mode 100644
index 9fe402d4e..000000000
--- a/Engine/lib/libtheora/lib/enc/dsp.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dsp.c 15427 2008-10-21 02:36:19Z xiphmont $
-
- ********************************************************************/
-
-#include
-#include "codec_internal.h"
-#include "../cpu.c"
-
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-
-static void sub8x8__c (unsigned char *FiltPtr, unsigned char *ReconPtr,
- ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine) {
- int i;
-
- /* For each block row */
- for (i=8; i; i--) {
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- ReconPtr += ReconPixelsPerLine;
- DctInputPtr += 8;
- }
-}
-
-static void sub8x8_128__c (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine) {
- int i;
- /* For each block row */
- for (i=8; i; i--) {
- /* INTRA mode so code raw image data */
- /* We convert the data to 8 bit signed (by subtracting 128) as
- this reduces the internal precision requirments in the DCT
- transform. */
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- DctInputPtr += 8;
- }
-}
-
-static void sub8x8avg2__c (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
- int i;
-
- /* For each block row */
- for (i=8; i; i--) {
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- ReconPtr1 += ReconPixelsPerLine;
- ReconPtr2 += ReconPixelsPerLine;
- DctInputPtr += 8;
- }
-}
-
-static ogg_uint32_t row_sad8__c (unsigned char *Src1, unsigned char *Src2)
-{
- ogg_uint32_t SadValue;
- ogg_uint32_t SadValue1;
-
- SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) +
- DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
- DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
- DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
-
- SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) +
- DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
- DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
- DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
-
- SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
-
- return SadValue;
-}
-
-static ogg_uint32_t col_sad8x8__c (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
- ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
- ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
- ogg_uint32_t MaxSad = 0;
- ogg_uint32_t i;
-
- for ( i = 0; i < 4; i++ ){
- SadValue[0] += abs(Src1[0] - Src2[0]);
- SadValue[1] += abs(Src1[1] - Src2[1]);
- SadValue[2] += abs(Src1[2] - Src2[2]);
- SadValue[3] += abs(Src1[3] - Src2[3]);
- SadValue[4] += abs(Src1[4] - Src2[4]);
- SadValue[5] += abs(Src1[5] - Src2[5]);
- SadValue[6] += abs(Src1[6] - Src2[6]);
- SadValue[7] += abs(Src1[7] - Src2[7]);
-
- Src1 += stride;
- Src2 += stride;
- }
-
- for ( i = 0; i < 4; i++ ){
- SadValue2[0] += abs(Src1[0] - Src2[0]);
- SadValue2[1] += abs(Src1[1] - Src2[1]);
- SadValue2[2] += abs(Src1[2] - Src2[2]);
- SadValue2[3] += abs(Src1[3] - Src2[3]);
- SadValue2[4] += abs(Src1[4] - Src2[4]);
- SadValue2[5] += abs(Src1[5] - Src2[5]);
- SadValue2[6] += abs(Src1[6] - Src2[6]);
- SadValue2[7] += abs(Src1[7] - Src2[7]);
-
- Src1 += stride;
- Src2 += stride;
- }
-
- for ( i = 0; i < 8; i++ ){
- if ( SadValue[i] > MaxSad )
- MaxSad = SadValue[i];
- if ( SadValue2[i] > MaxSad )
- MaxSad = SadValue2[i];
- }
-
- return MaxSad;
-}
-
-static ogg_uint32_t sad8x8__c (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2)
-{
- ogg_uint32_t i;
- ogg_uint32_t sad = 0;
-
- for (i=8; i; i--) {
- sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
- sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
- sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
- sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
- sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
- sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
- sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
- sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
-
- /* Step to next row of block. */
- ptr1 += stride1;
- ptr2 += stride2;
- }
-
- return sad;
-}
-
-static ogg_uint32_t sad8x8_thres__c (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2,
- ogg_uint32_t thres)
-{
- ogg_uint32_t i;
- ogg_uint32_t sad = 0;
-
- for (i=8; i; i--) {
- sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
- sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
- sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
- sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
- sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
- sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
- sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
- sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
-
- if (sad > thres )
- break;
-
- /* Step to next row of block. */
- ptr1 += stride1;
- ptr2 += stride2;
- }
-
- return sad;
-}
-
-static ogg_uint32_t sad8x8_xy2_thres__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
- ogg_uint32_t thres)
-{
- ogg_uint32_t i;
- ogg_uint32_t sad = 0;
-
- for (i=8; i; i--) {
- sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
- sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
- sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
- sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
- sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
- sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
- sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
- sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
-
- if ( sad > thres )
- break;
-
- /* Step to next row of block. */
- SrcData += SrcStride;
- RefDataPtr1 += RefStride;
- RefDataPtr2 += RefStride;
- }
-
- return sad;
-}
-
-static ogg_uint32_t intra8x8_err__c (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
-
- for (i=8; i; i--) {
- /* Examine alternate pixel locations. */
- XSum += DataPtr[0];
- XXSum += DataPtr[0]*DataPtr[0];
- XSum += DataPtr[1];
- XXSum += DataPtr[1]*DataPtr[1];
- XSum += DataPtr[2];
- XXSum += DataPtr[2]*DataPtr[2];
- XSum += DataPtr[3];
- XXSum += DataPtr[3]*DataPtr[3];
- XSum += DataPtr[4];
- XXSum += DataPtr[4]*DataPtr[4];
- XSum += DataPtr[5];
- XXSum += DataPtr[5]*DataPtr[5];
- XSum += DataPtr[6];
- XXSum += DataPtr[6]*DataPtr[6];
- XSum += DataPtr[7];
- XXSum += DataPtr[7]*DataPtr[7];
-
- /* Step to next row of block. */
- DataPtr += Stride;
- }
-
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
- ogg_int32_t DiffVal;
-
- for (i=8; i; i--) {
- DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- /* Step to next row of block. */
- SrcData += SrcStride;
- RefDataPtr += RefStride;
- }
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static ogg_uint32_t inter8x8_err_xy2__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
- ogg_int32_t DiffVal;
-
- for (i=8; i; i--) {
- DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- /* Step to next row of block. */
- SrcData += SrcStride;
- RefDataPtr1 += RefStride;
- RefDataPtr2 += RefStride;
- }
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static void nop (void) { /* NOP */ }
-
-void dsp_init(DspFunctions *funcs)
-{
- funcs->save_fpu = nop;
- funcs->restore_fpu = nop;
- funcs->sub8x8 = sub8x8__c;
- funcs->sub8x8_128 = sub8x8_128__c;
- funcs->sub8x8avg2 = sub8x8avg2__c;
- funcs->row_sad8 = row_sad8__c;
- funcs->col_sad8x8 = col_sad8x8__c;
- funcs->sad8x8 = sad8x8__c;
- funcs->sad8x8_thres = sad8x8_thres__c;
- funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__c;
- funcs->intra8x8_err = intra8x8_err__c;
- funcs->inter8x8_err = inter8x8_err__c;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__c;
-}
-
-void dsp_static_init(DspFunctions *funcs)
-{
- ogg_uint32_t cpuflags;
-
- cpuflags = oc_cpu_flags_get ();
- dsp_init (funcs);
-
- dsp_recon_init (funcs, cpuflags);
- dsp_dct_init (funcs, cpuflags);
-#if defined(USE_ASM)
- if (cpuflags & OC_CPU_X86_MMX) {
- dsp_mmx_init(funcs);
- }
-# ifndef WIN32
- /* This is implemented for win32 yet */
- if (cpuflags & OC_CPU_X86_MMXEXT) {
- dsp_mmxext_init(funcs);
- }
-# endif
-#endif
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/dsp.h b/Engine/lib/libtheora/lib/enc/dsp.h
deleted file mode 100644
index 7f96f7f84..000000000
--- a/Engine/lib/libtheora/lib/enc/dsp.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dsp.h 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#ifndef DSP_H
-#define DSP_H
-
-#include "theora/theora.h"
-#include "../cpu.h"
-
-typedef struct
-{
- void (*save_fpu) (void);
- void (*restore_fpu) (void);
-
- void (*sub8x8) (unsigned char *FiltPtr, unsigned char *ReconPtr,
- ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine);
-
- void (*sub8x8_128) (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine);
-
- void (*sub8x8avg2) (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine);
-
- void (*copy8x8) (unsigned char *src, unsigned char *dest,
- ogg_uint32_t stride);
-
- void (*recon_intra8x8) (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep);
-
- void (*recon_inter8x8) (unsigned char *ReconPtr, unsigned char *RefPtr,
- ogg_int16_t *ChangePtr, ogg_uint32_t LineStep);
-
- void (*recon_inter8x8_half) (unsigned char *ReconPtr, unsigned char *RefPtr1,
- unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep);
-
- void (*fdct_short) (ogg_int16_t *InputData, ogg_int16_t *OutputData);
-
- ogg_uint32_t (*row_sad8) (unsigned char *Src1, unsigned char *Src2);
-
- ogg_uint32_t (*col_sad8x8) (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride);
-
- ogg_uint32_t (*sad8x8) (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2);
-
- ogg_uint32_t (*sad8x8_thres) (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2,
- ogg_uint32_t thres);
-
- ogg_uint32_t (*sad8x8_xy2_thres)(unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
- ogg_uint32_t thres);
-
- ogg_uint32_t (*intra8x8_err) (unsigned char *DataPtr, ogg_uint32_t Stride);
-
- ogg_uint32_t (*inter8x8_err) (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr, ogg_uint32_t RefStride);
-
- ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride);
-
- void (*LoopFilter) (PB_INSTANCE *pbi, int FLimit);
-
- void (*FilterVert) (unsigned char * PixelPtr,
- ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
-
- void (*IDctSlow) (ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
-
- void (*IDct3) (ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
-
- void (*IDct10) (ogg_int16_t *InputData,
- ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
-} DspFunctions;
-
-extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
-extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
-extern void dsp_dct_decode_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
-extern void dsp_idct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
-
-void dsp_init(DspFunctions *funcs);
-void dsp_static_init(DspFunctions *funcs);
-#if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__) || defined(WIN32))
-extern void dsp_mmx_init(DspFunctions *funcs);
-extern void dsp_mmxext_init(DspFunctions *funcs);
-extern void dsp_mmx_fdct_init(DspFunctions *funcs);
-extern void dsp_mmx_recon_init(DspFunctions *funcs);
-extern void dsp_mmx_dct_decode_init(DspFunctions *funcs);
-extern void dsp_mmx_idct_init(DspFunctions *funcs);
-#endif
-
-#define dsp_save_fpu(funcs) (funcs.save_fpu ())
-
-#define dsp_restore_fpu(funcs) (funcs.restore_fpu ())
-
-#define dsp_sub8x8(funcs,a1,a2,a3,a4,a5) (funcs.sub8x8 (a1,a2,a3,a4,a5))
-
-#define dsp_sub8x8_128(funcs,a1,a2,a3) (funcs.sub8x8_128 (a1,a2,a3))
-
-#define dsp_sub8x8avg2(funcs,a1,a2,a3,a4,a5,a6) (funcs.sub8x8avg2 (a1,a2,a3,a4,a5,a6))
-
-#define dsp_copy8x8(funcs,ptr1,ptr2,str1) (funcs.copy8x8 (ptr1,ptr2,str1))
-
-#define dsp_recon_intra8x8(funcs,ptr1,ptr2,str1) (funcs.recon_intra8x8 (ptr1,ptr2,str1))
-
-#define dsp_recon_inter8x8(funcs,ptr1,ptr2,ptr3,str1) \
- (funcs.recon_inter8x8 (ptr1,ptr2,ptr3,str1))
-
-#define dsp_recon_inter8x8_half(funcs,ptr1,ptr2,ptr3,ptr4,str1) \
- (funcs.recon_inter8x8_half (ptr1,ptr2,ptr3,ptr4,str1))
-
-#define dsp_fdct_short(funcs,in,out) (funcs.fdct_short (in,out))
-
-#define dsp_row_sad8(funcs,ptr1,ptr2) (funcs.row_sad8 (ptr1,ptr2))
-
-#define dsp_col_sad8x8(funcs,ptr1,ptr2,str1) (funcs.col_sad8x8 (ptr1,ptr2,str1))
-
-#define dsp_sad8x8(funcs,ptr1,str1,ptr2,str2) (funcs.sad8x8 (ptr1,str1,ptr2,str2))
-
-#define dsp_sad8x8_thres(funcs,ptr1,str1,ptr2,str2,t) (funcs.sad8x8_thres (ptr1,str1,ptr2,str2,t))
-
-#define dsp_sad8x8_xy2_thres(funcs,ptr1,str1,ptr2,ptr3,str2,t) \
- (funcs.sad8x8_xy2_thres (ptr1,str1,ptr2,ptr3,str2,t))
-
-#define dsp_intra8x8_err(funcs,ptr1,str1) (funcs.intra8x8_err (ptr1,str1))
-
-#define dsp_inter8x8_err(funcs,ptr1,str1,ptr2,str2) \
- (funcs.inter8x8_err (ptr1,str1,ptr2,str2))
-
-#define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \
- (funcs.inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2))
-
-#define dsp_LoopFilter(funcs, ptr1, i) \
- (funcs.LoopFilter(ptr1, i))
-
-#define dsp_IDctSlow(funcs, ptr1, ptr2, ptr3) \
- (funcs.IDctSlow(ptr1, ptr2, ptr3))
-
-#define dsp_IDct3(funcs, ptr1, ptr2, ptr3) \
- (funcs.IDctSlow(ptr1, ptr2, ptr3))
-
-#define dsp_IDct10(funcs, ptr1, ptr2, ptr3) \
- (funcs.IDctSlow(ptr1, ptr2, ptr3))
-
-#endif /* DSP_H */
diff --git a/Engine/lib/libtheora/lib/enc/encode.c b/Engine/lib/libtheora/lib/enc/encode.c
deleted file mode 100644
index 5dc89f2af..000000000
--- a/Engine/lib/libtheora/lib/enc/encode.c
+++ /dev/null
@@ -1,1479 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: encode.c 15383 2008-10-10 14:33:46Z xiphmont $
-
- ********************************************************************/
-
-#include
-#include
-#include "codec_internal.h"
-#include "encoder_lookup.h"
-#include "block_inline.h"
-
-#define PUR 8
-#define PU 4
-#define PUL 2
-#define PL 1
-#define HIGHBITDUPPED(X) (((ogg_int16_t) X) >> 15)
-
-static ogg_uint32_t QuadCodeComponent ( CP_INSTANCE *cpi,
- ogg_uint32_t FirstSB,
- ogg_uint32_t SBRows,
- ogg_uint32_t SBCols,
- ogg_uint32_t PixelsPerLine){
-
- ogg_int32_t FragIndex; /* Fragment number */
- ogg_uint32_t MB, B; /* Macro-Block, Block indices */
- ogg_uint32_t SBrow; /* Super-Block row number */
- ogg_uint32_t SBcol; /* Super-Block row number */
- ogg_uint32_t SB=FirstSB; /* Super-Block index, initialised to first
- of this component */
- ogg_uint32_t coded_pixels=0; /* Number of pixels coded */
- int MBCodedFlag;
-
- /* actually transform and quantize the image now that we've decided
- on the modes Parse in quad-tree ordering */
-
- for ( SBrow=0; SBrowpb.BlockMap,SB,MB) >= 0 ) {
-
- MBCodedFlag = 0;
-
- /* Now actually code the blocks */
- for ( B=0; B<4; B++ ) {
- FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
-
- /* Does Block lie in frame: */
- if ( FragIndex >= 0 ) {
-
- /* In Frame: Is it coded: */
- if ( cpi->pb.display_fragments[FragIndex] ) {
-
- /* transform and quantize block */
- TransformQuantizeBlock( cpi, FragIndex, PixelsPerLine );
-
- /* Has the block got struck off (no MV and no data
- generated after DCT) If not then mark it and the
- assosciated MB as coded. */
- if ( cpi->pb.display_fragments[FragIndex] ) {
- /* Create linear list of coded block indices */
- cpi->pb.CodedBlockList[cpi->pb.CodedBlockIndex] = FragIndex;
- cpi->pb.CodedBlockIndex++;
-
- /* MB is still coded */
- MBCodedFlag = 1;
- cpi->MBCodingMode = cpi->pb.FragCodingMethod[FragIndex];
-
- }
- }
- }
- }
- /* If the MB is marked as coded and we are in the Y plane then */
- /* the mode list needs to be updated. */
- if ( MBCodedFlag && (FirstSB == 0) ){
- /* Make a note of the selected mode in the mode list */
- cpi->ModeList[cpi->ModeListCount] = cpi->MBCodingMode;
- cpi->ModeListCount++;
- }
- }
- }
-
- SB++;
-
- }
- }
-
- /* Return number of pixels coded */
- return coded_pixels;
-}
-
-static void EncodeDcTokenList (CP_INSTANCE *cpi) {
- ogg_int32_t i,j;
- ogg_uint32_t Token;
- ogg_uint32_t ExtraBitsToken;
- ogg_uint32_t HuffIndex;
-
- ogg_uint32_t BestDcBits;
- ogg_uint32_t DcHuffChoice[2];
- ogg_uint32_t EntropyTableBits[2][DC_HUFF_CHOICES];
-
- oggpack_buffer *opb=cpi->oggbuffer;
-
- /* Clear table data structure */
- memset ( EntropyTableBits, 0, sizeof(ogg_uint32_t)*DC_HUFF_CHOICES*2 );
-
- /* Analyse token list to see which is the best entropy table to use */
- for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) {
- /* Count number of bits for each table option */
- Token = (ogg_uint32_t)cpi->OptimisedTokenList[i];
- for ( j = 0; j < DC_HUFF_CHOICES; j++ ){
- EntropyTableBits[cpi->OptimisedTokenListPl[i]][j] +=
- cpi->pb.HuffCodeLengthArray_VP3x[DC_HUFF_OFFSET + j][Token];
- }
- }
-
- /* Work out which table option is best for Y */
- BestDcBits = EntropyTableBits[0][0];
- DcHuffChoice[0] = 0;
- for ( j = 1; j < DC_HUFF_CHOICES; j++ ) {
- if ( EntropyTableBits[0][j] < BestDcBits ) {
- BestDcBits = EntropyTableBits[0][j];
- DcHuffChoice[0] = j;
- }
- }
-
- /* Add the DC huffman table choice to the bitstream */
- oggpackB_write( opb, DcHuffChoice[0], DC_HUFF_CHOICE_BITS );
-
- /* Work out which table option is best for UV */
- BestDcBits = EntropyTableBits[1][0];
- DcHuffChoice[1] = 0;
- for ( j = 1; j < DC_HUFF_CHOICES; j++ ) {
- if ( EntropyTableBits[1][j] < BestDcBits ) {
- BestDcBits = EntropyTableBits[1][j];
- DcHuffChoice[1] = j;
- }
- }
-
- /* Add the DC huffman table choice to the bitstream */
- oggpackB_write( opb, DcHuffChoice[1], DC_HUFF_CHOICE_BITS );
-
- /* Encode the token list */
- for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) {
-
- /* Get the token and extra bits */
- Token = (ogg_uint32_t)cpi->OptimisedTokenList[i];
- ExtraBitsToken = (ogg_uint32_t)cpi->OptimisedTokenListEb[i];
-
- /* Select the huffman table */
- if ( cpi->OptimisedTokenListPl[i] == 0)
- HuffIndex = (ogg_uint32_t)DC_HUFF_OFFSET + (ogg_uint32_t)DcHuffChoice[0];
- else
- HuffIndex = (ogg_uint32_t)DC_HUFF_OFFSET + (ogg_uint32_t)DcHuffChoice[1];
-
- /* Add the bits to the encode holding buffer. */
- cpi->FrameBitCount += cpi->pb.HuffCodeLengthArray_VP3x[HuffIndex][Token];
- oggpackB_write( opb, cpi->pb.HuffCodeArray_VP3x[HuffIndex][Token],
- (ogg_uint32_t)cpi->
- pb.HuffCodeLengthArray_VP3x[HuffIndex][Token] );
-
- /* If the token is followed by an extra bits token then code it */
- if ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 ) {
- /* Add the bits to the encode holding buffer. */
- cpi->FrameBitCount += cpi->pb.ExtraBitLengths_VP3x[Token];
- oggpackB_write( opb, ExtraBitsToken,
- (ogg_uint32_t)cpi->pb.ExtraBitLengths_VP3x[Token] );
- }
-
- }
-
- /* Reset the count of second order optimised tokens */
- cpi->OptimisedTokenCount = 0;
-}
-
-static void EncodeAcTokenList (CP_INSTANCE *cpi) {
- ogg_int32_t i,j;
- ogg_uint32_t Token;
- ogg_uint32_t ExtraBitsToken;
- ogg_uint32_t HuffIndex;
-
- ogg_uint32_t BestAcBits;
- ogg_uint32_t AcHuffChoice[2];
- ogg_uint32_t EntropyTableBits[2][AC_HUFF_CHOICES];
-
- oggpack_buffer *opb=cpi->oggbuffer;
-
- memset ( EntropyTableBits, 0, sizeof(ogg_uint32_t)*AC_HUFF_CHOICES*2 );
-
- /* Analyse token list to see which is the best entropy table to use */
- for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) {
- /* Count number of bits for each table option */
- Token = (ogg_uint32_t)cpi->OptimisedTokenList[i];
- HuffIndex = cpi->OptimisedTokenListHi[i];
- for ( j = 0; j < AC_HUFF_CHOICES; j++ ) {
- EntropyTableBits[cpi->OptimisedTokenListPl[i]][j] +=
- cpi->pb.HuffCodeLengthArray_VP3x[HuffIndex + j][Token];
- }
- }
-
- /* Select the best set of AC tables for Y */
- BestAcBits = EntropyTableBits[0][0];
- AcHuffChoice[0] = 0;
- for ( j = 1; j < AC_HUFF_CHOICES; j++ ) {
- if ( EntropyTableBits[0][j] < BestAcBits ) {
- BestAcBits = EntropyTableBits[0][j];
- AcHuffChoice[0] = j;
- }
- }
-
- /* Add the AC-Y huffman table choice to the bitstream */
- oggpackB_write( opb, AcHuffChoice[0], AC_HUFF_CHOICE_BITS );
-
- /* Select the best set of AC tables for UV */
- BestAcBits = EntropyTableBits[1][0];
- AcHuffChoice[1] = 0;
- for ( j = 1; j < AC_HUFF_CHOICES; j++ ) {
- if ( EntropyTableBits[1][j] < BestAcBits ) {
- BestAcBits = EntropyTableBits[1][j];
- AcHuffChoice[1] = j;
- }
- }
-
- /* Add the AC-UV huffman table choice to the bitstream */
- oggpackB_write( opb, AcHuffChoice[1], AC_HUFF_CHOICE_BITS );
-
- /* Encode the token list */
- for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) {
- /* Get the token and extra bits */
- Token = (ogg_uint32_t)cpi->OptimisedTokenList[i];
- ExtraBitsToken = (ogg_uint32_t)cpi->OptimisedTokenListEb[i];
-
- /* Select the huffman table */
- HuffIndex = (ogg_uint32_t)cpi->OptimisedTokenListHi[i] +
- AcHuffChoice[cpi->OptimisedTokenListPl[i]];
-
- /* Add the bits to the encode holding buffer. */
- cpi->FrameBitCount += cpi->pb.HuffCodeLengthArray_VP3x[HuffIndex][Token];
- oggpackB_write( opb, cpi->pb.HuffCodeArray_VP3x[HuffIndex][Token],
- (ogg_uint32_t)cpi->
- pb.HuffCodeLengthArray_VP3x[HuffIndex][Token] );
-
- /* If the token is followed by an extra bits token then code it */
- if ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 ) {
- /* Add the bits to the encode holding buffer. */
- cpi->FrameBitCount += cpi->pb.ExtraBitLengths_VP3x[Token];
- oggpackB_write( opb, ExtraBitsToken,
- (ogg_uint32_t)cpi->pb.ExtraBitLengths_VP3x[Token] );
- }
- }
-
- /* Reset the count of second order optimised tokens */
- cpi->OptimisedTokenCount = 0;
-}
-
-static void PackModes (CP_INSTANCE *cpi) {
- ogg_uint32_t i,j;
- unsigned char ModeIndex;
- const unsigned char *SchemeList;
-
- unsigned char BestModeSchemes[MAX_MODES];
- ogg_int32_t ModeCount[MAX_MODES];
- ogg_int32_t TmpFreq = -1;
- ogg_int32_t TmpIndex = -1;
-
- ogg_uint32_t BestScheme;
- ogg_uint32_t BestSchemeScore;
- ogg_uint32_t SchemeScore;
-
- oggpack_buffer *opb=cpi->oggbuffer;
-
- /* Build a frequency map for the modes in this frame */
- memset( ModeCount, 0, MAX_MODES*sizeof(ogg_int32_t) );
- for ( i = 0; i < cpi->ModeListCount; i++ )
- ModeCount[cpi->ModeList[i]] ++;
-
- /* Order the modes from most to least frequent. Store result as
- scheme 0 */
- for ( j = 0; j < MAX_MODES; j++ ) {
- TmpFreq = -1; /* need to re-initialize for each loop */
- /* Find the most frequent */
- for ( i = 0; i < MAX_MODES; i++ ) {
- /* Is this the best scheme so far ??? */
- if ( ModeCount[i] > TmpFreq ) {
- TmpFreq = ModeCount[i];
- TmpIndex = i;
- }
- }
- /* I don't know if the above loop ever fails to match, but it's
- better safe than sorry. Plus this takes care of gcc warning */
- if ( TmpIndex != -1 ) {
- ModeCount[TmpIndex] = -1;
- BestModeSchemes[TmpIndex] = (unsigned char)j;
- }
- }
-
- /* Default/ fallback scheme uses MODE_BITS bits per mode entry */
- BestScheme = (MODE_METHODS - 1);
- BestSchemeScore = cpi->ModeListCount * 3;
- /* Get a bit score for the available schemes. */
- for ( j = 0; j < (MODE_METHODS - 1); j++ ) {
-
- /* Reset the scheme score */
- if ( j == 0 ){
- /* Scheme 0 additional cost of sending frequency order */
- SchemeScore = 24;
- SchemeList = BestModeSchemes;
- } else {
- SchemeScore = 0;
- SchemeList = ModeSchemes[j-1];
- }
-
- /* Find the total bits to code using each avaialable scheme */
- for ( i = 0; i < cpi->ModeListCount; i++ )
- SchemeScore += ModeBitLengths[SchemeList[cpi->ModeList[i]]];
-
- /* Is this the best scheme so far ??? */
- if ( SchemeScore < BestSchemeScore ) {
- BestSchemeScore = SchemeScore;
- BestScheme = j;
- }
- }
-
- /* Encode the best scheme. */
- oggpackB_write( opb, BestScheme, (ogg_uint32_t)MODE_METHOD_BITS );
-
- /* If the chosen schems is scheme 0 send details of the mode
- frequency order */
- if ( BestScheme == 0 ) {
- for ( j = 0; j < MAX_MODES; j++ ){
- /* Note that the last two entries are implicit */
- oggpackB_write( opb, BestModeSchemes[j], (ogg_uint32_t)MODE_BITS );
- }
- SchemeList = BestModeSchemes;
- }
- else {
- SchemeList = ModeSchemes[BestScheme-1];
- }
-
- /* Are we using one of the alphabet based schemes or the fallback scheme */
- if ( BestScheme < (MODE_METHODS - 1)) {
- /* Pack and encode the Mode list */
- for ( i = 0; i < cpi->ModeListCount; i++) {
- /* Add the appropriate mode entropy token. */
- ModeIndex = SchemeList[cpi->ModeList[i]];
- oggpackB_write( opb, ModeBitPatterns[ModeIndex],
- (ogg_uint32_t)ModeBitLengths[ModeIndex] );
- }
- }else{
- /* Fall back to MODE_BITS per entry */
- for ( i = 0; i < cpi->ModeListCount; i++)
- /* Add the appropriate mode entropy token. */
- oggpackB_write( opb, cpi->ModeList[i], MODE_BITS );
- }
-
-}
-
-static void PackMotionVectors (CP_INSTANCE *cpi) {
- ogg_int32_t i;
- ogg_uint32_t MethodBits[2] = {0,0};
- const ogg_uint32_t * MvBitsPtr;
- const ogg_uint32_t * MvPatternPtr;
-
- oggpack_buffer *opb=cpi->oggbuffer;
-
- /* Choose the coding method */
- MvBitsPtr = &MvBits[MAX_MV_EXTENT];
- for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) {
- MethodBits[0] += MvBitsPtr[cpi->MVList[i].x];
- MethodBits[0] += MvBitsPtr[cpi->MVList[i].y];
- MethodBits[1] += 12; /* Simple six bits per mv component fallback
- mechanism */
- }
-
- /* Select entropy table */
- if ( MethodBits[0] < MethodBits[1] ) {
- oggpackB_write( opb, 0, 1 );
- MvBitsPtr = &MvBits[MAX_MV_EXTENT];
- MvPatternPtr = &MvPattern[MAX_MV_EXTENT];
- }else{
- oggpackB_write( opb, 1, 1 );
- MvBitsPtr = &MvBits2[MAX_MV_EXTENT];
- MvPatternPtr = &MvPattern2[MAX_MV_EXTENT];
- }
-
- /* Pack and encode the motion vectors */
- for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) {
- oggpackB_write( opb, MvPatternPtr[cpi->MVList[i].x],
- (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].x] );
- oggpackB_write( opb, MvPatternPtr[cpi->MVList[i].y],
- (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].y] );
- }
-
-}
-
-static void PackEOBRun( CP_INSTANCE *cpi) {
- if(cpi->RunLength == 0)
- return;
-
- /* Note the appropriate EOB or EOB run token and any extra bits in
- the optimised token list. Use the huffman index assosciated with
- the first token in the run */
-
- /* Mark out which plane the block belonged to */
- cpi->OptimisedTokenListPl[cpi->OptimisedTokenCount] =
- (unsigned char)cpi->RunPlaneIndex;
-
- /* Note the huffman index to be used */
- cpi->OptimisedTokenListHi[cpi->OptimisedTokenCount] =
- (unsigned char)cpi->RunHuffIndex;
-
- if ( cpi->RunLength <= 3 ) {
- if ( cpi->RunLength == 1 ) {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_TOKEN;
- } else if ( cpi->RunLength == 2 ) {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_PAIR_TOKEN;
- } else {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_TRIPLE_TOKEN;
- }
-
- cpi->RunLength = 0;
-
- } else {
-
- /* Choose a token appropriate to the run length. */
- if ( cpi->RunLength < 8 ) {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
- DCT_REPEAT_RUN_TOKEN;
- cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
- cpi->RunLength - 4;
- cpi->RunLength = 0;
- } else if ( cpi->RunLength < 16 ) {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
- DCT_REPEAT_RUN2_TOKEN;
- cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
- cpi->RunLength - 8;
- cpi->RunLength = 0;
- } else if ( cpi->RunLength < 32 ) {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
- DCT_REPEAT_RUN3_TOKEN;
- cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
- cpi->RunLength - 16;
- cpi->RunLength = 0;
- } else if ( cpi->RunLength < 4096) {
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
- DCT_REPEAT_RUN4_TOKEN;
- cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
- cpi->RunLength;
- cpi->RunLength = 0;
- }
-
- }
-
- cpi->OptimisedTokenCount++;
- /* Reset run EOB length */
- cpi->RunLength = 0;
-}
-
-static void PackToken ( CP_INSTANCE *cpi, ogg_int32_t FragmentNumber,
- ogg_uint32_t HuffIndex ) {
- ogg_uint32_t Token =
- cpi->pb.TokenList[FragmentNumber][cpi->FragTokens[FragmentNumber]];
- ogg_uint32_t ExtraBitsToken =
- cpi->pb.TokenList[FragmentNumber][cpi->FragTokens[FragmentNumber] + 1];
- ogg_uint32_t OneOrTwo;
- ogg_uint32_t OneOrZero;
-
- /* Update the record of what coefficient we have got up to for this
- block and unpack the encoded token back into the quantised data
- array. */
- if ( Token == DCT_EOB_TOKEN )
- cpi->pb.FragCoeffs[FragmentNumber] = BLOCK_SIZE;
- else
- ExpandToken( cpi->pb.QFragData[FragmentNumber],
- &cpi->pb.FragCoeffs[FragmentNumber],
- Token, ExtraBitsToken );
-
- /* Update record of tokens coded and where we are in this fragment. */
- /* Is there an extra bits token */
- OneOrTwo= 1 + ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 );
- /* Advance to the next real token. */
- cpi->FragTokens[FragmentNumber] += (unsigned char)OneOrTwo;
-
- /* Update the counts of tokens coded */
- cpi->TokensCoded += OneOrTwo;
- cpi->TokensToBeCoded -= OneOrTwo;
-
- OneOrZero = ( FragmentNumber < (ogg_int32_t)cpi->pb.YPlaneFragments );
-
- if ( Token == DCT_EOB_TOKEN ) {
- if ( cpi->RunLength == 0 ) {
- cpi->RunHuffIndex = HuffIndex;
- cpi->RunPlaneIndex = 1 - OneOrZero;
- }
- cpi->RunLength++;
-
- /* we have exceeded our longest run length xmit an eob run token; */
- if ( cpi->RunLength == 4095 ) PackEOBRun(cpi);
-
- }else{
-
- /* If we have an EOB run then code it up first */
- if ( cpi->RunLength > 0 ) PackEOBRun( cpi);
-
- /* Mark out which plane the block belonged to */
- cpi->OptimisedTokenListPl[cpi->OptimisedTokenCount] =
- (unsigned char)(1 - OneOrZero);
-
- /* Note the token, extra bits and hufman table in the optimised
- token list */
- cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
- (unsigned char)Token;
- cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
- ExtraBitsToken;
- cpi->OptimisedTokenListHi[cpi->OptimisedTokenCount] =
- (unsigned char)HuffIndex;
-
- cpi->OptimisedTokenCount++;
- }
-}
-
-static ogg_uint32_t GetBlockReconErrorSlow( CP_INSTANCE *cpi,
- ogg_int32_t BlockIndex ) {
- ogg_uint32_t ErrorVal;
-
- unsigned char * SrcDataPtr =
- &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[BlockIndex]];
- unsigned char * RecDataPtr =
- &cpi->pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[BlockIndex]];
- ogg_int32_t SrcStride;
- ogg_int32_t RecStride;
-
- /* Is the block a Y block or a UV block. */
- if ( BlockIndex < (ogg_int32_t)cpi->pb.YPlaneFragments ) {
- SrcStride = cpi->pb.info.width;
- RecStride = cpi->pb.YStride;
- }else{
- SrcStride = cpi->pb.info.width >> 1;
- RecStride = cpi->pb.UVStride;
- }
-
- ErrorVal = dsp_sad8x8 (cpi->dsp, SrcDataPtr, SrcStride, RecDataPtr, RecStride);
-
- return ErrorVal;
-}
-
-static void PackCodedVideo (CP_INSTANCE *cpi) {
- ogg_int32_t i;
- ogg_int32_t EncodedCoeffs = 1;
- ogg_int32_t FragIndex;
- ogg_uint32_t HuffIndex; /* Index to group of tables used to code a token */
-
- /* Reset the count of second order optimised tokens */
- cpi->OptimisedTokenCount = 0;
-
- cpi->TokensToBeCoded = cpi->TotTokenCount;
- cpi->TokensCoded = 0;
-
- /* Calculate the bit rate at which this frame should be capped. */
- cpi->MaxBitTarget = (ogg_uint32_t)((double)(cpi->ThisFrameTargetBytes * 8) *
- cpi->BitRateCapFactor);
-
- /* Blank the various fragment data structures before we start. */
- memset(cpi->pb.FragCoeffs, 0, cpi->pb.UnitFragments);
- memset(cpi->FragTokens, 0, cpi->pb.UnitFragments);
-
- /* Clear down the QFragData structure for all coded blocks. */
- ClearDownQFragData(&cpi->pb);
-
- /* The tree is not needed (implicit) for key frames */
- if ( cpi->pb.FrameType != KEY_FRAME ){
- /* Pack the quad tree fragment mapping. */
- PackAndWriteDFArray( cpi );
- }
-
- /* Note the number of bits used to code the tree itself. */
- cpi->FrameBitCount = oggpackB_bytes(cpi->oggbuffer) << 3;
-
- /* Mode and MV data not needed for key frames. */
- if ( cpi->pb.FrameType != KEY_FRAME ){
- /* Pack and code the mode list. */
- PackModes(cpi);
- /* Pack the motion vectors */
- PackMotionVectors (cpi);
- }
-
- cpi->FrameBitCount = oggpackB_bytes(cpi->oggbuffer) << 3;
-
- /* Optimise the DC tokens */
- for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
- /* Get the linear index for the current fragment. */
- FragIndex = cpi->pb.CodedBlockList[i];
-
- cpi->pb.FragCoefEOB[FragIndex]=(unsigned char)EncodedCoeffs;
- PackToken(cpi, FragIndex, DC_HUFF_OFFSET );
-
- }
-
- /* Pack any outstanding EOB tokens */
- PackEOBRun(cpi);
-
- /* Now output the optimised DC token list using the appropriate
- entropy tables. */
- EncodeDcTokenList(cpi);
-
- /* Work out the number of DC bits coded */
-
- /* Optimise the AC tokens */
- while ( EncodedCoeffs < 64 ) {
- /* Huffman table adjustment based upon coefficient number. */
- if ( EncodedCoeffs <= AC_TABLE_2_THRESH )
- HuffIndex = AC_HUFF_OFFSET;
- else if ( EncodedCoeffs <= AC_TABLE_3_THRESH )
- HuffIndex = AC_HUFF_OFFSET + AC_HUFF_CHOICES;
- else if ( EncodedCoeffs <= AC_TABLE_4_THRESH )
- HuffIndex = AC_HUFF_OFFSET + (AC_HUFF_CHOICES * 2);
- else
- HuffIndex = AC_HUFF_OFFSET + (AC_HUFF_CHOICES * 3);
-
- /* Repeatedly scan through the list of blocks. */
- for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
- /* Get the linear index for the current fragment. */
- FragIndex = cpi->pb.CodedBlockList[i];
-
- /* Should we code a token for this block on this pass. */
- if ( cpi->FragTokens[FragIndex] < cpi->FragTokenCounts[FragIndex]
- && cpi->pb.FragCoeffs[FragIndex] <= EncodedCoeffs ) {
- /* Bit pack and a token for this block */
- cpi->pb.FragCoefEOB[FragIndex]=(unsigned char)EncodedCoeffs;
- PackToken( cpi, FragIndex, HuffIndex );
- }
- }
-
- EncodedCoeffs ++;
- }
-
- /* Pack any outstanding EOB tokens */
- PackEOBRun(cpi);
-
- /* Now output the optimised AC token list using the appropriate
- entropy tables. */
- EncodeAcTokenList(cpi);
-
-}
-
-static ogg_uint32_t QuadCodeDisplayFragments (CP_INSTANCE *cpi) {
- ogg_int32_t i,j;
- ogg_uint32_t coded_pixels=0;
- int QIndex;
- int k,m,n;
-
- /* predictor multiplier up-left, up, up-right,left, shift
- Entries are packed in the order L, UL, U, UR, with missing entries
- moved to the end (before the shift parameters). */
- static const ogg_int16_t pc[16][6]={
- {0,0,0,0,0,0},
- {1,0,0,0,0,0}, /* PL */
- {1,0,0,0,0,0}, /* PUL */
- {1,0,0,0,0,0}, /* PUL|PL */
- {1,0,0,0,0,0}, /* PU */
- {1,1,0,0,1,1}, /* PU|PL */
- {0,1,0,0,0,0}, /* PU|PUL */
- {29,-26,29,0,5,31}, /* PU|PUL|PL */
- {1,0,0,0,0,0}, /* PUR */
- {75,53,0,0,7,127}, /* PUR|PL */
- {1,1,0,0,1,1}, /* PUR|PUL */
- {75,0,53,0,7,127}, /* PUR|PUL|PL */
- {1,0,0,0,0,0}, /* PUR|PU */
- {75,0,53,0,7,127}, /* PUR|PU|PL */
- {3,10,3,0,4,15}, /* PUR|PU|PUL */
- {29,-26,29,0,5,31} /* PUR|PU|PUL|PL */
- };
-
- /* boundary case bit masks. */
- static const int bc_mask[8]={
- /* normal case no boundary condition */
- PUR|PU|PUL|PL,
- /* left column */
- PUR|PU,
- /* top row */
- PL,
- /* top row, left column */
- 0,
- /* right column */
- PU|PUL|PL,
- /* right and left column */
- PU,
- /* top row, right column */
- PL,
- /* top row, right and left column */
- 0
- };
-
- /* value left value up-left, value up, value up-right, missing
- values skipped. */
- int v[4];
-
- /* fragment number left, up-left, up, up-right */
- int fn[4];
-
- /* predictor count. */
- int pcount;
-
- /*which predictor constants to use */
- ogg_int16_t wpc;
-
- /* last used inter predictor (Raster Order) */
- ogg_int16_t Last[3]; /* last value used for given frame */
-
- int FragsAcross=cpi->pb.HFragments;
- int FragsDown = cpi->pb.VFragments;
- int FromFragment,ToFragment;
- ogg_int32_t FragIndex;
- int WhichFrame;
- int WhichCase;
-
- static const ogg_int16_t Mode2Frame[] = {
- 1, /* CODE_INTER_NO_MV 0 => Encoded diff from same MB last frame */
- 0, /* CODE_INTRA 1 => DCT Encoded Block */
- 1, /* CODE_INTER_PLUS_MV 2 => Encoded diff from included MV MB last frame */
- 1, /* CODE_INTER_LAST_MV 3 => Encoded diff from MRU MV MB last frame */
- 1, /* CODE_INTER_PRIOR_MV 4 => Encoded diff from included 4 separate MV blocks */
- 2, /* CODE_USING_GOLDEN 5 => Encoded diff from same MB golden frame */
- 2, /* CODE_GOLDEN_MV 6 => Encoded diff from included MV MB golden frame */
- 1 /* CODE_INTER_FOUR_MV 7 => Encoded diff from included 4 separate MV blocks */
- };
-
- ogg_int16_t PredictedDC;
-
- /* Initialise the coded block indices variables. These allow
- subsequent linear access to the quad tree ordered list of coded
- blocks */
- cpi->pb.CodedBlockIndex = 0;
-
- /* Set the inter/intra descision control variables. */
- QIndex = Q_TABLE_SIZE - 1;
- while ( QIndex >= 0 ) {
- if ( (QIndex == 0) ||
- ( cpi->pb.QThreshTable[QIndex] >= cpi->pb.ThisFrameQualityValue) )
- break;
- QIndex --;
- }
-
-
- /* Encode and tokenise the Y, U and V components */
- coded_pixels = QuadCodeComponent(cpi, 0, cpi->pb.YSBRows, cpi->pb.YSBCols,
- cpi->pb.info.width );
- coded_pixels += QuadCodeComponent(cpi, cpi->pb.YSuperBlocks,
- cpi->pb.UVSBRows,
- cpi->pb.UVSBCols,
- cpi->pb.info.width>>1 );
- coded_pixels += QuadCodeComponent(cpi,
- cpi->pb.YSuperBlocks+cpi->pb.UVSuperBlocks,
- cpi->pb.UVSBRows, cpi->pb.UVSBCols,
- cpi->pb.info.width>>1 );
-
- /* for y,u,v */
- for ( j = 0; j < 3 ; j++) {
- /* pick which fragments based on Y, U, V */
- switch(j){
- case 0: /* y */
- FromFragment = 0;
- ToFragment = cpi->pb.YPlaneFragments;
- FragsAcross = cpi->pb.HFragments;
- FragsDown = cpi->pb.VFragments;
- break;
- case 1: /* u */
- FromFragment = cpi->pb.YPlaneFragments;
- ToFragment = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments ;
- FragsAcross = cpi->pb.HFragments >> 1;
- FragsDown = cpi->pb.VFragments >> 1;
- break;
- /*case 2: v */
- default:
- FromFragment = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments;
- ToFragment = cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments) ;
- FragsAcross = cpi->pb.HFragments >> 1;
- FragsDown = cpi->pb.VFragments >> 1;
- break;
- }
-
- /* initialize our array of last used DC Components */
- for(k=0;k<3;k++)Last[k]=0;
- i=FromFragment;
-
- /* do prediction on all of Y, U or V */
- for ( m = 0 ; m < FragsDown ; m++) {
- for ( n = 0 ; n < FragsAcross ; n++, i++) {
- cpi->OriginalDC[i] = cpi->pb.QFragData[i][0];
-
- /* only do 2 prediction if fragment coded and on non intra or
- if all fragments are intra */
- if( cpi->pb.display_fragments[i] ||
- (cpi->pb.FrameType == KEY_FRAME) ) {
- /* Type of Fragment */
-
- WhichFrame = Mode2Frame[cpi->pb.FragCodingMethod[i]];
-
- /* Check Borderline Cases */
- WhichCase = (n==0) + ((m==0) << 1) + ((n+1 == FragsAcross) << 2);
-
- fn[0]=i-1;
- fn[1]=i-FragsAcross-1;
- fn[2]=i-FragsAcross;
- fn[3]=i-FragsAcross+1;
-
- /* fragment valid for prediction use if coded and it comes
- from same frame as the one we are predicting */
- for(k=pcount=wpc=0; k<4; k++) {
- int pflag;
- pflag=1<pb.display_fragments[fn[k]] &&
- (Mode2Frame[cpi->pb.FragCodingMethod[fn[k]]] == WhichFrame)){
- v[pcount]=cpi->OriginalDC[fn[k]];
- wpc|=pflag;
- pcount++;
- }
- }
-
- if(wpc==0) {
-
- /* fall back to the last coded fragment */
- cpi->pb.QFragData[i][0] -= Last[WhichFrame];
-
- } else {
-
- /* don't do divide if divisor is 1 or 0 */
- PredictedDC = pc[wpc][0]*v[0];
- for(k=1; k>= pc[wpc][4];
-
- }
-
- /* check for outranging on the two predictors that can outrange */
- if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){
- if( abs(PredictedDC - v[2]) > 128) {
- PredictedDC = v[2];
- } else if( abs(PredictedDC - v[0]) > 128) {
- PredictedDC = v[0];
- } else if( abs(PredictedDC - v[1]) > 128) {
- PredictedDC = v[1];
- }
- }
-
- cpi->pb.QFragData[i][0] -= PredictedDC;
- }
-
- /* Save the last fragment coded for whatever frame we are
- predicting from */
-
- Last[WhichFrame] = cpi->OriginalDC[i];
-
- }
- }
- }
- }
-
- /* Pack DC tokens and adjust the ones we couldn't predict 2d */
- for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
- /* Get the linear index for the current coded fragment. */
- FragIndex = cpi->pb.CodedBlockList[i];
- coded_pixels += DPCMTokenizeBlock ( cpi, FragIndex);
-
- }
-
- /* Bit pack the video data data */
- PackCodedVideo(cpi);
-
- /* End the bit packing run. */
- /* EndAddBitsToBuffer(cpi); */
-
- /* Reconstruct the reference frames */
- ReconRefFrames(&cpi->pb);
-
- UpdateFragQIndex(&cpi->pb);
-
- /* Measure the inter reconstruction error for all the blocks that
- were coded */
- /* for use as part of the recovery monitoring process in subsequent frames. */
- for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
- cpi->LastCodedErrorScore[ cpi->pb.CodedBlockList[i] ] =
- GetBlockReconErrorSlow( cpi, cpi->pb.CodedBlockList[i] );
-
- }
-
- /* Return total number of coded pixels */
- return coded_pixels;
-}
-
-ogg_uint32_t EncodeData(CP_INSTANCE *cpi){
- ogg_uint32_t coded_pixels = 0;
-
- /* Zero the count of tokens so far this frame. */
- cpi->TotTokenCount = 0;
-
- /* Zero the mode and MV list indices. */
- cpi->ModeListCount = 0;
-
- /* Zero Decoder EOB run count */
- cpi->pb.EOB_Run = 0;
-
- dsp_save_fpu (cpi->dsp);
-
- /* Encode any fragments coded using DCT. */
- coded_pixels += QuadCodeDisplayFragments (cpi);
-
- dsp_restore_fpu (cpi->dsp);
-
- return coded_pixels;
-
-}
-
-ogg_uint32_t PickIntra( CP_INSTANCE *cpi,
- ogg_uint32_t SBRows,
- ogg_uint32_t SBCols){
-
- ogg_int32_t FragIndex; /* Fragment number */
- ogg_uint32_t MB, B; /* Macro-Block, Block indices */
- ogg_uint32_t SBrow; /* Super-Block row number */
- ogg_uint32_t SBcol; /* Super-Block row number */
- ogg_uint32_t SB=0; /* Super-Block index, initialised to first of
- this component */
- ogg_uint32_t UVRow;
- ogg_uint32_t UVColumn;
- ogg_uint32_t UVFragOffset;
-
- /* decide what block type and motion vectors to use on all of the frames */
- for ( SBrow=0; SBrowpb.BlockMap,SB,MB) >= 0 ) {
-
- cpi->MBCodingMode = CODE_INTRA;
-
- /* Now actually code the blocks. */
- for ( B=0; B<4; B++ ) {
- FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
- cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode;
- }
-
- /* Matching fragments in the U and V planes */
- UVRow = (FragIndex / (cpi->pb.HFragments * 2));
- UVColumn = (FragIndex % cpi->pb.HFragments) / 2;
- UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
-
- cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments + UVFragOffset] =
- cpi->MBCodingMode;
- cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments +
- cpi->pb.UVPlaneFragments + UVFragOffset] =
- cpi->MBCodingMode;
- }
- }
-
- /* Next Super-Block */
- SB++;
- }
- }
- return 0;
-}
-
-static void AddMotionVector(CP_INSTANCE *cpi,
- MOTION_VECTOR *ThisMotionVector) {
- cpi->MVList[cpi->MvListCount].x = ThisMotionVector->x;
- cpi->MVList[cpi->MvListCount].y = ThisMotionVector->y;
- cpi->MvListCount++;
-}
-
-static void SetFragMotionVectorAndMode(CP_INSTANCE *cpi,
- ogg_int32_t FragIndex,
- MOTION_VECTOR *ThisMotionVector){
- /* Note the coding mode and vector for each block */
- cpi->pb.FragMVect[FragIndex].x = ThisMotionVector->x;
- cpi->pb.FragMVect[FragIndex].y = ThisMotionVector->y;
- cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode;
-}
-
-static void SetMBMotionVectorsAndMode(CP_INSTANCE *cpi,
- ogg_int32_t YFragIndex,
- ogg_int32_t UFragIndex,
- ogg_int32_t VFragIndex,
- MOTION_VECTOR *ThisMotionVector){
- SetFragMotionVectorAndMode(cpi, YFragIndex, ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, YFragIndex + 1, ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
- ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
- ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, UFragIndex, ThisMotionVector);
- SetFragMotionVectorAndMode(cpi, VFragIndex, ThisMotionVector);
-}
-
-ogg_uint32_t PickModes(CP_INSTANCE *cpi,
- ogg_uint32_t SBRows, ogg_uint32_t SBCols,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t *InterError, ogg_uint32_t *IntraError) {
- ogg_int32_t YFragIndex;
- ogg_int32_t UFragIndex;
- ogg_int32_t VFragIndex;
- ogg_uint32_t MB, B; /* Macro-Block, Block indices */
- ogg_uint32_t SBrow; /* Super-Block row number */
- ogg_uint32_t SBcol; /* Super-Block row number */
- ogg_uint32_t SB=0; /* Super-Block index, initialised to first
- of this component */
-
- ogg_uint32_t MBIntraError; /* Intra error for macro block */
- ogg_uint32_t MBGFError; /* Golden frame macro block error */
- ogg_uint32_t MBGF_MVError; /* Golden frame plus MV error */
- ogg_uint32_t LastMBGF_MVError; /* Golden frame error with
- last used GF motion
- vector. */
- ogg_uint32_t MBInterError; /* Inter no MV macro block error */
- ogg_uint32_t MBLastInterError; /* Inter with last used MV */
- ogg_uint32_t MBPriorLastInterError; /* Inter with prior last MV */
- ogg_uint32_t MBInterMVError; /* Inter MV macro block error */
- ogg_uint32_t MBInterMVExError; /* Inter MV (exhaustive
- search) macro block error */
- ogg_uint32_t MBInterFOURMVError; /* Inter MV error when using 4
- motion vectors per macro
- block */
- ogg_uint32_t BestError; /* Best error so far. */
-
- MOTION_VECTOR FourMVect[6]; /* storage for last used vectors (one
- entry for each block in MB) */
- MOTION_VECTOR LastInterMVect; /* storage for last used Inter frame
- MB motion vector */
- MOTION_VECTOR PriorLastInterMVect; /* storage for prior last used
- Inter frame MB motion vector */
- MOTION_VECTOR TmpMVect; /* Temporary MV storage */
- MOTION_VECTOR LastGFMVect; /* storage for last used Golden
- Frame MB motion vector */
- MOTION_VECTOR InterMVect; /* storage for motion vector */
- MOTION_VECTOR InterMVectEx; /* storage for motion vector result
- from exhaustive search */
- MOTION_VECTOR GFMVect; /* storage for motion vector */
- MOTION_VECTOR ZeroVect;
-
- ogg_uint32_t UVRow;
- ogg_uint32_t UVColumn;
- ogg_uint32_t UVFragOffset;
-
- int MBCodedFlag;
- unsigned char QIndex;
-
- /* initialize error scores */
- *InterError = 0;
- *IntraError = 0;
-
- /* clear down the default motion vector. */
- cpi->MvListCount = 0;
- FourMVect[0].x = 0;
- FourMVect[0].y = 0;
- FourMVect[1].x = 0;
- FourMVect[1].y = 0;
- FourMVect[2].x = 0;
- FourMVect[2].y = 0;
- FourMVect[3].x = 0;
- FourMVect[3].y = 0;
- FourMVect[4].x = 0;
- FourMVect[4].y = 0;
- FourMVect[5].x = 0;
- FourMVect[5].y = 0;
- LastInterMVect.x = 0;
- LastInterMVect.y = 0;
- PriorLastInterMVect.x = 0;
- PriorLastInterMVect.y = 0;
- LastGFMVect.x = 0;
- LastGFMVect.y = 0;
- InterMVect.x = 0;
- InterMVect.y = 0;
- GFMVect.x = 0;
- GFMVect.y = 0;
-
- ZeroVect.x = 0;
- ZeroVect.y = 0;
-
- QIndex = (unsigned char)cpi->pb.FrameQIndex;
-
-
- /* change the quatization matrix to the one at best Q to compute the
- new error score */
- cpi->MinImprovementForNewMV = (MvThreshTable[QIndex] << 12);
- cpi->InterTripOutThresh = (5000<<12);
- cpi->MVChangeFactor = MVChangeFactorTable[QIndex]; /* 0.9 */
-
- if ( cpi->pb.info.quick_p ) {
- cpi->ExhaustiveSearchThresh = (1000<<12);
- cpi->FourMVThreshold = (2500<<12);
- } else {
- cpi->ExhaustiveSearchThresh = (250<<12);
- cpi->FourMVThreshold = (500<<12);
- }
- cpi->MinImprovementForFourMV = cpi->MinImprovementForNewMV * 4;
-
- if(cpi->MinImprovementForFourMV < (40<<12))
- cpi->MinImprovementForFourMV = (40<<12);
-
- cpi->FourMvChangeFactor = 8; /* cpi->MVChangeFactor - 0.05; */
-
- /* decide what block type and motion vectors to use on all of the frames */
- for ( SBrow=0; SBrowpb.BlockMap,SB,MB) < 0 ) continue;
-
- /* Is the current macro block coded (in part or in whole) */
- MBCodedFlag = 0;
- for ( B=0; B<4; B++ ) {
- YFragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
-
- /* Does Block lie in frame: */
- if ( YFragIndex >= 0 ) {
- /* In Frame: Is it coded: */
- if ( cpi->pb.display_fragments[YFragIndex] ) {
- MBCodedFlag = 1;
- break;
- }
- } else
- MBCodedFlag = 0;
- }
-
- /* This one isn't coded go to the next one */
- if(!MBCodedFlag) continue;
-
- /* Calculate U and V FragIndex from YFragIndex */
- YFragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB,MB);
- UVRow = (YFragIndex / (cpi->pb.HFragments * 2));
- UVColumn = (YFragIndex % cpi->pb.HFragments) / 2;
- UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
- UFragIndex = cpi->pb.YPlaneFragments + UVFragOffset;
- VFragIndex = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments +
- UVFragOffset;
-
-
- /**************************************************************
- Find the block choice with the lowest error
-
- NOTE THAT if U or V is coded but no Y from a macro block then
- the mode will be CODE_INTER_NO_MV as this is the default
- state to which the mode data structure is initialised in
- encoder and decoder at the start of each frame. */
-
- BestError = HUGE_ERROR;
-
-
- /* Look at the intra coding error. */
- MBIntraError = GetMBIntraError( cpi, YFragIndex, PixelsPerLine );
- BestError = (BestError > MBIntraError) ? MBIntraError : BestError;
-
- /* Get the golden frame error */
- MBGFError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.GoldenFrame, YFragIndex,
- 0, 0, PixelsPerLine );
- BestError = (BestError > MBGFError) ? MBGFError : BestError;
-
- /* Calculate the 0,0 case. */
- MBInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.LastFrameRecon,
- YFragIndex, 0, 0, PixelsPerLine );
- BestError = (BestError > MBInterError) ? MBInterError : BestError;
-
- /* Measure error for last MV */
- MBLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.LastFrameRecon,
- YFragIndex, LastInterMVect.x,
- LastInterMVect.y, PixelsPerLine );
- BestError = (BestError > MBLastInterError) ?
- MBLastInterError : BestError;
-
- /* Measure error for prior last MV */
- MBPriorLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.LastFrameRecon,
- YFragIndex,
- PriorLastInterMVect.x,
- PriorLastInterMVect.y,
- PixelsPerLine );
- BestError = (BestError > MBPriorLastInterError) ?
- MBPriorLastInterError : BestError;
-
- /* Temporarily force usage of no motionvector blocks */
- MBInterMVError = HUGE_ERROR;
- InterMVect.x = 0; /* Set 0,0 motion vector */
- InterMVect.y = 0;
-
- /* If the best error is above the required threshold search
- for a new inter MV */
- if ( BestError > cpi->MinImprovementForNewMV && cpi->MotionCompensation) {
- /* Use a mix of heirachical and exhaustive searches for
- quick mode. */
- if ( cpi->pb.info.quick_p ) {
- MBInterMVError = GetMBMVInterError( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine,
- cpi->MVPixelOffsetY,
- &InterMVect );
-
- /* If we still do not have a good match try an exhaustive
- MBMV search */
- if ( (MBInterMVError > cpi->ExhaustiveSearchThresh) &&
- (BestError > cpi->ExhaustiveSearchThresh) ) {
-
- MBInterMVExError =
- GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine,
- &InterMVectEx );
-
- /* Is the Variance measure for the EX search
- better... If so then use it. */
- if ( MBInterMVExError < MBInterMVError ) {
- MBInterMVError = MBInterMVExError;
- InterMVect.x = InterMVectEx.x;
- InterMVect.y = InterMVectEx.y;
- }
- }
- }else{
- /* Use an exhaustive search */
- MBInterMVError =
- GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine,
- &InterMVect );
- }
-
-
- /* Is the improvement, if any, good enough to justify a new MV */
- if ( (16 * MBInterMVError < (BestError * cpi->MVChangeFactor)) &&
- ((MBInterMVError + cpi->MinImprovementForNewMV) < BestError) ){
- BestError = MBInterMVError;
- }
-
- }
-
- /* If the best error is still above the required threshold
- search for a golden frame MV */
- MBGF_MVError = HUGE_ERROR;
- GFMVect.x = 0; /* Set 0,0 motion vector */
- GFMVect.y = 0;
- if ( BestError > cpi->MinImprovementForNewMV && cpi->MotionCompensation) {
- /* Do an MV search in the golden reference frame */
- MBGF_MVError = GetMBMVInterError( cpi, cpi->pb.GoldenFrame,
- YFragIndex, PixelsPerLine,
- cpi->MVPixelOffsetY, &GFMVect );
-
- /* Measure error for last GFMV */
- LastMBGF_MVError = GetMBInterError( cpi, cpi->ConvDestBuffer,
- cpi->pb.GoldenFrame,
- YFragIndex, LastGFMVect.x,
- LastGFMVect.y, PixelsPerLine );
-
- /* Check against last GF motion vector and reset if the
- search has thrown a worse result. */
- if ( LastMBGF_MVError < MBGF_MVError ) {
- GFMVect.x = LastGFMVect.x;
- GFMVect.y = LastGFMVect.y;
- MBGF_MVError = LastMBGF_MVError;
- }else{
- LastGFMVect.x = GFMVect.x;
- LastGFMVect.y = GFMVect.y;
- }
-
- /* Is the improvement, if any, good enough to justify a new MV */
- if ( (16 * MBGF_MVError < (BestError * cpi->MVChangeFactor)) &&
- ((MBGF_MVError + cpi->MinImprovementForNewMV) < BestError) ) {
- BestError = MBGF_MVError;
- }
- }
-
- /* Finally... If the best error is still to high then consider
- the 4MV mode */
- MBInterFOURMVError = HUGE_ERROR;
- if ( BestError > cpi->FourMVThreshold && cpi->MotionCompensation) {
- /* Get the 4MV error. */
- MBInterFOURMVError =
- GetFOURMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
- YFragIndex, PixelsPerLine, FourMVect );
-
- /* If the improvement is great enough then use the four MV mode */
- if ( ((MBInterFOURMVError + cpi->MinImprovementForFourMV) <
- BestError) && (16 * MBInterFOURMVError <
- (BestError * cpi->FourMvChangeFactor))) {
- BestError = MBInterFOURMVError;
- }
- }
-
- /********************************************************
- end finding the best error
- *******************************************************
-
- Figure out what to do with the block we chose
-
- Over-ride and force intra if error high and Intra error similar
- Now choose a mode based on lowest error (with bias towards no MV) */
-
- if ( (BestError > cpi->InterTripOutThresh) &&
- (10 * BestError > MBIntraError * 7 ) ) {
- cpi->MBCodingMode = CODE_INTRA;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- } else if ( BestError == MBInterError ) {
- cpi->MBCodingMode = CODE_INTER_NO_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- } else if ( BestError == MBGFError ) {
- cpi->MBCodingMode = CODE_USING_GOLDEN;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- } else if ( BestError == MBLastInterError ) {
- cpi->MBCodingMode = CODE_INTER_LAST_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&LastInterMVect);
- } else if ( BestError == MBPriorLastInterError ) {
- cpi->MBCodingMode = CODE_INTER_PRIOR_LAST;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&PriorLastInterMVect);
-
- /* Swap the prior and last MV cases over */
- TmpMVect.x = PriorLastInterMVect.x;
- TmpMVect.y = PriorLastInterMVect.y;
- PriorLastInterMVect.x = LastInterMVect.x;
- PriorLastInterMVect.y = LastInterMVect.y;
- LastInterMVect.x = TmpMVect.x;
- LastInterMVect.y = TmpMVect.y;
-
- } else if ( BestError == MBInterMVError ) {
-
- cpi->MBCodingMode = CODE_INTER_PLUS_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&InterMVect);
-
- /* Update Prior last mv with last mv */
- PriorLastInterMVect.x = LastInterMVect.x;
- PriorLastInterMVect.y = LastInterMVect.y;
-
- /* Note last inter MV for future use */
- LastInterMVect.x = InterMVect.x;
- LastInterMVect.y = InterMVect.y;
-
- AddMotionVector( cpi, &InterMVect);
-
- } else if ( BestError == MBGF_MVError ) {
-
- cpi->MBCodingMode = CODE_GOLDEN_MV;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&GFMVect);
-
- /* Note last inter GF MV for future use */
- LastGFMVect.x = GFMVect.x;
- LastGFMVect.y = GFMVect.y;
-
- AddMotionVector( cpi, &GFMVect);
- } else if ( BestError == MBInterFOURMVError ) {
- cpi->MBCodingMode = CODE_INTER_FOURMV;
-
- /* Calculate the UV vectors as the average of the Y plane ones. */
- /* First .x component */
- FourMVect[4].x = FourMVect[0].x + FourMVect[1].x +
- FourMVect[2].x + FourMVect[3].x;
- if ( FourMVect[4].x >= 0 )
- FourMVect[4].x = (FourMVect[4].x + 2) / 4;
- else
- FourMVect[4].x = (FourMVect[4].x - 2) / 4;
- FourMVect[5].x = FourMVect[4].x;
-
- /* Then .y component */
- FourMVect[4].y = FourMVect[0].y + FourMVect[1].y +
- FourMVect[2].y + FourMVect[3].y;
- if ( FourMVect[4].y >= 0 )
- FourMVect[4].y = (FourMVect[4].y + 2) / 4;
- else
- FourMVect[4].y = (FourMVect[4].y - 2) / 4;
- FourMVect[5].y = FourMVect[4].y;
-
- SetFragMotionVectorAndMode(cpi, YFragIndex, &FourMVect[0]);
- SetFragMotionVectorAndMode(cpi, YFragIndex + 1, &FourMVect[1]);
- SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
- &FourMVect[2]);
- SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
- &FourMVect[3]);
- SetFragMotionVectorAndMode(cpi, UFragIndex, &FourMVect[4]);
- SetFragMotionVectorAndMode(cpi, VFragIndex, &FourMVect[5]);
-
- /* Note the four MVs values for current macro-block. */
- AddMotionVector( cpi, &FourMVect[0]);
- AddMotionVector( cpi, &FourMVect[1]);
- AddMotionVector( cpi, &FourMVect[2]);
- AddMotionVector( cpi, &FourMVect[3]);
-
- /* Update Prior last mv with last mv */
- PriorLastInterMVect.x = LastInterMVect.x;
- PriorLastInterMVect.y = LastInterMVect.y;
-
- /* Note last inter MV for future use */
- LastInterMVect.x = FourMVect[3].x;
- LastInterMVect.y = FourMVect[3].y;
-
- } else {
-
- cpi->MBCodingMode = CODE_INTRA;
- SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
- VFragIndex,&ZeroVect);
- }
-
-
- /* setting up mode specific block types
- *******************************************************/
-
- *InterError += (BestError>>8);
- *IntraError += (MBIntraError>>8);
-
-
- }
- SB++;
-
- }
- }
-
- /* Return number of pixels coded */
- return 0;
-}
-
-void WriteFrameHeader( CP_INSTANCE *cpi) {
- ogg_uint32_t i;
- oggpack_buffer *opb=cpi->oggbuffer;
- /* Output the frame type (base/key frame or inter frame) */
- oggpackB_write( opb, cpi->pb.FrameType, 1 );
- /* Write out details of the current value of Q... variable resolution. */
- for ( i = 0; i < Q_TABLE_SIZE; i++ ) {
- if ( cpi->pb.ThisFrameQualityValue == cpi->pb.QThreshTable[i] ) {
- oggpackB_write( opb, i, 6 );
- break;
- }
- }
-
- if ( i == Q_TABLE_SIZE ) {
- /* An invalid DCT value was specified. */
- /*IssueWarning( "Invalid Q Multiplier" );*/
- oggpackB_write( opb, 31, 6 );
- }
-
- /* we only support one Q index per frame */
- oggpackB_write( opb, 0, 1 );
-
- /* If the frame was a base frame then write out the frame dimensions. */
- if ( cpi->pb.FrameType == KEY_FRAME ) {
- /* all bits reserved! */
- oggpackB_write( opb, 0, 3 );
- }
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/encoder_huffman.c b/Engine/lib/libtheora/lib/enc/encoder_huffman.c
deleted file mode 100644
index 191ada75c..000000000
--- a/Engine/lib/libtheora/lib/enc/encoder_huffman.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: encoder_huffman.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include
-#include
-#include "codec_internal.h"
-#include "hufftables.h"
-
-static void CreateHuffmanList(HUFF_ENTRY ** HuffRoot,
- ogg_uint32_t HIndex,
- const ogg_uint32_t *FreqList ) {
- int i;
- HUFF_ENTRY *entry_ptr;
- HUFF_ENTRY *search_ptr;
-
- /* Create a HUFF entry for token zero. */
- HuffRoot[HIndex] = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*HuffRoot[HIndex]));
-
- HuffRoot[HIndex]->Previous = NULL;
- HuffRoot[HIndex]->Next = NULL;
- HuffRoot[HIndex]->ZeroChild = NULL;
- HuffRoot[HIndex]->OneChild = NULL;
- HuffRoot[HIndex]->Value = 0;
- HuffRoot[HIndex]->Frequency = FreqList[0];
-
- if ( HuffRoot[HIndex]->Frequency == 0 )
- HuffRoot[HIndex]->Frequency = 1;
-
- /* Now add entries for all the other possible tokens. */
- for ( i = 1; i < MAX_ENTROPY_TOKENS; i++ ) {
- entry_ptr = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*entry_ptr));
-
- entry_ptr->Value = i;
- entry_ptr->Frequency = FreqList[i];
- entry_ptr->ZeroChild = NULL;
- entry_ptr->OneChild = NULL;
-
- /* Force min value of 1. This prevents the tree getting too deep. */
- if ( entry_ptr->Frequency == 0 )
- entry_ptr->Frequency = 1;
-
- if ( entry_ptr->Frequency <= HuffRoot[HIndex]->Frequency ){
- entry_ptr->Next = HuffRoot[HIndex];
- HuffRoot[HIndex]->Previous = entry_ptr;
- entry_ptr->Previous = NULL;
- HuffRoot[HIndex] = entry_ptr;
- }else{
- search_ptr = HuffRoot[HIndex];
- while ( (search_ptr->Next != NULL) &&
- (search_ptr->Frequency < entry_ptr->Frequency) ){
- search_ptr = (HUFF_ENTRY *)search_ptr->Next;
- }
-
- if ( search_ptr->Frequency < entry_ptr->Frequency ){
- entry_ptr->Next = NULL;
- entry_ptr->Previous = search_ptr;
- search_ptr->Next = entry_ptr;
- }else{
- entry_ptr->Next = search_ptr;
- entry_ptr->Previous = search_ptr->Previous;
- search_ptr->Previous->Next = entry_ptr;
- search_ptr->Previous = entry_ptr;
- }
- }
- }
-}
-
-static void CreateCodeArray( HUFF_ENTRY * HuffRoot,
- ogg_uint32_t *HuffCodeArray,
- unsigned char *HuffCodeLengthArray,
- ogg_uint32_t CodeValue,
- unsigned char CodeLength ) {
-
- /* If we are at a leaf then fill in a code array entry. */
- if ( ( HuffRoot->ZeroChild == NULL ) && ( HuffRoot->OneChild == NULL ) ){
- HuffCodeArray[HuffRoot->Value] = CodeValue;
- HuffCodeLengthArray[HuffRoot->Value] = CodeLength;
- }else{
- /* Recursive calls to scan down the tree. */
- CodeLength++;
- CreateCodeArray(HuffRoot->ZeroChild, HuffCodeArray, HuffCodeLengthArray,
- ((CodeValue << 1) + 0), CodeLength);
- CreateCodeArray(HuffRoot->OneChild, HuffCodeArray, HuffCodeLengthArray,
- ((CodeValue << 1) + 1), CodeLength);
- }
-}
-
-static void BuildHuffmanTree( HUFF_ENTRY **HuffRoot,
- ogg_uint32_t *HuffCodeArray,
- unsigned char *HuffCodeLengthArray,
- ogg_uint32_t HIndex,
- const ogg_uint32_t *FreqList ){
-
- HUFF_ENTRY *entry_ptr;
- HUFF_ENTRY *search_ptr;
-
- /* First create a sorted linked list representing the frequencies of
- each token. */
- CreateHuffmanList( HuffRoot, HIndex, FreqList );
-
- /* Now build the tree from the list. */
-
- /* While there are at least two items left in the list. */
- while ( HuffRoot[HIndex]->Next != NULL ){
- /* Create the new node as the parent of the first two in the list. */
- entry_ptr = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*entry_ptr));
- entry_ptr->Value = -1;
- entry_ptr->Frequency = HuffRoot[HIndex]->Frequency +
- HuffRoot[HIndex]->Next->Frequency ;
- entry_ptr->ZeroChild = HuffRoot[HIndex];
- entry_ptr->OneChild = HuffRoot[HIndex]->Next;
-
- /* If there are still more items in the list then insert the new
- node into the list. */
- if (entry_ptr->OneChild->Next != NULL ){
- /* Set up the provisional 'new root' */
- HuffRoot[HIndex] = entry_ptr->OneChild->Next;
- HuffRoot[HIndex]->Previous = NULL;
-
- /* Now scan through the remaining list to insert the new entry
- at the appropriate point. */
- if ( entry_ptr->Frequency <= HuffRoot[HIndex]->Frequency ){
- entry_ptr->Next = HuffRoot[HIndex];
- HuffRoot[HIndex]->Previous = entry_ptr;
- entry_ptr->Previous = NULL;
- HuffRoot[HIndex] = entry_ptr;
- }else{
- search_ptr = HuffRoot[HIndex];
- while ( (search_ptr->Next != NULL) &&
- (search_ptr->Frequency < entry_ptr->Frequency) ){
- search_ptr = search_ptr->Next;
- }
-
- if ( search_ptr->Frequency < entry_ptr->Frequency ){
- entry_ptr->Next = NULL;
- entry_ptr->Previous = search_ptr;
- search_ptr->Next = entry_ptr;
- }else{
- entry_ptr->Next = search_ptr;
- entry_ptr->Previous = search_ptr->Previous;
- search_ptr->Previous->Next = entry_ptr;
- search_ptr->Previous = entry_ptr;
- }
- }
- }else{
- /* Build has finished. */
- entry_ptr->Next = NULL;
- entry_ptr->Previous = NULL;
- HuffRoot[HIndex] = entry_ptr;
- }
-
- /* Delete the Next/Previous properties of the children (PROB NOT NEC). */
- entry_ptr->ZeroChild->Next = NULL;
- entry_ptr->ZeroChild->Previous = NULL;
- entry_ptr->OneChild->Next = NULL;
- entry_ptr->OneChild->Previous = NULL;
-
- }
-
- /* Now build a code array from the tree. */
- CreateCodeArray( HuffRoot[HIndex], HuffCodeArray,
- HuffCodeLengthArray, 0, 0);
-}
-
-static void DestroyHuffTree(HUFF_ENTRY *root_ptr){
- if (root_ptr){
- if ( root_ptr->ZeroChild )
- DestroyHuffTree(root_ptr->ZeroChild);
-
- if ( root_ptr->OneChild )
- DestroyHuffTree(root_ptr->OneChild);
-
- _ogg_free(root_ptr);
- }
-}
-
-void ClearHuffmanSet( PB_INSTANCE *pbi ){
- int i;
-
- ClearHuffmanTrees(pbi->HuffRoot_VP3x);
-
- for ( i = 0; i < NUM_HUFF_TABLES; i++ )
- if (pbi->HuffCodeArray_VP3x[i])
- _ogg_free (pbi->HuffCodeArray_VP3x[i]);
-
- for ( i = 0; i < NUM_HUFF_TABLES; i++ )
- if (pbi->HuffCodeLengthArray_VP3x[i])
- _ogg_free (pbi->HuffCodeLengthArray_VP3x[i]);
-}
-
-void InitHuffmanSet( PB_INSTANCE *pbi ){
- int i;
-
- ClearHuffmanSet(pbi);
-
- pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31;
-
- for ( i = 0; i < NUM_HUFF_TABLES; i++ ){
- pbi->HuffCodeArray_VP3x[i] =
- _ogg_calloc(MAX_ENTROPY_TOKENS,
- sizeof(*pbi->HuffCodeArray_VP3x[i]));
- pbi->HuffCodeLengthArray_VP3x[i] =
- _ogg_calloc(MAX_ENTROPY_TOKENS,
- sizeof(*pbi->HuffCodeLengthArray_VP3x[i]));
- BuildHuffmanTree( pbi->HuffRoot_VP3x,
- pbi->HuffCodeArray_VP3x[i],
- pbi->HuffCodeLengthArray_VP3x[i],
- i, FrequencyCounts_VP3[i]);
- }
-}
-
-static int ReadHuffTree(HUFF_ENTRY * HuffRoot, int depth,
- oggpack_buffer *opb) {
- long bit;
- long ret;
- theora_read(opb,1,&bit);
- if(bit < 0) return OC_BADHEADER;
- else if(!bit) {
- int ret;
- if (++depth > 32) return OC_BADHEADER;
- HuffRoot->ZeroChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
- ret = ReadHuffTree(HuffRoot->ZeroChild, depth, opb);
- if (ret < 0) return ret;
- HuffRoot->OneChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
- ret = ReadHuffTree(HuffRoot->OneChild, depth, opb);
- if (ret < 0) return ret;
- HuffRoot->Value = -1;
- } else {
- HuffRoot->ZeroChild = NULL;
- HuffRoot->OneChild = NULL;
- theora_read(opb,5,&ret);
- HuffRoot->Value=ret;;
- if (HuffRoot->Value < 0) return OC_BADHEADER;
- }
- return 0;
-}
-
-int ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb) {
- int i;
- for (i=0; iHuffRoot[i] = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
- ret = ReadHuffTree(ci->HuffRoot[i], 0, opb);
- if (ret) return ret;
- }
- return 0;
-}
-
-static void WriteHuffTree(HUFF_ENTRY *HuffRoot, oggpack_buffer *opb) {
- if (HuffRoot->Value >= 0) {
- oggpackB_write(opb, 1, 1);
- oggpackB_write(opb, HuffRoot->Value, 5);
- } else {
- oggpackB_write(opb, 0, 1);
- WriteHuffTree(HuffRoot->ZeroChild, opb);
- WriteHuffTree(HuffRoot->OneChild, opb);
- }
-}
-
-void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
- oggpack_buffer *opb) {
- int i;
- for(i=0; iValue = HuffSrc->Value;
- if (HuffSrc->Value < 0) {
- HuffDst->ZeroChild = CopyHuffTree(HuffSrc->ZeroChild);
- HuffDst->OneChild = CopyHuffTree(HuffSrc->OneChild);
- }
- return HuffDst;
- }
- return NULL;
-}
-
-void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci) {
- int i;
- pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31;
- for(i=0; iHuffRoot_VP3x[i] = CopyHuffTree(ci->HuffRoot[i]);
- }
-}
-
-void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]){
- int i;
- for(i=0; i
-#include "codec_internal.h"
-
-#include "quant_lookup.h"
-
-#define IdctAdjustBeforeShift 8
-/* cos(n*pi/16) or sin(8-n)*pi/16) */
-#define xC1S7 64277
-#define xC2S6 60547
-#define xC3S5 54491
-#define xC4S4 46341
-#define xC5S3 36410
-#define xC6S2 25080
-#define xC7S1 12785
-
-/* compute the 16 bit signed 1D inverse DCT - spec version */
-/*
-static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
- ogg_int32_t t[8], r;
- ogg_int16_t *y = InputData;
- ogg_int16_t *x = OutputData;
-
- t[0] = y[0] + y[4];
- t[0] &= 0xffff;
- t[0] = (xC4S4 * t[0]) >> 16;
-
- t[1] = y[0] - y[4];
- t[1] &= 0xffff;
- t[1] = (xC4S4 * t[1]) >> 16;
-
- t[2] = ((xC6S2 * y[2]) >> 16) - ((xC2S6 * y[6]) >> 16);
- t[3] = ((xC2S6 * y[2]) >> 16) + ((xC6S2 * y[6]) >> 16);
- t[4] = ((xC7S1 * y[1]) >> 16) - ((xC1S7 * y[7]) >> 16);
- t[5] = ((xC3S5 * y[5]) >> 16) - ((xC5S3 * y[3]) >> 16);
- t[6] = ((xC5S3 * y[5]) >> 16) + ((xC3S5 * y[3]) >> 16);
- t[7] = ((xC1S7 * y[1]) >> 16) + ((xC7S1 * y[7]) >> 16);
-
- r = t[4] + t[5];
- t[5] = t[4] - t[5];
- t[5] &= 0xffff;
- t[5] = (xC4S4 * (-t[5])) >> 16;
- t[4] = r;
-
- r = t[7] + t[6];
- t[6] = t[7] - t[6];
- t[6] &= 0xffff;
- t[6] = (xC4S4 * t[6]) >> 16;
- t[7] = r;
-
- r = t[0] + t[3];
- t[3] = t[0] - t[3];
- t[0] = r;
-
- r = t[1] + t[2];
- t[2] = t[1] - t[2];
- t[1] = r;
-
- r = t[6] + t[5];
- t[5] = t[6] - t[5];
- t[6] = r;
-
- r = t[0] + t[7];
- r &= 0xffff;
- x[0] = r;
-
- r = t[1] + t[6];
- r &= 0xffff;
- x[1] = r;
-
- r = t[2] + t[5];
- r &= 0xffff;
- x[2] = r;
-
- r = t[3] + t[4];
- r &= 0xffff;
- x[3] = r;
-
- r = t[3] - t[4];
- r &= 0xffff;
- x[4] = r;
-
- r = t[2] - t[5];
- r &= 0xffff;
- x[5] = r;
-
- r = t[1] - t[6];
- r &= 0xffff;
- x[6] = r;
-
- r = t[0] - t[7];
- r &= 0xffff;
- x[7] = r;
-
-}
-*/
-
-static void dequant_slow( ogg_int16_t * dequant_coeffs,
- ogg_int16_t * quantized_list,
- ogg_int32_t * DCT_block) {
- int i;
- for(i=0;i<64;i++)
- DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
-}
-
-
-
-void IDctSlow__c( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ) {
- ogg_int32_t IntermediateData[64];
- ogg_int32_t * ip = IntermediateData;
- ogg_int16_t * op = OutputData;
-
- ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
- ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
- ogg_int32_t t1, t2;
-
- int loop;
-
- dequant_slow( QuantMatrix, InputData, IntermediateData);
-
- /* Inverse DCT on the rows now */
- for ( loop = 0; loop < 8; loop++){
- /* Check for non-zero values */
- if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
- t1 = (xC1S7 * ip[1]);
- t2 = (xC7S1 * ip[7]);
- t1 >>= 16;
- t2 >>= 16;
- _A = t1 + t2;
-
- t1 = (xC7S1 * ip[1]);
- t2 = (xC1S7 * ip[7]);
- t1 >>= 16;
- t2 >>= 16;
- _B = t1 - t2;
-
- t1 = (xC3S5 * ip[3]);
- t2 = (xC5S3 * ip[5]);
- t1 >>= 16;
- t2 >>= 16;
- _C = t1 + t2;
-
- t1 = (xC3S5 * ip[5]);
- t2 = (xC5S3 * ip[3]);
- t1 >>= 16;
- t2 >>= 16;
- _D = t1 - t2;
-
- t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
- t1 >>= 16;
- _Ad = t1;
-
- t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
- t1 >>= 16;
- _Bd = t1;
-
-
- _Cd = _A + _C;
- _Dd = _B + _D;
-
- t1 = (xC4S4 * (ogg_int16_t)(ip[0] + ip[4]));
- t1 >>= 16;
- _E = t1;
-
- t1 = (xC4S4 * (ogg_int16_t)(ip[0] - ip[4]));
- t1 >>= 16;
- _F = t1;
-
- t1 = (xC2S6 * ip[2]);
- t2 = (xC6S2 * ip[6]);
- t1 >>= 16;
- t2 >>= 16;
- _G = t1 + t2;
-
- t1 = (xC6S2 * ip[2]);
- t2 = (xC2S6 * ip[6]);
- t1 >>= 16;
- t2 >>= 16;
- _H = t1 - t2;
-
-
- _Ed = _E - _G;
- _Gd = _E + _G;
-
- _Add = _F + _Ad;
- _Bdd = _Bd - _H;
-
- _Fd = _F - _Ad;
- _Hd = _Bd + _H;
-
- /* Final sequence of operations over-write original inputs. */
- ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
- ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
-
- ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
- ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
-
- ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
- ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
-
- ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
- ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
-
- }
-
- ip += 8; /* next row */
- }
-
- ip = IntermediateData;
-
- for ( loop = 0; loop < 8; loop++){
- /* Check for non-zero values (bitwise or faster than ||) */
- if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
- ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
-
- t1 = (xC1S7 * ip[1*8]);
- t2 = (xC7S1 * ip[7*8]);
- t1 >>= 16;
- t2 >>= 16;
- _A = t1 + t2;
-
- t1 = (xC7S1 * ip[1*8]);
- t2 = (xC1S7 * ip[7*8]);
- t1 >>= 16;
- t2 >>= 16;
- _B = t1 - t2;
-
- t1 = (xC3S5 * ip[3*8]);
- t2 = (xC5S3 * ip[5*8]);
- t1 >>= 16;
- t2 >>= 16;
- _C = t1 + t2;
-
- t1 = (xC3S5 * ip[5*8]);
- t2 = (xC5S3 * ip[3*8]);
- t1 >>= 16;
- t2 >>= 16;
- _D = t1 - t2;
-
- t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
- t1 >>= 16;
- _Ad = t1;
-
- t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
- t1 >>= 16;
- _Bd = t1;
-
-
- _Cd = _A + _C;
- _Dd = _B + _D;
-
- t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] + ip[4*8]));
- t1 >>= 16;
- _E = t1;
-
- t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] - ip[4*8]));
- t1 >>= 16;
- _F = t1;
-
- t1 = (xC2S6 * ip[2*8]);
- t2 = (xC6S2 * ip[6*8]);
- t1 >>= 16;
- t2 >>= 16;
- _G = t1 + t2;
-
- t1 = (xC6S2 * ip[2*8]);
- t2 = (xC2S6 * ip[6*8]);
- t1 >>= 16;
- t2 >>= 16;
- _H = t1 - t2;
-
- _Ed = _E - _G;
- _Gd = _E + _G;
-
- _Add = _F + _Ad;
- _Bdd = _Bd - _H;
-
- _Fd = _F - _Ad;
- _Hd = _Bd + _H;
-
- _Gd += IdctAdjustBeforeShift;
- _Add += IdctAdjustBeforeShift;
- _Ed += IdctAdjustBeforeShift;
- _Fd += IdctAdjustBeforeShift;
-
- /* Final sequence of operations over-write original inputs. */
- op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
- op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
-
- op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
- op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
-
- op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
- op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
-
- op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
- op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
- }else{
- op[0*8] = 0;
- op[7*8] = 0;
- op[1*8] = 0;
- op[2*8] = 0;
- op[3*8] = 0;
- op[4*8] = 0;
- op[5*8] = 0;
- op[6*8] = 0;
- }
-
- ip++; /* next column */
- op++;
- }
-}
-
-/************************
- x x x x 0 0 0 0
- x x x 0 0 0 0 0
- x x 0 0 0 0 0 0
- x 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
-*************************/
-
-static void dequant_slow10( ogg_int16_t * dequant_coeffs,
- ogg_int16_t * quantized_list,
- ogg_int32_t * DCT_block){
- int i;
- memset(DCT_block,0, 128);
- for(i=0;i<10;i++)
- DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
-
-}
-
-void IDct10__c( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ){
- ogg_int32_t IntermediateData[64];
- ogg_int32_t * ip = IntermediateData;
- ogg_int16_t * op = OutputData;
-
- ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
- ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
- ogg_int32_t t1, t2;
-
- int loop;
-
- dequant_slow10( QuantMatrix, InputData, IntermediateData);
-
- /* Inverse DCT on the rows now */
- for ( loop = 0; loop < 4; loop++){
- /* Check for non-zero values */
- if ( ip[0] | ip[1] | ip[2] | ip[3] ){
- t1 = (xC1S7 * ip[1]);
- t1 >>= 16;
- _A = t1;
-
- t1 = (xC7S1 * ip[1]);
- t1 >>= 16;
- _B = t1 ;
-
- t1 = (xC3S5 * ip[3]);
- t1 >>= 16;
- _C = t1;
-
- t2 = (xC5S3 * ip[3]);
- t2 >>= 16;
- _D = -t2;
-
-
- t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
- t1 >>= 16;
- _Ad = t1;
-
- t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
- t1 >>= 16;
- _Bd = t1;
-
-
- _Cd = _A + _C;
- _Dd = _B + _D;
-
- t1 = (xC4S4 * ip[0] );
- t1 >>= 16;
- _E = t1;
-
- _F = t1;
-
- t1 = (xC2S6 * ip[2]);
- t1 >>= 16;
- _G = t1;
-
- t1 = (xC6S2 * ip[2]);
- t1 >>= 16;
- _H = t1 ;
-
-
- _Ed = _E - _G;
- _Gd = _E + _G;
-
- _Add = _F + _Ad;
- _Bdd = _Bd - _H;
-
- _Fd = _F - _Ad;
- _Hd = _Bd + _H;
-
- /* Final sequence of operations over-write original inputs. */
- ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
- ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
-
- ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
- ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
-
- ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
- ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
-
- ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
- ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
-
- }
-
- ip += 8; /* next row */
- }
-
- ip = IntermediateData;
-
- for ( loop = 0; loop < 8; loop++) {
- /* Check for non-zero values (bitwise or faster than ||) */
- if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) {
-
- t1 = (xC1S7 * ip[1*8]);
- t1 >>= 16;
- _A = t1 ;
-
- t1 = (xC7S1 * ip[1*8]);
- t1 >>= 16;
- _B = t1 ;
-
- t1 = (xC3S5 * ip[3*8]);
- t1 >>= 16;
- _C = t1 ;
-
- t2 = (xC5S3 * ip[3*8]);
- t2 >>= 16;
- _D = - t2;
-
-
- t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
- t1 >>= 16;
- _Ad = t1;
-
- t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
- t1 >>= 16;
- _Bd = t1;
-
-
- _Cd = _A + _C;
- _Dd = _B + _D;
-
- t1 = (xC4S4 * ip[0*8]);
- t1 >>= 16;
- _E = t1;
- _F = t1;
-
- t1 = (xC2S6 * ip[2*8]);
- t1 >>= 16;
- _G = t1;
-
- t1 = (xC6S2 * ip[2*8]);
- t1 >>= 16;
- _H = t1;
-
-
- _Ed = _E - _G;
- _Gd = _E + _G;
-
- _Add = _F + _Ad;
- _Bdd = _Bd - _H;
-
- _Fd = _F - _Ad;
- _Hd = _Bd + _H;
-
- _Gd += IdctAdjustBeforeShift;
- _Add += IdctAdjustBeforeShift;
- _Ed += IdctAdjustBeforeShift;
- _Fd += IdctAdjustBeforeShift;
-
- /* Final sequence of operations over-write original inputs. */
- op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
- op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
-
- op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
- op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
-
- op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
- op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
-
- op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
- op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
- }else{
- op[0*8] = 0;
- op[7*8] = 0;
- op[1*8] = 0;
- op[2*8] = 0;
- op[3*8] = 0;
- op[4*8] = 0;
- op[5*8] = 0;
- op[6*8] = 0;
- }
-
- ip++; /* next column */
- op++;
- }
-}
-
-/***************************
- x 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0
-**************************/
-
-void IDct1( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ){
- int loop;
-
- ogg_int16_t OutD;
-
- OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
-
- for(loop=0;loop<64;loop++)
- OutputData[loop]=OutD;
-
-}
-
-void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
- funcs->IDctSlow = IDctSlow__c;
- funcs->IDct10 = IDct10__c;
- funcs->IDct3 = IDct10__c;
-#if defined(USE_ASM)
- // todo: make mmx encoder idct for MSC one day...
-#if !defined (_MSC_VER)
- if (cpu_flags & OC_CPU_X86_MMX) {
- dsp_mmx_idct_init(funcs);
- }
-#endif
-#endif
-}
diff --git a/Engine/lib/libtheora/lib/enc/encoder_lookup.h b/Engine/lib/libtheora/lib/enc/encoder_lookup.h
deleted file mode 100644
index c5759869a..000000000
--- a/Engine/lib/libtheora/lib/enc/encoder_lookup.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function: simple static lookups for VP3 frame encoder
- last mod: $Id: encoder_lookup.h 15323 2008-09-19 19:43:59Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static const ogg_uint32_t MvPattern[(MAX_MV_EXTENT * 2) + 1] = {
- 0x000000ff, 0x000000fd, 0x000000fb, 0x000000f9,
- 0x000000f7, 0x000000f5, 0x000000f3, 0x000000f1,
- 0x000000ef, 0x000000ed, 0x000000eb, 0x000000e9,
- 0x000000e7, 0x000000e5, 0x000000e3, 0x000000e1,
- 0x0000006f, 0x0000006d, 0x0000006b, 0x00000069,
- 0x00000067, 0x00000065, 0x00000063, 0x00000061,
- 0x0000002f, 0x0000002d, 0x0000002b, 0x00000029,
- 0x00000009, 0x00000007, 0x00000002, 0x00000000,
- 0x00000001, 0x00000006, 0x00000008, 0x00000028,
- 0x0000002a, 0x0000002c, 0x0000002e, 0x00000060,
- 0x00000062, 0x00000064, 0x00000066, 0x00000068,
- 0x0000006a, 0x0000006c, 0x0000006e, 0x000000e0,
- 0x000000e2, 0x000000e4, 0x000000e6, 0x000000e8,
- 0x000000ea, 0x000000ec, 0x000000ee, 0x000000f0,
- 0x000000f2, 0x000000f4, 0x000000f6, 0x000000f8,
- 0x000000fa, 0x000000fc, 0x000000fe,
-};
-
-static const ogg_uint32_t MvBits[(MAX_MV_EXTENT * 2) + 1] = {
- 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 6, 6, 6, 6, 4, 4, 3, 3,
- 3, 4, 4, 6, 6, 6, 6, 7,
- 7, 7, 7, 7, 7, 7, 7, 8,
- 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8,
-};
-
-static const ogg_uint32_t MvPattern2[(MAX_MV_EXTENT * 2) + 1] = {
- 0x0000003f, 0x0000003d, 0x0000003b, 0x00000039,
- 0x00000037, 0x00000035, 0x00000033, 0x00000031,
- 0x0000002f, 0x0000002d, 0x0000002b, 0x00000029,
- 0x00000027, 0x00000025, 0x00000023, 0x00000021,
- 0x0000001f, 0x0000001d, 0x0000001b, 0x00000019,
- 0x00000017, 0x00000015, 0x00000013, 0x00000011,
- 0x0000000f, 0x0000000d, 0x0000000b, 0x00000009,
- 0x00000007, 0x00000005, 0x00000003, 0x00000000,
- 0x00000002, 0x00000004, 0x00000006, 0x00000008,
- 0x0000000a, 0x0000000c, 0x0000000e, 0x00000010,
- 0x00000012, 0x00000014, 0x00000016, 0x00000018,
- 0x0000001a, 0x0000001c, 0x0000001e, 0x00000020,
- 0x00000022, 0x00000024, 0x00000026, 0x00000028,
- 0x0000002a, 0x0000002c, 0x0000002e, 0x00000030,
- 0x00000032, 0x00000034, 0x00000036, 0x00000038,
- 0x0000003a, 0x0000003c, 0x0000003e,
-};
-
-static const ogg_uint32_t MvBits2[(MAX_MV_EXTENT * 2) + 1] = {
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6,
-};
-
-static const ogg_uint32_t ModeBitPatterns[MAX_MODES] = {
- 0x00, 0x02, 0x06, 0x0E, 0x1E, 0x3E, 0x7E, 0x7F };
-
-static const ogg_int32_t ModeBitLengths[MAX_MODES] = {
- 1, 2, 3, 4, 5, 6, 7, 7 };
-
-static const unsigned char ModeSchemes[MODE_METHODS-2][MAX_MODES] = {
- /* Last Mv dominates */
- { 3, 4, 2, 0, 1, 5, 6, 7 }, /* L P M N I G GM 4 */
- { 2, 4, 3, 0, 1, 5, 6, 7 }, /* L P N M I G GM 4 */
- { 3, 4, 1, 0, 2, 5, 6, 7 }, /* L M P N I G GM 4 */
- { 2, 4, 1, 0, 3, 5, 6, 7 }, /* L M N P I G GM 4 */
-
- /* No MV dominates */
- { 0, 4, 3, 1, 2, 5, 6, 7 }, /* N L P M I G GM 4 */
- { 0, 5, 4, 2, 3, 1, 6, 7 }, /* N G L P M I GM 4 */
-
-};
-
-
-static const ogg_uint32_t MvThreshTable[Q_TABLE_SIZE] = {
- 65, 65, 65, 65, 50, 50, 50, 50,
- 40, 40, 40, 40, 40, 40, 40, 40,
- 30, 30, 30, 30, 30, 30, 30, 30,
- 20, 20, 20, 20, 20, 20, 20, 20,
- 15, 15, 15, 15, 15, 15, 15, 15,
- 10, 10, 10, 10, 10, 10, 10, 10,
- 5, 5, 5, 5, 5, 5, 5, 5,
- 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static const ogg_uint32_t MVChangeFactorTable[Q_TABLE_SIZE] = {
- 11, 11, 11, 11, 12, 12, 12, 12,
- 13, 13, 13, 13, 13, 13, 13, 13,
- 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14,
- 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15
-};
diff --git a/Engine/lib/libtheora/lib/enc/encoder_quant.c b/Engine/lib/libtheora/lib/enc/encoder_quant.c
deleted file mode 100644
index a5639a233..000000000
--- a/Engine/lib/libtheora/lib/enc/encoder_quant.c
+++ /dev/null
@@ -1,558 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2005 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: encoder_quant.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include
-#include
-#include "codec_internal.h"
-#include "quant_lookup.h"
-
-#define OC_QUANT_MAX (1024<<2)
-static const unsigned DC_QUANT_MIN[2]={4<<2,8<<2};
-static const unsigned AC_QUANT_MIN[2]={2<<2,4<<2};
-#define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a))
-#define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a))
-#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
-
-static int ilog(unsigned _v){
- int ret;
- for(ret=0;_v;ret++)_v>>=1;
- return ret;
-}
-
-
-void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer* _opb) {
-
- th_quant_info *_qinfo = &pbi->quant_info;
-
- const th_quant_ranges *qranges;
- const th_quant_base *base_mats[2*3*64];
- int indices[2][3][64];
- int nbase_mats;
- int nbits;
- int ci;
- int qi;
- int qri;
- int qti;
- int pli;
- int qtj;
- int plj;
- int bmi;
- int i;
-
- /*Unlike the scale tables, we can't assume the maximum value will be in
- index 0, so search for it here.*/
- i=_qinfo->loop_filter_limits[0];
- for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
- nbits=ilog(i);
- oggpackB_write(_opb,nbits,3);
- for(qi=0;qi<64;qi++){
- oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
- }
- /* 580 bits for VP3.*/
- nbits=OC_MAXI(ilog(_qinfo->ac_scale[0]),1);
- oggpackB_write(_opb,nbits-1,4);
- for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
- /* 516 bits for VP3.*/
- nbits=OC_MAXI(ilog(_qinfo->dc_scale[0]),1);
- oggpackB_write(_opb,nbits-1,4);
- for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
- /*Consolidate any duplicate base matrices.*/
- nbase_mats=0;
- for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
- qranges=_qinfo->qi_ranges[qti]+pli;
- for(qri=0;qri<=qranges->nranges;qri++){
- for(bmi=0;;bmi++){
- if(bmi>=nbase_mats){
- base_mats[bmi]=qranges->base_matrices+qri;
- indices[qti][pli][qri]=nbase_mats++;
- break;
- }
- else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
- sizeof(base_mats[bmi][0]))==0){
- indices[qti][pli][qri]=bmi;
- break;
- }
- }
- }
- }
- /*Write out the list of unique base matrices.
- 1545 bits for VP3 matrices.*/
- oggpackB_write(_opb,nbase_mats-1,9);
- for(bmi=0;bmiqi_ranges[qti]+pli;
- if(i>0){
- if(qti>0){
- if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
- memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
- qranges->nranges*sizeof(qranges->sizes[0]))==0&&
- memcmp(indices[qti][pli],indices[qti-1][pli],
- (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
- oggpackB_write(_opb,1,2);
- continue;
- }
- }
- qtj=(i-1)/3;
- plj=(i-1)%3;
- if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
- memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
- qranges->nranges*sizeof(qranges->sizes[0]))==0&&
- memcmp(indices[qti][pli],indices[qtj][plj],
- (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
- oggpackB_write(_opb,0,1+(qti>0));
- continue;
- }
- oggpackB_write(_opb,1,1);
- }
- oggpackB_write(_opb,indices[qti][pli][0],nbits);
- for(qi=qri=0;qi<63;qri++){
- oggpackB_write(_opb,qranges->sizes[qri]-1,ilog(62-qi));
- qi+=qranges->sizes[qri];
- oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
- }
- }
-}
-
-/* a copied/reconciled version of derf's theora-exp code; redundancy
- should be eliminated at some point */
-void InitQTables( PB_INSTANCE *pbi ){
- int qti; /* coding mode: intra or inter */
- int pli; /* Y U V */
- th_quant_info *qinfo = &pbi->quant_info;
-
- pbi->QThreshTable = pbi->quant_info.ac_scale;
-
- for(qti=0;qti<2;qti++){
- for(pli=0;pli<3;pli++){
- int qi; /* quality index */
- int qri; /* range iterator */
-
- for(qi=0,qri=0; qri<=qinfo->qi_ranges[qti][pli].nranges; qri++){
- th_quant_base base;
-
- ogg_uint32_t q;
- int qi_start;
- int qi_end;
- int ci;
- memcpy(base,qinfo->qi_ranges[qti][pli].base_matrices[qri],
- sizeof(base));
-
- qi_start=qi;
- if(qri==qinfo->qi_ranges[qti][pli].nranges)
- qi_end=qi+1;
- else
- qi_end=qi+qinfo->qi_ranges[qti][pli].sizes[qri];
-
- /* Iterate over quality indicies in this range */
- for(;;){
-
- /*Scale DC the coefficient from the proper table.*/
- q=((ogg_uint32_t)qinfo->dc_scale[qi]*base[0]/100)<<2;
- q=OC_CLAMPI(DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
- pbi->quant_tables[qti][pli][qi][0]=(ogg_uint16_t)q;
-
- /*Now scale AC coefficients from the proper table.*/
- for(ci=1;ci<64;ci++){
- q=((ogg_uint32_t)qinfo->ac_scale[qi]*base[ci]/100)<<2;
- q=OC_CLAMPI(AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
- pbi->quant_tables[qti][pli][qi][ci]=(ogg_uint16_t)q;
- }
-
- if(++qi>=qi_end)break;
-
- /*Interpolate the next base matrix.*/
- for(ci=0;ci<64;ci++){
- base[ci]=(unsigned char)
- ((2*((qi_end-qi)*qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
- (qi-qi_start)*qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
- +qinfo->qi_ranges[qti][pli].sizes[qri])/
- (2*qinfo->qi_ranges[qti][pli].sizes[qri]));
- }
- }
- }
- }
- }
-}
-
-static void BuildZigZagIndex(PB_INSTANCE *pbi){
- ogg_int32_t i,j;
-
- /* invert the row to zigzag coeffient order lookup table */
- for ( i = 0; i < BLOCK_SIZE; i++ ){
- j = dezigzag_index[i];
- pbi->zigzag_index[j] = i;
- }
-}
-
-static void init_quantizer ( CP_INSTANCE *cpi,
- unsigned char QIndex ){
- int i;
- double ZBinFactor;
- double RoundingFactor;
-
- double temp_fp_quant_coeffs;
- double temp_fp_quant_round;
- double temp_fp_ZeroBinSize;
- PB_INSTANCE *pbi = &cpi->pb;
-
-
- const ogg_uint16_t * temp_Y_coeffs;
- const ogg_uint16_t * temp_U_coeffs;
- const ogg_uint16_t * temp_V_coeffs;
- const ogg_uint16_t * temp_Inter_Y_coeffs;
- const ogg_uint16_t * temp_Inter_U_coeffs;
- const ogg_uint16_t * temp_Inter_V_coeffs;
- ogg_uint16_t scale_factor = cpi->pb.quant_info.ac_scale[QIndex];
-
- /* Notes on setup of quantisers. The initial multiplication by
- the scale factor is done in the ogg_int32_t domain to insure that the
- precision in the quantiser is the same as in the inverse
- quantiser where all calculations are integer. The "<< 2" is a
- normalisation factor for the forward DCT transform. */
-
- temp_Y_coeffs = pbi->quant_tables[0][0][QIndex];
- temp_U_coeffs = pbi->quant_tables[0][1][QIndex];
- temp_V_coeffs = pbi->quant_tables[0][2][QIndex];
- temp_Inter_Y_coeffs = pbi->quant_tables[1][0][QIndex];
- temp_Inter_U_coeffs = pbi->quant_tables[1][1][QIndex];
- temp_Inter_V_coeffs = pbi->quant_tables[1][2][QIndex];
-
- ZBinFactor = 0.9;
-
- switch(cpi->pb.info.sharpness){
- case 0:
- ZBinFactor = 0.65;
- if ( scale_factor <= 50 )
- RoundingFactor = 0.499;
- else
- RoundingFactor = 0.46;
- break;
- case 1:
- ZBinFactor = 0.75;
- if ( scale_factor <= 50 )
- RoundingFactor = 0.476;
- else
- RoundingFactor = 0.400;
- break;
-
- default:
- ZBinFactor = 0.9;
- if ( scale_factor <= 50 )
- RoundingFactor = 0.476;
- else
- RoundingFactor = 0.333;
- break;
- }
-
- /* Use fixed multiplier for intra Y DC */
- temp_fp_quant_coeffs = temp_Y_coeffs[0];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Y_round[0] = (ogg_int32_t) (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Y[0] = (ogg_int32_t) (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Y_coeffs[0] = (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Intra U */
- temp_fp_quant_coeffs = temp_U_coeffs[0];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_U_round[0] = (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_U[0] = (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_U_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Intra V */
- temp_fp_quant_coeffs = temp_V_coeffs[0];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_V_round[0] = (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_V[0] = (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_V_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-
- /* Inter Y */
- temp_fp_quant_coeffs = temp_Inter_Y_coeffs[0];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Inter_Y_round[0]= (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Inter_Y[0]= (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Inter_Y_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Inter U */
- temp_fp_quant_coeffs = temp_Inter_U_coeffs[0];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Inter_U_round[0]= (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Inter_U[0]= (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Inter_U_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Inter V */
- temp_fp_quant_coeffs = temp_Inter_V_coeffs[0];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Inter_V_round[0]= (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Inter_V[0]= (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Inter_V_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-
- for ( i = 1; i < 64; i++ ){
- /* Intra Y */
- temp_fp_quant_coeffs = temp_Y_coeffs[i];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Y_round[i] = (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Y[i] = (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Y_coeffs[i] = (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Intra U */
- temp_fp_quant_coeffs = temp_U_coeffs[i];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_U_round[i] = (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_U[i] = (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_U_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Intra V */
- temp_fp_quant_coeffs = temp_V_coeffs[i];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_V_round[i] = (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_V[i] = (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_V_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Inter Y */
- temp_fp_quant_coeffs = temp_Inter_Y_coeffs[i];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Inter_Y_round[i]= (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Inter_Y[i]= (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Inter_Y_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Inter U */
- temp_fp_quant_coeffs = temp_Inter_U_coeffs[i];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Inter_U_round[i]= (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Inter_U[i]= (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Inter_U_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
- /* Inter V */
- temp_fp_quant_coeffs = temp_Inter_V_coeffs[i];
- temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
- pbi->fp_quant_Inter_V_round[i]= (0.5 + temp_fp_quant_round);
- temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
- pbi->fp_ZeroBinSize_Inter_V[i]= (0.5 + temp_fp_ZeroBinSize);
- temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
- pbi->fp_quant_Inter_V_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-
- }
-
- pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs;
-
-}
-
-void select_quantiser(PB_INSTANCE *pbi, int type) {
- /* select a quantiser according to what plane has to be coded in what
- * mode. Could be extended to a more sophisticated scheme. */
-
- switch(type) {
- case BLOCK_Y:
- pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs;
- pbi->fquant_round = pbi->fp_quant_Y_round;
- pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Y;
- break;
- case BLOCK_U:
- pbi->fquant_coeffs = pbi->fp_quant_U_coeffs;
- pbi->fquant_round = pbi->fp_quant_U_round;
- pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_U;
- break;
- case BLOCK_V:
- pbi->fquant_coeffs = pbi->fp_quant_V_coeffs;
- pbi->fquant_round = pbi->fp_quant_V_round;
- pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_V;
- break;
- case BLOCK_INTER_Y:
- pbi->fquant_coeffs = pbi->fp_quant_Inter_Y_coeffs;
- pbi->fquant_round = pbi->fp_quant_Inter_Y_round;
- pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_Y;
- break;
- case BLOCK_INTER_U:
- pbi->fquant_coeffs = pbi->fp_quant_Inter_U_coeffs;
- pbi->fquant_round = pbi->fp_quant_Inter_U_round;
- pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_U;
- break;
- case BLOCK_INTER_V:
- pbi->fquant_coeffs = pbi->fp_quant_Inter_V_coeffs;
- pbi->fquant_round = pbi->fp_quant_Inter_V_round;
- pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_V;
- break;
- }
-}
-
-
-void quantize( PB_INSTANCE *pbi,
- ogg_int16_t * DCT_block,
- Q_LIST_ENTRY * quantized_list){
- ogg_uint32_t i; /* Row index */
- Q_LIST_ENTRY val; /* Quantised value. */
-
- ogg_int32_t * FquantRoundPtr = pbi->fquant_round;
- ogg_int32_t * FquantCoeffsPtr = pbi->fquant_coeffs;
- ogg_int32_t * FquantZBinSizePtr = pbi->fquant_ZbSize;
- ogg_int16_t * DCT_blockPtr = DCT_block;
- ogg_uint32_t * ZigZagPtr = (ogg_uint32_t *)pbi->zigzag_index;
- ogg_int32_t temp;
-
- /* Set the quantized_list to default to 0 */
- memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );
-
- /* Note that we add half divisor to effect rounding on positive number */
- for( i = 0; i < VFRAGPIXELS; i++) {
-
- int col;
- /* Iterate through columns */
- for( col = 0; col < 8; col++) {
- if ( DCT_blockPtr[col] >= FquantZBinSizePtr[col] ) {
- temp = FquantCoeffsPtr[col] * ( DCT_blockPtr[col] + FquantRoundPtr[col] ) ;
- val = (Q_LIST_ENTRY) (temp>>16);
- quantized_list[ZigZagPtr[col]] = ( val > 511 ) ? 511 : val;
- } else if ( DCT_blockPtr[col] <= -FquantZBinSizePtr[col] ) {
- temp = FquantCoeffsPtr[col] *
- ( DCT_blockPtr[col] - FquantRoundPtr[col] ) + MIN16;
- val = (Q_LIST_ENTRY) (temp>>16);
- quantized_list[ZigZagPtr[col]] = ( val < -511 ) ? -511 : val;
- }
- }
-
- FquantRoundPtr += 8;
- FquantCoeffsPtr += 8;
- FquantZBinSizePtr += 8;
- DCT_blockPtr += 8;
- ZigZagPtr += 8;
- }
-}
-
-static void init_dequantizer ( PB_INSTANCE *pbi,
- unsigned char QIndex ){
- int i, j;
-
- ogg_uint16_t * InterY_coeffs;
- ogg_uint16_t * InterU_coeffs;
- ogg_uint16_t * InterV_coeffs;
- ogg_uint16_t * Y_coeffs;
- ogg_uint16_t * U_coeffs;
- ogg_uint16_t * V_coeffs;
-
- Y_coeffs = pbi->quant_tables[0][0][QIndex];
- U_coeffs = pbi->quant_tables[0][1][QIndex];
- V_coeffs = pbi->quant_tables[0][2][QIndex];
- InterY_coeffs = pbi->quant_tables[1][0][QIndex];
- InterU_coeffs = pbi->quant_tables[1][1][QIndex];
- InterV_coeffs = pbi->quant_tables[1][2][QIndex];
-
- /* invert the dequant index into the quant index
- the dxer has a different order than the cxer. */
- BuildZigZagIndex(pbi);
-
- /* Reorder dequantisation coefficients into dct zigzag order. */
- for ( i = 0; i < BLOCK_SIZE; i++ ) {
- j = pbi->zigzag_index[i];
- pbi->dequant_Y_coeffs[j] = Y_coeffs[i];
- }
- for ( i = 0; i < BLOCK_SIZE; i++ ) {
- j = pbi->zigzag_index[i];
- pbi->dequant_U_coeffs[j] = U_coeffs[i];
- }
- for ( i = 0; i < BLOCK_SIZE; i++ ) {
- j = pbi->zigzag_index[i];
- pbi->dequant_V_coeffs[j] = V_coeffs[i];
- }
- for ( i = 0; i < BLOCK_SIZE; i++ ){
- j = pbi->zigzag_index[i];
- pbi->dequant_InterY_coeffs[j] = InterY_coeffs[i];
- }
- for ( i = 0; i < BLOCK_SIZE; i++ ){
- j = pbi->zigzag_index[i];
- pbi->dequant_InterU_coeffs[j] = InterU_coeffs[i];
- }
- for ( i = 0; i < BLOCK_SIZE; i++ ){
- j = pbi->zigzag_index[i];
- pbi->dequant_InterV_coeffs[j] = InterV_coeffs[i];
- }
-
- pbi->dequant_coeffs = pbi->dequant_Y_coeffs;
-}
-
-void UpdateQ( PB_INSTANCE *pbi, int NewQIndex ){
- ogg_uint32_t qscale;
-
- /* clamp to legal bounds */
- if (NewQIndex >= Q_TABLE_SIZE) NewQIndex = Q_TABLE_SIZE - 1;
- else if (NewQIndex < 0) NewQIndex = 0;
-
- pbi->FrameQIndex = NewQIndex;
-
- qscale = pbi->quant_info.ac_scale[NewQIndex];
- pbi->ThisFrameQualityValue = qscale;
-
- /* Re-initialise the Q tables for forward and reverse transforms. */
- init_dequantizer ( pbi, (unsigned char) pbi->FrameQIndex );
-}
-
-void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ ){
- ogg_uint32_t qscale;
- PB_INSTANCE *pbi = &cpi->pb;
-
- /* Do bounds checking and convert to a float. */
- qscale = NewQ;
- if ( qscale < pbi->quant_info.ac_scale[Q_TABLE_SIZE-1] )
- qscale = pbi->quant_info.ac_scale[Q_TABLE_SIZE-1];
- else if ( qscale > pbi->quant_info.ac_scale[0] )
- qscale = pbi->quant_info.ac_scale[0];
-
- /* Set the inter/intra descision control variables. */
- pbi->FrameQIndex = Q_TABLE_SIZE - 1;
- while ((ogg_int32_t) pbi->FrameQIndex >= 0 ) {
- if ( (pbi->FrameQIndex == 0) ||
- ( pbi->quant_info.ac_scale[pbi->FrameQIndex] >= NewQ) )
- break;
- pbi->FrameQIndex --;
- }
-
- /* Re-initialise the Q tables for forward and reverse transforms. */
- init_quantizer ( cpi, pbi->FrameQIndex );
- init_dequantizer ( pbi, pbi->FrameQIndex );
-}
diff --git a/Engine/lib/libtheora/lib/enc/encoder_toplevel.c b/Engine/lib/libtheora/lib/enc/encoder_toplevel.c
deleted file mode 100644
index 9356bba23..000000000
--- a/Engine/lib/libtheora/lib/enc/encoder_toplevel.c
+++ /dev/null
@@ -1,1447 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: encoder_toplevel.c 15383 2008-10-10 14:33:46Z xiphmont $
-
- ********************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include
-#include
-#include "toplevel_lookup.h"
-#include "../internal.h"
-#include "dsp.h"
-#include "codec_internal.h"
-
-#define A_TABLE_SIZE 29
-#define DF_CANDIDATE_WINDOW 5
-
-/*
- * th_quant_info for VP3
- */
-
-/*The default quantization parameters used by VP3.1.*/
-static const int OC_VP31_RANGE_SIZES[1]={63};
-static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={
- {
- 16, 11, 10, 16, 24, 40, 51, 61,
- 12, 12, 14, 19, 26, 58, 60, 55,
- 14, 13, 16, 24, 40, 57, 69, 56,
- 14, 17, 22, 29, 51, 87, 80, 62,
- 18, 22, 37, 58, 68, 109,103, 77,
- 24, 35, 55, 64, 81, 104,113, 92,
- 49, 64, 78, 87,103, 121,120,101,
- 72, 92, 95, 98,112, 100,103, 99
- },
- {
- 16, 11, 10, 16, 24, 40, 51, 61,
- 12, 12, 14, 19, 26, 58, 60, 55,
- 14, 13, 16, 24, 40, 57, 69, 56,
- 14, 17, 22, 29, 51, 87, 80, 62,
- 18, 22, 37, 58, 68, 109,103, 77,
- 24, 35, 55, 64, 81, 104,113, 92,
- 49, 64, 78, 87,103, 121,120,101,
- 72, 92, 95, 98,112, 100,103, 99
- }
-};
-static const th_quant_base OC_VP31_BASES_INTRA_C[2]={
- {
- 17, 18, 24, 47, 99, 99, 99, 99,
- 18, 21, 26, 66, 99, 99, 99, 99,
- 24, 26, 56, 99, 99, 99, 99, 99,
- 47, 66, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99
- },
- {
- 17, 18, 24, 47, 99, 99, 99, 99,
- 18, 21, 26, 66, 99, 99, 99, 99,
- 24, 26, 56, 99, 99, 99, 99, 99,
- 47, 66, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99
- }
-};
-static const th_quant_base OC_VP31_BASES_INTER[2]={
- {
- 16, 16, 16, 20, 24, 28, 32, 40,
- 16, 16, 20, 24, 28, 32, 40, 48,
- 16, 20, 24, 28, 32, 40, 48, 64,
- 20, 24, 28, 32, 40, 48, 64, 64,
- 24, 28, 32, 40, 48, 64, 64, 64,
- 28, 32, 40, 48, 64, 64, 64, 96,
- 32, 40, 48, 64, 64, 64, 96,128,
- 40, 48, 64, 64, 64, 96,128,128
- },
- {
- 16, 16, 16, 20, 24, 28, 32, 40,
- 16, 16, 20, 24, 28, 32, 40, 48,
- 16, 20, 24, 28, 32, 40, 48, 64,
- 20, 24, 28, 32, 40, 48, 64, 64,
- 24, 28, 32, 40, 48, 64, 64, 64,
- 28, 32, 40, 48, 64, 64, 64, 96,
- 32, 40, 48, 64, 64, 64, 96,128,
- 40, 48, 64, 64, 64, 96,128,128
- }
-};
-
-const th_quant_info TH_VP31_QUANT_INFO={
- {
- 220,200,190,180,170,170,160,160,
- 150,150,140,140,130,130,120,120,
- 110,110,100,100, 90, 90, 90, 80,
- 80, 80, 70, 70, 70, 60, 60, 60,
- 60, 50, 50, 50, 50, 40, 40, 40,
- 40, 40, 30, 30, 30, 30, 30, 30,
- 30, 20, 20, 20, 20, 20, 20, 20,
- 20, 10, 10, 10, 10, 10, 10, 10
- },
- {
- 500,450,400,370,340,310,285,265,
- 245,225,210,195,185,180,170,160,
- 150,145,135,130,125,115,110,107,
- 100, 96, 93, 89, 85, 82, 75, 74,
- 70, 68, 64, 60, 57, 56, 52, 50,
- 49, 45, 44, 43, 40, 38, 37, 35,
- 33, 32, 30, 29, 28, 25, 24, 22,
- 21, 19, 18, 17, 15, 13, 12, 10
- },
- {
- 30,25,20,20,15,15,14,14,
- 13,13,12,12,11,11,10,10,
- 9, 9, 8, 8, 7, 7, 7, 7,
- 6, 6, 6, 6, 5, 5, 5, 5,
- 4, 4, 4, 4, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0
- },
- {
- {
- {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y},
- {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C},
- {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}
- },
- {
- {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
- {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
- {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}
- }
- }
-};
-
-
-static void EClearFragmentInfo(CP_INSTANCE * cpi){
- if(cpi->extra_fragments)
- _ogg_free(cpi->extra_fragments);
- if(cpi->FragmentLastQ)
- _ogg_free(cpi->FragmentLastQ);
- if(cpi->FragTokens)
- _ogg_free(cpi->FragTokens);
- if(cpi->FragTokenCounts)
- _ogg_free(cpi->FragTokenCounts);
- if(cpi->RunHuffIndices)
- _ogg_free(cpi->RunHuffIndices);
- if(cpi->LastCodedErrorScore)
- _ogg_free(cpi->LastCodedErrorScore);
- if(cpi->ModeList)
- _ogg_free(cpi->ModeList);
- if(cpi->MVList)
- _ogg_free(cpi->MVList);
- if(cpi->DCT_codes )
- _ogg_free( cpi->DCT_codes );
- if(cpi->DCTDataBuffer )
- _ogg_free( cpi->DCTDataBuffer);
- if(cpi->quantized_list)
- _ogg_free( cpi->quantized_list);
- if(cpi->OriginalDC)
- _ogg_free( cpi->OriginalDC);
- if(cpi->PartiallyCodedFlags)
- _ogg_free(cpi->PartiallyCodedFlags);
- if(cpi->PartiallyCodedMbPatterns)
- _ogg_free(cpi->PartiallyCodedMbPatterns);
- if(cpi->UncodedMbFlags)
- _ogg_free(cpi->UncodedMbFlags);
-
- if(cpi->BlockCodedFlags)
- _ogg_free(cpi->BlockCodedFlags);
-
- cpi->extra_fragments = 0;
- cpi->FragmentLastQ = 0;
- cpi->FragTokens = 0;
- cpi->FragTokenCounts = 0;
- cpi->RunHuffIndices = 0;
- cpi->LastCodedErrorScore = 0;
- cpi->ModeList = 0;
- cpi->MVList = 0;
- cpi->DCT_codes = 0;
- cpi->DCTDataBuffer = 0;
- cpi->quantized_list = 0;
- cpi->OriginalDC = 0;
- cpi->BlockCodedFlags = 0;
-}
-
-static void EInitFragmentInfo(CP_INSTANCE * cpi){
-
- /* clear any existing info */
- EClearFragmentInfo(cpi);
-
- /* Perform Fragment Allocations */
- cpi->extra_fragments =
- _ogg_malloc(cpi->pb.UnitFragments*sizeof(unsigned char));
-
- /* A note to people reading and wondering why malloc returns aren't
- checked:
-
- lines like the following that implement a general strategy of
- 'check the return of malloc; a zero pointer means we're out of
- memory!'...:
-
- if(!cpi->extra_fragments) { EDeleteFragmentInfo(cpi); return FALSE; }
-
- ...are not useful. It's true that many platforms follow this
- malloc behavior, but many do not. The more modern malloc
- strategy is only to allocate virtual pages, which are not mapped
- until the memory on that page is touched. At *that* point, if
- the machine is out of heap, the page fails to be mapped and a
- SEGV is generated.
-
- That means that if we want to deal with out of memory conditions,
- we *must* be prepared to process a SEGV. If we implement the
- SEGV handler, there's no reason to to check malloc return; it is
- a waste of code. */
-
- cpi->FragmentLastQ =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->FragmentLastQ));
- cpi->FragTokens =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->FragTokens));
- cpi->OriginalDC =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->OriginalDC));
- cpi->FragTokenCounts =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->FragTokenCounts));
- cpi->RunHuffIndices =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->RunHuffIndices));
- cpi->LastCodedErrorScore =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->LastCodedErrorScore));
- cpi->BlockCodedFlags =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->BlockCodedFlags));
- cpi->ModeList =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->ModeList));
- cpi->MVList =
- _ogg_malloc(cpi->pb.UnitFragments*
- sizeof(*cpi->MVList));
- cpi->DCT_codes =
- _ogg_malloc(64*
- sizeof(*cpi->DCT_codes));
- cpi->DCTDataBuffer =
- _ogg_malloc(64*
- sizeof(*cpi->DCTDataBuffer));
- cpi->quantized_list =
- _ogg_malloc(64*
- sizeof(*cpi->quantized_list));
- cpi->PartiallyCodedFlags =
- _ogg_malloc(cpi->pb.MacroBlocks*
- sizeof(*cpi->PartiallyCodedFlags));
- cpi->PartiallyCodedMbPatterns =
- _ogg_malloc(cpi->pb.MacroBlocks*
- sizeof(*cpi->PartiallyCodedMbPatterns));
- cpi->UncodedMbFlags =
- _ogg_malloc(cpi->pb.MacroBlocks*
- sizeof(*cpi->UncodedMbFlags));
-
-}
-
-static void EClearFrameInfo(CP_INSTANCE * cpi) {
- if(cpi->ConvDestBuffer )
- _ogg_free(cpi->ConvDestBuffer );
- cpi->ConvDestBuffer = 0;
-
- if(cpi->yuv0ptr)
- _ogg_free(cpi->yuv0ptr);
- cpi->yuv0ptr = 0;
-
- if(cpi->yuv1ptr)
- _ogg_free(cpi->yuv1ptr);
- cpi->yuv1ptr = 0;
-
- if(cpi->OptimisedTokenListEb )
- _ogg_free(cpi->OptimisedTokenListEb);
- cpi->OptimisedTokenListEb = 0;
-
- if(cpi->OptimisedTokenList )
- _ogg_free(cpi->OptimisedTokenList);
- cpi->OptimisedTokenList = 0;
-
- if(cpi->OptimisedTokenListHi )
- _ogg_free(cpi->OptimisedTokenListHi);
- cpi->OptimisedTokenListHi = 0;
-
- if(cpi->OptimisedTokenListPl )
- _ogg_free(cpi->OptimisedTokenListPl);
- cpi->OptimisedTokenListPl = 0;
-
-}
-
-static void EInitFrameInfo(CP_INSTANCE * cpi){
- int FrameSize = cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize;
-
- /* clear any existing info */
- EClearFrameInfo(cpi);
-
- /* allocate frames */
- cpi->ConvDestBuffer =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->ConvDestBuffer));
- cpi->yuv0ptr =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->yuv0ptr));
- cpi->yuv1ptr =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->yuv1ptr));
- cpi->OptimisedTokenListEb =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->OptimisedTokenListEb));
- cpi->OptimisedTokenList =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->OptimisedTokenList));
- cpi->OptimisedTokenListHi =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->OptimisedTokenListHi));
- cpi->OptimisedTokenListPl =
- _ogg_malloc(FrameSize*
- sizeof(*cpi->OptimisedTokenListPl));
-}
-
-static void SetupKeyFrame(CP_INSTANCE *cpi) {
- /* Make sure the "last frame" buffer contains the first frame data
- as well. */
- memcpy ( cpi->yuv0ptr, cpi->yuv1ptr,
- cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize );
-
- /* Initialise the cpi->pb.display_fragments and other fragment
- structures for the first frame. */
- memset( cpi->pb.display_fragments, 1, cpi->pb.UnitFragments );
- memset( cpi->extra_fragments, 1, cpi->pb.UnitFragments );
-
- /* Set up for a KEY FRAME */
- cpi->pb.FrameType = KEY_FRAME;
-}
-
-static void AdjustKeyFrameContext(CP_INSTANCE *cpi) {
- ogg_uint32_t i;
- ogg_uint32_t AvKeyFrameFrequency =
- (ogg_uint32_t) (cpi->CurrentFrame / cpi->KeyFrameCount);
- ogg_uint32_t AvKeyFrameBytes =
- (ogg_uint32_t) (cpi->TotKeyFrameBytes / cpi->KeyFrameCount);
- ogg_uint32_t TotalWeight=0;
- ogg_int32_t AvKeyFramesPerSecond;
- ogg_int32_t MinFrameTargetRate;
-
- /* Update the frame carry over. */
- cpi->TotKeyFrameBytes += oggpackB_bytes(cpi->oggbuffer);
-
- /* reset keyframe context and calculate weighted average of last
- KEY_FRAME_CONTEXT keyframes */
- for( i = 0 ; i < KEY_FRAME_CONTEXT ; i ++ ) {
- if ( i < KEY_FRAME_CONTEXT -1) {
- cpi->PriorKeyFrameSize[i] = cpi->PriorKeyFrameSize[i+1];
- cpi->PriorKeyFrameDistance[i] = cpi->PriorKeyFrameDistance[i+1];
- } else {
- cpi->PriorKeyFrameSize[KEY_FRAME_CONTEXT - 1] =
- oggpackB_bytes(cpi->oggbuffer);
- cpi->PriorKeyFrameDistance[KEY_FRAME_CONTEXT - 1] =
- cpi->LastKeyFrame;
- }
-
- AvKeyFrameBytes += PriorKeyFrameWeight[i] *
- cpi->PriorKeyFrameSize[i];
- AvKeyFrameFrequency += PriorKeyFrameWeight[i] *
- cpi->PriorKeyFrameDistance[i];
- TotalWeight += PriorKeyFrameWeight[i];
- }
- AvKeyFrameBytes /= TotalWeight;
- AvKeyFrameFrequency /= TotalWeight;
- AvKeyFramesPerSecond = 100 * cpi->Configuration.OutputFrameRate /
- AvKeyFrameFrequency ;
-
- /* Calculate a new target rate per frame allowing for average key
- frame frequency over newest frames . */
- if ( 100 * cpi->Configuration.TargetBandwidth >
- AvKeyFrameBytes * AvKeyFramesPerSecond &&
- (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond )){
- cpi->frame_target_rate =
- (ogg_int32_t)(100* cpi->Configuration.TargetBandwidth -
- AvKeyFrameBytes * AvKeyFramesPerSecond ) /
- ( (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond ) );
- } else {
- /* don't let this number get too small!!! */
- cpi->frame_target_rate = 1;
- }
-
- /* minimum allowable frame_target_rate */
- MinFrameTargetRate = (cpi->Configuration.TargetBandwidth /
- cpi->Configuration.OutputFrameRate) / 3;
-
- if(cpi->frame_target_rate < MinFrameTargetRate ) {
- cpi->frame_target_rate = MinFrameTargetRate;
- }
-
- cpi->LastKeyFrame = 1;
- cpi->LastKeyFrameSize=oggpackB_bytes(cpi->oggbuffer);
-
-}
-
-static void UpdateFrame(CP_INSTANCE *cpi){
-
- double CorrectionFactor;
-
- /* Reset the DC predictors. */
- cpi->pb.LastIntraDC = 0;
- cpi->pb.InvLastIntraDC = 0;
- cpi->pb.LastInterDC = 0;
- cpi->pb.InvLastInterDC = 0;
-
- /* Initialise bit packing mechanism. */
- oggpackB_reset(cpi->oggbuffer);
-
- /* mark as video frame */
- oggpackB_write(cpi->oggbuffer,0,1);
-
- /* Write out the frame header information including size. */
- WriteFrameHeader(cpi);
-
- /* Copy back any extra frags that are to be updated by the codec
- as part of the background cleanup task */
- CopyBackExtraFrags(cpi);
-
- /* Encode the data. */
- EncodeData(cpi);
-
- /* Adjust drop frame trigger. */
- if ( cpi->pb.FrameType != KEY_FRAME ) {
- /* Apply decay factor then add in the last frame size. */
- cpi->DropFrameTriggerBytes =
- ((cpi->DropFrameTriggerBytes * (DF_CANDIDATE_WINDOW-1)) /
- DF_CANDIDATE_WINDOW) + oggpackB_bytes(cpi->oggbuffer);
- }else{
- /* Increase cpi->DropFrameTriggerBytes a little. Just after a key
- frame may actually be a good time to drop a frame. */
- cpi->DropFrameTriggerBytes =
- (cpi->DropFrameTriggerBytes * DF_CANDIDATE_WINDOW) /
- (DF_CANDIDATE_WINDOW-1);
- }
-
- /* Test for overshoot which may require a dropped frame next time
- around. If we are already in a drop frame condition but the
- previous frame was not dropped then the threshold for continuing
- to allow dropped frames is reduced. */
- if ( cpi->DropFrameCandidate ) {
- if ( cpi->DropFrameTriggerBytes >
- (cpi->frame_target_rate * (DF_CANDIDATE_WINDOW+1)) )
- cpi->DropFrameCandidate = 1;
- else
- cpi->DropFrameCandidate = 0;
- } else {
- if ( cpi->DropFrameTriggerBytes >
- (cpi->frame_target_rate * ((DF_CANDIDATE_WINDOW*2)-2)) )
- cpi->DropFrameCandidate = 1;
- else
- cpi->DropFrameCandidate = 0;
- }
-
- /* Update the BpbCorrectionFactor variable according to whether or
- not we were close enough with our selection of DCT quantiser. */
- if ( cpi->pb.FrameType != KEY_FRAME ) {
- /* Work out a size correction factor. */
- CorrectionFactor = (double)oggpackB_bytes(cpi->oggbuffer) /
- (double)cpi->ThisFrameTargetBytes;
-
- if ( (CorrectionFactor > 1.05) &&
- (cpi->pb.ThisFrameQualityValue <
- cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) ) {
- CorrectionFactor = 1.0 + ((CorrectionFactor - 1.0)/2);
- if ( CorrectionFactor > 1.5 )
- cpi->BpbCorrectionFactor *= 1.5;
- else
- cpi->BpbCorrectionFactor *= CorrectionFactor;
-
- /* Keep BpbCorrectionFactor within limits */
- if ( cpi->BpbCorrectionFactor > MAX_BPB_FACTOR )
- cpi->BpbCorrectionFactor = MAX_BPB_FACTOR;
- } else if ( (CorrectionFactor < 0.95) &&
- (cpi->pb.ThisFrameQualityValue > VERY_BEST_Q) ){
- CorrectionFactor = 1.0 - ((1.0 - CorrectionFactor)/2);
- if ( CorrectionFactor < 0.75 )
- cpi->BpbCorrectionFactor *= 0.75;
- else
- cpi->BpbCorrectionFactor *= CorrectionFactor;
-
- /* Keep BpbCorrectionFactor within limits */
- if ( cpi->BpbCorrectionFactor < MIN_BPB_FACTOR )
- cpi->BpbCorrectionFactor = MIN_BPB_FACTOR;
- }
- }
-
- /* Adjust carry over and or key frame context. */
- if ( cpi->pb.FrameType == KEY_FRAME ) {
- /* Adjust the key frame context unless the key frame was very small */
- AdjustKeyFrameContext(cpi);
- } else {
- /* Update the frame carry over */
- cpi->CarryOver += ((ogg_int32_t)cpi->frame_target_rate -
- (ogg_int32_t)oggpackB_bytes(cpi->oggbuffer));
- }
- cpi->TotalByteCount += oggpackB_bytes(cpi->oggbuffer);
-}
-
-static void CompressFirstFrame(CP_INSTANCE *cpi) {
- ogg_uint32_t i;
-
- /* set up context of key frame sizes and distances for more local
- datarate control */
- for( i = 0 ; i < KEY_FRAME_CONTEXT ; i ++ ) {
- cpi->PriorKeyFrameSize[i] = cpi->Configuration.KeyFrameDataTarget;
- cpi->PriorKeyFrameDistance[i] = cpi->pb.info.keyframe_frequency_force;
- }
-
- /* Keep track of the total number of Key Frames Coded. */
- cpi->KeyFrameCount = 1;
- cpi->LastKeyFrame = 1;
- cpi->TotKeyFrameBytes = 0;
-
- /* A key frame is not a dropped frame there for reset the count of
- consequative dropped frames. */
- cpi->DropCount = 0;
-
- SetupKeyFrame(cpi);
-
- /* Calculate a new target rate per frame allowing for average key
- frame frequency and size thus far. */
- if ( cpi->Configuration.TargetBandwidth >
- ((cpi->Configuration.KeyFrameDataTarget *
- cpi->Configuration.OutputFrameRate)/
- cpi->pb.info.keyframe_frequency) ) {
-
- cpi->frame_target_rate =
- (ogg_int32_t)((cpi->Configuration.TargetBandwidth -
- ((cpi->Configuration.KeyFrameDataTarget *
- cpi->Configuration.OutputFrameRate)/
- cpi->pb.info.keyframe_frequency)) /
- cpi->Configuration.OutputFrameRate);
- }else
- cpi->frame_target_rate = 1;
-
- /* Set baseline frame target rate. */
- cpi->BaseLineFrameTargetRate = cpi->frame_target_rate;
-
- /* A key frame is not a dropped frame there for reset the count of
- consequative dropped frames. */
- cpi->DropCount = 0;
-
- /* Initialise drop frame trigger to 5 frames worth of data. */
- cpi->DropFrameTriggerBytes = cpi->frame_target_rate * DF_CANDIDATE_WINDOW;
-
- /* Set a target size for this key frame based upon the baseline
- target and frequency */
- cpi->ThisFrameTargetBytes = cpi->Configuration.KeyFrameDataTarget;
-
- /* Get a DCT quantizer level for the key frame. */
- cpi->MotionScore = cpi->pb.UnitFragments;
-
- RegulateQ(cpi, cpi->pb.UnitFragments);
-
- cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
-
- /* Initialise quantizer. */
- UpdateQC(cpi, cpi->pb.ThisFrameQualityValue );
-
- /* Initialise the cpi->pb.display_fragments and other fragment
- structures for the first frame. */
- for ( i = 0; i < cpi->pb.UnitFragments; i ++ )
- cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue;
-
- /* Compress and output the frist frame. */
- PickIntra( cpi,
- cpi->pb.YSBRows, cpi->pb.YSBCols);
- UpdateFrame(cpi);
-
- /* Initialise the carry over rate targeting variables. */
- cpi->CarryOver = 0;
-
-}
-
-static void CompressKeyFrame(CP_INSTANCE *cpi){
- ogg_uint32_t i;
-
- /* Before we compress reset the carry over to the actual frame carry over */
- cpi->CarryOver = cpi->Configuration.TargetBandwidth * cpi->CurrentFrame /
- cpi->Configuration.OutputFrameRate - cpi->TotalByteCount;
-
- /* Keep track of the total number of Key Frames Coded */
- cpi->KeyFrameCount += 1;
-
- /* A key frame is not a dropped frame there for reset the count of
- consequative dropped frames. */
- cpi->DropCount = 0;
-
- SetupKeyFrame(cpi);
-
- /* set a target size for this frame */
- cpi->ThisFrameTargetBytes = (ogg_int32_t) cpi->frame_target_rate +
- ( (cpi->Configuration.KeyFrameDataTarget - cpi->frame_target_rate) *
- cpi->LastKeyFrame / cpi->pb.info.keyframe_frequency_force );
-
- if ( cpi->ThisFrameTargetBytes > cpi->Configuration.KeyFrameDataTarget )
- cpi->ThisFrameTargetBytes = cpi->Configuration.KeyFrameDataTarget;
-
- /* Get a DCT quantizer level for the key frame. */
- cpi->MotionScore = cpi->pb.UnitFragments;
-
- RegulateQ(cpi, cpi->pb.UnitFragments);
-
- cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
-
- /* Initialise DCT tables. */
- UpdateQC(cpi, cpi->pb.ThisFrameQualityValue );
-
- /* Initialise the cpi->pb.display_fragments and other fragment
- structures for the first frame. */
- for ( i = 0; i < cpi->pb.UnitFragments; i ++ )
- cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue;
-
-
- /* Compress and output the frist frame. */
- PickIntra( cpi,
- cpi->pb.YSBRows, cpi->pb.YSBCols);
- UpdateFrame(cpi);
-
-}
-
-static void CompressFrame( CP_INSTANCE *cpi) {
- ogg_int32_t min_blocks_per_frame;
- ogg_uint32_t i;
- int DropFrame = 0;
- ogg_uint32_t ResidueBlocksAdded=0;
- ogg_uint32_t KFIndicator = 0;
-
- double QModStep;
- double QModifier = 1.0;
-
- /* Clear down the macro block level mode and MV arrays. */
- for ( i = 0; i < cpi->pb.UnitFragments; i++ ) {
- cpi->pb.FragCodingMethod[i] = CODE_INTER_NO_MV; /* Default coding mode */
- cpi->pb.FragMVect[i].x = 0;
- cpi->pb.FragMVect[i].y = 0;
- }
-
- /* Default to delta frames. */
- cpi->pb.FrameType = DELTA_FRAME;
-
- /* Clear down the difference arrays for the current frame. */
- memset( cpi->pb.display_fragments, 0, cpi->pb.UnitFragments );
- memset( cpi->extra_fragments, 0, cpi->pb.UnitFragments );
-
- /* Calculate the target bytes for this frame. */
- cpi->ThisFrameTargetBytes = cpi->frame_target_rate;
-
- /* Correct target to try and compensate for any overall rate error
- that is developing */
-
- /* Set the max allowed Q for this frame based upon carry over
- history. First set baseline worst Q for this frame */
- cpi->Configuration.ActiveMaxQ = cpi->Configuration.MaxQ + 10;
- if ( cpi->Configuration.ActiveMaxQ >= Q_TABLE_SIZE )
- cpi->Configuration.ActiveMaxQ = Q_TABLE_SIZE - 1;
-
- /* Make a further adjustment based upon the carry over and recent
- history.. cpi->Configuration.ActiveMaxQ reduced by 1 for each 1/2
- seconds worth of -ve carry over up to a limit of 6. Also
- cpi->Configuration.ActiveMaxQ reduced if frame is a
- "DropFrameCandidate". Remember that if we are behind the bit
- target carry over is -ve. */
- if ( cpi->CarryOver < 0 ) {
- if ( cpi->DropFrameCandidate ) {
- cpi->Configuration.ActiveMaxQ -= 4;
- }
-
- if ( cpi->CarryOver <
- -((ogg_int32_t)cpi->Configuration.TargetBandwidth*3) )
- cpi->Configuration.ActiveMaxQ -= 6;
- else
- cpi->Configuration.ActiveMaxQ +=
- (ogg_int32_t) ((cpi->CarryOver*2) /
- (ogg_int32_t)cpi->Configuration.TargetBandwidth);
-
- /* Check that we have not dropped quality too far */
- if ( cpi->Configuration.ActiveMaxQ < cpi->Configuration.MaxQ )
- cpi->Configuration.ActiveMaxQ = cpi->Configuration.MaxQ;
- }
-
- /* Calculate the Q Modifier step size required to cause a step down
- from full target bandwidth to 40% of target between max Q and
- best Q */
- QModStep = 0.5 / (double)((Q_TABLE_SIZE - 1) -
- cpi->Configuration.ActiveMaxQ);
-
- /* Set up the cpi->QTargetModifier[] table. */
- for ( i = 0; i < cpi->Configuration.ActiveMaxQ; i++ ) {
- cpi->QTargetModifier[i] = QModifier;
- }
- for ( i = cpi->Configuration.ActiveMaxQ; i < Q_TABLE_SIZE; i++ ) {
- cpi->QTargetModifier[i] = QModifier;
- QModifier -= QModStep;
- }
-
- /* if we are allowed to drop frames and are falling behind (eg more
- than x frames worth of bandwidth) */
- if ( cpi->pb.info.dropframes_p &&
- ( cpi->DropCount < cpi->MaxConsDroppedFrames) &&
- ( cpi->CarryOver <
- -((ogg_int32_t)cpi->Configuration.TargetBandwidth)) &&
- ( cpi->DropFrameCandidate) ) {
- /* (we didn't do this frame so we should have some left over for
- the next frame) */
- cpi->CarryOver += cpi->frame_target_rate;
- DropFrame = 1;
- cpi->DropCount ++;
-
- /* Adjust DropFrameTriggerBytes to account for the saving achieved. */
- cpi->DropFrameTriggerBytes =
- (cpi->DropFrameTriggerBytes *
- (DF_CANDIDATE_WINDOW-1))/DF_CANDIDATE_WINDOW;
-
- /* Even if we drop a frame we should account for it when
- considering key frame seperation. */
- cpi->LastKeyFrame++;
- } else if ( cpi->CarryOver <
- -((ogg_int32_t)cpi->Configuration.TargetBandwidth * 2) ) {
- /* Reduce frame bit target by 1.75% for each 1/10th of a seconds
- worth of -ve carry over down to a minimum of 65% of its
- un-modified value. */
-
- cpi->ThisFrameTargetBytes =
- (ogg_uint32_t)(cpi->ThisFrameTargetBytes * 0.65);
- } else if ( cpi->CarryOver < 0 ) {
- /* Note that cpi->CarryOver is a -ve here hence 1.0 "+" ... */
- cpi->ThisFrameTargetBytes =
- (ogg_uint32_t)(cpi->ThisFrameTargetBytes *
- (1.0 + ( ((cpi->CarryOver * 10)/
- ((ogg_int32_t)cpi->
- Configuration.TargetBandwidth)) * 0.0175) ));
- }
-
- if ( !DropFrame ) {
- /* pick all the macroblock modes and motion vectors */
- ogg_uint32_t InterError;
- ogg_uint32_t IntraError;
-
-
- /* Set Baseline filter level. */
- ConfigurePP( &cpi->pp, cpi->pb.info.noise_sensitivity);
-
- /* Score / analyses the fragments. */
- cpi->MotionScore = YUVAnalyseFrame(&cpi->pp, &KFIndicator );
-
- /* Get the baseline Q value */
- RegulateQ( cpi, cpi->MotionScore );
-
- /* Recode blocks if the error score in last frame was high. */
- ResidueBlocksAdded = 0;
- for ( i = 0; i < cpi->pb.UnitFragments; i++ ){
- if ( !cpi->pb.display_fragments[i] ){
- if ( cpi->LastCodedErrorScore[i] >=
- ResidueErrorThresh[cpi->pb.FrameQIndex] ) {
- cpi->pb.display_fragments[i] = 1; /* Force block update */
- cpi->extra_fragments[i] = 1; /* Insures up to date
- pixel data is used. */
- ResidueBlocksAdded ++;
- }
- }
- }
-
- /* Adjust the motion score to allow for residue blocks
- added. These are assumed to have below average impact on
- bitrate (Hence ResidueBlockFactor). */
- cpi->MotionScore = cpi->MotionScore +
- (ResidueBlocksAdded / ResidueBlockFactor[cpi->pb.FrameQIndex]);
-
- /* Estimate the min number of blocks at best Q */
- min_blocks_per_frame =
- (ogg_int32_t)(cpi->ThisFrameTargetBytes /
- GetEstimatedBpb( cpi, VERY_BEST_Q ));
- if ( min_blocks_per_frame == 0 )
- min_blocks_per_frame = 1;
-
- /* If we have less than this number then consider adding in some
- extra blocks */
- if ( cpi->MotionScore < min_blocks_per_frame ) {
- min_blocks_per_frame =
- cpi->MotionScore +
- (ogg_int32_t)(((min_blocks_per_frame - cpi->MotionScore) * 4) / 3 );
- UpRegulateDataStream( cpi, VERY_BEST_Q, min_blocks_per_frame );
- }else{
- /* Reset control variable for best quality final pass. */
- cpi->FinalPassLastPos = 0;
- }
-
- /* Get the modified Q prediction taking into account extra blocks added. */
- RegulateQ( cpi, cpi->MotionScore );
-
- /* Unless we are already well ahead (4 seconds of data) of the
- projected bitrate */
- if ( cpi->CarryOver <
- (ogg_int32_t)(cpi->Configuration.TargetBandwidth * 4) ){
- /* Look at the predicted Q (pbi->FrameQIndex). Adjust the
- target bits for this frame based upon projected Q and
- re-calculate. The idea is that if the Q is better than a
- given (good enough) level then we will try and save some bits
- for use in more difficult segments. */
- cpi->ThisFrameTargetBytes =
- (ogg_int32_t) (cpi->ThisFrameTargetBytes *
- cpi->QTargetModifier[cpi->pb.FrameQIndex]);
-
- /* Recalculate Q again */
- RegulateQ( cpi, cpi->MotionScore );
- }
-
-
- /* Select modes and motion vectors for each of the blocks : return
- an error score for inter and intra */
- PickModes( cpi, cpi->pb.YSBRows, cpi->pb.YSBCols,
- cpi->pb.info.width,
- &InterError, &IntraError );
-
- /* decide whether we really should have made this frame a key frame */
- /* forcing out a keyframe if the max interval is up is done at a higher level */
- if( cpi->pb.info.keyframe_auto_p){
- if( ( 2* IntraError < 5 * InterError )
- && ( KFIndicator >= (ogg_uint32_t)
- cpi->pb.info.keyframe_auto_threshold)
- && ( cpi->LastKeyFrame > cpi->pb.info.keyframe_mindistance)
- ){
- CompressKeyFrame(cpi); /* Code a key frame */
- return;
- }
-
- }
-
- /* Increment the frames since last key frame count */
- cpi->LastKeyFrame++;
-
- /* Proceed with the frame update. */
- UpdateFrame(cpi);
- cpi->DropCount = 0;
-
- if ( cpi->MotionScore > 0 ){
- /* Note the Quantizer used for each block coded. */
- for ( i = 0; i < cpi->pb.UnitFragments; i++ ){
- if ( cpi->pb.display_fragments[i] ){
- cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue;
- }
- }
-
- }
- }else{
- /* even if we 'drop' a frame, a placeholder must be written as we
- currently assume fixed frame rate timebase as Ogg mapping
- invariant */
- UpdateFrame(cpi);
- }
-}
-
-/********************** The toplevel: encode ***********************/
-
-static int _ilog(unsigned int v){
- int ret=0;
- while(v){
- ret++;
- v>>=1;
- }
- return(ret);
-}
-
-static void theora_encode_dispatch_init(CP_INSTANCE *cpi);
-
-int theora_encode_init(theora_state *th, theora_info *c){
- int i;
-
- CP_INSTANCE *cpi;
-
- memset(th, 0, sizeof(*th));
- /*Currently only the 4:2:0 format is supported.*/
- if(c->pixelformat!=OC_PF_420)return OC_IMPL;
- th->internal_encode=cpi=_ogg_calloc(1,sizeof(*cpi));
- theora_encode_dispatch_init(cpi);
-
- dsp_static_init (&cpi->dsp);
- memcpy (&cpi->pb.dsp, &cpi->dsp, sizeof(DspFunctions));
-
- c->version_major=TH_VERSION_MAJOR;
- c->version_minor=TH_VERSION_MINOR;
- c->version_subminor=TH_VERSION_SUB;
-
- InitTmpBuffers(&cpi->pb);
- InitPPInstance(&cpi->pp, &cpi->dsp);
-
- /* Initialise Configuration structure to legal values */
- if(c->quality>63)c->quality=63;
- if(c->quality<0)c->quality=32;
- if(c->target_bitrate<0)c->target_bitrate=0;
- /* we clamp target_bitrate to 24 bits after setting up the encoder */
-
- cpi->Configuration.BaseQ = c->quality;
- cpi->Configuration.FirstFrameQ = c->quality;
- cpi->Configuration.MaxQ = c->quality;
- cpi->Configuration.ActiveMaxQ = c->quality;
-
- cpi->MVChangeFactor = 14;
- cpi->FourMvChangeFactor = 8;
- cpi->MinImprovementForNewMV = 25;
- cpi->ExhaustiveSearchThresh = 2500;
- cpi->MinImprovementForFourMV = 100;
- cpi->FourMVThreshold = 10000;
- cpi->BitRateCapFactor = 1.5;
- cpi->InterTripOutThresh = 5000;
- cpi->MVEnabled = 1;
- cpi->InterCodeCount = 127;
- cpi->BpbCorrectionFactor = 1.0;
- cpi->GoldenFrameEnabled = 1;
- cpi->InterPrediction = 1;
- cpi->MotionCompensation = 1;
- cpi->ThreshMapThreshold = 5;
- cpi->MaxConsDroppedFrames = 1;
-
- /* Set encoder flags. */
- /* if not AutoKeyframing cpi->ForceKeyFrameEvery = is frequency */
- if(!c->keyframe_auto_p)
- c->keyframe_frequency_force = c->keyframe_frequency;
-
- /* Set the frame rate variables. */
- if ( c->fps_numerator < 1 )
- c->fps_numerator = 1;
- if ( c->fps_denominator < 1 )
- c->fps_denominator = 1;
-
- /* don't go too nuts on keyframe spacing; impose a high limit to
- make certain the granulepos encoding strategy works */
- if(c->keyframe_frequency_force>32768)c->keyframe_frequency_force=32768;
- if(c->keyframe_mindistance>32768)c->keyframe_mindistance=32768;
- if(c->keyframe_mindistance>c->keyframe_frequency_force)
- c->keyframe_mindistance=c->keyframe_frequency_force;
- cpi->pb.keyframe_granule_shift=_ilog(c->keyframe_frequency_force-1);
-
- /* clamp the target_bitrate to a maximum of 24 bits so we get a
- more meaningful value when we write this out in the header. */
- if(c->target_bitrate>(1<<24)-1)c->target_bitrate=(1<<24)-1;
-
- /* copy in config */
- memcpy(&cpi->pb.info,c,sizeof(*c));
- th->i=&cpi->pb.info;
- th->granulepos=-1;
-
- /* Set up default values for QTargetModifier[Q_TABLE_SIZE] table */
- for ( i = 0; i < Q_TABLE_SIZE; i++ )
- cpi->QTargetModifier[i] = 1.0;
-
- /* Set up an encode buffer */
- cpi->oggbuffer = _ogg_malloc(sizeof(oggpack_buffer));
- oggpackB_writeinit(cpi->oggbuffer);
-
- /* Set data rate related variables. */
- cpi->Configuration.TargetBandwidth = (c->target_bitrate) / 8;
-
- cpi->Configuration.OutputFrameRate =
- (double)( c->fps_numerator /
- c->fps_denominator );
-
- cpi->frame_target_rate = cpi->Configuration.TargetBandwidth /
- cpi->Configuration.OutputFrameRate;
-
- /* Set key frame data rate target; this is nominal keyframe size */
- cpi->Configuration.KeyFrameDataTarget = (c->keyframe_data_target_bitrate *
- c->fps_denominator /
- c->fps_numerator ) / 8;
-
- /* Note the height and width in the pre-processor control structure. */
- cpi->ScanConfig.VideoFrameHeight = cpi->pb.info.height;
- cpi->ScanConfig.VideoFrameWidth = cpi->pb.info.width;
-
- InitFrameDetails(&cpi->pb);
- EInitFragmentInfo(cpi);
- EInitFrameInfo(cpi);
-
- /* Set up pre-processor config pointers. */
- cpi->ScanConfig.Yuv0ptr = cpi->yuv0ptr;
- cpi->ScanConfig.Yuv1ptr = cpi->yuv1ptr;
- cpi->ScanConfig.SrfWorkSpcPtr = cpi->ConvDestBuffer;
- cpi->ScanConfig.disp_fragments = cpi->pb.display_fragments;
- cpi->ScanConfig.RegionIndex = cpi->pb.pixel_index_table;
-
- /* Initialise the pre-processor module. */
- ScanYUVInit(&cpi->pp, &(cpi->ScanConfig));
-
- /* Initialise Motion compensation */
- InitMotionCompensation(cpi);
-
- /* Initialise the compression process. */
- /* We always start at frame 1 */
- cpi->CurrentFrame = 1;
-
- /* Reset the rate targeting correction factor. */
- cpi->BpbCorrectionFactor = 1.0;
-
- cpi->TotalByteCount = 0;
- cpi->TotalMotionScore = 0;
-
- /* Up regulation variables. */
- cpi->FinalPassLastPos = 0; /* Used to regulate a final unrestricted pass. */
- cpi->LastEndSB = 0; /* Where we were in the loop last time. */
- cpi->ResidueLastEndSB = 0; /* Where we were in the residue update
- loop last time. */
-
- InitHuffmanSet(&cpi->pb);
-
- /* This makes sure encoder version specific tables are initialised */
- memcpy(&cpi->pb.quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info));
- InitQTables(&cpi->pb);
-
- /* Indicate that the next frame to be compressed is the first in the
- current clip. */
- cpi->ThisIsFirstFrame = 1;
- cpi->readyflag = 1;
-
- cpi->pb.HeadersWritten = 0;
- /*We overload this flag to track header output.*/
- cpi->doneflag=-3;
-
- return 0;
-}
-
-int theora_encode_YUVin(theora_state *t,
- yuv_buffer *yuv){
- ogg_int32_t i;
- unsigned char *LocalDataPtr;
- unsigned char *InputDataPtr;
- CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
-
- if(!cpi->readyflag)return OC_EINVAL;
- if(cpi->doneflag>0)return OC_EINVAL;
-
- /* If frame size has changed, abort out for now */
- if (yuv->y_height != (int)cpi->pb.info.height ||
- yuv->y_width != (int)cpi->pb.info.width )
- return(-1);
-
-
- /* Copy over input YUV to internal YUV buffers. */
- /* we invert the image for backward compatibility with VP3 */
- /* First copy over the Y data */
- LocalDataPtr = cpi->yuv1ptr + yuv->y_width*(yuv->y_height - 1);
- InputDataPtr = yuv->y;
- for ( i = 0; i < yuv->y_height; i++ ){
- memcpy( LocalDataPtr, InputDataPtr, yuv->y_width );
- LocalDataPtr -= yuv->y_width;
- InputDataPtr += yuv->y_stride;
- }
-
- /* Now copy over the U data */
- LocalDataPtr = &cpi->yuv1ptr[(yuv->y_height * yuv->y_width)];
- LocalDataPtr += yuv->uv_width*(yuv->uv_height - 1);
- InputDataPtr = yuv->u;
- for ( i = 0; i < yuv->uv_height; i++ ){
- memcpy( LocalDataPtr, InputDataPtr, yuv->uv_width );
- LocalDataPtr -= yuv->uv_width;
- InputDataPtr += yuv->uv_stride;
- }
-
- /* Now copy over the V data */
- LocalDataPtr =
- &cpi->yuv1ptr[((yuv->y_height*yuv->y_width)*5)/4];
- LocalDataPtr += yuv->uv_width*(yuv->uv_height - 1);
- InputDataPtr = yuv->v;
- for ( i = 0; i < yuv->uv_height; i++ ){
- memcpy( LocalDataPtr, InputDataPtr, yuv->uv_width );
- LocalDataPtr -= yuv->uv_width;
- InputDataPtr += yuv->uv_stride;
- }
-
- /* Special case for first frame */
- if ( cpi->ThisIsFirstFrame ){
- CompressFirstFrame(cpi);
- cpi->ThisIsFirstFrame = 0;
- cpi->ThisIsKeyFrame = 0;
- } else {
-
- /* don't allow generating invalid files that overflow the p-frame
- shift, even if keyframe_auto_p is turned off */
- if(cpi->LastKeyFrame >= (ogg_uint32_t)
- cpi->pb.info.keyframe_frequency_force)
- cpi->ThisIsKeyFrame = 1;
-
- if ( cpi->ThisIsKeyFrame ) {
- CompressKeyFrame(cpi);
- cpi->ThisIsKeyFrame = 0;
- } else {
- /* Compress the frame. */
- CompressFrame( cpi );
- }
-
- }
-
- /* Update stats variables. */
- cpi->LastFrameSize = oggpackB_bytes(cpi->oggbuffer);
- cpi->CurrentFrame++;
- cpi->packetflag=1;
-
- t->granulepos=
- ((cpi->CurrentFrame - cpi->LastKeyFrame)<<cpi->pb.keyframe_granule_shift)+
- cpi->LastKeyFrame - 1;
-
- return 0;
-}
-
-int theora_encode_packetout( theora_state *t, int last_p, ogg_packet *op){
- CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
- long bytes=oggpackB_bytes(cpi->oggbuffer);
-
- if(!bytes)return(0);
- if(!cpi->packetflag)return(0);
- if(cpi->doneflag>0)return(-1);
-
- op->packet=oggpackB_get_buffer(cpi->oggbuffer);
- op->bytes=bytes;
- op->b_o_s=0;
- op->e_o_s=last_p;
-
- op->packetno=cpi->CurrentFrame;
- op->granulepos=t->granulepos;
-
- cpi->packetflag=0;
- if(last_p)cpi->doneflag=1;
-
- return 1;
-}
-
-static void _tp_writebuffer(oggpack_buffer *opb, const char *buf, const long len)
-{
- long i;
-
- for (i = 0; i < len; i++)
- oggpackB_write(opb, *buf++, 8);
-}
-
-static void _tp_writelsbint(oggpack_buffer *opb, long value)
-{
- oggpackB_write(opb, value&0xFF, 8);
- oggpackB_write(opb, value>>8&0xFF, 8);
- oggpackB_write(opb, value>>16&0xFF, 8);
- oggpackB_write(opb, value>>24&0xFF, 8);
-}
-
-/* build the initial short header for stream recognition and format */
-int theora_encode_header(theora_state *t, ogg_packet *op){
- CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
- int offset_y;
-
- oggpackB_reset(cpi->oggbuffer);
- oggpackB_write(cpi->oggbuffer,0x80,8);
- _tp_writebuffer(cpi->oggbuffer, "theora", 6);
-
- oggpackB_write(cpi->oggbuffer,TH_VERSION_MAJOR,8);
- oggpackB_write(cpi->oggbuffer,TH_VERSION_MINOR,8);
- oggpackB_write(cpi->oggbuffer,TH_VERSION_SUB,8);
-
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.width>>4,16);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.height>>4,16);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.frame_width,24);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.frame_height,24);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.offset_x,8);
- /* Applications use offset_y to mean offset from the top of the image; the
- * meaning in the bitstream is the opposite (from the bottom). Transform.
- */
- offset_y = cpi->pb.info.height - cpi->pb.info.frame_height -
- cpi->pb.info.offset_y;
- oggpackB_write(cpi->oggbuffer,offset_y,8);
-
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.fps_numerator,32);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.fps_denominator,32);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.aspect_numerator,24);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.aspect_denominator,24);
-
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.colorspace,8);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.target_bitrate,24);
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.quality,6);
-
- oggpackB_write(cpi->oggbuffer,cpi->pb.keyframe_granule_shift,5);
-
- oggpackB_write(cpi->oggbuffer,cpi->pb.info.pixelformat,2);
-
- oggpackB_write(cpi->oggbuffer,0,3); /* spare config bits */
-
- op->packet=oggpackB_get_buffer(cpi->oggbuffer);
- op->bytes=oggpackB_bytes(cpi->oggbuffer);
-
- op->b_o_s=1;
- op->e_o_s=0;
-
- op->packetno=0;
-
- op->granulepos=0;
- cpi->packetflag=0;
-
- return(0);
-}
-
-/* build the comment header packet from the passed metadata */
-int theora_encode_comment(theora_comment *tc, ogg_packet *op)
-{
- const char *vendor = theora_version_string();
- const int vendor_length = strlen(vendor);
- oggpack_buffer *opb;
-
- opb = _ogg_malloc(sizeof(oggpack_buffer));
- oggpackB_writeinit(opb);
- oggpackB_write(opb, 0x81, 8);
- _tp_writebuffer(opb, "theora", 6);
-
- _tp_writelsbint(opb, vendor_length);
- _tp_writebuffer(opb, vendor, vendor_length);
-
- _tp_writelsbint(opb, tc->comments);
- if(tc->comments){
- int i;
- for(i=0;i<tc->comments;i++){
- if(tc->user_comments[i]){
- _tp_writelsbint(opb,tc->comment_lengths[i]);
- _tp_writebuffer(opb,tc->user_comments[i],tc->comment_lengths[i]);
- }else{
- oggpackB_write(opb,0,32);
- }
- }
- }
- op->bytes=oggpack_bytes(opb);
-
- /* So we're expecting the application will free this? */
- op->packet=_ogg_malloc(oggpack_bytes(opb));
- memcpy(op->packet, oggpack_get_buffer(opb), oggpack_bytes(opb));
- oggpack_writeclear(opb);
-
- _ogg_free(opb);
-
- op->b_o_s=0;
- op->e_o_s=0;
-
- op->packetno=0;
- op->granulepos=0;
-
- return (0);
-}
-
-/* build the final header packet with the tables required
- for decode */
-int theora_encode_tables(theora_state *t, ogg_packet *op){
- CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
-
- oggpackB_reset(cpi->oggbuffer);
- oggpackB_write(cpi->oggbuffer,0x82,8);
- _tp_writebuffer(cpi->oggbuffer,"theora",6);
-
- WriteQTables(&cpi->pb,cpi->oggbuffer);
- WriteHuffmanTrees(cpi->pb.HuffRoot_VP3x,cpi->oggbuffer);
-
- op->packet=oggpackB_get_buffer(cpi->oggbuffer);
- op->bytes=oggpackB_bytes(cpi->oggbuffer);
-
- op->b_o_s=0;
- op->e_o_s=0;
-
- op->packetno=0;
-
- op->granulepos=0;
- cpi->packetflag=0;
-
- cpi->pb.HeadersWritten = 1;
-
- return(0);
-}
-
-static void theora_encode_clear (theora_state *th){
- CP_INSTANCE *cpi;
- cpi=(CP_INSTANCE *)th->internal_encode;
- if(cpi){
-
- ClearHuffmanSet(&cpi->pb);
- ClearFragmentInfo(&cpi->pb);
- ClearFrameInfo(&cpi->pb);
- EClearFragmentInfo(cpi);
- EClearFrameInfo(cpi);
- ClearTmpBuffers(&cpi->pb);
- ClearPPInstance(&cpi->pp);
-
- oggpackB_writeclear(cpi->oggbuffer);
- _ogg_free(cpi->oggbuffer);
- _ogg_free(cpi);
- }
-
- memset(th,0,sizeof(*th));
-}
-
-
-/* returns, in seconds, absolute time of current packet in given
- logical stream */
-static double theora_encode_granule_time(theora_state *th,
- ogg_int64_t granulepos){
-#ifndef THEORA_DISABLE_FLOAT
- CP_INSTANCE *cpi=(CP_INSTANCE *)(th->internal_encode);
- PB_INSTANCE *pbi=(PB_INSTANCE *)(th->internal_decode);
-
- if(cpi)pbi=&cpi->pb;
-
- if(granulepos>=0){
- ogg_int64_t iframe=granulepos>>pbi->keyframe_granule_shift;
- ogg_int64_t pframe=granulepos-(iframe<<pbi->keyframe_granule_shift);
-
- return (iframe+pframe)*
- ((double)pbi->info.fps_denominator/pbi->info.fps_numerator);
-
- }
-#endif
-
- return(-1); /* negative granulepos or float calculations disabled */
-}
-
-/* returns frame number of current packet in given logical stream */
-static ogg_int64_t theora_encode_granule_frame(theora_state *th,
- ogg_int64_t granulepos){
- CP_INSTANCE *cpi=(CP_INSTANCE *)(th->internal_encode);
- PB_INSTANCE *pbi=(PB_INSTANCE *)(th->internal_decode);
-
- if(cpi)pbi=&cpi->pb;
-
- if(granulepos>=0){
- ogg_int64_t iframe=granulepos>>pbi->keyframe_granule_shift;
- ogg_int64_t pframe=granulepos-(iframe<<pbi->keyframe_granule_shift);
-
- return (iframe+pframe-1);
- }
-
- return(-1);
-}
-
-
-static int theora_encode_control(theora_state *th,int req,
- void *buf,size_t buf_sz) {
- CP_INSTANCE *cpi;
- PB_INSTANCE *pbi;
- int value;
-
- if(th == NULL)
- return TH_EFAULT;
-
- cpi = th->internal_encode;
- pbi = &cpi->pb;
-
- switch(req) {
- case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:
- {
- ogg_uint32_t keyframe_frequency_force;
- if( (buf==NULL) || (buf_sz!=sizeof(ogg_uint32_t))) return TH_EINVAL;
- keyframe_frequency_force=*(ogg_uint32_t *)buf;
-
- keyframe_frequency_force=
- OC_MINI(keyframe_frequency_force,
- 1U<<cpi->pb.keyframe_granule_shift);
- cpi->pb.info.keyframe_frequency_force=
- OC_MAXI(1,keyframe_frequency_force);
- *(ogg_uint32_t *)buf=cpi->pb.info.keyframe_frequency_force;
- return 0;
- }
- case TH_ENCCTL_SET_QUANT_PARAMS:
- if( ( buf==NULL&&buf_sz!=0 )
- || ( buf!=NULL&&buf_sz!=sizeof(th_quant_info) )
- || cpi->pb.HeadersWritten ){
- return TH_EINVAL;
- }
-
- if(buf==NULL)
- memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info));
- else
- memcpy(&pbi->quant_info, buf, sizeof(th_quant_info));
- InitQTables(pbi);
-
- return 0;
- case TH_ENCCTL_SET_VP3_COMPATIBLE:
- if(cpi->pb.HeadersWritten)
- return TH_EINVAL;
-
- memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info));
- InitQTables(pbi);
-
- return 0;
- case TH_ENCCTL_SET_SPLEVEL:
- if(buf == NULL || buf_sz != sizeof(int))
- return TH_EINVAL;
-
- memcpy(&value, buf, sizeof(int));
-
- switch(value) {
- case 0:
- cpi->MotionCompensation = 1;
- pbi->info.quick_p = 0;
- break;
-
- case 1:
- cpi->MotionCompensation = 1;
- pbi->info.quick_p = 1;
- break;
-
- case 2:
- cpi->MotionCompensation = 0;
- pbi->info.quick_p = 1;
- break;
-
- default:
- return TH_EINVAL;
- }
-
- return 0;
- case TH_ENCCTL_GET_SPLEVEL_MAX:
- value = 2;
- memcpy(buf, &value, sizeof(int));
- return 0;
- default:
- return TH_EIMPL;
- }
-}
-
-static void theora_encode_dispatch_init(CP_INSTANCE *cpi){
- cpi->dispatch_vtbl.clear=theora_encode_clear;
- cpi->dispatch_vtbl.control=theora_encode_control;
- cpi->dispatch_vtbl.granule_frame=theora_encode_granule_frame;
- cpi->dispatch_vtbl.granule_time=theora_encode_granule_time;
-}
diff --git a/Engine/lib/libtheora/lib/enc/frarray.c b/Engine/lib/libtheora/lib/enc/frarray.c
deleted file mode 100644
index 51b327206..000000000
--- a/Engine/lib/libtheora/lib/enc/frarray.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: frarray.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include
-#include "codec_internal.h"
-#include "block_inline.h"
-
-/* Long run bit string coding */
-static ogg_uint32_t FrArrayCodeSBRun( CP_INSTANCE *cpi, ogg_uint32_t value){
- ogg_uint32_t CodedVal = 0;
- ogg_uint32_t CodedBits = 0;
-
- /* Coding scheme:
- Codeword RunLength
- 0 1
- 10x 2-3
- 110x 4-5
- 1110xx 6-9
- 11110xxx 10-17
- 111110xxxx 18-33
- 111111xxxxxxxxxxxx 34-4129 */
-
- if ( value == 1 ){
- CodedVal = 0;
- CodedBits = 1;
- } else if ( value <= 3 ) {
- CodedVal = 0x0004 + (value - 2);
- CodedBits = 3;
- } else if ( value <= 5 ) {
- CodedVal = 0x000C + (value - 4);
- CodedBits = 4;
- } else if ( value <= 9 ) {
- CodedVal = 0x0038 + (value - 6);
- CodedBits = 6;
- } else if ( value <= 17 ) {
- CodedVal = 0x00F0 + (value - 10);
- CodedBits = 8;
- } else if ( value <= 33 ) {
- CodedVal = 0x03E0 + (value - 18);
- CodedBits = 10;
- } else {
- CodedVal = 0x3F000 + (value - 34);
- CodedBits = 18;
- }
-
- /* Add the bits to the encode holding buffer. */
- oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
-
- return CodedBits;
-}
-
-/* Short run bit string coding */
-static ogg_uint32_t FrArrayCodeBlockRun( CP_INSTANCE *cpi,
- ogg_uint32_t value ) {
- ogg_uint32_t CodedVal = 0;
- ogg_uint32_t CodedBits = 0;
-
- /* Coding scheme:
- Codeword RunLength
- 0x 1-2
- 10x 3-4
- 110x 5-6
- 1110xx 7-10
- 11110xx 11-14
- 11111xxxx 15-30 */
-
- if ( value <= 2 ) {
- CodedVal = value - 1;
- CodedBits = 2;
- } else if ( value <= 4 ) {
- CodedVal = 0x0004 + (value - 3);
- CodedBits = 3;
-
- } else if ( value <= 6 ) {
- CodedVal = 0x000C + (value - 5);
- CodedBits = 4;
-
- } else if ( value <= 10 ) {
- CodedVal = 0x0038 + (value - 7);
- CodedBits = 6;
-
- } else if ( value <= 14 ) {
- CodedVal = 0x0078 + (value - 11);
- CodedBits = 7;
- } else {
- CodedVal = 0x01F0 + (value - 15);
- CodedBits = 9;
- }
-
- /* Add the bits to the encode holding buffer. */
- oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
-
- return CodedBits;
-}
-
-void PackAndWriteDFArray( CP_INSTANCE *cpi ){
- ogg_uint32_t i;
- unsigned char val;
- ogg_uint32_t run_count;
-
- ogg_uint32_t SB, MB, B; /* Block, MB and SB loop variables */
- ogg_uint32_t BListIndex = 0;
- ogg_uint32_t LastSbBIndex = 0;
- ogg_int32_t DfBlockIndex; /* Block index in display_fragments */
-
- /* Initialise workspaces */
- memset( cpi->pb.SBFullyFlags, 1, cpi->pb.SuperBlocks);
- memset( cpi->pb.SBCodedFlags, 0, cpi->pb.SuperBlocks );
- memset( cpi->PartiallyCodedFlags, 0, cpi->pb.SuperBlocks );
- memset( cpi->BlockCodedFlags, 0, cpi->pb.UnitFragments);
-
- for( SB = 0; SB < cpi->pb.SuperBlocks; SB++ ) {
- /* Check for coded blocks and macro-blocks */
- for ( MB=0; MB<4; MB++ ) {
- /* If MB in frame */
- if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) >= 0 ) {
- for ( B=0; B<4; B++ ) {
- DfBlockIndex = QuadMapToIndex1( cpi->pb.BlockMap,SB, MB, B );
-
- /* Does Block lie in frame: */
- if ( DfBlockIndex >= 0 ) {
- /* In Frame: If it is not coded then this SB is only
- partly coded.: */
- if ( cpi->pb.display_fragments[DfBlockIndex] ) {
- cpi->pb.SBCodedFlags[SB] = 1; /* SB at least partly coded */
- cpi->BlockCodedFlags[BListIndex] = 1; /* Block is coded */
-
- }else{
- cpi->pb.SBFullyFlags[SB] = 0; /* SB not fully coded */
- cpi->BlockCodedFlags[BListIndex] = 0; /* Block is not coded */
- }
-
- BListIndex++;
- }
- }
- }
- }
-
- /* Is the SB fully coded or uncoded.
- If so then backup BListIndex and MBListIndex */
- if ( cpi->pb.SBFullyFlags[SB] || !cpi->pb.SBCodedFlags[SB] ) {
- BListIndex = LastSbBIndex; /* Reset to values from previous SB */
- }else{
- cpi->PartiallyCodedFlags[SB] = 1; /* Set up list of partially
- coded SBs */
- LastSbBIndex = BListIndex;
- }
- }
-
- /* Code list of partially coded Super-Block. */
- val = cpi->PartiallyCodedFlags[0];
- oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
- i = 0;
- while ( i < cpi->pb.SuperBlocks ) {
- run_count = 0;
- while ( (i<cpi->pb.SuperBlocks) &&
- (cpi->PartiallyCodedFlags[i]==val) &&
- run_count<4129 ) {
- i++;
- run_count++;
- }
-
- /* Code the run */
- FrArrayCodeSBRun( cpi, run_count);
-
- if(run_count >= 4129 && i < cpi->pb.SuperBlocks ){
- val = cpi->PartiallyCodedFlags[i];
- oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
- }else
- val = ( val == 0 ) ? 1 : 0;
- }
-
- /* RLC Super-Block fully/not coded. */
- i = 0;
-
- /* Skip partially coded blocks */
- while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] )
- i++;
-
- if ( i < cpi->pb.SuperBlocks ) {
- val = cpi->pb.SBFullyFlags[i];
- oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
- while ( i < cpi->pb.SuperBlocks ) {
- run_count = 0;
- while ( (i < cpi->pb.SuperBlocks) &&
- (cpi->pb.SBFullyFlags[i] == val) &&
- run_count < 4129) {
- i++;
- /* Skip partially coded blocks */
- while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] )
- i++;
- run_count++;
- }
-
- /* Code the run */
- FrArrayCodeSBRun( cpi, run_count );
-
- if(run_count >= 4129 && i < cpi->pb.SuperBlocks ){
- val = cpi->PartiallyCodedFlags[i];
- oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
- }else
- val = ( val == 0 ) ? 1 : 0;
- }
- }
-
-
- /* Now code the block flags */
- if ( BListIndex > 0 ) {
- /* Code the block flags start value */
- val = cpi->BlockCodedFlags[0];
- oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
- /* Now code the block flags. */
- for ( i = 0; i < BListIndex; ) {
- run_count = 0;
- while ( (i < BListIndex) && (cpi->BlockCodedFlags[i] == val) ) {
- i++;
- run_count++;
- }
-
- FrArrayCodeBlockRun( cpi, run_count );
-
- val = ( val == 0 ) ? 1 : 0;
- }
- }
-}
diff --git a/Engine/lib/libtheora/lib/enc/frinit.c b/Engine/lib/libtheora/lib/enc/frinit.c
deleted file mode 100644
index ae6bbd64f..000000000
--- a/Engine/lib/libtheora/lib/enc/frinit.c
+++ /dev/null
@@ -1,392 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: frinit.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include
-#include "codec_internal.h"
-
-
-void InitializeFragCoordinates(PB_INSTANCE *pbi){
-
- ogg_uint32_t i, j;
-
- ogg_uint32_t HorizFrags = pbi->HFragments;
- ogg_uint32_t VertFrags = pbi->VFragments;
- ogg_uint32_t StartFrag = 0;
-
- /* Y */
-
- for(i = 0; i< VertFrags; i++){
- for(j = 0; j< HorizFrags; j++){
-
- ogg_uint32_t ThisFrag = i * HorizFrags + j;
- pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH;
- pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH;
-
- }
- }
-
- /* U */
- HorizFrags >>= 1;
- VertFrags >>= 1;
- StartFrag = pbi->YPlaneFragments;
-
- for(i = 0; i< VertFrags; i++) {
- for(j = 0; j< HorizFrags; j++) {
- ogg_uint32_t ThisFrag = StartFrag + i * HorizFrags + j;
- pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH;
- pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH;
-
- }
- }
-
- /* V */
- StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments;
- for(i = 0; i< VertFrags; i++) {
- for(j = 0; j< HorizFrags; j++) {
- ogg_uint32_t ThisFrag = StartFrag + i * HorizFrags + j;
- pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH;
- pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH;
-
- }
- }
-}
-
-static void CalcPixelIndexTable( PB_INSTANCE *pbi){
- ogg_uint32_t i;
- ogg_uint32_t * PixelIndexTablePtr;
-
- /* Calculate the pixel index table for normal image buffers */
- PixelIndexTablePtr = pbi->pixel_index_table;
- for ( i = 0; i < pbi->YPlaneFragments; i++ ) {
- PixelIndexTablePtr[ i ] =
- ((i / pbi->HFragments) * VFRAGPIXELS *
- pbi->info.width);
- PixelIndexTablePtr[ i ] +=
- ((i % pbi->HFragments) * HFRAGPIXELS);
- }
-
- PixelIndexTablePtr = &pbi->pixel_index_table[pbi->YPlaneFragments];
- for ( i = 0; i < ((pbi->HFragments >> 1) * pbi->VFragments); i++ ) {
- PixelIndexTablePtr[ i ] =
- ((i / (pbi->HFragments / 2) ) *
- (VFRAGPIXELS *
- (pbi->info.width / 2)) );
- PixelIndexTablePtr[ i ] +=
- ((i % (pbi->HFragments / 2) ) *
- HFRAGPIXELS) + pbi->YPlaneSize;
- }
-
- /************************************************************************/
- /* Now calculate the pixel index table for image reconstruction buffers */
- PixelIndexTablePtr = pbi->recon_pixel_index_table;
- for ( i = 0; i < pbi->YPlaneFragments; i++ ){
- PixelIndexTablePtr[ i ] =
- ((i / pbi->HFragments) * VFRAGPIXELS *
- pbi->YStride);
- PixelIndexTablePtr[ i ] +=
- ((i % pbi->HFragments) * HFRAGPIXELS) +
- pbi->ReconYDataOffset;
- }
-
- /* U blocks */
- PixelIndexTablePtr = &pbi->recon_pixel_index_table[pbi->YPlaneFragments];
- for ( i = 0; i < pbi->UVPlaneFragments; i++ ) {
- PixelIndexTablePtr[ i ] =
- ((i / (pbi->HFragments / 2) ) *
- (VFRAGPIXELS * (pbi->UVStride)) );
- PixelIndexTablePtr[ i ] +=
- ((i % (pbi->HFragments / 2) ) *
- HFRAGPIXELS) + pbi->ReconUDataOffset;
- }
-
- /* V blocks */
- PixelIndexTablePtr =
- &pbi->recon_pixel_index_table[pbi->YPlaneFragments +
- pbi->UVPlaneFragments];
-
- for ( i = 0; i < pbi->UVPlaneFragments; i++ ) {
- PixelIndexTablePtr[ i ] =
- ((i / (pbi->HFragments / 2) ) *
- (VFRAGPIXELS * (pbi->UVStride)) );
- PixelIndexTablePtr[ i ] +=
- ((i % (pbi->HFragments / 2) ) * HFRAGPIXELS) +
- pbi->ReconVDataOffset;
- }
-}
-
-void ClearFragmentInfo(PB_INSTANCE * pbi){
-
- /* free prior allocs if present */
- if(pbi->display_fragments) _ogg_free(pbi->display_fragments);
- if(pbi->pixel_index_table) _ogg_free(pbi->pixel_index_table);
- if(pbi->recon_pixel_index_table) _ogg_free(pbi->recon_pixel_index_table);
- if(pbi->FragTokenCounts) _ogg_free(pbi->FragTokenCounts);
- if(pbi->CodedBlockList) _ogg_free(pbi->CodedBlockList);
- if(pbi->FragMVect) _ogg_free(pbi->FragMVect);
- if(pbi->FragCoeffs) _ogg_free(pbi->FragCoeffs);
- if(pbi->FragCoefEOB) _ogg_free(pbi->FragCoefEOB);
- if(pbi->skipped_display_fragments) _ogg_free(pbi->skipped_display_fragments);
- if(pbi->QFragData) _ogg_free(pbi->QFragData);
- if(pbi->TokenList) _ogg_free(pbi->TokenList);
- if(pbi->FragCodingMethod) _ogg_free(pbi->FragCodingMethod);
- if(pbi->FragCoordinates) _ogg_free(pbi->FragCoordinates);
-
- if(pbi->FragQIndex) _ogg_free(pbi->FragQIndex);
- if(pbi->PPCoefBuffer) _ogg_free(pbi->PPCoefBuffer);
- if(pbi->FragmentVariances) _ogg_free(pbi->FragmentVariances);
-
- if(pbi->BlockMap) _ogg_free(pbi->BlockMap);
-
- if(pbi->SBCodedFlags) _ogg_free(pbi->SBCodedFlags);
- if(pbi->SBFullyFlags) _ogg_free(pbi->SBFullyFlags);
- if(pbi->MBFullyFlags) _ogg_free(pbi->MBFullyFlags);
- if(pbi->MBCodedFlags) _ogg_free(pbi->MBCodedFlags);
-
- if(pbi->_Nodes) _ogg_free(pbi->_Nodes);
- pbi->_Nodes = 0;
-
- pbi->QFragData = 0;
- pbi->TokenList = 0;
- pbi->skipped_display_fragments = 0;
- pbi->FragCoeffs = 0;
- pbi->FragCoefEOB = 0;
- pbi->display_fragments = 0;
- pbi->pixel_index_table = 0;
- pbi->recon_pixel_index_table = 0;
- pbi->FragTokenCounts = 0;
- pbi->CodedBlockList = 0;
- pbi->FragCodingMethod = 0;
- pbi->FragMVect = 0;
- pbi->MBCodedFlags = 0;
- pbi->MBFullyFlags = 0;
- pbi->BlockMap = 0;
-
- pbi->SBCodedFlags = 0;
- pbi->SBFullyFlags = 0;
- pbi->QFragData = 0;
- pbi->TokenList = 0;
- pbi->skipped_display_fragments = 0;
- pbi->FragCoeffs = 0;
- pbi->FragCoefEOB = 0;
- pbi->display_fragments = 0;
- pbi->pixel_index_table = 0;
- pbi->recon_pixel_index_table = 0;
- pbi->FragTokenCounts = 0;
- pbi->CodedBlockList = 0;
- pbi->FragCodingMethod = 0;
- pbi->FragCoordinates = 0;
- pbi->FragMVect = 0;
-
- pbi->PPCoefBuffer=0;
- pbi->PPCoefBuffer=0;
- pbi->FragQIndex = 0;
- pbi->FragQIndex = 0;
- pbi->FragmentVariances= 0;
- pbi->FragmentVariances = 0 ;
-}
-
-void InitFragmentInfo(PB_INSTANCE * pbi){
-
- /* clear any existing info */
- ClearFragmentInfo(pbi);
-
- /* Perform Fragment Allocations */
- pbi->display_fragments =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->display_fragments));
-
- pbi->pixel_index_table =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->pixel_index_table));
-
- pbi->recon_pixel_index_table =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->recon_pixel_index_table));
-
- pbi->FragTokenCounts =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragTokenCounts));
-
- pbi->CodedBlockList =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->CodedBlockList));
-
- pbi->FragMVect =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragMVect));
-
- pbi->FragCoeffs =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoeffs));
-
- pbi->FragCoefEOB =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoefEOB));
-
- pbi->skipped_display_fragments =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->skipped_display_fragments));
-
- pbi->QFragData =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->QFragData));
-
- pbi->TokenList =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->TokenList));
-
- pbi->FragCodingMethod =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCodingMethod));
-
- pbi->FragCoordinates =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoordinates));
-
- pbi->FragQIndex =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragQIndex));
-
- pbi->PPCoefBuffer =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->PPCoefBuffer));
-
- pbi->FragmentVariances =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragmentVariances));
-
- pbi->_Nodes =
- _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->_Nodes));
-
- /* Super Block Initialization */
- pbi->SBCodedFlags =
- _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBCodedFlags));
-
- pbi->SBFullyFlags =
- _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBFullyFlags));
-
- /* Macro Block Initialization */
- pbi->MBCodedFlags =
- _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBCodedFlags));
-
- pbi->MBFullyFlags =
- _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBFullyFlags));
-
- pbi->BlockMap =
- _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->BlockMap));
-
-}
-
-void ClearFrameInfo(PB_INSTANCE * pbi){
- if(pbi->ThisFrameRecon )
- _ogg_free(pbi->ThisFrameRecon );
- if(pbi->GoldenFrame)
- _ogg_free(pbi->GoldenFrame);
- if(pbi->LastFrameRecon)
- _ogg_free(pbi->LastFrameRecon);
- if(pbi->PostProcessBuffer)
- _ogg_free(pbi->PostProcessBuffer);
-
-
- pbi->ThisFrameRecon = 0;
- pbi->GoldenFrame = 0;
- pbi->LastFrameRecon = 0;
- pbi->PostProcessBuffer = 0;
-
-
- pbi->ThisFrameRecon = 0;
- pbi->GoldenFrame = 0;
- pbi->LastFrameRecon = 0;
- pbi->PostProcessBuffer = 0;
-
-}
-
-void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize){
-
- /* clear any existing info */
- ClearFrameInfo(pbi);
-
- /* allocate frames */
- pbi->ThisFrameRecon =
- _ogg_malloc(FrameSize*sizeof(*pbi->ThisFrameRecon));
-
- pbi->GoldenFrame =
- _ogg_malloc(FrameSize*sizeof(*pbi->GoldenFrame));
-
- pbi->LastFrameRecon =
- _ogg_malloc(FrameSize*sizeof(*pbi->LastFrameRecon));
-
- pbi->PostProcessBuffer =
- _ogg_malloc(FrameSize*sizeof(*pbi->PostProcessBuffer));
-
-}
-
-void InitFrameDetails(PB_INSTANCE *pbi){
- int FrameSize;
-
- /*pbi->PostProcessingLevel = 0;
- pbi->PostProcessingLevel = 4;
- pbi->PostProcessingLevel = 5;
- pbi->PostProcessingLevel = 6;*/
-
- pbi->PostProcessingLevel = 0;
-
-
- /* Set the frame size etc. */
-
- pbi->YPlaneSize = pbi->info.width *
- pbi->info.height;
- pbi->UVPlaneSize = pbi->YPlaneSize / 4;
- pbi->HFragments = pbi->info.width / HFRAGPIXELS;
- pbi->VFragments = pbi->info.height / VFRAGPIXELS;
- pbi->UnitFragments = ((pbi->VFragments * pbi->HFragments)*3)/2;
- pbi->YPlaneFragments = pbi->HFragments * pbi->VFragments;
- pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
-
- pbi->YStride = (pbi->info.width + STRIDE_EXTRA);
- pbi->UVStride = pbi->YStride / 2;
- pbi->ReconYPlaneSize = pbi->YStride *
- (pbi->info.height + STRIDE_EXTRA);
- pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
- FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
-
- pbi->YDataOffset = 0;
- pbi->UDataOffset = pbi->YPlaneSize;
- pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
- pbi->ReconYDataOffset =
- (pbi->YStride * UMV_BORDER) + UMV_BORDER;
- pbi->ReconUDataOffset = pbi->ReconYPlaneSize +
- (pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
- pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize +
- (pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
-
- /* Image dimensions in Super-Blocks */
- pbi->YSBRows = (pbi->info.height/32) +
- ( pbi->info.height%32 ? 1 : 0 );
- pbi->YSBCols = (pbi->info.width/32) +
- ( pbi->info.width%32 ? 1 : 0 );
- pbi->UVSBRows = ((pbi->info.height/2)/32) +
- ( (pbi->info.height/2)%32 ? 1 : 0 );
- pbi->UVSBCols = ((pbi->info.width/2)/32) +
- ( (pbi->info.width/2)%32 ? 1 : 0 );
-
- /* Super-Blocks per component */
- pbi->YSuperBlocks = pbi->YSBRows * pbi->YSBCols;
- pbi->UVSuperBlocks = pbi->UVSBRows * pbi->UVSBCols;
- pbi->SuperBlocks = pbi->YSuperBlocks+2*pbi->UVSuperBlocks;
-
- /* Useful externals */
- pbi->MacroBlocks = ((pbi->VFragments+1)/2)*((pbi->HFragments+1)/2);
-
- InitFragmentInfo(pbi);
- InitFrameInfo(pbi, FrameSize);
- InitializeFragCoordinates(pbi);
-
- /* Configure mapping between quad-tree and fragments */
- CreateBlockMapping ( pbi->BlockMap, pbi->YSuperBlocks,
- pbi->UVSuperBlocks, pbi->HFragments, pbi->VFragments);
-
- /* Re-initialise the pixel index table. */
-
- CalcPixelIndexTable( pbi );
-
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/hufftables.h b/Engine/lib/libtheora/lib/enc/hufftables.h
deleted file mode 100644
index eb4be22c2..000000000
--- a/Engine/lib/libtheora/lib/enc/hufftables.h
+++ /dev/null
@@ -1,1034 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: hufftables.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "../dec/huffman.h"
-#include "codec_internal.h"
-
-const unsigned char ExtraBitLengths_VP31[MAX_ENTROPY_TOKENS] = {
- 0, 0, 0, 2, 3, 4, 12,3, 6, /* EOB and Zero-run tokens. */
- 0, 0, 0, 0, /* Very low value tokens. */
- 1, 1, 1, 1, 2, 3, 4, 5, 6, 10, /* Other value tokens */
- 1, 1, 1, 1, 1, 3, 4, /* Category 1 runs. */
- 2, 3, /* Category 2 runs. */
-};
-
-#define NEW_FREQS 0 /* dbm - test new frequency tables */
-
-#if NEW_FREQS
-/* New baseline frequency tables for encoder version >= 2 */
-const ogg_uint32_t FrequencyCounts_VP3[NUM_HUFF_TABLES][MAX_ENTROPY_TOKENS] = {
- /* DC Intra bias */
- { 272, 84, 31, 36, 10, 2, 1, 92, 1,
- 701, 872, 410, 478,
- 630, 502, 417, 356, 582, 824, 985, 965, 697, 606,
- 125, 119, 40, 3, 9, 15, 10,
- 73, 37,
- },
- { 311, 107, 41, 51, 18, 4, 2, 120, 1,
- 824, 1037, 468, 541,
- 714, 555, 451, 374, 595, 819, 929, 817, 474, 220,
- 172, 142, 27, 4, 9, 10, 2,
- 98, 48,
- },
- { 353, 125, 49, 66, 24, 6, 2, 124, 1,
- 926, 1172, 512, 594,
- 766, 581, 458, 379, 590, 789, 849, 665, 306, 80,
- 204, 147, 25, 5, 12, 9, 2,
- 108, 54,
- },
- { 392, 141, 57, 75, 31, 7, 4, 138, 1,
- 1050, 1321, 559, 649,
- 806, 594, 460, 372, 568, 727, 710, 475, 155, 19,
- 251, 174, 27, 7, 16, 8, 2,
- 126, 62,
- },
- { 455, 168, 66, 87, 39, 10, 6, 124, 2,
- 1143, 1455, 592, 692,
- 824, 596, 453, 361, 542, 657, 592, 329, 78, 5,
- 269, 184, 27, 9, 19, 7, 2,
- 127, 66,
- },
- { 544, 201, 80, 102, 45, 11, 6, 99, 1,
- 1236, 1587, 610, 720,
- 833, 590, 444, 348, 506, 588, 487, 226, 39, 2,
- 253, 178, 27, 10, 20, 7, 2,
- 118, 65,
- },
- { 649, 241, 98, 121, 54, 14, 8, 84, 1,
- 1349, 1719, 634, 763,
- 847, 583, 428, 323, 456, 492, 349, 120, 13, 1,
- 231, 170, 24, 8, 19, 7, 1,
- 109, 67,
- },
- { 824, 304, 129, 158, 66, 19, 10, 44, 2,
- 1476, 1925, 644, 794,
- 838, 559, 396, 289, 392, 384, 223, 53, 3, 1,
- 159, 121, 17, 6, 16, 6, 2,
- 69, 53,
- },
-
- /* DC Inter Bias */
- { 534, 174, 71, 68, 10, 1, 1, 68, 119,
- 1674, 1526, 560, 536,
- 539, 331, 229, 168, 233, 262, 231, 149, 71, 51,
- 629, 530, 284, 126, 182, 208, 184,
- 148, 87,
- },
- { 594, 195, 77, 71, 9, 1, 1, 47, 89,
- 1723, 1592, 595, 570,
- 574, 351, 241, 176, 243, 271, 234, 144, 65, 37,
- 534, 449, 240, 117, 167, 277, 153,
- 96, 54,
- },
- { 642, 213, 88, 83, 12, 1, 1, 40, 80,
- 1751, 1630, 621, 600,
- 598, 367, 250, 183, 251, 276, 235, 143, 62, 28,
- 485, 397, 212, 110, 161, 193, 141,
- 84, 48,
- },
- { 693, 258, 114, 131, 27, 3, 1, 44, 79,
- 1794, 1644, 550, 533,
- 518, 314, 213, 154, 209, 223, 174, 97, 40, 14,
- 584, 463, 236, 138, 196, 249, 143,
- 94, 54,
- },
- { 758, 303, 144, 189, 53, 8, 1, 37, 69,
- 1842, 1732, 513, 504,
- 478, 287, 191, 137, 182, 186, 137, 72, 31, 6,
- 589, 469, 199, 128, 177, 264, 161,
- 89, 49,
- },
- { 817, 344, 170, 243, 84, 18, 2, 30, 65,
- 1836, 1733, 518, 511,
- 477, 281, 185, 130, 169, 166, 117, 59, 25, 3,
- 572, 450, 185, 121, 173, 232, 146,
- 80, 43,
- },
- { 865, 389, 204, 322, 139, 42, 9, 26, 51,
- 1848, 1766, 531, 522,
- 477, 275, 177, 122, 153, 144, 97, 50, 16, 1,
- 485, 378, 167, 115, 164, 203, 128,
- 74, 42,
- },
- { 961, 447, 243, 407, 196, 74, 26, 12, 34,
- 2003, 1942, 571, 565,
- 494, 278, 173, 116, 141, 129, 85, 44, 8, 1,
- 285, 223, 101, 66, 104, 120, 74,
- 35, 22,
- },
-
- /* AC INTRA Tables */
- /* AC Intra bias group 1 tables */
- { 245, 68, 25, 28, 5, 1, 1, 359, 4,
- 910, 904, 570, 571,
- 766, 620, 478, 375, 554, 684, 652, 441, 182, 30,
- 535, 206, 118, 77, 69, 90, 16,
- 299, 100,
- },
- { 302, 86, 32, 36, 8, 1, 1, 362, 3,
- 974, 968, 599, 599,
- 774, 635, 469, 365, 528, 628, 557, 337, 118, 14,
- 577, 219, 136, 82, 69, 65, 13,
- 317, 112,
- },
- { 348, 102, 39, 44, 9, 2, 1, 363, 3,
- 1062, 1055, 607, 609,
- 787, 626, 457, 348, 494, 550, 452, 233, 60, 2,
- 636, 244, 159, 92, 74, 68, 12,
- 327, 119,
- },
- { 400, 121, 47, 51, 11, 2, 1, 366, 3,
- 1109, 1102, 620, 622,
- 786, 624, 450, 331, 459, 490, 366, 163, 29, 1,
- 673, 257, 175, 98, 77, 63, 14,
- 344, 131,
- },
- { 470, 151, 59, 67, 15, 3, 1, 354, 4,
- 1198, 1189, 640, 643,
- 769, 603, 410, 294, 386, 381, 240, 78, 5, 1,
- 746, 282, 205, 113, 87, 64, 15,
- 368, 145,
- },
- { 553, 189, 77, 94, 24, 6, 1, 347, 4,
- 1244, 1232, 650, 653,
- 739, 551, 360, 249, 303, 261, 129, 24, 1, 1,
- 828, 313, 245, 135, 108, 77, 17,
- 403, 169,
- },
- { 701, 253, 109, 140, 42, 12, 2, 350, 6,
- 1210, 1197, 652, 647,
- 673, 495, 299, 189, 211, 151, 50, 2, 1, 1,
- 892, 336, 284, 162, 134, 101, 25,
- 455, 205,
- },
- { 924, 390, 180, 248, 85, 31, 13, 286, 14,
- 1242, 1206, 601, 577,
- 519, 342, 175, 100, 85, 36, 1, 1, 1, 1,
- 1031, 348, 346, 204, 166, 131, 34,
- 473, 197,
- },
- /* AC Inter bias group 1 tables */
- { 459, 128, 50, 48, 8, 1, 1, 224, 69,
- 1285, 1227, 587, 565,
- 573, 406, 261, 180, 228, 213, 130, 47, 11, 3,
- 1069, 540, 309, 231, 147, 279, 157,
- 383, 165,
- },
- { 524, 155, 62, 64, 14, 2, 1, 209, 63,
- 1345, 1288, 523, 507,
- 515, 358, 225, 153, 183, 160, 87, 29, 7, 2,
- 1151, 591, 365, 282, 179, 308, 133,
- 344, 157,
- },
- { 588, 181, 75, 81, 19, 3, 1, 204, 68,
- 1344, 1288, 517, 503,
- 505, 346, 216, 141, 169, 139, 71, 21, 5, 1,
- 1146, 584, 366, 286, 170, 298, 153,
- 342, 157,
- },
- { 634, 196, 82, 89, 22, 4, 1, 194, 60,
- 1356, 1312, 515, 502,
- 489, 331, 199, 127, 145, 111, 51, 14, 3, 1,
- 1156, 589, 393, 300, 182, 285, 144,
- 340, 159,
- },
- { 715, 231, 98, 113, 31, 7, 1, 181, 57,
- 1345, 1303, 498, 490,
- 448, 291, 166, 101, 106, 75, 30, 9, 1, 1,
- 1175, 584, 416, 321, 209, 333, 164,
- 330, 159,
- },
- { 825, 283, 125, 149, 44, 11, 2, 160, 59,
- 1343, 1308, 476, 469,
- 405, 247, 131, 75, 76, 47, 18, 5, 1, 1,
- 1192, 579, 432, 332, 217, 327, 176,
- 320, 154,
- },
- { 961, 361, 170, 215, 70, 20, 5, 161, 55,
- 1250, 1218, 463, 460,
- 354, 204, 101, 52, 48, 28, 11, 1, 1, 1,
- 1172, 570, 449, 350, 222, 332, 169,
- 338, 174,
- },
- { 1139, 506, 266, 387, 156, 57, 26, 114, 48,
- 1192, 1170, 366, 366,
- 226, 113, 47, 22, 22, 12, 1, 1, 1, 1,
- 1222, 551, 462, 391, 220, 322, 156,
- 290, 136,
- },
-
- /* AC Intra bias group 2 tables */
- { 245, 49, 15, 11, 1, 1, 1, 332, 38,
- 1163, 1162, 685, 683,
- 813, 623, 437, 318, 421, 424, 288, 109, 14, 1,
- 729, 303, 179, 112, 87, 199, 46,
- 364, 135,
- },
- { 305, 67, 22, 17, 2, 1, 1, 329, 39,
- 1250, 1245, 706, 705,
- 801, 584, 385, 267, 330, 296, 165, 40, 3, 1,
- 798, 340, 206, 131, 108, 258, 52,
- 382, 154,
- },
- { 356, 82, 28, 23, 3, 1, 1, 312, 42,
- 1340, 1334, 701, 703,
- 770, 545, 346, 227, 269, 223, 100, 17, 1, 1,
- 846, 359, 222, 142, 120, 284, 55,
- 379, 157,
- },
- { 402, 95, 33, 30, 4, 1, 1, 300, 43,
- 1379, 1371, 710, 714,
- 724, 486, 289, 182, 202, 144, 47, 5, 1, 1,
- 908, 394, 250, 161, 141, 350, 60,
- 391, 171,
- },
- { 499, 122, 44, 42, 7, 1, 1, 267, 45,
- 1439, 1436, 690, 694,
- 628, 385, 213, 122, 117, 62, 14, 1, 1, 1,
- 992, 441, 288, 187, 167, 446, 82,
- 378, 176,
- },
- { 641, 168, 62, 60, 12, 1, 1, 247, 49,
- 1435, 1436, 662, 669,
- 527, 298, 142, 71, 55, 22, 3, 1, 1, 1,
- 1036, 470, 319, 208, 193, 548, 106,
- 362, 184,
- },
- { 860, 274, 111, 113, 23, 4, 1, 229, 59,
- 1331, 1323, 629, 645,
- 419, 192, 72, 30, 19, 6, 1, 1, 1, 1,
- 1022, 478, 339, 225, 213, 690, 142,
- 342, 198,
- },
- { 1059, 437, 218, 285, 84, 17, 2, 152, 44,
- 1284, 1313, 530, 561,
- 212, 66, 17, 6, 3, 1, 1, 1, 1, 1,
- 1034, 485, 346, 226, 207, 819, 185,
- 248, 145,
- },
- /* AC Inter bias group 2 tables */
- { 407, 93, 31, 24, 2, 1, 1, 232, 108,
- 1365, 1349, 581, 578,
- 498, 305, 170, 100, 103, 67, 24, 5, 1, 1,
- 1175, 604, 393, 268, 209, 506, 217,
- 379, 193,
- },
- { 521, 129, 46, 39, 4, 1, 1, 199, 116,
- 1419, 1403, 543, 540,
- 446, 263, 138, 78, 75, 44, 13, 2, 1, 1,
- 1201, 605, 392, 267, 214, 533, 252,
- 334, 167,
- },
- { 575, 144, 52, 46, 6, 1, 1, 193, 124,
- 1394, 1384, 528, 528,
- 406, 227, 112, 59, 54, 28, 7, 1, 1, 1,
- 1210, 621, 412, 284, 235, 604, 265,
- 320, 167,
- },
- { 673, 174, 64, 59, 9, 1, 1, 177, 128,
- 1392, 1385, 499, 499,
- 352, 183, 85, 42, 35, 16, 3, 1, 1, 1,
- 1210, 626, 418, 289, 246, 675, 297,
- 292, 158,
- },
- { 804, 225, 85, 77, 12, 1, 1, 150, 129,
- 1387, 1384, 455, 455,
- 277, 129, 53, 23, 17, 7, 1, 1, 1, 1,
- 1212, 635, 433, 306, 268, 760, 313,
- 249, 137,
- },
- { 975, 305, 123, 117, 20, 2, 1, 135, 140,
- 1312, 1310, 401, 399,
- 201, 80, 28, 11, 8, 2, 1, 1, 1, 1,
- 1162, 623, 439, 314, 283, 906, 368,
- 203, 121,
- },
- { 1205, 452, 208, 231, 50, 6, 1, 123, 149,
- 1161, 1164, 370, 370,
- 137, 45, 14, 4, 2, 1, 1, 1, 1, 1,
- 1047, 562, 413, 300, 277, 1020, 404,
- 168, 105,
- },
- { 1297, 662, 389, 574, 200, 39, 4, 55, 120,
- 1069, 1076, 273, 265,
- 66, 14, 2, 1, 1, 1, 1, 1, 1, 1,
- 930, 475, 345, 249, 236, 1124, 376,
- 91, 56,
- },
-
- /* AC Intra bias group 3 tables */
- { 278, 55, 17, 12, 1, 1, 1, 288, 71,
- 1315, 1304, 725, 724,
- 733, 506, 307, 195, 225, 175, 77, 12, 1, 1,
- 904, 414, 246, 170, 126, 290, 205,
- 423, 185,
- },
- { 382, 80, 26, 21, 2, 1, 1, 239, 64,
- 1442, 1429, 706, 701,
- 664, 420, 239, 146, 152, 105, 34, 2, 1, 1,
- 975, 440, 263, 185, 140, 332, 229,
- 397, 169,
- },
- { 451, 97, 32, 27, 4, 1, 1, 223, 75,
- 1462, 1454, 682, 680,
- 574, 343, 179, 101, 98, 54, 9, 1, 1, 1,
- 1031, 482, 293, 210, 163, 400, 297,
- 384, 181,
- },
- { 551, 128, 43, 37, 5, 1, 1, 201, 78,
- 1497, 1487, 642, 651,
- 493, 269, 133, 70, 60, 24, 2, 1, 1, 1,
- 1065, 504, 312, 228, 178, 451, 352,
- 351, 174,
- },
- { 693, 179, 63, 54, 8, 1, 1, 169, 78,
- 1502, 1497, 580, 591,
- 375, 186, 77, 35, 21, 4, 1, 1, 1, 1,
- 1099, 533, 341, 253, 206, 542, 432,
- 306, 164,
- },
- { 867, 263, 105, 96, 16, 2, 1, 152, 81,
- 1435, 1439, 521, 525,
- 270, 107, 32, 8, 3, 1, 1, 1, 1, 1,
- 1085, 537, 361, 277, 223, 616, 549,
- 258, 156,
- },
- { 1022, 385, 182, 207, 46, 7, 1, 158, 88,
- 1290, 1318, 501, 502,
- 184, 38, 6, 1, 1, 1, 1, 1, 1, 1,
- 1023, 480, 345, 301, 232, 665, 661,
- 210, 133,
- },
- { 1184, 555, 307, 457, 185, 44, 6, 115, 41,
- 1236, 1253, 329, 340,
- 32, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1017, 385, 316, 370, 246, 672, 788,
- 85, 23,
- },
- /* AC Inter bias group 3 tables */
- { 502, 106, 33, 22, 1, 1, 1, 151, 132,
- 1446, 1451, 502, 499,
- 343, 181, 84, 42, 36, 16, 3, 1, 1, 1,
- 1211, 661, 429, 312, 242, 637, 498,
- 288, 156,
- },
- { 651, 147, 48, 35, 3, 1, 1, 145, 140,
- 1419, 1420, 469, 466,
- 281, 132, 56, 25, 18, 6, 1, 1, 1, 1,
- 1175, 656, 435, 328, 260, 715, 556,
- 252, 147,
- },
- { 749, 179, 59, 43, 4, 1, 1, 123, 135,
- 1423, 1431, 413, 409,
- 221, 95, 36, 15, 9, 2, 1, 1, 1, 1,
- 1159, 658, 444, 340, 272, 782, 656,
- 205, 124,
- },
- { 902, 243, 86, 67, 7, 1, 1, 114, 141,
- 1385, 1385, 387, 383,
- 178, 67, 22, 7, 4, 1, 1, 1, 1, 1,
- 1096, 632, 434, 339, 277, 813, 735,
- 171, 109,
- },
- { 1081, 337, 133, 112, 15, 1, 1, 92, 137,
- 1350, 1349, 311, 309,
- 115, 34, 8, 2, 1, 1, 1, 1, 1, 1,
- 1016, 595, 418, 342, 283, 870, 883,
- 114, 78,
- },
- { 1253, 467, 210, 205, 34, 3, 1, 80, 130,
- 1318, 1313, 258, 260,
- 68, 12, 2, 1, 1, 1, 1, 1, 1, 1,
- 874, 516, 378, 330, 273, 877, 1000,
- 72, 53,
- },
- { 1362, 626, 333, 423, 100, 10, 1, 73, 106,
- 1311, 1313, 241, 231,
- 31, 3, 1, 1, 1, 1, 1, 1, 1, 1,
- 620, 368, 286, 302, 245, 814, 1127,
- 34, 28,
- },
- { 1203, 743, 460, 774, 284, 36, 1, 13, 25,
- 1956, 1961, 103, 106,
- 3, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 248, 131, 149, 272, 165, 535, 813,
- 3, 3,
- },
-
- /* AC Intra bias group 4 tables */
- { 599, 150, 55, 50, 9, 1, 1, 181, 19,
- 1487, 1487, 625, 625,
- 473, 271, 138, 74, 71, 42, 11, 1, 1, 1,
- 1187, 591, 356, 239, 170, 351, 137,
- 395, 194,
- },
- { 758, 209, 79, 74, 15, 2, 1, 147, 25,
- 1514, 1514, 521, 520,
- 334, 165, 74, 36, 30, 11, 1, 1, 1, 1,
- 1252, 644, 409, 279, 211, 472, 203,
- 318, 171,
- },
- { 852, 252, 100, 98, 20, 3, 1, 130, 26,
- 1493, 1498, 481, 473,
- 268, 123, 51, 23, 15, 3, 1, 1, 1, 1,
- 1256, 652, 426, 294, 231, 543, 242,
- 278, 156,
- },
- { 971, 309, 130, 136, 30, 5, 1, 113, 28,
- 1458, 1467, 443, 435,
- 215, 90, 31, 12, 5, 1, 1, 1, 1, 1,
- 1232, 643, 426, 303, 243, 590, 300,
- 235, 136,
- },
- { 1100, 399, 180, 206, 53, 9, 1, 101, 29,
- 1419, 1425, 375, 374,
- 158, 47, 10, 1, 1, 1, 1, 1, 1, 1,
- 1193, 609, 426, 319, 256, 643, 383,
- 166, 103,
- },
- { 1195, 505, 249, 326, 98, 20, 3, 102, 25,
- 1370, 1356, 355, 347,
- 104, 11, 1, 1, 1, 1, 1, 1, 1, 1,
- 1100, 568, 381, 330, 261, 642, 466,
- 105, 69,
- },
- { 1176, 608, 345, 559, 244, 57, 6, 110, 9,
- 1370, 1332, 372, 367,
- 29, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 859, 427, 269, 359, 375, 608, 451,
- 35, 20,
- },
- { 1140, 613, 391, 797, 458, 180, 37, 2, 1,
- 2037, 1697, 95, 31,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 360, 49, 23, 198, 1001, 719, 160,
- 1, 1,
- },
- /* AC Inter bias group 4 tables */
- { 931, 272, 105, 96, 16, 1, 1, 91, 52,
- 1481, 1489, 347, 349,
- 174, 74, 28, 12, 8, 3, 1, 1, 1, 1,
- 1247, 719, 490, 356, 279, 706, 363,
- 187, 110,
- },
- { 1095, 358, 148, 143, 25, 3, 1, 74, 61,
- 1439, 1457, 304, 302,
- 127, 46, 15, 5, 3, 1, 1, 1, 1, 1,
- 1138, 664, 469, 347, 282, 768, 487,
- 139, 87,
- },
- { 1192, 423, 188, 189, 36, 4, 1, 64, 61,
- 1457, 1475, 284, 282,
- 106, 35, 10, 3, 1, 1, 1, 1, 1, 1,
- 1078, 624, 440, 329, 264, 744, 507,
- 117, 73,
- },
- { 1275, 496, 231, 258, 52, 6, 1, 53, 55,
- 1458, 1470, 248, 245,
- 77, 20, 5, 1, 1, 1, 1, 1, 1, 1,
- 984, 576, 414, 323, 260, 771, 569,
- 84, 54,
- },
- { 1377, 603, 302, 367, 87, 11, 1, 37, 52,
- 1522, 1532, 207, 204,
- 47, 8, 1, 1, 1, 1, 1, 1, 1, 1,
- 840, 493, 366, 291, 231, 690, 636,
- 52, 32,
- },
- { 1409, 708, 385, 529, 148, 24, 1, 23, 37,
- 1672, 1670, 163, 162,
- 22, 2, 1, 1, 1, 1, 1, 1, 1, 1,
- 647, 364, 291, 262, 210, 574, 643,
- 26, 14,
- },
- { 1348, 778, 481, 755, 245, 53, 4, 13, 19,
- 2114, 2089, 141, 139,
- 7, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 302, 183, 162, 181, 182, 344, 437,
- 8, 3,
- },
- { 1560, 769, 410, 664, 243, 58, 1, 1, 1,
- 3017, 2788, 17, 24,
- 3, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 34, 16, 8, 55, 134, 105, 86,
- 1, 1,
- },
-};
-
-#else /* Frequency tables for encoder version < 2 */
-
-const ogg_uint32_t FrequencyCounts_VP3[NUM_HUFF_TABLES][MAX_ENTROPY_TOKENS] = {
- /* DC Intra bias */
- { 198, 62, 22, 31, 14, 6, 6, 205, 3,
- 843, 843, 415, 516,
- 660, 509, 412, 347, 560, 779, 941, 930, 661, 377,
- 170, 155, 39, 2, 9, 15, 11,
- 128, 86,
- },
- { 299, 92, 34, 39, 15, 6, 6, 132, 1,
- 851, 851, 484, 485,
- 666, 514, 416, 351, 567, 788, 953, 943, 670, 383,
- 117, 119, 26, 4, 17, 7, 1,
- 93, 56,
- },
- { 367, 115, 42, 47, 16, 6, 6, 105, 1,
- 896, 896, 492, 493,
- 667, 510, 408, 342, 547, 760, 932, 927, 656, 379,
- 114, 103, 10, 3, 6, 2, 1,
- 88, 49,
- },
- { 462, 158, 63, 76, 28, 9, 8, 145, 1,
- 1140, 1140, 573, 574,
- 754, 562, 435, 357, 555, 742, 793, 588, 274, 81,
- 154, 117, 13, 6, 12, 2, 1,
- 104, 62,
- },
- { 558, 196, 81, 99, 36, 11, 9, 135, 1,
- 1300, 1301, 606, 607,
- 779, 560, 429, 349, 536, 680, 644, 405, 153, 30,
- 171, 120, 12, 5, 14, 3, 1,
- 104, 53,
- },
- { 635, 233, 100, 122, 46, 14, 12, 113, 1,
- 1414, 1415, 631, 631,
- 785, 555, 432, 335, 513, 611, 521, 284, 89, 13,
- 170, 113, 10, 5, 14, 3, 1,
- 102, 62,
- },
- { 720, 276, 119, 154, 62, 20, 16, 101, 1,
- 1583, 1583, 661, 661,
- 794, 556, 407, 318, 447, 472, 343, 153, 35, 1,
- 172, 115, 11, 7, 14, 3, 1,
- 112, 70,
- },
- { 853, 326, 144, 184, 80, 27, 19, 52, 1,
- 1739, 1740, 684, 685,
- 800, 540, 381, 277, 364, 352, 218, 78, 13, 1,
- 139, 109, 9, 6, 20, 2, 1,
- 94, 50,
- },
-
- /* DC Inter Bias */
- { 490, 154, 57, 53, 10, 2, 1, 238, 160,
- 1391, 1390, 579, 578,
- 491, 273, 172, 118, 152, 156, 127, 79, 41, 39,
- 712, 547, 316, 125, 183, 306, 237,
- 451, 358,
- },
- { 566, 184, 70, 65, 11, 2, 1, 235, 51,
- 1414, 1414, 599, 598,
- 510, 285, 180, 124, 157, 161, 131, 82, 42, 40,
- 738, 551, 322, 138, 195, 188, 93,
- 473, 365,
- },
- { 711, 261, 111, 126, 27, 4, 1, 137, 52,
- 1506, 1505, 645, 645,
- 567, 316, 199, 136, 172, 175, 142, 88, 45, 48,
- 548, 449, 255, 145, 184, 174, 121,
- 260, 227,
- },
- { 823, 319, 144, 175, 43, 7, 1, 53, 42,
- 1648, 1648, 653, 652,
- 583, 329, 205, 139, 175, 176, 139, 84, 44, 34,
- 467, 389, 211, 137, 181, 186, 107,
- 106, 85,
- },
- { 948, 411, 201, 276, 85, 16, 2, 39, 33,
- 1778, 1777, 584, 583,
- 489, 265, 162, 111, 140, 140, 108, 64, 38, 23,
- 428, 356, 201, 139, 186, 165, 94,
- 78, 63,
- },
- { 1002, 470, 248, 386, 153, 39, 6, 23, 23,
- 1866, 1866, 573, 573,
- 467, 249, 155, 103, 130, 128, 94, 60, 38, 14,
- 323, 263, 159, 111, 156, 153, 74,
- 46, 34,
- },
- { 1020, 518, 291, 504, 242, 78, 18, 14, 14,
- 1980, 1979, 527, 526,
- 408, 219, 132, 87, 110, 104, 79, 55, 31, 7,
- 265, 213, 129, 91, 131, 111, 50,
- 31, 20,
- },
- { 1018, 544, 320, 591, 338, 139, 47, 5, 2,
- 2123, 2123, 548, 547,
- 414, 212, 126, 83, 101, 96, 79, 60, 23, 1,
- 120, 97, 55, 39, 60, 38, 15,
- 11, 8,
- },
-
- /* AC INTRA Tables */
- /* AC Intra bias group 1 tables */
- { 242, 62, 22, 20, 4, 1, 1, 438, 1,
- 593, 593, 489, 490,
- 657, 580, 471, 374, 599, 783, 869, 770, 491, 279,
- 358, 144, 82, 54, 49, 70, 5,
- 289, 107,
- },
- { 317, 95, 38, 41, 8, 1, 1, 479, 1,
- 653, 654, 500, 501,
- 682, 611, 473, 376, 582, 762, 806, 656, 358, 155,
- 419, 162, 86, 58, 36, 34, 1,
- 315, 126,
- },
- { 382, 121, 49, 59, 15, 3, 1, 496, 1,
- 674, 674, 553, 554,
- 755, 636, 487, 391, 576, 718, 701, 488, 221, 72,
- 448, 161, 107, 56, 37, 29, 1,
- 362, 156,
- },
- { 415, 138, 57, 73, 21, 5, 1, 528, 1,
- 742, 741, 562, 563,
- 753, 669, 492, 388, 563, 664, 589, 340, 129, 26,
- 496, 184, 139, 71, 48, 33, 2,
- 387, 166,
- },
- { 496, 170, 73, 94, 31, 8, 2, 513, 1,
- 855, 855, 604, 604,
- 769, 662, 477, 356, 486, 526, 381, 183, 51, 5,
- 590, 214, 160, 85, 60, 39, 3,
- 427, 203,
- },
- { 589, 207, 89, 116, 40, 13, 3, 491, 1,
- 919, 919, 631, 631,
- 769, 633, 432, 308, 408, 378, 247, 94, 17, 1,
- 659, 247, 201, 105, 73, 51, 3,
- 466, 242,
- },
- { 727, 266, 115, 151, 49, 17, 6, 439, 1,
- 977, 977, 642, 642,
- 718, 572, 379, 243, 285, 251, 133, 40, 1, 1,
- 756, 287, 253, 126, 94, 66, 4,
- 492, 280,
- },
- { 940, 392, 180, 247, 82, 30, 14, 343, 1,
- 1064, 1064, 615, 616,
- 596, 414, 235, 146, 149, 108, 41, 1, 1, 1,
- 882, 314, 346, 172, 125, 83, 6,
- 489, 291,
- },
- /* AC Inter bias group 1 tables */
- { 440, 102, 33, 23, 2, 1, 1, 465, 85,
- 852, 852, 744, 743,
- 701, 496, 297, 193, 225, 200, 129, 58, 18, 2,
- 798, 450, 269, 202, 145, 308, 154,
- 646, 389,
- },
- { 592, 151, 53, 43, 6, 1, 1, 409, 34,
- 875, 875, 748, 747,
- 723, 510, 305, 196, 229, 201, 130, 59, 18, 2,
- 800, 436, 253, 185, 115, 194, 88,
- 642, 368,
- },
- { 759, 222, 86, 85, 17, 2, 1, 376, 46,
- 888, 888, 689, 688,
- 578, 408, 228, 143, 165, 141, 84, 35, 7, 1,
- 878, 488, 321, 244, 147, 266, 124,
- 612, 367,
- },
- { 912, 298, 122, 133, 34, 7, 1, 261, 44,
- 1092, 1091, 496, 496,
- 409, 269, 150, 95, 106, 87, 49, 16, 1, 1,
- 1102, 602, 428, 335, 193, 323, 157,
- 423, 253,
- },
- { 1072, 400, 180, 210, 60, 16, 3, 210, 40,
- 1063, 1063, 451, 451,
- 345, 221, 121, 73, 79, 64, 31, 6, 1, 1,
- 1105, 608, 462, 358, 202, 330, 155,
- 377, 228,
- },
- { 1164, 503, 254, 330, 109, 34, 9, 167, 35,
- 1038, 1037, 390, 390,
- 278, 170, 89, 54, 56, 40, 13, 1, 1, 1,
- 1110, 607, 492, 401, 218, 343, 141,
- 323, 192,
- },
- { 1173, 583, 321, 486, 196, 68, 23, 124, 23,
- 1037, 1037, 347, 346,
- 232, 139, 69, 40, 37, 20, 2, 1, 1, 1,
- 1128, 584, 506, 410, 199, 301, 113,
- 283, 159,
- },
- { 1023, 591, 366, 699, 441, 228, 113, 79, 5,
- 1056, 1056, 291, 291,
- 173, 96, 38, 19, 8, 1, 1, 1, 1, 1,
- 1187, 527, 498, 409, 147, 210, 56,
- 263, 117,
- },
-
- /* AC Intra bias group 2 tables */
- { 311, 74, 27, 27, 5, 1, 1, 470, 24,
- 665, 667, 637, 638,
- 806, 687, 524, 402, 585, 679, 609, 364, 127, 20,
- 448, 210, 131, 76, 52, 111, 19,
- 393, 195,
- },
- { 416, 104, 39, 38, 8, 1, 1, 545, 33,
- 730, 731, 692, 692,
- 866, 705, 501, 365, 495, 512, 387, 168, 39, 2,
- 517, 240, 154, 86, 64, 127, 19,
- 461, 247,
- },
- { 474, 117, 43, 42, 9, 1, 1, 560, 40,
- 783, 783, 759, 760,
- 883, 698, 466, 318, 404, 377, 215, 66, 7, 1,
- 559, 259, 176, 110, 87, 170, 22,
- 520, 278,
- },
- { 582, 149, 53, 53, 12, 2, 1, 473, 39,
- 992, 993, 712, 713,
- 792, 593, 373, 257, 299, 237, 114, 25, 1, 1,
- 710, 329, 221, 143, 116, 226, 26,
- 490, 259,
- },
- { 744, 210, 78, 77, 16, 2, 1, 417, 37,
- 1034, 1035, 728, 728,
- 718, 509, 296, 175, 184, 122, 42, 3, 1, 1,
- 791, 363, 255, 168, 145, 311, 35,
- 492, 272,
- },
- { 913, 291, 121, 128, 28, 4, 1, 334, 40,
- 1083, 1084, 711, 712,
- 624, 378, 191, 107, 95, 50, 7, 1, 1, 1,
- 876, 414, 288, 180, 164, 382, 39,
- 469, 275,
- },
- { 1065, 405, 184, 216, 53, 8, 1, 236, 36,
- 1134, 1134, 685, 686,
- 465, 253, 113, 48, 41, 9, 1, 1, 1, 1,
- 965, 451, 309, 179, 166, 429, 53,
- 414, 249,
- },
- { 1148, 548, 301, 438, 160, 42, 6, 84, 17,
- 1222, 1223, 574, 575,
- 272, 111, 23, 6, 2, 1, 1, 1, 1, 1,
- 1060, 502, 328, 159, 144, 501, 54,
- 302, 183,
- },
- /* AC Inter bias group 2 tables */
- { 403, 80, 24, 17, 1, 1, 1, 480, 90,
- 899, 899, 820, 819,
- 667, 413, 228, 133, 139, 98, 42, 10, 1, 1,
- 865, 470, 316, 222, 171, 419, 213,
- 645, 400,
- },
- { 698, 169, 59, 49, 6, 1, 1, 414, 101,
- 894, 893, 761, 761,
- 561, 338, 171, 96, 97, 64, 26, 6, 1, 1,
- 896, 494, 343, 239, 192, 493, 215,
- 583, 366,
- },
- { 914, 255, 94, 80, 10, 1, 1, 345, 128,
- 935, 935, 670, 671,
- 415, 222, 105, 55, 51, 30, 10, 1, 1, 1,
- 954, 530, 377, 274, 232, 641, 295,
- 456, 298,
- },
- { 1103, 359, 146, 135, 20, 1, 1, 235, 119,
- 1042, 1042, 508, 507,
- 293, 146, 65, 33, 30, 16, 4, 1, 1, 1,
- 1031, 561, 407, 296, 265, 813, 317,
- 301, 192,
- },
- { 1255, 504, 238, 265, 51, 5, 1, 185, 113,
- 1013, 1013, 437, 438,
- 212, 92, 41, 18, 15, 6, 1, 1, 1, 1,
- 976, 530, 386, 276, 260, 927, 357,
- 224, 148,
- },
- { 1292, 610, 332, 460, 127, 16, 1, 136, 99,
- 1014, 1015, 384, 384,
- 153, 65, 25, 11, 6, 1, 1, 1, 1, 1,
- 942, 487, 343, 241, 238, 970, 358,
- 174, 103,
- },
- { 1219, 655, 407, 700, 280, 55, 2, 100, 60,
- 1029, 1029, 337, 336,
- 119, 43, 11, 3, 2, 1, 1, 1, 1, 1,
- 894, 448, 305, 199, 213, 1005, 320,
- 136, 77,
- },
- { 1099, 675, 435, 971, 581, 168, 12, 37, 16,
- 1181, 1081, 319, 318,
- 66, 11, 6, 1, 1, 1, 1, 1, 1, 1,
- 914, 370, 235, 138, 145, 949, 128,
- 94, 41,
- },
-
- /* AC Intra bias group 3 tables */
- { 486, 112, 39, 34, 6, 1, 1, 541, 67,
- 819, 818, 762, 763,
- 813, 643, 403, 280, 332, 295, 164, 53, 6, 1,
- 632, 294, 180, 131, 105, 208, 109,
- 594, 295,
- },
- { 723, 191, 69, 65, 12, 1, 1, 445, 79,
- 865, 865, 816, 816,
- 750, 515, 290, 172, 184, 122, 46, 5, 1, 1,
- 740, 340, 213, 165, 129, 270, 168,
- 603, 326,
- },
- { 884, 264, 102, 103, 21, 3, 1, 382, 68,
- 897, 897, 836, 836,
- 684, 427, 227, 119, 119, 70, 16, 1, 1, 1,
- 771, 367, 234, 184, 143, 272, 178,
- 555, 326,
- },
- { 1028, 347, 153, 161, 36, 8, 1, 251, 44,
- 1083, 1084, 735, 735,
- 541, 289, 144, 77, 57, 23, 3, 1, 1, 1,
- 926, 422, 270, 215, 176, 301, 183,
- 443, 248,
- },
- { 1155, 465, 224, 264, 71, 14, 3, 174, 27,
- 1110, 1111, 730, 731,
- 429, 206, 79, 30, 19, 4, 1, 1, 1, 1,
- 929, 443, 279, 225, 194, 298, 196,
- 354, 223,
- },
- { 1191, 576, 296, 415, 144, 36, 8, 114, 16,
- 1162, 1162, 749, 749,
- 338, 108, 29, 8, 5, 1, 1, 1, 1, 1,
- 947, 458, 273, 207, 194, 248, 145,
- 258, 152,
- },
- { 1169, 619, 366, 603, 247, 92, 23, 46, 1,
- 1236, 1236, 774, 775,
- 191, 35, 14, 1, 1, 1, 1, 1, 1, 1,
- 913, 449, 260, 214, 194, 180, 82,
- 174, 98,
- },
- { 1006, 537, 381, 897, 504, 266, 101, 39, 1,
- 1307, 1307, 668, 667,
- 116, 3, 1, 1, 1, 1, 1, 1, 1, 1,
- 1175, 261, 295, 70, 164, 107, 31,
- 10, 76,
- },
- /* AC Inter bias group 3 tables */
- { 652, 156, 53, 43, 5, 1, 1, 368, 128,
- 983, 984, 825, 825,
- 583, 331, 163, 88, 84, 48, 15, 1, 1, 1,
- 870, 480, 316, 228, 179, 421, 244,
- 562, 349,
- },
- { 988, 280, 104, 87, 12, 1, 1, 282, 194,
- 980, 981, 738, 739,
- 395, 189, 80, 37, 31, 12, 2, 1, 1, 1,
- 862, 489, 333, 262, 214, 600, 446,
- 390, 260,
- },
- { 1176, 399, 165, 154, 24, 2, 1, 218, 224,
- 1017, 1018, 651, 651,
- 280, 111, 42, 16, 9, 3, 1, 1, 1, 1,
- 787, 469, 324, 269, 229, 686, 603,
- 267, 194,
- },
- { 1319, 530, 255, 268, 47, 4, 1, 113, 183,
- 1149, 1150, 461, 461,
- 173, 58, 17, 5, 3, 1, 1, 1, 1, 1,
- 768, 450, 305, 261, 221, 716, 835,
- 136, 97,
- },
- { 1362, 669, 355, 465, 104, 9, 1, 76, 153,
- 1253, 1253, 398, 397,
- 102, 21, 5, 1, 1, 1, 1, 1, 1, 1,
- 596, 371, 238, 228, 196, 660, 954,
- 68, 53,
- },
- { 1354, 741, 446, 702, 174, 15, 1, 38, 87,
- 1498, 1498, 294, 294,
- 43, 7, 1, 1, 1, 1, 1, 1, 1, 1,
- 381, 283, 165, 181, 155, 544, 1039,
- 25, 21,
- },
- { 1262, 885, 546, 947, 263, 18, 1, 18, 27,
- 1908, 1908, 163, 162,
- 14, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 195, 152, 83, 125, 109, 361, 827,
- 7, 5,
- },
- { 2539, 951, 369, 554, 212, 18, 1, 1, 1,
- 2290, 2289, 64, 64,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 18, 18, 9, 55, 36, 184, 323,
- 1, 1,
- },
-
-
- /* AC Intra bias group 4 tables */
- { 921, 264, 101, 100, 19, 2, 1, 331, 98,
- 1015, 1016, 799, 799,
- 512, 269, 119, 60, 50, 17, 1, 1, 1, 1,
- 841, 442, 307, 222, 182, 493, 256,
- 438, 310,
- },
- { 1147, 412, 184, 206, 50, 6, 1, 242, 141,
- 977, 976, 808, 807,
- 377, 135, 40, 10, 7, 1, 1, 1, 1, 1,
- 788, 402, 308, 223, 205, 584, 406,
- 316, 227,
- },
- { 1243, 504, 238, 310, 79, 11, 1, 184, 150,
- 983, 984, 814, 813,
- 285, 56, 10, 1, 1, 1, 1, 1, 1, 1,
- 713, 377, 287, 217, 180, 615, 558,
- 208, 164,
- },
- { 1266, 606, 329, 484, 161, 27, 1, 79, 92,
- 1187, 1188, 589, 588,
- 103, 10, 1, 1, 1, 1, 1, 1, 1, 1,
- 680, 371, 278, 221, 244, 614, 728,
- 80, 62,
- },
- { 1126, 828, 435, 705, 443, 90, 8, 10, 55,
- 1220, 1219, 350, 350,
- 28, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 602, 330, 222, 168, 158, 612, 919,
- 104, 5,
- },
- { 1210, 506, 1014, 926, 474, 240, 4, 1, 44,
- 1801, 1801, 171, 171,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 900, 132, 36, 11, 47, 191, 316,
- 2, 1,
- },
- { 1210, 506, 1014, 926, 474, 240, 4, 1, 44,
- 1801, 1801, 171, 171,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 900, 132, 36, 11, 47, 191, 316,
- 2, 1,
- },
- { 1210, 506, 1014, 926, 474, 240, 4, 1, 44,
- 1801, 1801, 171, 171,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 900, 132, 36, 11, 47, 191, 316,
- 2, 1,
- },
- /* AC Inter bias group 4 tables */
- { 1064, 325, 129, 117, 20, 2, 1, 266, 121,
- 1000, 1000, 706, 706,
- 348, 162, 67, 32, 25, 11, 1, 1, 1, 1,
- 876, 513, 363, 274, 225, 627, 384,
- 370, 251,
- },
- { 1311, 517, 238, 254, 45, 3, 1, 188, 160,
- 1070, 1070, 635, 635,
- 239, 85, 30, 11, 6, 1, 1, 1, 1, 1,
- 744, 420, 313, 239, 206, 649, 541,
- 221, 155,
- },
- { 1394, 632, 322, 385, 78, 7, 1, 134, 152,
- 1163, 1164, 607, 607,
- 185, 51, 12, 3, 1, 1, 1, 1, 1, 1,
- 631, 331, 275, 203, 182, 604, 620,
- 146, 98,
- },
- { 1410, 727, 407, 546, 146, 19, 1, 67, 88,
- 1485, 1486, 419, 418,
- 103, 18, 3, 1, 1, 1, 1, 1, 1, 1,
- 555, 261, 234, 164, 148, 522, 654,
- 67, 39,
- },
- { 1423, 822, 492, 719, 216, 22, 1, 28, 59,
- 1793, 1793, 323, 324,
- 37, 2, 1, 1, 1, 1, 1, 1, 1, 1,
- 376, 138, 158, 102, 119, 400, 604,
- 28, 9,
- },
- { 1585, 923, 563, 918, 207, 25, 1, 5, 20,
- 2229, 2230, 172, 172,
- 7, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 191, 40, 56, 22, 65, 243, 312,
- 2, 1,
- },
- { 2225, 1100, 408, 608, 133, 8, 1, 1, 1,
- 2658, 2658, 25, 24,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 8, 1, 1, 1, 1, 125, 16,
- 1, 1,
- },
- { 2539, 951, 369, 554, 212, 18, 1, 1, 1,
- 2290, 2289, 64, 64,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 18, 18, 9, 55, 36, 184, 323,
- 1, 1,
- },
-};
-
-#endif /* NEW_FREQS */
diff --git a/Engine/lib/libtheora/lib/enc/mcomp.c b/Engine/lib/libtheora/lib/enc/mcomp.c
deleted file mode 100644
index 3b6b4ac28..000000000
--- a/Engine/lib/libtheora/lib/enc/mcomp.c
+++ /dev/null
@@ -1,767 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: mcomp.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "codec_internal.h"
-
-/* Initialises motion compentsation. */
-void InitMotionCompensation ( CP_INSTANCE *cpi ){
- int i;
- int SearchSite=0;
- int Len;
- int LineStepY = (ogg_int32_t)cpi->pb.YStride;
-
- Len=((MAX_MV_EXTENT/2)+1)/2;
-
-
- /* How many search stages are there. */
- cpi->MVSearchSteps = 0;
-
- /* Set up offsets arrays used in half pixel correction. */
- cpi->HalfPixelRef2Offset[0] = -LineStepY - 1;
- cpi->HalfPixelRef2Offset[1] = -LineStepY;
- cpi->HalfPixelRef2Offset[2] = -LineStepY + 1;
- cpi->HalfPixelRef2Offset[3] = - 1;
- cpi->HalfPixelRef2Offset[4] = 0;
- cpi->HalfPixelRef2Offset[5] = 1;
- cpi->HalfPixelRef2Offset[6] = LineStepY - 1;
- cpi->HalfPixelRef2Offset[7] = LineStepY;
- cpi->HalfPixelRef2Offset[8] = LineStepY + 1;
-
- cpi->HalfPixelXOffset[0] = -1;
- cpi->HalfPixelXOffset[1] = 0;
- cpi->HalfPixelXOffset[2] = 1;
- cpi->HalfPixelXOffset[3] = -1;
- cpi->HalfPixelXOffset[4] = 0;
- cpi->HalfPixelXOffset[5] = 1;
- cpi->HalfPixelXOffset[6] = -1;
- cpi->HalfPixelXOffset[7] = 0;
- cpi->HalfPixelXOffset[8] = 1;
-
- cpi->HalfPixelYOffset[0] = -1;
- cpi->HalfPixelYOffset[1] = -1;
- cpi->HalfPixelYOffset[2] = -1;
- cpi->HalfPixelYOffset[3] = 0;
- cpi->HalfPixelYOffset[4] = 0;
- cpi->HalfPixelYOffset[5] = 0;
- cpi->HalfPixelYOffset[6] = 1;
- cpi->HalfPixelYOffset[7] = 1;
- cpi->HalfPixelYOffset[8] = 1;
-
-
- /* Generate offsets for 8 search sites per step. */
- while ( Len>0 ) {
- /* Another step. */
- cpi->MVSearchSteps += 1;
-
- /* Compute offsets for search sites. */
- cpi->MVOffsetX[SearchSite] = -Len;
- cpi->MVOffsetY[SearchSite++] = -Len;
- cpi->MVOffsetX[SearchSite] = 0;
- cpi->MVOffsetY[SearchSite++] = -Len;
- cpi->MVOffsetX[SearchSite] = Len;
- cpi->MVOffsetY[SearchSite++] = -Len;
- cpi->MVOffsetX[SearchSite] = -Len;
- cpi->MVOffsetY[SearchSite++] = 0;
- cpi->MVOffsetX[SearchSite] = Len;
- cpi->MVOffsetY[SearchSite++] = 0;
- cpi->MVOffsetX[SearchSite] = -Len;
- cpi->MVOffsetY[SearchSite++] = Len;
- cpi->MVOffsetX[SearchSite] = 0;
- cpi->MVOffsetY[SearchSite++] = Len;
- cpi->MVOffsetX[SearchSite] = Len;
- cpi->MVOffsetY[SearchSite++] = Len;
-
- /* Contract. */
- Len /= 2;
- }
-
- /* Compute pixel index offsets. */
- for ( i=SearchSite-1; i>=0; i-- )
- cpi->MVPixelOffsetY[i] = (cpi->MVOffsetY[i]*LineStepY) + cpi->MVOffsetX[i];
-}
-
-static ogg_uint32_t GetInterErr (CP_INSTANCE *cpi, unsigned char * NewDataPtr,
- unsigned char * RefDataPtr1,
- unsigned char * RefDataPtr2,
- ogg_uint32_t PixelsPerLine ) {
- ogg_int32_t DiffVal;
- ogg_int32_t RefOffset = (int)(RefDataPtr1 - RefDataPtr2);
- ogg_uint32_t RefPixelsPerLine = PixelsPerLine + STRIDE_EXTRA;
-
- /* Mode of interpolation chosen based upon on the offset of the
- second reference pointer */
- if ( RefOffset == 0 ) {
- DiffVal = dsp_inter8x8_err (cpi->dsp, NewDataPtr, PixelsPerLine,
- RefDataPtr1, RefPixelsPerLine);
- }else{
- DiffVal = dsp_inter8x8_err_xy2 (cpi->dsp, NewDataPtr, PixelsPerLine,
- RefDataPtr1,
- RefDataPtr2, RefPixelsPerLine);
- }
-
- /* Compute and return population variance as mis-match metric. */
- return DiffVal;
-}
-
-static ogg_uint32_t GetHalfPixelSumAbsDiffs (CP_INSTANCE *cpi,
- unsigned char * SrcData,
- unsigned char * RefDataPtr1,
- unsigned char * RefDataPtr2,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ErrorSoFar,
- ogg_uint32_t BestSoFar ) {
-
- ogg_uint32_t DiffVal = ErrorSoFar;
- ogg_int32_t RefOffset = (int)(RefDataPtr1 - RefDataPtr2);
- ogg_uint32_t RefPixelsPerLine = PixelsPerLine + STRIDE_EXTRA;
-
- if ( RefOffset == 0 ) {
- /* Simple case as for non 0.5 pixel */
- DiffVal += dsp_sad8x8 (cpi->dsp, SrcData, PixelsPerLine,
- RefDataPtr1, RefPixelsPerLine);
- } else {
- DiffVal += dsp_sad8x8_xy2_thres (cpi->dsp, SrcData, PixelsPerLine,
- RefDataPtr1,
- RefDataPtr2, RefPixelsPerLine, BestSoFar);
- }
-
- return DiffVal;
-}
-
-ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine ) {
- ogg_uint32_t LocalFragIndex = FragIndex;
- ogg_uint32_t IntraError = 0;
-
- dsp_save_fpu (cpi->dsp);
-
- /* Add together the intra errors for those blocks in the macro block
- that are coded (Y only) */
- if ( cpi->pb.display_fragments[LocalFragIndex] )
- IntraError +=
- dsp_intra8x8_err (cpi->dsp, &cpi->
- ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]],
- PixelsPerLine);
-
- LocalFragIndex++;
- if ( cpi->pb.display_fragments[LocalFragIndex] )
- IntraError +=
- dsp_intra8x8_err (cpi->dsp, &cpi->
- ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]],
- PixelsPerLine);
-
- LocalFragIndex = FragIndex + cpi->pb.HFragments;
- if ( cpi->pb.display_fragments[LocalFragIndex] )
- IntraError +=
- dsp_intra8x8_err (cpi->dsp, &cpi->
- ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]],
- PixelsPerLine);
-
- LocalFragIndex++;
- if ( cpi->pb.display_fragments[LocalFragIndex] )
- IntraError +=
- dsp_intra8x8_err (cpi->dsp, &cpi->
- ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]],
- PixelsPerLine);
-
- dsp_restore_fpu (cpi->dsp);
-
- return IntraError;
-}
-
-ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi,
- unsigned char * SrcPtr,
- unsigned char * RefPtr,
- ogg_uint32_t FragIndex,
- ogg_int32_t LastXMV,
- ogg_int32_t LastYMV,
- ogg_uint32_t PixelsPerLine ) {
- ogg_uint32_t RefPixelsPerLine = cpi->pb.YStride;
- ogg_uint32_t LocalFragIndex = FragIndex;
- ogg_int32_t PixelIndex;
- ogg_int32_t RefPixelIndex;
- ogg_int32_t RefPixelOffset;
- ogg_int32_t RefPtr2Offset;
-
- ogg_uint32_t InterError = 0;
-
- unsigned char * SrcPtr1;
- unsigned char * RefPtr1;
-
- dsp_save_fpu (cpi->dsp);
-
- /* Work out pixel offset into source buffer. */
- PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex];
-
- /* Work out the pixel offset in reference buffer for the default
- motion vector */
- RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex];
- RefPixelOffset = ((LastYMV/2) * RefPixelsPerLine) + (LastXMV/2);
-
- /* Work out the second reference pointer offset. */
- RefPtr2Offset = 0;
- if ( LastXMV % 2 ) {
- if ( LastXMV > 0 )
- RefPtr2Offset += 1;
- else
- RefPtr2Offset -= 1;
- }
- if ( LastYMV % 2 ) {
- if ( LastYMV > 0 )
- RefPtr2Offset += RefPixelsPerLine;
- else
- RefPtr2Offset -= RefPixelsPerLine;
- }
-
- /* Add together the errors for those blocks in the macro block that
- are coded (Y only) */
- if ( cpi->pb.display_fragments[LocalFragIndex] ) {
- SrcPtr1 = &SrcPtr[PixelIndex];
- RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset];
- InterError += GetInterErr(cpi, SrcPtr1, RefPtr1,
- &RefPtr1[RefPtr2Offset], PixelsPerLine );
- }
-
- LocalFragIndex++;
- if ( cpi->pb.display_fragments[LocalFragIndex] ) {
- PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex];
- RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex];
- SrcPtr1 = &SrcPtr[PixelIndex];
- RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset];
- InterError += GetInterErr(cpi, SrcPtr1, RefPtr1,
- &RefPtr1[RefPtr2Offset], PixelsPerLine );
-
- }
-
- LocalFragIndex = FragIndex + cpi->pb.HFragments;
- if ( cpi->pb.display_fragments[LocalFragIndex] ) {
- PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex];
- RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex];
- SrcPtr1 = &SrcPtr[PixelIndex];
- RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset];
- InterError += GetInterErr(cpi, SrcPtr1, RefPtr1,
- &RefPtr1[RefPtr2Offset], PixelsPerLine );
- }
-
- LocalFragIndex++;
- if ( cpi->pb.display_fragments[LocalFragIndex] ) {
- PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex];
- RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex];
- SrcPtr1 = &SrcPtr[PixelIndex];
- RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset];
- InterError += GetInterErr(cpi, SrcPtr1, RefPtr1,
- &RefPtr1[RefPtr2Offset], PixelsPerLine );
- }
-
- dsp_restore_fpu (cpi->dsp);
-
- return InterError;
-}
-
-ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- ogg_int32_t *MVPixelOffset,
- MOTION_VECTOR *MV ) {
- ogg_uint32_t Error = 0;
- ogg_uint32_t MinError;
- ogg_uint32_t InterMVError = 0;
-
- ogg_int32_t i;
- ogg_int32_t x=0, y=0;
- ogg_int32_t step;
- ogg_int32_t SearchSite=0;
-
- unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL};
- unsigned char *RefPtr=NULL;
- unsigned char *CandidateBlockPtr=NULL;
- unsigned char *BestBlockPtr=NULL;
-
- ogg_uint32_t RefRow2Offset = cpi->pb.YStride * 8;
-
- int MBlockDispFrags[4];
-
- /* Half pixel variables */
- ogg_int32_t HalfPixelError;
- ogg_int32_t BestHalfPixelError;
- unsigned char BestHalfOffset;
- unsigned char * RefDataPtr1;
- unsigned char * RefDataPtr2;
-
- dsp_save_fpu (cpi->dsp);
-
- /* Note which of the four blocks in the macro block are to be
- included in the search. */
- MBlockDispFrags[0] =
- cpi->pb.display_fragments[FragIndex];
- MBlockDispFrags[1] =
- cpi->pb.display_fragments[FragIndex + 1];
- MBlockDispFrags[2] =
- cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments];
- MBlockDispFrags[3] =
- cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments + 1];
-
- /* Set up the source pointers for the four source blocks. */
- SrcPtr[0] = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
- SrcPtr[1] = SrcPtr[0] + 8;
- SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8);
- SrcPtr[3] = SrcPtr[2] + 8;
-
- /* Set starting reference point for search. */
- RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
-
- /* Check the 0,0 candidate. */
- if ( MBlockDispFrags[0] ) {
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, RefPtr,
- PixelsPerLine + STRIDE_EXTRA);
- }
- if ( MBlockDispFrags[1] ) {
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[1], PixelsPerLine, RefPtr + 8,
- PixelsPerLine + STRIDE_EXTRA);
- }
- if ( MBlockDispFrags[2] ) {
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[2], PixelsPerLine, RefPtr + RefRow2Offset,
- PixelsPerLine + STRIDE_EXTRA);
- }
- if ( MBlockDispFrags[3] ) {
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[3], PixelsPerLine, RefPtr + RefRow2Offset + 8,
- PixelsPerLine + STRIDE_EXTRA);
- }
-
- /* Set starting values to results of 0, 0 vector. */
- MinError = Error;
- BestBlockPtr = RefPtr;
- x = 0;
- y = 0;
- MV->x = 0;
- MV->y = 0;
-
- /* Proceed through N-steps. */
- for ( step=0; step<cpi->MVSearchSteps; step++ ) {
- /* Search the 8-neighbours at distance pertinent to current step.*/
- for ( i=0; i<8; i++ ) {
- /* Set pointer to next candidate matching block. */
- CandidateBlockPtr = RefPtr + MVPixelOffset[SearchSite];
-
- /* Reset error */
- Error = 0;
-
- /* Get the score for the current offset */
- if ( MBlockDispFrags[0] ) {
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, CandidateBlockPtr,
- PixelsPerLine + STRIDE_EXTRA);
- }
-
- if ( MBlockDispFrags[1] && (Error < MinError) ) {
- Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[1], PixelsPerLine, CandidateBlockPtr + 8,
- PixelsPerLine + STRIDE_EXTRA, MinError);
- }
-
- if ( MBlockDispFrags[2] && (Error < MinError) ) {
- Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[2], PixelsPerLine, CandidateBlockPtr + RefRow2Offset,
- PixelsPerLine + STRIDE_EXTRA, MinError);
- }
-
- if ( MBlockDispFrags[3] && (Error < MinError) ) {
- Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[3], PixelsPerLine, CandidateBlockPtr + RefRow2Offset + 8,
- PixelsPerLine + STRIDE_EXTRA, MinError);
- }
-
- if ( Error < MinError ) {
- /* Remember best match. */
- MinError = Error;
- BestBlockPtr = CandidateBlockPtr;
-
- /* Where is it. */
- x = MV->x + cpi->MVOffsetX[SearchSite];
- y = MV->y + cpi->MVOffsetY[SearchSite];
- }
-
- /* Move to next search location. */
- SearchSite += 1;
- }
-
- /* Move to best location this step. */
- RefPtr = BestBlockPtr;
- MV->x = x;
- MV->y = y;
- }
-
- /* Factor vectors to 1/2 pixel resoultion. */
- MV->x = (MV->x * 2);
- MV->y = (MV->y * 2);
-
- /* Now do the half pixel pass */
- BestHalfOffset = 4; /* Default to the no offset case. */
- BestHalfPixelError = MinError;
-
- /* Get the half pixel error for each half pixel offset */
- for ( i=0; i < 9; i++ ) {
- HalfPixelError = 0;
-
- if ( MBlockDispFrags[0] ) {
- RefDataPtr1 = BestBlockPtr;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[0], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( MBlockDispFrags[1] && (HalfPixelError < BestHalfPixelError) ) {
- RefDataPtr1 = BestBlockPtr + 8;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[1], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) {
- RefDataPtr1 = BestBlockPtr + RefRow2Offset;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[2], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) {
- RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[3], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( HalfPixelError < BestHalfPixelError ) {
- BestHalfOffset = (unsigned char)i;
- BestHalfPixelError = HalfPixelError;
- }
- }
-
- /* Half pixel adjust the MV */
- MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
- MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
-
- /* Get the error score for the chosen 1/2 pixel offset as a variance. */
- InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr,
- FragIndex, MV->x, MV->y, PixelsPerLine );
-
- dsp_restore_fpu (cpi->dsp);
-
- /* Return score of best matching block. */
- return InterMVError;
-}
-
-ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- MOTION_VECTOR *MV ) {
- ogg_uint32_t Error = 0;
- ogg_uint32_t MinError = HUGE_ERROR;
- ogg_uint32_t InterMVError = 0;
-
- ogg_int32_t i, j;
- ogg_int32_t x=0, y=0;
-
- unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL};
- unsigned char *RefPtr;
- unsigned char *CandidateBlockPtr=NULL;
- unsigned char *BestBlockPtr=NULL;
-
- ogg_uint32_t RefRow2Offset = cpi->pb.YStride * 8;
-
- int MBlockDispFrags[4];
-
- /* Half pixel variables */
- ogg_int32_t HalfPixelError;
- ogg_int32_t BestHalfPixelError;
- unsigned char BestHalfOffset;
- unsigned char * RefDataPtr1;
- unsigned char * RefDataPtr2;
-
- dsp_save_fpu (cpi->dsp);
-
- /* Note which of the four blocks in the macro block are to be
- included in the search. */
- MBlockDispFrags[0] = cpi->
- pb.display_fragments[FragIndex];
- MBlockDispFrags[1] = cpi->
- pb.display_fragments[FragIndex + 1];
- MBlockDispFrags[2] = cpi->
- pb.display_fragments[FragIndex + cpi->pb.HFragments];
- MBlockDispFrags[3] = cpi->
- pb.display_fragments[FragIndex + cpi->pb.HFragments + 1];
-
- /* Set up the source pointers for the four source blocks. */
- SrcPtr[0] = &cpi->
- ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
- SrcPtr[1] = SrcPtr[0] + 8;
- SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8);
- SrcPtr[3] = SrcPtr[2] + 8;
-
- RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
- RefPtr = RefPtr - ((MAX_MV_EXTENT/2) * cpi->
- pb.YStride) - (MAX_MV_EXTENT/2);
-
- /* Search each pixel alligned site */
- for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) {
- /* Starting position in row */
- CandidateBlockPtr = RefPtr;
-
- for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ) {
- /* Reset error */
- Error = 0;
-
- /* Summ errors for each block. */
- if ( MBlockDispFrags[0] ) {
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, CandidateBlockPtr,
- PixelsPerLine + STRIDE_EXTRA);
- }
- if ( MBlockDispFrags[1] ){
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[1], PixelsPerLine, CandidateBlockPtr + 8,
- PixelsPerLine + STRIDE_EXTRA);
- }
- if ( MBlockDispFrags[2] ){
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[2], PixelsPerLine, CandidateBlockPtr + RefRow2Offset,
- PixelsPerLine + STRIDE_EXTRA);
- }
- if ( MBlockDispFrags[3] ){
- Error += dsp_sad8x8 (cpi->dsp, SrcPtr[3], PixelsPerLine, CandidateBlockPtr + RefRow2Offset + 8,
- PixelsPerLine + STRIDE_EXTRA);
- }
-
- /* Was this the best so far */
- if ( Error < MinError ) {
- MinError = Error;
- BestBlockPtr = CandidateBlockPtr;
- x = 16 + j - MAX_MV_EXTENT;
- y = 16 + i - MAX_MV_EXTENT;
- }
-
- /* Move the the next site */
- CandidateBlockPtr ++;
- }
-
- /* Move on to the next row. */
- RefPtr += cpi->pb.YStride;
-
- }
-
- /* Factor vectors to 1/2 pixel resoultion. */
- MV->x = (x * 2);
- MV->y = (y * 2);
-
- /* Now do the half pixel pass */
- BestHalfOffset = 4; /* Default to the no offset case. */
- BestHalfPixelError = MinError;
-
- /* Get the half pixel error for each half pixel offset */
- for ( i=0; i < 9; i++ ) {
- HalfPixelError = 0;
-
- if ( MBlockDispFrags[0] ) {
- RefDataPtr1 = BestBlockPtr;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[0], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( MBlockDispFrags[1] && (HalfPixelError < BestHalfPixelError) ) {
- RefDataPtr1 = BestBlockPtr + 8;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[1], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) {
- RefDataPtr1 = BestBlockPtr + RefRow2Offset;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[2], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) {
- RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8;
- RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr[3], RefDataPtr1, RefDataPtr2,
- PixelsPerLine, HalfPixelError, BestHalfPixelError );
- }
-
- if ( HalfPixelError < BestHalfPixelError ){
- BestHalfOffset = (unsigned char)i;
- BestHalfPixelError = HalfPixelError;
- }
- }
-
- /* Half pixel adjust the MV */
- MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
- MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
-
- /* Get the error score for the chosen 1/2 pixel offset as a variance. */
- InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr,
- FragIndex, MV->x, MV->y, PixelsPerLine );
-
- dsp_restore_fpu (cpi->dsp);
-
- /* Return score of best matching block. */
- return InterMVError;
-}
-
-static ogg_uint32_t GetBMVExhaustiveSearch (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- MOTION_VECTOR *MV ) {
- ogg_uint32_t Error = 0;
- ogg_uint32_t MinError = HUGE_ERROR;
- ogg_uint32_t InterMVError = 0;
-
- ogg_int32_t i, j;
- ogg_int32_t x=0, y=0;
-
- unsigned char *SrcPtr = NULL;
- unsigned char *RefPtr;
- unsigned char *CandidateBlockPtr=NULL;
- unsigned char *BestBlockPtr=NULL;
-
- /* Half pixel variables */
- ogg_int32_t HalfPixelError;
- ogg_int32_t BestHalfPixelError;
- unsigned char BestHalfOffset;
- unsigned char * RefDataPtr2;
-
- /* Set up the source pointer for the block. */
- SrcPtr = &cpi->
- ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
-
- RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
- RefPtr = RefPtr - ((MAX_MV_EXTENT/2) *
- cpi->pb.YStride) - (MAX_MV_EXTENT/2);
-
- /* Search each pixel alligned site */
- for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) {
- /* Starting position in row */
- CandidateBlockPtr = RefPtr;
-
- for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ){
- /* Get the block error score. */
- Error = dsp_sad8x8 (cpi->dsp, SrcPtr, PixelsPerLine, CandidateBlockPtr,
- PixelsPerLine + STRIDE_EXTRA);
-
- /* Was this the best so far */
- if ( Error < MinError ) {
- MinError = Error;
- BestBlockPtr = CandidateBlockPtr;
- x = 16 + j - MAX_MV_EXTENT;
- y = 16 + i - MAX_MV_EXTENT;
- }
-
- /* Move the the next site */
- CandidateBlockPtr ++;
- }
-
- /* Move on to the next row. */
- RefPtr += cpi->pb.YStride;
- }
-
- /* Factor vectors to 1/2 pixel resoultion. */
- MV->x = (x * 2);
- MV->y = (y * 2);
-
- /* Now do the half pixel pass */
- BestHalfOffset = 4; /* Default to the no offset case. */
- BestHalfPixelError = MinError;
-
- /* Get the half pixel error for each half pixel offset */
- for ( i=0; i < 9; i++ ) {
- RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[i];
- HalfPixelError =
- GetHalfPixelSumAbsDiffs(cpi, SrcPtr, BestBlockPtr, RefDataPtr2,
- PixelsPerLine, 0, BestHalfPixelError );
-
- if ( HalfPixelError < BestHalfPixelError ){
- BestHalfOffset = (unsigned char)i;
- BestHalfPixelError = HalfPixelError;
- }
- }
-
- /* Half pixel adjust the MV */
- MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
- MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
-
- /* Get the variance score at the chosen offset */
- RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[BestHalfOffset];
-
- InterMVError =
- GetInterErr(cpi, SrcPtr, BestBlockPtr, RefDataPtr2, PixelsPerLine );
-
- /* Return score of best matching block. */
- return InterMVError;
-}
-
-ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi,
- unsigned char * RefFramePtr,
- ogg_uint32_t FragIndex,
- ogg_uint32_t PixelsPerLine,
- MOTION_VECTOR *MV ) {
- ogg_uint32_t InterMVError;
-
- dsp_save_fpu (cpi->dsp);
-
- /* For the moment the 4MV mode is only deemed to be valid
- if all four Y blocks are to be updated */
- /* This may be adapted later. */
- if ( cpi->pb.display_fragments[FragIndex] &&
- cpi->pb.display_fragments[FragIndex + 1] &&
- cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments] &&
- cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments + 1] ) {
-
- /* Reset the error score. */
- InterMVError = 0;
-
- /* Get the error component from each coded block */
- InterMVError +=
- GetBMVExhaustiveSearch(cpi, RefFramePtr, FragIndex,
- PixelsPerLine, &(MV[0]) );
- InterMVError +=
- GetBMVExhaustiveSearch(cpi, RefFramePtr, (FragIndex + 1),
- PixelsPerLine, &(MV[1]) );
- InterMVError +=
- GetBMVExhaustiveSearch(cpi, RefFramePtr,
- (FragIndex + cpi->pb.HFragments),
- PixelsPerLine, &(MV[2]) );
- InterMVError +=
- GetBMVExhaustiveSearch(cpi, RefFramePtr,
- (FragIndex + cpi->pb.HFragments + 1),
- PixelsPerLine, &(MV[3]) );
- }else{
- InterMVError = HUGE_ERROR;
- }
-
- dsp_restore_fpu (cpi->dsp);
-
- /* Return score of best matching block. */
- return InterMVError;
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/misc_common.c b/Engine/lib/libtheora/lib/enc/misc_common.c
deleted file mode 100644
index 1536a494a..000000000
--- a/Engine/lib/libtheora/lib/enc/misc_common.c
+++ /dev/null
@@ -1,339 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: misc_common.c 15323 2008-09-19 19:43:59Z giles $
-
- ********************************************************************/
-
-#include <string.h>
-#include "codec_internal.h"
-#include "block_inline.h"
-
-#define FIXED_Q 150
-#define MAX_UP_REG_LOOPS 2
-
-/* Gives the initial bytes per block estimate for each Q value */
-static const double BpbTable[Q_TABLE_SIZE] = {
- 0.42, 0.45, 0.46, 0.49, 0.51, 0.53, 0.56, 0.58,
- 0.61, 0.64, 0.68, 0.71, 0.74, 0.77, 0.80, 0.84,
- 0.89, 0.92, 0.98, 1.01, 1.04, 1.13, 1.17, 1.23,
- 1.28, 1.34, 1.41, 1.45, 1.51, 1.59, 1.69, 1.80,
- 1.84, 1.94, 2.02, 2.15, 2.23, 2.34, 2.44, 2.50,
- 2.69, 2.80, 2.87, 3.04, 3.16, 3.29, 3.59, 3.66,
- 3.86, 3.94, 4.22, 4.50, 4.64, 4.70, 5.24, 5.34,
- 5.61, 5.87, 6.11, 6.41, 6.71, 6.99, 7.36, 7.69
-};
-
-static const double KfBpbTable[Q_TABLE_SIZE] = {
- 0.74, 0.81, 0.88, 0.94, 1.00, 1.06, 1.14, 1.19,
- 1.27, 1.34, 1.42, 1.49, 1.54, 1.59, 1.66, 1.73,
- 1.80, 1.87, 1.97, 2.01, 2.08, 2.21, 2.25, 2.36,
- 2.39, 2.50, 2.55, 2.65, 2.71, 2.82, 2.95, 3.01,
- 3.11, 3.19, 3.31, 3.42, 3.58, 3.66, 3.78, 3.89,
- 4.11, 4.26, 4.36, 4.39, 4.63, 4.76, 4.85, 5.04,
- 5.26, 5.29, 5.47, 5.64, 5.76, 6.05, 6.35, 6.67,
- 6.91, 7.17, 7.40, 7.56, 8.02, 8.45, 8.86, 9.38
-};
-
-double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ ){
- ogg_uint32_t i;
- ogg_int32_t ThreshTableIndex = Q_TABLE_SIZE - 1;
- double BytesPerBlock;
-
- /* Search for the Q table index that matches the given Q. */
- for ( i = 0; i < Q_TABLE_SIZE; i++ ) {
- if ( TargetQ >= cpi->pb.QThreshTable[i] ) {
- ThreshTableIndex = i;
- break;
- }
- }
-
- /* Adjust according to Q shift and type of frame */
- if ( cpi->pb.FrameType == KEY_FRAME ) {
- /* Get primary prediction */
- BytesPerBlock = KfBpbTable[ThreshTableIndex];
- } else {
- /* Get primary prediction */
- BytesPerBlock = BpbTable[ThreshTableIndex];
- BytesPerBlock = BytesPerBlock * cpi->BpbCorrectionFactor;
- }
-
- return BytesPerBlock;
-}
-
-static void UpRegulateMB( CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
- ogg_uint32_t SB, ogg_uint32_t MB, int NoCheck ) {
- ogg_int32_t FragIndex;
- ogg_uint32_t B;
-
- /* Variables used in calculating corresponding row,col and index in
- UV planes */
- ogg_uint32_t UVRow;
- ogg_uint32_t UVColumn;
- ogg_uint32_t UVFragOffset;
-
- /* There may be MB's lying out of frame which must be ignored. For
- these MB's Top left block will have a negative Fragment Index. */
- if ( QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB ) >= 0 ) {
- /* Up regulate the component blocks Y then UV. */
- for ( B=0; B<4; B++ ){
- FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
-
- if ( ( !cpi->pb.display_fragments[FragIndex] ) &&
- ( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ){
- cpi->pb.display_fragments[FragIndex] = 1;
- cpi->extra_fragments[FragIndex] = 1;
- cpi->FragmentLastQ[FragIndex] = RegulationQ;
- cpi->MotionScore++;
- }
- }
-
- /* Check the two UV blocks */
- FragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB );
-
- UVRow = (FragIndex / (cpi->pb.HFragments * 2));
- UVColumn = (FragIndex % cpi->pb.HFragments) / 2;
- UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
-
- FragIndex = cpi->pb.YPlaneFragments + UVFragOffset;
- if ( ( !cpi->pb.display_fragments[FragIndex] ) &&
- ( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ) {
- cpi->pb.display_fragments[FragIndex] = 1;
- cpi->extra_fragments[FragIndex] = 1;
- cpi->FragmentLastQ[FragIndex] = RegulationQ;
- cpi->MotionScore++;
- }
-
- FragIndex += cpi->pb.UVPlaneFragments;
- if ( ( !cpi->pb.display_fragments[FragIndex] ) &&
- ( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ) {
- cpi->pb.display_fragments[FragIndex] = 1;
- cpi->extra_fragments[FragIndex] = 1;
- cpi->FragmentLastQ[FragIndex] = RegulationQ;
- cpi->MotionScore++;
- }
- }
-}
-
-static void UpRegulateBlocks (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
- ogg_int32_t RecoveryBlocks,
- ogg_uint32_t * LastSB, ogg_uint32_t * LastMB ) {
-
- ogg_uint32_t LoopTimesRound = 0;
- ogg_uint32_t MaxSB = cpi->pb.YSBRows *
- cpi->pb.YSBCols; /* Tot super blocks in image */
- ogg_uint32_t SB, MB; /* Super-Block and macro block indices. */
-
- /* First scan for blocks for which a residue update is outstanding. */
- while ( (cpi->MotionScore < RecoveryBlocks) &&
- (LoopTimesRound < MAX_UP_REG_LOOPS) ) {
- LoopTimesRound++;
-
- for ( SB = (*LastSB); SB < MaxSB; SB++ ) {
- /* Check its four Macro-Blocks */
- for ( MB=(*LastMB); MB<4; MB++ ) {
- /* Mark relevant blocks for update */
- UpRegulateMB( cpi, RegulationQ, SB, MB, 0 );
-
- /* Keep track of the last refresh MB. */
- (*LastMB) += 1;
- if ( (*LastMB) == 4 )
- (*LastMB) = 0;
-
- /* Termination clause */
- if (cpi->MotionScore >= RecoveryBlocks) {
- /* Make sure we don't stall at SB level */
- if ( *LastMB == 0 )
- SB++;
- break;
- }
- }
-
- /* Termination clause */
- if (cpi->MotionScore >= RecoveryBlocks)
- break;
- }
-
- /* Update super block start index */
- if ( SB >= MaxSB){
- (*LastSB) = 0;
- }else{
- (*LastSB) = SB;
- }
- }
-}
-
-void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
- ogg_int32_t RecoveryBlocks ) {
- ogg_uint32_t LastPassMBPos = 0;
- ogg_uint32_t StdLastMBPos = 0;
-
- ogg_uint32_t MaxSB = cpi->pb.YSBRows *
- cpi->pb.YSBCols; /* Tot super blocks in image */
-
- ogg_uint32_t SB=0; /* Super-Block index */
- ogg_uint32_t MB; /* Macro-Block index */
-
- /* Decduct the number of blocks in an MB / 2 from the recover block count.
- This will compensate for the fact that once we start checking an MB
- we test every block in that macro block */
- if ( RecoveryBlocks > 3 )
- RecoveryBlocks -= 3;
-
- /* Up regulate blocks last coded at higher Q */
- UpRegulateBlocks( cpi, RegulationQ, RecoveryBlocks,
- &cpi->LastEndSB, &StdLastMBPos );
-
- /* If we have still not used up the minimum number of blocks and are
- at the minimum Q then run through a final pass of the data to
- insure that each block gets a final refresh. */
- if ( (RegulationQ == VERY_BEST_Q) &&
- (cpi->MotionScore < RecoveryBlocks) ) {
- if ( cpi->FinalPassLastPos < MaxSB ) {
- for ( SB = cpi->FinalPassLastPos; SB < MaxSB; SB++ ) {
- /* Check its four Macro-Blocks */
- for ( MB=LastPassMBPos; MB<4; MB++ ) {
- /* Mark relevant blocks for update */
- UpRegulateMB( cpi, RegulationQ, SB, MB, 1 );
-
- /* Keep track of the last refresh MB. */
- LastPassMBPos += 1;
- if ( LastPassMBPos == 4 ) {
- LastPassMBPos = 0;
-
- /* Increment SB index */
- cpi->FinalPassLastPos += 1;
- }
-
- /* Termination clause */
- if (cpi->MotionScore >= RecoveryBlocks)
- break;
- }
-
- /* Termination clause */
- if (cpi->MotionScore >= RecoveryBlocks)
- break;
-
- }
- }
- }
-}
-
-void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore ) {
- double PredUnitScoreBytes;
- ogg_uint32_t QIndex = Q_TABLE_SIZE - 1;
- ogg_uint32_t i;
-
- if ( UpdateScore > 0 ) {
- double TargetUnitScoreBytes = (double)cpi->ThisFrameTargetBytes /
- (double)UpdateScore;
- double LastBitError = 10000.0; /* Silly high number */
- /* Search for the best Q for the target bitrate. */
- for ( i = 0; i < Q_TABLE_SIZE; i++ ) {
- PredUnitScoreBytes = GetEstimatedBpb( cpi, cpi->pb.QThreshTable[i] );
- if ( PredUnitScoreBytes > TargetUnitScoreBytes ) {
- if ( (PredUnitScoreBytes - TargetUnitScoreBytes) <= LastBitError ) {
- QIndex = i;
- } else {
- QIndex = i - 1;
- }
- break;
- } else {
- LastBitError = TargetUnitScoreBytes - PredUnitScoreBytes;
- }
- }
- }
-
- /* QIndex should now indicate the optimal Q. */
- cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[QIndex];
-
- /* Apply range restrictions for key frames. */
- if ( cpi->pb.FrameType == KEY_FRAME ) {
- if ( cpi->pb.ThisFrameQualityValue > cpi->pb.QThreshTable[20] )
- cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[20];
- else if ( cpi->pb.ThisFrameQualityValue < cpi->pb.QThreshTable[50] )
- cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[50];
- }
-
- /* Limit the Q value to the maximum available value */
- if (cpi->pb.ThisFrameQualityValue >
- cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) {
- cpi->pb.ThisFrameQualityValue =
- (ogg_uint32_t)cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ];
- }
-
- if(cpi->FixedQ) {
- if ( cpi->pb.FrameType == KEY_FRAME ) {
- cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[43];
- cpi->pb.ThisFrameQualityValue = cpi->FixedQ;
- } else {
- cpi->pb.ThisFrameQualityValue = cpi->FixedQ;
- }
- }
-
- /* If the quantizer value has changed then re-initialise it */
- if ( cpi->pb.ThisFrameQualityValue != cpi->pb.LastFrameQualityValue ) {
- /* Initialise quality tables. */
- UpdateQC( cpi, cpi->pb.ThisFrameQualityValue );
- cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
- }
-}
-
-void CopyBackExtraFrags(CP_INSTANCE *cpi){
- ogg_uint32_t i,j;
- unsigned char * SrcPtr;
- unsigned char * DestPtr;
- ogg_uint32_t PlaneLineStep;
- ogg_uint32_t PixelIndex;
-
- /* Copy back for Y plane. */
- PlaneLineStep = cpi->pb.info.width;
- for ( i = 0; i < cpi->pb.YPlaneFragments; i++ ) {
- /* We are only interested in updated fragments. */
- if ( cpi->extra_fragments[i] ) {
- /* Get the start index for the fragment. */
- PixelIndex = cpi->pb.pixel_index_table[i];
- SrcPtr = &cpi->yuv1ptr[PixelIndex];
- DestPtr = &cpi->ConvDestBuffer[PixelIndex];
-
- for ( j = 0; j < VFRAGPIXELS; j++ ) {
- memcpy( DestPtr, SrcPtr, HFRAGPIXELS);
-
- SrcPtr += PlaneLineStep;
- DestPtr += PlaneLineStep;
- }
- }
- }
-
- /* Now the U and V planes */
- PlaneLineStep = cpi->pb.info.width / 2;
- for ( i = cpi->pb.YPlaneFragments;
- i < (cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments)) ;
- i++ ) {
-
- /* We are only interested in updated fragments. */
- if ( cpi->extra_fragments[i] ) {
- /* Get the start index for the fragment. */
- PixelIndex = cpi->pb.pixel_index_table[i];
- SrcPtr = &cpi->yuv1ptr[PixelIndex];
- DestPtr = &cpi->ConvDestBuffer[PixelIndex];
-
- for ( j = 0; j < VFRAGPIXELS; j++ ) {
- memcpy( DestPtr, SrcPtr, HFRAGPIXELS);
- SrcPtr += PlaneLineStep;
- DestPtr += PlaneLineStep;
- }
- }
- }
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/pb.c b/Engine/lib/libtheora/lib/enc/pb.c
deleted file mode 100644
index 42047249a..000000000
--- a/Engine/lib/libtheora/lib/enc/pb.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: pb.c 14372 2008-01-05 23:52:28Z giles $
-
- ********************************************************************/
-
-#include
-#include
-#include "codec_internal.h"
-
-void ClearTmpBuffers(PB_INSTANCE * pbi){
-
- if(pbi->ReconDataBuffer)
- _ogg_free(pbi->ReconDataBuffer);
- if(pbi->DequantBuffer)
- _ogg_free(pbi->DequantBuffer);
- if(pbi->TmpDataBuffer)
- _ogg_free(pbi->TmpDataBuffer);
- if(pbi->TmpReconBuffer)
- _ogg_free(pbi->TmpReconBuffer);
-
-
- pbi->ReconDataBuffer=0;
- pbi->DequantBuffer = 0;
- pbi->TmpDataBuffer = 0;
- pbi->TmpReconBuffer = 0;
-
-}
-
-void InitTmpBuffers(PB_INSTANCE * pbi){
-
- /* clear any existing info */
- ClearTmpBuffers(pbi);
-
- /* Adjust the position of all of our temporary */
- pbi->ReconDataBuffer =
- _ogg_malloc(64*sizeof(*pbi->ReconDataBuffer));
-
- pbi->DequantBuffer =
- _ogg_malloc(64 * sizeof(*pbi->DequantBuffer));
-
- pbi->TmpDataBuffer =
- _ogg_malloc(64 * sizeof(*pbi->TmpDataBuffer));
-
- pbi->TmpReconBuffer =
- _ogg_malloc(64 * sizeof(*pbi->TmpReconBuffer));
-
-}
-
-void ClearPBInstance(PB_INSTANCE *pbi){
- if(pbi){
- ClearTmpBuffers(pbi);
- if (pbi->opb) {
- _ogg_free(pbi->opb);
- }
- }
-}
-
-void InitPBInstance(PB_INSTANCE *pbi){
- /* initialize whole structure to 0 */
- memset(pbi, 0, sizeof(*pbi));
-
- InitTmpBuffers(pbi);
-
- /* allocate memory for the oggpack_buffer */
- pbi->opb = _ogg_malloc(sizeof(oggpack_buffer));
-
- /* variables needing initialization (not being set to 0) */
-
- pbi->ModifierPointer[0] = &pbi->Modifier[0][255];
- pbi->ModifierPointer[1] = &pbi->Modifier[1][255];
- pbi->ModifierPointer[2] = &pbi->Modifier[2][255];
- pbi->ModifierPointer[3] = &pbi->Modifier[3][255];
-
- pbi->DecoderErrorCode = 0;
- pbi->KeyFrameType = DCT_KEY_FRAME;
- pbi->FramesHaveBeenSkipped = 0;
-}
diff --git a/Engine/lib/libtheora/lib/enc/pp.c b/Engine/lib/libtheora/lib/enc/pp.c
deleted file mode 100644
index c45289703..000000000
--- a/Engine/lib/libtheora/lib/enc/pp.c
+++ /dev/null
@@ -1,951 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: pp.c 15057 2008-06-22 21:07:32Z xiphmont $
-
- ********************************************************************/
-
-#include
-#include
-#include "codec_internal.h"
-#include "pp.h"
-#include "dsp.h"
-
-#define MAX(a, b) ((a>b)?a:b)
-#define MIN(a, b) ((aScanPixelIndexTable) _ogg_free(ppi->ScanPixelIndexTable);
- ppi->ScanPixelIndexTable=0;
-
- if(ppi->ScanDisplayFragments) _ogg_free(ppi->ScanDisplayFragments);
- ppi->ScanDisplayFragments=0;
-
- for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
- if(ppi->PrevFragments[i]){
- _ogg_free(ppi->PrevFragments[i]);
- ppi->PrevFragments[i]=0;
- }
-
- if(ppi->FragScores) _ogg_free(ppi->FragScores);
- ppi->FragScores=0;
-
- if(ppi->SameGreyDirPixels) _ogg_free(ppi->SameGreyDirPixels);
- ppi->SameGreyDirPixels=0;
-
- if(ppi->FragDiffPixels) _ogg_free(ppi->FragDiffPixels);
- ppi->FragDiffPixels=0;
-
- if(ppi->BarBlockMap) _ogg_free(ppi->BarBlockMap);
- ppi->BarBlockMap=0;
-
- if(ppi->TmpCodedMap) _ogg_free(ppi->TmpCodedMap);
- ppi->TmpCodedMap=0;
-
- if(ppi->RowChangedPixels) _ogg_free(ppi->RowChangedPixels);
- ppi->RowChangedPixels=0;
-
- if(ppi->PixelScores) _ogg_free(ppi->PixelScores);
- ppi->PixelScores=0;
-
- if(ppi->PixelChangedMap) _ogg_free(ppi->PixelChangedMap);
- ppi->PixelChangedMap=0;
-
- if(ppi->ChLocals) _ogg_free(ppi->ChLocals);
- ppi->ChLocals=0;
-
- if(ppi->yuv_differences) _ogg_free(ppi->yuv_differences);
- ppi->yuv_differences=0;
-
-}
-
-void PInitFrameInfo(PP_INSTANCE * ppi){
- int i;
- PClearFrameInfo(ppi);
-
- ppi->ScanPixelIndexTable =
- _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanPixelIndexTable));
-
- ppi->ScanDisplayFragments =
- _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanDisplayFragments));
-
- for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
- ppi->PrevFragments[i] =
- _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->PrevFragments));
-
- ppi->FragScores =
- _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores));
-
- ppi->SameGreyDirPixels =
- _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->SameGreyDirPixels));
-
- ppi->FragDiffPixels =
- _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores));
-
- ppi->BarBlockMap=
- _ogg_malloc(3 * ppi->ScanHFragments*sizeof(*ppi->BarBlockMap));
-
- ppi->TmpCodedMap =
- _ogg_malloc(ppi->ScanHFragments*sizeof(*ppi->TmpCodedMap));
-
- ppi->RowChangedPixels =
- _ogg_malloc(3 * ppi->ScanConfig.VideoFrameHeight*
- sizeof(*ppi->RowChangedPixels));
-
- ppi->PixelScores =
- _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
- sizeof(*ppi->PixelScores) * PSCORE_CB_ROWS);
-
- ppi->PixelChangedMap =
- _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
- sizeof(*ppi->PixelChangedMap) * PMAP_CB_ROWS);
-
- ppi->ChLocals =
- _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
- sizeof(*ppi->ChLocals) * CHLOCALS_CB_ROWS);
-
- ppi->yuv_differences =
- _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
- sizeof(*ppi->yuv_differences) * YDIFF_CB_ROWS);
-}
-
-void ClearPPInstance(PP_INSTANCE *ppi){
- PClearFrameInfo(ppi);
-}
-
-
-void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs){
-
- memset(ppi,0,sizeof(*ppi));
-
- memcpy(&ppi->dsp, funcs, sizeof(DspFunctions));
-
- /* Initializations */
- ppi->PrevFrameLimit = 3; /* Must not exceed MAX_PREV_FRAMES (Note
- that this number includes the current
- frame so "1 = no effect") */
-
- /* Scan control variables. */
- ppi->HFragPixels = 8;
- ppi->VFragPixels = 8;
-
- ppi->SRFGreyThresh = 4;
- ppi->SRFColThresh = 5;
- ppi->NoiseSupLevel = 3;
- ppi->SgcLevelThresh = 3;
- ppi->SuvcLevelThresh = 4;
-
- /* Variables controlling S.A.D. breakouts. */
- ppi->GrpLowSadThresh = 10;
- ppi->GrpHighSadThresh = 64;
- ppi->PrimaryBlockThreshold = 5;
- ppi->SgcThresh = 16; /* (Default values for 8x8 blocks). */
-
- ppi->UVBlockThreshCorrection = 1.25;
- ppi->UVSgcCorrection = 1.5;
-
- ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
-}
-
-static void DeringBlockStrong(unsigned char *SrcPtr,
- unsigned char *DstPtr,
- ogg_int32_t Pitch,
- ogg_uint32_t FragQIndex,
- const ogg_uint32_t *QuantScale){
-
- ogg_int16_t UDMod[72];
- ogg_int16_t LRMod[72];
- unsigned int j,k,l;
- const unsigned char * Src;
- unsigned int QValue = QuantScale[FragQIndex];
-
- unsigned char p;
- unsigned char pl;
- unsigned char pr;
- unsigned char pu;
- unsigned char pd;
-
- int al;
- int ar;
- int au;
- int ad;
-
- int atot;
- int B;
- int newVal;
-
- const unsigned char *curRow = SrcPtr - 1; /* avoid negative array indexes */
- unsigned char *dstRow = DstPtr;
- const unsigned char *lastRow = SrcPtr-Pitch;
- const unsigned char *nextRow = SrcPtr+Pitch;
-
- unsigned int rowOffset = 0;
- unsigned int round = (1<<6);
-
- int High;
- int Low;
- int TmpMod;
-
- int Sharpen = SharpenModifier[FragQIndex];
- High = 3 * QValue;
- if(High>32)High=32;
- Low = 0;
-
-
- /* Initialize the Mod Data */
- Src = SrcPtr-Pitch;
- for(k=0;k<9;k++){
- for(j=0;j<8;j++){
-
- TmpMod = 32 + QValue - (abs(Src[j+Pitch]-Src[j]));
-
- if(TmpMod< -64)
- TmpMod = Sharpen;
-
- else if(TmpModHigh)
- TmpMod = High;
-
- UDMod[k*8+j] = (ogg_int16_t)TmpMod;
- }
- Src +=Pitch;
- }
-
- Src = SrcPtr-1;
-
- for(k=0;k<8;k++){
- for(j=0;j<9;j++){
- TmpMod = 32 + QValue - (abs(Src[j+1]-Src[j]));
-
- if(TmpMod< -64 )
- TmpMod = Sharpen;
-
- else if(TmpMod<0)
- TmpMod = Low;
-
- else if(TmpMod>High)
- TmpMod = High;
-
- LRMod[k*9+j] = (ogg_int16_t)TmpMod;
- }
- Src+=Pitch;
- }
-
- for(k=0;k<8;k++){
- /* In the case that this function called with same buffer for
- source and destination, To keep the c and the mmx version to have
- consistant results, intermediate buffer is used to store the
- eight pixel value before writing them to destination
- (i.e. Overwriting souce for the speical case) */
- for(l=0;l<8;l++){
-
- atot = 128;
- B = round;
- p = curRow[ rowOffset +l +1];
-
- pl = curRow[ rowOffset +l];
- al = LRMod[k*9+l];
- atot -= al;
- B += al * pl;
-
- pu = lastRow[ rowOffset +l];
- au = UDMod[k*8+l];
- atot -= au;
- B += au * pu;
-
- pd = nextRow[ rowOffset +l];
- ad = UDMod[(k+1)*8+l];
- atot -= ad;
- B += ad * pd;
-
- pr = curRow[ rowOffset +l+2];
- ar = LRMod[k*9+l+1];
- atot -= ar;
- B += ar * pr;
-
- newVal = ( atot * p + B) >> 7;
-
- dstRow[ rowOffset +l]= clamp255( newVal );
- }
- rowOffset += Pitch;
- }
-}
-
-static void DeringBlockWeak(unsigned char *SrcPtr,
- unsigned char *DstPtr,
- ogg_int32_t Pitch,
- ogg_uint32_t FragQIndex,
- const ogg_uint32_t *QuantScale){
-
- ogg_int16_t UDMod[72];
- ogg_int16_t LRMod[72];
- unsigned int j,k;
- const unsigned char * Src;
- unsigned int QValue = QuantScale[FragQIndex];
-
- unsigned char p;
- unsigned char pl;
- unsigned char pr;
- unsigned char pu;
- unsigned char pd;
-
- int al;
- int ar;
- int au;
- int ad;
-
- int atot;
- int B;
- int newVal;
-
- const unsigned char *curRow = SrcPtr-1;
- unsigned char *dstRow = DstPtr;
- const unsigned char *lastRow = SrcPtr-Pitch;
- const unsigned char *nextRow = SrcPtr+Pitch;
-
- unsigned int rowOffset = 0;
- unsigned int round = (1<<6);
-
- int High;
- int Low;
- int TmpMod;
- int Sharpen = SharpenModifier[FragQIndex];
-
- High = 3 * QValue;
- if(High>24)
- High=24;
- Low = 0 ;
-
- /* Initialize the Mod Data */
- Src=SrcPtr-Pitch;
- for(k=0;k<9;k++) {
- for(j=0;j<8;j++) {
-
- TmpMod = 32 + QValue - 2*(abs(Src[j+Pitch]-Src[j]));
-
- if(TmpMod< -64)
- TmpMod = Sharpen;
-
- else if(TmpModHigh)
- TmpMod = High;
-
- UDMod[k*8+j] = (ogg_int16_t)TmpMod;
- }
- Src +=Pitch;
- }
-
- Src = SrcPtr-1;
-
- for(k=0;k<8;k++){
- for(j=0;j<9;j++){
- TmpMod = 32 + QValue - 2*(abs(Src[j+1]-Src[j]));
-
- if(TmpMod< -64 )
- TmpMod = Sharpen;
-
- else if(TmpModHigh)
- TmpMod = High;
-
- LRMod[k*9+j] = (ogg_int16_t)TmpMod;
- }
- Src+=Pitch;
- }
-
- for(k=0;k<8;k++) {
- for(j=0;j<8;j++){
- atot = 128;
- B = round;
- p = curRow[ rowOffset +j+1];
-
- pl = curRow[ rowOffset +j];
- al = LRMod[k*9+j];
- atot -= al;
- B += al * pl;
-
- pu = lastRow[ rowOffset +j];
- au = UDMod[k*8+j];
- atot -= au;
- B += au * pu;
-
- pd = nextRow[ rowOffset +j];
- ad = UDMod[(k+1)*8+j];
- atot -= ad;
- B += ad * pd;
-
- pr = curRow[ rowOffset +j+2];
- ar = LRMod[k*9+j+1];
- atot -= ar;
- B += ar * pr;
-
- newVal = ( atot * p + B) >> 7;
-
- dstRow[ rowOffset +j] = clamp255( newVal );
- }
-
- rowOffset += Pitch;
- }
-}
-
-static void DeringFrame(PB_INSTANCE *pbi,
- unsigned char *Src, unsigned char *Dst){
- ogg_uint32_t col,row;
- unsigned char *SrcPtr;
- unsigned char *DestPtr;
- ogg_uint32_t BlocksAcross,BlocksDown;
- const ogg_uint32_t *QuantScale;
- ogg_uint32_t Block;
- ogg_uint32_t LineLength;
-
- ogg_int32_t Thresh1,Thresh2,Thresh3,Thresh4;
-
- Thresh1 = 384;
- Thresh2 = 4 * Thresh1;
- Thresh3 = 5 * Thresh2/4;
- Thresh4 = 5 * Thresh2/2;
-
- QuantScale = DeringModifierV1;
-
- BlocksAcross = pbi->HFragments;
- BlocksDown = pbi->VFragments;
-
- SrcPtr = Src + pbi->ReconYDataOffset;
- DestPtr = Dst + pbi->ReconYDataOffset;
- LineLength = pbi->YStride;
-
- Block = 0;
-
- for ( row = 0 ; row < BlocksDown; row ++){
- for (col = 0; col < BlocksAcross; col ++){
- ogg_uint32_t Quality = pbi->FragQIndex[Block];
- ogg_int32_t Variance = pbi->FragmentVariances[Block];
-
- if( pbi->PostProcessingLevel >5 && Variance > Thresh3 ){
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
-
- if( (col > 0 &&
- pbi->FragmentVariances[Block-1] > Thresh4 ) ||
- (col + 1 < BlocksAcross &&
- pbi->FragmentVariances[Block+1] > Thresh4 ) ||
- (row + 1 < BlocksDown &&
- pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
- (row > 0 &&
- pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) ){
-
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- }
- } else if(Variance > Thresh2 ) {
-
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- } else if(Variance > Thresh1 ) {
-
- DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
-
- } else {
-
- dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
-
- }
-
- ++Block;
-
- }
- SrcPtr += 8 * LineLength;
- DestPtr += 8 * LineLength;
- }
-
- /* Then U */
-
- BlocksAcross /= 2;
- BlocksDown /= 2;
- LineLength /= 2;
-
- SrcPtr = Src + pbi->ReconUDataOffset;
- DestPtr = Dst + pbi->ReconUDataOffset;
- for ( row = 0 ; row < BlocksDown; row ++) {
- for (col = 0; col < BlocksAcross; col ++) {
- ogg_uint32_t Quality = pbi->FragQIndex[Block];
- ogg_int32_t Variance = pbi->FragmentVariances[Block];
-
- if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
-
- }else if(Variance > Thresh2 ){
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- }else if(Variance > Thresh1 ){
- DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- }else{
- dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
- }
-
- ++Block;
-
- }
- SrcPtr += 8 * LineLength;
- DestPtr += 8 * LineLength;
- }
-
- /* Then V */
- SrcPtr = Src + pbi->ReconVDataOffset;
- DestPtr = Dst + pbi->ReconVDataOffset;
-
- for ( row = 0 ; row < BlocksDown; row ++){
- for (col = 0; col < BlocksAcross; col ++){
-
- ogg_uint32_t Quality = pbi->FragQIndex[Block];
- ogg_int32_t Variance = pbi->FragmentVariances[Block];
-
-
- if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
-
- }else if(Variance > Thresh2 ){
- DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- }else if(Variance > Thresh1 ){
- DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
- LineLength,Quality,QuantScale);
- }else{
- dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
- }
-
- ++Block;
-
- }
- SrcPtr += 8 * LineLength;
- DestPtr += 8 * LineLength;
-
- }
-
-}
-
-void UpdateFragQIndex(PB_INSTANCE *pbi){
-
- ogg_uint32_t ThisFrameQIndex;
- ogg_uint32_t i;
-
- /* Check this frame quality index */
- ThisFrameQIndex = pbi->FrameQIndex;
-
-
- /* It is not a key frame, so only reset those are coded */
- for( i = 0; i < pbi->UnitFragments; i++ )
- if( pbi->display_fragments[i])
- pbi->FragQIndex[i] = ThisFrameQIndex;
-
-}
-
-static void DeblockLoopFilteredBand(PB_INSTANCE *pbi,
- unsigned char *SrcPtr,
- unsigned char *DesPtr,
- ogg_uint32_t PlaneLineStep,
- ogg_uint32_t FragsAcross,
- ogg_uint32_t StartFrag,
- const ogg_uint32_t *QuantScale){
- ogg_uint32_t j,k;
- ogg_uint32_t CurrentFrag=StartFrag;
- ogg_int32_t QStep;
- ogg_int32_t FLimit;
- unsigned char *Src, *Des;
- ogg_int32_t x[10];
- ogg_int32_t Sum1, Sum2;
-
- while(CurrentFrag < StartFrag + FragsAcross){
-
- Src=SrcPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*5;
- Des=DesPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*4;
-
- QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
- FLimit = ( QStep * 3 ) >> 2;
-
- for( j=0; j<8 ; j++){
- x[0] = Src[0];
- x[1] = Src[PlaneLineStep];
- x[2] = Src[PlaneLineStep*2];
- x[3] = Src[PlaneLineStep*3];
- x[4] = Src[PlaneLineStep*4];
- x[5] = Src[PlaneLineStep*5];
- x[6] = Src[PlaneLineStep*6];
- x[7] = Src[PlaneLineStep*7];
- x[8] = Src[PlaneLineStep*8];
- x[9] = Src[PlaneLineStep*9];
-
- Sum1=Sum2=0;
-
- for(k=1;k<=4;k++){
- Sum1 += abs(x[k]-x[k-1]);
- Sum2 += abs(x[k+4]-x[k+5]);
- }
-
- pbi->FragmentVariances[CurrentFrag] +=((Sum1>255)?255:Sum1);
- pbi->FragmentVariances[CurrentFrag + FragsAcross] += ((Sum2>255)?255:Sum2);
-
- if( Sum1 < FLimit &&
- Sum2 < FLimit &&
- (x[5] - x[4]) < QStep &&
- (x[4] - x[5]) < QStep ){
-
- /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
- Des[0 ] = (x[0] + x[0] +x[0] + x[1] * 2 +
- x[2] + x[3] +x[4] + 4) >> 3;
- Des[PlaneLineStep ] = (x[0] + x[0] +x[1] + x[2] * 2 +
- x[3] + x[4] +x[5] + 4) >> 3;
- Des[PlaneLineStep*2] = (x[0] + x[1] +x[2] + x[3] * 2 +
- x[4] + x[5] +x[6] + 4) >> 3;
- Des[PlaneLineStep*3] = (x[1] + x[2] +x[3] + x[4] * 2 +
- x[5] + x[6] +x[7] + 4) >> 3;
- Des[PlaneLineStep*4] = (x[2] + x[3] +x[4] + x[5] * 2 +
- x[6] + x[7] +x[8] + 4) >> 3;
- Des[PlaneLineStep*5] = (x[3] + x[4] +x[5] + x[6] * 2 +
- x[7] + x[8] +x[9] + 4) >> 3;
- Des[PlaneLineStep*6] = (x[4] + x[5] +x[6] + x[7] * 2 +
- x[8] + x[9] +x[9] + 4) >> 3;
- Des[PlaneLineStep*7] = (x[5] + x[6] +x[7] + x[8] * 2 +
- x[9] + x[9] +x[9] + 4) >> 3;
-
- }else {
- /* copy the pixels to destination */
- Des[0 ]= (unsigned char)x[1];
- Des[PlaneLineStep ]= (unsigned char)x[2];
- Des[PlaneLineStep*2]= (unsigned char)x[3];
- Des[PlaneLineStep*3]= (unsigned char)x[4];
- Des[PlaneLineStep*4]= (unsigned char)x[5];
- Des[PlaneLineStep*5]= (unsigned char)x[6];
- Des[PlaneLineStep*6]= (unsigned char)x[7];
- Des[PlaneLineStep*7]= (unsigned char)x[8];
- }
- Src ++;
- Des ++;
- }
-
-
- /* done with filtering the horizontal edge, now let's do the
- vertical one */
- /* skip the first one */
- if(CurrentFrag==StartFrag)
- CurrentFrag++;
- else{
- Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
- Src=Des-5;
- Des-=4;
-
- QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
- FLimit = ( QStep * 3 ) >> 2;
-
- for( j=0; j<8 ; j++){
- x[0] = Src[0];
- x[1] = Src[1];
- x[2] = Src[2];
- x[3] = Src[3];
- x[4] = Src[4];
- x[5] = Src[5];
- x[6] = Src[6];
- x[7] = Src[7];
- x[8] = Src[8];
- x[9] = Src[9];
-
- Sum1=Sum2=0;
-
- for(k=1;k<=4;k++){
- Sum1 += abs(x[k]-x[k-1]);
- Sum2 += abs(x[k+4]-x[k+5]);
- }
-
- pbi->FragmentVariances[CurrentFrag-1] += ((Sum1>255)?255:Sum1);
- pbi->FragmentVariances[CurrentFrag] += ((Sum2>255)?255:Sum2);
-
- if( Sum1 < FLimit &&
- Sum2 < FLimit &&
- (x[5] - x[4]) < QStep &&
- (x[4] - x[5]) < QStep ){
-
- /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
- Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
- Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
- Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
- Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
- Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
- Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
- Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
- Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
- }
-
- Src += PlaneLineStep;
- Des += PlaneLineStep;
- }
- CurrentFrag ++;
- }
- }
-}
-
-static void DeblockVerticalEdgesInLoopFilteredBand(PB_INSTANCE *pbi,
- unsigned char *SrcPtr,
- unsigned char *DesPtr,
- ogg_uint32_t PlaneLineStep,
- ogg_uint32_t FragsAcross,
- ogg_uint32_t StartFrag,
- const ogg_uint32_t *QuantScale){
- ogg_uint32_t j,k;
- ogg_uint32_t CurrentFrag=StartFrag;
- ogg_int32_t QStep;
- ogg_int32_t FLimit;
- unsigned char *Src, *Des;
- ogg_int32_t x[10];
- ogg_int32_t Sum1, Sum2;
-
- while(CurrentFrag < StartFrag + FragsAcross-1) {
-
- Src=SrcPtr+8*(CurrentFrag-StartFrag+1)-5;
- Des=DesPtr+8*(CurrentFrag-StartFrag+1)-4;
-
- QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
- FLimit = ( QStep * 3)>>2 ;
-
- for( j=0; j<8 ; j++){
- x[0] = Src[0];
- x[1] = Src[1];
- x[2] = Src[2];
- x[3] = Src[3];
- x[4] = Src[4];
- x[5] = Src[5];
- x[6] = Src[6];
- x[7] = Src[7];
- x[8] = Src[8];
- x[9] = Src[9];
-
- Sum1=Sum2=0;
-
- for(k=1;k<=4;k++){
- Sum1 += abs(x[k]-x[k-1]);
- Sum2 += abs(x[k+4]-x[k+5]);
- }
-
- pbi->FragmentVariances[CurrentFrag] += ((Sum1>255)?255:Sum1);
- pbi->FragmentVariances[CurrentFrag+1] += ((Sum2>255)?255:Sum2);
-
-
- if( Sum1 < FLimit &&
- Sum2 < FLimit &&
- (x[5] - x[4]) < QStep &&
- (x[4] - x[5]) < QStep ){
-
- /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
- Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
- Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
- Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
- Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
- Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
- Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
- Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
- Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
- }
- Src +=PlaneLineStep;
- Des +=PlaneLineStep;
-
- }
- CurrentFrag ++;
- }
-}
-
-static void DeblockPlane(PB_INSTANCE *pbi,
- unsigned char *SourceBuffer,
- unsigned char *DestinationBuffer,
- ogg_uint32_t Channel ){
-
- ogg_uint32_t i,k;
- ogg_uint32_t PlaneLineStep=0;
- ogg_uint32_t StartFrag =0;
- ogg_uint32_t PixelIndex=0;
- unsigned char * SrcPtr=0, * DesPtr=0;
- ogg_uint32_t FragsAcross=0;
- ogg_uint32_t FragsDown=0;
- const ogg_uint32_t *QuantScale=0;
-
- switch( Channel ){
- case 0:
- /* Get the parameters */
- PlaneLineStep = pbi->YStride;
- FragsAcross = pbi->HFragments;
- FragsDown = pbi->VFragments;
- StartFrag = 0;
- PixelIndex = pbi->ReconYDataOffset;
- SrcPtr = & SourceBuffer[PixelIndex];
- DesPtr = & DestinationBuffer[PixelIndex];
- break;
-
- case 1:
- /* Get the parameters */
- PlaneLineStep = pbi->UVStride;
- FragsAcross = pbi->HFragments / 2;
- FragsDown = pbi->VFragments / 2;
- StartFrag = pbi->YPlaneFragments;
-
- PixelIndex = pbi->ReconUDataOffset;
- SrcPtr = & SourceBuffer[PixelIndex];
- DesPtr = & DestinationBuffer[PixelIndex];
- break;
-
- default:
- /* Get the parameters */
- PlaneLineStep = pbi->UVStride;
- FragsAcross = pbi->HFragments / 2;
- FragsDown = pbi->VFragments / 2;
- StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments;
-
- PixelIndex = pbi->ReconVDataOffset;
- SrcPtr = & SourceBuffer[PixelIndex];
- DesPtr = & DestinationBuffer[PixelIndex];
- break;
- }
-
- QuantScale = DcQuantScaleV1;
-
- for(i=0;i<4;i++)
- memcpy(DesPtr+i*PlaneLineStep, SrcPtr+i*PlaneLineStep, PlaneLineStep);
-
- k = 1;
-
- while( k < FragsDown ){
-
- SrcPtr += 8*PlaneLineStep;
- DesPtr += 8*PlaneLineStep;
-
- /* Filter both the horizontal and vertical block edges inside the band */
- DeblockLoopFilteredBand(pbi, SrcPtr, DesPtr, PlaneLineStep,
- FragsAcross, StartFrag, QuantScale);
-
- /* Move Pointers */
- StartFrag += FragsAcross;
-
- k ++;
- }
-
- /* The Last band */
- for(i=0;i<4;i++)
- memcpy(DesPtr+(i+4)*PlaneLineStep,
- SrcPtr+(i+4)*PlaneLineStep,
- PlaneLineStep);
-
- DeblockVerticalEdgesInLoopFilteredBand(pbi,SrcPtr,DesPtr,PlaneLineStep,
- FragsAcross,StartFrag,QuantScale);
-
-}
-
-static void DeblockFrame(PB_INSTANCE *pbi, unsigned char *SourceBuffer,
- unsigned char *DestinationBuffer){
-
- memset(pbi->FragmentVariances, 0 , sizeof(ogg_int32_t) * pbi->UnitFragments);
-
-
- UpdateFragQIndex(pbi);
-
- /* Y */
- DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 0);
-
- /* U */
- DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 1);
-
- /* V */
- DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 2);
-
-}
-
-void PostProcess(PB_INSTANCE *pbi){
-
- switch (pbi->PostProcessingLevel){
- case 8:
- /* on a slow machine, use a simpler and faster deblocking filter */
- DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
- break;
-
- case 6:
- DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
- UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
- DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
- break;
-
- case 5:
- DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
- UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
- DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
- break;
- case 4:
- DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
- break;
- case 1:
- UpdateFragQIndex(pbi);
- break;
-
- case 0:
- break;
-
- default:
- DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
- UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
- DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
- break;
- }
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/pp.h b/Engine/lib/libtheora/lib/enc/pp.h
deleted file mode 100644
index 6eb3a7604..000000000
--- a/Engine/lib/libtheora/lib/enc/pp.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: pp.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-/* Constants. */
-#define INTERNAL_BLOCK_HEIGHT 8
-#define INTERNAL_BLOCK_WIDTH 8
-
-
-/* NEW Line search values. */
-#define UP 0
-#define DOWN 1
-#define LEFT 2
-#define RIGHT 3
-
-#define FIRST_ROW 0
-#define NOT_EDGE_ROW 1
-#define LAST_ROW 2
-
-#define YDIFF_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
-#define CHLOCALS_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
-#define PMAP_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
-#define PSCORE_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 4)
-
-/* Status values in block coding map */
-#define CANDIDATE_BLOCK_LOW -2
-#define CANDIDATE_BLOCK -1
-#define BLOCK_NOT_CODED 0
-#define BLOCK_CODED_BAR 3
-#define BLOCK_CODED_SGC 4
-#define BLOCK_CODED_LOW 4
-#define BLOCK_CODED 5
-
-#define MAX_PREV_FRAMES 16
-#define MAX_SEARCH_LINE_LEN 7
diff --git a/Engine/lib/libtheora/lib/enc/quant_lookup.h b/Engine/lib/libtheora/lib/enc/quant_lookup.h
deleted file mode 100644
index 04bbce910..000000000
--- a/Engine/lib/libtheora/lib/enc/quant_lookup.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: quant_lookup.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-#define MIN16 ((1<<16)-1)
-#define SHIFT16 (1<<16)
-
-#define MIN_LEGAL_QUANT_ENTRY 8
-#define MIN_DEQUANT_VAL 2
-#define IDCT_SCALE_FACTOR 2 /* Shift left bits to improve IDCT precision */
-#define OLD_SCHEME 1
-
-
-/******************************
- * lookup table for DCT coefficient zig-zag ordering
- * ****************************/
-
-static const ogg_uint32_t dezigzag_index[64] = {
- 0, 1, 8, 16, 9, 2, 3, 10,
- 17, 24, 32, 25, 18, 11, 4, 5,
- 12, 19, 26, 33, 40, 48, 41, 34,
- 27, 20, 13, 6, 7, 14, 21, 28,
- 35, 42, 49, 56, 57, 50, 43, 36,
- 29, 22, 15, 23, 30, 37, 44, 51,
- 58, 59, 52, 45, 38, 31, 39, 46,
- 53, 60, 61, 54, 47, 55, 62, 63
-};
-
diff --git a/Engine/lib/libtheora/lib/enc/reconstruct.c b/Engine/lib/libtheora/lib/enc/reconstruct.c
deleted file mode 100644
index 5602884af..000000000
--- a/Engine/lib/libtheora/lib/enc/reconstruct.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: reconstruct.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static void copy8x8__c (unsigned char *src,
- unsigned char *dest,
- unsigned int stride)
-{
- int j;
- for ( j = 0; j < 8; j++ ){
- ((ogg_uint32_t*)dest)[0] = ((ogg_uint32_t*)src)[0];
- ((ogg_uint32_t*)dest)[1] = ((ogg_uint32_t*)src)[1];
- src+=stride;
- dest+=stride;
- }
-}
-
-static void recon_intra8x8__c (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- ogg_uint32_t i;
-
- for (i = 8; i; i--){
- /* Convert the data back to 8 bit unsigned */
- /* Saturate the output to unsigend 8 bit values */
- ReconPtr[0] = clamp255( ChangePtr[0] + 128 );
- ReconPtr[1] = clamp255( ChangePtr[1] + 128 );
- ReconPtr[2] = clamp255( ChangePtr[2] + 128 );
- ReconPtr[3] = clamp255( ChangePtr[3] + 128 );
- ReconPtr[4] = clamp255( ChangePtr[4] + 128 );
- ReconPtr[5] = clamp255( ChangePtr[5] + 128 );
- ReconPtr[6] = clamp255( ChangePtr[6] + 128 );
- ReconPtr[7] = clamp255( ChangePtr[7] + 128 );
-
- ReconPtr += LineStep;
- ChangePtr += 8;
- }
-}
-
-static void recon_inter8x8__c (unsigned char *ReconPtr, unsigned char *RefPtr,
- ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
- ogg_uint32_t i;
-
- for (i = 8; i; i--){
- ReconPtr[0] = clamp255(RefPtr[0] + ChangePtr[0]);
- ReconPtr[1] = clamp255(RefPtr[1] + ChangePtr[1]);
- ReconPtr[2] = clamp255(RefPtr[2] + ChangePtr[2]);
- ReconPtr[3] = clamp255(RefPtr[3] + ChangePtr[3]);
- ReconPtr[4] = clamp255(RefPtr[4] + ChangePtr[4]);
- ReconPtr[5] = clamp255(RefPtr[5] + ChangePtr[5]);
- ReconPtr[6] = clamp255(RefPtr[6] + ChangePtr[6]);
- ReconPtr[7] = clamp255(RefPtr[7] + ChangePtr[7]);
-
- ChangePtr += 8;
- ReconPtr += LineStep;
- RefPtr += LineStep;
- }
-}
-
-static void recon_inter8x8_half__c (unsigned char *ReconPtr, unsigned char *RefPtr1,
- unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- ogg_uint32_t i;
-
- for (i = 8; i; i--){
- ReconPtr[0] = clamp255((((int)RefPtr1[0] + (int)RefPtr2[0]) >> 1) + ChangePtr[0] );
- ReconPtr[1] = clamp255((((int)RefPtr1[1] + (int)RefPtr2[1]) >> 1) + ChangePtr[1] );
- ReconPtr[2] = clamp255((((int)RefPtr1[2] + (int)RefPtr2[2]) >> 1) + ChangePtr[2] );
- ReconPtr[3] = clamp255((((int)RefPtr1[3] + (int)RefPtr2[3]) >> 1) + ChangePtr[3] );
- ReconPtr[4] = clamp255((((int)RefPtr1[4] + (int)RefPtr2[4]) >> 1) + ChangePtr[4] );
- ReconPtr[5] = clamp255((((int)RefPtr1[5] + (int)RefPtr2[5]) >> 1) + ChangePtr[5] );
- ReconPtr[6] = clamp255((((int)RefPtr1[6] + (int)RefPtr2[6]) >> 1) + ChangePtr[6] );
- ReconPtr[7] = clamp255((((int)RefPtr1[7] + (int)RefPtr2[7]) >> 1) + ChangePtr[7] );
-
- ChangePtr += 8;
- ReconPtr += LineStep;
- RefPtr1 += LineStep;
- RefPtr2 += LineStep;
- }
-}
-
-void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
- funcs->copy8x8 = copy8x8__c;
- funcs->recon_intra8x8 = recon_intra8x8__c;
- funcs->recon_inter8x8 = recon_inter8x8__c;
- funcs->recon_inter8x8_half = recon_inter8x8_half__c;
-#if defined(USE_ASM)
- if (cpu_flags & OC_CPU_X86_MMX) {
- dsp_mmx_recon_init(funcs);
- }
-#endif
-}
diff --git a/Engine/lib/libtheora/lib/enc/scan.c b/Engine/lib/libtheora/lib/enc/scan.c
deleted file mode 100644
index 5466ca438..000000000
--- a/Engine/lib/libtheora/lib/enc/scan.c
+++ /dev/null
@@ -1,2301 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: scan.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include
-#include
-#include
-#include "codec_internal.h"
-#include "dsp.h"
-
-#define MAX_SEARCH_LINE_LEN 7
-
-#define SET8_0(ptr) \
- ((ogg_uint32_t *)ptr)[0] = 0x00000000; \
- ((ogg_uint32_t *)ptr)[1] = 0x00000000;
-#define SET8_1(ptr) \
- ((ogg_uint32_t *)ptr)[0] = 0x01010101; \
- ((ogg_uint32_t *)ptr)[1] = 0x01010101;
-#define SET8_8(ptr) \
- ((ogg_uint32_t *)ptr)[0] = 0x08080808; \
- ((ogg_uint32_t *)ptr)[1] = 0x08080808;
-
-static ogg_uint32_t LineLengthScores[ MAX_SEARCH_LINE_LEN + 1 ] = {
- 0, 0, 0, 0, 2, 4, 12, 24
-};
-
-static ogg_uint32_t BodyNeighbourScore = 8;
-static double DiffDevisor = 0.0625;
-#define HISTORY_BLOCK_FACTOR 2
-#define MIN_STEP_THRESH 6
-#define SCORE_MULT_LOW 0.5
-#define SCORE_MULT_HIGH 4
-
-#define UP 0
-#define DOWN 1
-#define LEFT 2
-#define RIGHT 3
-
-#define INTERNAL_BLOCK_HEIGHT 8
-#define INTERNAL_BLOCK_WIDTH 8
-
-#define BLOCK_NOT_CODED 0
-#define BLOCK_CODED_BAR 3
-#define BLOCK_CODED_SGC 4
-#define BLOCK_CODED_LOW 4
-#define BLOCK_CODED 5
-
-#define CANDIDATE_BLOCK_LOW -2
-#define CANDIDATE_BLOCK -1
-
-#define FIRST_ROW 0
-#define NOT_EDGE_ROW 1
-#define LAST_ROW 2
-
-#define YDIFF_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
-#define CHLOCALS_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
-#define PMAP_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
-
-void ConfigurePP( PP_INSTANCE *ppi, int Level ) {
- switch ( Level ){
- case 0:
- ppi->SRFGreyThresh = 1;
- ppi->SRFColThresh = 1;
- ppi->NoiseSupLevel = 2;
- ppi->SgcLevelThresh = 1;
- ppi->SuvcLevelThresh = 1;
- ppi->GrpLowSadThresh = 6;
- ppi->GrpHighSadThresh = 24;
- ppi->PrimaryBlockThreshold = 2;
- ppi->SgcThresh = 10;
-
- ppi->PAKEnabled = 0;
- break;
-
- case 1:
- ppi->SRFGreyThresh = 2;
- ppi->SRFColThresh = 2;
- ppi->NoiseSupLevel = 2;
- ppi->SgcLevelThresh = 2;
- ppi->SuvcLevelThresh = 2;
- ppi->GrpLowSadThresh = 8;
- ppi->GrpHighSadThresh = 32;
- ppi->PrimaryBlockThreshold = 5;
- ppi->SgcThresh = 12;
-
- ppi->PAKEnabled = 1;
- break;
-
- case 2: /* Default VP3 settings */
- ppi->SRFGreyThresh = 3;
- ppi->SRFColThresh = 3;
- ppi->NoiseSupLevel = 2;
- ppi->SgcLevelThresh = 2;
- ppi->SuvcLevelThresh = 2;
- ppi->GrpLowSadThresh = 8;
- ppi->GrpHighSadThresh = 32;
- ppi->PrimaryBlockThreshold = 5;
- ppi->SgcThresh = 16;
-
- ppi->PAKEnabled = 1;
- break;
-
- case 3:
- ppi->SRFGreyThresh = 4;
- ppi->SRFColThresh = 4;
- ppi->NoiseSupLevel = 3;
- ppi->SgcLevelThresh = 3;
- ppi->SuvcLevelThresh = 3;
- ppi->GrpLowSadThresh = 10;
- ppi->GrpHighSadThresh = 48;
- ppi->PrimaryBlockThreshold = 5;
- ppi->SgcThresh = 18;
-
- ppi->PAKEnabled = 1;
- break;
-
- case 4:
- ppi->SRFGreyThresh = 5;
- ppi->SRFColThresh = 5;
- ppi->NoiseSupLevel = 3;
- ppi->SgcLevelThresh = 4;
- ppi->SuvcLevelThresh = 4;
- ppi->GrpLowSadThresh = 12;
- ppi->GrpHighSadThresh = 48;
- ppi->PrimaryBlockThreshold = 5;
- ppi->SgcThresh = 20;
-
- ppi->PAKEnabled = 1;
- break;
-
- case 5:
- ppi->SRFGreyThresh = 6;
- ppi->SRFColThresh = 6;
- ppi->NoiseSupLevel = 3;
- ppi->SgcLevelThresh = 4;
- ppi->SuvcLevelThresh = 4;
- ppi->GrpLowSadThresh = 12;
- ppi->GrpHighSadThresh = 64;
- ppi->PrimaryBlockThreshold = 10;
- ppi->SgcThresh = 24;
-
- ppi->PAKEnabled = 1;
- break;
-
- case 6:
- ppi->SRFGreyThresh = 6;
- ppi->SRFColThresh = 7;
- ppi->NoiseSupLevel = 3;
- ppi->SgcLevelThresh = 4;
- ppi->SuvcLevelThresh = 4;
- ppi->GrpLowSadThresh = 12;
- ppi->GrpHighSadThresh = 64;
- ppi->PrimaryBlockThreshold = 10;
- ppi->SgcThresh = 24;
-
- ppi->PAKEnabled = 1;
- break;
-
- default:
- ppi->SRFGreyThresh = 3;
- ppi->SRFColThresh = 3;
- ppi->NoiseSupLevel = 2;
- ppi->SgcLevelThresh = 2;
- ppi->SuvcLevelThresh = 2;
- ppi->GrpLowSadThresh = 10;
- ppi->GrpHighSadThresh = 32;
- ppi->PrimaryBlockThreshold = 5;
- ppi->SgcThresh = 16;
- ppi->PAKEnabled = 1;
- break;
- }
-}
-
-static void ScanCalcPixelIndexTable(PP_INSTANCE *ppi){
- ogg_uint32_t i;
- ogg_uint32_t * PixelIndexTablePtr = ppi->ScanPixelIndexTable;
-
- /* If appropriate add on extra inices for U and V planes. */
- for ( i = 0; i < (ppi->ScanYPlaneFragments); i++ ) {
- PixelIndexTablePtr[ i ] =
- ((i / ppi->ScanHFragments) *
- VFRAGPIXELS * ppi->ScanConfig.VideoFrameWidth);
- PixelIndexTablePtr[ i ] +=
- ((i % ppi->ScanHFragments) * HFRAGPIXELS);
- }
-
- PixelIndexTablePtr = &ppi->ScanPixelIndexTable[ppi->ScanYPlaneFragments];
-
- for ( i = 0; i < (ppi->ScanUVPlaneFragments * 2); i++ ){
- PixelIndexTablePtr[ i ] =
- ((i / (ppi->ScanHFragments >> 1) ) *
- (VFRAGPIXELS * (ppi->ScanConfig.VideoFrameWidth >> 1)) );
- PixelIndexTablePtr[ i ] +=
- ((i % (ppi->ScanHFragments >> 1) ) *
- HFRAGPIXELS) + ppi->YFramePixels;
- }
-}
-
-static void InitScanMapArrays(PP_INSTANCE *ppi){
- int i;
- unsigned char StepThresh;
-
- /* Clear down the fragment level map arrays for the current frame. */
- memset( ppi->FragScores, 0,
- ppi->ScanFrameFragments * sizeof(*ppi->FragScores) );
- memset( ppi->SameGreyDirPixels, 0,
- ppi->ScanFrameFragments );
- memset( ppi->FragDiffPixels, 0,
- ppi->ScanFrameFragments );
- memset( ppi->RowChangedPixels, 0,
- 3* ppi->ScanConfig.VideoFrameHeight*sizeof(*ppi->RowChangedPixels));
-
- memset( ppi->ScanDisplayFragments, BLOCK_NOT_CODED, ppi->ScanFrameFragments);
-
- /* Threshold used in setting up ppi->NoiseScoreBoostTable[] */
- StepThresh = (unsigned int)(ppi->SRFGreyThresh >> 1);
- if ( StepThresh < MIN_STEP_THRESH )
- StepThresh = MIN_STEP_THRESH;
- ppi->SrfThresh = (int)ppi->SRFGreyThresh;
-
- /* Set up various tables used to tweak pixel score values and
- scoring rules based upon absolute value of a pixel change */
- for ( i = 0; i < 256; i++ ){
- /* Score multiplier table indexed by absolute difference. */
- ppi->AbsDiff_ScoreMultiplierTable[i] = (double)i * DiffDevisor;
- if ( ppi->AbsDiff_ScoreMultiplierTable[i] < SCORE_MULT_LOW )
- ppi->AbsDiff_ScoreMultiplierTable[i] = SCORE_MULT_LOW;
- else if ( ppi->AbsDiff_ScoreMultiplierTable[i] > SCORE_MULT_HIGH)
- ppi->AbsDiff_ScoreMultiplierTable[i] = SCORE_MULT_HIGH;
-
- /* Table that facilitates a relaxation of the changed locals rules
- in NoiseScoreRow() for pixels that have changed by a large
- amount. */
- if ( i < (ppi->SrfThresh + StepThresh) )
- ppi->NoiseScoreBoostTable[i] = 0;
- else if ( i < (ppi->SrfThresh + (StepThresh * 4)) )
- ppi->NoiseScoreBoostTable[i] = 1;
- else if ( i < (ppi->SrfThresh + (StepThresh * 6)) )
- ppi->NoiseScoreBoostTable[i] = 2;
- else
- ppi->NoiseScoreBoostTable[i] = 3;
-
- }
-
- /* Set various other threshold parameters. */
-
- /* Set variables that control access to the line search algorithms. */
- ppi->LineSearchTripTresh = 16;
- if ( ppi->LineSearchTripTresh > ppi->PrimaryBlockThreshold )
- ppi->LineSearchTripTresh = (unsigned int)(ppi->PrimaryBlockThreshold + 1);
-
- /* Adjust line search length if block threshold low */
- ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
- while ( (ppi->MaxLineSearchLen > 0) &&
- (LineLengthScores[ppi->MaxLineSearchLen-1] >
- ppi->PrimaryBlockThreshold) )
- ppi->MaxLineSearchLen -= 1;
-
-}
-
-void ScanYUVInit( PP_INSTANCE * ppi, SCAN_CONFIG_DATA * ScanConfigPtr){
- int i;
-
- /* Set up the various imported data structure pointers. */
- ppi->ScanConfig.Yuv0ptr = ScanConfigPtr->Yuv0ptr;
- ppi->ScanConfig.Yuv1ptr = ScanConfigPtr->Yuv1ptr;
- ppi->ScanConfig.SrfWorkSpcPtr = ScanConfigPtr->SrfWorkSpcPtr;
- ppi->ScanConfig.disp_fragments = ScanConfigPtr->disp_fragments;
-
- ppi->ScanConfig.RegionIndex = ScanConfigPtr->RegionIndex;
-
- ppi->ScanConfig.VideoFrameWidth = ScanConfigPtr->VideoFrameWidth;
- ppi->ScanConfig.VideoFrameHeight = ScanConfigPtr->VideoFrameHeight;
-
- /* UV plane sizes. */
- ppi->VideoUVPlaneWidth = ScanConfigPtr->VideoFrameWidth / 2;
- ppi->VideoUVPlaneHeight = ScanConfigPtr->VideoFrameHeight / 2;
-
- /* Note the size of each plane in pixels. */
- ppi->YFramePixels = ppi->ScanConfig.VideoFrameWidth *
- ppi->ScanConfig.VideoFrameHeight;
- ppi->UVFramePixels = ppi->VideoUVPlaneWidth * ppi->VideoUVPlaneHeight;
-
- /* Work out various fragment related values. */
- ppi->ScanYPlaneFragments = ppi->YFramePixels /
- (HFRAGPIXELS * VFRAGPIXELS);
- ppi->ScanUVPlaneFragments = ppi->UVFramePixels /
- (HFRAGPIXELS * VFRAGPIXELS);;
- ppi->ScanHFragments = ppi->ScanConfig.VideoFrameWidth / HFRAGPIXELS;
- ppi->ScanVFragments = ppi->ScanConfig.VideoFrameHeight / VFRAGPIXELS;
- ppi->ScanFrameFragments = ppi->ScanYPlaneFragments +
- (2 * ppi->ScanUVPlaneFragments);
-
- PInitFrameInfo(ppi);
-
- /* Set up the scan pixel index table. */
- ScanCalcPixelIndexTable(ppi);
-
- /* Initialise the previous frame block history lists */
- for ( i = 0; i < MAX_PREV_FRAMES; i++ )
- memset( ppi->PrevFragments[i], BLOCK_NOT_CODED, ppi->ScanFrameFragments);
-
- /* YUVAnalyseFrame() is not called for the first frame in a sequence
- (a key frame obviously). This memset insures that for the second
- frame all blocks are marked for coding in line with the behaviour
- for other key frames. */
- memset( ppi->PrevFragments[ppi->PrevFrameLimit-1],
- BLOCK_CODED, ppi->ScanFrameFragments );
-
- /* Initialise scan arrays */
- InitScanMapArrays(ppi);
-}
-
-static void SetFromPrevious(PP_INSTANCE *ppi) {
- unsigned int i,j;
-
- /* We buld up the list of previously updated blocks in the zero
- index list of PrevFragments[] so we must start by reseting its
- contents */
- memset( ppi->PrevFragments[0], BLOCK_NOT_CODED, ppi->ScanFrameFragments );
-
- if ( ppi->PrevFrameLimit > 1 ){
- /* Now build up PrevFragments[0] from PrevFragments[1 to PrevFrameLimit] */
- for ( i = 0; i < ppi->ScanFrameFragments; i++ ){
- for ( j = 1; j < ppi->PrevFrameLimit; j++ ){
- if ( ppi->PrevFragments[j][i] > BLOCK_CODED_BAR ){
- ppi->PrevFragments[0][i] = BLOCK_CODED;
- break;
- }
- }
- }
- }
-}
-
-static void UpdatePreviousBlockLists(PP_INSTANCE *ppi) {
- int i;
-
- /* Shift previous frame block lists along. */
- for ( i = ppi->PrevFrameLimit; i > 1; i-- ){
- memcpy( ppi->PrevFragments[i], ppi->PrevFragments[i-1],
- ppi->ScanFrameFragments );
- }
-
- /* Now copy in this frames block list */
- memcpy( ppi->PrevFragments[1], ppi->ScanDisplayFragments,
- ppi->ScanFrameFragments );
-}
-
-static void CreateOutputDisplayMap( PP_INSTANCE *ppi,
- signed char *InternalFragmentsPtr,
- signed char *RecentHistoryPtr,
- unsigned char *ExternalFragmentsPtr ) {
- ogg_uint32_t i;
- ogg_uint32_t HistoryBlocksAdded = 0;
- ogg_uint32_t YBand = (ppi->ScanYPlaneFragments/8); /* 1/8th of Y image. */
-
- ppi->OutputBlocksUpdated = 0;
- for ( i = 0; i < ppi->ScanFrameFragments; i++ ) {
- if ( InternalFragmentsPtr[i] > BLOCK_NOT_CODED ) {
- ppi->OutputBlocksUpdated ++;
- ExternalFragmentsPtr[i] = 1;
- }else if ( RecentHistoryPtr[i] == BLOCK_CODED ){
- HistoryBlocksAdded ++;
- ExternalFragmentsPtr[i] = 1;
- }else{
- ExternalFragmentsPtr[i] = 0;
- }
- }
-
- /* Add in a weighting for the history blocks that have been added */
- ppi->OutputBlocksUpdated += (HistoryBlocksAdded / HISTORY_BLOCK_FACTOR);
-
- /* Now calculate a key frame candidate indicator. This is based
- upon Y data only and ignores the top and bottom 1/8 of the
- image. Also ignore history blocks and BAR blocks. */
- ppi->KFIndicator = 0;
- for ( i = YBand; i < (ppi->ScanYPlaneFragments - YBand); i++ )
- if ( InternalFragmentsPtr[i] > BLOCK_CODED_BAR )
- ppi->KFIndicator ++;
-
- /* Convert the KF score to a range 0-100 */
- ppi->KFIndicator = ((ppi->KFIndicator*100)/((ppi->ScanYPlaneFragments*3)/4));
-}
-
-static int RowSadScan( PP_INSTANCE *ppi,
- unsigned char * YuvPtr1,
- unsigned char * YuvPtr2,
- signed char * DispFragPtr){
- ogg_int32_t i, j;
- ogg_uint32_t GrpSad;
- ogg_uint32_t LocalGrpLowSadThresh = ppi->ModifiedGrpLowSadThresh;
- ogg_uint32_t LocalGrpHighSadThresh = ppi->ModifiedGrpHighSadThresh;
- signed char *LocalDispFragPtr;
- unsigned char *LocalYuvPtr1;
- unsigned char *LocalYuvPtr2;
-
- int InterestingBlocksInRow = 0;
-
- /* For each row of pixels in the row of blocks */
- for ( j = 0; j < VFRAGPIXELS; j++ ){
- /* Set local block map pointer. */
- LocalDispFragPtr = DispFragPtr;
-
- /* Set the local pixel data pointers for this row.*/
- LocalYuvPtr1 = YuvPtr1;
- LocalYuvPtr2 = YuvPtr2;
-
- /* Scan along the row of pixels If the block to which a group of
- pixels belongs is already marked for update then do nothing. */
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- if ( *LocalDispFragPtr <= BLOCK_NOT_CODED ){
- /* Calculate the SAD score for the block row */
- GrpSad = dsp_row_sad8(ppi->dsp, LocalYuvPtr1,LocalYuvPtr2);
-
- /* Now test the group SAD score */
- if ( GrpSad > LocalGrpLowSadThresh ){
- /* If SAD very high we must update else we have candidate block */
- if ( GrpSad > LocalGrpHighSadThresh ){
- /* Force update */
- *LocalDispFragPtr = BLOCK_CODED;
- }else{
- /* Possible Update required */
- *LocalDispFragPtr = CANDIDATE_BLOCK;
- }
- InterestingBlocksInRow = 1;
- }
- }
- LocalDispFragPtr++;
-
- LocalYuvPtr1 += 8;
- LocalYuvPtr2 += 8;
- }
-
- /* Increment the base data pointers to the start of the next line. */
- YuvPtr1 += ppi->PlaneStride;
- YuvPtr2 += ppi->PlaneStride;
- }
-
- return InterestingBlocksInRow;
-
-}
-
-static int ColSadScan( PP_INSTANCE *ppi,
- unsigned char * YuvPtr1,
- unsigned char * YuvPtr2,
- signed char * DispFragPtr ){
- ogg_int32_t i;
- ogg_uint32_t MaxSad;
- ogg_uint32_t LocalGrpLowSadThresh = ppi->ModifiedGrpLowSadThresh;
- ogg_uint32_t LocalGrpHighSadThresh = ppi->ModifiedGrpHighSadThresh;
- signed char * LocalDispFragPtr;
-
- unsigned char * LocalYuvPtr1;
- unsigned char * LocalYuvPtr2;
-
- int InterestingBlocksInRow = 0;
-
- /* Set the local pixel data pointers for this row. */
- LocalYuvPtr1 = YuvPtr1;
- LocalYuvPtr2 = YuvPtr2;
-
- /* Set local block map pointer. */
- LocalDispFragPtr = DispFragPtr;
-
- /* Scan along the row of blocks */
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- /* Skip if block already marked to be coded. */
- if ( *LocalDispFragPtr <= BLOCK_NOT_CODED ){
- /* Calculate the SAD score for the block column */
- MaxSad = dsp_col_sad8x8(ppi->dsp, LocalYuvPtr1, LocalYuvPtr2, ppi->PlaneStride );
-
- /* Now test the group SAD score */
- if ( MaxSad > LocalGrpLowSadThresh ){
- /* If SAD very high we must update else we have candidate block */
- if ( MaxSad > LocalGrpHighSadThresh ){
- /* Force update */
- *LocalDispFragPtr = BLOCK_CODED;
- }else{
- /* Possible Update required */
- *LocalDispFragPtr = CANDIDATE_BLOCK;
- }
- InterestingBlocksInRow = 1;
- }
- }
-
- /* Increment the block map pointer. */
- LocalDispFragPtr++;
-
- /* Step data pointers on ready for next block */
- LocalYuvPtr1 += HFRAGPIXELS;
- LocalYuvPtr2 += HFRAGPIXELS;
- }
-
- return InterestingBlocksInRow;
-}
-
-static void SadPass2( PP_INSTANCE *ppi,
- ogg_int32_t RowNumber,
- signed char * DispFragPtr ){
- ogg_int32_t i;
-
- /* First row */
- if ( RowNumber == 0 ) {
- /* First block in row. */
- if ( DispFragPtr[0] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[1] == BLOCK_CODED) ||
- (DispFragPtr[ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[ppi->PlaneHFragments+1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[0] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[0] = DispFragPtr[0];
- }
- }else{
- ppi->TmpCodedMap[0] = DispFragPtr[0];
- }
-
- /* All but first and last in row */
- for ( i = 1; (i < ppi->PlaneHFragments-1); i++ ){
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[i-1] == BLOCK_CODED) ||
- (DispFragPtr[i+1] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments+1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[i] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }
-
- /* Last block in row. */
- i = ppi->PlaneHFragments-1;
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[i-1] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[i] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else if ( RowNumber < (ppi->PlaneVFragments - 1) ){
- /* General case */
- /* First block in row. */
- if ( DispFragPtr[0] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[1] == BLOCK_CODED) ||
- (DispFragPtr[(-ppi->PlaneHFragments)] == BLOCK_CODED) ||
- (DispFragPtr[(-ppi->PlaneHFragments)+1] == BLOCK_CODED) ||
- (DispFragPtr[ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[ppi->PlaneHFragments+1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[0] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[0] = DispFragPtr[0];
- }
- }else{
- ppi->TmpCodedMap[0] = DispFragPtr[0];
- }
-
- /* All but first and last in row */
- for ( i = 1; (i < ppi->PlaneHFragments-1); i++ ){
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[i-1] == BLOCK_CODED) ||
- (DispFragPtr[i+1] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments+1] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments+1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[i] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }
-
- /* Last block in row. */
- i = ppi->PlaneHFragments-1;
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[i-1] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[i] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- /* Last row */
- /* First block in row. */
- if ( DispFragPtr[0] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[1] == BLOCK_CODED) ||
- (DispFragPtr[(-ppi->PlaneHFragments)] == BLOCK_CODED) ||
- (DispFragPtr[(-ppi->PlaneHFragments)+1] == BLOCK_CODED)){
- ppi->TmpCodedMap[0] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[0] = DispFragPtr[0];
- }
- }else{
- ppi->TmpCodedMap[0] = DispFragPtr[0];
- }
-
- /* All but first and last in row */
- for ( i = 1; (i < ppi->PlaneHFragments-1); i++ ){
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[i-1] == BLOCK_CODED) ||
- (DispFragPtr[i+1] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments+1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[i] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }
-
- /* Last block in row. */
- i = ppi->PlaneHFragments-1;
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( (DispFragPtr[i-1] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) ||
- (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) ){
- ppi->TmpCodedMap[i] = BLOCK_CODED_LOW;
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }else{
- ppi->TmpCodedMap[i] = DispFragPtr[i];
- }
- }
-
- /* Now copy back the modified Fragment data */
- memcpy( &DispFragPtr[0], &ppi->TmpCodedMap[0], (ppi->PlaneHFragments) );
-}
-
-static unsigned char ApplyPakLowPass( PP_INSTANCE *ppi,
- unsigned char * SrcPtr ){
- unsigned char * SrcPtr1 = SrcPtr - 1;
- unsigned char * SrcPtr0 = SrcPtr1 - ppi->PlaneStride; /* Note the
- use of
- stride not
- width. */
- unsigned char * SrcPtr2 = SrcPtr1 + ppi->PlaneStride;
-
- return (unsigned char)( ( (ogg_uint32_t)SrcPtr0[0] +
- (ogg_uint32_t)SrcPtr0[1] +
- (ogg_uint32_t)SrcPtr0[2] +
- (ogg_uint32_t)SrcPtr1[0] +
- (ogg_uint32_t)SrcPtr1[2] +
- (ogg_uint32_t)SrcPtr2[0] +
- (ogg_uint32_t)SrcPtr2[1] +
- (ogg_uint32_t)SrcPtr2[2] ) >> 3 );
-
-}
-
-static void RowDiffScan( PP_INSTANCE *ppi,
- unsigned char * YuvPtr1,
- unsigned char * YuvPtr2,
- ogg_int16_t * YUVDiffsPtr,
- unsigned char * bits_map_ptr,
- signed char * SgcPtr,
- signed char * DispFragPtr,
- unsigned char * FDiffPixels,
- ogg_int32_t * RowDiffsPtr,
- unsigned char * ChLocalsPtr, int EdgeRow ){
-
- ogg_int32_t i,j;
- ogg_int32_t FragChangedPixels;
-
- ogg_int16_t Diff; /* Temp local workspace. */
-
- /* Cannot use kernel if at edge or if PAK disabled */
- if ( (!ppi->PAKEnabled) || EdgeRow ){
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Reset count of pixels changed for the current fragment. */
- FragChangedPixels = 0;
-
- /* Test for break out conditions to save time. */
- if (*DispFragPtr == CANDIDATE_BLOCK){
-
- /* Clear down entries in changed locals array */
- SET8_0(ChLocalsPtr);
-
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* Take a local copy of the measured difference. */
- Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j];
-
- /* Store the actual difference value */
- YUVDiffsPtr[j] = Diff;
-
- /* Test against the Level thresholds and record the results */
- SgcPtr[0] += ppi->SgcThreshTable[Diff+255];
-
- /* Test against the SRF thresholds */
- bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255];
- FragChangedPixels += ppi->SrfThreshTable[Diff+255];
- }
- }else{
- /* If we are breaking out here mark all pixels as changed. */
- if ( *DispFragPtr > BLOCK_NOT_CODED ){
- SET8_1(bits_map_ptr);
- SET8_8(ChLocalsPtr);
- }else{
- SET8_0(ChLocalsPtr);
- }
- }
-
- *RowDiffsPtr += FragChangedPixels;
- *FDiffPixels += (unsigned char)FragChangedPixels;
-
- YuvPtr1 += HFRAGPIXELS;
- YuvPtr2 += HFRAGPIXELS;
- bits_map_ptr += HFRAGPIXELS;
- ChLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
- SgcPtr ++;
- FDiffPixels ++;
-
- /* If we have a lot of changed pixels for this fragment on this
- row then the fragment is almost sure to be picked (e.g. through
- the line search) so we can mark it as selected and then ignore
- it. */
- if (FragChangedPixels >= 7){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
- DispFragPtr++;
- }
- }else{
-
- /*************************************************************/
- /* First fragment of row !! */
-
- i = 0;
- /* Reset count of pixels changed for the current fragment. */
- FragChangedPixels = 0;
-
- /* Test for break out conditions to save time. */
- if (*DispFragPtr == CANDIDATE_BLOCK){
- /* Clear down entries in changed locals array */
- SET8_0(ChLocalsPtr);
-
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* Take a local copy of the measured difference. */
- Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j];
-
- /* Store the actual difference value */
- YUVDiffsPtr[j] = Diff;
-
- /* Test against the Level thresholds and record the results */
- SgcPtr[0] += ppi->SgcThreshTable[Diff+255];
-
- if (j>0 && ppi->SrfPakThreshTable[Diff+255] )
- Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
- (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
-
- /* Test against the SRF thresholds */
- bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255];
- FragChangedPixels += ppi->SrfThreshTable[Diff+255];
- }
- }else{
- /* If we are breaking out here mark all pixels as changed. */
- if ( *DispFragPtr > BLOCK_NOT_CODED ){
- SET8_1(bits_map_ptr);
- SET8_8(ChLocalsPtr);
- }else{
- SET8_0(ChLocalsPtr);
- }
- }
-
- *RowDiffsPtr += FragChangedPixels;
- *FDiffPixels += (unsigned char)FragChangedPixels;
-
- YuvPtr1 += HFRAGPIXELS;
- YuvPtr2 += HFRAGPIXELS;
- bits_map_ptr += HFRAGPIXELS;
- ChLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
- SgcPtr ++;
- FDiffPixels ++;
-
- /* If we have a lot of changed pixels for this fragment on this
- row then the fragment is almost sure to be picked
- (e.g. through the line search) so we can mark it as selected
- and then ignore it. */
- if (FragChangedPixels >= 7){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
- DispFragPtr++;
- /*************************************************************/
- /* Fragment in between!! */
-
- for ( i = HFRAGPIXELS ; i < ppi->PlaneWidth-HFRAGPIXELS;
- i += HFRAGPIXELS ){
- /* Reset count of pixels changed for the current fragment. */
- FragChangedPixels = 0;
-
- /* Test for break out conditions to save time. */
- if (*DispFragPtr == CANDIDATE_BLOCK){
- /* Clear down entries in changed locals array */
- SET8_0(ChLocalsPtr);
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* Take a local copy of the measured difference. */
- Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j];
-
- /* Store the actual difference value */
- YUVDiffsPtr[j] = Diff;
-
- /* Test against the Level thresholds and record the results */
- SgcPtr[0] += ppi->SgcThreshTable[Diff+255];
-
- if (ppi->SrfPakThreshTable[Diff+255] )
- Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
- (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
-
-
- /* Test against the SRF thresholds */
- bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255];
- FragChangedPixels += ppi->SrfThreshTable[Diff+255];
- }
- }else{
- /* If we are breaking out here mark all pixels as changed. */
- if ( *DispFragPtr > BLOCK_NOT_CODED ){
- SET8_1(bits_map_ptr);
- SET8_8(ChLocalsPtr);
- }else{
- SET8_0(ChLocalsPtr);
- }
- }
-
- *RowDiffsPtr += FragChangedPixels;
- *FDiffPixels += (unsigned char)FragChangedPixels;
-
- YuvPtr1 += HFRAGPIXELS;
- YuvPtr2 += HFRAGPIXELS;
- bits_map_ptr += HFRAGPIXELS;
- ChLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
- SgcPtr ++;
- FDiffPixels ++;
-
- /* If we have a lot of changed pixels for this fragment on this
- row then the fragment is almost sure to be picked
- (e.g. through the line search) so we can mark it as selected
- and then ignore it. */
- if (FragChangedPixels >= 7){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
- DispFragPtr++;
- }
- /*************************************************************/
- /* Last fragment of row !! */
-
- /* Reset count of pixels changed for the current fragment. */
- FragChangedPixels = 0;
-
- /* Test for break out conditions to save time. */
- if (*DispFragPtr == CANDIDATE_BLOCK){
- /* Clear down entries in changed locals array */
- SET8_0(ChLocalsPtr);
-
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* Take a local copy of the measured difference. */
- Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j];
-
- /* Store the actual difference value */
- YUVDiffsPtr[j] = Diff;
-
- /* Test against the Level thresholds and record the results */
- SgcPtr[0] += ppi->SgcThreshTable[Diff+255];
-
- if (j<7 && ppi->SrfPakThreshTable[Diff+255] )
- Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
- (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
-
-
- /* Test against the SRF thresholds */
- bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255];
- FragChangedPixels += ppi->SrfThreshTable[Diff+255];
- }
- }else{
- /* If we are breaking out here mark all pixels as changed.*/
- if ( *DispFragPtr > BLOCK_NOT_CODED ) {
- SET8_1(bits_map_ptr);
- SET8_8(ChLocalsPtr);
- }else{
- SET8_0(ChLocalsPtr);
- }
- }
- /* If we have a lot of changed pixels for this fragment on this
- row then the fragment is almost sure to be picked (e.g. through
- the line search) so we can mark it as selected and then ignore
- it. */
- *RowDiffsPtr += FragChangedPixels;
- *FDiffPixels += (unsigned char)FragChangedPixels;
-
- /* If we have a lot of changed pixels for this fragment on this
- row then the fragment is almost sure to be picked (e.g. through
- the line search) so we can mark it as selected and then ignore
- it. */
- if (FragChangedPixels >= 7){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
- DispFragPtr++;
-
- }
-}
-
-static void ConsolidateDiffScanResults( PP_INSTANCE *ppi,
- unsigned char * FDiffPixels,
- signed char * SgcScoresPtr,
- signed char * DispFragPtr ){
- ogg_int32_t i;
-
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- /* Consider only those blocks that were candidates in the
- difference scan. Ignore definite YES and NO cases. */
- if ( DispFragPtr[i] == CANDIDATE_BLOCK ){
- if ( ((ogg_uint32_t)abs(SgcScoresPtr[i]) > ppi->BlockSgcThresh) ){
- /* Block marked for update due to Sgc change */
- DispFragPtr[i] = BLOCK_CODED_SGC;
- }else if ( FDiffPixels[i] == 0 ){
- /* Block is no longer a candidate for the main tests but will
- still be considered a candidate in RowBarEnhBlockMap() */
- DispFragPtr[i] = CANDIDATE_BLOCK_LOW;
- }
- }
- }
-}
-
-static void RowChangedLocalsScan( PP_INSTANCE *ppi,
- unsigned char * PixelMapPtr,
- unsigned char * ChLocalsPtr,
- signed char * DispFragPtr,
- unsigned char RowType ){
-
- unsigned char changed_locals = 0;
- unsigned char * PixelsChangedPtr0;
- unsigned char * PixelsChangedPtr1;
- unsigned char * PixelsChangedPtr2;
- ogg_int32_t i, j;
- ogg_int32_t LastRowIndex = ppi->PlaneWidth - 1;
-
- /* Set up the line based pointers into the bits changed map. */
- PixelsChangedPtr0 = PixelMapPtr - ppi->PlaneWidth;
- if ( PixelsChangedPtr0 < ppi->PixelChangedMap )
- PixelsChangedPtr0 += ppi->PixelMapCircularBufferSize;
- PixelsChangedPtr0 -= 1;
-
- PixelsChangedPtr1 = PixelMapPtr - 1;
-
- PixelsChangedPtr2 = PixelMapPtr + ppi->PlaneWidth;
- if ( PixelsChangedPtr2 >=
- (ppi->PixelChangedMap + ppi->PixelMapCircularBufferSize) )
- PixelsChangedPtr2 -= ppi->PixelMapCircularBufferSize;
- PixelsChangedPtr2 -= 1;
-
- if ( RowType == NOT_EDGE_ROW ){
- /* Scan through the row of pixels and calculate changed locals. */
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Skip a group of 8 pixels if the assosciated fragment has no
- pixels of interest. */
- if ( *DispFragPtr == CANDIDATE_BLOCK ){
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- changed_locals = 0;
-
- /* If the pixel itself has changed */
- if ( PixelsChangedPtr1[1] ){
- if ( (i > 0) || (j > 0) ){
- changed_locals += PixelsChangedPtr0[0];
- changed_locals += PixelsChangedPtr1[0];
- changed_locals += PixelsChangedPtr2[0];
- }
-
- changed_locals += PixelsChangedPtr0[1];
- changed_locals += PixelsChangedPtr2[1];
-
- if ( (i + j) < LastRowIndex ){
- changed_locals += PixelsChangedPtr0[2];
- changed_locals += PixelsChangedPtr1[2];
- changed_locals += PixelsChangedPtr2[2];
- }
-
- /* Store the number of changed locals */
- *ChLocalsPtr |= changed_locals;
- }
-
- /* Increment to next pixel in the row */
- ChLocalsPtr++;
- PixelsChangedPtr0++;
- PixelsChangedPtr1++;
- PixelsChangedPtr2++;
- }
- }else{
- if ( *DispFragPtr > BLOCK_NOT_CODED )
- SET8_0(ChLocalsPtr);
-
- /* Step pointers */
- ChLocalsPtr += HFRAGPIXELS;
- PixelsChangedPtr0 += HFRAGPIXELS;
- PixelsChangedPtr1 += HFRAGPIXELS;
- PixelsChangedPtr2 += HFRAGPIXELS;
- }
-
- /* Move on to next fragment. */
- DispFragPtr++;
-
- }
- }else{
- /* Scan through the row of pixels and calculate changed locals. */
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Skip a group of 8 pixels if the assosciated fragment has no
- pixels of interest */
- if ( *DispFragPtr == CANDIDATE_BLOCK ){
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- changed_locals = 0;
-
- /* If the pixel itself has changed */
- if ( PixelsChangedPtr1[1] ){
- if ( RowType == FIRST_ROW ){
- if ( (i > 0) || (j > 0) ){
- changed_locals += PixelsChangedPtr1[0];
- changed_locals += PixelsChangedPtr2[0];
- }
-
- changed_locals += PixelsChangedPtr2[1];
-
- if ( (i + j) < LastRowIndex ){
- changed_locals += PixelsChangedPtr1[2];
- changed_locals += PixelsChangedPtr2[2];
- }
- }else{
- if ( (i > 0) || (j > 0 ) ){
- changed_locals += PixelsChangedPtr0[0];
- changed_locals += PixelsChangedPtr1[0];
- }
-
- changed_locals += PixelsChangedPtr0[1];
-
- if ( (i + j) < LastRowIndex ){
- changed_locals += PixelsChangedPtr0[2];
- changed_locals += PixelsChangedPtr1[2];
- }
- }
-
- /* Store the number of changed locals */
- *ChLocalsPtr |= changed_locals;
- }
-
- /* Increment to next pixel in the row */
- ChLocalsPtr++;
- PixelsChangedPtr0++;
- PixelsChangedPtr1++;
- PixelsChangedPtr2++;
- }
- }else{
- if ( *DispFragPtr > BLOCK_NOT_CODED )
- SET8_0(ChLocalsPtr);
-
- /* Step pointers */
- ChLocalsPtr += HFRAGPIXELS;
- PixelsChangedPtr0 += HFRAGPIXELS;
- PixelsChangedPtr1 += HFRAGPIXELS;
- PixelsChangedPtr2 += HFRAGPIXELS;
- }
-
- /* Move on to next fragment. */
- DispFragPtr++;
- }
- }
-}
-
-static void NoiseScoreRow( PP_INSTANCE *ppi,
- unsigned char * PixelMapPtr,
- unsigned char * ChLocalsPtr,
- ogg_int16_t * YUVDiffsPtr,
- unsigned char * PixelNoiseScorePtr,
- ogg_uint32_t * FragScorePtr,
- signed char * DispFragPtr,
- ogg_int32_t * RowDiffsPtr ){
- ogg_int32_t i,j;
- unsigned char changed_locals = 0;
- ogg_int32_t Score;
- ogg_uint32_t FragScore;
- ogg_int32_t AbsDiff;
-
- /* For each pixel in the row */
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Skip a group of 8 pixels if the assosciated fragment has no
- pixels of interest. */
- if ( *DispFragPtr == CANDIDATE_BLOCK ){
- /* Reset the cumulative fragment score. */
- FragScore = 0;
-
- /* Pixels grouped along the row into fragments */
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- if ( PixelMapPtr[j] ){
- AbsDiff = (ogg_int32_t)( abs(YUVDiffsPtr[j]) );
- changed_locals = ChLocalsPtr[j];
-
- /* Give this pixel a score based on changed locals and level
- of its own change. */
- Score = (1 + ((ogg_int32_t)(changed_locals +
- ppi->NoiseScoreBoostTable[AbsDiff]) -
- ppi->NoiseSupLevel));
-
- /* For no zero scores adjust by a level based score multiplier. */
- if ( Score > 0 ){
- Score = ((double)Score *
- ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
- if ( Score < 1 )
- Score = 1;
- }else{
- /* Set -ve values to 0 */
- Score = 0;
-
- /* If there are no changed locals then clear the pixel
- changed flag and decrement the pixels changed in
- fragment count to speed later stages. */
- if ( changed_locals == 0 ){
- PixelMapPtr[j] = 0;
- *RowDiffsPtr -= 1;
- }
- }
-
- /* Update the pixel scores etc. */
- PixelNoiseScorePtr[j] = (unsigned char)Score;
- FragScore += (ogg_uint32_t)Score;
- }
- }
-
- /* Add fragment score (with plane correction factor) into main
- data structure */
- *FragScorePtr += (ogg_int32_t)(FragScore *
- ppi->YUVPlaneCorrectionFactor);
-
- /* If score is greater than trip threshold then mark blcok for update. */
- if ( *FragScorePtr > ppi->BlockThreshold ){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
- }
-
- /* Increment the various pointers */
- FragScorePtr++;
- DispFragPtr++;
- PixelNoiseScorePtr += HFRAGPIXELS;
- PixelMapPtr += HFRAGPIXELS;
- ChLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
- }
-}
-
-static void PrimaryEdgeScoreRow( PP_INSTANCE *ppi,
- unsigned char * ChangedLocalsPtr,
- ogg_int16_t * YUVDiffsPtr,
- unsigned char * PixelNoiseScorePtr,
- ogg_uint32_t * FragScorePtr,
- signed char * DispFragPtr,
- unsigned char RowType ){
- ogg_uint32_t BodyNeighbours;
- ogg_uint32_t AbsDiff;
- unsigned char changed_locals = 0;
- ogg_int32_t Score;
- ogg_uint32_t FragScore;
- unsigned char * CHLocalsPtr0;
- unsigned char * CHLocalsPtr1;
- unsigned char * CHLocalsPtr2;
- ogg_int32_t i,j;
- ogg_int32_t LastRowIndex = ppi->PlaneWidth - 1;
-
- /* Set up pointers into the current previous and next row of the
- changed locals data structure. */
- CHLocalsPtr0 = ChangedLocalsPtr - ppi->PlaneWidth;
- if ( CHLocalsPtr0 < ppi->ChLocals )
- CHLocalsPtr0 += ppi->ChLocalsCircularBufferSize;
- CHLocalsPtr0 -= 1;
-
- CHLocalsPtr1 = ChangedLocalsPtr - 1;
-
- CHLocalsPtr2 = ChangedLocalsPtr + ppi->PlaneWidth;
- if ( CHLocalsPtr2 >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
- CHLocalsPtr2 -= ppi->ChLocalsCircularBufferSize;
- CHLocalsPtr2 -= 1;
-
-
- /* The defining rule used here is as follows. */
- /* An edge pixels has 3-5 changed locals. */
- /* And one or more of these changed locals has itself got 7-8
- changed locals. */
-
- if ( RowType == NOT_EDGE_ROW ){
- /* Loop for all pixels in the row. */
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Does the fragment contain anything interesting to work with. */
- if ( *DispFragPtr == CANDIDATE_BLOCK ){
- /* Reset the cumulative fragment score. */
- FragScore = 0;
-
- /* Pixels grouped along the row into fragments */
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* How many changed locals has the current pixel got. */
- changed_locals = ChangedLocalsPtr[j];
-
- /* Is the pixel a suitable candidate */
- if ( (changed_locals > 2) && (changed_locals < 6) ){
- /* The pixel may qualify... have a closer look. */
- BodyNeighbours = 0;
-
- /* Count the number of "BodyNeighbours" .. Pixels that
- have 7 or more changed neighbours. */
- if ( (i > 0) || (j > 0 ) ){
- if ( CHLocalsPtr0[0] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr1[0] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr2[0] >= 7 )
- BodyNeighbours++;
- }
-
- if ( CHLocalsPtr0[1] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr2[1] >= 7 )
- BodyNeighbours++;
-
- if ( (i + j) < LastRowIndex ){
- if ( CHLocalsPtr0[2] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr1[2] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr2[2] >= 7 )
- BodyNeighbours++;
- }
-
- if ( BodyNeighbours > 0 ){
- AbsDiff = abs( YUVDiffsPtr[j] );
- Score = (ogg_int32_t)
- ( (double)(BodyNeighbours *
- BodyNeighbourScore) *
- ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
- if ( Score < 1 )
- Score = 1;
-
- /* Increment the score by a value determined by the
- number of body neighbours. */
- PixelNoiseScorePtr[j] += (unsigned char)Score;
- FragScore += (ogg_uint32_t)Score;
- }
- }
-
- /* Increment pointers into changed locals buffer */
- CHLocalsPtr0 ++;
- CHLocalsPtr1 ++;
- CHLocalsPtr2 ++;
- }
-
- /* Add fragment score (with plane correction factor) into main
- data structure */
- *FragScorePtr += (ogg_int32_t)(FragScore *
- ppi->YUVPlaneCorrectionFactor);
-
- /* If score is greater than trip threshold then mark blcok for
- update. */
- if ( *FragScorePtr > ppi->BlockThreshold ){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
-
- }else{
- /* Nothing to do for this fragment group */
- /* Advance pointers into changed locals buffer */
- CHLocalsPtr0 += HFRAGPIXELS;
- CHLocalsPtr1 += HFRAGPIXELS;
- CHLocalsPtr2 += HFRAGPIXELS;
- }
-
- /* Increment the various pointers */
- FragScorePtr++;
- DispFragPtr++;
- PixelNoiseScorePtr += HFRAGPIXELS;
- ChangedLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
- }
- }else{
- /* This is either the top or bottom row of pixels in a plane. */
- /* Loop for all pixels in the row. */
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Does the fragment contain anything interesting to work with. */
- if ( *DispFragPtr == CANDIDATE_BLOCK ){
- /* Reset the cumulative fragment score. */
- FragScore = 0;
-
- /* Pixels grouped along the row into fragments */
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* How many changed locals has the current pixel got. */
- changed_locals = ChangedLocalsPtr[j];
-
- /* Is the pixel a suitable candidate */
- if ( (changed_locals > 2) && (changed_locals < 6) ){
- /* The pixel may qualify... have a closer look. */
- BodyNeighbours = 0;
-
- /* Count the number of "BodyNeighbours" .. Pixels
- that have 7 or more changed neighbours. */
- if ( RowType == LAST_ROW ){
- /* Test for cases where it could be the first pixel on
- the line */
- if ( (i > 0) || (j > 0) ){
- if ( CHLocalsPtr0[0] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr1[0] >= 7 )
- BodyNeighbours++;
- }
-
- if ( CHLocalsPtr0[1] >= 7 )
- BodyNeighbours++;
-
- /* Test for the end of line case */
- if ( (i + j) < LastRowIndex ){
- if ( CHLocalsPtr0[2] >= 7 )
- BodyNeighbours++;
-
- if ( CHLocalsPtr1[2] >= 7 )
- BodyNeighbours++;
- }
- }else{
- /* First Row */
- /* Test for cases where it could be the first pixel on
- the line */
- if ( (i > 0) || (j > 0) ){
- if ( CHLocalsPtr1[0] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr2[0] >= 7 )
- BodyNeighbours++;
- }
-
- /* Test for the end of line case */
- if ( CHLocalsPtr2[1] >= 7 )
- BodyNeighbours++;
-
- if ( (i + j) < LastRowIndex ){
- if ( CHLocalsPtr1[2] >= 7 )
- BodyNeighbours++;
- if ( CHLocalsPtr2[2] >= 7 )
- BodyNeighbours++;
- }
- }
-
- /* Allocate a score according to the number of Body neighbours. */
- if ( BodyNeighbours > 0 ){
- AbsDiff = abs( YUVDiffsPtr[j] );
- Score = (ogg_int32_t)
- ( (double)(BodyNeighbours * BodyNeighbourScore) *
- ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
- if ( Score < 1 )
- Score = 1;
-
- PixelNoiseScorePtr[j] += (unsigned char)Score;
- FragScore += (ogg_uint32_t)Score;
- }
- }
-
- /* Increment pointers into changed locals buffer */
- CHLocalsPtr0 ++;
- CHLocalsPtr1 ++;
- CHLocalsPtr2 ++;
- }
-
- /* Add fragment score (with plane correction factor) into main
- data structure */
- *FragScorePtr +=
- (ogg_int32_t)(FragScore * ppi->YUVPlaneCorrectionFactor);
-
- /* If score is greater than trip threshold then mark blcok for
- update. */
- if ( *FragScorePtr > ppi->BlockThreshold ){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
-
- }else{
- /* Nothing to do for this fragment group */
- /* Advance pointers into changed locals buffer */
- CHLocalsPtr0 += HFRAGPIXELS;
- CHLocalsPtr1 += HFRAGPIXELS;
- CHLocalsPtr2 += HFRAGPIXELS;
- }
-
- /* Increment the various pointers */
- FragScorePtr++;
- DispFragPtr++;
- PixelNoiseScorePtr += HFRAGPIXELS;
- ChangedLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
- }
- }
-}
-
-static void PixelLineSearch( PP_INSTANCE *ppi,
- unsigned char * ChangedLocalsPtr,
- ogg_int32_t RowNumber,
- ogg_int32_t ColNumber,
- unsigned char direction,
- ogg_uint32_t * line_length ){
- /* Exit if the pixel does not qualify or we have fallen off the edge
- of either the image plane or the row. */
- if ( (RowNumber < 0) ||
- (RowNumber >= ppi->PlaneHeight) ||
- (ColNumber < 0) ||
- (ColNumber >= ppi->PlaneWidth) ||
- ((*ChangedLocalsPtr) <= 1) ||
- ((*ChangedLocalsPtr) >= 6) ){
- /* If not then it isn't part of any line. */
- return;
- }
-
- if (*line_length < ppi->MaxLineSearchLen){
- ogg_uint32_t TmpLineLength;
- ogg_uint32_t BestLineLength;
- unsigned char * search_ptr;
-
- /* Increment the line length to include this pixel. */
- *line_length += 1;
- BestLineLength = *line_length;
-
- /* Continue search */
- /* up */
- if ( direction == UP ){
- TmpLineLength = *line_length;
-
- search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
- if ( search_ptr < ppi->ChLocals )
- search_ptr += ppi->ChLocalsCircularBufferSize;
-
- PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
- /* up and left */
- if ( (BestLineLength < ppi->MaxLineSearchLen) &&
- ((direction == UP) || (direction == LEFT)) ){
- TmpLineLength = *line_length;
-
- search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
- if ( search_ptr < ppi->ChLocals )
- search_ptr += ppi->ChLocalsCircularBufferSize;
- search_ptr -= 1;
-
- PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber - 1,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
- /* up and right */
- if ( (BestLineLength < ppi->MaxLineSearchLen) &&
- ((direction == UP) || (direction == RIGHT)) ){
- TmpLineLength = *line_length;
-
- search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
- if ( search_ptr < ppi->ChLocals )
- search_ptr += ppi->ChLocalsCircularBufferSize;
- search_ptr += 1;
-
- PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber + 1,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
- /* left */
- if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == LEFT ) ){
- TmpLineLength = *line_length;
- PixelLineSearch( ppi, ChangedLocalsPtr - 1, RowNumber, ColNumber - 1,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
- /* right */
- if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == RIGHT ) ){
- TmpLineLength = *line_length;
- PixelLineSearch( ppi, ChangedLocalsPtr + 1, RowNumber, ColNumber + 1,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
- /* Down */
- if ( BestLineLength < ppi->MaxLineSearchLen ){
- TmpLineLength = *line_length;
- if ( direction == DOWN ){
- search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
- if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
- search_ptr -= ppi->ChLocalsCircularBufferSize;
-
- PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber, direction,
- &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
-
- /* down and left */
- if ( (BestLineLength < ppi->MaxLineSearchLen) &&
- ((direction == DOWN) || (direction == LEFT)) ){
- TmpLineLength = *line_length;
-
- search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
- if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
- search_ptr -= ppi->ChLocalsCircularBufferSize;
- search_ptr -= 1;
-
- PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber - 1,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
-
- /* down and right */
- if ( (BestLineLength < ppi->MaxLineSearchLen) &&
- ((direction == DOWN) || (direction == RIGHT)) ){
- TmpLineLength = *line_length;
-
- search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
- if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
- search_ptr -= ppi->ChLocalsCircularBufferSize;
- search_ptr += 1;
-
- PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber + 1,
- direction, &TmpLineLength );
-
- if ( TmpLineLength > BestLineLength )
- BestLineLength = TmpLineLength;
- }
- }
-
- /* Note the search value for this pixel. */
- *line_length = BestLineLength;
- }
-}
-
-static unsigned char LineSearchScorePixel( PP_INSTANCE *ppi,
- unsigned char * ChangedLocalsPtr,
- ogg_int32_t RowNumber,
- ogg_int32_t ColNumber ){
- ogg_uint32_t line_length = 0;
- ogg_uint32_t line_length2 = 0;
- ogg_uint32_t line_length_score = 0;
- ogg_uint32_t tmp_line_length = 0;
- ogg_uint32_t tmp_line_length2 = 0;
-
- /* Look UP and Down */
- PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
- ColNumber, UP, &tmp_line_length );
-
- if (tmp_line_length < ppi->MaxLineSearchLen) {
- /* Look DOWN */
- PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
- ColNumber, DOWN, &tmp_line_length2 );
- line_length = tmp_line_length + tmp_line_length2 - 1;
-
- if ( line_length > ppi->MaxLineSearchLen )
- line_length = ppi->MaxLineSearchLen;
- }else
- line_length = tmp_line_length;
-
- /* If no max length line found then look left and right */
- if ( line_length < ppi->MaxLineSearchLen ){
- tmp_line_length = 0;
- tmp_line_length2 = 0;
-
- PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
- ColNumber, LEFT, &tmp_line_length );
- if (tmp_line_length < ppi->MaxLineSearchLen){
- PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
- ColNumber, RIGHT, &tmp_line_length2 );
- line_length2 = tmp_line_length + tmp_line_length2 - 1;
-
- if ( line_length2 > ppi->MaxLineSearchLen )
- line_length2 = ppi->MaxLineSearchLen;
- }else
- line_length2 = tmp_line_length;
-
- }
-
- /* Take the largest line length */
- if ( line_length2 > line_length )
- line_length = line_length2;
-
- /* Create line length score */
- line_length_score = LineLengthScores[line_length];
-
- return (unsigned char)line_length_score;
-}
-
-static void LineSearchScoreRow( PP_INSTANCE *ppi,
- unsigned char * ChangedLocalsPtr,
- ogg_int16_t * YUVDiffsPtr,
- unsigned char * PixelNoiseScorePtr,
- ogg_uint32_t * FragScorePtr,
- signed char * DispFragPtr,
- ogg_int32_t RowNumber ){
- ogg_uint32_t AbsDiff;
- unsigned char changed_locals = 0;
- ogg_int32_t Score;
- ogg_uint32_t FragScore;
- ogg_int32_t i,j;
-
- /* The defining rule used here is as follows. */
- /* An edge pixels has 2-5 changed locals. */
- /* And one or more of these changed locals has itself got 7-8
- changed locals. */
-
- /* Loop for all pixels in the row. */
- for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
- /* Does the fragment contain anything interesting to work with. */
- if ( *DispFragPtr == CANDIDATE_BLOCK ){
- /* Reset the cumulative fragment score. */
- FragScore = 0;
-
- /* Pixels grouped along the row into fragments */
- for ( j = 0; j < HFRAGPIXELS; j++ ){
- /* How many changed locals has the current pixel got. */
- changed_locals = ChangedLocalsPtr[j];
-
- /* Is the pixel a suitable candidate for edge enhancement */
- if ( (changed_locals > 1) && (changed_locals < 6) &&
- (PixelNoiseScorePtr[j] < ppi->LineSearchTripTresh) ) {
- Score = (ogg_int32_t)
- LineSearchScorePixel( ppi, &ChangedLocalsPtr[j], RowNumber, i+j );
-
- if ( Score ){
- AbsDiff = abs( YUVDiffsPtr[j] );
- Score = (ogg_int32_t)
- ( (double)Score * ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
- if ( Score < 1 )
- Score = 1;
-
- PixelNoiseScorePtr[j] += (unsigned char)Score;
- FragScore += (ogg_uint32_t)Score;
- }
- }
- }
-
- /* Add fragment score (with plane correction factor) into main
- data structure */
- *FragScorePtr +=
- (ogg_int32_t)(FragScore * ppi->YUVPlaneCorrectionFactor);
-
- /* If score is greater than trip threshold then mark blcok for update. */
- if ( *FragScorePtr > ppi->BlockThreshold ){
- *DispFragPtr = BLOCK_CODED_LOW;
- }
- }
-
- /* Increment the various pointers */
- FragScorePtr++;
- DispFragPtr++;
- PixelNoiseScorePtr += HFRAGPIXELS;
- ChangedLocalsPtr += HFRAGPIXELS;
- YUVDiffsPtr += HFRAGPIXELS;
-
- }
-}
-
-static void RowCopy( PP_INSTANCE *ppi, ogg_uint32_t BlockMapIndex ){
-
- ogg_uint32_t i,j;
-
- ogg_uint32_t PixelIndex = ppi->ScanPixelIndexTable[BlockMapIndex];
- signed char * BlockMapPtr = &ppi->ScanDisplayFragments[BlockMapIndex];
- signed char * PrevFragmentsPtr = &ppi->PrevFragments[0][BlockMapIndex];
-
- unsigned char * SourcePtr;
- unsigned char * DestPtr;
-
- /* Copy pixels from changed blocks back to reference frame. */
- for ( i = 0; i < (ogg_uint32_t)ppi->PlaneHFragments; i ++ ){
- /* If the fragement is marked for update or was recently marked
- for update (PrevFragmentsPtr[i]) */
- if ( (BlockMapPtr[i] > BLOCK_NOT_CODED) ||
- (PrevFragmentsPtr[i] == BLOCK_CODED) ){
- /* Set up the various pointers required. */
- SourcePtr = &ppi->ScanConfig.Yuv1ptr[PixelIndex];
- DestPtr = &ppi->ScanConfig.SrfWorkSpcPtr[PixelIndex];
-
- /* For each row of the block */
- for ( j = 0; j < VFRAGPIXELS; j++ ){
- /* Copy the data unaltered from source to destination */
- memcpy(DestPtr,SourcePtr,8);
-
- /* Increment pointers for next line in the block */
- SourcePtr += ppi->PlaneWidth;
- DestPtr += ppi->PlaneWidth;
- }
- }
-
- /* Increment pixel index for next block. */
- PixelIndex += HFRAGPIXELS;
- }
-}
-
-static void RowBarEnhBlockMap( PP_INSTANCE *ppi,
- signed char * UpdatedBlockMapPtr,
- signed char * BarBlockMapPtr,
- ogg_uint32_t RowNumber ){
- int i;
-
- /* Start by blanking the row in the bar block map structure. */
- memset( BarBlockMapPtr, BLOCK_NOT_CODED, ppi->PlaneHFragments );
-
- /* First row */
- if ( RowNumber == 0 ){
-
- /* For each fragment in the row. */
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- /* Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW. Uncoded or
- coded blocks will be ignored. */
- if ( UpdatedBlockMapPtr[i] <= CANDIDATE_BLOCK ){
- /* Is one of the immediate neighbours updated in the main map. */
- /* Note special cases for blocks at the start and end of rows. */
- if ( i == 0 ){
-
- if ((UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments]>BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1]>BLOCK_NOT_CODED ) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
-
-
- }else if ( i == (ppi->PlaneHFragments - 1) ){
-
- if ((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1]>BLOCK_NOT_CODED) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments]>BLOCK_NOT_CODED) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
-
- }else{
- if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1] > BLOCK_NOT_CODED) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
- }
- }
- }
-
- } else if ( RowNumber == (ogg_uint32_t)(ppi->PlaneVFragments-1)) {
-
- /* Last row */
- /* Used to read PlaneHFragments */
-
- /* For each fragment in the row. */
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- /* Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW
- Uncoded or coded blocks will be ignored. */
- if ( UpdatedBlockMapPtr[i] <= CANDIDATE_BLOCK ){
- /* Is one of the immediate neighbours updated in the main map. */
- /* Note special cases for blocks at the start and end of rows. */
- if ( i == 0 ){
- if((UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED ))
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
-
- }else if ( i == (ppi->PlaneHFragments - 1) ){
- if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
- }else{
- if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
- }
- }
- }
-
- }else{
- /* All other rows */
- /* For each fragment in the row. */
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- /* Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW */
- /* Uncoded or coded blocks will be ignored. */
- if ( UpdatedBlockMapPtr[i] <= CANDIDATE_BLOCK ){
- /* Is one of the immediate neighbours updated in the main map. */
- /* Note special cases for blocks at the start and end of rows. */
- if ( i == 0 ){
-
- if((UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1] > BLOCK_NOT_CODED) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
-
- }else if ( i == (ppi->PlaneHFragments - 1) ){
-
- if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) )
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
-
- }else{
- if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ||
- (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1] > BLOCK_NOT_CODED ))
- BarBlockMapPtr[i] = BLOCK_CODED_BAR;
- }
- }
- }
- }
-}
-
-static void BarCopyBack( PP_INSTANCE *ppi,
- signed char * UpdatedBlockMapPtr,
- signed char * BarBlockMapPtr ){
- ogg_int32_t i;
-
- /* For each fragment in the row. */
- for ( i = 0; i < ppi->PlaneHFragments; i ++ ){
- if ( BarBlockMapPtr[i] > BLOCK_NOT_CODED ){
- UpdatedBlockMapPtr[i] = BarBlockMapPtr[i];
- }
- }
-}
-
-static void AnalysePlane( PP_INSTANCE *ppi,
- unsigned char * PlanePtr0,
- unsigned char * PlanePtr1,
- ogg_uint32_t FragArrayOffset,
- ogg_uint32_t PWidth,
- ogg_uint32_t PHeight,
- ogg_uint32_t PStride ) {
- unsigned char * RawPlanePtr0;
- unsigned char * RawPlanePtr1;
-
- ogg_int16_t * YUVDiffsPtr;
- ogg_int16_t * YUVDiffsPtr1;
- ogg_int16_t * YUVDiffsPtr2;
-
- ogg_uint32_t FragIndex;
- ogg_uint32_t ScoreFragIndex1;
- ogg_uint32_t ScoreFragIndex2;
- ogg_uint32_t ScoreFragIndex3;
- ogg_uint32_t ScoreFragIndex4;
-
- int UpdatedOrCandidateBlocks = 0;
-
- unsigned char * ChLocalsPtr0;
- unsigned char * ChLocalsPtr1;
- unsigned char * ChLocalsPtr2;
-
- unsigned char * PixelsChangedPtr0;
- unsigned char * PixelsChangedPtr1;
-
- unsigned char * PixelScoresPtr1;
- unsigned char * PixelScoresPtr2;
-
- signed char * DispFragPtr0;
- signed char * DispFragPtr1;
- signed char * DispFragPtr2;
-
- ogg_uint32_t * FragScoresPtr1;
- ogg_uint32_t * FragScoresPtr2;
-
- ogg_int32_t * RowDiffsPtr;
- ogg_int32_t * RowDiffsPtr1;
- ogg_int32_t * RowDiffsPtr2;
-
- ogg_int32_t i,j;
-
- ogg_int32_t RowNumber1;
- ogg_int32_t RowNumber2;
- ogg_int32_t RowNumber3;
- ogg_int32_t RowNumber4;
-
- int EdgeRow;
- ogg_int32_t LineSearchRowNumber = 0;
-
- /* Variables used as temporary stores for frequently used values. */
- ogg_int32_t Row0Mod3;
- ogg_int32_t Row1Mod3;
- ogg_int32_t Row2Mod3;
- ogg_int32_t BlockRowPixels;
-
- /* Set pixel difference threshold */
- if ( FragArrayOffset == 0 ){
- /* Luminance */
- ppi->LevelThresh = (int)ppi->SgcLevelThresh;
- ppi->NegLevelThresh = -ppi->LevelThresh;
-
- ppi->SrfThresh = (int)ppi->SRFGreyThresh;
- ppi->NegSrfThresh = -ppi->SrfThresh;
-
- /* Scores correction for Y pixels. */
- ppi->YUVPlaneCorrectionFactor = 1.0;
-
- ppi->BlockThreshold = ppi->PrimaryBlockThreshold;
- ppi->BlockSgcThresh = ppi->SgcThresh;
- }else{
- /* Chrominance */
- ppi->LevelThresh = (int)ppi->SuvcLevelThresh;
- ppi->NegLevelThresh = -ppi->LevelThresh;
-
- ppi->SrfThresh = (int)ppi->SRFColThresh;
- ppi->NegSrfThresh = -ppi->SrfThresh;
-
- /* Scores correction for UV pixels. */
- ppi->YUVPlaneCorrectionFactor = 1.5;
-
- /* Block threholds different for subsampled U and V blocks */
- ppi->BlockThreshold =
- (ppi->PrimaryBlockThreshold / ppi->UVBlockThreshCorrection);
- ppi->BlockSgcThresh =
- (ppi->SgcThresh / ppi->UVSgcCorrection);
- }
-
- /* Initialise the SRF thresh table and pointer. */
- memset( ppi->SrfThreshTable, 1, 512 );
- for ( i = ppi->NegSrfThresh; i <= ppi->SrfThresh; i++ )
- ppi->SrfThreshTable[i+255] = 0;
-
- /* Initialise the PAK thresh table. */
- for ( i = -255; i <= 255; i++ )
- if ( ppi->SrfThreshTable[i+255] &&
- (i <= ppi->HighChange) &&
- (i >= ppi->NegHighChange) )
- ppi->SrfPakThreshTable[i+255] = 1;
- else
- ppi->SrfPakThreshTable[i+255] = 0;
-
- /* Initialise the SGc lookup table */
- for ( i = -255; i <= 255; i++ ){
- if ( i <= ppi->NegLevelThresh )
- ppi->SgcThreshTable[i+255] = (unsigned char) -1;
- else if ( i >= ppi->LevelThresh )
- ppi->SgcThreshTable[i+255] = 1;
- else
- ppi->SgcThreshTable[i+255] = 0;
- }
-
- /* Set up plane dimension variables */
- ppi->PlaneHFragments = PWidth / HFRAGPIXELS;
- ppi->PlaneVFragments = PHeight / VFRAGPIXELS;
- ppi->PlaneWidth = PWidth;
- ppi->PlaneHeight = PHeight;
- ppi->PlaneStride = PStride;
-
- /* Set up local pointers into the raw image data. */
- RawPlanePtr0 = PlanePtr0;
- RawPlanePtr1 = PlanePtr1;
-
- /* Note size and endo points for circular buffers. */
- ppi->YuvDiffsCircularBufferSize = YDIFF_CB_ROWS * ppi->PlaneWidth;
- ppi->ChLocalsCircularBufferSize = CHLOCALS_CB_ROWS * ppi->PlaneWidth;
- ppi->PixelMapCircularBufferSize = PMAP_CB_ROWS * ppi->PlaneWidth;
-
- /* Set high change thresh where PAK not needed */
- ppi->HighChange = ppi->SrfThresh * 4;
- ppi->NegHighChange = -ppi->HighChange;
-
- /* Set up row difference pointers. */
- RowDiffsPtr = ppi->RowChangedPixels;
- RowDiffsPtr1 = ppi->RowChangedPixels;
- RowDiffsPtr2 = ppi->RowChangedPixels;
-
- BlockRowPixels = ppi->PlaneWidth * VFRAGPIXELS;
-
- for ( i = 0; i < (ppi->PlaneVFragments + 4); i++ ){
- RowNumber1 = (i - 1);
- RowNumber2 = (i - 2);
- RowNumber3 = (i - 3);
- RowNumber4 = (i - 4);
-
- /* Pre calculate some frequently used values */
- Row0Mod3 = i % 3;
- Row1Mod3 = RowNumber1 % 3;
- Row2Mod3 = RowNumber2 % 3;
-
- /* For row diff scan last two iterations are invalid */
- if ( i < ppi->PlaneVFragments ){
- FragIndex = (i * ppi->PlaneHFragments) + FragArrayOffset;
- YUVDiffsPtr = &ppi->yuv_differences[Row0Mod3 * BlockRowPixels];
-
- PixelsChangedPtr0 = (&ppi->PixelChangedMap[Row0Mod3 * BlockRowPixels]);
- DispFragPtr0 = &ppi->ScanDisplayFragments[FragIndex];
-
- ChLocalsPtr0 = (&ppi->ChLocals[Row0Mod3 * BlockRowPixels]);
-
- }
-
- /* Set up the changed locals pointer to trail behind by one row of
- fragments. */
- if ( i > 0 ){
- /* For last iteration the ch locals and noise scans are invalid */
- if ( RowNumber1 < ppi->PlaneVFragments ){
- ScoreFragIndex1 = (RowNumber1 * ppi->PlaneHFragments) +
- FragArrayOffset;
-
- ChLocalsPtr1 = &ppi->ChLocals[Row1Mod3 * BlockRowPixels];
- PixelsChangedPtr1 =
- &ppi->PixelChangedMap[(Row1Mod3) * BlockRowPixels];
-
- PixelScoresPtr1 = &ppi->PixelScores[(RowNumber1 % 4) * BlockRowPixels];
-
- YUVDiffsPtr1 = &ppi->yuv_differences[Row1Mod3 * BlockRowPixels];
- FragScoresPtr1 = &ppi->FragScores[ScoreFragIndex1];
- DispFragPtr1 = &ppi->ScanDisplayFragments[ScoreFragIndex1];
-
- }
-
- if ( RowNumber2 >= 0 ){
- ScoreFragIndex2 = (RowNumber2 * ppi->PlaneHFragments) +
- FragArrayOffset;
- ChLocalsPtr2 = (&ppi->ChLocals[Row2Mod3 * BlockRowPixels]);
- YUVDiffsPtr2 = &ppi->yuv_differences[Row2Mod3 * BlockRowPixels];
-
- PixelScoresPtr2 = &ppi->PixelScores[(RowNumber2 % 4) * BlockRowPixels];
-
- FragScoresPtr2 = &ppi->FragScores[ScoreFragIndex2];
- DispFragPtr2 = &ppi->ScanDisplayFragments[ScoreFragIndex2];
- }else{
- ChLocalsPtr2 = NULL;
- }
- }else{
- ChLocalsPtr1 = NULL;
- ChLocalsPtr2 = NULL;
- }
-
- /* Fast break out test for obvious yes and no cases in this row of
- blocks */
- if ( i < ppi->PlaneVFragments ){
- dsp_save_fpu (ppi->dsp);
- UpdatedOrCandidateBlocks =
- RowSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 );
- UpdatedOrCandidateBlocks |=
- ColSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 );
- dsp_restore_fpu (ppi->dsp);
- }else{
- /* Make sure we still call other functions if RowSadScan() disabled */
- UpdatedOrCandidateBlocks = 1;
- }
-
- /* Consolidation and fast break ot tests at Row 1 level */
- if ( (i > 0) && (RowNumber1 < ppi->PlaneVFragments) ){
- /* Mark as coded any candidate block that lies adjacent to a
- coded block. */
- SadPass2( ppi, RowNumber1, DispFragPtr1 );
-
- /* Check results of diff scan in last set of blocks. */
- /* Eliminate NO cases and add in +SGC cases */
- ConsolidateDiffScanResults( ppi, &ppi->FragDiffPixels[ScoreFragIndex1],
- &ppi->SameGreyDirPixels[ScoreFragIndex1],
- DispFragPtr1
- );
- }
-
- for ( j = 0; j < VFRAGPIXELS; j++ ){
- /* Last two iterations do not apply */
- if ( i < ppi->PlaneVFragments ){
- /* Is the current fragment at an edge. */
- EdgeRow = ( ( (i == 0) && (j == 0) ) ||
- ( (i == (ppi->PlaneVFragments - 1)) &&
- (j == (VFRAGPIXELS - 1)) ) );
-
- /* Clear the arrays that will be used for the changed pixels maps */
- memset( PixelsChangedPtr0, 0, ppi->PlaneWidth );
-
- /* Difference scan and map each row */
- if ( UpdatedOrCandidateBlocks ){
- /* Scan the row for interesting differences */
- /* Also clear the array that will be used for changed locals map */
- RowDiffScan( ppi, RawPlanePtr0, RawPlanePtr1,
- YUVDiffsPtr, PixelsChangedPtr0,
- &ppi->SameGreyDirPixels[FragIndex],
- DispFragPtr0, &ppi->FragDiffPixels[FragIndex],
- RowDiffsPtr, ChLocalsPtr0, EdgeRow);
- }else{
- /* Clear the array that will be used for changed locals map */
- memset( ChLocalsPtr0, 0, ppi->PlaneWidth );
- }
-
- /* The actual image plane pointers must be incremented by
- stride as this may be different (more) than the plane
- width. Our own internal buffers use ppi->PlaneWidth. */
- RawPlanePtr0 += ppi->PlaneStride;
- RawPlanePtr1 += ppi->PlaneStride;
- PixelsChangedPtr0 += ppi->PlaneWidth;
- ChLocalsPtr0 += ppi->PlaneWidth;
- YUVDiffsPtr += ppi->PlaneWidth;
- RowDiffsPtr++;
- }
-
- /* Run behind calculating the changed locals data and noise scores. */
- if ( ChLocalsPtr1 != NULL ){
- /* Last few iterations do not apply */
- if ( RowNumber1 < ppi->PlaneVFragments ){
- /* Blank the next row in the pixel scores data structure. */
- memset( PixelScoresPtr1, 0, ppi->PlaneWidth );
-
- /* Don't bother doing anything if there are no changed
- pixels in this row */
- if ( *RowDiffsPtr1 ){
- /* Last valid row is a special case */
- if ( i < ppi->PlaneVFragments )
- RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1,
- DispFragPtr1,
- ( (((i-1)==0) && (j==0)) ?
- FIRST_ROW : NOT_EDGE_ROW) );
- else
- RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1,
- DispFragPtr1,
- ((j==(VFRAGPIXELS-1)) ?
- LAST_ROW : NOT_EDGE_ROW) );
-
- NoiseScoreRow( ppi, PixelsChangedPtr1, ChLocalsPtr1, YUVDiffsPtr1,
- PixelScoresPtr1, FragScoresPtr1, DispFragPtr1,
- RowDiffsPtr1 );
- }
-
- ChLocalsPtr1 += ppi->PlaneWidth;
- PixelsChangedPtr1 += ppi->PlaneWidth;
- YUVDiffsPtr1 += ppi->PlaneWidth;
- PixelScoresPtr1 += ppi->PlaneWidth;
- RowDiffsPtr1 ++;
- }
-
- /* Run edge enhancement algorithms */
- if ( RowNumber2 < ppi->PlaneVFragments ){
- if ( ChLocalsPtr2 != NULL ){
- /* Don't bother doing anything if there are no changed
- pixels in this row */
- if ( *RowDiffsPtr2 ){
- if ( RowNumber1 < ppi->PlaneVFragments ){
- PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
- PixelScoresPtr2, FragScoresPtr2,
- DispFragPtr2,
- ( (((i-2)==0) && (j==0)) ?
- FIRST_ROW : NOT_EDGE_ROW) );
- }else{
- /* Edge enhancement */
- PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
- PixelScoresPtr2, FragScoresPtr2,
- DispFragPtr2,
- ((j==(VFRAGPIXELS-1)) ?
- LAST_ROW : NOT_EDGE_ROW) );
- }
-
- /* Recursive line search */
- LineSearchScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
- PixelScoresPtr2, FragScoresPtr2,
- DispFragPtr2,
- LineSearchRowNumber );
- }
-
- ChLocalsPtr2 += ppi->PlaneWidth;
- YUVDiffsPtr2 += ppi->PlaneWidth;
- PixelScoresPtr2 += ppi->PlaneWidth;
- LineSearchRowNumber += 1;
- RowDiffsPtr2 ++;
- }
- }
- }
- }
-
- /* BAR algorithm */
- if ( (RowNumber3 >= 0) && (RowNumber3 < ppi->PlaneVFragments) ){
- ScoreFragIndex3 = (RowNumber3 * ppi->PlaneHFragments) + FragArrayOffset;
- RowBarEnhBlockMap(ppi,
- &ppi->ScanDisplayFragments[ScoreFragIndex3],
- &ppi->BarBlockMap[(RowNumber3 % 3) *
- ppi->PlaneHFragments],
- RowNumber3 );
- }
-
- /* BAR copy back and "ppi->SRF filtering" or "pixel copy back" */
- if ( (RowNumber4 >= 0) && (RowNumber4 < ppi->PlaneVFragments) ){
- /* BAR copy back stage must lag by one more row to avoid BAR blocks
- being used in BAR descisions. */
- ScoreFragIndex4 = (RowNumber4 * ppi->PlaneHFragments) + FragArrayOffset;
-
- BarCopyBack(ppi, &ppi->ScanDisplayFragments[ScoreFragIndex4],
- &ppi->BarBlockMap[(RowNumber4 % 3) * ppi->PlaneHFragments]);
-
- /* Copy over the data from any blocks marked for update into the
- output buffer. */
- RowCopy(ppi, ScoreFragIndex4);
- }
- }
-}
-
-ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi, ogg_uint32_t * KFIndicator ){
-
- /* Initialise the map arrays. */
- InitScanMapArrays(ppi);
-
- /* If the motion level in the previous frame was high then adjust
- the high and low SAD thresholds to speed things up. */
- ppi->ModifiedGrpLowSadThresh = ppi->GrpLowSadThresh;
- ppi->ModifiedGrpHighSadThresh = ppi->GrpHighSadThresh;
-
-
- /* Set up the internal plane height and width variables. */
- ppi->VideoYPlaneWidth = ppi->ScanConfig.VideoFrameWidth;
- ppi->VideoYPlaneHeight = ppi->ScanConfig.VideoFrameHeight;
- ppi->VideoUVPlaneWidth = ppi->ScanConfig.VideoFrameWidth / 2;
- ppi->VideoUVPlaneHeight = ppi->ScanConfig.VideoFrameHeight / 2;
-
- /* To start with the strides will be set from the widths */
- ppi->VideoYPlaneStride = ppi->VideoYPlaneWidth;
- ppi->VideoUPlaneStride = ppi->VideoUVPlaneWidth;
- ppi->VideoVPlaneStride = ppi->VideoUVPlaneWidth;
-
- /* Set up the plane pointers */
- ppi->YPlanePtr0 = ppi->ScanConfig.Yuv0ptr;
- ppi->YPlanePtr1 = ppi->ScanConfig.Yuv1ptr;
- ppi->UPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels);
- ppi->UPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels);
- ppi->VPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels +
- ppi->UVFramePixels);
- ppi->VPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels +
- ppi->UVFramePixels);
-
- /* Check previous frame lists and if necessary mark extra blocks for
- update. */
- SetFromPrevious(ppi);
-
- /* Ananlyse the U and V palnes. */
- AnalysePlane( ppi, ppi->UPlanePtr0, ppi->UPlanePtr1,
- ppi->ScanYPlaneFragments, ppi->VideoUVPlaneWidth,
- ppi->VideoUVPlaneHeight, ppi->VideoUPlaneStride );
- AnalysePlane( ppi, ppi->VPlanePtr0, ppi->VPlanePtr1,
- (ppi->ScanYPlaneFragments + ppi->ScanUVPlaneFragments),
- ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight,
- ppi->VideoVPlaneStride );
-
- /* Now analyse the Y plane. */
- AnalysePlane( ppi, ppi->YPlanePtr0, ppi->YPlanePtr1, 0,
- ppi->VideoYPlaneWidth, ppi->VideoYPlaneHeight,
- ppi->VideoYPlaneStride );
-
- /* Update the list of previous frame block updates. */
- UpdatePreviousBlockLists(ppi);
-
- /* Create an output block map for the calling process. */
- CreateOutputDisplayMap( ppi, ppi->ScanDisplayFragments,
- ppi->PrevFragments[0],
- ppi->ScanConfig.disp_fragments );
-
- /* Set the candidate key frame indicator (0-100) */
- *KFIndicator = ppi->KFIndicator;
-
- /* Return the normalised block count (this is actually a motion
- level weighting not a true block count). */
- return ppi->OutputBlocksUpdated;
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/toplevel_lookup.h b/Engine/lib/libtheora/lib/enc/toplevel_lookup.h
deleted file mode 100644
index bf83a15b6..000000000
--- a/Engine/lib/libtheora/lib/enc/toplevel_lookup.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: toplevel_lookup.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-const ogg_uint32_t PriorKeyFrameWeight[KEY_FRAME_CONTEXT] = { 1,2,3,4,5 };
-
-/* Data structures controlling addition of residue blocks */
-const ogg_uint32_t ResidueErrorThresh[Q_TABLE_SIZE] = {
- 750, 700, 650, 600, 590, 580, 570, 560,
- 550, 540, 530, 520, 510, 500, 490, 480,
- 470, 460, 450, 440, 430, 420, 410, 400,
- 390, 380, 370, 360, 350, 340, 330, 320,
- 310, 300, 290, 280, 270, 260, 250, 245,
- 240, 235, 230, 225, 220, 215, 210, 205,
- 200, 195, 190, 185, 180, 175, 170, 165,
- 160, 155, 150, 145, 140, 135, 130, 130 };
-const ogg_uint32_t ResidueBlockFactor[Q_TABLE_SIZE] = {
- 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2 };
diff --git a/Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c
deleted file mode 100644
index 547e974e3..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c
+++ /dev/null
@@ -1,409 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dct_decode_mmx.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V3=
- 0x0003000300030003LL;
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V4=
- 0x0004000400040004LL;
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- long esi;
- _pix-=_ystride*2;
- __asm__ __volatile__(
- /*mm0=0*/
- "pxor %%mm0,%%mm0\n\t"
- /*esi=_ystride*3*/
- "lea (%[ystride],%[ystride],2),%[s]\n\t"
- /*mm7=_pix[0...8]*/
- "movq (%[pix]),%%mm7\n\t"
- /*mm4=_pix[0...8+_ystride*3]*/
- "movq (%[pix],%[s]),%%mm4\n\t"
- /*mm6=_pix[0...8]*/
- "movq %%mm7,%%mm6\n\t"
- /*Expand unsigned _pix[0...3] to 16 bits.*/
- "punpcklbw %%mm0,%%mm6\n\t"
- "movq %%mm4,%%mm5\n\t"
- /*Expand unsigned _pix[4...8] to 16 bits.*/
- "punpckhbw %%mm0,%%mm7\n\t"
- /*Expand other arrays too.*/
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm5\n\t"
- /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
- "psubw %%mm4,%%mm6\n\t"
- "psubw %%mm5,%%mm7\n\t"
- /*mm5=mm4=_pix[0...8+_ystride]*/
- "movq (%[pix],%[ystride]),%%mm4\n\t"
- /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/
- "movq (%[pix],%[ystride],2),%%mm2\n\t"
- "movq %%mm4,%%mm5\n\t"
- "movq %%mm2,%%mm3\n\t"
- "movq %%mm2,%%mm1\n\t"
- /*Expand these arrays.*/
- "punpckhbw %%mm0,%%mm5\n\t"
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm3\n\t"
- "punpcklbw %%mm0,%%mm2\n\t"
- /*Preload...*/
- "movq %[OC_V3],%%mm0\n\t"
- /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
- "psubw %%mm5,%%mm3\n\t"
- "psubw %%mm4,%%mm2\n\t"
- /*Scale by 3.*/
- "pmullw %%mm0,%%mm3\n\t"
- "pmullw %%mm0,%%mm2\n\t"
- /*Preload...*/
- "movq %[OC_V4],%%mm0\n\t"
- /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
- 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
- "paddw %%mm7,%%mm3\n\t"
- "paddw %%mm6,%%mm2\n\t"
- /*Add 4.*/
- "paddw %%mm0,%%mm3\n\t"
- "paddw %%mm0,%%mm2\n\t"
- /*"Divide" by 8.*/
- "psraw $3,%%mm3\n\t"
- "psraw $3,%%mm2\n\t"
- /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
- /*Free up mm5.*/
- "packuswb %%mm5,%%mm4\n\t"
- /*mm0=L L L L*/
- "movq (%[ll]),%%mm0\n\t"
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- "movq %%mm2,%%mm5\n\t"
- "pxor %%mm6,%%mm6\n\t"
- "movq %%mm0,%%mm7\n\t"
- "psubw %%mm0,%%mm6\n\t"
- "psllw $1,%%mm7\n\t"
- "psllw $1,%%mm6\n\t"
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- "pcmpgtw %%mm2,%%mm7\n\t"
- "pcmpgtw %%mm6,%%mm5\n\t"
- "pand %%mm7,%%mm2\n\t"
- "movq %%mm0,%%mm7\n\t"
- "pand %%mm5,%%mm2\n\t"
- "psllw $1,%%mm7\n\t"
- "movq %%mm3,%%mm5\n\t"
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- "pcmpgtw %%mm3,%%mm7\n\t"
- "pcmpgtw %%mm6,%%mm5\n\t"
- "pand %%mm7,%%mm3\n\t"
- "movq %%mm0,%%mm7\n\t"
- "pand %%mm5,%%mm3\n\t"
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- "psraw $1,%%mm6\n\t"
- "movq %%mm2,%%mm5\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm5=R_i>L?FF:00*/
- "pcmpgtw %%mm0,%%mm5\n\t"
- /*mm6=-L>R_i?FF:00*/
- "pcmpgtw %%mm2,%%mm6\n\t"
- /*mm7=R_i>L?2L:0*/
- "pand %%mm5,%%mm7\n\t"
- /*mm2=R_i>L?R_i-2L:R_i*/
- "psubw %%mm7,%%mm2\n\t"
- "movq %%mm0,%%mm7\n\t"
- /*mm5=-L>R_i||R_i>L*/
- "por %%mm6,%%mm5\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm7=-L>R_i?2L:0*/
- "pand %%mm6,%%mm7\n\t"
- "pxor %%mm6,%%mm6\n\t"
- /*mm2=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm7,%%mm2\n\t"
- "psubw %%mm0,%%mm6\n\t"
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- "pand %%mm2,%%mm5\n\t"
- "movq %%mm0,%%mm7\n\t"
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm5,%%mm2\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm5,%%mm2\n\t"
- "movq %%mm3,%%mm5\n\t"
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm6=-L>R_i?FF:00*/
- "pcmpgtw %%mm3,%%mm6\n\t"
- /*mm5=R_i>L?FF:00*/
- "pcmpgtw %%mm0,%%mm5\n\t"
- /*mm7=R_i>L?2L:0*/
- "pand %%mm5,%%mm7\n\t"
- /*mm2=R_i>L?R_i-2L:R_i*/
- "psubw %%mm7,%%mm3\n\t"
- "psllw $1,%%mm0\n\t"
- /*mm5=-L>R_i||R_i>L*/
- "por %%mm6,%%mm5\n\t"
- /*mm0=-L>R_i?2L:0*/
- "pand %%mm6,%%mm0\n\t"
- /*mm3=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm0,%%mm3\n\t"
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- "pand %%mm3,%%mm5\n\t"
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm5,%%mm3\n\t"
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm5,%%mm3\n\t"
- /*Unfortunately, there's no unsigned byte+signed byte with unsigned
- saturation op code, so we have to promote things back 16 bits.*/
- "pxor %%mm0,%%mm0\n\t"
- "movq %%mm4,%%mm5\n\t"
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm5\n\t"
- "movq %%mm1,%%mm6\n\t"
- "punpcklbw %%mm0,%%mm1\n\t"
- "punpckhbw %%mm0,%%mm6\n\t"
- /*_pix[0...8+_ystride]+=R_i*/
- "paddw %%mm2,%%mm4\n\t"
- "paddw %%mm3,%%mm5\n\t"
- /*_pix[0...8+_ystride*2]-=R_i*/
- "psubw %%mm2,%%mm1\n\t"
- "psubw %%mm3,%%mm6\n\t"
- "packuswb %%mm5,%%mm4\n\t"
- "packuswb %%mm6,%%mm1\n\t"
- /*Write it back out.*/
- "movq %%mm4,(%[pix],%[ystride])\n\t"
- "movq %%mm1,(%[pix],%[ystride],2)\n\t"
- :[s]"=&S"(esi)
- :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll),
- [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
- :"memory"
- );
-}
-
-/*This code implements the bulk of loop_filter_h().
- Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
- four p0's to one register we must transpose the values in four mmx regs.
- When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
- const ogg_int16_t *_ll){
- long esi;
- long edi;
- __asm__ __volatile__(
- /*x x x x 3 2 1 0*/
- "movd (%[pix]),%%mm0\n\t"
- /*esi=_ystride*3*/
- "lea (%[ystride],%[ystride],2),%[s]\n\t"
- /*x x x x 7 6 5 4*/
- "movd (%[pix],%[ystride]),%%mm1\n\t"
- /*x x x x B A 9 8*/
- "movd (%[pix],%[ystride],2),%%mm2\n\t"
- /*x x x x F E D C*/
- "movd (%[pix],%[s]),%%mm3\n\t"
- /*mm0=7 3 6 2 5 1 4 0*/
- "punpcklbw %%mm1,%%mm0\n\t"
- /*mm2=F B E A D 9 C 8*/
- "punpcklbw %%mm3,%%mm2\n\t"
- /*mm1=7 3 6 2 5 1 4 0*/
- "movq %%mm0,%%mm1\n\t"
- /*mm0=F B 7 3 E A 6 2*/
- "punpckhwd %%mm2,%%mm0\n\t"
- /*mm1=D 9 5 1 C 8 4 0*/
- "punpcklwd %%mm2,%%mm1\n\t"
- "pxor %%mm7,%%mm7\n\t"
- /*mm5=D 9 5 1 C 8 4 0*/
- "movq %%mm1,%%mm5\n\t"
- /*mm1=x C x 8 x 4 x 0==pix[0]*/
- "punpcklbw %%mm7,%%mm1\n\t"
- /*mm5=x D x 9 x 5 x 1==pix[1]*/
- "punpckhbw %%mm7,%%mm5\n\t"
- /*mm3=F B 7 3 E A 6 2*/
- "movq %%mm0,%%mm3\n\t"
- /*mm0=x E x A x 6 x 2==pix[2]*/
- "punpcklbw %%mm7,%%mm0\n\t"
- /*mm3=x F x B x 7 x 3==pix[3]*/
- "punpckhbw %%mm7,%%mm3\n\t"
- /*mm1=mm1-mm3==pix[0]-pix[3]*/
- "psubw %%mm3,%%mm1\n\t"
- /*Save a copy of pix[2] for later.*/
- "movq %%mm0,%%mm4\n\t"
- /*mm0=mm0-mm5==pix[2]-pix[1]*/
- "psubw %%mm5,%%mm0\n\t"
- /*Scale by 3.*/
- "pmullw %[OC_V3],%%mm0\n\t"
- /*f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
- "paddw %%mm1,%%mm0\n\t"
- /*Add 4.*/
- "paddw %[OC_V4],%%mm0\n\t"
- /*"Divide" by 8, producing the residuals R_i.*/
- "psraw $3,%%mm0\n\t"
- /*Now compute lflim of mm0 cf. Section 7.10 of the sepc.*/
- /*mm6=L L L L*/
- "movq (%[ll]),%%mm6\n\t"
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- "movq %%mm0,%%mm1\n\t"
- "pxor %%mm2,%%mm2\n\t"
- "movq %%mm6,%%mm3\n\t"
- "psubw %%mm6,%%mm2\n\t"
- "psllw $1,%%mm3\n\t"
- "psllw $1,%%mm2\n\t"
- /*mm0==R_3 R_2 R_1 R_0*/
- /*mm1==R_3 R_2 R_1 R_0*/
- /*mm2==-2L -2L -2L -2L*/
- /*mm3==2L 2L 2L 2L*/
- "pcmpgtw %%mm0,%%mm3\n\t"
- "pcmpgtw %%mm2,%%mm1\n\t"
- "pand %%mm3,%%mm0\n\t"
- "pand %%mm1,%%mm0\n\t"
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- "psraw $1,%%mm2\n\t"
- "movq %%mm0,%%mm1\n\t"
- "movq %%mm6,%%mm3\n\t"
- /*mm0==R_3 R_2 R_1 R_0*/
- /*mm1==R_3 R_2 R_1 R_0*/
- /*mm2==-L -L -L -L*/
- /*mm6==L L L L*/
- /*mm2=-L>R_i?FF:00*/
- "pcmpgtw %%mm0,%%mm2\n\t"
- /*mm1=R_i>L?FF:00*/
- "pcmpgtw %%mm6,%%mm1\n\t"
- /*mm3=2L 2L 2L 2L*/
- "psllw $1,%%mm3\n\t"
- /*mm6=2L 2L 2L 2L*/
- "psllw $1,%%mm6\n\t"
- /*mm3=R_i>L?2L:0*/
- "pand %%mm1,%%mm3\n\t"
- /*mm6=-L>R_i?2L:0*/
- "pand %%mm2,%%mm6\n\t"
- /*mm0=R_i>L?R_i-2L:R_i*/
- "psubw %%mm3,%%mm0\n\t"
- /*mm1=-L>R_i||R_i>L*/
- "por %%mm2,%%mm1\n\t"
- /*mm0=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm6,%%mm0\n\t"
- /*mm1=-L>R_i||R_i>L?R_i':0*/
- "pand %%mm0,%%mm1\n\t"
- /*mm0=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm1,%%mm0\n\t"
- /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm1,%%mm0\n\t"
- /*_pix[1]+=R_i;*/
- "paddw %%mm0,%%mm5\n\t"
- /*_pix[2]-=R_i;*/
- "psubw %%mm0,%%mm4\n\t"
- /*mm5=x x x x D 9 5 1*/
- "packuswb %%mm7,%%mm5\n\t"
- /*mm4=x x x x E A 6 2*/
- "packuswb %%mm7,%%mm4\n\t"
- /*mm5=E D A 9 6 5 2 1*/
- "punpcklbw %%mm4,%%mm5\n\t"
- /*edi=6 5 2 1*/
- "movd %%mm5,%%edi\n\t"
- "movw %%di,1(%[pix])\n\t"
- /*Why is there such a big stall here?*/
- "psrlq $32,%%mm5\n\t"
- "shrl $16,%%edi\n\t"
- "movw %%di,1(%[pix],%[ystride])\n\t"
- /*edi=E D A 9*/
- "movd %%mm5,%%edi\n\t"
- "movw %%di,1(%[pix],%[ystride],2)\n\t"
- "shrl $16,%%edi\n\t"
- "movw %%di,1(%[pix],%[s])\n\t"
- :[s]"=&S"(esi),[d]"=&D"(edi),
- [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
- :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
- :"memory"
- );
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _pix-=2;
- loop_filter_h4(_pix,_ystride,_ll);
- loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){
- int j;
- ogg_int16_t __attribute__((aligned(8))) ll[4];
- unsigned char *cp = pbi->display_fragments;
- ogg_uint32_t *bp = pbi->recon_pixel_index_table;
-
- if ( FLimit == 0 ) return;
- ll[0]=ll[1]=ll[2]=ll[3]=FLimit;
-
- for ( j = 0; j < 3 ; j++){
- ogg_uint32_t *bp_begin = bp;
- ogg_uint32_t *bp_end;
- int stride;
- int h;
-
- switch(j) {
- case 0: /* y */
- bp_end = bp + pbi->YPlaneFragments;
- h = pbi->HFragments;
- stride = pbi->YStride;
- break;
- default: /* u,v, 4:20 specific */
- bp_end = bp + pbi->UVPlaneFragments;
- h = pbi->HFragments >> 1;
- stride = pbi->UVStride;
- break;
- }
-
- while(bpbp_left)
- loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll);
- if(bp_left>bp_begin)
- loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll);
- if(bp+1LastFrameRecon[bp[0]]+8,stride,ll);
- if(bp+hLastFrameRecon[bp[h]],stride,ll);
- }
- bp++;
- cp++;
- }
- }
- }
-
- __asm__ __volatile__("emms\n\t");
-}
-
-/* install our implementation in the function table */
-void dsp_mmx_dct_decode_init(DspFunctions *funcs)
-{
- funcs->LoopFilter = loop_filter_mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c
deleted file mode 100644
index 3c8a46e6a..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c
+++ /dev/null
@@ -1,666 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dsp_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL;
-
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-
-#define SUB_LOOP \
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \
- " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr */ \
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */\
- " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */\
- /* convert from UINT8 to INT16 */ \
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \
- " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr) */ \
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \
- " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr) */ \
- /* start calculation */ \
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ReconPtr */ \
- " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ReconPtr */ \
- " movq %%mm0, (%2) \n\t" /* write answer out */ \
- " movq %%mm2, 8(%2) \n\t" /* write answer out */ \
- /* Increment pointers */ \
- " add $16, %2 \n\t" \
- " add %3, %0 \n\t" \
- " add %4, %1 \n\t"
-
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
- ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
- SUB_LOOP
- SUB_LOOP
- SUB_LOOP
- SUB_LOOP
- SUB_LOOP
- SUB_LOOP
- SUB_LOOP
- SUB_LOOP
- : "+r" (FiltPtr),
- "+r" (ReconPtr),
- "+r" (DctInputPtr)
- : "m" (PixelsPerLine),
- "m" (ReconPixelsPerLine)
- : "memory"
- );
-}
-
-#define SUB_128_LOOP \
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */\
- /* convert from UINT8 to INT16 */ \
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \
- /* start calculation */ \
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - 128 */ \
- " psubw %%mm1, %%mm2 \n\t" /* mm2 = FiltPtr - 128 */ \
- " movq %%mm0, (%1) \n\t" /* write answer out */ \
- " movq %%mm2, 8(%1) \n\t" /* write answer out */ \
- /* Increment pointers */ \
- " add $16, %1 \n\t" \
- " add %2, %0 \n\t"
-
-
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
- " movq %[V128], %%mm1 \n\t"
- SUB_128_LOOP
- SUB_128_LOOP
- SUB_128_LOOP
- SUB_128_LOOP
- SUB_128_LOOP
- SUB_128_LOOP
- SUB_128_LOOP
- SUB_128_LOOP
- : "+r" (FiltPtr),
- "+r" (DctInputPtr)
- : "m" (PixelsPerLine),
- [V128] "m" (V128)
- : "memory"
- );
-}
-
-#define SUB_AVG2_LOOP \
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \
- " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */ \
- " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */ \
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */\
- " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */\
- " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */\
- /* convert from UINT8 to INT16 */ \
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \
- " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */ \
- " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */ \
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \
- " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */ \
- " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */ \
- /* average ReconPtr1 and ReconPtr2 */ \
- " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */ \
- " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */ \
- " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ \
- " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ \
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
- " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
- " movq %%mm0, (%3) \n\t" /* write answer out */ \
- " movq %%mm2, 8(%3) \n\t" /* write answer out */ \
- /* Increment pointers */ \
- " add $16, %3 \n\t" \
- " add %4, %0 \n\t" \
- " add %5, %1 \n\t" \
- " add %5, %2 \n\t"
-
-
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- SUB_AVG2_LOOP
- : "+r" (FiltPtr),
- "+r" (ReconPtr1),
- "+r" (ReconPtr2),
- "+r" (DctInputPtr)
- : "m" (PixelsPerLine),
- "m" (ReconPixelsPerLine)
- : "memory"
- );
-}
-
-static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm6, %%mm6 \n\t" /* zero out mm6 for unpack */
- " pxor %%mm7, %%mm7 \n\t" /* zero out mm7 for unpack */
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t"
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
-
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t" /* ; unpack low four bytes to higher precision */
- " punpckhbw %%mm7, %%mm1 \n\t" /* ; unpack high four bytes to higher precision */
-
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
- " psrlq $32, %%mm2 \n\t" /* fold and add */
- " psrlq $32, %%mm3 \n\t"
- " paddw %%mm2, %%mm0 \n\t"
- " paddw %%mm3, %%mm1 \n\t"
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
- " psrlq $16, %%mm2 \n\t"
- " psrlq $16, %%mm3 \n\t"
- " paddw %%mm2, %%mm0 \n\t"
- " paddw %%mm3, %%mm1 \n\t"
-
- " psubusw %%mm0, %%mm1 \n\t"
- " paddw %%mm0, %%mm1 \n\t" /* mm1 = max(mm1, mm0) */
- " movd %%mm1, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=m" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- :
- : "memory"
- );
- return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */
- " pxor %%mm4, %%mm4 \n\t" /* mm4 low sum */
- " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */
- " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */
- " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */
- " mov $4, %%edi \n\t" /* 4 rows */
- "1: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " mov $4, %%edi \n\t" /* 4 rows */
- "2: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%edi \n\t"
- " jnz 2b \n\t"
-
- " psubusw %%mm6, %%mm7 \n\t"
- " paddw %%mm6, %%mm7 \n\t" /* mm7 = max(mm7, mm6) */
- " psubusw %%mm4, %%mm5 \n\t"
- " paddw %%mm4, %%mm5 \n\t" /* mm5 = max(mm5, mm4) */
- " psubusw %%mm5, %%mm7 \n\t"
- " paddw %%mm5, %%mm7 \n\t" /* mm7 = max(mm5, mm7) */
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $32, %%mm6 \n\t"
- " psubusw %%mm6, %%mm7 \n\t"
- " paddw %%mm6, %%mm7 \n\t" /* mm7 = max(mm5, mm7) */
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $16, %%mm6 \n\t"
- " psubusw %%mm6, %%mm7 \n\t"
- " paddw %%mm6, %%mm7 \n\t" /* mm7 = max(mm5, mm7) */
- " movd %%mm7, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=r" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- : "r" (stride)
- : "memory", "edi"
- );
-
- return MaxSad;
-}
-
-#define SAD_LOOP \
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \
- " movq (%2), %%mm1 \n\t" \
- " movq %%mm0, %%mm2 \n\t" \
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */ \
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */ \
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ \
- " movq %%mm0, %%mm1 \n\t" \
- " punpcklbw %%mm6, %%mm0 \n\t" /* unpack to higher precision for accumulation */ \
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \
- " punpckhbw %%mm6, %%mm1 \n\t" /* unpack high four bytes to higher precision */ \
- " add %3, %1 \n\t" /* Inc pointer into the new data */ \
- " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */ \
- " add %4, %2 \n\t" /* Inc pointer into ref data */
-
-static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
- " pxor %%mm6, %%mm6 \n\t" /* zero out mm6 for unpack */
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
- SAD_LOOP
- SAD_LOOP
- SAD_LOOP
- SAD_LOOP
- SAD_LOOP
- SAD_LOOP
- SAD_LOOP
- SAD_LOOP
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddw %%mm0, %%mm7 \n\t"
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $16, %%mm7 \n\t"
- " paddw %%mm0, %%mm7 \n\t"
- " movd %%mm7, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=m" (DiffVal),
- "+r" (ptr1),
- "+r" (ptr2)
- : "r" (stride1),
- "r" (stride2)
- : "memory"
- );
-
- return DiffVal;
-}
-
-static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2,
- ogg_uint32_t thres)
-{
- return sad8x8__mmx (ptr1, stride1, ptr2, stride2);
-}
-
-static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
- ogg_uint32_t thres)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pcmpeqd %%mm5, %%mm5 \n\t" /* fefefefefefefefe in mm5 */
- " paddb %%mm5, %%mm5 \n\t"
-
- " pxor %%mm6, %%mm6 \n\t" /* zero out mm6 for unpack */
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
- " mov $8, %%edi \n\t" /* 8 rows */
- "1: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
-
- " movq (%2), %%mm2 \n\t"
- " movq (%3), %%mm3 \n\t" /* take average of mm2 and mm3 */
- " movq %%mm2, %%mm1 \n\t"
- " pand %%mm3, %%mm1 \n\t"
- " pxor %%mm2, %%mm3 \n\t"
- " pand %%mm5, %%mm3 \n\t"
- " psrlq $1, %%mm3 \n\t"
- " paddb %%mm3, %%mm1 \n\t"
-
- " movq %%mm0, %%mm2 \n\t"
-
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */
- " punpckhbw %%mm6, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " add %4, %1 \n\t" /* Inc pointer into the new data */
- " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */
- " add %5, %2 \n\t" /* Inc pointer into ref data */
- " add %5, %3 \n\t" /* Inc pointer into ref data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddw %%mm0, %%mm7 \n\t"
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $16, %%mm7 \n\t"
- " paddw %%mm0, %%mm7 \n\t"
- " movd %%mm7, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=m" (DiffVal),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "m" (SrcStride),
- "m" (RefStride)
- : "edi", "memory"
- );
-
- return DiffVal;
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%edi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
- " movq %%mm0, %%mm2 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %3, %2 \n\t" /* Inc pointer into src data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%edi \n\t"
- " movsx %%di, %%edi \n\t"
- " movl %%edi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=r" (XSum),
- "=r" (XXSum),
- "+r" (DataPtr)
- : "r" (Stride)
- : "edi", "memory"
- );
-
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%edi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
- " movq (%3), %%mm1 \n\t"
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm6, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm6, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %4, %2 \n\t" /* Inc pointer into src data */
- " add %5, %3 \n\t" /* Inc pointer into ref data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%edi \n\t"
- " movsx %%di, %%edi \n\t"
- " movl %%edi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr)
- : "m" (SrcStride),
- "m" (RefStride)
- : "edi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pcmpeqd %%mm4, %%mm4 \n\t" /* fefefefefefefefe in mm4 */
- " paddb %%mm4, %%mm4 \n\t"
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%edi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
-
- " movq (%3), %%mm2 \n\t"
- " movq (%4), %%mm3 \n\t" /* take average of mm2 and mm3 */
- " movq %%mm2, %%mm1 \n\t"
- " pand %%mm3, %%mm1 \n\t"
- " pxor %%mm2, %%mm3 \n\t"
- " pand %%mm4, %%mm3 \n\t"
- " psrlq $1, %%mm3 \n\t"
- " paddb %%mm3, %%mm1 \n\t"
-
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm6, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm6, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %5, %2 \n\t" /* Inc pointer into src data */
- " add %6, %3 \n\t" /* Inc pointer into ref data */
- " add %6, %4 \n\t" /* Inc pointer into ref data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%edi \n\t"
- " movsx %%di, %%edi \n\t"
- " movl %%edi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "m" (SrcStride),
- "m" (RefStride)
- : "edi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static void restore_fpu (void)
-{
- __asm__ __volatile__ (
- " emms \n\t"
- );
-}
-
-void dsp_mmx_init(DspFunctions *funcs)
-{
- funcs->restore_fpu = restore_fpu;
- funcs->sub8x8 = sub8x8__mmx;
- funcs->sub8x8_128 = sub8x8_128__mmx;
- funcs->sub8x8avg2 = sub8x8avg2__mmx;
- funcs->row_sad8 = row_sad8__mmx;
- funcs->col_sad8x8 = col_sad8x8__mmx;
- funcs->sad8x8 = sad8x8__mmx;
- funcs->sad8x8_thres = sad8x8_thres__mmx;
- funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;
- funcs->intra8x8_err = intra8x8_err__mmx;
- funcs->inter8x8_err = inter8x8_err__mmx;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c b/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c
deleted file mode 100644
index 297c3213a..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dsp_mmxext.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-#define SAD_MMXEXT_LOOP \
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \
- " movq (%2), %%mm1 \n\t" \
- " psadbw %%mm1, %%mm0 \n\t" \
- " add %3, %1 \n\t" /* Inc pointer into the new data */ \
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \
- " add %4, %2 \n\t" /* Inc pointer into ref data */
-
-
-static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
-
- SAD_MMXEXT_LOOP
- SAD_MMXEXT_LOOP
- SAD_MMXEXT_LOOP
- SAD_MMXEXT_LOOP
- SAD_MMXEXT_LOOP
- SAD_MMXEXT_LOOP
- SAD_MMXEXT_LOOP
-
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t"
- " psadbw %%mm1, %%mm0 \n\t"
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */
- " movd %%mm7, %0 \n\t"
-
- : "=r" (DiffVal),
- "+r" (ptr1),
- "+r" (ptr2)
- : "r" (stride1),
- "r" (stride2)
- : "memory"
- );
-
- return DiffVal;
-}
-
-#define SAD_TRES_LOOP \
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \
- " movq (%2), %%mm1 \n\t" \
- " psadbw %%mm1, %%mm0 \n\t" \
- " add %3, %1 \n\t" /* Inc pointer into the new data */ \
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \
- " add %4, %2 \n\t" /* Inc pointer into ref data */
-
-
-static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2,
- ogg_uint32_t thres)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
-
- SAD_TRES_LOOP
- SAD_TRES_LOOP
- SAD_TRES_LOOP
- SAD_TRES_LOOP
- SAD_TRES_LOOP
- SAD_TRES_LOOP
- SAD_TRES_LOOP
- SAD_TRES_LOOP
-
- " movd %%mm7, %0 \n\t"
-
- : "=r" (DiffVal),
- "+r" (ptr1),
- "+r" (ptr2)
- : "r" (stride1),
- "r" (stride2)
- : "memory"
- );
-
- return DiffVal;
-}
-
-#define SAD_XY2_TRES \
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \
- " movq (%2), %%mm1 \n\t" \
- " movq (%3), %%mm2 \n\t" \
- " pavgb %%mm2, %%mm1 \n\t" \
- " psadbw %%mm1, %%mm0 \n\t" \
- \
- " add %4, %1 \n\t" /* Inc pointer into the new data */ \
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \
- " add %5, %2 \n\t" /* Inc pointer into ref data */ \
- " add %5, %3 \n\t" /* Inc pointer into ref data */
-
-
-static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
- ogg_uint32_t thres)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
- SAD_XY2_TRES
- SAD_XY2_TRES
- SAD_XY2_TRES
- SAD_XY2_TRES
- SAD_XY2_TRES
- SAD_XY2_TRES
- SAD_XY2_TRES
- SAD_XY2_TRES
-
- " movd %%mm7, %0 \n\t"
- : "=m" (DiffVal),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "m" (SrcStride),
- "m" (RefStride)
- : "memory"
- );
-
- return DiffVal;
-}
-
-static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " movd (%1), %%mm0 \n\t"
- " movd (%2), %%mm1 \n\t"
- " psadbw %%mm0, %%mm1 \n\t"
- " movd 4(%1), %%mm2 \n\t"
- " movd 4(%2), %%mm3 \n\t"
- " psadbw %%mm2, %%mm3 \n\t"
-
- " pmaxsw %%mm1, %%mm3 \n\t"
- " movd %%mm3, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=m" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- :
- : "memory"
- );
-
- return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */
- " pxor %%mm4, %%mm4 \n\t" /* mm4 low sum */
- " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */
- " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */
- " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */
- " mov $4, %%edi \n\t" /* 4 rows */
- "1: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " mov $4, %%edi \n\t" /* 4 rows */
- "2: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%edi \n\t"
- " jnz 2b \n\t"
-
- " pmaxsw %%mm6, %%mm7 \n\t"
- " pmaxsw %%mm4, %%mm5 \n\t"
- " pmaxsw %%mm5, %%mm7 \n\t"
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $32, %%mm6 \n\t"
- " pmaxsw %%mm6, %%mm7 \n\t"
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $16, %%mm6 \n\t"
- " pmaxsw %%mm6, %%mm7 \n\t"
- " movd %%mm7, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=r" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- : "r" (stride)
- : "memory", "edi"
- );
-
- return MaxSad;
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm4, %%mm4 \n\t"
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%edi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
-
- " movq (%3), %%mm2 \n\t"
- " movq (%4), %%mm1 \n\t" /* take average of mm2 and mm1 */
- " pavgb %%mm2, %%mm1 \n\t"
-
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm4, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm4, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %5, %2 \n\t" /* Inc pointer into src data */
- " add %6, %3 \n\t" /* Inc pointer into ref data */
- " add %6, %4 \n\t" /* Inc pointer into ref data */
-
- " dec %%edi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%edi \n\t"
- " movsx %%di, %%edi \n\t"
- " movl %%edi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "m" (SrcStride),
- "m" (RefStride)
- : "edi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-void dsp_mmxext_init(DspFunctions *funcs)
-{
- funcs->row_sad8 = row_sad8__mmxext;
- funcs->col_sad8x8 = col_sad8x8__mmxext;
- funcs->sad8x8 = sad8x8__mmxext;
- funcs->sad8x8_thres = sad8x8_thres__mmxext;
- funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c
deleted file mode 100644
index 8de691f81..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c
+++ /dev/null
@@ -1,339 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: fdct_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-/* mmx fdct implementation */
-
-#include "theora/theora.h"
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC4S4 = 0x0b505b505b505b505LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL;
-
-/* execute stage 1 of forward DCT */
-#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp) \
- " movq " #ip0 ", %%mm0 \n\t" \
- " movq " #ip1 ", %%mm1 \n\t" \
- " movq " #ip3 ", %%mm2 \n\t" \
- " movq " #ip5 ", %%mm3 \n\t" \
- " movq %%mm0, %%mm4 \n\t" \
- " movq %%mm1, %%mm5 \n\t" \
- " movq %%mm2, %%mm6 \n\t" \
- " movq %%mm3, %%mm7 \n\t" \
- \
- " paddsw " #ip7 ", %%mm0 \n\t" /* mm0 = ip0 + ip7 = is07 */ \
- " paddsw " #ip2 ", %%mm1 \n\t" /* mm1 = ip1 + ip2 = is12 */ \
- " paddsw " #ip4 ", %%mm2 \n\t" /* mm2 = ip3 + ip4 = is34 */ \
- " paddsw " #ip6 ", %%mm3 \n\t" /* mm3 = ip5 + ip6 = is56 */ \
- " psubsw " #ip7 ", %%mm4 \n\t" /* mm4 = ip0 - ip7 = id07 */ \
- " psubsw " #ip2 ", %%mm5 \n\t" /* mm5 = ip1 - ip2 = id12 */ \
- \
- " psubsw %%mm2, %%mm0 \n\t" /* mm0 = is07 - is34 */ \
- \
- " paddsw %%mm2, %%mm2 \n\t" \
- \
- " psubsw " #ip4 ", %%mm6 \n\t" /* mm6 = ip3 - ip4 = id34 */ \
- \
- " paddsw %%mm0, %%mm2 \n\t" /* mm2 = is07 + is34 = is0734 */ \
- " psubsw %%mm3, %%mm1 \n\t" /* mm1 = is12 - is56 */ \
- " movq %%mm0," #temp " \n\t" /* Save is07 - is34 to free mm0; */ \
- " paddsw %%mm3, %%mm3 \n\t" \
- " paddsw %%mm1, %%mm3 \n\t" /* mm3 = is12 + 1s56 = is1256 */ \
- \
- " psubsw " #ip6 ", %%mm7 \n\t" /* mm7 = ip5 - ip6 = id56 */ \
- /* ------------------------------------------------------------------- */ \
- " psubsw %%mm7, %%mm5 \n\t" /* mm5 = id12 - id56 */ \
- " paddsw %%mm7, %%mm7 \n\t" \
- " paddsw %%mm5, %%mm7 \n\t" /* mm7 = id12 + id56 */ \
- /* ------------------------------------------------------------------- */ \
- " psubsw %%mm3, %%mm2 \n\t" /* mm2 = is0734 - is1256 */ \
- " paddsw %%mm3, %%mm3 \n\t" \
- \
- " movq %%mm2, %%mm0 \n\t" /* make a copy */ \
- " paddsw %%mm2, %%mm3 \n\t" /* mm3 = is0734 + is1256 */ \
- \
- " pmulhw %[xC4S4], %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \
- " paddw %%mm2, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \
- " psrlw $15, %%mm2 \n\t" \
- " paddw %%mm2, %%mm0 \n\t" /* Truncate mm0, now it is op[4] */ \
- \
- " movq %%mm3, %%mm2 \n\t" \
- " movq %%mm0," #ip4 " \n\t" /* save ip4, now mm0,mm2 are free */ \
- \
- " movq %%mm3, %%mm0 \n\t" \
- " pmulhw %[xC4S4], %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \
- \
- " psrlw $15, %%mm2 \n\t" \
- " paddw %%mm0, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) */ \
- " paddw %%mm2, %%mm3 \n\t" /* Truncate mm3, now it is op[0] */ \
- \
- " movq %%mm3," #ip0 " \n\t" \
- /* ------------------------------------------------------------------- */ \
- " movq " #temp ", %%mm3 \n\t" /* mm3 = irot_input_y */ \
- " pmulhw %[xC2S6], %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \
- \
- " movq " #temp ", %%mm2 \n\t" \
- " movq %%mm2, %%mm0 \n\t" \
- \
- " psrlw $15, %%mm2 \n\t" /* mm3 = xC2S6 * irot_input_y */ \
- " paddw %%mm0, %%mm3 \n\t" \
- \
- " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
- " movq %%mm5, %%mm0 \n\t" \
- \
- " movq %%mm5, %%mm2 \n\t" \
- " pmulhw %[xC6S2], %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \
- \
- " psrlw $15, %%mm2 \n\t" \
- " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \
- \
- " paddsw %%mm0, %%mm3 \n\t" /* ip[2] */ \
- " movq %%mm3," #ip2 " \n\t" /* Save ip2 */ \
- \
- " movq %%mm5, %%mm0 \n\t" \
- " movq %%mm5, %%mm2 \n\t" \
- \
- " pmulhw %[xC2S6], %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " movq " #temp ", %%mm3 \n\t" \
- " paddw %%mm0, %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x */ \
- \
- " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \
- " movq %%mm3, %%mm2 \n\t" \
- \
- " pmulhw %[xC6S2], %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
- " psubsw %%mm5, %%mm3 \n\t" \
- \
- " movq %%mm3," #ip6 " \n\t" \
- /* ------------------------------------------------------------------- */ \
- " movq %[xC4S4], %%mm0 \n\t" \
- " movq %%mm1, %%mm2 \n\t" \
- " movq %%mm1, %%mm3 \n\t" \
- \
- " pmulhw %%mm0, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm3, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) */ \
- " paddw %%mm2, %%mm1 \n\t" /* Truncate mm1, now it is icommon_product1 */ \
- \
- " movq %%mm7, %%mm2 \n\t" \
- " movq %%mm7, %%mm3 \n\t" \
- \
- " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm3, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) */ \
- " paddw %%mm2, %%mm7 \n\t" /* Truncate mm7, now it is icommon_product2 */ \
- /* ------------------------------------------------------------------- */ \
- " pxor %%mm0, %%mm0 \n\t" /* Clear mm0 */ \
- " psubsw %%mm6, %%mm0 \n\t" /* mm0 = - id34 */ \
- \
- " psubsw %%mm7, %%mm0 \n\t" /* mm0 = - ( id34 + idcommon_product2 ) */ \
- " paddsw %%mm6, %%mm6 \n\t" \
- " paddsw %%mm0, %%mm6 \n\t" /* mm6 = id34 - icommon_product2 */ \
- \
- " psubsw %%mm1, %%mm4 \n\t" /* mm4 = id07 - icommon_product1 */ \
- " paddsw %%mm1, %%mm1 \n\t" \
- " paddsw %%mm4, %%mm1 \n\t" /* mm1 = id07 + icommon_product1 */ \
- /* ------------------------------------------------------------------- */ \
- " movq %[xC1S7], %%mm7 \n\t" \
- " movq %%mm1, %%mm2 \n\t" \
- \
- " movq %%mm1, %%mm3 \n\t" \
- " pmulhw %%mm7, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \
- \
- " movq %[xC7S1], %%mm7 \n\t" \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm3, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x */ \
- " paddw %%mm2, %%mm1 \n\t" /* Trucated */ \
- \
- " pmulhw %%mm7, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x */ \
- " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
- \
- " movq %%mm0, %%mm5 \n\t" \
- " movq %%mm0, %%mm2 \n\t" \
- \
- " movq %[xC1S7], %%mm7 \n\t" \
- " pmulhw %%mm7, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \
- \
- " movq %[xC7S1], %%mm7 \n\t" \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm5, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y */ \
- " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \
- \
- " pmulhw %%mm7, %%mm5 \n\t" /* mm5 = xC7S1 * irot_input_y */ \
- " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \
- \
- " psubsw %%mm5, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \
- " paddsw %%mm0, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ \
- \
- " movq %%mm1," #ip1 " \n\t" \
- " movq %%mm3," #ip7 " \n\t" \
- /* ------------------------------------------------------------------- */ \
- " movq %[xC3S5], %%mm0 \n\t" \
- " movq %[xC5S3], %%mm1 \n\t" \
- \
- " movq %%mm6, %%mm5 \n\t" \
- " movq %%mm6, %%mm7 \n\t" \
- \
- " movq %%mm4, %%mm2 \n\t" \
- " movq %%mm4, %%mm3 \n\t" \
- \
- " pmulhw %%mm0, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \
- " pmulhw %%mm1, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y - irot_input_y */ \
- \
- " psrlw $15, %%mm2 \n\t" \
- " psrlw $15, %%mm5 \n\t" \
- \
- " paddw %%mm3, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x */ \
- " paddw %%mm7, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y */ \
- \
- " paddw %%mm2, %%mm4 \n\t" /* Truncated */ \
- " paddw %%mm5, %%mm6 \n\t" /* Truncated */ \
- \
- " psubsw %%mm6, %%mm4 \n\t" /* ip3 */ \
- " movq %%mm4," #ip3 " \n\t" \
- \
- " movq %%mm3, %%mm4 \n\t" \
- " movq %%mm7, %%mm6 \n\t" \
- \
- " pmulhw %%mm1, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \
- " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \
- \
- " paddw %%mm2, %%mm4 \n\t" \
- " paddw %%mm5, %%mm6 \n\t" \
- \
- " paddw %%mm4, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x */ \
- " paddw %%mm6, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y */ \
- \
- " paddw %%mm7, %%mm3 \n\t" /* ip5 */ \
- " movq %%mm3," #ip5 " \n\t"
-
-#define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7, \
- op0,op1,op2,op3,op4,op5,op6,op7) \
- " movq " #ip0 ", %%mm0 \n\t" /* mm0 = a0 a1 a2 a3 */ \
- " movq " #ip4 ", %%mm4 \n\t" /* mm4 = e4 e5 e6 e7 */ \
- " movq " #ip1 ", %%mm1 \n\t" /* mm1 = b0 b1 b2 b3 */ \
- " movq " #ip5 ", %%mm5 \n\t" /* mm5 = f4 f5 f6 f7 */ \
- " movq " #ip2 ", %%mm2 \n\t" /* mm2 = c0 c1 c2 c3 */ \
- " movq " #ip6 ", %%mm6 \n\t" /* mm6 = g4 g5 g6 g7 */ \
- " movq " #ip3 ", %%mm3 \n\t" /* mm3 = d0 d1 d2 d3 */ \
- " movq %%mm1," #op1 " \n\t" /* save b0 b1 b2 b3 */ \
- " movq " #ip7 ", %%mm7 \n\t" /* mm7 = h0 h1 h2 h3 */ \
- /* Transpose 2x8 block */ \
- " movq %%mm4, %%mm1 \n\t" /* mm1 = e3 e2 e1 e0 */ \
- " punpcklwd %%mm5, %%mm4 \n\t" /* mm4 = f1 e1 f0 e0 */ \
- " movq %%mm0," #op0 " \n\t" /* save a3 a2 a1 a0 */ \
- " punpckhwd %%mm5, %%mm1 \n\t" /* mm1 = f3 e3 f2 e2 */ \
- " movq %%mm6, %%mm0 \n\t" /* mm0 = g3 g2 g1 g0 */ \
- " punpcklwd %%mm7, %%mm6 \n\t" /* mm6 = h1 g1 h0 g0 */ \
- " movq %%mm4, %%mm5 \n\t" /* mm5 = f1 e1 f0 e0 */ \
- " punpckldq %%mm6, %%mm4 \n\t" /* mm4 = h0 g0 f0 e0 = MM4 */ \
- " punpckhdq %%mm6, %%mm5 \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \
- " movq %%mm1, %%mm6 \n\t" /* mm6 = f3 e3 f2 e2 */ \
- " movq %%mm4," #op4 " \n\t" \
- " punpckhwd %%mm7, %%mm0 \n\t" /* mm0 = h3 g3 h2 g2 */ \
- " movq %%mm5," #op5 " \n\t" \
- " punpckhdq %%mm0, %%mm6 \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \
- " movq " #op0 ", %%mm4 \n\t" /* mm4 = a3 a2 a1 a0 */ \
- " punpckldq %%mm0, %%mm1 \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \
- " movq " #op1 ", %%mm5 \n\t" /* mm5 = b3 b2 b1 b0 */ \
- " movq %%mm4, %%mm0 \n\t" /* mm0 = a3 a2 a1 a0 */ \
- " movq %%mm6," #op7 " \n\t" \
- " punpcklwd %%mm5, %%mm0 \n\t" /* mm0 = b1 a1 b0 a0 */ \
- " movq %%mm1," #op6 " \n\t" \
- " punpckhwd %%mm5, %%mm4 \n\t" /* mm4 = b3 a3 b2 a2 */ \
- " movq %%mm2, %%mm5 \n\t" /* mm5 = c3 c2 c1 c0 */ \
- " punpcklwd %%mm3, %%mm2 \n\t" /* mm2 = d1 c1 d0 c0 */ \
- " movq %%mm0, %%mm1 \n\t" /* mm1 = b1 a1 b0 a0 */ \
- " punpckldq %%mm2, %%mm0 \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \
- " punpckhdq %%mm2, %%mm1 \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \
- " movq %%mm4, %%mm2 \n\t" /* mm2 = b3 a3 b2 a2 */ \
- " movq %%mm0," #op0 " \n\t" \
- " punpckhwd %%mm3, %%mm5 \n\t" /* mm5 = d3 c3 d2 c2 */ \
- " movq %%mm1," #op1 " \n\t" \
- " punpckhdq %%mm5, %%mm4 \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \
- " punpckldq %%mm5, %%mm2 \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \
- " movq %%mm4," #op3 " \n\t" \
- " movq %%mm2," #op2 " \n\t"
-
-
-/* This performs a 2D Forward DCT on an 8x8 block with short
- coefficients. We try to do the truncation to match the C
- version. */
-static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
-{
- ogg_int16_t __attribute__((aligned(8))) temp[8*8];
-
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
- /*
- * Input data is an 8x8 block. To make processing of the data more efficent
- * we will transpose the block of data to two 4x8 blocks???
- */
- Transpose_mmx ( (%0), 16(%0), 32(%0), 48(%0), 8(%0), 24(%0), 40(%0), 56(%0),
- (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1))
- Fdct_mmx ( (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1), (%2))
-
- Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0),
- 64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1))
- Fdct_mmx (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
- Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1),
- 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1))
- Fdct_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2))
-
- Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1),
- 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1))
- Fdct_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
- " emms \n\t"
-
- : "+r" (InputData),
- "+r" (OutputData)
- : "r" (temp),
- [xC1S7] "m" (xC1S7), /* gcc 3.1+ allows named asm parameters */
- [xC2S6] "m" (xC2S6),
- [xC3S5] "m" (xC3S5),
- [xC4S4] "m" (xC4S4),
- [xC5S3] "m" (xC5S3),
- [xC6S2] "m" (xC6S2),
- [xC7S1] "m" (xC7S1)
- : "memory"
- );
-}
-
-/* install our implementation in the function table */
-void dsp_mmx_fdct_init(DspFunctions *funcs)
-{
- funcs->fdct_short = fdct_short__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c
deleted file mode 100644
index 5fc6a1f66..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c
+++ /dev/null
@@ -1,1452 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: idct_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-#define ASM asm
-
-/****************************************************************************
-*
-* Description : IDCT with multiple versions based on # of non 0 coeffs
-*
-*****************************************************************************
-*/
-
-// Dequantization + inverse discrete cosine transform.
-
-// Constants used in MMX implementation of dequantization and idct.
-// All the MMX stuff works with 4 16-bit quantities at a time and
-// we create 11 constants of size 4 x 16 bits.
-// The first 4 are used to mask the individual 16-bit words within a group
-// and are used in the address-shuffling part of the dequantization.
-// The last 7 are fixed-point approximations to the cosines of angles
-// occurring in the DCT; each of these contains 4 copies of the same value.
-
-// There is only one (statically initialized) instance of this object
-// wrapped in an allocator object that forces its starting address
-// to be evenly divisible by 32. Hence the actual object occupies 2.75
-// cache lines on a Pentium processor.
-
-// Offsets in bytes used by the assembler code below
-// must of course agree with the idctConstants constructor.
-
-#define MaskOffset 0 // 4 masks come in order low word to high
-#define CosineOffset 32 // 7 cosines come in order pi/16 * (1 ... 7)
-#define EightOffset 88
-#define IdctAdjustBeforeShift 8
-
-/*
-UINT16 idctcosTbl[ 7] =
-{
- 64277, 60547, 54491, 46341, 36410, 25080, 12785
-};
-
-void fillidctconstants(void)
-{
- int j = 16;
- UINT16 * p;
- do
- {
- idctconstants[ --j] = 0;
- }
- while( j);
-
- idctconstants[0] = idctconstants[5] = idctconstants[10] = idctconstants[15] = 65535;
-
- j = 1;
- do
- {
- p = idctconstants + ( (j+3) << 2);
- p[0] = p[1] = p[2] = p[3] = idctcosTbl[ j - 1];
- }
- while( ++j <= 7);
-
- idctconstants[44] = idctconstants[45] = idctconstants[46] = idctconstants[47] = IdctAdjustBeforeShift;
-}
-*/
-
-ogg_uint16_t idctconstants[(4+7+1) * 4] = {
- 65535, 0, 0, 0, 0, 65535, 0, 0,
- 0, 0, 65535, 0, 0, 0, 0, 65535,
- 64277, 64277, 64277, 64277, 60547, 60547, 60547, 60547,
- 54491, 54491, 54491, 54491, 46341, 46341, 46341, 46341,
- 36410, 36410, 36410, 36410, 25080, 25080, 25080, 25080,
- 12785, 12785, 12785, 12785, 8, 8, 8, 8,
-};
-
-/* Dequantization + inverse DCT.
-
- Dequantization multiplies user's 16-bit signed indices (range -512 to +511)
- by unsigned 16-bit quantization table entries.
- These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14.
- Result is scaled signed DCT coefficients (abs value < 2^15).
-
- In the data stream, the coefficients are sent in order of increasing
- total (horizontal + vertical) frequency. The exact picture is as follows:
-
- 00 01 05 06 16 17 33 34
- 02 04 07 15 20 32 35 52
- 03 10 14 21 31 36 51 53
- 11 13 22 30 37 50 54 65
-
- 12 23 27 40 47 55 64 66
- 24 26 41 46 56 63 67 74
- 25 42 45 57 62 70 73 75
- 43 44 60 61 71 72 76 77
-
- Here the position in the matrix corresponds to the (horiz,vert)
- freqency indices and the octal entry in the matrix is the position
- of the coefficient in the data stream. Thus the coefficients are sent
- in sort of a diagonal "snake".
-
- The dequantization stage "uncurls the snake" and stores the expanded
- coefficients in more convenient positions. These are not exactly the
- natural positions given above but take into account our implementation
- of the idct, which basically requires two one-dimensional idcts and
- two transposes.
-
- We fold the first transpose into the storage of the expanded coefficients.
- We don't actually do a full transpose because this would require doubling
- the size of the idct buffer; rather, we just transpose each of the 4x4
- subblocks. Using slightly varying addressing schemes in each of the
- four 4x8 idcts then allows these transforms to be done in place.
-
- Transposing the 4x4 subblocks in the matrix above gives
-
- 00 02 03 11 16 20 31 37
- 01 04 10 13 17 32 36 50
- 05 07 14 22 33 35 51 54
- 06 15 21 30 34 52 53 65
-
- 12 24 25 43 47 56 62 71
- 23 26 42 44 55 63 70 72
- 27 41 45 60 64 67 73 76
- 40 46 57 61 66 74 75 77
-
- Finally, we reverse the words in each 4 word group to clarify
- direction of shifts.
-
- 11 03 02 00 37 31 20 16
- 13 10 04 01 50 36 32 17
- 22 14 07 05 54 51 35 33
- 30 21 15 06 65 53 52 34
-
- 43 25 24 12 71 62 56 47
- 44 42 26 23 72 70 63 55
- 60 45 41 27 76 73 67 64
- 61 57 46 40 77 75 74 66
-
- This matrix then shows the 16 4x16 destination words in terms of
- the 16 4x16 input words.
-
- We implement this algorithm by manipulation of mmx registers,
- which seems to be the fastest way to proceed. It is completely
- hand-written; there does not seem to be enough recurrence to
- reasonably compartmentalize any of it. Hence the resulting
- program is ugly and bloated. Furthermore, due to the absence of
- register pressure, it is boring and artless. I hate it.
-
- The idct itself is more interesting. Since the two-dimensional dct
- basis functions are products of the one-dimesional dct basis functions,
- we can compute an inverse (or forward) dct via two 1-D transforms,
- on rows then on columns. To exploit MMX parallelism, we actually do
- both operations on columns, interposing a (partial) transpose between
- the two 1-D transforms, the first transpose being done by the expansion
- described above.
-
- The 8-sample one-dimensional DCT is a standard orthogonal expansion using
- the (unnormalized) basis functions
-
- b[k]( i) = cos( pi * k * (2i + 1) / 16);
-
- here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate.
- To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k]
- should be multiplied by 1/2.
-
- The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs
- in each direction. The (unnormalized) basis functions are
-
- B[k,l]( i, j) = b[k]( i) * b[l]( j);
-
- this time k and l are the horizontal and vertical frequencies,
- i and j are the horizontal and vertical spatial coordinates;
- all indices vary from 0 ... 7 (as above)
- and there are now 4 cases of normalization.
-
- Our 1-D idct expansion uses constants C1 ... C7 given by
-
- (*) Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16)
-
- and the following 1-D algorithm transforming I0 ... I7 to R0 ... R7 :
-
- A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
- C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
- A. = C4 * (A - C) B. = C4 * (B - D)
- C. = A + C D. = B + D
-
- E = C4 * (I0 + I4) F = C4 * (I0 - I4)
- G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
- E. = E - G
- G. = E + G
-
- A.. = F + A. B.. = B. - H
- F. = F - A. H. = B. + H
-
- R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
- R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
-
- It is due to Vetterli and Lightenberg and may be found in the JPEG
- reference book by Pennebaker and Mitchell.
-
- Correctness of the algorithm follows from (*) together with the
- addition formulas for sine and cosine:
-
- cos( A + B) = cos( A) * cos( B) - sin( A) * sin( B)
- sin( A + B) = sin( A) * cos( B) + cos( A) * sin( B)
-
- Note that this implementation absorbs the difference in normalization
- between the 0th and higher frequencies, although the results produced
- are actually twice as big as they should be. Since we do this for each
- dimension, the 2-D idct results are 4x the desired results. Finally,
- taking into account that the dequantization multiplies by 4 as well,
- our actual results are 16x too big. We fix this by shifting the final
- results right by 4 bits.
-
- High precision version approximates C1 ... C7 to 16 bits.
- Since MMX only provides a signed multiply, C1 ... C5 appear to be
- negative and multiplies involving them must be adjusted to compensate
- for this. C6 and C7 do not require this adjustment since
- they are < 1/2 and are correctly treated as positive numbers.
-
- Following macro does four 8-sample one-dimensional idcts in parallel.
- This is actually not such a difficult program to write once you
- make a couple of observations (I of course was unable to make these
- observations until I'd half-written a couple of other versions).
-
- 1. Everything is easy once you are done with the multiplies.
- This is because, given X and Y in registers, one may easily
- calculate X+Y and X-Y using just those 2 registers.
-
- 2. You always need at least 2 extra registers to calculate products,
- so storing 2 temporaries is inevitable. C. and D. seem to be
- the best candidates.
-
- 3. The products should be calculated in decreasing order of complexity
- (which translates into register pressure). Since C1 ... C5 require
- adjustment (and C6, C7 do not), we begin by calculating C and D.
-*/
-
-/**************************************************************************************
- *
- * Routine: BeginIDCT
- *
- * Description: The Macro does IDct on 4 1-D Dcts
- *
- * Input: None
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: None
- *
- * Error: None
- *
- ***************************************************************************************
- */
-
-#define MtoSTR(s) #s
-
-#define Dump "call MMX_dump\n"
-
-#define BeginIDCT "#BeginIDCT\n" \
- \
- " movq " I(3)","r2"\n" \
- \
- " movq " C(3)","r6"\n" \
- " movq " r2","r4"\n" \
- " movq " J(5)","r7"\n" \
- " pmulhw " r6","r4"\n" \
- " movq " C(5)","r1"\n" \
- " pmulhw " r7","r6"\n" \
- " movq " r1","r5"\n" \
- " pmulhw " r2","r1"\n" \
- " movq " I(1)","r3"\n" \
- " pmulhw " r7","r5"\n" \
- " movq " C(1)","r0"\n" \
- " paddw " r2","r4"\n" \
- " paddw " r7","r6"\n" \
- " paddw " r1","r2"\n" \
- " movq " J(7)","r1"\n" \
- " paddw " r5","r7"\n" \
- " movq " r0","r5"\n" \
- " pmulhw " r3","r0"\n" \
- " paddsw " r7","r4"\n" \
- " pmulhw " r1","r5"\n" \
- " movq " C(7)","r7"\n" \
- " psubsw " r2","r6"\n" \
- " paddw " r3","r0"\n" \
- " pmulhw " r7","r3"\n" \
- " movq " I(2)","r2"\n" \
- " pmulhw " r1","r7"\n" \
- " paddw " r1","r5"\n" \
- " movq " r2","r1"\n" \
- " pmulhw " C(2)","r2"\n" \
- " psubsw " r5","r3"\n" \
- " movq " J(6)","r5"\n" \
- " paddsw " r7","r0"\n" \
- " movq " r5","r7"\n" \
- " psubsw " r4","r0"\n" \
- " pmulhw " C(2)","r5"\n" \
- " paddw " r1","r2"\n" \
- " pmulhw " C(6)","r1"\n" \
- " paddsw " r4","r4"\n" \
- " paddsw " r0","r4"\n" \
- " psubsw " r6","r3"\n" \
- " paddw " r7","r5"\n" \
- " paddsw " r6","r6"\n" \
- " pmulhw " C(6)","r7"\n" \
- " paddsw " r3","r6"\n" \
- " movq " r4","I(1)"\n" \
- " psubsw " r5","r1"\n" \
- " movq " C(4)","r4"\n" \
- " movq " r3","r5"\n" \
- " pmulhw " r4","r3"\n" \
- " paddsw " r2","r7"\n" \
- " movq " r6","I(2)"\n" \
- " movq " r0","r2"\n" \
- " movq " I(0)","r6"\n" \
- " pmulhw " r4","r0"\n" \
- " paddw " r3","r5"\n" \
- "\n" \
- " movq " J(4)","r3"\n" \
- " psubsw " r1","r5"\n" \
- " paddw " r0","r2"\n" \
- " psubsw " r3","r6"\n" \
- " movq " r6","r0"\n" \
- " pmulhw " r4","r6"\n" \
- " paddsw " r3","r3"\n" \
- " paddsw " r1","r1"\n" \
- " paddsw " r0","r3"\n" \
- " paddsw " r5","r1"\n" \
- " pmulhw " r3","r4"\n" \
- " paddsw " r0","r6"\n" \
- " psubsw " r2","r6"\n" \
- " paddsw " r2","r2"\n" \
- " movq " I(1)","r0"\n" \
- " paddsw " r6","r2"\n" \
- " paddw " r3","r4"\n" \
- " psubsw " r1","r2"\n" \
- "#end BeginIDCT\n"
-// end BeginIDCT macro (38 cycles).
-
-
-// Two versions of the end of the idct depending on whether we're feeding
-// into a transpose or dividing the final results by 16 and storing them.
-
-/**************************************************************************************
- *
- * Routine: RowIDCT
- *
- * Description: The Macro does 1-D IDct on 4 Rows
- *
- * Input: None
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: None
- *
- * Error: None
- *
- ***************************************************************************************
- */
-
-// RowIDCT gets ready to transpose.
-
-#define RowIDCT ASM("\n"\
- "#RowIDCT\n" \
- BeginIDCT \
- "\n" \
- " movq "I(2)","r3"\n" /* r3 = D. */ \
- " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \
- " paddsw "r1","r1"\n" /* r1 = H. + H. */ \
- " paddsw "r7","r7"\n" /* r7 = G + G */ \
- " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\
- " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \
- " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \
- " paddsw "r3","r3"\n" \
- " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\
- " paddsw "r5","r5"\n" \
- " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \
- " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\
- " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \
- " paddsw "r0","r0"\n" \
- " movq "r1","I(1)"\n" /* save R1 */ \
- " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \
- "#end RowIDCT" \
-);
-// end RowIDCT macro (8 + 38 = 46 cycles)
-
-
-/**************************************************************************************
- *
- * Routine: ColumnIDCT
- *
- * Description: The Macro does 1-D IDct on 4 columns
- *
- * Input: None
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: None
- *
- * Error: None
- *
- ***************************************************************************************
- */
-// Column IDCT normalizes and stores final results.
-
-#define ColumnIDCT ASM("\n" \
- "#ColumnIDCT\n" \
- BeginIDCT \
- "\n" \
- " paddsw "Eight","r2"\n" \
- " paddsw "r1","r1"\n" /* r1 = H. + H. */ \
- " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\
- " psraw ""$4"","r2"\n" /* r2 = NR2 */ \
- " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \
- " psraw ""$4"","r1"\n" /* r1 = NR1 */ \
- " movq "I(2)","r3"\n" /* r3 = D. */ \
- " paddsw "r7","r7"\n" /* r7 = G + G */ \
- " movq "r2","I(2)"\n" /* store NR2 at I2 */ \
- " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \
- " movq "r1","I(1)"\n" /* store NR1 at I1 */ \
- " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \
- " paddsw "Eight","r4"\n" \
- " paddsw "r3","r3"\n" /* r3 = D. + D. */ \
- " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \
- " psraw ""$4"","r4"\n" /* r4 = NR4 */ \
- " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\
- " psraw ""$4"","r3"\n" /* r3 = NR3 */ \
- " paddsw "Eight","r6"\n" \
- " paddsw "r5","r5"\n" /* r5 = B.. + B.. */ \
- " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\
- " psraw ""$4"","r6"\n" /* r6 = NR6 */ \
- " movq "r4","J(4)"\n" /* store NR4 at J4 */ \
- " psraw ""$4"","r5"\n" /* r5 = NR5 */ \
- " movq "r3","I(3)"\n" /* store NR3 at I3 */ \
- " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \
- " paddsw "Eight","r7"\n" \
- " paddsw "r0","r0"\n" /* r0 = C. + C. */ \
- " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \
- " psraw ""$4"","r7"\n" /* r7 = NR7 */ \
- " movq "r6","J(6)"\n" /* store NR6 at J6 */ \
- " psraw ""$4"","r0"\n" /* r0 = NR0 */ \
- " movq "r5","J(5)"\n" /* store NR5 at J5 */ \
- " movq "r7","J(7)"\n" /* store NR7 at J7 */ \
- " movq "r0","I(0)"\n" /* store NR0 at I0 */ \
- "#end ColumnIDCT\n" \
-);
-// end ColumnIDCT macro (38 + 19 = 57 cycles)
-
-/**************************************************************************************
- *
- * Routine: Transpose
- *
- * Description: The Macro does two 4x4 transposes in place.
- *
- * Input: None
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: None
- *
- * Error: None
- *
- ***************************************************************************************
- */
-
-/* Following macro does two 4x4 transposes in place.
-
- At entry (we assume):
-
- r0 = a3 a2 a1 a0
- I(1) = b3 b2 b1 b0
- r2 = c3 c2 c1 c0
- r3 = d3 d2 d1 d0
-
- r4 = e3 e2 e1 e0
- r5 = f3 f2 f1 f0
- r6 = g3 g2 g1 g0
- r7 = h3 h2 h1 h0
-
- At exit, we have:
-
- I(0) = d0 c0 b0 a0
- I(1) = d1 c1 b1 a1
- I(2) = d2 c2 b2 a2
- I(3) = d3 c3 b3 a3
-
- J(4) = h0 g0 f0 e0
- J(5) = h1 g1 f1 e1
- J(6) = h2 g2 f2 e2
- J(7) = h3 g3 f3 e3
-
- I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
- J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
-
- Since r1 is free at entry, we calculate the Js first. */
-
-
-#define Transpose ASM("\n#Transpose\n" \
- \
- " movq "r4","r1"\n" \
- " punpcklwd "r5","r4"\n" \
- " movq "r0","I(0)"\n" \
- " punpckhwd "r5","r1"\n" \
- " movq "r6","r0"\n" \
- " punpcklwd "r7","r6"\n" \
- " movq "r4","r5"\n" \
- " punpckldq "r6","r4"\n" \
- " punpckhdq "r6","r5"\n" \
- " movq "r1","r6"\n" \
- " movq "r4","J(4)"\n" \
- " punpckhwd "r7","r0"\n" \
- " movq "r5","J(5)"\n" \
- " punpckhdq "r0","r6"\n" \
- " movq "I(0)","r4"\n" \
- " punpckldq "r0","r1"\n" \
- " movq "I(1)","r5"\n" \
- " movq "r4","r0"\n" \
- " movq "r6","J(7)"\n" \
- " punpcklwd "r5","r0"\n" \
- " movq "r1","J(6)"\n" \
- " punpckhwd "r5","r4"\n" \
- " movq "r2","r5"\n" \
- " punpcklwd "r3","r2"\n" \
- " movq "r0","r1"\n" \
- " punpckldq "r2","r0"\n" \
- " punpckhdq "r2","r1"\n" \
- " movq "r4","r2"\n" \
- " movq "r0","I(0)"\n" \
- " punpckhwd "r3","r5"\n" \
- " movq "r1","I(1)"\n" \
- " punpckhdq "r5","r4"\n" \
- " punpckldq "r5","r2"\n" \
- \
- " movq "r4","I(3)"\n" \
- \
- " movq "r2","I(2)"\n" \
- "#end Transpose\n" \
-);
-// end Transpose macro (19 cycles).
-
-/*
-static void MMX_dump()
-{
- ASM
- ("\
- movq %mm0,(%edi)\n\
- movq %mm1,8(%edi)\n\
- movq %mm2,16(%edi)\n\
- movq %mm3,24(%edi)\n\
- movq %mm4,32(%edi)\n\
- movq %mm5,40(%edi)\n\
- movq %mm6,48(%edi)\n\
- movq %mm7,56(%edi)\n\
- ret"
- );
-}
-*/
-
-/**************************************************************************************
- *
- * Routine: MMX_idct
- *
- * Description: Perform IDCT on a 8x8 block
- *
- * Input: Pointer to input and output buffer
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: The input coefficients are in ZigZag order
- *
- * Error: None
- *
- ***************************************************************************************
- */
-void IDctSlow__mmx( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ) {
-
-# define MIDM(M,I) MtoSTR(M+I*8(%ecx))
-# define M(I) MIDM( MaskOffset , I )
-# define MIDC(M,I) MtoSTR(M+(I-1)*8(%ecx))
-# define C(I) MIDC( CosineOffset , I )
-# define MIDEight(M) MtoSTR(M(%ecx))
-# define Eight MIDEight(EightOffset)
-
-# define r0 "%mm0"
-# define r1 "%mm1"
-# define r2 "%mm2"
-# define r3 "%mm3"
-# define r4 "%mm4"
-# define r5 "%mm5"
-# define r6 "%mm6"
-# define r7 "%mm7"
-
- __asm__ __volatile__ (
- /* eax = quantized input */
- /* esi = quantization table */
- /* edx = destination (= idct buffer) */
- /* ecx = idctconstants */
- ""
- :
- :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants)
- );
-
- ASM(
- "movq (%eax), "r0"\n"
- "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */
- "movq 16(%eax), "r1"\n"
- "pmullw 16(%esi), "r1"\n" /* r1 = 13 12 11 10 */
- "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */
- "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */
- "movq 8(%eax), "r4"\n"
- "psrlq $16, "r0"\n" /* r0 = __ 03 02 01 */
- "pmullw 8(%esi), "r4"\n" /* r4 = 07 06 05 04 */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 00 */
- "movq "r0", "r5"\n" /* r5 = __ 03 02 01 */
- "movq "r1", "r6"\n" /* r6 = 13 12 11 10 */
- "pand "r2", "r5"\n" /* r5 = __ __ __ 01 */
- "psllq $32, "r6"\n" /* r6 = 11 10 __ __ */
- "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */
- "pxor "r5", "r0"\n" /* r0 = __ 03 02 __ */
- "pand "r6", "r7"\n" /* r7 = 11 __ __ __ */
- "por "r3", "r0"\n" /* r0 = __ 03 02 00 */
- "pxor "r7", "r6"\n" /* r6 = __ 10 __ __ */
- "por "r7", "r0"\n" /* r0 = 11 03 02 00 = R0 */
- "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */
- "movq "r4", "r3"\n" /* r3 = 07 06 05 04 */
- "movq "r0", (%edx)\n" /* write R0 = r0 */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 04 */
- "movq 32(%eax), "r0"\n"
- "psllq $16, "r3"\n" /* r3 = __ __ 04 __ */
- "pmullw 32(%esi), "r0"\n" /* r0 = 23 22 21 20 */
- "pand "r1", "r7"\n" /* r7 = 13 __ __ __ */
- "por "r3", "r5"\n" /* r5 = __ __ 04 01 */
- "por "r6", "r7"\n" /* r7 = 13 10 __ __ */
- "movq 24(%eax), "r3"\n"
- "por "r5", "r7"\n" /* r7 = 13 10 04 01 = R1 */
- "pmullw 24(%esi), "r3"\n" /* r3 = 17 16 15 14 */
- "psrlq $16, "r4"\n" /* r4 = __ 07 06 05 */
- "movq "r7", 16(%edx)\n" /* write R1 = r7 */
- "movq "r4", "r5"\n" /* r5 = __ 07 06 05 */
- "movq "r0", "r7"\n" /* r7 = 23 22 21 20 */
- "psrlq $16, "r4"\n" /* r4 = __ __ 07 06 */
- "psrlq $48, "r7"\n" /* r7 = __ __ __ 23 */
- "movq "r2", "r6"\n" /* r6 = __ __ __ FF */
- "pand "r2", "r5"\n" /* r5 = __ __ __ 05 */
- "pand "r4", "r6"\n" /* r6 = __ __ __ 06 */
- "movq "r7", 80(%edx)\n" /* partial R9 = __ __ __ 23 */
- "pxor "r6", "r4"\n" /* r4 = __ __ 07 __ */
- "psrlq $32, "r1"\n" /* r1 = __ __ 13 12 */
- "por "r5", "r4"\n" /* r4 = __ __ 07 05 */
- "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */
- "pand "r2", "r1"\n" /* r1 = __ __ __ 12 */
- "movq 48(%eax), "r5"\n"
- "psllq $16, "r0"\n" /* r0 = 22 21 20 __ */
- "pmullw 48(%esi), "r5"\n" /* r5 = 33 32 31 30 */
- "pand "r0", "r7"\n" /* r7 = 22 __ __ __ */
- "movq "r1", 64(%edx)\n" /* partial R8 = __ __ __ 12 */
- "por "r4", "r7"\n" /* r7 = 22 __ 07 05 */
- "movq "r3", "r4"\n" /* r4 = 17 16 15 14 */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 14 */
- "movq "M(2)", "r1"\n" /* r1 = __ FF __ __ */
- "psllq $32, "r3"\n" /* r3 = __ 14 __ __ */
- "por "r3", "r7"\n" /* r7 = 22 14 07 05 = R2 */
- "movq "r5", "r3"\n" /* r3 = 33 32 31 30 */
- "psllq $48, "r3"\n" /* r3 = 30 __ __ __ */
- "pand "r0", "r1"\n" /* r1 = __ 21 __ __ */
- "movq "r7", 32(%edx)\n" /* write R2 = r7 */
- "por "r3", "r6"\n" /* r6 = 30 __ __ 06 */
- "movq "M(1)", "r7"\n" /* r7 = __ __ FF __ */
- "por "r1", "r6"\n" /* r6 = 30 21 __ 06 */
- "movq 56(%eax), "r1"\n"
- "pand "r4", "r7"\n" /* r7 = __ __ 15 __ */
- "pmullw 56(%esi), "r1"\n" /* r1 = 37 36 35 34 */
- "por "r6", "r7"\n" /* r7 = 30 21 15 06 = R3 */
- "pand "M(1)", "r0"\n" /* r0 = __ __ 20 __ */
- "psrlq $32, "r4"\n" /* r4 = __ __ 17 16 */
- "movq "r7", 48(%edx)\n" /* write R3 = r7 */
- "movq "r4", "r6"\n" /* r6 = __ __ 17 16 */
- "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */
- "pand "r2", "r4"\n" /* r4 = __ __ __ 16 */
- "movq "M(1)", "r3"\n" /* r3 = __ __ FF __ */
- "pand "r1", "r7"\n" /* r7 = 37 __ __ __ */
- "pand "r5", "r3"\n" /* r3 = __ __ 31 __ */
- "por "r4", "r0"\n" /* r0 = __ __ 20 16 */
- "psllq $16, "r3"\n" /* r3 = __ 31 __ __ */
- "por "r0", "r7"\n" /* r7 = 37 __ 20 16 */
- "movq "M(2)", "r4"\n" /* r4 = __ FF __ __ */
- "por "r3", "r7"\n" /* r7 = 37 31 20 16 = R4 */
- "movq 80(%eax), "r0"\n"
- "movq "r4", "r3"\n" /* r3 = __ __ FF __ */
- "pmullw 80(%esi), "r0"\n" /* r0 = 53 52 51 50 */
- "pand "r5", "r4"\n" /* r4 = __ 32 __ __ */
- "movq "r7", 8(%edx)\n" /* write R4 = r7 */
- "por "r4", "r6"\n" /* r6 = __ 32 17 16 */
- "movq "r3", "r4"\n" /* r4 = __ FF __ __ */
- "psrlq $16, "r6"\n" /* r6 = __ __ 32 17 */
- "movq "r0", "r7"\n" /* r7 = 53 52 51 50 */
- "pand "r1", "r4"\n" /* r4 = __ 36 __ __ */
- "psllq $48, "r7"\n" /* r7 = 50 __ __ __ */
- "por "r4", "r6"\n" /* r6 = __ 36 32 17 */
- "movq 88(%eax), "r4"\n"
- "por "r6", "r7"\n" /* r7 = 50 36 32 17 = R5 */
- "pmullw 88(%esi), "r4"\n" /* r4 = 57 56 55 54 */
- "psrlq $16, "r3"\n" /* r3 = __ __ FF __ */
- "movq "r7", 24(%edx)\n" /* write R5 = r7 */
- "pand "r1", "r3"\n" /* r3 = __ __ 35 __ */
- "psrlq $48, "r5"\n" /* r5 = __ __ __ 33 */
- "pand "r2", "r1"\n" /* r1 = __ __ __ 34 */
- "movq 104(%eax), "r6"\n"
- "por "r3", "r5"\n" /* r5 = __ __ 35 33 */
- "pmullw 104(%esi), "r6"\n" /* r6 = 67 66 65 64 */
- "psrlq $16, "r0"\n" /* r0 = __ 53 52 51 */
- "movq "r4", "r7"\n" /* r7 = 57 56 55 54 */
- "movq "r2", "r3"\n" /* r3 = __ __ __ FF */
- "psllq $48, "r7"\n" /* r7 = 54 __ __ __ */
- "pand "r0", "r3"\n" /* r3 = __ __ __ 51 */
- "pxor "r3", "r0"\n" /* r0 = __ 53 52 __ */
- "psllq $32, "r3"\n" /* r3 = __ 51 __ __ */
- "por "r5", "r7"\n" /* r7 = 54 __ 35 33 */
- "movq "r6", "r5"\n" /* r5 = 67 66 65 64 */
- "pand "M(1)", "r6"\n" /* r6 = __ __ 65 __ */
- "por "r3", "r7"\n" /* r7 = 54 51 35 33 = R6 */
- "psllq $32, "r6"\n" /* r6 = 65 __ __ __ */
- "por "r1", "r0"\n" /* r0 = __ 53 52 34 */
- "movq "r7", 40(%edx)\n" /* write R6 = r7 */
- "por "r6", "r0"\n" /* r0 = 65 53 52 34 = R7 */
- "movq 120(%eax), "r7"\n"
- "movq "r5", "r6"\n" /* r6 = 67 66 65 64 */
- "pmullw 120(%esi), "r7"\n" /* r7 = 77 76 75 74 */
- "psrlq $32, "r5"\n" /* r5 = __ __ 67 66 */
- "pand "r2", "r6"\n" /* r6 = __ __ __ 64 */
- "movq "r5", "r1"\n" /* r1 = __ __ 67 66 */
- "movq "r0", 56(%edx)\n" /* write R7 = r0 */
- "pand "r2", "r1"\n" /* r1 = __ __ __ 66 */
- "movq 112(%eax), "r0"\n"
- "movq "r7", "r3"\n" /* r3 = 77 76 75 74 */
- "pmullw 112(%esi), "r0"\n" /* r0 = 73 72 71 70 */
- "psllq $16, "r3"\n" /* r3 = 76 75 74 __ */
- "pand "M(3)", "r7"\n" /* r7 = 77 __ __ __ */
- "pxor "r1", "r5"\n" /* r5 = __ __ 67 __ */
- "por "r5", "r6"\n" /* r6 = __ __ 67 64 */
- "movq "r3", "r5"\n" /* r5 = 76 75 74 __ */
- "pand "M(3)", "r5"\n" /* r5 = 76 __ __ __ */
- "por "r1", "r7"\n" /* r7 = 77 __ __ 66 */
- "movq 96(%eax), "r1"\n"
- "pxor "r5", "r3"\n" /* r3 = __ 75 74 __ */
- "pmullw 96(%esi), "r1"\n" /* r1 = 63 62 61 60 */
- "por "r3", "r7"\n" /* r7 = 77 75 74 66 = R15 */
- "por "r5", "r6"\n" /* r6 = 76 __ 67 64 */
- "movq "r0", "r5"\n" /* r5 = 73 72 71 70 */
- "movq "r7", 120(%edx)\n" /* store R15 = r7 */
- "psrlq $16, "r5"\n" /* r5 = __ 73 72 71 */
- "pand "M(2)", "r5"\n" /* r5 = __ 73 __ __ */
- "movq "r0", "r7"\n" /* r7 = 73 72 71 70 */
- "por "r5", "r6"\n" /* r6 = 76 73 67 64 = R14 */
- "pand "r2", "r0"\n" /* r0 = __ __ __ 70 */
- "pxor "r0", "r7"\n" /* r7 = 73 72 71 __ */
- "psllq $32, "r0"\n" /* r0 = __ 70 __ __ */
- "movq "r6", 104(%edx)\n" /* write R14 = r6 */
- "psrlq $16, "r4"\n" /* r4 = __ 57 56 55 */
- "movq 72(%eax), "r5"\n"
- "psllq $16, "r7"\n" /* r7 = 72 71 __ __ */
- "pmullw 72(%esi), "r5"\n" /* r5 = 47 46 45 44 */
- "movq "r7", "r6"\n" /* r6 = 72 71 __ __ */
- "movq "M(2)", "r3"\n" /* r3 = __ FF __ __ */
- "psllq $16, "r6"\n" /* r6 = 71 __ __ __ */
- "pand "M(3)", "r7"\n" /* r7 = 72 __ __ __ */
- "pand "r1", "r3"\n" /* r3 = __ 62 __ __ */
- "por "r0", "r7"\n" /* r7 = 72 70 __ __ */
- "movq "r1", "r0"\n" /* r0 = 63 62 61 60 */
- "pand "M(3)", "r1"\n" /* r1 = 63 __ __ __ */
- "por "r3", "r6"\n" /* r6 = 71 62 __ __ */
- "movq "r4", "r3"\n" /* r3 = __ 57 56 55 */
- "psrlq $32, "r1"\n" /* r1 = __ __ 63 __ */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 55 */
- "por "r1", "r7"\n" /* r7 = 72 70 63 __ */
- "por "r3", "r7"\n" /* r7 = 72 70 63 55 = R13 */
- "movq "r4", "r3"\n" /* r3 = __ 57 56 55 */
- "pand "M(1)", "r3"\n" /* r3 = __ __ 56 __ */
- "movq "r5", "r1"\n" /* r1 = 47 46 45 44 */
- "movq "r7", 88(%edx)\n" /* write R13 = r7 */
- "psrlq $48, "r5"\n" /* r5 = __ __ __ 47 */
- "movq 64(%eax), "r7"\n"
- "por "r3", "r6"\n" /* r6 = 71 62 56 __ */
- "pmullw 64(%esi), "r7"\n" /* r7 = 43 42 41 40 */
- "por "r5", "r6"\n" /* r6 = 71 62 56 47 = R12 */
- "pand "M(2)", "r4"\n" /* r4 = __ 57 __ __ */
- "psllq $32, "r0"\n" /* r0 = 61 60 __ __ */
- "movq "r6", 72(%edx)\n" /* write R12 = r6 */
- "movq "r0", "r6"\n" /* r6 = 61 60 __ __ */
- "pand "M(3)", "r0"\n" /* r0 = 61 __ __ __ */
- "psllq $16, "r6"\n" /* r6 = 60 __ __ __ */
- "movq 40(%eax), "r5"\n"
- "movq "r1", "r3"\n" /* r3 = 47 46 45 44 */
- "pmullw 40(%esi), "r5"\n" /* r5 = 27 26 25 24 */
- "psrlq $16, "r1"\n" /* r1 = __ 47 46 45 */
- "pand "M(1)", "r1"\n" /* r1 = __ __ 46 __ */
- "por "r4", "r0"\n" /* r0 = 61 57 __ __ */
- "pand "r7", "r2"\n" /* r2 = __ __ __ 40 */
- "por "r1", "r0"\n" /* r0 = 61 57 46 __ */
- "por "r2", "r0"\n" /* r0 = 61 57 46 40 = R11 */
- "psllq $16, "r3"\n" /* r3 = 46 45 44 __ */
- "movq "r3", "r4"\n" /* r4 = 46 45 44 __ */
- "movq "r5", "r2"\n" /* r2 = 27 26 25 24 */
- "movq "r0", 112(%edx)\n" /* write R11 = r0 */
- "psrlq $48, "r2"\n" /* r2 = __ __ __ 27 */
- "pand "M(2)", "r4"\n" /* r4 = __ 45 __ __ */
- "por "r2", "r6"\n" /* r6 = 60 __ __ 27 */
- "movq "M(1)", "r2"\n" /* r2 = __ __ FF __ */
- "por "r4", "r6"\n" /* r6 = 60 45 __ 27 */
- "pand "r7", "r2"\n" /* r2 = __ __ 41 __ */
- "psllq $32, "r3"\n" /* r3 = 44 __ __ __ */
- "por 80(%edx), "r3"\n" /* r3 = 44 __ __ 23 */
- "por "r2", "r6"\n" /* r6 = 60 45 41 27 = R10 */
- "movq "M(3)", "r2"\n" /* r2 = FF __ __ __ */
- "psllq $16, "r5"\n" /* r5 = 26 25 24 __ */
- "movq "r6", 96(%edx)\n" /* store R10 = r6 */
- "pand "r5", "r2"\n" /* r2 = 26 __ __ __ */
- "movq "M(2)", "r6"\n" /* r6 = __ FF __ __ */
- "pxor "r2", "r5"\n" /* r5 = __ 25 24 __ */
- "pand "r7", "r6"\n" /* r6 = __ 42 __ __ */
- "psrlq $32, "r2"\n" /* r2 = __ __ 26 __ */
- "pand "M(3)", "r7"\n" /* r7 = 43 __ __ __ */
- "por "r2", "r3"\n" /* r3 = 44 __ 26 23 */
- "por 64(%edx), "r7"\n" /* r7 = 43 __ __ 12 */
- "por "r3", "r6"\n" /* r6 = 44 42 26 23 = R9 */
- "por "r5", "r7"\n" /* r7 = 43 25 24 12 = R8 */
- "movq "r6", 80(%edx)\n" /* store R9 = r6 */
- "movq "r7", 64(%edx)\n" /* store R8 = r7 */
- );
- /* 123c ( / 64 coeffs < 2c / coeff) */
-# undef M
-
-/* Done w/dequant + descramble + partial transpose; now do the idct itself. */
-
-# define I( K) MtoSTR(K*16(%edx))
-# define J( K) MtoSTR(((K - 4)*16)+8(%edx))
-
- RowIDCT /* 46 c */
- Transpose /* 19 c */
-
-# undef I
-# undef J
-# define I( K) MtoSTR((K*16)+64(%edx))
-# define J( K) MtoSTR(((K-4)*16)+72(%edx))
-
- RowIDCT /* 46 c */
- Transpose /* 19 c */
-
-# undef I
-# undef J
-# define I( K) MtoSTR((K * 16)(%edx))
-# define J( K) I( K)
-
- ColumnIDCT /* 57 c */
-
-# undef I
-# undef J
-# define I( K) MtoSTR((K*16)+8(%edx))
-# define J( K) I( K)
-
- ColumnIDCT /* 57 c */
-
-# undef I
-# undef J
- /* 368 cycles ( / 64 coeff < 6 c / coeff) */
-
- ASM("emms\n");
-}
-
-/**************************************************************************************
- *
- * Routine: MMX_idct10
- *
- * Description: Perform IDCT on a 8x8 block with at most 10 nonzero coefficients
- *
- * Input: Pointer to input and output buffer
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: The input coefficients are in transposed ZigZag order
- *
- * Error: None
- *
- ***************************************************************************************
- */
-/* --------------------------------------------------------------- */
-// This macro does four 4-sample one-dimensional idcts in parallel. Inputs
-// 4 thru 7 are assumed to be zero.
-#define BeginIDCT_10 "#BeginIDCT_10\n" \
- " movq "I(3)","r2"\n" \
- \
- " movq "C(3)","r6"\n" \
- " movq "r2","r4"\n" \
- \
- " movq "C(5)","r1"\n" \
- " pmulhw "r6","r4"\n" \
- \
- " movq "I(1)","r3"\n" \
- " pmulhw "r2","r1"\n" \
- \
- " movq "C(1)","r0"\n" \
- " paddw "r2","r4"\n" \
- \
- " pxor "r6","r6"\n" \
- " paddw "r1","r2"\n" \
- \
- " movq "I(2)","r5"\n" \
- " pmulhw "r3","r0"\n" \
- \
- " movq "r5","r1"\n" \
- " paddw "r3","r0"\n" \
- \
- " pmulhw "C(7)","r3"\n" \
- " psubsw "r2","r6"\n" \
- \
- " pmulhw "C(2)","r5"\n" \
- " psubsw "r4","r0"\n" \
- \
- " movq "I(2)","r7"\n" \
- " paddsw "r4","r4"\n" \
- \
- " paddw "r5","r7"\n" \
- " paddsw "r0","r4"\n" \
- \
- " pmulhw "C(6)","r1"\n" \
- " psubsw "r6","r3"\n" \
- \
- " movq "r4","I(1)"\n" \
- " paddsw "r6","r6"\n" \
- \
- " movq "C(4)","r4"\n" \
- " paddsw "r3","r6"\n" \
- \
- " movq "r3","r5"\n" \
- " pmulhw "r4","r3"\n" \
- \
- " movq "r6","I(2)"\n" \
- " movq "r0","r2"\n" \
- \
- " movq "I(0)","r6"\n" \
- " pmulhw "r4","r0"\n" \
- \
- " paddw "r3","r5"\n" \
- " paddw "r0","r2"\n" \
- \
- " psubsw "r1","r5"\n" \
- " pmulhw "r4","r6"\n" \
- \
- " paddw "I(0)","r6"\n" \
- " paddsw "r1","r1"\n" \
- \
- " movq "r6","r4"\n" \
- " paddsw "r5","r1"\n" \
- \
- " psubsw "r2","r6"\n" \
- " paddsw "r2","r2"\n" \
- \
- " movq "I(1)","r0"\n" \
- " paddsw "r6","r2"\n" \
- \
- " psubsw "r1","r2"\n" \
- "#end BeginIDCT_10\n"
-// end BeginIDCT_10 macro (25 cycles).
-
-#define RowIDCT_10 ASM("\n" \
- "#RowIDCT_10\n" \
- BeginIDCT_10 \
- "\n" \
- " movq "I(2)","r3"\n" /* r3 = D. */ \
- " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \
- " paddsw "r1","r1"\n" /* r1 = H. + H. */ \
- " paddsw "r7","r7"\n" /* r7 = G + G */ \
- " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\
- " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \
- " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \
- " paddsw "r3","r3"\n" \
- " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\
- " paddsw "r5","r5"\n" \
- " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \
- " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\
- " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \
- " paddsw "r0","r0"\n" \
- " movq "r1","I(1)"\n" /* save R1 */ \
- " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \
- "#end RowIDCT_10\n" \
-);
-// end RowIDCT macro (8 + 38 = 46 cycles)
-
-// Column IDCT normalizes and stores final results.
-
-#define ColumnIDCT_10 ASM("\n" \
- "#ColumnIDCT_10\n" \
- BeginIDCT_10 \
- "\n" \
- " paddsw "Eight","r2"\n" \
- " paddsw "r1","r1"\n" /* r1 = H. + H. */ \
- " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\
- " psraw ""$4"","r2"\n" /* r2 = NR2 */ \
- " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \
- " psraw ""$4"","r1"\n" /* r1 = NR1 */ \
- " movq "I(2)","r3"\n" /* r3 = D. */ \
- " paddsw "r7","r7"\n" /* r7 = G + G */ \
- " movq "r2","I(2)"\n" /* store NR2 at I2 */ \
- " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \
- " movq "r1","I(1)"\n" /* store NR1 at I1 */ \
- " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \
- " paddsw "Eight","r4"\n" \
- " paddsw "r3","r3"\n" /* r3 = D. + D. */ \
- " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \
- " psraw ""$4"","r4"\n" /* r4 = NR4 */ \
- " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\
- " psraw ""$4"","r3"\n" /* r3 = NR3 */ \
- " paddsw "Eight","r6"\n" \
- " paddsw "r5","r5"\n" /* r5 = B.. + B.. */ \
- " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\
- " psraw ""$4"","r6"\n" /* r6 = NR6 */ \
- " movq "r4","J(4)"\n" /* store NR4 at J4 */ \
- " psraw ""$4"","r5"\n" /* r5 = NR5 */ \
- " movq "r3","I(3)"\n" /* store NR3 at I3 */ \
- " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \
- " paddsw "Eight","r7"\n" \
- " paddsw "r0","r0"\n" /* r0 = C. + C. */ \
- " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \
- " psraw ""$4"","r7"\n" /* r7 = NR7 */ \
- " movq "r6","J(6)"\n" /* store NR6 at J6 */ \
- " psraw ""$4"","r0"\n" /* r0 = NR0 */ \
- " movq "r5","J(5)"\n" /* store NR5 at J5 */ \
- \
- " movq "r7","J(7)"\n" /* store NR7 at J7 */ \
- \
- " movq "r0","I(0)"\n" /* store NR0 at I0 */ \
- "#end ColumnIDCT_10\n" \
-);
-// end ColumnIDCT macro (38 + 19 = 57 cycles)
-/* --------------------------------------------------------------- */
-
-
-/* --------------------------------------------------------------- */
-/* IDCT 10 */
-void IDct10__mmx( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ) {
-
-# define MIDM(M,I) MtoSTR(M+I*8(%ecx))
-# define M(I) MIDM( MaskOffset , I )
-# define MIDC(M,I) MtoSTR(M+(I-1)*8(%ecx))
-# define C(I) MIDC( CosineOffset , I )
-# define MIDEight(M) MtoSTR(M(%ecx))
-# define Eight MIDEight(EightOffset)
-
-# define r0 "%mm0"
-# define r1 "%mm1"
-# define r2 "%mm2"
-# define r3 "%mm3"
-# define r4 "%mm4"
-# define r5 "%mm5"
-# define r6 "%mm6"
-# define r7 "%mm7"
-
- __asm__ __volatile__ (
- /* eax = quantized input */
- /* esi = quantization table */
- /* edx = destination (= idct buffer) */
- /* ecx = idctconstants */
- ""
- :
- :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants)
- );
-
- ASM(
- "movq (%eax), "r0"\n"
- "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */
- "movq 16(%eax), "r1"\n"
- "pmullw 16(%esi), "r1"\n" /* r1 = 13 12 11 10 */
- "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */
- "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */
- "movq 8(%eax), "r4"\n"
- "psrlq $16, "r0"\n" /* r0 = __ 03 02 01 */
- "pmullw 8(%esi), "r4"\n" /* r4 = 07 06 05 04 */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 00 */
- "movq "r0", "r5"\n" /* r5 = __ 03 02 01 */
- "pand "r2", "r5"\n" /* r5 = __ __ __ 01 */
- "psllq $32, "r1"\n" /* r1 = 11 10 __ __ */
- "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */
- "pxor "r5", "r0"\n" /* r0 = __ 03 02 __ */
- "pand "r1", "r7"\n" /* r7 = 11 __ __ __ */
- "por "r3", "r0"\n" /* r0 = __ 03 02 00 */
- "pxor "r7", "r1"\n" /* r1 = __ 10 __ __ */
- "por "r7", "r0"\n" /* r0 = 11 03 02 00 = R0 */
- "movq "r4", "r3"\n" /* r3 = 07 06 05 04 */
- "movq "r0", (%edx)\n" /* write R0 = r0 */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 04 */
- "psllq $16, "r3"\n" /* r3 = __ __ 04 __ */
- "por "r3", "r5"\n" /* r5 = __ __ 04 01 */
- "por "r5", "r1"\n" /* r1 = __ 10 04 01 = R1 */
- "psrlq $16, "r4"\n" /* r4 = __ 07 06 05 */
- "movq "r1", 16(%edx)\n" /* write R1 = r1 */
- "movq "r4", "r5"\n" /* r5 = __ 07 06 05 */
- "psrlq $16, "r4"\n" /* r4 = __ __ 07 06 */
- "movq "r2", "r6"\n" /* r6 = __ __ __ FF */
- "pand "r2", "r5"\n" /* r5 = __ __ __ 05 */
- "pand "r4", "r6"\n" /* r6 = __ __ __ 06 */
- "pxor "r6", "r4"\n" /* r4 = __ __ 07 __ */
- "por "r5", "r4"\n" /* r4 = __ __ 07 05 */
- "movq "r4", 32(%edx)\n" /* write R2 = r4 */
- "movq "r6", 48(%edx)\n" /* write R3 = r6 */
- );
-# undef M
-
-/* Done w/dequant + descramble + partial transpose; now do the idct itself. */
-
-# define I( K) MtoSTR((K*16)(%edx))
-# define J( K) MtoSTR(((K - 4) * 16)+8(%edx))
-
- RowIDCT_10 /* 33 c */
- Transpose /* 19 c */
-
-# undef I
-# undef J
-//# define I( K) [edx + ( K * 16) + 64]
-//# define J( K) [edx + ( (K - 4) * 16) + 72]
-
-// RowIDCT ; 46 c
-// Transpose ; 19 c
-
-//# undef I
-//# undef J
-# define I( K) MtoSTR((K * 16)(%edx))
-# define J( K) I( K)
-
- ColumnIDCT_10 /* 44 c */
-
-# undef I
-# undef J
-# define I( K) MtoSTR((K * 16)+8(%edx))
-# define J( K) I( K)
-
- ColumnIDCT_10 /* 44 c */
-
-# undef I
-# undef J
-
- ASM("emms\n");
-}
-
-/**************************************************************************************
- *
- * Routine: MMX_idct3
- *
- * Description: Perform IDCT on a 8x8 block with at most 3 nonzero coefficients
- *
- * Input: Pointer to input and output buffer
- *
- * Output: None
- *
- * Return: None
- *
- * Special Note: Only works for three nonzero coefficients.
- *
- * Error: None
- *
- ***************************************************************************************
- */
-/***************************************************************************************
- In IDCT 3, we are dealing with only three Non-Zero coefficients in the 8x8 block.
- In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to
- do 1-D row idcts on the first two rows, the rest six rows remain zero anyway.
- After row IDCTs, since every column could have nonzero coefficients, we need do
- eight 1-D column IDCT. However, for each column, there are at most two nonzero
- coefficients, coefficient 0 and coefficient 1. Same for the coefficents for the
- two 1-d row idcts. For this reason, the process of a 1-D IDCT is simplified
-
- from a full version:
-
- A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
- C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
- A. = C4 * (A - C) B. = C4 * (B - D)
- C. = A + C D. = B + D
-
- E = C4 * (I0 + I4) F = C4 * (I0 - I4)
- G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
- E. = E - G
- G. = E + G
-
- A.. = F + A. B.. = B. - H
- F. = F - A. H. = B. + H
-
- R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
- R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
-
- To:
-
-
- A = (C1 * I1) B = (C7 * I1)
- C = 0 D = 0
- A. = C4 * A B. = C4 * B
- C. = A D. = B
-
- E = C4 * I0 F = E
- G = 0 H = 0
- E. = E
- G. = E
-
- A.. = E + A. B.. = B.
- F. = E - A. H. = B.
-
- R0 = E + A R1 = E + A. + B. R3 = E + B R5 = E - A. + B.
- R7 = E - A R2 = E + A. - B. R4 = E - B R6 = F - A. - B.
-
-******************************************************************************************/
-
-#define RowIDCT_3 ASM("\n"\
- "#RowIDCT_3\n"\
- " movq "I(1)","r7"\n" /* r7 = I1 */ \
- " movq "C(1)","r0"\n" /* r0 = C1 */ \
- " movq "C(7)","r3"\n" /* r3 = C7 */ \
- " pmulhw "r7","r0"\n" /* r0 = C1 * I1 - I1 */ \
- " pmulhw "r7","r3"\n" /* r3 = C7 * I1 = B, D. */ \
- " movq "I(0)","r6"\n" /* r6 = I0 */ \
- " movq "C(4)","r4"\n" /* r4 = C4 */ \
- " paddw "r7","r0"\n" /* r0 = C1 * I1 = A, C. */ \
- " movq "r6","r1"\n" /* make a copy of I0 */ \
- " pmulhw "r4","r6"\n" /* r2 = C4 * I0 - I0 */ \
- " movq "r0","r2"\n" /* make a copy of A */ \
- " movq "r3","r5"\n" /* make a copy of B */ \
- " pmulhw "r4","r2"\n" /* r2 = C4 * A - A */ \
- " pmulhw "r4","r5"\n" /* r5 = C4 * B - B */ \
- " paddw "r1","r6"\n" /* r2 = C4 * I0 = E, F */ \
- " movq "r6","r4"\n" /* r4 = E */ \
- " paddw "r0","r2"\n" /* r2 = A. */ \
- " paddw "r3","r5"\n" /* r5 = B. */ \
- " movq "r6","r7"\n" /* r7 = E */ \
- " movq "r5","r1"\n" /* r1 = B. */ \
- /* r0 = A */ \
- /* r3 = B */ \
- /* r2 = A. */ \
- /* r5 = B. */ \
- /* r6 = E */ \
- /* r4 = E */ \
- /* r7 = E */ \
- /* r1 = B. */ \
- " psubw "r2","r6"\n" /* r6 = E - A. */ \
- " psubw "r3","r4"\n" /* r4 = E - B ----R4 */ \
- " psubw "r0","r7"\n" /* r7 = E - A ----R7 */ \
- " paddw "r2","r2"\n" /* r2 = A. + A. */ \
- " paddw "r3","r3"\n" /* r3 = B + B */ \
- " paddw "r0","r0"\n" /* r0 = A + A */ \
- " paddw "r6","r2"\n" /* r2 = E + A. */ \
- " paddw "r4","r3"\n" /* r3 = E + B ----R3 */ \
- " psubw "r1","r2"\n" /* r2 = E + A. - B. ----R2 */ \
- " psubw "r5","r6"\n" /* r6 = E - A. - B. ----R6 */ \
- " paddw "r1","r1"\n" /* r1 = B. + B. */ \
- " paddw "r5","r5"\n" /* r5 = B. + B. */ \
- " paddw "r7","r0"\n" /* r0 = E + A ----R0 */ \
- " paddw "r2","r1"\n" /* r1 = E + A. + B. -----R1 */ \
- " movq "r1","I(1)"\n" /* save r1 */ \
- " paddw "r6","r5"\n" /* r5 = E - A. + B. -----R5 */ \
- "#end RowIDCT_3\n"\
-);
-//End of RowIDCT_3
-
-#define ColumnIDCT_3 ASM("\n"\
- "#ColumnIDCT_3\n"\
- " movq "I(1)","r7"\n" /* r7 = I1 */ \
- " movq "C(1)","r0"\n" /* r0 = C1 */ \
- " movq "C(7)","r3"\n" /* r3 = C7 */ \
- " pmulhw "r7","r0"\n" /* r0 = C1 * I1 - I1 */ \
- " pmulhw "r7","r3"\n" /* r3 = C7 * I1 = B, D. */ \
- " movq "I(0)","r6"\n" /* r6 = I0 */ \
- " movq "C(4)","r4"\n" /* r4 = C4 */ \
- " paddw "r7","r0"\n" /* r0 = C1 * I1 = A, C. */ \
- " movq "r6","r1"\n" /* make a copy of I0 */ \
- " pmulhw "r4","r6"\n" /* r2 = C4 * I0 - I0 */ \
- " movq "r0","r2"\n" /* make a copy of A */ \
- " movq "r3","r5"\n" /* make a copy of B */ \
- " pmulhw "r4","r2"\n" /* r2 = C4 * A - A */ \
- " pmulhw "r4","r5"\n" /* r5 = C4 * B - B */ \
- " paddw "r1","r6"\n" /* r2 = C4 * I0 = E, F */ \
- " movq "r6","r4"\n" /* r4 = E */ \
- " paddw "Eight","r6"\n" /* +8 for shift */ \
- " paddw "Eight","r4"\n" /* +8 for shift */ \
- " paddw "r0","r2"\n" /* r2 = A. */ \
- " paddw "r3","r5"\n" /* r5 = B. */ \
- " movq "r6","r7"\n" /* r7 = E */ \
- " movq "r5","r1"\n" /* r1 = B. */ \
-/* r0 = A */ \
-/* r3 = B */ \
-/* r2 = A. */ \
-/* r5 = B. */ \
-/* r6 = E */ \
-/* r4 = E */ \
-/* r7 = E */ \
-/* r1 = B. */ \
- " psubw "r2","r6"\n" /* r6 = E - A. */ \
- " psubw "r3","r4"\n" /* r4 = E - B ----R4 */ \
- " psubw "r0","r7"\n" /* r7 = E - A ----R7 */ \
- " paddw "r2","r2"\n" /* r2 = A. + A. */ \
- " paddw "r3","r3"\n" /* r3 = B + B */ \
- " paddw "r0","r0"\n" /* r0 = A + A */ \
- " paddw "r6","r2"\n" /* r2 = E + A. */ \
- " paddw "r4","r3"\n" /* r3 = E + B ----R3 */ \
- " psraw $4,"r4"\n" /* shift */ \
- " movq "r4","J(4)"\n" /* store R4 at J4 */ \
- " psraw $4,"r3"\n" /* shift */ \
- " movq "r3","I(3)"\n" /* store R3 at I3 */ \
- " psubw "r1","r2"\n" /* r2 = E + A. - B. ----R2 */ \
- " psubw "r5","r6"\n" /* r6 = E - A. - B. ----R6 */ \
- " paddw "r1","r1"\n" /* r1 = B. + B. */ \
- " paddw "r5","r5"\n" /* r5 = B. + B. */ \
- " paddw "r7","r0"\n" /* r0 = E + A ----R0 */ \
- " paddw "r2","r1"\n" /* r1 = E + A. + B. -----R1 */ \
- " psraw $4,"r7"\n" /* shift */ \
- " psraw $4,"r2"\n" /* shift */ \
- " psraw $4,"r0"\n" /* shift */ \
- " psraw $4,"r1"\n" /* shift */ \
- " movq "r7","J(7)"\n" /* store R7 to J7 */ \
- " movq "r0","I(0)"\n" /* store R0 to I0 */ \
- " movq "r1","I(1)"\n" /* store R1 to I1 */ \
- " movq "r2","I(2)"\n" /* store R2 to I2 */ \
- " movq "r1","I(1)"\n" /* save r1 */ \
- " paddw "r6","r5"\n" /* r5 = E - A. + B. -----R5 */ \
- " psraw $4,"r5"\n" /* shift */ \
- " movq "r5","J(5)"\n" /* store R5 at J5 */ \
- " psraw $4,"r6"\n" /* shift */ \
- " movq "r6","J(6)"\n" /* store R6 at J6 */ \
- "#end ColumnIDCT_3\n"\
-);
-//End of ColumnIDCT_3
-
-void IDct3__mmx( Q_LIST_ENTRY * InputData,
- ogg_int16_t *QuantMatrix,
- ogg_int16_t * OutputData ) {
-
-# define MIDM(M,I) MtoSTR(M+I*8(%ecx))
-# define M(I) MIDM( MaskOffset , I )
-# define MIDC(M,I) MtoSTR(M+(I-1)*8(%ecx))
-# define C(I) MIDC( CosineOffset , I )
-# define MIDEight(M) MtoSTR(M(%ecx))
-# define Eight MIDEight(EightOffset)
-
-# define r0 "%mm0"
-# define r1 "%mm1"
-# define r2 "%mm2"
-# define r3 "%mm3"
-# define r4 "%mm4"
-# define r5 "%mm5"
-# define r6 "%mm6"
-# define r7 "%mm7"
-
- __asm__ __volatile__ (
- /* eax = quantized input */
- /* esi = quantization table */
- /* edx = destination (= idct buffer) */
- /* ecx = idctconstants */
- ""
- :
- :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants)
- );
-
- ASM(
- "movq (%eax), "r0"\n"
- "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */
- "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */
- "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */
- "psrlq $16, "r0"\n" /* r0 = __ 03 02 01 */
- "pand "r2", "r3"\n" /* r3 = __ __ __ 00 */
- "movq "r0", "r5"\n" /* r5 = __ 03 02 01 */
- "pand "r2", "r5"\n" /* r5 = __ __ __ 01 */
- "pxor "r5", "r0"\n" /* r0 = __ 03 02 __ */
- "por "r3", "r0"\n" /* r0 = __ 03 02 00 */
- "movq "r0", (%edx)\n" /* write R0 = r0 */
- "movq "r5", 16(%edx)\n" /* write R1 = r5 */
- );
-# undef M
-
-/* Done partial transpose; now do the idct itself. */
-
-# define I( K) MtoSTR(K*16(%edx))
-# define J( K) MtoSTR(((K - 4)*16)+8(%edx))
-
- RowIDCT_3 /* 33 c */
- Transpose /* 19 c */
-
-# undef I
-# undef J
-//# define I( K) [edx + ( K * 16) + 64]
-//# define J( K) [edx + ( (K - 4) * 16) + 72]
-
-// RowIDCT ; 46 c
-// Transpose ; 19 c
-
-//# undef I
-//# undef J
-# define I( K) MtoSTR((K * 16)(%edx))
-# define J( K) I( K)
-
- ColumnIDCT_3 /* 44 c */
-
-# undef I
-# undef J
-# define I( K) MtoSTR((K*16)+8(%edx))
-# define J( K) I( K)
-
- ColumnIDCT_3 /* 44 c */
-
-# undef I
-# undef J
-
- ASM("emms\n");
-}
-
-
-/* install our implementation in the function table */
-void dsp_mmx_idct_init(DspFunctions *funcs)
-{
- funcs->IDctSlow = IDctSlow__mmx;
- funcs->IDct10 = IDct10__mmx;
- funcs->IDct3 = IDct3__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c
deleted file mode 100644
index 7a931afe4..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: recon_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL;
-
-static void copy8x8__mmx (unsigned char *src,
- unsigned char *dest,
- unsigned int stride)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " lea (%2, %2, 2), %%edi \n\t"
-
- " movq (%1), %%mm0 \n\t"
- " movq (%1, %2), %%mm1 \n\t"
- " movq (%1, %2, 2), %%mm2 \n\t"
- " movq (%1, %%edi), %%mm3 \n\t"
-
- " lea (%1, %2, 4), %1 \n\t"
-
- " movq %%mm0, (%0) \n\t"
- " movq %%mm1, (%0, %2) \n\t"
- " movq %%mm2, (%0, %2, 2) \n\t"
- " movq %%mm3, (%0, %%edi) \n\t"
-
- " lea (%0, %2, 4), %0 \n\t"
-
- " movq (%1), %%mm0 \n\t"
- " movq (%1, %2), %%mm1 \n\t"
- " movq (%1, %2, 2), %%mm2 \n\t"
- " movq (%1, %%edi), %%mm3 \n\t"
-
- " movq %%mm0, (%0) \n\t"
- " movq %%mm1, (%0, %2) \n\t"
- " movq %%mm2, (%0, %2, 2) \n\t"
- " movq %%mm3, (%0, %%edi) \n\t"
- : "+a" (dest)
- : "c" (src),
- "d" (stride)
- : "memory", "edi"
- );
-}
-
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " movq %[V128], %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */
-
- " lea 128(%1), %%edi \n\t" /* Endpoint in input buffer */
- "1: \n\t"
- " movq (%1), %%mm2 \n\t" /* First four input values */
-
- " packsswb 8(%1), %%mm2 \n\t" /* pack with next(high) four values */
- " por %%mm0, %%mm0 \n\t"
- " pxor %%mm0, %%mm2 \n\t" /* Convert result to unsigned (same as add 128) */
- " lea 16(%1), %1 \n\t" /* Step source buffer */
- " cmp %%edi, %1 \n\t" /* are we done */
-
- " movq %%mm2, (%0) \n\t" /* store results */
-
- " lea (%0, %2), %0 \n\t" /* Step output buffer */
- " jc 1b \n\t" /* Loop back if we are not done */
- : "+r" (ReconPtr)
- : "r" (ChangePtr),
- "r" (LineStep),
- [V128] "m" (V128)
- : "memory", "edi"
- );
-}
-
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
- ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm0, %%mm0 \n\t"
- " lea 128(%1), %%edi \n\t"
-
- "1: \n\t"
- " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */
-
- " movq (%1), %%mm4 \n\t" /* first 4 changes */
- " movq %%mm2, %%mm3 \n\t"
- " movq 8(%1), %%mm5 \n\t" /* last 4 changes */
- " punpcklbw %%mm0, %%mm2 \n\t" /* turn first 4 refs into positive 16-bit #s */
- " paddsw %%mm4, %%mm2 \n\t" /* add in first 4 changes */
- " punpckhbw %%mm0, %%mm3 \n\t" /* turn last 4 refs into positive 16-bit #s */
- " paddsw %%mm5, %%mm3 \n\t" /* add in last 4 changes */
- " add %3, %2 \n\t" /* next row of reference pixels */
- " packuswb %%mm3, %%mm2 \n\t" /* pack result to unsigned 8-bit values */
- " lea 16(%1), %1 \n\t" /* next row of changes */
- " cmp %%edi, %1 \n\t" /* are we done? */
-
- " movq %%mm2, (%0) \n\t" /* store result */
-
- " lea (%0, %3), %0 \n\t" /* next row of output */
- " jc 1b \n\t"
- : "+r" (ReconPtr)
- : "r" (ChangePtr),
- "r" (RefPtr),
- "r" (LineStep)
- : "memory", "edi"
- );
-}
-
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
- unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- __asm__ __volatile__ (
- " .p2align 4 \n\t"
-
- " pxor %%mm0, %%mm0 \n\t"
- " lea 128(%1), %%edi \n\t"
-
- "1: \n\t"
- " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */
- " movq (%3), %%mm4 \n\t" /* (+3 misaligned) 8 reference pixels */
-
- " movq %%mm2, %%mm3 \n\t"
- " punpcklbw %%mm0, %%mm2 \n\t" /* mm2 = start ref1 as positive 16-bit #s */
- " movq %%mm4, %%mm5 \n\t"
- " movq (%1), %%mm6 \n\t" /* first 4 changes */
- " punpckhbw %%mm0, %%mm3 \n\t" /* mm3 = end ref1 as positive 16-bit #s */
- " movq 8(%1), %%mm7 \n\t" /* last 4 changes */
- " punpcklbw %%mm0, %%mm4 \n\t" /* mm4 = start ref2 as positive 16-bit #s */
- " punpckhbw %%mm0, %%mm5 \n\t" /* mm5 = end ref2 as positive 16-bit #s */
- " paddw %%mm4, %%mm2 \n\t" /* mm2 = start (ref1 + ref2) */
- " paddw %%mm5, %%mm3 \n\t" /* mm3 = end (ref1 + ref2) */
- " psrlw $1, %%mm2 \n\t" /* mm2 = start (ref1 + ref2)/2 */
- " psrlw $1, %%mm3 \n\t" /* mm3 = end (ref1 + ref2)/2 */
- " paddw %%mm6, %%mm2 \n\t" /* add changes to start */
- " paddw %%mm7, %%mm3 \n\t" /* add changes to end */
- " lea 16(%1), %1 \n\t" /* next row of changes */
- " packuswb %%mm3, %%mm2 \n\t" /* pack start|end to unsigned 8-bit */
- " add %4, %2 \n\t" /* next row of reference pixels */
- " add %4, %3 \n\t" /* next row of reference pixels */
- " movq %%mm2, (%0) \n\t" /* store result */
- " add %4, %0 \n\t" /* next row of output */
- " cmp %%edi, %1 \n\t" /* are we done? */
- " jc 1b \n\t"
- : "+r" (ReconPtr)
- : "r" (ChangePtr),
- "r" (RefPtr1),
- "r" (RefPtr2),
- "m" (LineStep)
- : "memory", "edi"
- );
-}
-
-void dsp_mmx_recon_init(DspFunctions *funcs)
-{
- funcs->copy8x8 = copy8x8__mmx;
- funcs->recon_intra8x8 = recon_intra8x8__mmx;
- funcs->recon_inter8x8 = recon_inter8x8__mmx;
- funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c
deleted file mode 100644
index cecc0eb76..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c
+++ /dev/null
@@ -1,1605 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if 0
-//These are to let me selectively enable the C versions, these are needed
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-#endif
-
-
-static const ogg_int64_t V128 = 0x0080008000800080;
-
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
- ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
-
- //Make non-zero to use the C-version
-#if 0
- int i;
-
- /* For each block row */
- for (i=8; i; i--) {
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- ReconPtr += ReconPixelsPerLine;
- DctInputPtr += 8;
- }
-#else
- __asm {
- align 16
-
- pxor mm7, mm7
-
- mov eax, FiltPtr
- mov ebx, ReconPtr
- mov edx, DctInputPtr
-
- /* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/
-
- /* ITERATION 1 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 2 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 3 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 4 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 5 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 6 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 7 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
- /* ITERATION 8 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm1, [ebx] /* mm1 = ReconPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- movq mm3, mm1 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
- psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
- movq [edx], mm0 /* write answer out */
- movq [8 + edx], mm2 /* write answer out */
- /* Increment pointers */
- add edx, 16
- add eax, PixelsPerLine
- add ebx, ReconPixelsPerLine
-
-
-
-
-
- };
-
-#endif
-}
-
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine)
-{
-
-#if 0
- int i;
- /* For each block row */
- for (i=8; i; i--) {
- /* INTRA mode so code raw image data */
- /* We convert the data to 8 bit signed (by subtracting 128) as
- this reduces the internal precision requirments in the DCT
- transform. */
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- DctInputPtr += 8;
- }
-
-#else
- __asm {
- align 16
-
- pxor mm7, mm7
-
- mov eax, FiltPtr
- mov ebx, DctInputPtr
-
- movq mm1, V128
-
- /* ITERATION 1 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 2 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 3 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 4 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 5 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 6 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 7 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
-
- /* ITERATION 8 */
- movq mm0, [eax] /* mm0 = FiltPtr */
- movq mm2, mm0 /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
- punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
- psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
- movq [ebx], mm0 /* write answer out */
- movq [8 + ebx], mm2 /* write answer out */
- /* Increment pointers */
- add ebx, 16
- add eax, PixelsPerLine
-
- };
-
-#endif
-}
-
-
-
-
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
-
-#if 0
- int i;
-
- /* For each block row */
- for (i=8; i; i--) {
- DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
- DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
- DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
- DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
- DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
- DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
- DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
- DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
-
- /* Start next row */
- FiltPtr += PixelsPerLine;
- ReconPtr1 += ReconPixelsPerLine;
- ReconPtr2 += ReconPixelsPerLine;
- DctInputPtr += 8;
- }
-#else
-
- __asm {
- align 16
-
- pxor mm7, mm7
-
- mov eax, FiltPtr
- mov ebx, ReconPtr1
- mov ecx, ReconPtr2
- mov edx, DctInputPtr
-
- /* ITERATION 1 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 2 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 3 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 4 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 5 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 6 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 7 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
-
- /* ITERATION 8 */
- movq mm0, [eax] ; /* mm0 = FiltPtr */
- movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
- movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
- movq mm2, mm0 ; /* dup to prepare for up conversion */
- movq mm3, mm1 ; /* dup to prepare for up conversion */
- movq mm5, mm4 ; /* dup to prepare for up conversion */
- ; /* convert from UINT8 to INT16 */
- punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
- punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
- punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
- punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
- punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
- punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
- ; /* average ReconPtr1 and ReconPtr2 */
- paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
- paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
- psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- movq [edx], mm0 ; /* write answer out */
- movq [8 + edx], mm2 ; /* write answer out */
- ; /* Increment pointers */
- add edx, 16 ;
- add eax, PixelsPerLine ;
- add ebx, ReconPixelsPerLine ;
- add ecx, ReconPixelsPerLine ;
-
- };
-
-
-
-
-
-#endif
-}
-
-static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
-{
-
-#if 0
- ogg_uint32_t SadValue;
- ogg_uint32_t SadValue1;
-
- SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) +
- DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
- DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
- DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
-
- SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) +
- DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
- DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
- DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
-
- SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
-
- return SadValue;
-
-#else
- ogg_uint32_t MaxSad;
-
-
- __asm {
- align 16
- mov ebx, Src1
- mov ecx, Src2
-
-
- pxor mm6, mm6 ; /* zero out mm6 for unpack */
- pxor mm7, mm7 ; /* zero out mm7 for unpack */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [ecx] ;
-
- movq mm2, mm0 ;
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
-
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* ; unpack low four bytes to higher precision */
- punpckhbw mm1, mm7 ; /* ; unpack high four bytes to higher precision */
-
- movq mm2, mm0 ;
- movq mm3, mm1 ;
- psrlq mm2, 32 ; /* fold and add */
- psrlq mm3, 32 ;
- paddw mm0, mm2 ;
- paddw mm1, mm3 ;
- movq mm2, mm0 ;
- movq mm3, mm1 ;
- psrlq mm2, 16 ;
- psrlq mm3, 16 ;
- paddw mm0, mm2 ;
- paddw mm1, mm3 ;
-
- psubusw mm1, mm0 ;
- paddw mm1, mm0 ; /* mm1 = max(mm1, mm0) */
- movd eax, mm1 ;
-
- and eax, 0xffff
- mov MaxSad, eax
- };
- return MaxSad;
-
-
-
-
-
-#endif
-}
-
-
-
-
-static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
-
-#if 0
- ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
- ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
- ogg_uint32_t MaxSad = 0;
- ogg_uint32_t i;
-
- for ( i = 0; i < 4; i++ ){
- SadValue[0] += abs(Src1[0] - Src2[0]);
- SadValue[1] += abs(Src1[1] - Src2[1]);
- SadValue[2] += abs(Src1[2] - Src2[2]);
- SadValue[3] += abs(Src1[3] - Src2[3]);
- SadValue[4] += abs(Src1[4] - Src2[4]);
- SadValue[5] += abs(Src1[5] - Src2[5]);
- SadValue[6] += abs(Src1[6] - Src2[6]);
- SadValue[7] += abs(Src1[7] - Src2[7]);
-
- Src1 += stride;
- Src2 += stride;
- }
-
- for ( i = 0; i < 4; i++ ){
- SadValue2[0] += abs(Src1[0] - Src2[0]);
- SadValue2[1] += abs(Src1[1] - Src2[1]);
- SadValue2[2] += abs(Src1[2] - Src2[2]);
- SadValue2[3] += abs(Src1[3] - Src2[3]);
- SadValue2[4] += abs(Src1[4] - Src2[4]);
- SadValue2[5] += abs(Src1[5] - Src2[5]);
- SadValue2[6] += abs(Src1[6] - Src2[6]);
- SadValue2[7] += abs(Src1[7] - Src2[7]);
-
- Src1 += stride;
- Src2 += stride;
- }
-
- for ( i = 0; i < 8; i++ ){
- if ( SadValue[i] > MaxSad )
- MaxSad = SadValue[i];
- if ( SadValue2[i] > MaxSad )
- MaxSad = SadValue2[i];
- }
-
- return MaxSad;
-#else
- ogg_uint32_t MaxSad;
-
-
- __asm {
- align 16
- mov ebx, Src1
- mov ecx, Src2
-
- pxor mm3, mm3 ; /* zero out mm3 for unpack */
- pxor mm4, mm4 ; /* mm4 low sum */
- pxor mm5, mm5 ; /* mm5 high sum */
- pxor mm6, mm6 ; /* mm6 low sum */
- pxor mm7, mm7 ; /* mm7 high sum */
- mov edi, 4 ; /* 4 rows */
- label_1: ;
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [ecx] ; /* take 8 bytes */
-
- movq mm2, mm0 ;
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm3 ; /* unpack to higher precision for accumulation */
- paddw mm4, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm3 ; /* unpack high four bytes to higher precision */
- paddw mm5, mm1 ; /* accumulate difference... */
- add ebx, stride ; /* Inc pointer into the new data */
- add ecx, stride ; /* Inc pointer into the new data */
-
- dec edi ;
- jnz label_1 ;
-
- mov edi, 4 ; /* 4 rows */
- label_2: ;
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [ecx] ; /* take 8 bytes */
-
- movq mm2, mm0 ;
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm3 ; /* unpack to higher precision for accumulation */
- paddw mm6, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm3 ; /* unpack high four bytes to higher precision */
- paddw mm7, mm1 ; /* accumulate difference... */
- add ebx, stride ; /* Inc pointer into the new data */
- add ecx, stride ; /* Inc pointer into the new data */
-
- dec edi ;
- jnz label_2 ;
-
- psubusw mm7, mm6 ;
- paddw mm7, mm6 ; /* mm7 = max(mm7, mm6) */
- psubusw mm5, mm4 ;
- paddw mm5, mm4 ; /* mm5 = max(mm5, mm4) */
- psubusw mm7, mm5 ;
- paddw mm7, mm5 ; /* mm7 = max(mm5, mm7) */
- movq mm6, mm7 ;
- psrlq mm6, 32 ;
- psubusw mm7, mm6 ;
- paddw mm7, mm6 ; /* mm7 = max(mm5, mm7) */
- movq mm6, mm7 ;
- psrlq mm6, 16 ;
- psubusw mm7, mm6 ;
- paddw mm7, mm6 ; /* mm7 = max(mm5, mm7) */
- movd eax, mm7 ;
- and eax, 0xffff ;
-
- mov MaxSad, eax
- };
-
- return MaxSad;
-
-
-#endif
-}
-
-static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2)
-{
-
-#if 0
- ogg_uint32_t i;
- ogg_uint32_t sad = 0;
-
- for (i=8; i; i--) {
- sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
- sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
- sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
- sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
- sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
- sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
- sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
- sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
-
- /* Step to next row of block. */
- ptr1 += stride1;
- ptr2 += stride2;
- }
-
- return sad;
-#else
- ogg_uint32_t DiffVal;
-
- __asm {
- align 16
-
- mov ebx, ptr1
- mov edx, ptr2
-
- pxor mm6, mm6 ; /* zero out mm6 for unpack */
- pxor mm7, mm7 ; /* mm7 contains the result */
-
- ; /* ITERATION 1 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
- ; /* ITERATION 2 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
-
- ; /* ITERATION 3 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
- ; /* ITERATION 4 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
-
- ; /* ITERATION 5 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
-
- ; /* ITERATION 6 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
-
- ; /* ITERATION 7 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
-
-
- ; /* ITERATION 8 */
- movq mm0, [ebx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, stride1 ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add edx, stride2 ; /* Inc pointer into ref data */
-
-
-
- ; /* ------ */
-
- movq mm0, mm7 ;
- psrlq mm7, 32 ;
- paddw mm7, mm0 ;
- movq mm0, mm7 ;
- psrlq mm7, 16 ;
- paddw mm7, mm0 ;
- movd eax, mm7 ;
- and eax, 0xffff ;
-
- mov DiffVal, eax
- };
-
- return DiffVal;
-
-
-
-#endif
-}
-
-static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2,
- ogg_uint32_t thres)
-{
-#if 0
- ogg_uint32_t i;
- ogg_uint32_t sad = 0;
-
- for (i=8; i; i--) {
- sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
- sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
- sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
- sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
- sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
- sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
- sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
- sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
-
- if (sad > thres )
- break;
-
- /* Step to next row of block. */
- ptr1 += stride1;
- ptr2 += stride2;
- }
-
- return sad;
-#else
- return sad8x8__mmx (ptr1, stride1, ptr2, stride2);
-#endif
-}
-
-
-static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
- ogg_uint32_t thres)
-{
-#if 0
- ogg_uint32_t i;
- ogg_uint32_t sad = 0;
-
- for (i=8; i; i--) {
- sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
- sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
- sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
- sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
- sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
- sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
- sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
- sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
-
- if ( sad > thres )
- break;
-
- /* Step to next row of block. */
- SrcData += SrcStride;
- RefDataPtr1 += RefStride;
- RefDataPtr2 += RefStride;
- }
-
- return sad;
-#else
- ogg_uint32_t DiffVal;
-
- __asm {
- align 16
-
- mov ebx, SrcData
- mov ecx, RefDataPtr1
- mov edx, RefDataPtr2
-
-
- pcmpeqd mm5, mm5 ; /* fefefefefefefefe in mm5 */
- paddb mm5, mm5 ;
- ;
- pxor mm6, mm6 ; /* zero out mm6 for unpack */
- pxor mm7, mm7 ; /* mm7 contains the result */
- mov edi, 8 ; /* 8 rows */
- loop_start: ;
- movq mm0, [ebx] ; /* take 8 bytes */
-
- movq mm2, [ecx] ;
- movq mm3, [edx] ; /* take average of mm2 and mm3 */
- movq mm1, mm2 ;
- pand mm1, mm3 ;
- pxor mm3, mm2 ;
- pand mm3, mm5 ;
- psrlq mm3, 1 ;
- paddb mm1, mm3 ;
-
- movq mm2, mm0 ;
-
- psubusb mm0, mm1 ; /* A - B */
- psubusb mm1, mm2 ; /* B - A */
- por mm0, mm1 ; /* and or gives abs difference */
- movq mm1, mm0 ;
-
- punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
- paddw mm7, mm0 ; /* accumulate difference... */
- punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
- add ebx, SrcStride ; /* Inc pointer into the new data */
- paddw mm7, mm1 ; /* accumulate difference... */
- add ecx, RefStride ; /* Inc pointer into ref data */
- add edx, RefStride ; /* Inc pointer into ref data */
-
- dec edi ;
- jnz loop_start ;
-
- movq mm0, mm7 ;
- psrlq mm7, 32 ;
- paddw mm7, mm0 ;
- movq mm0, mm7 ;
- psrlq mm7, 16 ;
- paddw mm7, mm0 ;
- movd eax, mm7 ;
- and eax, 0xffff ;
-
- mov DiffVal, eax
- };
-
- return DiffVal;
-
-
-
-#endif
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
-#if 0
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
-
- for (i=8; i; i--) {
- /* Examine alternate pixel locations. */
- XSum += DataPtr[0];
- XXSum += DataPtr[0]*DataPtr[0];
- XSum += DataPtr[1];
- XXSum += DataPtr[1]*DataPtr[1];
- XSum += DataPtr[2];
- XXSum += DataPtr[2]*DataPtr[2];
- XSum += DataPtr[3];
- XXSum += DataPtr[3]*DataPtr[3];
- XSum += DataPtr[4];
- XXSum += DataPtr[4]*DataPtr[4];
- XSum += DataPtr[5];
- XXSum += DataPtr[5]*DataPtr[5];
- XSum += DataPtr[6];
- XXSum += DataPtr[6]*DataPtr[6];
- XSum += DataPtr[7];
- XXSum += DataPtr[7]*DataPtr[7];
-
- /* Step to next row of block. */
- DataPtr += Stride;
- }
-
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-#else
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
- __asm {
- align 16
-
- mov ecx, DataPtr
-
- pxor mm5, mm5 ;
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
- mov edi, 8 ;
- loop_start:
- movq mm0, [ecx] ; /* take 8 bytes */
- movq mm2, mm0 ;
-
- punpcklbw mm0, mm6 ;
- punpckhbw mm2, mm6 ;
-
- paddw mm5, mm0 ;
- paddw mm5, mm2 ;
-
- pmaddwd mm0, mm0 ;
- pmaddwd mm2, mm2 ;
- ;
- paddd mm7, mm0 ;
- paddd mm7, mm2 ;
-
- add ecx, Stride ; /* Inc pointer into src data */
-
- dec edi ;
- jnz loop_start ;
-
- movq mm0, mm5 ;
- psrlq mm5, 32 ;
- paddw mm5, mm0 ;
- movq mm0, mm5 ;
- psrlq mm5, 16 ;
- paddw mm5, mm0 ;
- movd edi, mm5 ;
- movsx edi, di ;
- mov eax, edi ;
-
- movq mm0, mm7 ;
- psrlq mm7, 32 ;
- paddd mm7, mm0 ;
- movd ebx, mm7 ;
-
- mov XSum, eax
- mov XXSum, ebx;
-
- };
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-
-
-
-#endif
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
-
-#if 0
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
- ogg_int32_t DiffVal;
-
- for (i=8; i; i--) {
- DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- /* Step to next row of block. */
- SrcData += SrcStride;
- RefDataPtr += RefStride;
- }
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-#else
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
-
- __asm {
- align 16
-
- mov ecx, SrcData
- mov edx, RefDataPtr
-
- pxor mm5, mm5 ;
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
- mov edi, 8 ;
- loop_start: ;
- movq mm0, [ecx] ; /* take 8 bytes */
- movq mm1, [edx] ;
- movq mm2, mm0 ;
- movq mm3, mm1 ;
-
- punpcklbw mm0, mm6 ;
- punpcklbw mm1, mm6 ;
- punpckhbw mm2, mm6 ;
- punpckhbw mm3, mm6 ;
-
- psubsw mm0, mm1 ;
- psubsw mm2, mm3 ;
-
- paddw mm5, mm0 ;
- paddw mm5, mm2 ;
-
- pmaddwd mm0, mm0 ;
- pmaddwd mm2, mm2 ;
- ;
- paddd mm7, mm0 ;
- paddd mm7, mm2 ;
-
- add ecx, SrcStride ; /* Inc pointer into src data */
- add edx, RefStride ; /* Inc pointer into ref data */
-
- dec edi ;
- jnz loop_start ;
-
- movq mm0, mm5 ;
- psrlq mm5, 32 ;
- paddw mm5, mm0 ;
- movq mm0, mm5 ;
- psrlq mm5, 16 ;
- paddw mm5, mm0 ;
- movd edi, mm5 ;
- movsx edi, di ;
- mov eax, edi ;
-
- movq mm0, mm7 ;
- psrlq mm7, 32 ;
- paddd mm7, mm0 ;
- movd ebx, mm7 ;
-
- mov XSum, eax
- mov XXSum, ebx
-
- };
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-
-
-#endif
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
-#if 0
- ogg_uint32_t i;
- ogg_uint32_t XSum=0;
- ogg_uint32_t XXSum=0;
- ogg_int32_t DiffVal;
-
- for (i=8; i; i--) {
- DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
- XSum += DiffVal;
- XXSum += DiffVal*DiffVal;
-
- /* Step to next row of block. */
- SrcData += SrcStride;
- RefDataPtr1 += RefStride;
- RefDataPtr2 += RefStride;
- }
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-#else
- ogg_uint32_t XSum;
- ogg_uint32_t XXSum;
-
- __asm {
- align 16
-
- mov ebx, SrcData
- mov ecx, RefDataPtr1
- mov edx, RefDataPtr2
-
- pcmpeqd mm4, mm4 ; /* fefefefefefefefe in mm4 */
- paddb mm4, mm4 ;
- pxor mm5, mm5 ;
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
- mov edi, 8 ;
- loop_start: ;
- movq mm0, [ebx] ; /* take 8 bytes */
-
- movq mm2, [ecx] ;
- movq mm3, [edx] ; /* take average of mm2 and mm3 */
- movq mm1, mm2 ;
- pand mm1, mm3 ;
- pxor mm3, mm2 ;
- pand mm3, mm4 ;
- psrlq mm3, 1 ;
- paddb mm1, mm3 ;
-
- movq mm2, mm0 ;
- movq mm3, mm1 ;
-
- punpcklbw mm0, mm6 ;
- punpcklbw mm1, mm6 ;
- punpckhbw mm2, mm6 ;
- punpckhbw mm3, mm6 ;
-
- psubsw mm0, mm1 ;
- psubsw mm2, mm3 ;
-
- paddw mm5, mm0 ;
- paddw mm5, mm2 ;
-
- pmaddwd mm0, mm0 ;
- pmaddwd mm2, mm2 ;
- ;
- paddd mm7, mm0 ;
- paddd mm7, mm2 ;
-
- add ebx, SrcStride ; /* Inc pointer into src data */
- add ecx, RefStride ; /* Inc pointer into ref data */
- add edx, RefStride ; /* Inc pointer into ref data */
-
- dec edi ;
- jnz loop_start ;
-
- movq mm0, mm5 ;
- psrlq mm5, 32 ;
- paddw mm5, mm0 ;
- movq mm0, mm5 ;
- psrlq mm5, 16 ;
- paddw mm5, mm0 ;
- movd edi, mm5 ;
- movsx edi, di ;
- mov XSum, edi ; /* movl eax, edi ; Modified for vc to resuse eax*/
-
- movq mm0, mm7 ;
- psrlq mm7, 32 ;
- paddd mm7, mm0 ;
- movd XXSum, mm7 ; /*movd eax, mm7 ; Modified for vc to reuse eax */
- };
-
- return (( (XXSum<<6) - XSum*XSum ));
-
-#endif
-}
-
-static void restore_fpu (void)
-{
-
- __asm {
- emms
- }
-
-}
-
-void dsp_mmx_init(DspFunctions *funcs)
-{
- funcs->restore_fpu = restore_fpu;
- funcs->sub8x8 = sub8x8__mmx;
- funcs->sub8x8_128 = sub8x8_128__mmx;
- funcs->sub8x8avg2 = sub8x8avg2__mmx;
- funcs->row_sad8 = row_sad8__mmx;
- funcs->col_sad8x8 = col_sad8x8__mmx;
- funcs->sad8x8 = sad8x8__mmx;
- funcs->sad8x8_thres = sad8x8_thres__mmx;
- funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;
- funcs->intra8x8_err = intra8x8_err__mmx;
- funcs->inter8x8_err = inter8x8_err__mmx;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c
deleted file mode 100644
index 65cd9c367..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c
+++ /dev/null
@@ -1,333 +0,0 @@
-;//==========================================================================
-;//
-;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
-;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
-;// PURPOSE.
-;//
-;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
-;//
-;//--------------------------------------------------------------------------
-
-#include "theora/theora.h"
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-
-static const ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15;
-static const ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83;
-static const ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4db;
-static const ogg_int64_t xC4S4 = 0x0b505b505b505b505;
-static const ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3a;
-static const ogg_int64_t xC6S2 = 0x061f861f861f861f8;
-static const ogg_int64_t xC7S1 = 0x031f131f131f131f1;
-
-
-static __inline void Transpose_mmx( ogg_int16_t *InputData1, ogg_int16_t *OutputData1,
- ogg_int16_t *InputData2, ogg_int16_t *OutputData2)
-{
-
- __asm {
- align 16
- mov eax, InputData1
- mov ebx, InputData2
- mov ecx, OutputData1
- mov edx, OutputData2
-
-
- movq mm0, [eax] ; /* mm0 = a0 a1 a2 a3 */
- movq mm4, [ebx] ; /* mm4 = e4 e5 e6 e7 */
- movq mm1, [16 + eax] ; /* mm1 = b0 b1 b2 b3 */
- movq mm5, [16 + ebx] ; /* mm5 = f4 f5 f6 f7 */
- movq mm2, [32 + eax] ; /* mm2 = c0 c1 c2 c3 */
- movq mm6, [32 + ebx] ; /* mm6 = g4 g5 g6 g7 */
- movq mm3, [48 + eax] ; /* mm3 = d0 d1 d2 d3 */
- movq [16 + ecx], mm1 ; /* save b0 b1 b2 b3 */
- movq mm7, [48 + ebx] ; /* mm7 = h0 h1 h2 h3 */
- ; /* Transpose 2x8 block */
- movq mm1, mm4 ; /* mm1 = e3 e2 e1 e0 */
- punpcklwd mm4, mm5 ; /* mm4 = f1 e1 f0 e0 */
- movq [ecx], mm0 ; /* save a3 a2 a1 a0 */
- punpckhwd mm1, mm5 ; /* mm1 = f3 e3 f2 e2 */
- movq mm0, mm6 ; /* mm0 = g3 g2 g1 g0 */
- punpcklwd mm6, mm7 ; /* mm6 = h1 g1 h0 g0 */
- movq mm5, mm4 ; /* mm5 = f1 e1 f0 e0 */
- punpckldq mm4, mm6 ; /* mm4 = h0 g0 f0 e0 = MM4 */
- punpckhdq mm5, mm6 ; /* mm5 = h1 g1 f1 e1 = MM5 */
- movq mm6, mm1 ; /* mm6 = f3 e3 f2 e2 */
- movq [edx], mm4 ;
- punpckhwd mm0, mm7 ; /* mm0 = h3 g3 h2 g2 */
- movq [16 + edx], mm5 ;
- punpckhdq mm6, mm0 ; /* mm6 = h3 g3 f3 e3 = MM7 */
- movq mm4, [ecx] ; /* mm4 = a3 a2 a1 a0 */
- punpckldq mm1, mm0 ; /* mm1 = h2 g2 f2 e2 = MM6 */
- movq mm5, [16 + ecx] ; /* mm5 = b3 b2 b1 b0 */
- movq mm0, mm4 ; /* mm0 = a3 a2 a1 a0 */
- movq [48 + edx], mm6 ;
- punpcklwd mm0, mm5 ; /* mm0 = b1 a1 b0 a0 */
- movq [32 + edx], mm1 ;
- punpckhwd mm4, mm5 ; /* mm4 = b3 a3 b2 a2 */
- movq mm5, mm2 ; /* mm5 = c3 c2 c1 c0 */
- punpcklwd mm2, mm3 ; /* mm2 = d1 c1 d0 c0 */
- movq mm1, mm0 ; /* mm1 = b1 a1 b0 a0 */
- punpckldq mm0, mm2 ; /* mm0 = d0 c0 b0 a0 = MM0 */
- punpckhdq mm1, mm2 ; /* mm1 = d1 c1 b1 a1 = MM1 */
- movq mm2, mm4 ; /* mm2 = b3 a3 b2 a2 */
- movq [ecx], mm0 ;
- punpckhwd mm5, mm3 ; /* mm5 = d3 c3 d2 c2 */
- movq [16 + ecx], mm1 ;
- punpckhdq mm4, mm5 ; /* mm4 = d3 c3 b3 a3 = MM3 */
- punpckldq mm2, mm5 ; /* mm2 = d2 c2 b2 a2 = MM2 */
- movq [48 + ecx], mm4 ;
- movq [32 + ecx], mm2 ;
-
- };
-
-
-}
-
-static __inline void Fdct_mmx( ogg_int16_t *InputData1, ogg_int16_t *InputData2, ogg_int16_t *temp)
-{
-
- __asm {
- align 16
-
-
- mov eax, InputData1
- mov ebx, InputData2
- mov ecx, temp
- movq mm0, [eax] ;
- movq mm1, [16 + eax] ;
- movq mm2, [48 + eax] ;
- movq mm3, [16 + ebx] ;
- movq mm4, mm0 ;
- movq mm5, mm1 ;
- movq mm6, mm2 ;
- movq mm7, mm3 ;
- ;
- paddsw mm0, [48 + ebx] ; /* mm0 = ip0 + ip7 = is07 */
- paddsw mm1, [32 + eax] ; /* mm1 = ip1 + ip2 = is12 */
- paddsw mm2, [ebx] ; /* mm2 = ip3 + ip4 = is34 */
- paddsw mm3, [32 + ebx] ; /* mm3 = ip5 + ip6 = is56 */
- psubsw mm4, [48 + ebx] ; /* mm4 = ip0 - ip7 = id07 */
- psubsw mm5, [32 + eax] ; /* mm5 = ip1 - ip2 = id12 */
- ;
- psubsw mm0, mm2 ; /* mm0 = is07 - is34 */
- ;
- paddsw mm2, mm2 ;
- ;
- psubsw mm6, [ebx] ; /* mm6 = ip3 - ip4 = id34 */
- ;
- paddsw mm2, mm0 ; /* mm2 = is07 + is34 = is0734 */
- psubsw mm1, mm3 ; /* mm1 = is12 - is56 */
- movq [ecx], mm0 ; /* Save is07 - is34 to free mm0; */
- paddsw mm3, mm3 ;
- paddsw mm3, mm1 ; /* mm3 = is12 + 1s56 = is1256 */
- ;
- psubsw mm7, [32 + ebx] ; /* mm7 = ip5 - ip6 = id56 */
- ; /* ------------------------------------------------------------------- */
- psubsw mm5, mm7 ; /* mm5 = id12 - id56 */
- paddsw mm7, mm7 ;
- paddsw mm7, mm5 ; /* mm7 = id12 + id56 */
- ; /* ------------------------------------------------------------------- */
- psubsw mm2, mm3 ; /* mm2 = is0734 - is1256 */
- paddsw mm3, mm3 ;
- ;
- movq mm0, mm2 ; /* make a copy */
- paddsw mm3, mm2 ; /* mm3 = is0734 + is1256 */
- ;
- pmulhw mm0, xC4S4 ; /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
- paddw mm0, mm2 ; /* mm0 = xC4S4 * ( is0734 - is1256 ) */
- psrlw mm2, 15 ;
- paddw mm0, mm2 ; /* Truncate mm0, now it is op[4] */
- ;
- movq mm2, mm3 ;
- movq [ebx], mm0 ; /* save ip4, now mm0,mm2 are free */
- ;
- movq mm0, mm3 ;
- pmulhw mm3, xC4S4 ; /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
- ;
- psrlw mm2, 15 ;
- paddw mm3, mm0 ; /* mm3 = xC4S4 * ( is0734 +is1256 ) */
- paddw mm3, mm2 ; /* Truncate mm3, now it is op[0] */
- ;
- movq [eax], mm3 ;
- ; /* ------------------------------------------------------------------- */
- movq mm3, [ecx] ; /* mm3 = irot_input_y */
- pmulhw mm3, xC2S6 ; /* mm3 = xC2S6 * irot_input_y - irot_input_y */
- ;
- movq mm2, [ecx] ;
- movq mm0, mm2 ;
- ;
- psrlw mm2, 15 ; /* mm3 = xC2S6 * irot_input_y */
- paddw mm3, mm0 ;
- ;
- paddw mm3, mm2 ; /* Truncated */
- movq mm0, mm5 ;
- ;
- movq mm2, mm5 ;
- pmulhw mm0, xC6S2 ; /* mm0 = xC6S2 * irot_input_x */
- ;
- psrlw mm2, 15 ;
- paddw mm0, mm2 ; /* Truncated */
- ;
- paddsw mm3, mm0 ; /* ip[2] */
- movq [32 + eax], mm3 ; /* Save ip2 */
- ;
- movq mm0, mm5 ;
- movq mm2, mm5 ;
- ;
- pmulhw mm5, xC2S6 ; /* mm5 = xC2S6 * irot_input_x - irot_input_x */
- psrlw mm2, 15 ;
- ;
- movq mm3, [ecx] ;
- paddw mm5, mm0 ; /* mm5 = xC2S6 * irot_input_x */
- ;
- paddw mm5, mm2 ; /* Truncated */
- movq mm2, mm3 ;
- ;
- pmulhw mm3, xC6S2 ; /* mm3 = xC6S2 * irot_input_y */
- psrlw mm2, 15 ;
- ;
- paddw mm3, mm2 ; /* Truncated */
- psubsw mm3, mm5 ;
- ;
- movq [32 + ebx], mm3 ;
- ; /* ------------------------------------------------------------------- */
- movq mm0, xC4S4 ;
- movq mm2, mm1 ;
- movq mm3, mm1 ;
- ;
- pmulhw mm1, mm0 ; /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
- psrlw mm2, 15 ;
- ;
- paddw mm1, mm3 ; /* mm0 = xC4S4 * ( is12 - is56 ) */
- paddw mm1, mm2 ; /* Truncate mm1, now it is icommon_product1 */
- ;
- movq mm2, mm7 ;
- movq mm3, mm7 ;
- ;
- pmulhw mm7, mm0 ; /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
- psrlw mm2, 15 ;
- ;
- paddw mm7, mm3 ; /* mm7 = xC4S4 * ( id12 + id56 ) */
- paddw mm7, mm2 ; /* Truncate mm7, now it is icommon_product2 */
- ; /* ------------------------------------------------------------------- */
- pxor mm0, mm0 ; /* Clear mm0 */
- psubsw mm0, mm6 ; /* mm0 = - id34 */
- ;
- psubsw mm0, mm7 ; /* mm0 = - ( id34 + idcommon_product2 ) */
- paddsw mm6, mm6 ;
- paddsw mm6, mm0 ; /* mm6 = id34 - icommon_product2 */
- ;
- psubsw mm4, mm1 ; /* mm4 = id07 - icommon_product1 */
- paddsw mm1, mm1 ;
- paddsw mm1, mm4 ; /* mm1 = id07 + icommon_product1 */
- ; /* ------------------------------------------------------------------- */
- movq mm7, xC1S7 ;
- movq mm2, mm1 ;
- ;
- movq mm3, mm1 ;
- pmulhw mm1, mm7 ; /* mm1 = xC1S7 * irot_input_x - irot_input_x */
- ;
- movq mm7, xC7S1 ;
- psrlw mm2, 15 ;
- ;
- paddw mm1, mm3 ; /* mm1 = xC1S7 * irot_input_x */
- paddw mm1, mm2 ; /* Trucated */
- ;
- pmulhw mm3, mm7 ; /* mm3 = xC7S1 * irot_input_x */
- paddw mm3, mm2 ; /* Truncated */
- ;
- movq mm5, mm0 ;
- movq mm2, mm0 ;
- ;
- movq mm7, xC1S7 ;
- pmulhw mm0, mm7 ; /* mm0 = xC1S7 * irot_input_y - irot_input_y */
- ;
- movq mm7, xC7S1 ;
- psrlw mm2, 15 ;
- ;
- paddw mm0, mm5 ; /* mm0 = xC1S7 * irot_input_y */
- paddw mm0, mm2 ; /* Truncated */
- ;
- pmulhw mm5, mm7 ; /* mm5 = xC7S1 * irot_input_y */
- paddw mm5, mm2 ; /* Truncated */
- ;
- psubsw mm1, mm5 ; /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */
- paddsw mm3, mm0 ; /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */
- ;
- movq [16 + eax], mm1 ;
- movq [48 + ebx], mm3 ;
- ; /* ------------------------------------------------------------------- */
- movq mm0, xC3S5 ;
- movq mm1, xC5S3 ;
- ;
- movq mm5, mm6 ;
- movq mm7, mm6 ;
- ;
- movq mm2, mm4 ;
- movq mm3, mm4 ;
- ;
- pmulhw mm4, mm0 ; /* mm4 = xC3S5 * irot_input_x - irot_input_x */
- pmulhw mm6, mm1 ; /* mm6 = xC5S3 * irot_input_y - irot_input_y */
- ;
- psrlw mm2, 15 ;
- psrlw mm5, 15 ;
- ;
- paddw mm4, mm3 ; /* mm4 = xC3S5 * irot_input_x */
- paddw mm6, mm7 ; /* mm6 = xC5S3 * irot_input_y */
- ;
- paddw mm4, mm2 ; /* Truncated */
- paddw mm6, mm5 ; /* Truncated */
- ;
- psubsw mm4, mm6 ; /* ip3 */
- movq [48 + eax], mm4 ;
- ;
- movq mm4, mm3 ;
- movq mm6, mm7 ;
- ;
- pmulhw mm3, mm1 ; /* mm3 = xC5S3 * irot_input_x - irot_input_x */
- pmulhw mm7, mm0 ; /* mm7 = xC3S5 * irot_input_y - irot_input_y */
- ;
- paddw mm4, mm2 ;
- paddw mm6, mm5 ;
- ;
- paddw mm3, mm4 ; /* mm3 = xC5S3 * irot_input_x */
- paddw mm7, mm6 ; /* mm7 = xC3S5 * irot_input_y */
- ;
- paddw mm3, mm7 ; /* ip5 */
- movq [16 + ebx], mm3 ;
-
-};
-
-}
-
-
-static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
-{
-
- static ogg_int16_t tmp[32];
- ogg_int16_t* align_tmp = (ogg_int16_t*)((unsigned char*)tmp + (16 - ((int)tmp)&15));
-
-
- Transpose_mmx(InputData, OutputData, InputData + 4, OutputData + 4);
- Fdct_mmx(OutputData, OutputData + 4, align_tmp);
-
- Transpose_mmx(InputData + 32, OutputData + 32, InputData + 36, OutputData + 36);
- Fdct_mmx(OutputData+32, OutputData + 36, align_tmp);
-
- Transpose_mmx(OutputData, OutputData, OutputData + 32, OutputData + 32);
- Fdct_mmx(OutputData, OutputData + 32, align_tmp);
-
- Transpose_mmx(OutputData + 4, OutputData + 4, OutputData + 36, OutputData + 36);
- Fdct_mmx(OutputData + 4, OutputData + 36, align_tmp);
-
- __asm emms
-
-}
-
-void dsp_mmx_fdct_init(DspFunctions *funcs)
-{
- funcs->fdct_short = fdct_short__mmx;
-}
diff --git a/Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c
deleted file mode 100644
index 1e0f1f095..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: reconstruct.c,v 1.6 2003/12/03 08:59:41 arc Exp $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-
-static const unsigned __int64 V128 = 0x8080808080808080;
-
-static void copy8x8__mmx (unsigned char *src,
- unsigned char *dest,
- unsigned int stride)
-{
-
- //Is this even the fastest way to do this?
- __asm {
- align 16
-
- mov eax, src
- mov ebx, dest
- mov ecx, stride
-
- lea edi, [ecx + ecx * 2]
- movq mm0, [eax]
- movq mm1, [eax + ecx]
- movq mm2, [eax + ecx * 2]
- movq mm3, [eax + edi]
- lea eax, [eax + ecx * 4]
- movq [ebx], mm0
- movq [ebx + ecx], mm1
- movq [ebx + ecx * 2], mm2
- movq [ebx + edi], mm3
- lea ebx, [ebx + ecx * 4]
- movq mm0, [eax]
- movq mm1, [eax + ecx]
- movq mm2, [eax + ecx * 2]
- movq mm3, [eax + edi]
- movq [ebx], mm0
- movq [ebx + ecx], mm1
- movq [ebx + ecx * 2], mm2
- movq [ebx + edi], mm3
-
- };
-
-}
-
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
-
- __asm {
- align 16
-
- mov eax, ReconPtr
- mov ebx, ChangePtr
- mov ecx, LineStep
-
- movq mm0, V128
-
- lea edi, [128 + ebx]
- loop_start:
- movq mm2, [ebx]
-
- packsswb mm2, [8 + ebx]
- por mm0, mm0
- pxor mm2, mm0
- lea ebx, [16 + ebx]
- cmp ebx, edi
-
- movq [eax], mm2
-
-
-
- lea eax, [eax + ecx]
- jc loop_start
-
-
- };
-
-}
-
-
-
-
-
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
- ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-
- __asm {
-
- align 16
-
- mov eax, ReconPtr
- mov ebx, ChangePtr
- mov ecx, LineStep
- mov edx, RefPtr
-
- pxor mm0, mm0
- lea edi, [128 + ebx]
-
- loop_start:
- movq mm2, [edx]
-
- movq mm4, [ebx]
- movq mm3, mm2
- movq mm5, [8 + ebx]
- punpcklbw mm2, mm0
- paddsw mm2, mm4
- punpckhbw mm3, mm0
- paddsw mm3, mm5
- add edx, ecx
- packuswb mm2, mm3
- lea ebx, [16 + ebx]
- cmp ebx, edi
-
- movq [eax], mm2
-
- lea eax, [eax + ecx]
- jc loop_start
-
- };
-}
-
-
-
-
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
- unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- __asm {
- align 16
-
- mov eax, ReconPtr
- mov ebx, ChangePtr
- mov ecx, RefPtr1
- mov edx, RefPtr2
-
- pxor mm0, mm0
- lea edi, [128 + ebx]
-
- loop_start:
- movq mm2, [ecx]
- movq mm4, [edx]
-
- movq mm3, mm2
- punpcklbw mm2, mm0
- movq mm5, mm4
- movq mm6, [ebx]
- punpckhbw mm3, mm0
- movq mm7, [8 + ebx]
- punpcklbw mm4, mm0
- punpckhbw mm5, mm0
- paddw mm2, mm4
- paddw mm3, mm5
- psrlw mm2, 1
- psrlw mm3, 1
- paddw mm2, mm6
- paddw mm3, mm7
- lea ebx, [16 + ebx]
- packuswb mm2, mm3
- add ecx, LineStep
- add edx, LineStep
- movq [eax], mm2
- add eax, LineStep
- cmp ebx, edi
- jc loop_start
-
- };
-
-}
-
-
-
-
-void dsp_mmx_recon_init(DspFunctions *funcs)
-{
- funcs->copy8x8 = copy8x8__mmx;
- funcs->recon_intra8x8 = recon_intra8x8__mmx;
- funcs->recon_inter8x8 = recon_inter8x8__mmx;
- funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
-}
-
diff --git a/Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c
deleted file mode 100644
index 547e974e3..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c
+++ /dev/null
@@ -1,409 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dct_decode_mmx.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V3=
- 0x0003000300030003LL;
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V4=
- 0x0004000400040004LL;
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- long esi;
- _pix-=_ystride*2;
- __asm__ __volatile__(
- /*mm0=0*/
- "pxor %%mm0,%%mm0\n\t"
- /*esi=_ystride*3*/
- "lea (%[ystride],%[ystride],2),%[s]\n\t"
- /*mm7=_pix[0...8]*/
- "movq (%[pix]),%%mm7\n\t"
- /*mm4=_pix[0...8+_ystride*3]*/
- "movq (%[pix],%[s]),%%mm4\n\t"
- /*mm6=_pix[0...8]*/
- "movq %%mm7,%%mm6\n\t"
- /*Expand unsigned _pix[0...3] to 16 bits.*/
- "punpcklbw %%mm0,%%mm6\n\t"
- "movq %%mm4,%%mm5\n\t"
- /*Expand unsigned _pix[4...8] to 16 bits.*/
- "punpckhbw %%mm0,%%mm7\n\t"
- /*Expand other arrays too.*/
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm5\n\t"
- /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
- "psubw %%mm4,%%mm6\n\t"
- "psubw %%mm5,%%mm7\n\t"
- /*mm5=mm4=_pix[0...8+_ystride]*/
- "movq (%[pix],%[ystride]),%%mm4\n\t"
- /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/
- "movq (%[pix],%[ystride],2),%%mm2\n\t"
- "movq %%mm4,%%mm5\n\t"
- "movq %%mm2,%%mm3\n\t"
- "movq %%mm2,%%mm1\n\t"
- /*Expand these arrays.*/
- "punpckhbw %%mm0,%%mm5\n\t"
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm3\n\t"
- "punpcklbw %%mm0,%%mm2\n\t"
- /*Preload...*/
- "movq %[OC_V3],%%mm0\n\t"
- /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
- "psubw %%mm5,%%mm3\n\t"
- "psubw %%mm4,%%mm2\n\t"
- /*Scale by 3.*/
- "pmullw %%mm0,%%mm3\n\t"
- "pmullw %%mm0,%%mm2\n\t"
- /*Preload...*/
- "movq %[OC_V4],%%mm0\n\t"
- /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
- 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
- "paddw %%mm7,%%mm3\n\t"
- "paddw %%mm6,%%mm2\n\t"
- /*Add 4.*/
- "paddw %%mm0,%%mm3\n\t"
- "paddw %%mm0,%%mm2\n\t"
- /*"Divide" by 8.*/
- "psraw $3,%%mm3\n\t"
- "psraw $3,%%mm2\n\t"
- /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
- /*Free up mm5.*/
- "packuswb %%mm5,%%mm4\n\t"
- /*mm0=L L L L*/
- "movq (%[ll]),%%mm0\n\t"
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- "movq %%mm2,%%mm5\n\t"
- "pxor %%mm6,%%mm6\n\t"
- "movq %%mm0,%%mm7\n\t"
- "psubw %%mm0,%%mm6\n\t"
- "psllw $1,%%mm7\n\t"
- "psllw $1,%%mm6\n\t"
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- "pcmpgtw %%mm2,%%mm7\n\t"
- "pcmpgtw %%mm6,%%mm5\n\t"
- "pand %%mm7,%%mm2\n\t"
- "movq %%mm0,%%mm7\n\t"
- "pand %%mm5,%%mm2\n\t"
- "psllw $1,%%mm7\n\t"
- "movq %%mm3,%%mm5\n\t"
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- "pcmpgtw %%mm3,%%mm7\n\t"
- "pcmpgtw %%mm6,%%mm5\n\t"
- "pand %%mm7,%%mm3\n\t"
- "movq %%mm0,%%mm7\n\t"
- "pand %%mm5,%%mm3\n\t"
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- "psraw $1,%%mm6\n\t"
- "movq %%mm2,%%mm5\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm5=R_i>L?FF:00*/
- "pcmpgtw %%mm0,%%mm5\n\t"
- /*mm6=-L>R_i?FF:00*/
- "pcmpgtw %%mm2,%%mm6\n\t"
- /*mm7=R_i>L?2L:0*/
- "pand %%mm5,%%mm7\n\t"
- /*mm2=R_i>L?R_i-2L:R_i*/
- "psubw %%mm7,%%mm2\n\t"
- "movq %%mm0,%%mm7\n\t"
- /*mm5=-L>R_i||R_i>L*/
- "por %%mm6,%%mm5\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm7=-L>R_i?2L:0*/
- "pand %%mm6,%%mm7\n\t"
- "pxor %%mm6,%%mm6\n\t"
- /*mm2=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm7,%%mm2\n\t"
- "psubw %%mm0,%%mm6\n\t"
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- "pand %%mm2,%%mm5\n\t"
- "movq %%mm0,%%mm7\n\t"
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm5,%%mm2\n\t"
- "psllw $1,%%mm7\n\t"
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm5,%%mm2\n\t"
- "movq %%mm3,%%mm5\n\t"
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm6=-L>R_i?FF:00*/
- "pcmpgtw %%mm3,%%mm6\n\t"
- /*mm5=R_i>L?FF:00*/
- "pcmpgtw %%mm0,%%mm5\n\t"
- /*mm7=R_i>L?2L:0*/
- "pand %%mm5,%%mm7\n\t"
- /*mm2=R_i>L?R_i-2L:R_i*/
- "psubw %%mm7,%%mm3\n\t"
- "psllw $1,%%mm0\n\t"
- /*mm5=-L>R_i||R_i>L*/
- "por %%mm6,%%mm5\n\t"
- /*mm0=-L>R_i?2L:0*/
- "pand %%mm6,%%mm0\n\t"
- /*mm3=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm0,%%mm3\n\t"
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- "pand %%mm3,%%mm5\n\t"
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm5,%%mm3\n\t"
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm5,%%mm3\n\t"
- /*Unfortunately, there's no unsigned byte+signed byte with unsigned
- saturation op code, so we have to promote things back 16 bits.*/
- "pxor %%mm0,%%mm0\n\t"
- "movq %%mm4,%%mm5\n\t"
- "punpcklbw %%mm0,%%mm4\n\t"
- "punpckhbw %%mm0,%%mm5\n\t"
- "movq %%mm1,%%mm6\n\t"
- "punpcklbw %%mm0,%%mm1\n\t"
- "punpckhbw %%mm0,%%mm6\n\t"
- /*_pix[0...8+_ystride]+=R_i*/
- "paddw %%mm2,%%mm4\n\t"
- "paddw %%mm3,%%mm5\n\t"
- /*_pix[0...8+_ystride*2]-=R_i*/
- "psubw %%mm2,%%mm1\n\t"
- "psubw %%mm3,%%mm6\n\t"
- "packuswb %%mm5,%%mm4\n\t"
- "packuswb %%mm6,%%mm1\n\t"
- /*Write it back out.*/
- "movq %%mm4,(%[pix],%[ystride])\n\t"
- "movq %%mm1,(%[pix],%[ystride],2)\n\t"
- :[s]"=&S"(esi)
- :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll),
- [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
- :"memory"
- );
-}
-
-/*This code implements the bulk of loop_filter_h().
- Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
- four p0's to one register we must transpose the values in four mmx regs.
- When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
- const ogg_int16_t *_ll){
- long esi;
- long edi;
- __asm__ __volatile__(
- /*x x x x 3 2 1 0*/
- "movd (%[pix]),%%mm0\n\t"
- /*esi=_ystride*3*/
- "lea (%[ystride],%[ystride],2),%[s]\n\t"
- /*x x x x 7 6 5 4*/
- "movd (%[pix],%[ystride]),%%mm1\n\t"
- /*x x x x B A 9 8*/
- "movd (%[pix],%[ystride],2),%%mm2\n\t"
- /*x x x x F E D C*/
- "movd (%[pix],%[s]),%%mm3\n\t"
- /*mm0=7 3 6 2 5 1 4 0*/
- "punpcklbw %%mm1,%%mm0\n\t"
- /*mm2=F B E A D 9 C 8*/
- "punpcklbw %%mm3,%%mm2\n\t"
- /*mm1=7 3 6 2 5 1 4 0*/
- "movq %%mm0,%%mm1\n\t"
- /*mm0=F B 7 3 E A 6 2*/
- "punpckhwd %%mm2,%%mm0\n\t"
- /*mm1=D 9 5 1 C 8 4 0*/
- "punpcklwd %%mm2,%%mm1\n\t"
- "pxor %%mm7,%%mm7\n\t"
- /*mm5=D 9 5 1 C 8 4 0*/
- "movq %%mm1,%%mm5\n\t"
- /*mm1=x C x 8 x 4 x 0==pix[0]*/
- "punpcklbw %%mm7,%%mm1\n\t"
- /*mm5=x D x 9 x 5 x 1==pix[1]*/
- "punpckhbw %%mm7,%%mm5\n\t"
- /*mm3=F B 7 3 E A 6 2*/
- "movq %%mm0,%%mm3\n\t"
- /*mm0=x E x A x 6 x 2==pix[2]*/
- "punpcklbw %%mm7,%%mm0\n\t"
- /*mm3=x F x B x 7 x 3==pix[3]*/
- "punpckhbw %%mm7,%%mm3\n\t"
- /*mm1=mm1-mm3==pix[0]-pix[3]*/
- "psubw %%mm3,%%mm1\n\t"
- /*Save a copy of pix[2] for later.*/
- "movq %%mm0,%%mm4\n\t"
- /*mm0=mm0-mm5==pix[2]-pix[1]*/
- "psubw %%mm5,%%mm0\n\t"
- /*Scale by 3.*/
- "pmullw %[OC_V3],%%mm0\n\t"
- /*f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
- "paddw %%mm1,%%mm0\n\t"
- /*Add 4.*/
- "paddw %[OC_V4],%%mm0\n\t"
- /*"Divide" by 8, producing the residuals R_i.*/
- "psraw $3,%%mm0\n\t"
- /*Now compute lflim of mm0 cf. Section 7.10 of the sepc.*/
- /*mm6=L L L L*/
- "movq (%[ll]),%%mm6\n\t"
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- "movq %%mm0,%%mm1\n\t"
- "pxor %%mm2,%%mm2\n\t"
- "movq %%mm6,%%mm3\n\t"
- "psubw %%mm6,%%mm2\n\t"
- "psllw $1,%%mm3\n\t"
- "psllw $1,%%mm2\n\t"
- /*mm0==R_3 R_2 R_1 R_0*/
- /*mm1==R_3 R_2 R_1 R_0*/
- /*mm2==-2L -2L -2L -2L*/
- /*mm3==2L 2L 2L 2L*/
- "pcmpgtw %%mm0,%%mm3\n\t"
- "pcmpgtw %%mm2,%%mm1\n\t"
- "pand %%mm3,%%mm0\n\t"
- "pand %%mm1,%%mm0\n\t"
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- "psraw $1,%%mm2\n\t"
- "movq %%mm0,%%mm1\n\t"
- "movq %%mm6,%%mm3\n\t"
- /*mm0==R_3 R_2 R_1 R_0*/
- /*mm1==R_3 R_2 R_1 R_0*/
- /*mm2==-L -L -L -L*/
- /*mm6==L L L L*/
- /*mm2=-L>R_i?FF:00*/
- "pcmpgtw %%mm0,%%mm2\n\t"
- /*mm1=R_i>L?FF:00*/
- "pcmpgtw %%mm6,%%mm1\n\t"
- /*mm3=2L 2L 2L 2L*/
- "psllw $1,%%mm3\n\t"
- /*mm6=2L 2L 2L 2L*/
- "psllw $1,%%mm6\n\t"
- /*mm3=R_i>L?2L:0*/
- "pand %%mm1,%%mm3\n\t"
- /*mm6=-L>R_i?2L:0*/
- "pand %%mm2,%%mm6\n\t"
- /*mm0=R_i>L?R_i-2L:R_i*/
- "psubw %%mm3,%%mm0\n\t"
- /*mm1=-L>R_i||R_i>L*/
- "por %%mm2,%%mm1\n\t"
- /*mm0=-L>R_i?R_i+2L:R_i*/
- "paddw %%mm6,%%mm0\n\t"
- /*mm1=-L>R_i||R_i>L?R_i':0*/
- "pand %%mm0,%%mm1\n\t"
- /*mm0=-L>R_i||R_i>L?0:R_i*/
- "psubw %%mm1,%%mm0\n\t"
- /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
- "psubw %%mm1,%%mm0\n\t"
- /*_pix[1]+=R_i;*/
- "paddw %%mm0,%%mm5\n\t"
- /*_pix[2]-=R_i;*/
- "psubw %%mm0,%%mm4\n\t"
- /*mm5=x x x x D 9 5 1*/
- "packuswb %%mm7,%%mm5\n\t"
- /*mm4=x x x x E A 6 2*/
- "packuswb %%mm7,%%mm4\n\t"
- /*mm5=E D A 9 6 5 2 1*/
- "punpcklbw %%mm4,%%mm5\n\t"
- /*edi=6 5 2 1*/
- "movd %%mm5,%%edi\n\t"
- "movw %%di,1(%[pix])\n\t"
- /*Why is there such a big stall here?*/
- "psrlq $32,%%mm5\n\t"
- "shrl $16,%%edi\n\t"
- "movw %%di,1(%[pix],%[ystride])\n\t"
- /*edi=E D A 9*/
- "movd %%mm5,%%edi\n\t"
- "movw %%di,1(%[pix],%[ystride],2)\n\t"
- "shrl $16,%%edi\n\t"
- "movw %%di,1(%[pix],%[s])\n\t"
- :[s]"=&S"(esi),[d]"=&D"(edi),
- [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
- :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
- :"memory"
- );
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _pix-=2;
- loop_filter_h4(_pix,_ystride,_ll);
- loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){
- int j;
- ogg_int16_t __attribute__((aligned(8))) ll[4];
- unsigned char *cp = pbi->display_fragments;
- ogg_uint32_t *bp = pbi->recon_pixel_index_table;
-
- if ( FLimit == 0 ) return;
- ll[0]=ll[1]=ll[2]=ll[3]=FLimit;
-
- for ( j = 0; j < 3 ; j++){
- ogg_uint32_t *bp_begin = bp;
- ogg_uint32_t *bp_end;
- int stride;
- int h;
-
- switch(j) {
- case 0: /* y */
- bp_end = bp + pbi->YPlaneFragments;
- h = pbi->HFragments;
- stride = pbi->YStride;
- break;
- default: /* u,v, 4:20 specific */
- bp_end = bp + pbi->UVPlaneFragments;
- h = pbi->HFragments >> 1;
- stride = pbi->UVStride;
- break;
- }
-
- while(bpbp_left)
- loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll);
- if(bp_left>bp_begin)
- loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll);
- if(bp+1LastFrameRecon[bp[0]]+8,stride,ll);
- if(bp+hLastFrameRecon[bp[h]],stride,ll);
- }
- bp++;
- cp++;
- }
- }
- }
-
- __asm__ __volatile__("emms\n\t");
-}
-
-/* install our implementation in the function table */
-void dsp_mmx_dct_decode_init(DspFunctions *funcs)
-{
- funcs->LoopFilter = loop_filter_mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c
deleted file mode 100644
index 6c2689e63..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dsp_mmx.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-typedef unsigned long long ogg_uint64_t;
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL;
-
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
- ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
-
- ".rept 8 \n\t"
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */
- " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr */
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */
- " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */
- " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr) */
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */
- " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr) */
- /* start calculation */
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ReconPtr */
- " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ReconPtr */
- " movq %%mm0, (%2) \n\t" /* write answer out */
- " movq %%mm2, 8(%2) \n\t" /* write answer out */
- /* Increment pointers */
- " add $16, %2 \n\t"
- " add %3, %0 \n\t"
- " add %4, %1 \n\t"
- ".endr \n\t"
-
- : "+r" (FiltPtr),
- "+r" (ReconPtr),
- "+r" (DctInputPtr)
- : "r" ((ogg_uint64_t)PixelsPerLine),
- "r" ((ogg_uint64_t)ReconPixelsPerLine)
- : "memory"
- );
-}
-
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine)
-{
- ogg_uint64_t ppl = PixelsPerLine;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
- " movq %[V128], %%mm1 \n\t"
-
- ".rept 8 \n\t"
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */
- /* start calculation */
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - 128 */
- " psubw %%mm1, %%mm2 \n\t" /* mm2 = FiltPtr - 128 */
- " movq %%mm0, (%1) \n\t" /* write answer out */
- " movq %%mm2, 8(%1) \n\t" /* write answer out */
- /* Increment pointers */
- " add $16, %1 \n\t"
- " add %2, %0 \n\t"
- ".endr \n\t"
-
- : "+r" (FiltPtr),
- "+r" (DctInputPtr)
- : "r" (ppl), /* gcc bug? a cast won't work here, e.g. (ogg_uint64_t)PixelsPerLine */
- [V128] "m" (V128)
- : "memory"
- );
-}
-
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
- unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
- ogg_uint32_t PixelsPerLine,
- ogg_uint32_t ReconPixelsPerLine)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm7, %%mm7 \n\t"
-
- ".rept 8 \n\t"
- " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */
- " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */
- " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */
- " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */
- " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */
- " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */
- /* convert from UINT8 to INT16 */
- " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */
- " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */
- " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */
- " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */
- " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */
- " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */
- /* average ReconPtr1 and ReconPtr2 */
- " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */
- " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */
- " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
- " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
- " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
- " movq %%mm0, (%3) \n\t" /* write answer out */
- " movq %%mm2, 8(%3) \n\t" /* write answer out */
- /* Increment pointers */
- " add $16, %3 \n\t"
- " add %4, %0 \n\t"
- " add %5, %1 \n\t"
- " add %5, %2 \n\t"
- ".endr \n\t"
-
- : "+r" (FiltPtr),
- "+r" (ReconPtr1),
- "+r" (ReconPtr2),
- "+r" (DctInputPtr)
- : "r" ((ogg_uint64_t)PixelsPerLine),
- "r" ((ogg_uint64_t)ReconPixelsPerLine)
- : "memory"
- );
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
- ogg_uint64_t XSum;
- ogg_uint64_t XXSum;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%rdi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
- " movq %%mm0, %%mm2 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %3, %2 \n\t" /* Inc pointer into src data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%rdi \n\t"
- " movsx %%di, %%rdi \n\t"
- " mov %%rdi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=r" (XSum),
- "=r" (XXSum),
- "+r" (DataPtr)
- : "r" ((ogg_uint64_t)Stride)
- : "rdi", "memory"
- );
-
- /* Compute population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
- ogg_uint64_t XSum;
- ogg_uint64_t XXSum;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%rdi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
- " movq (%3), %%mm1 \n\t"
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm6, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm6, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %4, %2 \n\t" /* Inc pointer into src data */
- " add %5, %3 \n\t" /* Inc pointer into ref data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%rdi \n\t"
- " movsx %%di, %%rdi \n\t"
- " mov %%rdi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr)
- : "r" ((ogg_uint64_t)SrcStride),
- "r" ((ogg_uint64_t)RefStride)
- : "rdi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static void restore_fpu (void)
-{
- __asm__ __volatile__ (
- " emms \n\t"
- );
-}
-
-void dsp_mmx_init(DspFunctions *funcs)
-{
- funcs->restore_fpu = restore_fpu;
- funcs->sub8x8 = sub8x8__mmx;
- funcs->sub8x8_128 = sub8x8_128__mmx;
- funcs->sub8x8avg2 = sub8x8avg2__mmx;
- funcs->intra8x8_err = intra8x8_err__mmx;
- funcs->inter8x8_err = inter8x8_err__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c b/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c
deleted file mode 100644
index f0aeed96e..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: dsp_mmxext.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-typedef unsigned long long ogg_uint64_t;
-
-static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
-
- ".rept 7 \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t"
- " psadbw %%mm1, %%mm0 \n\t"
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */
- " add %4, %2 \n\t" /* Inc pointer into ref data */
- ".endr \n\t"
-
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t"
- " psadbw %%mm1, %%mm0 \n\t"
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */
- " movd %%mm7, %0 \n\t"
-
- : "=r" (DiffVal),
- "+r" (ptr1),
- "+r" (ptr2)
- : "r" ((ogg_uint64_t)stride1),
- "r" ((ogg_uint64_t)stride2)
- : "memory"
- );
-
- return DiffVal;
-}
-
-static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
- unsigned char *ptr2, ogg_uint32_t stride2,
- ogg_uint32_t thres)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
-
- ".rept 8 \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t"
- " psadbw %%mm1, %%mm0 \n\t"
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */
- " add %4, %2 \n\t" /* Inc pointer into ref data */
- ".endr \n\t"
-
- " movd %%mm7, %0 \n\t"
-
- : "=r" (DiffVal),
- "+r" (ptr1),
- "+r" (ptr2)
- : "r" ((ogg_uint64_t)stride1),
- "r" ((ogg_uint64_t)stride2)
- : "memory"
- );
-
- return DiffVal;
-}
-
-static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
- ogg_uint32_t thres)
-{
- ogg_uint32_t DiffVal;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
- " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */
- ".rept 8 \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t"
- " movq (%3), %%mm2 \n\t"
- " pavgb %%mm2, %%mm1 \n\t"
- " psadbw %%mm1, %%mm0 \n\t"
-
- " add %4, %1 \n\t" /* Inc pointer into the new data */
- " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */
- " add %5, %2 \n\t" /* Inc pointer into ref data */
- " add %5, %3 \n\t" /* Inc pointer into ref data */
- ".endr \n\t"
-
- " movd %%mm7, %0 \n\t"
- : "=m" (DiffVal),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "r" ((ogg_uint64_t)SrcStride),
- "r" ((ogg_uint64_t)RefStride)
- : "memory"
- );
-
- return DiffVal;
-}
-
-static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " movd (%1), %%mm0 \n\t"
- " movd (%2), %%mm1 \n\t"
- " psadbw %%mm0, %%mm1 \n\t"
- " movd 4(%1), %%mm2 \n\t"
- " movd 4(%2), %%mm3 \n\t"
- " psadbw %%mm2, %%mm3 \n\t"
-
- " pmaxsw %%mm1, %%mm3 \n\t"
- " movd %%mm3, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=m" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- :
- : "memory"
- );
-
- return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2,
- ogg_uint32_t stride)
-{
- ogg_uint32_t MaxSad;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */
- " pxor %%mm4, %%mm4 \n\t" /* mm4 low sum */
- " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */
- " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */
- " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */
- " mov $4, %%rdi \n\t" /* 4 rows */
- "1: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " mov $4, %%rdi \n\t" /* 4 rows */
- "2: \n\t"
- " movq (%1), %%mm0 \n\t" /* take 8 bytes */
- " movq (%2), %%mm1 \n\t" /* take 8 bytes */
-
- " movq %%mm0, %%mm2 \n\t"
- " psubusb %%mm1, %%mm0 \n\t" /* A - B */
- " psubusb %%mm2, %%mm1 \n\t" /* B - A */
- " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */
- " movq %%mm0, %%mm1 \n\t"
-
- " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */
- " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... */
- " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */
- " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */
- " add %3, %1 \n\t" /* Inc pointer into the new data */
- " add %3, %2 \n\t" /* Inc pointer into the new data */
-
- " dec %%rdi \n\t"
- " jnz 2b \n\t"
-
- " pmaxsw %%mm6, %%mm7 \n\t"
- " pmaxsw %%mm4, %%mm5 \n\t"
- " pmaxsw %%mm5, %%mm7 \n\t"
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $32, %%mm6 \n\t"
- " pmaxsw %%mm6, %%mm7 \n\t"
- " movq %%mm7, %%mm6 \n\t"
- " psrlq $16, %%mm6 \n\t"
- " pmaxsw %%mm6, %%mm7 \n\t"
- " movd %%mm7, %0 \n\t"
- " andl $0xffff, %0 \n\t"
-
- : "=r" (MaxSad),
- "+r" (Src1),
- "+r" (Src2)
- : "r" ((ogg_uint64_t)stride)
- : "memory", "rdi"
- );
-
- return MaxSad;
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
- unsigned char *RefDataPtr1,
- unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
- ogg_uint64_t XSum;
- ogg_uint64_t XXSum;
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm4, %%mm4 \n\t"
- " pxor %%mm5, %%mm5 \n\t"
- " pxor %%mm6, %%mm6 \n\t"
- " pxor %%mm7, %%mm7 \n\t"
- " mov $8, %%rdi \n\t"
- "1: \n\t"
- " movq (%2), %%mm0 \n\t" /* take 8 bytes */
-
- " movq (%3), %%mm2 \n\t"
- " movq (%4), %%mm1 \n\t" /* take average of mm2 and mm1 */
- " pavgb %%mm2, %%mm1 \n\t"
-
- " movq %%mm0, %%mm2 \n\t"
- " movq %%mm1, %%mm3 \n\t"
-
- " punpcklbw %%mm6, %%mm0 \n\t"
- " punpcklbw %%mm4, %%mm1 \n\t"
- " punpckhbw %%mm6, %%mm2 \n\t"
- " punpckhbw %%mm4, %%mm3 \n\t"
-
- " psubsw %%mm1, %%mm0 \n\t"
- " psubsw %%mm3, %%mm2 \n\t"
-
- " paddw %%mm0, %%mm5 \n\t"
- " paddw %%mm2, %%mm5 \n\t"
-
- " pmaddwd %%mm0, %%mm0 \n\t"
- " pmaddwd %%mm2, %%mm2 \n\t"
-
- " paddd %%mm0, %%mm7 \n\t"
- " paddd %%mm2, %%mm7 \n\t"
-
- " add %5, %2 \n\t" /* Inc pointer into src data */
- " add %6, %3 \n\t" /* Inc pointer into ref data */
- " add %6, %4 \n\t" /* Inc pointer into ref data */
-
- " dec %%rdi \n\t"
- " jnz 1b \n\t"
-
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $32, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movq %%mm5, %%mm0 \n\t"
- " psrlq $16, %%mm5 \n\t"
- " paddw %%mm0, %%mm5 \n\t"
- " movd %%mm5, %%edi \n\t"
- " movsx %%di, %%edi \n\t"
- " movl %%edi, %0 \n\t"
-
- " movq %%mm7, %%mm0 \n\t"
- " psrlq $32, %%mm7 \n\t"
- " paddd %%mm0, %%mm7 \n\t"
- " movd %%mm7, %1 \n\t"
-
- : "=m" (XSum),
- "=m" (XXSum),
- "+r" (SrcData),
- "+r" (RefDataPtr1),
- "+r" (RefDataPtr2)
- : "r" ((ogg_uint64_t)SrcStride),
- "r" ((ogg_uint64_t)RefStride)
- : "rdi", "memory"
- );
-
- /* Compute and return population variance as mis-match metric. */
- return (( (XXSum<<6) - XSum*XSum ));
-}
-
-void dsp_mmxext_init(DspFunctions *funcs)
-{
- funcs->row_sad8 = row_sad8__mmxext;
- funcs->col_sad8x8 = col_sad8x8__mmxext;
- funcs->sad8x8 = sad8x8__mmxext;
- funcs->sad8x8_thres = sad8x8_thres__mmxext;
- funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext;
- funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c
deleted file mode 100644
index 3765561cf..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************/
-
-/* mmx fdct implementation for x86_64 */
-/* $Id: fdct_mmx.c 15397 2008-10-14 02:06:24Z tterribe $ */
-
-#include "theora/theora.h"
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC4S4 = 0x0b505b505b505b505LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL;
-
-#if defined(__MINGW32__) || defined(__CYGWIN__) || \
- defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
-# define M(a) "_" #a
-#else
-# define M(a) #a
-#endif
-
-/* execute stage 1 of forward DCT */
-#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp) \
- " movq " #ip0 ", %%mm0 \n\t" \
- " movq " #ip1 ", %%mm1 \n\t" \
- " movq " #ip3 ", %%mm2 \n\t" \
- " movq " #ip5 ", %%mm3 \n\t" \
- " movq %%mm0, %%mm4 \n\t" \
- " movq %%mm1, %%mm5 \n\t" \
- " movq %%mm2, %%mm6 \n\t" \
- " movq %%mm3, %%mm7 \n\t" \
- \
- " paddsw " #ip7 ", %%mm0 \n\t" /* mm0 = ip0 + ip7 = is07 */ \
- " paddsw " #ip2 ", %%mm1 \n\t" /* mm1 = ip1 + ip2 = is12 */ \
- " paddsw " #ip4 ", %%mm2 \n\t" /* mm2 = ip3 + ip4 = is34 */ \
- " paddsw " #ip6 ", %%mm3 \n\t" /* mm3 = ip5 + ip6 = is56 */ \
- " psubsw " #ip7 ", %%mm4 \n\t" /* mm4 = ip0 - ip7 = id07 */ \
- " psubsw " #ip2 ", %%mm5 \n\t" /* mm5 = ip1 - ip2 = id12 */ \
- \
- " psubsw %%mm2, %%mm0 \n\t" /* mm0 = is07 - is34 */ \
- \
- " paddsw %%mm2, %%mm2 \n\t" \
- \
- " psubsw " #ip4 ", %%mm6 \n\t" /* mm6 = ip3 - ip4 = id34 */ \
- \
- " paddsw %%mm0, %%mm2 \n\t" /* mm2 = is07 + is34 = is0734 */ \
- " psubsw %%mm3, %%mm1 \n\t" /* mm1 = is12 - is56 */ \
- " movq %%mm0," #temp " \n\t" /* Save is07 - is34 to free mm0; */ \
- " paddsw %%mm3, %%mm3 \n\t" \
- " paddsw %%mm1, %%mm3 \n\t" /* mm3 = is12 + 1s56 = is1256 */ \
- \
- " psubsw " #ip6 ", %%mm7 \n\t" /* mm7 = ip5 - ip6 = id56 */ \
- /* ------------------------------------------------------------------- */ \
- " psubsw %%mm7, %%mm5 \n\t" /* mm5 = id12 - id56 */ \
- " paddsw %%mm7, %%mm7 \n\t" \
- " paddsw %%mm5, %%mm7 \n\t" /* mm7 = id12 + id56 */ \
- /* ------------------------------------------------------------------- */ \
- " psubsw %%mm3, %%mm2 \n\t" /* mm2 = is0734 - is1256 */ \
- " paddsw %%mm3, %%mm3 \n\t" \
- \
- " movq %%mm2, %%mm0 \n\t" /* make a copy */ \
- " paddsw %%mm2, %%mm3 \n\t" /* mm3 = is0734 + is1256 */ \
- \
- " pmulhw %[xC4S4], %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \
- " paddw %%mm2, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \
- " psrlw $15, %%mm2 \n\t" \
- " paddw %%mm2, %%mm0 \n\t" /* Truncate mm0, now it is op[4] */ \
- \
- " movq %%mm3, %%mm2 \n\t" \
- " movq %%mm0," #ip4 " \n\t" /* save ip4, now mm0,mm2 are free */ \
- \
- " movq %%mm3, %%mm0 \n\t" \
- " pmulhw %[xC4S4], %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \
- \
- " psrlw $15, %%mm2 \n\t" \
- " paddw %%mm0, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) */ \
- " paddw %%mm2, %%mm3 \n\t" /* Truncate mm3, now it is op[0] */ \
- \
- " movq %%mm3," #ip0 " \n\t" \
- /* ------------------------------------------------------------------- */ \
- " movq " #temp ", %%mm3 \n\t" /* mm3 = irot_input_y */ \
- " pmulhw %[xC2S6], %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \
- \
- " movq " #temp ", %%mm2 \n\t" \
- " movq %%mm2, %%mm0 \n\t" \
- \
- " psrlw $15, %%mm2 \n\t" /* mm3 = xC2S6 * irot_input_y */ \
- " paddw %%mm0, %%mm3 \n\t" \
- \
- " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
- " movq %%mm5, %%mm0 \n\t" \
- \
- " movq %%mm5, %%mm2 \n\t" \
- " pmulhw %[xC6S2], %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \
- \
- " psrlw $15, %%mm2 \n\t" \
- " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \
- \
- " paddsw %%mm0, %%mm3 \n\t" /* ip[2] */ \
- " movq %%mm3," #ip2 " \n\t" /* Save ip2 */ \
- \
- " movq %%mm5, %%mm0 \n\t" \
- " movq %%mm5, %%mm2 \n\t" \
- \
- " pmulhw %[xC2S6], %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " movq " #temp ", %%mm3 \n\t" \
- " paddw %%mm0, %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x */ \
- \
- " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \
- " movq %%mm3, %%mm2 \n\t" \
- \
- " pmulhw %[xC6S2], %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
- " psubsw %%mm5, %%mm3 \n\t" \
- \
- " movq %%mm3," #ip6 " \n\t" \
- /* ------------------------------------------------------------------- */ \
- " movq %[xC4S4], %%mm0 \n\t" \
- " movq %%mm1, %%mm2 \n\t" \
- " movq %%mm1, %%mm3 \n\t" \
- \
- " pmulhw %%mm0, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm3, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) */ \
- " paddw %%mm2, %%mm1 \n\t" /* Truncate mm1, now it is icommon_product1 */ \
- \
- " movq %%mm7, %%mm2 \n\t" \
- " movq %%mm7, %%mm3 \n\t" \
- \
- " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm3, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) */ \
- " paddw %%mm2, %%mm7 \n\t" /* Truncate mm7, now it is icommon_product2 */ \
- /* ------------------------------------------------------------------- */ \
- " pxor %%mm0, %%mm0 \n\t" /* Clear mm0 */ \
- " psubsw %%mm6, %%mm0 \n\t" /* mm0 = - id34 */ \
- \
- " psubsw %%mm7, %%mm0 \n\t" /* mm0 = - ( id34 + idcommon_product2 ) */ \
- " paddsw %%mm6, %%mm6 \n\t" \
- " paddsw %%mm0, %%mm6 \n\t" /* mm6 = id34 - icommon_product2 */ \
- \
- " psubsw %%mm1, %%mm4 \n\t" /* mm4 = id07 - icommon_product1 */ \
- " paddsw %%mm1, %%mm1 \n\t" \
- " paddsw %%mm4, %%mm1 \n\t" /* mm1 = id07 + icommon_product1 */ \
- /* ------------------------------------------------------------------- */ \
- " movq %[xC1S7], %%mm7 \n\t" \
- " movq %%mm1, %%mm2 \n\t" \
- \
- " movq %%mm1, %%mm3 \n\t" \
- " pmulhw %%mm7, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \
- \
- " movq %[xC7S1], %%mm7 \n\t" \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm3, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x */ \
- " paddw %%mm2, %%mm1 \n\t" /* Trucated */ \
- \
- " pmulhw %%mm7, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x */ \
- " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \
- \
- " movq %%mm0, %%mm5 \n\t" \
- " movq %%mm0, %%mm2 \n\t" \
- \
- " movq %[xC1S7], %%mm7 \n\t" \
- " pmulhw %%mm7, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \
- \
- " movq %[xC7S1], %%mm7 \n\t" \
- " psrlw $15, %%mm2 \n\t" \
- \
- " paddw %%mm5, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y */ \
- " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \
- \
- " pmulhw %%mm7, %%mm5 \n\t" /* mm5 = xC7S1 * irot_input_y */ \
- " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \
- \
- " psubsw %%mm5, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \
- " paddsw %%mm0, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ \
- \
- " movq %%mm1," #ip1 " \n\t" \
- " movq %%mm3," #ip7 " \n\t" \
- /* ------------------------------------------------------------------- */ \
- " movq %[xC3S5], %%mm0 \n\t" \
- " movq %[xC5S3], %%mm1 \n\t" \
- \
- " movq %%mm6, %%mm5 \n\t" \
- " movq %%mm6, %%mm7 \n\t" \
- \
- " movq %%mm4, %%mm2 \n\t" \
- " movq %%mm4, %%mm3 \n\t" \
- \
- " pmulhw %%mm0, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \
- " pmulhw %%mm1, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y - irot_input_y */ \
- \
- " psrlw $15, %%mm2 \n\t" \
- " psrlw $15, %%mm5 \n\t" \
- \
- " paddw %%mm3, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x */ \
- " paddw %%mm7, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y */ \
- \
- " paddw %%mm2, %%mm4 \n\t" /* Truncated */ \
- " paddw %%mm5, %%mm6 \n\t" /* Truncated */ \
- \
- " psubsw %%mm6, %%mm4 \n\t" /* ip3 */ \
- " movq %%mm4," #ip3 " \n\t" \
- \
- " movq %%mm3, %%mm4 \n\t" \
- " movq %%mm7, %%mm6 \n\t" \
- \
- " pmulhw %%mm1, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \
- " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \
- \
- " paddw %%mm2, %%mm4 \n\t" \
- " paddw %%mm5, %%mm6 \n\t" \
- \
- " paddw %%mm4, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x */ \
- " paddw %%mm6, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y */ \
- \
- " paddw %%mm7, %%mm3 \n\t" /* ip5 */ \
- " movq %%mm3," #ip5 " \n\t"
-
-#define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7, \
- op0,op1,op2,op3,op4,op5,op6,op7) \
- " movq " #ip0 ", %%mm0 \n\t" /* mm0 = a0 a1 a2 a3 */ \
- " movq " #ip4 ", %%mm4 \n\t" /* mm4 = e4 e5 e6 e7 */ \
- " movq " #ip1 ", %%mm1 \n\t" /* mm1 = b0 b1 b2 b3 */ \
- " movq " #ip5 ", %%mm5 \n\t" /* mm5 = f4 f5 f6 f7 */ \
- " movq " #ip2 ", %%mm2 \n\t" /* mm2 = c0 c1 c2 c3 */ \
- " movq " #ip6 ", %%mm6 \n\t" /* mm6 = g4 g5 g6 g7 */ \
- " movq " #ip3 ", %%mm3 \n\t" /* mm3 = d0 d1 d2 d3 */ \
- " movq %%mm1," #op1 " \n\t" /* save b0 b1 b2 b3 */ \
- " movq " #ip7 ", %%mm7 \n\t" /* mm7 = h0 h1 h2 h3 */ \
- /* Transpose 2x8 block */ \
- " movq %%mm4, %%mm1 \n\t" /* mm1 = e3 e2 e1 e0 */ \
- " punpcklwd %%mm5, %%mm4 \n\t" /* mm4 = f1 e1 f0 e0 */ \
- " movq %%mm0," #op0 " \n\t" /* save a3 a2 a1 a0 */ \
- " punpckhwd %%mm5, %%mm1 \n\t" /* mm1 = f3 e3 f2 e2 */ \
- " movq %%mm6, %%mm0 \n\t" /* mm0 = g3 g2 g1 g0 */ \
- " punpcklwd %%mm7, %%mm6 \n\t" /* mm6 = h1 g1 h0 g0 */ \
- " movq %%mm4, %%mm5 \n\t" /* mm5 = f1 e1 f0 e0 */ \
- " punpckldq %%mm6, %%mm4 \n\t" /* mm4 = h0 g0 f0 e0 = MM4 */ \
- " punpckhdq %%mm6, %%mm5 \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \
- " movq %%mm1, %%mm6 \n\t" /* mm6 = f3 e3 f2 e2 */ \
- " movq %%mm4," #op4 " \n\t" \
- " punpckhwd %%mm7, %%mm0 \n\t" /* mm0 = h3 g3 h2 g2 */ \
- " movq %%mm5," #op5 " \n\t" \
- " punpckhdq %%mm0, %%mm6 \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \
- " movq " #op0 ", %%mm4 \n\t" /* mm4 = a3 a2 a1 a0 */ \
- " punpckldq %%mm0, %%mm1 \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \
- " movq " #op1 ", %%mm5 \n\t" /* mm5 = b3 b2 b1 b0 */ \
- " movq %%mm4, %%mm0 \n\t" /* mm0 = a3 a2 a1 a0 */ \
- " movq %%mm6," #op7 " \n\t" \
- " punpcklwd %%mm5, %%mm0 \n\t" /* mm0 = b1 a1 b0 a0 */ \
- " movq %%mm1," #op6 " \n\t" \
- " punpckhwd %%mm5, %%mm4 \n\t" /* mm4 = b3 a3 b2 a2 */ \
- " movq %%mm2, %%mm5 \n\t" /* mm5 = c3 c2 c1 c0 */ \
- " punpcklwd %%mm3, %%mm2 \n\t" /* mm2 = d1 c1 d0 c0 */ \
- " movq %%mm0, %%mm1 \n\t" /* mm1 = b1 a1 b0 a0 */ \
- " punpckldq %%mm2, %%mm0 \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \
- " punpckhdq %%mm2, %%mm1 \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \
- " movq %%mm4, %%mm2 \n\t" /* mm2 = b3 a3 b2 a2 */ \
- " movq %%mm0," #op0 " \n\t" \
- " punpckhwd %%mm3, %%mm5 \n\t" /* mm5 = d3 c3 d2 c2 */ \
- " movq %%mm1," #op1 " \n\t" \
- " punpckhdq %%mm5, %%mm4 \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \
- " punpckldq %%mm5, %%mm2 \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \
- " movq %%mm4," #op3 " \n\t" \
- " movq %%mm2," #op2 " \n\t"
-
-
-/* This performs a 2D Forward DCT on an 8x8 block with short
- coefficients. We try to do the truncation to match the C
- version. */
-static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
-{
- ogg_int16_t __attribute__((aligned(8))) temp[8*8];
-
- __asm__ __volatile__ (
- " .balign 16 \n\t"
- /*
- * Input data is an 8x8 block. To make processing of the data more efficent
- * we will transpose the block of data to two 4x8 blocks???
- */
- Transpose_mmx ( (%0), 16(%0), 32(%0), 48(%0), 8(%0), 24(%0), 40(%0), 56(%0),
- (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1))
- Fdct_mmx ( (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1), (%2))
-
- Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0),
- 64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1))
- Fdct_mmx (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
- Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1),
- 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1))
- Fdct_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2))
-
- Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1),
- 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1))
- Fdct_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
- " emms \n\t"
-
- : "+r" (InputData),
- "+r" (OutputData)
- : "r" (temp),
- [xC1S7] "m" (xC1S7), /* gcc 3.1+ allows named asm parameters */
- [xC2S6] "m" (xC2S6),
- [xC3S5] "m" (xC3S5),
- [xC4S4] "m" (xC4S4),
- [xC5S3] "m" (xC5S3),
- [xC6S2] "m" (xC6S2),
- [xC7S1] "m" (xC7S1)
- : "memory"
- );
-}
-
-/* install our implementation in the function table */
-void dsp_mmx_fdct_init(DspFunctions *funcs)
-{
- funcs->fdct_short = fdct_short__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c
deleted file mode 100644
index b87db6085..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: idct_mmx.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-/* nothing implemented right now */
-void dsp_mmx_idct_init(DspFunctions *funcs)
-{
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c
deleted file mode 100644
index b9b86e982..000000000
--- a/Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id: recon_mmx.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-typedef unsigned long long ogg_uint64_t;
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL;
-
-static void copy8x8__mmx (unsigned char *src,
- unsigned char *dest,
- ogg_uint32_t stride)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " lea (%2, %2, 2), %%rdi \n\t"
-
- " movq (%1), %%mm0 \n\t"
- " movq (%1, %2), %%mm1 \n\t"
- " movq (%1, %2, 2), %%mm2 \n\t"
- " movq (%1, %%rdi), %%mm3 \n\t"
-
- " lea (%1, %2, 4), %1 \n\t"
-
- " movq %%mm0, (%0) \n\t"
- " movq %%mm1, (%0, %2) \n\t"
- " movq %%mm2, (%0, %2, 2) \n\t"
- " movq %%mm3, (%0, %%rdi) \n\t"
-
- " lea (%0, %2, 4), %0 \n\t"
-
- " movq (%1), %%mm0 \n\t"
- " movq (%1, %2), %%mm1 \n\t"
- " movq (%1, %2, 2), %%mm2 \n\t"
- " movq (%1, %%rdi), %%mm3 \n\t"
-
- " movq %%mm0, (%0) \n\t"
- " movq %%mm1, (%0, %2) \n\t"
- " movq %%mm2, (%0, %2, 2) \n\t"
- " movq %%mm3, (%0, %%rdi) \n\t"
- : "+a" (dest)
- : "c" (src),
- "d" ((ogg_uint64_t)stride)
- : "memory", "rdi"
- );
-}
-
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " movq %[V128], %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */
-
- " lea 128(%1), %%rdi \n\t" /* Endpoint in input buffer */
- "1: \n\t"
- " movq (%1), %%mm2 \n\t" /* First four input values */
-
- " packsswb 8(%1), %%mm2 \n\t" /* pack with next(high) four values */
- " por %%mm0, %%mm0 \n\t"
- " pxor %%mm0, %%mm2 \n\t" /* Convert result to unsigned (same as add 128) */
- " lea 16(%1), %1 \n\t" /* Step source buffer */
- " cmp %%rdi, %1 \n\t" /* are we done */
-
- " movq %%mm2, (%0) \n\t" /* store results */
-
- " lea (%0, %2), %0 \n\t" /* Step output buffer */
- " jc 1b \n\t" /* Loop back if we are not done */
- : "+r" (ReconPtr)
- : "r" (ChangePtr),
- "r" ((ogg_uint64_t)LineStep),
- [V128] "m" (V128)
- : "memory", "rdi"
- );
-}
-
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
- ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm0, %%mm0 \n\t"
- " lea 128(%1), %%rdi \n\t"
-
- "1: \n\t"
- " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */
-
- " movq (%1), %%mm4 \n\t" /* first 4 changes */
- " movq %%mm2, %%mm3 \n\t"
- " movq 8(%1), %%mm5 \n\t" /* last 4 changes */
- " punpcklbw %%mm0, %%mm2 \n\t" /* turn first 4 refs into positive 16-bit #s */
- " paddsw %%mm4, %%mm2 \n\t" /* add in first 4 changes */
- " punpckhbw %%mm0, %%mm3 \n\t" /* turn last 4 refs into positive 16-bit #s */
- " paddsw %%mm5, %%mm3 \n\t" /* add in last 4 changes */
- " add %3, %2 \n\t" /* next row of reference pixels */
- " packuswb %%mm3, %%mm2 \n\t" /* pack result to unsigned 8-bit values */
- " lea 16(%1), %1 \n\t" /* next row of changes */
- " cmp %%rdi, %1 \n\t" /* are we done? */
-
- " movq %%mm2, (%0) \n\t" /* store result */
-
- " lea (%0, %3), %0 \n\t" /* next row of output */
- " jc 1b \n\t"
- : "+r" (ReconPtr)
- : "r" (ChangePtr),
- "r" (RefPtr),
- "r" ((ogg_uint64_t)LineStep)
- : "memory", "rdi"
- );
-}
-
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
- unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
- ogg_uint32_t LineStep)
-{
- __asm__ __volatile__ (
- " .balign 16 \n\t"
-
- " pxor %%mm0, %%mm0 \n\t"
- " lea 128(%1), %%rdi \n\t"
-
- "1: \n\t"
- " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */
- " movq (%3), %%mm4 \n\t" /* (+3 misaligned) 8 reference pixels */
-
- " movq %%mm2, %%mm3 \n\t"
- " punpcklbw %%mm0, %%mm2 \n\t" /* mm2 = start ref1 as positive 16-bit #s */
- " movq %%mm4, %%mm5 \n\t"
- " movq (%1), %%mm6 \n\t" /* first 4 changes */
- " punpckhbw %%mm0, %%mm3 \n\t" /* mm3 = end ref1 as positive 16-bit #s */
- " movq 8(%1), %%mm7 \n\t" /* last 4 changes */
- " punpcklbw %%mm0, %%mm4 \n\t" /* mm4 = start ref2 as positive 16-bit #s */
- " punpckhbw %%mm0, %%mm5 \n\t" /* mm5 = end ref2 as positive 16-bit #s */
- " paddw %%mm4, %%mm2 \n\t" /* mm2 = start (ref1 + ref2) */
- " paddw %%mm5, %%mm3 \n\t" /* mm3 = end (ref1 + ref2) */
- " psrlw $1, %%mm2 \n\t" /* mm2 = start (ref1 + ref2)/2 */
- " psrlw $1, %%mm3 \n\t" /* mm3 = end (ref1 + ref2)/2 */
- " paddw %%mm6, %%mm2 \n\t" /* add changes to start */
- " paddw %%mm7, %%mm3 \n\t" /* add changes to end */
- " lea 16(%1), %1 \n\t" /* next row of changes */
- " packuswb %%mm3, %%mm2 \n\t" /* pack start|end to unsigned 8-bit */
- " add %4, %2 \n\t" /* next row of reference pixels */
- " add %4, %3 \n\t" /* next row of reference pixels */
- " movq %%mm2, (%0) \n\t" /* store result */
- " add %4, %0 \n\t" /* next row of output */
- " cmp %%rdi, %1 \n\t" /* are we done? */
- " jc 1b \n\t"
- : "+r" (ReconPtr)
- : "r" (ChangePtr),
- "r" (RefPtr1),
- "r" (RefPtr2),
- "r" ((ogg_uint64_t)LineStep)
- : "memory", "rdi"
- );
-}
-
-void dsp_mmx_recon_init(DspFunctions *funcs)
-{
- funcs->copy8x8 = copy8x8__mmx;
- funcs->recon_intra8x8 = recon_intra8x8__mmx;
- funcs->recon_inter8x8 = recon_inter8x8__mmx;
- funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
-}
-
-#endif /* USE_ASM */
diff --git a/Engine/lib/libtheora/lib/encapiwrapper.c b/Engine/lib/libtheora/lib/encapiwrapper.c
new file mode 100644
index 000000000..874f12442
--- /dev/null
+++ b/Engine/lib/libtheora/lib/encapiwrapper.c
@@ -0,0 +1,168 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "encint.h"
+#include "theora/theoraenc.h"
+
+
+
+static void th_enc_api_clear(th_api_wrapper *_api){
+ if(_api->encode)th_encode_free(_api->encode);
+ memset(_api,0,sizeof(*_api));
+}
+
+static void theora_encode_clear(theora_state *_te){
+ if(_te->i!=NULL)theora_info_clear(_te->i);
+ memset(_te,0,sizeof(*_te));
+}
+
+static int theora_encode_control(theora_state *_te,int _req,
+ void *_buf,size_t _buf_sz){
+ return th_encode_ctl(((th_api_wrapper *)_te->i->codec_setup)->encode,
+ _req,_buf,_buf_sz);
+}
+
+static ogg_int64_t theora_encode_granule_frame(theora_state *_te,
+ ogg_int64_t _gp){
+ return th_granule_frame(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp);
+}
+
+static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){
+ return th_granule_time(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp);
+}
+
+static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={ /*Positional initializer: entry order must follow the struct's member order.*/
+ (oc_state_clear_func)theora_encode_clear,
+ (oc_state_control_func)theora_encode_control,
+ (oc_state_granule_frame_func)theora_encode_granule_frame,
+ (oc_state_granule_time_func)theora_encode_granule_time,
+};
+
+int theora_encode_init(theora_state *_te,theora_info *_ci){
+ th_api_info *apiinfo;
+ th_info info;
+ ogg_uint32_t keyframe_frequency_force;
+ /*Allocate our own combined API wrapper/theora_info struct.
+ We put them both in one malloc'd block so that when the API wrapper is
+ freed, the info struct goes with it.
+ This avoids having to figure out whether or not we need to free the info
+ struct in either theora_info_clear() or theora_clear().*/
+ apiinfo=(th_api_info *)_ogg_malloc(sizeof(*apiinfo));
+ if(apiinfo==NULL)return TH_EFAULT;
+ /*Make our own copy of the info struct, since its lifetime should be
+ independent of the one we were passed in.*/
+ *&apiinfo->info=*_ci; /*Plain struct assignment; the leading "*&" cancels out.*/
+ oc_theora_info2th_info(&info,_ci);
+ apiinfo->api.encode=th_encode_alloc(&info);
+ if(apiinfo->api.encode==NULL){
+ _ogg_free(apiinfo);
+ return OC_EINVAL;
+ }
+ apiinfo->api.clear=(oc_setup_clear_func)th_enc_api_clear;
+ /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+ _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL;
+ _te->internal_decode=NULL;
+ _te->granulepos=0;
+ _te->i=&apiinfo->info;
+ _te->i->codec_setup=&apiinfo->api;
+ /*Set the precise requested keyframe frequency: the forced value if keyframe_auto_p is set, else keyframe_frequency.*/
+ keyframe_frequency_force=_ci->keyframe_auto_p?
+ _ci->keyframe_frequency_force:_ci->keyframe_frequency;
+ th_encode_ctl(apiinfo->api.encode,
+ TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
+ &keyframe_frequency_force,sizeof(keyframe_frequency_force));
+ /*TODO: Additional codec setup using the extra fields in theora_info.*/
+ return 0;
+}
+
+int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
+ th_api_wrapper *api;
+ th_ycbcr_buffer buf;
+ int ret;
+ api=(th_api_wrapper *)_te->i->codec_setup;
+ buf[0].width=_yuv->y_width; /*Plane 0 is described by the y_* fields.*/
+ buf[0].height=_yuv->y_height;
+ buf[0].stride=_yuv->y_stride;
+ buf[0].data=_yuv->y;
+ buf[1].width=_yuv->uv_width; /*Planes 1 and 2 share the uv_* geometry and differ only in data pointer.*/
+ buf[1].height=_yuv->uv_height;
+ buf[1].stride=_yuv->uv_stride;
+ buf[1].data=_yuv->u;
+ buf[2].width=_yuv->uv_width;
+ buf[2].height=_yuv->uv_height;
+ buf[2].stride=_yuv->uv_stride;
+ buf[2].data=_yuv->v;
+ ret=th_encode_ycbcr_in(api->encode,buf);
+ if(ret<0)return ret; /*On error, granulepos is left unchanged.*/
+ _te->granulepos=api->encode->state.granpos; /*Mirror the encoder's granule position into the legacy state.*/
+ return ret;
+}
+
+int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
+ th_api_wrapper *api;
+ api=(th_api_wrapper *)_te->i->codec_setup;
+ return th_encode_packetout(api->encode,_last_p,_op);
+}
+
+int theora_encode_header(theora_state *_te,ogg_packet *_op){
+ oc_enc_ctx *enc;
+ th_api_wrapper *api;
+ int ret;
+ api=(th_api_wrapper *)_te->i->codec_setup;
+ enc=api->encode;
+ /*If we've already started encoding, fail.*/
+ if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){
+ return TH_EINVAL;
+ }
+ /*Reset the state to make sure we output an info packet.*/
+ enc->packet_state=OC_PACKET_INFO_HDR;
+ ret=th_encode_flushheader(api->encode,NULL,_op);
+ return ret>=0?0:ret; /*Any non-negative result is reported as 0; errors pass through.*/
+}
+
+int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
+ oggpack_buffer opb;
+ void *buf;
+ int packet_state;
+ int ret;
+ packet_state=OC_PACKET_COMMENT_HDR; /*Select the comment-header case of oc_state_flushheader().*/
+ oggpackB_writeinit(&opb);
+ ret=oc_state_flushheader(NULL,&packet_state,&opb,NULL,NULL,
+ th_version_string(),(th_comment *)_tc,_op);
+ if(ret>=0){
+ /*The oggpack_buffer's lifetime ends with this function, so we have to
+ copy out the packet contents.
+ Presumably the application knows it is supposed to free this.
+ This part works nothing like the Vorbis API, and the documentation on it
+ has been wrong for some time, claiming libtheora owned the memory.*/
+ buf=_ogg_malloc(_op->bytes);
+ if(buf==NULL){
+ _op->packet=NULL;
+ ret=TH_EFAULT;
+ }
+ else{
+ memcpy(buf,_op->packet,_op->bytes);
+ _op->packet=buf;
+ ret=0;
+ }
+ }
+ oggpackB_writeclear(&opb); /*Release with the same packer family used by oggpackB_writeinit() above.*/
+ return ret;
+}
+
+int theora_encode_tables(theora_state *_te,ogg_packet *_op){
+ oc_enc_ctx *enc;
+ th_api_wrapper *api;
+ int ret;
+ api=(th_api_wrapper *)_te->i->codec_setup;
+ enc=api->encode;
+ /*If we've already started encoding, fail.*/
+ if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){
+ return TH_EINVAL;
+ }
+ /*Reset the state to make sure we output a setup packet.*/
+ enc->packet_state=OC_PACKET_SETUP_HDR;
+ ret=th_encode_flushheader(api->encode,NULL,_op);
+ return ret>=0?0:ret; /*Any non-negative result is reported as 0; errors pass through.*/
+}
diff --git a/Engine/lib/libtheora/lib/encfrag.c b/Engine/lib/libtheora/lib/encfrag.c
new file mode 100644
index 000000000..bb814c8e4
--- /dev/null
+++ b/Engine/lib/libtheora/lib/encfrag.c
@@ -0,0 +1,388 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: encfrag.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+ (*_enc->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride);
+}
+
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+ int i;
+ for(i=0;i<8;i++){
+ int j;
+ for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-_ref[j]);
+ _src+=_ystride;
+ _ref+=_ystride;
+ }
+}
+
+void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride){
+ (*_enc->opt_vtable.frag_sub_128)(_diff,_src,_ystride);
+}
+
+void oc_enc_frag_sub_128_c(ogg_int16_t *_diff,
+ const unsigned char *_src,int _ystride){
+ int i;
+ for(i=0;i<8;i++){
+ int j;
+ for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-128);
+ _src+=_ystride;
+ }
+}
+
+unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x,
+ const unsigned char *_y,int _ystride){
+ return (*_enc->opt_vtable.frag_sad)(_x,_y,_ystride);
+}
+
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+ unsigned sad;
+ int i;
+ sad=0;
+ for(i=8;i-->0;){
+ int j;
+ for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]);
+ _src+=_ystride;
+ _ref+=_ystride;
+ }
+ return sad;
+}
+
+unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh){
+ return (*_enc->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh);
+}
+
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+ unsigned sad;
+ int i;
+ sad=0;
+ for(i=8;i-->0;){
+ int j;
+ for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]);
+ if(sad>_thresh)break; /*Early out: remaining rows are skipped, so the result is a partial sum.*/
+ _src+=_ystride;
+ _ref+=_ystride;
+ }
+ return sad;
+}
+
+unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh){
+ return (*_enc->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,
+ _thresh);
+}
+
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+ unsigned sad;
+ int i;
+ sad=0;
+ for(i=8;i-->0;){
+ int j;
+ for(j=0;j<8;j++)sad+=abs(_src[j]-(_ref1[j]+_ref2[j]>>1)); /*'+' binds tighter than '>>': truncating average of the two references.*/
+ if(sad>_thresh)break; /*Early out: remaining rows are skipped, so the result is a partial sum.*/
+ _src+=_ystride;
+ _ref1+=_ystride;
+ _ref2+=_ystride;
+ }
+ return sad;
+}
+
+static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+ int i;
+ for(i=0;i<8;i++){
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int r;
+ /*Hadamard stage 1 (applied to the row of differences _src-_ref):*/
+ t0=_src[0]-_ref[0]+_src[4]-_ref[4];
+ t4=_src[0]-_ref[0]-_src[4]+_ref[4];
+ t1=_src[1]-_ref[1]+_src[5]-_ref[5];
+ t5=_src[1]-_ref[1]-_src[5]+_ref[5];
+ t2=_src[2]-_ref[2]+_src[6]-_ref[6];
+ t6=_src[2]-_ref[2]-_src[6]+_ref[6];
+ t3=_src[3]-_ref[3]+_src[7]-_ref[7];
+ t7=_src[3]-_ref[3]-_src[7]+_ref[7];
+ /*Hadamard stage 2:*/
+ r=t0;
+ t0+=t2;
+ t2=r-t2;
+ r=t1;
+ t1+=t3;
+ t3=r-t3;
+ r=t4;
+ t4+=t6;
+ t6=r-t6;
+ r=t5;
+ t5+=t7;
+ t7=r-t7;
+ /*Hadamard stage 3 (input row i is written to column i of _buf, i.e. transposed):*/
+ _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+ _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+ _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+ _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+ _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+ _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+ _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+ _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+ _src+=_ystride;
+ _ref+=_ystride;
+ }
+}
+
+static void oc_diff_hadamard2(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
+ int i;
+ for(i=0;i<8;i++){
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int r;
+ /*Hadamard stage 1 (the reference is the truncated average (_ref1+_ref2)>>1):*/
+ r=_ref1[0]+_ref2[0]>>1;
+ t4=_ref1[4]+_ref2[4]>>1;
+ t0=_src[0]-r+_src[4]-t4;
+ t4=_src[0]-r-_src[4]+t4;
+ r=_ref1[1]+_ref2[1]>>1;
+ t5=_ref1[5]+_ref2[5]>>1;
+ t1=_src[1]-r+_src[5]-t5;
+ t5=_src[1]-r-_src[5]+t5;
+ r=_ref1[2]+_ref2[2]>>1;
+ t6=_ref1[6]+_ref2[6]>>1;
+ t2=_src[2]-r+_src[6]-t6;
+ t6=_src[2]-r-_src[6]+t6;
+ r=_ref1[3]+_ref2[3]>>1;
+ t7=_ref1[7]+_ref2[7]>>1;
+ t3=_src[3]-r+_src[7]-t7;
+ t7=_src[3]-r-_src[7]+t7;
+ /*Hadamard stage 2:*/
+ r=t0;
+ t0+=t2;
+ t2=r-t2;
+ r=t1;
+ t1+=t3;
+ t3=r-t3;
+ r=t4;
+ t4+=t6;
+ t6=r-t6;
+ r=t5;
+ t5+=t7;
+ t7=r-t7;
+ /*Hadamard stage 3 (input row i is written to column i of _buf, i.e. transposed):*/
+ _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+ _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+ _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+ _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+ _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+ _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+ _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+ _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+ _src+=_ystride;
+ _ref1+=_ystride;
+ _ref2+=_ystride;
+ }
+}
+
+static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ int _ystride){
+ int i;
+ for(i=0;i<8;i++){
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int r;
+ /*Hadamard stage 1 (applied to the raw pixels; no reference is subtracted):*/
+ t0=_src[0]+_src[4];
+ t4=_src[0]-_src[4];
+ t1=_src[1]+_src[5];
+ t5=_src[1]-_src[5];
+ t2=_src[2]+_src[6];
+ t6=_src[2]-_src[6];
+ t3=_src[3]+_src[7];
+ t7=_src[3]-_src[7];
+ /*Hadamard stage 2:*/
+ r=t0;
+ t0+=t2;
+ t2=r-t2;
+ r=t1;
+ t1+=t3;
+ t3=r-t3;
+ r=t4;
+ t4+=t6;
+ t6=r-t6;
+ r=t5;
+ t5+=t7;
+ t7=r-t7;
+ /*Hadamard stage 3 (input row i is written to column i of _buf, i.e. transposed):*/
+ _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+ _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+ _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+ _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+ _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+ _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+ _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+ _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+ _src+=_ystride;
+ }
+}
+
+unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){
+ unsigned sad;
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int r;
+ int i;
+ sad=0;
+ for(i=0;i<8;i++){ /*One 8-point transform per row of _buf.*/
+ /*Hadamard stage 1:*/
+ t0=_buf[i*8+0]+_buf[i*8+4];
+ t4=_buf[i*8+0]-_buf[i*8+4];
+ t1=_buf[i*8+1]+_buf[i*8+5];
+ t5=_buf[i*8+1]-_buf[i*8+5];
+ t2=_buf[i*8+2]+_buf[i*8+6];
+ t6=_buf[i*8+2]-_buf[i*8+6];
+ t3=_buf[i*8+3]+_buf[i*8+7];
+ t7=_buf[i*8+3]-_buf[i*8+7];
+ /*Hadamard stage 2:*/
+ r=t0;
+ t0+=t2;
+ t2=r-t2;
+ r=t1;
+ t1+=t3;
+ t3=r-t3;
+ r=t4;
+ t4+=t6;
+ t6=r-t6;
+ r=t5;
+ t5+=t7;
+ t7=r-t7;
+ /*Hadamard stage 3 (the outputs are not stored; their absolute values are summed):*/
+ r=abs(t0+t1);
+ r+=abs(t0-t1);
+ r+=abs(t2+t3);
+ r+=abs(t2-t3);
+ r+=abs(t4+t5);
+ r+=abs(t4-t5);
+ r+=abs(t6+t7);
+ r+=abs(t6-t7);
+ sad+=r;
+ if(sad>_thresh)break; /*Early out: remaining rows are skipped, so the result is a partial sum.*/
+ }
+ return sad;
+}
+
+unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh){
+ return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh);
+}
+
+unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+ ogg_int16_t buf[64];
+ oc_diff_hadamard(buf,_src,_ref,_ystride);
+ return oc_hadamard_sad_thresh(buf,_thresh);
+}
+
+unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh){
+ return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride,
+ _thresh);
+}
+
+unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+ ogg_int16_t buf[64];
+ oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride);
+ return oc_hadamard_sad_thresh(buf,_thresh);
+}
+
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_src,int _ystride){
+ return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride);
+}
+
+unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){
+ ogg_int16_t buf[64];
+ oc_intra_hadamard(buf,_src,_ystride);
+ return oc_hadamard_sad_thresh(buf,UINT_MAX) /*UINT_MAX as the threshold: the sum is never cut short.*/
+ -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); /*Remove the first term of row 0, |sum of buf[0..7]|, i.e. the DC term.*/
+}
+
+void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+ (*_enc->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride);
+}
+
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+ int i;
+ int j;
+ for(i=8;i-->0;){
+ for(j=0;j<8;j++)_dst[j]=_src1[j]+_src2[j]>>1; /*'+' binds tighter than '>>': truncating average, no rounding.*/
+ _dst+=_ystride;
+ _src1+=_ystride;
+ _src2+=_ystride;
+ }
+}
+
+void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
+ unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){
+ (*_enc->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue);
+}
+
+void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+ (*_enc->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue);
+}
diff --git a/Engine/lib/libtheora/lib/encinfo.c b/Engine/lib/libtheora/lib/encinfo.c
new file mode 100644
index 000000000..83be1dae7
--- /dev/null
+++ b/Engine/lib/libtheora/lib/encinfo.c
@@ -0,0 +1,121 @@
+#include <stdlib.h>
+#include <string.h>
+#include "internal.h"
+#include "enquant.h"
+#include "huffenc.h"
+
+
+
+/*Packs a series of octets from a given byte array into the pack buffer.
+ _opb: The pack buffer to store the octets in.
+ _buf: The byte array containing the bytes to pack.
+ _len: The number of octets to pack.*/
+static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){
+ int i;
+ for(i=0;i<_len;i++)oggpackB_write(_opb,_buf[i],8);
+}
+
+
+
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op){
+ unsigned char *packet;
+ int b_o_s;
+ if(_op==NULL)return TH_EFAULT;
+ switch(*_packet_state){
+ /*Codec info header.*/
+ case OC_PACKET_INFO_HDR:{
+ if(_state==NULL)return TH_EFAULT;
+ oggpackB_reset(_opb);
+ /*Mark this packet as the info header.*/
+ oggpackB_write(_opb,0x80,8);
+ /*Write the codec string.*/
+ oc_pack_octets(_opb,"theora",6);
+ /*Write the codec bitstream version.*/
+ oggpackB_write(_opb,TH_VERSION_MAJOR,8);
+ oggpackB_write(_opb,TH_VERSION_MINOR,8);
+ oggpackB_write(_opb,TH_VERSION_SUB,8);
+ /*Describe the encoded frame.*/
+ oggpackB_write(_opb,_state->info.frame_width>>4,16);
+ oggpackB_write(_opb,_state->info.frame_height>>4,16);
+ oggpackB_write(_opb,_state->info.pic_width,24);
+ oggpackB_write(_opb,_state->info.pic_height,24);
+ oggpackB_write(_opb,_state->info.pic_x,8);
+ oggpackB_write(_opb,_state->info.pic_y,8);
+ oggpackB_write(_opb,_state->info.fps_numerator,32);
+ oggpackB_write(_opb,_state->info.fps_denominator,32);
+ oggpackB_write(_opb,_state->info.aspect_numerator,24);
+ oggpackB_write(_opb,_state->info.aspect_denominator,24);
+ oggpackB_write(_opb,_state->info.colorspace,8);
+ oggpackB_write(_opb,_state->info.target_bitrate,24);
+ oggpackB_write(_opb,_state->info.quality,6);
+ oggpackB_write(_opb,_state->info.keyframe_granule_shift,5);
+ oggpackB_write(_opb,_state->info.pixel_fmt,2);
+ /*Spare configuration bits.*/
+ oggpackB_write(_opb,0,3);
+ b_o_s=1;
+ }break;
+ /*Comment header.*/
+ case OC_PACKET_COMMENT_HDR:{
+ int vendor_len;
+ int i;
+ if(_tc==NULL)return TH_EFAULT;
+ vendor_len=strlen(_vendor);
+ oggpackB_reset(_opb);
+ /*Mark this packet as the comment header.*/
+ oggpackB_write(_opb,0x81,8);
+ /*Write the codec string.*/
+ oc_pack_octets(_opb,"theora",6);
+ /*Write the vendor string (note: lengths and counts below go through oggpack_write(), not oggpackB_write()).*/
+ oggpack_write(_opb,vendor_len,32);
+ oc_pack_octets(_opb,_vendor,vendor_len);
+ oggpack_write(_opb,_tc->comments,32);
+ for(i=0;i<_tc->comments;i++){
+ if(_tc->user_comments[i]!=NULL){
+ oggpack_write(_opb,_tc->comment_lengths[i],32);
+ oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]);
+ }
+ else oggpack_write(_opb,0,32);
+ }
+ b_o_s=0;
+ }break;
+ /*Codec setup header.*/
+ case OC_PACKET_SETUP_HDR:{
+ int ret;
+ oggpackB_reset(_opb);
+ /*Mark this packet as the setup header.*/
+ oggpackB_write(_opb,0x82,8);
+ /*Write the codec string.*/
+ oc_pack_octets(_opb,"theora",6);
+ /*Write the quantizer tables.*/
+ oc_quant_params_pack(_opb,_qinfo);
+ /*Write the huffman codes.*/
+ ret=oc_huff_codes_pack(_opb,_codes);
+ /*This should never happen, because we validate the tables when they
+ are set.
+ If you see this, there's a good chance memory is being corrupted.*/
+ if(ret<0)return ret;
+ b_o_s=0;
+ }break;
+ /*No more headers to emit.*/
+ default:return 0;
+ }
+ /*This is kind of fugly: we hand the user a buffer which they do not own.
+ We will overwrite it when the next packet is output, so the user better be
+ done with it by then.
+ Vorbis is little better: it hands back buffers that it will free the next
+ time the headers are requested, or when the encoder is cleared.
+ Hopefully libogg2 will make this much cleaner.*/
+ packet=oggpackB_get_buffer(_opb);
+ /*If there's no packet, malloc failed while writing.*/
+ if(packet==NULL)return TH_EFAULT;
+ _op->packet=packet;
+ _op->bytes=oggpackB_bytes(_opb);
+ _op->b_o_s=b_o_s;
+ _op->e_o_s=0;
+ _op->granulepos=0;
+ _op->packetno=*_packet_state+3;
+ return ++(*_packet_state)+3;
+}
diff --git a/Engine/lib/libtheora/lib/encint.h b/Engine/lib/libtheora/lib/encint.h
new file mode 100644
index 000000000..97897d5a0
--- /dev/null
+++ b/Engine/lib/libtheora/lib/encint.h
@@ -0,0 +1,493 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#if !defined(_encint_H)
+# define _encint_H (1)
+# if defined(HAVE_CONFIG_H)
+# include "config.h"
+# endif
+# include "theora/theoraenc.h"
+# include "internal.h"
+# include "ocintrin.h"
+# include "mathops.h"
+# include "enquant.h"
+# include "huffenc.h"
+/*# define OC_COLLECT_METRICS*/
+
+
+
+typedef oc_mv oc_mv2[2];
+
+typedef struct oc_enc_opt_vtable oc_enc_opt_vtable;
+typedef struct oc_mb_enc_info oc_mb_enc_info;
+typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
+typedef struct oc_iir_filter oc_iir_filter;
+typedef struct oc_frame_metrics oc_frame_metrics;
+typedef struct oc_rc_state oc_rc_state;
+typedef struct th_enc_ctx oc_enc_ctx;
+typedef struct oc_token_checkpoint oc_token_checkpoint;
+
+
+
+/*Constants for the packet-out state machine specific to the encoder.*/
+
+/*Next packet to emit: Data packet, but none are ready yet.*/
+#define OC_PACKET_EMPTY (0)
+/*Next packet to emit: Data packet, and one is ready.*/
+#define OC_PACKET_READY (1)
+
+/*All features enabled.*/
+#define OC_SP_LEVEL_SLOW (0)
+/*Enable early skip.*/
+#define OC_SP_LEVEL_EARLY_SKIP (1)
+/*Disable motion compensation.*/
+#define OC_SP_LEVEL_NOMC (2)
+/*Maximum valid speed level.*/
+#define OC_SP_LEVEL_MAX (2)
+
+
+/*The bits used for each of the MB mode codebooks.*/
+extern const unsigned char OC_MODE_BITS[2][OC_NMODES];
+
+/*The bits used for each of the MV codebooks.*/
+extern const unsigned char OC_MV_BITS[2][64];
+
+/*The minimum value that can be stored in a SB run for each codeword.
+ The last entry is the upper bound on the length of a single SB run.*/
+extern const ogg_uint16_t OC_SB_RUN_VAL_MIN[8];
+/*The bits used for each SB run codeword.*/
+extern const unsigned char OC_SB_RUN_CODE_NBITS[7];
+
+/*The bits used for each block run length (starting with 1).*/
+extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
+
+
+
+/*Encoder specific functions with accelerated variants.*/
+struct oc_enc_opt_vtable{
+ unsigned (*frag_sad)(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+ unsigned (*frag_sad_thresh)(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+ unsigned (*frag_sad2_thresh)(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+ unsigned (*frag_satd_thresh)(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+ unsigned (*frag_satd2_thresh)(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+ unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
+ void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+ void (*frag_sub_128)(ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+ void (*frag_copy2)(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+ void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+ const ogg_int16_t _residue[64]);
+ void (*frag_recon_inter)(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+ void (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+};
+
+
+void oc_enc_vtable_init(oc_enc_ctx *_enc);
+
+
+
+/*Encoder-specific macroblock information.*/
+struct oc_mb_enc_info{
+ /*Neighboring macro blocks that have MVs available from the current frame.*/
+ unsigned cneighbors[4];
+ /*Neighboring macro blocks to use for MVs from the previous frame.*/
+ unsigned pneighbors[4];
+ /*The number of current-frame neighbors.*/
+ unsigned char ncneighbors;
+ /*The number of previous-frame neighbors.*/
+ unsigned char npneighbors;
+ /*Flags indicating which MB modes have been refined.*/
+ unsigned char refined;
+ /*Motion vectors for a macro block for the current frame and the
+ previous two frames.
+ Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which
+ can be used to estimate constant velocity and constant acceleration
+ predictors.
+ Uninitialized MVs are (0,0).*/
+ oc_mv2 analysis_mv[3];
+ /*Current unrefined analysis MVs.*/
+ oc_mv unref_mv[2];
+ /*Unrefined block MVs.*/
+ oc_mv block_mv[4];
+ /*Refined block MVs.*/
+ oc_mv ref_mv[4];
+ /*Minimum motion estimation error from the analysis stage.*/
+ ogg_uint16_t error[2];
+ /*MB error for half-pel refinement for each frame type.*/
+ unsigned satd[2];
+ /*Block error for half-pel refinement.*/
+ unsigned block_satd[4];
+};
+
+
+
+/*State machine to estimate the opportunity cost of coding a MB mode.*/
+struct oc_mode_scheme_chooser{
+ /*Pointers to a list containing the index of each mode in the mode
+ alphabet used by each scheme.
+ The first entry points to the dynamic scheme0_ranks, while the remaining 7
+ point to the constant entries stored in OC_MODE_SCHEMES.*/
+ const unsigned char *mode_ranks[8];
+ /*The ranks for each mode when coded with scheme 0.
+ These are optimized so that the more frequent modes have lower ranks.*/
+ unsigned char scheme0_ranks[OC_NMODES];
+ /*The list of modes, sorted in descending order of frequency, that
+ corresponds to the ranks above.*/
+ unsigned char scheme0_list[OC_NMODES];
+ /*The number of times each mode has been chosen so far.*/
+ int mode_counts[OC_NMODES];
+ /*The list of mode coding schemes, sorted in ascending order of bit cost.*/
+ unsigned char scheme_list[8];
+ /*The number of bits used by each mode coding scheme.*/
+ ptrdiff_t scheme_bits[8];
+};
+
+
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+
+
+
+/*A 2nd order low-pass Bessel follower.
+ We use this for rate control because it has fast reaction time, but is
+ critically damped.*/
+struct oc_iir_filter{
+ ogg_int32_t c[2];
+ ogg_int64_t g;
+ ogg_int32_t x[2];
+ ogg_int32_t y[2];
+};
+
+
+
+/*The 2-pass metrics associated with a single frame.*/
+struct oc_frame_metrics{
+ /*The log base 2 of the scale factor for this frame in Q24 format.*/
+ ogg_int32_t log_scale;
+ /*The number of application-requested duplicates of this frame.*/
+ unsigned dup_count:31;
+ /*The frame type from pass 1.*/
+ unsigned frame_type:1;
+};
+
+
+
+/*Rate control state information.*/
+struct oc_rc_state{
+ /*The target average bits per frame.*/
+ ogg_int64_t bits_per_frame;
+ /*The current buffer fullness (bits available to be used).*/
+ ogg_int64_t fullness;
+ /*The target buffer fullness.
+ This is where we'd like to be by the last keyframe that appears in the next
+ buf_delay frames.*/
+ ogg_int64_t target;
+ /*The maximum buffer fullness (total size of the buffer).*/
+ ogg_int64_t max;
+ /*The log of the number of pixels in a frame in Q57 format.*/
+ ogg_int64_t log_npixels;
+ /*The exponent used in the rate model in Q8 format.*/
+ unsigned exp[2];
+ /*The number of frames to distribute the buffer usage over.*/
+ int buf_delay;
+ /*The total drop count from the previous frame.
+ This includes duplicates explicitly requested via the
+ TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/
+ ogg_uint32_t prev_drop_count;
+ /*The log of an estimated scale factor used to obtain the real framerate, for
+ VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/
+ ogg_int64_t log_drop_scale;
+ /*The log of estimated scale factor for the rate model in Q57 format.*/
+ ogg_int64_t log_scale[2];
+ /*The log of the target quantizer level in Q57 format.*/
+ ogg_int64_t log_qtarget;
+ /*Will we drop frames to meet bitrate target?*/
+ unsigned char drop_frames;
+ /*Do we respect the maximum buffer fullness?*/
+ unsigned char cap_overflow;
+ /*Can the reservoir go negative?*/
+ unsigned char cap_underflow;
+ /*Second-order lowpass filters to track scale and VFR.*/
+ oc_iir_filter scalefilter[2];
+ int inter_count;
+ int inter_delay;
+ int inter_delay_target;
+ oc_iir_filter vfrfilter;
+ /*Two-pass mode state.
+ 0 => 1-pass encoding.
+ 1 => 1st pass of 2-pass encoding.
+ 2 => 2nd pass of 2-pass encoding.*/
+ int twopass;
+ /*Buffer for current frame metrics.*/
+ unsigned char twopass_buffer[48];
+ /*The number of bytes in the frame metrics buffer.
+ When 2-pass encoding is enabled, this is set to 0 after each frame is
+ submitted, and must be non-zero before the next frame will be accepted.*/
+ int twopass_buffer_bytes;
+ int twopass_buffer_fill;
+ /*Whether or not to force the next frame to be a keyframe.*/
+ unsigned char twopass_force_kf;
+ /*The metrics for the previous frame.*/
+ oc_frame_metrics prev_metrics;
+ /*The metrics for the current frame.*/
+ oc_frame_metrics cur_metrics;
+ /*The buffered metrics for future frames.*/
+ oc_frame_metrics *frame_metrics;
+ int nframe_metrics;
+ int cframe_metrics;
+ /*The index of the current frame in the circular metric buffer.*/
+ int frame_metrics_head;
+ /*The frame count of each type (keyframes, delta frames, and dup frames);
+ 32 bits limits us to 2.268 years at 60 fps.*/
+ ogg_uint32_t frames_total[3];
+ /*The number of frames of each type yet to be processed.*/
+ ogg_uint32_t frames_left[3];
+ /*The sum of the scale values for each frame type.*/
+ ogg_int64_t scale_sum[2];
+ /*The start of the window over which the current scale sums are taken.*/
+ int scale_window0;
+ /*The end of the window over which the current scale sums are taken.*/
+ int scale_window_end;
+ /*The frame count of each type in the current 2-pass window; this does not
+ include dup frames.*/
+ int nframes[3];
+ /*The total accumulated estimation bias.*/
+ ogg_int64_t rate_bias;
+};
+
+
+void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc);
+void oc_rc_state_clear(oc_rc_state *_rc);
+
+void oc_enc_rc_resize(oc_enc_ctx *_enc);
+int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp);
+void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type);
+int oc_enc_update_rc_state(oc_enc_ctx *_enc,
+ long _bits,int _qti,int _qi,int _trial,int _droppable);
+int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf);
+int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes);
+
+
+
+/*The internal encoder state.*/
+struct th_enc_ctx{
+ /*Shared encoder/decoder state.*/
+ oc_theora_state state;
+ /*Buffer in which to assemble packets.*/
+ oggpack_buffer opb;
+ /*Encoder-specific macroblock information.*/
+ oc_mb_enc_info *mb_info;
+ /*DC coefficients after prediction.*/
+ ogg_int16_t *frag_dc;
+ /*The list of coded macro blocks, in coded order.*/
+ unsigned *coded_mbis;
+ /*The number of coded macro blocks.*/
+ size_t ncoded_mbis;
+ /*Whether or not packets are ready to be emitted.
+ This takes on negative values while there are remaining header packets to
+ be emitted, reaches 0 when the codec is ready for input, and becomes
+ positive when a frame has been processed and data packets are ready.*/
+ int packet_state;
+ /*The maximum distance between keyframes.*/
+ ogg_uint32_t keyframe_frequency_force;
+ /*The number of duplicates to produce for the next frame.*/
+ ogg_uint32_t dup_count;
+ /*The number of duplicates remaining to be emitted for the current frame.*/
+ ogg_uint32_t nqueued_dups;
+ /*The number of duplicates emitted for the last frame.*/
+ ogg_uint32_t prev_dup_count;
+ /*The current speed level.*/
+ int sp_level;
+ /*Whether or not VP3 compatibility mode has been enabled.*/
+ unsigned char vp3_compatible;
+ /*Whether or not any INTER frames have been coded.*/
+ unsigned char coded_inter_frame;
+ /*Whether or not the previous frame was dropped.*/
+ unsigned char prevframe_dropped;
+ /*Stores most recently chosen Huffman tables for each frame type, DC and AC
+ coefficients, and luma and chroma tokens.
+ The actual Huffman table used for a given coefficient depends not only on
+ the choice made here, but also its index in the zig-zag ordering.*/
+ unsigned char huff_idxs[2][2][2];
+ /*Current count of bits used by each MV coding mode.*/
+ size_t mv_bits[2];
+ /*The mode scheme chooser for estimating mode coding costs.*/
+ oc_mode_scheme_chooser chooser;
+ /*The number of vertical super blocks in an MCU.*/
+ int mcu_nvsbs;
+ /*The SSD error for skipping each fragment in the current MCU.*/
+ unsigned *mcu_skip_ssd;
+ /*The DCT token lists for each coefficient and each plane.*/
+ unsigned char **dct_tokens[3];
+ /*The extra bits associated with each DCT token.*/
+ ogg_uint16_t **extra_bits[3];
+ /*The number of DCT tokens for each coefficient for each plane.*/
+ ptrdiff_t ndct_tokens[3][64];
+ /*Pending EOB runs for each coefficient for each plane.*/
+ ogg_uint16_t eob_run[3][64];
+ /*The offset of the first DCT token for each coefficient for each plane.*/
+ unsigned char dct_token_offs[3][64];
+ /*The last DC coefficient for each plane and reference frame.*/
+ int dc_pred_last[3][3];
+#if defined(OC_COLLECT_METRICS)
+ /*Fragment SATD statistics for MB mode estimation metrics.*/
+ unsigned *frag_satd;
+ /*Fragment SSD statistics for MB mode estimation metrics.*/
+ unsigned *frag_ssd;
+#endif/*defined(OC_COLLECT_METRICS)*/
+ /*The R-D optimization parameter.*/
+ int lambda;
+ /*The Huffman tables in use.*/
+ th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+ /*The quantization parameters in use.*/
+ th_quant_info qinfo;
+ oc_iquant *enquant_tables[64][3][2];/*Presumably [qi][plane][qti]; confirm.*/
+ oc_iquant_table enquant_table_data[64][3][2];/*Same shape as above.*/
+ /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi
+ value.
+ This is used to parameterize the rate control decisions.
+ They are kept in the log domain to simplify later processing.
+ Keep in mind these are DCT domain quantizers, and so are scaled by an
+ additional factor of 4 from the pixel domain.*/
+ ogg_int64_t log_qavg[2][64];
+ /*The buffer state used to drive rate control.*/
+ oc_rc_state rc;
+ /*Table for encoder acceleration functions.*/
+ oc_enc_opt_vtable opt_vtable;
+};
+
+/*Analysis entry points; the mode metrics hooks require OC_COLLECT_METRICS.*/
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
+#if defined(OC_COLLECT_METRICS)
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
+void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc);
+#endif/*defined(OC_COLLECT_METRICS)*/
+
+
+
+/*Full-pel motion search for macro block _mbi against both reference frames.*/
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi);
+/*Refine the MV of macro block _mbi for the frame selected by _frame.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame);
+/*Refine the block MVs of macro block _mbi.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi);
+
+
+
+/*Used to roll back a tokenlog transaction when we retroactively decide to skip
+ a fragment (see oc_enc_tokenlog_rollback()).
+ A checkpoint is taken right before each token is added.*/
+struct oc_token_checkpoint{
+ /*The color plane the token was added to.*/
+ unsigned char pli;
+ /*The zig-zag index the token was added to.*/
+ unsigned char zzi;
+ /*The outstanding EOB run count before the token was added.*/
+ ogg_uint16_t eob_run;
+ /*The token count before the token was added.*/
+ ptrdiff_t ndct_tokens;
+};
+
+
+/*DCT tokenization; _stack presumably collects checkpoints for the rollback.*/
+void oc_enc_tokenize_start(oc_enc_ctx *_enc);
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _acmin);
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n);
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend);
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1);
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc);
+
+
+
+/*Encodes one header packet; presumably *_packet_state tracks which remain.*/
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op);
+
+
+
+/*Encoder-specific accelerated functions (presumably routed via opt_vtable).*/
+void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_src,int _ystride);
+void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
+ unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]);
+void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
+ const ogg_int16_t _x[64]);
+
+/*Default pure-C versions; only the vtable init takes the encoder context.*/
+void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
+
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif
diff --git a/Engine/lib/libtheora/lib/encode.c b/Engine/lib/libtheora/lib/encode.c
new file mode 100644
index 000000000..0c5ea6a17
--- /dev/null
+++ b/Engine/lib/libtheora/lib/encode.c
@@ -0,0 +1,1615 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: encode.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include