diff --git a/Engine/lib/libtheora/CHANGES b/Engine/lib/libtheora/CHANGES index 74183d91b..b30327e63 100644 --- a/Engine/lib/libtheora/CHANGES +++ b/Engine/lib/libtheora/CHANGES @@ -1,3 +1,65 @@ +libtheora 1.1.1 (2009 October 1) + + - Fix problems with MSVC inline assembly + - Add the missing encoder_disabled.c to the distribution + - build updates: autogen.sh should work better after switching systems + and the MSVC project now defaults to the dynamic runtime library + - Namespace some variables to avoid conflicts on wince. + +libtheora 1.1.0 (2009 September 24) + + - Fix various small issues with the example and telemetry code + - Fix handing a zero-byte packet as the first frame + - Documentation cleanup + - Two minor build fixes + +libtheora 1.1beta3 (2009 August 22) + + - Rate control fixes to smooth quality + - MSVC build now exports all of the 1.0 api + - Assorted small bug fixes + +libtheora 1.1beta2 (2009 August 12) + + - Fix a rate control problem with difficult input + - Build fixes for OpenBSD and Apple Xcode + - Examples now all use the 1.0 api + - TH_ENCCTL_SET_SPLEVEL works again + - Various bug fixes and source tree rearrangement + +libtheora 1.1beta1 (2009 August 5) + + - Support for two-pass encoding + - Performance optimization of both encoder and decoder + - Encoder supports dynamic adjustment of quality and + bitrate targets + - Encoder is generally more configurable, and all + rate control modes perform better + - Encoder now accepts 4:2:2 and 4:4:4 chroma sampling + - Decoder telemetry output shows quantization choice + and a breakdown of bitrate usage in the frame + - MSVC assembly optimizations up to date and functional + +libtheora 1.1alpha2 (2009 May 26) + + - Reduce lambda for small quantizers. 
+ - New encoder fDCT does better on smooth gradients + - Use SATD for mode decisions (1-2% bitrate reduction) + - Assembly rewrite for new features and general speed up + - Share code between the encoder and decoder for performance + - Fix 4:2:2 decoding and telemetry + - MSVC project files updated, but assembly is disabled. + - New configure option --disable-spec to work around toolchain + detection failures. + - Limit symbol exports on MacOS X. + - Port remaining unit tests from the 1.0 release. + +libtheora 1.1alpha1 (2009 March 27) + + - Encoder rewrite with much improved vbr quality/bitrate and + better tracking of the target rate in cbr mode. + - MSVC project files do not work in this release. + libtheora 1.0 (2008 November 3) - Merge x86 assembly for forward DCT from Thusnelda branch. diff --git a/Engine/lib/libtheora/COPYING b/Engine/lib/libtheora/COPYING index 5a711972d..c8ccce4ff 100644 --- a/Engine/lib/libtheora/COPYING +++ b/Engine/lib/libtheora/COPYING @@ -1,4 +1,4 @@ -Copyright (C) 2002-2008 Xiph.Org Foundation and contributors. +Copyright (C) 2002-2009 Xiph.org Foundation Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -11,7 +11,7 @@ notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -- Neither the name of the Xiph.Org Foundation nor the names of its +- Neither the name of the Xiph.org Foundation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/Engine/lib/libtheora/LICENSE b/Engine/lib/libtheora/LICENSE new file mode 100644 index 000000000..5e5ec0846 --- /dev/null +++ b/Engine/lib/libtheora/LICENSE @@ -0,0 +1,18 @@ +Please see the file COPYING for the copyright license for this software. 
+ +In addition to and irrespective of the copyright license associated +with this software, On2 Technologies, Inc. makes the following statement +regarding technology used in this software: + + On2 represents and warrants that it shall not assert any rights + relating to infringement of On2's registered patents, nor initiate + any litigation asserting such rights, against any person who, or + entity which utilizes the On2 VP3 Codec Software, including any + use, distribution, and sale of said Software; which make changes, + modifications, and improvements in said Software; and to use, + distribute, and sell said changes as well as applications for other + fields of use. + +This reference implementation is originally derived from the On2 VP3 +Codec Software, and the Theora video format is essentially compatible +with the VP3 video format, consisting of a backward-compatible superset. diff --git a/Engine/lib/libtheora/include/Makefile.am b/Engine/lib/libtheora/include/Makefile.am new file mode 100644 index 000000000..d5db4b40f --- /dev/null +++ b/Engine/lib/libtheora/include/Makefile.am @@ -0,0 +1,3 @@ +## Process this file with automake to produce Makefile.in + +SUBDIRS = theora diff --git a/Engine/lib/libtheora/include/Makefile.in b/Engine/lib/libtheora/include/Makefile.in new file mode 100644 index 000000000..805e6c29e --- /dev/null +++ b/Engine/lib/libtheora/include/Makefile.in @@ -0,0 +1,414 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. + +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_HEADER = $(INSTALL_DATA) +transform = @program_transform_name@ +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ + +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AR = @AR@ +ARGZ_H = @ARGZ_H@ +AS = @AS@ +AWK = @AWK@ +BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@ +CAIRO_CFLAGS = @CAIRO_CFLAGS@ +CAIRO_LIBS = @CAIRO_LIBS@ +CC = @CC@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DEBUG = @DEBUG@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +F77 = @F77@ +GCJ = @GCJ@ +GCJFLAGS = @GCJFLAGS@ +GETOPT_OBJS = @GETOPT_OBJS@ +GREP = @GREP@ +HAVE_BIBTEX = 
@HAVE_BIBTEX@ +HAVE_DOXYGEN = @HAVE_DOXYGEN@ +HAVE_PDFLATEX = @HAVE_PDFLATEX@ +HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@ +HAVE_TRANSFIG = @HAVE_TRANSFIG@ +HAVE_VALGRIND = @HAVE_VALGRIND@ +INCLTDL = @INCLTDL@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LIBADD_DL = @LIBADD_DL@ +LIBADD_DLD_LINK = @LIBADD_DLD_LINK@ +LIBADD_DLOPEN = @LIBADD_DLOPEN@ +LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@ +LIBLTDL = @LIBLTDL@ +LIBM = @LIBM@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTDLDEPS = @LTDLDEPS@ +LTDLINCL = @LTDLINCL@ +LTDLOPEN = @LTDLOPEN@ +LT_CONFIG_H = @LT_CONFIG_H@ +LT_DLLOADERS = @LT_DLLOADERS@ +LT_DLPREOPEN = @LT_DLPREOPEN@ +MAINT = @MAINT@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OGG_CFLAGS = @OGG_CFLAGS@ +OGG_LIBS = @OGG_LIBS@ +OSS_LIBS = @OSS_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PKG_CONFIG = @PKG_CONFIG@ +PNG_CFLAGS = @PNG_CFLAGS@ +PNG_LIBS = @PNG_LIBS@ +PROFILE = @PROFILE@ +RANLIB = @RANLIB@ +RC = @RC@ +SDL_CFLAGS = @SDL_CFLAGS@ +SDL_CONFIG = @SDL_CONFIG@ +SDL_LIBS = @SDL_LIBS@ +SED = @SED@ +STRIP = @STRIP@ +THDEC_LIB_AGE = @THDEC_LIB_AGE@ +THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@ +THDEC_LIB_REVISION = @THDEC_LIB_REVISION@ +THENC_LIB_AGE = @THENC_LIB_AGE@ +THENC_LIB_CURRENT = @THENC_LIB_CURRENT@ +THENC_LIB_REVISION = @THENC_LIB_REVISION@ +THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@ +THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@ +THEORA_LDFLAGS = @THEORA_LDFLAGS@ +TH_LIB_AGE = @TH_LIB_AGE@ +TH_LIB_CURRENT = @TH_LIB_CURRENT@ +TH_LIB_REVISION = @TH_LIB_REVISION@ +VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@ +VERSION = @VERSION@ +VORBISENC_LIBS = @VORBISENC_LIBS@ +VORBISFILE_LIBS = @VORBISFILE_LIBS@ +VORBIS_CFLAGS = @VORBIS_CFLAGS@ +VORBIS_LIBS = @VORBIS_LIBS@ +am__include = @am__include@ +am__quote = @am__quote@ +install_sh = @install_sh@ +lt_ECHO = @lt_ECHO@ +ltdl_LIBOBJS = @ltdl_LIBOBJS@ +ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@ +sys_symbol_underscore = @sys_symbol_underscore@ + +SUBDIRS = theora +subdir = include +mkinstalldirs = 
$(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +DIST_SOURCES = + +RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \ + uninstall-info-recursive all-recursive install-data-recursive \ + install-exec-recursive installdirs-recursive install-recursive \ + uninstall-recursive check-recursive installcheck-recursive +DIST_COMMON = Makefile.am Makefile.in +DIST_SUBDIRS = $(SUBDIRS) +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu include/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @set fnord $$MAKEFLAGS; amf=$$2; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +mostlyclean-recursive clean-recursive distclean-recursive \ +maintainer-clean-recursive: + @set fnord $$MAKEFLAGS; amf=$$2; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. 
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d $(distdir)/$$subdir \ + || mkdir $(distdir)/$$subdir \ + || exit 1; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" \ + distdir=../$(distdir)/$$subdir \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: + +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + +distclean-am: clean-am distclean-generic distclean-libtool \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-recursive + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +uninstall-am: uninstall-info-am + +uninstall-info: uninstall-info-recursive + +.PHONY: $(RECURSIVE_TARGETS) GTAGS all all-am check check-am clean \ + clean-generic clean-libtool clean-recursive distclean \ + distclean-generic distclean-libtool distclean-recursive \ + distclean-tags distdir dvi dvi-am dvi-recursive info info-am \ + info-recursive install install-am install-data install-data-am \ + install-data-recursive install-exec install-exec-am \ + install-exec-recursive install-info install-info-am \ + install-info-recursive install-man install-recursive \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am installdirs-recursive maintainer-clean \ + maintainer-clean-generic maintainer-clean-recursive mostlyclean \ + mostlyclean-generic mostlyclean-libtool mostlyclean-recursive \ + tags tags-recursive uninstall uninstall-am uninstall-info-am \ + uninstall-info-recursive uninstall-recursive + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/Engine/lib/libtheora/include/theora/Makefile.am b/Engine/lib/libtheora/include/theora/Makefile.am new file mode 100644 index 000000000..5479e82a5 --- /dev/null +++ b/Engine/lib/libtheora/include/theora/Makefile.am @@ -0,0 +1,7 @@ +## Process this file with automake to produce Makefile.in + +theoraincludedir = $(includedir)/theora + +theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h + +noinst_HEADERS = codec.h theoradec.h diff --git a/Engine/lib/libtheora/include/theora/Makefile.in b/Engine/lib/libtheora/include/theora/Makefile.in new file mode 100644 index 000000000..d20e60ab4 --- /dev/null +++ b/Engine/lib/libtheora/include/theora/Makefile.in @@ -0,0 +1,355 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = ../.. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_HEADER = $(INSTALL_DATA) +transform = @program_transform_name@ +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ + +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AR = @AR@ +ARGZ_H = @ARGZ_H@ +AS = @AS@ +AWK = @AWK@ +BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@ +CAIRO_CFLAGS = @CAIRO_CFLAGS@ +CAIRO_LIBS = @CAIRO_LIBS@ +CC = @CC@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DEBUG = @DEBUG@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +F77 = @F77@ +GCJ = @GCJ@ +GCJFLAGS = @GCJFLAGS@ +GETOPT_OBJS = @GETOPT_OBJS@ +GREP = @GREP@ +HAVE_BIBTEX = @HAVE_BIBTEX@ +HAVE_DOXYGEN = @HAVE_DOXYGEN@ +HAVE_PDFLATEX = @HAVE_PDFLATEX@ +HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@ +HAVE_TRANSFIG = @HAVE_TRANSFIG@ +HAVE_VALGRIND = @HAVE_VALGRIND@ +INCLTDL = @INCLTDL@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LIBADD_DL = @LIBADD_DL@ +LIBADD_DLD_LINK = @LIBADD_DLD_LINK@ +LIBADD_DLOPEN = @LIBADD_DLOPEN@ +LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@ +LIBLTDL = @LIBLTDL@ +LIBM = @LIBM@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTDLDEPS = @LTDLDEPS@ +LTDLINCL = @LTDLINCL@ +LTDLOPEN = @LTDLOPEN@ +LT_CONFIG_H = @LT_CONFIG_H@ +LT_DLLOADERS = @LT_DLLOADERS@ +LT_DLPREOPEN = @LT_DLPREOPEN@ +MAINT = @MAINT@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OGG_CFLAGS = @OGG_CFLAGS@ +OGG_LIBS = @OGG_LIBS@ +OSS_LIBS = @OSS_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = 
@OTOOL64@ +PACKAGE = @PACKAGE@ +PKG_CONFIG = @PKG_CONFIG@ +PNG_CFLAGS = @PNG_CFLAGS@ +PNG_LIBS = @PNG_LIBS@ +PROFILE = @PROFILE@ +RANLIB = @RANLIB@ +RC = @RC@ +SDL_CFLAGS = @SDL_CFLAGS@ +SDL_CONFIG = @SDL_CONFIG@ +SDL_LIBS = @SDL_LIBS@ +SED = @SED@ +STRIP = @STRIP@ +THDEC_LIB_AGE = @THDEC_LIB_AGE@ +THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@ +THDEC_LIB_REVISION = @THDEC_LIB_REVISION@ +THENC_LIB_AGE = @THENC_LIB_AGE@ +THENC_LIB_CURRENT = @THENC_LIB_CURRENT@ +THENC_LIB_REVISION = @THENC_LIB_REVISION@ +THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@ +THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@ +THEORA_LDFLAGS = @THEORA_LDFLAGS@ +TH_LIB_AGE = @TH_LIB_AGE@ +TH_LIB_CURRENT = @TH_LIB_CURRENT@ +TH_LIB_REVISION = @TH_LIB_REVISION@ +VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@ +VERSION = @VERSION@ +VORBISENC_LIBS = @VORBISENC_LIBS@ +VORBISFILE_LIBS = @VORBISFILE_LIBS@ +VORBIS_CFLAGS = @VORBIS_CFLAGS@ +VORBIS_LIBS = @VORBIS_LIBS@ +am__include = @am__include@ +am__quote = @am__quote@ +install_sh = @install_sh@ +lt_ECHO = @lt_ECHO@ +ltdl_LIBOBJS = @ltdl_LIBOBJS@ +ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@ +sys_symbol_underscore = @sys_symbol_underscore@ + +theoraincludedir = $(includedir)/theora + +theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h + +noinst_HEADERS = codec.h theoradec.h +subdir = include/theora +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +DIST_SOURCES = +HEADERS = $(noinst_HEADERS) $(theorainclude_HEADERS) + +DIST_COMMON = $(noinst_HEADERS) $(theorainclude_HEADERS) Makefile.am \ + Makefile.in +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu include/theora/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) + +mostlyclean-libtool: + -rm -f *.lo + 
+clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: +theoraincludeHEADERS_INSTALL = $(INSTALL_HEADER) +install-theoraincludeHEADERS: $(theorainclude_HEADERS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(theoraincludedir) + @list='$(theorainclude_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(theoraincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(theoraincludedir)/$$f"; \ + $(theoraincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(theoraincludedir)/$$f; \ + done + +uninstall-theoraincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(theorainclude_HEADERS)'; for p in $$list; do \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " rm -f $(DESTDIR)$(theoraincludedir)/$$f"; \ + rm -f $(DESTDIR)$(theoraincludedir)/$$f; \ + done + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = ../.. 
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(HEADERS) + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(theoraincludedir) + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-generic distclean-libtool \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: install-theoraincludeHEADERS + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +uninstall-am: uninstall-info-am uninstall-theoraincludeHEADERS + +.PHONY: GTAGS all all-am check check-am clean clean-generic \ + clean-libtool distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am info info-am install \ + install-am install-data install-data-am install-exec \ + install-exec-am install-info install-info-am install-man \ + install-strip install-theoraincludeHEADERS installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool tags uninstall uninstall-am \ + uninstall-info-am uninstall-theoraincludeHEADERS + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/Engine/lib/libtheora/include/theora/codec.h b/Engine/lib/libtheora/include/theora/codec.h index afdc1b0fa..5c2669630 100644 --- a/Engine/lib/libtheora/include/theora/codec.h +++ b/Engine/lib/libtheora/include/theora/codec.h @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation http://www.xiph.org/ * * * ******************************************************************** @@ -24,10 +24,10 @@ * implementation for Theora, a free, * patent-unencumbered video codec. * Theora is derived from On2's VP3 codec with additional features and - * integration for Ogg multimedia formats by + * integration with Ogg multimedia formats by * the Xiph.Org Foundation. * Complete documentation of the format itself is available in - * the Theora + * the Theora * specification. * * \subsection Organization @@ -92,9 +92,9 @@ extern "C" { /*@}*/ /**The currently defined color space tags. - * See the Theora - * specification, Chapter 4, for exact details on the meaning of each of - * these color spaces.*/ + * See the Theora + * specification, Chapter 4, for exact details on the meaning + * of each of these color spaces.*/ typedef enum{ /**The color space was not specified at the encoder. It may be conveyed by an external means.*/ @@ -108,13 +108,13 @@ typedef enum{ }th_colorspace; /**The currently defined pixel format tags. - * See the Theora + * See the Theora * specification, Section 4.4, for details on the precise sample * locations.*/ typedef enum{ /**Chroma decimation by 2 in both the X and Y directions (4:2:0). - The Cb and Cr chroma planes are half the width and half the height of the - luma plane.*/ + The Cb and Cr chroma planes are half the width and half the + height of the luma plane.*/ TH_PF_420, /**Currently reserved.*/ TH_PF_RSVD, @@ -133,11 +133,11 @@ typedef enum{ /**A buffer for a single color plane in an uncompressed image. * This contains the image data in a left-to-right, top-down format. - * Each row of pixels is stored contiguously in memory, but successive rows - * need not be. + * Each row of pixels is stored contiguously in memory, but successive + * rows need not be. 
* Use \a stride to compute the offset of the next row. - * The encoder accepts both positive \a stride values (top-down in memory) and - * negative (bottom-up in memory). + * The encoder accepts both positive \a stride values (top-down in memory) + * and negative (bottom-up in memory). * The decoder currently always generates images with positive strides.*/ typedef struct{ /**The width of this plane.*/ @@ -151,18 +151,18 @@ typedef struct{ }th_img_plane; /**A complete image buffer for an uncompressed frame. - * The chroma planes may be decimated by a factor of two in either direction, - * as indicated by th_info#pixel_fmt. + * The chroma planes may be decimated by a factor of two in either + * direction, as indicated by th_info#pixel_fmt. * The width and height of the Y' plane must be multiples of 16. - * They may need to be cropped for display, using the rectangle specified by - * th_info#pic_x, th_info#pic_y, th_info#pic_width, and - * th_info#pic_height. + * They may need to be cropped for display, using the rectangle + * specified by th_info#pic_x, th_info#pic_y, th_info#pic_width, + * and th_info#pic_height. * All samples are 8 bits. * \note The term YUV often used to describe a colorspace is ambiguous. - * The exact parameters of the RGB to YUV conversion process aside, in many - * contexts the U and V channels actually have opposite meanings. - * To avoid this confusion, we are explicit: the name of the color channels are - * Y'CbCr, and they appear in that order, always. + * The exact parameters of the RGB to YUV conversion process aside, in + * many contexts the U and V channels actually have opposite meanings. + * To avoid this confusion, we are explicit: the names of the color + * channels are Y'CbCr, and they appear in that order, always. * The prime symbol denotes that the Y channel is non-linear. 
* Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/ typedef th_img_plane th_ycbcr_buffer[3]; @@ -192,7 +192,7 @@ typedef th_img_plane th_ycbcr_buffer[3]; * * It is also generally recommended that the offsets and sizes should still be * multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled. - * See the Theora + * See the Theora * specification, Section 4.4, for more details. * * Frame rate, in frames per second, is stored as a rational fraction, as is @@ -230,8 +230,8 @@ typedef struct{ * #frame_height-#pic_height-#pic_y must be no larger than 255. * This slightly funny restriction is due to the fact that the offset is * specified from the top of the image for consistency with the standard - * graphics left-handed coordinate system used throughout this API, while it - * is stored in the encoded stream as an offset from the bottom.*/ + * graphics left-handed coordinate system used throughout this API, while + * it is stored in the encoded stream as an offset from the bottom.*/ ogg_uint32_t pic_y; /**\name Frame rate * The frame rate, as a fraction. @@ -259,9 +259,6 @@ typedef struct{ /**The target bit-rate in bits per second. If initializing an encoder with this struct, set this field to a non-zero value to activate CBR encoding by default.*/ - /*TODO: Current encoder does not support CBR mode, or anything like it. - We also don't really know what nominal rate each quality level - corresponds to yet.*/ int target_bitrate; /**The target quality level. Valid values range from 0 to 63, inclusive, with higher values giving @@ -314,7 +311,7 @@ typedef struct{ * A particular tag may occur more than once, and order is significant. * The character set encoding for the strings is always UTF-8, but the tag * names are limited to ASCII, and treated as case-insensitive. - * See the Theora + * See the Theora * specification, Section 6.3.3 for details. 
* * In filling in this structure, th_decode_headerin() will null-terminate diff --git a/Engine/lib/libtheora/include/theora/theora.h b/Engine/lib/libtheora/include/theora/theora.h index dbef71675..af6eb6f38 100644 --- a/Engine/lib/libtheora/include/theora/theora.h +++ b/Engine/lib/libtheora/include/theora/theora.h @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation http://www.xiph.org/ * * * ******************************************************************** @@ -27,11 +27,11 @@ extern "C" #include -/** \defgroup oldfuncs Legacy pre-1.0 C API */ -/* @{ */ - -/** \mainpage - * +/** \file + * The libtheora pre-1.0 legacy C API. + * + * \ingroup oldfuncs + * * \section intro Introduction * * This is the documentation for the libtheora legacy C API, declared in @@ -42,7 +42,7 @@ extern "C" * * libtheora is the reference implementation for * Theora, a free video codec. - * Theora is derived from On2's VP3 codec with improved integration for + * Theora is derived from On2's VP3 codec with improved integration with * Ogg multimedia formats by Xiph.Org. * * \section overview Overview @@ -114,21 +114,11 @@ extern "C" * checking beyond whether a header bit is present. Instead, use the * theora_decode_header() function and check the return value; or examine the * header bytes at the beginning of the Ogg page. - * - * \subsection example Example Decoder - * - * See - * examples/dump_video.c for a simple decoder implementation. - * - * \section encoding Encoding Process - * - * See - * examples/encoder_example.c for a simple encoder implementation. */ -/** \file - * The libtheora pre-1.0 legacy C API. - */ + +/** \defgroup oldfuncs Legacy pre-1.0 C API */ +/* @{ */ /** * A YUV buffer for passing uncompressed frames to and from the codec. 
@@ -292,14 +282,21 @@ typedef struct theora_comment{ /**\name theora_control() codes */ - -/**\anchor decctlcodes +/* \anchor decctlcodes_old * These are the available request codes for theora_control() * when called with a decoder instance. - * By convention, these are odd, to distinguish them from the - * \ref encctlcodes "encoder control codes". + * By convention decoder control codes are odd, to distinguish + * them from \ref encctlcodes_old "encoder control codes" which + * are even. + * + * Note that since the 1.0 release, both the legacy and the final + * implementation accept all the same control codes, but only the + * final API declares the newer codes. + * * Keep any experimental or vendor-specific values above \c 0x8000.*/ +/*@{*/ + /**Get the maximum post-processing level. * The decoder supports a post-processing filter that can improve * the appearance of the decoded images. This returns the highest @@ -324,9 +321,9 @@ typedef struct theora_comment{ * \param[in] buf ogg_uint32_t: The maximum distance between key * frames. * \param[out] buf ogg_uint32_t: The actual maximum distance set. - * \retval TH_FAULT \a theora_state or \a buf is NULL. - * \retval TH_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). + * \retval OC_IMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) /**Set the granule position. @@ -338,33 +335,23 @@ typedef struct theora_comment{ */ #define TH_DECCTL_SET_GRANPOS (5) +/**\anchor encctlcodes_old */ -/**\anchor encctlcodes - * These are the available request codes for theora_control() - * when called with an encoder instance. - * By convention, these are even, to distinguish them from the - * \ref decctlcodes "decoder control codes". 
- * Keep any experimental or vendor-specific values above \c 0x8000.*/ -/*@{*/ /**Sets the quantization parameters to use. * The parameters are copied, not stored by reference, so they can be freed * after this call. * NULL may be specified to revert to the default parameters. - * For the current encoder, scale[ci!=0][qi] must be no greater than - * scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must be no - * greater than base[qti][pli][qi-1][ci]. - * These two conditions ensure that the actual quantizer for a given \a qti, - * \a pli, and \a ci does not increase as \a qi increases. * * \param[in] buf #th_quant_info - * \retval TH_FAULT \a theora_state is NULL. - * \retval TH_EINVAL Encoding has already begun, the quantization parameters - * do not meet one of the above stated conditions, \a buf - * is NULL and \a buf_sz is not zero, or \a buf - * is non-NULL and \a buf_sz is not - * sizeof(#th_quant_info). - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval OC_FAULT \a theora_state is NULL. + * \retval OC_EINVAL Encoding has already begun, the quantization parameters + * are not acceptable to this version of the encoder, + * \a buf is NULL and \a buf_sz is not zero, + * or \a buf is non-NULL and \a buf_sz is + * not sizeof(#th_quant_info). + * \retval OC_IMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_QUANT_PARAMS (2) + /**Disables any encoder features that would prevent lossless transcoding back * to VP3. * This primarily means disabling block-level QI values and not using 4MV mode @@ -389,10 +376,11 @@ typedef struct theora_comment{ * 4:2:0, the picture region is smaller than the full frame, * or if encoding has begun, preventing the quantization * tables and codebooks from being set. - * \retval TH_FAULT \a theora_state or \a buf is NULL. - * \retval TH_EINVAL \a buf_sz is not sizeof(int). - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval OC_FAULT \a theora_state or \a buf is NULL. 
+ * \retval OC_EINVAL \a buf_sz is not sizeof(int). + * \retval OC_IMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_VP3_COMPATIBLE (10) + /**Gets the maximum speed level. * Higher speed levels favor quicker encoding over better quality per bit. * Depending on the encoding mode, and the internal algorithms used, quality @@ -402,25 +390,27 @@ typedef struct theora_comment{ * the current encoding mode (VBR vs. CQI, etc.). * * \param[out] buf int: The maximum encoding speed level. - * \retval TH_FAULT \a theora_state or \a buf is NULL. - * \retval TH_EINVAL \a buf_sz is not sizeof(int). - * \retval TH_IMPL Not supported by this implementation in the current + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int). + * \retval OC_IMPL Not supported by this implementation in the current * encoding mode.*/ #define TH_ENCCTL_GET_SPLEVEL_MAX (12) + /**Sets the speed level. * By default a speed value of 1 is used. * * \param[in] buf int: The new encoding speed level. * 0 is slowest, larger values use less CPU. - * \retval TH_FAULT \a theora_state or \a buf is NULL. - * \retval TH_EINVAL \a buf_sz is not sizeof(int), or the + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int), or the * encoding speed level is out of bounds. * The maximum encoding speed level may be * implementation- and encoding mode-specific, and can be * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. - * \retval TH_IMPL Not supported by this implementation in the current + * \retval OC_IMPL Not supported by this implementation in the current * encoding mode.*/ #define TH_ENCCTL_SET_SPLEVEL (14) + /*@}*/ #define OC_FAULT -1 /**< General failure */ @@ -779,8 +769,8 @@ extern void theora_comment_clear(theora_comment *tc); * This is used to provide advanced control the encoding process. * \param th A #theora_state handle. * \param req The control code to process. 
- * See \ref encctlcodes "the list of available control codes" - * for details. + * See \ref encctlcodes_old "the list of available + * control codes" for details. * \param buf The parameters for this control code. * \param buf_sz The size of the parameter buffer.*/ extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz); diff --git a/Engine/lib/libtheora/include/theora/theoradec.h b/Engine/lib/libtheora/include/theora/theoradec.h index 7c08caadf..b20f0e3a6 100644 --- a/Engine/lib/libtheora/include/theora/theoradec.h +++ b/Engine/lib/libtheora/include/theora/theoradec.h @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation http://www.xiph.org/ * * * ******************************************************************** @@ -38,6 +38,10 @@ extern "C" { * Keep any experimental or vendor-specific values above \c 0x8000.*/ /*@{*/ /**Gets the maximum post-processing level. + * The decoder supports a post-processing filter that can improve + * the appearance of the decoded images. This returns the highest + * level setting for this post-processor, corresponding to maximum + * improvement and computational expense. * * \param[out] _buf int: The maximum post-processing level. * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. @@ -47,6 +51,10 @@ extern "C" { /**Sets the post-processing level. * By default, post-processing is disabled. * + * Sets the level of post-processing to use when decoding the + * compressed stream. This must be a value between zero (off) + * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. + * * \param[in] _buf int: The new post-processing level. * 0 to disable; larger values use more CPU. * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. 
@@ -83,6 +91,15 @@ extern "C" { * \retval TH_EINVAL \a _buf_sz is not * sizeof(th_stripe_callback).*/ #define TH_DECCTL_SET_STRIPE_CB (7) + +/**Enables telemetry and sets the macroblock display mode */ +#define TH_DECCTL_SET_TELEMETRY_MBMODE (9) +/**Enables telemetry and sets the motion vector display mode */ +#define TH_DECCTL_SET_TELEMETRY_MV (11) +/**Enables telemetry and sets the adaptive quantization display mode */ +#define TH_DECCTL_SET_TELEMETRY_QI (13) +/**Enables telemetry and sets the bitstream breakdown visualization mode */ +#define TH_DECCTL_SET_TELEMETRY_BITS (15) /*@}*/ @@ -289,6 +306,7 @@ extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, * It may be freed or overwritten without notification when * subsequent frames are decoded. * \retval 0 Success + * \retval TH_EFAULT \a _dec or \a _ycbcr was NULL. */ extern int th_decode_ycbcr_out(th_dec_ctx *_dec, th_ycbcr_buffer _ycbcr); diff --git a/Engine/lib/libtheora/include/theora/theoraenc.h b/Engine/lib/libtheora/include/theora/theoraenc.h index b98285862..fdf2ab21e 100644 --- a/Engine/lib/libtheora/include/theora/theoraenc.h +++ b/Engine/lib/libtheora/include/theora/theoraenc.h @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation http://www.xiph.org/ * * * ******************************************************************** @@ -49,26 +49,20 @@ extern "C" { * NULL and \a _buf_sz is not zero, or \a _buf is * non-NULL and \a _buf_sz is not * sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS. - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval TH_EIMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_HUFFMAN_CODES (0) /**Sets the quantization parameters to use. 
* The parameters are copied, not stored by reference, so they can be freed * after this call. * NULL may be specified to revert to the default parameters. - * For the current encoder, scale[ci!=0][qi] must be no greater than - * scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must be no - * greater than base[qti][pli][qi-1][ci]. - * These two conditions ensure that the actual quantizer for a given \a qti, - * \a pli, and \a ci does not increase as \a qi increases. * * \param[in] _buf #th_quant_info * \retval TH_EFAULT \a _enc_ctx is NULL. - * \retval TH_EINVAL Encoding has already begun, the quantization parameters - * do not meet one of the above stated conditions, \a _buf - * is NULL and \a _buf_sz is not zero, or \a _buf - * is non-NULL and \a _buf_sz is not - * sizeof(#th_quant_info). - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval TH_EINVAL Encoding has already begun, \a _buf is + * NULL and \a _buf_sz is not zero, + * or \a _buf is non-NULL and + * \a _buf_sz is not sizeof(#th_quant_info). + * \retval TH_EIMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_QUANT_PARAMS (2) /**Sets the maximum distance between key frames. * This can be changed during an encode, but will be bounded by @@ -81,12 +75,12 @@ extern "C" { * \param[out] _buf ogg_uint32_t: The actual maximum distance set. * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_uint32_t). - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval TH_EIMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) /**Disables any encoder features that would prevent lossless transcoding back * to VP3. - * This primarily means disabling block-level QI values and not using 4MV mode - * when any of the luma blocks in a macro block are not coded. 
+ * This primarily means disabling block-adaptive quantization and always coding + * all four luma blocks in a macro block when 4MV is used. * It also includes using the VP3 quantization tables and Huffman codes; if you * set them explicitly after calling this function, the resulting stream will * not be VP3-compatible. @@ -109,7 +103,7 @@ extern "C" { * tables and codebooks from being set. * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. * \retval TH_EINVAL \a _buf_sz is not sizeof(int). - * \retval TH_IMPL Not supported by this implementation.*/ + * \retval TH_EIMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_VP3_COMPATIBLE (10) /**Gets the maximum speed level. * Higher speed levels favor quicker encoding over better quality per bit. @@ -117,28 +111,254 @@ extern "C" { * may actually improve, but in this case bitrate will also likely increase. * In any case, overall rate/distortion performance will probably decrease. * The maximum value, and the meaning of each value, may change depending on - * the current encoding mode (VBR vs. CQI, etc.). + * the current encoding mode (VBR vs. constant quality, etc.). * - * \param[out] _buf int: The maximum encoding speed level. + * \param[out] _buf int: The maximum encoding speed level. * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. * \retval TH_EINVAL \a _buf_sz is not sizeof(int). - * \retval TH_IMPL Not supported by this implementation in the current + * \retval TH_EIMPL Not supported by this implementation in the current * encoding mode.*/ #define TH_ENCCTL_GET_SPLEVEL_MAX (12) /**Sets the speed level. - * By default, the slowest speed (0) is used. + * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL. * - * \param[in] _buf int: The new encoding speed level. - * 0 is slowest, larger values use less CPU. + * \param[in] _buf int: The new encoding speed level. + * 0 is slowest, larger values use less CPU. * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. 
* \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the * encoding speed level is out of bounds. * The maximum encoding speed level may be * implementation- and encoding mode-specific, and can be * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. - * \retval TH_IMPL Not supported by this implementation in the current + * \retval TH_EIMPL Not supported by this implementation in the current * encoding mode.*/ #define TH_ENCCTL_SET_SPLEVEL (14) +/**Gets the current speed level. + * The default speed level may vary according to encoder implementation, but if + * this control code is not supported (it returns #TH_EIMPL), the default may + * be assumed to be the slowest available speed (0). + * The maximum encoding speed level may be implementation- and encoding + * mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. + * + * \param[out] _buf int: The current encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL (16) +/**Sets the number of duplicates of the next frame to produce. + * Although libtheora can encode duplicate frames very cheaply, it costs some + * amount of CPU to detect them, and a run of duplicates cannot span a + * keyframe boundary. + * This control code tells the encoder to produce the specified number of extra + * duplicates of the next frame. + * This allows the encoder to make smarter keyframe placement decisions and + * rate control decisions, and reduces CPU usage as well, when compared to + * just submitting the same frame for encoding multiple times. + * This setting only applies to the next frame submitted for encoding. + * You MUST call th_encode_packetout() repeatedly until it returns 0, or the + * extra duplicate frames will be lost. 
+ * + * \param[in] _buf int: The number of duplicates to produce. + * If this is negative or zero, no duplicates will be produced. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * number of duplicates is greater than or equal to the + * maximum keyframe interval. + * In the latter case, NO duplicate frames will be produced. + * You must ensure that the maximum keyframe interval is set + * larger than the maximum number of duplicates you will + * ever wish to insert prior to encoding. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_DUP_COUNT (18) +/**Modifies the default bitrate management behavior. + * Use to allow or disallow frame dropping, and to enable or disable capping + * bit reservoir overflows and underflows. + * See \ref ratectlflags "the list of available flags". + * The flags are set by default to + * #TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW. + * + * \param[in] _buf int: Any combination of + * \ref ratectlflags "the available flags": + * - #TH_RATECTL_DROP_FRAMES: Enable frame dropping. + * - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later + * use. + * - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls + * later. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int) or rate control + * is not enabled. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_RATE_FLAGS (20) +/**Sets the size of the bitrate management bit reservoir as a function + * of number of frames. + * The reservoir size affects how quickly bitrate management reacts to + * instantaneous changes in the video complexity. + * Larger reservoirs react more slowly, and provide better overall quality, but + * require more buffering by a client, adding more latency to live streams. 
+ * By default, libtheora sets the reservoir to the maximum distance between + * keyframes, subject to a minimum and maximum limit. + * This call may be used to increase or decrease the reservoir, increasing or + * decreasing the allowed temporary variance in bitrate. + * An implementation may impose some limits on the size of a reservoir it can + * handle, in which case the actual reservoir size may not be exactly what was + * requested. + * The actual value set will be returned. + * + * \param[in] _buf int: Requested size of the reservoir measured in + * frames. + * \param[out] _buf int: The actual size of the reservoir set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or rate control + * is not enabled. The buffer has an implementation + * defined minimum and maximum size and the value in _buf + * will be adjusted to match the actual value set. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_RATE_BUFFER (22) +/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics. + * Pass 1 mode must be enabled before the first frame is encoded, and a target + * bitrate must have already been specified to the encoder. + * Although this does not have to be the exact rate that will be used in the + * second pass, closer values may produce better results. + * The first call returns the size of the two-pass header data, along with some + * placeholder content, and sets the encoder into pass 1 mode implicitly. + * This call sets the encoder to pass 1 mode implicitly. + * Then, a subsequent call must be made after each call to + * th_encode_ycbcr_in() to retrieve the metrics for that frame. + * An additional, final call must be made to retrieve the summary data, + * containing such information as the total number of frames, etc. 
+ * This must be stored in place of the placeholder data that was returned + * in the first call, before the frame metrics data. + * All of this data must be presented back to the encoder during pass 2 using + * #TH_ENCCTL_2PASS_IN. + * + * \param[out] _buf char *: Returns a pointer to internal storage + * containing the two pass metrics data. + * This storage is only valid until the next call, or until the + * encoder context is freed, and must be copied by the + * application. + * \retval >=0 The number of bytes of metric data available in the + * returned buffer. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(char *), no target + * bitrate has been set, or the first call was made after + * the first frame was submitted for encoding. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_2PASS_OUT (24) +/**Submits two-pass encoding metric data collected during the first encoding pass to + * the second pass. + * The first call must be made before the first frame is encoded, and a target + * bitrate must have already been specified to the encoder. + * It sets the encoder to pass 2 mode implicitly; this cannot be disabled. + * The encoder may require reading data from some or all of the frames in + * advance, depending on, e.g., the reservoir size used in the second pass. + * You must call this function repeatedly before each frame to provide data + * until either a) it fails to consume all of the data presented or b) all of + * the pass 1 data has been consumed. + * In the first case, you must save the remaining data to be presented after + * the next frame. + * You can call this function with a NULL argument to get an upper bound on + * the number of bytes that will be required before the next frame. + * + * When pass 2 is first enabled, the default bit reservoir is set to the entire + * file; this gives maximum flexibility but can lead to very high peak rates. 
+ * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER + * (e.g., to set it to the keyframe interval for non-live streaming), however, + * you may then need to provide more data before the next frame. + * + * \param[in] _buf char[]: A buffer containing the data returned by + * #TH_ENCCTL_2PASS_OUT in pass 1. + * You may pass NULL for \a _buf to return an upper + * bound on the number of additional bytes needed before the + * next frame. + * The summary data returned at the end of pass 1 must be at + * the head of the buffer on the first call with a + * non-NULL \a _buf, and the placeholder data + * returned at the start of pass 1 should be omitted. + * After each call you should advance this buffer by the number + * of bytes consumed. + * \retval >0 The number of bytes of metric data required/consumed. + * \retval 0 No more data is required before the next frame. + * \retval TH_EFAULT \a _enc_ctx is NULL. + * \retval TH_EINVAL No target bitrate has been set, or the first call was + * made after the first frame was submitted for + * encoding. + * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible + * implementation of this library. + * \retval TH_EBADHEADER The data was invalid; this may be returned when + * attempting to read an aborted pass 1 file that still + * has the placeholder data in place of the summary + * data. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_2PASS_IN (26) +/**Sets the current encoding quality. + * This is only valid so long as no bitrate has been specified, either through + * the #th_info struct used to initialize the encoder or through + * #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future + * version). + * If it is set before the headers are emitted, the target quality encoded in + * them will be updated. + * + * \param[in] _buf int: The new target quality, in the range 0...63, + * inclusive. + * \retval 0 Success. 
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL A target bitrate has already been specified, or the + * quality index was not in the range 0...63. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUALITY (28) +/**Sets the current encoding bitrate. + * Once a bitrate is set, the encoder must use a rate-controlled mode for all + * future frames (this restriction may be relaxed in a future version). + * If it is set before the headers are emitted, the target bitrate encoded in + * them will be updated. + * Due to the buffer delay, the exact bitrate of each section of the encode is + * not guaranteed. + * The encoder may have already used more bits than allowed for the frames it + * has encoded, expecting to make them up in future frames, or it may have + * used fewer, holding the excess in reserve. + * The exact transition between the two bitrates is not well-defined by this + * API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS. + * After a number of frames equal to the buffer delay, one may expect further + * output to average at the target bitrate. + * + * \param[in] _buf long: The new target bitrate, in bits per second. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL The target bitrate was not positive. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_BITRATE (30) + +/*@}*/ + + +/**\name TH_ENCCTL_SET_RATE_FLAGS flags + * \anchor ratectlflags + * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/ +/*@{*/ +/**Drop frames to keep within bitrate buffer constraints. + * This can have a severe impact on quality, but is the only way to ensure that + * bitrate targets are met at low rates during sudden bursts of activity.*/ +#define TH_RATECTL_DROP_FRAMES (0x1) +/**Ignore bitrate buffer overflows. 
+ * If the encoder uses so few bits that the reservoir of available bits + * overflows, ignore the excess. + * The encoder will not try to use these extra bits in future frames. + * At high rates this may cause the result to be undersized, but allows a + * client to play the stream using a finite buffer; it should normally be + * enabled.*/ +#define TH_RATECTL_CAP_OVERFLOW (0x2) +/**Ignore bitrate buffer underflows. + * If the encoder uses so many bits that the reservoir of available bits + * underflows, ignore the deficit. + * The encoder will not try to make up these extra bits in future frames. + * At low rates this may cause the result to be oversized; it should normally + * be disabled.*/ +#define TH_RATECTL_CAP_UNDERFLOW (0x4) /*@}*/ diff --git a/Engine/lib/libtheora/lib/Makefile.am b/Engine/lib/libtheora/lib/Makefile.am new file mode 100644 index 000000000..89ce26120 --- /dev/null +++ b/Engine/lib/libtheora/lib/Makefile.am @@ -0,0 +1,173 @@ +INCLUDES = -I$(top_srcdir)/include +AM_CFLAGS = $(OGG_CFLAGS) $(CAIRO_CFLAGS) + +EXTRA_DIST = \ + cpu.c \ + encoder_disabled.c \ + x86/mmxencfrag.c \ + x86/mmxfdct.c \ + x86/sse2fdct.c \ + x86/x86enc.c \ + x86/x86enc.h \ + x86/mmxfrag.c \ + x86/mmxfrag.h \ + x86/mmxidct.c \ + x86/mmxloop.h \ + x86/mmxstate.c \ + x86/x86int.h \ + x86/x86state.c \ + x86_vc + +lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la + +if THEORA_DISABLE_ENCODE +encoder_uniq_sources = \ + encoder_disabled.c + +encoder_sources = \ + $(encoder_uniq_sources) +else +encoder_uniq_x86_sources = \ + x86/mmxencfrag.c \ + x86/mmxfdct.c \ + x86/x86enc.c + +encoder_uniq_x86_64_sources = \ + x86/sse2fdct.c + +encoder_shared_x86_sources = \ + x86/mmxfrag.c \ + x86/mmxidct.c \ + x86/mmxstate.c \ + x86/x86state.c + +encoder_shared_x86_64_sources = + +if CPU_x86_64 +encoder_uniq_arch_sources = \ + $(encoder_uniq_x86_sources) \ + $(encoder_uniq_x86_64_sources) +encoder_shared_arch_sources = \ + $(encoder_shared_x86_sources) \ + 
$(encoder_shared_x86_64_sources) +else +if CPU_x86_32 +encoder_uniq_arch_sources = $(encoder_uniq_x86_sources) +encoder_shared_arch_sources = $(encoder_shared_x86_sources) +else +encoder_uniq_arch_sources = +encoder_shared_arch_sources = +endif +endif + +encoder_uniq_sources = \ + analyze.c \ + fdct.c \ + encfrag.c \ + encapiwrapper.c \ + encinfo.c \ + encode.c \ + enquant.c \ + huffenc.c \ + mathops.c \ + mcenc.c \ + rate.c \ + tokenize.c \ + $(encoder_uniq_arch_sources) + +encoder_sources = \ + apiwrapper.c \ + fragment.c \ + idct.c \ + internal.c \ + state.c \ + quant.c \ + $(encoder_shared_arch_sources) \ + $(encoder_uniq_sources) + +endif + +decoder_x86_sources = \ + x86/mmxidct.c \ + x86/mmxfrag.c \ + x86/mmxstate.c \ + x86/x86state.c +if CPU_x86_64 +decoder_arch_sources = $(decoder_x86_sources) +else +if CPU_x86_32 +decoder_arch_sources = $(decoder_x86_sources) +else +decoder_arch_sources = +endif +endif + +decoder_sources = \ + apiwrapper.c \ + bitpack.c \ + decapiwrapper.c \ + decinfo.c \ + decode.c \ + dequant.c \ + fragment.c \ + huffdec.c \ + idct.c \ + info.c \ + internal.c \ + quant.c \ + state.c \ + $(decoder_arch_sources) + +noinst_HEADERS = \ + cpu.h \ + internal.h \ + encint.h \ + enquant.h \ + huffenc.h \ + mathops.h \ + modedec.h \ + x86/x86enc.h \ + apiwrapper.h \ + bitpack.h \ + dct.h \ + decint.h \ + dequant.h \ + huffdec.h \ + huffman.h \ + ocintrin.h \ + quant.h \ + x86/mmxfrag.h \ + x86/mmxloop.h \ + x86/x86int.h + +libtheoradec_la_SOURCES = \ + $(decoder_sources) \ + Version_script-dec theoradec.exp +libtheoradec_la_LDFLAGS = \ + -version-info @THDEC_LIB_CURRENT@:@THDEC_LIB_REVISION@:@THDEC_LIB_AGE@ \ + @THEORADEC_LDFLAGS@ @CAIRO_LIBS@ + +libtheoraenc_la_SOURCES = \ + $(encoder_sources) \ + Version_script-enc theoraenc.exp +libtheoraenc_la_LDFLAGS = \ + -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \ + @THEORAENC_LDFLAGS@ $(OGG_LIBS) + +libtheora_la_SOURCES = \ + $(decoder_sources) \ + $(encoder_uniq_sources) \ + 
Version_script theora.exp +libtheora_la_LDFLAGS = \ + -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ \ + @THEORA_LDFLAGS@ @CAIRO_LIBS@ $(OGG_LIBS) + +debug: + $(MAKE) all CFLAGS="@DEBUG@" + +profile: + $(MAKE) all CFLAGS="@PROFILE@" + +# construct various symbol export list files +.def.exp : defexp.awk + awk -f defexp.awk $< > $@ diff --git a/Engine/lib/libtheora/lib/Makefile.in b/Engine/lib/libtheora/lib/Makefile.in new file mode 100644 index 000000000..f26ccdc0e --- /dev/null +++ b/Engine/lib/libtheora/lib/Makefile.in @@ -0,0 +1,845 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_HEADER = $(INSTALL_DATA) +transform = @program_transform_name@ +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ + +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AR = @AR@ +ARGZ_H = @ARGZ_H@ +AS = @AS@ +AWK = @AWK@ +BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@ +CAIRO_CFLAGS = @CAIRO_CFLAGS@ +CAIRO_LIBS = @CAIRO_LIBS@ +CC = @CC@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DEBUG = @DEBUG@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +F77 = @F77@ +GCJ = @GCJ@ +GCJFLAGS = @GCJFLAGS@ +GETOPT_OBJS = @GETOPT_OBJS@ +GREP = @GREP@ +HAVE_BIBTEX = @HAVE_BIBTEX@ +HAVE_DOXYGEN = @HAVE_DOXYGEN@ +HAVE_PDFLATEX = @HAVE_PDFLATEX@ +HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@ +HAVE_TRANSFIG = @HAVE_TRANSFIG@ +HAVE_VALGRIND = @HAVE_VALGRIND@ +INCLTDL = @INCLTDL@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LIBADD_DL = @LIBADD_DL@ +LIBADD_DLD_LINK = @LIBADD_DLD_LINK@ +LIBADD_DLOPEN = @LIBADD_DLOPEN@ +LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@ +LIBLTDL = @LIBLTDL@ +LIBM = @LIBM@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTDLDEPS = @LTDLDEPS@ +LTDLINCL = @LTDLINCL@ +LTDLOPEN = @LTDLOPEN@ +LT_CONFIG_H = @LT_CONFIG_H@ +LT_DLLOADERS = @LT_DLLOADERS@ +LT_DLPREOPEN = @LT_DLPREOPEN@ +MAINT = @MAINT@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OGG_CFLAGS = @OGG_CFLAGS@ +OGG_LIBS = @OGG_LIBS@ +OSS_LIBS = @OSS_LIBS@ +OTOOL = @OTOOL@ +OTOOL64 = 
@OTOOL64@ +PACKAGE = @PACKAGE@ +PKG_CONFIG = @PKG_CONFIG@ +PNG_CFLAGS = @PNG_CFLAGS@ +PNG_LIBS = @PNG_LIBS@ +PROFILE = @PROFILE@ +RANLIB = @RANLIB@ +RC = @RC@ +SDL_CFLAGS = @SDL_CFLAGS@ +SDL_CONFIG = @SDL_CONFIG@ +SDL_LIBS = @SDL_LIBS@ +SED = @SED@ +STRIP = @STRIP@ +THDEC_LIB_AGE = @THDEC_LIB_AGE@ +THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@ +THDEC_LIB_REVISION = @THDEC_LIB_REVISION@ +THENC_LIB_AGE = @THENC_LIB_AGE@ +THENC_LIB_CURRENT = @THENC_LIB_CURRENT@ +THENC_LIB_REVISION = @THENC_LIB_REVISION@ +THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@ +THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@ +THEORA_LDFLAGS = @THEORA_LDFLAGS@ +TH_LIB_AGE = @TH_LIB_AGE@ +TH_LIB_CURRENT = @TH_LIB_CURRENT@ +TH_LIB_REVISION = @TH_LIB_REVISION@ +VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@ +VERSION = @VERSION@ +VORBISENC_LIBS = @VORBISENC_LIBS@ +VORBISFILE_LIBS = @VORBISFILE_LIBS@ +VORBIS_CFLAGS = @VORBIS_CFLAGS@ +VORBIS_LIBS = @VORBIS_LIBS@ +am__include = @am__include@ +am__quote = @am__quote@ +install_sh = @install_sh@ +lt_ECHO = @lt_ECHO@ +ltdl_LIBOBJS = @ltdl_LIBOBJS@ +ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@ +sys_symbol_underscore = @sys_symbol_underscore@ +INCLUDES = -I$(top_srcdir)/include +AM_CFLAGS = $(OGG_CFLAGS) $(CAIRO_CFLAGS) + +EXTRA_DIST = \ + cpu.c \ + encoder_disabled.c \ + x86/mmxencfrag.c \ + x86/mmxfdct.c \ + x86/sse2fdct.c \ + x86/x86enc.c \ + x86/x86enc.h \ + x86/mmxfrag.c \ + x86/mmxfrag.h \ + x86/mmxidct.c \ + x86/mmxloop.h \ + x86/mmxstate.c \ + x86/x86int.h \ + x86/x86state.c \ + x86_vc + + +lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la + +@THEORA_DISABLE_ENCODE_TRUE@encoder_uniq_sources = \ +@THEORA_DISABLE_ENCODE_TRUE@ encoder_disabled.c + +@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_sources = \ +@THEORA_DISABLE_ENCODE_FALSE@ analyze.c \ +@THEORA_DISABLE_ENCODE_FALSE@ fdct.c \ +@THEORA_DISABLE_ENCODE_FALSE@ encfrag.c \ +@THEORA_DISABLE_ENCODE_FALSE@ encapiwrapper.c \ +@THEORA_DISABLE_ENCODE_FALSE@ encinfo.c \ +@THEORA_DISABLE_ENCODE_FALSE@ encode.c \ 
+@THEORA_DISABLE_ENCODE_FALSE@ enquant.c \ +@THEORA_DISABLE_ENCODE_FALSE@ huffenc.c \ +@THEORA_DISABLE_ENCODE_FALSE@ mathops.c \ +@THEORA_DISABLE_ENCODE_FALSE@ mcenc.c \ +@THEORA_DISABLE_ENCODE_FALSE@ rate.c \ +@THEORA_DISABLE_ENCODE_FALSE@ tokenize.c \ +@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_arch_sources) + + +@THEORA_DISABLE_ENCODE_TRUE@encoder_sources = \ +@THEORA_DISABLE_ENCODE_TRUE@ $(encoder_uniq_sources) + +@THEORA_DISABLE_ENCODE_FALSE@encoder_sources = \ +@THEORA_DISABLE_ENCODE_FALSE@ apiwrapper.c \ +@THEORA_DISABLE_ENCODE_FALSE@ fragment.c \ +@THEORA_DISABLE_ENCODE_FALSE@ idct.c \ +@THEORA_DISABLE_ENCODE_FALSE@ internal.c \ +@THEORA_DISABLE_ENCODE_FALSE@ state.c \ +@THEORA_DISABLE_ENCODE_FALSE@ quant.c \ +@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_arch_sources) \ +@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_sources) + +@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_x86_sources = \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxencfrag.c \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxfdct.c \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/x86enc.c + + +@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_x86_64_sources = \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/sse2fdct.c + + +@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_x86_sources = \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxfrag.c \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxidct.c \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/mmxstate.c \ +@THEORA_DISABLE_ENCODE_FALSE@ x86/x86state.c + + +@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_x86_64_sources = + +@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = $(encoder_uniq_x86_sources) +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_x86_sources) \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_x86_64_sources) + 
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = $(encoder_shared_x86_sources) +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_x86_sources) \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_x86_64_sources) + + +decoder_x86_sources = \ + x86/mmxidct.c \ + x86/mmxfrag.c \ + x86/mmxstate.c \ + x86/x86state.c + +@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@decoder_arch_sources = +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@decoder_arch_sources = $(decoder_x86_sources) +@CPU_x86_64_TRUE@decoder_arch_sources = $(decoder_x86_sources) + +decoder_sources = \ + apiwrapper.c \ + bitpack.c \ + decapiwrapper.c \ + decinfo.c \ + decode.c \ + dequant.c \ + fragment.c \ + huffdec.c \ + idct.c \ + info.c \ + internal.c \ + quant.c \ + state.c \ + $(decoder_arch_sources) + + +noinst_HEADERS = \ + cpu.h \ + internal.h \ + encint.h \ + enquant.h \ + huffenc.h \ + mathops.h \ + modedec.h \ + x86/x86enc.h \ + apiwrapper.h \ + bitpack.h \ + dct.h \ + decint.h \ + dequant.h \ + huffdec.h \ + huffman.h \ + ocintrin.h \ + quant.h \ + x86/mmxfrag.h \ + x86/mmxloop.h \ + x86/x86int.h + + +libtheoradec_la_SOURCES = \ + $(decoder_sources) \ + Version_script-dec theoradec.exp + +libtheoradec_la_LDFLAGS = \ + -version-info @THDEC_LIB_CURRENT@:@THDEC_LIB_REVISION@:@THDEC_LIB_AGE@ \ + @THEORADEC_LDFLAGS@ @CAIRO_LIBS@ + + +libtheoraenc_la_SOURCES = \ + $(encoder_sources) \ + Version_script-enc theoraenc.exp + +libtheoraenc_la_LDFLAGS = \ + -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \ + @THEORAENC_LDFLAGS@ $(OGG_LIBS) + + +libtheora_la_SOURCES = \ + $(decoder_sources) \ + $(encoder_uniq_sources) \ + Version_script theora.exp + +libtheora_la_LDFLAGS = \ + -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ \ + @THEORA_LDFLAGS@ 
@CAIRO_LIBS@ $(OGG_LIBS) + +subdir = lib +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(lib_LTLIBRARIES) + +libtheora_la_LIBADD = +am__objects_1 = mmxidct.lo mmxfrag.lo mmxstate.lo x86state.lo +@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@am__objects_2 = +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@am__objects_2 = $(am__objects_1) +@CPU_x86_64_TRUE@am__objects_2 = $(am__objects_1) +am__objects_3 = apiwrapper.lo bitpack.lo decapiwrapper.lo decinfo.lo \ + decode.lo dequant.lo fragment.lo huffdec.lo idct.lo info.lo \ + internal.lo quant.lo state.lo $(am__objects_2) +@THEORA_DISABLE_ENCODE_FALSE@am__objects_4 = mmxencfrag.lo mmxfdct.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ x86enc.lo +@THEORA_DISABLE_ENCODE_FALSE@am__objects_5 = sse2fdct.lo +@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = \ +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_4) +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_4) \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_5) +@THEORA_DISABLE_ENCODE_TRUE@am__objects_7 = encoder_disabled.lo +@THEORA_DISABLE_ENCODE_FALSE@am__objects_7 = analyze.lo fdct.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ encfrag.lo encapiwrapper.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ encinfo.lo encode.lo enquant.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ huffenc.lo mathops.lo mcenc.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ rate.lo tokenize.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_6) +am_libtheora_la_OBJECTS = $(am__objects_3) $(am__objects_7) +libtheora_la_OBJECTS = $(am_libtheora_la_OBJECTS) +libtheoradec_la_LIBADD = +am_libtheoradec_la_OBJECTS = $(am__objects_3) +libtheoradec_la_OBJECTS = $(am_libtheoradec_la_OBJECTS) +libtheoraenc_la_LIBADD = +@THEORA_DISABLE_ENCODE_FALSE@am__objects_8 = 
mmxfrag.lo mmxidct.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ mmxstate.lo x86state.lo +@THEORA_DISABLE_ENCODE_FALSE@am__objects_9 = +@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = \ +@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_8) +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_8) \ +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_9) +@THEORA_DISABLE_ENCODE_TRUE@am__objects_11 = $(am__objects_7) +@THEORA_DISABLE_ENCODE_FALSE@am__objects_11 = apiwrapper.lo fragment.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ idct.lo internal.lo state.lo \ +@THEORA_DISABLE_ENCODE_FALSE@ quant.lo $(am__objects_10) \ +@THEORA_DISABLE_ENCODE_FALSE@ $(am__objects_7) +am_libtheoraenc_la_OBJECTS = $(am__objects_11) +libtheoraenc_la_OBJECTS = $(am_libtheoraenc_la_OBJECTS) + +DEFS = @DEFS@ +DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir) +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/analyze.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/apiwrapper.Plo ./$(DEPDIR)/bitpack.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/decapiwrapper.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/decinfo.Plo ./$(DEPDIR)/decode.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/dequant.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/encapiwrapper.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/encfrag.Plo ./$(DEPDIR)/encinfo.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/encode.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/encoder_disabled.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/enquant.Plo ./$(DEPDIR)/fdct.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/fragment.Plo ./$(DEPDIR)/huffdec.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/huffenc.Plo ./$(DEPDIR)/idct.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/info.Plo ./$(DEPDIR)/internal.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/mathops.Plo ./$(DEPDIR)/mcenc.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/mmxencfrag.Plo ./$(DEPDIR)/mmxfdct.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/mmxfrag.Plo ./$(DEPDIR)/mmxidct.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/mmxstate.Plo ./$(DEPDIR)/quant.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/rate.Plo ./$(DEPDIR)/sse2fdct.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/state.Plo ./$(DEPDIR)/tokenize.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/x86enc.Plo ./$(DEPDIR)/x86state.Plo +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CFLAGS = @CFLAGS@ +DIST_SOURCES = $(libtheora_la_SOURCES) $(libtheoradec_la_SOURCES) \ + $(libtheoraenc_la_SOURCES) +HEADERS = $(noinst_HEADERS) + +DIST_COMMON = $(noinst_HEADERS) Makefile.am Makefile.in +SOURCES = $(libtheora_la_SOURCES) $(libtheoradec_la_SOURCES) $(libtheoraenc_la_SOURCES) + +all: all-am + 
+.SUFFIXES: +.SUFFIXES: .c .def .exp .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu lib/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) +libLTLIBRARIES_INSTALL = $(INSTALL) +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(libdir) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \ + $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \ + else :; fi; \ + done + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + p="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \ + $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +mmxidct.lo: x86/mmxidct.c +mmxfrag.lo: x86/mmxfrag.c +mmxstate.lo: x86/mmxstate.c +x86state.lo: x86/x86state.c +mmxencfrag.lo: x86/mmxencfrag.c +mmxfdct.lo: x86/mmxfdct.c +x86enc.lo: x86/x86enc.c +sse2fdct.lo: x86/sse2fdct.c +libtheora.la: $(libtheora_la_OBJECTS) $(libtheora_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libtheora_la_LDFLAGS) $(libtheora_la_OBJECTS) $(libtheora_la_LIBADD) $(LIBS) +libtheoradec.la: $(libtheoradec_la_OBJECTS) $(libtheoradec_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libtheoradec_la_LDFLAGS) 
$(libtheoradec_la_OBJECTS) $(libtheoradec_la_LIBADD) $(LIBS) +libtheoraenc.la: $(libtheoraenc_la_OBJECTS) $(libtheoraenc_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libtheoraenc_la_LDFLAGS) $(libtheoraenc_la_OBJECTS) $(libtheoraenc_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/analyze.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apiwrapper.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitpack.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decapiwrapper.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decinfo.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decode.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dequant.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encapiwrapper.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encfrag.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encinfo.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encoder_disabled.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/enquant.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fdct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fragment.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/huffdec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/huffenc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/internal.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mathops.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mcenc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/mmxencfrag.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxfdct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxfrag.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxidct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxstate.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quant.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rate.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sse2fdct.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/state.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tokenize.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86enc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86state.Plo@am__quote@ + +distclean-depend: + -rm -rf ./$(DEPDIR) + +.c.o: +@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$< + +.c.obj: +@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(COMPILE) -c `cygpath -w $<` + +.c.lo: +@AMDEP_TRUE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< + +mmxidct.o: x86/mmxidct.c +@AMDEP_TRUE@ source='x86/mmxidct.c' object='mmxidct.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxidct.Po' tmpdepfile='$(DEPDIR)/mmxidct.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.o `test -f 'x86/mmxidct.c' || echo '$(srcdir)/'`x86/mmxidct.c + +mmxidct.obj: x86/mmxidct.c +@AMDEP_TRUE@ source='x86/mmxidct.c' object='mmxidct.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxidct.Po' tmpdepfile='$(DEPDIR)/mmxidct.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.obj `cygpath -w x86/mmxidct.c` + +mmxidct.lo: x86/mmxidct.c +@AMDEP_TRUE@ source='x86/mmxidct.c' object='mmxidct.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxidct.Plo' tmpdepfile='$(DEPDIR)/mmxidct.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.lo `test -f 'x86/mmxidct.c' || echo '$(srcdir)/'`x86/mmxidct.c + +mmxfrag.o: x86/mmxfrag.c +@AMDEP_TRUE@ source='x86/mmxfrag.c' object='mmxfrag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfrag.Po' tmpdepfile='$(DEPDIR)/mmxfrag.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.o `test -f 'x86/mmxfrag.c' || echo '$(srcdir)/'`x86/mmxfrag.c + +mmxfrag.obj: x86/mmxfrag.c +@AMDEP_TRUE@ source='x86/mmxfrag.c' object='mmxfrag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfrag.Po' tmpdepfile='$(DEPDIR)/mmxfrag.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.obj `cygpath -w x86/mmxfrag.c` + +mmxfrag.lo: x86/mmxfrag.c +@AMDEP_TRUE@ source='x86/mmxfrag.c' object='mmxfrag.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ 
depfile='$(DEPDIR)/mmxfrag.Plo' tmpdepfile='$(DEPDIR)/mmxfrag.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.lo `test -f 'x86/mmxfrag.c' || echo '$(srcdir)/'`x86/mmxfrag.c + +mmxstate.o: x86/mmxstate.c +@AMDEP_TRUE@ source='x86/mmxstate.c' object='mmxstate.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxstate.Po' tmpdepfile='$(DEPDIR)/mmxstate.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.o `test -f 'x86/mmxstate.c' || echo '$(srcdir)/'`x86/mmxstate.c + +mmxstate.obj: x86/mmxstate.c +@AMDEP_TRUE@ source='x86/mmxstate.c' object='mmxstate.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxstate.Po' tmpdepfile='$(DEPDIR)/mmxstate.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.obj `cygpath -w x86/mmxstate.c` + +mmxstate.lo: x86/mmxstate.c +@AMDEP_TRUE@ source='x86/mmxstate.c' object='mmxstate.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxstate.Plo' tmpdepfile='$(DEPDIR)/mmxstate.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.lo `test -f 'x86/mmxstate.c' || echo '$(srcdir)/'`x86/mmxstate.c + +x86state.o: x86/x86state.c +@AMDEP_TRUE@ source='x86/x86state.c' object='x86state.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/x86state.Po' tmpdepfile='$(DEPDIR)/x86state.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.o `test -f 'x86/x86state.c' || echo '$(srcdir)/'`x86/x86state.c + +x86state.obj: x86/x86state.c +@AMDEP_TRUE@ source='x86/x86state.c' object='x86state.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/x86state.Po' tmpdepfile='$(DEPDIR)/x86state.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.obj `cygpath -w x86/x86state.c` + +x86state.lo: x86/x86state.c +@AMDEP_TRUE@ source='x86/x86state.c' object='x86state.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/x86state.Plo' tmpdepfile='$(DEPDIR)/x86state.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.lo `test -f 'x86/x86state.c' || echo '$(srcdir)/'`x86/x86state.c + +mmxencfrag.o: x86/mmxencfrag.c +@AMDEP_TRUE@ source='x86/mmxencfrag.c' object='mmxencfrag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxencfrag.Po' tmpdepfile='$(DEPDIR)/mmxencfrag.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.o `test -f 'x86/mmxencfrag.c' || echo '$(srcdir)/'`x86/mmxencfrag.c + +mmxencfrag.obj: x86/mmxencfrag.c +@AMDEP_TRUE@ source='x86/mmxencfrag.c' object='mmxencfrag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxencfrag.Po' tmpdepfile='$(DEPDIR)/mmxencfrag.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.obj `cygpath -w x86/mmxencfrag.c` + +mmxencfrag.lo: x86/mmxencfrag.c +@AMDEP_TRUE@ 
source='x86/mmxencfrag.c' object='mmxencfrag.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxencfrag.Plo' tmpdepfile='$(DEPDIR)/mmxencfrag.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.lo `test -f 'x86/mmxencfrag.c' || echo '$(srcdir)/'`x86/mmxencfrag.c + +mmxfdct.o: x86/mmxfdct.c +@AMDEP_TRUE@ source='x86/mmxfdct.c' object='mmxfdct.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfdct.Po' tmpdepfile='$(DEPDIR)/mmxfdct.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.o `test -f 'x86/mmxfdct.c' || echo '$(srcdir)/'`x86/mmxfdct.c + +mmxfdct.obj: x86/mmxfdct.c +@AMDEP_TRUE@ source='x86/mmxfdct.c' object='mmxfdct.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfdct.Po' tmpdepfile='$(DEPDIR)/mmxfdct.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.obj `cygpath -w x86/mmxfdct.c` + +mmxfdct.lo: x86/mmxfdct.c +@AMDEP_TRUE@ source='x86/mmxfdct.c' object='mmxfdct.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/mmxfdct.Plo' tmpdepfile='$(DEPDIR)/mmxfdct.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.lo `test -f 'x86/mmxfdct.c' || echo '$(srcdir)/'`x86/mmxfdct.c + +x86enc.o: x86/x86enc.c +@AMDEP_TRUE@ source='x86/x86enc.c' object='x86enc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/x86enc.Po' tmpdepfile='$(DEPDIR)/x86enc.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.o `test -f 'x86/x86enc.c' || echo '$(srcdir)/'`x86/x86enc.c + +x86enc.obj: x86/x86enc.c +@AMDEP_TRUE@ source='x86/x86enc.c' object='x86enc.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/x86enc.Po' tmpdepfile='$(DEPDIR)/x86enc.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.obj `cygpath -w x86/x86enc.c` + +x86enc.lo: x86/x86enc.c +@AMDEP_TRUE@ source='x86/x86enc.c' object='x86enc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/x86enc.Plo' tmpdepfile='$(DEPDIR)/x86enc.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.lo `test -f 'x86/x86enc.c' || echo '$(srcdir)/'`x86/x86enc.c + +sse2fdct.o: x86/sse2fdct.c +@AMDEP_TRUE@ source='x86/sse2fdct.c' object='sse2fdct.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/sse2fdct.Po' tmpdepfile='$(DEPDIR)/sse2fdct.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.o `test -f 'x86/sse2fdct.c' || echo '$(srcdir)/'`x86/sse2fdct.c + +sse2fdct.obj: x86/sse2fdct.c +@AMDEP_TRUE@ source='x86/sse2fdct.c' object='sse2fdct.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/sse2fdct.Po' tmpdepfile='$(DEPDIR)/sse2fdct.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.obj `cygpath -w x86/sse2fdct.c` + +sse2fdct.lo: x86/sse2fdct.c +@AMDEP_TRUE@ source='x86/sse2fdct.c' 
object='sse2fdct.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@ depfile='$(DEPDIR)/sse2fdct.Plo' tmpdepfile='$(DEPDIR)/sse2fdct.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.lo `test -f 'x86/sse2fdct.c' || echo '$(srcdir)/'`x86/sse2fdct.c +CCDEPMODE = @CCDEPMODE@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags +ETAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. 
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + $(mkinstalldirs) $(distdir)/x86 + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(libdir) + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-compile distclean-depend \ + distclean-generic distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: install-libLTLIBRARIES + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES + +.PHONY: GTAGS all all-am check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool distclean distclean-compile \ + distclean-depend distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am info info-am install \ + install-am install-data install-data-am install-exec \ + install-exec-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool tags uninstall \ + uninstall-am uninstall-info-am uninstall-libLTLIBRARIES + + +debug: + $(MAKE) all CFLAGS="@DEBUG@" + +profile: + $(MAKE) all CFLAGS="@PROFILE@" + +# contstruct various symbol export list files +.def.exp : defexp.awk + awk -f defexp.awk $< > $@ +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/Engine/lib/libtheora/lib/Version_script b/Engine/lib/libtheora/lib/Version_script new file mode 100644 index 000000000..2ecb5e43a --- /dev/null +++ b/Engine/lib/libtheora/lib/Version_script @@ -0,0 +1,53 @@ +# +# Export file for libtheora +# +# Only the symbols listed in the global section will be callable from +# applications linking to the libraries. +# + +# We use something that looks like a versioned so filename here +# to define the old API because of a historical confusion. This +# label must be kept to maintain ABI compatibility. + +libtheora.so.1.0 +{ + global: + theora_version_string; + theora_version_number; + + theora_encode_init; + theora_encode_YUVin; + theora_encode_packetout; + theora_encode_header; + theora_encode_comment; + theora_encode_tables; + + theora_decode_header; + theora_decode_init; + theora_decode_packetin; + theora_decode_YUVout; + + theora_control; + + theora_packet_isheader; + theora_packet_iskeyframe; + + theora_granule_shift; + theora_granule_frame; + theora_granule_time; + + theora_info_init; + theora_info_clear; + + theora_clear; + + theora_comment_init; + theora_comment_add; + theora_comment_add_tag; + theora_comment_query; + theora_comment_query_count; + theora_comment_clear; + + local: + *; +}; diff --git a/Engine/lib/libtheora/lib/Version_script-dec b/Engine/lib/libtheora/lib/Version_script-dec new file mode 100644 index 000000000..cab368397 --- /dev/null +++ b/Engine/lib/libtheora/lib/Version_script-dec @@ -0,0 +1,82 @@ +# +# Export file for libtheoradec +# +# Only the symbols listed in the global section will be callable from +# applications linking to the libraries. 
+# + +# The 1.x API +libtheoradec_1.0 +{ + global: + th_version_string; + th_version_number; + + th_decode_headerin; + th_decode_alloc; + th_setup_free; + th_decode_ctl; + th_decode_packetin; + th_decode_ycbcr_out; + th_decode_free; + + th_packet_isheader; + th_packet_iskeyframe; + + th_granule_frame; + th_granule_time; + + th_info_init; + th_info_clear; + + th_comment_init; + th_comment_add; + th_comment_add_tag; + th_comment_query; + th_comment_query_count; + th_comment_clear; + + local: + *; +}; + +# The deprecated legacy api from the libtheora alpha releases. +# We use something that looks like a versioned so filename here +# to define the old API because of a historical confusion. This +# label must be kept to maintain ABI compatibility. + +libtheora.so.1.0 +{ + global: + theora_version_string; + theora_version_number; + + theora_decode_header; + theora_decode_init; + theora_decode_packetin; + theora_decode_YUVout; + + theora_control; + + theora_packet_isheader; + theora_packet_iskeyframe; + + theora_granule_shift; + theora_granule_frame; + theora_granule_time; + + theora_info_init; + theora_info_clear; + + theora_clear; + + theora_comment_init; + theora_comment_add; + theora_comment_add_tag; + theora_comment_query; + theora_comment_query_count; + theora_comment_clear; + + local: + *; +}; diff --git a/Engine/lib/libtheora/lib/Version_script-enc b/Engine/lib/libtheora/lib/Version_script-enc new file mode 100644 index 000000000..37699edd6 --- /dev/null +++ b/Engine/lib/libtheora/lib/Version_script-enc @@ -0,0 +1,43 @@ +# +# Export file for libtheora +# +# Only the symbols listed in the global section will be callable from +# applications linking to the libraries. 
+# + +# The 1.x encoder API +libtheoraenc_1.0 +{ + global: + th_encode_alloc; + th_encode_ctl; + th_encode_flushheader; + th_encode_ycbcr_in; + th_encode_packetout; + th_encode_free; + + TH_VP31_QUANT_INFO; + TH_VP31_HUFF_CODES; + + local: + *; +}; + +# The encoder portion of the deprecated alpha release api. +# We use something that looks like a versioned so filename here +# to define the old API because of a historical confusion. This +# label must be kept to maintain ABI compatibility. + +libtheora.so.1.0 +{ + global: + theora_encode_init; + theora_encode_YUVin; + theora_encode_packetout; + theora_encode_header; + theora_encode_comment; + theora_encode_tables; + + local: + *; +}; diff --git a/Engine/lib/libtheora/lib/analyze.c b/Engine/lib/libtheora/lib/analyze.c new file mode 100644 index 000000000..af01b60df --- /dev/null +++ b/Engine/lib/libtheora/lib/analyze.c @@ -0,0 +1,2709 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: mode selection code + last mod: $Id$ + + ********************************************************************/ +#include +#include +#include "encint.h" +#include "modedec.h" + + + +typedef struct oc_fr_state oc_fr_state; +typedef struct oc_qii_state oc_qii_state; +typedef struct oc_enc_pipeline_state oc_enc_pipeline_state; +typedef struct oc_rd_metric oc_rd_metric; +typedef struct oc_mode_choice oc_mode_choice; + + + +/*There are 8 possible schemes used to encode macro block modes. 
+  Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes.
+  The same set of Huffman codes is used for each of these 7 schemes, but the
+   mode assigned to each codeword varies.
+  Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream,
+   while schemes 1-6 have a fixed mapping.
+  Scheme 7 just encodes each mode directly in 3 bits.*/
+
+/*Fixed mode rank tables: row i is used for mode coding scheme i+1, and is
+   indexed by macro block mode.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.
+  This is the inverse of the equivalent table OC_MODE_ALPHABETS in the
+   decoder.*/
+static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={
+  /*Last MV dominates.*/
+  /*L P M N I G GM 4*/
+  {3,4,2,0,1,5,6,7},
+  /*L P N M I G GM 4*/
+  {2,4,3,0,1,5,6,7},
+  /*L M P N I G GM 4*/
+  {3,4,1,0,2,5,6,7},
+  /*L M N P I G GM 4*/
+  {2,4,1,0,3,5,6,7},
+  /*No MV dominates.*/
+  /*N L P M I G GM 4*/
+  {0,4,3,1,2,5,6,7},
+  /*N G L P M I GM 4*/
+  {0,5,4,2,3,1,6,7},
+  /*Default ordering.*/
+  /*N I M L P G GM 4*/
+  {0,1,2,3,4,5,6,7}
+};
+
+
+
+/*Set up the per-scheme rank table pointers of a mode scheme chooser.
+  Only one call per encoder is required.
+  _chooser: The chooser whose mode_ranks[] pointers are filled in.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+  int ti;
+  /*Scheme 0 ranks are kept in the chooser itself.*/
+  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+  /*Schemes 1-7 each point at one row of the fixed table.*/
+  for(ti=0;ti<7;ti++)_chooser->mode_ranks[ti+1]=OC_MODE_RANKS[ti];
+}
+
+/*Reset the mode scheme chooser.
+  This needs to be called once for each frame, including the first.*/
+static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+  int i;
+  /*No modes have been counted yet.*/
+  memset(_chooser->mode_counts,0,sizeof(*_chooser->mode_counts)*OC_NMODES);
+  /*Schemes 1-7 start at zero bits; scheme 0 starts with 24 bits to store the
+     mode list in.*/
+  memset(_chooser->scheme_bits+1,0,sizeof(*_chooser->scheme_bits)*7);
+  _chooser->scheme_bits[0]=24;
+  for(i=0;i<8;i++){
+    /*List the schemes as 7,6,...,0: scheme 7 should always start first, and
+       scheme 0 should always start last.*/
+    _chooser->scheme_list[i]=7-i;
+    /*Scheme 0 begins with the identity ordering in both directions.*/
+    _chooser->scheme0_ranks[i]=i;
+    _chooser->scheme0_list[i]=i;
+  }
+}
+
+
+/*This is the real purpose of this data structure: not actually selecting a
+   mode scheme, but estimating the cost of coding a given mode given all the
+   modes selected so far.
+  This is done via opportunity cost: the cost is defined as the number of bits
+   required to encode all the modes selected so far including the current one
+   using the best possible scheme, minus the number of bits required to encode
+   all the modes selected so far not including the current one using the best
+   possible scheme.
+  The computational expense of doing this probably makes it overkill.
+  Just be happy we take a greedy approach instead of trying to solve the
+   global mode-selection problem (which is NP-hard).
+  _mb_mode: The mode to determine the cost of.
+  Return: The number of bits required to code this mode.*/
+static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int scheme0;
+  int scheme1;
+  int best_bits;
+  int mode_bits;
+  int si;
+  int scheme_bits;
+  /*scheme_list[0] is the current best scheme, scheme_list[1] the runner-up.*/
+  scheme0=_chooser->scheme_list[0];
+  scheme1=_chooser->scheme_list[1];
+  best_bits=_chooser->scheme_bits[scheme0];
+  /*The first index of OC_MODE_BITS is 1 only for scheme 7.*/
+  mode_bits=
+   OC_MODE_BITS[(scheme0+1)>>3][_chooser->mode_ranks[scheme0][_mb_mode]];
+  /*Typical case: If the difference between the best scheme and the next best
+     is greater than 6 bits, then adding just one mode cannot change which
+     scheme we use.*/
+  if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
+  /*Otherwise, check to see if adding this mode selects a different scheme as
+     the best.*/
+  si=1;
+  best_bits+=mode_bits;
+  do{
+    /*For any scheme except 0, we can just use the bit cost of the mode's rank
+       in that scheme.*/
+    if(scheme1!=0){
+      scheme_bits=_chooser->scheme_bits[scheme1]+
+       OC_MODE_BITS[(scheme1+1)>>3][_chooser->mode_ranks[scheme1][_mb_mode]];
+    }
+    else{
+      int ri;
+      /*For scheme 0, incrementing the mode count could potentially change the
+         mode's rank.
+        Find the index where the mode would be moved to in the optimal list,
+         and use its bit cost instead of the one for the mode's current
+         position in the list.*/
+      /*We don't recompute scheme bits; this is computing opportunity cost, not
+         an update.*/
+      for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0&&
+       _chooser->mode_counts[_mb_mode]>=
+       _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
+      scheme_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][ri];
+    }
+    /*Keep the cheapest total seen so far, then move on to the next scheme in
+       the list, stopping once all 8 have been examined.*/
+    if(scheme_bits<best_bits)best_bits=scheme_bits;
+    if(++si>=8)break;
+    scheme1=_chooser->scheme_list[si];
+  }
+  /*Schemes more than 6 bits behind the current best cannot overtake it.*/
+  while(_chooser->scheme_bits[scheme1]-_chooser->scheme_bits[scheme0]<=6);
+  return best_bits-_chooser->scheme_bits[scheme0];
+}
+
+/*Incrementally update the mode counts and per-scheme bit counts and re-order
+   the scheme lists once a mode has been selected.
+  _mb_mode: The mode that was chosen.*/
+static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int rank;
+  int scheme;
+  int pos;
+  _chooser->mode_counts[_mb_mode]++;
+  /*Move the chosen mode up the scheme 0 list past every mode whose count is
+     now strictly smaller, pushing each of those down one rank.*/
+  rank=_chooser->scheme0_ranks[_mb_mode];
+  while(rank>0){
+    int above;
+    above=_chooser->scheme0_list[rank-1];
+    if(_chooser->mode_counts[above]>=_chooser->mode_counts[_mb_mode])break;
+    _chooser->scheme0_list[rank]=above;
+    _chooser->scheme0_ranks[above]++;
+    rank--;
+  }
+  _chooser->scheme0_list[rank]=_mb_mode;
+  _chooser->scheme0_ranks[_mb_mode]=rank;
+  /*Charge each scheme the cost of this mode at its rank in that scheme.*/
+  for(scheme=0;scheme<8;scheme++){
+    int mode_rank;
+    mode_rank=_chooser->mode_ranks[scheme][_mb_mode];
+    _chooser->scheme_bits[scheme]+=OC_MODE_BITS[(scheme+1)>>3][mode_rank];
+  }
+  /*Insertion-sort the scheme list so the scheme bit counts are once again in
+     non-decreasing order.*/
+  for(pos=1;pos<8;pos++){
+    int moving;
+    int moving_bits;
+    int slot;
+    moving=_chooser->scheme_list[pos];
+    moving_bits=_chooser->scheme_bits[moving];
+    for(slot=pos;slot>0;slot--){
+      int before;
+      before=_chooser->scheme_list[slot-1];
+      if(moving_bits>=_chooser->scheme_bits[before])break;
+      _chooser->scheme_list[slot]=before;
+    }
+    _chooser->scheme_list[slot]=moving;
+  }
+}
+
+
+
+/*Returns the bit cost of coding a super block run of the given length.
+  The cost is that of the last code whose minimum run value does not exceed
+   the run.
+  _run_count: The desired run count; must be positive and less than 4130.*/
+static int oc_sb_run_bits(int _run_count){
+  int ci;
+  ci=0;
+  while(_run_count>=OC_SB_RUN_VAL_MIN[ci+1])ci++;
+  return OC_SB_RUN_CODE_NBITS[ci];
+}
+
+/*The number of bits required to encode a block run.
+  _run_count: The desired run count; must be positive and less than 30.*/
+static int oc_block_run_bits(int _run_count){
+  return OC_BLOCK_RUN_CODE_NBITS[_run_count-1];
+}
+
+
+
+/*State to track coded block flags and their bit cost.
+  Each flag field holds -1 until the first flag of that kind has been seen;
+   each *_count field holds the length of the run currently being extended.*/
+struct oc_fr_state{
+  /*The estimated number of bits spent on flags so far.*/
+  ptrdiff_t  bits;
+  unsigned   sb_partial_count:16;
+  unsigned   sb_full_count:16;
+  unsigned   b_coded_count_prev:8;
+  unsigned   b_coded_count:8;
+  /*The number of blocks seen so far in the current super block.*/
+  unsigned   b_count:8;
+  signed int sb_partial:2;
+  signed int sb_full:2;
+  signed int b_coded_prev:2;
+  signed int b_coded:2;
+};
+
+
+
+/*Resets a flag tracker: no bits spent, all runs empty, and every flag set to
+   -1 so that the first real flag value always starts a new run.*/
+static void oc_fr_state_init(oc_fr_state *_fr){
+  _fr->bits=0;
+  _fr->sb_partial_count=0;
+  _fr->sb_full_count=0;
+  _fr->b_coded_count_prev=0;
+  _fr->b_coded_count=0;
+  _fr->b_count=0;
+  _fr->sb_partial=-1;
+  _fr->sb_full=-1;
+  _fr->b_coded_prev=-1;
+  _fr->b_coded=-1;
+}
+
+
+/*Accounts for the flags of one completed super block.
+  _fr:         The flag tracker to update.
+  _sb_partial: Non-zero if the super block was partially coded.
+  _sb_full:    The fully coded/uncoded flag; only used when _sb_partial is
+                zero.*/
+static void oc_fr_state_advance_sb(oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  ptrdiff_t bits;
+  int       sb_partial_count;
+  int       sb_full_count;
+  bits=_fr->bits;
+  /*Extend the sb_partial run, or start a new one.
+    The current run length lives in sb_partial_count; reading the sb_partial
+     flag here instead would treat the flag value as a length.*/
+  sb_partial_count=_fr->sb_partial_count;
+  if(_fr->sb_partial==_sb_partial){
+    if(sb_partial_count>=4129){
+      /*The run cannot grow any further: charge one bit and start over.*/
+      bits++;
+      sb_partial_count=0;
+    }
+    /*Otherwise un-charge the old run length; the new one is charged below.*/
+    else bits-=oc_sb_run_bits(sb_partial_count);
+  }
+  else sb_partial_count=0;
+  sb_partial_count++;
+  bits+=oc_sb_run_bits(sb_partial_count);
+  if(!_sb_partial){
+    /*Extend the sb_full run, or start a new one.*/
+    sb_full_count=_fr->sb_full_count;
+    if(_fr->sb_full==_sb_full){
+      if(sb_full_count>=4129){
+        bits++;
+        sb_full_count=0;
+      }
+      else bits-=oc_sb_run_bits(sb_full_count);
+    }
+    else sb_full_count=0;
+    sb_full_count++;
+    bits+=oc_sb_run_bits(sb_full_count);
+    _fr->sb_full=_sb_full;
+    _fr->sb_full_count=sb_full_count;
+  }
+  _fr->bits=bits;
+  _fr->sb_partial=_sb_partial;
+  _fr->sb_partial_count=sb_partial_count;
+}
+
+/*Flush any outstanding block flags for a SB (e.g., one with fewer than 16
+   blocks).*/
+static void oc_fr_state_flush_sb(oc_fr_state *_fr){
+  ptrdiff_t bits;
+  int       sb_partial;
+  int       sb_full=sb_full;
+ int b_coded_count; + int b_coded; + int b_count; + b_count=_fr->b_count; + if(b_count>0){ + bits=_fr->bits; + b_coded=_fr->b_coded; + b_coded_count=_fr->b_coded_count; + if(b_coded_count>=b_count){ + /*This SB was fully coded/uncoded; roll back the partial block flags.*/ + bits-=oc_block_run_bits(b_coded_count); + if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count); + sb_partial=0; + sb_full=b_coded; + b_coded=_fr->b_coded_prev; + b_coded_count=_fr->b_coded_count_prev; + } + else{ + /*It was partially coded.*/ + sb_partial=1; + /*sb_full is unused.*/ + } + _fr->bits=bits; + _fr->b_coded_count=b_coded_count; + _fr->b_coded_count_prev=b_coded_count; + _fr->b_count=0; + _fr->b_coded=b_coded; + _fr->b_coded_prev=b_coded; + oc_fr_state_advance_sb(_fr,sb_partial,sb_full); + } +} + +static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){ + ptrdiff_t bits; + int b_coded_count; + int b_count; + int sb_partial; + int sb_full=sb_full; + bits=_fr->bits; + /*Extend the b_coded run, or start a new one.*/ + b_coded_count=_fr->b_coded_count; + if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count); + else b_coded_count=0; + b_coded_count++; + b_count=_fr->b_count+1; + if(b_count>=16){ + /*We finished a superblock.*/ + if(b_coded_count>=16){ + /*It was fully coded/uncoded; roll back the partial block flags.*/ + if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16); + sb_partial=0; + sb_full=_b_coded; + _b_coded=_fr->b_coded_prev; + b_coded_count=_fr->b_coded_count_prev; + } + else{ + bits+=oc_block_run_bits(b_coded_count); + /*It was partially coded.*/ + sb_partial=1; + /*sb_full is unused.*/ + } + _fr->bits=bits; + _fr->b_coded_count=b_coded_count; + _fr->b_coded_count_prev=b_coded_count; + _fr->b_count=0; + _fr->b_coded=_b_coded; + _fr->b_coded_prev=_b_coded; + oc_fr_state_advance_sb(_fr,sb_partial,sb_full); + } + else{ + bits+=oc_block_run_bits(b_coded_count); + _fr->bits=bits; + _fr->b_coded_count=b_coded_count; + 
_fr->b_count=b_count; + _fr->b_coded=_b_coded; + } +} + +static void oc_fr_skip_block(oc_fr_state *_fr){ + oc_fr_state_advance_block(_fr,0); +} + +static void oc_fr_code_block(oc_fr_state *_fr){ + oc_fr_state_advance_block(_fr,1); +} + +static int oc_fr_cost1(const oc_fr_state *_fr){ + oc_fr_state tmp; + ptrdiff_t bits; + *&tmp=*_fr; + oc_fr_skip_block(&tmp); + bits=tmp.bits; + *&tmp=*_fr; + oc_fr_code_block(&tmp); + return (int)(tmp.bits-bits); +} + +static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){ + oc_fr_state tmp; + *&tmp=*_pre; + oc_fr_skip_block(&tmp); + oc_fr_skip_block(&tmp); + oc_fr_skip_block(&tmp); + oc_fr_skip_block(&tmp); + return (int)(_post->bits-tmp.bits); +} + + + +struct oc_qii_state{ + ptrdiff_t bits; + unsigned qi01_count:14; + signed int qi01:2; + unsigned qi12_count:14; + signed int qi12:2; +}; + + + +static void oc_qii_state_init(oc_qii_state *_qs){ + _qs->bits=0; + _qs->qi01_count=0; + _qs->qi01=-1; + _qs->qi12_count=0; + _qs->qi12=-1; +} + + +static void oc_qii_state_advance(oc_qii_state *_qd, + const oc_qii_state *_qs,int _qii){ + ptrdiff_t bits; + int qi01; + int qi01_count; + int qi12; + int qi12_count; + bits=_qs->bits; + qi01=_qii+1>>1; + qi01_count=_qs->qi01_count; + if(qi01==_qs->qi01){ + if(qi01_count>=4129){ + bits++; + qi01_count=0; + } + else bits-=oc_sb_run_bits(qi01_count); + } + else qi01_count=0; + qi01_count++; + bits+=oc_sb_run_bits(qi01_count); + qi12_count=_qs->qi12_count; + if(_qii){ + qi12=_qii>>1; + if(qi12==_qs->qi12){ + if(qi12_count>=4129){ + bits++; + qi12_count=0; + } + else bits-=oc_sb_run_bits(qi12_count); + } + else qi12_count=0; + qi12_count++; + bits+=oc_sb_run_bits(qi12_count); + } + else qi12=_qs->qi12; + _qd->bits=bits; + _qd->qi01=qi01; + _qd->qi01_count=qi01_count; + _qd->qi12=qi12; + _qd->qi12_count=qi12_count; +} + + + +/*Temporary encoder state for the analysis pipeline.*/ +struct oc_enc_pipeline_state{ + int bounding_values[256]; + oc_fr_state fr[3]; + oc_qii_state qs[3]; + 
/*Condensed dequantization tables.*/ + const ogg_uint16_t *dequant[3][3][2]; + /*Condensed quantization tables.*/ + const oc_iquant *enquant[3][3][2]; + /*Skip SSD storage for the current MCU in each plane.*/ + unsigned *skip_ssd[3]; + /*Coded/uncoded fragment lists for each plane for the current MCU.*/ + ptrdiff_t *coded_fragis[3]; + ptrdiff_t *uncoded_fragis[3]; + ptrdiff_t ncoded_fragis[3]; + ptrdiff_t nuncoded_fragis[3]; + /*The starting fragment for the current MCU in each plane.*/ + ptrdiff_t froffset[3]; + /*The starting row for the current MCU in each plane.*/ + int fragy0[3]; + /*The ending row for the current MCU in each plane.*/ + int fragy_end[3]; + /*The starting superblock for the current MCU in each plane.*/ + unsigned sbi0[3]; + /*The ending superblock for the current MCU in each plane.*/ + unsigned sbi_end[3]; + /*The number of tokens for zzi=1 for each color plane.*/ + int ndct_tokens1[3]; + /*The outstanding eob_run count for zzi=1 for each color plane.*/ + int eob_run1[3]; + /*Whether or not the loop filter is enabled.*/ + int loop_filter; +}; + + +static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){ + ptrdiff_t *coded_fragis; + unsigned mcu_nvsbs; + ptrdiff_t mcu_nfrags; + int hdec; + int vdec; + int pli; + int qii; + int qti; + /*Initialize the per-plane coded block flag trackers. 
+ These are used for bit-estimation purposes only; the real flag bits span + all three planes, so we can't compute them in parallel.*/ + for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli); + for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli); + /*Set up the per-plane skip SSD storage pointers.*/ + mcu_nvsbs=_enc->mcu_nvsbs; + mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16; + hdec=!(_enc->state.info.pixel_fmt&1); + vdec=!(_enc->state.info.pixel_fmt&2); + _pipe->skip_ssd[0]=_enc->mcu_skip_ssd; + _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags; + _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec); + /*Set up per-plane pointers to the coded and uncoded fragments lists. + Unlike the decoder, each planes' coded and uncoded fragment list is kept + separate during the analysis stage; we only make the coded list for all + three planes contiguous right before the final packet is output + (destroying the uncoded lists, which are no longer needed).*/ + coded_fragis=_enc->state.coded_fragis; + for(pli=0;pli<3;pli++){ + _pipe->coded_fragis[pli]=coded_fragis; + coded_fragis+=_enc->state.fplanes[pli].nfrags; + _pipe->uncoded_fragis[pli]=coded_fragis; + } + memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis)); + memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis)); + /*Set up condensed quantizer tables.*/ + for(pli=0;pli<3;pli++){ + for(qii=0;qii<_enc->state.nqis;qii++){ + int qi; + qi=_enc->state.qis[qii]; + for(qti=0;qti<2;qti++){ + _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti]; + _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti]; + } + } + } + /*Initialize the tokenization state.*/ + for(pli=0;pli<3;pli++){ + _pipe->ndct_tokens1[pli]=0; + _pipe->eob_run1[pli]=0; + } + /*Initialize the bounding value array for the loop filter.*/ + _pipe->loop_filter=!oc_state_loop_filter_init(&_enc->state, + _pipe->bounding_values); +} + +/*Sets the current MCU stripe to super block row _sby. 
+  Return: A non-zero value if more super block rows remain after this stripe
+           (i.e., this was not the last MCU).*/
+static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _sby){
+  const oc_fragment_plane *fplane;
+  unsigned                 mcu_nvsbs;
+  int                      sby_end;
+  int                      notdone;
+  int                      vdec;
+  int                      pli;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  sby_end=_enc->state.fplanes[0].nvsbs;
+  /*If the stripe stops short of the last luma super block row, clamp its end;
+     otherwise it runs to the end of every plane.*/
+  notdone=_sby+mcu_nvsbs<sby_end;
+  if(notdone)sby_end=_sby+mcu_nvsbs;
+  /*Luma is never vertically decimated; vdec is set for the chroma planes at
+     the bottom of the first iteration.*/
+  vdec=0;
+  for(pli=0;pli<3;pli++){
+    fplane=_enc->state.fplanes+pli;
+    _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs;
+    /*Note the precedence: this shifts by (2-vdec).*/
+    _pipe->fragy0[pli]=_sby<<2-vdec;
+    _pipe->froffset[pli]=fplane->froffset
+     +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags;
+    if(notdone){
+      _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs;
+      _pipe->fragy_end[pli]=sby_end<<2-vdec;
+    }
+    else{
+      _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs;
+      _pipe->fragy_end[pli]=fplane->nvfrags;
+    }
+    vdec=!(_enc->state.info.pixel_fmt&2);
+  }
+  return notdone;
+}
+
+static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){
+  int refi;
+  /*Copy over all the uncoded fragments from this plane and advance the uncoded
+     fragment list.*/
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+  _pipe->nuncoded_fragis[_pli]=0;
+  /*Perform DC prediction.*/
+  oc_enc_pred_dc_frag_rows(_enc,_pli,
+   _pipe->fragy0[_pli],_pipe->fragy_end[_pli]);
+  /*Finish DC tokenization.*/
+  oc_enc_tokenize_dc_frag_list(_enc,_pli,
+   _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli],
+   _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]);
+  _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1];
+  _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1];
+  /*And advance the coded fragment list.*/
+  _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->ncoded_fragis[_pli]=0;
+  /*Apply the loop filter if necessary.*/
+
refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; + if(_pipe->loop_filter){ + oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values, + refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay); + } + else _sdelay=_edelay=0; + /*To fill borders, we have an additional two pixel delay, since a fragment + in the next row could filter its top edge, using two pixels from a + fragment in this row. + But there's no reason to delay a full fragment between the two.*/ + oc_state_borders_fill_rows(&_enc->state,refi,_pli, + (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1), + (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1)); +} + + + +/*Cost information about the coded blocks in a MB.*/ +struct oc_rd_metric{ + int uncoded_ac_ssd; + int coded_ac_ssd; + int ac_bits; + int dc_flag; +}; + + + +static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits, + oc_rd_metric *_mo,oc_token_checkpoint **_stack){ + OC_ALIGN16(ogg_int16_t dct[64]); + OC_ALIGN16(ogg_int16_t data[64]); + ogg_uint16_t dc_dequant; + const ogg_uint16_t *dequant; + const oc_iquant *enquant; + ptrdiff_t frag_offs; + int ystride; + const unsigned char *src; + const unsigned char *ref; + unsigned char *dst; + int frame_type; + int nonzero; + unsigned uncoded_ssd; + unsigned coded_ssd; + int coded_dc; + oc_token_checkpoint *checkpoint; + oc_fragment *frags; + int mb_mode; + int mv_offs[2]; + int nmv_offs; + int ac_bits; + int borderi; + int qti; + int qii; + int pi; + int zzi; + int v; + int val; + int d; + int s; + int dc; + frags=_enc->state.frags; + frag_offs=_enc->state.frag_buf_offs[_fragi]; + ystride=_enc->state.ref_ystride[_pli]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs; + borderi=frags[_fragi].borderi; + qii=frags[_fragi].qii; + if(qii&~3){ +#if !defined(OC_COLLECT_METRICS) + if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){ + /*Enable early skip detection.*/ + frags[_fragi].coded=0; + return 0; + } +#endif 
+ /*Try and code this block anyway.*/ + qii&=3; + frags[_fragi].qii=qii; + } + mb_mode=frags[_fragi].mb_mode; + ref=_enc->state.ref_frame_data[ + _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs; + dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]] + +frag_offs; + /*Motion compensation:*/ + switch(mb_mode){ + case OC_MODE_INTRA:{ + nmv_offs=0; + oc_enc_frag_sub_128(_enc,data,src,ystride); + }break; + case OC_MODE_GOLDEN_NOMV: + case OC_MODE_INTER_NOMV:{ + nmv_offs=1; + mv_offs[0]=0; + oc_enc_frag_sub(_enc,data,src,ref,ystride); + }break; + default:{ + const oc_mv *frag_mvs; + frag_mvs=(const oc_mv *)_enc->state.frag_mvs; + nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli, + frag_mvs[_fragi][0],frag_mvs[_fragi][1]); + if(nmv_offs>1){ + oc_enc_frag_copy2(_enc,dst, + ref+mv_offs[0],ref+mv_offs[1],ystride); + oc_enc_frag_sub(_enc,data,src,dst,ystride); + } + else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride); + }break; + } +#if defined(OC_COLLECT_METRICS) + { + unsigned satd; + switch(nmv_offs){ + case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break; + case 1:{ + satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX); + }break; + default:{ + satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX); + } + } + _enc->frag_satd[_fragi]=satd; + } +#endif + /*Transform:*/ + oc_enc_fdct8x8(_enc,dct,data); + /*Quantize the DC coefficient:*/ + qti=mb_mode!=OC_MODE_INTRA; + enquant=_pipe->enquant[_pli][0][qti]; + dc_dequant=_pipe->dequant[_pli][0][qti][0]; + v=dct[0]; + val=v<<1; + s=OC_SIGNMASK(val); + val+=dc_dequant+s^s; + val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s; + dc=OC_CLAMPI(-580,val,580); + nonzero=0; + /*Quantize the AC coefficients:*/ + dequant=_pipe->dequant[_pli][qii][qti]; + enquant=_pipe->enquant[_pli][qii][qti]; + for(zzi=1;zzi<64;zzi++){ + v=dct[OC_FZIG_ZAG[zzi]]; + d=dequant[zzi]; + val=v<<1; + v=abs(val); + if(v>=d){ + s=OC_SIGNMASK(val); + /*The bias added 
here rounds ties away from zero, since token + optimization can only decrease the magnitude of the quantized + value.*/ + val+=d+s^s; + /*Note the arithmetic right shift is not guaranteed by ANSI C. + Hopefully no one still uses ones-complement architectures.*/ + val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s; + data[zzi]=OC_CLAMPI(-580,val,580); + nonzero=zzi; + } + else data[zzi]=0; + } + /*Tokenize.*/ + checkpoint=*_stack; + ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1, + _stack,qti?0:3); + /*Reconstruct. + TODO: nonzero may need to be adjusted after tokenization.*/ + if(nonzero==0){ + ogg_int16_t p; + int ci; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5); + /*LOOP VECTORIZES.*/ + for(ci=0;ci<64;ci++)data[ci]=p; + } + else{ + data[0]=dc*dc_dequant; + oc_idct8x8(&_enc->state,data,nonzero+1); + } + if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data); + else{ + oc_enc_frag_recon_inter(_enc,dst, + nmv_offs==1?ref+mv_offs[0]:dst,ystride,data); + } + frame_type=_enc->state.frame_type; +#if !defined(OC_COLLECT_METRICS) + if(frame_type!=OC_INTRA_FRAME) +#endif + { + /*In retrospect, should we have skipped this block?*/ + oc_enc_frag_sub(_enc,data,src,dst,ystride); + coded_ssd=coded_dc=0; + if(borderi<0){ + for(pi=0;pi<64;pi++){ + coded_ssd+=data[pi]*data[pi]; + coded_dc+=data[pi]; + } + } + else{ + ogg_int64_t mask; + mask=_enc->state.borders[borderi].mask; + for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ + coded_ssd+=data[pi]*data[pi]; + coded_dc+=data[pi]; + } + } + /*Scale to match DCT domain.*/ + coded_ssd<<=4; + /*We actually only want the AC contribution to the SSD.*/ + coded_ssd-=coded_dc*coded_dc>>2; +#if defined(OC_COLLECT_METRICS) + _enc->frag_ssd[_fragi]=coded_ssd; + } + if(frame_type!=OC_INTRA_FRAME){ +#endif + uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]]; + if(uncoded_ssdlambda&& + /*Don't 
allow luma blocks to be skipped in 4MV mode when VP3 + compatibility is enabled.*/ + (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){ + /*Hm, not worth it; roll back.*/ + oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint); + *_stack=checkpoint; + frags[_fragi].coded=0; + return 0; + } + } + else _mo->dc_flag=1; + _mo->uncoded_ac_ssd+=uncoded_ssd; + _mo->coded_ac_ssd+=coded_ssd; + _mo->ac_bits+=ac_bits; + } + oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii); + frags[_fragi].dc=dc; + frags[_fragi].coded=1; + return 1; +} + +static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){ + /*Worst case token stack usage for 4 fragments.*/ + oc_token_checkpoint stack[64*4]; + oc_token_checkpoint *stackptr; + const oc_sb_map *sb_maps; + signed char *mb_modes; + oc_fragment *frags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t *uncoded_fragis; + ptrdiff_t nuncoded_fragis; + oc_rd_metric mo; + oc_fr_state fr_checkpoint; + oc_qii_state qs_checkpoint; + int mb_mode; + int ncoded; + ptrdiff_t fragi; + int bi; + *&fr_checkpoint=*(_pipe->fr+0); + *&qs_checkpoint=*(_pipe->qs+0); + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + mb_modes=_enc->state.mb_modes; + frags=_enc->state.frags; + coded_fragis=_pipe->coded_fragis[0]; + ncoded_fragis=_pipe->ncoded_fragis[0]; + uncoded_fragis=_pipe->uncoded_fragis[0]; + nuncoded_fragis=_pipe->nuncoded_fragis[0]; + mb_mode=mb_modes[_mbi]; + ncoded=0; + stackptr=stack; + memset(&mo,0,sizeof(mo)); + for(bi=0;bi<4;bi++){ + fragi=sb_maps[_mbi>>2][_mbi&3][bi]; + frags[fragi].mb_mode=mb_mode; + if(oc_enc_block_transform_quantize(_enc, + _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&stackptr)){ + oc_fr_code_block(_pipe->fr+0); + coded_fragis[ncoded_fragis++]=fragi; + ncoded++; + } + else{ + *(uncoded_fragis-++nuncoded_fragis)=fragi; + oc_fr_skip_block(_pipe->fr+0); + } + } + if(_enc->state.frame_type!=OC_INTRA_FRAME){ + 
if(ncoded>0&&!mo.dc_flag){ + int cost; + /*Some individual blocks were worth coding. + See if that's still true when accounting for mode and MV overhead.*/ + cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits + +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead); + if(mo.uncoded_ac_ssd<=cost){ + /*Taking macroblock overhead into account, it is not worth coding this + MB.*/ + oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack); + *(_pipe->fr+0)=*&fr_checkpoint; + *(_pipe->qs+0)=*&qs_checkpoint; + for(bi=0;bi<4;bi++){ + fragi=sb_maps[_mbi>>2][_mbi&3][bi]; + if(frags[fragi].coded){ + *(uncoded_fragis-++nuncoded_fragis)=fragi; + frags[fragi].coded=0; + } + oc_fr_skip_block(_pipe->fr+0); + } + ncoded_fragis-=ncoded; + ncoded=0; + } + } + /*If no luma blocks coded, the mode is forced.*/ + if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV; + /*Assume that a 1MV with a single coded block is always cheaper than a 4MV + with a single coded block. + This may not be strictly true: a 4MV computes chroma MVs using (0,0) for + skipped blocks, while a 1MV does not.*/ + else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){ + mb_modes[_mbi]=OC_MODE_INTER_MV; + } + } + _pipe->ncoded_fragis[0]=ncoded_fragis; + _pipe->nuncoded_fragis[0]=nuncoded_fragis; + return ncoded; +} + +static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){ + const oc_sb_map *sb_maps; + oc_sb_flags *sb_flags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t *uncoded_fragis; + ptrdiff_t nuncoded_fragis; + int sbi; + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + sb_flags=_enc->state.sb_flags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + uncoded_fragis=_pipe->uncoded_fragis[_pli]; + nuncoded_fragis=_pipe->nuncoded_fragis[_pli]; + for(sbi=_sbi_start;sbi<_sbi_end;sbi++){ + /*Worst case token stack usage for 1 fragment.*/ + oc_token_checkpoint stack[64]; + oc_rd_metric mo; + int 
quadi; + int bi; + memset(&mo,0,sizeof(mo)); + for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + fragi=sb_maps[sbi][quadi][bi]; + if(fragi>=0){ + oc_token_checkpoint *stackptr; + stackptr=stack; + if(oc_enc_block_transform_quantize(_enc, + _pipe,_pli,fragi,oc_fr_cost1(_pipe->fr+_pli),&mo,&stackptr)){ + coded_fragis[ncoded_fragis++]=fragi; + oc_fr_code_block(_pipe->fr+_pli); + } + else{ + *(uncoded_fragis-++nuncoded_fragis)=fragi; + oc_fr_skip_block(_pipe->fr+_pli); + } + } + } + oc_fr_state_flush_sb(_pipe->fr+_pli); + sb_flags[sbi].coded_fully=_pipe->fr[_pli].sb_full; + sb_flags[sbi].coded_partially=_pipe->fr[_pli].sb_partial; + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; + _pipe->nuncoded_fragis[_pli]=nuncoded_fragis; +} + +/*Mode decision is done by exhaustively examining all potential choices. + Obviously, doing the motion compensation, fDCT, tokenization, and then + counting the bits each token uses is computationally expensive. + Theora's EOB runs can also split the cost of these tokens across multiple + fragments, and naturally we don't know what the optimal choice of Huffman + codes will be until we know all the tokens we're going to encode in all the + fragments. + So we use a simple approach to estimating the bit cost and distortion of each + mode based upon the SATD value of the residual before coding. + The mathematics behind the technique are outlined by Kim \cite{Kim03}, but + the process (modified somewhat from that of the paper) is very simple. + We build a non-linear regression of the mappings from + (pre-transform+quantization) SATD to (post-transform+quantization) bits and + SSD for each qi. + A separate set of mappings is kept for each quantization type and color + plane. + The mappings are constructed by partitioning the SATD values into a small + number of bins (currently 24) and using a linear regression in each bin + (as opposed to the 0th-order regression used by Kim). 
+ The bit counts and SSD measurements are obtained by examining actual encoded + frames, with appropriate lambda values and optimal Huffman codes selected. + EOB bits are assigned to the fragment that started the EOB run (as opposed to + dividing them among all the blocks in the run; though the latter approach + seems more theoretically correct, Monty's testing showed a small improvement + with the former, though that may have been merely statistical noise). + + @ARTICLE{Kim03, + author="Hyun Mun Kim", + title="Adaptive Rate Control Using Nonlinear Regression", + journal="IEEE Transactions on Circuits and Systems for Video Technology", + volume=13, + number=5, + pages="432--439", + month=May, + year=2003 + }*/ + +/*Computes (_ssd+_lambda*_rate)/(1<>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \ + +(((_ssd)&(1<>1)>>OC_BIT_SCALE) + +/*Estimate the R-D cost of the DCT coefficients given the SATD of a block after + prediction.*/ +static unsigned oc_dct_cost2(unsigned *_ssd, + int _qi,int _pli,int _qti,int _satd){ + unsigned rmse; + int bin; + int dx; + int y0; + int z0; + int dy; + int dz; + /*SATD metrics for chroma planes vary much less than luma, so we scale them + by 4 to distribute them into the mode decision bins more evenly.*/ + _satd<<=_pli+1&2; + bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2); + dx=_satd-(bin<>OC_SAD_SHIFT),0); + *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE; + return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0); +} + +/*Select luma block-level quantizers for a MB in an INTRA frame.*/ +static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc, + const oc_qii_state *_qs,unsigned _mbi){ + const unsigned char *src; + const ptrdiff_t *frag_buf_offs; + const oc_sb_map *sb_maps; + oc_fragment *frags; + ptrdiff_t frag_offs; + ptrdiff_t fragi; + oc_qii_state qs[4][3]; + unsigned cost[4][3]; + unsigned ssd[4][3]; + unsigned rate[4][3]; + int prev[3][3]; + unsigned satd; + unsigned best_cost; + unsigned best_ssd; + unsigned best_rate; + int best_qii; + int 
qii; + int lambda; + int ystride; + int nqis; + int bi; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ystride=_enc->state.ref_ystride[0]; + fragi=sb_maps[_mbi>>2][_mbi&3][0]; + frag_offs=frag_buf_offs[fragi]; + satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + nqis=_enc->state.nqis; + lambda=_enc->lambda; + for(qii=0;qiistate.qis[qii],0,0,satd) + +(qs[0][qii].bits-_qs->bits<>2][_mbi&3][bi]; + frag_offs=frag_buf_offs[fragi]; + satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + for(qii=0;qiistate.qis[qii],0,0,satd); + best_ssd=ssd[bi-1][0]+cur_ssd; + best_rate=rate[bi-1][0]+cur_rate + +(qt[0].bits-qs[bi-1][0].bits<state.frags; + for(bi=3;;){ + fragi=sb_maps[_mbi>>2][_mbi&3][bi]; + frags[fragi].qii=best_qii; + if(bi--<=0)break; + best_qii=prev[bi][best_qii]; + } + return best_cost; +} + +/*Select a block-level quantizer for a single chroma block in an INTRA frame.*/ +static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc, + const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){ + const unsigned char *src; + oc_fragment *frags; + ptrdiff_t frag_offs; + oc_qii_state qt[3]; + unsigned cost[3]; + unsigned satd; + unsigned best_cost; + int best_qii; + int qii; + int lambda; + int ystride; + int nqis; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ystride=_enc->state.ref_ystride[_pli]; + frag_offs=_enc->state.frag_buf_offs[_fragi]; + satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + nqis=_enc->state.nqis; + lambda=_enc->lambda; + best_qii=0; + for(qii=0;qiistate.qis[qii],_pli,0,satd) + +(qt[qii].bits-_qs->bits<state.frags; + frags[_fragi].qii=best_qii; + return best_cost; +} + +static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){ + const oc_sb_map *sb_maps; + oc_sb_flags *sb_flags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + int sbi; + 
sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + sb_flags=_enc->state.sb_flags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + for(sbi=_sbi_start;sbi<_sbi_end;sbi++){ + /*Worst case token stack usage for 1 fragment.*/ + oc_token_checkpoint stack[64]; + int quadi; + int bi; + for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + fragi=sb_maps[sbi][quadi][bi]; + if(fragi>=0){ + oc_token_checkpoint *stackptr; + oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi); + stackptr=stack; + oc_enc_block_transform_quantize(_enc, + _pipe,_pli,fragi,0,NULL,&stackptr); + coded_fragis[ncoded_fragis++]=fragi; + } + } + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; +} + +/*Analysis stage for an INTRA frame.*/ +void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){ + oc_enc_pipeline_state pipe; + const unsigned char *map_idxs; + int nmap_idxs; + oc_sb_flags *sb_flags; + signed char *mb_modes; + const oc_mb_map *mb_maps; + oc_mb_enc_info *embs; + oc_fragment *frags; + unsigned stripe_sby; + unsigned mcu_nvsbs; + int notstart; + int notdone; + int refi; + int pli; + _enc->state.frame_type=OC_INTRA_FRAME; + oc_enc_tokenize_start(_enc); + oc_enc_pipeline_init(_enc,&pipe); + /*Choose MVs and MB modes and quantize and code luma. 
+ Must be done in Hilbert order.*/ + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + _enc->state.ncoded_fragis[0]=0; + _enc->state.ncoded_fragis[1]=0; + _enc->state.ncoded_fragis[2]=0; + sb_flags=_enc->state.sb_flags; + mb_modes=_enc->state.mb_modes; + mb_maps=(const oc_mb_map *)_enc->state.mb_maps; + embs=_enc->mb_info; + frags=_enc->state.frags; + notstart=0; + notdone=1; + mcu_nvsbs=_enc->mcu_nvsbs; + for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){ + unsigned sbi; + unsigned sbi_end; + notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby); + sbi_end=pipe.sbi_end[0]; + for(sbi=pipe.sbi0[0];sbistate.curframe_num>0)oc_mcenc_search(_enc,mbi); + oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi); + mb_modes[mbi]=OC_MODE_INTRA; + oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0); + /*Propagate final MB mode and MVs to the chroma blocks.*/ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_maps[mbi][pli][bi]; + frags[fragi].mb_mode=OC_MODE_INTRA; + } + } + } + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone); + /*Code chroma planes.*/ + for(pli=1;pli<3;pli++){ + oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe, + pli,pipe.sbi0[pli],pipe.sbi_end[pli]); + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli); + _enc->state.ntotal_coded_fragis=_enc->state.nfrags; +} + + + +/*Cost information about a MB mode.*/ +struct oc_mode_choice{ + unsigned cost; + unsigned ssd; + unsigned rate; + unsigned overhead; + unsigned char qii[12]; +}; + + + +static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){ + _modec->cost=OC_MODE_RD_COST(_modec->ssd, + _modec->rate+_modec->overhead,_lambda); +} + +/*A set of skip SSD's to use to disable early skipping.*/ +static const unsigned 
OC_NOSKIP[12]={ + UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX, + UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX, + UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX +}; + +/*The estimated number of bits used by a coded chroma block to specify the AC + quantizer. + TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression); + measurements suggest this is in the right ballpark, but it varies somewhat + with lambda.*/ +#define OC_CHROMA_QII_RATE ((0xCAE00D1DU>>31-OC_BIT_SCALE)+1>>1) + +static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc, + oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){ + oc_fr_state fr; + oc_qii_state qs; + unsigned ssd; + unsigned rate; + int overhead; + unsigned satd; + unsigned best_ssd; + unsigned best_rate; + int best_overhead; + int best_fri; + int best_qii; + unsigned cur_cost; + unsigned cur_ssd; + unsigned cur_rate; + int cur_overhead; + int lambda; + int nqis; + int nskipped; + int bi; + int qii; + lambda=_enc->lambda; + nqis=_enc->state.nqis; + /*We could do a trellis optimization here, but we don't make final skip + decisions until after transform+quantization, so the result wouldn't be + optimal anyway. 
+ Instead we just use a greedy approach; for most SATD values, the + differences between the qiis are large enough to drown out the cost to + code the flags, anyway.*/ + *&fr=*_fr; + *&qs=*_qs; + ssd=rate=overhead=nskipped=0; + for(bi=0;bi<4;bi++){ + oc_fr_state ft[2]; + oc_qii_state qt[3]; + unsigned best_cost; + satd=_frag_satd[bi]; + *(ft+0)=*&fr; + oc_fr_code_block(ft+0); + oc_qii_state_advance(qt+0,&qs,0); + best_overhead=(ft[0].bits-fr.bits<state.qis[0],0,_qti,satd) + +(qt[0].bits-qs.bits<state.qis[qii],0,_qti,satd) + +(qt[qii].bits-qs.bits<qii[bi]=best_qii; + } + _modec->ssd=ssd; + _modec->rate=rate; + _modec->overhead=OC_MAXI(overhead,0); +} + +static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc, + oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){ + unsigned ssd; + unsigned rate; + unsigned satd; + unsigned best_ssd; + unsigned best_rate; + int best_qii; + unsigned cur_cost; + unsigned cur_ssd; + unsigned cur_rate; + int lambda; + int nblocks; + int nqis; + int pli; + int bi; + int qii; + lambda=_enc->lambda; + nqis=_enc->state.nqis; + ssd=_modec->ssd; + rate=_modec->rate; + /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded + order, we assume a constant overhead for coded block and qii flags.*/ + nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + nblocks=(nblocks-4>>1)+4; + bi=4; + for(pli=1;pli<3;pli++){ + for(;bistate.qis[0],pli,_qti,satd) + +OC_CHROMA_QII_RATE; + best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda); + best_qii=0; + for(qii=1;qiistate.qis[qii],0,_qti,satd) + +OC_CHROMA_QII_RATE; + cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda); + if(cur_costqii[bi]=best_qii; + } + nblocks=(nblocks-4<<1)+4; + } + _modec->ssd=ssd; + _modec->rate=rate; +} + +static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe, + unsigned _mbi,unsigned _ssd[12]){ + OC_ALIGN16(ogg_int16_t buffer[64]); + const 
unsigned char *src; + const unsigned char *ref; + int ystride; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *sb_map; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + ogg_int64_t mask; + unsigned uncoded_ssd; + int uncoded_dc; + unsigned dc_dequant; + int dc_flag; + int mapii; + int mapi; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + int borderi; + int pi; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; + ystride=_enc->state.ref_ystride[0]; + frags=_enc->state.frags; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; + dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0]; + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,ystride); + borderi=frags[fragi].borderi; + uncoded_ssd=uncoded_dc=0; + if(borderi<0){ + for(pi=0;pi<64;pi++){ + uncoded_ssd+=buffer[pi]*buffer[pi]; + uncoded_dc+=buffer[pi]; + } + } + else{ + ogg_int64_t mask; + mask=_enc->state.borders[borderi].mask; + for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ + uncoded_ssd+=buffer[pi]*buffer[pi]; + uncoded_dc+=buffer[pi]; + } + } + /*Scale to match DCT domain.*/ + uncoded_ssd<<=4; + /*We actually only want the AC contribution to the SSD.*/ + uncoded_ssd-=uncoded_dc*uncoded_dc>>2; + /*DC is a special case; if there's more than a full-quantizer improvement + in the effective DC component, always force-code the block.*/ + dc_flag=abs(uncoded_dc)>dc_dequant<<1; + uncoded_ssd|=-dc_flag; + _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd; + } + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + map_nidxs=(map_nidxs-4>>1)+4; + mapii=4; + for(pli=1;pli<3;pli++){ + 
ystride=_enc->state.ref_ystride[pli]; + dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0]; + for(;mapiistate.borders[borderi].mask; + for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ + uncoded_ssd+=buffer[pi]*buffer[pi]; + uncoded_dc+=buffer[pi]; + } + } + /*Scale to match DCT domain.*/ + uncoded_ssd<<=4; + /*We actually only want the AC contribution to the SSD.*/ + uncoded_ssd-=uncoded_dc*uncoded_dc>>2; + /*DC is a special case; if there's more than a full-quantizer improvement + in the effective DC component, always force-code the block.*/ + dc_flag=abs(uncoded_dc)>dc_dequant<<1; + uncoded_ssd|=-dc_flag; + _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd; + } + map_nidxs=(map_nidxs-4<<1)+4; + } +} + +static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi, + unsigned _frag_satd[12]){ + const unsigned char *src; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *sb_map; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + int mapii; + int mapi; + int ystride; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ystride=_enc->state.ref_ystride[0]; + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + } + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ + ystride=_enc->state.ref_ystride[1]; + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + frag_offs=frag_buf_offs[fragi]; + _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + } +} + +static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,const oc_fr_state *_fr,const 
oc_qii_state *_qs, + const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){ + oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0); + oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0); + _modec->overhead+= + oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<lambda); +} + +static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,int _mb_mode,const signed char *_mv, + const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){ + unsigned frag_satd[12]; + const unsigned char *src; + const unsigned char *ref; + int ystride; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *sb_map; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + int mapii; + int mapi; + int mv_offs[2]; + int dx; + int dy; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[ + _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]]; + ystride=_enc->state.ref_ystride[0]; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; + dx=_mv[0]; + dy=_mv[1]; + _modec->rate=_modec->ssd=0; + if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){ + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + } + else{ + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + } + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ + ystride=_enc->state.ref_ystride[1]; + 
if(oc_state_get_mv_offsets(&_enc->state,mv_offs,1,dx,dy)>1){ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + } + else{ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + } + oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); + oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); + _modec->overhead+= + oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<lambda); +} + +static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _skip_ssd[12]){ + static const oc_mv OC_MV_ZERO; + oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,_fr,_qs,_skip_ssd); +} + +static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,int _mb_mode,const signed char *_mv, + const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){ + int bits0; + oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd); + bits0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31]; + _modec->overhead+=OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12) + -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<lambda); + return bits0; +} + +/*A mapping from oc_mb_map (raster) ordering to oc_sb_map (Hilbert) ordering.*/ +static const unsigned char OC_MB_PHASE[4][4]={ + {0,1,3,2},{0,3,1,2},{0,3,1,2},{2,3,1,0} +}; + +static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _skip_ssd[12]){ + unsigned frag_satd[12]; + oc_mv lbmvs[4]; + oc_mv cbmvs[4]; + const unsigned char *src; + const unsigned 
char *ref; + int ystride; + const ptrdiff_t *frag_buf_offs; + oc_mv *frag_mvs; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + int nqis; + int mapii; + int mapi; + int mv_offs[2]; + int dx; + int dy; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + int bits0; + int bits1; + unsigned satd; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; + ystride=_enc->state.ref_ystride[0]; + frag_buf_offs=_enc->state.frag_buf_offs; + frag_mvs=_enc->state.frag_mvs; + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + _modec->rate=_modec->ssd=0; + for(bi=0;bi<4;bi++){ + fragi=mb_map[0][bi]; + dx=_mv[bi][0]; + dy=_mv[bi][1]; + /*Save the block MVs as the current ones while we're here; we'll replace + them if we don't ultimately choose 4MV mode.*/ + frag_mvs[fragi][0]=(signed char)dx; + frag_mvs[fragi][1]=(signed char)dy; + frag_offs=frag_buf_offs[fragi]; + if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){ + satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + else{ + satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd; + } + oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd, + _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1); + /*Figure out which blocks are being skipped and give them (0,0) MVs.*/ + bits0=0; + bits1=0; + nqis=_enc->state.nqis; + for(bi=0;bi<4;bi++){ + if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){ + memset(lbmvs+bi,0,sizeof(*lbmvs)); + } + else{ + memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs)); + bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31]; + bits1+=12; + } + } + (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs, + (const oc_mv *)lbmvs); + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + 
map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ + ystride=_enc->state.ref_ystride[1]; + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + dx=cbmvs[bi][0]; + dy=cbmvs[bi][1]; + frag_offs=frag_buf_offs[fragi]; + /*TODO: We could save half these calls by re-using the results for the Cb + and Cr planes; is it worth it?*/ + if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){ + satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + else{ + satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + frag_satd[mapii]=satd; + } + oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); + _modec->overhead+= + oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR) + +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1) + -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<lambda); +} + +int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){ + oc_set_chroma_mvs_func set_chroma_mvs; + oc_enc_pipeline_state pipe; + oc_qii_state intra_luma_qs; + oc_mv last_mv; + oc_mv prior_mv; + ogg_int64_t interbits; + ogg_int64_t intrabits; + const unsigned char *map_idxs; + int nmap_idxs; + unsigned *coded_mbis; + unsigned *uncoded_mbis; + size_t ncoded_mbis; + size_t nuncoded_mbis; + oc_sb_flags *sb_flags; + signed char *mb_modes; + const oc_sb_map *sb_maps; + const oc_mb_map *mb_maps; + oc_mb_enc_info *embs; + oc_fragment *frags; + oc_mv *frag_mvs; + int qi; + unsigned stripe_sby; + unsigned mcu_nvsbs; + int notstart; + int notdone; + int vdec; + unsigned sbi; + unsigned sbi_end; + int refi; + int pli; + set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt]; + _enc->state.frame_type=OC_INTER_FRAME; + oc_mode_scheme_chooser_reset(&_enc->chooser); + oc_enc_tokenize_start(_enc); + oc_enc_pipeline_init(_enc,&pipe); + 
if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs); + _enc->mv_bits[0]=_enc->mv_bits[1]=0; + interbits=intrabits=0; + last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0; + /*Choose MVs and MB modes and quantize and code luma. + Must be done in Hilbert order.*/ + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + qi=_enc->state.qis[0]; + coded_mbis=_enc->coded_mbis; + uncoded_mbis=coded_mbis+_enc->state.nmbs; + ncoded_mbis=0; + nuncoded_mbis=0; + _enc->state.ncoded_fragis[0]=0; + _enc->state.ncoded_fragis[1]=0; + _enc->state.ncoded_fragis[2]=0; + sb_flags=_enc->state.sb_flags; + mb_modes=_enc->state.mb_modes; + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + mb_maps=(const oc_mb_map *)_enc->state.mb_maps; + embs=_enc->mb_info; + frags=_enc->state.frags; + frag_mvs=_enc->state.frag_mvs; + vdec=!(_enc->state.info.pixel_fmt&2); + notstart=0; + notdone=1; + mcu_nvsbs=_enc->mcu_nvsbs; + for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){ + notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby); + sbi_end=pipe.sbi_end[0]; + for(sbi=pipe.sbi0[0];sbisp_levelsp_levellambda*3; + if(modes[OC_MODE_INTER_MV_FOUR].cost>2][mbi&3][bi]; + frags[fragi].qii=modes[mb_mode].qii[bi]; + } + if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi, + modes[mb_mode].overhead>>OC_BIT_SCALE)>0){ + int orig_mb_mode; + orig_mb_mode=mb_mode; + mb_mode=mb_modes[mbi]; + switch(mb_mode){ + case OC_MODE_INTER_MV:{ + memcpy(prior_mv,last_mv,sizeof(prior_mv)); + /*If we're backing out from 4MV, find the MV we're actually + using.*/ + if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){ + for(bi=0;;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(frags[fragi].coded){ + memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv)); + dx=frag_mvs[fragi][0]; + dy=frag_mvs[fragi][1]; + break; + } + } + mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31]; + } + /*Otherwise we used the original analysis MV.*/ + else{ + memcpy(last_mv, + 
embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv)); + } + _enc->mv_bits[0]+=mb_mv_bits_0; + _enc->mv_bits[1]+=12; + }break; + case OC_MODE_INTER_MV_LAST2:{ + oc_mv tmp_mv; + memcpy(tmp_mv,prior_mv,sizeof(tmp_mv)); + memcpy(prior_mv,last_mv,sizeof(prior_mv)); + memcpy(last_mv,tmp_mv,sizeof(last_mv)); + }break; + case OC_MODE_GOLDEN_MV:{ + _enc->mv_bits[0]+=mb_gmv_bits_0; + _enc->mv_bits[1]+=12; + }break; + case OC_MODE_INTER_MV_FOUR:{ + oc_mv lbmvs[4]; + oc_mv cbmvs[4]; + memcpy(prior_mv,last_mv,sizeof(prior_mv)); + for(bi=0;bi<4;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(frags[fragi].coded){ + memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv)); + memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi])); + _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31] + +OC_MV_BITS[0][frag_mvs[fragi][1]+31]; + _enc->mv_bits[1]+=12; + } + /*Replace the block MVs for not-coded blocks with (0,0).*/ + else memset(lbmvs[bi],0,sizeof(lbmvs[bi])); + } + (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs); + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_maps[mbi][pli][bi]; + frags[fragi].mb_mode=mb_mode; + frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii]; + memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi])); + } + }break; + } + coded_mbis[ncoded_mbis++]=mbi; + oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode); + interbits+=modes[mb_mode].rate+modes[mb_mode].overhead; + } + else{ + *(uncoded_mbis-++nuncoded_mbis)=mbi; + mb_mode=OC_MODE_INTER_NOMV; + dx=dy=0; + } + /*Propagate final MB mode and MVs to the chroma blocks. 
+ This has already been done for 4MV mode, since it requires individual + block motion vectors.*/ + if(mb_mode!=OC_MODE_INTER_MV_FOUR){ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_maps[mbi][pli][bi]; + frags[fragi].mb_mode=mb_mode; + /*If we switched from 4MV mode to INTER_MV mode, then the qii + values won't have been chosen with the right MV, but it's + probably not worth re-estimating them.*/ + frags[fragi].qii=modes[mb_mode].qii[mapii]; + frag_mvs[fragi][0]=(signed char)dx; + frag_mvs[fragi][1]=(signed char)dy; + } + } + } + oc_fr_state_flush_sb(pipe.fr+0); + sb_flags[sbi].coded_fully=pipe.fr[0].sb_full; + sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial; + } + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone); + /*Code chroma planes.*/ + for(pli=1;pli<3;pli++){ + oc_enc_sb_transform_quantize_chroma(_enc,&pipe, + pli,pipe.sbi0[pli],pipe.sbi_end[pli]); + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli); + /*Finish adding flagging overhead costs to inter bit counts to determine if + we should have coded a key frame instead.*/ + if(_allow_keyframe){ + if(interbits>intrabits)return 1; + /*Technically the chroma plane counts are over-estimations, because they + don't account for continuing runs from the luma planes, but the + inaccuracy is small.*/ + for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<mv_bits[0],_enc->mv_bits[1])<chooser.scheme_bits[_enc->chooser.scheme_list[0]]<intrabits)return 1; + } + _enc->ncoded_mbis=ncoded_mbis; + /*Compact the coded fragment list.*/ + { + ptrdiff_t ncoded_fragis; + ncoded_fragis=_enc->state.ncoded_fragis[0]; + for(pli=1;pli<3;pli++){ + memmove(_enc->state.coded_fragis+ncoded_fragis, + _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset, + 
_enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis)); + ncoded_fragis+=_enc->state.ncoded_fragis[pli]; + } + _enc->state.ntotal_coded_fragis=ncoded_fragis; + } + return 0; +} + +#if defined(OC_COLLECT_METRICS) +# include +# include + +/*TODO: It may be helpful (for block-level quantizers especially) to separate + out the contributions from AC and DC into separate tables.*/ + +# define OC_ZWEIGHT (0.25) + +static void oc_mode_metrics_add(oc_mode_metrics *_metrics, + double _w,int _satd,int _rate,double _rmse){ + double rate; + /*Accumulate statistics without the scaling; this lets us change the scale + factor yet still use old data.*/ + rate=ldexp(_rate,-OC_BIT_SCALE); + if(_metrics->fragw>0){ + double dsatd; + double drate; + double drmse; + double w; + dsatd=_satd-_metrics->satd/_metrics->fragw; + drate=rate-_metrics->rate/_metrics->fragw; + drmse=_rmse-_metrics->rmse/_metrics->fragw; + w=_metrics->fragw*_w/(_metrics->fragw+_w); + _metrics->satd2+=dsatd*dsatd*w; + _metrics->satdrate+=dsatd*drate*w; + _metrics->rate2+=drate*drate*w; + _metrics->satdrmse+=dsatd*drmse*w; + _metrics->rmse2+=drmse*drmse*w; + } + _metrics->fragw+=_w; + _metrics->satd+=_satd*_w; + _metrics->rate+=rate*_w; + _metrics->rmse+=_rmse*_w; +} + +static void oc_mode_metrics_merge(oc_mode_metrics *_dst, + const oc_mode_metrics *_src,int _n){ + int i; + /*Find a non-empty set of metrics.*/ + for(i=0;i<_n&&_src[i].fragw<=0;i++); + if(i>=_n){ + memset(_dst,0,sizeof(*_dst)); + return; + } + memcpy(_dst,_src+i,sizeof(*_dst)); + /*And iterate over the remaining non-empty sets of metrics.*/ + for(i++;i<_n;i++)if(_src[i].fragw>0){ + double wa; + double wb; + double dsatd; + double drate; + double drmse; + double w; + wa=_dst->fragw; + wb=_src[i].fragw; + dsatd=_src[i].satd/wb-_dst->satd/wa; + drate=_src[i].rate/wb-_dst->rate/wa; + drmse=_src[i].rmse/wb-_dst->rmse/wa; + w=wa*wb/(wa+wb); + _dst->fragw+=_src[i].fragw; + _dst->satd+=_src[i].satd; + _dst->rate+=_src[i].rate; + 
_dst->rmse+=_src[i].rmse; + _dst->satd2+=_src[i].satd2+dsatd*dsatd*w; + _dst->satdrate+=_src[i].satdrate+dsatd*drate*w; + _dst->rate2+=_src[i].rate2+drate*drate*w; + _dst->satdrmse+=_src[i].satdrmse+dsatd*drmse*w; + _dst->rmse2+=_src[i].rmse2+drmse*drmse*w; + } +} + +/*Compile collected SATD/rate/RMSE metrics into a form that's immediately + useful for mode decision.*/ +static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){ + int pli; + int qti; + oc_restore_fpu(&_enc->state); + /*Convert raw collected data into cleaned up sample points.*/ + for(pli=0;pli<3;pli++){ + for(qti=0;qti<2;qti++){ + double fragw; + int bin0; + int bin1; + int bin; + fragw=0; + bin0=bin1=0; + for(bin=0;bin=OC_ZWEIGHT){ + fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw; + } + /*Merge statistics and fit lines.*/ + oc_mode_metrics_merge(&metrics, + OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0); + if(metrics.fragw>0&&metrics.satd2>0){ + double a; + double b; + double msatd; + double mrate; + double mrmse; + double rate; + double rmse; + msatd=metrics.satd/metrics.fragw; + mrate=metrics.rate/metrics.fragw; + mrmse=metrics.rmse/metrics.fragw; + /*Compute the points on these lines corresponding to the actual bin + value.*/ + b=metrics.satdrate/metrics.satd2; + a=mrate-b*msatd; + rate=ldexp(a+b*(bin<>1); + return -_extra_bits; +} + +/*Handles the pure zero run tokens.*/ +static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){ + return _extra_bits+1; +} + +/*Handles a normal coefficient value token.*/ +static ptrdiff_t oc_token_skip_val(void){ + return 1; +} + +/*Handles a category 1A zero run/coefficient value combo token.*/ +static ptrdiff_t oc_token_skip_run_cat1a(int _token){ + return _token-OC_DCT_RUN_CAT1A+2; +} + +/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.*/ +static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){ + int run_cati; + int ncoeffs_mask; + int ncoeffs_adjust; + run_cati=_token-OC_DCT_RUN_CAT1B; + 
ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati); + ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati); + return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust; +} + +/*A jump table for computing the number of coefficients or blocks to skip for + a given token value. + This reduces all the conditional branches, etc., needed to parse these token + values down to one indirect jump.*/ +static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={ + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob6, + oc_token_skip_zrl, + oc_token_skip_zrl, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + oc_token_skip_run, + oc_token_skip_run, + oc_token_skip_run, + oc_token_skip_run +}; + +/*Determines the number of blocks or coefficients to be skipped for a given + token value. + _token: The token value to skip. + _extra_bits: The extra bits attached to this token. + Return: A positive value indicates that number of coefficients are to be + skipped in the current block. + Otherwise, the negative of the return value indicates that number of + blocks are to be ended. 
+ 0 will never be returned, so that at least one coefficient in one + block will always be decoded for every token.*/ +static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){ + return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits); +} + + + +void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){ + static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ + 0,16,16,16,16,16,32,32, + 32,32,32,32,32,32,32,48, + 48,48,48,48,48,48,48,48, + 48,48,48,48,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64 + }; + const oc_fragment *frags; + const unsigned *frag_satd; + const unsigned *frag_ssd; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + double fragw; + int qti; + int qii; + int qi; + int pli; + int zzi; + int token; + int eb; + oc_restore_fpu(&_enc->state); + /*Load any existing mode metrics if we haven't already.*/ + if(!oc_has_mode_metrics){ + FILE *fmetrics; + memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS)); + fmetrics=fopen("modedec.stats","rb"); + if(fmetrics!=NULL){ + fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics); + fclose(fmetrics); + } + for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi); + oc_has_mode_metrics=1; + } + qti=_enc->state.frame_type; + frags=_enc->state.frags; + frag_satd=_enc->frag_satd; + frag_ssd=_enc->frag_ssd; + coded_fragis=_enc->state.coded_fragis; + ncoded_fragis=fragii=0; + /*Weight the fragments by the inverse frame size; this prevents HD content + from dominating the statistics.*/ + fragw=1.0/_enc->state.nfrags; + for(pli=0;pli<3;pli++){ + ptrdiff_t ti[64]; + int eob_token[64]; + int eob_run[64]; + /*Set up token indices and eob run counts. 
+ We don't bother trying to figure out the real cost of the runs that span + coefficients; instead we use the costs that were available when R-D + token optimization was done.*/ + for(zzi=0;zzi<64;zzi++){ + ti[zzi]=_enc->dct_token_offs[pli][zzi]; + if(ti[zzi]>0){ + token=_enc->dct_tokens[pli][zzi][0]; + eb=_enc->extra_bits[pli][zzi][0]; + eob_token[zzi]=token; + eob_run[zzi]=-oc_dct_token_skip(token,eb); + } + else{ + eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX; + eob_run[zzi]=0; + } + } + /*Scan the list of coded fragments for this plane.*/ + ncoded_fragis+=_enc->state.ncoded_fragis[pli]; + for(;fragii0){ + /*We've reached the end of the block.*/ + eob_run[zzi]--; + break; + } + huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1] + +OC_ZZI_HUFF_OFFSET[zzi]; + if(eob_token[zzi]huff_codes[huffi][eob_token[zzi]].nbits + +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]]; + eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX; + } + token=_enc->dct_tokens[pli][zzi][ti[zzi]]; + eb=_enc->extra_bits[pli][zzi][ti[zzi]]; + ti[zzi]++; + skip=oc_dct_token_skip(token,eb); + if(skip<0){ + eob_token[zzi]=token; + eob_run[zzi]=-skip; + } + else{ + /*A regular DCT value token; accumulate the bits for it.*/ + frag_bits+=_enc->huff_codes[huffi][token].nbits + +OC_DCT_TOKEN_EXTRA_BITS[token]; + zzi+=skip; + } + } + mb_mode=frags[fragi].mb_mode; + qi=_enc->state.qis[frags[fragi].qii]; + satd=frag_satd[fragi]<<(pli+1&2); + bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1); + oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin, + fragw,satd,frag_bits<state.nqis;qii++){ + oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]); + } +} + +void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){ + FILE *fmetrics; + int qi; + /*Generate sample points for complete list of QI values.*/ + for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi); + fmetrics=fopen("modedec.stats","wb"); + if(fmetrics!=NULL){ + fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics); + fclose(fmetrics); + } + fprintf(stdout, + "/*File 
generated by libtheora with OC_COLLECT_METRICS" + " defined at compile time.*/\n" + "#if !defined(_modedec_H)\n" + "# define _modedec_H (1)\n" + "\n" + "\n" + "\n" + "# if defined(OC_COLLECT_METRICS)\n" + "typedef struct oc_mode_metrics oc_mode_metrics;\n" + "# endif\n" + "typedef struct oc_mode_rd oc_mode_rd;\n" + "\n" + "\n" + "\n" + "/*The number of extra bits of precision at which to store rate" + " metrics.*/\n" + "# define OC_BIT_SCALE (%i)\n" + "/*The number of extra bits of precision at which to store RMSE metrics.\n" + " This must be at least half OC_BIT_SCALE (rounded up).*/\n" + "# define OC_RMSE_SCALE (%i)\n" + "/*The number of bins to partition statistics into.*/\n" + "# define OC_SAD_BINS (%i)\n" + "/*The number of bits of precision to drop" + " from SAD scores to assign them to a\n" + " bin.*/\n" + "# define OC_SAD_SHIFT (%i)\n" + "\n" + "\n" + "\n" + "# if defined(OC_COLLECT_METRICS)\n" + "struct oc_mode_metrics{\n" + " double fragw;\n" + " double satd;\n" + " double rate;\n" + " double rmse;\n" + " double satd2;\n" + " double satdrate;\n" + " double rate2;\n" + " double satdrmse;\n" + " double rmse2;\n" + "};\n" + "\n" + "\n" + "int oc_has_mode_metrics;\n" + "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n" + "# endif\n" + "\n" + "\n" + "\n" + "struct oc_mode_rd{\n" + " ogg_int16_t rate;\n" + " ogg_int16_t rmse;\n" + "};\n" + "\n" + "\n" + "# if !defined(OC_COLLECT_METRICS)\n" + "static const\n" + "# endif\n" + "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n", + OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT); + for(qi=0;qi<64;qi++){ + int pli; + fprintf(stdout," {\n"); + for(pli=0;pli<3;pli++){ + int qti; + fprintf(stdout," {\n"); + for(qti=0;qti<2;qti++){ + int bin; + static const char *pl_names[3]={"Y'","Cb","Cr"}; + static const char *qti_names[2]={"INTRA","INTER"}; + fprintf(stdout," /*%s qi=%i %s*/\n", + pl_names[pli],qi,qti_names[qti]); + fprintf(stdout," {\n"); + fprintf(stdout," "); + for(bin=0;bininternal_decode!=NULL){ - 
(*((oc_state_dispatch_vtbl *)_th->internal_decode)->clear)(_th); + (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th); } if(_th->internal_encode!=NULL){ - (*((oc_state_dispatch_vtbl *)_th->internal_encode)->clear)(_th); + (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th); } if(_th->i!=NULL)theora_info_clear(_th->i); memset(_th,0,sizeof(*_th)); @@ -59,11 +59,11 @@ void theora_clear(theora_state *_th){ int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){ /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ if(_th->internal_decode!=NULL){ - return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->control)(_th, + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th, _req,_buf,_buf_sz); } else if(_th->internal_encode!=NULL){ - return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->control)(_th, + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th, _req,_buf,_buf_sz); } else return TH_EINVAL; @@ -72,11 +72,11 @@ int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){ ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){ /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ if(_th->internal_decode!=NULL){ - return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_frame)( + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)( _th,_gp); } else if(_th->internal_encode!=NULL){ - return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_frame)( + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)( _th,_gp); } else return -1; @@ -85,11 +85,11 @@ ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){ double theora_granule_time(theora_state *_th, ogg_int64_t _gp){ /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ if(_th->internal_decode!=NULL){ - return (*((oc_state_dispatch_vtbl 
*)_th->internal_decode)->granule_time)( + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)( _th,_gp); } else if(_th->internal_encode!=NULL){ - return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_time)( + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)( _th,_gp); } else return -1; diff --git a/Engine/lib/libtheora/lib/dec/apiwrapper.h b/Engine/lib/libtheora/lib/apiwrapper.h similarity index 92% rename from Engine/lib/libtheora/lib/dec/apiwrapper.h rename to Engine/lib/libtheora/lib/apiwrapper.h index 211021fc0..93454d7bd 100644 --- a/Engine/lib/libtheora/lib/dec/apiwrapper.h +++ b/Engine/lib/libtheora/lib/apiwrapper.h @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** @@ -20,9 +20,8 @@ # include # include # include "theora/theoradec.h" -/*# include "theora/theoraenc.h"*/ -typedef struct th_enc_ctx th_enc_ctx; -# include "../internal.h" +# include "theora/theoraenc.h" +# include "internal.h" typedef struct th_api_wrapper th_api_wrapper; typedef struct th_api_info th_api_info; diff --git a/Engine/lib/libtheora/lib/bitpack.c b/Engine/lib/libtheora/lib/bitpack.c new file mode 100644 index 000000000..8195003ba --- /dev/null +++ b/Engine/lib/libtheora/lib/bitpack.c @@ -0,0 +1,111 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "bitpack.h" + +/*We're 'MSb' endian; if we write a word but read individual bits, + then we'll read the MSb first.*/ + +void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){ + memset(_b,0,sizeof(*_b)); + _b->ptr=_buf; + _b->stop=_buf+_bytes; +} + +static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ + const unsigned char *ptr; + const unsigned char *stop; + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + ptr=_b->ptr; + stop=_b->stop; + while(available<=OC_PB_WINDOW_SIZE-8&&ptrptr=ptr; + if(_bits>available){ + if(ptr>=stop){ + _b->eof=1; + available=OC_LOTS_OF_BITS; + } + else window|=*ptr>>(available&7); + } + _b->bits=available; + return window; +} + +int oc_pack_look1(oc_pack_buf *_b){ + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + if(available<1)_b->window=window=oc_pack_refill(_b,1); + return window>>OC_PB_WINDOW_SIZE-1; +} + +void oc_pack_adv1(oc_pack_buf *_b){ + _b->window<<=1; + _b->bits--; +} + +/*Here we assume that 0<=_bits&&_bits<=32.*/ +long oc_pack_read(oc_pack_buf *_b,int _bits){ + oc_pb_window window; + int available; + long result; + window=_b->window; + available=_b->bits; + if(_bits==0)return 0; + if(available<_bits){ + window=oc_pack_refill(_b,_bits); + available=_b->bits; + } + result=window>>OC_PB_WINDOW_SIZE-_bits; + available-=_bits; + window<<=1; + window<<=_bits-1; + _b->bits=available; + _b->window=window; + return result; +} + +int oc_pack_read1(oc_pack_buf *_b){ + oc_pb_window window; + int available; 
+ int result; + window=_b->window; + available=_b->bits; + if(available<1){ + window=oc_pack_refill(_b,1); + available=_b->bits; + } + result=window>>OC_PB_WINDOW_SIZE-1; + available--; + window<<=1; + _b->bits=available; + _b->window=window; + return result; +} + +long oc_pack_bytes_left(oc_pack_buf *_b){ + if(_b->eof)return -1; + return _b->stop-_b->ptr+(_b->bits>>3); +} diff --git a/Engine/lib/libtheora/lib/dec/bitpack.h b/Engine/lib/libtheora/lib/bitpack.h similarity index 51% rename from Engine/lib/libtheora/lib/dec/bitpack.h rename to Engine/lib/libtheora/lib/bitpack.h index 1bff3fa50..a020a292f 100644 --- a/Engine/lib/libtheora/lib/dec/bitpack.h +++ b/Engine/lib/libtheora/lib/bitpack.h @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** @@ -16,23 +16,44 @@ ********************************************************************/ #if !defined(_bitpack_H) # define _bitpack_H (1) -# include +# include -void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes); -int theorapackB_look1(oggpack_buffer *_b,long *_ret); -void theorapackB_adv1(oggpack_buffer *_b); + + +typedef unsigned long oc_pb_window; +typedef struct oc_pack_buf oc_pack_buf; + + + +# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT) +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). 
+ Even relatively modest values like 100 would work fine.*/ +# define OC_LOTS_OF_BITS (0x40000000) + + + +struct oc_pack_buf{ + oc_pb_window window; + const unsigned char *ptr; + const unsigned char *stop; + int bits; + int eof; +}; + +void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes); +int oc_pack_look1(oc_pack_buf *_b); +void oc_pack_adv1(oc_pack_buf *_b); /*Here we assume 0<=_bits&&_bits<=32.*/ -int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret); -int theorapackB_read1(oggpack_buffer *_b,long *_ret); -long theorapackB_bytes(oggpack_buffer *_b); -long theorapackB_bits(oggpack_buffer *_b); -unsigned char *theorapackB_get_buffer(oggpack_buffer *_b); +long oc_pack_read(oc_pack_buf *_b,int _bits); +int oc_pack_read1(oc_pack_buf *_b); +/* returns -1 for read beyond EOF, or the number of whole bytes available */ +long oc_pack_bytes_left(oc_pack_buf *_b); /*These two functions are implemented locally in huffdec.c*/ /*Read in bits without advancing the bitptr. Here we assume 0<=_bits&&_bits<=32.*/ -/*static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret);*/ -/*static void theorapackB_adv(oggpack_buffer *_b,int _bits);*/ - +/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/ +/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/ #endif diff --git a/Engine/lib/libtheora/lib/cpu.c b/Engine/lib/libtheora/lib/cpu.c index 8da50d070..a863aad7f 100644 --- a/Engine/lib/libtheora/lib/cpu.c +++ b/Engine/lib/libtheora/lib/cpu.c @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** @@ -14,13 +14,13 @@ Originally written by Rudolf Marek. 
function: - last mod: $Id: cpu.c 15427 2008-10-21 02:36:19Z xiphmont $ + last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include "cpu.h" -#if !defined(USE_ASM) +#if !defined(OC_X86_ASM) static ogg_uint32_t oc_cpu_flags_get(void){ return 0; } @@ -166,7 +166,7 @@ static ogg_uint32_t oc_cpu_flags_get(void){ /* D M A c i t n e h t u A*/ else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| /* C S N y b e d o e G*/ - ecx==0x43534E20&&edx==0x79622065&&ebx==0x646F6547){ + ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){ /*AMD, Geode:*/ cpuid(0x80000000,eax,ebx,ecx,edx); if(eax<0x80000001)flags=0; @@ -192,7 +192,6 @@ static ogg_uint32_t oc_cpu_flags_get(void){ The C3-2 (Nehemiah) cores appear to, as well.*/ cpuid(1,eax,ebx,ecx,edx); flags=oc_parse_intel_flags(edx,ecx); - cpuid(0x80000000,eax,ebx,ecx,edx); if(eax>=0x80000001){ /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. We need to check this even if the Intel test succeeds to pick up 3DNow! diff --git a/Engine/lib/libtheora/lib/cpu.h b/Engine/lib/libtheora/lib/cpu.h index efe5e9f52..a43c957a3 100644 --- a/Engine/lib/libtheora/lib/cpu.h +++ b/Engine/lib/libtheora/lib/cpu.h @@ -5,12 +5,12 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: cpu.h 15430 2008-10-21 05:03:55Z giles $ + last mod: $Id: cpu.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ diff --git a/Engine/lib/libtheora/lib/dec/dct.h b/Engine/lib/libtheora/lib/dct.h similarity index 90% rename from Engine/lib/libtheora/lib/dec/dct.h rename to Engine/lib/libtheora/lib/dct.h index 09043dc51..24ba6f111 100644 --- a/Engine/lib/libtheora/lib/dec/dct.h +++ b/Engine/lib/libtheora/lib/dct.h @@ -5,13 +5,13 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: dct.h 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ diff --git a/Engine/lib/libtheora/lib/dec/bitpack.c b/Engine/lib/libtheora/lib/dec/bitpack.c deleted file mode 100644 index 3836150c2..000000000 --- a/Engine/lib/libtheora/lib/dec/bitpack.c +++ /dev/null @@ -1,121 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: packing variable sized words into an octet stream - last mod: $Id: bitpack.c 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -/*We're 'MSb' endian; if we write a word but read individual bits, - then we'll read the MSb first.*/ - -#include -#include -#include "bitpack.h" - -void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes){ - memset(_b,0,sizeof(*_b)); - _b->buffer=_b->ptr=_buf; - _b->storage=_bytes; -} - -int theorapackB_look1(oggpack_buffer *_b,long *_ret){ - if(_b->endbyte>=_b->storage){ - *_ret=0L; - return -1; - } - *_ret=(_b->ptr[0]>>7-_b->endbit)&1; - return 0; -} - -void theorapackB_adv1(oggpack_buffer *_b){ - if(++(_b->endbit)>7){ - _b->endbit=0; - _b->ptr++; - _b->endbyte++; - } -} - -/*Here we assume that 0<=_bits&&_bits<=32.*/ -int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret){ - long ret; - long m; - long d; - int fail; - m=32-_bits; - _bits+=_b->endbit; - d=_b->storage-_b->endbyte; - if(d<=4){ - /*Not the main path.*/ - if(d*8<_bits){ - *_ret=0L; - fail=-1; - goto overflow; - } - /*Special case to avoid reading _b->ptr[0], which might be past the end of - the buffer; also skips some useless accounting.*/ - else if(!_bits){ - *_ret=0L; - return 0; - } - } - ret=_b->ptr[0]<<24+_b->endbit; - if(_bits>8){ - ret|=_b->ptr[1]<<16+_b->endbit; - if(_bits>16){ - ret|=_b->ptr[2]<<8+_b->endbit; - if(_bits>24){ - ret|=_b->ptr[3]<<_b->endbit; - if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit; - } - } - } - *_ret=((ret&0xFFFFFFFFUL)>>(m>>1))>>(m+1>>1); - fail=0; -overflow: - _b->ptr+=_bits>>3; - _b->endbyte+=_bits>>3; - _b->endbit=_bits&7; - return fail; -} - -int theorapackB_read1(oggpack_buffer *_b,long *_ret){ - int fail; - 
if(_b->endbyte>=_b->storage){ - /*Not the main path.*/ - *_ret=0L; - fail=-1; - } - else{ - *_ret=(_b->ptr[0]>>7-_b->endbit)&1; - fail=0; - } - _b->endbit++; - if(_b->endbit>7){ - _b->endbit=0; - _b->ptr++; - _b->endbyte++; - } - return fail; -} - -long theorapackB_bytes(oggpack_buffer *_b){ - return _b->endbyte+(_b->endbit+7>>3); -} - -long theorapackB_bits(oggpack_buffer *_b){ - return _b->endbyte*8+_b->endbit; -} - -unsigned char *theorapackB_get_buffer(oggpack_buffer *_b){ - return _b->buffer; -} diff --git a/Engine/lib/libtheora/lib/dec/decode.c b/Engine/lib/libtheora/lib/dec/decode.c deleted file mode 100644 index 5804cf709..000000000 --- a/Engine/lib/libtheora/lib/dec/decode.c +++ /dev/null @@ -1,2057 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: decode.c 15403 2008-10-16 12:44:05Z tterribe $ - - ********************************************************************/ - -#include -#include -#include -#include "decint.h" -#if defined(OC_DUMP_IMAGES) -# include -# include "png.h" -#endif - -/*No post-processing.*/ -#define OC_PP_LEVEL_DISABLED (0) -/*Keep track of DC qi for each block only.*/ -#define OC_PP_LEVEL_TRACKDCQI (1) -/*Deblock the luma plane.*/ -#define OC_PP_LEVEL_DEBLOCKY (2) -/*Dering the luma plane.*/ -#define OC_PP_LEVEL_DERINGY (3) -/*Stronger luma plane deringing.*/ -#define OC_PP_LEVEL_SDERINGY (4) -/*Deblock the chroma planes.*/ -#define OC_PP_LEVEL_DEBLOCKC (5) -/*Dering the chroma planes.*/ -#define OC_PP_LEVEL_DERINGC (6) -/*Stronger chroma plane deringing.*/ -#define OC_PP_LEVEL_SDERINGC (7) -/*Maximum valid post-processing level.*/ -#define OC_PP_LEVEL_MAX (7) - - - -/*The mode alphabets for the various mode coding schemes. 
- Scheme 0 uses a custom alphabet, which is not stored in this table.*/ -static const int OC_MODE_ALPHABETS[7][OC_NMODES]={ - /*Last MV dominates */ - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV, - OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV, - OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2, - OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV, - OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV, - OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR - }, - /*No MV dominates.*/ - { - OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2, - OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST, - OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - /*Default ordering.*/ - { - OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST, - OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - } -}; - - -static int oc_sb_run_unpack(oggpack_buffer *_opb){ - long bits; - int ret; - /*Coding scheme: - Codeword Run Length - 0 1 - 10x 2-3 - 110x 4-5 - 1110xx 6-9 - 11110xxx 10-17 - 111110xxxx 18-33 - 111111xxxxxxxxxxxx 34-4129*/ - theorapackB_read1(_opb,&bits); - if(bits==0)return 1; - theorapackB_read(_opb,2,&bits); - if((bits&2)==0)return 2+(int)bits; - else if((bits&1)==0){ - theorapackB_read1(_opb,&bits); - return 4+(int)bits; - } - theorapackB_read(_opb,3,&bits); - if((bits&4)==0)return 6+(int)bits; - else if((bits&2)==0){ - ret=10+((bits&1)<<2); - theorapackB_read(_opb,2,&bits); - 
return ret+(int)bits; - } - else if((bits&1)==0){ - theorapackB_read(_opb,4,&bits); - return 18+(int)bits; - } - theorapackB_read(_opb,12,&bits); - return 34+(int)bits; -} - -static int oc_block_run_unpack(oggpack_buffer *_opb){ - long bits; - long bits2; - /*Coding scheme: - Codeword Run Length - 0x 1-2 - 10x 3-4 - 110x 5-6 - 1110xx 7-10 - 11110xx 11-14 - 11111xxxx 15-30*/ - theorapackB_read(_opb,2,&bits); - if((bits&2)==0)return 1+(int)bits; - else if((bits&1)==0){ - theorapackB_read1(_opb,&bits); - return 3+(int)bits; - } - theorapackB_read(_opb,2,&bits); - if((bits&2)==0)return 5+(int)bits; - else if((bits&1)==0){ - theorapackB_read(_opb,2,&bits); - return 7+(int)bits; - } - theorapackB_read(_opb,3,&bits); - if((bits&4)==0)return 11+bits; - theorapackB_read(_opb,2,&bits2); - return 15+((bits&3)<<2)+bits2; -} - - - -static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info, - const th_setup_info *_setup){ - int qti; - int pli; - int qi; - int ret; - ret=oc_state_init(&_dec->state,_info); - if(ret<0)return ret; - oc_huff_trees_copy(_dec->huff_tables, - (const oc_huff_node *const *)_setup->huff_tables); - for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - _dec->state.dequant_tables[qti][pli]= - _dec->state.dequant_table_data[qti][pli]; - } - oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale, - &_setup->qinfo); - for(qi=0;qi<64;qi++){ - int qsum; - qsum=0; - for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - qsum+=_dec->state.dequant_tables[qti][pli][qi][18]+ - _dec->state.dequant_tables[qti][pli][qi][19]+ - _dec->state.dequant_tables[qti][pli][qi][26]+ - _dec->state.dequant_tables[qti][pli][qi][27]<<(pli==0); - } - _dec->pp_sharp_mod[qi]=-(qsum>>11); - } - _dec->dct_tokens=(unsigned char **)oc_calloc_2d(64, - _dec->state.nfrags,sizeof(_dec->dct_tokens[0][0])); - _dec->extra_bits=(ogg_uint16_t **)oc_calloc_2d(64, - _dec->state.nfrags,sizeof(_dec->extra_bits[0][0])); - memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits, - 
sizeof(_dec->state.loop_filter_limits)); - _dec->pp_level=OC_PP_LEVEL_DISABLED; - _dec->dc_qis=NULL; - _dec->variances=NULL; - _dec->pp_frame_data=NULL; - _dec->stripe_cb.ctx=NULL; - _dec->stripe_cb.stripe_decoded=NULL; - return 0; -} - -static void oc_dec_clear(oc_dec_ctx *_dec){ - _ogg_free(_dec->pp_frame_data); - _ogg_free(_dec->variances); - _ogg_free(_dec->dc_qis); - oc_free_2d(_dec->extra_bits); - oc_free_2d(_dec->dct_tokens); - oc_huff_trees_clear(_dec->huff_tables); - oc_state_clear(&_dec->state); -} - - -static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){ - long val; - /*Check to make sure this is a data packet.*/ - theorapackB_read1(&_dec->opb,&val); - if(val!=0)return TH_EBADPACKET; - /*Read in the frame type (I or P).*/ - theorapackB_read1(&_dec->opb,&val); - _dec->state.frame_type=(int)val; - /*Read in the current qi.*/ - theorapackB_read(&_dec->opb,6,&val); - _dec->state.qis[0]=(int)val; - theorapackB_read1(&_dec->opb,&val); - if(!val)_dec->state.nqis=1; - else{ - theorapackB_read(&_dec->opb,6,&val); - _dec->state.qis[1]=(int)val; - theorapackB_read1(&_dec->opb,&val); - if(!val)_dec->state.nqis=2; - else{ - theorapackB_read(&_dec->opb,6,&val); - _dec->state.qis[2]=(int)val; - _dec->state.nqis=3; - } - } - if(_dec->state.frame_type==OC_INTRA_FRAME){ - /*Keyframes have 3 unused configuration bits, holdovers from VP3 days. - Most of the other unused bits in the VP3 headers were eliminated. - I don't know why these remain.*/ - /* I wanted to eliminate wasted bits, but not all config wiggle room --Monty */ - theorapackB_read(&_dec->opb,3,&val); - if(val!=0)return TH_EIMPL; - } - return 0; -} - -/*Mark all fragments as coded and in OC_MODE_INTRA. - This also builds up the coded fragment list (in coded order), and clears the - uncoded fragment list. 
- It does not update the coded macro block list, as that is not used when - decoding INTRA frames.*/ -static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){ - oc_sb *sb; - oc_sb *sb_end; - int pli; - int ncoded_fragis; - int prev_ncoded_fragis; - prev_ncoded_fragis=ncoded_fragis=0; - sb=sb_end=_dec->state.sbs; - for(pli=0;pli<3;pli++){ - const oc_fragment_plane *fplane; - fplane=_dec->state.fplanes+pli; - sb_end+=fplane->nsbs; - for(;sbquad_valid&1<map[quadi][bi]; - if(fragi>=0){ - oc_fragment *frag; - frag=_dec->state.frags+fragi; - frag->coded=1; - frag->mbmode=OC_MODE_INTRA; - _dec->state.coded_fragis[ncoded_fragis++]=fragi; - } - } - } - } - _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; - prev_ncoded_fragis=ncoded_fragis; - _dec->state.nuncoded_fragis[pli]=0; - } -} - -/*Decodes the bit flags for whether or not each super block is partially coded - or not. - Return: The number of partially coded super blocks.*/ -static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ - oc_sb *sb; - oc_sb *sb_end; - long val; - int flag; - int npartial; - int run_count; - theorapackB_read1(&_dec->opb,&val); - flag=(int)val; - sb=_dec->state.sbs; - sb_end=sb+_dec->state.nsbs; - run_count=npartial=0; - while(sbopb); - full_run=run_count>=4129; - do{ - sb->coded_partially=flag; - sb->coded_fully=0; - npartial+=flag; - sb++; - } - while(--run_count>0&&sbopb,&val); - flag=(int)val; - } - else flag=!flag; - } - /*TODO: run_count should be 0 here. - If it's not, we should issue a warning of some kind.*/ - return npartial; -} - -/*Decodes the bit flags for whether or not each non-partially-coded super - block is fully coded or not. - This function should only be called if there is at least one - non-partially-coded super block. 
- Return: The number of partially coded super blocks.*/ -static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){ - oc_sb *sb; - oc_sb *sb_end; - long val; - int flag; - int run_count; - sb=_dec->state.sbs; - sb_end=sb+_dec->state.nsbs; - /*Skip partially coded super blocks.*/ - for(;sb->coded_partially;sb++); - theorapackB_read1(&_dec->opb,&val); - flag=(int)val; - while(sbopb); - full_run=run_count>=4129; - for(;sbcoded_partially)continue; - if(run_count--<=0)break; - sb->coded_fully=flag; - } - if(full_run&&sbopb,&val); - flag=(int)val; - } - else flag=!flag; - } - /*TODO: run_count should be 0 here. - If it's not, we should issue a warning of some kind.*/ -} - -static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){ - oc_sb *sb; - oc_sb *sb_end; - long val; - int npartial; - int pli; - int flag; - int run_count; - int ncoded_fragis; - int prev_ncoded_fragis; - int nuncoded_fragis; - int prev_nuncoded_fragis; - npartial=oc_dec_partial_sb_flags_unpack(_dec); - if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec); - if(npartial>0){ - theorapackB_read1(&_dec->opb,&val); - flag=!(int)val; - } - else flag=0; - run_count=0; - prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0; - sb=sb_end=_dec->state.sbs; - for(pli=0;pli<3;pli++){ - const oc_fragment_plane *fplane; - fplane=_dec->state.fplanes+pli; - sb_end+=fplane->nsbs; - for(;sbquad_valid&1<map[quadi][bi]; - if(fragi>=0){ - oc_fragment *frag; - frag=_dec->state.frags+fragi; - if(sb->coded_fully)frag->coded=1; - else if(!sb->coded_partially)frag->coded=0; - else{ - if(run_count<=0){ - run_count=oc_block_run_unpack(&_dec->opb); - flag=!flag; - } - run_count--; - frag->coded=flag; - } - if(frag->coded)_dec->state.coded_fragis[ncoded_fragis++]=fragi; - else *(_dec->state.uncoded_fragis-++nuncoded_fragis)=fragi; - } - } - } - } - _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; - prev_ncoded_fragis=ncoded_fragis; - 
_dec->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis; - prev_nuncoded_fragis=nuncoded_fragis; - } - /*TODO: run_count should be 0 here. - If it's not, we should issue a warning of some kind.*/ -} - - - -typedef int (*oc_mode_unpack_func)(oggpack_buffer *_opb); - -static int oc_vlc_mode_unpack(oggpack_buffer *_opb){ - long val; - int i; - for(i=0;i<7;i++){ - theorapackB_read1(_opb,&val); - if(!val)break; - } - return i; -} - -static int oc_clc_mode_unpack(oggpack_buffer *_opb){ - long val; - theorapackB_read(_opb,3,&val); - return (int)val; -} - -/*Unpacks the list of macro block modes for INTER frames.*/ -static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){ - oc_mode_unpack_func mode_unpack; - oc_mb *mb; - oc_mb *mb_end; - const int *alphabet; - long val; - int scheme0_alphabet[8]; - int mode_scheme; - theorapackB_read(&_dec->opb,3,&val); - mode_scheme=(int)val; - if(mode_scheme==0){ - int mi; - /*Just in case, initialize the modes to something. - If the bitstream doesn't contain each index exactly once, it's likely - corrupt and the rest of the packet is garbage anyway, but this way we - won't crash, and we'll decode SOMETHING.*/ - /*LOOP VECTORIZES.*/ - for(mi=0;miopb,3,&val); - scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi]; - } - alphabet=scheme0_alphabet; - } - else alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; - if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack; - else mode_unpack=oc_vlc_mode_unpack; - mb=_dec->state.mbs; - mb_end=mb+_dec->state.nmbs; - for(;mbmode!=OC_MODE_INVALID){ - int bi; - for(bi=0;bi<4;bi++){ - int fragi; - fragi=mb->map[0][bi]; - if(fragi>=0&&_dec->state.frags[fragi].coded)break; - } - if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)]; - else mb->mode=OC_MODE_INTER_NOMV; - } - } -} - - - -typedef int (*oc_mv_comp_unpack_func)(oggpack_buffer *_opb); - -static int oc_vlc_mv_comp_unpack(oggpack_buffer *_opb){ - long bits; - int mvsigned[2]; - theorapackB_read(_opb,3,&bits); - switch(bits){ - case 0:return 0; - case 
1:return 1; - case 2:return -1; - case 3: - case 4:{ - mvsigned[0]=(int)(bits-1); - theorapackB_read1(_opb,&bits); - }break; - /*case 5: - case 6: - case 7:*/ - default:{ - mvsigned[0]=1<>1); - bits&=1; - }break; - } - mvsigned[1]=-mvsigned[0]; - return mvsigned[bits]; -} - -static int oc_clc_mv_comp_unpack(oggpack_buffer *_opb){ - long bits; - int mvsigned[2]; - theorapackB_read(_opb,6,&bits); - mvsigned[0]=bits>>1; - mvsigned[1]=-mvsigned[0]; - return mvsigned[bits&1]; -} - -/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro - block modes and motion vectors to the individual fragments.*/ -static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ - oc_set_chroma_mvs_func set_chroma_mvs; - oc_mv_comp_unpack_func mv_comp_unpack; - oc_mb *mb; - oc_mb *mb_end; - const int *map_idxs; - long val; - int map_nidxs; - oc_mv last_mv[2]; - oc_mv cbmvs[4]; - set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt]; - theorapackB_read1(&_dec->opb,&val); - mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack; - map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt]; - map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt]; - memset(last_mv,0,sizeof(last_mv)); - mb=_dec->state.mbs; - mb_end=mb+_dec->state.nmbs; - for(;mbmode!=OC_MODE_INVALID){ - oc_fragment *frag; - oc_mv mbmv; - int coded[13]; - int codedi; - int ncoded; - int mapi; - int mapii; - int fragi; - int mb_mode; - /*Search for at least one coded fragment.*/ - ncoded=mapii=0; - do{ - mapi=map_idxs[mapii]; - fragi=mb->map[mapi>>2][mapi&3]; - if(fragi>=0&&_dec->state.frags[fragi].coded)coded[ncoded++]=mapi; - } - while(++mapiimode; - switch(mb_mode){ - case OC_MODE_INTER_MV_FOUR:{ - oc_mv lbmvs[4]; - int bi; - /*Mark the tail of the list, so we don't accidentally go past it.*/ - coded[ncoded]=-1; - for(bi=codedi=0;bi<4;bi++){ - if(coded[codedi]==bi){ - codedi++; - frag=_dec->state.frags+mb->map[0][bi]; - frag->mbmode=mb_mode; - frag->mv[0]=lbmvs[bi][0]=(signed 
char)(*mv_comp_unpack)(&_dec->opb); - frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - } - else lbmvs[bi][0]=lbmvs[bi][1]=0; - } - if(codedi>0){ - last_mv[1][0]=last_mv[0][0]; - last_mv[1][1]=last_mv[0][1]; - last_mv[0][0]=lbmvs[coded[codedi-1]][0]; - last_mv[0][1]=lbmvs[coded[codedi-1]][1]; - } - if(codedistate.frags+mb->map[mapi>>2][bi]; - frag->mbmode=mb_mode; - frag->mv[0]=cbmvs[bi][0]; - frag->mv[1]=cbmvs[bi][1]; - } - } - }break; - case OC_MODE_INTER_MV:{ - last_mv[1][0]=last_mv[0][0]; - last_mv[1][1]=last_mv[0][1]; - mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); - mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - }break; - case OC_MODE_INTER_MV_LAST:{ - mbmv[0]=last_mv[0][0]; - mbmv[1]=last_mv[0][1]; - }break; - case OC_MODE_INTER_MV_LAST2:{ - mbmv[0]=last_mv[1][0]; - mbmv[1]=last_mv[1][1]; - last_mv[1][0]=last_mv[0][0]; - last_mv[1][1]=last_mv[0][1]; - last_mv[0][0]=mbmv[0]; - last_mv[0][1]=mbmv[1]; - }break; - case OC_MODE_GOLDEN_MV:{ - mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb); - mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - }break; - default:mbmv[0]=mbmv[1]=0;break; - } - /*4MV mode fills in the fragments itself. 
- For all other modes we can use this common code.*/ - if(mb_mode!=OC_MODE_INTER_MV_FOUR){ - for(codedi=0;codedimap[mapi>>2][mapi&3]; - frag=_dec->state.frags+fragi; - frag->mbmode=mb_mode; - frag->mv[0]=mbmv[0]; - frag->mv[1]=mbmv[1]; - } - } - } -} - -static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ - oc_fragment *frag; - int *coded_fragi; - int *coded_fragi_end; - int ncoded_fragis; - ncoded_fragis=_dec->state.ncoded_fragis[0]+ - _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2]; - if(ncoded_fragis<=0)return; - coded_fragi=_dec->state.coded_fragis; - coded_fragi_end=coded_fragi+ncoded_fragis; - if(_dec->state.nqis==1){ - /*If this frame has only a single qi value, then just set it in all coded - fragments.*/ - while(coded_fragistate.frags[*coded_fragi++].qi=_dec->state.qis[0]; - } - } - else{ - long val; - int flag; - int nqi1; - int run_count; - /*Otherwise, we decode a qi index for each fragment, using two passes of - the same binary RLE scheme used for super-block coded bits. - The first pass marks each fragment as having a qii of 0 or greater than - 0, and the second pass (if necessary), distinguishes between a qii of - 1 and 2. - At first we just store the qii in the fragment. - After all the qii's are decoded, we make a final pass to replace them - with the corresponding qi's for this frame.*/ - theorapackB_read1(&_dec->opb,&val); - flag=(int)val; - run_count=nqi1=0; - while(coded_fragiopb); - full_run=run_count>=4129; - do{ - _dec->state.frags[*coded_fragi++].qi=flag; - nqi1+=flag; - } - while(--run_count>0&&coded_fragiopb,&val); - flag=(int)val; - } - else flag=!flag; - } - /*TODO: run_count should be 0 here. 
- If it's not, we should issue a warning of some kind.*/ - /*If we have 3 different qi's for this frame, and there was at least one - fragment with a non-zero qi, make the second pass.*/ - if(_dec->state.nqis==3&&nqi1>0){ - /*Skip qii==0 fragments.*/ - for(coded_fragi=_dec->state.coded_fragis; - _dec->state.frags[*coded_fragi].qi==0;coded_fragi++); - theorapackB_read1(&_dec->opb,&val); - flag=(int)val; - while(coded_fragiopb); - full_run=run_count>=4129; - for(;coded_fragistate.frags+*coded_fragi; - if(frag->qi==0)continue; - if(run_count--<=0)break; - frag->qi+=flag; - } - if(full_run&&coded_fragiopb,&val); - flag=(int)val; - } - else flag=!flag; - } - /*TODO: run_count should be 0 here. - If it's not, we should issue a warning of some kind.*/ - } - /*Finally, translate qii's to qi's.*/ - for(coded_fragi=_dec->state.coded_fragis;coded_fragistate.frags+*coded_fragi; - frag->qi=_dec->state.qis[frag->qi]; - } - } -} - - - -/*Returns the decoded value of the given token. - It CANNOT be called for any of the EOB tokens. - _token: The token value to skip. - _extra_bits: The extra bits attached to this token. 
- Return: The decoded coefficient value.*/ -typedef int (*oc_token_dec1val_func)(int _token,int _extra_bits); - -/*Handles zero run tokens.*/ -static int oc_token_dec1val_zrl(void){ - return 0; -} - -/*Handles 1, -1, 2 and -2 tokens.*/ -static int oc_token_dec1val_const(int _token){ - static const int CONST_VALS[4]={1,-1,2,-2}; - return CONST_VALS[_token-OC_NDCT_ZRL_TOKEN_MAX]; -} - -/*Handles DCT value tokens category 2.*/ -static int oc_token_dec1val_cat2(int _token,int _extra_bits){ - int valsigned[2]; - valsigned[0]=_token-OC_DCT_VAL_CAT2+3; - valsigned[1]=-valsigned[0]; - return valsigned[_extra_bits]; -} - -/*Handles DCT value tokens categories 3 through 8.*/ -static int oc_token_dec1val_cati(int _token,int _extra_bits){ - static const int VAL_CAT_OFFS[6]={ - OC_NDCT_VAL_CAT2_SIZE+3, - OC_NDCT_VAL_CAT2_SIZE+5, - OC_NDCT_VAL_CAT2_SIZE+9, - OC_NDCT_VAL_CAT2_SIZE+17, - OC_NDCT_VAL_CAT2_SIZE+33, - OC_NDCT_VAL_CAT2_SIZE+65 - }; - static const int VAL_CAT_MASKS[6]={ - 0x001,0x003,0x007,0x00F,0x01F,0x1FF - }; - static const int VAL_CAT_SHIFTS[6]={1,2,3,4,5,9}; - int valsigned[2]; - int cati; - cati=_token-OC_NDCT_VAL_CAT2_MAX; - valsigned[0]=VAL_CAT_OFFS[cati]+(_extra_bits&VAL_CAT_MASKS[cati]); - valsigned[1]=-valsigned[0]; - return valsigned[_extra_bits>>VAL_CAT_SHIFTS[cati]&1]; -} - -/*A jump table for compute the first coefficient value the given token value - represents.*/ -static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS- - OC_NDCT_EOB_TOKEN_MAX]={ - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_const, - (oc_token_dec1val_func)oc_token_dec1val_const, - (oc_token_dec1val_func)oc_token_dec1val_const, - (oc_token_dec1val_func)oc_token_dec1val_const, - oc_token_dec1val_cat2, - oc_token_dec1val_cat2, - oc_token_dec1val_cat2, - oc_token_dec1val_cat2, - oc_token_dec1val_cati, - oc_token_dec1val_cati, - oc_token_dec1val_cati, - oc_token_dec1val_cati, - 
oc_token_dec1val_cati, - oc_token_dec1val_cati, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl, - (oc_token_dec1val_func)oc_token_dec1val_zrl -}; - -/*Returns the decoded value of the given token. - It CANNOT be called for any of the EOB tokens. - _token: The token value to skip. - _extra_bits: The extra bits attached to this token. - Return: The decoded coefficient value.*/ -static int oc_dct_token_dec1val(int _token,int _extra_bits){ - return (*OC_TOKEN_DEC1VAL_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token, - _extra_bits); -} - -/*Unpacks the DC coefficient tokens. - Unlike when unpacking the AC coefficient tokens, we actually need to decode - the DC coefficient values now so that we can do DC prediction. - _huff_idx: The index of the Huffman table to use for each color plane. - _ntoks_left: The number of tokens left to be decoded in each color plane for - each coefficient. - This is updated as EOB tokens and zero run tokens are decoded. 
- Return: The length of any outstanding EOB run.*/ -static int oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[3], - int _ntoks_left[3][64]){ - long val; - int *coded_fragi; - int *coded_fragi_end; - int run_counts[64]; - int cfi; - int eobi; - int eobs; - int ti; - int ebi; - int pli; - int rli; - eobs=0; - ti=ebi=0; - coded_fragi_end=coded_fragi=_dec->state.coded_fragis; - for(pli=0;pli<3;pli++){ - coded_fragi_end+=_dec->state.ncoded_fragis[pli]; - memset(run_counts,0,sizeof(run_counts)); - _dec->eob_runs[pli][0]=eobs; - /*Continue any previous EOB run, if there was one.*/ - for(eobi=eobs;eobi-->0&&coded_fragistate.frags[*coded_fragi++].dc=0; - } - cfi=0; - while(eobs<_ntoks_left[pli][0]-cfi){ - int token; - int neb; - int eb; - int skip; - cfi+=eobs; - run_counts[63]+=eobs; - token=oc_huff_token_decode(&_dec->opb, - _dec->huff_tables[_huff_idxs[pli]]); - _dec->dct_tokens[0][ti++]=(unsigned char)token; - neb=OC_DCT_TOKEN_EXTRA_BITS[token]; - if(neb){ - theorapackB_read(&_dec->opb,neb,&val); - eb=(int)val; - _dec->extra_bits[0][ebi++]=(ogg_uint16_t)eb; - } - else eb=0; - skip=oc_dct_token_skip(token,eb); - if(skip<0){ - eobs=eobi=-skip; - while(eobi-->0&&coded_fragistate.frags[*coded_fragi++].dc=0; - } - } - else{ - run_counts[skip-1]++; - cfi++; - eobs=0; - _dec->state.frags[*coded_fragi++].dc=oc_dct_token_dec1val(token,eb); - } - } - _dec->ti0[pli][0]=ti; - _dec->ebi0[pli][0]=ebi; - /*Set the EOB count to the portion of the last EOB run which extends past - this coefficient.*/ - eobs=eobs+cfi-_ntoks_left[pli][0]; - /*Add the portion of the last EOB which was included in this coefficient to - to the longest run length.*/ - run_counts[63]+=_ntoks_left[pli][0]-cfi; - /*And convert the run_counts array to a moment table.*/ - for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; - /*Finally, subtract off the number of coefficients that have been - accounted for by runs started in this coefficient.*/ - 
for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli]; - } - return eobs; -} - -/*Unpacks the AC coefficient tokens. - This can completely discard coefficient values while unpacking, and so is - somewhat simpler than unpacking the DC coefficient tokens. - _huff_idx: The index of the Huffman table to use for each color plane. - _ntoks_left: The number of tokens left to be decoded in each color plane for - each coefficient. - This is updated as EOB tokens and zero run tokens are decoded. - _eobs: The length of any outstanding EOB run from previous - coefficients. - Return: The length of any outstanding EOB run.*/ -static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[3], - int _ntoks_left[3][64],int _eobs){ - long val; - int run_counts[64]; - int cfi; - int ti; - int ebi; - int pli; - int rli; - ti=ebi=0; - for(pli=0;pli<3;pli++){ - memset(run_counts,0,sizeof(run_counts)); - _dec->eob_runs[pli][_zzi]=_eobs; - cfi=0; - while(_eobs<_ntoks_left[pli][_zzi]-cfi){ - int token; - int neb; - int eb; - int skip; - cfi+=_eobs; - run_counts[63]+=_eobs; - token=oc_huff_token_decode(&_dec->opb, - _dec->huff_tables[_huff_idxs[pli]]); - _dec->dct_tokens[_zzi][ti++]=(unsigned char)token; - neb=OC_DCT_TOKEN_EXTRA_BITS[token]; - if(neb){ - theorapackB_read(&_dec->opb,neb,&val); - eb=(int)val; - _dec->extra_bits[_zzi][ebi++]=(ogg_uint16_t)eb; - } - else eb=0; - skip=oc_dct_token_skip(token,eb); - if(skip<0)_eobs=-skip; - else{ - run_counts[skip-1]++; - cfi++; - _eobs=0; - } - } - _dec->ti0[pli][_zzi]=ti; - _dec->ebi0[pli][_zzi]=ebi; - /*Set the EOB count to the portion of the last EOB run which extends past - this coefficient.*/ - _eobs=_eobs+cfi-_ntoks_left[pli][_zzi]; - /*Add the portion of the last EOB which was included in this coefficient to - to the longest run length.*/ - run_counts[63]+=_ntoks_left[pli][_zzi]-cfi; - /*And convert the run_counts array to a moment table.*/ - for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; - /*Finally, subtract 
off the number of coefficients that have been - accounted for by runs started in this coefficient.*/ - for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli]; - } - return _eobs; -} - -/*Tokens describing the DCT coefficients that belong to each fragment are - stored in the bitstream grouped by coefficient, not by fragment. - - This means that we either decode all the tokens in order, building up a - separate coefficient list for each fragment as we go, and then go back and - do the iDCT on each fragment, or we have to create separate lists of tokens - for each coefficient, so that we can pull the next token required off the - head of the appropriate list when decoding a specific fragment. - - The former was VP3's choice, and it meant 2*w*h extra storage for all the - decoded coefficient values. - - We take the second option, which lets us store just one or three bytes per - token (generally far fewer than the number of coefficients, due to EOB - tokens and zero runs), and which requires us to only maintain a counter for - each of the 64 coefficients, instead of a counter for every fragment to - determine where the next token goes. - - Actually, we use 3 counters per coefficient, one for each color plane, so we - can decode all color planes simultaneously. 
- - This lets color conversion, etc., be done as soon as a full MCU (one or - two super block rows) is decoded, while the image data is still in cache.*/ - -static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){ - static const int OC_HUFF_LIST_MAX[5]={1,6,15,28,64}; - long val; - int ntoks_left[3][64]; - int huff_idxs[3]; - int pli; - int zzi; - int hgi; - int huffi_y; - int huffi_c; - int eobs; - for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ - ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli]; - } - theorapackB_read(&_dec->opb,4,&val); - huffi_y=(int)val; - theorapackB_read(&_dec->opb,4,&val); - huffi_c=(int)val; - huff_idxs[0]=huffi_y; - huff_idxs[1]=huff_idxs[2]=huffi_c; - _dec->eob_runs[0][0]=0; - eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left); - theorapackB_read(&_dec->opb,4,&val); - huffi_y=(int)val; - theorapackB_read(&_dec->opb,4,&val); - huffi_c=(int)val; - zzi=1; - for(hgi=1;hgi<5;hgi++){ - huff_idxs[0]=huffi_y+(hgi<<4); - huff_idxs[1]=huff_idxs[2]=huffi_c+(hgi<<4); - for(;zzi0); - *_zzi=zzi; -} - -/*Expands a constant, single-value token.*/ -static void oc_token_expand_const(int _token,int _extra_bits, - ogg_int16_t _dct_coeffs[128],int *_zzi){ - _dct_coeffs[(*_zzi)++]=(ogg_int16_t)oc_token_dec1val_const(_token); -} - -/*Expands category 2 single-valued tokens.*/ -static void oc_token_expand_cat2(int _token,int _extra_bits, - ogg_int16_t _dct_coeffs[128],int *_zzi){ - _dct_coeffs[(*_zzi)++]= - (ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits); -} - -/*Expands category 3 through 8 single-valued tokens.*/ -static void oc_token_expand_cati(int _token,int _extra_bits, - ogg_int16_t _dct_coeffs[128],int *_zzi){ - _dct_coeffs[(*_zzi)++]= - (ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits); -} - -/*Expands a category 1a zero run/value combo token.*/ -static void oc_token_expand_run_cat1a(int _token,int _extra_bits, - ogg_int16_t _dct_coeffs[128],int *_zzi){ - int zzi; - int rl; - zzi=*_zzi; - /*LOOP VECTORIZES.*/ - 
for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[zzi++]=0; - _dct_coeffs[zzi++]=(ogg_int16_t)(1-(_extra_bits<<1)); - *_zzi=zzi; -} - -/*Expands all other zero run/value combo tokens.*/ -static void oc_token_expand_run(int _token,int _extra_bits, - ogg_int16_t _dct_coeffs[128],int *_zzi){ - static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 6,10,1,2 - }; - static const int NZEROS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 3,7,0,1 - }; - static const int VALUE_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 0,0,0,1 - }; - static const int VALUE_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 0,0,1,1 - }; - static const int VALUE_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 1,1,2,2 - }; - static const int SIGN_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 2,3,1,2 - }; - int valsigned[2]; - int zzi; - int rl; - _token-=OC_DCT_RUN_CAT1B; - rl=(_extra_bits&NZEROS_MASK[_token])+NZEROS_ADJUST[_token]; - zzi=*_zzi; - /*LOOP VECTORIZES.*/ - while(rl-->0)_dct_coeffs[zzi++]=0; - valsigned[0]=VALUE_ADJUST[_token]+ - (_extra_bits>>VALUE_SHIFT[_token]&VALUE_MASK[_token]); - valsigned[1]=-valsigned[0]; - _dct_coeffs[zzi++]=(ogg_int16_t)valsigned[ - _extra_bits>>SIGN_SHIFT[_token]]; - *_zzi=zzi; -} - -/*A jump table for expanding token values into coefficient values. 
- This reduces all the conditional branches, etc., needed to parse these token - values down to one indirect jump.*/ -static const oc_token_expand_func OC_TOKEN_EXPAND_TABLE[TH_NDCT_TOKENS- - OC_NDCT_EOB_TOKEN_MAX]={ - oc_token_expand_zrl, - oc_token_expand_zrl, - oc_token_expand_const, - oc_token_expand_const, - oc_token_expand_const, - oc_token_expand_const, - oc_token_expand_cat2, - oc_token_expand_cat2, - oc_token_expand_cat2, - oc_token_expand_cat2, - oc_token_expand_cati, - oc_token_expand_cati, - oc_token_expand_cati, - oc_token_expand_cati, - oc_token_expand_cati, - oc_token_expand_cati, - oc_token_expand_run_cat1a, - oc_token_expand_run_cat1a, - oc_token_expand_run_cat1a, - oc_token_expand_run_cat1a, - oc_token_expand_run_cat1a, - oc_token_expand_run, - oc_token_expand_run, - oc_token_expand_run, - oc_token_expand_run -}; - -/*Expands a single token into the given coefficient list. - This fills in the zeros for zero runs as well as coefficient values, and - updates the index of the current coefficient. - It CANNOT be called for any of the EOB tokens. - _token: The token value to expand. - _extra_bits: The extra bits associated with the token. - _dct_coeffs: The current list of coefficients, in zig-zag order. - _zzi: A pointer to the zig-zag index of the next coefficient to write - to. 
- This is updated before the function returns.*/ -static void oc_dct_token_expand(int _token,int _extra_bits, - ogg_int16_t *_dct_coeffs,int *_zzi){ - (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token, - _extra_bits,_dct_coeffs,_zzi); -} - - - -static int oc_dec_postprocess_init(oc_dec_ctx *_dec){ - /*pp_level 0: disabled; free any memory used and return*/ - if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){ - if(_dec->dc_qis!=NULL){ - _ogg_free(_dec->dc_qis); - _dec->dc_qis=NULL; - _ogg_free(_dec->variances); - _dec->variances=NULL; - _ogg_free(_dec->pp_frame_data); - _dec->pp_frame_data=NULL; - } - return 1; - } - if(_dec->dc_qis==NULL){ - /*If we haven't been tracking DC quantization indices, there's no point in - starting now.*/ - if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1; - _dec->dc_qis=(unsigned char *)_ogg_malloc( - _dec->state.nfrags*sizeof(_dec->dc_qis[0])); - memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags); - } - else{ - int *coded_fragi; - int *coded_fragi_end; - unsigned char qi0; - /*Update the DC quantization index of each coded block.*/ - qi0=(unsigned char)_dec->state.qis[0]; - coded_fragi_end=_dec->state.coded_fragis+_dec->state.ncoded_fragis[0]+ - _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2]; - for(coded_fragi=_dec->state.coded_fragis;coded_fragidc_qis[*coded_fragi]=qi0; - } - } - /*pp_level 1: Stop after updating DC quantization indices.*/ - if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){ - if(_dec->variances!=NULL){ - _ogg_free(_dec->variances); - _dec->variances=NULL; - _ogg_free(_dec->pp_frame_data); - _dec->pp_frame_data=NULL; - } - return 1; - } - if(_dec->variances==NULL|| - _dec->pp_frame_has_chroma!=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){ - size_t frame_sz; - frame_sz=_dec->state.info.frame_width*_dec->state.info.frame_height; - if(_dec->pp_levelvariances=(int *)_ogg_realloc(_dec->variances, - _dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0])); - _dec->pp_frame_data=(unsigned char *)_ogg_realloc( - 
_dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0])); - _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; - _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; - _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width; - _dec->pp_frame_buf[0].data=_dec->pp_frame_data+ - (1-_dec->pp_frame_buf[0].height)*_dec->pp_frame_buf[0].stride; - } - else{ - size_t y_sz; - size_t c_sz; - int c_w; - int c_h; - _dec->variances=(int *)_ogg_realloc(_dec->variances, - _dec->state.nfrags*sizeof(_dec->variances[0])); - y_sz=frame_sz; - c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); - c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); - c_sz=c_w*c_h; - frame_sz+=c_sz<<1; - _dec->pp_frame_data=(unsigned char *)_ogg_realloc( - _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0])); - _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; - _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; - _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width; - _dec->pp_frame_buf[0].data=_dec->pp_frame_data; - _dec->pp_frame_buf[1].width=c_w; - _dec->pp_frame_buf[1].height=c_h; - _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width; - _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz; - _dec->pp_frame_buf[2].width=c_w; - _dec->pp_frame_buf[2].height=c_h; - _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width; - _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz; - oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf); - } - _dec->pp_frame_has_chroma=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC); - } - /*If we're not processing chroma, copy the reference frame's chroma planes.*/ - if(_dec->pp_levelpp_frame_buf+1, - _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1, - sizeof(_dec->pp_frame_buf[1])*2); - } - return 0; -} - - - -typedef struct{ - int ti[3][64]; - int ebi[3][64]; - int eob_runs[3][64]; - int bounding_values[256]; - int *coded_fragis[3]; - int *uncoded_fragis[3]; 
- int fragy0[3]; - int fragy_end[3]; - int ncoded_fragis[3]; - int nuncoded_fragis[3]; - int pred_last[3][3]; - int mcu_nvfrags; - int loop_filter; - int pp_level; -}oc_dec_pipeline_state; - - - -/*Initialize the main decoding pipeline.*/ -static void oc_dec_pipeline_init(oc_dec_ctx *_dec, - oc_dec_pipeline_state *_pipe){ - int *coded_fragi_end; - int *uncoded_fragi_end; - int pli; - /*If chroma is sub-sampled in the vertical direction, we have to decode two - super block rows of Y' for each super block row of Cb and Cr.*/ - _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2); - /*Initialize the token and extra bits indices for each plane and - coefficient.*/ - memset(_pipe->ti[0],0,sizeof(_pipe->ti[0])); - memset(_pipe->ebi[0],0,sizeof(_pipe->ebi[0])); - for(pli=1;pli<3;pli++){ - memcpy(_pipe->ti[pli],_dec->ti0[pli-1],sizeof(_pipe->ti[0])); - memcpy(_pipe->ebi[pli],_dec->ebi0[pli-1],sizeof(_pipe->ebi[0])); - } - /*Also copy over the initial the EOB run counts.*/ - memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs)); - /*Set up per-plane pointers to the coded and uncoded fragments lists.*/ - coded_fragi_end=_dec->state.coded_fragis; - uncoded_fragi_end=_dec->state.uncoded_fragis; - for(pli=0;pli<3;pli++){ - _pipe->coded_fragis[pli]=coded_fragi_end; - _pipe->uncoded_fragis[pli]=uncoded_fragi_end; - coded_fragi_end+=_dec->state.ncoded_fragis[pli]; - uncoded_fragi_end-=_dec->state.nuncoded_fragis[pli]; - } - /*Set the previous DC predictor to 0 for all color planes and frame types.*/ - memset(_pipe->pred_last,0,sizeof(_pipe->pred_last)); - /*Initialize the bounding value array for the loop filter.*/ - _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state, - _pipe->bounding_values); - /*Initialize any buffers needed for post-processing. 
- We also save the current post-processing level, to guard against the user - changing it from a callback.*/ - if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level; - /*If we don't have enough information to post-process, disable it, regardless - of the user-requested level.*/ - else{ - _pipe->pp_level=OC_PP_LEVEL_DISABLED; - memcpy(_dec->pp_frame_buf, - _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]], - sizeof(_dec->pp_frame_buf[0])*3); - } -} - -/*Undo the DC prediction in a single plane of an MCU (one or two super block - rows). - As a side effect, the number of coded and uncoded fragments in this plane of - the MCU is also computed.*/ -static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec, - oc_dec_pipeline_state *_pipe,int _pli){ - /*Undo the DC prediction.*/ - oc_fragment_plane *fplane; - oc_fragment *frag; - int *pred_last; - int ncoded_fragis; - int fragx; - int fragy; - int fragy0; - int fragy_end; - /*Compute the first and last fragment row of the current MCU for this - plane.*/ - fplane=_dec->state.fplanes+_pli; - fragy0=_pipe->fragy0[_pli]; - fragy_end=_pipe->fragy_end[_pli]; - frag=_dec->state.frags+fplane->froffset+(fragy0*fplane->nhfrags); - ncoded_fragis=0; - pred_last=_pipe->pred_last[_pli]; - for(fragy=fragy0;fragynhfrags;fragx++,frag++){ - if(!frag->coded)continue; - pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+= - oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last); - ncoded_fragis++; - } - } - _pipe->ncoded_fragis[_pli]=ncoded_fragis; - /*Also save the number of uncoded fragments so we know how many to copy.*/ - _pipe->nuncoded_fragis[_pli]= - (fragy_end-fragy0)*fplane->nhfrags-ncoded_fragis; -} - -/*Reconstructs all coded fragments in a single MCU (one or two super block - rows). 
- This requires that each coded fragment have a proper macro block mode and - motion vector (if not in INTRA mode), and have it's DC value decoded, with - the DC prediction process reversed, and the number of coded and uncoded - fragments in this plane of the MCU be counted. - The token lists for each color plane and coefficient should also be filled - in, along with initial token offsets, extra bits offsets, and EOB run - counts.*/ -static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, - oc_dec_pipeline_state *_pipe,int _pli){ - /*Decode the AC coefficients.*/ - int *ti; - int *ebi; - int *eob_runs; - int *coded_fragi; - int *coded_fragi_end; - ti=_pipe->ti[_pli]; - ebi=_pipe->ebi[_pli]; - eob_runs=_pipe->eob_runs[_pli]; - coded_fragi_end=coded_fragi=_pipe->coded_fragis[_pli]; - coded_fragi_end+=_pipe->ncoded_fragis[_pli]; - for(;coded_fragistate.frags+fragi; - for(zzi=0;zzi<64;){ - int token; - int eb; - last_zzi=zzi; - if(eob_runs[zzi]){ - eob_runs[zzi]--; - break; - } - else{ - int ebflag; - token=_dec->dct_tokens[zzi][ti[zzi]++]; - ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0; - eb=_dec->extra_bits[zzi][ebi[zzi]]&-ebflag; - ebi[zzi]+=ebflag; - if(tokendc; - iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli]; - /*last_zzi is always initialized. - If your compiler thinks otherwise, it is dumb.*/ - oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi, - iquants[_dec->state.qis[0]][0],iquants[frag->qi]); - } - _pipe->coded_fragis[_pli]=coded_fragi; - /*Right now the reconstructed MCU has only the coded blocks in it.*/ - /*TODO: We make the decision here to always copy the uncoded blocks into it - from the reference frame. - We could also copy the coded blocks back over the reference frame, if we - wait for an additional MCU to be decoded, which might be faster if only a - small number of blocks are coded. - However, this introduces more latency, creating a larger cache footprint. 
- It's unknown which decision is better, but this one results in simpler - code, and the hard case (high bitrate, high resolution) is handled - correctly.*/ - /*Copy the uncoded blocks from the previous reference frame.*/ - _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; - oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli], - _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); -} - -/*Filter a horizontal block edge.*/ -static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,int _qstep,int _flimit, - int *_variance0,int *_variance1){ - unsigned char *rdst; - const unsigned char *rsrc; - unsigned char *cdst; - const unsigned char *csrc; - int r[10]; - int sum0; - int sum1; - int bx; - int by; - rdst=_dst; - rsrc=_src; - for(bx=0;bx<8;bx++){ - cdst=rdst; - csrc=rsrc; - for(by=0;by<10;by++){ - r[by]=*csrc; - csrc+=_src_ystride; - } - sum0=sum1=0; - for(by=0;by<4;by++){ - sum0+=abs(r[by+1]-r[by]); - sum1+=abs(r[by+5]-r[by+6]); - } - *_variance0+=OC_MINI(255,sum0); - *_variance1+=OC_MINI(255,sum1); - if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ - *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); - cdst+=_dst_ystride; - *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); - cdst+=_dst_ystride; - for(by=0;by<4;by++){ - *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+ - r[by+4]+r[by+5]+r[by+6]+4>>3); - cdst+=_dst_ystride; - } - *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); - cdst+=_dst_ystride; - *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); - } - else{ - for(by=1;by<=8;by++){ - *cdst=(unsigned char)r[by]; - cdst+=_dst_ystride; - } - } - rdst++; - rsrc++; - } -} - -/*Filter a vertical block edge.*/ -static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride, - int _qstep,int _flimit,int *_variances){ - unsigned char *rdst; - const unsigned char *rsrc; - unsigned char *cdst; - int r[10]; - int sum0; - int 
sum1; - int bx; - int by; - cdst=_dst; - for(by=0;by<8;by++){ - rsrc=cdst-1; - rdst=cdst; - for(bx=0;bx<10;bx++)r[bx]=*rsrc++; - sum0=sum1=0; - for(bx=0;bx<4;bx++){ - sum0+=abs(r[bx+1]-r[bx]); - sum1+=abs(r[bx+5]-r[bx+6]); - } - _variances[0]+=OC_MINI(255,sum0); - _variances[1]+=OC_MINI(255,sum1); - if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ - *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); - *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); - for(bx=0;bx<4;bx++){ - *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+ - r[bx+4]+r[bx+5]+r[bx+6]+4>>3); - } - *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); - *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); - } - else for(bx=1;bx<=8;bx++)*rdst++=(unsigned char)r[bx]; - cdst+=_dst_ystride; - } -} - -static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec, - th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0, - int _fragy_end){ - oc_fragment_plane *fplane; - int *variance; - unsigned char *dc_qi; - unsigned char *dst; - const unsigned char *src; - int notstart; - int notdone; - int froffset; - int flimit; - int qstep; - int y_end; - int y; - int x; - _dst+=_pli; - _src+=_pli; - fplane=_dec->state.fplanes+_pli; - froffset=fplane->froffset+_fragy0*fplane->nhfrags; - variance=_dec->variances+froffset; - dc_qi=_dec->dc_qis+froffset; - notstart=_fragy0>0; - notdone=_fragy_endnvfrags; - /*We want to clear an extra row of variances, except at the end.*/ - memset(variance+(fplane->nhfrags&-notstart),0, - (_fragy_end+notdone-_fragy0-notstart)*fplane->nhfrags*sizeof(variance[0])); - /*Except for the first time, we want to point to the middle of the row.*/ - y=(_fragy0<<3)+(notstart<<2); - dst=_dst->data+y*_dst->stride; - src=_src->data+y*_src->stride; - for(;y<4;y++){ - memcpy(dst,src,_dst->width*sizeof(dst[0])); - dst+=_dst->stride; - src+=_src->stride; - } - /*We also want to skip the last row in the frame for this loop.*/ - 
y_end=_fragy_end-!notdone<<3; - for(;ypp_dc_scale[*dc_qi]; - flimit=(qstep*3)>>2; - oc_filter_hedge(dst,_dst->stride,src-_src->stride,_src->stride, - qstep,flimit,variance,variance+fplane->nhfrags); - variance++; - dc_qi++; - for(x=8;x<_dst->width;x+=8){ - qstep=_dec->pp_dc_scale[*dc_qi]; - flimit=(qstep*3)>>2; - oc_filter_hedge(dst+x,_dst->stride,src+x-_src->stride,_src->stride, - qstep,flimit,variance,variance+fplane->nhfrags); - oc_filter_vedge(dst+x-(_dst->stride<<2)-4,_dst->stride, - qstep,flimit,variance-1); - variance++; - dc_qi++; - } - dst+=_dst->stride<<3; - src+=_src->stride<<3; - } - /*And finally, handle the last row in the frame, if it's in the range.*/ - if(!notdone){ - for(;y<_dst->height;y++){ - memcpy(dst,src,_dst->width*sizeof(dst[0])); - dst+=_dst->stride; - src+=_src->stride; - } - /*Filter the last row of vertical block edges.*/ - dc_qi++; - for(x=8;x<_dst->width;x+=8){ - qstep=_dec->pp_dc_scale[*dc_qi++]; - flimit=(qstep*3)>>2; - oc_filter_vedge(dst+x-(_dst->stride<<3)-4,_dst->stride, - qstep,flimit,variance++); - } - } -} - -static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, - int _dc_scale,int _sharp_mod,int _strong){ - static const int MOD_MAX[2]={24,32}; - static const int MOD_SHIFT[2]={1,0}; - const unsigned char *psrc; - const unsigned char *src; - const unsigned char *nsrc; - unsigned char *dst; - int vmod[72]; - int hmod[72]; - int mod_hi; - int by; - int bx; - mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]); - dst=_idata; - src=dst; - psrc=src-(_ystride&-!(_b&4)); - for(by=0;by<9;by++){ - for(bx=0;bx<8;bx++){ - int mod; - mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7); - for(bx=1;bx<7;bx++){ - a=128; - b=64; - w=hmod[(bx<<3)+by]; - a-=w; - b+=w*src[bx-1]; - w=vmod[(by<<3)+bx]; - a-=w; - b+=w*psrc[bx]; - w=vmod[(by+1<<3)+bx]; - a-=w; - b+=w*nsrc[bx]; - w=hmod[(bx+1<<3)+by]; - a-=w; - b+=w*src[bx+1]; - dst[bx]=OC_CLAMP255(a*src[bx]+b>>7); - } - a=128; - b=64; - w=hmod[(7<<3)+by]; - a-=w; - b+=w*src[6]; - 
w=vmod[(by<<3)+7]; - a-=w; - b+=w*psrc[7]; - w=vmod[(by+1<<3)+7]; - a-=w; - b+=w*nsrc[7]; - w=hmod[(8<<3)+by]; - a-=w; - b+=w*src[7+!(_b&2)]; - dst[7]=OC_CLAMP255(a*src[7]+b>>7); - dst+=_ystride; - psrc=src; - src=nsrc; - nsrc+=_ystride&-(!(_b&8)|by<6); - } -} - -#define OC_DERING_THRESH1 (384) -#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) -#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) -#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) - -static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, - int _pli,int _fragy0,int _fragy_end){ - th_img_plane *iplane; - oc_fragment_plane *fplane; - oc_fragment *frag; - int *variance; - unsigned char *idata; - int sthresh; - int strong; - int froffset; - int y_end; - int y; - int x; - iplane=_img+_pli; - fplane=_dec->state.fplanes+_pli; - froffset=fplane->froffset+_fragy0*fplane->nhfrags; - variance=_dec->variances+froffset; - frag=_dec->state.frags+froffset; - strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); - sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3; - y=_fragy0<<3; - idata=iplane->data+y*iplane->stride; - y_end=_fragy_end<<3; - for(;ywidth;x+=8){ - int b; - int qi; - int var; - qi=frag->qi; - var=*variance; - b=(x<=0)|(x+8>=iplane->width)<<1|(y<=0)<<2|(y+8>=iplane->height)<<3; - if(strong&&var>sthresh){ - oc_dering_block(idata+x,iplane->stride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| - !(b&2)&&variance[1]>OC_DERING_THRESH4|| - !(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4|| - !(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){ - oc_dering_block(idata+x,iplane->stride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - oc_dering_block(idata+x,iplane->stride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - } - } - else if(var>OC_DERING_THRESH2){ - oc_dering_block(idata+x,iplane->stride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - } - else if(var>OC_DERING_THRESH1){ - 
oc_dering_block(idata+x,iplane->stride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0); - } - frag++; - variance++; - } - idata+=iplane->stride<<3; - } -} - - - -th_dec_ctx *th_decode_alloc(const th_info *_info, - const th_setup_info *_setup){ - oc_dec_ctx *dec; - if(_info==NULL||_setup==NULL)return NULL; - dec=_ogg_malloc(sizeof(*dec)); - if(oc_dec_init(dec,_info,_setup)<0){ - _ogg_free(dec); - return NULL; - } - dec->state.curframe_num=0; - return dec; -} - -void th_decode_free(th_dec_ctx *_dec){ - if(_dec!=NULL){ - oc_dec_clear(_dec); - _ogg_free(_dec); - } -} - -int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, - size_t _buf_sz){ - switch(_req){ - case TH_DECCTL_GET_PPLEVEL_MAX:{ - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - (*(int *)_buf)=OC_PP_LEVEL_MAX; - return 0; - }break; - case TH_DECCTL_SET_PPLEVEL:{ - int pp_level; - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - pp_level=*(int *)_buf; - if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL; - _dec->pp_level=pp_level; - return 0; - }break; - case TH_DECCTL_SET_GRANPOS:{ - ogg_int64_t granpos; - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL; - granpos=*(ogg_int64_t *)_buf; - if(granpos<0)return TH_EINVAL; - _dec->state.granpos=granpos; - _dec->state.keyframe_num= - granpos>>_dec->state.info.keyframe_granule_shift; - _dec->state.curframe_num=_dec->state.keyframe_num+ - (granpos&(1<<_dec->state.info.keyframe_granule_shift)-1); - return 0; - }break; - case TH_DECCTL_SET_STRIPE_CB:{ - th_stripe_callback *cb; - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL; - cb=(th_stripe_callback *)_buf; - _dec->stripe_cb.ctx=cb->ctx; - _dec->stripe_cb.stripe_decoded=cb->stripe_decoded; - return 0; - }break; - default:return TH_EIMPL; - } -} - -int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, - 
ogg_int64_t *_granpos){ - int ret; - if(_dec==NULL||_op==NULL)return TH_EFAULT; - /*A completely empty packet indicates a dropped frame and is treated exactly - like an inter frame with no coded blocks. - Only proceed if we have a non-empty packet.*/ - if(_op->bytes!=0){ - oc_dec_pipeline_state pipe; - th_ycbcr_buffer stripe_buf; - int stripe_fragy; - int refi; - int pli; - int notstart; - int notdone; - theorapackB_readinit(&_dec->opb,_op->packet,_op->bytes); - ret=oc_dec_frame_header_unpack(_dec); - if(ret<0)return ret; - /*Select a free buffer to use for the reconstructed version of this - frame.*/ - if(_dec->state.frame_type!=OC_INTRA_FRAME&& - (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| - _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){ - th_info *info; - size_t yplane_sz; - size_t cplane_sz; - int yhstride; - int yvstride; - int chstride; - int cvstride; - /*We're decoding an INTER frame, but have no initialized reference - buffers (i.e., decoding did not start on a key frame). - We initialize them to a solid gray here.*/ - _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0; - _dec->state.ref_frame_idx[OC_FRAME_PREV]=0; - _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1; - info=&_dec->state.info; - yhstride=info->frame_width+2*OC_UMV_PADDING; - yvstride=info->frame_height+2*OC_UMV_PADDING; - chstride=yhstride>>!(info->pixel_fmt&1); - cvstride=yvstride>>!(info->pixel_fmt&2); - yplane_sz=(size_t)yhstride*yvstride; - cplane_sz=(size_t)chstride*cvstride; - memset(_dec->state.ref_frame_data,0x80,yplane_sz+2*cplane_sz); - } - else{ - for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]|| - refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++); - _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; - } - if(_dec->state.frame_type==OC_INTRA_FRAME){ - oc_dec_mark_all_intra(_dec); - _dec->state.keyframe_num=_dec->state.curframe_num; - }else{ - oc_dec_coded_flags_unpack(_dec); - oc_dec_mb_modes_unpack(_dec); - oc_dec_mv_unpack_and_frag_modes_fill(_dec); - } - 
oc_dec_block_qis_unpack(_dec); - oc_dec_residual_tokens_unpack(_dec); - /*Update granule position. - This must be done before the striped decode callbacks so that the - application knows what to do with the frame data.*/ - _dec->state.granpos= - (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+ - (_dec->state.curframe_num-_dec->state.keyframe_num); - _dec->state.curframe_num++; - if(_granpos!=NULL)*_granpos=_dec->state.granpos; - /*All of the rest of the operations -- DC prediction reversal, - reconstructing coded fragments, copying uncoded fragments, loop - filtering, extending borders, and out-of-loop post-processing -- should - be pipelined. - I.e., DC prediction reversal, reconstruction, and uncoded fragment - copying are done for one or two super block rows, then loop filtering is - run as far as it can, then bordering copying, then post-processing. - For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super - block rows, and one chroma. - Otherwise, an MCU consists of one super block row from each plane. - Inside each MCU, we perform all of the steps on one color plane before - moving on to the next. - After reconstruction, the additional filtering stages introduce a delay - since they need some pixels from the next fragment row. - Thus the actual number of decoded rows available is slightly smaller for - the first MCU, and slightly larger for the last. - - This entire process allows us to operate on the data while it is still in - cache, resulting in big performance improvements. - An application callback allows further application processing (blitting - to video memory, color conversion, etc.) 
to also use the data while it's - in cache.*/ - oc_dec_pipeline_init(_dec,&pipe); - oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf); - notstart=0; - notdone=1; - for(stripe_fragy=notstart=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){ - int avail_fragy0; - int avail_fragy_end; - avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags; - notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli; - /*Compute the first and last fragment row of the current MCU for this - plane.*/ - frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2); - pipe.fragy0[pli]=stripe_fragy>>frag_shift; - pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags, - pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift)); - oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli); - oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli); - sdelay=edelay=0; - if(pipe.loop_filter){ - sdelay+=notstart; - edelay+=notdone; - oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values, - refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); - } - /*To fill the borders, we have an additional two pixel delay, since a - fragment in the next row could filter its top edge, using two pixels - from a fragment in this row. 
- But there's no reason to delay a full fragment between the two.*/ - oc_state_borders_fill_rows(&_dec->state,refi,pli, - (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1), - (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1)); - /*Out-of-loop post-processing.*/ - pp_offset=3*(pli!=0); - if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){ - /*Perform de-blocking in one plane.*/ - sdelay+=notstart; - edelay+=notdone; - oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf, - _dec->state.ref_frame_bufs[refi],pli, - pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); - if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){ - /*Perform de-ringing in one plane.*/ - sdelay+=notstart; - edelay+=notdone; - oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli, - pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); - } - } - /*If no post-processing is done, we still need to delay a row for the - loop filter, thanks to the strange filtering order VP3 chose.*/ - else if(pipe.loop_filter){ - sdelay+=notstart; - edelay+=notdone; - } - /*Compute the intersection of the available rows in all planes. 
- If chroma is sub-sampled, the effect of each of its delays is - doubled, but luma might have more post-processing filters enabled - than chroma, so we don't know up front which one is the limiting - factor.*/ - avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){ - /*Make the callback, ensuring we flip the sense of the "start" and - "end" of the available region upside down.*/ - (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf, - _dec->state.fplanes[0].nvfrags-avail_fragy_end, - _dec->state.fplanes[0].nvfrags-avail_fragy0); - } - notstart=1; - } - /*Finish filling in the reference frame borders.*/ - for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli); - /*Update the reference frame indices.*/ - if(_dec->state.frame_type==OC_INTRA_FRAME){ - /*The new frame becomes both the previous and gold reference frames.*/ - _dec->state.ref_frame_idx[OC_FRAME_GOLD]= - _dec->state.ref_frame_idx[OC_FRAME_PREV]= - _dec->state.ref_frame_idx[OC_FRAME_SELF]; - } - else{ - /*Otherwise, just replace the previous reference frame.*/ - _dec->state.ref_frame_idx[OC_FRAME_PREV]= - _dec->state.ref_frame_idx[OC_FRAME_SELF]; - } -#if defined(OC_DUMP_IMAGES) - /*Don't dump images for dropped frames.*/ - oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec"); -#endif - return 0; - } - else{ - /*Just update the granule position and return.*/ - _dec->state.granpos= - (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+ - (_dec->state.curframe_num-_dec->state.keyframe_num); - _dec->state.curframe_num++; - if(_granpos!=NULL)*_granpos=_dec->state.granpos; - return TH_DUPFRAME; - } -} - -int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){ - oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf); - return 0; -} diff --git a/Engine/lib/libtheora/lib/dec/fragment.c b/Engine/lib/libtheora/lib/dec/fragment.c deleted file mode 100644 index 77f1c7f6b..000000000 --- a/Engine/lib/libtheora/lib/dec/fragment.c +++ 
/dev/null @@ -1,199 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: fragment.c 15469 2008-10-30 12:49:42Z tterribe $ - - ********************************************************************/ - -#include "../internal.h" - -void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst, - int _dst_ystride,const ogg_int16_t *_residue){ - _state->opt_vtable.frag_recon_intra(_dst,_dst_ystride,_residue); -} - -void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t *_residue){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++){ - int res; - res=*_residue++; - _dst[j]=OC_CLAMP255(res+128); - } - _dst+=_dst_ystride; - } -} - -void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, - int _dst_ystride,const unsigned char *_src,int _src_ystride, - const ogg_int16_t *_residue){ - _state->opt_vtable.frag_recon_inter(_dst,_dst_ystride,_src,_src_ystride, - _residue); -} - -void oc_frag_recon_inter_c(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++){ - int res; - res=*_residue++; - _dst[j]=OC_CLAMP255(res+_src[j]); - } - _dst+=_dst_ystride; - _src+=_src_ystride; - } -} - -void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst, - int _dst_ystride,const unsigned char *_src1,int _src1_ystride, - const unsigned char *_src2,int 
_src2_ystride,const ogg_int16_t *_residue){ - _state->opt_vtable.frag_recon_inter2(_dst,_dst_ystride,_src1,_src1_ystride, - _src2,_src2_ystride,_residue); -} - -void oc_frag_recon_inter2_c(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++){ - int res; - res=*_residue++; - _dst[j]=OC_CLAMP255(res+((int)_src1[j]+_src2[j]>>1)); - } - _dst+=_dst_ystride; - _src1+=_src1_ystride; - _src2+=_src2_ystride; - } -} - -/*Computes the predicted DC value for the given fragment. - This requires that the fully decoded DC values be available for the left, - upper-left, upper, and upper-right fragments (if they exist). - _frag: The fragment to predict the DC value for. - _fplane: The fragment plane the fragment belongs to. - _x: The x-coordinate of the fragment. - _y: The y-coordinate of the fragment. - _pred_last: The last fully-decoded DC value for each predictor frame - (OC_FRAME_GOLD, OC_FRAME_PREV and OC_FRAME_SELF). - This should be initialized to 0's for the first fragment in each - color plane. 
- Return: The predicted DC value for this fragment.*/ -int oc_frag_pred_dc(const oc_fragment *_frag, - const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]){ - static const int PRED_SCALE[16][4]={ - /*0*/ - {0,0,0,0}, - /*OC_PL*/ - {1,0,0,0}, - /*OC_PUL*/ - {1,0,0,0}, - /*OC_PL|OC_PUL*/ - {1,0,0,0}, - /*OC_PU*/ - {1,0,0,0}, - /*OC_PL|OC_PU*/ - {1,1,0,0}, - /*OC_PUL|OC_PU*/ - {0,1,0,0}, - /*OC_PL|OC_PUL|PC_PU*/ - {29,-26,29,0}, - /*OC_PUR*/ - {1,0,0,0}, - /*OC_PL|OC_PUR*/ - {75,53,0,0}, - /*OC_PUL|OC_PUR*/ - {1,1,0,0}, - /*OC_PL|OC_PUL|OC_PUR*/ - {75,0,53,0}, - /*OC_PU|OC_PUR*/ - {1,0,0,0}, - /*OC_PL|OC_PU|OC_PUR*/ - {75,0,53,0}, - /*OC_PUL|OC_PU|OC_PUR*/ - {3,10,3,0}, - /*OC_PL|OC_PUL|OC_PU|OC_PUR*/ - {29,-26,29,0} - }; - static const int PRED_SHIFT[16]={0,0,0,0,0,1,0,5,0,7,1,7,0,7,4,5}; - static const int PRED_RMASK[16]={0,0,0,0,0,1,0,31,0,127,1,127,0,127,15,31}; - static const int BC_MASK[8]={ - /*No boundary condition.*/ - OC_PL|OC_PUL|OC_PU|OC_PUR, - /*Left column.*/ - OC_PU|OC_PUR, - /*Top row.*/ - OC_PL, - /*Top row, left column.*/ - 0, - /*Right column.*/ - OC_PL|OC_PUL|OC_PU, - /*Right and left column.*/ - OC_PU, - /*Top row, right column.*/ - OC_PL, - /*Top row, right and left column.*/ - 0 - }; - /*Predictor fragments, left, up-left, up, up-right.*/ - const oc_fragment *predfr[4]; - /*The frame used for prediction for this fragment.*/ - int pred_frame; - /*The boundary condition flags.*/ - int bc; - /*DC predictor values: left, up-left, up, up-right, missing values - skipped.*/ - int p[4]; - /*Predictor count.*/ - int np; - /*Which predictor constants to use.*/ - int pflags; - /*The predicted DC value.*/ - int ret; - int i; - pred_frame=OC_FRAME_FOR_MODE[_frag->mbmode]; - bc=(_x==0)+((_y==0)<<1)+((_x+1==_fplane->nhfrags)<<2); - predfr[0]=_frag-1; - predfr[1]=_frag-_fplane->nhfrags-1; - predfr[2]=predfr[1]+1; - predfr[3]=predfr[2]+1; - np=0; - pflags=0; - for(i=0;i<4;i++){ - int pflag; - pflag=1<coded&& - 
OC_FRAME_FOR_MODE[predfr[i]->mbmode]==pred_frame){ - p[np++]=predfr[i]->dc; - pflags|=pflag; - } - } - if(pflags==0)return _pred_last[pred_frame]; - else{ - ret=PRED_SCALE[pflags][0]*p[0]; - /*LOOP VECTORIZES.*/ - for(i=1;i128)ret=p[2]; - else if(abs(ret-p[0])>128)ret=p[0]; - else if(abs(ret-p[1])>128)ret=p[1]; - } - return ret; -} diff --git a/Engine/lib/libtheora/lib/dec/huffdec.c b/Engine/lib/libtheora/lib/dec/huffdec.c deleted file mode 100644 index 86c52b62f..000000000 --- a/Engine/lib/libtheora/lib/dec/huffdec.c +++ /dev/null @@ -1,325 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: huffdec.c 15431 2008-10-21 05:04:02Z giles $ - - ********************************************************************/ - -#include -#include -#include "huffdec.h" -#include "decint.h" - - -/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/ -#define _ogg_offsetof(_type,_field)\ - ((size_t)((char *)&((_type *)0)->_field-(char *)0)) - -/*These two functions are really part of the bitpack.c module, but - they are only used here. Declaring local static versions so they - can be inlined saves considerable function call overhead.*/ - -/*Read in bits without advancing the bitptr. 
- Here we assume 0<=_bits&&_bits<=32.*/ -static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret){ - long ret; - long m; - long d; - m=32-_bits; - _bits+=_b->endbit; - d=_b->storage-_b->endbyte; - if(d<=4){ - /*Not the main path.*/ - if(d<=0){ - *_ret=0L; - return -(_bits>d*8); - } - /*If we have some bits left, but not enough, return the ones we have.*/ - if(d*8<_bits)_bits=d*8; - } - ret=_b->ptr[0]<<24+_b->endbit; - if(_bits>8){ - ret|=_b->ptr[1]<<16+_b->endbit; - if(_bits>16){ - ret|=_b->ptr[2]<<8+_b->endbit; - if(_bits>24){ - ret|=_b->ptr[3]<<_b->endbit; - if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit; - } - } - } - *_ret=((ret&0xFFFFFFFF)>>(m>>1))>>(m+1>>1); - return 0; -} - -/*advance the bitptr*/ -static void theorapackB_adv(oggpack_buffer *_b,int _bits){ - _bits+=_b->endbit; - _b->ptr+=_bits>>3; - _b->endbyte+=_bits>>3; - _b->endbit=_bits&7; -} - - -/*The log_2 of the size of a lookup table is allowed to grow to relative to - the number of unique nodes it contains. - E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is - wasted (each node will have an amortized cost of at most 20 bytes when using - 4-byte pointers). - Larger numbers can decode tokens with fewer read operations, while smaller - numbers may save more space (requiring as little as 8 bytes amortized per - node, though there will be more nodes). - With a sample file: - 32233473 read calls are required when no tree collapsing is done (100.0%). - 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%). - 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%). - 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%). - 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%). - Since a value of 1 gets us the vast majority of the speed-up with only a - small amount of wasted memory, this is what we use.*/ -#define OC_HUFF_SLUSH (1) - - -/*Allocates a Huffman tree node that represents a subtree of depth _nbits. 
- _nbits: The depth of the subtree. - If this is 0, the node is a leaf node. - Otherwise 1<<_nbits pointers are allocated for children. - Return: The newly allocated and fully initialized Huffman tree node.*/ -static oc_huff_node *oc_huff_node_alloc(int _nbits){ - oc_huff_node *ret; - size_t size; - size=_ogg_offsetof(oc_huff_node,nodes); - if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits); - ret=_ogg_calloc(1,size); - ret->nbits=(unsigned char)_nbits; - return ret; -} - -/*Frees a Huffman tree node allocated with oc_huf_node_alloc. - _node: The node to free. - This may be NULL.*/ -static void oc_huff_node_free(oc_huff_node *_node){ - _ogg_free(_node); -} - -/*Frees the memory used by a Huffman tree. - _node: The Huffman tree to free. - This may be NULL.*/ -static void oc_huff_tree_free(oc_huff_node *_node){ - if(_node==NULL)return; - if(_node->nbits){ - int nchildren; - int i; - int inext; - nchildren=1<<_node->nbits; - for(i=0;inodes[i]!=NULL?1<<_node->nbits-_node->nodes[i]->depth:1); - oc_huff_tree_free(_node->nodes[i]); - } - } - oc_huff_node_free(_node); -} - -/*Unpacks a sub-tree from the given buffer. - _opb: The buffer to unpack from. - _binode: The location to store a pointer to the sub-tree in. - _depth: The current depth of the tree. - This is used to prevent infinite recursion. 
- Return: 0 on success, or a negative value on error.*/ -static int oc_huff_tree_unpack(oggpack_buffer *_opb, - oc_huff_node **_binode,int _depth){ - oc_huff_node *binode; - long bits; - /*Prevent infinite recursion.*/ - if(++_depth>32)return TH_EBADHEADER; - if(theorapackB_read1(_opb,&bits)<0)return TH_EBADHEADER; - /*Read an internal node:*/ - if(!bits){ - int ret; - binode=oc_huff_node_alloc(1); - binode->depth=(unsigned char)(_depth>1); - ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth); - if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth); - if(ret<0){ - oc_huff_tree_free(binode); - *_binode=NULL; - return ret; - } - } - /*Read a leaf node:*/ - else{ - if(theorapackB_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return TH_EBADHEADER; - binode=oc_huff_node_alloc(0); - binode->depth=(unsigned char)(_depth>1); - binode->token=(unsigned char)bits; - } - *_binode=binode; - return 0; -} - -/*Finds the depth of shortest branch of the given sub-tree. - The tree must be binary. - _binode: The root of the given sub-tree. - _binode->nbits must be 0 or 1. - Return: The smallest depth of a leaf node in this sub-tree. - 0 indicates this sub-tree is a leaf node.*/ -static int oc_huff_tree_mindepth(oc_huff_node *_binode){ - int depth0; - int depth1; - if(_binode->nbits==0)return 0; - depth0=oc_huff_tree_mindepth(_binode->nodes[0]); - depth1=oc_huff_tree_mindepth(_binode->nodes[1]); - return OC_MINI(depth0,depth1)+1; -} - -/*Finds the number of internal nodes at a given depth, plus the number of - leaves at that depth or shallower. - The tree must be binary. - _binode: The root of the given sub-tree. - _binode->nbits must be 0 or 1. 
- Return: The number of entries that would be contained in a jump table of the - given depth.*/ -static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){ - if(_binode->nbits==0||_depth<=0)return 1; - else{ - return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+ - oc_huff_tree_occupancy(_binode->nodes[1],_depth-1); - } -} - -static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode); - -/*Fills the given nodes table with all the children in the sub-tree at the - given depth. - The nodes in the sub-tree with a depth less than that stored in the table - are freed. - The sub-tree must be binary and complete up until the given depth. - _nodes: The nodes table to fill. - _binode: The root of the sub-tree to fill it with. - _binode->nbits must be 0 or 1. - _level: The current level in the table. - 0 indicates that the current node should be stored, regardless of - whether it is a leaf node or an internal node. - _depth: The depth of the nodes to fill the table with, relative to their - parent.*/ -static void oc_huff_node_fill(oc_huff_node **_nodes, - oc_huff_node *_binode,int _level,int _depth){ - if(_level<=0||_binode->nbits==0){ - int i; - _binode->depth=(unsigned char)(_depth-_level); - _nodes[0]=oc_huff_tree_collapse(_binode); - for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0]; - } - else{ - _level--; - oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth); - oc_huff_node_fill(_nodes+(1<<_level),_binode->nodes[1],_level,_depth); - oc_huff_node_free(_binode); - } -} - -/*Finds the largest complete sub-tree rooted at the current node and collapses - it into a single node. - This procedure is then applied recursively to all the children of that node. - _binode: The root of the sub-tree to collapse. - _binode->nbits must be 0 or 1. 
- Return: The new root of the collapsed sub-tree.*/ -static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode){ - oc_huff_node *root; - int mindepth; - int depth; - int loccupancy; - int occupancy; - depth=mindepth=oc_huff_tree_mindepth(_binode); - occupancy=1<loccupancy&&occupancy>=1<depth=_binode->depth; - oc_huff_node_fill(root->nodes,_binode,depth,depth); - return root; -} - -/*Makes a copy of the given Huffman tree. - _node: The Huffman tree to copy. - Return: The copy of the Huffman tree.*/ -static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node){ - oc_huff_node *ret; - ret=oc_huff_node_alloc(_node->nbits); - ret->depth=_node->depth; - if(_node->nbits){ - int nchildren; - int i; - int inext; - nchildren=1<<_node->nbits; - for(i=0;inodes[i]=oc_huff_tree_copy(_node->nodes[i]); - inext=i+(1<<_node->nbits-ret->nodes[i]->depth); - while(++inodes[i]=ret->nodes[i-1]; - } - } - else ret->token=_node->token; - return ret; -} - -/*Unpacks a set of Huffman trees, and reduces them to a collapsed - representation. - _opb: The buffer to unpack the trees from. - _nodes: The table to fill with the Huffman trees. - Return: 0 on success, or a negative value on error.*/ -int oc_huff_trees_unpack(oggpack_buffer *_opb, - oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ - int i; - for(i=0;inbits!=0){ - theorapackB_look(_opb,_node->nbits,&bits); - _node=_node->nodes[bits]; - theorapackB_adv(_opb,_node->depth); - } - return _node->token; -} diff --git a/Engine/lib/libtheora/lib/dec/idct.h b/Engine/lib/libtheora/lib/dec/idct.h deleted file mode 100644 index 3ee53712e..000000000 --- a/Engine/lib/libtheora/lib/dec/idct.h +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. 
PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: idct.h 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -/*Inverse DCT transforms.*/ -#include -#if !defined(_idct_H) -# define _idct_H (1) - -void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]); -void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]); - -#endif diff --git a/Engine/lib/libtheora/lib/dec/ocintrin.h b/Engine/lib/libtheora/lib/dec/ocintrin.h deleted file mode 100644 index 317f5aeae..000000000 --- a/Engine/lib/libtheora/lib/dec/ocintrin.h +++ /dev/null @@ -1,88 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: ocintrin.h 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -/*Some common macros for potential platform-specific optimization.*/ -#include -#if !defined(_ocintrin_H) -# define _ocintrin_H (1) - -/*Some specific platforms may have optimized intrinsic or inline assembly - versions of these functions which can substantially improve performance. 
- We define macros for them to allow easy incorporation of these non-ANSI - features.*/ - -/*Branchless, but not correct for differences larger than INT_MAX. -static int oc_mini(int _a,int _b){ - int ambsign; - ambsign=_a-_b>>sizeof(int)*8-1; - return (_a&~ambsign)+(_b&ambsign); -}*/ - - -#define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a)) -#define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a)) -/*Clamps an integer into the given range. - If _a>_c, then the lower bound _a is respected over the upper bound _c (this - behavior is required to meet our documented API behavior). - _a: The lower bound. - _b: The value to clamp. - _c: The upper boud.*/ -#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) -#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255)))) -/*Divides an integer by a power of two, truncating towards 0. - _dividend: The integer to divide. - _shift: The non-negative power of two to divide by. - _rmask: (1<<_shift)-1*/ -#define OC_DIV_POW2(_dividend,_shift,_rmask)\ - ((_dividend)+(((_dividend)>>sizeof(_dividend)*8-1)&(_rmask))>>(_shift)) -/*Divides _x by 65536, truncating towards 0.*/ -#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF) -/*Divides _x by 2, truncating towards 0.*/ -#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1) -/*Divides _x by 8, truncating towards 0.*/ -#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7) -/*Divides _x by 16, truncating towards 0.*/ -#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF) -/*Right shifts _dividend by _shift, adding _rval, and subtracting one for - negative dividends first.. 
- When _rval is (1<<_shift-1), this is equivalent to division with rounding - ties towards positive infinity.*/ -#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\ - ((_dividend)+((_dividend)>>sizeof(_dividend)*8-1)+(_rval)>>(_shift)) -/*Swaps two integers _a and _b if _a>_b.*/ -#define OC_SORT2I(_a,_b)\ - if((_a)>(_b)){\ - int t__;\ - t__=(_a);\ - (_a)=(_b);\ - (_b)=t__;\ - } - - - -/*All of these macros should expect floats as arguments.*/ -#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a)) -#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a)) -#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c))) -#define OC_FABSF(_f) ((float)fabs(_f)) -#define OC_SQRTF(_f) ((float)sqrt(_f)) -#define OC_POWF(_b,_e) ((float)pow(_b,_e)) -#define OC_LOGF(_f) ((float)log(_f)) -#define OC_IFLOORF(_f) ((int)floor(_f)) -#define OC_ICEILF(_f) ((int)ceil(_f)) - -#endif diff --git a/Engine/lib/libtheora/lib/dec/quant.c b/Engine/lib/libtheora/lib/dec/quant.c deleted file mode 100644 index 5cb7784db..000000000 --- a/Engine/lib/libtheora/lib/dec/quant.c +++ /dev/null @@ -1,122 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: quant.c 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -#include -#include -#include -#include "quant.h" -#include "decint.h" - -static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; -static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; - -/*Initializes the dequantization tables from a set of quantizer info. - Currently the dequantizer (and elsewhere enquantizer) tables are expected to - be initialized as pointing to the storage reserved for them in the - oc_theora_state (resp. oc_enc_ctx) structure. - If some tables are duplicates of others, the pointers will be adjusted to - point to a single copy of the tables, but the storage for them will not be - freed. - If you're concerned about the memory footprint, the obvious thing to do is - to move the storage out of its fixed place in the structures and allocate - it on demand. 
- However, a much, much better option is to only store the quantization - matrices being used for the current frame, and to recalculate these as the - qi values change between frames (this is what VP3 did).*/ -void oc_dequant_tables_init(oc_quant_table *_dequant[2][3], - int _pp_dc_scale[64],const th_quant_info *_qinfo){ - /*coding mode: intra or inter.*/ - int qti; - /*Y', C_b, C_r*/ - int pli; - for(qti=0;qti<2;qti++){ - for(pli=0;pli<3;pli++){ - oc_quant_tables stage; - /*Quality index.*/ - int qi; - /*Range iterator.*/ - int qri; - for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){ - th_quant_base base; - ogg_uint32_t q; - int qi_start; - int qi_end; - int ci; - memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], - sizeof(base)); - qi_start=qi; - if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1; - else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; - /*Iterate over quality indicies in this range.*/ - for(;;){ - ogg_uint32_t qfac; - /*In the original VP3.2 code, the rounding offset and the size of the - dead zone around 0 were controlled by a "sharpness" parameter. - The size of our dead zone is now controlled by the per-coefficient - quality thresholds returned by our HVS module. - We round down from a more accurate value when the quality of the - reconstruction does not fall below our threshold and it saves bits. 
- Hence, all of that VP3.2 code is gone from here, and the remaining - floating point code has been implemented as equivalent integer code - with exact precision.*/ - qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]; - /*For postprocessing, not dequantization.*/ - if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160); - /*Scale DC the coefficient from the proper table.*/ - q=(qfac/100)<<2; - q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); - stage[qi][0]=(ogg_uint16_t)q; - /*Now scale AC coefficients from the proper table.*/ - for(ci=1;ci<64;ci++){ - q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2; - q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); - stage[qi][ci]=(ogg_uint16_t)q; - } - if(++qi>=qi_end)break; - /*Interpolate the next base matrix.*/ - for(ci=0;ci<64;ci++){ - base[ci]=(unsigned char)( - (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ - (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) - +_qinfo->qi_ranges[qti][pli].sizes[qri])/ - (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); - } - } - } - /*Staging matrices complete; commit to memory only if this isn't a - duplicate of a preceeding plane. 
- This simple check helps us improve cache coherency later.*/ - { - int dupe; - int qtj; - int plj; - dupe=0; - for(qtj=0;qtj<=qti;qtj++){ - for(plj=0;plj<(qtj - -#if defined(USE_ASM) - -static const __attribute__((aligned(8),used)) int OC_FZIG_ZAGMMX[64]={ - 0, 8, 1, 2, 9,16,24,17, - 10, 3,32,11,18,25, 4,12, - 5,26,19,40,33,34,41,48, - 27, 6,13,20,28,21,14, 7, - 56,49,42,35,43,50,57,36, - 15,22,29,30,23,44,37,58, - 51,59,38,45,52,31,60,53, - 46,39,47,54,61,62,55,63 -}; - - - -void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){ - ogg_int16_t __attribute__((aligned(8))) res_buf[64]; - int dst_framei; - int dst_ystride; - int zzi; - /*_last_zzi is subtly different from an actual count of the number of - coefficients we decoded for this block. - It contains the value of zzi BEFORE the final token in the block was - decoded. - In most cases this is an EOB token (the continuation of an EOB run from a - previous block counts), and so this is the same as the coefficient count. - However, in the case that the last token was NOT an EOB token, but filled - the block up with exactly 64 coefficients, _last_zzi will be less than 64. - Provided the last token was not a pure zero run, the minimum value it can - be is 46, and so that doesn't affect any of the cases in this routine. - However, if the last token WAS a pure zero run of length 63, then _last_zzi - will be 1 while the number of coefficients decoded is 64. - Thus, we will trigger the following special case, where the real - coefficient count would not. - Note also that a zero run of length 64 will give _last_zzi a value of 0, - but we still process the DC coefficient, which might have a non-zero value - due to DC prediction. 
- Although convoluted, this is arguably the correct behavior: it allows us to - dequantize fewer coefficients and use a smaller transform when the block - ends with a long zero run instead of a normal EOB token. - It could be smarter... multiple separate zero runs at the end of a block - will fool it, but an encoder that generates these really deserves what it - gets. - Needless to say we inherited this approach from VP3.*/ - /*Special case only having a DC component.*/ - if(_last_zzi<2){ - ogg_uint16_t p; - /*Why is the iquant product rounded in this case and no others? - Who knows.*/ - p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5); - /*Fill res_buf with p.*/ - __asm__ __volatile__( - /*mm0=0000 0000 0000 AAAA*/ - "movd %[p],%%mm0\n\t" - /*mm1=0000 0000 0000 AAAA*/ - "movd %[p],%%mm1\n\t" - /*mm0=0000 0000 AAAA 0000*/ - "pslld $16,%%mm0\n\t" - /*mm0=0000 0000 AAAA AAAA*/ - "por %%mm1,%%mm0\n\t" - /*mm0=AAAA AAAA AAAA AAAA*/ - "punpcklwd %%mm0,%%mm0\n\t" - "movq %%mm0,(%[res_buf])\n\t" - "movq %%mm0,8(%[res_buf])\n\t" - "movq %%mm0,16(%[res_buf])\n\t" - "movq %%mm0,24(%[res_buf])\n\t" - "movq %%mm0,32(%[res_buf])\n\t" - "movq %%mm0,40(%[res_buf])\n\t" - "movq %%mm0,48(%[res_buf])\n\t" - "movq %%mm0,56(%[res_buf])\n\t" - "movq %%mm0,64(%[res_buf])\n\t" - "movq %%mm0,72(%[res_buf])\n\t" - "movq %%mm0,80(%[res_buf])\n\t" - "movq %%mm0,88(%[res_buf])\n\t" - "movq %%mm0,96(%[res_buf])\n\t" - "movq %%mm0,104(%[res_buf])\n\t" - "movq %%mm0,112(%[res_buf])\n\t" - "movq %%mm0,120(%[res_buf])\n\t" - : - :[res_buf]"r"(res_buf),[p]"r"((unsigned)p) - :"memory" - ); - } - else{ - /*Then, fill in the remainder of the coefficients with 0's, and perform - the iDCT.*/ - /*First zero the buffer.*/ - /*On K7, etc., this could be replaced with movntq and sfence.*/ - __asm__ __volatile__( - "pxor %%mm0,%%mm0\n\t" - "movq %%mm0,(%[res_buf])\n\t" - "movq %%mm0,8(%[res_buf])\n\t" - "movq %%mm0,16(%[res_buf])\n\t" - "movq %%mm0,24(%[res_buf])\n\t" - "movq 
%%mm0,32(%[res_buf])\n\t" - "movq %%mm0,40(%[res_buf])\n\t" - "movq %%mm0,48(%[res_buf])\n\t" - "movq %%mm0,56(%[res_buf])\n\t" - "movq %%mm0,64(%[res_buf])\n\t" - "movq %%mm0,72(%[res_buf])\n\t" - "movq %%mm0,80(%[res_buf])\n\t" - "movq %%mm0,88(%[res_buf])\n\t" - "movq %%mm0,96(%[res_buf])\n\t" - "movq %%mm0,104(%[res_buf])\n\t" - "movq %%mm0,112(%[res_buf])\n\t" - "movq %%mm0,120(%[res_buf])\n\t" - : - :[res_buf]"r"(res_buf) - :"memory" - ); - res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant); - /*This is planned to be rewritten in MMX.*/ - for(zzi=1;zzi<_ncoefs;zzi++){ - int ci; - ci=OC_FZIG_ZAG[zzi]; - res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]* - _ac_iquant[ci]); - } - if(_last_zzi<10)oc_idct8x8_10_mmx(res_buf); - else oc_idct8x8_mmx(res_buf); - } - /*Fill in the target buffer.*/ - dst_framei=_state->ref_frame_idx[OC_FRAME_SELF]; - dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; - /*For now ystride values in all ref frames assumed to be equal.*/ - if(_frag->mbmode==OC_MODE_INTRA){ - oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf); - } - else{ - int ref_framei; - int ref_ystride; - int mvoffsets[2]; - ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]]; - ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride; - if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],_frag->mv[1], - ref_ystride,_pli)>1){ - oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride, - _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride, - _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf); - } - else{ - oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride, - _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf); - } - } - oc_restore_fpu(_state); -} - -/*Copies the fragments specified by the lists of fragment indices from one - frame to another. - _fragis: A pointer to a list of fragment indices. - _nfragis: The number of fragment indices to copy. 
- _dst_frame: The reference frame to copy to. - _src_frame: The reference frame to copy from. - _pli: The color plane the fragments lie in.*/ -void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli){ - const int *fragi; - const int *fragi_end; - int dst_framei; - ptrdiff_t dst_ystride; - int src_framei; - ptrdiff_t src_ystride; - dst_framei=_state->ref_frame_idx[_dst_frame]; - src_framei=_state->ref_frame_idx[_src_frame]; - dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; - src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride; - fragi_end=_fragis+_nfragis; - for(fragi=_fragis;fragifrags+*fragi; - dst=frag->buffer[dst_framei]; - src=frag->buffer[src_framei]; - __asm__ __volatile__( - /*src+0*src_ystride*/ - "movq (%[src]),%%mm0\n\t" - /*s=src_ystride*3*/ - "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t" - /*src+1*src_ystride*/ - "movq (%[src],%[src_ystride]),%%mm1\n\t" - /*src+2*src_ystride*/ - "movq (%[src],%[src_ystride],2),%%mm2\n\t" - /*src+3*src_ystride*/ - "movq (%[src],%[s]),%%mm3\n\t" - /*dst+0*dst_ystride*/ - "movq %%mm0,(%[dst])\n\t" - /*s=dst_ystride*3*/ - "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t" - /*dst+1*dst_ystride*/ - "movq %%mm1,(%[dst],%[dst_ystride])\n\t" - /*Pointer to next 4.*/ - "lea (%[src],%[src_ystride],4),%[src]\n\t" - /*dst+2*dst_ystride*/ - "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t" - /*dst+3*dst_ystride*/ - "movq %%mm3,(%[dst],%[s])\n\t" - /*Pointer to next 4.*/ - "lea (%[dst],%[dst_ystride],4),%[dst]\n\t" - /*src+0*src_ystride*/ - "movq (%[src]),%%mm0\n\t" - /*s=src_ystride*3*/ - "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t" - /*src+1*src_ystride*/ - "movq (%[src],%[src_ystride]),%%mm1\n\t" - /*src+2*src_ystride*/ - "movq (%[src],%[src_ystride],2),%%mm2\n\t" - /*src+3*src_ystride*/ - "movq (%[src],%[s]),%%mm3\n\t" - /*dst+0*dst_ystride*/ - "movq %%mm0,(%[dst])\n\t" - /*s=dst_ystride*3*/ - "lea 
(%[dst_ystride],%[dst_ystride],2),%[s]\n\t" - /*dst+1*dst_ystride*/ - "movq %%mm1,(%[dst],%[dst_ystride])\n\t" - /*dst+2*dst_ystride*/ - "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t" - /*dst+3*dst_ystride*/ - "movq %%mm3,(%[dst],%[s])\n\t" - :[s]"=&r"(s) - :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride), - [src_ystride]"r"(src_ystride) - :"memory" - ); - } - /*This needs to be removed when decode specific functions are implemented:*/ - __asm__ __volatile__("emms\n\t"); -} - -static void loop_filter_v(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - ptrdiff_t s; - _pix-=_ystride*2; - __asm__ __volatile__( - /*mm0=0*/ - "pxor %%mm0,%%mm0\n\t" - /*s=_ystride*3*/ - "lea (%[ystride],%[ystride],2),%[s]\n\t" - /*mm7=_pix[0...8]*/ - "movq (%[pix]),%%mm7\n\t" - /*mm4=_pix[0...8+_ystride*3]*/ - "movq (%[pix],%[s]),%%mm4\n\t" - /*mm6=_pix[0...8]*/ - "movq %%mm7,%%mm6\n\t" - /*Expand unsigned _pix[0...3] to 16 bits.*/ - "punpcklbw %%mm0,%%mm6\n\t" - "movq %%mm4,%%mm5\n\t" - /*Expand unsigned _pix[4...8] to 16 bits.*/ - "punpckhbw %%mm0,%%mm7\n\t" - /*Expand other arrays too.*/ - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm5\n\t" - /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/ - "psubw %%mm4,%%mm6\n\t" - "psubw %%mm5,%%mm7\n\t" - /*mm5=mm4=_pix[0...8+_ystride]*/ - "movq (%[pix],%[ystride]),%%mm4\n\t" - /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/ - "movq (%[pix],%[ystride],2),%%mm2\n\t" - "movq %%mm4,%%mm5\n\t" - "movq %%mm2,%%mm3\n\t" - "movq %%mm2,%%mm1\n\t" - /*Expand these arrays.*/ - "punpckhbw %%mm0,%%mm5\n\t" - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm3\n\t" - "punpcklbw %%mm0,%%mm2\n\t" - /*mm0=3 3 3 3 - mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/ - "pcmpeqw %%mm0,%%mm0\n\t" - "psubw %%mm5,%%mm3\n\t" - "psrlw $14,%%mm0\n\t" - "psubw %%mm4,%%mm2\n\t" - /*Scale by 3.*/ - "pmullw %%mm0,%%mm3\n\t" - "pmullw %%mm0,%%mm2\n\t" - /*mm0=4 4 4 4 - f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+ - 
3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/ - "psrlw $1,%%mm0\n\t" - "paddw %%mm7,%%mm3\n\t" - "psllw $2,%%mm0\n\t" - "paddw %%mm6,%%mm2\n\t" - /*Add 4.*/ - "paddw %%mm0,%%mm3\n\t" - "paddw %%mm0,%%mm2\n\t" - /*"Divide" by 8.*/ - "psraw $3,%%mm3\n\t" - "psraw $3,%%mm2\n\t" - /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/ - /*Free up mm5.*/ - "packuswb %%mm5,%%mm4\n\t" - /*mm0=L L L L*/ - "movq (%[ll]),%%mm0\n\t" - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - "movq %%mm2,%%mm5\n\t" - "pxor %%mm6,%%mm6\n\t" - "movq %%mm0,%%mm7\n\t" - "psubw %%mm0,%%mm6\n\t" - "psllw $1,%%mm7\n\t" - "psllw $1,%%mm6\n\t" - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - "pcmpgtw %%mm2,%%mm7\n\t" - "pcmpgtw %%mm6,%%mm5\n\t" - "pand %%mm7,%%mm2\n\t" - "movq %%mm0,%%mm7\n\t" - "pand %%mm5,%%mm2\n\t" - "psllw $1,%%mm7\n\t" - "movq %%mm3,%%mm5\n\t" - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - "pcmpgtw %%mm3,%%mm7\n\t" - "pcmpgtw %%mm6,%%mm5\n\t" - "pand %%mm7,%%mm3\n\t" - "movq %%mm0,%%mm7\n\t" - "pand %%mm5,%%mm3\n\t" - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - "psraw $1,%%mm6\n\t" - "movq %%mm2,%%mm5\n\t" - "psllw $1,%%mm7\n\t" - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm5=R_i>L?FF:00*/ - "pcmpgtw %%mm0,%%mm5\n\t" - /*mm6=-L>R_i?FF:00*/ - "pcmpgtw %%mm2,%%mm6\n\t" - /*mm7=R_i>L?2L:0*/ - "pand %%mm5,%%mm7\n\t" - /*mm2=R_i>L?R_i-2L:R_i*/ - "psubw %%mm7,%%mm2\n\t" - "movq %%mm0,%%mm7\n\t" - /*mm5=-L>R_i||R_i>L*/ - "por %%mm6,%%mm5\n\t" - "psllw $1,%%mm7\n\t" - /*mm7=-L>R_i?2L:0*/ - "pand %%mm6,%%mm7\n\t" - "pxor %%mm6,%%mm6\n\t" - /*mm2=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm7,%%mm2\n\t" - "psubw %%mm0,%%mm6\n\t" - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - "pand %%mm2,%%mm5\n\t" - "movq %%mm0,%%mm7\n\t" - /*mm2=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm5,%%mm2\n\t" - "psllw $1,%%mm7\n\t" - 
/*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm5,%%mm2\n\t" - "movq %%mm3,%%mm5\n\t" - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm6=-L>R_i?FF:00*/ - "pcmpgtw %%mm3,%%mm6\n\t" - /*mm5=R_i>L?FF:00*/ - "pcmpgtw %%mm0,%%mm5\n\t" - /*mm7=R_i>L?2L:0*/ - "pand %%mm5,%%mm7\n\t" - /*mm2=R_i>L?R_i-2L:R_i*/ - "psubw %%mm7,%%mm3\n\t" - "psllw $1,%%mm0\n\t" - /*mm5=-L>R_i||R_i>L*/ - "por %%mm6,%%mm5\n\t" - /*mm0=-L>R_i?2L:0*/ - "pand %%mm6,%%mm0\n\t" - /*mm3=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm0,%%mm3\n\t" - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - "pand %%mm3,%%mm5\n\t" - /*mm2=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm5,%%mm3\n\t" - /*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm5,%%mm3\n\t" - /*Unfortunately, there's no unsigned byte+signed byte with unsigned - saturation op code, so we have to promote things back 16 bits.*/ - "pxor %%mm0,%%mm0\n\t" - "movq %%mm4,%%mm5\n\t" - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm5\n\t" - "movq %%mm1,%%mm6\n\t" - "punpcklbw %%mm0,%%mm1\n\t" - "punpckhbw %%mm0,%%mm6\n\t" - /*_pix[0...8+_ystride]+=R_i*/ - "paddw %%mm2,%%mm4\n\t" - "paddw %%mm3,%%mm5\n\t" - /*_pix[0...8+_ystride*2]-=R_i*/ - "psubw %%mm2,%%mm1\n\t" - "psubw %%mm3,%%mm6\n\t" - "packuswb %%mm5,%%mm4\n\t" - "packuswb %%mm6,%%mm1\n\t" - /*Write it back out.*/ - "movq %%mm4,(%[pix],%[ystride])\n\t" - "movq %%mm1,(%[pix],%[ystride],2)\n\t" - :[s]"=&r"(s) - :[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll) - :"memory" - ); -} - -/*This code implements the bulk of loop_filter_h(). - Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all - four p0's to one register we must transpose the values in four mmx regs. 
- When half is done we repeat this for the rest.*/ -static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride, - const ogg_int16_t *_ll){ - ptrdiff_t s; - /*d doesn't technically need to be 64-bit on x86-64, but making it so will - help avoid partial register stalls.*/ - ptrdiff_t d; - __asm__ __volatile__( - /*x x x x 3 2 1 0*/ - "movd (%[pix]),%%mm0\n\t" - /*s=_ystride*3*/ - "lea (%[ystride],%[ystride],2),%[s]\n\t" - /*x x x x 7 6 5 4*/ - "movd (%[pix],%[ystride]),%%mm1\n\t" - /*x x x x B A 9 8*/ - "movd (%[pix],%[ystride],2),%%mm2\n\t" - /*x x x x F E D C*/ - "movd (%[pix],%[s]),%%mm3\n\t" - /*mm0=7 3 6 2 5 1 4 0*/ - "punpcklbw %%mm1,%%mm0\n\t" - /*mm2=F B E A D 9 C 8*/ - "punpcklbw %%mm3,%%mm2\n\t" - /*mm1=7 3 6 2 5 1 4 0*/ - "movq %%mm0,%%mm1\n\t" - /*mm0=F B 7 3 E A 6 2*/ - "punpckhwd %%mm2,%%mm0\n\t" - /*mm1=D 9 5 1 C 8 4 0*/ - "punpcklwd %%mm2,%%mm1\n\t" - "pxor %%mm7,%%mm7\n\t" - /*mm5=D 9 5 1 C 8 4 0*/ - "movq %%mm1,%%mm5\n\t" - /*mm1=x C x 8 x 4 x 0==pix[0]*/ - "punpcklbw %%mm7,%%mm1\n\t" - /*mm5=x D x 9 x 5 x 1==pix[1]*/ - "punpckhbw %%mm7,%%mm5\n\t" - /*mm3=F B 7 3 E A 6 2*/ - "movq %%mm0,%%mm3\n\t" - /*mm0=x E x A x 6 x 2==pix[2]*/ - "punpcklbw %%mm7,%%mm0\n\t" - /*mm3=x F x B x 7 x 3==pix[3]*/ - "punpckhbw %%mm7,%%mm3\n\t" - /*mm1=mm1-mm3==pix[0]-pix[3]*/ - "psubw %%mm3,%%mm1\n\t" - /*Save a copy of pix[2] for later.*/ - "movq %%mm0,%%mm4\n\t" - /*mm2=3 3 3 3 - mm0=mm0-mm5==pix[2]-pix[1]*/ - "pcmpeqw %%mm2,%%mm2\n\t" - "psubw %%mm5,%%mm0\n\t" - "psrlw $14,%%mm2\n\t" - /*Scale by 3.*/ - "pmullw %%mm2,%%mm0\n\t" - /*mm2=4 4 4 4 - f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/ - "psrlw $1,%%mm2\n\t" - "paddw %%mm1,%%mm0\n\t" - "psllw $2,%%mm2\n\t" - /*Add 4.*/ - "paddw %%mm2,%%mm0\n\t" - /*"Divide" by 8, producing the residuals R_i.*/ - "psraw $3,%%mm0\n\t" - /*Now compute lflim of mm0 cf. 
Section 7.10 of the sepc.*/ - /*mm6=L L L L*/ - "movq (%[ll]),%%mm6\n\t" - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - "movq %%mm0,%%mm1\n\t" - "pxor %%mm2,%%mm2\n\t" - "movq %%mm6,%%mm3\n\t" - "psubw %%mm6,%%mm2\n\t" - "psllw $1,%%mm3\n\t" - "psllw $1,%%mm2\n\t" - /*mm0==R_3 R_2 R_1 R_0*/ - /*mm1==R_3 R_2 R_1 R_0*/ - /*mm2==-2L -2L -2L -2L*/ - /*mm3==2L 2L 2L 2L*/ - "pcmpgtw %%mm0,%%mm3\n\t" - "pcmpgtw %%mm2,%%mm1\n\t" - "pand %%mm3,%%mm0\n\t" - "pand %%mm1,%%mm0\n\t" - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - "psraw $1,%%mm2\n\t" - "movq %%mm0,%%mm1\n\t" - "movq %%mm6,%%mm3\n\t" - /*mm0==R_3 R_2 R_1 R_0*/ - /*mm1==R_3 R_2 R_1 R_0*/ - /*mm2==-L -L -L -L*/ - /*mm6==L L L L*/ - /*mm2=-L>R_i?FF:00*/ - "pcmpgtw %%mm0,%%mm2\n\t" - /*mm1=R_i>L?FF:00*/ - "pcmpgtw %%mm6,%%mm1\n\t" - /*mm3=2L 2L 2L 2L*/ - "psllw $1,%%mm3\n\t" - /*mm6=2L 2L 2L 2L*/ - "psllw $1,%%mm6\n\t" - /*mm3=R_i>L?2L:0*/ - "pand %%mm1,%%mm3\n\t" - /*mm6=-L>R_i?2L:0*/ - "pand %%mm2,%%mm6\n\t" - /*mm0=R_i>L?R_i-2L:R_i*/ - "psubw %%mm3,%%mm0\n\t" - /*mm1=-L>R_i||R_i>L*/ - "por %%mm2,%%mm1\n\t" - /*mm0=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm6,%%mm0\n\t" - /*mm1=-L>R_i||R_i>L?R_i':0*/ - "pand %%mm0,%%mm1\n\t" - /*mm0=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm1,%%mm0\n\t" - /*mm0=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm1,%%mm0\n\t" - /*_pix[1]+=R_i;*/ - "paddw %%mm0,%%mm5\n\t" - /*_pix[2]-=R_i;*/ - "psubw %%mm0,%%mm4\n\t" - /*mm5=x x x x D 9 5 1*/ - "packuswb %%mm7,%%mm5\n\t" - /*mm4=x x x x E A 6 2*/ - "packuswb %%mm7,%%mm4\n\t" - /*mm5=E D A 9 6 5 2 1*/ - "punpcklbw %%mm4,%%mm5\n\t" - /*d=6 5 2 1*/ - "movd %%mm5,%[d]\n\t" - "movw %w[d],1(%[pix])\n\t" - /*Why is there such a big stall here?*/ - "psrlq $32,%%mm5\n\t" - "shr $16,%[d]\n\t" - "movw %w[d],1(%[pix],%[ystride])\n\t" - /*d=E D A 9*/ - "movd %%mm5,%[d]\n\t" - "movw %w[d],1(%[pix],%[ystride],2)\n\t" - "shr $16,%[d]\n\t" - "movw %w[d],1(%[pix],%[s])\n\t" - :[s]"=&r"(s),[d]"=&r"(d), - [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll) 
- : - :"memory" - ); -} - -static void loop_filter_h(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - _pix-=2; - loop_filter_h4(_pix,_ystride,_ll); - loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll); -} - -/*We copy the whole function because the MMX routines will be inlined 4 times, - and we can do just a single emms call at the end this way. - We also do not use the _bv lookup table, instead computing the values that - would lie in it on the fly.*/ - -/*Apply the loop filter to a given set of fragment rows in the given plane. - The filter may be run on the bottom edge, affecting pixels in the next row of - fragments, so this row also needs to be available. - _bv: The bounding values array. - _refi: The index of the frame buffer to filter. - _pli: The color plane to filter. - _fragy0: The Y coordinate of the first fragment row to filter. - _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ -void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end){ - ogg_int16_t __attribute__((aligned(8))) ll[4]; - th_img_plane *iplane; - oc_fragment_plane *fplane; - oc_fragment *frag_top; - oc_fragment *frag0; - oc_fragment *frag; - oc_fragment *frag_end; - oc_fragment *frag0_end; - oc_fragment *frag_bot; - ll[0]=ll[1]=ll[2]=ll[3]= - (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]]; - iplane=_state->ref_frame_bufs[_refi]+_pli; - fplane=_state->fplanes+_pli; - /*The following loops are constructed somewhat non-intuitively on purpose. - The main idea is: if a block boundary has at least one coded fragment on - it, the filter is applied to it. 
- However, the order that the filters are applied in matters, and VP3 chose - the somewhat strange ordering used below.*/ - frag_top=_state->frags+fplane->froffset; - frag0=frag_top+_fragy0*fplane->nhfrags; - frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags; - frag_bot=_state->frags+fplane->froffset+fplane->nfrags; - while(frag0nhfrags; - while(fragcoded){ - if(frag>frag0){ - loop_filter_h(frag->buffer[_refi],iplane->stride,ll); - } - if(frag0>frag_top){ - loop_filter_v(frag->buffer[_refi],iplane->stride,ll); - } - if(frag+1coded){ - loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll); - } - if(frag+fplane->nhfragsnhfrags)->coded){ - loop_filter_v((frag+fplane->nhfrags)->buffer[_refi], - iplane->stride,ll); - } - } - frag++; - } - frag0+=fplane->nhfrags; - } - /*This needs to be removed when decode specific functions are implemented:*/ - __asm__ __volatile__("emms\n\t"); -} - -#endif diff --git a/Engine/lib/libtheora/lib/dec/x86/x86int.h b/Engine/lib/libtheora/lib/dec/x86/x86int.h deleted file mode 100644 index 05f9c57c1..000000000 --- a/Engine/lib/libtheora/lib/dec/x86/x86int.h +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -#if !defined(_x86_x86int_H) -# define _x86_x86int_H (1) -# include "../../internal.h" - -void oc_state_vtable_init_x86(oc_theora_state *_state); - -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t *_residue); -void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue); -void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue); -void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli); -void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]); -void oc_restore_fpu_mmx(void); -void oc_idct8x8_mmx(ogg_int16_t _y[64]); -void oc_idct8x8_10_mmx(ogg_int16_t _y[64]); -void oc_fill_idct_constants_mmx(void); -void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end); -#endif diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c deleted file mode 100644 index e87e0640d..000000000 --- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c +++ /dev/null @@ -1,214 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: - - ********************************************************************/ -#include "../../internal.h" - -/* ------------------------------------------------------------------------ - MMX reconstruction fragment routines for Visual Studio. - Tested with VS2005. Should compile for VS2003 and VC6 as well. - - Initial implementation 2007 by Nils Pipenbrinck. - ---------------------------------------------------------------------*/ - -#if defined(USE_ASM) - -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t *_residue){ - /* --------------------------------------------------------------------- - This function does the inter reconstruction step with 8 iterations - unrolled. 
The iteration for each instruction is noted by the #id in the - comments (in case you want to reconstruct it) - --------------------------------------------------------------------- */ - _asm{ - mov edi, [_residue] /* load residue ptr */ - mov eax, 0x00800080 /* generate constant */ - mov ebx, [_dst_ystride] /* load dst-stride */ - mov edx, [_dst] /* load dest pointer */ - - /* unrolled loop begins here */ - - movd mm0, eax /* load constant */ - movq mm1, [edi+ 8*0] /* #1 load low residue */ - movq mm2, [edi+ 8*1] /* #1 load high residue */ - punpckldq mm0, mm0 /* build constant */ - movq mm3, [edi+ 8*2] /* #2 load low residue */ - movq mm4, [edi+ 8*3] /* #2 load high residue */ - movq mm5, [edi+ 8*4] /* #3 load low residue */ - movq mm6, [edi+ 8*5] /* #3 load high residue */ - paddsw mm1, mm0 /* #1 bias low residue */ - paddsw mm2, mm0 /* #1 bias high residue */ - packuswb mm1, mm2 /* #1 pack to byte */ - paddsw mm3, mm0 /* #2 bias low residue */ - paddsw mm4, mm0 /* #2 bias high residue */ - packuswb mm3, mm4 /* #2 pack to byte */ - paddsw mm5, mm0 /* #3 bias low residue */ - paddsw mm6, mm0 /* #3 bias high residue */ - packuswb mm5, mm6 /* #3 pack to byte */ - movq [edx], mm1 /* #1 write row */ - movq [edx + ebx], mm3 /* #2 write row */ - movq [edx + ebx*2], mm5 /* #3 write row */ - movq mm1, [edi+ 8*6] /* #4 load low residue */ - lea ecx, [ebx + ebx*2] /* make dst_ystride * 3 */ - movq mm2, [edi+ 8*7] /* #4 load high residue */ - movq mm3, [edi+ 8*8] /* #5 load low residue */ - lea esi, [ebx*4 + ebx] /* make dst_ystride * 5 */ - movq mm4, [edi+ 8*9] /* #5 load high residue */ - movq mm5, [edi+ 8*10] /* #6 load low residue */ - lea eax, [ecx*2 + ebx] /* make dst_ystride * 7 */ - movq mm6, [edi+ 8*11] /* #6 load high residue */ - paddsw mm1, mm0 /* #4 bias low residue */ - paddsw mm2, mm0 /* #4 bias high residue */ - packuswb mm1, mm2 /* #4 pack to byte */ - paddsw mm3, mm0 /* #5 bias low residue */ - paddsw mm4, mm0 /* #5 bias high residue */ - packuswb mm3, mm4 
/* #5 pack to byte */ - paddsw mm5, mm0 /* #6 bias low residue */ - paddsw mm6, mm0 /* #6 bias high residue */ - packuswb mm5, mm6 /* #6 pack to byte */ - movq [edx + ecx], mm1 /* #4 write row */ - movq [edx + ebx*4], mm3 /* #5 write row */ - movq [edx + esi], mm5 /* #6 write row */ - movq mm1, [edi+ 8*12] /* #7 load low residue */ - movq mm2, [edi+ 8*13] /* #7 load high residue */ - movq mm3, [edi+ 8*14] /* #8 load low residue */ - movq mm4, [edi+ 8*15] /* #8 load high residue */ - paddsw mm1, mm0 /* #7 bias low residue */ - paddsw mm2, mm0 /* #7 bias high residue */ - packuswb mm1, mm2 /* #7 pack to byte */ - paddsw mm3, mm0 /* #8 bias low residue */ - paddsw mm4, mm0 /* #8 bias high residue */ - packuswb mm3, mm4 /* #8 pack to byte */ - movq [edx + ecx*2], mm1 /* #7 write row */ - movq [edx + eax], mm3 /* #8 write row */ - } -} - - - -void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride, - const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){ - /* --------------------------------------------------------------------- - This function does the inter reconstruction step with two iterations - running in parallel to hide some load-latencies and break the dependency - chains. 
The iteration for each instruction is noted by the #id in the - comments (in case you want to reconstruct it) - --------------------------------------------------------------------- */ - _asm{ - pxor mm0, mm0 /* generate constant 0 */ - mov esi, [_src] - mov edi, [_residue] - mov eax, [_src_ystride] - mov edx, [_dst] - mov ebx, [_dst_ystride] - mov ecx, 4 - - align 16 - -nextchunk: - movq mm3, [esi] /* #1 load source */ - movq mm1, [edi+0] /* #1 load residium low */ - movq mm2, [edi+8] /* #1 load residium high */ - movq mm7, [esi+eax] /* #2 load source */ - movq mm4, mm3 /* #1 get copy of src */ - movq mm5, [edi+16] /* #2 load residium low */ - punpckhbw mm4, mm0 /* #1 expand high source */ - movq mm6, [edi+24] /* #2 load residium high */ - punpcklbw mm3, mm0 /* #1 expand low source */ - paddsw mm4, mm2 /* #1 add residium high */ - movq mm2, mm7 /* #2 get copy of src */ - paddsw mm3, mm1 /* #1 add residium low */ - punpckhbw mm2, mm0 /* #2 expand high source */ - packuswb mm3, mm4 /* #1 final row pixels */ - punpcklbw mm7, mm0 /* #2 expand low source */ - movq [edx], mm3 /* #1 write row */ - paddsw mm2, mm6 /* #2 add residium high */ - add edi, 32 /* residue += 4 */ - paddsw mm7, mm5 /* #2 add residium low */ - sub ecx, 1 /* update loop counter */ - packuswb mm7, mm2 /* #2 final row */ - lea esi, [esi+eax*2] /* src += stride * 2 */ - movq [edx + ebx], mm7 /* #2 write row */ - lea edx, [edx+ebx*2] /* dst += stride * 2 */ - jne nextchunk - } -} - - -void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride, - const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue){ - /* --------------------------------------------------------------------- - This function does the inter2 reconstruction step.The building of the - average is done with a bit-twiddeling trick to avoid excessive register - copy work during byte to word conversion. 
- - average = (a & b) + (((a ^ b) & 0xfe) >> 1); - - (shown for a single byte; it's done with 8 of them at a time) - - Slightly faster than the obvious method using add and shift, but not - earthshaking improvement either. - - If anyone comes up with a way that produces bit-identical outputs - using the pavgb instruction let me know and I'll do the 3dnow codepath. - --------------------------------------------------------------------- */ - _asm{ - mov eax, 0xfefefefe - mov esi, [_src1] - mov edi, [_src2] - movd mm1, eax - mov ebx, [_residue] - mov edx, [_dst] - mov eax, [_dst_ystride] - punpckldq mm1, mm1 /* replicate lsb32 */ - mov ecx, 8 /* init loop counter */ - pxor mm0, mm0 /* constant zero */ - sub edx, eax /* dst -= dst_stride */ - - align 16 - -nextrow: - movq mm2, [esi] /* load source1 */ - movq mm3, [edi] /* load source2 */ - movq mm5, [ebx + 0] /* load lower residue */ - movq mm6, [ebx + 8] /* load higer residue */ - add esi, _src1_ystride /* src1 += src1_stride */ - add edi, _src2_ystride /* src2 += src1_stride */ - movq mm4, mm2 /* get copy of source1 */ - pand mm2, mm3 /* s1 & s2 (avg part) */ - pxor mm3, mm4 /* s1 ^ s2 (avg part) */ - add ebx, 16 /* residue++ */ - pand mm3, mm1 /* mask out low bits */ - psrlq mm3, 1 /* shift xor avg-part */ - paddd mm3, mm2 /* build final average */ - add edx, eax /* dst += dst_stride */ - movq mm2, mm3 /* get copy of average */ - punpckhbw mm3, mm0 /* average high */ - punpcklbw mm2, mm0 /* average low */ - paddsw mm3, mm6 /* high + residue */ - paddsw mm2, mm5 /* low + residue */ - sub ecx, 1 /* update loop counter */ - packuswb mm2, mm3 /* pack and saturate */ - movq [edx], mm2 /* write row */ - jne nextrow - } -} - -void oc_restore_fpu_mmx(void){ - _asm { emms } -} - -#endif diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c deleted file mode 100644 index 2c171594f..000000000 --- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c +++ /dev/null @@ -1,1006 +0,0 
@@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: - - ********************************************************************/ - -/* ------------------------------------------------------------------- - MMX based IDCT for the theora codec. - - Originally written by Rudolf Marek, based on code from On2's VP3. - Converted to Visual Studio inline assembly by Nils Pipenbrinck. - - ---------------------------------------------------------------------*/ -#if defined(USE_ASM) - -#include -#include "../dct.h" -#include "../idct.h" -#include "x86int.h" - -/*A table of constants used by the MMX routines.*/ -static const __declspec(align(16)) ogg_uint16_t - OC_IDCT_CONSTS[(7+1)*4]={ - (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, - (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, - (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, - (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, - (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, - (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, - (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, - (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, - (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, - (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, - (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, - (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, - (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, - (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, - 8, 8, 8, 8 -}; - - -void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){ - _asm { - mov edx, [_y] - mov eax, offset 
OC_IDCT_CONSTS - movq mm2, [edx + 30H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 18H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 10H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 38H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 20H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 28H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 10H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 20H], mm6 - movq mm2, mm0 - movq mm6, [edx] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 08H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 10H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - movq mm3, [edx + 20H] - psubw mm4, mm7 - paddw mm1, mm1 - paddw mm7, mm7 - paddw mm1, mm2 - paddw mm7, mm4 - psubw mm4, mm3 - paddw mm3, mm3 - psubw mm6, mm5 - paddw mm5, mm5 - paddw mm3, mm4 - paddw mm5, mm6 - psubw mm7, mm0 - paddw mm0, mm0 - movq [edx + 10H], mm1 - paddw mm0, mm7 - movq mm1, mm4 - punpcklwd mm4, mm5 - movq [edx], mm0 - punpckhwd mm1, mm5 - movq mm0, mm6 - punpcklwd mm6, mm7 - movq mm5, mm4 - punpckldq mm4, mm6 - punpckhdq mm5, mm6 - movq mm6, mm1 - movq [edx + 08H], mm4 - punpckhwd mm0, mm7 - movq [edx + 18H], mm5 - punpckhdq mm6, mm0 - movq mm4, [edx] - 
punpckldq mm1, mm0 - movq mm5, [edx + 10H] - movq mm0, mm4 - movq [edx + 38H], mm6 - punpcklwd mm0, mm5 - movq [edx + 28H], mm1 - punpckhwd mm4, mm5 - movq mm5, mm2 - punpcklwd mm2, mm3 - movq mm1, mm0 - punpckldq mm0, mm2 - punpckhdq mm1, mm2 - movq mm2, mm4 - movq [edx], mm0 - punpckhwd mm5, mm3 - movq [edx + 10H], mm1 - punpckhdq mm4, mm5 - punpckldq mm2, mm5 - movq [edx + 30H], mm4 - movq [edx + 20H], mm2 - movq mm2, [edx + 70H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 58H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 50H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 78H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 60H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 68H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 50H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 60H], mm6 - movq mm2, mm0 - movq mm6, [edx + 40H] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 48H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 50H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - movq mm3, [edx + 60H] - psubw mm4, mm7 - paddw mm1, mm1 - paddw mm7, mm7 - paddw mm1, mm2 - paddw mm7, mm4 - psubw mm4, mm3 - paddw mm3, mm3 - psubw mm6, mm5 - paddw mm5, mm5 - 
paddw mm3, mm4 - paddw mm5, mm6 - psubw mm7, mm0 - paddw mm0, mm0 - movq [edx + 50H], mm1 - paddw mm0, mm7 - movq mm1, mm4 - punpcklwd mm4, mm5 - movq [edx + 40H], mm0 - punpckhwd mm1, mm5 - movq mm0, mm6 - punpcklwd mm6, mm7 - movq mm5, mm4 - punpckldq mm4, mm6 - punpckhdq mm5, mm6 - movq mm6, mm1 - movq [edx + 48H], mm4 - punpckhwd mm0, mm7 - movq [edx + 58H], mm5 - punpckhdq mm6, mm0 - movq mm4, [edx + 40H] - punpckldq mm1, mm0 - movq mm5, [edx + 50H] - movq mm0, mm4 - movq [edx + 78H], mm6 - punpcklwd mm0, mm5 - movq [edx + 68H], mm1 - punpckhwd mm4, mm5 - movq mm5, mm2 - punpcklwd mm2, mm3 - movq mm1, mm0 - punpckldq mm0, mm2 - punpckhdq mm1, mm2 - movq mm2, mm4 - movq [edx + 40H], mm0 - punpckhwd mm5, mm3 - movq [edx + 50H], mm1 - punpckhdq mm4, mm5 - punpckldq mm2, mm5 - movq [edx + 70H], mm4 - movq [edx + 60H], mm2 - movq mm2, [edx + 30H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 50H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 10H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 70H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 20H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 60H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 10H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 20H], mm6 - movq mm2, mm0 - movq mm6, [edx] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 40H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, 
mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 10H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - paddw mm2, [eax + 38H] - paddw mm1, mm1 - paddw mm1, mm2 - psraw mm2, 4 - psubw mm4, mm7 - psraw mm1, 4 - movq mm3, [edx + 20H] - paddw mm7, mm7 - movq [edx + 20H], mm2 - paddw mm7, mm4 - movq [edx + 10H], mm1 - psubw mm4, mm3 - paddw mm4, [eax + 38H] - paddw mm3, mm3 - paddw mm3, mm4 - psraw mm4, 4 - psubw mm6, mm5 - psraw mm3, 4 - paddw mm6, [eax + 38H] - paddw mm5, mm5 - paddw mm5, mm6 - psraw mm6, 4 - movq [edx + 40H], mm4 - psraw mm5, 4 - movq [edx + 30H], mm3 - psubw mm7, mm0 - paddw mm7, [eax + 38H] - paddw mm0, mm0 - paddw mm0, mm7 - psraw mm7, 4 - movq [edx + 60H], mm6 - psraw mm0, 4 - movq [edx + 50H], mm5 - movq [edx + 70H], mm7 - movq [edx], mm0 - movq mm2, [edx + 38H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 58H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 18H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 78H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 28H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 68H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 18H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 28H], mm6 - movq mm2, mm0 - movq mm6, [edx + 08H] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, 
[edx + 48H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 18H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - paddw mm2, [eax + 38H] - paddw mm1, mm1 - paddw mm1, mm2 - psraw mm2, 4 - psubw mm4, mm7 - psraw mm1, 4 - movq mm3, [edx + 28H] - paddw mm7, mm7 - movq [edx + 28H], mm2 - paddw mm7, mm4 - movq [edx + 18H], mm1 - psubw mm4, mm3 - paddw mm4, [eax + 38H] - paddw mm3, mm3 - paddw mm3, mm4 - psraw mm4, 4 - psubw mm6, mm5 - psraw mm3, 4 - paddw mm6, [eax + 38H] - paddw mm5, mm5 - paddw mm5, mm6 - psraw mm6, 4 - movq [edx + 48H], mm4 - psraw mm5, 4 - movq [edx + 38H], mm3 - psubw mm7, mm0 - paddw mm7, [eax + 38H] - paddw mm0, mm0 - paddw mm0, mm7 - psraw mm7, 4 - movq [edx + 68H], mm6 - psraw mm0, 4 - movq [edx + 58H], mm5 - movq [edx + 78H], mm7 - movq [edx + 08H], mm0 - /* emms */ - } -} - - -void oc_idct8x8_mmx(ogg_int16_t _y[64]){ - _asm { - mov edx, [_y] - mov eax, offset OC_IDCT_CONSTS - movq mm2, [edx + 30H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 18H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 10H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 38H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 20H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 28H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 10H], mm4 
- psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 20H], mm6 - movq mm2, mm0 - movq mm6, [edx] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 08H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 10H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - movq mm3, [edx + 20H] - psubw mm4, mm7 - paddw mm1, mm1 - paddw mm7, mm7 - paddw mm1, mm2 - paddw mm7, mm4 - psubw mm4, mm3 - paddw mm3, mm3 - psubw mm6, mm5 - paddw mm5, mm5 - paddw mm3, mm4 - paddw mm5, mm6 - psubw mm7, mm0 - paddw mm0, mm0 - movq [edx + 10H], mm1 - paddw mm0, mm7 - movq mm1, mm4 - punpcklwd mm4, mm5 - movq [edx], mm0 - punpckhwd mm1, mm5 - movq mm0, mm6 - punpcklwd mm6, mm7 - movq mm5, mm4 - punpckldq mm4, mm6 - punpckhdq mm5, mm6 - movq mm6, mm1 - movq [edx + 08H], mm4 - punpckhwd mm0, mm7 - movq [edx + 18H], mm5 - punpckhdq mm6, mm0 - movq mm4, [edx] - punpckldq mm1, mm0 - movq mm5, [edx + 10H] - movq mm0, mm4 - movq [edx + 38H], mm6 - punpcklwd mm0, mm5 - movq [edx + 28H], mm1 - punpckhwd mm4, mm5 - movq mm5, mm2 - punpcklwd mm2, mm3 - movq mm1, mm0 - punpckldq mm0, mm2 - punpckhdq mm1, mm2 - movq mm2, mm4 - movq [edx], mm0 - punpckhwd mm5, mm3 - movq [edx + 10H], mm1 - punpckhdq mm4, mm5 - punpckldq mm2, mm5 - movq [edx + 30H], mm4 - movq [edx + 20H], mm2 - movq mm2, [edx + 70H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 58H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 50H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 78H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 
60H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 68H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 50H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 60H], mm6 - movq mm2, mm0 - movq mm6, [edx + 40H] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 48H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 50H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - movq mm3, [edx + 60H] - psubw mm4, mm7 - paddw mm1, mm1 - paddw mm7, mm7 - paddw mm1, mm2 - paddw mm7, mm4 - psubw mm4, mm3 - paddw mm3, mm3 - psubw mm6, mm5 - paddw mm5, mm5 - paddw mm3, mm4 - paddw mm5, mm6 - psubw mm7, mm0 - paddw mm0, mm0 - movq [edx + 50H], mm1 - paddw mm0, mm7 - movq mm1, mm4 - punpcklwd mm4, mm5 - movq [edx + 40H], mm0 - punpckhwd mm1, mm5 - movq mm0, mm6 - punpcklwd mm6, mm7 - movq mm5, mm4 - punpckldq mm4, mm6 - punpckhdq mm5, mm6 - movq mm6, mm1 - movq [edx + 48H], mm4 - punpckhwd mm0, mm7 - movq [edx + 58H], mm5 - punpckhdq mm6, mm0 - movq mm4, [edx + 40H] - punpckldq mm1, mm0 - movq mm5, [edx + 50H] - movq mm0, mm4 - movq [edx + 78H], mm6 - punpcklwd mm0, mm5 - movq [edx + 68H], mm1 - punpckhwd mm4, mm5 - movq mm5, mm2 - punpcklwd mm2, mm3 - movq mm1, mm0 - punpckldq mm0, mm2 - punpckhdq mm1, mm2 - movq mm2, mm4 - movq [edx + 40H], mm0 - punpckhwd mm5, mm3 - movq [edx + 50H], mm1 - punpckhdq mm4, mm5 - punpckldq mm2, mm5 - movq [edx + 70H], mm4 - movq [edx + 60H], mm2 - movq mm2, [edx + 30H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, 
[edx + 50H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 10H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 70H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 20H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 60H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 10H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 20H], mm6 - movq mm2, mm0 - movq mm6, [edx] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 40H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 10H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - paddw mm2, [eax + 38H] - paddw mm1, mm1 - paddw mm1, mm2 - psraw mm2, 4 - psubw mm4, mm7 - psraw mm1, 4 - movq mm3, [edx + 20H] - paddw mm7, mm7 - movq [edx + 20H], mm2 - paddw mm7, mm4 - movq [edx + 10H], mm1 - psubw mm4, mm3 - paddw mm4, [eax + 38H] - paddw mm3, mm3 - paddw mm3, mm4 - psraw mm4, 4 - psubw mm6, mm5 - psraw mm3, 4 - paddw mm6, [eax + 38H] - paddw mm5, mm5 - paddw mm5, mm6 - psraw mm6, 4 - movq [edx + 40H], mm4 - psraw mm5, 4 - movq [edx + 30H], mm3 - psubw mm7, mm0 - paddw mm7, [eax + 38H] - paddw mm0, mm0 - paddw mm0, mm7 - psraw mm7, 4 - movq [edx + 60H], mm6 - psraw mm0, 4 - movq [edx + 50H], mm5 - movq [edx + 70H], mm7 - movq [edx], mm0 - movq 
mm2, [edx + 38H] - movq mm6, [eax + 10H] - movq mm4, mm2 - movq mm7, [edx + 58H] - pmulhw mm4, mm6 - movq mm1, [eax + 20H] - pmulhw mm6, mm7 - movq mm5, mm1 - pmulhw mm1, mm2 - movq mm3, [edx + 18H] - pmulhw mm5, mm7 - movq mm0, [eax] - paddw mm4, mm2 - paddw mm6, mm7 - paddw mm2, mm1 - movq mm1, [edx + 78H] - paddw mm7, mm5 - movq mm5, mm0 - pmulhw mm0, mm3 - paddw mm4, mm7 - pmulhw mm5, mm1 - movq mm7, [eax + 30H] - psubw mm6, mm2 - paddw mm0, mm3 - pmulhw mm3, mm7 - movq mm2, [edx + 28H] - pmulhw mm7, mm1 - paddw mm5, mm1 - movq mm1, mm2 - pmulhw mm2, [eax + 08H] - psubw mm3, mm5 - movq mm5, [edx + 68H] - paddw mm0, mm7 - movq mm7, mm5 - psubw mm0, mm4 - pmulhw mm5, [eax + 08H] - paddw mm2, mm1 - pmulhw mm1, [eax + 28H] - paddw mm4, mm4 - paddw mm4, mm0 - psubw mm3, mm6 - paddw mm5, mm7 - paddw mm6, mm6 - pmulhw mm7, [eax + 28H] - paddw mm6, mm3 - movq [edx + 18H], mm4 - psubw mm1, mm5 - movq mm4, [eax + 18H] - movq mm5, mm3 - pmulhw mm3, mm4 - paddw mm7, mm2 - movq [edx + 28H], mm6 - movq mm2, mm0 - movq mm6, [edx + 08H] - pmulhw mm0, mm4 - paddw mm5, mm3 - movq mm3, [edx + 48H] - psubw mm5, mm1 - paddw mm2, mm0 - psubw mm6, mm3 - movq mm0, mm6 - pmulhw mm6, mm4 - paddw mm3, mm3 - paddw mm1, mm1 - paddw mm3, mm0 - paddw mm1, mm5 - pmulhw mm4, mm3 - paddw mm6, mm0 - psubw mm6, mm2 - paddw mm2, mm2 - movq mm0, [edx + 18H] - paddw mm2, mm6 - paddw mm4, mm3 - psubw mm2, mm1 - paddw mm2, [eax + 38H] - paddw mm1, mm1 - paddw mm1, mm2 - psraw mm2, 4 - psubw mm4, mm7 - psraw mm1, 4 - movq mm3, [edx + 28H] - paddw mm7, mm7 - movq [edx + 28H], mm2 - paddw mm7, mm4 - movq [edx + 18H], mm1 - psubw mm4, mm3 - paddw mm4, [eax + 38H] - paddw mm3, mm3 - paddw mm3, mm4 - psraw mm4, 4 - psubw mm6, mm5 - psraw mm3, 4 - paddw mm6, [eax + 38H] - paddw mm5, mm5 - paddw mm5, mm6 - psraw mm6, 4 - movq [edx + 48H], mm4 - psraw mm5, 4 - movq [edx + 38H], mm3 - psubw mm7, mm0 - paddw mm7, [eax + 38H] - paddw mm0, mm0 - paddw mm0, mm7 - psraw mm7, 4 - movq [edx + 68H], mm6 - psraw mm0, 4 
- movq [edx + 58H], mm5 - movq [edx + 78H], mm7 - movq [edx + 08H], mm0 - /* emms */ - } -} - -#endif diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c deleted file mode 100644 index 62d06dc89..000000000 --- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c +++ /dev/null @@ -1,377 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: - - ********************************************************************/ - -/* ------------------------------------------------------------------- - MMX based loop filter for the theora codec. - - Originally written by Rudolf Marek, based on code from On2's VP3. - Converted to Visual Studio inline assembly by Nils Pipenbrinck. - - Note: I can't test these since my example files never get into the - loop filters, but the code has been converted semi-automatic from - the GCC sources, so it ought to work. 
- ---------------------------------------------------------------------*/ -#include "../../internal.h" -#include "x86int.h" -#include - -#if defined(USE_ASM) - - - -static void loop_filter_v(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - _asm { - mov eax, [_pix] - mov edx, [_ystride] - mov ebx, [_ll] - - /* _pix -= ystride */ - sub eax, edx - /* mm0=0 */ - pxor mm0, mm0 - /* _pix -= ystride */ - sub eax, edx - /* esi=_ystride*3 */ - lea esi, [edx + edx*2] - - /* mm7=_pix[0...8]*/ - movq mm7, [eax] - /* mm4=_pix[0...8+_ystride*3]*/ - movq mm4, [eax + esi] - /* mm6=_pix[0...8]*/ - movq mm6, mm7 - /* Expand unsigned _pix[0...3] to 16 bits.*/ - punpcklbw mm6, mm0 - movq mm5, mm4 - /* Expand unsigned _pix[4...7] to 16 bits.*/ - punpckhbw mm7, mm0 - punpcklbw mm4, mm0 - /* Expand other arrays too.*/ - punpckhbw mm5, mm0 - /*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/ - psubw mm6, mm4 - psubw mm7, mm5 - /*mm5=mm4=_pix[0...7+_ystride]*/ - movq mm4, [eax + edx] - /*mm1=mm3=mm2=_pix[0..7]+_ystride*2]*/ - movq mm2, [eax + edx*2] - movq mm5, mm4 - movq mm3, mm2 - movq mm1, mm2 - /*Expand these arrays.*/ - punpckhbw mm5, mm0 - punpcklbw mm4, mm0 - punpckhbw mm3, mm0 - punpcklbw mm2, mm0 - pcmpeqw mm0, mm0 - /*mm0=3 3 3 3 - mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/ - psubw mm3, mm5 - psrlw mm0, 14 - psubw mm2, mm4 - /*Scale by 3.*/ - pmullw mm3, mm0 - pmullw mm2, mm0 - /*mm0=4 4 4 4 - f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+ - 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/ - psrlw mm0, 1 - paddw mm3, mm7 - psllw mm0, 2 - paddw mm2, mm6 - /*Add 4.*/ - paddw mm3, mm0 - paddw mm2, mm0 - /*"Divide" by 8.*/ - psraw mm3, 3 - psraw mm2, 3 - /*Now compute lflim of mm3:mm2 cf. 
Section 7.10 of the sepc.*/ - /*Free up mm5.*/ - packuswb mm4, mm5 - /*mm0=L L L L*/ - movq mm0, [ebx] - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - movq mm5, mm2 - pxor mm6, mm6 - movq mm7, mm0 - psubw mm6, mm0 - psllw mm7, 1 - psllw mm6, 1 - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - pcmpgtw mm7, mm2 - pcmpgtw mm5, mm6 - pand mm2, mm7 - movq mm7, mm0 - pand mm2, mm5 - psllw mm7, 1 - movq mm5, mm3 - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - pcmpgtw mm7, mm3 - pcmpgtw mm5, mm6 - pand mm3, mm7 - movq mm7, mm0 - pand mm3, mm5 - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - psraw mm6, 1 - movq mm5, mm2 - psllw mm7, 1 - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm5=R_i>L?FF:00*/ - pcmpgtw mm5, mm0 - /*mm6=-L>R_i?FF:00*/ - pcmpgtw mm6, mm2 - /*mm7=R_i>L?2L:0*/ - pand mm7, mm5 - /*mm2=R_i>L?R_i-2L:R_i*/ - psubw mm2, mm7 - movq mm7, mm0 - /*mm5=-L>R_i||R_i>L*/ - por mm5, mm6 - psllw mm7, 1 - /*mm7=-L>R_i?2L:0*/ - pand mm7, mm6 - pxor mm6, mm6 - /*mm2=-L>R_i?R_i+2L:R_i*/ - paddw mm2, mm7 - psubw mm6, mm0 - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - pand mm5, mm2 - movq mm7, mm0 - /*mm2=-L>R_i||R_i>L?0:R_i*/ - psubw mm2, mm5 - psllw mm7, 1 - /*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - psubw mm2, mm5 - movq mm5, mm3 - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm6=-L>R_i?FF:00*/ - pcmpgtw mm6, mm3 - /*mm5=R_i>L?FF:00*/ - pcmpgtw mm5, mm0 - /*mm7=R_i>L?2L:0*/ - pand mm7, mm5 - /*mm2=R_i>L?R_i-2L:R_i*/ - psubw mm3, mm7 - psllw mm0, 1 - /*mm5=-L>R_i||R_i>L*/ - por mm5, mm6 - /*mm0=-L>R_i?2L:0*/ - pand mm0, mm6 - /*mm3=-L>R_i?R_i+2L:R_i*/ - paddw mm3, mm0 - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - pand mm5, mm3 - /*mm2=-L>R_i||R_i>L?0:R_i*/ - psubw mm3, mm5 - /*mm3=-L>R_i||R_i>L?-R_i':R_i*/ - psubw mm3, mm5 - /*Unfortunately, there's no unsigned 
byte+signed byte with unsigned - saturation op code, so we have to promote things back 16 bits.*/ - pxor mm0, mm0 - movq mm5, mm4 - punpcklbw mm4, mm0 - punpckhbw mm5, mm0 - movq mm6, mm1 - punpcklbw mm1, mm0 - punpckhbw mm6, mm0 - /*_pix[0...8+_ystride]+=R_i*/ - paddw mm4, mm2 - paddw mm5, mm3 - /*_pix[0...8+_ystride*2]-=R_i*/ - psubw mm1, mm2 - psubw mm6, mm3 - packuswb mm4, mm5 - packuswb mm1, mm6 - /*Write it back out.*/ - movq [eax + edx], mm4 - movq [eax + edx*2], mm1 - } -} - -/*This code implements the bulk of loop_filter_h(). - Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all - four p0's to one register we must transpose the values in four mmx regs. - When half is done we repeat this for the rest.*/ -static void loop_filter_h4(unsigned char *_pix,long _ystride, - const ogg_int16_t *_ll){ - /* todo: merge the comments from the GCC sources */ - _asm { - mov ecx, [_pix] - mov edx, [_ystride] - mov eax, [_ll] - /*esi=_ystride*3*/ - lea esi, [edx + edx*2] - - movd mm0, dword ptr [ecx] - movd mm1, dword ptr [ecx + edx] - movd mm2, dword ptr [ecx + edx*2] - movd mm3, dword ptr [ecx + esi] - punpcklbw mm0, mm1 - punpcklbw mm2, mm3 - movq mm1, mm0 - punpckhwd mm0, mm2 - punpcklwd mm1, mm2 - pxor mm7, mm7 - movq mm5, mm1 - punpcklbw mm1, mm7 - punpckhbw mm5, mm7 - movq mm3, mm0 - punpcklbw mm0, mm7 - punpckhbw mm3, mm7 - psubw mm1, mm3 - movq mm4, mm0 - pcmpeqw mm2, mm2 - psubw mm0, mm5 - psrlw mm2, 14 - pmullw mm0, mm2 - psrlw mm2, 1 - paddw mm0, mm1 - psllw mm2, 2 - paddw mm0, mm2 - psraw mm0, 3 - movq mm6, qword ptr [eax] - movq mm1, mm0 - pxor mm2, mm2 - movq mm3, mm6 - psubw mm2, mm6 - psllw mm3, 1 - psllw mm2, 1 - pcmpgtw mm3, mm0 - pcmpgtw mm1, mm2 - pand mm0, mm3 - pand mm0, mm1 - psraw mm2, 1 - movq mm1, mm0 - movq mm3, mm6 - pcmpgtw mm2, mm0 - pcmpgtw mm1, mm6 - psllw mm3, 1 - psllw mm6, 1 - pand mm3, mm1 - pand mm6, mm2 - psubw mm0, mm3 - por mm1, mm2 - paddw mm0, mm6 - pand mm1, mm0 - psubw mm0, mm1 - psubw mm0, mm1 - paddw mm5, 
mm0 - psubw mm4, mm0 - packuswb mm5, mm7 - packuswb mm4, mm7 - punpcklbw mm5, mm4 - movd edi, mm5 - mov word ptr [ecx + 01H], di - psrlq mm5, 32 - shr edi, 16 - mov word ptr [ecx + edx + 01H], di - movd edi, mm5 - mov word ptr [ecx + edx*2 + 01H], di - shr edi, 16 - mov word ptr [ecx + esi + 01H], di - } -} - -static void loop_filter_h(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - _pix-=2; - loop_filter_h4(_pix,_ystride,_ll); - loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll); -} - - -/*We copy the whole function because the MMX routines will be inlined 4 times, - and we can do just a single emms call at the end this way. - We also do not use the _bv lookup table, instead computing the values that - would lie in it on the fly.*/ - -/*Apply the loop filter to a given set of fragment rows in the given plane. - The filter may be run on the bottom edge, affecting pixels in the next row of - fragments, so this row also needs to be available. - _bv: The bounding values array. - _refi: The index of the frame buffer to filter. - _pli: The color plane to filter. - _fragy0: The Y coordinate of the first fragment row to filter. - _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ -void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end){ - ogg_int16_t __declspec(align(8)) ll[4]; - th_img_plane *iplane; - oc_fragment_plane *fplane; - oc_fragment *frag_top; - oc_fragment *frag0; - oc_fragment *frag; - oc_fragment *frag_end; - oc_fragment *frag0_end; - oc_fragment *frag_bot; - ll[0]=ll[1]=ll[2]=ll[3]= - (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]]; - iplane=_state->ref_frame_bufs[_refi]+_pli; - fplane=_state->fplanes+_pli; - /*The following loops are constructed somewhat non-intuitively on purpose. - The main idea is: if a block boundary has at least one coded fragment on - it, the filter is applied to it. 
- However, the order that the filters are applied in matters, and VP3 chose - the somewhat strange ordering used below.*/ - frag_top=_state->frags+fplane->froffset; - frag0=frag_top+_fragy0*fplane->nhfrags; - frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags; - frag_bot=_state->frags+fplane->froffset+fplane->nfrags; - while(frag0nhfrags; - while(fragcoded){ - if(frag>frag0){ - loop_filter_h(frag->buffer[_refi],iplane->stride,ll); - } - if(frag0>frag_top){ - loop_filter_v(frag->buffer[_refi],iplane->stride,ll); - } - if(frag+1coded){ - loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll); - } - if(frag+fplane->nhfragsnhfrags)->coded){ - loop_filter_v((frag+fplane->nhfrags)->buffer[_refi], - iplane->stride,ll); - } - } - frag++; - } - frag0+=fplane->nhfrags; - } - - /*This needs to be removed when decode specific functions are implemented:*/ - _mm_empty(); -} - -#endif diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c b/Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c deleted file mode 100644 index 526ef53f3..000000000 --- a/Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c +++ /dev/null @@ -1,189 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -/* ------------------------------------------------------------------------ - MMX acceleration of complete fragment reconstruction algorithm. 
- Originally written by Rudolf Marek. - - Conversion to MSC intrinsics by Nils Pipenbrinck. - ---------------------------------------------------------------------*/ -#if defined(USE_ASM) - -#include "../../internal.h" -#include "../idct.h" -#include "x86int.h" -#include - -static const unsigned char OC_FZIG_ZAGMMX[64]= -{ - 0, 8, 1, 2, 9,16,24,17, - 10, 3,32,11,18,25, 4,12, - 5,26,19,40,33,34,41,48, - 27, 6,13,20,28,21,14, 7, - 56,49,42,35,43,50,57,36, - 15,22,29,30,23,44,37,58, - 51,59,38,45,52,31,60,53, - 46,39,47,54,61,62,55,63 -}; - -/* Fill a block with value */ -static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){ - __m64 t = _value; - _dst[0] = t; _dst[1] = t; _dst[2] = t; _dst[3] = t; - _dst[4] = t; _dst[5] = t; _dst[6] = t; _dst[7] = t; - _dst[8] = t; _dst[9] = t; _dst[10] = t; _dst[11] = t; - _dst[12] = t; _dst[13] = t; _dst[14] = t; _dst[15] = t; -} - -/* copy a block of 8 byte elements using different strides */ -static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride, - unsigned char * _src, int _src_ystride){ - __m64 a,b,c,d,e,f,g,h; - a = *(__m64*)(_src + 0 * _src_ystride); - b = *(__m64*)(_src + 1 * _src_ystride); - c = *(__m64*)(_src + 2 * _src_ystride); - d = *(__m64*)(_src + 3 * _src_ystride); - e = *(__m64*)(_src + 4 * _src_ystride); - f = *(__m64*)(_src + 5 * _src_ystride); - g = *(__m64*)(_src + 6 * _src_ystride); - h = *(__m64*)(_src + 7 * _src_ystride); - *(__m64*)(_dst + 0 * _dst_ystride) = a; - *(__m64*)(_dst + 1 * _dst_ystride) = b; - *(__m64*)(_dst + 2 * _dst_ystride) = c; - *(__m64*)(_dst + 3 * _dst_ystride) = d; - *(__m64*)(_dst + 4 * _dst_ystride) = e; - *(__m64*)(_dst + 5 * _dst_ystride) = f; - *(__m64*)(_dst + 6 * _dst_ystride) = g; - *(__m64*)(_dst + 7 * _dst_ystride) = h; -} - -void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){ - 
ogg_int16_t __declspec(align(16)) res_buf[64]; - int dst_framei; - int dst_ystride; - int zzi; - /*_last_zzi is subtly different from an actual count of the number of - coefficients we decoded for this block. - It contains the value of zzi BEFORE the final token in the block was - decoded. - In most cases this is an EOB token (the continuation of an EOB run from a - previous block counts), and so this is the same as the coefficient count. - However, in the case that the last token was NOT an EOB token, but filled - the block up with exactly 64 coefficients, _last_zzi will be less than 64. - Provided the last token was not a pure zero run, the minimum value it can - be is 46, and so that doesn't affect any of the cases in this routine. - However, if the last token WAS a pure zero run of length 63, then _last_zzi - will be 1 while the number of coefficients decoded is 64. - Thus, we will trigger the following special case, where the real - coefficient count would not. - Note also that a zero run of length 64 will give _last_zzi a value of 0, - but we still process the DC coefficient, which might have a non-zero value - due to DC prediction. - Although convoluted, this is arguably the correct behavior: it allows us to - dequantize fewer coefficients and use a smaller transform when the block - ends with a long zero run instead of a normal EOB token. - It could be smarter... multiple separate zero runs at the end of a block - will fool it, but an encoder that generates these really deserves what it - gets. - Needless to say we inherited this approach from VP3.*/ - /*Special case only having a DC component.*/ - if(_last_zzi<2){ - __m64 p; - /*Why is the iquant product rounded in this case and no others? 
Who knows.*/ - p = _m_from_int((ogg_int32_t)_frag->dc*_dc_iquant+15>>5); - /* broadcast 16 bits into all 4 mmx subregisters */ - p = _m_punpcklwd (p,p); - p = _m_punpckldq (p,p); - loc_fill_mmx_value ((__m64 *)res_buf, p); - } - else{ - /*Then, fill in the remainder of the coefficients with 0's, and perform - the iDCT.*/ - /*First zero the buffer.*/ - /*On K7, etc., this could be replaced with movntq and sfence.*/ - loc_fill_mmx_value ((__m64 *)res_buf, _mm_setzero_si64()); - - res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant); - /*This is planned to be rewritten in MMX.*/ - for(zzi=1;zzi<_ncoefs;zzi++) - { - int ci; - ci=OC_FZIG_ZAG[zzi]; - res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]* - _ac_iquant[ci]); - } - - if(_last_zzi<10){ - oc_idct8x8_10_mmx(res_buf); - } - else { - oc_idct8x8_mmx(res_buf); - } - } - /*Fill in the target buffer.*/ - dst_framei=_state->ref_frame_idx[OC_FRAME_SELF]; - dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; - /*For now ystride values in all ref frames assumed to be equal.*/ - if(_frag->mbmode==OC_MODE_INTRA){ - oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf); - } - else{ - int ref_framei; - int ref_ystride; - int mvoffsets[2]; - ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]]; - ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride; - if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0], - _frag->mv[1],ref_ystride,_pli)>1){ - oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride, - _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride, - _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf); - } - else{ - oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride, - _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf); - } - } - - _mm_empty(); -} - - -void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli){ - const int *fragi; - const 
int *fragi_end; - int dst_framei; - int dst_ystride; - int src_framei; - int src_ystride; - dst_framei=_state->ref_frame_idx[_dst_frame]; - src_framei=_state->ref_frame_idx[_src_frame]; - dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; - src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride; - fragi_end=_fragis+_nfragis; - for(fragi=_fragis;fragifrags+*fragi; - loc_blockcopy_mmx (frag->buffer[dst_framei], dst_ystride, - frag->buffer[src_framei], src_ystride); - } - _m_empty(); -} - -#endif diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/x86int.h b/Engine/lib/libtheora/lib/dec/x86_vc/x86int.h deleted file mode 100644 index be5016100..000000000 --- a/Engine/lib/libtheora/lib/dec/x86_vc/x86int.h +++ /dev/null @@ -1,49 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -#if !defined(_x86_x86int_vc_H) -# define _x86_x86int_vc_H (1) -# include "../../internal.h" - -void oc_state_vtable_init_x86(oc_theora_state *_state); - -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t *_residue); - -void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue); - -void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue); - -void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli); - -void oc_restore_fpu_mmx(void); - -void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]); - -void oc_idct8x8_mmx(ogg_int16_t _y[64]); -void oc_idct8x8_10_mmx(ogg_int16_t _y[64]); - -void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end); - -#endif diff --git a/Engine/lib/libtheora/lib/dec/decapiwrapper.c b/Engine/lib/libtheora/lib/decapiwrapper.c similarity index 95% rename from Engine/lib/libtheora/lib/dec/decapiwrapper.c rename to Engine/lib/libtheora/lib/decapiwrapper.c index bceec6c26..12ea475d1 100644 --- a/Engine/lib/libtheora/lib/dec/decapiwrapper.c +++ b/Engine/lib/libtheora/lib/decapiwrapper.c @@ -5,7 +5,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED 
WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** @@ -19,6 +19,7 @@ #include #include #include "apiwrapper.h" +#include "decint.h" #include "theora/theoradec.h" static void th_dec_api_clear(th_api_wrapper *_api){ @@ -47,7 +48,7 @@ static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){ return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp); } -static const oc_state_dispatch_vtbl OC_DEC_DISPATCH_VTBL={ +static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={ (oc_state_clear_func)theora_decode_clear, (oc_state_control_func)theora_decode_control, (oc_state_granule_frame_func)theora_decode_granule_frame, @@ -95,6 +96,7 @@ int theora_decode_init(theora_state *_td,theora_info *_ci){ This avoids having to figure out whether or not we need to free the info struct in either theora_info_clear() or theora_clear().*/ apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo)); + if(apiinfo==NULL)return OC_FAULT; /*Make our own copy of the info struct, since its lifetime should be independent of the one we were passed in.*/ *&apiinfo->info=*_ci; @@ -130,6 +132,7 @@ int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){ theora_info struct like the ones that are used in a theora_state struct.*/ if(api==NULL){ _ci->codec_setup=_ogg_calloc(1,sizeof(*api)); + if(_ci->codec_setup==NULL)return OC_FAULT; api=(th_api_wrapper *)_ci->codec_setup; api->clear=(oc_setup_clear_func)th_dec_api_clear; } @@ -167,12 +170,14 @@ int theora_decode_packetin(theora_state *_td,ogg_packet *_op){ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){ th_api_wrapper *api; + th_dec_ctx *decode; th_ycbcr_buffer buf; int ret; 
if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; api=(th_api_wrapper *)_td->i->codec_setup; - if(!api->decode)return OC_FAULT; - ret=th_decode_ycbcr_out(api->decode,buf); + decode=(th_dec_ctx *)api->decode; + if(!decode)return OC_FAULT; + ret=th_decode_ycbcr_out(decode,buf); if(ret>=0){ _yuv->y_width=buf[0].width; _yuv->y_height=buf[0].height; diff --git a/Engine/lib/libtheora/lib/dec/decinfo.c b/Engine/lib/libtheora/lib/decinfo.c similarity index 77% rename from Engine/lib/libtheora/lib/dec/decinfo.c rename to Engine/lib/libtheora/lib/decinfo.c index 3c4ba868a..845eb1361 100644 --- a/Engine/lib/libtheora/lib/dec/decinfo.c +++ b/Engine/lib/libtheora/lib/decinfo.c @@ -5,13 +5,13 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: decinfo.c 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ @@ -27,30 +27,30 @@ _opb: The pack buffer to read the octets from. _buf: The byte array to store the unpacked bytes in. 
_len: The number of octets to unpack.*/ -static void oc_unpack_octets(oggpack_buffer *_opb,char *_buf,size_t _len){ +static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){ while(_len-->0){ long val; - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); *_buf++=(char)val; } } /*Unpacks a 32-bit integer encoded by octets in little-endian form.*/ -static long oc_unpack_length(oggpack_buffer *_opb){ +static long oc_unpack_length(oc_pack_buf *_opb){ long ret[4]; int i; - for(i=0;i<4;i++)theorapackB_read(_opb,8,ret+i); + for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8); return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24; } -static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){ +static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){ long val; /*Check the codec bitstream version.*/ - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); _info->version_major=(unsigned char)val; - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); _info->version_minor=(unsigned char)val; - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); _info->version_subminor=(unsigned char)val; /*verify we can parse this bitstream version. 
We accept earlier minors and all subminors, by spec*/ @@ -60,25 +60,21 @@ static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){ return TH_EVERSION; } /*Read the encoded frame description.*/ - theorapackB_read(_opb,16,&val); + val=oc_pack_read(_opb,16); _info->frame_width=(ogg_uint32_t)val<<4; - theorapackB_read(_opb,16,&val); + val=oc_pack_read(_opb,16); _info->frame_height=(ogg_uint32_t)val<<4; - theorapackB_read(_opb,24,&val); + val=oc_pack_read(_opb,24); _info->pic_width=(ogg_uint32_t)val; - theorapackB_read(_opb,24,&val); + val=oc_pack_read(_opb,24); _info->pic_height=(ogg_uint32_t)val; - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); _info->pic_x=(ogg_uint32_t)val; - /*Note: The sense of pic_y is inverted in what we pass back to the - application compared to how it is stored in the bitstream. - This is because the bitstream uses a right-handed coordinate system, while - applications expect a left-handed one.*/ - theorapackB_read(_opb,8,&val); - _info->pic_y=_info->frame_height-_info->pic_height-(ogg_uint32_t)val; - theorapackB_read(_opb,32,&val); + val=oc_pack_read(_opb,8); + _info->pic_y=(ogg_uint32_t)val; + val=oc_pack_read(_opb,32); _info->fps_numerator=(ogg_uint32_t)val; - theorapackB_read(_opb,32,&val); + val=oc_pack_read(_opb,32); _info->fps_denominator=(ogg_uint32_t)val; if(_info->frame_width==0||_info->frame_height==0|| _info->pic_width+_info->pic_x>_info->frame_width|| @@ -86,38 +82,46 @@ static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){ _info->fps_numerator==0||_info->fps_denominator==0){ return TH_EBADHEADER; } - theorapackB_read(_opb,24,&val); + /*Note: The sense of pic_y is inverted in what we pass back to the + application compared to how it is stored in the bitstream. 
+ This is because the bitstream uses a right-handed coordinate system, while + applications expect a left-handed one.*/ + _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y; + val=oc_pack_read(_opb,24); _info->aspect_numerator=(ogg_uint32_t)val; - theorapackB_read(_opb,24,&val); + val=oc_pack_read(_opb,24); _info->aspect_denominator=(ogg_uint32_t)val; - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); _info->colorspace=(th_colorspace)val; - theorapackB_read(_opb,24,&val); + val=oc_pack_read(_opb,24); _info->target_bitrate=(int)val; - theorapackB_read(_opb,6,&val); + val=oc_pack_read(_opb,6); _info->quality=(int)val; - theorapackB_read(_opb,5,&val); + val=oc_pack_read(_opb,5); _info->keyframe_granule_shift=(int)val; - theorapackB_read(_opb,2,&val); + val=oc_pack_read(_opb,2); _info->pixel_fmt=(th_pixel_fmt)val; if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER; - if(theorapackB_read(_opb,3,&val)<0||val!=0)return TH_EBADHEADER; + val=oc_pack_read(_opb,3); + if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; return 0; } -static int oc_comment_unpack(oggpack_buffer *_opb,th_comment *_tc){ +static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){ long len; int i; /*Read the vendor string.*/ len=oc_unpack_length(_opb); - if(len<0||theorapackB_bytes(_opb)+len>_opb->storage)return TH_EBADHEADER; + if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER; _tc->vendor=_ogg_malloc((size_t)len+1); + if(_tc->vendor==NULL)return TH_EFAULT; oc_unpack_octets(_opb,_tc->vendor,len); _tc->vendor[len]='\0'; /*Read the user comments.*/ _tc->comments=(int)oc_unpack_length(_opb); - if(_tc->comments<0||_tc->comments>(LONG_MAX>>2)|| - theorapackB_bytes(_opb)+((long)_tc->comments<<2)>_opb->storage){ + len=_tc->comments; + if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){ + _tc->comments=0; return TH_EBADHEADER; } _tc->comment_lengths=(int *)_ogg_malloc( @@ -126,19 +130,23 @@ static int oc_comment_unpack(oggpack_buffer 
*_opb,th_comment *_tc){ _tc->comments*sizeof(_tc->user_comments[0])); for(i=0;i<_tc->comments;i++){ len=oc_unpack_length(_opb); - if(len<0||theorapackB_bytes(_opb)+len>_opb->storage){ + if(len<0||len>oc_pack_bytes_left(_opb)){ _tc->comments=i; return TH_EBADHEADER; } _tc->comment_lengths[i]=len; _tc->user_comments[i]=_ogg_malloc((size_t)len+1); + if(_tc->user_comments[i]==NULL){ + _tc->comments=i; + return TH_EFAULT; + } oc_unpack_octets(_opb,_tc->user_comments[i],len); _tc->user_comments[i][len]='\0'; } - return theorapackB_read(_opb,0,&len)<0?TH_EBADHEADER:0; + return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0; } -static int oc_setup_unpack(oggpack_buffer *_opb,th_setup_info *_setup){ +static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){ int ret; /*Read the quantizer tables.*/ ret=oc_quant_params_unpack(_opb,&_setup->qinfo); @@ -152,13 +160,13 @@ static void oc_setup_clear(th_setup_info *_setup){ oc_huff_trees_clear(_setup->huff_tables); } -static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info, +static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info, th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){ char buffer[6]; long val; int packtype; int ret; - theorapackB_read(_opb,8,&val); + val=oc_pack_read(_opb,8); packtype=(int)val; /*If we're at a data packet and we have received all three headers, we're done.*/ @@ -198,6 +206,7 @@ static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info, return TH_EBADHEADER; } setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup)); + if(setup==NULL)return TH_EFAULT; ret=oc_setup_unpack(_opb,setup); if(ret<0){ oc_setup_clear(setup); @@ -222,13 +231,11 @@ static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info, stream until it returns 0.*/ int th_decode_headerin(th_info *_info,th_comment *_tc, th_setup_info **_setup,ogg_packet *_op){ - oggpack_buffer opb; - int ret; + oc_pack_buf opb; if(_op==NULL)return TH_EBADHEADER; if(_info==NULL)return TH_EFAULT; - 
theorapackB_readinit(&opb,_op->packet,_op->bytes); - ret=oc_dec_headerin(&opb,_info,_tc,_setup,_op); - return ret; + oc_pack_readinit(&opb,_op->packet,_op->bytes); + return oc_dec_headerin(&opb,_info,_tc,_setup,_op); } void th_setup_free(th_setup_info *_setup){ diff --git a/Engine/lib/libtheora/lib/dec/decint.h b/Engine/lib/libtheora/lib/decint.h similarity index 68% rename from Engine/lib/libtheora/lib/dec/decint.h rename to Engine/lib/libtheora/lib/decint.h index 7924c0e0c..261b67631 100644 --- a/Engine/lib/libtheora/lib/dec/decint.h +++ b/Engine/lib/libtheora/lib/decint.h @@ -5,13 +5,13 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: decint.h 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ @@ -19,13 +19,12 @@ #if !defined(_decint_H) # define _decint_H (1) # include "theora/theoradec.h" -# include "../internal.h" +# include "internal.h" # include "bitpack.h" typedef struct th_setup_info oc_setup_info; typedef struct th_dec_ctx oc_dec_ctx; -# include "idct.h" # include "huffdec.h" # include "dequant.h" @@ -54,24 +53,20 @@ struct th_dec_ctx{ when a frame has been processed and a data packet is ready.*/ int packet_state; /*Buffer in which to assemble packets.*/ - oggpack_buffer opb; + oc_pack_buf opb; /*Huffman decode trees.*/ oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; - /*The index of one past the last token in each plane for each coefficient. 
- The final entries are the total number of tokens for each coefficient.*/ - int ti0[3][64]; - /*The index of one past the last extra bits entry in each plane for each - coefficient. - The final entries are the total number of extra bits entries for each - coefficient.*/ - int ebi0[3][64]; + /*The index of the first token in each plane for each coefficient.*/ + ptrdiff_t ti0[3][64]; /*The number of outstanding EOB runs at the start of each coefficient in each plane.*/ - int eob_runs[3][64]; + ptrdiff_t eob_runs[3][64]; /*The DCT token lists.*/ - unsigned char **dct_tokens; + unsigned char *dct_tokens; /*The extra bits associated with DCT tokens.*/ - ogg_uint16_t **extra_bits; + unsigned char *extra_bits; + /*The number of dct tokens unpacked so far.*/ + int dct_tokens_count; /*The out-of-loop post-processing level.*/ int pp_level; /*The DC scale used for out-of-loop deblocking.*/ @@ -85,11 +80,28 @@ struct th_dec_ctx{ /*The storage for the post-processed frame buffer.*/ unsigned char *pp_frame_data; /*Whether or not the post-processsed frame buffer has space for chroma.*/ - int pp_frame_has_chroma; - /*The buffer used for the post-processed frame.*/ + int pp_frame_state; + /*The buffer used for the post-processed frame. 
+ Note that this is _not_ guaranteed to have the same strides and offsets as + the reference frame buffers.*/ th_ycbcr_buffer pp_frame_buf; /*The striped decode callback function.*/ th_stripe_callback stripe_cb; +# if defined(HAVE_CAIRO) + /*Output metrics for debugging.*/ + int telemetry; + int telemetry_mbmode; + int telemetry_mv; + int telemetry_qi; + int telemetry_bits; + int telemetry_frame_bytes; + int telemetry_coding_bytes; + int telemetry_mode_bytes; + int telemetry_mv_bytes; + int telemetry_qi_bytes; + int telemetry_dc_bytes; + unsigned char *telemetry_frame_data; +# endif }; #endif diff --git a/Engine/lib/libtheora/lib/decode.c b/Engine/lib/libtheora/lib/decode.c new file mode 100644 index 000000000..7be66463d --- /dev/null +++ b/Engine/lib/libtheora/lib/decode.c @@ -0,0 +1,2943 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $ + + ********************************************************************/ + +#include +#include +#include +#include "decint.h" +#if defined(OC_DUMP_IMAGES) +# include +# include "png.h" +#endif +#if defined(HAVE_CAIRO) +# include +#endif + + +/*No post-processing.*/ +#define OC_PP_LEVEL_DISABLED (0) +/*Keep track of DC qi for each block only.*/ +#define OC_PP_LEVEL_TRACKDCQI (1) +/*Deblock the luma plane.*/ +#define OC_PP_LEVEL_DEBLOCKY (2) +/*Dering the luma plane.*/ +#define OC_PP_LEVEL_DERINGY (3) +/*Stronger luma plane deringing.*/ +#define OC_PP_LEVEL_SDERINGY (4) +/*Deblock the chroma planes.*/ +#define OC_PP_LEVEL_DEBLOCKC (5) +/*Dering the chroma planes.*/ +#define OC_PP_LEVEL_DERINGC (6) +/*Stronger chroma plane deringing.*/ +#define OC_PP_LEVEL_SDERINGC (7) +/*Maximum valid post-processing level.*/ +#define OC_PP_LEVEL_MAX (7) + + + +/*The mode alphabets for the various mode coding schemes. 
+ Scheme 0 uses a custom alphabet, which is not stored in this table.*/ +static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={ + /*Last MV dominates */ + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV, + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV, + OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2, + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV, + OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV, + OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR + }, + /*No MV dominates.*/ + { + OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2, + OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST, + OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + /*Default ordering.*/ + { + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST, + OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + } +}; + + +/*The original DCT tokens are extended and reordered during the construction of + the Huffman tables. + The extension means more bits can be read with fewer calls to the bitpacker + during the Huffman decoding process (at the cost of larger Huffman tables), + and fewer tokens require additional extra bits (reducing the average storage + per decoded token). + The revised ordering reveals essential information in the token value + itself; specifically, whether or not there are additional extra bits to read + and the parameter to which those extra bits are applied. 
+ The token is used to fetch a code word from the OC_DCT_CODE_WORD table below. + The extra bits are added into code word at the bit position inferred from the + token value, giving the final code word from which all required parameters + are derived. + The number of EOBs and the leading zero run length can be extracted directly. + The coefficient magnitude is optionally negated before extraction, according + to a 'flip' bit.*/ + +/*The number of additional extra bits that are decoded with each of the + internal DCT tokens.*/ +static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={ + 12,4,3,3,4,4,5,5,8,8,8,8,3,3,6 +}; + +/*Whether or not an internal token needs any additional extra bits. + This is true exactly when the token is a valid index into + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS. + NOTE(review): the argument is expanded without parentheses, so only pass a + simple expression.*/ +#define OC_DCT_TOKEN_NEEDS_MORE(token) \ + (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \ + sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS))) + +/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/ +#define OC_DCT_TOKEN_FAT_EOB (0) + +/*The number of EOBs to use for an end-of-frame token. + Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which + is not yet available everywhere; this should be equivalent.*/ +#define OC_DCT_EOB_FINISH (~(size_t)0>>1) + +/*The location of the (6) run length bits in the code word. + These are placed at index 0 and given 8 bits (even though 6 would suffice) + because it may be faster to extract the lower byte on some platforms.*/ +#define OC_DCT_CW_RLEN_SHIFT (0) +/*The location of the (12) EOB bits in the code word.*/ +#define OC_DCT_CW_EOB_SHIFT (8) +/*The location of the (1) flip bit in the code word. + This must be right under the magnitude bits.*/ +#define OC_DCT_CW_FLIP_BIT (20) +/*The location of the (11) token magnitude bits in the code word. 
+ These must be last, and rely on a sign-extending right shift.*/ +#define OC_DCT_CW_MAG_SHIFT (21) + +/*Pack the given fields into a code word.*/ +#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \ + ((_eobs)<state,_info,3); + if(ret<0)return ret; + ret=oc_huff_trees_copy(_dec->huff_tables, + (const oc_huff_node *const *)_setup->huff_tables); + if(ret<0){ + oc_state_clear(&_dec->state); + return ret; + } + /*For each fragment, allocate one byte for every DCT coefficient token, plus + one byte for extra-bits for each token, plus one more byte for the long + EOB run, just in case it's the very last token and has a run length of + one.*/ + _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)* + _dec->state.nfrags*sizeof(_dec->dct_tokens[0])); + if(_dec->dct_tokens==NULL){ + oc_huff_trees_clear(_dec->huff_tables); + oc_state_clear(&_dec->state); + return TH_EFAULT; + } + for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ + _dec->state.dequant_tables[qi][pli][qti]= + _dec->state.dequant_table_data[qi][pli][qti]; + } + oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale, + &_setup->qinfo); + for(qi=0;qi<64;qi++){ + int qsum; + qsum=0; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+ + _dec->state.dequant_tables[qti][pli][qi][17]+ + _dec->state.dequant_tables[qti][pli][qi][18]+ + _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0); + } + _dec->pp_sharp_mod[qi]=-(qsum>>11); + } + memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits, + sizeof(_dec->state.loop_filter_limits)); + _dec->pp_level=OC_PP_LEVEL_DISABLED; + _dec->dc_qis=NULL; + _dec->variances=NULL; + _dec->pp_frame_data=NULL; + _dec->stripe_cb.ctx=NULL; + _dec->stripe_cb.stripe_decoded=NULL; +#if defined(HAVE_CAIRO) + _dec->telemetry=0; + _dec->telemetry_bits=0; + _dec->telemetry_qi=0; + _dec->telemetry_mbmode=0; + _dec->telemetry_mv=0; + _dec->telemetry_frame_data=NULL; +#endif + return 0; +} + +static void 
oc_dec_clear(oc_dec_ctx *_dec){ +#if defined(HAVE_CAIRO) + _ogg_free(_dec->telemetry_frame_data); +#endif + _ogg_free(_dec->pp_frame_data); + _ogg_free(_dec->variances); + _ogg_free(_dec->dc_qis); + _ogg_free(_dec->dct_tokens); + oc_huff_trees_clear(_dec->huff_tables); + oc_state_clear(&_dec->state); +} + + +static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){ + long val; + /*Check to make sure this is a data packet.*/ + val=oc_pack_read1(&_dec->opb); + if(val!=0)return TH_EBADPACKET; + /*Read in the frame type (I or P).*/ + val=oc_pack_read1(&_dec->opb); + _dec->state.frame_type=(int)val; + /*Read in the qi list.*/ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[0]=(unsigned char)val; + val=oc_pack_read1(&_dec->opb); + if(!val)_dec->state.nqis=1; + else{ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[1]=(unsigned char)val; + val=oc_pack_read1(&_dec->opb); + if(!val)_dec->state.nqis=2; + else{ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[2]=(unsigned char)val; + _dec->state.nqis=3; + } + } + if(_dec->state.frame_type==OC_INTRA_FRAME){ + /*Keyframes have 3 unused configuration bits, holdovers from VP3 days. + Most of the other unused bits in the VP3 headers were eliminated. + I don't know why these remain.*/ + /*I wanted to eliminate wasted bits, but not all config wiggle room + --Monty.*/ + val=oc_pack_read(&_dec->opb,3); + if(val!=0)return TH_EIMPL; + } + return 0; +} + +/*Mark all fragments as coded and in OC_MODE_INTRA. + This also builds up the coded fragment list (in coded order), and clears the + uncoded fragment list. 
+ It does not update the coded macro block list nor the super block flags, as + those are not used when decoding INTRA frames.*/ +static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){ + const oc_sb_map *sb_maps; + const oc_sb_flags *sb_flags; + oc_fragment *frags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t prev_ncoded_fragis; + unsigned nsbs; + unsigned sbi; + int pli; + coded_fragis=_dec->state.coded_fragis; + prev_ncoded_fragis=ncoded_fragis=0; + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; + sb_flags=_dec->state.sb_flags; + frags=_dec->state.frags; + sbi=nsbs=0; + for(pli=0;pli<3;pli++){ + nsbs+=_dec->state.fplanes[pli].nsbs; + for(;sbi=0){ + frags[fragi].coded=1; + frags[fragi].mb_mode=OC_MODE_INTRA; + coded_fragis[ncoded_fragis++]=fragi; + } + } + } + } + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; + prev_ncoded_fragis=ncoded_fragis; + } + _dec->state.ntotal_coded_fragis=ncoded_fragis; +} + +/*Decodes the bit flags indicating whether each super block is partially coded + or not. + Every super block visited also has its coded_fully flag cleared. + Return: The number of partially coded super blocks.*/ +static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ + oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned npartial; + unsigned run_count; + long val; + int flag; + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + sb_flags=_dec->state.sb_flags; + nsbs=_dec->state.nsbs; + sbi=npartial=0; + while(sbiopb); + full_run=run_count>=4129; + do{ + sb_flags[sbi].coded_partially=flag; + sb_flags[sbi].coded_fully=0; + npartial+=flag; + sbi++; + } + while(--run_count>0&&sbiopb); + flag=(int)val; + } + else flag=!flag; + } + /*TODO: run_count should be 0 here. + If it's not, we should issue a warning of some kind.*/ + return npartial; +} + +/*Decodes the bit flags for whether or not each non-partially-coded super + block is fully coded or not. + This function should only be called if there is at least one + non-partially-coded super block. 
+ Return: Nothing; the function is declared void.*/ +static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){ + oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned run_count; + long val; + int flag; + sb_flags=_dec->state.sb_flags; + nsbs=_dec->state.nsbs; + /*Skip partially coded super blocks.*/ + for(sbi=0;sb_flags[sbi].coded_partially;sbi++); + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + do{ + int full_run; + run_count=oc_sb_run_unpack(&_dec->opb); + full_run=run_count>=4129; + for(;sbiopb); + flag=(int)val; + } + else flag=!flag; + } + while(sbistate.nsbs)oc_dec_coded_sb_flags_unpack(_dec); + if(npartial>0){ + val=oc_pack_read1(&_dec->opb); + flag=!(int)val; + } + else flag=0; + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; + sb_flags=_dec->state.sb_flags; + frags=_dec->state.frags; + sbi=nsbs=run_count=0; + coded_fragis=_dec->state.coded_fragis; + uncoded_fragis=coded_fragis+_dec->state.nfrags; + prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0; + for(pli=0;pli<3;pli++){ + nsbs+=_dec->state.fplanes[pli].nsbs; + for(;sbi=0){ + int coded; + if(sb_flags[sbi].coded_fully)coded=1; + else if(!sb_flags[sbi].coded_partially)coded=0; + else{ + if(run_count<=0){ + run_count=oc_block_run_unpack(&_dec->opb); + flag=!flag; + } + run_count--; + coded=flag; + } + if(coded)coded_fragis[ncoded_fragis++]=fragi; + else *(uncoded_fragis-++nuncoded_fragis)=fragi; + frags[fragi].coded=coded; + } + } + } + } + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; + prev_ncoded_fragis=ncoded_fragis; + } + _dec->state.ntotal_coded_fragis=ncoded_fragis; + /*TODO: run_count should be 0 here. 
+ If it's not, we should issue a warning of some kind.*/ +} + + + +typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb); + +static int oc_vlc_mode_unpack(oc_pack_buf *_opb){ + long val; + int i; + for(i=0;i<7;i++){ + val=oc_pack_read1(_opb); + if(!val)break; + } + return i; +} + +static int oc_clc_mode_unpack(oc_pack_buf *_opb){ + long val; + val=oc_pack_read(_opb,3); + return (int)val; +} + +/*Unpacks the list of macro block modes for INTER frames.*/ +static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){ + const oc_mb_map *mb_maps; + signed char *mb_modes; + const oc_fragment *frags; + const unsigned char *alphabet; + unsigned char scheme0_alphabet[8]; + oc_mode_unpack_func mode_unpack; + size_t nmbs; + size_t mbi; + long val; + int mode_scheme; + val=oc_pack_read(&_dec->opb,3); + mode_scheme=(int)val; + if(mode_scheme==0){ + int mi; + /*Just in case, initialize the modes to something. + If the bitstream doesn't contain each index exactly once, it's likely + corrupt and the rest of the packet is garbage anyway, but this way we + won't crash, and we'll decode SOMETHING.*/ + /*LOOP VECTORIZES*/ + for(mi=0;miopb,3); + scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi]; + } + alphabet=scheme0_alphabet; + } + else alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; + if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack; + else mode_unpack=oc_vlc_mode_unpack; + mb_modes=_dec->state.mb_modes; + mb_maps=(const oc_mb_map *)_dec->state.mb_maps; + nmbs=_dec->state.nmbs; + frags=_dec->state.frags; + for(mbi=0;mbiopb)]; + /*There were none: INTER_NOMV is forced.*/ + else mb_modes[mbi]=OC_MODE_INTER_NOMV; + } + } +} + + + +typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb); + +static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){ + long bits; + int mask; + int mv; + bits=oc_pack_read(_opb,3); + switch(bits){ + case 0:return 0; + case 1:return 1; + case 2:return -1; + case 3: + case 4:{ + mv=(int)(bits-1); + bits=oc_pack_read1(_opb); + }break; + /*case 5: + case 6: + case 7:*/ + default:{ 
+ mv=1<>1); + bits&=1; + }break; + } + mask=-(int)bits; + return mv+mask^mask; +} + +static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){ + long bits; + int mask; + int mv; + bits=oc_pack_read(_opb,6); + mv=(int)bits>>1; + mask=-((int)bits&1); + return mv+mask^mask; +} + +/*Unpacks the list of motion vectors for INTER frames, and propagates the macro + block modes and motion vectors to the individual fragments.*/ +static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ + const oc_mb_map *mb_maps; + const signed char *mb_modes; + oc_set_chroma_mvs_func set_chroma_mvs; + oc_mv_comp_unpack_func mv_comp_unpack; + oc_fragment *frags; + oc_mv *frag_mvs; + const unsigned char *map_idxs; + int map_nidxs; + oc_mv last_mv[2]; + oc_mv cbmvs[4]; + size_t nmbs; + size_t mbi; + long val; + set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt]; + val=oc_pack_read1(&_dec->opb); + mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack; + map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt]; + memset(last_mv,0,sizeof(last_mv)); + frags=_dec->state.frags; + frag_mvs=_dec->state.frag_mvs; + mb_maps=(const oc_mb_map *)_dec->state.mb_maps; + mb_modes=_dec->state.mb_modes; + nmbs=_dec->state.nmbs; + for(mbi=0;mbi>2][mapi&3]; + if(frags[fragi].coded)coded[ncoded++]=mapi; + } + while(++mapiiopb); + lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi])); + } + else lbmvs[bi][0]=lbmvs[bi][1]=0; + } + if(codedi>0){ + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0])); + } + if(codedi>2][bi]; + frags[fragi].mb_mode=mb_mode; + memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi])); + } + } + }break; + case OC_MODE_INTER_MV:{ + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=last_mv[0][1]=(signed 
char)(*mv_comp_unpack)(&_dec->opb); + }break; + case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break; + case OC_MODE_INTER_MV_LAST2:{ + memcpy(mbmv,last_mv[1],sizeof(mbmv)); + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + memcpy(last_mv[0],mbmv,sizeof(last_mv[0])); + }break; + case OC_MODE_GOLDEN_MV:{ + mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + }break; + default:memset(mbmv,0,sizeof(mbmv));break; + } + /*4MV mode fills in the fragments itself. + For all other modes we can use this common code.*/ + if(mb_mode!=OC_MODE_INTER_MV_FOUR){ + for(codedi=0;codedi>2][mapi&3]; + frags[fragi].mb_mode=mb_mode; + memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv)); + } + } + } + } +} + +static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ + oc_fragment *frags; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + ptrdiff_t fragi; + ncoded_fragis=_dec->state.ntotal_coded_fragis; + if(ncoded_fragis<=0)return; + frags=_dec->state.frags; + coded_fragis=_dec->state.coded_fragis; + if(_dec->state.nqis==1){ + /*If this frame has only a single qi value, then just use it for all coded + fragments.*/ + for(fragii=0;fragiiopb); + flag=(int)val; + nqi1=0; + fragii=0; + while(fragiiopb); + full_run=run_count>=4129; + do{ + frags[coded_fragis[fragii++]].qii=flag; + nqi1+=flag; + } + while(--run_count>0&&fragiiopb); + flag=(int)val; + } + else flag=!flag; + } + /*TODO: run_count should be 0 here. 
+ If it's not, we should issue a warning of some kind.*/ + /*If we have 3 different qi's for this frame, and there was at least one + fragment with a non-zero qi, make the second pass.*/ + if(_dec->state.nqis==3&&nqi1>0){ + /*Skip qii==0 fragments.*/ + for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++); + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + do{ + int full_run; + run_count=oc_sb_run_unpack(&_dec->opb); + full_run=run_count>=4129; + for(;fragiiopb); + flag=(int)val; + } + else flag=!flag; + } + while(fragiidct_tokens; + frags=_dec->state.frags; + coded_fragis=_dec->state.coded_fragis; + ncoded_fragis=fragii=eobs=ti=0; + for(pli=0;pli<3;pli++){ + ptrdiff_t run_counts[64]; + ptrdiff_t eob_count; + ptrdiff_t eobi; + int rli; + ncoded_fragis+=_dec->state.ncoded_fragis[pli]; + memset(run_counts,0,sizeof(run_counts)); + _dec->eob_runs[pli][0]=eobs; + _dec->ti0[pli][0]=ti; + /*Continue any previous EOB run, if there was one.*/ + eobi=eobs; + if(ncoded_fragis-fragii0)frags[coded_fragis[fragii++]].dc=0; + while(fragiiopb, + _dec->huff_tables[_huff_idxs[pli+1>>1]]); + dct_tokens[ti++]=(unsigned char)token; + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + eb=(int)oc_pack_read(&_dec->opb, + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); + dct_tokens[ti++]=(unsigned char)eb; + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); + eb<<=OC_DCT_TOKEN_EB_POS(token); + } + else eb=0; + cw=OC_DCT_CODE_WORD[token]+eb; + eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH; + if(eobs){ + eobi=OC_MINI(eobs,ncoded_fragis-fragii); + eob_count+=eobi; + eobs-=eobi; + while(eobi-->0)frags[coded_fragis[fragii++]].dc=0; + } + else{ + int coeff; + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; + if(skip)coeff=0; + run_counts[skip]++; + frags[coded_fragis[fragii++]].dc=coeff; + } + } + /*Add the total EOB count to the longest run length.*/ + run_counts[63]+=eob_count; + /*And convert the run_counts array 
to a moment table.*/ + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; + /*Finally, subtract off the number of coefficients that have been + accounted for by runs started in this coefficient.*/ + for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli]; + } + _dec->dct_tokens_count=ti; + return eobs; +} + +/*Unpacks the AC coefficient tokens for the coefficient with index _zzi. + This can completely discard coefficient values while unpacking, and so is + somewhat simpler than unpacking the DC coefficient tokens. + _huff_idxs: The Huffman table indices to use: entry 0 for plane 0 and + entry 1 for planes 1 and 2. + _ntoks_left: The number of tokens left to be decoded in each color plane for + each coefficient. + This is updated as EOB tokens and zero run tokens are decoded. + _eobs: The length of any outstanding EOB run from previous + coefficients. + Return: The length of any outstanding EOB run. + NOTE(review): the run is tracked as a ptrdiff_t and may be set to + OC_DCT_EOB_FINISH, but it is returned through an int; confirm the value + survives on targets where ptrdiff_t is wider than int.*/ +static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2], + ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){ + unsigned char *dct_tokens; + ptrdiff_t ti; + int pli; + dct_tokens=_dec->dct_tokens; + ti=_dec->dct_tokens_count; + for(pli=0;pli<3;pli++){ + ptrdiff_t run_counts[64]; + ptrdiff_t eob_count; + size_t ntoks_left; + size_t ntoks; + int rli; + _dec->eob_runs[pli][_zzi]=_eobs; + _dec->ti0[pli][_zzi]=ti; + ntoks_left=_ntoks_left[pli][_zzi]; + memset(run_counts,0,sizeof(run_counts)); + eob_count=0; + ntoks=0; + while(ntoks+_eobsopb, + _dec->huff_tables[_huff_idxs[pli+1>>1]]); + dct_tokens[ti++]=(unsigned char)token; + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + eb=(int)oc_pack_read(&_dec->opb, + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); + dct_tokens[ti++]=(unsigned char)eb; + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); + eb<<=OC_DCT_TOKEN_EB_POS(token); + } + else eb=0; + cw=OC_DCT_CODE_WORD[token]+eb; + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH; + if(_eobs==0){ + run_counts[skip]++; + 
ntoks++; + } + } + /*Add the portion of the last EOB run actually used by this coefficient.*/ + eob_count+=ntoks_left-ntoks; + /*And remove it from the remaining EOB count.*/ + _eobs-=ntoks_left-ntoks; + /*Add the total EOB count to the longest run length.*/ + run_counts[63]+=eob_count; + /*And convert the run_counts array to a moment table.*/ + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; + /*Finally, subtract off the number of coefficients that have been + accounted for by runs started in this coefficient.*/ + for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli]; + } + _dec->dct_tokens_count=ti; + return _eobs; +} + +/*Tokens describing the DCT coefficients that belong to each fragment are + stored in the bitstream grouped by coefficient, not by fragment. + + This means that we either decode all the tokens in order, building up a + separate coefficient list for each fragment as we go, and then go back and + do the iDCT on each fragment, or we have to create separate lists of tokens + for each coefficient, so that we can pull the next token required off the + head of the appropriate list when decoding a specific fragment. + + The former was VP3's choice, and it meant 2*w*h extra storage for all the + decoded coefficient values. + + We take the second option, which lets us store just one to three bytes per + token (generally far fewer than the number of coefficients, due to EOB + tokens and zero runs), and which requires us to only maintain a counter for + each of the 64 coefficients, instead of a counter for every fragment to + determine where the next token goes. + + We actually use 3 counters per coefficient, one for each color plane, so we + can decode all color planes simultaneously. 
+ This lets color conversion, etc., be done as soon as a full MCU (one or + two super block rows) is decoded, while the image data is still in cache.*/ + +static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){ + static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64}; + ptrdiff_t ntoks_left[3][64]; + int huff_idxs[2]; + ptrdiff_t eobs; + long val; + int pli; + int zzi; + int hgi; + for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ + ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli]; + } + val=oc_pack_read(&_dec->opb,4); + huff_idxs[0]=(int)val; + val=oc_pack_read(&_dec->opb,4); + huff_idxs[1]=(int)val; + _dec->eob_runs[0][0]=0; + eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left); +#if defined(HAVE_CAIRO) + _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + val=oc_pack_read(&_dec->opb,4); + huff_idxs[0]=(int)val; + val=oc_pack_read(&_dec->opb,4); + huff_idxs[1]=(int)val; + zzi=1; + for(hgi=1;hgi<5;hgi++){ + huff_idxs[0]+=16; + huff_idxs[1]+=16; + for(;zzipp_level<=OC_PP_LEVEL_DISABLED){ + if(_dec->dc_qis!=NULL){ + _ogg_free(_dec->dc_qis); + _dec->dc_qis=NULL; + _ogg_free(_dec->variances); + _dec->variances=NULL; + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + } + return 1; + } + if(_dec->dc_qis==NULL){ + /*If we haven't been tracking DC quantization indices, there's no point in + starting now.*/ + if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1; + _dec->dc_qis=(unsigned char *)_ogg_malloc( + _dec->state.nfrags*sizeof(_dec->dc_qis[0])); + if(_dec->dc_qis==NULL)return 1; + memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags); + } + else{ + unsigned char *dc_qis; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + unsigned char qi0; + /*Update the DC quantization index of each coded block.*/ + dc_qis=_dec->dc_qis; + coded_fragis=_dec->state.coded_fragis; + ncoded_fragis=_dec->state.ncoded_fragis[0]+ + _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2]; + qi0=(unsigned 
char)_dec->state.qis[0]; + for(fragii=0;fragiipp_level<=OC_PP_LEVEL_TRACKDCQI){ + if(_dec->variances!=NULL){ + _ogg_free(_dec->variances); + _dec->variances=NULL; + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + } + return 1; + } + if(_dec->variances==NULL){ + size_t frame_sz; + size_t c_sz; + int c_w; + int c_h; + frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; + c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); + c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); + c_sz=c_w*(size_t)c_h; + /*Allocate space for the chroma planes, even if we're not going to use + them; this simplifies allocation state management, though it may waste + memory on the few systems that don't overcommit pages.*/ + frame_sz+=c_sz<<1; + _dec->pp_frame_data=(unsigned char *)_ogg_malloc( + frame_sz*sizeof(_dec->pp_frame_data[0])); + _dec->variances=(int *)_ogg_malloc( + _dec->state.nfrags*sizeof(_dec->variances[0])); + if(_dec->variances==NULL||_dec->pp_frame_data==NULL){ + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + _ogg_free(_dec->variances); + _dec->variances=NULL; + return 1; + } + /*Force an update of the PP buffer pointers.*/ + _dec->pp_frame_state=0; + } + /*Update the PP buffer pointers if necessary.*/ + if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){ + if(_dec->pp_levelpp_frame_buf[0].width=_dec->state.info.frame_width; + _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; + _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width; + _dec->pp_frame_buf[0].data=_dec->pp_frame_data+ + (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride; + } + else{ + size_t y_sz; + size_t c_sz; + int c_w; + int c_h; + /*Otherwise, set up pointers to all three PP planes.*/ + y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; + c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); + 
c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); + c_sz=c_w*(size_t)c_h; + _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; + _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; + _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width; + _dec->pp_frame_buf[0].data=_dec->pp_frame_data; + _dec->pp_frame_buf[1].width=c_w; + _dec->pp_frame_buf[1].height=c_h; + _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width; + _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz; + _dec->pp_frame_buf[2].width=c_w; + _dec->pp_frame_buf[2].height=c_h; + _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width; + _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz; + oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf); + } + _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC); + } + /*If we're not processing chroma, copy the reference frame's chroma planes.*/ + if(_dec->pp_levelpp_frame_buf+1, + _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1, + sizeof(_dec->pp_frame_buf[1])*2); + } + return 0; +} + + + +typedef struct{ + int bounding_values[256]; + ptrdiff_t ti[3][64]; + ptrdiff_t eob_runs[3][64]; + const ptrdiff_t *coded_fragis[3]; + const ptrdiff_t *uncoded_fragis[3]; + ptrdiff_t ncoded_fragis[3]; + ptrdiff_t nuncoded_fragis[3]; + const ogg_uint16_t *dequant[3][3][2]; + int fragy0[3]; + int fragy_end[3]; + int pred_last[3][3]; + int mcu_nvfrags; + int loop_filter; + int pp_level; +}oc_dec_pipeline_state; + + + +/*Initialize the main decoding pipeline.*/ +static void oc_dec_pipeline_init(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe){ + const ptrdiff_t *coded_fragis; + const ptrdiff_t *uncoded_fragis; + int pli; + int qii; + int qti; + /*If chroma is sub-sampled in the vertical direction, we have to decode two + super block rows of Y' for each super block row of Cb and Cr.*/ + _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2); + /*Initialize the token and extra bits indices for each plane 
and + coefficient.*/ + memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti)); + /*Also copy over the initial EOB run counts.*/ + memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs)); + /*Set up per-plane pointers to the coded and uncoded fragments lists. + Each plane's uncoded pointer steps back by (nfrags-ncoded_fragis) + entries.*/ + coded_fragis=_dec->state.coded_fragis; + uncoded_fragis=coded_fragis+_dec->state.nfrags; + for(pli=0;pli<3;pli++){ + ptrdiff_t ncoded_fragis; + _pipe->coded_fragis[pli]=coded_fragis; + _pipe->uncoded_fragis[pli]=uncoded_fragis; + ncoded_fragis=_dec->state.ncoded_fragis[pli]; + coded_fragis+=ncoded_fragis; + uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags; + } + /*Set up condensed quantizer tables.*/ + for(pli=0;pli<3;pli++){ + for(qii=0;qii<_dec->state.nqis;qii++){ + for(qti=0;qti<2;qti++){ + _pipe->dequant[pli][qii][qti]= + _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti]; + } + } + } + /*Set the previous DC predictor to 0 for all color planes and frame types.*/ + memset(_pipe->pred_last,0,sizeof(_pipe->pred_last)); + /*Initialize the bounding value array for the loop filter.*/ + _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state, + _pipe->bounding_values); + /*Initialize any buffers needed for post-processing. + We also save the current post-processing level, to guard against the user + changing it from a callback.*/ + if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level; + /*If we don't have enough information to post-process, disable it, regardless + of the user-requested level.*/ + else{ + _pipe->pp_level=OC_PP_LEVEL_DISABLED; + memcpy(_dec->pp_frame_buf, + _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]], + sizeof(_dec->pp_frame_buf[0])*3); + } +} + +/*Undo the DC prediction in a single plane of an MCU (one or two super block + rows). 
+ As a side effect, the number of coded and uncoded fragments in this plane of + the MCU is also computed.*/ +static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe,int _pli){ + const oc_fragment_plane *fplane; + oc_fragment *frags; + int *pred_last; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragi; + int fragx; + int fragy; + int fragy0; + int fragy_end; + int nhfrags; + /*Compute the first and last fragment row of the current MCU for this + plane.*/ + fplane=_dec->state.fplanes+_pli; + fragy0=_pipe->fragy0[_pli]; + fragy_end=_pipe->fragy_end[_pli]; + nhfrags=fplane->nhfrags; + pred_last=_pipe->pred_last[_pli]; + frags=_dec->state.frags; + ncoded_fragis=0; + fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags; + for(fragy=fragy0;fragy=nhfrags)ur_ref=-1; + else{ + ur_ref=u_frags[fragi+1].coded? + OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; + } + if(frags[fragi].coded){ + int pred; + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + /*We break out a separate case based on which of our neighbors use + the same reference frames. 
+ This is somewhat faster than trying to make a generic case which + handles all of them, since it reduces lots of poorly predicted + jumps to one switch statement, and also lets a number of the + multiplications be optimized out by strength reduction.*/ + switch((l_ref==ref)|(ul_ref==ref)<<1| + (u_ref==ref)<<2|(ur_ref==ref)<<3){ + default:pred=pred_last[ref];break; + case 1: + case 3:pred=frags[fragi-1].dc;break; + case 2:pred=u_frags[fragi-1].dc;break; + case 4: + case 6: + case 12:pred=u_frags[fragi].dc;break; + case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; + case 8:pred=u_frags[fragi+1].dc;break; + case 9: + case 11: + case 13:{ + pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; + }break; + case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; + case 14:{ + pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) + +10*u_frags[fragi].dc)/16; + }break; + case 7: + case 15:{ + int p0; + int p1; + int p2; + p0=frags[fragi-1].dc; + p1=u_frags[fragi-1].dc; + p2=u_frags[fragi].dc; + pred=(29*(p0+p2)-26*p1)/32; + if(abs(pred-p2)>128)pred=p2; + else if(abs(pred-p0)>128)pred=p0; + else if(abs(pred-p1)>128)pred=p1; + }break; + } + pred_last[ref]=frags[fragi].dc+=pred; + ncoded_fragis++; + l_ref=ref; + } + else l_ref=-1; + ul_ref=u_ref; + u_ref=ur_ref; + } + } + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; + /*Also save the number of uncoded fragments so we know how many to copy.*/ + _pipe->nuncoded_fragis[_pli]= + (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis; +} + +/*Reconstructs all coded fragments in a single MCU (one or two super block + rows). + This requires that each coded fragment have a proper macro block mode and + motion vector (if not in INTRA mode), and have its DC value decoded, with + the DC prediction process reversed, and the number of coded and uncoded + fragments in this plane of the MCU be counted. 
+ The token lists for each color plane and coefficient should also be filled + in, along with initial token offsets, extra bits offsets, and EOB run + counts.*/ +static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe,int _pli){ + unsigned char *dct_tokens; + const unsigned char *dct_fzig_zag; + ogg_uint16_t dc_quant[2]; + const oc_fragment *frags; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + ptrdiff_t *ti; + ptrdiff_t *eob_runs; + int qti; + dct_tokens=_dec->dct_tokens; + dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag; + frags=_dec->state.frags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + ti=_pipe->ti[_pli]; + eob_runs=_pipe->eob_runs[_pli]; + for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0]; + for(fragii=0;fragiidequant[_pli][frags[fragi].qii][qti]; + /*Decode the AC coefficients.*/ + for(zzi=0;zzi<64;){ + int token; + last_zzi=zzi; + if(eob_runs[zzi]){ + eob_runs[zzi]--; + break; + } + else{ + ptrdiff_t eob; + int cw; + int rlen; + int coeff; + int lti; + lti=ti[zzi]; + token=dct_tokens[lti++]; + cw=OC_DCT_CODE_WORD[token]; + /*These parts could be done branchless, but the branches are fairly + predictable and the C code translates into more than a few + instructions, so it's worth it to avoid them.*/ + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + cw+=dct_tokens[lti++]<>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(token==OC_DCT_TOKEN_FAT_EOB){ + eob+=dct_tokens[lti++]<<8; + if(eob==0)eob=OC_DCT_EOB_FINISH; + } + rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; + eob_runs[zzi]=eob; + ti[zzi]=lti; + zzi+=rlen; + dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]); + zzi+=!eob; + } + } + /*TODO: zzi should be exactly 64 here. + If it's not, we should report some kind of warning.*/ + zzi=OC_MINI(zzi,64); + dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc; + /*last_zzi is always initialized. 
+ If your compiler thinks otherwise, it is dumb.*/ + oc_state_frag_recon(&_dec->state,fragi,_pli, + dct_coeffs,last_zzi,dc_quant[qti]); + } + _pipe->coded_fragis[_pli]+=ncoded_fragis; + /*Right now the reconstructed MCU has only the coded blocks in it.*/ + /*TODO: We make the decision here to always copy the uncoded blocks into it + from the reference frame. + We could also copy the coded blocks back over the reference frame, if we + wait for an additional MCU to be decoded, which might be faster if only a + small number of blocks are coded. + However, this introduces more latency, creating a larger cache footprint. + It's unknown which decision is better, but this one results in simpler + code, and the hard case (high bitrate, high resolution) is handled + correctly.*/ + /*Copy the uncoded blocks from the previous reference frame.*/ + _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; + oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli], + _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); +} + +/*Filter a horizontal block edge.*/ +static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src,int _src_ystride,int _qstep,int _flimit, + int *_variance0,int *_variance1){ + unsigned char *rdst; + const unsigned char *rsrc; + unsigned char *cdst; + const unsigned char *csrc; + int r[10]; + int sum0; + int sum1; + int bx; + int by; + rdst=_dst; + rsrc=_src; + for(bx=0;bx<8;bx++){ + cdst=rdst; + csrc=rsrc; + for(by=0;by<10;by++){ + r[by]=*csrc; + csrc+=_src_ystride; + } + sum0=sum1=0; + for(by=0;by<4;by++){ + sum0+=abs(r[by+1]-r[by]); + sum1+=abs(r[by+5]-r[by+6]); + } + *_variance0+=OC_MINI(255,sum0); + *_variance1+=OC_MINI(255,sum1); + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ + *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); + cdst+=_dst_ystride; + *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); + cdst+=_dst_ystride; + for(by=0;by<4;by++){ + 
*cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+ + r[by+4]+r[by+5]+r[by+6]+4>>3); + cdst+=_dst_ystride; + } + *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); + cdst+=_dst_ystride; + *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); + } + else{ + for(by=1;by<=8;by++){ + *cdst=(unsigned char)r[by]; + cdst+=_dst_ystride; + } + } + rdst++; + rsrc++; + } +} + +/*Filter a vertical block edge.*/ +static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride, + int _qstep,int _flimit,int *_variances){ + unsigned char *rdst; + const unsigned char *rsrc; + unsigned char *cdst; + int r[10]; + int sum0; + int sum1; + int bx; + int by; + cdst=_dst; + for(by=0;by<8;by++){ + rsrc=cdst-1; + rdst=cdst; + for(bx=0;bx<10;bx++)r[bx]=*rsrc++; + sum0=sum1=0; + for(bx=0;bx<4;bx++){ + sum0+=abs(r[bx+1]-r[bx]); + sum1+=abs(r[bx+5]-r[bx+6]); + } + _variances[0]+=OC_MINI(255,sum0); + _variances[1]+=OC_MINI(255,sum1); + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ + *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); + *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); + for(bx=0;bx<4;bx++){ + *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+ + r[bx+4]+r[bx+5]+r[bx+6]+4>>3); + } + *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); + *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); + } + cdst+=_dst_ystride; + } +} + +static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec, + th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0, + int _fragy_end){ + oc_fragment_plane *fplane; + int *variance; + unsigned char *dc_qi; + unsigned char *dst; + const unsigned char *src; + ptrdiff_t froffset; + int dst_ystride; + int src_ystride; + int nhfrags; + int width; + int notstart; + int notdone; + int flimit; + int qstep; + int y_end; + int y; + int x; + _dst+=_pli; + _src+=_pli; + fplane=_dec->state.fplanes+_pli; + nhfrags=fplane->nhfrags; + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; + 
variance=_dec->variances+froffset; + dc_qi=_dec->dc_qis+froffset; + notstart=_fragy0>0; + notdone=_fragy_endnvfrags; + /*We want to clear an extra row of variances, except at the end.*/ + memset(variance+(nhfrags&-notstart),0, + (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0]))); + /*Except for the first time, we want to point to the middle of the row.*/ + y=(_fragy0<<3)+(notstart<<2); + dst_ystride=_dst->stride; + src_ystride=_src->stride; + dst=_dst->data+y*(ptrdiff_t)dst_ystride; + src=_src->data+y*(ptrdiff_t)src_ystride; + width=_dst->width; + for(;y<4;y++){ + memcpy(dst,src,width*sizeof(dst[0])); + dst+=dst_ystride; + src+=src_ystride; + } + /*We also want to skip the last row in the frame for this loop.*/ + y_end=_fragy_end-!notdone<<3; + for(;ypp_dc_scale[*dc_qi]; + flimit=(qstep*3)>>2; + oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride, + qstep,flimit,variance,variance+nhfrags); + variance++; + dc_qi++; + for(x=8;xpp_dc_scale[*dc_qi]; + flimit=(qstep*3)>>2; + oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride, + qstep,flimit,variance,variance+nhfrags); + oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride, + qstep,flimit,variance-1); + variance++; + dc_qi++; + } + dst+=dst_ystride<<3; + src+=src_ystride<<3; + } + /*And finally, handle the last row in the frame, if it's in the range.*/ + if(!notdone){ + int height; + height=_dst->height; + for(;ypp_dc_scale[*dc_qi++]; + flimit=(qstep*3)>>2; + oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride, + qstep,flimit,variance++); + } + } +} + +static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, + int _dc_scale,int _sharp_mod,int _strong){ + static const unsigned char OC_MOD_MAX[2]={24,32}; + static const unsigned char OC_MOD_SHIFT[2]={1,0}; + const unsigned char *psrc; + const unsigned char *src; + const unsigned char *nsrc; + unsigned char *dst; + int vmod[72]; + int hmod[72]; + int mod_hi; + int by; + int bx; + 
mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]); + dst=_idata; + src=dst; + psrc=src-(_ystride&-!(_b&4)); + for(by=0;by<9;by++){ + for(bx=0;bx<8;bx++){ + int mod; + mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7); + for(bx=1;bx<7;bx++){ + a=128; + b=64; + w=hmod[(bx<<3)+by]; + a-=w; + b+=w*src[bx-1]; + w=vmod[(by<<3)+bx]; + a-=w; + b+=w*psrc[bx]; + w=vmod[(by+1<<3)+bx]; + a-=w; + b+=w*nsrc[bx]; + w=hmod[(bx+1<<3)+by]; + a-=w; + b+=w*src[bx+1]; + dst[bx]=OC_CLAMP255(a*src[bx]+b>>7); + } + a=128; + b=64; + w=hmod[(7<<3)+by]; + a-=w; + b+=w*src[6]; + w=vmod[(by<<3)+7]; + a-=w; + b+=w*psrc[7]; + w=vmod[(by+1<<3)+7]; + a-=w; + b+=w*nsrc[7]; + w=hmod[(8<<3)+by]; + a-=w; + b+=w*src[7+!(_b&2)]; + dst[7]=OC_CLAMP255(a*src[7]+b>>7); + dst+=_ystride; + psrc=src; + src=nsrc; + nsrc+=_ystride&-(!(_b&8)|by<6); + } +} + +#define OC_DERING_THRESH1 (384) +#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) +#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) +#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) + +static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, + int _pli,int _fragy0,int _fragy_end){ + th_img_plane *iplane; + oc_fragment_plane *fplane; + oc_fragment *frag; + int *variance; + unsigned char *idata; + ptrdiff_t froffset; + int ystride; + int nhfrags; + int sthresh; + int strong; + int y_end; + int width; + int height; + int y; + int x; + iplane=_img+_pli; + fplane=_dec->state.fplanes+_pli; + nhfrags=fplane->nhfrags; + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; + variance=_dec->variances+froffset; + frag=_dec->state.frags+froffset; + strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); + sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3; + y=_fragy0<<3; + ystride=iplane->stride; + idata=iplane->data+y*(ptrdiff_t)ystride; + y_end=_fragy_end<<3; + width=iplane->width; + height=iplane->height; + for(;ystate.qis[frag->qii]; + var=*variance; + b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; + if(strong&&var>sthresh){ + 
oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| + !(b&2)&&variance[1]>OC_DERING_THRESH4|| + !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4|| + !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + } + } + else if(var>OC_DERING_THRESH2){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + } + else if(var>OC_DERING_THRESH1){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0); + } + frag++; + variance++; + } + idata+=ystride<<3; + } +} + + + +th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){ + oc_dec_ctx *dec; + if(_info==NULL||_setup==NULL)return NULL; + dec=_ogg_malloc(sizeof(*dec)); + if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){ + _ogg_free(dec); + return NULL; + } + dec->state.curframe_num=0; + return dec; +} + +void th_decode_free(th_dec_ctx *_dec){ + if(_dec!=NULL){ + oc_dec_clear(_dec); + _ogg_free(_dec); + } +} + +int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, + size_t _buf_sz){ + switch(_req){ + case TH_DECCTL_GET_PPLEVEL_MAX:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + (*(int *)_buf)=OC_PP_LEVEL_MAX; + return 0; + }break; + case TH_DECCTL_SET_PPLEVEL:{ + int pp_level; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + pp_level=*(int *)_buf; + if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL; + _dec->pp_level=pp_level; + return 0; + }break; + case TH_DECCTL_SET_GRANPOS:{ + ogg_int64_t granpos; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL; + granpos=*(ogg_int64_t *)_buf; + if(granpos<0)return TH_EINVAL; + _dec->state.granpos=granpos; + 
_dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift) + -_dec->state.granpos_bias; + _dec->state.curframe_num=_dec->state.keyframe_num + +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1); + return 0; + }break; + case TH_DECCTL_SET_STRIPE_CB:{ + th_stripe_callback *cb; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL; + cb=(th_stripe_callback *)_buf; + _dec->stripe_cb.ctx=cb->ctx; + _dec->stripe_cb.stripe_decoded=cb->stripe_decoded; + return 0; + }break; +#ifdef HAVE_CAIRO + case TH_DECCTL_SET_TELEMETRY_MBMODE:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_mbmode=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_MV:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_mv=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_QI:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_qi=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_BITS:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_bits=*(int *)_buf; + return 0; + }break; +#endif + default:return TH_EIMPL; + } +} + +/*We're decoding an INTER frame, but have no initialized reference + buffers (i.e., decoding did not start on a key frame). 
+ We initialize them to a solid gray here.*/ +static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){ + th_info *info; + size_t yplane_sz; + size_t cplane_sz; + int yhstride; + int yheight; + int chstride; + int cheight; + _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0; + _dec->state.ref_frame_idx[OC_FRAME_PREV]=0; + _dec->state.ref_frame_idx[OC_FRAME_SELF]=1; + info=&_dec->state.info; + yhstride=info->frame_width+2*OC_UMV_PADDING; + yheight=info->frame_height+2*OC_UMV_PADDING; + chstride=yhstride>>!(info->pixel_fmt&1); + cheight=yheight>>!(info->pixel_fmt&2); + yplane_sz=yhstride*(size_t)yheight; + cplane_sz=chstride*(size_t)cheight; + memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz); +} + +int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, + ogg_int64_t *_granpos){ + int ret; + if(_dec==NULL||_op==NULL)return TH_EFAULT; + /*A completely empty packet indicates a dropped frame and is treated exactly + like an inter frame with no coded blocks. + Only proceed if we have a non-empty packet.*/ + if(_op->bytes!=0){ + oc_dec_pipeline_state pipe; + th_ycbcr_buffer stripe_buf; + int stripe_fragy; + int refi; + int pli; + int notstart; + int notdone; + oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes); +#if defined(HAVE_CAIRO) + _dec->telemetry_frame_bytes=_op->bytes; +#endif + ret=oc_dec_frame_header_unpack(_dec); + if(ret<0)return ret; + /*Select a free buffer to use for the reconstructed version of this + frame.*/ + if(_dec->state.frame_type!=OC_INTRA_FRAME&& + (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| + _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){ + /*No reference frames yet!*/ + oc_dec_init_dummy_frame(_dec); + refi=_dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + else{ + for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]|| + refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++); + _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; + } + if(_dec->state.frame_type==OC_INTRA_FRAME){ + oc_dec_mark_all_intra(_dec); + 
_dec->state.keyframe_num=_dec->state.curframe_num; +#if defined(HAVE_CAIRO) + _dec->telemetry_coding_bytes= + _dec->telemetry_mode_bytes= + _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + } + else{ + oc_dec_coded_flags_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_mb_modes_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_mv_unpack_and_frag_modes_fill(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + } + oc_dec_block_qis_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_residual_tokens_unpack(_dec); + /*Update granule position. + This must be done before the striped decode callbacks so that the + application knows what to do with the frame data.*/ + _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< + _dec->state.info.keyframe_granule_shift) + +(_dec->state.curframe_num-_dec->state.keyframe_num); + _dec->state.curframe_num++; + if(_granpos!=NULL)*_granpos=_dec->state.granpos; + /*All of the rest of the operations -- DC prediction reversal, + reconstructing coded fragments, copying uncoded fragments, loop + filtering, extending borders, and out-of-loop post-processing -- should + be pipelined. + I.e., DC prediction reversal, reconstruction, and uncoded fragment + copying are done for one or two super block rows, then loop filtering is + run as far as it can, then bordering copying, then post-processing. + For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super + block rows, and one chroma. + Otherwise, an MCU consists of one super block row from each plane. + Inside each MCU, we perform all of the steps on one color plane before + moving on to the next. 
+ After reconstruction, the additional filtering stages introduce a delay + since they need some pixels from the next fragment row. + Thus the actual number of decoded rows available is slightly smaller for + the first MCU, and slightly larger for the last. + + This entire process allows us to operate on the data while it is still in + cache, resulting in big performance improvements. + An application callback allows further application processing (blitting + to video memory, color conversion, etc.) to also use the data while it's + in cache.*/ + oc_dec_pipeline_init(_dec,&pipe); + oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf); + notstart=0; + notdone=1; + for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){ + int avail_fragy0; + int avail_fragy_end; + avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags; + notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli; + /*Compute the first and last fragment row of the current MCU for this + plane.*/ + frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2); + pipe.fragy0[pli]=stripe_fragy>>frag_shift; + pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags, + pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift)); + oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli); + oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli); + sdelay=edelay=0; + if(pipe.loop_filter){ + sdelay+=notstart; + edelay+=notdone; + oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values, + refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + } + /*To fill the borders, we have an additional two pixel delay, since a + fragment in the next row could filter its top edge, using two pixels + from a fragment in this row. 
+ But there's no reason to delay a full fragment between the two.*/ + oc_state_borders_fill_rows(&_dec->state,refi,pli, + (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1), + (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1)); + /*Out-of-loop post-processing.*/ + pp_offset=3*(pli!=0); + if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){ + /*Perform de-blocking in one plane.*/ + sdelay+=notstart; + edelay+=notdone; + oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf, + _dec->state.ref_frame_bufs[refi],pli, + pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){ + /*Perform de-ringing in one plane.*/ + sdelay+=notstart; + edelay+=notdone; + oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli, + pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + } + } + /*If no post-processing is done, we still need to delay a row for the + loop filter, thanks to the strange filtering order VP3 chose.*/ + else if(pipe.loop_filter){ + sdelay+=notstart; + edelay+=notdone; + } + /*Compute the intersection of the available rows in all planes. + If chroma is sub-sampled, the effect of each of its delays is + doubled, but luma might have more post-processing filters enabled + than chroma, so we don't know up front which one is the limiting + factor.*/ + avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){ + /*The callback might want to use the FPU, so let's make sure they can. 
+ We violate all kinds of ABI restrictions by not doing this until + now, but none of them actually matter since we don't use floating + point ourselves.*/ + oc_restore_fpu(&_dec->state); + /*Make the callback, ensuring we flip the sense of the "start" and + "end" of the available region upside down.*/ + (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf, + _dec->state.fplanes[0].nvfrags-avail_fragy_end, + _dec->state.fplanes[0].nvfrags-avail_fragy0); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli); + /*Update the reference frame indices.*/ + if(_dec->state.frame_type==OC_INTRA_FRAME){ + /*The new frame becomes both the previous and gold reference frames.*/ + _dec->state.ref_frame_idx[OC_FRAME_GOLD]= + _dec->state.ref_frame_idx[OC_FRAME_PREV]= + _dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + else{ + /*Otherwise, just replace the previous reference frame.*/ + _dec->state.ref_frame_idx[OC_FRAME_PREV]= + _dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG + gamma values, if nothing else).*/ + oc_restore_fpu(&_dec->state); +#if defined(OC_DUMP_IMAGES) + /*Don't dump images for dropped frames.*/ + oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec"); +#endif + return 0; + } + else{ + if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| + _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){ + int refi; + /*No reference frames yet!*/ + oc_dec_init_dummy_frame(_dec); + refi=_dec->state.ref_frame_idx[OC_FRAME_PREV]; + _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; + memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi], + sizeof(_dec->pp_frame_buf[0])*3); + } + /*Just update the granule position and return.*/ + _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< + _dec->state.info.keyframe_granule_shift) + +(_dec->state.curframe_num-_dec->state.keyframe_num); + 
_dec->state.curframe_num++; + if(_granpos!=NULL)*_granpos=_dec->state.granpos; + return TH_DUPFRAME; + } +} + +int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){ + if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT; + oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf); +#if defined(HAVE_CAIRO) + /*If telemetry ioctls are active, we need to draw to the output buffer. + Stuff the plane into cairo.*/ + if(_dec->telemetry){ + cairo_surface_t *cs; + unsigned char *data; + unsigned char *y_row; + unsigned char *u_row; + unsigned char *v_row; + unsigned char *rgb_row; + int cstride; + int w; + int h; + int x; + int y; + int hdec; + int vdec; + w=_ycbcr[0].width; + h=_ycbcr[0].height; + hdec=!(_dec->state.info.pixel_fmt&1); + vdec=!(_dec->state.info.pixel_fmt&2); + /*Lazy data buffer init. + We could try to re-use the post-processing buffer, which would save + memory, but complicate the allocation logic there. + I don't think anyone cares about memory usage when using telemetry; it is + not meant for embedded devices.*/ + if(_dec->telemetry_frame_data==NULL){ + _dec->telemetry_frame_data=_ogg_malloc( + (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data)); + if(_dec->telemetry_frame_data==NULL)return 0; + } + cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h); + /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/ + data=cairo_image_surface_get_data(cs); + if(data==NULL){ + cairo_surface_destroy(cs); + return 0; + } + cstride=cairo_image_surface_get_stride(cs); + y_row=_ycbcr[0].data; + u_row=_ycbcr[1].data; + v_row=_ycbcr[2].data; + rgb_row=data; + for(y=0;y>hdec]-363703744)/1635200; + g=(3827562*y_row[x]-1287801*u_row[x>>hdec] + -2672387*v_row[x>>hdec]+447306710)/3287200; + b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600; + rgb_row[4*x+0]=OC_CLAMP255(b); + rgb_row[4*x+1]=OC_CLAMP255(g); + rgb_row[4*x+2]=OC_CLAMP255(r); + } + y_row+=_ycbcr[0].stride; + u_row+=_ycbcr[1].stride&-((y&1)|!vdec); + 
v_row+=_ycbcr[2].stride&-((y&1)|!vdec); + rgb_row+=cstride; + } + /*Draw coded identifier for each macroblock (stored in Hilbert order).*/ + { + cairo_t *c; + const oc_fragment *frags; + oc_mv *frag_mvs; + const signed char *mb_modes; + oc_mb_map *mb_maps; + size_t nmbs; + size_t mbi; + int row2; + int col2; + int qim[3]={0,0,0}; + if(_dec->state.nqis==2){ + int bqi; + bqi=_dec->state.qis[0]; + if(_dec->state.qis[1]>bqi)qim[1]=1; + if(_dec->state.qis[1]state.nqis==3){ + int bqi; + int cqi; + int dqi; + bqi=_dec->state.qis[0]; + cqi=_dec->state.qis[1]; + dqi=_dec->state.qis[2]; + if(cqi>bqi&&dqi>bqi){ + if(dqi>cqi){ + qim[1]=1; + qim[2]=2; + } + else{ + qim[1]=2; + qim[2]=1; + } + } + else if(cqistate.frags; + frag_mvs=_dec->state.frag_mvs; + mb_modes=_dec->state.mb_modes; + mb_maps=_dec->state.mb_maps; + nmbs=_dec->state.nmbs; + row2=0; + col2=0; + for(mbi=0;mbi>1)&1))*16-16; + x=(col2>>1)*16; + cairo_set_line_width(c,1.); + /*Keyframe (all intra) red box.*/ + if(_dec->state.frame_type==OC_INTRA_FRAME){ + if(_dec->telemetry_mbmode&0x02){ + cairo_set_source_rgba(c,1.,0,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,0,0,.25); + cairo_fill(c); + } + } + else{ + const signed char *frag_mv; + ptrdiff_t fragi; + for(bi=0;bi<4;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(fragi>=0&&frags[fragi].coded){ + frag_mv=frag_mvs[fragi]; + break; + } + } + if(bi<4){ + switch(mb_modes[mbi]){ + case OC_MODE_INTRA:{ + if(_dec->telemetry_mbmode&0x02){ + cairo_set_source_rgba(c,1.,0,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,0,0,.25); + cairo_fill(c); + } + }break; + case OC_MODE_INTER_NOMV:{ + if(_dec->telemetry_mbmode&0x01){ + cairo_set_source_rgba(c,0,0,1.,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0,0,1.,.25); + cairo_fill(c); + } + }break; + case OC_MODE_INTER_MV:{ + if(_dec->telemetry_mbmode&0x04){ + 
cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x04){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_LAST:{ + if(_dec->telemetry_mbmode&0x08){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_move_to(c,x+13.5,y+2.5); + cairo_line_to(c,x+2.5,y+8); + cairo_line_to(c,x+13.5,y+13.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x08){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_LAST2:{ + if(_dec->telemetry_mbmode&0x10){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_move_to(c,x+8,y+2.5); + cairo_line_to(c,x+2.5,y+8); + cairo_line_to(c,x+8,y+13.5); + cairo_move_to(c,x+13.5,y+2.5); + cairo_line_to(c,x+8,y+8); + cairo_line_to(c,x+13.5,y+13.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x10){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + 
cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_GOLDEN_NOMV:{ + if(_dec->telemetry_mbmode&0x20){ + cairo_set_source_rgba(c,1.,1.,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,1.,0,.25); + cairo_fill(c); + } + }break; + case OC_MODE_GOLDEN_MV:{ + if(_dec->telemetry_mbmode&0x40){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,1.,1.,0,.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x40){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_FOUR:{ + if(_dec->telemetry_mbmode&0x80){ + cairo_rectangle(c,x+2.5,y+2.5,4,4); + cairo_rectangle(c,x+9.5,y+2.5,4,4); + cairo_rectangle(c,x+2.5,y+9.5,4,4); + cairo_rectangle(c,x+9.5,y+9.5,4,4); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_stroke(c); + } + /*4mv is odd, coded in raster order.*/ + fragi=mb_maps[mbi][0][0]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+4,y+12); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][1]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + 
cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+12,y+12); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][2]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+4,y+4); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][3]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+12,y+4); + cairo_stroke(c); + } + }break; + } + } + } + /*qii illustration.*/ + if(_dec->telemetry_qi&0x2){ + cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE); + for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + int qiv; + int xp; + int yp; + xp=x+(bi&1)*8; + yp=y+8-(bi&2)*4; + fragi=mb_maps[mbi][0][bi]; + if(fragi>=0&&frags[fragi].coded){ + qiv=qim[frags[fragi].qii]; + cairo_set_line_width(c,3.); + cairo_set_source_rgba(c,0.,0.,0.,.5); + switch(qiv){ + /*Double plus:*/ + case 2:{ + if((bi&1)^((bi&2)>>1)){ + cairo_move_to(c,xp+2.5,yp+1.5); + cairo_line_to(c,xp+2.5,yp+3.5); + cairo_move_to(c,xp+1.5,yp+2.5); + cairo_line_to(c,xp+3.5,yp+2.5); + cairo_move_to(c,xp+5.5,yp+4.5); + cairo_line_to(c,xp+5.5,yp+6.5); + 
cairo_move_to(c,xp+4.5,yp+5.5); + cairo_line_to(c,xp+6.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0.,1.,1.,1.); + } + else{ + cairo_move_to(c,xp+5.5,yp+1.5); + cairo_line_to(c,xp+5.5,yp+3.5); + cairo_move_to(c,xp+4.5,yp+2.5); + cairo_line_to(c,xp+6.5,yp+2.5); + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+2.5,yp+6.5); + cairo_move_to(c,xp+1.5,yp+5.5); + cairo_line_to(c,xp+3.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0.,1.,1.,1.); + } + }break; + /*Double minus:*/ + case -2:{ + cairo_move_to(c,xp+2.5,yp+2.5); + cairo_line_to(c,xp+5.5,yp+2.5); + cairo_move_to(c,xp+2.5,yp+5.5); + cairo_line_to(c,xp+5.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,1.,1.,1.); + }break; + /*Plus:*/ + case 1:{ + if(bi&2==0)yp-=2; + if(bi&1==0)xp-=2; + cairo_move_to(c,xp+4.5,yp+2.5); + cairo_line_to(c,xp+4.5,yp+6.5); + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+6.5,yp+4.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,.1,1.,.3,1.); + break; + } + /*Fall through.*/ + /*Minus:*/ + case -1:{ + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+6.5,yp+4.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,.3,.1,1.); + }break; + default:continue; + } + cairo_set_line_width(c,1.); + cairo_stroke(c); + } + } + } + col2++; + if((col2>>1)>=_dec->state.nhmbs){ + col2=0; + row2+=2; + } + } + /*Bit usage indicator[s]:*/ + if(_dec->telemetry_bits){ + int widths[6]; + int fpsn; + int fpsd; + int mult; + int fullw; + int padw; + int i; + fpsn=_dec->state.info.fps_numerator; + fpsd=_dec->state.info.fps_denominator; + mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits); + fullw=250.f*h*fpsd*mult/fpsn; + padw=w-24; + /*Header and coded block bits.*/ + if(_dec->telemetry_frame_bytes<0|| + _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){ + _dec->telemetry_frame_bytes=0; + } + if(_dec->telemetry_coding_bytes<0|| + _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_coding_bytes=0; + 
} + if(_dec->telemetry_mode_bytes<0|| + _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_mode_bytes=0; + } + if(_dec->telemetry_mv_bytes<0|| + _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_mv_bytes=0; + } + if(_dec->telemetry_qi_bytes<0|| + _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_qi_bytes=0; + } + if(_dec->telemetry_dc_bytes<0|| + _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_dc_bytes=0; + } + widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw; + widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw; + widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw; + widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw; + widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw; + widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw; + for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w; + cairo_set_source_rgba(c,.0,.0,.0,.6); + cairo_rectangle(c,10,h-33,widths[0]+1,5); + cairo_rectangle(c,10,h-29,widths[1]+1,5); + cairo_rectangle(c,10,h-25,widths[2]+1,5); + cairo_rectangle(c,10,h-21,widths[3]+1,5); + cairo_rectangle(c,10,h-17,widths[4]+1,5); + cairo_rectangle(c,10,h-13,widths[5]+1,5); + cairo_fill(c); + cairo_set_source_rgb(c,1,0,0); + cairo_rectangle(c,10.5,h-32.5,widths[0],4); + cairo_fill(c); + cairo_set_source_rgb(c,0,1,0); + cairo_rectangle(c,10.5,h-28.5,widths[1],4); + cairo_fill(c); + cairo_set_source_rgb(c,0,0,1); + cairo_rectangle(c,10.5,h-24.5,widths[2],4); + cairo_fill(c); + cairo_set_source_rgb(c,.6,.4,.0); + cairo_rectangle(c,10.5,h-20.5,widths[3],4); + cairo_fill(c); + cairo_set_source_rgb(c,.3,.3,.3); + cairo_rectangle(c,10.5,h-16.5,widths[4],4); + cairo_fill(c); + cairo_set_source_rgb(c,.5,.5,.8); + cairo_rectangle(c,10.5,h-12.5,widths[5],4); + cairo_fill(c); + } + /*Master qi indicator[s]:*/ + if(_dec->telemetry_qi&0x1){ + cairo_text_extents_t 
extents; + char buffer[10]; + int p; + int y; + p=0; + y=h-7.5; + if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10; + buffer[p++]=48+_dec->state.qis[0]%10; + if(_dec->state.nqis>=2){ + buffer[p++]=' '; + if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10; + buffer[p++]=48+_dec->state.qis[1]%10; + } + if(_dec->state.nqis==3){ + buffer[p++]=' '; + if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10; + buffer[p++]=48+_dec->state.qis[2]%10; + } + buffer[p++]='\0'; + cairo_select_font_face(c,"sans", + CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD); + cairo_set_font_size(c,18); + cairo_text_extents(c,buffer,&extents); + cairo_set_source_rgb(c,1,1,1); + cairo_move_to(c,w-extents.x_advance-10,y); + cairo_show_text(c,buffer); + cairo_set_source_rgb(c,0,0,0); + cairo_move_to(c,w-extents.x_advance-10,y); + cairo_text_path(c,buffer); + cairo_set_line_width(c,.8); + cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND); + cairo_stroke(c); + } + cairo_destroy(c); + } + /*Out of the Cairo plane into the telemetry YUV buffer.*/ + _ycbcr[0].data=_dec->telemetry_frame_data; + _ycbcr[0].stride=_ycbcr[0].width; + _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride; + _ycbcr[1].stride=_ycbcr[1].width; + _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride; + _ycbcr[2].stride=_ycbcr[2].width; + y_row=_ycbcr[0].data; + u_row=_ycbcr[1].data; + v_row=_ycbcr[2].data; + rgb_row=data; + /*This is one of the few places it's worth handling chroma on a + case-by-case basis.*/ + switch(_dec->state.info.pixel_fmt){ + case TH_PF_420:{ + for(y=0;y>1]=OC_CLAMP255(u); + v_row[x>>1]=OC_CLAMP255(v); + } + y_row+=_ycbcr[0].stride<<1; + u_row+=_ycbcr[1].stride; + v_row+=_ycbcr[2].stride; + rgb_row+=cstride<<1; + } + }break; + case TH_PF_422:{ + for(y=0;y>1]=OC_CLAMP255(u); + v_row[x>>1]=OC_CLAMP255(v); + } + y_row+=_ycbcr[0].stride; + u_row+=_ycbcr[1].stride; + v_row+=_ycbcr[2].stride; + rgb_row+=cstride; + } + }break; + /*case TH_PF_444:*/ + default:{ + 
for(y=0;yloop_filter_limits[qi]=(unsigned char)val; } - theorapackB_read(_opb,4,&val); + val=oc_pack_read(_opb,4); nbits=(int)val+1; for(qi=0;qi<64;qi++){ - theorapackB_read(_opb,nbits,&val); + val=oc_pack_read(_opb,nbits); _qinfo->ac_scale[qi]=(ogg_uint16_t)val; } - theorapackB_read(_opb,4,&val); + val=oc_pack_read(_opb,4); nbits=(int)val+1; for(qi=0;qi<64;qi++){ - theorapackB_read(_opb,nbits,&val); + val=oc_pack_read(_opb,nbits); _qinfo->dc_scale[qi]=(ogg_uint16_t)val; } - theorapackB_read(_opb,9,&val); + val=oc_pack_read(_opb,9); nbase_mats=(int)val+1; base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0])); + if(base_mats==NULL)return TH_EFAULT; for(bmi=0;bmiqi_ranges[qti]+pli; if(i>0){ - theorapackB_read1(_opb,&val); + val=oc_pack_read1(_opb); if(!val){ int qtj; int plj; if(qti>0){ - theorapackB_read1(_opb,&val); + val=oc_pack_read1(_opb); if(val){ qtj=qti-1; plj=pli; @@ -95,13 +95,13 @@ int oc_quant_params_unpack(oggpack_buffer *_opb, continue; } } - theorapackB_read(_opb,nbits,&val); + val=oc_pack_read(_opb,nbits); indices[0]=(int)val; for(qi=qri=0;qi<63;){ - theorapackB_read(_opb,oc_ilog(62-qi),&val); + val=oc_pack_read(_opb,oc_ilog(62-qi)); sizes[qri]=(int)val+1; qi+=(int)val+1; - theorapackB_read(_opb,nbits,&val); + val=oc_pack_read(_opb,nbits); indices[++qri]=(int)val; } /*Note: The caller is responsible for cleaning up any partially @@ -112,8 +112,20 @@ int oc_quant_params_unpack(oggpack_buffer *_opb, } qranges->nranges=qri; qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0])); + if(qranges->sizes==NULL){ + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + _ogg_free(base_mats); + return TH_EFAULT; + } memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0])); qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0])); + if(qrbms==NULL){ + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + _ogg_free(base_mats); + return TH_EFAULT; + } qranges->base_matrices=(const th_quant_base 
*)qrbms; do{ bmi=indices[qri]; diff --git a/Engine/lib/libtheora/lib/dec/dequant.h b/Engine/lib/libtheora/lib/dequant.h similarity index 82% rename from Engine/lib/libtheora/lib/dec/dequant.h rename to Engine/lib/libtheora/lib/dequant.h index 928b509e5..ef25838e3 100644 --- a/Engine/lib/libtheora/lib/dec/dequant.h +++ b/Engine/lib/libtheora/lib/dequant.h @@ -5,21 +5,22 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: dequant.h 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #if !defined(_dequant_H) # define _dequant_H (1) # include "quant.h" +# include "bitpack.h" -int oc_quant_params_unpack(oggpack_buffer *_opb, +int oc_quant_params_unpack(oc_pack_buf *_opb, th_quant_info *_qinfo); void oc_quant_params_clear(th_quant_info *_qinfo); diff --git a/Engine/lib/libtheora/lib/enc/block_inline.h b/Engine/lib/libtheora/lib/enc/block_inline.h deleted file mode 100644 index 008977095..000000000 --- a/Engine/lib/libtheora/lib/enc/block_inline.h +++ /dev/null @@ -1,37 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: block_inline.h 14059 2007-10-28 23:43:27Z xiphmont $ - - ********************************************************************/ - -#include "codec_internal.h" - -static const ogg_int32_t MBOrderMap[4] = { 0, 2, 3, 1 }; -static const ogg_int32_t BlockOrderMap1[4][4] = { - { 0, 1, 3, 2 }, - { 0, 2, 3, 1 }, - { 0, 2, 3, 1 }, - { 3, 2, 0, 1 } -}; - -static ogg_int32_t QuadMapToIndex1( ogg_int32_t (*BlockMap)[4][4], - ogg_uint32_t SB, ogg_uint32_t MB, - ogg_uint32_t B ){ - return BlockMap[SB][MBOrderMap[MB]][BlockOrderMap1[MB][B]]; -} - -static ogg_int32_t QuadMapToMBTopLeft( ogg_int32_t (*BlockMap)[4][4], - ogg_uint32_t SB, ogg_uint32_t MB ){ - return BlockMap[SB][MBOrderMap[MB]][0]; -} diff --git a/Engine/lib/libtheora/lib/enc/blockmap.c b/Engine/lib/libtheora/lib/enc/blockmap.c deleted file mode 100644 index 5f3478fc2..000000000 --- a/Engine/lib/libtheora/lib/enc/blockmap.c +++ /dev/null @@ -1,99 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: blockmap.c 14059 2007-10-28 23:43:27Z xiphmont $ - - ********************************************************************/ - -#include "codec_internal.h" - -static void CreateMapping ( ogg_int32_t (*BlockMap)[4][4], - ogg_uint32_t FirstSB, - ogg_uint32_t FirstFrag, ogg_uint32_t HFrags, - ogg_uint32_t VFrags ){ - ogg_uint32_t i, j = 0; - ogg_uint32_t xpos; - ogg_uint32_t ypos; - ogg_uint32_t SBrow, SBcol; - ogg_uint32_t SBRows, SBCols; - ogg_uint32_t MB, B; - - ogg_uint32_t SB=FirstSB; - ogg_uint32_t FragIndex=FirstFrag; - - /* Set Super-Block dimensions */ - SBRows = VFrags/4 + ( VFrags%4 ? 1 : 0 ); - SBCols = HFrags/4 + ( HFrags%4 ? 1 : 0 ); - - /* Map each Super-Block */ - for ( SBrow=0; SBrow - FragIndex */ - - /* Coded flag arrays and counters for them */ - unsigned char *SBCodedFlags; - unsigned char *SBFullyFlags; - unsigned char *MBCodedFlags; - unsigned char *MBFullyFlags; - - /**********************************************************************/ - ogg_uint32_t EOB_Run; - - COORDINATE *FragCoordinates; - MOTION_VECTOR MVector; - ogg_int32_t ReconPtr2Offset; /* Offset for second reconstruction - in half pixel MC */ - Q_LIST_ENTRY *quantized_list; - ogg_int16_t *ReconDataBuffer; - Q_LIST_ENTRY InvLastIntraDC; - Q_LIST_ENTRY InvLastInterDC; - Q_LIST_ENTRY LastIntraDC; - Q_LIST_ENTRY LastInterDC; - - ogg_uint32_t BlocksToDecode; /* Blocks to be decoded this frame */ - ogg_uint32_t DcHuffChoice; /* Huffman table selection variables */ - unsigned char ACHuffChoice; - ogg_uint32_t QuadMBListIndex; - - ogg_int32_t ByteCount; - - ogg_uint32_t bit_pattern; - unsigned char bits_so_far; - unsigned char NextBit; - ogg_int32_t BitsLeft; - - ogg_int16_t *DequantBuffer; - - ogg_int32_t fp_quant_InterUV_coeffs[64]; - ogg_int32_t 
fp_quant_InterUV_round[64]; - ogg_int32_t fp_ZeroBinSize_InterUV[64]; - - ogg_int16_t *TmpReconBuffer; - ogg_int16_t *TmpDataBuffer; - - /* Loop filter bounding values */ - ogg_int16_t FiltBoundingValue[256]; - - /* Naming convention for all quant matrices and related data structures: - * Fields containing "Inter" in their name are for Inter frames, the - * rest is Intra. */ - - /* Dequantiser and rounding tables */ - ogg_uint16_t *QThreshTable; - Q_LIST_ENTRY dequant_Y_coeffs[64]; - Q_LIST_ENTRY dequant_U_coeffs[64]; - Q_LIST_ENTRY dequant_V_coeffs[64]; - Q_LIST_ENTRY dequant_InterY_coeffs[64]; - Q_LIST_ENTRY dequant_InterU_coeffs[64]; - Q_LIST_ENTRY dequant_InterV_coeffs[64]; - - Q_LIST_ENTRY *dequant_coeffs; /* currently active quantizer */ - unsigned int zigzag_index[64]; - - HUFF_ENTRY *HuffRoot_VP3x[NUM_HUFF_TABLES]; - ogg_uint32_t *HuffCodeArray_VP3x[NUM_HUFF_TABLES]; - unsigned char *HuffCodeLengthArray_VP3x[NUM_HUFF_TABLES]; - const unsigned char *ExtraBitLengths_VP3x; - - th_quant_info quant_info; - oc_quant_tables quant_tables[2][3]; - - /* Quantiser and rounding tables */ - /* this is scheduled to be replaced a new mechanism - that will simply reuse the dequantizer information. 
*/ - ogg_int32_t fp_quant_Y_coeffs[64]; /* used in reiniting quantizers */ - ogg_int32_t fp_quant_U_coeffs[64]; - ogg_int32_t fp_quant_V_coeffs[64]; - ogg_int32_t fp_quant_Inter_Y_coeffs[64]; - ogg_int32_t fp_quant_Inter_U_coeffs[64]; - ogg_int32_t fp_quant_Inter_V_coeffs[64]; - - ogg_int32_t fp_quant_Y_round[64]; - ogg_int32_t fp_quant_U_round[64]; - ogg_int32_t fp_quant_V_round[64]; - ogg_int32_t fp_quant_Inter_Y_round[64]; - ogg_int32_t fp_quant_Inter_U_round[64]; - ogg_int32_t fp_quant_Inter_V_round[64]; - - ogg_int32_t fp_ZeroBinSize_Y[64]; - ogg_int32_t fp_ZeroBinSize_U[64]; - ogg_int32_t fp_ZeroBinSize_V[64]; - ogg_int32_t fp_ZeroBinSize_Inter_Y[64]; - ogg_int32_t fp_ZeroBinSize_Inter_U[64]; - ogg_int32_t fp_ZeroBinSize_Inter_V[64]; - - ogg_int32_t *fquant_coeffs; - ogg_int32_t *fquant_round; - ogg_int32_t *fquant_ZbSize; - - /* Predictor used in choosing entropy table for decoding block patterns. */ - unsigned char BlockPatternPredictor; - - short Modifier[4][512]; - short *ModifierPointer[4]; - - unsigned char *DataOutputInPtr; - - DspFunctions dsp; /* Selected functions for this platform */ - -}; - -/* Encoder (Compressor) instance -- installed in a theora_state */ -typedef struct CP_INSTANCE { - /*This structure must be first. - It contains entry points accessed by the decoder library's API wrapper, and - is the only assumption that library makes about our internal format.*/ - oc_state_dispatch_vtbl dispatch_vtbl; - - /* Compressor Configuration */ - SCAN_CONFIG_DATA ScanConfig; - CONFIG_TYPE2 Configuration; - int GoldenFrameEnabled; - int InterPrediction; - int MotionCompensation; - - ogg_uint32_t LastKeyFrame ; - ogg_int32_t DropCount ; - ogg_int32_t MaxConsDroppedFrames ; - ogg_int32_t DropFrameTriggerBytes; - int DropFrameCandidate; - - /* Compressor Statistics */ - double TotErrScore; - ogg_int64_t KeyFrameCount; /* Count of key frames. 
*/ - ogg_int64_t TotKeyFrameBytes; - ogg_uint32_t LastKeyFrameSize; - ogg_uint32_t PriorKeyFrameSize[KEY_FRAME_CONTEXT]; - ogg_uint32_t PriorKeyFrameDistance[KEY_FRAME_CONTEXT]; - ogg_int32_t FrameQuality[6]; - int DecoderErrorCode; /* Decoder error flag. */ - ogg_int32_t ThreshMapThreshold; - ogg_int32_t TotalMotionScore; - ogg_int64_t TotalByteCount; - ogg_int32_t FixedQ; - - /* Frame Statistics */ - signed char InterCodeCount; - ogg_int64_t CurrentFrame; - ogg_int64_t CarryOver ; - ogg_uint32_t LastFrameSize; - ogg_uint32_t FrameBitCount; - int ThisIsFirstFrame; - int ThisIsKeyFrame; - - ogg_int32_t MotionScore; - ogg_uint32_t RegulationBlocks; - ogg_int32_t RecoveryMotionScore; - int RecoveryBlocksAdded ; - double ProportionRecBlocks; - double MaxRecFactor ; - - /* Rate Targeting variables. */ - ogg_uint32_t ThisFrameTargetBytes; - double BpbCorrectionFactor; - - /* Up regulation variables */ - ogg_uint32_t FinalPassLastPos; /* Used to regulate a final - unrestricted high quality - pass. */ - ogg_uint32_t LastEndSB; /* Where we were in the loop - last time. */ - ogg_uint32_t ResidueLastEndSB; /* Where we were in the residue - update loop last time. */ - - /* Controlling Block Selection */ - ogg_uint32_t MVChangeFactor; - ogg_uint32_t FourMvChangeFactor; - ogg_uint32_t MinImprovementForNewMV; - ogg_uint32_t ExhaustiveSearchThresh; - ogg_uint32_t MinImprovementForFourMV; - ogg_uint32_t FourMVThreshold; - - /* Module shared data structures. */ - ogg_int32_t frame_target_rate; - ogg_int32_t BaseLineFrameTargetRate; - ogg_int32_t min_blocks_per_frame; - ogg_uint32_t tot_bytes_old; - - /*********************************************************************/ - /* Frames Used in the selecetive convolution filtering of the Y plane. 
*/ - unsigned char *ConvDestBuffer; - YUV_BUFFER_ENTRY *yuv0ptr; - YUV_BUFFER_ENTRY *yuv1ptr; - /*********************************************************************/ - - /*********************************************************************/ - /* Token Buffers */ - ogg_uint32_t *OptimisedTokenListEb; /* Optimised token list extra bits */ - unsigned char *OptimisedTokenList; /* Optimised token list. */ - unsigned char *OptimisedTokenListHi; /* Optimised token list huffman - table index */ - - unsigned char *OptimisedTokenListPl; /* Plane to which the token - belongs Y = 0 or UV = 1 */ - ogg_int32_t OptimisedTokenCount; /* Count of Optimized tokens */ - ogg_uint32_t RunHuffIndex; /* Huffman table in force at - the start of a run */ - ogg_uint32_t RunPlaneIndex; /* The plane (Y=0 UV=1) to - which the first token in - an EOB run belonged. */ - - - ogg_uint32_t TotTokenCount; - ogg_int32_t TokensToBeCoded; - ogg_int32_t TokensCoded; - /********************************************************************/ - - /* SuperBlock, MacroBLock and Fragment Information */ - /* Coded flag arrays and counters for them */ - unsigned char *PartiallyCodedFlags; - unsigned char *PartiallyCodedMbPatterns; - unsigned char *UncodedMbFlags; - - unsigned char *extra_fragments; /* extra updates not - recommended by pre-processor */ - ogg_int16_t *OriginalDC; - - ogg_uint32_t *FragmentLastQ; /* Array used to keep track of - quality at which each - fragment was last - updated. 
*/ - unsigned char *FragTokens; - ogg_uint32_t *FragTokenCounts; /* Number of tokens per fragment */ - - ogg_uint32_t *RunHuffIndices; - ogg_uint32_t *LastCodedErrorScore; - ogg_uint32_t *ModeList; - MOTION_VECTOR *MVList; - - unsigned char *BlockCodedFlags; - - ogg_uint32_t MvListCount; - ogg_uint32_t ModeListCount; - - - unsigned char *DataOutputBuffer; - /*********************************************************************/ - - ogg_uint32_t RunLength; - ogg_uint32_t MaxBitTarget; /* Cut off target for rate capping */ - double BitRateCapFactor; /* Factor relating delta frame target - to cut off target. */ - - unsigned char MBCodingMode; /* Coding mode flags */ - - ogg_int32_t MVPixelOffsetY[MAX_SEARCH_SITES]; - ogg_uint32_t InterTripOutThresh; - unsigned char MVEnabled; - ogg_uint32_t MotionVectorSearchCount; - ogg_uint32_t FrameMVSearcOunt; - ogg_int32_t MVSearchSteps; - ogg_int32_t MVOffsetX[MAX_SEARCH_SITES]; - ogg_int32_t MVOffsetY[MAX_SEARCH_SITES]; - ogg_int32_t HalfPixelRef2Offset[9]; /* Offsets for half pixel - compensation */ - signed char HalfPixelXOffset[9]; /* Half pixel MV offsets for X */ - signed char HalfPixelYOffset[9]; /* Half pixel MV offsets for Y */ - - ogg_uint32_t bit_pattern ; - unsigned char bits_so_far ; - ogg_uint32_t lastval ; - ogg_uint32_t lastrun ; - - Q_LIST_ENTRY *quantized_list; - - MOTION_VECTOR MVector; - ogg_uint32_t TempBitCount; - ogg_int16_t *DCT_codes; /* Buffer that stores the result of - Forward DCT */ - ogg_int16_t *DCTDataBuffer; /* Input data buffer for Forward DCT */ - - /* Motion compensation related variables */ - ogg_uint32_t MvMaxExtent; - - double QTargetModifier[Q_TABLE_SIZE]; - - /* instances (used for reconstructing buffers and to hold tokens etc.) 
*/ - PP_INSTANCE pp; /* preprocessor */ - PB_INSTANCE pb; /* playback */ - - /* ogg bitpacker for use in packet coding, other API state */ - oggpack_buffer *oggbuffer; - int readyflag; - int packetflag; - int doneflag; - - DspFunctions dsp; /* Selected functions for this platform */ - -} CP_INSTANCE; - -#define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255)))) - -extern void ConfigurePP( PP_INSTANCE *ppi, int Level ) ; -extern ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi, - ogg_uint32_t * KFIndicator ); - -extern void ClearPPInstance(PP_INSTANCE *ppi); -extern void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs); -extern void InitPBInstance(PB_INSTANCE *pbi); -extern void ClearPBInstance(PB_INSTANCE *pbi); - -extern void IDct1( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ); - -extern void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr, - ogg_int16_t * ChangePtr, ogg_uint32_t LineStep ); - -extern void ReconInter( PB_INSTANCE *pbi, unsigned char * ReconPtr, - unsigned char * RefPtr, ogg_int16_t * ChangePtr, - ogg_uint32_t LineStep ) ; - -extern void ReconInterHalfPixel2( PB_INSTANCE *pbi, unsigned char * ReconPtr, - unsigned char * RefPtr1, - unsigned char * RefPtr2, - ogg_int16_t * ChangePtr, - ogg_uint32_t LineStep ) ; - -extern void SetupLoopFilter(PB_INSTANCE *pbi); -extern void CopyBlock(unsigned char *src, - unsigned char *dest, - unsigned int srcstride); -extern void LoopFilter(PB_INSTANCE *pbi); -extern void ReconRefFrames (PB_INSTANCE *pbi); -extern void ExpandToken( Q_LIST_ENTRY * ExpandedBlock, - unsigned char * CoeffIndex, ogg_uint32_t Token, - ogg_int32_t ExtraBits ); -extern void ClearDownQFragData(PB_INSTANCE *pbi); - -extern void select_quantiser (PB_INSTANCE *pbi, int type); - -extern void quantize( PB_INSTANCE *pbi, - ogg_int16_t * DCT_block, - Q_LIST_ENTRY * quantized_list); -extern void UpdateQ( PB_INSTANCE *pbi, int NewQIndex ); -extern void UpdateQC( CP_INSTANCE *cpi, 
ogg_uint32_t NewQ ); -extern void fdct_short ( ogg_int16_t * InputData, ogg_int16_t * OutputData ); -extern ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi, - ogg_int32_t FragIndex); -extern void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex, - ogg_uint32_t PixelsPerLine ) ; -extern void ClearFragmentInfo(PB_INSTANCE * pbi); -extern void InitFragmentInfo(PB_INSTANCE * pbi); -extern void ClearFrameInfo(PB_INSTANCE * pbi); -extern void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize); -extern void InitializeFragCoordinates(PB_INSTANCE *pbi); -extern void InitFrameDetails(PB_INSTANCE *pbi); -extern void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer *opb); -extern void InitQTables( PB_INSTANCE *pbi ); -extern void quant_tables_init( PB_INSTANCE *pbi, const th_quant_info *qinfo); -extern void InitHuffmanSet( PB_INSTANCE *pbi ); -extern void ClearHuffmanSet( PB_INSTANCE *pbi ); -extern int ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb); -extern void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES], - oggpack_buffer *opb); -extern void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci); -extern void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]); -extern int ReadFilterTables(codec_setup_info *ci, oggpack_buffer *opb); -extern void QuadDecodeDisplayFragments ( PB_INSTANCE *pbi ); -extern void PackAndWriteDFArray( CP_INSTANCE *cpi ); -extern void UpdateFragQIndex(PB_INSTANCE *pbi); -extern void PostProcess(PB_INSTANCE *pbi); -extern void InitMotionCompensation ( CP_INSTANCE *cpi ); -extern ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine ) ; -extern ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi, - unsigned char * SrcPtr, - unsigned char * RefPtr, - ogg_uint32_t FragIndex, - ogg_int32_t LastXMV, - ogg_int32_t LastYMV, - ogg_uint32_t PixelsPerLine ) ; -extern void WriteFrameHeader( CP_INSTANCE *cpi) ; -extern ogg_uint32_t GetMBMVInterError (CP_INSTANCE 
*cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - ogg_int32_t *MVPixelOffset, - MOTION_VECTOR *MV ); -extern ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - MOTION_VECTOR *MV ); -extern ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - MOTION_VECTOR *MV ) ; -extern ogg_uint32_t EncodeData(CP_INSTANCE *cpi); -extern ogg_uint32_t PickIntra( CP_INSTANCE *cpi, - ogg_uint32_t SBRows, - ogg_uint32_t SBCols); -extern ogg_uint32_t PickModes(CP_INSTANCE *cpi, - ogg_uint32_t SBRows, - ogg_uint32_t SBCols, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t *InterError, - ogg_uint32_t *IntraError); - -extern CODING_MODE FrArrayUnpackMode(PB_INSTANCE *pbi); -extern void CreateBlockMapping ( ogg_int32_t (*BlockMap)[4][4], - ogg_uint32_t YSuperBlocks, - ogg_uint32_t UVSuperBlocks, - ogg_uint32_t HFrags, ogg_uint32_t VFrags ); -extern void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ, - ogg_int32_t RecoveryBlocks ) ; -extern void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore ); -extern void CopyBackExtraFrags(CP_INSTANCE *cpi); - -extern void UpdateUMVBorder( PB_INSTANCE *pbi, - unsigned char * DestReconPtr ); -extern void PInitFrameInfo(PP_INSTANCE * ppi); - -extern double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ ); -extern void ClearTmpBuffers(PB_INSTANCE * pbi); -extern void InitTmpBuffers(PB_INSTANCE * pbi); -extern void ScanYUVInit( PP_INSTANCE * ppi, - SCAN_CONFIG_DATA * ScanConfigPtr); - -#endif /* ENCODER_INTERNAL_H */ diff --git a/Engine/lib/libtheora/lib/enc/dct.c b/Engine/lib/libtheora/lib/enc/dct.c deleted file mode 100644 index 29bf8f269..000000000 --- a/Engine/lib/libtheora/lib/enc/dct.c +++ /dev/null @@ -1,268 +0,0 @@ -/******************************************************************** - * * - 
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dct.c 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include "codec_internal.h" -#include "dsp.h" -#include "../cpu.h" - -static ogg_int32_t xC1S7 = 64277; -static ogg_int32_t xC2S6 = 60547; -static ogg_int32_t xC3S5 = 54491; -static ogg_int32_t xC4S4 = 46341; -static ogg_int32_t xC5S3 = 36410; -static ogg_int32_t xC6S2 = 25080; -static ogg_int32_t xC7S1 = 12785; - -#define SIGNBITDUPPED(X) ((signed )(((X) & 0x80000000)) >> 31) -#define DOROUND(X) ( (SIGNBITDUPPED(X) & (0xffff)) + (X) ) - -static void fdct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ){ - int loop; - - ogg_int32_t is07, is12, is34, is56; - ogg_int32_t is0734, is1256; - ogg_int32_t id07, id12, id34, id56; - - ogg_int32_t irot_input_x, irot_input_y; - ogg_int32_t icommon_product1; /* Re-used product (c4s4 * (s12 - s56)). */ - ogg_int32_t icommon_product2; /* Re-used product (c4s4 * (d12 + d56)). */ - - ogg_int32_t temp1, temp2; /* intermediate variable for computation */ - - ogg_int32_t InterData[64]; - ogg_int32_t *ip = InterData; - ogg_int16_t * op = OutputData; - for (loop = 0; loop < 8; loop++){ - /* Pre calculate some common sums and differences. 
*/ - is07 = InputData[0] + InputData[7]; - is12 = InputData[1] + InputData[2]; - is34 = InputData[3] + InputData[4]; - is56 = InputData[5] + InputData[6]; - - id07 = InputData[0] - InputData[7]; - id12 = InputData[1] - InputData[2]; - id34 = InputData[3] - InputData[4]; - id56 = InputData[5] - InputData[6]; - - is0734 = is07 + is34; - is1256 = is12 + is56; - - /* Pre-Calculate some common product terms. */ - icommon_product1 = xC4S4*(is12 - is56); - icommon_product1 = DOROUND(icommon_product1); - icommon_product1>>=16; - - icommon_product2 = xC4S4*(id12 + id56); - icommon_product2 = DOROUND(icommon_product2); - icommon_product2>>=16; - - - ip[0] = (xC4S4*(is0734 + is1256)); - ip[0] = DOROUND(ip[0]); - ip[0] >>= 16; - - ip[4] = (xC4S4*(is0734 - is1256)); - ip[4] = DOROUND(ip[4]); - ip[4] >>= 16; - - /* Define inputs to rotation for outputs 2 and 6 */ - irot_input_x = id12 - id56; - irot_input_y = is07 - is34; - - /* Apply rotation for outputs 2 and 6. */ - temp1=xC6S2*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC2S6*irot_input_y; - temp2=DOROUND(temp2); - temp2>>=16; - ip[2] = temp1 + temp2; - - temp1=xC6S2*irot_input_y; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC2S6*irot_input_x ; - temp2=DOROUND(temp2); - temp2>>=16; - ip[6] = temp1 -temp2 ; - - /* Define inputs to rotation for outputs 1 and 7 */ - irot_input_x = icommon_product1 + id07; - irot_input_y = -( id34 + icommon_product2 ); - - /* Apply rotation for outputs 1 and 7. */ - - temp1=xC1S7*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC7S1*irot_input_y; - temp2=DOROUND(temp2); - temp2>>=16; - ip[1] = temp1 - temp2; - - temp1=xC7S1*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC1S7*irot_input_y ; - temp2=DOROUND(temp2); - temp2>>=16; - ip[7] = temp1 + temp2 ; - - /* Define inputs to rotation for outputs 3 and 5 */ - irot_input_x = id07 - icommon_product1; - irot_input_y = id34 - icommon_product2; - - /* Apply rotation for outputs 3 and 5. 
*/ - temp1=xC3S5*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC5S3*irot_input_y ; - temp2=DOROUND(temp2); - temp2>>=16; - ip[3] = temp1 - temp2 ; - - temp1=xC5S3*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC3S5*irot_input_y; - temp2=DOROUND(temp2); - temp2>>=16; - ip[5] = temp1 + temp2; - - /* Increment data pointer for next row. */ - InputData += 8 ; - ip += 8; /* advance pointer to next row */ - - } - - - /* Performed DCT on rows, now transform the columns */ - ip = InterData; - for (loop = 0; loop < 8; loop++){ - /* Pre calculate some common sums and differences. */ - is07 = ip[0 * 8] + ip[7 * 8]; - is12 = ip[1 * 8] + ip[2 * 8]; - is34 = ip[3 * 8] + ip[4 * 8]; - is56 = ip[5 * 8] + ip[6 * 8]; - - id07 = ip[0 * 8] - ip[7 * 8]; - id12 = ip[1 * 8] - ip[2 * 8]; - id34 = ip[3 * 8] - ip[4 * 8]; - id56 = ip[5 * 8] - ip[6 * 8]; - - is0734 = is07 + is34; - is1256 = is12 + is56; - - /* Pre-Calculate some common product terms. */ - icommon_product1 = xC4S4*(is12 - is56) ; - icommon_product2 = xC4S4*(id12 + id56) ; - icommon_product1 = DOROUND(icommon_product1); - icommon_product2 = DOROUND(icommon_product2); - icommon_product1>>=16; - icommon_product2>>=16; - - - temp1 = xC4S4*(is0734 + is1256) ; - temp2 = xC4S4*(is0734 - is1256) ; - temp1 = DOROUND(temp1); - temp2 = DOROUND(temp2); - temp1>>=16; - temp2>>=16; - op[0*8] = (ogg_int16_t) temp1; - op[4*8] = (ogg_int16_t) temp2; - - /* Define inputs to rotation for outputs 2 and 6 */ - irot_input_x = id12 - id56; - irot_input_y = is07 - is34; - - /* Apply rotation for outputs 2 and 6. 
*/ - temp1=xC6S2*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC2S6*irot_input_y; - temp2=DOROUND(temp2); - temp2>>=16; - op[2*8] = (ogg_int16_t) (temp1 + temp2); - - temp1=xC6S2*irot_input_y; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC2S6*irot_input_x ; - temp2=DOROUND(temp2); - temp2>>=16; - op[6*8] = (ogg_int16_t) (temp1 -temp2) ; - - /* Define inputs to rotation for outputs 1 and 7 */ - irot_input_x = icommon_product1 + id07; - irot_input_y = -( id34 + icommon_product2 ); - - /* Apply rotation for outputs 1 and 7. */ - temp1=xC1S7*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC7S1*irot_input_y; - temp2=DOROUND(temp2); - temp2>>=16; - op[1*8] = (ogg_int16_t) (temp1 - temp2); - - temp1=xC7S1*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC1S7*irot_input_y ; - temp2=DOROUND(temp2); - temp2>>=16; - op[7*8] = (ogg_int16_t) (temp1 + temp2); - - /* Define inputs to rotation for outputs 3 and 5 */ - irot_input_x = id07 - icommon_product1; - irot_input_y = id34 - icommon_product2; - - /* Apply rotation for outputs 3 and 5. */ - temp1=xC3S5*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC5S3*irot_input_y ; - temp2=DOROUND(temp2); - temp2>>=16; - op[3*8] = (ogg_int16_t) (temp1 - temp2) ; - - temp1=xC5S3*irot_input_x; - temp1=DOROUND(temp1); - temp1>>=16; - temp2=xC3S5*irot_input_y; - temp2=DOROUND(temp2); - temp2>>=16; - op[5*8] = (ogg_int16_t) (temp1 + temp2); - - /* Increment data pointer for next column. 
*/ - ip ++; - op ++; - } -} - -void dsp_dct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags) -{ - funcs->fdct_short = fdct_short__c; - dsp_dct_decode_init(funcs, cpu_flags); - dsp_idct_init(funcs, cpu_flags); -#if defined(USE_ASM) - if (cpu_flags & OC_CPU_X86_MMX) { - dsp_mmx_fdct_init(funcs); - } -#endif -} - diff --git a/Engine/lib/libtheora/lib/enc/dct_decode.c b/Engine/lib/libtheora/lib/enc/dct_decode.c deleted file mode 100644 index e27611610..000000000 --- a/Engine/lib/libtheora/lib/enc/dct_decode.c +++ /dev/null @@ -1,941 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dct_decode.c 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" -#include "quant_lookup.h" - - -#define GOLDEN_FRAME_THRESH_Q 50 -#define PUR 8 -#define PU 4 -#define PUL 2 -#define PL 1 -#define HIGHBITDUPPED(X) (((signed short) X) >> 15) - - -static const int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 }; - -static void SetupBoundingValueArray_Generic(ogg_int16_t *BoundingValuePtr, - ogg_int32_t FLimit){ - - ogg_int32_t i; - - /* Set up the bounding value array. 
*/ - memset ( BoundingValuePtr, 0, (256*sizeof(*BoundingValuePtr)) ); - for ( i = 0; i < FLimit; i++ ){ - BoundingValuePtr[127-i-FLimit] = (-FLimit+i); - BoundingValuePtr[127-i] = -i; - BoundingValuePtr[127+i] = i; - BoundingValuePtr[127+i+FLimit] = FLimit-i; - } -} - -static void ExpandKFBlock ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber ){ - ogg_uint32_t ReconPixelsPerLine; - ogg_int32_t ReconPixelIndex; - - /* Select the appropriate inverse Q matrix and line stride */ - if ( FragmentNumber<(ogg_int32_t)pbi->YPlaneFragments ){ - ReconPixelsPerLine = pbi->YStride; - pbi->dequant_coeffs = pbi->dequant_Y_coeffs; - }else if ( FragmentNumber<(ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) ){ - ReconPixelsPerLine = pbi->UVStride; - pbi->dequant_coeffs = pbi->dequant_U_coeffs; - }else{ - ReconPixelsPerLine = pbi->UVStride; - pbi->dequant_coeffs = pbi->dequant_V_coeffs; - } - - /* Set up pointer into the quantisation buffer. */ - pbi->quantized_list = &pbi->QFragData[FragmentNumber][0]; - - /* Invert quantisation and DCT to get pixel data. */ - switch(pbi->FragCoefEOB[FragmentNumber]){ - case 0:case 1: - IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - break; - case 2: case 3: - dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - break; - case 4:case 5:case 6:case 7:case 8: case 9:case 10: - dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - break; - default: - dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - } - - /* Convert fragment number to a pixel offset in a reconstruction buffer. */ - ReconPixelIndex = pbi->recon_pixel_index_table[FragmentNumber]; - - /* Get the pixel index for the first pixel in the fragment. 
*/ - dsp_recon_intra8x8 (pbi->dsp, (unsigned char *)(&pbi->ThisFrameRecon[ReconPixelIndex]), - (ogg_int16_t *)pbi->ReconDataBuffer, ReconPixelsPerLine); -} - -static void ExpandBlock ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber){ - unsigned char *LastFrameRecPtr; /* Pointer into previous frame - reconstruction. */ - unsigned char *LastFrameRecPtr2; /* Pointer into previous frame - reconstruction for 1/2 pixel MC. */ - - ogg_uint32_t ReconPixelsPerLine; /* Pixels per line */ - ogg_int32_t ReconPixelIndex; /* Offset for block into a - reconstruction buffer */ - ogg_int32_t ReconPtr2Offset; /* Offset for second - reconstruction in half pixel - MC */ - ogg_int32_t MVOffset; /* Baseline motion vector offset */ - ogg_int32_t MvShift ; /* Shift to correct to 1/2 or 1/4 pixel */ - ogg_int32_t MvModMask; /* Mask to determine whether 1/2 - pixel is used */ - - /* Get coding mode for this block */ - if ( pbi->FrameType == KEY_FRAME ){ - pbi->CodingMode = CODE_INTRA; - }else{ - /* Get Motion vector and mode for this block. */ - pbi->CodingMode = pbi->FragCodingMethod[FragmentNumber]; - } - - /* Select the appropriate inverse Q matrix and line stride */ - if ( FragmentNumber<(ogg_int32_t)pbi->YPlaneFragments ) { - ReconPixelsPerLine = pbi->YStride; - MvShift = 1; - MvModMask = 0x00000001; - - /* Select appropriate dequantiser matrix. */ - if ( pbi->CodingMode == CODE_INTRA ) - pbi->dequant_coeffs = pbi->dequant_Y_coeffs; - else - pbi->dequant_coeffs = pbi->dequant_InterY_coeffs; - }else{ - ReconPixelsPerLine = pbi->UVStride; - MvShift = 2; - MvModMask = 0x00000003; - - /* Select appropriate dequantiser matrix. 
*/ - if ( pbi->CodingMode == CODE_INTRA ) - if ( FragmentNumber < - (ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) ) - pbi->dequant_coeffs = pbi->dequant_U_coeffs; - else - pbi->dequant_coeffs = pbi->dequant_V_coeffs; - else - if ( FragmentNumber < - (ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) ) - pbi->dequant_coeffs = pbi->dequant_InterU_coeffs; - else - pbi->dequant_coeffs = pbi->dequant_InterV_coeffs; - } - - /* Set up pointer into the quantisation buffer. */ - pbi->quantized_list = &pbi->QFragData[FragmentNumber][0]; - - /* Invert quantisation and DCT to get pixel data. */ - switch(pbi->FragCoefEOB[FragmentNumber]){ - case 0:case 1: - IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - break; - case 2: case 3: - dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - break; - case 4:case 5:case 6:case 7:case 8: case 9:case 10: - dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - break; - default: - dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer ); - } - - /* Convert fragment number to a pixel offset in a reconstruction buffer. */ - ReconPixelIndex = pbi->recon_pixel_index_table[FragmentNumber]; - - /* Action depends on decode mode. */ - if ( pbi->CodingMode == CODE_INTER_NO_MV ){ - /* Inter with no motion vector */ - /* Reconstruct the pixel data using the last frame reconstruction - and change data when the motion vector is (0,0), the recon is - based on the lastframe without loop filtering---- for testing */ - dsp_recon_inter8x8 (pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex], - &pbi->LastFrameRecon[ReconPixelIndex], - pbi->ReconDataBuffer, ReconPixelsPerLine); - }else if ( ModeUsesMC[pbi->CodingMode] ) { - /* The mode uses a motion vector. 
*/ - /* Get vector from list */ - pbi->MVector.x = pbi->FragMVect[FragmentNumber].x; - pbi->MVector.y = pbi->FragMVect[FragmentNumber].y; - - /* Work out the base motion vector offset and the 1/2 pixel offset - if any. For the U and V planes the MV specifies 1/4 pixel - accuracy. This is adjusted to 1/2 pixel as follows ( 0->0, - 1/4->1/2, 1/2->1/2, 3/4->1/2 ). */ - MVOffset = 0; - ReconPtr2Offset = 0; - if ( pbi->MVector.x > 0 ){ - MVOffset = pbi->MVector.x >> MvShift; - if ( pbi->MVector.x & MvModMask ) - ReconPtr2Offset += 1; - } else if ( pbi->MVector.x < 0 ) { - MVOffset -= (-pbi->MVector.x) >> MvShift; - if ( (-pbi->MVector.x) & MvModMask ) - ReconPtr2Offset -= 1; - } - - if ( pbi->MVector.y > 0 ){ - MVOffset += (pbi->MVector.y >> MvShift) * ReconPixelsPerLine; - if ( pbi->MVector.y & MvModMask ) - ReconPtr2Offset += ReconPixelsPerLine; - } else if ( pbi->MVector.y < 0 ){ - MVOffset -= ((-pbi->MVector.y) >> MvShift) * ReconPixelsPerLine; - if ( (-pbi->MVector.y) & MvModMask ) - ReconPtr2Offset -= ReconPixelsPerLine; - } - - /* Set up the first of the two reconstruction buffer pointers. */ - if ( pbi->CodingMode==CODE_GOLDEN_MV ) { - LastFrameRecPtr = &pbi->GoldenFrame[ReconPixelIndex] + MVOffset; - }else{ - LastFrameRecPtr = &pbi->LastFrameRecon[ReconPixelIndex] + MVOffset; - } - - /* Set up the second of the two reconstruction pointers. */ - LastFrameRecPtr2 = LastFrameRecPtr + ReconPtr2Offset; - - /* Select the appropriate reconstruction function */ - if ( (int)(LastFrameRecPtr - LastFrameRecPtr2) == 0 ) { - /* Reconstruct the pixel dats from the reference frame and change data - (no half pixel in this case as the two references were the same. */ - dsp_recon_inter8x8 (pbi->dsp, - &pbi->ThisFrameRecon[ReconPixelIndex], - LastFrameRecPtr, pbi->ReconDataBuffer, - ReconPixelsPerLine); - }else{ - /* Fractional pixel reconstruction. */ - /* Note that we only use two pixels per reconstruction even for - the diagonal. 
*/ - dsp_recon_inter8x8_half(pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex], - LastFrameRecPtr, LastFrameRecPtr2, - pbi->ReconDataBuffer, ReconPixelsPerLine); - } - } else if ( pbi->CodingMode == CODE_USING_GOLDEN ){ - /* Golden frame with motion vector */ - /* Reconstruct the pixel data using the golden frame - reconstruction and change data */ - dsp_recon_inter8x8 (pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex], - &pbi->GoldenFrame[ ReconPixelIndex ], - pbi->ReconDataBuffer, ReconPixelsPerLine); - } else { - /* Simple Intra coding */ - /* Get the pixel index for the first pixel in the fragment. */ - dsp_recon_intra8x8 (pbi->dsp, &pbi->ThisFrameRecon[ReconPixelIndex], - pbi->ReconDataBuffer, ReconPixelsPerLine); - } -} - -static void UpdateUMV_HBorders( PB_INSTANCE *pbi, - unsigned char * DestReconPtr, - ogg_uint32_t PlaneFragOffset ) { - ogg_uint32_t i; - ogg_uint32_t PixelIndex; - - ogg_uint32_t PlaneStride; - ogg_uint32_t BlockVStep; - ogg_uint32_t PlaneFragments; - ogg_uint32_t LineFragments; - ogg_uint32_t PlaneBorderWidth; - - unsigned char *SrcPtr1; - unsigned char *SrcPtr2; - unsigned char *DestPtr1; - unsigned char *DestPtr2; - - /* Work out various plane specific values */ - if ( PlaneFragOffset == 0 ) { - /* Y Plane */ - BlockVStep = (pbi->YStride * - (VFRAGPIXELS - 1)); - PlaneStride = pbi->YStride; - PlaneBorderWidth = UMV_BORDER; - PlaneFragments = pbi->YPlaneFragments; - LineFragments = pbi->HFragments; - }else{ - /* U or V plane. 
*/ - BlockVStep = (pbi->UVStride * - (VFRAGPIXELS - 1)); - PlaneStride = pbi->UVStride; - PlaneBorderWidth = UMV_BORDER / 2; - PlaneFragments = pbi->UVPlaneFragments; - LineFragments = pbi->HFragments / 2; - } - - /* Setup the source and destination pointers for the top and bottom - borders */ - PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset]; - SrcPtr1 = &DestReconPtr[ PixelIndex - PlaneBorderWidth ]; - DestPtr1 = SrcPtr1 - (PlaneBorderWidth * PlaneStride); - - PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset + - PlaneFragments - LineFragments] + - BlockVStep; - SrcPtr2 = &DestReconPtr[ PixelIndex - PlaneBorderWidth]; - DestPtr2 = SrcPtr2 + PlaneStride; - - /* Now copy the top and bottom source lines into each line of the - respective borders */ - for ( i = 0; i < PlaneBorderWidth; i++ ) { - memcpy( DestPtr1, SrcPtr1, PlaneStride ); - memcpy( DestPtr2, SrcPtr2, PlaneStride ); - DestPtr1 += PlaneStride; - DestPtr2 += PlaneStride; - } -} - -static void UpdateUMV_VBorders( PB_INSTANCE *pbi, - unsigned char * DestReconPtr, - ogg_uint32_t PlaneFragOffset ){ - ogg_uint32_t i; - ogg_uint32_t PixelIndex; - - ogg_uint32_t PlaneStride; - ogg_uint32_t LineFragments; - ogg_uint32_t PlaneBorderWidth; - ogg_uint32_t PlaneHeight; - - unsigned char *SrcPtr1; - unsigned char *SrcPtr2; - unsigned char *DestPtr1; - unsigned char *DestPtr2; - - /* Work out various plane specific values */ - if ( PlaneFragOffset == 0 ) { - /* Y Plane */ - PlaneStride = pbi->YStride; - PlaneBorderWidth = UMV_BORDER; - LineFragments = pbi->HFragments; - PlaneHeight = pbi->info.height; - }else{ - /* U or V plane. 
*/ - PlaneStride = pbi->UVStride; - PlaneBorderWidth = UMV_BORDER / 2; - LineFragments = pbi->HFragments / 2; - PlaneHeight = pbi->info.height / 2; - } - - /* Setup the source data values and destination pointers for the - left and right edge borders */ - PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset]; - SrcPtr1 = &DestReconPtr[ PixelIndex ]; - DestPtr1 = &DestReconPtr[ PixelIndex - PlaneBorderWidth ]; - - PixelIndex = pbi->recon_pixel_index_table[PlaneFragOffset + - LineFragments - 1] + - (HFRAGPIXELS - 1); - SrcPtr2 = &DestReconPtr[ PixelIndex ]; - DestPtr2 = &DestReconPtr[ PixelIndex + 1 ]; - - /* Now copy the top and bottom source lines into each line of the - respective borders */ - for ( i = 0; i < PlaneHeight; i++ ) { - memset( DestPtr1, SrcPtr1[0], PlaneBorderWidth ); - memset( DestPtr2, SrcPtr2[0], PlaneBorderWidth ); - SrcPtr1 += PlaneStride; - SrcPtr2 += PlaneStride; - DestPtr1 += PlaneStride; - DestPtr2 += PlaneStride; - } -} - -void UpdateUMVBorder( PB_INSTANCE *pbi, - unsigned char * DestReconPtr ) { - ogg_uint32_t PlaneFragOffset; - - /* Y plane */ - PlaneFragOffset = 0; - UpdateUMV_VBorders( pbi, DestReconPtr, PlaneFragOffset ); - UpdateUMV_HBorders( pbi, DestReconPtr, PlaneFragOffset ); - - /* Then the U and V Planes */ - PlaneFragOffset = pbi->YPlaneFragments; - UpdateUMV_VBorders( pbi, DestReconPtr, PlaneFragOffset ); - UpdateUMV_HBorders( pbi, DestReconPtr, PlaneFragOffset ); - - PlaneFragOffset = pbi->YPlaneFragments + pbi->UVPlaneFragments; - UpdateUMV_VBorders( pbi, DestReconPtr, PlaneFragOffset ); - UpdateUMV_HBorders( pbi, DestReconPtr, PlaneFragOffset ); -} - -static void CopyRecon( PB_INSTANCE *pbi, unsigned char * DestReconPtr, - unsigned char * SrcReconPtr ) { - ogg_uint32_t i; - ogg_uint32_t PlaneLineStep; /* Pixels per line */ - ogg_uint32_t PixelIndex; - - unsigned char *SrcPtr; /* Pointer to line of source image data */ - unsigned char *DestPtr; /* Pointer to line of destination image data */ - - /* Copy over only 
updated blocks.*/ - - /* First Y plane */ - PlaneLineStep = pbi->YStride; - for ( i = 0; i < pbi->YPlaneFragments; i++ ) { - if ( pbi->display_fragments[i] ) { - PixelIndex = pbi->recon_pixel_index_table[i]; - SrcPtr = &SrcReconPtr[ PixelIndex ]; - DestPtr = &DestReconPtr[ PixelIndex ]; - - dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep); - } - } - - /* Then U and V */ - PlaneLineStep = pbi->UVStride; - for ( i = pbi->YPlaneFragments; i < pbi->UnitFragments; i++ ) { - if ( pbi->display_fragments[i] ) { - PixelIndex = pbi->recon_pixel_index_table[i]; - SrcPtr = &SrcReconPtr[ PixelIndex ]; - DestPtr = &DestReconPtr[ PixelIndex ]; - - dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep); - - } - } -} - -static void CopyNotRecon( PB_INSTANCE *pbi, unsigned char * DestReconPtr, - unsigned char * SrcReconPtr ) { - ogg_uint32_t i; - ogg_uint32_t PlaneLineStep; /* Pixels per line */ - ogg_uint32_t PixelIndex; - - unsigned char *SrcPtr; /* Pointer to line of source image data */ - unsigned char *DestPtr; /* Pointer to line of destination image data*/ - - /* Copy over only updated blocks. */ - - /* First Y plane */ - PlaneLineStep = pbi->YStride; - for ( i = 0; i < pbi->YPlaneFragments; i++ ) { - if ( !pbi->display_fragments[i] ) { - PixelIndex = pbi->recon_pixel_index_table[i]; - SrcPtr = &SrcReconPtr[ PixelIndex ]; - DestPtr = &DestReconPtr[ PixelIndex ]; - - dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep); - } - } - - /* Then U and V */ - PlaneLineStep = pbi->UVStride; - for ( i = pbi->YPlaneFragments; i < pbi->UnitFragments; i++ ) { - if ( !pbi->display_fragments[i] ) { - PixelIndex = pbi->recon_pixel_index_table[i]; - SrcPtr = &SrcReconPtr[ PixelIndex ]; - DestPtr = &DestReconPtr[ PixelIndex ]; - - dsp_copy8x8 (pbi->dsp, SrcPtr, DestPtr, PlaneLineStep); - - } - } -} - -void ExpandToken( Q_LIST_ENTRY * ExpandedBlock, - unsigned char * CoeffIndex, ogg_uint32_t Token, - ogg_int32_t ExtraBits ){ - /* Is the token is a combination run and value token. 
*/ - if ( Token >= DCT_RUN_CATEGORY1 ){ - /* Expand the token and additional bits to a zero run length and - data value. */ - if ( Token < DCT_RUN_CATEGORY2 ) { - /* Decoding method depends on token */ - if ( Token < DCT_RUN_CATEGORY1B ) { - /* Step on by the zero run length */ - *CoeffIndex += (unsigned char)((Token - DCT_RUN_CATEGORY1) + 1); - - /* The extra bit determines the sign. */ - if ( ExtraBits & 0x01 ) - ExpandedBlock[*CoeffIndex] = -1; - else - ExpandedBlock[*CoeffIndex] = 1; - } else if ( Token == DCT_RUN_CATEGORY1B ) { - /* Bits 0-1 determines the zero run length */ - *CoeffIndex += (6 + (ExtraBits & 0x03)); - - /* Bit 2 determines the sign */ - if ( ExtraBits & 0x04 ) - ExpandedBlock[*CoeffIndex] = -1; - else - ExpandedBlock[*CoeffIndex] = 1; - }else{ - /* Bits 0-2 determines the zero run length */ - *CoeffIndex += (10 + (ExtraBits & 0x07)); - - /* Bit 3 determines the sign */ - if ( ExtraBits & 0x08 ) - ExpandedBlock[*CoeffIndex] = -1; - else - ExpandedBlock[*CoeffIndex] = 1; - } - }else{ - /* If token == DCT_RUN_CATEGORY2 we have a single 0 followed by - a value */ - if ( Token == DCT_RUN_CATEGORY2 ){ - /* Step on by the zero run length */ - *CoeffIndex += 1; - - /* Bit 1 determines sign, bit 0 the value */ - if ( ExtraBits & 0x02 ) - ExpandedBlock[*CoeffIndex] = -(2 + (ExtraBits & 0x01)); - else - ExpandedBlock[*CoeffIndex] = 2 + (ExtraBits & 0x01); - }else{ - /* else we have 2->3 zeros followed by a value */ - /* Bit 0 determines the zero run length */ - *CoeffIndex += 2 + (ExtraBits & 0x01); - - /* Bit 2 determines the sign, bit 1 the value */ - if ( ExtraBits & 0x04 ) - ExpandedBlock[*CoeffIndex] = -(2 + ((ExtraBits & 0x02) >> 1)); - else - ExpandedBlock[*CoeffIndex] = 2 + ((ExtraBits & 0x02) >> 1); - } - } - - /* Step on over value */ - *CoeffIndex += 1; - - } else if ( Token == DCT_SHORT_ZRL_TOKEN ) { - /* Token is a ZRL token so step on by the appropriate number of zeros */ - *CoeffIndex += ExtraBits + 1; - } else if ( Token == DCT_ZRL_TOKEN 
) { - /* Token is a ZRL token so step on by the appropriate number of zeros */ - *CoeffIndex += ExtraBits + 1; - } else if ( Token < LOW_VAL_TOKENS ) { - /* Token is a small single value token. */ - switch ( Token ) { - case ONE_TOKEN: - ExpandedBlock[*CoeffIndex] = 1; - break; - case MINUS_ONE_TOKEN: - ExpandedBlock[*CoeffIndex] = -1; - break; - case TWO_TOKEN: - ExpandedBlock[*CoeffIndex] = 2; - break; - case MINUS_TWO_TOKEN: - ExpandedBlock[*CoeffIndex] = -2; - break; - } - - /* Step on the coefficient index. */ - *CoeffIndex += 1; - }else{ - /* Token is a larger single value token */ - /* Expand the token and additional bits to a data value. */ - if ( Token < DCT_VAL_CATEGORY3 ) { - /* Offset from LOW_VAL_TOKENS determines value */ - Token = Token - LOW_VAL_TOKENS; - - /* Extra bit determines sign */ - if ( ExtraBits ) - ExpandedBlock[*CoeffIndex] = - -((Q_LIST_ENTRY)(Token + DCT_VAL_CAT2_MIN)); - else - ExpandedBlock[*CoeffIndex] = - (Q_LIST_ENTRY)(Token + DCT_VAL_CAT2_MIN); - } else if ( Token == DCT_VAL_CATEGORY3 ) { - /* Bit 1 determines sign, Bit 0 the value */ - if ( ExtraBits & 0x02 ) - ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT3_MIN + (ExtraBits & 0x01)); - else - ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT3_MIN + (ExtraBits & 0x01); - } else if ( Token == DCT_VAL_CATEGORY4 ) { - /* Bit 2 determines sign, Bit 0-1 the value */ - if ( ExtraBits & 0x04 ) - ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT4_MIN + (ExtraBits & 0x03)); - else - ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT4_MIN + (ExtraBits & 0x03); - } else if ( Token == DCT_VAL_CATEGORY5 ) { - /* Bit 3 determines sign, Bit 0-2 the value */ - if ( ExtraBits & 0x08 ) - ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT5_MIN + (ExtraBits & 0x07)); - else - ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT5_MIN + (ExtraBits & 0x07); - } else if ( Token == DCT_VAL_CATEGORY6 ) { - /* Bit 4 determines sign, Bit 0-3 the value */ - if ( ExtraBits & 0x10 ) - ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT6_MIN + (ExtraBits & 0x0F)); - 
else - ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT6_MIN + (ExtraBits & 0x0F); - } else if ( Token == DCT_VAL_CATEGORY7 ) { - /* Bit 5 determines sign, Bit 0-4 the value */ - if ( ExtraBits & 0x20 ) - ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT7_MIN + (ExtraBits & 0x1F)); - else - ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT7_MIN + (ExtraBits & 0x1F); - } else if ( Token == DCT_VAL_CATEGORY8 ) { - /* Bit 9 determines sign, Bit 0-8 the value */ - if ( ExtraBits & 0x200 ) - ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT8_MIN + (ExtraBits & 0x1FF)); - else - ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT8_MIN + (ExtraBits & 0x1FF); - } - - /* Step on the coefficient index. */ - *CoeffIndex += 1; - } -} - -void ClearDownQFragData(PB_INSTANCE *pbi){ - ogg_int32_t i; - Q_LIST_ENTRY * QFragPtr; - - for ( i = 0; i < pbi->CodedBlockIndex; i++ ) { - /* Get the linear index for the current fragment. */ - QFragPtr = pbi->QFragData[pbi->CodedBlockList[i]]; - memset(QFragPtr, 0, 64*sizeof(Q_LIST_ENTRY)); - } -} - -static void loop_filter_h(unsigned char * PixelPtr, - ogg_int32_t LineLength, - ogg_int16_t *BoundingValuePtr){ - ogg_int32_t j; - ogg_int32_t FiltVal; - PixelPtr-=2; - - for ( j = 0; j < 8; j++ ){ - FiltVal = - ( PixelPtr[0] ) - - ( PixelPtr[1] * 3 ) + - ( PixelPtr[2] * 3 ) - - ( PixelPtr[3] ); - - FiltVal = *(BoundingValuePtr+((FiltVal + 4) >> 3)); - - PixelPtr[1] = clamp255(PixelPtr[1] + FiltVal); - PixelPtr[2] = clamp255(PixelPtr[2] - FiltVal); - - PixelPtr += LineLength; - } -} - -static void loop_filter_v(unsigned char * PixelPtr, - ogg_int32_t LineLength, - ogg_int16_t *BoundingValuePtr){ - ogg_int32_t j; - ogg_int32_t FiltVal; - PixelPtr -= 2*LineLength; - - for ( j = 0; j < 8; j++ ) { - FiltVal = ( (ogg_int32_t)PixelPtr[0] ) - - ( (ogg_int32_t)PixelPtr[LineLength] * 3 ) + - ( (ogg_int32_t)PixelPtr[2 * LineLength] * 3 ) - - ( (ogg_int32_t)PixelPtr[3 * LineLength] ); - - FiltVal = *(BoundingValuePtr+((FiltVal + 4) >> 3)); - - PixelPtr[LineLength] = clamp255(PixelPtr[LineLength] + 
FiltVal); - PixelPtr[2 * LineLength] = clamp255(PixelPtr[2*LineLength] - FiltVal); - - PixelPtr ++; - } -} - -static void LoopFilter__c(PB_INSTANCE *pbi, int FLimit){ - - int j; - ogg_int16_t BoundingValues[256]; - ogg_int16_t *bvp = BoundingValues+127; - unsigned char *cp = pbi->display_fragments; - ogg_uint32_t *bp = pbi->recon_pixel_index_table; - - if ( FLimit == 0 ) return; - SetupBoundingValueArray_Generic(BoundingValues, FLimit); - - for ( j = 0; j < 3 ; j++){ - ogg_uint32_t *bp_begin = bp; - ogg_uint32_t *bp_end; - int stride; - int h; - - switch(j) { - case 0: /* y */ - bp_end = bp + pbi->YPlaneFragments; - h = pbi->HFragments; - stride = pbi->YStride; - break; - default: /* u,v, 4:20 specific */ - bp_end = bp + pbi->UVPlaneFragments; - h = pbi->HFragments >> 1; - stride = pbi->UVStride; - break; - } - - while(bpbp_left) - loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,bvp); - if(bp_left>bp_begin) - loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,bvp); - if(bp+1LastFrameRecon[bp[0]]+8,stride,bvp); - if(bp+hLastFrameRecon[bp[h]],stride,bvp); - } - bp++; - cp++; - } - } - } -} - -void ReconRefFrames (PB_INSTANCE *pbi){ - ogg_int32_t i; - unsigned char *SwapReconBuffersTemp; - - /* predictor multiplier up-left, up, up-right,left, shift - Entries are packed in the order L, UL, U, UR, with missing entries - moved to the end (before the shift parameters). */ - static const ogg_int16_t pc[16][6]={ - {0,0,0,0,0,0}, - {1,0,0,0,0,0}, /* PL */ - {1,0,0,0,0,0}, /* PUL */ - {1,0,0,0,0,0}, /* PUL|PL */ - {1,0,0,0,0,0}, /* PU */ - {1,1,0,0,1,1}, /* PU|PL */ - {0,1,0,0,0,0}, /* PU|PUL */ - {29,-26,29,0,5,31}, /* PU|PUL|PL */ - {1,0,0,0,0,0}, /* PUR */ - {75,53,0,0,7,127}, /* PUR|PL */ - {1,1,0,0,1,1}, /* PUR|PUL */ - {75,0,53,0,7,127}, /* PUR|PUL|PL */ - {1,0,0,0,0,0}, /* PUR|PU */ - {75,0,53,0,7,127}, /* PUR|PU|PL */ - {3,10,3,0,4,15}, /* PUR|PU|PUL */ - {29,-26,29,0,5,31} /* PUR|PU|PUL|PL */ - }; - - /* boundary case bit masks. 
*/ - static const int bc_mask[8]={ - /* normal case no boundary condition */ - PUR|PU|PUL|PL, - /* left column */ - PUR|PU, - /* top row */ - PL, - /* top row, left column */ - 0, - /* right column */ - PU|PUL|PL, - /* right and left column */ - PU, - /* top row, right column */ - PL, - /* top row, right and left column */ - 0 - }; - - /* value left value up-left, value up, value up-right, missing - values skipped. */ - int v[4]; - - /* fragment number left, up-left, up, up-right */ - int fn[4]; - - /* predictor count. */ - int pcount; - - short wpc; - static const short Mode2Frame[] = { - 1, /* CODE_INTER_NO_MV 0 => Encoded diff from same MB last frame */ - 0, /* CODE_INTRA 1 => DCT Encoded Block */ - 1, /* CODE_INTER_PLUS_MV 2 => Encoded diff from included MV MB last frame */ - 1, /* CODE_INTER_LAST_MV 3 => Encoded diff from MRU MV MB last frame */ - 1, /* CODE_INTER_PRIOR_MV 4 => Encoded diff from included 4 separate MV blocks */ - 2, /* CODE_USING_GOLDEN 5 => Encoded diff from same MB golden frame */ - 2, /* CODE_GOLDEN_MV 6 => Encoded diff from included MV MB golden frame */ - 1 /* CODE_INTER_FOUR_MV 7 => Encoded diff from included 4 separate MV blocks */ - }; - short Last[3]; - short PredictedDC; - int FragsAcross=pbi->HFragments; - int FromFragment,ToFragment; - int FragsDown = pbi->VFragments; - - int WhichFrame; - int WhichCase; - int j,k,m,n; - - void (*ExpandBlockA) ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber ); - - if ( pbi->FrameType == KEY_FRAME ) - ExpandBlockA=ExpandKFBlock; - else - ExpandBlockA=ExpandBlock; - - /* for y,u,v */ - for ( j = 0; j < 3 ; j++) { - /* pick which fragments based on Y, U, V */ - switch(j){ - case 0: /* y */ - FromFragment = 0; - ToFragment = pbi->YPlaneFragments; - FragsAcross = pbi->HFragments; - FragsDown = pbi->VFragments; - break; - case 1: /* u */ - FromFragment = pbi->YPlaneFragments; - ToFragment = pbi->YPlaneFragments + pbi->UVPlaneFragments ; - FragsAcross = pbi->HFragments >> 1; - FragsDown = pbi->VFragments 
>> 1; - break; - /*case 2: v */ - default: - FromFragment = pbi->YPlaneFragments + pbi->UVPlaneFragments; - ToFragment = pbi->YPlaneFragments + (2 * pbi->UVPlaneFragments) ; - FragsAcross = pbi->HFragments >> 1; - FragsDown = pbi->VFragments >> 1; - break; - } - - /* initialize our array of last used DC Components */ - for(k=0;k<3;k++) - Last[k]=0; - - i=FromFragment; - - /* do prediction on all of Y, U or V */ - for ( m = 0 ; m < FragsDown ; m++) { - for ( n = 0 ; n < FragsAcross ; n++, i++){ - - /* only do 2 prediction if fragment coded and on non intra or - if all fragments are intra */ - if( pbi->display_fragments[i] || (pbi->FrameType == KEY_FRAME) ){ - /* Type of Fragment */ - WhichFrame = Mode2Frame[pbi->FragCodingMethod[i]]; - - /* Check Borderline Cases */ - WhichCase = (n==0) + ((m==0) << 1) + ((n+1 == FragsAcross) << 2); - - fn[0]=i-1; - fn[1]=i-FragsAcross-1; - fn[2]=i-FragsAcross; - fn[3]=i-FragsAcross+1; - - /* fragment valid for prediction use if coded and it comes - from same frame as the one we are predicting */ - for(k=pcount=wpc=0; k<4; k++) { - int pflag; - pflag=1<display_fragments[fn[k]] && - (Mode2Frame[pbi->FragCodingMethod[fn[k]]] == WhichFrame)){ - v[pcount]=pbi->QFragData[fn[k]][0]; - wpc|=pflag; - pcount++; - } - } - - if(wpc==0){ - /* fall back to the last coded fragment */ - pbi->QFragData[i][0] += Last[WhichFrame]; - - }else{ - - /* don't do divide if divisor is 1 or 0 */ - PredictedDC = pc[wpc][0]*v[0]; - for(k=1; k>= pc[wpc][4]; - } - - /* check for outranging on the two predictors that can outrange */ - if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){ - if( abs(PredictedDC - v[2]) > 128) { - PredictedDC = v[2]; - } else if( abs(PredictedDC - v[0]) > 128) { - PredictedDC = v[0]; - } else if( abs(PredictedDC - v[1]) > 128) { - PredictedDC = v[1]; - } - } - - pbi->QFragData[i][0] += PredictedDC; - - } - - /* Save the last fragment coded for whatever frame we are - predicting from */ - Last[WhichFrame] = pbi->QFragData[i][0]; - - /* Inverse DCT 
and reconstitute buffer in thisframe */ - ExpandBlockA( pbi, i ); - } - } - } - } - - /* Copy the current reconstruction back to the last frame recon buffer. */ - if(pbi->CodedBlockIndex > (ogg_int32_t) (pbi->UnitFragments >> 1)){ - SwapReconBuffersTemp = pbi->ThisFrameRecon; - pbi->ThisFrameRecon = pbi->LastFrameRecon; - pbi->LastFrameRecon = SwapReconBuffersTemp; - CopyNotRecon( pbi, pbi->LastFrameRecon, pbi->ThisFrameRecon ); - }else{ - CopyRecon( pbi, pbi->LastFrameRecon, pbi->ThisFrameRecon ); - } - - /* Apply a loop filter to edge pixels of updated blocks */ - dsp_LoopFilter(pbi->dsp, pbi, pbi->quant_info.loop_filter_limits[pbi->FrameQIndex]); - - /* We may need to update the UMV border */ - UpdateUMVBorder(pbi, pbi->LastFrameRecon); - - /* Reconstruct the golden frame if necessary. - For VFW codec only on key frames */ - if ( pbi->FrameType == KEY_FRAME ){ - CopyRecon( pbi, pbi->GoldenFrame, pbi->LastFrameRecon ); - /* We may need to update the UMV border */ - UpdateUMVBorder(pbi, pbi->GoldenFrame); - } -} - -void dsp_dct_decode_init (DspFunctions *funcs, ogg_uint32_t cpu_flags) -{ - funcs->LoopFilter = LoopFilter__c; -#if defined(USE_ASM) - // Todo: Port the dct for MSC one day. -#if !defined (_MSC_VER) - if (cpu_flags & OC_CPU_X86_MMX) { - dsp_mmx_dct_decode_init(funcs); - } -#endif -#endif -} diff --git a/Engine/lib/libtheora/lib/enc/dct_encode.c b/Engine/lib/libtheora/lib/enc/dct_encode.c deleted file mode 100644 index 3a3c47778..000000000 --- a/Engine/lib/libtheora/lib/enc/dct_encode.c +++ /dev/null @@ -1,469 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dct_encode.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include -#include "codec_internal.h" -#include "dsp.h" -#include "quant_lookup.h" - - -static int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 }; - -static unsigned char TokenizeDctValue (ogg_int16_t DataValue, - ogg_uint32_t * TokenListPtr ){ - unsigned char tokens_added = 0; - ogg_uint32_t AbsDataVal = abs( (ogg_int32_t)DataValue ); - - /* Values are tokenised as category value and a number of additional - bits that define the position within the category. */ - - if ( DataValue == 0 ) return 0; - - if ( AbsDataVal == 1 ){ - if ( DataValue == 1 ) - TokenListPtr[0] = ONE_TOKEN; - else - TokenListPtr[0] = MINUS_ONE_TOKEN; - tokens_added = 1; - } else if ( AbsDataVal == 2 ) { - if ( DataValue == 2 ) - TokenListPtr[0] = TWO_TOKEN; - else - TokenListPtr[0] = MINUS_TWO_TOKEN; - tokens_added = 1; - } else if ( AbsDataVal <= MAX_SINGLE_TOKEN_VALUE ) { - TokenListPtr[0] = LOW_VAL_TOKENS + (AbsDataVal - DCT_VAL_CAT2_MIN); - if ( DataValue > 0 ) - TokenListPtr[1] = 0; - else - TokenListPtr[1] = 1; - tokens_added = 2; - } else if ( AbsDataVal <= 8 ) { - /* Bit 1 determines sign, Bit 0 the value */ - TokenListPtr[0] = DCT_VAL_CATEGORY3; - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT3_MIN); - else - TokenListPtr[1] = (0x02) + (AbsDataVal - DCT_VAL_CAT3_MIN); - tokens_added = 2; - } else if ( AbsDataVal <= 12 ) { - /* Bit 2 determines sign, Bit 0-2 the value */ - TokenListPtr[0] = DCT_VAL_CATEGORY4; - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT4_MIN); - else - TokenListPtr[1] = (0x04) + (AbsDataVal - DCT_VAL_CAT4_MIN); - tokens_added = 2; - } else if ( AbsDataVal <= 20 ) { - /* Bit 
3 determines sign, Bit 0-2 the value */ - TokenListPtr[0] = DCT_VAL_CATEGORY5; - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT5_MIN); - else - TokenListPtr[1] = (0x08) + (AbsDataVal - DCT_VAL_CAT5_MIN); - tokens_added = 2; - } else if ( AbsDataVal <= 36 ) { - /* Bit 4 determines sign, Bit 0-3 the value */ - TokenListPtr[0] = DCT_VAL_CATEGORY6; - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT6_MIN); - else - TokenListPtr[1] = (0x010) + (AbsDataVal - DCT_VAL_CAT6_MIN); - tokens_added = 2; - } else if ( AbsDataVal <= 68 ) { - /* Bit 5 determines sign, Bit 0-4 the value */ - TokenListPtr[0] = DCT_VAL_CATEGORY7; - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT7_MIN); - else - TokenListPtr[1] = (0x20) + (AbsDataVal - DCT_VAL_CAT7_MIN); - tokens_added = 2; - } else if ( AbsDataVal <= 511 ) { - /* Bit 9 determines sign, Bit 0-8 the value */ - TokenListPtr[0] = DCT_VAL_CATEGORY8; - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT8_MIN); - else - TokenListPtr[1] = (0x200) + (AbsDataVal - DCT_VAL_CAT8_MIN); - tokens_added = 2; - } else { - TokenListPtr[0] = DCT_VAL_CATEGORY8; - if ( DataValue > 0 ) - TokenListPtr[1] = (511 - DCT_VAL_CAT8_MIN); - else - TokenListPtr[1] = (0x200) + (511 - DCT_VAL_CAT8_MIN); - tokens_added = 2; - } - - /* Return the total number of tokens added */ - return tokens_added; -} - -static unsigned char TokenizeDctRunValue (unsigned char RunLength, - ogg_int16_t DataValue, - ogg_uint32_t * TokenListPtr ){ - unsigned char tokens_added = 0; - ogg_uint32_t AbsDataVal = abs( (ogg_int32_t)DataValue ); - - /* Values are tokenised as category value and a number of additional - bits that define the category. 
*/ - if ( DataValue == 0 ) return 0; - if ( AbsDataVal == 1 ) { - /* Zero runs of 1-5 */ - if ( RunLength <= 5 ) { - TokenListPtr[0] = DCT_RUN_CATEGORY1 + (RunLength - 1); - if ( DataValue > 0 ) - TokenListPtr[1] = 0; - else - TokenListPtr[1] = 1; - } else if ( RunLength <= 9 ) { - /* Zero runs of 6-9 */ - TokenListPtr[0] = DCT_RUN_CATEGORY1B; - if ( DataValue > 0 ) - TokenListPtr[1] = (RunLength - 6); - else - TokenListPtr[1] = 0x04 + (RunLength - 6); - } else { - /* Zero runs of 10-17 */ - TokenListPtr[0] = DCT_RUN_CATEGORY1C; - if ( DataValue > 0 ) - TokenListPtr[1] = (RunLength - 10); - else - TokenListPtr[1] = 0x08 + (RunLength - 10); - } - tokens_added = 2; - } else if ( AbsDataVal <= 3 ) { - if ( RunLength == 1 ) { - TokenListPtr[0] = DCT_RUN_CATEGORY2; - - /* Extra bits token bit 1 indicates sign, bit 0 indicates value */ - if ( DataValue > 0 ) - TokenListPtr[1] = (AbsDataVal - 2); - else - TokenListPtr[1] = (0x02) + (AbsDataVal - 2); - tokens_added = 2; - }else{ - TokenListPtr[0] = DCT_RUN_CATEGORY2 + 1; - - /* Extra bits token. */ - /* bit 2 indicates sign, bit 1 indicates value, bit 0 indicates - run length */ - if ( DataValue > 0 ) - TokenListPtr[1] = ((AbsDataVal - 2) << 1) + (RunLength - 2); - else - TokenListPtr[1] = (0x04) + ((AbsDataVal - 2) << 1) + (RunLength - 2); - tokens_added = 2; - } - } else { - tokens_added = 2; /* ERROR */ - /*IssueWarning( "Bad Input to TokenizeDctRunValue" );*/ - } - - /* Return the total number of tokens added */ - return tokens_added; -} - -static unsigned char TokenizeDctBlock (ogg_int16_t * RawData, - ogg_uint32_t * TokenListPtr ) { - ogg_uint32_t i; - unsigned char run_count; - unsigned char token_count = 0; /* Number of tokens crated. */ - ogg_uint32_t AbsData; - - - /* Tokenize the block */ - for( i = 0; i < BLOCK_SIZE; i++ ){ - run_count = 0; - - /* Look for a zero run. */ - /* NOTE the use of & instead of && which is faster (and - equivalent) in this instance. 
*/ - /* NO, NO IT ISN'T --Monty */ - while( (i < BLOCK_SIZE) && (!RawData[i]) ){ - run_count++; - i++; - } - - /* If we have reached the end of the block then code EOB */ - if ( i == BLOCK_SIZE ){ - TokenListPtr[token_count] = DCT_EOB_TOKEN; - token_count++; - }else{ - /* If we have a short zero run followed by a low data value code - the two as a composite token. */ - if ( run_count ){ - AbsData = abs(RawData[i]); - - if ( ((AbsData == 1) && (run_count <= 17)) || - ((AbsData <= 3) && (run_count <= 3)) ) { - /* Tokenise the run and subsequent value combination value */ - token_count += TokenizeDctRunValue( run_count, - RawData[i], - &TokenListPtr[token_count] ); - }else{ - - /* Else if we have a long non-EOB run or a run followed by a - value token > MAX_RUN_VAL then code the run and token - seperately */ - if ( run_count <= 8 ) - TokenListPtr[token_count] = DCT_SHORT_ZRL_TOKEN; - else - TokenListPtr[token_count] = DCT_ZRL_TOKEN; - - token_count++; - TokenListPtr[token_count] = run_count - 1; - token_count++; - - /* Now tokenize the value */ - token_count += TokenizeDctValue( RawData[i], - &TokenListPtr[token_count] ); - } - }else{ - /* Else there was NO zero run. */ - /* Tokenise the value */ - token_count += TokenizeDctValue( RawData[i], - &TokenListPtr[token_count] ); - } - } - } - - /* Return the total number of tokens (including additional bits - tokens) used. */ - return token_count; -} - -ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi, - ogg_int32_t FragIndex){ - ogg_uint32_t token_count; - - if ( cpi->pb.FrameType == KEY_FRAME ){ - /* Key frame so code block in INTRA mode. */ - cpi->pb.CodingMode = CODE_INTRA; - }else{ - /* Get Motion vector and mode for this block. */ - cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex]; - } - - /* Tokenise the dct data. 
*/ - token_count = TokenizeDctBlock( cpi->pb.QFragData[FragIndex], - cpi->pb.TokenList[FragIndex] ); - - cpi->FragTokenCounts[FragIndex] = token_count; - cpi->TotTokenCount += token_count; - - /* Return number of pixels coded (i.e. 8x8). */ - return BLOCK_SIZE; -} - -static int AllZeroDctData( Q_LIST_ENTRY * QuantList ){ - ogg_uint32_t i; - - for ( i = 0; i < 64; i ++ ) - if ( QuantList[i] != 0 ) - return 0; - - return 1; -} - -static void MotionBlockDifference (CP_INSTANCE * cpi, unsigned char * FiltPtr, - ogg_int16_t *DctInputPtr, ogg_int32_t MvDevisor, - unsigned char* old_ptr1, unsigned char* new_ptr1, - ogg_uint32_t FragIndex,ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) { - - ogg_int32_t MvShift; - ogg_int32_t MvModMask; - ogg_int32_t AbsRefOffset; - ogg_int32_t AbsXOffset; - ogg_int32_t AbsYOffset; - ogg_int32_t MVOffset; /* Baseline motion vector offset */ - ogg_int32_t ReconPtr2Offset; /* Offset for second reconstruction in - half pixel MC */ - unsigned char *ReconPtr1; /* DCT reconstructed image pointers */ - unsigned char *ReconPtr2; /* Pointer used in half pixel MC */ - - switch(MvDevisor) { - case 2: - MvShift = 1; - MvModMask = 1; - break; - case 4: - MvShift = 2; - MvModMask = 3; - break; - default: - break; - } - - cpi->MVector.x = cpi->pb.FragMVect[FragIndex].x; - cpi->MVector.y = cpi->pb.FragMVect[FragIndex].y; - - /* Set up the baseline offset for the motion vector. */ - MVOffset = ((cpi->MVector.y / MvDevisor) * ReconPixelsPerLine) + - (cpi->MVector.x / MvDevisor); - - /* Work out the offset of the second reference position for 1/2 - pixel interpolation. For the U and V planes the MV specifies 1/4 - pixel accuracy. This is adjusted to 1/2 pixel as follows ( 0->0, - 1/4->1/2, 1/2->1/2, 3/4->1/2 ). 
*/ - ReconPtr2Offset = 0; - AbsXOffset = cpi->MVector.x % MvDevisor; - AbsYOffset = cpi->MVector.y % MvDevisor; - - if ( AbsXOffset ) { - if ( cpi->MVector.x > 0 ) - ReconPtr2Offset += 1; - else - ReconPtr2Offset -= 1; - } - - if ( AbsYOffset ) { - if ( cpi->MVector.y > 0 ) - ReconPtr2Offset += ReconPixelsPerLine; - else - ReconPtr2Offset -= ReconPixelsPerLine; - } - - if ( cpi->pb.CodingMode==CODE_GOLDEN_MV ) { - ReconPtr1 = &cpi-> - pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]]; - } else { - ReconPtr1 = &cpi-> - pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]]; - } - - ReconPtr1 += MVOffset; - ReconPtr2 = ReconPtr1 + ReconPtr2Offset; - - AbsRefOffset = abs((int)(ReconPtr1 - ReconPtr2)); - - /* Is the MV offset exactly pixel alligned */ - if ( AbsRefOffset == 0 ){ - dsp_sub8x8(cpi->dsp, FiltPtr, ReconPtr1, DctInputPtr, - PixelsPerLine, ReconPixelsPerLine); - dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine); - } else { - /* Fractional pixel MVs. */ - /* Note that we only use two pixel values even for the diagonal */ - dsp_sub8x8avg2(cpi->dsp, FiltPtr, ReconPtr1,ReconPtr2,DctInputPtr, - PixelsPerLine, ReconPixelsPerLine); - dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine); - } -} - -void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex, - ogg_uint32_t PixelsPerLine) { - unsigned char *new_ptr1; /* Pointers into current frame */ - unsigned char *old_ptr1; /* Pointers into old frame */ - unsigned char *FiltPtr; /* Pointers to srf filtered pixels */ - ogg_int16_t *DctInputPtr; /* Pointer into buffer containing input to DCT */ - int LeftEdge; /* Flag if block at left edge of component */ - ogg_uint32_t ReconPixelsPerLine; /* Line length for recon buffers. 
*/ - - unsigned char *ReconPtr1; /* DCT reconstructed image pointers */ - ogg_int32_t MvDevisor; /* Defines MV resolution (2 = 1/2 - pixel for Y or 4 = 1/4 for UV) */ - - new_ptr1 = &cpi->yuv1ptr[cpi->pb.pixel_index_table[FragIndex]]; - old_ptr1 = &cpi->yuv0ptr[cpi->pb.pixel_index_table[FragIndex]]; - DctInputPtr = cpi->DCTDataBuffer; - - /* Set plane specific values */ - if (FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments){ - ReconPixelsPerLine = cpi->pb.YStride; - MvDevisor = 2; /* 1/2 pixel accuracy in Y */ - }else{ - ReconPixelsPerLine = cpi->pb.UVStride; - MvDevisor = 4; /* UV planes at 1/2 resolution of Y */ - } - - /* adjusted / filtered pointers */ - FiltPtr = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]]; - - if ( cpi->pb.FrameType == KEY_FRAME ) { - /* Key frame so code block in INTRA mode. */ - cpi->pb.CodingMode = CODE_INTRA; - }else{ - /* Get Motion vector and mode for this block. */ - cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex]; - } - - /* Selection of Quantiser matrix and set other plane related values. 
*/ - if ( FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments ){ - LeftEdge = !(FragIndex%cpi->pb.HFragments); - - /* Select the appropriate Y quantiser matrix */ - if ( cpi->pb.CodingMode == CODE_INTRA ) - select_quantiser(&cpi->pb, BLOCK_Y); - else - select_quantiser(&cpi->pb, BLOCK_INTER_Y); - } else { - LeftEdge = !((FragIndex-cpi->pb.YPlaneFragments)%(cpi->pb.HFragments>>1)); - - if(FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments + (ogg_int32_t)cpi->pb.UVPlaneFragments) { - /* U plane */ - if ( cpi->pb.CodingMode == CODE_INTRA ) - select_quantiser(&cpi->pb, BLOCK_U); - else - select_quantiser(&cpi->pb, BLOCK_INTER_U); - } else { - /* V plane */ - if ( cpi->pb.CodingMode == CODE_INTRA ) - select_quantiser(&cpi->pb, BLOCK_V); - else - select_quantiser(&cpi->pb, BLOCK_INTER_V); - } - } - - if ( ModeUsesMC[cpi->pb.CodingMode] ){ - - MotionBlockDifference(cpi, FiltPtr, DctInputPtr, MvDevisor, - old_ptr1, new_ptr1, FragIndex, PixelsPerLine, - ReconPixelsPerLine); - - } else if ( (cpi->pb.CodingMode==CODE_INTER_NO_MV ) || - ( cpi->pb.CodingMode==CODE_USING_GOLDEN ) ) { - if ( cpi->pb.CodingMode==CODE_INTER_NO_MV ) { - ReconPtr1 = &cpi-> - pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]]; - } else { - ReconPtr1 = &cpi-> - pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]]; - } - - dsp_sub8x8(cpi->dsp, FiltPtr, ReconPtr1, DctInputPtr, - PixelsPerLine, ReconPixelsPerLine); - dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine); - } else if ( cpi->pb.CodingMode==CODE_INTRA ) { - dsp_sub8x8_128(cpi->dsp, FiltPtr, DctInputPtr, PixelsPerLine); - dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine); - } - - /* Proceed to encode the data into the encode buffer if the encoder - is enabled. */ - /* Perform a 2D DCT transform on the data. */ - dsp_fdct_short(cpi->dsp, cpi->DCTDataBuffer, cpi->DCT_codes ); - - /* Quantize that transform data. 
*/ - quantize ( &cpi->pb, cpi->DCT_codes, cpi->pb.QFragData[FragIndex] ); - - if ( (cpi->pb.CodingMode == CODE_INTER_NO_MV) && - ( AllZeroDctData(cpi->pb.QFragData[FragIndex]) ) ) { - cpi->pb.display_fragments[FragIndex] = 0; - } - -} diff --git a/Engine/lib/libtheora/lib/enc/dsp.c b/Engine/lib/libtheora/lib/enc/dsp.c deleted file mode 100644 index 9fe402d4e..000000000 --- a/Engine/lib/libtheora/lib/enc/dsp.c +++ /dev/null @@ -1,422 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp.c 15427 2008-10-21 02:36:19Z xiphmont $ - - ********************************************************************/ - -#include -#include "codec_internal.h" -#include "../cpu.c" - -#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2) -#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b))) -#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b)))) - -static void sub8x8__c (unsigned char *FiltPtr, unsigned char *ReconPtr, - ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) { - int i; - - /* For each block row */ - for (i=8; i; i--) { - DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]); - DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]); - DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]); - DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]); - DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]); - DctInputPtr[5] = (ogg_int16_t) 
DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]); - DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]); - DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]); - - /* Start next row */ - FiltPtr += PixelsPerLine; - ReconPtr += ReconPixelsPerLine; - DctInputPtr += 8; - } -} - -static void sub8x8_128__c (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine) { - int i; - /* For each block row */ - for (i=8; i; i--) { - /* INTRA mode so code raw image data */ - /* We convert the data to 8 bit signed (by subtracting 128) as - this reduces the internal precision requirments in the DCT - transform. */ - DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128); - DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128); - DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128); - DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128); - DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128); - DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128); - DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128); - DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128); - - /* Start next row */ - FiltPtr += PixelsPerLine; - DctInputPtr += 8; - } -} - -static void sub8x8avg2__c (unsigned char *FiltPtr, unsigned char *ReconPtr1, - unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - int i; - - /* For each block row */ - for (i=8; i; i--) { - DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0])); - DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1])); - DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2])); - DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3])); - DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], 
ReconPtr2[4])); - DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5])); - DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6])); - DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7])); - - /* Start next row */ - FiltPtr += PixelsPerLine; - ReconPtr1 += ReconPixelsPerLine; - ReconPtr2 += ReconPixelsPerLine; - DctInputPtr += 8; - } -} - -static ogg_uint32_t row_sad8__c (unsigned char *Src1, unsigned char *Src2) -{ - ogg_uint32_t SadValue; - ogg_uint32_t SadValue1; - - SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) + - DSP_OP_ABS_DIFF (Src1[1], Src2[1]) + - DSP_OP_ABS_DIFF (Src1[2], Src2[2]) + - DSP_OP_ABS_DIFF (Src1[3], Src2[3]); - - SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) + - DSP_OP_ABS_DIFF (Src1[5], Src2[5]) + - DSP_OP_ABS_DIFF (Src1[6], Src2[6]) + - DSP_OP_ABS_DIFF (Src1[7], Src2[7]); - - SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1; - - return SadValue; -} - -static ogg_uint32_t col_sad8x8__c (unsigned char *Src1, unsigned char *Src2, - ogg_uint32_t stride) -{ - ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0}; - ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0}; - ogg_uint32_t MaxSad = 0; - ogg_uint32_t i; - - for ( i = 0; i < 4; i++ ){ - SadValue[0] += abs(Src1[0] - Src2[0]); - SadValue[1] += abs(Src1[1] - Src2[1]); - SadValue[2] += abs(Src1[2] - Src2[2]); - SadValue[3] += abs(Src1[3] - Src2[3]); - SadValue[4] += abs(Src1[4] - Src2[4]); - SadValue[5] += abs(Src1[5] - Src2[5]); - SadValue[6] += abs(Src1[6] - Src2[6]); - SadValue[7] += abs(Src1[7] - Src2[7]); - - Src1 += stride; - Src2 += stride; - } - - for ( i = 0; i < 4; i++ ){ - SadValue2[0] += abs(Src1[0] - Src2[0]); - SadValue2[1] += abs(Src1[1] - Src2[1]); - SadValue2[2] += abs(Src1[2] - Src2[2]); - SadValue2[3] += abs(Src1[3] - Src2[3]); - SadValue2[4] += abs(Src1[4] - Src2[4]); - SadValue2[5] += abs(Src1[5] - Src2[5]); - SadValue2[6] += abs(Src1[6] - Src2[6]); - 
SadValue2[7] += abs(Src1[7] - Src2[7]); - - Src1 += stride; - Src2 += stride; - } - - for ( i = 0; i < 8; i++ ){ - if ( SadValue[i] > MaxSad ) - MaxSad = SadValue[i]; - if ( SadValue2[i] > MaxSad ) - MaxSad = SadValue2[i]; - } - - return MaxSad; -} - -static ogg_uint32_t sad8x8__c (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2) -{ - ogg_uint32_t i; - ogg_uint32_t sad = 0; - - for (i=8; i; i--) { - sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]); - sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]); - sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]); - sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]); - sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]); - sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]); - sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]); - sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]); - - /* Step to next row of block. */ - ptr1 += stride1; - ptr2 += stride2; - } - - return sad; -} - -static ogg_uint32_t sad8x8_thres__c (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2, - ogg_uint32_t thres) -{ - ogg_uint32_t i; - ogg_uint32_t sad = 0; - - for (i=8; i; i--) { - sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]); - sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]); - sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]); - sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]); - sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]); - sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]); - sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]); - sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]); - - if (sad > thres ) - break; - - /* Step to next row of block. 
*/ - ptr1 += stride1; - ptr2 += stride2; - } - - return sad; -} - -static ogg_uint32_t sad8x8_xy2_thres__c (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride, - ogg_uint32_t thres) -{ - ogg_uint32_t i; - ogg_uint32_t sad = 0; - - for (i=8; i; i--) { - sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0])); - sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1])); - sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2])); - sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3])); - sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4])); - sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5])); - sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6])); - sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7])); - - if ( sad > thres ) - break; - - /* Step to next row of block. */ - SrcData += SrcStride; - RefDataPtr1 += RefStride; - RefDataPtr2 += RefStride; - } - - return sad; -} - -static ogg_uint32_t intra8x8_err__c (unsigned char *DataPtr, ogg_uint32_t Stride) -{ - ogg_uint32_t i; - ogg_uint32_t XSum=0; - ogg_uint32_t XXSum=0; - - for (i=8; i; i--) { - /* Examine alternate pixel locations. */ - XSum += DataPtr[0]; - XXSum += DataPtr[0]*DataPtr[0]; - XSum += DataPtr[1]; - XXSum += DataPtr[1]*DataPtr[1]; - XSum += DataPtr[2]; - XXSum += DataPtr[2]*DataPtr[2]; - XSum += DataPtr[3]; - XXSum += DataPtr[3]*DataPtr[3]; - XSum += DataPtr[4]; - XXSum += DataPtr[4]*DataPtr[4]; - XSum += DataPtr[5]; - XXSum += DataPtr[5]*DataPtr[5]; - XSum += DataPtr[6]; - XXSum += DataPtr[6]*DataPtr[6]; - XSum += DataPtr[7]; - XXSum += DataPtr[7]*DataPtr[7]; - - /* Step to next row of block. */ - DataPtr += Stride; - } - - /* Compute population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum ) ); -} - -static ogg_uint32_t inter8x8_err__c (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr, ogg_uint32_t RefStride) -{ - ogg_uint32_t i; - ogg_uint32_t XSum=0; - ogg_uint32_t XXSum=0; - ogg_int32_t DiffVal; - - for (i=8; i; i--) { - DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - /* Step to next row of block. */ - SrcData += SrcStride; - RefDataPtr += RefStride; - } - - /* Compute and return population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum )); -} - -static ogg_uint32_t inter8x8_err_xy2__c (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride) -{ - ogg_uint32_t i; - ogg_uint32_t XSum=0; - ogg_uint32_t XXSum=0; - ogg_int32_t DiffVal; - - for (i=8; i; i--) { - DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - /* Step to next row of block. */ - SrcData += SrcStride; - RefDataPtr1 += RefStride; - RefDataPtr2 += RefStride; - } - - /* Compute and return population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum )); -} - -static void nop (void) { /* NOP */ } - -void dsp_init(DspFunctions *funcs) -{ - funcs->save_fpu = nop; - funcs->restore_fpu = nop; - funcs->sub8x8 = sub8x8__c; - funcs->sub8x8_128 = sub8x8_128__c; - funcs->sub8x8avg2 = sub8x8avg2__c; - funcs->row_sad8 = row_sad8__c; - funcs->col_sad8x8 = col_sad8x8__c; - funcs->sad8x8 = sad8x8__c; - funcs->sad8x8_thres = sad8x8_thres__c; - funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__c; - funcs->intra8x8_err = intra8x8_err__c; - funcs->inter8x8_err = inter8x8_err__c; - funcs->inter8x8_err_xy2 = inter8x8_err_xy2__c; -} - -void dsp_static_init(DspFunctions *funcs) -{ - ogg_uint32_t cpuflags; - - cpuflags = oc_cpu_flags_get (); - dsp_init (funcs); - - dsp_recon_init (funcs, cpuflags); - dsp_dct_init (funcs, cpuflags); -#if defined(USE_ASM) - if (cpuflags & OC_CPU_X86_MMX) { - dsp_mmx_init(funcs); - } -# ifndef WIN32 - /* This is implemented for win32 yet */ - if (cpuflags & OC_CPU_X86_MMXEXT) { - dsp_mmxext_init(funcs); - } -# endif -#endif -} - diff --git a/Engine/lib/libtheora/lib/enc/dsp.h b/Engine/lib/libtheora/lib/enc/dsp.h deleted file mode 100644 index 7f96f7f84..000000000 --- a/Engine/lib/libtheora/lib/enc/dsp.h +++ /dev/null @@ -1,166 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp.h 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#ifndef DSP_H -#define DSP_H - -#include "theora/theora.h" -#include "../cpu.h" - -typedef struct -{ - void (*save_fpu) (void); - void (*restore_fpu) (void); - - void (*sub8x8) (unsigned char *FiltPtr, unsigned char *ReconPtr, - ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine); - - void (*sub8x8_128) (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine); - - void (*sub8x8avg2) (unsigned char *FiltPtr, unsigned char *ReconPtr1, - unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine); - - void (*copy8x8) (unsigned char *src, unsigned char *dest, - ogg_uint32_t stride); - - void (*recon_intra8x8) (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep); - - void (*recon_inter8x8) (unsigned char *ReconPtr, unsigned char *RefPtr, - ogg_int16_t *ChangePtr, ogg_uint32_t LineStep); - - void (*recon_inter8x8_half) (unsigned char *ReconPtr, unsigned char *RefPtr1, - unsigned char *RefPtr2, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep); - - void (*fdct_short) (ogg_int16_t *InputData, ogg_int16_t *OutputData); - - ogg_uint32_t (*row_sad8) (unsigned char *Src1, unsigned char *Src2); - - ogg_uint32_t (*col_sad8x8) (unsigned char *Src1, unsigned char *Src2, - ogg_uint32_t stride); - - ogg_uint32_t (*sad8x8) (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2); - - ogg_uint32_t (*sad8x8_thres) (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2, - ogg_uint32_t thres); - - ogg_uint32_t (*sad8x8_xy2_thres)(unsigned char 
*SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride, - ogg_uint32_t thres); - - ogg_uint32_t (*intra8x8_err) (unsigned char *DataPtr, ogg_uint32_t Stride); - - ogg_uint32_t (*inter8x8_err) (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr, ogg_uint32_t RefStride); - - ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride); - - void (*LoopFilter) (PB_INSTANCE *pbi, int FLimit); - - void (*FilterVert) (unsigned char * PixelPtr, - ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr); - - void (*IDctSlow) (ogg_int16_t *InputData, - ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData); - - void (*IDct3) (ogg_int16_t *InputData, - ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData); - - void (*IDct10) (ogg_int16_t *InputData, - ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData); -} DspFunctions; - -extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags); -extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags); -extern void dsp_dct_decode_init(DspFunctions *funcs, ogg_uint32_t cpu_flags); -extern void dsp_idct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags); - -void dsp_init(DspFunctions *funcs); -void dsp_static_init(DspFunctions *funcs); -#if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__) || defined(WIN32)) -extern void dsp_mmx_init(DspFunctions *funcs); -extern void dsp_mmxext_init(DspFunctions *funcs); -extern void dsp_mmx_fdct_init(DspFunctions *funcs); -extern void dsp_mmx_recon_init(DspFunctions *funcs); -extern void dsp_mmx_dct_decode_init(DspFunctions *funcs); -extern void dsp_mmx_idct_init(DspFunctions *funcs); -#endif - -#define dsp_save_fpu(funcs) (funcs.save_fpu ()) - -#define dsp_restore_fpu(funcs) (funcs.restore_fpu ()) - -#define dsp_sub8x8(funcs,a1,a2,a3,a4,a5) (funcs.sub8x8 (a1,a2,a3,a4,a5)) - -#define 
dsp_sub8x8_128(funcs,a1,a2,a3) (funcs.sub8x8_128 (a1,a2,a3)) - -#define dsp_sub8x8avg2(funcs,a1,a2,a3,a4,a5,a6) (funcs.sub8x8avg2 (a1,a2,a3,a4,a5,a6)) - -#define dsp_copy8x8(funcs,ptr1,ptr2,str1) (funcs.copy8x8 (ptr1,ptr2,str1)) - -#define dsp_recon_intra8x8(funcs,ptr1,ptr2,str1) (funcs.recon_intra8x8 (ptr1,ptr2,str1)) - -#define dsp_recon_inter8x8(funcs,ptr1,ptr2,ptr3,str1) \ - (funcs.recon_inter8x8 (ptr1,ptr2,ptr3,str1)) - -#define dsp_recon_inter8x8_half(funcs,ptr1,ptr2,ptr3,ptr4,str1) \ - (funcs.recon_inter8x8_half (ptr1,ptr2,ptr3,ptr4,str1)) - -#define dsp_fdct_short(funcs,in,out) (funcs.fdct_short (in,out)) - -#define dsp_row_sad8(funcs,ptr1,ptr2) (funcs.row_sad8 (ptr1,ptr2)) - -#define dsp_col_sad8x8(funcs,ptr1,ptr2,str1) (funcs.col_sad8x8 (ptr1,ptr2,str1)) - -#define dsp_sad8x8(funcs,ptr1,str1,ptr2,str2) (funcs.sad8x8 (ptr1,str1,ptr2,str2)) - -#define dsp_sad8x8_thres(funcs,ptr1,str1,ptr2,str2,t) (funcs.sad8x8_thres (ptr1,str1,ptr2,str2,t)) - -#define dsp_sad8x8_xy2_thres(funcs,ptr1,str1,ptr2,ptr3,str2,t) \ - (funcs.sad8x8_xy2_thres (ptr1,str1,ptr2,ptr3,str2,t)) - -#define dsp_intra8x8_err(funcs,ptr1,str1) (funcs.intra8x8_err (ptr1,str1)) - -#define dsp_inter8x8_err(funcs,ptr1,str1,ptr2,str2) \ - (funcs.inter8x8_err (ptr1,str1,ptr2,str2)) - -#define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \ - (funcs.inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2)) - -#define dsp_LoopFilter(funcs, ptr1, i) \ - (funcs.LoopFilter(ptr1, i)) - -#define dsp_IDctSlow(funcs, ptr1, ptr2, ptr3) \ - (funcs.IDctSlow(ptr1, ptr2, ptr3)) - -#define dsp_IDct3(funcs, ptr1, ptr2, ptr3) \ - (funcs.IDctSlow(ptr1, ptr2, ptr3)) - -#define dsp_IDct10(funcs, ptr1, ptr2, ptr3) \ - (funcs.IDctSlow(ptr1, ptr2, ptr3)) - -#endif /* DSP_H */ diff --git a/Engine/lib/libtheora/lib/enc/encode.c b/Engine/lib/libtheora/lib/enc/encode.c deleted file mode 100644 index 5dc89f2af..000000000 --- a/Engine/lib/libtheora/lib/enc/encode.c +++ /dev/null @@ -1,1479 +0,0 @@ 
-/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encode.c 15383 2008-10-10 14:33:46Z xiphmont $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" -#include "encoder_lookup.h" -#include "block_inline.h" - -#define PUR 8 -#define PU 4 -#define PUL 2 -#define PL 1 -#define HIGHBITDUPPED(X) (((ogg_int16_t) X) >> 15) - -static ogg_uint32_t QuadCodeComponent ( CP_INSTANCE *cpi, - ogg_uint32_t FirstSB, - ogg_uint32_t SBRows, - ogg_uint32_t SBCols, - ogg_uint32_t PixelsPerLine){ - - ogg_int32_t FragIndex; /* Fragment number */ - ogg_uint32_t MB, B; /* Macro-Block, Block indices */ - ogg_uint32_t SBrow; /* Super-Block row number */ - ogg_uint32_t SBcol; /* Super-Block row number */ - ogg_uint32_t SB=FirstSB; /* Super-Block index, initialised to first - of this component */ - ogg_uint32_t coded_pixels=0; /* Number of pixels coded */ - int MBCodedFlag; - - /* actually transform and quantize the image now that we've decided - on the modes Parse in quad-tree ordering */ - - for ( SBrow=0; SBrowpb.BlockMap,SB,MB) >= 0 ) { - - MBCodedFlag = 0; - - /* Now actually code the blocks */ - for ( B=0; B<4; B++ ) { - FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B ); - - /* Does Block lie in frame: */ - if ( FragIndex >= 0 ) { - - /* In Frame: Is it coded: */ - if ( cpi->pb.display_fragments[FragIndex] ) { - - /* transform and quantize block */ - TransformQuantizeBlock( cpi, FragIndex, 
PixelsPerLine ); - - /* Has the block got struck off (no MV and no data - generated after DCT) If not then mark it and the - assosciated MB as coded. */ - if ( cpi->pb.display_fragments[FragIndex] ) { - /* Create linear list of coded block indices */ - cpi->pb.CodedBlockList[cpi->pb.CodedBlockIndex] = FragIndex; - cpi->pb.CodedBlockIndex++; - - /* MB is still coded */ - MBCodedFlag = 1; - cpi->MBCodingMode = cpi->pb.FragCodingMethod[FragIndex]; - - } - } - } - } - /* If the MB is marked as coded and we are in the Y plane then */ - /* the mode list needs to be updated. */ - if ( MBCodedFlag && (FirstSB == 0) ){ - /* Make a note of the selected mode in the mode list */ - cpi->ModeList[cpi->ModeListCount] = cpi->MBCodingMode; - cpi->ModeListCount++; - } - } - } - - SB++; - - } - } - - /* Return number of pixels coded */ - return coded_pixels; -} - -static void EncodeDcTokenList (CP_INSTANCE *cpi) { - ogg_int32_t i,j; - ogg_uint32_t Token; - ogg_uint32_t ExtraBitsToken; - ogg_uint32_t HuffIndex; - - ogg_uint32_t BestDcBits; - ogg_uint32_t DcHuffChoice[2]; - ogg_uint32_t EntropyTableBits[2][DC_HUFF_CHOICES]; - - oggpack_buffer *opb=cpi->oggbuffer; - - /* Clear table data structure */ - memset ( EntropyTableBits, 0, sizeof(ogg_uint32_t)*DC_HUFF_CHOICES*2 ); - - /* Analyse token list to see which is the best entropy table to use */ - for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) { - /* Count number of bits for each table option */ - Token = (ogg_uint32_t)cpi->OptimisedTokenList[i]; - for ( j = 0; j < DC_HUFF_CHOICES; j++ ){ - EntropyTableBits[cpi->OptimisedTokenListPl[i]][j] += - cpi->pb.HuffCodeLengthArray_VP3x[DC_HUFF_OFFSET + j][Token]; - } - } - - /* Work out which table option is best for Y */ - BestDcBits = EntropyTableBits[0][0]; - DcHuffChoice[0] = 0; - for ( j = 1; j < DC_HUFF_CHOICES; j++ ) { - if ( EntropyTableBits[0][j] < BestDcBits ) { - BestDcBits = EntropyTableBits[0][j]; - DcHuffChoice[0] = j; - } - } - - /* Add the DC huffman table choice to the 
bitstream */ - oggpackB_write( opb, DcHuffChoice[0], DC_HUFF_CHOICE_BITS ); - - /* Work out which table option is best for UV */ - BestDcBits = EntropyTableBits[1][0]; - DcHuffChoice[1] = 0; - for ( j = 1; j < DC_HUFF_CHOICES; j++ ) { - if ( EntropyTableBits[1][j] < BestDcBits ) { - BestDcBits = EntropyTableBits[1][j]; - DcHuffChoice[1] = j; - } - } - - /* Add the DC huffman table choice to the bitstream */ - oggpackB_write( opb, DcHuffChoice[1], DC_HUFF_CHOICE_BITS ); - - /* Encode the token list */ - for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) { - - /* Get the token and extra bits */ - Token = (ogg_uint32_t)cpi->OptimisedTokenList[i]; - ExtraBitsToken = (ogg_uint32_t)cpi->OptimisedTokenListEb[i]; - - /* Select the huffman table */ - if ( cpi->OptimisedTokenListPl[i] == 0) - HuffIndex = (ogg_uint32_t)DC_HUFF_OFFSET + (ogg_uint32_t)DcHuffChoice[0]; - else - HuffIndex = (ogg_uint32_t)DC_HUFF_OFFSET + (ogg_uint32_t)DcHuffChoice[1]; - - /* Add the bits to the encode holding buffer. */ - cpi->FrameBitCount += cpi->pb.HuffCodeLengthArray_VP3x[HuffIndex][Token]; - oggpackB_write( opb, cpi->pb.HuffCodeArray_VP3x[HuffIndex][Token], - (ogg_uint32_t)cpi-> - pb.HuffCodeLengthArray_VP3x[HuffIndex][Token] ); - - /* If the token is followed by an extra bits token then code it */ - if ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 ) { - /* Add the bits to the encode holding buffer. 
*/ - cpi->FrameBitCount += cpi->pb.ExtraBitLengths_VP3x[Token]; - oggpackB_write( opb, ExtraBitsToken, - (ogg_uint32_t)cpi->pb.ExtraBitLengths_VP3x[Token] ); - } - - } - - /* Reset the count of second order optimised tokens */ - cpi->OptimisedTokenCount = 0; -} - -static void EncodeAcTokenList (CP_INSTANCE *cpi) { - ogg_int32_t i,j; - ogg_uint32_t Token; - ogg_uint32_t ExtraBitsToken; - ogg_uint32_t HuffIndex; - - ogg_uint32_t BestAcBits; - ogg_uint32_t AcHuffChoice[2]; - ogg_uint32_t EntropyTableBits[2][AC_HUFF_CHOICES]; - - oggpack_buffer *opb=cpi->oggbuffer; - - memset ( EntropyTableBits, 0, sizeof(ogg_uint32_t)*AC_HUFF_CHOICES*2 ); - - /* Analyse token list to see which is the best entropy table to use */ - for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) { - /* Count number of bits for each table option */ - Token = (ogg_uint32_t)cpi->OptimisedTokenList[i]; - HuffIndex = cpi->OptimisedTokenListHi[i]; - for ( j = 0; j < AC_HUFF_CHOICES; j++ ) { - EntropyTableBits[cpi->OptimisedTokenListPl[i]][j] += - cpi->pb.HuffCodeLengthArray_VP3x[HuffIndex + j][Token]; - } - } - - /* Select the best set of AC tables for Y */ - BestAcBits = EntropyTableBits[0][0]; - AcHuffChoice[0] = 0; - for ( j = 1; j < AC_HUFF_CHOICES; j++ ) { - if ( EntropyTableBits[0][j] < BestAcBits ) { - BestAcBits = EntropyTableBits[0][j]; - AcHuffChoice[0] = j; - } - } - - /* Add the AC-Y huffman table choice to the bitstream */ - oggpackB_write( opb, AcHuffChoice[0], AC_HUFF_CHOICE_BITS ); - - /* Select the best set of AC tables for UV */ - BestAcBits = EntropyTableBits[1][0]; - AcHuffChoice[1] = 0; - for ( j = 1; j < AC_HUFF_CHOICES; j++ ) { - if ( EntropyTableBits[1][j] < BestAcBits ) { - BestAcBits = EntropyTableBits[1][j]; - AcHuffChoice[1] = j; - } - } - - /* Add the AC-UV huffman table choice to the bitstream */ - oggpackB_write( opb, AcHuffChoice[1], AC_HUFF_CHOICE_BITS ); - - /* Encode the token list */ - for ( i = 0; i < cpi->OptimisedTokenCount; i++ ) { - /* Get the token and extra bits */ 
- Token = (ogg_uint32_t)cpi->OptimisedTokenList[i]; - ExtraBitsToken = (ogg_uint32_t)cpi->OptimisedTokenListEb[i]; - - /* Select the huffman table */ - HuffIndex = (ogg_uint32_t)cpi->OptimisedTokenListHi[i] + - AcHuffChoice[cpi->OptimisedTokenListPl[i]]; - - /* Add the bits to the encode holding buffer. */ - cpi->FrameBitCount += cpi->pb.HuffCodeLengthArray_VP3x[HuffIndex][Token]; - oggpackB_write( opb, cpi->pb.HuffCodeArray_VP3x[HuffIndex][Token], - (ogg_uint32_t)cpi-> - pb.HuffCodeLengthArray_VP3x[HuffIndex][Token] ); - - /* If the token is followed by an extra bits token then code it */ - if ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 ) { - /* Add the bits to the encode holding buffer. */ - cpi->FrameBitCount += cpi->pb.ExtraBitLengths_VP3x[Token]; - oggpackB_write( opb, ExtraBitsToken, - (ogg_uint32_t)cpi->pb.ExtraBitLengths_VP3x[Token] ); - } - } - - /* Reset the count of second order optimised tokens */ - cpi->OptimisedTokenCount = 0; -} - -static void PackModes (CP_INSTANCE *cpi) { - ogg_uint32_t i,j; - unsigned char ModeIndex; - const unsigned char *SchemeList; - - unsigned char BestModeSchemes[MAX_MODES]; - ogg_int32_t ModeCount[MAX_MODES]; - ogg_int32_t TmpFreq = -1; - ogg_int32_t TmpIndex = -1; - - ogg_uint32_t BestScheme; - ogg_uint32_t BestSchemeScore; - ogg_uint32_t SchemeScore; - - oggpack_buffer *opb=cpi->oggbuffer; - - /* Build a frequency map for the modes in this frame */ - memset( ModeCount, 0, MAX_MODES*sizeof(ogg_int32_t) ); - for ( i = 0; i < cpi->ModeListCount; i++ ) - ModeCount[cpi->ModeList[i]] ++; - - /* Order the modes from most to least frequent. Store result as - scheme 0 */ - for ( j = 0; j < MAX_MODES; j++ ) { - TmpFreq = -1; /* need to re-initialize for each loop */ - /* Find the most frequent */ - for ( i = 0; i < MAX_MODES; i++ ) { - /* Is this the best scheme so far ??? 
*/ - if ( ModeCount[i] > TmpFreq ) { - TmpFreq = ModeCount[i]; - TmpIndex = i; - } - } - /* I don't know if the above loop ever fails to match, but it's - better safe than sorry. Plus this takes care of gcc warning */ - if ( TmpIndex != -1 ) { - ModeCount[TmpIndex] = -1; - BestModeSchemes[TmpIndex] = (unsigned char)j; - } - } - - /* Default/ fallback scheme uses MODE_BITS bits per mode entry */ - BestScheme = (MODE_METHODS - 1); - BestSchemeScore = cpi->ModeListCount * 3; - /* Get a bit score for the available schemes. */ - for ( j = 0; j < (MODE_METHODS - 1); j++ ) { - - /* Reset the scheme score */ - if ( j == 0 ){ - /* Scheme 0 additional cost of sending frequency order */ - SchemeScore = 24; - SchemeList = BestModeSchemes; - } else { - SchemeScore = 0; - SchemeList = ModeSchemes[j-1]; - } - - /* Find the total bits to code using each avaialable scheme */ - for ( i = 0; i < cpi->ModeListCount; i++ ) - SchemeScore += ModeBitLengths[SchemeList[cpi->ModeList[i]]]; - - /* Is this the best scheme so far ??? */ - if ( SchemeScore < BestSchemeScore ) { - BestSchemeScore = SchemeScore; - BestScheme = j; - } - } - - /* Encode the best scheme. */ - oggpackB_write( opb, BestScheme, (ogg_uint32_t)MODE_METHOD_BITS ); - - /* If the chosen schems is scheme 0 send details of the mode - frequency order */ - if ( BestScheme == 0 ) { - for ( j = 0; j < MAX_MODES; j++ ){ - /* Note that the last two entries are implicit */ - oggpackB_write( opb, BestModeSchemes[j], (ogg_uint32_t)MODE_BITS ); - } - SchemeList = BestModeSchemes; - } - else { - SchemeList = ModeSchemes[BestScheme-1]; - } - - /* Are we using one of the alphabet based schemes or the fallback scheme */ - if ( BestScheme < (MODE_METHODS - 1)) { - /* Pack and encode the Mode list */ - for ( i = 0; i < cpi->ModeListCount; i++) { - /* Add the appropriate mode entropy token. 
*/ - ModeIndex = SchemeList[cpi->ModeList[i]]; - oggpackB_write( opb, ModeBitPatterns[ModeIndex], - (ogg_uint32_t)ModeBitLengths[ModeIndex] ); - } - }else{ - /* Fall back to MODE_BITS per entry */ - for ( i = 0; i < cpi->ModeListCount; i++) - /* Add the appropriate mode entropy token. */ - oggpackB_write( opb, cpi->ModeList[i], MODE_BITS ); - } - -} - -static void PackMotionVectors (CP_INSTANCE *cpi) { - ogg_int32_t i; - ogg_uint32_t MethodBits[2] = {0,0}; - const ogg_uint32_t * MvBitsPtr; - const ogg_uint32_t * MvPatternPtr; - - oggpack_buffer *opb=cpi->oggbuffer; - - /* Choose the coding method */ - MvBitsPtr = &MvBits[MAX_MV_EXTENT]; - for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) { - MethodBits[0] += MvBitsPtr[cpi->MVList[i].x]; - MethodBits[0] += MvBitsPtr[cpi->MVList[i].y]; - MethodBits[1] += 12; /* Simple six bits per mv component fallback - mechanism */ - } - - /* Select entropy table */ - if ( MethodBits[0] < MethodBits[1] ) { - oggpackB_write( opb, 0, 1 ); - MvBitsPtr = &MvBits[MAX_MV_EXTENT]; - MvPatternPtr = &MvPattern[MAX_MV_EXTENT]; - }else{ - oggpackB_write( opb, 1, 1 ); - MvBitsPtr = &MvBits2[MAX_MV_EXTENT]; - MvPatternPtr = &MvPattern2[MAX_MV_EXTENT]; - } - - /* Pack and encode the motion vectors */ - for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) { - oggpackB_write( opb, MvPatternPtr[cpi->MVList[i].x], - (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].x] ); - oggpackB_write( opb, MvPatternPtr[cpi->MVList[i].y], - (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].y] ); - } - -} - -static void PackEOBRun( CP_INSTANCE *cpi) { - if(cpi->RunLength == 0) - return; - - /* Note the appropriate EOB or EOB run token and any extra bits in - the optimised token list. 
Use the huffman index assosciated with - the first token in the run */ - - /* Mark out which plane the block belonged to */ - cpi->OptimisedTokenListPl[cpi->OptimisedTokenCount] = - (unsigned char)cpi->RunPlaneIndex; - - /* Note the huffman index to be used */ - cpi->OptimisedTokenListHi[cpi->OptimisedTokenCount] = - (unsigned char)cpi->RunHuffIndex; - - if ( cpi->RunLength <= 3 ) { - if ( cpi->RunLength == 1 ) { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_TOKEN; - } else if ( cpi->RunLength == 2 ) { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_PAIR_TOKEN; - } else { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_TRIPLE_TOKEN; - } - - cpi->RunLength = 0; - - } else { - - /* Choose a token appropriate to the run length. */ - if ( cpi->RunLength < 8 ) { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = - DCT_REPEAT_RUN_TOKEN; - cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] = - cpi->RunLength - 4; - cpi->RunLength = 0; - } else if ( cpi->RunLength < 16 ) { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = - DCT_REPEAT_RUN2_TOKEN; - cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] = - cpi->RunLength - 8; - cpi->RunLength = 0; - } else if ( cpi->RunLength < 32 ) { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = - DCT_REPEAT_RUN3_TOKEN; - cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] = - cpi->RunLength - 16; - cpi->RunLength = 0; - } else if ( cpi->RunLength < 4096) { - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = - DCT_REPEAT_RUN4_TOKEN; - cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] = - cpi->RunLength; - cpi->RunLength = 0; - } - - } - - cpi->OptimisedTokenCount++; - /* Reset run EOB length */ - cpi->RunLength = 0; -} - -static void PackToken ( CP_INSTANCE *cpi, ogg_int32_t FragmentNumber, - ogg_uint32_t HuffIndex ) { - ogg_uint32_t Token = - cpi->pb.TokenList[FragmentNumber][cpi->FragTokens[FragmentNumber]]; - ogg_uint32_t ExtraBitsToken = - 
cpi->pb.TokenList[FragmentNumber][cpi->FragTokens[FragmentNumber] + 1]; - ogg_uint32_t OneOrTwo; - ogg_uint32_t OneOrZero; - - /* Update the record of what coefficient we have got up to for this - block and unpack the encoded token back into the quantised data - array. */ - if ( Token == DCT_EOB_TOKEN ) - cpi->pb.FragCoeffs[FragmentNumber] = BLOCK_SIZE; - else - ExpandToken( cpi->pb.QFragData[FragmentNumber], - &cpi->pb.FragCoeffs[FragmentNumber], - Token, ExtraBitsToken ); - - /* Update record of tokens coded and where we are in this fragment. */ - /* Is there an extra bits token */ - OneOrTwo= 1 + ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 ); - /* Advance to the next real token. */ - cpi->FragTokens[FragmentNumber] += (unsigned char)OneOrTwo; - - /* Update the counts of tokens coded */ - cpi->TokensCoded += OneOrTwo; - cpi->TokensToBeCoded -= OneOrTwo; - - OneOrZero = ( FragmentNumber < (ogg_int32_t)cpi->pb.YPlaneFragments ); - - if ( Token == DCT_EOB_TOKEN ) { - if ( cpi->RunLength == 0 ) { - cpi->RunHuffIndex = HuffIndex; - cpi->RunPlaneIndex = 1 - OneOrZero; - } - cpi->RunLength++; - - /* we have exceeded our longest run length xmit an eob run token; */ - if ( cpi->RunLength == 4095 ) PackEOBRun(cpi); - - }else{ - - /* If we have an EOB run then code it up first */ - if ( cpi->RunLength > 0 ) PackEOBRun( cpi); - - /* Mark out which plane the block belonged to */ - cpi->OptimisedTokenListPl[cpi->OptimisedTokenCount] = - (unsigned char)(1 - OneOrZero); - - /* Note the token, extra bits and hufman table in the optimised - token list */ - cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = - (unsigned char)Token; - cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] = - ExtraBitsToken; - cpi->OptimisedTokenListHi[cpi->OptimisedTokenCount] = - (unsigned char)HuffIndex; - - cpi->OptimisedTokenCount++; - } -} - -static ogg_uint32_t GetBlockReconErrorSlow( CP_INSTANCE *cpi, - ogg_int32_t BlockIndex ) { - ogg_uint32_t ErrorVal; - - unsigned char * SrcDataPtr = - 
&cpi->ConvDestBuffer[cpi->pb.pixel_index_table[BlockIndex]]; - unsigned char * RecDataPtr = - &cpi->pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[BlockIndex]]; - ogg_int32_t SrcStride; - ogg_int32_t RecStride; - - /* Is the block a Y block or a UV block. */ - if ( BlockIndex < (ogg_int32_t)cpi->pb.YPlaneFragments ) { - SrcStride = cpi->pb.info.width; - RecStride = cpi->pb.YStride; - }else{ - SrcStride = cpi->pb.info.width >> 1; - RecStride = cpi->pb.UVStride; - } - - ErrorVal = dsp_sad8x8 (cpi->dsp, SrcDataPtr, SrcStride, RecDataPtr, RecStride); - - return ErrorVal; -} - -static void PackCodedVideo (CP_INSTANCE *cpi) { - ogg_int32_t i; - ogg_int32_t EncodedCoeffs = 1; - ogg_int32_t FragIndex; - ogg_uint32_t HuffIndex; /* Index to group of tables used to code a token */ - - /* Reset the count of second order optimised tokens */ - cpi->OptimisedTokenCount = 0; - - cpi->TokensToBeCoded = cpi->TotTokenCount; - cpi->TokensCoded = 0; - - /* Calculate the bit rate at which this frame should be capped. */ - cpi->MaxBitTarget = (ogg_uint32_t)((double)(cpi->ThisFrameTargetBytes * 8) * - cpi->BitRateCapFactor); - - /* Blank the various fragment data structures before we start. */ - memset(cpi->pb.FragCoeffs, 0, cpi->pb.UnitFragments); - memset(cpi->FragTokens, 0, cpi->pb.UnitFragments); - - /* Clear down the QFragData structure for all coded blocks. */ - ClearDownQFragData(&cpi->pb); - - /* The tree is not needed (implicit) for key frames */ - if ( cpi->pb.FrameType != KEY_FRAME ){ - /* Pack the quad tree fragment mapping. */ - PackAndWriteDFArray( cpi ); - } - - /* Note the number of bits used to code the tree itself. */ - cpi->FrameBitCount = oggpackB_bytes(cpi->oggbuffer) << 3; - - /* Mode and MV data not needed for key frames. */ - if ( cpi->pb.FrameType != KEY_FRAME ){ - /* Pack and code the mode list. 
*/ - PackModes(cpi); - /* Pack the motion vectors */ - PackMotionVectors (cpi); - } - - cpi->FrameBitCount = oggpackB_bytes(cpi->oggbuffer) << 3; - - /* Optimise the DC tokens */ - for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) { - /* Get the linear index for the current fragment. */ - FragIndex = cpi->pb.CodedBlockList[i]; - - cpi->pb.FragCoefEOB[FragIndex]=(unsigned char)EncodedCoeffs; - PackToken(cpi, FragIndex, DC_HUFF_OFFSET ); - - } - - /* Pack any outstanding EOB tokens */ - PackEOBRun(cpi); - - /* Now output the optimised DC token list using the appropriate - entropy tables. */ - EncodeDcTokenList(cpi); - - /* Work out the number of DC bits coded */ - - /* Optimise the AC tokens */ - while ( EncodedCoeffs < 64 ) { - /* Huffman table adjustment based upon coefficient number. */ - if ( EncodedCoeffs <= AC_TABLE_2_THRESH ) - HuffIndex = AC_HUFF_OFFSET; - else if ( EncodedCoeffs <= AC_TABLE_3_THRESH ) - HuffIndex = AC_HUFF_OFFSET + AC_HUFF_CHOICES; - else if ( EncodedCoeffs <= AC_TABLE_4_THRESH ) - HuffIndex = AC_HUFF_OFFSET + (AC_HUFF_CHOICES * 2); - else - HuffIndex = AC_HUFF_OFFSET + (AC_HUFF_CHOICES * 3); - - /* Repeatedly scan through the list of blocks. */ - for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) { - /* Get the linear index for the current fragment. */ - FragIndex = cpi->pb.CodedBlockList[i]; - - /* Should we code a token for this block on this pass. */ - if ( cpi->FragTokens[FragIndex] < cpi->FragTokenCounts[FragIndex] - && cpi->pb.FragCoeffs[FragIndex] <= EncodedCoeffs ) { - /* Bit pack and a token for this block */ - cpi->pb.FragCoefEOB[FragIndex]=(unsigned char)EncodedCoeffs; - PackToken( cpi, FragIndex, HuffIndex ); - } - } - - EncodedCoeffs ++; - } - - /* Pack any outstanding EOB tokens */ - PackEOBRun(cpi); - - /* Now output the optimised AC token list using the appropriate - entropy tables. 
*/ - EncodeAcTokenList(cpi); - -} - -static ogg_uint32_t QuadCodeDisplayFragments (CP_INSTANCE *cpi) { - ogg_int32_t i,j; - ogg_uint32_t coded_pixels=0; - int QIndex; - int k,m,n; - - /* predictor multiplier up-left, up, up-right,left, shift - Entries are packed in the order L, UL, U, UR, with missing entries - moved to the end (before the shift parameters). */ - static const ogg_int16_t pc[16][6]={ - {0,0,0,0,0,0}, - {1,0,0,0,0,0}, /* PL */ - {1,0,0,0,0,0}, /* PUL */ - {1,0,0,0,0,0}, /* PUL|PL */ - {1,0,0,0,0,0}, /* PU */ - {1,1,0,0,1,1}, /* PU|PL */ - {0,1,0,0,0,0}, /* PU|PUL */ - {29,-26,29,0,5,31}, /* PU|PUL|PL */ - {1,0,0,0,0,0}, /* PUR */ - {75,53,0,0,7,127}, /* PUR|PL */ - {1,1,0,0,1,1}, /* PUR|PUL */ - {75,0,53,0,7,127}, /* PUR|PUL|PL */ - {1,0,0,0,0,0}, /* PUR|PU */ - {75,0,53,0,7,127}, /* PUR|PU|PL */ - {3,10,3,0,4,15}, /* PUR|PU|PUL */ - {29,-26,29,0,5,31} /* PUR|PU|PUL|PL */ - }; - - /* boundary case bit masks. */ - static const int bc_mask[8]={ - /* normal case no boundary condition */ - PUR|PU|PUL|PL, - /* left column */ - PUR|PU, - /* top row */ - PL, - /* top row, left column */ - 0, - /* right column */ - PU|PUL|PL, - /* right and left column */ - PU, - /* top row, right column */ - PL, - /* top row, right and left column */ - 0 - }; - - /* value left value up-left, value up, value up-right, missing - values skipped. */ - int v[4]; - - /* fragment number left, up-left, up, up-right */ - int fn[4]; - - /* predictor count. 
*/ - int pcount; - - /*which predictor constants to use */ - ogg_int16_t wpc; - - /* last used inter predictor (Raster Order) */ - ogg_int16_t Last[3]; /* last value used for given frame */ - - int FragsAcross=cpi->pb.HFragments; - int FragsDown = cpi->pb.VFragments; - int FromFragment,ToFragment; - ogg_int32_t FragIndex; - int WhichFrame; - int WhichCase; - - static const ogg_int16_t Mode2Frame[] = { - 1, /* CODE_INTER_NO_MV 0 => Encoded diff from same MB last frame */ - 0, /* CODE_INTRA 1 => DCT Encoded Block */ - 1, /* CODE_INTER_PLUS_MV 2 => Encoded diff from included MV MB last frame */ - 1, /* CODE_INTER_LAST_MV 3 => Encoded diff from MRU MV MB last frame */ - 1, /* CODE_INTER_PRIOR_MV 4 => Encoded diff from included 4 separate MV blocks */ - 2, /* CODE_USING_GOLDEN 5 => Encoded diff from same MB golden frame */ - 2, /* CODE_GOLDEN_MV 6 => Encoded diff from included MV MB golden frame */ - 1 /* CODE_INTER_FOUR_MV 7 => Encoded diff from included 4 separate MV blocks */ - }; - - ogg_int16_t PredictedDC; - - /* Initialise the coded block indices variables. These allow - subsequent linear access to the quad tree ordered list of coded - blocks */ - cpi->pb.CodedBlockIndex = 0; - - /* Set the inter/intra descision control variables. 
*/ - QIndex = Q_TABLE_SIZE - 1; - while ( QIndex >= 0 ) { - if ( (QIndex == 0) || - ( cpi->pb.QThreshTable[QIndex] >= cpi->pb.ThisFrameQualityValue) ) - break; - QIndex --; - } - - - /* Encode and tokenise the Y, U and V components */ - coded_pixels = QuadCodeComponent(cpi, 0, cpi->pb.YSBRows, cpi->pb.YSBCols, - cpi->pb.info.width ); - coded_pixels += QuadCodeComponent(cpi, cpi->pb.YSuperBlocks, - cpi->pb.UVSBRows, - cpi->pb.UVSBCols, - cpi->pb.info.width>>1 ); - coded_pixels += QuadCodeComponent(cpi, - cpi->pb.YSuperBlocks+cpi->pb.UVSuperBlocks, - cpi->pb.UVSBRows, cpi->pb.UVSBCols, - cpi->pb.info.width>>1 ); - - /* for y,u,v */ - for ( j = 0; j < 3 ; j++) { - /* pick which fragments based on Y, U, V */ - switch(j){ - case 0: /* y */ - FromFragment = 0; - ToFragment = cpi->pb.YPlaneFragments; - FragsAcross = cpi->pb.HFragments; - FragsDown = cpi->pb.VFragments; - break; - case 1: /* u */ - FromFragment = cpi->pb.YPlaneFragments; - ToFragment = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments ; - FragsAcross = cpi->pb.HFragments >> 1; - FragsDown = cpi->pb.VFragments >> 1; - break; - /*case 2: v */ - default: - FromFragment = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments; - ToFragment = cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments) ; - FragsAcross = cpi->pb.HFragments >> 1; - FragsDown = cpi->pb.VFragments >> 1; - break; - } - - /* initialize our array of last used DC Components */ - for(k=0;k<3;k++)Last[k]=0; - i=FromFragment; - - /* do prediction on all of Y, U or V */ - for ( m = 0 ; m < FragsDown ; m++) { - for ( n = 0 ; n < FragsAcross ; n++, i++) { - cpi->OriginalDC[i] = cpi->pb.QFragData[i][0]; - - /* only do 2 prediction if fragment coded and on non intra or - if all fragments are intra */ - if( cpi->pb.display_fragments[i] || - (cpi->pb.FrameType == KEY_FRAME) ) { - /* Type of Fragment */ - - WhichFrame = Mode2Frame[cpi->pb.FragCodingMethod[i]]; - - /* Check Borderline Cases */ - WhichCase = (n==0) + ((m==0) << 1) + ((n+1 == FragsAcross) 
<< 2); - - fn[0]=i-1; - fn[1]=i-FragsAcross-1; - fn[2]=i-FragsAcross; - fn[3]=i-FragsAcross+1; - - /* fragment valid for prediction use if coded and it comes - from same frame as the one we are predicting */ - for(k=pcount=wpc=0; k<4; k++) { - int pflag; - pflag=1<pb.display_fragments[fn[k]] && - (Mode2Frame[cpi->pb.FragCodingMethod[fn[k]]] == WhichFrame)){ - v[pcount]=cpi->OriginalDC[fn[k]]; - wpc|=pflag; - pcount++; - } - } - - if(wpc==0) { - - /* fall back to the last coded fragment */ - cpi->pb.QFragData[i][0] -= Last[WhichFrame]; - - } else { - - /* don't do divide if divisor is 1 or 0 */ - PredictedDC = pc[wpc][0]*v[0]; - for(k=1; k>= pc[wpc][4]; - - } - - /* check for outranging on the two predictors that can outrange */ - if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){ - if( abs(PredictedDC - v[2]) > 128) { - PredictedDC = v[2]; - } else if( abs(PredictedDC - v[0]) > 128) { - PredictedDC = v[0]; - } else if( abs(PredictedDC - v[1]) > 128) { - PredictedDC = v[1]; - } - } - - cpi->pb.QFragData[i][0] -= PredictedDC; - } - - /* Save the last fragment coded for whatever frame we are - predicting from */ - - Last[WhichFrame] = cpi->OriginalDC[i]; - - } - } - } - } - - /* Pack DC tokens and adjust the ones we couldn't predict 2d */ - for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) { - /* Get the linear index for the current coded fragment. */ - FragIndex = cpi->pb.CodedBlockList[i]; - coded_pixels += DPCMTokenizeBlock ( cpi, FragIndex); - - } - - /* Bit pack the video data data */ - PackCodedVideo(cpi); - - /* End the bit packing run. */ - /* EndAddBitsToBuffer(cpi); */ - - /* Reconstruct the reference frames */ - ReconRefFrames(&cpi->pb); - - UpdateFragQIndex(&cpi->pb); - - /* Measure the inter reconstruction error for all the blocks that - were coded */ - /* for use as part of the recovery monitoring process in subsequent frames. 
*/ - for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) { - cpi->LastCodedErrorScore[ cpi->pb.CodedBlockList[i] ] = - GetBlockReconErrorSlow( cpi, cpi->pb.CodedBlockList[i] ); - - } - - /* Return total number of coded pixels */ - return coded_pixels; -} - -ogg_uint32_t EncodeData(CP_INSTANCE *cpi){ - ogg_uint32_t coded_pixels = 0; - - /* Zero the count of tokens so far this frame. */ - cpi->TotTokenCount = 0; - - /* Zero the mode and MV list indices. */ - cpi->ModeListCount = 0; - - /* Zero Decoder EOB run count */ - cpi->pb.EOB_Run = 0; - - dsp_save_fpu (cpi->dsp); - - /* Encode any fragments coded using DCT. */ - coded_pixels += QuadCodeDisplayFragments (cpi); - - dsp_restore_fpu (cpi->dsp); - - return coded_pixels; - -} - -ogg_uint32_t PickIntra( CP_INSTANCE *cpi, - ogg_uint32_t SBRows, - ogg_uint32_t SBCols){ - - ogg_int32_t FragIndex; /* Fragment number */ - ogg_uint32_t MB, B; /* Macro-Block, Block indices */ - ogg_uint32_t SBrow; /* Super-Block row number */ - ogg_uint32_t SBcol; /* Super-Block row number */ - ogg_uint32_t SB=0; /* Super-Block index, initialised to first of - this component */ - ogg_uint32_t UVRow; - ogg_uint32_t UVColumn; - ogg_uint32_t UVFragOffset; - - /* decide what block type and motion vectors to use on all of the frames */ - for ( SBrow=0; SBrowpb.BlockMap,SB,MB) >= 0 ) { - - cpi->MBCodingMode = CODE_INTRA; - - /* Now actually code the blocks. 
*/ - for ( B=0; B<4; B++ ) { - FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B ); - cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode; - } - - /* Matching fragments in the U and V planes */ - UVRow = (FragIndex / (cpi->pb.HFragments * 2)); - UVColumn = (FragIndex % cpi->pb.HFragments) / 2; - UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn; - - cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments + UVFragOffset] = - cpi->MBCodingMode; - cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments + - cpi->pb.UVPlaneFragments + UVFragOffset] = - cpi->MBCodingMode; - } - } - - /* Next Super-Block */ - SB++; - } - } - return 0; -} - -static void AddMotionVector(CP_INSTANCE *cpi, - MOTION_VECTOR *ThisMotionVector) { - cpi->MVList[cpi->MvListCount].x = ThisMotionVector->x; - cpi->MVList[cpi->MvListCount].y = ThisMotionVector->y; - cpi->MvListCount++; -} - -static void SetFragMotionVectorAndMode(CP_INSTANCE *cpi, - ogg_int32_t FragIndex, - MOTION_VECTOR *ThisMotionVector){ - /* Note the coding mode and vector for each block */ - cpi->pb.FragMVect[FragIndex].x = ThisMotionVector->x; - cpi->pb.FragMVect[FragIndex].y = ThisMotionVector->y; - cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode; -} - -static void SetMBMotionVectorsAndMode(CP_INSTANCE *cpi, - ogg_int32_t YFragIndex, - ogg_int32_t UFragIndex, - ogg_int32_t VFragIndex, - MOTION_VECTOR *ThisMotionVector){ - SetFragMotionVectorAndMode(cpi, YFragIndex, ThisMotionVector); - SetFragMotionVectorAndMode(cpi, YFragIndex + 1, ThisMotionVector); - SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments, - ThisMotionVector); - SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1, - ThisMotionVector); - SetFragMotionVectorAndMode(cpi, UFragIndex, ThisMotionVector); - SetFragMotionVectorAndMode(cpi, VFragIndex, ThisMotionVector); -} - -ogg_uint32_t PickModes(CP_INSTANCE *cpi, - ogg_uint32_t SBRows, ogg_uint32_t SBCols, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t *InterError, 
ogg_uint32_t *IntraError) { - ogg_int32_t YFragIndex; - ogg_int32_t UFragIndex; - ogg_int32_t VFragIndex; - ogg_uint32_t MB, B; /* Macro-Block, Block indices */ - ogg_uint32_t SBrow; /* Super-Block row number */ - ogg_uint32_t SBcol; /* Super-Block row number */ - ogg_uint32_t SB=0; /* Super-Block index, initialised to first - of this component */ - - ogg_uint32_t MBIntraError; /* Intra error for macro block */ - ogg_uint32_t MBGFError; /* Golden frame macro block error */ - ogg_uint32_t MBGF_MVError; /* Golden frame plus MV error */ - ogg_uint32_t LastMBGF_MVError; /* Golden frame error with - last used GF motion - vector. */ - ogg_uint32_t MBInterError; /* Inter no MV macro block error */ - ogg_uint32_t MBLastInterError; /* Inter with last used MV */ - ogg_uint32_t MBPriorLastInterError; /* Inter with prior last MV */ - ogg_uint32_t MBInterMVError; /* Inter MV macro block error */ - ogg_uint32_t MBInterMVExError; /* Inter MV (exhaustive - search) macro block error */ - ogg_uint32_t MBInterFOURMVError; /* Inter MV error when using 4 - motion vectors per macro - block */ - ogg_uint32_t BestError; /* Best error so far. 
*/ - - MOTION_VECTOR FourMVect[6]; /* storage for last used vectors (one - entry for each block in MB) */ - MOTION_VECTOR LastInterMVect; /* storage for last used Inter frame - MB motion vector */ - MOTION_VECTOR PriorLastInterMVect; /* storage for prior last used - Inter frame MB motion vector */ - MOTION_VECTOR TmpMVect; /* Temporary MV storage */ - MOTION_VECTOR LastGFMVect; /* storage for last used Golden - Frame MB motion vector */ - MOTION_VECTOR InterMVect; /* storage for motion vector */ - MOTION_VECTOR InterMVectEx; /* storage for motion vector result - from exhaustive search */ - MOTION_VECTOR GFMVect; /* storage for motion vector */ - MOTION_VECTOR ZeroVect; - - ogg_uint32_t UVRow; - ogg_uint32_t UVColumn; - ogg_uint32_t UVFragOffset; - - int MBCodedFlag; - unsigned char QIndex; - - /* initialize error scores */ - *InterError = 0; - *IntraError = 0; - - /* clear down the default motion vector. */ - cpi->MvListCount = 0; - FourMVect[0].x = 0; - FourMVect[0].y = 0; - FourMVect[1].x = 0; - FourMVect[1].y = 0; - FourMVect[2].x = 0; - FourMVect[2].y = 0; - FourMVect[3].x = 0; - FourMVect[3].y = 0; - FourMVect[4].x = 0; - FourMVect[4].y = 0; - FourMVect[5].x = 0; - FourMVect[5].y = 0; - LastInterMVect.x = 0; - LastInterMVect.y = 0; - PriorLastInterMVect.x = 0; - PriorLastInterMVect.y = 0; - LastGFMVect.x = 0; - LastGFMVect.y = 0; - InterMVect.x = 0; - InterMVect.y = 0; - GFMVect.x = 0; - GFMVect.y = 0; - - ZeroVect.x = 0; - ZeroVect.y = 0; - - QIndex = (unsigned char)cpi->pb.FrameQIndex; - - - /* change the quatization matrix to the one at best Q to compute the - new error score */ - cpi->MinImprovementForNewMV = (MvThreshTable[QIndex] << 12); - cpi->InterTripOutThresh = (5000<<12); - cpi->MVChangeFactor = MVChangeFactorTable[QIndex]; /* 0.9 */ - - if ( cpi->pb.info.quick_p ) { - cpi->ExhaustiveSearchThresh = (1000<<12); - cpi->FourMVThreshold = (2500<<12); - } else { - cpi->ExhaustiveSearchThresh = (250<<12); - cpi->FourMVThreshold = (500<<12); - } - 
cpi->MinImprovementForFourMV = cpi->MinImprovementForNewMV * 4; - - if(cpi->MinImprovementForFourMV < (40<<12)) - cpi->MinImprovementForFourMV = (40<<12); - - cpi->FourMvChangeFactor = 8; /* cpi->MVChangeFactor - 0.05; */ - - /* decide what block type and motion vectors to use on all of the frames */ - for ( SBrow=0; SBrowpb.BlockMap,SB,MB) < 0 ) continue; - - /* Is the current macro block coded (in part or in whole) */ - MBCodedFlag = 0; - for ( B=0; B<4; B++ ) { - YFragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B ); - - /* Does Block lie in frame: */ - if ( YFragIndex >= 0 ) { - /* In Frame: Is it coded: */ - if ( cpi->pb.display_fragments[YFragIndex] ) { - MBCodedFlag = 1; - break; - } - } else - MBCodedFlag = 0; - } - - /* This one isn't coded go to the next one */ - if(!MBCodedFlag) continue; - - /* Calculate U and V FragIndex from YFragIndex */ - YFragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB,MB); - UVRow = (YFragIndex / (cpi->pb.HFragments * 2)); - UVColumn = (YFragIndex % cpi->pb.HFragments) / 2; - UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn; - UFragIndex = cpi->pb.YPlaneFragments + UVFragOffset; - VFragIndex = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments + - UVFragOffset; - - - /************************************************************** - Find the block choice with the lowest error - - NOTE THAT if U or V is coded but no Y from a macro block then - the mode will be CODE_INTER_NO_MV as this is the default - state to which the mode data structure is initialised in - encoder and decoder at the start of each frame. */ - - BestError = HUGE_ERROR; - - - /* Look at the intra coding error. */ - MBIntraError = GetMBIntraError( cpi, YFragIndex, PixelsPerLine ); - BestError = (BestError > MBIntraError) ? MBIntraError : BestError; - - /* Get the golden frame error */ - MBGFError = GetMBInterError( cpi, cpi->ConvDestBuffer, - cpi->pb.GoldenFrame, YFragIndex, - 0, 0, PixelsPerLine ); - BestError = (BestError > MBGFError) ? 
MBGFError : BestError; - - /* Calculate the 0,0 case. */ - MBInterError = GetMBInterError( cpi, cpi->ConvDestBuffer, - cpi->pb.LastFrameRecon, - YFragIndex, 0, 0, PixelsPerLine ); - BestError = (BestError > MBInterError) ? MBInterError : BestError; - - /* Measure error for last MV */ - MBLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer, - cpi->pb.LastFrameRecon, - YFragIndex, LastInterMVect.x, - LastInterMVect.y, PixelsPerLine ); - BestError = (BestError > MBLastInterError) ? - MBLastInterError : BestError; - - /* Measure error for prior last MV */ - MBPriorLastInterError = GetMBInterError( cpi, cpi->ConvDestBuffer, - cpi->pb.LastFrameRecon, - YFragIndex, - PriorLastInterMVect.x, - PriorLastInterMVect.y, - PixelsPerLine ); - BestError = (BestError > MBPriorLastInterError) ? - MBPriorLastInterError : BestError; - - /* Temporarily force usage of no motionvector blocks */ - MBInterMVError = HUGE_ERROR; - InterMVect.x = 0; /* Set 0,0 motion vector */ - InterMVect.y = 0; - - /* If the best error is above the required threshold search - for a new inter MV */ - if ( BestError > cpi->MinImprovementForNewMV && cpi->MotionCompensation) { - /* Use a mix of heirachical and exhaustive searches for - quick mode. */ - if ( cpi->pb.info.quick_p ) { - MBInterMVError = GetMBMVInterError( cpi, cpi->pb.LastFrameRecon, - YFragIndex, PixelsPerLine, - cpi->MVPixelOffsetY, - &InterMVect ); - - /* If we still do not have a good match try an exhaustive - MBMV search */ - if ( (MBInterMVError > cpi->ExhaustiveSearchThresh) && - (BestError > cpi->ExhaustiveSearchThresh) ) { - - MBInterMVExError = - GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon, - YFragIndex, PixelsPerLine, - &InterMVectEx ); - - /* Is the Variance measure for the EX search - better... If so then use it. 
*/ - if ( MBInterMVExError < MBInterMVError ) { - MBInterMVError = MBInterMVExError; - InterMVect.x = InterMVectEx.x; - InterMVect.y = InterMVectEx.y; - } - } - }else{ - /* Use an exhaustive search */ - MBInterMVError = - GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon, - YFragIndex, PixelsPerLine, - &InterMVect ); - } - - - /* Is the improvement, if any, good enough to justify a new MV */ - if ( (16 * MBInterMVError < (BestError * cpi->MVChangeFactor)) && - ((MBInterMVError + cpi->MinImprovementForNewMV) < BestError) ){ - BestError = MBInterMVError; - } - - } - - /* If the best error is still above the required threshold - search for a golden frame MV */ - MBGF_MVError = HUGE_ERROR; - GFMVect.x = 0; /* Set 0,0 motion vector */ - GFMVect.y = 0; - if ( BestError > cpi->MinImprovementForNewMV && cpi->MotionCompensation) { - /* Do an MV search in the golden reference frame */ - MBGF_MVError = GetMBMVInterError( cpi, cpi->pb.GoldenFrame, - YFragIndex, PixelsPerLine, - cpi->MVPixelOffsetY, &GFMVect ); - - /* Measure error for last GFMV */ - LastMBGF_MVError = GetMBInterError( cpi, cpi->ConvDestBuffer, - cpi->pb.GoldenFrame, - YFragIndex, LastGFMVect.x, - LastGFMVect.y, PixelsPerLine ); - - /* Check against last GF motion vector and reset if the - search has thrown a worse result. */ - if ( LastMBGF_MVError < MBGF_MVError ) { - GFMVect.x = LastGFMVect.x; - GFMVect.y = LastGFMVect.y; - MBGF_MVError = LastMBGF_MVError; - }else{ - LastGFMVect.x = GFMVect.x; - LastGFMVect.y = GFMVect.y; - } - - /* Is the improvement, if any, good enough to justify a new MV */ - if ( (16 * MBGF_MVError < (BestError * cpi->MVChangeFactor)) && - ((MBGF_MVError + cpi->MinImprovementForNewMV) < BestError) ) { - BestError = MBGF_MVError; - } - } - - /* Finally... If the best error is still to high then consider - the 4MV mode */ - MBInterFOURMVError = HUGE_ERROR; - if ( BestError > cpi->FourMVThreshold && cpi->MotionCompensation) { - /* Get the 4MV error. 
*/ - MBInterFOURMVError = - GetFOURMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon, - YFragIndex, PixelsPerLine, FourMVect ); - - /* If the improvement is great enough then use the four MV mode */ - if ( ((MBInterFOURMVError + cpi->MinImprovementForFourMV) < - BestError) && (16 * MBInterFOURMVError < - (BestError * cpi->FourMvChangeFactor))) { - BestError = MBInterFOURMVError; - } - } - - /******************************************************** - end finding the best error - ******************************************************* - - Figure out what to do with the block we chose - - Over-ride and force intra if error high and Intra error similar - Now choose a mode based on lowest error (with bias towards no MV) */ - - if ( (BestError > cpi->InterTripOutThresh) && - (10 * BestError > MBIntraError * 7 ) ) { - cpi->MBCodingMode = CODE_INTRA; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&ZeroVect); - } else if ( BestError == MBInterError ) { - cpi->MBCodingMode = CODE_INTER_NO_MV; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&ZeroVect); - } else if ( BestError == MBGFError ) { - cpi->MBCodingMode = CODE_USING_GOLDEN; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&ZeroVect); - } else if ( BestError == MBLastInterError ) { - cpi->MBCodingMode = CODE_INTER_LAST_MV; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&LastInterMVect); - } else if ( BestError == MBPriorLastInterError ) { - cpi->MBCodingMode = CODE_INTER_PRIOR_LAST; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&PriorLastInterMVect); - - /* Swap the prior and last MV cases over */ - TmpMVect.x = PriorLastInterMVect.x; - TmpMVect.y = PriorLastInterMVect.y; - PriorLastInterMVect.x = LastInterMVect.x; - PriorLastInterMVect.y = LastInterMVect.y; - LastInterMVect.x = TmpMVect.x; - LastInterMVect.y = TmpMVect.y; - - } else if ( BestError == MBInterMVError ) { - - cpi->MBCodingMode = CODE_INTER_PLUS_MV; - 
SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&InterMVect); - - /* Update Prior last mv with last mv */ - PriorLastInterMVect.x = LastInterMVect.x; - PriorLastInterMVect.y = LastInterMVect.y; - - /* Note last inter MV for future use */ - LastInterMVect.x = InterMVect.x; - LastInterMVect.y = InterMVect.y; - - AddMotionVector( cpi, &InterMVect); - - } else if ( BestError == MBGF_MVError ) { - - cpi->MBCodingMode = CODE_GOLDEN_MV; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&GFMVect); - - /* Note last inter GF MV for future use */ - LastGFMVect.x = GFMVect.x; - LastGFMVect.y = GFMVect.y; - - AddMotionVector( cpi, &GFMVect); - } else if ( BestError == MBInterFOURMVError ) { - cpi->MBCodingMode = CODE_INTER_FOURMV; - - /* Calculate the UV vectors as the average of the Y plane ones. */ - /* First .x component */ - FourMVect[4].x = FourMVect[0].x + FourMVect[1].x + - FourMVect[2].x + FourMVect[3].x; - if ( FourMVect[4].x >= 0 ) - FourMVect[4].x = (FourMVect[4].x + 2) / 4; - else - FourMVect[4].x = (FourMVect[4].x - 2) / 4; - FourMVect[5].x = FourMVect[4].x; - - /* Then .y component */ - FourMVect[4].y = FourMVect[0].y + FourMVect[1].y + - FourMVect[2].y + FourMVect[3].y; - if ( FourMVect[4].y >= 0 ) - FourMVect[4].y = (FourMVect[4].y + 2) / 4; - else - FourMVect[4].y = (FourMVect[4].y - 2) / 4; - FourMVect[5].y = FourMVect[4].y; - - SetFragMotionVectorAndMode(cpi, YFragIndex, &FourMVect[0]); - SetFragMotionVectorAndMode(cpi, YFragIndex + 1, &FourMVect[1]); - SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments, - &FourMVect[2]); - SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1, - &FourMVect[3]); - SetFragMotionVectorAndMode(cpi, UFragIndex, &FourMVect[4]); - SetFragMotionVectorAndMode(cpi, VFragIndex, &FourMVect[5]); - - /* Note the four MVs values for current macro-block. 
*/ - AddMotionVector( cpi, &FourMVect[0]); - AddMotionVector( cpi, &FourMVect[1]); - AddMotionVector( cpi, &FourMVect[2]); - AddMotionVector( cpi, &FourMVect[3]); - - /* Update Prior last mv with last mv */ - PriorLastInterMVect.x = LastInterMVect.x; - PriorLastInterMVect.y = LastInterMVect.y; - - /* Note last inter MV for future use */ - LastInterMVect.x = FourMVect[3].x; - LastInterMVect.y = FourMVect[3].y; - - } else { - - cpi->MBCodingMode = CODE_INTRA; - SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, - VFragIndex,&ZeroVect); - } - - - /* setting up mode specific block types - *******************************************************/ - - *InterError += (BestError>>8); - *IntraError += (MBIntraError>>8); - - - } - SB++; - - } - } - - /* Return number of pixels coded */ - return 0; -} - -void WriteFrameHeader( CP_INSTANCE *cpi) { - ogg_uint32_t i; - oggpack_buffer *opb=cpi->oggbuffer; - /* Output the frame type (base/key frame or inter frame) */ - oggpackB_write( opb, cpi->pb.FrameType, 1 ); - /* Write out details of the current value of Q... variable resolution. */ - for ( i = 0; i < Q_TABLE_SIZE; i++ ) { - if ( cpi->pb.ThisFrameQualityValue == cpi->pb.QThreshTable[i] ) { - oggpackB_write( opb, i, 6 ); - break; - } - } - - if ( i == Q_TABLE_SIZE ) { - /* An invalid DCT value was specified. */ - /*IssueWarning( "Invalid Q Multiplier" );*/ - oggpackB_write( opb, 31, 6 ); - } - - /* we only support one Q index per frame */ - oggpackB_write( opb, 0, 1 ); - - /* If the frame was a base frame then write out the frame dimensions. */ - if ( cpi->pb.FrameType == KEY_FRAME ) { - /* all bits reserved! 
*/ - oggpackB_write( opb, 0, 3 ); - } -} - diff --git a/Engine/lib/libtheora/lib/enc/encoder_huffman.c b/Engine/lib/libtheora/lib/enc/encoder_huffman.c deleted file mode 100644 index 191ada75c..000000000 --- a/Engine/lib/libtheora/lib/enc/encoder_huffman.c +++ /dev/null @@ -1,310 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encoder_huffman.c 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" -#include "hufftables.h" - -static void CreateHuffmanList(HUFF_ENTRY ** HuffRoot, - ogg_uint32_t HIndex, - const ogg_uint32_t *FreqList ) { - int i; - HUFF_ENTRY *entry_ptr; - HUFF_ENTRY *search_ptr; - - /* Create a HUFF entry for token zero. */ - HuffRoot[HIndex] = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*HuffRoot[HIndex])); - - HuffRoot[HIndex]->Previous = NULL; - HuffRoot[HIndex]->Next = NULL; - HuffRoot[HIndex]->ZeroChild = NULL; - HuffRoot[HIndex]->OneChild = NULL; - HuffRoot[HIndex]->Value = 0; - HuffRoot[HIndex]->Frequency = FreqList[0]; - - if ( HuffRoot[HIndex]->Frequency == 0 ) - HuffRoot[HIndex]->Frequency = 1; - - /* Now add entries for all the other possible tokens. */ - for ( i = 1; i < MAX_ENTROPY_TOKENS; i++ ) { - entry_ptr = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*entry_ptr)); - - entry_ptr->Value = i; - entry_ptr->Frequency = FreqList[i]; - entry_ptr->ZeroChild = NULL; - entry_ptr->OneChild = NULL; - - /* Force min value of 1. 
This prevents the tree getting too deep. */ - if ( entry_ptr->Frequency == 0 ) - entry_ptr->Frequency = 1; - - if ( entry_ptr->Frequency <= HuffRoot[HIndex]->Frequency ){ - entry_ptr->Next = HuffRoot[HIndex]; - HuffRoot[HIndex]->Previous = entry_ptr; - entry_ptr->Previous = NULL; - HuffRoot[HIndex] = entry_ptr; - }else{ - search_ptr = HuffRoot[HIndex]; - while ( (search_ptr->Next != NULL) && - (search_ptr->Frequency < entry_ptr->Frequency) ){ - search_ptr = (HUFF_ENTRY *)search_ptr->Next; - } - - if ( search_ptr->Frequency < entry_ptr->Frequency ){ - entry_ptr->Next = NULL; - entry_ptr->Previous = search_ptr; - search_ptr->Next = entry_ptr; - }else{ - entry_ptr->Next = search_ptr; - entry_ptr->Previous = search_ptr->Previous; - search_ptr->Previous->Next = entry_ptr; - search_ptr->Previous = entry_ptr; - } - } - } -} - -static void CreateCodeArray( HUFF_ENTRY * HuffRoot, - ogg_uint32_t *HuffCodeArray, - unsigned char *HuffCodeLengthArray, - ogg_uint32_t CodeValue, - unsigned char CodeLength ) { - - /* If we are at a leaf then fill in a code array entry. */ - if ( ( HuffRoot->ZeroChild == NULL ) && ( HuffRoot->OneChild == NULL ) ){ - HuffCodeArray[HuffRoot->Value] = CodeValue; - HuffCodeLengthArray[HuffRoot->Value] = CodeLength; - }else{ - /* Recursive calls to scan down the tree. */ - CodeLength++; - CreateCodeArray(HuffRoot->ZeroChild, HuffCodeArray, HuffCodeLengthArray, - ((CodeValue << 1) + 0), CodeLength); - CreateCodeArray(HuffRoot->OneChild, HuffCodeArray, HuffCodeLengthArray, - ((CodeValue << 1) + 1), CodeLength); - } -} - -static void BuildHuffmanTree( HUFF_ENTRY **HuffRoot, - ogg_uint32_t *HuffCodeArray, - unsigned char *HuffCodeLengthArray, - ogg_uint32_t HIndex, - const ogg_uint32_t *FreqList ){ - - HUFF_ENTRY *entry_ptr; - HUFF_ENTRY *search_ptr; - - /* First create a sorted linked list representing the frequencies of - each token. */ - CreateHuffmanList( HuffRoot, HIndex, FreqList ); - - /* Now build the tree from the list. 
*/ - - /* While there are at least two items left in the list. */ - while ( HuffRoot[HIndex]->Next != NULL ){ - /* Create the new node as the parent of the first two in the list. */ - entry_ptr = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*entry_ptr)); - entry_ptr->Value = -1; - entry_ptr->Frequency = HuffRoot[HIndex]->Frequency + - HuffRoot[HIndex]->Next->Frequency ; - entry_ptr->ZeroChild = HuffRoot[HIndex]; - entry_ptr->OneChild = HuffRoot[HIndex]->Next; - - /* If there are still more items in the list then insert the new - node into the list. */ - if (entry_ptr->OneChild->Next != NULL ){ - /* Set up the provisional 'new root' */ - HuffRoot[HIndex] = entry_ptr->OneChild->Next; - HuffRoot[HIndex]->Previous = NULL; - - /* Now scan through the remaining list to insert the new entry - at the appropriate point. */ - if ( entry_ptr->Frequency <= HuffRoot[HIndex]->Frequency ){ - entry_ptr->Next = HuffRoot[HIndex]; - HuffRoot[HIndex]->Previous = entry_ptr; - entry_ptr->Previous = NULL; - HuffRoot[HIndex] = entry_ptr; - }else{ - search_ptr = HuffRoot[HIndex]; - while ( (search_ptr->Next != NULL) && - (search_ptr->Frequency < entry_ptr->Frequency) ){ - search_ptr = search_ptr->Next; - } - - if ( search_ptr->Frequency < entry_ptr->Frequency ){ - entry_ptr->Next = NULL; - entry_ptr->Previous = search_ptr; - search_ptr->Next = entry_ptr; - }else{ - entry_ptr->Next = search_ptr; - entry_ptr->Previous = search_ptr->Previous; - search_ptr->Previous->Next = entry_ptr; - search_ptr->Previous = entry_ptr; - } - } - }else{ - /* Build has finished. */ - entry_ptr->Next = NULL; - entry_ptr->Previous = NULL; - HuffRoot[HIndex] = entry_ptr; - } - - /* Delete the Next/Previous properties of the children (PROB NOT NEC). */ - entry_ptr->ZeroChild->Next = NULL; - entry_ptr->ZeroChild->Previous = NULL; - entry_ptr->OneChild->Next = NULL; - entry_ptr->OneChild->Previous = NULL; - - } - - /* Now build a code array from the tree. 
*/ - CreateCodeArray( HuffRoot[HIndex], HuffCodeArray, - HuffCodeLengthArray, 0, 0); -} - -static void DestroyHuffTree(HUFF_ENTRY *root_ptr){ - if (root_ptr){ - if ( root_ptr->ZeroChild ) - DestroyHuffTree(root_ptr->ZeroChild); - - if ( root_ptr->OneChild ) - DestroyHuffTree(root_ptr->OneChild); - - _ogg_free(root_ptr); - } -} - -void ClearHuffmanSet( PB_INSTANCE *pbi ){ - int i; - - ClearHuffmanTrees(pbi->HuffRoot_VP3x); - - for ( i = 0; i < NUM_HUFF_TABLES; i++ ) - if (pbi->HuffCodeArray_VP3x[i]) - _ogg_free (pbi->HuffCodeArray_VP3x[i]); - - for ( i = 0; i < NUM_HUFF_TABLES; i++ ) - if (pbi->HuffCodeLengthArray_VP3x[i]) - _ogg_free (pbi->HuffCodeLengthArray_VP3x[i]); -} - -void InitHuffmanSet( PB_INSTANCE *pbi ){ - int i; - - ClearHuffmanSet(pbi); - - pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31; - - for ( i = 0; i < NUM_HUFF_TABLES; i++ ){ - pbi->HuffCodeArray_VP3x[i] = - _ogg_calloc(MAX_ENTROPY_TOKENS, - sizeof(*pbi->HuffCodeArray_VP3x[i])); - pbi->HuffCodeLengthArray_VP3x[i] = - _ogg_calloc(MAX_ENTROPY_TOKENS, - sizeof(*pbi->HuffCodeLengthArray_VP3x[i])); - BuildHuffmanTree( pbi->HuffRoot_VP3x, - pbi->HuffCodeArray_VP3x[i], - pbi->HuffCodeLengthArray_VP3x[i], - i, FrequencyCounts_VP3[i]); - } -} - -static int ReadHuffTree(HUFF_ENTRY * HuffRoot, int depth, - oggpack_buffer *opb) { - long bit; - long ret; - theora_read(opb,1,&bit); - if(bit < 0) return OC_BADHEADER; - else if(!bit) { - int ret; - if (++depth > 32) return OC_BADHEADER; - HuffRoot->ZeroChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY)); - ret = ReadHuffTree(HuffRoot->ZeroChild, depth, opb); - if (ret < 0) return ret; - HuffRoot->OneChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY)); - ret = ReadHuffTree(HuffRoot->OneChild, depth, opb); - if (ret < 0) return ret; - HuffRoot->Value = -1; - } else { - HuffRoot->ZeroChild = NULL; - HuffRoot->OneChild = NULL; - theora_read(opb,5,&ret); - HuffRoot->Value=ret;; - if (HuffRoot->Value < 0) return OC_BADHEADER; - } - return 0; -} - -int 
ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb) { - int i; - for (i=0; iHuffRoot[i] = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY)); - ret = ReadHuffTree(ci->HuffRoot[i], 0, opb); - if (ret) return ret; - } - return 0; -} - -static void WriteHuffTree(HUFF_ENTRY *HuffRoot, oggpack_buffer *opb) { - if (HuffRoot->Value >= 0) { - oggpackB_write(opb, 1, 1); - oggpackB_write(opb, HuffRoot->Value, 5); - } else { - oggpackB_write(opb, 0, 1); - WriteHuffTree(HuffRoot->ZeroChild, opb); - WriteHuffTree(HuffRoot->OneChild, opb); - } -} - -void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES], - oggpack_buffer *opb) { - int i; - for(i=0; iValue = HuffSrc->Value; - if (HuffSrc->Value < 0) { - HuffDst->ZeroChild = CopyHuffTree(HuffSrc->ZeroChild); - HuffDst->OneChild = CopyHuffTree(HuffSrc->OneChild); - } - return HuffDst; - } - return NULL; -} - -void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci) { - int i; - pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31; - for(i=0; iHuffRoot_VP3x[i] = CopyHuffTree(ci->HuffRoot[i]); - } -} - -void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]){ - int i; - for(i=0; i -#include "codec_internal.h" - -#include "quant_lookup.h" - -#define IdctAdjustBeforeShift 8 -/* cos(n*pi/16) or sin(8-n)*pi/16) */ -#define xC1S7 64277 -#define xC2S6 60547 -#define xC3S5 54491 -#define xC4S4 46341 -#define xC5S3 36410 -#define xC6S2 25080 -#define xC7S1 12785 - -/* compute the 16 bit signed 1D inverse DCT - spec version */ -/* -static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) { - ogg_int32_t t[8], r; - ogg_int16_t *y = InputData; - ogg_int16_t *x = OutputData; - - t[0] = y[0] + y[4]; - t[0] &= 0xffff; - t[0] = (xC4S4 * t[0]) >> 16; - - t[1] = y[0] - y[4]; - t[1] &= 0xffff; - t[1] = (xC4S4 * t[1]) >> 16; - - t[2] = ((xC6S2 * y[2]) >> 16) - ((xC2S6 * y[6]) >> 16); - t[3] = ((xC2S6 * y[2]) >> 16) + ((xC6S2 * y[6]) >> 16); - t[4] = ((xC7S1 * y[1]) >> 16) - ((xC1S7 * y[7]) >> 16); - t[5] 
= ((xC3S5 * y[5]) >> 16) - ((xC5S3 * y[3]) >> 16); - t[6] = ((xC5S3 * y[5]) >> 16) + ((xC3S5 * y[3]) >> 16); - t[7] = ((xC1S7 * y[1]) >> 16) + ((xC7S1 * y[7]) >> 16); - - r = t[4] + t[5]; - t[5] = t[4] - t[5]; - t[5] &= 0xffff; - t[5] = (xC4S4 * (-t[5])) >> 16; - t[4] = r; - - r = t[7] + t[6]; - t[6] = t[7] - t[6]; - t[6] &= 0xffff; - t[6] = (xC4S4 * t[6]) >> 16; - t[7] = r; - - r = t[0] + t[3]; - t[3] = t[0] - t[3]; - t[0] = r; - - r = t[1] + t[2]; - t[2] = t[1] - t[2]; - t[1] = r; - - r = t[6] + t[5]; - t[5] = t[6] - t[5]; - t[6] = r; - - r = t[0] + t[7]; - r &= 0xffff; - x[0] = r; - - r = t[1] + t[6]; - r &= 0xffff; - x[1] = r; - - r = t[2] + t[5]; - r &= 0xffff; - x[2] = r; - - r = t[3] + t[4]; - r &= 0xffff; - x[3] = r; - - r = t[3] - t[4]; - r &= 0xffff; - x[4] = r; - - r = t[2] - t[5]; - r &= 0xffff; - x[5] = r; - - r = t[1] - t[6]; - r &= 0xffff; - x[6] = r; - - r = t[0] - t[7]; - r &= 0xffff; - x[7] = r; - -} -*/ - -static void dequant_slow( ogg_int16_t * dequant_coeffs, - ogg_int16_t * quantized_list, - ogg_int32_t * DCT_block) { - int i; - for(i=0;i<64;i++) - DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i]; -} - - - -void IDctSlow__c( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ) { - ogg_int32_t IntermediateData[64]; - ogg_int32_t * ip = IntermediateData; - ogg_int16_t * op = OutputData; - - ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H; - ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; - ogg_int32_t t1, t2; - - int loop; - - dequant_slow( QuantMatrix, InputData, IntermediateData); - - /* Inverse DCT on the rows now */ - for ( loop = 0; loop < 8; loop++){ - /* Check for non-zero values */ - if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { - t1 = (xC1S7 * ip[1]); - t2 = (xC7S1 * ip[7]); - t1 >>= 16; - t2 >>= 16; - _A = t1 + t2; - - t1 = (xC7S1 * ip[1]); - t2 = (xC1S7 * ip[7]); - t1 >>= 16; - t2 >>= 16; - _B = t1 - t2; - - t1 = (xC3S5 * ip[3]); - t2 = (xC5S3 * 
ip[5]); - t1 >>= 16; - t2 >>= 16; - _C = t1 + t2; - - t1 = (xC3S5 * ip[5]); - t2 = (xC5S3 * ip[3]); - t1 >>= 16; - t2 >>= 16; - _D = t1 - t2; - - t1 = (xC4S4 * (ogg_int16_t)(_A - _C)); - t1 >>= 16; - _Ad = t1; - - t1 = (xC4S4 * (ogg_int16_t)(_B - _D)); - t1 >>= 16; - _Bd = t1; - - - _Cd = _A + _C; - _Dd = _B + _D; - - t1 = (xC4S4 * (ogg_int16_t)(ip[0] + ip[4])); - t1 >>= 16; - _E = t1; - - t1 = (xC4S4 * (ogg_int16_t)(ip[0] - ip[4])); - t1 >>= 16; - _F = t1; - - t1 = (xC2S6 * ip[2]); - t2 = (xC6S2 * ip[6]); - t1 >>= 16; - t2 >>= 16; - _G = t1 + t2; - - t1 = (xC6S2 * ip[2]); - t2 = (xC2S6 * ip[6]); - t1 >>= 16; - t2 >>= 16; - _H = t1 - t2; - - - _Ed = _E - _G; - _Gd = _E + _G; - - _Add = _F + _Ad; - _Bdd = _Bd - _H; - - _Fd = _F - _Ad; - _Hd = _Bd + _H; - - /* Final sequence of operations over-write original inputs. */ - ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0); - ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0); - - ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0); - ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0); - - ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0); - ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0); - - ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0); - ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0); - - } - - ip += 8; /* next row */ - } - - ip = IntermediateData; - - for ( loop = 0; loop < 8; loop++){ - /* Check for non-zero values (bitwise or faster than ||) */ - if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | - ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { - - t1 = (xC1S7 * ip[1*8]); - t2 = (xC7S1 * ip[7*8]); - t1 >>= 16; - t2 >>= 16; - _A = t1 + t2; - - t1 = (xC7S1 * ip[1*8]); - t2 = (xC1S7 * ip[7*8]); - t1 >>= 16; - t2 >>= 16; - _B = t1 - t2; - - t1 = (xC3S5 * ip[3*8]); - t2 = (xC5S3 * ip[5*8]); - t1 >>= 16; - t2 >>= 16; - _C = t1 + t2; - - t1 = (xC3S5 * ip[5*8]); - t2 = (xC5S3 * ip[3*8]); - t1 >>= 16; - t2 >>= 16; - _D = t1 - t2; - - t1 = (xC4S4 * (ogg_int16_t)(_A - _C)); - t1 >>= 16; - _Ad = t1; - - t1 = (xC4S4 * (ogg_int16_t)(_B - _D)); - t1 >>= 16; - _Bd = t1; - - - _Cd = _A 
+ _C; - _Dd = _B + _D; - - t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] + ip[4*8])); - t1 >>= 16; - _E = t1; - - t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] - ip[4*8])); - t1 >>= 16; - _F = t1; - - t1 = (xC2S6 * ip[2*8]); - t2 = (xC6S2 * ip[6*8]); - t1 >>= 16; - t2 >>= 16; - _G = t1 + t2; - - t1 = (xC6S2 * ip[2*8]); - t2 = (xC2S6 * ip[6*8]); - t1 >>= 16; - t2 >>= 16; - _H = t1 - t2; - - _Ed = _E - _G; - _Gd = _E + _G; - - _Add = _F + _Ad; - _Bdd = _Bd - _H; - - _Fd = _F - _Ad; - _Hd = _Bd + _H; - - _Gd += IdctAdjustBeforeShift; - _Add += IdctAdjustBeforeShift; - _Ed += IdctAdjustBeforeShift; - _Fd += IdctAdjustBeforeShift; - - /* Final sequence of operations over-write original inputs. */ - op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4); - op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4); - - op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4); - op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4); - - op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4); - op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4); - - op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4); - op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4); - }else{ - op[0*8] = 0; - op[7*8] = 0; - op[1*8] = 0; - op[2*8] = 0; - op[3*8] = 0; - op[4*8] = 0; - op[5*8] = 0; - op[6*8] = 0; - } - - ip++; /* next column */ - op++; - } -} - -/************************ - x x x x 0 0 0 0 - x x x 0 0 0 0 0 - x x 0 0 0 0 0 0 - x 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 -*************************/ - -static void dequant_slow10( ogg_int16_t * dequant_coeffs, - ogg_int16_t * quantized_list, - ogg_int32_t * DCT_block){ - int i; - memset(DCT_block,0, 128); - for(i=0;i<10;i++) - DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i]; - -} - -void IDct10__c( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ){ - ogg_int32_t IntermediateData[64]; - ogg_int32_t * ip = IntermediateData; - ogg_int16_t * op = OutputData; - - ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H; - ogg_int32_t _Ed, _Gd, _Add, 
_Bdd, _Fd, _Hd; - ogg_int32_t t1, t2; - - int loop; - - dequant_slow10( QuantMatrix, InputData, IntermediateData); - - /* Inverse DCT on the rows now */ - for ( loop = 0; loop < 4; loop++){ - /* Check for non-zero values */ - if ( ip[0] | ip[1] | ip[2] | ip[3] ){ - t1 = (xC1S7 * ip[1]); - t1 >>= 16; - _A = t1; - - t1 = (xC7S1 * ip[1]); - t1 >>= 16; - _B = t1 ; - - t1 = (xC3S5 * ip[3]); - t1 >>= 16; - _C = t1; - - t2 = (xC5S3 * ip[3]); - t2 >>= 16; - _D = -t2; - - - t1 = (xC4S4 * (ogg_int16_t)(_A - _C)); - t1 >>= 16; - _Ad = t1; - - t1 = (xC4S4 * (ogg_int16_t)(_B - _D)); - t1 >>= 16; - _Bd = t1; - - - _Cd = _A + _C; - _Dd = _B + _D; - - t1 = (xC4S4 * ip[0] ); - t1 >>= 16; - _E = t1; - - _F = t1; - - t1 = (xC2S6 * ip[2]); - t1 >>= 16; - _G = t1; - - t1 = (xC6S2 * ip[2]); - t1 >>= 16; - _H = t1 ; - - - _Ed = _E - _G; - _Gd = _E + _G; - - _Add = _F + _Ad; - _Bdd = _Bd - _H; - - _Fd = _F - _Ad; - _Hd = _Bd + _H; - - /* Final sequence of operations over-write original inputs. */ - ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0); - ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0); - - ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0); - ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0); - - ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0); - ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0); - - ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0); - ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0); - - } - - ip += 8; /* next row */ - } - - ip = IntermediateData; - - for ( loop = 0; loop < 8; loop++) { - /* Check for non-zero values (bitwise or faster than ||) */ - if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) { - - t1 = (xC1S7 * ip[1*8]); - t1 >>= 16; - _A = t1 ; - - t1 = (xC7S1 * ip[1*8]); - t1 >>= 16; - _B = t1 ; - - t1 = (xC3S5 * ip[3*8]); - t1 >>= 16; - _C = t1 ; - - t2 = (xC5S3 * ip[3*8]); - t2 >>= 16; - _D = - t2; - - - t1 = (xC4S4 * (ogg_int16_t)(_A - _C)); - t1 >>= 16; - _Ad = t1; - - t1 = (xC4S4 * (ogg_int16_t)(_B - _D)); - t1 >>= 16; - _Bd = t1; - - - _Cd = _A + _C; - _Dd = _B + _D; - - t1 = (xC4S4 * ip[0*8]); - t1 >>= 
16; - _E = t1; - _F = t1; - - t1 = (xC2S6 * ip[2*8]); - t1 >>= 16; - _G = t1; - - t1 = (xC6S2 * ip[2*8]); - t1 >>= 16; - _H = t1; - - - _Ed = _E - _G; - _Gd = _E + _G; - - _Add = _F + _Ad; - _Bdd = _Bd - _H; - - _Fd = _F - _Ad; - _Hd = _Bd + _H; - - _Gd += IdctAdjustBeforeShift; - _Add += IdctAdjustBeforeShift; - _Ed += IdctAdjustBeforeShift; - _Fd += IdctAdjustBeforeShift; - - /* Final sequence of operations over-write original inputs. */ - op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4); - op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4); - - op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4); - op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4); - - op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4); - op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4); - - op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4); - op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4); - }else{ - op[0*8] = 0; - op[7*8] = 0; - op[1*8] = 0; - op[2*8] = 0; - op[3*8] = 0; - op[4*8] = 0; - op[5*8] = 0; - op[6*8] = 0; - } - - ip++; /* next column */ - op++; - } -} - -/*************************** - x 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 -**************************/ - -void IDct1( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ){ - int loop; - - ogg_int16_t OutD; - - OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5); - - for(loop=0;loop<64;loop++) - OutputData[loop]=OutD; - -} - -void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags) -{ - funcs->IDctSlow = IDctSlow__c; - funcs->IDct10 = IDct10__c; - funcs->IDct3 = IDct10__c; -#if defined(USE_ASM) - // todo: make mmx encoder idct for MSC one day... 
-#if !defined (_MSC_VER) - if (cpu_flags & OC_CPU_X86_MMX) { - dsp_mmx_idct_init(funcs); - } -#endif -#endif -} diff --git a/Engine/lib/libtheora/lib/enc/encoder_lookup.h b/Engine/lib/libtheora/lib/enc/encoder_lookup.h deleted file mode 100644 index c5759869a..000000000 --- a/Engine/lib/libtheora/lib/enc/encoder_lookup.h +++ /dev/null @@ -1,120 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: simple static lookups for VP3 frame encoder - last mod: $Id: encoder_lookup.h 15323 2008-09-19 19:43:59Z giles $ - - ********************************************************************/ - -#include "codec_internal.h" - -static const ogg_uint32_t MvPattern[(MAX_MV_EXTENT * 2) + 1] = { - 0x000000ff, 0x000000fd, 0x000000fb, 0x000000f9, - 0x000000f7, 0x000000f5, 0x000000f3, 0x000000f1, - 0x000000ef, 0x000000ed, 0x000000eb, 0x000000e9, - 0x000000e7, 0x000000e5, 0x000000e3, 0x000000e1, - 0x0000006f, 0x0000006d, 0x0000006b, 0x00000069, - 0x00000067, 0x00000065, 0x00000063, 0x00000061, - 0x0000002f, 0x0000002d, 0x0000002b, 0x00000029, - 0x00000009, 0x00000007, 0x00000002, 0x00000000, - 0x00000001, 0x00000006, 0x00000008, 0x00000028, - 0x0000002a, 0x0000002c, 0x0000002e, 0x00000060, - 0x00000062, 0x00000064, 0x00000066, 0x00000068, - 0x0000006a, 0x0000006c, 0x0000006e, 0x000000e0, - 0x000000e2, 0x000000e4, 0x000000e6, 0x000000e8, - 0x000000ea, 0x000000ec, 0x000000ee, 0x000000f0, - 0x000000f2, 0x000000f4, 0x000000f6, 0x000000f8, - 0x000000fa, 0x000000fc, 0x000000fe, -}; - -static 
const ogg_uint32_t MvBits[(MAX_MV_EXTENT * 2) + 1] = { - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, - 6, 6, 6, 6, 4, 4, 3, 3, - 3, 4, 4, 6, 6, 6, 6, 7, - 7, 7, 7, 7, 7, 7, 7, 8, - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, -}; - -static const ogg_uint32_t MvPattern2[(MAX_MV_EXTENT * 2) + 1] = { - 0x0000003f, 0x0000003d, 0x0000003b, 0x00000039, - 0x00000037, 0x00000035, 0x00000033, 0x00000031, - 0x0000002f, 0x0000002d, 0x0000002b, 0x00000029, - 0x00000027, 0x00000025, 0x00000023, 0x00000021, - 0x0000001f, 0x0000001d, 0x0000001b, 0x00000019, - 0x00000017, 0x00000015, 0x00000013, 0x00000011, - 0x0000000f, 0x0000000d, 0x0000000b, 0x00000009, - 0x00000007, 0x00000005, 0x00000003, 0x00000000, - 0x00000002, 0x00000004, 0x00000006, 0x00000008, - 0x0000000a, 0x0000000c, 0x0000000e, 0x00000010, - 0x00000012, 0x00000014, 0x00000016, 0x00000018, - 0x0000001a, 0x0000001c, 0x0000001e, 0x00000020, - 0x00000022, 0x00000024, 0x00000026, 0x00000028, - 0x0000002a, 0x0000002c, 0x0000002e, 0x00000030, - 0x00000032, 0x00000034, 0x00000036, 0x00000038, - 0x0000003a, 0x0000003c, 0x0000003e, -}; - -static const ogg_uint32_t MvBits2[(MAX_MV_EXTENT * 2) + 1] = { - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, -}; - -static const ogg_uint32_t ModeBitPatterns[MAX_MODES] = { - 0x00, 0x02, 0x06, 0x0E, 0x1E, 0x3E, 0x7E, 0x7F }; - -static const ogg_int32_t ModeBitLengths[MAX_MODES] = { - 1, 2, 3, 4, 5, 6, 7, 7 }; - -static const unsigned char ModeSchemes[MODE_METHODS-2][MAX_MODES] = { - /* Last Mv dominates */ - { 3, 4, 2, 0, 1, 5, 6, 7 }, /* L P M N I G GM 4 */ - { 2, 4, 3, 0, 1, 5, 6, 7 }, /* L P N M I G GM 4 */ - { 3, 4, 1, 0, 2, 5, 6, 7 }, /* L M P N I G GM 4 */ - { 2, 4, 1, 0, 3, 5, 6, 7 }, /* L M N P I G GM 4 */ - - /* No MV dominates */ - { 0, 4, 3, 1, 2, 5, 6, 7 }, /* N L P M I G GM 4 */ - { 0, 5, 
4, 2, 3, 1, 6, 7 }, /* N G L P M I GM 4 */ - -}; - - -static const ogg_uint32_t MvThreshTable[Q_TABLE_SIZE] = { - 65, 65, 65, 65, 50, 50, 50, 50, - 40, 40, 40, 40, 40, 40, 40, 40, - 30, 30, 30, 30, 30, 30, 30, 30, - 20, 20, 20, 20, 20, 20, 20, 20, - 15, 15, 15, 15, 15, 15, 15, 15, - 10, 10, 10, 10, 10, 10, 10, 10, - 5, 5, 5, 5, 5, 5, 5, 5, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const ogg_uint32_t MVChangeFactorTable[Q_TABLE_SIZE] = { - 11, 11, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15 -}; diff --git a/Engine/lib/libtheora/lib/enc/encoder_quant.c b/Engine/lib/libtheora/lib/enc/encoder_quant.c deleted file mode 100644 index a5639a233..000000000 --- a/Engine/lib/libtheora/lib/enc/encoder_quant.c +++ /dev/null @@ -1,558 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2005 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encoder_quant.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" -#include "quant_lookup.h" - -#define OC_QUANT_MAX (1024<<2) -static const unsigned DC_QUANT_MIN[2]={4<<2,8<<2}; -static const unsigned AC_QUANT_MIN[2]={2<<2,4<<2}; -#define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a)) -#define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a)) -#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) - -static int ilog(unsigned _v){ - int ret; - for(ret=0;_v;ret++)_v>>=1; - return ret; -} - - -void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer* _opb) { - - th_quant_info *_qinfo = &pbi->quant_info; - - const th_quant_ranges *qranges; - const th_quant_base *base_mats[2*3*64]; - int indices[2][3][64]; - int nbase_mats; - int nbits; - int ci; - int qi; - int qri; - int qti; - int pli; - int qtj; - int plj; - int bmi; - int i; - - /*Unlike the scale tables, we can't assume the maximum value will be in - index 0, so search for it here.*/ - i=_qinfo->loop_filter_limits[0]; - for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]); - nbits=ilog(i); - oggpackB_write(_opb,nbits,3); - for(qi=0;qi<64;qi++){ - oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits); - } - /* 580 bits for VP3.*/ - nbits=OC_MAXI(ilog(_qinfo->ac_scale[0]),1); - oggpackB_write(_opb,nbits-1,4); - for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits); - /* 516 bits for VP3.*/ - nbits=OC_MAXI(ilog(_qinfo->dc_scale[0]),1); - oggpackB_write(_opb,nbits-1,4); - for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits); - /*Consolidate any duplicate base matrices.*/ - nbase_mats=0; - for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - 
qranges=_qinfo->qi_ranges[qti]+pli; - for(qri=0;qri<=qranges->nranges;qri++){ - for(bmi=0;;bmi++){ - if(bmi>=nbase_mats){ - base_mats[bmi]=qranges->base_matrices+qri; - indices[qti][pli][qri]=nbase_mats++; - break; - } - else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri], - sizeof(base_mats[bmi][0]))==0){ - indices[qti][pli][qri]=bmi; - break; - } - } - } - } - /*Write out the list of unique base matrices. - 1545 bits for VP3 matrices.*/ - oggpackB_write(_opb,nbase_mats-1,9); - for(bmi=0;bmiqi_ranges[qti]+pli; - if(i>0){ - if(qti>0){ - if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&& - memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes, - qranges->nranges*sizeof(qranges->sizes[0]))==0&& - memcmp(indices[qti][pli],indices[qti-1][pli], - (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ - oggpackB_write(_opb,1,2); - continue; - } - } - qtj=(i-1)/3; - plj=(i-1)%3; - if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&& - memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes, - qranges->nranges*sizeof(qranges->sizes[0]))==0&& - memcmp(indices[qti][pli],indices[qtj][plj], - (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ - oggpackB_write(_opb,0,1+(qti>0)); - continue; - } - oggpackB_write(_opb,1,1); - } - oggpackB_write(_opb,indices[qti][pli][0],nbits); - for(qi=qri=0;qi<63;qri++){ - oggpackB_write(_opb,qranges->sizes[qri]-1,ilog(62-qi)); - qi+=qranges->sizes[qri]; - oggpackB_write(_opb,indices[qti][pli][qri+1],nbits); - } - } -} - -/* a copied/reconciled version of derf's theora-exp code; redundancy - should be eliminated at some point */ -void InitQTables( PB_INSTANCE *pbi ){ - int qti; /* coding mode: intra or inter */ - int pli; /* Y U V */ - th_quant_info *qinfo = &pbi->quant_info; - - pbi->QThreshTable = pbi->quant_info.ac_scale; - - for(qti=0;qti<2;qti++){ - for(pli=0;pli<3;pli++){ - int qi; /* quality index */ - int qri; /* range iterator */ - - for(qi=0,qri=0; qri<=qinfo->qi_ranges[qti][pli].nranges; qri++){ - 
th_quant_base base; - - ogg_uint32_t q; - int qi_start; - int qi_end; - int ci; - memcpy(base,qinfo->qi_ranges[qti][pli].base_matrices[qri], - sizeof(base)); - - qi_start=qi; - if(qri==qinfo->qi_ranges[qti][pli].nranges) - qi_end=qi+1; - else - qi_end=qi+qinfo->qi_ranges[qti][pli].sizes[qri]; - - /* Iterate over quality indicies in this range */ - for(;;){ - - /*Scale DC the coefficient from the proper table.*/ - q=((ogg_uint32_t)qinfo->dc_scale[qi]*base[0]/100)<<2; - q=OC_CLAMPI(DC_QUANT_MIN[qti],q,OC_QUANT_MAX); - pbi->quant_tables[qti][pli][qi][0]=(ogg_uint16_t)q; - - /*Now scale AC coefficients from the proper table.*/ - for(ci=1;ci<64;ci++){ - q=((ogg_uint32_t)qinfo->ac_scale[qi]*base[ci]/100)<<2; - q=OC_CLAMPI(AC_QUANT_MIN[qti],q,OC_QUANT_MAX); - pbi->quant_tables[qti][pli][qi][ci]=(ogg_uint16_t)q; - } - - if(++qi>=qi_end)break; - - /*Interpolate the next base matrix.*/ - for(ci=0;ci<64;ci++){ - base[ci]=(unsigned char) - ((2*((qi_end-qi)*qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ - (qi-qi_start)*qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) - +qinfo->qi_ranges[qti][pli].sizes[qri])/ - (2*qinfo->qi_ranges[qti][pli].sizes[qri])); - } - } - } - } - } -} - -static void BuildZigZagIndex(PB_INSTANCE *pbi){ - ogg_int32_t i,j; - - /* invert the row to zigzag coeffient order lookup table */ - for ( i = 0; i < BLOCK_SIZE; i++ ){ - j = dezigzag_index[i]; - pbi->zigzag_index[j] = i; - } -} - -static void init_quantizer ( CP_INSTANCE *cpi, - unsigned char QIndex ){ - int i; - double ZBinFactor; - double RoundingFactor; - - double temp_fp_quant_coeffs; - double temp_fp_quant_round; - double temp_fp_ZeroBinSize; - PB_INSTANCE *pbi = &cpi->pb; - - - const ogg_uint16_t * temp_Y_coeffs; - const ogg_uint16_t * temp_U_coeffs; - const ogg_uint16_t * temp_V_coeffs; - const ogg_uint16_t * temp_Inter_Y_coeffs; - const ogg_uint16_t * temp_Inter_U_coeffs; - const ogg_uint16_t * temp_Inter_V_coeffs; - ogg_uint16_t scale_factor = cpi->pb.quant_info.ac_scale[QIndex]; - - 
/* Notes on setup of quantisers. The initial multiplication by - the scale factor is done in the ogg_int32_t domain to insure that the - precision in the quantiser is the same as in the inverse - quantiser where all calculations are integer. The "<< 2" is a - normalisation factor for the forward DCT transform. */ - - temp_Y_coeffs = pbi->quant_tables[0][0][QIndex]; - temp_U_coeffs = pbi->quant_tables[0][1][QIndex]; - temp_V_coeffs = pbi->quant_tables[0][2][QIndex]; - temp_Inter_Y_coeffs = pbi->quant_tables[1][0][QIndex]; - temp_Inter_U_coeffs = pbi->quant_tables[1][1][QIndex]; - temp_Inter_V_coeffs = pbi->quant_tables[1][2][QIndex]; - - ZBinFactor = 0.9; - - switch(cpi->pb.info.sharpness){ - case 0: - ZBinFactor = 0.65; - if ( scale_factor <= 50 ) - RoundingFactor = 0.499; - else - RoundingFactor = 0.46; - break; - case 1: - ZBinFactor = 0.75; - if ( scale_factor <= 50 ) - RoundingFactor = 0.476; - else - RoundingFactor = 0.400; - break; - - default: - ZBinFactor = 0.9; - if ( scale_factor <= 50 ) - RoundingFactor = 0.476; - else - RoundingFactor = 0.333; - break; - } - - /* Use fixed multiplier for intra Y DC */ - temp_fp_quant_coeffs = temp_Y_coeffs[0]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Y_round[0] = (ogg_int32_t) (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Y[0] = (ogg_int32_t) (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Y_coeffs[0] = (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Intra U */ - temp_fp_quant_coeffs = temp_U_coeffs[0]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_U_round[0] = (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_U[0] = (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_U_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Intra V 
*/ - temp_fp_quant_coeffs = temp_V_coeffs[0]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_V_round[0] = (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_V[0] = (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_V_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - - /* Inter Y */ - temp_fp_quant_coeffs = temp_Inter_Y_coeffs[0]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Inter_Y_round[0]= (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Inter_Y[0]= (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Inter_Y_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Inter U */ - temp_fp_quant_coeffs = temp_Inter_U_coeffs[0]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Inter_U_round[0]= (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Inter_U[0]= (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Inter_U_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Inter V */ - temp_fp_quant_coeffs = temp_Inter_V_coeffs[0]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Inter_V_round[0]= (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Inter_V[0]= (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Inter_V_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - - for ( i = 1; i < 64; i++ ){ - /* Intra Y */ - temp_fp_quant_coeffs = temp_Y_coeffs[i]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Y_round[i] = (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * 
ZBinFactor; - pbi->fp_ZeroBinSize_Y[i] = (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Y_coeffs[i] = (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Intra U */ - temp_fp_quant_coeffs = temp_U_coeffs[i]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_U_round[i] = (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_U[i] = (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_U_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Intra V */ - temp_fp_quant_coeffs = temp_V_coeffs[i]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_V_round[i] = (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_V[i] = (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_V_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Inter Y */ - temp_fp_quant_coeffs = temp_Inter_Y_coeffs[i]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Inter_Y_round[i]= (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Inter_Y[i]= (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Inter_Y_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Inter U */ - temp_fp_quant_coeffs = temp_Inter_U_coeffs[i]; - temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Inter_U_round[i]= (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Inter_U[i]= (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Inter_U_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - /* Inter V */ - temp_fp_quant_coeffs = temp_Inter_V_coeffs[i]; - 
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor; - pbi->fp_quant_Inter_V_round[i]= (0.5 + temp_fp_quant_round); - temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor; - pbi->fp_ZeroBinSize_Inter_V[i]= (0.5 + temp_fp_ZeroBinSize); - temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; - pbi->fp_quant_Inter_V_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs); - - - } - - pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs; - -} - -void select_quantiser(PB_INSTANCE *pbi, int type) { - /* select a quantiser according to what plane has to be coded in what - * mode. Could be extended to a more sophisticated scheme. */ - - switch(type) { - case BLOCK_Y: - pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs; - pbi->fquant_round = pbi->fp_quant_Y_round; - pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Y; - break; - case BLOCK_U: - pbi->fquant_coeffs = pbi->fp_quant_U_coeffs; - pbi->fquant_round = pbi->fp_quant_U_round; - pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_U; - break; - case BLOCK_V: - pbi->fquant_coeffs = pbi->fp_quant_V_coeffs; - pbi->fquant_round = pbi->fp_quant_V_round; - pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_V; - break; - case BLOCK_INTER_Y: - pbi->fquant_coeffs = pbi->fp_quant_Inter_Y_coeffs; - pbi->fquant_round = pbi->fp_quant_Inter_Y_round; - pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_Y; - break; - case BLOCK_INTER_U: - pbi->fquant_coeffs = pbi->fp_quant_Inter_U_coeffs; - pbi->fquant_round = pbi->fp_quant_Inter_U_round; - pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_U; - break; - case BLOCK_INTER_V: - pbi->fquant_coeffs = pbi->fp_quant_Inter_V_coeffs; - pbi->fquant_round = pbi->fp_quant_Inter_V_round; - pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_V; - break; - } -} - - -void quantize( PB_INSTANCE *pbi, - ogg_int16_t * DCT_block, - Q_LIST_ENTRY * quantized_list){ - ogg_uint32_t i; /* Row index */ - Q_LIST_ENTRY val; /* Quantised value. 
*/ - - ogg_int32_t * FquantRoundPtr = pbi->fquant_round; - ogg_int32_t * FquantCoeffsPtr = pbi->fquant_coeffs; - ogg_int32_t * FquantZBinSizePtr = pbi->fquant_ZbSize; - ogg_int16_t * DCT_blockPtr = DCT_block; - ogg_uint32_t * ZigZagPtr = (ogg_uint32_t *)pbi->zigzag_index; - ogg_int32_t temp; - - /* Set the quantized_list to default to 0 */ - memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) ); - - /* Note that we add half divisor to effect rounding on positive number */ - for( i = 0; i < VFRAGPIXELS; i++) { - - int col; - /* Iterate through columns */ - for( col = 0; col < 8; col++) { - if ( DCT_blockPtr[col] >= FquantZBinSizePtr[col] ) { - temp = FquantCoeffsPtr[col] * ( DCT_blockPtr[col] + FquantRoundPtr[col] ) ; - val = (Q_LIST_ENTRY) (temp>>16); - quantized_list[ZigZagPtr[col]] = ( val > 511 ) ? 511 : val; - } else if ( DCT_blockPtr[col] <= -FquantZBinSizePtr[col] ) { - temp = FquantCoeffsPtr[col] * - ( DCT_blockPtr[col] - FquantRoundPtr[col] ) + MIN16; - val = (Q_LIST_ENTRY) (temp>>16); - quantized_list[ZigZagPtr[col]] = ( val < -511 ) ? -511 : val; - } - } - - FquantRoundPtr += 8; - FquantCoeffsPtr += 8; - FquantZBinSizePtr += 8; - DCT_blockPtr += 8; - ZigZagPtr += 8; - } -} - -static void init_dequantizer ( PB_INSTANCE *pbi, - unsigned char QIndex ){ - int i, j; - - ogg_uint16_t * InterY_coeffs; - ogg_uint16_t * InterU_coeffs; - ogg_uint16_t * InterV_coeffs; - ogg_uint16_t * Y_coeffs; - ogg_uint16_t * U_coeffs; - ogg_uint16_t * V_coeffs; - - Y_coeffs = pbi->quant_tables[0][0][QIndex]; - U_coeffs = pbi->quant_tables[0][1][QIndex]; - V_coeffs = pbi->quant_tables[0][2][QIndex]; - InterY_coeffs = pbi->quant_tables[1][0][QIndex]; - InterU_coeffs = pbi->quant_tables[1][1][QIndex]; - InterV_coeffs = pbi->quant_tables[1][2][QIndex]; - - /* invert the dequant index into the quant index - the dxer has a different order than the cxer. */ - BuildZigZagIndex(pbi); - - /* Reorder dequantisation coefficients into dct zigzag order. 
*/ - for ( i = 0; i < BLOCK_SIZE; i++ ) { - j = pbi->zigzag_index[i]; - pbi->dequant_Y_coeffs[j] = Y_coeffs[i]; - } - for ( i = 0; i < BLOCK_SIZE; i++ ) { - j = pbi->zigzag_index[i]; - pbi->dequant_U_coeffs[j] = U_coeffs[i]; - } - for ( i = 0; i < BLOCK_SIZE; i++ ) { - j = pbi->zigzag_index[i]; - pbi->dequant_V_coeffs[j] = V_coeffs[i]; - } - for ( i = 0; i < BLOCK_SIZE; i++ ){ - j = pbi->zigzag_index[i]; - pbi->dequant_InterY_coeffs[j] = InterY_coeffs[i]; - } - for ( i = 0; i < BLOCK_SIZE; i++ ){ - j = pbi->zigzag_index[i]; - pbi->dequant_InterU_coeffs[j] = InterU_coeffs[i]; - } - for ( i = 0; i < BLOCK_SIZE; i++ ){ - j = pbi->zigzag_index[i]; - pbi->dequant_InterV_coeffs[j] = InterV_coeffs[i]; - } - - pbi->dequant_coeffs = pbi->dequant_Y_coeffs; -} - -void UpdateQ( PB_INSTANCE *pbi, int NewQIndex ){ - ogg_uint32_t qscale; - - /* clamp to legal bounds */ - if (NewQIndex >= Q_TABLE_SIZE) NewQIndex = Q_TABLE_SIZE - 1; - else if (NewQIndex < 0) NewQIndex = 0; - - pbi->FrameQIndex = NewQIndex; - - qscale = pbi->quant_info.ac_scale[NewQIndex]; - pbi->ThisFrameQualityValue = qscale; - - /* Re-initialise the Q tables for forward and reverse transforms. */ - init_dequantizer ( pbi, (unsigned char) pbi->FrameQIndex ); -} - -void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ ){ - ogg_uint32_t qscale; - PB_INSTANCE *pbi = &cpi->pb; - - /* Do bounds checking and convert to a float. */ - qscale = NewQ; - if ( qscale < pbi->quant_info.ac_scale[Q_TABLE_SIZE-1] ) - qscale = pbi->quant_info.ac_scale[Q_TABLE_SIZE-1]; - else if ( qscale > pbi->quant_info.ac_scale[0] ) - qscale = pbi->quant_info.ac_scale[0]; - - /* Set the inter/intra descision control variables. */ - pbi->FrameQIndex = Q_TABLE_SIZE - 1; - while ((ogg_int32_t) pbi->FrameQIndex >= 0 ) { - if ( (pbi->FrameQIndex == 0) || - ( pbi->quant_info.ac_scale[pbi->FrameQIndex] >= NewQ) ) - break; - pbi->FrameQIndex --; - } - - /* Re-initialise the Q tables for forward and reverse transforms. 
*/ - init_quantizer ( cpi, pbi->FrameQIndex ); - init_dequantizer ( pbi, pbi->FrameQIndex ); -} diff --git a/Engine/lib/libtheora/lib/enc/encoder_toplevel.c b/Engine/lib/libtheora/lib/enc/encoder_toplevel.c deleted file mode 100644 index 9356bba23..000000000 --- a/Engine/lib/libtheora/lib/enc/encoder_toplevel.c +++ /dev/null @@ -1,1447 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encoder_toplevel.c 15383 2008-10-10 14:33:46Z xiphmont $ - - ********************************************************************/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include -#include -#include "toplevel_lookup.h" -#include "../internal.h" -#include "dsp.h" -#include "codec_internal.h" - -#define A_TABLE_SIZE 29 -#define DF_CANDIDATE_WINDOW 5 - -/* - * th_quant_info for VP3 - */ - -/*The default quantization parameters used by VP3.1.*/ -static const int OC_VP31_RANGE_SIZES[1]={63}; -static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={ - { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 58, 68, 109,103, 77, - 24, 35, 55, 64, 81, 104,113, 92, - 49, 64, 78, 87,103, 121,120,101, - 72, 92, 95, 98,112, 100,103, 99 - }, - { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 58, 68, 109,103, 77, - 24, 35, 55, 64, 81, 104,113, 92, - 49, 64, 78, 87,103, 
121,120,101, - 72, 92, 95, 98,112, 100,103, 99 - } -}; -static const th_quant_base OC_VP31_BASES_INTRA_C[2]={ - { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - }, - { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - } -}; -static const th_quant_base OC_VP31_BASES_INTER[2]={ - { - 16, 16, 16, 20, 24, 28, 32, 40, - 16, 16, 20, 24, 28, 32, 40, 48, - 16, 20, 24, 28, 32, 40, 48, 64, - 20, 24, 28, 32, 40, 48, 64, 64, - 24, 28, 32, 40, 48, 64, 64, 64, - 28, 32, 40, 48, 64, 64, 64, 96, - 32, 40, 48, 64, 64, 64, 96,128, - 40, 48, 64, 64, 64, 96,128,128 - }, - { - 16, 16, 16, 20, 24, 28, 32, 40, - 16, 16, 20, 24, 28, 32, 40, 48, - 16, 20, 24, 28, 32, 40, 48, 64, - 20, 24, 28, 32, 40, 48, 64, 64, - 24, 28, 32, 40, 48, 64, 64, 64, - 28, 32, 40, 48, 64, 64, 64, 96, - 32, 40, 48, 64, 64, 64, 96,128, - 40, 48, 64, 64, 64, 96,128,128 - } -}; - -const th_quant_info TH_VP31_QUANT_INFO={ - { - 220,200,190,180,170,170,160,160, - 150,150,140,140,130,130,120,120, - 110,110,100,100, 90, 90, 90, 80, - 80, 80, 70, 70, 70, 60, 60, 60, - 60, 50, 50, 50, 50, 40, 40, 40, - 40, 40, 30, 30, 30, 30, 30, 30, - 30, 20, 20, 20, 20, 20, 20, 20, - 20, 10, 10, 10, 10, 10, 10, 10 - }, - { - 500,450,400,370,340,310,285,265, - 245,225,210,195,185,180,170,160, - 150,145,135,130,125,115,110,107, - 100, 96, 93, 89, 85, 82, 75, 74, - 70, 68, 64, 60, 57, 56, 52, 50, - 49, 45, 44, 43, 40, 38, 37, 35, - 33, 32, 30, 29, 28, 25, 24, 22, - 21, 19, 18, 17, 15, 13, 12, 10 - }, - { - 30,25,20,20,15,15,14,14, - 13,13,12,12,11,11,10,10, - 9, 9, 8, 8, 7, 7, 7, 7, - 6, 6, 6, 6, 
5, 5, 5, 5, - 4, 4, 4, 4, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - }, - { - { - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C} - }, - { - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER} - } - } -}; - - -static void EClearFragmentInfo(CP_INSTANCE * cpi){ - if(cpi->extra_fragments) - _ogg_free(cpi->extra_fragments); - if(cpi->FragmentLastQ) - _ogg_free(cpi->FragmentLastQ); - if(cpi->FragTokens) - _ogg_free(cpi->FragTokens); - if(cpi->FragTokenCounts) - _ogg_free(cpi->FragTokenCounts); - if(cpi->RunHuffIndices) - _ogg_free(cpi->RunHuffIndices); - if(cpi->LastCodedErrorScore) - _ogg_free(cpi->LastCodedErrorScore); - if(cpi->ModeList) - _ogg_free(cpi->ModeList); - if(cpi->MVList) - _ogg_free(cpi->MVList); - if(cpi->DCT_codes ) - _ogg_free( cpi->DCT_codes ); - if(cpi->DCTDataBuffer ) - _ogg_free( cpi->DCTDataBuffer); - if(cpi->quantized_list) - _ogg_free( cpi->quantized_list); - if(cpi->OriginalDC) - _ogg_free( cpi->OriginalDC); - if(cpi->PartiallyCodedFlags) - _ogg_free(cpi->PartiallyCodedFlags); - if(cpi->PartiallyCodedMbPatterns) - _ogg_free(cpi->PartiallyCodedMbPatterns); - if(cpi->UncodedMbFlags) - _ogg_free(cpi->UncodedMbFlags); - - if(cpi->BlockCodedFlags) - _ogg_free(cpi->BlockCodedFlags); - - cpi->extra_fragments = 0; - cpi->FragmentLastQ = 0; - cpi->FragTokens = 0; - cpi->FragTokenCounts = 0; - cpi->RunHuffIndices = 0; - cpi->LastCodedErrorScore = 0; - cpi->ModeList = 0; - cpi->MVList = 0; - cpi->DCT_codes = 0; - cpi->DCTDataBuffer = 0; - cpi->quantized_list = 0; - cpi->OriginalDC = 0; - cpi->BlockCodedFlags = 0; -} - -static void EInitFragmentInfo(CP_INSTANCE * cpi){ - - /* clear any existing info */ - EClearFragmentInfo(cpi); - - /* Perform Fragment Allocations */ - cpi->extra_fragments = - 
_ogg_malloc(cpi->pb.UnitFragments*sizeof(unsigned char)); - - /* A note to people reading and wondering why malloc returns aren't - checked: - - lines like the following that implement a general strategy of - 'check the return of malloc; a zero pointer means we're out of - memory!'...: - - if(!cpi->extra_fragments) { EDeleteFragmentInfo(cpi); return FALSE; } - - ...are not useful. It's true that many platforms follow this - malloc behavior, but many do not. The more modern malloc - strategy is only to allocate virtual pages, which are not mapped - until the memory on that page is touched. At *that* point, if - the machine is out of heap, the page fails to be mapped and a - SEGV is generated. - - That means that if we want to deal with out of memory conditions, - we *must* be prepared to process a SEGV. If we implement the - SEGV handler, there's no reason to to check malloc return; it is - a waste of code. */ - - cpi->FragmentLastQ = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->FragmentLastQ)); - cpi->FragTokens = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->FragTokens)); - cpi->OriginalDC = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->OriginalDC)); - cpi->FragTokenCounts = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->FragTokenCounts)); - cpi->RunHuffIndices = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->RunHuffIndices)); - cpi->LastCodedErrorScore = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->LastCodedErrorScore)); - cpi->BlockCodedFlags = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->BlockCodedFlags)); - cpi->ModeList = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->ModeList)); - cpi->MVList = - _ogg_malloc(cpi->pb.UnitFragments* - sizeof(*cpi->MVList)); - cpi->DCT_codes = - _ogg_malloc(64* - sizeof(*cpi->DCT_codes)); - cpi->DCTDataBuffer = - _ogg_malloc(64* - sizeof(*cpi->DCTDataBuffer)); - cpi->quantized_list = - _ogg_malloc(64* - sizeof(*cpi->quantized_list)); - cpi->PartiallyCodedFlags = - 
_ogg_malloc(cpi->pb.MacroBlocks* - sizeof(*cpi->PartiallyCodedFlags)); - cpi->PartiallyCodedMbPatterns = - _ogg_malloc(cpi->pb.MacroBlocks* - sizeof(*cpi->PartiallyCodedMbPatterns)); - cpi->UncodedMbFlags = - _ogg_malloc(cpi->pb.MacroBlocks* - sizeof(*cpi->UncodedMbFlags)); - -} - -static void EClearFrameInfo(CP_INSTANCE * cpi) { - if(cpi->ConvDestBuffer ) - _ogg_free(cpi->ConvDestBuffer ); - cpi->ConvDestBuffer = 0; - - if(cpi->yuv0ptr) - _ogg_free(cpi->yuv0ptr); - cpi->yuv0ptr = 0; - - if(cpi->yuv1ptr) - _ogg_free(cpi->yuv1ptr); - cpi->yuv1ptr = 0; - - if(cpi->OptimisedTokenListEb ) - _ogg_free(cpi->OptimisedTokenListEb); - cpi->OptimisedTokenListEb = 0; - - if(cpi->OptimisedTokenList ) - _ogg_free(cpi->OptimisedTokenList); - cpi->OptimisedTokenList = 0; - - if(cpi->OptimisedTokenListHi ) - _ogg_free(cpi->OptimisedTokenListHi); - cpi->OptimisedTokenListHi = 0; - - if(cpi->OptimisedTokenListPl ) - _ogg_free(cpi->OptimisedTokenListPl); - cpi->OptimisedTokenListPl = 0; - -} - -static void EInitFrameInfo(CP_INSTANCE * cpi){ - int FrameSize = cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize; - - /* clear any existing info */ - EClearFrameInfo(cpi); - - /* allocate frames */ - cpi->ConvDestBuffer = - _ogg_malloc(FrameSize* - sizeof(*cpi->ConvDestBuffer)); - cpi->yuv0ptr = - _ogg_malloc(FrameSize* - sizeof(*cpi->yuv0ptr)); - cpi->yuv1ptr = - _ogg_malloc(FrameSize* - sizeof(*cpi->yuv1ptr)); - cpi->OptimisedTokenListEb = - _ogg_malloc(FrameSize* - sizeof(*cpi->OptimisedTokenListEb)); - cpi->OptimisedTokenList = - _ogg_malloc(FrameSize* - sizeof(*cpi->OptimisedTokenList)); - cpi->OptimisedTokenListHi = - _ogg_malloc(FrameSize* - sizeof(*cpi->OptimisedTokenListHi)); - cpi->OptimisedTokenListPl = - _ogg_malloc(FrameSize* - sizeof(*cpi->OptimisedTokenListPl)); -} - -static void SetupKeyFrame(CP_INSTANCE *cpi) { - /* Make sure the "last frame" buffer contains the first frame data - as well. 
*/ - memcpy ( cpi->yuv0ptr, cpi->yuv1ptr, - cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize ); - - /* Initialise the cpi->pb.display_fragments and other fragment - structures for the first frame. */ - memset( cpi->pb.display_fragments, 1, cpi->pb.UnitFragments ); - memset( cpi->extra_fragments, 1, cpi->pb.UnitFragments ); - - /* Set up for a KEY FRAME */ - cpi->pb.FrameType = KEY_FRAME; -} - -static void AdjustKeyFrameContext(CP_INSTANCE *cpi) { - ogg_uint32_t i; - ogg_uint32_t AvKeyFrameFrequency = - (ogg_uint32_t) (cpi->CurrentFrame / cpi->KeyFrameCount); - ogg_uint32_t AvKeyFrameBytes = - (ogg_uint32_t) (cpi->TotKeyFrameBytes / cpi->KeyFrameCount); - ogg_uint32_t TotalWeight=0; - ogg_int32_t AvKeyFramesPerSecond; - ogg_int32_t MinFrameTargetRate; - - /* Update the frame carry over. */ - cpi->TotKeyFrameBytes += oggpackB_bytes(cpi->oggbuffer); - - /* reset keyframe context and calculate weighted average of last - KEY_FRAME_CONTEXT keyframes */ - for( i = 0 ; i < KEY_FRAME_CONTEXT ; i ++ ) { - if ( i < KEY_FRAME_CONTEXT -1) { - cpi->PriorKeyFrameSize[i] = cpi->PriorKeyFrameSize[i+1]; - cpi->PriorKeyFrameDistance[i] = cpi->PriorKeyFrameDistance[i+1]; - } else { - cpi->PriorKeyFrameSize[KEY_FRAME_CONTEXT - 1] = - oggpackB_bytes(cpi->oggbuffer); - cpi->PriorKeyFrameDistance[KEY_FRAME_CONTEXT - 1] = - cpi->LastKeyFrame; - } - - AvKeyFrameBytes += PriorKeyFrameWeight[i] * - cpi->PriorKeyFrameSize[i]; - AvKeyFrameFrequency += PriorKeyFrameWeight[i] * - cpi->PriorKeyFrameDistance[i]; - TotalWeight += PriorKeyFrameWeight[i]; - } - AvKeyFrameBytes /= TotalWeight; - AvKeyFrameFrequency /= TotalWeight; - AvKeyFramesPerSecond = 100 * cpi->Configuration.OutputFrameRate / - AvKeyFrameFrequency ; - - /* Calculate a new target rate per frame allowing for average key - frame frequency over newest frames . 
*/ - if ( 100 * cpi->Configuration.TargetBandwidth > - AvKeyFrameBytes * AvKeyFramesPerSecond && - (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond )){ - cpi->frame_target_rate = - (ogg_int32_t)(100* cpi->Configuration.TargetBandwidth - - AvKeyFrameBytes * AvKeyFramesPerSecond ) / - ( (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond ) ); - } else { - /* don't let this number get too small!!! */ - cpi->frame_target_rate = 1; - } - - /* minimum allowable frame_target_rate */ - MinFrameTargetRate = (cpi->Configuration.TargetBandwidth / - cpi->Configuration.OutputFrameRate) / 3; - - if(cpi->frame_target_rate < MinFrameTargetRate ) { - cpi->frame_target_rate = MinFrameTargetRate; - } - - cpi->LastKeyFrame = 1; - cpi->LastKeyFrameSize=oggpackB_bytes(cpi->oggbuffer); - -} - -static void UpdateFrame(CP_INSTANCE *cpi){ - - double CorrectionFactor; - - /* Reset the DC predictors. */ - cpi->pb.LastIntraDC = 0; - cpi->pb.InvLastIntraDC = 0; - cpi->pb.LastInterDC = 0; - cpi->pb.InvLastInterDC = 0; - - /* Initialise bit packing mechanism. */ - oggpackB_reset(cpi->oggbuffer); - - /* mark as video frame */ - oggpackB_write(cpi->oggbuffer,0,1); - - /* Write out the frame header information including size. */ - WriteFrameHeader(cpi); - - /* Copy back any extra frags that are to be updated by the codec - as part of the background cleanup task */ - CopyBackExtraFrags(cpi); - - /* Encode the data. */ - EncodeData(cpi); - - /* Adjust drop frame trigger. */ - if ( cpi->pb.FrameType != KEY_FRAME ) { - /* Apply decay factor then add in the last frame size. */ - cpi->DropFrameTriggerBytes = - ((cpi->DropFrameTriggerBytes * (DF_CANDIDATE_WINDOW-1)) / - DF_CANDIDATE_WINDOW) + oggpackB_bytes(cpi->oggbuffer); - }else{ - /* Increase cpi->DropFrameTriggerBytes a little. Just after a key - frame may actually be a good time to drop a frame. 
*/ - cpi->DropFrameTriggerBytes = - (cpi->DropFrameTriggerBytes * DF_CANDIDATE_WINDOW) / - (DF_CANDIDATE_WINDOW-1); - } - - /* Test for overshoot which may require a dropped frame next time - around. If we are already in a drop frame condition but the - previous frame was not dropped then the threshold for continuing - to allow dropped frames is reduced. */ - if ( cpi->DropFrameCandidate ) { - if ( cpi->DropFrameTriggerBytes > - (cpi->frame_target_rate * (DF_CANDIDATE_WINDOW+1)) ) - cpi->DropFrameCandidate = 1; - else - cpi->DropFrameCandidate = 0; - } else { - if ( cpi->DropFrameTriggerBytes > - (cpi->frame_target_rate * ((DF_CANDIDATE_WINDOW*2)-2)) ) - cpi->DropFrameCandidate = 1; - else - cpi->DropFrameCandidate = 0; - } - - /* Update the BpbCorrectionFactor variable according to whether or - not we were close enough with our selection of DCT quantiser. */ - if ( cpi->pb.FrameType != KEY_FRAME ) { - /* Work out a size correction factor. */ - CorrectionFactor = (double)oggpackB_bytes(cpi->oggbuffer) / - (double)cpi->ThisFrameTargetBytes; - - if ( (CorrectionFactor > 1.05) && - (cpi->pb.ThisFrameQualityValue < - cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) ) { - CorrectionFactor = 1.0 + ((CorrectionFactor - 1.0)/2); - if ( CorrectionFactor > 1.5 ) - cpi->BpbCorrectionFactor *= 1.5; - else - cpi->BpbCorrectionFactor *= CorrectionFactor; - - /* Keep BpbCorrectionFactor within limits */ - if ( cpi->BpbCorrectionFactor > MAX_BPB_FACTOR ) - cpi->BpbCorrectionFactor = MAX_BPB_FACTOR; - } else if ( (CorrectionFactor < 0.95) && - (cpi->pb.ThisFrameQualityValue > VERY_BEST_Q) ){ - CorrectionFactor = 1.0 - ((1.0 - CorrectionFactor)/2); - if ( CorrectionFactor < 0.75 ) - cpi->BpbCorrectionFactor *= 0.75; - else - cpi->BpbCorrectionFactor *= CorrectionFactor; - - /* Keep BpbCorrectionFactor within limits */ - if ( cpi->BpbCorrectionFactor < MIN_BPB_FACTOR ) - cpi->BpbCorrectionFactor = MIN_BPB_FACTOR; - } - } - - /* Adjust carry over and or key frame context. 
*/ - if ( cpi->pb.FrameType == KEY_FRAME ) { - /* Adjust the key frame context unless the key frame was very small */ - AdjustKeyFrameContext(cpi); - } else { - /* Update the frame carry over */ - cpi->CarryOver += ((ogg_int32_t)cpi->frame_target_rate - - (ogg_int32_t)oggpackB_bytes(cpi->oggbuffer)); - } - cpi->TotalByteCount += oggpackB_bytes(cpi->oggbuffer); -} - -static void CompressFirstFrame(CP_INSTANCE *cpi) { - ogg_uint32_t i; - - /* set up context of key frame sizes and distances for more local - datarate control */ - for( i = 0 ; i < KEY_FRAME_CONTEXT ; i ++ ) { - cpi->PriorKeyFrameSize[i] = cpi->Configuration.KeyFrameDataTarget; - cpi->PriorKeyFrameDistance[i] = cpi->pb.info.keyframe_frequency_force; - } - - /* Keep track of the total number of Key Frames Coded. */ - cpi->KeyFrameCount = 1; - cpi->LastKeyFrame = 1; - cpi->TotKeyFrameBytes = 0; - - /* A key frame is not a dropped frame there for reset the count of - consequative dropped frames. */ - cpi->DropCount = 0; - - SetupKeyFrame(cpi); - - /* Calculate a new target rate per frame allowing for average key - frame frequency and size thus far. */ - if ( cpi->Configuration.TargetBandwidth > - ((cpi->Configuration.KeyFrameDataTarget * - cpi->Configuration.OutputFrameRate)/ - cpi->pb.info.keyframe_frequency) ) { - - cpi->frame_target_rate = - (ogg_int32_t)((cpi->Configuration.TargetBandwidth - - ((cpi->Configuration.KeyFrameDataTarget * - cpi->Configuration.OutputFrameRate)/ - cpi->pb.info.keyframe_frequency)) / - cpi->Configuration.OutputFrameRate); - }else - cpi->frame_target_rate = 1; - - /* Set baseline frame target rate. */ - cpi->BaseLineFrameTargetRate = cpi->frame_target_rate; - - /* A key frame is not a dropped frame there for reset the count of - consequative dropped frames. */ - cpi->DropCount = 0; - - /* Initialise drop frame trigger to 5 frames worth of data. 
*/ - cpi->DropFrameTriggerBytes = cpi->frame_target_rate * DF_CANDIDATE_WINDOW; - - /* Set a target size for this key frame based upon the baseline - target and frequency */ - cpi->ThisFrameTargetBytes = cpi->Configuration.KeyFrameDataTarget; - - /* Get a DCT quantizer level for the key frame. */ - cpi->MotionScore = cpi->pb.UnitFragments; - - RegulateQ(cpi, cpi->pb.UnitFragments); - - cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue; - - /* Initialise quantizer. */ - UpdateQC(cpi, cpi->pb.ThisFrameQualityValue ); - - /* Initialise the cpi->pb.display_fragments and other fragment - structures for the first frame. */ - for ( i = 0; i < cpi->pb.UnitFragments; i ++ ) - cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue; - - /* Compress and output the frist frame. */ - PickIntra( cpi, - cpi->pb.YSBRows, cpi->pb.YSBCols); - UpdateFrame(cpi); - - /* Initialise the carry over rate targeting variables. */ - cpi->CarryOver = 0; - -} - -static void CompressKeyFrame(CP_INSTANCE *cpi){ - ogg_uint32_t i; - - /* Before we compress reset the carry over to the actual frame carry over */ - cpi->CarryOver = cpi->Configuration.TargetBandwidth * cpi->CurrentFrame / - cpi->Configuration.OutputFrameRate - cpi->TotalByteCount; - - /* Keep track of the total number of Key Frames Coded */ - cpi->KeyFrameCount += 1; - - /* A key frame is not a dropped frame there for reset the count of - consequative dropped frames. */ - cpi->DropCount = 0; - - SetupKeyFrame(cpi); - - /* set a target size for this frame */ - cpi->ThisFrameTargetBytes = (ogg_int32_t) cpi->frame_target_rate + - ( (cpi->Configuration.KeyFrameDataTarget - cpi->frame_target_rate) * - cpi->LastKeyFrame / cpi->pb.info.keyframe_frequency_force ); - - if ( cpi->ThisFrameTargetBytes > cpi->Configuration.KeyFrameDataTarget ) - cpi->ThisFrameTargetBytes = cpi->Configuration.KeyFrameDataTarget; - - /* Get a DCT quantizer level for the key frame. 
*/ - cpi->MotionScore = cpi->pb.UnitFragments; - - RegulateQ(cpi, cpi->pb.UnitFragments); - - cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue; - - /* Initialise DCT tables. */ - UpdateQC(cpi, cpi->pb.ThisFrameQualityValue ); - - /* Initialise the cpi->pb.display_fragments and other fragment - structures for the first frame. */ - for ( i = 0; i < cpi->pb.UnitFragments; i ++ ) - cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue; - - - /* Compress and output the frist frame. */ - PickIntra( cpi, - cpi->pb.YSBRows, cpi->pb.YSBCols); - UpdateFrame(cpi); - -} - -static void CompressFrame( CP_INSTANCE *cpi) { - ogg_int32_t min_blocks_per_frame; - ogg_uint32_t i; - int DropFrame = 0; - ogg_uint32_t ResidueBlocksAdded=0; - ogg_uint32_t KFIndicator = 0; - - double QModStep; - double QModifier = 1.0; - - /* Clear down the macro block level mode and MV arrays. */ - for ( i = 0; i < cpi->pb.UnitFragments; i++ ) { - cpi->pb.FragCodingMethod[i] = CODE_INTER_NO_MV; /* Default coding mode */ - cpi->pb.FragMVect[i].x = 0; - cpi->pb.FragMVect[i].y = 0; - } - - /* Default to delta frames. */ - cpi->pb.FrameType = DELTA_FRAME; - - /* Clear down the difference arrays for the current frame. */ - memset( cpi->pb.display_fragments, 0, cpi->pb.UnitFragments ); - memset( cpi->extra_fragments, 0, cpi->pb.UnitFragments ); - - /* Calculate the target bytes for this frame. */ - cpi->ThisFrameTargetBytes = cpi->frame_target_rate; - - /* Correct target to try and compensate for any overall rate error - that is developing */ - - /* Set the max allowed Q for this frame based upon carry over - history. First set baseline worst Q for this frame */ - cpi->Configuration.ActiveMaxQ = cpi->Configuration.MaxQ + 10; - if ( cpi->Configuration.ActiveMaxQ >= Q_TABLE_SIZE ) - cpi->Configuration.ActiveMaxQ = Q_TABLE_SIZE - 1; - - /* Make a further adjustment based upon the carry over and recent - history.. 
cpi->Configuration.ActiveMaxQ reduced by 1 for each 1/2 - seconds worth of -ve carry over up to a limit of 6. Also - cpi->Configuration.ActiveMaxQ reduced if frame is a - "DropFrameCandidate". Remember that if we are behind the bit - target carry over is -ve. */ - if ( cpi->CarryOver < 0 ) { - if ( cpi->DropFrameCandidate ) { - cpi->Configuration.ActiveMaxQ -= 4; - } - - if ( cpi->CarryOver < - -((ogg_int32_t)cpi->Configuration.TargetBandwidth*3) ) - cpi->Configuration.ActiveMaxQ -= 6; - else - cpi->Configuration.ActiveMaxQ += - (ogg_int32_t) ((cpi->CarryOver*2) / - (ogg_int32_t)cpi->Configuration.TargetBandwidth); - - /* Check that we have not dropped quality too far */ - if ( cpi->Configuration.ActiveMaxQ < cpi->Configuration.MaxQ ) - cpi->Configuration.ActiveMaxQ = cpi->Configuration.MaxQ; - } - - /* Calculate the Q Modifier step size required to cause a step down - from full target bandwidth to 40% of target between max Q and - best Q */ - QModStep = 0.5 / (double)((Q_TABLE_SIZE - 1) - - cpi->Configuration.ActiveMaxQ); - - /* Set up the cpi->QTargetModifier[] table. */ - for ( i = 0; i < cpi->Configuration.ActiveMaxQ; i++ ) { - cpi->QTargetModifier[i] = QModifier; - } - for ( i = cpi->Configuration.ActiveMaxQ; i < Q_TABLE_SIZE; i++ ) { - cpi->QTargetModifier[i] = QModifier; - QModifier -= QModStep; - } - - /* if we are allowed to drop frames and are falling behind (eg more - than x frames worth of bandwidth) */ - if ( cpi->pb.info.dropframes_p && - ( cpi->DropCount < cpi->MaxConsDroppedFrames) && - ( cpi->CarryOver < - -((ogg_int32_t)cpi->Configuration.TargetBandwidth)) && - ( cpi->DropFrameCandidate) ) { - /* (we didn't do this frame so we should have some left over for - the next frame) */ - cpi->CarryOver += cpi->frame_target_rate; - DropFrame = 1; - cpi->DropCount ++; - - /* Adjust DropFrameTriggerBytes to account for the saving achieved. 
*/ - cpi->DropFrameTriggerBytes = - (cpi->DropFrameTriggerBytes * - (DF_CANDIDATE_WINDOW-1))/DF_CANDIDATE_WINDOW; - - /* Even if we drop a frame we should account for it when - considering key frame seperation. */ - cpi->LastKeyFrame++; - } else if ( cpi->CarryOver < - -((ogg_int32_t)cpi->Configuration.TargetBandwidth * 2) ) { - /* Reduce frame bit target by 1.75% for each 1/10th of a seconds - worth of -ve carry over down to a minimum of 65% of its - un-modified value. */ - - cpi->ThisFrameTargetBytes = - (ogg_uint32_t)(cpi->ThisFrameTargetBytes * 0.65); - } else if ( cpi->CarryOver < 0 ) { - /* Note that cpi->CarryOver is a -ve here hence 1.0 "+" ... */ - cpi->ThisFrameTargetBytes = - (ogg_uint32_t)(cpi->ThisFrameTargetBytes * - (1.0 + ( ((cpi->CarryOver * 10)/ - ((ogg_int32_t)cpi-> - Configuration.TargetBandwidth)) * 0.0175) )); - } - - if ( !DropFrame ) { - /* pick all the macroblock modes and motion vectors */ - ogg_uint32_t InterError; - ogg_uint32_t IntraError; - - - /* Set Baseline filter level. */ - ConfigurePP( &cpi->pp, cpi->pb.info.noise_sensitivity); - - /* Score / analyses the fragments. */ - cpi->MotionScore = YUVAnalyseFrame(&cpi->pp, &KFIndicator ); - - /* Get the baseline Q value */ - RegulateQ( cpi, cpi->MotionScore ); - - /* Recode blocks if the error score in last frame was high. */ - ResidueBlocksAdded = 0; - for ( i = 0; i < cpi->pb.UnitFragments; i++ ){ - if ( !cpi->pb.display_fragments[i] ){ - if ( cpi->LastCodedErrorScore[i] >= - ResidueErrorThresh[cpi->pb.FrameQIndex] ) { - cpi->pb.display_fragments[i] = 1; /* Force block update */ - cpi->extra_fragments[i] = 1; /* Insures up to date - pixel data is used. */ - ResidueBlocksAdded ++; - } - } - } - - /* Adjust the motion score to allow for residue blocks - added. These are assumed to have below average impact on - bitrate (Hence ResidueBlockFactor). 
*/ - cpi->MotionScore = cpi->MotionScore + - (ResidueBlocksAdded / ResidueBlockFactor[cpi->pb.FrameQIndex]); - - /* Estimate the min number of blocks at best Q */ - min_blocks_per_frame = - (ogg_int32_t)(cpi->ThisFrameTargetBytes / - GetEstimatedBpb( cpi, VERY_BEST_Q )); - if ( min_blocks_per_frame == 0 ) - min_blocks_per_frame = 1; - - /* If we have less than this number then consider adding in some - extra blocks */ - if ( cpi->MotionScore < min_blocks_per_frame ) { - min_blocks_per_frame = - cpi->MotionScore + - (ogg_int32_t)(((min_blocks_per_frame - cpi->MotionScore) * 4) / 3 ); - UpRegulateDataStream( cpi, VERY_BEST_Q, min_blocks_per_frame ); - }else{ - /* Reset control variable for best quality final pass. */ - cpi->FinalPassLastPos = 0; - } - - /* Get the modified Q prediction taking into account extra blocks added. */ - RegulateQ( cpi, cpi->MotionScore ); - - /* Unless we are already well ahead (4 seconds of data) of the - projected bitrate */ - if ( cpi->CarryOver < - (ogg_int32_t)(cpi->Configuration.TargetBandwidth * 4) ){ - /* Look at the predicted Q (pbi->FrameQIndex). Adjust the - target bits for this frame based upon projected Q and - re-calculate. The idea is that if the Q is better than a - given (good enough) level then we will try and save some bits - for use in more difficult segments. 
*/ - cpi->ThisFrameTargetBytes = - (ogg_int32_t) (cpi->ThisFrameTargetBytes * - cpi->QTargetModifier[cpi->pb.FrameQIndex]); - - /* Recalculate Q again */ - RegulateQ( cpi, cpi->MotionScore ); - } - - - /* Select modes and motion vectors for each of the blocks : return - an error score for inter and intra */ - PickModes( cpi, cpi->pb.YSBRows, cpi->pb.YSBCols, - cpi->pb.info.width, - &InterError, &IntraError ); - - /* decide whether we really should have made this frame a key frame */ - /* forcing out a keyframe if the max interval is up is done at a higher level */ - if( cpi->pb.info.keyframe_auto_p){ - if( ( 2* IntraError < 5 * InterError ) - && ( KFIndicator >= (ogg_uint32_t) - cpi->pb.info.keyframe_auto_threshold) - && ( cpi->LastKeyFrame > cpi->pb.info.keyframe_mindistance) - ){ - CompressKeyFrame(cpi); /* Code a key frame */ - return; - } - - } - - /* Increment the frames since last key frame count */ - cpi->LastKeyFrame++; - - /* Proceed with the frame update. */ - UpdateFrame(cpi); - cpi->DropCount = 0; - - if ( cpi->MotionScore > 0 ){ - /* Note the Quantizer used for each block coded. 
*/ - for ( i = 0; i < cpi->pb.UnitFragments; i++ ){ - if ( cpi->pb.display_fragments[i] ){ - cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue; - } - } - - } - }else{ - /* even if we 'drop' a frame, a placeholder must be written as we - currently assume fixed frame rate timebase as Ogg mapping - invariant */ - UpdateFrame(cpi); - } -} - -/********************** The toplevel: encode ***********************/ - -static int _ilog(unsigned int v){ - int ret=0; - while(v){ - ret++; - v>>=1; - } - return(ret); -} - -static void theora_encode_dispatch_init(CP_INSTANCE *cpi); - -int theora_encode_init(theora_state *th, theora_info *c){ - int i; - - CP_INSTANCE *cpi; - - memset(th, 0, sizeof(*th)); - /*Currently only the 4:2:0 format is supported.*/ - if(c->pixelformat!=OC_PF_420)return OC_IMPL; - th->internal_encode=cpi=_ogg_calloc(1,sizeof(*cpi)); - theora_encode_dispatch_init(cpi); - - dsp_static_init (&cpi->dsp); - memcpy (&cpi->pb.dsp, &cpi->dsp, sizeof(DspFunctions)); - - c->version_major=TH_VERSION_MAJOR; - c->version_minor=TH_VERSION_MINOR; - c->version_subminor=TH_VERSION_SUB; - - InitTmpBuffers(&cpi->pb); - InitPPInstance(&cpi->pp, &cpi->dsp); - - /* Initialise Configuration structure to legal values */ - if(c->quality>63)c->quality=63; - if(c->quality<0)c->quality=32; - if(c->target_bitrate<0)c->target_bitrate=0; - /* we clamp target_bitrate to 24 bits after setting up the encoder */ - - cpi->Configuration.BaseQ = c->quality; - cpi->Configuration.FirstFrameQ = c->quality; - cpi->Configuration.MaxQ = c->quality; - cpi->Configuration.ActiveMaxQ = c->quality; - - cpi->MVChangeFactor = 14; - cpi->FourMvChangeFactor = 8; - cpi->MinImprovementForNewMV = 25; - cpi->ExhaustiveSearchThresh = 2500; - cpi->MinImprovementForFourMV = 100; - cpi->FourMVThreshold = 10000; - cpi->BitRateCapFactor = 1.5; - cpi->InterTripOutThresh = 5000; - cpi->MVEnabled = 1; - cpi->InterCodeCount = 127; - cpi->BpbCorrectionFactor = 1.0; - cpi->GoldenFrameEnabled = 1; - cpi->InterPrediction = 
1; - cpi->MotionCompensation = 1; - cpi->ThreshMapThreshold = 5; - cpi->MaxConsDroppedFrames = 1; - - /* Set encoder flags. */ - /* if not AutoKeyframing cpi->ForceKeyFrameEvery = is frequency */ - if(!c->keyframe_auto_p) - c->keyframe_frequency_force = c->keyframe_frequency; - - /* Set the frame rate variables. */ - if ( c->fps_numerator < 1 ) - c->fps_numerator = 1; - if ( c->fps_denominator < 1 ) - c->fps_denominator = 1; - - /* don't go too nuts on keyframe spacing; impose a high limit to - make certain the granulepos encoding strategy works */ - if(c->keyframe_frequency_force>32768)c->keyframe_frequency_force=32768; - if(c->keyframe_mindistance>32768)c->keyframe_mindistance=32768; - if(c->keyframe_mindistance>c->keyframe_frequency_force) - c->keyframe_mindistance=c->keyframe_frequency_force; - cpi->pb.keyframe_granule_shift=_ilog(c->keyframe_frequency_force-1); - - /* clamp the target_bitrate to a maximum of 24 bits so we get a - more meaningful value when we write this out in the header. */ - if(c->target_bitrate>(1<<24)-1)c->target_bitrate=(1<<24)-1; - - /* copy in config */ - memcpy(&cpi->pb.info,c,sizeof(*c)); - th->i=&cpi->pb.info; - th->granulepos=-1; - - /* Set up default values for QTargetModifier[Q_TABLE_SIZE] table */ - for ( i = 0; i < Q_TABLE_SIZE; i++ ) - cpi->QTargetModifier[i] = 1.0; - - /* Set up an encode buffer */ - cpi->oggbuffer = _ogg_malloc(sizeof(oggpack_buffer)); - oggpackB_writeinit(cpi->oggbuffer); - - /* Set data rate related variables. 
*/ - cpi->Configuration.TargetBandwidth = (c->target_bitrate) / 8; - - cpi->Configuration.OutputFrameRate = - (double)( c->fps_numerator / - c->fps_denominator ); - - cpi->frame_target_rate = cpi->Configuration.TargetBandwidth / - cpi->Configuration.OutputFrameRate; - - /* Set key frame data rate target; this is nominal keyframe size */ - cpi->Configuration.KeyFrameDataTarget = (c->keyframe_data_target_bitrate * - c->fps_denominator / - c->fps_numerator ) / 8; - - /* Note the height and width in the pre-processor control structure. */ - cpi->ScanConfig.VideoFrameHeight = cpi->pb.info.height; - cpi->ScanConfig.VideoFrameWidth = cpi->pb.info.width; - - InitFrameDetails(&cpi->pb); - EInitFragmentInfo(cpi); - EInitFrameInfo(cpi); - - /* Set up pre-processor config pointers. */ - cpi->ScanConfig.Yuv0ptr = cpi->yuv0ptr; - cpi->ScanConfig.Yuv1ptr = cpi->yuv1ptr; - cpi->ScanConfig.SrfWorkSpcPtr = cpi->ConvDestBuffer; - cpi->ScanConfig.disp_fragments = cpi->pb.display_fragments; - cpi->ScanConfig.RegionIndex = cpi->pb.pixel_index_table; - - /* Initialise the pre-processor module. */ - ScanYUVInit(&cpi->pp, &(cpi->ScanConfig)); - - /* Initialise Motion compensation */ - InitMotionCompensation(cpi); - - /* Initialise the compression process. */ - /* We always start at frame 1 */ - cpi->CurrentFrame = 1; - - /* Reset the rate targeting correction factor. */ - cpi->BpbCorrectionFactor = 1.0; - - cpi->TotalByteCount = 0; - cpi->TotalMotionScore = 0; - - /* Up regulation variables. */ - cpi->FinalPassLastPos = 0; /* Used to regulate a final unrestricted pass. */ - cpi->LastEndSB = 0; /* Where we were in the loop last time. */ - cpi->ResidueLastEndSB = 0; /* Where we were in the residue update - loop last time. 
*/ - - InitHuffmanSet(&cpi->pb); - - /* This makes sure encoder version specific tables are initialised */ - memcpy(&cpi->pb.quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info)); - InitQTables(&cpi->pb); - - /* Indicate that the next frame to be compressed is the first in the - current clip. */ - cpi->ThisIsFirstFrame = 1; - cpi->readyflag = 1; - - cpi->pb.HeadersWritten = 0; - /*We overload this flag to track header output.*/ - cpi->doneflag=-3; - - return 0; -} - -int theora_encode_YUVin(theora_state *t, - yuv_buffer *yuv){ - ogg_int32_t i; - unsigned char *LocalDataPtr; - unsigned char *InputDataPtr; - CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode); - - if(!cpi->readyflag)return OC_EINVAL; - if(cpi->doneflag>0)return OC_EINVAL; - - /* If frame size has changed, abort out for now */ - if (yuv->y_height != (int)cpi->pb.info.height || - yuv->y_width != (int)cpi->pb.info.width ) - return(-1); - - - /* Copy over input YUV to internal YUV buffers. */ - /* we invert the image for backward compatibility with VP3 */ - /* First copy over the Y data */ - LocalDataPtr = cpi->yuv1ptr + yuv->y_width*(yuv->y_height - 1); - InputDataPtr = yuv->y; - for ( i = 0; i < yuv->y_height; i++ ){ - memcpy( LocalDataPtr, InputDataPtr, yuv->y_width ); - LocalDataPtr -= yuv->y_width; - InputDataPtr += yuv->y_stride; - } - - /* Now copy over the U data */ - LocalDataPtr = &cpi->yuv1ptr[(yuv->y_height * yuv->y_width)]; - LocalDataPtr += yuv->uv_width*(yuv->uv_height - 1); - InputDataPtr = yuv->u; - for ( i = 0; i < yuv->uv_height; i++ ){ - memcpy( LocalDataPtr, InputDataPtr, yuv->uv_width ); - LocalDataPtr -= yuv->uv_width; - InputDataPtr += yuv->uv_stride; - } - - /* Now copy over the V data */ - LocalDataPtr = - &cpi->yuv1ptr[((yuv->y_height*yuv->y_width)*5)/4]; - LocalDataPtr += yuv->uv_width*(yuv->uv_height - 1); - InputDataPtr = yuv->v; - for ( i = 0; i < yuv->uv_height; i++ ){ - memcpy( LocalDataPtr, InputDataPtr, yuv->uv_width ); - LocalDataPtr -= yuv->uv_width; - InputDataPtr 
+= yuv->uv_stride; - } - - /* Special case for first frame */ - if ( cpi->ThisIsFirstFrame ){ - CompressFirstFrame(cpi); - cpi->ThisIsFirstFrame = 0; - cpi->ThisIsKeyFrame = 0; - } else { - - /* don't allow generating invalid files that overflow the p-frame - shift, even if keyframe_auto_p is turned off */ - if(cpi->LastKeyFrame >= (ogg_uint32_t) - cpi->pb.info.keyframe_frequency_force) - cpi->ThisIsKeyFrame = 1; - - if ( cpi->ThisIsKeyFrame ) { - CompressKeyFrame(cpi); - cpi->ThisIsKeyFrame = 0; - } else { - /* Compress the frame. */ - CompressFrame( cpi ); - } - - } - - /* Update stats variables. */ - cpi->LastFrameSize = oggpackB_bytes(cpi->oggbuffer); - cpi->CurrentFrame++; - cpi->packetflag=1; - - t->granulepos= - ((cpi->CurrentFrame - cpi->LastKeyFrame)<pb.keyframe_granule_shift)+ - cpi->LastKeyFrame - 1; - - return 0; -} - -int theora_encode_packetout( theora_state *t, int last_p, ogg_packet *op){ - CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode); - long bytes=oggpackB_bytes(cpi->oggbuffer); - - if(!bytes)return(0); - if(!cpi->packetflag)return(0); - if(cpi->doneflag>0)return(-1); - - op->packet=oggpackB_get_buffer(cpi->oggbuffer); - op->bytes=bytes; - op->b_o_s=0; - op->e_o_s=last_p; - - op->packetno=cpi->CurrentFrame; - op->granulepos=t->granulepos; - - cpi->packetflag=0; - if(last_p)cpi->doneflag=1; - - return 1; -} - -static void _tp_writebuffer(oggpack_buffer *opb, const char *buf, const long len) -{ - long i; - - for (i = 0; i < len; i++) - oggpackB_write(opb, *buf++, 8); -} - -static void _tp_writelsbint(oggpack_buffer *opb, long value) -{ - oggpackB_write(opb, value&0xFF, 8); - oggpackB_write(opb, value>>8&0xFF, 8); - oggpackB_write(opb, value>>16&0xFF, 8); - oggpackB_write(opb, value>>24&0xFF, 8); -} - -/* build the initial short header for stream recognition and format */ -int theora_encode_header(theora_state *t, ogg_packet *op){ - CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode); - int offset_y; - - oggpackB_reset(cpi->oggbuffer); - 
oggpackB_write(cpi->oggbuffer,0x80,8); - _tp_writebuffer(cpi->oggbuffer, "theora", 6); - - oggpackB_write(cpi->oggbuffer,TH_VERSION_MAJOR,8); - oggpackB_write(cpi->oggbuffer,TH_VERSION_MINOR,8); - oggpackB_write(cpi->oggbuffer,TH_VERSION_SUB,8); - - oggpackB_write(cpi->oggbuffer,cpi->pb.info.width>>4,16); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.height>>4,16); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.frame_width,24); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.frame_height,24); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.offset_x,8); - /* Applications use offset_y to mean offset from the top of the image; the - * meaning in the bitstream is the opposite (from the bottom). Transform. - */ - offset_y = cpi->pb.info.height - cpi->pb.info.frame_height - - cpi->pb.info.offset_y; - oggpackB_write(cpi->oggbuffer,offset_y,8); - - oggpackB_write(cpi->oggbuffer,cpi->pb.info.fps_numerator,32); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.fps_denominator,32); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.aspect_numerator,24); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.aspect_denominator,24); - - oggpackB_write(cpi->oggbuffer,cpi->pb.info.colorspace,8); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.target_bitrate,24); - oggpackB_write(cpi->oggbuffer,cpi->pb.info.quality,6); - - oggpackB_write(cpi->oggbuffer,cpi->pb.keyframe_granule_shift,5); - - oggpackB_write(cpi->oggbuffer,cpi->pb.info.pixelformat,2); - - oggpackB_write(cpi->oggbuffer,0,3); /* spare config bits */ - - op->packet=oggpackB_get_buffer(cpi->oggbuffer); - op->bytes=oggpackB_bytes(cpi->oggbuffer); - - op->b_o_s=1; - op->e_o_s=0; - - op->packetno=0; - - op->granulepos=0; - cpi->packetflag=0; - - return(0); -} - -/* build the comment header packet from the passed metadata */ -int theora_encode_comment(theora_comment *tc, ogg_packet *op) -{ - const char *vendor = theora_version_string(); - const int vendor_length = strlen(vendor); - oggpack_buffer *opb; - - opb = _ogg_malloc(sizeof(oggpack_buffer)); - 
oggpackB_writeinit(opb); - oggpackB_write(opb, 0x81, 8); - _tp_writebuffer(opb, "theora", 6); - - _tp_writelsbint(opb, vendor_length); - _tp_writebuffer(opb, vendor, vendor_length); - - _tp_writelsbint(opb, tc->comments); - if(tc->comments){ - int i; - for(i=0;icomments;i++){ - if(tc->user_comments[i]){ - _tp_writelsbint(opb,tc->comment_lengths[i]); - _tp_writebuffer(opb,tc->user_comments[i],tc->comment_lengths[i]); - }else{ - oggpackB_write(opb,0,32); - } - } - } - op->bytes=oggpack_bytes(opb); - - /* So we're expecting the application will free this? */ - op->packet=_ogg_malloc(oggpack_bytes(opb)); - memcpy(op->packet, oggpack_get_buffer(opb), oggpack_bytes(opb)); - oggpack_writeclear(opb); - - _ogg_free(opb); - - op->b_o_s=0; - op->e_o_s=0; - - op->packetno=0; - op->granulepos=0; - - return (0); -} - -/* build the final header packet with the tables required - for decode */ -int theora_encode_tables(theora_state *t, ogg_packet *op){ - CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode); - - oggpackB_reset(cpi->oggbuffer); - oggpackB_write(cpi->oggbuffer,0x82,8); - _tp_writebuffer(cpi->oggbuffer,"theora",6); - - WriteQTables(&cpi->pb,cpi->oggbuffer); - WriteHuffmanTrees(cpi->pb.HuffRoot_VP3x,cpi->oggbuffer); - - op->packet=oggpackB_get_buffer(cpi->oggbuffer); - op->bytes=oggpackB_bytes(cpi->oggbuffer); - - op->b_o_s=0; - op->e_o_s=0; - - op->packetno=0; - - op->granulepos=0; - cpi->packetflag=0; - - cpi->pb.HeadersWritten = 1; - - return(0); -} - -static void theora_encode_clear (theora_state *th){ - CP_INSTANCE *cpi; - cpi=(CP_INSTANCE *)th->internal_encode; - if(cpi){ - - ClearHuffmanSet(&cpi->pb); - ClearFragmentInfo(&cpi->pb); - ClearFrameInfo(&cpi->pb); - EClearFragmentInfo(cpi); - EClearFrameInfo(cpi); - ClearTmpBuffers(&cpi->pb); - ClearPPInstance(&cpi->pp); - - oggpackB_writeclear(cpi->oggbuffer); - _ogg_free(cpi->oggbuffer); - _ogg_free(cpi); - } - - memset(th,0,sizeof(*th)); -} - - -/* returns, in seconds, absolute time of current packet in given - 
logical stream */ -static double theora_encode_granule_time(theora_state *th, - ogg_int64_t granulepos){ -#ifndef THEORA_DISABLE_FLOAT - CP_INSTANCE *cpi=(CP_INSTANCE *)(th->internal_encode); - PB_INSTANCE *pbi=(PB_INSTANCE *)(th->internal_decode); - - if(cpi)pbi=&cpi->pb; - - if(granulepos>=0){ - ogg_int64_t iframe=granulepos>>pbi->keyframe_granule_shift; - ogg_int64_t pframe=granulepos-(iframe<keyframe_granule_shift); - - return (iframe+pframe)* - ((double)pbi->info.fps_denominator/pbi->info.fps_numerator); - - } -#endif - - return(-1); /* negative granulepos or float calculations disabled */ -} - -/* returns frame number of current packet in given logical stream */ -static ogg_int64_t theora_encode_granule_frame(theora_state *th, - ogg_int64_t granulepos){ - CP_INSTANCE *cpi=(CP_INSTANCE *)(th->internal_encode); - PB_INSTANCE *pbi=(PB_INSTANCE *)(th->internal_decode); - - if(cpi)pbi=&cpi->pb; - - if(granulepos>=0){ - ogg_int64_t iframe=granulepos>>pbi->keyframe_granule_shift; - ogg_int64_t pframe=granulepos-(iframe<keyframe_granule_shift); - - return (iframe+pframe-1); - } - - return(-1); -} - - -static int theora_encode_control(theora_state *th,int req, - void *buf,size_t buf_sz) { - CP_INSTANCE *cpi; - PB_INSTANCE *pbi; - int value; - - if(th == NULL) - return TH_EFAULT; - - cpi = th->internal_encode; - pbi = &cpi->pb; - - switch(req) { - case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE: - { - ogg_uint32_t keyframe_frequency_force; - if( (buf==NULL) || (buf_sz!=sizeof(ogg_uint32_t))) return TH_EINVAL; - keyframe_frequency_force=*(ogg_uint32_t *)buf; - - keyframe_frequency_force= - OC_MINI(keyframe_frequency_force, - 1U<pb.keyframe_granule_shift); - cpi->pb.info.keyframe_frequency_force= - OC_MAXI(1,keyframe_frequency_force); - *(ogg_uint32_t *)buf=cpi->pb.info.keyframe_frequency_force; - return 0; - } - case TH_ENCCTL_SET_QUANT_PARAMS: - if( ( buf==NULL&&buf_sz!=0 ) - || ( buf!=NULL&&buf_sz!=sizeof(th_quant_info) ) - || cpi->pb.HeadersWritten ){ - return TH_EINVAL; 
- } - - if(buf==NULL) - memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info)); - else - memcpy(&pbi->quant_info, buf, sizeof(th_quant_info)); - InitQTables(pbi); - - return 0; - case TH_ENCCTL_SET_VP3_COMPATIBLE: - if(cpi->pb.HeadersWritten) - return TH_EINVAL; - - memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info)); - InitQTables(pbi); - - return 0; - case TH_ENCCTL_SET_SPLEVEL: - if(buf == NULL || buf_sz != sizeof(int)) - return TH_EINVAL; - - memcpy(&value, buf, sizeof(int)); - - switch(value) { - case 0: - cpi->MotionCompensation = 1; - pbi->info.quick_p = 0; - break; - - case 1: - cpi->MotionCompensation = 1; - pbi->info.quick_p = 1; - break; - - case 2: - cpi->MotionCompensation = 0; - pbi->info.quick_p = 1; - break; - - default: - return TH_EINVAL; - } - - return 0; - case TH_ENCCTL_GET_SPLEVEL_MAX: - value = 2; - memcpy(buf, &value, sizeof(int)); - return 0; - default: - return TH_EIMPL; - } -} - -static void theora_encode_dispatch_init(CP_INSTANCE *cpi){ - cpi->dispatch_vtbl.clear=theora_encode_clear; - cpi->dispatch_vtbl.control=theora_encode_control; - cpi->dispatch_vtbl.granule_frame=theora_encode_granule_frame; - cpi->dispatch_vtbl.granule_time=theora_encode_granule_time; -} diff --git a/Engine/lib/libtheora/lib/enc/frarray.c b/Engine/lib/libtheora/lib/enc/frarray.c deleted file mode 100644 index 51b327206..000000000 --- a/Engine/lib/libtheora/lib/enc/frarray.c +++ /dev/null @@ -1,243 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: frarray.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include -#include "codec_internal.h" -#include "block_inline.h" - -/* Long run bit string coding */ -static ogg_uint32_t FrArrayCodeSBRun( CP_INSTANCE *cpi, ogg_uint32_t value){ - ogg_uint32_t CodedVal = 0; - ogg_uint32_t CodedBits = 0; - - /* Coding scheme: - Codeword RunLength - 0 1 - 10x 2-3 - 110x 4-5 - 1110xx 6-9 - 11110xxx 10-17 - 111110xxxx 18-33 - 111111xxxxxxxxxxxx 34-4129 */ - - if ( value == 1 ){ - CodedVal = 0; - CodedBits = 1; - } else if ( value <= 3 ) { - CodedVal = 0x0004 + (value - 2); - CodedBits = 3; - } else if ( value <= 5 ) { - CodedVal = 0x000C + (value - 4); - CodedBits = 4; - } else if ( value <= 9 ) { - CodedVal = 0x0038 + (value - 6); - CodedBits = 6; - } else if ( value <= 17 ) { - CodedVal = 0x00F0 + (value - 10); - CodedBits = 8; - } else if ( value <= 33 ) { - CodedVal = 0x03E0 + (value - 18); - CodedBits = 10; - } else { - CodedVal = 0x3F000 + (value - 34); - CodedBits = 18; - } - - /* Add the bits to the encode holding buffer. 
*/ - oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits ); - - return CodedBits; -} - -/* Short run bit string coding */ -static ogg_uint32_t FrArrayCodeBlockRun( CP_INSTANCE *cpi, - ogg_uint32_t value ) { - ogg_uint32_t CodedVal = 0; - ogg_uint32_t CodedBits = 0; - - /* Coding scheme: - Codeword RunLength - 0x 1-2 - 10x 3-4 - 110x 5-6 - 1110xx 7-10 - 11110xx 11-14 - 11111xxxx 15-30 */ - - if ( value <= 2 ) { - CodedVal = value - 1; - CodedBits = 2; - } else if ( value <= 4 ) { - CodedVal = 0x0004 + (value - 3); - CodedBits = 3; - - } else if ( value <= 6 ) { - CodedVal = 0x000C + (value - 5); - CodedBits = 4; - - } else if ( value <= 10 ) { - CodedVal = 0x0038 + (value - 7); - CodedBits = 6; - - } else if ( value <= 14 ) { - CodedVal = 0x0078 + (value - 11); - CodedBits = 7; - } else { - CodedVal = 0x01F0 + (value - 15); - CodedBits = 9; - } - - /* Add the bits to the encode holding buffer. */ - oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits ); - - return CodedBits; -} - -void PackAndWriteDFArray( CP_INSTANCE *cpi ){ - ogg_uint32_t i; - unsigned char val; - ogg_uint32_t run_count; - - ogg_uint32_t SB, MB, B; /* Block, MB and SB loop variables */ - ogg_uint32_t BListIndex = 0; - ogg_uint32_t LastSbBIndex = 0; - ogg_int32_t DfBlockIndex; /* Block index in display_fragments */ - - /* Initialise workspaces */ - memset( cpi->pb.SBFullyFlags, 1, cpi->pb.SuperBlocks); - memset( cpi->pb.SBCodedFlags, 0, cpi->pb.SuperBlocks ); - memset( cpi->PartiallyCodedFlags, 0, cpi->pb.SuperBlocks ); - memset( cpi->BlockCodedFlags, 0, cpi->pb.UnitFragments); - - for( SB = 0; SB < cpi->pb.SuperBlocks; SB++ ) { - /* Check for coded blocks and macro-blocks */ - for ( MB=0; MB<4; MB++ ) { - /* If MB in frame */ - if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) >= 0 ) { - for ( B=0; B<4; B++ ) { - DfBlockIndex = QuadMapToIndex1( cpi->pb.BlockMap,SB, MB, B ); - - /* Does Block lie in frame: */ - if ( DfBlockIndex >= 0 ) { - /* In Frame: If it is not coded then this SB is only - partly 
coded.: */ - if ( cpi->pb.display_fragments[DfBlockIndex] ) { - cpi->pb.SBCodedFlags[SB] = 1; /* SB at least partly coded */ - cpi->BlockCodedFlags[BListIndex] = 1; /* Block is coded */ - - }else{ - cpi->pb.SBFullyFlags[SB] = 0; /* SB not fully coded */ - cpi->BlockCodedFlags[BListIndex] = 0; /* Block is not coded */ - } - - BListIndex++; - } - } - } - } - - /* Is the SB fully coded or uncoded. - If so then backup BListIndex and MBListIndex */ - if ( cpi->pb.SBFullyFlags[SB] || !cpi->pb.SBCodedFlags[SB] ) { - BListIndex = LastSbBIndex; /* Reset to values from previous SB */ - }else{ - cpi->PartiallyCodedFlags[SB] = 1; /* Set up list of partially - coded SBs */ - LastSbBIndex = BListIndex; - } - } - - /* Code list of partially coded Super-Block. */ - val = cpi->PartiallyCodedFlags[0]; - oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1); - - i = 0; - while ( i < cpi->pb.SuperBlocks ) { - run_count = 0; - while ( (ipb.SuperBlocks) && - (cpi->PartiallyCodedFlags[i]==val) && - run_count<4129 ) { - i++; - run_count++; - } - - /* Code the run */ - FrArrayCodeSBRun( cpi, run_count); - - if(run_count >= 4129 && i < cpi->pb.SuperBlocks ){ - val = cpi->PartiallyCodedFlags[i]; - oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1); - - }else - val = ( val == 0 ) ? 1 : 0; - } - - /* RLC Super-Block fully/not coded. 
*/ - i = 0; - - /* Skip partially coded blocks */ - while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] ) - i++; - - if ( i < cpi->pb.SuperBlocks ) { - val = cpi->pb.SBFullyFlags[i]; - oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1); - - while ( i < cpi->pb.SuperBlocks ) { - run_count = 0; - while ( (i < cpi->pb.SuperBlocks) && - (cpi->pb.SBFullyFlags[i] == val) && - run_count < 4129) { - i++; - /* Skip partially coded blocks */ - while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] ) - i++; - run_count++; - } - - /* Code the run */ - FrArrayCodeSBRun( cpi, run_count ); - - if(run_count >= 4129 && i < cpi->pb.SuperBlocks ){ - val = cpi->PartiallyCodedFlags[i]; - oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1); - }else - val = ( val == 0 ) ? 1 : 0; - } - } - - - /* Now code the block flags */ - if ( BListIndex > 0 ) { - /* Code the block flags start value */ - val = cpi->BlockCodedFlags[0]; - oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1); - - /* Now code the block flags. */ - for ( i = 0; i < BListIndex; ) { - run_count = 0; - while ( (i < BListIndex) && (cpi->BlockCodedFlags[i] == val) ) { - i++; - run_count++; - } - - FrArrayCodeBlockRun( cpi, run_count ); - - val = ( val == 0 ) ? 1 : 0; - } - } -} diff --git a/Engine/lib/libtheora/lib/enc/frinit.c b/Engine/lib/libtheora/lib/enc/frinit.c deleted file mode 100644 index ae6bbd64f..000000000 --- a/Engine/lib/libtheora/lib/enc/frinit.c +++ /dev/null @@ -1,392 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: frinit.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include -#include "codec_internal.h" - - -void InitializeFragCoordinates(PB_INSTANCE *pbi){ - - ogg_uint32_t i, j; - - ogg_uint32_t HorizFrags = pbi->HFragments; - ogg_uint32_t VertFrags = pbi->VFragments; - ogg_uint32_t StartFrag = 0; - - /* Y */ - - for(i = 0; i< VertFrags; i++){ - for(j = 0; j< HorizFrags; j++){ - - ogg_uint32_t ThisFrag = i * HorizFrags + j; - pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH; - pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH; - - } - } - - /* U */ - HorizFrags >>= 1; - VertFrags >>= 1; - StartFrag = pbi->YPlaneFragments; - - for(i = 0; i< VertFrags; i++) { - for(j = 0; j< HorizFrags; j++) { - ogg_uint32_t ThisFrag = StartFrag + i * HorizFrags + j; - pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH; - pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH; - - } - } - - /* V */ - StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments; - for(i = 0; i< VertFrags; i++) { - for(j = 0; j< HorizFrags; j++) { - ogg_uint32_t ThisFrag = StartFrag + i * HorizFrags + j; - pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH; - pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH; - - } - } -} - -static void CalcPixelIndexTable( PB_INSTANCE *pbi){ - ogg_uint32_t i; - ogg_uint32_t * PixelIndexTablePtr; - - /* Calculate the pixel index table for normal image buffers */ - PixelIndexTablePtr = pbi->pixel_index_table; - for ( i = 0; i < pbi->YPlaneFragments; i++ ) { - PixelIndexTablePtr[ i ] = - ((i / pbi->HFragments) * VFRAGPIXELS * - pbi->info.width); - PixelIndexTablePtr[ i ] += - ((i % pbi->HFragments) * HFRAGPIXELS); - } - - PixelIndexTablePtr = 
&pbi->pixel_index_table[pbi->YPlaneFragments]; - for ( i = 0; i < ((pbi->HFragments >> 1) * pbi->VFragments); i++ ) { - PixelIndexTablePtr[ i ] = - ((i / (pbi->HFragments / 2) ) * - (VFRAGPIXELS * - (pbi->info.width / 2)) ); - PixelIndexTablePtr[ i ] += - ((i % (pbi->HFragments / 2) ) * - HFRAGPIXELS) + pbi->YPlaneSize; - } - - /************************************************************************/ - /* Now calculate the pixel index table for image reconstruction buffers */ - PixelIndexTablePtr = pbi->recon_pixel_index_table; - for ( i = 0; i < pbi->YPlaneFragments; i++ ){ - PixelIndexTablePtr[ i ] = - ((i / pbi->HFragments) * VFRAGPIXELS * - pbi->YStride); - PixelIndexTablePtr[ i ] += - ((i % pbi->HFragments) * HFRAGPIXELS) + - pbi->ReconYDataOffset; - } - - /* U blocks */ - PixelIndexTablePtr = &pbi->recon_pixel_index_table[pbi->YPlaneFragments]; - for ( i = 0; i < pbi->UVPlaneFragments; i++ ) { - PixelIndexTablePtr[ i ] = - ((i / (pbi->HFragments / 2) ) * - (VFRAGPIXELS * (pbi->UVStride)) ); - PixelIndexTablePtr[ i ] += - ((i % (pbi->HFragments / 2) ) * - HFRAGPIXELS) + pbi->ReconUDataOffset; - } - - /* V blocks */ - PixelIndexTablePtr = - &pbi->recon_pixel_index_table[pbi->YPlaneFragments + - pbi->UVPlaneFragments]; - - for ( i = 0; i < pbi->UVPlaneFragments; i++ ) { - PixelIndexTablePtr[ i ] = - ((i / (pbi->HFragments / 2) ) * - (VFRAGPIXELS * (pbi->UVStride)) ); - PixelIndexTablePtr[ i ] += - ((i % (pbi->HFragments / 2) ) * HFRAGPIXELS) + - pbi->ReconVDataOffset; - } -} - -void ClearFragmentInfo(PB_INSTANCE * pbi){ - - /* free prior allocs if present */ - if(pbi->display_fragments) _ogg_free(pbi->display_fragments); - if(pbi->pixel_index_table) _ogg_free(pbi->pixel_index_table); - if(pbi->recon_pixel_index_table) _ogg_free(pbi->recon_pixel_index_table); - if(pbi->FragTokenCounts) _ogg_free(pbi->FragTokenCounts); - if(pbi->CodedBlockList) _ogg_free(pbi->CodedBlockList); - if(pbi->FragMVect) _ogg_free(pbi->FragMVect); - if(pbi->FragCoeffs) 
_ogg_free(pbi->FragCoeffs); - if(pbi->FragCoefEOB) _ogg_free(pbi->FragCoefEOB); - if(pbi->skipped_display_fragments) _ogg_free(pbi->skipped_display_fragments); - if(pbi->QFragData) _ogg_free(pbi->QFragData); - if(pbi->TokenList) _ogg_free(pbi->TokenList); - if(pbi->FragCodingMethod) _ogg_free(pbi->FragCodingMethod); - if(pbi->FragCoordinates) _ogg_free(pbi->FragCoordinates); - - if(pbi->FragQIndex) _ogg_free(pbi->FragQIndex); - if(pbi->PPCoefBuffer) _ogg_free(pbi->PPCoefBuffer); - if(pbi->FragmentVariances) _ogg_free(pbi->FragmentVariances); - - if(pbi->BlockMap) _ogg_free(pbi->BlockMap); - - if(pbi->SBCodedFlags) _ogg_free(pbi->SBCodedFlags); - if(pbi->SBFullyFlags) _ogg_free(pbi->SBFullyFlags); - if(pbi->MBFullyFlags) _ogg_free(pbi->MBFullyFlags); - if(pbi->MBCodedFlags) _ogg_free(pbi->MBCodedFlags); - - if(pbi->_Nodes) _ogg_free(pbi->_Nodes); - pbi->_Nodes = 0; - - pbi->QFragData = 0; - pbi->TokenList = 0; - pbi->skipped_display_fragments = 0; - pbi->FragCoeffs = 0; - pbi->FragCoefEOB = 0; - pbi->display_fragments = 0; - pbi->pixel_index_table = 0; - pbi->recon_pixel_index_table = 0; - pbi->FragTokenCounts = 0; - pbi->CodedBlockList = 0; - pbi->FragCodingMethod = 0; - pbi->FragMVect = 0; - pbi->MBCodedFlags = 0; - pbi->MBFullyFlags = 0; - pbi->BlockMap = 0; - - pbi->SBCodedFlags = 0; - pbi->SBFullyFlags = 0; - pbi->QFragData = 0; - pbi->TokenList = 0; - pbi->skipped_display_fragments = 0; - pbi->FragCoeffs = 0; - pbi->FragCoefEOB = 0; - pbi->display_fragments = 0; - pbi->pixel_index_table = 0; - pbi->recon_pixel_index_table = 0; - pbi->FragTokenCounts = 0; - pbi->CodedBlockList = 0; - pbi->FragCodingMethod = 0; - pbi->FragCoordinates = 0; - pbi->FragMVect = 0; - - pbi->PPCoefBuffer=0; - pbi->PPCoefBuffer=0; - pbi->FragQIndex = 0; - pbi->FragQIndex = 0; - pbi->FragmentVariances= 0; - pbi->FragmentVariances = 0 ; -} - -void InitFragmentInfo(PB_INSTANCE * pbi){ - - /* clear any existing info */ - ClearFragmentInfo(pbi); - - /* Perform Fragment Allocations */ - 
pbi->display_fragments = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->display_fragments)); - - pbi->pixel_index_table = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->pixel_index_table)); - - pbi->recon_pixel_index_table = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->recon_pixel_index_table)); - - pbi->FragTokenCounts = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragTokenCounts)); - - pbi->CodedBlockList = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->CodedBlockList)); - - pbi->FragMVect = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragMVect)); - - pbi->FragCoeffs = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoeffs)); - - pbi->FragCoefEOB = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoefEOB)); - - pbi->skipped_display_fragments = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->skipped_display_fragments)); - - pbi->QFragData = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->QFragData)); - - pbi->TokenList = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->TokenList)); - - pbi->FragCodingMethod = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCodingMethod)); - - pbi->FragCoordinates = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoordinates)); - - pbi->FragQIndex = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragQIndex)); - - pbi->PPCoefBuffer = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->PPCoefBuffer)); - - pbi->FragmentVariances = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragmentVariances)); - - pbi->_Nodes = - _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->_Nodes)); - - /* Super Block Initialization */ - pbi->SBCodedFlags = - _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBCodedFlags)); - - pbi->SBFullyFlags = - _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBFullyFlags)); - - /* Macro Block Initialization */ - pbi->MBCodedFlags = - _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBCodedFlags)); - - pbi->MBFullyFlags = - _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBFullyFlags)); - - pbi->BlockMap = - 
_ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->BlockMap)); - -} - -void ClearFrameInfo(PB_INSTANCE * pbi){ - if(pbi->ThisFrameRecon ) - _ogg_free(pbi->ThisFrameRecon ); - if(pbi->GoldenFrame) - _ogg_free(pbi->GoldenFrame); - if(pbi->LastFrameRecon) - _ogg_free(pbi->LastFrameRecon); - if(pbi->PostProcessBuffer) - _ogg_free(pbi->PostProcessBuffer); - - - pbi->ThisFrameRecon = 0; - pbi->GoldenFrame = 0; - pbi->LastFrameRecon = 0; - pbi->PostProcessBuffer = 0; - - - pbi->ThisFrameRecon = 0; - pbi->GoldenFrame = 0; - pbi->LastFrameRecon = 0; - pbi->PostProcessBuffer = 0; - -} - -void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize){ - - /* clear any existing info */ - ClearFrameInfo(pbi); - - /* allocate frames */ - pbi->ThisFrameRecon = - _ogg_malloc(FrameSize*sizeof(*pbi->ThisFrameRecon)); - - pbi->GoldenFrame = - _ogg_malloc(FrameSize*sizeof(*pbi->GoldenFrame)); - - pbi->LastFrameRecon = - _ogg_malloc(FrameSize*sizeof(*pbi->LastFrameRecon)); - - pbi->PostProcessBuffer = - _ogg_malloc(FrameSize*sizeof(*pbi->PostProcessBuffer)); - -} - -void InitFrameDetails(PB_INSTANCE *pbi){ - int FrameSize; - - /*pbi->PostProcessingLevel = 0; - pbi->PostProcessingLevel = 4; - pbi->PostProcessingLevel = 5; - pbi->PostProcessingLevel = 6;*/ - - pbi->PostProcessingLevel = 0; - - - /* Set the frame size etc. 
*/ - - pbi->YPlaneSize = pbi->info.width * - pbi->info.height; - pbi->UVPlaneSize = pbi->YPlaneSize / 4; - pbi->HFragments = pbi->info.width / HFRAGPIXELS; - pbi->VFragments = pbi->info.height / VFRAGPIXELS; - pbi->UnitFragments = ((pbi->VFragments * pbi->HFragments)*3)/2; - pbi->YPlaneFragments = pbi->HFragments * pbi->VFragments; - pbi->UVPlaneFragments = pbi->YPlaneFragments / 4; - - pbi->YStride = (pbi->info.width + STRIDE_EXTRA); - pbi->UVStride = pbi->YStride / 2; - pbi->ReconYPlaneSize = pbi->YStride * - (pbi->info.height + STRIDE_EXTRA); - pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4; - FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize; - - pbi->YDataOffset = 0; - pbi->UDataOffset = pbi->YPlaneSize; - pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize; - pbi->ReconYDataOffset = - (pbi->YStride * UMV_BORDER) + UMV_BORDER; - pbi->ReconUDataOffset = pbi->ReconYPlaneSize + - (pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2); - pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize + - (pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2); - - /* Image dimensions in Super-Blocks */ - pbi->YSBRows = (pbi->info.height/32) + - ( pbi->info.height%32 ? 1 : 0 ); - pbi->YSBCols = (pbi->info.width/32) + - ( pbi->info.width%32 ? 1 : 0 ); - pbi->UVSBRows = ((pbi->info.height/2)/32) + - ( (pbi->info.height/2)%32 ? 1 : 0 ); - pbi->UVSBCols = ((pbi->info.width/2)/32) + - ( (pbi->info.width/2)%32 ? 
1 : 0 ); - - /* Super-Blocks per component */ - pbi->YSuperBlocks = pbi->YSBRows * pbi->YSBCols; - pbi->UVSuperBlocks = pbi->UVSBRows * pbi->UVSBCols; - pbi->SuperBlocks = pbi->YSuperBlocks+2*pbi->UVSuperBlocks; - - /* Useful externals */ - pbi->MacroBlocks = ((pbi->VFragments+1)/2)*((pbi->HFragments+1)/2); - - InitFragmentInfo(pbi); - InitFrameInfo(pbi, FrameSize); - InitializeFragCoordinates(pbi); - - /* Configure mapping between quad-tree and fragments */ - CreateBlockMapping ( pbi->BlockMap, pbi->YSuperBlocks, - pbi->UVSuperBlocks, pbi->HFragments, pbi->VFragments); - - /* Re-initialise the pixel index table. */ - - CalcPixelIndexTable( pbi ); - -} - diff --git a/Engine/lib/libtheora/lib/enc/hufftables.h b/Engine/lib/libtheora/lib/enc/hufftables.h deleted file mode 100644 index eb4be22c2..000000000 --- a/Engine/lib/libtheora/lib/enc/hufftables.h +++ /dev/null @@ -1,1034 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: hufftables.h 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include "../dec/huffman.h" -#include "codec_internal.h" - -const unsigned char ExtraBitLengths_VP31[MAX_ENTROPY_TOKENS] = { - 0, 0, 0, 2, 3, 4, 12,3, 6, /* EOB and Zero-run tokens. */ - 0, 0, 0, 0, /* Very low value tokens. */ - 1, 1, 1, 1, 2, 3, 4, 5, 6, 10, /* Other value tokens */ - 1, 1, 1, 1, 1, 3, 4, /* Category 1 runs. */ - 2, 3, /* Category 2 runs. 
*/ -}; - -#define NEW_FREQS 0 /* dbm - test new frequency tables */ - -#if NEW_FREQS -/* New baseline frequency tables for encoder version >= 2 */ -const ogg_uint32_t FrequencyCounts_VP3[NUM_HUFF_TABLES][MAX_ENTROPY_TOKENS] = { - /* DC Intra bias */ - { 272, 84, 31, 36, 10, 2, 1, 92, 1, - 701, 872, 410, 478, - 630, 502, 417, 356, 582, 824, 985, 965, 697, 606, - 125, 119, 40, 3, 9, 15, 10, - 73, 37, - }, - { 311, 107, 41, 51, 18, 4, 2, 120, 1, - 824, 1037, 468, 541, - 714, 555, 451, 374, 595, 819, 929, 817, 474, 220, - 172, 142, 27, 4, 9, 10, 2, - 98, 48, - }, - { 353, 125, 49, 66, 24, 6, 2, 124, 1, - 926, 1172, 512, 594, - 766, 581, 458, 379, 590, 789, 849, 665, 306, 80, - 204, 147, 25, 5, 12, 9, 2, - 108, 54, - }, - { 392, 141, 57, 75, 31, 7, 4, 138, 1, - 1050, 1321, 559, 649, - 806, 594, 460, 372, 568, 727, 710, 475, 155, 19, - 251, 174, 27, 7, 16, 8, 2, - 126, 62, - }, - { 455, 168, 66, 87, 39, 10, 6, 124, 2, - 1143, 1455, 592, 692, - 824, 596, 453, 361, 542, 657, 592, 329, 78, 5, - 269, 184, 27, 9, 19, 7, 2, - 127, 66, - }, - { 544, 201, 80, 102, 45, 11, 6, 99, 1, - 1236, 1587, 610, 720, - 833, 590, 444, 348, 506, 588, 487, 226, 39, 2, - 253, 178, 27, 10, 20, 7, 2, - 118, 65, - }, - { 649, 241, 98, 121, 54, 14, 8, 84, 1, - 1349, 1719, 634, 763, - 847, 583, 428, 323, 456, 492, 349, 120, 13, 1, - 231, 170, 24, 8, 19, 7, 1, - 109, 67, - }, - { 824, 304, 129, 158, 66, 19, 10, 44, 2, - 1476, 1925, 644, 794, - 838, 559, 396, 289, 392, 384, 223, 53, 3, 1, - 159, 121, 17, 6, 16, 6, 2, - 69, 53, - }, - - /* DC Inter Bias */ - { 534, 174, 71, 68, 10, 1, 1, 68, 119, - 1674, 1526, 560, 536, - 539, 331, 229, 168, 233, 262, 231, 149, 71, 51, - 629, 530, 284, 126, 182, 208, 184, - 148, 87, - }, - { 594, 195, 77, 71, 9, 1, 1, 47, 89, - 1723, 1592, 595, 570, - 574, 351, 241, 176, 243, 271, 234, 144, 65, 37, - 534, 449, 240, 117, 167, 277, 153, - 96, 54, - }, - { 642, 213, 88, 83, 12, 1, 1, 40, 80, - 1751, 1630, 621, 600, - 598, 367, 250, 183, 251, 276, 235, 143, 62, 28, - 485, 
397, 212, 110, 161, 193, 141, - 84, 48, - }, - { 693, 258, 114, 131, 27, 3, 1, 44, 79, - 1794, 1644, 550, 533, - 518, 314, 213, 154, 209, 223, 174, 97, 40, 14, - 584, 463, 236, 138, 196, 249, 143, - 94, 54, - }, - { 758, 303, 144, 189, 53, 8, 1, 37, 69, - 1842, 1732, 513, 504, - 478, 287, 191, 137, 182, 186, 137, 72, 31, 6, - 589, 469, 199, 128, 177, 264, 161, - 89, 49, - }, - { 817, 344, 170, 243, 84, 18, 2, 30, 65, - 1836, 1733, 518, 511, - 477, 281, 185, 130, 169, 166, 117, 59, 25, 3, - 572, 450, 185, 121, 173, 232, 146, - 80, 43, - }, - { 865, 389, 204, 322, 139, 42, 9, 26, 51, - 1848, 1766, 531, 522, - 477, 275, 177, 122, 153, 144, 97, 50, 16, 1, - 485, 378, 167, 115, 164, 203, 128, - 74, 42, - }, - { 961, 447, 243, 407, 196, 74, 26, 12, 34, - 2003, 1942, 571, 565, - 494, 278, 173, 116, 141, 129, 85, 44, 8, 1, - 285, 223, 101, 66, 104, 120, 74, - 35, 22, - }, - - /* AC INTRA Tables */ - /* AC Intra bias group 1 tables */ - { 245, 68, 25, 28, 5, 1, 1, 359, 4, - 910, 904, 570, 571, - 766, 620, 478, 375, 554, 684, 652, 441, 182, 30, - 535, 206, 118, 77, 69, 90, 16, - 299, 100, - }, - { 302, 86, 32, 36, 8, 1, 1, 362, 3, - 974, 968, 599, 599, - 774, 635, 469, 365, 528, 628, 557, 337, 118, 14, - 577, 219, 136, 82, 69, 65, 13, - 317, 112, - }, - { 348, 102, 39, 44, 9, 2, 1, 363, 3, - 1062, 1055, 607, 609, - 787, 626, 457, 348, 494, 550, 452, 233, 60, 2, - 636, 244, 159, 92, 74, 68, 12, - 327, 119, - }, - { 400, 121, 47, 51, 11, 2, 1, 366, 3, - 1109, 1102, 620, 622, - 786, 624, 450, 331, 459, 490, 366, 163, 29, 1, - 673, 257, 175, 98, 77, 63, 14, - 344, 131, - }, - { 470, 151, 59, 67, 15, 3, 1, 354, 4, - 1198, 1189, 640, 643, - 769, 603, 410, 294, 386, 381, 240, 78, 5, 1, - 746, 282, 205, 113, 87, 64, 15, - 368, 145, - }, - { 553, 189, 77, 94, 24, 6, 1, 347, 4, - 1244, 1232, 650, 653, - 739, 551, 360, 249, 303, 261, 129, 24, 1, 1, - 828, 313, 245, 135, 108, 77, 17, - 403, 169, - }, - { 701, 253, 109, 140, 42, 12, 2, 350, 6, - 1210, 1197, 652, 647, - 673, 495, 299, 
189, 211, 151, 50, 2, 1, 1, - 892, 336, 284, 162, 134, 101, 25, - 455, 205, - }, - { 924, 390, 180, 248, 85, 31, 13, 286, 14, - 1242, 1206, 601, 577, - 519, 342, 175, 100, 85, 36, 1, 1, 1, 1, - 1031, 348, 346, 204, 166, 131, 34, - 473, 197, - }, - /* AC Inter bias group 1 tables */ - { 459, 128, 50, 48, 8, 1, 1, 224, 69, - 1285, 1227, 587, 565, - 573, 406, 261, 180, 228, 213, 130, 47, 11, 3, - 1069, 540, 309, 231, 147, 279, 157, - 383, 165, - }, - { 524, 155, 62, 64, 14, 2, 1, 209, 63, - 1345, 1288, 523, 507, - 515, 358, 225, 153, 183, 160, 87, 29, 7, 2, - 1151, 591, 365, 282, 179, 308, 133, - 344, 157, - }, - { 588, 181, 75, 81, 19, 3, 1, 204, 68, - 1344, 1288, 517, 503, - 505, 346, 216, 141, 169, 139, 71, 21, 5, 1, - 1146, 584, 366, 286, 170, 298, 153, - 342, 157, - }, - { 634, 196, 82, 89, 22, 4, 1, 194, 60, - 1356, 1312, 515, 502, - 489, 331, 199, 127, 145, 111, 51, 14, 3, 1, - 1156, 589, 393, 300, 182, 285, 144, - 340, 159, - }, - { 715, 231, 98, 113, 31, 7, 1, 181, 57, - 1345, 1303, 498, 490, - 448, 291, 166, 101, 106, 75, 30, 9, 1, 1, - 1175, 584, 416, 321, 209, 333, 164, - 330, 159, - }, - { 825, 283, 125, 149, 44, 11, 2, 160, 59, - 1343, 1308, 476, 469, - 405, 247, 131, 75, 76, 47, 18, 5, 1, 1, - 1192, 579, 432, 332, 217, 327, 176, - 320, 154, - }, - { 961, 361, 170, 215, 70, 20, 5, 161, 55, - 1250, 1218, 463, 460, - 354, 204, 101, 52, 48, 28, 11, 1, 1, 1, - 1172, 570, 449, 350, 222, 332, 169, - 338, 174, - }, - { 1139, 506, 266, 387, 156, 57, 26, 114, 48, - 1192, 1170, 366, 366, - 226, 113, 47, 22, 22, 12, 1, 1, 1, 1, - 1222, 551, 462, 391, 220, 322, 156, - 290, 136, - }, - - /* AC Intra bias group 2 tables */ - { 245, 49, 15, 11, 1, 1, 1, 332, 38, - 1163, 1162, 685, 683, - 813, 623, 437, 318, 421, 424, 288, 109, 14, 1, - 729, 303, 179, 112, 87, 199, 46, - 364, 135, - }, - { 305, 67, 22, 17, 2, 1, 1, 329, 39, - 1250, 1245, 706, 705, - 801, 584, 385, 267, 330, 296, 165, 40, 3, 1, - 798, 340, 206, 131, 108, 258, 52, - 382, 154, - }, - { 356, 82, 28, 23, 3, 
1, 1, 312, 42, - 1340, 1334, 701, 703, - 770, 545, 346, 227, 269, 223, 100, 17, 1, 1, - 846, 359, 222, 142, 120, 284, 55, - 379, 157, - }, - { 402, 95, 33, 30, 4, 1, 1, 300, 43, - 1379, 1371, 710, 714, - 724, 486, 289, 182, 202, 144, 47, 5, 1, 1, - 908, 394, 250, 161, 141, 350, 60, - 391, 171, - }, - { 499, 122, 44, 42, 7, 1, 1, 267, 45, - 1439, 1436, 690, 694, - 628, 385, 213, 122, 117, 62, 14, 1, 1, 1, - 992, 441, 288, 187, 167, 446, 82, - 378, 176, - }, - { 641, 168, 62, 60, 12, 1, 1, 247, 49, - 1435, 1436, 662, 669, - 527, 298, 142, 71, 55, 22, 3, 1, 1, 1, - 1036, 470, 319, 208, 193, 548, 106, - 362, 184, - }, - { 860, 274, 111, 113, 23, 4, 1, 229, 59, - 1331, 1323, 629, 645, - 419, 192, 72, 30, 19, 6, 1, 1, 1, 1, - 1022, 478, 339, 225, 213, 690, 142, - 342, 198, - }, - { 1059, 437, 218, 285, 84, 17, 2, 152, 44, - 1284, 1313, 530, 561, - 212, 66, 17, 6, 3, 1, 1, 1, 1, 1, - 1034, 485, 346, 226, 207, 819, 185, - 248, 145, - }, - /* AC Inter bias group 2 tables */ - { 407, 93, 31, 24, 2, 1, 1, 232, 108, - 1365, 1349, 581, 578, - 498, 305, 170, 100, 103, 67, 24, 5, 1, 1, - 1175, 604, 393, 268, 209, 506, 217, - 379, 193, - }, - { 521, 129, 46, 39, 4, 1, 1, 199, 116, - 1419, 1403, 543, 540, - 446, 263, 138, 78, 75, 44, 13, 2, 1, 1, - 1201, 605, 392, 267, 214, 533, 252, - 334, 167, - }, - { 575, 144, 52, 46, 6, 1, 1, 193, 124, - 1394, 1384, 528, 528, - 406, 227, 112, 59, 54, 28, 7, 1, 1, 1, - 1210, 621, 412, 284, 235, 604, 265, - 320, 167, - }, - { 673, 174, 64, 59, 9, 1, 1, 177, 128, - 1392, 1385, 499, 499, - 352, 183, 85, 42, 35, 16, 3, 1, 1, 1, - 1210, 626, 418, 289, 246, 675, 297, - 292, 158, - }, - { 804, 225, 85, 77, 12, 1, 1, 150, 129, - 1387, 1384, 455, 455, - 277, 129, 53, 23, 17, 7, 1, 1, 1, 1, - 1212, 635, 433, 306, 268, 760, 313, - 249, 137, - }, - { 975, 305, 123, 117, 20, 2, 1, 135, 140, - 1312, 1310, 401, 399, - 201, 80, 28, 11, 8, 2, 1, 1, 1, 1, - 1162, 623, 439, 314, 283, 906, 368, - 203, 121, - }, - { 1205, 452, 208, 231, 50, 6, 1, 123, 149, - 1161, 
1164, 370, 370, - 137, 45, 14, 4, 2, 1, 1, 1, 1, 1, - 1047, 562, 413, 300, 277, 1020, 404, - 168, 105, - }, - { 1297, 662, 389, 574, 200, 39, 4, 55, 120, - 1069, 1076, 273, 265, - 66, 14, 2, 1, 1, 1, 1, 1, 1, 1, - 930, 475, 345, 249, 236, 1124, 376, - 91, 56, - }, - - /* AC Intra bias group 3 tables */ - { 278, 55, 17, 12, 1, 1, 1, 288, 71, - 1315, 1304, 725, 724, - 733, 506, 307, 195, 225, 175, 77, 12, 1, 1, - 904, 414, 246, 170, 126, 290, 205, - 423, 185, - }, - { 382, 80, 26, 21, 2, 1, 1, 239, 64, - 1442, 1429, 706, 701, - 664, 420, 239, 146, 152, 105, 34, 2, 1, 1, - 975, 440, 263, 185, 140, 332, 229, - 397, 169, - }, - { 451, 97, 32, 27, 4, 1, 1, 223, 75, - 1462, 1454, 682, 680, - 574, 343, 179, 101, 98, 54, 9, 1, 1, 1, - 1031, 482, 293, 210, 163, 400, 297, - 384, 181, - }, - { 551, 128, 43, 37, 5, 1, 1, 201, 78, - 1497, 1487, 642, 651, - 493, 269, 133, 70, 60, 24, 2, 1, 1, 1, - 1065, 504, 312, 228, 178, 451, 352, - 351, 174, - }, - { 693, 179, 63, 54, 8, 1, 1, 169, 78, - 1502, 1497, 580, 591, - 375, 186, 77, 35, 21, 4, 1, 1, 1, 1, - 1099, 533, 341, 253, 206, 542, 432, - 306, 164, - }, - { 867, 263, 105, 96, 16, 2, 1, 152, 81, - 1435, 1439, 521, 525, - 270, 107, 32, 8, 3, 1, 1, 1, 1, 1, - 1085, 537, 361, 277, 223, 616, 549, - 258, 156, - }, - { 1022, 385, 182, 207, 46, 7, 1, 158, 88, - 1290, 1318, 501, 502, - 184, 38, 6, 1, 1, 1, 1, 1, 1, 1, - 1023, 480, 345, 301, 232, 665, 661, - 210, 133, - }, - { 1184, 555, 307, 457, 185, 44, 6, 115, 41, - 1236, 1253, 329, 340, - 32, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1017, 385, 316, 370, 246, 672, 788, - 85, 23, - }, - /* AC Inter bias group 3 tables */ - { 502, 106, 33, 22, 1, 1, 1, 151, 132, - 1446, 1451, 502, 499, - 343, 181, 84, 42, 36, 16, 3, 1, 1, 1, - 1211, 661, 429, 312, 242, 637, 498, - 288, 156, - }, - { 651, 147, 48, 35, 3, 1, 1, 145, 140, - 1419, 1420, 469, 466, - 281, 132, 56, 25, 18, 6, 1, 1, 1, 1, - 1175, 656, 435, 328, 260, 715, 556, - 252, 147, - }, - { 749, 179, 59, 43, 4, 1, 1, 123, 135, - 1423, 1431, 413, 409, 
- 221, 95, 36, 15, 9, 2, 1, 1, 1, 1, - 1159, 658, 444, 340, 272, 782, 656, - 205, 124, - }, - { 902, 243, 86, 67, 7, 1, 1, 114, 141, - 1385, 1385, 387, 383, - 178, 67, 22, 7, 4, 1, 1, 1, 1, 1, - 1096, 632, 434, 339, 277, 813, 735, - 171, 109, - }, - { 1081, 337, 133, 112, 15, 1, 1, 92, 137, - 1350, 1349, 311, 309, - 115, 34, 8, 2, 1, 1, 1, 1, 1, 1, - 1016, 595, 418, 342, 283, 870, 883, - 114, 78, - }, - { 1253, 467, 210, 205, 34, 3, 1, 80, 130, - 1318, 1313, 258, 260, - 68, 12, 2, 1, 1, 1, 1, 1, 1, 1, - 874, 516, 378, 330, 273, 877, 1000, - 72, 53, - }, - { 1362, 626, 333, 423, 100, 10, 1, 73, 106, - 1311, 1313, 241, 231, - 31, 3, 1, 1, 1, 1, 1, 1, 1, 1, - 620, 368, 286, 302, 245, 814, 1127, - 34, 28, - }, - { 1203, 743, 460, 774, 284, 36, 1, 13, 25, - 1956, 1961, 103, 106, - 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 248, 131, 149, 272, 165, 535, 813, - 3, 3, - }, - - /* AC Intra bias group 4 tables */ - { 599, 150, 55, 50, 9, 1, 1, 181, 19, - 1487, 1487, 625, 625, - 473, 271, 138, 74, 71, 42, 11, 1, 1, 1, - 1187, 591, 356, 239, 170, 351, 137, - 395, 194, - }, - { 758, 209, 79, 74, 15, 2, 1, 147, 25, - 1514, 1514, 521, 520, - 334, 165, 74, 36, 30, 11, 1, 1, 1, 1, - 1252, 644, 409, 279, 211, 472, 203, - 318, 171, - }, - { 852, 252, 100, 98, 20, 3, 1, 130, 26, - 1493, 1498, 481, 473, - 268, 123, 51, 23, 15, 3, 1, 1, 1, 1, - 1256, 652, 426, 294, 231, 543, 242, - 278, 156, - }, - { 971, 309, 130, 136, 30, 5, 1, 113, 28, - 1458, 1467, 443, 435, - 215, 90, 31, 12, 5, 1, 1, 1, 1, 1, - 1232, 643, 426, 303, 243, 590, 300, - 235, 136, - }, - { 1100, 399, 180, 206, 53, 9, 1, 101, 29, - 1419, 1425, 375, 374, - 158, 47, 10, 1, 1, 1, 1, 1, 1, 1, - 1193, 609, 426, 319, 256, 643, 383, - 166, 103, - }, - { 1195, 505, 249, 326, 98, 20, 3, 102, 25, - 1370, 1356, 355, 347, - 104, 11, 1, 1, 1, 1, 1, 1, 1, 1, - 1100, 568, 381, 330, 261, 642, 466, - 105, 69, - }, - { 1176, 608, 345, 559, 244, 57, 6, 110, 9, - 1370, 1332, 372, 367, - 29, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 859, 427, 269, 359, 375, 608, 
451, - 35, 20, - }, - { 1140, 613, 391, 797, 458, 180, 37, 2, 1, - 2037, 1697, 95, 31, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 360, 49, 23, 198, 1001, 719, 160, - 1, 1, - }, - /* AC Inter bias group 4 tables */ - { 931, 272, 105, 96, 16, 1, 1, 91, 52, - 1481, 1489, 347, 349, - 174, 74, 28, 12, 8, 3, 1, 1, 1, 1, - 1247, 719, 490, 356, 279, 706, 363, - 187, 110, - }, - { 1095, 358, 148, 143, 25, 3, 1, 74, 61, - 1439, 1457, 304, 302, - 127, 46, 15, 5, 3, 1, 1, 1, 1, 1, - 1138, 664, 469, 347, 282, 768, 487, - 139, 87, - }, - { 1192, 423, 188, 189, 36, 4, 1, 64, 61, - 1457, 1475, 284, 282, - 106, 35, 10, 3, 1, 1, 1, 1, 1, 1, - 1078, 624, 440, 329, 264, 744, 507, - 117, 73, - }, - { 1275, 496, 231, 258, 52, 6, 1, 53, 55, - 1458, 1470, 248, 245, - 77, 20, 5, 1, 1, 1, 1, 1, 1, 1, - 984, 576, 414, 323, 260, 771, 569, - 84, 54, - }, - { 1377, 603, 302, 367, 87, 11, 1, 37, 52, - 1522, 1532, 207, 204, - 47, 8, 1, 1, 1, 1, 1, 1, 1, 1, - 840, 493, 366, 291, 231, 690, 636, - 52, 32, - }, - { 1409, 708, 385, 529, 148, 24, 1, 23, 37, - 1672, 1670, 163, 162, - 22, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 647, 364, 291, 262, 210, 574, 643, - 26, 14, - }, - { 1348, 778, 481, 755, 245, 53, 4, 13, 19, - 2114, 2089, 141, 139, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 302, 183, 162, 181, 182, 344, 437, - 8, 3, - }, - { 1560, 769, 410, 664, 243, 58, 1, 1, 1, - 3017, 2788, 17, 24, - 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 34, 16, 8, 55, 134, 105, 86, - 1, 1, - }, -}; - -#else /* Frequency tables for encoder version < 2 */ - -const ogg_uint32_t FrequencyCounts_VP3[NUM_HUFF_TABLES][MAX_ENTROPY_TOKENS] = { - /* DC Intra bias */ - { 198, 62, 22, 31, 14, 6, 6, 205, 3, - 843, 843, 415, 516, - 660, 509, 412, 347, 560, 779, 941, 930, 661, 377, - 170, 155, 39, 2, 9, 15, 11, - 128, 86, - }, - { 299, 92, 34, 39, 15, 6, 6, 132, 1, - 851, 851, 484, 485, - 666, 514, 416, 351, 567, 788, 953, 943, 670, 383, - 117, 119, 26, 4, 17, 7, 1, - 93, 56, - }, - { 367, 115, 42, 47, 16, 6, 6, 105, 1, - 896, 896, 492, 493, - 667, 510, 408, 342, 547, 
760, 932, 927, 656, 379, - 114, 103, 10, 3, 6, 2, 1, - 88, 49, - }, - { 462, 158, 63, 76, 28, 9, 8, 145, 1, - 1140, 1140, 573, 574, - 754, 562, 435, 357, 555, 742, 793, 588, 274, 81, - 154, 117, 13, 6, 12, 2, 1, - 104, 62, - }, - { 558, 196, 81, 99, 36, 11, 9, 135, 1, - 1300, 1301, 606, 607, - 779, 560, 429, 349, 536, 680, 644, 405, 153, 30, - 171, 120, 12, 5, 14, 3, 1, - 104, 53, - }, - { 635, 233, 100, 122, 46, 14, 12, 113, 1, - 1414, 1415, 631, 631, - 785, 555, 432, 335, 513, 611, 521, 284, 89, 13, - 170, 113, 10, 5, 14, 3, 1, - 102, 62, - }, - { 720, 276, 119, 154, 62, 20, 16, 101, 1, - 1583, 1583, 661, 661, - 794, 556, 407, 318, 447, 472, 343, 153, 35, 1, - 172, 115, 11, 7, 14, 3, 1, - 112, 70, - }, - { 853, 326, 144, 184, 80, 27, 19, 52, 1, - 1739, 1740, 684, 685, - 800, 540, 381, 277, 364, 352, 218, 78, 13, 1, - 139, 109, 9, 6, 20, 2, 1, - 94, 50, - }, - - /* DC Inter Bias */ - { 490, 154, 57, 53, 10, 2, 1, 238, 160, - 1391, 1390, 579, 578, - 491, 273, 172, 118, 152, 156, 127, 79, 41, 39, - 712, 547, 316, 125, 183, 306, 237, - 451, 358, - }, - { 566, 184, 70, 65, 11, 2, 1, 235, 51, - 1414, 1414, 599, 598, - 510, 285, 180, 124, 157, 161, 131, 82, 42, 40, - 738, 551, 322, 138, 195, 188, 93, - 473, 365, - }, - { 711, 261, 111, 126, 27, 4, 1, 137, 52, - 1506, 1505, 645, 645, - 567, 316, 199, 136, 172, 175, 142, 88, 45, 48, - 548, 449, 255, 145, 184, 174, 121, - 260, 227, - }, - { 823, 319, 144, 175, 43, 7, 1, 53, 42, - 1648, 1648, 653, 652, - 583, 329, 205, 139, 175, 176, 139, 84, 44, 34, - 467, 389, 211, 137, 181, 186, 107, - 106, 85, - }, - { 948, 411, 201, 276, 85, 16, 2, 39, 33, - 1778, 1777, 584, 583, - 489, 265, 162, 111, 140, 140, 108, 64, 38, 23, - 428, 356, 201, 139, 186, 165, 94, - 78, 63, - }, - { 1002, 470, 248, 386, 153, 39, 6, 23, 23, - 1866, 1866, 573, 573, - 467, 249, 155, 103, 130, 128, 94, 60, 38, 14, - 323, 263, 159, 111, 156, 153, 74, - 46, 34, - }, - { 1020, 518, 291, 504, 242, 78, 18, 14, 14, - 1980, 1979, 527, 526, - 408, 219, 132, 87, 
110, 104, 79, 55, 31, 7, - 265, 213, 129, 91, 131, 111, 50, - 31, 20, - }, - { 1018, 544, 320, 591, 338, 139, 47, 5, 2, - 2123, 2123, 548, 547, - 414, 212, 126, 83, 101, 96, 79, 60, 23, 1, - 120, 97, 55, 39, 60, 38, 15, - 11, 8, - }, - - /* AC INTRA Tables */ - /* AC Intra bias group 1 tables */ - { 242, 62, 22, 20, 4, 1, 1, 438, 1, - 593, 593, 489, 490, - 657, 580, 471, 374, 599, 783, 869, 770, 491, 279, - 358, 144, 82, 54, 49, 70, 5, - 289, 107, - }, - { 317, 95, 38, 41, 8, 1, 1, 479, 1, - 653, 654, 500, 501, - 682, 611, 473, 376, 582, 762, 806, 656, 358, 155, - 419, 162, 86, 58, 36, 34, 1, - 315, 126, - }, - { 382, 121, 49, 59, 15, 3, 1, 496, 1, - 674, 674, 553, 554, - 755, 636, 487, 391, 576, 718, 701, 488, 221, 72, - 448, 161, 107, 56, 37, 29, 1, - 362, 156, - }, - { 415, 138, 57, 73, 21, 5, 1, 528, 1, - 742, 741, 562, 563, - 753, 669, 492, 388, 563, 664, 589, 340, 129, 26, - 496, 184, 139, 71, 48, 33, 2, - 387, 166, - }, - { 496, 170, 73, 94, 31, 8, 2, 513, 1, - 855, 855, 604, 604, - 769, 662, 477, 356, 486, 526, 381, 183, 51, 5, - 590, 214, 160, 85, 60, 39, 3, - 427, 203, - }, - { 589, 207, 89, 116, 40, 13, 3, 491, 1, - 919, 919, 631, 631, - 769, 633, 432, 308, 408, 378, 247, 94, 17, 1, - 659, 247, 201, 105, 73, 51, 3, - 466, 242, - }, - { 727, 266, 115, 151, 49, 17, 6, 439, 1, - 977, 977, 642, 642, - 718, 572, 379, 243, 285, 251, 133, 40, 1, 1, - 756, 287, 253, 126, 94, 66, 4, - 492, 280, - }, - { 940, 392, 180, 247, 82, 30, 14, 343, 1, - 1064, 1064, 615, 616, - 596, 414, 235, 146, 149, 108, 41, 1, 1, 1, - 882, 314, 346, 172, 125, 83, 6, - 489, 291, - }, - /* AC Inter bias group 1 tables */ - { 440, 102, 33, 23, 2, 1, 1, 465, 85, - 852, 852, 744, 743, - 701, 496, 297, 193, 225, 200, 129, 58, 18, 2, - 798, 450, 269, 202, 145, 308, 154, - 646, 389, - }, - { 592, 151, 53, 43, 6, 1, 1, 409, 34, - 875, 875, 748, 747, - 723, 510, 305, 196, 229, 201, 130, 59, 18, 2, - 800, 436, 253, 185, 115, 194, 88, - 642, 368, - }, - { 759, 222, 86, 85, 17, 2, 1, 376, 46, - 
888, 888, 689, 688, - 578, 408, 228, 143, 165, 141, 84, 35, 7, 1, - 878, 488, 321, 244, 147, 266, 124, - 612, 367, - }, - { 912, 298, 122, 133, 34, 7, 1, 261, 44, - 1092, 1091, 496, 496, - 409, 269, 150, 95, 106, 87, 49, 16, 1, 1, - 1102, 602, 428, 335, 193, 323, 157, - 423, 253, - }, - { 1072, 400, 180, 210, 60, 16, 3, 210, 40, - 1063, 1063, 451, 451, - 345, 221, 121, 73, 79, 64, 31, 6, 1, 1, - 1105, 608, 462, 358, 202, 330, 155, - 377, 228, - }, - { 1164, 503, 254, 330, 109, 34, 9, 167, 35, - 1038, 1037, 390, 390, - 278, 170, 89, 54, 56, 40, 13, 1, 1, 1, - 1110, 607, 492, 401, 218, 343, 141, - 323, 192, - }, - { 1173, 583, 321, 486, 196, 68, 23, 124, 23, - 1037, 1037, 347, 346, - 232, 139, 69, 40, 37, 20, 2, 1, 1, 1, - 1128, 584, 506, 410, 199, 301, 113, - 283, 159, - }, - { 1023, 591, 366, 699, 441, 228, 113, 79, 5, - 1056, 1056, 291, 291, - 173, 96, 38, 19, 8, 1, 1, 1, 1, 1, - 1187, 527, 498, 409, 147, 210, 56, - 263, 117, - }, - - /* AC Intra bias group 2 tables */ - { 311, 74, 27, 27, 5, 1, 1, 470, 24, - 665, 667, 637, 638, - 806, 687, 524, 402, 585, 679, 609, 364, 127, 20, - 448, 210, 131, 76, 52, 111, 19, - 393, 195, - }, - { 416, 104, 39, 38, 8, 1, 1, 545, 33, - 730, 731, 692, 692, - 866, 705, 501, 365, 495, 512, 387, 168, 39, 2, - 517, 240, 154, 86, 64, 127, 19, - 461, 247, - }, - { 474, 117, 43, 42, 9, 1, 1, 560, 40, - 783, 783, 759, 760, - 883, 698, 466, 318, 404, 377, 215, 66, 7, 1, - 559, 259, 176, 110, 87, 170, 22, - 520, 278, - }, - { 582, 149, 53, 53, 12, 2, 1, 473, 39, - 992, 993, 712, 713, - 792, 593, 373, 257, 299, 237, 114, 25, 1, 1, - 710, 329, 221, 143, 116, 226, 26, - 490, 259, - }, - { 744, 210, 78, 77, 16, 2, 1, 417, 37, - 1034, 1035, 728, 728, - 718, 509, 296, 175, 184, 122, 42, 3, 1, 1, - 791, 363, 255, 168, 145, 311, 35, - 492, 272, - }, - { 913, 291, 121, 128, 28, 4, 1, 334, 40, - 1083, 1084, 711, 712, - 624, 378, 191, 107, 95, 50, 7, 1, 1, 1, - 876, 414, 288, 180, 164, 382, 39, - 469, 275, - }, - { 1065, 405, 184, 216, 53, 8, 1, 236, 
36, - 1134, 1134, 685, 686, - 465, 253, 113, 48, 41, 9, 1, 1, 1, 1, - 965, 451, 309, 179, 166, 429, 53, - 414, 249, - }, - { 1148, 548, 301, 438, 160, 42, 6, 84, 17, - 1222, 1223, 574, 575, - 272, 111, 23, 6, 2, 1, 1, 1, 1, 1, - 1060, 502, 328, 159, 144, 501, 54, - 302, 183, - }, - /* AC Inter bias group 2 tables */ - { 403, 80, 24, 17, 1, 1, 1, 480, 90, - 899, 899, 820, 819, - 667, 413, 228, 133, 139, 98, 42, 10, 1, 1, - 865, 470, 316, 222, 171, 419, 213, - 645, 400, - }, - { 698, 169, 59, 49, 6, 1, 1, 414, 101, - 894, 893, 761, 761, - 561, 338, 171, 96, 97, 64, 26, 6, 1, 1, - 896, 494, 343, 239, 192, 493, 215, - 583, 366, - }, - { 914, 255, 94, 80, 10, 1, 1, 345, 128, - 935, 935, 670, 671, - 415, 222, 105, 55, 51, 30, 10, 1, 1, 1, - 954, 530, 377, 274, 232, 641, 295, - 456, 298, - }, - { 1103, 359, 146, 135, 20, 1, 1, 235, 119, - 1042, 1042, 508, 507, - 293, 146, 65, 33, 30, 16, 4, 1, 1, 1, - 1031, 561, 407, 296, 265, 813, 317, - 301, 192, - }, - { 1255, 504, 238, 265, 51, 5, 1, 185, 113, - 1013, 1013, 437, 438, - 212, 92, 41, 18, 15, 6, 1, 1, 1, 1, - 976, 530, 386, 276, 260, 927, 357, - 224, 148, - }, - { 1292, 610, 332, 460, 127, 16, 1, 136, 99, - 1014, 1015, 384, 384, - 153, 65, 25, 11, 6, 1, 1, 1, 1, 1, - 942, 487, 343, 241, 238, 970, 358, - 174, 103, - }, - { 1219, 655, 407, 700, 280, 55, 2, 100, 60, - 1029, 1029, 337, 336, - 119, 43, 11, 3, 2, 1, 1, 1, 1, 1, - 894, 448, 305, 199, 213, 1005, 320, - 136, 77, - }, - { 1099, 675, 435, 971, 581, 168, 12, 37, 16, - 1181, 1081, 319, 318, - 66, 11, 6, 1, 1, 1, 1, 1, 1, 1, - 914, 370, 235, 138, 145, 949, 128, - 94, 41, - }, - - /* AC Intra bias group 3 tables */ - { 486, 112, 39, 34, 6, 1, 1, 541, 67, - 819, 818, 762, 763, - 813, 643, 403, 280, 332, 295, 164, 53, 6, 1, - 632, 294, 180, 131, 105, 208, 109, - 594, 295, - }, - { 723, 191, 69, 65, 12, 1, 1, 445, 79, - 865, 865, 816, 816, - 750, 515, 290, 172, 184, 122, 46, 5, 1, 1, - 740, 340, 213, 165, 129, 270, 168, - 603, 326, - }, - { 884, 264, 102, 103, 21, 3, 1, 
382, 68, - 897, 897, 836, 836, - 684, 427, 227, 119, 119, 70, 16, 1, 1, 1, - 771, 367, 234, 184, 143, 272, 178, - 555, 326, - }, - { 1028, 347, 153, 161, 36, 8, 1, 251, 44, - 1083, 1084, 735, 735, - 541, 289, 144, 77, 57, 23, 3, 1, 1, 1, - 926, 422, 270, 215, 176, 301, 183, - 443, 248, - }, - { 1155, 465, 224, 264, 71, 14, 3, 174, 27, - 1110, 1111, 730, 731, - 429, 206, 79, 30, 19, 4, 1, 1, 1, 1, - 929, 443, 279, 225, 194, 298, 196, - 354, 223, - }, - { 1191, 576, 296, 415, 144, 36, 8, 114, 16, - 1162, 1162, 749, 749, - 338, 108, 29, 8, 5, 1, 1, 1, 1, 1, - 947, 458, 273, 207, 194, 248, 145, - 258, 152, - }, - { 1169, 619, 366, 603, 247, 92, 23, 46, 1, - 1236, 1236, 774, 775, - 191, 35, 14, 1, 1, 1, 1, 1, 1, 1, - 913, 449, 260, 214, 194, 180, 82, - 174, 98, - }, - { 1006, 537, 381, 897, 504, 266, 101, 39, 1, - 1307, 1307, 668, 667, - 116, 3, 1, 1, 1, 1, 1, 1, 1, 1, - 1175, 261, 295, 70, 164, 107, 31, - 10, 76, - }, - /* AC Inter bias group 3 tables */ - { 652, 156, 53, 43, 5, 1, 1, 368, 128, - 983, 984, 825, 825, - 583, 331, 163, 88, 84, 48, 15, 1, 1, 1, - 870, 480, 316, 228, 179, 421, 244, - 562, 349, - }, - { 988, 280, 104, 87, 12, 1, 1, 282, 194, - 980, 981, 738, 739, - 395, 189, 80, 37, 31, 12, 2, 1, 1, 1, - 862, 489, 333, 262, 214, 600, 446, - 390, 260, - }, - { 1176, 399, 165, 154, 24, 2, 1, 218, 224, - 1017, 1018, 651, 651, - 280, 111, 42, 16, 9, 3, 1, 1, 1, 1, - 787, 469, 324, 269, 229, 686, 603, - 267, 194, - }, - { 1319, 530, 255, 268, 47, 4, 1, 113, 183, - 1149, 1150, 461, 461, - 173, 58, 17, 5, 3, 1, 1, 1, 1, 1, - 768, 450, 305, 261, 221, 716, 835, - 136, 97, - }, - { 1362, 669, 355, 465, 104, 9, 1, 76, 153, - 1253, 1253, 398, 397, - 102, 21, 5, 1, 1, 1, 1, 1, 1, 1, - 596, 371, 238, 228, 196, 660, 954, - 68, 53, - }, - { 1354, 741, 446, 702, 174, 15, 1, 38, 87, - 1498, 1498, 294, 294, - 43, 7, 1, 1, 1, 1, 1, 1, 1, 1, - 381, 283, 165, 181, 155, 544, 1039, - 25, 21, - }, - { 1262, 885, 546, 947, 263, 18, 1, 18, 27, - 1908, 1908, 163, 162, - 14, 1, 1, 1, 1, 
1, 1, 1, 1, 1, - 195, 152, 83, 125, 109, 361, 827, - 7, 5, - }, - { 2539, 951, 369, 554, 212, 18, 1, 1, 1, - 2290, 2289, 64, 64, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 18, 18, 9, 55, 36, 184, 323, - 1, 1, - }, - - - /* AC Intra bias group 4 tables */ - { 921, 264, 101, 100, 19, 2, 1, 331, 98, - 1015, 1016, 799, 799, - 512, 269, 119, 60, 50, 17, 1, 1, 1, 1, - 841, 442, 307, 222, 182, 493, 256, - 438, 310, - }, - { 1147, 412, 184, 206, 50, 6, 1, 242, 141, - 977, 976, 808, 807, - 377, 135, 40, 10, 7, 1, 1, 1, 1, 1, - 788, 402, 308, 223, 205, 584, 406, - 316, 227, - }, - { 1243, 504, 238, 310, 79, 11, 1, 184, 150, - 983, 984, 814, 813, - 285, 56, 10, 1, 1, 1, 1, 1, 1, 1, - 713, 377, 287, 217, 180, 615, 558, - 208, 164, - }, - { 1266, 606, 329, 484, 161, 27, 1, 79, 92, - 1187, 1188, 589, 588, - 103, 10, 1, 1, 1, 1, 1, 1, 1, 1, - 680, 371, 278, 221, 244, 614, 728, - 80, 62, - }, - { 1126, 828, 435, 705, 443, 90, 8, 10, 55, - 1220, 1219, 350, 350, - 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 602, 330, 222, 168, 158, 612, 919, - 104, 5, - }, - { 1210, 506, 1014, 926, 474, 240, 4, 1, 44, - 1801, 1801, 171, 171, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 900, 132, 36, 11, 47, 191, 316, - 2, 1, - }, - { 1210, 506, 1014, 926, 474, 240, 4, 1, 44, - 1801, 1801, 171, 171, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 900, 132, 36, 11, 47, 191, 316, - 2, 1, - }, - { 1210, 506, 1014, 926, 474, 240, 4, 1, 44, - 1801, 1801, 171, 171, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 900, 132, 36, 11, 47, 191, 316, - 2, 1, - }, - /* AC Inter bias group 4 tables */ - { 1064, 325, 129, 117, 20, 2, 1, 266, 121, - 1000, 1000, 706, 706, - 348, 162, 67, 32, 25, 11, 1, 1, 1, 1, - 876, 513, 363, 274, 225, 627, 384, - 370, 251, - }, - { 1311, 517, 238, 254, 45, 3, 1, 188, 160, - 1070, 1070, 635, 635, - 239, 85, 30, 11, 6, 1, 1, 1, 1, 1, - 744, 420, 313, 239, 206, 649, 541, - 221, 155, - }, - { 1394, 632, 322, 385, 78, 7, 1, 134, 152, - 1163, 1164, 607, 607, - 185, 51, 12, 3, 1, 1, 1, 1, 1, 1, - 631, 331, 275, 203, 182, 604, 620, - 146, 98, - }, 
- { 1410, 727, 407, 546, 146, 19, 1, 67, 88, - 1485, 1486, 419, 418, - 103, 18, 3, 1, 1, 1, 1, 1, 1, 1, - 555, 261, 234, 164, 148, 522, 654, - 67, 39, - }, - { 1423, 822, 492, 719, 216, 22, 1, 28, 59, - 1793, 1793, 323, 324, - 37, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 376, 138, 158, 102, 119, 400, 604, - 28, 9, - }, - { 1585, 923, 563, 918, 207, 25, 1, 5, 20, - 2229, 2230, 172, 172, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 191, 40, 56, 22, 65, 243, 312, - 2, 1, - }, - { 2225, 1100, 408, 608, 133, 8, 1, 1, 1, - 2658, 2658, 25, 24, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 8, 1, 1, 1, 1, 125, 16, - 1, 1, - }, - { 2539, 951, 369, 554, 212, 18, 1, 1, 1, - 2290, 2289, 64, 64, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 18, 18, 9, 55, 36, 184, 323, - 1, 1, - }, -}; - -#endif /* NEW_FREQS */ diff --git a/Engine/lib/libtheora/lib/enc/mcomp.c b/Engine/lib/libtheora/lib/enc/mcomp.c deleted file mode 100644 index 3b6b4ac28..000000000 --- a/Engine/lib/libtheora/lib/enc/mcomp.c +++ /dev/null @@ -1,767 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mcomp.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" - -/* Initialises motion compentsation. */ -void InitMotionCompensation ( CP_INSTANCE *cpi ){ - int i; - int SearchSite=0; - int Len; - int LineStepY = (ogg_int32_t)cpi->pb.YStride; - - Len=((MAX_MV_EXTENT/2)+1)/2; - - - /* How many search stages are there. 
*/ - cpi->MVSearchSteps = 0; - - /* Set up offsets arrays used in half pixel correction. */ - cpi->HalfPixelRef2Offset[0] = -LineStepY - 1; - cpi->HalfPixelRef2Offset[1] = -LineStepY; - cpi->HalfPixelRef2Offset[2] = -LineStepY + 1; - cpi->HalfPixelRef2Offset[3] = - 1; - cpi->HalfPixelRef2Offset[4] = 0; - cpi->HalfPixelRef2Offset[5] = 1; - cpi->HalfPixelRef2Offset[6] = LineStepY - 1; - cpi->HalfPixelRef2Offset[7] = LineStepY; - cpi->HalfPixelRef2Offset[8] = LineStepY + 1; - - cpi->HalfPixelXOffset[0] = -1; - cpi->HalfPixelXOffset[1] = 0; - cpi->HalfPixelXOffset[2] = 1; - cpi->HalfPixelXOffset[3] = -1; - cpi->HalfPixelXOffset[4] = 0; - cpi->HalfPixelXOffset[5] = 1; - cpi->HalfPixelXOffset[6] = -1; - cpi->HalfPixelXOffset[7] = 0; - cpi->HalfPixelXOffset[8] = 1; - - cpi->HalfPixelYOffset[0] = -1; - cpi->HalfPixelYOffset[1] = -1; - cpi->HalfPixelYOffset[2] = -1; - cpi->HalfPixelYOffset[3] = 0; - cpi->HalfPixelYOffset[4] = 0; - cpi->HalfPixelYOffset[5] = 0; - cpi->HalfPixelYOffset[6] = 1; - cpi->HalfPixelYOffset[7] = 1; - cpi->HalfPixelYOffset[8] = 1; - - - /* Generate offsets for 8 search sites per step. */ - while ( Len>0 ) { - /* Another step. */ - cpi->MVSearchSteps += 1; - - /* Compute offsets for search sites. */ - cpi->MVOffsetX[SearchSite] = -Len; - cpi->MVOffsetY[SearchSite++] = -Len; - cpi->MVOffsetX[SearchSite] = 0; - cpi->MVOffsetY[SearchSite++] = -Len; - cpi->MVOffsetX[SearchSite] = Len; - cpi->MVOffsetY[SearchSite++] = -Len; - cpi->MVOffsetX[SearchSite] = -Len; - cpi->MVOffsetY[SearchSite++] = 0; - cpi->MVOffsetX[SearchSite] = Len; - cpi->MVOffsetY[SearchSite++] = 0; - cpi->MVOffsetX[SearchSite] = -Len; - cpi->MVOffsetY[SearchSite++] = Len; - cpi->MVOffsetX[SearchSite] = 0; - cpi->MVOffsetY[SearchSite++] = Len; - cpi->MVOffsetX[SearchSite] = Len; - cpi->MVOffsetY[SearchSite++] = Len; - - /* Contract. */ - Len /= 2; - } - - /* Compute pixel index offsets. 
*/ - for ( i=SearchSite-1; i>=0; i-- ) - cpi->MVPixelOffsetY[i] = (cpi->MVOffsetY[i]*LineStepY) + cpi->MVOffsetX[i]; -} - -static ogg_uint32_t GetInterErr (CP_INSTANCE *cpi, unsigned char * NewDataPtr, - unsigned char * RefDataPtr1, - unsigned char * RefDataPtr2, - ogg_uint32_t PixelsPerLine ) { - ogg_int32_t DiffVal; - ogg_int32_t RefOffset = (int)(RefDataPtr1 - RefDataPtr2); - ogg_uint32_t RefPixelsPerLine = PixelsPerLine + STRIDE_EXTRA; - - /* Mode of interpolation chosen based upon on the offset of the - second reference pointer */ - if ( RefOffset == 0 ) { - DiffVal = dsp_inter8x8_err (cpi->dsp, NewDataPtr, PixelsPerLine, - RefDataPtr1, RefPixelsPerLine); - }else{ - DiffVal = dsp_inter8x8_err_xy2 (cpi->dsp, NewDataPtr, PixelsPerLine, - RefDataPtr1, - RefDataPtr2, RefPixelsPerLine); - } - - /* Compute and return population variance as mis-match metric. */ - return DiffVal; -} - -static ogg_uint32_t GetHalfPixelSumAbsDiffs (CP_INSTANCE *cpi, - unsigned char * SrcData, - unsigned char * RefDataPtr1, - unsigned char * RefDataPtr2, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t ErrorSoFar, - ogg_uint32_t BestSoFar ) { - - ogg_uint32_t DiffVal = ErrorSoFar; - ogg_int32_t RefOffset = (int)(RefDataPtr1 - RefDataPtr2); - ogg_uint32_t RefPixelsPerLine = PixelsPerLine + STRIDE_EXTRA; - - if ( RefOffset == 0 ) { - /* Simple case as for non 0.5 pixel */ - DiffVal += dsp_sad8x8 (cpi->dsp, SrcData, PixelsPerLine, - RefDataPtr1, RefPixelsPerLine); - } else { - DiffVal += dsp_sad8x8_xy2_thres (cpi->dsp, SrcData, PixelsPerLine, - RefDataPtr1, - RefDataPtr2, RefPixelsPerLine, BestSoFar); - } - - return DiffVal; -} - -ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine ) { - ogg_uint32_t LocalFragIndex = FragIndex; - ogg_uint32_t IntraError = 0; - - dsp_save_fpu (cpi->dsp); - - /* Add together the intra errors for those blocks in the macro block - that are coded (Y only) */ - if ( cpi->pb.display_fragments[LocalFragIndex] ) - 
IntraError += - dsp_intra8x8_err (cpi->dsp, &cpi-> - ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]], - PixelsPerLine); - - LocalFragIndex++; - if ( cpi->pb.display_fragments[LocalFragIndex] ) - IntraError += - dsp_intra8x8_err (cpi->dsp, &cpi-> - ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]], - PixelsPerLine); - - LocalFragIndex = FragIndex + cpi->pb.HFragments; - if ( cpi->pb.display_fragments[LocalFragIndex] ) - IntraError += - dsp_intra8x8_err (cpi->dsp, &cpi-> - ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]], - PixelsPerLine); - - LocalFragIndex++; - if ( cpi->pb.display_fragments[LocalFragIndex] ) - IntraError += - dsp_intra8x8_err (cpi->dsp, &cpi-> - ConvDestBuffer[cpi->pb.pixel_index_table[LocalFragIndex]], - PixelsPerLine); - - dsp_restore_fpu (cpi->dsp); - - return IntraError; -} - -ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi, - unsigned char * SrcPtr, - unsigned char * RefPtr, - ogg_uint32_t FragIndex, - ogg_int32_t LastXMV, - ogg_int32_t LastYMV, - ogg_uint32_t PixelsPerLine ) { - ogg_uint32_t RefPixelsPerLine = cpi->pb.YStride; - ogg_uint32_t LocalFragIndex = FragIndex; - ogg_int32_t PixelIndex; - ogg_int32_t RefPixelIndex; - ogg_int32_t RefPixelOffset; - ogg_int32_t RefPtr2Offset; - - ogg_uint32_t InterError = 0; - - unsigned char * SrcPtr1; - unsigned char * RefPtr1; - - dsp_save_fpu (cpi->dsp); - - /* Work out pixel offset into source buffer. */ - PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex]; - - /* Work out the pixel offset in reference buffer for the default - motion vector */ - RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex]; - RefPixelOffset = ((LastYMV/2) * RefPixelsPerLine) + (LastXMV/2); - - /* Work out the second reference pointer offset. 
*/ - RefPtr2Offset = 0; - if ( LastXMV % 2 ) { - if ( LastXMV > 0 ) - RefPtr2Offset += 1; - else - RefPtr2Offset -= 1; - } - if ( LastYMV % 2 ) { - if ( LastYMV > 0 ) - RefPtr2Offset += RefPixelsPerLine; - else - RefPtr2Offset -= RefPixelsPerLine; - } - - /* Add together the errors for those blocks in the macro block that - are coded (Y only) */ - if ( cpi->pb.display_fragments[LocalFragIndex] ) { - SrcPtr1 = &SrcPtr[PixelIndex]; - RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset]; - InterError += GetInterErr(cpi, SrcPtr1, RefPtr1, - &RefPtr1[RefPtr2Offset], PixelsPerLine ); - } - - LocalFragIndex++; - if ( cpi->pb.display_fragments[LocalFragIndex] ) { - PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex]; - RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex]; - SrcPtr1 = &SrcPtr[PixelIndex]; - RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset]; - InterError += GetInterErr(cpi, SrcPtr1, RefPtr1, - &RefPtr1[RefPtr2Offset], PixelsPerLine ); - - } - - LocalFragIndex = FragIndex + cpi->pb.HFragments; - if ( cpi->pb.display_fragments[LocalFragIndex] ) { - PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex]; - RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex]; - SrcPtr1 = &SrcPtr[PixelIndex]; - RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset]; - InterError += GetInterErr(cpi, SrcPtr1, RefPtr1, - &RefPtr1[RefPtr2Offset], PixelsPerLine ); - } - - LocalFragIndex++; - if ( cpi->pb.display_fragments[LocalFragIndex] ) { - PixelIndex = cpi->pb.pixel_index_table[LocalFragIndex]; - RefPixelIndex = cpi->pb.recon_pixel_index_table[LocalFragIndex]; - SrcPtr1 = &SrcPtr[PixelIndex]; - RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset]; - InterError += GetInterErr(cpi, SrcPtr1, RefPtr1, - &RefPtr1[RefPtr2Offset], PixelsPerLine ); - } - - dsp_restore_fpu (cpi->dsp); - - return InterError; -} - -ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - ogg_int32_t 
*MVPixelOffset, - MOTION_VECTOR *MV ) { - ogg_uint32_t Error = 0; - ogg_uint32_t MinError; - ogg_uint32_t InterMVError = 0; - - ogg_int32_t i; - ogg_int32_t x=0, y=0; - ogg_int32_t step; - ogg_int32_t SearchSite=0; - - unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL}; - unsigned char *RefPtr=NULL; - unsigned char *CandidateBlockPtr=NULL; - unsigned char *BestBlockPtr=NULL; - - ogg_uint32_t RefRow2Offset = cpi->pb.YStride * 8; - - int MBlockDispFrags[4]; - - /* Half pixel variables */ - ogg_int32_t HalfPixelError; - ogg_int32_t BestHalfPixelError; - unsigned char BestHalfOffset; - unsigned char * RefDataPtr1; - unsigned char * RefDataPtr2; - - dsp_save_fpu (cpi->dsp); - - /* Note which of the four blocks in the macro block are to be - included in the search. */ - MBlockDispFrags[0] = - cpi->pb.display_fragments[FragIndex]; - MBlockDispFrags[1] = - cpi->pb.display_fragments[FragIndex + 1]; - MBlockDispFrags[2] = - cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments]; - MBlockDispFrags[3] = - cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments + 1]; - - /* Set up the source pointers for the four source blocks. */ - SrcPtr[0] = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]]; - SrcPtr[1] = SrcPtr[0] + 8; - SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8); - SrcPtr[3] = SrcPtr[2] + 8; - - /* Set starting reference point for search. */ - RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]]; - - /* Check the 0,0 candidate. 
*/ - if ( MBlockDispFrags[0] ) { - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, RefPtr, - PixelsPerLine + STRIDE_EXTRA); - } - if ( MBlockDispFrags[1] ) { - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[1], PixelsPerLine, RefPtr + 8, - PixelsPerLine + STRIDE_EXTRA); - } - if ( MBlockDispFrags[2] ) { - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[2], PixelsPerLine, RefPtr + RefRow2Offset, - PixelsPerLine + STRIDE_EXTRA); - } - if ( MBlockDispFrags[3] ) { - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[3], PixelsPerLine, RefPtr + RefRow2Offset + 8, - PixelsPerLine + STRIDE_EXTRA); - } - - /* Set starting values to results of 0, 0 vector. */ - MinError = Error; - BestBlockPtr = RefPtr; - x = 0; - y = 0; - MV->x = 0; - MV->y = 0; - - /* Proceed through N-steps. */ - for ( step=0; stepMVSearchSteps; step++ ) { - /* Search the 8-neighbours at distance pertinent to current step.*/ - for ( i=0; i<8; i++ ) { - /* Set pointer to next candidate matching block. */ - CandidateBlockPtr = RefPtr + MVPixelOffset[SearchSite]; - - /* Reset error */ - Error = 0; - - /* Get the score for the current offset */ - if ( MBlockDispFrags[0] ) { - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, CandidateBlockPtr, - PixelsPerLine + STRIDE_EXTRA); - } - - if ( MBlockDispFrags[1] && (Error < MinError) ) { - Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[1], PixelsPerLine, CandidateBlockPtr + 8, - PixelsPerLine + STRIDE_EXTRA, MinError); - } - - if ( MBlockDispFrags[2] && (Error < MinError) ) { - Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[2], PixelsPerLine, CandidateBlockPtr + RefRow2Offset, - PixelsPerLine + STRIDE_EXTRA, MinError); - } - - if ( MBlockDispFrags[3] && (Error < MinError) ) { - Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[3], PixelsPerLine, CandidateBlockPtr + RefRow2Offset + 8, - PixelsPerLine + STRIDE_EXTRA, MinError); - } - - if ( Error < MinError ) { - /* Remember best match. */ - MinError = Error; - BestBlockPtr = CandidateBlockPtr; - - /* Where is it. 
*/ - x = MV->x + cpi->MVOffsetX[SearchSite]; - y = MV->y + cpi->MVOffsetY[SearchSite]; - } - - /* Move to next search location. */ - SearchSite += 1; - } - - /* Move to best location this step. */ - RefPtr = BestBlockPtr; - MV->x = x; - MV->y = y; - } - - /* Factor vectors to 1/2 pixel resoultion. */ - MV->x = (MV->x * 2); - MV->y = (MV->y * 2); - - /* Now do the half pixel pass */ - BestHalfOffset = 4; /* Default to the no offset case. */ - BestHalfPixelError = MinError; - - /* Get the half pixel error for each half pixel offset */ - for ( i=0; i < 9; i++ ) { - HalfPixelError = 0; - - if ( MBlockDispFrags[0] ) { - RefDataPtr1 = BestBlockPtr; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[0], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( MBlockDispFrags[1] && (HalfPixelError < BestHalfPixelError) ) { - RefDataPtr1 = BestBlockPtr + 8; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[1], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) { - RefDataPtr1 = BestBlockPtr + RefRow2Offset; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[2], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) { - RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[3], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( HalfPixelError < BestHalfPixelError ) { - BestHalfOffset = (unsigned char)i; - BestHalfPixelError = HalfPixelError; - } - } - - /* Half pixel adjust the MV */ - MV->x += 
cpi->HalfPixelXOffset[BestHalfOffset]; - MV->y += cpi->HalfPixelYOffset[BestHalfOffset]; - - /* Get the error score for the chosen 1/2 pixel offset as a variance. */ - InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr, - FragIndex, MV->x, MV->y, PixelsPerLine ); - - dsp_restore_fpu (cpi->dsp); - - /* Return score of best matching block. */ - return InterMVError; -} - -ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - MOTION_VECTOR *MV ) { - ogg_uint32_t Error = 0; - ogg_uint32_t MinError = HUGE_ERROR; - ogg_uint32_t InterMVError = 0; - - ogg_int32_t i, j; - ogg_int32_t x=0, y=0; - - unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL}; - unsigned char *RefPtr; - unsigned char *CandidateBlockPtr=NULL; - unsigned char *BestBlockPtr=NULL; - - ogg_uint32_t RefRow2Offset = cpi->pb.YStride * 8; - - int MBlockDispFrags[4]; - - /* Half pixel variables */ - ogg_int32_t HalfPixelError; - ogg_int32_t BestHalfPixelError; - unsigned char BestHalfOffset; - unsigned char * RefDataPtr1; - unsigned char * RefDataPtr2; - - dsp_save_fpu (cpi->dsp); - - /* Note which of the four blocks in the macro block are to be - included in the search. */ - MBlockDispFrags[0] = cpi-> - pb.display_fragments[FragIndex]; - MBlockDispFrags[1] = cpi-> - pb.display_fragments[FragIndex + 1]; - MBlockDispFrags[2] = cpi-> - pb.display_fragments[FragIndex + cpi->pb.HFragments]; - MBlockDispFrags[3] = cpi-> - pb.display_fragments[FragIndex + cpi->pb.HFragments + 1]; - - /* Set up the source pointers for the four source blocks. 
*/ - SrcPtr[0] = &cpi-> - ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]]; - SrcPtr[1] = SrcPtr[0] + 8; - SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8); - SrcPtr[3] = SrcPtr[2] + 8; - - RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]]; - RefPtr = RefPtr - ((MAX_MV_EXTENT/2) * cpi-> - pb.YStride) - (MAX_MV_EXTENT/2); - - /* Search each pixel alligned site */ - for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) { - /* Starting position in row */ - CandidateBlockPtr = RefPtr; - - for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ) { - /* Reset error */ - Error = 0; - - /* Summ errors for each block. */ - if ( MBlockDispFrags[0] ) { - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, CandidateBlockPtr, - PixelsPerLine + STRIDE_EXTRA); - } - if ( MBlockDispFrags[1] ){ - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[1], PixelsPerLine, CandidateBlockPtr + 8, - PixelsPerLine + STRIDE_EXTRA); - } - if ( MBlockDispFrags[2] ){ - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[2], PixelsPerLine, CandidateBlockPtr + RefRow2Offset, - PixelsPerLine + STRIDE_EXTRA); - } - if ( MBlockDispFrags[3] ){ - Error += dsp_sad8x8 (cpi->dsp, SrcPtr[3], PixelsPerLine, CandidateBlockPtr + RefRow2Offset + 8, - PixelsPerLine + STRIDE_EXTRA); - } - - /* Was this the best so far */ - if ( Error < MinError ) { - MinError = Error; - BestBlockPtr = CandidateBlockPtr; - x = 16 + j - MAX_MV_EXTENT; - y = 16 + i - MAX_MV_EXTENT; - } - - /* Move the the next site */ - CandidateBlockPtr ++; - } - - /* Move on to the next row. */ - RefPtr += cpi->pb.YStride; - - } - - /* Factor vectors to 1/2 pixel resoultion. */ - MV->x = (x * 2); - MV->y = (y * 2); - - /* Now do the half pixel pass */ - BestHalfOffset = 4; /* Default to the no offset case. 
*/ - BestHalfPixelError = MinError; - - /* Get the half pixel error for each half pixel offset */ - for ( i=0; i < 9; i++ ) { - HalfPixelError = 0; - - if ( MBlockDispFrags[0] ) { - RefDataPtr1 = BestBlockPtr; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[0], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( MBlockDispFrags[1] && (HalfPixelError < BestHalfPixelError) ) { - RefDataPtr1 = BestBlockPtr + 8; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[1], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) { - RefDataPtr1 = BestBlockPtr + RefRow2Offset; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[2], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) { - RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8; - RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr[3], RefDataPtr1, RefDataPtr2, - PixelsPerLine, HalfPixelError, BestHalfPixelError ); - } - - if ( HalfPixelError < BestHalfPixelError ){ - BestHalfOffset = (unsigned char)i; - BestHalfPixelError = HalfPixelError; - } - } - - /* Half pixel adjust the MV */ - MV->x += cpi->HalfPixelXOffset[BestHalfOffset]; - MV->y += cpi->HalfPixelYOffset[BestHalfOffset]; - - /* Get the error score for the chosen 1/2 pixel offset as a variance. */ - InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr, - FragIndex, MV->x, MV->y, PixelsPerLine ); - - dsp_restore_fpu (cpi->dsp); - - /* Return score of best matching block. 
*/ - return InterMVError; -} - -static ogg_uint32_t GetBMVExhaustiveSearch (CP_INSTANCE *cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - MOTION_VECTOR *MV ) { - ogg_uint32_t Error = 0; - ogg_uint32_t MinError = HUGE_ERROR; - ogg_uint32_t InterMVError = 0; - - ogg_int32_t i, j; - ogg_int32_t x=0, y=0; - - unsigned char *SrcPtr = NULL; - unsigned char *RefPtr; - unsigned char *CandidateBlockPtr=NULL; - unsigned char *BestBlockPtr=NULL; - - /* Half pixel variables */ - ogg_int32_t HalfPixelError; - ogg_int32_t BestHalfPixelError; - unsigned char BestHalfOffset; - unsigned char * RefDataPtr2; - - /* Set up the source pointer for the block. */ - SrcPtr = &cpi-> - ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]]; - - RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]]; - RefPtr = RefPtr - ((MAX_MV_EXTENT/2) * - cpi->pb.YStride) - (MAX_MV_EXTENT/2); - - /* Search each pixel alligned site */ - for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) { - /* Starting position in row */ - CandidateBlockPtr = RefPtr; - - for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ){ - /* Get the block error score. */ - Error = dsp_sad8x8 (cpi->dsp, SrcPtr, PixelsPerLine, CandidateBlockPtr, - PixelsPerLine + STRIDE_EXTRA); - - /* Was this the best so far */ - if ( Error < MinError ) { - MinError = Error; - BestBlockPtr = CandidateBlockPtr; - x = 16 + j - MAX_MV_EXTENT; - y = 16 + i - MAX_MV_EXTENT; - } - - /* Move the the next site */ - CandidateBlockPtr ++; - } - - /* Move on to the next row. */ - RefPtr += cpi->pb.YStride; - } - - /* Factor vectors to 1/2 pixel resoultion. */ - MV->x = (x * 2); - MV->y = (y * 2); - - /* Now do the half pixel pass */ - BestHalfOffset = 4; /* Default to the no offset case. 
*/ - BestHalfPixelError = MinError; - - /* Get the half pixel error for each half pixel offset */ - for ( i=0; i < 9; i++ ) { - RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[i]; - HalfPixelError = - GetHalfPixelSumAbsDiffs(cpi, SrcPtr, BestBlockPtr, RefDataPtr2, - PixelsPerLine, 0, BestHalfPixelError ); - - if ( HalfPixelError < BestHalfPixelError ){ - BestHalfOffset = (unsigned char)i; - BestHalfPixelError = HalfPixelError; - } - } - - /* Half pixel adjust the MV */ - MV->x += cpi->HalfPixelXOffset[BestHalfOffset]; - MV->y += cpi->HalfPixelYOffset[BestHalfOffset]; - - /* Get the variance score at the chosen offset */ - RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[BestHalfOffset]; - - InterMVError = - GetInterErr(cpi, SrcPtr, BestBlockPtr, RefDataPtr2, PixelsPerLine ); - - /* Return score of best matching block. */ - return InterMVError; -} - -ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi, - unsigned char * RefFramePtr, - ogg_uint32_t FragIndex, - ogg_uint32_t PixelsPerLine, - MOTION_VECTOR *MV ) { - ogg_uint32_t InterMVError; - - dsp_save_fpu (cpi->dsp); - - /* For the moment the 4MV mode is only deemed to be valid - if all four Y blocks are to be updated */ - /* This may be adapted later. */ - if ( cpi->pb.display_fragments[FragIndex] && - cpi->pb.display_fragments[FragIndex + 1] && - cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments] && - cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments + 1] ) { - - /* Reset the error score. 
*/ - InterMVError = 0; - - /* Get the error component from each coded block */ - InterMVError += - GetBMVExhaustiveSearch(cpi, RefFramePtr, FragIndex, - PixelsPerLine, &(MV[0]) ); - InterMVError += - GetBMVExhaustiveSearch(cpi, RefFramePtr, (FragIndex + 1), - PixelsPerLine, &(MV[1]) ); - InterMVError += - GetBMVExhaustiveSearch(cpi, RefFramePtr, - (FragIndex + cpi->pb.HFragments), - PixelsPerLine, &(MV[2]) ); - InterMVError += - GetBMVExhaustiveSearch(cpi, RefFramePtr, - (FragIndex + cpi->pb.HFragments + 1), - PixelsPerLine, &(MV[3]) ); - }else{ - InterMVError = HUGE_ERROR; - } - - dsp_restore_fpu (cpi->dsp); - - /* Return score of best matching block. */ - return InterMVError; -} - diff --git a/Engine/lib/libtheora/lib/enc/misc_common.c b/Engine/lib/libtheora/lib/enc/misc_common.c deleted file mode 100644 index 1536a494a..000000000 --- a/Engine/lib/libtheora/lib/enc/misc_common.c +++ /dev/null @@ -1,339 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: misc_common.c 15323 2008-09-19 19:43:59Z giles $ - - ********************************************************************/ - -#include -#include "codec_internal.h" -#include "block_inline.h" - -#define FIXED_Q 150 -#define MAX_UP_REG_LOOPS 2 - -/* Gives the initial bytes per block estimate for each Q value */ -static const double BpbTable[Q_TABLE_SIZE] = { - 0.42, 0.45, 0.46, 0.49, 0.51, 0.53, 0.56, 0.58, - 0.61, 0.64, 0.68, 0.71, 0.74, 0.77, 0.80, 0.84, - 0.89, 0.92, 0.98, 1.01, 1.04, 1.13, 1.17, 1.23, - 1.28, 1.34, 1.41, 1.45, 1.51, 1.59, 1.69, 1.80, - 1.84, 1.94, 2.02, 2.15, 2.23, 2.34, 2.44, 2.50, - 2.69, 2.80, 2.87, 3.04, 3.16, 3.29, 3.59, 3.66, - 3.86, 3.94, 4.22, 4.50, 4.64, 4.70, 5.24, 5.34, - 5.61, 5.87, 6.11, 6.41, 6.71, 6.99, 7.36, 7.69 -}; - -static const double KfBpbTable[Q_TABLE_SIZE] = { - 0.74, 0.81, 0.88, 0.94, 1.00, 1.06, 1.14, 1.19, - 1.27, 1.34, 1.42, 1.49, 1.54, 1.59, 1.66, 1.73, - 1.80, 1.87, 1.97, 2.01, 2.08, 2.21, 2.25, 2.36, - 2.39, 2.50, 2.55, 2.65, 2.71, 2.82, 2.95, 3.01, - 3.11, 3.19, 3.31, 3.42, 3.58, 3.66, 3.78, 3.89, - 4.11, 4.26, 4.36, 4.39, 4.63, 4.76, 4.85, 5.04, - 5.26, 5.29, 5.47, 5.64, 5.76, 6.05, 6.35, 6.67, - 6.91, 7.17, 7.40, 7.56, 8.02, 8.45, 8.86, 9.38 -}; - -double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ ){ - ogg_uint32_t i; - ogg_int32_t ThreshTableIndex = Q_TABLE_SIZE - 1; - double BytesPerBlock; - - /* Search for the Q table index that matches the given Q. 
*/ - for ( i = 0; i < Q_TABLE_SIZE; i++ ) { - if ( TargetQ >= cpi->pb.QThreshTable[i] ) { - ThreshTableIndex = i; - break; - } - } - - /* Adjust according to Q shift and type of frame */ - if ( cpi->pb.FrameType == KEY_FRAME ) { - /* Get primary prediction */ - BytesPerBlock = KfBpbTable[ThreshTableIndex]; - } else { - /* Get primary prediction */ - BytesPerBlock = BpbTable[ThreshTableIndex]; - BytesPerBlock = BytesPerBlock * cpi->BpbCorrectionFactor; - } - - return BytesPerBlock; -} - -static void UpRegulateMB( CP_INSTANCE *cpi, ogg_uint32_t RegulationQ, - ogg_uint32_t SB, ogg_uint32_t MB, int NoCheck ) { - ogg_int32_t FragIndex; - ogg_uint32_t B; - - /* Variables used in calculating corresponding row,col and index in - UV planes */ - ogg_uint32_t UVRow; - ogg_uint32_t UVColumn; - ogg_uint32_t UVFragOffset; - - /* There may be MB's lying out of frame which must be ignored. For - these MB's Top left block will have a negative Fragment Index. */ - if ( QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB ) >= 0 ) { - /* Up regulate the component blocks Y then UV. 
*/ - for ( B=0; B<4; B++ ){ - FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B ); - - if ( ( !cpi->pb.display_fragments[FragIndex] ) && - ( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ){ - cpi->pb.display_fragments[FragIndex] = 1; - cpi->extra_fragments[FragIndex] = 1; - cpi->FragmentLastQ[FragIndex] = RegulationQ; - cpi->MotionScore++; - } - } - - /* Check the two UV blocks */ - FragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB ); - - UVRow = (FragIndex / (cpi->pb.HFragments * 2)); - UVColumn = (FragIndex % cpi->pb.HFragments) / 2; - UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn; - - FragIndex = cpi->pb.YPlaneFragments + UVFragOffset; - if ( ( !cpi->pb.display_fragments[FragIndex] ) && - ( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ) { - cpi->pb.display_fragments[FragIndex] = 1; - cpi->extra_fragments[FragIndex] = 1; - cpi->FragmentLastQ[FragIndex] = RegulationQ; - cpi->MotionScore++; - } - - FragIndex += cpi->pb.UVPlaneFragments; - if ( ( !cpi->pb.display_fragments[FragIndex] ) && - ( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ) { - cpi->pb.display_fragments[FragIndex] = 1; - cpi->extra_fragments[FragIndex] = 1; - cpi->FragmentLastQ[FragIndex] = RegulationQ; - cpi->MotionScore++; - } - } -} - -static void UpRegulateBlocks (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ, - ogg_int32_t RecoveryBlocks, - ogg_uint32_t * LastSB, ogg_uint32_t * LastMB ) { - - ogg_uint32_t LoopTimesRound = 0; - ogg_uint32_t MaxSB = cpi->pb.YSBRows * - cpi->pb.YSBCols; /* Tot super blocks in image */ - ogg_uint32_t SB, MB; /* Super-Block and macro block indices. */ - - /* First scan for blocks for which a residue update is outstanding. 
*/ - while ( (cpi->MotionScore < RecoveryBlocks) && - (LoopTimesRound < MAX_UP_REG_LOOPS) ) { - LoopTimesRound++; - - for ( SB = (*LastSB); SB < MaxSB; SB++ ) { - /* Check its four Macro-Blocks */ - for ( MB=(*LastMB); MB<4; MB++ ) { - /* Mark relevant blocks for update */ - UpRegulateMB( cpi, RegulationQ, SB, MB, 0 ); - - /* Keep track of the last refresh MB. */ - (*LastMB) += 1; - if ( (*LastMB) == 4 ) - (*LastMB) = 0; - - /* Termination clause */ - if (cpi->MotionScore >= RecoveryBlocks) { - /* Make sure we don't stall at SB level */ - if ( *LastMB == 0 ) - SB++; - break; - } - } - - /* Termination clause */ - if (cpi->MotionScore >= RecoveryBlocks) - break; - } - - /* Update super block start index */ - if ( SB >= MaxSB){ - (*LastSB) = 0; - }else{ - (*LastSB) = SB; - } - } -} - -void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ, - ogg_int32_t RecoveryBlocks ) { - ogg_uint32_t LastPassMBPos = 0; - ogg_uint32_t StdLastMBPos = 0; - - ogg_uint32_t MaxSB = cpi->pb.YSBRows * - cpi->pb.YSBCols; /* Tot super blocks in image */ - - ogg_uint32_t SB=0; /* Super-Block index */ - ogg_uint32_t MB; /* Macro-Block index */ - - /* Decduct the number of blocks in an MB / 2 from the recover block count. - This will compensate for the fact that once we start checking an MB - we test every block in that macro block */ - if ( RecoveryBlocks > 3 ) - RecoveryBlocks -= 3; - - /* Up regulate blocks last coded at higher Q */ - UpRegulateBlocks( cpi, RegulationQ, RecoveryBlocks, - &cpi->LastEndSB, &StdLastMBPos ); - - /* If we have still not used up the minimum number of blocks and are - at the minimum Q then run through a final pass of the data to - insure that each block gets a final refresh. 
*/ - if ( (RegulationQ == VERY_BEST_Q) && - (cpi->MotionScore < RecoveryBlocks) ) { - if ( cpi->FinalPassLastPos < MaxSB ) { - for ( SB = cpi->FinalPassLastPos; SB < MaxSB; SB++ ) { - /* Check its four Macro-Blocks */ - for ( MB=LastPassMBPos; MB<4; MB++ ) { - /* Mark relevant blocks for update */ - UpRegulateMB( cpi, RegulationQ, SB, MB, 1 ); - - /* Keep track of the last refresh MB. */ - LastPassMBPos += 1; - if ( LastPassMBPos == 4 ) { - LastPassMBPos = 0; - - /* Increment SB index */ - cpi->FinalPassLastPos += 1; - } - - /* Termination clause */ - if (cpi->MotionScore >= RecoveryBlocks) - break; - } - - /* Termination clause */ - if (cpi->MotionScore >= RecoveryBlocks) - break; - - } - } - } -} - -void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore ) { - double PredUnitScoreBytes; - ogg_uint32_t QIndex = Q_TABLE_SIZE - 1; - ogg_uint32_t i; - - if ( UpdateScore > 0 ) { - double TargetUnitScoreBytes = (double)cpi->ThisFrameTargetBytes / - (double)UpdateScore; - double LastBitError = 10000.0; /* Silly high number */ - /* Search for the best Q for the target bitrate. */ - for ( i = 0; i < Q_TABLE_SIZE; i++ ) { - PredUnitScoreBytes = GetEstimatedBpb( cpi, cpi->pb.QThreshTable[i] ); - if ( PredUnitScoreBytes > TargetUnitScoreBytes ) { - if ( (PredUnitScoreBytes - TargetUnitScoreBytes) <= LastBitError ) { - QIndex = i; - } else { - QIndex = i - 1; - } - break; - } else { - LastBitError = TargetUnitScoreBytes - PredUnitScoreBytes; - } - } - } - - /* QIndex should now indicate the optimal Q. */ - cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[QIndex]; - - /* Apply range restrictions for key frames. 
*/ - if ( cpi->pb.FrameType == KEY_FRAME ) { - if ( cpi->pb.ThisFrameQualityValue > cpi->pb.QThreshTable[20] ) - cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[20]; - else if ( cpi->pb.ThisFrameQualityValue < cpi->pb.QThreshTable[50] ) - cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[50]; - } - - /* Limit the Q value to the maximum available value */ - if (cpi->pb.ThisFrameQualityValue > - cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) { - cpi->pb.ThisFrameQualityValue = - (ogg_uint32_t)cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]; - } - - if(cpi->FixedQ) { - if ( cpi->pb.FrameType == KEY_FRAME ) { - cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[43]; - cpi->pb.ThisFrameQualityValue = cpi->FixedQ; - } else { - cpi->pb.ThisFrameQualityValue = cpi->FixedQ; - } - } - - /* If the quantizer value has changed then re-initialise it */ - if ( cpi->pb.ThisFrameQualityValue != cpi->pb.LastFrameQualityValue ) { - /* Initialise quality tables. */ - UpdateQC( cpi, cpi->pb.ThisFrameQualityValue ); - cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue; - } -} - -void CopyBackExtraFrags(CP_INSTANCE *cpi){ - ogg_uint32_t i,j; - unsigned char * SrcPtr; - unsigned char * DestPtr; - ogg_uint32_t PlaneLineStep; - ogg_uint32_t PixelIndex; - - /* Copy back for Y plane. */ - PlaneLineStep = cpi->pb.info.width; - for ( i = 0; i < cpi->pb.YPlaneFragments; i++ ) { - /* We are only interested in updated fragments. */ - if ( cpi->extra_fragments[i] ) { - /* Get the start index for the fragment. 
*/ - PixelIndex = cpi->pb.pixel_index_table[i]; - SrcPtr = &cpi->yuv1ptr[PixelIndex]; - DestPtr = &cpi->ConvDestBuffer[PixelIndex]; - - for ( j = 0; j < VFRAGPIXELS; j++ ) { - memcpy( DestPtr, SrcPtr, HFRAGPIXELS); - - SrcPtr += PlaneLineStep; - DestPtr += PlaneLineStep; - } - } - } - - /* Now the U and V planes */ - PlaneLineStep = cpi->pb.info.width / 2; - for ( i = cpi->pb.YPlaneFragments; - i < (cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments)) ; - i++ ) { - - /* We are only interested in updated fragments. */ - if ( cpi->extra_fragments[i] ) { - /* Get the start index for the fragment. */ - PixelIndex = cpi->pb.pixel_index_table[i]; - SrcPtr = &cpi->yuv1ptr[PixelIndex]; - DestPtr = &cpi->ConvDestBuffer[PixelIndex]; - - for ( j = 0; j < VFRAGPIXELS; j++ ) { - memcpy( DestPtr, SrcPtr, HFRAGPIXELS); - SrcPtr += PlaneLineStep; - DestPtr += PlaneLineStep; - } - } - } -} - diff --git a/Engine/lib/libtheora/lib/enc/pb.c b/Engine/lib/libtheora/lib/enc/pb.c deleted file mode 100644 index 42047249a..000000000 --- a/Engine/lib/libtheora/lib/enc/pb.c +++ /dev/null @@ -1,89 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: pb.c 14372 2008-01-05 23:52:28Z giles $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" - -void ClearTmpBuffers(PB_INSTANCE * pbi){ - - if(pbi->ReconDataBuffer) - _ogg_free(pbi->ReconDataBuffer); - if(pbi->DequantBuffer) - _ogg_free(pbi->DequantBuffer); - if(pbi->TmpDataBuffer) - _ogg_free(pbi->TmpDataBuffer); - if(pbi->TmpReconBuffer) - _ogg_free(pbi->TmpReconBuffer); - - - pbi->ReconDataBuffer=0; - pbi->DequantBuffer = 0; - pbi->TmpDataBuffer = 0; - pbi->TmpReconBuffer = 0; - -} - -void InitTmpBuffers(PB_INSTANCE * pbi){ - - /* clear any existing info */ - ClearTmpBuffers(pbi); - - /* Adjust the position of all of our temporary */ - pbi->ReconDataBuffer = - _ogg_malloc(64*sizeof(*pbi->ReconDataBuffer)); - - pbi->DequantBuffer = - _ogg_malloc(64 * sizeof(*pbi->DequantBuffer)); - - pbi->TmpDataBuffer = - _ogg_malloc(64 * sizeof(*pbi->TmpDataBuffer)); - - pbi->TmpReconBuffer = - _ogg_malloc(64 * sizeof(*pbi->TmpReconBuffer)); - -} - -void ClearPBInstance(PB_INSTANCE *pbi){ - if(pbi){ - ClearTmpBuffers(pbi); - if (pbi->opb) { - _ogg_free(pbi->opb); - } - } -} - -void InitPBInstance(PB_INSTANCE *pbi){ - /* initialize whole structure to 0 */ - memset(pbi, 0, sizeof(*pbi)); - - InitTmpBuffers(pbi); - - /* allocate memory for the oggpack_buffer */ - pbi->opb = _ogg_malloc(sizeof(oggpack_buffer)); - - /* variables needing initialization (not being set to 0) */ - - pbi->ModifierPointer[0] = &pbi->Modifier[0][255]; - pbi->ModifierPointer[1] = &pbi->Modifier[1][255]; - pbi->ModifierPointer[2] = &pbi->Modifier[2][255]; - pbi->ModifierPointer[3] = &pbi->Modifier[3][255]; - - pbi->DecoderErrorCode = 0; - pbi->KeyFrameType = DCT_KEY_FRAME; - pbi->FramesHaveBeenSkipped = 0; -} diff 
--git a/Engine/lib/libtheora/lib/enc/pp.c b/Engine/lib/libtheora/lib/enc/pp.c deleted file mode 100644 index c45289703..000000000 --- a/Engine/lib/libtheora/lib/enc/pp.c +++ /dev/null @@ -1,951 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: pp.c 15057 2008-06-22 21:07:32Z xiphmont $ - - ********************************************************************/ - -#include -#include -#include "codec_internal.h" -#include "pp.h" -#include "dsp.h" - -#define MAX(a, b) ((a>b)?a:b) -#define MIN(a, b) ((aScanPixelIndexTable) _ogg_free(ppi->ScanPixelIndexTable); - ppi->ScanPixelIndexTable=0; - - if(ppi->ScanDisplayFragments) _ogg_free(ppi->ScanDisplayFragments); - ppi->ScanDisplayFragments=0; - - for(i = 0 ; i < MAX_PREV_FRAMES ; i ++) - if(ppi->PrevFragments[i]){ - _ogg_free(ppi->PrevFragments[i]); - ppi->PrevFragments[i]=0; - } - - if(ppi->FragScores) _ogg_free(ppi->FragScores); - ppi->FragScores=0; - - if(ppi->SameGreyDirPixels) _ogg_free(ppi->SameGreyDirPixels); - ppi->SameGreyDirPixels=0; - - if(ppi->FragDiffPixels) _ogg_free(ppi->FragDiffPixels); - ppi->FragDiffPixels=0; - - if(ppi->BarBlockMap) _ogg_free(ppi->BarBlockMap); - ppi->BarBlockMap=0; - - if(ppi->TmpCodedMap) _ogg_free(ppi->TmpCodedMap); - ppi->TmpCodedMap=0; - - if(ppi->RowChangedPixels) _ogg_free(ppi->RowChangedPixels); - ppi->RowChangedPixels=0; - - if(ppi->PixelScores) _ogg_free(ppi->PixelScores); - ppi->PixelScores=0; - - if(ppi->PixelChangedMap) _ogg_free(ppi->PixelChangedMap); - 
ppi->PixelChangedMap=0; - - if(ppi->ChLocals) _ogg_free(ppi->ChLocals); - ppi->ChLocals=0; - - if(ppi->yuv_differences) _ogg_free(ppi->yuv_differences); - ppi->yuv_differences=0; - -} - -void PInitFrameInfo(PP_INSTANCE * ppi){ - int i; - PClearFrameInfo(ppi); - - ppi->ScanPixelIndexTable = - _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanPixelIndexTable)); - - ppi->ScanDisplayFragments = - _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanDisplayFragments)); - - for(i = 0 ; i < MAX_PREV_FRAMES ; i ++) - ppi->PrevFragments[i] = - _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->PrevFragments)); - - ppi->FragScores = - _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores)); - - ppi->SameGreyDirPixels = - _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->SameGreyDirPixels)); - - ppi->FragDiffPixels = - _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores)); - - ppi->BarBlockMap= - _ogg_malloc(3 * ppi->ScanHFragments*sizeof(*ppi->BarBlockMap)); - - ppi->TmpCodedMap = - _ogg_malloc(ppi->ScanHFragments*sizeof(*ppi->TmpCodedMap)); - - ppi->RowChangedPixels = - _ogg_malloc(3 * ppi->ScanConfig.VideoFrameHeight* - sizeof(*ppi->RowChangedPixels)); - - ppi->PixelScores = - _ogg_malloc(ppi->ScanConfig.VideoFrameWidth* - sizeof(*ppi->PixelScores) * PSCORE_CB_ROWS); - - ppi->PixelChangedMap = - _ogg_malloc(ppi->ScanConfig.VideoFrameWidth* - sizeof(*ppi->PixelChangedMap) * PMAP_CB_ROWS); - - ppi->ChLocals = - _ogg_malloc(ppi->ScanConfig.VideoFrameWidth* - sizeof(*ppi->ChLocals) * CHLOCALS_CB_ROWS); - - ppi->yuv_differences = - _ogg_malloc(ppi->ScanConfig.VideoFrameWidth* - sizeof(*ppi->yuv_differences) * YDIFF_CB_ROWS); -} - -void ClearPPInstance(PP_INSTANCE *ppi){ - PClearFrameInfo(ppi); -} - - -void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs){ - - memset(ppi,0,sizeof(*ppi)); - - memcpy(&ppi->dsp, funcs, sizeof(DspFunctions)); - - /* Initializations */ - ppi->PrevFrameLimit = 3; /* Must not exceed MAX_PREV_FRAMES (Note - that this number includes 
the current - frame so "1 = no effect") */ - - /* Scan control variables. */ - ppi->HFragPixels = 8; - ppi->VFragPixels = 8; - - ppi->SRFGreyThresh = 4; - ppi->SRFColThresh = 5; - ppi->NoiseSupLevel = 3; - ppi->SgcLevelThresh = 3; - ppi->SuvcLevelThresh = 4; - - /* Variables controlling S.A.D. breakouts. */ - ppi->GrpLowSadThresh = 10; - ppi->GrpHighSadThresh = 64; - ppi->PrimaryBlockThreshold = 5; - ppi->SgcThresh = 16; /* (Default values for 8x8 blocks). */ - - ppi->UVBlockThreshCorrection = 1.25; - ppi->UVSgcCorrection = 1.5; - - ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN; -} - -static void DeringBlockStrong(unsigned char *SrcPtr, - unsigned char *DstPtr, - ogg_int32_t Pitch, - ogg_uint32_t FragQIndex, - const ogg_uint32_t *QuantScale){ - - ogg_int16_t UDMod[72]; - ogg_int16_t LRMod[72]; - unsigned int j,k,l; - const unsigned char * Src; - unsigned int QValue = QuantScale[FragQIndex]; - - unsigned char p; - unsigned char pl; - unsigned char pr; - unsigned char pu; - unsigned char pd; - - int al; - int ar; - int au; - int ad; - - int atot; - int B; - int newVal; - - const unsigned char *curRow = SrcPtr - 1; /* avoid negative array indexes */ - unsigned char *dstRow = DstPtr; - const unsigned char *lastRow = SrcPtr-Pitch; - const unsigned char *nextRow = SrcPtr+Pitch; - - unsigned int rowOffset = 0; - unsigned int round = (1<<6); - - int High; - int Low; - int TmpMod; - - int Sharpen = SharpenModifier[FragQIndex]; - High = 3 * QValue; - if(High>32)High=32; - Low = 0; - - - /* Initialize the Mod Data */ - Src = SrcPtr-Pitch; - for(k=0;k<9;k++){ - for(j=0;j<8;j++){ - - TmpMod = 32 + QValue - (abs(Src[j+Pitch]-Src[j])); - - if(TmpMod< -64) - TmpMod = Sharpen; - - else if(TmpModHigh) - TmpMod = High; - - UDMod[k*8+j] = (ogg_int16_t)TmpMod; - } - Src +=Pitch; - } - - Src = SrcPtr-1; - - for(k=0;k<8;k++){ - for(j=0;j<9;j++){ - TmpMod = 32 + QValue - (abs(Src[j+1]-Src[j])); - - if(TmpMod< -64 ) - TmpMod = Sharpen; - - else if(TmpMod<0) - TmpMod = Low; - - else 
if(TmpMod>High) - TmpMod = High; - - LRMod[k*9+j] = (ogg_int16_t)TmpMod; - } - Src+=Pitch; - } - - for(k=0;k<8;k++){ - /* In the case that this function called with same buffer for - source and destination, To keep the c and the mmx version to have - consistant results, intermediate buffer is used to store the - eight pixel value before writing them to destination - (i.e. Overwriting souce for the speical case) */ - for(l=0;l<8;l++){ - - atot = 128; - B = round; - p = curRow[ rowOffset +l +1]; - - pl = curRow[ rowOffset +l]; - al = LRMod[k*9+l]; - atot -= al; - B += al * pl; - - pu = lastRow[ rowOffset +l]; - au = UDMod[k*8+l]; - atot -= au; - B += au * pu; - - pd = nextRow[ rowOffset +l]; - ad = UDMod[(k+1)*8+l]; - atot -= ad; - B += ad * pd; - - pr = curRow[ rowOffset +l+2]; - ar = LRMod[k*9+l+1]; - atot -= ar; - B += ar * pr; - - newVal = ( atot * p + B) >> 7; - - dstRow[ rowOffset +l]= clamp255( newVal ); - } - rowOffset += Pitch; - } -} - -static void DeringBlockWeak(unsigned char *SrcPtr, - unsigned char *DstPtr, - ogg_int32_t Pitch, - ogg_uint32_t FragQIndex, - const ogg_uint32_t *QuantScale){ - - ogg_int16_t UDMod[72]; - ogg_int16_t LRMod[72]; - unsigned int j,k; - const unsigned char * Src; - unsigned int QValue = QuantScale[FragQIndex]; - - unsigned char p; - unsigned char pl; - unsigned char pr; - unsigned char pu; - unsigned char pd; - - int al; - int ar; - int au; - int ad; - - int atot; - int B; - int newVal; - - const unsigned char *curRow = SrcPtr-1; - unsigned char *dstRow = DstPtr; - const unsigned char *lastRow = SrcPtr-Pitch; - const unsigned char *nextRow = SrcPtr+Pitch; - - unsigned int rowOffset = 0; - unsigned int round = (1<<6); - - int High; - int Low; - int TmpMod; - int Sharpen = SharpenModifier[FragQIndex]; - - High = 3 * QValue; - if(High>24) - High=24; - Low = 0 ; - - /* Initialize the Mod Data */ - Src=SrcPtr-Pitch; - for(k=0;k<9;k++) { - for(j=0;j<8;j++) { - - TmpMod = 32 + QValue - 2*(abs(Src[j+Pitch]-Src[j])); - - if(TmpMod< -64) 
- TmpMod = Sharpen; - - else if(TmpModHigh) - TmpMod = High; - - UDMod[k*8+j] = (ogg_int16_t)TmpMod; - } - Src +=Pitch; - } - - Src = SrcPtr-1; - - for(k=0;k<8;k++){ - for(j=0;j<9;j++){ - TmpMod = 32 + QValue - 2*(abs(Src[j+1]-Src[j])); - - if(TmpMod< -64 ) - TmpMod = Sharpen; - - else if(TmpModHigh) - TmpMod = High; - - LRMod[k*9+j] = (ogg_int16_t)TmpMod; - } - Src+=Pitch; - } - - for(k=0;k<8;k++) { - for(j=0;j<8;j++){ - atot = 128; - B = round; - p = curRow[ rowOffset +j+1]; - - pl = curRow[ rowOffset +j]; - al = LRMod[k*9+j]; - atot -= al; - B += al * pl; - - pu = lastRow[ rowOffset +j]; - au = UDMod[k*8+j]; - atot -= au; - B += au * pu; - - pd = nextRow[ rowOffset +j]; - ad = UDMod[(k+1)*8+j]; - atot -= ad; - B += ad * pd; - - pr = curRow[ rowOffset +j+2]; - ar = LRMod[k*9+j+1]; - atot -= ar; - B += ar * pr; - - newVal = ( atot * p + B) >> 7; - - dstRow[ rowOffset +j] = clamp255( newVal ); - } - - rowOffset += Pitch; - } -} - -static void DeringFrame(PB_INSTANCE *pbi, - unsigned char *Src, unsigned char *Dst){ - ogg_uint32_t col,row; - unsigned char *SrcPtr; - unsigned char *DestPtr; - ogg_uint32_t BlocksAcross,BlocksDown; - const ogg_uint32_t *QuantScale; - ogg_uint32_t Block; - ogg_uint32_t LineLength; - - ogg_int32_t Thresh1,Thresh2,Thresh3,Thresh4; - - Thresh1 = 384; - Thresh2 = 4 * Thresh1; - Thresh3 = 5 * Thresh2/4; - Thresh4 = 5 * Thresh2/2; - - QuantScale = DeringModifierV1; - - BlocksAcross = pbi->HFragments; - BlocksDown = pbi->VFragments; - - SrcPtr = Src + pbi->ReconYDataOffset; - DestPtr = Dst + pbi->ReconYDataOffset; - LineLength = pbi->YStride; - - Block = 0; - - for ( row = 0 ; row < BlocksDown; row ++){ - for (col = 0; col < BlocksAcross; col ++){ - ogg_uint32_t Quality = pbi->FragQIndex[Block]; - ogg_int32_t Variance = pbi->FragmentVariances[Block]; - - if( pbi->PostProcessingLevel >5 && Variance > Thresh3 ){ - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - - if( (col > 0 && - 
pbi->FragmentVariances[Block-1] > Thresh4 ) || - (col + 1 < BlocksAcross && - pbi->FragmentVariances[Block+1] > Thresh4 ) || - (row + 1 < BlocksDown && - pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) || - (row > 0 && - pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) ){ - - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - } - } else if(Variance > Thresh2 ) { - - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - } else if(Variance > Thresh1 ) { - - DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - - } else { - - dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength); - - } - - ++Block; - - } - SrcPtr += 8 * LineLength; - DestPtr += 8 * LineLength; - } - - /* Then U */ - - BlocksAcross /= 2; - BlocksDown /= 2; - LineLength /= 2; - - SrcPtr = Src + pbi->ReconUDataOffset; - DestPtr = Dst + pbi->ReconUDataOffset; - for ( row = 0 ; row < BlocksDown; row ++) { - for (col = 0; col < BlocksAcross; col ++) { - ogg_uint32_t Quality = pbi->FragQIndex[Block]; - ogg_int32_t Variance = pbi->FragmentVariances[Block]; - - if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) { - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - - }else if(Variance > Thresh2 ){ - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - }else if(Variance > Thresh1 ){ - DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - }else{ - dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength); - } - - ++Block; - - } - SrcPtr += 8 * LineLength; - DestPtr += 8 * 
LineLength; - } - - /* Then V */ - SrcPtr = Src + pbi->ReconVDataOffset; - DestPtr = Dst + pbi->ReconVDataOffset; - - for ( row = 0 ; row < BlocksDown; row ++){ - for (col = 0; col < BlocksAcross; col ++){ - - ogg_uint32_t Quality = pbi->FragQIndex[Block]; - ogg_int32_t Variance = pbi->FragmentVariances[Block]; - - - if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) { - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - - }else if(Variance > Thresh2 ){ - DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - }else if(Variance > Thresh1 ){ - DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col, - LineLength,Quality,QuantScale); - }else{ - dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength); - } - - ++Block; - - } - SrcPtr += 8 * LineLength; - DestPtr += 8 * LineLength; - - } - -} - -void UpdateFragQIndex(PB_INSTANCE *pbi){ - - ogg_uint32_t ThisFrameQIndex; - ogg_uint32_t i; - - /* Check this frame quality index */ - ThisFrameQIndex = pbi->FrameQIndex; - - - /* It is not a key frame, so only reset those are coded */ - for( i = 0; i < pbi->UnitFragments; i++ ) - if( pbi->display_fragments[i]) - pbi->FragQIndex[i] = ThisFrameQIndex; - -} - -static void DeblockLoopFilteredBand(PB_INSTANCE *pbi, - unsigned char *SrcPtr, - unsigned char *DesPtr, - ogg_uint32_t PlaneLineStep, - ogg_uint32_t FragsAcross, - ogg_uint32_t StartFrag, - const ogg_uint32_t *QuantScale){ - ogg_uint32_t j,k; - ogg_uint32_t CurrentFrag=StartFrag; - ogg_int32_t QStep; - ogg_int32_t FLimit; - unsigned char *Src, *Des; - ogg_int32_t x[10]; - ogg_int32_t Sum1, Sum2; - - while(CurrentFrag < StartFrag + FragsAcross){ - - Src=SrcPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*5; - Des=DesPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*4; 
- - QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]]; - FLimit = ( QStep * 3 ) >> 2; - - for( j=0; j<8 ; j++){ - x[0] = Src[0]; - x[1] = Src[PlaneLineStep]; - x[2] = Src[PlaneLineStep*2]; - x[3] = Src[PlaneLineStep*3]; - x[4] = Src[PlaneLineStep*4]; - x[5] = Src[PlaneLineStep*5]; - x[6] = Src[PlaneLineStep*6]; - x[7] = Src[PlaneLineStep*7]; - x[8] = Src[PlaneLineStep*8]; - x[9] = Src[PlaneLineStep*9]; - - Sum1=Sum2=0; - - for(k=1;k<=4;k++){ - Sum1 += abs(x[k]-x[k-1]); - Sum2 += abs(x[k+4]-x[k+5]); - } - - pbi->FragmentVariances[CurrentFrag] +=((Sum1>255)?255:Sum1); - pbi->FragmentVariances[CurrentFrag + FragsAcross] += ((Sum2>255)?255:Sum2); - - if( Sum1 < FLimit && - Sum2 < FLimit && - (x[5] - x[4]) < QStep && - (x[4] - x[5]) < QStep ){ - - /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */ - Des[0 ] = (x[0] + x[0] +x[0] + x[1] * 2 + - x[2] + x[3] +x[4] + 4) >> 3; - Des[PlaneLineStep ] = (x[0] + x[0] +x[1] + x[2] * 2 + - x[3] + x[4] +x[5] + 4) >> 3; - Des[PlaneLineStep*2] = (x[0] + x[1] +x[2] + x[3] * 2 + - x[4] + x[5] +x[6] + 4) >> 3; - Des[PlaneLineStep*3] = (x[1] + x[2] +x[3] + x[4] * 2 + - x[5] + x[6] +x[7] + 4) >> 3; - Des[PlaneLineStep*4] = (x[2] + x[3] +x[4] + x[5] * 2 + - x[6] + x[7] +x[8] + 4) >> 3; - Des[PlaneLineStep*5] = (x[3] + x[4] +x[5] + x[6] * 2 + - x[7] + x[8] +x[9] + 4) >> 3; - Des[PlaneLineStep*6] = (x[4] + x[5] +x[6] + x[7] * 2 + - x[8] + x[9] +x[9] + 4) >> 3; - Des[PlaneLineStep*7] = (x[5] + x[6] +x[7] + x[8] * 2 + - x[9] + x[9] +x[9] + 4) >> 3; - - }else { - /* copy the pixels to destination */ - Des[0 ]= (unsigned char)x[1]; - Des[PlaneLineStep ]= (unsigned char)x[2]; - Des[PlaneLineStep*2]= (unsigned char)x[3]; - Des[PlaneLineStep*3]= (unsigned char)x[4]; - Des[PlaneLineStep*4]= (unsigned char)x[5]; - Des[PlaneLineStep*5]= (unsigned char)x[6]; - Des[PlaneLineStep*6]= (unsigned char)x[7]; - Des[PlaneLineStep*7]= (unsigned char)x[8]; - } - Src ++; - Des ++; - } - - - /* done with filtering the horizontal edge, now let's do the - 
vertical one */ - /* skip the first one */ - if(CurrentFrag==StartFrag) - CurrentFrag++; - else{ - Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag); - Src=Des-5; - Des-=4; - - QStep = QuantScale[pbi->FragQIndex[CurrentFrag]]; - FLimit = ( QStep * 3 ) >> 2; - - for( j=0; j<8 ; j++){ - x[0] = Src[0]; - x[1] = Src[1]; - x[2] = Src[2]; - x[3] = Src[3]; - x[4] = Src[4]; - x[5] = Src[5]; - x[6] = Src[6]; - x[7] = Src[7]; - x[8] = Src[8]; - x[9] = Src[9]; - - Sum1=Sum2=0; - - for(k=1;k<=4;k++){ - Sum1 += abs(x[k]-x[k-1]); - Sum2 += abs(x[k+4]-x[k+5]); - } - - pbi->FragmentVariances[CurrentFrag-1] += ((Sum1>255)?255:Sum1); - pbi->FragmentVariances[CurrentFrag] += ((Sum2>255)?255:Sum2); - - if( Sum1 < FLimit && - Sum2 < FLimit && - (x[5] - x[4]) < QStep && - (x[4] - x[5]) < QStep ){ - - /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */ - Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3; - Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3; - Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3; - Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3; - Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3; - Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3; - Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3; - Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3; - } - - Src += PlaneLineStep; - Des += PlaneLineStep; - } - CurrentFrag ++; - } - } -} - -static void DeblockVerticalEdgesInLoopFilteredBand(PB_INSTANCE *pbi, - unsigned char *SrcPtr, - unsigned char *DesPtr, - ogg_uint32_t PlaneLineStep, - ogg_uint32_t FragsAcross, - ogg_uint32_t StartFrag, - const ogg_uint32_t *QuantScale){ - ogg_uint32_t j,k; - ogg_uint32_t CurrentFrag=StartFrag; - ogg_int32_t QStep; - ogg_int32_t FLimit; - unsigned char *Src, *Des; - ogg_int32_t x[10]; - ogg_int32_t Sum1, Sum2; - - while(CurrentFrag < StartFrag + FragsAcross-1) { - - 
Src=SrcPtr+8*(CurrentFrag-StartFrag+1)-5; - Des=DesPtr+8*(CurrentFrag-StartFrag+1)-4; - - QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]]; - FLimit = ( QStep * 3)>>2 ; - - for( j=0; j<8 ; j++){ - x[0] = Src[0]; - x[1] = Src[1]; - x[2] = Src[2]; - x[3] = Src[3]; - x[4] = Src[4]; - x[5] = Src[5]; - x[6] = Src[6]; - x[7] = Src[7]; - x[8] = Src[8]; - x[9] = Src[9]; - - Sum1=Sum2=0; - - for(k=1;k<=4;k++){ - Sum1 += abs(x[k]-x[k-1]); - Sum2 += abs(x[k+4]-x[k+5]); - } - - pbi->FragmentVariances[CurrentFrag] += ((Sum1>255)?255:Sum1); - pbi->FragmentVariances[CurrentFrag+1] += ((Sum2>255)?255:Sum2); - - - if( Sum1 < FLimit && - Sum2 < FLimit && - (x[5] - x[4]) < QStep && - (x[4] - x[5]) < QStep ){ - - /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */ - Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3; - Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3; - Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3; - Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3; - Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3; - Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3; - Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3; - Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3; - } - Src +=PlaneLineStep; - Des +=PlaneLineStep; - - } - CurrentFrag ++; - } -} - -static void DeblockPlane(PB_INSTANCE *pbi, - unsigned char *SourceBuffer, - unsigned char *DestinationBuffer, - ogg_uint32_t Channel ){ - - ogg_uint32_t i,k; - ogg_uint32_t PlaneLineStep=0; - ogg_uint32_t StartFrag =0; - ogg_uint32_t PixelIndex=0; - unsigned char * SrcPtr=0, * DesPtr=0; - ogg_uint32_t FragsAcross=0; - ogg_uint32_t FragsDown=0; - const ogg_uint32_t *QuantScale=0; - - switch( Channel ){ - case 0: - /* Get the parameters */ - PlaneLineStep = pbi->YStride; - FragsAcross = pbi->HFragments; - FragsDown = pbi->VFragments; - StartFrag = 0; - PixelIndex = 
pbi->ReconYDataOffset; - SrcPtr = & SourceBuffer[PixelIndex]; - DesPtr = & DestinationBuffer[PixelIndex]; - break; - - case 1: - /* Get the parameters */ - PlaneLineStep = pbi->UVStride; - FragsAcross = pbi->HFragments / 2; - FragsDown = pbi->VFragments / 2; - StartFrag = pbi->YPlaneFragments; - - PixelIndex = pbi->ReconUDataOffset; - SrcPtr = & SourceBuffer[PixelIndex]; - DesPtr = & DestinationBuffer[PixelIndex]; - break; - - default: - /* Get the parameters */ - PlaneLineStep = pbi->UVStride; - FragsAcross = pbi->HFragments / 2; - FragsDown = pbi->VFragments / 2; - StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments; - - PixelIndex = pbi->ReconVDataOffset; - SrcPtr = & SourceBuffer[PixelIndex]; - DesPtr = & DestinationBuffer[PixelIndex]; - break; - } - - QuantScale = DcQuantScaleV1; - - for(i=0;i<4;i++) - memcpy(DesPtr+i*PlaneLineStep, SrcPtr+i*PlaneLineStep, PlaneLineStep); - - k = 1; - - while( k < FragsDown ){ - - SrcPtr += 8*PlaneLineStep; - DesPtr += 8*PlaneLineStep; - - /* Filter both the horizontal and vertical block edges inside the band */ - DeblockLoopFilteredBand(pbi, SrcPtr, DesPtr, PlaneLineStep, - FragsAcross, StartFrag, QuantScale); - - /* Move Pointers */ - StartFrag += FragsAcross; - - k ++; - } - - /* The Last band */ - for(i=0;i<4;i++) - memcpy(DesPtr+(i+4)*PlaneLineStep, - SrcPtr+(i+4)*PlaneLineStep, - PlaneLineStep); - - DeblockVerticalEdgesInLoopFilteredBand(pbi,SrcPtr,DesPtr,PlaneLineStep, - FragsAcross,StartFrag,QuantScale); - -} - -static void DeblockFrame(PB_INSTANCE *pbi, unsigned char *SourceBuffer, - unsigned char *DestinationBuffer){ - - memset(pbi->FragmentVariances, 0 , sizeof(ogg_int32_t) * pbi->UnitFragments); - - - UpdateFragQIndex(pbi); - - /* Y */ - DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 0); - - /* U */ - DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 1); - - /* V */ - DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 2); - -} - -void PostProcess(PB_INSTANCE *pbi){ - - switch 
(pbi->PostProcessingLevel){ - case 8: - /* on a slow machine, use a simpler and faster deblocking filter */ - DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer); - break; - - case 6: - DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer); - UpdateUMVBorder(pbi, pbi->PostProcessBuffer ); - DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer); - break; - - case 5: - DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer); - UpdateUMVBorder(pbi, pbi->PostProcessBuffer ); - DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer); - break; - case 4: - DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer); - break; - case 1: - UpdateFragQIndex(pbi); - break; - - case 0: - break; - - default: - DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer); - UpdateUMVBorder(pbi, pbi->PostProcessBuffer ); - DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer); - break; - } -} - diff --git a/Engine/lib/libtheora/lib/enc/pp.h b/Engine/lib/libtheora/lib/enc/pp.h deleted file mode 100644 index 6eb3a7604..000000000 --- a/Engine/lib/libtheora/lib/enc/pp.h +++ /dev/null @@ -1,48 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: pp.h 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -/* Constants. */ -#define INTERNAL_BLOCK_HEIGHT 8 -#define INTERNAL_BLOCK_WIDTH 8 - - -/* NEW Line search values. 
*/ -#define UP 0 -#define DOWN 1 -#define LEFT 2 -#define RIGHT 3 - -#define FIRST_ROW 0 -#define NOT_EDGE_ROW 1 -#define LAST_ROW 2 - -#define YDIFF_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3) -#define CHLOCALS_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3) -#define PMAP_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3) -#define PSCORE_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 4) - -/* Status values in block coding map */ -#define CANDIDATE_BLOCK_LOW -2 -#define CANDIDATE_BLOCK -1 -#define BLOCK_NOT_CODED 0 -#define BLOCK_CODED_BAR 3 -#define BLOCK_CODED_SGC 4 -#define BLOCK_CODED_LOW 4 -#define BLOCK_CODED 5 - -#define MAX_PREV_FRAMES 16 -#define MAX_SEARCH_LINE_LEN 7 diff --git a/Engine/lib/libtheora/lib/enc/quant_lookup.h b/Engine/lib/libtheora/lib/enc/quant_lookup.h deleted file mode 100644 index 04bbce910..000000000 --- a/Engine/lib/libtheora/lib/enc/quant_lookup.h +++ /dev/null @@ -1,43 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: quant_lookup.h 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include "codec_internal.h" - -#define MIN16 ((1<<16)-1) -#define SHIFT16 (1<<16) - -#define MIN_LEGAL_QUANT_ENTRY 8 -#define MIN_DEQUANT_VAL 2 -#define IDCT_SCALE_FACTOR 2 /* Shift left bits to improve IDCT precision */ -#define OLD_SCHEME 1 - - -/****************************** - * lookup table for DCT coefficient zig-zag ordering - * ****************************/ - -static const ogg_uint32_t dezigzag_index[64] = { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, - 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, - 53, 60, 61, 54, 47, 55, 62, 63 -}; - diff --git a/Engine/lib/libtheora/lib/enc/reconstruct.c b/Engine/lib/libtheora/lib/enc/reconstruct.c deleted file mode 100644 index 5602884af..000000000 --- a/Engine/lib/libtheora/lib/enc/reconstruct.c +++ /dev/null @@ -1,110 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: reconstruct.c 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include "codec_internal.h" - -static void copy8x8__c (unsigned char *src, - unsigned char *dest, - unsigned int stride) -{ - int j; - for ( j = 0; j < 8; j++ ){ - ((ogg_uint32_t*)dest)[0] = ((ogg_uint32_t*)src)[0]; - ((ogg_uint32_t*)dest)[1] = ((ogg_uint32_t*)src)[1]; - src+=stride; - dest+=stride; - } -} - -static void recon_intra8x8__c (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - ogg_uint32_t i; - - for (i = 8; i; i--){ - /* Convert the data back to 8 bit unsigned */ - /* Saturate the output to unsigend 8 bit values */ - ReconPtr[0] = clamp255( ChangePtr[0] + 128 ); - ReconPtr[1] = clamp255( ChangePtr[1] + 128 ); - ReconPtr[2] = clamp255( ChangePtr[2] + 128 ); - ReconPtr[3] = clamp255( ChangePtr[3] + 128 ); - ReconPtr[4] = clamp255( ChangePtr[4] + 128 ); - ReconPtr[5] = clamp255( ChangePtr[5] + 128 ); - ReconPtr[6] = clamp255( ChangePtr[6] + 128 ); - ReconPtr[7] = clamp255( ChangePtr[7] + 128 ); - - ReconPtr += LineStep; - ChangePtr += 8; - } -} - -static void recon_inter8x8__c (unsigned char *ReconPtr, unsigned char *RefPtr, - ogg_int16_t *ChangePtr, ogg_uint32_t LineStep) -{ - ogg_uint32_t i; - - for (i = 8; i; i--){ - ReconPtr[0] = clamp255(RefPtr[0] + ChangePtr[0]); - ReconPtr[1] = clamp255(RefPtr[1] + ChangePtr[1]); - ReconPtr[2] = clamp255(RefPtr[2] + ChangePtr[2]); - ReconPtr[3] = clamp255(RefPtr[3] + ChangePtr[3]); - ReconPtr[4] = clamp255(RefPtr[4] + ChangePtr[4]); - ReconPtr[5] = clamp255(RefPtr[5] + ChangePtr[5]); - ReconPtr[6] = clamp255(RefPtr[6] + ChangePtr[6]); - ReconPtr[7] = clamp255(RefPtr[7] + ChangePtr[7]); - - ChangePtr += 8; - ReconPtr += LineStep; - RefPtr += 
LineStep; - } -} - -static void recon_inter8x8_half__c (unsigned char *ReconPtr, unsigned char *RefPtr1, - unsigned char *RefPtr2, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - ogg_uint32_t i; - - for (i = 8; i; i--){ - ReconPtr[0] = clamp255((((int)RefPtr1[0] + (int)RefPtr2[0]) >> 1) + ChangePtr[0] ); - ReconPtr[1] = clamp255((((int)RefPtr1[1] + (int)RefPtr2[1]) >> 1) + ChangePtr[1] ); - ReconPtr[2] = clamp255((((int)RefPtr1[2] + (int)RefPtr2[2]) >> 1) + ChangePtr[2] ); - ReconPtr[3] = clamp255((((int)RefPtr1[3] + (int)RefPtr2[3]) >> 1) + ChangePtr[3] ); - ReconPtr[4] = clamp255((((int)RefPtr1[4] + (int)RefPtr2[4]) >> 1) + ChangePtr[4] ); - ReconPtr[5] = clamp255((((int)RefPtr1[5] + (int)RefPtr2[5]) >> 1) + ChangePtr[5] ); - ReconPtr[6] = clamp255((((int)RefPtr1[6] + (int)RefPtr2[6]) >> 1) + ChangePtr[6] ); - ReconPtr[7] = clamp255((((int)RefPtr1[7] + (int)RefPtr2[7]) >> 1) + ChangePtr[7] ); - - ChangePtr += 8; - ReconPtr += LineStep; - RefPtr1 += LineStep; - RefPtr2 += LineStep; - } -} - -void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags) -{ - funcs->copy8x8 = copy8x8__c; - funcs->recon_intra8x8 = recon_intra8x8__c; - funcs->recon_inter8x8 = recon_inter8x8__c; - funcs->recon_inter8x8_half = recon_inter8x8_half__c; -#if defined(USE_ASM) - if (cpu_flags & OC_CPU_X86_MMX) { - dsp_mmx_recon_init(funcs); - } -#endif -} diff --git a/Engine/lib/libtheora/lib/enc/scan.c b/Engine/lib/libtheora/lib/enc/scan.c deleted file mode 100644 index 5466ca438..000000000 --- a/Engine/lib/libtheora/lib/enc/scan.c +++ /dev/null @@ -1,2301 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: scan.c 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include -#include -#include -#include "codec_internal.h" -#include "dsp.h" - -#define MAX_SEARCH_LINE_LEN 7 - -#define SET8_0(ptr) \ - ((ogg_uint32_t *)ptr)[0] = 0x00000000; \ - ((ogg_uint32_t *)ptr)[1] = 0x00000000; -#define SET8_1(ptr) \ - ((ogg_uint32_t *)ptr)[0] = 0x01010101; \ - ((ogg_uint32_t *)ptr)[1] = 0x01010101; -#define SET8_8(ptr) \ - ((ogg_uint32_t *)ptr)[0] = 0x08080808; \ - ((ogg_uint32_t *)ptr)[1] = 0x08080808; - -static ogg_uint32_t LineLengthScores[ MAX_SEARCH_LINE_LEN + 1 ] = { - 0, 0, 0, 0, 2, 4, 12, 24 -}; - -static ogg_uint32_t BodyNeighbourScore = 8; -static double DiffDevisor = 0.0625; -#define HISTORY_BLOCK_FACTOR 2 -#define MIN_STEP_THRESH 6 -#define SCORE_MULT_LOW 0.5 -#define SCORE_MULT_HIGH 4 - -#define UP 0 -#define DOWN 1 -#define LEFT 2 -#define RIGHT 3 - -#define INTERNAL_BLOCK_HEIGHT 8 -#define INTERNAL_BLOCK_WIDTH 8 - -#define BLOCK_NOT_CODED 0 -#define BLOCK_CODED_BAR 3 -#define BLOCK_CODED_SGC 4 -#define BLOCK_CODED_LOW 4 -#define BLOCK_CODED 5 - -#define CANDIDATE_BLOCK_LOW -2 -#define CANDIDATE_BLOCK -1 - -#define FIRST_ROW 0 -#define NOT_EDGE_ROW 1 -#define LAST_ROW 2 - -#define YDIFF_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3) -#define CHLOCALS_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3) -#define PMAP_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3) - -void ConfigurePP( PP_INSTANCE *ppi, int Level ) { - switch ( Level ){ - case 0: - ppi->SRFGreyThresh = 1; - ppi->SRFColThresh = 1; - ppi->NoiseSupLevel = 2; - ppi->SgcLevelThresh = 1; - ppi->SuvcLevelThresh = 1; - ppi->GrpLowSadThresh = 6; - ppi->GrpHighSadThresh = 24; - ppi->PrimaryBlockThreshold = 2; - ppi->SgcThresh = 10; - - ppi->PAKEnabled = 0; - break; - - 
case 1: - ppi->SRFGreyThresh = 2; - ppi->SRFColThresh = 2; - ppi->NoiseSupLevel = 2; - ppi->SgcLevelThresh = 2; - ppi->SuvcLevelThresh = 2; - ppi->GrpLowSadThresh = 8; - ppi->GrpHighSadThresh = 32; - ppi->PrimaryBlockThreshold = 5; - ppi->SgcThresh = 12; - - ppi->PAKEnabled = 1; - break; - - case 2: /* Default VP3 settings */ - ppi->SRFGreyThresh = 3; - ppi->SRFColThresh = 3; - ppi->NoiseSupLevel = 2; - ppi->SgcLevelThresh = 2; - ppi->SuvcLevelThresh = 2; - ppi->GrpLowSadThresh = 8; - ppi->GrpHighSadThresh = 32; - ppi->PrimaryBlockThreshold = 5; - ppi->SgcThresh = 16; - - ppi->PAKEnabled = 1; - break; - - case 3: - ppi->SRFGreyThresh = 4; - ppi->SRFColThresh = 4; - ppi->NoiseSupLevel = 3; - ppi->SgcLevelThresh = 3; - ppi->SuvcLevelThresh = 3; - ppi->GrpLowSadThresh = 10; - ppi->GrpHighSadThresh = 48; - ppi->PrimaryBlockThreshold = 5; - ppi->SgcThresh = 18; - - ppi->PAKEnabled = 1; - break; - - case 4: - ppi->SRFGreyThresh = 5; - ppi->SRFColThresh = 5; - ppi->NoiseSupLevel = 3; - ppi->SgcLevelThresh = 4; - ppi->SuvcLevelThresh = 4; - ppi->GrpLowSadThresh = 12; - ppi->GrpHighSadThresh = 48; - ppi->PrimaryBlockThreshold = 5; - ppi->SgcThresh = 20; - - ppi->PAKEnabled = 1; - break; - - case 5: - ppi->SRFGreyThresh = 6; - ppi->SRFColThresh = 6; - ppi->NoiseSupLevel = 3; - ppi->SgcLevelThresh = 4; - ppi->SuvcLevelThresh = 4; - ppi->GrpLowSadThresh = 12; - ppi->GrpHighSadThresh = 64; - ppi->PrimaryBlockThreshold = 10; - ppi->SgcThresh = 24; - - ppi->PAKEnabled = 1; - break; - - case 6: - ppi->SRFGreyThresh = 6; - ppi->SRFColThresh = 7; - ppi->NoiseSupLevel = 3; - ppi->SgcLevelThresh = 4; - ppi->SuvcLevelThresh = 4; - ppi->GrpLowSadThresh = 12; - ppi->GrpHighSadThresh = 64; - ppi->PrimaryBlockThreshold = 10; - ppi->SgcThresh = 24; - - ppi->PAKEnabled = 1; - break; - - default: - ppi->SRFGreyThresh = 3; - ppi->SRFColThresh = 3; - ppi->NoiseSupLevel = 2; - ppi->SgcLevelThresh = 2; - ppi->SuvcLevelThresh = 2; - ppi->GrpLowSadThresh = 10; - ppi->GrpHighSadThresh = 32; - 
ppi->PrimaryBlockThreshold = 5; - ppi->SgcThresh = 16; - ppi->PAKEnabled = 1; - break; - } -} - -static void ScanCalcPixelIndexTable(PP_INSTANCE *ppi){ - ogg_uint32_t i; - ogg_uint32_t * PixelIndexTablePtr = ppi->ScanPixelIndexTable; - - /* If appropriate add on extra inices for U and V planes. */ - for ( i = 0; i < (ppi->ScanYPlaneFragments); i++ ) { - PixelIndexTablePtr[ i ] = - ((i / ppi->ScanHFragments) * - VFRAGPIXELS * ppi->ScanConfig.VideoFrameWidth); - PixelIndexTablePtr[ i ] += - ((i % ppi->ScanHFragments) * HFRAGPIXELS); - } - - PixelIndexTablePtr = &ppi->ScanPixelIndexTable[ppi->ScanYPlaneFragments]; - - for ( i = 0; i < (ppi->ScanUVPlaneFragments * 2); i++ ){ - PixelIndexTablePtr[ i ] = - ((i / (ppi->ScanHFragments >> 1) ) * - (VFRAGPIXELS * (ppi->ScanConfig.VideoFrameWidth >> 1)) ); - PixelIndexTablePtr[ i ] += - ((i % (ppi->ScanHFragments >> 1) ) * - HFRAGPIXELS) + ppi->YFramePixels; - } -} - -static void InitScanMapArrays(PP_INSTANCE *ppi){ - int i; - unsigned char StepThresh; - - /* Clear down the fragment level map arrays for the current frame. */ - memset( ppi->FragScores, 0, - ppi->ScanFrameFragments * sizeof(*ppi->FragScores) ); - memset( ppi->SameGreyDirPixels, 0, - ppi->ScanFrameFragments ); - memset( ppi->FragDiffPixels, 0, - ppi->ScanFrameFragments ); - memset( ppi->RowChangedPixels, 0, - 3* ppi->ScanConfig.VideoFrameHeight*sizeof(*ppi->RowChangedPixels)); - - memset( ppi->ScanDisplayFragments, BLOCK_NOT_CODED, ppi->ScanFrameFragments); - - /* Threshold used in setting up ppi->NoiseScoreBoostTable[] */ - StepThresh = (unsigned int)(ppi->SRFGreyThresh >> 1); - if ( StepThresh < MIN_STEP_THRESH ) - StepThresh = MIN_STEP_THRESH; - ppi->SrfThresh = (int)ppi->SRFGreyThresh; - - /* Set up various tables used to tweak pixel score values and - scoring rules based upon absolute value of a pixel change */ - for ( i = 0; i < 256; i++ ){ - /* Score multiplier table indexed by absolute difference. 
*/ - ppi->AbsDiff_ScoreMultiplierTable[i] = (double)i * DiffDevisor; - if ( ppi->AbsDiff_ScoreMultiplierTable[i] < SCORE_MULT_LOW ) - ppi->AbsDiff_ScoreMultiplierTable[i] = SCORE_MULT_LOW; - else if ( ppi->AbsDiff_ScoreMultiplierTable[i] > SCORE_MULT_HIGH) - ppi->AbsDiff_ScoreMultiplierTable[i] = SCORE_MULT_HIGH; - - /* Table that facilitates a relaxation of the changed locals rules - in NoiseScoreRow() for pixels that have changed by a large - amount. */ - if ( i < (ppi->SrfThresh + StepThresh) ) - ppi->NoiseScoreBoostTable[i] = 0; - else if ( i < (ppi->SrfThresh + (StepThresh * 4)) ) - ppi->NoiseScoreBoostTable[i] = 1; - else if ( i < (ppi->SrfThresh + (StepThresh * 6)) ) - ppi->NoiseScoreBoostTable[i] = 2; - else - ppi->NoiseScoreBoostTable[i] = 3; - - } - - /* Set various other threshold parameters. */ - - /* Set variables that control access to the line search algorithms. */ - ppi->LineSearchTripTresh = 16; - if ( ppi->LineSearchTripTresh > ppi->PrimaryBlockThreshold ) - ppi->LineSearchTripTresh = (unsigned int)(ppi->PrimaryBlockThreshold + 1); - - /* Adjust line search length if block threshold low */ - ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN; - while ( (ppi->MaxLineSearchLen > 0) && - (LineLengthScores[ppi->MaxLineSearchLen-1] > - ppi->PrimaryBlockThreshold) ) - ppi->MaxLineSearchLen -= 1; - -} - -void ScanYUVInit( PP_INSTANCE * ppi, SCAN_CONFIG_DATA * ScanConfigPtr){ - int i; - - /* Set up the various imported data structure pointers. */ - ppi->ScanConfig.Yuv0ptr = ScanConfigPtr->Yuv0ptr; - ppi->ScanConfig.Yuv1ptr = ScanConfigPtr->Yuv1ptr; - ppi->ScanConfig.SrfWorkSpcPtr = ScanConfigPtr->SrfWorkSpcPtr; - ppi->ScanConfig.disp_fragments = ScanConfigPtr->disp_fragments; - - ppi->ScanConfig.RegionIndex = ScanConfigPtr->RegionIndex; - - ppi->ScanConfig.VideoFrameWidth = ScanConfigPtr->VideoFrameWidth; - ppi->ScanConfig.VideoFrameHeight = ScanConfigPtr->VideoFrameHeight; - - /* UV plane sizes. 
*/ - ppi->VideoUVPlaneWidth = ScanConfigPtr->VideoFrameWidth / 2; - ppi->VideoUVPlaneHeight = ScanConfigPtr->VideoFrameHeight / 2; - - /* Note the size of each plane in pixels. */ - ppi->YFramePixels = ppi->ScanConfig.VideoFrameWidth * - ppi->ScanConfig.VideoFrameHeight; - ppi->UVFramePixels = ppi->VideoUVPlaneWidth * ppi->VideoUVPlaneHeight; - - /* Work out various fragment related values. */ - ppi->ScanYPlaneFragments = ppi->YFramePixels / - (HFRAGPIXELS * VFRAGPIXELS); - ppi->ScanUVPlaneFragments = ppi->UVFramePixels / - (HFRAGPIXELS * VFRAGPIXELS);; - ppi->ScanHFragments = ppi->ScanConfig.VideoFrameWidth / HFRAGPIXELS; - ppi->ScanVFragments = ppi->ScanConfig.VideoFrameHeight / VFRAGPIXELS; - ppi->ScanFrameFragments = ppi->ScanYPlaneFragments + - (2 * ppi->ScanUVPlaneFragments); - - PInitFrameInfo(ppi); - - /* Set up the scan pixel index table. */ - ScanCalcPixelIndexTable(ppi); - - /* Initialise the previous frame block history lists */ - for ( i = 0; i < MAX_PREV_FRAMES; i++ ) - memset( ppi->PrevFragments[i], BLOCK_NOT_CODED, ppi->ScanFrameFragments); - - /* YUVAnalyseFrame() is not called for the first frame in a sequence - (a key frame obviously). This memset insures that for the second - frame all blocks are marked for coding in line with the behaviour - for other key frames. 
*/ - memset( ppi->PrevFragments[ppi->PrevFrameLimit-1], - BLOCK_CODED, ppi->ScanFrameFragments ); - - /* Initialise scan arrays */ - InitScanMapArrays(ppi); -} - -static void SetFromPrevious(PP_INSTANCE *ppi) { - unsigned int i,j; - - /* We buld up the list of previously updated blocks in the zero - index list of PrevFragments[] so we must start by reseting its - contents */ - memset( ppi->PrevFragments[0], BLOCK_NOT_CODED, ppi->ScanFrameFragments ); - - if ( ppi->PrevFrameLimit > 1 ){ - /* Now build up PrevFragments[0] from PrevFragments[1 to PrevFrameLimit] */ - for ( i = 0; i < ppi->ScanFrameFragments; i++ ){ - for ( j = 1; j < ppi->PrevFrameLimit; j++ ){ - if ( ppi->PrevFragments[j][i] > BLOCK_CODED_BAR ){ - ppi->PrevFragments[0][i] = BLOCK_CODED; - break; - } - } - } - } -} - -static void UpdatePreviousBlockLists(PP_INSTANCE *ppi) { - int i; - - /* Shift previous frame block lists along. */ - for ( i = ppi->PrevFrameLimit; i > 1; i-- ){ - memcpy( ppi->PrevFragments[i], ppi->PrevFragments[i-1], - ppi->ScanFrameFragments ); - } - - /* Now copy in this frames block list */ - memcpy( ppi->PrevFragments[1], ppi->ScanDisplayFragments, - ppi->ScanFrameFragments ); -} - -static void CreateOutputDisplayMap( PP_INSTANCE *ppi, - signed char *InternalFragmentsPtr, - signed char *RecentHistoryPtr, - unsigned char *ExternalFragmentsPtr ) { - ogg_uint32_t i; - ogg_uint32_t HistoryBlocksAdded = 0; - ogg_uint32_t YBand = (ppi->ScanYPlaneFragments/8); /* 1/8th of Y image. 
*/ - - ppi->OutputBlocksUpdated = 0; - for ( i = 0; i < ppi->ScanFrameFragments; i++ ) { - if ( InternalFragmentsPtr[i] > BLOCK_NOT_CODED ) { - ppi->OutputBlocksUpdated ++; - ExternalFragmentsPtr[i] = 1; - }else if ( RecentHistoryPtr[i] == BLOCK_CODED ){ - HistoryBlocksAdded ++; - ExternalFragmentsPtr[i] = 1; - }else{ - ExternalFragmentsPtr[i] = 0; - } - } - - /* Add in a weighting for the history blocks that have been added */ - ppi->OutputBlocksUpdated += (HistoryBlocksAdded / HISTORY_BLOCK_FACTOR); - - /* Now calculate a key frame candidate indicator. This is based - upon Y data only and ignores the top and bottom 1/8 of the - image. Also ignore history blocks and BAR blocks. */ - ppi->KFIndicator = 0; - for ( i = YBand; i < (ppi->ScanYPlaneFragments - YBand); i++ ) - if ( InternalFragmentsPtr[i] > BLOCK_CODED_BAR ) - ppi->KFIndicator ++; - - /* Convert the KF score to a range 0-100 */ - ppi->KFIndicator = ((ppi->KFIndicator*100)/((ppi->ScanYPlaneFragments*3)/4)); -} - -static int RowSadScan( PP_INSTANCE *ppi, - unsigned char * YuvPtr1, - unsigned char * YuvPtr2, - signed char * DispFragPtr){ - ogg_int32_t i, j; - ogg_uint32_t GrpSad; - ogg_uint32_t LocalGrpLowSadThresh = ppi->ModifiedGrpLowSadThresh; - ogg_uint32_t LocalGrpHighSadThresh = ppi->ModifiedGrpHighSadThresh; - signed char *LocalDispFragPtr; - unsigned char *LocalYuvPtr1; - unsigned char *LocalYuvPtr2; - - int InterestingBlocksInRow = 0; - - /* For each row of pixels in the row of blocks */ - for ( j = 0; j < VFRAGPIXELS; j++ ){ - /* Set local block map pointer. */ - LocalDispFragPtr = DispFragPtr; - - /* Set the local pixel data pointers for this row.*/ - LocalYuvPtr1 = YuvPtr1; - LocalYuvPtr2 = YuvPtr2; - - /* Scan along the row of pixels If the block to which a group of - pixels belongs is already marked for update then do nothing. 
*/ - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - if ( *LocalDispFragPtr <= BLOCK_NOT_CODED ){ - /* Calculate the SAD score for the block row */ - GrpSad = dsp_row_sad8(ppi->dsp, LocalYuvPtr1,LocalYuvPtr2); - - /* Now test the group SAD score */ - if ( GrpSad > LocalGrpLowSadThresh ){ - /* If SAD very high we must update else we have candidate block */ - if ( GrpSad > LocalGrpHighSadThresh ){ - /* Force update */ - *LocalDispFragPtr = BLOCK_CODED; - }else{ - /* Possible Update required */ - *LocalDispFragPtr = CANDIDATE_BLOCK; - } - InterestingBlocksInRow = 1; - } - } - LocalDispFragPtr++; - - LocalYuvPtr1 += 8; - LocalYuvPtr2 += 8; - } - - /* Increment the base data pointers to the start of the next line. */ - YuvPtr1 += ppi->PlaneStride; - YuvPtr2 += ppi->PlaneStride; - } - - return InterestingBlocksInRow; - -} - -static int ColSadScan( PP_INSTANCE *ppi, - unsigned char * YuvPtr1, - unsigned char * YuvPtr2, - signed char * DispFragPtr ){ - ogg_int32_t i; - ogg_uint32_t MaxSad; - ogg_uint32_t LocalGrpLowSadThresh = ppi->ModifiedGrpLowSadThresh; - ogg_uint32_t LocalGrpHighSadThresh = ppi->ModifiedGrpHighSadThresh; - signed char * LocalDispFragPtr; - - unsigned char * LocalYuvPtr1; - unsigned char * LocalYuvPtr2; - - int InterestingBlocksInRow = 0; - - /* Set the local pixel data pointers for this row. */ - LocalYuvPtr1 = YuvPtr1; - LocalYuvPtr2 = YuvPtr2; - - /* Set local block map pointer. */ - LocalDispFragPtr = DispFragPtr; - - /* Scan along the row of blocks */ - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - /* Skip if block already marked to be coded. 
*/ - if ( *LocalDispFragPtr <= BLOCK_NOT_CODED ){ - /* Calculate the SAD score for the block column */ - MaxSad = dsp_col_sad8x8(ppi->dsp, LocalYuvPtr1, LocalYuvPtr2, ppi->PlaneStride ); - - /* Now test the group SAD score */ - if ( MaxSad > LocalGrpLowSadThresh ){ - /* If SAD very high we must update else we have candidate block */ - if ( MaxSad > LocalGrpHighSadThresh ){ - /* Force update */ - *LocalDispFragPtr = BLOCK_CODED; - }else{ - /* Possible Update required */ - *LocalDispFragPtr = CANDIDATE_BLOCK; - } - InterestingBlocksInRow = 1; - } - } - - /* Increment the block map pointer. */ - LocalDispFragPtr++; - - /* Step data pointers on ready for next block */ - LocalYuvPtr1 += HFRAGPIXELS; - LocalYuvPtr2 += HFRAGPIXELS; - } - - return InterestingBlocksInRow; -} - -static void SadPass2( PP_INSTANCE *ppi, - ogg_int32_t RowNumber, - signed char * DispFragPtr ){ - ogg_int32_t i; - - /* First row */ - if ( RowNumber == 0 ) { - /* First block in row. */ - if ( DispFragPtr[0] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[1] == BLOCK_CODED) || - (DispFragPtr[ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[ppi->PlaneHFragments+1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[0] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[0] = DispFragPtr[0]; - } - }else{ - ppi->TmpCodedMap[0] = DispFragPtr[0]; - } - - /* All but first and last in row */ - for ( i = 1; (i < ppi->PlaneHFragments-1); i++ ){ - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[i-1] == BLOCK_CODED) || - (DispFragPtr[i+1] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments+1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[i] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - } - - /* Last block in row. 
*/ - i = ppi->PlaneHFragments-1; - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[i-1] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[i] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else if ( RowNumber < (ppi->PlaneVFragments - 1) ){ - /* General case */ - /* First block in row. */ - if ( DispFragPtr[0] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[1] == BLOCK_CODED) || - (DispFragPtr[(-ppi->PlaneHFragments)] == BLOCK_CODED) || - (DispFragPtr[(-ppi->PlaneHFragments)+1] == BLOCK_CODED) || - (DispFragPtr[ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[ppi->PlaneHFragments+1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[0] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[0] = DispFragPtr[0]; - } - }else{ - ppi->TmpCodedMap[0] = DispFragPtr[0]; - } - - /* All but first and last in row */ - for ( i = 1; (i < ppi->PlaneHFragments-1); i++ ){ - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[i-1] == BLOCK_CODED) || - (DispFragPtr[i+1] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments+1] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments+1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[i] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - } - - /* Last block in row. 
*/ - i = ppi->PlaneHFragments-1; - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[i-1] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i+ppi->PlaneHFragments-1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[i] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - /* Last row */ - /* First block in row. */ - if ( DispFragPtr[0] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[1] == BLOCK_CODED) || - (DispFragPtr[(-ppi->PlaneHFragments)] == BLOCK_CODED) || - (DispFragPtr[(-ppi->PlaneHFragments)+1] == BLOCK_CODED)){ - ppi->TmpCodedMap[0] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[0] = DispFragPtr[0]; - } - }else{ - ppi->TmpCodedMap[0] = DispFragPtr[0]; - } - - /* All but first and last in row */ - for ( i = 1; (i < ppi->PlaneHFragments-1); i++ ){ - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[i-1] == BLOCK_CODED) || - (DispFragPtr[i+1] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments+1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[i] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - } - - /* Last block in row. 
*/ - i = ppi->PlaneHFragments-1; - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( (DispFragPtr[i-1] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments] == BLOCK_CODED) || - (DispFragPtr[i-ppi->PlaneHFragments-1] == BLOCK_CODED) ){ - ppi->TmpCodedMap[i] = BLOCK_CODED_LOW; - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - }else{ - ppi->TmpCodedMap[i] = DispFragPtr[i]; - } - } - - /* Now copy back the modified Fragment data */ - memcpy( &DispFragPtr[0], &ppi->TmpCodedMap[0], (ppi->PlaneHFragments) ); -} - -static unsigned char ApplyPakLowPass( PP_INSTANCE *ppi, - unsigned char * SrcPtr ){ - unsigned char * SrcPtr1 = SrcPtr - 1; - unsigned char * SrcPtr0 = SrcPtr1 - ppi->PlaneStride; /* Note the - use of - stride not - width. */ - unsigned char * SrcPtr2 = SrcPtr1 + ppi->PlaneStride; - - return (unsigned char)( ( (ogg_uint32_t)SrcPtr0[0] + - (ogg_uint32_t)SrcPtr0[1] + - (ogg_uint32_t)SrcPtr0[2] + - (ogg_uint32_t)SrcPtr1[0] + - (ogg_uint32_t)SrcPtr1[2] + - (ogg_uint32_t)SrcPtr2[0] + - (ogg_uint32_t)SrcPtr2[1] + - (ogg_uint32_t)SrcPtr2[2] ) >> 3 ); - -} - -static void RowDiffScan( PP_INSTANCE *ppi, - unsigned char * YuvPtr1, - unsigned char * YuvPtr2, - ogg_int16_t * YUVDiffsPtr, - unsigned char * bits_map_ptr, - signed char * SgcPtr, - signed char * DispFragPtr, - unsigned char * FDiffPixels, - ogg_int32_t * RowDiffsPtr, - unsigned char * ChLocalsPtr, int EdgeRow ){ - - ogg_int32_t i,j; - ogg_int32_t FragChangedPixels; - - ogg_int16_t Diff; /* Temp local workspace. */ - - /* Cannot use kernel if at edge or if PAK disabled */ - if ( (!ppi->PAKEnabled) || EdgeRow ){ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Reset count of pixels changed for the current fragment. */ - FragChangedPixels = 0; - - /* Test for break out conditions to save time. 
*/ - if (*DispFragPtr == CANDIDATE_BLOCK){ - - /* Clear down entries in changed locals array */ - SET8_0(ChLocalsPtr); - - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* Take a local copy of the measured difference. */ - Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j]; - - /* Store the actual difference value */ - YUVDiffsPtr[j] = Diff; - - /* Test against the Level thresholds and record the results */ - SgcPtr[0] += ppi->SgcThreshTable[Diff+255]; - - /* Test against the SRF thresholds */ - bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255]; - FragChangedPixels += ppi->SrfThreshTable[Diff+255]; - } - }else{ - /* If we are breaking out here mark all pixels as changed. */ - if ( *DispFragPtr > BLOCK_NOT_CODED ){ - SET8_1(bits_map_ptr); - SET8_8(ChLocalsPtr); - }else{ - SET8_0(ChLocalsPtr); - } - } - - *RowDiffsPtr += FragChangedPixels; - *FDiffPixels += (unsigned char)FragChangedPixels; - - YuvPtr1 += HFRAGPIXELS; - YuvPtr2 += HFRAGPIXELS; - bits_map_ptr += HFRAGPIXELS; - ChLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - SgcPtr ++; - FDiffPixels ++; - - /* If we have a lot of changed pixels for this fragment on this - row then the fragment is almost sure to be picked (e.g. through - the line search) so we can mark it as selected and then ignore - it. */ - if (FragChangedPixels >= 7){ - *DispFragPtr = BLOCK_CODED_LOW; - } - DispFragPtr++; - } - }else{ - - /*************************************************************/ - /* First fragment of row !! */ - - i = 0; - /* Reset count of pixels changed for the current fragment. */ - FragChangedPixels = 0; - - /* Test for break out conditions to save time. */ - if (*DispFragPtr == CANDIDATE_BLOCK){ - /* Clear down entries in changed locals array */ - SET8_0(ChLocalsPtr); - - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* Take a local copy of the measured difference. 
*/ - Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j]; - - /* Store the actual difference value */ - YUVDiffsPtr[j] = Diff; - - /* Test against the Level thresholds and record the results */ - SgcPtr[0] += ppi->SgcThreshTable[Diff+255]; - - if (j>0 && ppi->SrfPakThreshTable[Diff+255] ) - Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) - - (int)ApplyPakLowPass( ppi, &YuvPtr2[j] ); - - /* Test against the SRF thresholds */ - bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255]; - FragChangedPixels += ppi->SrfThreshTable[Diff+255]; - } - }else{ - /* If we are breaking out here mark all pixels as changed. */ - if ( *DispFragPtr > BLOCK_NOT_CODED ){ - SET8_1(bits_map_ptr); - SET8_8(ChLocalsPtr); - }else{ - SET8_0(ChLocalsPtr); - } - } - - *RowDiffsPtr += FragChangedPixels; - *FDiffPixels += (unsigned char)FragChangedPixels; - - YuvPtr1 += HFRAGPIXELS; - YuvPtr2 += HFRAGPIXELS; - bits_map_ptr += HFRAGPIXELS; - ChLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - SgcPtr ++; - FDiffPixels ++; - - /* If we have a lot of changed pixels for this fragment on this - row then the fragment is almost sure to be picked - (e.g. through the line search) so we can mark it as selected - and then ignore it. */ - if (FragChangedPixels >= 7){ - *DispFragPtr = BLOCK_CODED_LOW; - } - DispFragPtr++; - /*************************************************************/ - /* Fragment in between!! */ - - for ( i = HFRAGPIXELS ; i < ppi->PlaneWidth-HFRAGPIXELS; - i += HFRAGPIXELS ){ - /* Reset count of pixels changed for the current fragment. */ - FragChangedPixels = 0; - - /* Test for break out conditions to save time. */ - if (*DispFragPtr == CANDIDATE_BLOCK){ - /* Clear down entries in changed locals array */ - SET8_0(ChLocalsPtr); - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* Take a local copy of the measured difference. 
*/ - Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j]; - - /* Store the actual difference value */ - YUVDiffsPtr[j] = Diff; - - /* Test against the Level thresholds and record the results */ - SgcPtr[0] += ppi->SgcThreshTable[Diff+255]; - - if (ppi->SrfPakThreshTable[Diff+255] ) - Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) - - (int)ApplyPakLowPass( ppi, &YuvPtr2[j] ); - - - /* Test against the SRF thresholds */ - bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255]; - FragChangedPixels += ppi->SrfThreshTable[Diff+255]; - } - }else{ - /* If we are breaking out here mark all pixels as changed. */ - if ( *DispFragPtr > BLOCK_NOT_CODED ){ - SET8_1(bits_map_ptr); - SET8_8(ChLocalsPtr); - }else{ - SET8_0(ChLocalsPtr); - } - } - - *RowDiffsPtr += FragChangedPixels; - *FDiffPixels += (unsigned char)FragChangedPixels; - - YuvPtr1 += HFRAGPIXELS; - YuvPtr2 += HFRAGPIXELS; - bits_map_ptr += HFRAGPIXELS; - ChLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - SgcPtr ++; - FDiffPixels ++; - - /* If we have a lot of changed pixels for this fragment on this - row then the fragment is almost sure to be picked - (e.g. through the line search) so we can mark it as selected - and then ignore it. */ - if (FragChangedPixels >= 7){ - *DispFragPtr = BLOCK_CODED_LOW; - } - DispFragPtr++; - } - /*************************************************************/ - /* Last fragment of row !! */ - - /* Reset count of pixels changed for the current fragment. */ - FragChangedPixels = 0; - - /* Test for break out conditions to save time. */ - if (*DispFragPtr == CANDIDATE_BLOCK){ - /* Clear down entries in changed locals array */ - SET8_0(ChLocalsPtr); - - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* Take a local copy of the measured difference. 
*/ - Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j]; - - /* Store the actual difference value */ - YUVDiffsPtr[j] = Diff; - - /* Test against the Level thresholds and record the results */ - SgcPtr[0] += ppi->SgcThreshTable[Diff+255]; - - if (j<7 && ppi->SrfPakThreshTable[Diff+255] ) - Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) - - (int)ApplyPakLowPass( ppi, &YuvPtr2[j] ); - - - /* Test against the SRF thresholds */ - bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255]; - FragChangedPixels += ppi->SrfThreshTable[Diff+255]; - } - }else{ - /* If we are breaking out here mark all pixels as changed.*/ - if ( *DispFragPtr > BLOCK_NOT_CODED ) { - SET8_1(bits_map_ptr); - SET8_8(ChLocalsPtr); - }else{ - SET8_0(ChLocalsPtr); - } - } - /* If we have a lot of changed pixels for this fragment on this - row then the fragment is almost sure to be picked (e.g. through - the line search) so we can mark it as selected and then ignore - it. */ - *RowDiffsPtr += FragChangedPixels; - *FDiffPixels += (unsigned char)FragChangedPixels; - - /* If we have a lot of changed pixels for this fragment on this - row then the fragment is almost sure to be picked (e.g. through - the line search) so we can mark it as selected and then ignore - it. */ - if (FragChangedPixels >= 7){ - *DispFragPtr = BLOCK_CODED_LOW; - } - DispFragPtr++; - - } -} - -static void ConsolidateDiffScanResults( PP_INSTANCE *ppi, - unsigned char * FDiffPixels, - signed char * SgcScoresPtr, - signed char * DispFragPtr ){ - ogg_int32_t i; - - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - /* Consider only those blocks that were candidates in the - difference scan. Ignore definite YES and NO cases. 
*/ - if ( DispFragPtr[i] == CANDIDATE_BLOCK ){ - if ( ((ogg_uint32_t)abs(SgcScoresPtr[i]) > ppi->BlockSgcThresh) ){ - /* Block marked for update due to Sgc change */ - DispFragPtr[i] = BLOCK_CODED_SGC; - }else if ( FDiffPixels[i] == 0 ){ - /* Block is no longer a candidate for the main tests but will - still be considered a candidate in RowBarEnhBlockMap() */ - DispFragPtr[i] = CANDIDATE_BLOCK_LOW; - } - } - } -} - -static void RowChangedLocalsScan( PP_INSTANCE *ppi, - unsigned char * PixelMapPtr, - unsigned char * ChLocalsPtr, - signed char * DispFragPtr, - unsigned char RowType ){ - - unsigned char changed_locals = 0; - unsigned char * PixelsChangedPtr0; - unsigned char * PixelsChangedPtr1; - unsigned char * PixelsChangedPtr2; - ogg_int32_t i, j; - ogg_int32_t LastRowIndex = ppi->PlaneWidth - 1; - - /* Set up the line based pointers into the bits changed map. */ - PixelsChangedPtr0 = PixelMapPtr - ppi->PlaneWidth; - if ( PixelsChangedPtr0 < ppi->PixelChangedMap ) - PixelsChangedPtr0 += ppi->PixelMapCircularBufferSize; - PixelsChangedPtr0 -= 1; - - PixelsChangedPtr1 = PixelMapPtr - 1; - - PixelsChangedPtr2 = PixelMapPtr + ppi->PlaneWidth; - if ( PixelsChangedPtr2 >= - (ppi->PixelChangedMap + ppi->PixelMapCircularBufferSize) ) - PixelsChangedPtr2 -= ppi->PixelMapCircularBufferSize; - PixelsChangedPtr2 -= 1; - - if ( RowType == NOT_EDGE_ROW ){ - /* Scan through the row of pixels and calculate changed locals. */ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Skip a group of 8 pixels if the assosciated fragment has no - pixels of interest. 
*/ - if ( *DispFragPtr == CANDIDATE_BLOCK ){ - for ( j = 0; j < HFRAGPIXELS; j++ ){ - changed_locals = 0; - - /* If the pixel itself has changed */ - if ( PixelsChangedPtr1[1] ){ - if ( (i > 0) || (j > 0) ){ - changed_locals += PixelsChangedPtr0[0]; - changed_locals += PixelsChangedPtr1[0]; - changed_locals += PixelsChangedPtr2[0]; - } - - changed_locals += PixelsChangedPtr0[1]; - changed_locals += PixelsChangedPtr2[1]; - - if ( (i + j) < LastRowIndex ){ - changed_locals += PixelsChangedPtr0[2]; - changed_locals += PixelsChangedPtr1[2]; - changed_locals += PixelsChangedPtr2[2]; - } - - /* Store the number of changed locals */ - *ChLocalsPtr |= changed_locals; - } - - /* Increment to next pixel in the row */ - ChLocalsPtr++; - PixelsChangedPtr0++; - PixelsChangedPtr1++; - PixelsChangedPtr2++; - } - }else{ - if ( *DispFragPtr > BLOCK_NOT_CODED ) - SET8_0(ChLocalsPtr); - - /* Step pointers */ - ChLocalsPtr += HFRAGPIXELS; - PixelsChangedPtr0 += HFRAGPIXELS; - PixelsChangedPtr1 += HFRAGPIXELS; - PixelsChangedPtr2 += HFRAGPIXELS; - } - - /* Move on to next fragment. */ - DispFragPtr++; - - } - }else{ - /* Scan through the row of pixels and calculate changed locals. 
*/ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Skip a group of 8 pixels if the assosciated fragment has no - pixels of interest */ - if ( *DispFragPtr == CANDIDATE_BLOCK ){ - for ( j = 0; j < HFRAGPIXELS; j++ ){ - changed_locals = 0; - - /* If the pixel itself has changed */ - if ( PixelsChangedPtr1[1] ){ - if ( RowType == FIRST_ROW ){ - if ( (i > 0) || (j > 0) ){ - changed_locals += PixelsChangedPtr1[0]; - changed_locals += PixelsChangedPtr2[0]; - } - - changed_locals += PixelsChangedPtr2[1]; - - if ( (i + j) < LastRowIndex ){ - changed_locals += PixelsChangedPtr1[2]; - changed_locals += PixelsChangedPtr2[2]; - } - }else{ - if ( (i > 0) || (j > 0 ) ){ - changed_locals += PixelsChangedPtr0[0]; - changed_locals += PixelsChangedPtr1[0]; - } - - changed_locals += PixelsChangedPtr0[1]; - - if ( (i + j) < LastRowIndex ){ - changed_locals += PixelsChangedPtr0[2]; - changed_locals += PixelsChangedPtr1[2]; - } - } - - /* Store the number of changed locals */ - *ChLocalsPtr |= changed_locals; - } - - /* Increment to next pixel in the row */ - ChLocalsPtr++; - PixelsChangedPtr0++; - PixelsChangedPtr1++; - PixelsChangedPtr2++; - } - }else{ - if ( *DispFragPtr > BLOCK_NOT_CODED ) - SET8_0(ChLocalsPtr); - - /* Step pointers */ - ChLocalsPtr += HFRAGPIXELS; - PixelsChangedPtr0 += HFRAGPIXELS; - PixelsChangedPtr1 += HFRAGPIXELS; - PixelsChangedPtr2 += HFRAGPIXELS; - } - - /* Move on to next fragment. 
*/ - DispFragPtr++; - } - } -} - -static void NoiseScoreRow( PP_INSTANCE *ppi, - unsigned char * PixelMapPtr, - unsigned char * ChLocalsPtr, - ogg_int16_t * YUVDiffsPtr, - unsigned char * PixelNoiseScorePtr, - ogg_uint32_t * FragScorePtr, - signed char * DispFragPtr, - ogg_int32_t * RowDiffsPtr ){ - ogg_int32_t i,j; - unsigned char changed_locals = 0; - ogg_int32_t Score; - ogg_uint32_t FragScore; - ogg_int32_t AbsDiff; - - /* For each pixel in the row */ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Skip a group of 8 pixels if the assosciated fragment has no - pixels of interest. */ - if ( *DispFragPtr == CANDIDATE_BLOCK ){ - /* Reset the cumulative fragment score. */ - FragScore = 0; - - /* Pixels grouped along the row into fragments */ - for ( j = 0; j < HFRAGPIXELS; j++ ){ - if ( PixelMapPtr[j] ){ - AbsDiff = (ogg_int32_t)( abs(YUVDiffsPtr[j]) ); - changed_locals = ChLocalsPtr[j]; - - /* Give this pixel a score based on changed locals and level - of its own change. */ - Score = (1 + ((ogg_int32_t)(changed_locals + - ppi->NoiseScoreBoostTable[AbsDiff]) - - ppi->NoiseSupLevel)); - - /* For no zero scores adjust by a level based score multiplier. */ - if ( Score > 0 ){ - Score = ((double)Score * - ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] ); - if ( Score < 1 ) - Score = 1; - }else{ - /* Set -ve values to 0 */ - Score = 0; - - /* If there are no changed locals then clear the pixel - changed flag and decrement the pixels changed in - fragment count to speed later stages. */ - if ( changed_locals == 0 ){ - PixelMapPtr[j] = 0; - *RowDiffsPtr -= 1; - } - } - - /* Update the pixel scores etc. */ - PixelNoiseScorePtr[j] = (unsigned char)Score; - FragScore += (ogg_uint32_t)Score; - } - } - - /* Add fragment score (with plane correction factor) into main - data structure */ - *FragScorePtr += (ogg_int32_t)(FragScore * - ppi->YUVPlaneCorrectionFactor); - - /* If score is greater than trip threshold then mark blcok for update. 
*/ - if ( *FragScorePtr > ppi->BlockThreshold ){ - *DispFragPtr = BLOCK_CODED_LOW; - } - } - - /* Increment the various pointers */ - FragScorePtr++; - DispFragPtr++; - PixelNoiseScorePtr += HFRAGPIXELS; - PixelMapPtr += HFRAGPIXELS; - ChLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - } -} - -static void PrimaryEdgeScoreRow( PP_INSTANCE *ppi, - unsigned char * ChangedLocalsPtr, - ogg_int16_t * YUVDiffsPtr, - unsigned char * PixelNoiseScorePtr, - ogg_uint32_t * FragScorePtr, - signed char * DispFragPtr, - unsigned char RowType ){ - ogg_uint32_t BodyNeighbours; - ogg_uint32_t AbsDiff; - unsigned char changed_locals = 0; - ogg_int32_t Score; - ogg_uint32_t FragScore; - unsigned char * CHLocalsPtr0; - unsigned char * CHLocalsPtr1; - unsigned char * CHLocalsPtr2; - ogg_int32_t i,j; - ogg_int32_t LastRowIndex = ppi->PlaneWidth - 1; - - /* Set up pointers into the current previous and next row of the - changed locals data structure. */ - CHLocalsPtr0 = ChangedLocalsPtr - ppi->PlaneWidth; - if ( CHLocalsPtr0 < ppi->ChLocals ) - CHLocalsPtr0 += ppi->ChLocalsCircularBufferSize; - CHLocalsPtr0 -= 1; - - CHLocalsPtr1 = ChangedLocalsPtr - 1; - - CHLocalsPtr2 = ChangedLocalsPtr + ppi->PlaneWidth; - if ( CHLocalsPtr2 >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) ) - CHLocalsPtr2 -= ppi->ChLocalsCircularBufferSize; - CHLocalsPtr2 -= 1; - - - /* The defining rule used here is as follows. */ - /* An edge pixels has 3-5 changed locals. */ - /* And one or more of these changed locals has itself got 7-8 - changed locals. */ - - if ( RowType == NOT_EDGE_ROW ){ - /* Loop for all pixels in the row. */ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Does the fragment contain anything interesting to work with. */ - if ( *DispFragPtr == CANDIDATE_BLOCK ){ - /* Reset the cumulative fragment score. */ - FragScore = 0; - - /* Pixels grouped along the row into fragments */ - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* How many changed locals has the current pixel got. 
*/ - changed_locals = ChangedLocalsPtr[j]; - - /* Is the pixel a suitable candidate */ - if ( (changed_locals > 2) && (changed_locals < 6) ){ - /* The pixel may qualify... have a closer look. */ - BodyNeighbours = 0; - - /* Count the number of "BodyNeighbours" .. Pixels that - have 7 or more changed neighbours. */ - if ( (i > 0) || (j > 0 ) ){ - if ( CHLocalsPtr0[0] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr1[0] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr2[0] >= 7 ) - BodyNeighbours++; - } - - if ( CHLocalsPtr0[1] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr2[1] >= 7 ) - BodyNeighbours++; - - if ( (i + j) < LastRowIndex ){ - if ( CHLocalsPtr0[2] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr1[2] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr2[2] >= 7 ) - BodyNeighbours++; - } - - if ( BodyNeighbours > 0 ){ - AbsDiff = abs( YUVDiffsPtr[j] ); - Score = (ogg_int32_t) - ( (double)(BodyNeighbours * - BodyNeighbourScore) * - ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] ); - if ( Score < 1 ) - Score = 1; - - /* Increment the score by a value determined by the - number of body neighbours. */ - PixelNoiseScorePtr[j] += (unsigned char)Score; - FragScore += (ogg_uint32_t)Score; - } - } - - /* Increment pointers into changed locals buffer */ - CHLocalsPtr0 ++; - CHLocalsPtr1 ++; - CHLocalsPtr2 ++; - } - - /* Add fragment score (with plane correction factor) into main - data structure */ - *FragScorePtr += (ogg_int32_t)(FragScore * - ppi->YUVPlaneCorrectionFactor); - - /* If score is greater than trip threshold then mark blcok for - update. 
*/ - if ( *FragScorePtr > ppi->BlockThreshold ){ - *DispFragPtr = BLOCK_CODED_LOW; - } - - }else{ - /* Nothing to do for this fragment group */ - /* Advance pointers into changed locals buffer */ - CHLocalsPtr0 += HFRAGPIXELS; - CHLocalsPtr1 += HFRAGPIXELS; - CHLocalsPtr2 += HFRAGPIXELS; - } - - /* Increment the various pointers */ - FragScorePtr++; - DispFragPtr++; - PixelNoiseScorePtr += HFRAGPIXELS; - ChangedLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - } - }else{ - /* This is either the top or bottom row of pixels in a plane. */ - /* Loop for all pixels in the row. */ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Does the fragment contain anything interesting to work with. */ - if ( *DispFragPtr == CANDIDATE_BLOCK ){ - /* Reset the cumulative fragment score. */ - FragScore = 0; - - /* Pixels grouped along the row into fragments */ - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* How many changed locals has the current pixel got. */ - changed_locals = ChangedLocalsPtr[j]; - - /* Is the pixel a suitable candidate */ - if ( (changed_locals > 2) && (changed_locals < 6) ){ - /* The pixel may qualify... have a closer look. */ - BodyNeighbours = 0; - - /* Count the number of "BodyNeighbours" .. Pixels - that have 7 or more changed neighbours. 
*/ - if ( RowType == LAST_ROW ){ - /* Test for cases where it could be the first pixel on - the line */ - if ( (i > 0) || (j > 0) ){ - if ( CHLocalsPtr0[0] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr1[0] >= 7 ) - BodyNeighbours++; - } - - if ( CHLocalsPtr0[1] >= 7 ) - BodyNeighbours++; - - /* Test for the end of line case */ - if ( (i + j) < LastRowIndex ){ - if ( CHLocalsPtr0[2] >= 7 ) - BodyNeighbours++; - - if ( CHLocalsPtr1[2] >= 7 ) - BodyNeighbours++; - } - }else{ - /* First Row */ - /* Test for cases where it could be the first pixel on - the line */ - if ( (i > 0) || (j > 0) ){ - if ( CHLocalsPtr1[0] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr2[0] >= 7 ) - BodyNeighbours++; - } - - /* Test for the end of line case */ - if ( CHLocalsPtr2[1] >= 7 ) - BodyNeighbours++; - - if ( (i + j) < LastRowIndex ){ - if ( CHLocalsPtr1[2] >= 7 ) - BodyNeighbours++; - if ( CHLocalsPtr2[2] >= 7 ) - BodyNeighbours++; - } - } - - /* Allocate a score according to the number of Body neighbours. */ - if ( BodyNeighbours > 0 ){ - AbsDiff = abs( YUVDiffsPtr[j] ); - Score = (ogg_int32_t) - ( (double)(BodyNeighbours * BodyNeighbourScore) * - ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] ); - if ( Score < 1 ) - Score = 1; - - PixelNoiseScorePtr[j] += (unsigned char)Score; - FragScore += (ogg_uint32_t)Score; - } - } - - /* Increment pointers into changed locals buffer */ - CHLocalsPtr0 ++; - CHLocalsPtr1 ++; - CHLocalsPtr2 ++; - } - - /* Add fragment score (with plane correction factor) into main - data structure */ - *FragScorePtr += - (ogg_int32_t)(FragScore * ppi->YUVPlaneCorrectionFactor); - - /* If score is greater than trip threshold then mark blcok for - update. 
*/ - if ( *FragScorePtr > ppi->BlockThreshold ){ - *DispFragPtr = BLOCK_CODED_LOW; - } - - }else{ - /* Nothing to do for this fragment group */ - /* Advance pointers into changed locals buffer */ - CHLocalsPtr0 += HFRAGPIXELS; - CHLocalsPtr1 += HFRAGPIXELS; - CHLocalsPtr2 += HFRAGPIXELS; - } - - /* Increment the various pointers */ - FragScorePtr++; - DispFragPtr++; - PixelNoiseScorePtr += HFRAGPIXELS; - ChangedLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - } - } -} - -static void PixelLineSearch( PP_INSTANCE *ppi, - unsigned char * ChangedLocalsPtr, - ogg_int32_t RowNumber, - ogg_int32_t ColNumber, - unsigned char direction, - ogg_uint32_t * line_length ){ - /* Exit if the pixel does not qualify or we have fallen off the edge - of either the image plane or the row. */ - if ( (RowNumber < 0) || - (RowNumber >= ppi->PlaneHeight) || - (ColNumber < 0) || - (ColNumber >= ppi->PlaneWidth) || - ((*ChangedLocalsPtr) <= 1) || - ((*ChangedLocalsPtr) >= 6) ){ - /* If not then it isn't part of any line. */ - return; - } - - if (*line_length < ppi->MaxLineSearchLen){ - ogg_uint32_t TmpLineLength; - ogg_uint32_t BestLineLength; - unsigned char * search_ptr; - - /* Increment the line length to include this pixel. 
*/ - *line_length += 1; - BestLineLength = *line_length; - - /* Continue search */ - /* up */ - if ( direction == UP ){ - TmpLineLength = *line_length; - - search_ptr = ChangedLocalsPtr - ppi->PlaneWidth; - if ( search_ptr < ppi->ChLocals ) - search_ptr += ppi->ChLocalsCircularBufferSize; - - PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber, - direction, &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - /* up and left */ - if ( (BestLineLength < ppi->MaxLineSearchLen) && - ((direction == UP) || (direction == LEFT)) ){ - TmpLineLength = *line_length; - - search_ptr = ChangedLocalsPtr - ppi->PlaneWidth; - if ( search_ptr < ppi->ChLocals ) - search_ptr += ppi->ChLocalsCircularBufferSize; - search_ptr -= 1; - - PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber - 1, - direction, &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - /* up and right */ - if ( (BestLineLength < ppi->MaxLineSearchLen) && - ((direction == UP) || (direction == RIGHT)) ){ - TmpLineLength = *line_length; - - search_ptr = ChangedLocalsPtr - ppi->PlaneWidth; - if ( search_ptr < ppi->ChLocals ) - search_ptr += ppi->ChLocalsCircularBufferSize; - search_ptr += 1; - - PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber + 1, - direction, &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - /* left */ - if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == LEFT ) ){ - TmpLineLength = *line_length; - PixelLineSearch( ppi, ChangedLocalsPtr - 1, RowNumber, ColNumber - 1, - direction, &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - /* right */ - if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == RIGHT ) ){ - TmpLineLength = *line_length; - PixelLineSearch( ppi, ChangedLocalsPtr + 1, RowNumber, ColNumber + 1, - direction, &TmpLineLength ); - - if ( 
TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - /* Down */ - if ( BestLineLength < ppi->MaxLineSearchLen ){ - TmpLineLength = *line_length; - if ( direction == DOWN ){ - search_ptr = ChangedLocalsPtr + ppi->PlaneWidth; - if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) ) - search_ptr -= ppi->ChLocalsCircularBufferSize; - - PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber, direction, - &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - - /* down and left */ - if ( (BestLineLength < ppi->MaxLineSearchLen) && - ((direction == DOWN) || (direction == LEFT)) ){ - TmpLineLength = *line_length; - - search_ptr = ChangedLocalsPtr + ppi->PlaneWidth; - if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) ) - search_ptr -= ppi->ChLocalsCircularBufferSize; - search_ptr -= 1; - - PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber - 1, - direction, &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - - /* down and right */ - if ( (BestLineLength < ppi->MaxLineSearchLen) && - ((direction == DOWN) || (direction == RIGHT)) ){ - TmpLineLength = *line_length; - - search_ptr = ChangedLocalsPtr + ppi->PlaneWidth; - if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) ) - search_ptr -= ppi->ChLocalsCircularBufferSize; - search_ptr += 1; - - PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber + 1, - direction, &TmpLineLength ); - - if ( TmpLineLength > BestLineLength ) - BestLineLength = TmpLineLength; - } - } - - /* Note the search value for this pixel. 
*/ - *line_length = BestLineLength; - } -} - -static unsigned char LineSearchScorePixel( PP_INSTANCE *ppi, - unsigned char * ChangedLocalsPtr, - ogg_int32_t RowNumber, - ogg_int32_t ColNumber ){ - ogg_uint32_t line_length = 0; - ogg_uint32_t line_length2 = 0; - ogg_uint32_t line_length_score = 0; - ogg_uint32_t tmp_line_length = 0; - ogg_uint32_t tmp_line_length2 = 0; - - /* Look UP and Down */ - PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, - ColNumber, UP, &tmp_line_length ); - - if (tmp_line_length < ppi->MaxLineSearchLen) { - /* Look DOWN */ - PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, - ColNumber, DOWN, &tmp_line_length2 ); - line_length = tmp_line_length + tmp_line_length2 - 1; - - if ( line_length > ppi->MaxLineSearchLen ) - line_length = ppi->MaxLineSearchLen; - }else - line_length = tmp_line_length; - - /* If no max length line found then look left and right */ - if ( line_length < ppi->MaxLineSearchLen ){ - tmp_line_length = 0; - tmp_line_length2 = 0; - - PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, - ColNumber, LEFT, &tmp_line_length ); - if (tmp_line_length < ppi->MaxLineSearchLen){ - PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, - ColNumber, RIGHT, &tmp_line_length2 ); - line_length2 = tmp_line_length + tmp_line_length2 - 1; - - if ( line_length2 > ppi->MaxLineSearchLen ) - line_length2 = ppi->MaxLineSearchLen; - }else - line_length2 = tmp_line_length; - - } - - /* Take the largest line length */ - if ( line_length2 > line_length ) - line_length = line_length2; - - /* Create line length score */ - line_length_score = LineLengthScores[line_length]; - - return (unsigned char)line_length_score; -} - -static void LineSearchScoreRow( PP_INSTANCE *ppi, - unsigned char * ChangedLocalsPtr, - ogg_int16_t * YUVDiffsPtr, - unsigned char * PixelNoiseScorePtr, - ogg_uint32_t * FragScorePtr, - signed char * DispFragPtr, - ogg_int32_t RowNumber ){ - ogg_uint32_t AbsDiff; - unsigned char changed_locals = 0; - ogg_int32_t Score; - ogg_uint32_t 
FragScore; - ogg_int32_t i,j; - - /* The defining rule used here is as follows. */ - /* An edge pixels has 2-5 changed locals. */ - /* And one or more of these changed locals has itself got 7-8 - changed locals. */ - - /* Loop for all pixels in the row. */ - for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){ - /* Does the fragment contain anything interesting to work with. */ - if ( *DispFragPtr == CANDIDATE_BLOCK ){ - /* Reset the cumulative fragment score. */ - FragScore = 0; - - /* Pixels grouped along the row into fragments */ - for ( j = 0; j < HFRAGPIXELS; j++ ){ - /* How many changed locals has the current pixel got. */ - changed_locals = ChangedLocalsPtr[j]; - - /* Is the pixel a suitable candidate for edge enhancement */ - if ( (changed_locals > 1) && (changed_locals < 6) && - (PixelNoiseScorePtr[j] < ppi->LineSearchTripTresh) ) { - Score = (ogg_int32_t) - LineSearchScorePixel( ppi, &ChangedLocalsPtr[j], RowNumber, i+j ); - - if ( Score ){ - AbsDiff = abs( YUVDiffsPtr[j] ); - Score = (ogg_int32_t) - ( (double)Score * ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] ); - if ( Score < 1 ) - Score = 1; - - PixelNoiseScorePtr[j] += (unsigned char)Score; - FragScore += (ogg_uint32_t)Score; - } - } - } - - /* Add fragment score (with plane correction factor) into main - data structure */ - *FragScorePtr += - (ogg_int32_t)(FragScore * ppi->YUVPlaneCorrectionFactor); - - /* If score is greater than trip threshold then mark blcok for update. 
*/ - if ( *FragScorePtr > ppi->BlockThreshold ){ - *DispFragPtr = BLOCK_CODED_LOW; - } - } - - /* Increment the various pointers */ - FragScorePtr++; - DispFragPtr++; - PixelNoiseScorePtr += HFRAGPIXELS; - ChangedLocalsPtr += HFRAGPIXELS; - YUVDiffsPtr += HFRAGPIXELS; - - } -} - -static void RowCopy( PP_INSTANCE *ppi, ogg_uint32_t BlockMapIndex ){ - - ogg_uint32_t i,j; - - ogg_uint32_t PixelIndex = ppi->ScanPixelIndexTable[BlockMapIndex]; - signed char * BlockMapPtr = &ppi->ScanDisplayFragments[BlockMapIndex]; - signed char * PrevFragmentsPtr = &ppi->PrevFragments[0][BlockMapIndex]; - - unsigned char * SourcePtr; - unsigned char * DestPtr; - - /* Copy pixels from changed blocks back to reference frame. */ - for ( i = 0; i < (ogg_uint32_t)ppi->PlaneHFragments; i ++ ){ - /* If the fragement is marked for update or was recently marked - for update (PrevFragmentsPtr[i]) */ - if ( (BlockMapPtr[i] > BLOCK_NOT_CODED) || - (PrevFragmentsPtr[i] == BLOCK_CODED) ){ - /* Set up the various pointers required. */ - SourcePtr = &ppi->ScanConfig.Yuv1ptr[PixelIndex]; - DestPtr = &ppi->ScanConfig.SrfWorkSpcPtr[PixelIndex]; - - /* For each row of the block */ - for ( j = 0; j < VFRAGPIXELS; j++ ){ - /* Copy the data unaltered from source to destination */ - memcpy(DestPtr,SourcePtr,8); - - /* Increment pointers for next line in the block */ - SourcePtr += ppi->PlaneWidth; - DestPtr += ppi->PlaneWidth; - } - } - - /* Increment pixel index for next block. */ - PixelIndex += HFRAGPIXELS; - } -} - -static void RowBarEnhBlockMap( PP_INSTANCE *ppi, - signed char * UpdatedBlockMapPtr, - signed char * BarBlockMapPtr, - ogg_uint32_t RowNumber ){ - int i; - - /* Start by blanking the row in the bar block map structure. */ - memset( BarBlockMapPtr, BLOCK_NOT_CODED, ppi->PlaneHFragments ); - - /* First row */ - if ( RowNumber == 0 ){ - - /* For each fragment in the row. */ - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - /* Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW. 
Uncoded or - coded blocks will be ignored. */ - if ( UpdatedBlockMapPtr[i] <= CANDIDATE_BLOCK ){ - /* Is one of the immediate neighbours updated in the main map. */ - /* Note special cases for blocks at the start and end of rows. */ - if ( i == 0 ){ - - if ((UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments]>BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1]>BLOCK_NOT_CODED ) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - - - }else if ( i == (ppi->PlaneHFragments - 1) ){ - - if ((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1]>BLOCK_NOT_CODED) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments]>BLOCK_NOT_CODED) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - - }else{ - if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1] > BLOCK_NOT_CODED) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - } - } - } - - } else if ( RowNumber == (ogg_uint32_t)(ppi->PlaneVFragments-1)) { - - /* Last row */ - /* Used to read PlaneHFragments */ - - /* For each fragment in the row. */ - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - /* Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW - Uncoded or coded blocks will be ignored. */ - if ( UpdatedBlockMapPtr[i] <= CANDIDATE_BLOCK ){ - /* Is one of the immediate neighbours updated in the main map. */ - /* Note special cases for blocks at the start and end of rows. 
*/ - if ( i == 0 ){ - if((UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED )) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - - }else if ( i == (ppi->PlaneHFragments - 1) ){ - if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - }else{ - if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - } - } - } - - }else{ - /* All other rows */ - /* For each fragment in the row. */ - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - /* Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW */ - /* Uncoded or coded blocks will be ignored. */ - if ( UpdatedBlockMapPtr[i] <= CANDIDATE_BLOCK ){ - /* Is one of the immediate neighbours updated in the main map. */ - /* Note special cases for blocks at the start and end of rows. 
*/ - if ( i == 0 ){ - - if((UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1] > BLOCK_NOT_CODED) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - - }else if ( i == (ppi->PlaneHFragments - 1) ){ - - if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) ) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - - }else{ - if((UpdatedBlockMapPtr[i-1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+1] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i-ppi->PlaneHFragments+1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments-1] > BLOCK_NOT_CODED)|| - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments] > BLOCK_NOT_CODED ) || - (UpdatedBlockMapPtr[i+ppi->PlaneHFragments+1] > BLOCK_NOT_CODED )) - BarBlockMapPtr[i] = BLOCK_CODED_BAR; - } - } - } - } -} - -static void BarCopyBack( PP_INSTANCE *ppi, - signed char * UpdatedBlockMapPtr, - signed char * BarBlockMapPtr ){ - ogg_int32_t i; - - /* For each fragment in the row. 
*/ - for ( i = 0; i < ppi->PlaneHFragments; i ++ ){ - if ( BarBlockMapPtr[i] > BLOCK_NOT_CODED ){ - UpdatedBlockMapPtr[i] = BarBlockMapPtr[i]; - } - } -} - -static void AnalysePlane( PP_INSTANCE *ppi, - unsigned char * PlanePtr0, - unsigned char * PlanePtr1, - ogg_uint32_t FragArrayOffset, - ogg_uint32_t PWidth, - ogg_uint32_t PHeight, - ogg_uint32_t PStride ) { - unsigned char * RawPlanePtr0; - unsigned char * RawPlanePtr1; - - ogg_int16_t * YUVDiffsPtr; - ogg_int16_t * YUVDiffsPtr1; - ogg_int16_t * YUVDiffsPtr2; - - ogg_uint32_t FragIndex; - ogg_uint32_t ScoreFragIndex1; - ogg_uint32_t ScoreFragIndex2; - ogg_uint32_t ScoreFragIndex3; - ogg_uint32_t ScoreFragIndex4; - - int UpdatedOrCandidateBlocks = 0; - - unsigned char * ChLocalsPtr0; - unsigned char * ChLocalsPtr1; - unsigned char * ChLocalsPtr2; - - unsigned char * PixelsChangedPtr0; - unsigned char * PixelsChangedPtr1; - - unsigned char * PixelScoresPtr1; - unsigned char * PixelScoresPtr2; - - signed char * DispFragPtr0; - signed char * DispFragPtr1; - signed char * DispFragPtr2; - - ogg_uint32_t * FragScoresPtr1; - ogg_uint32_t * FragScoresPtr2; - - ogg_int32_t * RowDiffsPtr; - ogg_int32_t * RowDiffsPtr1; - ogg_int32_t * RowDiffsPtr2; - - ogg_int32_t i,j; - - ogg_int32_t RowNumber1; - ogg_int32_t RowNumber2; - ogg_int32_t RowNumber3; - ogg_int32_t RowNumber4; - - int EdgeRow; - ogg_int32_t LineSearchRowNumber = 0; - - /* Variables used as temporary stores for frequently used values. */ - ogg_int32_t Row0Mod3; - ogg_int32_t Row1Mod3; - ogg_int32_t Row2Mod3; - ogg_int32_t BlockRowPixels; - - /* Set pixel difference threshold */ - if ( FragArrayOffset == 0 ){ - /* Luminance */ - ppi->LevelThresh = (int)ppi->SgcLevelThresh; - ppi->NegLevelThresh = -ppi->LevelThresh; - - ppi->SrfThresh = (int)ppi->SRFGreyThresh; - ppi->NegSrfThresh = -ppi->SrfThresh; - - /* Scores correction for Y pixels. 
*/ - ppi->YUVPlaneCorrectionFactor = 1.0; - - ppi->BlockThreshold = ppi->PrimaryBlockThreshold; - ppi->BlockSgcThresh = ppi->SgcThresh; - }else{ - /* Chrominance */ - ppi->LevelThresh = (int)ppi->SuvcLevelThresh; - ppi->NegLevelThresh = -ppi->LevelThresh; - - ppi->SrfThresh = (int)ppi->SRFColThresh; - ppi->NegSrfThresh = -ppi->SrfThresh; - - /* Scores correction for UV pixels. */ - ppi->YUVPlaneCorrectionFactor = 1.5; - - /* Block threholds different for subsampled U and V blocks */ - ppi->BlockThreshold = - (ppi->PrimaryBlockThreshold / ppi->UVBlockThreshCorrection); - ppi->BlockSgcThresh = - (ppi->SgcThresh / ppi->UVSgcCorrection); - } - - /* Initialise the SRF thresh table and pointer. */ - memset( ppi->SrfThreshTable, 1, 512 ); - for ( i = ppi->NegSrfThresh; i <= ppi->SrfThresh; i++ ) - ppi->SrfThreshTable[i+255] = 0; - - /* Initialise the PAK thresh table. */ - for ( i = -255; i <= 255; i++ ) - if ( ppi->SrfThreshTable[i+255] && - (i <= ppi->HighChange) && - (i >= ppi->NegHighChange) ) - ppi->SrfPakThreshTable[i+255] = 1; - else - ppi->SrfPakThreshTable[i+255] = 0; - - /* Initialise the SGc lookup table */ - for ( i = -255; i <= 255; i++ ){ - if ( i <= ppi->NegLevelThresh ) - ppi->SgcThreshTable[i+255] = (unsigned char) -1; - else if ( i >= ppi->LevelThresh ) - ppi->SgcThreshTable[i+255] = 1; - else - ppi->SgcThreshTable[i+255] = 0; - } - - /* Set up plane dimension variables */ - ppi->PlaneHFragments = PWidth / HFRAGPIXELS; - ppi->PlaneVFragments = PHeight / VFRAGPIXELS; - ppi->PlaneWidth = PWidth; - ppi->PlaneHeight = PHeight; - ppi->PlaneStride = PStride; - - /* Set up local pointers into the raw image data. */ - RawPlanePtr0 = PlanePtr0; - RawPlanePtr1 = PlanePtr1; - - /* Note size and endo points for circular buffers. 
*/ - ppi->YuvDiffsCircularBufferSize = YDIFF_CB_ROWS * ppi->PlaneWidth; - ppi->ChLocalsCircularBufferSize = CHLOCALS_CB_ROWS * ppi->PlaneWidth; - ppi->PixelMapCircularBufferSize = PMAP_CB_ROWS * ppi->PlaneWidth; - - /* Set high change thresh where PAK not needed */ - ppi->HighChange = ppi->SrfThresh * 4; - ppi->NegHighChange = -ppi->HighChange; - - /* Set up row difference pointers. */ - RowDiffsPtr = ppi->RowChangedPixels; - RowDiffsPtr1 = ppi->RowChangedPixels; - RowDiffsPtr2 = ppi->RowChangedPixels; - - BlockRowPixels = ppi->PlaneWidth * VFRAGPIXELS; - - for ( i = 0; i < (ppi->PlaneVFragments + 4); i++ ){ - RowNumber1 = (i - 1); - RowNumber2 = (i - 2); - RowNumber3 = (i - 3); - RowNumber4 = (i - 4); - - /* Pre calculate some frequently used values */ - Row0Mod3 = i % 3; - Row1Mod3 = RowNumber1 % 3; - Row2Mod3 = RowNumber2 % 3; - - /* For row diff scan last two iterations are invalid */ - if ( i < ppi->PlaneVFragments ){ - FragIndex = (i * ppi->PlaneHFragments) + FragArrayOffset; - YUVDiffsPtr = &ppi->yuv_differences[Row0Mod3 * BlockRowPixels]; - - PixelsChangedPtr0 = (&ppi->PixelChangedMap[Row0Mod3 * BlockRowPixels]); - DispFragPtr0 = &ppi->ScanDisplayFragments[FragIndex]; - - ChLocalsPtr0 = (&ppi->ChLocals[Row0Mod3 * BlockRowPixels]); - - } - - /* Set up the changed locals pointer to trail behind by one row of - fragments. 
*/ - if ( i > 0 ){ - /* For last iteration the ch locals and noise scans are invalid */ - if ( RowNumber1 < ppi->PlaneVFragments ){ - ScoreFragIndex1 = (RowNumber1 * ppi->PlaneHFragments) + - FragArrayOffset; - - ChLocalsPtr1 = &ppi->ChLocals[Row1Mod3 * BlockRowPixels]; - PixelsChangedPtr1 = - &ppi->PixelChangedMap[(Row1Mod3) * BlockRowPixels]; - - PixelScoresPtr1 = &ppi->PixelScores[(RowNumber1 % 4) * BlockRowPixels]; - - YUVDiffsPtr1 = &ppi->yuv_differences[Row1Mod3 * BlockRowPixels]; - FragScoresPtr1 = &ppi->FragScores[ScoreFragIndex1]; - DispFragPtr1 = &ppi->ScanDisplayFragments[ScoreFragIndex1]; - - } - - if ( RowNumber2 >= 0 ){ - ScoreFragIndex2 = (RowNumber2 * ppi->PlaneHFragments) + - FragArrayOffset; - ChLocalsPtr2 = (&ppi->ChLocals[Row2Mod3 * BlockRowPixels]); - YUVDiffsPtr2 = &ppi->yuv_differences[Row2Mod3 * BlockRowPixels]; - - PixelScoresPtr2 = &ppi->PixelScores[(RowNumber2 % 4) * BlockRowPixels]; - - FragScoresPtr2 = &ppi->FragScores[ScoreFragIndex2]; - DispFragPtr2 = &ppi->ScanDisplayFragments[ScoreFragIndex2]; - }else{ - ChLocalsPtr2 = NULL; - } - }else{ - ChLocalsPtr1 = NULL; - ChLocalsPtr2 = NULL; - } - - /* Fast break out test for obvious yes and no cases in this row of - blocks */ - if ( i < ppi->PlaneVFragments ){ - dsp_save_fpu (ppi->dsp); - UpdatedOrCandidateBlocks = - RowSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 ); - UpdatedOrCandidateBlocks |= - ColSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 ); - dsp_restore_fpu (ppi->dsp); - }else{ - /* Make sure we still call other functions if RowSadScan() disabled */ - UpdatedOrCandidateBlocks = 1; - } - - /* Consolidation and fast break ot tests at Row 1 level */ - if ( (i > 0) && (RowNumber1 < ppi->PlaneVFragments) ){ - /* Mark as coded any candidate block that lies adjacent to a - coded block. */ - SadPass2( ppi, RowNumber1, DispFragPtr1 ); - - /* Check results of diff scan in last set of blocks. 
*/ - /* Eliminate NO cases and add in +SGC cases */ - ConsolidateDiffScanResults( ppi, &ppi->FragDiffPixels[ScoreFragIndex1], - &ppi->SameGreyDirPixels[ScoreFragIndex1], - DispFragPtr1 - ); - } - - for ( j = 0; j < VFRAGPIXELS; j++ ){ - /* Last two iterations do not apply */ - if ( i < ppi->PlaneVFragments ){ - /* Is the current fragment at an edge. */ - EdgeRow = ( ( (i == 0) && (j == 0) ) || - ( (i == (ppi->PlaneVFragments - 1)) && - (j == (VFRAGPIXELS - 1)) ) ); - - /* Clear the arrays that will be used for the changed pixels maps */ - memset( PixelsChangedPtr0, 0, ppi->PlaneWidth ); - - /* Difference scan and map each row */ - if ( UpdatedOrCandidateBlocks ){ - /* Scan the row for interesting differences */ - /* Also clear the array that will be used for changed locals map */ - RowDiffScan( ppi, RawPlanePtr0, RawPlanePtr1, - YUVDiffsPtr, PixelsChangedPtr0, - &ppi->SameGreyDirPixels[FragIndex], - DispFragPtr0, &ppi->FragDiffPixels[FragIndex], - RowDiffsPtr, ChLocalsPtr0, EdgeRow); - }else{ - /* Clear the array that will be used for changed locals map */ - memset( ChLocalsPtr0, 0, ppi->PlaneWidth ); - } - - /* The actual image plane pointers must be incremented by - stride as this may be different (more) than the plane - width. Our own internal buffers use ppi->PlaneWidth. */ - RawPlanePtr0 += ppi->PlaneStride; - RawPlanePtr1 += ppi->PlaneStride; - PixelsChangedPtr0 += ppi->PlaneWidth; - ChLocalsPtr0 += ppi->PlaneWidth; - YUVDiffsPtr += ppi->PlaneWidth; - RowDiffsPtr++; - } - - /* Run behind calculating the changed locals data and noise scores. */ - if ( ChLocalsPtr1 != NULL ){ - /* Last few iterations do not apply */ - if ( RowNumber1 < ppi->PlaneVFragments ){ - /* Blank the next row in the pixel scores data structure. 
*/ - memset( PixelScoresPtr1, 0, ppi->PlaneWidth ); - - /* Don't bother doing anything if there are no changed - pixels in this row */ - if ( *RowDiffsPtr1 ){ - /* Last valid row is a special case */ - if ( i < ppi->PlaneVFragments ) - RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1, - DispFragPtr1, - ( (((i-1)==0) && (j==0)) ? - FIRST_ROW : NOT_EDGE_ROW) ); - else - RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1, - DispFragPtr1, - ((j==(VFRAGPIXELS-1)) ? - LAST_ROW : NOT_EDGE_ROW) ); - - NoiseScoreRow( ppi, PixelsChangedPtr1, ChLocalsPtr1, YUVDiffsPtr1, - PixelScoresPtr1, FragScoresPtr1, DispFragPtr1, - RowDiffsPtr1 ); - } - - ChLocalsPtr1 += ppi->PlaneWidth; - PixelsChangedPtr1 += ppi->PlaneWidth; - YUVDiffsPtr1 += ppi->PlaneWidth; - PixelScoresPtr1 += ppi->PlaneWidth; - RowDiffsPtr1 ++; - } - - /* Run edge enhancement algorithms */ - if ( RowNumber2 < ppi->PlaneVFragments ){ - if ( ChLocalsPtr2 != NULL ){ - /* Don't bother doing anything if there are no changed - pixels in this row */ - if ( *RowDiffsPtr2 ){ - if ( RowNumber1 < ppi->PlaneVFragments ){ - PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2, - PixelScoresPtr2, FragScoresPtr2, - DispFragPtr2, - ( (((i-2)==0) && (j==0)) ? - FIRST_ROW : NOT_EDGE_ROW) ); - }else{ - /* Edge enhancement */ - PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2, - PixelScoresPtr2, FragScoresPtr2, - DispFragPtr2, - ((j==(VFRAGPIXELS-1)) ? 
- LAST_ROW : NOT_EDGE_ROW) ); - } - - /* Recursive line search */ - LineSearchScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2, - PixelScoresPtr2, FragScoresPtr2, - DispFragPtr2, - LineSearchRowNumber ); - } - - ChLocalsPtr2 += ppi->PlaneWidth; - YUVDiffsPtr2 += ppi->PlaneWidth; - PixelScoresPtr2 += ppi->PlaneWidth; - LineSearchRowNumber += 1; - RowDiffsPtr2 ++; - } - } - } - } - - /* BAR algorithm */ - if ( (RowNumber3 >= 0) && (RowNumber3 < ppi->PlaneVFragments) ){ - ScoreFragIndex3 = (RowNumber3 * ppi->PlaneHFragments) + FragArrayOffset; - RowBarEnhBlockMap(ppi, - &ppi->ScanDisplayFragments[ScoreFragIndex3], - &ppi->BarBlockMap[(RowNumber3 % 3) * - ppi->PlaneHFragments], - RowNumber3 ); - } - - /* BAR copy back and "ppi->SRF filtering" or "pixel copy back" */ - if ( (RowNumber4 >= 0) && (RowNumber4 < ppi->PlaneVFragments) ){ - /* BAR copy back stage must lag by one more row to avoid BAR blocks - being used in BAR descisions. */ - ScoreFragIndex4 = (RowNumber4 * ppi->PlaneHFragments) + FragArrayOffset; - - BarCopyBack(ppi, &ppi->ScanDisplayFragments[ScoreFragIndex4], - &ppi->BarBlockMap[(RowNumber4 % 3) * ppi->PlaneHFragments]); - - /* Copy over the data from any blocks marked for update into the - output buffer. */ - RowCopy(ppi, ScoreFragIndex4); - } - } -} - -ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi, ogg_uint32_t * KFIndicator ){ - - /* Initialise the map arrays. */ - InitScanMapArrays(ppi); - - /* If the motion level in the previous frame was high then adjust - the high and low SAD thresholds to speed things up. */ - ppi->ModifiedGrpLowSadThresh = ppi->GrpLowSadThresh; - ppi->ModifiedGrpHighSadThresh = ppi->GrpHighSadThresh; - - - /* Set up the internal plane height and width variables. 
*/ - ppi->VideoYPlaneWidth = ppi->ScanConfig.VideoFrameWidth; - ppi->VideoYPlaneHeight = ppi->ScanConfig.VideoFrameHeight; - ppi->VideoUVPlaneWidth = ppi->ScanConfig.VideoFrameWidth / 2; - ppi->VideoUVPlaneHeight = ppi->ScanConfig.VideoFrameHeight / 2; - - /* To start with the strides will be set from the widths */ - ppi->VideoYPlaneStride = ppi->VideoYPlaneWidth; - ppi->VideoUPlaneStride = ppi->VideoUVPlaneWidth; - ppi->VideoVPlaneStride = ppi->VideoUVPlaneWidth; - - /* Set up the plane pointers */ - ppi->YPlanePtr0 = ppi->ScanConfig.Yuv0ptr; - ppi->YPlanePtr1 = ppi->ScanConfig.Yuv1ptr; - ppi->UPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels); - ppi->UPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels); - ppi->VPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels + - ppi->UVFramePixels); - ppi->VPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels + - ppi->UVFramePixels); - - /* Check previous frame lists and if necessary mark extra blocks for - update. */ - SetFromPrevious(ppi); - - /* Ananlyse the U and V palnes. */ - AnalysePlane( ppi, ppi->UPlanePtr0, ppi->UPlanePtr1, - ppi->ScanYPlaneFragments, ppi->VideoUVPlaneWidth, - ppi->VideoUVPlaneHeight, ppi->VideoUPlaneStride ); - AnalysePlane( ppi, ppi->VPlanePtr0, ppi->VPlanePtr1, - (ppi->ScanYPlaneFragments + ppi->ScanUVPlaneFragments), - ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight, - ppi->VideoVPlaneStride ); - - /* Now analyse the Y plane. */ - AnalysePlane( ppi, ppi->YPlanePtr0, ppi->YPlanePtr1, 0, - ppi->VideoYPlaneWidth, ppi->VideoYPlaneHeight, - ppi->VideoYPlaneStride ); - - /* Update the list of previous frame block updates. */ - UpdatePreviousBlockLists(ppi); - - /* Create an output block map for the calling process. 
*/ - CreateOutputDisplayMap( ppi, ppi->ScanDisplayFragments, - ppi->PrevFragments[0], - ppi->ScanConfig.disp_fragments ); - - /* Set the candidate key frame indicator (0-100) */ - *KFIndicator = ppi->KFIndicator; - - /* Return the normalised block count (this is actually a motion - level weighting not a true block count). */ - return ppi->OutputBlocksUpdated; -} - diff --git a/Engine/lib/libtheora/lib/enc/toplevel_lookup.h b/Engine/lib/libtheora/lib/enc/toplevel_lookup.h deleted file mode 100644 index bf83a15b6..000000000 --- a/Engine/lib/libtheora/lib/enc/toplevel_lookup.h +++ /dev/null @@ -1,40 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: toplevel_lookup.h 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#include "codec_internal.h" - -const ogg_uint32_t PriorKeyFrameWeight[KEY_FRAME_CONTEXT] = { 1,2,3,4,5 }; - -/* Data structures controlling addition of residue blocks */ -const ogg_uint32_t ResidueErrorThresh[Q_TABLE_SIZE] = { - 750, 700, 650, 600, 590, 580, 570, 560, - 550, 540, 530, 520, 510, 500, 490, 480, - 470, 460, 450, 440, 430, 420, 410, 400, - 390, 380, 370, 360, 350, 340, 330, 320, - 310, 300, 290, 280, 270, 260, 250, 245, - 240, 235, 230, 225, 220, 215, 210, 205, - 200, 195, 190, 185, 180, 175, 170, 165, - 160, 155, 150, 145, 140, 135, 130, 130 }; -const ogg_uint32_t ResidueBlockFactor[Q_TABLE_SIZE] = { - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 
3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2 }; diff --git a/Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c deleted file mode 100644 index 547e974e3..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c +++ /dev/null @@ -1,409 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dct_decode_mmx.c 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" - -#if defined(USE_ASM) - -static const __attribute__((aligned(8),used)) ogg_int64_t OC_V3= - 0x0003000300030003LL; -static const __attribute__((aligned(8),used)) ogg_int64_t OC_V4= - 0x0004000400040004LL; - -static void loop_filter_v(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - long esi; - _pix-=_ystride*2; - __asm__ __volatile__( - /*mm0=0*/ - "pxor %%mm0,%%mm0\n\t" - /*esi=_ystride*3*/ - "lea (%[ystride],%[ystride],2),%[s]\n\t" - /*mm7=_pix[0...8]*/ - "movq (%[pix]),%%mm7\n\t" - /*mm4=_pix[0...8+_ystride*3]*/ - "movq (%[pix],%[s]),%%mm4\n\t" - /*mm6=_pix[0...8]*/ - "movq %%mm7,%%mm6\n\t" - /*Expand unsigned _pix[0...3] to 16 bits.*/ - "punpcklbw %%mm0,%%mm6\n\t" - "movq %%mm4,%%mm5\n\t" - /*Expand unsigned _pix[4...8] to 16 bits.*/ - "punpckhbw %%mm0,%%mm7\n\t" - /*Expand other arrays too.*/ 
- "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm5\n\t" - /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/ - "psubw %%mm4,%%mm6\n\t" - "psubw %%mm5,%%mm7\n\t" - /*mm5=mm4=_pix[0...8+_ystride]*/ - "movq (%[pix],%[ystride]),%%mm4\n\t" - /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/ - "movq (%[pix],%[ystride],2),%%mm2\n\t" - "movq %%mm4,%%mm5\n\t" - "movq %%mm2,%%mm3\n\t" - "movq %%mm2,%%mm1\n\t" - /*Expand these arrays.*/ - "punpckhbw %%mm0,%%mm5\n\t" - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm3\n\t" - "punpcklbw %%mm0,%%mm2\n\t" - /*Preload...*/ - "movq %[OC_V3],%%mm0\n\t" - /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/ - "psubw %%mm5,%%mm3\n\t" - "psubw %%mm4,%%mm2\n\t" - /*Scale by 3.*/ - "pmullw %%mm0,%%mm3\n\t" - "pmullw %%mm0,%%mm2\n\t" - /*Preload...*/ - "movq %[OC_V4],%%mm0\n\t" - /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+ - 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/ - "paddw %%mm7,%%mm3\n\t" - "paddw %%mm6,%%mm2\n\t" - /*Add 4.*/ - "paddw %%mm0,%%mm3\n\t" - "paddw %%mm0,%%mm2\n\t" - /*"Divide" by 8.*/ - "psraw $3,%%mm3\n\t" - "psraw $3,%%mm2\n\t" - /*Now compute lflim of mm3:mm2 cf. 
Section 7.10 of the sepc.*/ - /*Free up mm5.*/ - "packuswb %%mm5,%%mm4\n\t" - /*mm0=L L L L*/ - "movq (%[ll]),%%mm0\n\t" - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - "movq %%mm2,%%mm5\n\t" - "pxor %%mm6,%%mm6\n\t" - "movq %%mm0,%%mm7\n\t" - "psubw %%mm0,%%mm6\n\t" - "psllw $1,%%mm7\n\t" - "psllw $1,%%mm6\n\t" - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - "pcmpgtw %%mm2,%%mm7\n\t" - "pcmpgtw %%mm6,%%mm5\n\t" - "pand %%mm7,%%mm2\n\t" - "movq %%mm0,%%mm7\n\t" - "pand %%mm5,%%mm2\n\t" - "psllw $1,%%mm7\n\t" - "movq %%mm3,%%mm5\n\t" - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - "pcmpgtw %%mm3,%%mm7\n\t" - "pcmpgtw %%mm6,%%mm5\n\t" - "pand %%mm7,%%mm3\n\t" - "movq %%mm0,%%mm7\n\t" - "pand %%mm5,%%mm3\n\t" - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - "psraw $1,%%mm6\n\t" - "movq %%mm2,%%mm5\n\t" - "psllw $1,%%mm7\n\t" - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm5=R_i>L?FF:00*/ - "pcmpgtw %%mm0,%%mm5\n\t" - /*mm6=-L>R_i?FF:00*/ - "pcmpgtw %%mm2,%%mm6\n\t" - /*mm7=R_i>L?2L:0*/ - "pand %%mm5,%%mm7\n\t" - /*mm2=R_i>L?R_i-2L:R_i*/ - "psubw %%mm7,%%mm2\n\t" - "movq %%mm0,%%mm7\n\t" - /*mm5=-L>R_i||R_i>L*/ - "por %%mm6,%%mm5\n\t" - "psllw $1,%%mm7\n\t" - /*mm7=-L>R_i?2L:0*/ - "pand %%mm6,%%mm7\n\t" - "pxor %%mm6,%%mm6\n\t" - /*mm2=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm7,%%mm2\n\t" - "psubw %%mm0,%%mm6\n\t" - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - "pand %%mm2,%%mm5\n\t" - "movq %%mm0,%%mm7\n\t" - /*mm2=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm5,%%mm2\n\t" - "psllw $1,%%mm7\n\t" - /*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm5,%%mm2\n\t" - "movq %%mm3,%%mm5\n\t" - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm6=-L>R_i?FF:00*/ - "pcmpgtw %%mm3,%%mm6\n\t" - /*mm5=R_i>L?FF:00*/ - "pcmpgtw %%mm0,%%mm5\n\t" - /*mm7=R_i>L?2L:0*/ - "pand 
%%mm5,%%mm7\n\t" - /*mm2=R_i>L?R_i-2L:R_i*/ - "psubw %%mm7,%%mm3\n\t" - "psllw $1,%%mm0\n\t" - /*mm5=-L>R_i||R_i>L*/ - "por %%mm6,%%mm5\n\t" - /*mm0=-L>R_i?2L:0*/ - "pand %%mm6,%%mm0\n\t" - /*mm3=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm0,%%mm3\n\t" - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - "pand %%mm3,%%mm5\n\t" - /*mm2=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm5,%%mm3\n\t" - /*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm5,%%mm3\n\t" - /*Unfortunately, there's no unsigned byte+signed byte with unsigned - saturation op code, so we have to promote things back 16 bits.*/ - "pxor %%mm0,%%mm0\n\t" - "movq %%mm4,%%mm5\n\t" - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm5\n\t" - "movq %%mm1,%%mm6\n\t" - "punpcklbw %%mm0,%%mm1\n\t" - "punpckhbw %%mm0,%%mm6\n\t" - /*_pix[0...8+_ystride]+=R_i*/ - "paddw %%mm2,%%mm4\n\t" - "paddw %%mm3,%%mm5\n\t" - /*_pix[0...8+_ystride*2]-=R_i*/ - "psubw %%mm2,%%mm1\n\t" - "psubw %%mm3,%%mm6\n\t" - "packuswb %%mm5,%%mm4\n\t" - "packuswb %%mm6,%%mm1\n\t" - /*Write it back out.*/ - "movq %%mm4,(%[pix],%[ystride])\n\t" - "movq %%mm1,(%[pix],%[ystride],2)\n\t" - :[s]"=&S"(esi) - :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll), - [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4) - :"memory" - ); -} - -/*This code implements the bulk of loop_filter_h(). - Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all - four p0's to one register we must transpose the values in four mmx regs. 
- When half is done we repeat this for the rest.*/ -static void loop_filter_h4(unsigned char *_pix,long _ystride, - const ogg_int16_t *_ll){ - long esi; - long edi; - __asm__ __volatile__( - /*x x x x 3 2 1 0*/ - "movd (%[pix]),%%mm0\n\t" - /*esi=_ystride*3*/ - "lea (%[ystride],%[ystride],2),%[s]\n\t" - /*x x x x 7 6 5 4*/ - "movd (%[pix],%[ystride]),%%mm1\n\t" - /*x x x x B A 9 8*/ - "movd (%[pix],%[ystride],2),%%mm2\n\t" - /*x x x x F E D C*/ - "movd (%[pix],%[s]),%%mm3\n\t" - /*mm0=7 3 6 2 5 1 4 0*/ - "punpcklbw %%mm1,%%mm0\n\t" - /*mm2=F B E A D 9 C 8*/ - "punpcklbw %%mm3,%%mm2\n\t" - /*mm1=7 3 6 2 5 1 4 0*/ - "movq %%mm0,%%mm1\n\t" - /*mm0=F B 7 3 E A 6 2*/ - "punpckhwd %%mm2,%%mm0\n\t" - /*mm1=D 9 5 1 C 8 4 0*/ - "punpcklwd %%mm2,%%mm1\n\t" - "pxor %%mm7,%%mm7\n\t" - /*mm5=D 9 5 1 C 8 4 0*/ - "movq %%mm1,%%mm5\n\t" - /*mm1=x C x 8 x 4 x 0==pix[0]*/ - "punpcklbw %%mm7,%%mm1\n\t" - /*mm5=x D x 9 x 5 x 1==pix[1]*/ - "punpckhbw %%mm7,%%mm5\n\t" - /*mm3=F B 7 3 E A 6 2*/ - "movq %%mm0,%%mm3\n\t" - /*mm0=x E x A x 6 x 2==pix[2]*/ - "punpcklbw %%mm7,%%mm0\n\t" - /*mm3=x F x B x 7 x 3==pix[3]*/ - "punpckhbw %%mm7,%%mm3\n\t" - /*mm1=mm1-mm3==pix[0]-pix[3]*/ - "psubw %%mm3,%%mm1\n\t" - /*Save a copy of pix[2] for later.*/ - "movq %%mm0,%%mm4\n\t" - /*mm0=mm0-mm5==pix[2]-pix[1]*/ - "psubw %%mm5,%%mm0\n\t" - /*Scale by 3.*/ - "pmullw %[OC_V3],%%mm0\n\t" - /*f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/ - "paddw %%mm1,%%mm0\n\t" - /*Add 4.*/ - "paddw %[OC_V4],%%mm0\n\t" - /*"Divide" by 8, producing the residuals R_i.*/ - "psraw $3,%%mm0\n\t" - /*Now compute lflim of mm0 cf. 
Section 7.10 of the sepc.*/ - /*mm6=L L L L*/ - "movq (%[ll]),%%mm6\n\t" - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - "movq %%mm0,%%mm1\n\t" - "pxor %%mm2,%%mm2\n\t" - "movq %%mm6,%%mm3\n\t" - "psubw %%mm6,%%mm2\n\t" - "psllw $1,%%mm3\n\t" - "psllw $1,%%mm2\n\t" - /*mm0==R_3 R_2 R_1 R_0*/ - /*mm1==R_3 R_2 R_1 R_0*/ - /*mm2==-2L -2L -2L -2L*/ - /*mm3==2L 2L 2L 2L*/ - "pcmpgtw %%mm0,%%mm3\n\t" - "pcmpgtw %%mm2,%%mm1\n\t" - "pand %%mm3,%%mm0\n\t" - "pand %%mm1,%%mm0\n\t" - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - "psraw $1,%%mm2\n\t" - "movq %%mm0,%%mm1\n\t" - "movq %%mm6,%%mm3\n\t" - /*mm0==R_3 R_2 R_1 R_0*/ - /*mm1==R_3 R_2 R_1 R_0*/ - /*mm2==-L -L -L -L*/ - /*mm6==L L L L*/ - /*mm2=-L>R_i?FF:00*/ - "pcmpgtw %%mm0,%%mm2\n\t" - /*mm1=R_i>L?FF:00*/ - "pcmpgtw %%mm6,%%mm1\n\t" - /*mm3=2L 2L 2L 2L*/ - "psllw $1,%%mm3\n\t" - /*mm6=2L 2L 2L 2L*/ - "psllw $1,%%mm6\n\t" - /*mm3=R_i>L?2L:0*/ - "pand %%mm1,%%mm3\n\t" - /*mm6=-L>R_i?2L:0*/ - "pand %%mm2,%%mm6\n\t" - /*mm0=R_i>L?R_i-2L:R_i*/ - "psubw %%mm3,%%mm0\n\t" - /*mm1=-L>R_i||R_i>L*/ - "por %%mm2,%%mm1\n\t" - /*mm0=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm6,%%mm0\n\t" - /*mm1=-L>R_i||R_i>L?R_i':0*/ - "pand %%mm0,%%mm1\n\t" - /*mm0=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm1,%%mm0\n\t" - /*mm0=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm1,%%mm0\n\t" - /*_pix[1]+=R_i;*/ - "paddw %%mm0,%%mm5\n\t" - /*_pix[2]-=R_i;*/ - "psubw %%mm0,%%mm4\n\t" - /*mm5=x x x x D 9 5 1*/ - "packuswb %%mm7,%%mm5\n\t" - /*mm4=x x x x E A 6 2*/ - "packuswb %%mm7,%%mm4\n\t" - /*mm5=E D A 9 6 5 2 1*/ - "punpcklbw %%mm4,%%mm5\n\t" - /*edi=6 5 2 1*/ - "movd %%mm5,%%edi\n\t" - "movw %%di,1(%[pix])\n\t" - /*Why is there such a big stall here?*/ - "psrlq $32,%%mm5\n\t" - "shrl $16,%%edi\n\t" - "movw %%di,1(%[pix],%[ystride])\n\t" - /*edi=E D A 9*/ - "movd %%mm5,%%edi\n\t" - "movw %%di,1(%[pix],%[ystride],2)\n\t" - "shrl $16,%%edi\n\t" - "movw %%di,1(%[pix],%[s])\n\t" - :[s]"=&S"(esi),[d]"=&D"(edi), - 
[pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll) - :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4) - :"memory" - ); -} - -static void loop_filter_h(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - _pix-=2; - loop_filter_h4(_pix,_ystride,_ll); - loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll); -} - -static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){ - int j; - ogg_int16_t __attribute__((aligned(8))) ll[4]; - unsigned char *cp = pbi->display_fragments; - ogg_uint32_t *bp = pbi->recon_pixel_index_table; - - if ( FLimit == 0 ) return; - ll[0]=ll[1]=ll[2]=ll[3]=FLimit; - - for ( j = 0; j < 3 ; j++){ - ogg_uint32_t *bp_begin = bp; - ogg_uint32_t *bp_end; - int stride; - int h; - - switch(j) { - case 0: /* y */ - bp_end = bp + pbi->YPlaneFragments; - h = pbi->HFragments; - stride = pbi->YStride; - break; - default: /* u,v, 4:20 specific */ - bp_end = bp + pbi->UVPlaneFragments; - h = pbi->HFragments >> 1; - stride = pbi->UVStride; - break; - } - - while(bpbp_left) - loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll); - if(bp_left>bp_begin) - loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll); - if(bp+1LastFrameRecon[bp[0]]+8,stride,ll); - if(bp+hLastFrameRecon[bp[h]],stride,ll); - } - bp++; - cp++; - } - } - } - - __asm__ __volatile__("emms\n\t"); -} - -/* install our implementation in the function table */ -void dsp_mmx_dct_decode_init(DspFunctions *funcs) -{ - funcs->LoopFilter = loop_filter_mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c deleted file mode 100644 index 3c8a46e6a..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c +++ /dev/null @@ -1,666 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp_mmx.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" -#include "../dsp.h" - -#if defined(USE_ASM) - -static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL; - -#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2) -#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b))) -#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b)))) - -#define SUB_LOOP \ - " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \ - " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr */ \ - " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */\ - " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */\ - /* convert from UINT8 to INT16 */ \ - " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \ - " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr) */ \ - " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \ - " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr) */ \ - /* start calculation */ \ - " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ReconPtr */ \ - " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ReconPtr */ \ - " movq %%mm0, (%2) \n\t" /* write answer out */ \ - " movq %%mm2, 8(%2) \n\t" /* write answer out */ \ - /* Increment pointers */ \ - " add $16, %2 \n\t" \ - " add %3, %0 \n\t" \ - " add %4, %1 \n\t" - -static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr, - ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - __asm__ __volatile__ ( - 
" .p2align 4 \n\t" - - " pxor %%mm7, %%mm7 \n\t" - SUB_LOOP - SUB_LOOP - SUB_LOOP - SUB_LOOP - SUB_LOOP - SUB_LOOP - SUB_LOOP - SUB_LOOP - : "+r" (FiltPtr), - "+r" (ReconPtr), - "+r" (DctInputPtr) - : "m" (PixelsPerLine), - "m" (ReconPixelsPerLine) - : "memory" - ); -} - -#define SUB_128_LOOP \ - " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \ - " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */\ - /* convert from UINT8 to INT16 */ \ - " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \ - " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \ - /* start calculation */ \ - " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - 128 */ \ - " psubw %%mm1, %%mm2 \n\t" /* mm2 = FiltPtr - 128 */ \ - " movq %%mm0, (%1) \n\t" /* write answer out */ \ - " movq %%mm2, 8(%1) \n\t" /* write answer out */ \ - /* Increment pointers */ \ - " add $16, %1 \n\t" \ - " add %2, %0 \n\t" - - -static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine) -{ - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm7, %%mm7 \n\t" - " movq %[V128], %%mm1 \n\t" - SUB_128_LOOP - SUB_128_LOOP - SUB_128_LOOP - SUB_128_LOOP - SUB_128_LOOP - SUB_128_LOOP - SUB_128_LOOP - SUB_128_LOOP - : "+r" (FiltPtr), - "+r" (DctInputPtr) - : "m" (PixelsPerLine), - [V128] "m" (V128) - : "memory" - ); -} - -#define SUB_AVG2_LOOP \ - " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \ - " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */ \ - " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */ \ - " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */\ - " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */\ - " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */\ - /* convert from UINT8 to INT16 */ \ - " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \ - " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */ \ - " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */ \ - " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = 
INT16(FiltPtr) */ \ - " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */ \ - " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */ \ - /* average ReconPtr1 and ReconPtr2 */ \ - " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */ \ - " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */ \ - " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ \ - " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ \ - " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \ - " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \ - " movq %%mm0, (%3) \n\t" /* write answer out */ \ - " movq %%mm2, 8(%3) \n\t" /* write answer out */ \ - /* Increment pointers */ \ - " add $16, %3 \n\t" \ - " add %4, %0 \n\t" \ - " add %5, %1 \n\t" \ - " add %5, %2 \n\t" - - -static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1, - unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm7, %%mm7 \n\t" - SUB_AVG2_LOOP - SUB_AVG2_LOOP - SUB_AVG2_LOOP - SUB_AVG2_LOOP - SUB_AVG2_LOOP - SUB_AVG2_LOOP - SUB_AVG2_LOOP - SUB_AVG2_LOOP - : "+r" (FiltPtr), - "+r" (ReconPtr1), - "+r" (ReconPtr2), - "+r" (DctInputPtr) - : "m" (PixelsPerLine), - "m" (ReconPixelsPerLine) - : "memory" - ); -} - -static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2) -{ - ogg_uint32_t MaxSad; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm6, %%mm6 \n\t" /* zero out mm6 for unpack */ - " pxor %%mm7, %%mm7 \n\t" /* zero out mm7 for unpack */ - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" 
/* ; unpack low four bytes to higher precision */ - " punpckhbw %%mm7, %%mm1 \n\t" /* ; unpack high four bytes to higher precision */ - - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - " psrlq $32, %%mm2 \n\t" /* fold and add */ - " psrlq $32, %%mm3 \n\t" - " paddw %%mm2, %%mm0 \n\t" - " paddw %%mm3, %%mm1 \n\t" - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - " psrlq $16, %%mm2 \n\t" - " psrlq $16, %%mm3 \n\t" - " paddw %%mm2, %%mm0 \n\t" - " paddw %%mm3, %%mm1 \n\t" - - " psubusw %%mm0, %%mm1 \n\t" - " paddw %%mm0, %%mm1 \n\t" /* mm1 = max(mm1, mm0) */ - " movd %%mm1, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=m" (MaxSad), - "+r" (Src1), - "+r" (Src2) - : - : "memory" - ); - return MaxSad; -} - -static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2, - ogg_uint32_t stride) -{ - ogg_uint32_t MaxSad; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */ - " pxor %%mm4, %%mm4 \n\t" /* mm4 low sum */ - " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */ - " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */ - " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */ - " mov $4, %%edi \n\t" /* 4 rows */ - "1: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" /* take 8 bytes */ - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */ - " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... 
*/ - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " add %3, %2 \n\t" /* Inc pointer into the new data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " mov $4, %%edi \n\t" /* 4 rows */ - "2: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" /* take 8 bytes */ - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... */ - " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */ - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " add %3, %2 \n\t" /* Inc pointer into the new data */ - - " dec %%edi \n\t" - " jnz 2b \n\t" - - " psubusw %%mm6, %%mm7 \n\t" - " paddw %%mm6, %%mm7 \n\t" /* mm7 = max(mm7, mm6) */ - " psubusw %%mm4, %%mm5 \n\t" - " paddw %%mm4, %%mm5 \n\t" /* mm5 = max(mm5, mm4) */ - " psubusw %%mm5, %%mm7 \n\t" - " paddw %%mm5, %%mm7 \n\t" /* mm7 = max(mm5, mm7) */ - " movq %%mm7, %%mm6 \n\t" - " psrlq $32, %%mm6 \n\t" - " psubusw %%mm6, %%mm7 \n\t" - " paddw %%mm6, %%mm7 \n\t" /* mm7 = max(mm5, mm7) */ - " movq %%mm7, %%mm6 \n\t" - " psrlq $16, %%mm6 \n\t" - " psubusw %%mm6, %%mm7 \n\t" - " paddw %%mm6, %%mm7 \n\t" /* mm7 = max(mm5, mm7) */ - " movd %%mm7, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=r" (MaxSad), - "+r" (Src1), - "+r" (Src2) - : "r" (stride) - : "memory", "edi" - ); - - return MaxSad; -} - -#define SAD_LOOP \ - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \ - " movq (%2), %%mm1 \n\t" \ - " movq %%mm0, %%mm2 \n\t" \ - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ \ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ \ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ \ - " movq %%mm0, %%mm1 \n\t" \ - " punpcklbw %%mm6, %%mm0 \n\t" /* 
unpack to higher precision for accumulation */ \ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \ - " punpckhbw %%mm6, %%mm1 \n\t" /* unpack high four bytes to higher precision */ \ - " add %3, %1 \n\t" /* Inc pointer into the new data */ \ - " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */ \ - " add %4, %2 \n\t" /* Inc pointer into ref data */ - -static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - " pxor %%mm6, %%mm6 \n\t" /* zero out mm6 for unpack */ - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - SAD_LOOP - SAD_LOOP - SAD_LOOP - SAD_LOOP - SAD_LOOP - SAD_LOOP - SAD_LOOP - SAD_LOOP - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddw %%mm0, %%mm7 \n\t" - " movq %%mm7, %%mm0 \n\t" - " psrlq $16, %%mm7 \n\t" - " paddw %%mm0, %%mm7 \n\t" - " movd %%mm7, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=m" (DiffVal), - "+r" (ptr1), - "+r" (ptr2) - : "r" (stride1), - "r" (stride2) - : "memory" - ); - - return DiffVal; -} - -static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2, - ogg_uint32_t thres) -{ - return sad8x8__mmx (ptr1, stride1, ptr2, stride2); -} - -static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride, - ogg_uint32_t thres) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pcmpeqd %%mm5, %%mm5 \n\t" /* fefefefefefefefe in mm5 */ - " paddb %%mm5, %%mm5 \n\t" - - " pxor %%mm6, %%mm6 \n\t" /* zero out mm6 for unpack */ - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - " mov $8, %%edi \n\t" /* 8 rows */ - "1: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - - " movq (%2), %%mm2 \n\t" - " movq (%3), %%mm3 \n\t" /* take average of mm2 and mm3 */ 
- " movq %%mm2, %%mm1 \n\t" - " pand %%mm3, %%mm1 \n\t" - " pxor %%mm2, %%mm3 \n\t" - " pand %%mm5, %%mm3 \n\t" - " psrlq $1, %%mm3 \n\t" - " paddb %%mm3, %%mm1 \n\t" - - " movq %%mm0, %%mm2 \n\t" - - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ - " punpckhbw %%mm6, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " add %4, %1 \n\t" /* Inc pointer into the new data */ - " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */ - " add %5, %2 \n\t" /* Inc pointer into ref data */ - " add %5, %3 \n\t" /* Inc pointer into ref data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddw %%mm0, %%mm7 \n\t" - " movq %%mm7, %%mm0 \n\t" - " psrlq $16, %%mm7 \n\t" - " paddw %%mm0, %%mm7 \n\t" - " movd %%mm7, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=m" (DiffVal), - "+r" (SrcData), - "+r" (RefDataPtr1), - "+r" (RefDataPtr2) - : "m" (SrcStride), - "m" (RefStride) - : "edi", "memory" - ); - - return DiffVal; -} - -static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride) -{ - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%edi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - " movq %%mm0, %%mm2 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %3, %2 \n\t" /* Inc pointer into src data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, 
%%mm0 \n\t" - " psrlq $32, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movd %%mm5, %%edi \n\t" - " movsx %%di, %%edi \n\t" - " movl %%edi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=r" (XSum), - "=r" (XXSum), - "+r" (DataPtr) - : "r" (Stride) - : "edi", "memory" - ); - - /* Compute population variance as mis-match metric. */ - return (( (XXSum<<6) - XSum*XSum ) ); -} - -static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr, ogg_uint32_t RefStride) -{ - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%edi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - " movq (%3), %%mm1 \n\t" - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpcklbw %%mm6, %%mm1 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - " punpckhbw %%mm6, %%mm3 \n\t" - - " psubsw %%mm1, %%mm0 \n\t" - " psubsw %%mm3, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %4, %2 \n\t" /* Inc pointer into src data */ - " add %5, %3 \n\t" /* Inc pointer into ref data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, %%mm0 \n\t" - " psrlq $32, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movd %%mm5, %%edi \n\t" - " movsx %%di, %%edi \n\t" - " movl %%edi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=m" (XSum), - "=m" (XXSum), - "+r" (SrcData), - "+r" (RefDataPtr) - : "m" 
(SrcStride), - "m" (RefStride) - : "edi", "memory" - ); - - /* Compute and return population variance as mis-match metric. */ - return (( (XXSum<<6) - XSum*XSum )); -} - -static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride) -{ - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pcmpeqd %%mm4, %%mm4 \n\t" /* fefefefefefefefe in mm4 */ - " paddb %%mm4, %%mm4 \n\t" - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%edi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - - " movq (%3), %%mm2 \n\t" - " movq (%4), %%mm3 \n\t" /* take average of mm2 and mm3 */ - " movq %%mm2, %%mm1 \n\t" - " pand %%mm3, %%mm1 \n\t" - " pxor %%mm2, %%mm3 \n\t" - " pand %%mm4, %%mm3 \n\t" - " psrlq $1, %%mm3 \n\t" - " paddb %%mm3, %%mm1 \n\t" - - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpcklbw %%mm6, %%mm1 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - " punpckhbw %%mm6, %%mm3 \n\t" - - " psubsw %%mm1, %%mm0 \n\t" - " psubsw %%mm3, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %5, %2 \n\t" /* Inc pointer into src data */ - " add %6, %3 \n\t" /* Inc pointer into ref data */ - " add %6, %4 \n\t" /* Inc pointer into ref data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, %%mm0 \n\t" - " psrlq $32, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movd %%mm5, %%edi \n\t" - " movsx %%di, %%edi \n\t" - " movl %%edi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=m" (XSum), - "=m" (XXSum), - "+r" 
(SrcData), - "+r" (RefDataPtr1), - "+r" (RefDataPtr2) - : "m" (SrcStride), - "m" (RefStride) - : "edi", "memory" - ); - - /* Compute and return population variance as mis-match metric. */ - return (( (XXSum<<6) - XSum*XSum )); -} - -static void restore_fpu (void) -{ - __asm__ __volatile__ ( - " emms \n\t" - ); -} - -void dsp_mmx_init(DspFunctions *funcs) -{ - funcs->restore_fpu = restore_fpu; - funcs->sub8x8 = sub8x8__mmx; - funcs->sub8x8_128 = sub8x8_128__mmx; - funcs->sub8x8avg2 = sub8x8avg2__mmx; - funcs->row_sad8 = row_sad8__mmx; - funcs->col_sad8x8 = col_sad8x8__mmx; - funcs->sad8x8 = sad8x8__mmx; - funcs->sad8x8_thres = sad8x8_thres__mmx; - funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx; - funcs->intra8x8_err = intra8x8_err__mmx; - funcs->inter8x8_err = inter8x8_err__mmx; - funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c b/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c deleted file mode 100644 index 297c3213a..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c +++ /dev/null @@ -1,347 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp_mmxext.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" -#include "../dsp.h" - -#if defined(USE_ASM) - -#define SAD_MMXEXT_LOOP \ - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \ - " movq (%2), %%mm1 \n\t" \ - " psadbw %%mm1, %%mm0 \n\t" \ - " add %3, %1 \n\t" /* Inc pointer into the new data */ \ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \ - " add %4, %2 \n\t" /* Inc pointer into ref data */ - - -static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - - SAD_MMXEXT_LOOP - SAD_MMXEXT_LOOP - SAD_MMXEXT_LOOP - SAD_MMXEXT_LOOP - SAD_MMXEXT_LOOP - SAD_MMXEXT_LOOP - SAD_MMXEXT_LOOP - - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" - " psadbw %%mm1, %%mm0 \n\t" - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ - " movd %%mm7, %0 \n\t" - - : "=r" (DiffVal), - "+r" (ptr1), - "+r" (ptr2) - : "r" (stride1), - "r" (stride2) - : "memory" - ); - - return DiffVal; -} - -#define SAD_TRES_LOOP \ - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \ - " movq (%2), %%mm1 \n\t" \ - " psadbw %%mm1, %%mm0 \n\t" \ - " add %3, %1 \n\t" /* Inc pointer into the new data */ \ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... 
*/ \ - " add %4, %2 \n\t" /* Inc pointer into ref data */ - - -static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2, - ogg_uint32_t thres) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - - SAD_TRES_LOOP - SAD_TRES_LOOP - SAD_TRES_LOOP - SAD_TRES_LOOP - SAD_TRES_LOOP - SAD_TRES_LOOP - SAD_TRES_LOOP - SAD_TRES_LOOP - - " movd %%mm7, %0 \n\t" - - : "=r" (DiffVal), - "+r" (ptr1), - "+r" (ptr2) - : "r" (stride1), - "r" (stride2) - : "memory" - ); - - return DiffVal; -} - -#define SAD_XY2_TRES \ - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ \ - " movq (%2), %%mm1 \n\t" \ - " movq (%3), %%mm2 \n\t" \ - " pavgb %%mm2, %%mm1 \n\t" \ - " psadbw %%mm1, %%mm0 \n\t" \ - \ - " add %4, %1 \n\t" /* Inc pointer into the new data */ \ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ \ - " add %5, %2 \n\t" /* Inc pointer into ref data */ \ - " add %5, %3 \n\t" /* Inc pointer into ref data */ - - -static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride, - ogg_uint32_t thres) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - SAD_XY2_TRES - SAD_XY2_TRES - SAD_XY2_TRES - SAD_XY2_TRES - SAD_XY2_TRES - SAD_XY2_TRES - SAD_XY2_TRES - SAD_XY2_TRES - - " movd %%mm7, %0 \n\t" - : "=m" (DiffVal), - "+r" (SrcData), - "+r" (RefDataPtr1), - "+r" (RefDataPtr2) - : "m" (SrcStride), - "m" (RefStride) - : "memory" - ); - - return DiffVal; -} - -static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2) -{ - ogg_uint32_t MaxSad; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " movd (%1), %%mm0 \n\t" - " movd (%2), %%mm1 \n\t" - " psadbw %%mm0, %%mm1 \n\t" - " movd 4(%1), %%mm2 \n\t" - " movd 4(%2), %%mm3 
\n\t" - " psadbw %%mm2, %%mm3 \n\t" - - " pmaxsw %%mm1, %%mm3 \n\t" - " movd %%mm3, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=m" (MaxSad), - "+r" (Src1), - "+r" (Src2) - : - : "memory" - ); - - return MaxSad; -} - -static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2, - ogg_uint32_t stride) -{ - ogg_uint32_t MaxSad; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */ - " pxor %%mm4, %%mm4 \n\t" /* mm4 low sum */ - " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */ - " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */ - " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */ - " mov $4, %%edi \n\t" /* 4 rows */ - "1: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" /* take 8 bytes */ - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */ - " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... */ - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " add %3, %2 \n\t" /* Inc pointer into the new data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " mov $4, %%edi \n\t" /* 4 rows */ - "2: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" /* take 8 bytes */ - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... 
*/ - " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... */ - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " add %3, %2 \n\t" /* Inc pointer into the new data */ - - " dec %%edi \n\t" - " jnz 2b \n\t" - - " pmaxsw %%mm6, %%mm7 \n\t" - " pmaxsw %%mm4, %%mm5 \n\t" - " pmaxsw %%mm5, %%mm7 \n\t" - " movq %%mm7, %%mm6 \n\t" - " psrlq $32, %%mm6 \n\t" - " pmaxsw %%mm6, %%mm7 \n\t" - " movq %%mm7, %%mm6 \n\t" - " psrlq $16, %%mm6 \n\t" - " pmaxsw %%mm6, %%mm7 \n\t" - " movd %%mm7, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=r" (MaxSad), - "+r" (Src1), - "+r" (Src2) - : "r" (stride) - : "memory", "edi" - ); - - return MaxSad; -} - -static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride) -{ - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm4, %%mm4 \n\t" - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%edi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - - " movq (%3), %%mm2 \n\t" - " movq (%4), %%mm1 \n\t" /* take average of mm2 and mm1 */ - " pavgb %%mm2, %%mm1 \n\t" - - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpcklbw %%mm4, %%mm1 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - " punpckhbw %%mm4, %%mm3 \n\t" - - " psubsw %%mm1, %%mm0 \n\t" - " psubsw %%mm3, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %5, %2 \n\t" /* Inc pointer into src data */ - " add %6, %3 \n\t" /* Inc pointer into ref data */ - " add %6, %4 \n\t" /* Inc pointer into ref data */ - - " dec %%edi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, %%mm0 \n\t" - " psrlq $32, 
%%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movd %%mm5, %%edi \n\t" - " movsx %%di, %%edi \n\t" - " movl %%edi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=m" (XSum), - "=m" (XXSum), - "+r" (SrcData), - "+r" (RefDataPtr1), - "+r" (RefDataPtr2) - : "m" (SrcStride), - "m" (RefStride) - : "edi", "memory" - ); - - /* Compute and return population variance as mis-match metric. */ - return (( (XXSum<<6) - XSum*XSum )); -} - -void dsp_mmxext_init(DspFunctions *funcs) -{ - funcs->row_sad8 = row_sad8__mmxext; - funcs->col_sad8x8 = col_sad8x8__mmxext; - funcs->sad8x8 = sad8x8__mmxext; - funcs->sad8x8_thres = sad8x8_thres__mmxext; - funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext; - funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c deleted file mode 100644 index 8de691f81..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c +++ /dev/null @@ -1,339 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: fdct_mmx.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -/* mmx fdct implementation */ - -#include "theora/theora.h" -#include "../codec_internal.h" -#include "../dsp.h" - -#if defined(USE_ASM) - -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC4S4 = 0x0b505b505b505b505LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL; - -/* execute stage 1 of forward DCT */ -#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp) \ - " movq " #ip0 ", %%mm0 \n\t" \ - " movq " #ip1 ", %%mm1 \n\t" \ - " movq " #ip3 ", %%mm2 \n\t" \ - " movq " #ip5 ", %%mm3 \n\t" \ - " movq %%mm0, %%mm4 \n\t" \ - " movq %%mm1, %%mm5 \n\t" \ - " movq %%mm2, %%mm6 \n\t" \ - " movq %%mm3, %%mm7 \n\t" \ - \ - " paddsw " #ip7 ", %%mm0 \n\t" /* mm0 = ip0 + ip7 = is07 */ \ - " paddsw " #ip2 ", %%mm1 \n\t" /* mm1 = ip1 + ip2 = is12 */ \ - " paddsw " #ip4 ", %%mm2 \n\t" /* mm2 = ip3 + ip4 = is34 */ \ - " paddsw " #ip6 ", %%mm3 \n\t" /* mm3 = ip5 + ip6 = is56 */ \ - " psubsw " #ip7 ", %%mm4 \n\t" /* mm4 = ip0 - ip7 = id07 */ \ - " psubsw " #ip2 ", %%mm5 \n\t" /* mm5 = ip1 - ip2 = id12 */ \ - \ - " psubsw %%mm2, %%mm0 \n\t" /* mm0 = is07 - is34 */ \ - \ - " paddsw %%mm2, %%mm2 \n\t" \ - \ - " psubsw " #ip4 ", %%mm6 \n\t" 
/* mm6 = ip3 - ip4 = id34 */ \ - \ - " paddsw %%mm0, %%mm2 \n\t" /* mm2 = is07 + is34 = is0734 */ \ - " psubsw %%mm3, %%mm1 \n\t" /* mm1 = is12 - is56 */ \ - " movq %%mm0," #temp " \n\t" /* Save is07 - is34 to free mm0; */ \ - " paddsw %%mm3, %%mm3 \n\t" \ - " paddsw %%mm1, %%mm3 \n\t" /* mm3 = is12 + 1s56 = is1256 */ \ - \ - " psubsw " #ip6 ", %%mm7 \n\t" /* mm7 = ip5 - ip6 = id56 */ \ - /* ------------------------------------------------------------------- */ \ - " psubsw %%mm7, %%mm5 \n\t" /* mm5 = id12 - id56 */ \ - " paddsw %%mm7, %%mm7 \n\t" \ - " paddsw %%mm5, %%mm7 \n\t" /* mm7 = id12 + id56 */ \ - /* ------------------------------------------------------------------- */ \ - " psubsw %%mm3, %%mm2 \n\t" /* mm2 = is0734 - is1256 */ \ - " paddsw %%mm3, %%mm3 \n\t" \ - \ - " movq %%mm2, %%mm0 \n\t" /* make a copy */ \ - " paddsw %%mm2, %%mm3 \n\t" /* mm3 = is0734 + is1256 */ \ - \ - " pmulhw %[xC4S4], %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \ - " paddw %%mm2, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \ - " psrlw $15, %%mm2 \n\t" \ - " paddw %%mm2, %%mm0 \n\t" /* Truncate mm0, now it is op[4] */ \ - \ - " movq %%mm3, %%mm2 \n\t" \ - " movq %%mm0," #ip4 " \n\t" /* save ip4, now mm0,mm2 are free */ \ - \ - " movq %%mm3, %%mm0 \n\t" \ - " pmulhw %[xC4S4], %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \ - \ - " psrlw $15, %%mm2 \n\t" \ - " paddw %%mm0, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) */ \ - " paddw %%mm2, %%mm3 \n\t" /* Truncate mm3, now it is op[0] */ \ - \ - " movq %%mm3," #ip0 " \n\t" \ - /* ------------------------------------------------------------------- */ \ - " movq " #temp ", %%mm3 \n\t" /* mm3 = irot_input_y */ \ - " pmulhw %[xC2S6], %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \ - \ - " movq " #temp ", %%mm2 \n\t" \ - " movq %%mm2, %%mm0 \n\t" \ - \ - " psrlw $15, %%mm2 \n\t" /* mm3 = xC2S6 * irot_input_y */ \ - " paddw %%mm0, %%mm3 \n\t" \ - \ - " paddw 
%%mm2, %%mm3 \n\t" /* Truncated */ \ - " movq %%mm5, %%mm0 \n\t" \ - \ - " movq %%mm5, %%mm2 \n\t" \ - " pmulhw %[xC6S2], %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \ - \ - " psrlw $15, %%mm2 \n\t" \ - " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \ - \ - " paddsw %%mm0, %%mm3 \n\t" /* ip[2] */ \ - " movq %%mm3," #ip2 " \n\t" /* Save ip2 */ \ - \ - " movq %%mm5, %%mm0 \n\t" \ - " movq %%mm5, %%mm2 \n\t" \ - \ - " pmulhw %[xC2S6], %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " movq " #temp ", %%mm3 \n\t" \ - " paddw %%mm0, %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x */ \ - \ - " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \ - " movq %%mm3, %%mm2 \n\t" \ - \ - " pmulhw %[xC6S2], %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \ - " psubsw %%mm5, %%mm3 \n\t" \ - \ - " movq %%mm3," #ip6 " \n\t" \ - /* ------------------------------------------------------------------- */ \ - " movq %[xC4S4], %%mm0 \n\t" \ - " movq %%mm1, %%mm2 \n\t" \ - " movq %%mm1, %%mm3 \n\t" \ - \ - " pmulhw %%mm0, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm3, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) */ \ - " paddw %%mm2, %%mm1 \n\t" /* Truncate mm1, now it is icommon_product1 */ \ - \ - " movq %%mm7, %%mm2 \n\t" \ - " movq %%mm7, %%mm3 \n\t" \ - \ - " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm3, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) */ \ - " paddw %%mm2, %%mm7 \n\t" /* Truncate mm7, now it is icommon_product2 */ \ - /* ------------------------------------------------------------------- */ \ - " pxor %%mm0, %%mm0 \n\t" /* Clear mm0 */ \ - " psubsw %%mm6, %%mm0 \n\t" /* mm0 = - id34 */ \ - \ - " psubsw %%mm7, %%mm0 \n\t" /* mm0 = - ( id34 + idcommon_product2 ) */ \ - " paddsw %%mm6, %%mm6 \n\t" \ - " paddsw 
%%mm0, %%mm6 \n\t" /* mm6 = id34 - icommon_product2 */ \ - \ - " psubsw %%mm1, %%mm4 \n\t" /* mm4 = id07 - icommon_product1 */ \ - " paddsw %%mm1, %%mm1 \n\t" \ - " paddsw %%mm4, %%mm1 \n\t" /* mm1 = id07 + icommon_product1 */ \ - /* ------------------------------------------------------------------- */ \ - " movq %[xC1S7], %%mm7 \n\t" \ - " movq %%mm1, %%mm2 \n\t" \ - \ - " movq %%mm1, %%mm3 \n\t" \ - " pmulhw %%mm7, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \ - \ - " movq %[xC7S1], %%mm7 \n\t" \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm3, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x */ \ - " paddw %%mm2, %%mm1 \n\t" /* Trucated */ \ - \ - " pmulhw %%mm7, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x */ \ - " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \ - \ - " movq %%mm0, %%mm5 \n\t" \ - " movq %%mm0, %%mm2 \n\t" \ - \ - " movq %[xC1S7], %%mm7 \n\t" \ - " pmulhw %%mm7, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \ - \ - " movq %[xC7S1], %%mm7 \n\t" \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm5, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y */ \ - " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \ - \ - " pmulhw %%mm7, %%mm5 \n\t" /* mm5 = xC7S1 * irot_input_y */ \ - " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \ - \ - " psubsw %%mm5, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \ - " paddsw %%mm0, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ \ - \ - " movq %%mm1," #ip1 " \n\t" \ - " movq %%mm3," #ip7 " \n\t" \ - /* ------------------------------------------------------------------- */ \ - " movq %[xC3S5], %%mm0 \n\t" \ - " movq %[xC5S3], %%mm1 \n\t" \ - \ - " movq %%mm6, %%mm5 \n\t" \ - " movq %%mm6, %%mm7 \n\t" \ - \ - " movq %%mm4, %%mm2 \n\t" \ - " movq %%mm4, %%mm3 \n\t" \ - \ - " pmulhw %%mm0, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \ - " pmulhw %%mm1, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y - irot_input_y */ \ - \ - " psrlw $15, %%mm2 \n\t" \ - " psrlw 
$15, %%mm5 \n\t" \ - \ - " paddw %%mm3, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x */ \ - " paddw %%mm7, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y */ \ - \ - " paddw %%mm2, %%mm4 \n\t" /* Truncated */ \ - " paddw %%mm5, %%mm6 \n\t" /* Truncated */ \ - \ - " psubsw %%mm6, %%mm4 \n\t" /* ip3 */ \ - " movq %%mm4," #ip3 " \n\t" \ - \ - " movq %%mm3, %%mm4 \n\t" \ - " movq %%mm7, %%mm6 \n\t" \ - \ - " pmulhw %%mm1, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \ - " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \ - \ - " paddw %%mm2, %%mm4 \n\t" \ - " paddw %%mm5, %%mm6 \n\t" \ - \ - " paddw %%mm4, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x */ \ - " paddw %%mm6, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y */ \ - \ - " paddw %%mm7, %%mm3 \n\t" /* ip5 */ \ - " movq %%mm3," #ip5 " \n\t" - -#define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7, \ - op0,op1,op2,op3,op4,op5,op6,op7) \ - " movq " #ip0 ", %%mm0 \n\t" /* mm0 = a0 a1 a2 a3 */ \ - " movq " #ip4 ", %%mm4 \n\t" /* mm4 = e4 e5 e6 e7 */ \ - " movq " #ip1 ", %%mm1 \n\t" /* mm1 = b0 b1 b2 b3 */ \ - " movq " #ip5 ", %%mm5 \n\t" /* mm5 = f4 f5 f6 f7 */ \ - " movq " #ip2 ", %%mm2 \n\t" /* mm2 = c0 c1 c2 c3 */ \ - " movq " #ip6 ", %%mm6 \n\t" /* mm6 = g4 g5 g6 g7 */ \ - " movq " #ip3 ", %%mm3 \n\t" /* mm3 = d0 d1 d2 d3 */ \ - " movq %%mm1," #op1 " \n\t" /* save b0 b1 b2 b3 */ \ - " movq " #ip7 ", %%mm7 \n\t" /* mm7 = h0 h1 h2 h3 */ \ - /* Transpose 2x8 block */ \ - " movq %%mm4, %%mm1 \n\t" /* mm1 = e3 e2 e1 e0 */ \ - " punpcklwd %%mm5, %%mm4 \n\t" /* mm4 = f1 e1 f0 e0 */ \ - " movq %%mm0," #op0 " \n\t" /* save a3 a2 a1 a0 */ \ - " punpckhwd %%mm5, %%mm1 \n\t" /* mm1 = f3 e3 f2 e2 */ \ - " movq %%mm6, %%mm0 \n\t" /* mm0 = g3 g2 g1 g0 */ \ - " punpcklwd %%mm7, %%mm6 \n\t" /* mm6 = h1 g1 h0 g0 */ \ - " movq %%mm4, %%mm5 \n\t" /* mm5 = f1 e1 f0 e0 */ \ - " punpckldq %%mm6, %%mm4 \n\t" /* mm4 = h0 g0 f0 e0 = MM4 */ \ - " punpckhdq %%mm6, %%mm5 \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \ - " movq 
%%mm1, %%mm6 \n\t" /* mm6 = f3 e3 f2 e2 */ \ - " movq %%mm4," #op4 " \n\t" \ - " punpckhwd %%mm7, %%mm0 \n\t" /* mm0 = h3 g3 h2 g2 */ \ - " movq %%mm5," #op5 " \n\t" \ - " punpckhdq %%mm0, %%mm6 \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \ - " movq " #op0 ", %%mm4 \n\t" /* mm4 = a3 a2 a1 a0 */ \ - " punpckldq %%mm0, %%mm1 \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \ - " movq " #op1 ", %%mm5 \n\t" /* mm5 = b3 b2 b1 b0 */ \ - " movq %%mm4, %%mm0 \n\t" /* mm0 = a3 a2 a1 a0 */ \ - " movq %%mm6," #op7 " \n\t" \ - " punpcklwd %%mm5, %%mm0 \n\t" /* mm0 = b1 a1 b0 a0 */ \ - " movq %%mm1," #op6 " \n\t" \ - " punpckhwd %%mm5, %%mm4 \n\t" /* mm4 = b3 a3 b2 a2 */ \ - " movq %%mm2, %%mm5 \n\t" /* mm5 = c3 c2 c1 c0 */ \ - " punpcklwd %%mm3, %%mm2 \n\t" /* mm2 = d1 c1 d0 c0 */ \ - " movq %%mm0, %%mm1 \n\t" /* mm1 = b1 a1 b0 a0 */ \ - " punpckldq %%mm2, %%mm0 \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \ - " punpckhdq %%mm2, %%mm1 \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \ - " movq %%mm4, %%mm2 \n\t" /* mm2 = b3 a3 b2 a2 */ \ - " movq %%mm0," #op0 " \n\t" \ - " punpckhwd %%mm3, %%mm5 \n\t" /* mm5 = d3 c3 d2 c2 */ \ - " movq %%mm1," #op1 " \n\t" \ - " punpckhdq %%mm5, %%mm4 \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \ - " punpckldq %%mm5, %%mm2 \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \ - " movq %%mm4," #op3 " \n\t" \ - " movq %%mm2," #op2 " \n\t" - - -/* This performs a 2D Forward DCT on an 8x8 block with short - coefficients. We try to do the truncation to match the C - version. */ -static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData) -{ - ogg_int16_t __attribute__((aligned(8))) temp[8*8]; - - __asm__ __volatile__ ( - " .p2align 4 \n\t" - /* - * Input data is an 8x8 block. To make processing of the data more efficent - * we will transpose the block of data to two 4x8 blocks??? 
- */ - Transpose_mmx ( (%0), 16(%0), 32(%0), 48(%0), 8(%0), 24(%0), 40(%0), 56(%0), - (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1)) - Fdct_mmx ( (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1), (%2)) - - Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0), - 64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1)) - Fdct_mmx (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2)) - - Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), - 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1)) - Fdct_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2)) - - Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), - 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1)) - Fdct_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2)) - - " emms \n\t" - - : "+r" (InputData), - "+r" (OutputData) - : "r" (temp), - [xC1S7] "m" (xC1S7), /* gcc 3.1+ allows named asm parameters */ - [xC2S6] "m" (xC2S6), - [xC3S5] "m" (xC3S5), - [xC4S4] "m" (xC4S4), - [xC5S3] "m" (xC5S3), - [xC6S2] "m" (xC6S2), - [xC7S1] "m" (xC7S1) - : "memory" - ); -} - -/* install our implementation in the function table */ -void dsp_mmx_fdct_init(DspFunctions *funcs) -{ - funcs->fdct_short = fdct_short__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c deleted file mode 100644 index 5fc6a1f66..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c +++ /dev/null @@ -1,1452 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: idct_mmx.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include "../codec_internal.h" - -#if defined(USE_ASM) - -#define ASM asm - -/**************************************************************************** -* -* Description : IDCT with multiple versions based on # of non 0 coeffs -* -***************************************************************************** -*/ - -// Dequantization + inverse discrete cosine transform. - -// Constants used in MMX implementation of dequantization and idct. -// All the MMX stuff works with 4 16-bit quantities at a time and -// we create 11 constants of size 4 x 16 bits. -// The first 4 are used to mask the individual 16-bit words within a group -// and are used in the address-shuffling part of the dequantization. -// The last 7 are fixed-point approximations to the cosines of angles -// occurring in the DCT; each of these contains 4 copies of the same value. - -// There is only one (statically initialized) instance of this object -// wrapped in an allocator object that forces its starting address -// to be evenly divisible by 32. Hence the actual object occupies 2.75 -// cache lines on a Pentium processor. - -// Offsets in bytes used by the assembler code below -// must of course agree with the idctConstants constructor. - -#define MaskOffset 0 // 4 masks come in order low word to high -#define CosineOffset 32 // 7 cosines come in order pi/16 * (1 ... 
7) -#define EightOffset 88 -#define IdctAdjustBeforeShift 8 - -/* -UINT16 idctcosTbl[ 7] = -{ - 64277, 60547, 54491, 46341, 36410, 25080, 12785 -}; - -void fillidctconstants(void) -{ - int j = 16; - UINT16 * p; - do - { - idctconstants[ --j] = 0; - } - while( j); - - idctconstants[0] = idctconstants[5] = idctconstants[10] = idctconstants[15] = 65535; - - j = 1; - do - { - p = idctconstants + ( (j+3) << 2); - p[0] = p[1] = p[2] = p[3] = idctcosTbl[ j - 1]; - } - while( ++j <= 7); - - idctconstants[44] = idctconstants[45] = idctconstants[46] = idctconstants[47] = IdctAdjustBeforeShift; -} -*/ - -ogg_uint16_t idctconstants[(4+7+1) * 4] = { - 65535, 0, 0, 0, 0, 65535, 0, 0, - 0, 0, 65535, 0, 0, 0, 0, 65535, - 64277, 64277, 64277, 64277, 60547, 60547, 60547, 60547, - 54491, 54491, 54491, 54491, 46341, 46341, 46341, 46341, - 36410, 36410, 36410, 36410, 25080, 25080, 25080, 25080, - 12785, 12785, 12785, 12785, 8, 8, 8, 8, -}; - -/* Dequantization + inverse DCT. - - Dequantization multiplies user's 16-bit signed indices (range -512 to +511) - by unsigned 16-bit quantization table entries. - These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14. - Result is scaled signed DCT coefficients (abs value < 2^15). - - In the data stream, the coefficients are sent in order of increasing - total (horizontal + vertical) frequency. The exact picture is as follows: - - 00 01 05 06 16 17 33 34 - 02 04 07 15 20 32 35 52 - 03 10 14 21 31 36 51 53 - 11 13 22 30 37 50 54 65 - - 12 23 27 40 47 55 64 66 - 24 26 41 46 56 63 67 74 - 25 42 45 57 62 70 73 75 - 43 44 60 61 71 72 76 77 - - Here the position in the matrix corresponds to the (horiz,vert) - freqency indices and the octal entry in the matrix is the position - of the coefficient in the data stream. Thus the coefficients are sent - in sort of a diagonal "snake". - - The dequantization stage "uncurls the snake" and stores the expanded - coefficients in more convenient positions. 
These are not exactly the - natural positions given above but take into account our implementation - of the idct, which basically requires two one-dimensional idcts and - two transposes. - - We fold the first transpose into the storage of the expanded coefficients. - We don't actually do a full transpose because this would require doubling - the size of the idct buffer; rather, we just transpose each of the 4x4 - subblocks. Using slightly varying addressing schemes in each of the - four 4x8 idcts then allows these transforms to be done in place. - - Transposing the 4x4 subblocks in the matrix above gives - - 00 02 03 11 16 20 31 37 - 01 04 10 13 17 32 36 50 - 05 07 14 22 33 35 51 54 - 06 15 21 30 34 52 53 65 - - 12 24 25 43 47 56 62 71 - 23 26 42 44 55 63 70 72 - 27 41 45 60 64 67 73 76 - 40 46 57 61 66 74 75 77 - - Finally, we reverse the words in each 4 word group to clarify - direction of shifts. - - 11 03 02 00 37 31 20 16 - 13 10 04 01 50 36 32 17 - 22 14 07 05 54 51 35 33 - 30 21 15 06 65 53 52 34 - - 43 25 24 12 71 62 56 47 - 44 42 26 23 72 70 63 55 - 60 45 41 27 76 73 67 64 - 61 57 46 40 77 75 74 66 - - This matrix then shows the 16 4x16 destination words in terms of - the 16 4x16 input words. - - We implement this algorithm by manipulation of mmx registers, - which seems to be the fastest way to proceed. It is completely - hand-written; there does not seem to be enough recurrence to - reasonably compartmentalize any of it. Hence the resulting - program is ugly and bloated. Furthermore, due to the absence of - register pressure, it is boring and artless. I hate it. - - The idct itself is more interesting. Since the two-dimensional dct - basis functions are products of the one-dimesional dct basis functions, - we can compute an inverse (or forward) dct via two 1-D transforms, - on rows then on columns. 
To exploit MMX parallelism, we actually do - both operations on columns, interposing a (partial) transpose between - the two 1-D transforms, the first transpose being done by the expansion - described above. - - The 8-sample one-dimensional DCT is a standard orthogonal expansion using - the (unnormalized) basis functions - - b[k]( i) = cos( pi * k * (2i + 1) / 16); - - here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate. - To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k] - should be multiplied by 1/2. - - The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs - in each direction. The (unnormalized) basis functions are - - B[k,l]( i, j) = b[k]( i) * b[l]( j); - - this time k and l are the horizontal and vertical frequencies, - i and j are the horizontal and vertical spatial coordinates; - all indices vary from 0 ... 7 (as above) - and there are now 4 cases of normalization. - - Our 1-D idct expansion uses constants C1 ... C7 given by - - (*) Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16) - - and the following 1-D algorithm transforming I0 ... I7 to R0 ... R7 : - - A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7) - C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3) - A. = C4 * (A - C) B. = C4 * (B - D) - C. = A + C D. = B + D - - E = C4 * (I0 + I4) F = C4 * (I0 - I4) - G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6) - E. = E - G - G. = E + G - - A.. = F + A. B.. = B. - H - F. = F - A. H. = B. + H - - R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B.. - R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B.. - - It is due to Vetterli and Lightenberg and may be found in the JPEG - reference book by Pennebaker and Mitchell. 
- - Correctness of the algorithm follows from (*) together with the - addition formulas for sine and cosine: - - cos( A + B) = cos( A) * cos( B) - sin( A) * sin( B) - sin( A + B) = sin( A) * cos( B) + cos( A) * sin( B) - - Note that this implementation absorbs the difference in normalization - between the 0th and higher frequencies, although the results produced - are actually twice as big as they should be. Since we do this for each - dimension, the 2-D idct results are 4x the desired results. Finally, - taking into account that the dequantization multiplies by 4 as well, - our actual results are 16x too big. We fix this by shifting the final - results right by 4 bits. - - High precision version approximates C1 ... C7 to 16 bits. - Since MMX only provides a signed multiply, C1 ... C5 appear to be - negative and multiplies involving them must be adjusted to compensate - for this. C6 and C7 do not require this adjustment since - they are < 1/2 and are correctly treated as positive numbers. - - Following macro does four 8-sample one-dimensional idcts in parallel. - This is actually not such a difficult program to write once you - make a couple of observations (I of course was unable to make these - observations until I'd half-written a couple of other versions). - - 1. Everything is easy once you are done with the multiplies. - This is because, given X and Y in registers, one may easily - calculate X+Y and X-Y using just those 2 registers. - - 2. You always need at least 2 extra registers to calculate products, - so storing 2 temporaries is inevitable. C. and D. seem to be - the best candidates. - - 3. The products should be calculated in decreasing order of complexity - (which translates into register pressure). Since C1 ... C5 require - adjustment (and C6, C7 do not), we begin by calculating C and D. 
-*/ - -/************************************************************************************** - * - * Routine: BeginIDCT - * - * Description: The Macro does IDct on 4 1-D Dcts - * - * Input: None - * - * Output: None - * - * Return: None - * - * Special Note: None - * - * Error: None - * - *************************************************************************************** - */ - -#define MtoSTR(s) #s - -#define Dump "call MMX_dump\n" - -#define BeginIDCT "#BeginIDCT\n" \ - \ - " movq " I(3)","r2"\n" \ - \ - " movq " C(3)","r6"\n" \ - " movq " r2","r4"\n" \ - " movq " J(5)","r7"\n" \ - " pmulhw " r6","r4"\n" \ - " movq " C(5)","r1"\n" \ - " pmulhw " r7","r6"\n" \ - " movq " r1","r5"\n" \ - " pmulhw " r2","r1"\n" \ - " movq " I(1)","r3"\n" \ - " pmulhw " r7","r5"\n" \ - " movq " C(1)","r0"\n" \ - " paddw " r2","r4"\n" \ - " paddw " r7","r6"\n" \ - " paddw " r1","r2"\n" \ - " movq " J(7)","r1"\n" \ - " paddw " r5","r7"\n" \ - " movq " r0","r5"\n" \ - " pmulhw " r3","r0"\n" \ - " paddsw " r7","r4"\n" \ - " pmulhw " r1","r5"\n" \ - " movq " C(7)","r7"\n" \ - " psubsw " r2","r6"\n" \ - " paddw " r3","r0"\n" \ - " pmulhw " r7","r3"\n" \ - " movq " I(2)","r2"\n" \ - " pmulhw " r1","r7"\n" \ - " paddw " r1","r5"\n" \ - " movq " r2","r1"\n" \ - " pmulhw " C(2)","r2"\n" \ - " psubsw " r5","r3"\n" \ - " movq " J(6)","r5"\n" \ - " paddsw " r7","r0"\n" \ - " movq " r5","r7"\n" \ - " psubsw " r4","r0"\n" \ - " pmulhw " C(2)","r5"\n" \ - " paddw " r1","r2"\n" \ - " pmulhw " C(6)","r1"\n" \ - " paddsw " r4","r4"\n" \ - " paddsw " r0","r4"\n" \ - " psubsw " r6","r3"\n" \ - " paddw " r7","r5"\n" \ - " paddsw " r6","r6"\n" \ - " pmulhw " C(6)","r7"\n" \ - " paddsw " r3","r6"\n" \ - " movq " r4","I(1)"\n" \ - " psubsw " r5","r1"\n" \ - " movq " C(4)","r4"\n" \ - " movq " r3","r5"\n" \ - " pmulhw " r4","r3"\n" \ - " paddsw " r2","r7"\n" \ - " movq " r6","I(2)"\n" \ - " movq " r0","r2"\n" \ - " movq " I(0)","r6"\n" \ - " pmulhw " r4","r0"\n" \ - " paddw " r3","r5"\n" \ - "\n" \ - " 
movq " J(4)","r3"\n" \ - " psubsw " r1","r5"\n" \ - " paddw " r0","r2"\n" \ - " psubsw " r3","r6"\n" \ - " movq " r6","r0"\n" \ - " pmulhw " r4","r6"\n" \ - " paddsw " r3","r3"\n" \ - " paddsw " r1","r1"\n" \ - " paddsw " r0","r3"\n" \ - " paddsw " r5","r1"\n" \ - " pmulhw " r3","r4"\n" \ - " paddsw " r0","r6"\n" \ - " psubsw " r2","r6"\n" \ - " paddsw " r2","r2"\n" \ - " movq " I(1)","r0"\n" \ - " paddsw " r6","r2"\n" \ - " paddw " r3","r4"\n" \ - " psubsw " r1","r2"\n" \ - "#end BeginIDCT\n" -// end BeginIDCT macro (38 cycles). - - -// Two versions of the end of the idct depending on whether we're feeding -// into a transpose or dividing the final results by 16 and storing them. - -/************************************************************************************** - * - * Routine: RowIDCT - * - * Description: The Macro does 1-D IDct on 4 Rows - * - * Input: None - * - * Output: None - * - * Return: None - * - * Special Note: None - * - * Error: None - * - *************************************************************************************** - */ - -// RowIDCT gets ready to transpose. - -#define RowIDCT ASM("\n"\ - "#RowIDCT\n" \ - BeginIDCT \ - "\n" \ - " movq "I(2)","r3"\n" /* r3 = D. */ \ - " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \ - " paddsw "r1","r1"\n" /* r1 = H. + H. */ \ - " paddsw "r7","r7"\n" /* r7 = G + G */ \ - " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\ - " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \ - " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \ - " paddsw "r3","r3"\n" \ - " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\ - " paddsw "r5","r5"\n" \ - " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \ - " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\ - " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \ - " paddsw "r0","r0"\n" \ - " movq "r1","I(1)"\n" /* save R1 */ \ - " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. 
*/ \ - "#end RowIDCT" \ -); -// end RowIDCT macro (8 + 38 = 46 cycles) - - -/************************************************************************************** - * - * Routine: ColumnIDCT - * - * Description: The Macro does 1-D IDct on 4 columns - * - * Input: None - * - * Output: None - * - * Return: None - * - * Special Note: None - * - * Error: None - * - *************************************************************************************** - */ -// Column IDCT normalizes and stores final results. - -#define ColumnIDCT ASM("\n" \ - "#ColumnIDCT\n" \ - BeginIDCT \ - "\n" \ - " paddsw "Eight","r2"\n" \ - " paddsw "r1","r1"\n" /* r1 = H. + H. */ \ - " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\ - " psraw ""$4"","r2"\n" /* r2 = NR2 */ \ - " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \ - " psraw ""$4"","r1"\n" /* r1 = NR1 */ \ - " movq "I(2)","r3"\n" /* r3 = D. */ \ - " paddsw "r7","r7"\n" /* r7 = G + G */ \ - " movq "r2","I(2)"\n" /* store NR2 at I2 */ \ - " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \ - " movq "r1","I(1)"\n" /* store NR1 at I1 */ \ - " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \ - " paddsw "Eight","r4"\n" \ - " paddsw "r3","r3"\n" /* r3 = D. + D. */ \ - " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \ - " psraw ""$4"","r4"\n" /* r4 = NR4 */ \ - " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\ - " psraw ""$4"","r3"\n" /* r3 = NR3 */ \ - " paddsw "Eight","r6"\n" \ - " paddsw "r5","r5"\n" /* r5 = B.. + B.. */ \ - " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\ - " psraw ""$4"","r6"\n" /* r6 = NR6 */ \ - " movq "r4","J(4)"\n" /* store NR4 at J4 */ \ - " psraw ""$4"","r5"\n" /* r5 = NR5 */ \ - " movq "r3","I(3)"\n" /* store NR3 at I3 */ \ - " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \ - " paddsw "Eight","r7"\n" \ - " paddsw "r0","r0"\n" /* r0 = C. + C. */ \ - " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. 
*/ \ - " psraw ""$4"","r7"\n" /* r7 = NR7 */ \ - " movq "r6","J(6)"\n" /* store NR6 at J6 */ \ - " psraw ""$4"","r0"\n" /* r0 = NR0 */ \ - " movq "r5","J(5)"\n" /* store NR5 at J5 */ \ - " movq "r7","J(7)"\n" /* store NR7 at J7 */ \ - " movq "r0","I(0)"\n" /* store NR0 at I0 */ \ - "#end ColumnIDCT\n" \ -); -// end ColumnIDCT macro (38 + 19 = 57 cycles) - -/************************************************************************************** - * - * Routine: Transpose - * - * Description: The Macro does two 4x4 transposes in place. - * - * Input: None - * - * Output: None - * - * Return: None - * - * Special Note: None - * - * Error: None - * - *************************************************************************************** - */ - -/* Following macro does two 4x4 transposes in place. - - At entry (we assume): - - r0 = a3 a2 a1 a0 - I(1) = b3 b2 b1 b0 - r2 = c3 c2 c1 c0 - r3 = d3 d2 d1 d0 - - r4 = e3 e2 e1 e0 - r5 = f3 f2 f1 f0 - r6 = g3 g2 g1 g0 - r7 = h3 h2 h1 h0 - - At exit, we have: - - I(0) = d0 c0 b0 a0 - I(1) = d1 c1 b1 a1 - I(2) = d2 c2 b2 a2 - I(3) = d3 c3 b3 a3 - - J(4) = h0 g0 f0 e0 - J(5) = h1 g1 f1 e1 - J(6) = h2 g2 f2 e2 - J(7) = h3 g3 f3 e3 - - I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. - J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. - - Since r1 is free at entry, we calculate the Js first. 
*/ - - -#define Transpose ASM("\n#Transpose\n" \ - \ - " movq "r4","r1"\n" \ - " punpcklwd "r5","r4"\n" \ - " movq "r0","I(0)"\n" \ - " punpckhwd "r5","r1"\n" \ - " movq "r6","r0"\n" \ - " punpcklwd "r7","r6"\n" \ - " movq "r4","r5"\n" \ - " punpckldq "r6","r4"\n" \ - " punpckhdq "r6","r5"\n" \ - " movq "r1","r6"\n" \ - " movq "r4","J(4)"\n" \ - " punpckhwd "r7","r0"\n" \ - " movq "r5","J(5)"\n" \ - " punpckhdq "r0","r6"\n" \ - " movq "I(0)","r4"\n" \ - " punpckldq "r0","r1"\n" \ - " movq "I(1)","r5"\n" \ - " movq "r4","r0"\n" \ - " movq "r6","J(7)"\n" \ - " punpcklwd "r5","r0"\n" \ - " movq "r1","J(6)"\n" \ - " punpckhwd "r5","r4"\n" \ - " movq "r2","r5"\n" \ - " punpcklwd "r3","r2"\n" \ - " movq "r0","r1"\n" \ - " punpckldq "r2","r0"\n" \ - " punpckhdq "r2","r1"\n" \ - " movq "r4","r2"\n" \ - " movq "r0","I(0)"\n" \ - " punpckhwd "r3","r5"\n" \ - " movq "r1","I(1)"\n" \ - " punpckhdq "r5","r4"\n" \ - " punpckldq "r5","r2"\n" \ - \ - " movq "r4","I(3)"\n" \ - \ - " movq "r2","I(2)"\n" \ - "#end Transpose\n" \ -); -// end Transpose macro (19 cycles). 
- -/* -static void MMX_dump() -{ - ASM - ("\ - movq %mm0,(%edi)\n\ - movq %mm1,8(%edi)\n\ - movq %mm2,16(%edi)\n\ - movq %mm3,24(%edi)\n\ - movq %mm4,32(%edi)\n\ - movq %mm5,40(%edi)\n\ - movq %mm6,48(%edi)\n\ - movq %mm7,56(%edi)\n\ - ret" - ); -} -*/ - -/************************************************************************************** - * - * Routine: MMX_idct - * - * Description: Perform IDCT on a 8x8 block - * - * Input: Pointer to input and output buffer - * - * Output: None - * - * Return: None - * - * Special Note: The input coefficients are in ZigZag order - * - * Error: None - * - *************************************************************************************** - */ -void IDctSlow__mmx( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ) { - -# define MIDM(M,I) MtoSTR(M+I*8(%ecx)) -# define M(I) MIDM( MaskOffset , I ) -# define MIDC(M,I) MtoSTR(M+(I-1)*8(%ecx)) -# define C(I) MIDC( CosineOffset , I ) -# define MIDEight(M) MtoSTR(M(%ecx)) -# define Eight MIDEight(EightOffset) - -# define r0 "%mm0" -# define r1 "%mm1" -# define r2 "%mm2" -# define r3 "%mm3" -# define r4 "%mm4" -# define r5 "%mm5" -# define r6 "%mm6" -# define r7 "%mm7" - - __asm__ __volatile__ ( - /* eax = quantized input */ - /* esi = quantization table */ - /* edx = destination (= idct buffer) */ - /* ecx = idctconstants */ - "" - : - :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants) - ); - - ASM( - "movq (%eax), "r0"\n" - "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */ - "movq 16(%eax), "r1"\n" - "pmullw 16(%esi), "r1"\n" /* r1 = 13 12 11 10 */ - "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */ - "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */ - "movq 8(%eax), "r4"\n" - "psrlq $16, "r0"\n" /* r0 = __ 03 02 01 */ - "pmullw 8(%esi), "r4"\n" /* r4 = 07 06 05 04 */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 00 */ - "movq "r0", "r5"\n" /* r5 = __ 03 02 01 */ - "movq "r1", "r6"\n" /* r6 = 13 12 11 10 */ - "pand "r2", "r5"\n" /* r5 = __ __ __ 01 
*/ - "psllq $32, "r6"\n" /* r6 = 11 10 __ __ */ - "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */ - "pxor "r5", "r0"\n" /* r0 = __ 03 02 __ */ - "pand "r6", "r7"\n" /* r7 = 11 __ __ __ */ - "por "r3", "r0"\n" /* r0 = __ 03 02 00 */ - "pxor "r7", "r6"\n" /* r6 = __ 10 __ __ */ - "por "r7", "r0"\n" /* r0 = 11 03 02 00 = R0 */ - "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */ - "movq "r4", "r3"\n" /* r3 = 07 06 05 04 */ - "movq "r0", (%edx)\n" /* write R0 = r0 */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 04 */ - "movq 32(%eax), "r0"\n" - "psllq $16, "r3"\n" /* r3 = __ __ 04 __ */ - "pmullw 32(%esi), "r0"\n" /* r0 = 23 22 21 20 */ - "pand "r1", "r7"\n" /* r7 = 13 __ __ __ */ - "por "r3", "r5"\n" /* r5 = __ __ 04 01 */ - "por "r6", "r7"\n" /* r7 = 13 10 __ __ */ - "movq 24(%eax), "r3"\n" - "por "r5", "r7"\n" /* r7 = 13 10 04 01 = R1 */ - "pmullw 24(%esi), "r3"\n" /* r3 = 17 16 15 14 */ - "psrlq $16, "r4"\n" /* r4 = __ 07 06 05 */ - "movq "r7", 16(%edx)\n" /* write R1 = r7 */ - "movq "r4", "r5"\n" /* r5 = __ 07 06 05 */ - "movq "r0", "r7"\n" /* r7 = 23 22 21 20 */ - "psrlq $16, "r4"\n" /* r4 = __ __ 07 06 */ - "psrlq $48, "r7"\n" /* r7 = __ __ __ 23 */ - "movq "r2", "r6"\n" /* r6 = __ __ __ FF */ - "pand "r2", "r5"\n" /* r5 = __ __ __ 05 */ - "pand "r4", "r6"\n" /* r6 = __ __ __ 06 */ - "movq "r7", 80(%edx)\n" /* partial R9 = __ __ __ 23 */ - "pxor "r6", "r4"\n" /* r4 = __ __ 07 __ */ - "psrlq $32, "r1"\n" /* r1 = __ __ 13 12 */ - "por "r5", "r4"\n" /* r4 = __ __ 07 05 */ - "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */ - "pand "r2", "r1"\n" /* r1 = __ __ __ 12 */ - "movq 48(%eax), "r5"\n" - "psllq $16, "r0"\n" /* r0 = 22 21 20 __ */ - "pmullw 48(%esi), "r5"\n" /* r5 = 33 32 31 30 */ - "pand "r0", "r7"\n" /* r7 = 22 __ __ __ */ - "movq "r1", 64(%edx)\n" /* partial R8 = __ __ __ 12 */ - "por "r4", "r7"\n" /* r7 = 22 __ 07 05 */ - "movq "r3", "r4"\n" /* r4 = 17 16 15 14 */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 14 */ - "movq "M(2)", "r1"\n" /* r1 = __ FF __ __ */ - "psllq $32, 
"r3"\n" /* r3 = __ 14 __ __ */ - "por "r3", "r7"\n" /* r7 = 22 14 07 05 = R2 */ - "movq "r5", "r3"\n" /* r3 = 33 32 31 30 */ - "psllq $48, "r3"\n" /* r3 = 30 __ __ __ */ - "pand "r0", "r1"\n" /* r1 = __ 21 __ __ */ - "movq "r7", 32(%edx)\n" /* write R2 = r7 */ - "por "r3", "r6"\n" /* r6 = 30 __ __ 06 */ - "movq "M(1)", "r7"\n" /* r7 = __ __ FF __ */ - "por "r1", "r6"\n" /* r6 = 30 21 __ 06 */ - "movq 56(%eax), "r1"\n" - "pand "r4", "r7"\n" /* r7 = __ __ 15 __ */ - "pmullw 56(%esi), "r1"\n" /* r1 = 37 36 35 34 */ - "por "r6", "r7"\n" /* r7 = 30 21 15 06 = R3 */ - "pand "M(1)", "r0"\n" /* r0 = __ __ 20 __ */ - "psrlq $32, "r4"\n" /* r4 = __ __ 17 16 */ - "movq "r7", 48(%edx)\n" /* write R3 = r7 */ - "movq "r4", "r6"\n" /* r6 = __ __ 17 16 */ - "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */ - "pand "r2", "r4"\n" /* r4 = __ __ __ 16 */ - "movq "M(1)", "r3"\n" /* r3 = __ __ FF __ */ - "pand "r1", "r7"\n" /* r7 = 37 __ __ __ */ - "pand "r5", "r3"\n" /* r3 = __ __ 31 __ */ - "por "r4", "r0"\n" /* r0 = __ __ 20 16 */ - "psllq $16, "r3"\n" /* r3 = __ 31 __ __ */ - "por "r0", "r7"\n" /* r7 = 37 __ 20 16 */ - "movq "M(2)", "r4"\n" /* r4 = __ FF __ __ */ - "por "r3", "r7"\n" /* r7 = 37 31 20 16 = R4 */ - "movq 80(%eax), "r0"\n" - "movq "r4", "r3"\n" /* r3 = __ __ FF __ */ - "pmullw 80(%esi), "r0"\n" /* r0 = 53 52 51 50 */ - "pand "r5", "r4"\n" /* r4 = __ 32 __ __ */ - "movq "r7", 8(%edx)\n" /* write R4 = r7 */ - "por "r4", "r6"\n" /* r6 = __ 32 17 16 */ - "movq "r3", "r4"\n" /* r4 = __ FF __ __ */ - "psrlq $16, "r6"\n" /* r6 = __ __ 32 17 */ - "movq "r0", "r7"\n" /* r7 = 53 52 51 50 */ - "pand "r1", "r4"\n" /* r4 = __ 36 __ __ */ - "psllq $48, "r7"\n" /* r7 = 50 __ __ __ */ - "por "r4", "r6"\n" /* r6 = __ 36 32 17 */ - "movq 88(%eax), "r4"\n" - "por "r6", "r7"\n" /* r7 = 50 36 32 17 = R5 */ - "pmullw 88(%esi), "r4"\n" /* r4 = 57 56 55 54 */ - "psrlq $16, "r3"\n" /* r3 = __ __ FF __ */ - "movq "r7", 24(%edx)\n" /* write R5 = r7 */ - "pand "r1", "r3"\n" /* r3 = __ __ 35 __ */ - 
"psrlq $48, "r5"\n" /* r5 = __ __ __ 33 */ - "pand "r2", "r1"\n" /* r1 = __ __ __ 34 */ - "movq 104(%eax), "r6"\n" - "por "r3", "r5"\n" /* r5 = __ __ 35 33 */ - "pmullw 104(%esi), "r6"\n" /* r6 = 67 66 65 64 */ - "psrlq $16, "r0"\n" /* r0 = __ 53 52 51 */ - "movq "r4", "r7"\n" /* r7 = 57 56 55 54 */ - "movq "r2", "r3"\n" /* r3 = __ __ __ FF */ - "psllq $48, "r7"\n" /* r7 = 54 __ __ __ */ - "pand "r0", "r3"\n" /* r3 = __ __ __ 51 */ - "pxor "r3", "r0"\n" /* r0 = __ 53 52 __ */ - "psllq $32, "r3"\n" /* r3 = __ 51 __ __ */ - "por "r5", "r7"\n" /* r7 = 54 __ 35 33 */ - "movq "r6", "r5"\n" /* r5 = 67 66 65 64 */ - "pand "M(1)", "r6"\n" /* r6 = __ __ 65 __ */ - "por "r3", "r7"\n" /* r7 = 54 51 35 33 = R6 */ - "psllq $32, "r6"\n" /* r6 = 65 __ __ __ */ - "por "r1", "r0"\n" /* r0 = __ 53 52 34 */ - "movq "r7", 40(%edx)\n" /* write R6 = r7 */ - "por "r6", "r0"\n" /* r0 = 65 53 52 34 = R7 */ - "movq 120(%eax), "r7"\n" - "movq "r5", "r6"\n" /* r6 = 67 66 65 64 */ - "pmullw 120(%esi), "r7"\n" /* r7 = 77 76 75 74 */ - "psrlq $32, "r5"\n" /* r5 = __ __ 67 66 */ - "pand "r2", "r6"\n" /* r6 = __ __ __ 64 */ - "movq "r5", "r1"\n" /* r1 = __ __ 67 66 */ - "movq "r0", 56(%edx)\n" /* write R7 = r0 */ - "pand "r2", "r1"\n" /* r1 = __ __ __ 66 */ - "movq 112(%eax), "r0"\n" - "movq "r7", "r3"\n" /* r3 = 77 76 75 74 */ - "pmullw 112(%esi), "r0"\n" /* r0 = 73 72 71 70 */ - "psllq $16, "r3"\n" /* r3 = 76 75 74 __ */ - "pand "M(3)", "r7"\n" /* r7 = 77 __ __ __ */ - "pxor "r1", "r5"\n" /* r5 = __ __ 67 __ */ - "por "r5", "r6"\n" /* r6 = __ __ 67 64 */ - "movq "r3", "r5"\n" /* r5 = 76 75 74 __ */ - "pand "M(3)", "r5"\n" /* r5 = 76 __ __ __ */ - "por "r1", "r7"\n" /* r7 = 77 __ __ 66 */ - "movq 96(%eax), "r1"\n" - "pxor "r5", "r3"\n" /* r3 = __ 75 74 __ */ - "pmullw 96(%esi), "r1"\n" /* r1 = 63 62 61 60 */ - "por "r3", "r7"\n" /* r7 = 77 75 74 66 = R15 */ - "por "r5", "r6"\n" /* r6 = 76 __ 67 64 */ - "movq "r0", "r5"\n" /* r5 = 73 72 71 70 */ - "movq "r7", 120(%edx)\n" /* store R15 = r7 */ - 
"psrlq $16, "r5"\n" /* r5 = __ 73 72 71 */ - "pand "M(2)", "r5"\n" /* r5 = __ 73 __ __ */ - "movq "r0", "r7"\n" /* r7 = 73 72 71 70 */ - "por "r5", "r6"\n" /* r6 = 76 73 67 64 = R14 */ - "pand "r2", "r0"\n" /* r0 = __ __ __ 70 */ - "pxor "r0", "r7"\n" /* r7 = 73 72 71 __ */ - "psllq $32, "r0"\n" /* r0 = __ 70 __ __ */ - "movq "r6", 104(%edx)\n" /* write R14 = r6 */ - "psrlq $16, "r4"\n" /* r4 = __ 57 56 55 */ - "movq 72(%eax), "r5"\n" - "psllq $16, "r7"\n" /* r7 = 72 71 __ __ */ - "pmullw 72(%esi), "r5"\n" /* r5 = 47 46 45 44 */ - "movq "r7", "r6"\n" /* r6 = 72 71 __ __ */ - "movq "M(2)", "r3"\n" /* r3 = __ FF __ __ */ - "psllq $16, "r6"\n" /* r6 = 71 __ __ __ */ - "pand "M(3)", "r7"\n" /* r7 = 72 __ __ __ */ - "pand "r1", "r3"\n" /* r3 = __ 62 __ __ */ - "por "r0", "r7"\n" /* r7 = 72 70 __ __ */ - "movq "r1", "r0"\n" /* r0 = 63 62 61 60 */ - "pand "M(3)", "r1"\n" /* r1 = 63 __ __ __ */ - "por "r3", "r6"\n" /* r6 = 71 62 __ __ */ - "movq "r4", "r3"\n" /* r3 = __ 57 56 55 */ - "psrlq $32, "r1"\n" /* r1 = __ __ 63 __ */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 55 */ - "por "r1", "r7"\n" /* r7 = 72 70 63 __ */ - "por "r3", "r7"\n" /* r7 = 72 70 63 55 = R13 */ - "movq "r4", "r3"\n" /* r3 = __ 57 56 55 */ - "pand "M(1)", "r3"\n" /* r3 = __ __ 56 __ */ - "movq "r5", "r1"\n" /* r1 = 47 46 45 44 */ - "movq "r7", 88(%edx)\n" /* write R13 = r7 */ - "psrlq $48, "r5"\n" /* r5 = __ __ __ 47 */ - "movq 64(%eax), "r7"\n" - "por "r3", "r6"\n" /* r6 = 71 62 56 __ */ - "pmullw 64(%esi), "r7"\n" /* r7 = 43 42 41 40 */ - "por "r5", "r6"\n" /* r6 = 71 62 56 47 = R12 */ - "pand "M(2)", "r4"\n" /* r4 = __ 57 __ __ */ - "psllq $32, "r0"\n" /* r0 = 61 60 __ __ */ - "movq "r6", 72(%edx)\n" /* write R12 = r6 */ - "movq "r0", "r6"\n" /* r6 = 61 60 __ __ */ - "pand "M(3)", "r0"\n" /* r0 = 61 __ __ __ */ - "psllq $16, "r6"\n" /* r6 = 60 __ __ __ */ - "movq 40(%eax), "r5"\n" - "movq "r1", "r3"\n" /* r3 = 47 46 45 44 */ - "pmullw 40(%esi), "r5"\n" /* r5 = 27 26 25 24 */ - "psrlq $16, "r1"\n" /* r1 
= __ 47 46 45 */ - "pand "M(1)", "r1"\n" /* r1 = __ __ 46 __ */ - "por "r4", "r0"\n" /* r0 = 61 57 __ __ */ - "pand "r7", "r2"\n" /* r2 = __ __ __ 40 */ - "por "r1", "r0"\n" /* r0 = 61 57 46 __ */ - "por "r2", "r0"\n" /* r0 = 61 57 46 40 = R11 */ - "psllq $16, "r3"\n" /* r3 = 46 45 44 __ */ - "movq "r3", "r4"\n" /* r4 = 46 45 44 __ */ - "movq "r5", "r2"\n" /* r2 = 27 26 25 24 */ - "movq "r0", 112(%edx)\n" /* write R11 = r0 */ - "psrlq $48, "r2"\n" /* r2 = __ __ __ 27 */ - "pand "M(2)", "r4"\n" /* r4 = __ 45 __ __ */ - "por "r2", "r6"\n" /* r6 = 60 __ __ 27 */ - "movq "M(1)", "r2"\n" /* r2 = __ __ FF __ */ - "por "r4", "r6"\n" /* r6 = 60 45 __ 27 */ - "pand "r7", "r2"\n" /* r2 = __ __ 41 __ */ - "psllq $32, "r3"\n" /* r3 = 44 __ __ __ */ - "por 80(%edx), "r3"\n" /* r3 = 44 __ __ 23 */ - "por "r2", "r6"\n" /* r6 = 60 45 41 27 = R10 */ - "movq "M(3)", "r2"\n" /* r2 = FF __ __ __ */ - "psllq $16, "r5"\n" /* r5 = 26 25 24 __ */ - "movq "r6", 96(%edx)\n" /* store R10 = r6 */ - "pand "r5", "r2"\n" /* r2 = 26 __ __ __ */ - "movq "M(2)", "r6"\n" /* r6 = __ FF __ __ */ - "pxor "r2", "r5"\n" /* r5 = __ 25 24 __ */ - "pand "r7", "r6"\n" /* r6 = __ 42 __ __ */ - "psrlq $32, "r2"\n" /* r2 = __ __ 26 __ */ - "pand "M(3)", "r7"\n" /* r7 = 43 __ __ __ */ - "por "r2", "r3"\n" /* r3 = 44 __ 26 23 */ - "por 64(%edx), "r7"\n" /* r7 = 43 __ __ 12 */ - "por "r3", "r6"\n" /* r6 = 44 42 26 23 = R9 */ - "por "r5", "r7"\n" /* r7 = 43 25 24 12 = R8 */ - "movq "r6", 80(%edx)\n" /* store R9 = r6 */ - "movq "r7", 64(%edx)\n" /* store R8 = r7 */ - ); - /* 123c ( / 64 coeffs < 2c / coeff) */ -# undef M - -/* Done w/dequant + descramble + partial transpose; now do the idct itself. 
*/ - -# define I( K) MtoSTR(K*16(%edx)) -# define J( K) MtoSTR(((K - 4)*16)+8(%edx)) - - RowIDCT /* 46 c */ - Transpose /* 19 c */ - -# undef I -# undef J -# define I( K) MtoSTR((K*16)+64(%edx)) -# define J( K) MtoSTR(((K-4)*16)+72(%edx)) - - RowIDCT /* 46 c */ - Transpose /* 19 c */ - -# undef I -# undef J -# define I( K) MtoSTR((K * 16)(%edx)) -# define J( K) I( K) - - ColumnIDCT /* 57 c */ - -# undef I -# undef J -# define I( K) MtoSTR((K*16)+8(%edx)) -# define J( K) I( K) - - ColumnIDCT /* 57 c */ - -# undef I -# undef J - /* 368 cycles ( / 64 coeff < 6 c / coeff) */ - - ASM("emms\n"); -} - -/************************************************************************************** - * - * Routine: MMX_idct10 - * - * Description: Perform IDCT on a 8x8 block with at most 10 nonzero coefficients - * - * Input: Pointer to input and output buffer - * - * Output: None - * - * Return: None - * - * Special Note: The input coefficients are in transposed ZigZag order - * - * Error: None - * - *************************************************************************************** - */ -/* --------------------------------------------------------------- */ -// This macro does four 4-sample one-dimensional idcts in parallel. Inputs -// 4 thru 7 are assumed to be zero. 
-#define BeginIDCT_10 "#BeginIDCT_10\n" \ - " movq "I(3)","r2"\n" \ - \ - " movq "C(3)","r6"\n" \ - " movq "r2","r4"\n" \ - \ - " movq "C(5)","r1"\n" \ - " pmulhw "r6","r4"\n" \ - \ - " movq "I(1)","r3"\n" \ - " pmulhw "r2","r1"\n" \ - \ - " movq "C(1)","r0"\n" \ - " paddw "r2","r4"\n" \ - \ - " pxor "r6","r6"\n" \ - " paddw "r1","r2"\n" \ - \ - " movq "I(2)","r5"\n" \ - " pmulhw "r3","r0"\n" \ - \ - " movq "r5","r1"\n" \ - " paddw "r3","r0"\n" \ - \ - " pmulhw "C(7)","r3"\n" \ - " psubsw "r2","r6"\n" \ - \ - " pmulhw "C(2)","r5"\n" \ - " psubsw "r4","r0"\n" \ - \ - " movq "I(2)","r7"\n" \ - " paddsw "r4","r4"\n" \ - \ - " paddw "r5","r7"\n" \ - " paddsw "r0","r4"\n" \ - \ - " pmulhw "C(6)","r1"\n" \ - " psubsw "r6","r3"\n" \ - \ - " movq "r4","I(1)"\n" \ - " paddsw "r6","r6"\n" \ - \ - " movq "C(4)","r4"\n" \ - " paddsw "r3","r6"\n" \ - \ - " movq "r3","r5"\n" \ - " pmulhw "r4","r3"\n" \ - \ - " movq "r6","I(2)"\n" \ - " movq "r0","r2"\n" \ - \ - " movq "I(0)","r6"\n" \ - " pmulhw "r4","r0"\n" \ - \ - " paddw "r3","r5"\n" \ - " paddw "r0","r2"\n" \ - \ - " psubsw "r1","r5"\n" \ - " pmulhw "r4","r6"\n" \ - \ - " paddw "I(0)","r6"\n" \ - " paddsw "r1","r1"\n" \ - \ - " movq "r6","r4"\n" \ - " paddsw "r5","r1"\n" \ - \ - " psubsw "r2","r6"\n" \ - " paddsw "r2","r2"\n" \ - \ - " movq "I(1)","r0"\n" \ - " paddsw "r6","r2"\n" \ - \ - " psubsw "r1","r2"\n" \ - "#end BeginIDCT_10\n" -// end BeginIDCT_10 macro (25 cycles). - -#define RowIDCT_10 ASM("\n" \ - "#RowIDCT_10\n" \ - BeginIDCT_10 \ - "\n" \ - " movq "I(2)","r3"\n" /* r3 = D. */ \ - " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \ - " paddsw "r1","r1"\n" /* r1 = H. + H. */ \ - " paddsw "r7","r7"\n" /* r7 = G + G */ \ - " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\ - " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \ - " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \ - " paddsw "r3","r3"\n" \ - " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\ - " paddsw "r5","r5"\n" \ - " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. 
*/ \ - " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\ - " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \ - " paddsw "r0","r0"\n" \ - " movq "r1","I(1)"\n" /* save R1 */ \ - " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \ - "#end RowIDCT_10\n" \ -); -// end RowIDCT macro (8 + 38 = 46 cycles) - -// Column IDCT normalizes and stores final results. - -#define ColumnIDCT_10 ASM("\n" \ - "#ColumnIDCT_10\n" \ - BeginIDCT_10 \ - "\n" \ - " paddsw "Eight","r2"\n" \ - " paddsw "r1","r1"\n" /* r1 = H. + H. */ \ - " paddsw "r2","r1"\n" /* r1 = R1 = A.. + H. */\ - " psraw ""$4"","r2"\n" /* r2 = NR2 */ \ - " psubsw "r7","r4"\n" /* r4 = E. = E - G */ \ - " psraw ""$4"","r1"\n" /* r1 = NR1 */ \ - " movq "I(2)","r3"\n" /* r3 = D. */ \ - " paddsw "r7","r7"\n" /* r7 = G + G */ \ - " movq "r2","I(2)"\n" /* store NR2 at I2 */ \ - " paddsw "r4","r7"\n" /* r7 = G. = E + G */ \ - " movq "r1","I(1)"\n" /* store NR1 at I1 */ \ - " psubsw "r3","r4"\n" /* r4 = R4 = E. - D. */ \ - " paddsw "Eight","r4"\n" \ - " paddsw "r3","r3"\n" /* r3 = D. + D. */ \ - " paddsw "r4","r3"\n" /* r3 = R3 = E. + D. */ \ - " psraw ""$4"","r4"\n" /* r4 = NR4 */ \ - " psubsw "r5","r6"\n" /* r6 = R6 = F. - B.. */\ - " psraw ""$4"","r3"\n" /* r3 = NR3 */ \ - " paddsw "Eight","r6"\n" \ - " paddsw "r5","r5"\n" /* r5 = B.. + B.. */ \ - " paddsw "r6","r5"\n" /* r5 = R5 = F. + B.. */\ - " psraw ""$4"","r6"\n" /* r6 = NR6 */ \ - " movq "r4","J(4)"\n" /* store NR4 at J4 */ \ - " psraw ""$4"","r5"\n" /* r5 = NR5 */ \ - " movq "r3","I(3)"\n" /* store NR3 at I3 */ \ - " psubsw "r0","r7"\n" /* r7 = R7 = G. - C. */ \ - " paddsw "Eight","r7"\n" \ - " paddsw "r0","r0"\n" /* r0 = C. + C. */ \ - " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. 
*/ \ - " psraw ""$4"","r7"\n" /* r7 = NR7 */ \ - " movq "r6","J(6)"\n" /* store NR6 at J6 */ \ - " psraw ""$4"","r0"\n" /* r0 = NR0 */ \ - " movq "r5","J(5)"\n" /* store NR5 at J5 */ \ - \ - " movq "r7","J(7)"\n" /* store NR7 at J7 */ \ - \ - " movq "r0","I(0)"\n" /* store NR0 at I0 */ \ - "#end ColumnIDCT_10\n" \ -); -// end ColumnIDCT macro (38 + 19 = 57 cycles) -/* --------------------------------------------------------------- */ - - -/* --------------------------------------------------------------- */ -/* IDCT 10 */ -void IDct10__mmx( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ) { - -# define MIDM(M,I) MtoSTR(M+I*8(%ecx)) -# define M(I) MIDM( MaskOffset , I ) -# define MIDC(M,I) MtoSTR(M+(I-1)*8(%ecx)) -# define C(I) MIDC( CosineOffset , I ) -# define MIDEight(M) MtoSTR(M(%ecx)) -# define Eight MIDEight(EightOffset) - -# define r0 "%mm0" -# define r1 "%mm1" -# define r2 "%mm2" -# define r3 "%mm3" -# define r4 "%mm4" -# define r5 "%mm5" -# define r6 "%mm6" -# define r7 "%mm7" - - __asm__ __volatile__ ( - /* eax = quantized input */ - /* esi = quantization table */ - /* edx = destination (= idct buffer) */ - /* ecx = idctconstants */ - "" - : - :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants) - ); - - ASM( - "movq (%eax), "r0"\n" - "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */ - "movq 16(%eax), "r1"\n" - "pmullw 16(%esi), "r1"\n" /* r1 = 13 12 11 10 */ - "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */ - "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */ - "movq 8(%eax), "r4"\n" - "psrlq $16, "r0"\n" /* r0 = __ 03 02 01 */ - "pmullw 8(%esi), "r4"\n" /* r4 = 07 06 05 04 */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 00 */ - "movq "r0", "r5"\n" /* r5 = __ 03 02 01 */ - "pand "r2", "r5"\n" /* r5 = __ __ __ 01 */ - "psllq $32, "r1"\n" /* r1 = 11 10 __ __ */ - "movq "M(3)", "r7"\n" /* r7 = FF __ __ __ */ - "pxor "r5", "r0"\n" /* r0 = __ 03 02 __ */ - "pand "r1", "r7"\n" /* r7 = 11 __ __ __ */ - "por "r3", "r0"\n" /* r0 = 
__ 03 02 00 */ - "pxor "r7", "r1"\n" /* r1 = __ 10 __ __ */ - "por "r7", "r0"\n" /* r0 = 11 03 02 00 = R0 */ - "movq "r4", "r3"\n" /* r3 = 07 06 05 04 */ - "movq "r0", (%edx)\n" /* write R0 = r0 */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 04 */ - "psllq $16, "r3"\n" /* r3 = __ __ 04 __ */ - "por "r3", "r5"\n" /* r5 = __ __ 04 01 */ - "por "r5", "r1"\n" /* r1 = __ 10 04 01 = R1 */ - "psrlq $16, "r4"\n" /* r4 = __ 07 06 05 */ - "movq "r1", 16(%edx)\n" /* write R1 = r1 */ - "movq "r4", "r5"\n" /* r5 = __ 07 06 05 */ - "psrlq $16, "r4"\n" /* r4 = __ __ 07 06 */ - "movq "r2", "r6"\n" /* r6 = __ __ __ FF */ - "pand "r2", "r5"\n" /* r5 = __ __ __ 05 */ - "pand "r4", "r6"\n" /* r6 = __ __ __ 06 */ - "pxor "r6", "r4"\n" /* r4 = __ __ 07 __ */ - "por "r5", "r4"\n" /* r4 = __ __ 07 05 */ - "movq "r4", 32(%edx)\n" /* write R2 = r4 */ - "movq "r6", 48(%edx)\n" /* write R3 = r6 */ - ); -# undef M - -/* Done w/dequant + descramble + partial transpose; now do the idct itself. */ - -# define I( K) MtoSTR((K*16)(%edx)) -# define J( K) MtoSTR(((K - 4) * 16)+8(%edx)) - - RowIDCT_10 /* 33 c */ - Transpose /* 19 c */ - -# undef I -# undef J -//# define I( K) [edx + ( K * 16) + 64] -//# define J( K) [edx + ( (K - 4) * 16) + 72] - -// RowIDCT ; 46 c -// Transpose ; 19 c - -//# undef I -//# undef J -# define I( K) MtoSTR((K * 16)(%edx)) -# define J( K) I( K) - - ColumnIDCT_10 /* 44 c */ - -# undef I -# undef J -# define I( K) MtoSTR((K * 16)+8(%edx)) -# define J( K) I( K) - - ColumnIDCT_10 /* 44 c */ - -# undef I -# undef J - - ASM("emms\n"); -} - -/************************************************************************************** - * - * Routine: MMX_idct3 - * - * Description: Perform IDCT on a 8x8 block with at most 3 nonzero coefficients - * - * Input: Pointer to input and output buffer - * - * Output: None - * - * Return: None - * - * Special Note: Only works for three nonzero coefficients. 
- * - * Error: None - * - *************************************************************************************** - */ -/*************************************************************************************** - In IDCT 3, we are dealing with only three Non-Zero coefficients in the 8x8 block. - In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to - do 1-D row idcts on the first two rows, the rest six rows remain zero anyway. - After row IDCTs, since every column could have nonzero coefficients, we need do - eight 1-D column IDCT. However, for each column, there are at most two nonzero - coefficients, coefficient 0 and coefficient 1. Same for the coefficents for the - two 1-d row idcts. For this reason, the process of a 1-D IDCT is simplified - - from a full version: - - A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7) - C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3) - A. = C4 * (A - C) B. = C4 * (B - D) - C. = A + C D. = B + D - - E = C4 * (I0 + I4) F = C4 * (I0 - I4) - G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6) - E. = E - G - G. = E + G - - A.. = F + A. B.. = B. - H - F. = F - A. H. = B. + H - - R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B.. - R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B.. - - To: - - - A = (C1 * I1) B = (C7 * I1) - C = 0 D = 0 - A. = C4 * A B. = C4 * B - C. = A D. = B - - E = C4 * I0 F = E - G = 0 H = 0 - E. = E - G. = E - - A.. = E + A. B.. = B. - F. = E - A. H. = B. - - R0 = E + A R1 = E + A. + B. R3 = E + B R5 = E - A. + B. - R7 = E - A R2 = E + A. - B. R4 = E - B R6 = F - A. - B. - -******************************************************************************************/ - -#define RowIDCT_3 ASM("\n"\ - "#RowIDCT_3\n"\ - " movq "I(1)","r7"\n" /* r7 = I1 */ \ - " movq "C(1)","r0"\n" /* r0 = C1 */ \ - " movq "C(7)","r3"\n" /* r3 = C7 */ \ - " pmulhw "r7","r0"\n" /* r0 = C1 * I1 - I1 */ \ - " pmulhw "r7","r3"\n" /* r3 = C7 * I1 = B, D. 
*/ \ - " movq "I(0)","r6"\n" /* r6 = I0 */ \ - " movq "C(4)","r4"\n" /* r4 = C4 */ \ - " paddw "r7","r0"\n" /* r0 = C1 * I1 = A, C. */ \ - " movq "r6","r1"\n" /* make a copy of I0 */ \ - " pmulhw "r4","r6"\n" /* r2 = C4 * I0 - I0 */ \ - " movq "r0","r2"\n" /* make a copy of A */ \ - " movq "r3","r5"\n" /* make a copy of B */ \ - " pmulhw "r4","r2"\n" /* r2 = C4 * A - A */ \ - " pmulhw "r4","r5"\n" /* r5 = C4 * B - B */ \ - " paddw "r1","r6"\n" /* r2 = C4 * I0 = E, F */ \ - " movq "r6","r4"\n" /* r4 = E */ \ - " paddw "r0","r2"\n" /* r2 = A. */ \ - " paddw "r3","r5"\n" /* r5 = B. */ \ - " movq "r6","r7"\n" /* r7 = E */ \ - " movq "r5","r1"\n" /* r1 = B. */ \ - /* r0 = A */ \ - /* r3 = B */ \ - /* r2 = A. */ \ - /* r5 = B. */ \ - /* r6 = E */ \ - /* r4 = E */ \ - /* r7 = E */ \ - /* r1 = B. */ \ - " psubw "r2","r6"\n" /* r6 = E - A. */ \ - " psubw "r3","r4"\n" /* r4 = E - B ----R4 */ \ - " psubw "r0","r7"\n" /* r7 = E - A ----R7 */ \ - " paddw "r2","r2"\n" /* r2 = A. + A. */ \ - " paddw "r3","r3"\n" /* r3 = B + B */ \ - " paddw "r0","r0"\n" /* r0 = A + A */ \ - " paddw "r6","r2"\n" /* r2 = E + A. */ \ - " paddw "r4","r3"\n" /* r3 = E + B ----R3 */ \ - " psubw "r1","r2"\n" /* r2 = E + A. - B. ----R2 */ \ - " psubw "r5","r6"\n" /* r6 = E - A. - B. ----R6 */ \ - " paddw "r1","r1"\n" /* r1 = B. + B. */ \ - " paddw "r5","r5"\n" /* r5 = B. + B. */ \ - " paddw "r7","r0"\n" /* r0 = E + A ----R0 */ \ - " paddw "r2","r1"\n" /* r1 = E + A. + B. -----R1 */ \ - " movq "r1","I(1)"\n" /* save r1 */ \ - " paddw "r6","r5"\n" /* r5 = E - A. + B. -----R5 */ \ - "#end RowIDCT_3\n"\ -); -//End of RowIDCT_3 - -#define ColumnIDCT_3 ASM("\n"\ - "#ColumnIDCT_3\n"\ - " movq "I(1)","r7"\n" /* r7 = I1 */ \ - " movq "C(1)","r0"\n" /* r0 = C1 */ \ - " movq "C(7)","r3"\n" /* r3 = C7 */ \ - " pmulhw "r7","r0"\n" /* r0 = C1 * I1 - I1 */ \ - " pmulhw "r7","r3"\n" /* r3 = C7 * I1 = B, D. 
*/ \ - " movq "I(0)","r6"\n" /* r6 = I0 */ \ - " movq "C(4)","r4"\n" /* r4 = C4 */ \ - " paddw "r7","r0"\n" /* r0 = C1 * I1 = A, C. */ \ - " movq "r6","r1"\n" /* make a copy of I0 */ \ - " pmulhw "r4","r6"\n" /* r2 = C4 * I0 - I0 */ \ - " movq "r0","r2"\n" /* make a copy of A */ \ - " movq "r3","r5"\n" /* make a copy of B */ \ - " pmulhw "r4","r2"\n" /* r2 = C4 * A - A */ \ - " pmulhw "r4","r5"\n" /* r5 = C4 * B - B */ \ - " paddw "r1","r6"\n" /* r2 = C4 * I0 = E, F */ \ - " movq "r6","r4"\n" /* r4 = E */ \ - " paddw "Eight","r6"\n" /* +8 for shift */ \ - " paddw "Eight","r4"\n" /* +8 for shift */ \ - " paddw "r0","r2"\n" /* r2 = A. */ \ - " paddw "r3","r5"\n" /* r5 = B. */ \ - " movq "r6","r7"\n" /* r7 = E */ \ - " movq "r5","r1"\n" /* r1 = B. */ \ -/* r0 = A */ \ -/* r3 = B */ \ -/* r2 = A. */ \ -/* r5 = B. */ \ -/* r6 = E */ \ -/* r4 = E */ \ -/* r7 = E */ \ -/* r1 = B. */ \ - " psubw "r2","r6"\n" /* r6 = E - A. */ \ - " psubw "r3","r4"\n" /* r4 = E - B ----R4 */ \ - " psubw "r0","r7"\n" /* r7 = E - A ----R7 */ \ - " paddw "r2","r2"\n" /* r2 = A. + A. */ \ - " paddw "r3","r3"\n" /* r3 = B + B */ \ - " paddw "r0","r0"\n" /* r0 = A + A */ \ - " paddw "r6","r2"\n" /* r2 = E + A. */ \ - " paddw "r4","r3"\n" /* r3 = E + B ----R3 */ \ - " psraw $4,"r4"\n" /* shift */ \ - " movq "r4","J(4)"\n" /* store R4 at J4 */ \ - " psraw $4,"r3"\n" /* shift */ \ - " movq "r3","I(3)"\n" /* store R3 at I3 */ \ - " psubw "r1","r2"\n" /* r2 = E + A. - B. ----R2 */ \ - " psubw "r5","r6"\n" /* r6 = E - A. - B. ----R6 */ \ - " paddw "r1","r1"\n" /* r1 = B. + B. */ \ - " paddw "r5","r5"\n" /* r5 = B. + B. */ \ - " paddw "r7","r0"\n" /* r0 = E + A ----R0 */ \ - " paddw "r2","r1"\n" /* r1 = E + A. + B. 
-----R1 */ \ - " psraw $4,"r7"\n" /* shift */ \ - " psraw $4,"r2"\n" /* shift */ \ - " psraw $4,"r0"\n" /* shift */ \ - " psraw $4,"r1"\n" /* shift */ \ - " movq "r7","J(7)"\n" /* store R7 to J7 */ \ - " movq "r0","I(0)"\n" /* store R0 to I0 */ \ - " movq "r1","I(1)"\n" /* store R1 to I1 */ \ - " movq "r2","I(2)"\n" /* store R2 to I2 */ \ - " movq "r1","I(1)"\n" /* save r1 */ \ - " paddw "r6","r5"\n" /* r5 = E - A. + B. -----R5 */ \ - " psraw $4,"r5"\n" /* shift */ \ - " movq "r5","J(5)"\n" /* store R5 at J5 */ \ - " psraw $4,"r6"\n" /* shift */ \ - " movq "r6","J(6)"\n" /* store R6 at J6 */ \ - "#end ColumnIDCT_3\n"\ -); -//End of ColumnIDCT_3 - -void IDct3__mmx( Q_LIST_ENTRY * InputData, - ogg_int16_t *QuantMatrix, - ogg_int16_t * OutputData ) { - -# define MIDM(M,I) MtoSTR(M+I*8(%ecx)) -# define M(I) MIDM( MaskOffset , I ) -# define MIDC(M,I) MtoSTR(M+(I-1)*8(%ecx)) -# define C(I) MIDC( CosineOffset , I ) -# define MIDEight(M) MtoSTR(M(%ecx)) -# define Eight MIDEight(EightOffset) - -# define r0 "%mm0" -# define r1 "%mm1" -# define r2 "%mm2" -# define r3 "%mm3" -# define r4 "%mm4" -# define r5 "%mm5" -# define r6 "%mm6" -# define r7 "%mm7" - - __asm__ __volatile__ ( - /* eax = quantized input */ - /* esi = quantization table */ - /* edx = destination (= idct buffer) */ - /* ecx = idctconstants */ - "" - : - :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants) - ); - - ASM( - "movq (%eax), "r0"\n" - "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */ - "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */ - "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */ - "psrlq $16, "r0"\n" /* r0 = __ 03 02 01 */ - "pand "r2", "r3"\n" /* r3 = __ __ __ 00 */ - "movq "r0", "r5"\n" /* r5 = __ 03 02 01 */ - "pand "r2", "r5"\n" /* r5 = __ __ __ 01 */ - "pxor "r5", "r0"\n" /* r0 = __ 03 02 __ */ - "por "r3", "r0"\n" /* r0 = __ 03 02 00 */ - "movq "r0", (%edx)\n" /* write R0 = r0 */ - "movq "r5", 16(%edx)\n" /* write R1 = r5 */ - ); -# undef M - -/* Done partial transpose; now do the 
idct itself. */ - -# define I( K) MtoSTR(K*16(%edx)) -# define J( K) MtoSTR(((K - 4)*16)+8(%edx)) - - RowIDCT_3 /* 33 c */ - Transpose /* 19 c */ - -# undef I -# undef J -//# define I( K) [edx + ( K * 16) + 64] -//# define J( K) [edx + ( (K - 4) * 16) + 72] - -// RowIDCT ; 46 c -// Transpose ; 19 c - -//# undef I -//# undef J -# define I( K) MtoSTR((K * 16)(%edx)) -# define J( K) I( K) - - ColumnIDCT_3 /* 44 c */ - -# undef I -# undef J -# define I( K) MtoSTR((K*16)+8(%edx)) -# define J( K) I( K) - - ColumnIDCT_3 /* 44 c */ - -# undef I -# undef J - - ASM("emms\n"); -} - - -/* install our implementation in the function table */ -void dsp_mmx_idct_init(DspFunctions *funcs) -{ - funcs->IDctSlow = IDctSlow__mmx; - funcs->IDct10 = IDct10__mmx; - funcs->IDct3 = IDct3__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c deleted file mode 100644 index 7a931afe4..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c +++ /dev/null @@ -1,182 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: recon_mmx.c 15153 2008-08-04 18:37:55Z tterribe $ - - ********************************************************************/ - -#include "../codec_internal.h" - -#if defined(USE_ASM) - -static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL; - -static void copy8x8__mmx (unsigned char *src, - unsigned char *dest, - unsigned int stride) -{ - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " lea (%2, %2, 2), %%edi \n\t" - - " movq (%1), %%mm0 \n\t" - " movq (%1, %2), %%mm1 \n\t" - " movq (%1, %2, 2), %%mm2 \n\t" - " movq (%1, %%edi), %%mm3 \n\t" - - " lea (%1, %2, 4), %1 \n\t" - - " movq %%mm0, (%0) \n\t" - " movq %%mm1, (%0, %2) \n\t" - " movq %%mm2, (%0, %2, 2) \n\t" - " movq %%mm3, (%0, %%edi) \n\t" - - " lea (%0, %2, 4), %0 \n\t" - - " movq (%1), %%mm0 \n\t" - " movq (%1, %2), %%mm1 \n\t" - " movq (%1, %2, 2), %%mm2 \n\t" - " movq (%1, %%edi), %%mm3 \n\t" - - " movq %%mm0, (%0) \n\t" - " movq %%mm1, (%0, %2) \n\t" - " movq %%mm2, (%0, %2, 2) \n\t" - " movq %%mm3, (%0, %%edi) \n\t" - : "+a" (dest) - : "c" (src), - "d" (stride) - : "memory", "edi" - ); -} - -static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " movq %[V128], %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */ - - " lea 128(%1), %%edi \n\t" /* Endpoint in input buffer */ - "1: \n\t" - " movq (%1), %%mm2 \n\t" /* First four input values */ - - " packsswb 8(%1), %%mm2 \n\t" /* pack with next(high) four values */ - " por %%mm0, %%mm0 \n\t" - " pxor %%mm0, %%mm2 \n\t" /* Convert result to unsigned (same as add 128) */ - " lea 16(%1), %1 \n\t" /* Step source buffer */ - " cmp %%edi, %1 \n\t" /* are we done */ - - " movq %%mm2, (%0) \n\t" /* store 
results */ - - " lea (%0, %2), %0 \n\t" /* Step output buffer */ - " jc 1b \n\t" /* Loop back if we are not done */ - : "+r" (ReconPtr) - : "r" (ChangePtr), - "r" (LineStep), - [V128] "m" (V128) - : "memory", "edi" - ); -} - -static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr, - ogg_int16_t *ChangePtr, ogg_uint32_t LineStep) -{ - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm0, %%mm0 \n\t" - " lea 128(%1), %%edi \n\t" - - "1: \n\t" - " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */ - - " movq (%1), %%mm4 \n\t" /* first 4 changes */ - " movq %%mm2, %%mm3 \n\t" - " movq 8(%1), %%mm5 \n\t" /* last 4 changes */ - " punpcklbw %%mm0, %%mm2 \n\t" /* turn first 4 refs into positive 16-bit #s */ - " paddsw %%mm4, %%mm2 \n\t" /* add in first 4 changes */ - " punpckhbw %%mm0, %%mm3 \n\t" /* turn last 4 refs into positive 16-bit #s */ - " paddsw %%mm5, %%mm3 \n\t" /* add in last 4 changes */ - " add %3, %2 \n\t" /* next row of reference pixels */ - " packuswb %%mm3, %%mm2 \n\t" /* pack result to unsigned 8-bit values */ - " lea 16(%1), %1 \n\t" /* next row of changes */ - " cmp %%edi, %1 \n\t" /* are we done? 
*/ - - " movq %%mm2, (%0) \n\t" /* store result */ - - " lea (%0, %3), %0 \n\t" /* next row of output */ - " jc 1b \n\t" - : "+r" (ReconPtr) - : "r" (ChangePtr), - "r" (RefPtr), - "r" (LineStep) - : "memory", "edi" - ); -} - -static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1, - unsigned char *RefPtr2, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - __asm__ __volatile__ ( - " .p2align 4 \n\t" - - " pxor %%mm0, %%mm0 \n\t" - " lea 128(%1), %%edi \n\t" - - "1: \n\t" - " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */ - " movq (%3), %%mm4 \n\t" /* (+3 misaligned) 8 reference pixels */ - - " movq %%mm2, %%mm3 \n\t" - " punpcklbw %%mm0, %%mm2 \n\t" /* mm2 = start ref1 as positive 16-bit #s */ - " movq %%mm4, %%mm5 \n\t" - " movq (%1), %%mm6 \n\t" /* first 4 changes */ - " punpckhbw %%mm0, %%mm3 \n\t" /* mm3 = end ref1 as positive 16-bit #s */ - " movq 8(%1), %%mm7 \n\t" /* last 4 changes */ - " punpcklbw %%mm0, %%mm4 \n\t" /* mm4 = start ref2 as positive 16-bit #s */ - " punpckhbw %%mm0, %%mm5 \n\t" /* mm5 = end ref2 as positive 16-bit #s */ - " paddw %%mm4, %%mm2 \n\t" /* mm2 = start (ref1 + ref2) */ - " paddw %%mm5, %%mm3 \n\t" /* mm3 = end (ref1 + ref2) */ - " psrlw $1, %%mm2 \n\t" /* mm2 = start (ref1 + ref2)/2 */ - " psrlw $1, %%mm3 \n\t" /* mm3 = end (ref1 + ref2)/2 */ - " paddw %%mm6, %%mm2 \n\t" /* add changes to start */ - " paddw %%mm7, %%mm3 \n\t" /* add changes to end */ - " lea 16(%1), %1 \n\t" /* next row of changes */ - " packuswb %%mm3, %%mm2 \n\t" /* pack start|end to unsigned 8-bit */ - " add %4, %2 \n\t" /* next row of reference pixels */ - " add %4, %3 \n\t" /* next row of reference pixels */ - " movq %%mm2, (%0) \n\t" /* store result */ - " add %4, %0 \n\t" /* next row of output */ - " cmp %%edi, %1 \n\t" /* are we done? 
*/ - " jc 1b \n\t" - : "+r" (ReconPtr) - : "r" (ChangePtr), - "r" (RefPtr1), - "r" (RefPtr2), - "m" (LineStep) - : "memory", "edi" - ); -} - -void dsp_mmx_recon_init(DspFunctions *funcs) -{ - funcs->copy8x8 = copy8x8__mmx; - funcs->recon_intra8x8 = recon_intra8x8__mmx; - funcs->recon_inter8x8 = recon_inter8x8__mmx; - funcs->recon_inter8x8_half = recon_inter8x8_half__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c deleted file mode 100644 index cecc0eb76..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c +++ /dev/null @@ -1,1605 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" -#include "../dsp.h" - -#if 0 -//These are to let me selectively enable the C versions, these are needed -#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2) -#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b))) -#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b)))) -#endif - - -static const ogg_int64_t V128 = 0x0080008000800080; - -static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr, - ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - - //Make non-zero to use the C-version -#if 0 - int i; - - /* For each block row */ - for (i=8; 
i; i--) { - DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]); - DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]); - DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]); - DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]); - DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]); - DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]); - DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]); - DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]); - - /* Start next row */ - FiltPtr += PixelsPerLine; - ReconPtr += ReconPixelsPerLine; - DctInputPtr += 8; - } -#else - __asm { - align 16 - - pxor mm7, mm7 - - mov eax, FiltPtr - mov ebx, ReconPtr - mov edx, DctInputPtr - - /* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/ - - /* ITERATION 1 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 2 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw 
mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 3 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 4 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 5 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* 
dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 6 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 7 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* 
write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - /* ITERATION 8 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm1, [ebx] /* mm1 = ReconPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - movq mm3, mm1 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */ - psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */ - movq [edx], mm0 /* write answer out */ - movq [8 + edx], mm2 /* write answer out */ - /* Increment pointers */ - add edx, 16 - add eax, PixelsPerLine - add ebx, ReconPixelsPerLine - - - - - - }; - -#endif -} - -static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine) -{ - -#if 0 - int i; - /* For each block row */ - for (i=8; i; i--) { - /* INTRA mode so code raw image data */ - /* We convert the data to 8 bit signed (by subtracting 128) as - this reduces the internal precision requirments in the DCT - transform. 
*/ - DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128); - DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128); - DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128); - DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128); - DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128); - DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128); - DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128); - DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128); - - /* Start next row */ - FiltPtr += PixelsPerLine; - DctInputPtr += 8; - } - -#else - __asm { - align 16 - - pxor mm7, mm7 - - mov eax, FiltPtr - mov ebx, DctInputPtr - - movq mm1, V128 - - /* ITERATION 1 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 2 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 3 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation 
*/ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 4 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 5 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 6 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 7 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, 
mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - - /* ITERATION 8 */ - movq mm0, [eax] /* mm0 = FiltPtr */ - movq mm2, mm0 /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */ - punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - psubw mm0, mm1 /* mm0 = FiltPtr - 128 */ - psubw mm2, mm1 /* mm2 = FiltPtr - 128 */ - movq [ebx], mm0 /* write answer out */ - movq [8 + ebx], mm2 /* write answer out */ - /* Increment pointers */ - add ebx, 16 - add eax, PixelsPerLine - - }; - -#endif -} - - - - -static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1, - unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - -#if 0 - int i; - - /* For each block row */ - for (i=8; i; i--) { - DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0])); - DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1])); - DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2])); - DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3])); - DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4])); - DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5])); - DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6])); - DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7])); - - /* Start next row */ - FiltPtr += PixelsPerLine; - ReconPtr1 += ReconPixelsPerLine; - 
ReconPtr2 += ReconPixelsPerLine; - DctInputPtr += 8; - } -#else - - __asm { - align 16 - - pxor mm7, mm7 - - mov eax, FiltPtr - mov ebx, ReconPtr1 - mov ecx, ReconPtr2 - mov edx, DctInputPtr - - /* ITERATION 1 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 2 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - 
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 3 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, 
PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 4 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 5 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw 
mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 6 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 7 */ - movq 
mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - - /* ITERATION 8 */ - movq mm0, [eax] ; /* mm0 = FiltPtr */ - movq mm1, [ebx] ; /* mm1 = ReconPtr1 */ - movq mm4, [ecx] ; /* mm1 = ReconPtr2 */ - movq mm2, mm0 ; /* dup to prepare for up conversion */ - movq mm3, mm1 ; /* dup to prepare for up conversion */ - movq mm5, mm4 ; /* dup to prepare for up conversion */ - ; /* convert from UINT8 to INT16 */ - punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */ - punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */ - punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */ - punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */ - punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */ - punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */ - ; /* average ReconPtr1 and ReconPtr2 */ - paddw mm1, mm4 ; /* mm1 = 
ReconPtr1 + ReconPtr2 */ - paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */ - psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - movq [edx], mm0 ; /* write answer out */ - movq [8 + edx], mm2 ; /* write answer out */ - ; /* Increment pointers */ - add edx, 16 ; - add eax, PixelsPerLine ; - add ebx, ReconPixelsPerLine ; - add ecx, ReconPixelsPerLine ; - - }; - - - - - -#endif -} - -static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2) -{ - -#if 0 - ogg_uint32_t SadValue; - ogg_uint32_t SadValue1; - - SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) + - DSP_OP_ABS_DIFF (Src1[1], Src2[1]) + - DSP_OP_ABS_DIFF (Src1[2], Src2[2]) + - DSP_OP_ABS_DIFF (Src1[3], Src2[3]); - - SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) + - DSP_OP_ABS_DIFF (Src1[5], Src2[5]) + - DSP_OP_ABS_DIFF (Src1[6], Src2[6]) + - DSP_OP_ABS_DIFF (Src1[7], Src2[7]); - - SadValue = ( SadValue > SadValue1 ) ? 
SadValue : SadValue1; - - return SadValue; - -#else - ogg_uint32_t MaxSad; - - - __asm { - align 16 - mov ebx, Src1 - mov ecx, Src2 - - - pxor mm6, mm6 ; /* zero out mm6 for unpack */ - pxor mm7, mm7 ; /* zero out mm7 for unpack */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [ecx] ; - - movq mm2, mm0 ; - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* ; unpack low four bytes to higher precision */ - punpckhbw mm1, mm7 ; /* ; unpack high four bytes to higher precision */ - - movq mm2, mm0 ; - movq mm3, mm1 ; - psrlq mm2, 32 ; /* fold and add */ - psrlq mm3, 32 ; - paddw mm0, mm2 ; - paddw mm1, mm3 ; - movq mm2, mm0 ; - movq mm3, mm1 ; - psrlq mm2, 16 ; - psrlq mm3, 16 ; - paddw mm0, mm2 ; - paddw mm1, mm3 ; - - psubusw mm1, mm0 ; - paddw mm1, mm0 ; /* mm1 = max(mm1, mm0) */ - movd eax, mm1 ; - - and eax, 0xffff - mov MaxSad, eax - }; - return MaxSad; - - - - - -#endif -} - - - - -static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2, - ogg_uint32_t stride) -{ - -#if 0 - ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0}; - ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0}; - ogg_uint32_t MaxSad = 0; - ogg_uint32_t i; - - for ( i = 0; i < 4; i++ ){ - SadValue[0] += abs(Src1[0] - Src2[0]); - SadValue[1] += abs(Src1[1] - Src2[1]); - SadValue[2] += abs(Src1[2] - Src2[2]); - SadValue[3] += abs(Src1[3] - Src2[3]); - SadValue[4] += abs(Src1[4] - Src2[4]); - SadValue[5] += abs(Src1[5] - Src2[5]); - SadValue[6] += abs(Src1[6] - Src2[6]); - SadValue[7] += abs(Src1[7] - Src2[7]); - - Src1 += stride; - Src2 += stride; - } - - for ( i = 0; i < 4; i++ ){ - SadValue2[0] += abs(Src1[0] - Src2[0]); - SadValue2[1] += abs(Src1[1] - Src2[1]); - SadValue2[2] += abs(Src1[2] - Src2[2]); - SadValue2[3] += abs(Src1[3] - Src2[3]); - SadValue2[4] += abs(Src1[4] - Src2[4]); - SadValue2[5] += abs(Src1[5] - Src2[5]); - SadValue2[6] += abs(Src1[6] - Src2[6]); - 
SadValue2[7] += abs(Src1[7] - Src2[7]); - - Src1 += stride; - Src2 += stride; - } - - for ( i = 0; i < 8; i++ ){ - if ( SadValue[i] > MaxSad ) - MaxSad = SadValue[i]; - if ( SadValue2[i] > MaxSad ) - MaxSad = SadValue2[i]; - } - - return MaxSad; -#else - ogg_uint32_t MaxSad; - - - __asm { - align 16 - mov ebx, Src1 - mov ecx, Src2 - - pxor mm3, mm3 ; /* zero out mm3 for unpack */ - pxor mm4, mm4 ; /* mm4 low sum */ - pxor mm5, mm5 ; /* mm5 high sum */ - pxor mm6, mm6 ; /* mm6 low sum */ - pxor mm7, mm7 ; /* mm7 high sum */ - mov edi, 4 ; /* 4 rows */ - label_1: ; - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [ecx] ; /* take 8 bytes */ - - movq mm2, mm0 ; - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm3 ; /* unpack to higher precision for accumulation */ - paddw mm4, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm3 ; /* unpack high four bytes to higher precision */ - paddw mm5, mm1 ; /* accumulate difference... */ - add ebx, stride ; /* Inc pointer into the new data */ - add ecx, stride ; /* Inc pointer into the new data */ - - dec edi ; - jnz label_1 ; - - mov edi, 4 ; /* 4 rows */ - label_2: ; - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [ecx] ; /* take 8 bytes */ - - movq mm2, mm0 ; - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm3 ; /* unpack to higher precision for accumulation */ - paddw mm6, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm3 ; /* unpack high four bytes to higher precision */ - paddw mm7, mm1 ; /* accumulate difference... 
*/ - add ebx, stride ; /* Inc pointer into the new data */ - add ecx, stride ; /* Inc pointer into the new data */ - - dec edi ; - jnz label_2 ; - - psubusw mm7, mm6 ; - paddw mm7, mm6 ; /* mm7 = max(mm7, mm6) */ - psubusw mm5, mm4 ; - paddw mm5, mm4 ; /* mm5 = max(mm5, mm4) */ - psubusw mm7, mm5 ; - paddw mm7, mm5 ; /* mm7 = max(mm5, mm7) */ - movq mm6, mm7 ; - psrlq mm6, 32 ; - psubusw mm7, mm6 ; - paddw mm7, mm6 ; /* mm7 = max(mm5, mm7) */ - movq mm6, mm7 ; - psrlq mm6, 16 ; - psubusw mm7, mm6 ; - paddw mm7, mm6 ; /* mm7 = max(mm5, mm7) */ - movd eax, mm7 ; - and eax, 0xffff ; - - mov MaxSad, eax - }; - - return MaxSad; - - -#endif -} - -static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2) -{ - -#if 0 - ogg_uint32_t i; - ogg_uint32_t sad = 0; - - for (i=8; i; i--) { - sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]); - sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]); - sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]); - sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]); - sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]); - sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]); - sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]); - sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]); - - /* Step to next row of block. */ - ptr1 += stride1; - ptr2 += stride2; - } - - return sad; -#else - ogg_uint32_t DiffVal; - - __asm { - align 16 - - mov ebx, ptr1 - mov edx, ptr2 - - pxor mm6, mm6 ; /* zero out mm6 for unpack */ - pxor mm7, mm7 ; /* mm7 contains the result */ - - ; /* ITERATION 1 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... 
*/ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add edx, stride2 ; /* Inc pointer into ref data */ - - ; /* ITERATION 2 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add edx, stride2 ; /* Inc pointer into ref data */ - - - ; /* ITERATION 3 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add edx, stride2 ; /* Inc pointer into ref data */ - - ; /* ITERATION 4 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... 
*/ - add edx, stride2 ; /* Inc pointer into ref data */ - - - ; /* ITERATION 5 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add edx, stride2 ; /* Inc pointer into ref data */ - - - ; /* ITERATION 6 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add edx, stride2 ; /* Inc pointer into ref data */ - - - ; /* ITERATION 7 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... 
*/ - add edx, stride2 ; /* Inc pointer into ref data */ - - - - ; /* ITERATION 8 */ - movq mm0, [ebx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, stride1 ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add edx, stride2 ; /* Inc pointer into ref data */ - - - - ; /* ------ */ - - movq mm0, mm7 ; - psrlq mm7, 32 ; - paddw mm7, mm0 ; - movq mm0, mm7 ; - psrlq mm7, 16 ; - paddw mm7, mm0 ; - movd eax, mm7 ; - and eax, 0xffff ; - - mov DiffVal, eax - }; - - return DiffVal; - - - -#endif -} - -static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2, - ogg_uint32_t thres) -{ -#if 0 - ogg_uint32_t i; - ogg_uint32_t sad = 0; - - for (i=8; i; i--) { - sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]); - sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]); - sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]); - sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]); - sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]); - sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]); - sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]); - sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]); - - if (sad > thres ) - break; - - /* Step to next row of block. 
*/ - ptr1 += stride1; - ptr2 += stride2; - } - - return sad; -#else - return sad8x8__mmx (ptr1, stride1, ptr2, stride2); -#endif -} - - -static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride, - ogg_uint32_t thres) -{ -#if 0 - ogg_uint32_t i; - ogg_uint32_t sad = 0; - - for (i=8; i; i--) { - sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0])); - sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1])); - sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2])); - sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3])); - sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4])); - sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5])); - sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6])); - sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7])); - - if ( sad > thres ) - break; - - /* Step to next row of block. 
*/ - SrcData += SrcStride; - RefDataPtr1 += RefStride; - RefDataPtr2 += RefStride; - } - - return sad; -#else - ogg_uint32_t DiffVal; - - __asm { - align 16 - - mov ebx, SrcData - mov ecx, RefDataPtr1 - mov edx, RefDataPtr2 - - - pcmpeqd mm5, mm5 ; /* fefefefefefefefe in mm5 */ - paddb mm5, mm5 ; - ; - pxor mm6, mm6 ; /* zero out mm6 for unpack */ - pxor mm7, mm7 ; /* mm7 contains the result */ - mov edi, 8 ; /* 8 rows */ - loop_start: ; - movq mm0, [ebx] ; /* take 8 bytes */ - - movq mm2, [ecx] ; - movq mm3, [edx] ; /* take average of mm2 and mm3 */ - movq mm1, mm2 ; - pand mm1, mm3 ; - pxor mm3, mm2 ; - pand mm3, mm5 ; - psrlq mm3, 1 ; - paddb mm1, mm3 ; - - movq mm2, mm0 ; - - psubusb mm0, mm1 ; /* A - B */ - psubusb mm1, mm2 ; /* B - A */ - por mm0, mm1 ; /* and or gives abs difference */ - movq mm1, mm0 ; - - punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */ - paddw mm7, mm0 ; /* accumulate difference... */ - punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */ - add ebx, SrcStride ; /* Inc pointer into the new data */ - paddw mm7, mm1 ; /* accumulate difference... */ - add ecx, RefStride ; /* Inc pointer into ref data */ - add edx, RefStride ; /* Inc pointer into ref data */ - - dec edi ; - jnz loop_start ; - - movq mm0, mm7 ; - psrlq mm7, 32 ; - paddw mm7, mm0 ; - movq mm0, mm7 ; - psrlq mm7, 16 ; - paddw mm7, mm0 ; - movd eax, mm7 ; - and eax, 0xffff ; - - mov DiffVal, eax - }; - - return DiffVal; - - - -#endif -} - -static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride) -{ -#if 0 - ogg_uint32_t i; - ogg_uint32_t XSum=0; - ogg_uint32_t XXSum=0; - - for (i=8; i; i--) { - /* Examine alternate pixel locations. 
*/ - XSum += DataPtr[0]; - XXSum += DataPtr[0]*DataPtr[0]; - XSum += DataPtr[1]; - XXSum += DataPtr[1]*DataPtr[1]; - XSum += DataPtr[2]; - XXSum += DataPtr[2]*DataPtr[2]; - XSum += DataPtr[3]; - XXSum += DataPtr[3]*DataPtr[3]; - XSum += DataPtr[4]; - XXSum += DataPtr[4]*DataPtr[4]; - XSum += DataPtr[5]; - XXSum += DataPtr[5]*DataPtr[5]; - XSum += DataPtr[6]; - XXSum += DataPtr[6]*DataPtr[6]; - XSum += DataPtr[7]; - XXSum += DataPtr[7]*DataPtr[7]; - - /* Step to next row of block. */ - DataPtr += Stride; - } - - /* Compute population variance as mis-match metric. */ - return (( (XXSum<<6) - XSum*XSum ) ); -#else - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - __asm { - align 16 - - mov ecx, DataPtr - - pxor mm5, mm5 ; - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov edi, 8 ; - loop_start: - movq mm0, [ecx] ; /* take 8 bytes */ - movq mm2, mm0 ; - - punpcklbw mm0, mm6 ; - punpckhbw mm2, mm6 ; - - paddw mm5, mm0 ; - paddw mm5, mm2 ; - - pmaddwd mm0, mm0 ; - pmaddwd mm2, mm2 ; - ; - paddd mm7, mm0 ; - paddd mm7, mm2 ; - - add ecx, Stride ; /* Inc pointer into src data */ - - dec edi ; - jnz loop_start ; - - movq mm0, mm5 ; - psrlq mm5, 32 ; - paddw mm5, mm0 ; - movq mm0, mm5 ; - psrlq mm5, 16 ; - paddw mm5, mm0 ; - movd edi, mm5 ; - movsx edi, di ; - mov eax, edi ; - - movq mm0, mm7 ; - psrlq mm7, 32 ; - paddd mm7, mm0 ; - movd ebx, mm7 ; - - mov XSum, eax - mov XXSum, ebx; - - }; - /* Compute population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum ) ); - - - -#endif -} - -static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr, ogg_uint32_t RefStride) -{ - -#if 0 - ogg_uint32_t i; - ogg_uint32_t XSum=0; - ogg_uint32_t XXSum=0; - ogg_int32_t DiffVal; - - for (i=8; i; i--) { - DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - /* Step to next row of block. */ - SrcData += SrcStride; - RefDataPtr += RefStride; - } - - /* Compute and return population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum )); -#else - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - - __asm { - align 16 - - mov ecx, SrcData - mov edx, RefDataPtr - - pxor mm5, mm5 ; - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov edi, 8 ; - loop_start: ; - movq mm0, [ecx] ; /* take 8 bytes */ - movq mm1, [edx] ; - movq mm2, mm0 ; - movq mm3, mm1 ; - - punpcklbw mm0, mm6 ; - punpcklbw mm1, mm6 ; - punpckhbw mm2, mm6 ; - punpckhbw mm3, mm6 ; - - psubsw mm0, mm1 ; - psubsw mm2, mm3 ; - - paddw mm5, mm0 ; - paddw mm5, mm2 ; - - pmaddwd mm0, mm0 ; - pmaddwd mm2, mm2 ; - ; - paddd mm7, mm0 ; - paddd mm7, mm2 ; - - add ecx, SrcStride ; /* Inc pointer into src data */ - add edx, RefStride ; /* Inc pointer into ref data */ - - dec edi ; - jnz loop_start ; - - movq mm0, mm5 ; - psrlq mm5, 32 ; - paddw mm5, mm0 ; - movq mm0, mm5 ; - psrlq mm5, 16 ; - paddw mm5, mm0 ; - movd edi, mm5 ; - movsx edi, di ; - mov eax, edi ; - - movq mm0, mm7 ; - psrlq mm7, 32 ; - paddd mm7, mm0 ; - movd ebx, mm7 ; - - mov XSum, eax - mov XXSum, ebx - - }; - - /* Compute and return population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum )); - - -#endif -} - -static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride) -{ -#if 0 - ogg_uint32_t i; - ogg_uint32_t XSum=0; - ogg_uint32_t XXSum=0; - ogg_int32_t DiffVal; - - for (i=8; i; i--) { - DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7])); - XSum += DiffVal; - XXSum += DiffVal*DiffVal; - - /* Step to next row of block. */ - SrcData += SrcStride; - RefDataPtr1 += RefStride; - RefDataPtr2 += RefStride; - } - - /* Compute and return population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum )); -#else - ogg_uint32_t XSum; - ogg_uint32_t XXSum; - - __asm { - align 16 - - mov ebx, SrcData - mov ecx, RefDataPtr1 - mov edx, RefDataPtr2 - - pcmpeqd mm4, mm4 ; /* fefefefefefefefe in mm4 */ - paddb mm4, mm4 ; - pxor mm5, mm5 ; - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov edi, 8 ; - loop_start: ; - movq mm0, [ebx] ; /* take 8 bytes */ - - movq mm2, [ecx] ; - movq mm3, [edx] ; /* take average of mm2 and mm3 */ - movq mm1, mm2 ; - pand mm1, mm3 ; - pxor mm3, mm2 ; - pand mm3, mm4 ; - psrlq mm3, 1 ; - paddb mm1, mm3 ; - - movq mm2, mm0 ; - movq mm3, mm1 ; - - punpcklbw mm0, mm6 ; - punpcklbw mm1, mm6 ; - punpckhbw mm2, mm6 ; - punpckhbw mm3, mm6 ; - - psubsw mm0, mm1 ; - psubsw mm2, mm3 ; - - paddw mm5, mm0 ; - paddw mm5, mm2 ; - - pmaddwd mm0, mm0 ; - pmaddwd mm2, mm2 ; - ; - paddd mm7, mm0 ; - paddd mm7, mm2 ; - - add ebx, SrcStride ; /* Inc pointer into src data */ - add ecx, RefStride ; /* Inc pointer into ref data */ - add edx, RefStride ; /* Inc pointer into ref data */ - - dec edi ; - jnz loop_start ; - - movq mm0, mm5 ; - psrlq mm5, 32 ; - paddw mm5, mm0 ; - movq mm0, mm5 ; - psrlq mm5, 16 ; - paddw mm5, mm0 ; - movd edi, mm5 ; - movsx edi, di ; - mov XSum, edi ; /* movl eax, edi ; Modified for vc to resuse eax*/ - - movq mm0, mm7 ; - psrlq mm7, 32 ; - paddd mm7, mm0 ; - movd XXSum, mm7 ; /*movd eax, mm7 ; Modified for vc to reuse eax */ - }; - - return (( (XXSum<<6) - XSum*XSum )); - -#endif -} - -static void restore_fpu (void) -{ - - __asm { - emms - } - -} - -void dsp_mmx_init(DspFunctions *funcs) -{ - funcs->restore_fpu = restore_fpu; - funcs->sub8x8 = sub8x8__mmx; - funcs->sub8x8_128 = sub8x8_128__mmx; - funcs->sub8x8avg2 = sub8x8avg2__mmx; - funcs->row_sad8 = row_sad8__mmx; - funcs->col_sad8x8 = col_sad8x8__mmx; - funcs->sad8x8 = sad8x8__mmx; - funcs->sad8x8_thres = sad8x8_thres__mmx; - funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx; - funcs->intra8x8_err = intra8x8_err__mmx; - funcs->inter8x8_err = 
inter8x8_err__mmx; - funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx; -} - diff --git a/Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c deleted file mode 100644 index 65cd9c367..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c +++ /dev/null @@ -1,333 +0,0 @@ -;//========================================================================== -;// -;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY -;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR -;// PURPOSE. -;// -;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved. -;// -;//-------------------------------------------------------------------------- - -#include "theora/theora.h" -#include "../codec_internal.h" -#include "../dsp.h" - - -static const ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15; -static const ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83; -static const ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4db; -static const ogg_int64_t xC4S4 = 0x0b505b505b505b505; -static const ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3a; -static const ogg_int64_t xC6S2 = 0x061f861f861f861f8; -static const ogg_int64_t xC7S1 = 0x031f131f131f131f1; - - -static __inline void Transpose_mmx( ogg_int16_t *InputData1, ogg_int16_t *OutputData1, - ogg_int16_t *InputData2, ogg_int16_t *OutputData2) -{ - - __asm { - align 16 - mov eax, InputData1 - mov ebx, InputData2 - mov ecx, OutputData1 - mov edx, OutputData2 - - - movq mm0, [eax] ; /* mm0 = a0 a1 a2 a3 */ - movq mm4, [ebx] ; /* mm4 = e4 e5 e6 e7 */ - movq mm1, [16 + eax] ; /* mm1 = b0 b1 b2 b3 */ - movq mm5, [16 + ebx] ; /* mm5 = f4 f5 f6 f7 */ - movq mm2, [32 + eax] ; /* mm2 = c0 c1 c2 c3 */ - movq mm6, [32 + ebx] ; /* mm6 = g4 g5 g6 g7 */ - movq mm3, [48 + eax] ; /* mm3 = d0 d1 d2 d3 */ - movq [16 + ecx], mm1 ; /* save b0 b1 b2 b3 */ - movq mm7, [48 + ebx] ; /* mm7 = h0 h1 h2 h3 */ - ; /* Transpose 
2x8 block */ - movq mm1, mm4 ; /* mm1 = e3 e2 e1 e0 */ - punpcklwd mm4, mm5 ; /* mm4 = f1 e1 f0 e0 */ - movq [ecx], mm0 ; /* save a3 a2 a1 a0 */ - punpckhwd mm1, mm5 ; /* mm1 = f3 e3 f2 e2 */ - movq mm0, mm6 ; /* mm0 = g3 g2 g1 g0 */ - punpcklwd mm6, mm7 ; /* mm6 = h1 g1 h0 g0 */ - movq mm5, mm4 ; /* mm5 = f1 e1 f0 e0 */ - punpckldq mm4, mm6 ; /* mm4 = h0 g0 f0 e0 = MM4 */ - punpckhdq mm5, mm6 ; /* mm5 = h1 g1 f1 e1 = MM5 */ - movq mm6, mm1 ; /* mm6 = f3 e3 f2 e2 */ - movq [edx], mm4 ; - punpckhwd mm0, mm7 ; /* mm0 = h3 g3 h2 g2 */ - movq [16 + edx], mm5 ; - punpckhdq mm6, mm0 ; /* mm6 = h3 g3 f3 e3 = MM7 */ - movq mm4, [ecx] ; /* mm4 = a3 a2 a1 a0 */ - punpckldq mm1, mm0 ; /* mm1 = h2 g2 f2 e2 = MM6 */ - movq mm5, [16 + ecx] ; /* mm5 = b3 b2 b1 b0 */ - movq mm0, mm4 ; /* mm0 = a3 a2 a1 a0 */ - movq [48 + edx], mm6 ; - punpcklwd mm0, mm5 ; /* mm0 = b1 a1 b0 a0 */ - movq [32 + edx], mm1 ; - punpckhwd mm4, mm5 ; /* mm4 = b3 a3 b2 a2 */ - movq mm5, mm2 ; /* mm5 = c3 c2 c1 c0 */ - punpcklwd mm2, mm3 ; /* mm2 = d1 c1 d0 c0 */ - movq mm1, mm0 ; /* mm1 = b1 a1 b0 a0 */ - punpckldq mm0, mm2 ; /* mm0 = d0 c0 b0 a0 = MM0 */ - punpckhdq mm1, mm2 ; /* mm1 = d1 c1 b1 a1 = MM1 */ - movq mm2, mm4 ; /* mm2 = b3 a3 b2 a2 */ - movq [ecx], mm0 ; - punpckhwd mm5, mm3 ; /* mm5 = d3 c3 d2 c2 */ - movq [16 + ecx], mm1 ; - punpckhdq mm4, mm5 ; /* mm4 = d3 c3 b3 a3 = MM3 */ - punpckldq mm2, mm5 ; /* mm2 = d2 c2 b2 a2 = MM2 */ - movq [48 + ecx], mm4 ; - movq [32 + ecx], mm2 ; - - }; - - -} - -static __inline void Fdct_mmx( ogg_int16_t *InputData1, ogg_int16_t *InputData2, ogg_int16_t *temp) -{ - - __asm { - align 16 - - - mov eax, InputData1 - mov ebx, InputData2 - mov ecx, temp - movq mm0, [eax] ; - movq mm1, [16 + eax] ; - movq mm2, [48 + eax] ; - movq mm3, [16 + ebx] ; - movq mm4, mm0 ; - movq mm5, mm1 ; - movq mm6, mm2 ; - movq mm7, mm3 ; - ; - paddsw mm0, [48 + ebx] ; /* mm0 = ip0 + ip7 = is07 */ - paddsw mm1, [32 + eax] ; /* mm1 = ip1 + ip2 = is12 */ - paddsw mm2, [ebx] ; /* mm2 = ip3 
+ ip4 = is34 */ - paddsw mm3, [32 + ebx] ; /* mm3 = ip5 + ip6 = is56 */ - psubsw mm4, [48 + ebx] ; /* mm4 = ip0 - ip7 = id07 */ - psubsw mm5, [32 + eax] ; /* mm5 = ip1 - ip2 = id12 */ - ; - psubsw mm0, mm2 ; /* mm0 = is07 - is34 */ - ; - paddsw mm2, mm2 ; - ; - psubsw mm6, [ebx] ; /* mm6 = ip3 - ip4 = id34 */ - ; - paddsw mm2, mm0 ; /* mm2 = is07 + is34 = is0734 */ - psubsw mm1, mm3 ; /* mm1 = is12 - is56 */ - movq [ecx], mm0 ; /* Save is07 - is34 to free mm0; */ - paddsw mm3, mm3 ; - paddsw mm3, mm1 ; /* mm3 = is12 + 1s56 = is1256 */ - ; - psubsw mm7, [32 + ebx] ; /* mm7 = ip5 - ip6 = id56 */ - ; /* ------------------------------------------------------------------- */ - psubsw mm5, mm7 ; /* mm5 = id12 - id56 */ - paddsw mm7, mm7 ; - paddsw mm7, mm5 ; /* mm7 = id12 + id56 */ - ; /* ------------------------------------------------------------------- */ - psubsw mm2, mm3 ; /* mm2 = is0734 - is1256 */ - paddsw mm3, mm3 ; - ; - movq mm0, mm2 ; /* make a copy */ - paddsw mm3, mm2 ; /* mm3 = is0734 + is1256 */ - ; - pmulhw mm0, xC4S4 ; /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ - paddw mm0, mm2 ; /* mm0 = xC4S4 * ( is0734 - is1256 ) */ - psrlw mm2, 15 ; - paddw mm0, mm2 ; /* Truncate mm0, now it is op[4] */ - ; - movq mm2, mm3 ; - movq [ebx], mm0 ; /* save ip4, now mm0,mm2 are free */ - ; - movq mm0, mm3 ; - pmulhw mm3, xC4S4 ; /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ - ; - psrlw mm2, 15 ; - paddw mm3, mm0 ; /* mm3 = xC4S4 * ( is0734 +is1256 ) */ - paddw mm3, mm2 ; /* Truncate mm3, now it is op[0] */ - ; - movq [eax], mm3 ; - ; /* ------------------------------------------------------------------- */ - movq mm3, [ecx] ; /* mm3 = irot_input_y */ - pmulhw mm3, xC2S6 ; /* mm3 = xC2S6 * irot_input_y - irot_input_y */ - ; - movq mm2, [ecx] ; - movq mm0, mm2 ; - ; - psrlw mm2, 15 ; /* mm3 = xC2S6 * irot_input_y */ - paddw mm3, mm0 ; - ; - paddw mm3, mm2 ; /* Truncated */ - movq mm0, mm5 ; - ; - movq mm2, mm5 ; - pmulhw mm0, xC6S2 ; /* mm0 
= xC6S2 * irot_input_x */ - ; - psrlw mm2, 15 ; - paddw mm0, mm2 ; /* Truncated */ - ; - paddsw mm3, mm0 ; /* ip[2] */ - movq [32 + eax], mm3 ; /* Save ip2 */ - ; - movq mm0, mm5 ; - movq mm2, mm5 ; - ; - pmulhw mm5, xC2S6 ; /* mm5 = xC2S6 * irot_input_x - irot_input_x */ - psrlw mm2, 15 ; - ; - movq mm3, [ecx] ; - paddw mm5, mm0 ; /* mm5 = xC2S6 * irot_input_x */ - ; - paddw mm5, mm2 ; /* Truncated */ - movq mm2, mm3 ; - ; - pmulhw mm3, xC6S2 ; /* mm3 = xC6S2 * irot_input_y */ - psrlw mm2, 15 ; - ; - paddw mm3, mm2 ; /* Truncated */ - psubsw mm3, mm5 ; - ; - movq [32 + ebx], mm3 ; - ; /* ------------------------------------------------------------------- */ - movq mm0, xC4S4 ; - movq mm2, mm1 ; - movq mm3, mm1 ; - ; - pmulhw mm1, mm0 ; /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ - psrlw mm2, 15 ; - ; - paddw mm1, mm3 ; /* mm0 = xC4S4 * ( is12 - is56 ) */ - paddw mm1, mm2 ; /* Truncate mm1, now it is icommon_product1 */ - ; - movq mm2, mm7 ; - movq mm3, mm7 ; - ; - pmulhw mm7, mm0 ; /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ - psrlw mm2, 15 ; - ; - paddw mm7, mm3 ; /* mm7 = xC4S4 * ( id12 + id56 ) */ - paddw mm7, mm2 ; /* Truncate mm7, now it is icommon_product2 */ - ; /* ------------------------------------------------------------------- */ - pxor mm0, mm0 ; /* Clear mm0 */ - psubsw mm0, mm6 ; /* mm0 = - id34 */ - ; - psubsw mm0, mm7 ; /* mm0 = - ( id34 + idcommon_product2 ) */ - paddsw mm6, mm6 ; - paddsw mm6, mm0 ; /* mm6 = id34 - icommon_product2 */ - ; - psubsw mm4, mm1 ; /* mm4 = id07 - icommon_product1 */ - paddsw mm1, mm1 ; - paddsw mm1, mm4 ; /* mm1 = id07 + icommon_product1 */ - ; /* ------------------------------------------------------------------- */ - movq mm7, xC1S7 ; - movq mm2, mm1 ; - ; - movq mm3, mm1 ; - pmulhw mm1, mm7 ; /* mm1 = xC1S7 * irot_input_x - irot_input_x */ - ; - movq mm7, xC7S1 ; - psrlw mm2, 15 ; - ; - paddw mm1, mm3 ; /* mm1 = xC1S7 * irot_input_x */ - paddw mm1, mm2 ; /* Trucated */ - ; - pmulhw mm3, mm7 ; /* 
mm3 = xC7S1 * irot_input_x */ - paddw mm3, mm2 ; /* Truncated */ - ; - movq mm5, mm0 ; - movq mm2, mm0 ; - ; - movq mm7, xC1S7 ; - pmulhw mm0, mm7 ; /* mm0 = xC1S7 * irot_input_y - irot_input_y */ - ; - movq mm7, xC7S1 ; - psrlw mm2, 15 ; - ; - paddw mm0, mm5 ; /* mm0 = xC1S7 * irot_input_y */ - paddw mm0, mm2 ; /* Truncated */ - ; - pmulhw mm5, mm7 ; /* mm5 = xC7S1 * irot_input_y */ - paddw mm5, mm2 ; /* Truncated */ - ; - psubsw mm1, mm5 ; /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ - paddsw mm3, mm0 ; /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ - ; - movq [16 + eax], mm1 ; - movq [48 + ebx], mm3 ; - ; /* ------------------------------------------------------------------- */ - movq mm0, xC3S5 ; - movq mm1, xC5S3 ; - ; - movq mm5, mm6 ; - movq mm7, mm6 ; - ; - movq mm2, mm4 ; - movq mm3, mm4 ; - ; - pmulhw mm4, mm0 ; /* mm4 = xC3S5 * irot_input_x - irot_input_x */ - pmulhw mm6, mm1 ; /* mm6 = xC5S3 * irot_input_y - irot_input_y */ - ; - psrlw mm2, 15 ; - psrlw mm5, 15 ; - ; - paddw mm4, mm3 ; /* mm4 = xC3S5 * irot_input_x */ - paddw mm6, mm7 ; /* mm6 = xC5S3 * irot_input_y */ - ; - paddw mm4, mm2 ; /* Truncated */ - paddw mm6, mm5 ; /* Truncated */ - ; - psubsw mm4, mm6 ; /* ip3 */ - movq [48 + eax], mm4 ; - ; - movq mm4, mm3 ; - movq mm6, mm7 ; - ; - pmulhw mm3, mm1 ; /* mm3 = xC5S3 * irot_input_x - irot_input_x */ - pmulhw mm7, mm0 ; /* mm7 = xC3S5 * irot_input_y - irot_input_y */ - ; - paddw mm4, mm2 ; - paddw mm6, mm5 ; - ; - paddw mm3, mm4 ; /* mm3 = xC5S3 * irot_input_x */ - paddw mm7, mm6 ; /* mm7 = xC3S5 * irot_input_y */ - ; - paddw mm3, mm7 ; /* ip5 */ - movq [16 + ebx], mm3 ; - -}; - -} - - -static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData) -{ - - static ogg_int16_t tmp[32]; - ogg_int16_t* align_tmp = (ogg_int16_t*)((unsigned char*)tmp + (16 - ((int)tmp)&15)); - - - Transpose_mmx(InputData, OutputData, InputData + 4, OutputData + 4); - Fdct_mmx(OutputData, OutputData + 4, align_tmp); - - 
Transpose_mmx(InputData + 32, OutputData + 32, InputData + 36, OutputData + 36); - Fdct_mmx(OutputData+32, OutputData + 36, align_tmp); - - Transpose_mmx(OutputData, OutputData, OutputData + 32, OutputData + 32); - Fdct_mmx(OutputData, OutputData + 32, align_tmp); - - Transpose_mmx(OutputData + 4, OutputData + 4, OutputData + 36, OutputData + 36); - Fdct_mmx(OutputData + 4, OutputData + 36, align_tmp); - - __asm emms - -} - -void dsp_mmx_fdct_init(DspFunctions *funcs) -{ - funcs->fdct_short = fdct_short__mmx; -} diff --git a/Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c b/Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c deleted file mode 100644 index 1e0f1f095..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c +++ /dev/null @@ -1,197 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: reconstruct.c,v 1.6 2003/12/03 08:59:41 arc Exp $ - - ********************************************************************/ - -#include "../codec_internal.h" - - -static const unsigned __int64 V128 = 0x8080808080808080; - -static void copy8x8__mmx (unsigned char *src, - unsigned char *dest, - unsigned int stride) -{ - - //Is this even the fastest way to do this? 
- __asm { - align 16 - - mov eax, src - mov ebx, dest - mov ecx, stride - - lea edi, [ecx + ecx * 2] - movq mm0, [eax] - movq mm1, [eax + ecx] - movq mm2, [eax + ecx * 2] - movq mm3, [eax + edi] - lea eax, [eax + ecx * 4] - movq [ebx], mm0 - movq [ebx + ecx], mm1 - movq [ebx + ecx * 2], mm2 - movq [ebx + edi], mm3 - lea ebx, [ebx + ecx * 4] - movq mm0, [eax] - movq mm1, [eax + ecx] - movq mm2, [eax + ecx * 2] - movq mm3, [eax + edi] - movq [ebx], mm0 - movq [ebx + ecx], mm1 - movq [ebx + ecx * 2], mm2 - movq [ebx + edi], mm3 - - }; - -} - -static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - - __asm { - align 16 - - mov eax, ReconPtr - mov ebx, ChangePtr - mov ecx, LineStep - - movq mm0, V128 - - lea edi, [128 + ebx] - loop_start: - movq mm2, [ebx] - - packsswb mm2, [8 + ebx] - por mm0, mm0 - pxor mm2, mm0 - lea ebx, [16 + ebx] - cmp ebx, edi - - movq [eax], mm2 - - - - lea eax, [eax + ecx] - jc loop_start - - - }; - -} - - - - - -static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr, - ogg_int16_t *ChangePtr, ogg_uint32_t LineStep) -{ - - __asm { - - align 16 - - mov eax, ReconPtr - mov ebx, ChangePtr - mov ecx, LineStep - mov edx, RefPtr - - pxor mm0, mm0 - lea edi, [128 + ebx] - - loop_start: - movq mm2, [edx] - - movq mm4, [ebx] - movq mm3, mm2 - movq mm5, [8 + ebx] - punpcklbw mm2, mm0 - paddsw mm2, mm4 - punpckhbw mm3, mm0 - paddsw mm3, mm5 - add edx, ecx - packuswb mm2, mm3 - lea ebx, [16 + ebx] - cmp ebx, edi - - movq [eax], mm2 - - lea eax, [eax + ecx] - jc loop_start - - }; -} - - - - -static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1, - unsigned char *RefPtr2, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - __asm { - align 16 - - mov eax, ReconPtr - mov ebx, ChangePtr - mov ecx, RefPtr1 - mov edx, RefPtr2 - - pxor mm0, mm0 - lea edi, [128 + ebx] - - loop_start: - movq mm2, [ecx] - movq mm4, [edx] - - movq mm3, mm2 - punpcklbw 
mm2, mm0 - movq mm5, mm4 - movq mm6, [ebx] - punpckhbw mm3, mm0 - movq mm7, [8 + ebx] - punpcklbw mm4, mm0 - punpckhbw mm5, mm0 - paddw mm2, mm4 - paddw mm3, mm5 - psrlw mm2, 1 - psrlw mm3, 1 - paddw mm2, mm6 - paddw mm3, mm7 - lea ebx, [16 + ebx] - packuswb mm2, mm3 - add ecx, LineStep - add edx, LineStep - movq [eax], mm2 - add eax, LineStep - cmp ebx, edi - jc loop_start - - }; - -} - - - - -void dsp_mmx_recon_init(DspFunctions *funcs) -{ - funcs->copy8x8 = copy8x8__mmx; - funcs->recon_intra8x8 = recon_intra8x8__mmx; - funcs->recon_inter8x8 = recon_inter8x8__mmx; - funcs->recon_inter8x8_half = recon_inter8x8_half__mmx; -} - diff --git a/Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c deleted file mode 100644 index 547e974e3..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c +++ /dev/null @@ -1,409 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dct_decode_mmx.c 15400 2008-10-15 12:10:58Z tterribe $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" - -#if defined(USE_ASM) - -static const __attribute__((aligned(8),used)) ogg_int64_t OC_V3= - 0x0003000300030003LL; -static const __attribute__((aligned(8),used)) ogg_int64_t OC_V4= - 0x0004000400040004LL; - -static void loop_filter_v(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - long esi; - _pix-=_ystride*2; - __asm__ __volatile__( - /*mm0=0*/ - "pxor %%mm0,%%mm0\n\t" - /*esi=_ystride*3*/ - "lea (%[ystride],%[ystride],2),%[s]\n\t" - /*mm7=_pix[0...8]*/ - "movq (%[pix]),%%mm7\n\t" - /*mm4=_pix[0...8+_ystride*3]*/ - "movq (%[pix],%[s]),%%mm4\n\t" - /*mm6=_pix[0...8]*/ - "movq %%mm7,%%mm6\n\t" - /*Expand unsigned _pix[0...3] to 16 bits.*/ - "punpcklbw %%mm0,%%mm6\n\t" - "movq %%mm4,%%mm5\n\t" - /*Expand unsigned _pix[4...8] to 16 bits.*/ - "punpckhbw %%mm0,%%mm7\n\t" - /*Expand other arrays too.*/ - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm5\n\t" - /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/ - "psubw %%mm4,%%mm6\n\t" - "psubw %%mm5,%%mm7\n\t" - /*mm5=mm4=_pix[0...8+_ystride]*/ - "movq (%[pix],%[ystride]),%%mm4\n\t" - /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/ - "movq (%[pix],%[ystride],2),%%mm2\n\t" - "movq %%mm4,%%mm5\n\t" - "movq %%mm2,%%mm3\n\t" - "movq %%mm2,%%mm1\n\t" - /*Expand these arrays.*/ - "punpckhbw %%mm0,%%mm5\n\t" - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm3\n\t" - "punpcklbw %%mm0,%%mm2\n\t" - /*Preload...*/ - "movq %[OC_V3],%%mm0\n\t" - /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/ - "psubw %%mm5,%%mm3\n\t" - "psubw %%mm4,%%mm2\n\t" - /*Scale by 3.*/ - "pmullw %%mm0,%%mm3\n\t" - "pmullw 
%%mm0,%%mm2\n\t" - /*Preload...*/ - "movq %[OC_V4],%%mm0\n\t" - /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+ - 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/ - "paddw %%mm7,%%mm3\n\t" - "paddw %%mm6,%%mm2\n\t" - /*Add 4.*/ - "paddw %%mm0,%%mm3\n\t" - "paddw %%mm0,%%mm2\n\t" - /*"Divide" by 8.*/ - "psraw $3,%%mm3\n\t" - "psraw $3,%%mm2\n\t" - /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/ - /*Free up mm5.*/ - "packuswb %%mm5,%%mm4\n\t" - /*mm0=L L L L*/ - "movq (%[ll]),%%mm0\n\t" - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - "movq %%mm2,%%mm5\n\t" - "pxor %%mm6,%%mm6\n\t" - "movq %%mm0,%%mm7\n\t" - "psubw %%mm0,%%mm6\n\t" - "psllw $1,%%mm7\n\t" - "psllw $1,%%mm6\n\t" - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - "pcmpgtw %%mm2,%%mm7\n\t" - "pcmpgtw %%mm6,%%mm5\n\t" - "pand %%mm7,%%mm2\n\t" - "movq %%mm0,%%mm7\n\t" - "pand %%mm5,%%mm2\n\t" - "psllw $1,%%mm7\n\t" - "movq %%mm3,%%mm5\n\t" - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-2L -2L -2L -2L*/ - /*mm7==2L 2L 2L 2L*/ - "pcmpgtw %%mm3,%%mm7\n\t" - "pcmpgtw %%mm6,%%mm5\n\t" - "pand %%mm7,%%mm3\n\t" - "movq %%mm0,%%mm7\n\t" - "pand %%mm5,%%mm3\n\t" - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - "psraw $1,%%mm6\n\t" - "movq %%mm2,%%mm5\n\t" - "psllw $1,%%mm7\n\t" - /*mm2==R_3 R_2 R_1 R_0*/ - /*mm5==R_3 R_2 R_1 R_0*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm5=R_i>L?FF:00*/ - "pcmpgtw %%mm0,%%mm5\n\t" - /*mm6=-L>R_i?FF:00*/ - "pcmpgtw %%mm2,%%mm6\n\t" - /*mm7=R_i>L?2L:0*/ - "pand %%mm5,%%mm7\n\t" - /*mm2=R_i>L?R_i-2L:R_i*/ - "psubw %%mm7,%%mm2\n\t" - "movq %%mm0,%%mm7\n\t" - /*mm5=-L>R_i||R_i>L*/ - "por %%mm6,%%mm5\n\t" - "psllw $1,%%mm7\n\t" - /*mm7=-L>R_i?2L:0*/ - "pand %%mm6,%%mm7\n\t" - "pxor %%mm6,%%mm6\n\t" - /*mm2=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm7,%%mm2\n\t" - "psubw %%mm0,%%mm6\n\t" - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - "pand %%mm2,%%mm5\n\t" - "movq %%mm0,%%mm7\n\t" - 
/*mm2=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm5,%%mm2\n\t" - "psllw $1,%%mm7\n\t" - /*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm5,%%mm2\n\t" - "movq %%mm3,%%mm5\n\t" - /*mm3==R_7 R_6 R_5 R_4*/ - /*mm5==R_7 R_6 R_5 R_4*/ - /*mm6==-L -L -L -L*/ - /*mm0==L L L L*/ - /*mm6=-L>R_i?FF:00*/ - "pcmpgtw %%mm3,%%mm6\n\t" - /*mm5=R_i>L?FF:00*/ - "pcmpgtw %%mm0,%%mm5\n\t" - /*mm7=R_i>L?2L:0*/ - "pand %%mm5,%%mm7\n\t" - /*mm2=R_i>L?R_i-2L:R_i*/ - "psubw %%mm7,%%mm3\n\t" - "psllw $1,%%mm0\n\t" - /*mm5=-L>R_i||R_i>L*/ - "por %%mm6,%%mm5\n\t" - /*mm0=-L>R_i?2L:0*/ - "pand %%mm6,%%mm0\n\t" - /*mm3=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm0,%%mm3\n\t" - /*mm5=-L>R_i||R_i>L?-R_i':0*/ - "pand %%mm3,%%mm5\n\t" - /*mm2=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm5,%%mm3\n\t" - /*mm2=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm5,%%mm3\n\t" - /*Unfortunately, there's no unsigned byte+signed byte with unsigned - saturation op code, so we have to promote things back 16 bits.*/ - "pxor %%mm0,%%mm0\n\t" - "movq %%mm4,%%mm5\n\t" - "punpcklbw %%mm0,%%mm4\n\t" - "punpckhbw %%mm0,%%mm5\n\t" - "movq %%mm1,%%mm6\n\t" - "punpcklbw %%mm0,%%mm1\n\t" - "punpckhbw %%mm0,%%mm6\n\t" - /*_pix[0...8+_ystride]+=R_i*/ - "paddw %%mm2,%%mm4\n\t" - "paddw %%mm3,%%mm5\n\t" - /*_pix[0...8+_ystride*2]-=R_i*/ - "psubw %%mm2,%%mm1\n\t" - "psubw %%mm3,%%mm6\n\t" - "packuswb %%mm5,%%mm4\n\t" - "packuswb %%mm6,%%mm1\n\t" - /*Write it back out.*/ - "movq %%mm4,(%[pix],%[ystride])\n\t" - "movq %%mm1,(%[pix],%[ystride],2)\n\t" - :[s]"=&S"(esi) - :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll), - [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4) - :"memory" - ); -} - -/*This code implements the bulk of loop_filter_h(). - Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all - four p0's to one register we must transpose the values in four mmx regs. 
- When half is done we repeat this for the rest.*/ -static void loop_filter_h4(unsigned char *_pix,long _ystride, - const ogg_int16_t *_ll){ - long esi; - long edi; - __asm__ __volatile__( - /*x x x x 3 2 1 0*/ - "movd (%[pix]),%%mm0\n\t" - /*esi=_ystride*3*/ - "lea (%[ystride],%[ystride],2),%[s]\n\t" - /*x x x x 7 6 5 4*/ - "movd (%[pix],%[ystride]),%%mm1\n\t" - /*x x x x B A 9 8*/ - "movd (%[pix],%[ystride],2),%%mm2\n\t" - /*x x x x F E D C*/ - "movd (%[pix],%[s]),%%mm3\n\t" - /*mm0=7 3 6 2 5 1 4 0*/ - "punpcklbw %%mm1,%%mm0\n\t" - /*mm2=F B E A D 9 C 8*/ - "punpcklbw %%mm3,%%mm2\n\t" - /*mm1=7 3 6 2 5 1 4 0*/ - "movq %%mm0,%%mm1\n\t" - /*mm0=F B 7 3 E A 6 2*/ - "punpckhwd %%mm2,%%mm0\n\t" - /*mm1=D 9 5 1 C 8 4 0*/ - "punpcklwd %%mm2,%%mm1\n\t" - "pxor %%mm7,%%mm7\n\t" - /*mm5=D 9 5 1 C 8 4 0*/ - "movq %%mm1,%%mm5\n\t" - /*mm1=x C x 8 x 4 x 0==pix[0]*/ - "punpcklbw %%mm7,%%mm1\n\t" - /*mm5=x D x 9 x 5 x 1==pix[1]*/ - "punpckhbw %%mm7,%%mm5\n\t" - /*mm3=F B 7 3 E A 6 2*/ - "movq %%mm0,%%mm3\n\t" - /*mm0=x E x A x 6 x 2==pix[2]*/ - "punpcklbw %%mm7,%%mm0\n\t" - /*mm3=x F x B x 7 x 3==pix[3]*/ - "punpckhbw %%mm7,%%mm3\n\t" - /*mm1=mm1-mm3==pix[0]-pix[3]*/ - "psubw %%mm3,%%mm1\n\t" - /*Save a copy of pix[2] for later.*/ - "movq %%mm0,%%mm4\n\t" - /*mm0=mm0-mm5==pix[2]-pix[1]*/ - "psubw %%mm5,%%mm0\n\t" - /*Scale by 3.*/ - "pmullw %[OC_V3],%%mm0\n\t" - /*f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/ - "paddw %%mm1,%%mm0\n\t" - /*Add 4.*/ - "paddw %[OC_V4],%%mm0\n\t" - /*"Divide" by 8, producing the residuals R_i.*/ - "psraw $3,%%mm0\n\t" - /*Now compute lflim of mm0 cf. 
Section 7.10 of the sepc.*/ - /*mm6=L L L L*/ - "movq (%[ll]),%%mm6\n\t" - /*if(R_i<-2L||R_i>2L)R_i=0:*/ - "movq %%mm0,%%mm1\n\t" - "pxor %%mm2,%%mm2\n\t" - "movq %%mm6,%%mm3\n\t" - "psubw %%mm6,%%mm2\n\t" - "psllw $1,%%mm3\n\t" - "psllw $1,%%mm2\n\t" - /*mm0==R_3 R_2 R_1 R_0*/ - /*mm1==R_3 R_2 R_1 R_0*/ - /*mm2==-2L -2L -2L -2L*/ - /*mm3==2L 2L 2L 2L*/ - "pcmpgtw %%mm0,%%mm3\n\t" - "pcmpgtw %%mm2,%%mm1\n\t" - "pand %%mm3,%%mm0\n\t" - "pand %%mm1,%%mm0\n\t" - /*if(R_i<-L)R_i'=R_i+2L; - if(R_i>L)R_i'=R_i-2L; - if(R_i<-L||R_i>L)R_i=-R_i':*/ - "psraw $1,%%mm2\n\t" - "movq %%mm0,%%mm1\n\t" - "movq %%mm6,%%mm3\n\t" - /*mm0==R_3 R_2 R_1 R_0*/ - /*mm1==R_3 R_2 R_1 R_0*/ - /*mm2==-L -L -L -L*/ - /*mm6==L L L L*/ - /*mm2=-L>R_i?FF:00*/ - "pcmpgtw %%mm0,%%mm2\n\t" - /*mm1=R_i>L?FF:00*/ - "pcmpgtw %%mm6,%%mm1\n\t" - /*mm3=2L 2L 2L 2L*/ - "psllw $1,%%mm3\n\t" - /*mm6=2L 2L 2L 2L*/ - "psllw $1,%%mm6\n\t" - /*mm3=R_i>L?2L:0*/ - "pand %%mm1,%%mm3\n\t" - /*mm6=-L>R_i?2L:0*/ - "pand %%mm2,%%mm6\n\t" - /*mm0=R_i>L?R_i-2L:R_i*/ - "psubw %%mm3,%%mm0\n\t" - /*mm1=-L>R_i||R_i>L*/ - "por %%mm2,%%mm1\n\t" - /*mm0=-L>R_i?R_i+2L:R_i*/ - "paddw %%mm6,%%mm0\n\t" - /*mm1=-L>R_i||R_i>L?R_i':0*/ - "pand %%mm0,%%mm1\n\t" - /*mm0=-L>R_i||R_i>L?0:R_i*/ - "psubw %%mm1,%%mm0\n\t" - /*mm0=-L>R_i||R_i>L?-R_i':R_i*/ - "psubw %%mm1,%%mm0\n\t" - /*_pix[1]+=R_i;*/ - "paddw %%mm0,%%mm5\n\t" - /*_pix[2]-=R_i;*/ - "psubw %%mm0,%%mm4\n\t" - /*mm5=x x x x D 9 5 1*/ - "packuswb %%mm7,%%mm5\n\t" - /*mm4=x x x x E A 6 2*/ - "packuswb %%mm7,%%mm4\n\t" - /*mm5=E D A 9 6 5 2 1*/ - "punpcklbw %%mm4,%%mm5\n\t" - /*edi=6 5 2 1*/ - "movd %%mm5,%%edi\n\t" - "movw %%di,1(%[pix])\n\t" - /*Why is there such a big stall here?*/ - "psrlq $32,%%mm5\n\t" - "shrl $16,%%edi\n\t" - "movw %%di,1(%[pix],%[ystride])\n\t" - /*edi=E D A 9*/ - "movd %%mm5,%%edi\n\t" - "movw %%di,1(%[pix],%[ystride],2)\n\t" - "shrl $16,%%edi\n\t" - "movw %%di,1(%[pix],%[s])\n\t" - :[s]"=&S"(esi),[d]"=&D"(edi), - 
[pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll) - :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4) - :"memory" - ); -} - -static void loop_filter_h(unsigned char *_pix,int _ystride, - const ogg_int16_t *_ll){ - _pix-=2; - loop_filter_h4(_pix,_ystride,_ll); - loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll); -} - -static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){ - int j; - ogg_int16_t __attribute__((aligned(8))) ll[4]; - unsigned char *cp = pbi->display_fragments; - ogg_uint32_t *bp = pbi->recon_pixel_index_table; - - if ( FLimit == 0 ) return; - ll[0]=ll[1]=ll[2]=ll[3]=FLimit; - - for ( j = 0; j < 3 ; j++){ - ogg_uint32_t *bp_begin = bp; - ogg_uint32_t *bp_end; - int stride; - int h; - - switch(j) { - case 0: /* y */ - bp_end = bp + pbi->YPlaneFragments; - h = pbi->HFragments; - stride = pbi->YStride; - break; - default: /* u,v, 4:20 specific */ - bp_end = bp + pbi->UVPlaneFragments; - h = pbi->HFragments >> 1; - stride = pbi->UVStride; - break; - } - - while(bpbp_left) - loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll); - if(bp_left>bp_begin) - loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll); - if(bp+1LastFrameRecon[bp[0]]+8,stride,ll); - if(bp+hLastFrameRecon[bp[h]],stride,ll); - } - bp++; - cp++; - } - } - } - - __asm__ __volatile__("emms\n\t"); -} - -/* install our implementation in the function table */ -void dsp_mmx_dct_decode_init(DspFunctions *funcs) -{ - funcs->LoopFilter = loop_filter_mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c deleted file mode 100644 index 6c2689e63..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c +++ /dev/null @@ -1,303 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp_mmx.c 15397 2008-10-14 02:06:24Z tterribe $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" -#include "../dsp.h" - -#if defined(USE_ASM) - -typedef unsigned long long ogg_uint64_t; - -static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL; - -#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2) -#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b))) -#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b)))) - -static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr, - ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm7, %%mm7 \n\t" - - ".rept 8 \n\t" - " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ - " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr */ - " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ - " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ - " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr) */ - " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ - " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr) */ - /* start calculation */ - " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ReconPtr */ - " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ReconPtr */ - " movq %%mm0, (%2) \n\t" /* write answer out */ - " movq %%mm2, 8(%2) \n\t" /* write answer out */ - /* Increment pointers */ - " 
add $16, %2 \n\t" - " add %3, %0 \n\t" - " add %4, %1 \n\t" - ".endr \n\t" - - : "+r" (FiltPtr), - "+r" (ReconPtr), - "+r" (DctInputPtr) - : "r" ((ogg_uint64_t)PixelsPerLine), - "r" ((ogg_uint64_t)ReconPixelsPerLine) - : "memory" - ); -} - -static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine) -{ - ogg_uint64_t ppl = PixelsPerLine; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm7, %%mm7 \n\t" - " movq %[V128], %%mm1 \n\t" - - ".rept 8 \n\t" - " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ - " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ - " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ - /* start calculation */ - " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - 128 */ - " psubw %%mm1, %%mm2 \n\t" /* mm2 = FiltPtr - 128 */ - " movq %%mm0, (%1) \n\t" /* write answer out */ - " movq %%mm2, 8(%1) \n\t" /* write answer out */ - /* Increment pointers */ - " add $16, %1 \n\t" - " add %2, %0 \n\t" - ".endr \n\t" - - : "+r" (FiltPtr), - "+r" (DctInputPtr) - : "r" (ppl), /* gcc bug? a cast won't work here, e.g. 
(ogg_uint64_t)PixelsPerLine */ - [V128] "m" (V128) - : "memory" - ); -} - -static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1, - unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr, - ogg_uint32_t PixelsPerLine, - ogg_uint32_t ReconPixelsPerLine) -{ - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm7, %%mm7 \n\t" - - ".rept 8 \n\t" - " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ - " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */ - " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */ - " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ - " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */ - " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */ - /* convert from UINT8 to INT16 */ - " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ - " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */ - " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */ - " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ - " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */ - " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */ - /* average ReconPtr1 and ReconPtr2 */ - " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */ - " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */ - " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ - " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ - " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ - " movq %%mm0, (%3) \n\t" /* write answer out */ - " movq %%mm2, 8(%3) \n\t" /* write answer out */ - /* Increment pointers */ - " add $16, %3 \n\t" - " add %4, %0 \n\t" - " add %5, %1 \n\t" - " add %5, %2 \n\t" - ".endr \n\t" - - : "+r" (FiltPtr), - "+r" (ReconPtr1), - "+r" (ReconPtr2), - "+r" (DctInputPtr) - : "r" ((ogg_uint64_t)PixelsPerLine), - "r" ((ogg_uint64_t)ReconPixelsPerLine) - : "memory" - ); -} - 
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride) -{ - ogg_uint64_t XSum; - ogg_uint64_t XXSum; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%rdi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - " movq %%mm0, %%mm2 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %3, %2 \n\t" /* Inc pointer into src data */ - - " dec %%rdi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, %%mm0 \n\t" - " psrlq $32, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movd %%mm5, %%rdi \n\t" - " movsx %%di, %%rdi \n\t" - " mov %%rdi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=r" (XSum), - "=r" (XXSum), - "+r" (DataPtr) - : "r" ((ogg_uint64_t)Stride) - : "rdi", "memory" - ); - - /* Compute population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum ) ); -} - -static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr, ogg_uint32_t RefStride) -{ - ogg_uint64_t XSum; - ogg_uint64_t XXSum; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%rdi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - " movq (%3), %%mm1 \n\t" - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpcklbw %%mm6, %%mm1 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - " punpckhbw %%mm6, %%mm3 \n\t" - - " psubsw %%mm1, %%mm0 \n\t" - " psubsw %%mm3, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %4, %2 \n\t" /* Inc pointer into src data */ - " add %5, %3 \n\t" /* Inc pointer into ref data */ - - " dec %%rdi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, %%mm0 \n\t" - " psrlq $32, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movd %%mm5, %%rdi \n\t" - " movsx %%di, %%rdi \n\t" - " mov %%rdi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=m" (XSum), - "=m" (XXSum), - "+r" (SrcData), - "+r" (RefDataPtr) - : "r" ((ogg_uint64_t)SrcStride), - "r" ((ogg_uint64_t)RefStride) - : "rdi", "memory" - ); - - /* Compute and return population variance as mis-match metric. 
*/ - return (( (XXSum<<6) - XSum*XSum )); -} - -static void restore_fpu (void) -{ - __asm__ __volatile__ ( - " emms \n\t" - ); -} - -void dsp_mmx_init(DspFunctions *funcs) -{ - funcs->restore_fpu = restore_fpu; - funcs->sub8x8 = sub8x8__mmx; - funcs->sub8x8_128 = sub8x8_128__mmx; - funcs->sub8x8avg2 = sub8x8avg2__mmx; - funcs->intra8x8_err = intra8x8_err__mmx; - funcs->inter8x8_err = inter8x8_err__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c b/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c deleted file mode 100644 index f0aeed96e..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c +++ /dev/null @@ -1,323 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp_mmxext.c 15397 2008-10-14 02:06:24Z tterribe $ - - ********************************************************************/ - -#include - -#include "../codec_internal.h" -#include "../dsp.h" - -#if defined(USE_ASM) - -typedef unsigned long long ogg_uint64_t; - -static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - - ".rept 7 \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" - " psadbw %%mm1, %%mm0 \n\t" - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ - " add %4, %2 \n\t" /* Inc pointer into ref data */ - ".endr \n\t" - - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" - " psadbw %%mm1, %%mm0 \n\t" - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ - " movd %%mm7, %0 \n\t" - - : "=r" (DiffVal), - "+r" (ptr1), - "+r" (ptr2) - : "r" ((ogg_uint64_t)stride1), - "r" ((ogg_uint64_t)stride2) - : "memory" - ); - - return DiffVal; -} - -static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, ogg_uint32_t stride1, - unsigned char *ptr2, ogg_uint32_t stride2, - ogg_uint32_t thres) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - - ".rept 8 \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" - " psadbw %%mm1, %%mm0 \n\t" - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... 
*/ - " add %4, %2 \n\t" /* Inc pointer into ref data */ - ".endr \n\t" - - " movd %%mm7, %0 \n\t" - - : "=r" (DiffVal), - "+r" (ptr1), - "+r" (ptr2) - : "r" ((ogg_uint64_t)stride1), - "r" ((ogg_uint64_t)stride2) - : "memory" - ); - - return DiffVal; -} - -static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride, - ogg_uint32_t thres) -{ - ogg_uint32_t DiffVal; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - " pxor %%mm7, %%mm7 \n\t" /* mm7 contains the result */ - ".rept 8 \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" - " movq (%3), %%mm2 \n\t" - " pavgb %%mm2, %%mm1 \n\t" - " psadbw %%mm1, %%mm0 \n\t" - - " add %4, %1 \n\t" /* Inc pointer into the new data */ - " paddw %%mm0, %%mm7 \n\t" /* accumulate difference... */ - " add %5, %2 \n\t" /* Inc pointer into ref data */ - " add %5, %3 \n\t" /* Inc pointer into ref data */ - ".endr \n\t" - - " movd %%mm7, %0 \n\t" - : "=m" (DiffVal), - "+r" (SrcData), - "+r" (RefDataPtr1), - "+r" (RefDataPtr2) - : "r" ((ogg_uint64_t)SrcStride), - "r" ((ogg_uint64_t)RefStride) - : "memory" - ); - - return DiffVal; -} - -static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2) -{ - ogg_uint32_t MaxSad; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " movd (%1), %%mm0 \n\t" - " movd (%2), %%mm1 \n\t" - " psadbw %%mm0, %%mm1 \n\t" - " movd 4(%1), %%mm2 \n\t" - " movd 4(%2), %%mm3 \n\t" - " psadbw %%mm2, %%mm3 \n\t" - - " pmaxsw %%mm1, %%mm3 \n\t" - " movd %%mm3, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=m" (MaxSad), - "+r" (Src1), - "+r" (Src2) - : - : "memory" - ); - - return MaxSad; -} - -static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2, - ogg_uint32_t stride) -{ - ogg_uint32_t MaxSad; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm3, %%mm3 \n\t" /* zero out mm3 for unpack */ - " pxor %%mm4, %%mm4 \n\t" 
/* mm4 low sum */ - " pxor %%mm5, %%mm5 \n\t" /* mm5 high sum */ - " pxor %%mm6, %%mm6 \n\t" /* mm6 low sum */ - " pxor %%mm7, %%mm7 \n\t" /* mm7 high sum */ - " mov $4, %%rdi \n\t" /* 4 rows */ - "1: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" /* take 8 bytes */ - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm4 \n\t" /* accumulate difference... */ - " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " paddw %%mm1, %%mm5 \n\t" /* accumulate difference... */ - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " add %3, %2 \n\t" /* Inc pointer into the new data */ - - " dec %%rdi \n\t" - " jnz 1b \n\t" - - " mov $4, %%rdi \n\t" /* 4 rows */ - "2: \n\t" - " movq (%1), %%mm0 \n\t" /* take 8 bytes */ - " movq (%2), %%mm1 \n\t" /* take 8 bytes */ - - " movq %%mm0, %%mm2 \n\t" - " psubusb %%mm1, %%mm0 \n\t" /* A - B */ - " psubusb %%mm2, %%mm1 \n\t" /* B - A */ - " por %%mm1, %%mm0 \n\t" /* and or gives abs difference */ - " movq %%mm0, %%mm1 \n\t" - - " punpcklbw %%mm3, %%mm0 \n\t" /* unpack to higher precision for accumulation */ - " paddw %%mm0, %%mm6 \n\t" /* accumulate difference... */ - " punpckhbw %%mm3, %%mm1 \n\t" /* unpack high four bytes to higher precision */ - " paddw %%mm1, %%mm7 \n\t" /* accumulate difference... 
*/ - " add %3, %1 \n\t" /* Inc pointer into the new data */ - " add %3, %2 \n\t" /* Inc pointer into the new data */ - - " dec %%rdi \n\t" - " jnz 2b \n\t" - - " pmaxsw %%mm6, %%mm7 \n\t" - " pmaxsw %%mm4, %%mm5 \n\t" - " pmaxsw %%mm5, %%mm7 \n\t" - " movq %%mm7, %%mm6 \n\t" - " psrlq $32, %%mm6 \n\t" - " pmaxsw %%mm6, %%mm7 \n\t" - " movq %%mm7, %%mm6 \n\t" - " psrlq $16, %%mm6 \n\t" - " pmaxsw %%mm6, %%mm7 \n\t" - " movd %%mm7, %0 \n\t" - " andl $0xffff, %0 \n\t" - - : "=r" (MaxSad), - "+r" (Src1), - "+r" (Src2) - : "r" ((ogg_uint64_t)stride) - : "memory", "rdi" - ); - - return MaxSad; -} - -static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride, - unsigned char *RefDataPtr1, - unsigned char *RefDataPtr2, ogg_uint32_t RefStride) -{ - ogg_uint64_t XSum; - ogg_uint64_t XXSum; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm4, %%mm4 \n\t" - " pxor %%mm5, %%mm5 \n\t" - " pxor %%mm6, %%mm6 \n\t" - " pxor %%mm7, %%mm7 \n\t" - " mov $8, %%rdi \n\t" - "1: \n\t" - " movq (%2), %%mm0 \n\t" /* take 8 bytes */ - - " movq (%3), %%mm2 \n\t" - " movq (%4), %%mm1 \n\t" /* take average of mm2 and mm1 */ - " pavgb %%mm2, %%mm1 \n\t" - - " movq %%mm0, %%mm2 \n\t" - " movq %%mm1, %%mm3 \n\t" - - " punpcklbw %%mm6, %%mm0 \n\t" - " punpcklbw %%mm4, %%mm1 \n\t" - " punpckhbw %%mm6, %%mm2 \n\t" - " punpckhbw %%mm4, %%mm3 \n\t" - - " psubsw %%mm1, %%mm0 \n\t" - " psubsw %%mm3, %%mm2 \n\t" - - " paddw %%mm0, %%mm5 \n\t" - " paddw %%mm2, %%mm5 \n\t" - - " pmaddwd %%mm0, %%mm0 \n\t" - " pmaddwd %%mm2, %%mm2 \n\t" - - " paddd %%mm0, %%mm7 \n\t" - " paddd %%mm2, %%mm7 \n\t" - - " add %5, %2 \n\t" /* Inc pointer into src data */ - " add %6, %3 \n\t" /* Inc pointer into ref data */ - " add %6, %4 \n\t" /* Inc pointer into ref data */ - - " dec %%rdi \n\t" - " jnz 1b \n\t" - - " movq %%mm5, %%mm0 \n\t" - " psrlq $32, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " movq %%mm5, %%mm0 \n\t" - " psrlq $16, %%mm5 \n\t" - " paddw %%mm0, %%mm5 \n\t" - " 
movd %%mm5, %%edi \n\t" - " movsx %%di, %%edi \n\t" - " movl %%edi, %0 \n\t" - - " movq %%mm7, %%mm0 \n\t" - " psrlq $32, %%mm7 \n\t" - " paddd %%mm0, %%mm7 \n\t" - " movd %%mm7, %1 \n\t" - - : "=m" (XSum), - "=m" (XXSum), - "+r" (SrcData), - "+r" (RefDataPtr1), - "+r" (RefDataPtr2) - : "r" ((ogg_uint64_t)SrcStride), - "r" ((ogg_uint64_t)RefStride) - : "rdi", "memory" - ); - - /* Compute and return population variance as mis-match metric. */ - return (( (XXSum<<6) - XSum*XSum )); -} - -void dsp_mmxext_init(DspFunctions *funcs) -{ - funcs->row_sad8 = row_sad8__mmxext; - funcs->col_sad8x8 = col_sad8x8__mmxext; - funcs->sad8x8 = sad8x8__mmxext; - funcs->sad8x8_thres = sad8x8_thres__mmxext; - funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext; - funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c deleted file mode 100644 index 3765561cf..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c +++ /dev/null @@ -1,342 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ********************************************************************/ - -/* mmx fdct implementation for x86_64 */ -/* $Id: fdct_mmx.c 15397 2008-10-14 02:06:24Z tterribe $ */ - -#include "theora/theora.h" -#include "../codec_internal.h" -#include "../dsp.h" - -#if defined(USE_ASM) - -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC4S4 = 0x0b505b505b505b505LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL; -static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL; - -#if defined(__MINGW32__) || defined(__CYGWIN__) || \ - defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__)) -# define M(a) "_" #a -#else -# define M(a) #a -#endif - -/* execute stage 1 of forward DCT */ -#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp) \ - " movq " #ip0 ", %%mm0 \n\t" \ - " movq " #ip1 ", %%mm1 \n\t" \ - " movq " #ip3 ", %%mm2 \n\t" \ - " movq " #ip5 ", %%mm3 \n\t" \ - " movq %%mm0, %%mm4 \n\t" \ - " movq %%mm1, %%mm5 \n\t" \ - " movq %%mm2, %%mm6 \n\t" \ - " movq %%mm3, %%mm7 \n\t" \ - \ - " paddsw " #ip7 ", %%mm0 \n\t" /* mm0 = ip0 + ip7 = is07 */ \ - " paddsw " #ip2 ", %%mm1 \n\t" /* mm1 = ip1 + ip2 = is12 */ \ - " paddsw " #ip4 ", %%mm2 \n\t" /* mm2 = ip3 + ip4 = is34 */ \ - " paddsw " #ip6 ", %%mm3 \n\t" /* mm3 = ip5 + ip6 = is56 */ \ - " psubsw " #ip7 ", %%mm4 \n\t" /* mm4 = ip0 - ip7 = id07 */ \ - " psubsw " #ip2 ", %%mm5 \n\t" /* mm5 = ip1 - ip2 = id12 */ \ - \ - " psubsw %%mm2, %%mm0 \n\t" /* 
mm0 = is07 - is34 */ \ - \ - " paddsw %%mm2, %%mm2 \n\t" \ - \ - " psubsw " #ip4 ", %%mm6 \n\t" /* mm6 = ip3 - ip4 = id34 */ \ - \ - " paddsw %%mm0, %%mm2 \n\t" /* mm2 = is07 + is34 = is0734 */ \ - " psubsw %%mm3, %%mm1 \n\t" /* mm1 = is12 - is56 */ \ - " movq %%mm0," #temp " \n\t" /* Save is07 - is34 to free mm0; */ \ - " paddsw %%mm3, %%mm3 \n\t" \ - " paddsw %%mm1, %%mm3 \n\t" /* mm3 = is12 + 1s56 = is1256 */ \ - \ - " psubsw " #ip6 ", %%mm7 \n\t" /* mm7 = ip5 - ip6 = id56 */ \ - /* ------------------------------------------------------------------- */ \ - " psubsw %%mm7, %%mm5 \n\t" /* mm5 = id12 - id56 */ \ - " paddsw %%mm7, %%mm7 \n\t" \ - " paddsw %%mm5, %%mm7 \n\t" /* mm7 = id12 + id56 */ \ - /* ------------------------------------------------------------------- */ \ - " psubsw %%mm3, %%mm2 \n\t" /* mm2 = is0734 - is1256 */ \ - " paddsw %%mm3, %%mm3 \n\t" \ - \ - " movq %%mm2, %%mm0 \n\t" /* make a copy */ \ - " paddsw %%mm2, %%mm3 \n\t" /* mm3 = is0734 + is1256 */ \ - \ - " pmulhw %[xC4S4], %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \ - " paddw %%mm2, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \ - " psrlw $15, %%mm2 \n\t" \ - " paddw %%mm2, %%mm0 \n\t" /* Truncate mm0, now it is op[4] */ \ - \ - " movq %%mm3, %%mm2 \n\t" \ - " movq %%mm0," #ip4 " \n\t" /* save ip4, now mm0,mm2 are free */ \ - \ - " movq %%mm3, %%mm0 \n\t" \ - " pmulhw %[xC4S4], %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \ - \ - " psrlw $15, %%mm2 \n\t" \ - " paddw %%mm0, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) */ \ - " paddw %%mm2, %%mm3 \n\t" /* Truncate mm3, now it is op[0] */ \ - \ - " movq %%mm3," #ip0 " \n\t" \ - /* ------------------------------------------------------------------- */ \ - " movq " #temp ", %%mm3 \n\t" /* mm3 = irot_input_y */ \ - " pmulhw %[xC2S6], %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \ - \ - " movq " #temp ", %%mm2 \n\t" \ - " movq %%mm2, %%mm0 \n\t" \ - \ - " psrlw 
$15, %%mm2 \n\t" /* mm3 = xC2S6 * irot_input_y */ \ - " paddw %%mm0, %%mm3 \n\t" \ - \ - " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \ - " movq %%mm5, %%mm0 \n\t" \ - \ - " movq %%mm5, %%mm2 \n\t" \ - " pmulhw %[xC6S2], %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \ - \ - " psrlw $15, %%mm2 \n\t" \ - " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \ - \ - " paddsw %%mm0, %%mm3 \n\t" /* ip[2] */ \ - " movq %%mm3," #ip2 " \n\t" /* Save ip2 */ \ - \ - " movq %%mm5, %%mm0 \n\t" \ - " movq %%mm5, %%mm2 \n\t" \ - \ - " pmulhw %[xC2S6], %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " movq " #temp ", %%mm3 \n\t" \ - " paddw %%mm0, %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x */ \ - \ - " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \ - " movq %%mm3, %%mm2 \n\t" \ - \ - " pmulhw %[xC6S2], %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \ - " psubsw %%mm5, %%mm3 \n\t" \ - \ - " movq %%mm3," #ip6 " \n\t" \ - /* ------------------------------------------------------------------- */ \ - " movq %[xC4S4], %%mm0 \n\t" \ - " movq %%mm1, %%mm2 \n\t" \ - " movq %%mm1, %%mm3 \n\t" \ - \ - " pmulhw %%mm0, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm3, %%mm1 \n\t" /* mm0 = xC4S4 * ( is12 - is56 ) */ \ - " paddw %%mm2, %%mm1 \n\t" /* Truncate mm1, now it is icommon_product1 */ \ - \ - " movq %%mm7, %%mm2 \n\t" \ - " movq %%mm7, %%mm3 \n\t" \ - \ - " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm3, %%mm7 \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) */ \ - " paddw %%mm2, %%mm7 \n\t" /* Truncate mm7, now it is icommon_product2 */ \ - /* ------------------------------------------------------------------- */ \ - " pxor %%mm0, %%mm0 \n\t" /* Clear mm0 */ \ - " psubsw %%mm6, %%mm0 \n\t" /* mm0 = - id34 */ \ - \ - " psubsw %%mm7, %%mm0 
\n\t" /* mm0 = - ( id34 + idcommon_product2 ) */ \ - " paddsw %%mm6, %%mm6 \n\t" \ - " paddsw %%mm0, %%mm6 \n\t" /* mm6 = id34 - icommon_product2 */ \ - \ - " psubsw %%mm1, %%mm4 \n\t" /* mm4 = id07 - icommon_product1 */ \ - " paddsw %%mm1, %%mm1 \n\t" \ - " paddsw %%mm4, %%mm1 \n\t" /* mm1 = id07 + icommon_product1 */ \ - /* ------------------------------------------------------------------- */ \ - " movq %[xC1S7], %%mm7 \n\t" \ - " movq %%mm1, %%mm2 \n\t" \ - \ - " movq %%mm1, %%mm3 \n\t" \ - " pmulhw %%mm7, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \ - \ - " movq %[xC7S1], %%mm7 \n\t" \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm3, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x */ \ - " paddw %%mm2, %%mm1 \n\t" /* Trucated */ \ - \ - " pmulhw %%mm7, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x */ \ - " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \ - \ - " movq %%mm0, %%mm5 \n\t" \ - " movq %%mm0, %%mm2 \n\t" \ - \ - " movq %[xC1S7], %%mm7 \n\t" \ - " pmulhw %%mm7, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \ - \ - " movq %[xC7S1], %%mm7 \n\t" \ - " psrlw $15, %%mm2 \n\t" \ - \ - " paddw %%mm5, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y */ \ - " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \ - \ - " pmulhw %%mm7, %%mm5 \n\t" /* mm5 = xC7S1 * irot_input_y */ \ - " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \ - \ - " psubsw %%mm5, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \ - " paddsw %%mm0, %%mm3 \n\t" /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ \ - \ - " movq %%mm1," #ip1 " \n\t" \ - " movq %%mm3," #ip7 " \n\t" \ - /* ------------------------------------------------------------------- */ \ - " movq %[xC3S5], %%mm0 \n\t" \ - " movq %[xC5S3], %%mm1 \n\t" \ - \ - " movq %%mm6, %%mm5 \n\t" \ - " movq %%mm6, %%mm7 \n\t" \ - \ - " movq %%mm4, %%mm2 \n\t" \ - " movq %%mm4, %%mm3 \n\t" \ - \ - " pmulhw %%mm0, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \ - " pmulhw %%mm1, %%mm6 \n\t" 
/* mm6 = xC5S3 * irot_input_y - irot_input_y */ \ - \ - " psrlw $15, %%mm2 \n\t" \ - " psrlw $15, %%mm5 \n\t" \ - \ - " paddw %%mm3, %%mm4 \n\t" /* mm4 = xC3S5 * irot_input_x */ \ - " paddw %%mm7, %%mm6 \n\t" /* mm6 = xC5S3 * irot_input_y */ \ - \ - " paddw %%mm2, %%mm4 \n\t" /* Truncated */ \ - " paddw %%mm5, %%mm6 \n\t" /* Truncated */ \ - \ - " psubsw %%mm6, %%mm4 \n\t" /* ip3 */ \ - " movq %%mm4," #ip3 " \n\t" \ - \ - " movq %%mm3, %%mm4 \n\t" \ - " movq %%mm7, %%mm6 \n\t" \ - \ - " pmulhw %%mm1, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \ - " pmulhw %%mm0, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \ - \ - " paddw %%mm2, %%mm4 \n\t" \ - " paddw %%mm5, %%mm6 \n\t" \ - \ - " paddw %%mm4, %%mm3 \n\t" /* mm3 = xC5S3 * irot_input_x */ \ - " paddw %%mm6, %%mm7 \n\t" /* mm7 = xC3S5 * irot_input_y */ \ - \ - " paddw %%mm7, %%mm3 \n\t" /* ip5 */ \ - " movq %%mm3," #ip5 " \n\t" - -#define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7, \ - op0,op1,op2,op3,op4,op5,op6,op7) \ - " movq " #ip0 ", %%mm0 \n\t" /* mm0 = a0 a1 a2 a3 */ \ - " movq " #ip4 ", %%mm4 \n\t" /* mm4 = e4 e5 e6 e7 */ \ - " movq " #ip1 ", %%mm1 \n\t" /* mm1 = b0 b1 b2 b3 */ \ - " movq " #ip5 ", %%mm5 \n\t" /* mm5 = f4 f5 f6 f7 */ \ - " movq " #ip2 ", %%mm2 \n\t" /* mm2 = c0 c1 c2 c3 */ \ - " movq " #ip6 ", %%mm6 \n\t" /* mm6 = g4 g5 g6 g7 */ \ - " movq " #ip3 ", %%mm3 \n\t" /* mm3 = d0 d1 d2 d3 */ \ - " movq %%mm1," #op1 " \n\t" /* save b0 b1 b2 b3 */ \ - " movq " #ip7 ", %%mm7 \n\t" /* mm7 = h0 h1 h2 h3 */ \ - /* Transpose 2x8 block */ \ - " movq %%mm4, %%mm1 \n\t" /* mm1 = e3 e2 e1 e0 */ \ - " punpcklwd %%mm5, %%mm4 \n\t" /* mm4 = f1 e1 f0 e0 */ \ - " movq %%mm0," #op0 " \n\t" /* save a3 a2 a1 a0 */ \ - " punpckhwd %%mm5, %%mm1 \n\t" /* mm1 = f3 e3 f2 e2 */ \ - " movq %%mm6, %%mm0 \n\t" /* mm0 = g3 g2 g1 g0 */ \ - " punpcklwd %%mm7, %%mm6 \n\t" /* mm6 = h1 g1 h0 g0 */ \ - " movq %%mm4, %%mm5 \n\t" /* mm5 = f1 e1 f0 e0 */ \ - " punpckldq %%mm6, %%mm4 \n\t" /* mm4 = h0 
g0 f0 e0 = MM4 */ \ - " punpckhdq %%mm6, %%mm5 \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \ - " movq %%mm1, %%mm6 \n\t" /* mm6 = f3 e3 f2 e2 */ \ - " movq %%mm4," #op4 " \n\t" \ - " punpckhwd %%mm7, %%mm0 \n\t" /* mm0 = h3 g3 h2 g2 */ \ - " movq %%mm5," #op5 " \n\t" \ - " punpckhdq %%mm0, %%mm6 \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \ - " movq " #op0 ", %%mm4 \n\t" /* mm4 = a3 a2 a1 a0 */ \ - " punpckldq %%mm0, %%mm1 \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \ - " movq " #op1 ", %%mm5 \n\t" /* mm5 = b3 b2 b1 b0 */ \ - " movq %%mm4, %%mm0 \n\t" /* mm0 = a3 a2 a1 a0 */ \ - " movq %%mm6," #op7 " \n\t" \ - " punpcklwd %%mm5, %%mm0 \n\t" /* mm0 = b1 a1 b0 a0 */ \ - " movq %%mm1," #op6 " \n\t" \ - " punpckhwd %%mm5, %%mm4 \n\t" /* mm4 = b3 a3 b2 a2 */ \ - " movq %%mm2, %%mm5 \n\t" /* mm5 = c3 c2 c1 c0 */ \ - " punpcklwd %%mm3, %%mm2 \n\t" /* mm2 = d1 c1 d0 c0 */ \ - " movq %%mm0, %%mm1 \n\t" /* mm1 = b1 a1 b0 a0 */ \ - " punpckldq %%mm2, %%mm0 \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \ - " punpckhdq %%mm2, %%mm1 \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \ - " movq %%mm4, %%mm2 \n\t" /* mm2 = b3 a3 b2 a2 */ \ - " movq %%mm0," #op0 " \n\t" \ - " punpckhwd %%mm3, %%mm5 \n\t" /* mm5 = d3 c3 d2 c2 */ \ - " movq %%mm1," #op1 " \n\t" \ - " punpckhdq %%mm5, %%mm4 \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \ - " punpckldq %%mm5, %%mm2 \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \ - " movq %%mm4," #op3 " \n\t" \ - " movq %%mm2," #op2 " \n\t" - - -/* This performs a 2D Forward DCT on an 8x8 block with short - coefficients. We try to do the truncation to match the C - version. */ -static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData) -{ - ogg_int16_t __attribute__((aligned(8))) temp[8*8]; - - __asm__ __volatile__ ( - " .balign 16 \n\t" - /* - * Input data is an 8x8 block. To make processing of the data more efficent - * we will transpose the block of data to two 4x8 blocks??? 
- */ - Transpose_mmx ( (%0), 16(%0), 32(%0), 48(%0), 8(%0), 24(%0), 40(%0), 56(%0), - (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1)) - Fdct_mmx ( (%1), 16(%1), 32(%1), 48(%1), 8(%1), 24(%1), 40(%1), 56(%1), (%2)) - - Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0), - 64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1)) - Fdct_mmx (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2)) - - Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), - 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1)) - Fdct_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2)) - - Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), - 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1)) - Fdct_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2)) - - " emms \n\t" - - : "+r" (InputData), - "+r" (OutputData) - : "r" (temp), - [xC1S7] "m" (xC1S7), /* gcc 3.1+ allows named asm parameters */ - [xC2S6] "m" (xC2S6), - [xC3S5] "m" (xC3S5), - [xC4S4] "m" (xC4S4), - [xC5S3] "m" (xC5S3), - [xC6S2] "m" (xC6S2), - [xC7S1] "m" (xC7S1) - : "memory" - ); -} - -/* install our implementation in the function table */ -void dsp_mmx_fdct_init(DspFunctions *funcs) -{ - funcs->fdct_short = fdct_short__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c deleted file mode 100644 index b87db6085..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: idct_mmx.c 15397 2008-10-14 02:06:24Z tterribe $ - - ********************************************************************/ - -#include "../codec_internal.h" - -#if defined(USE_ASM) - -/* nothing implemented right now */ -void dsp_mmx_idct_init(DspFunctions *funcs) -{ -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c b/Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c deleted file mode 100644 index b9b86e982..000000000 --- a/Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c +++ /dev/null @@ -1,184 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: recon_mmx.c 15397 2008-10-14 02:06:24Z tterribe $ - - ********************************************************************/ - -#include "../codec_internal.h" - -#if defined(USE_ASM) - -typedef unsigned long long ogg_uint64_t; - -static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL; - -static void copy8x8__mmx (unsigned char *src, - unsigned char *dest, - ogg_uint32_t stride) -{ - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " lea (%2, %2, 2), %%rdi \n\t" - - " movq (%1), %%mm0 \n\t" - " movq (%1, %2), %%mm1 \n\t" - " movq (%1, %2, 2), %%mm2 \n\t" - " movq (%1, %%rdi), %%mm3 \n\t" - - " lea (%1, %2, 4), %1 \n\t" - - " movq %%mm0, (%0) \n\t" - " movq %%mm1, (%0, %2) \n\t" - " movq %%mm2, (%0, %2, 2) \n\t" - " movq %%mm3, (%0, %%rdi) \n\t" - - " lea (%0, %2, 4), %0 \n\t" - - " movq (%1), %%mm0 \n\t" - " movq (%1, %2), %%mm1 \n\t" - " movq (%1, %2, 2), %%mm2 \n\t" - " movq (%1, %%rdi), %%mm3 \n\t" - - " movq %%mm0, (%0) \n\t" - " movq %%mm1, (%0, %2) \n\t" - " movq %%mm2, (%0, %2, 2) \n\t" - " movq %%mm3, (%0, %%rdi) \n\t" - : "+a" (dest) - : "c" (src), - "d" ((ogg_uint64_t)stride) - : "memory", "rdi" - ); -} - -static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " movq %[V128], %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */ - - " lea 128(%1), %%rdi \n\t" /* Endpoint in input buffer */ - "1: \n\t" - " movq (%1), %%mm2 \n\t" /* First four input values */ - - " packsswb 8(%1), %%mm2 \n\t" /* pack with next(high) four values */ - " por %%mm0, %%mm0 \n\t" - " pxor %%mm0, %%mm2 \n\t" /* Convert result to unsigned (same as add 128) */ - " lea 16(%1), %1 \n\t" /* Step source buffer */ - " cmp %%rdi, %1 \n\t" /* 
are we done */ - - " movq %%mm2, (%0) \n\t" /* store results */ - - " lea (%0, %2), %0 \n\t" /* Step output buffer */ - " jc 1b \n\t" /* Loop back if we are not done */ - : "+r" (ReconPtr) - : "r" (ChangePtr), - "r" ((ogg_uint64_t)LineStep), - [V128] "m" (V128) - : "memory", "rdi" - ); -} - -static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr, - ogg_int16_t *ChangePtr, ogg_uint32_t LineStep) -{ - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm0, %%mm0 \n\t" - " lea 128(%1), %%rdi \n\t" - - "1: \n\t" - " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */ - - " movq (%1), %%mm4 \n\t" /* first 4 changes */ - " movq %%mm2, %%mm3 \n\t" - " movq 8(%1), %%mm5 \n\t" /* last 4 changes */ - " punpcklbw %%mm0, %%mm2 \n\t" /* turn first 4 refs into positive 16-bit #s */ - " paddsw %%mm4, %%mm2 \n\t" /* add in first 4 changes */ - " punpckhbw %%mm0, %%mm3 \n\t" /* turn last 4 refs into positive 16-bit #s */ - " paddsw %%mm5, %%mm3 \n\t" /* add in last 4 changes */ - " add %3, %2 \n\t" /* next row of reference pixels */ - " packuswb %%mm3, %%mm2 \n\t" /* pack result to unsigned 8-bit values */ - " lea 16(%1), %1 \n\t" /* next row of changes */ - " cmp %%rdi, %1 \n\t" /* are we done? 
*/ - - " movq %%mm2, (%0) \n\t" /* store result */ - - " lea (%0, %3), %0 \n\t" /* next row of output */ - " jc 1b \n\t" - : "+r" (ReconPtr) - : "r" (ChangePtr), - "r" (RefPtr), - "r" ((ogg_uint64_t)LineStep) - : "memory", "rdi" - ); -} - -static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1, - unsigned char *RefPtr2, ogg_int16_t *ChangePtr, - ogg_uint32_t LineStep) -{ - __asm__ __volatile__ ( - " .balign 16 \n\t" - - " pxor %%mm0, %%mm0 \n\t" - " lea 128(%1), %%rdi \n\t" - - "1: \n\t" - " movq (%2), %%mm2 \n\t" /* (+3 misaligned) 8 reference pixels */ - " movq (%3), %%mm4 \n\t" /* (+3 misaligned) 8 reference pixels */ - - " movq %%mm2, %%mm3 \n\t" - " punpcklbw %%mm0, %%mm2 \n\t" /* mm2 = start ref1 as positive 16-bit #s */ - " movq %%mm4, %%mm5 \n\t" - " movq (%1), %%mm6 \n\t" /* first 4 changes */ - " punpckhbw %%mm0, %%mm3 \n\t" /* mm3 = end ref1 as positive 16-bit #s */ - " movq 8(%1), %%mm7 \n\t" /* last 4 changes */ - " punpcklbw %%mm0, %%mm4 \n\t" /* mm4 = start ref2 as positive 16-bit #s */ - " punpckhbw %%mm0, %%mm5 \n\t" /* mm5 = end ref2 as positive 16-bit #s */ - " paddw %%mm4, %%mm2 \n\t" /* mm2 = start (ref1 + ref2) */ - " paddw %%mm5, %%mm3 \n\t" /* mm3 = end (ref1 + ref2) */ - " psrlw $1, %%mm2 \n\t" /* mm2 = start (ref1 + ref2)/2 */ - " psrlw $1, %%mm3 \n\t" /* mm3 = end (ref1 + ref2)/2 */ - " paddw %%mm6, %%mm2 \n\t" /* add changes to start */ - " paddw %%mm7, %%mm3 \n\t" /* add changes to end */ - " lea 16(%1), %1 \n\t" /* next row of changes */ - " packuswb %%mm3, %%mm2 \n\t" /* pack start|end to unsigned 8-bit */ - " add %4, %2 \n\t" /* next row of reference pixels */ - " add %4, %3 \n\t" /* next row of reference pixels */ - " movq %%mm2, (%0) \n\t" /* store result */ - " add %4, %0 \n\t" /* next row of output */ - " cmp %%rdi, %1 \n\t" /* are we done? 
*/ - " jc 1b \n\t" - : "+r" (ReconPtr) - : "r" (ChangePtr), - "r" (RefPtr1), - "r" (RefPtr2), - "r" ((ogg_uint64_t)LineStep) - : "memory", "rdi" - ); -} - -void dsp_mmx_recon_init(DspFunctions *funcs) -{ - funcs->copy8x8 = copy8x8__mmx; - funcs->recon_intra8x8 = recon_intra8x8__mmx; - funcs->recon_inter8x8 = recon_inter8x8__mmx; - funcs->recon_inter8x8_half = recon_inter8x8_half__mmx; -} - -#endif /* USE_ASM */ diff --git a/Engine/lib/libtheora/lib/encapiwrapper.c b/Engine/lib/libtheora/lib/encapiwrapper.c new file mode 100644 index 000000000..874f12442 --- /dev/null +++ b/Engine/lib/libtheora/lib/encapiwrapper.c @@ -0,0 +1,168 @@ +#include +#include +#include +#include "apiwrapper.h" +#include "encint.h" +#include "theora/theoraenc.h" + + + +static void th_enc_api_clear(th_api_wrapper *_api){ + if(_api->encode)th_encode_free(_api->encode); + memset(_api,0,sizeof(*_api)); +} + +static void theora_encode_clear(theora_state *_te){ + if(_te->i!=NULL)theora_info_clear(_te->i); + memset(_te,0,sizeof(*_te)); +} + +static int theora_encode_control(theora_state *_te,int _req, + void *_buf,size_t _buf_sz){ + return th_encode_ctl(((th_api_wrapper *)_te->i->codec_setup)->encode, + _req,_buf,_buf_sz); +} + +static ogg_int64_t theora_encode_granule_frame(theora_state *_te, + ogg_int64_t _gp){ + return th_granule_frame(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp); +} + +static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){ + return th_granule_time(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp); +} + +static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={ + (oc_state_clear_func)theora_encode_clear, + (oc_state_control_func)theora_encode_control, + (oc_state_granule_frame_func)theora_encode_granule_frame, + (oc_state_granule_time_func)theora_encode_granule_time, +}; + +int theora_encode_init(theora_state *_te,theora_info *_ci){ + th_api_info *apiinfo; + th_info info; + ogg_uint32_t keyframe_frequency_force; + /*Allocate our own combined 
API wrapper/theora_info struct. + We put them both in one malloc'd block so that when the API wrapper is + freed, the info struct goes with it. + This avoids having to figure out whether or not we need to free the info + struct in either theora_info_clear() or theora_clear().*/ + apiinfo=(th_api_info *)_ogg_malloc(sizeof(*apiinfo)); + if(apiinfo==NULL)return TH_EFAULT; + /*Make our own copy of the info struct, since its lifetime should be + independent of the one we were passed in.*/ + *&apiinfo->info=*_ci; + oc_theora_info2th_info(&info,_ci); + apiinfo->api.encode=th_encode_alloc(&info); + if(apiinfo->api.encode==NULL){ + _ogg_free(apiinfo); + return OC_EINVAL; + } + apiinfo->api.clear=(oc_setup_clear_func)th_enc_api_clear; + /*Provide entry points for ABI compatibility with old decoder shared libs.*/ + _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL; + _te->internal_decode=NULL; + _te->granulepos=0; + _te->i=&apiinfo->info; + _te->i->codec_setup=&apiinfo->api; + /*Set the precise requested keyframe frequency.*/ + keyframe_frequency_force=_ci->keyframe_auto_p? 
+ _ci->keyframe_frequency_force:_ci->keyframe_frequency; + th_encode_ctl(apiinfo->api.encode, + TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, + &keyframe_frequency_force,sizeof(keyframe_frequency_force)); + /*TODO: Additional codec setup using the extra fields in theora_info.*/ + return 0; +} + +int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){ + th_api_wrapper *api; + th_ycbcr_buffer buf; + int ret; + api=(th_api_wrapper *)_te->i->codec_setup; + buf[0].width=_yuv->y_width; + buf[0].height=_yuv->y_height; + buf[0].stride=_yuv->y_stride; + buf[0].data=_yuv->y; + buf[1].width=_yuv->uv_width; + buf[1].height=_yuv->uv_height; + buf[1].stride=_yuv->uv_stride; + buf[1].data=_yuv->u; + buf[2].width=_yuv->uv_width; + buf[2].height=_yuv->uv_height; + buf[2].stride=_yuv->uv_stride; + buf[2].data=_yuv->v; + ret=th_encode_ycbcr_in(api->encode,buf); + if(ret<0)return ret; + _te->granulepos=api->encode->state.granpos; + return ret; +} + +int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){ + th_api_wrapper *api; + api=(th_api_wrapper *)_te->i->codec_setup; + return th_encode_packetout(api->encode,_last_p,_op); +} + +int theora_encode_header(theora_state *_te,ogg_packet *_op){ + oc_enc_ctx *enc; + th_api_wrapper *api; + int ret; + api=(th_api_wrapper *)_te->i->codec_setup; + enc=api->encode; + /*If we've already started encoding, fail.*/ + if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){ + return TH_EINVAL; + } + /*Reset the state to make sure we output an info packet.*/ + enc->packet_state=OC_PACKET_INFO_HDR; + ret=th_encode_flushheader(api->encode,NULL,_op); + return ret>=0?0:ret; +} + +int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){ + oggpack_buffer opb; + void *buf; + int packet_state; + int ret; + packet_state=OC_PACKET_COMMENT_HDR; + oggpackB_writeinit(&opb); + ret=oc_state_flushheader(NULL,&packet_state,&opb,NULL,NULL, + th_version_string(),(th_comment *)_tc,_op); + if(ret>=0){ + /*The oggpack_buffer's lifetime ends 
with this function, so we have to + copy out the packet contents. + Presumably the application knows it is supposed to free this. + This part works nothing like the Vorbis API, and the documentation on it + has been wrong for some time, claiming libtheora owned the memory.*/ + buf=_ogg_malloc(_op->bytes); + if(buf==NULL){ + _op->packet=NULL; + ret=TH_EFAULT; + } + else{ + memcpy(buf,_op->packet,_op->bytes); + _op->packet=buf; + ret=0; + } + } + oggpack_writeclear(&opb); + return ret; +} + +int theora_encode_tables(theora_state *_te,ogg_packet *_op){ + oc_enc_ctx *enc; + th_api_wrapper *api; + int ret; + api=(th_api_wrapper *)_te->i->codec_setup; + enc=api->encode; + /*If we've already started encoding, fail.*/ + if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){ + return TH_EINVAL; + } + /*Reset the state to make sure we output a setup packet.*/ + enc->packet_state=OC_PACKET_SETUP_HDR; + ret=th_encode_flushheader(api->encode,NULL,_op); + return ret>=0?0:ret; +} diff --git a/Engine/lib/libtheora/lib/encfrag.c b/Engine/lib/libtheora/lib/encfrag.c new file mode 100644 index 000000000..bb814c8e4 --- /dev/null +++ b/Engine/lib/libtheora/lib/encfrag.c @@ -0,0 +1,388 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encfrag.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + + +void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + (*_enc->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride); +} + +void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-_ref[j]); + _src+=_ystride; + _ref+=_ystride; + } +} + +void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride){ + (*_enc->opt_vtable.frag_sub_128)(_diff,_src,_ystride); +} + +void oc_enc_frag_sub_128_c(ogg_int16_t *_diff, + const unsigned char *_src,int _ystride){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-128); + _src+=_ystride; + } +} + +unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x, + const unsigned char *_y,int _ystride){ + return (*_enc->opt_vtable.frag_sad)(_x,_y,_ystride); +} + +unsigned oc_enc_frag_sad_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + unsigned sad; + int i; + sad=0; + for(i=8;i-->0;){ + int j; + for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]); + _src+=_ystride; + _ref+=_ystride; + } + return sad; +} + +unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh){ + return (*_enc->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh); +} + +unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src, + const unsigned char 
*_ref,int _ystride,unsigned _thresh){ + unsigned sad; + int i; + sad=0; + for(i=8;i-->0;){ + int j; + for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]); + if(sad>_thresh)break; + _src+=_ystride; + _ref+=_ystride; + } + return sad; +} + +unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh){ + return (*_enc->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride, + _thresh); +} + +unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + unsigned sad; + int i; + sad=0; + for(i=8;i-->0;){ + int j; + for(j=0;j<8;j++)sad+=abs(_src[j]-(_ref1[j]+_ref2[j]>>1)); + if(sad>_thresh)break; + _src+=_ystride; + _ref1+=_ystride; + _ref2+=_ystride; + } + return sad; +} + +static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + int i; + for(i=0;i<8;i++){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + /*Hadamard stage 1:*/ + t0=_src[0]-_ref[0]+_src[4]-_ref[4]; + t4=_src[0]-_ref[0]-_src[4]+_ref[4]; + t1=_src[1]-_ref[1]+_src[5]-_ref[5]; + t5=_src[1]-_ref[1]-_src[5]+_ref[5]; + t2=_src[2]-_ref[2]+_src[6]-_ref[6]; + t6=_src[2]-_ref[2]-_src[6]+_ref[6]; + t3=_src[3]-_ref[3]+_src[7]-_ref[7]; + t7=_src[3]-_ref[3]-_src[7]+_ref[7]; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + _buf[0*8+i]=(ogg_int16_t)(t0+t1); + _buf[1*8+i]=(ogg_int16_t)(t0-t1); + _buf[2*8+i]=(ogg_int16_t)(t2+t3); + _buf[3*8+i]=(ogg_int16_t)(t2-t3); + _buf[4*8+i]=(ogg_int16_t)(t4+t5); + _buf[5*8+i]=(ogg_int16_t)(t4-t5); + _buf[6*8+i]=(ogg_int16_t)(t6+t7); + _buf[7*8+i]=(ogg_int16_t)(t6-t7); + _src+=_ystride; + _ref+=_ystride; + } +} + +static void oc_diff_hadamard2(ogg_int16_t _buf[64],const 
unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){ + int i; + for(i=0;i<8;i++){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + /*Hadamard stage 1:*/ + r=_ref1[0]+_ref2[0]>>1; + t4=_ref1[4]+_ref2[4]>>1; + t0=_src[0]-r+_src[4]-t4; + t4=_src[0]-r-_src[4]+t4; + r=_ref1[1]+_ref2[1]>>1; + t5=_ref1[5]+_ref2[5]>>1; + t1=_src[1]-r+_src[5]-t5; + t5=_src[1]-r-_src[5]+t5; + r=_ref1[2]+_ref2[2]>>1; + t6=_ref1[6]+_ref2[6]>>1; + t2=_src[2]-r+_src[6]-t6; + t6=_src[2]-r-_src[6]+t6; + r=_ref1[3]+_ref2[3]>>1; + t7=_ref1[7]+_ref2[7]>>1; + t3=_src[3]-r+_src[7]-t7; + t7=_src[3]-r-_src[7]+t7; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + _buf[0*8+i]=(ogg_int16_t)(t0+t1); + _buf[1*8+i]=(ogg_int16_t)(t0-t1); + _buf[2*8+i]=(ogg_int16_t)(t2+t3); + _buf[3*8+i]=(ogg_int16_t)(t2-t3); + _buf[4*8+i]=(ogg_int16_t)(t4+t5); + _buf[5*8+i]=(ogg_int16_t)(t4-t5); + _buf[6*8+i]=(ogg_int16_t)(t6+t7); + _buf[7*8+i]=(ogg_int16_t)(t6-t7); + _src+=_ystride; + _ref1+=_ystride; + _ref2+=_ystride; + } +} + +static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src, + int _ystride){ + int i; + for(i=0;i<8;i++){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + /*Hadamard stage 1:*/ + t0=_src[0]+_src[4]; + t4=_src[0]-_src[4]; + t1=_src[1]+_src[5]; + t5=_src[1]-_src[5]; + t2=_src[2]+_src[6]; + t6=_src[2]-_src[6]; + t3=_src[3]+_src[7]; + t7=_src[3]-_src[7]; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + _buf[0*8+i]=(ogg_int16_t)(t0+t1); + _buf[1*8+i]=(ogg_int16_t)(t0-t1); + _buf[2*8+i]=(ogg_int16_t)(t2+t3); + _buf[3*8+i]=(ogg_int16_t)(t2-t3); + _buf[4*8+i]=(ogg_int16_t)(t4+t5); + _buf[5*8+i]=(ogg_int16_t)(t4-t5); + 
_buf[6*8+i]=(ogg_int16_t)(t6+t7); + _buf[7*8+i]=(ogg_int16_t)(t6-t7); + _src+=_ystride; + } +} + +unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){ + unsigned sad; + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + int i; + sad=0; + for(i=0;i<8;i++){ + /*Hadamard stage 1:*/ + t0=_buf[i*8+0]+_buf[i*8+4]; + t4=_buf[i*8+0]-_buf[i*8+4]; + t1=_buf[i*8+1]+_buf[i*8+5]; + t5=_buf[i*8+1]-_buf[i*8+5]; + t2=_buf[i*8+2]+_buf[i*8+6]; + t6=_buf[i*8+2]-_buf[i*8+6]; + t3=_buf[i*8+3]+_buf[i*8+7]; + t7=_buf[i*8+3]-_buf[i*8+7]; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + r=abs(t0+t1); + r+=abs(t0-t1); + r+=abs(t2+t3); + r+=abs(t2-t3); + r+=abs(t4+t5); + r+=abs(t4-t5); + r+=abs(t6+t7); + r+=abs(t6-t7); + sad+=r; + if(sad>_thresh)break; + } + return sad; +} + +unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh){ + return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh); +} + +unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + ogg_int16_t buf[64]; + oc_diff_hadamard(buf,_src,_ref,_ystride); + return oc_hadamard_sad_thresh(buf,_thresh); +} + +unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh){ + return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride, + _thresh); +} + +unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + ogg_int16_t buf[64]; + oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride); + return oc_hadamard_sad_thresh(buf,_thresh); +} + +unsigned oc_enc_frag_intra_satd(const 
oc_enc_ctx *_enc, + const unsigned char *_src,int _ystride){ + return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride); +} + +unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){ + ogg_int16_t buf[64]; + oc_intra_hadamard(buf,_src,_ystride); + return oc_hadamard_sad_thresh(buf,UINT_MAX) + -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); +} + +void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + (*_enc->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride); +} + +void oc_enc_frag_copy2_c(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + int i; + int j; + for(i=8;i-->0;){ + for(j=0;j<8;j++)_dst[j]=_src1[j]+_src2[j]>>1; + _dst+=_ystride; + _src1+=_ystride; + _src2+=_ystride; + } +} + +void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc, + unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){ + (*_enc->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue); +} + +void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + (*_enc->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue); +} diff --git a/Engine/lib/libtheora/lib/encinfo.c b/Engine/lib/libtheora/lib/encinfo.c new file mode 100644 index 000000000..83be1dae7 --- /dev/null +++ b/Engine/lib/libtheora/lib/encinfo.c @@ -0,0 +1,121 @@ +#include +#include +#include "internal.h" +#include "enquant.h" +#include "huffenc.h" + + + +/*Packs a series of octets from a given byte array into the pack buffer. + _opb: The pack buffer to store the octets in. + _buf: The byte array containing the bytes to pack. 
+ _len: The number of octets to pack.*/ +static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){ + int i; + for(i=0;i<_len;i++)oggpackB_write(_opb,_buf[i],8); +} + + + +int oc_state_flushheader(oc_theora_state *_state,int *_packet_state, + oggpack_buffer *_opb,const th_quant_info *_qinfo, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS], + const char *_vendor,th_comment *_tc,ogg_packet *_op){ + unsigned char *packet; + int b_o_s; + if(_op==NULL)return TH_EFAULT; + switch(*_packet_state){ + /*Codec info header.*/ + case OC_PACKET_INFO_HDR:{ + if(_state==NULL)return TH_EFAULT; + oggpackB_reset(_opb); + /*Mark this packet as the info header.*/ + oggpackB_write(_opb,0x80,8); + /*Write the codec string.*/ + oc_pack_octets(_opb,"theora",6); + /*Write the codec bitstream version.*/ + oggpackB_write(_opb,TH_VERSION_MAJOR,8); + oggpackB_write(_opb,TH_VERSION_MINOR,8); + oggpackB_write(_opb,TH_VERSION_SUB,8); + /*Describe the encoded frame.*/ + oggpackB_write(_opb,_state->info.frame_width>>4,16); + oggpackB_write(_opb,_state->info.frame_height>>4,16); + oggpackB_write(_opb,_state->info.pic_width,24); + oggpackB_write(_opb,_state->info.pic_height,24); + oggpackB_write(_opb,_state->info.pic_x,8); + oggpackB_write(_opb,_state->info.pic_y,8); + oggpackB_write(_opb,_state->info.fps_numerator,32); + oggpackB_write(_opb,_state->info.fps_denominator,32); + oggpackB_write(_opb,_state->info.aspect_numerator,24); + oggpackB_write(_opb,_state->info.aspect_denominator,24); + oggpackB_write(_opb,_state->info.colorspace,8); + oggpackB_write(_opb,_state->info.target_bitrate,24); + oggpackB_write(_opb,_state->info.quality,6); + oggpackB_write(_opb,_state->info.keyframe_granule_shift,5); + oggpackB_write(_opb,_state->info.pixel_fmt,2); + /*Spare configuration bits.*/ + oggpackB_write(_opb,0,3); + b_o_s=1; + }break; + /*Comment header.*/ + case OC_PACKET_COMMENT_HDR:{ + int vendor_len; + int i; + if(_tc==NULL)return TH_EFAULT; + vendor_len=strlen(_vendor); + 
oggpackB_reset(_opb); + /*Mark this packet as the comment header.*/ + oggpackB_write(_opb,0x81,8); + /*Write the codec string.*/ + oc_pack_octets(_opb,"theora",6); + /*Write the vendor string.*/ + oggpack_write(_opb,vendor_len,32); + oc_pack_octets(_opb,_vendor,vendor_len); + oggpack_write(_opb,_tc->comments,32); + for(i=0;i<_tc->comments;i++){ + if(_tc->user_comments[i]!=NULL){ + oggpack_write(_opb,_tc->comment_lengths[i],32); + oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]); + } + else oggpack_write(_opb,0,32); + } + b_o_s=0; + }break; + /*Codec setup header.*/ + case OC_PACKET_SETUP_HDR:{ + int ret; + oggpackB_reset(_opb); + /*Mark this packet as the setup header.*/ + oggpackB_write(_opb,0x82,8); + /*Write the codec string.*/ + oc_pack_octets(_opb,"theora",6); + /*Write the quantizer tables.*/ + oc_quant_params_pack(_opb,_qinfo); + /*Write the huffman codes.*/ + ret=oc_huff_codes_pack(_opb,_codes); + /*This should never happen, because we validate the tables when they + are set. + If you see this, there is a good chance memory is being corrupted.*/ + if(ret<0)return ret; + b_o_s=0; + }break; + /*No more headers to emit.*/ + default:return 0; + } + /*This is kind of fugly: we hand the user a buffer which they do not own. + We will overwrite it when the next packet is output, so the user better be + done with it by then. + Vorbis is little better: it hands back buffers that it will free the next + time the headers are requested, or when the encoder is cleared. 
+ Hopefully libogg2 will make this much cleaner.*/ + packet=oggpackB_get_buffer(_opb); + /*If there's no packet, malloc failed while writing.*/ + if(packet==NULL)return TH_EFAULT; + _op->packet=packet; + _op->bytes=oggpackB_bytes(_opb); + _op->b_o_s=b_o_s; + _op->e_o_s=0; + _op->granulepos=0; + _op->packetno=*_packet_state+3; + return ++(*_packet_state)+3; +} diff --git a/Engine/lib/libtheora/lib/encint.h b/Engine/lib/libtheora/lib/encint.h new file mode 100644 index 000000000..97897d5a0 --- /dev/null +++ b/Engine/lib/libtheora/lib/encint.h @@ -0,0 +1,493 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#if !defined(_encint_H) +# define _encint_H (1) +# if defined(HAVE_CONFIG_H) +# include "config.h" +# endif +# include "theora/theoraenc.h" +# include "internal.h" +# include "ocintrin.h" +# include "mathops.h" +# include "enquant.h" +# include "huffenc.h" +/*# define OC_COLLECT_METRICS*/ + + + +typedef oc_mv oc_mv2[2]; + +typedef struct oc_enc_opt_vtable oc_enc_opt_vtable; +typedef struct oc_mb_enc_info oc_mb_enc_info; +typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser; +typedef struct oc_iir_filter oc_iir_filter; +typedef struct oc_frame_metrics oc_frame_metrics; +typedef struct oc_rc_state oc_rc_state; +typedef struct th_enc_ctx oc_enc_ctx; +typedef struct oc_token_checkpoint oc_token_checkpoint; + + + +/*Constants for 
the packet-out state machine specific to the encoder.*/ + +/*Next packet to emit: Data packet, but none are ready yet.*/ +#define OC_PACKET_EMPTY (0) +/*Next packet to emit: Data packet, and one is ready.*/ +#define OC_PACKET_READY (1) + +/*All features enabled.*/ +#define OC_SP_LEVEL_SLOW (0) +/*Enable early skip.*/ +#define OC_SP_LEVEL_EARLY_SKIP (1) +/*Disable motion compensation.*/ +#define OC_SP_LEVEL_NOMC (2) +/*Maximum valid speed level.*/ +#define OC_SP_LEVEL_MAX (2) + + +/*The bits used for each of the MB mode codebooks.*/ +extern const unsigned char OC_MODE_BITS[2][OC_NMODES]; + +/*The bits used for each of the MV codebooks.*/ +extern const unsigned char OC_MV_BITS[2][64]; + +/*The minimum value that can be stored in a SB run for each codeword. + The last entry is the upper bound on the length of a single SB run.*/ +extern const ogg_uint16_t OC_SB_RUN_VAL_MIN[8]; +/*The bits used for each SB run codeword.*/ +extern const unsigned char OC_SB_RUN_CODE_NBITS[7]; + +/*The bits used for each block run length (starting with 1).*/ +extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]; + + + +/*Encoder specific functions with accelerated variants.*/ +struct oc_enc_opt_vtable{ + unsigned (*frag_sad)(const unsigned char *_src, + const unsigned char *_ref,int _ystride); + unsigned (*frag_sad_thresh)(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); + unsigned (*frag_sad2_thresh)(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); + unsigned (*frag_satd_thresh)(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); + unsigned (*frag_satd2_thresh)(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); + unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride); + void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src, + const unsigned char *_ref,int 
_ystride); + void (*frag_sub_128)(ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride); + void (*frag_copy2)(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); + void (*frag_recon_intra)(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]); + void (*frag_recon_inter)(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); + void (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]); +}; + + +void oc_enc_vtable_init(oc_enc_ctx *_enc); + + + +/*Encoder-specific macroblock information.*/ +struct oc_mb_enc_info{ + /*Neighboring macro blocks that have MVs available from the current frame.*/ + unsigned cneighbors[4]; + /*Neighboring macro blocks to use for MVs from the previous frame.*/ + unsigned pneighbors[4]; + /*The number of current-frame neighbors.*/ + unsigned char ncneighbors; + /*The number of previous-frame neighbors.*/ + unsigned char npneighbors; + /*Flags indicating which MB modes have been refined.*/ + unsigned char refined; + /*Motion vectors for a macro block for the current frame and the + previous two frames. + Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which + can be used to estimate constant velocity and constant acceleration + predictors. + Uninitialized MVs are (0,0).*/ + oc_mv2 analysis_mv[3]; + /*Current unrefined analysis MVs.*/ + oc_mv unref_mv[2]; + /*Unrefined block MVs.*/ + oc_mv block_mv[4]; + /*Refined block MVs.*/ + oc_mv ref_mv[4]; + /*Minimum motion estimation error from the analysis stage.*/ + ogg_uint16_t error[2]; + /*MB error for half-pel refinement for each frame type.*/ + unsigned satd[2]; + /*Block error for half-pel refinement.*/ + unsigned block_satd[4]; +}; + + + +/*State machine to estimate the opportunity cost of coding a MB mode.*/ +struct oc_mode_scheme_chooser{ + /*Pointers to the lists containing the index of each mode in the mode + alphabet used by each scheme. 
+ The first entry points to the dynamic scheme0_ranks, while the remaining 7 + point to the constant entries stored in OC_MODE_SCHEMES.*/ + const unsigned char *mode_ranks[8]; + /*The ranks for each mode when coded with scheme 0. + These are optimized so that the more frequent modes have lower ranks.*/ + unsigned char scheme0_ranks[OC_NMODES]; + /*The list of modes, sorted in descending order of frequency, that + corresponds to the ranks above.*/ + unsigned char scheme0_list[OC_NMODES]; + /*The number of times each mode has been chosen so far.*/ + int mode_counts[OC_NMODES]; + /*The list of mode coding schemes, sorted in ascending order of bit cost.*/ + unsigned char scheme_list[8]; + /*The number of bits used by each mode coding scheme.*/ + ptrdiff_t scheme_bits[8]; +}; + + +void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser); + + + +/*A 2nd order low-pass Bessel follower. + We use this for rate control because it has fast reaction time, but is + critically damped.*/ +struct oc_iir_filter{ + ogg_int32_t c[2]; + ogg_int64_t g; + ogg_int32_t x[2]; + ogg_int32_t y[2]; +}; + + + +/*The 2-pass metrics associated with a single frame.*/ +struct oc_frame_metrics{ + /*The log base 2 of the scale factor for this frame in Q24 format.*/ + ogg_int32_t log_scale; + /*The number of application-requested duplicates of this frame.*/ + unsigned dup_count:31; + /*The frame type from pass 1.*/ + unsigned frame_type:1; +}; + + + +/*Rate control state information.*/ +struct oc_rc_state{ + /*The target average bits per frame.*/ + ogg_int64_t bits_per_frame; + /*The current buffer fullness (bits available to be used).*/ + ogg_int64_t fullness; + /*The target buffer fullness. 
+ This is where we'd like to be by the last keyframe the appears in the next + buf_delay frames.*/ + ogg_int64_t target; + /*The maximum buffer fullness (total size of the buffer).*/ + ogg_int64_t max; + /*The log of the number of pixels in a frame in Q57 format.*/ + ogg_int64_t log_npixels; + /*The exponent used in the rate model in Q8 format.*/ + unsigned exp[2]; + /*The number of frames to distribute the buffer usage over.*/ + int buf_delay; + /*The total drop count from the previous frame. + This includes duplicates explicitly requested via the + TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/ + ogg_uint32_t prev_drop_count; + /*The log of an estimated scale factor used to obtain the real framerate, for + VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/ + ogg_int64_t log_drop_scale; + /*The log of estimated scale factor for the rate model in Q57 format.*/ + ogg_int64_t log_scale[2]; + /*The log of the target quantizer level in Q57 format.*/ + ogg_int64_t log_qtarget; + /*Will we drop frames to meet bitrate target?*/ + unsigned char drop_frames; + /*Do we respect the maximum buffer fullness?*/ + unsigned char cap_overflow; + /*Can the reservoir go negative?*/ + unsigned char cap_underflow; + /*Second-order lowpass filters to track scale and VFR.*/ + oc_iir_filter scalefilter[2]; + int inter_count; + int inter_delay; + int inter_delay_target; + oc_iir_filter vfrfilter; + /*Two-pass mode state. + 0 => 1-pass encoding. + 1 => 1st pass of 2-pass encoding. + 2 => 2nd pass of 2-pass encoding.*/ + int twopass; + /*Buffer for current frame metrics.*/ + unsigned char twopass_buffer[48]; + /*The number of bytes in the frame metrics buffer. 
+ When 2-pass encoding is enabled, this is set to 0 after each frame is + submitted, and must be non-zero before the next frame will be accepted.*/ + int twopass_buffer_bytes; + int twopass_buffer_fill; + /*Whether or not to force the next frame to be a keyframe.*/ + unsigned char twopass_force_kf; + /*The metrics for the previous frame.*/ + oc_frame_metrics prev_metrics; + /*The metrics for the current frame.*/ + oc_frame_metrics cur_metrics; + /*The buffered metrics for future frames.*/ + oc_frame_metrics *frame_metrics; + int nframe_metrics; + int cframe_metrics; + /*The index of the current frame in the circular metric buffer.*/ + int frame_metrics_head; + /*The frame count of each type (keyframes, delta frames, and dup frames); + 32 bits limits us to 2.268 years at 60 fps.*/ + ogg_uint32_t frames_total[3]; + /*The number of frames of each type yet to be processed.*/ + ogg_uint32_t frames_left[3]; + /*The sum of the scale values for each frame type.*/ + ogg_int64_t scale_sum[2]; + /*The start of the window over which the current scale sums are taken.*/ + int scale_window0; + /*The end of the window over which the current scale sums are taken.*/ + int scale_window_end; + /*The frame count of each type in the current 2-pass window; this does not + include dup frames.*/ + int nframes[3]; + /*The total accumulated estimation bias.*/ + ogg_int64_t rate_bias; +}; + + +void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc); +void oc_rc_state_clear(oc_rc_state *_rc); + +void oc_enc_rc_resize(oc_enc_ctx *_enc); +int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp); +void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type); +int oc_enc_update_rc_state(oc_enc_ctx *_enc, + long _bits,int _qti,int _qi,int _trial,int _droppable); +int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf); +int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes); + + + +/*The internal encoder state.*/ +struct th_enc_ctx{ + /*Shared encoder/decoder state.*/ + 
oc_theora_state state; + /*Buffer in which to assemble packets.*/ + oggpack_buffer opb; + /*Encoder-specific macroblock information.*/ + oc_mb_enc_info *mb_info; + /*DC coefficients after prediction.*/ + ogg_int16_t *frag_dc; + /*The list of coded macro blocks, in coded order.*/ + unsigned *coded_mbis; + /*The number of coded macro blocks.*/ + size_t ncoded_mbis; + /*Whether or not packets are ready to be emitted. + This takes on negative values while there are remaining header packets to + be emitted, reaches 0 when the codec is ready for input, and becomes + positive when a frame has been processed and data packets are ready.*/ + int packet_state; + /*The maximum distance between keyframes.*/ + ogg_uint32_t keyframe_frequency_force; + /*The number of duplicates to produce for the next frame.*/ + ogg_uint32_t dup_count; + /*The number of duplicates remaining to be emitted for the current frame.*/ + ogg_uint32_t nqueued_dups; + /*The number of duplicates emitted for the last frame.*/ + ogg_uint32_t prev_dup_count; + /*The current speed level.*/ + int sp_level; + /*Whether or not VP3 compatibility mode has been enabled.*/ + unsigned char vp3_compatible; + /*Whether or not any INTER frames have been coded.*/ + unsigned char coded_inter_frame; + /*Whether or not previous frame was dropped.*/ + unsigned char prevframe_dropped; + /*Stores most recently chosen Huffman tables for each frame type, DC and AC + coefficients, and luma and chroma tokens. 
+ The actual Huffman table used for a given coefficient depends not only on + the choice made here, but also its index in the zig-zag ordering.*/ + unsigned char huff_idxs[2][2][2]; + /*Current count of bits used by each MV coding mode.*/ + size_t mv_bits[2]; + /*The mode scheme chooser for estimating mode coding costs.*/ + oc_mode_scheme_chooser chooser; + /*The number of vertical super blocks in an MCU.*/ + int mcu_nvsbs; + /*The SSD error for skipping each fragment in the current MCU.*/ + unsigned *mcu_skip_ssd; + /*The DCT token lists for each coefficient and each plane.*/ + unsigned char **dct_tokens[3]; + /*The extra bits associated with each DCT token.*/ + ogg_uint16_t **extra_bits[3]; + /*The number of DCT tokens for each coefficient for each plane.*/ + ptrdiff_t ndct_tokens[3][64]; + /*Pending EOB runs for each coefficient for each plane.*/ + ogg_uint16_t eob_run[3][64]; + /*The offset of the first DCT token for each coefficient for each plane.*/ + unsigned char dct_token_offs[3][64]; + /*The last DC coefficient for each plane and reference frame.*/ + int dc_pred_last[3][3]; +#if defined(OC_COLLECT_METRICS) + /*Fragment SATD statistics for MB mode estimation metrics.*/ + unsigned *frag_satd; + /*Fragment SSD statistics for MB mode estimation metrics.*/ + unsigned *frag_ssd; +#endif + /*The R-D optimization parameter.*/ + int lambda; + /*The huffman tables in use.*/ + th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; + /*The quantization parameters in use.*/ + th_quant_info qinfo; + oc_iquant *enquant_tables[64][3][2]; + oc_iquant_table enquant_table_data[64][3][2]; + /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi + value. + This is used to paramterize the rate control decisions. + They are kept in the log domain to simplify later processing. 
+ Keep in mind these are DCT domain quantizers, and so are scaled by an + additional factor of 4 from the pixel domain.*/ + ogg_int64_t log_qavg[2][64]; + /*The buffer state used to drive rate control.*/ + oc_rc_state rc; + /*Table for encoder acceleration functions.*/ + oc_enc_opt_vtable opt_vtable; +}; + + +void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode); +int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode); +#if defined(OC_COLLECT_METRICS) +void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc); +void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc); +#endif + + + +/*Perform fullpel motion search for a single MB against both reference frames.*/ +void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi); +/*Refine a MB MV for one frame.*/ +void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame); +/*Refine the block MVs.*/ +void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi); + + + +/*Used to rollback a tokenlog transaction when we retroactively decide to skip + a fragment. 
+ A checkpoint is taken right before each token is added.*/ +struct oc_token_checkpoint{ + /*The color plane the token was added to.*/ + unsigned char pli; + /*The zig-zag index the token was added to.*/ + unsigned char zzi; + /*The outstanding EOB run count before the token was added.*/ + ogg_uint16_t eob_run; + /*The token count before the token was added.*/ + ptrdiff_t ndct_tokens; +}; + + + +void oc_enc_tokenize_start(oc_enc_ctx *_enc); +int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, + ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, + int _zzi,oc_token_checkpoint **_stack,int _acmin); +void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, + const oc_token_checkpoint *_stack,int _n); +void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, + int _pli,int _fragy0,int _frag_yend); +void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, + const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, + int _prev_ndct_tokens1,int _prev_eob_run1); +void oc_enc_tokenize_finish(oc_enc_ctx *_enc); + + + +/*Utility routine to encode one of the header packets.*/ +int oc_state_flushheader(oc_theora_state *_state,int *_packet_state, + oggpack_buffer *_opb,const th_quant_info *_qinfo, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS], + const char *_vendor,th_comment *_tc,ogg_packet *_op); + + + +/*Encoder-specific accelerated functions.*/ +void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride); +void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride); +unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc, + const unsigned char 
*_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc, + const unsigned char *_src,int _ystride); +void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc, + unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]); +void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64], + const ogg_int16_t _x[64]); + +/*Default pure-C implementations.*/ +void oc_enc_vtable_init_c(oc_enc_ctx *_enc); + +void oc_enc_frag_sub_c(ogg_int16_t _diff[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride); +void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride); +void oc_enc_frag_copy2_c(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +unsigned oc_enc_frag_sad_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src, + 
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride); +void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]); + +#endif diff --git a/Engine/lib/libtheora/lib/encode.c b/Engine/lib/libtheora/lib/encode.c new file mode 100644 index 000000000..0c5ea6a17 --- /dev/null +++ b/Engine/lib/libtheora/lib/encode.c @@ -0,0 +1,1615 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encode.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" +#if defined(OC_X86_ASM) +# include "x86/x86enc.h" +#endif + + + +/*The default quantization parameters used by VP3.1.*/ +static const int OC_VP31_RANGE_SIZES[1]={63}; +static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={ + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 58, 68,109,103, 77, + 24, 35, 55, 64, 81,104,113, 92, + 49, 64, 78, 87,103,121,120,101, + 72, 92, 95, 98,112,100,103, 99 + }, + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 58, 68,109,103, 77, + 24, 35, 55, 64, 81,104,113, 92, + 49, 64, 78, 87,103,121,120,101, + 72, 92, 95, 98,112,100,103, 99 + } +}; +static const th_quant_base 
OC_VP31_BASES_INTRA_C[2]={ + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + }, + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + } +}; +static const th_quant_base OC_VP31_BASES_INTER[2]={ + { + 16, 16, 16, 20, 24, 28, 32, 40, + 16, 16, 20, 24, 28, 32, 40, 48, + 16, 20, 24, 28, 32, 40, 48, 64, + 20, 24, 28, 32, 40, 48, 64, 64, + 24, 28, 32, 40, 48, 64, 64, 64, + 28, 32, 40, 48, 64, 64, 64, 96, + 32, 40, 48, 64, 64, 64, 96,128, + 40, 48, 64, 64, 64, 96,128,128 + }, + { + 16, 16, 16, 20, 24, 28, 32, 40, + 16, 16, 20, 24, 28, 32, 40, 48, + 16, 20, 24, 28, 32, 40, 48, 64, + 20, 24, 28, 32, 40, 48, 64, 64, + 24, 28, 32, 40, 48, 64, 64, 64, + 28, 32, 40, 48, 64, 64, 64, 96, + 32, 40, 48, 64, 64, 64, 96,128, + 40, 48, 64, 64, 64, 96,128,128 + } +}; + +const th_quant_info TH_VP31_QUANT_INFO={ + { + 220,200,190,180,170,170,160,160, + 150,150,140,140,130,130,120,120, + 110,110,100,100, 90, 90, 90, 80, + 80, 80, 70, 70, 70, 60, 60, 60, + 60, 50, 50, 50, 50, 40, 40, 40, + 40, 40, 30, 30, 30, 30, 30, 30, + 30, 20, 20, 20, 20, 20, 20, 20, + 20, 10, 10, 10, 10, 10, 10, 10 + }, + { + 500,450,400,370,340,310,285,265, + 245,225,210,195,185,180,170,160, + 150,145,135,130,125,115,110,107, + 100, 96, 93, 89, 85, 82, 75, 74, + 70, 68, 64, 60, 57, 56, 52, 50, + 49, 45, 44, 43, 40, 38, 37, 35, + 33, 32, 30, 29, 28, 25, 24, 22, + 21, 19, 18, 17, 15, 13, 12, 10 + }, + { + 30,25,20,20,15,15,14,14, + 13,13,12,12,11,11,10,10, + 9, 9, 8, 8, 7, 7, 7, 7, + 6, 6, 6, 6, 5, 5, 5, 5, + 4, 4, 4, 4, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C} + }, + { + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER} + } + } +}; + +/*The current default quantization parameters.*/ +static const int OC_DEF_QRANGE_SIZES[3]={32,16,15}; +static const th_quant_base OC_DEF_BASES_INTRA_Y[4]={ + { + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + }, + { + 15, 12, 12, 15, 18, 20, 20, 21, + 13, 13, 14, 17, 18, 21, 21, 20, + 14, 14, 15, 18, 20, 21, 21, 21, + 14, 16, 17, 19, 20, 21, 21, 21, + 16, 17, 20, 21, 21, 21, 21, 21, + 18, 19, 20, 21, 21, 21, 21, 21, + 20, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21 + }, + { + 16, 12, 11, 16, 20, 25, 27, 28, + 13, 13, 14, 18, 21, 28, 28, 27, + 14, 13, 16, 20, 25, 28, 28, 28, + 14, 16, 19, 22, 27, 29, 29, 28, + 17, 19, 25, 28, 28, 30, 30, 29, + 20, 24, 27, 28, 29, 30, 30, 29, + 27, 28, 29, 29, 30, 30, 30, 30, + 29, 29, 29, 29, 30, 30, 30, 29 + }, + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 58, 68,109,103, 77, + 24, 35, 55, 64, 81,104,113, 92, + 49, 64, 78, 87,103,121,120,101, + 72, 92, 95, 98,112,100,103, 99 + } +}; +static const th_quant_base OC_DEF_BASES_INTRA_C[4]={ + { + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19 + }, + { + 18, 18, 21, 25, 26, 26, 
26, 26, + 18, 20, 22, 26, 26, 26, 26, 26, + 21, 22, 25, 26, 26, 26, 26, 26, + 25, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26 + }, + { + 17, 18, 22, 31, 36, 36, 36, 36, + 18, 20, 24, 34, 36, 36, 36, 36, + 22, 24, 33, 36, 36, 36, 36, 36, + 31, 34, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36 + }, + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + } +}; +static const th_quant_base OC_DEF_BASES_INTER[4]={ + { + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21 + }, + { + 18, 18, 18, 21, 23, 24, 25, 27, + 18, 18, 21, 23, 24, 25, 27, 28, + 18, 21, 23, 24, 25, 27, 28, 29, + 21, 23, 24, 25, 27, 28, 29, 29, + 23, 24, 25, 27, 28, 29, 29, 29, + 24, 25, 27, 28, 29, 29, 29, 30, + 25, 27, 28, 29, 29, 29, 30, 30, + 27, 28, 29, 29, 29, 30, 30, 30 + }, + { + 17, 17, 17, 20, 23, 26, 28, 32, + 17, 17, 20, 23, 26, 28, 32, 34, + 17, 20, 23, 26, 28, 32, 34, 37, + 20, 23, 26, 28, 32, 34, 37, 37, + 23, 26, 28, 32, 34, 37, 37, 37, + 26, 28, 32, 34, 37, 37, 37, 41, + 28, 32, 34, 37, 37, 37, 41, 42, + 32, 34, 37, 37, 37, 41, 42, 42 + }, + { + 16, 16, 16, 20, 24, 28, 32, 40, + 16, 16, 20, 24, 28, 32, 40, 48, + 16, 20, 24, 28, 32, 40, 48, 64, + 20, 24, 28, 32, 40, 48, 64, 64, + 24, 28, 32, 40, 48, 64, 64, 64, + 28, 32, 40, 48, 64, 64, 64, 96, + 32, 40, 48, 64, 64, 64, 96,128, + 40, 48, 64, 64, 64, 96,128,128 + } +}; + +const 
th_quant_info TH_DEF_QUANT_INFO={ + { + 365,348,333,316,300,287,277,265, + 252,240,229,219,206,197,189,180, + 171,168,160,153,146,139,132,127, + 121,115,110,107,101, 97, 94, 89, + 85, 83, 78, 73, 72, 67, 66, 62, + 60, 59, 56, 53, 52, 48, 47, 43, + 42, 40, 36, 35, 34, 33, 31, 30, + 28, 25, 24, 22, 20, 17, 14, 10 + }, + { + 365,348,333,316,300,287,277,265, + 252,240,229,219,206,197,189,180, + 171,168,160,153,146,139,132,127, + 121,115,110,107,101, 97, 94, 89, + 85, 83, 78, 73, 72, 67, 66, 62, + 60, 59, 56, 53, 52, 48, 47, 43, + 42, 40, 36, 35, 34, 33, 31, 30, + 28, 25, 24, 22, 20, 17, 14, 10 + }, + { + 30,25,20,20,15,15,14,14, + 13,13,12,12,11,11,10,10, + 9, 9, 8, 8, 7, 7, 7, 7, + 6, 6, 6, 6, 5, 5, 5, 5, + 4, 4, 4, 4, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_Y}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C} + }, + { + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER} + } + } +}; + + + +/*The Huffman codes used for macro block modes.*/ + +const unsigned char OC_MODE_BITS[2][OC_NMODES]={ + /*Codebook 0: a maximally skewed prefix code.*/ + {1,2,3,4,5,6,7,7}, + /*Codebook 1: a fixed-length code.*/ + {3,3,3,3,3,3,3,3} +}; + +static const unsigned char OC_MODE_CODES[2][OC_NMODES]={ + /*Codebook 0: a maximally skewed prefix code.*/ + {0x00,0x02,0x06,0x0E,0x1E,0x3E,0x7E,0x7F}, + /*Codebook 1: a fixed-length code.*/ + {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07} +}; + + +/*The Huffman codes used for motion vectors.*/ + +const unsigned char OC_MV_BITS[2][64]={ + /*Codebook 0: VLC code.*/ + { + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,7,7,7,7,7,7,7,7,6,6,6,6,4,4,3, + 3, + 3,4,4,6,6,6,6,7,7,7,7,7,7,7,7,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + }, + /*Codebook 1: (5 bit magnitude, 1 bit sign). 
+ This wastes a code word (0x01, negative zero), or a bit (0x00, positive + zero, requires only 5 bits to uniquely decode), but is hopefully not used + very often.*/ + { + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 + } +}; + +static const unsigned char OC_MV_CODES[2][64]={ + /*Codebook 0: VLC code.*/ + { + 0xFF,0xFD,0xFB,0xF9,0xF7,0xF5,0xF3, + 0xF1,0xEF,0xED,0xEB,0xE9,0xE7,0xE5,0xE3, + 0xE1,0x6F,0x6D,0x6B,0x69,0x67,0x65,0x63, + 0x61,0x2F,0x2D,0x2B,0x29,0x09,0x07,0x02, + 0x00, + 0x01,0x06,0x08,0x28,0x2A,0x2C,0x2E,0x60, + 0x62,0x64,0x66,0x68,0x6A,0x6C,0x6E,0xE0, + 0xE2,0xE4,0xE6,0xE8,0xEA,0xEC,0xEE,0xF0, + 0xF2,0xF4,0xF6,0xF8,0xFA,0xFC,0xFE + }, + /*Codebook 1: (5 bit magnitude, 1 bit sign).*/ + { + 0x3F,0x3D,0x3B,0x39,0x37,0x35,0x33, + 0x31,0x2F,0x2D,0x2B,0x29,0x27,0x25,0x23, + 0x21,0x1F,0x1D,0x1B,0x19,0x17,0x15,0x13, + 0x11,0x0F,0x0D,0x0B,0x09,0x07,0x05,0x03, + 0x00, + 0x02,0x04,0x06,0x08,0x0A,0x0C,0x0E,0x10, + 0x12,0x14,0x16,0x18,0x1A,0x1C,0x1E,0x20, + 0x22,0x24,0x26,0x28,0x2A,0x2C,0x2E,0x30, + 0x32,0x34,0x36,0x38,0x3A,0x3C,0x3E + } +}; + + + +/*Super block run coding scheme: + Codeword Run Length + 0 1 + 10x 2-3 + 110x 4-5 + 1110xx 6-9 + 11110xxx 10-17 + 111110xxxx 18-33 + 111111xxxxxxxxxxxx 34-4129*/ +const ogg_uint16_t OC_SB_RUN_VAL_MIN[8]={1,2,4,6,10,18,34,4130}; +static const unsigned OC_SB_RUN_CODE_PREFIX[7]={ + 0,4,0xC,0x38,0xF0,0x3E0,0x3F000 +}; +const unsigned char OC_SB_RUN_CODE_NBITS[7]={1,3,4,6,8,10,18}; + + +/*Writes the bit pattern for the run length of a super block run to the given + oggpack_buffer. + _opb: The buffer to write to. + _run_count: The length of the run, which must be positive. + _flag: The current flag. 
+ _done: Whether or not more flags are to be encoded.*/ +static void oc_sb_run_pack(oggpack_buffer *_opb,ptrdiff_t _run_count, + int _flag,int _done){ + int i; + if(_run_count>=4129){ + do{ + oggpackB_write(_opb,0x3FFFF,18); + _run_count-=4129; + if(_run_count>0)oggpackB_write(_opb,_flag,1); + else if(!_done)oggpackB_write(_opb,!_flag,1); + } + while(_run_count>=4129); + if(_run_count<=0)return; + } + for(i=0;_run_count>=OC_SB_RUN_VAL_MIN[i+1];i++); + oggpackB_write(_opb,OC_SB_RUN_CODE_PREFIX[i]+_run_count-OC_SB_RUN_VAL_MIN[i], + OC_SB_RUN_CODE_NBITS[i]); +} + + + +/*Block run coding scheme: + Codeword Run Length + 0x 1-2 + 10x 3-4 + 110x 5-6 + 1110xx 7-10 + 11110xx 11-14 + 11111xxxx 15-30*/ +const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]={ + 2,2,3,3,4,4,6,6,6,6,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9 +}; +static const ogg_uint16_t OC_BLOCK_RUN_CODE_PATTERN[30]={ + 0x000,0x001,0x004,0x005,0x00C,0x00D,0x038, + 0x039,0x03A,0x03B,0x078,0x079,0x07A,0x07B,0x1F0, + 0x1F1,0x1F2,0x1F3,0x1F4,0x1F5,0x1F6,0x1F7,0x1F8, + 0x1F9,0x1FA,0x1FB,0x1FC,0x1FD,0x1FE,0x1FF +}; + + +/*Writes the bit pattern for the run length of a block run to the given + oggpack_buffer. + _opb: The buffer to write to. + _run_count: The length of the run. 
+ This must be positive, and no more than 30.*/ +static void oc_block_run_pack(oggpack_buffer *_opb,int _run_count){ + oggpackB_write(_opb,OC_BLOCK_RUN_CODE_PATTERN[_run_count-1], + OC_BLOCK_RUN_CODE_NBITS[_run_count-1]); +} + + + +static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){ + /*Mark this as a data packet.*/ + oggpackB_write(&_enc->opb,0,1); + /*Output the frame type (key frame or delta frame).*/ + oggpackB_write(&_enc->opb,_enc->state.frame_type,1); + /*Write out the current qi list.*/ + oggpackB_write(&_enc->opb,_enc->state.qis[0],6); + if(_enc->state.nqis>1){ + oggpackB_write(&_enc->opb,1,1); + oggpackB_write(&_enc->opb,_enc->state.qis[1],6); + if(_enc->state.nqis>2){ + oggpackB_write(&_enc->opb,1,1); + oggpackB_write(&_enc->opb,_enc->state.qis[2],6); + } + else oggpackB_write(&_enc->opb,0,1); + } + else oggpackB_write(&_enc->opb,0,1); + if(_enc->state.frame_type==OC_INTRA_FRAME){ + /*Key frames have 3 unused configuration bits, holdovers from the VP3 days. + Most of the other unused bits in the VP3 headers were eliminated. + Monty kept these to leave us some wiggle room for future expansion, + though a single bit in all frames would have been far more useful.*/ + oggpackB_write(&_enc->opb,0,3); + } +} + +/*Writes the bit flags for whether or not each super block is partially coded + or not. + These flags are run-length encoded, with the flag value alternating between + each run. 
+ Return: The number partially coded SBs.*/ +static unsigned oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){ + const oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned npartial; + int flag; + sb_flags=_enc->state.sb_flags; + nsbs=_enc->state.nsbs; + flag=sb_flags[0].coded_partially; + oggpackB_write(&_enc->opb,flag,1); + sbi=npartial=0; + do{ + unsigned run_count; + for(run_count=0;sbiopb,run_count,flag,sbi>=nsbs); + flag=!flag; + } + while(sbistate.sb_flags; + nsbs=_enc->state.nsbs; + /*Skip partially coded super blocks; their flags have already been coded.*/ + for(sbi=0;sb_flags[sbi].coded_partially;sbi++); + flag=sb_flags[sbi].coded_fully; + oggpackB_write(&_enc->opb,flag,1); + do{ + unsigned run_count; + for(run_count=0;sbiopb,run_count,flag,sbi>=nsbs); + flag=!flag; + } + while(sbistate.nsbs)oc_enc_coded_sb_flags_pack(_enc); + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + sb_flags=_enc->state.sb_flags; + nsbs=_enc->state.nsbs; + frags=_enc->state.frags; + for(sbi=0;sbiopb,flag,1); + run_count=0; + nsbs=sbi=0; + for(pli=0;pli<3;pli++){ + nsbs+=_enc->state.fplanes[pli].nsbs; + for(;sbi=0){ + if(frags[fragi].coded!=flag){ + oc_block_run_pack(&_enc->opb,run_count); + flag=!flag; + run_count=1; + } + else run_count++; + } + } + } + } + } + } + /*Flush any trailing block coded run.*/ + if(run_count>0)oc_block_run_pack(&_enc->opb,run_count); + } +} + +static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){ + const unsigned char *mode_codes; + const unsigned char *mode_bits; + const unsigned char *mode_ranks; + unsigned *coded_mbis; + size_t ncoded_mbis; + const signed char *mb_modes; + unsigned mbii; + int scheme; + int mb_mode; + scheme=_enc->chooser.scheme_list[0]; + /*Encode the best scheme.*/ + oggpackB_write(&_enc->opb,scheme,3); + /*If the chosen scheme is scheme 0, send the mode frequency ordering.*/ + if(scheme==0){ + for(mb_mode=0;mb_modeopb,_enc->chooser.scheme0_ranks[mb_mode],3); + } + } + mode_ranks=_enc->chooser.mode_ranks[scheme]; + 
mode_bits=OC_MODE_BITS[scheme+1>>3]; + mode_codes=OC_MODE_CODES[scheme+1>>3]; + coded_mbis=_enc->coded_mbis; + ncoded_mbis=_enc->ncoded_mbis; + mb_modes=_enc->state.mb_modes; + for(mbii=0;mbiiopb,mode_codes[rank],mode_bits[rank]); + } +} + +static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _mv_scheme,int _dx,int _dy){ + oggpackB_write(&_enc->opb, + OC_MV_CODES[_mv_scheme][_dx+31],OC_MV_BITS[_mv_scheme][_dx+31]); + oggpackB_write(&_enc->opb, + OC_MV_CODES[_mv_scheme][_dy+31],OC_MV_BITS[_mv_scheme][_dy+31]); +} + +static void oc_enc_mvs_pack(oc_enc_ctx *_enc){ + const unsigned *coded_mbis; + size_t ncoded_mbis; + const oc_mb_map *mb_maps; + const signed char *mb_modes; + const oc_fragment *frags; + const oc_mv *frag_mvs; + unsigned mbii; + int mv_scheme; + /*Choose the coding scheme.*/ + mv_scheme=_enc->mv_bits[1]<_enc->mv_bits[0]; + oggpackB_write(&_enc->opb,mv_scheme,1); + /*Encode the motion vectors. + Macro blocks are iterated in Hilbert scan order, but the MVs within the + macro block are coded in raster order.*/ + coded_mbis=_enc->coded_mbis; + ncoded_mbis=_enc->ncoded_mbis; + mb_modes=_enc->state.mb_modes; + mb_maps=(const oc_mb_map *)_enc->state.mb_maps; + frags=_enc->state.frags; + frag_mvs=(const oc_mv *)_enc->state.frag_mvs; + for(mbii=0;mbiistate.nqis<=1)return; + ncoded_fragis=_enc->state.ntotal_coded_fragis; + if(ncoded_fragis<=0)return; + coded_fragis=_enc->state.coded_fragis; + frags=_enc->state.frags; + flag=!!frags[coded_fragis[0]].qii; + oggpackB_write(&_enc->opb,flag,1); + nqi0=0; + for(fragii=0;fragiiopb,run_count,flag,fragii>=ncoded_fragis); + flag=!flag; + } + if(_enc->state.nqis<3||nqi0>=ncoded_fragis)return; + for(fragii=0;!frags[coded_fragis[fragii]].qii;fragii++); + flag=frags[coded_fragis[fragii]].qii-1; + oggpackB_write(&_enc->opb,flag,1); + while(fragiiopb,run_count,flag,fragii>=ncoded_fragis); + flag=!flag; + } +} + +/*Counts the tokens of each type used for the given range of coefficient + indices in zig-zag order. 
+ _zzi_start: The first zig-zag index to include. + _zzi_end: The first zig-zag index to not include. + _token_counts_y: Returns the token counts for the Y' plane. + _token_counts_c: Returns the token counts for the Cb and Cr planes.*/ +static void oc_enc_count_tokens(oc_enc_ctx *_enc,int _zzi_start,int _zzi_end, + ptrdiff_t _token_counts_y[32],ptrdiff_t _token_counts_c[32]){ + const unsigned char *dct_tokens; + ptrdiff_t ndct_tokens; + int pli; + int zzi; + ptrdiff_t ti; + memset(_token_counts_y,0,32*sizeof(*_token_counts_y)); + memset(_token_counts_c,0,32*sizeof(*_token_counts_c)); + for(zzi=_zzi_start;zzi<_zzi_end;zzi++){ + dct_tokens=_enc->dct_tokens[0][zzi]; + ndct_tokens=_enc->ndct_tokens[0][zzi]; + for(ti=_enc->dct_token_offs[0][zzi];tidct_tokens[pli][zzi]; + ndct_tokens=_enc->ndct_tokens[pli][zzi]; + for(ti=_enc->dct_token_offs[pli][zzi];tihuff_codes[huffi+huff_offs][token].nbits; + } + } +} + +/*Returns the Huffman index using the fewest number of bits.*/ +static int oc_select_huff_idx(size_t _bit_counts[16]){ + int best_huffi; + int huffi; + best_huffi=0; + for(huffi=1;huffi<16;huffi++)if(_bit_counts[huffi]<_bit_counts[best_huffi]){ + best_huffi=huffi; + } + return best_huffi; +} + +static void oc_enc_huff_group_pack(oc_enc_ctx *_enc, + int _zzi_start,int _zzi_end,const int _huff_idxs[2]){ + int zzi; + for(zzi=_zzi_start;zzi<_zzi_end;zzi++){ + int pli; + for(pli=0;pli<3;pli++){ + const unsigned char *dct_tokens; + const ogg_uint16_t *extra_bits; + ptrdiff_t ndct_tokens; + const th_huff_code *huff_codes; + ptrdiff_t ti; + dct_tokens=_enc->dct_tokens[pli][zzi]; + extra_bits=_enc->extra_bits[pli][zzi]; + ndct_tokens=_enc->ndct_tokens[pli][zzi]; + huff_codes=_enc->huff_codes[_huff_idxs[pli+1>>1]]; + for(ti=_enc->dct_token_offs[pli][zzi];tiopb,huff_codes[token].pattern, + huff_codes[token].nbits); + neb=OC_DCT_TOKEN_EXTRA_BITS[token]; + if(neb)oggpackB_write(&_enc->opb,extra_bits[ti],neb); + } + } + } +} + +static void oc_enc_residual_tokens_pack(oc_enc_ctx 
*_enc){ + static const unsigned char OC_HUFF_GROUP_MIN[6]={0,1,6,15,28,64}; + static const unsigned char *OC_HUFF_GROUP_MAX=OC_HUFF_GROUP_MIN+1; + ptrdiff_t token_counts_y[32]; + ptrdiff_t token_counts_c[32]; + size_t bits_y[16]; + size_t bits_c[16]; + int huff_idxs[2]; + int frame_type; + int hgi; + frame_type=_enc->state.frame_type; + /*Choose which Huffman tables to use for the DC token list.*/ + oc_enc_count_tokens(_enc,0,1,token_counts_y,token_counts_c); + memset(bits_y,0,sizeof(bits_y)); + memset(bits_c,0,sizeof(bits_c)); + oc_enc_count_bits(_enc,0,token_counts_y,bits_y); + oc_enc_count_bits(_enc,0,token_counts_c,bits_c); + huff_idxs[0]=oc_select_huff_idx(bits_y); + huff_idxs[1]=oc_select_huff_idx(bits_c); + /*Write the DC token list with the chosen tables.*/ + oggpackB_write(&_enc->opb,huff_idxs[0],4); + oggpackB_write(&_enc->opb,huff_idxs[1],4); + _enc->huff_idxs[frame_type][0][0]=(unsigned char)huff_idxs[0]; + _enc->huff_idxs[frame_type][0][1]=(unsigned char)huff_idxs[1]; + oc_enc_huff_group_pack(_enc,0,1,huff_idxs); + /*Choose which Huffman tables to use for the AC token lists.*/ + memset(bits_y,0,sizeof(bits_y)); + memset(bits_c,0,sizeof(bits_c)); + for(hgi=1;hgi<5;hgi++){ + oc_enc_count_tokens(_enc,OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi], + token_counts_y,token_counts_c); + oc_enc_count_bits(_enc,hgi,token_counts_y,bits_y); + oc_enc_count_bits(_enc,hgi,token_counts_c,bits_c); + } + huff_idxs[0]=oc_select_huff_idx(bits_y); + huff_idxs[1]=oc_select_huff_idx(bits_c); + /*Write the AC token lists using the chosen tables.*/ + oggpackB_write(&_enc->opb,huff_idxs[0],4); + oggpackB_write(&_enc->opb,huff_idxs[1],4); + _enc->huff_idxs[frame_type][1][0]=(unsigned char)huff_idxs[0]; + _enc->huff_idxs[frame_type][1][1]=(unsigned char)huff_idxs[1]; + for(hgi=1;hgi<5;hgi++){ + huff_idxs[0]+=16; + huff_idxs[1]+=16; + oc_enc_huff_group_pack(_enc, + OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi],huff_idxs); + } +} + +static void oc_enc_frame_pack(oc_enc_ctx *_enc){ 
+ oggpackB_reset(&_enc->opb); + /*Only proceed if we have some coded blocks. + If there are no coded blocks, we can drop this frame simply by emitting a + 0 byte packet.*/ + if(_enc->state.ntotal_coded_fragis>0){ + oc_enc_frame_header_pack(_enc); + if(_enc->state.frame_type==OC_INTER_FRAME){ + /*Coded block flags, MB modes, and MVs are only needed for delta frames.*/ + oc_enc_coded_flags_pack(_enc); + oc_enc_mb_modes_pack(_enc); + oc_enc_mvs_pack(_enc); + } + oc_enc_block_qis_pack(_enc); + oc_enc_tokenize_finish(_enc); + oc_enc_residual_tokens_pack(_enc); + } + /*Success: Mark the packet as ready to be flushed.*/ + _enc->packet_state=OC_PACKET_READY; +#if defined(OC_COLLECT_METRICS) + oc_enc_mode_metrics_collect(_enc); +#endif +} + + +void oc_enc_vtable_init_c(oc_enc_ctx *_enc){ + /*The implementations prefixed with oc_enc_ are encoder-specific. + The rest we re-use from the decoder.*/ + _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c; + _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c; + _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c; + _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c; + _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c; + _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c; + _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c; + _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c; + _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c; + _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; + _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c; +} + +/*Initialize the macro block neighbor lists for MC analysis. 
+ This assumes that the entire mb_info memory region has been initialized with + zeros.*/ +static void oc_enc_mb_info_init(oc_enc_ctx *_enc){ + oc_mb_enc_info *embs; + const signed char *mb_modes; + unsigned nhsbs; + unsigned nvsbs; + unsigned nhmbs; + unsigned nvmbs; + unsigned sby; + mb_modes=_enc->state.mb_modes; + embs=_enc->mb_info; + nhsbs=_enc->state.fplanes[0].nhsbs; + nvsbs=_enc->state.fplanes[0].nvsbs; + nhmbs=_enc->state.nhmbs; + nvmbs=_enc->state.nvmbs; + for(sby=0;sby>1); + mby=2*sby+(quadi+1>>1&1); + /*Fill in the neighbors with current motion vectors available.*/ + for(ni=0;ni=nhmbs||nmby<0||nmby>=nvmbs)continue; + nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1]; + if(mb_modes[nmbi]==OC_MODE_INVALID)continue; + embs[mbi].cneighbors[embs[mbi].ncneighbors++]=nmbi; + } + /*Fill in the neighbors with previous motion vectors available.*/ + for(ni=0;ni<4;ni++){ + nmbx=mbx+PDX[ni]; + nmby=mby+PDY[ni]; + if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue; + nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1]; + if(mb_modes[nmbi]==OC_MODE_INVALID)continue; + embs[mbi].pneighbors[embs[mbi].npneighbors++]=nmbi; + } + } + } + } +} + +static int oc_enc_set_huffman_codes(oc_enc_ctx *_enc, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){ + int ret; + if(_enc==NULL)return TH_EFAULT; + if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL; + if(_codes==NULL)_codes=TH_VP31_HUFF_CODES; + /*Validate the codes.*/ + oggpackB_reset(&_enc->opb); + ret=oc_huff_codes_pack(&_enc->opb,_codes); + if(ret<0)return ret; + memcpy(_enc->huff_codes,_codes,sizeof(_enc->huff_codes)); + return 0; +} + +/*Sets the quantization parameters to use. + This may only be called before the setup header is written. + If it is called multiple times, only the last call has any effect. + _qinfo: The quantization parameters. + These are described in more detail in theoraenc.h. 
+ This can be NULL, in which case the default quantization parameters + will be used.*/ +static int oc_enc_set_quant_params(oc_enc_ctx *_enc, + const th_quant_info *_qinfo){ + int qi; + int pli; + int qti; + if(_enc==NULL)return TH_EFAULT; + if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL; + if(_qinfo==NULL)_qinfo=&TH_DEF_QUANT_INFO; + /*TODO: Analyze for packing purposes instead of just doing a shallow copy.*/ + memcpy(&_enc->qinfo,_qinfo,sizeof(_enc->qinfo)); + for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ + _enc->state.dequant_tables[qi][pli][qti]= + _enc->state.dequant_table_data[qi][pli][qti]; + _enc->enquant_tables[qi][pli][qti]=_enc->enquant_table_data[qi][pli][qti]; + } + oc_enquant_tables_init(_enc->state.dequant_tables, + _enc->enquant_tables,_qinfo); + memcpy(_enc->state.loop_filter_limits,_qinfo->loop_filter_limits, + sizeof(_enc->state.loop_filter_limits)); + oc_enquant_qavg_init(_enc->log_qavg,_enc->state.dequant_tables, + _enc->state.info.pixel_fmt); + return 0; +} + +static void oc_enc_clear(oc_enc_ctx *_enc); + +static int oc_enc_init(oc_enc_ctx *_enc,const th_info *_info){ + th_info info; + size_t mcu_nmbs; + ptrdiff_t mcu_nfrags; + int hdec; + int vdec; + int ret; + int pli; + /*Clean up the requested settings.*/ + memcpy(&info,_info,sizeof(info)); + info.version_major=TH_VERSION_MAJOR; + info.version_minor=TH_VERSION_MINOR; + info.version_subminor=TH_VERSION_SUB; + if(info.quality>63)info.quality=63; + if(info.quality<0)info.quality=32; + if(info.target_bitrate<0)info.target_bitrate=0; + /*Initialize the shared encoder/decoder state.*/ + ret=oc_state_init(&_enc->state,&info,4); + if(ret<0)return ret; + _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info)); + _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc)); + _enc->coded_mbis= + (unsigned *)_ogg_malloc(_enc->state.nmbs*sizeof(*_enc->coded_mbis)); + hdec=!(_enc->state.info.pixel_fmt&1); + vdec=!(_enc->state.info.pixel_fmt&2); + /*If 
chroma is sub-sampled in the vertical direction, we have to encode two + super block rows of Y' for each super block row of Cb and Cr.*/ + _enc->mcu_nvsbs=1<mcu_nvsbs*_enc->state.fplanes[0].nhsbs*(size_t)4; + mcu_nfrags=4*mcu_nmbs+(8*mcu_nmbs>>hdec+vdec); + _enc->mcu_skip_ssd=(unsigned *)_ogg_malloc( + mcu_nfrags*sizeof(*_enc->mcu_skip_ssd)); + for(pli=0;pli<3;pli++){ + _enc->dct_tokens[pli]=(unsigned char **)oc_malloc_2d(64, + _enc->state.fplanes[pli].nfrags,sizeof(**_enc->dct_tokens)); + _enc->extra_bits[pli]=(ogg_uint16_t **)oc_malloc_2d(64, + _enc->state.fplanes[pli].nfrags,sizeof(**_enc->extra_bits)); + } +#if defined(OC_COLLECT_METRICS) + _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd)); + _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd)); +#endif +#if defined(OC_X86_ASM) + oc_enc_vtable_init_x86(_enc); +#else + oc_enc_vtable_init_c(_enc); +#endif + _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift; + _enc->state.qis[0]=_enc->state.info.quality; + _enc->state.nqis=1; + oc_rc_state_init(&_enc->rc,_enc); + oggpackB_writeinit(&_enc->opb); + if(_enc->mb_info==NULL||_enc->frag_dc==NULL||_enc->coded_mbis==NULL|| + _enc->mcu_skip_ssd==NULL||_enc->dct_tokens[0]==NULL|| + _enc->dct_tokens[1]==NULL||_enc->dct_tokens[2]==NULL|| + _enc->extra_bits[0]==NULL||_enc->extra_bits[1]==NULL|| + _enc->extra_bits[2]==NULL +#if defined(OC_COLLECT_METRICS) + ||_enc->frag_satd==NULL||_enc->frag_ssd==NULL +#endif + ){ + oc_enc_clear(_enc); + return TH_EFAULT; + } + oc_mode_scheme_chooser_init(&_enc->chooser); + oc_enc_mb_info_init(_enc); + memset(_enc->huff_idxs,0,sizeof(_enc->huff_idxs)); + /*Reset the packet-out state machine.*/ + _enc->packet_state=OC_PACKET_INFO_HDR; + _enc->dup_count=0; + _enc->nqueued_dups=0; + _enc->prev_dup_count=0; + /*Enable speed optimizations up through early skip by default.*/ + _enc->sp_level=OC_SP_LEVEL_EARLY_SKIP; + /*Disable VP3 compatibility by default.*/ + _enc->vp3_compatible=0; + 
/*No INTER frames coded yet.*/ + _enc->coded_inter_frame=0; + memcpy(_enc->huff_codes,TH_VP31_HUFF_CODES,sizeof(_enc->huff_codes)); + oc_enc_set_quant_params(_enc,NULL); + return 0; +} + +static void oc_enc_clear(oc_enc_ctx *_enc){ + int pli; + oc_rc_state_clear(&_enc->rc); +#if defined(OC_COLLECT_METRICS) + oc_enc_mode_metrics_dump(_enc); +#endif + oggpackB_writeclear(&_enc->opb); +#if defined(OC_COLLECT_METRICS) + _ogg_free(_enc->frag_ssd); + _ogg_free(_enc->frag_satd); +#endif + for(pli=3;pli-->0;){ + oc_free_2d(_enc->extra_bits[pli]); + oc_free_2d(_enc->dct_tokens[pli]); + } + _ogg_free(_enc->mcu_skip_ssd); + _ogg_free(_enc->coded_mbis); + _ogg_free(_enc->frag_dc); + _ogg_free(_enc->mb_info); + oc_state_clear(&_enc->state); +} + +static void oc_enc_drop_frame(th_enc_ctx *_enc){ + /*Use the previous frame's reconstruction.*/ + _enc->state.ref_frame_idx[OC_FRAME_SELF]= + _enc->state.ref_frame_idx[OC_FRAME_PREV]; + /*Flag motion vector analysis about the frame drop.*/ + _enc->prevframe_dropped=1; + /*Zero the packet.*/ + oggpackB_reset(&_enc->opb); +} + +static void oc_enc_compress_keyframe(oc_enc_ctx *_enc,int _recode){ + if(_enc->state.info.target_bitrate>0){ + _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTRA_FRAME, + _enc->state.curframe_num>0); + _enc->state.nqis=1; + } + oc_enc_calc_lambda(_enc,OC_INTRA_FRAME); + oc_enc_analyze_intra(_enc,_recode); + oc_enc_frame_pack(_enc); + /*On the first frame, the previous call was an initial dry-run to prime + feed-forward statistics.*/ + if(!_recode&&_enc->state.curframe_num==0){ + if(_enc->state.info.target_bitrate>0){ + oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, + OC_INTRA_FRAME,_enc->state.qis[0],1,0); + } + oc_enc_compress_keyframe(_enc,1); + } +} + +static void oc_enc_compress_frame(oc_enc_ctx *_enc,int _recode){ + if(_enc->state.info.target_bitrate>0){ + _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTER_FRAME,1); + _enc->state.nqis=1; + } + oc_enc_calc_lambda(_enc,OC_INTER_FRAME); + 
if(oc_enc_analyze_inter(_enc,_enc->rc.twopass!=2,_recode)){ + /*Mode analysis thinks this should have been a keyframe; start over.*/ + oc_enc_compress_keyframe(_enc,1); + } + else{ + oc_enc_frame_pack(_enc); + if(!_enc->coded_inter_frame){ + /*On the first INTER frame, the previous call was an initial dry-run to + prime feed-forward statistics.*/ + _enc->coded_inter_frame=1; + if(_enc->state.info.target_bitrate>0){ + /*Rate control also needs to prime.*/ + oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, + OC_INTER_FRAME,_enc->state.qis[0],1,0); + } + oc_enc_compress_frame(_enc,1); + } + } +} + +/*Set the granule position for the next packet to output based on the current + internal state.*/ +static void oc_enc_set_granpos(oc_enc_ctx *_enc){ + unsigned dup_offs; + /*Add an offset for the number of duplicate frames we've emitted so far.*/ + dup_offs=_enc->prev_dup_count-_enc->nqueued_dups; + /*If the current frame was a keyframe, use it for the high part.*/ + if(_enc->state.frame_type==OC_INTRA_FRAME){ + _enc->state.granpos=(_enc->state.curframe_num+_enc->state.granpos_bias<< + _enc->state.info.keyframe_granule_shift)+dup_offs; + } + /*Otherwise use the last keyframe in the high part and put the current frame + in the low part.*/ + else{ + _enc->state.granpos= + (_enc->state.keyframe_num+_enc->state.granpos_bias<< + _enc->state.info.keyframe_granule_shift) + +_enc->state.curframe_num-_enc->state.keyframe_num+dup_offs; + } +} + + +th_enc_ctx *th_encode_alloc(const th_info *_info){ + oc_enc_ctx *enc; + if(_info==NULL)return NULL; + enc=_ogg_malloc(sizeof(*enc)); + if(enc==NULL||oc_enc_init(enc,_info)<0){ + _ogg_free(enc); + return NULL; + } + return enc; +} + +void th_encode_free(th_enc_ctx *_enc){ + if(_enc!=NULL){ + oc_enc_clear(_enc); + _ogg_free(_enc); + } +} + +int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){ + switch(_req){ + case TH_ENCCTL_SET_HUFFMAN_CODES:{ + if(_buf==NULL&&_buf_sz!=0|| + 
_buf!=NULL&&_buf_sz!=sizeof(th_huff_table)*TH_NHUFFMAN_TABLES){ + return TH_EINVAL; + } + return oc_enc_set_huffman_codes(_enc,(const th_huff_table *)_buf); + }break; + case TH_ENCCTL_SET_QUANT_PARAMS:{ + if(_buf==NULL&&_buf_sz!=0|| + _buf!=NULL&&_buf_sz!=sizeof(th_quant_info)){ + return TH_EINVAL; + } + return oc_enc_set_quant_params(_enc,(th_quant_info *)_buf); + }break; + case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:{ + ogg_uint32_t keyframe_frequency_force; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(keyframe_frequency_force))return TH_EINVAL; + keyframe_frequency_force=*(ogg_uint32_t *)_buf; + if(keyframe_frequency_force<=0)keyframe_frequency_force=1; + if(_enc->packet_state==OC_PACKET_INFO_HDR){ + /*It's still early enough to enlarge keyframe_granule_shift.*/ + _enc->state.info.keyframe_granule_shift=OC_CLAMPI( + _enc->state.info.keyframe_granule_shift, + OC_ILOG_32(keyframe_frequency_force-1),31); + } + _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force, + (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift); + *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force; + return 0; + }break; + case TH_ENCCTL_SET_VP3_COMPATIBLE:{ + int vp3_compatible; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(vp3_compatible))return TH_EINVAL; + vp3_compatible=*(int *)_buf; + _enc->vp3_compatible=vp3_compatible; + if(oc_enc_set_huffman_codes(_enc,TH_VP31_HUFF_CODES)<0)vp3_compatible=0; + if(oc_enc_set_quant_params(_enc,&TH_VP31_QUANT_INFO)<0)vp3_compatible=0; + if(_enc->state.info.pixel_fmt!=TH_PF_420|| + _enc->state.info.pic_width<_enc->state.info.frame_width|| + _enc->state.info.pic_height<_enc->state.info.frame_height|| + /*If we have more than 4095 super blocks, VP3's RLE coding might + overflow. + We could overcome this by ensuring we flip the coded/not-coded flags on + at least one super block in the frame, but we pick the simple solution + of just telling the user the stream will be incompatible instead. 
+ It's unlikely the old VP3 codec would be able to decode streams at this + resolution in real time in the first place.*/ + _enc->state.nsbs>4095){ + vp3_compatible=0; + } + *(int *)_buf=vp3_compatible; + return 0; + }break; + case TH_ENCCTL_GET_SPLEVEL_MAX:{ + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + *(int *)_buf=OC_SP_LEVEL_MAX; + return 0; + }break; + case TH_ENCCTL_SET_SPLEVEL:{ + int speed; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(speed))return TH_EINVAL; + speed=*(int *)_buf; + if(speed<0||speed>OC_SP_LEVEL_MAX)return TH_EINVAL; + _enc->sp_level=speed; + return 0; + }break; + case TH_ENCCTL_GET_SPLEVEL:{ + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + *(int *)_buf=_enc->sp_level; + return 0; + } + case TH_ENCCTL_SET_DUP_COUNT:{ + int dup_count; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(dup_count))return TH_EINVAL; + dup_count=*(int *)_buf; + if(dup_count>=_enc->keyframe_frequency_force)return TH_EINVAL; + _enc->dup_count=OC_MAXI(dup_count,0); + return 0; + }break; + case TH_ENCCTL_SET_QUALITY:{ + int qi; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_enc->state.info.target_bitrate>0)return TH_EINVAL; + qi=*(int *)_buf; + if(qi<0||qi>63)return TH_EINVAL; + _enc->state.info.quality=qi; + _enc->state.qis[0]=(unsigned char)qi; + _enc->state.nqis=1; + return 0; + }break; + case TH_ENCCTL_SET_BITRATE:{ + long bitrate; + int reset; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + bitrate=*(long *)_buf; + if(bitrate<=0)return TH_EINVAL; + reset=_enc->state.info.target_bitrate<=0; + _enc->state.info.target_bitrate=bitrate>INT_MAX?INT_MAX:bitrate; + if(reset)oc_rc_state_init(&_enc->rc,_enc); + else oc_enc_rc_resize(_enc); + return 0; + }break; + case TH_ENCCTL_SET_RATE_FLAGS:{ + int set; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(set))return TH_EINVAL; + if(_enc->state.info.target_bitrate<=0)return 
TH_EINVAL; + set=*(int *)_buf; + _enc->rc.drop_frames=set&TH_RATECTL_DROP_FRAMES; + _enc->rc.cap_overflow=set&TH_RATECTL_CAP_OVERFLOW; + _enc->rc.cap_underflow=set&TH_RATECTL_CAP_UNDERFLOW; + return 0; + }break; + case TH_ENCCTL_SET_RATE_BUFFER:{ + int set; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(set))return TH_EINVAL; + if(_enc->state.info.target_bitrate<=0)return TH_EINVAL; + set=*(int *)_buf; + _enc->rc.buf_delay=set; + oc_enc_rc_resize(_enc); + *(int *)_buf=_enc->rc.buf_delay; + return 0; + }break; + case TH_ENCCTL_2PASS_OUT:{ + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_enc->state.info.target_bitrate<=0|| + _enc->state.curframe_num>=0&&_enc->rc.twopass!=1|| + _buf_sz!=sizeof(unsigned char *)){ + return TH_EINVAL; + } + return oc_enc_rc_2pass_out(_enc,(unsigned char **)_buf); + }break; + case TH_ENCCTL_2PASS_IN:{ + if(_enc==NULL)return TH_EFAULT; + if(_enc->state.info.target_bitrate<=0|| + _enc->state.curframe_num>=0&&_enc->rc.twopass!=2){ + return TH_EINVAL; + } + return oc_enc_rc_2pass_in(_enc,_buf,_buf_sz); + }break; + default:return TH_EIMPL; + } +} + +int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){ + if(_enc==NULL)return TH_EFAULT; + return oc_state_flushheader(&_enc->state,&_enc->packet_state,&_enc->opb, + &_enc->qinfo,(const th_huff_table *)_enc->huff_codes,th_version_string(), + _tc,_op); +} + +static void oc_img_plane_copy_pad(th_img_plane *_dst,th_img_plane *_src, + ogg_int32_t _pic_x,ogg_int32_t _pic_y, + ogg_int32_t _pic_width,ogg_int32_t _pic_height){ + unsigned char *dst; + int dstride; + ogg_uint32_t frame_width; + ogg_uint32_t frame_height; + ogg_uint32_t y; + frame_width=_dst->width; + frame_height=_dst->height; + /*If we have _no_ data, just encode a dull green.*/ + if(_pic_width==0||_pic_height==0){ + dst=_dst->data; + dstride=_dst->stride; + for(y=0;ystride; + sstride=_src->stride; + dst_data=_dst->data; + src_data=_src->data; + dst=dst_data+_pic_y*(ptrdiff_t)dstride+_pic_x; + 
src=src_data+_pic_y*(ptrdiff_t)sstride+_pic_x; + for(y=0;y<_pic_height;y++){ + memcpy(dst,src,_pic_width); + dst+=dstride; + src+=sstride; + } + /*Step 2: Perform a low-pass extension into the padding region.*/ + /*Left side.*/ + for(x=_pic_x;x-->0;){ + dst=dst_data+_pic_y*(ptrdiff_t)dstride+x; + for(y=0;y<_pic_height;y++){ + dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1] + +(dst+(dstride&-(y+1<_pic_height)))[1]+2>>2; + dst+=dstride; + } + } + /*Right side.*/ + for(x=_pic_x+_pic_width;x0)))[0] + +(dst+(dstride&-(y+1<_pic_height)))[0]+2>>2; + dst+=dstride; + } + } + /*Top.*/ + dst=dst_data+_pic_y*(ptrdiff_t)dstride; + for(y=_pic_y;y-->0;){ + for(x=0;x0)] + +dst[x+(x+1>2; + } + dst-=dstride; + } + /*Bottom.*/ + dst=dst_data+(_pic_y+_pic_height)*(ptrdiff_t)dstride; + for(y=_pic_y+_pic_height;y0)] + +(dst-dstride)[x+(x+1>2; + } + dst+=dstride; + } + } +} + +int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){ + th_ycbcr_buffer img; + int cframe_width; + int cframe_height; + int cpic_width; + int cpic_height; + int cpic_x; + int cpic_y; + int hdec; + int vdec; + int pli; + int refi; + int drop; + /*Step 1: validate parameters.*/ + if(_enc==NULL||_img==NULL)return TH_EFAULT; + if(_enc->packet_state==OC_PACKET_DONE)return TH_EINVAL; + if(_enc->rc.twopass&&_enc->rc.twopass_buffer_bytes==0)return TH_EINVAL; + if((ogg_uint32_t)_img[0].width!=_enc->state.info.frame_width|| + (ogg_uint32_t)_img[0].height!=_enc->state.info.frame_height){ + return TH_EINVAL; + } + hdec=!(_enc->state.info.pixel_fmt&1); + vdec=!(_enc->state.info.pixel_fmt&2); + cframe_width=_enc->state.info.frame_width>>hdec; + cframe_height=_enc->state.info.frame_height>>vdec; + if(_img[1].width!=cframe_width||_img[2].width!=cframe_width|| + _img[1].height!=cframe_height||_img[2].height!=cframe_height){ + return TH_EINVAL; + } + /*Step 2: Copy the input to our internal buffer. 
+ This lets us add padding, if necessary, so we don't have to worry about + dereferencing possibly invalid addresses, and allows us to use the same + strides and fragment offsets for both the input frame and the reference + frames.*/ + /*Flip the input buffer upside down.*/ + oc_ycbcr_buffer_flip(img,_img); + oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+0,img+0, + _enc->state.info.pic_x,_enc->state.info.pic_y, + _enc->state.info.pic_width,_enc->state.info.pic_height); + cpic_x=_enc->state.info.pic_x>>hdec; + cpic_y=_enc->state.info.pic_y>>vdec; + cpic_width=(_enc->state.info.pic_x+_enc->state.info.pic_width+hdec>>hdec) + -cpic_x; + cpic_height=(_enc->state.info.pic_y+_enc->state.info.pic_height+vdec>>vdec) + -cpic_y; + for(pli=1;pli<3;pli++){ + oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+pli,img+pli, + cpic_x,cpic_y,cpic_width,cpic_height); + } + /*Step 3: Update the buffer state.*/ + if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){ + _enc->state.ref_frame_idx[OC_FRAME_PREV]= + _enc->state.ref_frame_idx[OC_FRAME_SELF]; + if(_enc->state.frame_type==OC_INTRA_FRAME){ + /*The new frame becomes both the previous and gold reference frames.*/ + _enc->state.keyframe_num=_enc->state.curframe_num; + _enc->state.ref_frame_idx[OC_FRAME_GOLD]= + _enc->state.ref_frame_idx[OC_FRAME_SELF]; + } + } + /*Select a free buffer to use for the reconstructed version of this frame.*/ + for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]|| + refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++); + _enc->state.ref_frame_idx[OC_FRAME_SELF]=refi; + _enc->state.curframe_num+=_enc->prev_dup_count+1; + /*Step 4: Compress the frame.*/ + /*Start with a keyframe, and don't allow the generation of invalid files that + overflow the keyframe_granule_shift.*/ + if(_enc->rc.twopass_force_kf||_enc->state.curframe_num==0|| + _enc->state.curframe_num-_enc->state.keyframe_num+_enc->dup_count>= + _enc->keyframe_frequency_force){ + oc_enc_compress_keyframe(_enc,0); + 
drop=0; + } + else{ + oc_enc_compress_frame(_enc,0); + drop=1; + } + oc_restore_fpu(&_enc->state); + /*drop currently indicates if the frame is droppable.*/ + if(_enc->state.info.target_bitrate>0){ + drop=oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, + _enc->state.frame_type,_enc->state.qis[0],0,drop); + } + else drop=0; + /*drop now indicates if the frame was dropped.*/ + if(drop)oc_enc_drop_frame(_enc); + else _enc->prevframe_dropped=0; + _enc->packet_state=OC_PACKET_READY; + _enc->prev_dup_count=_enc->nqueued_dups=_enc->dup_count; + _enc->dup_count=0; +#if defined(OC_DUMP_IMAGES) + oc_enc_set_granpos(_enc); + oc_state_dump_frame(&_enc->state,OC_FRAME_IO,"src"); + oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec"); +#endif + return 0; +} + +int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){ + if(_enc==NULL||_op==NULL)return TH_EFAULT; + if(_enc->packet_state==OC_PACKET_READY){ + _enc->packet_state=OC_PACKET_EMPTY; + if(_enc->rc.twopass!=1){ + unsigned char *packet; + packet=oggpackB_get_buffer(&_enc->opb); + /*If there's no packet, malloc failed while writing; it's lost forever.*/ + if(packet==NULL)return TH_EFAULT; + _op->packet=packet; + _op->bytes=oggpackB_bytes(&_enc->opb); + } + /*For the first pass in 2-pass mode, don't emit any packet data.*/ + else{ + _op->packet=NULL; + _op->bytes=0; + } + } + else if(_enc->packet_state==OC_PACKET_EMPTY){ + if(_enc->nqueued_dups>0){ + _enc->nqueued_dups--; + _op->packet=NULL; + _op->bytes=0; + } + else{ + if(_last_p)_enc->packet_state=OC_PACKET_DONE; + return 0; + } + } + else return 0; + _last_p=_last_p&&_enc->nqueued_dups<=0; + _op->b_o_s=0; + _op->e_o_s=_last_p; + oc_enc_set_granpos(_enc); + _op->packetno=th_granule_frame(_enc,_enc->state.granpos)+3; + _op->granulepos=_enc->state.granpos; + if(_last_p)_enc->packet_state=OC_PACKET_DONE; + return 1+_enc->nqueued_dups; +} diff --git a/Engine/lib/libtheora/lib/enquant.c b/Engine/lib/libtheora/lib/enquant.c new file mode 100644 index 
000000000..3372fed22 --- /dev/null +++ b/Engine/lib/libtheora/lib/enquant.c @@ -0,0 +1,274 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: enquant.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + + + +void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ + const th_quant_ranges *qranges; + const th_quant_base *base_mats[2*3*64]; + int indices[2][3][64]; + int nbase_mats; + int nbits; + int ci; + int qi; + int qri; + int qti; + int pli; + int qtj; + int plj; + int bmi; + int i; + i=_qinfo->loop_filter_limits[0]; + for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]); + nbits=OC_ILOG_32(i); + oggpackB_write(_opb,nbits,3); + for(qi=0;qi<64;qi++){ + oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits); + } + /*580 bits for VP3.*/ + i=1; + for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i); + nbits=OC_ILOGNZ_32(i); + oggpackB_write(_opb,nbits-1,4); + for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits); + /*516 bits for VP3.*/ + i=1; + for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i); + nbits=OC_ILOGNZ_32(i); + oggpackB_write(_opb,nbits-1,4); + for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits); + /*Consolidate any duplicate base matrices.*/ + nbase_mats=0; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + qranges=_qinfo->qi_ranges[qti]+pli; + 
for(qri=0;qri<=qranges->nranges;qri++){ + for(bmi=0;;bmi++){ + if(bmi>=nbase_mats){ + base_mats[bmi]=qranges->base_matrices+qri; + indices[qti][pli][qri]=nbase_mats++; + break; + } + else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri], + sizeof(base_mats[bmi][0]))==0){ + indices[qti][pli][qri]=bmi; + break; + } + } + } + } + /*Write out the list of unique base matrices. + 1545 bits for VP3 matrices.*/ + oggpackB_write(_opb,nbase_mats-1,9); + for(bmi=0;bmiqi_ranges[qti]+pli; + if(i>0){ + if(qti>0){ + if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&& + memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes, + qranges->nranges*sizeof(qranges->sizes[0]))==0&& + memcmp(indices[qti][pli],indices[qti-1][pli], + (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ + oggpackB_write(_opb,1,2); + continue; + } + } + qtj=(i-1)/3; + plj=(i-1)%3; + if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&& + memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes, + qranges->nranges*sizeof(qranges->sizes[0]))==0&& + memcmp(indices[qti][pli],indices[qtj][plj], + (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ + oggpackB_write(_opb,0,1+(qti>0)); + continue; + } + oggpackB_write(_opb,1,1); + } + oggpackB_write(_opb,indices[qti][pli][0],nbits); + for(qi=qri=0;qi<63;qri++){ + oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi)); + qi+=qranges->sizes[qri]; + oggpackB_write(_opb,indices[qti][pli][qri+1],nbits); + } + } +} + +static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ + ogg_uint32_t t; + int l; + _d<<=1; + l=OC_ILOGNZ_32(_d)-1; + t=1+((ogg_uint32_t)1<<16+l)/_d; + _this->m=(ogg_int16_t)(t-0x10000); + _this->l=l; +} + +/*See comments at oc_dequant_tables_init() for how the quantization tables' + storage should be initialized.*/ +void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], + oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){ + int qi; + int pli; + int qti; + /*Initialize the dequantization tables 
first.*/ + oc_dequant_tables_init(_dequant,NULL,_qinfo); + /*Derive the quantization tables directly from the dequantization tables.*/ + for(qi=0;qi<64;qi++)for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + int zzi; + int plj; + int qtj; + int dupe; + dupe=0; + for(qtj=0;qtj<=qti;qtj++){ + for(plj=0;plj<(qtj>1))/qd; + qp+=rq*(ogg_uint32_t)rq; + } + q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp; + } + /*qavg=1.0/sqrt(q2).*/ + _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1; + } +} diff --git a/Engine/lib/libtheora/lib/enquant.h b/Engine/lib/libtheora/lib/enquant.h new file mode 100644 index 000000000..d62df10d1 --- /dev/null +++ b/Engine/lib/libtheora/lib/enquant.h @@ -0,0 +1,27 @@ +#if !defined(_enquant_H) +# define _enquant_H (1) +# include "quant.h" + +typedef struct oc_iquant oc_iquant; + +#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1)) + +/*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0)) + (i.e., one 16x16->16 mul, 2 shifts, and 2 adds). + This is not an approximation; for 16-bit x and d, it is exact.*/ +struct oc_iquant{ + ogg_int16_t m; + ogg_int16_t l; +}; + +typedef oc_iquant oc_iquant_table[64]; + + + +void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo); +void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], + oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo); +void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64], + ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt); + +#endif diff --git a/Engine/lib/libtheora/lib/fdct.c b/Engine/lib/libtheora/lib/fdct.c new file mode 100644 index 000000000..dc3a66f24 --- /dev/null +++ b/Engine/lib/libtheora/lib/fdct.c @@ -0,0 +1,422 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. 
PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: fdct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include "encint.h" +#include "dct.h" + + + +/*Performs a forward 8 point Type-II DCT transform. + The output is scaled by a factor of 2 from the orthonormal version of the + transform. + _y: The buffer to store the result in. + Data will be placed the first 8 entries (e.g., in a row of an 8x8 block). + _x: The input coefficients. + Every 8th entry is used (e.g., from a column of an 8x8 block).*/ +static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + int s; + int u; + int v; + /*Stage 1:*/ + /*0-7 butterfly.*/ + t0=_x[0<<3]+(int)_x[7<<3]; + t7=_x[0<<3]-(int)_x[7<<3]; + /*1-6 butterfly.*/ + t1=_x[1<<3]+(int)_x[6<<3]; + t6=_x[1<<3]-(int)_x[6<<3]; + /*2-5 butterfly.*/ + t2=_x[2<<3]+(int)_x[5<<3]; + t5=_x[2<<3]-(int)_x[5<<3]; + /*3-4 butterfly.*/ + t3=_x[3<<3]+(int)_x[4<<3]; + t4=_x[3<<3]-(int)_x[4<<3]; + /*Stage 2:*/ + /*0-3 butterfly.*/ + r=t0+t3; + t3=t0-t3; + t0=r; + /*1-2 butterfly.*/ + r=t1+t2; + t2=t1-t2; + t1=r; + /*6-5 butterfly.*/ + r=t6+t5; + t5=t6-t5; + t6=r; + /*Stages 3 and 4 are where all the approximation occurs. + These are chosen to be as close to an exact inverse of the approximations + made in the iDCT as possible, while still using mostly 16-bit arithmetic. + We use some 16x16->32 signed MACs, but those still commonly execute in 1 + cycle on a 16-bit DSP. + For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of + t5=(OC_C4S4*s>>16). 
+ That is, applying the latter to the output of the former will recover t5 + exactly (over the valid input range of t5, -23171...23169). + We increase the rounding bias to 0xB500 in this particular case so that + errors inverting the subsequent butterfly are not one-sided (e.g., the + mean error is very close to zero). + The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0. + The fDCT of an all-zeros block will still not be zero, because of the + biases we added at the very beginning of the process, but it will be close + enough that it is guaranteed to round to zero.*/ + /*Stage 3:*/ + /*4-5 butterfly.*/ + s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1; + r=t4+s; + t5=t4-s; + t4=r; + /*7-6 butterfly.*/ + s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1; + r=t7+s; + t6=t7-s; + t7=r; + /*Stage 4:*/ + /*0-1 butterfly.*/ + r=(27146*t0+0x4000>>16)+t0+(t0!=0); + s=(27146*t1+0xB500>>16)+t1+(t1!=0); + u=r+s>>1; + v=r-u; + _y[0]=u; + _y[4]=v; + /*3-2 rotation by 6pi/16*/ + u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0); + s=(OC_C6S2*u>>16)-t2; + v=(s*21600+0x2800>>18)+s+(s!=0); + _y[2]=u; + _y[6]=v; + /*6-5 rotation by 3pi/16*/ + u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0); + s=t6-(OC_C5S3*u>>16); + v=(s*26568+0x3400>>17)+s+(s!=0); + _y[5]=u; + _y[3]=v; + /*7-4 rotation by 7pi/16*/ + u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0); + s=(OC_C7S1*u>>16)-t4; + v=(s*20539+0x3000>>20)+s+(s!=0); + _y[1]=u; + _y[7]=v; +} + +void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64], + const ogg_int16_t _x[64]){ + (*_enc->opt_vtable.fdct8x8)(_y,_x); +} + +/*Performs a forward 8x8 Type-II DCT transform. + The output is scaled by a factor of 4 relative to the orthonormal version + of the transform. + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input coefficients. 
*/ +void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + int i; + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + for(i=0;i<64;i++)w[i]=_x[i]<<2; + /*These biases correct for some systematic error that remains in the full + fDCT->iDCT round trip.*/ + w[0]+=(w[0]!=0)+1; + w[1]++; + w[8]--; + /*Transform columns of w into rows of _y.*/ + for(in=w,out=_y,end=out+64;out>2; +} + + + +/*This does not seem to outperform simple LFE border padding before MC. + It yields higher PSNR, but much higher bitrate usage.*/ +#if 0 +typedef struct oc_extension_info oc_extension_info; + + + +/*Information needed to pad boundary blocks. + We multiply each row/column by an extension matrix that fills in the padding + values as a linear combination of the active values, so that an equivalent + number of coefficients are forced to zero. + This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as + little as 7 multiplies. + We compute the extension matrices for every possible shape in advance, as + there are only 35. + The coefficients for all matrices are stored in a single array to take + advantage of the overlap and repetitiveness of many of the shapes. + A similar technique is applied to the offsets into this array. + This reduces the required table storage by about 48%. + See tools/extgen.c for details. + We could conceivably do the same for all 256 possible shapes.*/ +struct oc_extension_info{ + /*The mask of the active pixels in the shape.*/ + short mask; + /*The number of active pixels in the shape.*/ + short na; + /*The extension matrix. 
+ This is (8-na)xna*/ + const ogg_int16_t *const *ext; + /*The pixel indices: na active pixels followed by 8-na padding pixels.*/ + unsigned char pi[8]; + /*The coefficient indices: na unconstrained coefficients followed by 8-na + coefficients to be forced to zero.*/ + unsigned char ci[8]; +}; + + +/*The number of shapes we need.*/ +#define OC_NSHAPES (35) + +static const ogg_int16_t OC_EXT_COEFFS[229]={ + 0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF, + 0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000, + 0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000, + 0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345, + 0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59, + 0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59, + 0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225, + 0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C, + 0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882, + 0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768, + 0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185, + 0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59, + 0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC, + 0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C, + 0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7, + 0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C, + 0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF, + 0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C, + 0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D, + 0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04, + 0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546, + 0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587, + 0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4, + 0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC, + 0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB, + 0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08, + 
0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12, + 0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A, + 0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC +}; + +static const ogg_int16_t *const OC_EXT_ROWS[96]={ + OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0, + OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 6, + OC_EXT_COEFFS+ 27,OC_EXT_COEFFS+ 38,OC_EXT_COEFFS+ 43,OC_EXT_COEFFS+ 32, + OC_EXT_COEFFS+ 49,OC_EXT_COEFFS+ 58,OC_EXT_COEFFS+ 67,OC_EXT_COEFFS+ 71, + OC_EXT_COEFFS+ 62,OC_EXT_COEFFS+ 53,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15, + OC_EXT_COEFFS+ 14,OC_EXT_COEFFS+ 13,OC_EXT_COEFFS+ 76,OC_EXT_COEFFS+ 81, + OC_EXT_COEFFS+ 86,OC_EXT_COEFFS+ 91,OC_EXT_COEFFS+ 96,OC_EXT_COEFFS+ 98, + OC_EXT_COEFFS+ 93,OC_EXT_COEFFS+ 88,OC_EXT_COEFFS+ 83,OC_EXT_COEFFS+ 78, + OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12, + OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15, + OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108, + OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+ 16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141, + OC_EXT_COEFFS+ 20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116, + OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167, + OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149, + OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177, + OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197, + OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215, + OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+ 24,OC_EXT_COEFFS+ 204, + OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199, + OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135, + OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 134, + OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223, + OC_EXT_COEFFS+ 
226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221 +}; + +static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={ + {0x7F,7,OC_EXT_ROWS+ 0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}}, + {0xFE,7,OC_EXT_ROWS+ 7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}}, + {0x3F,6,OC_EXT_ROWS+ 8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}}, + {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}}, + {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}}, + {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}}, + {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}}, + {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}}, + {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}}, + {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}}, + {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}}, + {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}}, + {0x01,1,OC_EXT_ROWS+ 0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}}, + {0x80,1,OC_EXT_ROWS+ 0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}}, + {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}}, + {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}}, + {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}}, + {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}}, + {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}}, + {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}}, + {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}}, + {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}}, + {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}}, + {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}}, + {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}}, + {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}}, + {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}}, + {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}}, + 
{0x40,1,OC_EXT_ROWS+ 0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x20,1,OC_EXT_ROWS+ 0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x10,1,OC_EXT_ROWS+ 0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x08,1,OC_EXT_ROWS+ 0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x04,1,OC_EXT_ROWS+ 0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}}, + {0x02,1,OC_EXT_ROWS+ 0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}} +}; + + + +/*Pads a single column of a partial block and then performs a forward Type-II + DCT on the result. + The input is scaled by a factor of 4 and biased appropriately for the current + fDCT implementation. + The output is scaled by an additional factor of 2 from the orthonormal + version of the transform. + _y: The buffer to store the result in. + Data will be placed the first 8 entries (e.g., in a row of an 8x8 block). + _x: The input coefficients. + Every 8th entry is used (e.g., from a column of an 8x8 block). + _e: The extension information for the shape.*/ +static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x, + const oc_extension_info *_e){ + const unsigned char *pi; + int na; + na=_e->na; + pi=_e->pi; + if(na==1){ + int ci; + /*While the branch below is still correct for shapes with na==1, we can + perform the entire transform with just 1 multiply in this case instead + of 23.*/ + _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]]))); + for(ci=1;ci<8;ci++)_y[ci]=0; + } + else{ + const ogg_int16_t *const *ext; + int zpi; + int api; + int nz; + /*First multiply by the extension matrix to compute the padding values.*/ + nz=8-na; + ext=_e->ext; + for(zpi=0;zpi>16)+1>>1; + } + oc_fdct8(_y,_x); + } +} + +/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the + border of the picture region. + This method ONLY works with rectangular regions. + _border: A description of which pixels are inside the border. + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input pixel values. 
+ Pixel values outside the border will be ignored.*/ +void oc_fdct8x8_border(const oc_border_info *_border, + ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ogg_int16_t *in; + ogg_int16_t *out; + ogg_int16_t w[64]; + ogg_int64_t mask; + const oc_extension_info *cext; + const oc_extension_info *rext; + int cmask; + int rmask; + int ri; + int ci; + /*Identify the shapes of the non-zero rows and columns.*/ + rmask=cmask=0; + mask=_border->mask; + for(ri=0;ri<8;ri++){ + /*This aggregation is _only_ correct for rectangular masks.*/ + cmask|=((mask&0xFF)!=0)<>=8; + } + /*Find the associated extension info for these shapes.*/ + if(cmask==0xFF)cext=NULL; + else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){ + /*If we somehow can't find the shape, then just do an unpadded fDCT. + It won't be efficient, but it should still be correct.*/ + if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){ + oc_enc_fdct8x8_c(_y,_x); + return; + } + } + if(rmask==0xFF)rext=NULL; + else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){ + /*If we somehow can't find the shape, then just do an unpadded fDCT. + It won't be efficient, but it should still be correct.*/ + if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){ + oc_enc_fdct8x8_c(_y,_x); + return; + } + } + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2; + /*These biases correct for some systematic error that remains in the full + fDCT->iDCT round trip. + We can safely add them before padding, since if these pixel values are + overwritten, we didn't care what they were anyway (and the unbiased values + will usually yield smaller DCT coefficient magnitudes).*/ + w[0]+=(w[0]!=0)+1; + w[1]++; + w[8]--; + /*Transform the columns. 
+ We can ignore zero columns without a problem.*/ + in=w; + out=_y; + if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci); + else for(ci=0;ci<8;ci++)if(rmask&(1<>2; +} +#endif diff --git a/Engine/lib/libtheora/lib/fragment.c b/Engine/lib/libtheora/lib/fragment.c new file mode 100644 index 000000000..15372e9d9 --- /dev/null +++ b/Engine/lib/libtheora/lib/fragment.c @@ -0,0 +1,87 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include "internal.h" + +void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride){ + (*_state->opt_vtable.frag_copy)(_dst,_src,_ystride); +} + +void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){ + int i; + for(i=8;i-->0;){ + memcpy(_dst,_src,8*sizeof(*_dst)); + _dst+=_ystride; + _src+=_ystride; + } +} + +void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst, + int _ystride,const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_intra(_dst,_ystride,_residue); +} + +void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+128); + _dst+=_ystride; + } +} + +void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, + 
const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue); +} + +void oc_frag_recon_inter_c(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+_src[j]); + _dst+=_ystride; + _src+=_ystride; + } +} + +void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride, + const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_inter2(_dst,_src1,_src2,_ystride,_residue); +} + +void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+(_src1[j]+_src2[j]>>1)); + _dst+=_ystride; + _src1+=_ystride; + _src2+=_ystride; + } +} + +void oc_restore_fpu(const oc_theora_state *_state){ + _state->opt_vtable.restore_fpu(); +} + +void oc_restore_fpu_c(void){} diff --git a/Engine/lib/libtheora/lib/huffdec.c b/Engine/lib/libtheora/lib/huffdec.c new file mode 100644 index 000000000..8cf27f034 --- /dev/null +++ b/Engine/lib/libtheora/lib/huffdec.c @@ -0,0 +1,489 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "huffdec.h" +#include "decint.h" + + +/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/ +#define _ogg_offsetof(_type,_field)\ + ((size_t)((char *)&((_type *)0)->_field-(char *)0)) + +/*The number of internal tokens associated with each of the spec tokens.*/ +static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={ + 1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8 +}; + +/*The map from external spec-defined tokens to internal tokens. + This is constructed so that any extra bits read with the original token value + can be masked off the least significant bits of its internal token index. + In addition, all of the tokens which require additional extra bits are placed + at the start of the list, and grouped by type. + OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so + giving it index 0 may simplify comparisons on some architectures. 
+ These requirements require some substantial reordering.*/ +static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={ + /*OC_DCT_EOB1_TOKEN (0 extra bits)*/ + 15, + /*OC_DCT_EOB2_TOKEN (0 extra bits)*/ + 16, + /*OC_DCT_EOB3_TOKEN (0 extra bits)*/ + 17, + /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/ + 88, + /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/ + 80, + /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/ + 1, + /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/ + 0, + /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/ + 48, + /*OC_DCT_ZRL_TOKEN (6 extra bits)*/ + 14, + /*OC_ONE_TOKEN (0 extra bits)*/ + 56, + /*OC_MINUS_ONE_TOKEN (0 extra bits)*/ + 57, + /*OC_TWO_TOKEN (0 extra bits)*/ + 58, + /*OC_MINUS_TWO_TOKEN (0 extra bits)*/ + 59, + /*OC_DCT_VAL_CAT2 (1 extra bit)*/ + 60, + 62, + 64, + 66, + /*OC_DCT_VAL_CAT3 (2 extra bits)*/ + 68, + /*OC_DCT_VAL_CAT4 (3 extra bits)*/ + 72, + /*OC_DCT_VAL_CAT5 (4 extra bits)*/ + 2, + /*OC_DCT_VAL_CAT6 (5 extra bits)*/ + 4, + /*OC_DCT_VAL_CAT7 (6 extra bits)*/ + 6, + /*OC_DCT_VAL_CAT8 (10 extra bits)*/ + 8, + /*OC_DCT_RUN_CAT1A (1 extra bit)*/ + 18, + 20, + 22, + 24, + 26, + /*OC_DCT_RUN_CAT1B (3 extra bits)*/ + 32, + /*OC_DCT_RUN_CAT1C (4 extra bits)*/ + 12, + /*OC_DCT_RUN_CAT2A (2 extra bits)*/ + 28, + /*OC_DCT_RUN_CAT2B (3 extra bits)*/ + 40 +}; + +/*These three functions are really part of the bitpack.c module, but + they are only used here. 
+ Declaring local static versions so they can be inlined saves considerable + function call overhead.*/ + +static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ + const unsigned char *ptr; + const unsigned char *stop; + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + ptr=_b->ptr; + stop=_b->stop; + /*This version of _refill() doesn't bother setting eof because we won't + check for it after we've started decoding DCT tokens.*/ + if(ptr>=stop)available=OC_LOTS_OF_BITS; + while(available<=OC_PB_WINDOW_SIZE-8){ + available+=8; + window|=(oc_pb_window)*ptr++<=stop)available=OC_LOTS_OF_BITS; + } + _b->ptr=ptr; + if(_bits>available)window|=*ptr>>(available&7); + _b->bits=available; + return window; +} + + +/*Read in bits without advancing the bit pointer. + Here we assume 0<=_bits&&_bits<=32.*/ +static long oc_pack_look(oc_pack_buf *_b,int _bits){ + oc_pb_window window; + int available; + long result; + window=_b->window; + available=_b->bits; + if(_bits==0)return 0; + if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits); + result=window>>OC_PB_WINDOW_SIZE-_bits; + return result; +} + +/*Advance the bit pointer.*/ +static void oc_pack_adv(oc_pack_buf *_b,int _bits){ + /*We ignore the special cases for _bits==0 and _bits==32 here, since they are + never used actually used. + OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read + 32 bits in a single go, and would require a 32 GB lookup table (assuming + 8 byte pointers, since 4 byte pointers couldn't fit such a table).*/ + _b->window<<=_bits; + _b->bits-=_bits; +} + + +/*The log_2 of the size of a lookup table is allowed to grow to relative to + the number of unique nodes it contains. + E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is + wasted (each node will have an amortized cost of at most 20 bytes when using + 4-byte pointers). 
+ Larger numbers can decode tokens with fewer read operations, while smaller + numbers may save more space (requiring as little as 8 bytes amortized per + node, though there will be more nodes). + With a sample file: + 32233473 read calls are required when no tree collapsing is done (100.0%). + 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%). + 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%). + 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%). + 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%). + Since a value of 1 gets us the vast majority of the speed-up with only a + small amount of wasted memory, this is what we use.*/ +#define OC_HUFF_SLUSH (1) + + +/*Determines the size in bytes of a Huffman tree node that represents a + subtree of depth _nbits. + _nbits: The depth of the subtree. + If this is 0, the node is a leaf node. + Otherwise 1<<_nbits pointers are allocated for children. + Return: The number of bytes required to store the node.*/ +static size_t oc_huff_node_size(int _nbits){ + size_t size; + size=_ogg_offsetof(oc_huff_node,nodes); + if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits); + return size; +} + +static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){ + oc_huff_node *ret; + ret=(oc_huff_node *)*_storage; + ret->nbits=(unsigned char)_nbits; + (*_storage)+=_size; + return ret; +} + + +/*Determines the size in bytes of a Huffman tree. + _nbits: The depth of the subtree. + If this is 0, the node is a leaf node. + Otherwise storage for 1<<_nbits pointers are added for children. 
+ Return: The number of bytes required to store the tree.*/ +static size_t oc_huff_tree_size(const oc_huff_node *_node){ + size_t size; + size=oc_huff_node_size(_node->nbits); + if(_node->nbits){ + int nchildren; + int i; + nchildren=1<<_node->nbits; + for(i=0;inbits-_node->nodes[i]->depth){ + size+=oc_huff_tree_size(_node->nodes[i]); + } + } + return size; +} + + +/*Unpacks a sub-tree from the given buffer. + _opb: The buffer to unpack from. + _binodes: The nodes to store the sub-tree in. + _nbinodes: The number of nodes available for the sub-tree. + Return: 0 on success, or a negative value on error.*/ +static int oc_huff_tree_unpack(oc_pack_buf *_opb, + oc_huff_node *_binodes,int _nbinodes){ + oc_huff_node *binode; + long bits; + int nused; + if(_nbinodes<1)return TH_EBADHEADER; + binode=_binodes; + nused=0; + bits=oc_pack_read1(_opb); + if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + /*Read an internal node:*/ + if(!bits){ + int ret; + nused++; + binode->nbits=1; + binode->depth=1; + binode->nodes[0]=_binodes+nused; + ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); + if(ret>=0){ + nused+=ret; + binode->nodes[1]=_binodes+nused; + ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); + } + if(ret<0)return ret; + nused+=ret; + } + /*Read a leaf node:*/ + else{ + int ntokens; + int token; + int i; + bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS); + if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + /*Find out how many internal tokens we translate this external token into.*/ + ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits]; + if(_nbinodes<2*ntokens-1)return TH_EBADHEADER; + /*Fill in a complete binary tree pointing to the internal tokens.*/ + for(i=1;inbits=0; + binode->depth=1; + binode->token=token+i; + } + } + return nused; +} + +/*Finds the depth of shortest branch of the given sub-tree. + The tree must be binary. + _binode: The root of the given sub-tree. + _binode->nbits must be 0 or 1. 
+ Return: The smallest depth of a leaf node in this sub-tree. + 0 indicates this sub-tree is a leaf node.*/ +static int oc_huff_tree_mindepth(oc_huff_node *_binode){ + int depth0; + int depth1; + if(_binode->nbits==0)return 0; + depth0=oc_huff_tree_mindepth(_binode->nodes[0]); + depth1=oc_huff_tree_mindepth(_binode->nodes[1]); + return OC_MINI(depth0,depth1)+1; +} + +/*Finds the number of internal nodes at a given depth, plus the number of + leaves at that depth or shallower. + The tree must be binary. + _binode: The root of the given sub-tree. + _binode->nbits must be 0 or 1. + Return: The number of entries that would be contained in a jump table of the + given depth.*/ +static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){ + if(_binode->nbits==0||_depth<=0)return 1; + else{ + return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+ + oc_huff_tree_occupancy(_binode->nodes[1],_depth-1); + } +} + +/*Makes a copy of the given Huffman tree. + _node: The Huffman tree to copy. 
+ Return: The copy of the Huffman tree.*/ +static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node, + char **_storage){ + oc_huff_node *ret; + ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits); + ret->depth=_node->depth; + if(_node->nbits){ + int nchildren; + int i; + int inext; + nchildren=1<<_node->nbits; + for(i=0;inodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage); + inext=i+(1<<_node->nbits-ret->nodes[i]->depth); + while(++inodes[i]=ret->nodes[i-1]; + } + } + else ret->token=_node->token; + return ret; +} + +static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){ + size_t size; + int mindepth; + int depth; + int loccupancy; + int occupancy; + if(_binode->nbits!=0&&_depth>0){ + return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+ + oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1); + } + depth=mindepth=oc_huff_tree_mindepth(_binode); + occupancy=1<loccupancy&&occupancy>=1<0){ + size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1); + size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1); + } + return size; +} + +static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, + char **_storage); + +/*Fills the given nodes table with all the children in the sub-tree at the + given depth. + The nodes in the sub-tree with a depth less than that stored in the table + are freed. + The sub-tree must be binary and complete up until the given depth. + _nodes: The nodes table to fill. + _binode: The root of the sub-tree to fill it with. + _binode->nbits must be 0 or 1. + _level: The current level in the table. + 0 indicates that the current node should be stored, regardless of + whether it is a leaf node or an internal node. 
+ _depth: The depth of the nodes to fill the table with, relative to their + parent.*/ +static void oc_huff_node_fill(oc_huff_node **_nodes, + oc_huff_node *_binode,int _level,int _depth,char **_storage){ + if(_level<=0||_binode->nbits==0){ + int i; + _binode->depth=(unsigned char)(_depth-_level); + _nodes[0]=oc_huff_tree_collapse(_binode,_storage); + for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0]; + } + else{ + _level--; + oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage); + _nodes+=1<<_level; + oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage); + } +} + +/*Finds the largest complete sub-tree rooted at the current node and collapses + it into a single node. + This procedure is then applied recursively to all the children of that node. + _binode: The root of the sub-tree to collapse. + _binode->nbits must be 0 or 1. + Return: The new root of the collapsed sub-tree.*/ +static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, + char **_storage){ + oc_huff_node *root; + size_t size; + int mindepth; + int depth; + int loccupancy; + int occupancy; + depth=mindepth=oc_huff_tree_mindepth(_binode); + occupancy=1<loccupancy&&occupancy>=1<depth=_binode->depth; + oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage); + return root; +} + +/*Unpacks a set of Huffman trees, and reduces them to a collapsed + representation. + _opb: The buffer to unpack the trees from. + _nodes: The table to fill with the Huffman trees. + Return: 0 on success, or a negative value on error.*/ +int oc_huff_trees_unpack(oc_pack_buf *_opb, + oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ + int i; + for(i=0;i0)_ogg_free(_dst[i]); + return TH_EFAULT; + } + _dst[i]=oc_huff_tree_copy(_src[i],&storage); + } + return 0; +} + +/*Frees the memory used by a set of Huffman trees. 
+ _nodes: The array of trees to free.*/ +void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ + int i; + for(i=0;inbits!=0){ + bits=oc_pack_look(_opb,_node->nbits); + _node=_node->nodes[bits]; + oc_pack_adv(_opb,_node->depth); + } + return _node->token; +} diff --git a/Engine/lib/libtheora/lib/dec/huffdec.h b/Engine/lib/libtheora/lib/huffdec.h similarity index 91% rename from Engine/lib/libtheora/lib/dec/huffdec.h rename to Engine/lib/libtheora/lib/huffdec.h index cc87b4092..d7ffa0e99 100644 --- a/Engine/lib/libtheora/lib/dec/huffdec.h +++ b/Engine/lib/libtheora/lib/huffdec.h @@ -5,19 +5,20 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: huffdec.h 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #if !defined(_huffdec_H) # define _huffdec_H (1) # include "huffman.h" +# include "bitpack.h" @@ -75,17 +76,17 @@ struct oc_huff_node{ The ACTUAL size of this array is 1< #include -#include "theora/theoraenc.h" -#include "theora/theora.h" -#include "codec_internal.h" -#include "../dec/ocintrin.h" - -/*Wrapper to translate the new API into the old API. - Eventually we need to convert the old functions to support the new API - natively and do the translation the other way. - theora-exp already the necessary code to do so.*/ +#include +#include "huffenc.h" -/*The default Huffman codes used for VP3.1. 
- It's kind of useless to include this, as TH_ENCCTL_SET_HUFFMAN_CODES is not - actually implemented in the old encoder, but it's part of the public API.*/ +/*The default Huffman codes used for VP3.1.*/ const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={ { {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8}, @@ -819,323 +811,100 @@ const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={ -static void th_info2theora_info(theora_info *_ci,const th_info *_info){ - _ci->version_major=_info->version_major; - _ci->version_minor=_info->version_minor; - _ci->version_subminor=_info->version_subminor; - _ci->width=_info->frame_width; - _ci->height=_info->frame_height; - _ci->frame_width=_info->pic_width; - _ci->frame_height=_info->pic_height; - _ci->offset_x=_info->pic_x; - _ci->offset_y=_info->pic_y; - _ci->fps_numerator=_info->fps_numerator; - _ci->fps_denominator=_info->fps_denominator; - _ci->aspect_numerator=_info->aspect_numerator; - _ci->aspect_denominator=_info->aspect_denominator; - switch(_info->colorspace){ - case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break; - case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break; - default:_ci->colorspace=OC_CS_UNSPECIFIED;break; - } - switch(_info->pixel_fmt){ - case TH_PF_420:_ci->pixelformat=OC_PF_420;break; - case TH_PF_422:_ci->pixelformat=OC_PF_422;break; - case TH_PF_444:_ci->pixelformat=OC_PF_444;break; - default:_ci->pixelformat=OC_PF_RSVD; - } - _ci->target_bitrate=_info->target_bitrate; - _ci->quality=_info->quality; - _ci->codec_setup=NULL; - /*Defaults from old encoder_example... 
eventually most of these should go - away when we make the encoder no longer use them.*/ - _ci->dropframes_p=0; - _ci->keyframe_auto_p=1; - _ci->keyframe_frequency=1<<_info->keyframe_granule_shift; - _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift; - _ci->keyframe_data_target_bitrate= - _info->target_bitrate+(_info->target_bitrate>>1); - _ci->keyframe_auto_threshold=80; - _ci->keyframe_mindistance=8; - _ci->noise_sensitivity=1; - _ci->sharpness=0; - _ci->quick_p=1; +/*A description of a Huffman code value used when encoding the tree.*/ +typedef struct{ + /*The bit pattern, left-shifted so that the MSB of all patterns is + aligned.*/ + ogg_uint32_t pattern; + /*The amount the bit pattern was shifted.*/ + int shift; + /*The token this bit pattern represents.*/ + int token; +}oc_huff_entry; + + + +/*Compares two oc_huff_entry structures by their bit patterns. + _c1: The first entry to compare. + _c2: The second entry to compare. + Return: <0 if _c1<_c2, >0 if _c1>_c2.*/ +static int huff_entry_cmp(const void *_c1,const void *_c2){ + ogg_uint32_t b1; + ogg_uint32_t b2; + b1=((const oc_huff_entry *)_c1)->pattern; + b2=((const oc_huff_entry *)_c2)->pattern; + return b1b2?1:0; } -static int _ilog(unsigned _v){ - int ret; - for(ret=0;_v;ret++)_v>>=1; - return ret; -} - - - -struct th_enc_ctx{ - /*This is required at the start of the struct for the common functions to - work.*/ - th_info info; - /*The actual encoder.*/ - theora_state state; - /*A temporary buffer for input frames. 
- This is needed if the U and V strides differ, or padding is required.*/ - unsigned char *buf; -}; - - -th_enc_ctx *th_encode_alloc(const th_info *_info){ - theora_info ci; - th_enc_ctx *enc; - th_info2theora_info(&ci,_info); - /*Do a bunch of checks the new API does, but the old one didn't.*/ - if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| - _info->frame_width>=0x100000||_info->frame_height>=0x100000|| - _info->pic_x+_info->pic_width>_info->frame_width|| - _info->pic_y+_info->pic_height>_info->frame_height|| - _info->pic_x>255|| - _info->frame_height-_info->pic_height-_info->pic_y>255|| - _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| - _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ - enc=NULL; - } - else{ - enc=(th_enc_ctx *)_ogg_malloc(sizeof(*enc)); - if(theora_encode_init(&enc->state,&ci)<0){ - _ogg_free(enc); - enc=NULL; +/*Encodes a description of the given Huffman tables. + Although the codes are stored in the encoder as flat arrays, in the bit + stream and in the decoder they are structured as a tree. + This function recovers the tree structure from the flat array and then + writes it out. + Note that the codes MUST form a Huffman code, and not merely a prefix-free + code, since the binary tree is assumed to be full. + _opb: The buffer to store the tree in. + _codes: The Huffman tables to pack. 
+ Return: 0 on success, or a negative value if one of the given Huffman tables + does not form a full, prefix-free code.*/ +int oc_huff_codes_pack(oggpack_buffer *_opb, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){ + int i; + for(i=0;iframe_width>_info->pic_width|| - _info->frame_height>_info->pic_height){ - enc->buf=_ogg_malloc((_info->frame_width*_info->frame_height+ - ((_info->frame_width>>!(_info->pixel_fmt&1))* - (_info->frame_height>>!(_info->pixel_fmt&2))<<1))*sizeof(*enc->buf)); - } - else enc->buf=NULL; - memcpy(&enc->info,_info,sizeof(enc->info)); - /*Overwrite values theora_encode_init() can change; don't trust the user.*/ - enc->info.version_major=ci.version_major; - enc->info.version_minor=ci.version_minor; - enc->info.version_subminor=ci.version_subminor; - enc->info.quality=ci.quality; - enc->info.target_bitrate=ci.target_bitrate; - enc->info.fps_numerator=ci.fps_numerator; - enc->info.fps_denominator=ci.fps_denominator; - enc->info.keyframe_granule_shift=_ilog(ci.keyframe_frequency_force-1); + mask=(1<<(maxlen>>1)<<(maxlen+1>>1))-1; + /*Copy over the codes into our temporary workspace. + The bit patterns are aligned, and the original entry each code is from + is stored as well.*/ + for(j=0;jstate,_req,_buf,_buf_sz); -} - -int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_comments, - ogg_packet *_op){ - theora_state *te; - CP_INSTANCE *cpi; - if(_enc==NULL||_op==NULL)return OC_FAULT; - te=&_enc->state; - cpi=(CP_INSTANCE *)te->internal_encode; - switch(cpi->doneflag){ - case -3:{ - theora_encode_header(te,_op); - return -cpi->doneflag++; - }break; - case -2:{ - if(_comments==NULL)return OC_FAULT; - theora_encode_comment((theora_comment *)_comments,_op); - /*The old API does not require a theora_state struct when writing the - comment header, so it can't use its internal buffer and relies on the - application to free it. 
- The old documentation is wrong on this subject, and this breaks on - Windows when linking against multiple versions of libc (which is - almost always done when, e.g., using DLLs built with mingw32). - The new API _does_ require a th_enc_ctx, and states that libtheora owns - the memory. - Thus we move the contents of this packet into our internal - oggpack_buffer so it can be properly reclaimed.*/ - oggpackB_reset(cpi->oggbuffer); - oggpackB_writecopy(cpi->oggbuffer,_op->packet,_op->bytes*8); - _ogg_free(_op->packet); - _op->packet=oggpackB_get_buffer(cpi->oggbuffer); - return -cpi->doneflag++; - }break; - case -1:{ - theora_encode_tables(te,_op); - return -cpi->doneflag++; - }break; - case 0:return 0; - default:return OC_EINVAL; - } -} - -/*Copies the picture region of the _src image plane into _dst and pads the rest - of _dst using a diffusion extension method. - We could do much better (e.g., the DCT-based low frequency extension method - in theora-exp's fdct.c) if we were to pad after motion compensation, but - that would require significant changes to the encoder.*/ -static unsigned char *th_encode_copy_pad_plane(th_img_plane *_dst, - unsigned char *_buf,th_img_plane *_src, - ogg_uint32_t _pic_x,ogg_uint32_t _pic_y, - ogg_uint32_t _pic_width,ogg_uint32_t _pic_height){ - size_t buf_sz; - _dst->width=_src->width; - _dst->height=_src->height; - _dst->stride=_src->width; - _dst->data=_buf; - buf_sz=_dst->width*_dst->height*sizeof(*_dst->data); - /*If we have _no_ data, just encode a dull green.*/ - if(_pic_width==0||_pic_height==0)memset(_dst->data,0,buf_sz); - else{ - unsigned char *dst; - unsigned char *src; - ogg_uint32_t x; - ogg_uint32_t y; - int dstride; - int sstride; - /*Step 1: Copy the data we do have.*/ - dstride=_dst->stride; - sstride=_src->stride; - dst=_dst->data+_pic_y*dstride+_pic_x; - src=_src->data+_pic_y*sstride+_pic_x; - for(y=0;y<_pic_height;y++){ - memcpy(dst,src,_pic_width); - dst+=dstride; - src+=sstride; - } - /*Step 2: Copy the border 
into any blocks that are 100% padding. - There's probably smarter things we could do than this.*/ - /*Left side.*/ - for(x=_pic_x;x-->0;){ - dst=_dst->data+_pic_y*dstride+x; - for(y=0;y<_pic_height;y++){ - dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1]+ - (dst+(dstride&-(y+1<_pic_height)))[1]+2>>2; - dst+=dstride; + /*Sort the codes into ascending order. + This is the order the leaves of the tree will be traversed.*/ + qsort(entries,TH_NDCT_TOKENS,sizeof(entries[0]),huff_entry_cmp); + /*For each leaf of the tree:*/ + bpos=maxlen; + for(j=0;jentries[j].shift;bpos--)oggpackB_write(_opb,0,1); + /*Mark this as a leaf node, and write its value.*/ + oggpackB_write(_opb,1,1); + oggpackB_write(_opb,entries[j].token,5); + /*For each 1 branch we've descended, back up the tree until we reach a + 0 branch.*/ + bit=1<width;x++){ - dst=_dst->data+_pic_y*dstride+x-1; - for(y=0;y<_pic_height;y++){ - dst[1]=(dst[0]<<1)+(dst-(dstride&-(y>0)))[0]+ - (dst+(dstride&-(y+1<_pic_height)))[0]+2>>2; - dst+=dstride; - } - } - /*Top.*/ - dst=_dst->data+_pic_y*dstride; - for(y=_pic_y;y-->0;){ - for(x=0;x<_dst->width;x++){ - (dst-dstride)[x]=(dst[x]<<1)+dst[x-(x>0)]+dst[x+(x+1<_dst->width)]+2>>2; - } - dst-=dstride; - } - /*Bottom.*/ - dst=_dst->data+(_pic_y+_pic_height)*dstride; - for(y=_pic_y+_pic_height;y<_dst->height;y++){ - for(x=0;x<_dst->width;x++){ - dst[x]=((dst-dstride)[x]<<1)+(dst-dstride)[x-(x>0)]+ - (dst-dstride)[x+(x+1<_dst->width)]+2>>2; - } - dst+=dstride; - } - } - _buf+=buf_sz; - return _buf; -} - -int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr){ - CP_INSTANCE *cpi; - theora_state *te; - th_img_plane *pycbcr; - th_ycbcr_buffer ycbcr; - yuv_buffer yuv; - ogg_uint32_t pic_width; - ogg_uint32_t pic_height; - int hdec; - int vdec; - int ret; - if(_enc==NULL||_ycbcr==NULL)return OC_FAULT; - te=&_enc->state; - /*theora_encode_YUVin() does not bother to check uv_width and uv_height, and - then uses them. 
- This is arguably okay (it will most likely lead to a crash if they're - wrong, which will make the developer who passed them fix the problem), but - our API promises to return an error code instead.*/ - cpi=(CP_INSTANCE *)te->internal_encode; - hdec=!(cpi->pb.info.pixelformat&1); - vdec=!(cpi->pb.info.pixelformat&2); - if(_ycbcr[0].width!=cpi->pb.info.width|| - _ycbcr[0].height!=cpi->pb.info.height|| - _ycbcr[1].width!=_ycbcr[0].width>>hdec|| - _ycbcr[1].height!=_ycbcr[0].height>>vdec|| - _ycbcr[2].width!=_ycbcr[1].width||_ycbcr[2].height!=_ycbcr[1].height){ - return OC_EINVAL; - } - pic_width=cpi->pb.info.frame_width; - pic_height=cpi->pb.info.frame_height; - /*We can only directly use the input buffer if no padding is required (since - the new API is documented not to use values outside the picture region) - and if the strides for the Cb and Cr planes are the same, since the old - API had no way to specify different ones.*/ - if(_ycbcr[0].width==pic_width&&_ycbcr[0].height==pic_height&& - _ycbcr[1].stride==_ycbcr[2].stride){ - pycbcr=_ycbcr; - } - else{ - unsigned char *buf; - int pic_x; - int pic_y; - int pli; - pic_x=cpi->pb.info.offset_x; - pic_y=cpi->pb.info.offset_y; - if(_ycbcr[0].width>pic_width||_ycbcr[0].height>pic_height){ - buf=th_encode_copy_pad_plane(ycbcr+0,_enc->buf,_ycbcr+0, - pic_x,pic_y,pic_width,pic_height); - } - else{ - /*If only the strides differ, we can still avoid copying the luma plane.*/ - memcpy(ycbcr+0,_ycbcr+0,sizeof(ycbcr[0])); - if(_enc->buf==NULL){ - _enc->buf=(unsigned char *)_ogg_malloc( - (_ycbcr[1].width*_ycbcr[1].height<<1)*sizeof(*_enc->buf)); - } - buf=_enc->buf; - } - for(pli=1;pli<3;pli++){ - int x0; - int y0; - x0=pic_x>>hdec; - y0=pic_y>>vdec; - buf=th_encode_copy_pad_plane(ycbcr+pli,buf,_ycbcr+pli, - x0,y0,(pic_x+pic_width+hdec>>hdec)-x0,(pic_y+pic_height+vdec>>vdec)-y0); - } - pycbcr=ycbcr; - } - yuv.y_width=pycbcr[0].width; - yuv.y_height=pycbcr[0].height; - yuv.uv_width=pycbcr[1].width; - 
yuv.uv_height=pycbcr[1].height; - yuv.y_stride=pycbcr[0].stride; - yuv.y=pycbcr[0].data; - yuv.uv_stride=pycbcr[1].stride; - yuv.u=pycbcr[1].data; - yuv.v=pycbcr[2].data; - ret=theora_encode_YUVin(te,&yuv); - return ret; -} - -int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op){ - if(_enc==NULL)return OC_FAULT; - return theora_encode_packetout(&_enc->state,_last,_op); -} - -void th_encode_free(th_enc_ctx *_enc){ - if(_enc!=NULL){ - theora_clear(&_enc->state); - _ogg_free(_enc->buf); - _ogg_free(_enc); } + return 0; } diff --git a/Engine/lib/libtheora/lib/huffenc.h b/Engine/lib/libtheora/lib/huffenc.h new file mode 100644 index 000000000..c5a3956f1 --- /dev/null +++ b/Engine/lib/libtheora/lib/huffenc.h @@ -0,0 +1,19 @@ +#if !defined(_huffenc_H) +# define _huffenc_H (1) +# include "huffman.h" + + + +typedef th_huff_code th_huff_table[TH_NDCT_TOKENS]; + + + +extern const th_huff_code + TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; + + + +int oc_huff_codes_pack(oggpack_buffer *_opb, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]); + +#endif diff --git a/Engine/lib/libtheora/lib/dec/huffman.h b/Engine/lib/libtheora/lib/huffman.h similarity index 92% rename from Engine/lib/libtheora/lib/dec/huffman.h rename to Engine/lib/libtheora/lib/huffman.h index 59096e1e8..36cf7572e 100644 --- a/Engine/lib/libtheora/lib/dec/huffman.h +++ b/Engine/lib/libtheora/lib/huffman.h @@ -5,13 +5,13 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: huffman.h 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: huffman.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ @@ -65,6 +65,6 @@ #define OC_NDCT_RUN_MAX (32) #define OC_NDCT_RUN_CAT1A_MAX (28) -extern const int OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]; +extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]; #endif diff --git a/Engine/lib/libtheora/lib/dec/idct.c b/Engine/lib/libtheora/lib/idct.c similarity index 70% rename from Engine/lib/libtheora/lib/dec/idct.c rename to Engine/lib/libtheora/lib/idct.c index 21ac83f14..0e68ac765 100644 --- a/Engine/lib/libtheora/lib/dec/idct.c +++ b/Engine/lib/libtheora/lib/idct.c @@ -5,20 +5,19 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: idct.c 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: idct.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include -#include +#include "internal.h" #include "dct.h" -#include "idct.h" /*Performs an inverse 8 point Type-II DCT transform. The output is scaled by a factor of 2 relative to the orthonormal version of @@ -220,19 +219,29 @@ static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ /*Performs an inverse 8x8 Type-II DCT transform. 
The input is assumed to be scaled by a factor of 4 relative to orthonormal version of the transform. + All coefficients but the first 3 in zig-zag scan order are assumed to be 0: + x x 0 0 0 0 0 0 + x 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 _y: The buffer to store the result in. This may be the same as _x. - _x: The input coefficients. */ -void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + _x: The input coefficients.*/ +static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ const ogg_int16_t *in; ogg_int16_t *end; ogg_int16_t *out; ogg_int16_t w[64]; /*Transform rows of x into columns of w.*/ - for(in=_x,out=w,end=out+8;out>4); } @@ -250,8 +259,8 @@ void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ 0 0 0 0 0 0 0 0 _y: The buffer to store the result in. This may be the same as _x. - _x: The input coefficients. */ -void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + _x: The input coefficients.*/ +static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ const ogg_int16_t *in; ogg_int16_t *end; ogg_int16_t *out; @@ -263,6 +272,64 @@ void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ idct8_1(w+3,_x+24); /*Transform rows of w into columns of y.*/ for(in=w,out=_y,end=out+8;out>4); } + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + _y: The buffer to store the result in. + This may be the same as _x. 
+ _x: The input coefficients.*/ +static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + for(in=_x,out=w,end=out+8;out>4); +} + +void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64], + int _last_zzi){ + (*_state->opt_vtable.idct8x8)(_y,_last_zzi); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... 
multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Then perform the iDCT.*/ + if(_last_zzi<3)oc_idct8x8_3(_y,_y); + else if(_last_zzi<10)oc_idct8x8_10(_y,_y); + else oc_idct8x8_slow(_y,_y); +} diff --git a/Engine/lib/libtheora/lib/dec/info.c b/Engine/lib/libtheora/lib/info.c similarity index 83% rename from Engine/lib/libtheora/lib/dec/info.c rename to Engine/lib/libtheora/lib/info.c index 26e7f42a9..6b9762978 100644 --- a/Engine/lib/libtheora/lib/dec/info.c +++ b/Engine/lib/libtheora/lib/info.c @@ -5,20 +5,20 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: info.c 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include #include #include -#include "../internal.h" +#include "internal.h" @@ -55,14 +55,21 @@ void th_comment_init(th_comment *_tc){ } void th_comment_add(th_comment *_tc,char *_comment){ - int comment_len; - _tc->user_comments=_ogg_realloc(_tc->user_comments, + char **user_comments; + int *comment_lengths; + int comment_len; + user_comments=_ogg_realloc(_tc->user_comments, (_tc->comments+2)*sizeof(*_tc->user_comments)); - _tc->comment_lengths=_ogg_realloc(_tc->comment_lengths, + if(user_comments==NULL)return; + _tc->user_comments=user_comments; + comment_lengths=_ogg_realloc(_tc->comment_lengths, (_tc->comments+2)*sizeof(*_tc->comment_lengths)); + if(comment_lengths==NULL)return; + _tc->comment_lengths=comment_lengths; 
comment_len=strlen(_comment); - _tc->comment_lengths[_tc->comments]=comment_len; - _tc->user_comments[_tc->comments]=_ogg_malloc(comment_len+1); + comment_lengths[_tc->comments]=comment_len; + user_comments[_tc->comments]=_ogg_malloc(comment_len+1); + if(user_comments[_tc->comments]==NULL)return; memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1); _tc->comments++; _tc->user_comments[_tc->comments]=NULL; @@ -76,6 +83,7 @@ void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){ val_len=strlen(_val); /*+2 for '=' and '\0'.*/ comment=_ogg_malloc(tag_len+val_len+2); + if(comment==NULL)return; memcpy(comment,_tag,tag_len); comment[tag_len]='='; memcpy(comment+tag_len+1,_val,val_len+1); diff --git a/Engine/lib/libtheora/lib/dec/internal.c b/Engine/lib/libtheora/lib/internal.c similarity index 62% rename from Engine/lib/libtheora/lib/dec/internal.c rename to Engine/lib/libtheora/lib/internal.c index 3fe62e55b..0fe4f63e7 100644 --- a/Engine/lib/libtheora/lib/dec/internal.c +++ b/Engine/lib/libtheora/lib/internal.c @@ -5,21 +5,20 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: internal.c 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: internal.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include #include #include -#include "../internal.h" -#include "idct.h" +#include "internal.h" @@ -27,7 +26,7 @@ block. 
All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs past the end of a block in bogus streams get mapped to a known location.*/ -const int OC_FZIG_ZAG[128]={ +const unsigned char OC_FZIG_ZAG[128]={ 0, 1, 8,16, 9, 2, 3,10, 17,24,32,25,18,11, 4, 5, 12,19,26,33,40,48,41,34, @@ -48,7 +47,7 @@ const int OC_FZIG_ZAG[128]={ /*A map from the coefficient number in a block to its index in the zig zag scan.*/ -const int OC_IZIG_ZAG[64]={ +const unsigned char OC_IZIG_ZAG[64]={ 0, 1, 5, 6,14,15,27,28, 2, 4, 7,13,16,26,29,42, 3, 8,12,17,25,30,41,43, @@ -59,33 +58,13 @@ const int OC_IZIG_ZAG[64]={ 35,36,48,49,57,58,62,63 }; -/*The predictor frame to use for each macro block mode.*/ -const int OC_FRAME_FOR_MODE[8]={ - /*OC_MODE_INTER_NOMV*/ - OC_FRAME_PREV, - /*OC_MODE_INTRA*/ - OC_FRAME_SELF, - /*OC_MODE_INTER_MV*/ - OC_FRAME_PREV, - /*OC_MODE_INTER_MV_LAST*/ - OC_FRAME_PREV, - /*OC_MODE_INTER_MV_LAST2*/ - OC_FRAME_PREV, - /*OC_MODE_GOLDEN*/ - OC_FRAME_GOLD, - /*OC_MODE_GOLDEN_MV*/ - OC_FRAME_GOLD, - /*OC_MODE_INTER_MV_FOUR*/ - OC_FRAME_PREV, -}; - /*A map from physical macro block ordering to bitstream macro block ordering within a super block.*/ -const int OC_MB_MAP[2][2]={{0,3},{1,2}}; +const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}}; /*A list of the indices in the oc_mb.map array that can be valid for each of the various chroma decimation types.*/ -const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={ +const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={ {0,1,2,3,4,8}, {0,1,2,3,4,5,8,9}, {0,1,2,3,4,6,8,10}, @@ -94,13 +73,13 @@ const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={ /*The number of indices in the oc_mb.map array that can be valid for each of the various chroma decimation types.*/ -const int OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12}; +const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12}; /*The number of extra bits that are coded with each of the DCT tokens. 
Each DCT token has some fixed number of additional bits (possibly 0) stored after the token itself, containing, for example, coefficient magnitude, sign bits, etc.*/ -const int OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={ +const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={ 0,0,0,2,3,4,12,3,6, 0,0,0,0, 1,1,1,1,2,3,4,5,6,10, @@ -118,113 +97,10 @@ int oc_ilog(unsigned _v){ -/*Determines the number of blocks or coefficients to be skipped for a given - token value. - _token: The token value to skip. - _extra_bits: The extra bits attached to this token. - Return: A positive value indicates that number of coefficients are to be - skipped in the current block. - Otherwise, the negative of the return value indicates that number of - blocks are to be ended.*/ -typedef int (*oc_token_skip_func)(int _token,int _extra_bits); - -/*Handles the simple end of block tokens.*/ -static int oc_token_skip_eob(int _token,int _extra_bits){ - static const int NBLOCKS_ADJUST[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0}; - return -_extra_bits-NBLOCKS_ADJUST[_token]; -} - -/*The last EOB token has a special case, where an EOB run of size zero ends all - the remaining blocks in the frame.*/ -static int oc_token_skip_eob6(int _token,int _extra_bits){ - if(!_extra_bits)return -INT_MAX; - return -_extra_bits; -} - -/*Handles the pure zero run tokens.*/ -static int oc_token_skip_zrl(int _token,int _extra_bits){ - return _extra_bits+1; -} - -/*Handles a normal coefficient value token.*/ -static int oc_token_skip_val(void){ - return 1; -} - -/*Handles a category 1A zero run/coefficient value combo token.*/ -static int oc_token_skip_run_cat1a(int _token){ - return _token-OC_DCT_RUN_CAT1A+2; -} - -/*Handles category 1b and 2 zero run/coefficient value combo tokens.*/ -static int oc_token_skip_run(int _token,int _extra_bits){ - static const int NCOEFFS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 7,11,2,3 - }; - static const int NCOEFFS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={ - 3,7,0,1 - }; - 
_token-=OC_DCT_RUN_CAT1B; - return (_extra_bits&NCOEFFS_MASK[_token])+NCOEFFS_ADJUST[_token]; -} - -/*A jump table for computing the number of coefficients or blocks to skip for - a given token value. - This reduces all the conditional branches, etc., needed to parse these token - values down to one indirect jump.*/ -static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={ - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob6, - oc_token_skip_zrl, - oc_token_skip_zrl, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - oc_token_skip_run, - oc_token_skip_run, - oc_token_skip_run, - oc_token_skip_run -}; - -/*Determines the number of blocks or coefficients to be skipped for a given - token value. - _token: The token value to skip. - _extra_bits: The extra bits attached to this token. - Return: A positive value indicates that number of coefficients are to be - skipped in the current block. - Otherwise, the negative of the return value indicates that number of - blocks are to be ended. 
- 0 will never be returned, so that at least one coefficient in one - block will always be decoded for every token.*/ -int oc_dct_token_skip(int _token,int _extra_bits){ - return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits); -} - - /*The function used to fill in the chroma plane motion vectors for a macro block when 4 different motion vectors are specified in the luma plane. - This version is for use with chroma decimated in the X and Y directions. + This version is for use with chroma decimated in the X and Y directions + (4:2:0). _cbmvs: The chroma block-level motion vectors to fill in. _lbmvs: The luma block-level motion vectors.*/ static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ @@ -256,7 +132,7 @@ static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ /*The function used to fill in the chroma plane motion vectors for a macro block when 4 different motion vectors are specified in the luma plane. - This version is for use with chroma decimated in the X direction. + This version is for use with chroma decimated in the X direction (4:2:2). _cbmvs: The chroma block-level motion vectors to fill in. _lbmvs: The luma block-level motion vectors.*/ static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ @@ -274,7 +150,7 @@ static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ /*The function used to fill in the chroma plane motion vectors for a macro block when 4 different motion vectors are specified in the luma plane. - This version is for use with no chroma decimation. + This version is for use with no chroma decimation (4:4:4). _cbmvs: The chroma block-level motion vectors to fill in. _lmbmv: The luma macro-block level motion vector to fill in for use in prediction. 
@@ -305,6 +181,7 @@ void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){ datsz=rowsz*_height; /*Alloc array and row pointers.*/ ret=(char *)_ogg_malloc(datsz+colsz); + if(ret==NULL)return NULL; /*Initialize the array.*/ if(ret!=NULL){ size_t i; @@ -327,6 +204,7 @@ void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){ datsz=rowsz*_height; /*Alloc array and row pointers.*/ ret=(char *)_ogg_calloc(datsz+colsz,1); + if(ret==NULL)return NULL; /*Initialize the array.*/ if(ret!=NULL){ size_t i; @@ -355,7 +233,8 @@ void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, _dst[pli].width=_src[pli].width; _dst[pli].height=_src[pli].height; _dst[pli].stride=-_src[pli].stride; - _dst[pli].data=_src[pli].data+(1-_dst[pli].height)*_dst[pli].stride; + _dst[pli].data=_src[pli].data + +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride; } } @@ -364,7 +243,7 @@ const char *th_version_string(void){ } ogg_uint32_t th_version_number(void){ - return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+(TH_VERSION_SUB); + return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB; } /*Determines the packet type. diff --git a/Engine/lib/libtheora/lib/internal.h b/Engine/lib/libtheora/lib/internal.h index 0413a355a..d81263e13 100644 --- a/Engine/lib/libtheora/lib/internal.h +++ b/Engine/lib/libtheora/lib/internal.h @@ -5,36 +5,74 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: internal.h 15469 2008-10-30 12:49:42Z tterribe $ + last mod: $Id: internal.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ - #if !defined(_internal_H) # define _internal_H (1) # include +# include # if defined(HAVE_CONFIG_H) # include # endif # include "theora/codec.h" # include "theora/theora.h" -# include "dec/ocintrin.h" -# include "dec/huffman.h" -# include "dec/quant.h" -/*Thank you Microsoft, I know the order of operations.*/ # if defined(_MSC_VER) -# pragma warning(disable:4554) /* order of operations */ -# pragma warning(disable:4799) /* disable missing EMMS warnings */ +/*Disable missing EMMS warnings.*/ +# pragma warning(disable:4799) +/*Thank you Microsoft, I know the order of operations.*/ +# pragma warning(disable:4554) +# endif +/*You, too, gcc.*/ +# if defined(__GNUC_PREREQ) +# if __GNUC_PREREQ(4,2) +# pragma GCC diagnostic ignored "-Wparentheses" +# endif # endif +# include "ocintrin.h" +# include "huffman.h" +# include "quant.h" + +/*Some assembly constructs require aligned operands.*/ +# if defined(OC_X86_ASM) +# if defined(__GNUC__) +# define OC_ALIGN8(expr) expr __attribute__((aligned(8))) +# define OC_ALIGN16(expr) expr __attribute__((aligned(16))) +# elif defined(_MSC_VER) +# define OC_ALIGN8(expr) __declspec (align(8)) expr +# define OC_ALIGN16(expr) __declspec (align(16)) expr +# endif +# endif +# if !defined(OC_ALIGN8) +# define OC_ALIGN8(expr) expr +# endif +# if !defined(OC_ALIGN16) +# define OC_ALIGN16(expr) expr +# endif + + + +typedef struct oc_sb_flags oc_sb_flags; +typedef struct oc_border_info oc_border_info; +typedef struct oc_fragment oc_fragment; +typedef struct oc_fragment_plane 
oc_fragment_plane; +typedef struct oc_base_opt_vtable oc_base_opt_vtable; +typedef struct oc_base_opt_data oc_base_opt_data; +typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable; +typedef struct oc_theora_state oc_theora_state; + + + /*This library's version.*/ -# define OC_VENDOR_STRING "Xiph.Org libTheora I 20081020 3 2 1" +# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)" /*Theora bitstream version.*/ # define TH_VERSION_MAJOR (3) @@ -97,18 +135,10 @@ /*The number of (coded) modes.*/ #define OC_NMODES (8) -/*Macro block is not coded.*/ -#define OC_MODE_NOT_CODED (8) - -/*Predictor bit flags.*/ -/*Left.*/ -#define OC_PL (1) -/*Upper-left.*/ -#define OC_PUL (2) -/*Up.*/ -#define OC_PU (4) -/*Upper-right.*/ -#define OC_PUR (8) +/*Determines the reference frame used for a given MB mode.*/ +#define OC_FRAME_FOR_MODE(_x) \ + OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \ + OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x)) /*Constants for the packet state machine common between encoder and decoder.*/ @@ -123,21 +153,7 @@ -typedef struct oc_theora_state oc_theora_state; - - - -/*A map from a super block to fragment numbers.*/ -typedef int oc_sb_map[4][4]; -/*A map from a macro block to fragment numbers.*/ -typedef int oc_mb_map[3][4]; -/*A motion vector.*/ -typedef signed char oc_mv[2]; - - - -/*Super block information. - Super blocks are 32x32 segments of pixels in a single color plane indexed +/*Super blocks are 32x32 segments of pixels in a single color plane indexed in image order. Internally, super blocks are broken up into four quadrants, each of which contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels. @@ -148,194 +164,201 @@ typedef signed char oc_mv[2]; the regular image order indexing strategy, blocks indexed in image order are called "fragments". 
Fragments are indexed in image order, left to right, then bottom to top, - from Y plane to Cb plane to Cr plane.*/ -typedef struct{ - unsigned coded_fully:1; - unsigned coded_partially:1; - unsigned quad_valid:4; - oc_sb_map map; -}oc_sb; + from Y' plane to Cb plane to Cr plane. - - -/*Macro block information. - The co-located fragments in all image planes corresponding to the location of - a single luma plane super block quadrant forms a macro block. - Thus there is only a single set of macro blocks for all planes, which + The co-located fragments in all image planes corresponding to the location + of a single quadrant of a luma plane super block form a macro block. + Thus there is only a single set of macro blocks for all planes, each of which contains between 6 and 12 fragments, depending on the pixel format. - Therefore macro block information is kept in a separate array from super - blocks, to avoid unused space in the other planes.*/ -typedef struct{ - /*The current macro block mode. - A negative number indicates the macro block lies entirely outside the - coded frame.*/ - int mode; - /*The X location of the macro block's upper-left hand pixel.*/ - int x; - /*The Y location of the macro block's upper-right hand pixel.*/ - int y; - /*The fragments that belong to this macro block in each color plane. - Fragments are stored in image order (left to right then top to bottom). - When chroma components are decimated, the extra fragments have an index of - -1.*/ - oc_mb_map map; -}oc_mb; + Therefore macro block information is kept in a separate set of arrays from + super blocks to avoid unused space in the other planes. + The lists are indexed in super block order. + That is, the macro block corresponding to the macro block mbi in (luma plane) + super block sbi is at index (sbi<<2|mbi). + Thus the number of macro blocks in each dimension is always twice the number + of super blocks, even when only an odd number fall inside the coded frame. 
+ These "extra" macro blocks are just an artifact of our internal data layout, + and not part of the coded stream; they are flagged with a negative MB mode.*/ + + + +/*A single quadrant of the map from a super block to fragment numbers.*/ +typedef ptrdiff_t oc_sb_map_quad[4]; +/*A map from a super block to fragment numbers.*/ +typedef oc_sb_map_quad oc_sb_map[4]; +/*A single plane of the map from a macro block to fragment numbers.*/ +typedef ptrdiff_t oc_mb_map_plane[4]; +/*A map from a macro block to fragment numbers.*/ +typedef oc_mb_map_plane oc_mb_map[3]; +/*A motion vector.*/ +typedef signed char oc_mv[2]; + + + +/*Super block information.*/ +struct oc_sb_flags{ + unsigned char coded_fully:1; + unsigned char coded_partially:1; + unsigned char quad_valid:4; +}; /*Information about a fragment which intersects the border of the displayable region. - This marks which pixels belong to the displayable region, and is used to - ensure that pixels outside of this region are never referenced. - This allows applications to pass in buffers that are really the size of the - displayable region without causing a seg fault.*/ -typedef struct{ + This marks which pixels belong to the displayable region.*/ +struct oc_border_info{ /*A bit mask marking which pixels are in the displayable region. Pixel (x,y) corresponds to bit (y<<3|x).*/ ogg_int64_t mask; /*The number of pixels in the displayable region. This is always positive, and always less than 64.*/ int npixels; -}oc_border_info; +}; /*Fragment information.*/ -typedef struct{ +struct oc_fragment{ /*A flag indicating whether or not this fragment is coded.*/ - unsigned coded:1; - /*A flag indicating that all of this fragment lies outside the displayable + unsigned coded:1; + /*A flag indicating that this entire fragment lies outside the displayable region of the frame. 
Note the contrast with an invalid macro block, which is outside the coded - frame, not just the displayable one.*/ - unsigned invalid:1; - /*The quality index used for this fragment's AC coefficients.*/ - unsigned qi:6; - /*The mode of the macroblock this fragment belongs to. + frame, not just the displayable one. + There are no fragments outside the coded frame by construction.*/ + unsigned invalid:1; + /*The index of the quality index used for this fragment's AC coefficients.*/ + unsigned qii:6; + /*The mode of the macroblock this fragment belongs to.*/ + unsigned mb_mode:3; + /*The index of the associated border information for fragments which lie + partially outside the displayable region. + For fragments completely inside or outside this region, this is -1. Note that the C standard requires an explicit signed keyword for bitfield types, since some compilers may treat them as unsigned without it.*/ - signed int mbmode:8; + signed int borderi:5; /*The prediction-corrected DC component. Note that the C standard requires an explicit signed keyword for bitfield types, since some compilers may treat them as unsigned without it.*/ - signed int dc:16; - /*A pointer to the portion of an image covered by this fragment in several - images. - The first three are reconstructed frame buffers, while the last is the - input image buffer. - The appropriate stride value is determined by the color plane the fragment - belongs in.*/ - unsigned char *buffer[4]; - /*Information for fragments which lie partially outside the displayable - region. 
- For fragments completely inside or outside this region, this is NULL.*/ - oc_border_info *border; - /*The motion vector used for this fragment.*/ - oc_mv mv; -}oc_fragment; + signed int dc:16; +}; /*A description of each fragment plane.*/ -typedef struct{ +struct oc_fragment_plane{ /*The number of fragments in the horizontal direction.*/ - int nhfrags; + int nhfrags; /*The number of fragments in the vertical direction.*/ - int nvfrags; + int nvfrags; /*The offset of the first fragment in the plane.*/ - int froffset; + ptrdiff_t froffset; /*The total number of fragments in the plane.*/ - int nfrags; + ptrdiff_t nfrags; /*The number of super blocks in the horizontal direction.*/ - int nhsbs; + unsigned nhsbs; /*The number of super blocks in the vertical direction.*/ - int nvsbs; + unsigned nvsbs; /*The offset of the first super block in the plane.*/ - int sboffset; + unsigned sboffset; /*The total number of super blocks in the plane.*/ - int nsbs; -}oc_fragment_plane; + unsigned nsbs; +}; /*The shared (encoder and decoder) functions that have accelerated variants.*/ -typedef struct{ - void (*frag_recon_intra)(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t *_residue); - void (*frag_recon_inter)(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue); - void (*frag_recon_inter2)(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue); - void (*state_frag_copy)(const oc_theora_state *_state, - const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli); - void (*state_frag_recon)(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]); +struct oc_base_opt_vtable{ + void (*frag_copy)(unsigned char *_dst, + const unsigned char *_src,int _ystride); + void 
(*frag_recon_intra)(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]); + void (*frag_recon_inter)(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); + void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); + void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi); + void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); + void (*state_frag_copy_list)(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); + void (*state_loop_filter_frag_rows)(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); void (*restore_fpu)(void); - void (*state_loop_filter_frag_rows)(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end); -}oc_base_opt_vtable; +}; + +/*The shared (encoder and decoder) tables that vary according to which variants + of the above functions are used.*/ +struct oc_base_opt_data{ + const unsigned char *dct_fzig_zag; +}; - -/*Common state information between the encoder and decoder.*/ +/*State information common to both the encoder and decoder.*/ struct oc_theora_state{ /*The stream information.*/ th_info info; /*Table for shared accelerated functions.*/ oc_base_opt_vtable opt_vtable; + /*Table for shared data used by accelerated functions.*/ + oc_base_opt_data opt_data; /*CPU flags to detect the presence of extended instruction sets.*/ ogg_uint32_t cpu_flags; /*The fragment plane descriptions.*/ oc_fragment_plane fplanes[3]; - /*The total number of fragments in a single frame.*/ - int nfrags; /*The list of fragments, indexed in image order.*/ oc_fragment *frags; + /*The the offset into the reference frame buffer to the upper-left pixel of + each fragment.*/ + ptrdiff_t *frag_buf_offs; + 
/*The motion vector for each fragment.*/ + oc_mv *frag_mvs; + /*The total number of fragments in a single frame.*/ + ptrdiff_t nfrags; + /*The list of super block maps, indexed in image order.*/ + oc_sb_map *sb_maps; + /*The list of super block flags, indexed in image order.*/ + oc_sb_flags *sb_flags; /*The total number of super blocks in a single frame.*/ - int nsbs; - /*The list of super blocks, indexed in image order.*/ - oc_sb *sbs; + unsigned nsbs; + /*The fragments from each color plane that belong to each macro block. + Fragments are stored in image order (left to right then top to bottom). + When chroma components are decimated, the extra fragments have an index of + -1.*/ + oc_mb_map *mb_maps; + /*The list of macro block modes. + A negative number indicates the macro block lies entirely outside the + coded frame.*/ + signed char *mb_modes; /*The number of macro blocks in the X direction.*/ - int nhmbs; + unsigned nhmbs; /*The number of macro blocks in the Y direction.*/ - int nvmbs; + unsigned nvmbs; /*The total number of macro blocks.*/ - int nmbs; - /*The list of macro blocks, indexed in super block order. - That is, the macro block corresponding to the macro block mbi in (luma - plane) super block sbi is (sbi<<2|mbi).*/ - oc_mb *mbs; - /*The list of coded fragments, in coded order.*/ - int *coded_fragis; + size_t nmbs; + /*The list of coded fragments, in coded order. + Uncoded fragments are stored in reverse order from the end of the list.*/ + ptrdiff_t *coded_fragis; /*The number of coded fragments in each plane.*/ - int ncoded_fragis[3]; - /*The list of uncoded fragments. 
- This just past the end of the list, which is in reverse order, and - uses the same block of allocated storage as the coded_fragis list.*/ - int *uncoded_fragis; - /*The number of uncoded fragments in each plane.*/ - int nuncoded_fragis[3]; - /*The list of coded macro blocks in the Y plane, in coded order.*/ - int *coded_mbis; - /*The number of coded macro blocks in the Y plane.*/ - int ncoded_mbis; - /*A copy of the image data used to fill the input pointers in each fragment. - If the data pointers or strides change, these input pointers must be - re-populated.*/ - th_ycbcr_buffer input; + ptrdiff_t ncoded_fragis[3]; + /*The total number of coded fragments.*/ + ptrdiff_t ntotal_coded_fragis; + /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ + int ref_frame_idx[4]; + /*The actual buffers used for the previously decoded frames.*/ + th_ycbcr_buffer ref_frame_bufs[4]; + /*The storage for the reference frame buffers.*/ + unsigned char *ref_frame_data[4]; + /*The strides for each plane in the reference frames.*/ + int ref_ystride[3]; /*The number of unique border patterns.*/ int nborders; - /*The storage for the border info for all border fragments. - This data is pointed to from the appropriate fragments.*/ + /*The unique border patterns for all border fragments. 
+ The borderi field of fragments which straddle the border indexes this + list.*/ oc_border_info borders[16]; - /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ - int ref_frame_idx[3]; - /*The actual buffers used for the previously decoded frames.*/ - th_ycbcr_buffer ref_frame_bufs[3]; - /*The storage for the reference frame buffers.*/ - unsigned char *ref_frame_data; /*The frame number of the last keyframe.*/ ogg_int64_t keyframe_num; /*The frame number of the current frame.*/ @@ -343,14 +366,17 @@ struct oc_theora_state{ /*The granpos of the current frame.*/ ogg_int64_t granpos; /*The type of the current frame.*/ - int frame_type; - /*The quality indices of the current frame.*/ - int qis[3]; + unsigned char frame_type; + /*The bias to add to the frame count when computing granule positions.*/ + unsigned char granpos_bias; /*The number of quality indices used in the current frame.*/ - int nqis; - /*The dequantization tables.*/ - oc_quant_table *dequant_tables[2][3]; - oc_quant_tables dequant_table_data[2][3]; + unsigned char nqis; + /*The quality indices of the current frame.*/ + unsigned char qis[3]; + /*The dequantization tables, stored in zig-zag order, and indexed by + qi, pli, qti, and zzi.*/ + ogg_uint16_t *dequant_tables[64][3][2]; + OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]); /*Loop filter strength parameters.*/ unsigned char loop_filter_limits[64]; }; @@ -369,25 +395,20 @@ typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]); /*A map from the index in the zig zag scan to the coefficient number in a - block. - The extra 64 entries send out of bounds indexes to index 64. 
- This is used to safely ignore invalid zero runs when decoding - coefficients.*/ -extern const int OC_FZIG_ZAG[128]; + block.*/ +extern const unsigned char OC_FZIG_ZAG[128]; /*A map from the coefficient number in a block to its index in the zig zag scan.*/ -extern const int OC_IZIG_ZAG[64]; -/*The predictor frame to use for each macro block mode.*/ -extern const int OC_FRAME_FOR_MODE[OC_NMODES]; +extern const unsigned char OC_IZIG_ZAG[64]; /*A map from physical macro block ordering to bitstream macro block ordering within a super block.*/ -extern const int OC_MB_MAP[2][2]; -/*A list of the indices in the oc_mb.map array that can be valid for each of +extern const unsigned char OC_MB_MAP[2][2]; +/*A list of the indices in the oc_mb_map array that can be valid for each of the various chroma decimation types.*/ -extern const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]; -/*The number of indices in the oc_mb.map array that can be valid for each of +extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]; +/*The number of indices in the oc_mb_map array that can be valid for each of the various chroma decimation types.*/ -extern const int OC_MB_MAP_NIDXS[TH_PF_NFORMATS]; +extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]; /*A table of functions used to fill in the Cb,Cr plane motion vectors for a macro block when 4 different motion vectors are specified in the luma plane.*/ @@ -403,12 +424,7 @@ void oc_free_2d(void *_ptr); void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, const th_ycbcr_buffer _src); -int oc_dct_token_skip(int _token,int _extra_bits); - -int oc_frag_pred_dc(const oc_fragment *_frag, - const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]); - -int oc_state_init(oc_theora_state *_state,const th_info *_info); +int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs); void oc_state_clear(oc_theora_state *_state); void oc_state_vtable_init_c(oc_theora_state *_state); void oc_state_borders_fill_rows(oc_theora_state 
*_state,int _refi,int _pli, @@ -418,8 +434,8 @@ void oc_state_borders_fill(oc_theora_state *_state,int _refi); void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx, th_ycbcr_buffer _img); int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby); -int oc_state_get_mv_offsets(oc_theora_state *_state,int *_offsets, - int _dx,int _dy,int _ystride,int _pli); +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], + int _pli,int _dx,int _dy); int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv); void oc_state_loop_filter(oc_theora_state *_state,int _frame); @@ -429,39 +445,42 @@ int oc_state_dump_frame(const oc_theora_state *_state,int _frame, #endif /*Shared accelerated functions.*/ +void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride); void oc_frag_recon_intra(const oc_theora_state *_state, - unsigned char *_dst,int _dst_ystride,const ogg_int16_t *_residue); -void oc_frag_recon_inter(const oc_theora_state *_state, - unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue); + unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); void oc_frag_recon_inter2(const oc_theora_state *_state, - unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue); -void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli); -void oc_state_frag_recon(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]); -void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv, 
- int _refi,int _pli,int _fragy0,int _fragy_end); + unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2, + int _ystride,const ogg_int16_t _residue[64]); +void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); void oc_restore_fpu(const oc_theora_state *_state); /*Default pure-C implementations.*/ +void oc_frag_copy_c(unsigned char *_dst, + const unsigned char *_src,int _src_ystride); void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t *_residue); -void oc_frag_recon_inter_c(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue); -void oc_frag_recon_inter2_c(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue); -void oc_state_frag_copy_c(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli); -void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]); -void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end); + const ogg_int16_t _residue[64]); +void oc_frag_recon_inter_c(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, + const 
unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); +void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_c(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); void oc_restore_fpu_c(void); /*We need a way to call a few encoder functions without introducing a link-time @@ -472,16 +491,15 @@ void oc_restore_fpu_c(void); We do a similar thing for the decoder in case we ever decide to split off a common base library.*/ typedef void (*oc_state_clear_func)(theora_state *_th); -typedef int (*oc_state_control_func)(theora_state *th,int req, - void *buf,size_t buf_sz); +typedef int (*oc_state_control_func)(theora_state *th,int _req, + void *_buf,size_t _buf_sz); typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th, ogg_int64_t _granulepos); typedef double (*oc_state_granule_time_func)(theora_state *_th, ogg_int64_t _granulepos); -typedef struct oc_state_dispatch_vtbl oc_state_dispatch_vtbl; -struct oc_state_dispatch_vtbl{ +struct oc_state_dispatch_vtable{ oc_state_clear_func clear; oc_state_control_func control; oc_state_granule_frame_func granule_frame; diff --git a/Engine/lib/libtheora/lib/mathops.c b/Engine/lib/libtheora/lib/mathops.c new file mode 100644 index 000000000..d3fb90919 --- /dev/null +++ b/Engine/lib/libtheora/lib/mathops.c @@ -0,0 +1,296 @@ +#include "mathops.h" +#include + +/*The fastest fallback strategy for platforms with fast multiplication appears + to be based on de Bruijn sequences~\cite{LP98}. + Tests confirmed this to be true even on an ARM11, where it is actually faster + than using the native clz instruction. 
+ Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where + multiplication or table lookups are too expensive. + + @UNPUBLISHED{LP98, + author="Charles E. Leiserson and Harald Prokop", + title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word", + month=Jun, + year=1998, + note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" + }*/ +#if !defined(OC_ILOG_NODEBRUIJN)&& \ + !defined(OC_CLZ32)||!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL +static const unsigned char OC_DEBRUIJN_IDX32[32]={ + 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, + 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 +}; +#endif + +int oc_ilog32(ogg_uint32_t _v){ +#if defined(OC_CLZ32) + return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v; +#else +/*On a Pentium M, this branchless version tested as the fastest version without + multiplications on 1,000,000,000 random 32-bit integers, edging out a + similar version with branches, and a 256-entry LUT version.*/ +# if defined(OC_ILOG_NODEBRUIJN) + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFU)<<4; + _v>>=m; + ret|=m; + m=(_v>0xFFU)<<3; + _v>>=m; + ret|=m; + m=(_v>0xFU)<<2; + _v>>=m; + ret|=m; + m=(_v>3)<<1; + _v>>=m; + ret|=m; + ret+=_v>1; + return ret; +/*This de Bruijn sequence version is faster if you have a fast multiplier.*/ +# else + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v=(_v>>1)+1; + ret+=OC_DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; + return ret; +# endif +#endif +} + +int oc_ilog64(ogg_int64_t _v){ +#if defined(OC_CLZ64) + return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v; +#else +# if defined(OC_ILOG_NODEBRUIJN) + ogg_uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(ogg_uint32_t)(_v>>m); + ret|=m; + m=(v>0xFFFFU)<<4; + v>>=m; + ret|=m; + m=(v>0xFFU)<<3; + v>>=m; + ret|=m; + m=(v>0xFU)<<2; + v>>=m; + ret|=m; + m=(v>3)<<1; + v>>=m; + ret|=m; + ret+=v>1; + return ret; +# else +/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ +# 
if LONG_MAX<9223372036854775807LL + ogg_uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(ogg_uint32_t)(_v>>m); + ret|=m; + v|=v>>1; + v|=v>>2; + v|=v>>4; + v|=v>>8; + v|=v>>16; + v=(v>>1)+1; + ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; + return ret; +/*Otherwise do it in one 64-bit operation.*/ +# else + static const unsigned char OC_DEBRUIJN_IDX64[64]={ + 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, + 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, + 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, + 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 + }; + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v|=_v>>32; + _v=(_v>>1)+1; + ret+=OC_DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; + return ret; +# endif +# endif +#endif +} + +/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))*/ +static const ogg_int64_t OC_ATANH_LOG2[32]={ + 0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL, + 0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL, + 0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL, + 0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL, + 0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL, + 0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL, + 0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL, + 0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL, + 0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL, + 0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL, + 0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL +}; + +/*Computes the binary exponential of _z, a log base 2 in Q57 format.*/ +ogg_int64_t oc_bexp64(ogg_int64_t _z){ + ogg_int64_t w; + ogg_int64_t z; + int ipart; + ipart=(int)(_z>>57); + if(ipart<0)return 0; + if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL; + z=_z-OC_Q57(ipart); + if(z){ + ogg_int64_t mask; + long wlo; + int i; + /*C doesn't give us 64x64->128 muls, so we use CORDIC. 
+ This is not particularly fast, but it's not being used in time-critical + code; it is very accurate.*/ + /*z is the fractional part of the log in Q62 format. + We need 1 bit of headroom since the magnitude can get larger than 1 + during the iteration, and a sign bit.*/ + z<<=5; + /*w is the exponential in Q61 format (since it also needs headroom and can + get as large as 2.0); we could get another bit if we dropped the sign, + but we'll recover that bit later anyway. + Ideally this should start out as + \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}} + but in order to guarantee convergence we have to repeat iterations 4, + 13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/ + w=0x26A3D0E401DD846DLL; + for(i=0;;i++){ + mask=-(z<0); + w+=(w>>i+1)+mask^mask; + z-=OC_ATANH_LOG2[i]+mask^mask; + /*Repeat iteration 4.*/ + if(i>=3)break; + z<<=1; + } + for(;;i++){ + mask=-(z<0); + w+=(w>>i+1)+mask^mask; + z-=OC_ATANH_LOG2[i]+mask^mask; + /*Repeat iteration 13.*/ + if(i>=12)break; + z<<=1; + } + for(;i<32;i++){ + mask=-(z<0); + w+=(w>>i+1)+mask^mask; + z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1; + } + wlo=0; + /*Skip the remaining iterations unless we really require that much + precision. + We could have bailed out earlier for smaller iparts, but that would + require initializing w from a table, as the limit doesn't converge to + 61-bit precision until n=30.*/ + if(ipart>30){ + /*For these iterations, we just update the low bits, as the high bits + can't possibly be affected. 
+ OC_ATANH_LOG2 has also converged (it actually did so one iteration + earlier, but that's no reason for an extra special case).*/ + for(;;i++){ + mask=-(z<0); + wlo+=(w>>i)+mask^mask; + z-=OC_ATANH_LOG2[31]+mask^mask; + /*Repeat iteration 40.*/ + if(i>=39)break; + z<<=1; + } + for(;i<61;i++){ + mask=-(z<0); + wlo+=(w>>i)+mask^mask; + z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1; + } + } + w=(w<<1)+wlo; + } + else w=(ogg_int64_t)1<<62; + if(ipart<62)w=(w>>61-ipart)+1>>1; + return w; +} + +/*Computes the binary logarithm of _w, returned in Q57 format.*/ +ogg_int64_t oc_blog64(ogg_int64_t _w){ + ogg_int64_t z; + int ipart; + if(_w<=0)return -1; + ipart=OC_ILOGNZ_64(_w)-1; + if(ipart>61)_w>>=ipart-61; + else _w<<=61-ipart; + z=0; + if(_w&_w-1){ + ogg_int64_t x; + ogg_int64_t y; + ogg_int64_t u; + ogg_int64_t mask; + int i; + /*C doesn't give us 64x64->128 muls, so we use CORDIC. + This is not particularly fast, but it's not being used in time-critical + code; it is very accurate.*/ + /*z is the fractional part of the log in Q61 format.*/ + /*x and y are the cosh() and sinh(), respectively, in Q61 format. 
+ We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/ + x=_w+((ogg_int64_t)1<<61); + y=_w-((ogg_int64_t)1<<61); + for(i=0;i<4;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*Repeat iteration 4.*/ + for(i--;i<13;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*Repeat iteration 13.*/ + for(i--;i<32;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*OC_ATANH_LOG2 has converged.*/ + for(;i<40;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[31]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*Repeat iteration 40.*/ + for(i--;i<62;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[31]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + z=z+8>>4; + } + return OC_Q57(ipart)+z; +} diff --git a/Engine/lib/libtheora/lib/mathops.h b/Engine/lib/libtheora/lib/mathops.h new file mode 100644 index 000000000..efbc5377b --- /dev/null +++ b/Engine/lib/libtheora/lib/mathops.h @@ -0,0 +1,141 @@ +#if !defined(_mathops_H) +# define _mathops_H (1) +# include + +# ifdef __GNUC_PREREQ +# if __GNUC_PREREQ(3,4) +# include +/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from + "upgrading" the type of an entire expression to an (unsigned) size_t.*/ +# if INT_MAX>=2147483647 +# define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT) +# define OC_CLZ32(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=2147483647L +# define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) +# define OC_CLZ32(_x) (__builtin_clzl(_x)) +# endif +# if INT_MAX>=9223372036854775807LL +# define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT) +# define OC_CLZ64(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=9223372036854775807LL +# define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) +# define OC_CLZ64(_x) (__builtin_clzl(_x)) +# elif 
LLONG_MAX>=9223372036854775807LL|| \ + __LONG_LONG_MAX__>=9223372036854775807LL +# define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT) +# define OC_CLZ64(_x) (__builtin_clzll(_x)) +# endif +# endif +# endif + + + +/** + * oc_ilog32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin + * function instead, which should be faster. + */ +int oc_ilog32(ogg_uint32_t _v); +/** + * oc_ilog64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin + * function instead, which should be faster. + */ +int oc_ilog64(ogg_int64_t _v); + + +# if defined(OC_CLZ32) +/** + * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value. + * @_v: A non-zero 32-bit value. + * Returns floor(log2(_v))+1. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * If _v is zero, the return value is undefined; use OC_ILOG_32() instead. + */ +# define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v)) +/** + * OC_ILOG_32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. 
+ */ +# define OC_ILOG_32(_v) (OC_ILOGNZ_32(_v)&-!!(_v)) +# else +# define OC_ILOGNZ_32(_v) (oc_ilog32(_v)) +# define OC_ILOG_32(_v) (oc_ilog32(_v)) +# endif + +# if defined(CLZ64) +/** + * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value. + * @_v: A non-zero 64-bit value. + * Returns floor(log2(_v))+1. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * If _v is zero, the return value is undefined; use OC_ILOG_64() instead. + */ +# define OC_ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v)) +/** + * OC_ILOG_64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + */ +# define OC_ILOG_64(_v) (OC_ILOGNZ_64(_v)&-!!(_v)) +# else +# define OC_ILOGNZ_64(_v) (oc_ilog64(_v)) +# define OC_ILOG_64(_v) (oc_ilog64(_v)) +# endif + +# define OC_STATIC_ILOG0(_v) (!!(_v)) +# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v)) +# define OC_STATIC_ILOG2(_v) \ + (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v)) +# define OC_STATIC_ILOG3(_v) \ + (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v)) +# define OC_STATIC_ILOG4(_v) \ + (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v)) +# define OC_STATIC_ILOG5(_v) \ + (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v)) +# define OC_STATIC_ILOG6(_v) \ + (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v)) +/** + * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant. + * @_v: A non-negative 32-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. 
+ * This macro is suitable for evaluation at compile time, but it should not be + * used on values that can change at runtime, as it operates via exhaustive + * search. + */ +# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v))) +/** + * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant. + * @_v: A non-negative 64-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro is suitable for evaluation at compile time, but it should not be + * used on values that can change at runtime, as it operates via exhaustive + * search. + */ +# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v))) + +#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57) + +ogg_int64_t oc_bexp64(ogg_int64_t _z); +ogg_int64_t oc_blog64(ogg_int64_t _w); + +#endif diff --git a/Engine/lib/libtheora/lib/mcenc.c b/Engine/lib/libtheora/lib/mcenc.c new file mode 100644 index 000000000..797e81f4f --- /dev/null +++ b/Engine/lib/libtheora/lib/mcenc.c @@ -0,0 +1,767 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id$ + + ********************************************************************/ +#include +#include +#include +#include "encint.h" + + + +typedef struct oc_mcenc_ctx oc_mcenc_ctx; + + + +/*Temporary state used for motion estimation.*/ +struct oc_mcenc_ctx{ + /*The candidate motion vectors.*/ + int candidates[13][2]; + /*The start of the Set B candidates.*/ + int setb0; + /*The total number of candidates.*/ + int ncandidates; +}; + + + +/*The maximum Y plane SAD value for accepting the median predictor.*/ +#define OC_YSAD_THRESH1 (256) +/*The amount to right shift the minimum error by when inflating it for + computing the second maximum Y plane SAD threshold.*/ +#define OC_YSAD_THRESH2_SCALE_BITS (4) +/*The amount to add to the second maximum Y plane threshold when inflating + it.*/ +#define OC_YSAD_THRESH2_OFFSET (64) + +/*The vector offsets in the X direction for each search site in the square + pattern.*/ +static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1}; +/*The vector offsets in the Y direction for each search site in the square + pattern.*/ +static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1}; +/*The number of sites to search for each boundary condition in the square + pattern. 
+ Bit flags for the boundary conditions are as follows: + 1: -16==dx + 2: dx==15(.5) + 4: -16==dy + 8: dy==15(.5)*/ +static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3}; +/*The list of sites to search for each boundary condition in the square + pattern.*/ +static const int OC_SQUARE_SITES[11][8]={ + /* -15.5mb_info; + /*Skip a position to store the median predictor in.*/ + ncandidates=1; + if(embs[_mbi].ncneighbors>0){ + /*Fill in the first part of set A: the vectors from adjacent blocks.*/ + for(i=0;icandidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0]; + _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1]; + ncandidates++; + } + } + /*Add a few additional vectors to set A: the vectors used in the previous + frames and the (0,0) vector.*/ + _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,_accum[0],31); + _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31); + ncandidates++; + _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31, + embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31); + _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31, + embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31); + ncandidates++; + _mcenc->candidates[ncandidates][0]=0; + _mcenc->candidates[ncandidates][1]=0; + ncandidates++; + /*Use the first three vectors of set A to find our best predictor: their + median.*/ + memcpy(a,_mcenc->candidates+1,sizeof(a)); + OC_SORT2I(a[0][0],a[1][0]); + OC_SORT2I(a[0][1],a[1][1]); + OC_SORT2I(a[1][0],a[2][0]); + OC_SORT2I(a[1][1],a[2][1]); + OC_SORT2I(a[0][0],a[1][0]); + OC_SORT2I(a[0][1],a[1][1]); + _mcenc->candidates[0][0]=a[1][0]; + _mcenc->candidates[0][1]=a[1][1]; + /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/ + _mcenc->setb0=ncandidates; + /*The first time through the loop use the current macro block.*/ + nmbi=_mbi; + for(i=0;;i++){ + _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31, + 2*embs[_mbi].analysis_mv[1][_frame][0] + 
-embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31); + _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31, + 2*embs[_mbi].analysis_mv[1][_frame][1] + -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31); + ncandidates++; + if(i>=embs[_mbi].npneighbors)break; + nmbi=embs[_mbi].pneighbors[i]; + } + /*Truncate to full-pel positions.*/ + for(i=0;icandidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]); + _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]); + } + _mcenc->ncandidates=ncandidates; +} + +#if 0 +static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4], + int _mvoffset0,int _mvoffset1,const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _best_err){ + unsigned err; + int bi; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + frag_offs=_frag_buf_offs[_fragis[bi]]; + err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0, + _ref+frag_offs+_mvoffset1,_ystride,_best_err-err); + } + return err; +} +#endif + +static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4], + int _mvoffset0,int _mvoffset1,const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _best_err){ + unsigned err; + int bi; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + frag_offs=_frag_buf_offs[_fragis[bi]]; + err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0, + _ref+frag_offs+_mvoffset1,_ystride,_best_err-err); + } + return err; +} + +static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _block_err[4]){ + unsigned err; + int mvoffset; + int bi; + mvoffset=_dx+_dy*_ystride; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + unsigned block_err; + frag_offs=_frag_buf_offs[_fragis[bi]]; + 
block_err=oc_enc_frag_sad(_enc, + _src+frag_offs,_ref+frag_offs+mvoffset,_ystride); + _block_err[bi]=block_err; + err+=block_err; + } + return err; +} + +static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy, + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + int mvoffset; + int err; + int bi; + mvoffset=_dx+_dy*_ystride; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + frag_offs=_frag_buf_offs[_fragis[bi]]; + err+=oc_enc_frag_satd_thresh(_enc, + _src+frag_offs,_ref+frag_offs+mvoffset,_ystride,UINT_MAX); + } + return err; +} + +static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc, + ptrdiff_t _frag_offs,int _dx,int _dy, + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + return oc_enc_frag_satd_thresh(_enc, + _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride,UINT_MAX); +} + +/*Perform a motion vector search for this macro block against a single + reference frame. + As a bonus, individual block motion vectors are computed as well, as much of + the work can be shared. + The actual motion vector is stored in the appropriate place in the + oc_mb_enc_info structure. + _mcenc: The motion compensation context. + _accum: Drop frame/golden MV accumulators. + _mbi: The macro block index. + _frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.*/ +void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){ + /*Note: Traditionally this search is done using a rate-distortion objective + function of the form D+lambda*R. + However, xiphmont tested this and found it produced a small degredation, + while requiring extra computation. + This is most likely due to Theora's peculiar MV encoding scheme: MVs are + not coded relative to a predictor, and the only truly cheap way to use a + MV is in the LAST or LAST2 MB modes, which are not being considered here. 
+ Therefore if we use the MV found here, it's only because both LAST and + LAST2 performed poorly, and therefore the MB is not likely to be uniform + or suffer from the aperture problem. + Furthermore we would like to re-use the MV found here for as many MBs as + possible, so picking a slightly sub-optimal vector to save a bit or two + may cause increased degredation in many blocks to come. + We could artificially reduce lambda to compensate, but it's faster to just + disable it entirely, and use D (the distortion) as the sole criterion.*/ + oc_mcenc_ctx mcenc; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + const unsigned char *src; + const unsigned char *ref; + int ystride; + oc_mb_enc_info *embs; + ogg_int32_t hit_cache[31]; + ogg_int32_t hitbit; + unsigned best_block_err[4]; + unsigned block_err[4]; + unsigned best_err; + int best_vec[2]; + int best_block_vec[4][2]; + int candx; + int candy; + int bi; + embs=_enc->mb_info; + /*Find some candidate motion vectors.*/ + oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame); + /*Clear the cache of locations we've examined.*/ + memset(hit_cache,0,sizeof(hit_cache)); + /*Start with the median predictor.*/ + candx=mcenc.candidates[0][0]; + candy=mcenc.candidates[0][1]; + hit_cache[candy+15]|=(ogg_int32_t)1<state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]]; + ystride=_enc->state.ref_ystride[0]; + /*TODO: customize error function for speed/(quality+size) tradeoff.*/ + best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, + frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); + best_vec[0]=candx; + best_vec[1]=candy; + if(_frame==OC_FRAME_PREV){ + for(bi=0;bi<4;bi++){ + best_block_err[bi]=block_err[bi]; + best_block_vec[bi][0]=candx; + best_block_vec[bi][1]=candy; + } + } + /*If this predictor fails, move on to set A.*/ + if(best_err>OC_YSAD_THRESH1){ + unsigned err; + 
unsigned t2; + int ncs; + int ci; + /*Compute the early termination threshold for set A.*/ + t2=embs[_mbi].error[_frame]; + ncs=OC_MINI(3,embs[_mbi].ncneighbors); + for(ci=0;ci>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET; + /*Examine the candidates in set A.*/ + for(ci=1;cit2){ + /*Examine the candidates in set B.*/ + for(;cit2){ + int best_site; + int nsites; + int sitei; + int site; + int b; + /*Square pattern search.*/ + for(;;){ + best_site=4; + /*Compose the bit flags for boundary conditions.*/ + b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1| + OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3; + nsites=OC_SQUARE_NSITES[b]; + for(sitei=0;sitei>=2; + for(bi=0;bi<4;bi++){ + if(best_block_err[bi]>t2){ + /*Square pattern search. + We do this in a slightly interesting manner. + We continue to check the SAD of all four blocks in the + macro block. + This gives us two things: + 1) We can continue to use the hit_cache to avoid duplicate + checks. + Otherwise we could continue to read it, but not write to it + without saving and restoring it for each block. + Note that we could still eliminate a large number of + duplicate checks by taking into account the site we came + from when choosing the site list. + We can still do that to avoid extra hit_cache queries, and + it might even be a speed win. + 2) It gives us a slightly better chance of escaping local + minima. + We would not be here if we weren't doing a fairly bad job + in finding a good vector, and checking these vectors can + save us from 100 to several thousand points off our SAD 1 + in 15 times. + TODO: Is this a good idea? + Who knows. 
+ It needs more testing.*/ + for(;;){ + int bestx; + int besty; + int bj; + bestx=best_block_vec[bi][0]; + besty=best_block_vec[bi][1]; + /*Compose the bit flags for boundary conditions.*/ + b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1| + OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3; + nsites=OC_SQUARE_NSITES[b]; + for(sitei=0;siteimb_info[_mbi].analysis_mv; + if(_enc->prevframe_dropped){ + accum_p[0]=mvs[0][OC_FRAME_PREV][0]; + accum_p[1]=mvs[0][OC_FRAME_PREV][1]; + } + else accum_p[1]=accum_p[0]=0; + accum_g[0]=mvs[2][OC_FRAME_GOLD][0]; + accum_g[1]=mvs[2][OC_FRAME_GOLD][1]; + mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0]; + mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1]; + /*Move the motion vector predictors back a frame.*/ + memmove(mvs+1,mvs,2*sizeof(*mvs)); + /*Search the last frame.*/ + oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV); + mvs[2][OC_FRAME_PREV][0]=accum_p[0]; + mvs[2][OC_FRAME_PREV][1]=accum_p[1]; + /*GOLDEN MVs are different from PREV MVs in that they're each absolute + offsets from some frame in the past rather than relative offsets from the + frame before. + For predictor calculation to make sense, we need them to be in the same + form as PREV MVs.*/ + mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0]; + mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1]; + mvs[2][OC_FRAME_GOLD][0]-=accum_g[0]; + mvs[2][OC_FRAME_GOLD][1]-=accum_g[1]; + /*Search the golden frame.*/ + oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD); + /*Put GOLDEN MVs back into absolute offset form. 
+ The newest MV is already an absolute offset.*/ + mvs[2][OC_FRAME_GOLD][0]+=accum_g[0]; + mvs[2][OC_FRAME_GOLD][1]+=accum_g[1]; + mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0]; + mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1]; +} + +#if 0 +static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi, + int _vec[2],int _best_err,int _frame){ + const unsigned char *src; + const unsigned char *ref; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + int offset_y[9]; + int ystride; + int mvoffset_base; + int best_site; + int sitei; + int err; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_framei]]; + frag_buf_offs=_enc->state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + ystride=_enc->state.ref_ystride[0]; + mvoffset_base=_vec[0]+_vec[1]*ystride; + offset_y[0]=offset_y[1]=offset_y[2]=-ystride; + offset_y[3]=offset_y[5]=0; + offset_y[6]=offset_y[7]=offset_y[8]=ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask); + err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis, + mvoffset0,mvoffset1,src,ref,ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} +#endif + +static 
unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc, + int _mbi,int _vec[2],unsigned _best_err,int _frame){ + const unsigned char *src; + const unsigned char *ref; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + int offset_y[9]; + int ystride; + int mvoffset_base; + int best_site; + int sitei; + int err; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]]; + frag_buf_offs=_enc->state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + ystride=_enc->state.ref_ystride[0]; + mvoffset_base=_vec[0]+_vec[1]*ystride; + offset_y[0]=offset_y[1]=offset_y[2]=-ystride; + offset_y[3]=offset_y[5]=0; + offset_y[6]=offset_y[7]=offset_y[8]=ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask); + err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis, + mvoffset0,mvoffset1,src,ref,ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} + +void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){ + oc_mb_enc_info *embs; + int vec[2]; + embs=_enc->mb_info; + vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][0]); + vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][1]); + 
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc, + _mbi,vec,embs[_mbi].satd[_frame],_frame); + embs[_mbi].analysis_mv[0][_frame][0]=(signed char)vec[0]; + embs[_mbi].analysis_mv[0][_frame][1]=(signed char)vec[1]; +} + +#if 0 +static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc, + int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride, + int _offset_y[9],unsigned _best_err){ + int mvoffset_base; + int best_site; + int sitei; + mvoffset_base=_vec[0]+_vec[1]*_ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + unsigned err; + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask); + err=oc_enc_frag_sad2_thresh(_enc,_src, + _ref+mvoffset0,_ref+mvoffset1,ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} +#endif + +static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc, + int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride, + int _offset_y[9],unsigned _best_err){ + int mvoffset_base; + int best_site; + int sitei; + mvoffset_base=_vec[0]+_vec[1]*_ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + unsigned err; + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + 
site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask); + err=oc_enc_frag_satd2_thresh(_enc,_src, + _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} + +void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){ + oc_mb_enc_info *embs; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + const unsigned char *src; + const unsigned char *ref; + int offset_y[9]; + int ystride; + int bi; + ystride=_enc->state.ref_ystride[0]; + frag_buf_offs=_enc->state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; + offset_y[0]=offset_y[1]=offset_y[2]=-ystride; + offset_y[3]=offset_y[5]=0; + offset_y[6]=offset_y[7]=offset_y[8]=ystride; + embs=_enc->mb_info; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + int vec[2]; + frag_offs=frag_buf_offs[fragis[bi]]; + vec[0]=OC_DIV2(embs[_mbi].block_mv[bi][0]); + vec[1]=OC_DIV2(embs[_mbi].block_mv[bi][1]); + embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec, + src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]); + embs[_mbi].ref_mv[bi][0]=(signed char)vec[0]; + embs[_mbi].ref_mv[bi][1]=(signed char)vec[1]; + } +} diff --git a/Engine/lib/libtheora/lib/modedec.h b/Engine/lib/libtheora/lib/modedec.h new file mode 100644 
index 000000000..ea12c64af --- /dev/null +++ b/Engine/lib/libtheora/lib/modedec.h @@ -0,0 +1,4027 @@ +/*File generated by libtheora with OC_COLLECT_METRICS defined at compile time.*/ +#if !defined(_modedec_H) +# define _modedec_H (1) + + + +# if defined(OC_COLLECT_METRICS) +typedef struct oc_mode_metrics oc_mode_metrics; +# endif +typedef struct oc_mode_rd oc_mode_rd; + + + +/*The number of extra bits of precision at which to store rate metrics.*/ +# define OC_BIT_SCALE (6) +/*The number of extra bits of precision at which to store RMSE metrics. + This must be at least half OC_BIT_SCALE (rounded up).*/ +# define OC_RMSE_SCALE (5) +/*The number of bins to partition statistics into.*/ +# define OC_SAD_BINS (24) +/*The number of bits of precision to drop from SAD scores to assign them to a + bin.*/ +# define OC_SAD_SHIFT (9) + + + +# if defined(OC_COLLECT_METRICS) +struct oc_mode_metrics{ + double fragw; + double satd; + double rate; + double rmse; + double satd2; + double satdrate; + double rate2; + double satdrmse; + double rmse2; +}; + + +int oc_has_mode_metrics; +oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS]; +# endif + + + +struct oc_mode_rd{ + ogg_int16_t rate; + ogg_int16_t rmse; +}; + + +# if !defined(OC_COLLECT_METRICS) +static const +# endif +oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={ + { + { + /*Y' qi=0 INTRA*/ + { + { 87, -66},{ 132, 1611},{ 197, 3474},{ 285, 5130}, + { 376, 6419},{ 450, 7545},{ 521, 8587},{ 600, 9587}, + { 689,10498},{ 790,11348},{ 899,12158},{ 1030,12855}, + { 1166,13459},{ 1276,14052},{ 1353,14732},{ 1444,15425}, + { 1535,16101},{ 1609,16856},{ 1697,17532},{ 1823,17995}, + { 1962,18426},{ 2085,18919},{ 2201,19503},{ 2304,20307} + }, + /*Y' qi=0 INTER*/ + { + { 32, -105},{ 40, 1268},{ 54, 2919},{ 91, 4559}, + { 118, 6244},{ 132, 7932},{ 142, 9514},{ 149,10989}, + { 155,12375},{ 161,13679},{ 168,14958},{ 176,16215}, + { 187,17431},{ 196,18623},{ 207,19790},{ 218,20941}, + { 230,22083},{ 246,23213},{ 265,24333},{ 292,25439}, + 
{ 328,26512},{ 372,27538},{ 427,28522},{ 494,29479} + } + }, + { + /*Cb qi=0 INTRA*/ + { + { 1, 6},{ 27, 368},{ 52, 738},{ 67, 1171}, + { 80, 1642},{ 99, 2134},{ 110, 2642},{ 112, 3144}, + { 126, 3578},{ 154, 3967},{ 167, 4387},{ 172, 4839}, + { 191, 5278},{ 208, 5666},{ 220, 6036},{ 223, 6398}, + { 227, 6814},{ 253, 7157},{ 284, 7403},{ 292, 7699}, + { 314, 7983},{ 339, 8203},{ 363, 8460},{ 399, 8919} + }, + /*Cb qi=0 INTER*/ + { + { 68, -55},{ 63, 275},{ 58, 602},{ 53, 936}, + { 50, 1290},{ 54, 1691},{ 58, 2116},{ 62, 2553}, + { 67, 2992},{ 72, 3422},{ 78, 3843},{ 84, 4253}, + { 89, 4658},{ 94, 5062},{ 98, 5455},{ 100, 5848}, + { 102, 6231},{ 104, 6604},{ 104, 6982},{ 105, 7359}, + { 105, 7733},{ 104, 8104},{ 105, 8465},{ 111, 8828} + } + }, + { + /*Cr qi=0 INTRA*/ + { + { 1, 8},{ 23, 375},{ 47, 759},{ 63, 1220}, + { 71, 1693},{ 82, 2171},{ 94, 2652},{ 109, 3103}, + { 125, 3567},{ 133, 3995},{ 151, 4375},{ 168, 4819}, + { 174, 5244},{ 190, 5635},{ 215, 6005},{ 242, 6347}, + { 257, 6758},{ 280, 7068},{ 311, 7336},{ 326, 7652}, + { 346, 7968},{ 372, 8213},{ 388, 8515},{ 408, 9060} + }, + /*Cr qi=0 INTER*/ + { + { 69, 0},{ 60, 314},{ 49, 624},{ 45, 943}, + { 45, 1285},{ 49, 1691},{ 55, 2130},{ 62, 2560}, + { 71, 2973},{ 79, 3385},{ 85, 3800},{ 89, 4207}, + { 92, 4620},{ 95, 5037},{ 96, 5436},{ 97, 5839}, + { 98, 6252},{ 99, 6653},{ 99, 7038},{ 103, 7426}, + { 107, 7810},{ 108, 8178},{ 107, 8539},{ 106, 8937} + } + } + }, + { + { + /*Y' qi=1 INTRA*/ + { + { 81, -71},{ 133, 1610},{ 203, 3460},{ 296, 5083}, + { 392, 6342},{ 467, 7454},{ 541, 8486},{ 625, 9466}, + { 716,10352},{ 823,11181},{ 940,11961},{ 1074,12643}, + { 1211,13233},{ 1324,13807},{ 1408,14489},{ 1504,15167}, + { 1598,15824},{ 1679,16544},{ 1788,17161},{ 1928,17579}, + { 2070,17991},{ 2202,18456},{ 2324,19021},{ 2425,19894} + }, + /*Y' qi=1 INTER*/ + { + { 34, 4},{ 40, 1307},{ 55, 2914},{ 93, 4555}, + { 120, 6243},{ 134, 7912},{ 144, 9468},{ 152,10918}, + { 158,12275},{ 164,13569},{ 171,14846},{ 
180,16098}, + { 191,17310},{ 204,18484},{ 216,19636},{ 228,20779}, + { 242,21912},{ 261,23036},{ 286,24146},{ 320,25221}, + { 363,26265},{ 418,27261},{ 485,28203},{ 551,29148} + } + }, + { + /*Cb qi=1 INTRA*/ + { + { 1, 6},{ 28, 367},{ 52, 738},{ 68, 1172}, + { 86, 1644},{ 106, 2135},{ 115, 2642},{ 119, 3141}, + { 132, 3569},{ 157, 3951},{ 172, 4366},{ 177, 4819}, + { 194, 5258},{ 211, 5638},{ 224, 6006},{ 233, 6367}, + { 236, 6784},{ 258, 7121},{ 299, 7357},{ 319, 7637}, + { 337, 7921},{ 358, 8141},{ 381, 8367},{ 401, 8768} + }, + /*Cb qi=1 INTER*/ + { + { 95, -31},{ 81, 295},{ 67, 614},{ 53, 953}, + { 48, 1305},{ 51, 1700},{ 56, 2125},{ 61, 2563}, + { 67, 3008},{ 73, 3435},{ 79, 3844},{ 85, 4251}, + { 90, 4663},{ 95, 5073},{ 98, 5458},{ 100, 5844}, + { 101, 6231},{ 102, 6606},{ 102, 6980},{ 103, 7347}, + { 104, 7726},{ 105, 8096},{ 105, 8453},{ 105, 8789} + } + }, + { + /*Cr qi=1 INTRA*/ + { + { 1, 8},{ 25, 375},{ 50, 759},{ 65, 1221}, + { 74, 1695},{ 86, 2172},{ 101, 2651},{ 117, 3101}, + { 129, 3561},{ 135, 3985},{ 153, 4368},{ 171, 4807}, + { 182, 5223},{ 202, 5608},{ 225, 5964},{ 251, 6300}, + { 271, 6697},{ 295, 6978},{ 324, 7235},{ 348, 7558}, + { 367, 7877},{ 394, 8101},{ 413, 8386},{ 409, 8945} + }, + /*Cr qi=1 INTER*/ + { + { 66, 11},{ 59, 323},{ 51, 631},{ 44, 949}, + { 44, 1292},{ 49, 1703},{ 56, 2140},{ 62, 2566}, + { 69, 2991},{ 77, 3397},{ 84, 3799},{ 89, 4211}, + { 93, 4634},{ 94, 5049},{ 95, 5444},{ 96, 5854}, + { 94, 6260},{ 95, 6640},{ 96, 7032},{ 101, 7423}, + { 104, 7790},{ 105, 8158},{ 109, 8527},{ 108, 8872} + } + } + }, + { + { + /*Y' qi=2 INTRA*/ + { + { 87, -72},{ 139, 1607},{ 213, 3426},{ 315, 4992}, + { 416, 6217},{ 495, 7315},{ 574, 8317},{ 666, 9265}, + { 763,10124},{ 875,10906},{ 1001,11654},{ 1147,12305}, + { 1289,12865},{ 1407,13424},{ 1503,14076},{ 1610,14724}, + { 1720,15342},{ 1815,16020},{ 1937,16579},{ 2084,16981}, + { 2236,17371},{ 2385,17779},{ 2536,18250},{ 2689,18931} + }, + /*Y' qi=2 INTER*/ + { + { 30, -2},{ 40, 1308},{ 
57, 2921},{ 96, 4567}, + { 122, 6260},{ 136, 7902},{ 148, 9418},{ 156,10826}, + { 162,12157},{ 169,13448},{ 177,14709},{ 188,15938}, + { 200,17133},{ 213,18295},{ 228,19433},{ 245,20564}, + { 264,21685},{ 289,22790},{ 323,23876},{ 368,24916}, + { 427,25906},{ 499,26837},{ 585,27700},{ 680,28514} + } + }, + { + /*Cb qi=2 INTRA*/ + { + { 1, 6},{ 30, 367},{ 58, 738},{ 77, 1172}, + { 93, 1645},{ 111, 2137},{ 123, 2642},{ 126, 3133}, + { 136, 3553},{ 162, 3934},{ 178, 4352},{ 183, 4803}, + { 199, 5231},{ 220, 5596},{ 235, 5957},{ 245, 6314}, + { 256, 6718},{ 286, 7048},{ 320, 7285},{ 336, 7568}, + { 366, 7829},{ 387, 8045},{ 405, 8261},{ 445, 8550} + }, + /*Cb qi=2 INTER*/ + { + { 115, -61},{ 93, 277},{ 71, 609},{ 54, 963}, + { 49, 1329},{ 53, 1715},{ 58, 2138},{ 63, 2583}, + { 69, 3017},{ 75, 3442},{ 81, 3857},{ 88, 4263}, + { 93, 4667},{ 96, 5065},{ 101, 5451},{ 101, 5832}, + { 102, 6213},{ 103, 6593},{ 103, 6968},{ 104, 7336}, + { 104, 7710},{ 105, 8076},{ 106, 8440},{ 106, 8822} + } + }, + { + /*Cr qi=2 INTRA*/ + { + { 1, 8},{ 27, 375},{ 54, 759},{ 70, 1222}, + { 79, 1696},{ 89, 2173},{ 106, 2652},{ 123, 3098}, + { 135, 3553},{ 143, 3972},{ 161, 4348},{ 181, 4782}, + { 194, 5189},{ 213, 5565},{ 235, 5907},{ 266, 6229}, + { 286, 6618},{ 311, 6897},{ 339, 7152},{ 362, 7454}, + { 392, 7721},{ 416, 7946},{ 429, 8227},{ 458, 8540} + }, + /*Cr qi=2 INTER*/ + { + { 74, 20},{ 63, 330},{ 51, 635},{ 44, 942}, + { 47, 1287},{ 54, 1710},{ 59, 2147},{ 65, 2571}, + { 72, 2996},{ 79, 3413},{ 86, 3820},{ 91, 4230}, + { 93, 4642},{ 95, 5046},{ 95, 5442},{ 95, 5839}, + { 96, 6243},{ 97, 6641},{ 99, 7021},{ 101, 7396}, + { 103, 7764},{ 106, 8138},{ 109, 8507},{ 114, 8851} + } + } + }, + { + { + /*Y' qi=3 INTRA*/ + { + { 91, -67},{ 141, 1606},{ 219, 3405},{ 328, 4929}, + { 433, 6122},{ 515, 7209},{ 598, 8204},{ 693, 9145}, + { 796, 9986},{ 912,10756},{ 1045,11471},{ 1200,12079}, + { 1345,12640},{ 1471,13179},{ 1571,13809},{ 1678,14450}, + { 1798,15047},{ 1905,15701},{ 2043,16205},{ 
2202,16569}, + { 2351,16971},{ 2501,17393},{ 2660,17851},{ 2825,18455} + }, + /*Y' qi=3 INTER*/ + { + { 53, -164},{ 38, 1314},{ 59, 2917},{ 99, 4563}, + { 124, 6253},{ 139, 7882},{ 150, 9375},{ 159,10749}, + { 166,12059},{ 173,13349},{ 183,14608},{ 194,15826}, + { 208,17003},{ 223,18150},{ 240,19287},{ 259,20411}, + { 284,21508},{ 317,22593},{ 359,23656},{ 414,24671}, + { 483,25634},{ 569,26519},{ 670,27332},{ 786,28072} + } + }, + { + /*Cb qi=3 INTRA*/ + { + { 1, 5},{ 31, 367},{ 58, 739},{ 78, 1173}, + { 96, 1645},{ 113, 2134},{ 125, 2638},{ 133, 3127}, + { 148, 3542},{ 171, 3915},{ 184, 4328},{ 192, 4776}, + { 209, 5197},{ 230, 5556},{ 245, 5909},{ 252, 6261}, + { 272, 6641},{ 304, 6942},{ 330, 7184},{ 342, 7477}, + { 380, 7736},{ 404, 7962},{ 428, 8151},{ 469, 8430} + }, + /*Cb qi=3 INTER*/ + { + { 86, -29},{ 72, 296},{ 58, 618},{ 46, 964}, + { 47, 1338},{ 51, 1743},{ 56, 2158},{ 63, 2594}, + { 69, 3035},{ 77, 3455},{ 84, 3859},{ 89, 4266}, + { 94, 4673},{ 98, 5074},{ 101, 5460},{ 101, 5842}, + { 101, 6217},{ 101, 6593},{ 102, 6964},{ 104, 7325}, + { 103, 7696},{ 103, 8056},{ 104, 8430},{ 103, 8792} + } + }, + { + /*Cr qi=3 INTRA*/ + { + { 1, 8},{ 27, 374},{ 56, 759},{ 74, 1221}, + { 83, 1696},{ 96, 2173},{ 113, 2650},{ 127, 3091}, + { 140, 3542},{ 151, 3960},{ 164, 4334},{ 188, 4764}, + { 208, 5144},{ 224, 5493},{ 250, 5841},{ 278, 6162}, + { 298, 6548},{ 334, 6816},{ 365, 7045},{ 388, 7343}, + { 419, 7613},{ 443, 7836},{ 455, 8105},{ 484, 8445} + }, + /*Cr qi=3 INTER*/ + { + { 76, 26},{ 65, 332},{ 53, 638},{ 45, 945}, + { 45, 1304},{ 53, 1725},{ 60, 2153},{ 68, 2584}, + { 74, 3007},{ 81, 3425},{ 87, 3844},{ 91, 4253}, + { 94, 4657},{ 95, 5061},{ 94, 5462},{ 94, 5856}, + { 95, 6250},{ 96, 6635},{ 97, 7014},{ 101, 7393}, + { 104, 7761},{ 106, 8137},{ 109, 8506},{ 111, 8823} + } + } + }, + { + { + /*Y' qi=4 INTRA*/ + { + { 80, -67},{ 143, 1603},{ 227, 3378},{ 344, 4861}, + { 454, 6026},{ 537, 7104},{ 626, 8089},{ 725, 9006}, + { 830, 9827},{ 950,10581},{ 
1089,11270},{ 1257,11826}, + { 1409,12366},{ 1535,12912},{ 1640,13528},{ 1753,14173}, + { 1884,14756},{ 2007,15368},{ 2148,15852},{ 2307,16212}, + { 2464,16591},{ 2614,17019},{ 2785,17455},{ 2970,17963} + }, + /*Y' qi=4 INTER*/ + { + { 50, -145},{ 38, 1324},{ 61, 2921},{ 102, 4566}, + { 127, 6248},{ 142, 7845},{ 154, 9300},{ 163,10656}, + { 169,11965},{ 177,13246},{ 188,14495},{ 202,15702}, + { 218,16864},{ 236,18003},{ 256,19124},{ 278,20233}, + { 307,21330},{ 347,22398},{ 398,23437},{ 463,24429}, + { 546,25343},{ 649,26170},{ 767,26935},{ 888,27674} + } + }, + { + /*Cb qi=4 INTRA*/ + { + { 1, 5},{ 33, 367},{ 61, 739},{ 80, 1173}, + { 98, 1646},{ 114, 2136},{ 126, 2639},{ 137, 3124}, + { 152, 3535},{ 176, 3903},{ 194, 4307},{ 206, 4753}, + { 222, 5165},{ 242, 5508},{ 260, 5857},{ 272, 6205}, + { 294, 6559},{ 332, 6848},{ 356, 7104},{ 364, 7389}, + { 396, 7637},{ 415, 7878},{ 446, 8064},{ 506, 8294} + }, + /*Cb qi=4 INTER*/ + { + { 86, -15},{ 73, 308},{ 60, 627},{ 46, 967}, + { 47, 1343},{ 51, 1754},{ 56, 2183},{ 63, 2615}, + { 70, 3044},{ 79, 3459},{ 85, 3866},{ 90, 4276}, + { 94, 4686},{ 97, 5088},{ 100, 5467},{ 102, 5837}, + { 102, 6205},{ 101, 6569},{ 103, 6939},{ 104, 7317}, + { 105, 7690},{ 107, 8043},{ 107, 8394},{ 111, 8736} + } + }, + { + /*Cr qi=4 INTRA*/ + { + { 1, 7},{ 28, 375},{ 57, 759},{ 79, 1221}, + { 92, 1697},{ 105, 2174},{ 122, 2648},{ 135, 3085}, + { 146, 3530},{ 157, 3947},{ 171, 4316},{ 195, 4737}, + { 218, 5117},{ 239, 5445},{ 268, 5767},{ 295, 6074}, + { 315, 6460},{ 355, 6735},{ 392, 6933},{ 418, 7218}, + { 448, 7495},{ 471, 7688},{ 481, 7954},{ 504, 8313} + }, + /*Cr qi=4 INTER*/ + { + { 68, 28},{ 57, 334},{ 47, 639},{ 43, 953}, + { 48, 1314},{ 54, 1736},{ 59, 2169},{ 69, 2592}, + { 78, 3017},{ 84, 3434},{ 88, 3850},{ 92, 4260}, + { 95, 4663},{ 96, 5068},{ 95, 5455},{ 95, 5839}, + { 96, 6243},{ 97, 6626},{ 98, 7006},{ 101, 7390}, + { 104, 7755},{ 108, 8115},{ 111, 8471},{ 110, 8825} + } + } + }, + { + { + /*Y' qi=5 INTRA*/ + { + { 84, 
-69},{ 147, 1599},{ 237, 3350},{ 360, 4796}, + { 475, 5934},{ 562, 6992},{ 657, 7953},{ 765, 8837}, + { 874, 9641},{ 998,10384},{ 1146,11047},{ 1322,11572}, + { 1484,12076},{ 1617,12609},{ 1731,13203},{ 1856,13806}, + { 1995,14367},{ 2132,14936},{ 2289,15386},{ 2460,15721}, + { 2635,16066},{ 2802,16442},{ 2980,16805},{ 3177,17272} + }, + /*Y' qi=5 INTER*/ + { + { 38, -86},{ 37, 1349},{ 64, 2920},{ 105, 4563}, + { 129, 6236},{ 145, 7809},{ 158, 9236},{ 167,10572}, + { 174,11871},{ 182,13141},{ 195,14368},{ 212,15558}, + { 230,16706},{ 250,17828},{ 274,18944},{ 303,20041}, + { 342,21116},{ 394,22152},{ 460,23144},{ 543,24073}, + { 648,24919},{ 773,25673},{ 922,26323},{ 1084,26924} + } + }, + { + /*Cb qi=5 INTRA*/ + { + { 1, 5},{ 34, 367},{ 63, 739},{ 82, 1174}, + { 102, 1647},{ 119, 2137},{ 134, 2639},{ 145, 3121}, + { 161, 3529},{ 189, 3891},{ 207, 4290},{ 216, 4721}, + { 232, 5113},{ 258, 5455},{ 277, 5798},{ 294, 6124}, + { 322, 6427},{ 352, 6697},{ 370, 6982},{ 384, 7283}, + { 423, 7529},{ 448, 7766},{ 478, 7943},{ 527, 8151} + }, + /*Cb qi=5 INTER*/ + { + { 83, -49},{ 69, 284},{ 55, 611},{ 48, 961}, + { 49, 1355},{ 52, 1769},{ 58, 2191},{ 65, 2616}, + { 73, 3041},{ 80, 3460},{ 87, 3868},{ 92, 4276}, + { 95, 4682},{ 98, 5077},{ 100, 5459},{ 102, 5827}, + { 102, 6200},{ 102, 6568},{ 103, 6930},{ 103, 7303}, + { 104, 7672},{ 106, 8032},{ 106, 8391},{ 106, 8727} + } + }, + { + /*Cr qi=5 INTRA*/ + { + { 1, 8},{ 28, 375},{ 57, 760},{ 81, 1222}, + { 99, 1696},{ 111, 2175},{ 125, 2648},{ 140, 3079}, + { 152, 3520},{ 162, 3927},{ 179, 4294},{ 203, 4714}, + { 225, 5080},{ 254, 5389},{ 286, 5703},{ 318, 5997}, + { 342, 6364},{ 380, 6640},{ 416, 6837},{ 445, 7103}, + { 473, 7370},{ 497, 7562},{ 514, 7811},{ 549, 8148} + }, + /*Cr qi=5 INTER*/ + { + { 60, 6},{ 54, 323},{ 46, 638},{ 43, 958}, + { 45, 1329},{ 54, 1749},{ 61, 2175},{ 70, 2600}, + { 79, 3021},{ 85, 3437},{ 89, 3847},{ 93, 4254}, + { 95, 4660},{ 96, 5065},{ 95, 5456},{ 95, 5849}, + { 96, 6243},{ 96, 6621},{ 97, 
6996},{ 101, 7366}, + { 104, 7722},{ 107, 8088},{ 111, 8448},{ 119, 8816} + } + } + }, + { + { + /*Y' qi=6 INTRA*/ + { + { 88, -69},{ 151, 1593},{ 251, 3294},{ 387, 4681}, + { 507, 5790},{ 601, 6837},{ 702, 7787},{ 813, 8648}, + { 927, 9427},{ 1059,10152},{ 1213,10787},{ 1399,11284}, + { 1568,11781},{ 1705,12312},{ 1823,12890},{ 1957,13482}, + { 2106,14036},{ 2249,14600},{ 2411,15042},{ 2588,15359}, + { 2772,15699},{ 2947,16062},{ 3127,16429},{ 3320,16849} + }, + /*Y' qi=6 INTER*/ + { + { 44, -80},{ 36, 1346},{ 69, 2919},{ 111, 4563}, + { 136, 6216},{ 154, 7746},{ 168, 9139},{ 178,10461}, + { 185,11747},{ 195,13007},{ 211,14229},{ 230,15408}, + { 250,16547},{ 274,17663},{ 302,18769},{ 339,19851}, + { 386,20907},{ 446,21933},{ 527,22884},{ 631,23746}, + { 760,24512},{ 914,25178},{ 1087,25758},{ 1278,26262} + } + }, + { + /*Cb qi=6 INTRA*/ + { + { 1, 4},{ 36, 367},{ 66, 739},{ 84, 1174}, + { 105, 1648},{ 126, 2139},{ 140, 2639},{ 149, 3116}, + { 164, 3523},{ 194, 3880},{ 217, 4271},{ 226, 4694}, + { 243, 5077},{ 270, 5407},{ 291, 5742},{ 310, 6061}, + { 340, 6340},{ 373, 6609},{ 394, 6890},{ 409, 7189}, + { 444, 7434},{ 469, 7652},{ 499, 7853},{ 559, 8135} + }, + /*Cb qi=6 INTER*/ + { + { 68, -46},{ 60, 291},{ 50, 623},{ 49, 971}, + { 50, 1357},{ 55, 1781},{ 61, 2211},{ 69, 2634}, + { 78, 3052},{ 86, 3466},{ 91, 3882},{ 95, 4292}, + { 98, 4691},{ 101, 5080},{ 102, 5458},{ 103, 5830}, + { 103, 6192},{ 104, 6554},{ 104, 6916},{ 106, 7278}, + { 108, 7641},{ 110, 8004},{ 112, 8371},{ 112, 8758} + } + }, + { + /*Cr qi=6 INTRA*/ + { + { 1, 8},{ 29, 375},{ 59, 760},{ 84, 1223}, + { 99, 1698},{ 112, 2176},{ 129, 2647},{ 143, 3076}, + { 156, 3510},{ 168, 3906},{ 189, 4269},{ 220, 4682}, + { 241, 5047},{ 266, 5342},{ 299, 5649},{ 331, 5954}, + { 357, 6309},{ 393, 6579},{ 431, 6765},{ 467, 6997}, + { 501, 7276},{ 520, 7488},{ 525, 7749},{ 548, 8146} + }, + /*Cr qi=6 INTER*/ + { + { 94, 31},{ 69, 335},{ 47, 641},{ 43, 967}, + { 50, 1350},{ 57, 1772},{ 65, 2197},{ 74, 2625}, + { 
83, 3043},{ 90, 3454},{ 94, 3867},{ 97, 4273}, + { 98, 4671},{ 99, 5068},{ 99, 5461},{ 98, 5857}, + { 98, 6245},{ 99, 6610},{ 103, 6975},{ 105, 7345}, + { 108, 7712},{ 111, 8073},{ 113, 8415},{ 119, 8768} + } + } + }, + { + { + /*Y' qi=7 INTRA*/ + { + { 92, -70},{ 156, 1590},{ 261, 3267},{ 403, 4618}, + { 529, 5704},{ 628, 6730},{ 736, 7657},{ 856, 8491}, + { 978, 9246},{ 1118, 9943},{ 1281,10550},{ 1472,11028}, + { 1645,11507},{ 1793,12008},{ 1924,12565},{ 2067,13130}, + { 2229,13638},{ 2388,14160},{ 2558,14584},{ 2744,14886}, + { 2932,15194},{ 3116,15531},{ 3311,15858},{ 3538,16197} + }, + /*Y' qi=7 INTER*/ + { + { 43, -8},{ 36, 1351},{ 71, 2923},{ 112, 4568}, + { 138, 6201},{ 157, 7705},{ 171, 9083},{ 181,10390}, + { 189,11664},{ 202,12910},{ 220,14121},{ 241,15281}, + { 266,16401},{ 295,17507},{ 328,18608},{ 371,19677}, + { 430,20701},{ 508,21676},{ 604,22588},{ 727,23397}, + { 878,24093},{ 1055,24690},{ 1263,25151},{ 1496,25504} + } + }, + { + /*Cb qi=7 INTRA*/ + { + { 1, 5},{ 40, 367},{ 72, 740},{ 89, 1175}, + { 108, 1649},{ 129, 2140},{ 143, 2637},{ 154, 3110}, + { 169, 3507},{ 198, 3860},{ 224, 4237},{ 235, 4652}, + { 253, 5037},{ 282, 5358},{ 307, 5674},{ 329, 5986}, + { 361, 6273},{ 393, 6527},{ 419, 6777},{ 435, 7078}, + { 467, 7342},{ 495, 7554},{ 529, 7757},{ 591, 8053} + }, + /*Cb qi=7 INTER*/ + { + { 79, -33},{ 68, 299},{ 56, 627},{ 50, 978}, + { 51, 1366},{ 55, 1786},{ 61, 2213},{ 70, 2642}, + { 80, 3062},{ 87, 3474},{ 92, 3886},{ 96, 4292}, + { 99, 4684},{ 102, 5072},{ 103, 5450},{ 104, 5814}, + { 104, 6176},{ 104, 6538},{ 107, 6905},{ 110, 7270}, + { 110, 7625},{ 110, 7978},{ 111, 8340},{ 117, 8674} + } + }, + { + /*Cr qi=7 INTRA*/ + { + { 2, 7},{ 31, 375},{ 62, 760},{ 87, 1223}, + { 103, 1698},{ 115, 2175},{ 131, 2644},{ 147, 3066}, + { 161, 3494},{ 175, 3889},{ 199, 4250},{ 229, 4653}, + { 250, 5001},{ 279, 5275},{ 311, 5577},{ 343, 5889}, + { 376, 6227},{ 417, 6486},{ 457, 6689},{ 484, 6925}, + { 518, 7174},{ 544, 7393},{ 549, 7662},{ 577, 
8050} + }, + /*Cr qi=7 INTER*/ + { + { 89, 22},{ 62, 332},{ 45, 641},{ 47, 976}, + { 52, 1363},{ 59, 1779},{ 67, 2203},{ 76, 2628}, + { 84, 3046},{ 90, 3460},{ 94, 3875},{ 98, 4272}, + { 99, 4666},{ 98, 5063},{ 98, 5459},{ 98, 5849}, + { 99, 6226},{ 101, 6594},{ 104, 6957},{ 109, 7324}, + { 109, 7686},{ 111, 8042},{ 115, 8379},{ 119, 8699} + } + } + }, + { + { + /*Y' qi=8 INTRA*/ + { + { 91, -69},{ 160, 1585},{ 274, 3226},{ 423, 4538}, + { 557, 5596},{ 664, 6595},{ 778, 7506},{ 905, 8319}, + { 1038, 9035},{ 1186, 9701},{ 1355,10292},{ 1554,10754}, + { 1739,11196},{ 1904,11639},{ 2047,12184},{ 2194,12763}, + { 2361,13256},{ 2529,13753},{ 2709,14155},{ 2902,14433}, + { 3100,14723},{ 3292,15026},{ 3489,15327},{ 3714,15705} + }, + /*Y' qi=8 INTER*/ + { + { 32, -157},{ 33, 1346},{ 74, 2914},{ 116, 4554}, + { 142, 6172},{ 162, 7648},{ 177, 9004},{ 186,10300}, + { 196,11570},{ 210,12808},{ 231,14001},{ 256,15150}, + { 285,16259},{ 319,17352},{ 359,18435},{ 415,19475}, + { 489,20470},{ 584,21400},{ 703,22246},{ 852,22968}, + { 1038,23556},{ 1253,24032},{ 1503,24367},{ 1778,24628} + } + }, + { + /*Cb qi=8 INTRA*/ + { + { 1, 4},{ 42, 367},{ 75, 740},{ 93, 1176}, + { 111, 1649},{ 128, 2139},{ 144, 2635},{ 157, 3103}, + { 174, 3494},{ 206, 3844},{ 233, 4207},{ 251, 4605}, + { 277, 4980},{ 304, 5284},{ 335, 5584},{ 359, 5888}, + { 393, 6152},{ 432, 6398},{ 455, 6656},{ 471, 6956}, + { 502, 7193},{ 528, 7405},{ 562, 7630},{ 603, 7922} + }, + /*Cb qi=8 INTER*/ + { + { 77, -37},{ 68, 299},{ 58, 632},{ 50, 991}, + { 50, 1382},{ 55, 1799},{ 62, 2226},{ 73, 2647}, + { 82, 3066},{ 90, 3480},{ 94, 3891},{ 96, 4296}, + { 98, 4687},{ 101, 5073},{ 103, 5456},{ 104, 5817}, + { 105, 6170},{ 106, 6523},{ 107, 6886},{ 108, 7250}, + { 109, 7600},{ 110, 7955},{ 111, 8305},{ 112, 8641} + } + }, + { + /*Cr qi=8 INTRA*/ + { + { 2, 7},{ 33, 375},{ 64, 760},{ 92, 1224}, + { 111, 1700},{ 122, 2173},{ 137, 2637},{ 156, 3055}, + { 172, 3476},{ 186, 3856},{ 211, 4211},{ 242, 4597}, + { 263, 4939},{ 292, 
5214},{ 335, 5489},{ 376, 5772}, + { 406, 6099},{ 440, 6378},{ 483, 6578},{ 517, 6797}, + { 550, 7049},{ 571, 7283},{ 583, 7560},{ 618, 7967} + }, + /*Cr qi=8 INTER*/ + { + { 74, 25},{ 58, 328},{ 43, 637},{ 45, 980}, + { 51, 1371},{ 59, 1788},{ 69, 2207},{ 79, 2630}, + { 86, 3051},{ 91, 3470},{ 95, 3880},{ 97, 4280}, + { 98, 4680},{ 97, 5074},{ 96, 5456},{ 97, 5839}, + { 99, 6219},{ 101, 6583},{ 103, 6945},{ 106, 7312}, + { 110, 7671},{ 114, 8009},{ 115, 8345},{ 117, 8686} + } + } + }, + { + { + /*Y' qi=9 INTRA*/ + { + { 104, -68},{ 164, 1580},{ 288, 3173},{ 448, 4439}, + { 587, 5485},{ 702, 6465},{ 824, 7351},{ 958, 8148}, + { 1096, 8845},{ 1253, 9480},{ 1432,10047},{ 1640,10494}, + { 1835,10926},{ 2015,11350},{ 2166,11871},{ 2321,12428}, + { 2508,12876},{ 2684,13345},{ 2866,13741},{ 3069,13991}, + { 3281,14243},{ 3487,14518},{ 3689,14813},{ 3911,15175} + }, + /*Y' qi=9 INTER*/ + { + { 47, -140},{ 34, 1348},{ 77, 2915},{ 119, 4552}, + { 145, 6150},{ 166, 7600},{ 182, 8936},{ 192,10221}, + { 203,11482},{ 220,12711},{ 244,13886},{ 274,15012}, + { 308,16111},{ 349,17190},{ 401,18244},{ 470,19257}, + { 561,20209},{ 680,21069},{ 830,21822},{ 1010,22463}, + { 1227,22971},{ 1482,23328},{ 1769,23544},{ 2077,23655} + } + }, + { + /*Cb qi=9 INTRA*/ + { + { 1, 5},{ 43, 367},{ 76, 740},{ 95, 1176}, + { 114, 1649},{ 135, 2138},{ 153, 2629},{ 165, 3091}, + { 184, 3481},{ 217, 3831},{ 244, 4187},{ 260, 4572}, + { 290, 4930},{ 320, 5231},{ 351, 5521},{ 379, 5812}, + { 414, 6055},{ 452, 6307},{ 483, 6564},{ 502, 6848}, + { 525, 7115},{ 554, 7321},{ 589, 7533},{ 626, 7833} + }, + /*Cb qi=9 INTER*/ + { + { 101, -43},{ 81, 298},{ 62, 637},{ 49, 989}, + { 51, 1381},{ 56, 1806},{ 65, 2231},{ 74, 2653}, + { 84, 3071},{ 91, 3482},{ 95, 3892},{ 97, 4293}, + { 99, 4684},{ 101, 5066},{ 103, 5437},{ 103, 5793}, + { 103, 6148},{ 104, 6511},{ 105, 6867},{ 107, 7221}, + { 110, 7572},{ 111, 7926},{ 112, 8283},{ 116, 8625} + } + }, + { + /*Cr qi=9 INTRA*/ + { + { 2, 7},{ 35, 375},{ 66, 761},{ 93, 
1224}, + { 112, 1700},{ 126, 2173},{ 144, 2633},{ 165, 3047}, + { 183, 3458},{ 199, 3835},{ 224, 4191},{ 257, 4558}, + { 283, 4887},{ 309, 5176},{ 351, 5446},{ 397, 5713}, + { 433, 6017},{ 469, 6283},{ 508, 6480},{ 546, 6687}, + { 579, 6945},{ 600, 7182},{ 610, 7434},{ 623, 7793} + }, + /*Cr qi=9 INTER*/ + { + { 77, 15},{ 57, 330},{ 45, 640},{ 48, 980}, + { 54, 1380},{ 61, 1802},{ 70, 2220},{ 80, 2639}, + { 87, 3057},{ 92, 3474},{ 94, 3882},{ 98, 4282}, + { 98, 4675},{ 97, 5062},{ 97, 5450},{ 98, 5829}, + { 100, 6197},{ 101, 6561},{ 104, 6927},{ 107, 7289}, + { 113, 7638},{ 117, 7978},{ 119, 8311},{ 117, 8629} + } + } + }, + { + { + /*Y' qi=10 INTRA*/ + { + { 101, -69},{ 168, 1574},{ 299, 3143},{ 465, 4386}, + { 610, 5410},{ 736, 6353},{ 866, 7207},{ 1006, 7982}, + { 1153, 8655},{ 1319, 9261},{ 1504, 9812},{ 1719,10248}, + { 1928,10653},{ 2116,11056},{ 2282,11550},{ 2458,12070}, + { 2654,12492},{ 2846,12923},{ 3043,13291},{ 3249,13537}, + { 3466,13764},{ 3682,13999},{ 3896,14268},{ 4145,14548} + }, + /*Y' qi=10 INTER*/ + { + { 48, -94},{ 34, 1355},{ 81, 2920},{ 124, 4545}, + { 151, 6113},{ 174, 7532},{ 190, 8850},{ 201,10125}, + { 214,11379},{ 235,12591},{ 264,13745},{ 299,14859}, + { 338,15948},{ 388,17008},{ 456,18029},{ 546,18988}, + { 661,19877},{ 808,20666},{ 993,21321},{ 1218,21835}, + { 1481,22203},{ 1783,22420},{ 2117,22504},{ 2469,22481} + } + }, + { + /*Cb qi=10 INTRA*/ + { + { 2, 4},{ 44, 367},{ 79, 740},{ 99, 1178}, + { 117, 1652},{ 137, 2141},{ 156, 2630},{ 170, 3089}, + { 192, 3474},{ 227, 3813},{ 259, 4157},{ 282, 4526}, + { 310, 4860},{ 342, 5140},{ 377, 5425},{ 400, 5714}, + { 436, 5952},{ 475, 6194},{ 496, 6468},{ 522, 6748}, + { 559, 6996},{ 587, 7216},{ 617, 7433},{ 673, 7678} + }, + /*Cb qi=10 INTER*/ + { + { 87, -37},{ 72, 301},{ 58, 636},{ 49, 995}, + { 51, 1394},{ 57, 1819},{ 66, 2241},{ 78, 2660}, + { 87, 3074},{ 93, 3482},{ 97, 3891},{ 99, 4294}, + { 101, 4678},{ 103, 5050},{ 105, 5414},{ 106, 5773}, + { 107, 6134},{ 108, 6485},{ 110, 
6832},{ 113, 7187}, + { 113, 7547},{ 114, 7887},{ 117, 8230},{ 112, 8590} + } + }, + { + /*Cr qi=10 INTRA*/ + { + { 2, 7},{ 38, 375},{ 69, 761},{ 96, 1224}, + { 116, 1701},{ 131, 2175},{ 148, 2634},{ 168, 3041}, + { 190, 3439},{ 211, 3802},{ 238, 4151},{ 271, 4506}, + { 297, 4824},{ 331, 5103},{ 373, 5360},{ 415, 5632}, + { 459, 5928},{ 500, 6176},{ 535, 6386},{ 573, 6586}, + { 608, 6834},{ 629, 7079},{ 642, 7337},{ 686, 7680} + }, + /*Cr qi=10 INTER*/ + { + { 81, 34},{ 63, 333},{ 50, 633},{ 48, 987}, + { 53, 1397},{ 61, 1820},{ 71, 2237},{ 83, 2651}, + { 91, 3065},{ 95, 3479},{ 98, 3882},{ 100, 4279}, + { 101, 4673},{ 101, 5054},{ 100, 5429},{ 101, 5801}, + { 102, 6173},{ 104, 6541},{ 108, 6904},{ 110, 7264}, + { 114, 7609},{ 119, 7945},{ 123, 8275},{ 128, 8615} + } + } + }, + { + { + /*Y' qi=11 INTRA*/ + { + { 110, -66},{ 176, 1564},{ 316, 3087},{ 492, 4296}, + { 645, 5299},{ 781, 6217},{ 924, 7039},{ 1075, 7776}, + { 1232, 8421},{ 1410, 9005},{ 1607, 9532},{ 1834, 9929}, + { 2053,10300},{ 2249,10697},{ 2427,11184},{ 2619,11682}, + { 2826,12083},{ 3019,12508},{ 3225,12869},{ 3452,13064}, + { 3670,13280},{ 3890,13519},{ 4123,13750},{ 4367,14059} + }, + /*Y' qi=11 INTER*/ + { + { 72, -115},{ 32, 1354},{ 83, 2911},{ 126, 4534}, + { 154, 6080},{ 178, 7475},{ 194, 8779},{ 205,10047}, + { 222,11290},{ 246,12488},{ 281,13621},{ 322,14714}, + { 372,15786},{ 436,16821},{ 519,17813},{ 628,18728}, + { 770,19549},{ 950,20254},{ 1175,20800},{ 1443,21197}, + { 1752,21446},{ 2095,21555},{ 2457,21553},{ 2808,21544} + } + }, + { + /*Cb qi=11 INTRA*/ + { + { 2, 4},{ 45, 367},{ 81, 740},{ 101, 1177}, + { 121, 1650},{ 142, 2136},{ 159, 2621},{ 174, 3075}, + { 199, 3451},{ 234, 3778},{ 265, 4117},{ 297, 4473}, + { 333, 4789},{ 367, 5054},{ 402, 5319},{ 427, 5613}, + { 462, 5871},{ 503, 6107},{ 532, 6336},{ 560, 6584}, + { 601, 6842},{ 631, 7092},{ 662, 7292},{ 721, 7497} + }, + /*Cb qi=11 INTER*/ + { + { 117, -24},{ 93, 308},{ 69, 638},{ 52, 993}, + { 52, 1395},{ 58, 1822},{ 68, 
2246},{ 80, 2665}, + { 89, 3082},{ 94, 3492},{ 96, 3900},{ 98, 4299}, + { 101, 4679},{ 103, 5047},{ 104, 5405},{ 106, 5763}, + { 106, 6120},{ 107, 6474},{ 109, 6823},{ 112, 7163}, + { 115, 7516},{ 117, 7868},{ 118, 8213},{ 119, 8561} + } + }, + { + /*Cr qi=11 INTRA*/ + { + { 2, 7},{ 40, 375},{ 75, 761},{ 100, 1224}, + { 119, 1700},{ 137, 2169},{ 154, 2622},{ 178, 3025}, + { 198, 3416},{ 220, 3770},{ 255, 4114},{ 294, 4459}, + { 323, 4756},{ 359, 5028},{ 399, 5292},{ 438, 5556}, + { 483, 5827},{ 518, 6073},{ 551, 6298},{ 598, 6501}, + { 634, 6754},{ 652, 6997},{ 670, 7211},{ 689, 7560} + }, + /*Cr qi=11 INTER*/ + { + { 75, 30},{ 61, 334},{ 51, 639},{ 49, 995}, + { 53, 1403},{ 62, 1821},{ 73, 2237},{ 84, 2654}, + { 91, 3070},{ 95, 3485},{ 96, 3890},{ 98, 4287}, + { 98, 4672},{ 99, 5050},{ 99, 5427},{ 100, 5798}, + { 103, 6169},{ 105, 6528},{ 107, 6881},{ 113, 7233}, + { 118, 7580},{ 121, 7916},{ 125, 8240},{ 130, 8551} + } + } + }, + { + { + /*Y' qi=12 INTRA*/ + { + { 104, -69},{ 182, 1557},{ 335, 3040},{ 521, 4205}, + { 684, 5178},{ 831, 6068},{ 986, 6854},{ 1151, 7559}, + { 1323, 8169},{ 1523, 8704},{ 1736, 9192},{ 1978, 9558}, + { 2213, 9908},{ 2421,10298},{ 2613,10757},{ 2822,11208}, + { 3042,11585},{ 3250,11991},{ 3474,12308},{ 3710,12480}, + { 3939,12687},{ 4174,12902},{ 4416,13102},{ 4672,13369} + }, + /*Y' qi=12 INTER*/ + { + { 52, -91},{ 34, 1355},{ 86, 2911},{ 129, 4518}, + { 159, 6037},{ 184, 7405},{ 200, 8694},{ 213, 9955}, + { 232,11185},{ 263,12360},{ 304,13479},{ 354,14555}, + { 415,15601},{ 495,16608},{ 601,17549},{ 738,18400}, + { 915,19136},{ 1139,19724},{ 1414,20150},{ 1731,20412}, + { 2090,20520},{ 2473,20509},{ 2851,20442},{ 3227,20328} + } + }, + { + /*Cb qi=12 INTRA*/ + { + { 1, 4},{ 46, 367},{ 85, 740},{ 109, 1178}, + { 126, 1650},{ 145, 2134},{ 165, 2617},{ 182, 3061}, + { 209, 3428},{ 245, 3749},{ 281, 4077},{ 316, 4417}, + { 354, 4718},{ 392, 4970},{ 430, 5217},{ 456, 5501}, + { 490, 5771},{ 534, 5996},{ 571, 6207},{ 600, 6458}, + { 644, 
6697},{ 675, 6942},{ 707, 7151},{ 766, 7342} + }, + /*Cb qi=12 INTER*/ + { + { 84, -24},{ 73, 311},{ 60, 644},{ 52, 998}, + { 53, 1398},{ 60, 1825},{ 71, 2249},{ 83, 2665}, + { 90, 3081},{ 94, 3490},{ 97, 3893},{ 99, 4286}, + { 102, 4663},{ 104, 5032},{ 105, 5393},{ 106, 5751}, + { 107, 6102},{ 108, 6445},{ 111, 6788},{ 113, 7136}, + { 114, 7483},{ 117, 7828},{ 121, 8163},{ 122, 8496} + } + }, + { + /*Cr qi=12 INTRA*/ + { + { 3, 7},{ 41, 375},{ 78, 761},{ 106, 1225}, + { 124, 1700},{ 140, 2167},{ 163, 2616},{ 188, 3010}, + { 213, 3385},{ 240, 3718},{ 271, 4062},{ 309, 4406}, + { 345, 4691},{ 387, 4956},{ 430, 5212},{ 469, 5467}, + { 513, 5729},{ 554, 5970},{ 587, 6176},{ 633, 6395}, + { 673, 6659},{ 692, 6868},{ 712, 7061},{ 758, 7259} + }, + /*Cr qi=12 INTER*/ + { + { 73, 31},{ 59, 335},{ 48, 638},{ 50, 998}, + { 56, 1410},{ 65, 1827},{ 75, 2240},{ 85, 2657}, + { 92, 3073},{ 95, 3485},{ 97, 3888},{ 99, 4279}, + { 98, 4663},{ 99, 5042},{ 101, 5412},{ 102, 5779}, + { 105, 6142},{ 107, 6498},{ 108, 6848},{ 113, 7198}, + { 118, 7540},{ 121, 7867},{ 127, 8188},{ 132, 8508} + } + } + }, + { + { + /*Y' qi=13 INTRA*/ + { + { 109, -68},{ 187, 1551},{ 347, 3010},{ 541, 4153}, + { 709, 5107},{ 864, 5975},{ 1026, 6745},{ 1194, 7433}, + { 1375, 8021},{ 1581, 8550},{ 1803, 9026},{ 2054, 9371}, + { 2301, 9713},{ 2522,10082},{ 2728,10515},{ 2949,10956}, + { 3184,11297},{ 3408,11653},{ 3643,11946},{ 3886,12100}, + { 4124,12277},{ 4377,12459},{ 4632,12635},{ 4898,12861} + }, + /*Y' qi=13 INTER*/ + { + { 48, -78},{ 35, 1357},{ 89, 2914},{ 133, 4512}, + { 164, 6004},{ 190, 7348},{ 207, 8627},{ 222, 9881}, + { 247,11096},{ 284,12251},{ 333,13350},{ 392,14407}, + { 466,15426},{ 565,16391},{ 696,17279},{ 865,18058}, + { 1085,18689},{ 1358,19156},{ 1684,19456},{ 2050,19605}, + { 2447,19614},{ 2855,19524},{ 3243,19398},{ 3611,19201} + } + }, + { + /*Cb qi=13 INTRA*/ + { + { 2, 4},{ 47, 367},{ 86, 741},{ 108, 1179}, + { 127, 1651},{ 150, 2133},{ 173, 2611},{ 194, 3050}, + { 222, 3417},{ 
262, 3733},{ 303, 4048},{ 337, 4375}, + { 378, 4657},{ 420, 4897},{ 456, 5148},{ 486, 5422}, + { 518, 5682},{ 558, 5903},{ 592, 6113},{ 623, 6372}, + { 662, 6628},{ 700, 6833},{ 751, 6989},{ 805, 7147} + }, + /*Cb qi=13 INTER*/ + { + { 94, -34},{ 78, 303},{ 60, 638},{ 51, 994}, + { 54, 1406},{ 61, 1836},{ 73, 2253},{ 84, 2668}, + { 92, 3082},{ 96, 3492},{ 99, 3894},{ 101, 4284}, + { 103, 4659},{ 105, 5023},{ 106, 5376},{ 108, 5726}, + { 109, 6070},{ 110, 6418},{ 113, 6765},{ 117, 7105}, + { 119, 7448},{ 122, 7784},{ 126, 8119},{ 131, 8463} + } + }, + { + /*Cr qi=13 INTRA*/ + { + { 3, 7},{ 43, 375},{ 80, 762},{ 110, 1226}, + { 131, 1701},{ 149, 2166},{ 172, 2610},{ 196, 2999}, + { 221, 3359},{ 254, 3679},{ 292, 4005},{ 332, 4329}, + { 369, 4612},{ 408, 4880},{ 456, 5139},{ 500, 5388}, + { 544, 5631},{ 581, 5877},{ 615, 6101},{ 660, 6316}, + { 692, 6594},{ 714, 6795},{ 736, 6997},{ 789, 7290} + }, + /*Cr qi=13 INTER*/ + { + { 73, 28},{ 61, 336},{ 46, 642},{ 50, 1003}, + { 58, 1414},{ 67, 1832},{ 79, 2245},{ 87, 2660}, + { 93, 3075},{ 97, 3484},{ 99, 3888},{ 100, 4277}, + { 100, 4651},{ 100, 5027},{ 101, 5403},{ 102, 5765}, + { 105, 6116},{ 109, 6470},{ 113, 6825},{ 119, 7163}, + { 124, 7497},{ 127, 7827},{ 131, 8137},{ 135, 8437} + } + } + }, + { + { + /*Y' qi=14 INTRA*/ + { + { 113, -68},{ 191, 1545},{ 358, 2981},{ 559, 4104}, + { 733, 5044},{ 896, 5890},{ 1066, 6636},{ 1241, 7304}, + { 1428, 7886},{ 1642, 8402},{ 1872, 8871},{ 2128, 9219}, + { 2380, 9547},{ 2609, 9908},{ 2825,10321},{ 3055,10728}, + { 3294,11076},{ 3523,11425},{ 3766,11689},{ 4013,11845}, + { 4254,12022},{ 4506,12209},{ 4759,12383},{ 5013,12637} + }, + /*Y' qi=14 INTER*/ + { + { 58, -82},{ 38, 1362},{ 93, 2914},{ 138, 4492}, + { 171, 5962},{ 198, 7289},{ 216, 8559},{ 234, 9804}, + { 263,11005},{ 306,12143},{ 363,13222},{ 434,14259}, + { 523,15255},{ 639,16188},{ 794,17021},{ 1000,17717}, + { 1262,18260},{ 1575,18645},{ 1943,18841},{ 2356,18872}, + { 2782,18802},{ 3194,18682},{ 3576,18559},{ 
3923,18447} + } + }, + { + /*Cb qi=14 INTRA*/ + { + { 2, 3},{ 50, 367},{ 91, 741},{ 114, 1180}, + { 134, 1651},{ 157, 2131},{ 181, 2601},{ 208, 3028}, + { 239, 3391},{ 279, 3706},{ 322, 4000},{ 361, 4309}, + { 406, 4587},{ 445, 4822},{ 482, 5067},{ 515, 5344}, + { 546, 5612},{ 589, 5821},{ 626, 6020},{ 655, 6276}, + { 701, 6523},{ 748, 6717},{ 796, 6876},{ 815, 7151} + }, + /*Cb qi=14 INTER*/ + { + { 80, -43},{ 68, 301},{ 56, 644},{ 50, 1004}, + { 54, 1412},{ 63, 1836},{ 75, 2253},{ 87, 2670}, + { 94, 3083},{ 98, 3487},{ 101, 3885},{ 103, 4271}, + { 106, 4645},{ 107, 5004},{ 108, 5358},{ 109, 5705}, + { 112, 6047},{ 115, 6388},{ 118, 6731},{ 121, 7081}, + { 126, 7421},{ 129, 7747},{ 132, 8076},{ 137, 8419} + } + }, + { + /*Cr qi=14 INTRA*/ + { + { 3, 6},{ 45, 375},{ 85, 762},{ 116, 1226}, + { 138, 1700},{ 158, 2163},{ 180, 2602},{ 206, 2985}, + { 236, 3333},{ 270, 3639},{ 310, 3956},{ 359, 4258}, + { 397, 4524},{ 430, 4802},{ 478, 5068},{ 527, 5316}, + { 572, 5560},{ 613, 5802},{ 654, 6012},{ 699, 6216}, + { 734, 6489},{ 755, 6707},{ 775, 6898},{ 841, 7111} + }, + /*Cr qi=14 INTER*/ + { + { 78, 0},{ 59, 322},{ 46, 649},{ 51, 1016}, + { 58, 1422},{ 68, 1839},{ 81, 2253},{ 90, 2666}, + { 95, 3080},{ 98, 3486},{ 101, 3881},{ 102, 4268}, + { 102, 4644},{ 103, 5017},{ 105, 5382},{ 106, 5743}, + { 108, 6093},{ 112, 6442},{ 118, 6791},{ 124, 7130}, + { 127, 7463},{ 133, 7784},{ 138, 8085},{ 142, 8395} + } + } + }, + { + { + /*Y' qi=15 INTRA*/ + { + { 111, -66},{ 197, 1538},{ 370, 2949},{ 579, 4050}, + { 762, 4968},{ 933, 5798},{ 1112, 6520},{ 1299, 7161}, + { 1497, 7725},{ 1723, 8219},{ 1967, 8654},{ 2234, 8990}, + { 2499, 9302},{ 2740, 9637},{ 2968,10039},{ 3215,10414}, + { 3473,10709},{ 3721,11015},{ 3971,11270},{ 4228,11402}, + { 4487,11543},{ 4752,11707},{ 5011,11871},{ 5290,12099} + }, + /*Y' qi=15 INTER*/ + { + { 59, -113},{ 37, 1349},{ 95, 2904},{ 139, 4478}, + { 174, 5929},{ 201, 7244},{ 220, 8505},{ 241, 9736}, + { 275,10922},{ 327,12040},{ 395,13097},{ 
477,14114}, + { 585,15071},{ 730,15947},{ 917,16714},{ 1162,17326}, + { 1468,17770},{ 1833,18029},{ 2251,18111},{ 2694,18068}, + { 3125,17968},{ 3529,17845},{ 3908,17713},{ 4260,17587} + } + }, + { + /*Cb qi=15 INTRA*/ + { + { 2, 3},{ 51, 367},{ 94, 741},{ 120, 1180}, + { 140, 1651},{ 160, 2129},{ 184, 2591},{ 213, 3010}, + { 246, 3371},{ 289, 3680},{ 335, 3969},{ 374, 4274}, + { 418, 4546},{ 460, 4783},{ 498, 5019},{ 532, 5280}, + { 565, 5553},{ 608, 5765},{ 647, 5958},{ 683, 6193}, + { 732, 6433},{ 782, 6620},{ 832, 6769},{ 848, 7027} + }, + /*Cb qi=15 INTER*/ + { + { 71, -52},{ 63, 296},{ 54, 644},{ 50, 1010}, + { 53, 1417},{ 64, 1837},{ 77, 2253},{ 88, 2666}, + { 95, 3079},{ 98, 3487},{ 100, 3882},{ 103, 4264}, + { 106, 4633},{ 108, 4991},{ 109, 5343},{ 109, 5693}, + { 112, 6038},{ 114, 6371},{ 119, 6709},{ 123, 7051}, + { 125, 7385},{ 130, 7716},{ 135, 8050},{ 140, 8374} + } + }, + { + /*Cr qi=15 INTRA*/ + { + { 2, 6},{ 47, 375},{ 87, 763},{ 119, 1225}, + { 143, 1699},{ 162, 2158},{ 185, 2595},{ 213, 2971}, + { 246, 3315},{ 279, 3618},{ 320, 3920},{ 372, 4210}, + { 409, 4480},{ 446, 4756},{ 496, 5017},{ 542, 5263}, + { 590, 5487},{ 639, 5721},{ 687, 5923},{ 724, 6132}, + { 753, 6417},{ 781, 6622},{ 805, 6806},{ 856, 6977} + }, + /*Cr qi=15 INTER*/ + { + { 71, 3},{ 61, 326},{ 52, 651},{ 50, 1017}, + { 58, 1422},{ 69, 1837},{ 82, 2251},{ 90, 2668}, + { 95, 3080},{ 98, 3484},{ 101, 3877},{ 102, 4257}, + { 102, 4632},{ 101, 5005},{ 103, 5370},{ 106, 5733}, + { 110, 6082},{ 116, 6424},{ 120, 6774},{ 124, 7106}, + { 130, 7427},{ 135, 7748},{ 141, 8052},{ 147, 8333} + } + } + }, + { + { + /*Y' qi=16 INTRA*/ + { + { 114, -63},{ 206, 1525},{ 396, 2887},{ 618, 3945}, + { 816, 4832},{ 1002, 5626},{ 1196, 6319},{ 1401, 6923}, + { 1616, 7458},{ 1857, 7928},{ 2121, 8334},{ 2405, 8645}, + { 2685, 8934},{ 2938, 9255},{ 3175, 9638},{ 3433, 9990}, + { 3707,10263},{ 3958,10577},{ 4218,10807},{ 4488,10906}, + { 4760,11028},{ 5037,11148},{ 5306,11286},{ 5625,11463} + }, + /*Y' 
qi=16 INTER*/ + { + { 69, -153},{ 39, 1348},{ 98, 2894},{ 144, 4448}, + { 181, 5872},{ 209, 7167},{ 228, 8422},{ 254, 9644}, + { 297,10810},{ 359,11908},{ 438,12944},{ 539,13930}, + { 672,14842},{ 850,15650},{ 1085,16318},{ 1391,16793}, + { 1769,17082},{ 2200,17198},{ 2659,17174},{ 3116,17072}, + { 3547,16948},{ 3943,16819},{ 4299,16701},{ 4611,16644} + } + }, + { + /*Cb qi=16 INTRA*/ + { + { 3, 4},{ 54, 367},{ 97, 742},{ 122, 1181}, + { 143, 1651},{ 168, 2123},{ 197, 2575},{ 226, 2985}, + { 263, 3338},{ 314, 3631},{ 367, 3903},{ 409, 4200}, + { 453, 4468},{ 491, 4703},{ 528, 4932},{ 566, 5188}, + { 601, 5459},{ 647, 5672},{ 693, 5844},{ 734, 6058}, + { 784, 6305},{ 836, 6460},{ 882, 6602},{ 905, 6891} + }, + /*Cb qi=16 INTER*/ + { + { 75, -64},{ 67, 292},{ 56, 645},{ 51, 1016}, + { 54, 1421},{ 66, 1842},{ 79, 2257},{ 89, 2670}, + { 95, 3082},{ 98, 3488},{ 101, 3879},{ 104, 4258}, + { 106, 4623},{ 108, 4974},{ 109, 5321},{ 113, 5664}, + { 116, 6001},{ 117, 6341},{ 123, 6677},{ 128, 7004}, + { 130, 7336},{ 136, 7671},{ 143, 7996},{ 148, 8310} + } + }, + { + /*Cr qi=16 INTRA*/ + { + { 4, 7},{ 50, 375},{ 90, 763},{ 124, 1225}, + { 148, 1698},{ 168, 2154},{ 195, 2582},{ 227, 2948}, + { 263, 3279},{ 302, 3575},{ 343, 3865},{ 394, 4137}, + { 439, 4402},{ 482, 4672},{ 533, 4925},{ 579, 5165}, + { 626, 5382},{ 675, 5616},{ 725, 5812},{ 769, 5991}, + { 810, 6242},{ 848, 6430},{ 868, 6615},{ 944, 6732} + }, + /*Cr qi=16 INTER*/ + { + { 78, 11},{ 62, 327},{ 49, 650},{ 50, 1025}, + { 59, 1431},{ 72, 1841},{ 83, 2253},{ 90, 2671}, + { 95, 3084},{ 98, 3487},{ 100, 3879},{ 101, 4254}, + { 102, 4625},{ 103, 4994},{ 106, 5355},{ 108, 5708}, + { 111, 6058},{ 115, 6400},{ 121, 6733},{ 128, 7058}, + { 134, 7374},{ 140, 7691},{ 146, 7993},{ 146, 8317} + } + } + }, + { + { + /*Y' qi=17 INTRA*/ + { + { 112, -59},{ 210, 1515},{ 409, 2850},{ 640, 3882}, + { 844, 4748},{ 1038, 5529},{ 1240, 6206},{ 1452, 6803}, + { 1676, 7330},{ 1925, 7792},{ 2194, 8201},{ 2483, 8512}, + { 2766, 8801},{ 
3027, 9121},{ 3279, 9482},{ 3548, 9810}, + { 3825,10069},{ 4088,10345},{ 4362,10544},{ 4638,10644}, + { 4915,10744},{ 5196,10850},{ 5471,10981},{ 5802,11136} + }, + /*Y' qi=17 INTER*/ + { + { 70, -147},{ 45, 1349},{ 106, 2894},{ 155, 4425}, + { 195, 5818},{ 225, 7099},{ 247, 8348},{ 278, 9565}, + { 328,10717},{ 399,11794},{ 491,12807},{ 609,13760}, + { 766,14623},{ 984,15349},{ 1274,15902},{ 1642,16256}, + { 2082,16411},{ 2563,16409},{ 3048,16315},{ 3508,16194}, + { 3924,16064},{ 4306,15938},{ 4656,15828},{ 4966,15733} + } + }, + { + /*Cb qi=17 INTRA*/ + { + { 3, 4},{ 57, 367},{ 101, 742},{ 126, 1182}, + { 148, 1650},{ 175, 2118},{ 207, 2565},{ 241, 2966}, + { 279, 3307},{ 331, 3588},{ 389, 3845},{ 435, 4132}, + { 474, 4408},{ 517, 4641},{ 560, 4869},{ 602, 5122}, + { 638, 5389},{ 672, 5610},{ 716, 5787},{ 758, 6002}, + { 817, 6226},{ 869, 6393},{ 916, 6530},{ 950, 6799} + }, + /*Cb qi=17 INTER*/ + { + { 105, -65},{ 86, 288},{ 66, 638},{ 54, 1014}, + { 59, 1427},{ 71, 1844},{ 86, 2257},{ 95, 2668}, + { 100, 3075},{ 103, 3476},{ 106, 3867},{ 110, 4241}, + { 112, 4598},{ 114, 4948},{ 117, 5294},{ 121, 5633}, + { 123, 5968},{ 126, 6301},{ 131, 6637},{ 136, 6968}, + { 144, 7287},{ 152, 7606},{ 158, 7931},{ 162, 8262} + } + }, + { + /*Cr qi=17 INTRA*/ + { + { 4, 6},{ 55, 376},{ 97, 765},{ 128, 1226}, + { 152, 1696},{ 175, 2144},{ 204, 2568},{ 241, 2928}, + { 282, 3250},{ 323, 3530},{ 368, 3811},{ 420, 4089}, + { 463, 4347},{ 505, 4609},{ 562, 4860},{ 609, 5094}, + { 655, 5303},{ 709, 5535},{ 759, 5740},{ 803, 5913}, + { 844, 6153},{ 879, 6350},{ 905, 6527},{ 972, 6637} + }, + /*Cr qi=17 INTER*/ + { + { 88, 8},{ 68, 330},{ 51, 653},{ 54, 1028}, + { 65, 1433},{ 77, 1845},{ 89, 2257},{ 96, 2669}, + { 100, 3081},{ 102, 3481},{ 105, 3867},{ 106, 4245}, + { 108, 4613},{ 110, 4971},{ 112, 5328},{ 115, 5679}, + { 120, 6019},{ 127, 6355},{ 133, 6686},{ 140, 7007}, + { 149, 7316},{ 158, 7618},{ 166, 7924},{ 170, 8232} + } + } + }, + { + { + /*Y' qi=18 INTRA*/ + { + { 122, -58},{ 
216, 1506},{ 425, 2815},{ 665, 3822}, + { 882, 4666},{ 1088, 5425},{ 1301, 6084},{ 1529, 6653}, + { 1766, 7162},{ 2026, 7611},{ 2312, 7987},{ 2612, 8278}, + { 2913, 8551},{ 3196, 8840},{ 3454, 9184},{ 3734, 9490}, + { 4030, 9725},{ 4305, 9973},{ 4585,10162},{ 4864,10251}, + { 5150,10324},{ 5443,10420},{ 5727,10536},{ 6053,10682} + }, + /*Y' qi=18 INTER*/ + { + { 66, -143},{ 47, 1351},{ 108, 2886},{ 158, 4401}, + { 200, 5775},{ 232, 7044},{ 256, 8288},{ 292, 9493}, + { 351,10625},{ 434,11679},{ 541,12665},{ 681,13578}, + { 875,14379},{ 1136,15025},{ 1483,15475},{ 1914,15709}, + { 2399,15767},{ 2907,15699},{ 3400,15579},{ 3852,15453}, + { 4259,15332},{ 4630,15221},{ 4976,15121},{ 5294,15061} + } + }, + { + /*Cb qi=18 INTRA*/ + { + { 2, 3},{ 61, 367},{ 107, 743},{ 131, 1182}, + { 155, 1648},{ 183, 2110},{ 220, 2542},{ 260, 2927}, + { 303, 3265},{ 359, 3540},{ 416, 3785},{ 462, 4063}, + { 506, 4334},{ 553, 4567},{ 595, 4797},{ 636, 5049}, + { 676, 5304},{ 717, 5516},{ 759, 5698},{ 801, 5904}, + { 861, 6133},{ 911, 6311},{ 962, 6443},{ 1021, 6645} + }, + /*Cb qi=18 INTER*/ + { + { 126, 5},{ 95, 326},{ 66, 643},{ 55, 1015}, + { 60, 1427},{ 73, 1843},{ 87, 2256},{ 96, 2667}, + { 101, 3073},{ 104, 3470},{ 108, 3853},{ 111, 4226}, + { 114, 4584},{ 117, 4928},{ 119, 5274},{ 122, 5612}, + { 126, 5942},{ 130, 6271},{ 136, 6606},{ 141, 6931}, + { 148, 7247},{ 156, 7568},{ 164, 7891},{ 173, 8211} + } + }, + { + /*Cr qi=18 INTRA*/ + { + { 4, 6},{ 59, 376},{ 104, 765},{ 133, 1226}, + { 156, 1692},{ 184, 2136},{ 218, 2548},{ 260, 2893}, + { 308, 3204},{ 348, 3481},{ 397, 3751},{ 448, 4024}, + { 490, 4281},{ 541, 4523},{ 593, 4776},{ 634, 5022}, + { 685, 5236},{ 748, 5455},{ 812, 5638},{ 856, 5818}, + { 891, 6048},{ 928, 6230},{ 961, 6405},{ 1055, 6449} + }, + /*Cr qi=18 INTER*/ + { + { 81, 34},{ 68, 342},{ 57, 652},{ 59, 1027}, + { 67, 1439},{ 80, 1848},{ 91, 2257},{ 97, 2670}, + { 100, 3076},{ 103, 3473},{ 106, 3857},{ 108, 4231}, + { 109, 4599},{ 110, 4958},{ 113, 5307},{ 119, 
5650}, + { 125, 5991},{ 130, 6325},{ 138, 6651},{ 147, 6971}, + { 153, 7278},{ 162, 7578},{ 172, 7874},{ 177, 8156} + } + } + }, + { + { + /*Y' qi=19 INTRA*/ + { + { 128, -55},{ 228, 1495},{ 448, 2775},{ 699, 3758}, + { 931, 4571},{ 1154, 5296},{ 1386, 5914},{ 1636, 6450}, + { 1894, 6930},{ 2177, 7342},{ 2479, 7698},{ 2792, 7976}, + { 3099, 8235},{ 3392, 8517},{ 3658, 8853},{ 3938, 9155}, + { 4242, 9371},{ 4527, 9605},{ 4810, 9781},{ 5089, 9853}, + { 5378, 9920},{ 5674,10009},{ 5972,10110},{ 6336,10196} + }, + /*Y' qi=19 INTER*/ + { + { 69, -147},{ 49, 1353},{ 111, 2883},{ 162, 4381}, + { 205, 5737},{ 237, 6996},{ 264, 8232},{ 307, 9421}, + { 376,10534},{ 472,11567},{ 596,12525},{ 761,13395}, + { 990,14130},{ 1298,14694},{ 1695,15053},{ 2172,15195}, + { 2696,15173},{ 3213,15075},{ 3696,14948},{ 4141,14829}, + { 4541,14721},{ 4910,14609},{ 5245,14506},{ 5536,14399} + } + }, + { + /*Cb qi=19 INTRA*/ + { + { 3, 3},{ 61, 367},{ 109, 743},{ 135, 1182}, + { 161, 1646},{ 191, 2101},{ 229, 2524},{ 273, 2898}, + { 318, 3221},{ 376, 3490},{ 436, 3731},{ 487, 3994}, + { 539, 4251},{ 584, 4485},{ 621, 4721},{ 664, 4967}, + { 709, 5225},{ 752, 5431},{ 801, 5595},{ 846, 5796}, + { 912, 6011},{ 959, 6193},{ 1015, 6321},{ 1121, 6504} + }, + /*Cb qi=19 INTER*/ + { + { 126, 4},{ 97, 329},{ 69, 649},{ 56, 1017}, + { 61, 1432},{ 74, 1846},{ 88, 2255},{ 98, 2663}, + { 103, 3065},{ 106, 3460},{ 110, 3844},{ 114, 4211}, + { 117, 4564},{ 120, 4911},{ 122, 5253},{ 125, 5588}, + { 129, 5916},{ 135, 6241},{ 142, 6567},{ 149, 6885}, + { 155, 7206},{ 163, 7527},{ 174, 7843},{ 188, 8145} + } + }, + { + /*Cr qi=19 INTRA*/ + { + { 5, 6},{ 61, 376},{ 106, 765},{ 135, 1225}, + { 160, 1689},{ 192, 2126},{ 229, 2531},{ 271, 2869}, + { 321, 3168},{ 370, 3433},{ 421, 3704},{ 476, 3965}, + { 520, 4212},{ 572, 4452},{ 629, 4691},{ 671, 4939}, + { 724, 5152},{ 792, 5347},{ 858, 5510},{ 895, 5696}, + { 939, 5905},{ 991, 6056},{ 1027, 6244},{ 1127, 6333} + }, + /*Cr qi=19 INTER*/ + { + { 80, 45},{ 66, 
344},{ 55, 654},{ 56, 1030}, + { 66, 1440},{ 80, 1850},{ 91, 2259},{ 98, 2668}, + { 102, 3072},{ 104, 3466},{ 107, 3845},{ 109, 4215}, + { 110, 4578},{ 112, 4933},{ 116, 5283},{ 122, 5625}, + { 129, 5963},{ 136, 6287},{ 143, 6611},{ 151, 6927}, + { 160, 7229},{ 170, 7528},{ 181, 7818},{ 191, 8092} + } + } + }, + { + { + /*Y' qi=20 INTRA*/ + { + { 129, -50},{ 238, 1481},{ 469, 2728},{ 730, 3684}, + { 974, 4473},{ 1213, 5171},{ 1463, 5763},{ 1729, 6281}, + { 2002, 6744},{ 2299, 7146},{ 2613, 7492},{ 2940, 7746}, + { 3265, 7978},{ 3571, 8228},{ 3853, 8543},{ 4156, 8815}, + { 4476, 9001},{ 4775, 9218},{ 5070, 9373},{ 5352, 9446}, + { 5649, 9510},{ 5956, 9580},{ 6268, 9660},{ 6647, 9705} + }, + /*Y' qi=20 INTER*/ + { + { 64, -93},{ 52, 1340},{ 116, 2862},{ 170, 4344}, + { 216, 5678},{ 249, 6928},{ 281, 8155},{ 333, 9326}, + { 418,10410},{ 533,11411},{ 683,12329},{ 890,13127}, + { 1183,13750},{ 1579,14162},{ 2066,14357},{ 2611,14370}, + { 3159,14284},{ 3675,14167},{ 4142,14053},{ 4568,13953}, + { 4961,13852},{ 5320,13755},{ 5649,13675},{ 5933,13610} + } + }, + { + /*Cb qi=20 INTRA*/ + { + { 3, 3},{ 62, 367},{ 112, 743},{ 140, 1183}, + { 165, 1646},{ 196, 2099},{ 235, 2517},{ 284, 2883}, + { 334, 3198},{ 393, 3460},{ 457, 3690},{ 509, 3945}, + { 560, 4198},{ 605, 4435},{ 647, 4658},{ 699, 4888}, + { 742, 5155},{ 788, 5350},{ 835, 5517},{ 880, 5730}, + { 956, 5914},{ 1007, 6060},{ 1053, 6199},{ 1158, 6358} + }, + /*Cb qi=20 INTER*/ + { + { 128, -6},{ 96, 322},{ 66, 653},{ 54, 1025}, + { 63, 1431},{ 79, 1844},{ 91, 2256},{ 99, 2665}, + { 104, 3065},{ 107, 3455},{ 111, 3831},{ 115, 4189}, + { 120, 4539},{ 123, 4885},{ 126, 5219},{ 130, 5548}, + { 135, 5876},{ 141, 6199},{ 149, 6519},{ 156, 6837}, + { 166, 7153},{ 179, 7468},{ 189, 7784},{ 194, 8102} + } + }, + { + /*Cr qi=20 INTRA*/ + { + { 4, 6},{ 63, 376},{ 109, 765},{ 139, 1225}, + { 165, 1689},{ 199, 2124},{ 239, 2523},{ 285, 2852}, + { 340, 3140},{ 388, 3398},{ 438, 3662},{ 499, 3914}, + { 547, 4155},{ 596, 4392},{ 652, 
4634},{ 699, 4877}, + { 759, 5074},{ 824, 5257},{ 883, 5428},{ 936, 5589}, + { 986, 5790},{ 1030, 5960},{ 1074, 6119},{ 1172, 6191} + }, + /*Cr qi=20 INTER*/ + { + { 92, 40},{ 70, 345},{ 55, 658},{ 57, 1034}, + { 69, 1441},{ 84, 1852},{ 94, 2261},{ 98, 2669}, + { 102, 3074},{ 105, 3465},{ 107, 3841},{ 110, 4206}, + { 112, 4562},{ 116, 4915},{ 121, 5260},{ 127, 5591}, + { 134, 5920},{ 142, 6246},{ 153, 6562},{ 163, 6870}, + { 173, 7170},{ 186, 7463},{ 198, 7746},{ 199, 8030} + } + } + }, + { + { + /*Y' qi=21 INTRA*/ + { + { 130, -51},{ 244, 1476},{ 483, 2705},{ 756, 3635}, + { 1013, 4396},{ 1266, 5070},{ 1530, 5647},{ 1806, 6153}, + { 2093, 6600},{ 2411, 6976},{ 2739, 7299},{ 3079, 7534}, + { 3422, 7744},{ 3738, 7987},{ 4032, 8274},{ 4348, 8533}, + { 4675, 8721},{ 4989, 8909},{ 5291, 9051},{ 5577, 9111}, + { 5879, 9163},{ 6190, 9228},{ 6506, 9286},{ 6899, 9295} + }, + /*Y' qi=21 INTER*/ + { + { 64, -56},{ 55, 1341},{ 119, 2859},{ 174, 4324}, + { 223, 5640},{ 258, 6880},{ 295, 8096},{ 359, 9246}, + { 460,10302},{ 595,11268},{ 778,12131},{ 1032,12857}, + { 1387,13385},{ 1850,13683},{ 2399,13774},{ 2976,13729}, + { 3527,13619},{ 4034,13504},{ 4492,13401},{ 4912,13291}, + { 5298,13209},{ 5648,13137},{ 5974,13046},{ 6308,12977} + } + }, + { + /*Cb qi=21 INTRA*/ + { + { 4, 3},{ 64, 367},{ 114, 743},{ 141, 1183}, + { 166, 1645},{ 201, 2092},{ 247, 2502},{ 299, 2856}, + { 352, 3158},{ 413, 3412},{ 480, 3642},{ 536, 3893}, + { 588, 4137},{ 637, 4367},{ 678, 4598},{ 725, 4834}, + { 774, 5083},{ 827, 5269},{ 883, 5420},{ 930, 5633}, + { 999, 5829},{ 1057, 5959},{ 1113, 6082},{ 1200, 6265} + }, + /*Cb qi=21 INTER*/ + { + { 109, -8},{ 84, 321},{ 62, 654},{ 54, 1028}, + { 64, 1434},{ 80, 1847},{ 92, 2259},{ 100, 2664}, + { 105, 3060},{ 109, 3445},{ 114, 3815},{ 118, 4172}, + { 122, 4519},{ 126, 4861},{ 128, 5194},{ 133, 5520}, + { 139, 5847},{ 146, 6169},{ 155, 6487},{ 166, 6801}, + { 177, 7114},{ 189, 7423},{ 201, 7729},{ 208, 8035} + } + }, + { + /*Cr qi=21 INTRA*/ + { + { 4, 
6},{ 64, 377},{ 111, 766},{ 144, 1225}, + { 174, 1683},{ 206, 2114},{ 248, 2506},{ 302, 2824}, + { 357, 3099},{ 404, 3357},{ 455, 3622},{ 519, 3867}, + { 573, 4098},{ 625, 4331},{ 683, 4571},{ 733, 4802}, + { 793, 4994},{ 863, 5173},{ 926, 5337},{ 978, 5492}, + { 1030, 5685},{ 1079, 5856},{ 1126, 6027},{ 1217, 6159} + }, + /*Cr qi=21 INTER*/ + { + { 82, 29},{ 67, 341},{ 55, 660},{ 58, 1038}, + { 71, 1443},{ 85, 1851},{ 95, 2258},{ 99, 2666}, + { 103, 3069},{ 107, 3456},{ 110, 3826},{ 112, 4188}, + { 114, 4544},{ 118, 4891},{ 124, 5231},{ 132, 5567}, + { 139, 5894},{ 148, 6210},{ 159, 6520},{ 171, 6822}, + { 185, 7111},{ 196, 7403},{ 209, 7691},{ 225, 7945} + } + } + }, + { + { + /*Y' qi=22 INTRA*/ + { + { 128, -45},{ 254, 1463},{ 507, 2662},{ 794, 3562}, + { 1070, 4292},{ 1340, 4941},{ 1622, 5492},{ 1920, 5968}, + { 2229, 6387},{ 2565, 6742},{ 2911, 7047},{ 3263, 7264}, + { 3615, 7464},{ 3944, 7689},{ 4258, 7950},{ 4591, 8183}, + { 4934, 8347},{ 5259, 8517},{ 5573, 8634},{ 5870, 8683}, + { 6186, 8723},{ 6508, 8762},{ 6831, 8801},{ 7232, 8830} + }, + /*Y' qi=22 INTER*/ + { + { 77, -48},{ 57, 1343},{ 122, 2853},{ 180, 4299}, + { 231, 5597},{ 269, 6826},{ 314, 8025},{ 393, 9150}, + { 512,10179},{ 673,11103},{ 894,11908},{ 1207,12542}, + { 1635,12956},{ 2166,13148},{ 2755,13167},{ 3345,13088}, + { 3895,12966},{ 4386,12848},{ 4832,12746},{ 5252,12647}, + { 5634,12563},{ 5978,12497},{ 6299,12412},{ 6633,12338} + } + }, + { + /*Cb qi=22 INTRA*/ + { + { 4, 3},{ 66, 367},{ 122, 744},{ 153, 1182}, + { 177, 1640},{ 213, 2080},{ 263, 2475},{ 323, 2811}, + { 382, 3103},{ 451, 3346},{ 522, 3568},{ 581, 3814}, + { 633, 4054},{ 674, 4288},{ 719, 4523},{ 768, 4756}, + { 823, 4979},{ 883, 5162},{ 937, 5325},{ 996, 5510}, + { 1070, 5687},{ 1129, 5807},{ 1193, 5929},{ 1311, 6099} + }, + /*Cb qi=22 INTER*/ + { + { 107, -5},{ 83, 322},{ 61, 653},{ 55, 1030}, + { 66, 1436},{ 81, 1845},{ 94, 2253},{ 102, 2656}, + { 107, 3050},{ 111, 3435},{ 115, 3804},{ 119, 4158}, + { 124, 4501},{ 128, 
4835},{ 132, 5164},{ 138, 5490}, + { 146, 5812},{ 154, 6128},{ 163, 6442},{ 174, 6754}, + { 188, 7060},{ 205, 7361},{ 219, 7662},{ 233, 7953} + } + }, + { + /*Cr qi=22 INTRA*/ + { + { 4, 6},{ 67, 378},{ 118, 767},{ 151, 1222}, + { 182, 1675},{ 221, 2097},{ 269, 2476},{ 329, 2774}, + { 389, 3039},{ 444, 3292},{ 500, 3545},{ 560, 3788}, + { 615, 4020},{ 671, 4251},{ 734, 4484},{ 781, 4712}, + { 850, 4887},{ 925, 5060},{ 981, 5229},{ 1031, 5369}, + { 1092, 5549},{ 1148, 5715},{ 1200, 5861},{ 1291, 5943} + }, + /*Cr qi=22 INTER*/ + { + { 88, 34},{ 69, 340},{ 57, 657},{ 60, 1039}, + { 73, 1445},{ 87, 1851},{ 96, 2257},{ 100, 2662}, + { 103, 3058},{ 107, 3442},{ 111, 3812},{ 115, 4172}, + { 118, 4524},{ 123, 4864},{ 129, 5199},{ 136, 5531}, + { 145, 5855},{ 156, 6168},{ 170, 6468},{ 184, 6765}, + { 193, 7066},{ 207, 7353},{ 222, 7628},{ 230, 7900} + } + } + }, + { + { + /*Y' qi=23 INTRA*/ + { + { 126, -40},{ 257, 1458},{ 521, 2636},{ 825, 3501}, + { 1111, 4207},{ 1391, 4842},{ 1684, 5385},{ 1992, 5858}, + { 2311, 6277},{ 2653, 6626},{ 3005, 6929},{ 3366, 7134}, + { 3729, 7311},{ 4071, 7526},{ 4396, 7770},{ 4734, 7986}, + { 5086, 8131},{ 5421, 8286},{ 5735, 8404},{ 6033, 8456}, + { 6357, 8486},{ 6682, 8525},{ 7003, 8573},{ 7387, 8604} + }, + /*Y' qi=23 INTER*/ + { + { 64, -57},{ 60, 1345},{ 124, 2853},{ 185, 4284}, + { 239, 5565},{ 282, 6783},{ 336, 7967},{ 429, 9069}, + { 568,10063},{ 758,10943},{ 1028,11679},{ 1407,12216}, + { 1909,12520},{ 2502,12616},{ 3126,12573},{ 3722,12461}, + { 4258,12344},{ 4742,12236},{ 5185,12136},{ 5590,12052}, + { 5970,11980},{ 6315,11901},{ 6631,11826},{ 6954,11769} + } + }, + { + /*Cb qi=23 INTRA*/ + { + { 3, 3},{ 70, 367},{ 124, 744},{ 151, 1182}, + { 181, 1637},{ 222, 2071},{ 276, 2460},{ 343, 2785}, + { 403, 3072},{ 468, 3317},{ 542, 3534},{ 605, 3773}, + { 659, 4009},{ 703, 4243},{ 747, 4479},{ 795, 4707}, + { 852, 4923},{ 908, 5105},{ 972, 5254},{ 1043, 5423}, + { 1118, 5594},{ 1172, 5731},{ 1240, 5853},{ 1365, 6005} + }, + /*Cb qi=23 
INTER*/ + { + { 109, -10},{ 87, 325},{ 63, 650},{ 57, 1031}, + { 67, 1439},{ 83, 1847},{ 96, 2253},{ 103, 2652}, + { 109, 3041},{ 114, 3421},{ 117, 3789},{ 122, 4141}, + { 128, 4480},{ 134, 4811},{ 139, 5138},{ 144, 5463}, + { 152, 5781},{ 161, 6096},{ 174, 6404},{ 185, 6714}, + { 198, 7023},{ 216, 7320},{ 233, 7621},{ 245, 7935} + } + }, + { + /*Cr qi=23 INTRA*/ + { + { 5, 6},{ 70, 379},{ 122, 768},{ 155, 1222}, + { 187, 1671},{ 231, 2088},{ 283, 2459},{ 346, 2750}, + { 411, 3009},{ 465, 3261},{ 523, 3509},{ 585, 3746}, + { 639, 3980},{ 695, 4219},{ 754, 4449},{ 803, 4671}, + { 873, 4840},{ 953, 5001},{ 1015, 5156},{ 1071, 5286}, + { 1137, 5464},{ 1191, 5629},{ 1249, 5782},{ 1359, 5885} + }, + /*Cr qi=23 INTER*/ + { + { 84, 29},{ 69, 343},{ 58, 660},{ 62, 1041}, + { 75, 1448},{ 88, 1853},{ 97, 2258},{ 102, 2659}, + { 105, 3050},{ 108, 3430},{ 113, 3799},{ 116, 4155}, + { 121, 4505},{ 126, 4845},{ 132, 5176},{ 142, 5504}, + { 153, 5826},{ 165, 6133},{ 180, 6432},{ 197, 6722}, + { 212, 7005},{ 226, 7287},{ 244, 7555},{ 258, 7828} + } + } + }, + { + { + /*Y' qi=24 INTRA*/ + { + { 125, -34},{ 268, 1444},{ 547, 2590},{ 866, 3422}, + { 1172, 4098},{ 1476, 4702},{ 1790, 5222},{ 2117, 5678}, + { 2453, 6080},{ 2811, 6418},{ 3178, 6700},{ 3552, 6895}, + { 3928, 7055},{ 4286, 7243},{ 4627, 7477},{ 4981, 7674}, + { 5344, 7802},{ 5683, 7944},{ 6009, 8043},{ 6313, 8082}, + { 6633, 8111},{ 6959, 8151},{ 7280, 8197},{ 7660, 8221} + }, + /*Y' qi=24 INTER*/ + { + { 62, -63},{ 68, 1345},{ 134, 2840},{ 199, 4245}, + { 256, 5508},{ 304, 6715},{ 371, 7880},{ 484, 8950}, + { 652, 9899},{ 892,10709},{ 1238,11334},{ 1722,11722}, + { 2326,11875},{ 2983,11864},{ 3616,11783},{ 4189,11678}, + { 4707,11570},{ 5178,11476},{ 5617,11395},{ 6017,11319}, + { 6380,11252},{ 6720,11185},{ 7044,11126},{ 7377,11118} + } + }, + { + /*Cb qi=24 INTRA*/ + { + { 4, 3},{ 75, 367},{ 132, 745},{ 159, 1182}, + { 187, 1634},{ 230, 2061},{ 289, 2439},{ 361, 2753}, + { 425, 3034},{ 492, 3278},{ 566, 3490},{ 630, 
3720}, + { 686, 3956},{ 732, 4190},{ 777, 4420},{ 829, 4637}, + { 894, 4840},{ 958, 5012},{ 1023, 5155},{ 1090, 5326}, + { 1165, 5502},{ 1226, 5622},{ 1299, 5717},{ 1408, 5887} + }, + /*Cb qi=24 INTER*/ + { + { 110, 35},{ 92, 337},{ 70, 651},{ 63, 1033}, + { 74, 1440},{ 91, 1846},{ 102, 2248},{ 109, 2644}, + { 114, 3031},{ 120, 3404},{ 127, 3762},{ 133, 4109}, + { 138, 4445},{ 144, 4772},{ 151, 5094},{ 159, 5411}, + { 168, 5728},{ 180, 6037},{ 195, 6338},{ 210, 6640}, + { 227, 6944},{ 249, 7236},{ 272, 7528},{ 299, 7809} + } + }, + { + /*Cr qi=24 INTRA*/ + { + { 5, 6},{ 72, 380},{ 124, 770},{ 158, 1222}, + { 195, 1668},{ 240, 2079},{ 297, 2438},{ 367, 2715}, + { 433, 2966},{ 488, 3218},{ 549, 3467},{ 609, 3701}, + { 664, 3935},{ 728, 4165},{ 792, 4379},{ 845, 4586}, + { 917, 4744},{ 995, 4898},{ 1063, 5049},{ 1120, 5187}, + { 1190, 5359},{ 1249, 5522},{ 1304, 5672},{ 1397, 5806} + }, + /*Cr qi=24 INTER*/ + { + { 91, 56},{ 73, 353},{ 61, 664},{ 66, 1045}, + { 80, 1449},{ 95, 1851},{ 103, 2250},{ 107, 2648}, + { 111, 3038},{ 116, 3413},{ 120, 3774},{ 124, 4128}, + { 130, 4471},{ 138, 4802},{ 145, 5130},{ 156, 5453}, + { 171, 5764},{ 187, 6061},{ 204, 6355},{ 220, 6643}, + { 238, 6923},{ 254, 7204},{ 275, 7475},{ 289, 7752} + } + } + }, + { + { + /*Y' qi=25 INTRA*/ + { + { 125, -28},{ 285, 1426},{ 582, 2540},{ 917, 3351}, + { 1244, 3997},{ 1569, 4570},{ 1903, 5071},{ 2258, 5498}, + { 2626, 5866},{ 3002, 6182},{ 3382, 6448},{ 3770, 6623}, + { 4162, 6760},{ 4528, 6934},{ 4882, 7144},{ 5249, 7328}, + { 5610, 7453},{ 5958, 7578},{ 6291, 7672},{ 6597, 7708}, + { 6928, 7715},{ 7258, 7737},{ 7575, 7781},{ 7950, 7829} + }, + /*Y' qi=25 INTER*/ + { + { 64, -16},{ 72, 1348},{ 139, 2832},{ 206, 4218}, + { 268, 5465},{ 322, 6659},{ 403, 7803},{ 540, 8838}, + { 747, 9734},{ 1044,10465},{ 1473,10981},{ 2048,11249}, + { 2717,11311},{ 3397,11257},{ 4025,11161},{ 4589,11052}, + { 5099,10947},{ 5560,10859},{ 5989,10786},{ 6389,10717}, + { 6753,10652},{ 7078,10592},{ 7389,10535},{ 
7697,10460} + } + }, + { + /*Cb qi=25 INTRA*/ + { + { 3, 3},{ 78, 368},{ 133, 745},{ 159, 1180}, + { 193, 1627},{ 242, 2046},{ 304, 2411},{ 381, 2714}, + { 456, 2983},{ 527, 3224},{ 598, 3437},{ 667, 3655}, + { 726, 3888},{ 776, 4117},{ 826, 4333},{ 883, 4543}, + { 954, 4727},{ 1019, 4878},{ 1095, 5014},{ 1171, 5187}, + { 1255, 5342},{ 1319, 5458},{ 1396, 5546},{ 1536, 5678} + }, + /*Cb qi=25 INTER*/ + { + { 117, 32},{ 89, 342},{ 67, 660},{ 64, 1037}, + { 77, 1441},{ 93, 1845},{ 105, 2243},{ 113, 2633}, + { 120, 3016},{ 125, 3387},{ 131, 3739},{ 137, 4080}, + { 144, 4416},{ 152, 4741},{ 160, 5057},{ 169, 5369}, + { 180, 5680},{ 193, 5990},{ 209, 6294},{ 227, 6594}, + { 249, 6888},{ 269, 7180},{ 294, 7467},{ 317, 7768} + } + }, + { + /*Cr qi=25 INTRA*/ + { + { 6, 6},{ 74, 380},{ 129, 770},{ 165, 1220}, + { 201, 1658},{ 253, 2061},{ 315, 2410},{ 388, 2676}, + { 462, 2920},{ 523, 3166},{ 584, 3404},{ 647, 3637}, + { 701, 3870},{ 769, 4086},{ 838, 4296},{ 898, 4491}, + { 980, 4627},{ 1065, 4759},{ 1126, 4920},{ 1187, 5058}, + { 1283, 5180},{ 1347, 5332},{ 1404, 5475},{ 1527, 5534} + }, + /*Cr qi=25 INTER*/ + { + { 92, 41},{ 75, 347},{ 64, 664},{ 70, 1045}, + { 85, 1448},{ 98, 1849},{ 105, 2245},{ 110, 2637}, + { 115, 3023},{ 120, 3395},{ 126, 3753},{ 131, 4102}, + { 136, 4439},{ 145, 4768},{ 156, 5094},{ 168, 5410}, + { 184, 5717},{ 203, 6010},{ 221, 6300},{ 239, 6577}, + { 262, 6847},{ 282, 7123},{ 303, 7390},{ 322, 7665} + } + } + }, + { + { + /*Y' qi=26 INTRA*/ + { + { 130, -24},{ 292, 1423},{ 594, 2525},{ 943, 3307}, + { 1289, 3921},{ 1633, 4467},{ 1991, 4943},{ 2368, 5348}, + { 2753, 5696},{ 3148, 5991},{ 3545, 6247},{ 3942, 6415}, + { 4342, 6535},{ 4726, 6690},{ 5093, 6883},{ 5466, 7047}, + { 5840, 7159},{ 6202, 7274},{ 6545, 7351},{ 6855, 7375}, + { 7186, 7384},{ 7517, 7416},{ 7840, 7447},{ 8238, 7450} + }, + /*Y' qi=26 INTER*/ + { + { 52, 16},{ 75, 1336},{ 143, 2815},{ 213, 4191}, + { 278, 5427},{ 339, 6611},{ 436, 7734},{ 600, 8732}, + { 843, 9579},{ 
1195,10243},{ 1702,10660},{ 2355,10825}, + { 3070,10820},{ 3755,10743},{ 4372,10643},{ 4925,10538}, + { 5426,10440},{ 5882,10354},{ 6296,10290},{ 6686,10224}, + { 7049,10163},{ 7380,10113},{ 7672,10062},{ 7937,10021} + } + }, + { + /*Cb qi=26 INTRA*/ + { + { 4, 3},{ 79, 368},{ 138, 745},{ 167, 1180}, + { 200, 1623},{ 252, 2034},{ 322, 2389},{ 403, 2682}, + { 480, 2941},{ 558, 3176},{ 631, 3393},{ 700, 3608}, + { 766, 3825},{ 819, 4046},{ 868, 4265},{ 926, 4472}, + { 1002, 4645},{ 1070, 4800},{ 1151, 4924},{ 1242, 5063}, + { 1325, 5221},{ 1393, 5338},{ 1464, 5431},{ 1595, 5559} + }, + /*Cb qi=26 INTER*/ + { + { 98, 33},{ 83, 343},{ 65, 662},{ 65, 1037}, + { 80, 1437},{ 96, 1839},{ 107, 2238},{ 115, 2628}, + { 122, 3007},{ 128, 3373},{ 134, 3722},{ 142, 4060}, + { 149, 4390},{ 158, 4713},{ 167, 5029},{ 178, 5341}, + { 191, 5647},{ 208, 5948},{ 227, 6244},{ 247, 6539}, + { 269, 6833},{ 295, 7114},{ 328, 7388},{ 369, 7658} + } + }, + { + /*Cr qi=26 INTRA*/ + { + { 5, 6},{ 75, 380},{ 133, 769},{ 172, 1217}, + { 212, 1652},{ 266, 2048},{ 333, 2384},{ 412, 2643}, + { 490, 2880},{ 552, 3124},{ 616, 3365},{ 681, 3594}, + { 739, 3816},{ 810, 4024},{ 880, 4224},{ 945, 4405}, + { 1029, 4538},{ 1114, 4674},{ 1183, 4822},{ 1254, 4946}, + { 1346, 5063},{ 1417, 5201},{ 1478, 5345},{ 1597, 5411} + }, + /*Cr qi=26 INTER*/ + { + { 97, 29},{ 75, 342},{ 62, 667},{ 70, 1047}, + { 87, 1447},{ 100, 1846},{ 107, 2242},{ 113, 2633}, + { 118, 3016},{ 123, 3382},{ 128, 3737},{ 135, 4082}, + { 142, 4417},{ 151, 4746},{ 162, 5066},{ 176, 5377}, + { 194, 5679},{ 217, 5963},{ 239, 6244},{ 260, 6522}, + { 284, 6789},{ 309, 7052},{ 335, 7313},{ 355, 7582} + } + } + }, + { + { + /*Y' qi=27 INTRA*/ + { + { 118, -10},{ 308, 1404},{ 630, 2473},{ 997, 3227}, + { 1360, 3819},{ 1719, 4354},{ 2086, 4829},{ 2470, 5233}, + { 2863, 5576},{ 3267, 5870},{ 3677, 6117},{ 4085, 6268}, + { 4499, 6376},{ 4888, 6521},{ 5257, 6705},{ 5638, 6865}, + { 6020, 6962},{ 6394, 7056},{ 6744, 7130},{ 7051, 7158}, + { 7386, 
7164},{ 7717, 7185},{ 8042, 7209},{ 8444, 7206} + }, + /*Y' qi=27 INTER*/ + { + { 54, 19},{ 77, 1333},{ 147, 2806},{ 221, 4166}, + { 290, 5390},{ 360, 6564},{ 474, 7665},{ 664, 8630}, + { 949, 9423},{ 1370,10002},{ 1958,10323},{ 2670,10414}, + { 3406,10375},{ 4086,10285},{ 4691,10182},{ 5233,10085}, + { 5724, 9994},{ 6169, 9918},{ 6582, 9863},{ 6962, 9813}, + { 7316, 9759},{ 7645, 9707},{ 7948, 9660},{ 8262, 9623} + } + }, + { + /*Cb qi=27 INTRA*/ + { + { 4, 3},{ 79, 368},{ 137, 745},{ 166, 1180}, + { 200, 1622},{ 253, 2030},{ 324, 2381},{ 407, 2671}, + { 487, 2925},{ 567, 3156},{ 640, 3372},{ 712, 3580}, + { 782, 3792},{ 833, 4015},{ 887, 4227},{ 954, 4422}, + { 1031, 4592},{ 1103, 4738},{ 1187, 4856},{ 1280, 4990}, + { 1371, 5135},{ 1442, 5244},{ 1520, 5321},{ 1684, 5398} + }, + /*Cb qi=27 INTER*/ + { + { 113, 20},{ 90, 338},{ 66, 661},{ 67, 1034}, + { 82, 1438},{ 97, 1842},{ 108, 2238},{ 115, 2624}, + { 123, 3000},{ 130, 3361},{ 138, 3708},{ 146, 4040}, + { 155, 4367},{ 164, 4688},{ 174, 4999},{ 186, 5306}, + { 203, 5609},{ 222, 5908},{ 243, 6202},{ 268, 6494}, + { 295, 6781},{ 326, 7058},{ 367, 7319},{ 420, 7551} + } + }, + { + /*Cr qi=27 INTRA*/ + { + { 5, 6},{ 75, 380},{ 133, 770},{ 173, 1217}, + { 214, 1650},{ 268, 2040},{ 337, 2375},{ 418, 2631}, + { 496, 2862},{ 558, 3104},{ 625, 3346},{ 692, 3571}, + { 753, 3786},{ 825, 3989},{ 896, 4182},{ 969, 4352}, + { 1059, 4479},{ 1144, 4614},{ 1212, 4757},{ 1284, 4871}, + { 1380, 4982},{ 1457, 5125},{ 1528, 5267},{ 1651, 5346} + }, + /*Cr qi=27 INTER*/ + { + { 92, 24},{ 74, 341},{ 61, 669},{ 71, 1049}, + { 88, 1448},{ 100, 1849},{ 107, 2243},{ 113, 2631}, + { 119, 3010},{ 125, 3373},{ 131, 3723},{ 137, 4064}, + { 146, 4396},{ 159, 4720},{ 172, 5033},{ 189, 5340}, + { 210, 5636},{ 233, 5920},{ 256, 6197},{ 282, 6465}, + { 310, 6730},{ 332, 7000},{ 359, 7259},{ 385, 7515} + } + } + }, + { + { + /*Y' qi=28 INTRA*/ + { + { 116, -8},{ 314, 1400},{ 640, 2458},{ 1013, 3197}, + { 1386, 3768},{ 1762, 4279},{ 2151, 4733},{ 
2558, 5117}, + { 2970, 5442},{ 3393, 5714},{ 3820, 5935},{ 4243, 6069}, + { 4671, 6161},{ 5074, 6289},{ 5456, 6457},{ 5849, 6598}, + { 6244, 6689},{ 6632, 6777},{ 6984, 6833},{ 7294, 6855}, + { 7625, 6862},{ 7961, 6875},{ 8302, 6890},{ 8720, 6883} + }, + /*Y' qi=28 INTER*/ + { + { 54, 8},{ 81, 1333},{ 154, 2793},{ 231, 4138}, + { 304, 5352},{ 384, 6512},{ 519, 7585},{ 743, 8508}, + { 1082, 9236},{ 1587, 9717},{ 2267, 9928},{ 3034, 9944}, + { 3775, 9878},{ 4438, 9786},{ 5031, 9686},{ 5563, 9601}, + { 6042, 9523},{ 6481, 9456},{ 6890, 9405},{ 7266, 9356}, + { 7614, 9313},{ 7933, 9265},{ 8238, 9220},{ 8545, 9193} + } + }, + { + /*Cb qi=28 INTRA*/ + { + { 3, 3},{ 80, 368},{ 138, 746},{ 168, 1179}, + { 208, 1615},{ 268, 2014},{ 345, 2354},{ 432, 2637}, + { 515, 2884},{ 595, 3108},{ 669, 3323},{ 745, 3533}, + { 818, 3740},{ 876, 3953},{ 932, 4160},{ 1003, 4349}, + { 1088, 4501},{ 1154, 4648},{ 1241, 4768},{ 1349, 4889}, + { 1441, 5023},{ 1524, 5113},{ 1611, 5187},{ 1783, 5283} + }, + /*Cb qi=28 INTER*/ + { + { 117, 29},{ 91, 341},{ 65, 663},{ 68, 1038}, + { 85, 1440},{ 100, 1841},{ 110, 2234},{ 119, 2616}, + { 127, 2985},{ 135, 3342},{ 142, 3685},{ 151, 4015}, + { 162, 4337},{ 174, 4652},{ 186, 4960},{ 201, 5264}, + { 218, 5567},{ 239, 5863},{ 266, 6149},{ 295, 6434}, + { 328, 6715},{ 371, 6976},{ 409, 7239},{ 460, 7477} + } + }, + { + /*Cr qi=28 INTRA*/ + { + { 6, 7},{ 79, 381},{ 138, 771},{ 178, 1215}, + { 222, 1644},{ 285, 2026},{ 359, 2347},{ 441, 2597}, + { 521, 2827},{ 588, 3066},{ 655, 3303},{ 725, 3523}, + { 791, 3728},{ 870, 3920},{ 950, 4103},{ 1030, 4265}, + { 1121, 4388},{ 1198, 4520},{ 1266, 4659},{ 1356, 4759}, + { 1461, 4865},{ 1540, 4993},{ 1619, 5115},{ 1786, 5160} + }, + /*Cr qi=28 INTER*/ + { + { 96, 18},{ 78, 340},{ 66, 672},{ 74, 1051}, + { 90, 1450},{ 103, 1845},{ 110, 2235},{ 116, 2619}, + { 122, 2995},{ 129, 3356},{ 137, 3702},{ 146, 4038}, + { 156, 4365},{ 168, 4684},{ 182, 4995},{ 203, 5297}, + { 227, 5588},{ 253, 5866},{ 282, 6131},{ 311, 
6394}, + { 339, 6664},{ 366, 6918},{ 400, 7171},{ 424, 7450} + } + } + }, + { + { + /*Y' qi=29 INTRA*/ + { + { 112, 7},{ 334, 1382},{ 681, 2410},{ 1081, 3112}, + { 1484, 3650},{ 1894, 4128},{ 2316, 4547},{ 2749, 4905}, + { 3188, 5208},{ 3634, 5458},{ 4079, 5666},{ 4517, 5791}, + { 4952, 5870},{ 5359, 5983},{ 5754, 6137},{ 6165, 6268}, + { 6568, 6351},{ 6958, 6423},{ 7320, 6471},{ 7638, 6490}, + { 7979, 6490},{ 8313, 6499},{ 8651, 6517},{ 9085, 6499} + }, + /*Y' qi=29 INTER*/ + { + { 55, 15},{ 85, 1336},{ 160, 2780},{ 242, 4104}, + { 323, 5302},{ 418, 6443},{ 586, 7480},{ 859, 8342}, + { 1278, 8982},{ 1888, 9347},{ 2658, 9457},{ 3457, 9425}, + { 4192, 9343},{ 4842, 9247},{ 5417, 9162},{ 5935, 9086}, + { 6404, 9011},{ 6841, 8952},{ 7241, 8907},{ 7609, 8867}, + { 7953, 8832},{ 8267, 8792},{ 8562, 8740},{ 8836, 8701} + } + }, + { + /*Cb qi=29 INTRA*/ + { + { 5, 3},{ 84, 368},{ 144, 746},{ 176, 1175}, + { 219, 1604},{ 285, 1991},{ 372, 2318},{ 462, 2591}, + { 546, 2833},{ 628, 3058},{ 704, 3274},{ 788, 3473}, + { 870, 3664},{ 935, 3865},{ 995, 4059},{ 1072, 4239}, + { 1167, 4388},{ 1248, 4518},{ 1334, 4634},{ 1429, 4765}, + { 1536, 4884},{ 1628, 4964},{ 1716, 5038},{ 1885, 5128} + }, + /*Cb qi=29 INTER*/ + { + { 126, 25},{ 95, 340},{ 69, 662},{ 71, 1039}, + { 88, 1440},{ 102, 1839},{ 113, 2227},{ 122, 2604}, + { 132, 2969},{ 141, 3320},{ 151, 3659},{ 161, 3985}, + { 172, 4301},{ 186, 4612},{ 200, 4917},{ 219, 5213}, + { 241, 5509},{ 265, 5800},{ 296, 6081},{ 329, 6360}, + { 369, 6633},{ 414, 6899},{ 465, 7148},{ 520, 7387} + } + }, + { + /*Cr qi=29 INTRA*/ + { + { 6, 7},{ 82, 382},{ 142, 772},{ 185, 1211}, + { 233, 1632},{ 303, 2000},{ 388, 2306},{ 475, 2550}, + { 556, 2779},{ 627, 3007},{ 707, 3237},{ 778, 3459}, + { 843, 3654},{ 927, 3834},{ 1012, 4012},{ 1101, 4152}, + { 1197, 4262},{ 1275, 4399},{ 1359, 4511},{ 1455, 4596}, + { 1562, 4708},{ 1644, 4833},{ 1719, 4954},{ 1888, 4988} + }, + /*Cr qi=29 INTER*/ + { + { 101, 28},{ 81, 343},{ 67, 673},{ 75, 1053}, + { 93, 
1450},{ 106, 1844},{ 113, 2230},{ 119, 2610}, + { 127, 2980},{ 135, 3334},{ 143, 3676},{ 153, 4007}, + { 165, 4330},{ 180, 4645},{ 201, 4951},{ 224, 5243}, + { 253, 5522},{ 284, 5794},{ 314, 6060},{ 345, 6322}, + { 381, 6578},{ 419, 6828},{ 455, 7073},{ 495, 7316} + } + } + }, + { + { + /*Y' qi=30 INTRA*/ + { + { 112, 8},{ 335, 1380},{ 682, 2401},{ 1083, 3093}, + { 1489, 3619},{ 1902, 4092},{ 2332, 4511},{ 2777, 4865}, + { 3231, 5156},{ 3693, 5394},{ 4153, 5585},{ 4605, 5689}, + { 5049, 5764},{ 5468, 5871},{ 5875, 6004},{ 6295, 6120}, + { 6706, 6201},{ 7099, 6273},{ 7461, 6311},{ 7785, 6320}, + { 8128, 6322},{ 8469, 6331},{ 8806, 6342},{ 9220, 6338} + }, + /*Y' qi=30 INTER*/ + { + { 58, 8},{ 90, 1340},{ 169, 2771},{ 257, 4079}, + { 345, 5266},{ 459, 6387},{ 660, 7383},{ 990, 8180}, + { 1496, 8726},{ 2203, 8992},{ 3029, 9038},{ 3833, 8984}, + { 4549, 8900},{ 5183, 8813},{ 5745, 8735},{ 6250, 8674}, + { 6715, 8619},{ 7138, 8565},{ 7529, 8528},{ 7899, 8495}, + { 8234, 8465},{ 8550, 8429},{ 8856, 8395},{ 9160, 8374} + } + }, + { + /*Cb qi=30 INTRA*/ + { + { 7, 3},{ 88, 369},{ 149, 747},{ 185, 1175}, + { 232, 1599},{ 304, 1976},{ 392, 2293},{ 486, 2557}, + { 573, 2797},{ 656, 3027},{ 735, 3243},{ 819, 3442}, + { 903, 3629},{ 966, 3828},{ 1025, 4027},{ 1105, 4204}, + { 1201, 4343},{ 1282, 4469},{ 1379, 4575},{ 1486, 4689}, + { 1588, 4813},{ 1678, 4900},{ 1767, 4969},{ 1911, 5080} + }, + /*Cb qi=30 INTER*/ + { + { 120, 23},{ 96, 336},{ 72, 661},{ 75, 1043}, + { 91, 1441},{ 105, 1837},{ 117, 2221},{ 127, 2592}, + { 137, 2953},{ 148, 3301},{ 159, 3635},{ 170, 3959}, + { 184, 4271},{ 199, 4578},{ 216, 4879},{ 238, 5175}, + { 262, 5466},{ 294, 5750},{ 332, 6027},{ 373, 6298}, + { 421, 6559},{ 473, 6805},{ 526, 7053},{ 587, 7298} + } + }, + { + /*Cr qi=30 INTRA*/ + { + { 10, 7},{ 89, 384},{ 147, 773},{ 192, 1211}, + { 245, 1627},{ 322, 1984},{ 412, 2280},{ 501, 2520}, + { 583, 2750},{ 654, 2982},{ 736, 3207},{ 810, 3419}, + { 873, 3614},{ 957, 3794},{ 1048, 3965},{ 1139, 
4102}, + { 1237, 4208},{ 1327, 4328},{ 1408, 4448},{ 1496, 4545}, + { 1604, 4652},{ 1699, 4760},{ 1780, 4877},{ 1937, 4942} + }, + /*Cr qi=30 INTER*/ + { + { 115, 26},{ 89, 342},{ 70, 672},{ 79, 1055}, + { 96, 1451},{ 108, 1841},{ 116, 2222},{ 124, 2599}, + { 132, 2965},{ 141, 3316},{ 151, 3655},{ 163, 3984}, + { 178, 4301},{ 197, 4609},{ 219, 4909},{ 247, 5195}, + { 280, 5469},{ 317, 5734},{ 351, 5991},{ 383, 6248}, + { 423, 6500},{ 467, 6744},{ 502, 6995},{ 558, 7226} + } + } + }, + { + { + /*Y' qi=31 INTRA*/ + { + { 116, 20},{ 359, 1361},{ 732, 2350},{ 1162, 3010}, + { 1597, 3507},{ 2042, 3950},{ 2503, 4339},{ 2974, 4670}, + { 3446, 4951},{ 3922, 5179},{ 4394, 5357},{ 4858, 5454}, + { 5313, 5519},{ 5734, 5626},{ 6154, 5755},{ 6585, 5859}, + { 7004, 5928},{ 7408, 5998},{ 7775, 6039},{ 8102, 6048}, + { 8442, 6051},{ 8790, 6054},{ 9136, 6057},{ 9554, 6041} + }, + /*Y' qi=31 INTER*/ + { + { 53, 12},{ 90, 1340},{ 169, 2765},{ 259, 4062}, + { 353, 5236},{ 483, 6340},{ 713, 7305},{ 1086, 8059}, + { 1651, 8548},{ 2423, 8751},{ 3288, 8754},{ 4106, 8674}, + { 4827, 8572},{ 5451, 8482},{ 6007, 8407},{ 6514, 8344}, + { 6970, 8282},{ 7397, 8225},{ 7795, 8193},{ 8159, 8161}, + { 8498, 8120},{ 8814, 8093},{ 9127, 8066},{ 9432, 8040} + } + }, + { + /*Cb qi=31 INTRA*/ + { + { 7, 3},{ 88, 369},{ 149, 746},{ 185, 1173}, + { 234, 1595},{ 308, 1967},{ 399, 2278},{ 494, 2537}, + { 583, 2774},{ 669, 2997},{ 755, 3204},{ 847, 3390}, + { 936, 3569},{ 1008, 3759},{ 1078, 3942},{ 1162, 4104}, + { 1262, 4238},{ 1352, 4364},{ 1442, 4470},{ 1557, 4567}, + { 1676, 4674},{ 1759, 4781},{ 1850, 4853},{ 2043, 4897} + }, + /*Cb qi=31 INTER*/ + { + { 121, 23},{ 96, 335},{ 72, 660},{ 74, 1043}, + { 90, 1440},{ 105, 1834},{ 116, 2217},{ 127, 2586}, + { 138, 2945},{ 148, 3293},{ 159, 3626},{ 172, 3945}, + { 185, 4256},{ 202, 4559},{ 223, 4856},{ 245, 5150}, + { 272, 5440},{ 306, 5719},{ 346, 5989},{ 391, 6253}, + { 443, 6511},{ 510, 6743},{ 583, 6965},{ 651, 7182} + } + }, + { + /*Cr qi=31 INTRA*/ + { 
+ { 10, 7},{ 88, 384},{ 147, 773},{ 192, 1209}, + { 247, 1622},{ 326, 1974},{ 417, 2262},{ 509, 2500}, + { 596, 2726},{ 670, 2949},{ 754, 3170},{ 836, 3370}, + { 912, 3548},{ 999, 3724},{ 1093, 3888},{ 1198, 4000}, + { 1304, 4095},{ 1384, 4230},{ 1470, 4347},{ 1577, 4422}, + { 1696, 4513},{ 1798, 4620},{ 1869, 4746},{ 1991, 4798} + }, + /*Cr qi=31 INTER*/ + { + { 113, 32},{ 88, 345},{ 69, 674},{ 79, 1055}, + { 96, 1451},{ 108, 1839},{ 115, 2218},{ 123, 2592}, + { 132, 2957},{ 141, 3308},{ 151, 3643},{ 163, 3968}, + { 179, 4285},{ 200, 4590},{ 225, 4886},{ 254, 5169}, + { 291, 5436},{ 330, 5696},{ 368, 5951},{ 409, 6200}, + { 452, 6448},{ 493, 6695},{ 536, 6940},{ 571, 7204} + } + } + }, + { + { + /*Y' qi=32 INTRA*/ + { + { 123, 26},{ 370, 1356},{ 756, 2321},{ 1211, 2944}, + { 1674, 3408},{ 2148, 3826},{ 2639, 4193},{ 3138, 4504}, + { 3634, 4765},{ 4133, 4973},{ 4625, 5137},{ 5101, 5225}, + { 5567, 5274},{ 6002, 5363},{ 6437, 5482},{ 6885, 5566}, + { 7312, 5625},{ 7723, 5686},{ 8101, 5721},{ 8429, 5732}, + { 8769, 5728},{ 9120, 5726},{ 9472, 5723},{ 9918, 5700} + }, + /*Y' qi=32 INTER*/ + { + { 54, -3},{ 95, 1343},{ 179, 2750},{ 276, 4027}, + { 382, 5185},{ 543, 6256},{ 830, 7161},{ 1301, 7815}, + { 2003, 8172},{ 2883, 8266},{ 3779, 8217},{ 4578, 8127}, + { 5274, 8035},{ 5886, 7952},{ 6430, 7887},{ 6929, 7835}, + { 7380, 7779},{ 7796, 7737},{ 8190, 7705},{ 8552, 7672}, + { 8896, 7640},{ 9210, 7612},{ 9510, 7589},{ 9746, 7552} + } + }, + { + /*Cb qi=32 INTRA*/ + { + { 6, 3},{ 89, 369},{ 153, 746},{ 193, 1167}, + { 247, 1577},{ 330, 1935},{ 429, 2236},{ 528, 2494}, + { 620, 2732},{ 712, 2948},{ 801, 3146},{ 898, 3325}, + { 999, 3489},{ 1078, 3664},{ 1155, 3832},{ 1251, 3985}, + { 1360, 4115},{ 1451, 4236},{ 1549, 4338},{ 1667, 4433}, + { 1797, 4522},{ 1891, 4613},{ 1989, 4687},{ 2162, 4776} + }, + /*Cb qi=32 INTER*/ + { + { 116, -1},{ 98, 321},{ 80, 656},{ 80, 1042}, + { 96, 1438},{ 110, 1827},{ 122, 2205},{ 133, 2570}, + { 144, 2925},{ 157, 3268},{ 170, 3597},{ 185, 
3911}, + { 202, 4216},{ 221, 4516},{ 244, 4809},{ 273, 5096}, + { 308, 5376},{ 350, 5644},{ 401, 5907},{ 459, 6160}, + { 520, 6401},{ 592, 6630},{ 676, 6837},{ 758, 7050} + } + }, + { + /*Cr qi=32 INTRA*/ + { + { 12, 7},{ 91, 386},{ 152, 773},{ 201, 1202}, + { 261, 1603},{ 347, 1942},{ 447, 2223},{ 540, 2460}, + { 626, 2684},{ 711, 2901},{ 801, 3115},{ 887, 3312}, + { 969, 3480},{ 1068, 3633},{ 1176, 3779},{ 1283, 3885}, + { 1392, 3969},{ 1485, 4090},{ 1573, 4206},{ 1686, 4274}, + { 1813, 4354},{ 1911, 4459},{ 2004, 4563},{ 2162, 4590} + }, + /*Cr qi=32 INTER*/ + { + { 129, 5},{ 98, 334},{ 75, 673},{ 84, 1055}, + { 101, 1448},{ 113, 1832},{ 121, 2206},{ 129, 2577}, + { 140, 2937},{ 151, 3282},{ 163, 3614},{ 179, 3932}, + { 198, 4240},{ 221, 4542},{ 252, 4830},{ 290, 5102}, + { 329, 5364},{ 373, 5618},{ 420, 5864},{ 468, 6105}, + { 513, 6351},{ 564, 6587},{ 624, 6810},{ 697, 7017} + } + } + }, + { + { + /*Y' qi=33 INTRA*/ + { + { 115, 36},{ 388, 1338},{ 791, 2289},{ 1258, 2899}, + { 1732, 3352},{ 2220, 3760},{ 2730, 4117},{ 3244, 4415}, + { 3751, 4662},{ 4261, 4858},{ 4766, 5012},{ 5249, 5094}, + { 5719, 5141},{ 6159, 5225},{ 6597, 5333},{ 7044, 5416}, + { 7474, 5472},{ 7893, 5531},{ 8268, 5570},{ 8591, 5580}, + { 8931, 5578},{ 9283, 5579},{ 9634, 5582},{10067, 5560} + }, + /*Y' qi=33 INTER*/ + { + { 65, -14},{ 102, 1345},{ 190, 2736},{ 294, 3999}, + { 411, 5146},{ 597, 6192},{ 934, 7045},{ 1488, 7622}, + { 2281, 7895},{ 3213, 7937},{ 4108, 7871},{ 4883, 7784}, + { 5556, 7709},{ 6150, 7643},{ 6685, 7585},{ 7176, 7539}, + { 7620, 7502},{ 8034, 7466},{ 8427, 7435},{ 8793, 7409}, + { 9136, 7386},{ 9446, 7364},{ 9743, 7339},{10025, 7303} + } + }, + { + /*Cb qi=33 INTRA*/ + { + { 5, 3},{ 92, 369},{ 159, 746},{ 203, 1163}, + { 263, 1564},{ 353, 1911},{ 458, 2204},{ 557, 2460}, + { 650, 2697},{ 744, 2913},{ 836, 3110},{ 934, 3292}, + { 1036, 3454},{ 1125, 3616},{ 1204, 3781},{ 1298, 3932}, + { 1410, 4058},{ 1507, 4170},{ 1606, 4265},{ 1725, 4358}, + { 1853, 4445},{ 1955, 
4535},{ 2067, 4597},{ 2258, 4663} + }, + /*Cb qi=33 INTER*/ + { + { 109, 37},{ 94, 343},{ 81, 662},{ 85, 1042}, + { 102, 1436},{ 116, 1823},{ 128, 2195},{ 141, 2554}, + { 154, 2906},{ 167, 3246},{ 183, 3570},{ 202, 3881}, + { 220, 4185},{ 241, 4482},{ 268, 4772},{ 302, 5053}, + { 341, 5328},{ 388, 5592},{ 446, 5846},{ 507, 6096}, + { 581, 6328},{ 670, 6534},{ 762, 6731},{ 842, 6922} + } + }, + { + /*Cr qi=33 INTRA*/ + { + { 11, 7},{ 93, 387},{ 158, 774},{ 211, 1197}, + { 278, 1589},{ 372, 1917},{ 475, 2191},{ 569, 2429}, + { 658, 2655},{ 744, 2868},{ 835, 3083},{ 926, 3271}, + { 1010, 3430},{ 1110, 3586},{ 1224, 3724},{ 1336, 3826}, + { 1449, 3908},{ 1547, 4021},{ 1636, 4136},{ 1751, 4200}, + { 1886, 4277},{ 1977, 4384},{ 2070, 4474},{ 2232, 4510} + }, + /*Cr qi=33 INTER*/ + { + { 77, 9},{ 90, 347},{ 80, 674},{ 91, 1053}, + { 107, 1444},{ 119, 1825},{ 127, 2196},{ 137, 2563}, + { 149, 2919},{ 161, 3259},{ 176, 3588},{ 194, 3905}, + { 217, 4209},{ 246, 4504},{ 280, 4786},{ 320, 5055}, + { 364, 5316},{ 409, 5565},{ 460, 5804},{ 517, 6039}, + { 578, 6264},{ 640, 6489},{ 701, 6721},{ 772, 6948} + } + } + }, + { + { + /*Y' qi=34 INTRA*/ + { + { 124, 40},{ 401, 1333},{ 823, 2262},{ 1318, 2842}, + { 1823, 3265},{ 2339, 3650},{ 2872, 3991},{ 3405, 4274}, + { 3926, 4513},{ 4448, 4704},{ 4961, 4845},{ 5450, 4921}, + { 5925, 4971},{ 6372, 5053},{ 6813, 5160},{ 7264, 5242}, + { 7704, 5291},{ 8124, 5346},{ 8500, 5382},{ 8831, 5384}, + { 9178, 5380},{ 9525, 5387},{ 9869, 5389},{10310, 5356} + }, + /*Y' qi=34 INTER*/ + { + { 64, -17},{ 101, 1344},{ 190, 2730},{ 299, 3981}, + { 430, 5110},{ 648, 6127},{ 1036, 6933},{ 1664, 7445}, + { 2535, 7652},{ 3504, 7653},{ 4402, 7572},{ 5173, 7479}, + { 5843, 7400},{ 6441, 7334},{ 6976, 7280},{ 7464, 7231}, + { 7910, 7189},{ 8332, 7157},{ 8730, 7125},{ 9091, 7103}, + { 9422, 7086},{ 9753, 7061},{10067, 7036},{10316, 7029} + } + }, + { + /*Cb qi=34 INTRA*/ + { + { 5, 3},{ 91, 369},{ 158, 746},{ 204, 1162}, + { 266, 1561},{ 358, 1903},{ 466, 
2189},{ 570, 2439}, + { 665, 2671},{ 765, 2880},{ 864, 3069},{ 970, 3238}, + { 1079, 3392},{ 1174, 3545},{ 1265, 3693},{ 1360, 3841}, + { 1471, 3968},{ 1572, 4083},{ 1675, 4181},{ 1804, 4255}, + { 1939, 4332},{ 2048, 4411},{ 2155, 4484},{ 2339, 4584} + }, + /*Cb qi=34 INTER*/ + { + { 99, 44},{ 92, 345},{ 82, 661},{ 86, 1043}, + { 101, 1436},{ 116, 1821},{ 128, 2191},{ 140, 2549}, + { 154, 2898},{ 168, 3235},{ 185, 3556},{ 203, 3865}, + { 224, 4166},{ 248, 4457},{ 278, 4741},{ 315, 5021}, + { 361, 5289},{ 416, 5546},{ 483, 5792},{ 559, 6025}, + { 651, 6237},{ 752, 6432},{ 849, 6626},{ 967, 6790} + } + }, + { + /*Cr qi=34 INTRA*/ + { + { 11, 7},{ 93, 387},{ 158, 773},{ 212, 1195}, + { 282, 1584},{ 378, 1909},{ 483, 2179},{ 578, 2414}, + { 671, 2633},{ 766, 2837},{ 866, 3038},{ 960, 3223}, + { 1049, 3376},{ 1158, 3520},{ 1285, 3644},{ 1400, 3740}, + { 1505, 3828},{ 1616, 3928},{ 1713, 4030},{ 1820, 4104}, + { 1957, 4185},{ 2063, 4280},{ 2160, 4355},{ 2320, 4341} + }, + /*Cr qi=34 INTER*/ + { + { 78, 11},{ 89, 347},{ 79, 674},{ 90, 1053}, + { 106, 1444},{ 117, 1823},{ 127, 2192},{ 137, 2558}, + { 149, 2912},{ 163, 3249},{ 178, 3574},{ 197, 3888}, + { 222, 4189},{ 252, 4481},{ 293, 4755},{ 341, 5013}, + { 386, 5268},{ 436, 5512},{ 498, 5743},{ 563, 5970}, + { 622, 6200},{ 694, 6415},{ 776, 6622},{ 871, 6818} + } + } + }, + { + { + /*Y' qi=35 INTRA*/ + { + { 116, 51},{ 433, 1312},{ 881, 2221},{ 1406, 2771}, + { 1948, 3156},{ 2511, 3501},{ 3085, 3811},{ 3654, 4066}, + { 4212, 4273},{ 4763, 4444},{ 5298, 4572},{ 5799, 4638}, + { 6285, 4678},{ 6747, 4746},{ 7203, 4838},{ 7673, 4905}, + { 8124, 4950},{ 8552, 5003},{ 8938, 5027},{ 9275, 5026}, + { 9628, 5019},{ 9981, 5024},{10331, 5030},{10795, 5000} + }, + /*Y' qi=35 INTER*/ + { + { 71, -10},{ 108, 1348},{ 203, 2710},{ 325, 3938}, + { 485, 5040},{ 766, 6000},{ 1267, 6706},{ 2048, 7089}, + { 3037, 7191},{ 4032, 7146},{ 4903, 7061},{ 5648, 6977}, + { 6301, 6912},{ 6884, 6857},{ 7413, 6812},{ 7898, 6775}, + { 8342, 6739},{ 
8764, 6710},{ 9160, 6688},{ 9519, 6668}, + { 9859, 6646},{10190, 6625},{10492, 6612},{10755, 6595} + } + }, + { + /*Cb qi=35 INTRA*/ + { + { 6, 3},{ 95, 369},{ 164, 746},{ 214, 1156}, + { 287, 1542},{ 390, 1869},{ 504, 2143},{ 611, 2388}, + { 712, 2613},{ 822, 2811},{ 937, 2987},{ 1055, 3147}, + { 1174, 3285},{ 1286, 3420},{ 1386, 3560},{ 1488, 3698}, + { 1604, 3814},{ 1714, 3916},{ 1825, 4008},{ 1958, 4088}, + { 2101, 4159},{ 2224, 4226},{ 2339, 4292},{ 2538, 4383} + }, + /*Cb qi=35 INTER*/ + { + { 98, 41},{ 90, 348},{ 86, 665},{ 92, 1042}, + { 108, 1432},{ 122, 1812},{ 136, 2175},{ 151, 2528}, + { 165, 2872},{ 182, 3202},{ 202, 3516},{ 225, 3819}, + { 251, 4112},{ 281, 4398},{ 320, 4675},{ 367, 4944}, + { 421, 5204},{ 493, 5450},{ 579, 5679},{ 672, 5892}, + { 785, 6082},{ 906, 6258},{ 1026, 6432},{ 1153, 6592} + } + }, + { + /*Cr qi=35 INTRA*/ + { + { 12, 7},{ 98, 388},{ 166, 773},{ 226, 1187}, + { 306, 1563},{ 411, 1874},{ 524, 2134},{ 622, 2365}, + { 721, 2577},{ 826, 2768},{ 947, 2946},{ 1066, 3106}, + { 1163, 3250},{ 1274, 3395},{ 1417, 3508},{ 1539, 3590}, + { 1639, 3671},{ 1754, 3765},{ 1865, 3855},{ 1979, 3921}, + { 2127, 3998},{ 2249, 4085},{ 2346, 4172},{ 2473, 4210} + }, + /*Cr qi=35 INTER*/ + { + { 86, 12},{ 94, 354},{ 85, 677},{ 96, 1052}, + { 113, 1439},{ 125, 1811},{ 135, 2177},{ 147, 2537}, + { 160, 2884},{ 177, 3215},{ 195, 3535},{ 219, 3842}, + { 252, 4133},{ 292, 4413},{ 339, 4680},{ 396, 4928}, + { 455, 5169},{ 514, 5408},{ 588, 5626},{ 672, 5835}, + { 750, 6051},{ 837, 6257},{ 943, 6442},{ 1073, 6595} + } + } + }, + { + { + /*Y' qi=36 INTRA*/ + { + { 116, 52},{ 432, 1312},{ 881, 2215},{ 1407, 2759}, + { 1948, 3140},{ 2511, 3484},{ 3090, 3789},{ 3672, 4036}, + { 4243, 4236},{ 4803, 4397},{ 5346, 4517},{ 5856, 4581}, + { 6350, 4614},{ 6821, 4675},{ 7286, 4763},{ 7754, 4832}, + { 8201, 4875},{ 8631, 4922},{ 9015, 4948},{ 9351, 4945}, + { 9706, 4941},{10061, 4948},{10408, 4949},{10878, 4923} + }, + /*Y' qi=36 INTER*/ + { + { 63, -16},{ 114, 
1332},{ 216, 2690},{ 343, 3914}, + { 515, 5009},{ 829, 5939},{ 1399, 6586},{ 2263, 6901}, + { 3290, 6967},{ 4272, 6920},{ 5115, 6847},{ 5839, 6779}, + { 6478, 6726},{ 7051, 6685},{ 7571, 6649},{ 8050, 6614}, + { 8495, 6587},{ 8908, 6567},{ 9298, 6550},{ 9673, 6530}, + {10005, 6512},{10324, 6499},{10640, 6483},{10936, 6487} + } + }, + { + /*Cb qi=36 INTRA*/ + { + { 6, 3},{ 98, 370},{ 170, 746},{ 225, 1150}, + { 306, 1527},{ 416, 1845},{ 534, 2116},{ 642, 2363}, + { 743, 2591},{ 851, 2794},{ 964, 2972},{ 1081, 3133}, + { 1198, 3275},{ 1311, 3410},{ 1411, 3547},{ 1519, 3680}, + { 1642, 3789},{ 1750, 3892},{ 1860, 3982},{ 1998, 4054}, + { 2141, 4129},{ 2256, 4204},{ 2372, 4278},{ 2567, 4356} + }, + /*Cb qi=36 INTER*/ + { + { 107, 30},{ 96, 346},{ 88, 667},{ 100, 1039}, + { 115, 1426},{ 128, 1804},{ 142, 2164},{ 158, 2512}, + { 176, 2851},{ 195, 3178},{ 218, 3491},{ 243, 3791}, + { 270, 4084},{ 307, 4365},{ 348, 4638},{ 397, 4908}, + { 464, 5157},{ 545, 5392},{ 635, 5620},{ 734, 5831}, + { 854, 6015},{ 993, 6170},{ 1124, 6327},{ 1234, 6502} + } + }, + { + /*Cr qi=36 INTRA*/ + { + { 12, 7},{ 102, 388},{ 172, 773},{ 239, 1182}, + { 328, 1546},{ 439, 1848},{ 554, 2106},{ 651, 2341}, + { 747, 2561},{ 850, 2757},{ 972, 2934},{ 1086, 3097}, + { 1182, 3245},{ 1302, 3382},{ 1447, 3491},{ 1572, 3567}, + { 1677, 3641},{ 1793, 3733},{ 1899, 3828},{ 2013, 3894}, + { 2163, 3967},{ 2283, 4059},{ 2387, 4142},{ 2559, 4145} + }, + /*Cr qi=36 INTER*/ + { + { 98, -10},{ 96, 347},{ 89, 676},{ 102, 1048}, + { 118, 1433},{ 130, 1804},{ 141, 2167},{ 154, 2523}, + { 171, 2866},{ 190, 3194},{ 212, 3508},{ 240, 3809}, + { 276, 4099},{ 320, 4377},{ 372, 4638},{ 428, 4887}, + { 492, 5122},{ 560, 5353},{ 638, 5572},{ 725, 5779}, + { 814, 5985},{ 902, 6192},{ 1013, 6377},{ 1155, 6527} + } + } + }, + { + { + /*Y' qi=37 INTRA*/ + { + { 109, 58},{ 445, 1302},{ 927, 2177},{ 1489, 2689}, + { 2053, 3052},{ 2632, 3387},{ 3230, 3683},{ 3830, 3922}, + { 4417, 4114},{ 4992, 4266},{ 5546, 4375},{ 6067, 4430}, 
+ { 6571, 4459},{ 7046, 4516},{ 7513, 4599},{ 7991, 4663}, + { 8445, 4706},{ 8883, 4749},{ 9273, 4771},{ 9612, 4770}, + { 9970, 4765},{10325, 4773},{10672, 4778},{11106, 4758} + }, + /*Y' qi=37 INTER*/ + { + { 56, -14},{ 114, 1333},{ 218, 2683},{ 354, 3894}, + { 550, 4966},{ 916, 5854},{ 1569, 6437},{ 2520, 6685}, + { 3596, 6704},{ 4585, 6635},{ 5424, 6556},{ 6147, 6489}, + { 6787, 6437},{ 7358, 6395},{ 7876, 6358},{ 8361, 6325}, + { 8807, 6294},{ 9229, 6271},{ 9631, 6253},{10002, 6238}, + {10356, 6228},{10678, 6212},{10975, 6197},{11274, 6185} + } + }, + { + /*Cb qi=37 INTRA*/ + { + { 6, 3},{ 99, 370},{ 171, 746},{ 227, 1149}, + { 309, 1522},{ 421, 1836},{ 541, 2104},{ 652, 2347}, + { 757, 2572},{ 871, 2768},{ 989, 2936},{ 1111, 3087}, + { 1238, 3223},{ 1357, 3352},{ 1465, 3486},{ 1576, 3612}, + { 1709, 3705},{ 1828, 3801},{ 1937, 3895},{ 2076, 3967}, + { 2220, 4035},{ 2345, 4104},{ 2466, 4173},{ 2680, 4265} + }, + /*Cb qi=37 INTER*/ + { + { 111, 27},{ 97, 344},{ 87, 667},{ 99, 1038}, + { 115, 1425},{ 128, 1802},{ 143, 2160},{ 159, 2506}, + { 176, 2843},{ 198, 3167},{ 220, 3477},{ 247, 3774}, + { 280, 4061},{ 321, 4338},{ 368, 4608},{ 427, 4867}, + { 501, 5109},{ 595, 5332},{ 701, 5544},{ 818, 5738}, + { 956, 5905},{ 1105, 6066},{ 1248, 6217},{ 1381, 6353} + } + }, + { + /*Cr qi=37 INTRA*/ + { + { 12, 7},{ 102, 388},{ 173, 773},{ 242, 1180}, + { 331, 1541},{ 444, 1839},{ 562, 2095},{ 662, 2326}, + { 763, 2540},{ 871, 2728},{ 1003, 2892},{ 1130, 3045}, + { 1230, 3188},{ 1350, 3321},{ 1503, 3418},{ 1634, 3492}, + { 1737, 3568},{ 1856, 3653},{ 1970, 3744},{ 2091, 3802}, + { 2247, 3871},{ 2371, 3962},{ 2477, 4041},{ 2655, 4052} + }, + /*Cr qi=37 INTER*/ + { + { 89, -9},{ 97, 347},{ 88, 677},{ 102, 1048}, + { 118, 1432},{ 130, 1802},{ 141, 2163},{ 154, 2517}, + { 172, 2857},{ 192, 3181},{ 216, 3494},{ 246, 3793}, + { 286, 4074},{ 337, 4343},{ 395, 4600},{ 464, 4837}, + { 534, 5066},{ 608, 5289},{ 694, 5501},{ 788, 5704}, + { 893, 5901},{ 1010, 6088},{ 1151, 6249},{ 
1331, 6374} + } + } + }, + { + { + /*Y' qi=38 INTRA*/ + { + { 107, 65},{ 476, 1286},{ 968, 2148},{ 1548, 2641}, + { 2141, 2979},{ 2757, 3289},{ 3390, 3564},{ 4020, 3784}, + { 4632, 3957},{ 5224, 4097},{ 5794, 4201},{ 6326, 4250}, + { 6828, 4274},{ 7309, 4322},{ 7790, 4401},{ 8271, 4463}, + { 8729, 4498},{ 9165, 4540},{ 9552, 4566},{ 9901, 4560}, + {10266, 4552},{10617, 4563},{10964, 4572},{11393, 4567} + }, + /*Y' qi=38 INTER*/ + { + { 57, -13},{ 118, 1332},{ 233, 2665},{ 386, 3856}, + { 620, 4899},{ 1070, 5722},{ 1849, 6211},{ 2898, 6384}, + { 3989, 6376},{ 4947, 6311},{ 5754, 6249},{ 6454, 6199}, + { 7077, 6161},{ 7640, 6132},{ 8159, 6101},{ 8639, 6076}, + { 9081, 6054},{ 9502, 6037},{ 9900, 6027},{10274, 6012}, + {10621, 5999},{10938, 5991},{11237, 5977},{11557, 5966} + } + }, + { + /*Cb qi=38 INTRA*/ + { + { 8, 3},{ 104, 370},{ 179, 744},{ 243, 1139}, + { 338, 1498},{ 458, 1801},{ 584, 2060},{ 700, 2297}, + { 812, 2514},{ 935, 2699},{ 1061, 2858},{ 1189, 3007}, + { 1321, 3141},{ 1446, 3266},{ 1563, 3388},{ 1684, 3512}, + { 1816, 3614},{ 1942, 3702},{ 2055, 3793},{ 2201, 3857}, + { 2357, 3923},{ 2477, 3994},{ 2593, 4061},{ 2768, 4178} + }, + /*Cb qi=38 INTER*/ + { + { 118, 24},{ 102, 342},{ 91, 663},{ 101, 1040}, + { 116, 1427},{ 131, 1799},{ 147, 2152},{ 168, 2491}, + { 191, 2822},{ 215, 3139},{ 244, 3441},{ 276, 3731}, + { 316, 4013},{ 363, 4286},{ 423, 4546},{ 495, 4795}, + { 584, 5028},{ 691, 5242},{ 814, 5439},{ 959, 5608}, + { 1119, 5759},{ 1277, 5906},{ 1449, 6035},{ 1655, 6144} + } + }, + { + /*Cr qi=38 INTRA*/ + { + { 12, 6},{ 106, 387},{ 182, 771},{ 261, 1168}, + { 364, 1514},{ 483, 1802},{ 603, 2053},{ 707, 2282}, + { 817, 2489},{ 933, 2670},{ 1074, 2825},{ 1210, 2967}, + { 1320, 3104},{ 1444, 3229},{ 1599, 3324},{ 1735, 3396}, + { 1846, 3464},{ 1971, 3547},{ 2086, 3646},{ 2206, 3711}, + { 2366, 3773},{ 2499, 3859},{ 2603, 3945},{ 2766, 3952} + }, + /*Cr qi=38 INTER*/ + { + { 86, -9},{ 91, 352},{ 85, 680},{ 102, 1053}, + { 119, 1435},{ 132, 1799},{ 
146, 2153},{ 162, 2501}, + { 183, 2835},{ 209, 3154},{ 240, 3458},{ 278, 3751}, + { 327, 4025},{ 388, 4284},{ 455, 4532},{ 529, 4766}, + { 616, 4980},{ 711, 5188},{ 815, 5386},{ 920, 5583}, + { 1042, 5770},{ 1186, 5936},{ 1348, 6080},{ 1542, 6196} + } + } + }, + { + { + /*Y' qi=39 INTRA*/ + { + { 103, 66},{ 479, 1283},{ 998, 2125},{ 1610, 2591}, + { 2223, 2913},{ 2855, 3214},{ 3501, 3482},{ 4146, 3698}, + { 4772, 3868},{ 5376, 3999},{ 5956, 4095},{ 6496, 4140}, + { 7008, 4162},{ 7499, 4209},{ 7987, 4282},{ 8478, 4338}, + { 8947, 4374},{ 9385, 4417},{ 9783, 4437},{10143, 4433}, + {10504, 4424},{10866, 4435},{11225, 4444},{11665, 4430} + }, + /*Y' qi=39 INTER*/ + { + { 56, 2},{ 118, 1332},{ 235, 2660},{ 395, 3843}, + { 653, 4867},{ 1153, 5652},{ 2003, 6089},{ 3113, 6214}, + { 4228, 6178},{ 5189, 6102},{ 6002, 6031},{ 6707, 5976}, + { 7336, 5936},{ 7901, 5900},{ 8424, 5870},{ 8915, 5844}, + { 9361, 5822},{ 9784, 5807},{10187, 5794},{10571, 5778}, + {10931, 5763},{11264, 5751},{11582, 5742},{11916, 5730} + } + }, + { + /*Cb qi=39 INTRA*/ + { + { 8, 3},{ 104, 370},{ 179, 744},{ 244, 1138}, + { 340, 1496},{ 461, 1796},{ 588, 2053},{ 705, 2288}, + { 820, 2503},{ 945, 2684},{ 1073, 2840},{ 1210, 2981}, + { 1352, 3106},{ 1480, 3225},{ 1603, 3342},{ 1728, 3464}, + { 1865, 3559},{ 1990, 3645},{ 2106, 3734},{ 2258, 3796}, + { 2413, 3856},{ 2540, 3920},{ 2667, 3986},{ 2887, 4060} + }, + /*Cb qi=39 INTER*/ + { + { 119, 19},{ 103, 340},{ 90, 664},{ 100, 1040}, + { 115, 1426},{ 131, 1797},{ 148, 2148},{ 169, 2486}, + { 192, 2816},{ 217, 3131},{ 247, 3432},{ 282, 3721}, + { 324, 3999},{ 374, 4268},{ 435, 4526},{ 520, 4766}, + { 621, 4990},{ 738, 5194},{ 878, 5376},{ 1035, 5543}, + { 1202, 5686},{ 1374, 5819},{ 1545, 5950},{ 1729, 6064} + } + }, + { + /*Cr qi=39 INTRA*/ + { + { 12, 6},{ 106, 387},{ 182, 771},{ 262, 1167}, + { 365, 1512},{ 486, 1798},{ 608, 2047},{ 713, 2274}, + { 824, 2479},{ 945, 2655},{ 1091, 2804},{ 1231, 2941}, + { 1346, 3073},{ 1475, 3194},{ 1633, 3282},{ 1778, 
3345}, + { 1891, 3414},{ 2013, 3501},{ 2138, 3584},{ 2266, 3640}, + { 2428, 3701},{ 2568, 3782},{ 2674, 3863},{ 2816, 3894} + }, + /*Cr qi=39 INTER*/ + { + { 88, -7},{ 92, 352},{ 85, 680},{ 102, 1053}, + { 119, 1434},{ 132, 1797},{ 146, 2151},{ 163, 2498}, + { 185, 2830},{ 211, 3147},{ 243, 3451},{ 285, 3735}, + { 337, 4005},{ 401, 4260},{ 477, 4499},{ 565, 4721}, + { 655, 4937},{ 749, 5148},{ 858, 5344},{ 979, 5529}, + { 1110, 5710},{ 1264, 5871},{ 1460, 5990},{ 1677, 6086} + } + } + }, + { + { + /*Y' qi=40 INTRA*/ + { + { 98, 71},{ 491, 1274},{ 1023, 2103},{ 1641, 2559}, + { 2257, 2877},{ 2898, 3171},{ 3566, 3429},{ 4233, 3629}, + { 4881, 3784},{ 5499, 3906},{ 6088, 3997},{ 6631, 4040}, + { 7145, 4060},{ 7640, 4107},{ 8128, 4178},{ 8618, 4233}, + { 9077, 4267},{ 9514, 4304},{ 9919, 4324},{10277, 4317}, + {10635, 4312},{10985, 4324},{11338, 4331},{11792, 4334} + }, + /*Y' qi=40 INTER*/ + { + { 63, -26},{ 125, 1331},{ 256, 2640},{ 439, 3801}, + { 757, 4782},{ 1391, 5474},{ 2399, 5805},{ 3582, 5870}, + { 4678, 5824},{ 5600, 5763},{ 6386, 5710},{ 7076, 5667}, + { 7693, 5637},{ 8252, 5610},{ 8775, 5586},{ 9255, 5571}, + { 9694, 5556},{10115, 5541},{10530, 5530},{10903, 5522}, + {11242, 5515},{11596, 5501},{11904, 5482},{12205, 5475} + } + }, + { + /*Cb qi=40 INTRA*/ + { + { 8, 3},{ 108, 371},{ 189, 743},{ 265, 1128}, + { 371, 1475},{ 499, 1767},{ 628, 2022},{ 746, 2256}, + { 864, 2467},{ 991, 2647},{ 1124, 2801},{ 1270, 2933}, + { 1412, 3054},{ 1547, 3165},{ 1677, 3277},{ 1804, 3393}, + { 1946, 3483},{ 2078, 3569},{ 2201, 3651},{ 2352, 3711}, + { 2513, 3766},{ 2643, 3826},{ 2775, 3880},{ 3025, 3919} + }, + /*Cb qi=40 INTER*/ + { + { 114, 35},{ 104, 349},{ 96, 667},{ 106, 1040}, + { 121, 1423},{ 138, 1789},{ 158, 2132},{ 184, 2464}, + { 212, 2787},{ 242, 3095},{ 279, 3389},{ 321, 3671}, + { 374, 3941},{ 438, 4199},{ 517, 4446},{ 617, 4673}, + { 740, 4881},{ 891, 5064},{ 1058, 5225},{ 1239, 5372}, + { 1441, 5499},{ 1638, 5610},{ 1840, 5719},{ 2076, 5814} + } + }, + { + 
/*Cr qi=40 INTRA*/ + { + { 14, 7},{ 114, 389},{ 193, 771},{ 283, 1156}, + { 399, 1488},{ 523, 1768},{ 643, 2018},{ 752, 2245}, + { 865, 2450},{ 984, 2626},{ 1139, 2763},{ 1290, 2887}, + { 1413, 3014},{ 1550, 3128},{ 1711, 3211},{ 1865, 3268}, + { 1981, 3334},{ 2103, 3415},{ 2237, 3486},{ 2365, 3543}, + { 2529, 3610},{ 2666, 3700},{ 2775, 3779},{ 2929, 3803} + }, + /*Cr qi=40 INTER*/ + { + { 89, -8},{ 95, 353},{ 90, 681},{ 107, 1053}, + { 124, 1430},{ 139, 1787},{ 156, 2136},{ 177, 2477}, + { 203, 2803},{ 237, 3112},{ 276, 3406},{ 329, 3683}, + { 395, 3942},{ 475, 4182},{ 567, 4407},{ 665, 4624}, + { 767, 4834},{ 879, 5032},{ 1011, 5213},{ 1169, 5375}, + { 1348, 5525},{ 1547, 5654},{ 1785, 5743},{ 2066, 5787} + } + } + }, + { + { + /*Y' qi=41 INTRA*/ + { + { 98, 71},{ 495, 1272},{ 1040, 2090},{ 1675, 2533}, + { 2302, 2842},{ 2953, 3132},{ 3631, 3381},{ 4309, 3574}, + { 4966, 3726},{ 5593, 3846},{ 6189, 3934},{ 6738, 3972}, + { 7256, 3991},{ 7754, 4036},{ 8250, 4099},{ 8747, 4150}, + { 9207, 4185},{ 9650, 4222},{10057, 4242},{10411, 4237}, + {10771, 4230},{11127, 4244},{11486, 4254},{11933, 4252} + }, + /*Y' qi=41 INTER*/ + { + { 65, -25},{ 125, 1331},{ 260, 2633},{ 457, 3782}, + { 807, 4740},{ 1499, 5397},{ 2562, 5693},{ 3766, 5743}, + { 4859, 5695},{ 5776, 5638},{ 6556, 5590},{ 7243, 5554}, + { 7859, 5529},{ 8417, 5506},{ 8935, 5486},{ 9419, 5473}, + { 9869, 5460},{10296, 5446},{10711, 5436},{11089, 5430}, + {11445, 5421},{11802, 5412},{12129, 5404},{12465, 5393} + } + }, + { + /*Cb qi=41 INTRA*/ + { + { 8, 3},{ 108, 371},{ 189, 743},{ 267, 1126}, + { 374, 1471},{ 504, 1760},{ 635, 2011},{ 758, 2241}, + { 881, 2447},{ 1013, 2621},{ 1147, 2773},{ 1293, 2906}, + { 1441, 3023},{ 1580, 3131},{ 1712, 3243},{ 1844, 3360}, + { 1985, 3451},{ 2114, 3532},{ 2240, 3613},{ 2390, 3680}, + { 2550, 3740},{ 2687, 3800},{ 2825, 3862},{ 3052, 3944} + }, + /*Cb qi=41 INTER*/ + { + { 104, 39},{ 100, 350},{ 95, 667},{ 105, 1040}, + { 121, 1422},{ 137, 1787},{ 159, 2129},{ 185, 2459}, + 
{ 216, 2778},{ 249, 3083},{ 287, 3374},{ 335, 3653}, + { 393, 3920},{ 462, 4175},{ 549, 4414},{ 660, 4636}, + { 791, 4839},{ 952, 5014},{ 1135, 5166},{ 1337, 5297}, + { 1552, 5411},{ 1752, 5530},{ 1972, 5634},{ 2224, 5724} + } + }, + { + /*Cr qi=41 INTRA*/ + { + { 15, 7},{ 115, 389},{ 193, 770},{ 284, 1154}, + { 401, 1484},{ 528, 1761},{ 652, 2005},{ 764, 2228}, + { 882, 2427},{ 1008, 2599},{ 1167, 2734},{ 1320, 2859}, + { 1443, 2990},{ 1580, 3103},{ 1743, 3181},{ 1894, 3241}, + { 2012, 3309},{ 2141, 3385},{ 2272, 3459},{ 2398, 3519}, + { 2566, 3584},{ 2707, 3680},{ 2816, 3762},{ 2991, 3770} + }, + /*Cr qi=41 INTER*/ + { + { 92, -9},{ 98, 354},{ 90, 682},{ 107, 1052}, + { 124, 1429},{ 139, 1786},{ 156, 2132},{ 178, 2471}, + { 207, 2794},{ 241, 3100},{ 285, 3391},{ 345, 3662}, + { 417, 3915},{ 503, 4151},{ 600, 4375},{ 703, 4589}, + { 815, 4791},{ 942, 4981},{ 1088, 5155},{ 1250, 5316}, + { 1432, 5462},{ 1653, 5575},{ 1930, 5639},{ 2250, 5655} + } + } + }, + { + { + /*Y' qi=42 INTRA*/ + { + { 109, 75},{ 534, 1257},{ 1114, 2047},{ 1793, 2456}, + { 2461, 2735},{ 3157, 2994},{ 3879, 3221},{ 4595, 3396}, + { 5282, 3531},{ 5931, 3638},{ 6546, 3714},{ 7105, 3749}, + { 7633, 3766},{ 8147, 3803},{ 8652, 3865},{ 9148, 3915}, + { 9613, 3946},{10075, 3976},{10489, 3997},{10835, 3994}, + {11195, 3985},{11553, 3997},{11909, 4004},{12369, 3990} + }, + /*Y' qi=42 INTER*/ + { + { 69, -23},{ 134, 1332},{ 287, 2611},{ 521, 3730}, + { 970, 4624},{ 1827, 5176},{ 3028, 5382},{ 4262, 5389}, + { 5325, 5338},{ 6214, 5291},{ 6976, 5255},{ 7651, 5228}, + { 8260, 5206},{ 8821, 5190},{ 9343, 5177},{ 9823, 5165}, + {10273, 5152},{10709, 5143},{11121, 5136},{11502, 5129}, + {11857, 5125},{12193, 5115},{12520, 5107},{12802, 5097} + } + }, + { + /*Cb qi=42 INTRA*/ + { + { 9, 3},{ 113, 371},{ 199, 743},{ 279, 1123}, + { 390, 1462},{ 525, 1743},{ 662, 1986},{ 789, 2208}, + { 916, 2406},{ 1057, 2571},{ 1204, 2712},{ 1362, 2835}, + { 1524, 2943},{ 1676, 3040},{ 1815, 3145},{ 1959, 3249}, + { 2117, 
3325},{ 2249, 3406},{ 2377, 3488},{ 2537, 3547}, + { 2706, 3597},{ 2854, 3646},{ 2999, 3705},{ 3236, 3759} + }, + /*Cb qi=42 INTER*/ + { + { 114, 44},{ 107, 353},{ 101, 670},{ 111, 1041}, + { 129, 1418},{ 148, 1775},{ 174, 2110},{ 208, 2432}, + { 244, 2746},{ 283, 3046},{ 330, 3330},{ 388, 3602}, + { 460, 3858},{ 546, 4101},{ 655, 4326},{ 793, 4530}, + { 966, 4703},{ 1165, 4851},{ 1388, 4980},{ 1630, 5088}, + { 1869, 5189},{ 2122, 5268},{ 2403, 5328},{ 2667, 5417} + } + }, + { + /*Cr qi=42 INTRA*/ + { + { 15, 7},{ 120, 390},{ 202, 771},{ 298, 1150}, + { 421, 1473},{ 553, 1743},{ 681, 1982},{ 796, 2199}, + { 923, 2388},{ 1062, 2547},{ 1225, 2678},{ 1392, 2792}, + { 1531, 2907},{ 1682, 3007},{ 1856, 3074},{ 2009, 3134}, + { 2138, 3192},{ 2274, 3257},{ 2407, 3333},{ 2536, 3393}, + { 2711, 3455},{ 2875, 3531},{ 3000, 3598},{ 3186, 3599} + }, + /*Cr qi=42 INTER*/ + { + { 87, -4},{ 95, 358},{ 97, 683},{ 113, 1052}, + { 131, 1423},{ 148, 1774},{ 170, 2116},{ 198, 2448}, + { 234, 2762},{ 276, 3062},{ 331, 3343},{ 404, 3603}, + { 494, 3844},{ 598, 4067},{ 715, 4276},{ 842, 4471}, + { 977, 4661},{ 1128, 4840},{ 1311, 4991},{ 1516, 5127}, + { 1759, 5233},{ 2050, 5300},{ 2377, 5323},{ 2710, 5304} + } + } + }, + { + { + /*Y' qi=43 INTRA*/ + { + { 99, 79},{ 557, 1244},{ 1175, 2016},{ 1882, 2408}, + { 2570, 2677},{ 3288, 2926},{ 4030, 3141},{ 4760, 3307}, + { 5458, 3435},{ 6115, 3537},{ 6743, 3608},{ 7312, 3636}, + { 7841, 3652},{ 8357, 3687},{ 8870, 3742},{ 9376, 3788}, + { 9850, 3821},{10315, 3853},{10734, 3873},{11084, 3870}, + {11442, 3862},{11800, 3874},{12160, 3879},{12618, 3876} + }, + /*Y' qi=43 INTER*/ + { + { 69, -22},{ 134, 1331},{ 294, 2601},{ 551, 3703}, + { 1056, 4563},{ 2003, 5061},{ 3276, 5215},{ 4534, 5194}, + { 5599, 5133},{ 6488, 5083},{ 7257, 5044},{ 7938, 5014}, + { 8556, 4992},{ 9124, 4975},{ 9648, 4960},{10138, 4948}, + {10594, 4939},{11039, 4926},{11462, 4919},{11847, 4912}, + {12216, 4904},{12570, 4896},{12883, 4889},{13189, 4879} + } + }, + { + /*Cb 
qi=43 INTRA*/ + { + { 9, 3},{ 114, 371},{ 202, 740},{ 294, 1110}, + { 417, 1440},{ 558, 1716},{ 700, 1956},{ 833, 2172}, + { 966, 2365},{ 1116, 2524},{ 1269, 2661},{ 1431, 2781}, + { 1599, 2885},{ 1756, 2980},{ 1902, 3082},{ 2051, 3185}, + { 2209, 3261},{ 2337, 3342},{ 2464, 3420},{ 2633, 3475}, + { 2809, 3525},{ 2948, 3579},{ 3094, 3633},{ 3347, 3678} + }, + /*Cb qi=43 INTER*/ + { + { 111, 44},{ 106, 353},{ 102, 670},{ 112, 1040}, + { 128, 1416},{ 148, 1771},{ 176, 2104},{ 211, 2424}, + { 250, 2734},{ 293, 3030},{ 347, 3309},{ 411, 3575}, + { 490, 3828},{ 589, 4064},{ 716, 4278},{ 869, 4472}, + { 1050, 4640},{ 1264, 4781},{ 1512, 4895},{ 1775, 4991}, + { 2042, 5069},{ 2310, 5141},{ 2593, 5207},{ 2912, 5239} + } + }, + { + /*Cr qi=43 INTRA*/ + { + { 15, 7},{ 121, 390},{ 208, 767},{ 315, 1135}, + { 449, 1449},{ 586, 1715},{ 718, 1950},{ 843, 2158}, + { 977, 2342},{ 1120, 2501},{ 1290, 2632},{ 1466, 2739}, + { 1613, 2845},{ 1763, 2945},{ 1937, 3015},{ 2093, 3070}, + { 2225, 3126},{ 2366, 3194},{ 2501, 3267},{ 2634, 3324}, + { 2815, 3385},{ 2964, 3466},{ 3087, 3538},{ 3263, 3555} + }, + /*Cr qi=43 INTER*/ + { + { 84, -4},{ 93, 358},{ 95, 683},{ 113, 1052}, + { 131, 1421},{ 148, 1770},{ 171, 2110},{ 201, 2439}, + { 240, 2750},{ 287, 3046},{ 348, 3322},{ 429, 3576}, + { 527, 3811},{ 641, 4029},{ 767, 4230},{ 904, 4422}, + { 1053, 4603},{ 1225, 4765},{ 1433, 4903},{ 1661, 5030}, + { 1928, 5121},{ 2252, 5160},{ 2604, 5164},{ 2979, 5125} + } + } + }, + { + { + /*Y' qi=44 INTRA*/ + { + { 103, 80},{ 560, 1244},{ 1183, 2009},{ 1891, 2391}, + { 2586, 2649},{ 3324, 2884},{ 4093, 3089},{ 4850, 3243}, + { 5575, 3358},{ 6252, 3452},{ 6886, 3518},{ 7459, 3546}, + { 7993, 3562},{ 8515, 3594},{ 9030, 3645},{ 9534, 3691}, + {10004, 3723},{10469, 3750},{10887, 3765},{11236, 3766}, + {11596, 3762},{11960, 3775},{12317, 3784},{12766, 3789} + }, + /*Y' qi=44 INTER*/ + { + { 77, -24},{ 145, 1332},{ 332, 2580},{ 642, 3649}, + { 1270, 4438},{ 2360, 4860},{ 3685, 4982},{ 4910, 4966}, + { 
5929, 4928},{ 6785, 4900},{ 7529, 4880},{ 8198, 4863}, + { 8804, 4850},{ 9361, 4842},{ 9882, 4836},{10371, 4830}, + {10827, 4822},{11262, 4816},{11672, 4811},{12052, 4807}, + {12431, 4806},{12780, 4798},{13095, 4792},{13401, 4791} + } + }, + { + /*Cb qi=44 INTRA*/ + { + { 9, 2},{ 122, 371},{ 214, 741},{ 307, 1109}, + { 433, 1432},{ 576, 1704},{ 718, 1939},{ 855, 2152}, + { 991, 2340},{ 1141, 2497},{ 1298, 2632},{ 1463, 2749}, + { 1636, 2851},{ 1796, 2944},{ 1947, 3041},{ 2101, 3140}, + { 2260, 3219},{ 2392, 3297},{ 2527, 3366},{ 2693, 3424}, + { 2872, 3477},{ 3025, 3525},{ 3175, 3584},{ 3451, 3626} + }, + /*Cb qi=44 INTER*/ + { + { 111, 14},{ 110, 339},{ 109, 671},{ 120, 1040}, + { 139, 1410},{ 162, 1758},{ 197, 2084},{ 243, 2397}, + { 291, 2702},{ 342, 2992},{ 405, 3265},{ 484, 3521}, + { 584, 3760},{ 705, 3983},{ 855, 4185},{ 1048, 4356}, + { 1274, 4500},{ 1531, 4617},{ 1816, 4707},{ 2111, 4783}, + { 2409, 4846},{ 2720, 4901},{ 3044, 4957},{ 3391, 4985} + } + }, + { + /*Cr qi=44 INTRA*/ + { + { 17, 7},{ 128, 392},{ 219, 770},{ 329, 1135}, + { 465, 1442},{ 601, 1703},{ 734, 1935},{ 862, 2142}, + { 998, 2325},{ 1147, 2482},{ 1321, 2606},{ 1496, 2710}, + { 1649, 2813},{ 1809, 2908},{ 1984, 2977},{ 2143, 3032}, + { 2279, 3087},{ 2423, 3152},{ 2559, 3225},{ 2684, 3288}, + { 2866, 3351},{ 3025, 3426},{ 3161, 3492},{ 3372, 3500} + }, + /*Cr qi=44 INTER*/ + { + { 89, 0},{ 101, 352},{ 104, 683},{ 121, 1051}, + { 141, 1414},{ 163, 1757},{ 192, 2092},{ 231, 2415}, + { 278, 2720},{ 336, 3007},{ 412, 3273},{ 510, 3516}, + { 633, 3733},{ 769, 3936},{ 914, 4130},{ 1076, 4307}, + { 1256, 4472},{ 1469, 4617},{ 1723, 4732},{ 2012, 4822}, + { 2347, 4871},{ 2716, 4875},{ 3082, 4866},{ 3422, 4826} + } + } + }, + { + { + /*Y' qi=45 INTRA*/ + { + { 119, 78},{ 610, 1226},{ 1271, 1965},{ 2026, 2319}, + { 2768, 2550},{ 3556, 2757},{ 4369, 2938},{ 5157, 3076}, + { 5901, 3182},{ 6598, 3268},{ 7253, 3326},{ 7844, 3343}, + { 8392, 3356},{ 8922, 3386},{ 9453, 3433},{ 9973, 3474}, + {10457, 
3503},{10929, 3530},{11351, 3543},{11709, 3541}, + {12068, 3537},{12434, 3547},{12805, 3555},{13268, 3563} + }, + /*Y' qi=45 INTER*/ + { + { 77, -20},{ 146, 1330},{ 342, 2566},{ 699, 3604}, + { 1439, 4332},{ 2669, 4672},{ 4075, 4727},{ 5318, 4679}, + { 6345, 4630},{ 7209, 4595},{ 7963, 4570},{ 8644, 4551}, + { 9262, 4535},{ 9831, 4525},{10370, 4515},{10872, 4506}, + {11334, 4500},{11783, 4492},{12219, 4489},{12617, 4483}, + {12995, 4477},{13350, 4472},{13674, 4466},{13968, 4468} + } + }, + { + /*Cb qi=45 INTRA*/ + { + { 9, 2},{ 122, 370},{ 219, 735},{ 324, 1096}, + { 465, 1414},{ 619, 1679},{ 771, 1905},{ 920, 2103}, + { 1070, 2276},{ 1236, 2419},{ 1410, 2539},{ 1595, 2644}, + { 1784, 2736},{ 1949, 2831},{ 2104, 2931},{ 2275, 3021}, + { 2443, 3092},{ 2586, 3166},{ 2735, 3234},{ 2904, 3288}, + { 3093, 3338},{ 3262, 3382},{ 3419, 3427},{ 3708, 3456} + }, + /*Cb qi=45 INTER*/ + { + { 103, 0},{ 109, 339},{ 109, 670},{ 119, 1039}, + { 137, 1408},{ 162, 1754},{ 199, 2076},{ 248, 2386}, + { 301, 2684},{ 360, 2967},{ 433, 3234},{ 525, 3481}, + { 640, 3713},{ 780, 3924},{ 956, 4110},{ 1176, 4266}, + { 1438, 4390},{ 1736, 4481},{ 2057, 4553},{ 2385, 4613}, + { 2718, 4656},{ 3056, 4698},{ 3416, 4733},{ 3799, 4755} + } + }, + { + /*Cr qi=45 INTRA*/ + { + { 16, 7},{ 128, 391},{ 225, 763},{ 350, 1120}, + { 500, 1420},{ 649, 1673},{ 792, 1893},{ 929, 2089}, + { 1084, 2257},{ 1250, 2401},{ 1440, 2518},{ 1633, 2614}, + { 1799, 2708},{ 1968, 2798},{ 2151, 2863},{ 2314, 2914}, + { 2453, 2968},{ 2611, 3025},{ 2759, 3095},{ 2887, 3160}, + { 3082, 3210},{ 3259, 3278},{ 3403, 3342},{ 3593, 3354} + }, + /*Cr qi=45 INTER*/ + { + { 92, 0},{ 101, 352},{ 103, 682},{ 120, 1049}, + { 140, 1412},{ 163, 1752},{ 193, 2083},{ 234, 2402}, + { 287, 2702},{ 353, 2983},{ 442, 3240},{ 557, 3471}, + { 694, 3680},{ 846, 3873},{ 1014, 4056},{ 1200, 4224}, + { 1414, 4369},{ 1664, 4495},{ 1946, 4595},{ 2278, 4654}, + { 2654, 4673},{ 3047, 4658},{ 3438, 4627},{ 3825, 4585} + } + } + }, + { + { + /*Y' qi=46 
INTRA*/ + { + { 119, 78},{ 610, 1227},{ 1277, 1960},{ 2043, 2309}, + { 2805, 2529},{ 3618, 2719},{ 4452, 2887},{ 5257, 3016}, + { 6017, 3115},{ 6727, 3195},{ 7392, 3248},{ 7984, 3267}, + { 8528, 3281},{ 9059, 3310},{ 9593, 3354},{10119, 3395}, + {10599, 3425},{11064, 3450},{11493, 3464},{11850, 3466}, + {12207, 3462},{12578, 3471},{12948, 3480},{13407, 3487} + }, + /*Y' qi=46 INTER*/ + { + { 74, -14},{ 149, 1326},{ 382, 2538},{ 807, 3541}, + { 1670, 4211},{ 3000, 4499},{ 4416, 4533},{ 5628, 4490}, + { 6628, 4453},{ 7479, 4425},{ 8228, 4406},{ 8902, 4393}, + { 9521, 4380},{10090, 4371},{10623, 4364},{11124, 4356}, + {11586, 4351},{12043, 4344},{12476, 4341},{12863, 4340}, + {13244, 4337},{13610, 4329},{13936, 4324},{14246, 4329} + } + }, + { + /*Cb qi=46 INTRA*/ + { + { 11, 2},{ 132, 371},{ 234, 737},{ 340, 1094}, + { 481, 1405},{ 637, 1667},{ 791, 1891},{ 944, 2084}, + { 1099, 2253},{ 1268, 2392},{ 1444, 2507},{ 1633, 2610}, + { 1825, 2700},{ 1990, 2794},{ 2147, 2895},{ 2321, 2984}, + { 2493, 3053},{ 2640, 3126},{ 2787, 3198},{ 2954, 3253}, + { 3146, 3297},{ 3313, 3344},{ 3473, 3393},{ 3757, 3434} + }, + /*Cb qi=46 INTER*/ + { + { 97, 0},{ 109, 339},{ 108, 669},{ 120, 1035}, + { 142, 1398},{ 173, 1737},{ 221, 2052},{ 281, 2353}, + { 345, 2646},{ 415, 2924},{ 504, 3183},{ 616, 3421}, + { 749, 3643},{ 914, 3842},{ 1123, 4012},{ 1379, 4150}, + { 1685, 4250},{ 2014, 4327},{ 2366, 4382},{ 2731, 4426}, + { 3083, 4470},{ 3445, 4490},{ 3805, 4511},{ 4146, 4539} + } + }, + { + /*Cr qi=46 INTRA*/ + { + { 19, 7},{ 137, 393},{ 237, 765},{ 364, 1116}, + { 516, 1411},{ 665, 1662},{ 809, 1880},{ 951, 2072}, + { 1109, 2236},{ 1278, 2378},{ 1474, 2491},{ 1669, 2584}, + { 1835, 2678},{ 2014, 2766},{ 2203, 2828},{ 2366, 2880}, + { 2506, 2933},{ 2661, 2988},{ 2810, 3053},{ 2941, 3116}, + { 3131, 3175},{ 3310, 3243},{ 3461, 3303},{ 3656, 3321} + }, + /*Cr qi=46 INTER*/ + { + { 91, 1},{ 103, 351},{ 104, 681},{ 121, 1046}, + { 144, 1401},{ 173, 1736},{ 213, 2060},{ 265, 2373}, + { 330, 
2666},{ 410, 2938},{ 517, 3185},{ 655, 3404}, + { 815, 3601},{ 989, 3784},{ 1183, 3951},{ 1400, 4104}, + { 1649, 4241},{ 1933, 4352},{ 2261, 4427},{ 2646, 4458}, + { 3057, 4446},{ 3453, 4418},{ 3820, 4385},{ 4171, 4352} + } + } + }, + { + { + /*Y' qi=47 INTRA*/ + { + { 117, 83},{ 670, 1205},{ 1408, 1904},{ 2239, 2219}, + { 3049, 2414},{ 3905, 2584},{ 4775, 2734},{ 5610, 2852}, + { 6393, 2944},{ 7121, 3017},{ 7804, 3066},{ 8407, 3081}, + { 8957, 3093},{ 9498, 3119},{10043, 3160},{10582, 3199}, + {11083, 3226},{11561, 3250},{11993, 3263},{12352, 3264}, + {12711, 3259},{13092, 3266},{13463, 3271},{13918, 3275} + }, + /*Y' qi=47 INTER*/ + { + { 74, -11},{ 148, 1325},{ 404, 2518},{ 910, 3478}, + { 1916, 4080},{ 3369, 4298},{ 4823, 4292},{ 6035, 4238}, + { 7037, 4197},{ 7894, 4168},{ 8650, 4146},{ 9337, 4129}, + { 9968, 4116},{10549, 4105},{11096, 4096},{11605, 4089}, + {12081, 4083},{12547, 4076},{12990, 4070},{13399, 4070}, + {13776, 4065},{14133, 4059},{14486, 4057},{14842, 4053} + } + }, + { + /*Cb qi=47 INTRA*/ + { + { 11, 2},{ 133, 370},{ 242, 731},{ 367, 1077}, + { 524, 1378},{ 692, 1630},{ 860, 1844},{ 1028, 2024}, + { 1203, 2178},{ 1393, 2305},{ 1582, 2413},{ 1787, 2507}, + { 1992, 2590},{ 2175, 2676},{ 2351, 2767},{ 2534, 2851}, + { 2707, 2923},{ 2862, 2994},{ 3021, 3060},{ 3193, 3111}, + { 3396, 3147},{ 3573, 3184},{ 3752, 3220},{ 4038, 3255} + }, + /*Cb qi=47 INTER*/ + { + { 101, 0},{ 107, 339},{ 108, 667},{ 120, 1033}, + { 142, 1394},{ 175, 1729},{ 227, 2040},{ 295, 2335}, + { 369, 2619},{ 452, 2888},{ 556, 3138},{ 686, 3368}, + { 850, 3574},{ 1050, 3758},{ 1299, 3910},{ 1605, 4024}, + { 1950, 4104},{ 2317, 4163},{ 2689, 4210},{ 3077, 4239}, + { 3466, 4258},{ 3840, 4278},{ 4205, 4298},{ 4515, 4340} + } + }, + { + /*Cr qi=47 INTRA*/ + { + { 19, 7},{ 138, 392},{ 248, 758},{ 396, 1094}, + { 563, 1378},{ 723, 1621},{ 881, 1829},{ 1037, 2011}, + { 1214, 2165},{ 1410, 2290},{ 1623, 2393},{ 1834, 2480}, + { 2016, 2564},{ 2203, 2647},{ 2405, 2707},{ 2569, 2757}, + { 
2709, 2810},{ 2871, 2860},{ 3027, 2924},{ 3178, 2980}, + { 3375, 3034},{ 3563, 3097},{ 3724, 3151},{ 3952, 3153} + }, + /*Cr qi=47 INTER*/ + { + { 91, 1},{ 100, 351},{ 102, 681},{ 120, 1043}, + { 144, 1397},{ 175, 1729},{ 219, 2049},{ 277, 2356}, + { 353, 2640},{ 451, 2902},{ 579, 3136},{ 739, 3342}, + { 926, 3525},{ 1125, 3698},{ 1343, 3859},{ 1595, 3998}, + { 1881, 4113},{ 2208, 4205},{ 2589, 4253},{ 3014, 4250}, + { 3444, 4220},{ 3838, 4183},{ 4196, 4147},{ 4521, 4116} + } + } + }, + { + { + /*Y' qi=48 INTRA*/ + { + { 107, 87},{ 681, 1200},{ 1456, 1883},{ 2306, 2193}, + { 3122, 2386},{ 3984, 2548},{ 4862, 2693},{ 5704, 2808}, + { 6495, 2899},{ 7232, 2970},{ 7915, 3018},{ 8524, 3034}, + { 9085, 3043},{ 9635, 3068},{10192, 3108},{10735, 3145}, + {11237, 3171},{11719, 3194},{12153, 3207},{12516, 3206}, + {12888, 3202},{13266, 3210},{13637, 3218},{14101, 3219} + }, + /*Y' qi=48 INTER*/ + { + { 83, -18},{ 147, 1328},{ 398, 2519},{ 923, 3468}, + { 1979, 4047},{ 3472, 4246},{ 4936, 4232},{ 6148, 4178}, + { 7150, 4139},{ 8007, 4111},{ 8765, 4091},{ 9458, 4076}, + {10090, 4063},{10676, 4054},{11226, 4045},{11742, 4038}, + {12223, 4033},{12686, 4029},{13127, 4022},{13527, 4015}, + {13915, 4012},{14277, 4007},{14619, 4004},{14966, 4001} + } + }, + { + /*Cb qi=48 INTRA*/ + { + { 11, 2},{ 134, 369},{ 245, 730},{ 373, 1075}, + { 531, 1374},{ 698, 1625},{ 865, 1839},{ 1033, 2019}, + { 1207, 2173},{ 1397, 2300},{ 1588, 2408},{ 1795, 2501}, + { 2003, 2581},{ 2187, 2666},{ 2362, 2757},{ 2548, 2841}, + { 2719, 2912},{ 2876, 2983},{ 3034, 3047},{ 3209, 3097}, + { 3409, 3137},{ 3589, 3178},{ 3762, 3216},{ 4004, 3252} + }, + /*Cb qi=48 INTER*/ + { + { 113, 26},{ 112, 344},{ 111, 668},{ 120, 1032}, + { 141, 1392},{ 173, 1727},{ 224, 2036},{ 290, 2330}, + { 363, 2612},{ 447, 2880},{ 551, 3130},{ 685, 3358}, + { 852, 3563},{ 1061, 3742},{ 1332, 3884},{ 1654, 3993}, + { 2011, 4068},{ 2394, 4120},{ 2782, 4160},{ 3172, 4186}, + { 3557, 4209},{ 3932, 4228},{ 4306, 4237},{ 4675, 4236} + } + 
}, + { + /*Cr qi=48 INTRA*/ + { + { 18, 7},{ 139, 389},{ 252, 755},{ 404, 1090}, + { 573, 1372},{ 732, 1615},{ 889, 1823},{ 1045, 2005}, + { 1222, 2159},{ 1417, 2285},{ 1631, 2387},{ 1843, 2474}, + { 2027, 2558},{ 2212, 2639},{ 2413, 2697},{ 2578, 2746}, + { 2720, 2798},{ 2887, 2852},{ 3040, 2913},{ 3181, 2970}, + { 3381, 3024},{ 3581, 3081},{ 3743, 3130},{ 3948, 3133} + }, + /*Cr qi=48 INTER*/ + { + { 89, 0},{ 106, 352},{ 105, 682},{ 120, 1044}, + { 144, 1395},{ 174, 1724},{ 215, 2044},{ 270, 2350}, + { 343, 2635},{ 441, 2895},{ 571, 3129},{ 735, 3334}, + { 926, 3518},{ 1139, 3684},{ 1371, 3836},{ 1628, 3977}, + { 1933, 4089},{ 2279, 4164},{ 2672, 4204},{ 3105, 4205}, + { 3533, 4176},{ 3931, 4135},{ 4290, 4089},{ 4624, 4057} + } + } + }, + { + { + /*Y' qi=49 INTRA*/ + { + { 120, 85},{ 706, 1194},{ 1485, 1875},{ 2348, 2187}, + { 3190, 2372},{ 4076, 2521},{ 4967, 2658},{ 5819, 2771}, + { 6611, 2861},{ 7345, 2936},{ 8026, 2990},{ 8626, 3013}, + { 9182, 3030},{ 9723, 3059},{10266, 3100},{10802, 3143}, + {11293, 3179},{11768, 3206},{12201, 3221},{12556, 3225}, + {12914, 3226},{13281, 3237},{13639, 3247},{14089, 3257} + }, + /*Y' qi=49 INTER*/ + { + { 72, -11},{ 155, 1320},{ 458, 2485},{ 1090, 3386}, + { 2284, 3907},{ 3835, 4075},{ 5272, 4064},{ 6449, 4026}, + { 7426, 4003},{ 8267, 3987},{ 9017, 3976},{ 9698, 3967}, + {10328, 3962},{10913, 3959},{11452, 3954},{11961, 3950}, + {12442, 3947},{12904, 3946},{13347, 3945},{13749, 3943}, + {14123, 3941},{14490, 3941},{14826, 3939},{15153, 3937} + } + }, + { + /*Cb qi=49 INTRA*/ + { + { 11, 2},{ 145, 369},{ 262, 729},{ 393, 1070}, + { 557, 1363},{ 731, 1607},{ 907, 1811},{ 1085, 1983}, + { 1268, 2130},{ 1465, 2251},{ 1658, 2359},{ 1868, 2454}, + { 2079, 2534},{ 2264, 2621},{ 2440, 2717},{ 2625, 2802}, + { 2792, 2878},{ 2945, 2954},{ 3106, 3021},{ 3277, 3075}, + { 3466, 3119},{ 3638, 3170},{ 3824, 3213},{ 4100, 3243} + }, + /*Cb qi=49 INTER*/ + { + { 98, -6},{ 113, 343},{ 110, 669},{ 122, 1029}, + { 149, 1380},{ 192, 1706},{ 
258, 2007},{ 340, 2293}, + { 426, 2569},{ 525, 2831},{ 653, 3071},{ 814, 3287}, + { 1013, 3478},{ 1262, 3637},{ 1575, 3761},{ 1936, 3851}, + { 2328, 3910},{ 2741, 3949},{ 3163, 3970},{ 3559, 3994}, + { 3936, 4025},{ 4300, 4050},{ 4655, 4060},{ 4962, 4062} + } + }, + { + /*Cr qi=49 INTRA*/ + { + { 19, 7},{ 151, 389},{ 270, 753},{ 427, 1084}, + { 602, 1360},{ 767, 1595},{ 933, 1794},{ 1098, 1968}, + { 1285, 2115},{ 1489, 2237},{ 1699, 2342},{ 1912, 2435}, + { 2101, 2519},{ 2288, 2601},{ 2486, 2663},{ 2651, 2715}, + { 2799, 2769},{ 2958, 2825},{ 3106, 2890},{ 3257, 2948}, + { 3452, 3007},{ 3634, 3075},{ 3786, 3136},{ 3959, 3164} + }, + /*Cr qi=49 INTER*/ + { + { 85, 1},{ 103, 352},{ 104, 681},{ 121, 1039}, + { 152, 1382},{ 195, 1702},{ 248, 2015},{ 316, 2316}, + { 403, 2595},{ 520, 2847},{ 676, 3068},{ 870, 3258}, + { 1091, 3429},{ 1329, 3585},{ 1597, 3725},{ 1894, 3849}, + { 2242, 3940},{ 2656, 3984},{ 3098, 3992},{ 3531, 3981}, + { 3936, 3950},{ 4304, 3915},{ 4646, 3879},{ 4915, 3861} + } + } + }, + { + { + /*Y' qi=50 INTRA*/ + { + { 122, 89},{ 798, 1170},{ 1682, 1812},{ 2613, 2096}, + { 3501, 2260},{ 4430, 2388},{ 5352, 2510},{ 6228, 2613}, + { 7043, 2698},{ 7793, 2770},{ 8486, 2823},{ 9092, 2846}, + { 9652, 2865},{10210, 2895},{10773, 2936},{11315, 2979}, + {11817, 3014},{12297, 3041},{12734, 3057},{13097, 3064}, + {13443, 3067},{13813, 3078},{14190, 3088},{14646, 3103} + }, + /*Y' qi=50 INTER*/ + { + { 73, -11},{ 154, 1318},{ 501, 2457},{ 1281, 3291}, + { 2685, 3719},{ 4356, 3810},{ 5811, 3769},{ 6988, 3726}, + { 7976, 3700},{ 8835, 3682},{ 9606, 3669},{10307, 3659}, + {10953, 3652},{11556, 3645},{12115, 3643},{12641, 3640}, + {13138, 3636},{13613, 3634},{14068, 3629},{14488, 3627}, + {14876, 3625},{15237, 3621},{15585, 3623},{15922, 3629} + } + }, + { + /*Cb qi=50 INTRA*/ + { + { 11, 2},{ 148, 368},{ 278, 724},{ 431, 1052}, + { 613, 1334},{ 806, 1567},{ 1004, 1756},{ 1203, 1915}, + { 1405, 2051},{ 1621, 2163},{ 1833, 2262},{ 2059, 2347}, + { 2280, 2424},{ 2476, 
2512},{ 2670, 2598},{ 2864, 2679}, + { 3037, 2754},{ 3201, 2826},{ 3376, 2887},{ 3562, 2936}, + { 3756, 2976},{ 3932, 3022},{ 4117, 3065},{ 4385, 3094} + }, + /*Cb qi=50 INTER*/ + { + { 92, -3},{ 112, 343},{ 109, 669},{ 121, 1027}, + { 149, 1375},{ 196, 1697},{ 270, 1992},{ 366, 2267}, + { 471, 2532},{ 594, 2782},{ 747, 3011},{ 942, 3212}, + { 1189, 3384},{ 1497, 3521},{ 1875, 3613},{ 2297, 3673}, + { 2739, 3710},{ 3195, 3725},{ 3644, 3737},{ 4057, 3751}, + { 4445, 3763},{ 4841, 3769},{ 5211, 3779},{ 5568, 3769} + } + }, + { + /*Cr qi=50 INTRA*/ + { + { 19, 7},{ 155, 388},{ 290, 744},{ 474, 1060}, + { 666, 1324},{ 847, 1549},{ 1033, 1737},{ 1219, 1898}, + { 1428, 2034},{ 1653, 2147},{ 1885, 2245},{ 2115, 2329}, + { 2316, 2410},{ 2517, 2486},{ 2730, 2539},{ 2901, 2586}, + { 3042, 2638},{ 3199, 2693},{ 3366, 2755},{ 3534, 2805}, + { 3738, 2858},{ 3934, 2916},{ 4079, 2975},{ 4257, 2992} + }, + /*Cr qi=50 INTER*/ + { + { 87, 1},{ 102, 353},{ 103, 680},{ 121, 1036}, + { 153, 1377},{ 199, 1694},{ 260, 1999},{ 339, 2291}, + { 446, 2559},{ 590, 2797},{ 780, 3003},{ 1010, 3176}, + { 1267, 3331},{ 1547, 3474},{ 1874, 3594},{ 2245, 3688}, + { 2666, 3742},{ 3130, 3758},{ 3594, 3748},{ 4028, 3711}, + { 4415, 3674},{ 4771, 3641},{ 5122, 3605},{ 5482, 3569} + } + } + }, + { + { + /*Y' qi=51 INTRA*/ + { + { 115, 93},{ 819, 1164},{ 1739, 1806},{ 2695, 2101}, + { 3612, 2257},{ 4552, 2374},{ 5479, 2490},{ 6352, 2593}, + { 7158, 2683},{ 7898, 2761},{ 8580, 2823},{ 9177, 2854}, + { 9728, 2880},{10268, 2917},{10816, 2966},{11350, 3016}, + {11834, 3058},{12311, 3089},{12741, 3109},{13092, 3119}, + {13434, 3126},{13791, 3142},{14156, 3155},{14590, 3171} + }, + /*Y' qi=51 INTER*/ + { + { 58, 0},{ 171, 1307},{ 610, 2407},{ 1563, 3175}, + { 3116, 3545},{ 4789, 3624},{ 6185, 3602},{ 7320, 3583}, + { 8282, 3574},{ 9124, 3569},{ 9878, 3567},{10569, 3565}, + {11207, 3563},{11801, 3564},{12359, 3566},{12884, 3567}, + {13373, 3568},{13841, 3567},{14289, 3566},{14699, 3568}, + {15086, 3568},{15446, 
3566},{15788, 3564},{16103, 3568} + } + }, + { + /*Cb qi=51 INTRA*/ + { + { 14, 3},{ 161, 369},{ 297, 722},{ 454, 1047}, + { 639, 1325},{ 833, 1554},{ 1033, 1742},{ 1236, 1897}, + { 1440, 2032},{ 1653, 2148},{ 1860, 2253},{ 2077, 2347}, + { 2288, 2432},{ 2476, 2525},{ 2661, 2621},{ 2841, 2714}, + { 3010, 2797},{ 3170, 2876},{ 3333, 2945},{ 3510, 3000}, + { 3696, 3054},{ 3865, 3114},{ 4046, 3164},{ 4317, 3200} + }, + /*Cb qi=51 INTER*/ + { + { 88, -11},{ 109, 341},{ 109, 668},{ 126, 1019}, + { 168, 1358},{ 233, 1670},{ 329, 1955},{ 451, 2219}, + { 584, 2472},{ 736, 2711},{ 931, 2923},{ 1179, 3104}, + { 1480, 3254},{ 1846, 3368},{ 2265, 3448},{ 2714, 3501}, + { 3180, 3524},{ 3638, 3529},{ 4074, 3543},{ 4485, 3560}, + { 4868, 3571},{ 5238, 3581},{ 5597, 3594},{ 5953, 3591} + } + }, + { + /*Cr qi=51 INTRA*/ + { + { 24, 7},{ 168, 388},{ 309, 742},{ 496, 1054}, + { 688, 1316},{ 873, 1538},{ 1063, 1723},{ 1252, 1882}, + { 1460, 2018},{ 1682, 2134},{ 1907, 2238},{ 2125, 2332}, + { 2317, 2422},{ 2507, 2510},{ 2705, 2575},{ 2869, 2630}, + { 3015, 2684},{ 3178, 2744},{ 3329, 2815},{ 3477, 2878}, + { 3667, 2945},{ 3848, 3016},{ 3997, 3082},{ 4174, 3121} + }, + /*Cr qi=51 INTER*/ + { + { 83, -2},{ 102, 351},{ 102, 680},{ 126, 1029}, + { 172, 1359},{ 238, 1665},{ 321, 1962},{ 422, 2246}, + { 552, 2505},{ 733, 2728},{ 970, 2912},{ 1247, 3069}, + { 1552, 3209},{ 1876, 3338},{ 2251, 3440},{ 2692, 3502}, + { 3161, 3529},{ 3637, 3525},{ 4084, 3509},{ 4487, 3479}, + { 4850, 3444},{ 5181, 3419},{ 5507, 3406},{ 5786, 3398} + } + } + }, + { + { + /*Y' qi=52 INTRA*/ + { + { 117, 93},{ 814, 1168},{ 1729, 1822},{ 2706, 2119}, + { 3655, 2262},{ 4604, 2374},{ 5528, 2490},{ 6394, 2596}, + { 7189, 2691},{ 7921, 2777},{ 8596, 2846},{ 9184, 2885}, + { 9728, 2918},{10260, 2961},{10796, 3014},{11316, 3069}, + {11793, 3115},{12267, 3150},{12692, 3172},{13037, 3185}, + {13367, 3196},{13717, 3214},{14087, 3227},{14521, 3249} + }, + /*Y' qi=52 INTER*/ + { + { 52, 0},{ 169, 1308},{ 668, 2382},{ 1735, 
3112}, + { 3384, 3451},{ 5077, 3519},{ 6461, 3506},{ 7587, 3496}, + { 8545, 3494},{ 9384, 3494},{10142, 3498},{10838, 3501}, + {11475, 3503},{12078, 3508},{12640, 3511},{13162, 3513}, + {13654, 3517},{14130, 3521},{14576, 3522},{14980, 3523}, + {15369, 3523},{15737, 3522},{16071, 3521},{16382, 3516} + } + }, + { + /*Cb qi=52 INTRA*/ + { + { 14, 3},{ 163, 369},{ 299, 722},{ 457, 1044}, + { 645, 1319},{ 843, 1545},{ 1050, 1728},{ 1261, 1879}, + { 1468, 2013},{ 1678, 2132},{ 1883, 2240},{ 2093, 2338}, + { 2301, 2428},{ 2488, 2523},{ 2667, 2619},{ 2843, 2718}, + { 3010, 2805},{ 3163, 2887},{ 3323, 2963},{ 3490, 3028}, + { 3665, 3087},{ 3841, 3145},{ 4011, 3197},{ 4289, 3230} + }, + /*Cb qi=52 INTER*/ + { + { 98, -7},{ 109, 342},{ 109, 668},{ 126, 1018}, + { 170, 1355},{ 242, 1663},{ 352, 1941},{ 490, 2195}, + { 642, 2439},{ 823, 2666},{ 1052, 2868},{ 1333, 3039}, + { 1670, 3178},{ 2074, 3280},{ 2524, 3348},{ 2996, 3390}, + { 3469, 3410},{ 3923, 3420},{ 4355, 3434},{ 4771, 3451}, + { 5166, 3468},{ 5532, 3483},{ 5885, 3499},{ 6263, 3501} + } + }, + { + /*Cr qi=52 INTRA*/ + { + { 25, 7},{ 170, 388},{ 312, 741},{ 500, 1051}, + { 694, 1310},{ 883, 1529},{ 1082, 1709},{ 1280, 1864}, + { 1491, 1998},{ 1710, 2117},{ 1932, 2225},{ 2143, 2324}, + { 2328, 2418},{ 2516, 2506},{ 2708, 2578},{ 2870, 2637}, + { 3017, 2693},{ 3170, 2758},{ 3312, 2835},{ 3455, 2901}, + { 3644, 2972},{ 3827, 3049},{ 3968, 3121},{ 4115, 3166} + }, + /*Cr qi=52 INTER*/ + { + { 86, -2},{ 101, 352},{ 100, 680},{ 126, 1028}, + { 175, 1356},{ 247, 1657},{ 341, 1948},{ 458, 2224}, + { 615, 2471},{ 828, 2681},{ 1091, 2857},{ 1395, 3008}, + { 1732, 3140},{ 2095, 3257},{ 2502, 3348},{ 2968, 3402}, + { 3457, 3420},{ 3926, 3413},{ 4360, 3388},{ 4759, 3357}, + { 5128, 3329},{ 5449, 3306},{ 5741, 3295},{ 6071, 3296} + } + } + }, + { + { + /*Y' qi=53 INTRA*/ + { + { 138, 93},{ 850, 1161},{ 1773, 1810},{ 2763, 2103}, + { 3722, 2245},{ 4675, 2360},{ 5600, 2483},{ 6464, 2597}, + { 7255, 2700},{ 7982, 2792},{ 8652, 
2867},{ 9237, 2913}, + { 9775, 2950},{10302, 2998},{10834, 3058},{11347, 3121}, + {11826, 3169},{12299, 3207},{12713, 3235},{13054, 3250}, + {13387, 3265},{13744, 3286},{14110, 3302},{14515, 3323} + }, + /*Y' qi=53 INTER*/ + { + { 52, 2},{ 169, 1308},{ 680, 2377},{ 1763, 3103}, + { 3410, 3450},{ 5094, 3531},{ 6469, 3526},{ 7590, 3525}, + { 8547, 3530},{ 9385, 3534},{10139, 3540},{10835, 3548}, + {11479, 3553},{12075, 3559},{12634, 3565},{13159, 3570}, + {13650, 3573},{14124, 3576},{14575, 3580},{14993, 3583}, + {15375, 3584},{15744, 3584},{16091, 3583},{16421, 3586} + } + }, + { + /*Cb qi=53 INTRA*/ + { + { 14, 3},{ 167, 367},{ 317, 717},{ 492, 1033}, + { 687, 1306},{ 887, 1531},{ 1095, 1715},{ 1309, 1866}, + { 1517, 2000},{ 1729, 2119},{ 1932, 2227},{ 2146, 2325}, + { 2358, 2414},{ 2544, 2511},{ 2724, 2611},{ 2902, 2711}, + { 3070, 2800},{ 3227, 2878},{ 3381, 2954},{ 3548, 3021}, + { 3724, 3077},{ 3888, 3140},{ 4065, 3196},{ 4359, 3225} + }, + /*Cb qi=53 INTER*/ + { + { 93, -8},{ 110, 342},{ 108, 668},{ 125, 1018}, + { 170, 1355},{ 242, 1663},{ 353, 1939},{ 494, 2192}, + { 651, 2433},{ 838, 2658},{ 1076, 2856},{ 1368, 3022}, + { 1716, 3158},{ 2123, 3260},{ 2575, 3330},{ 3042, 3373}, + { 3507, 3396},{ 3962, 3413},{ 4394, 3430},{ 4797, 3452}, + { 5169, 3476},{ 5547, 3496},{ 5914, 3510},{ 6235, 3525} + } + }, + { + /*Cr qi=53 INTRA*/ + { + { 25, 7},{ 175, 386},{ 335, 734},{ 541, 1037}, + { 737, 1296},{ 926, 1516},{ 1125, 1696},{ 1324, 1851}, + { 1540, 1984},{ 1763, 2102},{ 1989, 2210},{ 2202, 2310}, + { 2386, 2404},{ 2572, 2495},{ 2768, 2569},{ 2929, 2627}, + { 3071, 2684},{ 3231, 2749},{ 3374, 2825},{ 3514, 2894}, + { 3703, 2963},{ 3882, 3040},{ 4024, 3111},{ 4190, 3150} + }, + /*Cr qi=53 INTER*/ + { + { 87, -1},{ 99, 352},{ 100, 680},{ 125, 1027}, + { 175, 1355},{ 249, 1657},{ 343, 1946},{ 462, 2220}, + { 624, 2465},{ 844, 2671},{ 1122, 2841},{ 1435, 2989}, + { 1768, 3125},{ 2134, 3243},{ 2545, 3334},{ 3002, 3393}, + { 3490, 3412},{ 3965, 3405},{ 4401, 3384},{ 
4797, 3359}, + { 5156, 3328},{ 5482, 3297},{ 5800, 3292},{ 6135, 3293} + } + } + }, + { + { + /*Y' qi=54 INTRA*/ + { + { 184, 94},{ 902, 1151},{ 1876, 1776},{ 2881, 2057}, + { 3832, 2200},{ 4785, 2315},{ 5709, 2442},{ 6570, 2562}, + { 7362, 2672},{ 8092, 2771},{ 8760, 2852},{ 9337, 2901}, + { 9874, 2943},{10402, 2995},{10928, 3059},{11443, 3126}, + {11926, 3178},{12396, 3220},{12805, 3251},{13139, 3266}, + {13466, 3280},{13822, 3304},{14184, 3322},{14585, 3342} + }, + /*Y' qi=54 INTER*/ + { + { 60, 5},{ 169, 1308},{ 683, 2375},{ 1791, 3090}, + { 3478, 3412},{ 5184, 3470},{ 6568, 3455},{ 7697, 3446}, + { 8659, 3446},{ 9503, 3447},{10266, 3450},{10971, 3454}, + {11619, 3458},{12223, 3462},{12789, 3467},{13315, 3471}, + {13811, 3475},{14291, 3479},{14743, 3479},{15148, 3481}, + {15535, 3483},{15913, 3481},{16252, 3479},{16569, 3472} + } + }, + { + /*Cb qi=54 INTRA*/ + { + { 13, 2},{ 165, 367},{ 318, 715},{ 498, 1030}, + { 698, 1301},{ 906, 1523},{ 1121, 1703},{ 1336, 1853}, + { 1549, 1984},{ 1765, 2100},{ 1974, 2207},{ 2192, 2306}, + { 2402, 2396},{ 2587, 2493},{ 2773, 2591},{ 2953, 2691}, + { 3119, 2778},{ 3277, 2858},{ 3430, 2940},{ 3603, 3004}, + { 3788, 3059},{ 3950, 3121},{ 4128, 3173},{ 4398, 3215} + }, + /*Cb qi=54 INTER*/ + { + { 100, -3},{ 109, 343},{ 107, 668},{ 125, 1018}, + { 169, 1354},{ 241, 1662},{ 353, 1938},{ 496, 2190}, + { 655, 2431},{ 843, 2655},{ 1082, 2851},{ 1381, 3015}, + { 1739, 3146},{ 2154, 3243},{ 2610, 3310},{ 3094, 3344}, + { 3581, 3358},{ 4034, 3371},{ 4457, 3384},{ 4867, 3399}, + { 5255, 3413},{ 5630, 3425},{ 6003, 3440},{ 6346, 3440} + } + }, + { + /*Cr qi=54 INTRA*/ + { + { 23, 7},{ 174, 386},{ 338, 732},{ 549, 1034}, + { 751, 1289},{ 947, 1506},{ 1150, 1685},{ 1353, 1837}, + { 1572, 1969},{ 1800, 2087},{ 2031, 2192},{ 2248, 2291}, + { 2434, 2387},{ 2622, 2477},{ 2815, 2549},{ 2976, 2607}, + { 3126, 2663},{ 3286, 2727},{ 3427, 2807},{ 3569, 2877}, + { 3761, 2941},{ 3942, 3016},{ 4084, 3093},{ 4226, 3131} + }, + /*Cr qi=54 INTER*/ + { 
+ { 88, -2},{ 99, 351},{ 100, 680},{ 125, 1027}, + { 175, 1354},{ 248, 1656},{ 343, 1945},{ 463, 2219}, + { 626, 2463},{ 850, 2668},{ 1128, 2837},{ 1445, 2983}, + { 1791, 3111},{ 2168, 3224},{ 2597, 3309},{ 3075, 3351}, + { 3560, 3364},{ 4029, 3356},{ 4464, 3335},{ 4858, 3307}, + { 5218, 3275},{ 5547, 3256},{ 5850, 3247},{ 6171, 3214} + } + } + }, + { + { + /*Y' qi=55 INTRA*/ + { + { 178, 95},{ 968, 1137},{ 2000, 1747},{ 3013, 2027}, + { 3966, 2173},{ 4920, 2294},{ 5842, 2427},{ 6702, 2553}, + { 7489, 2668},{ 8213, 2773},{ 8875, 2858},{ 9452, 2913}, + { 9986, 2959},{10504, 3016},{11023, 3085},{11530, 3157}, + {12011, 3213},{12480, 3257},{12882, 3291},{13214, 3310}, + {13542, 3325},{13890, 3350},{14248, 3371},{14671, 3398} + }, + /*Y' qi=55 INTER*/ + { + { 59, 5},{ 170, 1307},{ 725, 2358},{ 1886, 3058}, + { 3589, 3385},{ 5284, 3459},{ 6654, 3458},{ 7771, 3461}, + { 8727, 3470},{ 9564, 3478},{10322, 3488},{11019, 3497}, + {11658, 3505},{12258, 3513},{12819, 3520},{13344, 3527}, + {13840, 3533},{14314, 3537},{14755, 3541},{15161, 3544}, + {15552, 3548},{15916, 3548},{16257, 3548},{16576, 3540} + } + }, + { + /*Cb qi=55 INTRA*/ + { + { 13, 2},{ 167, 366},{ 322, 714},{ 508, 1026}, + { 716, 1292},{ 930, 1511},{ 1148, 1690},{ 1366, 1839}, + { 1578, 1972},{ 1793, 2090},{ 2001, 2199},{ 2217, 2300}, + { 2427, 2393},{ 2609, 2495},{ 2784, 2600},{ 2961, 2704}, + { 3121, 2797},{ 3268, 2884},{ 3423, 2965},{ 3590, 3032}, + { 3764, 3096},{ 3926, 3165},{ 4101, 3223},{ 4405, 3258} + }, + /*Cb qi=55 INTER*/ + { + { 90, -4},{ 109, 344},{ 107, 668},{ 126, 1017}, + { 172, 1351},{ 249, 1657},{ 370, 1928},{ 527, 2174}, + { 702, 2407},{ 909, 2624},{ 1170, 2814},{ 1493, 2970}, + { 1869, 3097},{ 2292, 3192},{ 2752, 3258},{ 3232, 3295}, + { 3709, 3314},{ 4156, 3335},{ 4592, 3355},{ 5004, 3373}, + { 5377, 3389},{ 5737, 3411},{ 6092, 3432},{ 6473, 3423} + } + }, + { + /*Cr qi=55 INTRA*/ + { + { 23, 7},{ 175, 385},{ 342, 730},{ 561, 1028}, + { 771, 1279},{ 973, 1493},{ 1181, 1669},{ 1384, 1822}, 
+ { 1602, 1956},{ 1830, 2076},{ 2057, 2184},{ 2270, 2288}, + { 2452, 2389},{ 2637, 2484},{ 2823, 2559},{ 2983, 2621}, + { 3129, 2682},{ 3280, 2753},{ 3417, 2833},{ 3554, 2904}, + { 3743, 2977},{ 3921, 3060},{ 4055, 3137},{ 4185, 3186} + }, + /*Cr qi=55 INTER*/ + { + { 85, 0},{ 99, 352},{ 100, 679},{ 126, 1025}, + { 178, 1351},{ 256, 1650},{ 359, 1935},{ 493, 2202}, + { 675, 2439},{ 921, 2636},{ 1220, 2799},{ 1552, 2941}, + { 1910, 3068},{ 2303, 3177},{ 2735, 3262},{ 3206, 3311}, + { 3689, 3333},{ 4152, 3327},{ 4588, 3299},{ 4978, 3272}, + { 5325, 3243},{ 5651, 3221},{ 5969, 3210},{ 6218, 3185} + } + } + }, + { + { + /*Y' qi=56 INTRA*/ + { + { 137, 104},{ 1048, 1128},{ 2147, 1760},{ 3261, 2029}, + { 4319, 2131},{ 5310, 2234},{ 6245, 2351},{ 7101, 2464}, + { 7886, 2572},{ 8610, 2675},{ 9270, 2762},{ 9840, 2818}, + {10365, 2869},{10875, 2928},{11393, 2997},{11900, 3071}, + {12371, 3128},{12834, 3172},{13233, 3208},{13562, 3228}, + {13878, 3245},{14221, 3271},{14584, 3292},{15008, 3320} + }, + /*Y' qi=56 INTER*/ + { + { 19, 21},{ 207, 1292},{ 1031, 2252},{ 2553, 2846}, + { 4463, 3085},{ 6137, 3131},{ 7441, 3151},{ 8526, 3172}, + { 9468, 3193},{10301, 3209},{11059, 3224},{11760, 3237}, + {12405, 3249},{13008, 3261},{13570, 3270},{14100, 3278}, + {14597, 3284},{15074, 3289},{15524, 3297},{15929, 3302}, + {16314, 3306},{16675, 3307},{17004, 3305},{17288, 3301} + } + }, + { + /*Cb qi=56 INTRA*/ + { + { 16, 3},{ 188, 367},{ 353, 712},{ 546, 1017}, + { 765, 1275},{ 989, 1484},{ 1221, 1653},{ 1459, 1791}, + { 1681, 1920},{ 1893, 2046},{ 2102, 2160},{ 2323, 2257}, + { 2534, 2347},{ 2720, 2447},{ 2902, 2549},{ 3075, 2654}, + { 3239, 2749},{ 3392, 2835},{ 3544, 2920},{ 3712, 2988}, + { 3882, 3052},{ 4052, 3123},{ 4227, 3181},{ 4483, 3213} + }, + /*Cb qi=56 INTER*/ + { + { 92, -1},{ 111, 343},{ 114, 665},{ 148, 1003}, + { 224, 1321},{ 345, 1609},{ 526, 1858},{ 754, 2077}, + { 1009, 2281},{ 1319, 2464},{ 1702, 2614},{ 2145, 2732}, + { 2625, 2824},{ 3123, 2890},{ 3634, 2933},{ 
4137, 2954}, + { 4614, 2965},{ 5052, 2988},{ 5468, 3015},{ 5852, 3035}, + { 6213, 3060},{ 6557, 3081},{ 6906, 3094},{ 7243, 3112} + } + }, + { + /*Cr qi=56 INTRA*/ + { + { 28, 8},{ 195, 385},{ 373, 727},{ 598, 1019}, + { 816, 1263},{ 1033, 1465},{ 1260, 1630},{ 1482, 1773}, + { 1717, 1900},{ 1949, 2018},{ 2178, 2128},{ 2393, 2233}, + { 2570, 2338},{ 2749, 2435},{ 2937, 2514},{ 3097, 2577}, + { 3240, 2638},{ 3398, 2709},{ 3540, 2791},{ 3673, 2865}, + { 3869, 2938},{ 4049, 3019},{ 4179, 3095},{ 4330, 3137} + }, + /*Cr qi=56 INTER*/ + { + { 83, 0},{ 99, 353},{ 103, 676},{ 146, 1010}, + { 232, 1320},{ 355, 1601},{ 512, 1866},{ 713, 2109}, + { 988, 2312},{ 1344, 2471},{ 1750, 2602},{ 2180, 2719}, + { 2642, 2819},{ 3141, 2892},{ 3653, 2939},{ 4159, 2961}, + { 4636, 2961},{ 5072, 2945},{ 5464, 2917},{ 5813, 2895}, + { 6134, 2890},{ 6458, 2883},{ 6735, 2881},{ 6953, 2902} + } + } + }, + { + { + /*Y' qi=57 INTRA*/ + { + { 170, 106},{ 1106, 1120},{ 2246, 1740},{ 3399, 1993}, + { 4482, 2077},{ 5492, 2167},{ 6446, 2273},{ 7324, 2379}, + { 8130, 2482},{ 8866, 2578},{ 9537, 2661},{10119, 2715}, + {10646, 2762},{11161, 2820},{11694, 2886},{12214, 2957}, + {12693, 3013},{13166, 3053},{13569, 3087},{13897, 3106}, + {14224, 3122},{14568, 3148},{14931, 3167},{15390, 3192} + }, + /*Y' qi=57 INTER*/ + { + { 19, 20},{ 205, 1292},{ 1096, 2229},{ 2775, 2766}, + { 4811, 2943},{ 6512, 2964},{ 7832, 2976},{ 8940, 2990}, + { 9903, 3004},{10755, 3017},{11532, 3029},{12243, 3039}, + {12891, 3047},{13502, 3058},{14073, 3065},{14603, 3071}, + {15097, 3078},{15581, 3083},{16036, 3086},{16452, 3090}, + {16855, 3093},{17222, 3094},{17552, 3092},{17851, 3098} + } + }, + { + /*Cb qi=57 INTRA*/ + { + { 16, 3},{ 197, 365},{ 384, 704},{ 603, 1001}, + { 837, 1252},{ 1077, 1455},{ 1326, 1618},{ 1581, 1748}, + { 1819, 1871},{ 2042, 1993},{ 2264, 2104},{ 2500, 2196}, + { 2722, 2280},{ 2916, 2375},{ 3103, 2473},{ 3290, 2575}, + { 3456, 2667},{ 3612, 2748},{ 3775, 2829},{ 3958, 2896}, + { 4145, 2947},{ 4307, 
3012},{ 4476, 3070},{ 4733, 3110} + }, + /*Cb qi=57 INTER*/ + { + { 94, -1},{ 111, 344},{ 112, 665},{ 147, 1002}, + { 227, 1319},{ 353, 1604},{ 543, 1849},{ 785, 2062}, + { 1066, 2257},{ 1408, 2430},{ 1827, 2568},{ 2320, 2670}, + { 2848, 2743},{ 3386, 2791},{ 3934, 2812},{ 4453, 2820}, + { 4929, 2830},{ 5368, 2842},{ 5787, 2856},{ 6190, 2875}, + { 6554, 2896},{ 6895, 2913},{ 7229, 2927},{ 7572, 2932} + } + }, + { + /*Cr qi=57 INTRA*/ + { + { 28, 8},{ 207, 383},{ 413, 716},{ 661, 999}, + { 889, 1237},{ 1123, 1433},{ 1365, 1592},{ 1603, 1731}, + { 1853, 1852},{ 2103, 1965},{ 2345, 2072},{ 2571, 2173}, + { 2763, 2271},{ 2949, 2364},{ 3146, 2438},{ 3315, 2497}, + { 3459, 2552},{ 3618, 2616},{ 3767, 2697},{ 3906, 2773}, + { 4099, 2841},{ 4281, 2916},{ 4429, 2987},{ 4569, 3030} + }, + /*Cr qi=57 INTER*/ + { + { 85, 0},{ 99, 352},{ 102, 675},{ 147, 1008}, + { 235, 1317},{ 363, 1597},{ 529, 1858},{ 748, 2094}, + { 1050, 2287},{ 1439, 2436},{ 1877, 2557},{ 2352, 2660}, + { 2869, 2740},{ 3413, 2791},{ 3962, 2815},{ 4485, 2819}, + { 4955, 2816},{ 5382, 2800},{ 5769, 2772},{ 6107, 2748}, + { 6443, 2740},{ 6754, 2739},{ 7029, 2737},{ 7284, 2745} + } + } + }, + { + { + /*Y' qi=58 INTRA*/ + { + { 164, 109},{ 1198, 1111},{ 2396, 1737},{ 3606, 1978}, + { 4727, 2048},{ 5749, 2138},{ 6708, 2243},{ 7584, 2347}, + { 8388, 2449},{ 9122, 2549},{ 9784, 2635},{10354, 2691}, + {10876, 2740},{11385, 2800},{11912, 2869},{12429, 2941}, + {12902, 2997},{13375, 3040},{13779, 3075},{14103, 3096}, + {14435, 3112},{14783, 3140},{15141, 3160},{15599, 3186} + }, + /*Y' qi=58 INTER*/ + { + { 14, 23},{ 210, 1290},{ 1277, 2178},{ 3118, 2677}, + { 5207, 2834},{ 6902, 2857},{ 8218, 2878},{ 9323, 2900}, + {10285, 2919},{11132, 2934},{11899, 2949},{12599, 2961}, + {13235, 2971},{13835, 2982},{14394, 2991},{14917, 2997}, + {15412, 3005},{15882, 3009},{16325, 3013},{16735, 3016}, + {17131, 3018},{17501, 3021},{17824, 3021},{18125, 3016} + } + }, + { + /*Cb qi=58 INTRA*/ + { + { 17, 3},{ 200, 365},{ 389, 
703},{ 613, 996}, + { 853, 1243},{ 1095, 1445},{ 1349, 1604},{ 1613, 1731}, + { 1853, 1853},{ 2074, 1978},{ 2292, 2091},{ 2526, 2184}, + { 2750, 2266},{ 2945, 2360},{ 3134, 2458},{ 3320, 2561}, + { 3482, 2654},{ 3641, 2737},{ 3804, 2818},{ 3985, 2881}, + { 4168, 2935},{ 4331, 3003},{ 4499, 3060},{ 4751, 3100} + }, + /*Cb qi=58 INTER*/ + { + { 94, -1},{ 112, 345},{ 112, 665},{ 152, 998}, + { 247, 1307},{ 406, 1580},{ 644, 1810},{ 938, 2007}, + { 1271, 2189},{ 1668, 2348},{ 2151, 2470},{ 2691, 2558}, + { 3249, 2619},{ 3798, 2659},{ 4334, 2682},{ 4849, 2692}, + { 5314, 2700},{ 5747, 2721},{ 6167, 2742},{ 6547, 2765}, + { 6902, 2790},{ 7251, 2804},{ 7583, 2819},{ 7924, 2833} + } + }, + { + /*Cr qi=58 INTRA*/ + { + { 29, 8},{ 210, 382},{ 419, 714},{ 671, 993}, + { 903, 1229},{ 1141, 1422},{ 1390, 1578},{ 1635, 1713}, + { 1889, 1833},{ 2140, 1946},{ 2379, 2055},{ 2604, 2157}, + { 2794, 2256},{ 2977, 2349},{ 3174, 2422},{ 3339, 2482}, + { 3483, 2537},{ 3643, 2604},{ 3790, 2684},{ 3927, 2757}, + { 4112, 2826},{ 4294, 2900},{ 4451, 2975},{ 4600, 3011} + }, + /*Cr qi=58 INTER*/ + { + { 86, 0},{ 99, 352},{ 103, 675},{ 151, 1004}, + { 256, 1306},{ 417, 1573},{ 628, 1819},{ 901, 2040}, + { 1262, 2217},{ 1705, 2353},{ 2191, 2466},{ 2713, 2556}, + { 3268, 2622},{ 3831, 2664},{ 4374, 2682},{ 4881, 2686}, + { 5339, 2685},{ 5747, 2668},{ 6123, 2646},{ 6465, 2630}, + { 6783, 2618},{ 7082, 2623},{ 7366, 2632},{ 7673, 2654} + } + } + }, + { + { + /*Y' qi=59 INTRA*/ + { + { 142, 112},{ 1259, 1100},{ 2552, 1711},{ 3815, 1933}, + { 4955, 1987},{ 5983, 2068},{ 6949, 2165},{ 7832, 2263}, + { 8645, 2359},{ 9392, 2454},{10066, 2536},{10643, 2589}, + {11174, 2636},{11696, 2693},{12230, 2758},{12752, 2826}, + {13239, 2883},{13721, 2926},{14139, 2959},{14479, 2978}, + {14811, 2993},{15166, 3020},{15532, 3039},{16000, 3062} + }, + /*Y' qi=59 INTER*/ + { + { 8, 25},{ 211, 1289},{ 1394, 2144},{ 3421, 2580}, + { 5611, 2689},{ 7316, 2701},{ 8643, 2717},{ 9762, 2734}, + {10735, 2750},{11587, 
2763},{12353, 2775},{13056, 2785}, + {13693, 2793},{14288, 2805},{14843, 2814},{15361, 2821}, + {15857, 2827},{16328, 2831},{16763, 2834},{17171, 2838}, + {17568, 2840},{17941, 2842},{18285, 2843},{18586, 2839} + } + }, + { + /*Cb qi=59 INTRA*/ + { + { 17, 3},{ 224, 363},{ 441, 696},{ 689, 982}, + { 945, 1222},{ 1204, 1416},{ 1474, 1571},{ 1751, 1695}, + { 2001, 1816},{ 2228, 1941},{ 2453, 2055},{ 2693, 2147}, + { 2924, 2227},{ 3125, 2321},{ 3321, 2416},{ 3510, 2520}, + { 3676, 2616},{ 3839, 2699},{ 4008, 2778},{ 4193, 2842}, + { 4371, 2898},{ 4535, 2965},{ 4710, 3023},{ 4921, 3068} + }, + /*Cb qi=59 INTER*/ + { + { 95, -5},{ 111, 343},{ 112, 664},{ 157, 995}, + { 258, 1302},{ 429, 1569},{ 691, 1790},{ 1017, 1977}, + { 1387, 2148},{ 1832, 2294},{ 2368, 2401},{ 2961, 2472}, + { 3553, 2518},{ 4133, 2545},{ 4688, 2557},{ 5198, 2563}, + { 5663, 2574},{ 6100, 2590},{ 6511, 2608},{ 6898, 2621}, + { 7274, 2634},{ 7631, 2655},{ 7984, 2669},{ 8361, 2669} + } + }, + { + /*Cr qi=59 INTRA*/ + { + { 31, 8},{ 240, 379},{ 480, 706},{ 748, 978}, + { 993, 1208},{ 1250, 1394},{ 1519, 1543},{ 1779, 1674}, + { 2047, 1792},{ 2307, 1904},{ 2552, 2013},{ 2780, 2116}, + { 2973, 2216},{ 3165, 2309},{ 3362, 2383},{ 3528, 2444}, + { 3677, 2499},{ 3841, 2566},{ 3995, 2646},{ 4139, 2720}, + { 4324, 2793},{ 4504, 2867},{ 4658, 2939},{ 4806, 2975} + }, + /*Cr qi=59 INTER*/ + { + { 89, -3},{ 98, 352},{ 103, 674},{ 156, 1002}, + { 268, 1300},{ 441, 1562},{ 673, 1801},{ 980, 2010}, + { 1385, 2175},{ 1868, 2301},{ 2401, 2402},{ 2984, 2474}, + { 3591, 2520},{ 4179, 2545},{ 4729, 2555},{ 5232, 2553}, + { 5679, 2545},{ 6081, 2530},{ 6447, 2510},{ 6791, 2496}, + { 7101, 2487},{ 7393, 2489},{ 7684, 2499},{ 7950, 2501} + } + } + }, + { + { + /*Y' qi=60 INTRA*/ + { + { 92, 116},{ 1361, 1085},{ 2746, 1686},{ 4050, 1895}, + { 5209, 1939},{ 6244, 2012},{ 7213, 2103},{ 8105, 2197}, + { 8928, 2290},{ 9685, 2381},{10371, 2460},{10952, 2511}, + {11487, 2556},{12026, 2611},{12574, 2674},{13102, 2739}, + {13597, 
2793},{14092, 2831},{14523, 2862},{14862, 2881}, + {15198, 2897},{15568, 2923},{15949, 2941},{16416, 2964} + }, + /*Y' qi=60 INTER*/ + { + { 4, 30},{ 215, 1287},{ 1547, 2104},{ 3729, 2491}, + { 5973, 2568},{ 7672, 2577},{ 9001, 2591},{10123, 2606}, + {11094, 2620},{11943, 2632},{12709, 2643},{13409, 2652}, + {14044, 2660},{14641, 2669},{15193, 2677},{15709, 2684}, + {16201, 2689},{16675, 2693},{17118, 2696},{17522, 2701}, + {17920, 2704},{18293, 2706},{18620, 2702},{18923, 2700} + } + }, + { + /*Cb qi=60 INTRA*/ + { + { 18, 3},{ 227, 362},{ 447, 694},{ 708, 974}, + { 981, 1207},{ 1252, 1397},{ 1532, 1547},{ 1822, 1663}, + { 2082, 1780},{ 2316, 1903},{ 2548, 2013},{ 2794, 2101}, + { 3029, 2178},{ 3242, 2266},{ 3445, 2360},{ 3638, 2459}, + { 3816, 2547},{ 3980, 2628},{ 4146, 2708},{ 4344, 2766}, + { 4546, 2812},{ 4725, 2872},{ 4880, 2930},{ 5054, 2966} + }, + /*Cb qi=60 INTER*/ + { + { 97, -4},{ 112, 343},{ 114, 664},{ 162, 993}, + { 273, 1294},{ 472, 1553},{ 774, 1762},{ 1138, 1939}, + { 1543, 2102},{ 2034, 2236},{ 2620, 2329},{ 3244, 2389}, + { 3860, 2423},{ 4443, 2440},{ 4997, 2449},{ 5502, 2455}, + { 5962, 2458},{ 6413, 2466},{ 6836, 2485},{ 7217, 2506}, + { 7592, 2518},{ 7957, 2533},{ 8291, 2543},{ 8574, 2545} + } + }, + { + /*Cr qi=60 INTRA*/ + { + { 32, 8},{ 243, 379},{ 488, 702},{ 771, 968}, + { 1030, 1192},{ 1300, 1373},{ 1581, 1517},{ 1854, 1643}, + { 2127, 1757},{ 2393, 1864},{ 2645, 1968},{ 2879, 2068}, + { 3078, 2166},{ 3277, 2256},{ 3484, 2325},{ 3660, 2381}, + { 3808, 2433},{ 3970, 2496},{ 4138, 2571},{ 4288, 2643}, + { 4475, 2710},{ 4655, 2778},{ 4810, 2843},{ 4959, 2879} + }, + /*Cr qi=60 INTER*/ + { + { 86, -2},{ 99, 352},{ 103, 673},{ 160, 998}, + { 284, 1292},{ 484, 1546},{ 753, 1774},{ 1100, 1973}, + { 1546, 2129},{ 2072, 2246},{ 2652, 2334},{ 3279, 2392}, + { 3911, 2425},{ 4504, 2440},{ 5044, 2443},{ 5536, 2440}, + { 5979, 2430},{ 6381, 2413},{ 6735, 2397},{ 7062, 2382}, + { 7383, 2376},{ 7680, 2375},{ 7962, 2373},{ 8203, 2379} + } + } + }, + { 
+ { + /*Y' qi=61 INTRA*/ + { + { 54, 121},{ 1477, 1069},{ 3061, 1638},{ 4465, 1808}, + { 5649, 1827},{ 6710, 1884},{ 7716, 1958},{ 8648, 2037}, + { 9514, 2116},{10311, 2192},{11033, 2261},{11641, 2305}, + {12202, 2342},{12771, 2387},{13356, 2440},{13924, 2493}, + {14444, 2541},{14951, 2576},{15409, 2600},{15779, 2615}, + {16131, 2626},{16521, 2648},{16921, 2663},{17409, 2694} + }, + /*Y' qi=61 INTER*/ + { + { -1, 32},{ 216, 1286},{ 1806, 2036},{ 4279, 2327}, + { 6629, 2352},{ 8347, 2352},{ 9707, 2357},{10860, 2364}, + {11857, 2372},{12726, 2377},{13508, 2382},{14225, 2387}, + {14877, 2392},{15484, 2398},{16048, 2401},{16581, 2405}, + {17092, 2409},{17573, 2409},{18016, 2410},{18427, 2413}, + {18829, 2415},{19221, 2415},{19578, 2415},{19980, 2413} + } + }, + { + /*Cb qi=61 INTRA*/ + { + { 19, 3},{ 231, 362},{ 456, 693},{ 733, 965}, + { 1032, 1188},{ 1330, 1369},{ 1637, 1508},{ 1956, 1612}, + { 2241, 1718},{ 2496, 1832},{ 2750, 1932},{ 3019, 2007}, + { 3274, 2074},{ 3505, 2154},{ 3725, 2236},{ 3943, 2323}, + { 4138, 2403},{ 4323, 2476},{ 4505, 2543},{ 4706, 2592}, + { 4909, 2630},{ 5109, 2675},{ 5292, 2724},{ 5495, 2768} + }, + /*Cb qi=61 INTER*/ + { + { 91, -2},{ 111, 344},{ 114, 663},{ 166, 989}, + { 291, 1285},{ 522, 1534},{ 875, 1729},{ 1302, 1889}, + { 1786, 2031},{ 2368, 2141},{ 3042, 2207},{ 3734, 2243}, + { 4388, 2259},{ 4982, 2264},{ 5533, 2265},{ 6043, 2262}, + { 6524, 2264},{ 6982, 2274},{ 7422, 2283},{ 7831, 2295}, + { 8198, 2308},{ 8593, 2319},{ 8965, 2329},{ 9258, 2340} + } + }, + { + /*Cr qi=61 INTRA*/ + { + { 33, 9},{ 245, 378},{ 497, 699},{ 801, 958}, + { 1087, 1171},{ 1384, 1342},{ 1692, 1474},{ 1992, 1589}, + { 2290, 1692},{ 2576, 1789},{ 2852, 1884},{ 3109, 1973}, + { 3324, 2061},{ 3544, 2142},{ 3763, 2199},{ 3945, 2244}, + { 4103, 2292},{ 4283, 2349},{ 4469, 2413},{ 4635, 2476}, + { 4836, 2534},{ 5038, 2592},{ 5210, 2649},{ 5358, 2682} + }, + /*Cr qi=61 INTER*/ + { + { 82, 0},{ 97, 353},{ 104, 672},{ 165, 995}, + { 303, 1284},{ 532, 1529},{ 852, 
1742},{ 1273, 1921}, + { 1798, 2057},{ 2409, 2154},{ 3090, 2212},{ 3794, 2240}, + { 4460, 2251},{ 5057, 2249},{ 5596, 2249},{ 6085, 2245}, + { 6519, 2234},{ 6908, 2220},{ 7269, 2203},{ 7618, 2196}, + { 7949, 2198},{ 8269, 2195},{ 8554, 2196},{ 8928, 2217} + } + } + }, + { + { + /*Y' qi=62 INTRA*/ + { + { 29, 124},{ 1527, 1067},{ 3221, 1618},{ 4703, 1751}, + { 5909, 1744},{ 7001, 1779},{ 8057, 1829},{ 9049, 1885}, + { 9968, 1943},{10813, 1999},{11572, 2050},{12206, 2082}, + {12801, 2107},{13402, 2140},{14020, 2180},{14625, 2223}, + {15179, 2260},{15718, 2288},{16196, 2305},{16581, 2313}, + {16963, 2324},{17382, 2341},{17800, 2351},{18318, 2376} + }, + /*Y' qi=62 INTER*/ + { + { -8, 36},{ 218, 1284},{ 2073, 1965},{ 4814, 2159}, + { 7237, 2138},{ 8979, 2124},{10378, 2115},{11570, 2109}, + {12601, 2106},{13503, 2103},{14320, 2103},{15064, 2103}, + {15746, 2103},{16384, 2104},{16975, 2105},{17534, 2105}, + {18062, 2106},{18564, 2107},{19035, 2106},{19471, 2107}, + {19890, 2107},{20288, 2107},{20651, 2107},{21012, 2108} + } + }, + { + /*Cb qi=62 INTRA*/ + { + { 21, 3},{ 283, 360},{ 565, 683},{ 907, 938}, + { 1269, 1143},{ 1611, 1311},{ 1949, 1441},{ 2290, 1535}, + { 2596, 1632},{ 2877, 1738},{ 3162, 1828},{ 3458, 1893}, + { 3745, 1948},{ 4011, 2016},{ 4253, 2089},{ 4506, 2164}, + { 4734, 2233},{ 4943, 2294},{ 5162, 2353},{ 5381, 2393}, + { 5593, 2420},{ 5807, 2454},{ 6003, 2496},{ 6210, 2543} + }, + /*Cb qi=62 INTER*/ + { + { 91, -1},{ 110, 344},{ 113, 663},{ 169, 987}, + { 306, 1279},{ 562, 1519},{ 961, 1701},{ 1450, 1845}, + { 2013, 1967},{ 2686, 2053},{ 3437, 2095},{ 4171, 2109}, + { 4841, 2109},{ 5441, 2105},{ 6002, 2097},{ 6542, 2089}, + { 7028, 2087},{ 7491, 2088},{ 7949, 2090},{ 8377, 2089}, + { 8789, 2095},{ 9195, 2103},{ 9569, 2104},{ 9937, 2102} + } + }, + { + /*Cr qi=62 INTRA*/ + { + { 38, 8},{ 308, 374},{ 619, 685},{ 984, 925}, + { 1326, 1126},{ 1662, 1285},{ 1999, 1407},{ 2328, 1512}, + { 2659, 1604},{ 2976, 1691},{ 3285, 1774},{ 3570, 1853}, + { 3815, 
1931},{ 4068, 1998},{ 4304, 2044},{ 4491, 2082}, + { 4666, 2124},{ 4870, 2174},{ 5078, 2231},{ 5262, 2285}, + { 5480, 2335},{ 5703, 2378},{ 5905, 2423},{ 6075, 2454} + }, + /*Cr qi=62 INTER*/ + { + { 79, 1},{ 95, 353},{ 102, 671},{ 169, 992}, + { 318, 1277},{ 569, 1515},{ 936, 1716},{ 1428, 1876}, + { 2034, 1993},{ 2738, 2067},{ 3511, 2095},{ 4268, 2094}, + { 4943, 2087},{ 5543, 2079},{ 6074, 2074},{ 6552, 2069}, + { 6985, 2057},{ 7366, 2043},{ 7728, 2030},{ 8086, 2021}, + { 8423, 2017},{ 8752, 2016},{ 9057, 2014},{ 9376, 2008} + } + } + }, + { + { + /*Y' qi=63 INTRA*/ + { + { -59, 134},{ 1734, 1036},{ 3743, 1521},{ 5309, 1618}, + { 6520, 1597},{ 7664, 1609},{ 8809, 1630},{ 9894, 1657}, + {10907, 1687},{11838, 1717},{12673, 1744},{13379, 1758}, + {14038, 1767},{14698, 1784},{15379, 1806},{16062, 1831}, + {16694, 1852},{17300, 1867},{17827, 1878},{18250, 1881}, + {18702, 1884},{19199, 1892},{19665, 1896},{20273, 1908} + }, + /*Y' qi=63 INTER*/ + { + { -7, 33},{ 209, 1285},{ 2309, 1904},{ 5274, 2025}, + { 7801, 1966},{ 9637, 1924},{11126, 1892},{12403, 1868}, + {13515, 1849},{14491, 1834},{15380, 1822},{16197, 1814}, + {16944, 1806},{17645, 1799},{18303, 1794},{18916, 1789}, + {19494, 1785},{20056, 1782},{20568, 1779},{21047, 1776}, + {21508, 1775},{21925, 1772},{22327, 1770},{22678, 1771} + } + }, + { + /*Cb qi=63 INTRA*/ + { + { 20, 3},{ 294, 357},{ 608, 673},{ 1047, 908}, + { 1501, 1090},{ 1898, 1240},{ 2275, 1353},{ 2654, 1427}, + { 3014, 1502},{ 3366, 1579},{ 3726, 1637},{ 4084, 1674}, + { 4425, 1703},{ 4752, 1743},{ 5058, 1791},{ 5377, 1838}, + { 5676, 1877},{ 5946, 1912},{ 6213, 1945},{ 6458, 1969}, + { 6704, 1982},{ 6969, 1997},{ 7210, 2017},{ 7439, 2037} + }, + /*Cb qi=63 INTER*/ + { + { 86, 1},{ 108, 345},{ 111, 663},{ 168, 985}, + { 307, 1276},{ 577, 1513},{ 1007, 1688},{ 1550, 1819}, + { 2189, 1921},{ 2938, 1981},{ 3744, 2002},{ 4512, 2002}, + { 5199, 1996},{ 5824, 1986},{ 6419, 1971},{ 6978, 1954}, + { 7507, 1940},{ 8015, 1932},{ 8502, 1928},{ 8978, 
1920}, + { 9410, 1915},{ 9842, 1910},{10262, 1901},{10634, 1896} + } + }, + { + /*Cr qi=63 INTRA*/ + { + { 38, 7},{ 324, 367},{ 677, 670},{ 1136, 892}, + { 1562, 1070},{ 1951, 1209},{ 2326, 1313},{ 2694, 1399}, + { 3074, 1471},{ 3460, 1531},{ 3850, 1575},{ 4214, 1622}, + { 4522, 1679},{ 4819, 1723},{ 5089, 1749},{ 5315, 1769}, + { 5530, 1792},{ 5756, 1825},{ 6006, 1860},{ 6244, 1889}, + { 6514, 1924},{ 6792, 1946},{ 7026, 1962},{ 7191, 1971} + }, + /*Cr qi=63 INTER*/ + { + { 80, 2},{ 95, 354},{ 101, 671},{ 167, 990}, + { 321, 1274},{ 585, 1509},{ 984, 1702},{ 1534, 1849}, + { 2217, 1947},{ 3005, 1995},{ 3839, 1999},{ 4619, 1986}, + { 5310, 1973},{ 5933, 1961},{ 6486, 1952},{ 6988, 1942}, + { 7435, 1927},{ 7817, 1911},{ 8198, 1900},{ 8552, 1895}, + { 8881, 1890},{ 9253, 1883},{ 9598, 1876},{ 9923, 1859} + } + } + } +}; + +#endif diff --git a/Engine/lib/libtheora/lib/ocintrin.h b/Engine/lib/libtheora/lib/ocintrin.h new file mode 100644 index 000000000..d49ebb215 --- /dev/null +++ b/Engine/lib/libtheora/lib/ocintrin.h @@ -0,0 +1,128 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*Some common macros for potential platform-specific optimization.*/ +#include +#if !defined(_ocintrin_H) +# define _ocintrin_H (1) + +/*Some specific platforms may have optimized intrinsic or inline assembly + versions of these functions which can substantially improve performance. + We define macros for them to allow easy incorporation of these non-ANSI + features.*/ + +/*Note that we do not provide a macro for abs(), because it is provided as a + library function, which we assume is translated into an intrinsic to avoid + the function call overhead and then implemented in the smartest way for the + target platform. + With modern gcc (4.x), this is true: it uses cmov instructions if the + architecture supports it and branchless bit-twiddling if it does not (the + speed difference between the two approaches is not measurable). + Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150) + by Sun Microsystems, despite prior art dating back to at least 1996: + http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT + On gcc 3.x, however, our assumption is not true, as abs() is translated to a + conditional jump, which is horrible on deeply pipelined architectures (e.g., + all consumer architectures for the past decade or more). + Also be warned that -C*abs(x) where C is a constant is mis-optimized as + abs(C*x) on every gcc release before 4.2.3.
+ See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */ + +/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if + given an appropriate architecture, but the branchless bit-twiddling versions + are just as fast, and do not require any special target architecture. + Earlier gcc versions (3.x) compiled both versions to the same assembly + instructions, because of the way they represented ((_b)>(_a)) internally.*/ +#define OC_MAXI(_a,_b) ((_a)-((_a)-(_b)&-((_b)>(_a)))) +#define OC_MINI(_a,_b) ((_a)+((_b)-(_a)&-((_b)<(_a)))) +/*Clamps an integer into the given range. + If _a>_c, then the lower bound _a is respected over the upper bound _c (this + behavior is required to meet our documented API behavior). + _a: The lower bound. + _b: The value to clamp. + _c: The upper bound.*/ +#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) +#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255)))) +/*This has a chance of compiling branchless, and is just as fast as the + bit-twiddling method, which is slightly less portable, since it relies on a + sign-extended rightshift, which is not guaranteed by ANSI (but present on + every relevant platform).*/ +#define OC_SIGNI(_a) (((_a)>0)-((_a)<0)) +/*Slightly more portable than relying on a sign-extended right-shift (which is + not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both) + compile it into the right-shift anyway.*/ +#define OC_SIGNMASK(_a) (-((_a)<0)) +/*Divides an integer by a power of two, truncating towards 0. + _dividend: The integer to divide. + _shift: The non-negative power of two to divide by.
+ _rmask: (1<<_shift)-1*/ +#define OC_DIV_POW2(_dividend,_shift,_rmask)\ + ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift)) +/*Divides _x by 65536, truncating towards 0.*/ +#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF) +/*Divides _x by 2, truncating towards 0.*/ +#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1) +/*Divides _x by 8, truncating towards 0.*/ +#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7) +/*Divides _x by 16, truncating towards 0.*/ +#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF) +/*Right shifts _dividend by _shift, adding _rval, and subtracting one for + negative dividends first. + When _rval is (1<<_shift-1), this is equivalent to division with rounding + ties away from zero.*/ +#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\ + ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift)) +/*Divides a _x by 2, rounding towards even numbers.*/ +#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1) +/*Divides a _x by (1<<(_shift)), rounding towards even numbers.*/ +#define OC_DIV_POW2_RE(_x,_shift) \ + ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift)) +/*Swaps two integers _a and _b if _a>_b.*/ +#define OC_SORT2I(_a,_b) \ + do{ \ + int t__; \ + t__=((_a)^(_b))&-((_b)<(_a)); \ + (_a)^=t__; \ + (_b)^=t__; \ + } \ + while(0) + +/*Accesses one of four (signed) bytes given an index. + This can be used to avoid small lookup tables.*/ +#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \ + ((signed char) \ + (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8)) +/*Accesses one of eight (unsigned) nibbles given an index. 
+ This can be used to avoid small lookup tables.*/ +#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \ + ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \ + ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF) + + + +/*All of these macros should expect floats as arguments.*/ +#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a)) +#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a)) +#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c))) +#define OC_FABSF(_f) ((float)fabs(_f)) +#define OC_SQRTF(_f) ((float)sqrt(_f)) +#define OC_POWF(_b,_e) ((float)pow(_b,_e)) +#define OC_LOGF(_f) ((float)log(_f)) +#define OC_IFLOORF(_f) ((int)floor(_f)) +#define OC_ICEILF(_f) ((int)ceil(_f)) + +#endif diff --git a/Engine/lib/libtheora/lib/quant.c b/Engine/lib/libtheora/lib/quant.c new file mode 100644 index 000000000..8359f5abe --- /dev/null +++ b/Engine/lib/libtheora/lib/quant.c @@ -0,0 +1,119 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: quant.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "quant.h" +#include "decint.h" + +static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; +static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; + +/*Initializes the dequantization tables from a set of quantizer info. 
+ Currently the dequantizer (and elsewhere enquantizer) tables are expected to + be initialized as pointing to the storage reserved for them in the + oc_theora_state (resp. oc_enc_ctx) structure. + If some tables are duplicates of others, the pointers will be adjusted to + point to a single copy of the tables, but the storage for them will not be + freed. + If you're concerned about the memory footprint, the obvious thing to do is + to move the storage out of its fixed place in the structures and allocate + it on demand. + However, a much, much better option is to only store the quantization + matrices being used for the current frame, and to recalculate these as the + qi values change between frames (this is what VP3 did).*/ +void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], + int _pp_dc_scale[64],const th_quant_info *_qinfo){ + /*Coding mode: intra or inter.*/ + int qti; + /*Y', C_b, C_r*/ + int pli; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + /*Quality index.*/ + int qi; + /*Range iterator.*/ + int qri; + for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){ + th_quant_base base; + ogg_uint32_t q; + int qi_start; + int qi_end; + memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], + sizeof(base)); + qi_start=qi; + if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1; + else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; + /*Iterate over quality indices in this range.*/ + for(;;){ + ogg_uint32_t qfac; + int zzi; + int ci; + /*In the original VP3.2 code, the rounding offset and the size of the + dead zone around 0 were controlled by a "sharpness" parameter. + The size of our dead zone is now controlled by the per-coefficient + quality thresholds returned by our HVS module. + We round down from a more accurate value when the quality of the + reconstruction does not fall below our threshold and it saves bits.
+ Hence, all of that VP3.2 code is gone from here, and the remaining + floating point code has been implemented as equivalent integer code + with exact precision.*/ + qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]; + /*For postprocessing, not dequantization.*/ + if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160); + /*Scale DC the coefficient from the proper table.*/ + q=(qfac/100)<<2; + q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); + _dequant[qi][pli][qti][0]=(ogg_uint16_t)q; + /*Now scale AC coefficients from the proper table.*/ + for(zzi=1;zzi<64;zzi++){ + q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2; + q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); + _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q; + } + /*If this is a duplicate of a previous matrix, use that instead. + This simple check helps us improve cache coherency later.*/ + { + int dupe; + int qtj; + int plj; + dupe=0; + for(qtj=0;qtj<=qti;qtj++){ + for(plj=0;plj<(qtj=qi_end)break; + /*Interpolate the next base matrix.*/ + for(ci=0;ci<64;ci++){ + base[ci]=(unsigned char)( + (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ + (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) + +_qinfo->qi_ranges[qti][pli].sizes[qri])/ + (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); + } + } + } + } +} diff --git a/Engine/lib/libtheora/lib/dec/quant.h b/Engine/lib/libtheora/lib/quant.h similarity index 82% rename from Engine/lib/libtheora/lib/dec/quant.h rename to Engine/lib/libtheora/lib/quant.h index c3cefd1d8..49ce13a65 100644 --- a/Engine/lib/libtheora/lib/dec/quant.h +++ b/Engine/lib/libtheora/lib/quant.h @@ -5,13 +5,13 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: quant.h 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ @@ -21,14 +21,13 @@ # include "ocintrin.h" typedef ogg_uint16_t oc_quant_table[64]; -typedef oc_quant_table oc_quant_tables[64]; /*Maximum scaled quantizer value.*/ #define OC_QUANT_MAX (1024<<2) -void oc_dequant_tables_init(oc_quant_table *_dequant[2][3], +void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], int _pp_dc_scale[64],const th_quant_info *_qinfo); #endif diff --git a/Engine/lib/libtheora/lib/rate.c b/Engine/lib/libtheora/lib/rate.c new file mode 100644 index 000000000..4f43bb2e5 --- /dev/null +++ b/Engine/lib/libtheora/lib/rate.c @@ -0,0 +1,1137 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: rate.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + +/*A rough lookup table for tan(x), 0<=x>24; + if(i>=17)i=16; + t0=OC_ROUGH_TAN_LOOKUP[i]; + t1=OC_ROUGH_TAN_LOOKUP[i+1]; + d=_alpha*36-(i<<24); + return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32); +} + +/*Re-initialize the Bessel filter coefficients with the specified delay. + This does not alter the x/y state, but changes the reaction time of the + filter. + Altering the time constant of a reactive filter without altering internal + state is something that has to be done carefully, but our design operates at + high enough delays and with small enough time constant changes to make it + safe.*/ +static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){ + int alpha; + ogg_int64_t one48; + ogg_int64_t warp; + ogg_int64_t k1; + ogg_int64_t k2; + ogg_int64_t d; + ogg_int64_t a; + ogg_int64_t ik2; + ogg_int64_t b1; + ogg_int64_t b2; + /*This borrows some code from an unreleased version of Postfish.
+ See the recipe at http://unicorn.us.com/alex/2polefilters.html for details + on deriving the filter coefficients.*/ + /*alpha is Q24*/ + alpha=(1<<24)/_delay; + one48=(ogg_int64_t)1<<48; + /*warp is 7.12*/ + warp=OC_MAXI(oc_warp_alpha(alpha),1); + /*k1 is 9.12*/ + k1=3*warp; + /*k2 is 16.24.*/ + k2=k1*warp; + /*d is 16.15.*/ + d=((1<<12)+k1<<12)+k2+256>>9; + /*a is 0.32, since d is larger than both 1.0 and k2.*/ + a=(k2<<23)/d; + /*ik2 is 25.24.*/ + ik2=one48/k2; + /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/ + b1=2*a*(ik2-(1<<24)); + /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/ + b2=(one48<<8)-(4*a<<24)-b1; + /*All of the filter parameters are Q24.*/ + _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32); + _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32); + _f->g=(ogg_int32_t)(a+128>>8); +} + +/*Initialize a 2nd order low-pass Bessel filter with the corresponding delay + and initial value. + _value is Q24.*/ +static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){ + oc_iir_filter_reinit(_f,_delay); + _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value; +} + +static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){ + ogg_int64_t c0; + ogg_int64_t c1; + ogg_int64_t g; + ogg_int64_t x0; + ogg_int64_t x1; + ogg_int64_t y0; + ogg_int64_t y1; + ogg_int64_t ya; + c0=_f->c[0]; + c1=_f->c[1]; + g=_f->g; + x0=_f->x[0]; + x1=_f->x[1]; + y0=_f->y[0]; + y1=_f->y[1]; + ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24; + _f->x[1]=(ogg_int32_t)x0; + _f->x[0]=_x; + _f->y[1]=(ogg_int32_t)y0; + _f->y[0]=(ogg_int32_t)ya; + return ya; +} + + + +/*Search for the quantizer that matches the target most closely. 
+ We don't assume a linear ordering, but when there are ties we pick the + quantizer closest to the old one.*/ +static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old, + int _qi_min,ogg_int64_t _log_qtarget){ + ogg_int64_t best_qdiff; + int best_qi; + int qi; + best_qi=_qi_min; + best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget; + best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff); + for(qi=_qi_min+1;qi<64;qi++){ + ogg_int64_t qdiff; + qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget; + qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff); + if(qdiffstate.qis[0]; + /*If rate control is active, use the lambda for the _target_ quantizer. + This allows us to scale to rates slightly lower than we'd normally be able + to reach, and give the rate control a semblance of "fractional qi" + precision. + TODO: Add API for changing QI, and allow extra precision.*/ + if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget; + else lq=_enc->log_qavg[_qti][qi]; + /*The resulting lambda value is less than 0x500000.*/ + _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL); + /*Select additional quantizers. + The R-D optimal block AC quantizer statistics suggest that the distribution + is roughly Gaussian-like with a slight positive skew. + K-means clustering on log_qavg to select 3 quantizers produces cluster + centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}. + Experiments confirm these are relatively good choices. + + Although we do greedy R-D optimization of the qii flags to avoid switching + too frequently, this becomes ineffective at low rates, either because we + do a poor job of predicting the actual R-D cost, or the greedy + optimization is not sufficient. + Therefore adaptive quantization is disabled above an (experimentally + suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or + INTER qi=20 with current matrices). 
+ This may need to be revised if the R-D cost estimation or qii flag + optimization strategies change.*/ + nqis=1; + if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible){ + qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0, + lq+(OC_Q57(7)+5)/10); + if(qi1!=qi)_enc->state.qis[nqis++]=qi1; + qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0, + lq-(OC_Q57(6)+5)/10); + if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1; + } + _enc->state.nqis=nqis; +} + +/*Binary exponential of _log_scale with 24-bit fractional precision and + saturation. + _log_scale: A binary logarithm in Q24 format. + Return: The binary exponential in Q24 format, saturated to 2**47-1 if + _log_scale was too large.*/ +static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){ + if(_log_scale<(ogg_int32_t)23<<24){ + ogg_int64_t ret; + ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24)); + return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL; + } + return 0x7FFFFFFFFFFFLL; +} + +/*Convenience function converts Q57 value to a clamped 32-bit Q24 value + _in: input in Q57 format. + Return: same number in Q24 */ +static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){ + ogg_int64_t ret; + ret=_in+((ogg_int64_t)1<<32)>>33; + /*0x80000000 is automatically converted to unsigned on 32-bit systems. + -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to + unsigned.*/ + return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF); +} + +/*Binary exponential of _log_scale with 24-bit fractional precision and + saturation. + _log_scale: A binary logarithm in Q57 format. + Return: The binary exponential in Q24 format, saturated to 2**31-1 if + _log_scale was too large.*/ +static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){ + if(_log_scalerc.bits_per_frame=(_enc->state.info.target_bitrate* + (ogg_int64_t)_enc->state.info.fps_denominator)/ + _enc->state.info.fps_numerator; + /*Insane framerates or frame sizes mean insane bitrates. 
+ Let's not get carried away.*/ + if(_enc->rc.bits_per_frame>0x400000000000LL){ + _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; + } + else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; + _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); + _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; + /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend + on a single keyframe interval. + We can require fully half the bits in an interval for a keyframe, so this + initial level gives us maximum flexibility for over/under-shooting in + subsequent frames.*/ + _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* + OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); + _enc->rc.fullness=_enc->rc.target; + /*Pick exponents and initial scales for quantizer selection.*/ + npixels=_enc->state.info.frame_width* + (ogg_int64_t)_enc->state.info.frame_height; + _enc->rc.log_npixels=oc_blog64(npixels); + ibpp=npixels/_enc->rc.bits_per_frame; + if(ibpp<1){ + _enc->rc.exp[0]=59; + _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8); + } + else if(ibpp<2){ + _enc->rc.exp[0]=55; + _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8); + } + else{ + _enc->rc.exp[0]=48; + _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8); + } + if(ibpp<4){ + _enc->rc.exp[1]=100; + _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8); + } + else if(ibpp<8){ + _enc->rc.exp[1]=95; + _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8); + } + else{ + _enc->rc.exp[1]=73; + _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8); + } + _enc->rc.prev_drop_count=0; + _enc->rc.log_drop_scale=OC_Q57(0); + /*Set up second order followers, initialized according to corresponding + time constants.*/ + oc_iir_filter_init(&_enc->rc.scalefilter[0],4, + oc_q57_to_q24(_enc->rc.log_scale[0])); + inter_delay=(_enc->rc.twopass? 
+ OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1; + _enc->rc.inter_count=0; + /*We clamp the actual inter_delay to a minimum of 10 to work within the range + of values where later incrementing the delay works as designed. + 10 is not an exact choice, but rather a good working trade-off.*/ + _enc->rc.inter_delay=10; + _enc->rc.inter_delay_target=inter_delay; + oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay, + oc_q57_to_q24(_enc->rc.log_scale[1])); + oc_iir_filter_init(&_enc->rc.vfrfilter,4, + oc_bexp64_q24(_enc->rc.log_drop_scale)); +} + +void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){ + _rc->twopass=0; + _rc->twopass_buffer_bytes=0; + _rc->twopass_force_kf=0; + _rc->frame_metrics=NULL; + _rc->rate_bias=0; + if(_enc->state.info.target_bitrate>0){ + /*The buffer size is set equal to the keyframe interval, clamped to the + range [12,256] frames. + The 12 frame minimum gives us some chance to distribute bit estimation + errors. + The 256 frame maximum means we'll require 8-10 seconds of pre-buffering + at 24-30 fps, which is not unreasonable.*/ + _rc->buf_delay=_enc->keyframe_frequency_force>256? + 256:_enc->keyframe_frequency_force; + /*By default, enforce all buffer constraints.*/ + _rc->drop_frames=1; + _rc->cap_overflow=1; + _rc->cap_underflow=0; + oc_enc_rc_reset(_enc); + } +} + +void oc_rc_state_clear(oc_rc_state *_rc){ + _ogg_free(_rc->frame_metrics); +} + +void oc_enc_rc_resize(oc_enc_ctx *_enc){ + /*If encoding has not yet begun, reset the buffer state.*/ + if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc); + else{ + int idt; + /*Otherwise, update the bounds on the buffer, but not the current + fullness.*/ + _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate* + (ogg_int64_t)_enc->state.info.fps_denominator)/ + _enc->state.info.fps_numerator; + /*Insane framerates or frame sizes mean insane bitrates. 
+ Let's not get carried away.*/ + if(_enc->rc.bits_per_frame>0x400000000000LL){ + _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; + } + else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; + _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); + _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; + _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* + OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); + /*Update the INTER-frame scale filter delay. + We jump to it immediately if we've already seen enough frames; otherwise + it is simply set as the new target.*/ + _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10); + if(idtrc.inter_delay,_enc->rc.inter_count)){ + oc_iir_filter_init(&_enc->rc.scalefilter[1],idt, + _enc->rc.scalefilter[1].y[0]); + _enc->rc.inter_delay=idt; + } + } + /*If we're in pass-2 mode, make sure the frame metrics array is big enough + to hold frame statistics for the full buffer.*/ + if(_enc->rc.twopass==2){ + int cfm; + int buf_delay; + int reset_window; + buf_delay=_enc->rc.buf_delay; + reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0|| + buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]); + cfm=_enc->rc.cframe_metrics; + /*Only try to resize the frame metrics buffer if a) it's too small and + b) we were using a finite buffer, or are about to start.*/ + if(cfmrc.frame_metrics!=NULL||reset_window)){ + oc_frame_metrics *fm; + int nfm; + int fmh; + fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics, + buf_delay*sizeof(*_enc->rc.frame_metrics)); + if(fm==NULL){ + /*We failed to allocate a finite buffer.*/ + /*If we don't have a valid 2-pass header yet, just return; we'll reset + the buffer size when we read the header.*/ + if(_enc->rc.frames_total[0]==0)return; + /*Otherwise revert to the largest finite buffer previously set, or to + whole-file buffering if we were still using that.*/ + 
_enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL? + cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]; + oc_enc_rc_resize(_enc); + return; + } + _enc->rc.frame_metrics=fm; + _enc->rc.cframe_metrics=buf_delay; + /*Re-organize the circular buffer. Entries that had wrapped around to the front of the old buffer are copied into the newly added space after index cfm; if the window still wraps at the new size, the entries that did not fit are shifted down to the front. memcpy/memmove take byte counts, so both element counts are scaled by sizeof(*fm).*/ + fmh=_enc->rc.frame_metrics_head; + nfm=_enc->rc.nframe_metrics; + if(fmh+nfm>cfm){ + int shift; + shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm); + memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm)); + if(fmh+nfm>buf_delay)memmove(fm,fm+shift,(fmh+nfm-buf_delay)*sizeof(*fm)); + } + } + /*We were using whole-file buffering; now we're not.*/ + if(reset_window){ + _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0; + _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; + _enc->rc.scale_window_end=_enc->rc.scale_window0= + _enc->state.curframe_num+_enc->prev_dup_count+1; + if(_enc->rc.twopass_buffer_bytes){ + int qti; + /*We already read the metrics for the first frame in the window.*/ + *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics; + _enc->rc.nframe_metrics++; + qti=_enc->rc.cur_metrics.frame_type; + _enc->rc.nframes[qti]++; + _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count; + _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); + _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1; + if(_enc->rc.scale_window_end-_enc->rc.scale_window0rc.twopass_buffer_bytes=0; + } + } + } + /*Otherwise, we could shrink the size of the current window, if necessary, + but leaving it like it is lets us adapt to the new buffer size more + gracefully.*/ + } +} + +/*Scale the number of frames by the number of expected drops/duplicates.*/ +static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){ + if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){ + ogg_int64_t dup_scale; + dup_scale=oc_bexp64((_rc->log_drop_scale + +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8)); + if(dup_scale<_nframes<<8){ + int dup_scalei; + dup_scalei=(int)dup_scale; +
if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei; + } + else _nframes=!!_nframes; + } + return _nframes; +} + +int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){ + ogg_int64_t rate_total; + ogg_int64_t rate_bias; + int nframes[2]; + int buf_delay; + int buf_pad; + ogg_int64_t log_qtarget; + ogg_int64_t log_scale0; + ogg_int64_t log_cur_scale; + ogg_int64_t log_qexp; + int exp0; + int old_qi; + int qi; + /*Figure out how to re-distribute bits so that we hit our fullness target + before the last keyframe in our current buffer window (after the current + frame), or the end of the buffer window, whichever comes first.*/ + log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33; + buf_pad=0; + switch(_enc->rc.twopass){ + default:{ + ogg_uint32_t next_key_frame; + /*Single pass mode: assume only forced keyframes and attempt to estimate + the drop count for VFR content.*/ + next_key_frame=_qti?_enc->keyframe_frequency_force + -(_enc->state.curframe_num-_enc->state.keyframe_num):0; + nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay) + +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force; + if(nframes[0]+_qti>1){ + nframes[0]--; + buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force; + } + else buf_delay=_enc->rc.buf_delay; + nframes[1]=buf_delay-nframes[0]; + /*Downgrade the delta frame rate to correspond to the recent drop count + history.*/ + nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]); + }break; + case 1:{ + /*Pass 1 mode: use a fixed qi value.*/ + qi=_enc->state.qis[0]; + _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi]; + return qi; + }break; + case 2:{ + ogg_int64_t scale_sum[2]; + int qti; + /*Pass 2 mode: we know exactly how much of each frame type there is in + the current buffer window, and have estimates for the scales.*/ + nframes[0]=_enc->rc.nframes[0]; + nframes[1]=_enc->rc.nframes[1]; + scale_sum[0]=_enc->rc.scale_sum[0]; + scale_sum[1]=_enc->rc.scale_sum[1]; + /*The window 
size can be slightly larger than the buffer window for VFR + content; clamp it down, if appropriate (the excess will all be dup + frames).*/ + buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0, + _enc->rc.buf_delay); + /*If we're approaching the end of the file, add some slack to keep us + from slamming into a rail. + Our rate accuracy goes down, but it keeps the result sensible. + We position the target where the first forced keyframe beyond the end + of the file would be (for consistency with 1-pass mode).*/ + buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num + +_enc->keyframe_frequency_force-_enc->rc.scale_window0); + if(buf_delayrc.frame_metrics!=NULL){ + int fmi; + int fm_tail; + fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics; + if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics; + for(fmi=fm_tail;;){ + oc_frame_metrics *m; + fmi--; + if(fmi<0)fmi+=_enc->rc.cframe_metrics; + /*Stop before we remove the first frame.*/ + if(fmi==_enc->rc.frame_metrics_head)break; + m=_enc->rc.frame_metrics+fmi; + /*If we find a keyframe, remove it and everything past it.*/ + if(m->frame_type==OC_INTRA_FRAME){ + do{ + qti=m->frame_type; + nframes[qti]--; + scale_sum[qti]-=oc_bexp_q24(m->log_scale); + buf_delay-=m->dup_count+1; + fmi++; + if(fmi>=_enc->rc.cframe_metrics)fmi=0; + m=_enc->rc.frame_metrics+fmi; + } + while(fmi!=fm_tail); + /*And stop scanning backwards.*/ + break; + } + } + } + } + /*If we're not using the same frame type as in pass 1 (because someone + changed the keyframe interval), remove that scale estimate. + We'll add in a replacement for the correct frame type below.*/ + qti=_enc->rc.cur_metrics.frame_type; + if(qti!=_qti){ + nframes[qti]--; + scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); + } + /*Compute log_scale estimates for each frame type from the pass-1 scales + we measured in the current window.*/ + for(qti=0;qti<2;qti++){ + _enc->rc.log_scale[qti]=nframes[qti]>0? 
+ oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24): + -_enc->rc.log_npixels; + } + /*If we're not using the same frame type as in pass 1, add a scale + estimate for the corresponding frame using the current low-pass + filter value. + This is mostly to ensure we have a valid estimate even when pass 1 had + no frames of this type in the buffer window. + TODO: We could also plan ahead and figure out how many keyframes we'll + be forced to add in the current buffer window.*/ + qti=_enc->rc.cur_metrics.frame_type; + if(qti!=_qti){ + ogg_int64_t scale; + scale=_enc->rc.log_scale[_qti]rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL; + scale*=nframes[_qti]; + nframes[_qti]++; + scale+=oc_bexp_q24(log_cur_scale>>33); + _enc->rc.log_scale[_qti]=oc_blog64(scale) + -oc_blog64(nframes[qti])-OC_Q57(24); + } + else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33; + /*Add the padding from above. + This basically reverts to 1-pass estimations in the last keyframe + interval.*/ + if(buf_pad>0){ + ogg_int64_t scale; + int nextra_frames; + /*Extend the buffer.*/ + buf_delay+=buf_pad; + /*Add virtual delta frames according to the estimated drop count.*/ + nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad); + /*And blend in the low-pass filtered scale according to how many frames + we added.*/ + scale= + oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1] + +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames; + nframes[1]+=nextra_frames; + _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24); + } + }break; + } + /*If we've been missing our target, add a penalty term.*/ + rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))* + (buf_delay-buf_pad); + /*rate_total is the total bits available over the next buf_delay frames.*/ + rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias + +buf_delay*_enc->rc.bits_per_frame; + log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels; + /*If there aren't enough 
bits to achieve our desired fullness level, use the + minimum quality permitted.*/ + if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG; + else{ + static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL; + ogg_int64_t log_scale1; + ogg_int64_t rlo; + ogg_int64_t rhi; + log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels; + rlo=0; + rhi=(rate_total+nframes[_qti]-1)/nframes[_qti]; + while(rlo>1; + log_rpow=oc_blog64(curr)-log_scale0; + log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti]; + if(_qti)log_rpow+=LOG_KEY_RATIO>>6; + else log_rpow-=LOG_KEY_RATIO>>6; + log_rpow*=_enc->rc.exp[1-_qti]; + rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow); + rdiff=nframes[_qti]*curr+rscale-rate_total; + if(rdiff<0)rlo=curr+1; + else if(rdiff>0)rhi=curr-1; + else break; + } + log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/ + _enc->rc.exp[_qti]<<6); + log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); + } + /*The above allocation looks only at the total rate we'll accumulate in the + next buf_delay frames. + However, we could overflow the buffer on the very next frame, so check for + that here, if we're not using a soft target.*/ + exp0=_enc->rc.exp[_qti]; + if(_enc->rc.cap_overflow){ + ogg_int64_t margin; + ogg_int64_t soft_limit; + ogg_int64_t log_soft_limit; + /*Allow 3% of the buffer for prediction error. 
+ This should be plenty, and we don't mind if we go a bit over; we only + want to keep these bits from being completely wasted.*/ + margin=_enc->rc.max+31>>5; + /*We want to use at least this many bits next frame.*/ + soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin); + log_soft_limit=oc_blog64(soft_limit); + /*If we're predicting we won't use that many...*/ + log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; + if(log_scale0-log_qexp>32)* + ((OC_MINI(margin,soft_limit)<<32)/margin); + log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); + } + } + /*If this was not one of the initial frames, limit the change in quality.*/ + old_qi=_enc->state.qis[0]; + if(_clamp){ + ogg_int64_t log_qmin; + ogg_int64_t log_qmax; + /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the + current quantizer. + TODO: With user-specified quant matrices, we need to enlarge these limits + if they don't actually let us change qi values.*/ + log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL; + log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL; + log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax); + } + /*The above allocation looks only at the total rate we'll accumulate in the + next buf_delay frames. + However, we could bust the budget on the very next frame, so check for that + here, if we're not using a soft target.*/ + /* Disabled when our minimum qi > 0; if we saturate log_qtarget to + to the maximum possible size when we have a minimum qi, the + resulting lambda will interact very strangely with SKIP. The + resulting artifacts look like waterfalls. */ + if(_enc->state.info.quality==0){ + ogg_int64_t log_hard_limit; + /*Compute the maximum number of bits we can use in the next frame. + Allow 50% of the rate for a single frame for prediction error. 
+ This may not be enough for keyframes or sudden changes in complexity.*/ + log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1)); + /*If we're predicting we'll use more than this...*/ + log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; + if(log_scale0-log_qexp>log_hard_limit){ + /*Force the target to hit our limit exactly.*/ + log_qexp=log_scale0-log_hard_limit; + log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); + /*If that target is unreasonable, oh well; we'll have to drop.*/ + log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); + } + } + /*Compute a final estimate of the number of bits we plan to use.*/ + log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti]; + _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp); + qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi, + _enc->state.info.quality,log_qtarget); + /*Save the quantizer target for lambda calculations.*/ + _enc->rc.log_qtarget=log_qtarget; + return qi; +} + +int oc_enc_update_rc_state(oc_enc_ctx *_enc, + long _bits,int _qti,int _qi,int _trial,int _droppable){ + ogg_int64_t buf_delta; + ogg_int64_t log_scale; + int dropped; + dropped=0; + /* Drop frames also disabled for now in the case of infinite-buffer + two-pass mode */ + if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){ + _droppable=0; + } + buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count); + if(_bits<=0){ + /*We didn't code any blocks in this frame.*/ + log_scale=OC_Q57(-64); + _bits=0; + } + else{ + ogg_int64_t log_bits; + ogg_int64_t log_qexp; + /*Compute the estimated scale factor for this frame type.*/ + log_bits=oc_blog64(_bits); + log_qexp=_enc->rc.log_qtarget-OC_Q57(2); + log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]); + log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16)); + } + /*Special two-pass processing.*/ + switch(_enc->rc.twopass){ + case 1:{ + /*Pass 1 mode: save the metrics for this frame.*/ + _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale); + 
_enc->rc.cur_metrics.dup_count=_enc->dup_count; + _enc->rc.cur_metrics.frame_type=_enc->state.frame_type; + _enc->rc.twopass_buffer_bytes=0; + }break; + case 2:{ + /*Pass 2 mode:*/ + if(!_trial){ + ogg_int64_t next_frame_num; + int qti; + /*Move the current metrics back one frame.*/ + *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics; + next_frame_num=_enc->state.curframe_num+_enc->dup_count+1; + /*Back out the last frame's statistics from the sliding window.*/ + qti=_enc->rc.prev_metrics.frame_type; + _enc->rc.frames_left[qti]--; + _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count; + _enc->rc.nframes[qti]--; + _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count; + _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale); + _enc->rc.scale_window0=(int)next_frame_num; + /*Free the corresponding entry in the circular buffer.*/ + if(_enc->rc.frame_metrics!=NULL){ + _enc->rc.nframe_metrics--; + _enc->rc.frame_metrics_head++; + if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){ + _enc->rc.frame_metrics_head=0; + } + } + /*Mark us ready for the next 2-pass packet.*/ + _enc->rc.twopass_buffer_bytes=0; + /*Update state, so the user doesn't have to keep calling 2pass_in after + they've fed in all the data when we're using a finite buffer.*/ + _enc->prev_dup_count=_enc->dup_count; + oc_enc_rc_2pass_in(_enc,NULL,0); + } + }break; + } + /*Common to all passes:*/ + if(_bits>0){ + if(_trial){ + oc_iir_filter *f; + /*Use the estimated scale factor directly if this was a trial.*/ + f=_enc->rc.scalefilter+_qti; + f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale); + _enc->rc.log_scale[_qti]=log_scale; + } + else{ + /*Lengthen the time constant for the INTER filter as we collect more + frame statistics, until we reach our target.*/ + if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&& + _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){ + oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay); + } + /*Otherwise update 
the low-pass scale filter for this frame type, + regardless of whether or not we dropped this frame.*/ + _enc->rc.log_scale[_qti]=oc_iir_filter_update( + _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33; + /*If this frame busts our budget, it must be dropped.*/ + if(_droppable&&_enc->rc.fullness+buf_delta<_bits){ + _enc->rc.prev_drop_count+=1+_enc->dup_count; + _bits=0; + dropped=1; + } + else{ + ogg_uint32_t drop_count; + /*Update a low-pass filter to estimate the "real" frame rate taking + drops and duplicates into account. + This is only done if the frame is coded, as it needs the final + count of dropped frames.*/ + drop_count=_enc->rc.prev_drop_count+1; + if(drop_count>0x7F)drop_count=0x7FFFFFFF; + else drop_count<<=24; + _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update( + &_enc->rc.vfrfilter,drop_count))-OC_Q57(24); + /*Initialize the drop count for this frame to the user-requested dup + count. + It will be increased if we drop more frames.*/ + _enc->rc.prev_drop_count=_enc->dup_count; + } + } + /*Increment the INTER frame count, for filter adaptation purposes.*/ + if(_enc->rc.inter_countrc.inter_count+=_qti; + } + /*Increase the drop count.*/ + else _enc->rc.prev_drop_count+=1+_enc->dup_count; + /*And update the buffer fullness level.*/ + if(!_trial){ + _enc->rc.fullness+=buf_delta-_bits; + /*If we're too quick filling the buffer and overflow is capped, + that rate is lost forever.*/ + if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){ + _enc->rc.fullness=_enc->rc.max; + } + /*If we're too quick draining the buffer and underflow is capped, + don't try to make up that rate later.*/ + if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){ + _enc->rc.fullness=0; + } + /*Adjust the bias for the real bits we've used.*/ + _enc->rc.rate_bias-=_bits; + } + return dropped; +} + +#define OC_RC_2PASS_VERSION (1) +#define OC_RC_2PASS_HDR_SZ (38) +#define OC_RC_2PASS_PACKET_SZ (8) + +static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int 
_bytes){ + while(_bytes-->0){ + _rc->twopass_buffer[_rc->twopass_buffer_bytes++]=(unsigned char)(_val&0xFF); + _val>>=8; + } +} + +int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){ + if(_enc->rc.twopass_buffer_bytes==0){ + if(_enc->rc.twopass==0){ + int qi; + /*Pick first-pass qi for scale calculations.*/ + qi=oc_enc_select_qi(_enc,0,0); + _enc->state.nqis=1; + _enc->state.qis[0]=qi; + _enc->rc.twopass=1; + _enc->rc.frames_total[0]=_enc->rc.frames_total[1]= + _enc->rc.frames_total[2]=0; + _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; + /*Fill in dummy summary values.*/ + oc_rc_buffer_val(&_enc->rc,0x5032544F,4); + oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); + oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8); + } + else{ + int qti; + qti=_enc->rc.cur_metrics.frame_type; + _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); + _enc->rc.frames_total[qti]++; + _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count; + oc_rc_buffer_val(&_enc->rc, + _enc->rc.cur_metrics.dup_count|_enc->rc.cur_metrics.frame_type<<31,4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4); + } + } + else if(_enc->packet_state==OC_PACKET_DONE&& + _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){ + _enc->rc.twopass_buffer_bytes=0; + oc_rc_buffer_val(&_enc->rc,0x5032544F,4); + oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1); + oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1); + oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8); + oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8); + } + else{ + /*The data for this frame has already been retrieved.*/ + *_buf=NULL; + return 0; + } + *_buf=_enc->rc.twopass_buffer; + return _enc->rc.twopass_buffer_bytes; +} + +static size_t oc_rc_buffer_fill(oc_rc_state *_rc, + unsigned 
char *_buf,size_t _bytes,size_t _consumed,size_t _goal){ + while(_rc->twopass_buffer_fill<_goal&&_consumed<_bytes){ + _rc->twopass_buffer[_rc->twopass_buffer_fill++]=_buf[_consumed++]; + } + return _consumed; +} + +static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){ + ogg_int64_t ret; + int shift; + ret=0; + shift=0; + while(_bytes-->0){ + ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<rc.twopass==0){ + _enc->rc.twopass=2; + _enc->rc.twopass_buffer_fill=0; + _enc->rc.frames_total[0]=0; + _enc->rc.nframe_metrics=0; + _enc->rc.cframe_metrics=0; + _enc->rc.frame_metrics_head=0; + _enc->rc.scale_window0=0; + _enc->rc.scale_window_end=0; + } + /*If we haven't got a valid summary header yet, try to parse one.*/ + if(_enc->rc.frames_total[0]==0){ + if(!_buf){ + int frames_needed; + /*If we're using a whole-file buffer, we just need the first frame. + Otherwise, we may need as many as one per buffer slot.*/ + frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay; + return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ + -_enc->rc.twopass_buffer_fill; + } + consumed=oc_rc_buffer_fill(&_enc->rc, + _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ); + if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){ + ogg_int64_t scale_sum[2]; + int exp[2]; + int buf_delay; + /*Read the summary header data.*/ + /*Check the magic value and version number.*/ + if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F|| + oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){ + _enc->rc.twopass_buffer_bytes=0; + return TH_ENOTFORMAT; + } + _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); + _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); + _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); + exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1); + exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1); + scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8); + scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8); + /*Make sure 
the file claims to have at least one frame. + Otherwise we probably got the placeholder data from an aborted pass 1. + Also make sure the total frame count doesn't overflow an integer.*/ + buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]; + if(_enc->rc.frames_total[0]==0||buf_delay<0|| + (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]|| + (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){ + _enc->rc.frames_total[0]=0; + _enc->rc.twopass_buffer_bytes=0; + return TH_EBADHEADER; + } + /*Got a valid header; set up pass 2.*/ + _enc->rc.frames_left[0]=_enc->rc.frames_total[0]; + _enc->rc.frames_left[1]=_enc->rc.frames_total[1]; + _enc->rc.frames_left[2]=_enc->rc.frames_total[2]; + /*If the user hasn't specified a buffer size, use the whole file.*/ + if(_enc->rc.frame_metrics==NULL){ + _enc->rc.buf_delay=buf_delay; + _enc->rc.nframes[0]=_enc->rc.frames_total[0]; + _enc->rc.nframes[1]=_enc->rc.frames_total[1]; + _enc->rc.nframes[2]=_enc->rc.frames_total[2]; + _enc->rc.scale_sum[0]=scale_sum[0]; + _enc->rc.scale_sum[1]=scale_sum[1]; + _enc->rc.scale_window_end=buf_delay; + oc_enc_rc_reset(_enc); + } + _enc->rc.exp[0]=exp[0]; + _enc->rc.exp[1]=exp[1]; + /*Clear the header data from the buffer to make room for packet data.*/ + _enc->rc.twopass_buffer_fill=0; + _enc->rc.twopass_buffer_bytes=0; + } + } + if(_enc->rc.frames_total[0]!=0){ + ogg_int64_t curframe_num; + int nframes_total; + curframe_num=_enc->state.curframe_num; + if(curframe_num>=0){ + /*We just encoded a frame; make sure things matched.*/ + if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){ + _enc->rc.twopass_buffer_bytes=0; + return TH_EINVAL; + } + } + curframe_num+=_enc->prev_dup_count+1; + nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]; + if(curframe_num>=nframes_total){ + /*We don't want any more data after the last frame, and we don't want to + allow any more frames to be encoded.*/ + 
_enc->rc.twopass_buffer_bytes=0; + } + else if(_enc->rc.twopass_buffer_bytes==0){ + if(_enc->rc.frame_metrics==NULL){ + /*We're using a whole-file buffer:*/ + if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill; + consumed=oc_rc_buffer_fill(&_enc->rc, + _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); + if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ + ogg_uint32_t dup_count; + ogg_int32_t log_scale; + int qti; + int arg; + /*Read the metrics for the next frame.*/ + dup_count=oc_rc_unbuffer_val(&_enc->rc,4); + log_scale=oc_rc_unbuffer_val(&_enc->rc,4); + _enc->rc.cur_metrics.log_scale=log_scale; + qti=(dup_count&0x80000000)>>31; + _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF; + _enc->rc.cur_metrics.frame_type=qti; + _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME; + /*"Helpfully" set the dup count back to what it was in pass 1.*/ + arg=_enc->rc.cur_metrics.dup_count; + th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); + /*Clear the buffer for the next frame.*/ + _enc->rc.twopass_buffer_fill=0; + } + } + else{ + int frames_needed; + /*We're using a finite buffer:*/ + frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay + -(_enc->rc.scale_window_end-_enc->rc.scale_window0), + _enc->rc.frames_left[0]+_enc->rc.frames_left[1] + -_enc->rc.nframes[0]-_enc->rc.nframes[1]); + while(frames_needed>0){ + if(!_buf){ + return OC_RC_2PASS_PACKET_SZ*frames_needed + -_enc->rc.twopass_buffer_fill; + } + consumed=oc_rc_buffer_fill(&_enc->rc, + _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); + if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ + oc_frame_metrics *m; + int fmi; + ogg_uint32_t dup_count; + ogg_int32_t log_scale; + int qti; + /*Read the metrics for the next frame.*/ + dup_count=oc_rc_unbuffer_val(&_enc->rc,4); + log_scale=oc_rc_unbuffer_val(&_enc->rc,4); + /*Add the to the circular buffer.*/ + fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++; + if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics; + 
m=_enc->rc.frame_metrics+fmi; + m->log_scale=log_scale; + qti=(dup_count&0x80000000)>>31; + m->dup_count=dup_count&0x7FFFFFFF; + m->frame_type=qti; + /*And accumulate the statistics over the window.*/ + _enc->rc.nframes[qti]++; + _enc->rc.nframes[2]+=m->dup_count; + _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale); + _enc->rc.scale_window_end+=m->dup_count+1; + /*Compute an upper bound on the number of remaining packets needed + for the current window.*/ + frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay + -(_enc->rc.scale_window_end-_enc->rc.scale_window0), + _enc->rc.frames_left[0]+_enc->rc.frames_left[1] + -_enc->rc.nframes[0]-_enc->rc.nframes[1]); + /*Clear the buffer for the next frame.*/ + _enc->rc.twopass_buffer_fill=0; + _enc->rc.twopass_buffer_bytes=0; + } + /*Go back for more data.*/ + else break; + } + /*If we've got all the frames we need, fill in the current metrics. + We're ready to go.*/ + if(frames_needed<=0){ + int arg; + *&_enc->rc.cur_metrics= + *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head); + _enc->rc.twopass_force_kf= + _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME; + /*"Helpfully" set the dup count back to what it was in pass 1.*/ + arg=_enc->rc.cur_metrics.dup_count; + th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); + /*Mark us ready for the next frame.*/ + _enc->rc.twopass_buffer_bytes=1; + } + } + } + } + return (int)consumed; +} diff --git a/Engine/lib/libtheora/lib/dec/state.c b/Engine/lib/libtheora/lib/state.c similarity index 55% rename from Engine/lib/libtheora/lib/dec/state.c rename to Engine/lib/libtheora/lib/state.c index 387f2d0b1..42ed33a9a 100644 --- a/Engine/lib/libtheora/lib/dec/state.c +++ b/Engine/lib/libtheora/lib/state.c @@ -5,21 +5,20 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: state.c 15469 2008-10-30 12:49:42Z tterribe $ + last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include #include -#include "../internal.h" -#include "idct.h" -#if defined(USE_ASM) +#include "internal.h" +#if defined(OC_X86_ASM) #if defined(_MSC_VER) # include "x86_vc/x86int.h" #else @@ -31,35 +30,30 @@ # include "png.h" #endif -void oc_restore_fpu(const oc_theora_state *_state){ - _state->opt_vtable.restore_fpu(); -} - -void oc_restore_fpu_c(void){} - /*Returns the fragment index of the top-left block in a macro block. - This can be used to test whether or not the whole macro block is coded. - _sb: The super block. - _quadi: The quadrant number. + This can be used to test whether or not the whole macro block is valid. + _sb_map: The super block map. + _quadi: The quadrant number. Return: The index of the fragment of the upper left block in the macro block, or -1 if the block lies outside the coded frame.*/ -static int oc_sb_quad_top_left_frag(const oc_sb *_sb,int _quadi){ +static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ /*It so happens that under the Hilbert curve ordering described below, the upper-left block in each macro block is at index 0, except in macro block 3, where it is at index 2.*/ - return _sb->map[_quadi][_quadi&_quadi<<1]; + return _sb_map[_quadi][_quadi&_quadi<<1]; } /*Fills in the mapping from block positions to fragment numbers for a single color plane. - This function also fills in the "valid" flag of each quadrant in a super - block. - _sbs: The array of super blocks for the color plane. - _frag0: The index of the first fragment in the plane. 
- _hfrags: The number of horizontal fragments in a coded frame. - _vfrags: The number of vertical fragments in a coded frame.*/ -static void oc_sb_create_plane_mapping(oc_sb _sbs[],int _frag0,int _hfrags, - int _vfrags){ + This function also fills in the "valid" flag of each quadrant in the super + block flags. + _sb_maps: The array of super block maps for the color plane. + _sb_flags: The array of super block flags for the color plane. + _frag0: The index of the first fragment in the plane. + _hfrags: The number of horizontal fragments in a coded frame. + _vfrags: The number of vertical fragments in a coded frame.*/ +static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], + oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ /*Contains the (macro_block,block) indices for a 4x4 grid of fragments. The pattern is a 4x4 Hilbert space-filling curve. @@ -74,10 +68,10 @@ static void oc_sb_create_plane_mapping(oc_sb _sbs[],int _frag0,int _hfrags, {{1,0},{1,3},{2,0},{2,3}}, {{1,1},{1,2},{2,1},{2,2}} }; - oc_sb *sb; - int yfrag; - int y; - sb=_sbs; + ptrdiff_t yfrag; + unsigned sbi; + int y; + sbi=0; yfrag=_frag0; for(y=0;;y+=4){ int imax; @@ -87,30 +81,31 @@ static void oc_sb_create_plane_mapping(oc_sb _sbs[],int _frag0,int _hfrags, imax=_vfrags-y; if(imax>4)imax=4; else if(imax<=0)break; - for(x=0;;x+=4,sb++){ - int xfrag; - int jmax; - int quadi; - int i; + for(x=0;;x+=4,sbi++){ + ptrdiff_t xfrag; + int jmax; + int quadi; + int i; /*Figure out how many rows of blocks in this super block lie within the image.*/ jmax=_hfrags-x; if(jmax>4)jmax=4; else if(jmax<=0)break; /*By default, set all fragment indices to -1.*/ - memset(sb->map[0],0xFF,sizeof(sb->map)); + memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi])); /*Fill in the fragment map for this super block.*/ xfrag=yfrag+x; for(i=0;imap[SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j; + _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j; } xfrag+=_hfrags; } /*Mark which quadrants of this super block 
lie within the image.*/ for(quadi=0;quadi<4;quadi++){ - sb->quad_valid|=(oc_sb_quad_top_left_frag(sb,quadi)>=0)<=0)<=_fplane->nvfrags)break; - for(j=0;j<2;j++){ - if(_x+j>=_fplane->nhfrags)break; - _mb->map[0][i<<1|j]=(_y+i)*_fplane->nhfrags+_x+j; - } + int j; + for(i=0;i<2;i++)for(j=0;j<2;j++){ + _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j; } } /*Fills in the chroma plane fragment maps for a macro block. - This version is for use with chroma decimated in the X and Y directions. - _mb: The macro block to fill. + This version is for use with chroma decimated in the X and Y directions + (4:2:0). + _mb_map: The macro block map to fill. _fplanes: The descriptions of the fragment planes. - _x: The X location of the upper-left hand fragment in the Y plane. - _y: The Y location of the upper-left hand fragment in the Y plane.*/ -static void oc_mb_fill_cmapping00(oc_mb *_mb, - const oc_fragment_plane _fplanes[3],int _x,int _y){ - int fragi; - _x>>=1; - _y>>=1; - fragi=_y*_fplanes[1].nhfrags+_x; - _mb->map[1][0]=fragi+_fplanes[1].froffset; - _mb->map[2][0]=fragi+_fplanes[2].froffset; + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + _xfrag0>>=1; + _yfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + _mb_map[1][0]=fragi+_fplanes[1].froffset; + _mb_map[2][0]=fragi+_fplanes[2].froffset; } /*Fills in the chroma plane fragment maps for a macro block. This version is for use with chroma decimated in the Y direction. - _mb: The macro block to fill. + _mb_map: The macro block map to fill. _fplanes: The descriptions of the fragment planes. - _x: The X location of the upper-left hand fragment in the Y plane. 
- _y: The Y location of the upper-left hand fragment in the Y plane.*/ -static void oc_mb_fill_cmapping01(oc_mb *_mb, - const oc_fragment_plane _fplanes[3],int _x,int _y){ - int fragi; - int j; - _y>>=1; - fragi=_y*_fplanes[1].nhfrags+_x; + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + int j; + _yfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; for(j=0;j<2;j++){ - if(_x+j>=_fplanes[1].nhfrags)break; - _mb->map[1][j]=fragi+_fplanes[1].froffset; - _mb->map[2][j]=fragi+_fplanes[2].froffset; + _mb_map[1][j]=fragi+_fplanes[1].froffset; + _mb_map[2][j]=fragi+_fplanes[2].froffset; fragi++; } } /*Fills in the chroma plane fragment maps for a macro block. - This version is for use with chroma decimated in the X direction. - _mb: The macro block to fill. + This version is for use with chroma decimated in the X direction (4:2:2). + _mb_map: The macro block map to fill. _fplanes: The descriptions of the fragment planes. - _x: The X location of the upper-left hand fragment in the Y plane. - _y: The Y location of the upper-left hand fragment in the Y plane.*/ -static void oc_mb_fill_cmapping10(oc_mb *_mb, - const oc_fragment_plane _fplanes[3],int _x,int _y){ - int fragi; - int i; - _x>>=1; - fragi=_y*_fplanes[1].nhfrags+_x; + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 
+ _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + int i; + _xfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; for(i=0;i<2;i++){ - if(_y+i>=_fplanes[1].nvfrags)break; - _mb->map[1][i<<1]=fragi+_fplanes[1].froffset; - _mb->map[2][i<<1]=fragi+_fplanes[2].froffset; + _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; + _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; fragi+=_fplanes[1].nhfrags; } } /*Fills in the chroma plane fragment maps for a macro block. - This version is for use with no chroma decimation. - This uses the already filled-in Y plane values. - _mb: The macro block to fill. + This version is for use with no chroma decimation (4:4:4). + This uses the already filled-in luma plane values. + _mb_map: The macro block map to fill. _fplanes: The descriptions of the fragment planes.*/ -static void oc_mb_fill_cmapping11(oc_mb *_mb, +static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], const oc_fragment_plane _fplanes[3]){ int k; for(k=0;k<4;k++){ - if(_mb->map[0][k]>=0){ - _mb->map[1][k]=_mb->map[0][k]+_fplanes[1].froffset; - _mb->map[2][k]=_mb->map[0][k]+_fplanes[2].froffset; - } + _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; + _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; } } /*The function type used to fill in the chroma plane fragment maps for a macro block. - _mb: The macro block to fill. + _mb_map: The macro block map to fill. _fplanes: The descriptions of the fragment planes. - _x: The X location of the upper-left hand fragment in the Y plane. - _y: The Y location of the upper-left hand fragment in the Y plane.*/ -typedef void (*oc_mb_fill_cmapping_func)(oc_mb *_mb, + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 
+ _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); /*A table of functions used to fill in the chroma plane fragment maps for a @@ -228,44 +216,43 @@ static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ /*Fills in the mapping from macro blocks to their corresponding fragment numbers in each plane. - _mbs: The array of macro blocks. - _fplanes: The descriptions of the fragment planes. - _ctype: The chroma decimation type.*/ -static void oc_mb_create_mapping(oc_mb _mbs[], - const oc_fragment_plane _fplanes[3],int _ctype){ + _mb_maps: The list of macro block maps. + _mb_modes: The list of macro block modes; macro blocks completely outside + the coded region are marked invalid. + _fplanes: The descriptions of the fragment planes. + _pixel_fmt: The chroma decimation type.*/ +static void oc_mb_create_mapping(oc_mb_map _mb_maps[], + signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ oc_mb_fill_cmapping_func mb_fill_cmapping; - oc_mb *mb0; + unsigned sbi; int y; - mb0=_mbs; - mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_ctype]; - /*Loop through the Y plane super blocks.*/ - for(y=0;y<_fplanes[0].nvfrags;y+=4){ + mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; + /*Loop through the luma plane super blocks.*/ + for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ int x; - for(x=0;x<_fplanes[0].nhfrags;x+=4,mb0+=4){ + for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ int ymb; /*Loop through the macro blocks in each super block in display order.*/ for(ymb=0;ymb<2;ymb++){ int xmb; for(xmb=0;xmb<2;xmb++){ - oc_mb *mb; - int mbx; - int mby; - mb=mb0+OC_MB_MAP[ymb][xmb]; + unsigned mbi; + int mbx; + int mby; + mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; mbx=x|xmb<<1; mby=y|ymb<<1; - mb->x=mbx<<3; - mb->y=mby<<3; - /*Initialize fragment indexes to -1.*/ - memset(mb->map,0xFF,sizeof(mb->map)); + /*Initialize 
fragment indices to -1.*/ + memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); /*Make sure this macro block is within the encoded region.*/ if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ - mb->mode=OC_MODE_INVALID; + _mb_modes[mbi]=OC_MODE_INVALID; continue; } - /*Fill in the fragment indices for the Y plane.*/ - oc_mb_fill_ymapping(mb,_fplanes,mbx,mby); + /*Fill in the fragment indices for the luma plane.*/ + oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); /*Fill in the fragment indices for the chroma planes.*/ - (*mb_fill_cmapping)(mb,_fplanes,mbx,mby); + (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); } } } @@ -276,18 +263,14 @@ static void oc_mb_create_mapping(oc_mb _mbs[], region of the frame. _state: The Theora state containing the fragments to be marked.*/ static void oc_state_border_init(oc_theora_state *_state){ - typedef struct{ - int x0; - int y0; - int xf; - int yf; - }oc_crop_rect; oc_fragment *frag; oc_fragment *yfrag_end; oc_fragment *xfrag_end; oc_fragment_plane *fplane; - oc_crop_rect *crop; - oc_crop_rect crop_rects[3]; + int crop_x0; + int crop_y0; + int crop_xf; + int crop_yf; int pli; int y; int x; @@ -301,20 +284,19 @@ static void oc_state_border_init(oc_theora_state *_state){ yfrag_end=frag=_state->frags; for(pli=0;pli<3;pli++){ fplane=_state->fplanes+pli; - crop=crop_rects+pli; /*Set up the cropping rectangle for this plane.*/ - crop->x0=_state->info.pic_x; - crop->xf=_state->info.pic_x+_state->info.pic_width; - crop->y0=_state->info.pic_y; - crop->yf=_state->info.pic_y+_state->info.pic_height; + crop_x0=_state->info.pic_x; + crop_xf=_state->info.pic_x+_state->info.pic_width; + crop_y0=_state->info.pic_y; + crop_yf=_state->info.pic_y+_state->info.pic_height; if(pli>0){ if(!(_state->info.pixel_fmt&1)){ - crop->x0=crop->x0>>1; - crop->xf=crop->xf+1>>1; + crop_x0=crop_x0>>1; + crop_xf=crop_xf+1>>1; } if(!(_state->info.pixel_fmt&2)){ - crop->y0=crop->y0>>1; - crop->yf=crop->yf+1>>1; + crop_y0=crop_y0>>1; + crop_yf=crop_yf+1>>1; 
} } y=0; @@ -327,13 +309,13 @@ static void oc_state_border_init(oc_theora_state *_state){ This guarantees that if we count a fragment as straddling the border below, at least one pixel in the fragment will be inside the displayable region.*/ - if(x+8<=crop->x0||crop->xf<=x||y+8<=crop->y0||crop->yf<=y|| - crop->x0>=crop->xf||crop->y0>=crop->yf){ + if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y|| + crop_x0>=crop_xf||crop_y0>=crop_yf){ frag->invalid=1; } /*Otherwise, check to see if it straddles the border.*/ - else if(xx0&&crop->x0xf&&crop->xfy0&&crop->y0yf&&crop->yf=crop->x0&&x+jxf&&y+i>=crop->y0&&y+iyf){ + if(x+j>=crop_x0&&x+j=crop_y0&&y+i=_state->nborders){ _state->nborders++; @@ -357,34 +339,35 @@ static void oc_state_border_init(oc_theora_state *_state){ _state->borders[i].npixels=npixels; } else if(_state->borders[i].mask!=mask)continue; - frag->border=_state->borders+i; + frag->borderi=i; break; } } + else frag->borderi=-1; } } } } -static void oc_state_frarray_init(oc_theora_state *_state){ - int yhfrags; - int yvfrags; - int chfrags; - int cvfrags; - int yfrags; - int cfrags; - int nfrags; - int yhsbs; - int yvsbs; - int chsbs; - int cvsbs; - int ysbs; - int csbs; - int nsbs; - int nmbs; - int hdec; - int vdec; - int pli; +static int oc_state_frarray_init(oc_theora_state *_state){ + int yhfrags; + int yvfrags; + int chfrags; + int cvfrags; + ptrdiff_t yfrags; + ptrdiff_t cfrags; + ptrdiff_t nfrags; + unsigned yhsbs; + unsigned yvsbs; + unsigned chsbs; + unsigned cvsbs; + unsigned ysbs; + unsigned csbs; + unsigned nsbs; + size_t nmbs; + int hdec; + int vdec; + int pli; /*Figure out the number of fragments in each plane.*/ /*These parameters have already been validated to be multiples of 16.*/ yhfrags=_state->info.frame_width>>3; @@ -393,8 +376,8 @@ static void oc_state_frarray_init(oc_theora_state *_state){ vdec=!(_state->info.pixel_fmt&2); chfrags=yhfrags+hdec>>hdec; cvfrags=yvfrags+vdec>>vdec; - yfrags=yhfrags*yvfrags; - cfrags=chfrags*cvfrags; + 
yfrags=yhfrags*(ptrdiff_t)yvfrags; + cfrags=chfrags*(ptrdiff_t)cvfrags; nfrags=yfrags+2*cfrags; /*Figure out the number of super blocks in each plane.*/ yhsbs=yhfrags+3>>2; @@ -404,7 +387,20 @@ static void oc_state_frarray_init(oc_theora_state *_state){ ysbs=yhsbs*yvsbs; csbs=chsbs*cvsbs; nsbs=ysbs+2*csbs; - nmbs=ysbs<<2; + nmbs=(size_t)ysbs<<2; + /*Check for overflow. + We support the ridiculous upper limits of the specification (1048560 by + 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, + but for those with 32-bit pointers (or smaller!) we have to check. + If the caller wants to prevent denial-of-service by imposing a more + reasonable upper limit on the size of attempted allocations, they must do + so themselves; we have no platform independent way to determine how much + system memory there is nor an application-independent way to decide what a + "reasonable" allocation is.*/ + if(yfrags/yhfrags!=yvfrags||2*cfrags>2!=ysbs){ + return TH_EIMPL; + } /*Initialize the fragment array.*/ _state->fplanes[0].nhfrags=yhfrags; _state->fplanes[0].nvfrags=yvfrags; @@ -425,34 +421,45 @@ static void oc_state_frarray_init(oc_theora_state *_state){ _state->fplanes[2].sboffset=ysbs+csbs; _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; _state->nfrags=nfrags; - _state->frags=_ogg_calloc(nfrags,sizeof(oc_fragment)); + _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); + _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); _state->nsbs=nsbs; - _state->sbs=_ogg_calloc(nsbs,sizeof(oc_sb)); + _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); + _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); _state->nhmbs=yhsbs<<1; _state->nvmbs=yvsbs<<1; _state->nmbs=nmbs; - _state->mbs=_ogg_calloc(nmbs,sizeof(oc_mb)); - _state->coded_fragis=_ogg_malloc(nfrags*sizeof(_state->coded_fragis[0])); - _state->uncoded_fragis=_state->coded_fragis+nfrags; - _state->coded_mbis=_ogg_malloc(nmbs*sizeof(_state->coded_mbis[0])); + 
_state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); + _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); + _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); + if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| + _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| + _state->coded_fragis==NULL){ + return TH_EFAULT; + } /*Create the mapping from super blocks to fragments.*/ for(pli=0;pli<3;pli++){ oc_fragment_plane *fplane; fplane=_state->fplanes+pli; - oc_sb_create_plane_mapping(_state->sbs+fplane->sboffset, - fplane->froffset,fplane->nhfrags,fplane->nvfrags); + oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, + _state->sb_flags+fplane->sboffset,fplane->froffset, + fplane->nhfrags,fplane->nvfrags); } /*Create the mapping from macro blocks to fragments.*/ - oc_mb_create_mapping(_state->mbs,_state->fplanes,_state->info.pixel_fmt); - /*Initialize the invalid and border fields of each fragment.*/ + oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, + _state->fplanes,_state->info.pixel_fmt); + /*Initialize the invalid and borderi fields of each fragment.*/ oc_state_border_init(_state); + return 0; } static void oc_state_frarray_clear(oc_theora_state *_state){ - _ogg_free(_state->coded_mbis); _ogg_free(_state->coded_fragis); - _ogg_free(_state->mbs); - _ogg_free(_state->sbs); + _ogg_free(_state->mb_modes); + _ogg_free(_state->mb_maps); + _ogg_free(_state->sb_flags); + _ogg_free(_state->sb_maps); + _ogg_free(_state->frag_mvs); _ogg_free(_state->frags); } @@ -462,84 +469,144 @@ static void oc_state_frarray_clear(oc_theora_state *_state){ unrestricted motion vectors without special casing the boundary. If chroma is decimated in either direction, the padding is reduced by a factor of 2 on the appropriate sides. 
- _enc: The encoding context to store the buffers in.*/ -static void oc_state_ref_bufs_init(oc_theora_state *_state){ - th_info *info; + _nrefs: The number of reference buffers to init; must be 3 or 4.*/ +static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ + th_info *info; unsigned char *ref_frame_data; + size_t ref_frame_data_sz; + size_t ref_frame_sz; size_t yplane_sz; size_t cplane_sz; int yhstride; - int yvstride; + int yheight; int chstride; - int cvstride; - int yoffset; - int coffset; + int cheight; + ptrdiff_t yoffset; + ptrdiff_t coffset; + ptrdiff_t *frag_buf_offs; + ptrdiff_t fragi; + int hdec; + int vdec; int rfi; + int pli; + if(_nrefs<3||_nrefs>4)return TH_EINVAL; info=&_state->info; /*Compute the image buffer parameters for each plane.*/ + hdec=!(info->pixel_fmt&1); + vdec=!(info->pixel_fmt&2); yhstride=info->frame_width+2*OC_UMV_PADDING; - yvstride=info->frame_height+2*OC_UMV_PADDING; - chstride=yhstride>>!(info->pixel_fmt&1); - cvstride=yvstride>>!(info->pixel_fmt&2); - yplane_sz=(size_t)yhstride*yvstride; - cplane_sz=(size_t)chstride*cvstride; - yoffset=OC_UMV_PADDING+OC_UMV_PADDING*yhstride; - coffset=(OC_UMV_PADDING>>!(info->pixel_fmt&1))+ - (OC_UMV_PADDING>>!(info->pixel_fmt&2))*chstride; - _state->ref_frame_data=ref_frame_data=_ogg_malloc(3*(yplane_sz+2*cplane_sz)); + yheight=info->frame_height+2*OC_UMV_PADDING; + chstride=yhstride>>hdec; + cheight=yheight>>vdec; + yplane_sz=yhstride*(size_t)yheight; + cplane_sz=chstride*(size_t)cheight; + yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; + coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; + ref_frame_sz=yplane_sz+2*cplane_sz; + ref_frame_data_sz=_nrefs*ref_frame_sz; + /*Check for overflow. 
+ The same caveats apply as for oc_state_frarray_init().*/ + if(yplane_sz/yhstride!=yheight||2*cplane_szfrag_buf_offs= + _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); + if(ref_frame_data==NULL||frag_buf_offs==NULL){ + _ogg_free(frag_buf_offs); + _ogg_free(ref_frame_data); + return TH_EFAULT; + } /*Set up the width, height and stride for the image buffers.*/ _state->ref_frame_bufs[0][0].width=info->frame_width; _state->ref_frame_bufs[0][0].height=info->frame_height; _state->ref_frame_bufs[0][0].stride=yhstride; _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= - info->frame_width>>!(info->pixel_fmt&1); + info->frame_width>>hdec; _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= - info->frame_height>>!(info->pixel_fmt&2); + info->frame_height>>vdec; _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= chstride; - memcpy(_state->ref_frame_bufs[1],_state->ref_frame_bufs[0], - sizeof(_state->ref_frame_bufs[0])); - memcpy(_state->ref_frame_bufs[2],_state->ref_frame_bufs[0], - sizeof(_state->ref_frame_bufs[0])); + for(rfi=1;rfi<_nrefs;rfi++){ + memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], + sizeof(_state->ref_frame_bufs[0])); + } /*Set up the data pointers for the image buffers.*/ - for(rfi=0;rfi<3;rfi++){ + for(rfi=0;rfi<_nrefs;rfi++){ + _state->ref_frame_data[rfi]=ref_frame_data; _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; ref_frame_data+=yplane_sz; _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; ref_frame_data+=cplane_sz; _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; ref_frame_data+=cplane_sz; - /*Flip the buffer upside down.*/ + /*Flip the buffer upside down. 
+ This allows us to decode Theora's bottom-up frames in their natural + order, yet return a top-down buffer with a positive stride to the user.*/ oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], _state->ref_frame_bufs[rfi]); - /*Initialize the fragment pointers into this buffer.*/ - oc_state_fill_buffer_ptrs(_state,rfi,_state->ref_frame_bufs[rfi]); } - /*Initialize the reference frame indexes.*/ + _state->ref_ystride[0]=-yhstride; + _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; + /*Initialize the fragment buffer offsets.*/ + ref_frame_data=_state->ref_frame_data[0]; + fragi=0; + for(pli=0;pli<3;pli++){ + th_img_plane *iplane; + oc_fragment_plane *fplane; + unsigned char *vpix; + ptrdiff_t stride; + ptrdiff_t vfragi_end; + int nhfrags; + iplane=_state->ref_frame_bufs[0]+pli; + fplane=_state->fplanes+pli; + vpix=iplane->data; + vfragi_end=fplane->froffset+fplane->nfrags; + nhfrags=fplane->nhfrags; + stride=iplane->stride; + while(fragiref_frame_idx[OC_FRAME_GOLD]= _state->ref_frame_idx[OC_FRAME_PREV]= _state->ref_frame_idx[OC_FRAME_SELF]=-1; + _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1; + return 0; } static void oc_state_ref_bufs_clear(oc_theora_state *_state){ - _ogg_free(_state->ref_frame_data); + _ogg_free(_state->frag_buf_offs); + _ogg_free(_state->ref_frame_data[0]); } void oc_state_vtable_init_c(oc_theora_state *_state){ + _state->opt_vtable.frag_copy=oc_frag_copy_c; _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; - _state->opt_vtable.state_frag_copy=oc_state_frag_copy_c; + _state->opt_vtable.idct8x8=oc_idct8x8_c; _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c; _state->opt_vtable.state_loop_filter_frag_rows= oc_state_loop_filter_frag_rows_c; _state->opt_vtable.restore_fpu=oc_restore_fpu_c; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; } 
/*Initialize the accelerated function pointers.*/ void oc_state_vtable_init(oc_theora_state *_state){ -#if defined(USE_ASM) +#if defined(OC_X86_ASM) oc_state_vtable_init_x86(_state); #else oc_state_vtable_init_c(_state); @@ -547,8 +614,8 @@ void oc_state_vtable_init(oc_theora_state *_state){ } -int oc_state_init(oc_theora_state *_state,const th_info *_info){ - int old_granpos; +int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ + int ret; /*First validate the parameters.*/ if(_info==NULL)return TH_EFAULT; /*The width and height of the encoded frame must be multiples of 16. @@ -561,11 +628,16 @@ int oc_state_init(oc_theora_state *_state,const th_info *_info){ The displayable frame must fit inside the encoded frame. The color space must be one known by the encoder.*/ if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| - _info->frame_width>=0x100000||_info->frame_height>=0x100000|| + _info->frame_width<=0||_info->frame_width>=0x100000|| + _info->frame_height<=0||_info->frame_height>=0x100000|| _info->pic_x+_info->pic_width>_info->frame_width|| _info->pic_y+_info->pic_height>_info->frame_height|| - _info->pic_x>255|| - _info->frame_height-_info->pic_height-_info->pic_y>255|| + _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| + /*Note: the following <0 comparisons may generate spurious warnings on + platforms where enums are unsigned. + We could cast them to unsigned and just use the following >= comparison, + but there are a number of compilers which will mis-optimize this. 
+ It's better to live with the spurious warnings.*/ _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ return TH_EINVAL; @@ -577,22 +649,24 @@ int oc_state_init(oc_theora_state *_state,const th_info *_info){ _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; _state->frame_type=OC_UNKWN_FRAME; oc_state_vtable_init(_state); - oc_state_frarray_init(_state); - oc_state_ref_bufs_init(_state); + ret=oc_state_frarray_init(_state); + if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); + if(ret<0){ + oc_state_frarray_clear(_state); + return ret; + } /*If the keyframe_granule_shift is out of range, use the maximum allowable value.*/ if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ _state->info.keyframe_granule_shift=31; } - _state->keyframe_num=1; - _state->curframe_num=0; + _state->keyframe_num=0; + _state->curframe_num=-1; /*3.2.0 streams mark the frame index instead of the frame count. This was changed with stream version 3.2.1 to conform to other Ogg codecs. 
- We subtract an extra one from the frame number for old streams.*/ - old_granpos=!TH_VERSION_CHECK(_info,3,2,1); - _state->curframe_num-=old_granpos; - _state->keyframe_num-=old_granpos; + We add an extra bias when computing granule positions for new streams.*/ + _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); return 0; } @@ -612,22 +686,24 @@ void oc_state_clear(oc_theora_state *_state){ _yend: The Y coordinate of the row to stop padding at.*/ void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, int _y0,int _yend){ - th_img_plane *iplane; - unsigned char *apix; - unsigned char *bpix; - unsigned char *epix; - int hpadding; + th_img_plane *iplane; + unsigned char *apix; + unsigned char *bpix; + unsigned char *epix; + int stride; + int hpadding; hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); iplane=_state->ref_frame_bufs[_refi]+_pli; - apix=iplane->data+_y0*iplane->stride; + stride=iplane->stride; + apix=iplane->data+_y0*(ptrdiff_t)stride; bpix=apix+iplane->width-1; - epix=iplane->data+_yend*iplane->stride; - /*Note the use of != instead of <, which allows ystride to be negative.*/ + epix=iplane->data+_yend*(ptrdiff_t)stride; + /*Note the use of != instead of <, which allows the stride to be negative.*/ while(apix!=epix){ memset(apix-hpadding,apix[0],hpadding); memset(bpix+1,bpix[0],hpadding); - apix+=iplane->stride; - bpix+=iplane->stride; + apix+=stride; + bpix+=stride; } } @@ -638,25 +714,27 @@ void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, _refi: The index of the reference buffer to pad. 
_pli: The color plane.*/ void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ - th_img_plane *iplane; - unsigned char *apix; - unsigned char *bpix; - unsigned char *epix; - int hpadding; - int vpadding; - int fullw; + th_img_plane *iplane; + unsigned char *apix; + unsigned char *bpix; + unsigned char *epix; + int stride; + int hpadding; + int vpadding; + int fullw; hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); iplane=_state->ref_frame_bufs[_refi]+_pli; + stride=iplane->stride; fullw=iplane->width+(hpadding<<1); apix=iplane->data-hpadding; - bpix=iplane->data+(iplane->height-1)*iplane->stride-hpadding; - epix=apix-iplane->stride*vpadding; + bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; + epix=apix-stride*(ptrdiff_t)vpadding; while(apix!=epix){ - memcpy(apix-iplane->stride,apix,fullw); - memcpy(bpix+iplane->stride,bpix,fullw); - apix-=iplane->stride; - bpix+=iplane->stride; + memcpy(apix-stride,apix,fullw); + memcpy(bpix+stride,bpix,fullw); + apix-=stride; + bpix+=stride; } } @@ -673,73 +751,18 @@ void oc_state_borders_fill(oc_theora_state *_state,int _refi){ } } -/*Sets the buffer pointer in each fragment to point to the portion of the - image buffer which it corresponds to. - _state: The Theora state to fill. - _buf_idx: The index of the buffer pointer to fill. - The first three correspond to our reconstructed frame buffers, - while the last corresponds to the input image. - _img: The image buffer to fill the fragments with.*/ -void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx, - th_ycbcr_buffer _img){ - int pli; - /*Special handling for the input image to give us the opportunity to skip - some updates. 
- The other buffers do not change throughout the encoding process.*/ - if(_buf_idx==OC_FRAME_IO){ - if(memcmp(_state->input,_img,sizeof(th_ycbcr_buffer))==0)return; - memcpy(_state->input,_img,sizeof(th_ycbcr_buffer)); - } - for(pli=0;pli<3;pli++){ - th_img_plane *iplane; - oc_fragment_plane *fplane; - oc_fragment *frag; - oc_fragment *vfrag_end; - unsigned char *vpix; - iplane=&_img[pli]; - fplane=&_state->fplanes[pli]; - vpix=iplane->data; - frag=_state->frags+fplane->froffset; - vfrag_end=frag+fplane->nfrags; - while(fragnhfrags;fragbuffer[_buf_idx]=hpix; - hpix+=8; - } - vpix+=iplane->stride<<3; - } - } -} - -/*Returns the macro block index of the macro block in the given position. - _state: The Theora state the macro block is contained in. - _mbx: The X coordinate of the macro block (in macro blocks, not pixels). - _mby: The Y coordinate of the macro block (in macro blocks, not pixels). - Return: The index of the macro block in the given position.*/ -int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby){ - return ((_mbx&~1)<<1)+(_mby&~1)*_state->nhmbs+OC_MB_MAP[_mby&1][_mbx&1]; -} - /*Determines the offsets in an image buffer to use for motion compensation. _state: The Theora state the offsets are to be computed with. _offsets: Returns the offset for the buffer(s). _offsets[0] is always set. _offsets[1] is set if the motion vector has non-zero fractional components. + _pli: The color plane index. _dx: The X component of the motion vector. _dy: The Y component of the motion vector. - _ystride: The Y stride in the buffer the motion vector points into. - _pli: The color plane index. 
Return: The number of offsets returned: 1 or 2.*/ -int oc_state_get_mv_offsets(oc_theora_state *_state,int _offsets[2], - int _dx,int _dy,int _ystride,int _pli){ - int xprec; - int yprec; - int xfrac; - int yfrac; +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], + int _pli,int _dx,int _dy){ /*Here is a brief description of how Theora handles motion vectors: Motion vector components are specified to half-pixel accuracy in undecimated directions of each plane, and quarter-pixel accuracy in @@ -754,131 +777,142 @@ int oc_state_get_mv_offsets(oc_theora_state *_state,int _offsets[2], non-zero fractional parts. The second offset is computed by dividing (not shifting) by the appropriate amount, always truncating _away_ from zero.*/ +#if 0 + /*This version of the code doesn't use any tables, but is slower.*/ + int ystride; + int xprec; + int yprec; + int xfrac; + int yfrac; + int offs; + ystride=_state->ref_ystride[_pli]; /*These two variables decide whether we are in half- or quarter-pixel precision in each component.*/ - xprec=1+(!(_state->info.pixel_fmt&1)&&_pli); - yprec=1+(!(_state->info.pixel_fmt&2)&&_pli); - /*These two variables are either 0 if all the fractional bits are 0 or 1 if - any of them are non-zero.*/ - xfrac=!!(_dx&(1<>xprec)+(_dy>>yprec)*_ystride; + xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); + yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); + /*These two variables are either 0 if all the fractional bits are zero or -1 + if any of them are non-zero.*/ + xfrac=OC_SIGNMASK(-(_dx&(xprec|1))); + yfrac=OC_SIGNMASK(-(_dy&(yprec|1))); + offs=(_dx>>xprec)+(_dy>>yprec)*ystride; if(xfrac||yfrac){ - /*This branchless code is equivalent to: - if(_dx<0)_offests[0]=-(-_dx>>xprec); - else _offsets[0]=(_dx>>xprec); - if(_dy<0)_offsets[0]-=(-_dy>>yprec)*_ystride; - else _offsets[0]+=(_dy>>yprec)*_ystride; - _offsets[1]=_offsets[0]; - if(xfrac){ - if(_dx<0)_offsets[1]++; - else _offsets[1]--; - } - if(yfrac){ - 
if(_dy<0)_offsets[1]+=_ystride; - else _offsets[1]-=_ystride; - }*/ - _offsets[1]=_offsets[0]; - _offsets[_dx>=0]+=xfrac; - _offsets[_dy>=0]+=_ystride&-yfrac; + int xmask; + int ymask; + xmask=OC_SIGNMASK(_dx); + ymask=OC_SIGNMASK(_dy); + yfrac&=ystride; + _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); + _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); return 2; } - else return 1; + else{ + _offsets[0]=offs; + return 1; + } +#else + /*Using tables simplifies the code, and there's enough arithmetic to hide the + latencies of the memory references.*/ + static const signed char OC_MVMAP[2][64]={ + { + -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, + -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 + }, + { + -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4, + -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 + } + }; + static const signed char OC_MVMAP2[2][64]={ + { + -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, + 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 + }, + { + -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, + 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 + } + }; + int ystride; + int qpx; + int qpy; + int mx; + int my; + int mx2; + int my2; + int offs; + ystride=_state->ref_ystride[_pli]; + qpy=_pli!=0&&!(_state->info.pixel_fmt&2); + my=OC_MVMAP[qpy][_dy+31]; + my2=OC_MVMAP2[qpy][_dy+31]; + qpx=_pli!=0&&!(_state->info.pixel_fmt&1); + mx=OC_MVMAP[qpx][_dx+31]; + mx2=OC_MVMAP2[qpx][_dx+31]; + offs=my*ystride+mx; + if(mx2||my2){ + _offsets[1]=offs+my2*ystride+mx2; + _offsets[0]=offs; + return 2; + } + 
_offsets[0]=offs; + return 1; +#endif } -void oc_state_frag_recon(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){ - _state->opt_vtable.state_frag_recon(_state,_frag,_pli,_dct_coeffs, - _last_zzi,_ncoefs,_dc_iquant,_ac_iquant); +void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs, + _last_zzi,_dc_quant); } -void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag, - int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, - ogg_uint16_t _dc_iquant, const ogg_uint16_t _ac_iquant[64]){ - ogg_int16_t dct_buf[64]; - ogg_int16_t res_buf[64]; - int dst_framei; - int dst_ystride; - int zzi; - int ci; - /*_last_zzi is subtly different from an actual count of the number of - coefficients we decoded for this block. - It contains the value of zzi BEFORE the final token in the block was - decoded. - In most cases this is an EOB token (the continuation of an EOB run from a - previous block counts), and so this is the same as the coefficient count. - However, in the case that the last token was NOT an EOB token, but filled - the block up with exactly 64 coefficients, _last_zzi will be less than 64. - Provided the last token was not a pure zero run, the minimum value it can - be is 46, and so that doesn't affect any of the cases in this routine. - However, if the last token WAS a pure zero run of length 63, then _last_zzi - will be 1 while the number of coefficients decoded is 64. - Thus, we will trigger the following special case, where the real - coefficient count would not. - Note also that a zero run of length 64 will give _last_zzi a value of 0, - but we still process the DC coefficient, which might have a non-zero value - due to DC prediction. 
- Although convoluted, this is arguably the correct behavior: it allows us to - dequantize fewer coefficients and use a smaller transform when the block - ends with a long zero run instead of a normal EOB token. - It could be smarter... multiple separate zero runs at the end of a block - will fool it, but an encoder that generates these really deserves what it - gets. - Needless to say we inherited this approach from VP3.*/ +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ /*Special case only having a DC component.*/ if(_last_zzi<2){ ogg_int16_t p; - /*Why is the iquant product rounded in this case and no others? - Who knows.*/ - p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5); + int ci; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); /*LOOP VECTORIZES.*/ - for(ci=0;ci<64;ci++)res_buf[ci]=p; + for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p; } else{ - /*First, dequantize the coefficients.*/ - dct_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant); - for(zzi=1;zzi<_ncoefs;zzi++){ - int ci; - ci=OC_FZIG_ZAG[zzi]; - dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]); - } - /*Then, fill in the remainder of the coefficients with 0's, and perform - the iDCT.*/ - if(_last_zzi<10){ - for(;zzi<10;zzi++)dct_buf[OC_FZIG_ZAG[zzi]]=0; - oc_idct8x8_10_c(res_buf,dct_buf); - } - else{ - for(;zzi<64;zzi++)dct_buf[OC_FZIG_ZAG[zzi]]=0; - oc_idct8x8_c(res_buf,dct_buf); - } + /*First, dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8(_state,_dct_coeffs,_last_zzi); } /*Fill in the target buffer.*/ - dst_framei=_state->ref_frame_idx[OC_FRAME_SELF]; - 
dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; - /*For now ystride values in all ref frames assumed to be equal.*/ - if(_frag->mbmode==OC_MODE_INTRA){ - oc_frag_recon_intra(_state,_frag->buffer[dst_framei],dst_ystride,res_buf); - } + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs); else{ - int ref_framei; - int ref_ystride; - int mvoffsets[2]; - ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]]; - ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride; - if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],_frag->mv[1], - ref_ystride,_pli)>1){ - oc_frag_recon_inter2(_state,_frag->buffer[dst_framei],dst_ystride, - _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride, - _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf); - } - else{ - oc_frag_recon_inter(_state,_frag->buffer[dst_framei],dst_ystride, - _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf); + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2(_state, + dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs); } + else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs); } - oc_restore_fpu(_state); } /*Copies the fragments specified by the lists of fragment indices from one @@ -888,38 +922,30 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag, _dst_frame: The reference frame to copy to. _src_frame: The reference frame to copy from. 
_pli: The color plane the fragments lie in.*/ -void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli){ - _state->opt_vtable.state_frag_copy(_state,_fragis,_nfragis,_dst_frame, +void oc_state_frag_copy_list(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame, _src_frame,_pli); } -void oc_state_frag_copy_c(const oc_theora_state *_state,const int *_fragis, - int _nfragis,int _dst_frame,int _src_frame,int _pli){ - const int *fragi; - const int *fragi_end; - int dst_framei; - int dst_ystride; - int src_framei; - int src_ystride; - dst_framei=_state->ref_frame_idx[_dst_frame]; - src_framei=_state->ref_frame_idx[_src_frame]; - dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; - src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride; - fragi_end=_fragis+_nfragis; - for(fragi=_fragis;fragifrags+*fragi; - dst=frag->buffer[dst_framei]; - src=frag->buffer[src_framei]; - for(j=0;j<8;j++){ - memcpy(dst,src,sizeof(dst[0])*8); - dst+=dst_ystride; - src+=src_ystride; - } +void oc_state_frag_copy_list_c(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; + oc_frag_copy(_state,dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); } } @@ -940,25 +966,24 @@ static void loop_filter_h(unsigned char 
*_pix,int _ystride,int *_bv){ } static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){ - int y; + int x; _pix-=_ystride*2; - for(y=0;y<8;y++){ + for(x=0;x<8;x++){ int f; - f=_pix[0]-_pix[_ystride*3]+3*(_pix[_ystride*2]-_pix[_ystride]); + f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]); /*The _bv array is used to compute the function f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ f=*(_bv+(f+4>>3)); - _pix[_ystride]=OC_CLAMP255(_pix[_ystride]+f); - _pix[_ystride*2]=OC_CLAMP255(_pix[_ystride*2]-f); - _pix++; + _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f); + _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f); } } /*Initialize the bounding values array used by the loop filter. _bv: Storage for the array. Return: 0 on success, or a non-zero value if no filtering need be applied.*/ -int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv){ +int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){ int flimit; int i; flimit=_state->loop_filter_limits[_state->qis[0]]; @@ -981,56 +1006,61 @@ int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv){ _pli: The color plane to filter. _fragy0: The Y coordinate of the first fragment row to filter. 
_fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ -void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv, +void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256], int _refi,int _pli,int _fragy0,int _fragy_end){ _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli, _fragy0,_fragy_end); } -void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv, +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv, int _refi,int _pli,int _fragy0,int _fragy_end){ - th_img_plane *iplane; - oc_fragment_plane *fplane; - oc_fragment *frag_top; - oc_fragment *frag0; - oc_fragment *frag; - oc_fragment *frag_end; - oc_fragment *frag0_end; - oc_fragment *frag_bot; + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; _bv+=127; - iplane=_state->ref_frame_bufs[_refi]+_pli; fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; /*The following loops are constructed somewhat non-intuitively on purpose. The main idea is: if a block boundary has at least one coded fragment on it, the filter is applied to it. 
However, the order that the filters are applied in matters, and VP3 chose the somewhat strange ordering used below.*/ - frag_top=_state->frags+fplane->froffset; - frag0=frag_top+_fragy0*fplane->nhfrags; - frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags; - frag_bot=_state->frags+fplane->froffset+fplane->nfrags; - while(frag0nhfrags; - while(fragcoded){ - if(frag>frag0){ - loop_filter_h(frag->buffer[_refi],iplane->stride,_bv); + while(fragi0fragi0)loop_filter_h(ref,ystride,_bv); + if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); + if(fragi+1frag_top){ - loop_filter_v(frag->buffer[_refi],iplane->stride,_bv); - } - if(frag+1coded){ - loop_filter_h(frag->buffer[_refi]+8,iplane->stride,_bv); - } - if(frag+fplane->nhfragsnhfrags)->coded){ - loop_filter_v((frag+fplane->nhfrags)->buffer[_refi], - iplane->stride,_bv); + if(fragi+nhfragsnhfrags; + fragi0+=nhfrags; } } @@ -1066,7 +1096,11 @@ int oc_state_dump_frame(const oc_theora_state *_state,int _frame, sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf); fp=fopen(fname,"wb"); if(fp==NULL)return TH_EFAULT; - image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(image[0][0])); + image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); + if(image==NULL){ + fclose(fp); + return TH_EFAULT; + } png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); if(png==NULL){ oc_free_2d(image); @@ -1149,6 +1183,7 @@ int oc_state_dump_frame(const oc_theora_state *_state,int _frame, png_set_cHRM_fixed(png,info,31271,32902, 64000,33000,29000,60000,15000,6000); }break; + default:break; } png_set_pHYs(png,info,_state->info.aspect_numerator, _state->info.aspect_denominator,0); diff --git a/Engine/lib/libtheora/lib/tokenize.c b/Engine/lib/libtheora/lib/tokenize.c new file mode 100644 index 000000000..60574c359 --- /dev/null +++ b/Engine/lib/libtheora/lib/tokenize.c @@ -0,0 +1,1072 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE 
CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: tokenize.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + + + +static int oc_make_eob_token(int _run_count){ + if(_run_count<4)return OC_DCT_EOB1_TOKEN+_run_count-1; + else{ + int cat; + cat=OC_ILOGNZ_32(_run_count)-3; + cat=OC_MINI(cat,3); + return OC_DCT_REPEAT_RUN0_TOKEN+cat; + } +} + +static int oc_make_eob_token_full(int _run_count,int *_eb){ + if(_run_count<4){ + *_eb=0; + return OC_DCT_EOB1_TOKEN+_run_count-1; + } + else{ + int cat; + cat=OC_ILOGNZ_32(_run_count)-3; + cat=OC_MINI(cat,3); + *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat); + return OC_DCT_REPEAT_RUN0_TOKEN+cat; + } +} + +/*Returns the number of blocks ended by an EOB token.*/ +static int oc_decode_eob_token(int _token,int _eb){ + return (0x20820C41U>>_token*5&0x1F)+_eb; +} + +/*TODO: This is now only used during DCT tokenization, and never for runs; it + should be simplified.*/ +static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){ + int neg; + int zero_run; + int token; + int eb; + neg=_val<0; + _val=abs(_val); + zero_run=_zzj-_zzi; + if(zero_run>0){ + int adj; + /*Implement a minor restriction on stack 1 so that we know during DC fixups + that extending a dctrun token from stack 1 will never overflow.*/ + adj=_zzi!=1; + if(_val<2&&zero_run<17+adj){ + if(zero_run<6){ + token=OC_DCT_RUN_CAT1A+zero_run-1; + eb=neg; + } + else if(zero_run<10){ + token=OC_DCT_RUN_CAT1B; + eb=zero_run-6+(neg<<2); + } + else{ + token=OC_DCT_RUN_CAT1C; + 
eb=zero_run-10+(neg<<3); + } + } + else if(_val<4&&zero_run<3+adj){ + if(zero_run<2){ + token=OC_DCT_RUN_CAT2A; + eb=_val-2+(neg<<1); + } + else{ + token=OC_DCT_RUN_CAT2B; + eb=zero_run-2+(_val-2<<1)+(neg<<2); + } + } + else{ + if(zero_run<9)token=OC_DCT_SHORT_ZRL_TOKEN; + else token=OC_DCT_ZRL_TOKEN; + eb=zero_run-1; + } + } + else if(_val<3){ + token=OC_ONE_TOKEN+(_val-1<<1)+neg; + eb=0; + } + else if(_val<7){ + token=OC_DCT_VAL_CAT2+_val-3; + eb=neg; + } + else if(_val<9){ + token=OC_DCT_VAL_CAT3; + eb=_val-7+(neg<<1); + } + else if(_val<13){ + token=OC_DCT_VAL_CAT4; + eb=_val-9+(neg<<2); + } + else if(_val<21){ + token=OC_DCT_VAL_CAT5; + eb=_val-13+(neg<<3); + } + else if(_val<37){ + token=OC_DCT_VAL_CAT6; + eb=_val-21+(neg<<4); + } + else if(_val<69){ + token=OC_DCT_VAL_CAT7; + eb=_val-37+(neg<<5); + } + else{ + token=OC_DCT_VAL_CAT8; + eb=_val-69+(neg<<9); + } + *_eb=eb; + return token; +} + +/*Token logging to allow a few fragments of efficient rollback. + Late SKIP analysis is tied up in the tokenization process, so we need to be + able to undo a fragment's tokens on a whim.*/ + +static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ + 0,16,16,16,16,16,32,32, + 32,32,32,32,32,32,32,48, + 48,48,48,48,48,48,48,48, + 48,48,48,48,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64 +}; + +static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){ + return _enc->huff_codes[_huffi+OC_ZZI_HUFF_OFFSET[_zzi]][_token].nbits + +OC_DCT_TOKEN_EXTRA_BITS[_token]; +} + +static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc, + oc_token_checkpoint *_cp,int _pli,int _zzi){ + _cp->pli=_pli; + _cp->zzi=_zzi; + _cp->eob_run=_enc->eob_run[_pli][_zzi]; + _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi]; +} + +void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, + const oc_token_checkpoint *_stack,int _n){ + int i; + for(i=_n;i-->0;){ + int pli; + int zzi; + pli=_stack[i].pli; + zzi=_stack[i].zzi; + 
_enc->eob_run[pli][zzi]=_stack[i].eob_run; + _enc->ndct_tokens[pli][zzi]=_stack[i].ndct_tokens; + } +} + +static void oc_enc_token_log(oc_enc_ctx *_enc, + int _pli,int _zzi,int _token,int _eb){ + ptrdiff_t ti; + ti=_enc->ndct_tokens[_pli][_zzi]++; + _enc->dct_tokens[_pli][_zzi][ti]=(unsigned char)_token; + _enc->extra_bits[_pli][_zzi][ti]=(ogg_uint16_t)_eb; +} + +static void oc_enc_eob_log(oc_enc_ctx *_enc, + int _pli,int _zzi,int _run_count){ + int token; + int eb; + token=oc_make_eob_token_full(_run_count,&eb); + oc_enc_token_log(_enc,_pli,_zzi,token,eb); +} + + +void oc_enc_tokenize_start(oc_enc_ctx *_enc){ + memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens)); + memset(_enc->eob_run,0,sizeof(_enc->eob_run)); + memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs)); + memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last)); +} + +typedef struct oc_quant_token oc_quant_token; + +/*A single node in the Viterbi trellis. + We maintain up to 2 of these per coefficient: + - A token to code if the value is zero (EOB, zero run, or combo token). + - A token to code if the value is not zero (DCT value token).*/ +struct oc_quant_token{ + unsigned char next; + signed char token; + ogg_int16_t eb; + ogg_uint32_t cost; + int bits; + int qc; +}; + +/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then + dequantizes and de-zig-zags the result. 
+ The DC coefficient is not preserved; it should be restored by the caller.*/ +int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, + ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, + int _zzi,oc_token_checkpoint **_stack,int _acmin){ + oc_token_checkpoint *stack; + ogg_int64_t zflags; + ogg_int64_t nzflags; + ogg_int64_t best_flags; + ogg_uint32_t d2_accum[64]; + oc_quant_token tokens[64][2]; + ogg_uint16_t *eob_run; + const unsigned char *dct_fzig_zag; + ogg_uint32_t cost; + int bits; + int eob; + int token; + int eb; + int next; + int huffi; + int zzi; + int ti; + int zzj; + int qc; + huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1]; + eob_run=_enc->eob_run[_pli]; + memset(tokens[0],0,sizeof(tokens[0])); + best_flags=nzflags=0; + zflags=1; + d2_accum[0]=0; + zzj=64; + for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){ + ogg_int32_t lambda; + ogg_uint32_t best_cost; + int best_bits=best_bits; + int best_next=best_next; + int best_token=best_token; + int best_eb=best_eb; + int best_qc=best_qc; + int flush_bits; + ogg_uint32_t d2; + int dq; + int e; + int c; + int s; + int tj; + lambda=_enc->lambda; + qc=_qdct[zzi]; + s=-(qc<0); + qc=qc+s^s; + c=_dct[OC_FZIG_ZAG[zzi]]; + if(qc<=1){ + ogg_uint32_t sum_d2; + int nzeros; + int dc_reserve; + /*The hard case: try a zero run.*/ + if(!qc){ + /*Skip runs that are already quantized to zeros. 
+ If we considered each zero coefficient in turn, we might + theoretically find a better way to partition long zero runs (e.g., + a run of > 17 zeros followed by a 1 might be better coded as a short + zero run followed by a combo token, rather than the longer zero + token followed by a 1 value token), but zeros are so common that + this becomes very computationally expensive (quadratic instead of + linear in the number of coefficients), for a marginal gain.*/ + while(zzi>1&&!_qdct[zzi-1])zzi--; + /*The distortion of coefficients originally quantized to zero is + treated as zero (since we'll never quantize them to anything else).*/ + d2=0; + } + else{ + c=c+s^s; + d2=c*(ogg_int32_t)c; + } + eob=eob_run[zzi]; + nzeros=zzj-zzi; + zzj&=63; + sum_d2=d2+d2_accum[zzj]; + d2_accum[zzi]=sum_d2; + flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; + /*We reserve 1 spot for combo run tokens that start in the 1st AC stack + to ensure they can be extended to include the DC coefficient if + necessary; this greatly simplifies stack-rewriting later on.*/ + dc_reserve=zzi+62>>6; + best_cost=0xFFFFFFFF; + for(;;){ + if(nzflags>>zzj&1){ + int cat; + int val; + int val_s; + int zzk; + int tk; + next=tokens[zzj][1].next; + tk=next&1; + zzk=next>>1; + /*Try a pure zero run to this point.*/ + cat=nzeros+55>>6; + token=OC_DCT_SHORT_ZRL_TOKEN+cat; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + d2=sum_d2-d2_accum[zzj]; + cost=d2+lambda*bits+tokens[zzj][1].cost; + if(cost<=best_cost){ + best_next=(zzj<<1)+1; + best_token=token; + best_eb=nzeros-1; + best_cost=cost; + best_bits=bits+tokens[zzj][1].bits; + best_qc=0; + } + if(nzeros<16+dc_reserve){ + val=_qdct[zzj]; + val_s=-(val<0); + val=val+val_s^val_s; + if(val<=2){ + /*Try a +/- 1 combo token.*/ + if(nzeros<6){ + token=OC_DCT_RUN_CAT1A+nzeros-1; + eb=-val_s; + } + else{ + cat=nzeros+54>>6; + token=OC_DCT_RUN_CAT1B+cat; + eb=(-val_s<>1; + token=OC_DCT_RUN_CAT2A+cat; + 
bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + val=2+((val+val_s^val_s)>2); + e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val; + d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; + cost=d2+lambda*bits+tokens[zzk][tk].cost; + if(cost<=best_cost){ + best_cost=cost; + best_bits=bits+tokens[zzk][tk].bits; + best_next=next; + best_token=token; + best_eb=(-val_s<<1+cat)+(val-2<>1); + best_qc=val+val_s^val_s; + } + } + } + /*zzj can't be coded as a zero, so stop trying to extend the run.*/ + if(!(zflags>>zzj&1))break; + } + /*We could try to consider _all_ potentially non-zero coefficients, but + if we already found a bunch of them not worth coding, it's fairly + unlikely they would now be worth coding from this position; skipping + them saves a lot of work.*/ + zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63; + if(zzj==0){ + /*We made it all the way to the end of the block; try an EOB token.*/ + if(eob<4095){ + bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1)) + -flush_bits; + } + else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN); + cost=sum_d2+bits*lambda; + /*If the best route so far is still a pure zero run to the end of the + block, force coding it as an EOB. 
+ Even if it's not optimal for this block, it has a good chance of + getting combined with an EOB token from subsequent blocks, saving + bits overall.*/ + if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){ + best_next=0; + /*This token is just a marker; in reality we may not emit any + tokens, but update eob_run[] instead.*/ + best_token=OC_DCT_EOB1_TOKEN; + best_eb=0; + best_cost=cost; + best_bits=bits; + best_qc=0; + } + break; + } + nzeros=zzj-zzi; + } + tokens[zzi][0].next=(unsigned char)best_next; + tokens[zzi][0].token=(signed char)best_token; + tokens[zzi][0].eb=(ogg_int16_t)best_eb; + tokens[zzi][0].cost=best_cost; + tokens[zzi][0].bits=best_bits; + tokens[zzi][0].qc=best_qc; + zflags|=(ogg_int64_t)1<>zzj&1; + next=(zzj<<1)+tj; + tokens[zzi][1].next=(unsigned char)next; + tokens[zzi][1].token=(signed char)token; + tokens[zzi][1].eb=0; + tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost; + tokens[zzi][1].bits=bits+tokens[zzj][tj].bits; + tokens[zzi][1].qc=1+s^s; + nzflags|=(ogg_int64_t)1<0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; + if(qc<=2){ + e=2*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_TWO_TOKEN-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=OC_ONE_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_bits=bits; + best_cost=cost; + qc--; + } + best_eb=0; + } + else if(qc<=3){ + e=3*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT2; + best_eb=-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=OC_TWO_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=0; + best_bits=bits; + best_cost=cost; + qc--; + } + } + else if(qc<=6){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; 
+ best_token=OC_DCT_VAL_CAT2+qc-3; + best_eb=-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_bits=bits; + best_cost=cost; + qc--; + } + } + else if(qc<=8){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT3; + best_eb=(-s<<1)+qc-7; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=6*dq-c; + d2=e*(ogg_int32_t)e; + token=OC_DCT_VAL_CAT2+3; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=-s; + best_bits=bits; + best_cost=cost; + qc=6; + } + } + else if(qc<=12){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT4; + best_eb=(-s<<2)+qc-9; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=8*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<1)+1; + best_bits=bits; + best_cost=cost; + qc=8; + } + } + else if(qc<=20){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT5; + best_eb=(-s<<3)+qc-13; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=12*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<2)+3; + best_bits=bits; + best_cost=cost; + qc=12; + } + } + else if(qc<=36){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT6; + best_eb=(-s<<4)+qc-21; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=20*dq-c; + d2=e*(ogg_int32_t)e; + 
token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<3)+7; + best_bits=bits; + best_cost=cost; + qc=20; + } + } + else if(qc<=68){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT7; + best_eb=(-s<<5)+qc-37; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=36*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost>zzj&1; + next=(zzj<<1)+tj; + tokens[zzi][1].next=(unsigned char)next; + tokens[zzi][1].token=(signed char)best_token; + tokens[zzi][1].eb=best_eb; + tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost; + tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits; + tokens[zzi][1].qc=qc+s^s; + nzflags|=(ogg_int64_t)1<state.opt_data.dct_fzig_zag; + zzi=1; + ti=best_flags>>1&1; + bits=tokens[zzi][ti].bits; + do{ + oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi); + eob=eob_run[zzi]; + if(tokens[zzi][ti].token=4095){ + oc_enc_eob_log(_enc,_pli,zzi,eob); + eob=0; + } + eob_run[zzi]=eob; + /*We don't include the actual EOB cost for this block in the return value. 
+ It will be paid for by the fragment that terminates the EOB run.*/ + bits-=tokens[zzi][ti].bits; + zzi=_zzi; + break; + } + /*Emit pending EOB run if any.*/ + if(eob>0){ + oc_enc_eob_log(_enc,_pli,zzi,eob); + eob_run[zzi]=0; + } + oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb); + next=tokens[zzi][ti].next; + qc=tokens[zzi][ti].qc; + zzj=(next>>1)-1&63; + /*TODO: It may be worth saving the dequantized coefficient in the trellis + above; we had to compute it to measure the error anyway.*/ + _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]); + zzi=next>>1; + ti=next&1; + } + while(zzi); + *_stack=stack; + return bits; +} + +void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, + int _pli,int _fragy0,int _frag_yend){ + const oc_fragment_plane *fplane; + const oc_fragment *frags; + ogg_int16_t *frag_dc; + ptrdiff_t fragi; + int *pred_last; + int nhfrags; + int fragx; + int fragy; + fplane=_enc->state.fplanes+_pli; + frags=_enc->state.frags; + frag_dc=_enc->frag_dc; + pred_last=_enc->dc_pred_last[_pli]; + nhfrags=fplane->nhfrags; + fragi=fplane->froffset+_fragy0*nhfrags; + for(fragy=_fragy0;fragy<_frag_yend;fragy++){ + if(fragy==0){ + /*For the first row, all of the cases reduce to just using the previous + predictor for the same reference frame.*/ + for(fragx=0;fragx=nhfrags)ur_ref=-1; + else{ + ur_ref=u_frags[fragi+1].coded? + OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; + } + if(frags[fragi].coded){ + int pred; + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + /*We break out a separate case based on which of our neighbors use + the same reference frames. 
+ This is somewhat faster than trying to make a generic case which + handles all of them, since it reduces lots of poorly predicted + jumps to one switch statement, and also lets a number of the + multiplications be optimized out by strength reduction.*/ + switch((l_ref==ref)|(ul_ref==ref)<<1| + (u_ref==ref)<<2|(ur_ref==ref)<<3){ + default:pred=pred_last[ref];break; + case 1: + case 3:pred=frags[fragi-1].dc;break; + case 2:pred=u_frags[fragi-1].dc;break; + case 4: + case 6: + case 12:pred=u_frags[fragi].dc;break; + case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; + case 8:pred=u_frags[fragi+1].dc;break; + case 9: + case 11: + case 13:{ + pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; + }break; + case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; + case 14:{ + pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) + +10*u_frags[fragi].dc)/16; + }break; + case 7: + case 15:{ + int p0; + int p1; + int p2; + p0=frags[fragi-1].dc; + p1=u_frags[fragi-1].dc; + p2=u_frags[fragi].dc; + pred=(29*(p0+p2)-26*p1)/32; + if(abs(pred-p2)>128)pred=p2; + else if(abs(pred-p0)>128)pred=p0; + else if(abs(pred-p1)>128)pred=p1; + }break; + } + frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred); + pred_last[ref]=frags[fragi].dc; + l_ref=ref; + } + else l_ref=-1; + ul_ref=u_ref; + u_ref=ur_ref; + } + } + } +} + +void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, + const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, + int _prev_ndct_tokens1,int _prev_eob_run1){ + const ogg_int16_t *frag_dc; + ptrdiff_t fragii; + unsigned char *dct_tokens0; + unsigned char *dct_tokens1; + ogg_uint16_t *extra_bits0; + ogg_uint16_t *extra_bits1; + ptrdiff_t ti0; + ptrdiff_t ti1r; + ptrdiff_t ti1w; + int eob_run0; + int eob_run1; + int neobs1; + int token; + int eb; + int token1=token1; + int eb1=eb1; + /*Return immediately if there are no coded fragments; otherwise we'd flush + any trailing EOB run into the AC 1 list and never read it back out.*/ + 
if(_ncoded_fragis<=0)return; + frag_dc=_enc->frag_dc; + dct_tokens0=_enc->dct_tokens[_pli][0]; + dct_tokens1=_enc->dct_tokens[_pli][1]; + extra_bits0=_enc->extra_bits[_pli][0]; + extra_bits1=_enc->extra_bits[_pli][1]; + ti0=_enc->ndct_tokens[_pli][0]; + ti1w=ti1r=_prev_ndct_tokens1; + eob_run0=_enc->eob_run[_pli][0]; + /*Flush any trailing EOB run for the 1st AC coefficient. + This is needed to allow us to track tokens to the end of the list.*/ + eob_run1=_enc->eob_run[_pli][1]; + if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1); + /*If there was an active EOB run at the start of the 1st AC stack, read it + in and decode it.*/ + if(_prev_eob_run1>0){ + token1=dct_tokens1[ti1r]; + eb1=extra_bits1[ti1r]; + ti1r++; + eob_run1=oc_decode_eob_token(token1,eb1); + /*Consume the portion of the run that came before these fragments.*/ + neobs1=eob_run1-_prev_eob_run1; + } + else eob_run1=neobs1=0; + for(fragii=0;fragii<_ncoded_fragis;fragii++){ + int val; + /*All tokens in the 1st AC coefficient stack are regenerated as the DC + coefficients are produced. + This can be done in-place; stack 1 cannot get larger.*/ + if(!neobs1){ + /*There's no active EOB run in stack 1; read the next token.*/ + token1=dct_tokens1[ti1r]; + eb1=extra_bits1[ti1r]; + ti1r++; + if(token10){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + token=oc_make_dct_token_full(0,0,val,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + } + else{ + /*Zero DC value; that means the entry in stack 1 might need to be coded + from stack 0. + This requires a stack 1 fixup.*/ + if(neobs1>0){ + /*We're in the middle of an active EOB run in stack 1. 
+ Move it to stack 0.*/ + if(++eob_run0>=4095){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + eob_run1--; + } + else{ + /*No active EOB run in stack 1, so we can't extend one in stack 0. + Flush it if we've got it.*/ + if(eob_run0>0){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + /*Stack 1 token is one of: a pure zero run token, a single + coefficient token, or a zero run/coefficient combo token. + A zero run token is expanded and moved to token stack 0, and the + stack 1 entry dropped. + A single coefficient value may be transformed into combo token that + is moved to stack 0, or if it cannot be combined, it is left alone + and a single length-1 zero run is emitted in stack 0. + A combo token is extended and moved to stack 0. + During AC coding, we restrict the run lengths on combo tokens for + stack 1 to guarantee we can extend them.*/ + switch(token1){ + case OC_DCT_SHORT_ZRL_TOKEN:{ + if(eb1<7){ + dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + continue; + } + /*Fall through.*/ + } + case OC_DCT_ZRL_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_ONE_TOKEN: + case OC_MINUS_ONE_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1A; + extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_TWO_TOKEN: + case OC_MINUS_TWO_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; + extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_VAL_CAT2:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; + 
extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1A: + case OC_DCT_RUN_CAT1A+1: + case OC_DCT_RUN_CAT1A+2: + case OC_DCT_RUN_CAT1A+3:{ + dct_tokens0[ti0]=(unsigned char)(token1+1); + extra_bits0[ti0]=(ogg_uint16_t)eb1; + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1A+4:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1B:{ + if((eb1&3)<3){ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + continue; + } + eb1=((eb1&4)<<1)-1; + /*Fall through.*/ + } + case OC_DCT_RUN_CAT1C:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1C; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT2A:{ + eb1=(eb1<<1)-1; + /*Fall through.*/ + } + case OC_DCT_RUN_CAT2B:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + } + /*We can't merge tokens, write a short zero run and keep going.*/ + dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; + extra_bits0[ti0]=0; + ti0++; + } + } + if(!neobs1){ + /*Flush any (inactive) EOB run.*/ + if(eob_run1>0){ + token=oc_make_eob_token_full(eob_run1,&eb); + dct_tokens1[ti1w]=(unsigned char)token; + extra_bits1[ti1w]=(ogg_uint16_t)eb; + ti1w++; + eob_run1=0; + } + /*There's no active EOB run, so log the current token.*/ + dct_tokens1[ti1w]=(unsigned char)token1; + extra_bits1[ti1w]=(ogg_uint16_t)eb1; + ti1w++; + } + else{ + /*Otherwise consume one EOB from the current run.*/ + neobs1--; + /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/ + if(eob_run1-neobs1>=4095){ + token=oc_make_eob_token_full(4095,&eb); + dct_tokens1[ti1w]=(unsigned 
char)token; + extra_bits1[ti1w]=(ogg_uint16_t)eb; + ti1w++; + eob_run1-=4095; + } + } + } + /*Save the current state.*/ + _enc->ndct_tokens[_pli][0]=ti0; + _enc->ndct_tokens[_pli][1]=ti1w; + _enc->eob_run[_pli][0]=eob_run0; + _enc->eob_run[_pli][1]=eob_run1; +} + +/*Final EOB run welding.*/ +void oc_enc_tokenize_finish(oc_enc_ctx *_enc){ + int pli; + int zzi; + /*Emit final EOB runs.*/ + for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ + int eob_run; + eob_run=_enc->eob_run[pli][zzi]; + if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run); + } + /*Merge the final EOB run of one token list with the start of the next, if + possible.*/ + for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){ + int old_tok1; + int old_tok2; + int old_eb1; + int old_eb2; + int new_tok; + int new_eb; + int zzj; + int plj; + ptrdiff_t ti=ti; + int run_count; + /*Make sure this coefficient has tokens at all.*/ + if(_enc->ndct_tokens[pli][zzi]<=0)continue; + /*Ensure the first token is an EOB run.*/ + old_tok2=_enc->dct_tokens[pli][zzi][0]; + if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue; + /*Search for a previous coefficient that has any tokens at all.*/ + old_tok1=OC_NDCT_EOB_TOKEN_MAX; + for(zzj=zzi,plj=pli;zzj>=0;zzj--){ + while(plj-->0){ + ti=_enc->ndct_tokens[plj][zzj]-1; + if(ti>=_enc->dct_token_offs[plj][zzj]){ + old_tok1=_enc->dct_tokens[plj][zzj][ti]; + break; + } + } + if(plj>=0)break; + plj=3; + } + /*Ensure its last token was an EOB run.*/ + if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue; + /*Pull off the associated extra bits, if any, and decode the runs.*/ + old_eb1=_enc->extra_bits[plj][zzj][ti]; + old_eb2=_enc->extra_bits[pli][zzi][0]; + run_count=oc_decode_eob_token(old_tok1,old_eb1) + +oc_decode_eob_token(old_tok2,old_eb2); + /*We can't possibly combine these into one run. 
+ It might be possible to split them more optimally, but we'll just leave + them as-is.*/ + if(run_count>=4096)continue; + /*We CAN combine them into one run.*/ + new_tok=oc_make_eob_token_full(run_count,&new_eb); + _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok; + _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb; + _enc->dct_token_offs[pli][zzi]++; + } +} diff --git a/Engine/lib/libtheora/lib/x86/mmxencfrag.c b/Engine/lib/libtheora/lib/x86/mmxencfrag.c new file mode 100644 index 000000000..c79ff01fc --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/mmxencfrag.c @@ -0,0 +1,900 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $ + + ********************************************************************/ +#include +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + ptrdiff_t ystride3; + ptrdiff_t ret; + __asm__ __volatile__( + /*Load the first 4 rows of each block.*/ + "movq (%[src]),%%mm0\n\t" + "movq (%[ref]),%%mm1\n\t" + "movq (%[src],%[ystride]),%%mm2\n\t" + "movq (%[ref],%[ystride]),%%mm3\n\t" + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" + "movq (%[src],%[ystride],2),%%mm4\n\t" + "movq (%[ref],%[ystride],2),%%mm5\n\t" + "movq (%[src],%[ystride3]),%%mm6\n\t" + "movq (%[ref],%[ystride3]),%%mm7\n\t" + /*Compute their SADs and add them in %%mm0*/ + "psadbw %%mm1,%%mm0\n\t" + "psadbw 
%%mm3,%%mm2\n\t" + "lea (%[src],%[ystride],4),%[src]\n\t" + "paddw %%mm2,%%mm0\n\t" + "lea (%[ref],%[ystride],4),%[ref]\n\t" + /*Load the next 3 rows as registers become available.*/ + "movq (%[src]),%%mm2\n\t" + "movq (%[ref]),%%mm3\n\t" + "psadbw %%mm5,%%mm4\n\t" + "psadbw %%mm7,%%mm6\n\t" + "paddw %%mm4,%%mm0\n\t" + "movq (%[ref],%[ystride]),%%mm5\n\t" + "movq (%[src],%[ystride]),%%mm4\n\t" + "paddw %%mm6,%%mm0\n\t" + "movq (%[ref],%[ystride],2),%%mm7\n\t" + "movq (%[src],%[ystride],2),%%mm6\n\t" + /*Start adding their SADs to %%mm0*/ + "psadbw %%mm3,%%mm2\n\t" + "psadbw %%mm5,%%mm4\n\t" + "paddw %%mm2,%%mm0\n\t" + "psadbw %%mm7,%%mm6\n\t" + /*Load last row as registers become available.*/ + "movq (%[src],%[ystride3]),%%mm2\n\t" + "movq (%[ref],%[ystride3]),%%mm3\n\t" + /*And finish adding up their SADs.*/ + "paddw %%mm4,%%mm0\n\t" + "psadbw %%mm3,%%mm2\n\t" + "paddw %%mm6,%%mm0\n\t" + "paddw %%mm2,%%mm0\n\t" + "movd %%mm0,%[ret]\n\t" + :[ret]"=a"(ret),[src]"+%r"(_src),[ref]"+r"(_ref),[ystride3]"=&r"(ystride3) + :[ystride]"r"((ptrdiff_t)_ystride) + ); + return (unsigned)ret; +} + +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + /*Early termination is for suckers.*/ + return oc_enc_frag_sad_mmxext(_src,_ref,_ystride); +} + +/*Assumes the first two rows of %[ref1] and %[ref2] are in %%mm0...%%mm3, the + first two rows of %[src] are in %%mm4,%%mm5, and {1}x8 is in %%mm7. + We pre-load the next two rows of data as registers become available.*/ +#define OC_SAD2_LOOP \ + "#OC_SAD2_LOOP\n\t" \ + /*We want to compute (%%mm0+%%mm1>>1) on unsigned bytes without overflow, but \ + pavgb computes (%%mm0+%%mm1+1>>1). \ + The latter is exactly 1 too large when the low bit of two corresponding \ + bytes is only set in one of them. 
\ + Therefore we pxor the operands, pand to mask out the low bits, and psubb to \ + correct the output of pavgb.*/ \ + "movq %%mm0,%%mm6\n\t" \ + "lea (%[ref1],%[ystride],2),%[ref1]\n\t" \ + "pxor %%mm1,%%mm0\n\t" \ + "pavgb %%mm1,%%mm6\n\t" \ + "lea (%[ref2],%[ystride],2),%[ref2]\n\t" \ + "movq %%mm2,%%mm1\n\t" \ + "pand %%mm7,%%mm0\n\t" \ + "pavgb %%mm3,%%mm2\n\t" \ + "pxor %%mm3,%%mm1\n\t" \ + "movq (%[ref2],%[ystride]),%%mm3\n\t" \ + "psubb %%mm0,%%mm6\n\t" \ + "movq (%[ref1]),%%mm0\n\t" \ + "pand %%mm7,%%mm1\n\t" \ + "psadbw %%mm6,%%mm4\n\t" \ + "movd %[ret],%%mm6\n\t" \ + "psubb %%mm1,%%mm2\n\t" \ + "movq (%[ref2]),%%mm1\n\t" \ + "lea (%[src],%[ystride],2),%[src]\n\t" \ + "psadbw %%mm2,%%mm5\n\t" \ + "movq (%[ref1],%[ystride]),%%mm2\n\t" \ + "paddw %%mm4,%%mm5\n\t" \ + "movq (%[src]),%%mm4\n\t" \ + "paddw %%mm5,%%mm6\n\t" \ + "movq (%[src],%[ystride]),%%mm5\n\t" \ + "movd %%mm6,%[ret]\n\t" \ + +/*Same as above, but does not pre-load the next two rows.*/ +#define OC_SAD2_TAIL \ + "#OC_SAD2_TAIL\n\t" \ + "movq %%mm0,%%mm6\n\t" \ + "pavgb %%mm1,%%mm0\n\t" \ + "pxor %%mm1,%%mm6\n\t" \ + "movq %%mm2,%%mm1\n\t" \ + "pand %%mm7,%%mm6\n\t" \ + "pavgb %%mm3,%%mm2\n\t" \ + "pxor %%mm3,%%mm1\n\t" \ + "psubb %%mm6,%%mm0\n\t" \ + "pand %%mm7,%%mm1\n\t" \ + "psadbw %%mm0,%%mm4\n\t" \ + "psubb %%mm1,%%mm2\n\t" \ + "movd %[ret],%%mm6\n\t" \ + "psadbw %%mm2,%%mm5\n\t" \ + "paddw %%mm4,%%mm5\n\t" \ + "paddw %%mm5,%%mm6\n\t" \ + "movd %%mm6,%[ret]\n\t" \ + +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + ptrdiff_t ret; + __asm__ __volatile__( + "movq (%[ref1]),%%mm0\n\t" + "movq (%[ref2]),%%mm1\n\t" + "movq (%[ref1],%[ystride]),%%mm2\n\t" + "movq (%[ref2],%[ystride]),%%mm3\n\t" + "xor %[ret],%[ret]\n\t" + "movq (%[src]),%%mm4\n\t" + "pxor %%mm7,%%mm7\n\t" + "pcmpeqb %%mm6,%%mm6\n\t" + "movq (%[src],%[ystride]),%%mm5\n\t" + "psubb %%mm6,%%mm7\n\t" + OC_SAD2_LOOP + 
OC_SAD2_LOOP + OC_SAD2_LOOP + OC_SAD2_TAIL + :[ret]"=&a"(ret),[src]"+r"(_src),[ref1]"+%r"(_ref1),[ref2]"+r"(_ref2) + :[ystride]"r"((ptrdiff_t)_ystride) + ); + return (unsigned)ret; +} + +/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their + 16-bit difference in %%mm0...%%mm7.*/ +#define OC_LOAD_SUB_8x4(_off) \ + "#OC_LOAD_SUB_8x4\n\t" \ + "movd "_off"(%[src]),%%mm0\n\t" \ + "movd "_off"(%[ref]),%%mm4\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm1\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm5\n\t" \ + "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "movd "_off"(%[src]),%%mm2\n\t" \ + "movd "_off"(%[ref]),%%mm7\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm3\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm6\n\t" \ + "punpcklbw %%mm4,%%mm0\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "punpcklbw %%mm4,%%mm4\n\t" \ + "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + "movd "_off"(%[src]),%%mm4\n\t" \ + "movq %%mm0,"_off"*2(%[buf])\n\t" \ + "movd "_off"(%[ref]),%%mm0\n\t" \ + "punpcklbw %%mm5,%%mm1\n\t" \ + "punpcklbw %%mm5,%%mm5\n\t" \ + "psubw %%mm5,%%mm1\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm2\n\t" \ + "punpcklbw %%mm7,%%mm7\n\t" \ + "psubw %%mm7,%%mm2\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm7\n\t" \ + "punpcklbw %%mm6,%%mm3\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "punpcklbw %%mm6,%%mm6\n\t" \ + "psubw %%mm6,%%mm3\n\t" \ + "movd "_off"(%[src]),%%mm6\n\t" \ + "punpcklbw %%mm0,%%mm4\n\t" \ + "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "punpcklbw %%mm0,%%mm0\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "psubw %%mm0,%%mm4\n\t" \ + "movd "_off"(%[ref]),%%mm0\n\t" \ + "punpcklbw %%mm7,%%mm5\n\t" \ + "neg %[src_ystride]\n\t" \ + "punpcklbw %%mm7,%%mm7\n\t" \ + "psubw %%mm7,%%mm5\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm7\n\t" \ + "punpcklbw %%mm0,%%mm6\n\t" \ + "lea 
(%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "punpcklbw %%mm0,%%mm0\n\t" \ + "neg %[ref_ystride]\n\t" \ + "psubw %%mm0,%%mm6\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm0\n\t" \ + "lea (%[src],%[src_ystride],8),%[src]\n\t" \ + "punpcklbw %%mm0,%%mm7\n\t" \ + "neg %[src_ystride]\n\t" \ + "punpcklbw %%mm0,%%mm0\n\t" \ + "lea (%[ref],%[ref_ystride],8),%[ref]\n\t" \ + "psubw %%mm0,%%mm7\n\t" \ + "neg %[ref_ystride]\n\t" \ + "movq "_off"*2(%[buf]),%%mm0\n\t" \ + +/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.*/ +#define OC_LOAD_8x4(_off) \ + "#OC_LOAD_8x4\n\t" \ + "movd "_off"(%[src]),%%mm0\n\t" \ + "movd "_off"(%[src],%[ystride]),%%mm1\n\t" \ + "movd "_off"(%[src],%[ystride],2),%%mm2\n\t" \ + "pxor %%mm7,%%mm7\n\t" \ + "movd "_off"(%[src],%[ystride3]),%%mm3\n\t" \ + "punpcklbw %%mm7,%%mm0\n\t" \ + "movd "_off"(%[src4]),%%mm4\n\t" \ + "punpcklbw %%mm7,%%mm1\n\t" \ + "movd "_off"(%[src4],%[ystride]),%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm2\n\t" \ + "movd "_off"(%[src4],%[ystride],2),%%mm6\n\t" \ + "punpcklbw %%mm7,%%mm3\n\t" \ + "movd "_off"(%[src4],%[ystride3]),%%mm7\n\t" \ + "punpcklbw %%mm4,%%mm4\n\t" \ + "punpcklbw %%mm5,%%mm5\n\t" \ + "psrlw $8,%%mm4\n\t" \ + "psrlw $8,%%mm5\n\t" \ + "punpcklbw %%mm6,%%mm6\n\t" \ + "punpcklbw %%mm7,%%mm7\n\t" \ + "psrlw $8,%%mm6\n\t" \ + "psrlw $8,%%mm7\n\t" \ + +/*Performs the first two stages of an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. 
+ Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to + perform this stage in place with no temporary registers).*/ +#define OC_HADAMARD_AB_8x4 \ + "#OC_HADAMARD_AB_8x4\n\t" \ + /*Stage A: \ + Outputs 0-3 are swapped with 4-7 here.*/ \ + "paddw %%mm1,%%mm5\n\t" \ + "paddw %%mm2,%%mm6\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "psubw %%mm5,%%mm1\n\t" \ + "psubw %%mm6,%%mm2\n\t" \ + "paddw %%mm3,%%mm7\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm0,%%mm0\n\t" \ + "psubw %%mm7,%%mm3\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + /*Stage B:*/ \ + "paddw %%mm2,%%mm0\n\t" \ + "paddw %%mm3,%%mm1\n\t" \ + "paddw %%mm6,%%mm4\n\t" \ + "paddw %%mm7,%%mm5\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + "paddw %%mm7,%%mm7\n\t" \ + "psubw %%mm0,%%mm2\n\t" \ + "psubw %%mm1,%%mm3\n\t" \ + "psubw %%mm4,%%mm6\n\t" \ + "psubw %%mm5,%%mm7\n\t" \ + +/*Performs the last stage of an 8-point 1-D Hadamard transform in place. + Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in + place with no temporary registers).*/ +#define OC_HADAMARD_C_8x4 \ + "#OC_HADAMARD_C_8x4\n\t" \ + /*Stage C:*/ \ + "paddw %%mm1,%%mm0\n\t" \ + "paddw %%mm3,%%mm2\n\t" \ + "paddw %%mm5,%%mm4\n\t" \ + "paddw %%mm7,%%mm6\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + "paddw %%mm7,%%mm7\n\t" \ + "psubw %%mm0,%%mm1\n\t" \ + "psubw %%mm2,%%mm3\n\t" \ + "psubw %%mm4,%%mm5\n\t" \ + "psubw %%mm6,%%mm7\n\t" \ + +/*Performs an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. 
+ Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform + in place with no temporary registers).*/ +#define OC_HADAMARD_8x4 \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_8x4 \ + +/*Performs the first part of the final stage of the Hadamard transform and + summing of absolute values. 
+ At the end of this part, %%mm1 will contain the DC coefficient of the + transform.*/ +#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ + /*We use the fact that \ + (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \ + to merge the final butterfly with the abs and the first stage of \ + accumulation. \ + Thus we can avoid using pabsw, which is not available until SSSE3. \ + Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \ + implementation would be (3+3)*8+7=55 instructions (+4 for spilling \ + registers). \ + Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). \ + This implementation is only 26 (+4 for spilling registers).*/ \ + "#OC_HADAMARD_C_ABS_ACCUM_A_8x4\n\t" \ + "movq %%mm7,"_r7"(%[buf])\n\t" \ + "movq %%mm6,"_r6"(%[buf])\n\t" \ + /*mm7={0x7FFF}x4 \ + mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \ + "pcmpeqb %%mm7,%%mm7\n\t" \ + "movq %%mm0,%%mm6\n\t" \ + "psrlw $1,%%mm7\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "pmaxsw %%mm1,%%mm0\n\t" \ + "paddsw %%mm7,%%mm6\n\t" \ + "psubw %%mm6,%%mm0\n\t" \ + /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \ + mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm4,%%mm1\n\t" \ + "pmaxsw %%mm3,%%mm2\n\t" \ + "pmaxsw %%mm5,%%mm4\n\t" \ + "paddw %%mm3,%%mm6\n\t" \ + "paddw %%mm5,%%mm1\n\t" \ + "movq "_r7"(%[buf]),%%mm3\n\t" \ + +/*Performs the second part of the final stage of the Hadamard transform and + summing of absolute values.*/ +#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ + "#OC_HADAMARD_C_ABS_ACCUM_B_8x4\n\t" \ + "paddsw %%mm7,%%mm6\n\t" \ + "movq "_r6"(%[buf]),%%mm5\n\t" \ + "paddsw %%mm7,%%mm1\n\t" \ + "psubw %%mm6,%%mm2\n\t" \ + "psubw %%mm1,%%mm4\n\t" \ + /*mm7={1}x4 (needed for the horizontal add that follows) \ + mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \ + "movq %%mm3,%%mm6\n\t" \ + "pmaxsw %%mm5,%%mm3\n\t" \ + "paddw %%mm2,%%mm0\n\t" \ + "paddw %%mm5,%%mm6\n\t" \ + "paddw %%mm4,%%mm0\n\t" \ + "paddsw %%mm7,%%mm6\n\t" \ + "paddw %%mm3,%%mm0\n\t" \ + "psrlw 
$14,%%mm7\n\t" \ + "psubw %%mm6,%%mm0\n\t" \ + +/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the + absolute value of each component, and accumulates everything into mm0. + This is the only portion of SATD which requires MMXEXT (we could use plain + MMX, but it takes 4 instructions and an extra register to work around the + lack of a pmaxsw, which is a pretty serious penalty).*/ +#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \ + OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ + OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ + +/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each + component, and accumulates everything into mm0. + Note that mm0 will have an extra 4 added to each column, and that after + removing this value, the remainder will be half the conventional value.*/ +#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) + +/*Performs two 4x4 transposes (mostly) in place. + On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7} + contains rows {a,b,c,d}. 
+ On output, {0x40,0x50,0x60,0x70}+_off(%[buf]) contains {e,f,g,h}^T, and + {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/ +#define OC_TRANSPOSE_4x4x2(_off) \ + "#OC_TRANSPOSE_4x4x2\n\t" \ + /*First 4x4 transpose:*/ \ + "movq %%mm5,0x10+"_off"(%[buf])\n\t" \ + /*mm0 = e3 e2 e1 e0 \ + mm1 = f3 f2 f1 f0 \ + mm2 = g3 g2 g1 g0 \ + mm3 = h3 h2 h1 h0*/ \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm3,%%mm2\n\t" \ + "punpckhwd %%mm3,%%mm5\n\t" \ + "movq %%mm0,%%mm3\n\t" \ + "punpcklwd %%mm1,%%mm0\n\t" \ + "punpckhwd %%mm1,%%mm3\n\t" \ + /*mm0 = f1 e1 f0 e0 \ + mm3 = f3 e3 f2 e2 \ + mm2 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + "movq %%mm0,%%mm1\n\t" \ + "punpckldq %%mm2,%%mm0\n\t" \ + "punpckhdq %%mm2,%%mm1\n\t" \ + "movq %%mm3,%%mm2\n\t" \ + "punpckhdq %%mm5,%%mm3\n\t" \ + "movq %%mm0,0x40+"_off"(%[buf])\n\t" \ + "punpckldq %%mm5,%%mm2\n\t" \ + /*mm0 = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + "movq 0x10+"_off"(%[buf]),%%mm5\n\t" \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm5 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm7 = d3 d2 d1 d0*/ \ + "movq %%mm6,%%mm0\n\t" \ + "punpcklwd %%mm7,%%mm6\n\t" \ + "movq %%mm1,0x50+"_off"(%[buf])\n\t" \ + "punpckhwd %%mm7,%%mm0\n\t" \ + "movq %%mm4,%%mm7\n\t" \ + "punpcklwd %%mm5,%%mm4\n\t" \ + "movq %%mm2,0x60+"_off"(%[buf])\n\t" \ + "punpckhwd %%mm5,%%mm7\n\t" \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + mm0 = d3 c3 d2 c2*/ \ + "movq %%mm4,%%mm5\n\t" \ + "punpckldq %%mm6,%%mm4\n\t" \ + "movq %%mm3,0x70+"_off"(%[buf])\n\t" \ + "punpckhdq %%mm6,%%mm5\n\t" \ + "movq %%mm7,%%mm6\n\t" \ + "punpckhdq %%mm0,%%mm7\n\t" \ + "punpckldq %%mm0,%%mm6\n\t" \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ + +static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src, + int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t 
*bufp; + unsigned ret; + unsigned ret2; + bufp=buf; + __asm__ __volatile__( + OC_LOAD_SUB_8x4("0x00") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x00") + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + "movq %%mm4,0x00(%[buf])\n\t" + "movq %%mm5,0x10(%[buf])\n\t" + "movq %%mm6,0x20(%[buf])\n\t" + "movq %%mm7,0x30(%[buf])\n\t" + OC_LOAD_SUB_8x4("0x04") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x08") + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. + We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + "movq 0x10(%[buf]),%%mm1\n\t" + "movq 0x20(%[buf]),%%mm2\n\t" + "movq 0x30(%[buf]),%%mm3\n\t" + "movq 0x00(%[buf]),%%mm0\n\t" + OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38") + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. 
+ We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + "mov %[thresh],%[ret2]\n\t" + "pmaddwd %%mm7,%%mm0\n\t" + "movq 0x50(%[buf]),%%mm1\n\t" + "movq 0x58(%[buf]),%%mm5\n\t" + "movq %%mm0,%%mm4\n\t" + "movq 0x60(%[buf]),%%mm2\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "movq 0x68(%[buf]),%%mm6\n\t" + "paddd %%mm0,%%mm4\n\t" + "movq 0x70(%[buf]),%%mm3\n\t" + "movd %%mm4,%[ret]\n\t" + "movq 0x78(%[buf]),%%mm7\n\t" + /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4 + added to them, and a factor of two removed; correct the final sum here.*/ + "lea -32(%[ret],%[ret]),%[ret]\n\t" + "movq 0x40(%[buf]),%%mm0\n\t" + "cmp %[ret2],%[ret]\n\t" + "movq 0x48(%[buf]),%%mm4\n\t" + "jae 1f\n\t" + OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78") + "pmaddwd %%mm7,%%mm0\n\t" + /*There isn't much to stick in here to hide the latency this time, but the + alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose + latency is even worse.*/ + "sub $32,%[ret]\n\t" + "movq %%mm0,%%mm4\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "paddd %%mm0,%%mm4\n\t" + "movd %%mm4,%[ret2]\n\t" + "lea (%[ret],%[ret2],2),%[ret]\n\t" + ".p2align 4,,15\n\t" + "1:\n\t" + /*Although it looks like we're using 7 registers here, gcc can alias %[ret] + and %[ret2] with some of the inputs, since for once we don't write to + them until after we're done using everything but %[buf] (which is also + listed as an output to ensure gcc _doesn't_ alias them against it).*/ + /*Note that _src_ystride and _ref_ystride must be given non-overlapping + constraints, otherwise if gcc can prove they're equal it will allocate + them to the same register (which is bad); _src and _ref face a similar + problem, though those are never actually the same.*/ + :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp) + :[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride), + [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride), + [thresh]"m"(_thresh) + 
/*We have to use neg, so we actually clobber the condition codes for once + (not to mention cmp, sub, and add).*/ + :"cc" + ); + return ret; +} + +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh); +} + +/*Our internal implementation of frag_copy2 takes an extra stride parameter so + we can share code with oc_enc_frag_satd2_thresh_mmxext().*/ +static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){ + __asm__ __volatile__( + /*Load the first 3 rows.*/ + "movq (%[src1]),%%mm0\n\t" + "movq (%[src2]),%%mm1\n\t" + "movq (%[src1],%[src_ystride]),%%mm2\n\t" + "lea (%[src1],%[src_ystride],2),%[src1]\n\t" + "movq (%[src2],%[src_ystride]),%%mm3\n\t" + "lea (%[src2],%[src_ystride],2),%[src2]\n\t" + "pxor %%mm7,%%mm7\n\t" + "movq (%[src1]),%%mm4\n\t" + "pcmpeqb %%mm6,%%mm6\n\t" + "movq (%[src2]),%%mm5\n\t" + /*mm7={1}x8.*/ + "psubb %%mm6,%%mm7\n\t" + /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ + "movq %%mm0,%%mm6\n\t" + "pxor %%mm1,%%mm0\n\t" + "pavgb %%mm1,%%mm6\n\t" + /*%%mm1 is free, start averaging %%mm3 into %%mm2 using %%mm1.*/ + "movq %%mm2,%%mm1\n\t" + "pand %%mm7,%%mm0\n\t" + "pavgb %%mm3,%%mm2\n\t" + "pxor %%mm3,%%mm1\n\t" + /*%%mm3 is free.*/ + "psubb %%mm0,%%mm6\n\t" + /*%%mm0 is free, start loading the next row.*/ + "movq (%[src1],%[src_ystride]),%%mm0\n\t" + /*Start averaging %%mm5 and %%mm4 using %%mm3.*/ + "movq %%mm4,%%mm3\n\t" + /*%%mm6 (row 0) is done; write it out.*/ + "movq %%mm6,(%[dst])\n\t" + "pand %%mm7,%%mm1\n\t" + "pavgb %%mm5,%%mm4\n\t" + "psubb %%mm1,%%mm2\n\t" + /*%%mm1 is free, continue loading the next row.*/ + "movq (%[src2],%[src_ystride]),%%mm1\n\t" + "pxor %%mm5,%%mm3\n\t" + "lea (%[src1],%[src_ystride],2),%[src1]\n\t" + /*%%mm2 (row 1) is done; write it out.*/ + "movq 
%%mm2,(%[dst],%[dst_ystride])\n\t" + "pand %%mm7,%%mm3\n\t" + /*Start loading the next row.*/ + "movq (%[src1]),%%mm2\n\t" + "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "psubb %%mm3,%%mm4\n\t" + "lea (%[src2],%[src_ystride],2),%[src2]\n\t" + /*%%mm4 (row 2) is done; write it out.*/ + "movq %%mm4,(%[dst])\n\t" + /*Continue loading the next row.*/ + "movq (%[src2]),%%mm3\n\t" + /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ + "movq %%mm0,%%mm6\n\t" + "pxor %%mm1,%%mm0\n\t" + /*Start loading the next row.*/ + "movq (%[src1],%[src_ystride]),%%mm4\n\t" + "pavgb %%mm1,%%mm6\n\t" + /*%%mm1 is free; start averaging %%mm3 into %%mm2 using %%mm1.*/ + "movq %%mm2,%%mm1\n\t" + "pand %%mm7,%%mm0\n\t" + /*Continue loading the next row.*/ + "movq (%[src2],%[src_ystride]),%%mm5\n\t" + "pavgb %%mm3,%%mm2\n\t" + "lea (%[src1],%[src_ystride],2),%[src1]\n\t" + "pxor %%mm3,%%mm1\n\t" + /*%%mm3 is free.*/ + "psubb %%mm0,%%mm6\n\t" + /*%%mm0 is free, start loading the next row.*/ + "movq (%[src1]),%%mm0\n\t" + /*Start averaging %%mm5 into %%mm4 using %%mm3.*/ + "movq %%mm4,%%mm3\n\t" + /*%%mm6 (row 3) is done; write it out.*/ + "movq %%mm6,(%[dst],%[dst_ystride])\n\t" + "pand %%mm7,%%mm1\n\t" + "lea (%[src2],%[src_ystride],2),%[src2]\n\t" + "pavgb %%mm5,%%mm4\n\t" + "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "psubb %%mm1,%%mm2\n\t" + /*%%mm1 is free; continue loading the next row.*/ + "movq (%[src2]),%%mm1\n\t" + "pxor %%mm5,%%mm3\n\t" + /*%%mm2 (row 4) is done; write it out.*/ + "movq %%mm2,(%[dst])\n\t" + "pand %%mm7,%%mm3\n\t" + /*Start loading the next row.*/ + "movq (%[src1],%[src_ystride]),%%mm2\n\t" + "psubb %%mm3,%%mm4\n\t" + /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ + "movq %%mm0,%%mm6\n\t" + /*Continue loading the next row.*/ + "movq (%[src2],%[src_ystride]),%%mm3\n\t" + /*%%mm4 (row 5) is done; write it out.*/ + "movq %%mm4,(%[dst],%[dst_ystride])\n\t" + "pxor %%mm1,%%mm0\n\t" + "pavgb %%mm1,%%mm6\n\t" + /*%%mm4 is free; start averaging %%mm3 into %%mm2 using 
%%mm4.*/ + "movq %%mm2,%%mm4\n\t" + "pand %%mm7,%%mm0\n\t" + "pavgb %%mm3,%%mm2\n\t" + "pxor %%mm3,%%mm4\n\t" + "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "psubb %%mm0,%%mm6\n\t" + "pand %%mm7,%%mm4\n\t" + /*%%mm6 (row 6) is done, write it out.*/ + "movq %%mm6,(%[dst])\n\t" + "psubb %%mm4,%%mm2\n\t" + /*%%mm2 (row 7) is done, write it out.*/ + "movq %%mm2,(%[dst],%[dst_ystride])\n\t" + :[dst]"+r"(_dst),[src1]"+%r"(_src1),[src2]"+r"(_src2) + :[dst_ystride]"r"((ptrdiff_t)_dst_ystride), + [src_ystride]"r"((ptrdiff_t)_src_ystride) + :"memory" + ); +} + +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + OC_ALIGN8(unsigned char ref[64]); + oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride); + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh); +} + +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src, + int _ystride){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t *bufp; + unsigned ret; + unsigned ret2; + bufp=buf; + __asm__ __volatile__( + OC_LOAD_8x4("0x00") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x00") + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + "movq %%mm4,0x00(%[buf])\n\t" + "movq %%mm5,0x10(%[buf])\n\t" + "movq %%mm6,0x20(%[buf])\n\t" + "movq %%mm7,0x30(%[buf])\n\t" + OC_LOAD_8x4("0x04") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x08") + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. 
+ We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + "movq 0x10(%[buf]),%%mm1\n\t" + "movq 0x20(%[buf]),%%mm2\n\t" + "movq 0x30(%[buf]),%%mm3\n\t" + "movq 0x00(%[buf]),%%mm0\n\t" + /*We split out the stages here so we can save the DC coefficient in the + middle.*/ + OC_HADAMARD_AB_8x4 + OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38") + "movd %%mm1,%[ret]\n\t" + OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38") + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. + We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + "pmaddwd %%mm7,%%mm0\n\t" + "movq 0x50(%[buf]),%%mm1\n\t" + "movq 0x58(%[buf]),%%mm5\n\t" + "movq 0x60(%[buf]),%%mm2\n\t" + "movq %%mm0,%%mm4\n\t" + "movq 0x68(%[buf]),%%mm6\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "movq 0x70(%[buf]),%%mm3\n\t" + "paddd %%mm0,%%mm4\n\t" + "movq 0x78(%[buf]),%%mm7\n\t" + "movd %%mm4,%[ret2]\n\t" + "movq 0x40(%[buf]),%%mm0\n\t" + "movq 0x48(%[buf]),%%mm4\n\t" + OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78") + "pmaddwd %%mm7,%%mm0\n\t" + /*We assume that the DC coefficient is always positive (which is true, + because the input to the INTRA transform was not a difference).*/ + "movzx %w[ret],%[ret]\n\t" + "add %[ret2],%[ret2]\n\t" + "sub %[ret],%[ret2]\n\t" + "movq %%mm0,%%mm4\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "paddd %%mm0,%%mm4\n\t" + "movd %%mm4,%[ret]\n\t" + "lea -64(%[ret2],%[ret],2),%[ret]\n\t" + /*Although it looks like we're using 7 registers here, gcc can alias %[ret] + and %[ret2] with some of the inputs, since for once we don't write to + them until after we're done using everything but %[buf] (which is also + listed as an output to ensure gcc _doesn't_ alias them against 
it).*/ + :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp) + :[src]"r"(_src),[src4]"r"(_src+4*_ystride), + [ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride) + /*We have to use sub, so we actually clobber the condition codes for once + (not to mention add).*/ + :"cc" + ); + return ret; +} + +void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + int i; + __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::); + for(i=4;i-->0;){ + __asm__ __volatile__( + /*mm0=[src]*/ + "movq (%[src]),%%mm0\n\t" + /*mm1=[ref]*/ + "movq (%[ref]),%%mm1\n\t" + /*mm4=[src+ystride]*/ + "movq (%[src],%[ystride]),%%mm4\n\t" + /*mm5=[ref+ystride]*/ + "movq (%[ref],%[ystride]),%%mm5\n\t" + /*Compute [src]-[ref].*/ + "movq %%mm0,%%mm2\n\t" + "punpcklbw %%mm7,%%mm0\n\t" + "movq %%mm1,%%mm3\n\t" + "punpckhbw %%mm7,%%mm2\n\t" + "punpcklbw %%mm7,%%mm1\n\t" + "punpckhbw %%mm7,%%mm3\n\t" + "psubw %%mm1,%%mm0\n\t" + "psubw %%mm3,%%mm2\n\t" + /*Compute [src+ystride]-[ref+ystride].*/ + "movq %%mm4,%%mm1\n\t" + "punpcklbw %%mm7,%%mm4\n\t" + "movq %%mm5,%%mm3\n\t" + "punpckhbw %%mm7,%%mm1\n\t" + "lea (%[src],%[ystride],2),%[src]\n\t" + "punpcklbw %%mm7,%%mm5\n\t" + "lea (%[ref],%[ystride],2),%[ref]\n\t" + "punpckhbw %%mm7,%%mm3\n\t" + "psubw %%mm5,%%mm4\n\t" + "psubw %%mm3,%%mm1\n\t" + /*Write the answer out.*/ + "movq %%mm0,0x00(%[residue])\n\t" + "movq %%mm2,0x08(%[residue])\n\t" + "movq %%mm4,0x10(%[residue])\n\t" + "movq %%mm1,0x18(%[residue])\n\t" + "lea 0x20(%[residue]),%[residue]\n\t" + :[residue]"+r"(_residue),[src]"+r"(_src),[ref]"+r"(_ref) + :[ystride]"r"((ptrdiff_t)_ystride) + :"memory" + ); + } +} + +void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64], + const unsigned char *_src,int _ystride){ + ptrdiff_t ystride3; + __asm__ __volatile__( + /*mm0=[src]*/ + "movq (%[src]),%%mm0\n\t" + /*mm1=[src+ystride]*/ + "movq (%[src],%[ystride]),%%mm1\n\t" + /*mm6={-1}x4*/ + "pcmpeqw %%mm6,%%mm6\n\t" + 
/*mm2=[src+2*ystride]*/ + "movq (%[src],%[ystride],2),%%mm2\n\t" + /*[ystride3]=3*[ystride]*/ + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" + /*mm6={0x8000}x4*/ + "psllw $15,%%mm6\n\t" + /*mm3=[src+3*ystride]*/ + "movq (%[src],%[ystride3]),%%mm3\n\t" + /*mm6={128}x4*/ + "psrlw $8,%%mm6\n\t" + /*mm7=0*/ + "pxor %%mm7,%%mm7\n\t" + /*[src]=[src]+4*[ystride]*/ + "lea (%[src],%[ystride],4),%[src]\n\t" + /*Compute [src]-128 and [src+ystride]-128*/ + "movq %%mm0,%%mm4\n\t" + "punpcklbw %%mm7,%%mm0\n\t" + "movq %%mm1,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm0\n\t" + "punpcklbw %%mm7,%%mm1\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer out.*/ + "movq %%mm0,0x00(%[residue])\n\t" + "movq %%mm4,0x08(%[residue])\n\t" + "movq %%mm1,0x10(%[residue])\n\t" + "movq %%mm5,0x18(%[residue])\n\t" + /*mm0=[src+4*ystride]*/ + "movq (%[src]),%%mm0\n\t" + /*mm1=[src+5*ystride]*/ + "movq (%[src],%[ystride]),%%mm1\n\t" + /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/ + "movq %%mm2,%%mm4\n\t" + "punpcklbw %%mm7,%%mm2\n\t" + "movq %%mm3,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm2\n\t" + "punpcklbw %%mm7,%%mm3\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm3\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer out.*/ + "movq %%mm2,0x20(%[residue])\n\t" + "movq %%mm4,0x28(%[residue])\n\t" + "movq %%mm3,0x30(%[residue])\n\t" + "movq %%mm5,0x38(%[residue])\n\t" + /*mm2=[src+6*ystride]*/ + "movq (%[src],%[ystride],2),%%mm2\n\t" + /*mm3=[src+7*ystride]*/ + "movq (%[src],%[ystride3]),%%mm3\n\t" + /*Compute [src+4*ystride]-128 and [src+5*ystride]-128*/ + "movq %%mm0,%%mm4\n\t" + "punpcklbw %%mm7,%%mm0\n\t" + "movq %%mm1,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm0\n\t" + "punpcklbw %%mm7,%%mm1\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer 
out.*/ + "movq %%mm0,0x40(%[residue])\n\t" + "movq %%mm4,0x48(%[residue])\n\t" + "movq %%mm1,0x50(%[residue])\n\t" + "movq %%mm5,0x58(%[residue])\n\t" + /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ + "movq %%mm2,%%mm4\n\t" + "punpcklbw %%mm7,%%mm2\n\t" + "movq %%mm3,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm2\n\t" + "punpcklbw %%mm7,%%mm3\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm3\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer out.*/ + "movq %%mm2,0x60(%[residue])\n\t" + "movq %%mm4,0x68(%[residue])\n\t" + "movq %%mm3,0x70(%[residue])\n\t" + "movq %%mm5,0x78(%[residue])\n\t" + :[src]"+r"(_src),[ystride3]"=&r"(ystride3) + :[residue]"r"(_residue),[ystride]"r"((ptrdiff_t)_ystride) + :"memory" + ); +} + +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride); +} + +#endif diff --git a/Engine/lib/libtheora/lib/x86/mmxfdct.c b/Engine/lib/libtheora/lib/x86/mmxfdct.c new file mode 100644 index 000000000..211875255 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/mmxfdct.c @@ -0,0 +1,665 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ********************************************************************/ +/*MMX fDCT implementation for x86_32*/ +/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/ +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +# define OC_FDCT_STAGE1_8x4 \ + "#OC_FDCT_STAGE1_8x4\n\t" \ + /*Stage 1:*/ \ + /*mm0=t7'=t0-t7*/ \ + "psubw %%mm7,%%mm0\n\t" \ + "paddw %%mm7,%%mm7\n\t" \ + /*mm1=t6'=t1-t6*/ \ + "psubw %%mm6,%%mm1\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + /*mm2=t5'=t2-t5*/ \ + "psubw %%mm5,%%mm2\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*mm3=t4'=t3-t4*/ \ + "psubw %%mm4,%%mm3\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + /*mm7=t0'=t0+t7*/ \ + "paddw %%mm0,%%mm7\n\t" \ + /*mm6=t1'=t1+t6*/ \ + "paddw %%mm1,%%mm6\n\t" \ + /*mm5=t2'=t2+t5*/ \ + "paddw %%mm2,%%mm5\n\t" \ + /*mm4=t3'=t3+t4*/ \ + "paddw %%mm3,%%mm4\n\t" \ + +# define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \ + "#OC_FDCT8x4\n\t" \ + /*Stage 2:*/ \ + /*mm7=t3''=t0'-t3'*/ \ + "psubw %%mm4,%%mm7\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + /*mm6=t2''=t1'-t2'*/ \ + "psubw %%mm5,%%mm6\n\t" \ + "movq %%mm7,"_r6"(%[y])\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*mm1=t5''=t6'-t5'*/ \ + "psubw %%mm2,%%mm1\n\t" \ + "movq %%mm6,"_r2"(%[y])\n\t" \ + /*mm4=t0''=t0'+t3'*/ \ + "paddw %%mm7,%%mm4\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + /*mm5=t1''=t1'+t2'*/ \ + "movq %%mm4,"_r0"(%[y])\n\t" \ + "paddw %%mm6,%%mm5\n\t" \ + /*mm2=t6''=t6'+t5'*/ \ + "paddw %%mm1,%%mm2\n\t" \ + "movq %%mm5,"_r4"(%[y])\n\t" \ + /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \ + /*mm4, mm5, mm6, mm7 are free.*/ \ + /*Stage 3:*/ \ + /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \ + "mov $0x5A806A0A,%[a]\n\t" \ + "pcmpeqb %%mm6,%%mm6\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psrlw $15,%%mm6\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + /*mm0=0, m2={-1}x4 \ + mm5:mm4=t5''*27146+0xB500*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + 
"punpcklwd %%mm6,%%mm4\n\t" \ + "movq %%mm2,"_r3"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq %%mm0,"_r7"(%[y])\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqb %%mm2,%%mm2\n\t" \ + /*mm2=t6'', mm1=t5''+(t5''!=0) \ + mm4=(t5''*27146+0xB500>>16)*/ \ + "pcmpeqw %%mm1,%%mm0\n\t" \ + "psrad $16,%%mm4\n\t" \ + "psubw %%mm2,%%mm0\n\t" \ + "movq "_r3"(%[y]),%%mm2\n\t" \ + "psrad $16,%%mm5\n\t" \ + "paddw %%mm0,%%mm1\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ + "paddw %%mm1,%%mm4\n\t" \ + "movq "_r7"(%[y]),%%mm0\n\t" \ + "psraw $1,%%mm4\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + /*mm3=t4''=t4'+s*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*mm1=t5'''=t4'-s*/ \ + "psubw %%mm4,%%mm1\n\t" \ + /*mm1=0, mm3={-1}x4 \ + mm5:mm4=t6''*27146+0xB500*/ \ + "movq %%mm2,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "movq %%mm1,"_r5"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq %%mm3,"_r1"(%[y])\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "pxor %%mm1,%%mm1\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqb %%mm3,%%mm3\n\t" \ + /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm1\n\t" \ + "psrad $16,%%mm5\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + /*mm1=t1'' \ + mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ + "paddw %%mm2,%%mm4\n\t" \ + "movq "_r4"(%[y]),%%mm1\n\t" \ + "psraw $1,%%mm4\n\t" \ + "movq %%mm0,%%mm2\n\t" \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm0=t7''=t7'+s*/ \ + "paddw %%mm4,%%mm0\n\t" \ + /*mm2=t6'''=t7'-s*/ \ + "psubw %%mm4,%%mm2\n\t" \ + /*Stage 4:*/ \ + /*mm0=0, mm2=t0'' \ + mm5:mm4=t1''*27146+0xB500*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "movq %%mm2,"_r3"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq "_r0"(%[y]),%%mm2\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movq 
%%mm0,"_r7"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + /*mm7={27146,0x4000>>1}x2 \ + mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ + "psrad $16,%%mm4\n\t" \ + "mov $0x20006A0A,%[a]\n\t" \ + "pcmpeqw %%mm1,%%mm0\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psrad $16,%%mm5\n\t" \ + "psubw %%mm3,%%mm0\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "paddw %%mm1,%%mm0\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddw %%mm4,%%mm0\n\t" \ + /*mm6={0x00000E3D}x2 \ + mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \ + "movq %%mm2,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "mov $0x0E3D,%[a]\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movd %[a],%%mm6\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pxor %%mm1,%%mm1\n\t" \ + "punpckldq %%mm6,%%mm6\n\t" \ + "pcmpeqw %%mm2,%%mm1\n\t" \ + /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ + "psrad $16,%%mm4\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "psrad $16,%%mm5\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "movq "_r5"(%[y]),%%mm1\n\t" \ + "paddw %%mm2,%%mm4\n\t" \ + /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \ + The naive implementation could cause overflow, so we use \ + u=(r&s)+((r^s)>>1).*/ \ + "movq "_r3"(%[y]),%%mm2\n\t" \ + "movq %%mm0,%%mm7\n\t" \ + "pxor %%mm4,%%mm0\n\t" \ + "pand %%mm4,%%mm7\n\t" \ + "psraw $1,%%mm0\n\t" \ + "mov $0x7FFF54DC,%[a]\n\t" \ + "paddw %%mm7,%%mm0\n\t" \ + "movd %[a],%%mm7\n\t" \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm4=_y[4]=v=r-u*/ \ + "psubw %%mm0,%%mm4\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "movq %%mm4,"_r4"(%[y])\n\t" \ + /*mm0=0, mm7={36410}x4 \ + mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "punpcklwd %%mm1,%%mm4\n\t" \ + "mov $0x8E3A8E3A,%[a]\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq %%mm0,"_r0"(%[y])\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqw %%mm0,%%mm1\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw 
%%mm3,%%mm1\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddd %%mm6,%%mm4\n\t" \ + "paddd %%mm6,%%mm5\n\t" \ + /*mm0=0 \ + mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "paddw %%mm2,%%mm1\n\t" \ + "pmullw %%mm7,%%mm3\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + "punpckhwd %%mm6,%%mm3\n\t" \ + "punpcklwd %%mm6,%%mm1\n\t" \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ + "paddd %%mm3,%%mm5\n\t" \ + "paddd %%mm1,%%mm4\n\t" \ + "psrad $16,%%mm5\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqb %%mm3,%%mm3\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "psubw %%mm3,%%mm6\n\t" \ + /*mm1=t7'', mm7={26568,0x3400}x2 \ + mm2=s=t6'''-(36410*u>>16)*/ \ + "movq %%mm4,%%mm1\n\t" \ + "mov $0x340067C8,%[a]\n\t" \ + "pmulhw %%mm7,%%mm4\n\t" \ + "movd %[a],%%mm7\n\t" \ + "movq %%mm1,"_r5"(%[y])\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddw %%mm1,%%mm4\n\t" \ + "movq "_r7"(%[y]),%%mm1\n\t" \ + "psubw %%mm4,%%mm2\n\t" \ + /*mm6={0x00007B1B}x2 \ + mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \ + "movq %%mm2,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x7B1B,%[a]\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movd %[a],%%mm6\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "psubw %%mm3,%%mm0\n\t" \ + "punpckldq %%mm6,%%mm6\n\t" \ + /*mm7={64277-0x7FFF,0x7FFF}x2 \ + mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ + "psrad $17,%%mm4\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psrad $17,%%mm5\n\t" \ + "mov $0x7FFF7B16,%[a]\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "movd %[a],%%mm7\n\t" \ + "paddw %%mm4,%%mm2\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + /*mm0=0, mm7={12785}x4 \ + mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "movq %%mm2,"_r3"(%[y])\n\t" \ + "punpcklwd %%mm1,%%mm4\n\t" 
\ + "movq "_r1"(%[y]),%%mm2\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x31F131F1,%[a]\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqw %%mm0,%%mm1\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddd %%mm6,%%mm4\n\t" \ + "paddd %%mm6,%%mm5\n\t" \ + /*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "pmullw %%mm7,%%mm3\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + "punpckhwd %%mm6,%%mm3\n\t" \ + "punpcklwd %%mm6,%%mm1\n\t" \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ + "paddd %%mm3,%%mm5\n\t" \ + "paddd %%mm1,%%mm4\n\t" \ + "psrad $16,%%mm5\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqb %%mm3,%%mm3\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "psubw %%mm3,%%mm6\n\t" \ + /*mm1=t3'', mm7={20539,0x3000}x2 \ + mm4=s=(12785*u>>16)-t4''*/ \ + "movq %%mm4,"_r1"(%[y])\n\t" \ + "pmulhw %%mm7,%%mm4\n\t" \ + "mov $0x3000503B,%[a]\n\t" \ + "movq "_r6"(%[y]),%%mm1\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw %%mm2,%%mm4\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + /*mm6={0x00006CB7}x2 \ + mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \ + "movq %%mm4,%%mm5\n\t" \ + "movq %%mm4,%%mm2\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x6CB7,%[a]\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movd %[a],%%mm6\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "psubw %%mm3,%%mm0\n\t" \ + "punpckldq %%mm6,%%mm6\n\t" \ + /*mm7={60547-0x7FFF,0x7FFF}x2 \ + mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ + "psrad $20,%%mm4\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psrad $20,%%mm5\n\t" \ + "mov $0x7FFF6C84,%[a]\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "movd %[a],%%mm7\n\t" \ + "paddw %%mm4,%%mm2\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + /*mm0=0, mm7={25080}x4 \ + mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \ + 
"movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "movq %%mm2,"_r7"(%[y])\n\t" \ + "punpcklwd %%mm1,%%mm4\n\t" \ + "movq "_r2"(%[y]),%%mm2\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x61F861F8,%[a]\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "movd %[a],%%mm7\n\t" \ + "pcmpeqw %%mm0,%%mm1\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddd %%mm6,%%mm4\n\t" \ + "paddd %%mm6,%%mm5\n\t" \ + /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "pmullw %%mm7,%%mm3\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + "punpckhwd %%mm6,%%mm3\n\t" \ + "punpcklwd %%mm6,%%mm1\n\t" \ + /*mm1={-1}x4 \ + mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ + "paddd %%mm3,%%mm5\n\t" \ + "paddd %%mm1,%%mm4\n\t" \ + "psrad $16,%%mm5\n\t" \ + "mov $0x28005460,%[a]\n\t" \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqb %%mm1,%%mm1\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \ + mm4=s=(25080*u>>16)-t2''*/ \ + "movq %%mm4,%%mm6\n\t" \ + "pmulhw %%mm7,%%mm4\n\t" \ + "pxor %%mm5,%%mm5\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw %%mm1,%%mm5\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "psubw %%mm2,%%mm4\n\t" \ + /*mm2=s+(s!=0) \ + mm4:mm3=s*21600+0x2800*/ \ + "movq %%mm4,%%mm3\n\t" \ + "movq %%mm4,%%mm2\n\t" \ + "punpckhwd %%mm5,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "psubw %%mm1,%%mm0\n\t" \ + "punpcklwd %%mm5,%%mm3\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "pmaddwd %%mm7,%%mm3\n\t" \ + /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \ + mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ + "movq "_r4"(%[y]),%%mm0\n\t" \ + "psrad $18,%%mm4\n\t" \ + "movq "_r5"(%[y]),%%mm5\n\t" \ + "psrad $18,%%mm3\n\t" \ + "movq "_r7"(%[y]),%%mm1\n\t" \ + "packssdw %%mm4,%%mm3\n\t" \ + "movq "_r0"(%[y]),%%mm4\n\t" \ + "paddw %%mm2,%%mm3\n\t" \ + +/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], 
mm5=_y[5], mm3=_y[6], mm1=_y[7]. + On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and + {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/ +# define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \ + "#OC_TRANSPOSE8x4\n\t" \ + /*First 4x4 transpose:*/ \ + /*mm0 = e3 e2 e1 e0 \ + mm5 = f3 f2 f1 f0 \ + mm3 = g3 g2 g1 g0 \ + mm1 = h3 h2 h1 h0*/ \ + "movq %%mm0,%%mm2\n\t" \ + "punpcklwd %%mm5,%%mm0\n\t" \ + "punpckhwd %%mm5,%%mm2\n\t" \ + "movq %%mm3,%%mm5\n\t" \ + "punpcklwd %%mm1,%%mm3\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + /*mm0 = f1 e1 f0 e0 \ + mm2 = f3 e3 f2 e2 \ + mm3 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + "movq %%mm0,%%mm1\n\t" \ + "punpckldq %%mm3,%%mm0\n\t" \ + "movq %%mm0,"_r4"(%[y])\n\t" \ + "punpckhdq %%mm3,%%mm1\n\t" \ + "movq "_r1"(%[y]),%%mm0\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "punpckldq %%mm5,%%mm2\n\t" \ + "punpckhdq %%mm5,%%mm3\n\t" \ + "movq "_r3"(%[y]),%%mm5\n\t" \ + /*_y[4] = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm0 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm5 = d3 d2 d1 d0*/ \ + "movq %%mm4,%%mm7\n\t" \ + "punpcklwd %%mm0,%%mm4\n\t" \ + "punpckhwd %%mm0,%%mm7\n\t" \ + "movq %%mm6,%%mm0\n\t" \ + "punpcklwd %%mm5,%%mm6\n\t" \ + "punpckhwd %%mm5,%%mm0\n\t" \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + mm0 = d3 c3 d2 c2*/ \ + "movq %%mm4,%%mm5\n\t" \ + "punpckldq %%mm6,%%mm4\n\t" \ + "punpckhdq %%mm6,%%mm5\n\t" \ + "movq %%mm7,%%mm6\n\t" \ + "punpckhdq %%mm0,%%mm7\n\t" \ + "punpckldq %%mm0,%%mm6\n\t" \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ + +/*MMX implementation of the fDCT.*/ +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ptrdiff_t a; + __asm__ __volatile__( + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + /*We also add biases to correct for some systematic 
error that remains in + the full fDCT->iDCT round trip.*/ + "movq 0x00(%[x]),%%mm0\n\t" + "movq 0x10(%[x]),%%mm1\n\t" + "movq 0x20(%[x]),%%mm2\n\t" + "movq 0x30(%[x]),%%mm3\n\t" + "pcmpeqb %%mm4,%%mm4\n\t" + "pxor %%mm7,%%mm7\n\t" + "movq %%mm0,%%mm5\n\t" + "psllw $2,%%mm0\n\t" + "pcmpeqw %%mm7,%%mm5\n\t" + "movq 0x70(%[x]),%%mm7\n\t" + "psllw $2,%%mm1\n\t" + "psubw %%mm4,%%mm5\n\t" + "psllw $2,%%mm2\n\t" + "mov $1,%[a]\n\t" + "pslld $16,%%mm5\n\t" + "movd %[a],%%mm6\n\t" + "psllq $16,%%mm5\n\t" + "mov $0x10001,%[a]\n\t" + "psllw $2,%%mm3\n\t" + "movd %[a],%%mm4\n\t" + "punpckhwd %%mm6,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "movq 0x60(%[x]),%%mm6\n\t" + "paddw %%mm5,%%mm0\n\t" + "movq 0x50(%[x]),%%mm5\n\t" + "paddw %%mm4,%%mm0\n\t" + "movq 0x40(%[x]),%%mm4\n\t" + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + "psllw $2,%%mm7\n\t" + "psubw %%mm7,%%mm0\n\t" + "psllw $2,%%mm6\n\t" + "paddw %%mm7,%%mm7\n\t" + /*mm1=t6'=t1-t6*/ + "psllw $2,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "psllw $2,%%mm4\n\t" + "paddw %%mm6,%%mm6\n\t" + /*mm2=t5'=t2-t5*/ + "psubw %%mm5,%%mm2\n\t" + "paddw %%mm5,%%mm5\n\t" + /*mm3=t4'=t3-t4*/ + "psubw %%mm4,%%mm3\n\t" + "paddw %%mm4,%%mm4\n\t" + /*mm7=t0'=t0+t7*/ + "paddw %%mm0,%%mm7\n\t" + /*mm6=t1'=t1+t6*/ + "paddw %%mm1,%%mm6\n\t" + /*mm5=t2'=t2+t5*/ + "paddw %%mm2,%%mm5\n\t" + /*mm4=t3'=t3+t4*/ + "paddw %%mm3,%%mm4\n\t" + OC_FDCT8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70") + OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70") + /*Swap out this 8x4 block for the next one.*/ + "movq 0x08(%[x]),%%mm0\n\t" + "movq %%mm7,0x30(%[y])\n\t" + "movq 0x78(%[x]),%%mm7\n\t" + "movq %%mm1,0x50(%[y])\n\t" + "movq 0x18(%[x]),%%mm1\n\t" + "movq %%mm6,0x20(%[y])\n\t" + "movq 0x68(%[x]),%%mm6\n\t" + "movq %%mm2,0x60(%[y])\n\t" + "movq 0x28(%[x]),%%mm2\n\t" + "movq %%mm5,0x10(%[y])\n\t" + "movq 0x58(%[x]),%%mm5\n\t" + "movq %%mm3,0x70(%[y])\n\t" + 
"movq 0x38(%[x]),%%mm3\n\t" + /*And increase its working precision, too.*/ + "psllw $2,%%mm0\n\t" + "movq %%mm4,0x00(%[y])\n\t" + "psllw $2,%%mm7\n\t" + "movq 0x48(%[x]),%%mm4\n\t" + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + "psubw %%mm7,%%mm0\n\t" + "psllw $2,%%mm1\n\t" + "paddw %%mm7,%%mm7\n\t" + "psllw $2,%%mm6\n\t" + /*mm1=t6'=t1-t6*/ + "psubw %%mm6,%%mm1\n\t" + "psllw $2,%%mm2\n\t" + "paddw %%mm6,%%mm6\n\t" + "psllw $2,%%mm5\n\t" + /*mm2=t5'=t2-t5*/ + "psubw %%mm5,%%mm2\n\t" + "psllw $2,%%mm3\n\t" + "paddw %%mm5,%%mm5\n\t" + "psllw $2,%%mm4\n\t" + /*mm3=t4'=t3-t4*/ + "psubw %%mm4,%%mm3\n\t" + "paddw %%mm4,%%mm4\n\t" + /*mm7=t0'=t0+t7*/ + "paddw %%mm0,%%mm7\n\t" + /*mm6=t1'=t1+t6*/ + "paddw %%mm1,%%mm6\n\t" + /*mm5=t2'=t2+t5*/ + "paddw %%mm2,%%mm5\n\t" + /*mm4=t3'=t3+t4*/ + "paddw %%mm3,%%mm4\n\t" + OC_FDCT8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78") + OC_TRANSPOSE8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78") + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. 
+ We have cleverly arranged that it already be in the appropriate place, + so we only have to do half the stores and loads.*/ + "movq 0x00(%[y]),%%mm0\n\t" + "movq %%mm1,0x58(%[y])\n\t" + "movq 0x10(%[y]),%%mm1\n\t" + "movq %%mm2,0x68(%[y])\n\t" + "movq 0x20(%[y]),%%mm2\n\t" + "movq %%mm3,0x78(%[y])\n\t" + "movq 0x30(%[y]),%%mm3\n\t" + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38") + OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38") + /*mm0={-2}x4*/ + "pcmpeqw %%mm0,%%mm0\n\t" + "paddw %%mm0,%%mm0\n\t" + /*Round the results.*/ + "psubw %%mm0,%%mm1\n\t" + "psubw %%mm0,%%mm2\n\t" + "psraw $2,%%mm1\n\t" + "psubw %%mm0,%%mm3\n\t" + "movq %%mm1,0x18(%[y])\n\t" + "psraw $2,%%mm2\n\t" + "psubw %%mm0,%%mm4\n\t" + "movq 0x08(%[y]),%%mm1\n\t" + "psraw $2,%%mm3\n\t" + "psubw %%mm0,%%mm5\n\t" + "psraw $2,%%mm4\n\t" + "psubw %%mm0,%%mm6\n\t" + "psraw $2,%%mm5\n\t" + "psubw %%mm0,%%mm7\n\t" + "psraw $2,%%mm6\n\t" + "psubw %%mm0,%%mm1\n\t" + "psraw $2,%%mm7\n\t" + "movq 0x40(%[y]),%%mm0\n\t" + "psraw $2,%%mm1\n\t" + "movq %%mm7,0x30(%[y])\n\t" + "movq 0x78(%[y]),%%mm7\n\t" + "movq %%mm1,0x08(%[y])\n\t" + "movq 0x50(%[y]),%%mm1\n\t" + "movq %%mm6,0x20(%[y])\n\t" + "movq 0x68(%[y]),%%mm6\n\t" + "movq %%mm2,0x28(%[y])\n\t" + "movq 0x60(%[y]),%%mm2\n\t" + "movq %%mm5,0x10(%[y])\n\t" + "movq 0x58(%[y]),%%mm5\n\t" + "movq %%mm3,0x38(%[y])\n\t" + "movq 0x70(%[y]),%%mm3\n\t" + "movq %%mm4,0x00(%[y])\n\t" + "movq 0x48(%[y]),%%mm4\n\t" + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78") + OC_TRANSPOSE8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78") + /*mm0={-2}x4*/ + "pcmpeqw %%mm0,%%mm0\n\t" + "paddw %%mm0,%%mm0\n\t" + /*Round the results.*/ + "psubw %%mm0,%%mm1\n\t" + "psubw %%mm0,%%mm2\n\t" + "psraw $2,%%mm1\n\t" + "psubw %%mm0,%%mm3\n\t" + "movq %%mm1,0x58(%[y])\n\t" + "psraw $2,%%mm2\n\t" + "psubw %%mm0,%%mm4\n\t" + "movq 0x48(%[y]),%%mm1\n\t" + "psraw $2,%%mm3\n\t" + 
"psubw %%mm0,%%mm5\n\t" + "movq %%mm2,0x68(%[y])\n\t" + "psraw $2,%%mm4\n\t" + "psubw %%mm0,%%mm6\n\t" + "movq %%mm3,0x78(%[y])\n\t" + "psraw $2,%%mm5\n\t" + "psubw %%mm0,%%mm7\n\t" + "movq %%mm4,0x40(%[y])\n\t" + "psraw $2,%%mm6\n\t" + "psubw %%mm0,%%mm1\n\t" + "movq %%mm5,0x50(%[y])\n\t" + "psraw $2,%%mm7\n\t" + "movq %%mm6,0x60(%[y])\n\t" + "psraw $2,%%mm1\n\t" + "movq %%mm7,0x70(%[y])\n\t" + "movq %%mm1,0x48(%[y])\n\t" + :[a]"=&r"(a) + :[y]"r"(_y),[x]"r"(_x) + :"memory" + ); +} + +#endif diff --git a/Engine/lib/libtheora/lib/dec/x86/mmxfrag.c b/Engine/lib/libtheora/lib/x86/mmxfrag.c similarity index 83% rename from Engine/lib/libtheora/lib/dec/x86/mmxfrag.c rename to Engine/lib/libtheora/lib/x86/mmxfrag.c index b4f8167a6..2c732939c 100644 --- a/Engine/lib/libtheora/lib/dec/x86/mmxfrag.c +++ b/Engine/lib/libtheora/lib/x86/mmxfrag.c @@ -5,13 +5,13 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: mmxfrag.c 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: mmxfrag.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ @@ -20,12 +20,20 @@ Additional optimization by Nils Pipenbrinck. Note: Loops are unrolled for best performance. 
The iteration each instruction belongs to is marked in the comments as #i.*/ -#include "x86int.h" #include +#include "x86int.h" +#include "mmxfrag.h" -#if defined(USE_ASM) +#if defined(OC_X86_ASM) -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride){ + OC_FRAG_COPY_MMX(_dst,_src,_ystride); +} + +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, const ogg_int16_t *_residue){ __asm__ __volatile__( /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ @@ -67,9 +75,9 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, /*#0 Write row.*/ "movq %%mm1,(%[dst])\n\t" /*#1 Write row.*/ - "movq %%mm3,(%[dst],%[dst_ystride])\n\t" + "movq %%mm3,(%[dst],%[ystride])\n\t" /*#2 Write row.*/ - "movq %%mm5,(%[dst],%[dst_ystride],2)\n\t" + "movq %%mm5,(%[dst],%[ystride],2)\n\t" /*#3 Load low residue.*/ "movq 6*8(%[residue]),%%mm1\n\t" /*#3 Load high residue.*/ @@ -101,11 +109,11 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, /*#5 Pack to byte.*/ "packuswb %%mm6,%%mm5\n\t" /*#3 Write row.*/ - "movq %%mm1,(%[dst],%[dst_ystride3])\n\t" + "movq %%mm1,(%[dst],%[ystride3])\n\t" /*#4 Write row.*/ "movq %%mm3,(%[dst4])\n\t" /*#5 Write row.*/ - "movq %%mm5,(%[dst4],%[dst_ystride])\n\t" + "movq %%mm5,(%[dst4],%[ystride])\n\t" /*#6 Load low residue.*/ "movq 12*8(%[residue]),%%mm1\n\t" /*#6 Load high residue.*/ @@ -127,21 +135,21 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, /*#7 Pack to byte.*/ "packuswb %%mm4,%%mm3\n\t" /*#6 Write row.*/ - "movq %%mm1,(%[dst4],%[dst_ystride],2)\n\t" + "movq %%mm1,(%[dst4],%[ystride],2)\n\t" /*#7 Write row.*/ - "movq %%mm3,(%[dst4],%[dst_ystride3])\n\t" + "movq %%mm3,(%[dst4],%[ystride3])\n\t" : :[residue]"r"(_residue), [dst]"r"(_dst), - [dst4]"r"(_dst+(_dst_ystride<<2)), - 
[dst_ystride]"r"((ptrdiff_t)_dst_ystride), - [dst_ystride3]"r"((ptrdiff_t)_dst_ystride*3) + [dst4]"r"(_dst+(_ystride<<2)), + [ystride]"r"((ptrdiff_t)_ystride), + [ystride3]"r"((ptrdiff_t)_ystride*3) :"memory" ); } -void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue){ +void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, + int _ystride,const ogg_int16_t *_residue){ int i; /*Zero mm0.*/ __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::); @@ -150,7 +158,7 @@ void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, /*#0 Load source.*/ "movq (%[src]),%%mm3\n\t" /*#1 Load source.*/ - "movq (%[src],%[src_ystride]),%%mm7\n\t" + "movq (%[src],%[ystride]),%%mm7\n\t" /*#0 Get copy of src.*/ "movq %%mm3,%%mm4\n\t" /*#0 Expand high source.*/ @@ -178,29 +186,23 @@ void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, /*#1 Pack final row pixels.*/ "packuswb %%mm2,%%mm7\n\t" /*Advance src.*/ - "lea (%[src],%[src_ystride],2),%[src]\n\t" + "lea (%[src],%[ystride],2),%[src]\n\t" /*#0 Write row.*/ "movq %%mm3,(%[dst])\n\t" /*#1 Write row.*/ - "movq %%mm7,(%[dst],%[dst_ystride])\n\t" + "movq %%mm7,(%[dst],%[ystride])\n\t" /*Advance dst.*/ - "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "lea (%[dst],%[ystride],2),%[dst]\n\t" :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src) - :[dst_ystride]"r"((ptrdiff_t)_dst_ystride), - [src_ystride]"r"((ptrdiff_t)_src_ystride) + :[ystride]"r"((ptrdiff_t)_ystride) :"memory" ); } } -void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, - int _src2_ystride,const ogg_int16_t *_residue){ +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ int i; - /*NOTE: This assumes that - _dst_ystride==_src1_ystride&&_dst_ystride==_src2_ystride. 
- This is currently always the case, but a slower fallback version will need - to be written if it ever is not.*/ /*Zero mm7.*/ __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::); for(i=4;i-->0;){ @@ -278,8 +280,8 @@ void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride, /*Advance dest ptr.*/ "lea (%[dst],%[ystride],2),%[dst]\n\t" :[dst]"+r"(_dst),[residue]"+r"(_residue), - [src1]"+r"(_src1),[src2]"+r"(_src2) - :[ystride]"r"((ptrdiff_t)_dst_ystride) + [src1]"+%r"(_src1),[src2]"+r"(_src2) + :[ystride]"r"((ptrdiff_t)_ystride) :"memory" ); } diff --git a/Engine/lib/libtheora/lib/x86/mmxfrag.h b/Engine/lib/libtheora/lib/x86/mmxfrag.h new file mode 100644 index 000000000..a39842762 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/mmxfrag.h @@ -0,0 +1,64 @@ +#if !defined(_x86_mmxfrag_H) +# define _x86_mmxfrag_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ + do{ \ + const unsigned char *src; \ + unsigned char *dst; \ + ptrdiff_t ystride3; \ + src=(_src); \ + dst=(_dst); \ + __asm__ __volatile__( \ + /*src+0*ystride*/ \ + "movq (%[src]),%%mm0\n\t" \ + /*src+1*ystride*/ \ + "movq (%[src],%[ystride]),%%mm1\n\t" \ + /*ystride3=ystride*3*/ \ + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \ + /*src+2*ystride*/ \ + "movq (%[src],%[ystride],2),%%mm2\n\t" \ + /*src+3*ystride*/ \ + "movq (%[src],%[ystride3]),%%mm3\n\t" \ + /*dst+0*ystride*/ \ + "movq %%mm0,(%[dst])\n\t" \ + /*dst+1*ystride*/ \ + "movq %%mm1,(%[dst],%[ystride])\n\t" \ + /*Pointer to next 4.*/ \ + "lea (%[src],%[ystride],4),%[src]\n\t" \ + /*dst+2*ystride*/ \ + "movq %%mm2,(%[dst],%[ystride],2)\n\t" \ + /*dst+3*ystride*/ \ + "movq %%mm3,(%[dst],%[ystride3])\n\t" \ + /*Pointer to next 4.*/ \ + "lea (%[dst],%[ystride],4),%[dst]\n\t" \ + /*src+0*ystride*/ \ + "movq (%[src]),%%mm0\n\t" \ + /*src+1*ystride*/ \ + "movq (%[src],%[ystride]),%%mm1\n\t" \ 
+ /*src+2*ystride*/ \ + "movq (%[src],%[ystride],2),%%mm2\n\t" \ + /*src+3*ystride*/ \ + "movq (%[src],%[ystride3]),%%mm3\n\t" \ + /*dst+0*ystride*/ \ + "movq %%mm0,(%[dst])\n\t" \ + /*dst+1*ystride*/ \ + "movq %%mm1,(%[dst],%[ystride])\n\t" \ + /*dst+2*ystride*/ \ + "movq %%mm2,(%[dst],%[ystride],2)\n\t" \ + /*dst+3*ystride*/ \ + "movq %%mm3,(%[dst],%[ystride3])\n\t" \ + :[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \ + :[ystride]"r"((ptrdiff_t)(_ystride)) \ + :"memory" \ + ); \ + } \ + while(0) + +# endif +#endif diff --git a/Engine/lib/libtheora/lib/dec/x86/mmxidct.c b/Engine/lib/libtheora/lib/x86/mmxidct.c similarity index 86% rename from Engine/lib/libtheora/lib/dec/x86/mmxidct.c rename to Engine/lib/libtheora/lib/x86/mmxidct.c index 5dbbe201a..76424e636 100644 --- a/Engine/lib/libtheora/lib/dec/x86/mmxidct.c +++ b/Engine/lib/libtheora/lib/x86/mmxidct.c @@ -5,25 +5,22 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: mmxidct.c 15400 2008-10-15 12:10:58Z tterribe $ + last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ /*MMX acceleration of Theora's iDCT. Originally written by Rudolf Marek, based on code from On2's VP3.*/ -#include -#include "../dct.h" -#include "../idct.h" - #include "x86int.h" +#include "../dct.h" -#if defined(USE_ASM) +#if defined(OC_X86_ASM) /*These are offsets into the table of constants below.*/ /*7 rows of cosines, in order: pi/16 * (1 ... 
7).*/ @@ -194,7 +191,7 @@ static const ogg_uint16_t __attribute__((aligned(8),used)) J(7) = h3 g3 f3 e3 I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. - J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. + J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. Since r1 is free at entry, we calculate the Js first.*/ /*19 cycles.*/ @@ -313,9 +310,9 @@ static const ogg_uint16_t __attribute__((aligned(8),used)) #define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) #define OC_8 OC_MID(OC_EIGHT_OFFSET,0) -void oc_idct8x8_mmx(ogg_int16_t _y[64]){ - /*This routine accepts an 8x8 matrix, but in transposed form. - Every 4x4 submatrix is transposed.*/ +static void oc_idct8x8_slow(ogg_int16_t _y[64]){ + /*This routine accepts an 8x8 matrix, but in partially transposed form. + Every 4x4 block is transposed.*/ __asm__ __volatile__( #define OC_I(_k) OC_M2STR((_k*16))"(%[y])" #define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" @@ -339,7 +336,6 @@ void oc_idct8x8_mmx(ogg_int16_t _y[64]){ OC_COLUMN_IDCT #undef OC_I #undef OC_J - "emms\n\t" : :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS) ); @@ -507,7 +503,7 @@ void oc_idct8x8_mmx(ogg_int16_t _y[64]){ "movq %%mm0,"OC_I(0)"\n\t" \ "#end OC_COLUMN_IDCT_10\n\t" \ -void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){ +static void oc_idct8x8_10(ogg_int16_t _y[64]){ __asm__ __volatile__( #define OC_I(_k) OC_M2STR((_k*16))"(%[y])" #define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" @@ -527,9 +523,42 @@ void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){ OC_COLUMN_IDCT_10 #undef OC_I #undef OC_J - "emms\n\t" : :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS) ); } + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. 
+ In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Then perform the iDCT.*/ + if(_last_zzi<10)oc_idct8x8_10(_y); + else oc_idct8x8_slow(_y); +} + #endif diff --git a/Engine/lib/libtheora/lib/x86/mmxloop.h b/Engine/lib/libtheora/lib/x86/mmxloop.h new file mode 100644 index 000000000..2e870c795 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/mmxloop.h @@ -0,0 +1,215 @@ +#if !defined(_x86_mmxloop_H) +# define _x86_mmxloop_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
+ On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and + mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ +#define OC_LOOP_FILTER8_MMX \ + "#OC_LOOP_FILTER8_MMX\n\t" \ + /*mm7=0*/ \ + "pxor %%mm7,%%mm7\n\t" \ + /*mm6:mm0={a0,...,a7}*/ \ + "movq %%mm0,%%mm6\n\t" \ + "punpcklbw %%mm7,%%mm0\n\t" \ + "punpckhbw %%mm7,%%mm6\n\t" \ + /*mm3:mm5={d0,...,d7}*/ \ + "movq %%mm3,%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm3\n\t" \ + "punpckhbw %%mm7,%%mm5\n\t" \ + /*mm6:mm0={a0-d0,...,a7-d7}*/ \ + "psubw %%mm3,%%mm0\n\t" \ + "psubw %%mm5,%%mm6\n\t" \ + /*mm3:mm1={b0,...,b7}*/ \ + "movq %%mm1,%%mm3\n\t" \ + "punpcklbw %%mm7,%%mm1\n\t" \ + "movq %%mm2,%%mm4\n\t" \ + "punpckhbw %%mm7,%%mm3\n\t" \ + /*mm5:mm4={c0,...,c7}*/ \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm4\n\t" \ + "punpckhbw %%mm7,%%mm5\n\t" \ + /*mm7={3}x4 \ + mm5:mm4={c0-b0,...,c7-b7}*/ \ + "pcmpeqw %%mm7,%%mm7\n\t" \ + "psubw %%mm1,%%mm4\n\t" \ + "psrlw $14,%%mm7\n\t" \ + "psubw %%mm3,%%mm5\n\t" \ + /*Scale by 3.*/ \ + "pmullw %%mm7,%%mm4\n\t" \ + "pmullw %%mm7,%%mm5\n\t" \ + /*mm7={4}x4 \ + mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ + "psrlw $1,%%mm7\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "psllw $2,%%mm7\n\t" \ + "movq (%[ll]),%%mm0\n\t" \ + "paddw %%mm6,%%mm5\n\t" \ + /*R_i has the range [-127,128], so we compute -R_i instead. \ + mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ + "psubw %%mm7,%%mm4\n\t" \ + "psubw %%mm7,%%mm5\n\t" \ + "psraw $3,%%mm4\n\t" \ + "psraw $3,%%mm5\n\t" \ + "pcmpeqb %%mm7,%%mm7\n\t" \ + "packsswb %%mm5,%%mm4\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "pxor %%mm7,%%mm4\n\t" \ + "packuswb %%mm3,%%mm1\n\t" \ + /*Now compute lflim of -mm4 cf. Section 7.10 of the spec.*/ \ + /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ + we have to split things by sign (the other option is to work in 16 bits, \ + but working in 8 bits gives much better parallelism). \ + We compute abs(R_i), but save a mask of which terms were negative in mm6. 
\ + Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \ + Finally, we split mm4 into positive and negative pieces using the mask in \ + mm6, and add and subtract them as appropriate.*/ \ + /*mm4=abs(-R_i)*/ \ + /*mm7=255-2*L*/ \ + "pcmpgtb %%mm4,%%mm6\n\t" \ + "psubb %%mm0,%%mm7\n\t" \ + "pxor %%mm6,%%mm4\n\t" \ + "psubb %%mm0,%%mm7\n\t" \ + "psubb %%mm6,%%mm4\n\t" \ + /*mm7=255-max(2*L-abs(R_i),0)*/ \ + "paddusb %%mm4,%%mm7\n\t" \ + /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ + "paddusb %%mm7,%%mm4\n\t" \ + "psubusb %%mm7,%%mm4\n\t" \ + /*Now split mm4 by the original sign of -R_i.*/ \ + "movq %%mm4,%%mm5\n\t" \ + "pand %%mm6,%%mm4\n\t" \ + "pandn %%mm5,%%mm6\n\t" \ + /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ + /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ + "paddusb %%mm4,%%mm1\n\t" \ + "psubusb %%mm4,%%mm2\n\t" \ + "psubusb %%mm6,%%mm1\n\t" \ + "paddusb %%mm6,%%mm2\n\t" \ + +#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ + do{ \ + ptrdiff_t ystride3__; \ + __asm__ __volatile__( \ + /*mm0={a0,...,a7}*/ \ + "movq (%[pix]),%%mm0\n\t" \ + /*ystride3=_ystride*3*/ \ + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \ + /*mm3={d0,...,d7}*/ \ + "movq (%[pix],%[ystride3]),%%mm3\n\t" \ + /*mm1={b0,...,b7}*/ \ + "movq (%[pix],%[ystride]),%%mm1\n\t" \ + /*mm2={c0,...,c7}*/ \ + "movq (%[pix],%[ystride],2),%%mm2\n\t" \ + OC_LOOP_FILTER8_MMX \ + /*Write it back out.*/ \ + "movq %%mm1,(%[pix],%[ystride])\n\t" \ + "movq %%mm2,(%[pix],%[ystride],2)\n\t" \ + :[ystride3]"=&r"(ystride3__) \ + :[pix]"r"(_pix-_ystride*2),[ystride]"r"((ptrdiff_t)(_ystride)), \ + [ll]"r"(_ll) \ + :"memory" \ + ); \ + } \ + while(0) + +#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ + do{ \ + unsigned char *pix__; \ + ptrdiff_t ystride3__; \ + ptrdiff_t d__; \ + pix__=(_pix)-2; \ + __asm__ __volatile__( \ + /*x x x x d0 c0 b0 a0*/ \ + "movd (%[pix]),%%mm0\n\t" \ + /*x x x x d1 c1 b1 a1*/ \ + "movd (%[pix],%[ystride]),%%mm1\n\t" \ + /*ystride3=_ystride*3*/ \ + "lea 
(%[ystride],%[ystride],2),%[ystride3]\n\t" \ + /*x x x x d2 c2 b2 a2*/ \ + "movd (%[pix],%[ystride],2),%%mm2\n\t" \ + /*x x x x d3 c3 b3 a3*/ \ + "lea (%[pix],%[ystride],4),%[d]\n\t" \ + "movd (%[pix],%[ystride3]),%%mm3\n\t" \ + /*x x x x d4 c4 b4 a4*/ \ + "movd (%[d]),%%mm4\n\t" \ + /*x x x x d5 c5 b5 a5*/ \ + "movd (%[d],%[ystride]),%%mm5\n\t" \ + /*x x x x d6 c6 b6 a6*/ \ + "movd (%[d],%[ystride],2),%%mm6\n\t" \ + /*x x x x d7 c7 b7 a7*/ \ + "movd (%[d],%[ystride3]),%%mm7\n\t" \ + /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + "punpcklbw %%mm1,%%mm0\n\t" \ + /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ + "punpcklbw %%mm3,%%mm2\n\t" \ + /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + "movq %%mm0,%%mm3\n\t" \ + /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + "punpcklwd %%mm2,%%mm0\n\t" \ + /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + "punpckhwd %%mm2,%%mm3\n\t" \ + /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + "movq %%mm0,%%mm1\n\t" \ + /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + "punpcklbw %%mm5,%%mm4\n\t" \ + /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ + "punpcklbw %%mm7,%%mm6\n\t" \ + /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + "movq %%mm4,%%mm5\n\t" \ + /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ + "punpcklwd %%mm6,%%mm4\n\t" \ + /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ + "punpckhwd %%mm6,%%mm5\n\t" \ + /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + "movq %%mm3,%%mm2\n\t" \ + /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ + "punpckldq %%mm4,%%mm0\n\t" \ + /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ + "punpckhdq %%mm4,%%mm1\n\t" \ + /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ + "punpckldq %%mm5,%%mm2\n\t" \ + /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ + "punpckhdq %%mm5,%%mm3\n\t" \ + OC_LOOP_FILTER8_MMX \ + /*mm2={b0+R_0'',...,b7+R_7''}*/ \ + "movq %%mm1,%%mm0\n\t" \ + /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ + "punpcklbw %%mm2,%%mm1\n\t" \ + /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ + "punpckhbw %%mm2,%%mm0\n\t" \ + /*[d]=c1 b1 c0 b0*/ \ + "movd %%mm1,%[d]\n\t" \ + "movw %w[d],1(%[pix])\n\t" \ + "psrlq $32,%%mm1\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride])\n\t" \ + /*[d]=c3 b3 
c2 b2*/ \ + "movd %%mm1,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride],2)\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride3])\n\t" \ + "lea (%[pix],%[ystride],4),%[pix]\n\t" \ + /*[d]=c5 b5 c4 b4*/ \ + "movd %%mm0,%[d]\n\t" \ + "movw %w[d],1(%[pix])\n\t" \ + "psrlq $32,%%mm0\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride])\n\t" \ + /*[d]=c7 b7 c6 b6*/ \ + "movd %%mm0,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride],2)\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride3])\n\t" \ + :[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \ + :[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \ + :"memory" \ + ); \ + } \ + while(0) + +# endif +#endif diff --git a/Engine/lib/libtheora/lib/x86/mmxstate.c b/Engine/lib/libtheora/lib/x86/mmxstate.c new file mode 100644 index 000000000..808b0a789 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/mmxstate.c @@ -0,0 +1,188 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxstate.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of complete fragment reconstruction algorithm. 
+ Originally written by Rudolf Marek.*/ +#include <string.h> +#include "x86int.h" +#include "mmxfrag.h" +#include "mmxloop.h" + +#if defined(OC_X86_ASM) + +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + /*Note that this value must be unsigned, to keep the __asm__ block from + sign-extending it when it puts it in a register.*/ + ogg_uint16_t p; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*Fill _dct_coeffs with p.*/ + __asm__ __volatile__( + /*mm0=0000 0000 0000 AAAA*/ + "movd %[p],%%mm0\n\t" + /*mm0=0000 0000 AAAA AAAA*/ + "punpcklwd %%mm0,%%mm0\n\t" + /*mm0=AAAA AAAA AAAA AAAA*/ + "punpckldq %%mm0,%%mm0\n\t" + "movq %%mm0,(%[y])\n\t" + "movq %%mm0,8(%[y])\n\t" + "movq %%mm0,16(%[y])\n\t" + "movq %%mm0,24(%[y])\n\t" + "movq %%mm0,32(%[y])\n\t" + "movq %%mm0,40(%[y])\n\t" + "movq %%mm0,48(%[y])\n\t" + "movq %%mm0,56(%[y])\n\t" + "movq %%mm0,64(%[y])\n\t" + "movq %%mm0,72(%[y])\n\t" + "movq %%mm0,80(%[y])\n\t" + "movq %%mm0,88(%[y])\n\t" + "movq %%mm0,96(%[y])\n\t" + "movq %%mm0,104(%[y])\n\t" + "movq %%mm0,112(%[y])\n\t" + "movq %%mm0,120(%[y])\n\t" + : + :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p) + :"memory" + ); + } + else{ + /*Dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; +
if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + else{ + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, + _dct_coeffs); + } + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*We copy this entire function to inline the actual MMX routines so that we + use only a single indirect call.*/ + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; + OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); + } +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array.
+ _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. + _fragy0: The Y coordinate of the first fragment row to filter. + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ + OC_ALIGN8(unsigned char ll[8]); + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it.
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0<fragi0_end){ + ptrdiff_t fragi; + ptrdiff_t fragi_end; + fragi=fragi0; + fragi_end=fragi+nhfrags; + while(fragi<fragi_end){ + if(frags[fragi].coded){ + unsigned char *ref; + ref=ref_frame_data+frag_buf_offs[fragi]; + if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi+1<fragi_end&&!frags[fragi+1].coded){ + OC_LOOP_FILTER_H_MMX(ref+8,ystride,ll); + } + if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ + OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,ll); + } + } + fragi++; + } + fragi0+=nhfrags; + } +} + +#endif +#include <stddef.h> +#include "x86enc.h" + +#if defined(OC_X86_64_ASM) + +# define OC_FDCT8x8 \ + /*Note: xmm15={0}x8 and xmm14={-1}x8.*/ \ + "#OC_FDCT8x8\n\t" \ + /*Stage 1:*/ \ + "movdqa %%xmm0,%%xmm11\n\t" \ + "movdqa %%xmm1,%%xmm10\n\t" \ + "movdqa %%xmm2,%%xmm9\n\t" \ + "movdqa %%xmm3,%%xmm8\n\t" \ + /*xmm11=t7'=t0-t7*/ \ + "psubw %%xmm7,%%xmm11\n\t" \ + /*xmm10=t6'=t1-t6*/ \ + "psubw %%xmm6,%%xmm10\n\t" \ + /*xmm9=t5'=t2-t5*/ \ + "psubw %%xmm5,%%xmm9\n\t" \ + /*xmm8=t4'=t3-t4*/ \ + "psubw %%xmm4,%%xmm8\n\t" \ + /*xmm0=t0'=t0+t7*/ \ + "paddw %%xmm7,%%xmm0\n\t" \ + /*xmm1=t1'=t1+t6*/ \ + "paddw %%xmm6,%%xmm1\n\t" \ + /*xmm5=t2'=t2+t5*/ \ + "paddw %%xmm2,%%xmm5\n\t" \ + /*xmm4=t3'=t3+t4*/ \ + "paddw %%xmm3,%%xmm4\n\t" \ + /*xmm2,3,6,7 are now free.*/ \ + /*Stage 2:*/ \ + "movdqa %%xmm0,%%xmm3\n\t" \ + "mov $0x5A806A0A,%[a]\n\t" \ + "movdqa %%xmm1,%%xmm2\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "movdqa %%xmm10,%%xmm6\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + /*xmm2=t2''=t1'-t2'*/ \ + "psubw %%xmm5,%%xmm2\n\t" \ + "pxor %%xmm12,%%xmm12\n\t" \ + /*xmm3=t3''=t0'-t3'*/ \ + "psubw %%xmm4,%%xmm3\n\t" \ + "psubw %%xmm14,%%xmm12\n\t" \ + /*xmm10=t5''=t6'-t5'*/ \ + "psubw %%xmm9,%%xmm10\n\t" \ + "paddw %%xmm12,%%xmm12\n\t" \ + /*xmm4=t0''=t0'+t3'*/ \ + "paddw %%xmm0,%%xmm4\n\t" \ + /*xmm1=t1''=t1'+t2'*/ \ + "paddw %%xmm5,%%xmm1\n\t" \ + /*xmm6=t6''=t6'+t5'*/ \ + "paddw %%xmm9,%%xmm6\n\t" \ + /*xmm0,xmm5,xmm9 are now free.*/ \ + /*Stage 3:*/ \ + /*xmm10:xmm5=t5''*27146+0xB500 \ + xmm0=t5''*/ \ + "movdqa %%xmm10,%%xmm5\n\t" \ + "movdqa %%xmm10,%%xmm0\n\t" \ + "punpckhwd %%xmm12,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "punpcklwd %%xmm12,%%xmm5\n\t" \ + "pmaddwd %%xmm13,%%xmm5\n\t" \ + /*xmm5=(t5''*27146+0xB500>>16)+t5''*/ \ + "psrad
$16,%%xmm10\n\t" \ + "psrad $16,%%xmm5\n\t" \ + "packssdw %%xmm10,%%xmm5\n\t" \ + "paddw %%xmm0,%%xmm5\n\t" \ + /*xmm0=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ + "pcmpeqw %%xmm15,%%xmm0\n\t" \ + "psubw %%xmm14,%%xmm0\n\t" \ + "paddw %%xmm5,%%xmm0\n\t" \ + "movdqa %%xmm8,%%xmm5\n\t" \ + "psraw $1,%%xmm0\n\t" \ + /*xmm5=t5'''=t4'-s*/ \ + "psubw %%xmm0,%%xmm5\n\t" \ + /*xmm8=t4''=t4'+s*/ \ + "paddw %%xmm0,%%xmm8\n\t" \ + /*xmm0,xmm7,xmm9,xmm10 are free.*/ \ + /*xmm7:xmm9=t6''*27146+0xB500*/ \ + "movdqa %%xmm6,%%xmm7\n\t" \ + "movdqa %%xmm6,%%xmm9\n\t" \ + "punpckhwd %%xmm12,%%xmm7\n\t" \ + "pmaddwd %%xmm13,%%xmm7\n\t" \ + "punpcklwd %%xmm12,%%xmm9\n\t" \ + "pmaddwd %%xmm13,%%xmm9\n\t" \ + /*xmm9=(t6''*27146+0xB500>>16)+t6''*/ \ + "psrad $16,%%xmm7\n\t" \ + "psrad $16,%%xmm9\n\t" \ + "packssdw %%xmm7,%%xmm9\n\t" \ + "paddw %%xmm6,%%xmm9\n\t" \ + /*xmm9=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ + "pcmpeqw %%xmm15,%%xmm6\n\t" \ + "psubw %%xmm14,%%xmm6\n\t" \ + "paddw %%xmm6,%%xmm9\n\t" \ + "movdqa %%xmm11,%%xmm7\n\t" \ + "psraw $1,%%xmm9\n\t" \ + /*xmm7=t6'''=t7'-s*/ \ + "psubw %%xmm9,%%xmm7\n\t" \ + /*xmm9=t7''=t7'+s*/ \ + "paddw %%xmm11,%%xmm9\n\t" \ + /*xmm0,xmm6,xmm10,xmm11 are free.*/ \ + /*Stage 4:*/ \ + /*xmm10:xmm0=t1''*27146+0xB500*/ \ + "movdqa %%xmm1,%%xmm0\n\t" \ + "movdqa %%xmm1,%%xmm10\n\t" \ + "punpcklwd %%xmm12,%%xmm0\n\t" \ + "pmaddwd %%xmm13,%%xmm0\n\t" \ + "punpckhwd %%xmm12,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + /*xmm0=(t1''*27146+0xB500>>16)+t1''*/ \ + "psrad $16,%%xmm0\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "mov $0x20006A0A,%[a]\n\t" \ + "packssdw %%xmm10,%%xmm0\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddw %%xmm1,%%xmm0\n\t" \ + /*xmm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ + "pcmpeqw %%xmm15,%%xmm1\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "psubw %%xmm14,%%xmm1\n\t" \ + "paddw %%xmm1,%%xmm0\n\t" \ + /*xmm10:xmm4=t0''*27146+0x4000*/ \ + "movdqa %%xmm4,%%xmm1\n\t" \ + "movdqa %%xmm4,%%xmm10\n\t" \ + "punpcklwd 
%%xmm12,%%xmm4\n\t" \ + "pmaddwd %%xmm13,%%xmm4\n\t" \ + "punpckhwd %%xmm12,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + /*xmm4=(t0''*27146+0x4000>>16)+t0''*/ \ + "psrad $16,%%xmm4\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "mov $0x6CB7,%[a]\n\t" \ + "packssdw %%xmm10,%%xmm4\n\t" \ + "movd %[a],%%xmm12\n\t" \ + "paddw %%xmm1,%%xmm4\n\t" \ + /*xmm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ + "pcmpeqw %%xmm15,%%xmm1\n\t" \ + "pshufd $00,%%xmm12,%%xmm12\n\t" \ + "psubw %%xmm14,%%xmm1\n\t" \ + "mov $0x7FFF6C84,%[a]\n\t" \ + "paddw %%xmm1,%%xmm4\n\t" \ + /*xmm0=_y[0]=u=r+s>>1 \ + The naive implementation could cause overflow, so we use \ + u=(r&s)+((r^s)>>1).*/ \ + "movdqa %%xmm0,%%xmm6\n\t" \ + "pxor %%xmm4,%%xmm0\n\t" \ + "pand %%xmm4,%%xmm6\n\t" \ + "psraw $1,%%xmm0\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddw %%xmm6,%%xmm0\n\t" \ + /*xmm4=_y[4]=v=r-u*/ \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "psubw %%xmm0,%%xmm4\n\t" \ + /*xmm1,xmm6,xmm10,xmm11 are free.*/ \ + /*xmm6:xmm10=60547*t3''+0x6CB7*/ \ + "movdqa %%xmm3,%%xmm10\n\t" \ + "movdqa %%xmm3,%%xmm6\n\t" \ + "punpcklwd %%xmm3,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "mov $0x61F861F8,%[a]\n\t" \ + "punpckhwd %%xmm3,%%xmm6\n\t" \ + "pmaddwd %%xmm13,%%xmm6\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm6\n\t" \ + /*xmm1:xmm2=25080*t2'' \ + xmm12=t2''*/ \ + "movdqa %%xmm2,%%xmm11\n\t" \ + "movdqa %%xmm2,%%xmm12\n\t" \ + "pmullw %%xmm13,%%xmm2\n\t" \ + "pmulhw %%xmm13,%%xmm11\n\t" \ + "movdqa %%xmm2,%%xmm1\n\t" \ + "punpcklwd %%xmm11,%%xmm2\n\t" \ + "punpckhwd %%xmm11,%%xmm1\n\t" \ + /*xmm10=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ + "paddd %%xmm2,%%xmm10\n\t" \ + "paddd %%xmm1,%%xmm6\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "pcmpeqw %%xmm15,%%xmm3\n\t" \ + "psrad $16,%%xmm6\n\t" \ + "psubw %%xmm14,%%xmm3\n\t" \ + "packssdw %%xmm6,%%xmm10\n\t" \ + "paddw %%xmm3,%%xmm10\n\t" \ + /*xmm2=_y[2]=u \ + xmm10=s=(25080*u>>16)-t2''*/ 
\ + "movdqa %%xmm10,%%xmm2\n\t" \ + "pmulhw %%xmm13,%%xmm10\n\t" \ + "psubw %%xmm12,%%xmm10\n\t" \ + /*xmm1:xmm6=s*21600+0x2800*/ \ + "pxor %%xmm12,%%xmm12\n\t" \ + "psubw %%xmm14,%%xmm12\n\t" \ + "mov $0x28005460,%[a]\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "movdqa %%xmm10,%%xmm6\n\t" \ + "movdqa %%xmm10,%%xmm1\n\t" \ + "punpcklwd %%xmm12,%%xmm6\n\t" \ + "pmaddwd %%xmm13,%%xmm6\n\t" \ + "mov $0x0E3D,%[a]\n\t" \ + "punpckhwd %%xmm12,%%xmm1\n\t" \ + "pmaddwd %%xmm13,%%xmm1\n\t" \ + /*xmm6=(s*21600+0x2800>>18)+s*/ \ + "psrad $18,%%xmm6\n\t" \ + "psrad $18,%%xmm1\n\t" \ + "movd %[a],%%xmm12\n\t" \ + "packssdw %%xmm1,%%xmm6\n\t" \ + "pshufd $00,%%xmm12,%%xmm12\n\t" \ + "paddw %%xmm10,%%xmm6\n\t" \ + /*xmm6=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ + "mov $0x7FFF54DC,%[a]\n\t" \ + "pcmpeqw %%xmm15,%%xmm10\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "psubw %%xmm14,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddw %%xmm10,%%xmm6\n\t " \ + /*xmm1,xmm3,xmm10,xmm11 are free.*/ \ + /*xmm11:xmm10=54491*t5'''+0x0E3D*/ \ + "movdqa %%xmm5,%%xmm10\n\t" \ + "movdqa %%xmm5,%%xmm11\n\t" \ + "punpcklwd %%xmm5,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "mov $0x8E3A8E3A,%[a]\n\t" \ + "punpckhwd %%xmm5,%%xmm11\n\t" \ + "pmaddwd %%xmm13,%%xmm11\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm11\n\t" \ + /*xmm7:xmm12=36410*t6''' \ + xmm1=t6'''*/ \ + "movdqa %%xmm7,%%xmm3\n\t" \ + "movdqa %%xmm7,%%xmm1\n\t" \ + "pmulhw %%xmm13,%%xmm3\n\t" \ + "pmullw %%xmm13,%%xmm7\n\t" \ + "paddw %%xmm1,%%xmm3\n\t" \ + "movdqa %%xmm7,%%xmm12\n\t" \ + "punpckhwd %%xmm3,%%xmm7\n\t" \ + "punpcklwd %%xmm3,%%xmm12\n\t" \ + /*xmm10=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ + "paddd %%xmm12,%%xmm10\n\t" \ + "paddd %%xmm7,%%xmm11\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "pcmpeqw %%xmm15,%%xmm5\n\t" \ + "psrad $16,%%xmm11\n\t" \ + "psubw %%xmm14,%%xmm5\n\t" \ + "packssdw 
%%xmm11,%%xmm10\n\t" \ + "pxor %%xmm12,%%xmm12\n\t" \ + "paddw %%xmm5,%%xmm10\n\t" \ + /*xmm5=_y[5]=u \ + xmm1=s=t6'''-(36410*u>>16)*/ \ + "psubw %%xmm14,%%xmm12\n\t" \ + "movdqa %%xmm10,%%xmm5\n\t" \ + "mov $0x340067C8,%[a]\n\t" \ + "pmulhw %%xmm13,%%xmm10\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddw %%xmm5,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "psubw %%xmm10,%%xmm1\n\t" \ + /*xmm11:xmm3=s*26568+0x3400*/ \ + "movdqa %%xmm1,%%xmm3\n\t" \ + "movdqa %%xmm1,%%xmm11\n\t" \ + "punpcklwd %%xmm12,%%xmm3\n\t" \ + "pmaddwd %%xmm13,%%xmm3\n\t" \ + "mov $0x7B1B,%[a]\n\t" \ + "punpckhwd %%xmm12,%%xmm11\n\t" \ + "pmaddwd %%xmm13,%%xmm11\n\t" \ + /*xmm3=(s*26568+0x3400>>17)+s*/ \ + "psrad $17,%%xmm3\n\t" \ + "psrad $17,%%xmm11\n\t" \ + "movd %[a],%%xmm12\n\t" \ + "packssdw %%xmm11,%%xmm3\n\t" \ + "pshufd $00,%%xmm12,%%xmm12\n\t" \ + "paddw %%xmm1,%%xmm3\n\t" \ + /*xmm3=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ + "mov $0x7FFF7B16,%[a]\n\t" \ + "pcmpeqw %%xmm15,%%xmm1\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "psubw %%xmm14,%%xmm1\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddw %%xmm1,%%xmm3\n\t " \ + /*xmm1,xmm7,xmm10,xmm11 are free.*/ \ + /*xmm11:xmm10=64277*t7''+0x7B1B*/ \ + "movdqa %%xmm9,%%xmm10\n\t" \ + "movdqa %%xmm9,%%xmm11\n\t" \ + "punpcklwd %%xmm9,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "mov $0x31F131F1,%[a]\n\t" \ + "punpckhwd %%xmm9,%%xmm11\n\t" \ + "pmaddwd %%xmm13,%%xmm11\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm11\n\t" \ + /*xmm12:xmm7=12785*t4''*/ \ + "movdqa %%xmm8,%%xmm7\n\t" \ + "movdqa %%xmm8,%%xmm1\n\t" \ + "pmullw %%xmm13,%%xmm7\n\t" \ + "pmulhw %%xmm13,%%xmm1\n\t" \ + "movdqa %%xmm7,%%xmm12\n\t" \ + "punpcklwd %%xmm1,%%xmm7\n\t" \ + "punpckhwd %%xmm1,%%xmm12\n\t" \ + /*xmm10=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ + "paddd %%xmm7,%%xmm10\n\t" \ + "paddd %%xmm12,%%xmm11\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "pcmpeqw %%xmm15,%%xmm9\n\t" \ + 
"psrad $16,%%xmm11\n\t" \ + "psubw %%xmm14,%%xmm9\n\t" \ + "packssdw %%xmm11,%%xmm10\n\t" \ + "pxor %%xmm12,%%xmm12\n\t" \ + "paddw %%xmm9,%%xmm10\n\t" \ + /*xmm1=_y[1]=u \ + xmm10=s=(12785*u>>16)-t4''*/ \ + "psubw %%xmm14,%%xmm12\n\t" \ + "movdqa %%xmm10,%%xmm1\n\t" \ + "mov $0x3000503B,%[a]\n\t" \ + "pmulhw %%xmm13,%%xmm10\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "psubw %%xmm8,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + /*xmm8:xmm7=s*20539+0x3000*/ \ + "movdqa %%xmm10,%%xmm7\n\t" \ + "movdqa %%xmm10,%%xmm8\n\t" \ + "punpcklwd %%xmm12,%%xmm7\n\t" \ + "pmaddwd %%xmm13,%%xmm7\n\t" \ + "punpckhwd %%xmm12,%%xmm8\n\t" \ + "pmaddwd %%xmm13,%%xmm8\n\t" \ + /*xmm7=(s*20539+0x3000>>20)+s*/ \ + "psrad $20,%%xmm7\n\t" \ + "psrad $20,%%xmm8\n\t" \ + "packssdw %%xmm8,%%xmm7\n\t" \ + "paddw %%xmm10,%%xmm7\n\t" \ + /*xmm7=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ + "pcmpeqw %%xmm15,%%xmm10\n\t" \ + "psubw %%xmm14,%%xmm10\n\t" \ + "paddw %%xmm10,%%xmm7\n\t " \ + +# define OC_TRANSPOSE8x8 \ + "#OC_TRANSPOSE8x8\n\t" \ + "movdqa %%xmm4,%%xmm8\n\t" \ + /*xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \ + "punpcklwd %%xmm5,%%xmm4\n\t" \ + /*xmm8 = f7 e7 f6 e6 f5 e5 f4 e4*/ \ + "punpckhwd %%xmm5,%%xmm8\n\t" \ + /*xmm5 is free.*/ \ + "movdqa %%xmm0,%%xmm5\n\t" \ + /*xmm0 = b3 a3 b2 a2 b1 a1 b0 a0*/ \ + "punpcklwd %%xmm1,%%xmm0\n\t" \ + /*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \ + "punpckhwd %%xmm1,%%xmm5\n\t" \ + /*xmm1 is free.*/ \ + "movdqa %%xmm6,%%xmm1\n\t" \ + /*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \ + "punpcklwd %%xmm7,%%xmm6\n\t" \ + /*xmm1 = h7 g7 h6 g6 h5 g5 h4 g4*/ \ + "punpckhwd %%xmm7,%%xmm1\n\t" \ + /*xmm7 is free.*/ \ + "movdqa %%xmm2,%%xmm7\n\t" \ + /*xmm7 = d3 c3 d2 c2 d1 c1 d0 c0*/ \ + "punpcklwd %%xmm3,%%xmm7\n\t" \ + /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \ + "punpckhwd %%xmm3,%%xmm2\n\t" \ + /*xmm3 is free.*/ \ + "movdqa %%xmm0,%%xmm3\n\t" \ + /*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \ + "punpckldq %%xmm7,%%xmm0\n\t" \ + /*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \ + "punpckhdq %%xmm7,%%xmm3\n\t" \ + 
/*xmm7 is free.*/ \ + "movdqa %%xmm5,%%xmm7\n\t" \ + /*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \ + "punpckldq %%xmm2,%%xmm5\n\t" \ + /*xmm7 = d7 c7 b7 a7 d6 c6 b6 a6*/ \ + "punpckhdq %%xmm2,%%xmm7\n\t" \ + /*xmm2 is free.*/ \ + "movdqa %%xmm4,%%xmm2\n\t" \ + /*xmm2 = h1 g1 f1 e1 h0 g0 f0 e0*/ \ + "punpckldq %%xmm6,%%xmm2\n\t" \ + /*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \ + "punpckhdq %%xmm6,%%xmm4\n\t" \ + /*xmm6 is free.*/ \ + "movdqa %%xmm8,%%xmm6\n\t" \ + /*xmm6 = h5 g5 f5 e5 h4 g4 f4 e4*/ \ + "punpckldq %%xmm1,%%xmm6\n\t" \ + /*xmm8 = h7 g7 f7 e7 h6 g6 f6 e6*/ \ + "punpckhdq %%xmm1,%%xmm8\n\t" \ + /*xmm1 is free.*/ \ + "movdqa %%xmm0,%%xmm1\n\t" \ + /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \ + "punpcklqdq %%xmm2,%%xmm0\n\t" \ + /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \ + "punpckhqdq %%xmm2,%%xmm1\n\t" \ + /*xmm2 is free.*/ \ + "movdqa %%xmm3,%%xmm2\n\t" \ + /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \ + "punpcklqdq %%xmm4,%%xmm2\n\t" \ + /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \ + "punpckhqdq %%xmm4,%%xmm3\n\t" \ + /*xmm4 is free.*/ \ + "movdqa %%xmm5,%%xmm4\n\t" \ + /*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \ + "punpcklqdq %%xmm6,%%xmm4\n\t" \ + /*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \ + "punpckhqdq %%xmm6,%%xmm5\n\t" \ + /*xmm6 is free.*/ \ + "movdqa %%xmm7,%%xmm6\n\t" \ + /*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \ + "punpcklqdq %%xmm8,%%xmm6\n\t" \ + /*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \ + "punpckhqdq %%xmm8,%%xmm7\n\t" \ + /*xmm8 is free.*/ \ + +/*SSE2 implementation of the fDCT for x86-64 only. 
+ Because of the 8 extra XMM registers on x86-64, this version can operate + without any temporary stack access at all.*/ +void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ptrdiff_t a; + __asm__ __volatile__( + /*Load the input.*/ + "movdqa 0x00(%[x]),%%xmm0\n\t" + "movdqa 0x10(%[x]),%%xmm1\n\t" + "movdqa 0x20(%[x]),%%xmm2\n\t" + "movdqa 0x30(%[x]),%%xmm3\n\t" + "movdqa 0x40(%[x]),%%xmm4\n\t" + "movdqa 0x50(%[x]),%%xmm5\n\t" + "movdqa 0x60(%[x]),%%xmm6\n\t" + "movdqa 0x70(%[x]),%%xmm7\n\t" + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + /*We also add a few biases to correct for some systematic error that + remains in the full fDCT->iDCT round trip.*/ + /*xmm15={0}x8*/ + "pxor %%xmm15,%%xmm15\n\t" + /*xmm14={-1}x8*/ + "pcmpeqb %%xmm14,%%xmm14\n\t" + "psllw $2,%%xmm0\n\t" + /*xmm8=xmm0*/ + "movdqa %%xmm0,%%xmm8\n\t" + "psllw $2,%%xmm1\n\t" + /*xmm8={_x[7...0]==0}*/ + "pcmpeqw %%xmm15,%%xmm8\n\t" + "psllw $2,%%xmm2\n\t" + /*xmm8={_x[7...0]!=0}*/ + "psubw %%xmm14,%%xmm8\n\t" + "psllw $2,%%xmm3\n\t" + /*%[a]=1*/ + "mov $1,%[a]\n\t" + /*xmm8={_x[6]!=0,0,_x[4]!=0,0,_x[2]!=0,0,_x[0]!=0,0}*/ + "pslld $16,%%xmm8\n\t" + "psllw $2,%%xmm4\n\t" + /*xmm9={0,0,0,0,0,0,0,1}*/ + "movd %[a],%%xmm9\n\t" + /*xmm8={0,0,_x[2]!=0,0,_x[0]!=0,0}*/ + "pshufhw $0x00,%%xmm8,%%xmm8\n\t" + "psllw $2,%%xmm5\n\t" + /*%[a]={1}x2*/ + "mov $0x10001,%[a]\n\t" + /*xmm8={0,0,0,0,0,0,0,_x[0]!=0}*/ + "pshuflw $0x01,%%xmm8,%%xmm8\n\t" + "psllw $2,%%xmm6\n\t" + /*xmm10={0,0,0,0,0,0,1,1}*/ + "movd %[a],%%xmm10\n\t" + /*xmm0=_x[7...0]+{0,0,0,0,0,0,0,_x[0]!=0}*/ + "paddw %%xmm8,%%xmm0\n\t" + "psllw $2,%%xmm7\n\t" + /*xmm0=_x[7...0]+{0,0,0,0,0,0,1,(_x[0]!=0)+1}*/ + "paddw %%xmm10,%%xmm0\n\t" + /*xmm1=_x[15...8]-{0,0,0,0,0,0,0,1}*/ + "psubw %%xmm9,%%xmm1\n\t" + /*Transform columns.*/ + OC_FDCT8x8 + /*Transform rows.*/ + OC_TRANSPOSE8x8 + OC_FDCT8x8 + /*TODO: zig-zag ordering?*/ + OC_TRANSPOSE8x8 + 
/*xmm14={-2,-2,-2,-2,-2,-2,-2,-2}*/ + "paddw %%xmm14,%%xmm14\n\t" + "psubw %%xmm14,%%xmm0\n\t" + "psubw %%xmm14,%%xmm1\n\t" + "psraw $2,%%xmm0\n\t" + "psubw %%xmm14,%%xmm2\n\t" + "psraw $2,%%xmm1\n\t" + "psubw %%xmm14,%%xmm3\n\t" + "psraw $2,%%xmm2\n\t" + "psubw %%xmm14,%%xmm4\n\t" + "psraw $2,%%xmm3\n\t" + "psubw %%xmm14,%%xmm5\n\t" + "psraw $2,%%xmm4\n\t" + "psubw %%xmm14,%%xmm6\n\t" + "psraw $2,%%xmm5\n\t" + "psubw %%xmm14,%%xmm7\n\t" + "psraw $2,%%xmm6\n\t" + "psraw $2,%%xmm7\n\t" + /*Store the result.*/ + "movdqa %%xmm0,0x00(%[y])\n\t" + "movdqa %%xmm1,0x10(%[y])\n\t" + "movdqa %%xmm2,0x20(%[y])\n\t" + "movdqa %%xmm3,0x30(%[y])\n\t" + "movdqa %%xmm4,0x40(%[y])\n\t" + "movdqa %%xmm5,0x50(%[y])\n\t" + "movdqa %%xmm6,0x60(%[y])\n\t" + "movdqa %%xmm7,0x70(%[y])\n\t" + :[a]"=&r"(a) + :[y]"r"(_y),[x]"r"(_x) + :"memory" + ); +} +#endif diff --git a/Engine/lib/libtheora/lib/x86/x86enc.c b/Engine/lib/libtheora/lib/x86/x86enc.c new file mode 100644 index 000000000..43b7be3ea --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/x86enc.c @@ -0,0 +1,49 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86state.c 15675 2009-02-06 09:43:27Z tterribe $ + + ********************************************************************/ +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +#include "../cpu.c" + +void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){ + ogg_uint32_t cpu_flags; + cpu_flags=oc_cpu_flags_get(); + oc_enc_vtable_init_c(_enc); + if(cpu_flags&OC_CPU_X86_MMX){ + _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx; + _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx; + _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx; + } + if(cpu_flags&OC_CPU_X86_MMXEXT){ + _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext; + _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext; + _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext; + _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext; + _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext; + _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext; + _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext; + } + if(cpu_flags&OC_CPU_X86_SSE2){ +# if defined(OC_X86_64_ASM) + /*_enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;*/ +# endif + } +} +#endif diff --git a/Engine/lib/libtheora/lib/x86/x86enc.h b/Engine/lib/libtheora/lib/x86/x86enc.h new file mode 100644 index 000000000..06c3908bc --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/x86enc.h @@ -0,0 +1,47 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $ + + ********************************************************************/ + +#if !defined(_x86_x86enc_H) +# define _x86_x86enc_H (1) +# include "../encint.h" +# include "x86int.h" + +void oc_enc_vtable_init_x86(oc_enc_ctx *_enc); + +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride); +void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,const unsigned char *_y,int _stride); +void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,int _stride); +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]); +void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]); + +#endif diff --git 
a/Engine/lib/libtheora/lib/x86/x86int.h b/Engine/lib/libtheora/lib/x86/x86int.h new file mode 100644 index 000000000..ede724f5a --- /dev/null +++ b/Engine/lib/libtheora/lib/x86/x86int.h @@ -0,0 +1,42 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_x86_x86int_H) +# define _x86_x86int_H (1) +# include "../internal.h" + +void oc_state_vtable_init_x86(oc_theora_state *_state); + +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride); +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue); +void oc_frag_recon_inter_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t *_residue); +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue); +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int 
_fragy0,int _fragy_end); +void oc_restore_fpu_mmx(void); + +#endif diff --git a/Engine/lib/libtheora/lib/dec/x86/x86state.c b/Engine/lib/libtheora/lib/x86/x86state.c similarity index 58% rename from Engine/lib/libtheora/lib/dec/x86/x86state.c rename to Engine/lib/libtheora/lib/x86/x86state.c index 28a559ba4..a786bec28 100644 --- a/Engine/lib/libtheora/lib/dec/x86/x86state.c +++ b/Engine/lib/libtheora/lib/x86/x86state.c @@ -5,33 +5,57 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $ + last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include "x86int.h" -#if defined(USE_ASM) +#if defined(OC_X86_ASM) -#include "../../cpu.c" +#include "../cpu.c" + +/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into + each quadrant of the destination.*/ +static const unsigned char OC_FZIG_ZAG_MMX[128]={ + 0, 8, 1, 2, 9,16,24,17, + 10, 3,32,11,18,25, 4,12, + 5,26,19,40,33,34,41,48, + 27, 6,13,20,28,21,14, 7, + 56,49,42,35,43,50,57,36, + 15,22,29,30,23,44,37,58, + 51,59,38,45,52,31,60,53, + 46,39,47,54,61,62,55,63, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, +}; void oc_state_vtable_init_x86(oc_theora_state *_state){ _state->cpu_flags=oc_cpu_flags_get(); if(_state->cpu_flags&OC_CPU_X86_MMX){ + _state->opt_vtable.frag_copy=oc_frag_copy_mmx; _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; 
_state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; - _state->opt_vtable.state_frag_copy=oc_state_frag_copy_mmx; + _state->opt_vtable.idct8x8=oc_idct8x8_mmx; _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; _state->opt_vtable.state_loop_filter_frag_rows= oc_state_loop_filter_frag_rows_mmx; _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; } else oc_state_vtable_init_c(_state); } diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxencfrag.c b/Engine/lib/libtheora/lib/x86_vc/mmxencfrag.c new file mode 100644 index 000000000..94f1d0651 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxencfrag.c @@ -0,0 +1,969 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $ + + ********************************************************************/ +#include +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + ptrdiff_t ret; + __asm{ +#define SRC esi +#define REF edx +#define YSTRIDE ecx +#define YSTRIDE3 edi + mov YSTRIDE,_ystride + mov SRC,_src + mov REF,_ref + /*Load the first 4 rows of each block.*/ + movq mm0,[SRC] + movq mm1,[REF] + movq mm2,[SRC][YSTRIDE] + movq mm3,[REF][YSTRIDE] + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + movq mm4,[SRC+YSTRIDE*2] + movq mm5,[REF+YSTRIDE*2] + movq mm6,[SRC+YSTRIDE3] + movq mm7,[REF+YSTRIDE3] + /*Compute their SADs and add them in mm0*/ + psadbw mm0,mm1 + psadbw mm2,mm3 + lea SRC,[SRC+YSTRIDE*4] + paddw mm0,mm2 + lea REF,[REF+YSTRIDE*4] + /*Load the next 3 rows as registers become available.*/ + movq mm2,[SRC] + movq mm3,[REF] + psadbw mm4,mm5 + psadbw mm6,mm7 + paddw mm0,mm4 + movq mm5,[REF+YSTRIDE] + movq mm4,[SRC+YSTRIDE] + paddw mm0,mm6 + movq mm7,[REF+YSTRIDE*2] + movq mm6,[SRC+YSTRIDE*2] + /*Start adding their SADs to mm0*/ + psadbw mm2,mm3 + psadbw mm4,mm5 + paddw mm0,mm2 + psadbw mm6,mm7 + /*Load last row as registers become available.*/ + movq mm2,[SRC+YSTRIDE3] + movq mm3,[REF+YSTRIDE3] + /*And finish adding up their SADs.*/ + paddw mm0,mm4 + psadbw mm2,mm3 + paddw mm0,mm6 + paddw mm0,mm2 + movd [ret],mm0 +#undef SRC +#undef REF +#undef YSTRIDE +#undef YSTRIDE3 + } + return (unsigned)ret; +} + +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + /*Early termination is for suckers.*/ + return oc_enc_frag_sad_mmxext(_src,_ref,_ystride); +} + +#define 
OC_SAD2_LOOP __asm{ \ + /*We want to compute (mm0+mm1>>1) on unsigned bytes without overflow, but \ + pavgb computes (mm0+mm1+1>>1). \ + The latter is exactly 1 too large when the low bit of two corresponding \ + bytes is only set in one of them. \ + Therefore we pxor the operands, pand to mask out the low bits, and psubb to \ + correct the output of pavgb.*/ \ + __asm movq mm6,mm0 \ + __asm lea REF1,[REF1+YSTRIDE*2] \ + __asm pxor mm0,mm1 \ + __asm pavgb mm6,mm1 \ + __asm lea REF2,[REF2+YSTRIDE*2] \ + __asm movq mm1,mm2 \ + __asm pand mm0,mm7 \ + __asm pavgb mm2,mm3 \ + __asm pxor mm1,mm3 \ + __asm movq mm3,[REF2+YSTRIDE] \ + __asm psubb mm6,mm0 \ + __asm movq mm0,[REF1] \ + __asm pand mm1,mm7 \ + __asm psadbw mm4,mm6 \ + __asm movd mm6,RET \ + __asm psubb mm2,mm1 \ + __asm movq mm1,[REF2] \ + __asm lea SRC,[SRC+YSTRIDE*2] \ + __asm psadbw mm5,mm2 \ + __asm movq mm2,[REF1+YSTRIDE] \ + __asm paddw mm5,mm4 \ + __asm movq mm4,[SRC] \ + __asm paddw mm6,mm5 \ + __asm movq mm5,[SRC+YSTRIDE] \ + __asm movd RET,mm6 \ +} + +/*Same as above, but does not pre-load the next two rows.*/ +#define OC_SAD2_TAIL __asm{ \ + __asm movq mm6,mm0 \ + __asm pavgb mm0,mm1 \ + __asm pxor mm6,mm1 \ + __asm movq mm1,mm2 \ + __asm pand mm6,mm7 \ + __asm pavgb mm2,mm3 \ + __asm pxor mm1,mm3 \ + __asm psubb mm0,mm6 \ + __asm pand mm1,mm7 \ + __asm psadbw mm4,mm0 \ + __asm psubb mm2,mm1 \ + __asm movd mm6,RET \ + __asm psadbw mm5,mm2 \ + __asm paddw mm5,mm4 \ + __asm paddw mm6,mm5 \ + __asm movd RET,mm6 \ +} + +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + ptrdiff_t ret; + __asm{ +#define REF1 ecx +#define REF2 edi +#define YSTRIDE esi +#define SRC edx +#define RET eax + mov YSTRIDE,_ystride + mov SRC,_src + mov REF1,_ref1 + mov REF2,_ref2 + movq mm0,[REF1] + movq mm1,[REF2] + movq mm2,[REF1+YSTRIDE] + movq mm3,[REF2+YSTRIDE] + xor RET,RET + movq mm4,[SRC] + pxor mm7,mm7 + pcmpeqb 
mm6,mm6 + movq mm5,[SRC+YSTRIDE] + psubb mm7,mm6 + OC_SAD2_LOOP + OC_SAD2_LOOP + OC_SAD2_LOOP + OC_SAD2_TAIL + mov [ret],RET +#undef REF1 +#undef REF2 +#undef YSTRIDE +#undef SRC +#undef RET + } + return (unsigned)ret; +} + +/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their + 16-bit difference in mm0...mm7.*/ +#define OC_LOAD_SUB_8x4(_off) __asm{ \ + __asm movd mm0,[_off+SRC] \ + __asm movd mm4,[_off+REF] \ + __asm movd mm1,[_off+SRC+SRC_YSTRIDE] \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm movd mm5,[_off+REF+REF_YSTRIDE] \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm movd mm2,[_off+SRC] \ + __asm movd mm7,[_off+REF] \ + __asm movd mm3,[_off+SRC+SRC_YSTRIDE] \ + __asm movd mm6,[_off+REF+REF_YSTRIDE] \ + __asm punpcklbw mm0,mm4 \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm punpcklbw mm4,mm4 \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm psubw mm0,mm4 \ + __asm movd mm4,[_off+SRC] \ + __asm movq [_off*2+BUF],mm0 \ + __asm movd mm0,[_off+REF] \ + __asm punpcklbw mm1,mm5 \ + __asm punpcklbw mm5,mm5 \ + __asm psubw mm1,mm5 \ + __asm movd mm5,[_off+SRC+SRC_YSTRIDE] \ + __asm punpcklbw mm2,mm7 \ + __asm punpcklbw mm7,mm7 \ + __asm psubw mm2,mm7 \ + __asm movd mm7,[_off+REF+REF_YSTRIDE] \ + __asm punpcklbw mm3,mm6 \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm punpcklbw mm6,mm6 \ + __asm psubw mm3,mm6 \ + __asm movd mm6,[_off+SRC] \ + __asm punpcklbw mm4,mm0 \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm punpcklbw mm0,mm0 \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm psubw mm4,mm0 \ + __asm movd mm0,[_off+REF] \ + __asm punpcklbw mm5,mm7 \ + __asm neg SRC_YSTRIDE \ + __asm punpcklbw mm7,mm7 \ + __asm psubw mm5,mm7 \ + __asm movd mm7,[_off+SRC+SRC_YSTRIDE] \ + __asm punpcklbw mm6,mm0 \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm punpcklbw mm0,mm0 \ + __asm neg REF_YSTRIDE \ + __asm psubw mm6,mm0 \ + __asm movd mm0,[_off+REF+REF_YSTRIDE] \ + __asm lea SRC,[SRC+SRC_YSTRIDE*8] \ + __asm punpcklbw mm7,mm0 \ + __asm neg SRC_YSTRIDE \ + 
__asm punpcklbw mm0,mm0 \ + __asm lea REF,[REF+REF_YSTRIDE*8] \ + __asm psubw mm7,mm0 \ + __asm neg REF_YSTRIDE \ + __asm movq mm0,[_off*2+BUF] \ +} + +/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.*/ +#define OC_LOAD_8x4(_off) __asm{ \ + __asm movd mm0,[_off+SRC] \ + __asm movd mm1,[_off+SRC+YSTRIDE] \ + __asm movd mm2,[_off+SRC+YSTRIDE*2] \ + __asm pxor mm7,mm7 \ + __asm movd mm3,[_off+SRC+YSTRIDE3] \ + __asm punpcklbw mm0,mm7 \ + __asm movd mm4,[_off+SRC4] \ + __asm punpcklbw mm1,mm7 \ + __asm movd mm5,[_off+SRC4+YSTRIDE] \ + __asm punpcklbw mm2,mm7 \ + __asm movd mm6,[_off+SRC4+YSTRIDE*2] \ + __asm punpcklbw mm3,mm7 \ + __asm movd mm7,[_off+SRC4+YSTRIDE3] \ + __asm punpcklbw mm4,mm4 \ + __asm punpcklbw mm5,mm5 \ + __asm psrlw mm4,8 \ + __asm psrlw mm5,8 \ + __asm punpcklbw mm6,mm6 \ + __asm punpcklbw mm7,mm7 \ + __asm psrlw mm6,8 \ + __asm psrlw mm7,8 \ +} + +/*Performs the first two stages of an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. + Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to + perform this stage in place with no temporary registers).*/ +#define OC_HADAMARD_AB_8x4 __asm{ \ + /*Stage A: \ + Outputs 0-3 are swapped with 4-7 here.*/ \ + __asm paddw mm5,mm1 \ + __asm paddw mm6,mm2 \ + __asm paddw mm1,mm1 \ + __asm paddw mm2,mm2 \ + __asm psubw mm1,mm5 \ + __asm psubw mm2,mm6 \ + __asm paddw mm7,mm3 \ + __asm paddw mm4,mm0 \ + __asm paddw mm3,mm3 \ + __asm paddw mm0,mm0 \ + __asm psubw mm3,mm7 \ + __asm psubw mm0,mm4 \ + /*Stage B:*/ \ + __asm paddw mm0,mm2 \ + __asm paddw mm1,mm3 \ + __asm paddw mm4,mm6 \ + __asm paddw mm5,mm7 \ + __asm paddw mm2,mm2 \ + __asm paddw mm3,mm3 \ + __asm paddw mm6,mm6 \ + __asm paddw mm7,mm7 \ + __asm psubw mm2,mm0 \ + __asm psubw mm3,mm1 \ + __asm psubw mm6,mm4 \ + __asm psubw mm7,mm5 \ +} + +/*Performs the last stage of an 8-point 1-D Hadamard transform in place. 
+ Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in + place with no temporary registers).*/ +#define OC_HADAMARD_C_8x4 __asm{ \ + /*Stage C:*/ \ + __asm paddw mm0,mm1 \ + __asm paddw mm2,mm3 \ + __asm paddw mm4,mm5 \ + __asm paddw mm6,mm7 \ + __asm paddw mm1,mm1 \ + __asm paddw mm3,mm3 \ + __asm paddw mm5,mm5 \ + __asm paddw mm7,mm7 \ + __asm psubw mm1,mm0 \ + __asm psubw mm3,mm2 \ + __asm psubw mm5,mm4 \ + __asm psubw mm7,mm6 \ +} + +/*Performs an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. + Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform + in place with no temporary registers).*/ +#define OC_HADAMARD_8x4 __asm{ \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_8x4 \ +} + +/*Performs the first part of the final stage of the Hadamard transform and + summing of absolute values. + At the end of this part, mm1 will contain the DC coefficient of the + transform.*/ +#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) __asm{ \ + /*We use the fact that \ + (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \ + to merge the final butterfly with the abs and the first stage of \ + accumulation. \ + Thus we can avoid using pabsw, which is not available until SSSE3. \ + Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \ + implementation would be (3+3)*8+7=55 instructions (+4 for spilling \ + registers). \ + Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). 
\ + This implementation is only 26 (+4 for spilling registers).*/ \ + __asm movq [_r7+BUF],mm7 \ + __asm movq [_r6+BUF],mm6 \ + /*mm7={0x7FFF}x4 \ + mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \ + __asm pcmpeqb mm7,mm7 \ + __asm movq mm6,mm0 \ + __asm psrlw mm7,1 \ + __asm paddw mm6,mm1 \ + __asm pmaxsw mm0,mm1 \ + __asm paddsw mm6,mm7 \ + __asm psubw mm0,mm6 \ + /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \ + mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \ + __asm movq mm6,mm2 \ + __asm movq mm1,mm4 \ + __asm pmaxsw mm2,mm3 \ + __asm pmaxsw mm4,mm5 \ + __asm paddw mm6,mm3 \ + __asm paddw mm1,mm5 \ + __asm movq mm3,[_r7+BUF] \ +} + +/*Performs the second part of the final stage of the Hadamard transform and + summing of absolute values.*/ +#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) __asm{ \ + __asm paddsw mm6,mm7 \ + __asm movq mm5,[_r6+BUF] \ + __asm paddsw mm1,mm7 \ + __asm psubw mm2,mm6 \ + __asm psubw mm4,mm1 \ + /*mm7={1}x4 (needed for the horizontal add that follows) \ + mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \ + __asm movq mm6,mm3 \ + __asm pmaxsw mm3,mm5 \ + __asm paddw mm0,mm2 \ + __asm paddw mm6,mm5 \ + __asm paddw mm0,mm4 \ + __asm paddsw mm6,mm7 \ + __asm paddw mm0,mm3 \ + __asm psrlw mm7,14 \ + __asm psubw mm0,mm6 \ +} + +/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the + absolute value of each component, and accumulates everything into mm0. + This is the only portion of SATD which requires MMXEXT (we could use plain + MMX, but it takes 4 instructions and an extra register to work around the + lack of a pmaxsw, which is a pretty serious penalty).*/ +#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) __asm{ \ + OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ + OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ +} + +/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each + component, and accumulates everything into mm0. 
+ Note that mm0 will have an extra 4 added to each column, and that after + removing this value, the remainder will be half the conventional value.*/ +#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) __asm{ \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \ +} + +/*Performs two 4x4 transposes (mostly) in place. + On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7} + contains rows {a,b,c,d}. + On output, {0x40,0x50,0x60,0x70}+_off+BUF contains {e,f,g,h}^T, and + {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/ +#define OC_TRANSPOSE_4x4x2(_off) __asm{ \ + /*First 4x4 transpose:*/ \ + __asm movq [0x10+_off+BUF],mm5 \ + /*mm0 = e3 e2 e1 e0 \ + mm1 = f3 f2 f1 f0 \ + mm2 = g3 g2 g1 g0 \ + mm3 = h3 h2 h1 h0*/ \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm2,mm3 \ + __asm punpckhwd mm5,mm3 \ + __asm movq mm3,mm0 \ + __asm punpcklwd mm0,mm1 \ + __asm punpckhwd mm3,mm1 \ + /*mm0 = f1 e1 f0 e0 \ + mm3 = f3 e3 f2 e2 \ + mm2 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm2 \ + __asm punpckhdq mm1,mm2 \ + __asm movq mm2,mm3 \ + __asm punpckhdq mm3,mm5 \ + __asm movq [0x40+_off+BUF],mm0 \ + __asm punpckldq mm2,mm5 \ + /*mm0 = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + __asm movq mm5,[0x10+_off+BUF] \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm5 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm7 = d3 d2 d1 d0*/ \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm7 \ + __asm movq [0x50+_off+BUF],mm1 \ + __asm punpckhwd mm0,mm7 \ + __asm movq mm7,mm4 \ + __asm punpcklwd mm4,mm5 \ + __asm movq [0x60+_off+BUF],mm2 \ + __asm punpckhwd mm7,mm5 \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + mm0 = d3 c3 d2 c2*/ \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm movq [0x70+_off+BUF],mm3 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm7 \ + __asm punpckhdq mm7,mm0 \ + __asm punpckldq mm6,mm0 \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 
c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ +} + +static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src, + int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t *bufp; + unsigned ret1; + unsigned ret2; + bufp=buf; + __asm{ +#define SRC esi +#define REF eax +#define SRC_YSTRIDE ecx +#define REF_YSTRIDE edx +#define BUF edi +#define RET eax +#define RET2 edx + mov SRC,_src + mov SRC_YSTRIDE,_src_ystride + mov REF,_ref + mov REF_YSTRIDE,_ref_ystride + mov BUF,bufp + OC_LOAD_SUB_8x4(0x00) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x00) + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + movq [0x00+BUF],mm4 + movq [0x10+BUF],mm5 + movq [0x20+BUF],mm6 + movq [0x30+BUF],mm7 + OC_LOAD_SUB_8x4(0x04) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x08) + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. + We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + movq mm1,[0x10+BUF] + movq mm2,[0x20+BUF] + movq mm3,[0x30+BUF] + movq mm0,[0x00+BUF] + OC_HADAMARD_ABS_ACCUM_8x4(0x28,0x38) + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. 
+ We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + mov RET2,_thresh + pmaddwd mm0,mm7 + movq mm1,[0x50+BUF] + movq mm5,[0x58+BUF] + movq mm4,mm0 + movq mm2,[0x60+BUF] + punpckhdq mm0,mm0 + movq mm6,[0x68+BUF] + paddd mm4,mm0 + movq mm3,[0x70+BUF] + movd RET,mm4 + movq mm7,[0x78+BUF] + /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4 + added to them, and a factor of two removed; correct the final sum here.*/ + lea RET,[RET+RET-32] + movq mm0,[0x40+BUF] + cmp RET,RET2 + movq mm4,[0x48+BUF] + jae at_end + OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78) + pmaddwd mm0,mm7 + /*There isn't much to stick in here to hide the latency this time, but the + alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose + latency is even worse.*/ + sub RET,32 + movq mm4,mm0 + punpckhdq mm0,mm0 + paddd mm4,mm0 + movd RET2,mm4 + lea RET,[RET+RET2*2] + align 16 +at_end: + mov ret1,RET +#undef SRC +#undef REF +#undef SRC_YSTRIDE +#undef REF_YSTRIDE +#undef BUF +#undef RET +#undef RET2 + } + return ret1; +} + +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh); +} + + +/*Our internal implementation of frag_copy2 takes an extra stride parameter so + we can share code with oc_enc_frag_satd2_thresh_mmxext().*/ +static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){ + __asm{ + /*Load the first 3 rows.*/ +#define DST_YSTRIDE edi +#define SRC_YSTRIDE esi +#define DST eax +#define SRC1 edx +#define SRC2 ecx + mov DST_YSTRIDE,_dst_ystride + mov SRC_YSTRIDE,_src_ystride + mov DST,_dst + mov SRC1,_src1 + mov SRC2,_src2 + movq mm0,[SRC1] + movq mm1,[SRC2] + movq mm2,[SRC1+SRC_YSTRIDE] + lea SRC1,[SRC1+SRC_YSTRIDE*2] + movq mm3,[SRC2+SRC_YSTRIDE] + lea 
SRC2,[SRC2+SRC_YSTRIDE*2] + pxor mm7,mm7 + movq mm4,[SRC1] + pcmpeqb mm6,mm6 + movq mm5,[SRC2] + /*mm7={1}x8.*/ + psubb mm7,mm6 + /*Start averaging mm0 and mm1 into mm6.*/ + movq mm6,mm0 + pxor mm0,mm1 + pavgb mm6,mm1 + /*mm1 is free, start averaging mm3 into mm2 using mm1.*/ + movq mm1,mm2 + pand mm0,mm7 + pavgb mm2,mm3 + pxor mm1,mm3 + /*mm3 is free.*/ + psubb mm6,mm0 + /*mm0 is free, start loading the next row.*/ + movq mm0,[SRC1+SRC_YSTRIDE] + /*Start averaging mm5 and mm4 using mm3.*/ + movq mm3,mm4 + /*mm6 [row 0] is done; write it out.*/ + movq [DST],mm6 + pand mm1,mm7 + pavgb mm4,mm5 + psubb mm2,mm1 + /*mm1 is free, continue loading the next row.*/ + movq mm1,[SRC2+SRC_YSTRIDE] + pxor mm3,mm5 + lea SRC1,[SRC1+SRC_YSTRIDE*2] + /*mm2 [row 1] is done; write it out.*/ + movq [DST+DST_YSTRIDE],mm2 + pand mm3,mm7 + /*Start loading the next row.*/ + movq mm2,[SRC1] + lea DST,[DST+DST_YSTRIDE*2] + psubb mm4,mm3 + lea SRC2,[SRC2+SRC_YSTRIDE*2] + /*mm4 [row 2] is done; write it out.*/ + movq [DST],mm4 + /*Continue loading the next row.*/ + movq mm3,[SRC2] + /*Start averaging mm0 and mm1 into mm6.*/ + movq mm6,mm0 + pxor mm0,mm1 + /*Start loading the next row.*/ + movq mm4,[SRC1+SRC_YSTRIDE] + pavgb mm6,mm1 + /*mm1 is free; start averaging mm3 into mm2 using mm1.*/ + movq mm1,mm2 + pand mm0,mm7 + /*Continue loading the next row.*/ + movq mm5,[SRC2+SRC_YSTRIDE] + pavgb mm2,mm3 + lea SRC1,[SRC1+SRC_YSTRIDE*2] + pxor mm1,mm3 + /*mm3 is free.*/ + psubb mm6,mm0 + /*mm0 is free, start loading the next row.*/ + movq mm0,[SRC1] + /*Start averaging mm5 into mm4 using mm3.*/ + movq mm3,mm4 + /*mm6 [row 3] is done; write it out.*/ + movq [DST+DST_YSTRIDE],mm6 + pand mm1,mm7 + lea SRC2,[SRC2+SRC_YSTRIDE*2] + pavgb mm4,mm5 + lea DST,[DST+DST_YSTRIDE*2] + psubb mm2,mm1 + /*mm1 is free; continue loading the next row.*/ + movq mm1,[SRC2] + pxor mm3,mm5 + /*mm2 [row 4] is done; write it out.*/ + movq [DST],mm2 + pand mm3,mm7 + /*Start loading the next row.*/ + movq 
mm2,[SRC1+SRC_YSTRIDE] + psubb mm4,mm3 + /*Start averaging mm0 and mm1 into mm6.*/ + movq mm6,mm0 + /*Continue loading the next row.*/ + movq mm3,[SRC2+SRC_YSTRIDE] + /*mm4 [row 5] is done; write it out.*/ + movq [DST+DST_YSTRIDE],mm4 + pxor mm0,mm1 + pavgb mm6,mm1 + /*mm4 is free; start averaging mm3 into mm2 using mm4.*/ + movq mm4,mm2 + pand mm0,mm7 + pavgb mm2,mm3 + pxor mm4,mm3 + lea DST,[DST+DST_YSTRIDE*2] + psubb mm6,mm0 + pand mm4,mm7 + /*mm6 [row 6] is done, write it out.*/ + movq [DST],mm6 + psubb mm2,mm4 + /*mm2 [row 7] is done, write it out.*/ + movq [DST+DST_YSTRIDE],mm2 +#undef SRC1 +#undef SRC2 +#undef SRC_YSTRIDE +#undef DST_YSTRIDE +#undef DST + } +} + +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + OC_ALIGN8(unsigned char ref[64]); + oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride); + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh); +} + +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src, + int _ystride){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t *bufp; + unsigned ret1; + unsigned ret2; + bufp=buf; + __asm{ +#define SRC eax +#define SRC4 esi +#define BUF edi +#define RET eax +#define RET_WORD ax +#define RET2 ecx +#define YSTRIDE edx +#define YSTRIDE3 ecx + mov SRC,_src + mov BUF,bufp + mov YSTRIDE,_ystride + /* src4 = src+4*ystride */ + lea SRC4,[SRC+YSTRIDE*4] + /* ystride3 = 3*ystride */ + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + OC_LOAD_8x4(0x00) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x00) + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + movq [0x00+BUF],mm4 + movq [0x10+BUF],mm5 + movq [0x20+BUF],mm6 + movq [0x30+BUF],mm7 + OC_LOAD_8x4(0x04) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x08) + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. 
+ We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + movq mm1,[0x10+BUF] + movq mm2,[0x20+BUF] + movq mm3,[0x30+BUF] + movq mm0,[0x00+BUF] + /*We split out the stages here so we can save the DC coefficient in the + middle.*/ + OC_HADAMARD_AB_8x4 + OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38) + movd RET,mm1 + OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38) + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. + We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + pmaddwd mm0,mm7 + movq mm1,[0x50+BUF] + movq mm5,[0x58+BUF] + movq mm2,[0x60+BUF] + movq mm4,mm0 + movq mm6,[0x68+BUF] + punpckhdq mm0,mm0 + movq mm3,[0x70+BUF] + paddd mm4,mm0 + movq mm7,[0x78+BUF] + movd RET2,mm4 + movq mm0,[0x40+BUF] + movq mm4,[0x48+BUF] + OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78) + pmaddwd mm0,mm7 + /*We assume that the DC coefficient is always positive (which is true, + because the input to the INTRA transform was not a difference).*/ + movzx RET,RET_WORD + add RET2,RET2 + sub RET2,RET + movq mm4,mm0 + punpckhdq mm0,mm0 + paddd mm4,mm0 + movd RET,mm4 + lea RET,[-64+RET2+RET*2] + mov [ret1],RET +#undef SRC +#undef SRC4 +#undef BUF +#undef RET +#undef RET_WORD +#undef RET2 +#undef YSTRIDE +#undef YSTRIDE3 + } + return ret1; +} + +void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64], + const unsigned char *_src, const unsigned char *_ref,int _ystride){ + int i; + __asm pxor mm7,mm7 + for(i=4;i-->0;){ + __asm{ +#define SRC edx +#define YSTRIDE esi +#define RESIDUE eax +#define REF ecx + mov YSTRIDE,_ystride + mov RESIDUE,_residue + mov SRC,_src + mov REF,_ref + /*mm0=[src]*/ + movq mm0,[SRC] + /*mm1=[ref]*/ + movq mm1,[REF] + /*mm4=[src+ystride]*/ + movq 
mm4,[SRC+YSTRIDE] + /*mm5=[ref+ystride]*/ + movq mm5,[REF+YSTRIDE] + /*Compute [src]-[ref].*/ + movq mm2,mm0 + punpcklbw mm0,mm7 + movq mm3,mm1 + punpckhbw mm2,mm7 + punpcklbw mm1,mm7 + punpckhbw mm3,mm7 + psubw mm0,mm1 + psubw mm2,mm3 + /*Compute [src+ystride]-[ref+ystride].*/ + movq mm1,mm4 + punpcklbw mm4,mm7 + movq mm3,mm5 + punpckhbw mm1,mm7 + lea SRC,[SRC+YSTRIDE*2] + punpcklbw mm5,mm7 + lea REF,[REF+YSTRIDE*2] + punpckhbw mm3,mm7 + psubw mm4,mm5 + psubw mm1,mm3 + /*Write the answer out.*/ + movq [RESIDUE+0x00],mm0 + movq [RESIDUE+0x08],mm2 + movq [RESIDUE+0x10],mm4 + movq [RESIDUE+0x18],mm1 + lea RESIDUE,[RESIDUE+0x20] + mov _residue,RESIDUE + mov _src,SRC + mov _ref,REF +#undef SRC +#undef YSTRIDE +#undef RESIDUE +#undef REF + } + } +} + +void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64], + const unsigned char *_src,int _ystride){ + __asm{ +#define YSTRIDE edx +#define YSTRIDE3 edi +#define RESIDUE ecx +#define SRC eax + mov YSTRIDE,_ystride + mov RESIDUE,_residue + mov SRC,_src + /*mm0=[src]*/ + movq mm0,[SRC] + /*mm1=[src+ystride]*/ + movq mm1,[SRC+YSTRIDE] + /*mm6={-1}x4*/ + pcmpeqw mm6,mm6 + /*mm2=[src+2*ystride]*/ + movq mm2,[SRC+YSTRIDE*2] + /*[ystride3]=3*[ystride]*/ + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + /*mm6={0x8000}x4*/ + psllw mm6,15 + /*mm3=[src+3*ystride]*/ + movq mm3,[SRC+YSTRIDE3] + /*mm6={128}x4*/ + psrlw mm6,8 + /*mm7=0*/ + pxor mm7,mm7 + /*[src]=[src]+4*[ystride]*/ + lea SRC,[SRC+YSTRIDE*4] + /*Compute [src]-128 and [src+ystride]-128*/ + movq mm4,mm0 + punpcklbw mm0,mm7 + movq mm5,mm1 + punpckhbw mm4,mm7 + psubw mm0,mm6 + punpcklbw mm1,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm1,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x00],mm0 + movq [RESIDUE+0x08],mm4 + movq [RESIDUE+0x10],mm1 + movq [RESIDUE+0x18],mm5 + /*mm0=[src+4*ystride]*/ + movq mm0,[SRC] + /*mm1=[src+5*ystride]*/ + movq mm1,[SRC+YSTRIDE] + /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/ + movq mm4,mm2 + punpcklbw mm2,mm7 + movq mm5,mm3 + 
punpckhbw mm4,mm7 + psubw mm2,mm6 + punpcklbw mm3,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm3,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x20],mm2 + movq [RESIDUE+0x28],mm4 + movq [RESIDUE+0x30],mm3 + movq [RESIDUE+0x38],mm5 + /*Load [src+6*ystride] and [src+7*ystride], then compute [src+4*ystride]-128 and [src+5*ystride]-128*/ + movq mm2,[SRC+YSTRIDE*2] + movq mm3,[SRC+YSTRIDE3] + movq mm4,mm0 + punpcklbw mm0,mm7 + movq mm5,mm1 + punpckhbw mm4,mm7 + psubw mm0,mm6 + punpcklbw mm1,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm1,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x40],mm0 + movq [RESIDUE+0x48],mm4 + movq [RESIDUE+0x50],mm1 + movq [RESIDUE+0x58],mm5 + /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ + movq mm4,mm2 + punpcklbw mm2,mm7 + movq mm5,mm3 + punpckhbw mm4,mm7 + psubw mm2,mm6 + punpcklbw mm3,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm3,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x60],mm2 + movq [RESIDUE+0x68],mm4 + movq [RESIDUE+0x70],mm3 + movq [RESIDUE+0x78],mm5 +#undef YSTRIDE +#undef YSTRIDE3 +#undef RESIDUE +#undef SRC + } +} + +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride); +} + +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxfdct.c b/Engine/lib/libtheora/lib/x86_vc/mmxfdct.c new file mode 100644 index 000000000..d908ce241 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxfdct.c @@ -0,0 +1,670 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ********************************************************************/ + /*MMX fDCT implementation for x86_32*/ +/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/ +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +#define OC_FDCT_STAGE1_8x4 __asm{ \ + /*Stage 1:*/ \ + /*mm0=t7'=t0-t7*/ \ + __asm psubw mm0,mm7 \ + __asm paddw mm7,mm7 \ + /*mm1=t6'=t1-t6*/ \ + __asm psubw mm1, mm6 \ + __asm paddw mm6,mm6 \ + /*mm2=t5'=t2-t5*/ \ + __asm psubw mm2,mm5 \ + __asm paddw mm5,mm5 \ + /*mm3=t4'=t3-t4*/ \ + __asm psubw mm3,mm4 \ + __asm paddw mm4,mm4 \ + /*mm7=t0'=t0+t7*/ \ + __asm paddw mm7,mm0 \ + /*mm6=t1'=t1+t6*/ \ + __asm paddw mm6,mm1 \ + /*mm5=t2'=t2+t5*/ \ + __asm paddw mm5,mm2 \ + /*mm4=t3'=t3+t4*/ \ + __asm paddw mm4,mm3\ +} + +#define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \ + /*Stage 2:*/ \ + /*mm7=t3''=t0'-t3'*/ \ + __asm psubw mm7,mm4 \ + __asm paddw mm4,mm4 \ + /*mm6=t2''=t1'-t2'*/ \ + __asm psubw mm6,mm5 \ + __asm movq [Y+_r6],mm7 \ + __asm paddw mm5,mm5 \ + /*mm1=t5''=t6'-t5'*/ \ + __asm psubw mm1,mm2 \ + __asm movq [Y+_r2],mm6 \ + /*mm4=t0''=t0'+t3'*/ \ + __asm paddw mm4,mm7 \ + __asm paddw mm2,mm2 \ + /*mm5=t1''=t1'+t2'*/ \ + __asm movq [Y+_r0],mm4 \ + __asm paddw mm5,mm6 \ + /*mm2=t6''=t6'+t5'*/ \ + __asm paddw mm2,mm1 \ + __asm movq [Y+_r4],mm5 \ + /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \ + /*mm4, mm5, mm6, mm7 are free.*/ \ + /*Stage 3:*/ \ + /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \ + __asm mov A,0x5A806A0A \ + __asm pcmpeqb mm6,mm6 \ + __asm movd mm7,A \ + __asm psrlw mm6,15 \ + __asm punpckldq mm7,mm7 \ + __asm paddw mm6,mm6 \ + /*mm0=0, m2={-1}x4 \ + mm5:mm4=t5''*27146+0xB500*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm punpcklwd mm4,mm6 \ + __asm movq [Y+_r3],mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm movq [Y+_r7],mm0 \ + __asm punpckhwd mm5,mm6 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 
\ + __asm pcmpeqb mm2,mm2 \ + /*mm2=t6'', mm1=t5''+(t5''!=0) \ + mm4=(t5''*27146+0xB500>>16)*/ \ + __asm pcmpeqw mm0,mm1 \ + __asm psrad mm4,16 \ + __asm psubw mm0,mm2 \ + __asm movq mm2, [Y+_r3] \ + __asm psrad mm5,16 \ + __asm paddw mm1,mm0 \ + __asm packssdw mm4,mm5 \ + /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ + __asm paddw mm4,mm1 \ + __asm movq mm0, [Y+_r7] \ + __asm psraw mm4,1 \ + __asm movq mm1,mm3 \ + /*mm3=t4''=t4'+s*/ \ + __asm paddw mm3,mm4 \ + /*mm1=t5'''=t4'-s*/ \ + __asm psubw mm1,mm4 \ + /*mm1=0, mm3={-1}x4 \ + mm5:mm4=t6''*27146+0xB500*/ \ + __asm movq mm4,mm2 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm4,mm6 \ + __asm movq [Y+_r5],mm1 \ + __asm pmaddwd mm4,mm7 \ + __asm movq [Y+_r1],mm3 \ + __asm punpckhwd mm5,mm6 \ + __asm pxor mm1,mm1 \ + __asm pmaddwd mm5,mm7 \ + __asm pcmpeqb mm3,mm3 \ + /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \ + __asm psrad mm4,16 \ + __asm pcmpeqw mm1,mm2 \ + __asm psrad mm5,16 \ + __asm psubw mm1,mm3 \ + __asm packssdw mm4,mm5 \ + __asm paddw mm2,mm1 \ + /*mm1=t1'' \ + mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ + __asm paddw mm4,mm2 \ + __asm movq mm1,[Y+_r4] \ + __asm psraw mm4,1 \ + __asm movq mm2,mm0 \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm0=t7''=t7'+s*/ \ + __asm paddw mm0,mm4 \ + /*mm2=t6'''=t7'-s*/ \ + __asm psubw mm2,mm4 \ + /*Stage 4:*/ \ + /*mm0=0, mm2=t0'' \ + mm5:mm4=t1''*27146+0xB500*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm punpcklwd mm4,mm6 \ + __asm movq [Y+_r3],mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm movq mm2,[Y+_r0] \ + __asm punpckhwd mm5,mm6 \ + __asm movq [Y+_r7],mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm pxor mm0,mm0 \ + /*mm7={27146,0x4000>>1}x2 \ + mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ + __asm psrad mm4,16 \ + __asm mov A,0x20006A0A \ + __asm pcmpeqw mm0,mm1 \ + __asm movd mm7,A \ + __asm psrad mm5,16 \ + __asm psubw mm0,mm3 \ + __asm packssdw mm4,mm5 \ + __asm paddw mm0,mm1 \ + __asm punpckldq mm7,mm7 \ + __asm paddw mm0,mm4 \ + 
/*mm6={0x00000E3D}x2 \ + mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \ + __asm movq mm4,mm2 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm4,mm6 \ + __asm mov A,0x0E3D \ + __asm pmaddwd mm4,mm7 \ + __asm punpckhwd mm5,mm6 \ + __asm movd mm6,A \ + __asm pmaddwd mm5,mm7 \ + __asm pxor mm1,mm1 \ + __asm punpckldq mm6,mm6 \ + __asm pcmpeqw mm1,mm2 \ + /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ + __asm psrad mm4,16 \ + __asm psubw mm1,mm3 \ + __asm psrad mm5,16 \ + __asm paddw mm2,mm1 \ + __asm packssdw mm4,mm5 \ + __asm movq mm1,[Y+_r5] \ + __asm paddw mm4,mm2 \ + /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \ + The naive implementation could cause overflow, so we use \ + u=(r&s)+((r^s)>>1).*/ \ + __asm movq mm2,[Y+_r3] \ + __asm movq mm7,mm0 \ + __asm pxor mm0,mm4 \ + __asm pand mm7,mm4 \ + __asm psraw mm0,1 \ + __asm mov A,0x7FFF54DC \ + __asm paddw mm0,mm7 \ + __asm movd mm7,A \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm4=_y[4]=v=r-u*/ \ + __asm psubw mm4,mm0 \ + __asm punpckldq mm7,mm7 \ + __asm movq [Y+_r4],mm4 \ + /*mm0=0, mm7={36410}x4 \ + mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm punpcklwd mm4,mm1 \ + __asm mov A,0x8E3A8E3A \ + __asm pmaddwd mm4,mm7 \ + __asm movq [Y+_r0],mm0 \ + __asm punpckhwd mm5,mm1 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm pcmpeqw mm1,mm0 \ + __asm movd mm7,A \ + __asm psubw mm1,mm3 \ + __asm punpckldq mm7,mm7 \ + __asm paddd mm4,mm6 \ + __asm paddd mm5,mm6 \ + /*mm0=0 \ + mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \ + __asm movq mm6,mm2 \ + __asm movq mm3,mm2 \ + __asm pmulhw mm6,mm7 \ + __asm paddw mm1,mm2 \ + __asm pmullw mm3,mm7 \ + __asm pxor mm0,mm0 \ + __asm paddw mm6,mm1 \ + __asm movq mm1,mm3 \ + __asm punpckhwd mm3,mm6 \ + __asm punpcklwd mm1,mm6 \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ + __asm paddd mm5,mm3 \ + __asm paddd mm4,mm1 \ + __asm psrad mm5,16 \ + __asm pxor mm6,mm6 \ + __asm psrad mm4,16 \ + __asm pcmpeqb 
mm3,mm3 \ + __asm packssdw mm4,mm5 \ + __asm psubw mm6,mm3 \ + /*mm1=t7'', mm7={26568,0x3400}x2 \ + mm2=s=t6'''-(36410*u>>16)*/ \ + __asm movq mm1,mm4 \ + __asm mov A,0x340067C8 \ + __asm pmulhw mm4,mm7 \ + __asm movd mm7,A \ + __asm movq [Y+_r5],mm1 \ + __asm punpckldq mm7,mm7 \ + __asm paddw mm4,mm1 \ + __asm movq mm1,[Y+_r7] \ + __asm psubw mm2,mm4 \ + /*mm6={0x00007B1B}x2 \ + mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \ + __asm movq mm4,mm2 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm4,mm6 \ + __asm pcmpeqw mm0,mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x7B1B \ + __asm punpckhwd mm5,mm6 \ + __asm movd mm6,A \ + __asm pmaddwd mm5,mm7 \ + __asm psubw mm0,mm3 \ + __asm punpckldq mm6,mm6 \ + /*mm7={64277-0x7FFF,0x7FFF}x2 \ + mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ + __asm psrad mm4,17 \ + __asm paddw mm2,mm0 \ + __asm psrad mm5,17 \ + __asm mov A,0x7FFF7B16 \ + __asm packssdw mm4,mm5 \ + __asm movd mm7,A \ + __asm paddw mm2,mm4 \ + __asm punpckldq mm7,mm7 \ + /*mm0=0, mm7={12785}x4 \ + mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm movq [Y+_r3],mm2 \ + __asm punpcklwd mm4,mm1 \ + __asm movq mm2,[Y+_r1] \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x31F131F1 \ + __asm punpckhwd mm5,mm1 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm pcmpeqw mm1,mm0 \ + __asm movd mm7,A \ + __asm psubw mm1,mm3 \ + __asm punpckldq mm7,mm7 \ + __asm paddd mm4,mm6 \ + __asm paddd mm5,mm6 \ + /*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \ + __asm movq mm6,mm2 \ + __asm movq mm3,mm2 \ + __asm pmulhw mm6,mm7 \ + __asm pmullw mm3,mm7 \ + __asm paddw mm6,mm1 \ + __asm movq mm1,mm3 \ + __asm punpckhwd mm3,mm6 \ + __asm punpcklwd mm1,mm6 \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ + __asm paddd mm5,mm3 \ + __asm paddd mm4,mm1 \ + __asm psrad mm5,16 \ + __asm pxor mm6,mm6 \ + __asm psrad mm4,16 \ + __asm pcmpeqb mm3,mm3 \ + __asm packssdw mm4,mm5 \ + __asm psubw mm6,mm3 \ + 
/*mm1=t3'', mm7={20539,0x3000}x2 \ + mm4=s=(12785*u>>16)-t4''*/ \ + __asm movq [Y+_r1],mm4 \ + __asm pmulhw mm4,mm7 \ + __asm mov A,0x3000503B \ + __asm movq mm1,[Y+_r6] \ + __asm movd mm7,A \ + __asm psubw mm4,mm2 \ + __asm punpckldq mm7,mm7 \ + /*mm6={0x00006CB7}x2 \ + mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \ + __asm movq mm5,mm4 \ + __asm movq mm2,mm4 \ + __asm punpcklwd mm4,mm6 \ + __asm pcmpeqw mm0,mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x6CB7 \ + __asm punpckhwd mm5,mm6 \ + __asm movd mm6,A \ + __asm pmaddwd mm5,mm7 \ + __asm psubw mm0,mm3 \ + __asm punpckldq mm6,mm6 \ + /*mm7={60547-0x7FFF,0x7FFF}x2 \ + mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ + __asm psrad mm4,20 \ + __asm paddw mm2,mm0 \ + __asm psrad mm5,20 \ + __asm mov A,0x7FFF6C84 \ + __asm packssdw mm4,mm5 \ + __asm movd mm7,A \ + __asm paddw mm2,mm4 \ + __asm punpckldq mm7,mm7 \ + /*mm0=0, mm7={25080}x4 \ + mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm movq [Y+_r7],mm2 \ + __asm punpcklwd mm4,mm1 \ + __asm movq mm2,[Y+_r2] \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x61F861F8 \ + __asm punpckhwd mm5,mm1 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm movd mm7,A \ + __asm pcmpeqw mm1,mm0 \ + __asm psubw mm1,mm3 \ + __asm punpckldq mm7,mm7 \ + __asm paddd mm4,mm6 \ + __asm paddd mm5,mm6 \ + /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \ + __asm movq mm6,mm2 \ + __asm movq mm3,mm2 \ + __asm pmulhw mm6,mm7 \ + __asm pmullw mm3,mm7 \ + __asm paddw mm6,mm1 \ + __asm movq mm1,mm3 \ + __asm punpckhwd mm3,mm6 \ + __asm punpcklwd mm1,mm6 \ + /*mm1={-1}x4 \ + mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ + __asm paddd mm5,mm3 \ + __asm paddd mm4,mm1 \ + __asm psrad mm5,16 \ + __asm mov A,0x28005460 \ + __asm psrad mm4,16 \ + __asm pcmpeqb mm1,mm1 \ + __asm packssdw mm4,mm5 \ + /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \ + mm4=s=(25080*u>>16)-t2''*/ \ + __asm movq mm6,mm4 \ + __asm pmulhw mm4,mm7 \ + __asm pxor mm5,mm5 \ + __asm movd 
mm7,A \ + __asm psubw mm5,mm1 \ + __asm punpckldq mm7,mm7 \ + __asm psubw mm4,mm2 \ + /*mm2=s+(s!=0) \ + mm4:mm3=s*21600+0x2800*/ \ + __asm movq mm3,mm4 \ + __asm movq mm2,mm4 \ + __asm punpckhwd mm4,mm5 \ + __asm pcmpeqw mm0,mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm psubw mm0,mm1 \ + __asm punpcklwd mm3,mm5 \ + __asm paddw mm2,mm0 \ + __asm pmaddwd mm3,mm7 \ + /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \ + mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ + __asm movq mm0,[Y+_r4] \ + __asm psrad mm4,18 \ + __asm movq mm5,[Y+_r5] \ + __asm psrad mm3,18 \ + __asm movq mm1,[Y+_r7] \ + __asm packssdw mm3,mm4 \ + __asm movq mm4,[Y+_r0] \ + __asm paddw mm3,mm2 \ +} + +/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7]. + On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and + {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/ +#define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \ + /*First 4x4 transpose:*/ \ + /*mm0 = e3 e2 e1 e0 \ + mm5 = f3 f2 f1 f0 \ + mm3 = g3 g2 g1 g0 \ + mm1 = h3 h2 h1 h0*/ \ + __asm movq mm2,mm0 \ + __asm punpcklwd mm0,mm5 \ + __asm punpckhwd mm2,mm5 \ + __asm movq mm5,mm3 \ + __asm punpcklwd mm3,mm1 \ + __asm punpckhwd mm5,mm1 \ + /*mm0 = f1 e1 f0 e0 \ + mm2 = f3 e3 f2 e2 \ + mm3 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm3 \ + __asm movq [Y+_r4],mm0 \ + __asm punpckhdq mm1,mm3 \ + __asm movq mm0,[Y+_r1] \ + __asm movq mm3,mm2 \ + __asm punpckldq mm2,mm5 \ + __asm punpckhdq mm3,mm5 \ + __asm movq mm5,[Y+_r3] \ + /*_y[4] = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm0 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm5 = d3 d2 d1 d0*/ \ + __asm movq mm7,mm4 \ + __asm punpcklwd mm4,mm0 \ + __asm punpckhwd mm7,mm0 \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm5 \ + __asm punpckhwd mm0,mm5 \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + 
mm0 = d3 c3 d2 c2*/ \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm7 \ + __asm punpckhdq mm7,mm0 \ + __asm punpckldq mm6,mm0 \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ +} + +/*MMX implementation of the fDCT.*/ +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ptrdiff_t a; + __asm{ +#define Y eax +#define A ecx +#define X edx + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + /*We also add biases to correct for some systematic error that remains in + the full fDCT->iDCT round trip.*/ + mov X, _x + mov Y, _y + movq mm0,[0x00+X] + movq mm1,[0x10+X] + movq mm2,[0x20+X] + movq mm3,[0x30+X] + pcmpeqb mm4,mm4 + pxor mm7,mm7 + movq mm5,mm0 + psllw mm0,2 + pcmpeqw mm5,mm7 + movq mm7,[0x70+X] + psllw mm1,2 + psubw mm5,mm4 + psllw mm2,2 + mov A,1 + pslld mm5,16 + movd mm6,A + psllq mm5,16 + mov A,0x10001 + psllw mm3,2 + movd mm4,A + punpckhwd mm5,mm6 + psubw mm1,mm6 + movq mm6,[0x60+X] + paddw mm0,mm5 + movq mm5,[0x50+X] + paddw mm0,mm4 + movq mm4,[0x40+X] + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + psllw mm7,2 + psubw mm0,mm7 + psllw mm6,2 + paddw mm7,mm7 + /*mm1=t6'=t1-t6*/ + psllw mm5,2 + psubw mm1,mm6 + psllw mm4,2 + paddw mm6,mm6 + /*mm2=t5'=t2-t5*/ + psubw mm2,mm5 + paddw mm5,mm5 + /*mm3=t4'=t3-t4*/ + psubw mm3,mm4 + paddw mm4,mm4 + /*mm7=t0'=t0+t7*/ + paddw mm7,mm0 + /*mm6=t1'=t1+t6*/ + paddw mm6,mm1 + /*mm5=t2'=t2+t5*/ + paddw mm5,mm2 + /*mm4=t3'=t3+t4*/ + paddw mm4,mm3 + OC_FDCT8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70) + OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70) + /*Swap out this 8x4 block for the next one.*/ + movq mm0,[0x08+X] + movq [0x30+Y],mm7 + movq mm7,[0x78+X] + movq [0x50+Y],mm1 + movq mm1,[0x18+X] + movq [0x20+Y],mm6 + movq mm6,[0x68+X] + movq [0x60+Y],mm2 + movq mm2,[0x28+X] + 
movq [0x10+Y],mm5 + movq mm5,[0x58+X] + movq [0x70+Y],mm3 + movq mm3,[0x38+X] + /*And increase its working precision, too.*/ + psllw mm0,2 + movq [0x00+Y],mm4 + psllw mm7,2 + movq mm4,[0x48+X] + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + psubw mm0,mm7 + psllw mm1,2 + paddw mm7,mm7 + psllw mm6,2 + /*mm1=t6'=t1-t6*/ + psubw mm1,mm6 + psllw mm2,2 + paddw mm6,mm6 + psllw mm5,2 + /*mm2=t5'=t2-t5*/ + psubw mm2,mm5 + psllw mm3,2 + paddw mm5,mm5 + psllw mm4,2 + /*mm3=t4'=t3-t4*/ + psubw mm3,mm4 + paddw mm4,mm4 + /*mm7=t0'=t0+t7*/ + paddw mm7,mm0 + /*mm6=t1'=t1+t6*/ + paddw mm6,mm1 + /*mm5=t2'=t2+t5*/ + paddw mm5,mm2 + /*mm4=t3'=t3+t4*/ + paddw mm4,mm3 + OC_FDCT8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78) + OC_TRANSPOSE8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78) + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. + We have cleverly arranged that it already be in the appropriate place, + so we only have to do half the stores and loads.*/ + movq mm0,[0x00+Y] + movq [0x58+Y],mm1 + movq mm1,[0x10+Y] + movq [0x68+Y],mm2 + movq mm2,[0x20+Y] + movq [0x78+Y],mm3 + movq mm3,[0x30+Y] + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38) + OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38) + /*mm0={-2}x4*/ + pcmpeqw mm0,mm0 + paddw mm0,mm0 + /*Round the results.*/ + psubw mm1,mm0 + psubw mm2,mm0 + psraw mm1,2 + psubw mm3,mm0 + movq [0x18+Y],mm1 + psraw mm2,2 + psubw mm4,mm0 + movq mm1,[0x08+Y] + psraw mm3,2 + psubw mm5,mm0 + psraw mm4,2 + psubw mm6,mm0 + psraw mm5,2 + psubw mm7,mm0 + psraw mm6,2 + psubw mm1,mm0 + psraw mm7,2 + movq mm0,[0x40+Y] + psraw mm1,2 + movq [0x30+Y],mm7 + movq mm7,[0x78+Y] + movq [0x08+Y],mm1 + movq mm1,[0x50+Y] + movq [0x20+Y],mm6 + movq mm6,[0x68+Y] + movq [0x28+Y],mm2 + movq mm2,[0x60+Y] + movq [0x10+Y],mm5 + movq mm5,[0x58+Y] + movq [0x38+Y],mm3 + movq mm3,[0x70+Y] + movq 
[0x00+Y],mm4 + movq mm4,[0x48+Y] + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78) + OC_TRANSPOSE8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78) + /*mm0={-2}x4*/ + pcmpeqw mm0,mm0 + paddw mm0,mm0 + /*Round the results.*/ + psubw mm1,mm0 + psubw mm2,mm0 + psraw mm1,2 + psubw mm3,mm0 + movq [0x58+Y],mm1 + psraw mm2,2 + psubw mm4,mm0 + movq mm1,[0x48+Y] + psraw mm3,2 + psubw mm5,mm0 + movq [0x68+Y],mm2 + psraw mm4,2 + psubw mm6,mm0 + movq [0x78+Y],mm3 + psraw mm5,2 + psubw mm7,mm0 + movq [0x40+Y],mm4 + psraw mm6,2 + psubw mm1,mm0 + movq [0x50+Y],mm5 + psraw mm7,2 + movq [0x60+Y],mm6 + psraw mm1,2 + movq [0x70+Y],mm7 + movq [0x48+Y],mm1 +#undef Y +#undef A +#undef X + } +} + +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxfrag.c b/Engine/lib/libtheora/lib/x86_vc/mmxfrag.c new file mode 100644 index 000000000..4eb2084dc --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxfrag.c @@ -0,0 +1,337 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxfrag.c 16578 2009-09-25 19:50:48Z cristianadam $ + + ********************************************************************/ + +/*MMX acceleration of fragment reconstruction for motion compensation. + Originally written by Rudolf Marek. + Additional optimization by Nils Pipenbrinck. + Note: Loops are unrolled for best performance. 
+ The iteration each instruction belongs to is marked in the comments as #i.*/ +#include <stddef.h> +#include "x86int.h" +#include "mmxfrag.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride){ +#define SRC edx +#define DST eax +#define YSTRIDE ecx +#define YSTRIDE3 esi + OC_FRAG_COPY_MMX(_dst,_src,_ystride); +#undef SRC +#undef DST +#undef YSTRIDE +#undef YSTRIDE3 +} + +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue){ + __asm{ +#define DST edx +#define DST4 esi +#define YSTRIDE eax +#define YSTRIDE3 edi +#define RESIDUE ecx + mov DST,_dst + mov YSTRIDE,_ystride + mov RESIDUE,_residue + lea DST4,[DST+YSTRIDE*4] + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ + pcmpeqw mm0,mm0 + /*#0 Load low residue.*/ + movq mm1,[0*8+RESIDUE] + /*#0 Load high residue.*/ + movq mm2,[1*8+RESIDUE] + /*Set mm0 to 0x8000800080008000.*/ + psllw mm0,15 + /*#1 Load low residue.*/ + movq mm3,[2*8+RESIDUE] + /*#1 Load high residue.*/ + movq mm4,[3*8+RESIDUE] + /*Set mm0 to 0x0080008000800080.*/ + psrlw mm0,8 + /*#2 Load low residue.*/ + movq mm5,[4*8+RESIDUE] + /*#2 Load high residue.*/ + movq mm6,[5*8+RESIDUE] + /*#0 Bias low residue.*/ + paddsw mm1,mm0 + /*#0 Bias high residue.*/ + paddsw mm2,mm0 + /*#0 Pack to byte.*/ + packuswb mm1,mm2 + /*#1 Bias low residue.*/ + paddsw mm3,mm0 + /*#1 Bias high residue.*/ + paddsw mm4,mm0 + /*#1 Pack to byte.*/ + packuswb mm3,mm4 + /*#2 Bias low residue.*/ + paddsw mm5,mm0 + /*#2 Bias high residue.*/ + paddsw mm6,mm0 + /*#2 Pack to byte.*/ + packuswb mm5,mm6 + /*#0 Write row.*/ + movq [DST],mm1 + /*#1 Write row.*/ + movq [DST+YSTRIDE],mm3 + /*#2 Write row.*/ + movq [DST+YSTRIDE*2],mm5 + /*#3 Load low residue.*/ + movq mm1,[6*8+RESIDUE] + /*#3 Load high residue.*/ + movq mm2,[7*8+RESIDUE] + /*#4 Load low residue.*/ + movq
mm3,[8*8+RESIDUE] + /*#4 Load high residue.*/ + movq mm4,[9*8+RESIDUE] + /*#5 Load low residue.*/ + movq mm5,[10*8+RESIDUE] + /*#5 Load high residue.*/ + movq mm6,[11*8+RESIDUE] + /*#3 Bias low residue.*/ + paddsw mm1,mm0 + /*#3 Bias high residue.*/ + paddsw mm2,mm0 + /*#3 Pack to byte.*/ + packuswb mm1,mm2 + /*#4 Bias low residue.*/ + paddsw mm3,mm0 + /*#4 Bias high residue.*/ + paddsw mm4,mm0 + /*#4 Pack to byte.*/ + packuswb mm3,mm4 + /*#5 Bias low residue.*/ + paddsw mm5,mm0 + /*#5 Bias high residue.*/ + paddsw mm6,mm0 + /*#5 Pack to byte.*/ + packuswb mm5,mm6 + /*#3 Write row.*/ + movq [DST+YSTRIDE3],mm1 + /*#4 Write row.*/ + movq [DST4],mm3 + /*#5 Write row.*/ + movq [DST4+YSTRIDE],mm5 + /*#6 Load low residue.*/ + movq mm1,[12*8+RESIDUE] + /*#6 Load high residue.*/ + movq mm2,[13*8+RESIDUE] + /*#7 Load low residue.*/ + movq mm3,[14*8+RESIDUE] + /*#7 Load high residue.*/ + movq mm4,[15*8+RESIDUE] + /*#6 Bias low residue.*/ + paddsw mm1,mm0 + /*#6 Bias high residue.*/ + paddsw mm2,mm0 + /*#6 Pack to byte.*/ + packuswb mm1,mm2 + /*#7 Bias low residue.*/ + paddsw mm3,mm0 + /*#7 Bias high residue.*/ + paddsw mm4,mm0 + /*#7 Pack to byte.*/ + packuswb mm3,mm4 + /*#6 Write row.*/ + movq [DST4+YSTRIDE*2],mm1 + /*#7 Write row.*/ + movq [DST4+YSTRIDE3],mm3 +#undef DST +#undef DST4 +#undef YSTRIDE +#undef YSTRIDE3 +#undef RESIDUE + } +} + +void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, + int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm0.*/ + __asm pxor mm0,mm0; + for(i=4;i-->0;){ + __asm{ +#define DST edx +#define SRC ecx +#define YSTRIDE edi +#define RESIDUE eax + mov DST,_dst + mov SRC,_src + mov YSTRIDE,_ystride + mov RESIDUE,_residue + /*#0 Load source.*/ + movq mm3,[SRC] + /*#1 Load source.*/ + movq mm7,[SRC+YSTRIDE] + /*#0 Get copy of src.*/ + movq mm4,mm3 + /*#0 Expand high source.*/ + punpckhbw mm4,mm0 + /*#0 Expand low source.*/ + punpcklbw mm3,mm0 + /*#0 Add residue high.*/ + paddsw mm4,[8+RESIDUE] + /*#1 Get copy of
src.*/ + movq mm2,mm7 + /*#0 Add residue low.*/ + paddsw mm3,[RESIDUE] + /*#1 Expand high source.*/ + punpckhbw mm2,mm0 + /*#0 Pack final row pixels.*/ + packuswb mm3,mm4 + /*#1 Expand low source.*/ + punpcklbw mm7,mm0 + /*#1 Add residue low.*/ + paddsw mm7,[16+RESIDUE] + /*#1 Add residue high.*/ + paddsw mm2,[24+RESIDUE] + /*Advance residue.*/ + lea RESIDUE,[32+RESIDUE] + /*#1 Pack final row pixels.*/ + packuswb mm7,mm2 + /*Advance src.*/ + lea SRC,[SRC+YSTRIDE*2] + /*#0 Write row.*/ + movq [DST],mm3 + /*#1 Write row.*/ + movq [DST+YSTRIDE],mm7 + /*Advance dst.*/ + lea DST,[DST+YSTRIDE*2] + mov _residue,RESIDUE + mov _dst,DST + mov _src,SRC +#undef DST +#undef SRC +#undef YSTRIDE +#undef RESIDUE + } + } +} + +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm7.*/ + __asm pxor mm7,mm7; + for(i=4;i-->0;){ + __asm{ +#define SRC1 ecx +#define SRC2 edi +#define YSTRIDE esi +#define RESIDUE edx +#define DST eax + mov YSTRIDE,_ystride + mov DST,_dst + mov RESIDUE,_residue + mov SRC1,_src1 + mov SRC2,_src2 + /*#0 Load src1.*/ + movq mm0,[SRC1] + /*#0 Load src2.*/ + movq mm2,[SRC2] + /*#0 Copy src1.*/ + movq mm1,mm0 + /*#0 Copy src2.*/ + movq mm3,mm2 + /*#1 Load src1.*/ + movq mm4,[SRC1+YSTRIDE] + /*#0 Unpack lower src1.*/ + punpcklbw mm0,mm7 + /*#1 Load src2.*/ + movq mm5,[SRC2+YSTRIDE] + /*#0 Unpack higher src1.*/ + punpckhbw mm1,mm7 + /*#0 Unpack lower src2.*/ + punpcklbw mm2,mm7 + /*#0 Unpack higher src2.*/ + punpckhbw mm3,mm7 + /*Advance src1 ptr.*/ + lea SRC1,[SRC1+YSTRIDE*2] + /*Advance src2 ptr.*/ + lea SRC2,[SRC2+YSTRIDE*2] + /*#0 Lower src1+src2.*/ + paddsw mm0,mm2 + /*#0 Higher src1+src2.*/ + paddsw mm1,mm3 + /*#1 Copy src1.*/ + movq mm2,mm4 + /*#0 Build lo average.*/ + psraw mm0,1 + /*#1 Copy src2.*/ + movq mm3,mm5 + /*#1 Unpack lower src1.*/ + punpcklbw mm4,mm7 + /*#0 Build hi average.*/ + psraw mm1,1 + /*#1 Unpack higher src1.*/ + punpckhbw 
mm2,mm7 + /*#0 low+=residue.*/ + paddsw mm0,[RESIDUE] + /*#1 Unpack lower src2.*/ + punpcklbw mm5,mm7 + /*#0 high+=residue.*/ + paddsw mm1,[8+RESIDUE] + /*#1 Unpack higher src2.*/ + punpckhbw mm3,mm7 + /*#1 Lower src1+src2.*/ + paddsw mm5,mm4 + /*#0 Pack and saturate.*/ + packuswb mm0,mm1 + /*#1 Higher src1+src2.*/ + paddsw mm3,mm2 + /*#0 Write row.*/ + movq [DST],mm0 + /*#1 Build lo average.*/ + psraw mm5,1 + /*#1 Build hi average.*/ + psraw mm3,1 + /*#1 low+=residue.*/ + paddsw mm5,[16+RESIDUE] + /*#1 high+=residue.*/ + paddsw mm3,[24+RESIDUE] + /*#1 Pack and saturate.*/ + packuswb mm5,mm3 + /*#1 Write row ptr.*/ + movq [DST+YSTRIDE],mm5 + /*Advance residue ptr.*/ + add RESIDUE,32 + /*Advance dest ptr.*/ + lea DST,[DST+YSTRIDE*2] + mov _dst,DST + mov _residue,RESIDUE + mov _src1,SRC1 + mov _src2,SRC2 +#undef SRC1 +#undef SRC2 +#undef YSTRIDE +#undef RESIDUE +#undef DST + } + } +} + +void oc_restore_fpu_mmx(void){ + __asm emms; +} + +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxfrag.h b/Engine/lib/libtheora/lib/x86_vc/mmxfrag.h new file mode 100644 index 000000000..45ee93e77 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxfrag.h @@ -0,0 +1,61 @@ +#if !defined(_x86_vc_mmxfrag_H) +# define _x86_vc_mmxfrag_H (1) +# include <stddef.h> +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ + do{ \ + const unsigned char *src; \ + unsigned char *dst; \ + src=(_src); \ + dst=(_dst); \ + __asm mov SRC,src \ + __asm mov DST,dst \ + __asm mov YSTRIDE,_ystride \ + /*src+0*ystride*/ \ + __asm movq mm0,[SRC] \ + /*src+1*ystride*/ \ + __asm movq mm1,[SRC+YSTRIDE] \ + /*ystride3=ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*src+2*ystride*/ \ + __asm movq mm2,[SRC+YSTRIDE*2] \ + /*src+3*ystride*/ \ + __asm movq mm3,[SRC+YSTRIDE3] \ + /*dst+0*ystride*/ \ + __asm movq [DST],mm0 \ + /*dst+1*ystride*/ \ + __asm movq
[DST+YSTRIDE],mm1 \ + /*Pointer to next 4.*/ \ + __asm lea SRC,[SRC+YSTRIDE*4] \ + /*dst+2*ystride*/ \ + __asm movq [DST+YSTRIDE*2],mm2 \ + /*dst+3*ystride*/ \ + __asm movq [DST+YSTRIDE3],mm3 \ + /*Pointer to next 4.*/ \ + __asm lea DST,[DST+YSTRIDE*4] \ + /*src+0*ystride*/ \ + __asm movq mm0,[SRC] \ + /*src+1*ystride*/ \ + __asm movq mm1,[SRC+YSTRIDE] \ + /*src+2*ystride*/ \ + __asm movq mm2,[SRC+YSTRIDE*2] \ + /*src+3*ystride*/ \ + __asm movq mm3,[SRC+YSTRIDE3] \ + /*dst+0*ystride*/ \ + __asm movq [DST],mm0 \ + /*dst+1*ystride*/ \ + __asm movq [DST+YSTRIDE],mm1 \ + /*dst+2*ystride*/ \ + __asm movq [DST+YSTRIDE*2],mm2 \ + /*dst+3*ystride*/ \ + __asm movq [DST+YSTRIDE3],mm3 \ + } \ + while(0) + +# endif +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxidct.c b/Engine/lib/libtheora/lib/x86_vc/mmxidct.c new file mode 100644 index 000000000..8f5ff6803 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxidct.c @@ -0,0 +1,562 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of Theora's iDCT. + Originally written by Rudolf Marek, based on code from On2's VP3.*/ +#include "x86int.h" +#include "../dct.h" + +#if defined(OC_X86_ASM) + +/*These are offsets into the table of constants below.*/ +/*7 rows of cosines, in order: pi/16 * (1 ... 
7).*/ +#define OC_COSINE_OFFSET (0) +/*A row of 8's.*/ +#define OC_EIGHT_OFFSET (56) + + + +/*A table of constants used by the MMX routines.*/ +static const __declspec(align(16))ogg_uint16_t + OC_IDCT_CONSTS[(7+1)*4]={ + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + 8, 8, 8, 8 +}; + +/*38 cycles*/ +#define OC_IDCT_BEGIN __asm{ \ + __asm movq mm2,OC_I(3) \ + __asm movq mm6,OC_C(3) \ + __asm movq mm4,mm2 \ + __asm movq mm7,OC_J(5) \ + __asm pmulhw mm4,mm6 \ + __asm movq mm1,OC_C(5) \ + __asm pmulhw mm6,mm7 \ + __asm movq mm5,mm1 \ + __asm pmulhw mm1,mm2 \ + __asm movq mm3,OC_I(1) \ + __asm pmulhw mm5,mm7 \ + __asm movq mm0,OC_C(1) \ + __asm paddw mm4,mm2 \ + __asm paddw mm6,mm7 \ + __asm paddw mm2,mm1 \ + __asm movq mm1,OC_J(7) \ + __asm paddw mm7,mm5 \ + __asm movq mm5,mm0 \ + __asm pmulhw mm0,mm3 \ + __asm paddw mm4,mm7 \ + __asm pmulhw mm5,mm1 \ + __asm movq mm7,OC_C(7) \ + __asm psubw mm6,mm2 \ + __asm paddw mm0,mm3 \ + __asm pmulhw mm3,mm7 \ + __asm movq mm2,OC_I(2) \ + __asm pmulhw mm7,mm1 \ + __asm paddw mm5,mm1 \ + __asm movq mm1,mm2 \ + __asm pmulhw mm2,OC_C(2) \ + __asm psubw mm3,mm5 \ + __asm movq mm5,OC_J(6) \ + __asm paddw mm0,mm7 \ + __asm movq mm7,mm5 \ + __asm psubw mm0,mm4 \ + __asm pmulhw mm5,OC_C(2) \ + __asm paddw mm2,mm1 \ + __asm pmulhw mm1,OC_C(6) \ + __asm paddw mm4,mm4 \ + __asm paddw mm4,mm0 \ + __asm psubw mm3,mm6 \ + __asm paddw mm5,mm7 \ 
+ __asm paddw mm6,mm6 \ + __asm pmulhw mm7,OC_C(6) \ + __asm paddw mm6,mm3 \ + __asm movq OC_I(1),mm4 \ + __asm psubw mm1,mm5 \ + __asm movq mm4,OC_C(4) \ + __asm movq mm5,mm3 \ + __asm pmulhw mm3,mm4 \ + __asm paddw mm7,mm2 \ + __asm movq OC_I(2),mm6 \ + __asm movq mm2,mm0 \ + __asm movq mm6,OC_I(0) \ + __asm pmulhw mm0,mm4 \ + __asm paddw mm5,mm3 \ + __asm movq mm3,OC_J(4) \ + __asm psubw mm5,mm1 \ + __asm paddw mm2,mm0 \ + __asm psubw mm6,mm3 \ + __asm movq mm0,mm6 \ + __asm pmulhw mm6,mm4 \ + __asm paddw mm3,mm3 \ + __asm paddw mm1,mm1 \ + __asm paddw mm3,mm0 \ + __asm paddw mm1,mm5 \ + __asm pmulhw mm4,mm3 \ + __asm paddw mm6,mm0 \ + __asm psubw mm6,mm2 \ + __asm paddw mm2,mm2 \ + __asm movq mm0,OC_I(1) \ + __asm paddw mm2,mm6 \ + __asm paddw mm4,mm3 \ + __asm psubw mm2,mm1 \ +} + +/*38+8=46 cycles.*/ +#define OC_ROW_IDCT __asm{ \ + OC_IDCT_BEGIN \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm3,mm3 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + __asm paddw mm5,mm5 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm0,mm0 \ + /*Save R1.*/ \ + __asm movq OC_I(1),mm1 \ + /*r0=R0=G.+C.*/ \ + __asm paddw mm0,mm7 \ +} + +/*The following macro does two 4x4 transposes in place. + At entry, we assume: + r0 = a3 a2 a1 a0 + I(1) = b3 b2 b1 b0 + r2 = c3 c2 c1 c0 + r3 = d3 d2 d1 d0 + + r4 = e3 e2 e1 e0 + r5 = f3 f2 f1 f0 + r6 = g3 g2 g1 g0 + r7 = h3 h2 h1 h0 + + At exit, we have: + I(0) = d0 c0 b0 a0 + I(1) = d1 c1 b1 a1 + I(2) = d2 c2 b2 a2 + I(3) = d3 c3 b3 a3 + + J(4) = h0 g0 f0 e0 + J(5) = h1 g1 f1 e1 + J(6) = h2 g2 f2 e2 + J(7) = h3 g3 f3 e3 + + I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. 
+ J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. + + Since r1 is free at entry, we calculate the Js first.*/ +/*19 cycles.*/ +#define OC_TRANSPOSE __asm{ \ + __asm movq mm1,mm4 \ + __asm punpcklwd mm4,mm5 \ + __asm movq OC_I(0),mm0 \ + __asm punpckhwd mm1,mm5 \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm7 \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm1 \ + __asm movq OC_J(4),mm4 \ + __asm punpckhwd mm0,mm7 \ + __asm movq OC_J(5),mm5 \ + __asm punpckhdq mm6,mm0 \ + __asm movq mm4,OC_I(0) \ + __asm punpckldq mm1,mm0 \ + __asm movq mm5,OC_I(1) \ + __asm movq mm0,mm4 \ + __asm movq OC_J(7),mm6 \ + __asm punpcklwd mm0,mm5 \ + __asm movq OC_J(6),mm1 \ + __asm punpckhwd mm4,mm5 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm2,mm3 \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm2 \ + __asm punpckhdq mm1,mm2 \ + __asm movq mm2,mm4 \ + __asm movq OC_I(0),mm0 \ + __asm punpckhwd mm5,mm3 \ + __asm movq OC_I(1),mm1 \ + __asm punpckhdq mm4,mm5 \ + __asm punpckldq mm2,mm5 \ + __asm movq OC_I(3),mm4 \ + __asm movq OC_I(2),mm2 \ +} + +/*38+19=57 cycles.*/ +#define OC_COLUMN_IDCT __asm{ \ + OC_IDCT_BEGIN \ + __asm paddw mm2,OC_8 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r2=NR2*/ \ + __asm psraw mm2,4 \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=NR1*/ \ + __asm psraw mm1,4 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*Store NR2 at I(2).*/ \ + __asm movq OC_I(2),mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*Store NR1 at I(1).*/ \ + __asm movq OC_I(1),mm1 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm4,OC_8 \ + /*r3=D'+D'*/ \ + __asm paddw mm3,mm3 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r4=NR4*/ \ + __asm psraw mm4,4 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + /*r3=NR3*/ \ + __asm psraw mm3,4 \ + __asm paddw mm6,OC_8 \ + /*r5=B''+B''*/ \ + __asm paddw mm5,mm5 \ + /*r5=R5=F'+B''*/ \ + __asm 
paddw mm5,mm6 \ + /*r6=NR6*/ \ + __asm psraw mm6,4 \ + /*Store NR4 at J(4).*/ \ + __asm movq OC_J(4),mm4 \ + /*r5=NR5*/ \ + __asm psraw mm5,4 \ + /*Store NR3 at I(3).*/ \ + __asm movq OC_I(3),mm3 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm7,OC_8 \ + /*r0=C'+C'*/ \ + __asm paddw mm0,mm0 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ + /*r7=NR7*/ \ + __asm psraw mm7,4 \ + /*Store NR6 at J(6).*/ \ + __asm movq OC_J(6),mm6 \ + /*r0=NR0*/ \ + __asm psraw mm0,4 \ + /*Store NR5 at J(5).*/ \ + __asm movq OC_J(5),mm5 \ + /*Store NR7 at J(7).*/ \ + __asm movq OC_J(7),mm7 \ + /*Store NR0 at I(0).*/ \ + __asm movq OC_I(0),mm0 \ +} + +#define OC_MID(_m,_i) [CONSTS+_m+(_i)*8] +#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) +#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) + +static void oc_idct8x8_slow(ogg_int16_t _y[64]){ + /*This routine accepts an 8x8 matrix, but in partially transposed form. + Every 4x4 block is transposed.*/ + __asm{ +#define CONSTS eax +#define Y edx + mov CONSTS,offset OC_IDCT_CONSTS + mov Y,_y +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) [Y+(_k-4)*16+8] + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+(_k*16)+64] +#define OC_J(_k) [Y+(_k-4)*16+72] + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16+8] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#undef CONSTS +#undef Y + } +} + +/*25 cycles.*/ +#define OC_IDCT_BEGIN_10 __asm{ \ + __asm movq mm2,OC_I(3) \ + __asm nop \ + __asm movq mm6,OC_C(3) \ + __asm movq mm4,mm2 \ + __asm movq mm1,OC_C(5) \ + __asm pmulhw mm4,mm6 \ + __asm movq mm3,OC_I(1) \ + __asm pmulhw mm1,mm2 \ + __asm movq mm0,OC_C(1) \ + __asm paddw mm4,mm2 \ + __asm pxor mm6,mm6 \ + __asm paddw mm2,mm1 \ + __asm movq mm5,OC_I(2) \ + __asm pmulhw mm0,mm3 \ + __asm movq mm1,mm5 \ + __asm paddw mm0,mm3 \ + __asm pmulhw mm3,OC_C(7) \ + __asm psubw mm6,mm2 \ + 
__asm pmulhw mm5,OC_C(2) \ + __asm psubw mm0,mm4 \ + __asm movq mm7,OC_I(2) \ + __asm paddw mm4,mm4 \ + __asm paddw mm7,mm5 \ + __asm paddw mm4,mm0 \ + __asm pmulhw mm1,OC_C(6) \ + __asm psubw mm3,mm6 \ + __asm movq OC_I(1),mm4 \ + __asm paddw mm6,mm6 \ + __asm movq mm4,OC_C(4) \ + __asm paddw mm6,mm3 \ + __asm movq mm5,mm3 \ + __asm pmulhw mm3,mm4 \ + __asm movq OC_I(2),mm6 \ + __asm movq mm2,mm0 \ + __asm movq mm6,OC_I(0) \ + __asm pmulhw mm0,mm4 \ + __asm paddw mm5,mm3 \ + __asm paddw mm2,mm0 \ + __asm psubw mm5,mm1 \ + __asm pmulhw mm6,mm4 \ + __asm paddw mm6,OC_I(0) \ + __asm paddw mm1,mm1 \ + __asm movq mm4,mm6 \ + __asm paddw mm1,mm5 \ + __asm psubw mm6,mm2 \ + __asm paddw mm2,mm2 \ + __asm movq mm0,OC_I(1) \ + __asm paddw mm2,mm6 \ + __asm psubw mm2,mm1 \ + __asm nop \ +} + +/*25+8=33 cycles.*/ +#define OC_ROW_IDCT_10 __asm{ \ + OC_IDCT_BEGIN_10 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm3,mm3 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + __asm paddw mm5,mm5 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm0,mm0 \ + /*Save R1.*/ \ + __asm movq OC_I(1),mm1 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ +} + +/*25+19=44 cycles.*/ +#define OC_COLUMN_IDCT_10 __asm{ \ + OC_IDCT_BEGIN_10 \ + __asm paddw mm2,OC_8 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r2=NR2*/ \ + __asm psraw mm2,4 \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=NR1*/ \ + __asm psraw mm1,4 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*Store NR2 at I(2).*/ \ + __asm movq OC_I(2),mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*Store
NR1 at I(1).*/ \ + __asm movq OC_I(1),mm1 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm4,OC_8 \ + /*r3=D'+D'*/ \ + __asm paddw mm3,mm3 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r4=NR4*/ \ + __asm psraw mm4,4 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + /*r3=NR3*/ \ + __asm psraw mm3,4 \ + __asm paddw mm6,OC_8 \ + /*r5=B''+B''*/ \ + __asm paddw mm5,mm5 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r6=NR6*/ \ + __asm psraw mm6,4 \ + /*Store NR4 at J(4).*/ \ + __asm movq OC_J(4),mm4 \ + /*r5=NR5*/ \ + __asm psraw mm5,4 \ + /*Store NR3 at I(3).*/ \ + __asm movq OC_I(3),mm3 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm7,OC_8 \ + /*r0=C'+C'*/ \ + __asm paddw mm0,mm0 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ + /*r7=NR7*/ \ + __asm psraw mm7,4 \ + /*Store NR6 at J(6).*/ \ + __asm movq OC_J(6),mm6 \ + /*r0=NR0*/ \ + __asm psraw mm0,4 \ + /*Store NR5 at J(5).*/ \ + __asm movq OC_J(5),mm5 \ + /*Store NR7 at J(7).*/ \ + __asm movq OC_J(7),mm7 \ + /*Store NR0 at I(0).*/ \ + __asm movq OC_I(0),mm0 \ +} + +static void oc_idct8x8_10(ogg_int16_t _y[64]){ + __asm{ +#define CONSTS eax +#define Y edx + mov CONSTS,offset OC_IDCT_CONSTS + mov Y,_y +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) [Y+(_k-4)*16+8] + /*Done with dequant, descramble, and partial transpose. + Now do the iDCT itself.*/ + OC_ROW_IDCT_10 + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16+8] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#undef CONSTS +#undef Y + } +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. 
+ It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Perform the iDCT.*/ + if(_last_zzi<10)oc_idct8x8_10(_y); + else oc_idct8x8_slow(_y); +} + +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxloop.h b/Engine/lib/libtheora/lib/x86_vc/mmxloop.h new file mode 100644 index 000000000..2561fca2a --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxloop.h @@ -0,0 +1,219 @@ +#if !defined(_x86_vc_mmxloop_H) +# define _x86_vc_mmxloop_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
+ On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and + mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ +#define OC_LOOP_FILTER8_MMX __asm{ \ + /*mm7=0*/ \ + __asm pxor mm7,mm7 \ + /*mm6:mm0={a0,...,a7}*/ \ + __asm movq mm6,mm0 \ + __asm punpcklbw mm0,mm7 \ + __asm punpckhbw mm6,mm7 \ + /*mm3:mm5={d0,...,d7}*/ \ + __asm movq mm5,mm3 \ + __asm punpcklbw mm3,mm7 \ + __asm punpckhbw mm5,mm7 \ + /*mm6:mm0={a0-d0,...,a7-d7}*/ \ + __asm psubw mm0,mm3 \ + __asm psubw mm6,mm5 \ + /*mm3:mm1={b0,...,b7}*/ \ + __asm movq mm3,mm1 \ + __asm punpcklbw mm1,mm7 \ + __asm movq mm4,mm2 \ + __asm punpckhbw mm3,mm7 \ + /*mm5:mm4={c0,...,c7}*/ \ + __asm movq mm5,mm2 \ + __asm punpcklbw mm4,mm7 \ + __asm punpckhbw mm5,mm7 \ + /*mm7={3}x4 \ + mm5:mm4={c0-b0,...,c7-b7}*/ \ + __asm pcmpeqw mm7,mm7 \ + __asm psubw mm4,mm1 \ + __asm psrlw mm7,14 \ + __asm psubw mm5,mm3 \ + /*Scale by 3.*/ \ + __asm pmullw mm4,mm7 \ + __asm pmullw mm5,mm7 \ + /*mm7={4}x4 \ + mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ + __asm psrlw mm7,1 \ + __asm paddw mm4,mm0 \ + __asm psllw mm7,2 \ + __asm movq mm0,[LL] \ + __asm paddw mm5,mm6 \ + /*R_i has the range [-127,128], so we compute -R_i instead. \ + mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ + __asm psubw mm4,mm7 \ + __asm psubw mm5,mm7 \ + __asm psraw mm4,3 \ + __asm psraw mm5,3 \ + __asm pcmpeqb mm7,mm7 \ + __asm packsswb mm4,mm5 \ + __asm pxor mm6,mm6 \ + __asm pxor mm4,mm7 \ + __asm packuswb mm1,mm3 \ + /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \ + /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ + we have to split things by sign (the other option is to work in 16 bits, \ + but working in 8 bits gives much better parallelism). \ + We compute abs(R_i), but save a mask of which terms were negative in mm6. \ + Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). 
\ + Finally, we split mm4 into positive and negative pieces using the mask in \ + mm6, and add and subtract them as appropriate.*/ \ + /*mm4=abs(-R_i)*/ \ + /*mm7=255-2*L*/ \ + __asm pcmpgtb mm6,mm4 \ + __asm psubb mm7,mm0 \ + __asm pxor mm4,mm6 \ + __asm psubb mm7,mm0 \ + __asm psubb mm4,mm6 \ + /*mm7=255-max(2*L-abs(R_i),0)*/ \ + __asm paddusb mm7,mm4 \ + /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ + __asm paddusb mm4,mm7 \ + __asm psubusb mm4,mm7 \ + /*Now split mm4 by the original sign of -R_i.*/ \ + __asm movq mm5,mm4 \ + __asm pand mm4,mm6 \ + __asm pandn mm6,mm5 \ + /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ + /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ + __asm paddusb mm1,mm4 \ + __asm psubusb mm2,mm4 \ + __asm psubusb mm1,mm6 \ + __asm paddusb mm2,mm6 \ +} + +#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ + do{ \ + /*Used local variable pix__ in order to fix compilation errors like: \ + "error C2425: 'SHL' : non-constant expression in 'second operand'".*/ \ + unsigned char *pix__; \ + unsigned char *ll__; \ + ll__=(_ll); \ + pix__=(_pix); \ + __asm mov YSTRIDE,_ystride \ + __asm mov LL,ll__ \ + __asm mov PIX,pix__ \ + __asm sub PIX,YSTRIDE \ + __asm sub PIX,YSTRIDE \ + /*mm0={a0,...,a7}*/ \ + __asm movq mm0,[PIX] \ + /*ystride3=_ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*mm3={d0,...,d7}*/ \ + __asm movq mm3,[PIX+YSTRIDE3] \ + /*mm1={b0,...,b7}*/ \ + __asm movq mm1,[PIX+YSTRIDE] \ + /*mm2={c0,...,c7}*/ \ + __asm movq mm2,[PIX+YSTRIDE*2] \ + OC_LOOP_FILTER8_MMX \ + /*Write it back out.*/ \ + __asm movq [PIX+YSTRIDE],mm1 \ + __asm movq [PIX+YSTRIDE*2],mm2 \ + } \ + while(0) + +#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ + do{ \ + /*Used local variable ll__ in order to fix compilation errors like: \ + "error C2443: operand size conflict".*/ \ + unsigned char *ll__; \ + unsigned char *pix__; \ + ll__=(_ll); \ + pix__=(_pix)-2; \ + __asm mov PIX,pix__ \ + __asm mov YSTRIDE,_ystride \ + __asm mov LL,ll__ \ + /*x x x x d0 c0 b0 
a0*/ \ + __asm movd mm0,[PIX] \ + /*x x x x d1 c1 b1 a1*/ \ + __asm movd mm1,[PIX+YSTRIDE] \ + /*ystride3=_ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*x x x x d2 c2 b2 a2*/ \ + __asm movd mm2,[PIX+YSTRIDE*2] \ + /*x x x x d3 c3 b3 a3*/ \ + __asm lea D,[PIX+YSTRIDE*4] \ + __asm movd mm3,[PIX+YSTRIDE3] \ + /*x x x x d4 c4 b4 a4*/ \ + __asm movd mm4,[D] \ + /*x x x x d5 c5 b5 a5*/ \ + __asm movd mm5,[D+YSTRIDE] \ + /*x x x x d6 c6 b6 a6*/ \ + __asm movd mm6,[D+YSTRIDE*2] \ + /*x x x x d7 c7 b7 a7*/ \ + __asm movd mm7,[D+YSTRIDE3] \ + /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + __asm punpcklbw mm0,mm1 \ + /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ + __asm punpcklbw mm2,mm3 \ + /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + __asm movq mm3,mm0 \ + /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + __asm punpcklwd mm0,mm2 \ + /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + __asm punpckhwd mm3,mm2 \ + /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + __asm movq mm1,mm0 \ + /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + __asm punpcklbw mm4,mm5 \ + /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ + __asm punpcklbw mm6,mm7 \ + /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + __asm movq mm5,mm4 \ + /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ + __asm punpcklwd mm4,mm6 \ + /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ + __asm punpckhwd mm5,mm6 \ + /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + __asm movq mm2,mm3 \ + /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ + __asm punpckldq mm0,mm4 \ + /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ + __asm punpckhdq mm1,mm4 \ + /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ + __asm punpckldq mm2,mm5 \ + /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ + __asm punpckhdq mm3,mm5 \ + OC_LOOP_FILTER8_MMX \ + /*mm2={b0+R_0'',...,b7+R_7''}*/ \ + __asm movq mm0,mm1 \ + /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ + __asm punpcklbw mm1,mm2 \ + /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ + __asm punpckhbw mm0,mm2 \ + /*[d]=c1 b1 c0 b0*/ \ + __asm movd D,mm1 \ + __asm mov [PIX+1],D_WORD \ + __asm psrlq mm1,32 \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE+1],D_WORD \ + /*[d]=c3 b3 c2 b2*/ \ + __asm movd D,mm1 
\ + __asm mov [PIX+YSTRIDE*2+1],D_WORD \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE3+1],D_WORD \ + __asm lea PIX,[PIX+YSTRIDE*4] \ + /*[d]=c5 b5 c4 b4*/ \ + __asm movd D,mm0 \ + __asm mov [PIX+1],D_WORD \ + __asm psrlq mm0,32 \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE+1],D_WORD \ + /*[d]=c7 b7 c6 b6*/ \ + __asm movd D,mm0 \ + __asm mov [PIX+YSTRIDE*2+1],D_WORD \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE3+1],D_WORD \ + } \ + while(0) + +# endif +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/mmxstate.c b/Engine/lib/libtheora/lib/x86_vc/mmxstate.c new file mode 100644 index 000000000..73bd1981c --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/mmxstate.c @@ -0,0 +1,211 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxstate.c 16584 2009-09-26 19:35:55Z tterribe $ + + ********************************************************************/ + +/*MMX acceleration of complete fragment reconstruction algorithm. 
+ Originally written by Rudolf Marek.*/ +#include +#include "x86int.h" +#include "mmxfrag.h" +#include "mmxloop.h" + +#if defined(OC_X86_ASM) + +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + /*Note that this value must be unsigned, to keep the __asm__ block from + sign-extending it when it puts it in a register.*/ + ogg_uint16_t p; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*Fill _dct_coeffs with p.*/ + __asm{ +#define Y eax +#define P ecx + mov Y,_dct_coeffs + movzx P,p + /*mm0=0000 0000 0000 AAAA*/ + movd mm0,P + /*mm0=0000 0000 AAAA AAAA*/ + punpcklwd mm0,mm0 + /*mm0=AAAA AAAA AAAA AAAA*/ + punpckldq mm0,mm0 + movq [Y],mm0 + movq [8+Y],mm0 + movq [16+Y],mm0 + movq [24+Y],mm0 + movq [32+Y],mm0 + movq [40+Y],mm0 + movq [48+Y],mm0 + movq [56+Y],mm0 + movq [64+Y],mm0 + movq [72+Y],mm0 + movq [80+Y],mm0 + movq [88+Y],mm0 + movq [96+Y],mm0 + movq [104+Y],mm0 + movq [112+Y],mm0 + movq [120+Y],mm0 +#undef Y +#undef P + } + } + else{ + /*Dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + else{ + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + 
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, + _dct_coeffs); + } + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*We copy these entire function to inline the actual MMX routines so that we + use only a single indirect call.*/ + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; +#define SRC edx +#define DST eax +#define YSTRIDE ecx +#define YSTRIDE3 edi + OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); +#undef SRC +#undef DST +#undef YSTRIDE +#undef YSTRIDE3 + } +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. + _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. 
+ _fragy0: The Y coordinate of the first fragment row to filter. + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ + OC_ALIGN8(unsigned char ll[8]); + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi+1opt_vtable.frag_sub=oc_enc_frag_sub_mmx; + _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx; + _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx; + } + if(cpu_flags&OC_CPU_X86_MMXEXT){ + _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext; + _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext; + _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext; + _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext; + _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext; + _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext; + _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext; + } + if(cpu_flags&OC_CPU_X86_SSE2){ +# if defined(OC_X86_64_ASM) + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2; +# endif + } +} +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/x86enc.h b/Engine/lib/libtheora/lib/x86_vc/x86enc.h new file mode 100644 index 000000000..581484641 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/x86enc.h @@ -0,0 +1,47 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $ + + ********************************************************************/ + +#if !defined(_x86_vc_x86enc_H) +# define _x86_vc_x86enc_H (1) +# include "../encint.h" +# include "x86int.h" + +void oc_enc_vtable_init_x86(oc_enc_ctx *_enc); + +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride); +void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,const unsigned char *_y,int _stride); +void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,int _stride); +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]); +void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]); + +#endif diff --git a/Engine/lib/libtheora/lib/x86_vc/x86int.h b/Engine/lib/libtheora/lib/x86_vc/x86int.h new file mode 100644 index 000000000..4cca48531 --- /dev/null +++ b/Engine/lib/libtheora/lib/x86_vc/x86int.h @@ -0,0 +1,42 @@ 
+/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_x86_vc_x86int_H) +# define _x86_vc_x86int_H (1) +# include "../internal.h" + +void oc_state_vtable_init_x86(oc_theora_state *_state); + +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride); +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue); +void oc_frag_recon_inter_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t *_residue); +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue); +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); +void oc_restore_fpu_mmx(void); + +#endif diff --git a/Engine/lib/libtheora/lib/dec/x86_vc/x86state.c b/Engine/lib/libtheora/lib/x86_vc/x86state.c similarity index 53% rename from 
Engine/lib/libtheora/lib/dec/x86_vc/x86state.c rename to Engine/lib/libtheora/lib/x86_vc/x86state.c index 735390823..a786bec28 100644 --- a/Engine/lib/libtheora/lib/dec/x86_vc/x86state.c +++ b/Engine/lib/libtheora/lib/x86_vc/x86state.c @@ -5,37 +5,58 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $ + last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ -#if defined(USE_ASM) - #include "x86int.h" -#include "../../cpu.c" + +#if defined(OC_X86_ASM) + +#include "../cpu.c" + +/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into + each quadrant of the destination.*/ +static const unsigned char OC_FZIG_ZAG_MMX[128]={ + 0, 8, 1, 2, 9,16,24,17, + 10, 3,32,11,18,25, 4,12, + 5,26,19,40,33,34,41,48, + 27, 6,13,20,28,21,14, 7, + 56,49,42,35,43,50,57,36, + 15,22,29,30,23,44,37,58, + 51,59,38,45,52,31,60,53, + 46,39,47,54,61,62,55,63, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, +}; void oc_state_vtable_init_x86(oc_theora_state *_state){ _state->cpu_flags=oc_cpu_flags_get(); - - /* fill with defaults */ - oc_state_vtable_init_c(_state); - - /* patch MMX functions */ if(_state->cpu_flags&OC_CPU_X86_MMX){ + _state->opt_vtable.frag_copy=oc_frag_copy_mmx; _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; 
_state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; - _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; - _state->opt_vtable.state_frag_copy=oc_state_frag_copy_mmx; + _state->opt_vtable.idct8x8=oc_idct8x8_mmx; _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; - _state->opt_vtable.state_loop_filter_frag_rows=oc_state_loop_filter_frag_rows_mmx; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_mmx; + _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; } + else oc_state_vtable_init_c(_state); } - #endif