diff -Naur qemu-0.9.1.orig/configure qemu-0.9.1/configure
--- qemu-0.9.1.orig/configure	2008-01-06 20:38:42.000000000 +0100
+++ qemu-0.9.1/configure	2008-02-06 10:19:59.000000000 +0100
@@ -790,6 +790,12 @@
 if test "$cpu" = "i386" ; then
   echo "ARCH=i386" >> $config_mak
   echo "#define HOST_I386 1" >> $config_h
+
+  # gcc4 breakage check (i386 host only): emitted into $config_h as a preprocessor test on __GNUC__
+  echo "#if (__GNUC__ > 3)" >> $config_h
+  echo "#define GCC_BREAKS_T_REGISTER" >> $config_h
+  echo "#endif" >> $config_h
+
 elif test "$cpu" = "x86_64" ; then
   echo "ARCH=x86_64" >> $config_mak
   echo "#define HOST_X86_64 1" >> $config_h
diff -Naur qemu-0.9.1.orig/dyngen.c qemu-0.9.1/dyngen.c
--- qemu-0.9.1.orig/dyngen.c	2008-01-06 20:38:42.000000000 +0100
+++ qemu-0.9.1/dyngen.c	2008-02-06 10:31:41.000000000 +0100
@@ -1488,6 +1488,16 @@
         if (get32((uint32_t *)p) != 0x4e800020)
             error("blr expected at the end of %s", name);
         copy_size = p - p_start;
+
+        /* blr check for inline returns: in op_* functions other than op_exit*, every word
+           from p_start up to (but not including) p_end - 4 is tested against the pattern below. */
+        if(strstart(name, "op_", NULL) && !strstart(name, "op_exit", NULL)) {
+            for(p=p_start; p < p_end - 4; p+=4) { /* the word at p_end - 4 is never examined */
+                if ((get32((uint32_t *)p) & 0xfc00fff0) == 0x4c000020) { /* mask ignores bits 16-25 and the low 4 bits */
+                    error("Inline blr detected in %s. Please append FORCE_RET to the function.", name);
+                }
+            }
+        }
     }
 #elif defined(HOST_S390)
     {
@@ -1931,6 +1941,17 @@
                     type = ELF32_R_TYPE(rel->r_info);
                     addend = rel->r_addend;
                     reloc_offset = rel->r_offset - start_offset;
+                    if (strstart(sym_name, "__op_jmp", &p)) { /* presumably leaves p just past the prefix -- confirm against strstart */
+                        int n;
+                        n = strtol(p, NULL, 10); /* decimal index that follows "__op_jmp" */
+                        /* __op_jmp relocations are not resolved here: they are
+                           done at runtime to do translated block chaining,
+                           so only the offset of the instruction is stored,
+                           in jmp_offsets[n] of the generated code */
+                        fprintf(outfile, "    jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n",
+                                n, reloc_offset);
+                        continue; /* skip the relocation-type switch below */
+                    }
                     switch(type) {
                     case R_X86_64_32:
                         fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (uint32_t)%s + %d;\n",
diff -Naur qemu-0.9.1.orig/exec-all.h qemu-0.9.1/exec-all.h
--- qemu-0.9.1.orig/exec-all.h	2008-01-06 20:38:42.000000000 +0100
+++ qemu-0.9.1/exec-all.h	2008-02-06 10:31:41.000000000 +0100
@@ -142,6 +142,9 @@
 #if defined(__i386__) && !defined(_WIN32)
 #define USE_DIRECT_JUMP
 #endif
+#if defined(__x86_64__) /* x86_64 hosts use direct jumps too; unlike the i386 test above there is no _WIN32 exclusion */
+#define USE_DIRECT_JUMP
+#endif
 
 typedef struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
@@ -228,7 +231,7 @@
     asm volatile ("sync" : : : "memory");
     asm volatile ("isync" : : : "memory");
 }
-#elif defined(__i386__)
+#elif defined(__i386__) || defined(__x86_64__) /* x86_64 shares the i386 tb_set_jmp_target1 below */
 static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr)
 {
     /* patch the branch destination */
@@ -320,6 +323,18 @@
 		  "1:\n");\
 } while (0)
 
+#elif defined(__x86_64__) && defined(USE_DIRECT_JUMP)
+/* x86_64 GOTO_TB: emits ".quad 1f" under the op label (between ASM_DATA_SECTION and ASM_PREVIOUS_SECTION), then "jmp __op_jmp<n>" followed by label 1; tbparam is not used here. */
+#define GOTO_TB(opname, tbparam, n)\
+do {\
+    asm volatile (ASM_DATA_SECTION\
+		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
+		  ".quad 1f\n"\
+		  ASM_PREVIOUS_SECTION \
+                  "jmp " ASM_NAME(__op_jmp) #n "\n"\
+		  "1:\n");\
+} while (0)
+
 #else
 
 /* jump to next block operations (more portable code, does not need
diff -Naur qemu-0.9.1.orig/softmmu_header.h qemu-0.9.1/softmmu_header.h
--- qemu-0.9.1.orig/softmmu_header.h	2008-01-06 20:38:42.000000000 +0100
+++ qemu-0.9.1/softmmu_header.h	2008-02-06 10:19:59.000000000 +0100
@@ -189,9 +189,15 @@
 #else
 #error unsupported size
 #endif
+#ifdef GCC_BREAKS_T_REGISTER
+                  "pushl %%ecx\n" /* save ecx before the call; popped again after it */
+#endif
                   "pushl %6\n"
                   "call %7\n"
                   "popl %%eax\n"
+#ifdef GCC_BREAKS_T_REGISTER
+                  "popl %%ecx\n" /* restore the ecx value pushed before the call */
+#endif
                   "jmp 2f\n"
                   "1:\n"
                   "addl 8(%%edx), %%eax\n"
@@ -209,14 +215,22 @@
                   : "r" (ptr),
 /* NOTE: 'q' would be needed as constraint, but we could not use it
    with T1 ! */
+#if (DATA_SIZE == 1 || DATA_SIZE == 2) && defined(GCC_BREAKS_T_REGISTER) /* 1- and 2-byte data: use the 'q' constraint mentioned in the NOTE above */
+                  "q" (v),
+#else
                   "r" (v),
+#endif
                   "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS),
                   "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
                   "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MMU_INDEX][0].addr_write)),
                   "i" (CPU_MMU_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__st, SUFFIX), MMUSUFFIX))
+#ifdef GCC_BREAKS_T_REGISTER
+                  : "%eax", "%edx", "memory", "cc"); /* ecx is not listed here; presumably because the asm pushes/pops it around the call -- confirm */
+#else
                   : "%eax", "%ecx", "%edx", "memory", "cc");
+#endif
 }
 
 #else
diff -Naur qemu-0.9.1.orig/target-alpha/cpu.h qemu-0.9.1/target-alpha/cpu.h
--- qemu-0.9.1.orig/target-alpha/cpu.h	2008-01-06 20:38:44.000000000 +0100
+++ qemu-0.9.1/target-alpha/cpu.h	2008-02-06 10:19:59.000000000 +0100
@@ -275,6 +275,8 @@
      * used to emulate 64 bits target on 32 bits hosts
      */
     target_ulong t0, t1, t2;
+#elif defined(GCC_BREAKS_T_REGISTER)
+    target_ulong t2; /* backs the T2 macro (env->t2) that target-alpha/exec.h defines in this configuration */
 #endif
     /* */
     double ft0, ft1, ft2;
diff -Naur qemu-0.9.1.orig/target-alpha/exec.h qemu-0.9.1/target-alpha/exec.h
--- qemu-0.9.1.orig/target-alpha/exec.h	2008-01-06 20:38:44.000000000 +0100
+++ qemu-0.9.1/target-alpha/exec.h	2008-02-06 10:19:59.000000000 +0100
@@ -36,6 +36,12 @@
 #define T1 (env->t1)
 #define T2 (env->t2)
 
+#elif defined(GCC_BREAKS_T_REGISTER)
+/* T0 and T1 stay in host registers AREG1/AREG2; only T2 is accessed through env->t2. */
+register uint64_t T0 asm(AREG1);
+register uint64_t T1 asm(AREG2);
+#define T2 (env->t2)
+
 #else
 
 register uint64_t T0 asm(AREG1);
diff -Naur qemu-0.9.1.orig/target-alpha/op_template.h qemu-0.9.1/target-alpha/op_template.h
--- qemu-0.9.1.orig/target-alpha/op_template.h	2008-01-06 20:38:44.000000000 +0100
+++ qemu-0.9.1/target-alpha/op_template.h	2008-02-06 10:31:23.000000000 +0100
@@ -29,7 +29,26 @@
 #if !defined(HOST_SPARC) && !defined(HOST_SPARC64)
 void OPPROTO glue(op_reset_FT, REG) (void)
 {
+#ifdef HOST_PPC
+    /* We have a problem with HOST_PPC here.
+       We want this code:
+         glue(FT, REG) = 0;
+       Unfortunately GCC4 notices that this stores (double)0.0 into
+       env->ft0 and emits that constant into .rodata, plus instructions
+       to load that zero from there.  That construct can't be parsed by dyngen.
+       We could add -ffast-math for compiling op.c, which would just make it generate
+       two stores of zeros into both words of ft0.  But -ffast-math may have other
+       side effects on the emulation.  We could use __builtin_memset,
+       which perhaps would be the sanest.  That relies on -O2 and our other options
+       to inline that memset, which currently happens, but who knows for how long.
+       So we simply do it by hand, in a barely type-safe way :-/  */
+    union baeh { double d; unsigned int i[2];}; /* overlays one double with two unsigned ints */
+    union baeh *p = (union baeh*)&(glue(FT, REG)); /* view the FT register through the union */
+    p->i[0] = 0;
+    p->i[1] = 0;
+#else
     glue(FT, REG) = 0;
+#endif
     RETURN();
 }
 #else
diff -Naur qemu-0.9.1.orig/target-arm/cpu.h qemu-0.9.1/target-arm/cpu.h
--- qemu-0.9.1.orig/target-arm/cpu.h	2008-01-06 20:38:44.000000000 +0100
+++ qemu-0.9.1/target-arm/cpu.h	2008-02-06 10:19:59.000000000 +0100
@@ -66,6 +66,9 @@
  */
 
 typedef struct CPUARMState {
+#if defined(GCC_BREAKS_T_REGISTER)
+    uint32_t t2; /* backs the T2 macro (env->t2) that target-arm/exec.h defines in this configuration */
+#endif
     /* Regs for current mode.  */
     uint32_t regs[16];
     /* Frequently accessed CPSR bits are stored separately for efficiently.
diff -Naur qemu-0.9.1.orig/target-arm/exec.h qemu-0.9.1/target-arm/exec.h
--- qemu-0.9.1.orig/target-arm/exec.h	2008-01-06 20:38:44.000000000 +0100
+++ qemu-0.9.1/target-arm/exec.h	2008-02-06 10:19:59.000000000 +0100
@@ -23,7 +23,12 @@
 register struct CPUARMState *env asm(AREG0);
 register uint32_t T0 asm(AREG1);
 register uint32_t T1 asm(AREG2);
+#if defined(GCC_BREAKS_T_REGISTER)
+#define T2 (env->t2) /* T2 is read from the CPU state instead of host register AREG3 */
+#else
 register uint32_t T2 asm(AREG3);
+#endif
+
 
 /* TODO: Put these in FP regs on targets that have such things.  */
 /* It is ok for FT0s and FT0d to overlap.  Likewise FT1s and FT1d.  */
diff -Naur qemu-0.9.1.orig/target-i386/cpu.h qemu-0.9.1/target-i386/cpu.h
--- qemu-0.9.1.orig/target-i386/cpu.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-i386/cpu.h	2008-02-06 10:19:59.000000000 +0100
@@ -470,6 +470,8 @@
 #if TARGET_LONG_BITS > HOST_LONG_BITS
     /* temporaries if we cannot store them in host registers */
     target_ulong t0, t1, t2;
+#elif defined(GCC_BREAKS_T_REGISTER)
+    target_ulong t1; /* backs the T1 macro (env->t1) that target-i386/exec.h defines in this configuration */
 #endif
 
     /* standard registers */
diff -Naur qemu-0.9.1.orig/target-i386/exec.h qemu-0.9.1/target-i386/exec.h
--- qemu-0.9.1.orig/target-i386/exec.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-i386/exec.h	2008-02-06 10:19:59.000000000 +0100
@@ -39,6 +39,12 @@
 #define T1 (env->t1)
 #define T2 (env->t2)
 
+#elif defined(GCC_BREAKS_T_REGISTER)
+/* For this target T1 (not T2) is the temporary moved to env; T0 and T2 keep AREG1 and AREG3. */
+register target_ulong T0 asm(AREG1);
+#define T1 (env->t1)
+register target_ulong T2 asm(AREG3);
+
 #else
 
 /* XXX: use unsigned long instead of target_ulong - better code will
diff -Naur qemu-0.9.1.orig/target-i386/op.c qemu-0.9.1/target-i386/op.c
--- qemu-0.9.1.orig/target-i386/op.c	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-i386/op.c	2008-02-06 10:31:17.000000000 +0100
@@ -290,6 +290,7 @@
     EDX = (uint32_t)(res >> 32);
     CC_DST = res;
     CC_SRC = (res != (int32_t)res);
+    FORCE_RET();
 }
 
 void OPPROTO op_imulw_T0_T1(void)
@@ -299,6 +300,7 @@
     T0 = res;
     CC_DST = res;
     CC_SRC = (res != (int16_t)res);
+    FORCE_RET();
 }
 
 void OPPROTO op_imull_T0_T1(void)
@@ -308,6 +310,7 @@
     T0 = res;
     CC_DST = res;
     CC_SRC = (res != (int32_t)res);
+    FORCE_RET();
 }
 
 #ifdef TARGET_X86_64
diff -Naur qemu-0.9.1.orig/target-i386/ops_template.h qemu-0.9.1/target-i386/ops_template.h
--- qemu-0.9.1.orig/target-i386/ops_template.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-i386/ops_template.h	2008-02-06 10:31:17.000000000 +0100
@@ -467,6 +467,7 @@
     int count;
     count = T1 & SHIFT_MASK;
     CC_SRC = T0 >> count;
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_bts, SUFFIX), _T0_T1_cc)(void)
diff -Naur qemu-0.9.1.orig/target-mips/cpu.h qemu-0.9.1/target-mips/cpu.h
--- qemu-0.9.1.orig/target-mips/cpu.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-mips/cpu.h	2008-02-06 10:19:59.000000000 +0100
@@ -149,6 +149,8 @@
     target_ulong t0;
     target_ulong t1;
     target_ulong t2;
+#elif defined(GCC_BREAKS_T_REGISTER)
+    target_ulong t2; /* backs the T2 macro (env->t2) that target-mips/exec.h defines in this configuration */
 #endif
     target_ulong HI[MIPS_DSP_ACC][MIPS_TC_MAX];
     target_ulong LO[MIPS_DSP_ACC][MIPS_TC_MAX];
diff -Naur qemu-0.9.1.orig/target-mips/exec.h qemu-0.9.1/target-mips/exec.h
--- qemu-0.9.1.orig/target-mips/exec.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-mips/exec.h	2008-02-06 10:19:59.000000000 +0100
@@ -14,6 +14,10 @@
 #define T0 (env->t0)
 #define T1 (env->t1)
 #define T2 (env->t2)
+#elif defined(GCC_BREAKS_T_REGISTER) /* T0/T1 keep AREG1/AREG2; T2 is accessed through env->t2 */
+register target_ulong T0 asm(AREG1);
+register target_ulong T1 asm(AREG2);
+#define T2 (env->t2)
 #else
 register target_ulong T0 asm(AREG1);
 register target_ulong T1 asm(AREG2);
diff -Naur qemu-0.9.1.orig/target-ppc/exec.h qemu-0.9.1/target-ppc/exec.h
--- qemu-0.9.1.orig/target-ppc/exec.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-ppc/exec.h	2008-02-06 10:29:35.000000000 +0100
@@ -38,6 +38,10 @@
 #define T1 (env->t1)
 #define T2 (env->t2)
 #define TDX "%016" PRIx64
+#elif defined(GCC_BREAKS_T_REGISTER) /* T0/T1 keep AREG1/AREG2; T2 is accessed through env->t2 */
+register unsigned long T0 asm(AREG1);
+register unsigned long T1 asm(AREG2);
+#define T2 (env->t2) /* NOTE(review): no target-ppc/cpu.h change is visible in this patch -- confirm env->t2 exists in this configuration */
 #else
 register unsigned long T0 asm(AREG1);
 register unsigned long T1 asm(AREG2);
diff -Naur qemu-0.9.1.orig/target-sparc/exec.h qemu-0.9.1/target-sparc/exec.h
--- qemu-0.9.1.orig/target-sparc/exec.h	2008-01-06 20:38:45.000000000 +0100
+++ qemu-0.9.1/target-sparc/exec.h	2008-02-06 10:19:59.000000000 +0100
@@ -32,9 +32,13 @@
 
 #else
 #define REGWPTR env->regwptr
+#if !defined(GCC_BREAKS_T_REGISTER) /* register-based T2; reg_T2 is now defined only in this branch */
 register uint32_t T2 asm(AREG3);
-#endif
 #define reg_T2
+#else /* GCC_BREAKS_T_REGISTER */
+#define T2 (env->t2) /* NOTE(review): no target-sparc/cpu.h change is visible in this patch -- confirm env->t2 exists */
+#endif /* !GCC_BREAKS_T_REGISTER */
+#endif /* takes the place of the #endif removed above */
 #endif
 
 #define FT0 (env->ft0)