这个 2048 RSA 数字的 Bignum 乘法是蒙哥马利乘法吗?

逆向工程 艾达 二元分析
2021-06-10 03:32:08

下面的代码片段是某种bignum涉及 2048 位数字乘法,可能在 RSA 解密的上下文中。

  • 有人认识这种模式吗?
  • 可能是某种Montgomery乘法(不太可能)吗?
  • 或者某些人认可的 RSA 混淆?

有 2 个乘法循环。我用loop1标记了它们loop2loop1做我期望的部分乘法和加法。但是第二个循环如何在数学上有意义它使用一个单独的向量相乘。这会累积成模运算吗?

我将 IDA 编译转换为下面更具可读性的表示,原始在最后。谁能认出一个模式?

struct mod {
  int len;
  uint32_t _pad0[64]; /* 4 */
  uint32_t r[64];     /* 4 + (256*1) :offset-260  */
  uint32_t _pad2[64]; /* 4 + (256*2) :offset-516  */
  uint32_t _pad3[64]; /* 4 + (256*3) :offset-772  */
  uint32_t _pad4[64]; /* 4 + (256*4) :offset-1028 */
  int fac;            /* 4 + (256*5) :offset-1284 */
};

#define align8(c) (((uint64_t)(c)) & (~0x7ULL))

int64_t some_kind_of_mult(int32_t *acc , int32_t *a , int32_t *b, struct mod *n)
{
  int i0, i1, i2, j0 ;
  uint32_t top;
  uint64_t c0 = 0, c1 = 0, v0, v1, v2, v3, c01, c01_h;
  intt64_t m1, res, topc0 = 0;

  memset(acc,0,256);
  res = 0;
  l = n->len;

  if ( !l )
    return res;

  top = topc0 = 0;

  for (i0 = 0; i0 < l; i0++)
  {
      /* loop1: multiply a[0..l] with partial b[i0], accumulate resule in acc[0..l] */
      for (c1=0, i1=0; i1 < l; i1++)
      {
          v1 = ((uint64_t)a[i1] * (uint64_t)b[i0]) + acc[i1] + c1;
          acc[i1] = v1;
          c1 = v1 >> 32;
      }

      top = (uint32_t)c1 + topc0;
      c01 = c1 + topc0;

      m1 = (uint32_t)(acc[0] * n->fac);
      c2 = (m1 * (uint64_t)(n->r[0]) + acc[0]) >> 32;

      /* loop2: do some acc[0..l] postprocessing involving some acc[i2-1] = .. acc[i2] ... calculcation */
      for (i2 = 1; i2 < l; i2++)
      {
          v2 = acc[i2] + c2 + (m1 * n->r[i2]);
          acc[i2-1] = v2;
          c2 = (v2 >> 32);
      }

      acc[l-1] = top + c2;
      topc0 = (c01 >> 32) + ((top + c2) >> 32);
  }

  if ( !topc0 && l-1 >= 0 )
  {
      res = l-1;
      if ( acc[l-1] < n->r[l-1] )
        /* possible error in IDA function */
        return res;
      if ( *v28 <= v29 )
        /* possible error in IDA function */
        JUMPOUT(loc_BC46E10);
  }

  res &= 0xffffffff00000000ULL;;
  /* loop3: some final substraction on acc[0-l] */
  for (j0 = 0; j0 < l; j0++)
  {
      v3 = acc[j0] - (uint64_t)n->r[j0] - (uint32_t)res;
      acc[j0] = v3;
      res = -(v3 >> 32);
  }

  return res;
}

这是原始的 IDA 反编译:

 int64 fastcall sub_BC46730(_DWORD *a1, int64 a2, int64 a3, int64 a4)
{
    _DWORD *v4; // r9@1
    signed __int64 v5; // rdi@1
    __int64 v6; // rbp@1
    __int64 result; // rax@1
    unsigned int v8; // ecx@1
    unsigned int v9; // er13@1
    __int64 v10; // r10@2
    unsigned __int64 v11; // rdi@3
    __int64 v12; // r8@3
    unsigned __int64 v13; // rax@4
    unsigned __int64 v14; // rax@4
    __int64 v15; // r14@5
    unsigned __int64 v16; // r11@5
    __int64 v17; // rax@5
    unsigned __int64 v18; // r11@5
    __int64 v19; // r8@5
    unsigned __int64 v20; // r15@5
    signed __int64 v21; // rdi@6
    _DWORD *v22; // rcx@6
    __int64 v23; // rax@7
    unsigned __int64 v24; // rax@7
    unsigned __int64 v25; // rax@7
    __int64 v26; // rsi@9
    unsigned __int64 v27; // rcx@11
    unsigned int *v28; // rdi@14
    unsigned int v29; // ebx@14
    __int64 v30; // [sp+0h] [bp-40h]@2
    int v31; // [sp+8h] [bp-38h]@2
    int v32; // [sp+Ch] [bp-34h]@2
    v4 = a1;
    v5 = (signed __int64)(a1 + 2);
    v6 = a4;
    *(_QWORD *)(v5 - 8) = 0LL;
    *(_QWORD *)(v5 + 240) = 0LL;
    result = 0LL;
    memset(
        (void *)(v5 & 0xFFFFFFFFFFFFFFF8LL),
        0,
        8 * ((unsigned __int64)((unsigned int)v4 - (v5 & 0xFFFFFFF8) + 256) >> 3));
    v8 = 0;
    v9 = *(_DWORD *)v6;
    if ( *(_DWORD *)v6 )
    {
        v10 = 0LL;
        v31 = *(_DWORD *)(v6 + 1284);
        v32 = v9 - 1;
        v30 = v9 - 1;
        do {
            v11 = 0LL;
            v12 = 0LL;
            /* loop1: */
            do {
                v13 = *(_DWORD *)(a2 + v11) * (unsigned __int64)*(_DWORD *)(a3 + 4 * v10) + v4[v11 / 4] + v12;
                v4[v11 / 4] = v13;
                v11 += 4LL;
                v14 = v13 >> 32;
                v12 = (unsigned int)v14;
            } while ( v11 != 4 * v30 + 4 );
            v15 = (unsigned int)v14 + v8;
            v16 = v14 + v8;
            v17 = *v4;
            v18 = v16 >> 32;
            v19 = (unsigned int)(v17 * v31);
            v20 = (v19 * (unsigned __int64)*(_DWORD *)(v6 + 260) + v17) >> 32;
            if ( v9 <= 1 )
                goto LABEL_16;
            v21 = v6 + 264;
            v22 = v4;
            /* loop2: */
            do
            {
                v23 = v22[1];
                v21 += 4LL;
                ++v22;
                v24 = v23 + v20 + v19 * *(_DWORD *)(v21 - 4);
                *(v22 - 1) = v24;
                v25 = v24 >> 32;
                v20 = (unsigned int)v25;
            }
            while ( v21 != v6 + 4LL * (v9 - 2) + 268 );
            ++v10;
            v4[v30] = v15 + v25;
            v8 = v18 + ((v15 + v25) >> 32);
        }
        while ( v9 > (unsigned int)v10 );
        v26 = 0LL;
        if ( !v8 && v32 >= 0 )
        {
            result = v32;
            v28 = &v4[v32];
            v29 = *(_DWORD *)(4LL * v32 + v6 + 260);
            if ( *v28 < v29 )
                return result;
            if ( *v28 <= v29 )
            LABEL_16:
                JUMPOUT(loc_BC46E10);
        }
        LODWORD(result) = 0;
        do
        {
            v27 = v4[v26] - (unsigned __int64)*(_DWORD *)(v6 + 4 * v26 + 260) - (unsigned int)result;
            v4[v26++] = v27;
            result = (unsigned int)-HIDWORD(v27);
        }
        while ( v9 > (unsigned int)v26 );
    }
    return result;
}
1个回答

该结构似乎类似于蒙哥马利 CIOS 实施

首席信息官蒙哥马利

来自 Researchgate 上的论文:粗集成操作数扫描

更多关于 CIOS Montgomery 的参考:https : //www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf