
// ====================== DECLARATIONS ======================

// DECLARE_SHADOWMAP_SAMPLER(n): declares the shadow map resource(s) bound to
// register index n. The index is token-pasted onto the register prefix, so it
// has to be a plain integer token. Every variant already ends with ';'.
#if defined(_VISION_DX11)

#ifdef USE_VARIANCE_SHADOWS

// DX11 + variance shadows: two-component texture ShadowMap at t<n> and
// sampler ShadowMapSampler at s<n>.
#define DECLARE_SHADOWMAP_SAMPLER(_samplerregister_) \
          Texture2D <float2> ShadowMap : register(t##_samplerregister_); \
          sampler ShadowMapSampler : register(s##_samplerregister_);

#else

// DX11 without variance shadows: single-component texture ShadowMap at t<n>
// and sampler ShadowMapSampler at s<n>.
#define DECLARE_SHADOWMAP_SAMPLER(_samplerregister_) \
          Texture2D <float> ShadowMap : register(t##_samplerregister_); \
          sampler ShadowMapSampler : register(s##_samplerregister_);

#endif


#else

// All other platforms: a combined sampler2D named ShadowMap at s<n>
// (no separate ShadowMapSampler object is declared here).
#define DECLARE_SHADOWMAP_SAMPLER(_samplerregister_) \
        sampler2D ShadowMap : register(s##_samplerregister_);

#endif

/*
=======================================================================================================================
Helper Functions
=======================================================================================================================
*/

#ifdef _VISION_PS3

// Decodes a depth value stored across three 8-bit channels of a texel.
// The channels are read in a, r, g order (most significant first), scaled back
// to byte values, combined into a 24-bit fraction and remapped with 2*d-1.
inline float ReadNativeDepth(float4 rawDepth)
{
    // Per-channel weights: 2^16, 2^8 and 2^0, each divided by 2^24-1.
    float3 vChannelWeights = float3( 65536.0/16777215.0, 256.0/16777215.0, 1.0/16777215.0 );

    // Recover the integer byte stored in each channel.
    float3 vChannelBytes = round( rawDepth.arg * 255.0 );

    float fPackedDepth = dot( vChannelBytes, vChannelWeights );
    return 2*fPackedDepth-1;
}

// READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(tex, sampler, uv): reads one depth value
// from a shadow map.
// NOTE: every variant expands to an expression followed by ';', so the macro
// can only be used as the tail of a statement, never inside a larger expression.

// PS3 variant: fetch the raw texel and decode it with ReadNativeDepth.
#define READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(__ShadowTex__, __ShadowTexSampler__, __Uv__) \
      ReadNativeDepth(vTex2D(__ShadowTex__, __ShadowTexSampler__, (__Uv__)));

#elif defined(_VISION_PSP2)

// PSP2 variant: typed tex2D<float> fetch, red channel. The sampler argument is
// accepted for interface parity but not used in the expansion.
#define READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(__ShadowTex__, __ShadowTexSampler__, __Uv__) \
      tex2D<float>(__ShadowTex__, (__Uv__)).r;
//READ_CONVERTED_DEPTH(__ShadowTex__, __ShadowTexSampler__, (__Uv__))

#else

// Generic variant: vTex2D fetch, red channel.
#define READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(__ShadowTex__, __ShadowTexSampler__, __Uv__) \
      vTex2D(__ShadowTex__, __ShadowTexSampler__, (__Uv__)).r;
//READ_CONVERTED_DEPTH(__ShadowTex__, __ShadowTexSampler__, (__Uv__))

#endif

// Maps a position to a displayable value: the absolute position is scaled by
// 0.01 and only the fractional part of each component is kept (so the result
// repeats every 100 units per axis). The w component is always 1.
inline float4 GetDisplayPosition(float3 vPos)
{
  float3 vScaledPos = abs(vPos) * 0.01f;

  // modf needs an output for the integer part; it is not used afterwards.
  float3 vWholePart;
  float3 vFraction = modf(vScaledPos, vWholePart);

  return float4(vFraction, 1.0f);
}

/*
=======================================================================================================================
Cascade Selection
=======================================================================================================================
*/

// Selects a cascade index from the depth along EyePlane.
// The depth is dot(vWorldSpacePos, EyePlane); the returned index is the number
// of entries in CascadeTextureSplitsEnd that this depth exceeds (0..4).
inline int GetCascadeForIntervalSelectionDepth(float4 vWorldSpacePos)
{
  float fPlaneDepth = dot(vWorldSpacePos, EyePlane);

  // 1 for every split end that lies in front of the pixel, 0 otherwise.
  float4 vSplitPassed = fPlaneDepth.xxxx > CascadeTextureSplitsEnd;

  return dot(vSplitPassed, float4(1,1,1,1));
}

// Selects a cascade index from the radial distance between the pixel and EyePos.
// The returned index is the number of entries in CascadeTextureSplitsEnd that
// this distance exceeds (0..4).
inline int GetCascadeForIntervalSelection(float4 vWorldSpacePos)
{
  float fEyeDistance = length(vWorldSpacePos.xyz - EyePos);

  // Counting the passed splits with a compare + dot avoids a chain of branches.
  float4 vSplitPassed = fEyeDistance.xxxx > CascadeTextureSplitsEnd;

  return dot(vSplitPassed, float4(1,1,1,1));
}

// Picks one of four point light shadow faces (0..3) for a world space position.
// The light-to-pixel vector is projected onto four fixed axes; the index of the
// axis with the largest projection is returned. Axes 0/1 form the first pair,
// axes 2/3 the second pair.
inline int GetFaceForPointLight(float4 vWorldSpacePos)
{
  float3 vLightToPixel = vWorldSpacePos.xyz - LightWorldSpace;

  float4 vAxisDot;
  vAxisDot.x = dot(vLightToPixel, float3(0.0f, -0.57735f, 0.8165f));
  vAxisDot.y = dot(vLightToPixel, float3(0.0f, -0.57735f, -0.8165f));
  vAxisDot.z = dot(vLightToPixel, float3(-0.8165f, 0.57735f, 0.0f));
  vAxisDot.w = dot(vLightToPixel, float3( 0.8165f, 0.57735f, 0.0f));

  // Best projection within each pair: x = max(axis0, axis1), y = max(axis2, axis3).
  float2 vPairBest = max(vAxisDot.xz, vAxisDot.yw);

  // 1 when the first pair holds the overall maximum, 0 otherwise.
  float fUseFirstPair = step(vPairBest.y, vPairBest.x);

  // Per pair: 1 when the second axis of the pair wins, 0 when the first one does.
  float2 vSecondAxisWins = step(vAxisDot.xz, vAxisDot.yw);

  float fFirstPairFace = vSecondAxisWins.x;
  float fSecondPairFace = vSecondAxisWins.y + 2;
  return fFirstPairFace * fUseFirstPair + fSecondPairFace * (1.0 - fUseFirstPair);
}

/*
=======================================================================================================================
PCF(n) Shadowing
=======================================================================================================================
*/

// Table of 16 two-dimensional sample offsets, every component within [-1, 1].
// The filter loops in this file read it four entries at a time and scale the
// offsets by the filter radius.
// On WiiU the global table is not declared; the functions that need it declare
// an identical local copy instead.
#ifndef _VISION_WIIU
  static const float2 poissonDisk[16] = { 
    float2( 0.14383161, -0.14100790 ),
    float2( -0.94201624, -0.39906216 ), 
    float2( 0.94558609, -0.76890725 ), 
    float2( -0.094184101, -0.92938870 ), 
    float2( 0.34495938, 0.29387760 ), 
    float2( -0.91588581, 0.45771432 ), 
    float2( -0.81544232, -0.87912464 ), 
    float2( -0.38277543, 0.27676845 ), 
    float2( 0.97484398, 0.75648379 ), 
    float2( 0.44323325, -0.97511554 ), 
    float2( 0.53742981, -0.47373420 ), 
    float2( -0.26496911, -0.41893023 ), 
    float2( 0.79197514, 0.19090188 ), 
    float2( -0.24188840, 0.99706507 ), 
    float2( -0.81409955, 0.91437590 ), 
    float2( 0.19984126, 0.78641367 )
  };
#endif

$if defined (SHADOWING_MODE_PCF8) || defined(SHADOWING_MODE_PCF16) || defined(SHADOWING_MODE_PCSS16)

// SampleShadowMap(tex, sampler, uv, compareDepth) fetches one shadow map tap;
// CalculateShadowResult(values, compareDepth) converts the fetched value(s)
// into the per-tap shadow result. Both macros are always defined as a pair.

// PS3: tex2Dproj with the compare depth passed in z; the fetched value is
// inverted (1 - value) to get the result.
#if defined(_VISION_PS3)
#define SampleShadowMap(_tex, _sampler, _texCoords, _compareDepth) \
  tex2Dproj(_tex, float4(_texCoords, _compareDepth, 1.0)).r
#define CalculateShadowResult(_shadowMapValues, _compareDepth) \
  (1.0 - _shadowMapValues)

// NOTE(review): the $ifndef below wraps a regular #elif, so the DX11 SampleCmp
// branch only exists when SHADOWING_MODE_PCSS16 is not set; with PCSS16 a DX11
// build falls through to the generic #else branch at the bottom. This presumably
// relies on the $-directives being resolved before the C-style preprocessor
// runs - confirm against the shader build pipeline.
	$ifndef SHADOWING_MODE_PCSS16
		#elif defined(_VISION_DX11)
			#define SampleShadowMap(_tex, _sampler, _texCoords, _compareDepth) \
				_tex.SampleCmp(_sampler, _texCoords, _compareDepth)
			#define CalculateShadowResult(_shadowMapValues, _compareDepth) \
				(_shadowMapValues)
	$endif

// PSP2: typed fetch of the stored depth (the sampler argument is unused);
// the comparison against the compare depth is done in the shader.
#elif defined(_VISION_PSP2)
#define SampleShadowMap(_tex, _sampler, _texCoords, _compareDepth) \
  tex2D<float>(_tex, _texCoords).r
#define CalculateShadowResult(_shadowMapValues, _compareDepth) \
  (_shadowMapValues > _compareDepth)
  
// Generic: plain vTex2D fetch; the comparison against the compare depth is
// done in the shader.
#else
#define SampleShadowMap(_tex, _sampler, _texCoords, _compareDepth) \
  vTex2D(_tex, _sampler, _texCoords)
#define CalculateShadowResult(_shadowMapValues, _compareDepth) \
  (_shadowMapValues > _compareDepth)
#endif

// Builds a per-pixel 2D rotation from the noise texture.
// screenCoords is scaled by InvScreenSize.zw to address NoiseTex; the fetched
// xy value is expanded from [0,1] to [-1,1] and normalized to a direction (c, s).
// Returns (c, -s, s, c).
inline float4 GetRandomRot(float2 screenCoords)
{
  float2 vNoiseUV = screenCoords * InvScreenSize.zw;

  float2 vDirection = vTex2D(NoiseTex, NoiseTexSampler, vNoiseUV).xy * 2.0 - 1.0;
  vDirection = normalize(vDirection);

  return float4(vDirection.x, -vDirection.y, vDirection.y, vDirection.x);
}

// Averages shadow tests taken at poissonDisk offsets around vProjCoords.xy.
//   vProjCoords  : xy = shadow map texture coordinates, z = pixel depth
//                  (remapped with z*0.5+0.5 on PS3, PSP2 and WiiU before use)
//   numSamples   : number of taps; four table entries are consumed per loop
//                  iteration, so this is expected to be a multiple of 4 and at
//                  most 16 (the table size)
//   sampleRadius : filter radius in texture space (x, y)
//   rot          : rotation as returned by GetRandomRot; it only affects the
//                  offsets when RANDOMIZED is set
// Returns the sum of all per-tap results, each weighted by 1/numSamples.
inline float DoPoissonDiskSampling(float4 vProjCoords, int numSamples, float2 sampleRadius, float4 rot)
{
#ifdef _VISION_WIIU
  // Local copy of the offset table (the global one is not declared on WiiU).
  float2 poissonDisk[16] = { 
    float2( 0.14383161, -0.14100790 ),
    float2( -0.94201624, -0.39906216 ), 
    float2( 0.94558609, -0.76890725 ), 
    float2( -0.094184101, -0.92938870 ), 
    float2( 0.34495938, 0.29387760 ), 
    float2( -0.91588581, 0.45771432 ), 
    float2( -0.81544232, -0.87912464 ), 
    float2( -0.38277543, 0.27676845 ), 
    float2( 0.97484398, 0.75648379 ), 
    float2( 0.44323325, -0.97511554 ), 
    float2( 0.53742981, -0.47373420 ), 
    float2( -0.26496911, -0.41893023 ), 
    float2( 0.79197514, 0.19090188 ), 
    float2( -0.24188840, 0.99706507 ), 
    float2( -0.81409955, 0.91437590 ), 
    float2( 0.19984126, 0.78641367 )
  };
#endif

#if defined(_VISION_PS3) || defined(_VISION_PSP2) || defined(_VISION_WIIU)
  float fCompareDepth = vProjCoords.z * 0.5 + 0.5;
#else
  float fCompareDepth = vProjCoords.z;
#endif

  float fTapWeight = 1.0 / numSamples;

  // Fold the filter radius into the rotation so rotated offsets come out scaled.
  rot *= sampleRadius.xyyx;

  float4 vTapValues;    // fetched values of the four taps of one iteration
  float4 vTapOffsets;   // two offsets at a time: xy = first, zw = second
  float fAccumulated = 0.0;

  UNROLL_LOOP
  for (int iTap=0; iTap<numSamples; iTap+=4)
  {
    // Taps 0 and 1 of this group.
    vTapOffsets.xy = poissonDisk[iTap];
    vTapOffsets.zw = poissonDisk[iTap+1];
$ifdef RANDOMIZED
    vTapOffsets = rot.xyzw * float4(vTapOffsets.xx, vTapOffsets.ww) + rot.zwxy * float4(vTapOffsets.yy, vTapOffsets.zz);
$else
    vTapOffsets *= sampleRadius.xyxy;
$endif

    vTapValues.x = SampleShadowMap(ShadowTex, ShadowTexSampler, vProjCoords.xy + vTapOffsets.xy, fCompareDepth);
    vTapValues.y = SampleShadowMap(ShadowTex, ShadowTexSampler, vProjCoords.xy + vTapOffsets.zw, fCompareDepth);

    // Taps 2 and 3 of this group.
    vTapOffsets.xy = poissonDisk[iTap+2];
    vTapOffsets.zw = poissonDisk[iTap+3];
$ifdef RANDOMIZED
    vTapOffsets = rot.xyzw * float4(vTapOffsets.xx, vTapOffsets.ww) + rot.zwxy * float4(vTapOffsets.yy, vTapOffsets.zz);
$else
    vTapOffsets *= sampleRadius.xyxy;
$endif

    vTapValues.z = SampleShadowMap(ShadowTex, ShadowTexSampler, vProjCoords.xy + vTapOffsets.xy, fCompareDepth);
    vTapValues.w = SampleShadowMap(ShadowTex, ShadowTexSampler, vProjCoords.xy + vTapOffsets.zw, fCompareDepth);

    // Convert the four fetched values to shadow results and add their weighted sum.
    fAccumulated += dot(CalculateShadowResult(vTapValues, fCompareDepth), fTapWeight.xxxx);
  }

  return fAccumulated;
}



// N-tap PCF shadow term.
//   vProjCoords           : xy = texture coordinates, in correct range; z = normalized pixel depth
//   fCascadeRelativeScale : scale applied to the filter radius
//   fRadius               : filter radius in shadow map texels
//   screenCoords          : screen position used to pick the per-pixel rotation
// Uses SHADOWING_NUM_SAMPLES taps (half of that on PS3).
inline float ComputeShadowTerm_PCFN(float4 vProjCoords, float fCascadeRelativeScale, float fRadius, float2 screenCoords)
{
#if defined(_VISION_PS3)
  int iTapCount = SHADOWING_NUM_SAMPLES/2;
#else
  int iTapCount = SHADOWING_NUM_SAMPLES;
#endif

  // Radius converted from texels to texture space.
  float2 vFilterRadius = InvShadowMapSize.xy * fRadius * fCascadeRelativeScale;
  float4 vRotation = GetRandomRot(screenCoords);

  return DoPoissonDiskSampling(vProjCoords, iTapCount, vFilterRadius, vRotation);
}

$endif

/*
=======================================================================================================================
PCF4 Shadowing
=======================================================================================================================
*/

$ifdef SHADOWING_MODE_PCF4

// 4-tap PCF shadow term.
//   vProjCoords : xy = texture coordinates, in correct range; z = normalized pixel depth
// PS3, PSP2 and DX11 use a single comparison fetch; all other platforms read
// the four surrounding texels and blend the comparison results bilinearly.
inline float ComputeShadowTerm_PCF4(float4 vProjCoords)
{
#if defined(_VISION_PS3)
  // PS3: hardware PCF; the fetched value is inverted.
  float fReferenceDepth = vProjCoords.z * 0.5 + 0.5;
  return 1.0 - tex2Dproj(ShadowTex, float4(vProjCoords.xy, fReferenceDepth, 1.0)).r;
#elif defined(_VISION_PSP2)
  // PSP2: hardware PCF; the fetched value is used directly.
  float fReferenceDepth = vProjCoords.z * 0.5 + 0.5;
  return tex2Dproj(ShadowTex, float4(vProjCoords.xy, fReferenceDepth, 1.0)).r;
#elif defined(_VISION_DX11)
  return ShadowTex.SampleCmp(ShadowTexSampler, vProjCoords.xy, vProjCoords.z);

#else
  // Split the texel-space coordinate into the containing texel and the blend factors.
  float2 vTexelIndex;
  float2 vBlend = modf(vProjCoords.xy / InvShadowMapSize.xy, vTexelIndex);

  // Texture coordinates of the texel and of its +1/+1 neighbour.
  float2 vCoordMin = vTexelIndex * InvShadowMapSize.xy;
  float2 vCoordMax = vCoordMin + InvShadowMapSize.xy;

  // r = (x0,y0), g = (x1,y0), b = (x0,y1), a = (x1,y1)
  float4 vStoredDepth;
  vStoredDepth.r = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(ShadowTex, ShadowTexSampler, vCoordMin);
  vStoredDepth.g = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(ShadowTex, ShadowTexSampler, float2(vCoordMax.x, vCoordMin.y));
  vStoredDepth.a = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(ShadowTex, ShadowTexSampler, vCoordMax);
  vStoredDepth.b = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(ShadowTex, ShadowTexSampler, float2(vCoordMin.x, vCoordMax.y));

  #if defined(_VISION_WIIU)
    float fReferenceDepth = vProjCoords.z * 0.5 + 0.5;
  #else
    float fReferenceDepth = vProjCoords.z;
  #endif

  // 1 where the stored depth is greater than the reference depth, 0 otherwise.
  float4 vLit = saturate(ceil(vStoredDepth - fReferenceDepth));

  // Bilinear blend of the four comparison results.
  float fUpperRow = lerp( vLit.r, vLit.g, vBlend.x );
  float fLowerRow = lerp( vLit.b, vLit.a, vBlend.x );
  float fBlended = lerp( fUpperRow, fLowerRow, vBlend.y );
  return saturate(fBlended);
#endif
}

$endif

/*
=======================================================================================================================
PCSS Shadowing
=======================================================================================================================
*/

$ifdef SHADOWING_MODE_PCSS16

// PCSS blocker search: samples the shadow map at poissonDisk offsets around
// vProjCoords.xy and averages the depth of all samples that are closer to the
// light than the receiver.
//   vProjCoords           : xy = shadow map texture coordinates, z = receiver depth
//                           (compared directly against the fetched depths)
//   fAverageBlockerDepth  : out - average depth of all blocking samples, or 0
//                           when no sample blocks the receiver
//   fNumBlockers          : out - number of blocking samples
//   fCascadeRelativeScale : not used by this function; kept for interface compatibility
//   rot                   : rotation as returned by GetRandomRot; it only
//                           affects the offsets when RANDOMIZED is set
// The search radius is ShadowParameters.x shadow map texels.
inline void FindBlocker(float4 vProjCoords, out float fAverageBlockerDepth, out float fNumBlockers, float fCascadeRelativeScale, float4 rot)
{
#ifdef _VISION_WIIU
  // Local copy of the offset table (the global one is not declared on WiiU).
  float2 poissonDisk[16] = { 
    float2( 0.14383161, -0.14100790 ),
    float2( -0.94201624, -0.39906216 ), 
    float2( 0.94558609, -0.76890725 ), 
    float2( -0.094184101, -0.92938870 ), 
    float2( 0.34495938, 0.29387760 ), 
    float2( -0.91588581, 0.45771432 ), 
    float2( -0.81544232, -0.87912464 ), 
    float2( -0.38277543, 0.27676845 ), 
    float2( 0.97484398, 0.75648379 ), 
    float2( 0.44323325, -0.97511554 ), 
    float2( 0.53742981, -0.47373420 ), 
    float2( -0.26496911, -0.41893023 ), 
    float2( 0.79197514, 0.19090188 ), 
    float2( -0.24188840, 0.99706507 ), 
    float2( -0.81409955, 0.91437590 ), 
    float2( 0.19984126, 0.78641367 )
  };
#endif

  float2 fSampleRadius = InvShadowMapSize.xy * ShadowParameters.x;
  // Fold the search radius into the rotation so rotated offsets come out scaled.
  rot *= fSampleRadius.xyyx;

  fAverageBlockerDepth = 0; 
  fNumBlockers = 0;

  float4 fTexDepth;
  float4 vOffset;

  // TODO: gather4 variant
  // Four table entries are consumed per iteration.
  UNROLL_LOOP
  for( int i = 0; i < SHADOWING_NUM_BLOCKERSEARCH_SAMPLES; i+=4) 
  { 
    vOffset.xy = poissonDisk[i];    
    vOffset.zw = poissonDisk[i+1];
$ifdef RANDOMIZED
    vOffset = rot.xyzw * float4(vOffset.xx, vOffset.ww) + rot.zwxy * float4(vOffset.yy, vOffset.zz);
$else
    vOffset *= fSampleRadius.xyxy;
$endif

#ifdef _VISION_PS3
    fTexDepth.x = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(PatchedShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.xy);
    fTexDepth.y = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(PatchedShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.zw);
#else
    fTexDepth.x = vTex2D1Comp(ShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.xy).r; 
    fTexDepth.y = vTex2D1Comp(ShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.zw).r;
#endif
    
    vOffset.xy = poissonDisk[i+2];    
    vOffset.zw = poissonDisk[i+3];
$ifdef RANDOMIZED
    vOffset = rot.xyzw * float4(vOffset.xx, vOffset.ww) + rot.zwxy * float4(vOffset.yy, vOffset.zz);
$else
    vOffset *= fSampleRadius.xyxy;
$endif

#ifdef _VISION_PS3
    fTexDepth.z = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(PatchedShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.xy);
    fTexDepth.w = READ_NORMALIZED_DEPTH_FROM_SHADOWMAP(PatchedShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.zw);
#else
    fTexDepth.z = vTex2D1Comp(ShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.xy).r; 
    fTexDepth.w = vTex2D1Comp(ShadowTex, ShadowTexSampler, vProjCoords.xy + vOffset.zw).r;
#endif

    // 1 for every sample that is closer to the light than the receiver.
    float4 fRes = fTexDepth < vProjCoords.zzzz;
    fAverageBlockerDepth += dot(fRes * fTexDepth, 1.0); 
    
    fNumBlockers += dot(fRes, 1.0);       
  }

  // fNumBlockers is either 0 or >= 1. Clamping the divisor to 1 leaves the
  // average unchanged whenever blockers exist and avoids a 0/0 (NaN) result
  // in the out parameter when none were found.
  fAverageBlockerDepth /= max(fNumBlockers, 1.0);
} 

// PCSS shadow term: blocker search, penumbra size estimation, then a
// Poisson-disk PCF pass with the estimated radius.
//   vProjCoords           : xy = shadow map texture coordinates, z = normalized pixel depth
//   fCascadeRelativeScale : scale applied to the filter radius
//   fDepth                : depth used for the additional depth-proportional
//                           radius term (scaled by ShadowParameters.y)
//   screenCoords          : screen position used to pick the per-pixel rotation
// The final radius is never smaller than one shadow map texel.
inline float ComputeShadowTerm_PCSS(float4 vProjCoords, float fCascadeRelativeScale, float fDepth, float2 screenCoords)
{
  // The blocker search and the penumbra estimate compare the receiver depth
  // directly against fetched shadow map depths, so on WiiU they need the depth
  // remapped to [0,1]. The remap is applied to a copy only: DoPoissonDiskSampling
  // performs the same remap itself on WiiU and must receive the original
  // coordinates, otherwise the depth would be remapped twice.
  float4 vBlockerCoords = vProjCoords;
#if defined(_VISION_WIIU)
  vBlockerCoords.z = vBlockerCoords.z * 0.5 + 0.5;
#endif

  float4 rot = GetRandomRot(screenCoords);
  float fAvgBlockerDepth; 
  float fNumBlockers; 
  FindBlocker( vBlockerCoords, fAvgBlockerDepth, fNumBlockers, fCascadeRelativeScale, rot);
  
  float2 fSampleRadius = InvShadowMapSize.xy * fCascadeRelativeScale;
  if (fNumBlockers > 0.0f)
  {
    // Penumbra width grows with the receiver/blocker depth difference relative
    // to the blocker depth. fAvgBlockerDepth is only meaningful inside this branch.
    fSampleRadius *= ShadowParameters.x * (vBlockerCoords.z - fAvgBlockerDepth) / fAvgBlockerDepth; 
  }
  fSampleRadius += fDepth * ShadowParameters.y * InvShadowMapSize.xy * fCascadeRelativeScale;
  fSampleRadius = max(fSampleRadius, InvShadowMapSize.xy);
 
  int numSamples = SHADOWING_NUM_SAMPLES;
  return DoPoissonDiskSampling(vProjCoords, numSamples, fSampleRadius, rot);
}
$endif

/*
=======================================================================================================================
CHS Shadowing
=======================================================================================================================
*/

$ifdef SHADOWING_MODE_CHS
// Edge length of the square CHS filter kernel, plus shorthands for the full
// size (FS) and the integer half size (FS2 = 5).
#define FILTER_SIZE    11
#define FS  FILTER_SIZE
#define FS2 ( FILTER_SIZE / 2 )

// 4 control matrices for a dynamic cubic bezier filter weights matrix.
// Fw() blends them per element: C0 dominates at blend parameter 0, C3 at 1.

// C3: all weights 1 (full 11x11 box).
static const float C3[11][11] = { 
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 }, 
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
  { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 },
};

// C2: zero outer ring, 0.5 ring inside it, 1 in the central 7x7 area.
static const float C2[11][11] = { 
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, 
  { 0.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0 },
  { 0.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
};

// C1: two zero outer rings, 0.5 ring inside them, 1 in the central 5x5 area.
static const float C1[11][11] = { 
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, 
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.0,0.0 },
  { 0.0,0.0,0.5,1.0,1.0,1.0,1.0,1.0,0.5,0.0,0.0 },
  { 0.0,0.0,0.5,1.0,1.0,1.0,1.0,1.0,0.5,0.0,0.0 },
  { 0.0,0.0,0.5,1.0,1.0,1.0,1.0,1.0,0.5,0.0,0.0 },
  { 0.0,0.0,0.5,1.0,1.0,1.0,1.0,1.0,0.5,0.0,0.0 },
  { 0.0,0.0,0.5,1.0,1.0,1.0,1.0,1.0,0.5,0.0,0.0 },
  { 0.0,0.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
};

// C0: weight 1 at the center element, 0.2 on its eight neighbours, 0 elsewhere.
static const float C0[11][11] = { 
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 }, 
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.2,0.2,0.2,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.2,1.0,0.2,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.2,0.2,0.2,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
  { 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 },
};

// Dynamic filter weight at row r, column c of the 11x11 weight matrix.
// The four control matrices are blended with the cubic Bezier basis for fL:
//   (1-fL)^3 * C0 + fL^3 * C3 + 3*(1-fL)^2*fL * C1 + 3*fL^2*(1-fL) * C2
// so fL = 0 yields C0[r][c] and fL = 1 yields C3[r][c].
float Fw( int r, int c, float fL )
{
  float fInv = 1.0-fL;

  // Basis polynomials, one per control matrix.
  float fBasis0 = fInv*fInv*fInv;
  float fBasis3 = fL*fL*fL;
  float fBasis1 = 3.0f * fInv*fInv*fL;
  float fBasis2 = 3.0f * fL*fL*fInv;

  return fBasis0 * C0[r][c] +
    fBasis3 * C3[r][c] +
    fBasis1 * C1[r][c]+
    fBasis2 * C2[r][c];
} 

// Edge length of the blocker search window, plus shorthands for the full size
// (BFS) and the integer half size (BFS2 = 5).
#define BLOCKER_FILTER_SIZE    11
#define BFS  BLOCKER_FILTER_SIZE
#define BFS2 ( BLOCKER_FILTER_SIZE / 2 )

// CHS shadow term (DX11 only; every other platform returns the constant 1.0).
//   vProjCoords           : xy = shadow map texture coordinates, z = pixel depth
//   fCascadeRelativeScale : scales the depth ratio that drives the filter shape
//   fRadius               : scales the depth ratio that drives the filter shape
// Steps: (1) blocker search over the BFS window using Gather, (2) derive a
// blend ratio in [0,1] from the receiver/blocker depth difference, (3) sum the
// dynamic weights Fw(row, col, ratio) for normalization, (4) accumulate the
// depth comparison results of the FS window weighted by Fw and by the
// sub-texel position, (5) return the normalized, saturated sum.
inline float ComputeShadowTerm_CHS(float4 vProjCoords, float fCascadeRelativeScale, float fRadius)
{ 
#ifdef _VISION_DX11
  float s = 0.0f;                                   // weighted sum of lit samples
  float2 stc = vProjCoords.xy / InvShadowMapSize;   // coordinate in texel units
  float2 tc = floor(stc) * InvShadowMapSize;        // coordinate snapped to the texel grid
  float2 fc = frac(stc);                            // sub-texel position
  int row;
  int col;
  float w = 0.0;                                    // sum of all filter weights
  float avgBlockerDepth = 0;
  float blockerCount = 0;
  float fRatio;                                     // blend parameter passed to Fw
  float4 v1[FS2 + 1];                               // comparison results of the current row, one Gather per entry
  float2 v0[FS2 + 1];                               // .xy of the previous row's v1 entries
  float2 off;                                       // NOTE(review): never referenced in this function
  float fDepth = vProjCoords.z;

  // find number of blockers and sum up blocker depth
  // (offsets advance by 2 because each Gather returns four texels)
  for( row = -BFS2; row <= BFS2; row += 2 )
  {
    for( col = -BFS2; col <= BFS2; col += 2 )
    {
      float4 d4 = ShadowTex.Gather( ShadowTexSampler, tc.xy, int2( col, row ) );			
			float4 b4  = d4 < fDepth.xxxx;   

			blockerCount += dot( b4, (1.0).xxxx );
			avgBlockerDepth += dot( d4, b4 );
    }
  }
  // compute ratio using formulas from PCSS
  if( blockerCount > 0.0 )
  {
    avgBlockerDepth /= blockerCount;
    fRatio = saturate(((fDepth - avgBlockerDepth) * fRadius * fCascadeRelativeScale) / avgBlockerDepth);
    fRatio *= fRatio;
  }
  else
  {
    // no blockers: Fw(…, 0) selects the C0 control matrix
    fRatio = 0.0;
  }
  //return fRatio;

  // sum up weights of dynamic filter matrix
	for( row = 0; row < FS; ++row )
	{
	   for( col = 0; col < FS; ++col )
	   {
		  w += Fw(row,col,fRatio);
	   }
	}

  // filter shadow map samples using the dynamic weights
  // Each iteration gathers four texels; the results are combined with the
  // weights of the current matrix row (row+FS2) and, from the second row on,
  // of the previous matrix row (row+FS2-1), interpolated by fc.x / fc.y.
  // The first and last column are handled separately from the interior ones.
	[unroll(FILTER_SIZE)]for( row = -FS2; row <= FS2; row += 2 )
	{
		for( col = -FS2; col <= FS2; col += 2 )
		{
			float4 d4 = ShadowTex.GatherRed( ShadowTexSampler, tc.xy, int2( col, row ) );
			// 1 where the stored depth is greater than the pixel depth
			v1[(col+FS2)/2] = d4 > fDepth.xxxx; 
  
			// first column of the window
			if( col == -FS2 )
			{
				s += ( 1 - fc.y ) * ( v1[0].w * ( Fw(row+FS2,0,fRatio) - 
									  Fw(row+FS2,0,fRatio) * fc.x ) + v1[0].z * 
									( fc.x * ( Fw(row+FS2,0,fRatio) - 
									  Fw(row+FS2,1,fRatio) ) +  
									  Fw(row+FS2,1,fRatio) ) );
				s += (     fc.y ) * ( v1[0].x * ( Fw(row+FS2,0,fRatio) - 
									  Fw(row+FS2,0,fRatio) * fc.x ) + 
									  v1[0].y * ( fc.x * ( Fw(row+FS2,0,fRatio) - 
									  Fw(row+FS2,1,fRatio) ) +  
									  Fw(row+FS2,1,fRatio) ) );
				// contribution of the previous matrix row (not available for the first row)
				if( row > -FS2 )
				{
					s += ( 1 - fc.y ) * ( v0[0].x * ( Fw(row+FS2-1,0,fRatio) - 
										  Fw(row+FS2-1,0,fRatio) * fc.x ) + v0[0].y * 
										( fc.x * ( Fw(row+FS2-1,0,fRatio) - 
										  Fw(row+FS2-1,1,fRatio) ) +  
										  Fw(row+FS2-1,1,fRatio) ) );
					s += (     fc.y ) * ( v1[0].w * ( Fw(row+FS2-1,0,fRatio) - 
										  Fw(row+FS2-1,0,fRatio) * fc.x ) + v1[0].z * 
										( fc.x * ( Fw(row+FS2-1,0,fRatio) - 
										  Fw(row+FS2-1,1,fRatio) ) +  
										  Fw(row+FS2-1,1,fRatio) ) );
				}
			}
			// last column of the window
			else if( col == FS2 )
			{
				s += ( 1 - fc.y ) * ( v1[FS2].w * ( fc.x * ( Fw(row+FS2,FS-2,fRatio) - 
									  Fw(row+FS2,FS-1,fRatio) ) + 
									  Fw(row+FS2,FS-1,fRatio) ) + v1[FS2].z * fc.x * 
									  Fw(row+FS2,FS-1,fRatio) );
				s += (     fc.y ) * ( v1[FS2].x * ( fc.x * ( Fw(row+FS2,FS-2,fRatio) - 
									  Fw(row+FS2,FS-1,fRatio) ) + 
									  Fw(row+FS2,FS-1,fRatio) ) + v1[FS2].y * fc.x * 
									  Fw(row+FS2,FS-1,fRatio) );
				// contribution of the previous matrix row (not available for the first row)
				if( row > -FS2 )
				{
					s += ( 1 - fc.y ) * ( v0[FS2].x * ( fc.x * 
										( Fw(row+FS2-1,FS-2,fRatio) - 
										  Fw(row+FS2-1,FS-1,fRatio) ) + 
										  Fw(row+FS2-1,FS-1,fRatio) ) + 
										  v0[FS2].y * fc.x * Fw(row+FS2-1,FS-1,fRatio) );
					s += (     fc.y ) * ( v1[FS2].w * ( fc.x * 
										( Fw(row+FS2-1,FS-2,fRatio) - 
										  Fw(row+FS2-1,FS-1,fRatio) ) + 
										  Fw(row+FS2-1,FS-1,fRatio) ) + 
										  v1[FS2].z * fc.x * Fw(row+FS2-1,FS-1,fRatio) );
				}
			}
			// interior columns
			else
			{
				s += ( 1 - fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * 
									( Fw(row+FS2,col+FS2-1,fRatio) - 
									  Fw(row+FS2,col+FS2+0,fRatio) ) + 
									  Fw(row+FS2,col+FS2+0,fRatio) ) +
									  v1[(col+FS2)/2].z * ( fc.x * 
									( Fw(row+FS2,col+FS2-0,fRatio) - 
									  Fw(row+FS2,col+FS2+1,fRatio) ) + 
									  Fw(row+FS2,col+FS2+1,fRatio) ) );
				s += (     fc.y ) * ( v1[(col+FS2)/2].x * ( fc.x * 
									( Fw(row+FS2,col+FS2-1,fRatio) - 
									  Fw(row+FS2,col+FS2+0,fRatio) ) + 
									  Fw(row+FS2,col+FS2+0,fRatio) ) +
									  v1[(col+FS2)/2].y * ( fc.x * 
									( Fw(row+FS2,col+FS2-0,fRatio) - 
									  Fw(row+FS2,col+FS2+1,fRatio) ) + 
									  Fw(row+FS2,col+FS2+1,fRatio) ) );
				// contribution of the previous matrix row (not available for the first row)
				if( row > -FS2 )
				{
					s += ( 1 - fc.y ) * ( v0[(col+FS2)/2].x * ( fc.x * 
										( Fw(row+FS2-1,col+FS2-1,fRatio) - 
										  Fw(row+FS2-1,col+FS2+0,fRatio) ) + 
										  Fw(row+FS2-1,col+FS2+0,fRatio) ) +
										  v0[(col+FS2)/2].y * ( fc.x * 
										( Fw(row+FS2-1,col+FS2-0,fRatio) - 
										  Fw(row+FS2-1,col+FS2+1,fRatio) ) + 
										  Fw(row+FS2-1,col+FS2+1,fRatio) ) );
					s += (     fc.y ) * ( v1[(col+FS2)/2].w * ( fc.x * 
										( Fw(row+FS2-1,col+FS2-1,fRatio) - 
										  Fw(row+FS2-1,col+FS2+0,fRatio) ) + 
										  Fw(row+FS2-1,col+FS2+0,fRatio) ) +
										  v1[(col+FS2)/2].z * ( fc.x * 
										( Fw(row+FS2-1,col+FS2-0,fRatio) - 
										  Fw(row+FS2-1,col+FS2+1,fRatio) ) + 
										  Fw(row+FS2-1,col+FS2+1,fRatio) ) );
				}
			}
            
			// keep the .xy results of this Gather for the next row's pass
			if( row != FS2 )
			{
				v0[(col+FS2)/2] = v1[(col+FS2)/2].xy;
			}
		}
	}

	// normalize by the total filter weight
	return saturate(s/w);

#else
  // CHS is not implemented for this platform.
  return 1.0f;
#endif
}

$endif

