在SQL Server中,如何用SQL去实现得到一批字符串的全部组合呢?这个是同事在实际需求当中遇到的一个问题,他的具体需求如下所示:
传入参数格式为'1,2,3,224,15,6' 'A,BC,GHT,TTY,B,E'
输出的内容为分割后字符串的所有非排列组合
!-阶乘,如!=5××××=120 (M!/(N!*((M-N)!)))
公式描述:组合数公式是从m个不同元素中,任取n(n≤m)个元素并成一组,叫做从m个不同元素中取出n个元素的一个组合;
从m个不同元素中取出n(n≤m)个元素的所有组合的个数,叫做从m个不同元素中取出n个元素的组合数。用符号c(m,n) 表示。
如果有5个数字那么就是M=5 单个数字组合N=1 (M!/(N!*((M-N)!)))=5
是M=5 2个数字组合N=2 (M!/(N!*((M-N)!)))=10
是M=5 3个数字组合N=3 (M!/(N!*((M-N)!)))=10
是M=5 4个数字组合N=4 (M!/(N!*((M-N)!)))=5
是M=5 5个数字组合N=5 (M!/(N!*((M-N)!)))=1
可能全部的组合有 10 + 10 + 5+ 5 + 1 =31 种。传入的参数分割后越多,组合数也就越庞大。
他最开始的实现方式就是用多层循环实现(此处就不贴代码了),但是当要实现组合的字符串数量增多时,效率性能就下降得非常厉害,后面我参考一种写法写了下面SQL语句。
--创建辅助表SEQ_NUMBER
CREATE TABLE SEQ_NUMBER(COL_NUM INT);
DECLARE @Index INT =1;
WHILE @Index <=32
BEGIN
INSERT INTO SEQ_NUMBER VALUES(@Index);
SET @Index +=1;
END;
GO
--创建辅助表,用于保存拆分后的字符串
CREATE TABLE SplitString(COL_NUM INT IDENTITY(1,1) ,VAL VARCHAR(32));
--创建函数
CREATE FUNCTION FN_GET_COMBINATIONS()
RETURNS @OutTable TABLE(COL VARCHAR(32), VAL VARCHAR(32))
AS
BEGIN
DECLARE @Str VARCHAR(32)='';
DECLARE @Index INT =1;
DECLARE @RowCount INT;
SELECT @RowCount=COUNT(*) FROM SplitString;
--注意,如果字符串分隔后有5个字符串(A,BC,GHT,TTY,B,E),就使用12345, 如果分割后有6个字符串,就必须用123456,以此类推
WHILE @Index <= @RowCount
BEGIN
SET @Str = @Str + CAST(@Index AS VARCHAR(2))
SET @Index = @Index +1
END
INSERT INTO @OutTable
SELECT S.COL_NUM, T.VAL FROM SEQ_NUMBER S , SplitString T
WHERE S.COL_NUM = T.COL_NUM ORDER BY T.COL_NUM;
WHILE NOT EXISTS(SELECT 1 FROM @OutTable where COL = @Str)
INSERT INTO @OutTable
SELECT T3.COL + T2.COL, T3.VAL + T2.VAL FROM @OutTable AS T2,@OutTable AS T3 WHERE len(t3.col) = 1 and charindex(T3.COL,T2.COL) =0 and T2.COL > T3.COL
RETURN;
END
然后假如,我们需要实现字符串'1,2,3,4,5'中1,2,3,4,5的所有组合方式,那么我们用下面SQL就能得到结果。如下所示,当然你也可以用诸如'A,BC,GHT,TTY,B,E'这样的字符串去获取字符串的所有组合。
DECLARE @String VARCHAR(200)
DECLARE @SqlText VARCHAR(MAX)
DECLARE @Index INT=1;
SET @String='A,BC,GHT,TTY,B';
SET @SqlText='SELECT COL='''+ REPLACE(@STRING,',',''' UNION ALL SELECT ''')+''''
--清空旧数据,保存需要进行组合的字符串数据。
TRUNCATE TABLE SplitString;
INSERT INTO SplitString
EXEC (@SqlText)
SELECT DISTINCT
CHAR_VAL ,
COL_NUM ,LEN(COL_NUM) AS STR_CNT
FROM dbo.FN_GET_COMBINATIONS()
WHERE LEN(COL_NUM) >= 1
ORDER BY LEN(COL_NUM) ,COL_NUM;
后面测试发现,当组合的字符串数量超过或等于10个时,这个函数就有问题了(有兴趣的可以自行测试,例如传入的参数为’1,2,3,4,5,6,7,8,9,10’)。所以又对这个进行了一番修改。目前最多支持获取26个字符串的全部组合,这个已经完全满足业务需要了。如果再需要跟多的字符串组合,则还需修改函数。这个函数效率也是与需要组合的字符串个数有相关,如果组合11个字符串,基本上需要3~4秒的时间,如果组合的字符串个数越多,则所需时间越多。当然,如果组合的字符串个数7~8个,几乎就是1秒内。
CREATE TABLE SEQ_CHARACTER(COL_NUM INT ,COL_CHAR VARCHAR(36));
INSERT INTO SEQ_CHARACTER
SELECT 1 ,'A' UNION ALL
SELECT 2 ,'B' UNION ALL
SELECT 3 ,'C' UNION ALL
SELECT 4 ,'D' UNION ALL
SELECT 5 ,'E' UNION ALL
SELECT 6 ,'F' UNION ALL
SELECT 7 ,'G' UNION ALL
SELECT 8 ,'H' UNION ALL
SELECT 9 ,'I' UNION ALL
SELECT 10,'J' UNION ALL
SELECT 11,'K' UNION ALL
SELECT 12,'L' UNION ALL
SELECT 13,'M' UNION ALL
SELECT 14,'N' UNION ALL
SELECT 15,'O' UNION ALL
SELECT 16,'P' UNION ALL
SELECT 17,'Q' UNION ALL
SELECT 18,'R' UNION ALL
SELECT 19,'S' UNION ALL
SELECT 20,'T' UNION ALL
SELECT 21,'U' UNION ALL
SELECT 22,'V' UNION ALL
SELECT 23,'W' UNION ALL
SELECT 24,'X' UNION ALL
SELECT 25,'Y' UNION ALL
SELECT 26,'Z'
CREATE TABLE SplitString(COL_NUM INT IDENTITY(1,1) ,VAL VARCHAR(32));
CREATE FUNCTION FN_GET_COMBINATIONS()
RETURNS @OutTable TABLE(COL_NUM VARCHAR(32), CHAR_VAL VARCHAR(32))
AS
BEGIN
DECLARE @Str VARCHAR(32)='';
DECLARE @Index INT =1;
DECLARE @RowCount INT;
SELECT @RowCount=COUNT(*) FROM SplitString;
--注意,如果字符串分隔后有5个字符串(A,BC,GHT,TTY,B,E),就使用12345, 如果分割后有6个字符串,就必须用123456,以此类推
WHILE @Index <= @RowCount
BEGIN
SELECT @Str = @Str + LTRIM(RTRIM(COL_CHAR)) FROM SEQ_CHARACTER WHERE COL_NUM=@Index
SET @Index = @Index +1
END
INSERT INTO @OutTable
SELECT S.COL_CHAR, T.VAL FROM SEQ_CHARACTER S , SplitString T
WHERE S.COL_NUM = T.COL_NUM ORDER BY T.COL_NUM;
WHILE NOT EXISTS(SELECT 1 FROM @OutTable where COL_NUM = @Str)
INSERT INTO @OutTable
SELECT T3.COL_NUM + T2.COL_NUM, T3.CHAR_VAL + T2.CHAR_VAL FROM @OutTable AS T2,@OutTable AS T3 WHERE len(T3.COL_NUM) = 1 and charindex(T3.COL_NUM,T2.COL_NUM) =0 and T2.COL_NUM > T3.COL_NUM
RETURN;
END
测试脚本如下:
DECLARE @String VARCHAR(200)
DECLARE @SqlText VARCHAR(MAX)
DECLARE @Index INT=1;
SET @String='A,B,C,D,E,F,G,H,G,H,I';
SET @SqlText='SELECT COL='''+ REPLACE(@STRING,',',''' UNION ALL SELECT ''')+''''
--清空旧数据,保存需要进行组合的字符串数据。
TRUNCATE TABLE SplitString;
INSERT INTO SplitString
EXEC (@SqlText)
SELECT DISTINCT
CHAR_VAL ,
COL_NUM ,LEN(COL_NUM) AS STR_CNT
FROM dbo.FN_GET_COMBINATIONS()
WHERE LEN(COL_NUM) >= 1
ORDER BY LEN(COL_NUM) ,COL_NUM;