/*
Many times, it is a lot quicker to take some pain up-front and make a proper development/test harness for a routine (function or procedure) rather than think ‘I’m feeling lucky today!’. Then, you keep code and harness together from then on. Every time you run the build script, it runs the test harness too. The advantage is that, if the test harness persists, then it is much less likely that someone, probably ‘you-in-the-future’ unintentionally breaks the code. If you store the actual code for the procedure as well as the test harness, then it is likely that any bugs in functionality will break the build rather than to introduce subtle bugs later on that could even slip through testing and get into production.
This is just an example of what I mean.
Imagine we had a database that was storing addresses with embedded UK postcodes. We really wouldn’t want that. Instead, we might want the postcode in one column and the address in another. In effect, we’d want to extract the entire postcode string and place it in another column. This might be part of a table refactoring or int could easily be part of a process of importing addresses from another system.
We could easily decide to do this with a function that takes in a table as its parameter, and produces a table as its output. This is all very well, but we’d need to work on it, and test it when you make an alteration. By its very nature, a routine like this either works very well or horribly, but there is every chance that you might introduce subtle errors by fidding with it, and if young Thomas, the rather cocky developer who has just joined touches it, it is bound to break.
right, we drop the function we’re developing and re-create it. This is so we
avoid the problem of having to change CREATE to ALTER when working on it. */
IF EXISTS(SELECT * FROM sys.objects WHERE name LIKE ‘ExtractPostcode’
and schema_name(schema_ID)=‘Dbo’)
DROP FUNCTION dbo.ExtractPostcode
GO
/* we drop the user-defined table type and recreate it */
IF EXISTS(SELECT * FROM sys.types WHERE name LIKE ‘AddressesWithPostCodes’
and schema_name(schema_ID)=‘Dbo’)
DROP TYPE dbo.AddressesWithPostCodes
GO
/* we drop the user defined table type and recreate it */
IF EXISTS(SELECT * FROM sys.types WHERE name LIKE ‘OutputFormat’
and schema_name(schema_ID)=‘Dbo’)
DROP TYPE dbo.OutputFormat
GO
/* and now create the table type that we can use to pass the addresses to the function */
CREATE TYPE AddressesWithPostCodes AS TABLE
(
AddressWithPostcode_ID INT IDENTITY PRIMARY KEY, –because they work better that way!
Address_ID INT NOT NULL, –the address we are fixing
TheAddress VARCHAR(100) NOT NULL –The actual address
)
GO
CREATE TYPE OutputFormat AS TABLE
(
Address_ID INT PRIMARY KEY, –the address we are fixing
TheAddress VARCHAR(1000) NULL, –The actual address
ThePostCode VARCHAR(105) NOT NULL – The Postcode
)
GO
CREATE FUNCTION ExtractPostcode(@AddressesWithPostCodes AddressesWithPostCodes READONLY)
/**
summary: >
This Table-valued function takes a table type as a parameter, containing a table of addresses along with their integer IDs. Each address has an embedded postcode somewhere in it but not consistently in a particular place. The routine takes out the postcode and puts it in its own column, passing back a table where theinteger key is accompanied by the address without the (first) postcode and the postcode. If no postcode, then the address is returned unchanged and the postcode will be a blank string
Author: Phil Factor
Revision: 1.3
date: 20 May 2014
example:
– code:
returns: >
Table of Address_ID, TheAddress and ThePostCode.
**/
RETURNS @FixedAddresses TABLE
(
Address_ID INT, –the address we are fixing
TheAddress VARCHAR(1000) NULL, –The actual address
ThePostCode VARCHAR(105) NOT NULL – The Postcode
)
AS
– body of the function
BEGIN
DECLARE @BlankRange VARCHAR(10)
SELECT @BlankRange = CHAR(0)+‘- ‘+CHAR(160)
INSERT INTO @FixedAddresses(Address_ID, TheAddress, ThePostCode)
SELECT Address_ID,
CASE WHEN start>0 THEN REPLACE(STUFF([Theaddress],start,matchlength,”),‘ ‘,‘ ‘)
ELSE TheAddress END
AS TheAddress,
CASE WHEN Start>0 THEN SUBSTRING([Theaddress],start,matchlength-1) ELSE ” END AS ThePostCode
FROM
(–we have a derived table with the results we need for the chopping
SELECT MAX(PATINDEX([matched],‘ ‘+[Theaddress] collate SQL_Latin1_General_CP850_Bin)) AS start,
MAX( CASE WHEN PATINDEX([matched],‘ ‘+[Theaddress] collate SQL_Latin1_General_CP850_Bin)>0 THEN TheLength ELSE 0 END) AS matchlength,
MAX(TheAddress) AS TheAddress,
Address_ID
FROM (SELECT –first the match, then the length. There are three possible valid matches
‘%['+@BlankRange+'][A-Z][0-9] [0-9][A-Z][A-Z]%’, 7 –seven character postcode
UNION ALL SELECT ‘%['+@BlankRange+'][A-Z][A-Z0-9][A-Z0-9] [0-9][A-Z][A-Z]%’, 8
UNION ALL SELECT ‘%['+@BlankRange+'][A-Z][A-Z][A-Z0-9][A-Z0-9] [0-9][A-Z][A-Z]%’, 9)
AS f(Matched,TheLength)
CROSS JOIN @AddressesWithPostCodes
GROUP BY [address_ID]
) WORK;
RETURN
END
GO
——————————-end of the function————————
IF NOT EXISTS (SELECT * FROM sys.objects WHERE name LIKE ‘ExtractPostcode’)
BEGIN
RAISERROR (‘There was an error creating the function.’,16,1)
RETURN
END
/* now the job is only half done because we need to make sure that it works.
So we now load our sample data, making sure that for each Sample, we have what we actually think the output should be. */
DECLARE @InputTable AddressesWithPostCodes
INSERT INTO @InputTable(Address_ID,TheAddress)
VALUES(1,’14 Mason mews, Awkward Hill, Bibury, Cirencester, GL7 5NH’),
(2,’5 Binney St Abbey Ward Buckinghamshire HP11 2AX UK’),
(3,‘BH6 3BE 8 Moor street, East Southbourne and Tuckton W Bournemouth UK’),
(4,’505 Exeter Rd, DN36 5RP Hawerby cum BeesbyLincolnshire UK’),
(5,”),
(6,’9472 Lind St, Desborough Northamptonshire NN14 2GH NN14 3GH UK’),
(7,’7457 Cowl St, #70 Bargate Ward Southampton SO14 3TY UK’),
(8,”’The Pippins”, 20 Gloucester Pl, Chirton Ward, Tyne & Wear NE29 7AD UK’),
(9,’929 Augustine lane, Staple Hill Ward South Gloucestershire BS16 4LL UK’),
(10,’45 Bradfield road, Parwich Derbyshire DE6 1QN UK’),
(11,’63A Northampton St, Wilmington Kent DA2 7PP UK’),
(12,’5 Hygeia avenue, Loundsley Green WardDerbyshire S40 4LY UK’),
(13,’2150 Morley St,Dee Ward Dumfries and Galloway DG8 7DE UK’),
(14,’24 Bolton St, Broxburn, Uphall and Winchburg West Lothian EH52 5TL UK’),
(15,’4 Forrest St, Weston-Super-Mare North Somerset BS23 3HG UK’),
(16,’89 Noon St, Carbrooke Norfolk IP25 6JQ UK’),
(17,’99 Guthrie St, New Milton Hampshire BH25 5DF UK’),
(18,’7 Richmond St, Parkham Devon EX39 5DJ UK’),
(19,’9165 laburnum St, Darnall Ward Yorkshire, South S4 7WN UK’)
Declare @OutputTable OutputFormat –the table of what we think the correct results should be
Declare @IncorrectRows OutputFormat –done for error reporting
–here is the table of what we think the output should be, along with a few edge cases.
INSERT INTO @OutputTable(Address_ID,TheAddress, ThePostcode)
VALUES
(1, ’14 Mason mews, Awkward Hill, Bibury, Cirencester, ‘,‘GL7 5NH’),
(2, ’5 Binney St Abbey Ward Buckinghamshire UK’,‘HP11 2AX’),
(3, ’8 Moor street, East Southbourne and Tuckton W Bournemouth UK’,‘BH6 3BE’),
(4, ’505 Exeter Rd,Hawerby cum Beesby Lincolnshire UK’,‘DN36 5RP’),
(5, ”,”),
(6, ’9472 Lind St,Desborough Northamptonshire NN14 3GH UK’,‘NN14 2GH’),
(7, ’7457 Cowl St, #70 Bargate Ward Southampton UK’,‘SO14 3TY’),
(8, ”’The Pippins”, 20 Gloucester Pl, Chirton Ward,Tyne & Wear UK’,‘NE29 7AD’),
(9, ’929 Augustine lane, Staple Hill Ward South Gloucestershire UK’,‘BS16 4LL’),
(10, ’45 Bradfield road, ParwichDerbyshire UK’,‘DE6 1QN’),
(11, ’63A Northampton St,Wilmington Kent UK’,‘DA2 7PP’),
(12, ’5 Hygeia avenue, Loundsley Green WardDerbyshire UK’,‘S40 4LY’),
(13, ’2150 Morley St, Dee Ward Dumfries and Galloway UK’,‘DG8 7DE’),
(14, ’24 Bolton St,Broxburn, Uphall and Winchburg West Lothian UK’,‘EH52 5TL’),
(15, ’4 Forrest St,Weston-Super-Mare North Somerset UK’,‘BS23 3HG’),
(16, ’89 Noon St, Carbrooke Norfolk UK’,‘IP25 6JQ’),
(17, ’99 Guthrie St, New Milton Hampshire UK’,‘BH25 5DF’),
(18, ’7 Richmond St, Parkham Devon UK’,‘EX39 5DJ’),
(19, ’9165 laburnum St, Darnall Ward Yorkshire, South UK’,‘S4 7WN’)
insert into @IncorrectRows(Address_ID,TheAddress, ThePostcode)
SELECT Address_ID,TheAddress,ThePostCode FROM dbo.ExtractPostcode(@InputTable)
EXCEPT
SELECT Address_ID,TheAddress,ThePostCode FROM @outputTable;
If @@RowCount>0
Begin
PRINT ‘The following rows gave ‘;
SELECT Address_ID,TheAddress,ThePostCode FROM @IncorrectRows
RAISERROR (‘These rows gave unexpected results.’,16,1);
end
/* For tear-down, we drop the user defined table type */
IF EXISTS(SELECT * FROM sys.types WHERE name LIKE ‘OutputFormat’
and schema_name(schema_ID)=‘Dbo’)
DROP TYPE dbo.OutputFormat
GO
/* once this is working, the development work turns from a chore into a delight and one ends up hitting execute so much more often to catch mistakes as soon as possible. It also prevents a wildly-broken routine getting into a build! */