Hive RegexSerDe Multiline Log matching showing NULL values after one line

172 views Asked by At

Trying to load Apache Tomcat logs with multiple lines for single row , but its only loading single line and showing NULL values for rest of lines until it reaches next record .

I have tried regex from earlier post but they are not working , the same regular expression is working in testing tools here is the link for one of them : http://rubular.com/r/nVrWhuwg1c

It is properly recognizing the lines and seperating them in correct groups but the same code when i am trying in hive its not working .

Sample Record :

12-Dec-2013 12:03:26.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/jsp-api-2.3.jar] from classloader hierarchy
 java.io.FileNotFoundException: /usr/share/java/jsp-api-2.3.jar (No such file or directory)
    at java.util.zip.ZipFile.open(Native Method)
    at java.util.zip.ZipFile.<init>(ZipFile.java:225)
    at java.util.zip.ZipFile.<init>(ZipFile.java:155)
    at java.util.jar.JarFile.<init>(JarFile.java:166)
    at java.util.jar.JarFile.<init>(JarFile.java:130)
    at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
    at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
    at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
    at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
    at org.apache.catalina.startup.ContextConfig.processJarsForWebFragments(ContextConfig.java:1898)
    at org.apache.catalina.startup.ContextConfig.webConfig(ContextConfig.java:1126)
    at org.apache.catalina.startup.ContextConfig.configureStart(ContextConfig.java:775)
    at org.apache.catalina.startup.ContextConfig.lifecycleEvent(ContextConfig.java:299)
    at org.apache.catalina.util.LifecycleBase.fireLifecycleEvent(LifecycleBase.java:94)
    at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5105)
    at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
    at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
    at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
    at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
    at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
    at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

13-Dec-2016 12:03:24.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/el-api-3.0.jar] from classloader hierarchy
 java.io.FileNotFoundException: /usr/share/java/el-api-3.0.jar (No such file or directory)
    at java.util.zip.ZipFile.open(Native Method)
    at java.util.zip.ZipFile.<init>(ZipFile.java:225)
    at java.util.zip.ZipFile.<init>(ZipFile.java:155)
    at java.util.jar.JarFile.<init>(JarFile.java:166)
    at java.util.jar.JarFile.<init>(JarFile.java:130)
    at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
    at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
    at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
    at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
    at org.apache.jasper.servlet.TldScanner.scanJars(TldScanner.java:262)
    at org.apache.jasper.servlet.TldScanner.scan(TldScanner.java:104)
    at org.apache.jasper.servlet.JasperInitializer.onStartup(JasperInitializer.java:101)
    at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5196)
    at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
    at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
    at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
    at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
    at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
    at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)


Create Table Query which i am trying 

CREATE EXTERNAL TABLE apache_cat_log_test 
(
    logdatetime STRING, 
    logtype STRING, 
    requestid STRING, 
    verbosedata STRING,
    msg string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  "input.regex" = "^(\\d{2}-\\w{3}-\\d{4})\\s+(.*?)\\s+(\\w+)\\s*(.*?)\\s*$((?:\\s*(?!\\d{2}).*?$)*)"
 )
STORED AS TEXTFILE
location '/catlogs';
0

There are 0 answers